//#include "stdafx.h" #include "StringCodeConverter.h" #include #include //#include #include #include using namespace std; size_t StringCodeConverter::mbslen( const char *pcszSource ) { string strCurLocale = setlocale( LC_ALL, NULL ); setlocale( LC_ALL, "chinese-simplified" ); int mbl = 0; size_t cnt = 0; for (cnt = 0; *pcszSource; ++cnt) { mbl = mblen( pcszSource, MB_CUR_MAX ); pcszSource += mbl; } setlocale( LC_ALL, strCurLocale.c_str() ); return cnt; } wstring StringCodeConverter::mbs2unicode( const string &cstrSource ) { string strCurLocale = setlocale( LC_ALL, NULL ); setlocale( LC_ALL, "chinese-simplified" ); //改成中文环境,如果接受的参数不是中文字符串,得注意结果 const char *pcszSource = cstrSource.c_str(); size_t iCount = cstrSource.size() + 1; wchar_t *pwszBuffer = new wchar_t[ iCount ]; wmemset( pwszBuffer, 0, iCount ); size_t iConvertedCount = mbstowcs( pwszBuffer, pcszSource, iCount ); if ( iConvertedCount == -1 ) { delete [] pwszBuffer; throw string( "mbs2unicode参数有非中英文字符" ); } wstring wstrDest( pwszBuffer ); delete [] pwszBuffer; setlocale( LC_ALL, strCurLocale.c_str() ); return wstrDest; } void StringCodeConverter::mbs2unicode(const string &strSource, wstring &wstrDest) { const char *pcszSource = strSource.c_str(); string strCurLocale( setlocale(LC_ALL, NULL) );//先保存当前的locale设置 setlocale( LC_ALL, "chs" );//设置为中文 size_t iCharCount = strSource.size() + 1;//unicode字符串所需要的字符数 wchar_t *pwszBuffer = new wchar_t[ iCharCount ]; wmemset( pwszBuffer, 0, iCharCount ); mbstowcs( pwszBuffer, pcszSource, iCharCount ); setlocale( LC_ALL, strCurLocale.c_str() );//改回原来的locale设置 wstrDest = pwszBuffer; delete [] pwszBuffer; } void StringCodeConverter::mbs2unicode( const char *pcszSource, wchar_t *pwszDest ) throw (string) { if ( pcszSource == NULL || pwszDest == NULL ) { throw string( "参数指针为NULL" ); } string strCurLocale = setlocale( LC_ALL, NULL ); setlocale( LC_ALL, "chinese-simplified" ); //改成中文环境,如果接受的参数不是中文字符串,得注意结果 size_t iCharCount = mbslen( pcszSource ) + 1; if ( iCharCount == -1 ) { throw string( "源字符串的编码有非中英文字符" ); } wmemset( pwszDest, 0, iCharCount ); size_t iConvertedCount = mbstowcs( pwszDest, pcszSource, iCharCount ); if ( iConvertedCount == -1 ) { throw string( "源字符串的编码有非中英文字符" ); } *( pwszDest + iConvertedCount ) = NULL; setlocale( LC_ALL, strCurLocale.c_str() ); } void StringCodeConverter::mbs2utf8( const string &cstrSource, char *pszDest ) { const char *pcszSource = cstrSource.c_str(); string strCurLocale( setlocale( LC_ALL, NULL ) ); setlocale( LC_ALL, "chinese-simplified" ); //改成中文环境,如果接受的参数不是中文字符串,得注意结果 while ( *pcszSource != NULL ) { if ( *pcszSource > 0 ) { //英文字符,可以直接复制 *pszDest = *pcszSource; pcszSource++; pszDest++; } else { //非ascii英文字符,先转换成unicode,再转换utf8 int iLen = 0; wchar_t wUnicode = 0; char *pcUnicode = (char *)&wUnicode; //转换成unicode,返回mb字符的长度 iLen = mbtowc( &wUnicode, pcszSource, MB_CUR_MAX ); pszDest[0] = (0xE0 | ((pcUnicode[1] & 0xF0) >> 4)); pszDest[1] = (0x80 | ((pcUnicode[1] & 0x0F) << 2)) + ((pcUnicode[0] & 0xC0) >> 6); pszDest[2] = (0x80 | (pcUnicode[0] & 0x3F)); pszDest += 3; pcszSource += iLen; } } setlocale( LC_ALL, strCurLocale.c_str() ); *pszDest = NULL; } string StringCodeConverter::unicode2mbs( const wstring &wstrSource ) { string strCurLocale = setlocale( LC_ALL, NULL ); setlocale( LC_ALL, "chinese-simplified" ); //改成中文环境,如果接受的参数不是中文字符串,得注意结果 const wchar_t *pcwszSource = wstrSource.c_str(); size_t iCount = wstrSource.size() * 2 + 1; char *pszBuffer = new char[ iCount ]; memset( pszBuffer, 0, iCount ); size_t iConvertedCount = wcstombs( pszBuffer, pcwszSource, iCount ); if ( iConvertedCount == -1 ) { throw string( "unicode2mbs源字符串的编码有非中英文字符" ); } setlocale( LC_ALL, strCurLocale.c_str() ); string strDest( pszBuffer ); delete [] pszBuffer; return strDest; } void StringCodeConverter::unicode2mbs( const wstring &wstrSource, string &strDest ) { string strCurLocale = setlocale( LC_ALL, NULL ); setlocale( LC_ALL, "chs" ); size_t iCount = wstrSource.size() + 1; char *pszBuffer = new char[ iCount ]; const wchar_t *pcwszSource = wstrSource.c_str(); memset( pszBuffer, 0, iCount ); size_t iConvertedCount = wcstombs( pszBuffer, pcwszSource , iCount ); if ( iConvertedCount == -1 ) { delete [] pszBuffer; throw string( "unicode2mbs转换失败,字符串中有非中英文字符!" ); } strDest = pszBuffer; delete [] pszBuffer; setlocale( LC_ALL, strCurLocale.c_str() ); } void StringCodeConverter::unicode2mbs( const wchar_t *pcwszSource, char *pszDest ) { if ( pcwszSource == NULL || pszDest == NULL ) { throw string( "unicode2mbs函数,参数值为NULL" ); } string strCurLocale = setlocale( LC_ALL, NULL ); setlocale( LC_ALL, "chinese-simplified" ); //改成中文环境,如果接受的参数不是中文字符串,得注意结果 size_t iCount = wcslen( pcwszSource )*2 + 1; memset( pszDest, 0, iCount ); size_t iConvertedCount = wcstombs( pszDest, pcwszSource, iCount ); if ( iConvertedCount == -1 ) { throw string( "unicode2mbs转换失败,字符串中有非中英文字符!" ); } setlocale( LC_ALL, strCurLocale.c_str() ); } void StringCodeConverter::unicode2utf8( const wstring &cwstrSource, char *pszDest ) { if ( pszDest == NULL ) { throw string( "参数为NULL" ); } const wchar_t *pcwszSource = cwstrSource.c_str(); while ( *pcwszSource != NULL ) { unsigned short int iUnicode = *pcwszSource; if ( iUnicode < 128 ) { //小于128是英文字符,不需要转换 *pszDest = (char)iUnicode; pszDest++; pcwszSource++; } else { //大于128是mbs字符,需要转换 const char *pcszUnicode = (const char *)pcwszSource; pszDest[0] = (0xE0 | ((pcszUnicode[1] & 0xF0) >> 4 )); pszDest[1] = (0x80 | ((pcszUnicode[1] & 0x0F) << 2)) + ((pcszUnicode[0] & 0xC0) >> 6); pszDest[2] = (0x80 | (pcszUnicode[0] & 0x3F)); pszDest = pszDest + 3; pcwszSource++; } } *pszDest = NULL;//末尾的NULL字符 } string StringCodeConverter::utf8tombs( const char *pcszSource ) { if ( pcszSource == NULL ) { throw string( "参数为NULL" ); } string strCurLocale( setlocale( LC_ALL, NULL ) ); setlocale( LC_ALL, "chinese-simplified" ); //改成中文环境,如果接受的参数不是中文字符串,得注意结果 char *pszDest = new char[ strlen( pcszSource ) ]; char *pszBuffer = pszDest; while ( *pcszSource != 0 ) { if ( *pcszSource > 0 ) { //是ASCII字符 *pszDest = *pcszSource; pszDest++; pcszSource++; } else { wchar_t wUnicode = 0; char *pcUnicode = (char *)&wUnicode; pcUnicode[1] = ((pcszSource[0] & 0x0F) << 4) + ((pcszSource[1] >> 2) & 0x0F); pcUnicode[0] = ((pcszSource[1] & 0x03) << 6) + (pcszSource[2] & 0x3F); wchar_t wMcb; char *pcMbs = (char *)&wMcb; int iLen = 0; iLen = wctomb( pcMbs, wUnicode ); if ( iLen == 1 ) { *pszDest = *pcMbs; } else { pszDest[0] = pcMbs[0]; pszDest[1] = pcMbs[1]; } pszDest += iLen; pcszSource += 3; } } *pszDest = NULL; setlocale( LC_ALL, strCurLocale.c_str() ); strCurLocale = pszBuffer; //废物利用 return strCurLocale; } wstring StringCodeConverter::utf8tounicode( const char *pcszSource ) { if ( pcszSource == NULL ) { throw string( "参数为NULL" ); } string strCurLocale = setlocale( LC_ALL, NULL ); setlocale( LC_ALL, "chinese-simplified" ); //改成中文环境,如果接受的参数不是中文字符串,得注意结果 wchar_t *pwszBuffer = new wchar_t[ strlen( pcszSource ) ]; wchar_t *pwszDest = pwszBuffer; while ( *pcszSource != NULL ) { if ( *pcszSource > 0 ) { //是ASCII字符 wchar_t wUnicode; mbtowc( &wUnicode, pcszSource, 1 ); *pwszDest = wUnicode; pwszDest++; pcszSource++; } else { //中文字符,3个字节 wchar_t wUnicode; char *pcUnicode = (char *)&wUnicode; pcUnicode[1] = ((pcszSource[0] & 0x0F) << 4) + ((pcszSource[1] >> 2) & 0x0F); pcUnicode[0] = ((pcszSource[1] & 0x03) << 6) + (pcszSource[2] & 0x3F); *pwszDest = wUnicode; pwszDest++; pcszSource += 3; } } setlocale( LC_ALL, strCurLocale.c_str() ); *pwszDest = NULL;//末尾空字符 wstring wstrDest( pwszBuffer ); return wstrDest; }