2018-06-01 09:36:01 +00:00
|
|
|
|
//#include "stdafx.h"
|
|
|
|
|
#include "StringCodeConverter.h"
|
|
|
|
|
#include <locale.h>
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
//#include <stdio.h>
|
|
|
|
|
#include <string>
|
|
|
|
|
#include <cstring>
|
2018-06-01 10:21:26 +00:00
|
|
|
|
|
2018-06-01 09:36:01 +00:00
|
|
|
|
using namespace std;
|
|
|
|
|
|
|
|
|
|
/**
 * @brief Counts the multibyte characters in a NUL-terminated string.
 *
 * The count is taken with the C locale temporarily switched to
 * "chinese-simplified"; the previous locale is restored before returning.
 * NOTE(review): setlocale() modifies process-wide state, so this is not safe
 * to call concurrently with other locale-dependent code.
 *
 * @param pcszSource NUL-terminated multibyte string. NULL is treated as empty.
 * @return Number of multibyte characters, or static_cast<size_t>(-1) when an
 *         invalid multibyte sequence is encountered.
 */
size_t StringCodeConverter::mbslen( const char *pcszSource )
{
    if ( pcszSource == NULL )
    {
        return 0;
    }

    const std::string strCurLocale = setlocale( LC_ALL, NULL );
    setlocale( LC_ALL, "chinese-simplified" );

    size_t cnt = 0;
    bool bValid = true;

    while ( *pcszSource != '\0' )
    {
        const int mbl = mblen( pcszSource, MB_CUR_MAX );

        // mblen() yields -1 for an invalid sequence; advancing by that value
        // would walk the pointer backwards and never terminate.
        if ( mbl <= 0 )
        {
            bValid = false;
            break;
        }

        pcszSource += mbl;
        ++cnt;
    }

    setlocale( LC_ALL, strCurLocale.c_str() );

    return bValid ? cnt : static_cast<size_t>( -1 );
}
|
|
|
|
|
|
|
|
|
|
wstring StringCodeConverter::mbs2unicode( const string &cstrSource )
|
|
|
|
|
{
|
|
|
|
|
string strCurLocale = setlocale( LC_ALL, NULL );
|
2018-06-01 10:21:26 +00:00
|
|
|
|
setlocale( LC_ALL, "chinese-simplified" ); //改成中文环境,如果接受的参数不是中文字符串,得注意结果
|
2018-06-01 09:36:01 +00:00
|
|
|
|
|
|
|
|
|
const char *pcszSource = cstrSource.c_str();
|
|
|
|
|
|
|
|
|
|
size_t iCount = cstrSource.size() + 1;
|
|
|
|
|
wchar_t *pwszBuffer = new wchar_t[ iCount ];
|
|
|
|
|
|
|
|
|
|
wmemset( pwszBuffer, 0, iCount );
|
|
|
|
|
size_t iConvertedCount = mbstowcs( pwszBuffer, pcszSource, iCount );
|
|
|
|
|
|
|
|
|
|
if ( iConvertedCount == -1 )
|
|
|
|
|
{
|
2018-06-02 09:47:28 +00:00
|
|
|
|
delete [] pwszBuffer;
|
2018-06-01 10:21:26 +00:00
|
|
|
|
throw string( "mbs2unicode参数有非中英文字符" );
|
2018-06-01 09:36:01 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
wstring wstrDest( pwszBuffer );
|
|
|
|
|
delete [] pwszBuffer;
|
|
|
|
|
|
|
|
|
|
setlocale( LC_ALL, strCurLocale.c_str() );
|
|
|
|
|
|
|
|
|
|
return wstrDest;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * @brief Converts a multibyte string to wide characters, storing the result
 *        in an output parameter.
 *
 * The C locale is switched to "chs" for the duration of the conversion and
 * then put back.
 * NOTE(review): the return value of mbstowcs() is not inspected, so an
 * invalid sequence silently produces whatever was converted before the
 * failure. Confirm whether callers rely on this before making it throw.
 *
 * @param strSource Multibyte text; conversion stops at the first NUL byte.
 * @param wstrDest  Receives the converted text up to the first wide NUL.
 */
void StringCodeConverter::mbs2unicode(const std::string &strSource, std::wstring &wstrDest)
{
    // Remember the active locale, then switch to Chinese.
    const std::string savedLocale( setlocale( LC_ALL, NULL ) );
    setlocale( LC_ALL, "chs" );

    // Upper bound on wide characters needed, terminator included; the scratch
    // buffer starts out zero-filled.
    const size_t wideCapacity = strSource.size() + 1;
    std::wstring wideScratch( wideCapacity, L'\0' );

    mbstowcs( &wideScratch[0], strSource.c_str(), wideCapacity );

    // Put the caller's locale back.
    setlocale( LC_ALL, savedLocale.c_str() );

    // Assigning from the C string keeps only the text before the first NUL.
    wstrDest = wideScratch.c_str();
}
|
|
|
|
|
|
|
|
|
|
/**
 * @brief Converts a NUL-terminated multibyte string into a caller-supplied
 *        wide-character buffer.
 *
 * The conversion runs with the C locale temporarily switched to
 * "chinese-simplified"; the previous locale is restored on every exit path.
 *
 * @param pcszSource NUL-terminated multibyte string.
 * @param pwszDest   Destination buffer. It must hold one wide character per
 *                   source character plus a terminator; the size cannot be
 *                   checked here.
 * @throws std::string when either pointer is NULL or the source contains an
 *         invalid multibyte sequence.
 *
 * NOTE(review): the dynamic exception specification is kept so the definition
 * keeps matching its declaration; such specifications were removed in C++17.
 */
void StringCodeConverter::mbs2unicode( const char *pcszSource, wchar_t *pwszDest ) throw (std::string)
{
    if ( pcszSource == NULL || pwszDest == NULL )
    {
        throw std::string( "参数指针为NULL" );
    }

    const std::string strCurLocale = setlocale( LC_ALL, NULL );
    setlocale( LC_ALL, "chinese-simplified" );

    // Dry run with a NULL destination: yields the number of wide characters
    // required, or (size_t)-1 for an invalid sequence. The original compared
    // "length + 1" against -1, which could never detect the failure.
    const size_t iRequired = mbstowcs( NULL, pcszSource, 0 );

    if ( iRequired == static_cast<size_t>( -1 ) )
    {
        setlocale( LC_ALL, strCurLocale.c_str() );
        throw std::string( "源字符串的编码有非中英文字符" );
    }

    const size_t iCharCount = iRequired + 1;

    wmemset( pwszDest, 0, iCharCount );
    const size_t iConvertedCount = mbstowcs( pwszDest, pcszSource, iCharCount );

    // Restore before any throw so a failure never leaves the locale switched.
    setlocale( LC_ALL, strCurLocale.c_str() );

    if ( iConvertedCount == static_cast<size_t>( -1 ) )
    {
        throw std::string( "源字符串的编码有非中英文字符" );
    }

    pwszDest[ iConvertedCount ] = L'\0';
}
|
|
|
|
|
|
|
|
|
|
/**
 * @brief Converts a multibyte string to UTF-8 in a caller-supplied buffer.
 *
 * ASCII bytes are copied unchanged. Every other character is converted to a
 * wide character with mbtowc() under the "chinese-simplified" locale, and the
 * low 16 bits of that value are written as a three-byte UTF-8 sequence.
 *
 * NOTE(review): code points U+0080..U+07FF are written in the three-byte
 * form, which strict UTF-8 consumers reject as overlong. This is kept because
 * utf8tombs()/utf8tounicode() in this file have historically assumed three
 * bytes per non-ASCII character.
 *
 * @param cstrSource Multibyte text; conversion stops at the first NUL byte.
 * @param pszDest    Destination buffer. It must hold up to three bytes per
 *                   source character plus a terminator; the size cannot be
 *                   checked here.
 * @throws std::string when pszDest is NULL or the source contains an invalid
 *         multibyte sequence.
 */
void StringCodeConverter::mbs2utf8( const std::string &cstrSource, char *pszDest )
{
    if ( pszDest == NULL )
    {
        throw std::string( "参数为NULL" );
    }

    // Switches to the Chinese locale and restores the caller's locale on
    // every exit path, including exceptions.
    struct ScopedChineseLocale
    {
        std::string strSaved;
        ScopedChineseLocale() : strSaved( setlocale( LC_ALL, NULL ) )
        {
            setlocale( LC_ALL, "chinese-simplified" );
        }
        ~ScopedChineseLocale()
        {
            setlocale( LC_ALL, strSaved.c_str() );
        }
    };
    const ScopedChineseLocale localeGuard;

    const char *pcszSource = cstrSource.c_str();

    while ( *pcszSource != '\0' )
    {
        // Test through unsigned char so the check does not depend on whether
        // plain char is signed on this platform.
        if ( static_cast<unsigned char>( *pcszSource ) < 0x80 )
        {
            // ASCII: copy as-is.
            *pszDest++ = *pcszSource++;
            continue;
        }

        // Non-ASCII: multibyte -> wide character -> UTF-8.
        wchar_t wUnicode = 0;
        const int iLen = mbtowc( &wUnicode, pcszSource, MB_CUR_MAX );

        // mbtowc() yields -1 for an invalid sequence; advancing by that value
        // would walk the source pointer backwards.
        if ( iLen <= 0 )
        {
            *pszDest = '\0';
            throw std::string( "源字符串的编码有非中英文字符" );
        }

        // Work on the numeric value instead of aliasing the wchar_t as bytes,
        // so the result does not depend on endianness.
        const unsigned int uCode = static_cast<unsigned int>( wUnicode ) & 0xFFFFu;

        pszDest[0] = static_cast<char>( 0xE0 | ( uCode >> 12 ) );
        pszDest[1] = static_cast<char>( 0x80 | ( ( uCode >> 6 ) & 0x3F ) );
        pszDest[2] = static_cast<char>( 0x80 | ( uCode & 0x3F ) );

        pszDest += 3;
        pcszSource += iLen;
    }

    *pszDest = '\0';
}
|
|
|
|
|
|
|
|
|
|
string StringCodeConverter::unicode2mbs( const wstring &wstrSource )
|
|
|
|
|
{
|
|
|
|
|
string strCurLocale = setlocale( LC_ALL, NULL );
|
2018-06-01 10:21:26 +00:00
|
|
|
|
setlocale( LC_ALL, "chinese-simplified" ); //改成中文环境,如果接受的参数不是中文字符串,得注意结果
|
2018-06-01 09:36:01 +00:00
|
|
|
|
|
|
|
|
|
const wchar_t *pcwszSource = wstrSource.c_str();
|
|
|
|
|
|
|
|
|
|
size_t iCount = wstrSource.size() * 2 + 1;
|
|
|
|
|
char *pszBuffer = new char[ iCount ];
|
|
|
|
|
memset( pszBuffer, 0, iCount );
|
|
|
|
|
|
|
|
|
|
size_t iConvertedCount = wcstombs( pszBuffer, pcwszSource, iCount );
|
|
|
|
|
|
|
|
|
|
if ( iConvertedCount == -1 )
|
|
|
|
|
{
|
2018-06-01 10:21:26 +00:00
|
|
|
|
throw string( "unicode2mbs源字符串的编码有非中英文字符" );
|
2018-06-01 09:36:01 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
setlocale( LC_ALL, strCurLocale.c_str() );
|
|
|
|
|
|
|
|
|
|
string strDest( pszBuffer );
|
|
|
|
|
delete [] pszBuffer;
|
|
|
|
|
|
|
|
|
|
return strDest;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * @brief Converts a wide-character string to a multibyte string, storing the
 *        result in an output parameter.
 *
 * The conversion runs with the C locale temporarily switched to "chs"; the
 * previous locale is restored on every exit path.
 *
 * @param wstrSource Wide text; conversion stops at the first NUL character.
 * @param strDest    Receives the converted text. Left untouched on failure.
 * @throws std::string when wcstombs() meets a character it cannot convert.
 */
void StringCodeConverter::unicode2mbs( const std::wstring &wstrSource, std::string &strDest )
{
    const std::string strCurLocale = setlocale( LC_ALL, NULL );
    setlocale( LC_ALL, "chs" );

    // The original buffer was size() + 1 bytes, i.e. one byte per wide
    // character, which truncates any double-byte output and can leave it
    // without a terminator. Size it from the locale's widest character, with
    // a floor of two bytes per character.
    size_t iBytesPerChar = static_cast<size_t>( MB_CUR_MAX );
    if ( iBytesPerChar < 2 )
    {
        iBytesPerChar = 2;
    }

    std::string strBuffer( wstrSource.size() * iBytesPerChar + 1, '\0' );

    const size_t iConvertedCount =
        wcstombs( &strBuffer[0], wstrSource.c_str(), strBuffer.size() );

    // Restore before any throw so a failure never leaves the locale switched.
    setlocale( LC_ALL, strCurLocale.c_str() );

    if ( iConvertedCount == static_cast<size_t>( -1 ) )
    {
        throw std::string( "unicode2mbs转换失败,字符串中有非中英文字符!" );
    }

    strDest.assign( strBuffer, 0, iConvertedCount );
}
|
|
|
|
|
|
|
|
|
|
/**
 * @brief Converts a NUL-terminated wide string into a caller-supplied
 *        multibyte buffer.
 *
 * The conversion runs with the C locale temporarily switched to
 * "chinese-simplified"; the previous locale is restored on every exit path.
 *
 * @param pcwszSource NUL-terminated wide string.
 * @param pszDest     Destination buffer of at least wcslen(pcwszSource) * 2 + 1
 *                    bytes; that many bytes are zeroed before converting.
 * @throws std::string when either pointer is NULL or a character cannot be
 *         converted.
 */
void StringCodeConverter::unicode2mbs( const wchar_t *pcwszSource, char *pszDest )
{
    if ( pcwszSource == NULL || pszDest == NULL )
    {
        throw std::string( "unicode2mbs函数,参数值为NULL" );
    }

    const std::string strCurLocale = setlocale( LC_ALL, NULL );
    setlocale( LC_ALL, "chinese-simplified" );

    // Two bytes per wide character plus the terminator.
    const size_t iCount = wcslen( pcwszSource ) * 2 + 1;
    memset( pszDest, 0, iCount );

    // Convert into at most iCount - 1 bytes so the final zeroed byte always
    // survives as the terminator, even if the active locale emits more than
    // two bytes per character.
    const size_t iConvertedCount = wcstombs( pszDest, pcwszSource, iCount - 1 );

    // Restore before any throw so a failure never leaves the locale switched.
    setlocale( LC_ALL, strCurLocale.c_str() );

    if ( iConvertedCount == static_cast<size_t>( -1 ) )
    {
        throw std::string( "unicode2mbs转换失败,字符串中有非中英文字符!" );
    }
}
|
|
|
|
|
|
|
|
|
|
/**
 * @brief Encodes a wide-character string as UTF-8 into a caller-supplied
 *        buffer.
 *
 * Only the low 16 bits of each wide character are used. Values below 128 are
 * written as a single byte; everything else is written as a three-byte
 * sequence.
 *
 * NOTE(review): code points U+0080..U+07FF are written in the three-byte
 * form, which strict UTF-8 consumers reject as overlong. This is kept because
 * utf8tombs()/utf8tounicode() in this file have historically assumed three
 * bytes per non-ASCII character.
 *
 * @param cwstrSource Wide text; encoding stops at the first NUL character.
 * @param pszDest     Destination buffer. It must hold up to three bytes per
 *                    source character plus a terminator; the size cannot be
 *                    checked here.
 * @throws std::string when pszDest is NULL.
 */
void StringCodeConverter::unicode2utf8( const std::wstring &cwstrSource, char *pszDest )
{
    if ( pszDest == NULL )
    {
        throw std::string( "参数为NULL" );
    }

    for ( const wchar_t *pcwszSource = cwstrSource.c_str();
          *pcwszSource != L'\0';
          ++pcwszSource )
    {
        // Use the numeric value rather than reading the wchar_t through a
        // char pointer, which only worked on little-endian machines.
        const unsigned int uCode = static_cast<unsigned int>( *pcwszSource ) & 0xFFFFu;

        if ( uCode < 0x80 )
        {
            // ASCII: one byte, no transformation needed.
            *pszDest++ = static_cast<char>( uCode );
            continue;
        }

        // Everything else: three-byte form 1110xxxx 10xxxxxx 10xxxxxx.
        pszDest[0] = static_cast<char>( 0xE0 | ( uCode >> 12 ) );
        pszDest[1] = static_cast<char>( 0x80 | ( ( uCode >> 6 ) & 0x3F ) );
        pszDest[2] = static_cast<char>( 0x80 | ( uCode & 0x3F ) );
        pszDest += 3;
    }

    *pszDest = '\0';  // terminating NUL
}
|
|
|
|
|
|
|
|
|
|
string StringCodeConverter::utf8tombs( const char *pcszSource )
|
|
|
|
|
{
|
|
|
|
|
if ( pcszSource == NULL )
|
|
|
|
|
{
|
2018-06-01 10:21:26 +00:00
|
|
|
|
throw string( "参数为NULL" );
|
2018-06-01 09:36:01 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
string strCurLocale( setlocale( LC_ALL, NULL ) );
|
2018-06-01 10:21:26 +00:00
|
|
|
|
setlocale( LC_ALL, "chinese-simplified" ); //改成中文环境,如果接受的参数不是中文字符串,得注意结果
|
2018-06-01 09:36:01 +00:00
|
|
|
|
|
|
|
|
|
char *pszDest = new char[ strlen( pcszSource ) ];
|
|
|
|
|
char *pszBuffer = pszDest;
|
|
|
|
|
|
|
|
|
|
while ( *pcszSource != 0 )
|
|
|
|
|
{
|
|
|
|
|
if ( *pcszSource > 0 )
|
|
|
|
|
{
|
2018-06-01 10:21:26 +00:00
|
|
|
|
//是ASCII字符
|
2018-06-01 09:36:01 +00:00
|
|
|
|
*pszDest = *pcszSource;
|
|
|
|
|
|
|
|
|
|
pszDest++;
|
|
|
|
|
pcszSource++;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
wchar_t wUnicode = 0;
|
|
|
|
|
char *pcUnicode = (char *)&wUnicode;
|
|
|
|
|
|
|
|
|
|
pcUnicode[1] = ((pcszSource[0] & 0x0F) << 4) + ((pcszSource[1] >> 2) & 0x0F);
|
|
|
|
|
pcUnicode[0] = ((pcszSource[1] & 0x03) << 6) + (pcszSource[2] & 0x3F);
|
|
|
|
|
|
|
|
|
|
wchar_t wMcb;
|
|
|
|
|
char *pcMbs = (char *)&wMcb;
|
|
|
|
|
int iLen = 0;
|
|
|
|
|
|
|
|
|
|
iLen = wctomb( pcMbs, wUnicode );
|
|
|
|
|
|
|
|
|
|
if ( iLen == 1 )
|
|
|
|
|
{
|
|
|
|
|
*pszDest = *pcMbs;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
pszDest[0] = pcMbs[0];
|
|
|
|
|
pszDest[1] = pcMbs[1];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pszDest += iLen;
|
|
|
|
|
pcszSource += 3;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
*pszDest = NULL;
|
|
|
|
|
|
|
|
|
|
setlocale( LC_ALL, strCurLocale.c_str() );
|
2018-06-01 10:21:26 +00:00
|
|
|
|
strCurLocale = pszBuffer; //废物利用
|
2018-06-01 09:36:01 +00:00
|
|
|
|
|
|
|
|
|
return strCurLocale;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
wstring StringCodeConverter::utf8tounicode( const char *pcszSource )
|
|
|
|
|
{
|
|
|
|
|
if ( pcszSource == NULL )
|
|
|
|
|
{
|
2018-06-01 10:21:26 +00:00
|
|
|
|
throw string( "参数为NULL" );
|
2018-06-01 09:36:01 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
string strCurLocale = setlocale( LC_ALL, NULL );
|
2018-06-01 10:21:26 +00:00
|
|
|
|
setlocale( LC_ALL, "chinese-simplified" ); //改成中文环境,如果接受的参数不是中文字符串,得注意结果
|
2018-06-01 09:36:01 +00:00
|
|
|
|
|
|
|
|
|
wchar_t *pwszBuffer = new wchar_t[ strlen( pcszSource ) ];
|
|
|
|
|
wchar_t *pwszDest = pwszBuffer;
|
|
|
|
|
|
|
|
|
|
while ( *pcszSource != NULL )
|
|
|
|
|
{
|
|
|
|
|
if ( *pcszSource > 0 )
|
|
|
|
|
{
|
2018-06-01 10:21:26 +00:00
|
|
|
|
//是ASCII字符
|
2018-06-01 09:36:01 +00:00
|
|
|
|
wchar_t wUnicode;
|
|
|
|
|
|
|
|
|
|
mbtowc( &wUnicode, pcszSource, 1 );
|
|
|
|
|
|
|
|
|
|
*pwszDest = wUnicode;
|
|
|
|
|
|
|
|
|
|
pwszDest++;
|
|
|
|
|
pcszSource++;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
2018-06-01 10:21:26 +00:00
|
|
|
|
//中文字符,3个字节
|
2018-06-01 09:36:01 +00:00
|
|
|
|
wchar_t wUnicode;
|
|
|
|
|
char *pcUnicode = (char *)&wUnicode;
|
|
|
|
|
|
|
|
|
|
pcUnicode[1] = ((pcszSource[0] & 0x0F) << 4) + ((pcszSource[1] >> 2) & 0x0F);
|
|
|
|
|
pcUnicode[0] = ((pcszSource[1] & 0x03) << 6) + (pcszSource[2] & 0x3F);
|
|
|
|
|
|
|
|
|
|
*pwszDest = wUnicode;
|
|
|
|
|
|
|
|
|
|
pwszDest++;
|
|
|
|
|
pcszSource += 3;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
setlocale( LC_ALL, strCurLocale.c_str() );
|
|
|
|
|
|
2018-06-01 10:21:26 +00:00
|
|
|
|
*pwszDest = NULL;//末尾空字符
|
2018-06-01 09:36:01 +00:00
|
|
|
|
wstring wstrDest( pwszBuffer );
|
|
|
|
|
|
|
|
|
|
return wstrDest;
|
|
|
|
|
}
|