telsale-management/代码/cpp/telsale_aux_kit/source/StringCodeConverter.cpp

366 lines
8.7 KiB
C++
Raw Normal View History

//#include "stdafx.h"
#include "StringCodeConverter.h"
#include <locale.h>
#include <stdlib.h>
//#include <stdio.h>
#include <string>
#include <cstring>
2018-06-01 10:21:26 +00:00
using namespace std;
// Counts the multibyte characters (not bytes) in a NUL-terminated string,
// interpreting it under the "chinese-simplified" locale.
//
// The process-wide locale is switched for the duration of the call and is
// restored on every exit path.
//
// pcszSource: NUL-terminated multibyte string. NULL is treated as empty.
// Returns the number of characters, or static_cast<size_t>(-1) when an
// invalid multibyte sequence is found (same sentinel convention as mbstowcs).
size_t StringCodeConverter::mbslen( const char *pcszSource )
{
    if ( pcszSource == NULL )
    {
        return 0;
    }

    // Saves the current locale and puts it back when the function exits.
    struct LocaleRestorer
    {
        std::string m_strSaved;
        bool        m_bValid;
        LocaleRestorer() : m_bValid( false )
        {
            const char *pcszCurrent = setlocale( LC_ALL, NULL );
            if ( pcszCurrent != NULL )
            {
                m_strSaved = pcszCurrent;
                m_bValid = true;
            }
        }
        ~LocaleRestorer()
        {
            if ( m_bValid )
            {
                setlocale( LC_ALL, m_strSaved.c_str() );
            }
        }
    } localeRestorer;

    setlocale( LC_ALL, "chinese-simplified" );
    (void)mblen( NULL, 0 ); // reset any shift state left by a previous call

    size_t cnt = 0;
    while ( *pcszSource != '\0' )
    {
        const int mbl = mblen( pcszSource, MB_CUR_MAX );
        if ( mbl <= 0 )
        {
            // -1 means an invalid sequence. Advancing by it would walk the
            // pointer backwards and never terminate, so report the error.
            return static_cast<size_t>( -1 );
        }
        pcszSource += mbl;
        ++cnt;
    }
    return cnt;
}
// Converts a multibyte string to a wide string using the
// "chinese-simplified" locale.
//
// The process-wide locale is switched for the duration of the call and is
// restored on every exit path, including when the exception is thrown.
//
// cstrSource: multibyte text; conversion stops at the first NUL byte.
// Returns the converted wide string.
// Throws std::string when the source contains a byte sequence that is not
// valid in the conversion locale.
wstring StringCodeConverter::mbs2unicode( const string &cstrSource )
{
    // Saves the current locale and puts it back when the function exits.
    struct LocaleRestorer
    {
        std::string m_strSaved;
        bool        m_bValid;
        LocaleRestorer() : m_bValid( false )
        {
            const char *pcszCurrent = setlocale( LC_ALL, NULL );
            if ( pcszCurrent != NULL )
            {
                m_strSaved = pcszCurrent;
                m_bValid = true;
            }
        }
        ~LocaleRestorer()
        {
            if ( m_bValid )
            {
                setlocale( LC_ALL, m_strSaved.c_str() );
            }
        }
    } localeRestorer;

    // Switch to a Chinese locale; results for non-Chinese input depend on it.
    setlocale( LC_ALL, "chinese-simplified" );

    // A wide string never has more characters than the source has bytes,
    // so size() + 1 elements always leave room for the terminator.
    const size_t iCount = cstrSource.size() + 1;
    std::wstring wstrBuffer( iCount, L'\0' );

    const size_t iConvertedCount = mbstowcs( &wstrBuffer[ 0 ], cstrSource.c_str(), iCount );
    if ( iConvertedCount == static_cast<size_t>( -1 ) )
    {
        throw std::string( "mbs2unicode参数有非中英文字符" );
    }

    wstrBuffer.resize( iConvertedCount );
    return wstrBuffer;
}
void StringCodeConverter::mbs2unicode(const string &strSource, wstring &wstrDest)
{
const char *pcszSource = strSource.c_str();
2018-06-01 10:21:26 +00:00
string strCurLocale( setlocale(LC_ALL, NULL) );//先保存当前的locale设置
2018-06-01 10:21:26 +00:00
setlocale( LC_ALL, "chs" );//设置为中文
2018-06-01 10:21:26 +00:00
size_t iCharCount = strSource.size() + 1;//unicode字符串所需要的字符数
wchar_t *pwszBuffer = new wchar_t[ iCharCount ];
wmemset( pwszBuffer, 0, iCharCount );
mbstowcs( pwszBuffer, pcszSource, iCharCount );
2018-06-01 10:21:26 +00:00
setlocale( LC_ALL, strCurLocale.c_str() );//改回原来的locale设置
wstrDest = pwszBuffer;
delete [] pwszBuffer;
}
// Converts a NUL-terminated multibyte string into a caller-supplied wide
// character buffer using the "chinese-simplified" locale.
//
// The process-wide locale is switched for the duration of the call and is
// restored on every exit path, including when an exception is thrown.
//
// pcszSource: NUL-terminated multibyte text.
// pwszDest:   destination buffer; must have room for one wchar_t per source
//             character plus the terminator.
// Throws std::string when either pointer is NULL or the source contains a
// byte sequence that is not valid in the conversion locale.
void StringCodeConverter::mbs2unicode( const char *pcszSource, wchar_t *pwszDest ) throw (string)
{
    if ( pcszSource == NULL || pwszDest == NULL )
    {
        throw std::string( "参数指针为NULL" );
    }

    // Saves the current locale and puts it back when the function exits.
    struct LocaleRestorer
    {
        std::string m_strSaved;
        bool        m_bValid;
        LocaleRestorer() : m_bValid( false )
        {
            const char *pcszCurrent = setlocale( LC_ALL, NULL );
            if ( pcszCurrent != NULL )
            {
                m_strSaved = pcszCurrent;
                m_bValid = true;
            }
        }
        ~LocaleRestorer()
        {
            if ( m_bValid )
            {
                setlocale( LC_ALL, m_strSaved.c_str() );
            }
        }
    } localeRestorer;

    // Switch to a Chinese locale; results for non-Chinese input depend on it.
    setlocale( LC_ALL, "chinese-simplified" );

    // Test the character count for the error sentinel BEFORE adding the
    // terminator slot; after the addition the sentinel can no longer be seen.
    const size_t iSourceChars = mbslen( pcszSource );
    if ( iSourceChars == static_cast<size_t>( -1 ) )
    {
        throw std::string( "源字符串的编码有非中英文字符" );
    }
    const size_t iCharCount = iSourceChars + 1;

    wmemset( pwszDest, 0, iCharCount );
    const size_t iConvertedCount = mbstowcs( pwszDest, pcszSource, iCharCount );
    if ( iConvertedCount == static_cast<size_t>( -1 ) )
    {
        throw std::string( "源字符串的编码有非中英文字符" );
    }

    // Terminate explicitly, never writing beyond the iCharCount elements
    // this function is entitled to touch.
    const size_t iTerminator = ( iConvertedCount < iCharCount ) ? iConvertedCount : iCharCount - 1;
    pwszDest[ iTerminator ] = L'\0';
}
// Converts a multibyte string (interpreted under the "chinese-simplified"
// locale) to UTF-8 and writes it, NUL-terminated, into pszDest.
//
// Each character is emitted in its shortest UTF-8 form: 1 byte for ASCII,
// 2 bytes below U+0800, 3 bytes below U+10000 and 4 bytes above that.
//
// The process-wide locale is switched for the duration of the call and is
// restored on every exit path, including when an exception is thrown.
//
// cstrSource: multibyte text; conversion stops at the first NUL byte.
// pszDest:    destination buffer; 3 * cstrSource.size() + 1 bytes is always
//             sufficient.
// Throws std::string when pszDest is NULL or the source contains a byte
// sequence that is not valid in the conversion locale. In the latter case
// the text converted so far is left NUL-terminated in pszDest.
void StringCodeConverter::mbs2utf8( const string &cstrSource, char *pszDest )
{
    if ( pszDest == NULL )
    {
        throw std::string( "参数为NULL" );
    }

    // Saves the current locale and puts it back when the function exits.
    struct LocaleRestorer
    {
        std::string m_strSaved;
        bool        m_bValid;
        LocaleRestorer() : m_bValid( false )
        {
            const char *pcszCurrent = setlocale( LC_ALL, NULL );
            if ( pcszCurrent != NULL )
            {
                m_strSaved = pcszCurrent;
                m_bValid = true;
            }
        }
        ~LocaleRestorer()
        {
            if ( m_bValid )
            {
                setlocale( LC_ALL, m_strSaved.c_str() );
            }
        }
    } localeRestorer;

    // Switch to a Chinese locale; results for non-Chinese input depend on it.
    setlocale( LC_ALL, "chinese-simplified" );
    (void)mbtowc( NULL, NULL, 0 ); // reset any shift state left by a previous call

    const char *pcszSource = cstrSource.c_str();
    while ( *pcszSource != '\0' )
    {
        // Compare as unsigned so the test does not depend on whether plain
        // char is signed on this platform.
        const unsigned char ucLead = static_cast<unsigned char>( *pcszSource );
        if ( ucLead < 0x80 )
        {
            // ASCII is identical in UTF-8 and can be copied as is.
            *pszDest++ = *pcszSource++;
            continue;
        }

        // Non-ASCII: decode to a wide character first, then encode as UTF-8.
        wchar_t wUnicode = 0;
        const int iLen = mbtowc( &wUnicode, pcszSource, MB_CUR_MAX );
        if ( iLen <= 0 )
        {
            // Advancing by -1 would walk backwards through the source.
            *pszDest = '\0';
            throw std::string( "mbs2utf8源字符串的编码有非中英文字符" );
        }
        pcszSource += iLen;

        // Work on the numeric value, not on the bytes of wchar_t, so the
        // result does not depend on byte order or on sizeof(wchar_t).
        const unsigned long ulCode = static_cast<unsigned long>( wUnicode );
        if ( ulCode < 0x80UL )
        {
            *pszDest++ = static_cast<char>( ulCode );
        }
        else if ( ulCode < 0x800UL )
        {
            *pszDest++ = static_cast<char>( 0xC0 | ( ulCode >> 6 ) );
            *pszDest++ = static_cast<char>( 0x80 | ( ulCode & 0x3F ) );
        }
        else if ( ulCode < 0x10000UL )
        {
            *pszDest++ = static_cast<char>( 0xE0 | ( ulCode >> 12 ) );
            *pszDest++ = static_cast<char>( 0x80 | ( ( ulCode >> 6 ) & 0x3F ) );
            *pszDest++ = static_cast<char>( 0x80 | ( ulCode & 0x3F ) );
        }
        else
        {
            *pszDest++ = static_cast<char>( 0xF0 | ( ( ulCode >> 18 ) & 0x07 ) );
            *pszDest++ = static_cast<char>( 0x80 | ( ( ulCode >> 12 ) & 0x3F ) );
            *pszDest++ = static_cast<char>( 0x80 | ( ( ulCode >> 6 ) & 0x3F ) );
            *pszDest++ = static_cast<char>( 0x80 | ( ulCode & 0x3F ) );
        }
    }
    *pszDest = '\0';
}
// Converts a wide string to a multibyte string using the
// "chinese-simplified" locale.
//
// The process-wide locale is switched for the duration of the call and is
// restored on every exit path, including when the exception is thrown.
//
// wstrSource: wide text; conversion stops at the first NUL character.
// Returns the converted multibyte string.
// Throws std::string when the source contains a character that cannot be
// represented in the conversion locale.
string StringCodeConverter::unicode2mbs( const wstring &wstrSource )
{
    // Saves the current locale and puts it back when the function exits.
    struct LocaleRestorer
    {
        std::string m_strSaved;
        bool        m_bValid;
        LocaleRestorer() : m_bValid( false )
        {
            const char *pcszCurrent = setlocale( LC_ALL, NULL );
            if ( pcszCurrent != NULL )
            {
                m_strSaved = pcszCurrent;
                m_bValid = true;
            }
        }
        ~LocaleRestorer()
        {
            if ( m_bValid )
            {
                setlocale( LC_ALL, m_strSaved.c_str() );
            }
        }
    } localeRestorer;

    // Switch to a Chinese locale; results for non-Chinese input depend on it.
    setlocale( LC_ALL, "chinese-simplified" );

    // Size the buffer from the active locale's longest character (never
    // less than 2 bytes) so the result and its terminator always fit.
    const size_t iMaxBytes = static_cast<size_t>( MB_CUR_MAX );
    const size_t iBytesPerChar = ( iMaxBytes < 2 ) ? 2 : iMaxBytes;
    const size_t iCount = wstrSource.size() * iBytesPerChar + 1;

    // The buffer owns its memory, so the throw below cannot leak it.
    std::string strBuffer( iCount, '\0' );
    const size_t iConvertedCount = wcstombs( &strBuffer[ 0 ], wstrSource.c_str(), iCount );
    if ( iConvertedCount == static_cast<size_t>( -1 ) )
    {
        throw std::string( "unicode2mbs源字符串的编码有非中英文字符" );
    }

    strBuffer.resize( iConvertedCount );
    return strBuffer;
}
void StringCodeConverter::unicode2mbs( const wstring &wstrSource, string &strDest )
{
string strCurLocale = setlocale( LC_ALL, NULL );
setlocale( LC_ALL, "chs" );
size_t iCount = wstrSource.size() + 1;
char *pszBuffer = new char[ iCount ];
const wchar_t *pcwszSource = wstrSource.c_str();
memset( pszBuffer, 0, iCount );
size_t iConvertedCount = wcstombs( pszBuffer, pcwszSource , iCount );
if ( iConvertedCount == -1 )
{
delete [] pszBuffer;
2018-06-01 10:21:26 +00:00
throw string( "unicode2mbs转换失败字符串中有非中英文字符" );
}
strDest = pszBuffer;
delete [] pszBuffer;
setlocale( LC_ALL, strCurLocale.c_str() );
}
// Converts a NUL-terminated wide string into a caller-supplied multibyte
// buffer using the "chinese-simplified" locale.
//
// The process-wide locale is switched for the duration of the call and is
// restored on every exit path, including when an exception is thrown.
//
// pcwszSource: NUL-terminated wide text.
// pszDest:     destination buffer of at least wcslen(pcwszSource) * 2 + 1
//              bytes; that whole range is zeroed before converting.
// Throws std::string when either pointer is NULL or the source contains a
// character that cannot be represented in the conversion locale.
void StringCodeConverter::unicode2mbs( const wchar_t *pcwszSource, char *pszDest )
{
    if ( pcwszSource == NULL || pszDest == NULL )
    {
        throw std::string( "unicode2mbs函数参数值为NULL" );
    }

    // Saves the current locale and puts it back when the function exits.
    struct LocaleRestorer
    {
        std::string m_strSaved;
        bool        m_bValid;
        LocaleRestorer() : m_bValid( false )
        {
            const char *pcszCurrent = setlocale( LC_ALL, NULL );
            if ( pcszCurrent != NULL )
            {
                m_strSaved = pcszCurrent;
                m_bValid = true;
            }
        }
        ~LocaleRestorer()
        {
            if ( m_bValid )
            {
                setlocale( LC_ALL, m_strSaved.c_str() );
            }
        }
    } localeRestorer;

    // Switch to a Chinese locale; results for non-Chinese input depend on it.
    setlocale( LC_ALL, "chinese-simplified" );

    // The size is part of the caller contract and must not grow here.
    const size_t iCount = wcslen( pcwszSource ) * 2 + 1;
    memset( pszDest, 0, iCount );

    const size_t iConvertedCount = wcstombs( pszDest, pcwszSource, iCount );
    if ( iConvertedCount == static_cast<size_t>( -1 ) )
    {
        throw std::string( "unicode2mbs转换失败字符串中有非中英文字符" );
    }

    // wcstombs writes no terminator when the output fills the whole buffer;
    // make sure the caller still receives a terminated string.
    if ( iConvertedCount >= iCount )
    {
        pszDest[ iCount - 1 ] = '\0';
    }
}
// Encodes a wide string as UTF-8 and writes it, NUL-terminated, into
// pszDest.
//
// Each code point is emitted in its shortest UTF-8 form: 1 byte for ASCII,
// 2 bytes below U+0800, 3 bytes below U+10000 and 4 bytes above that. A
// UTF-16 surrogate pair is combined into one 4-byte sequence; an unpaired
// surrogate is encoded on its own as 3 bytes. Values above U+10FFFF are
// replaced by U+FFFD.
//
// cwstrSource: wide text; encoding stops at the first NUL character.
// pszDest:     destination buffer; 3 * cwstrSource.size() + 1 bytes is
//              sufficient where wchar_t is 16 bits wide, and
//              4 * cwstrSource.size() + 1 bytes where it is 32 bits wide.
// Throws std::string when pszDest is NULL.
void StringCodeConverter::unicode2utf8( const wstring &cwstrSource, char *pszDest )
{
    if ( pszDest == NULL )
    {
        throw std::string( "参数为NULL" );
    }

    const wchar_t *pcwszSource = cwstrSource.c_str();
    while ( *pcwszSource != L'\0' )
    {
        // Work on the numeric value, not on the bytes of wchar_t, so the
        // result does not depend on byte order or on sizeof(wchar_t).
        unsigned long ulCode = static_cast<unsigned long>( *pcwszSource );
        ++pcwszSource;

        // Combine a high surrogate with the low surrogate that follows it.
        // The terminating NUL is never a low surrogate, so peeking is safe.
        if ( ulCode >= 0xD800UL && ulCode <= 0xDBFFUL )
        {
            const unsigned long ulLow = static_cast<unsigned long>( *pcwszSource );
            if ( ulLow >= 0xDC00UL && ulLow <= 0xDFFFUL )
            {
                ulCode = 0x10000UL + ( ( ulCode - 0xD800UL ) << 10 ) + ( ulLow - 0xDC00UL );
                ++pcwszSource;
            }
        }

        if ( ulCode > 0x10FFFFUL )
        {
            ulCode = 0xFFFDUL; // not a Unicode code point
        }

        if ( ulCode < 0x80UL )
        {
            // ASCII needs no conversion.
            *pszDest++ = static_cast<char>( ulCode );
        }
        else if ( ulCode < 0x800UL )
        {
            *pszDest++ = static_cast<char>( 0xC0 | ( ulCode >> 6 ) );
            *pszDest++ = static_cast<char>( 0x80 | ( ulCode & 0x3F ) );
        }
        else if ( ulCode < 0x10000UL )
        {
            *pszDest++ = static_cast<char>( 0xE0 | ( ulCode >> 12 ) );
            *pszDest++ = static_cast<char>( 0x80 | ( ( ulCode >> 6 ) & 0x3F ) );
            *pszDest++ = static_cast<char>( 0x80 | ( ulCode & 0x3F ) );
        }
        else
        {
            *pszDest++ = static_cast<char>( 0xF0 | ( ulCode >> 18 ) );
            *pszDest++ = static_cast<char>( 0x80 | ( ( ulCode >> 12 ) & 0x3F ) );
            *pszDest++ = static_cast<char>( 0x80 | ( ( ulCode >> 6 ) & 0x3F ) );
            *pszDest++ = static_cast<char>( 0x80 | ( ulCode & 0x3F ) );
        }
    }
    *pszDest = '\0'; // terminating NUL
}
// Converts a NUL-terminated UTF-8 string to a multibyte string using the
// "chinese-simplified" locale.
//
// 1- to 4-byte UTF-8 sequences are decoded. Decoding is lenient about
// overlong forms, so a code point below U+0800 written as a 3-byte sequence
// is still accepted. A malformed byte, or a character that has no
// representation in the conversion locale, is replaced by '?'. Reading
// never goes past the terminator.
//
// The process-wide locale is switched for the duration of the call and is
// restored on every exit path.
//
// pcszSource: NUL-terminated UTF-8 text.
// Returns the converted multibyte string.
// Throws std::string when pcszSource is NULL.
string StringCodeConverter::utf8tombs( const char *pcszSource )
{
    if ( pcszSource == NULL )
    {
        throw std::string( "参数为NULL" );
    }

    // Saves the current locale and puts it back when the function exits.
    struct LocaleRestorer
    {
        std::string m_strSaved;
        bool        m_bValid;
        LocaleRestorer() : m_bValid( false )
        {
            const char *pcszCurrent = setlocale( LC_ALL, NULL );
            if ( pcszCurrent != NULL )
            {
                m_strSaved = pcszCurrent;
                m_bValid = true;
            }
        }
        ~LocaleRestorer()
        {
            if ( m_bValid )
            {
                setlocale( LC_ALL, m_strSaved.c_str() );
            }
        }
    } localeRestorer;

    // Switch to a Chinese locale; results for non-Chinese input depend on it.
    setlocale( LC_ALL, "chinese-simplified" );
    (void)wctomb( NULL, 0 ); // reset any shift state left by a previous call

    // The result grows as needed, so there is no fixed buffer to overflow
    // and nothing to free.
    std::string strDest;
    strDest.reserve( strlen( pcszSource ) );

    // Scratch space for one converted character; wctomb writes at most
    // MB_CUR_MAX bytes.
    const size_t iMaxBytes = static_cast<size_t>( MB_CUR_MAX );
    std::string strMb( iMaxBytes < 1 ? 1 : iMaxBytes, '\0' );

    while ( *pcszSource != '\0' )
    {
        // Compare as unsigned so the test does not depend on whether plain
        // char is signed on this platform.
        const unsigned char ucLead = static_cast<unsigned char>( *pcszSource );
        if ( ucLead < 0x80 )
        {
            // ASCII character: copy unchanged.
            strDest += *pcszSource;
            ++pcszSource;
            continue;
        }

        // Work out the sequence length from the lead byte.
        size_t iTrail = 0;
        unsigned long ulCode = 0;
        if ( ( ucLead & 0xE0 ) == 0xC0 )
        {
            iTrail = 1;
            ulCode = ucLead & 0x1F;
        }
        else if ( ( ucLead & 0xF0 ) == 0xE0 )
        {
            iTrail = 2;
            ulCode = ucLead & 0x0F;
        }
        else if ( ( ucLead & 0xF8 ) == 0xF0 )
        {
            iTrail = 3;
            ulCode = ucLead & 0x07;
        }

        // Every trailing byte must look like 10xxxxxx. The terminator does
        // not, so this check also stops at the end of a truncated string.
        bool bWellFormed = ( iTrail != 0 );
        for ( size_t i = 1; bWellFormed && i <= iTrail; ++i )
        {
            const unsigned char ucNext = static_cast<unsigned char>( pcszSource[ i ] );
            if ( ( ucNext & 0xC0 ) != 0x80 )
            {
                bWellFormed = false;
            }
            else
            {
                ulCode = ( ulCode << 6 ) | ( ucNext & 0x3F );
            }
        }
        if ( !bWellFormed || ulCode == 0 )
        {
            strDest += '?';
            ++pcszSource; // skip only the offending byte and resynchronise
            continue;
        }
        pcszSource += iTrail + 1;

        // Convert the code point to the locale's multibyte form. A value
        // that does not fit in wchar_t cannot be converted.
        const wchar_t wUnicode = static_cast<wchar_t>( ulCode );
        int iLen = -1;
        if ( static_cast<unsigned long>( wUnicode ) == ulCode )
        {
            iLen = wctomb( &strMb[ 0 ], wUnicode );
        }
        if ( iLen > 0 )
        {
            strDest.append( strMb, 0, static_cast<size_t>( iLen ) );
        }
        else
        {
            strDest += '?';
        }
    }
    return strDest;
}
// Decodes a NUL-terminated UTF-8 string into a wide string.
//
// 1- to 4-byte UTF-8 sequences are decoded. Decoding is lenient about
// overlong forms, so a code point below U+0800 written as a 3-byte sequence
// is still accepted. A malformed byte, or a value above U+10FFFF, is
// replaced by U+FFFD. Where wchar_t is 16 bits wide, code points above
// U+FFFF are stored as a surrogate pair. Reading never goes past the
// terminator.
//
// UTF-8 decoding does not depend on the C locale, so the process-wide
// locale is left untouched.
//
// pcszSource: NUL-terminated UTF-8 text.
// Returns the decoded wide string.
// Throws std::string when pcszSource is NULL.
wstring StringCodeConverter::utf8tounicode( const char *pcszSource )
{
    if ( pcszSource == NULL )
    {
        throw std::string( "参数为NULL" );
    }

    const unsigned long ulReplacement = 0xFFFDUL;

    // The result grows as needed, so there is no fixed buffer to overflow
    // and nothing to free.
    std::wstring wstrDest;
    wstrDest.reserve( strlen( pcszSource ) );

    while ( *pcszSource != '\0' )
    {
        // Compare as unsigned so the test does not depend on whether plain
        // char is signed on this platform.
        const unsigned char ucLead = static_cast<unsigned char>( *pcszSource );
        if ( ucLead < 0x80 )
        {
            // ASCII character: same value as a wide character.
            wstrDest += static_cast<wchar_t>( ucLead );
            ++pcszSource;
            continue;
        }

        // Work out the sequence length from the lead byte.
        size_t iTrail = 0;
        unsigned long ulCode = 0;
        if ( ( ucLead & 0xE0 ) == 0xC0 )
        {
            iTrail = 1;
            ulCode = ucLead & 0x1F;
        }
        else if ( ( ucLead & 0xF0 ) == 0xE0 )
        {
            iTrail = 2; // the usual case for Chinese characters
            ulCode = ucLead & 0x0F;
        }
        else if ( ( ucLead & 0xF8 ) == 0xF0 )
        {
            iTrail = 3;
            ulCode = ucLead & 0x07;
        }

        // Every trailing byte must look like 10xxxxxx. The terminator does
        // not, so this check also stops at the end of a truncated string.
        bool bWellFormed = ( iTrail != 0 );
        for ( size_t i = 1; bWellFormed && i <= iTrail; ++i )
        {
            const unsigned char ucNext = static_cast<unsigned char>( pcszSource[ i ] );
            if ( ( ucNext & 0xC0 ) != 0x80 )
            {
                bWellFormed = false;
            }
            else
            {
                ulCode = ( ulCode << 6 ) | ( ucNext & 0x3F );
            }
        }
        if ( !bWellFormed || ulCode == 0 || ulCode > 0x10FFFFUL )
        {
            wstrDest += static_cast<wchar_t>( ulReplacement );
            ++pcszSource; // skip only the offending byte and resynchronise
            continue;
        }
        pcszSource += iTrail + 1;

        // Store the whole value at once so every byte of the wchar_t is
        // defined, whatever its width or byte order.
        if ( ulCode > 0xFFFFUL && sizeof( wchar_t ) == 2 )
        {
            const unsigned long ulOffset = ulCode - 0x10000UL;
            wstrDest += static_cast<wchar_t>( 0xD800UL + ( ulOffset >> 10 ) );
            wstrDest += static_cast<wchar_t>( 0xDC00UL + ( ulOffset & 0x3FFUL ) );
        }
        else
        {
            wstrDest += static_cast<wchar_t>( ulCode );
        }
    }
    return wstrDest;
}