telsale-management/代码/telsale_aux_kit/source/StringCodeConverter.cpp

366 lines
8.3 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//#include "stdafx.h"
#include "StringCodeConverter.h"
#include <locale.h>
#include <stdlib.h>
//#include <stdio.h>
#include <string>
#include <cstring>
using namespace std;
size_t StringCodeConverter::mbslen( const char *pcszSource )
{
string strCurLocale = setlocale( LC_ALL, NULL );
setlocale( LC_ALL, "chinese-simplified" );
int mbl = 0;
size_t cnt = 0;
for (cnt = 0; *pcszSource; ++cnt)
{
mbl = mblen( pcszSource, MB_CUR_MAX );
pcszSource += mbl;
}
setlocale( LC_ALL, strCurLocale.c_str() );
return cnt;
}
wstring StringCodeConverter::mbs2unicode( const string &cstrSource )
{
string strCurLocale = setlocale( LC_ALL, NULL );
setlocale( LC_ALL, "chinese-simplified" ); //改成中文环境,如果接受的参数不是中文字符串,得注意结果
const char *pcszSource = cstrSource.c_str();
size_t iCount = cstrSource.size() + 1;
wchar_t *pwszBuffer = new wchar_t[ iCount ];
wmemset( pwszBuffer, 0, iCount );
size_t iConvertedCount = mbstowcs( pwszBuffer, pcszSource, iCount );
if ( iConvertedCount == -1 )
{
delete [] pwszBuffer;
throw string( "mbs2unicode参数有非中英文字符" );
}
wstring wstrDest( pwszBuffer );
delete [] pwszBuffer;
setlocale( LC_ALL, strCurLocale.c_str() );
return wstrDest;
}
void StringCodeConverter::mbs2unicode(const string &strSource, wstring &wstrDest)
{
const char *pcszSource = strSource.c_str();
string strCurLocale( setlocale(LC_ALL, NULL) );//先保存当前的locale设置
setlocale( LC_ALL, "chs" );//设置为中文
size_t iCharCount = strSource.size() + 1;//unicode字符串所需要的字符数
wchar_t *pwszBuffer = new wchar_t[ iCharCount ];
wmemset( pwszBuffer, 0, iCharCount );
mbstowcs( pwszBuffer, pcszSource, iCharCount );
setlocale( LC_ALL, strCurLocale.c_str() );//改回原来的locale设置
wstrDest = pwszBuffer;
delete [] pwszBuffer;
}
void StringCodeConverter::mbs2unicode( const char *pcszSource, wchar_t *pwszDest ) throw (string)
{
if ( pcszSource == NULL || pwszDest == NULL )
{
throw string( "参数指针为NULL" );
}
string strCurLocale = setlocale( LC_ALL, NULL );
setlocale( LC_ALL, "chinese-simplified" ); //改成中文环境,如果接受的参数不是中文字符串,得注意结果
size_t iCharCount = mbslen( pcszSource ) + 1;
if ( iCharCount == -1 )
{
throw string( "源字符串的编码有非中英文字符" );
}
wmemset( pwszDest, 0, iCharCount );
size_t iConvertedCount = mbstowcs( pwszDest, pcszSource, iCharCount );
if ( iConvertedCount == -1 )
{
throw string( "源字符串的编码有非中英文字符" );
}
*( pwszDest + iConvertedCount ) = NULL;
setlocale( LC_ALL, strCurLocale.c_str() );
}
void StringCodeConverter::mbs2utf8( const string &cstrSource, char *pszDest )
{
const char *pcszSource = cstrSource.c_str();
string strCurLocale( setlocale( LC_ALL, NULL ) );
setlocale( LC_ALL, "chinese-simplified" ); //改成中文环境,如果接受的参数不是中文字符串,得注意结果
while ( *pcszSource != NULL )
{
if ( *pcszSource > 0 )
{
//英文字符,可以直接复制
*pszDest = *pcszSource;
pcszSource++;
pszDest++;
}
else
{
//非ascii英文字符,先转换成unicode再转换utf8
int iLen = 0;
wchar_t wUnicode = 0;
char *pcUnicode = (char *)&wUnicode;
//转换成unicode返回mb字符的长度
iLen = mbtowc( &wUnicode, pcszSource, MB_CUR_MAX );
pszDest[0] = (0xE0 | ((pcUnicode[1] & 0xF0) >> 4));
pszDest[1] = (0x80 | ((pcUnicode[1] & 0x0F) << 2)) + ((pcUnicode[0] & 0xC0) >> 6);
pszDest[2] = (0x80 | (pcUnicode[0] & 0x3F));
pszDest += 3;
pcszSource += iLen;
}
}
setlocale( LC_ALL, strCurLocale.c_str() );
*pszDest = NULL;
}
string StringCodeConverter::unicode2mbs( const wstring &wstrSource )
{
string strCurLocale = setlocale( LC_ALL, NULL );
setlocale( LC_ALL, "chinese-simplified" ); //改成中文环境,如果接受的参数不是中文字符串,得注意结果
const wchar_t *pcwszSource = wstrSource.c_str();
size_t iCount = wstrSource.size() * 2 + 1;
char *pszBuffer = new char[ iCount ];
memset( pszBuffer, 0, iCount );
size_t iConvertedCount = wcstombs( pszBuffer, pcwszSource, iCount );
if ( iConvertedCount == -1 )
{
throw string( "unicode2mbs源字符串的编码有非中英文字符" );
}
setlocale( LC_ALL, strCurLocale.c_str() );
string strDest( pszBuffer );
delete [] pszBuffer;
return strDest;
}
void StringCodeConverter::unicode2mbs( const wstring &wstrSource, string &strDest )
{
string strCurLocale = setlocale( LC_ALL, NULL );
setlocale( LC_ALL, "chs" );
size_t iCount = wstrSource.size() + 1;
char *pszBuffer = new char[ iCount ];
const wchar_t *pcwszSource = wstrSource.c_str();
memset( pszBuffer, 0, iCount );
size_t iConvertedCount = wcstombs( pszBuffer, pcwszSource , iCount );
if ( iConvertedCount == -1 )
{
delete [] pszBuffer;
throw string( "unicode2mbs转换失败字符串中有非中英文字符" );
}
strDest = pszBuffer;
delete [] pszBuffer;
setlocale( LC_ALL, strCurLocale.c_str() );
}
void StringCodeConverter::unicode2mbs( const wchar_t *pcwszSource, char *pszDest )
{
if ( pcwszSource == NULL || pszDest == NULL )
{
throw string( "unicode2mbs函数参数值为NULL" );
}
string strCurLocale = setlocale( LC_ALL, NULL );
setlocale( LC_ALL, "chinese-simplified" ); //改成中文环境,如果接受的参数不是中文字符串,得注意结果
size_t iCount = wcslen( pcwszSource )*2 + 1;
memset( pszDest, 0, iCount );
size_t iConvertedCount = wcstombs( pszDest, pcwszSource, iCount );
if ( iConvertedCount == -1 )
{
throw string( "unicode2mbs转换失败字符串中有非中英文字符" );
}
setlocale( LC_ALL, strCurLocale.c_str() );
}
void StringCodeConverter::unicode2utf8( const wstring &cwstrSource, char *pszDest )
{
if ( pszDest == NULL )
{
throw string( "参数为NULL" );
}
const wchar_t *pcwszSource = cwstrSource.c_str();
while ( *pcwszSource != NULL )
{
unsigned short int iUnicode = *pcwszSource;
if ( iUnicode < 128 )
{
//小于128是英文字符不需要转换
*pszDest = (char)iUnicode;
pszDest++;
pcwszSource++;
}
else
{
//大于128是mbs字符需要转换
const char *pcszUnicode = (const char *)pcwszSource;
pszDest[0] = (0xE0 | ((pcszUnicode[1] & 0xF0) >> 4 ));
pszDest[1] = (0x80 | ((pcszUnicode[1] & 0x0F) << 2)) + ((pcszUnicode[0] & 0xC0) >> 6);
pszDest[2] = (0x80 | (pcszUnicode[0] & 0x3F));
pszDest = pszDest + 3;
pcwszSource++;
}
}
*pszDest = NULL;//末尾的NULL字符
}
string StringCodeConverter::utf8tombs( const char *pcszSource )
{
if ( pcszSource == NULL )
{
throw string( "参数为NULL" );
}
string strCurLocale( setlocale( LC_ALL, NULL ) );
setlocale( LC_ALL, "chinese-simplified" ); //改成中文环境,如果接受的参数不是中文字符串,得注意结果
char *pszDest = new char[ strlen( pcszSource ) ];
char *pszBuffer = pszDest;
while ( *pcszSource != 0 )
{
if ( *pcszSource > 0 )
{
//是ASCII字符
*pszDest = *pcszSource;
pszDest++;
pcszSource++;
}
else
{
wchar_t wUnicode = 0;
char *pcUnicode = (char *)&wUnicode;
pcUnicode[1] = ((pcszSource[0] & 0x0F) << 4) + ((pcszSource[1] >> 2) & 0x0F);
pcUnicode[0] = ((pcszSource[1] & 0x03) << 6) + (pcszSource[2] & 0x3F);
wchar_t wMcb;
char *pcMbs = (char *)&wMcb;
int iLen = 0;
iLen = wctomb( pcMbs, wUnicode );
if ( iLen == 1 )
{
*pszDest = *pcMbs;
}
else
{
pszDest[0] = pcMbs[0];
pszDest[1] = pcMbs[1];
}
pszDest += iLen;
pcszSource += 3;
}
}
*pszDest = NULL;
setlocale( LC_ALL, strCurLocale.c_str() );
strCurLocale = pszBuffer; //废物利用
return strCurLocale;
}
wstring StringCodeConverter::utf8tounicode( const char *pcszSource )
{
if ( pcszSource == NULL )
{
throw string( "参数为NULL" );
}
string strCurLocale = setlocale( LC_ALL, NULL );
setlocale( LC_ALL, "chinese-simplified" ); //改成中文环境,如果接受的参数不是中文字符串,得注意结果
wchar_t *pwszBuffer = new wchar_t[ strlen( pcszSource ) ];
wchar_t *pwszDest = pwszBuffer;
while ( *pcszSource != NULL )
{
if ( *pcszSource > 0 )
{
//是ASCII字符
wchar_t wUnicode;
mbtowc( &wUnicode, pcszSource, 1 );
*pwszDest = wUnicode;
pwszDest++;
pcszSource++;
}
else
{
//中文字符3个字节
wchar_t wUnicode;
char *pcUnicode = (char *)&wUnicode;
pcUnicode[1] = ((pcszSource[0] & 0x0F) << 4) + ((pcszSource[1] >> 2) & 0x0F);
pcUnicode[0] = ((pcszSource[1] & 0x03) << 6) + (pcszSource[2] & 0x3F);
*pwszDest = wUnicode;
pwszDest++;
pcszSource += 3;
}
}
setlocale( LC_ALL, strCurLocale.c_str() );
*pwszDest = NULL;//末尾空字符
wstring wstrDest( pwszBuffer );
return wstrDest;
}