日期：2011-03-22 13:56:00 来源：本站整理
支持UNICODE/UTF8/ANSI之间的转换的类[VC/C++编程]

赞助商链接

分享到： QQ空间新浪微博腾讯微博人人网
　　本文“支持UNICODE/UTF8/ANSI之间的转换的类[VC/C++编程]”是由七道奇为您精心收集，来源于网络转载，文章版权归文章作者所有，本站不对其观点以及内容做任何评价，请读者自行判断，以下是其具体内容：
ZUtf8_16.h文件:
//---------------------------------------------------------------------------
#ifndef ZUtf8_16H
#define ZUtf8_16H
//---------------------------------------------------------------------------
/*
   支持UNICODE,UNICODEBE,UTF8,ASCII之间的转换的类.
   日期:2007-06-15
   版本:1.0
   作者:小笨象
   说明:你可以随便使用本文件,不过如果你修改了其中的BUG,
        或者修改得更好了,请你也告诉我一下,
        让我也能享受一下开源的好处,谢谢.
*/
// Text encodings that the converter can detect when loading and produce when saving.
enum EncodingType
{
    uni8Bit      = 0,  // 8-bit ANSI/ASCII text, no byte-order mark (the default)
    uni16BE      = 1,  // UTF-16 big-endian
    uni16LE      = 2,  // UTF-16 little-endian, i.e. what Windows calls "Unicode"
    uniUTF8      = 3,  // UTF-8 with a byte-order mark
    uniUTF8NOBOM = 4   // UTF-8 without a byte-order mark
};
classZUtf8_16
{
private:
　　 EncodingTypem_unicodeMode;//编码方法
　　 intisUTF8_16(constchar*s,unsignedintlen,unsigned*cchUnused);
　　 EncodingType__fastcallDetermineEncoding(unsignedchar*data,size_tiLen);
public:
　　 __fastcallZUtf8_16();
　　 __fastcall~ZUtf8_16();
　　 EncodingType__fastcallGetEncodingType(void){returnm_unicodeMode;};
　　 bool__fastcallLoadFromStream(TMemoryStream*pStream,AnsiString&DestText);
　　 bool__fastcallStreamSaveToFile(TMemoryStream*pStream,
　　　　　　 AnsiStringFileNameA,EncodingTypeunicodeMode);
};
#endif
ZUtf8_16.cpp文件:
//---------------------------------------------------------------------------
#include <vcl.h>
#pragma hdrstop

#include <stdio.h>

#include <vector>

#include "ZUtf8_16.h"

#pragma package(smart_init)
__fastcallZUtf8_16::ZUtf8_16()
{
　　 m_unicodeMode=uni8Bit;
}
//---------------------------------------------------------------------------
__fastcallZUtf8_16::~ZUtf8_16()
{
}
//---------------------------------------------------------------------------
intZUtf8_16::isUTF8_16(constchar*s,unsignedintlen,unsigned*cchUnused)
{
　　 intrv=1;
　　 intASCII7only=1;
　　 constunsignedchar*sx=(unsignedchar*)s,*endx=sx+len;
　　 while(sx<endx)
　　 {
　　　　 if(!*sx)
　　　　 {
　　　　　 //Fordetection,we'llsaythatNULmeansnotUTF8
　　　　　　 ASCII7only=0;
　　　　　　 rv=0;
　　　　　　 break;
　　　　 }
　　　　 elseif(*sx<0x80)
　　　　 {
　　　　　　 //0nnnnnnnIfthebyte'sfirsthexcodebeginswith0-7,itisanASCIIcharacter.
　　　　　　 sx++;
　　　　 }
　　　　 elseif(*sx<(0x80+0x40))
　　　　 {
　　　　　 //10nnnnnn8throughBcannotbefirsthexcodes
　　　　　　 ASCII7only=0;
　　　　　　 rv=0;
　　　　　　 break;
　　　　 }
　　　　 elseif(*sx<(0x80+0x40+0x20))
　　　　 {
　　　　　　 //110xxxvv10nnnnnn　IfitbeginswithCorD,itisan11bitcharacter
　　　　　　 ASCII7only=0;
　　　　　　 if(sx>=endx-1)break;
　　　　　　 if(!(*sx&0x1F)||(sx[1]&(0x80+0x40))!=0x80){rv=0;break;}
　　　　　　 sx+=2;
　　　　 }
　　　　 elseif(*sx<(0x80+0x40+0x20+0x10))
　　　　 {
　　　　　　 //1110qqqq10xxxxvv10nnnnnnIfitbeginswithE,itis16bit
　　　　　　 ASCII7only=0;
　　　　　　 if(sx>=endx-2)break;
　　　　　　 if(!(*sx&0xF)||(sx[1]&(0x80+0x40))!=0x80||(sx[2]&(0x80+0x40))!=0x80)
　　　　　　 {rv=0;break;}
　　　　　　 sx+=3;
　　　　 }
　　　　 else
　　　　 {
　　　　　　 /*morethan16bitsarenotallowedhere*/
　　　　　　 ASCII7only=0;
　　　　　　 rv=0;
　　　　　　 break;
　　　　 }
　　 }
　　 if(cchUnused)*cchUnused=endx-sx;
　　 return(ASCII7only?0:rv);
}
//---------------------------------------------------------------------------
EncodingType__fastcallZUtf8_16::DetermineEncoding(unsignedchar*data,size_tiLen)
{
　　 //TODO:判断当前文件的编码范例.
　　 m_unicodeMode=uni8Bit;//默许ASCII
　　 if(data[0]==0xFE&&data[1]==0xFF)//Bigendian==UNICODE-BIG　UTF16
　　 {
　　　　 m_unicodeMode=uni16BE;
　　 }
　　 elseif(data[0]==0xFF&&data[1]==0xFE)//Littleendian==UNICODE　UTF16
　　 {
　　　　 m_unicodeMode=uni16LE;//Unicode
　　 }
　　 elseif(data[0]==0xEF&&data[1]==0xBB&&data[2]==0xBF)//UTF8
　　 {
　　　　 m_unicodeMode=uniUTF8;
　　 }
　　 elseif(isUTF8_16(data,iLen,NULL)==1)
　　 {
　　　　 m_unicodeMode=uniUTF8NOBOM;
　　 }
　　 return　m_unicodeMode;
}
//---------------------------------------------------------------------------
bool__fastcallZUtf8_16::LoadFromStream(TMemoryStream*pSourceStream,AnsiString&DestText)
{
　　 //TODO:从流中读取数据
　　 //先判断字符编码
　　 pSourceStream->Position=0;
　　 if(pSourceStream->Size==0)returntrue;
// 本文转自 C++Builder 研究 - http://www.ccrun.com/article.asp?i=1023&d=cbj0f7
　　 m_unicodeMode=DetermineEncoding((char*)pSourceStream->Memory,pSourceStream->Size);
　　 pSourceStream->Position=0;
　　 //再根椐呼应的编码做呼应的事.
　　 switch(m_unicodeMode)
　　 {
　　　　 caseuni8Bit:
　　　　 {
　　　　　　 //什么都不做.以保证翻开一些大的文件时速度快一些.
　　　　　　 //所以调用者需求自己在调的之后判断字符编码,
　　　　　　 //假如是uni8Bit,则需求自己处理.
//　　　　　　intiLength=pSourceStream->Size;
//　　　　　　char　*szUnicode=newchar[iLength+1];
//　　　　　　memset(szUnicode,0x00,iLength+1);
//　　　　　　pSourceStream->Read(szUnicode,iLength);
//　　　　　　DestText=AnsiString(szUnicode);
//　　　　　　delete[]szUnicode;
//　　　　　　szUnicode=NULL;
　　　　　　 break;
　　　　 }
　　　　 caseuni16BE:
　　　　 {
　　　　　 //UCBigendian
　　　　　　 pSourceStream->Position=2;
　　　　　　 intiLength=pSourceStream->Size-2;
　　　　　　 chartemp;
　　　　　　 char*szUnicode=newchar[iLength+2];
　　　　　　 memset(szUnicode,0x00,iLength+2);
　　　　　　 pSourceStream->Read(szUnicode,iLength);
　　　　　 //只要把每两个字节的位置交换一下,就是UNICODELE了.So...
　　　　　　 for(inti=0;i<iLength;i+=2)
　　　　　　 {
　　　　　　　　 temp=szUnicode[i];
　　　　　　　　 szUnicode[i]=szUnicode[i+1];
　　　　　　　　 szUnicode[i+1]=temp;
　　　　　　　　 Application->ProcessMessages();
　　　　　　 }
　　　　　　 DestText=WideCharLenToString((wchar_t*)(szUnicode),iLength/2);
　　　　　　 delete[]szUnicode;
　　　　　　 szUnicode=NULL;
　　　　　　 break;
　　　　 }
　　　　 caseuni16LE:
　　　　 {
　　　　　 //UNICODE　Littleendian
　　　　　　 pSourceStream->Position=2;
　　　　　　 intiLength=pSourceStream->Size-2;
　　　　　　 wchar_t　*szUnicode=newwchar_t[iLength+2];
　　　　　　 memset(szUnicode,0x00,iLength+2);
　　　　　　 pSourceStream->Read(szUnicode,iLength);
　　　　　　 WideStringWideStr=WideString(szUnicode);
　　　　　　 DestText=WideStr;
　　　　　　 delete[]szUnicode;
　　　　　　 szUnicode=NULL;
　　　　　　 break;
　　　　 }
　　　　 caseuniUTF8:
　　　　 {
　　　　　 //UTF8
　　　　　　 pSourceStream->Position=3;
　　　　　　 intiLength=pSourceStream->Size-3;
　　　　　　 char*szUTF8=newchar[iLength+3];
　　　　　　 memset(szUTF8,0x00,iLength+3);
　　　　　　 pSourceStream->Read(szUTF8,iLength);
　　　　　　 AnsiStringUtf8Str=Utf8ToAnsi(szUTF8);
　　　　　　 if(Utf8Str=="")
　　　　　　　　 DestText=AnsiString((char*)pSourceStream->Memory);
　　　　　　 else
　　　　　　　　 DestText=Utf8Str;
　　　　　　 delete[]szUTF8;
　　　　　　 szUTF8=NULL;
　　　　　　 break;
　　　　 }
　　　　 caseuniUTF8NOBOM:
　　　　 {
　　　　　 //UTF8没有头标识的情形.
　　　　　　 intiLength=pSourceStream->Size;
　　　　　　 char*szUTF8=newchar[iLength+3];
　　　　　　 memset(szUTF8,0x00,iLength+3);
　　　　　　 pSourceStream->Read(szUTF8,iLength);
　　　　　　 AnsiStringUtf8Str=Utf8ToAnsi(szUTF8);
　　　　　　 if(Utf8Str=="")
　　　　　　　　 DestText=AnsiString((char*)pSourceStream->Memory);
　　　　　　 else
　　　　　　　　 DestText=Utf8Str;
　　　　　　 delete[]szUTF8;
　　　　　　 szUTF8=NULL;
　　　　　　 break;
　　　　 }
　　 }
　　 returntrue;
}
//---------------------------------------------------------------------------
bool__fastcallZUtf8_16::StreamSaveToFile(TMemoryStream*pStream,
　　　　 AnsiStringFileNameA,EncodingTypeunicodeMode)
{
　　 //TODO:把流内容按指定的格局保存到文件中.
　　 try
　　 {
　　　　 pStream->Position=0;
　　　　 switch(unicodeMode)
　　　　 {
　　　　　　 caseuni8Bit:
　　　　　　 {
　　　　　　　 //什么都不做.直接保存.
　　　　　　　　 pStream->SaveToFile(FileNameA);
　　　　　　　　 break;
　　　　　　 }
　　　　　　 caseuni16BE:
　　　　　　 {
　　　　　　　　 //UCBigendian
　　　　　　　　 intiLength=pStream->Size;
　　　　　　　　 chartemp;
　　　　　　　　 char*pSource=newchar[iLength+2];
　　　　　　　　 memset(pSource,0x00,iLength+2);
　　　　　　　　 pStream->Read(pSource,iLength);
　　　　　　　　 //先看看转成的宽字节数返到nLen
　　　　　　　　 intnLen=MultiByteToWideChar(CP_ACP,0,pSource,iLength,NULL,NULL);
　　　　　　　　 LPWSTRlpwsz=newWCHAR[nLen];
　　　　　　　　 MultiByteToWideChar(CP_ACP,0,pSource,-1,lpwsz,nLen);
　　　　　　　　 intiNewLen=lstrlenW(lpwsz)*sizeof(WCHAR);
　　　　　　　　 char*pDest=newchar[iNewLen];
　　　　　　　　 memcpy(pDest,lpwsz,iNewLen);
　　　　　　　 
　　　　　　　　 //只要把每两个字节的位置交换一下,就是UNICODEBig了.So...
　　　　　　　　 for(inti=0;i<iNewLen;i+=2)
　　　　　　　　 {
　　　　　　　　　　 temp=pDest[i];
　　　　　　　　　　 pDest[i]=pDest[i+1];
　　　　　　　　　　 pDest[i+1]=temp;
　　　　　　　　　　 Application->ProcessMessages();
　　　　　　　　 }
　　　　　　　　 FILE*f=fopen(FileNameA.c_str(),"wb");
　　　　　　　　 //写UnicodeBig头
　　　　　　　　 fputc(0xFE,f);
　　　　　　　　 fputc(0xFF,f);
　　　　　　　　 fwrite(pDest,1,iNewLen,f);
　　　　　　　　 fclose(f);
　　　　　　　　 delete[]pDest;
　　　　　　　　 pDest=NULL;
　　　　　　　　 delete[]lpwsz;
　　　　　　　　 lpwsz=NULL;
　　　　　　　　 delete[]pSource;
　　　　　　　　 pSource=NULL;
　　　　　　　　 break;
　　　　　　 }
　　　　　　 caseuni16LE:
　　　　　　 {
　　　　　　　　 //UNICODE　Littleendian
　　　　　　　　 intiLength=pStream->Size;
　　　　　　　　 char*pSource=newchar[iLength+2];
　　　　　　　　 memset(pSource,0x00,iLength+2);
　　　　　　　　 pStream->Read(pSource,iLength);
　　　　　　　　 //先看看转成的宽字节数返到nLen
　　　　　　　　 intnLen=MultiByteToWideChar(CP_ACP,0,pSource,iLength,NULL,NULL);
　　　　　　　　 LPWSTRlpwsz=newWCHAR[nLen];
　　　　　　　　 MultiByteToWideChar(CP_ACP,0,pSource,-1,lpwsz,nLen);
　　　　　　　　 FILE*f=fopen(FileNameA.c_str(),"wb");
　　　　　　　　 //写Unicode头
　　　　　　　　 fputc(0xFF,f);
　　　　　　　　 fputc(0xFE,f);
　　　　　　　　 //一个宽字节占两个字节
　　　　　　　　 fwrite(lpwsz,1,lstrlenW(lpwsz)*sizeof(WCHAR),f);
　　　　　　　　 fclose(f);
　　　　　　　　 delete[]lpwsz;
　　　　　　　　 lpwsz=NULL;
　　　　　　　　 delete[]pSource;
　　　　　　　　 pSource=NULL;
　　　　　　　　 break;
　　　　　　 }
　　　　　　 caseuniUTF8:
　　　　　　 {
　　　　　　　　 //UTF8
　　　　　　　　 intiLen=pStream->Size;
　　　　　　　　 char*pSource=newchar[iLen+3];
　　　　　　　　 memset(pSource,0x00,iLen+3);
　　　　　　　　 pStream->Read(pSource,iLen);
　　　　　　　　 AnsiStringUtf8Str=AnsiToUtf8(pSource);
　　　　　　　　 delete[]pSource;
　　　　　　　　 pSource=NULL;
　　　　　　　　 FILE*f=fopen(FileNameA.c_str(),"wb");
　　　　　　　　 //写UTF8头
　　　　　　　　 fputc(0xEF,f);
　　　　　　　　 fputc(0xBB,f);
　　　　　　　　 fputc(0xBF,f);
　　　　　　　　 //一个宽字节占两个字节
　　　　　　　　 fwrite(Utf8Str.c_str(),1,Utf8Str.Length(),f);
　　　　　　　　 fclose(f);
　　　　　　　　 break;
　　　　　　 }
　　　　　　 caseuniUTF8NOBOM:
　　　　　　 {
　　　　　　　　 //UTF8没有标识头的情形.
　　　　　　　　 intiLen=pStream->Size;
　　　　　　　　 char*pSource=newchar[iLen+3];
　　　　　　　　 memset(pSource,0x00,iLen+3);
　　　　　　　　 pStream->Read(pSource,iLen);
　　　　　　　　 AnsiStringUtf8Str=AnsiToUtf8(pSource);
　　　　　　　　 delete[]pSource;
　　　　　　　　 pSource=NULL;
　　　　　　　　 FILE*f=fopen(FileNameA.c_str(),"wb");
　　　　　　　　 //一个宽字节占两个字节
　　　　　　　　 fwrite(Utf8Str.c_str(),1,Utf8Str.Length(),f);
　　　　　　　　 fclose(f);
　　　　　　　　 break;
　　　　　　 }
　　　　 }//endofswitch
　　 }
　　 catch(...)
　　 {
　　　　 returnfalse;
　　 }
　　 returntrue;
}
//---------------------------------------------------------------------------
//试用举例:
#include"ZUtf8_16.h"
bool__fastcallLoadFile(AnsiStringstrFileName,TStrings*pList)
{
　　 EncodingTypeunicodeMode;
　　 //TODO:装入文件.
　　 //假如装入成功,则返回true
　　 AnsiStringErrMsg;
　　 boolbReturn=true;
　　 ErrMsg.sprintf("装入%s文档时出错, 该文档不存在"
　　　　　　　　　 "大概被别的程序以独占方法翻开!",strFileName);
　　 if(!FileExists(strFileName))
　　 {
　　　　 MessageBox(0,　ErrMsg.c_str(),"错误",MB_OK|MB_ICONERROR);
　　　　 returnfalse;
　　 }
　　 AnsiStringReturnTxt;
　　 ZUtf8_16zutf8_16;
　　 TMemoryStream*ReadStream=newTMemoryStream();
　　 ReadStream->LoadFromFile(strFileName);
　　 bReturn=zutf8_16.LoadFromStream(ReadStream,ReturnTxt);
　　 if(bReturn)
　　 {
　　　　 unicodeMode=zutf8_16.GetEncodingType();
　　　　 if(unicodeMode==uni8Bit)
　　　　　 pList->LoadFromStream(ReadStream);
　　　　 else
　　　　　　 pList->Text=ReturnTxt;
　　 }
　　 else
　　 {
　　　　 MessageBox(0,　ErrMsg.c_str(),"错误",MB_OK|MB_ICONERROR);
　　 }
　　 deleteReadStream;
　　 ReadStream=NULL;
　　 returnbReturn;
}
　　以上是“支持UNICODE/UTF8/ANSI之间的转换的类[VC/C++编程]”的内容，如果你对以上该文章内容感兴趣，你可以看看七道奇为您推荐以下文章：
支持UNICODE/UTF8/ANSI之间的转换的类
<b>mysql不支持union   select的盲注办法</b>
本文地址：
与您的QQ/BBS好友分享!
[] [返回上一页] [打印]
好的评价　如果您觉得此文章好，就请您
0%(0)
差的评价　如果您觉得此文章差，就请您
0%(0)
文章评论评论内容只代表网友观点，与本站立场无关！

评论摘要(共 0 条，得分 0 分，平均 0 分) 查看完整评论
栏目导航

赞助商链接

免责条款 - 广告合作 - 下载声明 - 欢迎投稿 - 友情连接 -