当前位置:七道奇文章资讯编程技术VC/C++编程
日期:2011-03-22 13:56:00  来源:本站整理

支持UNICODE/UTF8/ANSI之间的转换的类[VC/C++编程]

赞助商链接



  本文“支持UNICODE/UTF8/ANSI之间的转换的类[VC/C++编程]”是由七道奇为您精心收集,来源于网络转载,文章版权归文章作者所有,本站不对其观点以及内容做任何评价,请读者自行判断,以下是其具体内容:
ZUtf8_16.h文件:
//---------------------------------------------------------------------------
#ifndef ZUtf8_16H
#define ZUtf8_16H
//---------------------------------------------------------------------------
/*
  支持UNICODE,UNICODEBE,UTF8,ASCII之间的转换的类.
  日期:2007-06-15
  版本:1.0
  作者:小笨象
  阐明:你可以随便利用本文件,不过假如你改正了此中的BUG,
     大概改正得更好了,请你也告诉我一下,
     让我也能享用一下开源的好处,谢谢.
*/
// Text encodings that ZUtf8_16 can detect, read and write.
enum EncodingType
{
    uni8Bit      = 0, // Default: plain 8-bit text (ASCII)
    uni16BE      = 1, // UTF-16 big-endian
    uni16LE      = 2, // UTF-16 little-endian: the Windows default, i.e. "Unicode"
    uniUTF8      = 3, // UTF-8 with the identifying header (BOM)
    uniUTF8NOBOM = 4  // UTF-8 file without the UTF-8 identifying header
};
classZUtf8_16
{
private:
   EncodingTypem_unicodeMode;//编码方法
   intisUTF8_16(constchar*s,unsignedintlen,unsigned*cchUnused);
   EncodingType__fastcallDetermineEncoding(unsignedchar*data,size_tiLen);
public:
   __fastcallZUtf8_16();
   __fastcall~ZUtf8_16();
   EncodingType__fastcallGetEncodingType(void){returnm_unicodeMode;};
   bool__fastcallLoadFromStream(TMemoryStream*pStream,AnsiString&DestText);
   bool__fastcallStreamSaveToFile(TMemoryStream*pStream,
       AnsiStringFileNameA,EncodingTypeunicodeMode);
};
#endif
ZUtf8_16.cpp文件:
//---------------------------------------------------------------------------
#include<vcl.h>
#pragma hdrstop
#include<stdio.h>
#include"ZUtf8_16.h"
#pragma package(smart_init)
__fastcallZUtf8_16::ZUtf8_16()
{
   m_unicodeMode=uni8Bit;
}
//---------------------------------------------------------------------------
__fastcallZUtf8_16::~ZUtf8_16()
{
}
//---------------------------------------------------------------------------
intZUtf8_16::isUTF8_16(constchar*s,unsignedintlen,unsigned*cchUnused)
{
   intrv=1;
   intASCII7only=1;
   constunsignedchar*sx=(unsignedchar*)s,*endx=sx+len;
   while(sx<endx)
   {
     if(!*sx)
     {
      //Fordetection,we'llsaythatNULmeansnotUTF8
       ASCII7only=0;
       rv=0;
       break;
     }
     elseif(*sx<0x80)
     {
       //0nnnnnnnIfthebyte'sfirsthexcodebeginswith0-7,itisanASCIIcharacter.
       sx++;
     }
     elseif(*sx<(0x80+0x40))
     {
      //10nnnnnn8throughBcannotbefirsthexcodes
       ASCII7only=0;
       rv=0;
       break;
     }
     elseif(*sx<(0x80+0x40+0x20))
     {
       //110xxxvv10nnnnnn IfitbeginswithCorD,itisan11bitcharacter
       ASCII7only=0;
       if(sx>=endx-1)break;
       if(!(*sx&0x1F)||(sx[1]&(0x80+0x40))!=0x80){rv=0;break;}
       sx+=2;
     }
     elseif(*sx<(0x80+0x40+0x20+0x10))
     {
       //1110qqqq10xxxxvv10nnnnnnIfitbeginswithE,itis16bit
       ASCII7only=0;
       if(sx>=endx-2)break;
       if(!(*sx&0xF)||(sx[1]&(0x80+0x40))!=0x80||(sx[2]&(0x80+0x40))!=0x80)
       {rv=0;break;}
       sx+=3;
     }
     else
     {
       /*morethan16bitsarenotallowedhere*/
       ASCII7only=0;
       rv=0;
       break;
     }
   }
   if(cchUnused)*cchUnused=endx-sx;
   return(ASCII7only?0:rv);
}
//---------------------------------------------------------------------------
EncodingType__fastcallZUtf8_16::DetermineEncoding(unsignedchar*data,size_tiLen)
{
   //TODO:判断当前文件的编码范例.
   m_unicodeMode=uni8Bit;//默许ASCII
   if(data[0]==0xFE&&data[1]==0xFF)//Bigendian==UNICODE-BIG UTF16
   {
     m_unicodeMode=uni16BE;
   }
   elseif(data[0]==0xFF&&data[1]==0xFE)//Littleendian==UNICODE UTF16
   {
     m_unicodeMode=uni16LE;//Unicode
   }
   elseif(data[0]==0xEF&&data[1]==0xBB&&data[2]==0xBF)//UTF8
   {
     m_unicodeMode=uniUTF8;
   }
   elseif(isUTF8_16(data,iLen,NULL)==1)
   {
     m_unicodeMode=uniUTF8NOBOM;
   }
   return m_unicodeMode;
}
//---------------------------------------------------------------------------
bool__fastcallZUtf8_16::LoadFromStream(TMemoryStream*pSourceStream,AnsiString&DestText)
{
   //TODO:从流中读取数据
   //先判断字符编码
   pSourceStream->Position=0;
   if(pSourceStream->Size==0)returntrue;
// 本文转自 C++Builder 研究 - http://www.ccrun.com/article.asp?i=1023&d=cbj0f7
   m_unicodeMode=DetermineEncoding((char*)pSourceStream->Memory,pSourceStream->Size);
   pSourceStream->Position=0;
   //再根椐呼应的编码做呼应的事.
   switch(m_unicodeMode)
   {
     caseuni8Bit:
     {
       //什么都不做.以保证翻开一些大的文件时速度快一些.
       //所以调用者需求自己在调的之后判断字符编码,
       //假如是uni8Bit,则需求自己处理.
//      intiLength=pSourceStream->Size;
//      char *szUnicode=newchar[iLength+1];
//      memset(szUnicode,0x00,iLength+1);
//      pSourceStream->Read(szUnicode,iLength);
//      DestText=AnsiString(szUnicode);
//      delete[]szUnicode;
//      szUnicode=NULL;
       break;
     }
     caseuni16BE:
     {
      //UCBigendian
       pSourceStream->Position=2;
       intiLength=pSourceStream->Size-2;
       chartemp;
       char*szUnicode=newchar[iLength+2];
       memset(szUnicode,0x00,iLength+2);
       pSourceStream->Read(szUnicode,iLength);
      //只要把每两个字节的位置交换一下,就是UNICODELE了.So...
       for(inti=0;i<iLength;i+=2)
       {
         temp=szUnicode[i];
         szUnicode[i]=szUnicode[i+1];
         szUnicode[i+1]=temp;
         Application->ProcessMessages();
       }
       DestText=WideCharLenToString((wchar_t*)(szUnicode),iLength/2);
       delete[]szUnicode;
       szUnicode=NULL;
       break;
     }
     caseuni16LE:
     {
      //UNICODE Littleendian
       pSourceStream->Position=2;
       intiLength=pSourceStream->Size-2;
       wchar_t *szUnicode=newwchar_t[iLength+2];
       memset(szUnicode,0x00,iLength+2);
       pSourceStream->Read(szUnicode,iLength);
       WideStringWideStr=WideString(szUnicode);
       DestText=WideStr;
       delete[]szUnicode;
       szUnicode=NULL;
       break;
     }
     caseuniUTF8:
     {
      //UTF8
       pSourceStream->Position=3;
       intiLength=pSourceStream->Size-3;
       char*szUTF8=newchar[iLength+3];
       memset(szUTF8,0x00,iLength+3);
       pSourceStream->Read(szUTF8,iLength);
       AnsiStringUtf8Str=Utf8ToAnsi(szUTF8);
       if(Utf8Str=="")
         DestText=AnsiString((char*)pSourceStream->Memory);
       else
         DestText=Utf8Str;
       delete[]szUTF8;
       szUTF8=NULL;
       break;
     }
     caseuniUTF8NOBOM:
     {
      //UTF8没有头标识的情形.
       intiLength=pSourceStream->Size;
       char*szUTF8=newchar[iLength+3];
       memset(szUTF8,0x00,iLength+3);
       pSourceStream->Read(szUTF8,iLength);
       AnsiStringUtf8Str=Utf8ToAnsi(szUTF8);
       if(Utf8Str=="")
         DestText=AnsiString((char*)pSourceStream->Memory);
       else
         DestText=Utf8Str;
       delete[]szUTF8;
       szUTF8=NULL;
       break;
     }
   }
   returntrue;
}
//---------------------------------------------------------------------------
bool__fastcallZUtf8_16::StreamSaveToFile(TMemoryStream*pStream,
     AnsiStringFileNameA,EncodingTypeunicodeMode)
{
   //TODO:把流内容按指定的格局保存到文件中.
   try
   {
     pStream->Position=0;
     switch(unicodeMode)
     {
       caseuni8Bit:
       {
        //什么都不做.直接保存.
         pStream->SaveToFile(FileNameA);
         break;
       }
       caseuni16BE:
       {
         //UCBigendian
         intiLength=pStream->Size;
         chartemp;
         char*pSource=newchar[iLength+2];
         memset(pSource,0x00,iLength+2);
         pStream->Read(pSource,iLength);
         //先看看转成的宽字节数返到nLen
         intnLen=MultiByteToWideChar(CP_ACP,0,pSource,iLength,NULL,NULL);
         LPWSTRlpwsz=newWCHAR[nLen];
         MultiByteToWideChar(CP_ACP,0,pSource,-1,lpwsz,nLen);
         intiNewLen=lstrlenW(lpwsz)*sizeof(WCHAR);
         char*pDest=newchar[iNewLen];
         memcpy(pDest,lpwsz,iNewLen);
       
         //只要把每两个字节的位置交换一下,就是UNICODEBig了.So...
         for(inti=0;i<iNewLen;i+=2)
         {
           temp=pDest[i];
           pDest[i]=pDest[i+1];
           pDest[i+1]=temp;
           Application->ProcessMessages();
         }
         FILE*f=fopen(FileNameA.c_str(),"wb");
         //写UnicodeBig头
         fputc(0xFE,f);
         fputc(0xFF,f);
         fwrite(pDest,1,iNewLen,f);
         fclose(f);
         delete[]pDest;
         pDest=NULL;
         delete[]lpwsz;
         lpwsz=NULL;
         delete[]pSource;
         pSource=NULL;
         break;
       }
       caseuni16LE:
       {
         //UNICODE Littleendian
         intiLength=pStream->Size;
         char*pSource=newchar[iLength+2];
         memset(pSource,0x00,iLength+2);
         pStream->Read(pSource,iLength);
         //先看看转成的宽字节数返到nLen
         intnLen=MultiByteToWideChar(CP_ACP,0,pSource,iLength,NULL,NULL);
         LPWSTRlpwsz=newWCHAR[nLen];
         MultiByteToWideChar(CP_ACP,0,pSource,-1,lpwsz,nLen);
         FILE*f=fopen(FileNameA.c_str(),"wb");
         //写Unicode头
         fputc(0xFF,f);
         fputc(0xFE,f);
         //一个宽字节占两个字节
         fwrite(lpwsz,1,lstrlenW(lpwsz)*sizeof(WCHAR),f);
         fclose(f);
         delete[]lpwsz;
         lpwsz=NULL;
         delete[]pSource;
         pSource=NULL;
         break;
       }
       caseuniUTF8:
       {
         //UTF8
         intiLen=pStream->Size;
         char*pSource=newchar[iLen+3];
         memset(pSource,0x00,iLen+3);
         pStream->Read(pSource,iLen);
         AnsiStringUtf8Str=AnsiToUtf8(pSource);
         delete[]pSource;
         pSource=NULL;
         FILE*f=fopen(FileNameA.c_str(),"wb");
         //写UTF8头
         fputc(0xEF,f);
         fputc(0xBB,f);
         fputc(0xBF,f);
         //一个宽字节占两个字节
         fwrite(Utf8Str.c_str(),1,Utf8Str.Length(),f);
         fclose(f);
         break;
       }
       caseuniUTF8NOBOM:
       {
         //UTF8没有标识头的情形.
         intiLen=pStream->Size;
         char*pSource=newchar[iLen+3];
         memset(pSource,0x00,iLen+3);
         pStream->Read(pSource,iLen);
         AnsiStringUtf8Str=AnsiToUtf8(pSource);
         delete[]pSource;
         pSource=NULL;
         FILE*f=fopen(FileNameA.c_str(),"wb");
         //一个宽字节占两个字节
         fwrite(Utf8Str.c_str(),1,Utf8Str.Length(),f);
         fclose(f);
         break;
       }
     }//endofswitch
   }
   catch(...)
   {
     returnfalse;
   }
   returntrue;
}
//---------------------------------------------------------------------------
//试用举例:
#include"ZUtf8_16.h"
bool__fastcallLoadFile(AnsiStringstrFileName,TStrings*pList)
{
   EncodingTypeunicodeMode;
   //TODO:装入文件.
   //假如装入成功,则返回true
   AnsiStringErrMsg;
   boolbReturn=true;
   ErrMsg.sprintf("装入%s文档时出错, 该文档不存在"
          "大概被别的程序以独占方法翻开!",strFileName);
   if(!FileExists(strFileName))
   {
     MessageBox(0, ErrMsg.c_str(),"错误",MB_OK|MB_ICONERROR);
     returnfalse;
   }
   AnsiStringReturnTxt;
   ZUtf8_16zutf8_16;
   TMemoryStream*ReadStream=newTMemoryStream();
   ReadStream->LoadFromFile(strFileName);
   bReturn=zutf8_16.LoadFromStream(ReadStream,ReturnTxt);
   if(bReturn)
   {
     unicodeMode=zutf8_16.GetEncodingType();
     if(unicodeMode==uni8Bit)
      pList->LoadFromStream(ReadStream);
     else
       pList->Text=ReturnTxt;
   }
   else
   {
     MessageBox(0, ErrMsg.c_str(),"错误",MB_OK|MB_ICONERROR);
   }
   deleteReadStream;
   ReadStream=NULL;
   returnbReturn;
}

  以上是“支持UNICODE/UTF8/ANSI之间的转换的类[VC/C++编程]”的内容,如果你对以上该文章内容感兴趣,你可以看看七道奇为您推荐以下文章:
  • 支持UNICODE/UTF8/ANSI之间的转换的类
  • <b>mysql不支持union &nbsp; select的盲注办法</b>
  • 本文地址: 与您的QQ/BBS好友分享!
    • 好的评价 如果您觉得此文章好,就请您
        0%(0)
    • 差的评价 如果您觉得此文章差,就请您
        0%(0)

    文章评论评论内容只代表网友观点,与本站立场无关!

       评论摘要(共 0 条,得分 0 分,平均 0 分) 查看完整评论
    Copyright © 2020-2022 www.xiamiku.com. All Rights Reserved .