• 指定存储文件的编码格式(下)


    指定存储文件编码格式共两篇。

    此为下篇。

    对于上篇的内容进行了优化,这样大大提高了代码的通用性和重用性。

    有兴趣的可以都下载下来对比参考。

       1 // test__EncodeAFileToEncodeBFile.cpp : 定义控制台应用程序的入口点。
       2 //
       3 
       4 #include "stdafx.h"
       5 #include <windows.h>
       6 #include <string>
       7 #include <iostream>
       8 
       9 
      10 #ifndef IN      // IN: expands to nothing; written before parameters that are only read
      11 #define IN
      12 #endif
      13 
      14 #ifndef OUT     // OUT: expands to nothing; presumably marks output-only parameters (unused in the visible code)
      15 #define OUT
      16 #endif
      17 
      18 #ifndef INOUT   // INOUT: expands to nothing; written before buffers/lengths that are read and written
      19 #define INOUT
      20 #endif
      21 
      22 #ifndef OPTION  // OPTION: expands to nothing; presumably marks optional parameters (unused in the visible code)
      23 #define OPTION
      24 #endif
      25 
      26 #define UTF8_SIGN 3     // size in bytes of the UTF-8 BOM (EF BB BF) that the helpers skip or prepend
      27 #define UTF16_SIGN 2    // presumably the size in bytes of a UTF-16 BOM; not referenced in the visible code
      28 #define FILE_HEADER 6   // number of leading file bytes GetEncodeType reads when looking for a BOM
      29 
      30 enum FileEncodeType     // encodings the detection/conversion routines in this file distinguish
      31 {
      32     OTHER = 0,          // anything not recognised (also used as the error result)
      33     UTF8,               // UTF-8 starting with the BOM EF BB BF
      34     UTF8_NO_BOM,        // UTF-8 without a BOM (detected by IsUTF8EncodeText)
      35     UTF16LE,            // UTF-16 little endian
      36     UTF16LE_NO_BOM,     // never produced by the detection code visible here
      37     UTF16BE,            // UTF-16 big endian
      38     UTF16BE_NO_BOM      // never produced by the detection code visible here
      39 };
      40 
      41 
      42 //************************************
      43 // Method:    IsUTF8EncodeText
      44 // FullName:  IsUTF8EncodeText
      45 // Access:    public 
      46 // Returns:   BOOL
      47 // Qualifier:判断输入内容是否时UTF8编码格式(可以判断不带BOM的UTF8编码)
      48 // Parameter: BYTE * lpText:判断是否时UTF8的内容
      49 // Parameter: INT cchText:UTF8的内容长度(按个数)
      50 //************************************
      51 BOOL IsUTF8EncodeText(IN CONST BYTE* lpText, IN INT cchText)
      52 {
      53     //    UTF8判断规则
      54     //  0000 0000 - 0000 007F 0xxxxxxx
      55     //    0000 0080 - 0000 07FF 110xxxxx 10xxxxxx
      56     //    0000 0800 - 0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
      57     //    0001 0000 - 001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      58     //    0020 0000 - 03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
      59     //    0400 0000 - 7FFF FFFF 1111110x 10xxxxxx ... 10xxxxxx
      60 
      61 
      62     int i;
      63     DWORD nBytes = 0;//UFT8可用1-6个字节编码,ASCII用一个字节
      64     BYTE lpByte;
      65     BOOL bAllAscii = TRUE; //如果全部都是ASCII, 说明不是UTF-8
      66     for (i = 0; i < cchText; i++)
      67     {
      68         lpByte = *(lpText + i);
      69         if ((lpByte & 0x80) != 0) // 判断是否ASCII编码,如果不是,说明有可能是UTF-8,ASCII用7位编码,但用一个字节存,最高位标记为0,o0xxxxxxx
      70             bAllAscii = FALSE;
      71         if (nBytes == 0) //如果不是ASCII码,应该是多字节符,计算字节数
      72         {
      73             if (lpByte >= 0x80)
      74             {
      75                 if (lpByte >= 0xFC && lpByte <= 0xFD)
      76                     nBytes = 6;
      77                 else if (lpByte >= 0xF8)
      78                     nBytes = 5;
      79                 else if (lpByte >= 0xF0)
      80                     nBytes = 4;
      81                 else if (lpByte >= 0xE0)
      82                     nBytes = 3;
      83                 else if (lpByte >= 0xC0)
      84                     nBytes = 2;
      85                 else
      86                 {
      87                     return FALSE;
      88                 }
      89                 nBytes--;
      90             }
      91         }
      92         else //多字节符的非首字节,应为 10xxxxxx
      93         {
      94             if ((lpByte & 0xC0) != 0x80)
      95             {
      96                 return FALSE;
      97             }
      98             nBytes--;
      99         }
     100     }
     101     if (nBytes > 0) //违返规则
     102     {
     103         return FALSE;
     104     }
     105     if (bAllAscii) //如果全部都是ASCII, 说明不是UTF-8
     106     {
     107         return FALSE;
     108     }
     109 
     110     return TRUE;
     111 }
     112 
     113 //************************************
     114 // Method:    GetEncodeType
     115 // FullName:  GetEncodeType
     116 // Access:    public 
     117 // Returns:   FileEncodeType
     118 // Qualifier:识别指定文件编码
     119 // Parameter: IN CONST LPTSTR lpFileName:指定文件名称
     120 // *注:目前只能识别UTF8/UTF8 WITHOUT BOM/UTF16LE/UTF16BE
     121 //************************************
     122 FileEncodeType GetEncodeType(IN CONST LPTSTR lpFileName)
     123 {
     124     FileEncodeType lFileEncodeType = OTHER;
     125     HANDLE hFile = NULL;
     126     LPBYTE lpFileHeader = NULL;
     127     INT cbFileHeader = FILE_HEADER;
     128     INT cchFileHeader = FILE_HEADER;
     129 
     130 
     131     hFile = ::CreateFile(lpFileName, GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
     132     if (INVALID_HANDLE_VALUE == hFile)
     133     {
     134         int errLogNumber = GetLastError();
     135         printf_s("error number:%d
    ", errLogNumber);
     136         return lFileEncodeType;
     137     }
     138 
     139     lpFileHeader = (BYTE *)malloc(cbFileHeader);
     140     if (NULL == lpFileHeader)
     141     {
     142         printf_s("malloc error
    ");
     143         ::CloseHandle(hFile);
     144         return lFileEncodeType;
     145     }
     146     ZeroMemory(lpFileHeader, cbFileHeader);
     147 
     148     SetFilePointer(hFile, 0, NULL, FILE_BEGIN);
     149     if (FALSE == ReadFile(hFile, lpFileHeader, FILE_HEADER, NULL, NULL))
     150     {
     151         int errLogNumber = GetLastError();
     152         printf_s("error number:%d
    ", errLogNumber);
     153         free(lpFileHeader);
     154         ::CloseHandle(hFile);
     155         return lFileEncodeType;
     156     }
     157 
     158 
     159 
     160     if (0xef == lpFileHeader[0] && 0xbb == lpFileHeader[1] && 0xbf == lpFileHeader[2])
     161     {
     162         free(lpFileHeader);
     163         ::CloseHandle(hFile);
     164         return lFileEncodeType = UTF8;
     165     }
     166     else if (0xff == lpFileHeader[0] && 0xfe == lpFileHeader[1])
     167     {
     168         free(lpFileHeader);
     169         ::CloseHandle(hFile);
     170         return lFileEncodeType = UTF16BE;
     171     }
     172     else if (0xfe == lpFileHeader[0] && 0xff == lpFileHeader[1])
     173     {
     174         free(lpFileHeader);
     175         ::CloseHandle(hFile);
     176         return lFileEncodeType = UTF16LE;
     177     }
     178     else
     179     {
     180         free(lpFileHeader);
     181 
     182         cbFileHeader = SetFilePointer(hFile, 0, NULL, FILE_END);
     183         if (INVALID_SET_FILE_POINTER == cbFileHeader)
     184         {
     185             int errLogNumber = GetLastError();
     186             printf_s("error number:%d
    ", errLogNumber);
     187             ::CloseHandle(hFile);
     188             return lFileEncodeType = OTHER;
     189         }
     190 
     191         lpFileHeader = (BYTE *)malloc(cbFileHeader);
     192         if (NULL == lpFileHeader)
     193         {
     194             printf_s("malloc error
    ");
     195             ::CloseHandle(hFile);
     196             return lFileEncodeType = OTHER;
     197         }
     198         ZeroMemory(lpFileHeader, cbFileHeader);
     199 
     200         SetFilePointer(hFile, 0, NULL, FILE_BEGIN);
     201         if (FALSE == ReadFile(hFile, lpFileHeader, cbFileHeader, NULL, NULL))
     202         {
     203             int errLogNumber = GetLastError();
     204             printf_s("error number:%d
    ", errLogNumber);
     205             free(lpFileHeader);
     206             ::CloseHandle(hFile);
     207             return lFileEncodeType = OTHER;
     208         }
     209 
     210 
     211 
     212         if (TRUE == IsUTF8EncodeText(lpFileHeader, cbFileHeader))
     213         {
     214             free(lpFileHeader);
     215             ::CloseHandle(hFile);
     216             return lFileEncodeType = UTF8_NO_BOM;
     217         }
     218 
     219 
     220         lFileEncodeType = OTHER;
     221     }
     222 
     223     free(lpFileHeader);
     224     ::CloseHandle(hFile);
     225     return lFileEncodeType;
     226 }
     227 
     228 //************************************
     229 // Method:    GetEncodeType
     230 // FullName:  GetEncodeType
     231 // Access:    public 
     232 // Returns:   FileEncodeType
     233 // Qualifier:识别指定字节流编码
     234 // Parameter: IN CONST BYTE * lpBytes:指定字节流
     235 // Parameter: IN CONST INT cchBytes:指定字节流长度
     236 // *注:目前只能识别UTF8/UTF8 WITHOUT BOM/UTF16LE/UTF16BE
     237 //************************************
     238 FileEncodeType GetEncodeType(IN CONST BYTE *lpBytes, IN CONST INT cchBytes)
     239 {
     240     if (0xef == lpBytes[0] && 0xbb == lpBytes[1] && 0xbf == lpBytes[2])
     241     {
     242         return UTF8;
     243     }
     244     else if (0xff == lpBytes[0] && 0xfe == lpBytes[1])
     245     {
     246         return UTF16LE;
     247     }
     248     else if (0xfe == lpBytes[0] && 0xff == lpBytes[1])
     249     {
     250         return UTF16BE;
     251     }
     252     else
     253     {
     254         if (TRUE == IsUTF8EncodeText(lpBytes, cchBytes))
     255         {
     256             return UTF8_NO_BOM;
     257         }
     258     }
     259 
     260     return OTHER;
     261 }
     262 
     263 //************************************
     264 // Method:    Utf16leAndUtf16beTransfers
     265 // FullName:  Utf16leAndUtf16beTransfers
     266 // Access:    public 
     267 // Returns:   BOOL
     268 // Qualifier:相互转换UTF16LE 和 UTF16BE
     269 // Parameter: INOUT LPBYTE lpUtf16String:指定UTF16编码字节流
     270 // Parameter: IN CONST INT cchUtf16String:指定UTF16编码字节流长度
     271 //************************************
     272 BOOL Utf16leAndUtf16beTransfers(INOUT LPBYTE lpUtf16String, IN CONST INT cchUtf16String)
     273 {
     274     if (NULL == lpUtf16String || cchUtf16String < 0)
     275     {
     276         return FALSE;
     277     }
     278 
     279     for (INT i = 0; i < cchUtf16String; i += 2)//每两值交换
     280     {
     281         lpUtf16String[i] = lpUtf16String[i] ^ lpUtf16String[i + 1];
     282         lpUtf16String[i + 1] = lpUtf16String[i + 1] ^ lpUtf16String[i];
     283         lpUtf16String[i] = lpUtf16String[i] ^ lpUtf16String[i + 1];
     284     }
     285 
     286     return TRUE;
     287 }
     288 
     289 //************************************
     290 // Method:    Utf8ToUtf8NoBOM
     291 // FullName:  Utf8ToUtf8NoBOM
     292 // Access:    public 
     293 // Returns:   BOOL
     294 // Qualifier:UTF8编码转换到UTF8 without BOM编码
     295 // Parameter: IN CONST LPSTR lpUtf8String:UTF8编码字符串
     296 // Parameter: IN CONST INT cchUtf8String:UTF8编码字符串长度
     297 // Parameter: INOUT CONST LPSTR lpUtf8NoBOMString:UTF8 without BOM编码字符串
     298 // Parameter: IN CONST INT cchUtf8NoBOMString:UTF8 without BOM编码字符串长度
     299 //************************************
     300 BOOL Utf8ToUtf8NoBOM(IN CONST LPSTR lpUtf8String, IN CONST INT cchUtf8String, INOUT CONST LPSTR lpUtf8NoBOMString, IN CONST INT cchUtf8NoBOMString)
     301 {
     302     if (NULL == lpUtf8String || cchUtf8String < 0 || NULL == lpUtf8NoBOMString || cchUtf8NoBOMString < 0)
     303     {
     304         return FALSE;
     305     }
     306     if (cchUtf8NoBOMString < cchUtf8String - UTF8_SIGN)
     307     {
     308         return FALSE;
     309     }
     310 
     311     ::CopyMemory(lpUtf8NoBOMString, lpUtf8String + UTF8_SIGN, cchUtf8String - UTF8_SIGN);
     312 
     313     return TRUE;
     314 }
     315 //************************************
     316 // Method:    Utf8NoBOMToUtf8
     317 // FullName:  Utf8NoBOMToUtf8
     318 // Access:    public 
     319 // Returns:   BOOL
     320 // Qualifier:UTF8 without BOM编码转换到UTF8编码
     321 // Parameter: INOUT CONST LPSTR lpUtf8NoBOMString:UTF8 without BOM编码字符串
     322 // Parameter: IN CONST INT cchUtf8NoBOMString:UTF8 without BOM编码字符串长度
     323 // Parameter: IN CONST LPSTR lpUtf8String:UTF8编码字符串
     324 // Parameter: IN CONST INT cchUtf8String:UTF8编码字符串长度
     325 //************************************
     326 BOOL Utf8NoBOMToUtf8(INOUT CONST LPSTR lpUtf8NoBOMString, IN CONST INT cchUtf8NoBOMString, IN CONST LPSTR lpUtf8String, IN CONST INT cchUtf8String)
     327 {
     328     if (NULL == lpUtf8String || cchUtf8String < 0 || NULL == lpUtf8NoBOMString || cchUtf8NoBOMString < 0)
     329     {
     330         return FALSE;
     331     }
     332     if (cchUtf8NoBOMString < cchUtf8String - UTF8_SIGN)
     333     {
     334         return FALSE;
     335     }
     336 
     337     lpUtf8String[0] = 0xef;
     338     lpUtf8String[1] = 0xbb;
     339     lpUtf8String[2] = 0xbf;
     340     ::CopyMemory(lpUtf8String + UTF8_SIGN, lpUtf8NoBOMString, cchUtf8NoBOMString);
     341 
     342     return TRUE;
     343 }
     344 
     345 //************************************
     346 // Method:    BYTETOUTF8
     347 // FullName:  BYTETOUTF8
     348 // Access:    public 
     349 // Returns:   BOOL
     350 // Qualifier:指定编码转换为UTF8(或without BOM)编码
     351 // Parameter: IN CONST LPBYTE lpBytes:指定需要转换的字节流
     352 // Parameter: IN CONST INT cbBytes:指定需要转换的字节流长度(字节单位)
     353 // Parameter: IN CONST FileEncodeType tpBytes:指定需要转换的字节流的实际编码格式
     354 // Parameter: INOUT LPSTR lpText:指定输出的编码流
     355 // Parameter: INOUT LPINT lpcbText:指定输出的编码流长度(字节单位)
     356 // Parameter: IN CONST BOOL bWithBOM:指定输出的编码是否有BOM
     357 //************************************
     358 BOOL BYTETOUTF8(IN CONST LPBYTE lpBytes, IN CONST INT cbBytes, IN CONST FileEncodeType tpBytes, INOUT LPSTR lpText, INOUT LPINT lpcbText, IN CONST BOOL bWithBOM = TRUE)
     359 {
     360     BOOL bResult = FALSE;
     361     LPSTR lpSourceA = NULL;
     362     INT cbSourceA = 0;
     363     INT cchSourceA = 0;
     364     LPWSTR lpSourceW = NULL;
     365     INT cbSourceW = 0;
     366     INT cchSourceW = 0;
     367 
     368 
     369     if (NULL == lpBytes || 
     370         NULL == lpText || 
     371         !(UTF8_NO_BOM == tpBytes || UTF8 == tpBytes || UTF16LE == tpBytes || UTF16BE == tpBytes) ||
     372         cbBytes < 0 || 
     373         NULL == lpcbText)
     374     {
     375         return FALSE;
     376     }
     377 
     378     switch (tpBytes)
     379     {
     380         case UTF8_NO_BOM:
     381         {
     382             if (FALSE == bWithBOM)
     383             {
     384                 return FALSE;
     385             }
     386             if (*lpcbText < cbBytes + UTF8_SIGN)
     387             {
     388                 return FALSE;
     389             }
     390 
     391             cbSourceA = cbBytes;
     392             cchSourceA = cbSourceA;
     393             lpSourceA = (CHAR *)malloc(cbSourceA);
     394             if (NULL == lpSourceA)
     395             {
     396                 printf_s("malloc error
    ");
     397                 return FALSE;
     398             }
     399             ZeroMemory(lpSourceA, cbSourceA);
     400             CopyMemory(lpSourceA, lpBytes, cbSourceA);
     401 
     402             bResult = Utf8NoBOMToUtf8(lpSourceA, cchSourceA, lpText, *lpcbText);
     403             free(lpSourceA);
     404         }
     405             break;
     406         case UTF8:
     407         {
     408             if (TRUE == bWithBOM)
     409             {
     410                 return FALSE;
     411             }
     412             if (*lpcbText < cbBytes - UTF8_SIGN)
     413             {
     414                 return FALSE;
     415             }
     416 
     417             cbSourceA = cbBytes;
     418             cchSourceA = cbSourceA;
     419             lpSourceA = (CHAR *)malloc(cbSourceA);
     420             if (NULL == lpSourceA)
     421             {
     422                 printf_s("malloc error
    ");
     423                 return FALSE;
     424             }
     425             ZeroMemory(lpSourceA, cbSourceA);
     426             CopyMemory(lpSourceA, lpBytes, cbSourceA);
     427 
     428             bResult = Utf8ToUtf8NoBOM(lpSourceA, cchSourceA, lpText, *lpcbText);
     429             free(lpSourceA);
     430         }
     431         break;
     432         case UTF16LE:
     433         {
     434             cbSourceW = cbBytes;
     435             cchSourceW = cbSourceW / sizeof(WCHAR);
     436             lpSourceW = (WCHAR *)malloc(cbSourceW);
     437             if (NULL == lpSourceW)
     438             {
     439                 printf_s("malloc error
    ");
     440                 return FALSE;
     441             }
     442             ZeroMemory(lpSourceW, cbSourceW);
     443             ::CopyMemory(lpSourceW, lpBytes, cbBytes);
     444 
     445             *lpcbText = ::WideCharToMultiByte(CP_UTF8, 0, lpSourceW, cchSourceW, lpText, 0, NULL, NULL);//获取所需存储大小
     446 
     447             if (TRUE == bWithBOM)
     448             {
     449                 if (0 == ::WideCharToMultiByte(CP_UTF8, 0, lpSourceW, cchSourceW, lpText, *lpcbText, NULL, NULL))
     450                 {
     451                     free(lpSourceW);
     452                     return FALSE;
     453                 }
     454                 bResult = TRUE;
     455             }
     456             else
     457             {
     458                 cbSourceA = *lpcbText;
     459                 cchSourceA = cbSourceA;
     460                 lpSourceA = (CHAR *)malloc(cbSourceA);
     461                 if (NULL == lpSourceA)
     462                 {
     463                     printf_s("malloc error
    ");
     464                     free(lpSourceW);
     465                     return FALSE;
     466                 }
     467                 ZeroMemory(lpSourceA, cbSourceA);
     468                 CopyMemory(lpSourceA, lpBytes, cbSourceA);
     469                 if (0 == ::WideCharToMultiByte(CP_UTF8, 0, lpSourceW, cchSourceW, lpSourceA, cbSourceA, NULL, NULL))
     470                 {
     471                     free(lpSourceW);
     472                     free(lpSourceA);
     473                     return FALSE;
     474                 }
     475                 bResult = Utf8ToUtf8NoBOM(lpSourceA, cchSourceA, lpText, *lpcbText);
     476                 *lpcbText -= UTF8_SIGN;
     477                 free(lpSourceA);
     478             }
     479 
     480             free(lpSourceW);
     481         }
     482             break;
     483         case UTF16BE:
     484         {
     485             LPBYTE lpBytesTemp = NULL;
     486             INT cbBytesTemp = 0;
     487             INT cchBytesTemp = 0;
     488 
     489             cbBytesTemp = cbBytes;
     490             lpBytesTemp = (BYTE *)malloc(cbBytesTemp);
     491             if (NULL == lpBytesTemp)
     492             {
     493                 printf_s("malloc error
    ");
     494                 return FALSE;
     495             }
     496             ZeroMemory(lpBytesTemp, cbBytesTemp);
     497             ::CopyMemory(lpBytesTemp, lpBytes, cbBytesTemp);
     498 
     499             if (FALSE == Utf16leAndUtf16beTransfers(lpBytesTemp, cbBytesTemp))
     500             {
     501                 free(lpBytesTemp);
     502                 return FALSE;
     503             }
     504 
     505             cbSourceW = cbBytes;
     506             cchSourceW = cbSourceW / sizeof(WCHAR);
     507             lpSourceW = (WCHAR *)malloc(cbSourceW);
     508             if (NULL == lpSourceW)
     509             {
     510                 printf_s("malloc error
    ");
     511                 free(lpBytesTemp);
     512                 return FALSE;
     513             }
     514             ZeroMemory(lpSourceW, cbSourceW);
     515             ::CopyMemory(lpSourceW, lpBytesTemp, cbSourceW);
     516             free(lpBytesTemp);
     517             lpBytesTemp = NULL;
     518 
     519             *lpcbText = ::WideCharToMultiByte(CP_UTF8, 0, lpSourceW, cchSourceW, lpText, 0, NULL, NULL);//获取所需存储大小
     520 
     521             if (TRUE == bWithBOM)
     522             {
     523                 if (0 == ::WideCharToMultiByte(CP_UTF8, 0, lpSourceW, cchSourceW, lpText, *lpcbText, NULL, NULL))
     524                 {
     525                     free(lpSourceW);
     526                     return FALSE;
     527                 }
     528                 bResult = TRUE;
     529             }
     530             else
     531             {
     532                 cbSourceA = *lpcbText;
     533                 cchSourceA = cbSourceA;
     534                 lpSourceA = (CHAR *)malloc(cbSourceA);
     535                 if (NULL == lpSourceA)
     536                 {
     537                     printf_s("malloc error
    ");
     538                     free(lpSourceW);
     539                     return FALSE;
     540                 }
     541                 ZeroMemory(lpSourceA, cbSourceA);
     542                 CopyMemory(lpSourceA, lpBytes, cbSourceA);
     543                 if (0 == ::WideCharToMultiByte(CP_UTF8, 0, lpSourceW, cchSourceW, lpSourceA, cbSourceA, NULL, NULL))
     544                 {
     545                     free(lpSourceW);
     546                     free(lpSourceA);
     547                     return FALSE;
     548                 }
     549                 bResult = Utf8ToUtf8NoBOM(lpSourceA, cchSourceA, lpText, *lpcbText);
     550                 *lpcbText -= UTF8_SIGN;
     551                 free(lpSourceA);
     552             }
     553 
     554             free(lpSourceW);
     555         }
     556             break;
     557         default:
     558             return FALSE;
     559     }
     560 
     561     return bResult;
     562 }
     563 
     564 //************************************
     565 // Method:    BYTETOUTF16LE
     566 // FullName:  BYTETOUTF16LE
     567 // Access:    public 
     568 // Returns:   BOOL
     569 // Qualifier:指定编码流转换为UTF16LE编码格式
     570 // Parameter: IN CONST LPBYTE lpBytes:指定的编码流
     571 // Parameter: IN CONST INT cbBytes:指定的编码流长度(字节单位)
     572 // Parameter: IN CONST FileEncodeType tpBytes:指定的编码流的实际编码格式
     573 // Parameter: INOUT LPWSTR lpText:指定输出的编码流
     574 // Parameter: INOUT LPINT lpcbText:指定输出的编码流长度(字节单位)
     575 //************************************
     576 BOOL BYTETOUTF16LE(IN CONST LPBYTE lpBytes, IN CONST INT cbBytes, IN CONST FileEncodeType tpBytes, INOUT LPWSTR lpText, INOUT LPINT lpcbText)
     577 {
     578     BOOL bResult = FALSE;
     579     LPSTR lpSourceA = NULL;
     580     INT cbSourceA = 0;
     581     INT cchSourceA = 0;
     582     LPWSTR lpSourceW = NULL;
     583     INT cbSourceW = 0;
     584     INT cchSourceW = 0;
     585 
     586 
     587     if (NULL == lpBytes ||
     588         NULL == lpText ||
     589         !(UTF8_NO_BOM == tpBytes || UTF8 == tpBytes || UTF16LE == tpBytes || UTF16BE == tpBytes) ||
     590         cbBytes < 0 ||
     591         NULL == lpcbText)
     592     {
     593         return FALSE;
     594     }
     595 
     596     switch (tpBytes)
     597     {
     598         case UTF8_NO_BOM:
     599         {
     600             LPSTR lpSourceATemp = NULL;
     601             INT cbSourceATemp = 0;
     602             INT cchSourceATemp = 0;
     603 
     604 
     605             if (*lpcbText < cbBytes + UTF8_SIGN)
     606             {
     607                 return FALSE;
     608             }
     609 
     610             cbSourceA = cbBytes;
     611             cchSourceA = cbSourceA;
     612             lpSourceA = (CHAR *)malloc(cbSourceA);
     613             if (NULL == lpSourceA)
     614             {
     615                 printf_s("malloc error
    ");
     616                 return FALSE;
     617             }
     618             ZeroMemory(lpSourceA, cbSourceA);
     619             CopyMemory(lpSourceA, lpBytes, cbSourceA);
     620 
     621             cbSourceATemp = cbBytes + UTF8_SIGN;
     622             cchSourceATemp = cbSourceATemp;
     623             lpSourceATemp = (CHAR *)malloc(cbSourceATemp);
     624             if (NULL == lpSourceATemp)
     625             {
     626                 printf_s("malloc error
    ");
     627                 return FALSE;
     628             }
     629             ZeroMemory(lpSourceATemp, cbSourceATemp);
     630 
     631             if (FALSE == Utf8NoBOMToUtf8(lpSourceA, cchSourceA, lpSourceATemp, cchSourceATemp))
     632             {
     633                 free(lpSourceA);
     634                 free(lpSourceATemp);
     635                 return FALSE;
     636             }
     637             free(lpSourceA);
     638             lpSourceA = NULL;
     639 
     640             *lpcbText = MultiByteToWideChar(CP_UTF8, 0, lpSourceATemp, cbSourceATemp, lpText, 0);
     641             if (0 == MultiByteToWideChar(CP_UTF8, 0, lpSourceATemp, cbSourceATemp, lpText, *lpcbText))
     642             {
     643                 free(lpSourceATemp);
     644                 return FALSE;
     645             }
     646 
     647             *lpcbText *= sizeof(WCHAR);
     648         
     649             free(lpSourceATemp);
     650             bResult = TRUE;
     651         }
     652         break;
     653         case UTF8:
     654         {
     655             if (*lpcbText < cbBytes + UTF8_SIGN)
     656             {
     657                 return FALSE;
     658             }
     659 
     660             cbSourceA = cbBytes;
     661             cchSourceA = cbSourceA;
     662             lpSourceA = (CHAR *)malloc(cbSourceA);
     663             if (NULL == lpSourceA)
     664             {
     665                 printf_s("malloc error
    ");
     666                 return FALSE;
     667             }
     668             ZeroMemory(lpSourceA, cbSourceA);
     669             CopyMemory(lpSourceA, lpBytes, cbSourceA);
     670 
     671 
     672             *lpcbText = MultiByteToWideChar(CP_UTF8, 0, lpSourceA, cchSourceA, lpText, 0);
     673             if (0 == MultiByteToWideChar(CP_UTF8, 0, lpSourceA, cchSourceA, lpText, *lpcbText))
     674             {
     675                 free(lpSourceA);
     676                 return FALSE;
     677             }
     678 
     679             *lpcbText *= sizeof(WCHAR);
     680 
     681             free(lpSourceA);
     682             bResult = TRUE;
     683         }
     684         break;
     685         case UTF16LE:
     686         {
     687             CopyMemory(lpText, lpBytes, cbBytes);
     688             *lpcbText = cbBytes;
     689             return TRUE;
     690         }
     691         break;
     692         case UTF16BE:
     693         {
     694             if (*lpcbText < cbBytes)
     695             {
     696                 return FALSE;
     697             }
     698 
     699             LPBYTE lpBytesTemp = NULL;
     700             INT cbBytesTemp = 0;
     701 
     702             cbBytesTemp = cbBytes;
     703             lpBytesTemp = (BYTE *)malloc(cbBytesTemp);
     704             if (NULL == lpBytesTemp)
     705             {
     706                 printf_s("malloc error
    ");
     707                 return FALSE;
     708             }
     709             ZeroMemory(lpBytesTemp, cbBytesTemp);
     710             CopyMemory(lpBytesTemp, lpBytes, cbBytesTemp);
     711             if (FALSE == Utf16leAndUtf16beTransfers(lpBytesTemp, cbBytesTemp))
     712             {
     713                 free(lpSourceW);
     714                 return FALSE;
     715             }
     716             CopyMemory(lpText, lpBytesTemp, cbBytesTemp);
     717             *lpcbText = cbBytesTemp;
     718             free(lpBytesTemp);
     719 
     720             bResult = TRUE;
     721         }
     722         break;
     723         default:
     724             return FALSE;
     725     }
     726 
     727     return bResult;
     728 }
     729 //************************************
     730 // Method:    BYTETOUTF16BE
     731 // FullName:  BYTETOUTF16BE
     732 // Access:    public 
     733 // Returns:   BOOL
     734 // Qualifier:指定编码流转换为UTF16BE编码格式
     735 // Parameter: IN CONST LPBYTE lpBytes:指定需要转换的编码流
     736 // Parameter: IN CONST INT cbBytes:指定需要转换的编码流长度(字节单位)
     737 // Parameter: IN CONST FileEncodeType tpBytes::指定需要转换的编码流的实际编码格式
     738 // Parameter: INOUT LPWSTR lpText:指定输出流
     739 // Parameter: INOUT LPINT lpcbText:指定输出流的长度(字节单位)
     740 //************************************
     741 BOOL BYTETOUTF16BE(IN CONST LPBYTE lpBytes, IN CONST INT cbBytes, IN CONST FileEncodeType tpBytes, INOUT LPWSTR lpText, INOUT LPINT lpcbText)
     742 {
     743     if (FALSE == BYTETOUTF16LE(lpBytes, cbBytes, tpBytes, lpText, lpcbText))
     744     {
     745         return FALSE;
     746     }
     747     return Utf16leAndUtf16beTransfers((LPBYTE)lpText, *lpcbText);
     748 }
     749 
     750 
     751 //************************************
     752 // Method:    FileAToFileB
     753 // FullName:  FileAToFileB
     754 // Access:    public 
     755 // Returns:   BOOL
     756 // Qualifier:指定文件A中内容转换为指定编码存入指定文件B中
     757 // Parameter: CONST LPTSTR lpFileA:输入文件A(只读文件)
     758 // Parameter: CONST LPSTR lpFileB:输出文件B(总是创建文件)
     759 // Parameter: FileEncodeType tpFileB:指定输出文件编码(UTF8/UTF8 WITHOUT BOM/UTF16LE/UTF16BE)
     760 // *注:目前只能识别UTF8/UTF8 WITHOUT BOM/UTF16LE/UTF16BE
     761 //************************************
     762 BOOL FileAToFileB(IN CONST LPTSTR lpFileA, IN CONST LPTSTR lpFileB, FileEncodeType tpFileB)
     763 {
     764     BOOL bResult = FALSE;
     765     FileEncodeType tpFileA = OTHER;
     766     HANDLE hFileA = NULL;
     767     HANDLE hFileB = NULL;
     768     LPBYTE lpReadFileBytes = NULL;
     769     INT cbReadFileBytes = 0;
     770     INT cchReadFileBytes = 0;
     771     LPSTR lpWriteFileString = NULL;
     772     INT cbWriteFileString = 0;
     773     INT cchWriteFileString = 0;
     774     LPWSTR lpWriteFileWString = NULL;
     775     INT cbWriteFileWString = 0;
     776     INT cchWriteFileWString = 0;
     777 
     778 
     779     hFileA = ::CreateFile(lpFileA, GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
     780     if (INVALID_HANDLE_VALUE == hFileA)
     781     {
     782         int errLogNumber = GetLastError();
     783         printf_s("error number:%d
    ", errLogNumber);
     784         return FALSE;
     785     }
     786 
     787     cbReadFileBytes = SetFilePointer(hFileA, 0, NULL, FILE_END);
     788     if (INVALID_SET_FILE_POINTER == cbReadFileBytes)
     789     {
     790         int errLogNumber = GetLastError();
     791         printf_s("error number:%d
    ", errLogNumber);
     792         ::CloseHandle(hFileA);
     793         return FALSE;
     794     }
     795 
     796     if (1 != sizeof(BYTE))
     797     {
     798         printf_s("byte cell width error
    ");
     799         return FALSE;
     800     }
     801 
     802     lpReadFileBytes = (BYTE *)malloc(cbReadFileBytes);
     803     if (NULL == lpReadFileBytes)
     804     {
     805         printf_s("malloc error
    ");
     806         ::CloseHandle(hFileA);
     807         return FALSE;
     808     }
     809     ZeroMemory(lpReadFileBytes, cbReadFileBytes);
     810 
     811     SetFilePointer(hFileA, 0, NULL, FILE_BEGIN);
     812     if (FALSE == ReadFile(hFileA, lpReadFileBytes, cbReadFileBytes, NULL, NULL))
     813     {
     814         int errLogNumber = GetLastError();
     815         printf_s("error number:%d
    ", errLogNumber);
     816         free(lpReadFileBytes);
     817         ::CloseHandle(hFileA);
     818         return FALSE;
     819     }
     820     ::CloseHandle(hFileA);
     821     hFileA = NULL;
     822 
     823 
     824     if (!(UTF8 == tpFileB || UTF8_NO_BOM == tpFileB || UTF16LE == tpFileB || UTF16BE == tpFileB))
     825     {
     826         printf_s("Unable to identify type error
    ");
     827         free(lpReadFileBytes);
     828         return FALSE;
     829     }
     830 
     831     hFileB = ::CreateFile(lpFileB, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
     832     if (INVALID_HANDLE_VALUE == hFileB)
     833     {
     834         int errLogNumber = GetLastError();
     835         printf_s("error number:%d
    ", errLogNumber);
     836         free(lpReadFileBytes);
     837         return FALSE;
     838     }
     839 
     840 
     841     tpFileA = GetEncodeType(lpReadFileBytes, cbReadFileBytes);
     842     switch (tpFileA)
     843     {
     844         case UTF8:
     845         {
     846             switch (tpFileB)
     847             {
     848             case UTF8_NO_BOM:
     849                 cbWriteFileString = cbReadFileBytes - UTF8_SIGN;
     850                 lpWriteFileString = (CHAR *)malloc(cbWriteFileString);
     851                 ZeroMemory(lpWriteFileString, cbWriteFileString);
     852                 bResult = BYTETOUTF8(lpReadFileBytes, cbReadFileBytes, UTF8, lpWriteFileString, &cbWriteFileString, FALSE);
     853                 free(lpReadFileBytes);
     854                 lpReadFileBytes = NULL;
     855                 break;
     856             case UTF16LE:
     857                 cbWriteFileWString = cbReadFileBytes * sizeof(WCHAR);
     858                 lpWriteFileWString = (WCHAR *)malloc(cbWriteFileWString);
     859                 ZeroMemory(lpWriteFileWString, cbWriteFileWString);
     860                 bResult = BYTETOUTF16LE(lpReadFileBytes, cbReadFileBytes, UTF8, lpWriteFileWString, &cbWriteFileWString);
     861                 free(lpReadFileBytes);
     862                 lpReadFileBytes = NULL;
     863                 break;
     864             case UTF16BE:
     865                 cbWriteFileWString = cbReadFileBytes * sizeof(WCHAR);
     866                 lpWriteFileWString = (WCHAR *)malloc(cbWriteFileWString);
     867                 ZeroMemory(lpWriteFileWString, cbWriteFileWString);
     868                 bResult = BYTETOUTF16BE(lpReadFileBytes, cbReadFileBytes, UTF8, lpWriteFileWString, &cbWriteFileWString);
     869                 free(lpReadFileBytes);
     870                 lpReadFileBytes = NULL;
     871                 break;
     872             default:;
     873             }
     874         }
     875             break;
     876         case UTF8_NO_BOM:
     877         {
     878             switch (tpFileB)
     879             {
     880             case UTF8:
     881                 cbWriteFileString = cbReadFileBytes + UTF8_SIGN;
     882                 lpWriteFileString = (CHAR *)malloc(cbWriteFileString);
     883                 ZeroMemory(lpWriteFileString, cbWriteFileString);
     884                 bResult = BYTETOUTF8(lpReadFileBytes, cbReadFileBytes, UTF8_NO_BOM, lpWriteFileString, &cbWriteFileString, TRUE);
     885                 free(lpReadFileBytes);
     886                 lpReadFileBytes = NULL;
     887                 break;
     888             case UTF16LE:
     889                 cbWriteFileWString = (cbReadFileBytes + UTF8_SIGN) * sizeof(WCHAR);
     890                 lpWriteFileWString = (WCHAR *)malloc(cbWriteFileWString);
     891                 ZeroMemory(lpWriteFileWString, cbWriteFileWString);
     892                 bResult = BYTETOUTF16LE(lpReadFileBytes, cbReadFileBytes, UTF8_NO_BOM, lpWriteFileWString, &cbWriteFileWString);
     893                 free(lpReadFileBytes);
     894                 lpReadFileBytes = NULL;
     895                 break;
     896             case UTF16BE:
     897                 cbWriteFileWString = (cbReadFileBytes + UTF8_SIGN) * sizeof(WCHAR);
     898                 lpWriteFileWString = (WCHAR *)malloc(cbWriteFileWString);
     899                 ZeroMemory(lpWriteFileWString, cbWriteFileWString);
     900                 bResult = BYTETOUTF16BE(lpReadFileBytes, cbReadFileBytes, UTF8_NO_BOM, lpWriteFileWString, &cbWriteFileWString);
     901                 free(lpReadFileBytes);
     902                 lpReadFileBytes = NULL;
     903                 break;
     904             default:;
     905             }
     906         }
     907             break;
     908         case UTF16LE:
     909         {
     910             switch (tpFileB)
     911             {
     912             case UTF8:
     913                 cbWriteFileString = cbReadFileBytes;
     914                 lpWriteFileString = (CHAR *)malloc(cbWriteFileString);
     915                 ZeroMemory(lpWriteFileString, cbWriteFileString);
     916                 bResult = BYTETOUTF8(lpReadFileBytes, cbReadFileBytes, UTF16LE, lpWriteFileString, &cbWriteFileString, TRUE);
     917                 free(lpReadFileBytes);
     918                 lpReadFileBytes = NULL;
     919                 break;
     920             case UTF8_NO_BOM:
     921                 cbWriteFileString = cbReadFileBytes;
     922                 lpWriteFileString = (CHAR *)malloc(cbWriteFileString);
     923                 ZeroMemory(lpWriteFileString, cbWriteFileString);
     924                 bResult = BYTETOUTF8(lpReadFileBytes, cbReadFileBytes, UTF16LE, lpWriteFileString, &cbWriteFileString, FALSE);
     925                 free(lpReadFileBytes);
     926                 lpReadFileBytes = NULL;
     927                 break;
     928             case UTF16BE:
     929                 cbWriteFileWString = cbReadFileBytes;
     930                 lpWriteFileWString = (WCHAR *)malloc(cbWriteFileWString);
     931                 ZeroMemory(lpWriteFileWString, cbWriteFileWString);
     932                 bResult = BYTETOUTF16BE(lpReadFileBytes, cbReadFileBytes, UTF16LE, lpWriteFileWString, &cbWriteFileWString);
     933                 free(lpReadFileBytes);
     934                 lpReadFileBytes = NULL;
     935                 break;
     936             default:;
     937             }
     938         }
     939             break;
     940         case UTF16BE:
     941         {
     942             switch (tpFileB)
     943             {
     944             case UTF8:
     945                 cbWriteFileString = cbReadFileBytes;
     946                 lpWriteFileString = (CHAR *)malloc(cbWriteFileString);
     947                 ZeroMemory(lpWriteFileString, cbWriteFileString);
     948                 bResult = BYTETOUTF8(lpReadFileBytes, cbReadFileBytes, UTF16BE, lpWriteFileString, &cbWriteFileString, TRUE);
     949                 free(lpReadFileBytes);
     950                 lpReadFileBytes = NULL;
     951                 break;
     952             case UTF8_NO_BOM:
     953                 cbWriteFileString = cbReadFileBytes;
     954                 lpWriteFileString = (CHAR *)malloc(cbWriteFileString);
     955                 ZeroMemory(lpWriteFileString, cbWriteFileString);
     956                 bResult = BYTETOUTF8(lpReadFileBytes, cbReadFileBytes, UTF16BE, lpWriteFileString, &cbWriteFileString, FALSE);
     957                 free(lpReadFileBytes);
     958                 lpReadFileBytes = NULL;
     959                 break;
     960             case UTF16LE:
     961                 cbWriteFileWString = cbReadFileBytes * sizeof(WCHAR);
     962                 lpWriteFileWString = (WCHAR *)malloc(cbWriteFileWString);
     963                 ZeroMemory(lpWriteFileWString, cbWriteFileWString);
     964                 bResult = BYTETOUTF16LE(lpReadFileBytes, cbReadFileBytes, UTF16BE, lpWriteFileWString, &cbWriteFileWString);
     965                 free(lpReadFileBytes);
     966                 lpReadFileBytes = NULL;
     967                 break;
     968             default:;
     969             }
     970         }
     971             break;
     972         default:
     973             printf("file encode unable to identify.
    ");
     974             free(lpReadFileBytes);
     975             return FALSE;
     976     }
     977 
     978 
     979     if (NULL != lpWriteFileString)
     980     {
     981         if (FALSE == WriteFile(hFileB, lpWriteFileString, cbWriteFileString, NULL, NULL))
     982         {
     983             free(lpWriteFileString);
     984             return FALSE;
     985         }
     986         free(lpWriteFileString);
     987         lpWriteFileString = NULL;
     988     }
     989     if (NULL != lpWriteFileWString)
     990     {
     991         if (FALSE == WriteFile(hFileB, lpWriteFileWString, cbWriteFileWString, NULL, NULL))
     992         {
     993             free(lpWriteFileWString);
     994             return FALSE;
     995         }
     996         free(lpWriteFileWString);
     997         lpWriteFileWString = NULL;
     998     }
     999     ::CloseHandle(hFileB);
    1000     hFileB = NULL;
    1001 
    1002     return bResult;
    1003 }
    1004 
    1005 
    1006 int _tmain(int argc, _TCHAR* argv[])
    1007 {
    1008     LPTSTR lpFileA_utf8 = TEXT("Input-utf8.txt");
    1009     LPTSTR lpFileA_utf8_no_bom = TEXT("Input-utf8-no-bom.txt");
    1010     LPTSTR lpFileA_utf16le = TEXT("Input-utf16le.txt");
    1011     LPTSTR lpFileA_utf16be = TEXT("Input-utf16be.txt");
    1012 
    1013     LPTSTR lpFileB_utf8 = TEXT("Output-utf8.txt");
    1014     LPTSTR lpFileB_utf8_no_bom = TEXT("Output-utf8-no-bom.txt");
    1015     LPTSTR lpFileB_utf16le = TEXT("Output-utf16le.txt");
    1016     LPTSTR lpFileB_utf16be = TEXT("Output-utf16be.txt");
    1017 
    1018 
    1019 
    1020 
    1021     //FileAToFileB(lpFileA_utf8, lpFileB_utf8_no_bom, UTF8_NO_BOM);
    1022     //FileAToFileB(lpFileA_utf8, lpFileB_utf16le, UTF16LE);
    1023     //FileAToFileB(lpFileA_utf8, lpFileB_utf16be, UTF16BE);
    1024 
    1025     FileAToFileB(lpFileA_utf8_no_bom, lpFileB_utf8, UTF8);
    1026     //FileAToFileB(lpFileA_utf8_no_bom, lpFileB_utf16le, UTF16LE);
    1027     //FileAToFileB(lpFileA_utf8_no_bom, lpFileB_utf16be, UTF16BE);
    1028 
    1029     //FileAToFileB(lpFileA_utf16le, lpFileB_utf8, UTF8);
    1030     //FileAToFileB(lpFileA_utf16le, lpFileB_utf8_no_bom, UTF8_NO_BOM);
    1031     //FileAToFileB(lpFileA_utf16le, lpFileB_utf16be, UTF16BE);
    1032 
    1033     //FileAToFileB(lpFileA_utf16be, lpFileB_utf8, UTF8);
    1034     //FileAToFileB(lpFileA_utf16be, lpFileB_utf8_no_bom, UTF8_NO_BOM);
    1035     //FileAToFileB(lpFileA_utf16be, lpFileB_utf16le, UTF16LE);
    1036 
    1037     return 0;
    1038 }

    *注:源码下载地址,请点击这里

  • 相关阅读:
    2020-2021-1 20201329 《信息安全专业导论》第十一周学习总结
    python gui
    2020-2021-1 20201329 《信息安全专业导论》第十周学习总结
    2020-2021-1 20201329 《信息安全专业导论》第九周学习总结
    四则运算
    熟悉编程语言
    链表
    网站设计
    使用nmap扫描队友
    熟悉编程语言
  • 原文地址:https://www.cnblogs.com/superstargg/p/4248757.html
Copyright © 2020-2023  润新知