终于搞完了,内容稍微有点多,分为两篇吧。
《指定存储文件的编码格式(上)》
《指定存储文件的编码格式(下)》
本篇为上篇。
主流的文件编码包括:UTF8、UTF8-WITHOUT-BOM、UTF16LE、UTF16BE、ANSI等。
中文的Windows操作系统默认使用的就是ANSI编码。
各种编码的主要规则大家可以去wiki网站、unicode.org网站等查看。
本文的上篇和下篇都是以Windows函数WideCharToMultiByte和MultiByteToWideChar为基础进行编写的。
本文的上篇和下篇主要完成从指定文件A中读取数据,输出到指定编码的文件B中。
之所以分成上下两篇,主要是因为下篇是对上篇的改进和优化。
本篇源码主要片段:
1 // test__OutputUtf8File.cpp : 定义控制台应用程序的入口点。 2 // 3 4 #include "stdafx.h" 5 #include <windows.h> 6 #include <string> 7 #include <iostream> 8 9 #define UTF8_SIGN 3 10 #define UTF16_SIGN 2 11 12 13 14 //************************************ 15 // Method: Utf16leFileToUtf8File 16 // FullName: Utf16leFileToUtf8File 17 // Access: public 18 // Returns: BOOL 19 // Qualifier:将lpUtf16leFile文件内容写入到lpUtf8File文件中 20 // Parameter: CONST LPTSTR lpUtf16leFile:输入文件utf16le编码 21 // Parameter: CONST LPTSTR lpUtf8File:输出文件为utf8-with-BOM编码 22 // *注:lpUtf16leFile文件只读;lpUtf8File文件总是创建或覆盖 23 //************************************ 24 BOOL Utf16leFileToUtf8File(CONST LPTSTR lpUtf16leFile, CONST LPTSTR lpUtf8File) 25 { 26 HANDLE hUtf16leFile = NULL; 27 HANDLE hUtf8File = NULL; 28 29 30 //create file 31 hUtf16leFile = ::CreateFile(lpUtf16leFile, GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); 32 if (INVALID_HANDLE_VALUE == hUtf16leFile) 33 { 34 int errLogNumber = GetLastError(); 35 printf_s("error number:%d ", errLogNumber); 36 return FALSE; 37 } 38 39 40 //read UTF16LE encode file content 41 LPWSTR lpReadContentByUTF16 = NULL; 42 DWORD cbReadContentByUTF16 = 0; 43 DWORD cbPreReadContentByUTF16 = 0; 44 DWORD cchReadContentByUTF16 = 0; 45 46 cbReadContentByUTF16 = SetFilePointer(hUtf16leFile, 0, NULL, FILE_END); 47 if (INVALID_SET_FILE_POINTER == cbReadContentByUTF16) 48 { 49 int errLogNumber = GetLastError(); 50 printf_s("error number:%d ", errLogNumber); 51 ::CloseHandle(hUtf16leFile); 52 return FALSE; 53 } 54 lpReadContentByUTF16 = (WCHAR *)malloc(cbReadContentByUTF16); 55 if (NULL == lpReadContentByUTF16) 56 { 57 printf_s("malloc error "); 58 ::CloseHandle(hUtf16leFile); 59 return FALSE; 60 } 61 ZeroMemory(lpReadContentByUTF16, cbReadContentByUTF16); 62 SetFilePointer(hUtf16leFile, 0, NULL, FILE_BEGIN); 63 if (FALSE == ReadFile(hUtf16leFile, lpReadContentByUTF16, cbReadContentByUTF16, &cbPreReadContentByUTF16, NULL)) 64 { 65 int errLogNumber = GetLastError(); 66 
printf_s("error number:%d ", errLogNumber); 67 free(lpReadContentByUTF16); 68 ::CloseHandle(hUtf16leFile); 69 return FALSE; 70 } 71 cchReadContentByUTF16 = ((cbReadContentByUTF16 % sizeof(WCHAR)) != 0 ? (cbReadContentByUTF16 / sizeof(WCHAR) + 1) : (cbReadContentByUTF16 / sizeof(WCHAR))); 72 73 74 //transform encode 75 LPSTR lpWriteContentByUTF8 = NULL; 76 DWORD cchWriteContentByUTF8 = 0; 77 DWORD cbWriteContentByUTF8 = 0; 78 DWORD cbPreWriteContentByUTF8 = 0; 79 80 cbWriteContentByUTF8 = ((cbReadContentByUTF16 % sizeof(WCHAR)) != 0 ? (cbReadContentByUTF16 + sizeof(WCHAR)) : (cbReadContentByUTF16)); 81 cchWriteContentByUTF8 = cchReadContentByUTF16; 82 lpWriteContentByUTF8 = (CHAR *)malloc(cbWriteContentByUTF8); 83 if (NULL == lpWriteContentByUTF8) 84 { 85 printf_s("malloc error "); 86 free(lpReadContentByUTF16); 87 ::CloseHandle(hUtf16leFile); 88 return FALSE; 89 } 90 ZeroMemory(lpWriteContentByUTF8, cbWriteContentByUTF8); 91 if (0 == WideCharToMultiByte(CP_UTF8, 0, lpReadContentByUTF16, cchReadContentByUTF16, lpWriteContentByUTF8, cbWriteContentByUTF8, NULL, NULL)) 92 { 93 printf_s("transform error "); 94 free(lpReadContentByUTF16); 95 free(lpWriteContentByUTF8); 96 ::CloseHandle(hUtf16leFile); 97 return FALSE; 98 } 99 100 101 //write UTF8 encode file content 102 hUtf8File = ::CreateFile(lpUtf8File, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL); 103 if (INVALID_HANDLE_VALUE == hUtf8File) 104 { 105 printf("Terminal failure: Unable to write to file. "); 106 free(lpReadContentByUTF16); 107 free(lpWriteContentByUTF8); 108 ::CloseHandle(hUtf16leFile); 109 return FALSE; 110 } 111 for (int i = 0; i != cbWriteContentByUTF8; ++i) 112 { 113 if (TEXT('