//z 2014-03-25 08:18:41 IS2120@BG57IV3 T3343244181.K.F1434403198[T1,L68,R2,V15]
/**
 * Converts a NUL-terminated wide string to the system ANSI code page (CP_ACP).
 *
 * @param in      NUL-terminated source wide string.
 * @param out     Destination buffer; receives the converted bytes including
 *                the terminating NUL.
 * @param cchout  Capacity of `out`, in bytes.
 *
 * Reports problems through ErrorExit(): unusable arguments, or a conversion
 * that WideCharToMultiByte rejects (for example because `out` is too small).
 */
void UnicodeToAnsi(WCHAR *in, char *out, int cchout)
{
    // A zero-sized destination switches WideCharToMultiByte into "query the
    // required size" mode: it returns a non-zero count and writes nothing, so
    // the failure check below would mistake it for a successful conversion.
    if (in == NULL || out == NULL || cchout <= 0)
    {
        ErrorExit("UnicodeToAnsi: invalid argument") ;
        return ;    // in case ErrorExit returns, do not touch the bad pointers
    }

    // Passing -1 tells the API the input is NUL-terminated; the terminator is
    // converted as well. This is the same as wcslen(in)+1 without narrowing a
    // size_t to int.
    const int len = WideCharToMultiByte(CP_ACP,
                                        0,
                                        in,
                                        -1,
                                        out,
                                        cchout,
                                        NULL,
                                        NULL) ;

    // A zero return means the conversion failed. The message text is kept
    // unchanged from the original, although the usual cause is an undersized
    // `out` rather than memory exhaustion.
    if (!len)
        ErrorExit("out of memory") ;
}
//z 2014-04-14 22:04:51 IS2120@BG57IV3 T1381068076.K.F1547169058[T4,L105,R3,V66]
2. 一个例子,将文件自动转换为 utf-8
// ChangeFileEncoding.cpp : 定义控制台应用程序的入口点。 // #include "stdafx.h" #include "ChangeFileEncoding.h" #include <string> #ifdef _DEBUG #define new DEBUG_NEW #endif // 唯一的应用程序对象 CWinApp theApp; using namespace std; void recursiveFile(CString strFileType); void convertGBToUTF8(CString strWritePath, const char* gb2312); int _tmain(int argc, TCHAR* argv[], TCHAR* envp[]) { int nRetCode = 0; // 初始化 MFC 并在失败时显示错误 if (!AfxWinInit(::GetModuleHandle(NULL), NULL, ::GetCommandLine(), 0)) { // TODO: 更改错误代码以符合您的需要 _tprintf(_T("错误: MFC 初始化失败 ")); nRetCode = 1; } else { /*for(int i = 0; i < argc; i++) { MessageBox(NULL, argv[i], L"Arglist contents", MB_OK); }*/ //声明一个CFileFind类变量,以用来搜索 //接受一个参数作为源代码文件的根目录 TCHAR *lpszDirName = argv[1]; CString strFileType; strFileType.Format(_T("%s\*.*"), lpszDirName); //递归此目录下的.h文件和.cpp文件,如果发现不是utf8编码则转换为utf8编码 recursiveFile(strFileType); } return nRetCode; } void recursiveFile( CString strFileType) { CFileFind finder; BOOL isFinded = finder.FindFile(strFileType);//查找第一个文件 while(isFinded) { isFinded = finder.FindNextFile(); //递归搜索其他的文件 if(!finder.IsDots()) //如果不是"."目录 { CString strFoundFile = finder.GetFilePath(); if(finder.IsDirectory()) //如果是目录,则递归地调用 { CString strNextFileType; strNextFileType.Format(_T("%s\*.*"), strFoundFile); recursiveFile(strNextFileType); } else { //如果是头文件或cpp文件 if(strFoundFile.Right(4) == _T(".cpp") || strFoundFile.Right(2) == _T(".h")) { CFile fileReader(strFoundFile, CFile::modeRead); byte head[3]; fileReader.Read(head, 3); //判断是否带有BOM文件头 if(head[0] == 0xef && head[1]==0xbb && head[2] == 0xbf ) { fileReader.Close(); continue; } fileReader.SeekToBegin(); int bufLength = 256; char *buf = new char[bufLength]; ZeroMemory(buf, bufLength); int nReadLength; std::string strContent; while((nReadLength = fileReader.Read(buf, bufLength))) { strContent.append(buf, nReadLength); ZeroMemory(buf, nReadLength); } delete buf; fileReader.Close(); convertGBToUTF8(strFoundFile, strContent.c_str()); } } } } finder.Close(); } void 
convertGBToUTF8(CString strWritePath, const char* gb2312) { CFile fp; fp.Open(strWritePath, CFile::modeCreate|CFile::modeWrite|CFile::typeBinary,NULL); int len = MultiByteToWideChar(CP_ACP, 0, gb2312, -1, NULL, 0); wchar_t* wstr = new wchar_t[len+1]; memset(wstr, 0, len+1); MultiByteToWideChar(CP_ACP, 0, gb2312, -1, wstr, len); len = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, NULL, 0, NULL, NULL); char* str = new char[len+1]; memset(str, 0, len+1); len = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, str, len, NULL, NULL); if(wstr) delete[] wstr; str[len] = ' '; const unsigned char aryBOM[] = {0xEF, 0xBB, 0xBF}; fp.Write(aryBOM, sizeof(aryBOM)); fp.Write(str,len); delete[] str; fp.Close(); }//z 2014-04-14 22:04:51 IS2120@BG57IV3 T1381068076.K.F1547169058[T4,L105,R3,V66]
http://blog.csdn.net/visualcatsharp/article/details/7345854
//z 2014-05-06 12:00:46 L.239'43154 BG57IV3@XCL T1109932947.K.F253293061 [T409,L5358,R263,V7006]
3. v2
// ConvertZ.cpp : 定义控制台应用程序的入口点。 // #include "stdafx.h" #include "ConvertZ.h" #include <string> using namespace std; #ifdef _DEBUG #define new DEBUG_NEW #endif // 唯一的应用程序对象 CWinApp theApp; void recursiveFile(CString strFileType); void convertGBToUTF8(CString strWritePath, const char* gb2312); int _tmain(int argc, TCHAR* argv[], TCHAR* envp[]) { int nRetCode = 0; // 初始化 MFC 并在失败时显示错误 if (!AfxWinInit(::GetModuleHandle(NULL), NULL, ::GetCommandLine(), 0)) { // TODO: 更改错误代码以符合您的需要 _tprintf(_T("错误: MFC 初始化失败 ")); nRetCode = 1; } else { /*for(int i = 0; i < argc; i++) { MessageBox(NULL, argv[i], L"Arglist contents", MB_OK); }*/ //声明一个CFileFind类变量,以用来搜索 if(argc != 2) { CString strUsage; strUsage.Format(_T("usage : %s dir dir [sample] : c:\src "),argv[0]); _tprintf(strUsage.GetBuffer()); strUsage.ReleaseBuffer(); return nRetCode; } //接受一个参数作为源代码文件的根目录 TCHAR *lpszDirName = argv[1]; CString strFileType; strFileType.Format(_T("%s\*.*"), lpszDirName); //递归此目录下的.h文件和.cpp文件,如果发现不是utf8编码则转换为utf8编码 recursiveFile(strFileType); } return nRetCode; } bool isSrcType(const CString strFileType) { CString strExt_R4 = strFileType.Right(4); CString strExt_R2 = strFileType.Right(2); if ((strExt_R4.CompareNoCase(_T(".cpp")) == 0) || (strExt_R2.CompareNoCase(_T(".c")) == 0) || (strExt_R2.CompareNoCase(_T(".h")) == 0) || (strExt_R4.CompareNoCase(_T(".cxx")) == 0) || (strExt_R4.CompareNoCase(_T(".hpp")) == 0) ) { return true; } return false; } void recursiveFile( CString strFileType) { CFileFind finder; BOOL isFinded = finder.FindFile(strFileType);//查找第一个文件 while(isFinded) { isFinded = finder.FindNextFile(); //递归搜索其他的文件 if(!finder.IsDots()) //如果不是"."目录 { CString strFoundFile = finder.GetFilePath(); if(finder.IsDirectory()) //如果是目录,则递归地调用 { CString strNextFileType; strNextFileType.Format(_T("%s\*.*"), strFoundFile); recursiveFile(strNextFileType); } else { //如果是头文件或cpp文件 if(isSrcType(strFoundFile)) { CFile fileReader(strFoundFile, CFile::modeRead|CFile::typeBinary); byte head[3]; 
fileReader.Read(head, 3); //判断是否带有BOM文件头 if(head[0] == 0xef && head[1]==0xbb && head[2] == 0xbf ) { fileReader.Close(); continue; } fileReader.SeekToBegin(); int bufLength = 256; char *buf = new char[bufLength]; ZeroMemory(buf, bufLength); int nReadLength; std::string strContent; while((nReadLength = fileReader.Read(buf, bufLength))) { strContent.append(buf, nReadLength); ZeroMemory(buf, nReadLength); } delete buf; fileReader.Close(); convertGBToUTF8(strFoundFile, strContent.c_str()); } } } } finder.Close(); } void convertGBToUTF8(CString strWritePath, const char* gb2312) { CFile fp; fp.Open(strWritePath, CFile::modeCreate|CFile::modeWrite|CFile::typeBinary,NULL); const int ngblen = static_cast<int>(strlen(gb2312)); int len = MultiByteToWideChar(CP_ACP, 0, gb2312, ngblen, NULL, 0); wchar_t* wstr = new wchar_t[len+1]; memset(wstr, 0, (len+1)*sizeof(wchar_t)); MultiByteToWideChar(CP_ACP, 0, gb2312, ngblen, wstr, len); wstr[len] = ' '; int newLen = 0; newLen = WideCharToMultiByte(CP_UTF8, 0, wstr, len, NULL, 0, NULL, NULL); char* str = new char[newLen+1]; memset(str, 0, (newLen+1)*sizeof(char)); newLen = WideCharToMultiByte(CP_UTF8, 0, wstr, len, str, newLen, NULL, NULL); if(wstr) { delete[] wstr; wstr = NULL; } str[newLen] = ' '; const unsigned char aryBOM[] = {0xEF, 0xBB, 0xBF}; fp.Write(aryBOM, sizeof(aryBOM)); fp.Write(str,newLen); delete[] str; fp.Close(); }
//z 2014-05-22 16:55:50 L.223'25450 BG57IV3 T427209771 .K.F253293061 [T484,L6693,R325,V8206]
Simple Character Encoding Detection
By 23 Oct 2013
,
|
|
Introduction
One very commonly asked question in programming is how to detect the character encoding of a string. Well, I'm going
to share a cool method I came up with that can detect whether a string is UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, or UTF-32LE in
just 4 lines of code.
Explanation
We'll be working with null-terminated strings, so the first rule is that we must terminate all strings
with a quadruple null, regardless of encoding. You may wish to add a definition such as the following:
#define NT "