• VC++ 读取UTF-8和ANSI编码文件


     判断是否是UTF-8文件:

    // Heuristically decides whether a byte buffer holds UTF-8 encoded text.
    //
    // The buffer is scanned one encoded character at a time. The lead byte
    // determines how many continuation bytes (of the form 10xxxxxx) must follow:
    //   0x00-0x7F  ASCII, no continuation bytes
    //   0xC0-0xDF  2-byte sequence
    //   0xE0-0xEF  3-byte sequence
    //   0xF0-0xF4  4-byte sequence (code points above U+FFFF, e.g. emoji)
    // A stray continuation byte (0x80-0xBF) in lead position, or a lead byte of
    // 0xF5 and above, can never occur in UTF-8 and makes the result false.
    //
    // pBuffer  start of the bytes to inspect.
    // size     number of bytes to inspect; a size <= 0 yields true.
    //
    // Returns true when no invalid sequence is found. A multi-byte sequence that
    // is cut off by the end of the buffer is tolerated (scanning simply stops),
    // so a prefix of a larger UTF-8 stream is still reported as UTF-8.
    bool IsUTF8Text(const void* pBuffer, long size)
    {
        const unsigned char* cur = static_cast<const unsigned char*>(pBuffer);
        const unsigned char* const end = cur + size;

        while (cur < end)
        {
            const unsigned char lead = *cur;
            int trailing = 0; // number of continuation bytes the lead byte announces

            if (lead < 0x80)        // 0xxxxxxx: ASCII
            {
                trailing = 0;
            }
            else if (lead < 0xC0)   // 10xxxxxx: continuation byte without a lead byte
            {
                return false;
            }
            else if (lead < 0xE0)   // 110xxxxx: 2-byte sequence
            {
                trailing = 1;
            }
            else if (lead < 0xF0)   // 1110xxxx: 3-byte sequence
            {
                trailing = 2;
            }
            else if (lead < 0xF5)   // 11110xxx up to 0xF4: 4-byte sequence
            {
                trailing = 3;
            }
            else                    // 0xF5-0xFF never appear in UTF-8
            {
                return false;
            }

            // The sequence runs past the end of the buffer: stop without
            // rejecting, because the caller may have handed us a partial read.
            if (end - cur <= trailing)
            {
                break;
            }

            for (int i = 1; i <= trailing; ++i)
            {
                if ((cur[i] & 0xC0) != 0x80)
                {
                    return false;
                }
            }

            cur += trailing + 1;
        }

        return true;
    }
    
    // Reports whether the content of the file at pFileName passes IsUTF8Text.
    //
    // The whole file is read into memory in binary mode and handed to
    // IsUTF8Text.
    //
    // pFileName  path of the file to inspect.
    //
    // Returns false when the file cannot be opened, its size cannot be
    // determined, or reading it fails; otherwise the result of IsUTF8Text on the
    // bytes that were actually read (an empty file therefore yields true).
    bool IsUTF8File(const char* pFileName)
    {
        FILE* f = NULL;
        fopen_s(&f, pFileName, "rb");
        if (NULL == f)
        {
            return false;
        }

        // Determine the file size. ftell returns -1 on failure, which must not
        // be used as an allocation or read length.
        long lSize = -1;
        if (fseek(f, 0, SEEK_END) == 0)
        {
            lSize = ftell(f);
        }
        if (lSize < 0 || fseek(f, 0, SEEK_SET) != 0)
        {
            fclose(f);
            return false;
        }

        char* pBuff = new char[lSize + 1];

        // Element size 1 so that the return value is the number of bytes read.
        const size_t bytesRead = fread(pBuff, 1, lSize, f);
        const bool readFailed = (ferror(f) != 0);
        fclose(f);

        // Validate only what was really read; never look at unwritten memory.
        bool bIsUTF8 = false;
        if (!readFailed)
        {
            bIsUTF8 = IsUTF8Text(pBuff, static_cast<long>(bytesRead));
        }

        delete[] pBuff;
        pBuff = NULL;

        return bIsUTF8;
    }

    读取文件(CodePage 传 CP_UTF8 或 CP_ACP,可根据上面 IsUTF8File 的判断结果选择):

    // Reads a whole text file and returns its content converted to a CString.
    //
    // A leading UTF-8 byte order mark (EF BB BF) is skipped. The remaining bytes
    // are converted to wide characters with MultiByteToWideChar using CodePage
    // (for example CP_UTF8 or CP_ACP).
    //
    // filename  path of the file to read.
    // CodePage  code page the file content is encoded in.
    //
    // Returns the converted text, cut at the first NUL character if the file
    // contains one. An empty string is returned when the file cannot be opened,
    // is empty, is too large for a single conversion call, or when an allocation
    // or the conversion fails.
    CString GetFile(CString filename, UINT CodePage)
    {
        CString strFile = L"";

        CFile fileR;
        if (!fileR.Open(filename, CFile::modeRead | CFile::typeBinary))
        {
            return strFile;
        }

        // Look for a UTF-8 BOM. The buffer is zero-initialised and the read count
        // is checked so that files shorter than 3 bytes are never compared
        // against indeterminate bytes.
        BYTE head[3] = { 0, 0, 0 };
        const UINT headRead = fileR.Read(head, 3);
        const bool hasBom = (headRead == 3 &&
                             head[0] == 0xEF && head[1] == 0xBB && head[2] == 0xBF);
        if (!hasBom)
        {
            fileR.SeekToBegin();
        }

        // Bytes left after the (optional) BOM. CFile::Read takes a UINT and
        // MultiByteToWideChar takes an int, so anything that does not fit in an
        // int cannot be handled in one pass.
        const ULONGLONG remaining = fileR.GetLength() - fileR.GetPosition();
        if (remaining == 0 || remaining > 0x7FFFFFFEULL)
        {
            fileR.Close();
            return strFile;
        }

        char* pContent = static_cast<char*>(calloc(static_cast<size_t>(remaining) + 1, sizeof(char)));
        if (pContent == NULL)
        {
            fileR.Close();
            return strFile;
        }

        const UINT bytesRead = fileR.Read(pContent, static_cast<UINT>(remaining));
        fileR.Close();

        if (bytesRead > 0)
        {
            // First call: ask how many wide characters the conversion needs.
            const int n = MultiByteToWideChar(CodePage, 0, pContent, static_cast<int>(bytesRead), NULL, 0);
            if (n > 0)
            {
                // One extra zeroed element keeps the buffer NUL-terminated.
                wchar_t* pWideChar = static_cast<wchar_t*>(calloc(static_cast<size_t>(n) + 1, sizeof(wchar_t)));
                if (pWideChar != NULL)
                {
                    MultiByteToWideChar(CodePage, 0, pContent, static_cast<int>(bytesRead), pWideChar, n);
                    strFile = CString(pWideChar);
                    free(pWideChar);
                }
            }
        }

        free(pContent);

        return strFile;
    }
  • 相关阅读:
    算法练习:求字符串的最长重复子串(Java实现)
    Oracle数据库中遇到的坑
    解决Oracle死锁问题步骤
    转:Spring Cache抽象详解
    Spring MVC测试框架详解——服务端测试
    转:SpringMVC中日期格式的转换
    freemarker判断是否为空
    jQuery Pagination分页插件
    Java链式方法
    mysql强制索引和禁止某个索引
  • 原文地址:https://www.cnblogs.com/2018shawn/p/12167291.html
Copyright © 2020-2023  润新知