• C++ 读取 UTF-8 文件到 string


    #include <iostream>
    #include <assert.h>
    #include <fstream>
    #include <string>
    #include <string.h>
    using namespace std;
    
    #ifdef _WIN32
    #include <Windows.h>
    #endif
    
    // Text-file encodings reported by GetTextFileType(), which decides based on
    // the byte-order mark (BOM) found at the start of the file.
    enum FileType
    {
        FileType_ANSI    = 0,   // no recognized BOM (also the fallback value)
        FileType_UNICODE = 1,   // file starts with FF FE
        FileType_UTF8    = 2    // file starts with EF BB BF
    };

    // Upper-case alias used throughout the rest of this file.
    typedef enum FileType FILETYPE;
    
    // Forward declarations; the definitions follow main().

    #ifdef _WIN32
    // Windows only: converts a NUL-terminated UTF-8 string to the system ANSI
    // code page (CP_ACP).
    string UTF8ToGB(const char* str);
    #endif
    
    // Classifies a file by the byte-order mark at its start.
    FILETYPE GetTextFileType(const std::string & strFileName);
    // Reads a UTF-8 (with BOM) text file into a string, skipping the BOM.
    string ReadTextFile(const std::string & strFileName);
    
    int main()
    {
        string json = ReadTextFile("/tmp/a.json");
    
        getchar();
    
        return 0;
    }
    
    // Classifies a text file by the byte-order mark (BOM) at its start.
    //
    // strFileName: path of the file to inspect.
    //
    // Returns FileType_UNICODE when the file begins with FF FE,
    // FileType_UTF8 when it begins with EF BB BF, and FileType_ANSI in every
    // other case - including when the file cannot be opened or is too short
    // to hold a BOM.
    FILETYPE GetTextFileType(const std::string & strFileName)
    {
        // Open in binary mode: we are sniffing raw bytes, so no newline or
        // end-of-file-character translation must be applied by the runtime.
        std::ifstream file(strFileName.c_str(),
                           std::ios_base::in | std::ios_base::binary);
        if (!file.good())
        {
            return FileType_ANSI;
        }

        unsigned char bom[3] = { 0, 0, 0 };
        file.read(reinterpret_cast<char*>(bom), sizeof(bom));

        // A short file sets failbit on read(); gcount() still reports how many
        // bytes were actually delivered, so judge only those bytes.
        const std::streamsize bytesRead = file.gcount();

        if (bytesRead >= 2 && bom[0] == 0xFF && bom[1] == 0xFE)
        {
            return FileType_UNICODE;
        }

        if (bytesRead >= 3 && bom[0] == 0xEF && bom[1] == 0xBB && bom[2] == 0xBF)
        {
            return FileType_UTF8;
        }

        // The stream is closed by its destructor on every return path.
        return FileType_ANSI;
    }
    
    // Reads a UTF-8 text file that starts with a BOM and returns its content
    // without the 3-byte BOM.
    //
    // strFileName: path of the file to read.
    //
    // Returns the file content after the BOM. On Windows the content is
    // additionally passed through UTF8ToGB() before being returned.
    // Returns an empty string (after printing "UTF-8 file needed!") when
    // GetTextFileType() does not report FileType_UTF8, and an empty string
    // when the file cannot be opened or holds nothing after the BOM.
    string ReadTextFile(const std::string & strFileName)
    {
        if (GetTextFileType(strFileName) != FileType_UTF8)
        {
            cout << "UTF-8 file needed!" << endl;
            return "";
        }

        std::ifstream file(strFileName.c_str(),
                           std::ios_base::in | std::ios_base::binary);
        if (!file.is_open())
        {
            // The file vanished or became unreadable since the type check.
            return "";
        }

        // Length of the UTF-8 byte-order mark (EF BB BF) that must be skipped.
        const std::streamoff bomSize = 3;

        file.seekg(0, std::ios_base::end);
        const std::streamoff fileSize = file.tellg();   // -1 on failure

        std::string result;
        if (fileSize > bomSize)
        {
            result.resize(static_cast<std::string::size_type>(fileSize - bomSize));
            file.seekg(bomSize, std::ios_base::beg);
            file.read(&result[0], static_cast<std::streamsize>(result.size()));
            // Keep only what was really read in case the file shrank meanwhile.
            result.resize(static_cast<std::string::size_type>(file.gcount()));
        }

    #ifdef _WIN32
        result = UTF8ToGB(result.c_str());
    #endif

        return result;
    }
    
    #ifdef _WIN32
    // Converts a NUL-terminated UTF-8 string to the system ANSI code page.
    //
    // Note: despite the "GB" in the name, the target is CP_ACP, i.e. whatever
    // ANSI code page the system is configured with.
    //
    // str: NUL-terminated UTF-8 input; NULL is treated as an empty string.
    //
    // Returns the converted string, or an empty string when either conversion
    // step fails.
    string UTF8ToGB(const char* str)
    {
        if (str == NULL)
        {
            return string();
        }

        // Step 1: UTF-8 -> UTF-16. With cbMultiByte == -1 the returned length
        // includes the terminating NUL; 0 signals failure.
        const int wideLen = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0);
        if (wideLen <= 0)
        {
            return string();
        }

        std::wstring wide(static_cast<std::wstring::size_type>(wideLen), L'\0');
        if (MultiByteToWideChar(CP_UTF8, 0, str, -1, &wide[0], wideLen) <= 0)
        {
            return string();
        }

        // Step 2: UTF-16 -> ANSI code page.
        const int ansiLen = WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1,
                                                NULL, 0, NULL, NULL);
        if (ansiLen <= 0)
        {
            return string();
        }

        std::string ansi(static_cast<std::string::size_type>(ansiLen), '\0');
        if (WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1,
                                &ansi[0], ansiLen, NULL, NULL) <= 0)
        {
            return string();
        }

        // The buffer holds the terminating NUL as well; cut the result there.
        return string(ansi.c_str());
    }
    #endif
  • 相关阅读:
    Spark官方调优文档翻译(转载)
    Spark性能优化指南——高级篇(转载)
    Spark性能优化指南——基础篇(转载)
    Apache Spark 内存管理详解(转载)
    Apache Spark 2.2.0新特性介绍(转载)
    SparkSQL – 从0到1认识Catalyst(转载)
    深入研究Spark SQL的Catalyst优化器(原创翻译)
    GC调优在Spark应用中的实践(转载)
    Project Tungsten:让Spark将硬件性能压榨到极限(转载)
    Spark SQL在100TB上的自适应执行实践(转载)
  • 原文地址:https://www.cnblogs.com/nanfei/p/12059085.html
Copyright © 2020-2023  润新知