• Linux 平台和 Windows 平台下 Unicode 与 UTF-8 互转


    Windows 平台:

    /*
     * Reads one Unicode code point from the NUL-terminated wide string `text`,
     * starting at *pos, and advances *pos past the unit(s) consumed.
     *
     * The caller must make sure text[*pos] is not the terminator.
     * A UTF-16 high/low surrogate pair is combined into a single code point.
     * A lone surrogate, or a value above U+10FFFF, yields U+FFFD.
     */
    static unsigned long next_wide_code_point(const wchar_t *text, size_t *pos)
    {
        /* Mask so that a negative (signed, 32-bit) wchar_t becomes an
         * out-of-range value instead of being sign-extended. */
        unsigned long cp = (unsigned long)text[(*pos)++] & 0xFFFFFFFFUL;

        if (cp >= 0xD800UL && cp <= 0xDBFFUL)
        {
            /* At worst this reads the terminator, never beyond it. */
            unsigned long low = (unsigned long)text[*pos] & 0xFFFFFFFFUL;

            if (low >= 0xDC00UL && low <= 0xDFFFUL)
            {
                ++(*pos);
                return 0x10000UL + ((cp - 0xD800UL) << 10) + (low - 0xDC00UL);
            }
            return 0xFFFDUL;
        }

        if ((cp >= 0xDC00UL && cp <= 0xDFFFUL) || cp > 0x10FFFFUL)
            return 0xFFFDUL;

        return cp;
    }

    /*
     * Converts a NUL-terminated wide string to a newly allocated,
     * NUL-terminated UTF-8 string.
     *
     * Works for both 16-bit wchar_t (surrogate pairs are merged and written as
     * one 4-byte sequence) and 32-bit wchar_t (code points up to U+10FFFF).
     *
     * Returns a buffer obtained from malloc() that the caller releases with
     * free(), or NULL when `unicode` is NULL or the allocation fails.
     */
    unsigned char * make_utf8_string(const wchar_t *unicode)
    {
        size_t size = 0, index = 0, out_index = 0;
        unsigned char *out;
        unsigned long cp;

        if (unicode == NULL)
            return NULL;

        /* first calculate the size of the target string */
        while (unicode[index])
        {
            cp = next_wide_code_point(unicode, &index);

            if (cp < 0x80UL)
                size += 1;
            else if (cp < 0x800UL)
                size += 2;
            else if (cp < 0x10000UL)
                size += 3;
            else
                size += 4;
        }

        out = (unsigned char*)malloc(size + 1);
        if (out == NULL)
            return NULL;

        /* second pass: emit the bytes */
        index = 0;
        while (unicode[index])
        {
            cp = next_wide_code_point(unicode, &index);

            if (cp < 0x80UL)
            {
                out[out_index++] = (unsigned char)cp;
            }
            else if (cp < 0x800UL)
            {
                out[out_index++] = (unsigned char)(0xC0 | (cp >> 6));
                out[out_index++] = (unsigned char)(0x80 | (cp & 0x3F));
            }
            else if (cp < 0x10000UL)
            {
                out[out_index++] = (unsigned char)(0xE0 | (cp >> 12));
                out[out_index++] = (unsigned char)(0x80 | ((cp >> 6) & 0x3F));
                out[out_index++] = (unsigned char)(0x80 | (cp & 0x3F));
            }
            else
            {
                out[out_index++] = (unsigned char)(0xF0 | (cp >> 18));
                out[out_index++] = (unsigned char)(0x80 | ((cp >> 12) & 0x3F));
                out[out_index++] = (unsigned char)(0x80 | ((cp >> 6) & 0x3F));
                out[out_index++] = (unsigned char)(0x80 | (cp & 0x3F));
            }
        }

        out[out_index] = 0x00;

        return out;
    }
    
    /*
     * Decodes one UTF-8 sequence from the NUL-terminated byte string `text`,
     * starting at *pos, and advances *pos past the bytes consumed.
     *
     * The caller must make sure text[*pos] is not the terminator.
     * Malformed input (stray continuation byte, invalid lead byte, truncated
     * sequence, overlong form, value above U+10FFFF) yields U+FFFD. A byte
     * that is not a continuation byte -- including the terminating NUL -- is
     * never consumed as part of a sequence, so decoding cannot run past the
     * end of the string.
     */
    static unsigned long next_utf8_code_point(const unsigned char *text, size_t *pos)
    {
        unsigned char lead = text[(*pos)++];
        unsigned long cp, min_cp;
        int extra;

        if (lead < 0x80)
            return lead;

        if ((lead & 0xE0) == 0xC0)
        {
            extra = 1; cp = lead & 0x1F; min_cp = 0x80UL;
        }
        else if ((lead & 0xF0) == 0xE0)
        {
            extra = 2; cp = lead & 0x0F; min_cp = 0x800UL;
        }
        else if ((lead & 0xF8) == 0xF0)
        {
            extra = 3; cp = lead & 0x07; min_cp = 0x10000UL;
        }
        else
        {
            return 0xFFFDUL;
        }

        while (extra-- > 0)
        {
            unsigned char next = text[*pos];

            if ((next & 0xC0) != 0x80)
                return 0xFFFDUL;

            cp = (cp << 6) | (unsigned long)(next & 0x3F);
            ++(*pos);
        }

        if (cp < min_cp || cp > 0x10FFFFUL)
            return 0xFFFDUL;

        /* 3-byte encoded surrogates are deliberately passed through, so data
         * written by encoders that emit one sequence per UTF-16 unit still
         * round-trips on 16-bit wchar_t platforms. */
        return cp;
    }

    /*
     * Converts a NUL-terminated UTF-8 string to a newly allocated,
     * NUL-terminated wide string.
     *
     * Code points above U+FFFF are stored as a surrogate pair when wchar_t is
     * narrower than 32 bits, and as a single element otherwise. Malformed
     * input is replaced by U+FFFD instead of being read past the terminator.
     *
     * Returns a buffer obtained from malloc() that the caller releases with
     * free(), or NULL when `utf8` is NULL or the allocation fails.
     */
    wchar_t * make_unicode_string(const unsigned char *utf8)
    {
        const int needs_surrogates = (sizeof(wchar_t) < 4);
        size_t size = 0, index = 0, out_index = 0;
        wchar_t *out;
        unsigned long cp;

        if (utf8 == NULL)
            return NULL;

        /* first calculate the size of the target string */
        while (utf8[index])
        {
            cp = next_utf8_code_point(utf8, &index);
            size += (needs_surrogates && cp > 0xFFFFUL) ? 2 : 1;
        }

        out = (wchar_t*)malloc((size + 1) * sizeof(wchar_t));
        if (out == NULL)
            return NULL;

        /* second pass: store the decoded units */
        index = 0;
        while (utf8[index])
        {
            cp = next_utf8_code_point(utf8, &index);

            if (needs_surrogates && cp > 0xFFFFUL)
            {
                cp -= 0x10000UL;
                out[out_index++] = (wchar_t)(0xD800UL + (cp >> 10));
                out[out_index++] = (wchar_t)(0xDC00UL + (cp & 0x3FFUL));
            }
            else
            {
                out[out_index++] = (wchar_t)cp;
            }
        }

        out[out_index] = 0;

        return out;
    }
    
    int StrUtil::utf8_encode(const char *from, char **to)
    {
        wchar_t *unicode;
        int wchars, err;
    
        wchars = ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, from,
            strlen(from), NULL, 0);
    
        if (wchars == 0)
        {
            fprintf(stderr, "Unicode translation error %d
    ", GetLastError());
            return -1;
        }
    
        unicode = (wchar_t*)calloc(wchars + 1, sizeof(unsigned short));
        if(unicode == NULL)
        {
            fprintf(stderr, "Out of memory processing string to UTF8
    ");
            return -1;
        }
    
        err = ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, from,
            strlen(from), unicode, wchars);
        if(err != wchars)
        {
            free(unicode);
            fprintf(stderr, "Unicode encode error %d
    ", GetLastError());
            return -1;
        }
    
        /* On NT-based windows systems, we could use WideCharToMultiByte(), but
        * MS doesn't actually have a consistent API across win32.
        */
        *to = (char *)make_utf8_string(unicode);
    
        free(unicode);
        return 0;
    }
    
    int StrUtil::utf8_decode(const char *from, char **to)
    {
        wchar_t *unicode;
        int chars, err;
    
        /* On NT-based windows systems, we could use MultiByteToWideChar(CP_UTF8), but
        * MS doesn't actually have a consistent API across win32.
        */
        unicode = make_unicode_string((unsigned char*)from);
        if(unicode == NULL)
        {
            fprintf(stderr, "Out of memory processing string from UTF8 to UNICODE16
    ");
            return -1;
        }
    
        chars = ::WideCharToMultiByte(GetConsoleCP(), WC_COMPOSITECHECK, unicode,
            -1, NULL, 0, NULL, NULL);
    
        if(chars == 0)
        {
            fprintf(stderr, "Unicode translation error %d
    ", GetLastError());
            free(unicode);
            return -1;
        }
    
        *to = (char *)calloc(chars + 1, sizeof(unsigned char));
        if(*to == NULL)
        {
            fprintf(stderr, "Out of memory processing string to local charset
    ");
            free(unicode);
            return -1;
        }
    
        err = ::WideCharToMultiByte(GetConsoleCP(), WC_COMPOSITECHECK, unicode,
            -1, *to, chars, NULL, NULL);
        if(err != chars)
        {
            fprintf(stderr, "Unicode decode error %d
    ", GetLastError());
            free(unicode);
            free(*to);
            *to = NULL;
            return -1;
        }
    
        free(unicode);
        return 0;
    }

    Linux 平台:

    /*
     * Reads one Unicode code point from the NUL-terminated wide string `text`,
     * starting at *pos, and advances *pos past the unit(s) consumed.
     *
     * The caller must make sure text[*pos] is not the terminator.
     * A UTF-16 high/low surrogate pair is combined into a single code point.
     * A lone surrogate, or a value above U+10FFFF, yields U+FFFD.
     */
    static unsigned long next_wide_code_point(const wchar_t *text, size_t *pos)
    {
        /* Mask so that a negative (signed, 32-bit) wchar_t becomes an
         * out-of-range value instead of being sign-extended. */
        unsigned long cp = (unsigned long)text[(*pos)++] & 0xFFFFFFFFUL;

        if (cp >= 0xD800UL && cp <= 0xDBFFUL)
        {
            /* At worst this reads the terminator, never beyond it. */
            unsigned long low = (unsigned long)text[*pos] & 0xFFFFFFFFUL;

            if (low >= 0xDC00UL && low <= 0xDFFFUL)
            {
                ++(*pos);
                return 0x10000UL + ((cp - 0xD800UL) << 10) + (low - 0xDC00UL);
            }
            return 0xFFFDUL;
        }

        if ((cp >= 0xDC00UL && cp <= 0xDFFFUL) || cp > 0x10FFFFUL)
            return 0xFFFDUL;

        return cp;
    }

    /*
     * Converts a NUL-terminated wide string to a newly allocated,
     * NUL-terminated UTF-8 string.
     *
     * Each element is read at the full width of wchar_t, so code points above
     * U+FFFF are written as 4-byte sequences rather than being truncated to
     * 16 bits. Surrogate pairs (16-bit wchar_t) are merged first.
     *
     * Returns a buffer obtained from malloc() that the caller releases with
     * free(), or NULL when `unicode` is NULL or the allocation fails.
     */
    unsigned char * make_utf8_string(const wchar_t *unicode)
    {
        size_t size = 0, index = 0, out_index = 0;
        unsigned char *out;
        unsigned long cp;

        if (unicode == NULL)
            return NULL;

        /* first calculate the size of the target string */
        while (unicode[index])
        {
            cp = next_wide_code_point(unicode, &index);

            if (cp < 0x80UL)
                size += 1;
            else if (cp < 0x800UL)
                size += 2;
            else if (cp < 0x10000UL)
                size += 3;
            else
                size += 4;
        }

        out = (unsigned char*)malloc(size + 1);
        if (out == NULL)
            return NULL;

        /* second pass: emit the bytes */
        index = 0;
        while (unicode[index])
        {
            cp = next_wide_code_point(unicode, &index);

            if (cp < 0x80UL)
            {
                out[out_index++] = (unsigned char)cp;
            }
            else if (cp < 0x800UL)
            {
                out[out_index++] = (unsigned char)(0xC0 | (cp >> 6));
                out[out_index++] = (unsigned char)(0x80 | (cp & 0x3F));
            }
            else if (cp < 0x10000UL)
            {
                out[out_index++] = (unsigned char)(0xE0 | (cp >> 12));
                out[out_index++] = (unsigned char)(0x80 | ((cp >> 6) & 0x3F));
                out[out_index++] = (unsigned char)(0x80 | (cp & 0x3F));
            }
            else
            {
                out[out_index++] = (unsigned char)(0xF0 | (cp >> 18));
                out[out_index++] = (unsigned char)(0x80 | ((cp >> 12) & 0x3F));
                out[out_index++] = (unsigned char)(0x80 | ((cp >> 6) & 0x3F));
                out[out_index++] = (unsigned char)(0x80 | (cp & 0x3F));
            }
        }

        out[out_index] = 0x00;

        return out;
    }
    
    /*
     * Decodes one UTF-8 sequence from the NUL-terminated byte string `text`,
     * starting at *pos, and advances *pos past the bytes consumed.
     *
     * The caller must make sure text[*pos] is not the terminator.
     * Malformed input (stray continuation byte, invalid lead byte, truncated
     * sequence, overlong form, value above U+10FFFF) yields U+FFFD. A byte
     * that is not a continuation byte -- including the terminating NUL -- is
     * never consumed as part of a sequence, so decoding cannot run past the
     * end of the string.
     */
    static unsigned long next_utf8_code_point(const unsigned char *text, size_t *pos)
    {
        unsigned char lead = text[(*pos)++];
        unsigned long cp, min_cp;
        int extra;

        if (lead < 0x80)
            return lead;

        if ((lead & 0xE0) == 0xC0)
        {
            extra = 1; cp = lead & 0x1F; min_cp = 0x80UL;
        }
        else if ((lead & 0xF0) == 0xE0)
        {
            extra = 2; cp = lead & 0x0F; min_cp = 0x800UL;
        }
        else if ((lead & 0xF8) == 0xF0)
        {
            extra = 3; cp = lead & 0x07; min_cp = 0x10000UL;
        }
        else
        {
            return 0xFFFDUL;
        }

        while (extra-- > 0)
        {
            unsigned char next = text[*pos];

            if ((next & 0xC0) != 0x80)
                return 0xFFFDUL;

            cp = (cp << 6) | (unsigned long)(next & 0x3F);
            ++(*pos);
        }

        if (cp < min_cp || cp > 0x10FFFFUL)
            return 0xFFFDUL;

        /* 3-byte encoded surrogates are deliberately passed through rather
         * than rejected, matching what the previous decoder accepted. */
        return cp;
    }

    /*
     * Converts a NUL-terminated UTF-8 string to a newly allocated,
     * NUL-terminated wide string.
     *
     * 1- to 4-byte sequences are decoded. Code points above U+FFFF are stored
     * as a single element when wchar_t is at least 32 bits wide, and as a
     * surrogate pair otherwise. Malformed input is replaced by U+FFFD instead
     * of being read past the terminator.
     *
     * Returns a buffer obtained from malloc() that the caller releases with
     * free(), or NULL when `utf8` is NULL or the allocation fails.
     */
    wchar_t * make_unicode_string(const unsigned char *utf8)
    {
        const int needs_surrogates = (sizeof(wchar_t) < 4);
        size_t size = 0, index = 0, out_index = 0;
        wchar_t *out;
        unsigned long cp;

        if (utf8 == NULL)
            return NULL;

        /* first calculate the size of the target string */
        while (utf8[index])
        {
            cp = next_utf8_code_point(utf8, &index);
            size += (needs_surrogates && cp > 0xFFFFUL) ? 2 : 1;
        }

        out = (wchar_t*)malloc((size + 1) * sizeof(wchar_t));
        if (out == NULL)
            return NULL;

        /* second pass: store the decoded units */
        index = 0;
        while (utf8[index])
        {
            cp = next_utf8_code_point(utf8, &index);

            if (needs_surrogates && cp > 0xFFFFUL)
            {
                cp -= 0x10000UL;
                out[out_index++] = (wchar_t)(0xD800UL + (cp >> 10));
                out[out_index++] = (wchar_t)(0xDC00UL + (cp & 0x3FFUL));
            }
            else
            {
                out[out_index++] = (wchar_t)cp;
            }
        }

        out[out_index] = 0;

        return out;
    }
    /*
     * Converts a NUL-terminated multibyte string in the environment's locale
     * encoding to UTF-8.
     *
     * from: input string in the encoding selected by setlocale(LC_ALL, "").
     * to:   receives the UTF-8 result produced by make_utf8_string(); the
     *       caller releases it with free(). Set to NULL on failure.
     *
     * Returns 0 on success and -1 on failure (invalid arguments, invalid
     * multibyte input, or out of memory); failures are reported on stderr.
     *
     * NOTE: this switches the process-wide locale to the environment locale
     * for the conversion and leaves it set to "C" afterwards, on every path.
     */
    int utf8_encode(const char *from, char **to)
    {
        wchar_t *unicode = NULL;
        size_t wchars, converted;

        if (from == NULL || to == NULL)
            return -1;

        *to = NULL;

        setlocale(LC_ALL,"");

        /* A NULL destination only measures; (size_t)-1 signals bad input. */
        wchars = mbstowcs(NULL, from, 0);
        if (wchars == (size_t)-1)
        {
            setlocale(LC_ALL,"C");
            fprintf(stderr, "Unicode encode error \n");
            return -1;
        }

        unicode = new wchar_t[wchars + 1];

        converted = mbstowcs(unicode, from, wchars + 1);
        setlocale(LC_ALL,"C");

        if (converted == (size_t)-1)
        {
            delete[] unicode;
            fprintf(stderr, "Unicode encode error \n");
            return -1;
        }

        /* Guarantee termination regardless of what the conversion wrote. */
        unicode[wchars] = 0;

        *to = (char *)make_utf8_string(unicode);

        delete[] unicode;

        if (*to == NULL)
        {
            fprintf(stderr, "Out of memory processing string to UTF8\n");
            return -1;
        }

        return 0;
    }
    
    /*
     * Converts a NUL-terminated UTF-8 string to a multibyte string in the
     * environment's locale encoding.
     *
     * from: UTF-8 input string.
     * to:   receives the converted string, allocated with new char[]; the
     *       caller releases it with delete[]. Set to NULL on failure.
     *
     * Returns 0 on success and -1 on failure (invalid arguments, a character
     * the locale cannot represent, or out of memory); failures are reported
     * on stderr.
     *
     * NOTE: this switches the process-wide locale to the environment locale
     * for the conversion and leaves it set to "C" afterwards.
     */
    int utf8_decode(const char *from, char **to)
    {
        wchar_t *unicode = NULL;
        size_t needed, written;

        if (from == NULL || to == NULL)
            return -1;

        *to = NULL;

        unicode = make_unicode_string((const unsigned char*)from);
        if (unicode == NULL)
        {
            fprintf(stderr, "Out of memory processing string from UTF8 to UNICODE16\n");
            return -1;
        }

        setlocale(LC_ALL,"");

        /* Measure with a NULL destination; passing a non-NULL pointer with a
         * zero limit would report 0 instead of the required size. */
        needed = wcstombs(NULL, unicode, 0);
        if (needed == (size_t)-1)
        {
            setlocale(LC_ALL,"C");
            free(unicode);
            fprintf(stderr, "Unicode decode error \n");
            return -1;
        }

        *to = new char[needed + 1];
        memset(*to, 0, needed + 1);

        written = wcstombs(*to, unicode, needed + 1);
        setlocale(LC_ALL,"C");

        /* make_unicode_string() allocates with malloc(), so release with free(). */
        free(unicode);

        if (written == (size_t)-1)
        {
            fprintf(stderr, "Unicode decode error \n");
            delete[] *to;
            *to = NULL;
            return -1;
        }

        return 0;
    }
  • 相关阅读:
    获取ocx运行路径的另一种方法
    使用D3D渲染YUV视频数据
    C++(MFC)中WebBrowser去除3D边框的方法(实现IDocHostUIHandler接口)
    ActiveX控件的安全初始化和脚本操作 和 数字签名SIGN
    解决Eclipse中的卡死现象
    Http请求头和响应头
    HTTP请求头与响应头
    centos7 Mariadb5.5升级到Mariadb10.2
    window下利用navicat访问Linux下的mariadb数据库
    在Linux上安装及配置MariaDB
  • 原文地址:https://www.cnblogs.com/zhehan54/p/8995362.html
Copyright © 2020-2023  润新知