• 字符编码之UCS2与Utf8


    很多操作系统都直接支持UTF-8字符串操作,只有MS这个异类在Windows上使用16位宽字符的Unicode编码,也就是所谓的UCS-2(严格来说,现在的Windows使用的是UTF-16)

    如果写关于跨平台的代码,那么避免不了要做编码转化

    这里贴一下今天写的把Unicode转化为Utf-8的代码

    Ucs2BeToUcs2Le负责将大端转化为小端
    Ucs2ToUtf8负责将Unicode转化为Utf-8
    Utf8ToUcs2负责将Utf-8转化为Unicode

    本转化函数只考虑了UTF-8编码长度不超过3个字节的字符(即UCS-2能表示的范围),需要处理4个字节编码的同学请自行google了啊
      /*
       * Swap the two bytes of every 16-bit unit in a UCS-2 buffer, in place
       * (big endian <-> little endian).
       *
       * ucs2bige  buffer to convert; modified in place. May be NULL.
       * size      maximum number of 16-bit units to process.
       *
       * Processing stops at the first zero unit or after `size` units,
       * whichever comes first. The zero unit itself is left untouched.
       *
       * Returns the number of units that were byte-swapped (0 for NULL input).
       */
      unsigned Ucs2BeToUcs2Le(unsigned short *ucs2bige, unsigned int size)
      {
          unsigned int done = 0;

          if (!ucs2bige) {
              return 0;
          }

          /*
           * Check the bound before dereferencing: a buffer holding exactly
           * `size` non-zero units has no terminator to read.
           */
          while (done < size && ucs2bige[done]) {
              unsigned char val_high = (unsigned char)(ucs2bige[done] >> 8);
              unsigned char val_low = (unsigned char)ucs2bige[done];

              ucs2bige[done] = (unsigned short)((val_low << 8) | val_high);
              done++;
          }

          return done;
      }
     26 
     /* Number of UTF-8 bytes needed to encode one UCS-2 unit (1, 2 or 3). */
     static unsigned int ucs2_unit_utf8_len(unsigned short unit)
     {
         if (unit < 0x0080) {
             return 1;
         }
         if (unit < 0x0800) {
             return 2;
         }
         return 3;
     }

     /*
      * Convert a UCS-2 buffer to UTF-8.
      *
      * ucs2       input units. If the first unit is 0xFFFE (a byte-swapped
      *            BOM) the whole buffer is byte-swapped IN PLACE with
      *            Ucs2BeToUcs2Le() before conversion. May be NULL.
      * ucs2_size  maximum number of input units to read; reading also stops
      *            at the first zero unit.
      * utf8       output buffer, or NULL to only compute the required size.
      * utf8_size  capacity of `utf8` in bytes (ignored when utf8 is NULL).
      *
      * Every unit is encoded on its own as a 1-, 2- or 3-byte sequence; units
      * equal to 0xFFFE are dropped. A character is written only if all of its
      * bytes fit, so output may stop early but never overruns `utf8`.
      * The output is not NUL-terminated.
      *
      * Returns the number of bytes written, or the number of bytes required
      * when utf8 is NULL. Returns 0 for NULL or empty input.
      */
     unsigned int Ucs2ToUtf8(unsigned short *ucs2, unsigned int ucs2_size,
             unsigned char *utf8, unsigned int utf8_size)
     {
         unsigned int length = 0;
         unsigned int i;

         /* Nothing may be read from an empty buffer, not even the BOM. */
         if (!ucs2 || ucs2_size == 0) {
             return 0;
         }

         if (ucs2[0] == 0xFFFE) {
             Ucs2BeToUcs2Le(ucs2, ucs2_size);
         }

         /* Bound check first, so a full, unterminated buffer is never over-read. */
         for (i = 0; i < ucs2_size && ucs2[i]; i++) {
             unsigned short unit = ucs2[i];
             unsigned int need;

             /*
              * Dropped in both modes so that the size reported for a NULL
              * `utf8` always equals what a real conversion writes.
              */
             if (unit == 0xFFFE) {
                 continue;
             }

             need = ucs2_unit_utf8_len(unit);

             if (utf8) {
                 /* length <= utf8_size always holds, so this cannot wrap. */
                 if (need > utf8_size - length) {
                     break;
                 }

                 if (need == 1) {
                     utf8[length] = (unsigned char)unit;
                 } else if (need == 2) {
                     utf8[length] = (unsigned char)(0xC0 | (unit >> 6));
                     utf8[length + 1] = (unsigned char)(0x80 | (unit & 0x3F));
                 } else {
                     utf8[length] = (unsigned char)(0xE0 | (unit >> 12));
                     utf8[length + 1] = (unsigned char)(0x80 | ((unit >> 6) & 0x3F));
                     utf8[length + 2] = (unsigned char)(0x80 | (unit & 0x3F));
                 }
             }

             length += need;
         }

         return length;
     }
    102 
    /*
     * Decode one UTF-8 sequence of at most 3 bytes into a single UCS-2 unit.
     *
     * seq    points at the lead byte; at least one byte must be readable.
     * avail  number of readable bytes starting at seq (>= 1).
     * unit   receives the decoded value on success.
     *
     * Returns the number of bytes consumed (1..3), or 0 when the sequence is
     * truncated, has an invalid continuation byte, or starts with a byte that
     * is not a 1/2/3-byte lead (stray continuation byte, 4-byte lead, ...).
     */
    static unsigned int utf8_decode_ucs2_unit(const unsigned char *seq,
            unsigned int avail, unsigned short *unit)
    {
        unsigned char lead = seq[0];
        unsigned int need;
        unsigned int value;
        unsigned int k;

        if ((lead & 0x80) == 0) {
            *unit = lead;
            return 1;
        }

        if ((lead & 0xE0) == 0xC0) {
            need = 2;
            value = lead & 0x1Fu;
        } else if ((lead & 0xF0) == 0xE0) {
            need = 3;
            value = lead & 0x0Fu;
        } else {
            return 0;
        }

        if (need > avail) {
            return 0;
        }

        for (k = 1; k < need; k++) {
            /* Also rejects a NUL terminator in the middle of a sequence. */
            if ((seq[k] & 0xC0) != 0x80) {
                return 0;
            }
            value = (value << 6) | (seq[k] & 0x3Fu);
        }

        *unit = (unsigned short)value;
        return need;
    }

    /*
     * Convert UTF-8 to UCS-2.
     *
     * utf8       input bytes. May be NULL.
     * utf8_size  maximum number of input bytes to read; reading also stops
     *            at the first zero byte.
     * ucs2       output buffer, or NULL to only count the units required.
     * ucs2_size  capacity of `ucs2` in 16-bit units (ignored when ucs2 is NULL).
     *
     * Only sequences of 1 to 3 bytes are supported. Conversion stops at the
     * first sequence that is truncated, malformed or longer than 3 bytes.
     * The output is not NUL-terminated.
     *
     * Returns the number of units written, or the number of units required
     * when ucs2 is NULL. Returns 0 for NULL input.
     */
    unsigned int Utf8ToUcs2(unsigned char *utf8, unsigned int utf8_size,
            unsigned short *ucs2, unsigned int ucs2_size)
    {
        unsigned int length = 0;
        unsigned int pos = 0;

        if (!utf8) {
            return 0;
        }

        /* Bound check first, so a full, unterminated buffer is never over-read. */
        while (pos < utf8_size && utf8[pos]) {
            unsigned short unit;
            unsigned int used;

            if (ucs2 && length >= ucs2_size) {
                break;
            }

            used = utf8_decode_ucs2_unit(utf8 + pos, utf8_size - pos, &unit);
            if (used == 0) {
                /* Undecodable input: stop instead of spinning or over-reading. */
                break;
            }

            if (ucs2) {
                ucs2[length] = unit;
            }
            length++;
            pos += used;
        }

        return length;
    }
  • 相关阅读:
    android matrix
    canvas变换
    Gradle常用命令
    android View实现变暗效果
    常用设计模式
    Android无埋点数据收集SDK关键技术
    如何打开小米,oppo,华为等手机的系统应用的指定页面
    Android 性能优化
    SpringBoot入坑-请求参数传递
    SpringBoot入坑-配置文件使用
  • 原文地址:https://www.cnblogs.com/jojodru/p/2574616.html
Copyright © 2020-2023  润新知