• utf8编码检测非法字符


    名字只允许包含中文、英文字母和数字,字符串编码为 UTF-8。

    /*
     * Tell whether a single byte is disallowed in a name.
     *
     * Only the ASCII digits '0'-'9' and the ASCII letters 'A'-'Z' / 'a'-'z'
     * are considered legal.
     *
     * c: the byte to classify.
     * Returns true when c is NOT an ASCII digit or letter, false otherwise.
     */
    static bool _illegal_char(char c)
    {
        const bool is_digit = ('0' <= c) && (c <= '9');
        const bool is_upper = ('A' <= c) && (c <= 'Z');
        const bool is_lower = ('a' <= c) && (c <= 'z');

        /* Anything outside the three permitted ranges is illegal. */
        return !(is_digit || is_upper || is_lower);
    }

    /*
     * Scan a UTF-8 encoded name and report whether it contains a character
     * that is not allowed.
     *
     * Accepted characters:
     *   - ASCII digits and ASCII letters;
     *   - any 2-byte sequence whose lead byte is 0xC4-0xDF (U+0100 and up);
     *   - any 3-byte or 4-byte sequence (lead byte 0xE0-0xF4).
     * Rejected:
     *   - every other ASCII byte (punctuation, space, control characters);
     *   - lead bytes 0x80-0xC3 (stray continuation bytes and the
     *     U+0080-U+00FF "extended ASCII" range);
     *   - lead bytes 0xF5-0xFF, which never occur in UTF-8;
     *   - sequences that are truncated by len or whose trailing bytes are not
     *     continuation bytes (10xxxxxx).  Without this check an illegal ASCII
     *     byte could hide behind a bogus lead byte and be skipped.
     * Overlong encodings and surrogate code points inside 3/4-byte sequences
     * are not diagnosed.
     *
     * str: buffer holding the name; need not be NUL-terminated.
     * len: number of bytes available in str.  Scanning stops at len or at the
     *      first NUL byte, whichever comes first; no byte at index >= len is
     *      ever read.
     * Returns true if an illegal or malformed character is found, false
     * otherwise (including for an empty or NULL input).
     */
    static bool has_illegal_char(unsigned char* str, int len)
    {
        int pos = 0;

        if (!str)
            return false;

        while (pos < len && str[pos] != '\0')
        {
            const unsigned char lead = str[pos];
            int seq_len;
            int j;

            if (lead < 0x80)
            {
                /* Single-byte ASCII: only digits and letters are allowed. */
                const bool alnum = (lead >= '0' && lead <= '9')
                                || (lead >= 'A' && lead <= 'Z')
                                || (lead >= 'a' && lead <= 'z');
                if (!alnum)
                    return true;
                seq_len = 1;
            }
            else if (lead < 0xC4)
            {
                /* 0x80-0xBF cannot start a character; 0xC0-0xC3 encode at
                 * most U+00FF, which is not permitted in a name. */
                return true;
            }
            else if (lead < 0xE0)
                seq_len = 2;
            else if (lead < 0xF0)
                seq_len = 3;
            else if (lead < 0xF5)
                seq_len = 4;
            else
                return true; /* 0xF5-0xFF are never valid in UTF-8 */

            /* The whole sequence must fit inside the buffer. */
            if (seq_len > len - pos)
                return true;

            /* Every trailing byte must look like 10xxxxxx. */
            for (j = 1; j < seq_len; j++)
            {
                if ((str[pos + j] & 0xC0) != 0x80)
                    return true;
            }

            pos += seq_len;
        }
        return false;
    }

    以下版本把允许的中文字符限定在 U+4E00–U+9FA5 范围内。做法比较粗暴,需要的话可以自行修改。

    /*
     * Scan a UTF-8 encoded name and report whether it contains a character
     * other than an ASCII digit, an ASCII letter, or a Chinese character in
     * the range U+4E00..U+9FA5.
     *
     * All code points in U+4E00..U+9FA5 are encoded as 3-byte sequences
     * (E4 B8 80 .. E9 BE A5), so every 2-byte and 4-byte sequence is rejected
     * outright, and each 3-byte sequence is decoded and range-checked.
     * Sequences that are truncated by len, or whose trailing bytes are not
     * continuation bytes (10xxxxxx), are rejected as well.
     *
     * str: buffer holding the name; need not be NUL-terminated.
     * len: number of bytes available in str.  Scanning stops at len or at the
     *      first NUL byte, whichever comes first; no byte at index >= len is
     *      ever read.
     * Returns true if an illegal or malformed character is found, false
     * otherwise (including for an empty or NULL input).
     */
    static bool has_illegal_char(unsigned char* str, int len)
    {
    	int pos = 0;

    	if (!str)
    		return false;

    	while (pos < len && str[pos] != '\0')
    	{
    		const unsigned char lead = str[pos];
    		unsigned char b1, b2;
    		unsigned int cp;

    		if (lead < 0x80)
    		{
    			/* Single-byte ASCII: only digits and letters are allowed. */
    			const bool alnum = (lead >= '0' && lead <= '9')
    					|| (lead >= 'A' && lead <= 'Z')
    					|| (lead >= 'a' && lead <= 'z');
    			if (!alnum)
    				return true;
    			pos += 1;
    			continue;
    		}

    		/* Only a 3-byte lead (1110xxxx) can start an allowed character. */
    		if ((lead & 0xF0) != 0xE0)
    			return true;

    		/* The two continuation bytes must be inside the buffer. */
    		if (len - pos < 3)
    			return true;

    		b1 = str[pos + 1];
    		b2 = str[pos + 2];
    		if ((b1 & 0xC0) != 0x80 || (b2 & 0xC0) != 0x80)
    			return true;

    		/* Decode 1110xxxx 10yyyyyy 10zzzzzz -> xxxxyyyyyyzzzzzz. */
    		cp = ((unsigned int)(lead & 0x0F) << 12)
    		   | ((unsigned int)(b1 & 0x3F) << 6)
    		   | (unsigned int)(b2 & 0x3F);
    		if (cp < 0x4E00 || cp > 0x9FA5)
    			return true;

    		pos += 3;
    	}
    	return false;
    }
    

      


    这篇文章:

    http://www.ruanyifeng.com/blog/2007/10/ascii_unicode_and_utf-8.html

    讲解utf8编码比较清楚.

      

  • 相关阅读:
    矩阵相关计算-基础
    列表-元祖-字典
    Python制作NTF传递函数工况文件和后处理文件
    C++栈和堆的生长方向
    malloc/free与new/delete的区别
    C++中4个类型转换相关的关键字/特点/应用场合
    Python 拾遗
    Python 常用库
    Python 视频转字符动画
    Python 命令行(CLI)基础库
  • 原文地址:https://www.cnblogs.com/flytrace/p/2313992.html
Copyright © 2020-2023  润新知