名字只能包含中文、英文字母和数字，编码为 UTF-8。
/*
 * Classifies a single byte for use in a name.
 *
 * Returns false when c is an ASCII digit or an ASCII letter (either case),
 * and true for every other value.
 */
static bool _illegal_char(char c)
{
    const bool is_digit = (c >= 48 && c <= 57);   /* '0'..'9' */
    const bool is_upper = (c >= 65 && c <= 90);   /* 'A'..'Z' */
    const bool is_lower = (c >= 97 && c <= 122);  /* 'a'..'z' */

    return !(is_digit || is_upper || is_lower);
}
/*
 * Reports whether a UTF-8 encoded name contains a character that is not
 * allowed.
 *
 * Scans at most len bytes of str and stops early at a NUL byte; no byte at
 * index len or beyond is ever read.
 *
 * Allowed:  ASCII digits and letters, and any multi-byte sequence whose lead
 *           byte is 0xC4..0xF7 (this loose version does not restrict which
 *           non-ASCII characters may appear).
 * Rejected: every other ASCII byte; bytes 0x80..0xC3 in lead position (stray
 *           continuation bytes and the two-byte encodings up to U+00FF);
 *           bytes 0xF8..0xFF, which never occur in UTF-8; sequences cut off
 *           by len; sequences whose trailing bytes are not of the form
 *           10xxxxxx.
 *
 * Returns true if an illegal character is found, false otherwise (including
 * for a null str or len <= 0).
 */
static bool has_illegal_char(unsigned char* str, int len)
{
    int pos = 0;

    if (!str)
        return false;

    while (pos < len && str[pos] != '\0')
    {
        const unsigned char lead = str[pos];
        int width;
        int j;

        if (lead < 0x80)
        {
            /* Single-byte (ASCII) character: only digits and letters pass. */
            const bool alnum = (lead >= 0x30 && lead <= 0x39)    /* 0-9 */
                            || (lead >= 0x41 && lead <= 0x5A)    /* A-Z */
                            || (lead >= 0x61 && lead <= 0x7A);   /* a-z */
            if (!alnum)
                return true;
            pos += 1;
            continue;
        }

        if (lead < 0xC4 || lead >= 0xF8)
            return true;

        /* The lead byte announces how many bytes the character occupies. */
        width = (lead < 0xE0) ? 2 : (lead < 0xF0) ? 3 : 4;

        /* A sequence that runs past the buffer is malformed. */
        if (len - pos < width)
            return true;

        /*
         * Check the trailing bytes instead of skipping them blindly, so a
         * bogus lead byte cannot be used to smuggle illegal ASCII through.
         */
        for (j = 1; j < width; j++)
        {
            if ((str[pos + j] & 0xC0) != 0x80)
                return true;
        }

        pos += width;
    }

    return false;
}
以下版本进一步把中文限定在 0x4E00-0x9FA5 范围内，其余非 ASCII 字符一律排除。做法比较粗暴，需要的话可以自行修改。
/*
 * Reports whether a UTF-8 encoded name contains a character that is not
 * allowed, with Chinese restricted to the code points U+4E00..U+9FA5.
 *
 * Scans at most len bytes of str and stops early at a NUL byte; no byte at
 * index len or beyond is ever read.
 *
 * Allowed:  ASCII digits and letters, and well-formed three-byte sequences
 *           that decode to U+4E00..U+9FA5.
 * Rejected: everything else, including all two-byte and four-byte
 *           sequences, truncated sequences, and sequences whose trailing
 *           bytes are not of the form 10xxxxxx.
 *
 * Returns true if an illegal character is found, false otherwise (including
 * for a null str or len <= 0).
 */
static bool has_illegal_char(unsigned char* str, int len)
{
    int pos = 0;

    if (!str)
        return false;

    while (pos < len && str[pos] != '\0')
    {
        const unsigned char lead = str[pos];
        unsigned int cp;

        if (lead < 0x80)
        {
            /* Single-byte (ASCII) character: only digits and letters pass. */
            const bool alnum = (lead >= 0x30 && lead <= 0x39)    /* 0-9 */
                            || (lead >= 0x41 && lead <= 0x5A)    /* A-Z */
                            || (lead >= 0x61 && lead <= 0x7A);   /* a-z */
            if (!alnum)
                return true;
            pos += 1;
            continue;
        }

        /*
         * U+4E00 encodes as E4 B8 80 and U+9FA5 as E9 BE A5, so every
         * permitted non-ASCII character is a three-byte sequence whose lead
         * byte is 0xE4..0xE9. Any other lead byte is out of range.
         */
        if (lead < 0xE4 || lead > 0xE9)
            return true;

        /* A sequence that runs past the buffer is malformed. */
        if (len - pos < 3)
            return true;

        if ((str[pos + 1] & 0xC0) != 0x80 || (str[pos + 2] & 0xC0) != 0x80)
            return true;

        /* Decode 1110xxxx 10xxxxxx 10xxxxxx and range-check the result. */
        cp = ((unsigned int)(lead & 0x0F) << 12)
           | ((unsigned int)(str[pos + 1] & 0x3F) << 6)
           | (unsigned int)(str[pos + 2] & 0x3F);
        if (cp < 0x4E00 || cp > 0x9FA5)
            return true;

        pos += 3;
    }

    return false;
}
下面这篇文章对 UTF-8 编码讲解得比较清楚:
http://www.ruanyifeng.com/blog/2007/10/ascii_unicode_and_utf-8.html