不解释,直接上代码:
由于 Iteye 的代码框在粘贴四个字节的 UTF-8 字符时会出错,特改用图片的方式发布这几个特殊字符:
-
/**
 * Demo that walks the UTF-8 encoding of a string and reports, for every
 * character, how many bytes its encoding occupies (1 to 4).
 *
 * <p>The width of a UTF-8 sequence is determined by the high bits of its
 * first (lead) byte:
 * <pre>
 *   0xxxxxxx  1 byte  (ASCII)
 *   110xxxxx  2 bytes
 *   1110xxxx  3 bytes
 *   11110xxx  4 bytes
 * </pre>
 *
 * <p>NOTE(review): the sample literal as it appears here contains only
 * 1-byte and 3-byte characters; add a supplementary-plane character to the
 * string to exercise the 4-byte case.
 */
public class Byte4Check {

    /**
     * Prints the raw UTF-8 bytes of the first character of the sample string,
     * a separator line, and then the byte width and text of every character.
     *
     * @param args unused
     * @throws UnsupportedEncodingException if the JVM does not provide UTF-8
     * @throws IllegalStateException if the encoded bytes are not well-formed UTF-8
     */
    public static void main(String args[]) throws UnsupportedEncodingException {
        String nickName = "12葫";

        // Dump the signed byte values that encode the first character.
        byte[] t = nickName.substring(0, 1).getBytes("UTF-8");
        for (byte tt : t) {
            System.out.println(tt);
        }

        System.out.println("====================");

        byte[] t1 = nickName.getBytes("UTF-8");
        int i = 0;
        while (i < t1.length) {
            int length = sequenceLength(t1[i]);
            // Fail loudly instead of spinning forever on an unexpected lead
            // byte or reading past the end of a truncated sequence.
            if (length == 0 || i + length > t1.length) {
                throw new IllegalStateException("Malformed UTF-8 at byte offset " + i);
            }
            // Decode explicitly as UTF-8; the platform default charset may differ.
            String result = new String(t1, i, length, "UTF-8");
            System.out.println(length + "个字节的字符");
            System.out.println("字符为:" + result);
            i += length;
        }
    }

    /**
     * Returns the total length in bytes of the UTF-8 sequence introduced by
     * {@code lead}, or 0 when {@code lead} is not a valid lead byte (for
     * example a continuation byte of the form 10xxxxxx).
     */
    private static int sequenceLength(byte lead) {
        if ((lead & 0x80) == 0x00) {
            return 1;
        }
        if ((lead & 0xE0) == 0xC0) {
            return 2;
        }
        if ((lead & 0xF0) == 0xE0) {
            return 3;
        }
        if ((lead & 0xF8) == 0xF0) {
            return 4;
        }
        return 0;
    }
}
参考文献:
http://www.yunmx.com/archives/2011/138.htm
http://zh.wikipedia.org/wiki/UTF-8
/**
 * Demo that walks the UTF-8 encoding of a string and reports, for every
 * character, how many bytes its encoding occupies (1 to 4).
 *
 * <p>The width of a UTF-8 sequence is determined by the high bits of its
 * first (lead) byte:
 * <pre>
 *   0xxxxxxx  1 byte  (ASCII)
 *   110xxxxx  2 bytes
 *   1110xxxx  3 bytes
 *   11110xxx  4 bytes
 * </pre>
 *
 * <p>NOTE(review): the sample literal as it appears here contains only
 * 1-byte and 3-byte characters; add a supplementary-plane character to the
 * string to exercise the 4-byte case.
 */
public class Byte4Check {

    /**
     * Prints the raw UTF-8 bytes of the first character of the sample string,
     * a separator line, and then the byte width and text of every character.
     *
     * @param args unused
     * @throws UnsupportedEncodingException if the JVM does not provide UTF-8
     * @throws IllegalStateException if the encoded bytes are not well-formed UTF-8
     */
    public static void main(String args[]) throws UnsupportedEncodingException {
        String nickName = "12葫";

        // Dump the signed byte values that encode the first character.
        byte[] t = nickName.substring(0, 1).getBytes("UTF-8");
        for (byte tt : t) {
            System.out.println(tt);
        }

        System.out.println("====================");

        byte[] t1 = nickName.getBytes("UTF-8");
        int i = 0;
        while (i < t1.length) {
            int length = sequenceLength(t1[i]);
            // Fail loudly instead of spinning forever on an unexpected lead
            // byte or reading past the end of a truncated sequence.
            if (length == 0 || i + length > t1.length) {
                throw new IllegalStateException("Malformed UTF-8 at byte offset " + i);
            }
            // Decode explicitly as UTF-8; the platform default charset may differ.
            String result = new String(t1, i, length, "UTF-8");
            System.out.println(length + "个字节的字符");
            System.out.println("字符为:" + result);
            i += length;
        }
    }

    /**
     * Returns the total length in bytes of the UTF-8 sequence introduced by
     * {@code lead}, or 0 when {@code lead} is not a valid lead byte (for
     * example a continuation byte of the form 10xxxxxx).
     */
    private static int sequenceLength(byte lead) {
        if ((lead & 0x80) == 0x00) {
            return 1;
        }
        if ((lead & 0xE0) == 0xC0) {
            return 2;
        }
        if ((lead & 0xF0) == 0xE0) {
            return 3;
        }
        if ((lead & 0xF8) == 0xF0) {
            return 4;
        }
        return 0;
    }
}