// Analysis (分析): http://www.myexception.cn/mysql/639943.html
// Solution (解决方法): http://blog.sina.com.cn/s/blog_3f78232201011o26.html
/**
 * Replaces every character whose UTF-8 encoding needs four bytes (i.e. every
 * supplementary code point, above U+FFFF) with the Unicode replacement
 * character U+FFFD, so the result only contains characters that encode to at
 * most three UTF-8 bytes.
 *
 * <p>NOTE(review): presumably used before writing text to a store that only
 * accepts 3-byte UTF-8 (e.g. a MySQL {@code utf8} column) - confirm against callers.
 */
public class Utf8Filter {

    /** Written in place of each character that would need four UTF-8 bytes (U+FFFD). */
    private static final char REPLACEMENT_CHAR = '\uFFFD';

    /**
     * Written in place of an unpaired surrogate. Such a char cannot be encoded in
     * UTF-8 at all; {@code String.getBytes} substitutes {@code '?'} for it, which is
     * what the earlier byte-level version of this filter produced.
     */
    private static final char UNENCODABLE_CHAR = '?';

    /**
     * Demo driver: decodes eight sample byte sequences as UTF-8, filters each one and
     * prints the result on its own line. The samples combine the letter {@code A}
     * (0x41) with the 4-byte sequence {@code F3 B7 A2 BE} at the start, middle and
     * end of the text, both singly and doubled.
     *
     * @param args ignored
     * @throws Exception if the "utf-8" charset name is not supported by the runtime
     */
    public static void main(String[] args) throws Exception {
        byte[][] samples = {
            { 0x41 },
            { (byte) 0xf3, (byte) 0xb7, (byte) 0xa2, (byte) 0xbe },
            { 0x41, (byte) 0xf3, (byte) 0xb7, (byte) 0xa2, (byte) 0xbe },
            { (byte) 0xf3, (byte) 0xb7, (byte) 0xa2, (byte) 0xbe, 0x41 },
            { 0x41, (byte) 0xf3, (byte) 0xb7, (byte) 0xa2, (byte) 0xbe, 0x41 },
            { (byte) 0xf3, (byte) 0xb7, (byte) 0xa2, (byte) 0xbe,
              (byte) 0xf3, (byte) 0xb7, (byte) 0xa2, (byte) 0xbe, 0x41 },
            { 0x41, (byte) 0xf3, (byte) 0xb7, (byte) 0xa2, (byte) 0xbe,
              (byte) 0xf3, (byte) 0xb7, (byte) 0xa2, (byte) 0xbe },
            { 0x41, (byte) 0xf3, (byte) 0xb7, (byte) 0xa2, (byte) 0xbe,
              (byte) 0xf3, (byte) 0xb7, (byte) 0xa2, (byte) 0xbe, 0x41 },
        };
        for (byte[] sample : samples) {
            System.out.println(filterMt4BytesUtf8(new String(sample, "utf-8")));
        }
    }

    /**
     * Returns a copy of {@code input} in which each supplementary code point (one that
     * takes four bytes in UTF-8) is replaced by a single U+FFFD.
     *
     * <p>The scan works on code points rather than on encoded bytes, so the result does
     * not depend on the platform default charset and no encode/decode round trip is
     * needed. Java's UTF-8 encoder never emits 5- or 6-byte sequences, so only the
     * 4-byte case has to be handled.
     *
     * @param input text to filter; may be {@code null}
     * @return {@code input} itself when it is {@code null} or empty; otherwise a string
     *         with every supplementary code point replaced by U+FFFD and every unpaired
     *         surrogate replaced by {@code '?'}; all other characters are kept as-is
     */
    private static String filterMt4BytesUtf8(String input) {
        if (input == null || input.isEmpty()) {
            return input;
        }
        final int length = input.length();
        StringBuilder filtered = new StringBuilder(length);
        int index = 0;
        while (index < length) {
            int codePoint = input.codePointAt(index);
            // A supplementary code point occupies two chars (a surrogate pair); skip both.
            index += Character.charCount(codePoint);
            if (Character.isSupplementaryCodePoint(codePoint)) {
                filtered.append(REPLACEMENT_CHAR);
            } else if (Character.isSurrogate((char) codePoint)) {
                // codePointAt returns a lone surrogate unchanged when it has no partner.
                filtered.append(UNENCODABLE_CHAR);
            } else {
                filtered.append((char) codePoint);
            }
        }
        return filtered.toString();
    }
}