// XML文件处理过程中的0x1A错误处理 (handling 0x1A invalid-character errors while processing XML files)
package testjavaBasic;
import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import java.util.regex.Pattern;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
/**
 * Demonstrates how control characters that are illegal in XML 1.0 (for
 * example 0x1A) make an XML parser fail, and how stripping them from the
 * text beforehand lets the document parse normally.
 *
 * @author zhangdi
 */
public class TestXMlTransferException_0x1a {

    /**
     * Matches the control characters 0x00-0x08, 0x0B-0x0C and 0x0E-0x1F.
     * Tab (0x09), line feed (0x0A) and carriage return (0x0D) are legal in
     * XML 1.0 and are deliberately left out of the class.
     *
     * <p>The backslashes are doubled so that the hexadecimal escapes are
     * interpreted by the regex engine; a single backslash followed by
     * {@code x} is not a legal escape inside a Java string literal and does
     * not compile.
     */
    private static final Pattern INVALID_XML_CONTROL_CHARS =
            Pattern.compile("[\\x00-\\x08\\x0b-\\x0c\\x0e-\\x1f]");

    /**
     * Splits a sample text on illegal control characters and prints every
     * resulting fragment on its own line.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String textUat = "..."; // see test_xml
        String[] fragments = CleanInvalidXmlChars(textUat);
        for (String fragment : fragments) {
            System.out.println(fragment);
        }
    }

    /**
     * Example 1: splits {@code text} at every control character that is not
     * allowed in XML 1.0.
     *
     * <p>Valid characters according to the XML 1.0 specification are
     * {@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
     * [#x10000-#x10FFFF]}; this method only deals with the forbidden
     * characters below 0x20.
     *
     * @param text the text to split; must not be {@code null}
     * @param <T>  the type expected by the caller; the value returned is
     *             always a {@code String[]}, so any incompatible choice of
     *             {@code T} fails with a {@link ClassCastException} at the
     *             call site
     * @return the fragments of {@code text} between illegal control
     *         characters, as a {@code String[]} (trailing empty fragments
     *         are dropped, as with {@link String#split(String)})
     * @throws NullPointerException if {@code text} is {@code null}
     */
    public static <T> T CleanInvalidXmlChars(String text) {
        String[] fragments = INVALID_XML_CONTROL_CHARS.split(text);
        // The generic return type is kept for source compatibility with
        // existing callers; the cast itself is unchecked because of erasure.
        @SuppressWarnings("unchecked")
        T result = (T) fragments;
        return result;
    }

    /**
     * Example 2: builds a well-formed XML string and a copy containing one
     * extra invisible byte (31), prints both, then strips the illegal
     * control characters from the copy and parses the result to show that
     * parsing succeeds after filtering.
     *
     * @return the unfiltered string that still contains the illegal
     *         character
     */
    public static String CleanInvalidXmlChars_2() {
        // Expected text: <r><c d="s" n="j"></c></r>
        // Byte representation of the well-formed document.
        byte[] ba1 = new byte[] { 60, 114, 62, 60, 99, 32, 100, 61, 34, 115,
                34, 32, 110, 61, 34, 106, 34, 62, 60, 47, 99, 62, 60, 47, 114,
                62 };
        System.out.println("ba1 length=" + ba1.length);
        // Decode with an explicit charset so the result does not depend on
        // the platform default.
        String ba1str = new String(ba1, StandardCharsets.UTF_8);
        System.out.println(ba1str);
        System.out.println("ba1str length=" + ba1str.length());
        System.out.println("-----------------------------------------");

        // Same bytes as above plus one invisible byte with value 31.
        byte[] ba2 = new byte[] { 60, 114, 62, 60, 99, 32, 100, 61, 34, 115,
                34, 32, 110, 61, 34, 106, 31, 34, 62, 60, 47, 99, 62, 60, 47,
                114, 62 };
        System.out.println("ba2 length=" + ba2.length);
        String ba2str = new String(ba2, StandardCharsets.UTF_8);
        System.out.println(ba2str);
        System.out.println("ba2str length=" + ba2str.length());
        System.out.println("-----------------------------------------");

        try {
            DocumentBuilderFactory dbfactory = DocumentBuilderFactory
                    .newInstance();
            dbfactory.setIgnoringComments(true);
            DocumentBuilder docBuilder = dbfactory.newDocumentBuilder();
            // Remove the illegal invisible characters; without this step
            // the XML parser throws an exception.
            String filter = INVALID_XML_CONTROL_CHARS.matcher(ba2str)
                    .replaceAll("");
            System.out.println("过滤后的length=" + filter.length());
            // Encode as UTF-8, which is what the parser assumes when the
            // document carries no XML declaration.
            ByteArrayInputStream bais = new ByteArrayInputStream(
                    filter.getBytes(StandardCharsets.UTF_8));
            Document doc = docBuilder.parse(bais);
            Element rootEl = doc.getDocumentElement();
            System.out.println("过滤后解析正常 root child length="
                    + rootEl.getChildNodes().getLength());
        } catch (Exception e) {
            // Demo code: report the failure and still return the raw string.
            e.printStackTrace();
        }
        return ba2str;
    }
}