• XML文件处理过程中的0x1A 错误处理


    XML文件处理过程中的0x1A 错误处理

    package testjavaBasic;
    
    import java.io.ByteArrayInputStream;
    import java.nio.charset.StandardCharsets;
    import java.util.regex.Pattern;

    import javax.xml.parsers.DocumentBuilder;
    import javax.xml.parsers.DocumentBuilderFactory;

    import org.w3c.dom.Document;
    import org.w3c.dom.Element;
    
    /**
     * Demonstrates how to handle control characters (for example 0x1A) that are
     * not allowed in XML 1.0 text and therefore make an XML parser reject the
     * document.
     *
     * @author zhangdi
     */
    public class TestXMlTransferException_0x1a {

        /**
         * Matches the C0 control characters below 0x20 other than tab (0x09),
         * line feed (0x0A) and carriage return (0x0D). 0x1A lies inside the
         * 0x0E-0x1F range.
         *
         * <p>The backslashes are doubled on purpose: {@code \x} is not a Java
         * string escape, so the hex escapes must reach the regex engine intact.
         */
        private static final Pattern INVALID_XML_CONTROL_CHARS =
                Pattern.compile("[\\x00-\\x08\\x0b-\\x0c\\x0e-\\x1f]");

        /**
         * Splits a sample text at the invalid control characters and prints each
         * resulting segment on its own line.
         *
         * @param args unused
         */
        public static void main(String[] args) {
            String textUat = "..."; // placeholder; see test_xml for the real sample

            String[] segments = CleanInvalidXmlChars(textUat);
            for (String segment : segments) {
                System.out.println(segment);
            }
        }

        /**
         * Example 1: splits {@code text} at every control character matched by
         * {@link #INVALID_XML_CONTROL_CHARS}.
         *
         * <p>Despite its name, this method does not return a cleaned string. The
         * result is always a {@code String[]} holding the segments between the
         * matched characters, with trailing empty segments dropped (the same
         * semantics as {@link String#split(String)}). The type parameter is kept
         * only so existing call sites keep compiling; requesting any type that a
         * {@code String[]} cannot be assigned to fails with a
         * {@link ClassCastException} at the call site.
         *
         * @param text the text to split; must not be {@code null}
         * @param <T> the type the caller assigns the result to; in practice
         *            {@code String[]}
         * @return the segments of {@code text} as a {@code String[]}
         * @throws NullPointerException if {@code text} is {@code null}
         */
        // The value returned is always a String[]; the cast only satisfies the
        // legacy generic signature.
        @SuppressWarnings("unchecked")
        public static <T> T CleanInvalidXmlChars(String text) {
            return (T) INVALID_XML_CONTROL_CHARS.split(text);
        }

        /**
         * Example 2: builds a small XML document that contains a stray 0x1F byte,
         * strips the invalid control characters and parses the cleaned text,
         * printing the lengths at each step.
         *
         * @return the original, unfiltered string (still containing the 0x1F
         *         character)
         */
        public static String CleanInvalidXmlChars_2() {
            // Bytes of the well-formed document: <r><c d="s" n="j"></c></r>
            byte[] ba1 = new byte[] { 60, 114, 62, 60, 99, 32, 100, 61, 34, 115,
                    34, 32, 110, 61, 34, 106, 34, 62, 60, 47, 99, 62, 60, 47, 114,
                    62 };
            System.out.println("ba1     length=" + ba1.length);
            // Decode with an explicit charset so the demo does not depend on the
            // platform default.
            String ba1str = new String(ba1, StandardCharsets.UTF_8);
            System.out.println(ba1str);
            System.out.println("ba1str  length=" + ba1str.length());
            System.out.println("-----------------------------------------");

            // Same document with one extra invisible byte (31 = 0x1F) inside it.
            byte[] ba2 = new byte[] { 60, 114, 62, 60, 99, 32, 100, 61, 34, 115,
                    34, 32, 110, 61, 34, 106, 31, 34, 62, 60, 47, 99, 62, 60, 47,
                    114, 62 };
            System.out.println("ba2     length=" + ba2.length);
            String ba2str = new String(ba2, StandardCharsets.UTF_8);
            System.out.println(ba2str);
            System.out.println("ba2str  length=" + ba2str.length());
            System.out.println("-----------------------------------------");

            try {
                DocumentBuilderFactory dbfactory = DocumentBuilderFactory.newInstance();
                dbfactory.setIgnoringComments(true);
                DocumentBuilder docBuilder = dbfactory.newDocumentBuilder();

                // Strip the invalid control characters before handing the text to
                // the parser.
                String filter = INVALID_XML_CONTROL_CHARS.matcher(ba2str).replaceAll("");
                System.out.println("过滤后的length=" + filter.length());
                ByteArrayInputStream bais =
                        new ByteArrayInputStream(filter.getBytes(StandardCharsets.UTF_8));
                Document doc = docBuilder.parse(bais);
                Element rootEl = doc.getDocumentElement();
                System.out.println("过滤后解析正常 root child length="
                        + rootEl.getChildNodes().getLength());
            } catch (Exception e) {
                // Demo code: report the failure and still return the raw string.
                e.printStackTrace();
            }
            return ba2str;
        }
    }
    
  • 相关阅读:
    Web开发常用知识点
    我的PHP之旅:开篇,走入开源的世界
    WPF Knowledge Points ContentControl和ContentPresenter的区别
    WPF Knowledge Points 控件状态利器:VisualStateManager详解
    WPF Control Hints ComboBox : 如何去掉ComboBox的DropDownButton
    WPF Control Hints ContextMenu : 怎么通过MenuItem的Click事件取得ContextMenuItem绑定的类实例?
    WPF Knowledge Points Binding.StringFormat不起作用的原理和解决
    AJAX请求 $.ajaxSetup方法的使用
    Html标签输出到前台并导出到Excel
    XML序列化和反序列化
  • 原文地址:https://www.cnblogs.com/DiZhang/p/12544983.html
Copyright © 2020-2023  润新知