• NekoHTML 转换 HTML 时标签变大写的问题


    /**
     * Parses an HTML stream with a NekoHTML {@code HTMLConfiguration} and converts the
     * resulting W3C DOM into a {@code Document} via {@code DOMReader}.
     *
     * <p>Element names are requested in lower case through the
     * {@code names/elems} property. If the parsed document carries a DOCTYPE node, that
     * node is removed before the conversion.
     *
     * <p>This method never calls {@code close()} on {@code stream} itself.
     *
     * @param stream  the HTML input; when {@code null}, {@code null} is returned immediately
     * @param charset name of the charset used to decode {@code stream}; when
     *                {@code StringUtils.isEmpty} reports it as empty, {@code DEFAULT_CHARSET}
     *                is used instead
     * @return the converted document, or {@code null} when {@code stream} is {@code null} or
     *         when parsing fails with a {@code SAXException} or {@code IOException} (the
     *         stack trace is printed in that case)
     */
    public static Document transferByNeko(InputStream stream, String charset) {
        if (stream == null) {
            return null;
        }
        if (StringUtils.isEmpty(charset)) {
            charset = DEFAULT_CHARSET;
        }

        // Author's note: NekoHTML's own DOMParser turned HTML tag names into upper case, and
        // setting the "names/elems" / "names/attrs" properties to "lower" on it made no
        // difference. The workaround is to drive the Xerces DOMParser with a NekoHTML
        // HTMLConfiguration, which is what the code below does.
        Document doc = null;
        try {
            HTMLConfiguration htmlConfiguration = new HTMLConfiguration();
            htmlConfiguration.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
            org.apache.xerces.parsers.DOMParser parser =
                    new org.apache.xerces.parsers.DOMParser(htmlConfiguration);

            InputSource inputSource =
                    new InputSource(new InputStreamReader(stream, Charset.forName(charset)));
            parser.parse(inputSource);

            org.w3c.dom.Document document = parser.getDocument();
            DocumentType documentType = document.getDoctype();
            if (documentType != null) {
                // Detach the DOCTYPE node before handing the DOM to DOMReader.
                document.removeChild(documentType);
            }
            doc = new DOMReader().read(document);
        } catch (SAXException e) {
            // Best effort: report the failure and fall through to return null.
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return doc;
    }
        
  • 相关阅读:
    YII2中andWhere多个or查询
    PHP中使用date获取上月最后一天出现的问题
    使用PHP生成并导出CSV文件
    如何开启MySQL慢查询日志
    PHP使用递归按层级查找数据
    PHP设计模式之单例模式
    Yii2中多表关联查询
    剑指Offer_编程题_合并两个排序的链表
    剑指Offer_编程题_反转链表
    Java自带的性能监测工具_jstack
  • 原文地址:https://www.cnblogs.com/yesun/p/8628285.html
Copyright © 2020-2023  润新知