• Html解析本地搜网站


    业务类

     1 package code.lxy.test;
     2 
     3 import java.io.File;
     4 import java.io.FileNotFoundException;
     5 import java.io.FileOutputStream;
     6 import java.io.PrintWriter;
     7 
     8 import org.htmlparser.Node;
     9 import org.htmlparser.NodeFilter;
    10 import org.htmlparser.Parser;
    11 import org.htmlparser.tags.Div;
    12 import org.htmlparser.tags.LinkTag;
    13 import org.htmlparser.util.NodeList;
    14 import org.htmlparser.util.ParserException;
    15 
    16 public class HtmlParserDemo {
    17     public static void parserHtml(String htmlToParser)
    18             throws FileNotFoundException {
    19         PrintWriter writer = new PrintWriter(new FileOutputStream(new File(
    20                 "d:/test.text")));
    21         Parser parser = new Parser();
    22         try {
    23             parser.setURL(htmlToParser);
    24             parser.setEncoding("UTF-8");
    25             NodeFilter filter = new NodeFilter() {
    26                 @Override
    27                 public boolean accept(Node node) {
    28                     // TODO Auto-generated method stub
    29                     if (node instanceof Div) {
    30                         Div divNode = (Div) node;
    31                         // System.out.println(divNode.getAttribute("class"));
    32                         if (divNode.getAttribute("class") != null) {
    33                             if (divNode.getAttribute("class").endsWith("zuo01_bt")||divNode.getAttribute("class").endsWith("zuo01_con")) {
    34                                 return true;
    35                             }
    36                         }
    37                     }
    38                     return false;
    39                 }
    40             };
    41             NodeList nodelist = parser.extractAllNodesThatMatch(filter);
    42             for (int i = 0; i < nodelist.size(); i++) {
    43                 /*Div divNode=(Div) nodelist.elementAt(i);
    44                 System.out.println(divNode.toPlainTextString());*/
    45                 Div divnode=(Div) nodelist.elementAt(i);
    46                 String test=divnode.getAttribute("class");
    47                 if(divnode.getAttribute("class").equals("zuo01_bt"))
    48                 {
    49                     LinkTag linkTag=(LinkTag) divnode.childAt(1);
    50                     System.out.println(linkTag.getAttribute("title"));
    51                 }else{
    52                     System.out.println(divnode.toPlainTextString());
    53                 }
    54             }
    55             writer.close();
    56         } catch (ParserException e) {
    57             // TODO Auto-generated catch block
    58             e.printStackTrace();
    59         }
    60     }
    61 }

    测试类

    package code.lxy.main;
    
    import java.io.FileNotFoundException;
    
    import code.lxy.test.HtmlParserDemo;
    
    /**
     * Command-line entry point that runs {@link HtmlParserDemo#parserHtml(String)}.
     */
    public class MainClass {

        /** Page parsed when no URL is given on the command line. */
        private static final String DEFAULT_URL = "http://www.locoso.com/cate/0sts2";

        /**
         * Parses the page named by the first argument, or the default page when
         * no argument is supplied.
         *
         * @param args optional; {@code args[0]}, when present, replaces the
         *             default URL
         * @throws FileNotFoundException propagated from
         *             {@code HtmlParserDemo.parserHtml}
         */
        public static void main(String[] args) throws FileNotFoundException {
            String url = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;
            HtmlParserDemo.parserHtml(url);
        }

    }

    结果输出显示

  • 相关阅读:
    004.Docker镜像管理
    001.Heartbeat简介
    005.Docker存储管理
    006.Docker网络管理
    012.Docker仓库管理
    013.Docker私有仓库多Harbor同步部署
    007.基于Docker的Etcd分布式部署
    008.Docker Flannel+Etcd分布式网络部署
    009.Docker Compose部署及基础使用
    002.Open-Falcon Server部署及Agent监控
  • 原文地址:https://www.cnblogs.com/dependmyse/p/3020544.html
Copyright © 2020-2023  润新知