• jsoup 抓取省市区


    package com.xazhxc.htjcom.back.controller.base;
    
    import cn.hutool.core.util.StrUtil;
    import com.alibaba.fastjson.JSONArray;
    import com.alibaba.fastjson.JSONObject;
    import com.xazhxc.htjcom.entity.Citys;
    import com.xazhxc.htjcom.init.HttpServerInit;
    import com.xazhxc.htjcom.kit.Kits;
    import com.xazhxc.htjcom.kit.PropsKit;
    import com.xazhxc.htjcom.kit.UploadKit;
    import com.xazhxc.htjcom.service.CitysService;
    import lombok.extern.slf4j.Slf4j;
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;
    import org.jsoup.select.Elements;
    import org.tio.core.ChannelContext;
    import org.tio.core.GroupContext;
    import org.tio.http.common.HttpRequest;
    import org.tio.http.common.HttpResponse;
    import org.tio.http.common.UploadFile;
    import org.tio.http.server.annotation.RequestPath;
    import org.tio.http.server.mvc.Routes;
    import org.tio.http.server.util.Resps;
    
    import java.io.BufferedWriter;
    import java.io.File;
    import java.io.FileWriter;
    import java.io.IOException;
    import java.util.HashMap;
    import java.util.Map;
    import java.util.Set;
    
    /**
     * 公共类
     *
     * @author leizhen.wang
     */
    @RequestPath(value = "/base")
    @Slf4j
    public class BaseController {
        static CitysService citysService = Kits.getBean( CitysService.class);
        @Mapper
        ProductService productService;
    
       private static Map<Integer, String> cssMap = new HashMap<Integer, String>();
        private static BufferedWriter bufferedWriter = null;
       static {
           cssMap.put(1, "provincetr");// 省
           cssMap.put(2, "citytr");// 市
           cssMap.put(3, "countytr");// 市
       }
        @RequestPath(value = "/pro")
        public HttpResponse product(HttpRequest request) throws IOException {
    
            new Thread(() -> {
                try {
                    initFile();
                    Document connect = Jsoup.connect( "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2017/" ).get();
                    int level = 1;
                    Elements rowProvince = connect.select("tr." + cssMap.get(level));
                    for (Element provinceElement : rowProvince) {
                        Elements select = provinceElement.select("a");
                        for (Element province  : select) {
                            try {
                                parseNextLevel(province, level + 1, null, null);
                            } catch (IOException e) {
                                e.printStackTrace();
                            }
    //                        System.out.println("----province-----"+province);
                        }
                    }
                    closeStream();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }).start();
    
            return Resps.json( request, Kits.result().ok() );
        }
    
        private static void closeStream() {
            if (bufferedWriter != null) {
                try {
                    bufferedWriter.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    
        private static void parseNextLevel(Element parentElement, int level, String code, String area) throws IOException {
            try {
                Thread.sleep(500);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
    
            String attr = parentElement.attr( "abs:href" );
            if (StrUtil.isEmpty( code )) {
                code = attr.substring( 54,56 )+"0000";
            }
    //        Citys citys = new Citys(String.valueOf( SnowFlakeUtil.getFlowIdInstance().nextId() ), code, parentElement.text(), "-1");
    //        citysService.insert( citys );
            Document doc = Jsoup.connect( attr).get();
            if (doc != null) {
                Elements newsHeadlines = doc.select("tr." + cssMap.get(level));
                for (Element element : newsHeadlines) {
                    if (StrUtil.isEmpty( area )) {
    //                    printInfo(element, level + 1, code);
                    } else {
                        printInfo2(element, level + 1, code);
                    }
                    Elements select = element.select("a");// 在递归调用的时候,这里是判断是否是村一级的数据,村一级的数据没有a标签
    //                System.out.println(select);
                    if (select.size() != 0) {
                        code = element.select("td").first().text();
                        parseNextLevel2(select.last(), level + 1, code, "area");
                    }
                }
            }
    
        }
    
        private static void parseNextLevel2(Element parentElement, int level, String code, String area) throws IOException {
            try {
                Thread.sleep(500);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
            String attr = parentElement.attr( "abs:href" );
            if (StrUtil.isEmpty( code )) {
                code = attr.substring( 54,56 )+"0000";
            }
            Document doc = Jsoup.connect( attr).get();
            if (doc != null) {
                Elements newsHeadlines = doc.select("tr." + cssMap.get(level));
                for (Element element : newsHeadlines) {
                    printInfo2(element, level + 1, code);
                }
            }
    
        }
    
        private static void initFile() throws IOException {
            bufferedWriter = new BufferedWriter(new FileWriter(new File("d:\CityInfo.txt"), true));
        }
        private static void printInfo(Element element, int level, String pid) {
    //        System.out.println(pid + "---: "+element.select("td").last().text()+"============="+element.select("td").first().text());
            Citys citys = new Citys(String.valueOf( SnowFlakeUtil.getFlowIdInstance().nextId() ), element.select("td").first().text(), element.select("td").last().text(), pid);
            citysService.insert( citys );
            /*try {
                bufferedWriter.write(element.select("td").last().text() + "{" + level + "}["
                        + element.select("td").first().text() + "]");
                bufferedWriter.newLine();
                bufferedWriter.flush();
            } catch (IOException e) {
                e.printStackTrace();
            }*/
        }
    
        private static void printInfo2(Element element, int level, String pid) {
    //        System.out.println(pid + "---: "+element.select("td").last().text()+"============="+element.select("td").first().text());
            Citys citys = new Citys(String.valueOf( SnowFlakeUtil.getFlowIdInstance().nextId() ), element.select("td").first().text(), element.select("td").last().text(), pid);
            citysService.insert( citys );
            /*try {
                bufferedWriter.write(element.select("td").last().text() + "{" + level + "}["
                        + element.select("td").first().text() + "]");
                bufferedWriter.newLine();
                bufferedWriter.flush();
            } catch (IOException e) {
                e.printStackTrace();
            }*/
        }
    
    
      
    
    
        
    }
    

      

  • 相关阅读:
    spring
    抽象和封装
    Oracle索引的原理
    使用JdbcTemplate.queryForObject 的注意点
    ORM是什么意思
    Java 后台处理数据库的二进制图片流
    Extjs girdPanel显示图片
    斜率dp
    多重背包的二进制优化
    POJ 3249 DAG图最短路
  • 原文地址:https://www.cnblogs.com/joyny/p/9995040.html
Copyright © 2020-2023  润新知