• 第八周总结


    通过热词统计的练习,我再一次进行了数据爬取。这一次爬取成功了,同时也学习了页面解析的技术。

    源码:

    import java.util.ArrayList;import java.util.Date;import java.util.List;

    import org.jsoup.Jsoup;import org.jsoup.nodes.Document;import org.jsoup.nodes.Element;import org.jsoup.select.Elements;

    import com.gargoylesoftware.htmlunit.BrowserVersion;

    import com.gargoylesoftware.htmlunit.WebClient;

    import Dao.AddService;

     

     /**
      * Scraper entry point: loads a page through an HtmlUnit {@code WebClient},
      * re-parses the rendered markup with Jsoup and walks the {@code .province}
      * elements found under the first {@code .wrap} element.
      *
      * NOTE(review): the listing below is incomplete and does not compile as shown;
      * the individual problems are flagged inline in {@code main}.
      */
     public class Paqu {

     

        /**
         * Fetches the page at {@code url}, waits for background JavaScript, and reads
         * text from each province element into local strings.
         *
         * @param args command-line arguments; not read by the code shown
         */
        public static void main(String args[]) {

            // Per-province values read from the page.
            // NOTE(review): sheng is never assigned in the code shown.

            String sheng="";

            String xinzeng="";

            String leiji="";

            String zhiyu="";

            String siwang="";

            // NOTE(review): url is left as the empty string, so getPage(url) below has
            // no real address to load — the target URL appears to have been removed.
             String url = "";

            

            // NOTE(review): i is never used in the code shown.
            int i=0;

            

            try {

                // Build a WebClient that emulates the Chrome browser.

            WebClient webClient = new WebClient(BrowserVersion.CHROME);

               // Do not abort on JavaScript errors or on failing HTTP status codes.
               webClient.getOptions().setThrowExceptionOnScriptError(false);

             webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);

                webClient.getOptions().setTimeout(8000);

                // NOTE(review): HtmlPage is not among the imports shown for this class.
                HtmlPage rootPage = webClient.getPage(url);

                            // Give background JavaScript time to run before the page is
                            // serialized (argument 6000; presumably milliseconds — confirm).
                            webClient.waitForBackgroundJavaScript(6000);

                // Serialize the rendered page and hand it to Jsoup for CSS-selector queries.
                String html = rootPage.asXml();

                Document doc = Jsoup.parse(html);

                //System.out.println(doc);

                // NOTE(review): if no ".wrap" element exists, first() presumably yields null
                // and the next line would throw NullPointerException — confirm against Jsoup docs.
                Element listdiv1 = doc.select(".wrap").first();

                Elements listdiv2 = listdiv1.select(".province");

                for(Element s:listdiv2) {

                    Elements span = s.getElementsByTag("span");

                    // NOTE(review): real_name is selected but never read in the code shown;
                    // presumably sheng was meant to receive its text — confirm.
                    Elements real_name=span.select(".item_name");

                    

                    // NOTE(review): real_newconfirm, real_confirm, real_heal and real_dead are
                    // not declared anywhere in this listing; their selectors appear to be missing.
                    xinzeng=real_newconfirm.text();

                    leiji=real_confirm.text();

                    zhiyu=real_heal.text();

                    siwang=real_dead.text();

       

                

            // NOTE(review): the for loop opened above has no closing brace of its own, so the
            // brace below closes the loop instead of the try block. The imported AddService is
            // also never called and webClient is never closed in the code shown.
            // NOTE(review): IOException is not among the imports shown for this class.
            } catch (IOException e) {

                // NOTE(review): the original comment here read
                // "TODO Auto-generated catch block            e.printStackTrace();" —
                // the printStackTrace call is part of the comment, so it never runs.

                System.out.println("爬取失败");

            }

        }

        

    }

       

      AddService.java:

    package Dao;

    import java.sql.Connection;
    import java.sql.PreparedStatement;
    import java.sql.Statement;

    import utils.DBUtils;

    public class AddService {

        public void add(String table,String sheng,String xinzeng,String leiji,String zhiyu,String dead,String time) {

            String sql = "insert into "+table+" (Province,Newconfirmed_num ,Confirmed_num,Cured_num,Dead_num,Time) values('" + sheng + "','" + xinzeng +"','" + leiji +"','" + zhiyu + "','" + dead+ "','" + time+ "')";

            System.out.println(sql);

            Connection conn = DBUtils.getConn();

            Statement state = null;

            int a = 0;

            try {

                state = conn.createStatement();

                a=state.executeUpdate(sql);

            } catch (Exception e) {

                e.printStackTrace();

            } finally {

                DBUtils.close(state, conn);

            }        

        }

    }

     

  • 相关阅读:
    [Bzoj2286]消耗战(虚树+DP)
    [Bzoj3252]攻略(dfs序+线段树)
    [Bzoj3991]寻宝游戏(dfs序+set)
    [Codeforces947D]Riverside Curio(思维)
    java常见面试题及答案 1-10(基础篇)
    Nginx+Tomcat+Redis实现负载均衡、资源分离、session共享
    MySQL 实现row_number() 分组排序功能
    精华帖----网址收藏
    easyUI loyout tabs自适应宽度
    jquery 图片本地预览
  • 原文地址:https://www.cnblogs.com/2210633591zhang/p/13094673.html
Copyright © 2020-2023  润新知