通过热词统计,再一次进行数据的爬取,这次可以爬取,也学习了解析的技术
源码:
import java.util.ArrayList;import java.util.Date;import java.util.List;
import org.jsoup.Jsoup;import org.jsoup.nodes.Document;import org.jsoup.nodes.Element;import org.jsoup.select.Elements;
import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.WebClient;
import Dao.AddService;
public class Paqu {
public static void main(String args[]) {
// TODO Auto-generated method stub
String sheng="";
String xinzeng="";
String leiji="";
String zhiyu="";
String siwang="";
String url = "";
int i=0;
try {
//构造一个webClient 模拟Chrome 浏览器
WebClient webClient = new WebClient(BrowserVersion.CHROME);
webClient.getOptions().setThrowExceptionOnScriptError(false);
webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
webClient.getOptions().setTimeout(8000);
HtmlPage rootPage = webClient.getPage(url);
webClient.waitForBackgroundJavaScript(6000);
String html = rootPage.asXml();
Document doc = Jsoup.parse(html);
//System.out.println(doc);
Element listdiv1 = doc.select(".wrap").first();
Elements listdiv2 = listdiv1.select(".province");
for(Element s:listdiv2) {
Elements span = s.getElementsByTag("span");
Elements real_name=span.select(".item_name");
xinzeng=real_newconfirm.text();
leiji=real_confirm.text();
zhiyu=real_heal.text();
siwang=real_dead.text();
} catch (IOException e) {
// TODO Auto-generated catch block e.printStackTrace();
System.out.println("爬取失败");
}
}
}
AddService.java:
package Dao;
import java.sql.Connection;import java.sql.Statement;
import utils.DBUtils;
public class AddService {
public void add(String table,String sheng,String xinzeng,String leiji,String zhiyu,String dead,String time) {
String sql = "insert into "+table+" (Province,Newconfirmed_num ,Confirmed_num,Cured_num,Dead_num,Time) values('" + sheng + "','" + xinzeng +"','" + leiji +"','" + zhiyu + "','" + dead+ "','" + time+ "')";
System.out.println(sql);
Connection conn = DBUtils.getConn();
Statement state = null;
int a = 0;
try {
state = conn.createStatement();
a=state.executeUpdate(sql);
} catch (Exception e) {
e.printStackTrace();
} finally {
DBUtils.close(state, conn);
}
}
}