本周学习了爬虫的方法,可以爬取疫情数据。
package com.yiqin.paqu;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.security.Timestamp;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;

import javax.net.ssl.HttpsURLConnection;
import javax.xml.crypto.Data;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.yiqin.connect.BaseConnection;

/**
 * Downloads the DXY pneumonia page, pulls the JSON arrays embedded in its HTML out with regular
 * expressions, and stores the figures in the tables {@code info2} (provinces), {@code info3}
 * (cities) and {@code info4} (countries).
 */
public class Paqu {

    /** Page whose HTML carries the statistics as JavaScript assignments. */
    private static final String SOURCE_URL = "https://ncov.dxy.cn/ncovh5/view/pneumonia";

    /** Captures the array assigned to {@code window.getAreaStat}. */
    private static final Pattern AREA_STAT_PATTERN =
            Pattern.compile("window.getAreaStat = (.*?)\\}(?=catch)");

    /** Captures the array assigned to {@code window.getListByCountryTypeService2true}. */
    private static final Pattern COUNTRY_PATTERN =
            Pattern.compile("window.getListByCountryTypeService2true = (.*?)\\}(?=catch)");

    /**
     * Upper bound on the number of province entries stored.
     * NOTE(review): the original loop hard-coded indices 0..30; the cap is kept (now bounded by
     * the array size) because the intent is unclear - confirm whether every entry should be stored.
     */
    private static final int MAX_PROVINCES = 31;

    private static final String INSERT_PROVINCE =
            "insert into info2(Date,Province,City,currentnum,Confirmed_num,Yisi_num,Cured_num,Dead_num)"
                    + " values(?,?,?,?,?,?,?,?)";

    // NOTE(review): the original statement listed these seven columns but supplied eight values
    // (an extra "current confirmed" figure read from the province object), so every city insert
    // failed. The extra value is dropped here; if info3 has a currentnum column, add it back and
    // read "currentConfirmedCount" from the city object.
    private static final String INSERT_CITY =
            "insert into info3(Date,Province,City,Confirmed_num,Yisi_num,Cured_num,Dead_num)"
                    + " values(?,?,?,?,?,?,?)";

    private static final String INSERT_COUNTRY =
            "insert into info4(Date,Continents,Province,Confirmed_num,Yisi_num,Cured_num,Dead_num)"
                    + " values(?,?,?,?,?,?,?)";

    public static void main(String[] args) throws IOException {
        getListByCountryTypeService2();
    }

    /**
     * Performs an HTTPS GET on the given URL and returns the response body decoded as UTF-8.
     *
     * <p>Lines are concatenated without separators, exactly as the original did.
     *
     * @param requesturl absolute https URL to fetch
     * @return the response body with line terminators removed
     * @throws IOException if the URL is malformed or the request fails; a malformed URL used to
     *     be swallowed and then caused a NullPointerException on the unset buffer
     */
    private static String httpRequset(String requesturl) throws IOException {
        URL url = new URL(requesturl);
        HttpsURLConnection connection = (HttpsURLConnection) url.openConnection();
        connection.setDoInput(true);
        connection.setRequestMethod("GET");

        StringBuilder body = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(connection.getInputStream(), "utf-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                body.append(line);
            }
        } finally {
            // Release the connection even when reading fails.
            connection.disconnect();
        }
        return body.toString();
    }

    /**
     * Stores the confirmed, suspected, cured and dead counts for the provinces (table
     * {@code info2}) and their cities (table {@code info3}).
     *
     * <p>Missing JSON fields are bound as SQL NULL rather than the text {@code null}.
     *
     * @return the JSON text captured from the page, or an empty string when the page could not be
     *     fetched or the pattern did not match
     */
    public static String getAreaStat() {
        String result = fetchEmbeddedJson(AREA_STAT_PATTERN);
        if (result.isEmpty()) {
            return result;
        }
        // The captured text is a list of provinces; walk it to persist each entry.
        JSONArray provinces = JSONArray.parseArray(result);
        if (provinces == null) {
            return result;
        }

        String nowTime = currentTimestamp();
        try (Connection conn = BaseConnection.getConn();
                PreparedStatement provinceInsert = conn.prepareStatement(INSERT_PROVINCE);
                PreparedStatement cityInsert = conn.prepareStatement(INSERT_CITY)) {
            int limit = Math.min(provinces.size(), MAX_PROVINCES);
            for (int i = 0; i < limit; i++) {
                JSONObject province = provinces.getJSONObject(i);
                String provinceName = province.getString("provinceName");
                // Province rows carry a single blank in the City column.
                insertRow(provinceInsert,
                        nowTime,
                        provinceName,
                        " ",
                        province.getString("currentConfirmedCount"),
                        province.getString("confirmedCount"),
                        province.getString("suspectedCount"),
                        province.getString("curedCount"),
                        province.getString("deadCount"));

                JSONArray cities = province.getJSONArray("cities");
                if (cities == null) {
                    continue;
                }
                for (int j = 0; j < cities.size(); j++) {
                    JSONObject city = cities.getJSONObject(j);
                    insertRow(cityInsert,
                            nowTime,
                            provinceName,
                            city.getString("cityName"),
                            city.getString("confirmedCount"),
                            city.getString("suspectedCount"),
                            city.getString("curedCount"),
                            city.getString("deadCount"));
                }
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return result;
    }

    /**
     * Stores the confirmed, suspected, cured and dead counts for each country (table
     * {@code info4}).
     *
     * <p>Missing JSON fields are bound as SQL NULL rather than the text {@code null}.
     *
     * @return the JSON text captured from the page, or an empty string when the page could not be
     *     fetched or the pattern did not match
     */
    public static String getListByCountryTypeService2() {
        String result = fetchEmbeddedJson(COUNTRY_PATTERN);
        if (result.isEmpty()) {
            return result;
        }
        JSONArray countries = JSONArray.parseArray(result);
        if (countries == null) {
            return result;
        }

        String nowTime = currentTimestamp();
        try (Connection conn = BaseConnection.getConn();
                PreparedStatement countryInsert = conn.prepareStatement(INSERT_COUNTRY)) {
            for (int i = 0; i < countries.size(); i++) {
                JSONObject country = countries.getJSONObject(i);
                insertRow(countryInsert,
                        nowTime,
                        country.getString("continents"),
                        country.getString("provinceName"),
                        country.getString("confirmedCount"),
                        country.getString("suspectedCount"),
                        country.getString("curedCount"),
                        country.getString("deadCount"));
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return result;
    }

    /**
     * Fetches the source page and returns the first capture group of {@code pattern}, printing it
     * to standard output; returns an empty string when the fetch fails or nothing matches.
     */
    private static String fetchEmbeddedJson(Pattern pattern) {
        String html = "";
        try {
            html = httpRequset(SOURCE_URL);
        } catch (IOException e) {
            e.printStackTrace();
        }
        // The data embedded in the HTML looks like JSON, so it is cut out with a regex.
        Matcher matcher = pattern.matcher(html);
        if (!matcher.find()) {
            return "";
        }
        String json = matcher.group(1);
        System.out.println(json);
        return json;
    }

    /** Formats the current system time on a 24-hour clock ("HH"; the original "hh" is 12-hour). */
    private static String currentTimestamp() {
        return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date());
    }

    /** Binds {@code values} to parameters 1..n of {@code stmt} and executes the insert. */
    private static void insertRow(PreparedStatement stmt, String... values) throws SQLException {
        for (int k = 0; k < values.length; k++) {
            stmt.setString(k + 1, values[k]);
        }
        stmt.executeUpdate();
    }
}
package com.yiqin.paqu;
import java.io.BufferedReader;import java.io.IOException;import java.io.InputStream;import java.io.InputStreamReader;import java.net.MalformedURLException;import java.net.URL;import java.security.Timestamp;import java.sql.Connection;import java.sql.DriverManager;import java.sql.SQLException;import java.sql.Statement;import java.util.regex.Matcher;import java.util.regex.Pattern;
import java.text.SimpleDateFormat;import java.util.Date;import java.util.HashMap;import java.util.Map;
import javax.net.ssl.HttpsURLConnection;import javax.xml.crypto.Data;
import com.alibaba.fastjson.JSONArray;import com.alibaba.fastjson.JSONObject;import com.yiqin.connect.BaseConnection;public class Paqu {public static void main(String[] args) throws IOException {getListByCountryTypeService2(); }// 鏍筓RLprivate static String httpRequset(String requesturl) throws IOException {StringBuffer buffer = null;BufferedReader bufferedReader = null;InputStreamReader inputStreamReader = null;InputStream inputStream = null;HttpsURLConnection httpsURLConnection = null;try {URL url = new URL(requesturl);httpsURLConnection = (HttpsURLConnection) url.openConnection();httpsURLConnection.setDoInput(true);httpsURLConnection.setRequestMethod("GET");inputStream = httpsURLConnection.getInputStream();inputStreamReader = new InputStreamReader(inputStream, "utf-8");bufferedReader = new BufferedReader(inputStreamReader);buffer = new StringBuffer();String str = null;while ((str = bufferedReader.readLine()) != null) {buffer.append(str);}} catch (MalformedURLException e) {// TODO Auto-generated catch blocke.printStackTrace();}
return buffer.toString();}
/** * 鑾峰彇鍏ㄥ浗鍚勪釜鐪佸競鐨勭‘璇娿�佹�浜″拰娌绘剤浜烘暟 * * @return */public static String getAreaStat() {String url = "https://ncov.dxy.cn/ncovh5/view/pneumonia";String htmlResult = "";try {htmlResult = httpRequset(url);} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}// System.out.println(htmlResult);
// 姝e垯鑾峰彇鏁版嵁// 鍥犱负html鐨勬暟鎹�牸寮忕湅鐫�灏卞儚json鏍煎紡锛屾墍浠ユ垜浠��鍒欒幏鍙杍sonString reg = "window.getAreaStat = (.*?)\}(?=catch)";Pattern totalPattern = Pattern.compile(reg);Matcher totalMatcher = totalPattern.matcher(htmlResult);
String result = "";if (totalMatcher.find()) {result = totalMatcher.group(1);System.out.println(result);// 鍚勪釜鐪佸競鐨勬槸涓�涓�垪琛↙ist锛屽�鏋滄兂淇濆瓨鍒版暟鎹�簱涓�紝瑕侀亶鍘嗙粨鏋滐紝涓嬮潰鏄痙emoJSONArray array = JSONArray.parseArray(result);try {Connection conn =BaseConnection.getConn();Statement stmt = conn.createStatement();
Date date = new Date();//鑾峰緱绯荤粺鏃堕棿.SimpleDateFormat sdf = new SimpleDateFormat( "yyyy-MM-dd hh:mm:ss" );String nowTime = sdf.format(date);
for (int i = 0; i <= 30; i++) {
com.alibaba.fastjson.JSONObject jsonObject = com.alibaba.fastjson.JSONObject.parseObject(array.getString(i));
String provinceName = jsonObject.getString("provinceName"); String cityname1 = " "; String currentnum = jsonObject.getString("currentConfirmedCount");String confirmed = jsonObject.getString("confirmedCount");String cured = jsonObject.getString("curedCount");String dead = jsonObject.getString("deadCount");String suspect = jsonObject.getString("suspectedCount");stmt.executeUpdate("insert into info2(Date,Province,City,currentnum,Confirmed_num,Yisi_num,Cured_num,Dead_num) values('"+ nowTime + "','"+ provinceName + "','"+ cityname1+ "','"+currentnum + "','" + confirmed + "','" + suspect +"','" + cured +"','" + dead +"')");JSONArray array2 = jsonObject.getJSONArray("cities");for (int j = 0; j < array2.size(); j++) {com.alibaba.fastjson.JSONObject jsonObject2 = com.alibaba.fastjson.JSONObject.parseObject(array2.getString(j));String provinceName2 = jsonObject.getString("provinceName");String cityname = jsonObject2.getString("cityName");String confirmed2 = jsonObject2.getString("confirmedCount"); String currentnum2 = jsonObject.getString("currentConfirmedCount");String cured2 = jsonObject2.getString("curedCount");String dead2 = jsonObject2.getString("deadCount");String suspect2 = jsonObject2.getString("suspectedCount");stmt.executeUpdate("insert into info3(Date,Province,City,Confirmed_num,Yisi_num,Cured_num,Dead_num) values('"+ nowTime + "','"+ provinceName2 + "','"+ cityname + "','"+currentnum2+ "','" + confirmed2 + "','" + suspect2 +"','" + cured2 +"','" + dead2 +"')");}}stmt.close();conn.close();} catch (SQLException e) {// TODO Auto-generated catch blocke.printStackTrace();}}return result;}/** * 鑾峰彇鍏ㄧ悆鍚勪釜鍥藉�鐨勭‘璇娿�佹�浜″拰娌绘剤浜烘暟 * @return */public static String getListByCountryTypeService2() {String url = "https://ncov.dxy.cn/ncovh5/view/pneumonia";String htmlResult = "";try {htmlResult = httpRequset(url);} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}// System.out.println(htmlResult);
// 姝e垯鑾峰彇鏁版嵁// 鍥犱负html鐨勬暟鎹�牸寮忕湅鐫�灏卞儚json鏍煎紡锛屾墍浠ユ垜浠��鍒欒幏鍙杍sonString reg = "window.getListByCountryTypeService2true = (.*?)\}(?=catch)";Pattern totalPattern = Pattern.compile(reg);Matcher totalMatcher = totalPattern.matcher(htmlResult);
String result = "";if (totalMatcher.find()) {result = totalMatcher.group(1);System.out.println(result);// 鍚勪釜鐪佸競鐨勬槸涓�涓�垪琛↙ist锛屽�鏋滄兂淇濆瓨鍒版暟鎹�簱涓�紝瑕侀亶鍘嗙粨鏋滐紝涓嬮潰鏄痙emoJSONArray array = JSONArray.parseArray(result);try {Connection conn = BaseConnection.getConn();Statement stmt = conn.createStatement();
Date date = new Date();//鑾峰緱绯荤粺鏃堕棿.SimpleDateFormat sdf = new SimpleDateFormat( "yyyy-MM-dd hh:mm:ss" );String nowTime = sdf.format(date);
for (int i = 0; i <array.size(); i++) {com.alibaba.fastjson.JSONObject jsonObject = com.alibaba.fastjson.JSONObject.parseObject(array.getString(i)); String continents =jsonObject.getString("continents"); String provinceName = jsonObject.getString("provinceName");String confirmed = jsonObject.getString("confirmedCount");String cured = jsonObject.getString("curedCount");String dead = jsonObject.getString("deadCount");String suspect = jsonObject.getString("suspectedCount");stmt.executeUpdate("insert into info4(Date,Continents,Province,Confirmed_num,Yisi_num,Cured_num,Dead_num) values('"+ nowTime + "','"+ continents + "','"+ provinceName + "','" + confirmed + "','" + suspect +"','" + cured +"','" + dead +"')");}stmt.close();conn.close();} catch (SQLException e) {// TODO Auto-generated catch blocke.printStackTrace();}}return result;}
}