• Java爬取丁香医生疫情数据并存储至数据库


    1、通过页面的url获取html代码

    /**
    	 * Fetches the document at the given URL with an HTTPS GET request and returns
    	 * its body decoded as UTF-8.
    	 * <p>
    	 * The body is read line by line and the lines are joined without their line
    	 * terminators, so the result is a single line of text.
    	 * <p>
    	 * The connection is cast to {@code HttpsURLConnection}, so the URL must use
    	 * the {@code https} scheme.
    	 *
    	 * @param requesturl absolute https URL of the page to download
    	 * @return the response body with all line breaks removed
    	 * @throws IOException if the URL is malformed or the request/read fails
    	 */
    	private static String httpRequset(String requesturl) throws IOException {
    		// A malformed URL now surfaces as the declared IOException instead of
    		// being printed and followed by a NullPointerException on the null buffer.
    		HttpsURLConnection connection = (HttpsURLConnection) new URL(requesturl).openConnection();
    		try {
    			connection.setDoInput(true);
    			connection.setRequestMethod("GET");

    			StringBuilder body = new StringBuilder();
    			// try-with-resources closes the reader (and the wrapped stream) even
    			// when reading fails part-way through.
    			try (BufferedReader reader = new BufferedReader(
    					new InputStreamReader(connection.getInputStream(), "utf-8"))) {
    				String line;
    				while ((line = reader.readLine()) != null) {
    					body.append(line);
    				}
    			}
    			return body.toString();
    		} finally {
    			connection.disconnect();
    		}
    	}
    

      2、获取省市疫情数据

    /**
    	 * 获取全国各个省市的确诊、死亡和治愈人数
    	 * 
    	 * @return
    	 */
    	public static String getAreaStat() {
    		String url = "https://ncov.dxy.cn/ncovh5/view/pneumonia";
    		String htmlResult = "";
    		try {
    			htmlResult = httpRequset(url);
    		} catch (IOException e) {
    			// TODO Auto-generated catch block
    			e.printStackTrace();
    		}
    		// System.out.println(htmlResult);
    
    		// 正则获取数据
    		// 因为html的数据格式看着就像json格式,所以我们正则获取json
    		String reg = "window.getAreaStat = (.*?)\}(?=catch)";
    		Pattern totalPattern = Pattern.compile(reg);
    		Matcher totalMatcher = totalPattern.matcher(htmlResult);
    
    		String result = "";
    		if (totalMatcher.find()) {
    			result = totalMatcher.group(1);
    			System.out.println(result);
    			// 各个省市的是一个列表List,如果想保存到数据库中,要遍历结果,下面是demo
    			JSONArray array = JSONArray.parseArray(result);
    			try {
    				Connection con =BaseConnection.getConnection("VData");
    				Statement stmt = con.createStatement();
    				Date date=new Date(System.currentTimeMillis());
    				for (int i = 0; i <= 30; i++) {
    
    					com.alibaba.fastjson.JSONObject jsonObject = com.alibaba.fastjson.JSONObject
    							.parseObject(array.getString(i));
    					String provinceName = jsonObject.getString("provinceName");
    					String current = jsonObject.getString("currentConfirmedCount");
    					String confirmed = jsonObject.getString("confirmedCount");
    					String cured = jsonObject.getString("curedCount");
    					String dead = jsonObject.getString("deadCount");
    					String suspect=jsonObject.getString("suspectedCount");
    					stmt.executeUpdate("insert into province values('"+provinceName+"','"+confirmed+"','"+suspect+"','"+cured+
    							"','"+dead+"','"+current+"','"+date+"')");
    					
    					JSONArray array2 = jsonObject.getJSONArray("cities");
    					for (int j = 0; j < array2.size(); j++) {
    						com.alibaba.fastjson.JSONObject jsonObject2 = com.alibaba.fastjson.JSONObject
    								.parseObject(array2.getString(j));
    						String cityname = jsonObject2.getString("cityName");
    						String current2 = jsonObject2.getString("currentConfirmedCount");
    						String confirmed2 = jsonObject2.getString("confirmedCount");
    						String cured2 = jsonObject2.getString("curedCount");
    						String dead2 = jsonObject2.getString("deadCount");
    						String suspect2 = jsonObject2.getString("suspectedCount");
    						System.out.println();
    						stmt.executeUpdate("insert into city values('"+cityname+"','"+confirmed2+"','"+suspect2+"','"+cured2+"','"+dead2+"','"+current2+"','"+provinceName+"','"+date+"')");
    					}
    				}
    				stmt.close();
    				con.close();
    			} catch (SQLException e) {
    				// TODO Auto-generated catch block
    				e.printStackTrace();
    			}
    		}
    		return result;
    	}
    

      3、获取世界疫情数据

    /**
    	 * 世界
    	 * 
    	 * @return
    	 */
    	public static String getCountryData() {
    		String url = "https://ncov.dxy.cn/ncovh5/view/pneumonia";
    		String htmlResult = "";
    		try {
    			htmlResult = httpRequset(url);
    		} catch (IOException e) {
    			// TODO Auto-generated catch block
    			e.printStackTrace();
    		}
    		String reg = "window.getListByCountryTypeService2true = (.*?)\}(?=catch)";
    		Pattern totalPattern = Pattern.compile(reg);
    		Matcher totalMatcher = totalPattern.matcher(htmlResult);
    
    		String result = "";
    		if (totalMatcher.find()) {
    			result = totalMatcher.group(1);
    			System.out.println(result);
    			JSONArray array = JSONArray.parseArray(result);
    			try {
    				Connection con =BaseConnection.getConnection("VData");
    				Statement stmt = con.createStatement();
    				for(int i=0;i<array.size();i++) {
    					com.alibaba.fastjson.JSONObject jsobj=com.alibaba.fastjson.JSONObject.parseObject(array.getString(i));
    					if(!jsobj.getString("provinceName").equals("中国")) {
    					Date date = new Date(Long.parseLong(jsobj.getString("createTime")));
    					String s="insert into contury values('"+jsobj.getString("continents")+"','"+jsobj.getString("provinceName")
    					+"','"+jsobj.getString("currentConfirmedCount")+"','"+jsobj.getString("confirmedCount")+"','"+jsobj.getString("suspectedCount")
    					+"','"+jsobj.getString("curedCount")+"','"+jsobj.getString("deadCount")+"','"+date+"')";
    					stmt.executeUpdate(s);
    					}
    				}
    				stmt.close();
    				con.close();
    			}catch (Exception e) {
    				// TODO: handle exception
    			}
    		}
    		return "";
    	}
    

      

  • 相关阅读:
    今天开通我的博客
    在ArcGIS中,利用“行政单元面积权重法”实现人口数据格网化
    ArcGIS 下的水文分析
    常用计数器的verilog实现(binary、gray、onehot、LFSR、环形、扭环形)
    简单组合逻辑电路的verilog实现(包括三态门、38译码器、83优先编码器、8bit奇偶校验器)
    乘法器的verilog实现(并行、移位相加、查找表)
    简单时序逻辑电路的verilog实现,包括D触发器、JK触发器、锁存器、寄存器、
    简单ALU(算术逻辑单元)的verilog实现
    ubuntu下安装virtualbox 错误及解决办法
    C++单例模式对象的控制释放分析
  • 原文地址:https://www.cnblogs.com/XiaoGao128/p/12576648.html
Copyright © 2020-2023  润新知