• JAVA爬虫抓取页面的URL数据





    在互联网发达的今天,程序员在开发过程中往往需要一些稳定的网站数据。但有些接口数据是收费的,为了方便开发,程序员会使用爬虫技术抓取数据。爬虫通常分为几种:根据网页URL抓取、
    根据接口抓取等等。下面介绍如何根据URL抓取相应数据。
    附录常用免费天气接口:
    http://api.weatherdt.com/common/?area=101090601&type=forecast[24h_5d{001,002}]&key=3c801494e96ea41ae2c77634b0960977

    http://www.weather.com.cn/data/cityinfo/101090601.html

     http://api.k780.com/?app=weather.future&weaid=langfang&&appkey=10003&sign=b59bc3ef6191eb9f747dd4e83c99f2a4&format=json

    http://api.weatherdt.com/common/?area=101160901&type=observe&key=fd034bf8fe70289698ec4ea79876feaa
    {
        "observe": {
            "101160901": {
                "1001002": {
                    "006": "0.0",
                    "000": "17:15",
                    "005": "53",
                    "004": "2",
                    "003": "2",
                    "007": "879",
                    "002": "25"
                }
            }
        }
    }
    http://www.weather.com.cn/data/sk/101160901.html
    
    {
        "weatherinfo": {
            "city": "天水",
            "cityid": "101160901",
            "temp": "20.5",
            "WD": "北风",
            "WS": "小于3级",
            "SD": "40%",
            "AP": "883.8hPa",
            "njd": "暂无实况",
            "WSE": "<3",
            "time": "17:00",
            "sm": "1.3",
            "isRadar": "1",
            "Radar": "JC_RADAR_AZ9938_JB"
        }
    }




    1
    //抓取森林防火最新页面的URL 2 public void getSlhz(){ 3 String strURL="http://wwww.forestry.gov.cn/Common/index/3563.html"; 4 URL url; 5 6 try{ 7 url = new URL(strURL); 8 HttpURLConnection httpConn=(HttpURLConnection)url.openConnection(); 9 InputStreamReader input=new InputStreamReader(httpConn.getInputStream(),"utf-8"); 10 11 BufferedReader buf= new BufferedReader(input); 12 13 String line=""; 14 StringBuilder conf=new StringBuilder(); 15 while((line=buf.readLine()))!=null){ 16 conf.append(line); 17 } 18 String buf=conf.toString(); 19 int beginIx=buf.indexOf("<ul> <li class="cl"><a href="">); 20 int endIx=buf.indexOf("/" title="""); 21 String result=buf.substring(beginIx,endIx); 22 String resl="http://www.forestry.gov.cn"+result.split("href="")[1]; 23 24 System.out.println(resl); 25 }catch(Exception e){ 26 e.printStackTrace(); 27 28 } 29 30 }

     天气接口爬虫

     
     4 import org.apache.logging.log4j.core.util.JsonUtils;
     5 import org.jsoup.Jsoup;
     6 import org.jsoup.nodes.Document;
     7 import org.jsoup.nodes.Element;
     8 import org.jsoup.select.Elements;
     9 
    10 import net.sf.json.JSONArray;
    11 import net.sf.json.JSONObject;
    12 import java.util.List;
    13 
    14 
    15 public class weth {
    16 
    17 public static void main(String[] args) {
    18   String[] typeStr=new String[]{"tomorrow","third","fourth","fifth","sixth","seventh"};
    19   JSONArray ja=new JSONArray();
    20   for(String str:typeStr){
    21    Document weatherDoc = WeatherDataCatch("http://tianqi.2345.com/"+str+"-54515.htm");
    22    JSONObject jobject = new JSONObject();
    23    Elements weatherData = ((Element) weatherDoc).getElementsByClass("tbody");  //获取数据块
    24    Elements infoF = weatherData.select("[class = phrase]");
    25    String info = infoF.get(0).text();
    26    if(info!=null&&!"".equals(info)){                                  //天气情况
    27     jobject.put("info", info);  
    28    }
    29    Elements wdDom = weatherData.select("[class = temperature]");
    30    String zgwd = wdDom.get(0).text();
    31    if(zgwd!=null&&!"".equals(zgwd)){                                  //最高温度
    32     jobject.put("zgwd", zgwd);  
    33    }
    34    
    35    String zdwd = wdDom.get(1).text();
    36    if(zdwd!=null&&!"".equals(zdwd)){                                  //最低温度
    37     jobject.put("zdwd", zdwd);  
    38    }
    39    
    40    Elements parameter = ((Element) weatherDoc).getElementsByClass("parameter"); //获取数据块
    41    Elements degree = parameter.select("li");
    42    String kqzl = degree.get(0).select("i").text();
    43    if(kqzl!=null&&!"".equals(kqzl)){                                 
    44     jobject.put("kqzl", kqzl);       //空气质量
    45    }
    46    if(str.equals("tomorrow")){//
    47     String fxfs = degree.get(1).select("i").text();
    48     if(fxfs!=null&&!"".equals(fxfs)){                                 
    49      jobject.put("fxfs", fxfs);         //风向风速
    50     }
    51    }else{
    52     String fxfs = degree.get(1).select("i").text();
    53     fxfs+= degree.get(2).select("i").text();
    54     if(fxfs!=null&&!"".equals(fxfs)){                                 
    55      jobject.put("fxfs", fxfs);         //风向风速
    56     }
    57    }
    58    
    59    ja.add(jobject);
    60   }
    61   System.out.println(ja.toString());
    62  }
    63 
    64 
    65 
    66 public static Document WeatherDataCatch(String url){
    67       String result="";
    68        Document doc = null;
    69       try {
    70         doc =  Jsoup.connect(url).timeout(100000).get();
    71 //        Element body = doc.body();
    72 //        result = body.text();
    73       } catch (Exception e) {
    74        // TODO Auto-generated catch block
    75        e.printStackTrace();
    76       }
    77       return doc;
    78      }
    79 }
    80 
    
    [{"info":"阴","zgwd":"最高:27℃","zdwd":"最低:19℃","kqzl":"良","fxfs":"西南风2级"},{"info":"小雨","zgwd":"最高:25℃","zdwd":"最低:18℃","kqzl":"良","fxfs":"西北风2级"},{"info":"晴","zgwd":"最高:28℃","zdwd":"最低:16℃","kqzl":"良","fxfs":"东北风3级"},{"info":"多云","zgwd":"最高:28℃","zdwd":"最低:16℃","kqzl":"良","fxfs":"西南风3级"},{"info":"多云","zgwd":"最高:27℃","zdwd":"最低:16℃","kqzl":"良","fxfs":"东南风3级"},{"info":"小雨","zgwd":"最高:25℃","zdwd":"最低:16℃","kqzl":"良","fxfs":"东南风2级"}]

    pom.xml配置

    <dependency>
                <groupId>org.jsoup</groupId>
                <artifactId>jsoup</artifactId>
                <version>1.7.2</version>
            </dependency>

    天气接口工具类:

    WeatherUtil.java

    package com.gsafety.langfang.screendisplay.utils;
    
    import java.io.BufferedInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.UnsupportedEncodingException;
    import java.net.HttpURLConnection;
    import java.net.MalformedURLException;
    import java.net.URL;
    import java.net.URLEncoder;
    import java.text.SimpleDateFormat;
    import java.util.ArrayList;
    import java.util.Calendar;
    import java.util.Date;
    import java.util.GregorianCalendar;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;
    
    import javax.annotation.Resource;
    
    import org.apache.commons.httpclient.HttpClient;
    import org.apache.commons.httpclient.methods.GetMethod;
    import org.apache.commons.lang.StringUtils;
    import org.apache.log4j.Logger;
    import org.springframework.stereotype.Controller;
    import org.springframework.web.bind.annotation.RequestMapping;
    import org.springframework.web.bind.annotation.RequestMethod;
    import org.springframework.web.bind.annotation.ResponseBody;
    
    import com.google.gson.Gson;
    import com.gsafety.cloudframework.common.base.page.PageResult;
    import com.gsafety.cloudframework.config.util.ConfigCacheUtil;
    import com.gsafety.langfang.screendisplay.vo.Returnmsg;
    
    import net.sf.json.JSONObject;
    
    public class WeatherUtil {
        
        // Logger shared by every static helper in this class.
        private static final Logger logger = Logger.getLogger(WeatherUtil.class);
            // Connection settings, filled once from the "weatherUrl" configuration
            // entry by the static initializer below. They stay null when that entry
            // is empty.
            private static String wUrl;
            private static String area;
            private static String type2day;
            private static String type5day;
            private static String key;
            static {
                // The configuration value is expected to be a JSON object.
                String weatherUrl = ConfigCacheUtil.getConf("weatherUrl").getValue();
                if(StringUtils.isNotEmpty(weatherUrl)){
                    JSONObject jsonObject = JSONObject.fromObject(weatherUrl);
                    wUrl = jsonObject.getString("url"); // base URL
                    area = jsonObject.getString("langfangAreaCode"); // area code
                    type2day= jsonObject.getString("type2d"); // data type for the 2-day forecast
                    // NOTE(review): the original comment called this the 7-day data type,
                    // but the key is "type5d" — confirm which is intended.
                    type5day= jsonObject.getString("type5d");
                    key= jsonObject.getString("key"); // API key
                }
            }
            // Timestamp format used in log messages.
            // NOTE(review): SimpleDateFormat is not thread-safe; this shared instance
            // is only safe if the methods using it are not called concurrently.
            private static final SimpleDateFormat SDF = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
            
            /**
             * Requests the current ("observe") weather data for the small weather widget.
             *
             * When the HTTP call fails or returns an empty body, a hard-coded sample
             * observation is parsed instead, so callers normally still get a JSON object.
             *
             * @return the parsed response, or {@code null} if anything other than the
             *         HTTP call itself fails (for example, the body is not valid JSON)
             */
            public static JSONObject getDataJson() {
                JSONObject json = null;
                GetMethod get = null;
                try {
                    // Start time
                    String startTime = SDF.format(new Date());
                    logger.info("*************"+startTime +"气象接口--请求开始**********************");
                    // Example: http://api.weatherdt.com/common/?area=101160901&type=observe&key=fd034bf8fe70289698ec4ea79876feaa
                    // NOTE(review): this request uses a hard-coded key rather than the configured one.
                    String url = wUrl + area + "&type=" + "observe" + "&key=fd034bf8fe70289698ec4ea79876feaa";
                    get = new GetMethod(url);
                    HttpClient client = new HttpClient();
                    logger.info("*************气象接口--地址:" + url + "**********************");
                    String result = "";
                    try {
                        client.executeMethod(get);
                        result = get.getResponseBodyAsString();
                    } catch (IOException e) {
                        // Best effort: fall through to the sample data below.
                        logger.error("*************气象接口--请求失败**********************", e);
                    }
                    if (StringUtils.isEmpty(result)) {
                        result = "{'observe':{'101160901':{'1001002':{'006': '0.0','000':'17:15','005':'53','004':'2','003':'2','007':'879','002':'25'}}}}";
                    }
                    json = JSONObject.fromObject(result);
                    logger.info("*************气象接口--返回值:" + result + "**********************");
                    // End time: taken now, not from the start-time calendar.
                    String endTime = SDF.format(new Date());
                    logger.info("*************"+endTime+"气象接口--请求结束**********************");
                } catch (Exception e) {
                    logger.error("*************气象接口--请求失败**********************", e);
                } finally {
                    // Release the connection after the response has been read.
                    if (get != null) {
                        get.releaseConnection();
                    }
                }
                return json;
            }
         
         
         /**
          * Fetches live weather data from an alternative site (weather.com.cn).
          * According to the original note it was added because the data needed by
          * the large screen was incomplete; it is currently unused and kept for later.
          *
          * @return the parsed response, or {@code null} when the status is not 200
          *         or an I/O error occurs
          */
         public static JSONObject getDataofJson() {
             JSONObject json = null;
             HttpURLConnection conn = null;
             try {
                 logger.info("*************气象接口--请求开始**********************");

                 URL url = new URL("http://www.weather.com.cn/data/sk/101160901.html");
                 logger.info("*************气象接口--地址:http://www.weather.com.cn/data/sk/101160901.html**********************");

                 conn = (HttpURLConnection) url.openConnection();
                 // This is a plain GET, so only input is enabled.
                 conn.setDoInput(true);
                 conn.setUseCaches(false);
                 conn.setRequestMethod("GET");
                 conn.setRequestProperty("Connection", "Keep-Alive");
                 conn.setRequestProperty("Charset", "UTF-8");
                 conn.setRequestProperty("contentType", "application/json");
                 conn.connect();

                 int status = conn.getResponseCode();
                 logger.info("*************气象接口--状态:"+status+"**********************");
                 if (status == 200) {
                     // readBig drains and closes the stream; it must not be read again afterwards.
                     byte[] data1 = readBig(conn.getInputStream());
                     String responseStr = new String(data1, "utf-8");
                     json = JSONObject.fromObject(responseStr);
                 } else {
                     logger.info("*************气象接口--请求失败**********************");
                 }
                 logger.info("*************气象接口--请求结束**********************");
             } catch (IOException e) {
                 logger.error("*************气象接口--请求失败**********************", e);
             } finally {
                 // Deterministic cleanup, including the non-200 path where the body is never read.
                 if (conn != null) {
                     conn.disconnect();
                 }
             }

             return json;
         }
         
         
         
         
         
         
         
         
         
            /**
             * Requests the 2-day forecast (type {@code forecast[24h_2d{001,002}]}) for the
             * small weather widget.
             *
             * When the HTTP call fails or returns an empty body, a hard-coded sample
             * forecast is parsed instead.
             *
             * @return the parsed response
             * @throws java.net.UnknownHostException kept for compatibility with existing callers;
             *         I/O failures of the request itself are caught and logged here
             */
            public static JSONObject getData2dayJson() throws java.net.UnknownHostException {
                // Start time
                String startTime = SDF.format(new Date());
                logger.info("*************"+startTime +"气象接口--请求开始**********************");

                String url = null;
                try {
                    // The brackets and braces of the type selector must be percent-encoded.
                    url = wUrl+area+"&type=forecast"+URLEncoder.encode("[","UTF-8")+"24h_2d"+URLEncoder.encode("{","UTF-8")+"001,002"+URLEncoder.encode("}]","UTF-8")+"&key="+key;
                } catch (UnsupportedEncodingException e1) {
                    logger.error("*************气象接口--请求失败**********************", e1);
                }
                logger.info("*************气象接口--地址:" + url + "**********************");

                String result = "";
                if (url != null) {
                    GetMethod get = new GetMethod(url);
                    HttpClient client = new HttpClient();
                    try {
                        client.executeMethod(get);
                        result = get.getResponseBodyAsString();
                    } catch (IOException e) {
                        // Best effort: fall through to the sample data below.
                        logger.error("*************气象接口--请求失败**********************", e);
                    } finally {
                        // Release the connection after the response has been read.
                        get.releaseConnection();
                    }
                }
                if (StringUtils.isEmpty(result)) {
                    result = "{'forecast':{'24h':{'101090601':{'1001001':[{'003':'35','004':'21','001':'00','002':'00'},{'003':'35','004':'21','001':'00','002':'00'}]}}}}";
                }
                JSONObject json = JSONObject.fromObject(result);
                logger.info("*************气象接口--返回值:" + result + "**********************");

                // End time: taken now, not from the start-time calendar.
                String endTime = SDF.format(new Date());
                logger.info("*************"+endTime+"气象接口--请求结束**********************");
                return json;
            }
         
         /**
          * Requests the multi-day forecast for the large weather panel.
          *
          * NOTE(review): the original description says 7 days of data, but the request
          * asks for type {@code forecast[24h_5d{001,002}]}; the sample fallback holds
          * seven entries. Confirm which range the service actually returns.
          *
          * When the HTTP call fails or returns an empty body, a hard-coded sample
          * forecast is parsed instead.
          *
          * @return the parsed response, or {@code null} if building the URL fails
          */
         public static JSONObject getData7dayJson() {
             JSONObject json = null;
             GetMethod get = null;
             try {
                 // Start time
                 String startTime = SDF.format(new Date());
                 logger.info("*************"+startTime +"气象接口--请求开始**********************");
                 // The brackets and braces of the type selector must be percent-encoded.
                 String url = wUrl+area+"&type=forecast"+URLEncoder.encode("[","UTF-8")+"24h_5d"+URLEncoder.encode("{","UTF-8")+"001,002"+URLEncoder.encode("}]","UTF-8")+"&key="+key;
                 logger.info("*************气象接口--地址:"+url+"   **********************");

                 get = new GetMethod(url);
                 HttpClient client = new HttpClient();
                 String result = "";
                 try {
                     client.executeMethod(get);
                     result = get.getResponseBodyAsString();
                 } catch (IOException e) {
                     // Best effort: fall through to the sample data below.
                     logger.error("*************气象接口--请求失败**********************", e);
                 }
                 if (StringUtils.isEmpty(result)) {
                     result = "{'forecast':{'24h':{'101090601':{'1001001':[{'003':'35','004':'21','001':'00','002':'00'},{'003':'35','004':'21','001':'00','002':'00'},{'003':'35','004':'21','001':'00','002':'00'},{'003':'38','004':'25','001':'01','002':'01'},{'003':'36','004':'25','001':'01','002':'01'},{'003':'33','004':'23','001':'01','002':'01'},{'003':'33','004':'22','001':'01','002':'02'}]}}}}";
                 }
                 json = JSONObject.fromObject(result);
                 logger.info("*************气象接口--结果:"+result +"**********************");
                 // End time: taken now, not from the start-time calendar.
                 String endTime = SDF.format(new Date());
                 logger.info("*************"+endTime+"气象接口--请求结束**********************");
             } catch (IOException e) {
                 logger.error("*************气象接口--请求失败**********************", e);
             } finally {
                 // Release the connection after the response has been read.
                 if (get != null) {
                     get.releaseConnection();
                 }
             }

             return json;
         }
        /**
         * 气象 wukaihua
         * 
         * "observe": {//实况
         * "101010100": {//站号
         * "1001002": {//数据大类
         * "006": "0",//当前降水量(单位是毫米)
         * "007": "1004",//当前气压(单位百帕)
         * "003": "1",//当前风力(单位是级,不用转码)
         * "004": "2",//当前风向编号
         * "000": "10:25",//实况发布时间
         * "005": "79",//当前湿度(单位%)
         * "002": "7"//当前温度(单位摄氏度)
         * 
         * @return
         */
      
         //判断天气
        public static String getWeatherStr(String str) {
            if (StringUtils.isEmpty(str)) {
                return "";
            }
            if ("00".equals(str)) {
                return "晴";
            }
            if ("01".equals(str)) {
                return "多云";
            }
            if ("02".equals(str)) {
                return "阴";
            }
            
            //阵雨
            if ("03".equals(str)) {
                return "阵雨";
            }
            if ("04".equals(str)) {
                return "雷阵雨";
            }
            if ("05".equals(str)) {
                return "雷阵雨伴有冰雹";
            }
            if ("06".equals(str)) {
                return "雨夹雪";
            }
            
            //小雨
            if ("07".equals(str)) {
                return "小雨";
            }
            
            //中雨
            if ("08".equals(str)) {
                return "中雨";
            }
            if ("21".equals(str)) {
                return "小到中雨";
            }
            
            
            //大雨
            if ("09".equals(str)) {
                return "大雨";
            }
            if ("22".equals(str)) {
                return "中到大雨";
            }
            
            //暴雨
            if ("10".equals(str)) {
                return "暴雨";
            }
            if ("11".equals(str)) {
                return "大暴雨";
            }
            if ("12".equals(str)) {
                return "特大暴雨";
            }
            if ("19".equals(str)) {
                return "冻雨";
            }
            if ("23".equals(str)) {
                return "大到暴雨";
            }
            if ("24".equals(str)) {
                return "暴雨到大暴雨";
            }
            if ("25".equals(str)) {
                return "大暴雨到特大暴雨";
            }
            
            
            if ("301".equals(str)) {
                return "雨";
            }
            
            //
            if ("13".equals(str)) {
                return "阵雪";
            }
            if ("14".equals(str)) {
                return "小雪";
            }
            if ("15".equals(str)) {
                return "中雪";
            }
            if ("16".equals(str)) {
                return "大雪";
            }
            if ("17".equals(str)) {
                return "暴雪";
            }
            if ("26".equals(str)) {
                return "小到中雪";
            }
            if ("27".equals(str)) {
                return "中到大雪";
            }
            if ("28".equals(str)) {
                return "大到暴雪";
            }
            if ("302".equals(str)) {
                return "雪";
            }
            
            
            //
            if ("18".equals(str)) {
                return "雾";
            }
            if ("32".equals(str)) {
                return "浓雾";
            }
            if ("49".equals(str)) {
                return "强浓雾";
            }
            if ("57".equals(str)) {
                return "大雾";
            }
            if ("58".equals(str)) {
                return "特强浓雾";
            }
            
            
            //沙尘暴
            if ("20".equals(str)) {
                return "沙尘暴";
            }
            if ("29".equals(str)) {
                return "浮尘";
            }
            if ("30".equals(str)) {
                return "扬沙";
            }
            if ("31".equals(str)) {
                return "强沙尘暴";
            }
            
            
            //
            if ("53".equals(str)) {
                return "霾";
            }
            
            if ("54".equals(str)) {
                return "中度霾";
            }
            if ("55".equals(str)) {
                return "重度霾";
            }
            if ("56".equals(str)) {
                return "严重霾";
            }
            //
            if ("99".equals(str)) {
                return "无";
            }
            
            
            
            return "";
        }
        //判断风向
        /**
         * Translates a wind-direction code ("0" to "9") into its Chinese display name.
         *
         * @param str the wind-direction code
         * @return the display name, or an empty string when the code is null,
         *         empty or not one of "0".."9"
         */
        public String getWindStr(String str) {
            // Index in this array == numeric value of the code.
            String[] directions = {
                "无持续风向", "东北风", "东风", "东南风", "南风",
                "西南风", "西风", "西北风", "北风", "旋转风"
            };
            for (int code = 0; code < directions.length; code++) {
                // Exact string comparison: "02" or " 2" must not match "2".
                if (String.valueOf(code).equals(str)) {
                    return directions[code];
                }
            }
            return "";
        }
       //由空气指数范围判定状态情况
        /**
         * Classifies an air-quality index value into a pollution level.
         *
         * Ranges: 0-50 "优", 51-100 "良", 101-200 "轻度污染",
         * 201-300 "中度污染", above 300 "重度污染".
         *
         * @param str the air-quality index value
         * @return the level name, or an empty string for a negative value
         */
        public String getAirLevelStr(int str) {
            if (str < 0) {
                return "";
            }
            // Each check only needs the upper bound because lower values returned above.
            if (str <= 50) {
                return "优";
            }
            if (str <= 100) {
                return "良";
            }
            if (str <= 200) {
                return "轻度污染";
            }
            if (str <= 300) {
                return "中度污染";
            }
            return "重度污染";
        }
        
        
       //判断日期一周
        /**
         * Returns the Chinese weekday name of the given date in the default time zone.
         *
         * @param date the date to inspect; must not be null
         * @return one of "星期日", "星期一", ... "星期六"
         */
        public static String getWeekOfDate(Date date) {
            // Calendar.DAY_OF_WEEK runs from SUNDAY (1) to SATURDAY (7), so after
            // subtracting 1 the table has to start with Sunday.
            String[] weekDays = { "星期日", "星期一", "星期二", "星期三", "星期四", "星期五", "星期六" };
            Calendar cal = Calendar.getInstance();
            cal.setTime(date);
            int w = cal.get(Calendar.DAY_OF_WEEK) - 1;
            if (w < 0) {
                w = 0;
            }
            return weekDays[w];
        }
        
        /**
         * Reads the stream to its end and returns everything that was read.
         * The stream is always closed, including when a read fails.
         *
         * @param in the stream to drain; closed by this method
         * @return all bytes of the stream (empty array for an empty stream)
         * @throws IOException if reading or closing the stream fails
         */
        private static byte[] readBig(InputStream in) throws IOException {
            BufferedInputStream bis = new BufferedInputStream(in);
            try {
                ByteArrayOutputStream baos = new ByteArrayOutputStream();
                byte[] chunk = new byte[4096];
                int count;
                while ((count = bis.read(chunk)) != -1) {
                    baos.write(chunk, 0, count);
                }
                return baos.toByteArray();
            } finally {
                bis.close();
            }
        }
    }
  • 相关阅读:
    linux命令3
    sersync和rsync数据实时同步配置
    java web框架
    处理 json数据,base64合成图片
    day032进程池(重点)进程池的同步、异步方法,回调函数;管道、数据共享
    day031同步锁、信号量、事件、队列、生成者消费者模型、Jionablequeue
    day030进程的两种创建方法,验证进程的空间隔离,join等待子进程
    day029socketserver模块实现并发,线程、 ftp上传或下载,打印进度条
    day028两种粘包现象,两种解决粘包的方法,subprocess, struck模块
    day027OSI七层协议;tcp三次握手,四次挥手;tcp与udp的区别及两者的撰写方式
  • 原文地址:https://www.cnblogs.com/ComputerVip/p/11577214.html
Copyright © 2020-2023  润新知