• 使用 jsoup 简单地爬取网页数据


    /**
     * Project Name:JavaTest
     * File Name:BankOfChinaExchangeRate.java
     * Package Name:com.lee.javatest
     * Date:2016年7月22日下午1:34:09
     * Copyright (c) 2016年7月22日, Pwenlee All Rights Reserved.
     *
    */
    
    package com.lee.javatest;
    
    import java.io.Serializable;
    import java.math.BigDecimal;
    import java.text.SimpleDateFormat;
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Date;
    import java.util.List;
    import java.util.logging.Level;
    import java.util.logging.Logger;
    
    import org.apache.http.HttpEntity;
    import org.apache.http.HttpResponse;
    import org.apache.http.HttpStatus;
    import org.apache.http.client.HttpClient;
    import org.apache.http.client.methods.HttpGet;
    import org.apache.http.impl.client.DefaultHttpClient;
    import org.apache.http.params.CoreConnectionPNames;
    import org.apache.http.util.EntityUtils;
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;
    import org.jsoup.select.Elements;
    
    /**
     * One Bank of China foreign-exchange quotation row, scraped from the bank's
     * public quotation search page.
     *
     * An instance is populated at construction time: the constructor downloads the
     * search result pages for a date and currency, picks the first table row whose
     * text contains the requested time, and parses its space-separated cells.
     *
     * Date: 2016-07-22 13:34:09
     *
     * @author PwenLee
     */
    public class BankOfChinaExchangeRate implements Serializable {

        private static final long serialVersionUID = -913877619191789389L;

        private static final Logger LOGGER = Logger.getLogger(BankOfChinaExchangeRate.class.getName());

        /** Rows per result page; used to derive the page count from the record count. */
        private static final int DEFAULT_PAGE_SIZE = 20;

        /** Endpoint of the quotation search page. */
        private static final String SEARCH_URL = "http://srh.bankofchina.com/search/whpj/search.jsp";

        /** Text that precedes the total record count inside the returned page. */
        private static final String RECORD_COUNT_MARKER = "m_nRecordCount = ";

        /** CSS class of the element that wraps the quotation table. */
        private static final String TABLE_CONTAINER_CLASS = "BOC_main";

        /** Time of day looked up by the no-argument constructor. */
        private static final String DEFAULT_TIME = "05:30:00";

        /** Date format expected by the search page, e.g. "2016-07-22". */
        private static final String DATE_PATTERN = "yyyy-MM-dd";

        /** Connect and read timeout for every HTTP request, in milliseconds. */
        private static final int TIMEOUT_MILLIS = 20000;

        /** Tokens a quotation row must have: name, six rates, date, time. */
        private static final int EXPECTED_FIELD_COUNT = 9;

        /**
         * Currency name (Simplified Chinese).
         */
        private String currency;

        /**
         * Spot exchange buying rate.
         */
        private BigDecimal buyingRate;

        /**
         * Cash buying rate.
         */
        private BigDecimal cashBuyingRate;

        /**
         * Spot exchange selling rate.
         */
        private BigDecimal sellingRate;

        /**
         * Cash selling rate.
         */
        private BigDecimal cashSellingRate;

        /**
         * SAFE (State Administration of Foreign Exchange) middle rate.
         */
        private BigDecimal SAFEMiddleRate;

        /**
         * Bank of China conversion rate.
         */
        private BigDecimal bankConvertRate;

        /**
         * Publication date and time, joined by a single space.
         */
        private String dateTime;

        public String getCurrency() {
            return currency;
        }

        public void setCurrency(String currency) {
            this.currency = currency;
        }

        public BigDecimal getBuyingRate() {
            return buyingRate;
        }

        public void setBuyingRate(BigDecimal buyingRate) {
            this.buyingRate = buyingRate;
        }

        public BigDecimal getCashBuyingRate() {
            return cashBuyingRate;
        }

        public void setCashBuyingRate(BigDecimal cashBuyingRate) {
            this.cashBuyingRate = cashBuyingRate;
        }

        public BigDecimal getSellingRate() {
            return sellingRate;
        }

        public void setSellingRate(BigDecimal sellingRate) {
            this.sellingRate = sellingRate;
        }

        public BigDecimal getCashSellingRate() {
            return cashSellingRate;
        }

        public void setCashSellingRate(BigDecimal cashSellingRate) {
            this.cashSellingRate = cashSellingRate;
        }

        public BigDecimal getSAFEMiddleRate() {
            return SAFEMiddleRate;
        }

        public void setSAFEMiddleRate(BigDecimal sAFEMiddleRate) {
            SAFEMiddleRate = sAFEMiddleRate;
        }

        public BigDecimal getBankConvertRate() {
            return bankConvertRate;
        }

        public void setBankConvertRate(BigDecimal bankConvertRate) {
            this.bankConvertRate = bankConvertRate;
        }

        public String getDateTime() {
            return dateTime;
        }

        public void setDateTime(String dateTime) {
            this.dateTime = dateTime;
        }

        /**
         * Loads the quotation published at the given date and time for one currency.
         *
         * @author PwenLee
         * @param date         quotation date, e.g. "2016-07-22"
         * @param time         publication time to look for, e.g. "05:30:00"
         * @param currencyCode currency to query (see the BankOfChinaCurrencyCode enum)
         * @throws IndexOutOfBoundsException if no matching row was found or the row
         *         has fewer than nine space-separated fields
         * @throws NumberFormatException if one of the rate fields is not a decimal number
         */
        public BankOfChinaExchangeRate(String date, String time, BankOfChinaCurrencyCode currencyCode) {
            List<String> fields = getExchangeRate(date, time, currencyCode);
            if (fields.size() < EXPECTED_FIELD_COUNT) {
                // Fail with the query in the message instead of a bare index error
                // from the first get() call on a short or empty list.
                throw new IndexOutOfBoundsException("No complete quotation row for date=" + date
                        + ", time=" + time + ": expected at least " + EXPECTED_FIELD_COUNT
                        + " fields but found " + fields.size());
            }
            this.currency = fields.get(0);
            this.buyingRate = new BigDecimal(fields.get(1));
            this.cashBuyingRate = new BigDecimal(fields.get(2));
            this.sellingRate = new BigDecimal(fields.get(3));
            this.cashSellingRate = new BigDecimal(fields.get(4));
            this.SAFEMiddleRate = new BigDecimal(fields.get(5));
            this.bankConvertRate = new BigDecimal(fields.get(6));
            // The row text was split on spaces, so date and time arrive as two tokens.
            this.dateTime = fields.get(7) + " " + fields.get(8);
        }

        /**
         * Loads today's USD quotation published at 05:30:00.
         *
         * @throws IndexOutOfBoundsException if no matching row was found or the row
         *         has fewer than nine space-separated fields
         * @throws NumberFormatException if one of the rate fields is not a decimal number
         */
        public BankOfChinaExchangeRate() {
            this(today(), DEFAULT_TIME, BankOfChinaCurrencyCode.USD);
        }

        /**
         * Returns the number of result pages for today's USD quotations.
         *
         * @return the page count, or 0 when it cannot be determined
         */
        public static Integer totalPage() {
            return totalPage(today(), DEFAULT_TIME, BankOfChinaCurrencyCode.USD);
        }

        /**
         * Returns the number of result pages for the given date and currency.
         *
         * @param date         quotation date, e.g. "2016-07-22"
         * @param time         not used by the page-count lookup; kept for signature compatibility
         * @param currencyCode currency to query
         * @return the page count, or 0 when the page cannot be fetched or parsed
         */
        public static Integer totalPage(String date, String time, BankOfChinaCurrencyCode currencyCode) {
            try {
                return parsePageCount(fetchHtml(searchUrl(date, currencyCode)));
            } catch (RuntimeException e) {
                // Best effort by contract: report the failure and answer "no pages".
                LOGGER.log(Level.WARNING, "Cannot determine page count for date=" + date, e);
                return 0;
            }
        }

        @Override
        public String toString() {
            return "BankOfChinaExchangeRate [currency=" + currency
                    + ", buyingRate=" + buyingRate + ", cashBuyingRate="
                    + cashBuyingRate + ", sellingRate=" + sellingRate
                    + ", cashSellingRate=" + cashSellingRate + ", SAFEMiddleRate="
                    + SAFEMiddleRate + ", bankConvertRate=" + bankConvertRate
                    + ", dateTime=" + dateTime + "]";
        }

        /**
         * Requests the URL and returns the response body as text.
         *
         * @author PwenLee
         * @param url address to fetch
         * @return the body decoded as UTF-8, or null when the request fails, the
         *         status is not 200, or the response has no entity
         */
        private static String fetchHtml(String url) {
            String html = null;
            HttpClient httpClient = new DefaultHttpClient();
            httpClient.getParams().setParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, TIMEOUT_MILLIS);
            // Without a read timeout a stalled response would block the caller indefinitely.
            httpClient.getParams().setParameter(CoreConnectionPNames.SO_TIMEOUT, TIMEOUT_MILLIS);
            HttpGet httpGet = new HttpGet(url);
            try {
                HttpResponse httpResponse = httpClient.execute(httpGet);
                int status = httpResponse.getStatusLine().getStatusCode();
                if (status == HttpStatus.SC_OK) {
                    HttpEntity entity = httpResponse.getEntity();
                    if (entity != null) {
                        html = EntityUtils.toString(entity, "utf-8");
                    }
                } else {
                    LOGGER.warning("Unexpected HTTP status " + status + " for " + url);
                }
            } catch (Exception e) {
                // Any failure here means "no page"; callers handle the null result.
                LOGGER.log(Level.WARNING, "Connect " + url + " error", e);
            } finally {
                httpClient.getConnectionManager().shutdown();
            }
            return html;
        }

        /**
         * Finds the quotation row for the requested time and splits it into fields.
         *
         * @return the space-separated tokens of the matching row, or an empty list
         *         when there are no result pages or no row contains the time
         */
        private static List<String> getExchangeRate(String date, String time, BankOfChinaCurrencyCode currencyCode) {
            int pageCount = totalPage(date, time, currencyCode);
            if (pageCount <= 0) {
                LOGGER.warning("No result pages for date=" + date);
                return new ArrayList<String>();
            }

            String baseUrl = searchUrl(date, currencyCode);
            // Pages are scanned from the last one backwards.
            // NOTE(review): page 0 is requested as well; the site's page numbering is
            // presumably 1-based, which would make that request redundant - confirm
            // before tightening the bound.
            for (int page = pageCount; page >= 0; page--) {
                String row = findRow(fetchHtml(baseUrl + "&page=" + page), time);
                if (row != null) {
                    return Arrays.asList(row.split(" "));
                }
            }

            LOGGER.warning("No quotation row containing time=" + time + " for date=" + date);
            return new ArrayList<String>();
        }

        /**
         * Returns the text of the first table row in the page that contains the time.
         *
         * @return the row text, or null when the page is missing, has no quotation
         *         table container, or no row matches
         */
        private static String findRow(String html, String time) {
            if (html == null) {
                return null;
            }
            Document doc = Jsoup.parse(html);
            Elements containers = doc.getElementsByClass(TABLE_CONTAINER_CLASS);
            if (containers.isEmpty()) {
                return null;
            }
            for (Element row : containers.get(0).getElementsByTag("tr")) {
                String text = row.text();
                if (text.contains(time)) {
                    return text;
                }
            }
            return null;
        }

        /**
         * Extracts the record count embedded in the page and converts it to a page count.
         *
         * @return the record count divided by the page size, rounded up; 0 when the
         *         page is null, the marker is missing, or the value is not a positive integer
         */
        private static int parsePageCount(String html) {
            if (html == null) {
                return 0;
            }
            int markerAt = html.indexOf(RECORD_COUNT_MARKER);
            if (markerAt < 0) {
                return 0;
            }
            int valueStart = markerAt + RECORD_COUNT_MARKER.length();
            int valueEnd = html.indexOf(';', valueStart);
            if (valueEnd < 0) {
                return 0;
            }

            int recordCount;
            try {
                recordCount = Integer.parseInt(html.substring(valueStart, valueEnd).trim());
            } catch (NumberFormatException e) {
                LOGGER.log(Level.WARNING, "Record count in page is not a number", e);
                return 0;
            }
            if (recordCount <= 0) {
                return 0;
            }
            int fullPages = recordCount / DEFAULT_PAGE_SIZE;
            return recordCount % DEFAULT_PAGE_SIZE == 0 ? fullPages : fullPages + 1;
        }

        /** Builds the search URL for one date and currency, without a page parameter. */
        private static String searchUrl(String date, BankOfChinaCurrencyCode currencyCode) {
            return SEARCH_URL + "?erectDate=" + date + "&nothing=" + date
                    + "&pjname=" + currencyCode.getCode();
        }

        /** Formats the current date the way the search page expects it. */
        private static String today() {
            // SimpleDateFormat is not thread-safe, so a fresh instance is used per call.
            return new SimpleDateFormat(DATE_PATTERN).format(new Date());
        }

    }
    View Code
  • 相关阅读:
    SpringMVC中请求路径参数使用正则表达式
    SpringBoot单元测试示例2
    数据结构与算法之——八大排序算法
    linux学习之centos(二):虚拟网络三种连接方式和SecureCRT的使用
    linux学习之centos(一):在VMware虚拟机中安装centos6.5
    网易云课堂学习之VS相关
    emplace_back减少内存拷贝和移动
    Lepus经历收获杂谈(一)——confirm features的小工具
    MDM平台学习笔记
    四大开源协议:BSD、Apache、GPL、LGPL
  • 原文地址:https://www.cnblogs.com/pwenlee/p/5704010.html
Copyright © 2020-2023  润新知