• httpclient:Ip 代理


    参考:http://blog.csdn.net/sdfiiiiii/article/details/70432060  http://blog.csdn.net/qy20115549/article/details/54945974

    第一篇博客的代码会抓取 http://www.xicidaili.com/ 等网站上的免费代理 IP,并逐个测试是否可用(检测结果似乎不太准确),最后把可用的代理 IP 放入一个 List 中。

    第二篇博客的代码直接把代理 IP 和端口写在代码里,通过该代理请求指定页面,可以用来测试某个代理 IP 是否可用。

    第一篇博客

    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.28</version>
    </dependency>
    <dependency>
        <groupId>org.jsoup</groupId>
        <artifactId>jsoup</artifactId>
        <version>1.10.2</version>
    </dependency>
    import com.alibaba.fastjson.JSONObject;
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    
    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    /**
     * 获取代理IP,需要
     * com.alibaba.fastjson.JSONObject以及Jsoup
     */
    public class ProxyCralwerUnusedVPN {
    
        ThreadLocal<Integer> localWantedNumber = new ThreadLocal<Integer>();
        ThreadLocal<List<ProxyInfo>> localProxyInfos = new ThreadLocal<List<ProxyInfo>>();
    
        public static void main(String[] args) {
            ProxyCralwerUnusedVPN proxyCrawler = new ProxyCralwerUnusedVPN();
            /**
             * 想要获取的代理IP个数,由需求方自行指定。(如果个数太多,将导致返回变慢)
             */
            proxyCrawler.startCrawler(1);
        }
    
        /**
         * 暴露给外部模块调用的入口
         * @param wantedNumber 调用方期望获取到的代理IP个数
         */
        public String startCrawler(int wantedNumber) {
            localWantedNumber.set(wantedNumber);
    
            kuaidailiCom("http://www.xicidaili.com/nn/", 15);
            kuaidailiCom("http://www.xicidaili.com/nt/", 15);
            kuaidailiCom("http://www.xicidaili.com/wt/", 15);
            kuaidailiCom("http://www.kuaidaili.com/free/inha/", 15);
            kuaidailiCom("http://www.kuaidaili.com/free/intr/", 15);
            kuaidailiCom("http://www.kuaidaili.com/free/outtr/", 15);
    
            /**
             * 构造返回数据
             */
            ProxyResponse response = new ProxyResponse();
            response.setSuccess("true");
            Map<String, Object> dataInfoMap = new HashMap<String, Object>();
            dataInfoMap.put("numFound", localProxyInfos.get().size());
            dataInfoMap.put("pageNum", 1);
            dataInfoMap.put("proxy", localProxyInfos.get());
            response.setData(dataInfoMap);
            String responseString = JSONObject.toJSON(response).toString();
            System.out.println(responseString);
            return responseString;
        }
    
        private void kuaidailiCom(String baseUrl, int totalPage) {
            String ipReg = "\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} \d{1,6}";
            Pattern ipPtn = Pattern.compile(ipReg);
    
            for (int i = 1; i < totalPage; i++) {
                if (getCurrentProxyNumber() >= localWantedNumber.get()) {
                    return;
                }
                try {
                    Document doc = Jsoup.connect(baseUrl + i + "/")
                            .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
                            .header("Accept-Encoding", "gzip, deflate, sdch")
                            .header("Accept-Language", "zh-CN,zh;q=0.8,en;q=0.6")
                            .header("Cache-Control", "max-age=0")
                            .header("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36")
                            .header("Cookie", "Hm_lvt_7ed65b1cc4b810e9fd37959c9bb51b31=1462812244; _gat=1; _ga=GA1.2.1061361785.1462812244")
                            .header("Host", "www.kuaidaili.com")
                            .header("Referer", "http://www.kuaidaili.com/free/outha/")
                            .timeout(30 * 1000)
                            .get();
                    Matcher m = ipPtn.matcher(doc.text());
    
                    while (m.find()) {
                        if (getCurrentProxyNumber() >= localWantedNumber.get()) {
                            break;
                        }
                        String[] strs = m.group().split(" ");
                        if (checkProxy(strs[0], Integer.parseInt(strs[1]))) {
                            System.out.println("获取到可用代理IP	" + strs[0] + "	" + strs[1]);
                            addProxy(strs[0], strs[1], "http");
                        }
                    }
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    
        private static boolean checkProxy(String ip, Integer port) {
            try {
                //http://1212.ip138.com/ic.asp 可以换成任何比较快的网页
                Jsoup.connect("http://1212.ip138.com/ic.asp")
                        .timeout(2 * 1000)
                        .proxy(ip, port)
                        .get();
                return true;
            } catch (Exception e) {
                return false;
            }
        }
    
        private int getCurrentProxyNumber() {
            List<ProxyInfo> proxyInfos = localProxyInfos.get();
            if (proxyInfos == null) {
                proxyInfos = new ArrayList<ProxyInfo>();
                localProxyInfos.set(proxyInfos);
                return 0;
            }
            else {
                return proxyInfos.size();
            }
        }
        private void addProxy(String ip, String port, String protocol){
            List<ProxyInfo> proxyInfos = localProxyInfos.get();
            if (proxyInfos == null) {
                proxyInfos = new ArrayList<ProxyInfo>();
                proxyInfos.add(new ProxyInfo(ip, port, protocol));
            }
            else {
                proxyInfos.add(new ProxyInfo(ip, port, protocol));
            }
        }
    }
    
    
    
    /**
     * Mutable bean describing one proxy server. User name and password
     * start out as empty strings and {@code is_internet} starts at 1; the
     * remaining properties come from the constructor.
     */
    class ProxyInfo {

        // Supplied through the constructor.
        private String ip;
        private String port;
        private String type;

        // Defaults that callers may override through the setters.
        private String userName = "";
        private String password = "";
        private int is_internet = 1;

        /**
         * @param ip   proxy host
         * @param port proxy port as text
         * @param type proxy type
         */
        public ProxyInfo(String ip, String port, String type) {
            this.ip = ip;
            this.port = port;
            this.type = type;
        }

        /** @return the proxy host */
        public String getIp() {
            return this.ip;
        }

        /** @param ip the proxy host */
        public void setIp(String ip) {
            this.ip = ip;
        }

        /** @return the proxy port as text */
        public String getPort() {
            return this.port;
        }

        /** @param port the proxy port as text */
        public void setPort(String port) {
            this.port = port;
        }

        /** @return the proxy type */
        public String getType() {
            return this.type;
        }

        /** @param type the proxy type */
        public void setType(String type) {
            this.type = type;
        }

        /** @return the user name, empty unless set */
        public String getUserName() {
            return this.userName;
        }

        /** @param userName the user name */
        public void setUserName(String userName) {
            this.userName = userName;
        }

        /** @return the password, empty unless set */
        public String getPassword() {
            return this.password;
        }

        /** @param password the password */
        public void setPassword(String password) {
            this.password = password;
        }

        /** @return the is_internet flag, 1 unless set */
        public int getIs_internet() {
            return this.is_internet;
        }

        /** @param is_internet the is_internet flag */
        public void setIs_internet(int is_internet) {
            this.is_internet = is_internet;
        }
    }
    
    /**
     * Mutable bean for the crawler's reply: a success marker plus a map of
     * payload entries. Both properties are {@code null} until set.
     */
    class ProxyResponse {

        private Map<String, Object> data;
        private String success;

        /** @return the payload map, or {@code null} if none was set */
        public Map<String, Object> getData() {
            return this.data;
        }

        /** @param data the payload map; stored as given, not copied */
        public void setData(Map<String, Object> data) {
            this.data = data;
        }

        /** @return the success marker, or {@code null} if none was set */
        public String getSuccess() {
            return this.success;
        }

        /** @param success the success marker */
        public void setSuccess(String success) {
            this.success = success;
        }
    }

     第二篇博客

    import java.io.BufferedReader;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.InputStreamReader;
    import java.io.UnsupportedEncodingException;
    import java.net.InetSocketAddress;
    import java.net.MalformedURLException;
    import java.net.Proxy;
    import java.net.URL;
    import java.net.URLConnection;
    
    /**
     * Fetches a page through a given HTTP proxy; handy for checking whether
     * a proxy address works.
     */
    public class GetHtml {
        public static void main(String[] args) throws UnsupportedEncodingException {
            // Proxy IP, proxy port, and the URL to fetch.
            gethtml("121.61.101.222",808,"http://club.autohome.com.cn/bbs/forum-c-2533-1.html?orderby=dateline&qaType=-1");

        }

        /**
         * Downloads {@code url} through the HTTP proxy at {@code ip:port},
         * prints the body to standard output and returns it.
         *
         * @param ip   proxy host
         * @param port proxy port
         * @param url  page to fetch
         * @return the page body decoded as gb2312, or an empty string if the
         *         URL is malformed or the proxy could not be used
         * @throws UnsupportedEncodingException if the gb2312 charset is unavailable
         */
        public static String gethtml(String ip,int port,String url) throws UnsupportedEncodingException{
            URL target;
            try {
                target = new URL(url);
            } catch (MalformedURLException e1) {
                // Nothing can be fetched from an invalid URL; report and give up
                // instead of running into a null dereference further down.
                e1.printStackTrace();
                return "";
            }
            // Address of the proxy server.
            InetSocketAddress addr = new InetSocketAddress(ip, port);
            Proxy proxy = new Proxy(Proxy.Type.HTTP, addr); // http proxy
            InputStream in = null;
            try {
                URLConnection conn = target.openConnection(proxy);
                conn.setConnectTimeout(3000);
                // Without a read timeout a proxy that accepts the connection
                // but never answers would block this call forever.
                conn.setReadTimeout(10000);
                in = conn.getInputStream();
            } catch (Exception e) {
                // Unusable proxy: say which one.
                System.out.println("ip " + ip + ":" + port + " is not available");
            }

            String s = convertStreamToString(in);
            System.out.println(s);
            return s;

        }

        /**
         * Reads the whole stream as gb2312 text and closes it. Every line
         * read is followed by a single {@code '\n'} in the result. An I/O
         * error while reading is reported and whatever was read so far is
         * returned.
         *
         * @param is stream to read; may be {@code null}
         * @return the decoded text, or an empty string when {@code is} is {@code null}
         * @throws UnsupportedEncodingException if the gb2312 charset is unavailable
         */
        public static String convertStreamToString(InputStream is) throws UnsupportedEncodingException {
            if (is == null) {
                return "";
            }
            StringBuilder sb = new StringBuilder();
            BufferedReader reader = null;
            try {
                reader = new BufferedReader(new InputStreamReader(is,"gb2312"));
                String line;
                while ((line = reader.readLine()) != null) {
                    // A real newline; the original appended the two characters "/n".
                    sb.append(line).append('\n');
                }
            } catch (UnsupportedEncodingException e) {
                // Part of the declared contract: let the caller see it.
                throw e;
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                try {
                    // Closing the reader also closes the underlying stream; fall
                    // back to the raw stream if the reader could not be created.
                    if (reader != null) {
                        reader.close();
                    } else {
                        is.close();
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            return sb.toString();

        }
    }
  • 相关阅读:
    《数据结构与算法分析:C语言描述》复习——第九章“图论”——最大流问题
    《数据结构与算法分析:C语言描述》复习——第九章“图论”——多源最短路径问题
    《数据结构与算法分析:C语言描述》复习——第九章“图论”——单源带权最短路径问题
    《数据结构与算法分析:C语言描述》复习——第九章“图论”——无权值的最短路径问题
    《数据结构与算法分析:C语言描述》复习——第九章“图论”——拓扑排序
    《数据结构与算法分析:C语言描述》复习——第七章“哈希”——哈希表
    毕业整一年
    存储管理学习笔记
    一个操作系统的实现学习笔记记录(1)
    6自由度空间机器人课程设计的简要记录
  • 原文地址:https://www.cnblogs.com/Michael2397/p/7821930.html
Copyright © 2020-2023  润新知