• 利用 HttpClient 和 MySQL 模拟搜索引擎


    数据抓取模块

    package crowling1;
    
    
    import java.nio.charset.StandardCharsets;
    import java.sql.CallableStatement;
    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.PreparedStatement;
    import java.sql.ResultSet;
    import java.sql.SQLException;
    import java.sql.Statement;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    import org.apache.http.HttpEntity;
    import org.apache.http.client.methods.CloseableHttpResponse;
    import org.apache.http.client.methods.HttpGet;
    import org.apache.http.impl.client.CloseableHttpClient;
    import org.apache.http.impl.client.HttpClients;
    import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
    import org.apache.http.protocol.BasicHttpContext;
    import org.apache.http.protocol.HttpContext;
    import org.apache.http.util.EntityUtils;
    
    /**
     * An example that performs GETs from multiple threads.
     *
     */
    public class ClientMultiThreadedExecution {
    
        public static void main(String[] args) throws Exception {
            // Create an HttpClient with the ThreadSafeClientConnManager.
            // This connection manager must be used if more than one thread will
            // be using the HttpClient.
            PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager();
            cm.setMaxTotal(1000);
    
            CloseableHttpClient httpclient = HttpClients.custom()
                    .setConnectionManager(cm)
                    .build();
                GetThread[] threads = new GetThread[5000];
                for (int i = 0; i < threads.length; i++) {
                    HttpGet httpget = new HttpGet(reportIP());
                    threads[i] = new GetThread(httpclient, httpget, i + 1);
                }
    
                // start the threads
                for (int j = 0; j < threads.length; j++) {
                    threads[j].start();
                }
    
                // join the threads
                for (int j = 0; j < threads.length; j++) {
                    threads[j].join();
                }
    
            } finally {
                httpclient.close();
            }
        }
    
    
        /**
         * A thread that performs a GET.
         */
        static class GetThread extends Thread {
    
            private final CloseableHttpClient httpClient;
            private final HttpContext context;
            private final HttpGet httpget;
            private final int id;
    
            public GetThread(CloseableHttpClient httpClient, HttpGet httpget, int id) {
                this.httpClient = httpClient;
                this.context = new BasicHttpContext();
                this.httpget = httpget;
                this.id = id;
            }
    
            /**
             * Executes the GetMethod and prints some status information.
             */
            @Override
            public void run() {
                try {
                    System.out.println(id + " - about to get something from " + httpget.getURI());
                    CloseableHttpResponse response = httpClient.execute(httpget, context);
                    try {
                        System.out.println(id + " - get executed");
                        // get the response body as an array of bytes
                        HttpEntity entity = response.getEntity();
                        String str = null;
                        if (entity != null) {
                            byte[] bytes = EntityUtils.toByteArray(entity);
                            str=new String(bytes,"utf-8");
                            System.out.println(id + " - " + bytes.length + " bytes read");
                        }
                        demo3 d=new demo3();
                        String mys="'"+httpget.getURI()+"'";
                        String ip=mys;
                        int begin=str.indexOf("<title>")+7;
                        int end=str.indexOf("</title>");
                        int debegin=str.indexOf("Description");
                        String title="";
                        if (begin!=-1){
                        title="'"+str.substring(begin, end)+"'";
                        }
                        String desc=null;
                        if (debegin!=-1){
                        desc="'"+str.substring(debegin, debegin+10)+"'";
                        }else {
                            desc="'没有获取到描述'";
                        }
                        System.out.println(title);
                        d.createconn();
                        String sql="insert into web values("+ip+","+title+","+desc+")";
                        d.savedata(sql);
                    } finally {
                        response.close();
                    }
                } catch (Exception e) {
                    System.out.println(id + " - error: " + e);
                }
            }
    
        }
        static int a=110;
        static int b=75;
        static int c=114;
        static int d=0;
        public synchronized static String reportIP(){
            if (d==255){
                d=0;
                c++;
            }else if(b==255){
                b=0;
                a++;
            }else if(c==255){
                c=0;
                b++;
            }else {
                d++;
            }
            return new String("http://"+a+"."+b+"."+c+"."+d);
    
        }
    
    }

    数据存储模块

    package crowling1;
    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.PreparedStatement;
    import java.sql.SQLException;
    
    /**
     * Small JDBC helper: opens a connection to the local {@code webcro} MySQL
     * database and executes update statements on it.
     */
    public class demo3 {
        /** Connection opened by {@link #createconn()}; {@code null} until it succeeds. */
        public Connection con;
    
        /**
         * Loads the MySQL driver and opens the connection into {@link #con}.
         * Failures are printed rather than thrown, in which case {@code con}
         * is left unchanged.
         */
        public void createconn(){
            try {
                // Load the MySQL driver class
                Class.forName("com.mysql.jdbc.Driver");
            } catch (ClassNotFoundException e) {
                System.out.println("找不到驱动程序类 ,加载驱动失败!");
                e.printStackTrace();
            }
            String url = "jdbc:mysql://localhost:3306/webcro";
            String username = "root";
            String password = "root";
            try {
                // Connect
                con = DriverManager.getConnection(url, username, password);
            } catch (SQLException se) {
                System.out.println("数据库连接失败!");
                se.printStackTrace();
            }
        }
    
        /**
         * Executes the given update statement (for example an
         * {@code insert into web ...}) on the current connection.
         *
         * @param sql the complete SQL statement to run
         * @throws SQLException if no connection is open or the statement fails
         */
        public void savedata(String sql) throws SQLException{
            if (con == null) {
                // Surface a missing connection as an SQLException instead of an NPE.
                throw new SQLException("No open connection; call createconn() first");
            }
            // try-with-resources closes the statement even when execution fails
            try (PreparedStatement pstmt = con.prepareStatement(sql)) {
                pstmt.executeUpdate();
            }
        }
    
        /**
         * Closes the connection if one is open and resets {@link #con} to
         * {@code null}. Safe to call when nothing was opened.
         */
        public void closeconn(){
            if (con == null) {
                return;
            }
            try {
                con.close();
            } catch (SQLException se) {
                se.printStackTrace();
            } finally {
                con = null;
            }
        }
    
        /** Manual smoke test: inserts one fixed row into {@code web}. */
        public static void main(String[] args) throws SQLException {
            demo3 d=new demo3();
            String ip="'255.255.255.253'";
            String title="'百度'";
            String desc="'百度'";
            d.createconn();
            try {
                String sql="insert into web values("+ip+","+title+","+desc+")";
                d.savedata(sql);
            } finally {
                d.closeconn();
            }
        }
    }
    
  • 相关阅读:
    python中时间日期格式化符号的含义
    关于long long int和__int64用%I64d和%lld输出在不同编译语言下的正确性
    并查集 poj2236
    数据结构 最长公共子序列问题
    449B
    oj判题WA/RE怎么办
    如何准确区别python中strftime strptime的用法
    通过HttpWebRequest在后台对WebService进行调用 【转】
    利用maven 下依赖包
    Maven的安装
  • 原文地址:https://www.cnblogs.com/mrcharles/p/11879919.html
Copyright © 2020-2023  润新知