• jsoup爬取某网站安全数据


    jsoup爬取某网站安全数据

    package com.vfsd.net;
    
    import java.io.IOException;
    import java.sql.SQLException;
    import java.util.Map;
    import java.util.logging.Level;
    import java.util.logging.Logger;
    
    import javax.servlet.ServletException;
    import javax.servlet.annotation.WebServlet;
    import javax.servlet.http.HttpServlet;
    import javax.servlet.http.HttpServletRequest;
    import javax.servlet.http.HttpServletResponse;
    
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;
    import org.jsoup.select.Elements;
    
    import com.vfsd.dao.ManageMySQL;
    
    /**
     * Servlet implementation class GetURL13
     */
    @WebServlet("/GetURL13")
    public class GetURL13 extends HttpServlet {
        private static final long serialVersionUID = 1L;
           
        /**
         * @see HttpServlet#HttpServlet()
         */
        public GetURL13() {
            super();
            // TODO Auto-generated constructor stub
        }
        private String message;
        
        @Override
        public void init() throws ServletException {
            message = "Hello world, this message is from servlet!";
            System.out.println("------"+message);
            try {
                ManageMySQL.getConnection();
                
            } catch (SQLException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            } catch (Exception e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        }
        /**
         * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)
         */
        protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
            // TODO Auto-generated method stub
            //response.getWriter().append("Served at: ").append(request.getContextPath());
            String agent1 = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36";
            
            int pageNum=1;
            int pageSize=10;
            //for(pageNum=1;pageNum<101;pageNum++)
            for(pageNum=1;pageNum<924;pageNum++)
            {
                try {
                    int page1= (pageNum-1)*pageSize;
                    Map<Integer,String> map1 = ManageMySQL.getNewsLinkInTable(page1,pageSize,"data_bjszfhcxjswyh");
                    for(Integer key : map1.keySet())
                    {
                        System.out.println(key+"  "+map1.get(key));
                        String news_link = map1.get(key);
                        String context1="";
                        String source1="";
                        String publishDate = "";
                        //String context1 = getContentByURL(news_link).replace(" ", "");
                        
                        if(!news_link.contains("void"))
                        {
                            if(news_link.endsWith("html"))
                            {
                                Document documentRoot = Jsoup.connect(news_link).userAgent(agent1).get();
                                Elements elements2 = documentRoot.select("#content_list");
                                //Elements elements2_1 = documentRoot.select("div.div_right");
                                if(elements2.size()==1)
                                {
                                    Element div_ele = elements2.get(0);
                                    context1 = div_ele.text();
                                    ManageMySQL.updateContextAndPublishDate2(key, context1.replace("'", "").replace(""", ""),source1,publishDate,"data_bjszfhcxjswyh");
                                }
                                
                                
                            }
                            
                        }
                        
                    }
                } catch (Exception e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
                
            }
        }
    
    
    
        /**
         * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)
         */
        protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
            // TODO Auto-generated method stub
            doGet(request, response);
        }
    
    }
  • 相关阅读:
    PHP配置文件处理类
    PHP中实现图片上传的类库
    在PHP中实现StringBuilder类
    微软官方及第三方SDK http://msdn.microsoft.com/zhcn/jj923044
    在PHP中模拟asp的response类
    Atitit.并发测试解决方案(2) 获取随机数据库记录 随机抽取数据 随机排序 原理and实现
    atitit. access token是什么??微信平台公众号开发access_token and Web session保持状态机制
    atitit.二进制数据无损转字符串网络传输
    atitit.重装系统需要备份的资料总结 o84..
    atitit.web ui 结构建模工具总结
  • 原文地址:https://www.cnblogs.com/herd/p/11784128.html
Copyright © 2020-2023  润新知