• java+phantomjs实现动态网页抓取


    1.下载地址:http://phantomjs.org/download.html

    2.java代码

    public     void   getHtml(String url)
    {
    	 HTML="";
        String jsPath = "C:\phantomjs\examples\myjs.js";
        String exePath = "C:\phantomjs\bin\phantomjs.exe";
        System.out.println(jsPath);
        System.out.println(exePath);
        Runtime rt = Runtime.getRuntime();
    	Process p;
    	try {
    		p = rt.exec(exePath + " " + jsPath + " " + url);
    
    	InputStream is = p.getInputStream();
    	BufferedReader br = new BufferedReader(new InputStreamReader(is));
    	StringBuffer sbf = new StringBuffer();
    	String tmp = "";
    	while ((tmp = br.readLine()) != null)
    	{
    		sbf.append(tmp);
    	}
    	HTML=sbf.toString();
     
      is.close();
      br.close();
      sbf=null;
      is=null;
      br=null;
    	} catch (IOException e) {
    	 
    		e.printStackTrace();
    	}
     
    }
    

     3.js代码(保存为 C:\phantomjs\examples\myjs.js,即上面 Java 代码中 jsPath 指向的脚本)

       

    // PhantomJS script: opens the URL given as the first command-line argument
    // and prints the page markup to stdout once scripts have had time to run.
    // Usage: phantomjs myjs.js <url>
    //
    // `var` is used on purpose: this runs inside PhantomJS, not Node, and its
    // JavaScript engine does not reliably support let/const.
    var page = require('webpage').create();
    var system = require('system');

    // How long to let the page's own scripts run after load before the
    // markup is captured.
    var RENDER_WAIT_MS = 6000;

    // Requests whose URL contains any of these substrings are aborted to speed
    // up loading (e.g. slow third-party ad hosts such as Baidu ads).
    var blockedUrls = ['baidu.com'];

    page.settings.loadImages = false;      // skip images to speed up loading
    page.settings.resourceTimeout = 10000; // give up on a resource after 10 seconds
    // Viewport size only matters when taking screenshots; harmless otherwise.
    page.viewportSize = {
      width: 1280,
      height: 800
    };

    page.onResourceRequested = function (requestData, request) {
      // Plain index loop: for...in on an array walks keys, not values.
      for (var i = 0; i < blockedUrls.length; i++) {
        if (requestData.url.indexOf(blockedUrls[i]) !== -1) {
          request.abort();
          return;
        }
      }
    };

    var address = system.args[1];
    if (!address) {
      console.log('Usage: phantomjs myjs.js <url>');
      phantom.exit(1);
    } else {
      page.open(address, function (status) {
        if (status !== 'success') {
          console.log('FAIL to load the address');
          phantom.exit(1);
          return;
        }
        // Capture and exit only after the delay. Exiting right after
        // scheduling the timer would cancel it, and reading page.content
        // before the wait would miss dynamically rendered content.
        setTimeout(function () {
          console.log(page.content);
          phantom.exit();
        }, RENDER_WAIT_MS);
      });
    }

     

  • 相关阅读:
    QNET弱网测试工具
    echart
    数组按照一大一小打乱排列
    根据环境开启 vconsole
    时间戳在ios上面显示NAN Bug
    去除字符串中html标签
    ios 上 复选框背景黑边bug
    echarts配置项说明
    JS 数据处理技巧及小算法
    js中最常用的几种遍历数据方法
  • 原文地址:https://www.cnblogs.com/xiaoliao/p/10075714.html
Copyright © 2020-2023  润新知