• How to scrape web pages with PhantomJS and jQuery JavaScript phantomjs, scrape, jquery


    How to scrape web pages with PhantomJS and jQuery - JavaScript - phantomjs, scrape, jquery

    How to scrape web pages with PhantomJS and jQuery


    JavaScript
    posted 9 months ago by christian

    This is an example of how to scrape the web using PhantomJS and jQuery:

       1  var page = new WebPage(),
       2      url = 'http://localhost/a-search-form',
       3      stepIndex = 0;
       4 
       5  /**
       6   * From PhantomJS documentation:
       7   * This callback is invoked when there is a JavaScript console. The callback may accept up to three arguments: 
       8   * the string for the message, the line number, and the source identifier.
       9   */
      10  page.onConsoleMessage = function (msg, line, source) {
      11      console.log('console> ' + msg);
      12  };
      13 
      14  /**
      15   * From PhantomJS documentation:
      16   * This callback is invoked when there is a JavaScript alert. The only argument passed to the callback is the string for the message.
      17   */
      18  page.onAlert = function (msg) {
      19      console.log('alert!!> ' + msg);
      20  };
      21 
      22  // Callback is executed each time a page is loaded...
      23  page.open(url, function (status) {
      24    if (status === 'success') {
      25      // State is initially empty. State is persisted between page loads and can be used for identifying which page we're on.
      26      console.log('============================================');
      27      console.log('Step "' + stepIndex + '"');
      28      console.log('============================================');
      29 
      30      // Inject jQuery for scraping (you need to save jquery-1.6.1.min.js in the same folder as this file)
      31      page.injectJs('jquery-1.6.1.min.js');
      32 
      33      // Our "event loop"
      34      if(!phantom.state){
      35        initialize();
      36      } else {
      37        phantom.state();
      38      }
      39 
      40      // Save screenshot for debugging purposes
      41      page.render("step" + stepIndex++ + ".png");
      42    }
      43  });
      44 
      45  // Step 1
      46  function initialize() {
      47    page.evaluate(function() {
      48      $('form#search input.query').val('Jebus saves');
      49      $('form#search').submit();
      50      console.log('Searching...');
      51    });
      52    // Phantom state doesn't change between page reloads
      53    // We use the state to store the search result handler, ie. the next step
      54    phantom.state = parseResults;
      55  }
      56 
      57  // Step 2
      58  function parseResults() {
      59    page.evaluate(function() {
      60      $('#search-result a').each(function(index, link) {
      61        console.log($(link).attr('href'));
      62      })
      63      console.log('Parsed results');
      64    });
      65    // If there was a 3rd step we could point to another function
      66    // but we would have to reload the page for the callback to be called again
      67    phantom.exit();
      68  }
  • 相关阅读:
    0626 Django模型(ORM)
    0625 Django 基础
    0530JavaScript基础2
    CentOS7.5安装cairo-dock,比mac托盘还美
    CentOS7.5安装与使用mysql-workbench
    CentOS7.6安装rime輸入法
    CentOS7.5 firefox Flash插件更新
    记一次ceph集群的严重故障
    ceph笔记(一)
    CentOS7.6打开的程序窗口居中
  • 原文地址:https://www.cnblogs.com/lexus/p/2486113.html
Copyright © 2020-2023  润新知