• phantomjs配置


    # -*- coding: utf-8 -*-
    from lxml import html
    from time import sleep
    import requests
    from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.wait import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from selenium import webdriver
    
    
    # Company detail page that the script below loads.
    url = 'http://www.tianyancha.com/company/638562997'
    # The triple-quoted string below is a bare expression statement, so it is
    # never executed. It preserves an earlier experiment that fetched `url`
    # with plain `requests` up to 100000 times and printed the page title,
    # the response body length and the HTTP status code for each attempt.
    '''
    for i in range(0, 100000):
        data = requests.get(url)
        dataHtml = html.fromstring(data.content)
        print dataHtml.xpath('//title/text()')[0], len(data.content),data.status_code                   
    '''
    
    # Desired capabilities for the PhantomJS session: start from selenium's
    # stock PhantomJS capabilities and layer the page settings on top with
    # update(), so the defaults are extended rather than thrown away.
    dcap = dict(DesiredCapabilities.PHANTOMJS)
    dcap.update({
        # Present the headless browser as desktop Chrome on Windows 10.
        "phantomjs.page.settings.userAgent": (
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/46.0.2490.80 Safari/537.36"
        ),
        # Skip image downloads; only the HTML is saved later.
        "phantomjs.page.settings.loadImages": False,
        # Per-resource timeout (milliseconds per the PhantomJS page settings docs).
        "phantomjs.page.settings.resourceTimeout": 5000,
    })
    # Command-line switches handed to the PhantomJS executable: route traffic
    # through an HTTP proxy, ignore SSL errors and use TLSv1.
    proxy = [
        '--proxy=120.27.142.209:82',
        '--proxy-type=http',
        '--ignore-ssl-errors=true',
        '--ssl-protocol=tlsv1',
    ]
    # Load the page 50 times, each time in a brand-new PhantomJS session,
    # saving the rendered HTML and printing the attempt number and page title.
    for i in range(0, 50):
        driver = webdriver.PhantomJS(desired_capabilities=dcap, service_args=proxy)
        # driver = webdriver.Chrome()
        try:
            driver.get(url)
            # Fixed pause before reading the page source. An explicit wait such as
            #   WebDriverWait(driver, 10).until(
            #       EC.presence_of_element_located((By.CSS_SELECTOR, 'div.datatable')))
            # could replace it (note the locator must be passed as one tuple).
            sleep(5)
            # Binary mode: the page source is encoded to UTF-8 bytes before the
            # write. Every iteration overwrites the same file.
            with open('logs/2.html', 'wb') as page_file:
                page_file.write(driver.page_source.encode('utf8'))
            # Single-argument form prints identically on Python 2 and Python 3.
            print('%s %s' % (i, driver.title))
        finally:
            # Always shut the session down; otherwise each iteration leaves a
            # PhantomJS process running.
            driver.quit()
  • 相关阅读:
    Lucene教程
    ElasticSearch安装
    MySQL事务
    Java泛型
    Python学习笔记(1)
    @keyframes实现图片gif效果
    gulp简单应用---gulpfile.js
    巧用CSS3伪类选择器自定义checkbox和radio的样式
    get传中文参数乱码解决方法
    自定义样式 实现文件控件input[type='file']
  • 原文地址:https://www.cnblogs.com/gao-xiang/p/6956170.html
Copyright © 2020-2023  润新知