• selenium2使用记录


    安装 pip install selenium 

    web

    phantomjs下载 :http://phantomjs.org/download.html

    浏览器驱动下载:http://www.seleniumhq.org/download/

    chrome: http://chromedriver.storage.googleapis.com/index.html?path=2.22/

    #!/usr/bin/env python
    # encoding: utf-8
    """Open a Toutiao channel page in Chrome and print the page title."""
    from selenium import webdriver

    url = 'http://www.toutiao.com/news_fashion/'

    driver = webdriver.Chrome()
    try:
        driver.get(url)
        # The parenthesised form runs under both Python 2 and Python 3.
        print(driver.title)
    finally:
        # Always end the session so the browser and its driver process
        # are not left running, even if loading the page fails.
        driver.quit()
    

    爬取今日头条的实例:通过刷新页面来更换文章内容(暂时还不会通过控制鼠标滚动来加载更多内容)

    #!/usr/bin/env python
    # encoding: utf-8
    """Print title, abstract and image URL for articles on a Toutiao page.

    The page is refreshed on every pass to get a different set of
    articles; scrolling the page is not automated here.
    """
    import time

    from selenium import webdriver

    driver = webdriver.Chrome()
    url = 'http://www.toutiao.com/news_fashion/'

    try:
        # Load the page exactly once. get() has no useful return value,
        # so printing it (and thereby loading the page twice) is pointless.
        driver.get(url)

        for x in range(2):
            driver.refresh()
            titles = driver.find_elements_by_class_name("title-box")
            contents = driver.find_elements_by_class_name("abstract")
            # Collect every feed image instead of only the first one, so
            # that records do not all share the same image URL.
            imgs = driver.find_elements_by_css_selector(".feedimg")
            # NOTE(review): images are paired with articles by position;
            # this assumes one ".feedimg" per article in page order --
            # confirm against the actual page markup.
            for index, (title, content) in enumerate(zip(titles, contents)):
                img = imgs[index] if index < len(imgs) else None
                data = {
                    'title': title.text,
                    'content': content.text,
                    # None when there is no image at this position.
                    'img': img.get_attribute('src') if img is not None else None,
                }
                print(data)
            # Pause between passes before refreshing again.
            time.sleep(10)
    finally:
        # quit() ends the whole session (browser and driver process),
        # and runs even when an element lookup raises.
        driver.quit()
    

    自动登录的例子:

    # coding:utf-8
    """Fill in and submit the login form on mp.sohu.com through Chrome."""

    # NOTE(review): requests and BeautifulSoup are imported but not used
    # anywhere in this snippet.
    import requests
    from bs4 import BeautifulSoup
    from selenium import webdriver
    import time

    # The login page has a CAPTCHA; this script does not handle it.
    driver = webdriver.Chrome()
    url = 'http://mp.sohu.com/'

    try:
        driver.get(url)

        # Look each input up once, clear any existing value, then type.
        # "username" / "password" look like placeholders -- substitute
        # real credentials before running.
        user_field = driver.find_element_by_id("userid")
        user_field.clear()
        user_field.send_keys("username")

        password_field = driver.find_element_by_id("pwd")
        password_field.clear()
        password_field.send_keys('password')

        driver.find_element_by_id("loginbutton").click()

        # Leave the submitted form a moment before shutting the browser.
        time.sleep(2)
    finally:
        # End the session even if one of the element lookups failed.
        driver.quit()
    

     scrapy+selenium+phantomjs(下面的示例使用 Chrome 驱动;如需无界面运行,可将 webdriver.Chrome() 换成 webdriver.PhantomJS())

    class judge(Spider):
        """Spider that re-loads its start URL in Chrome and queries the HTML.

        NOTE(review): ``Spider``, ``Selector`` and ``webdriver`` are not
        imported in this snippet; presumably they come from scrapy and
        selenium -- confirm the imports in the full module.
        """

        name = "judge"
        start_urls = ["http://wenshu.court.gov.cn/List/List?sorttype=1&conditions=searchWord+2+AJLX++%E6%A1%88%E4%BB%B6%E7%B1%BB%E5%9E%8B:%E6%B0%91%E4%BA%8B%E6%A1%88%E4%BB%B6"]

        def init_driver(self):
            """Create and return a new Chrome WebDriver instance."""
            return webdriver.Chrome()

        def parse(self, response):
            """Load the first start URL in a browser and print one XPath result.

            ``response`` itself is not used: the page is fetched again
            through Selenium and the XPath runs on the browser's page
            source. The extracted list of link texts from the first
            ``dataItem`` entry is printed, not returned.
            """
            driver = self.init_driver()
            try:
                driver.get(self.start_urls[0])
                # Copy the HTML out while the browser is still open.
                sel = Selector(text=driver.page_source)
            finally:
                # A browser is started on every call, so it must be shut
                # down on every call as well, including on failure.
                driver.quit()

            self.logger.info(u'---------------Parsing----------------')
            # The parenthesised form runs under both Python 2 and Python 3.
            print(sel.xpath("//div[@class='dataItem'][1]/table/tbody/tr[1]/td/div[@class='wstitle']/a/text()").extract())
            self.logger.info(u'---------------success----------------')
    

      

     

      

     

  • 相关阅读:
    【SSH网上商城项目实战25】使用java email给用户发送邮件
    14个Xcode中常用的快捷键操作
    图文解释XCode常用快捷键的使用
    **iOS开发系列--IOS程序开发概览
    IOS:类方法(静态方法)和实例方法
    IOS:利用dispatch_once创建单例
    ios 沙盒 NSCoding(相当于JAVA对象序列化) 归档 数据存储
    NSString+URLEncoding.h --使用Obj-C对数据等进行URLEncoding编码
    iOS开发网络篇—NSURLConnection基本使用
    Block、委托、回调函数原理剖析(在Object C语境)——这样讲还不懂,根本不可能!
  • 原文地址:https://www.cnblogs.com/whoami101/p/5671426.html
Copyright © 2020-2023  润新知