• selenium+PhantomJS小案例—爬豆瓣网所有电影代码python


    #coding=utf-8
    from selenium import webdriver

    def crawMovie():
        """Crawl the Douban movie listing with PhantomJS and save it to a file.

        Opens https://movie.douban.com/, clicks the first "more-link" anchor,
        then repeatedly collects the ``<a class="item">`` entries that have not
        been seen yet and clicks the "more" anchor until that anchor carries a
        non-empty ``style`` attribute. Each entry is stored as
        ``cover:<src>,title:<alt>,rating:<text> `` and the collected entries
        are written, GBK-encoded, to ``e:\\movie_list.txt``.

        The browser is always shut down, even when a locator or click raises.
        Returns None.
        """
        # Local import keeps this block self-contained; io.open behaves the
        # same on Python 2 and Python 3 and handles the encoding on write.
        import io

        driver = webdriver.PhantomJS()
        movie_list = []
        try:
            driver.get("https://movie.douban.com/")
            more_btn = driver.find_element_by_xpath('(//a[@class="more-link"])[1]')
            more_btn.click()

            while True:
                # Only fetch items beyond the ones already collected.
                start_index = len(movie_list)
                xpath_str = '//a[@class="item"][position()>%d]' % start_index
                item_tags = driver.find_elements_by_xpath(xpath_str)
                print("start_index: %d" % start_index)
                print(item_tags)
                print("number: %d" % len(item_tags))
                for item_tag in item_tags:
                    img_tag = item_tag.find_element_by_tag_name('img')
                    cover = img_tag.get_attribute("src")
                    title = img_tag.get_attribute("alt")
                    rating = item_tag.find_element_by_xpath(".//p/strong").text

                    movie = u"cover:%s,title:%s,rating:%s" % (cover, title, rating)

                    print(u"电影名:" + title)
                    movie_list.append(movie + u" ")
                print("--" * 20)

                load_more_btn = driver.find_element_by_xpath('//a[@class="more"]')
                # A non-empty style attribute on the button is the stop signal.
                # NOTE(review): presumably the page hides the button
                # (display:none) once everything is loaded - confirm.
                if load_more_btn.get_attribute("style"):
                    break
                load_more_btn.click()
        finally:
            # Always terminate the PhantomJS process; otherwise it is leaked
            # on every run, including runs that fail part-way through.
            driver.quit()

        # Raw string so the backslash is never read as an escape sequence.
        # errors="replace" stops a single non-GBK character in a title from
        # discarding the whole crawl with a UnicodeEncodeError.
        with io.open(r"e:\movie_list.txt", "w", encoding="gbk", errors="replace") as fp:
            fp.writelines(movie_list)

    # Run the crawler only when this file is executed as a script,
    # not when it is imported as a module.
    if __name__ == "__main__":
        crawMovie()
  • 相关阅读:
    三种等待时间的区别
    多种测试的测试方法
    测试面试题总结
    自动化过程中定位不到元素时使用等待方法
    账号登录测试,多表查询
    TP商城添加购物车自动化测试
    二十四个球
    老鼠喝药
    购物车测试点
    前后端分页
  • 原文地址:https://www.cnblogs.com/reyinever/p/9250467.html
Copyright © 2020-2023  润新知