selenium案例
# coding=utf-8
"""Handle a <select> drop-down on Baidu's search-settings panel.

Steps performed by this script:
1. Open the Baidu home page and hover the "设置" link.
2. Click "搜索设置" and choose the option at index 2 of the ``NR`` drop-down.
3. Click the first link inside ``#gxszButton`` and accept the alert.
4. Type "python" into the ``kw`` box and click the ``su`` button.
"""
import time

from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
# By-based locators are used instead of the find_element_by_* helpers,
# which were removed in Selenium 4.3; this form also works on Selenium 3.
from selenium.webdriver.common.by import By
# Select wraps a <select> element and provides the select_by_* methods.
from selenium.webdriver.support.select import Select

driver = webdriver.Chrome()
driver.get("https://www.baidu.com/")
# Implicit wait of 10 seconds for element lookups.
driver.implicitly_wait(10)

# Move the mouse onto the "设置" link.
mouse = driver.find_element(By.LINK_TEXT, "设置")
ActionChains(driver).move_to_element(mouse).perform()

# Click "搜索设置".
driver.find_element(By.LINK_TEXT, "搜索设置").click()
# Hard 4-second sleep: per the original author's note, neither implicit nor
# explicit waits manage to obtain the element at this point.
time.sleep(4)

# Two steps: locate the drop-down first, then pick the option.
choice = driver.find_element(By.NAME, "NR")
Select(choice).select_by_index(2)
time.sleep(2)
driver.find_element(By.XPATH, "//div[@id='gxszButton']/a[1]").click()
time.sleep(2)
driver.switch_to.alert.accept()

# After the alert is accepted, run a search from the search form.
driver.find_element(By.ID, 'kw').send_keys("python")
driver.find_element(By.ID, 'su').click()
cookie登录(百度)
# coding=utf8
"""Open the Baidu home page in Chrome and pretty-print every cookie it sets."""
from selenium import webdriver
import time
import pprint

base_url = "https://www.baidu.com"

driver = webdriver.Chrome()
driver.implicitly_wait(10)
driver.get(base_url)

# Fetch the cookies of the current session, then dump them in readable form.
all_cookies = driver.get_cookies()
pprint.pprint(all_cookies)
# coding=utf-8
"""Log in to Baidu through the UI and print the cookies before and after.

The captcha has to be typed by hand during the 10-second pause that follows
filling in the user name and password.
"""
import pprint
import time

from selenium import webdriver
# By-based locators are used instead of the find_element_by_* helpers,
# which were removed in Selenium 4.3; this form also works on Selenium 3.
from selenium.webdriver.common.by import By

base_url = "https://www.baidu.com"
# NOTE(review): credentials are hard-coded in the source; consider loading
# them from the environment or a local, untracked config file instead.
usr_name = "正牌冰峰汽水"
usr_pwd = "yanyan8174"

driver = webdriver.Chrome()
driver.implicitly_wait(10)
# Clear all cookies before loading the page.
driver.delete_all_cookies()
driver.get(base_url)
# Cookies before logging in.
pprint.pprint(driver.get_cookies())

driver.find_element(By.LINK_TEXT, "登录").click()
time.sleep(2)
driver.find_element(By.ID, "TANGRAM__PSP_10__footerULoginBtn").click()
time.sleep(2)
driver.find_element(By.CSS_SELECTOR, "#TANGRAM__PSP_10__userName").send_keys(usr_name)
driver.find_element(By.CSS_SELECTOR, "#TANGRAM__PSP_10__password").send_keys(usr_pwd)
# Enter the captcha manually while this sleep is running.
time.sleep(10)
driver.find_element(By.CSS_SELECTOR, "#TANGRAM__PSP_10__submit").click()
time.sleep(3)

# Cookies after logging in.
pprint.pprint(driver.get_cookies())
# coding=utf-8
"""Inject a previously captured cookie into a fresh Baidu session.

The cookie to inject (``cookie_4``) must be filled in by hand with the values
captured from a logged-in session. The script validates it before starting
the browser so a missing cookie is reported clearly instead of surfacing as
a WebDriver error from ``add_cookie``.
"""
import time

from selenium import webdriver

base_url = "https://www.baidu.com"

cookie_1 = {'httpOnly': True,
            'secure': False,
            'value': '1',
            'name': 'HOSUPPORT',
            'domain': '.passport.baidu.com',
            'path': '/',
            'expiry': 1768236049.395134
            }
cookie_2 = {'httpOnly': True,
            'secure': False,
            'value': 'fi_PncwhpxZ%7ETaKAcaFAwWer%7EzluYq4tLyhh8G8D-51Jh32rZKfPIAaPUksyRGhrJ-ndBYw3t-vNiNSFW6D',
            'name': 'UBI',
            'domain': '.passport.baidu.com',
            'path': '/',
            'expiry': 1768236050.02163}
cookie_3 = {'httpOnly': False,
            'secure': False,
            'value': 'f39184d315d7eacfb7b1f37fc37f5e72',
            'name': 'FP_UID',
            'domain': '.baidu.com',
            'path': '/',
            'expiry': 2556057600}
# Per the original author's testing, adding this one cookie is sufficient.
# It is left empty in the source and has to be filled in before running.
cookie_4 = {}

# Fail fast with a readable message: an empty cookie dict has no name/value
# and cannot be added to the session.
if not cookie_4.get('name') or 'value' not in cookie_4:
    raise ValueError(
        "cookie_4 is empty; fill in the captured login cookie "
        "(at least 'name' and 'value') before running this script"
    )

driver = webdriver.Chrome()
driver.implicitly_wait(10)
# Clear all cookies before loading the page.
driver.delete_all_cookies()
driver.get(base_url)

# Add the cookie; cookie_2 and cookie_3 were found to be unnecessary.
driver.add_cookie(cookie_4)
# driver.add_cookie(cookie_2)
# driver.add_cookie(cookie_3)
time.sleep(2)
driver.refresh()
cookie登录+手动输入验证码
# coding=utf-8
"""Log in to www.xx007.cn through the UI and print cookies before and after.

The captcha has to be typed by hand during the 10-second pause that follows
filling in the user name and password.
"""
import pprint
import time

from selenium import webdriver
# By-based locators are used instead of the find_element_by_* helpers,
# which were removed in Selenium 4.3; this form also works on Selenium 3.
from selenium.webdriver.common.by import By

base_url = "http://www.xx007.cn/login.asp"
# NOTE(review): credentials are hard-coded in the source; consider loading
# them from the environment or a local, untracked config file instead.
usr_name = "benq81"
usr_pwd = "jenny8174"

driver = webdriver.Chrome()
driver.implicitly_wait(10)
# Clear all cookies before loading the page.
driver.delete_all_cookies()
driver.get(base_url)
# Cookies before logging in.
pprint.pprint(driver.get_cookies())
driver.refresh()

driver.find_element(By.NAME, "username").send_keys(usr_name)
driver.find_element(By.NAME, "password").send_keys(usr_pwd)
# Enter the captcha manually while this sleep is running.
time.sleep(10)
driver.find_element(
    By.CSS_SELECTOR,
    'body > table:nth-child(12) > tbody > tr:nth-child(12) > td > input[type="submit"]',
).click()
time.sleep(2)

# Cookies after logging in.
pprint.pprint(driver.get_cookies())
# coding=utf-8
"""Replace the cookies of a www.xx007.cn session with hard-coded ones.

The script prints the cookies of a fresh session, prints the last
``=``-separated field of the current ``DvForum`` cookie (if there is one),
then deletes all cookies, adds three hard-coded cookies and reloads the page.
"""
import pprint
import time

from selenium import webdriver

base_url = "http://www.xx007.cn/"

driver = webdriver.Chrome()
driver.implicitly_wait(10)
driver.get(base_url)
pprint.pprint(driver.get_cookies())

# get_cookie() returns None when no cookie with that name exists, so guard
# the lookup instead of subscripting the result unconditionally.
existing_cookie = driver.get_cookie('DvForum')
cookie01_value = (
    existing_cookie['value'].split("=")[-1] if existing_cookie is not None else None
)
print(cookie01_value)

# NOTE(review): the values below were captured from an earlier session; the
# 'expiry' of cookie01 looks stale -- confirm before relying on it.
cookie01 = {'domain': 'www.xx007.cn',
            'expiry': 1511971.2069744722,
            'httpOnly': False,
            'name': 'DvForum',
            'path': '/',
            'secure': False,
            'value': 'userid=555232&usercookies=2&userhidden=2&password=CgWJBnM9970wE057&userclass=%B4%BF%C2%F2%BC%D2%BB%E1%D4%B1&username=benq81&StatUserID=368139236'
            }
cookie02 = {'domain': 'www.xx007.cn',
            'httpOnly': False,
            'name': 'upNum',
            'path': '/',
            'secure': False,
            'value': '0'}
cookie03 = {'domain': 'www.xx007.cn',
            'httpOnly': False,
            'name': 'ASPSESSIONIDSATQDDSQ',
            'path': '/',
            'secure': False,
            'value': 'IHDFJPPALBLMOJLCHCHHBFKD'}

driver.delete_all_cookies()
time.sleep(2)
driver.add_cookie(cookie01)
driver.add_cookie(cookie02)
driver.add_cookie(cookie03)

# Pause, then reload the page with the injected cookies in place.
time.sleep(4)
driver.refresh()
使用unittest框架编写测试用例
# coding=utf-8
'''
Project: base class BasePage, wrapping the methods shared by every page object.
It defines open() and re-defines find_element, switch_frame, send_keys, etc.
The constructor stores the driver, the base url and the expected page title.
WebDriverWait provides the explicit-wait mechanism used by find_element.
'''
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


class BasePage(object):
    """Common behaviour shared by all page objects (driver, url, element lookup)."""

    def __init__(self, selenium_driver, base_url, pagetitle):
        """Store the driver, the page url and the title fragment expected on it."""
        self.driver = selenium_driver
        self.base_url = base_url
        self.pagetitle = pagetitle

    def on_page(self, pagetitle):
        """Return True when ``pagetitle`` is contained in the current window title."""
        return pagetitle in self.driver.title

    def _open(self, url, pagetitle):
        """Load ``url``, maximize the window and verify the resulting title.

        The leading underscore marks this as an internal helper by convention.

        Raises:
            AssertionError: if ``pagetitle`` is not part of the window title.
        """
        self.driver.get(url)
        self.driver.maximize_window()
        # Raised explicitly (rather than via ``assert``) so the check is not
        # stripped when Python runs with -O; the exception type is unchanged.
        if not self.on_page(pagetitle):
            raise AssertionError("打开开页面失败 %s" % url)

    def open(self):
        """Open ``self.base_url`` and check it against ``self.pagetitle``."""
        self._open(self.base_url, self.pagetitle)

    def find_element(self, *loc):
        """Wait up to 10 s for the element to be visible, then return it.

        ``loc`` is a locator given as separate arguments, e.g.
        ``find_element(By.ID, 'kw')``. The expected condition takes the
        locator as a single tuple (no ``*``), while ``driver.find_element``
        takes it unpacked.

        Returns:
            The located element, or None (after printing a message) when the
            element does not become visible in time or cannot be found.
        """
        try:
            WebDriverWait(self.driver, 10).until(EC.visibility_of_element_located(loc))
            return self.driver.find_element(*loc)
        except (TimeoutException, NoSuchElementException):
            # Only "element missing" conditions are treated as a soft failure;
            # anything else (including KeyboardInterrupt) propagates.
            print("%s 页面中未能找到 %s 元素" % (self, loc))
            return None

    def switch_frame(self, loc):
        """Switch the driver into the frame identified by ``loc``."""
        return self.driver.switch_to.frame(loc)

    def script(self, src):
        """Execute the JavaScript ``src`` and return its result."""
        return self.driver.execute_script(src)

    def send_keys(self, loc, vaule, clear_first=True, click_first=True):
        """Type ``vaule`` into the element whose locator is stored on ``self``.

        ``loc`` is a name; the locator tuple is read from the attribute
        ``_<loc>`` of this object. The element is optionally clicked and
        cleared before typing. An AttributeError (missing attribute, or a
        lookup that returned None) is reported with a printed message.
        """
        try:
            print("使用send_keys")
            # getattr resolves the locator attribute, i.e. self._<loc>.
            loc = getattr(self, "_%s" % loc)
            if click_first:
                self.find_element(*loc).click()
            if clear_first:
                self.find_element(*loc).clear()
            self.find_element(*loc).send_keys(vaule)
        except AttributeError:
            print("%s 页面中未能找到 %s 元素" % (self, loc))
# coding=utf-8
'''
Project: basic page operations such as open, input_username, input_password
and click_submit.
'''
from selenium.webdriver.common.by import By
from test_basePage import BasePage


class LoginPage(BasePage):
    """Page object for the login page; builds on BasePage."""

    # Locators: (strategy, value) pairs identifying the page elements.
    username_loc = (By.NAME, 'email')
    password_loc = (By.NAME, 'password')
    submit_loc = (By.ID, 'dologin')
    span_loc = (By.CSS_SELECTOR, "div.error-tt>p")
    dynpw_loc = (By.ID, "lbDynPw")
    userid_loc = (By.ID, "spnUid")

    def _type_into(self, locator, text):
        """Look up the element for ``locator`` and type ``text`` into it."""
        field = self.find_element(*locator)
        field.send_keys(text)

    def _click_on(self, locator):
        """Look up the element for ``locator`` and click it."""
        target = self.find_element(*locator)
        target.click()

    def _text_of(self, locator):
        """Look up the element for ``locator`` and return its text."""
        node = self.find_element(*locator)
        return node.text

    def open(self):
        """Open the page; overrides the method of the same name on BasePage."""
        url, title = self.base_url, self.pagetitle
        self._open(url, title)

    def input_username(self, username):
        """Type ``username`` into the user-name field."""
        self._type_into(self.username_loc, username)

    def input_password(self, password):
        """Type ``password`` into the password field."""
        self._type_into(self.password_loc, password)

    def click_submit(self):
        """Click the login button."""
        self._click_on(self.submit_loc)

    def show_span(self):
        """Return the text of the tip shown for an invalid user name or password."""
        return self._text_of(self.span_loc)

    def swich_DynPw(self):
        """Switch to dynamic-password login (original note: effective under IE)."""
        self._click_on(self.dynpw_loc)

    def show_userid(self):
        """Return the user id text shown on the page after a successful login."""
        return self._text_of(self.userid_loc)
# coding=utf-8
'''
Project: basic page operations such as open, input_username, input_password
and click_submit.
'''
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
# WebDriverWait and EC are used by show_error(); without these imports the
# method raised NameError, which its bare ``except`` turned into False.
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from test_basePage import BasePage


class LoginPage(BasePage):
    """Page object for the login form, which lives inside an iframe."""

    # Locators: (strategy, value) pairs identifying the page elements.
    username_loc = (By.NAME, 'email')
    password_loc = (By.NAME, 'password')
    submit_loc = (By.ID, 'dologin')
    error_loc = (By.XPATH, "//div[@class='ferrorhead']")
    # Read by swich_DynPw(); it was missing from this class, so that method
    # raised AttributeError. Value taken from the other LoginPage variant.
    dynpw_loc = (By.ID, "lbDynPw")
    userid_loc = (By.ID, "spnUid")
    # Frame reference passed to switch_frame() for the login box.
    frame_loc = "x-URS-iframe"

    def input_username(self, username):
        """Type ``username`` into the user-name field."""
        self.find_element(*self.username_loc).send_keys(username)

    def input_password(self, password):
        """Type ``password`` into the password field."""
        self.find_element(*self.password_loc).send_keys(password)

    def click_submit(self):
        """Click the login button."""
        self.find_element(*self.submit_loc).click()

    def switch_to_frame(self):
        """Switch into the iframe that contains the login box."""
        self.switch_frame(self.frame_loc)

    def show_error(self):
        """Return the login error text, or False when no error appears.

        Waits up to 10 seconds for the error element to become visible.
        """
        try:
            WebDriverWait(self.driver, 10).until(
                EC.visibility_of_element_located(self.error_loc)
            )
        except TimeoutException:
            return False
        element = self.find_element(*self.error_loc)
        # find_element may hand back None if the element vanished meanwhile.
        if element is None:
            return False
        return element.text

    def swich_DynPw(self):
        """Switch to dynamic-password login (original note: effective under IE)."""
        self.find_element(*self.dynpw_loc).click()

    def check_current_title(self):
        """Return the title of the current window."""
        return self.driver.title

    def show_userid(self):
        """Return the user id text shown on the page after a successful login."""
        return self.find_element(*self.userid_loc).text
# -*- coding:utf8 -*-
'''
Project: test cases written with the unittest framework.
'''
import time
import unittest

from test_126_loginPage import LoginPage
from selenium import webdriver


class Caselogin126mail(unittest.TestCase):
    """Login test case for the 126 mail site."""

    def setUp(self):
        """Start Chrome and set the url and account used by the test."""
        self.driver = webdriver.Chrome()
        # Registered immediately so the browser is closed after every test,
        # whether it passes, fails or errors; previously it was never closed.
        self.addCleanup(self.driver.quit)
        self.driver.implicitly_wait(30)
        self.url = "http://www.126.com"
        # NOTE(review): credentials are hard-coded in the source; consider
        # loading them from the environment instead.
        self.username = "zhpmiss@126.com"
        self.password = "zhou0829miss@"

    def test_login_mail(self):
        """Log in and check either the error text or the displayed user id."""
        # Build the page object and open the page.
        login_page = LoginPage(self.driver, self.url, "网易")
        login_page.open()
        # Pause, then switch into the login-box frame.
        time.sleep(4)
        login_page.switch_to_frame()
        # Fill in the form and submit it.
        login_page.input_username(self.username)
        login_page.input_password(self.password)
        login_page.click_submit()

        # Query the error once: each call may wait for the error element, and
        # calling twice could observe two different page states.
        error_text = login_page.show_error()
        if error_text:
            print("测试帐号密码有误的情况下是否弹出提示框:")
            self.assertEqual(error_text, "帐号或密码错误")
        else:
            print("测试帐号密码正确的情况下是否进入确定页面:")
            self.assertEqual(login_page.show_userid(), 'zhpmiss@126.com')

    def tearDown(self):
        """Report the end of the test; the driver is closed via addCleanup."""
        print("测试完毕")


if __name__ == "__main__":
    unittest.main()
完整案例:1、今日头条
"""Scrape headline titles from the "科技" channel of toutiao.com.

The script opens the site in Chrome, clicks the "科技" link, scrolls the page
in three steps, then parses the page source and prints every title found
under ``div.wcommonFeed``.
"""
import time

from lxml import etree
from pyquery import PyQuery as pq
from selenium import webdriver
# By-based locators are used instead of the find_element_by_* helpers,
# which were removed in Selenium 4.3; this form also works on Selenium 3.
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
driver.maximize_window()
driver.get('https://www.toutiao.com/')
# Implicit wait of 10 s; it is a session-wide setting, so one call is enough.
driver.implicitly_wait(10)
driver.find_element(By.LINK_TEXT, '科技').click()

# Scroll to offsets 0, 500 and 1000, pausing 2 s after each step.
for i in range(3):
    js = "var q = document.documentElement.scrollTop=" + str(i * 500)
    driver.execute_script(js)
    time.sleep(2)
time.sleep(5)

page = driver.page_source
# Pass the source through pyquery, then re-parse its string form with lxml
# for the XPath queries below.
doc = pq(page)
doc = etree.HTML(str(doc))
contents = doc.xpath('//div[@class="wcommonFeed"]/ul/li')
# Prints the list of matched element objects.
print(contents)
print("--------------------------")

for x in contents:
    title = x.xpath('div/div[1]/div/div[1]/a/text()')
    # Items without a title text node are skipped.
    if title:
        title = title[0]
        # Writing to a file is disabled; enable to append titles to a file:
        # with open('toutiao.txt','a+',encoding='utf8')as f:
        #     f.write(title+' ')
        print(title)
Selenium抓取今日头条:向下滚动获取50页内容,并保存到文本文件