1. Scrolling the scrollbar. Sometimes an element is located correctly but the expected data is not displayed. This can happen when the page loads slowly the first time and uses lazy loading, so content is only rendered as it scrolls into view. In that case we can use the auto-scroll code below to scroll the page automatically and simulate a human working the scrollbar.
# Scroll the scrollbar by repeatedly pressing the Down/Up key
import random
from time import sleep
from selenium.webdriver.common.keys import Keys

def MoveScorllBar(driver, down, range1, range2):
    moveCount = random.randint(range1, range2)      # random number of key presses
    for num in range(1, moveCount):
        if down == "down":
            driver.find_element_by_xpath("//body").send_keys(Keys.DOWN)
        else:
            driver.find_element_by_xpath("//body").send_keys(Keys.UP)
        sleep(random.uniform(0.1, 0.5))             # random pause between presses
    return
# The function can be called like this:
MoveScorllBar(driver, "down", 10, 30)  # press the Down key 10-30 times to scroll the current page down
MoveScorllBar(driver, "up", 10, 30)    # press the Up key 10-30 times to scroll the current page up
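As an alternative sketch (my addition, not part of the original post), the same lazy-loading behaviour can be triggered by scrolling with JavaScript instead of key presses; the function name and the 300-pixel step below are illustrative.

# Sketch only: scroll with JavaScript instead of arrow keys (helper name and step size are assumed)
import random
from time import sleep

def MoveScrollBarByScript(driver, down, range1, range2):
    moveCount = random.randint(range1, range2)      # random number of scroll steps
    step = 300 if down == "down" else -300          # pixels per step (illustrative value)
    for num in range(moveCount):
        driver.execute_script("window.scrollBy(0, arguments[0]);", step)
        sleep(random.uniform(0.1, 0.5))             # random pause to look more human

Calling MoveScrollBarByScript(driver, "down", 10, 30) would scroll the page down in 10-30 steps.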
2. After a new browser window pops up, handle it with the following code:
# After a new browser window pops up, direct subsequent operations to the new window
def SwitchWindow(driver):
    windows = driver.window_handles         # all open window handles
    driver.switch_to.window(windows[1])     # the popup is the second handle
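A companion sketch (my addition): remember the original window handle before the popup opens so you can close the popup and switch back afterwards; the helper name below is made up.

# Sketch: close the popup and return to the original window
def SwitchBackToMainWindow(driver, main_handle):
    driver.close()                          # close the currently focused (popup) window
    driver.switch_to.window(main_handle)    # focus the original window again

# Usage sketch:
# main_handle = driver.current_window_handle   # remember the original window first
# ... click the element that opens the popup ...
# SwitchWindow(driver)                          # work in the popup
# SwitchBackToMainWindow(driver, main_handle)   # then return to the original window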
3. Reading a CSV file
# Read a CSV file and return its rows
import csv

def Readcsvfile(filePath):
    with open(filePath, "r", newline="") as csvFile:
        reader = csv.reader(csvFile)   # csv.reader returns an iterator over rows
        return list(reader)            # materialize the rows before the file closes
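A small usage sketch, assuming a file named users.csv whose rows hold a user name and a password (both the file name and the column layout are made up):

# Illustrative usage of Readcsvfile
rows = Readcsvfile("users.csv")
for row in rows:
    print(row[0], row[1])   # e.g. the UserName and Pwd columns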
4. Writing a CSV file
# Write the rows that were read (or collected) into a new CSV file
def Savecsvfile(filePath, rows):
    # newline='' prevents a blank line being inserted between rows on Windows
    with open(filePath, 'w', newline='') as csvFile:
        writer = csv.writer(csvFile)
        writer.writerow(['UserName', 'Pwd', 'Result'])   # header row
        writer.writerows(rows)                           # data rows
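Note that in the version above the rows are passed in explicitly as a second argument instead of relying on a global outputString variable. A usage sketch with made-up test data:

# Illustrative usage of Savecsvfile (data values are made up)
outputString = []
outputString.append(['user1', '123456', 'login ok'])
outputString.append(['user2', 'abcdef', 'login failed'])
Savecsvfile("result.csv", outputString)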
5. Simulating browser access to a website (to get past anti-crawler checks) — print the page source
# coding=utf-8
import csv
import random
import io
from selenium import webdriver
from time import ctime, sleep
import sys
from urllib import request, parse, error
import http.cookiejar
import urllib
import re
import lxml
from bs4 import BeautifulSoup

subUrl = "http://10.10.1.9/super/login/index.php"

def GetWebPageSource(url):
    values = {}
    data = parse.urlencode(values).encode('utf-8')   # empty POST body

    # Request headers: fill in a real browser User-Agent string here
    user_agent = ""
    headers = {'User-Agent': user_agent, 'Connection': 'keep-alive'}

    # Declare the cookie jar and the opener
    cookie_filename = 'cookie.txt'
    cookie = http.cookiejar.MozillaCookieJar(cookie_filename)
    handler = urllib.request.HTTPCookieProcessor(cookie)
    opener = urllib.request.build_opener(handler)

    # Build the request (named req so it does not shadow the urllib.request module)
    req = urllib.request.Request(url, data, headers)
    # Get the response
    response = opener.open(req)
    html = response.read().decode('utf-8')
    # Save the cookies to cookie.txt
    cookie.save(ignore_discard=True, ignore_expires=True)

    return html

if __name__ == "__main__":
    html = GetWebPageSource(subUrl)
    print(html)
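Because the script stores its cookies in cookie.txt, a later request can load them again and reuse the session. The helper below is my own sketch on top of the same urllib and http.cookiejar APIs, not part of the original script:

# Sketch: reuse the cookies saved by GetWebPageSource for a later request
import http.cookiejar
import urllib.request

def GetWithSavedCookie(url, cookie_filename='cookie.txt'):
    cookie = http.cookiejar.MozillaCookieJar()
    cookie.load(cookie_filename, ignore_discard=True, ignore_expires=True)   # load saved cookies
    opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cookie))
    response = opener.open(url)
    return response.read().decode('utf-8')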