注意事项:
- 1.模拟登陆的请求参数有些是动态的,需要从页面源码中动态解析获取
- 2.下载图片最好用requests来下载,而不是urllib,因为它会带上UA信息
- 3.使用Session去自动处理cookie请求
import http.client
import json
import mimetypes
import time
import urllib
from urllib import request

import requests
from lxml import etree

from get_img_code import YDMHttp

# Seconds to wait for the site before giving up on a single HTTP request;
# without a timeout a stalled connection would block the script forever.
REQUEST_TIMEOUT = 10


def get_code(types, filename):
    """Recognise a captcha image through the YunDaMa (``YDMHttp``) client.

    Args:
        types: Captcha type ID forwarded to the service. Per the service's
            price list (http://www.yundama.com/price.html), 1004 means four
            alphanumeric characters; different types are billed differently.
        filename: Path of the captcha image file to recognise.

    Returns:
        The ``result`` value returned by ``YDMHttp.decode``, or ``None`` when
        the account settings below are still the sample placeholder.
    """
    # Account name of an ordinary (non-developer) user.
    username = 'xxxxxxxx'
    # Account password.
    password = 'xxxxxxxx'
    # Software ID, required for developer revenue sharing; found under
    # "My software" in the developer console.
    # NOTE: redacted placeholder -- replace with the real integer ID, the bare
    # name raises NameError as written.
    appid = xxxxx
    # Software key, from the same developer console page.
    appkey = 'xxxxxxxxxxxxxxxxxxxxxxxxxx'
    # Recognition timeout, in seconds.
    timeout = 60

    # Guard clause: refuse to run with the sample configuration and return
    # explicitly instead of falling through to an unbound ``result``.
    if username == 'username':
        print('请设置好相关参数再测试')
        return None

    # Initialise the client.
    yundama = YDMHttp(username, password, appid, appkey)

    # Log in to the recognition service.
    uid = yundama.login()
    print(f'uid: {uid}')

    # Query the remaining balance.
    balance = yundama.balance()
    print(f'balance: {balance}')

    # Run the recognition: image path, captcha type ID, timeout (seconds).
    cid, result = yundama.decode(filename, types, timeout)
    print(f'cid: {cid}, result: {result}')
    return result


def _first_match(tree, expression):
    """Return the first XPath match in *tree*, or raise a descriptive IndexError."""
    matches = tree.xpath(expression)
    if not matches:
        raise IndexError(f'no match for XPath {expression!r} in the login page')
    return matches[0]


#########################################
home_url = "https://so.gushiwen.org/user/login.aspx"
login_url = "https://so.gushiwen.org/user/login.aspx"
headers = {"user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"}

# Use a Session (under its own name, so the ``requests`` module stays
# reachable) so cookies set by the login page are sent with the later requests.
with requests.Session() as session:
    # 1. Request the login page and download the captcha image locally.
    home_response = session.get(url=home_url, headers=headers, timeout=REQUEST_TIMEOUT)
    home_response.raise_for_status()
    page_text = home_response.text
    tree = etree.HTML(page_text)

    # Some login form parameters are dynamic and must be parsed from the page.
    view_state = _first_match(tree, '//input[@id="__VIEWSTATE"]/@value')
    view_state_generator = _first_match(tree, '//input[@id="__VIEWSTATEGENERATOR"]/@value')
    img_code_url = "https://so.gushiwen.org" + _first_match(tree, '//img[@id="imgCode"]/@src')

    image_response = session.get(url=img_code_url, headers=headers, timeout=REQUEST_TIMEOUT)
    image_response.raise_for_status()
    page_content = image_response.content
    with open("./code.png", "wb") as f:
        f.write(page_content)

    # 2. Recognise the captcha image.
    code = get_code(1004, "./code.png")

    data = {
        "__VIEWSTATE": view_state,
        "__VIEWSTATEGENERATOR": view_state_generator,
        "from": "",
        "email": "xxxxxxxxxx@qq.com",
        "pwd": "xxxxxxxx",
        "code": code,
        "denglu": "登录",
    }
    # Debug output; note that this echoes the account password.
    print(data)

    # 3. Perform the simulated login. The response is saved without a status
    # check so that whatever the server returned can be inspected afterwards.
    page_text = session.post(url=login_url, headers=headers, data=data, timeout=REQUEST_TIMEOUT).text
    with open("./gushiwen1.html", "w", encoding="utf-8") as f:
        f.write(page_text)