这里介绍一款强大的验证码识别平台:超级鹰
- 超级鹰:http://www.chaojiying.com/about.html
- 注册:普通用户
- 登录:普通用户
- 题分查询:充值
- 创建一个软件(id)
- 下载示例代码
1. 打开url
页面打开后默认显示的是扫码登录界面,这显然不是我们想要的,因此应该先切换到账号登录界面。
2. 打开账号登录界面
3. 利用超级鹰对当前验证码进行识别
4. 思路:模拟登录本身的思路很简单,难点在于验证码的识别与正确选项的定位——如何让浏览器自行点选正确的验证码图片,是我们需要解决的最大问题。
from selenium import webdriver
import time
from PIL import Image
from selenium.webdriver import ActionChains
import requests
from hashlib import md5
from selenium.webdriver import Chrome
from selenium.webdriver import ChromeOptions


class Chaojiying_Client(object):
    """Minimal HTTP client for the Chaojiying captcha-recognition service.

    Based on the vendor's sample code. The password is never sent in clear
    text: its MD5 hex digest is submitted in the ``pass2`` form field.
    """

    def __init__(self, username, password, soft_id):
        """Store the credentials and build the form fields shared by all calls.

        Args:
            username: Chaojiying account name.
            password: Account password as ``str``; hashed with MD5 here.
            soft_id: Software ID created in the Chaojiying user centre.
        """
        self.username = username
        password = password.encode('utf8')
        self.password = md5(password).hexdigest()
        self.soft_id = soft_id
        # Sent with every request to authenticate the caller.
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def PostPic(self, im, codetype):
        """Upload a captcha image and return the service's decoded JSON reply.

        Args:
            im: Raw image bytes.
            codetype: Captcha type code; see
                http://www.chaojiying.com/price.html for the list.

        Returns:
            The JSON response body parsed by ``requests`` (the script below
            reads its ``'pic_str'`` entry).
        """
        params = {
            'codetype': codetype,
        }
        params.update(self.base_params)
        files = {'userfile': ('ccc.jpg', im)}
        r = requests.post('http://upload.chaojiying.net/Upload/Processing.php', data=params, files=files,
                          headers=self.headers)
        return r.json()

    def ReportError(self, im_id):
        """Report a wrongly recognised captcha.

        Args:
            im_id: Image ID of the captcha being reported.

        Returns:
            The JSON response body parsed by ``requests``.
        """
        params = {
            'id': im_id,
        }
        params.update(self.base_params)
        r = requests.post('http://upload.chaojiying.net/Upload/ReportError.php', data=params, headers=self.headers)
        return r.json()


def _parse_click_points(result):
    """Turn a coordinate answer such as ``'148,102|236,203'`` into int pairs.

    Points are separated by ``|`` and each point is ``x,y``; a single point
    has no ``|`` at all, which ``str.split`` handles without a special case.

    Raises:
        ValueError: If ``result`` is empty or a point is not ``x,y`` integers.
    """
    if not result:
        raise ValueError('captcha service returned no coordinates')
    points = []
    for pair in result.split('|'):
        x_text, y_text = pair.split(',')
        # The second field is y (the original code read field 0 twice).
        points.append([int(x_text), int(y_text)])
    return points


# Screenshot pixels per page pixel. Crop accuracy depends on the OS display
# scaling; this machine is set to 125 %.
SCREEN_SCALE = 1.25

# Hide the "controlled by automated software" switch to evade detection.
option = ChromeOptions()
option.add_experimental_option('excludeSwitches', ['enable-automation'])
# NOTE(review): executable_path and find_element(s)_by_* are Selenium 3-era
# APIs that later Selenium 4 releases removed - confirm the installed version.
bro = webdriver.Chrome(executable_path='./chromedriver.exe', options=option)
try:
    # Maximise the window.
    bro.maximize_window()
    # Open the login page.
    bro.get('https://kyfw.12306.cn/otn/resources/login.html')
    # The page opens on QR-code login; switch to the account-login tab.
    btn = bro.find_elements_by_xpath('/html/body/div[2]/div[2]/ul/li[2]/a')[0]
    btn.click()
    time.sleep(2)

    # Screenshot the whole page, then cut the captcha image out of it.
    bro.save_screenshot('12306.png')
    pic = bro.find_element_by_xpath('/html/body/div[2]/div[2]/div[1]/div[2]/div[3]/div/div[4]/img')
    location = pic.location  # top-left corner of the captcha element
    size = pic.size          # width and height of the captcha element
    # (left, upper, right, lower) of the captcha, scaled to screenshot pixels.
    rangle = (
        location['x'] * SCREEN_SCALE,
        location['y'] * SCREEN_SCALE,
        (location['x'] + size['width']) * SCREEN_SCALE,
        (location['y'] + size['height']) * SCREEN_SCALE,
    )
    code_img_name = 'code.png'  # file name of the cropped captcha
    with Image.open('./12306.png') as i:
        frame = i.crop(rangle)
        frame.save(code_img_name)

    # Ask Chaojiying to solve the captcha.
    # NOTE(review): the soft id literal has a leading space - confirm intended.
    chaojiying = Chaojiying_Client('用户名', '密码', ' 905993')
    with open('./code.png', 'rb') as code_file:
        im = code_file.read()
    result = chaojiying.PostPic(im, 9004)['pic_str']
    print(result)
    all_list = _parse_click_points(result)

    for x, y in all_list:
        # Click at (x, y) using the captcha image as the frame of reference.
        # NOTE(review): the answer is in cropped-screenshot pixels (already
        # scaled by SCREEN_SCALE) while this offset is applied to the page
        # element - confirm whether it must be divided by SCREEN_SCALE, and
        # which offset origin the installed Selenium version uses.
        ActionChains(bro).move_to_element_with_offset(pic, x, y).click().perform()
        time.sleep(0.5)

    bro.find_element_by_id('J-userName').send_keys('xxxx')
    bro.find_element_by_id('J-password').send_keys('ssss')
    bro.find_element_by_id('J-login').click()
    time.sleep(3)
finally:
    # Always shut the browser down, even when a step above fails.
    bro.quit()