Python3爬虫实战【点触验证码】 — 模拟登录bilibili
爬虫思路如下:
利用自动化爬虫工具 Selenium 模拟点击输入等操作来进行登录
分析页面,获取点触验证码的点触图片,通过将图片发送给超级鹰打码平台识别后获取坐标信息
根据超级鹰返回的数据,模拟坐标的点选,即可实现登录
一.准备工作
在开始之前,需要先注册一个超级鹰账号并申请一个软件ID,注册页面链接为:https://www.chaojiying.com/user/reg/,注册完成后需要在后台添加一个软件ID,进行充值获得积分,一般充一块钱就可以了。
二.爬虫构建
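1.首先我们可以到超级鹰官方网站下载对应的 Python API,链接为:https://www.chaojiying.com/api-14.html 。将下载得到的 API 文件保存为 chaojiying.py,并与下面的脚本放在同一目录下,这样才能通过 from chaojiying import Chaojiying_Client 导入。完整代码如下: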
# Log in to bilibili with Selenium and solve the click captcha by sending a
# screenshot of it to the Chaojiying recognition service.
import random
import time

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

from chaojiying import Chaojiying_Client

USERNAME = 'b站账号'
PASSWORD = 'b站密码'

CHAOJIYING_USERNAME = '超级鹰用户名'
CHAOJIYING_PASSWORD = '超级鹰密码'
# Placeholder: replace with the software ID created in the Chaojiying console.
CHAOJIYING_SOFT_ID = 软件ID
CHAOJIYING_KIND = 9005  # captcha type

LOGIN_URL = 'https://passport.bilibili.com/login'
CAPTCHA_IMAGE_PATH = 'yzm.png'
CAPTCHA_WAIT_SECONDS = 10

LOGIN_BUTTON_SELECTOR = '#geetest-wrap > div > div.btn-box > a.btn.btn-login'
CAPTCHA_PANEL_SELECTOR = (
    'body > div.geetest_panel.geetest_wind > '
    'div.geetest_panel_box.geetest_no_logo.geetest_panelshowclick > '
    'div.geetest_panel_next > div > div'
)
CAPTCHA_CONFIRM_SELECTOR = (
    'body > div.geetest_panel.geetest_wind > '
    'div.geetest_panel_box.geetest_no_logo.geetest_panelshowclick > '
    'div.geetest_panel_next > div > div > div.geetest_panel > a > div'
)


def random_pause():
    """Sleep for a random duration in [0, 3) seconds between UI actions."""
    time.sleep(random.random() * 3)


browser = webdriver.Chrome()
browser.get(LOGIN_URL)
browser.maximize_window()

# find_element(By.CSS_SELECTOR, ...) is available in both Selenium 3 and 4,
# unlike the find_elements_by_* helpers.
user = browser.find_element(By.CSS_SELECTOR, '#login-username')
pwd = browser.find_element(By.CSS_SELECTOR, '#login-passwd')

user.click()
random_pause()
user.send_keys(USERNAME)
pwd.click()
random_pause()
pwd.send_keys(PASSWORD)
random_pause()

button = browser.find_element(By.CSS_SELECTOR, LOGIN_BUTTON_SELECTOR)
button.click()

# Wait explicitly for the captcha panel: a random sleep can be close to zero
# seconds, in which case the panel may not be in the page yet.
img_browser = WebDriverWait(browser, CAPTCHA_WAIT_SECONDS).until(
    EC.visibility_of_element_located((By.CSS_SELECTOR, CAPTCHA_PANEL_SELECTOR))
)
random_pause()
img_browser.screenshot(CAPTCHA_IMAGE_PATH)

chaojiying = Chaojiying_Client(
    CHAOJIYING_USERNAME, CHAOJIYING_PASSWORD, CHAOJIYING_SOFT_ID
)
# Read the screenshot inside a context manager so the file handle is closed.
with open(CAPTCHA_IMAGE_PATH, 'rb') as image_file:
    im = image_file.read()
res = chaojiying.PostPic(im, CHAOJIYING_KIND)
print(res)

if res['err_no'] == 0:
    # 'pic_str' is split into '|'-separated points, each of the form "x,y".
    for pic in res['pic_str'].split('|'):
        x, y = pic.split(',')
        # NOTE(review): the returned coordinates are passed straight through
        # as element offsets. Selenium 4 is documented as measuring these
        # offsets from the element's center rather than its top-left corner --
        # confirm against the installed Selenium version and adjust if needed.
        ActionChains(browser).move_to_element_with_offset(
            img_browser, int(x), int(y)
        ).click().perform()
        random_pause()
    time.sleep(1)
    login_btn = browser.find_element(By.CSS_SELECTOR, CAPTCHA_CONFIRM_SELECTOR)
    login_btn.click()
else:
    print('验证码识别错误')
    # Dump the client and the raw response to help diagnose the failure.
    print(chaojiying, res)