• <爬虫>验证码登录三种方式


    一、手动输入

    import os

    import requests
    from bs4 import BeautifulSoup
     3 
    '''手动输入验证码:用浏览器登录古诗文网,抓包获取登录接口和form表单;
                      将验证码图片下载,输入验证码,加入form表单;
                      获取form表单所需参数;
                      带着form表单发送登录请求
                      注:需要建立会话'''
     9 
    # Browser-like User-Agent header sent with every request made by this script.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) '
                             'AppleWebKit/537.36 (KHTML, like Gecko) '
                             'Chrome/73.0.3683.86 Safari/537.36'}
    13 
    def download_code(s):
        """Fetch the login page, save its captcha image, and return the form tokens.

        Args:
            s: Session-like object whose ``get`` method is used for both the
                page and the image request (``main`` passes a
                ``requests.Session``).

        Returns:
            A ``(__VIEWSTATE, __VIEWSTATEGENERATOR)`` tuple holding the values
            of the two hidden inputs with those ids on the login page.
        """
        # Request the login page.
        url = 'https://so.gushiwen.org/user/login.aspx?from='
        r = s.get(url, headers=headers)

        # Parse the page and build the captcha image URL from the <img id="imgCode"> tag.
        soup = BeautifulSoup(r.text, 'lxml')
        img_src = soup.find('img', id='imgCode')['src']
        img_url = 'https://so.gushiwen.org' + img_src
        print(img_url)

        # Download the captcha through the same session object and save it.
        r_img = s.get(img_url, headers=headers)
        # The target directory may not exist yet; open() would fail without it.
        os.makedirs('img', exist_ok=True)
        with open('img/code.png', 'wb') as fp:
            fp.write(r_img.content)

        # Collect the two hidden form fields required by the login POST.
        view_state = soup.find('input', id='__VIEWSTATE')['value']
        view_state_generator = soup.find('input', id='__VIEWSTATEGENERATOR')['value']

        return view_state, view_state_generator
    35 
    def login(s, VIEW, VIEWG, email='18404904721', pwd='gjp625262'):
        """Prompt for the captcha text, submit the login form, and save the reply.

        Args:
            s: Session-like object whose ``post`` method sends the form
                (``main`` passes the same session used to fetch the captcha).
            VIEW: Value for the ``__VIEWSTATE`` form field.
            VIEWG: Value for the ``__VIEWSTATEGENERATOR`` form field.
            email: Account name sent in the ``email`` field.
            pwd: Password sent in the ``pwd`` field.

        The response body is written to ``gushi.html``; nothing is returned.
        """
        # NOTE(review): the default credentials are hard-coded; prefer passing
        # them in explicitly rather than keeping real secrets in source.
        post_url = 'https://so.gushiwen.org/user/login.aspx?from='

        # The captcha is read by a human from the previously saved image.
        code = input('输入验证码:')

        form_data = {
            '__VIEWSTATE': VIEW,
            '__VIEWSTATEGENERATOR': VIEWG,
            'code': code,
            'denglu': '登录',
            'email': email,
            'from': '',
            'pwd': pwd,
        }

        r = s.post(url=post_url, headers=headers, data=form_data)

        # Keep the returned page so the login result can be inspected.
        with open('gushi.html', 'w', encoding='utf8') as fp:
            fp.write(r.text)
    53 
    def main():
        """Run the manual-captcha login flow end to end."""
        # One session object is used for every request in the flow.
        s = requests.Session()

        # Download the captcha image and read the hidden form tokens.
        VIEW, VIEWG = download_code(s)

        # Ask for the captcha text and submit the login form.
        login(s, VIEW, VIEWG)


    if __name__ == '__main__':
        main()

    二、tesseract光学识别

    from PIL import Image
    import pytesseract
    
    '''下载tesseract
        pip install pytesseract'''
    
    # Open the captcha image.
    img = Image.open(r'img/code.png')

    # Convert to grayscale so a single threshold can be applied.
    img = img.convert('L')

    # Binarize: levels below the threshold map to 0, all others to 1.
    threshold = 140
    table = [0 if level < threshold else 1 for level in range(256)]
    out = img.point(table, '1')
    out.show()

    # Recognize the binarized image. Previously the thresholded result was
    # only displayed and OCR ran on the plain grayscale image instead.
    img = out.convert('RGB')
    print(pytesseract.image_to_string(img))

    三、打码平台(云打码)

    from YDMHTTPDemo3 import YDMHttp
    '''打码平台:云打码
                 打码兔'''
    
    ######################################################################
    
    # Account user name.
    # NOTE(review): real credentials are hard-coded in this section; consider
    # loading them from the environment or a local config file instead.
    username = 'mianxiang_mei'

    # Account password.
    password = 'gjp625262'

    # Software ID, required for the developer revenue share; shown under
    # "My Software" in the developer console.
    appid = 8212

    # Software key, required for the developer revenue share; shown under
    # "My Software" in the developer console.
    appkey = 'dbd2645a635701a0a9f19fd0072d82c3'

    # Captcha image file.
    filename = 'img/code.png'

    # Captcha type, e.g. 1004 means 4 alphanumeric characters. Types are priced
    # differently and a wrong type lowers the recognition rate. All types are
    # listed at http://www.yundama.com/price.html
    codetype = 1004

    # Timeout in seconds.
    timeout = 60
    
    # Only talk to the service once the placeholder user name has been replaced.
    if username != 'username':
        # Build the client.
        yundama = YDMHttp(username, password, appid, appkey)

        # Log in to Yundama.
        uid = yundama.login()
        print('uid: %s' % uid)

        # Query the account balance.
        balance = yundama.balance()
        print('balance: %s' % balance)

        # Start recognition: image path, captcha type ID, timeout (seconds);
        # the call yields the captcha ID and the recognized text.
        cid, result = yundama.decode(filename, codetype, timeout)
        print('cid: %s, result: %s' % (cid, result))
    else:
        print('请设置好相关参数再测试')
    
    ######################################################################

    附:云打码调用的类

    import http.client, mimetypes, urllib, json, time, requests
    
    ######################################################################
    
    class YDMHttp:
        """Small client for the Yundama captcha-recognition HTTP API.

        Every call posts a form to ``apiurl`` and parses the reply as JSON.
        Failures are reported through return values, not exceptions: the
        service's own negative ``ret`` code, ``-9001`` when the parsed reply
        is empty, and ``-3003`` when ``decode`` runs out of polling attempts.
        """

        apiurl = 'http://api.yundama.com/api.php'
        username = ''
        password = ''
        appid = ''
        appkey = ''

        def __init__(self, username, password, appid, appkey):
            """Store the account and software credentials sent with each call."""
            self.username = username
            self.password = password
            self.appid = str(appid)  # sent as a form field, so kept as text
            self.appkey = appkey

        def _payload(self, method, **extra):
            """Build the common form fields for *method*, followed by *extra*."""
            data = {'method': method, 'username': self.username, 'password': self.password, 'appid': self.appid, 'appkey': self.appkey}
            data.update(extra)
            return data

        @staticmethod
        def _field_or_error(response, key):
            """Return ``response[key]``, a negative ``ret`` code, or -9001 if empty."""
            if not response:
                return -9001
            ret = response['ret']
            if ret and ret < 0:
                return ret
            return response[key]

        def request(self, fields, files=None):
            """Post *fields* (and optional *files*) to the API and decode the JSON reply.

            Args:
                fields: Mapping of form fields to send.
                files: Optional mapping of form field name to file path.

            Returns:
                The reply parsed with ``json.loads``.
            """
            response = self.post_url(self.apiurl, fields, files)
            return json.loads(response)

        def balance(self):
            """Return the reply's ``balance`` field, or a negative error code."""
            response = self.request(self._payload('balance'))
            return self._field_or_error(response, 'balance')

        def login(self):
            """Return the reply's ``uid`` field, or a negative error code."""
            response = self.request(self._payload('login'))
            return self._field_or_error(response, 'uid')

        def upload(self, filename, codetype, timeout):
            """Upload the image at *filename*; return its ``cid`` or a negative error code."""
            data = self._payload('upload', codetype=str(codetype), timeout=str(timeout))
            response = self.request(data, {'file': filename})
            return self._field_or_error(response, 'cid')

        def result(self, cid):
            """Return the ``text`` field for captcha *cid*, or ``''`` if there is none."""
            response = self.request(self._payload('result', cid=str(cid)))
            if not response:
                return ''
            return response['text'] or ''

        def decode(self, filename, codetype, timeout):
            """Upload a captcha and poll once per second for its text.

            Args:
                filename: Path of the captcha image.
                codetype: Captcha type id forwarded to ``upload``.
                timeout: Forwarded to ``upload`` and also used as the maximum
                    number of polling attempts.

            Returns:
                ``(cid, text)`` on success, ``(-3003, '')`` when no text was
                returned within *timeout* attempts, or ``(error_code, '')``
                when the upload did not yield a positive ``cid``.
            """
            cid = self.upload(filename, codetype, timeout)
            if cid <= 0:
                return cid, ''
            for _ in range(timeout):
                text = self.result(cid)
                if text != '':
                    return cid, text
                time.sleep(1)
            return -3003, ''

        def report(self, cid):
            """Send a ``report`` call with flag ``'0'`` for *cid*; return ``ret`` or -9001."""
            response = self.request(self._payload('report', cid=str(cid), flag='0'))
            if response:
                return response['ret']
            return -9001

        def post_url(self, url, fields, files=None):
            """POST *fields* and the files named in *files* to *url*; return the body text.

            Args:
                url: Target URL.
                fields: Mapping of form fields.
                files: Optional mapping of form field name to file path. The
                    mapping is left untouched; handles are opened on a private
                    copy and closed again before returning.
            """
            opened = {}
            try:
                for key, path in (files or {}).items():
                    opened[key] = open(path, 'rb')
                res = requests.post(url, files=opened, data=fields)
                return res.text
            finally:
                # Close every handle, even if an open or the request failed.
                for handle in opened.values():
                    handle.close()
  • 相关阅读:
    Collection 和 Collections的区别?
    事务是什么?有哪些属性,并简要说明这些属性的含义。
    jsp有哪些内置对象?作用分别是什么?(至少三个)
    谈谈对XML的理解?说明Web应用中Web.xml文件的作用?
    类有哪三个基本特性?各特性的优点?
    Oracle安装完成后,如何用命令行启动和关闭数据库?
    请说明SQLServer中delete from tablea & truncate table tablea的区别
    试述数据库完整保护的主要任务和措施。
    存储过程和函数的区别
    性能
  • 原文地址:https://www.cnblogs.com/Finance-IT-gao/p/11146507.html
Copyright © 2020-2023  润新知