• 模拟登录


    1. github

    import requests
    from lxml import etree
    
    
    class Login(object):
        """Simulate a browser login to GitHub using one cookie-preserving session.

        Flow: GET the login page to read the form's ``authenticity_token``,
        POST the credentials together with that token to the session endpoint,
        then fetch a profile page with the same session and save it to disk.
        """

        def __init__(self, timeout=10):
            """Set up URLs, headers and the shared ``requests`` session.

            :param timeout: seconds to wait on each HTTP request before
                ``requests`` raises a timeout error. Without a timeout a
                stalled connection would block forever.
            """
            # Browser-like headers describing the login form's origin. No method
            # of this class sends them as written; they stay available as a
            # public attribute.
            self.headers = {
                'Origin': 'https://github.com',
                # Fixed: the original literal contained stray spaces
                # ('https: // github.com /') and was not a valid URL.
                'Referer': 'https://github.com/',
                'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
                'Host': 'github.com'
            }
            # Page that serves the login form (and the token inside it).
            self.login_url = 'https://github.com/login'
            # Endpoint the login form is posted to.
            self.login_post_url = 'https://github.com/session'
            self.timeout = timeout
            self.session = requests.session()

        def get_token(self):
            """Fetch the login page and return its ``authenticity_token`` value.

            :returns: the token string read from the login form.
            :raises IndexError: if no token input is found in the page.
            """
            headers = {
                'Host': 'github.com',
                'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36',
            }
            res_text = self.session.get(
                url=self.login_url, headers=headers, timeout=self.timeout
            ).text
            tree = etree.HTML(res_text)
            # Select the hidden input by the same field name that login() posts,
            # instead of by position, so unrelated layout changes do not pick
            # up the wrong <input>.
            tokens = tree.xpath('//input[@name="authenticity_token"]/@value')
            if not tokens:
                # Fall back to the original positional lookup.
                tokens = tree.xpath('//div//input[2]/@value')
            if not tokens:
                # Same exception type the bare [0] index used to raise, but
                # with a message that says what is actually missing.
                raise IndexError('authenticity_token not found in login page')
            return tokens[0]

        def login(self, username, password):
            """Post the login form; on HTTP 200 save the response and profile page.

            :param username: value sent as the form's ``login`` field.
            :param password: value sent as the form's ``password`` field.

            Writes the POST response body to ``github.html`` and, via
            :meth:`get_email_page`, the profile page to ``xxxxxx.html``.
            """
            token = self.get_token()
            print(token)
            post_data = {
                "login": username,
                "password": password,
                "commit": "Sign in",
                "utf8": "",
                "authenticity_token": token
            }
            # The token must be posted through the same session that fetched it.
            res = self.session.post(
                url=self.login_post_url, data=post_data, timeout=self.timeout
            )
            print(res.status_code)
            if res.status_code == 200:
                self.get_email_page()
                with open("github.html", "wb") as f:
                    f.write(res.content)

        def get_email_page(self):
            """Fetch the hard-coded profile page and save it as ``xxxxxx.html``."""
            print('获取个人页')
            email_data = self.session.get(
                'https://github.com/tjp40922', timeout=self.timeout
            ).text
            with open('xxxxxx.html', 'w', encoding='utf8') as f:
                f.write(email_data)
    
    
    
    if __name__ == '__main__':
        # Script entry point: prompt for credentials and run the login flow.
        # Imported locally so the snippet stays self-contained.
        from getpass import getpass

        login = Login()
        username = input('请输入用户名')
        # Read the password without echoing it to the terminal.
        password = getpass('请输入密码')
        login.login(username, password)

      注意点:

        1. 必须先请求登录页获取 token(authenticity_token);token 与会话一一对应,所以获取 token 和提交登录必须使用同一个 session。

        2. 获取登录页和提交登录表单这两次请求的请求头是不一样的;请求头不正确时服务器会报错,返回 422 状态码。

    2. scrapy 模拟登录人人网

    # -*- coding: utf-8 -*-
    import scrapy
    from scrapy.http import Request,FormRequest
    
    class LoginrrSpider(scrapy.Spider):
        """Spider that signs in to renren.com by submitting its login form."""

        name = 'loginrr'
        allowed_domains = ['renren.com']
        start_urls = ['http://renren.com/']
        headers = {
            'User-Agent': (
                'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                '(KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
            ),
        }

        def start_requests(self):
            """Return the initial request for the login page.

            Visiting the login page first makes it possible to see whether a
            captcha is present; if so, a captcha field would have to be added
            to the form data. The response is handed to :meth:`parse`.
            """
            # Tag the request with cookie jar 1; follow-up requests pass the
            # same ``cookiejar`` meta key so they share this session's cookies.
            login_page = Request(
                "http://www.renren.com/PLogin.do",
                meta={"cookiejar": 1},
                callback=self.parse,
            )
            return [login_page]

        def parse(self, response):
            """Fill in the login form found in ``response`` and submit it."""
            # Form fields to send (no captcha this time).
            # Put your own account and password here.
            credentials = dict(email='xxxxxx', password='xxxxxxxx')
            print("正在登陆...")
            # FormRequest.from_response builds the POST from the page's form.
            submit = FormRequest.from_response(
                response,
                # Carry the cookie jar of the incoming response forward.
                meta={"cookiejar": response.meta["cookiejar"]},
                headers=self.headers,
                formdata=credentials,
                # Continue in next() once the form has been submitted.
                callback=self.next,
            )
            return [submit]

        def next(self, response):
            """Print the raw body of the page returned after the login POST."""
            print(response.body)

     3.scrapy模拟登录豆瓣

    import scrapy
    from faker import Factory
    

    # Faker instance used to generate a fake User-Agent string.
    f = Factory.create()


    class MailSpider(scrapy.Spider):
        """Log in to Douban and yield one item per entry of the doumail list.

        Flow: request the login page, submit the login form (asking the user
        to solve a captcha by hand when one is shown), then request the
        doumail page and extract sender time, sender, URL and title.
        """

        name = 'douban-mail'
        allowed_domains = ['accounts.douban.com', 'douban.com']
        start_urls = [
            'https://www.douban.com/'
        ]

        headers = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
            'Connection': 'keep-alive',
            'Host': 'accounts.douban.com',
            'User-Agent': f.user_agent()
        }

        # Base login form fields. The captcha fields are added per request in
        # parse_login() only when the login page shows a captcha.
        formdata = {
            'form_email': '您的账号',
            'form_password': '您的密码',
            'login': '登录',
            'redir': 'https://www.douban.com/',
            'source': 'None'
        }

        def start_requests(self):
            """Return the request for the login page, tagged with cookie jar 1."""
            return [scrapy.Request(url='https://www.douban.com/accounts/login',
                                   headers=self.headers,
                                   meta={'cookiejar': 1},
                                   callback=self.parse_login)]

        def parse_login(self, response):
            """Submit the login form, prompting for the captcha if one is shown.

            :param response: the login page response.
            :returns: a one-element list with the form submission request.
            """
            # The original code used ``urlparse`` without importing it and
            # relied on the Python 2 module; import the Python 3 equivalents
            # locally.
            from urllib.parse import parse_qs, urlparse

            # Work on a copy so the class-level dict is not mutated and a stale
            # captcha answer cannot leak into a later login attempt.
            formdata = dict(self.formdata)

            # A captcha has to be solved by hand. ``response.text`` is used
            # because ``response.body`` is bytes on Python 3 and a str-in-bytes
            # test raises TypeError.
            if 'captcha_image' in response.text:
                print('Copy the link:')
                link = response.xpath('//img[@class="captcha_image"]/@src').extract()[0]
                print(link)
                captcha_solution = input('captcha-solution:')
                # parse_qs returns a list of values for the 'id' parameter.
                captcha_id = parse_qs(urlparse(link).query, True)['id']
                formdata['captcha-solution'] = captcha_solution
                formdata['captcha-id'] = captcha_id

            return [scrapy.FormRequest.from_response(
                response,
                formdata=formdata,
                headers=self.headers,
                meta={'cookiejar': response.meta['cookiejar']},
                callback=self.after_login,
            )]

        def after_login(self, response):
            """Print the login response status and request the doumail page."""
            print(response.status)
            # The mail page lives on www.douban.com; override Host on a copy
            # rather than mutating the shared class-level headers.
            headers = dict(self.headers, Host="www.douban.com")
            return scrapy.Request(url='https://www.douban.com/doumail/',
                                  meta={'cookiejar': response.meta['cookiejar']},
                                  headers=headers,
                                  callback=self.parse_mail)

        def parse_mail(self, response):
            """Yield one mail item per ``<li>`` of the doumail list."""
            print(response.status)
            for item in response.xpath('//div[@class="doumail-list"]/ul/li'):
                # NOTE(review): DoubanMailItem is not imported in the visible
                # snippet; presumably it comes from the project's items
                # module. Confirm and add the import.
                mail = DoubanMailItem()
                mail['sender_time'] = item.xpath('div[2]/div/span[1]/text()').extract()[0]
                mail['sender_from'] = item.xpath('div[2]/div/span[2]/text()').extract()[0]
                mail['url'] = item.xpath('div[2]/p/a/@href').extract()[0]
                mail['title'] = item.xpath('div[2]/p/a/text()').extract()[0]
                print(mail)
                yield mail
  • 相关阅读:
    springboot: 使web项目支持jsp
    springboot: 集成freemark模板引擎
    Springboot的优点和实现
    Spring的两种动态代理:Jdk和Cglib 的区别和实现
    JAVA单例模式:懒汉式,饿汉式
    java的内存管理 对象的分配与释放
    JAVA反射调用方法
    JAVA内置注解 基本注解
    图——图的定义与操作
    树——二叉树的线索化
  • 原文地址:https://www.cnblogs.com/tjp40922/p/10621522.html
Copyright © 2020-2023  润新知