• Scrapy框架: 登录网站


    一、使用cookies登录网站

    import scrapy
    
    class LoginSpider(scrapy.Spider):
        """Log in to a site by attaching pre-captured browser cookies to
        every initial request, then dump the response body to disk so the
        login result can be inspected.
        """
        name = 'login'
        allowed_domains = ['xxx.com']
        start_urls = ['https://www.xxx.com/xx/']

        # BUG FIX: Scrapy's Request expects `cookies` to be a dict (or a
        # list of dicts), not a string — an empty string is iterated
        # character-by-character and breaks cookie handling.  Paste the
        # browser cookies here as {"name": "value", ...}.
        cookies = {}

        def start_requests(self):
            # Attach the saved login cookies to each seed request instead of
            # letting Scrapy start unauthenticated.
            for url in self.start_urls:
                yield scrapy.Request(url, cookies=self.cookies, callback=self.parse)

        def parse(self, response):
            # Write raw bytes ("wb") to avoid re-encoding the page.
            with open("01login.html", "wb") as f:
                f.write(response.body)
    
    

    二、发送post请求登录, 要手动解析网页获取登录参数

    import scrapy
    
    class LoginSpider(scrapy.Spider):
        """Log in by POSTing credentials together with hidden anti-CSRF
        tokens scraped from the login page, then fetch a members-only page
        and save it for inspection.
        """
        name = 'login_code'
        allowed_domains = ['xxx.com']

        # Step 1: start on the login page so its hidden fields can be read.
        start_urls = ['https://www.xxx.com/login/']

        def parse(self, response):
            # Step 2: pull the hidden form tokens out of the login page.
            formhash = response.xpath("//input[@id='formhash']/@value").extract_first()
            backurl = response.xpath("//input[@id='backurl']/@value").extract_first()

            login_url = 'https://www.xxx.com/login'
            payload = {
                "username": "xxx",
                "pwd": "xxx",
                "formhash": formhash,
                "backurl": backurl,
            }

            # Step 3: submit the credentials as a POST request.
            yield scrapy.FormRequest(login_url, formdata=payload, callback=self.parse_login)

        def parse_login(self, response):
            # Step 4: with the session established, request the member page.
            member_url = "https://www.xxx.com/member"
            yield scrapy.Request(member_url, callback=self.parse_member)

        def parse_member(self, response):
            # Persist the raw bytes so login success can be verified offline.
            with open("02login.html", 'wb') as f:
                f.write(response.body)
    
    

    三、发送post请求登录, 自动解析网页获取登录参数

    import scrapy
    
    class LoginSpider(scrapy.Spider):
        """Log in via FormRequest.from_response, which locates the login
        form in the page and fills in its hidden fields automatically, so
        only the credentials need to be supplied.
        """
        name = 'login_code2'
        allowed_domains = ['xxx.com']

        # Step 1: start on the login page so from_response can parse its form.
        start_urls = ['https://www.xxx.com/login/']

        def parse(self, response):
            # Step 2: only the visible credential fields are needed here —
            # from_response merges in the form's hidden inputs itself.
            # (FIX: removed the unused `login_url` local; from_response takes
            # the action URL from the form, not from a hand-built string.)
            formdata = {
                "username": "xxx",
                "pwd": "xxx"
            }

            # Step 3: submit the located form as a POST request.
            yield scrapy.FormRequest.from_response(
                response,
                formxpath="//*[@id='login_pc']",
                formdata=formdata,
                method="POST",  # force POST even if the form declares GET
                callback=self.parse_login
            )

        def parse_login(self, response):
            # Step 4: with the session established, request the member page.
            member_url = "https://www.xxx.com/member"
            yield scrapy.Request(member_url, callback=self.parse_member)

        def parse_member(self, response):
            # Persist the raw bytes so login success can be verified offline.
            with open("03login.html", 'wb') as f:
                f.write(response.body)
    
    
  • 相关阅读:
    CR开发笔记-1工作前的准备
    CR开发笔记-2基础飞机的搭建以及测试代码
    c++还有一个小时考试
    c# winform 打印笔记
    aspcms部署
    c#复习笔记 继承
    [转]IoC框架
    Cinder-2 窗口的创建过程
    Cinder-1 TinderBox
    admin模板
  • 原文地址:https://www.cnblogs.com/hankleo/p/11829266.html
Copyright © 2020-2023  润新知