• Scrapy框架: 登录网站


    一、使用 Cookies 登录网站

    import scrapy
    
    class LoginSpider(scrapy.Spider):
        """Fetch a login-protected page by sending session cookies with the request.

        Set ``cookies`` to the cookies of an already logged-in session. Either a
        dict / list of dicts (the forms ``scrapy.Request`` accepts directly) or a
        raw ``Cookie`` header string such as ``"k1=v1; k2=v2"`` may be used; a
        string is converted to a dict before the requests are built.
        """

        name = 'login'
        allowed_domains = ['xxx.com']
        start_urls = ['https://www.xxx.com/xx/']

        # Cookies to attach to every start request; the empty string sends none.
        cookies = ""

        @staticmethod
        def _cookies_as_mapping(raw):
            """Convert a raw ``Cookie`` header string into a ``{name: value}`` dict.

            Values that are not strings (dict, list of dicts, ``None``) are
            returned unchanged. Fragments without a ``name=value`` shape are
            ignored.
            """
            if not isinstance(raw, str):
                return raw
            parsed = {}
            for fragment in raw.split(";"):
                # Split on the first "=" only: cookie values may contain "=".
                key, sep, value = fragment.strip().partition("=")
                key = key.strip()
                if sep and key:
                    parsed[key] = value.strip()
            return parsed

        def start_requests(self):
            """Yield one request per start URL, carrying the configured cookies."""
            # scrapy.Request expects a dict or a list of dicts, never a string.
            cookies = self._cookies_as_mapping(self.cookies)
            for url in self.start_urls:
                yield scrapy.Request(url, cookies=cookies, callback=self.parse)

        def parse(self, response):
            """Save the raw response body to ``01login.html``."""
            with open("01login.html", "wb") as f:
                f.write(response.body)
    
    

    二、发送 POST 请求登录（需手动解析网页以获取登录参数）

    import scrapy
    
    class LoginSpider(scrapy.Spider):
        """Log in with a hand-built POST form, then save the member page.

        Flow: download the login page, read the ``formhash`` and ``backurl``
        input values from it, POST them together with the credentials, and
        finally request the member page and write it to ``02login.html``.
        """

        name = 'login_code'
        allowed_domains = ['xxx.com']

        # Step 1: start from the login page so its form inputs can be read.
        start_urls = ['https://www.xxx.com/login/']

        def parse(self, response):
            """Build the login form data from the login page and submit it."""
            # Step 2: collect the extra form parameters from the page markup.
            form_hash = response.xpath("//input[@id='formhash']/@value").extract_first()
            back_url = response.xpath("//input[@id='backurl']/@value").extract_first()
            payload = dict(
                username="xxx",
                pwd="xxx",
                formhash=form_hash,
                backurl=back_url,
            )

            # Step 3: send the login request as a POST.
            yield scrapy.FormRequest(
                'https://www.xxx.com/login',
                formdata=payload,
                callback=self.parse_login,
            )

        def parse_login(self, response):
            """After the login response arrives, request the target page."""
            # Step 4: visit the member page.
            yield scrapy.Request(
                "https://www.xxx.com/member",
                callback=self.parse_member,
            )

        def parse_member(self, response):
            """Write the member page body to ``02login.html``."""
            with open("02login.html", 'wb') as out:
                out.write(response.body)
    
    

    三、发送 POST 请求登录（自动解析网页以获取登录参数）

    import scrapy
    
    class LoginSpider(scrapy.Spider):
        """Log in by letting Scrapy pre-fill the login form, then save the member page.

        ``FormRequest.from_response`` locates the form matched by ``formxpath``
        on the login page and builds the request from it, so only the
        credentials have to be supplied here. After the login response arrives
        the member page is requested and written to ``03login.html``.
        """

        name = 'login_code2'
        allowed_domains = ['xxx.com']

        # 1. Login page.
        start_urls = ['https://www.xxx.com/login/']

        def parse(self, response):
            """Submit the login form found on the login page."""
            # 2. Only the credentials are given explicitly; no target URL is
            #    passed because from_response derives the request from the form.
            formdata = {
                "username": "xxx",
                "pwd": "xxx",
            }

            # 3. Send the login request as a POST.
            yield scrapy.FormRequest.from_response(
                response,
                formxpath="//*[@id='login_pc']",
                formdata=formdata,
                method="POST",  # force POST regardless of the form's own method
                callback=self.parse_login,
            )

        def parse_login(self, response):
            """After the login response arrives, request the target page."""
            # 4. Visit the member page.
            member_url = "https://www.xxx.com/member"
            yield scrapy.Request(member_url, callback=self.parse_member)

        def parse_member(self, response):
            """Write the member page body to ``03login.html``."""
            with open("03login.html", 'wb') as f:
                f.write(response.body)
    
    
  • 相关阅读:
    菱形继承问题
    类的组合
    类的派生
    EasyUI的columns中列标题居中
    C#的一般处理程序中Cookie的写入、读取、清除
    JS中设置input的type="radio"默认选中
    SQL Server 分页语句查询
    CSS中设置字体样式
    C#清空StringBuilder的三种方法
    EasyUI在子tab基础上再打开新的tab标签页
  • 原文地址:https://www.cnblogs.com/hankleo/p/11829266.html
Copyright © 2020-2023  润新知