• python 爬虫登录保存会话去获取只有登录能获取的数据


    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    # import ConfigParser
    
    import datetime
    import sys
    import requests
    from requests.cookies import RequestsCookieJar
    from bs4 import BeautifulSoup
    import log_config
    import time
    import random
    import re
    
    
    def is_form_hash(tag):
        """BeautifulSoup tag filter: True for tags whose ``name`` attribute
        equals ``'formhash'`` (the hidden anti-CSRF field on the login form)."""
        if not tag.has_attr('name'):
            return False
        return tag.get('name') == 'formhash'
    
    
    def is_refer(tag):
        """BeautifulSoup tag filter: True for tags whose ``name`` attribute
        equals ``'referer'`` (hidden redirect-target field on the login form)."""
        if not tag.has_attr('name'):
            return False
        return tag.get('name') == 'referer'
    
    
    class haifeng_crawler:
        """Crawler for www.96bbs.com that logs in, keeps the session cookies,
        and fetches pages (e.g. attachments) visible only to logged-in users.
        """

        def __init__(self, user_name, pass_word):
            # Plain-dict mirror of the cookie jar, merged after every request.
            self.cookies = dict()
            self.username = user_name
            self.password = pass_word
            # One shared session so the server-side login state persists.
            self.session = requests.session()

        def update_cookies(self, new_cookies):
            """Merge the cookies of the latest response into self.cookies."""
            for key in new_cookies:
                self.cookies[key] = new_cookies[key]

        def req_get(self, url):
            """GET `url`, sending the accumulated cookies; absorb new ones.

            BUG FIX: the original assigned the cookie jar to a brand-new,
            immediately discarded session (`requests.session().cookies = ...`),
            so the tracked cookies were never actually sent. Assign the jar to
            self.session instead.
            """
            self.session.cookies = requests.utils.cookiejar_from_dict(self.cookies)
            resp = self.session.get(url)
            self.update_cookies(requests.utils.dict_from_cookiejar(resp.cookies))
            print(self.cookies)
            return resp

        def req_post(self, url, data):
            """POST `data` to `url`; same cookie handling (and fix) as req_get."""
            self.session.cookies = requests.utils.cookiejar_from_dict(self.cookies)
            resp = self.session.post(url, data)
            self.update_cookies(requests.utils.dict_from_cookiejar(resp.cookies))
            return resp

        def login(self):
            """Log in by scraping formhash/referer from the ajax login form.

            Returns None early when the expected <root> wrapper element is
            missing from the response.
            NOTE(review): the posted password is a placeholder literal; the
            site expects a client-side-encrypted password — confirm before use.
            """
            url = 'http://www.96bbs.com/member.php?mod=logging&action=login&infloat=yes&handlekey=login&inajax=1&ajaxtarget=fwin_content_login'
            page_res = self.req_get(url)
            soup = BeautifulSoup(page_res.text, "html.parser")
            # The inajax endpoint wraps the HTML fragment inside a <root> tag;
            # re-parse its text content to reach the actual form fields.
            rt = soup.find('root')
            if rt is None:
                return None
            soup = BeautifulSoup(rt.text, "html.parser")
            formhash = soup.find(is_form_hash).get("value")
            referer = soup.find(is_refer).get("value")
            print(formhash)
            print(referer)
            url = 'http://www.96bbs.com/member.php?mod=logging&action=login&loginsubmit=yes&handlekey=login&loginhash=LVCbx&inajax=1'
            data = {
                'formhash': formhash,
                'referer': referer,
                'username': self.username,
                'password': '加密后的密码',
                'questionid': 0,
                'answer': ''
            }
            resp = self.req_post(url, data)
            soup = BeautifulSoup(resp.text, "html.parser")
            rt = soup.find('root').text
            print(rt)

        def visit_home(self):
            """Warm-up GET of the forum home page with the logged-in session."""
            url = 'http://www.96bbs.com/forum.php'
            self.req_get(url)

        def visit_attachment(self, url):
            """GET an attachment URL that requires login; echo status and body."""
            resp = self.req_get(url)
            print(resp.status_code)
            print(resp.text)
            return resp
    
    
    if __name__ == "__main__":
        # Credentials below are placeholders: the real password must be the
        # client-side-encrypted value taken from the login page.
        crawler = haifeng_crawler("你的用户名", "密码需要根据页面取获取加密后的密码")
        crawler.login()
        # Fetch a login-only attachment to prove the session cookies work.
        attachment_url = "http://www.96bbs.com/forum.php?mod=attachment&aid=MjI0NzQ5OHw3YjNkMWMwY3wxNTQwMzYxMzEwfDQ5NzM5OXwzNTM5NTgy"
        crawler.visit_attachment(attachment_url)
  • 相关阅读:
    docker部署mysql
    jira+mysql+破解+中文+compose
    swarm 服务器安装
    docker
    mysql创建用户并手动授权
    统计数据库表容量情况
    分区表测试
    实战做项目如何选择开源许可协议(一)-了解协议
    创业公司如何实施敏捷开发
    技术人员如何创业《四》- 打造超强执行力团队
  • 原文地址:https://www.cnblogs.com/keepMoveForevery/p/9855008.html
Copyright © 2020-2023  润新知