• 换个方式爬取人人网


    from urllib import request
    
    from urllib import parse,request
    
    # Log in with a username and password from Python and keep the resulting
    # cookies, so that later requests made through `opener` send them back.
    from http import cookiejar

    # In-memory cookie store shared by every request made through `opener`.
    cookie = cookiejar.CookieJar()

    # Plain handlers for the two URL schemes.
    http_handler = request.HTTPHandler()
    https_handler = request.HTTPSHandler()

    # Handler that records cookies from responses into `cookie` and attaches
    # them to outgoing requests.
    cookie_handler = request.HTTPCookieProcessor(cookiejar=cookie)

    # Opener that chains the three handlers above.
    opener = request.build_opener(
        http_handler,
        https_handler,
        cookie_handler,
    )
    
    # Log in
    def login(email="18811176939", password="123457"):
        """POST the login form to renren.com and print the returned page.

        The request goes through the module-level ``opener``, which is built
        with an ``HTTPCookieProcessor``; any cookies the server sets are
        therefore kept in the shared cookie jar for later requests.

        Args:
            email: Account identifier sent as the ``email`` form field.
            password: Password sent as the ``password`` form field.

        The defaults reproduce the values that were previously hard-coded in
        the function body.
        NOTE(review): real credentials should not live in source code; pass
        them in from the caller or from configuration instead.
        """
        # Login endpoint.
        # NOTE(review): this is a plain-HTTP URL, so the form is sent
        # unencrypted - confirm whether an HTTPS endpoint is available.
        login_url = "http://www.renren.com/PLogin.do"

        form = parse.urlencode({
            "email": email,
            "password": password,
        })

        # Supplying `data` makes urllib issue a POST; the body must be bytes.
        req = request.Request(login_url, data=form.encode("utf-8"))

        # The context manager closes the response (and its socket) even if
        # reading or decoding fails.
        with opener.open(req) as response:
            # Honour the charset declared in Content-Type; fall back to UTF-8.
            charset = response.headers.get_content_charset() or "utf-8"
            html = response.read().decode(charset)

        print(html)
    
    
    # Profile (home) page
    def getHomePage(base_url="http://www.renren.com/964508169/profile"):
        """Fetch a renren.com profile page and print its HTML.

        The request goes through the module-level ``opener``, so any cookies
        already held in the shared cookie jar are sent with it.

        Args:
            base_url: URL of the page to fetch. The default is the profile
                URL that was previously hard-coded in the function body.
        """
        # The context manager closes the response (and its socket) even if
        # reading or decoding fails.
        with opener.open(base_url) as response:
            # Honour the charset declared in Content-Type; fall back to UTF-8.
            charset = response.headers.get_content_charset() or "utf-8"
            html = response.read().decode(charset)

        print(html)
    
    # Script entry point
    if __name__ == "__main__":
        # login() is left disabled here, so no login request is sent before
        # the profile page is fetched.
        #login()
    
        getHomePage() # fetch and print the personal profile page
  • 相关阅读:
    契约测试SpringCloud Contract入门
    CircuitBreaker 组件 resilience4j
    阿里开源的15个顶级Java项目
    将军令:数据安全平台建设实践
    ResNet
    设计模式
    muduo评测摘要
    muduo 学习
    RAII
    大数据框架
  • 原文地址:https://www.cnblogs.com/zhangboblogs/p/8542120.html
Copyright © 2020-2023  润新知