• 换个方式爬取人人网


    from urllib import request
    
    from urllib import parse,request
    
    #在python当中使用用户名和密码进行登录,然后保存cookie
    from http import cookiejar
    # Cookie jar that collects the cookies received on requests made
    # through the opener built below.
    cookie = cookiejar.CookieJar()
    # Handler that stores cookies in, and sends cookies from, the jar above.
    cookie_handler = request.HTTPCookieProcessor(cookiejar=cookie)
    # Explicit handlers for plain-HTTP and HTTPS requests.
    http_handler, https_handler = request.HTTPHandler(), request.HTTPSHandler()

    # Opener used for every request in this script; because it carries the
    # cookie handler, cookies persist from one request to the next.
    opener = request.build_opener(
        http_handler,
        https_handler,
        cookie_handler,
    )
    
    #登录
    def login(email="18811176939", password="123457"):
        """Log in to renren.com and print the page returned by the login request.

        The request is sent through the module-level ``opener``, so any
        cookies the server sets are kept in that opener's cookie jar and are
        sent again on later requests made with the same opener.

        Args:
            email: Account identifier submitted in the ``email`` form field.
            password: Password submitted in the ``password`` form field.

        Returns:
            None. The decoded response body is printed to stdout.

        Raises:
            urllib.error.URLError: If the request cannot be completed.
            UnicodeDecodeError: If the response body is not valid UTF-8.
        """
        # NOTE(review): the defaults are the credentials that used to be
        # hard-coded in the body, kept so existing ``login()`` calls behave
        # the same. Prefer passing credentials in over storing them in source.
        login_url = "http://www.renren.com/PLogin.do"

        # Supplying ``data`` makes urllib send the request as a form POST.
        form = parse.urlencode({"email": email, "password": password})
        req = request.Request(login_url, data=form.encode("utf-8"))

        # The context manager closes the response (and its connection) even
        # if reading or decoding raises.
        with opener.open(req) as response:
            html = response.read().decode("utf-8")
        print(html)
    
    
    #主页
    def getHomePage(base_url="http://www.renren.com/964508169/profile"):
        """Fetch a renren.com profile page and print its HTML.

        The page is requested through the module-level ``opener``, so any
        cookies already held in that opener's cookie jar are sent along.

        Args:
            base_url: URL of the profile page to fetch. Defaults to the
                profile URL this script originally requested.

        Returns:
            None. The decoded page is printed to stdout.

        Raises:
            urllib.error.URLError: If the request cannot be completed.
            UnicodeDecodeError: If the response body is not valid UTF-8.
        """
        # The context manager closes the response (and its connection) even
        # if reading or decoding raises.
        with opener.open(base_url) as response:
            html = response.read().decode("utf-8")
        print(html)
    
    #主进程
    if __name__ == '__main__':
        # The login step is left disabled here, so the profile request below
        # is sent without logging in first in this run.
        # login()

        # Fetch and print the personal profile page.
        getHomePage()
  • 相关阅读:
    php-instanceof运算符
    windows10-seaslog安装笔记
    [类和对象]1 封装 调用成员函数
    [C++] 拓展属性
    [C++] 引用详解
    [C++] Const详解
    ROS 常用
    win10 ubuntu16双系统安装教程
    [0] OpenCV_Notes
    Ubuntu16.04安装openCV的问题集合
  • 原文地址:https://www.cnblogs.com/zhangboblogs/p/8542120.html
Copyright © 2020-2023  润新知