• 爬取拉勾网


    import re
    import requests
    # Cookies collected from every response; passed along with each later request.
    all_cookie_dict = {}

    # ##################################### Step 1: fetch the login page #####################################
    r1 = requests.get(
        url='https://passport.lagou.com/login/login.html',
        headers={
            'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
        }
    )

    # The anti-forgery token and code are read out of the page text, where they
    # appear as `X_Anti_Forge_Token = '...';` / `X_Anti_Forge_Code = '...';`.
    # The lazy group stops at the first closing "';" so nothing that follows on
    # the same line can leak into the captured value.
    token_match = re.search(r"X_Anti_Forge_Token = '(.*?)';", r1.text)
    code_match = re.search(r"X_Anti_Forge_Code = '(.*?)';", r1.text)
    if token_match is None or code_match is None:
        # Fail with a clear message instead of an IndexError on an empty result list.
        raise RuntimeError(
            'X_Anti_Forge_Token / X_Anti_Forge_Code not found in the login page '
            '(HTTP status %s)' % r1.status_code
        )
    token = token_match.group(1)
    code = code_match.group(1)
    r1_cookie_dict = r1.cookies.get_dict()
    all_cookie_dict.update(r1_cookie_dict)
    
    # ##################################### Step 2: log in #####################################
    # NOTE: the account name and password are hard-coded in this form payload.
    login_form = {
        'isValidate': 'true',
        'username': '1439286684',
        'password': '4565465',
        'request_form_verifyCode': '',
        'submit': '',
    }
    # The token and code extracted from the login page in step 1 are sent back
    # as the X-Anit-Forge-* request headers.
    login_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Host': 'passport.lagou.com',
        'Origin': 'https://passport.lagou.com',
        'Referer': 'https://passport.lagou.com/login/login.html',
        'X-Anit-Forge-Code': code,
        'X-Anit-Forge-Token': token,
    }
    r2 = requests.post(
        'https://passport.lagou.com/login/login.json',
        data=login_form,
        headers=login_headers,
        cookies=all_cookie_dict,
    )
    r2_response_json = r2.json()
    # Merge the cookies set by the login response into the shared cookie dict.
    r2_cookie_dict = r2.cookies.get_dict()
    all_cookie_dict.update(r2_cookie_dict)
    # ##################################### Step 3: grant #####################################
    grant_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
        'Referer': 'https://passport.lagou.com/login/login.html',
        'Host': 'passport.lagou.com',
    }
    # Redirects are not followed automatically: the next step reads the
    # Location header of this response and requests it by hand.
    r3 = requests.get(
        'https://passport.lagou.com/grantServiceTicket/grant.html',
        headers=grant_headers,
        cookies=all_cookie_dict,
        allow_redirects=False,
    )
    r3_cookie_dict = r3.cookies.get_dict()
    all_cookie_dict.update(r3_cookie_dict)
    # ##################################### Step 4: action #####################################
    # Request the URL named by the Location header of the grant response (r3).
    r4_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
        'Referer': 'https://passport.lagou.com/login/login.html',
        'Host': 'www.lagou.com',
        'Upgrade-Insecure-Requests': '1',
    }
    r4 = requests.get(
        r3.headers['Location'],
        headers=r4_headers,
        cookies=all_cookie_dict,
        allow_redirects=False,
    )
    # Keep whatever cookies this hop set for the following requests.
    r4_cookie_dict = r4.cookies.get_dict()
    all_cookie_dict.update(r4_cookie_dict)
    
    # ##################################### Step 5: obtain the authentication info #####################################
    # Next hop of the manual redirect chain: the target is r4's Location header.
    r5_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
        'Referer': 'https://passport.lagou.com/login/login.html',
        'Host': 'www.lagou.com',
        'Upgrade-Insecure-Requests': '1',
    }
    r5 = requests.get(
        r4.headers['Location'],
        headers=r5_headers,
        cookies=all_cookie_dict,
        allow_redirects=False,
    )
    r5_cookie_dict = r5.cookies.get_dict()
    all_cookie_dict.update(r5_cookie_dict)

    # Show where this response redirects to.
    print(r5.headers['Location'])
    
    # ##################################### Step 6 #####################################
    # Request the URL from r5's Location header, again without auto-following redirects.
    r6_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
        'Referer': 'https://passport.lagou.com/login/login.html',
        'Host': 'www.lagou.com',
        'Upgrade-Insecure-Requests': '1',
    }
    r6 = requests.get(
        r5.headers['Location'],
        headers=r6_headers,
        cookies=all_cookie_dict,
        allow_redirects=False,
    )
    r6_cookie_dict = r6.cookies.get_dict()
    all_cookie_dict.update(r6_cookie_dict)

    # Show where this response redirects to.
    print(r6.headers['Location'])
    
    
    # ##################################### Step 7 #####################################
    # Last manual hop: request the URL from r6's Location header and collect its cookies.
    r7_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
        'Referer': 'https://passport.lagou.com/login/login.html',
        'Host': 'www.lagou.com',
        'Upgrade-Insecure-Requests': '1',
    }
    r7 = requests.get(
        r6.headers['Location'],
        headers=r7_headers,
        cookies=all_cookie_dict,
        allow_redirects=False,
    )
    r7_cookie_dict = r7.cookies.get_dict()
    all_cookie_dict.update(r7_cookie_dict)
    
    
    
    
    # ##################################### Step 8: view the profile #####################################
    profile_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
        'Host': 'gate.lagou.com',
        'Pragma': 'no-cache',
        'Referer': 'https://account.lagou.com/v2/account/userinfo.html',
        'X-L-REQ-HEADER': '{deviceType:1}',
    }
    r8 = requests.get(
        'https://gate.lagou.com/v1/neirong/account/users/0/',
        headers=profile_headers,
        cookies=all_cookie_dict,
    )
    # The decoded JSON is kept: step 9 reads 'submitCode' and 'submitToken' from it.
    r8_response_json = r8.json()
    r8_cookie_dict = r8.cookies.get_dict()
    all_cookie_dict.update(r8_cookie_dict)
    
    
    # ##################################### Step 9: update the profile #####################################

    # The submitCode / submitToken values from the step 8 response are sent as
    # the X-Anit-Forge-* headers of this request.
    # NOTE(review): .get() yields None when a key is absent - confirm how the
    # request should behave in that case.
    update_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
        'Host': 'gate.lagou.com',
        'Origin': 'https://account.lagou.com',
        'Referer': 'https://account.lagou.com/v2/account/userinfo.html',
        'X-L-REQ-HEADER': '{deviceType:1}',
        'X-Anit-Forge-Code': r8_response_json.get('submitCode'),
        'X-Anit-Forge-Token': r8_response_json.get('submitToken'),
        'Content-Type': 'application/json;charset=UTF-8',
    }
    # New profile values, sent as the JSON body of the PUT.
    new_profile = {
        "userName": "wupeiqi999",
        "sex": "MALE",
        "portrait": "images/myresume/default_headpic.png",
        "positionName": "...",
        "introduce": "....",
    }
    r9 = requests.put(
        'https://gate.lagou.com/v1/neirong/account/users/0/',
        headers=update_headers,
        json=new_profile,
        cookies=all_cookie_dict,
    )

    print(r9.text)
  • 相关阅读:
    C#计算代码的执行耗时
    c#值类型和引用类型
    C#类、接口、虚方法和抽象方法
    15,了解如何在闭包里使用外围作用域中的变量
    函数闭包,global,nonlocal
    init())函数和main()函数
    函数的命名空间
    函数的默认参数是可变不可变引起的奇怪返回值
    遍历目录
    super顺序
  • 原文地址:https://www.cnblogs.com/aaronthon/p/9334711.html
Copyright © 2020-2023  润新知