• 爬腾讯招聘


     1 import requests
     2 import time
     3 import json
     4 import threading
     5 #   采集https://careers.tencent.com/search.html 网站的招聘信息
     6 
     7 #   时间戳
     8 timestamp = '%d' % (time.time() * 1000)
     9 
    10 
    11 #   请求url,解析数据
    12 def parse_url(json_url):
    13     #   发起请求
    14     res = requests.get(json_url).json()
    15     for i in res['Data']['Posts']:
    16         #   职位名称
    17         title = i['RecruitPostName']
    18         #   工作职责
    19         resbity = i['Responsibility']
    20         #   职位ID
    21         id = i['PostId']
    22         #   职位链接
    23         posi_url = 'https://careers.tencent.com/jobdesc.html?postId=' + id
    24         #   根据ID找到工作详情页的内容
    25         id_url = 'https://careers.tencent.com/tencentcareer/api/post/ByPostId?timestamp={}&postId={}&language=zh-cn'.format(
    26             timestamp, id)
    27         res_ment = requests.get(id_url).json()
    28         #   工作要求
    29         rement = res_ment['Data']['Requirement']
    30         #   发布时间
    31         posi_time = i['LastUpdateTime']
    32         item = {
    33             '职位': title,
    34             '职责': resbity,
    35             '要求': rement,
    36             '链接': posi_url,
    37             '时间': posi_time
    38         }
    39         print('正在写入 → ', item)
    40         with open('腾讯招聘.json', 'a', encoding='utf-8') as f:
    41             f.write(json.dumps(item, ensure_ascii=False) + '
    ')
    42 
    43 
    44 # 页数
    45 num = 10
    46 t_list = []
    47 for count in range(1, num + 1):
    48     print('加载第{}页数据'.format(count))
    49     #   json数据源
    50     json_url = 'https://careers.tencent.com/tencentcareer/api/post/Query?timestamp={}&countryId=&cityId=&bgIds=&productId=&categoryId=&parentCategoryId=&attrId=&keyword=&pageIndex={}&pageSize=10&language=zh-cn&area=cn'.format(
    51         timestamp, count)
    52     t = threading.Thread(target=parse_url, args=((json_url,)))
    53     t_list.append(t)
    54 
    55 for t in t_list:
    56     t.start()
    57 for t in t_list:
    58     t.join()
    腾讯招聘
  • 相关阅读:
    去哪儿网门票数据爬虫更新
    每周进度总结12
    每日进度总结20
    每日进度总结19
    每日进度总结18
    每日进度总结17
    每日进度总结16
    每日进度总结15
    每日进度总结14
    每周进度总结11
  • 原文地址:https://www.cnblogs.com/jiyu-hlzy/p/11814265.html
Copyright © 2020-2023  润新知