爬腾讯招聘

 1 import requests
 2 import time
 3 import json
 4 import threading
 5 #   采集https://careers.tencent.com/search.html 网站的招聘信息
 6 
 7 #   时间戳
 8 timestamp = '%d' % (time.time() * 1000)
 9 
10 
# Serialises appends to the output file. parse_url is used as a thread target
# in this script, so several calls may write to the same file concurrently;
# holding the lock keeps each JSON line intact.
_WRITE_LOCK = threading.Lock()


def parse_url(json_url):
    """Fetch one page of job postings and append each one to '腾讯招聘.json'.

    For every posting in the listing response, a second request is made to
    the per-posting detail endpoint to obtain the job requirements. Each
    posting is then written as one JSON object per line (UTF-8, non-ASCII
    characters kept verbatim).

    Args:
        json_url: URL of the listing endpoint for a single results page.

    Raises:
        requests.HTTPError: if either the listing or a detail request
            returns an HTTP error status.
        requests.RequestException: on connection problems or timeout.
        KeyError: if a posting or detail payload lacks an expected field.
    """
    # A timeout keeps a stalled connection from blocking the worker forever.
    listing = requests.get(json_url, timeout=10)
    listing.raise_for_status()

    # Treat a missing or null 'Data'/'Posts' as "no postings on this page"
    # instead of failing while iterating over None.
    posts = (listing.json().get('Data') or {}).get('Posts') or []

    for post in posts:
        post_id = post['PostId']
        # Public, human-facing page for the posting.
        posi_url = 'https://careers.tencent.com/jobdesc.html?postId=' + post_id
        # Detail endpoint; the listing payload does not carry 'Requirement'.
        id_url = 'https://careers.tencent.com/tencentcareer/api/post/ByPostId?timestamp={}&postId={}&language=zh-cn'.format(
            timestamp, post_id)
        detail = requests.get(id_url, timeout=10)
        detail.raise_for_status()

        item = {
            '职位': post['RecruitPostName'],
            '职责': post['Responsibility'],
            '要求': detail.json()['Data']['Requirement'],
            '链接': posi_url,
            '时间': post['LastUpdateTime'],
        }
        print('正在写入 → ', item)
        with _WRITE_LOCK:
            with open('腾讯招聘.json', 'a', encoding='utf-8') as f:
                f.write(json.dumps(item, ensure_ascii=False) + '\n')
42 
43 
# Number of listing pages to fetch (10 postings per page, see pageSize below).
num = 10
t_list = []
for count in range(1, num + 1):
    print('加载第{}页数据'.format(count))
    # Listing endpoint for page `count`; all filter parameters are left empty.
    json_url = 'https://careers.tencent.com/tencentcareer/api/post/Query?timestamp={}&countryId=&cityId=&bgIds=&productId=&categoryId=&parentCategoryId=&attrId=&keyword=&pageIndex={}&pageSize=10&language=zh-cn&area=cn'.format(
        timestamp, count)
    # One worker thread per page; threads are only created here and are
    # started together once every page URL has been prepared.
    t = threading.Thread(target=parse_url, args=(json_url,))
    t_list.append(t)

# Start every page worker, then block until all of them have finished.
for t in t_list:
    t.start()
for t in t_list:
    t.join()

腾讯招聘

相关阅读:
去哪儿网门票数据爬虫更新
每周进度总结12
每日进度总结20
每日进度总结19
每日进度总结18
每日进度总结17
每日进度总结16
每日进度总结15
每日进度总结14
每周进度总结11

原文地址：https://www.cnblogs.com/jiyu-hlzy/p/11814265.html