• 腾讯招聘信息爬取


     """Crawl Tencent job postings page by page and store each posting in MySQL.

     For every listing page the script extracts title, category, head count,
     location and publish date from each posting row and inserts them into the
     ``tenxunzhaoping`` table through ``mysqlhelper.MysqlHelper``.
     """
     import requests
     from lxml import etree
     import mysqlhelper

     myhelper = mysqlhelper.MysqlHelper()
     sql = 'INSERT INTO tenxunzhaoping (title, duty, people_num, address,addtime) VALUES(%s, %s, %s, %s,%s)'

     headers = {
         "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
         # "br" is intentionally not advertised: Requests only decodes Brotli
         # when an optional Brotli package is installed; without it the body
         # would arrive as undecodable bytes and every XPath would match nothing.
         "Accept-Encoding": "gzip, deflate",
         "Accept-Language": "zh-CN,zh;q=0.9",
         "Cache-Control": "no-cache",
         "Connection": "keep-alive",
         # NOTE(review): cookie captured from a browser session; it is presumably
         # stale by now and should not live in source control - confirm.
         "Cookie": "pgv_pvi=5854498816; _ga=GA1.2.608623393.1534496276; pt2gguin=o1900227304; PHPSESSID=0smi013v1lr7r3ki2aqtacp493; pgv_si=s8414673920",
         "Host": "hr.tencent.com",
         "Pragma": "no-cache",
         "Referer": "https://hr.tencent.com/position.php?&start=10",
         "Upgrade-Insecure-Requests": "1",
         "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
     }

     # "start" is the offset of the first posting on the page; pages hold 10 rows.
     base_url = 'https://hr.tencent.com/position.php?keywords=&tid=0&lid=2156&start=%s#a'

     # Seconds to wait for the server before giving up on a page, so a stalled
     # connection cannot hang the whole crawl.
     REQUEST_TIMEOUT = 10

     # Rows 2..11 of the listing table: row 1 is the header, and anything after
     # row 11 is not a posting row.
     ROW_XPATH = '//div[@id="position"]/div/table/tr[position() >= 2 and position() <= 11]'


     def _parse_row(row):
         """Return (title, duty, people_num, address, addtime) for a posting row.

         Returns None when the row is not a complete posting row (no title link
         or fewer than five cells), so callers can skip it instead of crashing
         with an IndexError.
         """
         links = row.xpath('./td/a')
         cells = row.xpath('./td')
         if not links or len(cells) < 5:
             return None
         return (
             links[0].text,
             cells[1].text,
             cells[2].text,
             cells[3].text,
             cells[4].text,
         )


     for start in range(0, 300, 10):
         url = base_url % start
         response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
         if not response.ok:
             # An error page has no posting table; report it and keep crawling.
             print('skip %s: HTTP %s' % (url, response.status_code))
             continue

         page = etree.HTML(response.text)
         for row in page.xpath(ROW_XPATH):
             data = _parse_row(row)
             if data is None:
                 continue
             # Echo every field, one per line, before persisting the record.
             for field in data:
                 print(field)
             myhelper.execute_modify_sql(sql, data)
  • 相关阅读:
    第一章—v-text和v-html
    第一章—v-for
    第一章—v-show
    react_9
    【软件工程】
    【软件工程】网页设计基础 第一章
    【软件工程】psp四则运算
    【python】网络爬虫与信息提取
    【python】数据库学习笔记,设计自己的大学排名
    【python】用python玩微信跳一跳小游戏
  • 原文地址:https://www.cnblogs.com/daihao9527/p/9503177.html
Copyright © 2020-2023  润新知