• 抓取腾讯招聘python岗位


    # -*- coding: utf-8 -*-
    """
    @author: Dell Created on Mon Dec 23 17:55:06 2019
    """
    import re
    import time
    import requests
    from lxml import etree
    
    from selenium import webdriver
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.common.by import By
    
    
    # Request headers imitating a desktop Chrome browser visiting the Tencent
    # careers site. The user-agent is split across adjacent literals purely for
    # line length; the resulting string is unchanged.
    HEADERS = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/73.0.3683.86 Safari/537.36'
        ),
        'Referer': 'https://careers.tencent.com/',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
    }
    
    def _first_text(node, path):
        """Return the first result of xpath ``path`` under ``node``, or "" if none."""
        matches = node.xpath(path)
        return matches[0] if matches else ""


    def parse(url):
        """Load ``url`` in Chrome and extract the job postings listed on it.

        Args:
            url: Address of one search-result page.

        Returns:
            A list of dicts, one per ``div`` with class ``recruit-list``, each
            with the keys ``'title'``, ``'address'`` and ``'require'``. A field
            whose node is missing from the page is returned as an empty string.
        """
        # Function-scope import keeps this block self-contained; selenium is
        # already a dependency of this file.
        from selenium.common.exceptions import TimeoutException

        listing_xpath = "//div[@class='recruit-list']"

        driver = webdriver.Chrome()
        try:
            driver.get(url)
            try:
                # Wait for the listing to appear before taking the snapshot,
                # instead of reading the page source right after navigation.
                WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, listing_xpath))
                )
            except TimeoutException:
                # Deliberate best effort: a page without postings never shows
                # the listing, so parse whatever was rendered (possibly nothing).
                pass
            text = driver.page_source
        finally:
            # quit() ends the whole driver session, and running it in
            # ``finally`` releases the browser even when loading fails.
            driver.quit()

        # Parse the rendered page.
        html = etree.HTML(text)
        divs = html.xpath(listing_xpath)

        pos_infos = []
        for div in divs:
            pos_infos.append({
                # Job title.
                'title': _first_text(div, "./a/h4/text()"),
                # Work location.
                'address': _first_text(div, "./a/p/span[2]/text()"),
                # Job requirements.
                'require': _first_text(div, "./a/p[@class='recruit-text']/text()"),
            })
        return pos_infos
    
    def save(list):
        """Append every item of ``list`` to ``tencent.txt``, one per line.

        Each item is converted with ``str()`` and followed by a newline. The
        file is opened in append mode with UTF-8 encoding, so repeated calls
        accumulate lines in the same file in the current working directory.

        Args:
            list: Iterable of items to write.
        """
        # NOTE: the parameter name shadows the builtin ``list``; it is kept
        # unchanged so existing callers (including keyword callers) still work.
        with open("tencent.txt", "a", encoding="utf-8") as f:
            # The line terminator must be the escape sequence; a raw line break
            # inside the quotes is an unterminated string literal.
            f.writelines(str(line) + "\n" for line in list)
                
        
    
    if __name__ == "__main__":
        # Walk search-result pages 1 through 69 for the keyword "python":
        # scrape each page, append its postings to the output file, echo them
        # to stdout, then print a progress line for the page.
        url_template = "https://careers.tencent.com/search.html?index={}&keyword=python"
        for page_no in range(1, 70):
            postings = parse(url_template.format(page_no))
            save(postings)

            for posting in postings:
                print(posting)
            print("第%s页解析完成" % page_no, "-" * 50)
    
    
  • 相关阅读:
    安卓天天练练(三)常用组件Toast
    安卓天天练练(二)相对布局和帧布局
    javascript表单操作
    JavaScript replace() 方法
    android基础(一)
    四大类NoSQL数据库
    php基础八(cookie)
    php基础(七)文件
    php基础(六)Include
    php基础(五)日期
  • 原文地址:https://www.cnblogs.com/zxfei/p/12088112.html
Copyright © 2020-2023  润新知