• 用生产者消费者模型爬取智联招聘 Python 岗位信息


    爬取python岗位智联招聘

    • 这里爬取智联招聘上北京地区的 Python 岗位信息,并存入 Excel 文件,代码如下:
    import json
    import xlwt
    import requests
    from queue import Queue
    from threading import Thread
    
    def producer(q,path,timeout=30):
        """Fetch one page of job search results and enqueue one record per posting.

        The response body is parsed as JSON and every entry under
        ``data.results`` is flattened into a single ``|``-separated string of
        twelve fields (job name, company name, company type, company size,
        company URL, city, update date, salary, education level, working
        experience, employment type, time state), in that order.

        Args:
            q: Queue-like object; each record is handed over with ``q.put``.
            path: Full URL of the search API page to request.
            timeout: Seconds to wait for the HTTP request before giving up, so
                a stalled connection cannot block this thread forever.

        Raises:
            requests.exceptions.RequestException: If the request fails or
                times out.
            ValueError: If the response body is not valid JSON.
            KeyError, IndexError: If the payload lacks an expected field.
        """
        response = requests.get(
            path,
            headers={'user-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3676.400 QQBrowser/10.4.3469.400'},
            timeout=timeout,
        )

        result = json.loads(response.text)
        for item in result["data"]["results"]:
            company = item["company"]
            fields = (
                item["jobName"],
                company["name"],
                company["type"]["name"],
                company["size"]["name"],
                company["url"],
                item["city"]["items"][0]["name"],
                item["updateDate"],
                item["salary"],
                item["eduLevel"]["name"],
                item["workingExp"]["name"],
                item["emplType"],
                item["timeState"],
            )
            # "|" is the field separator of the record, so a literal "|" inside
            # a value would shift every following column when the record is
            # split again; swap it for "/" before joining.
            q.put("|".join(str(field).replace("|", "/") for field in fields))
    
    def consumer(q):
        """Drain records from the queue into an .xls workbook.

        Builds a one-sheet workbook with a styled header row, then writes each
        ``|``-separated record taken from ``q`` as one spreadsheet row (one
        cell per field). The first falsy item taken from the queue (such as
        ``None``) ends the loop, and the workbook is then saved to
        ``智联招聘python信息.xls`` in the current working directory.

        Args:
            q: Queue-like object providing records through a blocking ``get``.
        """
        book = xlwt.Workbook(encoding="utf-8")
        sheet = book.add_sheet(r"智联招聘")

        # Solid background fill (palette colour index 2) for the header cells.
        fill = xlwt.Pattern()
        fill.pattern = xlwt.Pattern.SOLID_PATTERN
        fill.pattern_fore_colour = 2
        header_style = xlwt.XFStyle()
        header_style.pattern = fill

        titles = (
            r"工作名称",
            r"公司",
            r"企业性质",
            r"人数",
            r"链接网站",
            r"所在地",
            r"发布日期",
            r"薪资",
            r"学历",
            r"工作年限",
            r"职位",
            r"状态",
        )
        for column, title in enumerate(titles):
            sheet.write(0, column, title, header_style)

        # Row 0 holds the header, so data starts on row 1.
        row = 1
        record = q.get()
        while record:
            for column, value in enumerate(record.split("|")):
                sheet.write(row, column, label=value)
            row += 1
            record = q.get()

        book.save("智联招聘python信息.xls")
    
    def cp(c_count,p_count):
        """Run the producer/consumer pipeline and return once it has finished.

        Starts ``c_count`` consumer threads and ``p_count`` producer threads
        that share one bounded queue (capacity 10). Producer ``i`` requests
        the search page whose ``start`` offset is ``i * 90``. After every
        producer has finished, one ``None`` sentinel per consumer is queued,
        and the consumers are then joined so that their output is complete
        by the time this function returns.

        Args:
            c_count: Number of consumer threads to start.
            p_count: Number of producer threads (result pages) to start.
        """
        url_template = "https://fe-api.zhaopin.com/c/i/sou?start=%d&pageSize=90&cityId=530&salary=0,0&workExperience=-1&education=-1&companyType=-1&employmentType=-1&jobWelfareTag=-1&kw=python&kt=3&=0&_v=0.55208065&x-zp-page-request-id=53d546acdd2a4e369735f9cb3a372e05-1560873187327-646177&x-zp-client-id=dc9fd9ff-68eb-42ab-8bc3-796640e734b3"

        q = Queue(10)

        # Consumers start first so the bounded queue is drained while the
        # producers are still filling it.
        consumers = [Thread(target=consumer, args=(q,)) for _ in range(c_count)]
        for worker in consumers:
            worker.start()

        producers = []
        for page in range(p_count):
            path = url_template % (page * 90)
            worker = Thread(target=producer, args=(q, path))
            worker.start()
            producers.append(worker)

        for worker in producers:
            worker.join()

        # One sentinel per consumer tells each of them that no more data follows.
        for _ in range(c_count):
            q.put(None)

        # Wait for the consumers as well; otherwise this function could return
        # before they have finished writing their output.
        for worker in consumers:
            worker.join()
    
    
    if __name__ == "__main__":
        # Script entry point: one consumer thread and twelve producer threads.
        cp(c_count=1, p_count=12)
    

    • 注意:此文章只用于学术交流
  • 相关阅读:
    在MaxCompute中配置Policy策略遇到结果不一致的问题
    通过DataWorks数据集成归档日志服务数据至MaxCompute进行离线分析
    阿里小二的日常工作要被TA们“接管”了!
    2018年DDoS攻击全态势:战胜第一波攻击成“抗D” 关键
    基于OSS+DataLakeAnalytics+QuickBI的Serverless的查询分析和可视化BI
    威胁快报|首爆,新披露Jenkins RCE漏洞成ImposterMiner挖矿木马新“跳板”
    Lesson 7 Nehe
    Lesson 7 Nehe
    Lesson 7 Nehe
    Lesson 6 Nehe
  • 原文地址:https://www.cnblogs.com/xujunkai/p/11050876.html
Copyright © 2020-2023  润新知