• python3 crawler: fetching Lagou job listings with requests

The script below first visits Lagou's job-list page through a requests.Session, so that the follow-up Ajax requests carry the cookies and Referer the site checks, then pages through the positionAjax.json endpoint and saves each posting either as JSON lines in a text file or as an Excel sheet built with tablib.


    import requests, json, time, tablib
    
    
    def send_ajax_request(data: dict):
        # POST the form data to the positionAjax.json endpoint, reusing the
        # session cookies picked up from the job-list page. Returns the parsed
        # JSON body, or an empty dict on any HTTP or network failure.
        try:
            ajax_response = session.post(url=ajax_url,
                                         params={"needAddtionalResult": "false", "city": city},
                                         data=data,
                                         headers=ajax_headers,
                                         timeout=timeout)
            if ajax_response.status_code == 200:
                return ajax_response.json()
            return {}
        except Exception:
            return {}
    
    
    def get_job_info(info_dic: dict):
        # Walk down to the result list defensively so that an empty response
        # from send_ajax_request yields nothing instead of raising.
        job_list = (info_dic.get("content", {})
                            .get("positionResult", {})
                            .get("result", []))

        fields = ["companyId", "companyFullName", "positionName", "workYear",
                  "education", "salary", "jobNature", "companySize", "city",
                  "district", "createTime"]
        for jobInfoDict in job_list:
            dic = {field: jobInfoDict.get(field) for field in fields}
            if is_save_txtfile:
                yield json.dumps(dic, ensure_ascii=False)
            else:
                yield list(dic.values())
    
    
    def save_to_file(json_data):
        # One JSON object per line (JSON Lines format).
        for data in json_data:
            f.write(data + "\n")
    
    
    def save_to_excel(list_data):
        # Append each row to the in-memory tablib dataset; it is written to
        # disk once at the end of the run.
        for line in list_data:
            dataset.append(line)
    
    
    def run():
        # Lagou paginates the Ajax results; pull pages 1-30 for the keyword.
        for i in range(1, 31):
            form_data = {
                "first": "false",
                "pn": i,          # page number
                "kd": job_name    # search keyword
            }
            info_dic = send_ajax_request(form_data)
            rows = get_job_info(info_dic)
            if is_save_txtfile:
                save_to_file(rows)
            else:
                save_to_excel(rows)
            print("Saving data...")
            time.sleep(sleeptime)  # throttle so the crawler is less likely to be blocked
    
    
    if __name__ == '__main__':
        session = requests.Session()
        job_name = "python"
        city = "成都"  # Lagou expects the city name in Chinese
        timeout = 5
        sleeptime = 10
        # Visit the job-list page first so the session picks up the cookies
        # that the Ajax endpoint checks before answering.
        doc_url = "https://www.lagou.com/jobs/list_{job_name}".format(job_name=job_name)
        session.headers["User-Agent"] = ("Mozilla/5.0 (Windows NT 10.0; WOW64) "
                                         "AppleWebKit/537.36 (KHTML, like Gecko) "
                                         "Chrome/72.0.3626.121 Safari/537.36")
        session.headers["Host"] = "www.lagou.com"

        doc_response = session.get(url=doc_url, params={"city": city})

        # The Ajax endpoint also checks Origin and Referer, so mirror the
        # page we just visited.
        ajax_headers = {
            "Origin": "https://www.lagou.com",
            "Referer": doc_response.url
        }

        # Query-string parameters are supplied per request in send_ajax_request.
        ajax_url = "https://www.lagou.com/jobs/positionAjax.json"
    
        is_save_txtfile = False
    
        if not is_save_txtfile:
            dataset = tablib.Dataset()
            dataset.headers = ["companyId", "companyFullName", "positionName", "workYear",
                               "education", "salary", "jobNature", "companySize", "city",
                               "district", "createTime"]
    
        # The text file is opened up front; in Excel mode it simply stays empty.
        f = open("jobinfo.txt", "a", encoding="utf-8")
        try:
            run()
        except Exception:
            print("Something went wrong")
        finally:
            f.close()
            if not is_save_txtfile:
                with open("jobInfo.xls", "wb") as excel_file:
                    excel_file.write(dataset.xls)
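
If you would rather avoid the tablib dependency, the same rows can be written with the standard library's csv module instead. This is a minimal sketch, not part of the original script; save_to_csv is a hypothetical drop-in for save_to_excel and reuses the same column order as dataset.headers:

    import csv

    def save_to_csv(rows, path="jobinfo.csv"):
        # Hypothetical replacement for save_to_excel; same column order
        # as dataset.headers in the script above.
        fields = ["companyId", "companyFullName", "positionName", "workYear",
                  "education", "salary", "jobNature", "companySize", "city",
                  "district", "createTime"]
        # utf-8-sig lets Excel detect the encoding, so Chinese text displays correctly.
        with open(path, "w", newline="", encoding="utf-8-sig") as csv_file:
            writer = csv.writer(csv_file)
            writer.writerow(fields)
            writer.writerows(rows)

Using writerows keeps the pipeline streaming: rows from the get_job_info generator are written as they arrive rather than accumulated in memory first.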
• Original post: https://www.cnblogs.com/zhuchunyu/p/10765945.html