• Starting salaries on Lagou.com
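
The script below fetches the Lagou.com homepage, extracts the back-end development category links with regular expressions, crawls the first ten listing pages for the Java and Python tracks, counts postings by starting salary (the lower bound of each salary range, e.g. the "10k" in "10k-20k"), and renders one pie chart per track into a single HTML page with pyecharts.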


    import requests
    import re
    import pyecharts
    
    user_agent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"
    res = requests.get("https://www.lagou.com/",headers = {"User-Agent":user_agent})
    # print(res.status_code)
    # print(res.text)
    
    data = re.search("<span>后端开发</span>([sS]*?)后端开发其它</a>",res.text).group(1)
    data = data.replace(" ","").replace("
    ","")
    urls = re.findall('<ahref="(.*?)"data-lg-tj-id="4O00"data-lg-tj-no="01dd"data-lg-tj-cid="idnull"class=".*?">(.*?)</a>',data)
    print(urls)
    
    
    def parser_job(i):
        # Link to the job posting
        job_url = re.search('href="(.*?)"', i).group(1)
        # Job title, work location and salary range
        name = re.search("<h3>(.*?)</h3>", i).group(1)
        add = re.search("<em>(.*?)</em>", i).group(1)
        money = re.search('"money">(.*?)</span>', i).group(1)
        # Experience and education requirements, separated by " / "
        jy, xl = re.search('-->([\s\S]*?)\n', i).group(1).split(" / ")

        return {"name": name, "add": add, "job_url": job_url, "money": money, "jy": jy, "xl": xl}
    
    # Technology tracks to look at
    jobs = ["Java", "Python"]

    # Create a pyecharts page that will hold one pie chart per track
    page = pyecharts.Page()
    for url in urls:
        if url[1] not in jobs:
            continue
    
        # Parsed job dicts for this track
        datas = []
    
        # Crawl the first 10 pages of the listing for this track
        for j in range(1, 11):
            # Reuse the same User-Agent header as the homepage request
            res = requests.get(url[0] + str(j), headers={"User-Agent": user_agent})
            # Each job card spans from the position link to the company block
            data = re.findall("""<a class="position_link"([\s\S]*?)<div class="company">""", res.text)
            for i in data:
                dic = parser_job(i)
                # Skip entries whose education field still contains an unrendered template placeholder
                if "{" in dic["xl"]:
                    continue
                datas.append(dic)
        print(len(datas))
        # Count how many postings share each starting salary (the lower bound of the range)
        count_dic = {}
        for d in datas:
            key = d["money"].split("-")[0]
            if key in count_dic:
                count_dic[key] += 1
            else:
                count_dic[key] = 1
        print(url[1], count_dic)
    
        # Create a pie chart for this track: title, keys (starting salaries) and values (posting counts)
        pie = pyecharts.Pie()
        pie.add(url[1], list(count_dic.keys()), list(count_dic.values()))
        # Add the chart to the page
        page.add(pie)
    
    
    # Render the whole page to an HTML file
    page.render("test.html")
  • Original post: https://www.cnblogs.com/xuecaichang/p/10487888.html