• 拉勾网 + selenium



    方式一 selenium

    from selenium import webdriver
    import time
    from selenium.webdriver.common.keys import Keys
    # Create the Chrome WebDriver instance used by the scraping code below.
    bro = webdriver.Chrome()
    
    def get_resume(bro):
        """Print the job listings on the current results page and on every following page.

        For each element with class ``con_list_item`` the title, the ``.li_b_l``
        text, the company name, the address and the ``.li_b_r`` text are printed.
        The function then clicks the ``.pager_next`` element and repeats.

        Paging is done with a loop rather than by self-recursion, so a long
        result set neither grows the call stack nor relies on an exception to
        terminate.

        :param bro: a Selenium WebDriver already showing a search-results page.
        :return: None, once no usable "next page" button remains.
        """
        # Imported locally so this function does not depend on the file's import header.
        from selenium.common.exceptions import NoSuchElementException

        while True:
            for li in bro.find_elements_by_class_name('con_list_item'):
                title = li.find_element_by_css_selector('.position_link h3').text
                work_time = li.find_element_by_css_selector('.li_b_l').text
                company_name = li.find_element_by_css_selector('.company_name').text
                add = li.find_element_by_css_selector('.add').text
                welfare = li.find_element_by_css_selector('.li_b_r').text

                print('''
            标题:%s
            工作时间:%s
            公司名称:%s
            公司地址:%s
            公司福利:%s
            '''%(title, work_time, company_name, add, welfare))

            # Locate the "next page" button; if it is absent there is nothing left to scrape.
            try:
                next_button = bro.find_element_by_css_selector('.pager_next ')
            except NoSuchElementException:
                return

            # NOTE(review): the last page is assumed to keep the button but add a
            # "...disabled" class to it (e.g. `pager_next_disabled`) -- confirm
            # against the live page markup.
            if 'disabled' in (next_button.get_attribute('class') or ''):
                return

            next_button.click()
            # Pause two seconds after paging; driving the browser too quickly causes errors.
            time.sleep(2)
    
    # Script entry: open Lagou, pick the Shanghai site, search for "python",
    # then print every result page via get_resume().
    try:
        bro.get('https://www.lagou.com/')
        # Implicit wait: element lookups keep polling for up to 10 seconds before failing.
        bro.implicitly_wait(10)
        # Click the link whose text contains '上海站' (the Shanghai site).
        bro.find_element_by_partial_link_text('上海站').click()

        input_search = bro.find_element_by_id('search_input')
        input_search.send_keys("python")
        # Simulate the keyboard: press Enter to submit the search.
        input_search.send_keys(Keys.ENTER)
        time.sleep(1)
        # Click the '.body-btn' element before scraping
        # (presumably dismisses a pop-up -- TODO confirm).
        bro.find_element_by_css_selector('.body-btn').click()
        get_resume(bro)

    except Exception as e:
        # Top-level boundary of the script: report the failure, then fall through to cleanup.
        print(e)
    finally:
        # quit() ends the whole WebDriver session and its driver process;
        # close() would only close the current window and leave the driver running.
        bro.quit()
    

    方式二 普通方法(使用 requests 直接请求接口)

    import requests
    import json
    
    # HTTP request headers passed to requests.post() below.
    # NOTE(review): the Cookie value is a hard-coded capture of one browser session.
    # Presumably it expires and has to be refreshed before the script works again --
    # confirm. Avoid publishing live session cookies.
    headers = {
        'Accept-Language': "zh-CN,zh;q=0.9",
        'Host': 'www.lagou.com',
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3493.3 Safari/537.36",
        # Referer is the search-results page for "python" with city=%E4%B8%8A%E6%B5%B7 (URL-encoded 上海).
        'Referer': "https://www.lagou.com/jobs/list_python?city=%E4%B8%8A%E6%B5%B7&cl=false&fromSearch=true&labelWords=&suginput=",
        'Cookie': "_ga=GA1.2.1497342262.1582512706; user_trace_token=20200224105145-cc6cf3ff-ce98-45e4-987a-b86e8c600c0a; LGUID=20200224105145-b49fd29f-44e3-4e76-99b1-81d214d196c6; _gid=GA1.2.1855310676.1586229731; JSESSIONID=ABAAAECAAFDAAEH3E452981BE6E4D71C27858CD16B95E47; WEBTJ-ID=20200407223352-171550fd3077e1-099a2b39121da1-396f7f07-1764000-171550fd309d5c; X_MIDDLE_TOKEN=4541125f78b04f020d1ab29fee30c15f; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%221709905d08a239-0d00b368bd6473-39697407-1764000-1709905d08b9bd%22%2C%22%24device_id%22%3A%221709905d08a239-0d00b368bd6473-39697407-1764000-1709905d08b9bd%22%2C%22props%22%3A%7B%22%24os%22%3A%22MacOS%22%2C%22%24browser%22%3A%22Chrome%22%2C%22%24browser_version%22%3A%2280.0.3987.149%22%2C%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_referrer_host%22%3A%22%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%7D%7D; index_location_city=%E4%B8%8A%E6%B5%B7; PRE_UTM=; PRE_LAND=https%3A%2F%2Fwww.lagou.com%2F; LGSID=20200408152358-9c69660c-a345-498a-8034-10252ffe3e1b; PRE_HOST=www.baidu.com; PRE_SITE=https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DtVZflBXEaVsOXF%5FjFHCfFYOeqdRD4HXjo5Hn4EMTkmG%26ck%3D9551.3.94.252.155.241.150.180%26shh%3Dwww.baidu.com%26sht%3Dbaiduhome%5Fpg%26wd%3D%26eqid%3Df1483e950008cf70000000045e8d7c0b; TG-TRACK-CODE=index_search; _gat=1; Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1586229731,1586270033,1586330638,1586332101; SEARCH_ID=42fc072280a0477c92659467a1ad8b00; X_HTTP_TOKEN=dde9d3565777ba88401233685139c3c6eb8d6e4588; Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1586332105; LGRID=20200408154824-69c24f2c-ed0d-40aa-acbc-1df8cfac23d1",
        'Accept': "application/json, text/javascript, */*; q=0.01",
        'X-Anit-Forge-Code': "0",
        # NOTE(review): requests is documented to omit dict entries whose value is None,
        # so this header is likely never sent -- confirm whether the string 'None' was intended.
        'X-Anit-Forge-Token': None,
        'X-Requested-With': 'XMLHttpRequest'

    }
    # Form fields posted to the positionAjax endpoint.
    # Presumably 'pn' is the page number and 'kd' the search keyword -- TODO confirm.
    form_data = {
        'first': 'false',
        'pn': 1,
        'kd': 'python'
    }
    response = requests.post(
        'https://www.lagou.com/jobs/positionAjax.json?city=%E4%B8%8A%E6%B5%B7&needAddtionalResult=false',
        headers=headers,
        data=form_data,
        # Without a timeout, requests may wait on an unresponsive server indefinitely.
        timeout=10,
    )
    # Fail with an HTTP error here instead of handing an error page to the JSON parser.
    response.raise_for_status()

    # Decode the JSON body and pull out the list of job postings.
    ret = json.loads(response.text)
    res = ret['content']['positionResult']['result']
    
    # Keys read from each posting, in the order the report template consumes them.
    report_keys = (
        'positionName',
        'companyFullName',
        'industryField',
        'salary',
        'financeStage',
        'companyLabelList',
        'firstType',
        'district',
        'workYear',
        'education',
    )

    # One %s placeholder per entry of report_keys.
    report_template = """
        职位:%s
        公司名称:%s
        公司类型:%s
        薪资:%s
        融资阶段:%s
        公司福利:%s
        工作分类:%s
        公司地址:%s
        工作时间:%s
        学历:%s
        """

    # Print one formatted summary per posting in the result list.
    for posting in res:
        print(report_template % tuple(posting[key] for key in report_keys))
    
  • 相关阅读:
    第十八课 顺序存储线性表的分析
    第十七课 StaticList和DynamicList实现
    第十六课 顺序存储结构的抽象实现
    第十五课 线性表的顺序存储结构
    第十四课 线性表的本质和操作
    第十三课 类族结构的进化
    第十二课 顶层父类的创建
    第十一课 异常类构建
    HDU 5773The All-purpose Zero
    HDU 5755 Gambler Bo
  • 原文地址:https://www.cnblogs.com/kai-/p/12668494.html
Copyright © 2020-2023  润新知