• 爬取 爱笔智能 招聘职位


    爬取爱笔智能招聘职位

    http://aibee.com/cn/joinus.aspx 

     1 import requests
     2 from urllib.parse import urlencode
     3 from pyquery import PyQuery as pq
     4 from pymongo import MongoClient
     5 import json
     6 
     7 
     # URL prefix of the job-detail endpoint; get_page appends the urlencoded
     # id right after the trailing '&'.
     base_url = 'http://aibee.com/cn/joinus.aspx?action=jobinfo&'

     # Headers sent with every request issued by get_page.
     headers = {
         'Host': 'aibee.com',
         'Referer': 'http://aibee.com/cn/joinus.aspx',
         'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
         'X-Requested-With': 'XMLHttpRequest',
     }

     # MongoDB handles: default client, database 'aibee', collection 'aibee'.
     client = MongoClient()
     db = client.aibee
     collection = db.aibee

     # Highest id requested by the driver loop (ids 1..max_id inclusive).
     max_id = 50
    21 
    22 
    23 
    def get_page(id, timeout=10):
        """Fetch the job posting with the given id and return its decoded JSON.

        Args:
            id: Value sent as the ``id`` query parameter. The name shadows the
                builtin but is kept so existing keyword callers keep working.
            timeout: Seconds to wait for the server before giving up.

        Returns:
            The decoded JSON body when the server answers 200 with valid JSON.
            ``None`` on any other status code, on a failed request, or when
            the body is not valid JSON.
        """
        url = base_url + urlencode({'id': id})
        try:
            # The id already travels in the query string, so the GET carries
            # no body. The timeout keeps a stalled server from hanging the
            # whole crawl.
            response = requests.get(url, headers=headers, timeout=timeout)
        except requests.RequestException as e:
            print('Error', e.args)
            return None
        if response.status_code != 200:
            return None
        try:
            return response.json()
        except ValueError as e:
            # A 200 answer whose body is not JSON is treated as "no data"
            # instead of aborting the crawl.
            print('Error', e.args)
            return None
    40 
    41 
    def parse_page(json_1):
        """Yield one flat dict per entry of the ``'shuzu'`` list in a response.

        Args:
            json_1: Decoded JSON as returned by get_page, or ``None``.

        Yields:
            A dict with the keys ``id``, ``title``, ``zhize``, ``yaoqiu``,
            ``dtt`` and ``emailaddr``. ``zhize`` and ``yaoqiu`` are passed
            through PyQuery and reduced to their text content; the other
            values are copied as-is.
        """
        if not json_1:
            return
        # A missing or null 'shuzu' means "nothing to yield" rather than a
        # TypeError from iterating None.
        for item in json_1.get('shuzu') or []:
            # NOTE(review): `id` is not a local name. It resolves to the
            # module-level loop variable of the __main__ driver (or to the
            # builtin when no such global exists), so every entry is skipped
            # while the driver is on id 1. Confirm that this is intended.
            if id == 1:
                continue
            yield {
                'id': item.get('id'),
                'title': item.get('title'),
                'zhize': pq(item.get('zhize')).text(),
                'yaoqiu': pq(item.get('yaoqiu')).text(),
                'dtt': item.get('dtt'),
                'emailaddr': item.get('emailaddr'),
            }
    58 
    59 
    def write_to_file(content):
        """Append ``content`` to ``aibee.json`` as one JSON document per line.

        Args:
            content: Any object ``json.dumps`` can serialise.
        """
        # ensure_ascii=False writes non-ASCII text verbatim instead of as
        # \uXXXX escapes. The with-block closes the file on exit.
        with open('aibee.json', 'a', encoding='utf-8') as f:
            f.write(json.dumps(content, ensure_ascii=False) + '\n')
    64 
    def save_to_mongo(result):
        """Insert ``result`` into the module-level ``collection``.

        Prints ``'Saved to Mongo'`` once the insert has produced a document id.

        Args:
            result: The document (a dict) to store. PyMongo adds an ``_id``
                key to it in place when none is present.
        """
        # Collection.insert() is deprecated since PyMongo 3 and removed in
        # PyMongo 4; insert_one() is its single-document replacement.
        if collection.insert_one(result).inserted_id is not None:
            print('Saved to Mongo')
    68 
    69 
    if __name__ == '__main__':
        # The loop variable has to stay named `id`: parse_page compares the
        # module-level `id` against 1 while it iterates.
        for id in range(1, max_id + 1):
            # Fetch one posting, then print, file and store every parsed entry.
            for result in parse_page(get_page(id)):
                print(result)
                write_to_file(result)
                save_to_mongo(result)

     或者,改用 POST 请求,将 id 作为表单数据提交:

     1 import requests
     2 from urllib.parse import urlencode
     3 from pyquery import PyQuery as pq
     4 from pymongo import MongoClient
     5 import json
     6 
     7 
     # Job-detail endpoint that get_page posts the form data to.
     url = 'http://aibee.com/cn/joinus.aspx?action=jobinfo'

     # Headers sent with every request issued by get_page.
     headers = {
         'Host': 'aibee.com',
         'Referer': 'http://aibee.com/cn/joinus.aspx',
         'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
         'X-Requested-With': 'XMLHttpRequest',
     }

     # MongoDB handles: default client, database 'aibee', collection 'aibee'.
     client = MongoClient()
     db = client.aibee
     collection = db.aibee

     # Highest id requested by the driver loop (ids 1..max_id inclusive).
     max_id = 50
    21 
    22 
    23 
    def get_page(id, timeout=10):
        """POST the given id to the job endpoint and return the decoded JSON.

        Args:
            id: Value sent as the ``id`` form field. The name shadows the
                builtin but is kept so existing keyword callers keep working.
            timeout: Seconds to wait for the server before giving up.

        Returns:
            The decoded JSON body when the server answers 200 with valid JSON.
            ``None`` on any other status code, on a failed request, or when
            the body is not valid JSON.
        """
        try:
            # The timeout keeps a stalled server from hanging the whole crawl.
            r = requests.post(url, data={'id': id}, headers=headers,
                              timeout=timeout)
        except requests.RequestException as e:
            print('Error', e.args)
            return None
        if r.status_code != 200:
            return None
        try:
            return r.json()
        except ValueError as e:
            # A 200 answer whose body is not JSON is treated as "no data"
            # instead of aborting the crawl.
            print('Error', e.args)
            return None
    35 
    36 
    def parse_page(json_1):
        """Yield one flat dict per entry of the ``'shuzu'`` list in a response.

        Args:
            json_1: Decoded JSON as returned by get_page, or ``None``.

        Yields:
            A dict with the keys ``id``, ``title``, ``zhize``, ``yaoqiu``,
            ``dtt`` and ``emailaddr``. ``zhize`` and ``yaoqiu`` are passed
            through PyQuery and reduced to their text content; the other
            values are copied as-is.
        """
        if not json_1:
            return
        # A missing or null 'shuzu' means "nothing to yield" rather than a
        # TypeError from iterating None.
        for item in json_1.get('shuzu') or []:
            # NOTE(review): `id` is not a local name. It resolves to the
            # module-level loop variable of the __main__ driver (or to the
            # builtin when no such global exists), so every entry is skipped
            # while the driver is on id 1. Confirm that this is intended.
            if id == 1:
                continue
            yield {
                'id': item.get('id'),
                'title': item.get('title'),
                'zhize': pq(item.get('zhize')).text(),
                'yaoqiu': pq(item.get('yaoqiu')).text(),
                'dtt': item.get('dtt'),
                'emailaddr': item.get('emailaddr'),
            }
    53 
    54 
    def write_to_file(content):
        """Append ``content`` to ``aibee.json`` as one JSON document per line.

        Args:
            content: Any object ``json.dumps`` can serialise.
        """
        # ensure_ascii=False writes non-ASCII text verbatim instead of as
        # \uXXXX escapes. The with-block closes the file on exit.
        with open('aibee.json', 'a', encoding='utf-8') as f:
            f.write(json.dumps(content, ensure_ascii=False) + '\n')
    59 
    def save_to_mongo(result):
        """Insert ``result`` into the module-level ``collection``.

        Prints ``'Saved to Mongo'`` once the insert has produced a document id.

        Args:
            result: The document (a dict) to store. PyMongo adds an ``_id``
                key to it in place when none is present.
        """
        # Collection.insert() is deprecated since PyMongo 3 and removed in
        # PyMongo 4; insert_one() is its single-document replacement.
        if collection.insert_one(result).inserted_id is not None:
            print('Saved to Mongo')
    63 
    64 
    if __name__ == '__main__':
        # The loop variable has to stay named `id`: parse_page compares the
        # module-level `id` against 1 while it iterates.
        for id in range(1, max_id + 1):
            # Fetch one posting, then print, file and store every parsed entry.
            for result in parse_page(get_page(id)):
                print(result)
                write_to_file(result)
                save_to_mongo(result)
  • 相关阅读:
    GitLab用户权限管理
    类似vant中的tab实现
    Gitgitee/github/gitlab账号分离
    Vim操作
    partition by 用法
    crontab执行feat_gen.sh时,报错找不到pyspark
    SQL同一个字段出现null和0值,有何区别,原因是什么?left join导致null值出现,case when导致0值出现
    linux 定时任务crontab的用法
    卡方检验
    ROC与AUC
  • 原文地址:https://www.cnblogs.com/wanglinjie/p/9226880.html
Copyright © 2020-2023  润新知