• 爬取 爱笔智能 招聘职位


    爬取爱笔智能招聘职位

    http://aibee.com/cn/joinus.aspx 

     1 import requests
     2 from urllib.parse import urlencode
     3 from pyquery import PyQuery as pq
     4 from pymongo import MongoClient
     5 import json
     6 
     7 
     # Endpoint prefix for a single job posting; get_page appends the
     # url-encoded ``id`` parameter to it.
     base_url = "http://aibee.com/cn/joinus.aspx?action=jobinfo&"

     # Headers sent with every request. X-Requested-With labels the call as an
     # XMLHttpRequest; Referer points at the public job-listing page.
     headers = {
         "Host": "aibee.com",
         "Referer": "http://aibee.com/cn/joinus.aspx",
         "User-Agent": (
             "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) "
             "AppleWebKit/537.36 (KHTML, like Gecko) "
             "Chrome/58.0.3029.110 Safari/537.36"
         ),
         "X-Requested-With": "XMLHttpRequest",
     }

     # MongoClient() is called without arguments, so the driver's default
     # connection settings apply. Database and collection are both "aibee".
     client = MongoClient()
     db = client["aibee"]
     collection = db["aibee"]

     # Highest posting id requested by the main loop (ids 1..max_id inclusive).
     max_id = 50
    21 
    22 
    23 
    def get_page(id, timeout=10):
        """Fetch the job-posting JSON for one posting id with a GET request.

        Args:
            id: Posting id. It is sent both as the ``id`` query parameter and
                as the url-encoded request body, as the original code did.
                (The name shadows the builtin; kept for interface stability.)
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot hang the crawl forever.

        Returns:
            The decoded JSON body when the server answers 200 with valid JSON,
            otherwise ``None``.
        """
        # Encode once and reuse the result for both the URL and the body.
        data = urlencode({'id': id})
        url = base_url + data
        try:
            response = requests.get(url, data=data, headers=headers,
                                    timeout=timeout)
        except requests.RequestException as e:
            # RequestException is the base class of ConnectionError, Timeout
            # and the other transport errors raised by requests.
            print('Error', e.args)
            return None

        if response.status_code != 200:
            return None

        try:
            return response.json()
        except ValueError as e:
            # A 200 response whose body is not JSON should not abort the crawl.
            print('Error', e.args)
            return None
    40 
    41 
    def parse_page(json_1):
        """Yield one flattened record per job posting in a response payload.

        Args:
            json_1: Decoded response as returned by ``get_page`` (a dict), or
                a falsy value when the request failed.

        Yields:
            dict with the keys ``id``, ``title``, ``zhize``, ``yaoqiu``, ``dtt``
            and ``emailaddr``. ``zhize`` and ``yaoqiu`` are passed through
            PyQuery and reduced to their text content.
        """
        if not json_1:
            return

        # A payload without 'shuzu' (or with a null value) yields nothing
        # instead of raising TypeError while iterating None.
        for item in json_1.get('shuzu') or []:
            # NOTE(review): `id` is not defined in this function. When the file
            # runs as a script it resolves to the module-level loop variable of
            # the __main__ loop, so every item of the id-1 response is skipped.
            # When imported it resolves to the builtin `id`, and the test is
            # never true. Confirm the intent before changing it.
            if id == 1:
                continue

            yield {
                'id': item.get('id'),
                'title': item.get('title'),
                'zhize': pq(item.get('zhize')).text(),
                'yaoqiu': pq(item.get('yaoqiu')).text(),
                'dtt': item.get('dtt'),
                'emailaddr': item.get('emailaddr'),
            }
    58 
    59 
    def write_to_file(content):
        """Append one record to ``aibee.json`` as a single JSON line.

        Args:
            content: JSON-serializable object. Non-ASCII characters are written
                as-is (``ensure_ascii=False``) in UTF-8.
        """
        # The `with` block closes the file; no explicit close() is needed.
        with open('aibee.json', 'a', encoding='utf-8') as f:
            f.write(json.dumps(content, ensure_ascii=False) + '\n')
    64 
    def save_to_mongo(result):
        """Insert one record into the module-level MongoDB ``collection``.

        Args:
            result: Document (dict) to store. The driver adds an ``_id`` key to
                it in place.
        """
        # Collection.insert() was deprecated in PyMongo 3 and removed in
        # PyMongo 4; insert_one() is the supported single-document call.
        if collection.insert_one(result).acknowledged:
            print('Saved to Mongo')
    68 
    69 
    if __name__ == '__main__':
        # Walk posting ids 1..max_id. For each record: print it, append it to
        # the JSON-lines file, then store it in MongoDB.
        # NOTE: the loop variable has to stay named `id`, because parse_page
        # reads the module-level `id` in its skip test.
        for id in range(1, max_id + 1):
            json_1 = get_page(id)
            for result in parse_page(json_1):
                print(result)
                write_to_file(result)
                save_to_mongo(result)

     或者(改用 POST 请求,以表单数据提交 id):

     1 import requests
     2 from urllib.parse import urlencode
     3 from pyquery import PyQuery as pq
     4 from pymongo import MongoClient
     5 import json
     6 
     7 
     # Endpoint that receives the POSTed posting id.
     url = "http://aibee.com/cn/joinus.aspx?action=jobinfo"

     # Headers sent with every request. X-Requested-With labels the call as an
     # XMLHttpRequest; Referer points at the public job-listing page.
     headers = {
         "Host": "aibee.com",
         "Referer": "http://aibee.com/cn/joinus.aspx",
         "User-Agent": (
             "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) "
             "AppleWebKit/537.36 (KHTML, like Gecko) "
             "Chrome/58.0.3029.110 Safari/537.36"
         ),
         "X-Requested-With": "XMLHttpRequest",
     }

     # MongoClient() is called without arguments, so the driver's default
     # connection settings apply. Database and collection are both "aibee".
     client = MongoClient()
     db = client["aibee"]
     collection = db["aibee"]

     # Highest posting id requested by the main loop (ids 1..max_id inclusive).
     max_id = 50
    21 
    22 
    23 
    def get_page(id, timeout=10):
        """Fetch the job-posting JSON for one posting id with a POST request.

        Args:
            id: Posting id, submitted as the ``id`` form field. (The name
                shadows the builtin; kept for interface stability.)
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot hang the crawl forever.

        Returns:
            The decoded JSON body when the server answers 200 with valid JSON,
            otherwise ``None``.
        """
        form_data = {'id': id}
        try:
            r = requests.post(url, data=form_data, headers=headers,
                              timeout=timeout)
        except requests.RequestException as e:
            # RequestException is the base class of ConnectionError, Timeout
            # and the other transport errors raised by requests.
            print('Error', e.args)
            return None

        if r.status_code != 200:
            return None

        try:
            return r.json()
        except ValueError as e:
            # A 200 response whose body is not JSON should not abort the crawl.
            print('Error', e.args)
            return None
    35 
    36 
    def parse_page(json_1):
        """Yield one flattened record per job posting in a response payload.

        Args:
            json_1: Decoded response as returned by ``get_page`` (a dict), or
                a falsy value when the request failed.

        Yields:
            dict with the keys ``id``, ``title``, ``zhize``, ``yaoqiu``, ``dtt``
            and ``emailaddr``. ``zhize`` and ``yaoqiu`` are passed through
            PyQuery and reduced to their text content.
        """
        if not json_1:
            return

        # A payload without 'shuzu' (or with a null value) yields nothing
        # instead of raising TypeError while iterating None.
        for item in json_1.get('shuzu') or []:
            # NOTE(review): `id` is not defined in this function. When the file
            # runs as a script it resolves to the module-level loop variable of
            # the __main__ loop, so every item of the id-1 response is skipped.
            # When imported it resolves to the builtin `id`, and the test is
            # never true. Confirm the intent before changing it.
            if id == 1:
                continue

            yield {
                'id': item.get('id'),
                'title': item.get('title'),
                'zhize': pq(item.get('zhize')).text(),
                'yaoqiu': pq(item.get('yaoqiu')).text(),
                'dtt': item.get('dtt'),
                'emailaddr': item.get('emailaddr'),
            }
    53 
    54 
    def write_to_file(content):
        """Append one record to ``aibee.json`` as a single JSON line.

        Args:
            content: JSON-serializable object. Non-ASCII characters are written
                as-is (``ensure_ascii=False``) in UTF-8.
        """
        # The `with` block closes the file; no explicit close() is needed.
        with open('aibee.json', 'a', encoding='utf-8') as f:
            f.write(json.dumps(content, ensure_ascii=False) + '\n')
    59 
    def save_to_mongo(result):
        """Insert one record into the module-level MongoDB ``collection``.

        Args:
            result: Document (dict) to store. The driver adds an ``_id`` key to
                it in place.
        """
        # Collection.insert() was deprecated in PyMongo 3 and removed in
        # PyMongo 4; insert_one() is the supported single-document call.
        if collection.insert_one(result).acknowledged:
            print('Saved to Mongo')
    63 
    64 
    if __name__ == '__main__':
        # Walk posting ids 1..max_id. For each record: print it, append it to
        # the JSON-lines file, then store it in MongoDB.
        # NOTE: the loop variable has to stay named `id`, because parse_page
        # reads the module-level `id` in its skip test.
        for id in range(1, max_id + 1):
            json_1 = get_page(id)
            for result in parse_page(json_1):
                print(result)
                write_to_file(result)
                save_to_mongo(result)
  • 相关阅读:
    java--exceptions
    java-interface
    Java笔记
    memcpy
    const 关键字
    LeeCode整数 反转
    函数调用运算符笔记
    cvCreateImage
    c++继承笔记1
    虚拟机下的debian无法登陆
  • 原文地址:https://www.cnblogs.com/wanglinjie/p/9226880.html
Copyright © 2020-2023  润新知