爬取爱笔智能招聘职位
http://aibee.com/cn/joinus.aspx
# Scrape job postings from the Aibee "join us" endpoint (GET variant) and
# store every posting both in a local JSON file and in MongoDB.
import json
from urllib.parse import urlencode

import requests
from pymongo import MongoClient
from pyquery import PyQuery as pq

# The trailing '&' lets the url-encoded query parameters be appended directly.
base_url = 'http://aibee.com/cn/joinus.aspx?action=jobinfo&'

headers = {
    'Host': 'aibee.com',
    'Referer': 'http://aibee.com/cn/joinus.aspx',
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/58.0.3029.110 Safari/537.36'
    ),
    'X-Requested-With': 'XMLHttpRequest',
}

client = MongoClient()
db = client['aibee']
collection = db['aibee']
max_id = 50

# Seconds to wait for the server before giving up on a single request.
REQUEST_TIMEOUT = 10


def get_page(id):
    """Fetch the job-info JSON for one id with a GET request.

    Args:
        id: Value sent as the ``id`` query parameter.

    Returns:
        The decoded JSON payload, or ``None`` when the request fails, the
        status code is not 200, or the body is not valid JSON.
    """
    url = base_url + urlencode({'id': id})
    try:
        # No request body: the id already travels in the query string.
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as e:
        # Covers connection errors as well as timeouts.
        print('Error', e.args)
        return None
    if response.status_code != 200:
        return None
    try:
        return response.json()
    except ValueError as e:
        # The server answered 200 but the body was not JSON.
        print('Error', e.args)
        return None


def _html_to_text(fragment):
    """Return the text content of an HTML fragment, or '' when it is missing."""
    return pq(fragment).text() if fragment else ''


def parse_page(json_1, page_id=None):
    """Yield one dict per posting found under the ``'shuzu'`` key.

    Args:
        json_1: Decoded payload returned by ``get_page`` (may be ``None``).
        page_id: The id that was requested. When it equals 1 nothing is
            yielded.

    Yields:
        Dicts with the keys ``id``, ``title``, ``zhize``, ``yaoqiu``,
        ``dtt`` and ``emailaddr``; the two HTML fields are reduced to text.
    """
    # NOTE(review): the original compared a module-level loop variable named
    # ``id`` to 1, which dropped everything returned for request id 1 when run
    # as a script. That behaviour is kept via the explicit ``page_id``
    # argument -- confirm that skipping id 1 is really intended.
    if not json_1 or page_id == 1:
        return
    # 'shuzu' can be absent or null; treat that as "no postings".
    for item in json_1.get('shuzu') or []:
        yield {
            'id': item.get('id'),
            'title': item.get('title'),
            'zhize': _html_to_text(item.get('zhize')),
            'yaoqiu': _html_to_text(item.get('yaoqiu')),
            'dtt': item.get('dtt'),
            'emailaddr': item.get('emailaddr'),
        }


def write_to_file(content):
    """Append ``content`` to ``aibee.json`` as one JSON document per line."""
    with open('aibee.json', 'a', encoding='utf-8') as f:
        f.write(json.dumps(content, ensure_ascii=False) + '\n')


def save_to_mongo(result):
    """Insert one posting into the ``aibee`` collection and report success."""
    # insert_one replaces Collection.insert, which PyMongo 4 removed.
    if collection.insert_one(result).inserted_id:
        print('Saved to Mongo')


def main():
    """Request ids 1..max_id and persist every posting that is returned."""
    for page_id in range(1, max_id + 1):
        json_1 = get_page(page_id)
        for result in parse_page(json_1, page_id):
            print(result)
            # Write the file first: inserting into MongoDB adds an ObjectId
            # ``_id`` to the dict, which json.dumps cannot serialize.
            write_to_file(result)
            save_to_mongo(result)


if __name__ == '__main__':
    main()
或者:
# Scrape job postings from the Aibee "join us" endpoint (POST variant) and
# store every posting both in a local JSON file and in MongoDB.
import json
from urllib.parse import urlencode

import requests
from pymongo import MongoClient
from pyquery import PyQuery as pq

url = 'http://aibee.com/cn/joinus.aspx?action=jobinfo'

headers = {
    'Host': 'aibee.com',
    'Referer': 'http://aibee.com/cn/joinus.aspx',
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/58.0.3029.110 Safari/537.36'
    ),
    'X-Requested-With': 'XMLHttpRequest',
}

client = MongoClient()
db = client['aibee']
collection = db['aibee']
max_id = 50

# Seconds to wait for the server before giving up on a single request.
REQUEST_TIMEOUT = 10


def get_page(id):
    """Fetch the job-info JSON for one id with a form-encoded POST request.

    Args:
        id: Value sent as the ``id`` form field.

    Returns:
        The decoded JSON payload, or ``None`` when the request fails, the
        status code is not 200, or the body is not valid JSON.
    """
    form_data = {'id': id}
    try:
        r = requests.post(
            url, data=form_data, headers=headers, timeout=REQUEST_TIMEOUT
        )
    except requests.RequestException as e:
        # Covers connection errors as well as timeouts.
        print('Error', e.args)
        return None
    if r.status_code != 200:
        return None
    try:
        return r.json()
    except ValueError as e:
        # The server answered 200 but the body was not JSON.
        print('Error', e.args)
        return None


def _html_to_text(fragment):
    """Return the text content of an HTML fragment, or '' when it is missing."""
    return pq(fragment).text() if fragment else ''


def parse_page(json_1, page_id=None):
    """Yield one dict per posting found under the ``'shuzu'`` key.

    Args:
        json_1: Decoded payload returned by ``get_page`` (may be ``None``).
        page_id: The id that was requested. When it equals 1 nothing is
            yielded.

    Yields:
        Dicts with the keys ``id``, ``title``, ``zhize``, ``yaoqiu``,
        ``dtt`` and ``emailaddr``; the two HTML fields are reduced to text.
    """
    # NOTE(review): the original compared a module-level loop variable named
    # ``id`` to 1, which dropped everything returned for request id 1 when run
    # as a script. That behaviour is kept via the explicit ``page_id``
    # argument -- confirm that skipping id 1 is really intended.
    if not json_1 or page_id == 1:
        return
    # 'shuzu' can be absent or null; treat that as "no postings".
    for item in json_1.get('shuzu') or []:
        yield {
            'id': item.get('id'),
            'title': item.get('title'),
            'zhize': _html_to_text(item.get('zhize')),
            'yaoqiu': _html_to_text(item.get('yaoqiu')),
            'dtt': item.get('dtt'),
            'emailaddr': item.get('emailaddr'),
        }


def write_to_file(content):
    """Append ``content`` to ``aibee.json`` as one JSON document per line."""
    with open('aibee.json', 'a', encoding='utf-8') as f:
        f.write(json.dumps(content, ensure_ascii=False) + '\n')


def save_to_mongo(result):
    """Insert one posting into the ``aibee`` collection and report success."""
    # insert_one replaces Collection.insert, which PyMongo 4 removed.
    if collection.insert_one(result).inserted_id:
        print('Saved to Mongo')


def main():
    """Request ids 1..max_id and persist every posting that is returned."""
    for page_id in range(1, max_id + 1):
        json_1 = get_page(page_id)
        for result in parse_page(json_1, page_id):
            print(result)
            # Write the file first: inserting into MongoDB adds an ObjectId
            # ``_id`` to the dict, which json.dumps cannot serialize.
            write_to_file(result)
            save_to_mongo(result)


if __name__ == '__main__':
    main()