• pyspider爬取TripAdvisor


     1 #!/usr/bin/env python
     2 # -*- encoding: utf-8 -*-
     3 # Created on 2017-06-11 10:10:53
     4 # Project: london
     5 
     6 from pyspider.libs.base_handler import *
     7 import pymongo
     8 
     9 
    class Handler(BaseHandler):
        """pyspider handler that scrapes London attraction pages from TripAdvisor.

        The crawl starts at the London attractions listing, follows every
        attraction link on each listing page plus the "next page" link, extracts
        a small record from every attraction page and stores that record in the
        ``london`` collection of the ``trip`` MongoDB database.
        """

        crawl_config = {
        }
        # Shared MongoDB connection, created once when the class body is executed.
        client = pymongo.MongoClient('localhost')
        db = client['trip']

        @every(minutes=24 * 60)
        def on_start(self):
            """Queue the first listing page; its response goes to ``index_page``."""
            self.crawl('https://www.tripadvisor.cn/Attractions-g186338-Activities-c47-London_England.html', callback=self.index_page)

        @config(age=10 * 24 * 60 * 60)
        def index_page(self, response):
            """Queue every attraction on a listing page, then the next listing page.

            Args:
                response: pyspider response for a listing page; ``response.doc``
                    is queried with CSS selectors.
            """
            for each in response.doc('.listing_title > a').items():
                self.crawl(each.attr.href, callback=self.detail_page)
            next_page = response.doc('.pagination .nav.next').attr.href
            # The last listing page has no "next" link, so the lookup yields
            # nothing; only schedule a follow-up crawl when there is a URL.
            if next_page:
                self.crawl(next_page, callback=self.index_page)

        @config(priority=2)
        def detail_page(self, response):
            """Extract one attraction record from its detail page.

            Args:
                response: pyspider response for an attraction detail page.

            Returns:
                dict with the keys ``name``, ``url``, ``comment``, ``address``,
                ``phone``, ``duration`` and ``instruction``. Every value except
                ``url`` is the text of the elements matched by a CSS selector.
            """
            return {
                "name": response.doc('h1').text(),
                "url": response.url,
                'comment': response.doc('.heading_ratings .taLnk').text(),
                'address': response.doc('.addressReset > span.format_address').text(),
                'phone': response.doc('.phoneNumber').text(),
                # The two selectors below depend on the exact position of the
                # blocks inside the listing header (nth-child), so they are
                # sensitive to page layout changes.
                'duration': response.doc('#MAP_AND_LISTING > div.main_section.listingbar > div > div.above_fold_listing_details > div > div:nth-child(5) > div > div:nth-child(1)').text(),
                'instruction': response.doc('#MAP_AND_LISTING > div.main_section.listingbar > div > div.above_fold_listing_details > div > div:nth-child(6) > div > b').text()
            }

        def on_result(self, result):
            """Persist a non-empty callback result to MongoDB.

            NOTE(review): this replaces the inherited ``on_result`` without
            calling it -- confirm that pyspider's own result handling is not
            needed for this project.

            Args:
                result: value returned by a callback; falsy values are ignored.
            """
            if result:
                self.save_to_mongo(result)

        def save_to_mongo(self, result):
            """Insert ``result`` into the ``london`` collection and report it.

            Args:
                result: dict record to store.
            """
            # insert_one replaces Collection.insert, which was deprecated in
            # PyMongo 3.0 and removed in PyMongo 4.0.
            if self.db['london'].insert_one(result).inserted_id:
                print('saved to mongo', result)
    45     
  • 相关阅读:
    AlexNet模型
    AlexNet详细解读
    Network in Network学习笔记
    在AlexNet中LRN 局部响应归一化的理解
    深度学习相关转载收集
    网络结构解读之inception系列五:Inception V4
    网络结构解读之inception系列四:Inception V3
    网络结构解读之inception系列三:BN-Inception(Inception V2)
    网络结构解读之inception系列二:GoogLeNet(Inception V1)
    mac下安装启动Mongodb
  • 原文地址:https://www.cnblogs.com/themost/p/6985282.html
Copyright © 2020-2023  润新知