My environment: Celery 3.1.25, Python 3.6.9, Windows 10.
The Celery task code is as follows; QuotesSpider is the name of the spider class in my Scrapy project:
```python
from celery_app import app
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
from tutorial.spiders.quotes import QuotesSpider


def crawl_run():
    scope = 'all'
    process = CrawlerProcess(settings=get_project_settings())
    process.crawl(QuotesSpider, scope)
    # start() launches the Twisted reactor and blocks until the crawl ends
    process.start()
    process.join()


@app.task(queue='default')
def execute_task():
    return crawl_run()
```
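For context, here is a minimal sketch of what the celery_app module and a beat schedule firing this task could look like. Everything in it is an assumption for illustration (the Redis broker URL, the 10-minute interval, and the tasks.execute_task module path are all hypothetical); the setting names are the upper-case ones Celery 3.1 uses:

```python
# Hypothetical celery_app.py (a sketch, not the actual config from this post).
from datetime import timedelta

from celery import Celery

# Assumed broker; any broker supported by Celery 3.1 works the same way.
app = Celery('celery_app', broker='redis://localhost:6379/0')

app.conf.CELERYBEAT_SCHEDULE = {
    'crawl-quotes-periodically': {
        'task': 'tasks.execute_task',       # assumed module path of the task
        'schedule': timedelta(minutes=10),  # assumed interval
        'options': {'queue': 'default'},    # matches the task's queue
    },
}
```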
I later found that this version errors when the scheduled task fires repeatedly: Twisted's reactor cannot be restarted once it has run, so every run after the first one fails. Rewriting it as shown further below solved the problem; note that this class has to live in the same directory as the project's scrapy.cfg.
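First, the failure mode itself, as a minimal sketch (the exception is Twisted's own ReactorNotRestartable; the two calls stand in for two scheduled runs landing in the same worker process):

```python
# Sketch: Twisted's default reactor can be started at most once per process.
crawl_run()  # first scheduled run completes normally
crawl_run()  # second run raises twisted.internet.error.ReactorNotRestartable
```

CrawlerRunner plus crochet avoids this entirely, because the reactor is started once in a background thread and simply stays up: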
```python
from crawler.tutorial.crawler.tutorial.spiders.quotes import QuotesSpider
from scrapy.utils.project import get_project_settings
import scrapy.crawler as crawler
from crochet import setup

# crochet starts the Twisted reactor once, in a background thread, so
# repeated crawls never try to restart it.
setup()


class Scraper:
    def crawl_run(self):
        settings = get_project_settings()
        runner = crawler.CrawlerRunner(settings)
        # CrawlerRunner expects the spider class (not an instance); it
        # schedules the crawl on the already-running reactor.
        runner.crawl(QuotesSpider, 'all')
        runner.join()


if __name__ == '__main__':
    scraper = Scraper()
    scraper.crawl_run()
```
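To wire this back into Celery, the task just delegates to Scraper. A sketch, where run_crawler is an assumed file name for the module above:

```python
# Hypothetical tasks.py; 'run_crawler' is an assumed module name for the
# Scraper file above (kept next to scrapy.cfg so get_project_settings()
# picks up the project settings).
from celery_app import app
from run_crawler import Scraper


@app.task(queue='default')
def execute_task():
    Scraper().crawl_run()
```

One caveat: crawl_run() returns as soon as the crawl has been scheduled, because crochet keeps the reactor in a background thread and runner.join() only returns a Deferred. If the task should block until the crawl actually finishes, crochet's wait_for decorator can wrap that Deferred (the one-hour timeout here is an arbitrary assumption):

```python
from crawler.tutorial.crawler.tutorial.spiders.quotes import QuotesSpider
from scrapy.utils.project import get_project_settings
import scrapy.crawler as crawler
from crochet import setup, wait_for

setup()


# wait_for runs the decorated function in the reactor thread and blocks the
# caller until the returned Deferred fires (or the timeout expires).
@wait_for(timeout=3600)
def crawl_blocking():
    runner = crawler.CrawlerRunner(get_project_settings())
    runner.crawl(QuotesSpider, 'all')
    return runner.join()
```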