• scrapy 命令行传参 以及发送post请求payload参数



    class SciencedirectspiderSpider(scrapy.Spider):
        """Spider that runs a ScienceDirect search built from command-line arguments.

        The search term and the year are supplied with Scrapy's ``-a`` option,
        one argument per ``-a`` flag, for example::

            scrapy crawl sciencedirectspider -a "search=kidney stone" -a "year=2019"
        """

        name = 'sciencedirectspider'
        allowed_domains = ['sciencedirect.com']
        start_urls = ['https://www.sciencedirect.com/search?qs=kidney%20stone']

        # Spider arguments given with ``-a`` are handled here, at initialisation.
        def __init__(self, year='', search='', **kwargs):
            """Store the spider arguments and build the search URL.

            Args:
                year: Value placed in the ``years`` query parameter.
                search: Value placed in the ``qs`` query parameter.
                **kwargs: Any further spider arguments; forwarded to the base
                    ``scrapy.Spider`` initialiser instead of being dropped.
            """
            # Initialise the base class first, forwarding the extra arguments,
            # so the attributes assigned below cannot be overwritten by them.
            super().__init__(**kwargs)
            self.year = year
            self.search = search
            self.urls = (
                'https://www.sciencedirect.com/search?qs=' + search
                + '&years=' + year + '&sortBy=date'
            )
            # NOTE(review): ``chorme_options`` is defined outside this excerpt;
            # the spelling is kept so the reference still resolves -- confirm.
            self.browser = webdriver.Chrome(chrome_options=chorme_options)

        def start_requests(self):
            """Yield the initial search request for the URL built in ``__init__``."""
            # XPath kept for reference (presumably the pagination text):
            # //*[@id="srp-pagination"]/li[1]/text()[4]
            # ``self.page`` is the callback; it is not part of this excerpt.
            yield scrapy.Request(self.urls, callback=self.page, meta={'url': self.urls})

    执行命令:scrapy crawl sciencedirectspider --nolog -a "search=kidney stone" -a "year=2019"

    ** 注意:每个 -a 只能传一个参数,传多个参数时需要重复使用 -a

    main执行语句:

    from scrapy.cmdline import execute
    # ``--nolog`` turns off log output; each spider argument needs its own ``-a``.
    # Without spider arguments the call would be:
    #     execute(['scrapy', 'crawl', 'sciencedirectspider', '--nolog'])
    crawl_command = ['scrapy', 'crawl', 'sciencedirectspider', '--nolog']
    spider_arguments = ['-a', 'search=kidney stone', '-a', 'year=2019']
    execute(crawl_command + spider_arguments)

    发送 POST 请求(携带 payload 参数):

    class IeeexplorespiderSpider(scrapy.Spider):
        """Spider that POSTs a JSON search payload to the IEEE Xplore REST endpoint."""

        name = 'ieeexplorespider'
        allowed_domains = ['ieeexplore.ieee.org']
        start_urls = ['http://ieeexplore.ieee.org/']

        # Headers attached to the search request; the body is sent as JSON.
        headers = {
            "Content-Type": "application/json",
            "Host": "ieeexplore.ieee.org",
            "Origin": "https://ieeexplore.ieee.org",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36"
        }

        def start_requests(self):
            """Yield a single POST request whose body is the JSON-encoded query."""
            # Search-page URL kept for reference:
            # https://ieeexplore.ieee.org/search/searchresult.jsp?newsearch=true&queryText=Security%20Analytics
            search_endpoint = "https://ieeexplore.ieee.org/rest/search"

            # Key order is kept as-is so the serialised body is unchanged.
            payload = {
                "highlight": True,
                "matchPubs": True,
                "newsearch": True,
                "pageNumber": "1",
                "queryText": "Security Analytics",
                "returnFacets": ["ALL"],
                "returnType": "SEARCH"
            }
            encoded_body = json.dumps(payload)

            yield scrapy.Request(
                url=search_endpoint,
                method='POST',
                headers=self.headers,
                body=encoded_body,
                callback=self.parse,
            )

        def parse(self, response):
            """Print a marker followed by the raw text of the search response."""
            print(123)
            print(response.text)
  • 相关阅读:
    mysql死锁问题分析
    你应该知道的RPC原理
    如何健壮你的后端服务?
    如何用消息系统避免分布式事务?
    一个故事讲清楚NIO
    地图匹配实践
    利用模拟退火提高Kmeans的聚类精度
    大数据并行计算利器之MPI/OpenMP
    GPU---并行计算利器
    如何设计实现一个地址反解析服务?
  • 原文地址:https://www.cnblogs.com/wukai66/p/13306954.html
Copyright © 2020-2023  润新知