def parse(self, response):
    """Read the ``token`` cookie from *response* and request the signed URL.

    The ``sign`` query parameter is the hexadecimal MD5 digest of the UTF-8
    encoded token value. The token is also sent back as a cookie on the
    follow-up request.

    Args:
        response: Response whose ``Set-Cookie`` headers carry the token.

    Yields:
        A ``scrapy.Request`` for the signed detail URL, with
        ``self.parse_json`` as its callback.

    Raises:
        IndexError: If no ``Set-Cookie`` header sets a ``token`` cookie
            (same exception type the positional lookup used to raise).
    """
    # Anchor on the cookie name (``match`` starts at the beginning of the
    # header value) so look-alikes such as ``csrftoken=`` are not picked up,
    # and accept a value that is not followed by ``;``.
    token_pattern = re.compile(r'\s*token=([^;]*)')

    token = None
    for raw_cookie in response.headers.getlist("set-cookie"):
        found = token_pattern.match(raw_cookie.decode("utf-8"))
        if found:
            # Keep scanning: when the same cookie is set more than once,
            # the later Set-Cookie header is the one that takes effect.
            token = found.group(1)
    if token is None:
        raise IndexError("no 'token' cookie found in the Set-Cookie headers")

    # hashlib works on bytes, so the token has to be encoded before hashing.
    sign = hashlib.md5(token.encode("utf-8")).hexdigest()
    url = 'https://scrapingclub.com/exercise/ajaxdetail_sign/?sign=' + sign

    # NOTE(review): ``header`` is not defined in this function; it is
    # presumably a module-level dict of request headers -- confirm.
    yield scrapy.Request(
        url=url,
        cookies={'token': token},
        headers=header,
        callback=self.parse_json,
    )
关键在于把 token 转成 MD5:先将 token 编码为 UTF-8 字节串,再取其 MD5 的十六进制摘要,作为请求中的 sign 参数。
def get_md(url):
    """Return the hexadecimal MD5 digest of *url*.

    Args:
        url: The data to hash, as ``bytes`` or ``str``. A ``str`` is encoded
            as UTF-8 first, because ``hashlib`` only accepts bytes-like
            input and raises ``TypeError`` for text.

    Returns:
        The 32-character lowercase hexadecimal digest.
    """
    if isinstance(url, str):
        url = url.encode("utf-8")
    # Passing the data to the constructor is equivalent to creating an empty
    # hash object and calling ``update`` once.
    return hashlib.md5(url).hexdigest()
copy from: https://www.jianshu.com/p/271b20f7574f
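这里只用到了其中的两句。起初不清楚为什么要调用 update:它的作用是向哈希对象追加待计算的数据,`hashlib.md5(data)` 等价于先 `m = hashlib.md5()` 再 `m.update(data)`,所以一次性计算时可以省略 update。另外,正如原作者所说,不能直接写 `hashlib.md5(str)`,传入的必须是 bytes,因此需要先用 `encode` 转码一次。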