环境: python scrapy
乐8账号充值
-
难题1:验证码
-
保存验证码图片:
# Request the captcha image from checkcode.php. The 'cookiejar' value of the
# current response's meta is forwarded so this request uses the same cookie
# session; the image response is delivered to self.save_ver_code.
return [Request("http://gh.le890.com/checkcode.php",
meta = {'cookiejar' : response.meta['cookiejar']},
callback = self.save_ver_code)
]
···
def save_ver_code(self, response, save_dir='/www/gh/crawler/ver_code/'):
    """Write the captcha image carried by ``response`` to disk.

    The file is named ``'code' + str(self._id)`` and is written in binary
    mode, so the image bytes are stored exactly as received.

    Args:
        response: response whose ``body`` holds the raw image bytes.
        save_dir: directory that receives the file. Defaults to the
            location that used to be hard-coded, so existing callers
            (including use as a Scrapy callback) behave as before.
    """
    # Local import keeps this excerpt self-contained.
    import os

    ver_code = 'code' + str(self._id)
    path = os.path.join(save_dir, ver_code)
    with open(path, 'wb') as f:
        f.write(response.body)
···
# Show a console prompt asking for the captcha and read what the operator
# types. This is Python 2 syntax (print statement, raw_input).
print u'输入验证码:'
code = raw_input()
···
- 难题2:cookies
- 一般情况:(如TT充值平台http://tcoin.52tt.com/tcoin/login.shtml)
# Fetch the login page first. 'cookiejar': 1 is the meta key that identifies
# the cookie session for this request; later requests forward the same value
# to stay in that session. The page is handed to self.post_login.
return [Request("http://tcoin.52tt.com/tcoin/login.shtml",
meta = {'cookiejar' : 1},
callback = self.post_login)
]
···
def post_login(self, response):
    """Submit the login form using the CSRF token found on the login page.

    Reads the value of the ``_csrf`` input from ``response``, stores it on
    ``self._csrf`` and posts it together with the credentials to the login
    endpoint. The ``cookiejar`` meta value is forwarded so the POST runs in
    the same cookie session as the page that supplied the token.

    Args:
        response: the login page response.

    Returns:
        A one-element list holding the ``FormRequest``; its response is
        passed to ``self.read_account_message``.

    Raises:
        IndexError: if the page contains no ``_csrf`` input.
    """
    self._csrf = response.xpath('//input[@name="_csrf"]/@value').extract()[0]
    # Submit the form.
    # NOTE(review): 'username' / 'password' look like placeholders for the
    # real credentials - confirm before use.
    return [FormRequest(
        "http://tcoin.52tt.com/tcoin/login",
        meta={'cookiejar': response.meta['cookiejar']},
        headers=self.headers,
        formdata={
            'username': 'username',
            'password': 'password',
            'submit': '',
            '_csrf': self._csrf,
        },
        callback=self.read_account_message,
        # Do not let the duplicate filter drop a repeated login POST.
        dont_filter=True,
    )]
- 保存session:(乐8平台需要将session保存到本地文件)
# Write the cookies to cookie.txt and keep one of them in self.co.
# Do this after the login has completed.
self.cookie_jar = response.meta['cookiejar']
self.cookie_jar.extract_cookies(response, response.request)
try:
    with open("cookie.txt", 'wb+') as f:
        for cookie in self.cookie_jar:
            # One cookie per line.
            f.write(str(cookie) + '\n')
            # The second space-separated field of str(cookie) is taken as the
            # "name=value" pair (presumably the "<Cookie name=value for ...>"
            # form - confirm). partition() splits on the first '=' only, so a
            # value that itself contains '=' is kept whole.
            name, _, value = str(cookie).split(' ')[1].partition('=')
            # self.co ends up holding the last cookie iterated.
            self.co = {name: value}
except Exception as e:
    # Best effort: report the problem and carry on without a saved cookie.
    print(e)
···
# If a cookie was written earlier, read it back into self.co.
if os.path.isfile('cookie.txt'):
    with open("cookie.txt") as f:
        saved = f.read()
    # The second space-separated field of the file is the "name=value" pair
    # of the first stored cookie.
    fields = saved.split(' ')
    # An empty or malformed file has no such field; leave self.co untouched
    # instead of failing with IndexError.
    if len(fields) > 1:
        # Split on the first '=' only so values containing '=' stay intact.
        name, _, value = fields[1].partition('=')
        self.co = {name: value}
- 另一种表单提交方案:
# Alternative to building the POST by hand: create the request from a <form>
# in the response, then apply the fields in self.post_data on top of it.
# dont_click=True submits the form data without simulating a click on any
# element. The response is handled by self.parse_page.
# NOTE(review): formnumber is zero-based in Scrapy, so 1 selects the second
# form on the page - confirm that is the intended form.
return scrapy.FormRequest.from_response(
response,
meta = {'cookiejar' : response.meta['cookiejar']},
formnumber = 1,
formdata = self.post_data,
dont_click = True,
callback = self.parse_page,
)