• 存档


    # -*- coding: utf-8 -*-
    import urllib2,cookielib
    import urllib
    import cStringIO
    import datetime
    from PIL import Image
    from lxml import etree
    import sys
    # Python 2 only: reload(sys) brings back sys.setdefaultencoding (it is
    # removed from the module at interpreter start-up), and the call then makes
    # implicit str <-> unicode conversions use UTF-8 instead of ASCII.
    # NOTE(review): this is a process-wide setting and affects every module
    # that is loaded, not just this file.
    reload(sys)
    sys.setdefaultencoding('utf8')
    
    def setOpener():
        cookie = cookielib.CookieJar()
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
        opener.addheaders.append(('User-Agent','Mozilla/5.0 (Windows NT 5.1; rv:25.0) Gecko/20100101 Firefox/25.0'))
        return opener
    
    def md5(str):
        """Return the hexadecimal MD5 digest of a string.

        Args:
            str: Byte string or text string to hash. Text is encoded as UTF-8
                before hashing. (The parameter name shadows the builtin; it is
                kept so existing keyword callers continue to work.)

        Returns:
            The 32-character lowercase hex digest, or '' when the argument is
            not a string at all.
        """
        import hashlib
        if isinstance(str, bytes):
            data = str
        elif isinstance(str, type(u'')):
            # Text input used to be rejected and silently produced ''; hash its
            # UTF-8 bytes instead so a unicode value yields a real digest.
            data = str.encode('utf-8')
        else:
            return ''
        return hashlib.md5(data).hexdigest()
    
    class spider:
        """Scraper for the student system at 210.42.121.241.

        Intended call order: getCaptcha() -> set studentID, password and
        captcha on the instance -> loginMainPage() -> getAndSaveScore().
        All requests go through one cookie-keeping opener so that they share
        the same session.
        """

        def __init__(self):
            # The opener keeps the cookies, so it must be reused for every request.
            self.opener = setOpener()
            self.imgUrl = 'http://210.42.121.241/servlet/GenImg'
            self.loginUrl = 'http://210.42.121.241/servlet/Login'
            self.queryScoreUrl = 'http://210.42.121.241/servlet/Svlt_QueryStuScore'
            self.studentID = ''
            self.password = ''
            self.captcha = ''  # captcha text, to be filled in by the caller
            self.mainPageContent = ''

        def getCaptcha(self):
            """Download the captcha image and return it as a PIL Image.

            The caller is expected to show the image to the user and store the
            recognised text in ``self.captcha`` before logging in.
            """
            res = self.opener.open(urllib2.Request(self.imgUrl))
            try:
                raw = res.read()
            finally:
                res.close()
            return Image.open(cStringIO.StringIO(raw))

        def loginMainPage(self):
            """POST the credentials and keep the returned page in mainPageContent.

            The password is sent as its MD5 hex digest. The response body is
            decoded from gb2312; a body that is not valid gb2312 raises
            UnicodeDecodeError.
            """
            postdata = urllib.urlencode({
                'id': self.studentID,
                'pwd': md5(self.password),
                'xdvfb': self.captcha
            })
            req = urllib2.Request(url=self.loginUrl, data=postdata)
            response = self.opener.open(req)
            try:
                body = response.read()
            finally:
                response.close()
            self.mainPageContent = body.decode('gb2312')

        def getAndSaveScore(self):
            """Fetch the score page and save it as 'inputScore.html'.

            Returns:
                1 after the page has been written, 0 when the main page does
                not contain the element the CSRF token is read from (which
                usually means the login did not succeed).
            """
            page = etree.HTML(self.mainPageContent)
            onclick = page.xpath('//div[@id="school"]/@onclick')
            if not onclick:
                print("Error:未能正确打开主页面")
                return 0

            # The token is taken from a fixed character window of the onclick
            # attribute. NOTE(review): this presumably matches a 36-character
            # token at that offset; it breaks if the page markup changes.
            token = onclick[0][65:101]

            GMT_FORMAT = '%a, %d %b %Y %H:%M:%S GMT'
            GMT_time = datetime.datetime.utcnow().strftime(GMT_FORMAT)
            getParams = urllib.urlencode({
                'csrftoken': token,
                'learnType': '',
                'scoreFlag': '0',
                't': GMT_time,
                'term': '',
                'year': '0'
            })

            fullUrl = self.queryScoreUrl + '?' + getParams
            response = self.opener.open(urllib2.Request(fullUrl))
            try:
                # The site serves gb2312, so decode before re-encoding.
                result = response.read().decode('gb2312')
            finally:
                response.close()

            # Encode explicitly instead of relying on the process default
            # encoding, and let the with-block close the file even on error.
            with open('inputScore.html', 'wb') as out:
                out.write(result.encode('utf-8'))
            return 1
    
    
    #mySpider=spider()
    #mySpider.getCaptcha()
    #mySpider.loginMainPage()
    #mySpider.getAndSaveScore()
    
    
    
    
    
  • 相关阅读:
    华为oj之字符串分割
    华为oj之字符个数统计
    华为oj之等差数列前n项和
    华为oj之质数因子
    华为oj之求int型正整数在内存中存储时1的个数
    华为oj之字符串反转
    SpringBoot--表单验证
    SpringBoot--异常统一处理
    SpringBoot--文件上传
    SpringBoot--thymeleaf
  • 原文地址:https://www.cnblogs.com/muyangshaonian/p/9650509.html
Copyright © 2020-2023  润新知