• 03爬虫 爬取hfutxc成绩


     1 #-*- coding:utf-8 -*-
     2 # -*- coding: utf-8 -*-
     3 #encoding:utf-8
     4 import urllib
     5 import urllib2
     6 import cookielib
     7 import re
     8 
     9 
    10 class SDU:
    11 
    12     def __init__(self):
    13         self.loginUrl = 'http://222.195.8.201/pass.asp'
    14         self.gradeUrl = 'http://222.195.8.201/student/asp/Select_Success.asp'
    15         self.cookies = cookielib.CookieJar()
    16         self.postdata = urllib.urlencode({
    17             'UserStyle':'student',
    18             'user':'2013217314',
    19             'password':'#######'
    20          })
    21         self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookies))
    22 
    23     def getPage(self):
    24         request  = urllib2.Request(
    25             url = self.loginUrl,
    26             data = self.postdata)
    27         result = self.opener.open(request)
    28         result = self.opener.open(self.gradeUrl)
    29         return result.read().decode('gbk')
    30         #打印登录内容
    31         #print result.read().decode('gbk')
    32 
    33     def getGrades(self):
    34           #获得本学期成绩页面
    35           page = self.getPage()
    36           #正则匹配
    37           myItems = re.findall('<TR bgcolor.*?<TD>.*?</TD>.*?<TD>(.*?)</TD>.*?<TD align="center">.*?</TD>.*?<TD align="center">(.*?)</TD>.*?</TR>',page,re.S)
    38           for item in myItems:
    39               print item[0]+'  '+item[1].strip()+' '
    40               #self.credit.append(item[0].encode('gbk'))
    41               #self.grades.append(item[1].encode('gbk'))
    42           #self.getGrade()
    43 
    44 
    45 sdu = SDU()
    46 sdu.getPage()
    47 sdu.getGrades()
  • 相关阅读:
    Python 必备神器
    python 常用库
    Sublime Text3 配置 Python2 Python3
    Python JSON
    Sublime Text3 3143 注册码
    EFCode First 导航属性
    EF Code First:实体映射,数据迁移,重构(1)
    Entity Framework 复杂类型
    EF 7 Code First
    EF Code First 导航属性 与外键
  • 原文地址:https://www.cnblogs.com/cnblogs321114287/p/6984581.html
Copyright © 2020-2023  润新知