• 电子科技大学 易查分网站 爬虫 批量爬取成绩


    暑假一个人在寝室,闲来无事。

    某天,辅导员恰好发了学年查分的链接,一看,发现是易查分平台,再加上手头的数据,有搞头啊,遂开始设计爬虫。
    易查分这网站很怪,PC版需要输入验证码,手机版就不需要了。为了方便爬取,果断选择手机版。(本来还想训练个自动填充验证码的神经网络的,可难度有些大,有空了以后补上吧)

    该爬虫使用 selenium 的 WebDriver 驱动 Chrome 浏览器实现。速度……只能说可以接受吧。

    数据准备:需查同学的姓名、学号和身份证号。 (具体获取方式……自行解决)

    这段代码还融合了Excel的读写:用 xlrd 从 data.xlsx 读取学生信息(各列依次为姓名、性别、身份证号、学号),再用 xlwt 把查到的成绩写入 out.xls。

    爬虫有风险,请遵守法律法规哦!

    import xlrd
    import xlwt
    from selenium import webdriver
    import time
    
    allstu = []
    
    
    class stu:
        """One student record: the identity fields plus the scraped results."""

        def __init__(self, name, sex, number, psw):
            """Store the identity fields and derive the query password.

            Args:
                name: Student name.
                sex: Student sex, stored unchanged.
                number: Student number.
                psw: Sequence the query password is sliced from; the stored
                    password is the (up to) six items that precede its last
                    item.
            """
            # Filled in later by the crawl loop.
            self.classify = ''  # label of the query page that accepted the student
            self.dic = {}       # subject name -> grade text

            self.name = name
            self.sex = sex
            self.number = number
            # Drop the final item, then keep the last six of what remains.
            self.psw = psw[:-1][-6:]
    
    
    def readData():
        """Append one ``stu`` per row of the first sheet of data.xlsx to ``allstu``.

        Every row of the sheet is read, starting with row 0. Column 0 is
        passed as the name, column 1 as the sex, column 3 as the number and
        column 2 as the ``psw`` source. The sheet dimensions are printed
        before loading.
        """
        sheet = xlrd.open_workbook('data.xlsx').sheet_by_index(0)
        n_cols = sheet.ncols
        n_rows = sheet.nrows
        print(n_rows, n_cols)
        allstu.extend(
            stu(sheet.cell_value(r, 0), sheet.cell_value(r, 1),
                sheet.cell_value(r, 3), sheet.cell_value(r, 2))
            for r in range(n_rows)
        )
    
    
    def writeData():
        """Write every student in ``allstu`` to the sheet 'Out' of out.xls.

        Row 0 holds the subject names from ``allsubjects``, starting at
        column 4. Each following row holds one student: name, sex, number
        and classify in columns 0-3, then one grade per subject. A subject
        the student has no grade for is written as an empty string.
        """
        first_subject_col = 4
        book = xlwt.Workbook(encoding='utf-8', style_compression=0)
        sheet = book.add_sheet('Out', cell_overwrite_ok=True)

        # Header row: only the subject columns carry a title.
        for offset, subject in enumerate(allsubjects):
            sheet.write(0, first_subject_col + offset, subject)

        # Data rows start at row 1, directly below the header.
        for row_idx, student in enumerate(allstu, start=1):
            sheet.write(row_idx, 0, student.name)
            sheet.write(row_idx, 1, student.sex)
            sheet.write(row_idx, 2, student.number)
            sheet.write(row_idx, 3, student.classify)
            for offset, subject in enumerate(allsubjects):
                grade = student.dic.get(subject, '')
                sheet.write(row_idx, first_subject_col + offset, grade)

        book.save(r'out.xls')
    
    
    # Selenium names needed only by the crawl loop below.
    from selenium.common.exceptions import NoSuchElementException
    from selenium.webdriver.common.by import By

    # Subject names in first-seen order; writeData() reads this for its header row.
    allsubjects = []
    readData()

    # One mobile query page per label: urls[k] is the page for classes[k].
    urls = ['http://241374.yichafen.com/mobile/queryscore/sqcode/MsTcInwmMjkwfDViN2EzZDI0NTllYzAO0O0O.html',
            'http://241374.yichafen.com/mobile/queryscore/sqcode/MsTcInwmMzAxfDViN2E2MGQwNTVkM2UO0O0O.html',
            'http://241374.yichafen.com/mobile/queryscore/sqcode/MsTcInwmMzAyfDViN2E2MTVhY2E2MDQO0O0O.html']
    classes = ['通信工程', '网络工程', '物联网工程']

    # Index of the first student to query. The value 15 skips allstu[0..14];
    # presumably left over from resuming an interrupted run -- set it to 0 to
    # process every record.
    START_INDEX = 15

    driver = webdriver.Chrome()
    try:
        for i in range(START_INDEX, len(allstu)):
            student = allstu[i]
            # Try each query page in turn until one accepts the credentials.
            for url, class_name in zip(urls, classes):
                driver.implicitly_wait(1)
                driver.get(url)
                # Reload before filling in the form, exactly as the original
                # script did.
                driver.refresh()

                # Fill the three form inputs: student number, name, password.
                for field_name, value in (
                        ('s_xuehao', student.number),
                        ('s_xingming', student.name),
                        ('s_2c54d23b18177aabe8759f1f551451f3', student.psw)):
                    box = driver.find_element(
                        By.XPATH, "//input[@name='{}']".format(field_name))
                    box.clear()
                    box.send_keys(value)
                driver.find_element(By.XPATH, "//a[@id='submitBtn']").click()

                # The presence of a weui-dialog__bd element is treated as a
                # rejected query. Only "element not found" counts as success;
                # any other error (driver crash, Ctrl-C) must propagate
                # instead of being mistaken for a results page.
                driver.implicitly_wait(0.5)
                try:
                    driver.find_element(
                        By.XPATH, "//div[@class='weui-dialog__bd']")
                except NoSuchElementException:
                    pass
                else:
                    continue  # rejected on this page; try the next one

                student.classify = class_name
                subnames = driver.find_elements(By.CLASS_NAME, 'left_cell')
                grades = driver.find_elements(By.CLASS_NAME, 'right_cell')
                # The first three cell pairs are skipped (presumably the
                # identity rows echoed back by the site -- confirm). Cells are
                # paired positionally; if the two lists differ in length the
                # unmatched tail is ignored.
                for name_cell, grade_cell in zip(subnames[3:], grades[3:]):
                    # Read .text once: each access is a WebDriver round trip.
                    subject = name_cell.text
                    if subject not in allsubjects:
                        allsubjects.append(subject)
                    student.dic[subject] = grade_cell.text
                break

            # classify stays '' when none of the pages accepted the student.
            print('{} {} : {}, finished'.format(str(i + 1), student.name, student.classify))
    finally:
        # Always shut the browser down, even when the crawl aborts.
        driver.quit()

    writeData()
  • 相关阅读:
    MVC概念性的内容
    类 class
    php获取真实IP地址
    面向对象static静态的属性和方法的调用
    smarty 入门2(个人总结)
    smarty入门
    读取文件内容fopen,fgets,fclose
    mysql常用命令
    mybatis查询的三种方式
    MyBatis 映射文件
  • 原文地址:https://www.cnblogs.com/zinyy/p/9589387.html
Copyright © 2020-2023  润新知