• python伪代码之爬取完美志愿全国历年文理分数线运行代码持续更新


    最近好多小伙伴说想做一个项目实战类的练习,我就花了一点时间做了一个爬虫项目(复制代码的时候缩进可能会有点问题,重新调整一下缩进就没有问题了)
    想要获取更多源码、答疑或者交流学习可以加群:725479218

    
    # -*- coding:utf-8 -*-
    """Crawl per-province admission score lines from wmzy.com.

    For every province in ``b`` and every subject in ``c`` the script requests
    the score-list API, pulls one table row per admission batch out of the
    returned HTML fragment, and passes the collected columns to
    ``W.insert_data``.

    NOTE(review): this file was reconstructed from a whitespace-mangled paste.
    The ``'\\n'`` and ``'\\t'`` arguments below appeared as literal newline and
    tab characters in the paste; confirm against the original if available.
    """
    import hashlib

    import furl.furl

    from function.data_tool import clean_data
    from crawlers.downloader import Downloader
    from function.parse_tool import xpath_parse
    from function.database_tool import auto_sqlsever


    def _row_values(tree, cell_tag, label):
        """Return the whitespace-separated values of the table row headed by *label*.

        Args:
            tree: Parsed document exposing an ``xpath`` method (the object
                returned by ``xpath_parse.text_tolxml``).
            cell_tag: Tag name of the heading cell, ``'th'`` or ``'td'``.
            label: Exact (space-normalised) text of the heading cell; it is
                stripped from the row text before splitting.

        Returns:
            list[str]: Tokens left after removing newlines, tabs, the label
            and spaces from the row's string value, split on any remaining
            whitespace.
        """
        row_text = tree.xpath(
            'string(//%s[normalize-space(text())="%s"]/..)' % (cell_tag, label)
        )
        # Same removal order as the original chained .replace() calls.
        for unwanted in ('\n', '\t', label, ' '):
            row_text = row_text.replace(unwanted, '')
        return row_text.split()


    down = Downloader(proxy='http://104.224.138.224:8888/proxy')

    # Province name -> province code sent as the ``province`` query parameter.
    a = {'吉林': '22', '河北': '13', '陕西': '61', '山西': '14', '青海': '63', '湖南': '43', '广东': '44', '安徽': '34', '四川': '51',
         '江西': '36', '浙江': '33', '贵州': '52', '新疆': '65', '内蒙古': '15', '西藏': '54', '江苏': '32', '广西': '45', '湖北': '42',
         '海南': '46', '河南': '41', '山东': '37', '福建': '35', '云南': '53', '上海': '31', '北京': '11', '天津': '12', '甘肃': '62',
         '宁夏': '64', '黑龙江': '23', '重庆': '50', '辽宁': '21'}

    # Provinces to crawl, in crawl order; every entry must be a key of ``a``.
    b = ['安徽', '北京', '重庆', '福建', '甘肃', '贵州', '广东', '广西', '湖北', '海南', '黑龙江', '湖南', '河南', '河北', '吉林', '江西', '江苏', '辽宁', '宁夏',
         '内蒙古', '青海', '山西', '山东', '陕西', '四川', '上海', '天津', '西藏', '新疆', '云南', '浙江']

    # Values sent as the ``type`` query parameter.
    c = ['wen', 'li']

    url = 'https://www.wmzy.com/api/score/getScoreList?type=wen&province=33'
    # The query arguments of this URL object are overwritten on every iteration.
    reform_url = furl.furl(url)

    W = auto_sqlsever.Mssql(database='provincescore', datatable=['ScoreProvince'])

    for province in b:
        for subject in c:
            reform_url.args['type'] = subject
            reform_url.args['province'] = a[province]

            response = down.get(url=reform_url, typ='text', encoding='utf-8')

            # SECURITY(review): eval() executes whatever the remote server (or
            # the proxy in between) returns. If the payload is plain JSON,
            # json.loads() is the safe replacement; left unchanged here because
            # the exact payload format cannot be verified from this file.
            htmlcode = eval(clean_data.clean_space(response))['htmlStr']
            xpath_html = xpath_parse.text_tolxml(htmlcode)

            year_split = _row_values(xpath_html, 'th', '录取批次')
            ben_yi_split = _row_values(xpath_html, 'td', '本科第一批')
            ben_er_split = _row_values(xpath_html, 'td', '本科第二批')
            ben_san_split = _row_values(xpath_html, 'td', '本科第三批')
            zhuan_yi_split = _row_values(xpath_html, 'td', '专科第一批')
            zhuan_er_split = _row_values(xpath_html, 'td', '专科第二批')

            # Human-readable subject name stored alongside the scores.
            subject_name = '文科' if 'wen' in subject else '理科'

            print(zhuan_yi_split, zhuan_er_split, ben_san_split, ben_er_split, ben_yi_split)

            # NOTE(review): the constant columns are repeated 8 times, which
            # presumably matches the number of year columns in the table --
            # confirm that every province really returns 8 years.
            provincemd5 = [hashlib.md5(province.encode()).hexdigest()] * 8
            tiqian = [0] * 8
            field_info = [
                [province] * 8,
                provincemd5,
                year_split,
                [subject_name] * 8,
                tiqian,
                ben_yi_split,
                ben_er_split,
                ben_san_split,
                zhuan_yi_split,
                zhuan_er_split,
            ]

            W.insert_data(field_info)
    
    
  • 相关阅读:
    BottomNavigationBarItem fixed
    Flutter进阶—点击、拖动和其他手势
    semaphore demo !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!1
    微信小程序
    Socket套接字 =======================
    socket
    flutter packages.
    安卓抓包https
    Flutter 输入控件TextField设置内容并保持光标(cursor)在末尾
    textfield reload issue and other things reload problem.===================================
  • 原文地址:https://www.cnblogs.com/CoXieLearnPython/p/9177223.html
Copyright © 2020-2023  润新知