• PYTHON流向下载


      1 #-*- coding:utf-8 -*-
      2 import gzip
      3 import re
      4 import http.cookiejar
      5 import urllib.request
      6 import urllib.parse
      7 import xlwt
      8 import time,os
      9  
     10 
     11 
     12 def saveexcel(flow,filename,coding='gbk'):
     13     #flow 需要转换为excel的里面,格式为双层列表
     14     #coding excel页面编码
     15     try:
     16         workbook = xlwt.Workbook(encoding=coding)
     17         sheet = workbook.add_sheet('Sheet1')
     18         for row,rowdata in enumerate(flow):
     19             for col,val in enumerate(rowdata):
     20                 sheet.write(row,col,val.strip(),style = xlwt.Style.default_style)
     21         excelname = '\%s.xls'%filename
     22         workbook.save(excelname)
     23         return excelname
     24 
     25     except Exception as e:
     26         if hasattr(e,"code"):
     27             print ('excel写入失败,错误原因' +str(e.code))
     28         if hasattr(e,"reason"):
     29             print ('excel写入失败,错误原因' +str(e.reason))
     30         return None
     31 
     32  #从指定页面中取表单参数
     33 def getParm(data,parm):
     34     cer = re.compile('name="'+parm+'".* value="(.*?)"', flags = 0)
     35     strlist = cer.findall(data)
     36     
     37     if strlist:
     38         return strlist[0]
     39     else:
     40         return None
     41 
     42 def getOpener():
     43     #自动设置COOKIER
     44     # deal with the Cookies
     45     print( '正在设置cookie')    
     46     cj = http.cookiejar.CookieJar()
     47     pro = urllib.request.HTTPCookieProcessor(cj)
     48     opener = urllib.request.build_opener(pro, urllib.request.HTTPHandler)
     49     urllib.request.install_opener(opener)    
     50     print( '设置cookie成功')        
     51     return opener
     52  
     53 
     54 header = {
     55     'Connection': 'Keep-Alive',
     56     'Accept': 'text/html, application/xhtml+xml, */*',
     57     'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko'
     58 }
     59  
     60 url = 'http://115.231.58.130:8021/Default.aspx'
     61 header['Referer']='http://115.231.58.130:8021/'
     62 #1、设置Cookie
     63 opener = getOpener()
     64 
     65 
     66 ##2、初始化数据开始
     67 request = urllib.request.Request(url)
     68 try:
     69     html = urllib.request.urlopen(request).read()
     70     #取表单参数
     71     EVENTVALIDATION = getParm(html.decode('gbk'),'__EVENTVALIDATION')
     72     VIEWSTATEGENERATOR =  getParm(html.decode('gbk'),'__VIEWSTATEGENERATOR')
     73     VIEWSTATE = getParm(html.decode('gbk'),'__VIEWSTATE')
     74     btnsubmit = getParm(html.decode('gbk'),'sbtnSubmit')   
     75 
     76 except urllib.request.URLError as e:
     77     if hasattr(e,"code"):
     78         print ('请求页面失败,请检查网络设置,错误原因' +str(e.code))
     79     if hasattr(e,"reason"):
     80         print ('请求页面失败,请检查网络设置,错误原因' +str(e.reason))
     81 #取表单参数结束 
     82 
     83 
     84 id = '***'
     85 password = '***'
     86 postDict = {
     87          'LoginID':id,
     88         'Pwd':password,
     89         '__EVENTVALIDATION':EVENTVALIDATION,
     90         '__VIEWSTATEGENERATOR':VIEWSTATEGENERATOR,
     91         '__VIEWSTATE':VIEWSTATE,
     92         'btnSubmit':btnsubmit
     93 }
     94 postData = urllib.parse.urlencode(postDict).encode(encoding='UTF8')
     95 
     96 ##3、正式登录
     97 request = urllib.request.Request(url, postData,headers=header)
     98 try:
     99     response = urllib.request.urlopen(request)
    100     data = response.read()
    101 except urllib.request.URLError as e:
    102     if hasattr(e,"code"):
    103         print ('页面加载失败,请检查网络及账号设置,错误原因' +str(e.code))
    104     if hasattr(e,"reason"):
    105         print ('页面加载失败,请检查网络及账号设置,错误原因' +str(e.reason))
    106 
    107 #登录结束
    108 print('login:',data.decode('gbk'))
    109 
    110 ##4 进入产品搜索界面进行数据提取
    111 
    112 posturl= 'http://115.231.58.130:8021/Search/ProductFlow.aspx'
    113 request = urllib.request.Request(posturl)
    114 try:
    115     html = urllib.request.urlopen(request).read()
    116     #取表单参数
    117     EVENTVALIDATION = getParm(html.decode('gbk'),'__EVENTVALIDATION')
    118     VIEWSTATEGENERATOR =  getParm(html.decode('gbk'),'__VIEWSTATEGENERATOR')
    119     VIEWSTATE = getParm(html.decode('gbk'),'__VIEWSTATE')
    120     #btnsearch = getParm(html.decode('gbk'),'btnSearcht')
    121 except urllib.request.URLError as e:
    122     if hasattr(e,"code"):
    123         print ('请求页面失败,请检查网络设置,错误原因' +str(e.code))
    124     if hasattr(e,"reason"):
    125         print ('请求页面失败,请检查网络设置,错误原因' +str(e.reason))
    126 
    127 
    128 postDict['__EVENTTARGET']=''
    129 postDict['__EVENTARGUMENT']=''  
    130 postDict['__EVENTVALIDATION']=EVENTVALIDATION
    131 postDict['__VIEWSTATEGENERATOR']=VIEWSTATEGENERATOR
    132 postDict['__VIEWSTATE']=VIEWSTATE
    133 postDict['PName']=''
    134 postDict['PID']=''
    135 postDict['txtStartDate']='2016-01-01'
    136 postDict['txtEndDate']='2016-01-31'
    137 postDict['ConvertToExcel.x']='6'
    138 postDict['ConvertToExcel.y']='9'
    139 postDict['btnSearch']='' 
    140 postData = urllib.parse.urlencode(postDict).encode(encoding='UTF8')
    141 
    142 print( '搜索页面数据获取成功,正在抓取流向数据...')    
    143 
    144 
    145 ###登录搜索页面
    146 request = urllib.request.Request(posturl, postData,headers=header)
    147 try:
    148     response = urllib.request.urlopen(request)
    149     data = response.readlines()
    150 except urllib.request.URLError as e:
    151     if hasattr(e,"code"):
    152         print ('页面加载失败,请检查网络及账号设置,错误原因' +str(e.code))
    153     if hasattr(e,"reason"):
    154         print ('页面加载失败,请检查网络及账号设置,错误原因' +str(e.reason))
    155 print( '流向抓取成功,正在保存为excel...')
    156 print('search:',data)
    157 ##5 保存为excel
    158 
    159 workbook = xlwt.Workbook(encoding='gbk')
    160 sheet = workbook.add_sheet('Sheet1')
    161 for row,rowdata in enumerate(data):
    162     rowdata_list = rowdata.decode('gbk').split('	')
    163     for col,val in enumerate(rowdata_list):
    164             sheet.write(row,col,val,style = xlwt.Style.default_style)
    165 
    166 ntime = time.strftime('%Y%m%d%H%M%S')
    167 excelname = ntime+'%s.xls'%'宁波宝瑞达'
    168 workbook.save(excelname)
    169 print( 'excel导出成功,请查看程序目录下%s文件。'%excelname)
  • 相关阅读:
    apiAutoTest:基于mitmproxy实现接口录制
    FastAPI + Vue 前后端分离 接口自动化测试工具 apiAutoTestWeb
    FastAPI项目实战:"异步"接口测试"平台"
    apiAutoTest:自动化测试用例中调用自定义函数的实现
    测试笔记01-Git
    C++:常量
    C++: 变量类型
    C++:数据类型
    C++:第一个c++程序
    mitmproxy抓包微信小程序
  • 原文地址:https://www.cnblogs.com/lrzy/p/5555077.html
Copyright © 2020-2023  润新知