• python简单爬虫(股票信息)


     1 
     2 import requests
     3 from bs4 import BeautifulSoup
     4 import traceback
     5 import re
     6 
     7 def getHTMLText(url, code="utf-8"):   #获取股票页面信息
     8     try:
     9         r = requests.get(url)
    10         r.raise_for_status()
    11         r.encoding = code
    12         return r.text
    13     except:
    14         return ""
    15 
    16 def getStockList(lst, stockURL):      #获取股票列表
    17     html = getHTMLText(stockURL, "GB2312")
    18     soup = BeautifulSoup(html, 'html.parser') 
    19     a = soup.find_all('a')
    20     for i in a:
    21         try:
    22             href = i.attrs['href']
    23             lst.append(re.findall(r"[s][hz]d{6}", href)[0])
    24         except:
    25             continue
    26 
    27 def getStockInfo(lst, stockURL, fpath):  #获取单支股票信息
    28     count = 0
    29     for stock in lst:
    30         url = stockURL + stock + ".html"
    31         html = getHTMLText(url)
    32         try:
    33             if html=="":
    34                 continue
    35             infoDict = {}
    36             soup = BeautifulSoup(html, 'html.parser')
    37             stockInfo = soup.find('div',attrs={'class':'stock-bets'})
    38 
    39             name = stockInfo.find_all(attrs={'class':'bets-name'})[0]
    40             infoDict.update({'股票名称': name.text.split()[0]})
    41             
    42             keyList = stockInfo.find_all('dt')
    43             valueList = stockInfo.find_all('dd')
    44             for i in range(len(keyList)):
    45                 key = keyList[i].text
    46                 val = valueList[i].text
    47                 infoDict[key] = val
    48             
    49             with open(fpath, 'a', encoding='utf-8') as f:   #写入文件
    50                 f.write( str(infoDict) + '
    ' )
    51                 count = count + 1
    52                 print("
    当前进度: {:.2f}%".format(count*100/len(lst)),end="")
    53         except:
    54             count = count + 1
    55             print("
    当前进度: {:.2f}%".format(count*100/len(lst)),end="")
    56             continue
    57 
    58 def main():
    59     stock_list_url = 'http://quote.eastmoney.com/stocklist.html'
    60     stock_info_url = 'https://gupiao.baidu.com/stock/'
    61     output_file = 'C:/BaiduStockInfo.txt'
    62     slist=[]
    63     getStockList(slist, stock_list_url)
    64     getStockInfo(slist, stock_info_url, output_file)
  • 相关阅读:
    GO语言系列- 结构体和接口
    GO语言系列- 高级数据类型之数组、切片、map
    tomcat8开启APR模式
    python中的subprocess.Popen()使用详解---以及注意的问题(死锁)
    linux dig 命令使用
    究竟什么时候该使用MQ?
    Python 列表(List) 的三种遍历(序号和值)方法
    python函数里引用全局变量
    python(xlsxwriter模块使用)
    Linux下生成patch和打patch
  • 原文地址:https://www.cnblogs.com/ouzai/p/13048595.html
Copyright © 2020-2023  润新知