import requests
from bs4 import BeautifulSoup
import traceback
import re

# Matches stock codes such as 'sh600000' / 'sz000001'.
# FIX: the original pattern had lost the backslash ('[s][hz]d{6}') and
# therefore matched nothing; '\d{6}' restores the six-digit code match.
STOCK_CODE_RE = re.compile(r"[s][hz]\d{6}")


def getHTMLText(url, code="utf-8"):
    """Fetch *url* and return its body decoded with *code*.

    Returns "" on any network/HTTP error (best-effort contract kept from
    the original).  A timeout is added so a dead server cannot hang the
    crawl forever.
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = code
        return r.text
    except requests.RequestException:
        # Narrowed from a bare `except:` so Ctrl-C and real bugs propagate.
        return ""


def getStockList(lst, stockURL):
    """Append every stock code found on the listing page to *lst* in place.

    The listing page is GB2312-encoded; codes are extracted from anchor
    hrefs with STOCK_CODE_RE.
    """
    html = getHTMLText(stockURL, "GB2312")
    soup = BeautifulSoup(html, 'html.parser')
    for a in soup.find_all('a'):
        href = a.attrs.get('href', '')   # .get avoids KeyError on bare <a>
        m = STOCK_CODE_RE.search(href)
        if m:
            lst.append(m.group(0))


def getStockInfo(lst, stockURL, fpath):
    """Fetch each stock's detail page and append its fields to *fpath*.

    One dict per stock is written as a text line; progress is reported on
    a single console line.  Per-stock failures are logged and skipped so
    one bad page does not abort the whole crawl.
    """
    count = 0
    total = len(lst)
    for stock in lst:
        url = stockURL + stock + ".html"
        html = getHTMLText(url)
        try:
            if html == "":
                continue
            infoDict = {}
            soup = BeautifulSoup(html, 'html.parser')
            stockInfo = soup.find('div', attrs={'class': 'stock-bets'})

            name = stockInfo.find_all(attrs={'class': 'bets-name'})[0]
            infoDict.update({'股票名称': name.text.split()[0]})

            keyList = stockInfo.find_all('dt')
            valueList = stockInfo.find_all('dd')
            for key, val in zip(keyList, valueList):
                infoDict[key.text] = val.text

            # FIX: the record separator had been mangled to ' '; restore
            # one record per line.
            with open(fpath, 'a', encoding='utf-8') as f:
                f.write(str(infoDict) + '\n')
        except Exception:
            # Narrowed from bare `except:`; surface the failure instead of
            # swallowing it silently, then keep crawling.
            traceback.print_exc()
        finally:
            count = count + 1
            # FIX: '\r' (also lost in the paste) rewinds the cursor so the
            # percentage updates in place on one console line.
            print("\r当前进度: {:.2f}%".format(count * 100 / total), end="")


def main():
    """Crawl the stock list, then each stock's detail page, into one file."""
    stock_list_url = 'http://quote.eastmoney.com/stocklist.html'
    stock_info_url = 'https://gupiao.baidu.com/stock/'
    output_file = 'C:/BaiduStockInfo.txt'
    slist = []
    getStockList(slist, stock_list_url)
    getStockInfo(slist, stock_info_url, output_file)


if __name__ == "__main__":
    main()