#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
import traceback

import requests
from bs4 import BeautifulSoup


def get_html_text(url):
    """Fetch a page and return its decoded text, or '' on any failure."""
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        return ''


def get_stock_list(lst, stock_url):
    """Collect stock codes (e.g. sh600000, sz000001) from the listing page."""
    html = get_html_text(stock_url)
    soup = BeautifulSoup(html, 'html.parser')
    for a in soup.find_all('a'):
        try:
            href = a.attrs['href']
            lst.append(re.findall(r'[s][hz]\d{6}', href)[0])
        except (KeyError, IndexError):
            # Skip anchors without an href or without a matching stock code.
            continue


def get_stock_info(lst, stock_url, fpath):
    """Scrape each stock's detail page and append its fields to fpath."""
    for stock in lst:
        url = stock_url + stock + '.html'
        html = get_html_text(url)
        try:
            if html == '':
                continue
            info_dict = {}
            soup = BeautifulSoup(html, 'html.parser')
            stock_info = soup.find('div', attrs={'class': 'stock-bets'})
            name = stock_info.find_all(attrs={'class': 'bets-name'})[0]
            info_dict.update({'股票名称': name.text.split()[0]})  # stock name
            # The <dt>/<dd> pairs hold the field names and their values.
            key_list = stock_info.find_all('dt')
            value_list = stock_info.find_all('dd')
            for i in range(len(key_list)):
                key = key_list[i].text
                val = value_list[i].text
                info_dict[key] = val
            with open(fpath, 'a', encoding='utf-8') as f:
                f.write(str(info_dict) + '\n')
        except Exception:
            traceback.print_exc()
            continue


def main():
    stock_list_url = 'http://quote.eastmoney.com/stocklist.html'
    stock_info_url = 'https://gupiao.baidu.com/stock/'
    output_file = 'D://baidu_stock_info.txt'
    slist = []
    get_stock_list(slist, stock_list_url)
    get_stock_info(slist, stock_info_url, output_file)


if __name__ == '__main__':
    main()