• 爬取股票信息


    #目标:获取上交所和深交所所有股票的名称和交易信息
    #输出:保存到文件中
    #技术路线:requests-bs4-re
    
    #候选网站选取原则:
    #                    股票信息静态存在于html页面中,非js代码生成,没有robots协议限制
    #选取心态:
    #        不要纠结于某个网站,多找信息源进行尝试
    
    #程序结构设计
    #步骤1:从东方财富网获取股票列表
    #步骤2:根据股票列表逐个到百度股票获取个股信息
    #步骤3:将结果存储到文件中
    
    
    import requests
    from bs4 import BeautifulSoup
    import traceback
    import re
    
    def getHtTMLText(url, timeout=30):
        """Download *url* and return its decoded body, or '' on failure.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot block the whole crawl.

        Returns:
            The response text, decoded with the encoding detected from the
            response content. An empty string is returned when the request
            fails or the server answers with an HTTP error status.
        """
        try:
            r = requests.get(url, timeout=timeout)
            r.raise_for_status()
            # Use the encoding guessed from the body rather than the one
            # announced in the response headers.
            r.encoding = r.apparent_encoding
            return r.text
        except requests.RequestException:
            # Only network/HTTP failures are turned into ''; KeyboardInterrupt
            # and programming errors are no longer swallowed.
            return ''
    
    def getStockList(lst,stockURL):
        """Append the stock codes linked from the page at *stockURL* to *lst*.

        Each <a> tag's href is searched for a code made of 's', then 'h' or
        'z', then six digits (e.g. 'sh600000'). The first match of every link
        is appended in page order; links without an href or without a code
        are ignored.

        Args:
            lst: List that receives the codes; it is modified in place.
            stockURL: URL of the page that lists the stocks.

        Returns:
            None. Results are delivered through *lst*.
        """
        html = getHtTMLText(stockURL)
        soup = BeautifulSoup(html,'html.parser')
        # The digit class needs its backslash: 'd{6}' would only match the
        # literal text 'dddddd'.
        code_pattern = re.compile(r's[hz]\d{6}')
        for anchor in soup.find_all('a'):
            href = anchor.get('href')
            if not href:
                continue
            match = code_pattern.search(href)
            if match:
                lst.append(match.group())
    
    
    def getStockInfo(lst,stockURL,fpath):
        """Fetch every stock's detail page and append its fields to *fpath*.

        For each code in *lst* the page at ``stockURL + code + '.html'`` is
        downloaded. The stock name and all <dt>/<dd> pairs found inside the
        'stock-bets' block are collected into a dict, echoed to stdout, and
        written as one ``str(dict)`` line to *fpath*. Pages that cannot be
        downloaded or parsed are skipped. A progress percentage is printed
        after every stock, including skipped ones.

        Args:
            lst: Stock codes to look up.
            stockURL: URL prefix to which the code and '.html' are appended.
            fpath: Output text file, opened in append mode as UTF-8.

        Returns:
            None.
        """
        total = len(lst)
        for count, stock in enumerate(lst, start=1):
            url = stockURL + stock + '.html'
            html = getHtTMLText(url)
            if html:
                try:
                    soup = BeautifulSoup(html,'html.parser')
                    stockInfo = soup.find('div',attrs = {'class':'stock-bets'})
                    name = stockInfo.find_all(attrs = {'class':'bets-name'})[0]
                    infoDict = {'股票名称':name.text.split()[0]}
                    print('\n'+url)
                    print(infoDict)
                    keyList = stockInfo.find_all('dt')
                    valueList = stockInfo.find_all('dd')
                    # Pair labels with values positionally; zip stops at the
                    # shorter list instead of indexing past its end.
                    for keyTag, valueTag in zip(keyList, valueList):
                        key = keyTag.string.strip()
                        val = valueTag.string.strip()
                        infoDict[key] = val
                        print('\t' + key + ':' + val)
                    with open(fpath,'a',encoding = 'utf-8') as f:
                        f.write(str(infoDict)+'\n')
                except Exception:
                    # Best-effort crawl: a page with unexpected markup (or a
                    # console/file error for one record) must not abort the
                    # remaining stocks. Ctrl-C still propagates.
                    pass
            # Count every stock, fetched or not, so the figure reaches 100%.
            print('\n当前进度:{:.2f}%'.format(count*100/total),end = '')
        
    def main():
        """Collect the stock codes, then save every stock's details to a file."""
        out_path = 'D://BaiduStockInfo.txt'
        detail_base = 'http://gupiao.baidu.com/stock/'
        list_page = 'http://quote.eastmoney.com/stocklist.html'

        # getStockList fills this list in place; getStockInfo then walks it.
        codes = []
        getStockList(codes, list_page)
        getStockInfo(codes, detail_base, out_path)
    
    # Script entry point: the crawl starts as soon as this statement executes.
    main()
  • 相关阅读:
    Numpy 里线性代数函数
    lateral view 使用方法
    Numpy 基础函数
    Numpy 基础操作
    pandas 基础操作记录学习
    pandas向左移动非空单元格
    供应商自动记账
    SAP Smartforms 参数配置
    SAP FPM 相关包 APB_FPM_CORE
    SAP BPC 清除CUBE 中的数据
  • 原文地址:https://www.cnblogs.com/zhanghaijie/p/8418264.html
Copyright © 2020-2023  润新知