• 财经数据(3)-Pandas加载csv文件


    数据来源:问财

    请求问句:所属同花顺行业,所属概念

    下载文件,利用Pandas读取,解析,存储

    # -*- coding: utf-8 -*-
    import pandas as pd
    import time
    from sqlalchemy import create_engine
    
    #====================股票所属概念、板块数据解析存储============================================================================================================
    def Belongto(engine, current, csv_path=r'C:\Users\Red-tea-1919\Desktop\2020-01-09.csv'):
        """Parse an iWencai CSV export and store the rows not yet in the database.

        The CSV is read with GBK encoding and must contain the columns
        '股票代码', '所属同花顺行业' and '所属概念'. Industry strings are split on
        '-' into up to three levels; concept strings are split on ';'.
        Parsed rows are compared with the existing contents of the
        ``belong_bk`` and ``belong_concept`` tables, and only rows whose key
        columns are not already present are appended.

        Args:
            engine: Database connection/engine accepted by ``pandas.read_sql``
                and ``DataFrame.to_sql``.
            current: Value written to the ``in_date`` column of every new row.
            csv_path: Path of the CSV file to load. Defaults to the path that
                was previously hard-coded in this function.
        """
        print("-------------------------------------")
        print("开始从csv文件中解析股票板块及概念信息")

        # The default path is a raw string: in a normal literal '\U' starts a
        # unicode escape and makes the whole file a SyntaxError on Python 3.
        # Stock codes are forced to str so they can always be sliced.
        stk_bel = pd.read_csv(csv_path, encoding='gbk', dtype={'股票代码': str})[
            ['股票代码', '所属同花顺行业', '所属概念']]

        print("-------------------------------------")
        print("开始从csv文件中解析所属板块数据")
        bk_rows, concept_rows = _parse_belong_rows(stk_bel, current)

        # Build each frame once from a list of dicts; DataFrame.append inside
        # the loop was quadratic and no longer exists in pandas 2.x.
        stock_bk = pd.DataFrame(
            bk_rows, columns=['stk_code', 'yj_bk', 'ej_bk', 'sj_bk', 'in_date'])
        stock_concept = pd.DataFrame(
            concept_rows, columns=['symbol', 'concept', 'in_date'])

        print("----------------------------------------------")
        print("开始存储股票所属概念数据")
        old_concept = pd.read_sql('select * from belong_concept', engine)
        stock_concept = _rows_not_in(stock_concept, old_concept, ['symbol', 'concept'])

        if not stock_concept.empty:
            stock_concept.to_sql('belong_concept', engine, if_exists='append', index=False)
        print(stock_concept)
        print("本次存储股票所属概念数据%s条" % stock_concept.shape[0])

        print("----------------------------------------------")
        print("开始存储股票所属板块数据")
        old_bk = pd.read_sql('select * from belong_bk', engine)
        stock_bk = _rows_not_in(stock_bk, old_bk, ['stk_code', 'yj_bk', 'ej_bk', 'sj_bk'])

        if not stock_bk.empty:
            stock_bk.to_sql('belong_bk', engine, if_exists='append', index=False)
        print(stock_bk)
        print("本次存储股票所属板块数据%s条" % stock_bk.shape[0])


    def _parse_belong_rows(stk_bel, current):
        """Turn the raw CSV frame into (industry_rows, concept_rows) dict lists."""
        bk_rows = []
        concept_rows = []
        for _, row in stk_bel.iterrows():
            code = row['股票代码']  # full code exactly as it appears in the CSV
            if not isinstance(code, str):
                continue  # missing stock code: nothing to key the row on
            cp_code = code[:6]  # first six characters; presumably the code without its exchange suffix

            industry = row['所属同花顺行业']
            if isinstance(industry, str):  # NaN (empty cell) is a float, skip it
                levels = industry.split('-')
                # Pad to three levels so a shorter industry path does not
                # raise IndexError; missing levels are stored as ''.
                levels += [''] * (3 - len(levels))
                bk_rows.append({'stk_code': code, 'yj_bk': levels[0], 'ej_bk': levels[1],
                                'sj_bk': levels[2], 'in_date': current})

            concepts = row['所属概念']
            if isinstance(concepts, str):
                for cp in concepts.split(';'):
                    if cp:  # ignore empty pieces such as a trailing ';'
                        concept_rows.append({'symbol': cp_code, 'concept': cp, 'in_date': current})
        return bk_rows, concept_rows


    def _rows_not_in(new_df, old_df, keys):
        """Return the rows of new_df whose key columns do not occur in old_df."""
        # keep='first' keeps one copy of a row repeated inside the new batch;
        # the former keep=False on new+old also re-inserted rows that existed
        # only in the database, duplicating them on every run.
        new_df = new_df.drop_duplicates(subset=keys, keep='first')
        existing = set(old_df[keys].itertuples(index=False, name=None))
        is_new = [key not in existing
                  for key in new_df[keys].itertuples(index=False, name=None)]
        return new_df[pd.Series(is_new, index=new_df.index, dtype=bool)]
    
    #====================前十大流通股东明细数据解析(待实现)======================================================================================================
    def topHolderdetail():
        """Placeholder for parsing top-ten tradable shareholder details.

        Only prints a separator and a start message; the parsing itself has
        not been written yet, so the function returns None without touching
        any file or database.
        """
        banner_lines = (
            "-------------------------------------",
            "开始从csv文件中解析前十大流通股东明细数据",
        )
        for banner_line in banner_lines:
            print(banner_line)

        # TODO: parsing logic still to be worked out.
        return None
    
    
    # ====================主函数====================================================================================================================================
    if __name__ == '__main__':
        # Script entry point: connect to the database, run the concept/industry
        # import once, and report the elapsed wall-clock time.
        print("----------------------------------------------")
        print("问财数据解析程序开始执行")
        start = time.time()

        # Today's date as YYYYMMDD; strftime uses local time when no time tuple
        # is passed.
        current = time.strftime("%Y%m%d")

        # SQLAlchemy engine that pandas uses for reading and writing tables.
        engine = create_engine('mysql://root:123456@127.0.0.1/quant?charset=utf8')

        # iWencai query: "所属同花顺行业,所属概念" (industry and concept membership).
        Belongto(engine, current)
        # topHolderdetail() is intentionally not invoked: the top-ten
        # shareholder parsing is still unimplemented.

        end = time.time()
        elapsed = end - start
        print('问财数据解析共执行%0.2f秒.' % elapsed)
        print("问财数据解析程序执行完成")

      

      

    参考文献:https://blog.csdn.net/Norsaa/article/details/77692944  

  • 相关阅读:
    【递归】拆分自然数
    HDOJ3068最长回文
    博弈论——尼姆博奕
    vijos P1047最小公倍数
    Eular质数筛法-hiho一下 第九十三周
    hdoj-5652 India and China Origins二分+bfs
    hdoj-1166排兵布阵 简单的树状数组
    hdoj-5641 king's phone
    hdoj-1548简单的bfs题目
    命令中"|"的意义
  • 原文地址:https://www.cnblogs.com/Iceredtea/p/12103664.html
Copyright © 2020-2023  润新知