数据来源:问财
请求问句:所属同花顺行业,所属概念
下载文件,利用Pandas读取,解析,存储
# -*- coding: utf-8 -*-
"""Parse an iWencai (问财) CSV export and store stock industry/concept data.

The export is expected to contain the columns '股票代码' (stock code),
'所属同花顺行业' (industry path, levels joined by '-') and '所属概念'
(concepts joined by ';').  Rows that are not yet present in the database
tables ``belong_bk`` and ``belong_concept`` are appended to them.
"""
import os
import time

import pandas as pd

# Raw string on purpose: in a normal literal "\U" is an invalid unicode escape
# (SyntaxError on Python 3) and "\2" is an octal escape that corrupts the path.
DEFAULT_CSV_PATH = r'C:\Users\Red-tea-1919\Desktop\2020-01-09.csv'

# Column layout of the two target tables.
_BK_COLUMNS = ['stk_code', 'yj_bk', 'ej_bk', 'sj_bk', 'in_date']
_CONCEPT_COLUMNS = ['symbol', 'concept', 'in_date']


def _parse_membership(stk_bel, current):
    """Split the raw export into an industry frame and a concept frame.

    Rows without a stock code are skipped.  A missing industry or concept
    cell simply contributes no rows; an industry path with fewer than three
    levels is padded with ``None``.
    """
    bk_rows = []
    concept_rows = []
    columns = zip(stk_bel['股票代码'], stk_bel['所属同花顺行业'], stk_bel['所属概念'])
    for code, industry, concepts in columns:
        if not isinstance(code, str):
            # Empty cells are read by pandas as NaN (a float).
            continue
        symbol = code[:6]  # stock code without the exchange suffix

        if isinstance(industry, str):
            # Level 1 / level 2 / level 3 industry, padded when levels are missing.
            levels = (industry.split('-') + [None, None, None])[:3]
            bk_rows.append({
                'stk_code': code,
                'yj_bk': levels[0],
                'ej_bk': levels[1],
                'sj_bk': levels[2],
                'in_date': current,
            })

        if isinstance(concepts, str):
            for concept in concepts.split(';'):
                if concept:  # a trailing ';' would otherwise yield an empty concept
                    concept_rows.append(
                        {'symbol': symbol, 'concept': concept, 'in_date': current})

    # Building each frame once avoids the quadratic per-row DataFrame.append,
    # which no longer exists in pandas 2.x.
    stock_bk = pd.DataFrame(bk_rows, columns=_BK_COLUMNS)
    stock_concept = pd.DataFrame(concept_rows, columns=_CONCEPT_COLUMNS)
    return stock_bk, stock_concept


def _rows_not_stored(fresh, stored, keys):
    """Return the rows of *fresh* whose *keys* combination is absent from *stored*.

    This is an anti-join.  The previous ``drop_duplicates(keep=False)`` on the
    concatenation also kept rows that existed only in the database, which were
    then inserted a second time.
    """
    fresh = fresh.drop_duplicates(subset=keys, keep='first')
    if fresh.empty or stored.empty:
        return fresh
    known = stored[keys].drop_duplicates()
    marked = fresh.merge(known, on=keys, how='left', indicator=True)
    return marked.loc[marked['_merge'] == 'left_only', list(fresh.columns)]


# ==================== Industry / concept membership: parse and store ====================
def Belongto(engine, current, csv_path=DEFAULT_CSV_PATH):
    """Parse the exported CSV and append new membership rows to the database.

    Args:
        engine: Database connection or engine passed straight to
            ``pandas.read_sql`` and ``DataFrame.to_sql``.
        current: Value written to the ``in_date`` column of every new row.
        csv_path: Path of the GBK-encoded CSV export.  Defaults to the
            location that used to be hard-coded.

    Returns:
        None.  New rows are appended to ``belong_concept`` and ``belong_bk``;
        both tables must already exist because they are read first.

    Raises:
        KeyError: If the CSV lacks one of the three expected columns.
    """
    print("-------------------------------------")
    print("开始从csv文件中解析股票板块及概念信息")
    # Read the stock code as text so leading zeros survive and slicing works.
    stk_bel = pd.read_csv(csv_path, encoding='gbk', dtype={'股票代码': str})
    stk_bel = stk_bel[['股票代码', '所属同花顺行业', '所属概念']]

    print("-------------------------------------")
    print("开始从csv文件中解析所属板块数据")
    stock_bk, stock_concept = _parse_membership(stk_bel, current)

    print("----------------------------------------------")
    print("开始存储股票所属概念数据")
    old_concept = pd.read_sql('select * from belong_concept', engine)
    stock_concept = _rows_not_stored(stock_concept, old_concept, ['symbol', 'concept'])
    stock_concept.to_sql('belong_concept', engine, if_exists='append', index=False)
    print(stock_concept)
    print("本次存储股票所属概念数据%s条" % stock_concept.shape[0])

    print("----------------------------------------------")
    print("开始存储股票所属板块数据")
    old_bk = pd.read_sql('select * from belong_bk', engine)
    stock_bk = _rows_not_stored(stock_bk, old_bk, ['stk_code', 'yj_bk', 'ej_bk', 'sj_bk'])
    stock_bk.to_sql('belong_bk', engine, if_exists='append', index=False)
    print(stock_bk)
    print("本次存储股票所属板块数据%s条" % stock_bk.shape[0])


# ==================== Top-ten tradable shareholders: parse (not implemented) ====================
def topHolderdetail():
    """Placeholder for parsing top-ten tradable shareholder details."""
    print("-------------------------------------")
    print("开始从csv文件中解析前十大流通股东明细数据")
    # TODO: still to be researched.
    pass


# ==================== Script entry point ====================
def main():
    """Run the parser against the default CSV export and the quant database."""
    # Imported here because only the script entry point needs an engine;
    # Belongto itself works with any connection pandas accepts.
    from sqlalchemy import create_engine

    print("----------------------------------------------")
    print("问财数据解析程序开始执行")
    start = time.time()
    # NOTE(review): the fallback URL embeds credentials in source; set
    # QUANT_DB_URL in the environment to override it.
    db_url = os.environ.get(
        'QUANT_DB_URL', 'mysql://root:123456@127.0.0.1/quant?charset=utf8')
    engine = create_engine(db_url)
    # Today's date, stored as the in_date of newly inserted rows.
    current = time.strftime("%Y%m%d", time.localtime())
    # iWencai query: 所属同花顺行业, 所属概念
    Belongto(engine, current)
    # topHolderdetail()  # iWencai query: 十大流通股东明细
    end = time.time()
    print('问财数据解析共执行%0.2f秒.' % ((end - start)))
    print("问财数据解析程序执行完成")


if __name__ == '__main__':
    main()
参考文献:https://blog.csdn.net/Norsaa/article/details/77692944