• python将文件夹下的所有csv文件存入mysql和oracle数据库


    #oracle版

    首先新建python脚本(d:/python/orclImport.py)

    import os  #引入os包
    
    if __name__ == '__main__':
        #遍历文件夹下所有的dmp文件,其中filename为所有文件、文件夹的名称。
        #因为我文件夹下确定都是dmp文件,所以无需进行特殊判断
        for filename in os.listdir("D:dmp"):
            # 调用cmd窗口,并执行dmp的导入命令
            # 因为dmp文件数据量太大,我选择一条一条的去执行,就是如此,电脑已经打字都卡了
            os.system("imp orcl/123456@127.0.0.1/zrr file="D:dmp\"+filename+"" full=y ignore=y")
    

    在命令行执行 python d:/python/orclImport.py

    # mysql版

    import os
    import pymysql
    # import datetime, time
    # from config import *
    
    
    database_table_name = "stockdata"
    infile_path = 'E:/学习资料/stock_data'
    outfile_path = 'E:/学习资料/stock_data1'
    
    
    # 删除首行的股票代码和空行
    def delblankline(infile, outfile):
        infp = open(infile, "r")
        outfp = open(outfile, "w")
        lines = infp.readlines()
        for li in lines[1:]:
            if li.split():
                outfp.writelines(li)
        infp.close()
        outfp.close()
    
    def csv2bigcsv(path, outfile):
        """1.删除股票代码+空行  2.合并所有文件数据"""
        # infp = open(infile, "r")
        outfp = open(outfile, "w")
        i = 1
        for filename in os.listdir(path):
            domain = os.path.abspath(path)  # 获取文件夹的路径,也可去掉
            filepath = os.path.join(domain, filename)  # 文件的完整路径
            infp = open(filepath, 'r')  # 读取文件内容
            lines = infp.readlines()
            if i == 1: outfp.writelines(lines[3].split())
            for li in lines[5:]:
                if li.split():
                    outfp.writelines(li)
            infp.close()
            i += 1
        outfp.close()
    
    # 执行sql语句
    def db_excute(sql):
        # local_infile = 1 执行load data infile
        db_info = {
            'host': '127.0.0.1',
            'user': 'root',
            'password': 'root',
            'port': 3306,
            'database': 'test',
            'charset': 'utf8',
            'local_infile': 1
        }
        db = pymysql.connect(**db_info)
        # db.set_charset('utf8')
        cursor = db.cursor()
        try:
            cursor.execute(sql)
            db.commit()
        except pymysql.Error as e:
            print(e)
        finally:
            db.close()
    
    
    # 将单个csv文件插入数据库
    def csv2mysql(file_path, database_table_name):
        ''' LOAD DATA LOCAL INFILE "/root/test/infohash_20180603.txt"
            INTO TABLE xl_log_analysis.infohash_20180603
            FIELDS TERMINATED BY ','
            OPTIONALLY ENCLOSED BY '"' ESCAPED BY '"'
            LINES TERMINATED BY '
    '
            IGNORE 1 LINES ( infohash );'''
        '''LINES TERMINATED BY \r\n WIN 为\r,LINUX 为\n'''
    
        # " LOAD DATA LOCAL INFILE '" + file_path + "'"
        #       + " INTO TABLE " + database_table_name 
        #       + " FIELDS TERMINATED BY ',' " 
        #       + " OPTIONALLY ENCLOSED BY '"' ESCAPED BY '"' " 
        #       + " LINES TERMINATED BY '\r\n' " 
        #       + " IGNORE 5 LINES; "
    
        # 报1148错误,可以本地开启local_infile服务
        # set global local_infile = 'ON';
        sql = '''LOAD DATA LOCAL INFILE '{0}' INTO TABLE {1} 
                CHARACTER SET UTF8 
                FIELDS TERMINATED BY ',' 
                ENCLOSED BY '"' ESCAPED BY '"' 
                LINES TERMINATED BY '\r\n' 
                IGNORE 5 LINES (`symbol`, `date`, `opening_price`, `highest_price` , `lowest_price`, `closing_price`,  
                `change`, `trading_volume`, `turnover`, `circulation_market_value`, `total_value`, 
                `turnover_rate`, `post_resumption_price`, `report_type`, `report_time`, `PE_TTM`, 
                `PS_TTM`, `PCF_TTM`, `PB`, `ex_duplicate_price`, `tradable_share`, `total_stock_issue`, 
                `total_assets`, `current_asset`, `gross_liabilities`, `current_liabilities`, `total_operating_income`, 
                `operating_income`, `total_operating_costs`, `operating_costs`, `operating_profit`, `net_profits`, 
                `selling_expenses`, `administration_expenses`, `financial_expenses`, `net_cash_flow`, 
                `NOCF`, `NICF`, `NFCF`, `ROE_TTM`);'''.format(file_path, database_table_name)
        db_excute(sql)
        # print(sql)
    
    
    # 将文件路径下的所有csv文件插入数据库
    def to_mysql(infile_path, database_table_name):
        for filename in os.listdir(infile_path):
            print('正在上传:', filename)
            # domain = os.path.abspath(infile_path)  # 获取文件夹的路径,也可去掉
            # file_path = os.path.join(domain, filename)  # 文件的完整路径
            file_path = infile_path + '/' + filename
            # print('file_path: ',file_path)
            csv2mysql(file_path, database_table_name)
    
    
    if __name__ == '__main__':
        # delblankline("sh600000.csv", "sh600000_.csv")
        # csv2bigcsv(infile_path, "all-groups.csv")
    
        # 创建新表
        createtable_sql = '''CREATE TABLE IF NOT EXISTS `{0}` (
    	`id` INT NOT NULL AUTO_INCREMENT COMMENT 'ID',
    	`symbol` VARCHAR ( 10 ) DEFAULT NULL COMMENT '股票代码',
    	`date` DATE DEFAULT NULL COMMENT '日期',
    	`opening_price` FLOAT DEFAULT NULL COMMENT '开盘价',
    	`highest_price` FLOAT DEFAULT NULL COMMENT '最高价',
    	`lowest_price` FLOAT DEFAULT NULL COMMENT '最低价',
    	`closing_price` FLOAT DEFAULT NULL COMMENT '收盘价',
    	`change` FLOAT DEFAULT NULL COMMENT '涨跌幅',
    	`trading_volume` BIGINT DEFAULT NULL COMMENT '交易量',
    	`turnover` BIGINT DEFAULT NULL COMMENT '交易额',
    	`circulation_market_value` BIGINT DEFAULT NULL COMMENT '流通市值',
    	`total_value` BIGINT DEFAULT NULL COMMENT '总市值',
    	`turnover_rate` FLOAT DEFAULT NULL COMMENT '换手率',
    	`post_resumption_price` FLOAT DEFAULT NULL COMMENT '后复权价格',
    	`report_type` VARCHAR ( 64 ) DEFAULT NULL COMMENT '报告类型',
    	`report_time` DATE DEFAULT NULL COMMENT '报告时间',
    	# 市盈率PE 市净率PB 市销率PS 市现率PCF
    	`PE_TTM` FLOAT DEFAULT NULL COMMENT '市盈率TTM',
    	`PS_TTM` FLOAT DEFAULT NULL COMMENT '市销率TTM',
    	`PCF_TTM` FLOAT DEFAULT NULL COMMENT '市现率TTM',
    	`PB` FLOAT DEFAULT NULL COMMENT '市净率',
    	`ex_duplicate_price` FLOAT DEFAULT NULL COMMENT '前复权价格',
    	`tradable_share` BIGINT DEFAULT NULL COMMENT '流通股本',
    	`total_stock_issue` BIGINT DEFAULT NULL COMMENT '总股本',
    	`total_assets` BIGINT DEFAULT NULL COMMENT '总资产',
    	`current_asset` BIGINT DEFAULT NULL COMMENT '流动资产',
    	`gross_liabilities` BIGINT DEFAULT NULL COMMENT '总负债',
    	`current_liabilities` BIGINT DEFAULT NULL COMMENT '流动负债',
    	`total_operating_income` BIGINT DEFAULT NULL COMMENT '营业总收入',
    	`operating_income` BIGINT DEFAULT NULL COMMENT '营业收入',
    	`total_operating_costs` BIGINT DEFAULT NULL COMMENT '营业总成本',
    	`operating_costs` BIGINT DEFAULT NULL COMMENT '营业成本',
    	`operating_profit` BIGINT DEFAULT NULL COMMENT '营业利润',
    	`net_profits` BIGINT DEFAULT NULL COMMENT '净利润',
    	`selling_expenses` BIGINT DEFAULT NULL COMMENT '销售费用',
    	`administration_expenses` BIGINT DEFAULT NULL COMMENT '管理费用',
    	`financial_expenses` BIGINT DEFAULT NULL COMMENT '财务费用',
    	`net_cash_flow` BIGINT DEFAULT NULL COMMENT '净现金流',
    	`NOCF` BIGINT DEFAULT NULL COMMENT '经营活动净现金流operating activities',
    	`NICF` BIGINT DEFAULT NULL COMMENT '投资活动净现金流investment activities',
    	`NFCF` BIGINT DEFAULT NULL COMMENT '筹资活动净现金流financing activities',
    	`ROE_TTM` BIGINT DEFAULT NULL COMMENT '净资产收益率TTM Net asset yield',
    	PRIMARY KEY ( `id` )
    	) ENGINE = INNODB DEFAULT CHARSET = UTF8MB4;'''.format(database_table_name)
        db_excute(createtable_sql)  # 创建新表
        print(createtable_sql)
        to_mysql(infile_path, database_table_name)  # 导入infohash数据到mysql xl_log_analysis 表
    	
    

      

  • 相关阅读:
    Android 富文本框实现 RichEditText
    mmap和普通文件读写的区别和比较 & mmap的注意点
    exit和_exit的区别
    绑定线程到特定CPU处理器
    软中断与硬中断 & 中断抢占 中断嵌套
    线程与信号处理
    内核信号处理 & CPU8个通用寄存器
    SIGSEGV 和 SIGBUS & gdb看汇编
    Linux内核态用户态相关知识 & 相互通信
    Linux进程空间分布 & 上下文
  • 原文地址:https://www.cnblogs.com/iupoint/p/9781074.html
Copyright © 2020-2023  润新知