• python-pymysql 操作数据库-创建-写入-多线程写入-读取-清空表


    python-pymysql 操作数据库

    创建数据库-表
    
    #导入pymysql
    import pymysql
    
    
    # 创建连接
    DBHOST = 'localhost'
    DBUSER = 'root'
    DBPASS = 'lxl123456'
    # DBNAME = 'test'
    
    # pymysql的接口获取链接
    def mysql_conn(host, user, password):
        # 传参版本
        try:
            conn = pymysql.connect(host=host, user=user, password=password)
            print('数据库连接成功!')
        except pymysql.Error as e:
            print("数据库连接失败:" + str(e))
        return conn
    
    # 先创建cursor负责操作conn接口
    conn = mysql_conn(DBHOST, DBUSER, DBPASS)
    cursor=conn.cursor() #设置游标
    
    # 创建数据库的sql(如果数据库存在就不创建,防止异常)
    sql = "CREATE DATABASE IF NOT EXISTS test" 
    # 执行创建数据库的sql
    cursor.execute(sql)
    cursor.execute("use test")
    # 创建表
    sql_2 = '''CREATE TABLE `express2` (
      `运单号` bigint NOT NULL AUTO_INCREMENT,
      `物流单号` VARCHAR(80),
      `物流宝单号` VARCHAR(80),
      `行业` VARCHAR(80),
      `货主` VARCHAR(80),
      `是否保价` VARCHAR(40),
      `是否催派` VARCHAR(40),
      `是否工单发起` VARCHAR(40),
      `是否预售下沉` VARCHAR(40),
      `停滞状态` VARCHAR(80),
      `停滞时长` INT,
      `停滞要求时长` INT,
      `是否超停滞要求时长` VARCHAR(80),
      `物流停滞节点` VARCHAR(80),
      `发货大区` VARCHAR(80),
      `发货省` VARCHAR(80),
      `仓code` VARCHAR(80),
      `仓库` VARCHAR(80),
      `配送公司` VARCHAR(150),
      `配送cp` VARCHAR(150),
      `配送类型` VARCHAR(80),
      `配送大区` VARCHAR(80),
      `收货大区` VARCHAR(80),
      `收货省` VARCHAR(80),
      `收货市` VARCHAR(80),
      `收货区县` VARCHAR(80),
      `区域类型` VARCHAR(80),
      `一级分拨` VARCHAR(80),
      `二级分拨` VARCHAR(80),
      `末分拨` VARCHAR(80),
      `网点id` VARCHAR(80),
      `网点` VARCHAR(80),
      `支付时间` VARCHAR(80),
      `创建时间` VARCHAR(80),
      `接单时间` VARCHAR(80),
      `出库时间` VARCHAR(80),
      `入交接区时间` VARCHAR(80),
      `揽收时间` VARCHAR(80),
      `一级分拨入时间` VARCHAR(80),
      `一级分拨出时间` VARCHAR(80),
      `二级分拨入时间` VARCHAR(80),
      `二级分拨出时间` VARCHAR(80),
      `末分拨入时间` VARCHAR(80),
      `末分拨出时间` VARCHAR(80),
      `网点入时间` VARCHAR(80),
      `领件时间` VARCHAR(80),
      `配送成功时间` VARCHAR(80),
      `配送应签时间` VARCHAR(80),
      PRIMARY KEY (`运单号`)
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
    '''
    cursor.execute(sql_2)
    
    #关闭连接和游标
    conn.close()
    cursor.close()
    写入数据库
    
    #导入类
    import numpy as np
    import pandas as pd
    import pymysql
    
    info = pd.read_excel(r'/Users/linxianli/Desktop/数据.xlsx')
    info.shape
    '''
    (443378, 48)
    '''
    
    # 将 NAN 转化为 None
    info = info.astype(object).where(pd.notnull(info), None)
    
    # 创建连接
    DBHOST = 'localhost'
    DBUSER = 'root'
    DBPASS = 'lxl123456'
    DBNAME = 'test'
    
    # pymysql的接口获取链接
    def mysql_conn(host, user, password, db, port=3306, charset="utf8"):
        # 传参版本
        try:
            conn = pymysql.connect(host=host, user=user, password=password, database=db, port=port, charset=charset)
            print('数据库连接成功!')
        except pymysql.Error as e:
            print("数据库连接失败:" + str(e))
        return conn
    
    def write_database():
        # 先创建cursor负责操作conn接口
        conn = mysql_conn(DBHOST, DBUSER, DBPASS, DBNAME)
        cursor=conn.cursor() #设置游标
    
        # # 开启事务
        conn.begin()
    
        # 构造符合sql语句的列,因为sql语句是带有逗号分隔的,(这个对应上面的sql语句的(column1, column2, column3))
        columns = ','.join(list(info.columns))
    
        # 构造每个列对应的数据,对应于上面的((value1, value2, value3))
        data_list = [tuple(i) for i in info.values] # 每个元组都是一条数据,根据df行数生成多少元组数据
    
        # 计算一行有多少value值需要用字符串占位
        s_count = len(data_list[0]) * "%s,"
    
        # 构造sql语句
        insert_sql = "insert into " + "express2" + " (" + columns + ") values (" + s_count[:-1] + ")"
    
        try:
            res = cursor.executemany(insert_sql, data_list)
            print('执行sql受影响的行数:',res)
            conn.commit()
            cursor.close()
            conn.close()
        except Exception as e:
            # 万一失败了,要进行回滚操作
            conn.rollback()
            cursor.close()
            conn.close()
    
    start_time = time.time()
    write_database()
    stop_time = time.time()
    print('run time is %s' % (stop_time - start_time))
    '''
    数据库连接成功!
    执行sql受影响的行数: 443378
    run time is 96.30252575874329
    '''
    多线程写入数据库
    
    import numpy as np
    import pandas as pd
    import time, requests
    import pymysql
    from concurrent.futures import ProcessPoolExecutor
    
    info = pd.read_excel(r'/Users/linxianli/Desktop/数据.xlsx')
    
    # 将 NAN 转化为 None
    info = info.astype(object).where(pd.notnull(info), None)
    
    # 创建连接
    DBHOST = 'localhost'
    DBUSER = 'root'
    DBPASS = 'lxl123456'
    DBNAME = 'test'
    
    # pymysql的接口获取链接
    def mysql_conn(host, user, password, db, port=3306, charset="utf8"):
        # 传参版本
        try:
            conn = pymysql.connect(host=host, user=user, password=password, database=db, port=port, charset=charset)
            print('数据库连接成功!')
        except pymysql.Error as e:
            print("数据库连接失败:" + str(e))
        return conn
    
    def data_handler(info):
        conn = mysql_conn(DBHOST, DBUSER, DBPASS, DBNAME)
        cursor=conn.cursor() #设置游标
    
        # # 开启事务
        conn.begin()
    
        # 构造符合sql语句的列,因为sql语句是带有逗号分隔的,(这个对应上面的sql语句的(column1, column2, column3))
        columns = ','.join(list(info.columns))
    
        # 构造每个列对应的数据,对应于上面的((value1, value2, value3))
        data_list = [tuple(i) for i in info.values] # 每个元组都是一条数据,根据df行数生成多少元组数据
    
        # 计算一行有多少value值需要用字符串占位
        s_count = len(data_list[0]) * "%s,"
    
        # 构造sql语句
        insert_sql = "insert into " + "express2" + " (" + columns + ") values (" + s_count[:-1] + ")"
    
        try:
            res = cursor.executemany(insert_sql, data_list)
            print('执行sql受影响的行数:',res)
            conn.commit()
            cursor.close()
            conn.close()
        except Exception as e:
            # 万一失败了,要进行回滚操作
            conn.rollback()
            cursor.close()
            conn.close()
    
    def run():
        urls = [info[0:100000],info[100000:200000],info[200000:300000],info[300000:450000]]
        with ProcessPoolExecutor() as excute:
            excute.map(data_handler,urls)  ##ProcessPoolExecutor 提供的map函数,可以直接接受可迭代的参数,并且结果可以直接for循环取出
    
    start_time = time.time()
    run()
    stop_time = time.time()
    print('run time is %s' % (stop_time - start_time))
    '''
    数据库连接成功!
    数据库连接成功!
    数据库连接成功!
    数据库连接成功!
    执行sql受影响的行数: 100000
    执行sql受影响的行数: 100000
    执行sql受影响的行数: 100000
    执行sql受影响的行数: 143378
    run time is 57.68310880661011
    '''
    查询数据库表数据-清空表数据
    
    # 导入类
    import numpy as np
    import pandas as pd
    import pymysql
    
    # 创建连接
    DBHOST = 'localhost'
    DBUSER = 'root'
    DBPASS = 'lxl123456'
    DBNAME = 'test'
    
    # pymysql的接口获取链接
    def mysql_conn(host, user, password, db, port=3306, charset="utf8"):
        # 传参版本
        try:
            conn = pymysql.connect(host=host, user=user, password=password, database=db,port=port, charset=charset)
            print('数据库连接成功!')
        except pymysql.Error as e:
            print("数据库连接失败:" + str(e))
        return conn
    
    # 先创建cursor负责操作conn接口
    conn = mysql_conn(DBHOST, DBUSER, DBPASS, DBNAME)
    cursor=conn.cursor() #设置游标
    
    def mysql(sql):
        cursor.execute(sql) #执行查询语句
        jieguo=cursor.fetchall() #查看全部查询结果
        cols=cursor.description #类似 desc table_name返回结果
        col=[] #创建一个空列表以存放列名
        for v in cols:
            col.append(v[0]) #循环提取列名,并添加到col空列表
        dfsql=pd.DataFrame(jieguo,columns=col) #将查询结果转换成DF结构,并给列重新赋值
    #     if dfsql.empty:
    #         return 'empty set' #判断查询结果为空时返回的值
    #     else:   
        return dfsql #以DF结构返回查询结构,DF.to_excel...导出查询结果时可以带列名,这样就解决了mysql直接导出结果无列名的问题
    
    df = mysql('''
        select * from express2
    ''')
    
    conn.close()
    cursor.close()
    
    df.shape
    '''
    (443378, 48)
    '''
    
    # 清空表数据
    # 先创建cursor负责操作conn接口
    conn = mysql_conn(DBHOST, DBUSER, DBPASS, DBNAME)
    cursor=conn.cursor() #设置游标
    
    sql = '''TRUNCATE TABLE `express2`;'''
    cursor.execute(sql)
    print('清空表数据成功!')
    
    conn.close()
    cursor.close()
    '''
    数据库连接成功!
    清空表数据成功!
    '''

    代码还有很多地方可以优化的地方可以慢慢调整

    参考文档:

    https://blog.csdn.net/weixin_42796152/article/details/107931768

    https://blog.csdn.net/blog_liuliang/article/details/78724910

  • 相关阅读:
    LibreOJ 6003. 「网络流 24 题」魔术球 贪心或者最小路径覆盖
    LibreOJ #6002. 「网络流 24 题」最小路径覆盖
    LibreOJ #6000. 「网络流 24 题」搭配飞行员 最大匹配
    LibreOJ 2003. 「SDOI2017」新生舞会 基础01分数规划 最大权匹配
    hdu 1011 Starship Troopers 树形背包dp
    Codeforces Round #135 (Div. 2) D. Choosing Capital for Treeland dfs
    hdu 6199 gems gems gems dp
    hdu 5212 Code 筛法或者莫比乌斯
    hdu 3208 Integer’s Power 筛法
    hdu 5120 Intersection 两个圆的面积交
  • 原文地址:https://www.cnblogs.com/LXL616/p/14043577.html
Copyright © 2020-2023  润新知