Python Database Operations in Practice


    pymysql

    # -*- coding: utf-8 -*-
    
    """
    @Datetime: 2018/12/26
    @Author: Zhang Yafei
    """
    import pymysql
    from DBUtils.PooledDB import PooledDB
    
    POOL = PooledDB(
        creator=pymysql,  # module used to create connections
        maxconnections=6,  # maximum connections allowed in the pool; 0/None means unlimited
        mincached=2,  # idle connections created at initialization; 0 means none
        maxcached=5,  # maximum idle connections kept in the pool; 0/None means unlimited
        maxshared=3,
        # maximum number of shared connections; 0/None means all are shared. NB: effectively unused, because
        # pymysql, MySQLdb and similar modules report threadsafety=1, so _maxshared stays 0 and connections
        # are never actually shared.
        blocking=True,  # whether to block and wait when the pool is exhausted; False raises an error instead
        maxusage=None,  # how many times a single connection may be reused; None means unlimited
        setsession=[],  # commands executed before each session, e.g. ["set datestyle to ...", "set time zone ..."]
        ping=0,
        # ping the MySQL server to check that it is alive:
        # 0 = None = never, 1 = default = whenever it is requested,
        # 2 = when a cursor is created, 4 = when a query is executed, 7 = always
        host='127.0.0.1',
        port=3306,
        user='root',
        password='0000',
        database='flask_code',
        charset='utf8'
    )
    
    
    def connect(type=None):
        conn = POOL.connection()
        cursor = conn.cursor(cursor=type)
        return conn, cursor
    
    
    def connect_close(conn, cursor):
        cursor.close()
        conn.close()
    
    
    def fetchone(sql, arg=list(), type=pymysql.cursors.DictCursor):
        conn, cursor = connect(type)
        cursor.execute(sql, arg)
        data = cursor.fetchone()
        connect_close(conn, cursor)
        return data
    
    
    def fetchall(sql, arg=list(), type=pymysql.cursors.DictCursor):
        conn, cursor = connect(type)
        cursor.execute(sql, arg)
        data = cursor.fetchall()
        connect_close(conn, cursor)
        return data
    
    
    def insert(sql, arg=list()):
        conn, cursor = connect()
        row = cursor.execute(sql, arg)
        conn.commit()
        connect_close(conn, cursor)
        return row
    mysql_helper
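
    A minimal usage sketch for the helpers above (the users table and its columns are hypothetical; the pool must point at a reachable MySQL instance):

    if __name__ == '__main__':
        # pymysql substitutes %s placeholders safely, so values never need manual quoting
        row = insert('insert into users(name, age) values(%s, %s)', ['alice', 30])
        print(f'{row} row(s) inserted')
        one = fetchone('select * from users where name = %s', ['alice'])
        print(one)  # a dict, since DictCursor is the default cursor type
        print(fetchall('select * from users'))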

    sqlite

    # -*- coding: utf-8 -*-
    
    """
    @Datetime: 2019/1/31
    @Author: Zhang Yafei
    """
    import os
    import sqlite3
    
    BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    
    DB_DIR = os.path.join(BASE_DIR, 'data.db')
    
    
    def connect():
        """Connect to the database."""
        conn = sqlite3.connect(DB_DIR)  # the db file is created automatically if it does not exist
        cursor = conn.cursor()
        return conn, cursor
    
    
    def connect_close(conn, cursor):
        """关闭连接"""
        cursor.close()
        conn.close()
    
    
    def execute(sql, params=tuple()):
        conn, cursor = connect()
        cursor.execute(sql, params)  # run the statement
        conn.commit()  # commit, otherwise INSERT/UPDATE/DELETE changes are lost
        connect_close(conn, cursor)
    
    
    def fetchone(sql, params=tuple()):
        conn, cursor = connect()
        result = cursor.execute(sql, params)
        data = result.fetchone()
        connect_close(conn, cursor)
        return data
    
    
    def fetchall(sql, params=tuple()):
        conn, cursor = connect()
        results = cursor.execute(sql, params)
        data = results.fetchall()
        connect_close(conn, cursor)
        return data
    sqlite3_helper
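
    A quick usage sketch, assuming data.db is writable (the users table is hypothetical; note that sqlite3 uses ? placeholders rather than %s):

    execute('create table if not exists users (id integer primary key, name text)')
    execute('insert into users (name) values (?)', ('alice',))
    print(fetchone('select * from users where name = ?', ('alice',)))
    print(fetchall('select * from users'))
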
    # -*- coding: utf-8 -*-
    
    """
    @Datetime: 2019/1/31
    @Author: Zhang Yafei
    """
    import sqlite3
    import os
    
    BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    
    DB_DIR = os.path.join(BASE_DIR, 'data.db')
    
    
    class SqliteDB(object):
        def __init__(self):
            self.conn = sqlite3.connect(DB_DIR)  # the db file is created automatically if it does not exist
            self.cursor = self.conn.cursor()
    
        def close(self):
            self.cursor.close()
            self.conn.close()
    
        def execute(self, sql, params=tuple()):
            self.cursor.execute(sql, params)
            self.conn.commit()  # commit so write statements take effect
    
        def fetchone(self, sql, params=tuple()):
            result = self.cursor.execute(sql, params)
            data = result.fetchone()
            return data
    
        def fetchall(self, sql, params=tuple()):
            results = self.cursor.execute(sql, params)
            data = results.fetchall()
            return data
    
    
    if __name__ == '__main__':
        sqlite = SqliteDB()
        # 1. create a table
        sql = '''create table if not exists COMPANY(
                 ID int,
                 NAME text,
                 AGE int,
                 ADDRESS text,
                 SALARY real)'''
        sqlite.execute(sql)
    
        # 2. insert data
        sqlite.execute("""INSERT INTO COMPANY (ID,NAME,AGE,ADDRESS,SALARY)
              VALUES (4, 'Mark', 25, 'Rich-Mond ', 65000.00)""")
    
        # 3. update data
        sqlite.execute("UPDATE COMPANY SET ID=99 WHERE ID=2")
    
        # 4. delete rows
        sqlite.execute("DELETE FROM COMPANY WHERE ID=4")
        sqlite.execute("DELETE FROM COMPANY WHERE ID=3")
    
        # 5. query
        data = sqlite.fetchall('select * from label limit 1')
        print(data)
        sqlite.close()
        # output
        '''
        [('盘龙云海(排毒养颜胶囊)', 509881, '广东深圳龙岗区/女', '昨天吃的,今天就拉肚子了。感觉肚子有点涨痛!不知道效果怎么样~~~~~',
          '昨天/吃/的/,/今天/就/拉肚子/SB了/。/感觉/肚子/PB有点/涨痛/SB!/不/知道/效果/怎么样/~/~/~/~/~', '2011-09-30 15:26:00',
          'http://ypk.39.net/509881/comment/k0_p...', '昨天/吃/的/,/今天/就/拉肚子/SB了/。/感觉/肚子/PB有点/涨痛/SB!/不/知道/效果/怎么样/~/~/~/~/~',
          '昨天/吃/的/,/今天/就/拉肚子/SB了/。/感觉/肚子/PB有点/涨痛/SB!/不/知道/效果/怎么样/~/~/~/~/~')]
        '''
    sqlite3_helper2
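
    Since every SqliteDB instance owns a single connection, a context-manager wrapper makes the cleanup explicit; a small sketch (not part of the original file):

    from contextlib import closing

    # contextlib.closing calls SqliteDB.close() on exit, even if a query raises
    with closing(SqliteDB()) as db:
        print(db.fetchall('select * from label limit 1'))
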
    from DBUtils.PooledDB import PooledDB
    from settings import DATABASE
    import threading
    
    lock = threading.Lock()
    
    
    class SqliteHelper(object):
        def __init__(self, db_config):
            """
            # sqlite3
            # pass the database file name; sqlite has no authentication, so no user/password
            import sqlite3
            config = {"database": "path/to/your/dbname.db"}
            pool = PooledDB(sqlite3, maxcached=50, maxconnections=1000, maxusage=1000, **config)
            # mysql
            import pymysql
            pool = PooledDB(pymysql, 5, host='localhost', user='root', passwd='pwd', db='myDB', port=3306)  # 5 = minimum number of pooled connections
            # postgresql
            import psycopg2
            pool = PooledDB(creator=psycopg2, host="127.0.0.1", port=5432, user=user, password=password, database=database)
            # sqlserver
            import pymssql
            pool = PooledDB(creator=pymssql, host=host, port=port, user=user, password=password, database=database, charset="utf8")
            :param db_config: keyword arguments forwarded to PooledDB
            """
            self.pool = PooledDB(**db_config)
            self.conn = self.pool.connection()
            self.cursor = self.conn.cursor()
    
        def connect(self):
            conn = self.pool.connection()
            cursor = conn.cursor()
            return conn, cursor
    
        @staticmethod
        def connect_close(conn, cursor):
            """Close the connection."""
            cursor.close()
            conn.close()
    
        def __del__(self):
            self.cursor.close()
            self.conn.close()
    
        def execute(self, sql, params=tuple()):
            try:
                with lock:  # the lock is always released, even if execute raises
                    self.cursor.execute(sql, params)
                    self.conn.commit()
            except Exception as e:
                print(e)
    
        def execute_many(self, sql, params=tuple()):
            try:
                with lock:
                    self.cursor.executemany(sql, params)
                    self.conn.commit()
            except Exception as e:
                print(e)
    
        def fetchone(self, sql, params=tuple()):
            try:
                with lock:
                    self.cursor.execute(sql, params)
                    return self.cursor.fetchone()
            except Exception as e:
                print(e)
    
        def fetchall(self, sql, params=tuple()):
            try:
                with lock:
                    self.cursor.execute(sql, params)
                    return self.cursor.fetchall()
            except Exception as e:
                print(e)
    SqliteHelper3.py
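
    A hypothetical configuration for SqliteHelper; PooledDB forwards unrecognized keyword arguments to the creator module's connect(), so database ends up in sqlite3.connect():

    import sqlite3

    db_config = {
        'creator': sqlite3,     # DB-API module used to create connections
        'maxcached': 10,        # pool option consumed by PooledDB itself
        'database': 'data.db',  # forwarded to sqlite3.connect()
    }
    helper = SqliteHelper(db_config)
    helper.execute('create table if not exists users (id integer primary key, name text)')
    helper.execute('insert into users (name) values (?)', ('alice',))
    print(helper.fetchall('select * from users'))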

    mongodb

    # -*- coding: utf-8 -*-
    
    """
    @Datetime: 2019/1/31
    @Author: Zhang Yafei
    """
    import json
    import pymongo
    import pandas as pd
    
    
    class MongoPipeline(object):
        """
        mongodb:
            save(self, data, collection):                    将数据保存到数据库
            read(self, data):                                读取数据库中指定表格
            insert(self, table, dict_data):                 插入数据
            delete(self, table, condition):                 删除指定数据
            update(self, table, condition, new_dict_data):  更新指定数据
            dbFind(self, table, condition=None):             按条件查找
            findAll(self, table):                           查找全部
            close(self):                                    关闭连接
        """
    
        def __init__(self, mongo_db, mongo_uri='localhost'):
            self.mongo_uri = mongo_uri
            self.mongo_db = mongo_db
            self.client = pymongo.MongoClient(self.mongo_uri)
            self.db = self.client[self.mongo_db]
    
        def close(self):
            """
            Close the connection.
            :return:
            """
            self.client.close()
    
        def save(self, data, collection):
            """
            Save a DataFrame into a collection.
            :param data: pandas DataFrame
            :param collection: collection name
            :return: None
            """
            self.collection = self.db[collection]
            try:
                # insert_many replaces the deprecated collection.insert()
                if self.collection.insert_many(list(json.loads(data.T.to_json()).values())):
                    print('mongodb insert {} success.'.format(collection))
                    return
            except Exception as e:
                print('insert error:', e)
                import traceback
                traceback.print_exc()
    
        def read(self, table):
            """
            Read a collection into a DataFrame.
            :param table: collection name
            :return: DataFrame
            """
            try:
                # connect to the collection
                table = self.db[table]
                # read the data
                data = pd.DataFrame(list(table.find()))
                return data
            except Exception as e:
                import traceback
                traceback.print_exc()
    
        def insert(self, table, dict_data):
            """
            Insert a document.
            :param table: collection name
            :param dict_data: document dict
            :return: None
            """
            try:
                self.db[table].insert_one(dict_data)
                print("insert succeeded")
            except Exception as e:
                print(e)
    
        def update(self, table, condition, new_dict_data):
            """
            Update matching documents.
            :param table: collection name
            :param condition: filter dict
            :param new_dict_data: update document, e.g. {'$set': {...}}
            :return: None
            """
            try:
                self.db[table].update_many(condition, new_dict_data)
                print("update succeeded")
            except Exception as e:
                print(e)
    
        def delete(self, table, condition):
            """
            Delete matching documents.
            :param table: collection name
            :param condition: filter dict
            :return: None
            """
            try:
                self.db[table].delete_many(condition)
                print("delete succeeded")
            except Exception as e:
                print(e)
    
        def dbFind(self, table, condition=None):
            """
            Query by condition.
            :param table: collection name
            :param condition: filter dict
            :return: generator of dicts
            """
            data = self.db[table].find(condition)
            for item in data:
                yield item
    
        def findAll(self, table):
            """
            Fetch all documents.
            :param table: collection name
            :return: generator of dicts
            """
            for item in self.db[table].find():
                yield item
    
    
    if __name__ == '__main__':
        mongo = MongoPipeline('flask')
        # data = mongo.read('label')
        # print(data.head())
        condition = {"药品ID": 509881}
        data = mongo.dbFind('label', condition)
        print(data)
        for i in data:
            print(i)
        # mongo.findAll()
    mongo_helper
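
    A short write/query/clean-up sketch for the wrapper above (the collection name and fields are hypothetical):

    mongo = MongoPipeline('flask')
    mongo.insert('label', {'药品ID': 1, 'comment': 'test'})
    # update documents need an operator such as $set, which modifies fields in place
    mongo.update('label', {'药品ID': 1}, {'$set': {'comment': 'updated'}})
    for doc in mongo.dbFind('label', {'药品ID': 1}):
        print(doc)
    mongo.delete('label', {'药品ID': 1})
    mongo.close()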

    postgresql

    # -*- coding: utf-8 -*-
    '''
    Datetime: 2019/10/24
    author: Zhang Yafei
    description: working with PostgreSQL from Python
    The workflow is:
      1) connect to PostgreSQL and obtain a cursor
      2) execute the SQL statement
    
      A few things to note:
      - end each SQL statement with ';' (good practice, though psycopg2 tolerates its absence)
      - every parameter uses the %s placeholder: always %s, whether the value is a string or a number
      - the second argument must be a tuple; a bare (5) will not do, write (5,)
      - after INSERT, UPDATE or DELETE statements, call conn.commit() so the changes actually reach the database
      3) fetch the results
    
    Three fetch methods are available: fetchone(), fetchall() and fetchmany(). fetchone returns a tuple or
    None. fetchall returns a list of tuples, or an empty list if there are no results. fetchmany returns a
    list of tuples whose length is given by the size argument (default cursor.arraysize), or an empty list
    if nothing is left to fetch.
    '''
    import psycopg2
    
    
    def connect():
        conn = psycopg2.connect("dbname=xiaomuchong user=postgres password=0000 host=localhost port=5432")
        cur = conn.cursor()
        return conn, cur
    
    def connect_close(conn, cursor):
        """关闭连接"""
        cursor.close()
        conn.close()
    
    
    def execute(sql, params=tuple()):
        conn, cursor = connect()
        cursor.execute(sql, params)  # 执行这个语句
        conn.commit()
        connect_close(conn, cursor)
    
    
    def fetchone(sql, params=tuple()):
        conn, cursor = connect()
        cursor.execute(sql, params)  # psycopg2's execute() returns None; fetch from the cursor
        data = cursor.fetchone()
        connect_close(conn, cursor)
        return data
    
    
    def fetchall(sql, params=tuple()):
        conn, cursor = connect()
        cursor.execute(sql, params)
        data = cursor.fetchall()
        connect_close(conn, cursor)
        return data
    
    
    if __name__ == "__main__":
        # create_sql = "CREATE TABLE Employee (name varchar(80), address varchar(80),age int, date date)"
        # execute(create_sql)
    
        # execute("INSERT INTO Employee VALUES('Gopher', 'China Beijing', 100, '2017-05-27')")
        # execute("INSERT INTO Employee VALUES('Zhang', 'China jincheng', 24, '2019-10-24')")
    
        # rows = fetchall("SELECT * FROM Employee;")
        # for row in rows:
        #     print('name=' + str(row[0]) + ' address=' + str(row[1]) + 
        #         ' age=' + str(row[2]) + ' date=' + str(row[3]))
    
        # execute("UPDATE Employee SET age=12 WHERE name='Gopher'")   
    
        execute("DELETE FROM Employee WHERE name='Gopher'")
    postgresql_helper
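
    The header above also mentions fetchmany(); a matching helper in the same style might look like this (the size parameter is an addition, defaulting to psycopg2's cursor.arraysize):

    def fetchmany(sql, params=tuple(), size=None):
        conn, cursor = connect()
        cursor.execute(sql, params)
        # with no explicit size, psycopg2 falls back to cursor.arraysize
        data = cursor.fetchmany(size) if size is not None else cursor.fetchmany()
        connect_close(conn, cursor)
        return data
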
    # -*- coding: utf-8 -*-
    '''
    Datetime: 2019/10/24
    author: Zhang Yafei
    description: working with PostgreSQL from Python
    The workflow is:
      1) connect to PostgreSQL and obtain a cursor
      2) execute the SQL statement
    
      A few things to note:
      - end each SQL statement with ';' (good practice, though psycopg2 tolerates its absence)
      - every parameter uses the %s placeholder: always %s, whether the value is a string or a number
      - the second argument must be a tuple; a bare (5) will not do, write (5,)
      - after INSERT, UPDATE or DELETE statements, call conn.commit() so the changes actually reach the database
      3) fetch the results
    
    Three fetch methods are available: fetchone(), fetchall() and fetchmany(). fetchone returns a tuple or
    None. fetchall returns a list of tuples, or an empty list if there are no results. fetchmany returns a
    list of tuples whose length is given by the size argument (default cursor.arraysize), or an empty list
    if nothing is left to fetch.
    '''
    import psycopg2
    
    
    class PostgresSql(object):
        def __init__(self, dbname, user, password, host='localhost', port=5432):
            self.conn = psycopg2.connect(
                f"dbname={dbname} user={user} password={password} host={host} port={port}")
            self.cursor = self.conn.cursor()
    
        def connect_close(self):
            """关闭连接"""
            self.cursor.close()
            self.conn.close()
    
        def execute(self, sql, params=tuple()):
            self.cursor.execute(sql, params)  # 执行这个语句
            self.conn.commit()
    
        def fetchone(self, sql, params=tuple()):
            self.cursor.execute(sql, params)
            data = self.cursor.fetchone()
            return data
    
        def fetchall(self, sql, params=tuple()):
            self.cursor.execute(sql, params)
            data = self.cursor.fetchall()
            return data
    
    
    if __name__ == "__main__":
        postgres = PostgresSql(dbname='xiaomuchong', user='postgres', password='0000', host='localhost', port='5432')
        # create_sql = "CREATE TABLE Employee1 (name varchar(80), address varchar(80),age int, date date)"
        # postgres.execute(create_sql)
    
        # postgres.execute("INSERT INTO Employee1 VALUES('Gopher', 'China Beijing', 100, '2017-05-27')")
        # postgres.execute("INSERT INTO Employee1 VALUES('Zhang', 'China jincheng', 24, '2019-10-24')")
    
        rows = postgres.fetchall("SELECT * FROM Employee1;")
        for row in rows:
            print('name=' + str(row[0]) + ' address=' + str(row[1]) +
                ' age=' + str(row[2]) + ' date=' + str(row[3]))
    
        postgres.execute("UPDATE Employee1 SET age=12 WHERE name='Gopher'")
        postgres.execute("DELETE FROM Employee1 WHERE name='Gopher'")
    
        rows = postgres.fetchall("SELECT * FROM Employee1;")
        for row in rows:
            print('name=' + str(row[0]) + ' address=' + str(row[1]) +
                ' age=' + str(row[2]) + ' date=' + str(row[3]))
    
        postgres.connect_close()
    postgresql_helper_class
    from DBUtils.PooledDB import PooledDB
    
    
    class DBPoolHelper(object):
        def __init__(self, dbname, user=None, password=None, db_type='postgresql', host='localhost', port=5432):
            """
            # sqlite3
            # pass the database file name; sqlite has no authentication, so no user/password
            import sqlite3
            config = {"database": "path/to/your/dbname.db"}
            pool = PooledDB(sqlite3, maxcached=50, maxconnections=1000, maxusage=1000, **config)
            # mysql
            import pymysql
            pool = PooledDB(pymysql, 5, host='localhost', user='root', passwd='pwd', db='myDB', port=3306)  # 5 = minimum number of pooled connections
            # postgresql
            import psycopg2
            pool = PooledDB(creator=psycopg2, host="127.0.0.1", port=5432, user=user, password=password, database=database)
            # sqlserver
            import pymssql
            pool = PooledDB(creator=pymssql, host=host, port=port, user=user, password=password, database=database, charset="utf8")
            :param dbname: database name (or file path for sqlite)
            """
            if db_type == 'postgresql':
                import psycopg2
                pool = PooledDB(creator=psycopg2, host=host, port=port, user=user, password=password, database=dbname)
            elif db_type == 'mysql':
                import pymysql
                pool = PooledDB(pymysql, 5, host=host, port=port, user=user, passwd=password,
                                db=dbname)  # 5 = minimum number of pooled connections
            elif db_type == 'sqlite':
                import sqlite3
                config = {"database": dbname}
                pool = PooledDB(sqlite3, maxcached=50, maxconnections=1000, maxusage=1000, **config)
            else:
                raise Exception('Please pass a valid database type: db_type="postgresql", db_type="mysql" or db_type="sqlite"')
            self.conn = pool.connection()
            self.cursor = self.conn.cursor()
    
        def connect_close(self):
            """Close the connection."""
            self.cursor.close()
            self.conn.close()
    
        def execute(self, sql, params=tuple()):
            self.cursor.execute(sql, params)  # run the statement
            self.conn.commit()
    
        def execute_many(self, sql, params=tuple()):
            self.cursor.executemany(sql, params)
            self.conn.commit()
    
        def fetchone(self, sql, params=tuple()):
            self.cursor.execute(sql, params)
            data = self.cursor.fetchone()
            return data
    
        def fetchall(self, sql, params=tuple()):
            self.cursor.execute(sql, params)
            data = self.cursor.fetchall()
            return data
    postgresql_helper3.py
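
    A hypothetical instantiation of DBPoolHelper (credentials and table are placeholders):

    db = DBPoolHelper(dbname='xiaomuchong', user='postgres', password='0000',
                      db_type='postgresql', host='localhost', port=5432)
    db.execute('CREATE TABLE IF NOT EXISTS employee (name varchar(80), age int)')
    db.execute('INSERT INTO employee VALUES (%s, %s)', ('Gopher', 25))
    print(db.fetchall('SELECT * FROM employee'))
    db.connect_close()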

    redis

    from redis import ConnectionPool, Redis
    from scrapy.utils.project import get_project_settings  # assumes a Scrapy project with scrapy-redis style settings
    
    
    def redis_init():
        settings = get_project_settings()
        if settings["REDIS_PARAMS"]:
            pool = ConnectionPool(host=settings["REDIS_HOST"], port=settings["REDIS_PORT"],
                                  password=settings["REDIS_PARAMS"]['password'])
        else:
            pool = ConnectionPool(host=settings["REDIS_HOST"], port=settings["REDIS_PORT"])
        conn = Redis(connection_pool=pool)
        return conn
    
    
    if __name__ == '__main__':
        redis_conn = redis_init()
    redis_helper.py
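
    Once connected, the handle supports the usual Redis commands; a tiny sketch (the key name is arbitrary):

    redis_conn.set('spider:last_run', '2020-07-02')
    print(redis_conn.get('spider:last_run'))  # b'2020-07-02' (redis-py returns bytes by default)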

    DBUtils

    # -*- coding: utf-8 -*-
    
    """
    Datetime: 2020/07/02
    Author: Zhang Yafei
    Description: 
    """
    import time
    from functools import wraps
    from DBUtils.PooledDB import PooledDB
    
    
    def timeit(func):
        """
        Decorator: report how long a function takes to run.
        :param func:
        :return:
        """
    
        @wraps(func)
        def inner(*args, **kwargs):
            start = time.time()
            ret = func(*args, **kwargs)
            end = time.time() - start
            if end < 60:
                print(f'elapsed: {round(end, 2)}s')
            else:
                minutes, seconds = divmod(end, 60)
                print(f'elapsed: {round(minutes)}m {round(seconds, 2)}s')
            return ret
    
        return inner
    
    
    # DBPoolHelper here is identical to the class in postgresql_helper3.py above and is omitted.
    DBHelper.py
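
    A sketch of the decorator applied to a query function (the helper, database file and table are placeholders):

    @timeit
    def load_rows():
        db = DBPoolHelper(dbname='data.db', db_type='sqlite')
        rows = db.fetchall('select * from users')
        db.connect_close()
        return rows

    rows = load_rows()  # prints the elapsed time, then returns the result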

    Connecting to databases with pandas

    import pandas as pd
    from sqlalchemy import create_engine
    
    
    def pandas_db_helper():
        """
        Example connection URLs:
        'postgresql://postgres:0000@127.0.0.1:5432/xiaomuchong'
        "mysql+pymysql://root:0000@127.0.0.1:3306/srld?charset=utf8mb4"
        "sqlite:///sqlite3.db"
        """
        engine = create_engine('postgresql://postgres:0000@127.0.0.1:5432/xiaomuchong')
        conn = engine.connect()
        return conn
    
    
    if __name__ == '__main__':
        db = pandas_db_helper()
        data = pd.read_sql_table("team", con=db, index_col="board_id")
        print(data)
    pandas_db_helper.py
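
    Beyond read_sql_table, the same connection works with read_sql_query and DataFrame.to_sql; a sketch with placeholder table names:

    db = pandas_db_helper()
    df = pd.read_sql_query('select * from team limit 10', con=db)     # arbitrary SQL, not just whole tables
    df.to_sql('team_copy', con=db, if_exists='replace', index=False)  # write a DataFrame back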

    Practical projects

    1. pandas + mysql

    # -*- coding: utf-8 -*-
    
    """
    @Datetime: 2018/12/26
    @Author: Zhang Yafei
    """
    import pymysql
    from DBUtils.PooledDB import PooledDB
    import pandas as pd
    from pymysql.err import InternalError
    import numpy as np
    
    POOL = PooledDB(
        creator=pymysql,  # module used to create connections
        maxconnections=6,  # maximum connections allowed in the pool; 0/None means unlimited
        mincached=2,  # idle connections created at initialization; 0 means none
        maxcached=5,  # maximum idle connections kept in the pool; 0/None means unlimited
        maxshared=3,
        # maximum number of shared connections; 0/None means all are shared. NB: effectively unused, because
        # pymysql, MySQLdb and similar modules report threadsafety=1, so _maxshared stays 0 and connections
        # are never actually shared.
        blocking=True,  # whether to block and wait when the pool is exhausted; False raises an error instead
        maxusage=None,  # how many times a single connection may be reused; None means unlimited
        setsession=[],  # commands executed before each session, e.g. ["set datestyle to ...", "set time zone ..."]
        ping=0,
        # ping the MySQL server to check that it is alive:
        # 0 = None = never, 1 = default = whenever it is requested,
        # 2 = when a cursor is created, 4 = when a query is executed, 7 = always
        host='127.0.0.1',
        port=3306,
        user='root',
        password='0000',
        database='srld',
        charset='utf8mb4'
    )
    
    
    def connect(type=None):
        conn = POOL.connection()
        cursor = conn.cursor(cursor=type)
        return conn, cursor
    
    
    def connect_close(conn, cursor):
        cursor.close()
        conn.close()
    
    
    def fetchone(sql, arg=list(), type=pymysql.cursors.DictCursor):
        conn, cursor = connect(type)
        cursor.execute(sql, arg)
        data = cursor.fetchone()
        connect_close(conn, cursor)
        return data
    
    
    def fetchall(sql, arg=list(), type=pymysql.cursors.DictCursor):
        conn, cursor = connect(type)
        cursor.execute(sql, arg)
        data = cursor.fetchall()
        connect_close(conn, cursor)
        return data
    
    
    def insert(sql, arg=list()):
        conn, cursor = connect()
        row = cursor.execute(sql, arg)
        conn.commit()
        connect_close(conn, cursor)
        return row
    
    
    def insert_sql(row):
        pmid_doi, author, journal, year, title, abstract = row
    
        sql = """
              insert into pubmed(PMID_DOI,author,journal,year,title,abstract) values(%s, %s, %s, %s, %s, %s)
             """
        try:
            insert(sql, arg=[pmid_doi, author, journal, year, title, abstract])
        except InternalError:
            # after read_csv, missing fields arrive as float NaN; map them to None (SQL NULL)
            print(author, type(author), np.isnan(author))
            author = None if np.isnan(author) else author
            print(author)
    
    
    def main():
        df = pd.read_csv('xml副本/documents_1.txt', delimiter='\t', header=None)
        df.fillna(0, inplace=True)
        df.apply(insert_sql, axis=1)
    
    
    if __name__ == "__main__":
        main()
        # insert('''insert into pubmed(PMID_DOI,author,journal,year,title,abstract) values('PMID:28231184|DOI:10.3390/foods5040089',    'Moreira-Ascarrunz, Sergio Daniel; Larsson, Hans; Prieto-Linde, Maria Luisa; Johansson, Eva', 'Foods (Basel, Switzerland)', '2016',    'Mineral Nutritional Yield and Nutrient Density of Locally Adapted Wheat Genotypes under Organic Production.','The aim of the present investigation was to investigate the nutritional yield, nutrient density, stability, and adaptability of organically produced wheat for sustainable and nutritional high value food production. This study evaluated the nutritional yield of four minerals (Fe, Zn, Cu, and Mg) in 19 wheat genotypes, selected as being locally adapted under organic agriculture conditions. The new metric of nutritional yield was calculated for each genotype and they were evaluated for stability using the Additive Main effects and Multiplicative Interaction (AMMI) stability analysis and for genotypic value, stability, and adaptability using the Best Linear Unbiased Prediction (BLUP procedure). The results indicated that there were genotypes suitable for production under organic agriculture conditions with satisfactory yields (>4000 kg·ha(-1)). Furthermore, these genotypes showed high nutritional yield and nutrient density for the four minerals studied. Additionally, since these genotypes were stable and adaptable over three environmentally different years, they were designated "balanced genotypes" for the four minerals and for the aforementioned characteristics. Selection and breeding of such "balanced genotypes" may offer an alternative to producing nutritious food under low-input agriculture conditions. Furthermore, the type of evaluation presented here may also be of interest for implementation in research conducted in developing countries, following the objectives of producing enough nutrients for a growing population.')''')
    pandas_mysql1.py
    # -*- coding: utf-8 -*-
    '''
    Datetime: 2019/11/07
    author: Zhang Yafei
    description: 
    '''
    from sqlalchemy import create_engine
    import pandas as pd
    import os
    from multiprocessing import Pool
    from mysql_helper import fetchall, insert
    
    
    engine = create_engine(
        "mysql+pymysql://root:0000@127.0.0.1:3306/srld?charset=utf8mb4",
        max_overflow=0,  # connections allowed beyond pool_size
        pool_size=5,  # connection pool size
        pool_timeout=30,  # max seconds to wait for a free connection before raising
        pool_recycle=-1  # seconds after which a pooled connection is recycled (reset); -1 disables recycling
    )
    conn = engine.connect()
    
    
    def filter_file(file_list):
        sql = """ select file_path from data_records """
        file_of_database = {file[0] for file in fetchall(sql, type=None)}
        files = file_list - file_of_database
        print(f'total: {len(file_list)}\timported: {len(file_of_database)}\tremaining: {len(files)}')
        return files
    
    
    def main(file_path):
        table_name = 'pubmed'
        try:
            df = pd.read_csv(file_path, delimiter='\t', header=None)
            df.columns = ['pmid_doi', 'author', 'journal', 'year', 'title', 'abstract']
            df['path'] = file_path
            df['year'] = '2017'
            df.to_sql(table_name, conn, if_exists='append', index=False)
            insert_sql = """ insert into data_records(file_path) values(%s) """
            insert(sql=insert_sql, arg=[file_path])
            print(f'{file_path}\timported successfully')
        except Exception as e:
            # insert_sql = """ insert into data_records(file_path, state, remarks) values(%s,%s,%s) """
            # insert(sql=insert_sql, arg=[file_path, 0, str(e)])
            print(f'{file_path} import failed')
    
    
    if __name__ == "__main__":
        dir__names = ['data处理/800w-1600w副本', 'data处理/1600w-2600w副本', 'data处理/xmls_news']
        # for dir_name in dir__names:
        dir_name = dir__names[2]
        file_list = {os.path.join(dir_name, file)
                    for file in os.listdir(dir_name)}
        file_list = filter_file(file_list)
        for file in file_list:
            main(file_path=file)
        
    
        """
        pool = Pool()
        pool.map(main, file_list)
        pool.join()
        pool.close()
        """
    pandas+mysql2
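
    If the commented Pool variant is used, each worker process should build its own engine, because pooled connections do not survive a fork; a sketch (column handling omitted for brevity):

    from multiprocessing import Pool

    def import_one(file_path):
        # every process gets a private engine/connection
        worker_engine = create_engine("mysql+pymysql://root:0000@127.0.0.1:3306/srld?charset=utf8mb4")
        with worker_engine.connect() as worker_conn:
            df = pd.read_csv(file_path, delimiter='\t', header=None)
            df.to_sql('pubmed', worker_conn, if_exists='append', index=False)

    if __name__ == '__main__':
        with Pool(4) as pool:
            pool.map(import_one, sorted(file_list))  # the context manager closes and joins the pool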

    2. Drug data import

    from sqlalchemy import create_engine
    import pandas as pd
    import os
     
     
    engine = create_engine(
        "mysql+pymysql://root:0000@127.0.0.1:3306/drug?charset=utf8",
        max_overflow=0,  # connections allowed beyond pool_size
        pool_size=5,  # connection pool size
        pool_timeout=30,  # max seconds to wait for a free connection before raising
        pool_recycle=-1  # seconds after which a pooled connection is recycled (reset); -1 disables recycling
    )
    conn = engine.connect()
    
    
    def main(file_path, table_name='drugbank'):
        df = pd.read_table(file_path, header=None)[:900]
        df.columns = ['drugbank_id', 'drug_name']
        df['drugbank_id'] = [f'DB1{num}' for num in range(4195, 5095)]  # 900 generated ids, matching the 900 rows
        # print(df)
        df.to_sql(table_name, conn, if_exists='append', index=False)
        print(f'{file_path}\timported successfully')
    
    
    if __name__ == "__main__":
        main(file_path='drugs.txt')
    drugbank_to_Sql.py

    3. Disease database

    # -*- coding: utf-8 -*-
    '''
    Datetime: 2019/11/08
    author: Zhang Yafei
    description: 
    '''
    from sqlalchemy import create_engine
    import pandas as pd
    import os
     
     
    engine = create_engine(
        "mysql+pymysql://root:0000@127.0.0.1:3306/disease?charset=utf8mb4",
        max_overflow=0,  # connections allowed beyond pool_size
        pool_size=5,  # connection pool size
        pool_timeout=30,  # max seconds to wait for a free connection before raising
        pool_recycle=-1  # seconds after which a pooled connection is recycled (reset); -1 disables recycling
    )
    conn = engine.connect()
    
    
    def main(file_path, table_name='disease'):
        df = pd.read_excel(file_path, index_col='#')
        # df.columns = ['PMID_DOI', 'author', 'journal', 'year', 'title', 'abstract']
        df.to_sql(table_name, conn, if_exists='append', index=False)
        print(f'{file_path}\timported successfully')
    
    
    if __name__ == "__main__":
        file_list = []
        for base_path,dirs, files in os.walk('MalaCards'):
            # print(base_path, file_path)
            for file in files:
                file_list.append(os.path.join(base_path, file))
        for file in file_list:
            main(file_path=file)
    diease_to_sql.py

    4. Scientific literature database

    mysql_helper.py: identical to pandas_mysql1.py in section 1 above.
    pubmed_data_to_sql.py: identical to pandas+mysql2 in section 1 above.
    # -*- coding: utf-8 -*-
    '''
    Datetime: 2019/11/08
    author: Zhang Yafei
    description: 
    '''
    from sqlalchemy import create_engine
    import pandas as pd
    import os
    from mysql_helper import fetchall, insert
     
     
    engine = create_engine(
        "mysql+pymysql://root:0000@127.0.0.1:3306/srld?charset=utf8mb4",
        max_overflow=0,  # connections allowed beyond pool_size
        pool_size=5,  # connection pool size
        pool_timeout=30,  # max seconds to wait for a free connection before raising
        pool_recycle=-1  # seconds after which a pooled connection is recycled (reset); -1 disables recycling
    )
    conn = engine.connect()
    
    
    def filter_file(file_list):
        file_list = set(file_list)
        sql = """ select path from sci_records """
        file_of_database = {file[0] for file in fetchall(sql, type=None)}
        files = file_list - file_of_database
        print(f'total: {len(file_list)}\timported: {len(file_of_database)}\tremaining: {len(files)}')
        return files
    
    
    def main(file_path, table_name='sci_china'):
        try:
            df = pd.read_csv(file_path, delimiter='\t', encoding='utf-16', error_bad_lines=False)
            df = df[['PT', 'CA', 'TI', 'HO', 'ID', 'AB', 'FX', 'PD', 'PY', 'VL', 'AR']]
            df['path'] = 'China/2013'
            df.rename({'PT': 'AU', 'CA': 'TI', 'TI': 'SO', 'HO': 'DE', 'ID': 'AB', 'AB': 'C1', 'FX': 'CR', 'PD': 'PY', 'PY': 'VL', 'VL': 'IS', 'AR': 'DI'}, axis=1, inplace=True)
            df['PY'] = '2013'
            # print(df.head())
            df.to_sql(table_name, conn, if_exists='append', index=False)
            # insert_sql = """ insert into sci_records(path) values(%s) """
            # insert(sql=insert_sql, arg=[file_path])
            print(f'{file_path}\timported successfully')
        except Exception as e:
            print(f'{file_path}\timport failed')
    
    
    if __name__ == "__main__":
        file_list = []
        for base_path,dirs, files in os.walk('China'):
            for file in files:
                if file.endswith('.txt'):
                    file_list.append(os.path.join(base_path, file))
        # main(file_list[0])
        # file_list = filter_file(file_list)
        file_list = file_list[2000:2080]    
        for file in file_list:
            main(file_path=file)
    sci_data_to_sql.py

    5. Gene data import

    # -*- coding: utf-8 -*-
    '''
    Datetime: 2019/11/08
    author: Zhang Yafei
    description: 
    '''
    from sqlalchemy import create_engine
    import pandas as pd
    import os
     
     
    engine = create_engine(
        "mysql+pymysql://root:0000@127.0.0.1:3306/gene?charset=utf8",
        max_overflow=0,  # connections allowed beyond pool_size
        pool_size=5,  # connection pool size
        pool_timeout=30,  # max seconds to wait for a free connection before raising
        pool_recycle=-1  # seconds after which a pooled connection is recycled (reset); -1 disables recycling
    )
    conn = engine.connect()
    
    
    def main(file_path, table_name='arrayexpress'):
        # try:
        df = pd.read_csv(file_path, delimiter='\t', error_bad_lines=False, encoding='ISO8859-1')
        df.to_sql(table_name, conn, if_exists='append', index=False)
        # insert_sql = """ insert into sci_records(path) values(%s) """
        # insert(sql=insert_sql, arg=[file_path])
        print(f'{file_path}\timported successfully')
        # except Exception as e:
        #     print(f'{file_path}\timport failed')
    
    
    if __name__ == "__main__":
        file_list = []
        for base_path,dirs, files in os.walk('ArrayExpress数据'):
            for file in files:
                if file.endswith('.txt'):
                    file_list.append(os.path.join(base_path, file))
        # file_list = filter_file(file_list)
        for file in file_list:
            main(file_path=file)
    arrayexpress_to_sql.py
    # -*- coding: utf-8 -*-
    '''
    Datetime: 2019/11/08
    author: Zhang Yafei
    description: 
    '''
    from sqlalchemy import create_engine
    import pandas as pd
    import os
     
     
    engine = create_engine(
        "mysql+pymysql://root:0000@127.0.0.1:3306/zacopride?charset=utf8",
        max_overflow=0,  # connections allowed beyond pool_size
        pool_size=5,  # connection pool size
        pool_timeout=30,  # max seconds to wait for a free connection before raising
        pool_recycle=-1  # seconds after which a pooled connection is recycled (reset); -1 disables recycling
    )
    conn = engine.connect()
    
    
    def main(file_path):
        # derive the table name from the file name: strip the extension, normalize spaces and case
        table_name = os.path.splitext(os.path.basename(file_path))[0].strip().replace(' ', '_').lower()
        df = pd.read_csv(file_path)
        df.to_sql(table_name, conn, if_exists='append', index=False)
        print(f'{file_path}\timported successfully')
    
    
    if __name__ == "__main__":
        file_list = []
        for base_path,dirs, files in os.walk('zacopride'):
            # print(base_path, file_path)
            for file in files:
                file_list.append(os.path.join(base_path, file))
        # print(file_list)
        # main(file_list[0])
        for file in file_list:
            main(file_path=file)
    gene_to_sql.py
    # -*- coding: utf-8 -*-
    '''
    Datetime: 2019/11/08
    author: Zhang Yafei
    description: 
    '''
    from sqlalchemy import create_engine
    import pandas as pd
    import os
     
     
    engine = create_engine(
        "mysql+pymysql://root:0000@127.0.0.1:3306/gene?charset=utf8",
        max_overflow=0,  # connections allowed beyond pool_size
        pool_size=5,  # connection pool size
        pool_timeout=30,  # max seconds to wait for a free connection before raising
        pool_recycle=-1  # seconds after which a pooled connection is recycled (reset); -1 disables recycling
    )
    conn = engine.connect()
    
    
    def main(file_path, table_name='series'):
        # try:
        df = pd.read_csv(file_path, error_bad_lines=False)
        # print(df.columns)
        # df['path'] = file_path
        df.to_sql(table_name, conn, if_exists='append', index=False)
        # insert_sql = """ insert into sci_records(path) values(%s) """
        # insert(sql=insert_sql, arg=[file_path])
        print(f'{file_path}\timported successfully')
        # except Exception as e:
        #     print(f'{file_path}\timport failed')
    
    
    if __name__ == "__main__":
        file_list = []
        for base_path,dirs, files in os.walk('series'):
            for file in files:
                if file.endswith('.csv'):
                    file_list.append(os.path.join(base_path, file))
        for file in file_list:
            main(file_path=file)
    series_to_sql.py
    Author: Zhang Yafei
    Source: https://www.cnblogs.com/zhangyafei
    gitee: https://gitee.com/zhangyafeii
    Copyright of this article belongs to the author and cnblogs. Reposting is welcome, but this notice must be kept and a visible link to the original article provided.
Original article: https://www.cnblogs.com/zhangyafei/p/10343535.html