• Python--增量循环删除MySQL表数据


    需求场景:

    有一业务数据库,使用MySQL 5.5版本,每天会写入大量数据,需要不定期将多表中“指定时期前“的数据进行删除,在SQL SERVER中很容易实现,写几个WHILE循环就搞定,虽然MySQL中也存在类似功能,怎奈自己不精通,于是采用Python来实现

    话不多少,上脚本:

    # coding: utf-8
    import MySQLdb
    import time
    import os
    
    # delete config
    # 如果VIEW_OR_RUN = "VIEW",仅生成小批量删除的脚本但不执行
    # 如果VIEW_OR_RUN = "RUN",生成小批量删除的脚本并直接调用执行
    VIEW_OR_RUN = "VIEW"
    DELETE_DATABASE_NAME = ""
    DELETE_TABLE_NAME = ""
    DELETE_TABLE_KEY = ""
    DELETE_CONDITION = ""
    DELETE_ROWS_PER_BATCH = 10000
    SLEEP_SECOND_PER_BATCH = 0.5
    
    # MySQL Connection Config
    Default_MySQL_Host = '192.168.166.169'
    Default_MySQL_Port = 3358
    Default_MySQL_User = "mysql_admin"
    Default_MySQL_Password = 'mysql@Admin@Pwd'
    
    Default_MySQL_Charset = "utf8"
    Default_MySQL_Connect_TimeOut = 120
    Default_MySQL_Socket = "/export/data/mysql/tmp/mysql.sock"
    
    # Common config:
    DATETIME_FORMAT = '%Y-%m-%d %X'
    EXEC_DETAIL_FILE = 'exec_detail.txt'
    EXEC_SCRIPT_FILE = 'delete_scripts.sql'
    
    
    def highlight(s):
        return "%s[30;2m%s%s[1m" % (chr(27), s, chr(27))
    
    
    def print_warning_message(message):
        """
        以红色字体显示消息内容
        :param message: 消息内容
        :return: 无返回值
        """
        message = str(message)
        print(highlight('') + "%s[31;1m%s%s[0m" % (chr(27), message, chr(27)))
        global EXEC_DETAIL_FILE
        write_file(EXEC_DETAIL_FILE, message)
    
    
    def print_info_message(message):
        """
        以绿色字体输出提醒性的消息
        :param message: 消息内容
        :return: 无返回值
        """
        message = str(message)
        print(highlight('') + "%s[32;2m%s%s[0m" % (chr(27), message, chr(27)))
        global EXEC_DETAIL_FILE
        write_file(EXEC_DETAIL_FILE, message)
    
    
    def write_file(file_path, message):
        """
        将传入的message追加写入到file_path指定的文件中
        请先创建文件所在的目录
        :param file_path: 要写入的文件路径
        :param message: 要写入的信息
        :return:
        """
        file_handle = open(file_path, 'a')
        file_handle.writelines(message)
        # 追加一个换行以方便浏览
        file_handle.writelines(chr(13))
        file_handle.close()
    
    
    def get_user_choose_option(input_options, input_message):
        while_flag = True
        choose_option = None
        while while_flag:
            print_info_message(input_message)
            str_input = raw_input("")
            for input_option in input_options:
                if str_input.strip() == input_option:
                    choose_option = input_option
                    while_flag = False
        return choose_option
    
    
    def get_mysql_connection():
        """
        根据默认配置返回数据库连接
        :return: 数据库连接
        """
        if Default_MySQL_Host.lower() == 'localhost':
            conn = MySQLdb.connect(
                    host=Default_MySQL_Host,
                    port=Default_MySQL_Port,
                    user=Default_MySQL_User,
                    passwd=Default_MySQL_Password,
                    connect_timeout=Default_MySQL_Connect_TimeOut,
                    charset=Default_MySQL_Charset,
                    db=DELETE_DATABASE_NAME,
                    unix_socket=Default_MySQL_Socket
            )
        else:
            conn = MySQLdb.connect(
                    host=Default_MySQL_Host,
                    port=Default_MySQL_Port,
                    user=Default_MySQL_User,
                    passwd=Default_MySQL_Password,
                    connect_timeout=Default_MySQL_Connect_TimeOut,
                    charset=Default_MySQL_Charset,
                    db=DELETE_DATABASE_NAME
            )
        return conn
    
    
    def mysql_exec(sql_script, sql_param=None):
        """
        执行传入的脚本,返回影响行数
        :param sql_script:
        :param sql_param:
        :return: 脚本最后一条语句执行影响行数
        """
        try:
            conn = get_mysql_connection()
            print_info_message("在服务器{0}上执行脚本:{1}".format(
                    conn.get_host_info(), sql_script))
            cursor = conn.cursor()
            if sql_param is not None:
                cursor.execute(sql_script, sql_param)
            else:
                cursor.execute(sql_script)
            affect_rows = cursor.rowcount
            conn.commit()
            cursor.close()
            conn.close()
            return affect_rows
        except Exception as ex:
            cursor.close()
            conn.rollback()
            raise Exception(str(ex))
    
    
    def mysql_exec_many(sql_script_list):
        """
        执行传入的脚本,返回影响行数
        :param sql_script_list: 要执行的脚本List,List中每个元素为sql_script, sql_param对
        :return: 返回执行每个脚本影响的行数列表
        """
        try:
            conn = get_mysql_connection()
            exec_result_list = []
            for sql_script, sql_param in sql_script_list:
                print_info_message("在服务器{0}上执行脚本:{1}".format(
                        conn.get_host_info(), sql_script))
                cursor = conn.cursor()
                if sql_param is not None:
                    cursor.execute(sql_script, sql_param)
                else:
                    cursor.execute(sql_script)
                affect_rows = cursor.rowcount
                exec_result_list.append("影响行数:{0}".format(affect_rows))
            conn.commit()
            cursor.close()
            conn.close()
            return exec_result_list
    
        except Exception as ex:
            cursor.close()
            conn.rollback()
            raise Exception(str(ex))
    
    
    def mysql_query(sql_script, sql_param=None):
        """
        执行传入的SQL脚本,并返回查询结果
        :param sql_script:
        :param sql_param:
        :return: 返回SQL查询结果
        """
        try:
            conn = get_mysql_connection()
            print_info_message("在服务器{0}上执行脚本:{1}".format(
                    conn.get_host_info(), sql_script))
            cursor = conn.cursor()
            if sql_param is not None:
                cursor.execute(sql_script, sql_param)
            else:
                cursor.execute(sql_script)
            exec_result = cursor.fetchall()
            cursor.close()
            conn.close()
            return exec_result
        except Exception as ex:
            cursor.close()
            conn.close()
            raise Exception(str(ex))
    
    
    def get_column_info_list(table_name):
        sql_script = """
    DESC {0}
    """.format(table_name)
        column_info_list = []
        query_result = mysql_query(sql_script=sql_script, sql_param=None)
        for row in query_result:
            column_name = row[0]
            column_type = row[1]
            column_key = row[3]
            column_info = column_name, column_key, column_type
            column_info_list.append(column_info)
        return column_info_list
    
    
    def get_id_range():
        """
        按照传入的表获取要删除数据最大ID、最小ID、删除总行数
        :return: 返回要删除数据最大ID、最小ID、删除总行数
        """
        global DELETE_TABLE_NAME
        global DELETE_CONDITION
        sql_script = """
    SELECT
    MAX({2}) AS MAX_ID,
    MIN({2}) AS MIN_ID,
    COUNT(1) AS Total_Count
    FROM {0}
    WHERE {1};
    """.format(DELETE_TABLE_NAME, DELETE_CONDITION, DELETE_TABLE_KEY)
    
        query_result = mysql_query(sql_script=sql_script, sql_param=None)
        max_id, min_id, total_count = query_result[0]
        # 此处有一坑,可能出现total_count不为0 但是max_id 和min_id 为None的情况
        # 因此判断max_id和min_id 是否为NULL
        if (max_id is None) or (min_id is None):
            max_id, min_id, total_count = 0, 0, 0
        return max_id, min_id, total_count
    
    
    def delete_data(current_min_id, current_max_id):
        sql_script = """
    DELETE FROM {0}
    WHERE {4} <= {1}
    and {4} >= {2}
    AND {3};
    """.format(DELETE_TABLE_NAME,
               current_max_id,
               current_min_id,
               DELETE_CONDITION,
               DELETE_TABLE_KEY
               )
        global EXEC_SCRIPT_FILE
        global VIEW_OR_RUN
        if VIEW_OR_RUN == 'RUN':
            row_count = mysql_exec(sql_script)
            print_info_message("影响行数:{0}".format(row_count))
            time.sleep(SLEEP_SECOND_PER_BATCH)
        else:
            print_info_message("生成删除脚本(未执行)")
            print_info_message(sql_script)
        tmp_script = """
    USE {0};
    """.format(DELETE_DATABASE_NAME) + sql_script + """
    COMMIT;
    SELECT SLEEP('{0}');
    ##=====================================================##
    """.format(SLEEP_SECOND_PER_BATCH)
        write_file(file_path=EXEC_SCRIPT_FILE, message=tmp_script)
    
    
    def loop_delete_data():
        max_id, min_id, total_count = get_id_range()
        if min_id == max_id:
            print_info_message("无数据需要结转")
            return
        current_min_id = min_id
        global DELETE_ROWS_PER_BATCH
        while current_min_id <= max_id:
            print_info_message("*" * 70)
            current_max_id = current_min_id + DELETE_ROWS_PER_BATCH
            delete_data(current_min_id, current_max_id)
            current_percent = (current_max_id - min_id) * 100.0 / (max_id - min_id)
            left_rows = max_id - current_max_id
            if left_rows < 0:
                left_rows = 0
            current_percent_str = "%.2f" % current_percent
            info = "当前进度{0}/{1},剩余{2},进度为{3}%"
            info = info.format(current_max_id,
                               max_id,
                               left_rows,
                               current_percent_str)
            print_info_message(info)
            current_min_id = current_max_id
        print_info_message("*" * 70)
        print_info_message("执行完成")
    
    
    def check_config():
        try:
            global DELETE_DATABASE_NAME
            global DELETE_TABLE_NAME
            global DELETE_TABLE_KEY
            global DELETE_CONDITION
            global VIEW_OR_RUN
    
            if str(DELETE_DATABASE_NAME).strip() == "":
                print_warning_message("数据库名不能为空")
                return False
            if str(DELETE_TABLE_NAME).strip() == "":
                print_warning_message("表名不能为空")
                return False
            if str(DELETE_CONDITION).strip() == "":
                print_warning_message("删除条件不能为空")
                return False
            source_columns_info_list = get_column_info_list(DELETE_TABLE_NAME)
            column_count = len(source_columns_info_list)
            primary_key_count = 0
            for column_id in range(column_count):
                source_column_name, source_column_key, source_column_type = source_columns_info_list[column_id]
                if source_column_key.lower() == 'pri':
                    primary_key_count += 1
                    if not ('int' in str(source_column_type).lower()):
                        print_warning_message("主键不为int或bigint")
                        return False
                    else:
                        global DELETE_TABLE_KEY
                        DELETE_TABLE_KEY = source_column_name
    
            if primary_key_count == 0:
                print_warning_message("未找到主键,不瞒足迁移条件")
                return False
    
            if primary_key_count > 1:
                print_warning_message("要删除的表使用复合主键,不满足迁移条件")
                return False
    
            return True
        except Exception as ex:
            print_warning_message("执行出现异常,异常为{0}".format(ex.message))
            return False
    
    
    def clean_env():
        global DELETE_DATABASE_NAME
        global DELETE_TABLE_NAME
        global DELETE_TABLE_KEY
        global DELETE_CONDITION
        global VIEW_OR_RUN
        DELETE_DATABASE_NAME = ""
        DELETE_TABLE_NAME = ""
        DELETE_TABLE_KEY = ""
        DELETE_CONDITION = ""
        VIEW_OR_RUN = "VIEW"
    
        if os.path.exists(EXEC_SCRIPT_FILE):
            os.remove(EXEC_SCRIPT_FILE)
        if os.path.exists(EXEC_DETAIL_FILE):
            os.remove(EXEC_DETAIL_FILE)
    
    
    def user_confirm():
        if VIEW_OR_RUN == 'RUN':
            info = """
    您将在服务器{0}上{1}中删除表{2}中数据
    
    删除数据条件为:
    DELETE FROM {3}
    WHERE {4}
    """
        else:
            info = """
    将生成在服务器{0}上{1}中删除表{2}中数据的脚本
    
    删除数据条件为:
    DELETE FROM {3}
    WHERE {4}
    """
        info = info.format(Default_MySQL_Host,
                           DELETE_DATABASE_NAME,
                           DELETE_TABLE_NAME,
                           DELETE_TABLE_NAME,
                           DELETE_CONDITION)
    
        if VIEW_OR_RUN == "RUN":
            print_warning_message(info)
        else:
            print_info_message(info)
        input_options = ['yes', 'no']
        input_message = """
    请输入yes继续或输入no退出,yes/no?
    """
        user_option = get_user_choose_option(input_options=input_options,
                                             input_message=input_message)
        if user_option == "no":
            return False
        else:
            return True
    
    
    def delete_table_data(database_name, table_name, delete_condition, is_run, is_need_confirm):
        global DELETE_DATABASE_NAME
        global DELETE_TABLE_NAME
        global DELETE_TABLE_KEY
        global DELETE_CONDITION
        global VIEW_OR_RUN
        DELETE_DATABASE_NAME = database_name
        DELETE_TABLE_NAME = table_name
        DELETE_CONDITION = delete_condition
        DELETE_TABLE_KEY = ''
        if is_run:
            VIEW_OR_RUN = "RUN"
        else:
            VIEW_OR_RUN = "VIEW"
        check_result = check_config()
        if not check_result:
            return
        if is_need_confirm:
            confirm_result = user_confirm()
        else:
            confirm_result = True
        if confirm_result:
            loop_delete_data()
    
    
    def main():
        clean_env()
        delete_table_data(database_name="db001",
                          table_name="tb2001",
                          delete_condition="dt<'2017-09-01'",
                          is_run=True,
                          is_need_confirm=True)
    
    
    if __name__ == '__main__':
        main()

    执行效果:

    实现原理:

    由于表存在自增ID,于是给我们增量循环删除的机会,查找出满足删除条件的最大值ID和最小值ID,然后按ID 依次递增,每次小范围内(如10000条)进行删除。

    实现优点:

    实现“小斧子砍大柴”的效果,事务小,对线上影响较小,打印出当前处理到的“ID”,可以随时关闭,稍微修改下代码便可以从该ID开始,方便。

    实现不足:

    为防止主从延迟太高,采用每次删除SLEEP1秒的方式,相对比较糙,最好的方式应该是周期扫描这条复制链路,根据延迟调整SLEEP的周期,反正都脚本化,再智能化点又何妨!

    本文重点依旧是妹子,不能让诸位看官白跑一趟,是不!!!

  • 相关阅读:
    java如何将char类型的数字转换成int型的数字,而不是Ascii
    java 二分查找的注意事项
    IntelliJ IDEA 下的svn配置及使用的非常详细的图文总结
    java中Math的常用方法整理
    判断字符串是否可由重复子字符串组成
    P3558 [POI2013]BAJ-Bytecomputer
    BZOJ 3329. Xorequ
    Codeforces 1221F. Choose a Square
    Codeforces 1221E. Game With String
    Codeforces 1221D. Make The Fence Great Again
  • 原文地址:https://www.cnblogs.com/TeyGao/p/5898591.html
Copyright © 2020-2023  润新知