• 通过Python实现对SQL Server 数据文件大小的监控告警


    1.需求背景 

    系统程序突然报错,报错信息如下:

    The transaction log for database '@dbname' is full. To find out why space in the log cannot be reused, see the log_reuse_wait_desc column in sys.databases

    此时查看log文件,已达2T。

    当时的紧急处理方案是,移除掉镜像,修改数据库恢复模式(由full修改为simple),收缩日志。

    为了防止类似问题再次发生,需对log 文件的大小进行监控,当到达阈值后,触发告警。

    2.主要基础组件(类)

    配置文件 qqmssqltest_db_server_conf.ini

    同过此配置文件获取DB Server信息、DB信息、UID信息、邮件服务器信息等。

    [sqlserver]
    db_user = XXXXXX
    db_pwd = XXXXXXX
    
    
    [sqlserver_qq]
    db_host = 110.119.120.114
    db_port = 1433
    
    
    [windows]
    user = 
    pwd = 
    
    
    [mail]
    host = zheshiceshidemail.qq.com
    port = 25
    user = 
    pwd = 
    sender = zhejiushiceshidebuyaodangzhen@qq.com

     获取连接串的组件mssql_get_db_connect.py

    # -*- coding: utf-8 -*-
    
    
    import sys
    import os
    import datetime
    import configparser
    import pymssql
    # pip3 install pymssql-2.1.4-cp37-cp37m-win_amd64.whl
    # pip3 install pymssql -i https://pypi.doubanio.com/simple
    
    
    # 获取连接串信息
    def mssql_get_db_connect(db_host, db_port):
        db_host = db_host
        db_port = db_port
    
        db_ps_file = os.path.join(sys.path[0], "qqmssqltest_db_server_conf.ini")
        config = configparser.ConfigParser()
        config.read(db_ps_file, encoding="utf-8")
        db_user = config.get('sqlserver', 'db_user')
        db_pwd = config.get('sqlserver', 'db_pwd')
    
        conn = pymssql.connect(host=db_host, port=db_port, user=db_user, password=db_pwd, charset="utf8", login_timeout=5, timeout=600, autocommit=True)
    
        return conn

     执行SQL语句的组件mysql_exec_sql.py

    # -*- coding: utf-8 -*-
    
    import mysql_get_db_connect
    
    
    def mysql_exec_dml_sql(db_host, db_port, exec_sql):
        conn = mysql_get_db_connect.mysql_get_db_connect(db_host, db_port)
        with conn.cursor() as cursor_db:
            cursor_db.execute(exec_sql)
            conn.commit()
    
    
    def mysql_exec_select_sql(db_host, db_port, exec_sql):
        conn = mysql_get_db_connect.mysql_get_db_connect(db_host, db_port)
        with conn.cursor() as cursor_db:
            cursor_db.execute(exec_sql)
            sql_rst = cursor_db.fetchall()
    
        return sql_rst
    
    
    def mysql_exec_select_sql_include_colnames(db_host, db_port, exec_sql):
        conn = mysql_get_db_connect.mysql_get_db_connect(db_host, db_port)
        with conn.cursor() as cursor_db:
            cursor_db.execute(exec_sql)
            sql_rst = cursor_db.fetchall()
            col_names = cursor_db.description
        return sql_rst, col_names

     发邮件的功能send_monitor_mail.py

    # -*- coding: utf-8 -*-
    
    
    # pip3 install PyEmail
    import smtplib
    from email.mime.text import MIMEText
    import configparser
    import os
    import sys
    
    
    # 发送告警邮件
    def send_monitor_mail(mail_subject, mail_body, mail_receivers="testwukongbaigujing@qq.com"):
        db_ps_file = os.path.join(sys.path[0], "qqmssqltest_db_server_conf.ini")
        config = configparser.ConfigParser()
        config.read(db_ps_file, encoding="utf-8")
        mail_host = config.get('mail', 'host')
        mail_port = config.get('mail', 'port')
        # mail_user = config.get('mail', 'user')
        # mail_pwd = config.get('mail', 'pwd')
        sender = config.get('mail', 'sender')
        # receivers = config.get('mail', 'receivers')
    
        # 发送HTML格式邮件
        message = MIMEText(mail_body, 'html', 'utf-8')
        # message = MIMEText(mail_body, 'plain', 'utf-8')
        message['subject'] = mail_subject
        message['From'] = sender
        message['To'] = mail_receivers
    
        try:
            smtpObj = smtplib.SMTP()
            smtpObj.connect(mail_host, mail_port)          # 25 为 SMTP 端口号
            # SMTP AUTH extension not supported by server.
            # https://github.com/miguelgrinberg/microblog/issues/76
            # smtpObj.ehlo()
            # smtpObj.starttls()
            # smtpObj.login(mail_user, mail_pwd)
            smtpObj.sendmail(sender, mail_receivers, message.as_string())
            smtpObj.quit()
            print("邮件发送成功")
        except Exception as e:
            print(e)
        # except smtplib.SMTPException:
            # print("Error: 无法发送邮件")

    3.主要功能代码

    收集到的DB数据文件的信息保存到表mssql_dblogsize中,其建表的脚本如下:

    CREATE TABLE [dbo].[mssql_dblogsize](
        [id] [int] IDENTITY(1,1) NOT NULL,
        [createtime] [datetime] NULL,
        [vip] [nvarchar](100) NULL,
        [port] [nvarchar](100) NULL,
        [Environment] [nvarchar](200) NULL,
        [Dbname] [varchar](200) NULL,
        [Logical_Name] [varchar](200) NULL,
        [Physical_Name] [varchar](1500) NULL,
        [Size] [bigint] NULL,
    PRIMARY KEY CLUSTERED 
    (
        [id] ASC
    )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
    ) ON [PRIMARY]
    GO
    
    ALTER TABLE [dbo].[mssql_dblogsize] ADD  DEFAULT (getdate()) FOR [createtime]
    GO

     为了方便对表mssql_dblogsize的数据进行管理和展示,在其基础上抽象加工出了一个视图v_mssql_dblogsize,注意Size大小的转换(Size/128/1024 as SizeGB)

    创建视图的脚本如下:

    CREATE view [dbo].[v_mssql_dblogsize]
    as 
    SELECT [id]
          ,[createtime]
          ,[vip]
          ,[port]
          ,[Environment]
          ,[Dbname]
          ,[Logical_Name]
          ,[Physical_Name]
          ,Size/128/1024 as SizeGB
      FROM [dbo].[mssql_dblogsize] 
    where size >50*128*1024
    and Physical_Name like '%ldf%'
    GO

    本测试实例使用的数据库为qqDB,监控的各个DB Server保存在了表QQDBServer中,注意Port 不一定为标准端口1433.

    collect_mssql_dblogsize_info.py

    # -*- coding: utf-8 -*-
    
    
    import sys
    import os
    import configparser
    import pymssql
    import mssql_get_db_connect
    import mssql_exec_sql
    from datetime import datetime
    
    def collect_mssql_dblogsize_info():
    
        db_ps_file = os.path.join(sys.path[0], "qqmssqltest_db_server_conf.ini")
        config = configparser.ConfigParser()
        config.read(db_ps_file, encoding="utf-8")
        m_db_host = config.get('sqlserver_qq', 'db_host')
        m_db_port = config.getint('sqlserver_qq', 'db_port')
    
        # 获取需要遍历的DB列表
        exec_sql_1 = """
    SELECT IP, case Port when '1444,1433' then '1433' else Port end as Port, Environment
    FROM qqDB.dbo.QQDBServer  
    where InUse =1 AND ServerType IN ('SQL') 
    and IP=VIP ;
        """
        sql_rst_1 = mssql_exec_sql.mssql_exec_select_sql(m_db_host, m_db_port, exec_sql_1)
        for j in sql_rst_1:
            db_host_2 = j[0]
            db_port_2 = j[1]
            db_Environment = j[2]
            exec_sql_2 = """
            select '""" + db_host_2 + """' as vip, '""" + db_port_2 + """' as port, '""" + db_Environment + """' as Environment,DB_NAME(database_id) AS DatabaseName,
    Name AS Logical_Name,
    Physical_Name, size 
    FROM master.sys.master_files;
            """
            try:
               sql_rst_2 = mssql_exec_sql.mssql_exec_select_sql(db_host_2, db_port_2, exec_sql_2)
            except Exception as e:
               print(e)
    
            for k in sql_rst_2:
               exec_sql_3 = """
               insert into qqDB..mssql_dblogsize([vip], [port], [Environment], [Dbname], [Logical_Name], [Physical_Name], [Size]) 
               values('%s', '%s', '%s', '%s', '%s', '%s', '%s');
               """
               conn = mssql_get_db_connect.mssql_get_db_connect(m_db_host, m_db_port)
               with conn.cursor() as cursor_db:
                   cursor_db.execute(exec_sql_3 % (k[0], k[1], k[2], k[3], k[4], k[5], k[6] ))
                   conn.commit()
    collect_mssql_dblogsize_info()

     告警邮件的功能实现为mssql_alert_dblogsize.py,此份代码的告警阈值设置的为50G,数据来自于视图v_mssql_dblogsize。

    # -*- coding: utf-8 -*-
    
    
    import sys
    import os
    import configparser
    import pymssql
    import mssql_get_db_connect
    import mssql_exec_sql
    import datetime
    import send_monitor_mail
    import pandas as pd
    
    
    def mssql_alert_dblogsize():
        mail_subject = "SQL Server DB Log Size Greater than 50G, please check!!! "
        mail_receivers = "testDBAgrp@qtiantianq.com"
    
    
        db_ps_file = os.path.join(sys.path[0], "qqmssqltest_db_server_conf.ini")
        config = configparser.ConfigParser()
        config.read(db_ps_file, encoding="utf-8")
        m_db_host = config.get('sqlserver_qq', 'db_host')
        m_db_port = config.getint('sqlserver_qq', 'db_port')
    
        # 获取需要遍历的DB列表
    
        exec_sql_4 = """
            SELECT [vip] as IP,[port],[Environment],[Dbname]
          ,[Logical_Name],[Physical_Name],[SizeGB],[createtime]
      FROM qqDB.[dbo].[v_mssql_dblogsize]
      order by VIP,Dbname;
        """
        sql_rst_4, col_name = mssql_exec_sql.mssql_exec_select_sql_include_colnames(m_db_host, m_db_port, exec_sql_4)
        # print(sql_rst_4)
    
        if len(sql_rst_4):
            mail_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    
            columns = []
            for i in range(len(col_name)):
                columns.append(col_name[i][0])
    
            df = pd.DataFrame(columns=columns)
            for i in range(len(sql_rst_4)):
                df.loc[i] = list(sql_rst_4[i])
    
    
            mail_body = df.to_html(index=False, justify="left").replace('<th>', '<th style = "color:red; text-align:left; background-color: yellow">')
            mail_html = "<html><body><h4>" + "Deal All : " + "<br><h4>" + "以下数据库的db log文件,已大于50G.请及时检查,谢谢! " + "<br><h4>" + mail_body + "</body></html>"
    
            send_monitor_mail.send_monitor_mail(mail_subject=mail_subject, mail_body=mail_html, mail_receivers=mail_receivers)
    
    
    mssql_alert_dblogsize()

     4.实现

    定时任务是通过windows的计划任务来实现的,在此不做过多的叙述。告警邮件的部分截图如下:

    5.附录

    1.报错定位,判断是不是log文件过大

    https://blog.csdn.net/weixin_30785593/article/details/99912405

    2.关于为什么数据库log文件过大,我们可以参考以下分享的文章

    https://blog.csdn.net/chinadm123/article/details/44941275

  • 相关阅读:
    org.springframework.web.servlet.mvc.method.annotation.RequestMappingHandlerMapping
    @RequestParam设置默认可以传空值
    python清空列表的方法
    Git分支管理的基本操作
    判断一个坐标点是否在不规则多边形内部的算法
    Flask 解析 Web 端 请求 数组
    SQL server 获取异常
    SQL server 大量数据导入和系统运行慢的问题
    SQL SERVER 跨服务器连接
    #pragma once
  • 原文地址:https://www.cnblogs.com/xuliuzai/p/14659567.html
Copyright © 2020-2023  润新知