• Python实现自动化监控远程服务器


    最近发现Python可以做很多事情,在监控服务器方面有其独特的优势:耗费资源少,开发周期短。

    首先我们做一个定时或者实时脚本timedtask.py,让其定时监控目标服务器,两种方式:

    第一种:

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    # @Time    : 2017/11/27 15:59
    # @Desc    : 定时任务,以需要的时间间隔执行某个命令
    # @File    : timedtask.py
    # @Software: PyCharm
    
    import time, os
    from monitorserver import alltask
    
    
    def roll_back(cmd, inc = 60):
        """Call ``alltask`` in an endless loop, sleeping ``inc`` seconds after each call.

        Args:
            cmd: Not used by this implementation; kept so existing callers
                that pass a command string keep working.
            inc: Number of seconds to sleep after each ``alltask`` call.

        This function never returns.
        """
        while True:
            # Start one round of monitoring, then wait for the next round.
            alltask()
            time.sleep(inc)
    
    
    if __name__ == '__main__':
        # Guarded so that importing this module does not start the endless loop.
        roll_back("echo %time%", 5)

    第二种:

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    # @Time    : 2017/11/27 15:59
    # @Desc    : 定时任务,以需要的时间间隔执行某个命令
    # @File    : timedtask.py
    # @Software: PyCharm
    
    import time, os
    
    def roll_back(cmd, inc = 60):
        """Run the monitoring script in an endless loop, sleeping ``inc`` seconds between runs.

        Args:
            cmd: Not used by this implementation; kept so existing callers
                that pass a command string keep working.
            inc: Number of seconds to sleep after each run.

        This function never returns.
        """
        while True:
            # Location of the monitoring script; adjust the path for your deployment.
            status = os.system('python  /home/../monitorserver.py')
            if status != 0:
                # Report a failed run instead of silently ignoring the exit status.
                print('monitorserver.py exited with status %s' % status)
            time.sleep(inc)
    
    
    if __name__ == '__main__':
        # Guarded so that importing this module does not start the endless loop.
        roll_back("echo %time%", 5)

    做过监控的应该都知道,我们主要监控服务器的负载、磁盘、内存、CPU、网络接口(流量)和端口。针对这些,我做了以下远程监控。第一种和第二种方式使用的监控代码相同,monitorserver.py 代码如下:

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    # @Time    : 2017/11/27 15:59
    # @Desc    : 服务器监控代码
    # @File    : monitorserver.py
    # @Software: PyCharm
    
    import pexpect
    import re
    
    import time
    import threading
    
    """
    主方法
    127.0.0.1#远程服务器ip地址
    """
    def ssh_command(user, host, password, command):
        """Spawn ``ssh`` to run ``command`` on ``host`` and answer the password prompt.

        Args:
            user: Login name passed to ``ssh -l``.
            host: Address of the remote server.
            password: Password sent when ssh prints its ``password: `` prompt.
            command: Command line appended to the ssh invocation.

        Returns:
            The ``pexpect`` child with the password already sent, or ``None``
            when ssh timed out or exited before asking for a password. The
            caller is expected to read the output (e.g. ``expect(pexpect.EOF)``).
        """
        ssh_new_key = 'Are you sure you want to continue connecting'
        password_prompt = 'password: '
        child = pexpect.spawn('ssh -l %s %s %s' % (user, host, command))
        # Indexes 0 and 1 are failures, 2 is the password prompt and 3 is the
        # unknown-host-key question. EOF is listed so that an ssh process that
        # exits early is reported here instead of raising out of expect().
        patterns = [pexpect.TIMEOUT, pexpect.EOF, password_prompt, ssh_new_key]
        i = child.expect(patterns)
        if i == 3:
            child.sendline('yes')
            # Wait for the password prompt exactly once. Expecting it a second
            # time would consume nothing and always end in a timeout.
            i = child.expect(patterns[:3])
        if i != 2:
            print('ERROR!')
            print('SSH could not login. Here is what SSH said:')
            print('%s %s' % (child.before, child.after))
            # Do not leak the ssh process and its pty on failure.
            child.close()
            return None
        child.sendline(password)
        return child
    
    
    """
    内存监控
    """
    def mem_info():
        """Print memory and swap statistics read from the remote ``/proc/meminfo``.

        Values are looked up by field name, so extra fields such as
        ``MemAvailable`` do not shift the results. Nothing is printed besides an
        error line when the ssh login fails or a required field is missing.
        """
        child = ssh_command("远程服务器用户名", "127.0.0.1", "远程服务器密码", "cat /proc/meminfo")
        if child is None:
            # ssh_command already reported the login failure.
            return
        child.expect(pexpect.EOF)
        # Map "Name:   12345 kB" lines to {"Name": "12345"}.
        meminfo = dict(re.findall(r"(\w+):\s*(\d+) kB", child.before))
        required = ("MemTotal", "MemFree", "Buffers", "Cached",
                    "SwapCached", "SwapTotal", "SwapFree")
        missing = [key for key in required if key not in meminfo]
        if missing:
            print('ERROR! /proc/meminfo output is missing: %s' % ', '.join(missing))
            return
        (mem_total, mem_free, buffers, cached,
         swap_cached, swap_total, swap_free) = [int(meminfo[key]) for key in required]
    
        print('******************************内存监控*********************************')
        print("*******************时间: %s ******************"
              % time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
        print("总内存: %s" % mem_total)
        print("空闲内存: %s" % mem_free)
        print("给文件的缓冲大小: %s" % buffers)
        print("高速缓冲存储器使用的大小: %s" % cached)
        print("被高速缓冲存储用的交换空间大小: %s" % swap_cached)
        if swap_total == 0:
            print(u"交换内存总共为:0")
        else:
            rate_swap = 100 - 100 * swap_free / float(swap_total)
            print(u"交换内存利用率: %s" % rate_swap)
        # Buffers and page cache are counted as free for the usage ratio.
        free_mem = mem_free + buffers + cached
        used_mem = mem_total - free_mem
        rate_mem = 100 * used_mem / float(mem_total)
        print(u"内存利用率: %.2f %%" % rate_mem)
    
    
    """
    内核线程、虚拟内存、磁盘、陷阱和 CPU 活动的统计信息
    """
    def vm_stat_info():
        """Print the columns of the last sample of ``vmstat 1 2`` on the remote host.

        The columns are labelled in vmstat order:
        r b swpd free buff cache si so bi bo in cs us sy id wa st.
        ``si`` is memory swapped in and ``so`` is memory swapped out.
        """
        child = ssh_command("远程服务器用户名", "127.0.0.1", "远程服务器密码", "vmstat 1 2 | tail -n 1")
        if child is None:
            # ssh_command already reported the login failure.
            return
        child.expect(pexpect.EOF)
        vmstat_info = child.before.strip().split()
        labels = (
            "等待运行进程的数量:",
            "处于不间断状态的进程:",
            "使用虚拟内存(swap)的总量:",
            "空闲的内存总量:",
            "用作缓冲的内存总量:",
            "用作缓存的内存总量:",
            "交换入内存总量 :",
            "交换出内存总量 :",
            "从一个块设备接收:",
            "发送到块设备:",
            "每秒的中断数:",
            "每秒的上下文切换数:",
            "用户空间上进程运行的时间百分比:",
            "内核空间上进程运行的时间百分比:",
            "闲置时间百分比:",
            "等待IO的时间百分比:",
            "从虚拟机偷取的时间百分比:",
        )
        # The final "st" column is optional here; anything shorter than the
        # 16 columns before it is not a vmstat data row.
        if len(vmstat_info) < len(labels) - 1:
            print('ERROR! unexpected vmstat output: %s' % ' '.join(vmstat_info))
            return
        print('****************************内核线程、虚拟内存、磁盘、陷阱和 CPU 活动的统计信息监控****************************')
        print("*******************时间: %s ******************"
              % time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
        # zip stops at the shorter sequence, so extra or absent trailing
        # columns are simply not printed.
        for label, value in zip(labels, vmstat_info):
            print("%s %s" % (label, value))
    
    
    '''
    cpu监控
    '''
    def cpu_info():
        """Print the processor count and per-processor details from the remote ``/proc/cpuinfo``.

        Every ``processor`` line starts a new record, so each CPU gets its own
        dictionary of fields. Fields that the remote kernel does not report are
        printed as empty strings.
        """
        child = ssh_command("远程服务器用户名", "127.0.0.1", "远程服务器密码", "cat /proc/cpuinfo")
        if child is None:
            # ssh_command already reported the login failure.
            return
        child.expect(pexpect.EOF)
        cpuinfo = child.before
    
        processors = []
        current = None
        for line in cpuinfo.splitlines():
            key, sep, value = line.partition(':')
            if not sep:
                # Blank separator lines and anything that is not "key : value".
                continue
            key = key.strip()
            if key == 'processor':
                # Start a fresh record; sharing one dict would make every CPU
                # show the values of the last one.
                current = {}
                processors.append(current)
            elif current is not None:
                current[key] = value.strip()
    
        print('***************************************cpu监控***************************************')
        print("*******************时间: %s ******************"
              % time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
        print(u"CPU数目: %s" % len(processors))
        fields = (
            ("CPU属于的名字及其编号、标称主频:", 'model name'),
            ("CPU属于其系列中的哪一代的代号:", 'model'),
            ("CPU制造商:", 'vendor_id'),
            ("CPU产品系列代号:", 'cpu family'),
            ("CPU的实际使用主频:", 'cpu MHz'),
        )
        for processor in processors:
            for label, key in fields:
                print("%s %s" % (label, processor.get(key, '')))
    
    
    """
    负载均衡
    """
    def load_stat():
        """Print the five fields of the remote ``/proc/loadavg``.

        The first three fields are the load averages over the last 1, 5 and
        15 minutes, followed by running/total scheduling entities and the most
        recently created process id.
        """
        child = ssh_command("远程服务器用户名", "127.0.0.1", "远程服务器密码", "cat /proc/loadavg")
        if child is None:
            # ssh_command already reported the login failure.
            return
        child.expect(pexpect.EOF)
        loadavgs = child.before.strip().split()
        if len(loadavgs) < 5:
            print('ERROR! unexpected /proc/loadavg output: %s' % ' '.join(loadavgs))
            return
        print('************************负载均衡监控****************************')
        print("*******************时间: %s ******************"
              % time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
        print("系统最近1分钟的平均负载: %s" % loadavgs[0])
        print("系统最近5分钟的平均负载: %s" % loadavgs[1])
        print("系统最近15分钟的平均负载: %s" % loadavgs[2])
        print("分子是正在运行的进程数,分母为总进程数: %s" % loadavgs[3])
        print("最近运行的进程id: %s" % loadavgs[4])
    
    
    
    """
    获取网络接口的输入和输出
    """
    def  ionetwork():
        """Print received/transmitted traffic per interface from the remote ``/proc/net/dev``.

        The printed dictionary maps each interface name to
        ``{'Receive': ..., 'Transmit': ...}``, where both values are the byte
        counters divided by 1024*1024 and rounded to two decimals.
        """
        child = ssh_command("远程服务器用户名", "127.0.0.1", "远程服务器密码", "cat /proc/net/dev")
        if child is None:
            # ssh_command already reported the login failure.
            return
        child.expect(pexpect.EOF)
        # splitlines() copes with the "\r\n" line endings a pty produces.
        lines = child.before.strip().splitlines()
        print('************************获取网络接口的输入和输出监控****************************')
        print("*******************时间: %s ******************"
              % time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
        net = {}
        bytes_per_unit = 1024.0 * 1024.0
        # The first two lines are the column headers.
        for line in lines[2:]:
            eth_name, sep, counters = line.partition(":")
            values = counters.split()
            if not sep or len(values) < 9:
                # Not an "iface: counters..." row; skip it instead of crashing.
                continue
            # Column 0 is received bytes, column 8 is transmitted bytes.
            net[eth_name.strip()] = {
                'Receive': round(float(values[0]) / bytes_per_unit, 2),
                'Transmit': round(float(values[8]) / bytes_per_unit, 2),
            }
        print(net)
    
    
    """ 
    磁盘空间监控
    """
    def disk_stat():
        """Print one line per filesystem from the remote ``df -h`` output.

        Each line shows filesystem, size, used, available, use percentage and
        mount point. A row that ``df`` wrapped over two lines (long filesystem
        name) is joined back together before it is printed.
        """
        child = ssh_command("远程服务器用户名", "127.0.0.1", "远程服务器密码", "df -h")
        if child is None:
            # ssh_command already reported the login failure.
            return
        child.expect(pexpect.EOF)
        # splitlines() copes with the "\r\n" line endings a pty produces.
        lines = child.before.strip().splitlines()
        print('************************磁盘空间监控****************************')
        print("*******************时间: %s ******************"
              % time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
        pending = []
        # The first line is the column header.
        for line in lines[1:]:
            fields = pending + line.split()
            if len(fields) < 6:
                # Incomplete row: keep it and complete it with the next line.
                pending = fields
                continue
            pending = []
            print("\t文件系统: %s \t容量: %s \t已用: %s \t可用: %s \t已用%%: %s \t挂载点: %s"
                  % (fields[0], fields[1], fields[2], fields[3], fields[4],
                     ' '.join(fields[5:])))
    
    
    """ 
    端口监控
    一般是远程服务器用户名用户
    """
    def getComStr():
        """Print the raw output of ``netstat -tpln`` run on the remote host."""
        child = ssh_command("远程服务器用户名", "127.0.0.1", "远程服务器密码", "netstat -tpln")
        if child is None:
            # ssh_command already reported the login failure.
            return
        child.expect(pexpect.EOF)
        listening = child.before
        print('******************************端口监控*********************************')
        print("*******************时间: %s ******************"
              % time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
        print(listening)
    
    
    
    """
    CPU使用率监控
    """
    def  cpu():
        """Print overall CPU utilisation computed from two samples of the remote ``/proc/stat``.

        The aggregate ``cpu`` line is read twice over two consecutive ssh
        sessions. Utilisation is the share of non-idle ticks among all ticks
        that elapsed between the two samples.
        """
        def read_cpu_times():
            # Return the user..steal counters of the aggregate "cpu" line, or None.
            child = ssh_command("远程服务器用户名", "127.0.0.1", "远程服务器密码", 'cat /proc/stat | grep "cpu "')
            if child is None:
                return None
            child.expect(pexpect.EOF)
            for line in child.before.splitlines():
                fields = line.split()
                if len(fields) >= 5 and fields[0] == 'cpu':
                    # Fields 1-8: user nice system idle iowait irq softirq steal.
                    # Guest time (fields 9+) is left out because it is already
                    # accounted in user time.
                    return [int(value) for value in fields[1:9]]
            return None
    
        first = read_cpu_times()
        second = read_cpu_times()
        if first is None or second is None:
            print('ERROR! could not read the cpu line of /proc/stat')
            return
        print('************************cpu使用情况****************************')
        print("*******************时间: %s ******************"
              % time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
        total_1 = sum(first)
        total_2 = sum(second)
        elapsed = total_2 - total_1
        # Index 3 of the counter list is the idle counter.
        idle = second[3] - first[3]
        print('总的cpu时间1: %s' % total_1)
        print('总的cpu时间2: %s' % total_2)
        print('时间间隔内的所有时间片: %s' % elapsed)
        print('计算空闲时间idle: %s' % idle)
        if elapsed > 0:
            usage = 100.0 * (elapsed - idle) / elapsed
        else:
            # No ticks elapsed between the samples; avoid dividing by zero.
            usage = 0.0
        print("计算cpu使用率: %.2f %%" % usage)
    
    """
    第一种执行
    """
    def alltask():
        """Start every monitoring function in its own thread and return immediately.

        The threads are not joined. The ``except`` clause only covers errors
        raised while creating or starting threads; an exception raised inside a
        monitoring function happens in that function's thread and is not
        caught here.
        """
        targets = (
            mem_info,
            vm_stat_info,
            cpu_info,
            load_stat,
            ionetwork,
            disk_stat,
            getComStr,
            cpu,
        )
        try:
            threads = [threading.Thread(target=target) for target in targets]
            for thread in threads:
                thread.start()
        except Exception as e:
            print(str(e))
    
    """
    第二种执行
    """
    if __name__ == '__main__':
        # Running the file as a script performs one monitoring round. alltask()
        # starts the same eight monitoring threads that used to be spelled out
        # here a second time and reports start-up errors the same way.
        alltask()

    监控结果如下:

    接下来要做的是把监控结果可视化,可惜没时间做,就交给各位了!!!

    花了两天时间整理的,分享给大家,希望对各位有帮助!!!

  • 相关阅读:
    LeetCode:位运算实现加法
    Java基础教程:多线程基础——内存模型
    Python:笔记(3)——面向对象编程
    Java基础教程:反射基础
    MYSQL:基础——3N范式的表结构设计
    GIT学习笔记(1):创建版本库
    你为什么应该经常访问招聘网站?招聘网站至少有4个方面的价值!
    你为什么应该经常访问招聘网站?招聘网站至少有4个方面的价值!
    MySql update inner join!MySql跨表更新 多表update sql语句?如何将select出来的部分数据update到另一个表里面?
    MySql update inner join!MySql跨表更新 多表update sql语句?如何将select出来的部分数据update到另一个表里面?
  • 原文地址:https://www.cnblogs.com/IT-LearnHall/p/9426235.html
Copyright © 2020-2023  润新知