• Python 查找binlog文件


    经常需要在 binlog 中查找一些日志信息,于是写了一个简单的脚本。对于非常巨大的 binlog 文件,该脚本的速度可能会比较慢,毕竟它是先把 mysqlbinlog 的全部输出读进一个 list 再逐行匹配,暂时没想到好办法。

    详细看代码:

    #!/usr/bin/python
    
    #2016-04-12
    
    #search string in the binlogs
    
    #usage:
    #put this file into binlog-dir,exec as:
    #"python test.py 111 123 update" or 
    #"python test.py 111 123 update and insert" or 
    #"python test.py 111 123 update or delete"
    #the nums are the binlog-num.
    
    import sys
    import subprocess
     
    def find_str(files):
        """Yield lower-cased ``mysqlbinlog`` output lines matching the CLI terms.

        The search terms are taken from ``sys.argv``:

        * ``argv[3]`` alone: the line must contain that term;
        * ``argv[3] and argv[5]``: the line must contain both terms;
        * ``argv[3] or argv[5]``: the line must contain at least one term.

        A term only matches when it is immediately followed by a space, and
        the comparison is case-insensitive.  Any other argument layout yields
        nothing (and no binlog is decoded).

        Args:
            files: iterable of binlog file names passed to ``mysqlbinlog``.

        Yields:
            Every matching output line, lower-cased, newline included.

        Raises:
            OSError: if the ``mysqlbinlog`` executable cannot be started.
        """
        argc = len(sys.argv)
        if argc == 4:
            terms, combine = [sys.argv[3]], all
        elif argc == 6 and sys.argv[4] == "and":
            terms, combine = [sys.argv[3], sys.argv[5]], all
        elif argc == 6 and sys.argv[4] == "or":
            terms, combine = [sys.argv[3], sys.argv[5]], any
        else:
            return

        # Lines are lower-cased before comparison, so the terms must be too;
        # otherwise an upper-case term such as "UPDATE" could never match.
        needles = [term.lower() + ' ' for term in terms]

        for name in files:
            # Argument list instead of a shell string: no shell is spawned and
            # the file name is passed through verbatim.
            proc = subprocess.Popen(["mysqlbinlog", name], stdout=subprocess.PIPE)
            try:
                # Read the pipe one line at a time rather than readlines(), so
                # a huge binlog is never held in memory as one big list.
                for line in iter(proc.stdout.readline, b''):
                    if not isinstance(line, str):
                        # The pipe yields bytes on Python 3; decode so the
                        # str search terms can be compared against it.
                        line = line.decode('utf-8', 'replace')
                    line = line.lower()
                    if combine(needle in line for needle in needles):
                        yield line
            finally:
                # Always close the pipe and reap the child, even when the
                # consumer stops iterating early.
                proc.stdout.close()
                proc.wait()
    
        
    if __name__ == "__main__":
        # argv[1] and argv[2] are the first and last binlog sequence numbers
        # (inclusive); file names are "updatelog." plus the 6-digit number.
        first_arg, last_arg = sys.argv[1], sys.argv[2]
        binlog_names = []
        for seq in range(int(first_arg), int(last_arg) + 1):
            binlog_names.append("updatelog.{0:06d}".format(seq))

        # Print every matching line as soon as find_str produces it.
        for matched in find_str(binlog_names):
            print(matched)

    第二次改进版本,由于 py2 不支持 yield from 语句,gen_concatenate() 可能有点绕;详细看代码:

    #!/usr/bin/python
    
    #2016-04-12
    
    #search string in the binlogs
    
    #usage:
    #put this file into binlog-dir,exec as "python test.py 111 123 update" or "python test.py 111 123 update and insert" or "python test.py 111 123 update or delete"
    #the nums are the binlog-num.
    
    import sys
    import subprocess
    
    def find_str(files):
        """Yield the ``mysqlbinlog`` output of each file as a list of lines.

        ``sys.argv`` is printed once when iteration starts, then every name in
        *files* is decoded with ``mysqlbinlog``.

        Args:
            files: iterable of binlog file names.

        Yields:
            One list per file holding every line ``mysqlbinlog`` wrote to
            stdout (a plain list, not a generator object).

        Raises:
            OSError: if the ``mysqlbinlog`` executable cannot be started.
        """
        print(sys.argv)
        for name in files:
            # Argument list instead of a shell string: no shell is spawned.
            proc = subprocess.Popen(["mysqlbinlog", name], stdout=subprocess.PIPE)
            try:
                output = proc.stdout.readlines()
            finally:
                # Close the pipe and reap the child so no zombie process or
                # open file descriptor is left behind for each binlog.
                proc.stdout.close()
                proc.wait()
            yield output
    
    def gen_concatenate(lines):
        """Flatten an iterable of line collections into one stream of lines.

        Args:
            lines: iterable whose items are themselves iterables of lines.

        Yields:
            Every inner item, in order.
        """
        for group in lines:
            for item in group:
                yield item


    def gen_grep(lines):
        """Yield the lower-cased lines that match the CLI search terms.

        The search terms are taken from ``sys.argv``:

        * ``argv[3]`` alone: the line must contain that term;
        * ``argv[3] and argv[5]``: the line must contain both terms;
        * ``argv[3] or argv[5]``: the line must contain at least one term.

        A term only matches when it is immediately followed by a space, and
        the comparison is case-insensitive.  Any other argument layout yields
        nothing.

        Args:
            lines: iterable of text (or bytes) lines.

        Yields:
            Every matching line, lower-cased.
        """
        argc = len(sys.argv)
        if argc == 4:
            terms, combine = [sys.argv[3]], all
        elif argc == 6 and sys.argv[4] == "and":
            terms, combine = [sys.argv[3], sys.argv[5]], all
        elif argc == 6 and sys.argv[4] == "or":
            terms, combine = [sys.argv[3], sys.argv[5]], any
        else:
            return

        # Lines are lower-cased before comparison, so the terms must be too;
        # otherwise an upper-case term such as "UPDATE" could never match.
        needles = [term.lower() + ' ' for term in terms]

        for line in lines:
            if not isinstance(line, str):
                # Subprocess pipes yield bytes on Python 3; decode so the
                # str search terms can be compared against the line.
                line = line.decode('utf-8', 'replace')
            line = line.lower()
            if combine(needle in line for needle in needles):
                yield line


    if __name__ == "__main__":
        # argv[1] and argv[2] are the first and last binlog sequence numbers
        # (inclusive); file names are "updatelog." plus the 6-digit number.
        start = sys.argv[1]
        end = sys.argv[2]
        files = ["updatelog.{0:06d}".format(i) for i in range(int(start), int(end)+1)]

        # Pipeline: per-file line lists -> single line stream -> matches.
        f = find_str(files)
        lines = gen_concatenate(f)
        greplines = gen_grep(lines)
        for i in greplines:
            print(i)

    脚本 1 理解起来更加容易,因为它将全部功能都封装在一个函数体内。

    更新:

    使用了 re 正则匹配,有时候日志里面记录的表名是带反引号的,比如`user`这样,见代码:

    #!/usr/bin/python
    
    #2016-04-27
    
    #search string in the binlogs
    
    #usage:
    #put this file into binlog-dir,exec as "python test.py 111 123 update" or "python test.py 111 123 update and insert" or "python test.py 111 123 update or delete"
    #the nums are the binlog-num.
    
    import sys
    import subprocess
    import re
    
    def find_str(files):
        """Run ``mysqlbinlog`` on each file and yield its output lines as a list.

        ``sys.argv`` is printed once when iteration starts.

        Args:
            files: iterable of binlog file names.

        Yields:
            For each file, the list of lines ``mysqlbinlog`` wrote to stdout.

        Raises:
            OSError: if the ``mysqlbinlog`` executable cannot be started.
        """
        print(sys.argv)
        for name in files:
            # Argument list instead of a shell string: no shell is spawned.
            proc = subprocess.Popen(["mysqlbinlog", name], stdout=subprocess.PIPE)
            try:
                output = proc.stdout.readlines()
            finally:
                # Close the pipe and wait for the child so that processing
                # many binlogs does not leak descriptors or zombie processes.
                proc.stdout.close()
                proc.wait()
            yield output
    
    def gen_concatenate(lines):
        """Chain the inner iterables of *lines* into a single lazy stream."""
        flattened = (entry for group in lines for entry in group)
        for entry in flattened:
            yield entry
    
    def gen_grep(lines):
        """Yield the lower-cased lines matched by the regex terms in ``sys.argv``.

        The search terms are regular expressions, matched case-insensitively:

        * ``argv[3]`` alone: the line must match that pattern;
        * ``argv[3] and argv[5]``: the line must match both patterns;
        * ``argv[3] or argv[5]``: the line must match at least one pattern.

        Any other argument layout yields nothing.

        Args:
            lines: iterable of text (or bytes) lines.

        Yields:
            Every matching line, lower-cased.

        Raises:
            re.error: if a search term is not a valid regular expression.
        """
        argc = len(sys.argv)
        if argc == 4:
            terms, combine = [sys.argv[3]], all
        elif argc == 6 and sys.argv[4] == "and":
            terms, combine = [sys.argv[3], sys.argv[5]], all
        elif argc == 6 and sys.argv[4] == "or":
            # "or" must accept a line matching EITHER pattern; combining the
            # two searches with `and` would make it identical to "and".
            terms, combine = [sys.argv[3], sys.argv[5]], any
        else:
            return

        # Compile once up front instead of re-resolving the pattern per line.
        patterns = [re.compile(term, re.I) for term in terms]

        for line in lines:
            if not isinstance(line, str):
                # Subprocess pipes yield bytes on Python 3; decode so the
                # str patterns can be applied to the line.
                line = line.decode('utf-8', 'replace')
            line = line.lower()
            if combine(pattern.search(line) is not None for pattern in patterns):
                yield line
    
    if __name__ == "__main__":
        # argv[1] and argv[2] give the inclusive range of binlog sequence
        # numbers; each file is named "updatelog." plus the 6-digit number.
        first_arg, last_arg = sys.argv[1], sys.argv[2]
        seq_numbers = range(int(first_arg), int(last_arg) + 1)
        binlog_names = ["updatelog.{0:06d}".format(seq) for seq in seq_numbers]

        # Pipeline: per-file line lists -> single line stream -> matches.
        per_file_output = find_str(binlog_names)
        all_lines = gen_concatenate(per_file_output)
        for hit in gen_grep(all_lines):
            print(hit)
    
    search.py
    search.py
  • 相关阅读:
    HDU-1561
    POJ 1088
    UESTC-878
    CodeForces
    HDU 5753
    HDU 1568
    二分图入门题
    二分图匹配入门题
    树形dp入门
    UVA
  • 原文地址:https://www.cnblogs.com/bvac/p/5384756.html
Copyright © 2020-2023  润新知