• Python正则表达式,统计分析nginx访问日志


    目标:

      1.正则表达式

      2.OOP编程,统计nginx访问日志中不同IP地址出现的次数并排序

    1.正则表达式

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    
    
    import re
    
    # Demonstrates the basic `re` entry points: match, search, split,
    # findall and finditer. Single-argument print(...) calls are used so the
    # script produces the same output on Python 2 and Python 3.

    # match: only succeeds if the pattern matches at the start of the string.
    # Approach 1: compile the pattern once and call methods on it.
    pattern1 = re.compile(r'hello', re.I)

    match = pattern1.match('Hello World')

    if match:
        print(match.group())

    # Approach 2: use the module-level helper without compiling first.

    m = re.match(r'hello', 'hello world.')

    print(m.group())

    # search: scans the whole string for the first occurrence.
    pattern1 = re.compile(r'World')

    match = pattern1.search('Hello, hello World.')

    if match:
        print(match.group())


    # split: cut the string wherever a run of digits occurs.
    # The backslash is required: r'd+' would match the letter "d", not digits.
    pattern1 = re.compile(r'\d+')
    match = pattern1.split('one1two2three3')
    print(match)
    for piece in match:
        print(piece)

    # findall: return every digit run as a list of strings.
    match = pattern1.findall('one1two2three3')
    print(match)


    # finditer: like findall, but yields match objects lazily.
    match = pattern1.finditer('one1two2three3')
    for found in match:
        print(found.group())

    •运行代码,测试效果

    2.OOP编程,统计nginx访问日志中不同IP地址出现的次数并排序

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    
    import re
    
    class CountPatt(object):
        """Tally how often a regular expression matches across the lines of a file.

        At most one match is counted per line: the first one found by
        ``search``. Counts are keyed by the full matched text and are kept in
        ``self.result``, which is never reset, so repeated ``count_patt`` calls
        accumulate into the same tally.
        """

        def __init__(self, patt):
            """Compile ``patt`` and start with an empty tally.

            :param patt: regular expression source string.
            """
            self.patt = re.compile(patt)
            self.result = {}

        def count_patt(self, fname):
            """Scan ``fname`` line by line and update the tally.

            :param fname: path of the text file to read.
            :return: ``self.result`` (the live dict, not a copy) mapping
                matched text to the number of lines it was found on.
            """
            with open(fname) as fobj:
                for line in fobj:
                    match = self.patt.search(line)
                    if match:
                        key = match.group()
                        self.result[key] = self.result.get(key, 0) + 1

            return self.result

        def sort(self):
            """Return the tally as ``(matched_text, count)`` pairs, highest count first.

            ``sorted`` is stable even with ``reverse=True``, so entries with
            equal counts keep the dictionary's iteration order. ``self.result``
            is left untouched.

            :return: a new list of ``(key, count)`` tuples.
            """
            return sorted(self.result.items(),
                          key=lambda pair: pair[1],
                          reverse=True)
    
    
    # Script entry point: count the leading IP address of every line in the
    # access log, then print the raw tally followed by the tally ordered by
    # descending hit count.
    if __name__ == "__main__":
        httpd_log = '/tmp/access.log'
        # Dotted-quad address anchored at the start of the line. The
        # backslashes are essential: without them "d" and "." are a literal
        # letter and a wildcard, and no IP address would ever match.
        ip_pattern = r'^(\d+\.){3}\d+'
        # Alternative pattern for counting by browser name; not used below.
        browser_pattern = r'Chrome|Safari|Firefox'
        a = CountPatt(ip_pattern)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(a.count_patt(httpd_log))
        print(a.sort())

    •运行代码,测试效果

    handetiandeMacBook-Pro:test xkops$ python test2.py
    {'192.168.207.21': 25, '192.168.80.165': 20, '192.168.207.1': 46, '127.0.0.1': 10}
    [('192.168.207.1', 46), ('192.168.207.21', 25), ('192.168.80.165', 20), ('127.0.0.1', 10)]
  • 相关阅读:
    8月4日
    8月3日 hive配置
    8月2日
    8月1日
    7月31日
    7月30日
    7月29日
    7月28日
    第六周总结
    重大技术需求进度报告一
  • 原文地址:https://www.cnblogs.com/xkops/p/6289979.html
Copyright © 2020-2023  润新知