1,统计各IP状态码为200的GET请求数,并按次数从高到低列出前10个IP
# Per client IP ($1), count requests whose field 9 equals 200 and whose method
# (field 6 without its leading quote) is GET; print the 10 highest counts.
awk '$9 == 200 && substr($6, 2) == "GET" { hits[$1]++ } END { for (ip in hits) print ip, hits[ip] }' access.log | sort -t ' ' -k2 -rn | head -n 10
2,统计包含xx字符串的日志行数
# Count the log lines that contain both the request string and the date/hour stamp.
# -F treats each pattern as literal text, so the "." in the file name no longer
# matches an arbitrary character; grep reads the file directly (no extra cat).
grep -F 'GET /adsview/cqgd/img/tan/cq_320.png' access.log | grep -Fc '10/Jun/2019:15'
3,实时查看包含xx字符串的日志
# Follow the log as it grows and show only lines containing the literal text.
# -F keeps the "." in the file name from acting as a regex wildcard.
tail -f access.log | grep -F 'cq_mb.html'
4,使用python获取日志,并保存到MongoDB进行分析
执行命令:
python logPy.py ./access.log
logPy.py
"""Load an Apache-style access log into MongoDB for analysis.

Usage:
    python logPy.py ./access.log

Every line that matches the log-format regex below is turned into a dict of
its named fields and inserted into the ``cq_ads`` collection of the
``access`` database on the MongoDB server at ``localhost``.
"""
import argparse
import re
import sys
from collections import Counter  # only needed by the optional statistics snippet in main()

# Regex for the common Apache log format.
parts = [
    r'(?P<host>\S+)',        # host %h
    r'\S+',                  # ident %l (unused)
    r'(?P<user>\S+)',        # user %u
    r'\[(?P<time>.+)\]',     # time %t
    r'"(?P<request>.*)"',    # request "%r"
    r'(?P<status>[0-9]+)',   # status %>s
    r'(?P<size>\S+)',        # size %b (careful, can be '-')
    r'"(?P<referrer>.*)"',   # referrer "%{Referer}i"
    r'"(?P<agent>.*)"',      # user agent "%{User-agent}i"
]
# Fields are separated by whitespace; trailing whitespace (the newline) is allowed.
pattern = re.compile(r'\s+'.join(parts) + r'\s*')


def _build_parser():
    """Return the command-line parser (one positional argument: the log file)."""
    parser = argparse.ArgumentParser(description='python for access.log')
    parser.add_argument('log_file', metavar='LOG_FILE',
                        type=argparse.FileType('r'),
                        help='Path to the Apache log file')
    return parser


def parse_lines(lines):
    """Yield a dict of named fields for every line that matches ``pattern``.

    Args:
        lines: Any iterable of strings, e.g. an open text file.

    Yields:
        dict with the keys ``host``, ``user``, ``time``, ``request``,
        ``status``, ``size``, ``referrer`` and ``agent``; all values are
        strings. Lines that do not match the pattern are skipped.
    """
    for line in lines:
        match = pattern.match(line)  # run the regex once per line
        if match:
            yield match.groupdict()


def store_entries(entries):
    """Insert ``entries`` (a non-empty list of dicts) into ``access.cq_ads``.

    The connection is closed again even if the insert fails.
    """
    # Imported here so the parsing helpers above can be imported and tested
    # on machines that do not have pymongo installed.
    import pymongo

    client = pymongo.MongoClient('localhost')
    try:
        client['access']['cq_ads'].insert_many(entries)
    finally:
        client.close()


def main():
    """Parse the log file named on the command line and store it in MongoDB.

    Returns:
        0 on success, 1 when no line of the file could be parsed.
    """
    args = _build_parser().parse_args()

    # argparse opened the file; the with-block makes sure it is closed again.
    with args.log_file:
        log_data = list(parse_lines(args.log_file))

    if not log_data:
        # insert_many() rejects an empty document list, so stop with a clear
        # message instead of a traceback.
        sys.stderr.write('No parsable log entries found in %s\n'
                         % args.log_file.name)
        return 1

    store_entries(log_data)

    # Optional: using a counter to get stats on the status in log entries.
    # Refer = http://docs.python.org/2/library/collections.html#collections.Counter
    # status_counter = Counter(x['status'] for x in log_data)
    # Printing the STATUS count sorted by highest to lowest count
    # print("Most common STATUSes in the Apache log file %s are:" % args.log_file.name)
    # for x in status_counter.most_common():
    #     print(" %s Status %d times" % x)
    return 0


if __name__ == '__main__':
    sys.exit(main())