日志分析
- 时间戳格式化
# Rewrite field 3 (a timestamp in 100 ns ticks since the Unix epoch) as
# "YYYY-mm-dd HH:MM:SS.fffffff" in local time, and the last field (a duration in
# 100 ns ticks) as microseconds with a "us" suffix.
# strftime() is not part of POSIX awk; gawk provides it.
#  - The timestamp is split as a string, not with / and %: a 17-digit value is
#    larger than 2^53, so floating-point arithmetic can lose the lowest digit,
#    and the string split keeps leading zeros in the 7-digit fraction.
#  - The duration is formatted with %.15g instead of the default CONVFMT (%.6g),
#    which would truncate long durations or print them as e.g. 1e+06us.
#  - Lines whose fields are not plain digits are printed unchanged.
awk '{
  n = length($3)
  if ($3 ~ /^[0-9]+$/ && n > 7)
    $3 = strftime("%Y-%m-%d %H:%M:%S", substr($3, 1, n - 7) + 0) "." substr($3, n - 6)
  if ($NF ~ /^[0-9]+$/)
    $NF = sprintf("%.15gus", $NF / 10)
}1' xxx.log
这条日志中的时间戳(第3列,17位而非15位)和耗时(最后一列)都以百纳秒为单位,可读性不高
REQ Sx3 15905362187438714 GET / {"Accept-Encoding":"gzip","Host":"xxx","IP":"xxx","User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36","X-Forwarded-For":"111.7.100.18","X-Real-Ip":"xxx","X-Scheme":"http","bs":0} 400 {"Content-Type":"application/xml","Tbl":"pipeline","X-Log":["redis.g;redis.g;"],"X-Reqid":"ACIAAHcmn5BDuBIW","api":"ListObjects"} <Error><Code>MissingSecurityElement</Code><Message>The request is missing a security element.</Message><Resource>/pipeline</Resource><RequestId>ACIAAHcmn5BDuBIW</RequestId></Error> 180 69429
使用上面的命令处理后
REQ Sx3 2020-05-27 07:36:58.7438714 GET / {"Accept-Encoding":"gzip","Host":"xxx","IP":"xxx","User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36","X-Forwarded-For":"111.7.100.18","X-Real-Ip":"xxx","X-Scheme":"http","bs":0} 400 {"Content-Type":"application/xml","Tbl":"pipeline","X-Log":["redis.g;redis.g;"],"X-Reqid":"ACIAAHcmn5BDuBIW","api":"ListObjects"} <Error><Code>MissingSecurityElement</Code><Message>The request is missing a security element.</Message><Resource>/pipeline</Resource><RequestId>ACIAAHcmn5BDuBIW</RequestId></Error> 180 6942.9us
- nginx日志分析
# Top 10 client IPs ($1) by request count.
awk '{ print $1 }' access.log | sort | uniq -c | sort -rn | head -n 10
# Top 10 client IPs ($1) by request count within a given time window.
# `period` is a prefix of the timestamp: "27/May/2020:15" selects hour 15 of that
# day, "27/May/2020" the whole day, "27/May/2020:15:04" a single minute.
# It is compared against the time field ($4, which starts with "[") only, so the
# same text appearing elsewhere in a line (URL, referer, ...) is not counted.
awk -v period='27/May/2020:15' 'index($4, "[" period) == 1 { print $1 }' access.log \
  | sort | uniq -c | sort -rn | head -n 10
# Top 10 client IPs ($1) among those with more than 100 requests.
# After `uniq -c` the first column is the count, so the second awk filters on it.
awk '{ print $1 }' access.log | sort | uniq -c | awk '$1 > 100' | sort -rn | head -n 10
# Top 10 values of field 7 by request count
# (presumably the request path / API in the nginx access-log layout - confirm against log_format).
awk '{ print $7 }' access.log | sort | uniq -c | sort -rn | head -n 10
# Top 10 values of field 7 (presumably the request path / API) requested by one client IP.
# The address is compared with $1 for exact equality: an unanchored grep pattern
# treats "." as "any character" and also matches the address inside other fields
# or as part of a longer address (e.g. 106.12.17.2430).
awk -v ip='106.12.17.243' '$1 == ip { print $7 }' access.log \
  | sort | uniq -c | sort -rn | head -n 10
# Ten busiest seconds / minutes / hours by request count.
# Assumes $4 is the time field, e.g. "[27/May/2020:15:04:05": skipping the
# leading "[" and keeping 20 / 17 / 14 characters truncates it to the second /
# minute / hour.
awk '{ print substr($4, 2, 20) }' access.log | sort | uniq -c | sort -nr | head -n 10 # requests per second
awk '{ print substr($4, 2, 17) }' access.log | sort | uniq -c | sort -nr | head -n 10 # requests per minute
awk '{ print substr($4, 2, 14) }' access.log | sort | uniq -c | sort -nr | head -n 10 # requests per hour