cut 截取指定列
可以按照某个字符进行分割,然后取出其中的指定列:
[root@iz8vbbqbnh4ug2q9so5jflz logs]# cat localhost_access_log.2017-12-02.txt 140.205.201.30 - - [02/Dec/2017:00:15:24 +0800] "GET / HTTP/1.1" 404 - 140.205.201.30 - - [02/Dec/2017:00:17:51 +0800] "GET /rs-status HTTP/1.1" 404 - 140.205.201.30 - - [02/Dec/2017:00:19:06 +0800] "GET /phpmyadmin/ HTTP/1.1" 404 - 140.205.201.30 - - [02/Dec/2017:00:19:06 +0800] "POST /phpmyadmin/ HTTP/1.1" 404 - 140.205.201.30 - - [02/Dec/2017:00:19:06 +0800] "GET /phpmyadmin/ HTTP/1.1" 404 - 140.205.201.30 - - [02/Dec/2017:00:19:06 +0800] "POST /phpmyadmin/ HTTP/1.1" 404 - 140.205.201.30 - - [02/Dec/2017:00:19:06 +0800] "GET /phpmyadmin/ HTTP/1.1" 404 - 140.205.201.30 - - [02/Dec/2017:00:19:06 +0800] "POST /phpmyadmin/ HTTP/1.1" 404 - 140.205.201.30 - - [02/Dec/2017:00:19:06 +0800] "GET /phpmyadmin/ HTTP/1.1" 404 - 140.205.201.30 - - [02/Dec/2017:00:19:06 +0800] "POST /phpmyadmin/ HTTP/1.1" 404 - 140.205.201.30 - - [02/Dec/2017:00:19:06 +0800] "GET /phpmyadmin/ HTTP/1.1" 404 - 140.205.201.30 - - [02/Dec/2017:00:19:06 +0800] "POST /phpmyadmin/ HTTP/1.1" 404 - 140.205.201.30 - - [02/Dec/2017:00:19:06 +0800] "GET /phpmyadmin/ HTTP/1.1" 404 - 140.205.201.30 - - [02/Dec/2017:00:19:06 +0800] "POST /phpmyadmin/ HTTP/1.1" 404 - 140.205.201.30 - - [02/Dec/2017:00:19:06 +0800] "GET /phpmyadmin/ HTTP/1.1" 404 - 140.205.201.30 - - [02/Dec/2017:00:19:06 +0800] "POST /phpmyadmin/ HTTP/1.1" 404 - 140.205.201.30 - - [02/Dec/2017:00:19:06 +0800] "GET /phpmyadmin/ HTTP/1.1" 404 - 140.205.201.30 - - [02/Dec/2017:00:19:07 +0800] "POST /phpmyadmin/ HTTP/1.1" 404 - 140.205.201.30 - - [02/Dec/2017:00:19:07 +0800] "GET /phpmyadmin/ HTTP/1.1" 404 - 140.205.201.30 - - [02/Dec/2017:00:19:07 +0800] "POST /phpmyadmin/ HTTP/1.1" 404 - 140.205.201.30 - - [02/Dec/2017:00:19:07 +0800] "GET /phpmyadmin/ HTTP/1.1" 404 - 140.205.201.30 - - [02/Dec/2017:00:19:07 +0800] "POST /phpmyadmin/ HTTP/1.1" 404 - 140.205.201.30 - - [02/Dec/2017:00:19:09 +0800] "GET /ganglia/index.php HTTP/1.1" 404 - 164.132.91.1 - - 
[02/Dec/2017:00:22:21 +0800] "GET / HTTP/1.1" 404 - 114.215.45.101 - - [02/Dec/2017:00:23:43 +0800] "GET / HTTP/1.1" 404 - 140.205.201.30 - - [02/Dec/2017:00:32:41 +0800] "GET /index.php HTTP/1.1" 404 - 140.205.201.30 - - [02/Dec/2017:00:39:08 +0800] "GET /jobs/ HTTP/1.1" 404 -
[root@iz8vbbqbnh4ug2q9so5jflz logs]# cat localhost_access_log.2017-12-02.txt |cut -d ' ' -f 6 "GET "GET "GET "POST "GET "POST "GET "POST "GET "POST "GET "POST "GET "POST "GET "POST "GET "POST "GET "POST "GET "POST "GET "GET "GET "GET "GET
可以指定更多的列:
[root@iz8vbbqbnh4ug2q9so5jflz logs]# cat localhost_access_log.2017-12-02.txt |cut -d ' ' -f 2,3,4 - - [02/Dec/2017:00:15:24 - - [02/Dec/2017:00:17:51 - - [02/Dec/2017:00:19:06 - - [02/Dec/2017:00:19:06 - - [02/Dec/2017:00:19:06 - - [02/Dec/2017:00:19:06 - - [02/Dec/2017:00:19:06 - - [02/Dec/2017:00:19:06 - - [02/Dec/2017:00:19:06 - - [02/Dec/2017:00:19:06 - - [02/Dec/2017:00:19:06 - - [02/Dec/2017:00:19:06 - - [02/Dec/2017:00:19:06 - - [02/Dec/2017:00:19:06 - - [02/Dec/2017:00:19:06 - - [02/Dec/2017:00:19:06 - - [02/Dec/2017:00:19:06 - - [02/Dec/2017:00:19:07 - - [02/Dec/2017:00:19:07 - - [02/Dec/2017:00:19:07 - - [02/Dec/2017:00:19:07 - - [02/Dec/2017:00:19:07 - - [02/Dec/2017:00:19:09 - - [02/Dec/2017:00:22:21 - - [02/Dec/2017:00:23:43 - - [02/Dec/2017:00:32:41 - - [02/Dec/2017:00:39:08
[root@iz8vbbqbnh4ug2q9so5jflz logs]# cat localhost_access_log.2017-12-02.txt |cut -d ' ' -f 2,3,6- - - "GET / HTTP/1.1" 404 - - - "GET /rs-status HTTP/1.1" 404 - - - "GET /phpmyadmin/ HTTP/1.1" 404 - - - "POST /phpmyadmin/ HTTP/1.1" 404 - - - "GET /phpmyadmin/ HTTP/1.1" 404 - - - "POST /phpmyadmin/ HTTP/1.1" 404 - - - "GET /phpmyadmin/ HTTP/1.1" 404 - - - "POST /phpmyadmin/ HTTP/1.1" 404 - - - "GET /phpmyadmin/ HTTP/1.1" 404 - - - "POST /phpmyadmin/ HTTP/1.1" 404 - - - "GET /phpmyadmin/ HTTP/1.1" 404 - - - "POST /phpmyadmin/ HTTP/1.1" 404 - - - "GET /phpmyadmin/ HTTP/1.1" 404 - - - "POST /phpmyadmin/ HTTP/1.1" 404 - - - "GET /phpmyadmin/ HTTP/1.1" 404 - - - "POST /phpmyadmin/ HTTP/1.1" 404 - - - "GET /phpmyadmin/ HTTP/1.1" 404 - - - "POST /phpmyadmin/ HTTP/1.1" 404 - - - "GET /phpmyadmin/ HTTP/1.1" 404 - - - "POST /phpmyadmin/ HTTP/1.1" 404 - - - "GET /phpmyadmin/ HTTP/1.1" 404 - - - "POST /phpmyadmin/ HTTP/1.1" 404 - - - "GET /ganglia/index.php HTTP/1.1" 404 - - - "GET / HTTP/1.1" 404 - - - "GET / HTTP/1.1" 404 - - - "GET /index.php HTTP/1.1" 404 - - - "GET /jobs/ HTTP/1.1" 404 -
sort 对列进行排序
例如,对tomcat访问日志,对请求响应返回大小进行排序:
cat localhost_access_log.2017-12-01.txt |sort -t ' ' -k 10
-t : 指定分隔符
-k : 指定排序的列
114.241.108.197 - - [01/Dec/2017:09:03:45 +0800] "GET /js/plugin/jquery-file-upload/js/vendor/tmpl.min.js HTTP/1.1" 200 977 114.241.108.197 - - [01/Dec/2017:11:45:30 +0800] "GET /js/plugin/jquery-file-upload/js/vendor/tmpl.min.js HTTP/1.1" 200 977 114.241.108.197 - - [01/Dec/2017:14:41:04 +0800] "GET /js/plugin/jquery-file-upload/js/vendor/tmpl.min.js HTTP/1.1" 200 977 223.72.82.98 - - [01/Dec/2017:15:26:10 +0800] "GET /js/plugin/jquery-file-upload/js/vendor/tmpl.min.js HTTP/1.1" 200 977 59.108.217.106 - - [01/Dec/2017:09:35:17 +0800] "GET /js/plugin/jquery-file-upload/js/vendor/tmpl.min.js HTTP/1.1" 200 977 59.108.217.106 - - [01/Dec/2017:13:08:46 +0800] "GET /js/plugin/jquery-file-upload/js/vendor/tmpl.min.js HTTP/1.1" 200 977 114.241.108.197 - - [01/Dec/2017:09:03:32 +0800] "GET /img/logo-pale.png HTTP/1.1" 200 9775 114.241.108.197 - - [01/Dec/2017:11:28:29 +0800] "GET /img/logo-pale.png HTTP/1.1" 200 9775 114.241.108.197 - - [01/Dec/2017:14:40:51 +0800] "GET /img/logo-pale.png HTTP/1.1" 200 9775 223.72.82.98 - - [01/Dec/2017:15:26:03 +0800] "GET /img/logo-pale.png HTTP/1.1" 200 9775 59.108.217.106 - - [01/Dec/2017:09:35:01 +0800] "GET /img/logo-pale.png HTTP/1.1" 200 9775 59.108.217.106 - - [01/Dec/2017:09:35:10 +0800] "GET /img/logo-pale.png HTTP/1.1" 200 9775 59.108.217.106 - - [01/Dec/2017:13:08:52 +0800] "GET /img/logo-pale.png HTTP/1.1" 200 9775 114.241.108.197 - - [01/Dec/2017:12:00:15 +0800] "GET /interview/detail.do?manageKey=15ba76c6fbeeccd2f8df875379ac88e9&targetPanel=dialog HTTP/1.1" 200 9952 59.108.217.106 - - [01/Dec/2017:16:44:53 +0800] "GET /interview/detail.do?manageKey=15ba76c6fbeeccd2f8df875379ac88e9&targetPanel=dialog HTTP/1.1" 200 9952 59.108.217.106 - - [01/Dec/2017:16:44:57 +0800] "GET /interview/detail.do?manageKey=15ba76c6fbeeccd2f8df875379ac88e9&targetPanel=dialog HTTP/1.1" 200 9952
排序是有方向的,默认是升序排序,如果要降序排列,可以在列号后面增加一个r:
cat localhost_access_log.2017-12-01.txt |sort -t ' ' -k 10r
最后要注意的是,这里的排序默认是按字符串的字典顺序排列的,如果要按其数值排序,则需要增加一个n:
cat localhost_access_log.2017-12-01.txt |sort -t ' ' -k 10n
114.241.108.197 - - [01/Dec/2017:09:03:28 +0800] "GET /css/smartadmin-production.css HTTP/1.1" 200 394554 114.241.108.197 - - [01/Dec/2017:11:28:29 +0800] "GET /css/smartadmin-production.css HTTP/1.1" 200 394554 114.241.108.197 - - [01/Dec/2017:14:40:49 +0800] "GET /css/smartadmin-production.css HTTP/1.1" 200 394554 223.72.82.98 - - [01/Dec/2017:15:25:59 +0800] "GET /css/smartadmin-production.css HTTP/1.1" 200 394554 59.108.217.106 - - [01/Dec/2017:09:34:56 +0800] "GET /css/smartadmin-production.css HTTP/1.1" 200 394554 59.108.217.106 - - [01/Dec/2017:09:35:06 +0800] "GET /css/smartadmin-production.css HTTP/1.1" 200 394554 59.108.217.106 - - [01/Dec/2017:13:08:43 +0800] "GET /css/smartadmin-production.css HTTP/1.1" 200 394554 112.65.193.14 - - [01/Dec/2017:11:28:44 +0800] "GET /js/jqueryui/1.10.3/jquery-ui.min.js HTTP/1.1" 200 435844 114.241.108.197 - - [01/Dec/2017:09:03:30 +0800] "GET /js/jqueryui/1.10.3/jquery-ui.min.js HTTP/1.1" 200 435844 114.241.108.197 - - [01/Dec/2017:11:28:33 +0800] "GET /js/jqueryui/1.10.3/jquery-ui.min.js HTTP/1.1" 200 435844 114.241.108.197 - - [01/Dec/2017:14:40:49 +0800] "GET /js/jqueryui/1.10.3/jquery-ui.min.js HTTP/1.1" 200 435844 223.72.82.98 - - [01/Dec/2017:15:26:01 +0800] "GET /js/jqueryui/1.10.3/jquery-ui.min.js HTTP/1.1" 200 435844 59.108.217.106 - - [01/Dec/2017:09:34:56 +0800] "GET /js/jqueryui/1.10.3/jquery-ui.min.js HTTP/1.1" 200 435844 59.108.217.106 - - [01/Dec/2017:09:35:06 +0800] "GET /js/jqueryui/1.10.3/jquery-ui.min.js HTTP/1.1" 200 435844 59.108.217.106 - - [01/Dec/2017:13:08:43 +0800] "GET /js/jqueryui/1.10.3/jquery-ui.min.js HTTP/1.1" 200 435844
由此可见,此网站最大的静态资源是这个jquery-ui.min.js文件。
uniq去重
cat localhost_access_log.2017-12-01.txt |cut -d ' ' -f 1,10 |sort -t ' ' -k 2n,1|uniq
223.72.82.98 61692 59.108.217.106 61692 114.241.108.197 95786 223.72.82.98 95786 59.108.217.106 95786 114.241.108.197 116060 223.72.82.98 116060 59.108.217.106 116060 112.65.193.14 284394 114.241.108.197 284394 223.72.82.98 284394 59.108.217.106 284394 114.241.108.197 394554 223.72.82.98 394554 59.108.217.106 394554 112.65.193.14 435844 114.241.108.197 435844 223.72.82.98 435844 59.108.217.106 435844
wc统计
[root@iZ25klm6k7uZ logs]# wc -l localhost_access_log.2017-12-01.txt 统计行数 1967 localhost_access_log.2017-12-01.txt [root@iZ25klm6k7uZ logs]# wc -w localhost_access_log.2017-12-01.txt 统计词数 19670 localhost_access_log.2017-12-01.txt [root@iZ25klm6k7uZ logs]# wc -m localhost_access_log.2017-12-01.txt 共计字符数 219011 localhost_access_log.2017-12-01.txt [root@iZ25klm6k7uZ logs]#
sed正则查找
用sed来查找500的日志信息:
[root@iZ25klm6k7uZ logs]# sed -n '/500/p' localhost_access_log.2017-12-01.txt 119.127.17.97 - - [01/Dec/2017:14:23:18 +0800] "POST /interview/add.do HTTP/1.1" 500 19582 119.127.17.97 - - [01/Dec/2017:14:23:24 +0800] "POST /interview/add.do HTTP/1.1" 500 19582 119.127.17.97 - - [01/Dec/2017:14:24:12 +0800] "POST /interview/add.do HTTP/1.1" 500 19582 119.127.17.97 - - [01/Dec/2017:14:31:11 +0800] "POST /interview/add.do HTTP/1.1" 500 19582 119.127.17.97 - - [01/Dec/2017:14:49:51 +0800] "POST /interview/add.do HTTP/1.1" 500 19582 119.127.17.97 - - [01/Dec/2017:14:49:57 +0800] "POST /interview/add.do HTTP/1.1" 500 19582 119.127.17.97 - - [01/Dec/2017:14:55:45 +0800] "POST /interview/add.do HTTP/1.1" 500 19582 119.127.17.97 - - [01/Dec/2017:14:58:03 +0800] "POST /interview/add.do HTTP/1.1" 500 19582 59.108.217.106 - - [01/Dec/2017:15:00:22 +0800] "POST /interview/add.do HTTP/1.1" 500 19582
注意:-n 选项和 p 命令配合使用,表示只打印匹配的行。
awk正则匹配
用awk来查找500日志信息:
awk '($9 ~ /500/)' localhost_access_log.2017-12-01.txt
输出和上面的sed一样。
awk有默认的分隔符,比如制表符、空格等。如果要指定分隔符可以用-F。
awk的强大之处在于它支持编程,格式如下:
awk pattern { action } 例如上面的查找500日志可以完整表达如下:
[root@iZ25klm6k7uZ logs]# awk -F ' ' '($9 ~ /500/){print }' localhost_access_log.2017-12-01.txt 119.127.17.97 - - [01/Dec/2017:14:23:18 +0800] "POST /interview/add.do HTTP/1.1" 500 19582 119.127.17.97 - - [01/Dec/2017:14:23:24 +0800] "POST /interview/add.do HTTP/1.1" 500 19582 119.127.17.97 - - [01/Dec/2017:14:24:12 +0800] "POST /interview/add.do HTTP/1.1" 500 19582 119.127.17.97 - - [01/Dec/2017:14:31:11 +0800] "POST /interview/add.do HTTP/1.1" 500 19582 119.127.17.97 - - [01/Dec/2017:14:49:51 +0800] "POST /interview/add.do HTTP/1.1" 500 19582 119.127.17.97 - - [01/Dec/2017:14:49:57 +0800] "POST /interview/add.do HTTP/1.1" 500 19582 119.127.17.97 - - [01/Dec/2017:14:55:45 +0800] "POST /interview/add.do HTTP/1.1" 500 19582 119.127.17.97 - - [01/Dec/2017:14:58:03 +0800] "POST /interview/add.do HTTP/1.1" 500 19582 59.108.217.106 - - [01/Dec/2017:15:00:22 +0800] "POST /interview/add.do HTTP/1.1" 500 19582
同时查找500和404的日志:
awk -F ' ' '($9 ~ /500/ || $9 ~ /404/){print $1,$6,$7,$9}' localhost_access_log.2017-12-01.txt
或者用正则中的“|”一次匹配多个状态码(注意下面的例子还额外匹配了400,并且输出的列也有所不同):
awk -F ' ' '($9 ~ /500|404|400/){print $1,"-",$4,"-",$6,"-",$9}' localhost_access_log.2017-12-01.txt