This configuration is adapted, with modifications, from http://www.wklken.me/posts/2016/05/24/elk-mysql-slolog.html
The ELK architecture used here is logstash-kafka-logstash-elasticsearch-kibana.
The first-layer Logstash only collects logs; the second-layer Logstash does the filtering.
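Before the client-side pipeline has anything to read, MySQL must be writing its slow query log to the path that the file input below tails. A minimal my.cnf sketch, assuming MySQL 5.1+ option names; the 2-second threshold is an illustrative choice, not part of the original setup:

[mysqld]
slow_query_log      = 1
slow_query_log_file = /tmp/mysql_slow.log
long_query_time     = 2   # seconds; assumed threshold, tune to taste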
Client-side Logstash configuration:
input {
  file {
    path => ["/tmp/mysql_slow.log"]
    type => "ops_mysql_slow_log"
    #start_position => "beginning"
    add_field => ["env", "PRODUCT"]
    # auto-flush after 5 seconds, so an event is still emitted
    # even when nothing new arrives to trigger the multiline codec
    codec => multiline {
      pattern => "^# User@Host:"
      negate => true
      what => previous
      max_bytes => "100kib"
      auto_flush_interval => 5
    }
  }
}

output {
  # stdout {}
  if [type] == "ops_mysql_slow_log" {
    kafka {
      topic_id => "mysql_slow_all_log"
      bootstrap_servers => "kafka1:9092"   # Kafka broker address
      compression_type => "gzip"
    }
  }
}
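For reference, a single slow-log entry looks roughly like the following (the header values are taken from the comments in the server-side grok patterns; the SQL line is illustrative). Every entry begins with "# User@Host:", which is why the multiline pattern above folds all following lines into one event until the next entry starts:

# User@Host: logstash[logstash] @ localhost [127.0.0.1]
# Query_time: 2.985827  Lock_time: 0.000053 Rows_sent: 0  Rows_examined: 0
SET timestamp=1502088507;
SELECT * FROM orders WHERE customer_id = 42;

MySQL also interleaves "# Time: ..." header lines; the server-side filter drops any fragment that fails the grok patterns, which cleans those up.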
Server-side Logstash configuration:
input {
  kafka {
    group_id => "logstash"
    topic_id => "mysql_slow_all_log"
    consumer_threads => 15
    #decorate_events => true
    zk_connect => "kafka1:2181"   # ZooKeeper address
  }
}

filter {
  if ("multiline_codec_max_bytes_reached" in [tags]) {
    drop {}
  }
  if [type] =~ ".*_mysql_slow_log" {
    grok {
      # User@Host: logstash[logstash] @ localhost [127.0.0.1]
      # User@Host: logstash[logstash] @ [127.0.0.1]
      # User@Host: logstash[logstash] @ localhost []
      match => [ "message", "^# User@Host: (?<user>[^\[]+)\[[^\]]+\] @ (?<dbhost>[^\s]+)? \[%{IP:ip}?\]" ]
    }
    grok {
      # Query_time: 2.985827  Lock_time: 0.000053 Rows_sent: 0  Rows_examined: 0
      match => [ "message", "^# Query_time: %{NUMBER:duration:float}%{SPACE}Lock_time: %{NUMBER:lock_wait:float}%{SPACE}Rows_sent: %{NUMBER:results:int}%{SPACE}Rows_examined:%{SPACE}%{NUMBER:scanned:int}\s+(?<sql>[\s\S]*)" ]
    }
    # Capture the time the query happened
    grok {
      match => [ "message", "^SET timestamp=%{NUMBER:dbtimestamp};" ]
    }
    # if the multiline codec failed to parse the event,
    # clean up stray data such as "# Time: 170807 14:48:27"
    if ("_grokparsefailure" in [tags]) {
      drop {}
    }
    date {
      match => [ "dbtimestamp", "UNIX" ]
    }
    mutate {
      gsub => [
        "sql", "\nSET timestamp=\d+?;\n", "",
        "sql", "\nuse [a-zA-Z0-9\-\_]+?;", "",
        "sql", "\n# Time: \d+\s+\d+:\d+:\d+", "",
        "sql", "\n/usr/local/mysql/bin/mysqld.+$", "",
        "sql", "\nTcp port:.+$", "",
        "sql", "\nTime .+$", ""
      ]
    }
    # calculate a unique hash per normalized statement
    mutate {
      add_field => { "sql_for_hash" => "%{sql}" }
    }
    mutate {
      gsub => [
        "sql_for_hash", "'.+?'", "",
        "sql_for_hash", "-?\d*\.{0,1}\d+", ""
      ]
    }
    checksum {
      algorithm => "md5"
      keys => ["sql_for_hash"]
    }
    # Drop the captured timestamp field since it has been moved to the time of the event
    mutate {
      # TODO: remove the message field
      # remove_field => ["timestamp", "message", "sql_for_hash"]
      remove_field => ["dbtimestamp", "sql_for_hash"]
    }
  }
}

output {
  if [type] =~ ".*_mysql_slow_log" {
    elasticsearch {
      hosts => ["localhost:9200"]
      codec => "json"
      workers => 20
      index => "logstash-mysql-slow-log-%{+YYYY.MM.dd}"
    }
  }
}
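The reason the checksum is taken over sql_for_hash rather than sql: the two gsub rules strip single-quoted strings and numeric literals, so statements that differ only in their literal values normalize to the same shape and produce the same MD5. A hypothetical example:

sql:          SELECT * FROM orders WHERE customer_id = 42 AND note = 'late'
sql_for_hash: SELECT * FROM orders WHERE customer_id =  AND note =

Identical query shapes therefore share one hash, which makes it straightforward to group and count recurring slow queries in Kibana.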