logstash
官网 推荐内存4GB-8GB
配置
[root@logstash config]# cat dspfilterLog.conf
# Source: tail the dspfilter log files matched by the glob below.
input {
  file {
    id => "dspfilter"
    # Wildcards cover every adx* host directory and adx-* instance directory.
    path => "/data/logs/adx*/adx/adx-*/dspfilterLog/dspfilter.log"
    # Begin at the start of each matched file instead of its end.
    start_position => "beginning"
  }
}
filter {
  # The event time is 8 hours behind China (Shanghai) local time, so
  # 8 hours (8*60*60 seconds) are added and kept in the scratch field index_data.
  ruby {
    code => "event.set('index_data', event.get('@timestamp').time.localtime + 8*60*60)"
  }

  # Write the shifted value back into @timestamp.
  # NOTE(review): this moves the stored @timestamp itself, not only how it is
  # rendered, so every output receives a time 8 hours ahead of the original -
  # confirm downstream consumers expect that.
  # Presumably split into two ruby steps so the value passes through an event
  # field before being assigned to @timestamp - TODO confirm.
  ruby {
    code => "event.set('@timestamp',event.get('index_data'))"
  }

  mutate {
    # Earlier placement of the mytime field, left disabled; the active
    # definition is in the environment block that follows.
    #add_field => { "mytime" => "%{+yyyyMMddHH}" }
    # Drop bookkeeping fields and the index_data scratch field.
    remove_field => [ "path", "host", "type", "@version", "index_data" ]
  }

  # mytime is a yyyyMMddHH stamp; the OSS output references it in its prefix.
  environment {
    add_field => { "mytime" => "%{+yyyyMMddHH}" }
  }
}
# Output to OSS
output {
  oss {
    # Destination: bucket "log", objects grouped under dspfilter/<mytime>.
    "endpoint" => "oss-cn-xxx.aliyuncs.com"
    "bucket" => "log"
    "prefix" => "dspfilter/%{mytime}"

    # Credentials ("xxx" are placeholders in these notes).
    # NOTE(review): avoid committing real keys in plain text.
    "access_key_id" => "xxx"
    "access_key_secret" => "xxx"

    # Rotation: by size and by time.
    # 67108864 = 64 * 1024 * 1024; presumably bytes - confirm in the plugin docs.
    # Unit of time_rotate is presumably minutes - confirm in the plugin docs.
    "rotation_strategy" => "size_and_time"
    "size_rotate" => 67108864
    "time_rotate" => 10
    "recover" => true

    # Compression is currently disabled.
    #"encoding" => "gzip"

    # One JSON document per line, UTF-8 encoded.
    codec => json_lines {
      charset => "UTF-8"
    }
  }
}
# Output to Kafka
output {
  kafka {
    id => "kafka-md5filter"

    # Target cluster and topic.
    bootstrap_servers => "kafka-01:9092,kafka-02:9092,kafka-03:9092"
    topic_id => "md5filter"

    # Serialization and delivery settings.
    codec => "json"
    acks => "1"
    compression_type => "lz4"
    partitioner => "round_robin"

    # Batching limits: 65536 = 64 * 1024 and 2097152 = 2 * 1024 * 1024
    # (presumably bytes - confirm in the plugin docs); linger_ms is in
    # milliseconds per its name.
    batch_size => 65536
    linger_ms => 10
    max_request_size => 2097152
  }
}
input kafka插件
bootstrap_servers: kafka-01:9092,kafka-02:9092,kafka-03:9092
topics: ["requestLog"]
consumer_threads: 1 #线程数,最好线程数设置为分区数
max_poll_interval_ms #默认5分钟,5分钟消费完poll返回的数据,如果没有消费完,则会发生重平衡,这个值设置大一点
max_poll_records #默认500,单次调用poll时返回的最大消息数量
fetch_max_bytes #默认50m,获取请求返回的最大数据量
fetch_min_bytes #获取请求返回的最小数据量。如果没有足够的数据可用,则累计到设置的最小数据量的数据一起发送。
group_id: "logstash" #消费者组标识
heartbeat_interval_ms: 3000 #默认为3秒,此值必须低于session.timeout.ms
max_partition_fetch_bytes: #默认为1m,返回每个分区的最大数据量,这个大小必须至少与服务器允许的最大消息大小一样大
启动
[root@logstash config]# logstash -f dspfilterLog.conf
配置缓冲队列
[root@logstash config]# cat logstash.yml
# Buffer queue settings: queue type "persisted", capped at 3gb in total.
queue.type: persisted
queue.max_bytes: 3gb
多管道配置
[root@logstash config]# cat pipelines.yml
# One list entry per pipeline. The keys of each entry must be indented under
# its "- pipeline.id" line so that they belong to the same mapping.
- pipeline.id: md5filter
  pipeline.workers: 1
  path.config: /data/app/logstash/config/alikafka/alikafka-md5filter.conf
- pipeline.id: adxrequest
  pipeline.workers: 2
  path.config: /data/app/logstash/config/alikafka/alikafka-adxrequest.conf