Flume 提供 failover(故障转移)和 load balance(负载均衡)功能,二者都通过 sink group 的 processor 来配置。
1.添加collector配置(需要配置两个collector:下面是监听52020端口的第一个;第二个collector配置相同,只需把source的port改为52021,与agent中k2的端口对应)
# Collector agent "s1": receives Avro events from the upstream agent and
# forwards them to Kafka through an in-memory channel.
#
# NOTE: Flume loads this file as Java properties, so a '#' that follows a value
# is NOT a comment -- it becomes part of the value. Keep every comment on its
# own line.

# Name the components on this agent
s1.sources = r1
s1.sinks = k1
s1.channels = c1

# Describe/configure the source
# Source type; must be avro so it can receive from the agent's avro sinks.
s1.sources.r1.type = avro
# Hostname the source binds to; the agent sends its events to this host and port.
s1.sources.r1.bind = node2
# Port the source listens on.
s1.sources.r1.port = 52020
s1.sources.r1.interceptors = avroSerializeInterceptor
# NOTE(review): Flume expects the fully-qualified class name of the
# interceptor's Builder here; add the package prefix unless the class really
# lives in the default package.
s1.sources.r1.interceptors.avroSerializeInterceptor.type = AvroSerializeInterceptor$Builder

# Describe the sink
s1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
s1.sinks.k1.kafka.topic = tp002
s1.sinks.k1.kafka.bootstrap.servers = 192.168.0.118:9092,192.168.0.118:9093,192.168.0.118:9094
# NOTE(review): the Kafka sink property table in the Flume user guide names
# this key "flumeBatchSize" (no "kafka." prefix) -- confirm which spelling the
# Flume version in use actually honours.
s1.sinks.k1.kafka.flumeBatchSize = 20
s1.sinks.k1.kafka.producer.acks = 1
s1.sinks.k1.kafka.producer.linger.ms = 1
s1.sinks.k1.kafka.producer.compression.type = snappy

# Use a channel which buffers events in memory
s1.channels.c1.type = memory
s1.channels.c1.capacity = 1000
s1.channels.c1.transactionCapacity = 100

# Bind the source and sink to the channel
s1.sources.r1.channels = c1
s1.sinks.k1.channel = c1
2.添加agent配置
# Agent "a1": tails a local log file and ships the events to the collectors
# through two avro sinks that are grouped for failover.
#
# NOTE: Flume loads this file as Java properties, so a '#' that follows a value
# is NOT a comment -- it becomes part of the value. Keep every comment on its
# own line.

# Name the components on this agent
a1.sources = r1
# Two sinks, one per collector.
a1.sinks = k1 k2
a1.channels = c1
# Sink group; required for configuring load balancing or failover.
a1.sinkgroups = g1

# Describe/configure the source
a1.sources.r1.channels = c1
a1.sources.r1.type = exec
# NOTE(review): the Flume user guide recommends "tail -F" so the source keeps
# following the file across log rotation -- confirm whether that is wanted here.
a1.sources.r1.command = tail -f /tmp/test.log

# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

# Describe the sinks
a1.sinks.k1.channel = c1
a1.sinks.k1.type = avro
# Host and port to send events to; must match the collector's avro source.
a1.sinks.k1.hostname = node2
a1.sinks.k1.port = 52020
a1.sinks.k2.channel = c1
a1.sinks.k2.type = avro
# Same host, but the port of the second collector.
a1.sinks.k2.hostname = node2
a1.sinks.k2.port = 52021

# Sinks that belong to the group
a1.sinkgroups.g1.sinks = k1 k2

# Sink processor for the group; valid types are default, failover and load_balance.
a1.sinkgroups.g1.processor.type = failover
# Failover priority: the sink with the larger value is preferred, so k2 is the
# primary sink and k1 takes over only when k2 fails.
a1.sinkgroups.g1.processor.priority.k1 = 1
a1.sinkgroups.g1.processor.priority.k2 = 2
# Upper bound, in milliseconds, of the backoff period applied to a failed sink.
a1.sinkgroups.g1.processor.maxpenalty = 10000
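Load balance配置:在上面agent配置的基础上,把 a1.sinkgroups.g1.processor.type 改为 load_balance(此时不需要priority和maxpenalty),并可用 a1.sinkgroups.g1.processor.selector 指定 round_robin 或 random,用 a1.sinkgroups.g1.processor.backoff = true 让失败的sink暂时退避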
启动collector和agent会有相关日志