• Flume基本操作


    Flume基本操作
     
    1.把Telnet产生的内容写入到控制台显示

    bin/flume-ng agent \
    -c conf \
    -n a1 \
    -f conf/a1.conf \
    -Dflume.root.logger=DEBUG,console


    a1.conf内容如下:

    ##### define agent name ####
    a1.sources = src1
    a1.channels = channel1
    a1.sinks = sink1
     
    ####  define source  ####
    a1.sources.src1.type = netcat
    a1.sources.src1.bind = haoguan-HP-Compaq-Pro-6380-MT
    a1.sources.src1.port = 44444
     
    ####  define channel  ####
    a1.channels.channel1.type = memory
    a1.channels.channel1.capacity = 1000
    a1.channels.channel1.transactionCapacity = 100
     
    ####  define sink  ####
    a1.sinks.sink1.type = logger
    a1.sinks.sink1.maxBytesToLog = 1024
     
    #### bind the source and sink to the channel
    a1.sources.src1.channels = channel1
    a1.sinks.sink1.channel = channel1  

    2.把hive中产生的log写入到hdfs

    bin/flume-ng agent \
    -c conf \
    -n a2 \
    -f conf/flume-hive.conf \
    -Dflume.root.logger=DEBUG,console

     
    flume-hive.conf内容如下:

    ##### define agent name ####
    a2.sources = src2
    a2.channels = channel2
    a2.sinks = sink2
     
    ####  define source  ####
    a2.sources.src2.type = exec
    # 使用tail -F(而不是tail -f):hive.log被滚动/重建后仍能继续跟踪新文件
    a2.sources.src2.command = tail -F /opt/modules/cdh/hive-0.13.1-cdh5.3.6/log/hive.log
    a2.sources.src2.shell = /bin/bash -c
     
    ####  define channel  ####
    a2.channels.channel2.type = memory
    a2.channels.channel2.capacity = 1000
    a2.channels.channel2.transactionCapacity = 100
     
    ####  define sink  ####
    a2.sinks.sink2.type = hdfs
    # 注意:properties文件不支持行尾注释,"#"只有在行首才是注释,所以注释必须单独成行,
    # 否则"#"及其后面的文字会被当作属性值的一部分
    # 路径中可以使用时间转义序列(如%Y%m%d)按日期生成分区目录
    a2.sinks.sink2.hdfs.path = hdfs://haoguan-HP-Compaq-Pro-6380-MT:9000/flume_hive_log/%Y%m%d
    a2.sinks.sink2.hdfs.filePrefix = events-
    a2.sinks.sink2.hdfs.fileType = DataStream
    a2.sinks.sink2.hdfs.writeFormat = Text
    a2.sinks.sink2.hdfs.batchSize = 10
    # rollInterval:每隔30秒滚动生成一个新文件,无论有没有到达rollSize大小(0表示不按时间滚动)
    a2.sinks.sink2.hdfs.rollInterval = 30
    # rollSize:文件到达指定大小(byte)时滚动生成一个新文件,无论有没有到达rollInterval间隔(0表示不按大小滚动)
    a2.sinks.sink2.hdfs.rollSize = 10240
    # rollCount默认值为10(每写入10个event就滚动一次),必须设置成0来禁用,不然会影响rollInterval、rollSize的设置
    a2.sinks.sink2.hdfs.rollCount = 0
    a2.sinks.sink2.hdfs.idleTimeout=0
    # 路径中使用时间转义序列时,event的header中必须带有timestamp;
    # 如果没有配置timestamp拦截器,就必须设置useLocalTimeStamp为true(使用本地时间)
    a2.sinks.sink2.hdfs.useLocalTimeStamp = true
     
    #### bind the source and sink to the channel
    a2.sources.src2.channels = channel2
    a2.sinks.sink2.channel = channel2  

     
    如果HDFS是HA架构,需要把HA集群的core-site.xml与hdfs-site.xml放入到/opt/modules/cdh/flume-1.5.0-cdh5.3.6/conf中,并把
    a2.sinks.sink2.hdfs.path = hdfs://haoguan-HP-Compaq-Pro-6380-MT:9000/flume_hive_log/%Y%m%d
    换成(其中ns1应与hdfs-site.xml中dfs.nameservices配置的名称一致)
    a2.sinks.sink2.hdfs.path = hdfs://ns1/flume_hive_log/%Y%m%d
     
     
    3. spooldir方式抽取文件到hdfs中

    bin/flume-ng agent \
    -c conf \
    -n a3 \
    -f conf/flume-app.conf \
    -Dflume.root.logger=DEBUG,console

     
    flume-app.conf内容如下:

    ##### define agent name ####
    a3.sources = src3
    a3.channels = channel3
    a3.sinks = sink3
     
    ####  define source  ####
    a3.sources.src3.type = spooldir
    # 注意:properties文件不支持行尾注释,注释必须单独成行
    # 指定被抽取的文件夹
    a3.sources.src3.spoolDir = /opt/modules/cdh/flume-1.5.0-cdh5.3.6/spoollogs
    # 忽略(不抽取)文件夹中文件名以.log结尾的文件;正则中的"."需要转义,properties文件里反斜杠要写成"\\"
    a3.sources.src3.ignorePattern = ^.*\\.log$
    # 文件抽取完成以后在文件名后追加的后缀
    a3.sources.src3.fileSuffix = _COMP
     
    ####  define channel  ####
    a3.channels.channel3.type = file
    a3.channels.channel3.checkpointDir = /opt/modules/cdh/flume-1.5.0-cdh5.3.6/filechannel/checkpoint
    a3.channels.channel3.dataDirs = /opt/modules/cdh/flume-1.5.0-cdh5.3.6/filechannel/data
    a3.channels.channel3.capacity = 1000
    a3.channels.channel3.transactionCapacity = 100
     
    ####  define sink  ####
    a3.sinks.sink3.type = hdfs
    a3.sinks.sink3.hdfs.path = hdfs://haoguan-HP-Compaq-Pro-6380-MT:9000/flume_app_log
    a3.sinks.sink3.hdfs.filePrefix = events-
    a3.sinks.sink3.hdfs.fileType = DataStream
    a3.sinks.sink3.hdfs.writeFormat = Text
    a3.sinks.sink3.hdfs.batchSize = 10
    a3.sinks.sink3.hdfs.rollInterval = 30
    a3.sinks.sink3.hdfs.rollSize = 10240
    a3.sinks.sink3.hdfs.rollCount = 0
    a3.sinks.sink3.hdfs.idleTimeout=0
    #a3.sinks.sink3.hdfs.useLocalTimeStamp = true
     
    #### bind the source and sink to the channel
    a3.sources.src3.channels = channel3
    a3.sinks.sink3.channel = channel3  



  • 相关阅读:
    构建之法阅读笔记05
    构建之法阅读笔记04
    构建之法阅读笔记03
    学习进度条
    软件工程练习——买书
    软件工程练习——找水王2
    Java作业07
    Java课堂作业06
    读《大道至简》第六章有感
    Java课堂作业05
  • 原文地址:https://www.cnblogs.com/guanhao/p/5629720.html
Copyright © 2020-2023  润新知