Flume HdfsEventSink and KafkaSink Configuration
The Flume data source is a custom NIOSource; every event is fanned out along two paths, one written to HDFS and one written to Kafka.
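The fan-out itself is just Flume's replicating channel selector: one source feeds two channels, and each channel drains into its own sink. A minimal skeleton of that wiring is sketched below; the agent and component names (a1, src, chHdfs, chKafka, sinkHdfs, sinkKafka) are placeholders, and the complete working properties follow in the two examples.
# placeholder agent "a1": one source replicated into an HDFS channel and a Kafka channel
a1.sources = src
a1.channels = chHdfs chKafka
a1.sinks = sinkHdfs sinkKafka
# the custom source from the examples; the replicating selector copies every event into both channels
a1.sources.src.type = nio
a1.sources.src.channels = chHdfs chKafka
a1.sources.src.selector.type = replicating
# each sink drains exactly one channel
a1.sinks.sinkHdfs.type = hdfs
a1.sinks.sinkHdfs.channel = chHdfs
a1.sinks.sinkKafka.type = org.apache.flume.plugins.KafkaSink
a1.sinks.sinkKafka.channel = chKafka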
1. Example 1: flume-conf.properties
Two agents, c1 and c2, are defined in the same file and are started separately by agent name (see the startup commands at the end).
#############################################
c1.sources = c1so1
c1.channels = c1ch1 c1ch2
c1.sinks = c1si1 c1si2
c1.sources.c1so1.type=nio
c1.sources.c1so1.channels = c1ch1 c1ch2
c1.sources.c1so1.selector.type = replicating
# selector.optional would mark c1ch2 as optional: a failed write to c1ch2 is silently ignored, while a failed write to c1ch1 still fails the event. It stays commented out here because both channels must receive every event.
#c1.sources.c1so1.selector.optional = c1ch2
c1.sources.c1so1.bind = 10.0.15.71
c1.sources.c1so1.port = 8080
c1.sources.c1so1.delimiter=
c1.sources.c1so1.interceptors = i1 i2
c1.sources.c1so1.interceptors.i1.type = timestamp
c1.sources.c1so1.interceptors.i2.type = host
c1.channels.c1ch1.type = memory
c1.channels.c1ch1.keep-alive=30
c1.channels.c1ch1.byteCapacity=0
c1.channels.c1ch1.capacity = 100000
c1.channels.c1ch1.transactionCapacity = 100
c1.channels.c1ch2.type = memory
c1.channels.c1ch2.keep-alive=30
c1.channels.c1ch2.byteCapacity=0
c1.channels.c1ch2.capacity = 100000
c1.channels.c1ch2.transactionCapacity = 100
c1.sinks.c1si1.type = hdfs
c1.sinks.c1si1.channel = c1ch1
c1.sinks.c1si1.hdfs.path = hdfs://cloud40:9000/m2m/data01/%y-%m-%d
c1.sinks.c1si1.hdfs.filePrefix = log-%{host}
c1.sinks.c1si1.hdfs.idleTimeout=30
c1.sinks.c1si1.hdfs.callTimeout=300000
c1.sinks.c1si1.hdfs.rollCount=0
c1.sinks.c1si1.hdfs.rollSize=16000000
c1.sinks.c1si1.hdfs.rollInterval=0
c1.sinks.c1si1.hdfs.round = false
c1.sinks.c1si1.hdfs.roundValue = 10
c1.sinks.c1si1.hdfs.roundUnit = minute
c1.sinks.c1si1.hdfs.fileType = DataStream
c1.sinks.c1si1.hdfs.writeFormat = Text
c1.sinks.c1si1.hdfs.batchSize=100
c1.sinks.c1si1.serializer.appendNewline = true
c1.sinks.c1si2.type = org.apache.flume.plugins.KafkaSink
c1.sinks.c1si2.channel = c1ch2
c1.sinks.c1si2.metadata.broker.list=10.0.15.203:9092,10.0.15.204:9092,10.0.15.206:9092,10.0.15.207:9092,10.0.15.208:9092
c1.sinks.c1si2.serializer.class=kafka.serializer.StringEncoder
c1.sinks.c1si2.key.serializer.class=kafka.serializer.StringEncoder
c1.sinks.c1si2.request.required.acks=0
c1.sinks.c1si2.max.message.size=8000000
c1.sinks.c1si2.producer.type=sync
c1.sinks.c1si2.custom.encoding=UTF-8
c1.sinks.c1si2.custom.topic.name=p2p
#############################################
c2.sources = c2so1
c2.channels = c2ch1 c2ch2
c2.sinks = c2si1 c2si2
c2.sources.c2so1.type=nio
c2.sources.c2so1.channels = c2ch1 c2ch2
c2.sources.c2so1.selector.type = replicating
c2.sources.c2so1.selector.optional = c2ch2
c2.sources.c2so1.bind = 10.0.15.71
c2.sources.c2so1.port = 8081
c2.sources.c2so1.delimiter=
c2.sources.c2so1.interceptors = i1 i2
c2.sources.c2so1.interceptors.i1.type = timestamp
c2.sources.c2so1.interceptors.i2.type = host
c2.channels.c2ch1.type = memory
c2.channels.c2ch1.keep-alive=30
c2.channels.c2ch1.byteCapacity=0
c2.channels.c2ch1.capacity = 100000
c2.channels.c2ch1.transactionCapacity = 100
c2.channels.c2ch2.type = memory
c2.channels.c2ch2.keep-alive=30
c2.channels.c2ch2.byteCapacity=0
c2.channels.c2ch2.capacity = 100000
c2.channels.c2ch2.transactionCapacity = 100
c2.sinks.c2si1.type = hdfs
c2.sinks.c2si1.channel = c2ch1
c2.sinks.c2si1.hdfs.path = hdfs://cloud40:9000/m2m/data02/%y-%m-%d
c2.sinks.c2si1.hdfs.filePrefix = log-%{host}
c2.sinks.c2si1.hdfs.idleTimeout=30
c2.sinks.c2si1.hdfs.callTimeout=300000
c2.sinks.c2si1.hdfs.rollCount=0
c2.sinks.c2si1.hdfs.rollSize=16000000
c2.sinks.c2si1.hdfs.rollInterval=0
c2.sinks.c2si1.hdfs.round = false
c2.sinks.c2si1.hdfs.roundValue = 10
c2.sinks.c2si1.hdfs.roundUnit = minute
c2.sinks.c2si1.hdfs.fileType = DataStream
c2.sinks.c2si1.hdfs.writeFormat = Text
c2.sinks.c2si1.hdfs.batchSize=100
c2.sinks.c2si1.serializer.appendNewline = true
c2.sinks.c2si2.type = org.apache.flume.plugins.KafkaSink
c2.sinks.c2si2.channel = c2ch2
c2.sinks.c2si2.metadata.broker.list=10.0.15.203:9092,10.0.15.204:9092,10.0.15.206:9092,10.0.15.207:9092,10.0.15.208:9092
c2.sinks.c2si2.serializer.class=kafka.serializer.StringEncoder
c2.sinks.c2si2.key.serializer.class=kafka.serializer.StringEncoder
c2.sinks.c2si2.request.required.acks=0
c2.sinks.c2si2.max.message.size=8000000
c2.sinks.c2si2.producer.type=sync
c2.sinks.c2si2.custom.encoding=UTF-8
c2.sinks.c2si2.custom.topic.name=p2p
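To sanity-check Example 1 once the agents are running, one can tail the p2p topic and list the files rolled into HDFS for the current day. A rough sketch, assuming Kafka 0.8-era console tooling (matching the StringEncoder / broker.list properties above); the Kafka install path and the ZooKeeper address zk1:2181 are placeholders to adjust for your environment:
# consume the p2p topic from the beginning (ZooKeeper address is a placeholder)
/path/to/kafka/bin/kafka-console-consumer.sh --zookeeper zk1:2181 --topic p2p --from-beginning
# list today's files under the c1 HDFS sink path; date +%y-%m-%d matches the sink's %y-%m-%d escape
hdfs dfs -ls hdfs://cloud40:9000/m2m/data01/$(date +%y-%m-%d)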
2. Example 2: flume-conf.properties
Compared with Example 1, this variant binds to 10.0.15.70, sets the record delimiter to '*', uses larger channel capacities and batch sizes, rolls HDFS files at 512000000 bytes instead of 16000000, and publishes to the m2m topic instead of p2p.
#############################################
c1.sources = c1so1
c1.channels = c1ch1 c1ch2
c1.sinks = c1si1 c1si2
c1.sources.c1so1.type=nio
c1.sources.c1so1.channels = c1ch1 c1ch2
c1.sources.c1so1.selector.type = replicating
#c1.sources.c1so1.selector.optional = c1ch2
c1.sources.c1so1.bind = 10.0.15.70
c1.sources.c1so1.port = 8080
c1.sources.c1so1.delimiter=*
c1.sources.c1so1.interceptors = i1 i2
c1.sources.c1so1.interceptors.i1.type = timestamp
c1.sources.c1so1.interceptors.i2.type = host
c1.channels.c1ch1.type = memory
c1.channels.c1ch1.keep-alive=300
c1.channels.c1ch1.byteCapacity=0
c1.channels.c1ch1.capacity = 200000
c1.channels.c1ch1.transactionCapacity = 1000
c1.channels.c1ch2.type = memory
c1.channels.c1ch2.keep-alive=300
c1.channels.c1ch2.byteCapacity=0
c1.channels.c1ch2.capacity = 200000
c1.channels.c1ch2.transactionCapacity = 1000
c1.sinks.c1si1.type = hdfs
c1.sinks.c1si1.channel = c1ch1
c1.sinks.c1si1.hdfs.path = hdfs://cloud40:9000/m2m/default/%y-%m-%d
c1.sinks.c1si1.hdfs.filePrefix = log-%{host}
c1.sinks.c1si1.hdfs.idleTimeout=30
c1.sinks.c1si1.hdfs.callTimeout=300000
c1.sinks.c1si1.hdfs.rollCount=0
c1.sinks.c1si1.hdfs.rollSize=512000000
c1.sinks.c1si1.hdfs.rollInterval=0
c1.sinks.c1si1.hdfs.round = false
c1.sinks.c1si1.hdfs.roundValue = 10
c1.sinks.c1si1.hdfs.roundUnit = minute
c1.sinks.c1si1.hdfs.fileType = DataStream
c1.sinks.c1si1.hdfs.writeFormat = Text
c1.sinks.c1si1.hdfs.batchSize=1000
c1.sinks.c1si1.serializer.appendNewline = true
c1.sinks.c1si2.type = org.apache.flume.plugins.KafkaSink
c1.sinks.c1si2.channel = c1ch2
c1.sinks.c1si2.metadata.broker.list=10.0.15.203:9092,10.0.15.204:9092,10.0.15.206:9092,10.0.15.207:9092,10.0.15.208:9092
c1.sinks.c1si2.serializer.class=kafka.serializer.StringEncoder
c1.sinks.c1si2.key.serializer.class=kafka.serializer.StringEncoder
c1.sinks.c1si2.request.required.acks=0
c1.sinks.c1si2.max.message.size=8000000
c1.sinks.c1si2.producer.type=sync
c1.sinks.c1si2.custom.encoding=UTF-8
c1.sinks.c1si2.custom.topic.name=m2m
#############################################
c2.sources = c2so1
c2.channels = c2ch1 c2ch2
c2.sinks = c2si1 c2si2
c2.sources.c2so1.type=nio
c2.sources.c2so1.channels = c2ch1 c2ch2
c2.sources.c2so1.selector.type = replicating
#c2.sources.c2so1.selector.optional = c2ch2
c2.sources.c2so1.bind = 10.0.15.70
c2.sources.c2so1.port = 8081
c2.sources.c2so1.delimiter=*
c2.sources.c2so1.interceptors = i1 i2
c2.sources.c2so1.interceptors.i1.type = timestamp
c2.sources.c2so1.interceptors.i2.type = host
c2.channels.c2ch1.type = memory
c2.channels.c2ch1.keep-alive=300
c2.channels.c2ch1.byteCapacity=0
c2.channels.c2ch1.capacity = 200000
c2.channels.c2ch1.transactionCapacity = 1000
c2.channels.c2ch2.type = memory
c2.channels.c2ch2.keep-alive=300
c2.channels.c2ch2.byteCapacity=0
c2.channels.c2ch2.capacity = 200000
c2.channels.c2ch2.transactionCapacity = 1000
c2.sinks.c2si1.type = hdfs
c2.sinks.c2si1.channel = c2ch1
c2.sinks.c2si1.hdfs.path = hdfs://cloud40:9000/m2m/data03/%y-%m-%d
c2.sinks.c2si1.hdfs.filePrefix = log-%{host}
c2.sinks.c2si1.hdfs.idleTimeout=30
c2.sinks.c2si1.hdfs.callTimeout=300000
c2.sinks.c2si1.hdfs.rollCount=0
c2.sinks.c2si1.hdfs.rollSize=512000000
c2.sinks.c2si1.hdfs.rollInterval=0
c2.sinks.c2si1.hdfs.round = false
c2.sinks.c2si1.hdfs.roundValue = 10
c2.sinks.c2si1.hdfs.roundUnit = minute
c2.sinks.c2si1.hdfs.fileType = DataStream
c2.sinks.c2si1.hdfs.writeFormat = Text
c2.sinks.c2si1.hdfs.batchSize=1000
c2.sinks.c2si1.serializer.appendNewline = true
c2.sinks.c2si2.type = org.apache.flume.plugins.KafkaSink
c2.sinks.c2si2.channel = c2ch2
c2.sinks.c2si2.metadata.broker.list=10.0.15.203:9092,10.0.15.204:9092,10.0.15.206:9092,10.0.15.207:9092,10.0.15.208:9092
c2.sinks.c2si2.serializer.class=kafka.serializer.StringEncoder
c2.sinks.c2si2.key.serializer.class=kafka.serializer.StringEncoder
c2.sinks.c2si2.request.required.acks=0
c2.sinks.c2si2.max.message.size=8000000
c2.sinks.c2si2.producer.type=sync
c2.sinks.c2si2.custom.encoding=UTF-8
c2.sinks.c2si2.custom.topic.name=m2m
Startup commands:
nohup /opt/apache-flume-1.4.0-bin/bin/flume-ng agent -c /opt/apache-flume-1.4.0-bin/conf/ -n c1 -f /opt/apache-flume-1.4.0-bin/conf/flume-conf.properties &
nohup /opt/apache-flume-1.4.0-bin/bin/flume-ng agent -c /opt/apache-flume-1.4.0-bin/conf/ -n c2 -f /opt/apache-flume-1.4.0-bin/conf/flume-conf.properties &
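To confirm both agents came up after the nohup launches, checking the processes and the agent log is usually enough. A minimal sketch, assuming the default Flume log4j settings write flume.log under a logs directory in the install path; adjust if your log4j.properties differs:
# both agents should show up as flume-ng java processes
ps -ef | grep flume-ng | grep -v grep
# follow the agent log for startup or sink errors (path is an assumption based on default logging config)
tail -f /opt/apache-flume-1.4.0-bin/logs/flume.log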