Home | About | Sematext search-lucene.com search-hadoop.com
 Search Hadoop and all its subprojects:

Switch to Plain View
Flume >> mail # user >> HBaseSink is very slow


Copy link to this message
-
HBaseSink is very slow
Hi,
Could you please advise on the maximum number of log events the HBaseSink can process per second? Currently my application is generating 5000 log events/second, but there is a large backlog, and it seems the HBaseSink is not processing even 300 log events/second.

I have configured a sink group with 5 HBase sinks in a load-balancing fashion. Would all 5 HBase sinks in the sink group execute in parallel?

Here is my flume-conf.properties file:

agent.sources =     source1 1374869469492 1374947746264 1374947757841 1374947770965 1374948166450 1374948182966 1374948198075 1374948216652 1374948231355 1374948246308 1374948260698
agent.channels = fileChannel
agent.sinks = hbaseSink1 hbaseSink2 hbaseSink3 hbaseSink4 hbaseSink5
agent.sinkgroups = sinkGroup1
agent.sinkgroups.sinkGroup1.sinks = hbaseSink1 hbaseSink2 hbaseSink3 hbaseSink4 hbaseSink5
agent.sinkgroups.sinkGroup1.processor.type = load_balance

# Channel's type is defined.
agent.channels.fileChannel.type = file
agent.channels.fileChannel.checkpointDir = /var/log/flume-ng/file-channel/checkpoint
agent.channels.fileChannel.dataDirs = /var/log/flume-ng/file-channel/data
agent.channels.fileChannel.transactionCapacity = 1000
agent.channels.fileChannel.checkpointInterval = 30000
agent.channels.fileChannel.maxFileSize = 2146435071
agent.channels.fileChannel.minimumRequiredSpace = 524288000
agent.channels.fileChannel.keep-alive = 5
#agent.channels.fileChannel.write-timeout = 10
agent.channels.fileChannel.write-timeout = 50
agent.channels.fileChannel.checkpoint-timeout = 600
agent.channels.fileChannel.capacity = 50000000

#HBaseSink1
agent.sinks.hbaseSink1.type = org.apache.flume.sink.hbase.HBaseSink
agent.sinks.hbaseSink1.table=elf_log
agent.sinks.hbaseSink1.columnFamily=content
agent.sinks.hbaseSink1.serializer=com.citi.sponge.flume.collector.sink.LogHbaseEventSerializer
agent.sinks.hbaseSink1.batchSize=200
agent.sinks.hbaseSink1.channel = fileChannel

#HBaseSink2
agent.sinks.hbaseSink2.type = org.apache.flume.sink.hbase.HBaseSink
agent.sinks.hbaseSink2.table=elf_log
agent.sinks.hbaseSink2.columnFamily=content
agent.sinks.hbaseSink2.serializer=com.citi.sponge.flume.collector.sink.LogHbaseEventSerializer
agent.sinks.hbaseSink2.batchSize=200
agent.sinks.hbaseSink2.channel = fileChannel

#HBaseSink3
agent.sinks.hbaseSink3.type = org.apache.flume.sink.hbase.HBaseSink
agent.sinks.hbaseSink3.table=elf_log
agent.sinks.hbaseSink3.columnFamily=content
agent.sinks.hbaseSink3.serializer=com.citi.sponge.flume.collector.sink.LogHbaseEventSerializer
agent.sinks.hbaseSink3.batchSize=200
agent.sinks.hbaseSink3.channel = fileChannel

#HBaseSink4
agent.sinks.hbaseSink4.type = org.apache.flume.sink.hbase.HBaseSink
agent.sinks.hbaseSink4.table=elf_log
agent.sinks.hbaseSink4.columnFamily=content
agent.sinks.hbaseSink4.serializer=com.citi.sponge.flume.collector.sink.LogHbaseEventSerializer
agent.sinks.hbaseSink4.batchSize=200
agent.sinks.hbaseSink4.channel = fileChannel

#HBaseSink5
agent.sinks.hbaseSink5.type = org.apache.flume.sink.hbase.HBaseSink
agent.sinks.hbaseSink5.table=elf_log
agent.sinks.hbaseSink5.columnFamily=content
agent.sinks.hbaseSink5.serializer=com.citi.sponge.flume.collector.sink.LogHbaseEventSerializer
agent.sinks.hbaseSink5.batchSize=200
agent.sinks.hbaseSink5.channel = fileChannel
agent.sources.1374869469492.batchSize = 1
agent.sources.1374869469492.channels = fileChannel
agent.sources.1374869469492.command = tail -F /var/log/flume-ng/flume.log
agent.sources.1374869469492.interceptors = logIntercept
agent.sources.1374869469492.interceptors.logIntercept.appId = 153299
agent.sources.1374869469492.interceptors.logIntercept.env = regex
agent.sources.1374869469492.interceptors.logIntercept.hostName = vm-e61b-fe34.nam.nsroot.net
agent.sources.1374869469492.interceptors.logIntercept.logFileName = flume.log
agent.sources.1374869469492.interceptors.logIntercept.logFilePath = /var/log/flume-ng/
agent.sources.1374869469492.interceptors.logIntercept.logType = flume log
agent.sources.1374869469492.interceptors.logIntercept.type = com.citi.sponge.flume.agent.source.LogInterceptor$Builder
agent.sources.1374869469492.type = exec

agent.sources.1374947746264.batchSize = 1
agent.sources.1374947746264.channels = fileChannel
agent.sources.1374947746264.command = tail -F /var/log/creditcard/AggKeyListener.0.2013-01-19
agent.sources.1374947746264.interceptors = logIntercept
agent.sources.1374947746264.interceptors.logIntercept.appId = 153299
agent.sources.1374947746264.interceptors.logIntercept.env = testt
agent.sources.1374947746264.interceptors.logIntercept.hostName = vm-e61b-fe34.nam.nsroot.net
agent.sources.1374947746264.interceptors.logIntercept.logFileName = AggKeyListener-0-2013-01-19
agent.sources.1374947746264.interceptors.logIntercept.logFilePath = /var/log/creditcard/
agent.sources.1374947746264.interceptors.logIntercept.logType = creditcard log
agent.sources.1374947746264.interceptors.logIntercept.type = com.citi.sponge.flume.agent.source.LogInterceptor$Builder
agent.sources.1374947746264.type = exec

agent.sources.1374947757841.batchSize = 1
agent.sources.1374947757841.channels = fileChannel
agent.sources.1374947757841.command = tail -F /var/log/creditcard/AggKeyListener.1.2013-01-19
agent.sources.1374947757841.interceptors = logIntercept
agent.sources.1374947757841.interceptors.logIntercept.appId = 153299
agent.sources.1374947757841.interceptors.logIntercept.env = test
agent.sources.1374947757841.interceptors.logIntercept.hostName = vm-e61b-fe34.nam.nsroot.net
agent.sources.1374947757841.interceptors.logIntercept.logFileName = AggKeyListener.1.2013-01-19
agent.sources.1374947757841.interceptors.logIntercept.logFilePath = /var/log/creditcard/
agent.sources.1374947757841.interceptors.logIntercept.logType = creditcard log
agent.sources.1374947757841.interceptors.logIntercept.type = com.citi.sponge.flume.agent.source.LogInterceptor$Builder
agent.sources.1374947757841.type = exec

agent.sources.1374947770965.batchSize = 1
agent.sources.1374947770965.channels = fileCh
+
Jeff Lord 2013-07-29, 22:13