kafka+storm+hbase
Implement WordCount with Kafka + Storm + HBase, writing the word counts into the following HBase table (a creation sketch follows this list):
(1) Table name: wc
(2) Column family: result
(3) RowKey: word
(4) Field: count
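The wc table with the result column family must exist before the topology writes to it. It can be created from the hbase shell with create 'wc', 'result', or programmatically. Below is a minimal sketch using the HBase client API; the class name is hypothetical and the ZooKeeper quorum is assumed to be the same hosts used later in the topology (zeb, yjd, ylh).

package com.kafka.spout;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;

// Hypothetical one-off helper that creates the 'wc' table with the 'result' column family.
public class CreateWcTable {

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // assumed ZooKeeper quorum; adjust to the actual HBase cluster
        conf.set("hbase.zookeeper.quorum", "zeb,yjd,ylh");

        HBaseAdmin admin = new HBaseAdmin(conf);
        TableName tableName = TableName.valueOf("wc");
        if (!admin.tableExists(tableName)) {
            HTableDescriptor desc = new HTableDescriptor(tableName);
            desc.addFamily(new HColumnDescriptor("result"));
            admin.createTable(desc);
        }
        admin.close();
    }
}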
1. Solution:
(1) Step 1: first prepare the Kafka, Storm, and HBase related jar packages. The Maven dependencies are as follows:
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com</groupId>
    <artifactId>kafkaSpout</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <dependencies>
        <dependency>
            <groupId>org.apache.storm</groupId>
            <artifactId>storm-core</artifactId>
            <version>0.9.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.storm</groupId>
            <artifactId>storm-kafka</artifactId>
            <version>0.9.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka_2.10</artifactId>
            <version>0.8.1.1</version>
            <exclusions>
                <exclusion>
                    <groupId>org.apache.zookeeper</groupId>
                    <artifactId>zookeeper</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>log4j</groupId>
                    <artifactId>log4j</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>0.99.2</version>
            <exclusions>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.zookeeper</groupId>
                    <artifactId>zookeeper</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>com.google.protobuf</groupId>
            <artifactId>protobuf-java</artifactId>
            <version>2.5.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.curator</groupId>
            <artifactId>curator-framework</artifactId>
            <version>2.5.0</version>
            <exclusions>
                <exclusion>
                    <groupId>log4j</groupId>
                    <artifactId>log4j</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>jdk.tools</groupId>
            <artifactId>jdk.tools</artifactId>
            <version>1.7</version>
            <scope>system</scope>
            <systemPath>C:\Program Files\Java\jdk1.7.0_51\lib\tools.jar</systemPath>
        </dependency>
    </dependencies>
    <repositories>
        <repository>
            <id>central</id>
            <url>http://repo1.maven.org/maven2/</url>
            <snapshots>
                <enabled>false</enabled>
            </snapshots>
            <releases>
                <enabled>true</enabled>
            </releases>
        </repository>
        <repository>
            <id>clojars</id>
            <url>https://clojars.org/repo/</url>
            <snapshots>
                <enabled>true</enabled>
            </snapshots>
            <releases>
                <enabled>true</enabled>
            </releases>
        </repository>
        <repository>
            <id>scala-tools</id>
            <url>http://scala-tools.org/repo-releases</url>
            <snapshots>
                <enabled>true</enabled>
            </snapshots>
            <releases>
                <enabled>true</enabled>
            </releases>
        </repository>
        <repository>
            <id>conjars</id>
            <url>http://conjars.org/repo/</url>
            <snapshots>
                <enabled>true</enabled>
            </snapshots>
            <releases>
                <enabled>true</enabled>
            </releases>
        </repository>
    </repositories>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.1</version>
                <configuration>
                    <source>1.6</source>
                    <target>1.6</target>
                    <encoding>UTF-8</encoding>
                    <showDeprecation>true</showDeprecation>
                    <showWarnings>true</showWarnings>
                </configuration>
            </plugin>
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                    <archive>
                        <manifest>
                            <mainClass></mainClass>
                        </manifest>
                    </archive>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
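Note: the HBaseBolt and SimpleHBaseMapper classes used in step (4) come from Storm's storm-hbase integration module, which the pom above does not list. Assuming that module is published for the Storm version in use (the version below is an assumption; align it with storm-core), the extra dependency would look roughly like this:

<dependency>
    <groupId>org.apache.storm</groupId>
    <artifactId>storm-hbase</artifactId>
    <!-- assumed version: use the one matching your storm-core -->
    <version>0.9.3</version>
</dependency>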
(2) The data coming from Kafka is split into words by the LevelSplit bolt and then emitted to the next bolt. The code is as follows:
package com.kafka.spout;

import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

public class LevelSplit extends BaseBasicBolt {

    public void execute(Tuple tuple, BasicOutputCollector collector) {
        // e.g. "the cow jumped over the moon"
        String words = tuple.getString(0);
        String[] va = words.split(" ");
        for (String word : va) {
            collector.emit(new Values(word));
        }
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word"));
    }
}
(3) The data emitted by the LevelSplit bolt is counted in the LevelCount bolt and then sent on to the HBase bolt. The code is as follows:
package com.kafka.spout;

import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;

import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

public class LevelCount extends BaseBasicBolt {

    Map<String, Integer> counts = new HashMap<String, Integer>();

    public void execute(Tuple tuple, BasicOutputCollector collector) {
        String word = tuple.getString(0);
        Integer count = counts.get(word);
        if (count == null)
            count = 0;
        count++;
        counts.put(word, count);
        for (Entry<String, Integer> e : counts.entrySet()) {
            System.out.println(e.getKey() + "----------->" + e.getValue());
        }
        collector.emit(new Values(word, count));
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word", "count"));
    }
}
(4) Prepare the Kafka and HBase connection settings, wire up the whole topology, and submit it. The code is as follows:
package com.kafka.spout;

import java.util.Map;

import com.google.common.collect.Maps;
//import org.apache.storm.guava.collect.Maps;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.spout.SchemeAsMultiScheme;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.tuple.Fields;
import backtype.storm.utils.Utils;
import storm.kafka.BrokerHosts;
import storm.kafka.KafkaSpout;
import storm.kafka.SpoutConfig;
import storm.kafka.ZkHosts;
// The original listing omitted these imports; they are assumed to come from the
// storm-hbase module mentioned after the pom above.
import org.apache.storm.hbase.bolt.HBaseBolt;
import org.apache.storm.hbase.bolt.mapper.SimpleHBaseMapper;

public class StormKafkaTopo {

    public static void main(String[] args) {
        // ZooKeeper hosts for Kafka, topic "yjd", ZK root "/storm", consumer id "kafkaspout"
        BrokerHosts brokerHosts = new ZkHosts("zeb,yjd,ylh");
        SpoutConfig spoutConfig = new SpoutConfig(brokerHosts, "yjd", "/storm", "kafkaspout");
        Config conf = new Config();
        spoutConfig.scheme = new SchemeAsMultiScheme(new MessageScheme());

        // map tuples to HBase: row key = "word", column family "result", column "count"
        SimpleHBaseMapper mapper = new SimpleHBaseMapper();
        mapper.withColumnFamily("result");
        mapper.withColumnFields(new Fields("count"));
        mapper.withRowKeyField("word");

        Map<String, Object> map = Maps.newTreeMap();
        map.put("hbase.rootdir", "hdfs://zeb:9000/hbase");
        map.put("hbase.zookeeper.quorum", "zeb:2181,yjd:2181,ylh:2181");

        // hbase-bolt
        HBaseBolt hBaseBolt = new HBaseBolt("wc", mapper).withConfigKey("hbase.conf");
        conf.setDebug(true);
        conf.put("hbase.conf", map);

        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("spout", new KafkaSpout(spoutConfig));
        builder.setBolt("split", new LevelSplit(), 1).shuffleGrouping("spout");
        builder.setBolt("count", new LevelCount(), 1).fieldsGrouping("split", new Fields("word"));
        builder.setBolt("hbase", hBaseBolt, 1).shuffleGrouping("count");

        if (args != null && args.length > 0) {
            // submit to the cluster
            try {
                StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
            } catch (AlreadyAliveException e) {
                e.printStackTrace();
            } catch (InvalidTopologyException e) {
                e.printStackTrace();
            }
        } else {
            // run in local mode
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("Topotest1121", conf, builder.createTopology());
            Utils.sleep(1000000);
            cluster.killTopology("Topotest1121");
            cluster.shutdown();
        }
    }
}
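The MessageScheme assigned to spoutConfig.scheme above is referenced but not shown in the original post. Here is a minimal sketch, assuming it simply decodes each Kafka message as a UTF-8 string and emits it as one field; the field name "msg" is an assumption (LevelSplit only reads position 0, so any name works).

package com.kafka.spout;

import java.io.UnsupportedEncodingException;
import java.util.List;

import backtype.storm.spout.Scheme;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;

// Hypothetical MessageScheme: turns each raw Kafka message into a one-field tuple.
public class MessageScheme implements Scheme {

    public List<Object> deserialize(byte[] ser) {
        try {
            // decode the message bytes as a UTF-8 sentence
            return new Values(new String(ser, "UTF-8"));
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
    }

    public Fields getOutputFields() {
        return new Fields("msg");
    }
}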
(5) Produce data from the console on the Kafka side, as follows:
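The original screenshot of the console producer is not reproduced here. With Kafka 0.8.x this is typically bin/kafka-console-producer.sh --broker-list <broker>:9092 --topic yjd; the sketch below does the equivalent with the old 0.8 Java producer API (the class name and broker address are assumptions).

package com.kafka.spout;

import java.util.Properties;

import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;

// Hypothetical helper that pushes a few test sentences into the "yjd" topic.
public class TestProducer {

    public static void main(String[] args) {
        Properties props = new Properties();
        // assumed broker address; adjust to the actual Kafka broker host:port
        props.put("metadata.broker.list", "zeb:9092");
        props.put("serializer.class", "kafka.serializer.StringEncoder");

        Producer<String, String> producer = new Producer<String, String>(new ProducerConfig(props));
        producer.send(new KeyedMessage<String, String>("yjd", "the cow jumped over the moon"));
        producer.send(new KeyedMessage<String, String>("yjd", "the man went to the store"));
        producer.close();
    }
}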
2. Execution result screenshot:
3. Problems encountered:
(1) After everything was ready, the topology was submitted and the code was run. Error 1 occurred, as follows:
Solution: it turned out that the dependency versions had to be consistent; after aligning the versions, the problem was resolved.
(2) Error 2 occurred, as follows:
Solution: the HMaster and HRegionServer processes of HBase had not been started. After starting them, the problem was resolved.
From the ITPUB blog. Link: http://blog.itpub.net/31543790/viewspace-2658918/. Please credit the source when republishing.