├── README.md
├── pom.xml
└── src
    ├── main
    │   ├── java
    │   │   └── com
    │   │       └── lm
    │   │           ├── Application.java
    │   │           ├── exception
    │   │           │   ├── ExceptionHandler.java
    │   │           │   └── MessageException.java
    │   │           ├── kafkahbase
    │   │           │   ├── HBasePoolConnection.java
    │   │           │   ├── HBaseUtils.java
    │   │           │   ├── OffsetHBaseUtils.java
    │   │           │   ├── OggKafkaJsonUtils.java
    │   │           │   └── OggKafkaUtils.java
    │   │           ├── kryo
    │   │           │   └── MyRegistrator.java
    │   │           ├── spark
    │   │           │   ├── SparkStreamingKafka.java
    │   │           │   ├── SparkStreamingKafka2.java
    │   │           │   └── SparkStreamingKafka3.java
    │   │           └── utils
    │   │               ├── BeanUtil.java
    │   │               ├── ConstUtil.java
    │   │               └── SpringUtils.java
    │   └── resources
    │       ├── applicationContext.xml
    │       ├── config.properties
    │       ├── hbase-site.xml
    │       └── log4j.properties
    └── test
        └── java
            └── com
                └── lm
                    └── spring_sparkstreaming_kafka10
                        └── AppTest.java
/README.md:
--------------------------------------------------------------------------------
1 | # spring-sparkstreaming-kafka-10
2 | Integration of Spring, Spark Streaming, and Kafka 0.10, plus handling of the exceptions that show up along the way.
3 | Blog post (in Chinese):
4 | http://blog.csdn.net/a123demi/article/details/74935849
5 |
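6 | ## Running
7 |
8 | The job is bootstrapped from a plain `main` method: the Spring context defined in `applicationContext.xml` is loaded, the streaming bean is looked up, and `processSparkStreaming()` is called (see `src/main/java/com/lm/Application.java`). A minimal sketch of that entry point, with an illustrative class name:
9 |
10 | ```java
11 | import com.lm.spark.SparkStreamingKafka2;
12 | import com.lm.utils.SpringUtils;
13 |
14 | public class Launcher {
15 |     public static void main(String[] args) throws InterruptedException {
16 |         // resolve the streaming job from the Spring context and start it
17 |         SparkStreamingKafka2 job = SpringUtils.getApplicationContext()
18 |                 .getBean(SparkStreamingKafka2.class);
19 |         job.processSparkStreaming();
20 |     }
21 | }
22 | ```
23 |
24 | Configuration (Kafka brokers, topics, Spark master, batch interval) lives in `src/main/resources/config.properties`; the project is packaged by `maven-assembly-plugin` as a `jar-with-dependencies` (see `pom.xml`).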
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.winit.iwm</groupId>
  <artifactId>p-ogg-kafka-spark-hbase</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>p-ogg-kafka-spark-hbase</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <spark-version>2.1.0</spark-version>
    <spring.version>4.3.9.RELEASE</spring.version>
  </properties>

  <dependencies>
    <!-- fastjson, used to parse the OGG JSON messages -->
    <dependency>
      <groupId>com.alibaba</groupId>
      <artifactId>fastjson</artifactId>
      <version>1.2.32</version>
    </dependency>

    <!-- Spring -->
    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-core</artifactId>
      <version>${spring.version}</version>
    </dependency>
    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-context</artifactId>
      <version>${spring.version}</version>
    </dependency>
    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-context-support</artifactId>
      <version>4.3.9.RELEASE</version>
    </dependency>
    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-test</artifactId>
      <version>${spring.version}</version>
    </dependency>
    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-aop</artifactId>
      <version>${spring.version}</version>
    </dependency>
    <dependency>
      <groupId>org.aspectj</groupId>
      <artifactId>aspectjrt</artifactId>
      <version>1.8.9</version>
    </dependency>
    <dependency>
      <groupId>org.aspectj</groupId>
      <artifactId>aspectjweaver</artifactId>
      <version>1.8.9</version>
    </dependency>

    <!-- Spark -->
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_2.11</artifactId>
      <version>${spark-version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-sql_2.11</artifactId>
      <version>${spark-version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-hive_2.11</artifactId>
      <version>${spark-version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-streaming_2.11</artifactId>
      <version>${spark-version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>2.7.3</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-graphx_2.11</artifactId>
      <version>${spark-version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
      <version>2.1.0</version>
    </dependency>

    <dependency>
      <groupId>org.springframework.data</groupId>
      <artifactId>spring-data-hadoop</artifactId>
      <version>2.4.0.RELEASE</version>
    </dependency>

    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>

    <!-- HBase -->
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-client</artifactId>
      <version>1.3.1</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-server</artifactId>
      <version>1.3.1</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-common</artifactId>
      <version>1.3.1</version>
    </dependency>

    <dependency>
      <groupId>org.apache.commons</groupId>
      <artifactId>commons-lang3</artifactId>
      <version>3.3.2</version>
    </dependency>
    <dependency>
      <groupId>commons-io</groupId>
      <artifactId>commons-io</artifactId>
      <version>2.4</version>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <plugin>
        <artifactId>maven-assembly-plugin</artifactId>
        <configuration>
          <descriptorRefs>
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
        </configuration>
        <executions>
          <execution>
            <id>make-assembly</id>
            <phase>package</phase>
            <goals>
              <goal>single</goal>
            </goals>
          </execution>
        </executions>
      </plugin>

      <plugin>
        <groupId>org.codehaus.mojo</groupId>
        <artifactId>exec-maven-plugin</artifactId>
        <version>1.2.1</version>
        <executions>
          <execution>
            <goals>
              <goal>exec</goal>
            </goals>
          </execution>
        </executions>
        <configuration>
          <executable>java</executable>
          <includeProjectDependencies>true</includeProjectDependencies>
          <includePluginDependencies>false</includePluginDependencies>
          <classpathScope>compile</classpathScope>
          <mainClass>com.lm.spark.SparkApp.App</mainClass>
        </configuration>
      </plugin>

      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.1</version>
        <configuration>
          <source>1.8</source>
          <target>1.8</target>
          <showWarnings>true</showWarnings>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>
--------------------------------------------------------------------------------
/src/main/java/com/lm/Application.java:
--------------------------------------------------------------------------------
1 | package com.lm;
2 |
3 | import com.lm.exception.MessageException;
4 | import com.lm.spark.SparkStreamingKafka2;
5 | import com.lm.utils.SpringUtils;
6 |
7 | /**
8 | * Hello world!
9 | *
10 | */
11 | public class Application {
12 | public static void main(String[] args) {
13 | SparkStreamingKafka2 sparkStreamingKafka =
14 | SpringUtils.getApplicationContext().getBean(SparkStreamingKafka2.class);
15 | try {
16 | sparkStreamingKafka.processSparkStreaming();
17 | } catch (InterruptedException e) {
18 | // TODO Auto-generated catch block
19 | e.printStackTrace();
20 | }catch (MessageException e) {
21 |
22 | System.out.println(e.getMessage());
23 | }
24 | }
25 | }
26 |
--------------------------------------------------------------------------------
/src/main/java/com/lm/exception/ExceptionHandler.java:
--------------------------------------------------------------------------------
1 | package com.lm.exception;
2 |
3 | import org.aspectj.lang.JoinPoint;
4 | import org.aspectj.lang.annotation.AfterThrowing;
5 | import org.aspectj.lang.annotation.Aspect;
6 | import org.aspectj.lang.annotation.Pointcut;
7 | import org.slf4j.Logger;
8 | import org.slf4j.LoggerFactory;
9 | import org.springframework.stereotype.Component;
10 | import org.springframework.util.StringUtils;
11 |
12 | @Aspect
13 | @Component
14 | public class ExceptionHandler {
15 | private static final Logger LOGGER = LoggerFactory.getLogger(ExceptionHandler.class);
16 |
17 | /**
18 | * @within(org.springframework.stereotype.Service) would intercept every method of classes annotated with @Service;
19 | * @annotation(org.springframework.web.bind.annotation.RequestMapping)
20 | * would intercept methods annotated with @RequestMapping.
21 | */
22 | @Pointcut("execution(public * *(..))")
23 | private void handlerPointcut() {
24 | }
25 |
26 | /**
27 | * Intercepts service-layer exceptions, logs them and records the corresponding error message.
28 | * Currently only Exception is intercepted; whether Error should be intercepted as well is still open.
29 | * @param e
30 | *            the thrown exception
31 | */
32 | @AfterThrowing(pointcut = "handlerPointcut()", throwing = "e")
33 | public void handle(JoinPoint point, Exception e) {
34 | String signature = point.getSignature().toString();
35 | String errorMsg = getMessage(signature) == null
36 | ? (StringUtils.isEmpty(e.getMessage()) ? "服务异常" : e.getMessage()) : getMessage(signature);
37 | LOGGER.error(errorMsg);
38 | // throw new ServiceException(errorMsg, e);
39 | }
40 |
41 | /**
42 | * Looks up the message associated with a method signature.
43 | *
44 | * @param signature
45 | *            the method signature
46 | * @return the corresponding message
47 | */
48 | private String getMessage(String signature) {
49 | return null;
50 | }
51 | }
52 |
--------------------------------------------------------------------------------
/src/main/java/com/lm/exception/MessageException.java:
--------------------------------------------------------------------------------
1 | package com.lm.exception;
2 |
3 | public class MessageException extends RuntimeException {
4 | private static final long serialVersionUID = 8284164474119891530L;
5 |
6 | public MessageException() {
7 | super();
8 | }
9 |
10 | public MessageException(String message) {
11 | super(message);
12 | }
13 |
14 | public MessageException(String message, Throwable cause) {
15 | super(message, cause);
16 | }
17 |
18 | public MessageException(Throwable cause) {
19 | super(cause);
20 | }
21 |
22 | protected MessageException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) {
23 | super(message, cause, enableSuppression, writableStackTrace);
24 | }
25 | }
26 |
--------------------------------------------------------------------------------
/src/main/java/com/lm/kafkahbase/HBasePoolConnection.java:
--------------------------------------------------------------------------------
1 | package com.lm.kafkahbase;
2 |
3 | import java.io.IOException;
4 | import java.util.concurrent.ExecutorService;
5 | import java.util.concurrent.Executors;
6 |
7 | import org.apache.hadoop.conf.Configuration;
8 | import org.apache.hadoop.hbase.HBaseConfiguration;
9 | import org.apache.hadoop.hbase.client.Connection;
10 | import org.apache.hadoop.hbase.client.ConnectionFactory;
11 |
12 | import com.lm.exception.MessageException;
13 |
14 | /**
15 | * HBase connection pool.
16 | *
17 | * @author liangming.deng
18 | * @date 2017-07-07
19 | *
20 | */
21 | public class HBasePoolConnection {
22 | private HBasePoolConnection() {
23 | }
24 |
25 | // shared connection
26 | private static Connection connection = null;
27 | // HBase configuration
28 | static Configuration hbaseConfiguration = HBaseConfiguration.create();
29 |
30 | public static Connection getConnection() {
31 | if (connection == null) {
32 | ExecutorService pool = Executors.newFixedThreadPool(10); // fixed-size thread pool backing the connection
33 | hbaseConfiguration.addResource("hbase-site.xml");
34 | try {
35 | connection = ConnectionFactory.createConnection(hbaseConfiguration, pool); // create the connection from the configuration and thread pool
36 | } catch (IOException e) {
37 | throw new MessageException("Hbase连接池初始化错误", e);
38 | }
39 | }
40 | return connection;
41 | }
42 |
43 | }
--------------------------------------------------------------------------------
/src/main/java/com/lm/kafkahbase/HBaseUtils.java:
--------------------------------------------------------------------------------
1 | package com.lm.kafkahbase;
2 |
3 | import java.io.IOException;
4 | import java.util.ArrayList;
5 | import java.util.HashMap;
6 | import java.util.Iterator;
7 | import java.util.List;
8 | import java.util.Map;
9 |
10 | import org.apache.hadoop.hbase.Cell;
11 | import org.apache.hadoop.hbase.CellUtil;
12 | import org.apache.hadoop.hbase.HColumnDescriptor;
13 | import org.apache.hadoop.hbase.HTableDescriptor;
14 | import org.apache.hadoop.hbase.MasterNotRunningException;
15 | import org.apache.hadoop.hbase.TableName;
16 | import org.apache.hadoop.hbase.ZooKeeperConnectionException;
17 | import org.apache.hadoop.hbase.client.Admin;
18 | import org.apache.hadoop.hbase.client.BufferedMutator;
19 | import org.apache.hadoop.hbase.client.Delete;
20 | import org.apache.hadoop.hbase.client.Get;
21 | import org.apache.hadoop.hbase.client.HTable;
22 | import org.apache.hadoop.hbase.client.Mutation;
23 | import org.apache.hadoop.hbase.client.Put;
24 | import org.apache.hadoop.hbase.client.Result;
25 | import org.apache.hadoop.hbase.client.ResultScanner;
26 | import org.apache.hadoop.hbase.client.Scan;
27 | import org.apache.hadoop.hbase.client.Table;
28 | import org.apache.hadoop.hbase.filter.BinaryComparator;
29 | import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
30 | import org.apache.hadoop.hbase.filter.Filter;
31 | import org.apache.hadoop.hbase.filter.RowFilter;
32 | import org.apache.hadoop.hbase.util.Bytes;
33 | import org.apache.hadoop.hbase.util.Hash;
34 | import org.slf4j.Logger;
35 | import org.slf4j.LoggerFactory;
36 |
37 | import com.lm.exception.MessageException;
38 |
39 | /**
40 | * HBase helper utilities.
41 | */
42 | public class HBaseUtils {
43 | private final static Logger logger = LoggerFactory.getLogger(HBaseUtils.class);
44 |
45 | /**
46 | * Releases HBase resources.
47 | *
48 | * @param admin
49 | * @param table
50 | * @param bufferedMutator
51 | */
52 | public static void release(Admin admin, Table table, BufferedMutator bufferedMutator) {
53 | try {
54 | if (admin != null)
55 | admin.close();
56 | if (table != null)
57 | table.close();
58 | if (bufferedMutator != null)
59 | bufferedMutator.close();
60 | } catch (IOException e) {
61 | throw new MessageException("Hbase判断表是否存在", e);
62 | }
63 | }
64 |
65 | /**
66 | * Checks whether a table exists.
67 | *
68 | * @param tableName
69 | * @return
70 | */
71 | public static boolean isExistTable(String tableName) {
72 | boolean isExist = false;
73 | Admin admin = null;
74 |
75 | try {
76 | admin = HBasePoolConnection.getConnection().getAdmin();
77 | TableName table = TableName.valueOf(tableName);
78 | if (admin.tableExists(table)) { // the table already exists
79 | isExist = true;
80 | }
81 |
82 | } catch (IOException e) {
83 | throw new MessageException("Hbase判断表是否存在", e);
84 |
85 | } finally {
86 | release(admin, null, null);
87 | }
88 |
89 | return isExist;
90 | }
91 |
92 | /**
93 | * Creates a table if it does not already exist.
94 | *
95 | * @param tablename
96 | *            table name
97 | * @param columnFamily
98 | *            column family
99 | * @throws ZooKeeperConnectionException
100 | *
101 | * @throws MasterNotRunningException
102 | */
103 | public static void createTable(String tablename, String columnFamily) {
104 |
105 | Admin admin = null;
106 |
107 | try {
108 | admin = HBasePoolConnection.getConnection().getAdmin();
109 | TableName tableName = TableName.valueOf(tablename);
110 | if (!admin.tableExists(tableName)) { // only create it if it does not exist yet
111 | HTableDescriptor tableDesc = new HTableDescriptor(tableName);
112 | tableDesc.addFamily(new HColumnDescriptor(columnFamily));
113 | admin.createTable(tableDesc);
114 | logger.info(tablename + "表已经成功创建!----------------");
115 | }
116 |
117 | } catch (IOException e) {
118 | throw new MessageException("Hbase创建表", e);
119 | } finally {
120 | release(admin, null, null);
121 | }
122 |
123 | }
124 |
125 | /**
126 | * Inserts a single cell into a table.
127 | *
128 | * @param tableName
129 | *            table name
130 | * @param row
131 | *            row key
132 | * @param columnFamily
133 | *            column family
134 | * @param column
135 | *            column qualifier
136 | * @param data
137 | *            the value to insert
138 | */
139 | public static void putData(String tableName, String row, String columnFamily, String column, String data) {
140 | TableName tableNameObj = null;
141 | Table table = null;
142 | try {
143 | tableNameObj = TableName.valueOf(tableName);
144 | table = HBasePoolConnection.getConnection().getTable(tableNameObj);
145 | Put put = new Put(Bytes.toBytes(row));
146 | put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column), Bytes.toBytes(data));
147 | table.put(put);
148 | logger.info("-------put '" + row + "','" + columnFamily + ":" + column + "','" + data + "'");
149 | } catch (IOException e) {
150 | throw new MessageException("Hbase插入数据", e);
151 | } finally {
152 | release(null, table, null);
153 | }
154 |
155 | }
156 |
157 | /**
158 | * Inserts all entries of a map as columns of one row.
159 | *
160 | * @param tableName
161 | * @param row
162 | * @param columnFamily
163 | * @param datas
164 | *            column qualifier to value map
165 | */
166 | public static void putData(String tableName, String row, String columnFamily, Map<String, Object> datas) {
167 | if (null == datas || datas.isEmpty()) {
168 | return;
169 | }
170 | TableName tableNameObj = null;
171 | Table table = null;
172 | try {
173 | tableNameObj = TableName.valueOf(tableName);
174 | table = HBasePoolConnection.getConnection().getTable(tableNameObj);
175 | Put put = new Put(Bytes.toBytes(row));
176 | Iterator<String> columns = datas.keySet().iterator();
177 | while (columns.hasNext()) {
178 | String column = columns.next();
179 | put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column),
180 | Bytes.toBytes(datas.get(column).toString()));
181 | }
182 | table.put(put);
183 | } catch (IOException e) {
184 | throw new MessageException("Hbase插入对象数据", e);
185 | } finally {
186 | release(null, table, null);
187 | }
188 | }
189 |
190 | /**
191 | * Reads the value of the given row / column family / qualifier.
192 | *
193 | * @param tableName
194 | * @param row
195 | * @param columnFamily
196 | * @param column
197 | * @return the cell value
198 | */
199 | public static String getData(String tableName, String row, String columnFamily, String column) {
200 |
201 | TableName tableNameObj = null;
202 | Table table = null;
203 | String value = "";
204 | try {
205 | tableNameObj = TableName.valueOf(tableName);
206 | table = HBasePoolConnection.getConnection().getTable(tableNameObj);
207 | Get get = new Get(Bytes.toBytes(row));
208 | Result result = table.get(get);
209 | byte[] rb = result.getValue(Bytes.toBytes(columnFamily), Bytes.toBytes(column));
210 | value = new String(rb, "UTF-8");
211 | logger.info("------" + value);
212 | } catch (IOException e) {
213 | throw new MessageException("Hbase获取指定行", e);
214 | } finally {
215 | release(null, table, null);
216 | }
217 |
218 | return value;
219 | }
220 |
221 | /**
222 | * Gets a ResultScanner over rows selected by a row-key filter.
223 | *
224 | * @param tableName
225 | * @param value
226 | * @return the scanner
227 | */
228 | public static ResultScanner getResultScanner(String tableName, String value) {
229 |
230 | TableName tableNameObj = null;
231 | Table table = null;
232 | ResultScanner rs = null;
233 | try {
234 | tableNameObj = TableName.valueOf(tableName);
235 | table = HBasePoolConnection.getConnection().getTable(tableNameObj);
236 | Filter filter = new RowFilter(CompareOp.GREATER_OR_EQUAL, new BinaryComparator(Bytes.toBytes(value + "_")));
237 | Scan s = new Scan();
238 | s.setFilter(filter);
239 | rs = table.getScanner(s);
240 | } catch (IOException e) {
241 | throw new MessageException("Hbase获取指定行", e);
242 | } finally {
243 | release(null, table, null);
244 | }
245 |
246 | return rs;
247 | }
248 |
249 | /**
250 | * Scans the whole table and logs every cell.
251 | *
252 | * @param tableName
253 | *            table name
254 | */
255 | public static void scanAll(String tableName) {
256 |
257 | TableName tableNameObj = null;
258 | Table table = null;
259 | ResultScanner resultScanner = null;
260 | try {
261 | tableNameObj = TableName.valueOf(tableName);
262 | table = HBasePoolConnection.getConnection().getTable(tableNameObj);
263 | Scan scan = new Scan();
264 | resultScanner = table.getScanner(scan);
265 | for (Result result : resultScanner) {
266 | List<Cell> cells = result.listCells();
267 | for (Cell cell : cells) {
268 | String row = new String(result.getRow(), "UTF-8");
269 | String family = new String(CellUtil.cloneFamily(cell), "UTF-8");
270 | String qualifier = new String(CellUtil.cloneQualifier(cell), "UTF-8");
271 | String value = new String(CellUtil.cloneValue(cell), "UTF-8");
272 | logger.info(":::::[row:" + row + "],[family:" + family + "],[qualifier:" + qualifier + "],[value:"
273 | + value + "]");
274 | }
275 | }
276 | } catch (IOException e) {
277 | throw new MessageException("Hbase获取指定行", e);
278 | } finally {
279 | release(null, table, null);
280 | }
281 |
282 | }
283 |
284 | /*
285 | * Deletes a single column from a row.
286 | *
287 | * @tableName table name
288 | *
289 | * @rowKey row key
290 | *
291 | * @familyName column family
292 | *
293 | * @columnName column qualifier
294 | */
295 | public static void deleteColumn(String tableName, String rowKey, String familyName, String columnName) {
296 | TableName tableNameObj = null;
297 | Table table = null;
298 | try {
299 | tableNameObj = TableName.valueOf(tableName);
300 | table = HBasePoolConnection.getConnection().getTable(tableNameObj);
301 | Delete deleteColumn = new Delete(Bytes.toBytes(rowKey));
302 | deleteColumn.addColumn(Bytes.toBytes(familyName), Bytes.toBytes(columnName));
303 | table.delete(deleteColumn);
304 | logger.info(familyName + ":" + columnName + " is deleted!");
305 | } catch (IOException e) {
306 | throw new MessageException("Hbase删除指定行", e);
307 | } finally {
308 | release(null, table, null);
309 | }
310 |
311 | }
312 |
313 | public static void bathWriteData(List<Put> puts, String tableName, String columnName) {
314 | TableName tableNameObj = null;
315 | Table table = null;
316 | try {
317 | tableNameObj = TableName.valueOf(tableName);
318 | table = HBasePoolConnection.getConnection().getTable(tableNameObj);
319 |
320 | table.put(puts);
321 | ((HTable) table).flushCommits();
322 | } catch (IOException e) {
323 | throw new MessageException("Hbase批量put插入", e);
324 |
325 | } finally {
326 | release(null, table, null);
327 | }
328 |
329 | }
330 |
331 | public static Put getPut(String row, String columnFamily, String column, String data) {
332 |
333 | Put put = new Put(Bytes.toBytes(row));
334 | put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column), Bytes.toBytes(data));
335 | return put;
336 | }
337 |
338 |
339 | public static Put getPut(String row, String columnFamily, Map<String, Object> map) {
340 |
341 | if(map == null || map.isEmpty()){
342 | return null;
343 | }
344 |
345 | Put put = new Put(Bytes.toBytes(row));
346 |
347 | Iterator<String> keys = map.keySet().iterator();
348 | while (keys.hasNext()) {
349 | String key = keys.next();
350 |
351 | put.addColumn(Bytes.toBytes(columnFamily),
352 | Bytes.toBytes(key),
353 | Bytes.toBytes(map.get(key) == null ? "null":map.get(key).toString()));
354 | }
355 |
356 | return put;
357 | }
358 |
359 |
360 | public static Delete getDelete(String row, String columnFamily, String column) {
361 |
362 | Delete delete = new Delete(Bytes.toBytes(row));
363 | delete.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column));
364 | return delete;
365 | }
366 |
367 | /**
368 | * Batch insert through a BufferedMutator.
369 | *
370 | * @param tableName
371 | * @param puts
372 | */
373 | public static void batchInsert(String tableName, List<Put> puts) {
374 | BufferedMutator table = null;
375 | try {
376 | // get a BufferedMutator for the table
377 | table = HBasePoolConnection.getConnection().getBufferedMutator(TableName.valueOf(tableName));
378 | List<Mutation> mutations = new ArrayList<>();
379 | for (Put put : puts) {
380 | mutations.add(put);
381 | }
382 |
383 | table.mutate(mutations);
384 | // without an explicit flush(), later reads may not see these mutations yet
385 | table.flush();
386 | } catch (IOException e) {
387 | throw new MessageException("Hbase批量插入异常", e);
388 | } finally {
389 | release(null, null, table);
390 | }
391 |
392 | }
393 |
394 | /*
395 | * Batch delete.
396 | *
397 | * @tableName table name
398 | *
399 | * @deletes the Delete operations to apply
400 | */
401 | public static void batchDelete(String tableName, List<Delete> deletes) {
406 | TableName tableNameObj = null;
407 | Table table = null;
408 | try {
409 | tableNameObj = TableName.valueOf(tableName);
410 | table = HBasePoolConnection.getConnection().getTable(tableNameObj);
411 | table.delete(deletes);
412 |
413 | } catch (IOException e) {
414 | throw new MessageException("Hbase删除指定行", e);
415 | } finally {
416 | release(null, table, null);
417 | }
418 |
419 | }
420 |
421 | /**
422 | * @param args
423 | */
424 | public static void main(String[] args) {
425 | try {
426 | /*
427 | * HbaseDemo.CreateTable("userinfo", "baseinfo");
428 | * HbaseDemo.PutData("userinfo", "row2", "baseinfo", "vio2",
429 | * "驾驶车辆违法信息2:"); HbaseDemo.PutData("userinfo", "row5", "baseinfo",
430 | * "vio2", "驾驶车辆违法信息2:"); HbaseDemo.GetData("userinfo", "row2",
431 | * "baseinfo", "vio2"); HbaseDemo.ScanAll("userinfo");
432 | */
433 |
434 | } catch (Exception e) {
435 | e.printStackTrace();
436 | }
437 | }
438 | }
--------------------------------------------------------------------------------
/src/main/java/com/lm/kafkahbase/OffsetHBaseUtils.java:
--------------------------------------------------------------------------------
1 | package com.lm.kafkahbase;
2 |
3 | import java.util.HashMap;
4 | import java.util.Map;
5 |
6 | import org.apache.hadoop.hbase.Cell;
7 | import org.apache.hadoop.hbase.CellUtil;
8 | import org.apache.hadoop.hbase.client.Result;
9 | import org.apache.hadoop.hbase.client.ResultScanner;
10 | import org.apache.kafka.common.TopicPartition;
11 | import org.slf4j.Logger;
12 | import org.slf4j.LoggerFactory;
13 |
14 | public class OffsetHBaseUtils {
15 | private static Logger LOGGER = LoggerFactory.getLogger(OffsetHBaseUtils.class);
16 | public static Map<TopicPartition, Long> getOffset(ResultScanner rs) {
17 | // begin from the offsets committed to HBase
18 | Map<TopicPartition, Long> fromOffsets = new HashMap<>();
19 | String s1 = null;
20 | int s2 = 0;
21 | long s3 = 0;
22 | for (Result r : rs) {
23 | System.out.println("rowkey:" + new String(r.getRow()));
24 | for (Cell cell : r.rawCells()) {
25 | String qualifier = new String(CellUtil.cloneQualifier(cell));
26 | String value = new String(CellUtil.cloneValue(cell));
27 | String family = new String(CellUtil.cloneFamily(cell));
28 |
29 | if (qualifier.equals("topic")) {
30 | s1 = value;
31 | LOGGER.info("列族:" + family + " 列:" + qualifier + ":" + s1);
32 | }
33 |
34 | if (qualifier.equals("partition")) {
35 | s2 = Integer.valueOf(value);
36 | LOGGER.info("列族:" + family + " 列:" + qualifier + ":" + s2);
37 | }
38 |
39 | if (qualifier.equals("offset")) {
40 | s3 = Long.valueOf(value);
41 | LOGGER.info("列族:" + family + " 列:" + qualifier + ":" + s3);
42 | }
43 |
44 | }
45 |
46 | fromOffsets.put(new TopicPartition(s1, s2), s3);
47 | }
48 | return fromOffsets;
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/src/main/java/com/lm/kafkahbase/OggKafkaJsonUtils.java:
--------------------------------------------------------------------------------
1 | package com.lm.kafkahbase;
2 |
3 | import java.util.ArrayList;
4 | import java.util.Iterator;
5 | import java.util.List;
6 | import java.util.Map;
7 |
8 | import org.apache.hadoop.hbase.client.Put;
9 |
10 | import com.alibaba.fastjson.JSON;
11 | import com.lm.utils.ConstUtil;
12 |
13 |
14 | public class OggKafkaJsonUtils {
15 | public static void processBatchPut(List<String> oggKafkaJsonValues) {
16 |
17 | List<Put> puts = new ArrayList<>();
18 |
19 | System.out.println(oggKafkaJsonValues.size());
20 | for (String json : oggKafkaJsonValues) {
21 | // 1. parse the JSON message into a map
22 | Map<String, Object> jsonMap = JSON.parseObject(json);
23 | // 2. read the table name and the primary keys
24 | String tableName = jsonMap.get("table").toString();
25 | List<String> primaryKey = (List<String>) jsonMap.get("primary_keys");
26 |
27 | Map<String, Object> values = (Map<String, Object>) jsonMap.get("after");
28 |
29 | if (values == null || values.isEmpty()) {
30 | continue;
31 | }
32 |
33 | String rowKey = values.get("order_id").toString();
34 | puts.add(HBaseUtils.getPut(rowKey, ConstUtil.HBASE_FAMILY, values));
35 | }
36 |
37 | HBaseUtils.batchInsert("dfs.order", puts);
38 |
39 |
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/src/main/java/com/lm/kafkahbase/OggKafkaUtils.java:
--------------------------------------------------------------------------------
1 | package com.lm.kafkahbase;
2 |
3 | import java.util.ArrayList;
4 | import java.util.Arrays;
5 | import java.util.HashMap;
6 | import java.util.Iterator;
7 | import java.util.List;
8 |
9 | import org.apache.hadoop.hbase.client.Delete;
10 | import org.apache.hadoop.hbase.client.Put;
11 | import org.slf4j.Logger;
12 | import org.slf4j.LoggerFactory;
13 |
14 | import com.lm.utils.ConstUtil;
15 |
16 | public class OggKafkaUtils {
17 | private static Logger logger = LoggerFactory.getLogger(OggKafkaUtils.class);
18 |
19 | /**
20 | * Writes a batch of Kafka messages received from OGG into HBase.
21 | *
22 | * @param oggKafkaValues
23 | */
24 | public static void processBatchPut(List<String> oggKafkaValues) {
25 | //
26 |
27 | List<Put> insertPuts = getOggKafkaPuts(oggKafkaValues);
28 |
29 | HBaseUtils.batchInsert("dfs.order", insertPuts);
30 |
31 | List<Delete> deletes = getOggKafkaDelete(oggKafkaValues);
32 |
33 | HBaseUtils.batchDelete("dfs.order", deletes);
34 |
35 | }
36 |
37 | /**
38 | * Parses the OGG messages received from Kafka via Spark Streaming.
39 | *
40 | * @param oggKafkaValues
41 | * @return
42 | */
43 | public static List<HashMap<String, HashMap<String, String>>> getOggKafkas(List<String> oggKafkaValues) {
44 | // one entry per message; the outer map holds "header" and "body", the inner map the key/value pairs
45 | List<HashMap<String, HashMap<String, String>>> oggKafkaMapList = new ArrayList<>();
46 | for (String value : oggKafkaValues) {
47 | // split the raw value on the OGG delimiter
48 | List<String> values = Arrays.asList(value.split(ConstUtil.KAFKA_SPLIT));
49 |
50 | oggKafkaMapList.add(getKafkaMap(values));
51 | }
52 |
53 | return oggKafkaMapList;
54 | }
55 |
56 | /**
57 | * Builds the Put operations (inserts/updates) from the received OGG messages.
58 | *
59 | * @param oggKafkaValues
60 | * @return
61 | */
62 | public static List<Put> getOggKafkaPuts(List<String> oggKafkaValues) {
63 | // Puts for inserts and updates
64 | List<Put> insertPuts = new ArrayList<>();
65 | // the outer map holds "header" and "body", the inner map the key/value pairs
66 | List<HashMap<String, HashMap<String, String>>> oggKafkaMapList = getOggKafkas(oggKafkaValues);
67 |
68 | for (HashMap<String, HashMap<String, String>> hashMap : oggKafkaMapList) {
69 | if (!"D".equals(hashMap.get("header").get("ogg_status"))) {
70 | insertPuts.addAll(handlerMsgToHBasePut(hashMap));
71 | }
72 | }
73 | return insertPuts;
74 | }
75 |
76 | /**
77 | * Builds the Delete operations from the received OGG messages.
78 | *
79 | * @param oggKafkaValues
80 | * @return
81 | */
82 | public static List<Delete> getOggKafkaDelete(List<String> oggKafkaValues) {
83 | // Deletes
84 | List<Delete> deletes = new ArrayList<>();
85 | // the outer map holds "header" and "body", the inner map the key/value pairs
86 | List<HashMap<String, HashMap<String, String>>> oggKafkaMapList = getOggKafkas(oggKafkaValues);
87 | for (HashMap<String, HashMap<String, String>> hashMap : oggKafkaMapList) {
88 | if ("D".equals(hashMap.get("header").get("ogg_status"))) {
89 | deletes.addAll(handlerMsgToHBaseDelete(hashMap));
90 | }
91 | }
92 | return deletes;
93 | }
94 |
95 | /**
96 | * Maps a single Kafka message to its parts: "header" (message header) and "body" (message body).
97 | *
98 | * @param kafkaMsgList
99 | * @return
100 | */
101 | public static HashMap<String, HashMap<String, String>> getKafkaMap(List<String> kafkaMsgList) {
102 | HashMap<String, HashMap<String, String>> mapList = new HashMap<>();
103 |
104 | if (kafkaMsgList == null || kafkaMsgList.isEmpty()) {
105 | return mapList;
106 | }
107 |
108 | HashMap<String, String> headerMap = new HashMap<>();
109 | if (kafkaMsgList == null || kafkaMsgList.size() < 5) {
110 | logger.error("getKafkaMap 异常消息头");
111 | return mapList;
112 | }
113 | // 1. message header (first five fields)
114 | List<String> kafkaHeader = kafkaMsgList.subList(0, 5);
115 | List<String> kafkaBody = kafkaMsgList.subList(5, kafkaMsgList.size());
116 | headerMap.put("ogg_status", kafkaHeader.get(0));
117 | headerMap.put("ogg_table", kafkaHeader.get(1));
118 | headerMap.put("ogg_created", kafkaHeader.get(2));
119 | headerMap.put("ogg_updated", kafkaHeader.get(3));
120 | headerMap.put("ogg_id", kafkaHeader.get(4));
121 | mapList.put("header", headerMap);
122 |
123 | HashMap<String, String> bodyMap = new HashMap<>();
124 | for (int i = 0; i < kafkaBody.size(); i += 2) {
125 | // even index is the key, odd index is the value
126 | bodyMap.put(kafkaBody.get(i), kafkaBody.get(i + 1));
127 | }
128 | mapList.put("body", bodyMap);
129 | return mapList;
130 | }
131 |
132 | /**
133 | * Converts one parsed message into the Puts for its columns.
134 | *
135 | * @param kafkaMapList
136 | */
137 | public static List<Put> handlerMsgToHBasePut(HashMap<String, HashMap<String, String>> kafkaMapList) {
138 | List<Put> puts = new ArrayList<>();
139 | if (kafkaMapList == null || kafkaMapList.size() == 0) {
140 | return puts;
141 | }
142 |
143 | HashMap<String, String> headerMap = kafkaMapList.get("header");
144 | String ogg_table = headerMap.get("ogg_table").toLowerCase();
145 | ogg_table = "dfs.order";
146 | HashMap<String, String> bodyMap = kafkaMapList.get("body");
147 |
148 | Iterator<String> keys = bodyMap.keySet().iterator();
149 | String rowKey = bodyMap.get(ConstUtil.TABLE_ROWKEY.get(ogg_table));
150 |
151 | while (keys.hasNext()) {
152 | String key = keys.next();
153 | String value = bodyMap.get(key);
154 | if (null != value && ConstUtil.STRING_NULL.equals(value.toUpperCase())) {
155 | continue;
156 | }
157 |
158 | puts.add(HBaseUtils.getPut(rowKey, ConstUtil.HBASE_FAMILY, key, value));
159 | }
160 |
161 | return puts;
162 |
163 | }
164 |
165 | /**
166 | * Converts one parsed message into the Deletes for its columns.
167 | *
168 | * @param kafkaMapList
169 | */
170 | public static List<Delete> handlerMsgToHBaseDelete(HashMap<String, HashMap<String, String>> kafkaMapList) {
171 | List<Delete> deletes = new ArrayList<>();
172 | if (kafkaMapList == null || kafkaMapList.size() == 0) {
173 | return deletes;
174 | }
175 |
176 | HashMap<String, String> headerMap = kafkaMapList.get("header");
177 | String ogg_table = headerMap.get("ogg_table").toLowerCase();
178 | ogg_table = "dfs.order";
179 | HashMap<String, String> bodyMap = kafkaMapList.get("body");
180 |
181 | Iterator<String> keys = bodyMap.keySet().iterator();
182 | String rowKey = bodyMap.get(ConstUtil.TABLE_ROWKEY.get(ogg_table));
183 |
184 | while (keys.hasNext()) {
185 | String key = keys.next();
186 | String value = bodyMap.get(key);
187 | if (null != value && ConstUtil.STRING_NULL.equals(value.toUpperCase())) {
188 | continue;
189 | }
190 |
191 | deletes.add(HBaseUtils.getDelete(rowKey, ConstUtil.HBASE_FAMILY, key));
192 | }
193 |
194 | return deletes;
195 |
196 | }
197 |
198 | /**
199 | * Insert operation.
200 | *
201 | * @param tableName
202 | * @param columnMaps
203 | */
204 | public static void processInsertHbase(String tableName, HashMap<String, String> columnMaps) {
205 |
206 | if (columnMaps == null) {
207 | return;
208 | }
209 |
210 | Iterator<String> keys = columnMaps.keySet().iterator();
211 | String rowKey = columnMaps.get("order_id");
212 |
213 | while (keys.hasNext()) {
214 | String key = keys.next();
215 | String value = columnMaps.get(key);
216 | if (null != value && "NULL".equals(value.toUpperCase())) {
217 | value = "";
218 | }
219 |
220 | HBaseUtils.getPut(rowKey, "cf1", key, value);
221 |
222 | }
223 |
224 | }
225 |
226 | /**
227 | * Delete operation.
228 | *
229 | * @param tableName
230 | * @param columnMaps
231 | */
232 | public static void processDeleteHbase(String tableName, HashMap<String, String> columnMaps) {
233 |
234 | if (columnMaps == null) {
235 | return;
236 | }
237 |
238 | Iterator<String> keys = columnMaps.keySet().iterator();
239 | String rowKey = columnMaps.get("order_id");
240 |
241 | while (keys.hasNext()) {
242 | String key = keys.next();
243 | HBaseUtils.deleteColumn(tableName, rowKey, "cf1", key);
244 | }
245 |
246 | }
247 |
248 | public static void main(String[] args) {
249 | String[] arr = new String[] { "0", "1", "2", "3", "4", "5" };
250 | for (int i = 0; i < arr.length; i += 2) {
251 | // even index is the key, odd index is the value
252 | System.out.println(arr[i] + ":" + arr[i + 1]);
253 | }
254 |
255 | List<String> list = Arrays.asList(arr);
256 | List<String> list1 = list.subList(0, 2);
257 | List<String> list2 = list.subList(2, list.size());
258 |
259 | System.out.println(list1.toString());
260 | System.out.println(list2.toString());
261 | }
262 | }
263 |
--------------------------------------------------------------------------------
/src/main/java/com/lm/kryo/MyRegistrator.java:
--------------------------------------------------------------------------------
1 | package com.lm.kryo;
2 |
3 | import org.apache.kafka.clients.consumer.ConsumerRecord;
4 | import org.apache.spark.serializer.KryoRegistrator;
5 |
6 | import com.esotericsoftware.kryo.Kryo;
7 |
8 | public class MyRegistrator implements KryoRegistrator {
9 |
10 | @Override
11 | public void registerClasses(Kryo arg0) {
12 | arg0.register(ConsumerRecord.class);
13 | }
14 |
15 | }
16 |
--------------------------------------------------------------------------------
/src/main/java/com/lm/spark/SparkStreamingKafka.java:
--------------------------------------------------------------------------------
1 | package com.lm.spark;
2 |
3 | import java.io.Serializable;
4 | import java.util.ArrayList;
5 | import java.util.Arrays;
6 | import java.util.Collection;
7 | import java.util.Date;
8 | import java.util.HashMap;
9 | import java.util.List;
10 | import java.util.Map;
11 |
12 | import org.apache.hadoop.hbase.client.ResultScanner;
13 | import org.apache.kafka.clients.consumer.ConsumerRecord;
14 | import org.apache.kafka.common.TopicPartition;
15 | import org.apache.kafka.common.serialization.StringDeserializer;
16 | import org.apache.spark.SparkConf;
17 | import org.apache.spark.api.java.JavaRDD;
18 | import org.apache.spark.api.java.function.VoidFunction2;
19 | import org.apache.spark.streaming.Durations;
20 | import org.apache.spark.streaming.Time;
21 | import org.apache.spark.streaming.api.java.JavaInputDStream;
22 | import org.apache.spark.streaming.api.java.JavaStreamingContext;
23 | import org.apache.spark.streaming.kafka010.ConsumerStrategies;
24 | import org.apache.spark.streaming.kafka010.HasOffsetRanges;
25 | import org.apache.spark.streaming.kafka010.KafkaUtils;
26 | import org.apache.spark.streaming.kafka010.LocationStrategies;
27 | import org.apache.spark.streaming.kafka010.OffsetRange;
28 | import org.slf4j.Logger;
29 | import org.slf4j.LoggerFactory;
30 | import org.springframework.beans.factory.annotation.Value;
31 | import org.springframework.stereotype.Component;
32 |
33 | import com.lm.exception.MessageException;
34 | import com.lm.kafkahbase.HBaseUtils;
35 | import com.lm.kafkahbase.OffsetHBaseUtils;
36 | import com.lm.kafkahbase.OggKafkaUtils;
37 | import com.lm.utils.BeanUtil;
38 |
39 | @Component
40 | public class SparkStreamingKafka implements Serializable {
41 |
42 | /**
43 | *
44 | */
45 | private static final long serialVersionUID = 1L;
46 |
47 | public static Logger LOGGER = LoggerFactory.getLogger(SparkStreamingKafka.class);
48 |
49 | @Value("${spark.appname}")
50 | private String appName;
51 | @Value("${spark.master}")
52 | private String master;
53 | @Value("${spark.seconds}")
54 | private long second;
55 | @Value("${kafka.metadata.broker.list}")
56 | private String metadataBrokerList;
57 | @Value("${kafka.auto.offset.reset}")
58 | private String autoOffsetReset;
59 | @Value("${kafka.topics}")
60 | private String kafkaTopics;
61 | @Value("${kafka.group.id}")
62 | private String kafkaGroupId;
63 |
64 | String datatable = "dfs.offset";
65 | String offsetFamily = "topic_partition_offset";
66 |
67 | public void processSparkStreaming() throws InterruptedException {
68 | // 1. Configure SparkConf; the master must be set
69 | SparkConf conf = new SparkConf().setAppName(appName).setMaster(master);
70 | conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
71 | conf.set("spark.kryo.registrator", "com.lm.kryo.MyRegistrator");
72 |
73 | // 2. Create the JavaStreamingContext from the SparkConf
74 | JavaStreamingContext jsc = new JavaStreamingContext(conf, Durations.seconds(second));
75 |
76 | // 3. Kafka consumer parameters
77 | Map<String, Object> kafkaParams = new HashMap<>();
78 | kafkaParams.put("bootstrap.servers", metadataBrokerList);
79 | kafkaParams.put("key.deserializer", StringDeserializer.class);
80 | kafkaParams.put("value.deserializer", StringDeserializer.class);
81 | kafkaParams.put("group.id", kafkaGroupId);
82 | kafkaParams.put("auto.offset.reset", autoOffsetReset);
83 | kafkaParams.put("enable.auto.commit", false);
84 |
85 | // 4. Kafka topics
86 | Collection<String> topics = Arrays.asList(kafkaTopics.split(","));
87 |
88 | // 5. Create the Spark Streaming input stream
89 | JavaInputDStream<ConsumerRecord<String, String>> stream = null;
90 | // KafkaUtils.createDirectStream(jsc,
91 | // LocationStrategies.PreferConsistent(),
92 | // ConsumerStrategies.<String, String>Subscribe(topics, kafkaParams));
93 |
94 | // If the offset table does not exist, consume from the beginning of the topic and create the table; otherwise restore the saved per-partition offsets from it
95 | boolean isExists = HBaseUtils.isExistTable(datatable);
96 | if (isExists) {
97 |
98 | ResultScanner rs = HBaseUtils.getResultScanner(datatable, kafkaTopics);
99 |
100 | if (rs == null || !rs.iterator().hasNext()) {
101 | stream = KafkaUtils.createDirectStream(jsc, LocationStrategies.PreferConsistent(),
102 | ConsumerStrategies.<String, String>Subscribe(topics, kafkaParams));
103 |
104 | } else {
105 |
106 | Map<TopicPartition, Long> fromOffsets = OffsetHBaseUtils.getOffset(rs);
107 | stream = KafkaUtils.createDirectStream(jsc, LocationStrategies.PreferConsistent(),
108 | ConsumerStrategies.<String, String>Assign(fromOffsets.keySet(), kafkaParams, fromOffsets));
109 | }
110 | } else {
111 | // the offset table does not exist yet: consume from the beginning of the topic
112 | stream = KafkaUtils.createDirectStream(jsc, LocationStrategies.PreferConsistent(),
113 | ConsumerStrategies.<String, String>Subscribe(topics, kafkaParams));
114 |
115 | // and create the offset table
116 | HBaseUtils.createTable(datatable, offsetFamily);
117 |
118 | System.out.println(datatable + "表已经成功创建!----------------");
119 | }
120 |
121 | // 6. RDD transformations and actions
122 | stream.foreachRDD(new VoidFunction2<JavaRDD<ConsumerRecord<String, String>>, Time>() {
123 |
124 | private static final long serialVersionUID = 1L;
125 |
126 | @Override
127 | public void call(JavaRDD<ConsumerRecord<String, String>> v1, Time v2) {
128 | OffsetRange[] offsetRanges = ((HasOffsetRanges) v1.rdd()).offsetRanges();
129 | for (OffsetRange offsetRange : offsetRanges) {
130 | // begin your transaction
131 | // To keep the processing transactional, store the computed results and the offsets in HBase together, so that either both succeed or both fail; end to end this gives exactly-once consumption
132 | // store
133 | long startDate = new Date().getTime();
134 | List<ConsumerRecord<String, String>> consumerRecords = v1.collect();
135 |
136 | List<String> oggValues = new ArrayList<>();
137 | for (ConsumerRecord<String, String> record : consumerRecords) {
138 | oggValues.add(record.value());
139 | }
140 | // OggKafkaUtils.processBatchPut(oggValues);
141 | System.out.println(oggValues.toString());
142 | long endDate = new Date().getTime();
143 | System.out.println("插入完成:" + (endDate - startDate));
144 |
145 | // update results
146 | // update offsets where the end of existing offsets
147 | // matches
148 | // the beginning of this batch of offsets
149 | // assert that offsets were updated correctly
150 |
151 | try {
152 | HBaseUtils.putData(datatable, offsetFamily, offsetFamily, BeanUtil.objectToMap(offsetRange));
153 | } catch (Exception e) {
154 | throw new MessageException("object与map转化", e);
155 | }
156 | System.out.println("add data Success!");
157 | // end your transaction
158 | }
159 | System.out.println("the RDD records counts is " + v1.count());
160 |
161 | }
162 | });
163 |
164 | // 7. Start the streaming job
165 | jsc.start();
166 | // 8. Wait for termination; any exception is rethrown and the context is shut down
167 | jsc.awaitTermination();
168 | }
169 | }
170 |
--------------------------------------------------------------------------------
/src/main/java/com/lm/spark/SparkStreamingKafka2.java:
--------------------------------------------------------------------------------
1 | package com.lm.spark;
2 |
3 | import java.io.Serializable;
4 | import java.util.ArrayList;
5 | import java.util.Arrays;
6 | import java.util.Collection;
7 | import java.util.Date;
8 | import java.util.HashMap;
9 | import java.util.List;
10 | import java.util.Map;
11 |
12 | import org.apache.hadoop.hbase.client.ResultScanner;
13 | import org.apache.kafka.clients.consumer.ConsumerRecord;
14 | import org.apache.kafka.common.TopicPartition;
15 | import org.apache.kafka.common.serialization.StringDeserializer;
16 | import org.apache.spark.SparkConf;
17 | import org.apache.spark.api.java.JavaRDD;
18 | import org.apache.spark.api.java.function.VoidFunction2;
19 | import org.apache.spark.streaming.Durations;
20 | import org.apache.spark.streaming.Time;
21 | import org.apache.spark.streaming.api.java.JavaInputDStream;
22 | import org.apache.spark.streaming.api.java.JavaStreamingContext;
23 | import org.apache.spark.streaming.kafka010.ConsumerStrategies;
24 | import org.apache.spark.streaming.kafka010.HasOffsetRanges;
25 | import org.apache.spark.streaming.kafka010.KafkaUtils;
26 | import org.apache.spark.streaming.kafka010.LocationStrategies;
27 | import org.apache.spark.streaming.kafka010.OffsetRange;
28 | import org.slf4j.Logger;
29 | import org.slf4j.LoggerFactory;
30 | import org.springframework.beans.factory.annotation.Value;
31 | import org.springframework.stereotype.Component;
32 |
33 | import com.lm.exception.MessageException;
34 | import com.lm.kafkahbase.HBaseUtils;
35 | import com.lm.kafkahbase.OffsetHBaseUtils;
36 | import com.lm.kafkahbase.OggKafkaJsonUtils;
37 | import com.lm.kafkahbase.OggKafkaUtils;
38 | import com.lm.utils.BeanUtil;
39 |
40 | @Component
41 | public class SparkStreamingKafka2 implements Serializable {
42 |
43 | /**
44 | *
45 | */
46 | private static final long serialVersionUID = 1L;
47 |
48 | public static Logger LOGGER = LoggerFactory.getLogger(SparkStreamingKafka2.class);
49 |
50 | @Value("${spark.appname}")
51 | private String appName;
52 | @Value("${spark.master}")
53 | private String master;
54 | @Value("${spark.seconds}")
55 | private long second;
56 | @Value("${kafka.metadata.broker.list}")
57 | private String metadataBrokerList;
58 | @Value("${kafka.auto.offset.reset}")
59 | private String autoOffsetReset;
60 | @Value("${kafka.topics}")
61 | private String kafkaTopics;
62 | @Value("${kafka.group.id}")
63 | private String kafkaGroupId;
64 |
65 | String datatable = "dfs.offset";
66 | String offsetFamily = "topic_partition_offset";
67 |
68 | public void processSparkStreaming() throws InterruptedException {
69 | // 1. Configure SparkConf; the master must be set
70 | SparkConf conf = new SparkConf().setAppName(appName).setMaster(master);
71 | conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
72 | conf.set("spark.kryo.registrator", "com.lm.kryo.MyRegistrator");
73 | conf.set("spark.kryoserializer.buffer.mb", "256");
74 | conf.set("spark.kryoserializer.buffer.max", "512");
75 |
76 | conf.set("spark.executor.memory", "4g");
77 |
78 | // 2. Create the JavaStreamingContext from the SparkConf
79 | JavaStreamingContext jsc = new JavaStreamingContext(conf, Durations.seconds(second));
80 |
81 | // 3. Kafka consumer parameters
82 | Map<String, Object> kafkaParams = new HashMap<>();
83 | kafkaParams.put("bootstrap.servers", metadataBrokerList);
84 | kafkaParams.put("key.deserializer", StringDeserializer.class);
85 | kafkaParams.put("value.deserializer", StringDeserializer.class);
86 | kafkaParams.put("group.id", kafkaGroupId);
87 | kafkaParams.put("auto.offset.reset", autoOffsetReset);
88 | kafkaParams.put("enable.auto.commit", false);
89 |
90 | // 4. Kafka topics
91 | Collection<String> topics = Arrays.asList(kafkaTopics.split(","));
92 |
93 | // 5. Create the Spark Streaming input stream
94 | JavaInputDStream<ConsumerRecord<String, String>> stream = null;
95 | // KafkaUtils.createDirectStream(jsc,
96 | // LocationStrategies.PreferConsistent(),
97 | // ConsumerStrategies.<String, String>Subscribe(topics, kafkaParams));
98 |
99 | // If the offset table does not exist, consume from the beginning of the topic and create the table; otherwise restore the saved per-partition offsets from it
100 | boolean isExists = HBaseUtils.isExistTable(datatable);
101 | if (isExists) {
102 |
103 | ResultScanner rs = HBaseUtils.getResultScanner(datatable, kafkaTopics);
104 |
105 | if (rs == null || !rs.iterator().hasNext()) {
106 | stream = KafkaUtils.createDirectStream(jsc, LocationStrategies.PreferConsistent(),
107 | ConsumerStrategies.<String, String>Subscribe(topics, kafkaParams));
108 |
109 | } else {
110 |
111 | Map<TopicPartition, Long> fromOffsets = OffsetHBaseUtils.getOffset(rs);
112 | stream = KafkaUtils.createDirectStream(jsc, LocationStrategies.PreferConsistent(),
113 | ConsumerStrategies.<String, String>Assign(fromOffsets.keySet(), kafkaParams, fromOffsets));
114 | }
115 | } else {
116 | // the offset table does not exist yet: consume from the beginning of the topic
117 | stream = KafkaUtils.createDirectStream(jsc, LocationStrategies.PreferConsistent(),
118 | ConsumerStrategies.<String, String>Subscribe(topics, kafkaParams));
119 |
120 | // and create the offset table
121 | HBaseUtils.createTable(datatable, offsetFamily);
122 |
123 | System.out.println(datatable + "表已经成功创建!----------------");
124 | }
125 |
126 | // 6. RDD transformations and actions
127 | stream.foreachRDD(new VoidFunction2<JavaRDD<ConsumerRecord<String, String>>, Time>() {
128 |
129 | private static final long serialVersionUID = 1L;
130 |
131 | @Override
132 | public void call(JavaRDD<ConsumerRecord<String, String>> v1, Time v2) {
133 |
134 | OffsetRange[] offsetRanges = ((HasOffsetRanges) v1.rdd()).offsetRanges();
135 | for (OffsetRange offsetRange : offsetRanges) {
136 | // begin your transaction
137 | // To keep the processing transactional, store the computed results and the offsets in HBase together, so that either both succeed or both fail; end to end this gives exactly-once consumption
138 | // store
139 | long startDate = new Date().getTime();
140 | List<ConsumerRecord<String, String>> consumerRecords = v1.collect();
141 |
142 | List<String> oggValues = new ArrayList<>();
143 | for (ConsumerRecord<String, String> record : consumerRecords) {
144 | oggValues.add(record.value());
145 | }
146 | OggKafkaJsonUtils.processBatchPut(oggValues);
147 | long endDate = new Date().getTime();
148 | System.out.println("插入完成:" + (endDate - startDate));
149 |
150 | // update results
151 | // update offsets where the end of existing offsets
152 | // matches
153 | // the beginning of this batch of offsets
154 | // assert that offsets were updated correctly
155 |
156 | try {
157 | HBaseUtils.putData(datatable, offsetFamily, offsetFamily, BeanUtil.objectToMap(offsetRange));
158 | } catch (Exception e) {
159 | throw new MessageException("object与map转化", e);
160 | }
161 | System.out.println("add data Success!");
162 | // end your transaction
163 | }
164 | System.out.println("the RDD records counts is " + v1.count());
165 |
166 | }
167 | });
168 |
169 | // 7. Start the streaming job
170 | jsc.start();
171 | // 8. Wait for termination; any exception is rethrown and the context is shut down
172 | jsc.awaitTermination();
173 | }
174 | }
175 |
--------------------------------------------------------------------------------
/src/main/java/com/lm/spark/SparkStreamingKafka3.java:
--------------------------------------------------------------------------------
1 | package com.lm.spark;
2 |
3 | import java.io.Serializable;
4 | import java.util.Arrays;
5 | import java.util.Collection;
6 | import java.util.HashMap;
7 | import java.util.List;
8 | import java.util.Map;
9 |
10 | import org.apache.hadoop.hbase.client.Put;
11 | import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
12 | import org.apache.kafka.clients.consumer.ConsumerRecord;
13 | import org.apache.kafka.common.serialization.StringDeserializer;
14 | import org.apache.spark.SparkConf;
15 | import org.apache.spark.api.java.JavaPairRDD;
16 | import org.apache.spark.api.java.JavaRDD;
17 | import org.apache.spark.api.java.function.Function2;
18 | import org.apache.spark.api.java.function.PairFunction;
19 | import org.apache.spark.api.java.function.VoidFunction2;
20 | import org.apache.spark.streaming.Durations;
21 | import org.apache.spark.streaming.Time;
22 | import org.apache.spark.streaming.api.java.JavaInputDStream;
23 | import org.apache.spark.streaming.api.java.JavaPairDStream;
24 | import org.apache.spark.streaming.api.java.JavaStreamingContext;
25 | import org.apache.spark.streaming.kafka010.ConsumerStrategies;
26 | import org.apache.spark.streaming.kafka010.KafkaUtils;
27 | import org.apache.spark.streaming.kafka010.LocationStrategies;
28 | import org.slf4j.Logger;
29 | import org.slf4j.LoggerFactory;
30 | import org.springframework.beans.factory.annotation.Value;
31 | import org.springframework.stereotype.Component;
32 |
33 | import scala.Tuple2;
34 |
35 | @Component
36 | public class SparkStreamingKafka3 implements Serializable {
37 |
38 | /**
39 | *
40 | */
41 | private static final long serialVersionUID = 1L;
42 |
43 | public static Logger LOGGER = LoggerFactory.getLogger(SparkStreamingKafka3.class);
44 |
45 | @Value("${spark.appname}")
46 | private String appName;
47 | @Value("${spark.master}")
48 | private String master;
49 | @Value("${spark.seconds}")
50 | private long second;
51 | @Value("${kafka.metadata.broker.list}")
52 | private String metadataBrokerList;
53 | @Value("${kafka.auto.offset.reset}")
54 | private String autoOffsetReset;
55 | @Value("${kafka.topics}")
56 | private String kafkaTopics;
57 | @Value("${kafka.group.id}")
58 | private String kafkaGroupId;
59 |
60 | public void processSparkStreaming() throws InterruptedException {
61 | // 1. Configure SparkConf; the master must be set
62 | SparkConf conf = new SparkConf().setAppName(appName).setMaster(master);
63 | conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
64 | conf.set("spark.kryo.registrator", "com.lm.kryo.MyRegistrator");
65 |
66 | // 2. Create the JavaStreamingContext from the SparkConf
67 | JavaStreamingContext jsc = new JavaStreamingContext(conf, Durations.seconds(second));
68 |
69 | // 3. Kafka consumer parameters
70 | Map<String, Object> kafkaParams = new HashMap<>();
71 | kafkaParams.put("bootstrap.servers", metadataBrokerList);
72 | kafkaParams.put("key.deserializer", StringDeserializer.class);
73 | kafkaParams.put("value.deserializer", StringDeserializer.class);
74 | kafkaParams.put("group.id", kafkaGroupId);
75 | kafkaParams.put("auto.offset.reset", autoOffsetReset);
76 | kafkaParams.put("enable.auto.commit", false);
77 |
78 | // 4. Kafka topics
79 | Collection<String> topics = Arrays.asList(kafkaTopics.split(","));
80 |
81 | // 5. Create the Spark Streaming input stream
82 | final JavaInputDStream<ConsumerRecord<String, String>> stream =
83 | KafkaUtils.createDirectStream(jsc, LocationStrategies.PreferConsistent(),
84 | ConsumerStrategies.<String, String>Subscribe(topics, kafkaParams));
85 |
86 |
87 |
88 | JavaPairDStream<ImmutableBytesWritable, Put> ogg = stream.mapToPair(new PairFunction<ConsumerRecord<String, String>, ImmutableBytesWritable, Put>() {
89 |
90 | /**
91 | *
92 | */
93 | private static final long serialVersionUID = 1L;
94 |
95 | @Override
96 | public Tuple2<ImmutableBytesWritable, Put> call(ConsumerRecord<String, String> t) throws Exception {
97 | return null;
98 | }
99 |
100 | });
101 |
102 |
103 |
104 | // JavaPairRDD ogg ;
105 |
106 |
107 | // 6. RDD transformations and actions
108 | stream.foreachRDD(new VoidFunction2<JavaRDD<ConsumerRecord<String, String>>, Time>() {
109 |
110 | private static final long serialVersionUID = 1L;
111 |
112 | @Override
113 | public void call(JavaRDD<ConsumerRecord<String, String>> v1, Time v2) throws Exception {
114 |
115 | List<ConsumerRecord<String, String>> consumerRecords = v1.collect();
116 |
117 | }
118 | });
119 |
120 | // 7. Start the streaming job
121 | jsc.start();
122 | // 8. Wait for termination; any exception is rethrown and the context is shut down
123 | jsc.awaitTermination();
124 | }
125 | }
126 |
--------------------------------------------------------------------------------
/src/main/java/com/lm/utils/BeanUtil.java:
--------------------------------------------------------------------------------
1 | package com.lm.utils;
2 |
3 | import java.lang.reflect.Field;
4 | import java.lang.reflect.Modifier;
5 | import java.util.HashMap;
6 | import java.util.Map;
7 |
8 | /**
9 | * Bean utility class. author qiaobin 2016/10/11 16:25.
10 | */
11 | public class BeanUtil {
12 |
13 | /**
14 | * Converts an object's fields (including inherited ones) to a map.
15 | * @param obj
16 | * @return
17 | * @throws Exception
18 | */
19 | public static Map<String, Object> objectToMap(Object obj) throws Exception {
20 | if (obj == null) {
21 | return null;
22 | }
23 | Map<String, Object> map = new HashMap<>();
24 | Field[] declaredFields = obj.getClass().getDeclaredFields();
25 | Field[] superDeclaredFields = obj.getClass().getSuperclass().getDeclaredFields();
26 | for (Field field : declaredFields) {
27 | field.setAccessible(true);
28 | map.put(field.getName(), field.get(obj));
29 | }
30 | for (Field field : superDeclaredFields) {
31 | field.setAccessible(true);
32 | map.put(field.getName(), field.get(obj));
33 | }
34 | return map;
35 | }
36 |
37 | /**
38 | * Converts a map back into an instance of the given bean class.
39 | * @param map
40 | * @param beanClass
41 | * @return
42 | * @throws Exception
43 | */
44 | public static Object mapToObject(Map<String, Object> map, Class<?> beanClass) throws Exception {
45 | if (map == null)
46 | return null;
47 | Object obj = beanClass.newInstance();
48 | Field[] fields = obj.getClass().getDeclaredFields();
49 | for (Field field : fields) {
50 | int mod = field.getModifiers();
51 | if (Modifier.isStatic(mod) || Modifier.isFinal(mod)) {
52 | continue;
53 | }
54 | field.setAccessible(true);
55 | field.set(obj, map.get(field.getName()));
56 | }
57 | Field[] superFields = obj.getClass().getSuperclass().getDeclaredFields();
58 | for (Field field : superFields) {
59 | int mod = field.getModifiers();
60 | if (Modifier.isStatic(mod) || Modifier.isFinal(mod)) {
61 | continue;
62 | }
63 | field.setAccessible(true);
64 | field.set(obj, map.get(field.getName()));
65 | }
66 | return obj;
67 | }
68 | }
69 |
--------------------------------------------------------------------------------
/src/main/java/com/lm/utils/ConstUtil.java:
--------------------------------------------------------------------------------
1 | package com.lm.utils;
2 |
3 | import java.util.HashMap;
4 |
5 | /**
6 | * Constants.
7 | */
8 | public class ConstUtil {
9 | public final static String KAFKA_SPLIT = "\\â«";
10 |
11 | public final static String HBASE_FAMILY = "cf1";
12 |
13 | public final static String STRING_NULL = "NULL";
14 |
15 | public final static HashMap<String, String> TABLE_ROWKEY = new HashMap<>();
16 |
17 | static{
18 | TABLE_ROWKEY.put("dfs.order", "order_id");
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/src/main/java/com/lm/utils/SpringUtils.java:
--------------------------------------------------------------------------------
1 | package com.lm.utils;
2 |
3 | import org.springframework.context.ApplicationContext;
4 | import org.springframework.context.support.ClassPathXmlApplicationContext;
5 |
6 | final public class SpringUtils {
7 |
8 | private static ApplicationContext ac = null;
9 |
10 | private SpringUtils() {
11 |
12 | }
13 |
14 | static {
15 | ac = new ClassPathXmlApplicationContext("applicationContext.xml");
16 | }
17 |
18 | public static ApplicationContext getApplicationContext() {
19 | // return the Spring application context
20 | return ac;
21 | }
22 |
23 | }
--------------------------------------------------------------------------------
/src/main/resources/applicationContext.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xmlns:context="http://www.springframework.org/schema/context"
       xmlns:aop="http://www.springframework.org/schema/aop"
       xsi:schemaLocation="http://www.springframework.org/schema/beans
                           http://www.springframework.org/schema/beans/spring-beans.xsd
                           http://www.springframework.org/schema/context
                           http://www.springframework.org/schema/context/spring-context.xsd
                           http://www.springframework.org/schema/aop
                           http://www.springframework.org/schema/aop/spring-aop.xsd">

    <!-- component scanning, property placeholder for config.properties, AspectJ auto-proxying -->
    <context:component-scan base-package="com.lm" />
    <context:property-placeholder location="classpath:config.properties" />
    <aop:aspectj-autoproxy />

</beans>
--------------------------------------------------------------------------------
/src/main/resources/config.properties:
--------------------------------------------------------------------------------
1 | hbase.zk.host=10.32.19.41
2 | hbase.zk.port=2181
3 |
4 | zk.checkpoint.tmpdir=D:/tmp/oggCheckPoint/
5 |
6 | kafka.metadata.broker.list=10.32.19.41:9092,10.32.19.42:9092,10.32.19.43:9092
7 | kafka.auto.offset.reset=earliest
8 | #latest,earliest
9 | kafka.topics=pms
10 | kafka.group.id=sparkstreaming-kafka10
11 |
12 | spark.master=local[2]
13 | spark.appname=SparkStreamingKafka
14 | spark.seconds=5
15 |
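16 | # How these keys are consumed (see SparkStreamingKafka*.java):
17 | #   spark.master / spark.appname  -> SparkConf setMaster()/setAppName()
18 | #   spark.seconds                 -> streaming batch interval, Durations.seconds(...)
19 | #   kafka.metadata.broker.list    -> Kafka consumer "bootstrap.servers"
20 | #   kafka.auto.offset.reset       -> Kafka "auto.offset.reset" (latest or earliest)
21 | #   kafka.topics                  -> comma-separated list of topics to subscribe to
22 | #   kafka.group.id                -> Kafka consumer "group.id"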
--------------------------------------------------------------------------------
/src/main/resources/hbase-site.xml:
--------------------------------------------------------------------------------
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>

  <property>
    <name>hbase.rootdir</name>
    <value>hdfs://hd054:9000/hbase</value>
  </property>

  <property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
  </property>

  <property>
    <name>hbase.zookeeper.quorum</name>
    <value>10.32.19.41:2181,10.32.19.42:2181,10.32.19.43:2181</value>
  </property>

  <property>
    <name>hbase.zookeeper.property.clientPort</name>
    <value>2181</value>
  </property>

  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>

  <property>
    <name>hbase.master.port</name>
    <value>16000</value>
  </property>

  <property>
    <name>hbase.master.info.port</name>
    <value>16010</value>
  </property>

  <property>
    <name>hbase.tmp.dir</name>
    <value>/home/winit/hbase-1.3.1/data/tmp</value>
  </property>

  <property>
    <name>hbase.zookeeper.property.dataDir</name>
    <value>/home/winit/zookeeper-3.4.8/data</value>
  </property>

</configuration>
--------------------------------------------------------------------------------
/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one or more
2 | # contributor license agreements. See the NOTICE file distributed with
3 | # this work for additional information regarding copyright ownership.
4 | # The ASF licenses this file to You under the Apache License, Version 2.0
5 | # (the "License"); you may not use this file except in compliance with
6 | # the License. You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 |
17 | # Set everything to be logged to the console
18 | log4j.rootCategory=info, console
19 | log4j.appender.console=org.apache.log4j.ConsoleAppender
20 | log4j.appender.console.target=System.err
21 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
22 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
23 |
24 | # Settings to quiet third party logs that are too verbose
25 | log4j.logger.org.spark-project.jetty=WARN
26 | log4j.logger.org.spark-project.jetty.util.component.AbstractLifeCycle=ERROR
27 | log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
28 | log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
29 | log4j.logger.org.apache.parquet=ERROR
30 | log4j.logger.parquet=ERROR
31 |
32 | # SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
33 | log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL
34 | log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR
--------------------------------------------------------------------------------
/src/test/java/com/lm/spring_sparkstreaming_kafka10/AppTest.java:
--------------------------------------------------------------------------------
1 | package com.lm.spring_sparkstreaming_kafka10;
2 |
3 | import junit.framework.Test;
4 | import junit.framework.TestCase;
5 | import junit.framework.TestSuite;
6 |
7 | /**
8 | * Unit test for simple App.
9 | */
10 | public class AppTest
11 | extends TestCase
12 | {
13 | /**
14 | * Create the test case
15 | *
16 | * @param testName name of the test case
17 | */
18 | public AppTest( String testName )
19 | {
20 | super( testName );
21 | }
22 |
23 | /**
24 | * @return the suite of tests being tested
25 | */
26 | public static Test suite()
27 | {
28 | return new TestSuite( AppTest.class );
29 | }
30 |
31 | /**
32 | * Rigourous Test :-)
33 | */
34 | public void testApp()
35 | {
36 | assertTrue( true );
37 | }
38 | }
39 |
--------------------------------------------------------------------------------