├── README.md ├── pom.xml └── src ├── main ├── java │ └── com │ │ └── lm │ │ ├── Application.java │ │ ├── exception │ │ ├── ExceptionHandler.java │ │ └── MessageException.java │ │ ├── kafkahbase │ │ ├── HBasePoolConnection.java │ │ ├── HBaseUtils.java │ │ ├── OffsetHBaseUtils.java │ │ ├── OggKafkaJsonUtils.java │ │ └── OggKafkaUtils.java │ │ ├── kryo │ │ └── MyRegistrator.java │ │ ├── spark │ │ ├── SparkStreamingKafka.java │ │ ├── SparkStreamingKafka2.java │ │ └── SparkStreamingKafka3.java │ │ └── utils │ │ ├── BeanUtil.java │ │ ├── ConstUtil.java │ │ └── SpringUtils.java └── resources │ ├── applicationContext.xml │ ├── config.properties │ ├── hbase-site.xml │ └── log4j.properties └── test └── java └── com └── lm └── spring_sparkstreaming_kafka10 └── AppTest.java /README.md: -------------------------------------------------------------------------------- 1 | # spring-sparkstreaming-kafka-10 2 | spring+spark streaming+kafka 10版本集成和异常问题处理 3 | 博文地址: 4 | http://blog.csdn.net/a123demi/article/details/74935849 5 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.winit.iwm 6 | p-ogg-kafka-spark-hbase 7 | 0.0.1-SNAPSHOT 8 | jar 9 | 10 | p-ogg-kafka-spark-hbase 11 | http://maven.apache.org 12 | 13 | 14 | UTF-8 15 | 2.1.0 16 | 4.3.9.RELEASE 17 | 18 | 19 | 20 | 21 | 22 | com.alibaba 23 | fastjson 24 | 1.2.32 25 | 26 | 27 | 28 | 29 | org.springframework 30 | spring-core 31 | ${spring.version} 32 | 33 | 34 | 35 | org.springframework 36 | spring-context 37 | ${spring.version} 38 | 39 | 40 | org.springframework 41 | spring-context-support 42 | 4.3.9.RELEASE 43 | 44 | 45 | org.springframework 46 | spring-test 47 | ${spring.version} 48 | 49 | 50 | 51 | org.springframework 52 | spring-aop 53 | ${spring.version} 54 | 55 | 56 | org.aspectj 57 | aspectjrt 58 | 1.8.9 59 | 60 | 61 | org.aspectj 62 | aspectjweaver 63 | 1.8.9 64 | 65 | 66 | 67 | 68 | org.apache.spark 69 | spark-core_2.11 70 | ${spark-version} 71 | 72 | 73 | org.apache.spark 74 | spark-sql_2.11 75 | ${spark-version} 76 | 77 | 78 | org.apache.spark 79 | spark-hive_2.11 80 | ${spark-version} 81 | 82 | 83 | org.apache.spark 84 | spark-streaming_2.11 85 | ${spark-version} 86 | 87 | 88 | org.apache.hadoop 89 | hadoop-client 90 | 2.7.3 91 | 92 | 93 | 94 | org.apache.spark 95 | spark-graphx_2.11 96 | ${spark-version} 97 | 98 | 99 | 100 | org.apache.spark 101 | spark-streaming-kafka-0-10_2.11 102 | 2.1.0 103 | 104 | 105 | 106 | org.springframework.data 107 | spring-data-hadoop 108 | 2.4.0.RELEASE 109 | 110 | 111 | 112 | junit 113 | junit 114 | 3.8.1 115 | test 116 | 117 | 118 | 119 | org.apache.hbase 120 | hbase-client 121 | 1.3.1 122 | 123 | 124 | org.apache.hbase 125 | hbase-server 126 | 1.3.1 127 | 128 | 129 | 130 | org.apache.hbase 131 | hbase-common 132 | 1.3.1 133 | 134 | 135 | 136 | org.apache.commons 137 | commons-lang3 138 | 3.3.2 139 | 140 | 141 | 142 | commons-io 143 | commons-io 144 | 2.4 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | maven-assembly-plugin 153 | 154 | 155 | jar-with-dependencies 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | make-assembly 166 | package 167 | 168 | single 169 | 170 | 171 | 172 | 173 | 174 | 175 | org.codehaus.mojo 176 | exec-maven-plugin 177 | 1.2.1 178 | 179 | 180 | 181 | exec 182 | 183 | 184 | 185 | 186 | java 187 | true 188 | false 189 | compile 190 | com.lm.spark.SparkApp.App 191 | 192 | 193 | 194 | 195 | org.apache.maven.plugins 196 | 
maven-compiler-plugin 197 | 3.1 198 | 199 | 1.8 200 | 1.8 201 | true 202 | 203 | 204 | 205 | 206 | 207 | -------------------------------------------------------------------------------- /src/main/java/com/lm/Application.java: -------------------------------------------------------------------------------- 1 | package com.lm; 2 | 3 | import com.lm.exception.MessageException; 4 | import com.lm.spark.SparkStreamingKafka2; 5 | import com.lm.utils.SpringUtils; 6 | 7 | /** 8 | * Hello world! 9 | * 10 | */ 11 | public class Application { 12 | public static void main(String[] args) { 13 | SparkStreamingKafka2 sparkStreamingKafka = 14 | SpringUtils.getApplicationContext().getBean(SparkStreamingKafka2.class); 15 | try { 16 | sparkStreamingKafka.processSparkStreaming(); 17 | } catch (InterruptedException e) { 18 | // TODO Auto-generated catch block 19 | e.printStackTrace(); 20 | }catch (MessageException e) { 21 | 22 | System.out.println(e.getMessage()); 23 | } 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/main/java/com/lm/exception/ExceptionHandler.java: -------------------------------------------------------------------------------- 1 | package com.lm.exception; 2 | 3 | import org.aspectj.lang.JoinPoint; 4 | import org.aspectj.lang.annotation.AfterThrowing; 5 | import org.aspectj.lang.annotation.Aspect; 6 | import org.aspectj.lang.annotation.Pointcut; 7 | import org.slf4j.Logger; 8 | import org.slf4j.LoggerFactory; 9 | import org.springframework.stereotype.Component; 10 | import org.springframework.util.StringUtils; 11 | 12 | @Aspect 13 | @Component 14 | public class ExceptionHandler { 15 | private static final Logger LOGGER = LoggerFactory.getLogger(ExceptionHandler.class); 16 | 17 | /** 18 | * @within(org.springframework.stereotype.Service),拦截带有 @Service 注解的类的所有方法 19 | * @annotation(org.springframework.web.bind.annotation.RequestMapping), 20 | * 拦截带有@RquestMapping的注解方法 21 | */ 22 | @Pointcut("execution(public * *(..))") 23 | private void handlerPointcut() { 24 | } 25 | 26 | /** 27 | * 拦截service层异常,记录异常日志,并设置对应的异常信息 目前只拦截Exception,是否要拦截Error需再做考虑 28 | * 29 | * @param e 30 | * 异常对象 31 | */ 32 | @AfterThrowing(pointcut = "handlerPointcut()", throwing = "e") 33 | public void handle(JoinPoint point, Exception e) { 34 | String signature = point.getSignature().toString(); 35 | String errorMsg = getMessage(signature) == null 36 | ? (StringUtils.isEmpty(e.getMessage()) ? 
"服务异常" : e.getMessage()) : getMessage(signature); 37 | LOGGER.error(errorMsg); 38 | // throw new ServiceException(errorMsg, e); 39 | } 40 | 41 | /** 42 | * 获取方法签名对应的提示消息 43 | * 44 | * @param signature 45 | * 方法签名 46 | * @return 提示消息 47 | */ 48 | private String getMessage(String signature) { 49 | return null; 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/main/java/com/lm/exception/MessageException.java: -------------------------------------------------------------------------------- 1 | package com.lm.exception; 2 | 3 | public class MessageException extends RuntimeException { 4 | private static final long serialVersionUID = 8284164474119891530L; 5 | 6 | public MessageException() { 7 | super(); 8 | } 9 | 10 | public MessageException(String message) { 11 | super(message); 12 | } 13 | 14 | public MessageException(String message, Throwable cause) { 15 | super(message, cause); 16 | } 17 | 18 | public MessageException(Throwable cause) { 19 | super(cause); 20 | } 21 | 22 | protected MessageException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { 23 | super(message, cause, enableSuppression, writableStackTrace); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/main/java/com/lm/kafkahbase/HBasePoolConnection.java: -------------------------------------------------------------------------------- 1 | package com.lm.kafkahbase; 2 | 3 | import java.io.IOException; 4 | import java.util.concurrent.ExecutorService; 5 | import java.util.concurrent.Executors; 6 | 7 | import org.apache.hadoop.conf.Configuration; 8 | import org.apache.hadoop.hbase.HBaseConfiguration; 9 | import org.apache.hadoop.hbase.client.Connection; 10 | import org.apache.hadoop.hbase.client.ConnectionFactory; 11 | 12 | import com.lm.exception.MessageException; 13 | 14 | /** 15 | * Hbase连接池 16 | * 17 | * @author liangming.deng 18 | * @date 2017年7月7日 19 | * 20 | */ 21 | public class HBasePoolConnection { 22 | private HBasePoolConnection() { 23 | } 24 | 25 | // 连接池 26 | private static Connection connection = null; 27 | // 配置文件 28 | static Configuration hbaseConfiguration = HBaseConfiguration.create(); 29 | 30 | public static Connection getConnection() { 31 | if (connection == null) { 32 | ExecutorService pool = Executors.newFixedThreadPool(10);// 建立一个固定大小的线程池 33 | hbaseConfiguration.addResource("hbase-site.xml"); 34 | try { 35 | connection = ConnectionFactory.createConnection(hbaseConfiguration, pool);// 创建连接时,拿到配置文件和线程池 36 | } catch (IOException e) { 37 | throw new MessageException("Hbase连接池初始化错误", e); 38 | } 39 | } 40 | return connection; 41 | } 42 | 43 | } -------------------------------------------------------------------------------- /src/main/java/com/lm/kafkahbase/HBaseUtils.java: -------------------------------------------------------------------------------- 1 | package com.lm.kafkahbase; 2 | 3 | import java.io.IOException; 4 | import java.util.ArrayList; 5 | import java.util.HashMap; 6 | import java.util.Iterator; 7 | import java.util.List; 8 | import java.util.Map; 9 | 10 | import org.apache.hadoop.hbase.Cell; 11 | import org.apache.hadoop.hbase.CellUtil; 12 | import org.apache.hadoop.hbase.HColumnDescriptor; 13 | import org.apache.hadoop.hbase.HTableDescriptor; 14 | import org.apache.hadoop.hbase.MasterNotRunningException; 15 | import org.apache.hadoop.hbase.TableName; 16 | import org.apache.hadoop.hbase.ZooKeeperConnectionException; 17 | import 
org.apache.hadoop.hbase.client.Admin; 18 | import org.apache.hadoop.hbase.client.BufferedMutator; 19 | import org.apache.hadoop.hbase.client.Delete; 20 | import org.apache.hadoop.hbase.client.Get; 21 | import org.apache.hadoop.hbase.client.HTable; 22 | import org.apache.hadoop.hbase.client.Mutation; 23 | import org.apache.hadoop.hbase.client.Put; 24 | import org.apache.hadoop.hbase.client.Result; 25 | import org.apache.hadoop.hbase.client.ResultScanner; 26 | import org.apache.hadoop.hbase.client.Scan; 27 | import org.apache.hadoop.hbase.client.Table; 28 | import org.apache.hadoop.hbase.filter.BinaryComparator; 29 | import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp; 30 | import org.apache.hadoop.hbase.filter.Filter; 31 | import org.apache.hadoop.hbase.filter.RowFilter; 32 | import org.apache.hadoop.hbase.util.Bytes; 33 | import org.apache.hadoop.hbase.util.Hash; 34 | import org.slf4j.Logger; 35 | import org.slf4j.LoggerFactory; 36 | 37 | import com.lm.exception.MessageException; 38 | 39 | /** 40 | * HBase使用例子 41 | */ 42 | public class HBaseUtils { 43 | private final static Logger logger = LoggerFactory.getLogger(HBaseUtils.class); 44 | 45 | /** 46 | * 关闭连接 47 | * 48 | * @param admin 49 | * @param table 50 | * @param bufferedMutator 51 | */ 52 | public static void release(Admin admin, Table table, BufferedMutator bufferedMutator) { 53 | try { 54 | if (admin != null) 55 | admin.close(); 56 | if (table != null) 57 | table.close(); 58 | if (bufferedMutator != null) 59 | bufferedMutator.close(); 60 | } catch (IOException e) { 61 | throw new MessageException("Hbase判断表是否存在", e); 62 | } 63 | } 64 | 65 | /** 66 | * 判断表是否存在 67 | * 68 | * @param tableName 69 | * @return 70 | */ 71 | public static boolean isExistTable(String tableName) { 72 | boolean isExist = false; 73 | Admin admin = null; 74 | 75 | try { 76 | admin = HBasePoolConnection.getConnection().getAdmin(); 77 | TableName table = TableName.valueOf(tableName); 78 | if (admin.tableExists(table)) {// 如果表已经存在 79 | isExist = true; 80 | } 81 | 82 | } catch (IOException e) { 83 | throw new MessageException("Hbase判断表是否存在", e); 84 | 85 | } finally { 86 | release(admin, null, null); 87 | } 88 | 89 | return isExist; 90 | } 91 | 92 | /** 93 | * 创建表 94 | * 95 | * @param tablename 96 | * 表名 97 | * @param columnFamily 98 | * 列族 99 | * @ @throws 100 | * ZooKeeperConnectionException 101 | * @throws MasterNotRunningException 102 | */ 103 | public static void createTable(String tablename, String columnFamily) { 104 | 105 | Admin admin = null; 106 | 107 | try { 108 | admin = HBasePoolConnection.getConnection().getAdmin(); 109 | TableName tableName = TableName.valueOf(tablename); 110 | if (!admin.tableExists(tableName)) {// 如果表已经存在 111 | HTableDescriptor tableDesc = new HTableDescriptor(tableName); 112 | tableDesc.addFamily(new HColumnDescriptor(columnFamily)); 113 | admin.createTable(tableDesc); 114 | logger.info(tablename + "表已经成功创建!----------------"); 115 | } 116 | 117 | } catch (IOException e) { 118 | throw new MessageException("Hbase创建表", e); 119 | } finally { 120 | release(admin, null, null); 121 | } 122 | 123 | } 124 | 125 | /** 126 | * 向表中插入一条新数据 127 | * 128 | * @param tableName 129 | * 表名 130 | * @param row 131 | * 行键key 132 | * @param columnFamily 133 | * 列族 134 | * @param column 135 | * 列名 136 | * @param data 137 | * 要插入的数据 @ 138 | */ 139 | public static void putData(String tableName, String row, String columnFamily, String column, String data) { 140 | TableName tableNameObj = null; 141 | Table table = null; 142 | try { 143 | tableNameObj = 
TableName.valueOf(tableName); 144 | table = HBasePoolConnection.getConnection().getTable(tableNameObj); 145 | Put put = new Put(Bytes.toBytes(row)); 146 | put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column), Bytes.toBytes(data)); 147 | table.put(put); 148 | logger.info("-------put '" + row + "','" + columnFamily + ":" + column + "','" + data + "'"); 149 | } catch (IOException e) { 150 | throw new MessageException("Hbase插入数据", e); 151 | } finally { 152 | release(null, table, null); 153 | } 154 | 155 | } 156 | 157 | /** 158 | * map数据插入 159 | * 160 | * @param tableName 161 | * @param row 162 | * @param columnFamily 163 | * @param datas 164 | * @ 165 | */ 166 | public static void putData(String tableName, String row, String columnFamily, Map datas) { 167 | if (null == datas || datas.isEmpty()) { 168 | return; 169 | } 170 | TableName tableNameObj = null; 171 | Table table = null; 172 | try { 173 | tableNameObj = TableName.valueOf(tableName); 174 | table = HBasePoolConnection.getConnection().getTable(tableNameObj); 175 | Put put = new Put(Bytes.toBytes(row)); 176 | Iterator columns = datas.keySet().iterator(); 177 | while (columns.hasNext()) { 178 | String column = columns.next(); 179 | put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column), 180 | Bytes.toBytes(datas.get(column).toString())); 181 | } 182 | table.put(put); 183 | } catch (IOException e) { 184 | throw new MessageException("Hbase插入对象数据", e); 185 | } finally { 186 | release(null, table, null); 187 | } 188 | } 189 | 190 | /** 191 | * 获取指定行的所有数据 192 | * 193 | * @param tableName 194 | * @param row 195 | * @param columnFamily 196 | * @param column 197 | * @return @ 198 | */ 199 | public static String getData(String tableName, String row, String columnFamily, String column) { 200 | 201 | TableName tableNameObj = null; 202 | Table table = null; 203 | String value = ""; 204 | try { 205 | tableNameObj = TableName.valueOf(tableName); 206 | table = HBasePoolConnection.getConnection().getTable(tableNameObj); 207 | Get get = new Get(Bytes.toBytes(row)); 208 | Result result = table.get(get); 209 | byte[] rb = result.getValue(Bytes.toBytes(columnFamily), Bytes.toBytes(column)); 210 | value = new String(rb, "UTF-8"); 211 | logger.info("------" + value); 212 | } catch (IOException e) { 213 | throw new MessageException("Hbase获取指定行", e); 214 | } finally { 215 | release(null, table, null); 216 | } 217 | 218 | return value; 219 | } 220 | 221 | /** 222 | * 获取ResultScanner 223 | * 224 | * @param tableName 225 | * @param topics 226 | * @return @ 227 | */ 228 | public static ResultScanner getResultScanner(String tableName, String value) { 229 | 230 | TableName tableNameObj = null; 231 | Table table = null; 232 | ResultScanner rs = null; 233 | try { 234 | tableNameObj = TableName.valueOf(tableName); 235 | table = HBasePoolConnection.getConnection().getTable(tableNameObj); 236 | Filter filter = new RowFilter(CompareOp.GREATER_OR_EQUAL, new BinaryComparator(Bytes.toBytes(value + "_"))); 237 | Scan s = new Scan(); 238 | s.setFilter(filter); 239 | rs = table.getScanner(s); 240 | } catch (IOException e) { 241 | throw new MessageException("Hbase获取指定行", e); 242 | } finally { 243 | release(null, table, null); 244 | } 245 | 246 | return rs; 247 | } 248 | 249 | /** 250 | * 获取指定表的所有数据 251 | * 252 | * @param tableName 253 | * 表名 @ 254 | */ 255 | public static void scanAll(String tableName) { 256 | 257 | TableName tableNameObj = null; 258 | Table table = null; 259 | ResultScanner resultScanner = null; 260 | try { 261 | tableNameObj = 
TableName.valueOf(tableName); 262 | table = HBasePoolConnection.getConnection().getTable(tableNameObj); 263 | Scan scan = new Scan(); 264 | resultScanner = table.getScanner(scan); 265 | for (Result result : resultScanner) { 266 | List cells = result.listCells(); 267 | for (Cell cell : cells) { 268 | String row = new String(result.getRow(), "UTF-8"); 269 | String family = new String(CellUtil.cloneFamily(cell), "UTF-8"); 270 | String qualifier = new String(CellUtil.cloneQualifier(cell), "UTF-8"); 271 | String value = new String(CellUtil.cloneValue(cell), "UTF-8"); 272 | logger.info(":::::[row:" + row + "],[family:" + family + "],[qualifier:" + qualifier + "],[value:" 273 | + value + "]"); 274 | } 275 | } 276 | } catch (IOException e) { 277 | throw new MessageException("Hbase获取指定行", e); 278 | } finally { 279 | release(null, table, null); 280 | } 281 | 282 | } 283 | 284 | /* 285 | * 删除指定的列 286 | * 287 | * @tableName 表名 288 | * 289 | * @rowKey rowKey 290 | * 291 | * @familyName 列族名 292 | * 293 | * @columnName 列名 294 | */ 295 | public static void deleteColumn(String tableName, String rowKey, String falilyName, String columnName) { 296 | TableName tableNameObj = null; 297 | Table table = null; 298 | try { 299 | tableNameObj = TableName.valueOf(tableName); 300 | table = HBasePoolConnection.getConnection().getTable(tableNameObj); 301 | Delete deleteColumn = new Delete(Bytes.toBytes(rowKey)); 302 | deleteColumn.addColumn(Bytes.toBytes(falilyName), Bytes.toBytes(columnName)); 303 | table.delete(deleteColumn); 304 | logger.info(falilyName + ":" + columnName + "is deleted!"); 305 | } catch (IOException e) { 306 | throw new MessageException("Hbase删除指定行", e); 307 | } finally { 308 | release(null, table, null); 309 | } 310 | 311 | } 312 | 313 | public static void bathWriteData(List puts, String tableName, String columnName) { 314 | TableName tableNameObj = null; 315 | Table table = null; 316 | try { 317 | tableNameObj = TableName.valueOf(tableName); 318 | table = HBasePoolConnection.getConnection().getTable(tableNameObj); 319 | 320 | table.put(puts); 321 | ((HTable) table).flushCommits(); 322 | } catch (IOException e) { 323 | throw new MessageException("Hbase批量put插入", e); 324 | 325 | } finally { 326 | release(null, table, null); 327 | } 328 | 329 | } 330 | 331 | public static Put getPut(String row, String columnFamily, String column, String data) { 332 | 333 | Put put = new Put(Bytes.toBytes(row)); 334 | put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column), Bytes.toBytes(data)); 335 | return put; 336 | } 337 | 338 | 339 | public static Put getPut(String row, String columnFamily, Map map) { 340 | 341 | if(map == null || map.isEmpty()){ 342 | return null; 343 | } 344 | 345 | Put put = new Put(Bytes.toBytes(row)); 346 | 347 | Iterator keys = map.keySet().iterator(); 348 | while(keys.hasNext()){ 349 | String key = keys.next(); 350 | 351 | put.addColumn(Bytes.toBytes(columnFamily), 352 | Bytes.toBytes(key), 353 | Bytes.toBytes(map.get(key) == null ? 
"null":map.get(key).toString())); 354 | } 355 | 356 | return put; 357 | } 358 | 359 | 360 | public static Delete getDelete(String row, String columnFamily, String column) { 361 | 362 | Delete delete = new Delete(Bytes.toBytes(row)); 363 | delete.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column)); 364 | return delete; 365 | } 366 | 367 | /** 368 | * 批量插入 369 | * 370 | * @param tableName 371 | * @param puts 372 | */ 373 | public static void batchInsert(String tableName, List puts) { 374 | BufferedMutator table = null; 375 | try { 376 | // 连接表 获取表对象 377 | table = HBasePoolConnection.getConnection().getBufferedMutator(TableName.valueOf(tableName)); 378 | List mutations = new ArrayList(); 379 | for (Put put : puts) { 380 | mutations.add(put); 381 | } 382 | 383 | table.mutate(mutations); 384 | // 如果不flush 在后面get可能是看不见的 385 | table.flush(); 386 | } catch (IOException e) { 387 | throw new MessageException("Hbase批量插入异常", e); 388 | } finally { 389 | release(null, null, table); 390 | } 391 | 392 | } 393 | 394 | /* 395 | * 删除指定的列 396 | * 397 | * @tableName 表名 398 | * 399 | * @rowKey rowKey 400 | * 401 | * @familyName 列族名 402 | * 403 | * @columnName 列名 404 | */ 405 | public static void batchDelete(String tableName, List deletes) { 406 | TableName tableNameObj = null; 407 | Table table = null; 408 | try { 409 | tableNameObj = TableName.valueOf(tableName); 410 | table = HBasePoolConnection.getConnection().getTable(tableNameObj); 411 | table.delete(deletes); 412 | 413 | } catch (IOException e) { 414 | throw new MessageException("Hbase删除指定行", e); 415 | } finally { 416 | release(null, table, null); 417 | } 418 | 419 | } 420 | 421 | /** 422 | * @param args 423 | */ 424 | public static void main(String[] args) { 425 | try { 426 | /* 427 | * HbaseDemo.CreateTable("userinfo", "baseinfo"); 428 | * HbaseDemo.PutData("userinfo", "row2", "baseinfo", "vio2", 429 | * "驾驶车辆违法信息2:"); HbaseDemo.PutData("userinfo", "row5", "baseinfo", 430 | * "vio2", "驾驶车辆违法信息2:"); HbaseDemo.GetData("userinfo", "row2", 431 | * "baseinfo", "vio2"); HbaseDemo.ScanAll("userinfo"); 432 | */ 433 | 434 | } catch (Exception e) { 435 | e.printStackTrace(); 436 | } 437 | } 438 | } -------------------------------------------------------------------------------- /src/main/java/com/lm/kafkahbase/OffsetHBaseUtils.java: -------------------------------------------------------------------------------- 1 | package com.lm.kafkahbase; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | 6 | import org.apache.hadoop.hbase.Cell; 7 | import org.apache.hadoop.hbase.CellUtil; 8 | import org.apache.hadoop.hbase.client.Result; 9 | import org.apache.hadoop.hbase.client.ResultScanner; 10 | import org.apache.kafka.common.TopicPartition; 11 | import org.slf4j.Logger; 12 | import org.slf4j.LoggerFactory; 13 | 14 | public class OffsetHBaseUtils { 15 | private static Logger LOGGER = LoggerFactory.getLogger(OffsetHBaseUtils.class); 16 | public static Map getOffset(ResultScanner rs){ 17 | // begin from the the offsets committed to the database 18 | Map fromOffsets = new HashMap<>(); 19 | String s1 = null; 20 | int s2 = 0; 21 | long s3 = 0; 22 | for (Result r : rs) { 23 | System.out.println("rowkey:" + new String(r.getRow())); 24 | for (Cell cell : r.rawCells()) { 25 | String qualifier = new String(CellUtil.cloneQualifier(cell)); 26 | String value = new String(CellUtil.cloneValue(cell)); 27 | String family = new String(CellUtil.cloneFamily(cell)); 28 | 29 | if (qualifier.equals("topic")) { 30 | s1 = value; 31 | LOGGER.info("列族:" + family + " 列:" + 
qualifier + ":" + s1); 32 | } 33 | 34 | if (qualifier.equals("partition")) { 35 | s2 = Integer.valueOf(value); 36 | LOGGER.info("列族:" + family + " 列:" + qualifier + ":" + s2); 37 | } 38 | 39 | if (qualifier.equals("offset")) { 40 | s3 = Long.valueOf(value); 41 | LOGGER.info("列族:" + family + " 列:" + qualifier + ":" + s3); 42 | } 43 | 44 | } 45 | 46 | fromOffsets.put(new TopicPartition(s1, s2), s3); 47 | } 48 | return fromOffsets; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/com/lm/kafkahbase/OggKafkaJsonUtils.java: -------------------------------------------------------------------------------- 1 | package com.lm.kafkahbase; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Iterator; 5 | import java.util.List; 6 | import java.util.Map; 7 | 8 | import org.apache.hadoop.hbase.client.Put; 9 | 10 | import com.alibaba.fastjson.JSON; 11 | import com.lm.utils.ConstUtil; 12 | 13 | 14 | public class OggKafkaJsonUtils { 15 | public static void processBatchPut(List oggKafkaJsonValues) { 16 | 17 | List puts = new ArrayList<>(); 18 | 19 | System.out.println(oggKafkaJsonValues.size()); 20 | for (String json : oggKafkaJsonValues) { 21 | //1.json转map 22 | Map jsonMap = JSON.parseObject(json); 23 | //2.获取状态和表 24 | String tableName = jsonMap.get("table").toString(); 25 | List primaryKey =(List) jsonMap.get("primary_keys"); 26 | 27 | Map values = (Map)jsonMap.get("after"); 28 | 29 | if(values == null || values.isEmpty()){ 30 | continue; 31 | } 32 | 33 | String rowKey = values.get("order_id").toString(); 34 | puts.add(HBaseUtils.getPut(rowKey, ConstUtil.HBASE_FAMILY, values)); 35 | } 36 | 37 | HBaseUtils.batchInsert("dfs.order", puts); 38 | 39 | 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/com/lm/kafkahbase/OggKafkaUtils.java: -------------------------------------------------------------------------------- 1 | package com.lm.kafkahbase; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Arrays; 5 | import java.util.HashMap; 6 | import java.util.Iterator; 7 | import java.util.List; 8 | 9 | import org.apache.hadoop.hbase.client.Delete; 10 | import org.apache.hadoop.hbase.client.Put; 11 | import org.slf4j.Logger; 12 | import org.slf4j.LoggerFactory; 13 | 14 | import com.lm.utils.ConstUtil; 15 | 16 | public class OggKafkaUtils { 17 | private static Logger logger = LoggerFactory.getLogger(OggKafkaUtils.class); 18 | 19 | /** 20 | * 执行接收到的kafka数据到hbase 21 | * 22 | * @param oggKafkaValues 23 | */ 24 | public static void processBatchPut(List oggKafkaValues) { 25 | // 26 | 27 | List insertPuts = getOggKafkaPuts(oggKafkaValues); 28 | 29 | HBaseUtils.batchInsert("dfs.order", insertPuts); 30 | 31 | List deletes = getOggKafkaDelete(oggKafkaValues); 32 | 33 | HBaseUtils.batchDelete("dfs.order", deletes); 34 | 35 | } 36 | 37 | /** 38 | * 根据sparkStreaming获取源ogg经kafka发送的消息 39 | * 40 | * @param oggKafkaValues 41 | * @return 42 | */ 43 | public static List>> getOggKafkas(List oggKafkaValues) { 44 | // 一次加载多有数据,第一个map为header和body,第二个map对应key,value值 45 | List>> oggKafkaMapList = new ArrayList<>(); 46 | for (String value : oggKafkaValues) { 47 | // 获取value对应分隔信息 48 | List values = Arrays.asList(value.split(ConstUtil.KAFKA_SPLIT)); 49 | 50 | oggKafkaMapList.add(getKafkaMap(values)); 51 | } 52 | 53 | return oggKafkaMapList; 54 | } 55 | 56 | /** 57 | * 根据sparkStreaming获取源ogg经kafka发送的消息 58 | * 59 | * @param oggKafkaValues 60 | * @return 61 | */ 62 | public static List 
getOggKafkaPuts(List oggKafkaValues) { 63 | // 带插入或更新的put 64 | List insertPuts = new ArrayList<>(); 65 | // 一次加载多有数据,第一个map为header和body,第二个map对应key,value值 66 | List>> oggKafkaMapList = getOggKafkas(oggKafkaValues); 67 | 68 | for (HashMap> hashMap : oggKafkaMapList) { 69 | if (!"D".equals(hashMap.get("header").get("ogg_status"))) { 70 | insertPuts.addAll(handlerMsgToHBasePut(hashMap)); 71 | } 72 | } 73 | return insertPuts; 74 | } 75 | 76 | /** 77 | * 根据sparkStreaming获取源ogg经kafka发送的消息 78 | * 79 | * @param oggKafkaValues 80 | * @return 81 | */ 82 | public static List getOggKafkaDelete(List oggKafkaValues) { 83 | // 删除的put 84 | List deletes = new ArrayList<>(); 85 | // 一次加载多有数据,第一个map为header和body,第二个map对应key,value值 86 | List>> oggKafkaMapList = getOggKafkas(oggKafkaValues); 87 | for (HashMap> hashMap : oggKafkaMapList) { 88 | if ("D".equals(hashMap.get("header").get("ogg_status"))) { 89 | deletes.addAll(handlerMsgToHBaseDelete(hashMap)); 90 | } 91 | } 92 | return deletes; 93 | } 94 | 95 | /** 96 | * 每一个kafka消息对应的map header:消息头 body:消息体 97 | * 98 | * @param kafkaMsgList 99 | * @return 100 | */ 101 | public static HashMap> getKafkaMap(List kafkaMsgList) { 102 | HashMap> mapList = new HashMap>(); 103 | 104 | if (kafkaMsgList == null || kafkaMsgList.isEmpty()) { 105 | return mapList; 106 | } 107 | 108 | HashMap headerMap = new HashMap<>(); 109 | if (kafkaMsgList == null || kafkaMsgList.size() < 5) { 110 | logger.error("getKafkaMap 异常消息头"); 111 | return mapList; 112 | } 113 | // 1.数据头 114 | List kafkaHeader = kafkaMsgList.subList(0, 5); 115 | List kafkaBody = kafkaMsgList.subList(5, kafkaMsgList.size()); 116 | headerMap.put("ogg_status", kafkaHeader.get(0)); 117 | headerMap.put("ogg_table", kafkaHeader.get(1)); 118 | headerMap.put("ogg_created", kafkaHeader.get(2)); 119 | headerMap.put("ogg_updated", kafkaHeader.get(3)); 120 | headerMap.put("ogg_id", kafkaHeader.get(4)); 121 | mapList.put("header", headerMap); 122 | 123 | HashMap bodyMap = new HashMap<>(); 124 | for (int i = 0; i < kafkaBody.size(); i += 2) { 125 | // 偶数时key,奇数是value 126 | bodyMap.put(kafkaBody.get(i), kafkaBody.get(i + 1)); 127 | } 128 | mapList.put("body", bodyMap); 129 | return mapList; 130 | } 131 | 132 | /** 133 | * 每一个加入加入到put 134 | * 135 | * @param kafkaMapList 136 | */ 137 | public static List handlerMsgToHBasePut(HashMap> kafkaMapList) { 138 | List puts = new ArrayList<>(); 139 | if (kafkaMapList == null || kafkaMapList.size() == 0) { 140 | return puts; 141 | } 142 | 143 | HashMap headerMap = kafkaMapList.get("header"); 144 | String ogg_table = headerMap.get("ogg_table").toLowerCase(); 145 | ogg_table = "dfs.order"; 146 | HashMap bodyMap = kafkaMapList.get("body"); 147 | 148 | Iterator keys = bodyMap.keySet().iterator(); 149 | String rowKey = bodyMap.get(ConstUtil.TABLE_ROWKEY.get(ogg_table)); 150 | 151 | while (keys.hasNext()) { 152 | String key = keys.next(); 153 | String value = bodyMap.get(key); 154 | if (null != value && ConstUtil.STRING_NULL.equals(value.toUpperCase())) { 155 | continue; 156 | } 157 | 158 | puts.add(HBaseUtils.getPut(rowKey, ConstUtil.HBASE_FAMILY, key, value)); 159 | } 160 | 161 | return puts; 162 | 163 | } 164 | 165 | /** 166 | * 每一个加入加入到put 167 | * 168 | * @param kafkaMapList 169 | */ 170 | public static List handlerMsgToHBaseDelete(HashMap> kafkaMapList) { 171 | List deletes = new ArrayList<>(); 172 | if (kafkaMapList == null || kafkaMapList.size() == 0) { 173 | return deletes; 174 | } 175 | 176 | HashMap headerMap = kafkaMapList.get("header"); 177 | String ogg_table = 
headerMap.get("ogg_table").toLowerCase(); 178 | ogg_table = "dfs.order"; 179 | HashMap bodyMap = kafkaMapList.get("body"); 180 | 181 | Iterator keys = bodyMap.keySet().iterator(); 182 | String rowKey = bodyMap.get(ConstUtil.TABLE_ROWKEY.get(ogg_table)); 183 | 184 | while (keys.hasNext()) { 185 | String key = keys.next(); 186 | String value = bodyMap.get(key); 187 | if (null != value && ConstUtil.STRING_NULL.equals(value.toUpperCase())) { 188 | continue; 189 | } 190 | 191 | deletes.add(HBaseUtils.getDelete(rowKey, ConstUtil.HBASE_FAMILY, key)); 192 | } 193 | 194 | return deletes; 195 | 196 | } 197 | 198 | /** 199 | * 插入操作 200 | * 201 | * @param tableName 202 | * @param columnMaps 203 | */ 204 | public static void processInsertHbase(String tableName, HashMap columnMaps) { 205 | 206 | if (columnMaps == null) { 207 | return; 208 | } 209 | 210 | Iterator keys = columnMaps.keySet().iterator(); 211 | String rowKey = columnMaps.get("order_id"); 212 | 213 | while (keys.hasNext()) { 214 | String key = keys.next(); 215 | String value = columnMaps.get(key); 216 | if (null != value && "NULL".equals(value.toUpperCase())) { 217 | value = ""; 218 | } 219 | 220 | HBaseUtils.getPut(rowKey, "cf1", key, value); 221 | 222 | } 223 | 224 | } 225 | 226 | /** 227 | * 插入操作 228 | * 229 | * @param tableName 230 | * @param columnMaps 231 | */ 232 | public static void processDeleteHbase(String tableName, HashMap columnMaps) { 233 | 234 | if (columnMaps == null) { 235 | return; 236 | } 237 | 238 | Iterator keys = columnMaps.keySet().iterator(); 239 | String rowKey = columnMaps.get("order_id"); 240 | 241 | while (keys.hasNext()) { 242 | String key = keys.next(); 243 | HBaseUtils.deleteColumn(tableName, rowKey, "cf1", key); 244 | } 245 | 246 | } 247 | 248 | public static void main(String[] args) { 249 | String[] arr = new String[] { "0", "1", "2", "3", "4", "5" }; 250 | for (int i = 0; i < arr.length; i += 2) { 251 | // 偶数时key,奇数是value 252 | System.out.println(arr[i] + ":" + arr[i + 1]); 253 | } 254 | 255 | List list = Arrays.asList(arr); 256 | List list1 = list.subList(0, 2); 257 | List list2 = list.subList(2, list.size()); 258 | 259 | System.out.println(list1.toString()); 260 | System.out.println(list2.toString()); 261 | } 262 | } 263 | -------------------------------------------------------------------------------- /src/main/java/com/lm/kryo/MyRegistrator.java: -------------------------------------------------------------------------------- 1 | package com.lm.kryo; 2 | 3 | import org.apache.kafka.clients.consumer.ConsumerRecord; 4 | import org.apache.spark.serializer.KryoRegistrator; 5 | 6 | import com.esotericsoftware.kryo.Kryo; 7 | 8 | public class MyRegistrator implements KryoRegistrator { 9 | 10 | @Override 11 | public void registerClasses(Kryo arg0) { 12 | arg0.register(ConsumerRecord.class); 13 | } 14 | 15 | } 16 | -------------------------------------------------------------------------------- /src/main/java/com/lm/spark/SparkStreamingKafka.java: -------------------------------------------------------------------------------- 1 | package com.lm.spark; 2 | 3 | import java.io.Serializable; 4 | import java.util.ArrayList; 5 | import java.util.Arrays; 6 | import java.util.Collection; 7 | import java.util.Date; 8 | import java.util.HashMap; 9 | import java.util.List; 10 | import java.util.Map; 11 | 12 | import org.apache.hadoop.hbase.client.ResultScanner; 13 | import org.apache.kafka.clients.consumer.ConsumerRecord; 14 | import org.apache.kafka.common.TopicPartition; 15 | import 
org.apache.kafka.common.serialization.StringDeserializer; 16 | import org.apache.spark.SparkConf; 17 | import org.apache.spark.api.java.JavaRDD; 18 | import org.apache.spark.api.java.function.VoidFunction2; 19 | import org.apache.spark.streaming.Durations; 20 | import org.apache.spark.streaming.Time; 21 | import org.apache.spark.streaming.api.java.JavaInputDStream; 22 | import org.apache.spark.streaming.api.java.JavaStreamingContext; 23 | import org.apache.spark.streaming.kafka010.ConsumerStrategies; 24 | import org.apache.spark.streaming.kafka010.HasOffsetRanges; 25 | import org.apache.spark.streaming.kafka010.KafkaUtils; 26 | import org.apache.spark.streaming.kafka010.LocationStrategies; 27 | import org.apache.spark.streaming.kafka010.OffsetRange; 28 | import org.slf4j.Logger; 29 | import org.slf4j.LoggerFactory; 30 | import org.springframework.beans.factory.annotation.Value; 31 | import org.springframework.stereotype.Component; 32 | 33 | import com.lm.exception.MessageException; 34 | import com.lm.kafkahbase.HBaseUtils; 35 | import com.lm.kafkahbase.OffsetHBaseUtils; 36 | import com.lm.kafkahbase.OggKafkaUtils; 37 | import com.lm.utils.BeanUtil; 38 | 39 | @Component 40 | public class SparkStreamingKafka implements Serializable { 41 | 42 | /** 43 | * 44 | */ 45 | private static final long serialVersionUID = 1L; 46 | 47 | public static Logger LOGGER = LoggerFactory.getLogger(SparkStreamingKafka.class); 48 | 49 | @Value("${spark.appname}") 50 | private String appName; 51 | @Value("${spark.master}") 52 | private String master; 53 | @Value("${spark.seconds}") 54 | private long second; 55 | @Value("${kafka.metadata.broker.list}") 56 | private String metadataBrokerList; 57 | @Value("${kafka.auto.offset.reset}") 58 | private String autoOffsetReset; 59 | @Value("${kafka.topics}") 60 | private String kafkaTopics; 61 | @Value("${kafka.group.id}") 62 | private String kafkaGroupId; 63 | 64 | String datatable = "dfs.offset"; 65 | String offsetFamily = "topic_partition_offset"; 66 | 67 | public void processSparkStreaming() throws InterruptedException { 68 | // 1.配置sparkconf,必须要配置master 69 | SparkConf conf = new SparkConf().setAppName(appName).setMaster(master); 70 | conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); 71 | conf.set("spark.kryo.registrator", "com.lm.kryo.MyRegistrator"); 72 | 73 | // 2.根据sparkconf 创建JavaStreamingContext 74 | JavaStreamingContext jsc = new JavaStreamingContext(conf, Durations.seconds(second)); 75 | 76 | // 3.配置kafka 77 | Map kafkaParams = new HashMap<>(); 78 | kafkaParams.put("bootstrap.servers", metadataBrokerList); 79 | kafkaParams.put("key.deserializer", StringDeserializer.class); 80 | kafkaParams.put("value.deserializer", StringDeserializer.class); 81 | kafkaParams.put("group.id", kafkaGroupId); 82 | kafkaParams.put("auto.offset.reset", autoOffsetReset); 83 | kafkaParams.put("enable.auto.commit", false); 84 | 85 | // 4.kafka主题 86 | Collection topics = Arrays.asList(kafkaTopics.split(",")); 87 | 88 | // 5.创建SparkStreaming输入数据来源input Stream 89 | JavaInputDStream> stream = null; 90 | // KafkaUtils.createDirectStream(jsc, 91 | // LocationStrategies.PreferConsistent(), 92 | // ConsumerStrategies. 
Subscribe(topics, kafkaParams)); 93 | 94 | // 判断数据表是否存在,如果不存在则从topic首位置消费,并新建该表;如果表存在,则从表中恢复话题对应分区的消息的offset 95 | boolean isExists = HBaseUtils.isExistTable(datatable); 96 | if (isExists) { 97 | 98 | ResultScanner rs = HBaseUtils.getResultScanner(datatable, kafkaTopics); 99 | 100 | if (rs == null || !rs.iterator().hasNext()) { 101 | stream = KafkaUtils.createDirectStream(jsc, LocationStrategies.PreferConsistent(), 102 | ConsumerStrategies. Subscribe(topics, kafkaParams)); 103 | 104 | } else { 105 | 106 | Map fromOffsets = OffsetHBaseUtils.getOffset(rs); 107 | stream = KafkaUtils.createDirectStream(jsc, LocationStrategies.PreferConsistent(), 108 | ConsumerStrategies. Assign(fromOffsets.keySet(), kafkaParams, fromOffsets)); 109 | } 110 | } else { 111 | // 如果不存在TopicOffset表,则从topic首位置开始消费 112 | stream = KafkaUtils.createDirectStream(jsc, LocationStrategies.PreferConsistent(), 113 | ConsumerStrategies. Subscribe(topics, kafkaParams)); 114 | 115 | // 并创建TopicOffset表 116 | HBaseUtils.createTable(datatable, offsetFamily); 117 | 118 | System.out.println(datatable + "表已经成功创建!----------------"); 119 | } 120 | 121 | // 6.spark rdd转化和行动处理 122 | stream.foreachRDD(new VoidFunction2>, Time>() { 123 | 124 | private static final long serialVersionUID = 1L; 125 | 126 | @Override 127 | public void call(JavaRDD> v1, Time v2) { 128 | OffsetRange[] offsetRanges = ((HasOffsetRanges) v1.rdd()).offsetRanges(); 129 | for (OffsetRange offsetRange : offsetRanges) { 130 | // begin your transaction 131 | // 为了保证业务的事务性,最好把业务计算结果和offset同时进行hbase的存储,这样可以保证要么都成功,要么都失败,最终从端到端体现消费精确一次消费的意境 132 | // 存储 133 | long startDate = new Date().getTime(); 134 | List> consumerRecords = v1.collect(); 135 | 136 | List oggValues = new ArrayList<>(); 137 | for (ConsumerRecord record : consumerRecords) { 138 | oggValues.add(record.value()); 139 | } 140 | // OggKafkaUtils.processBatchPut(oggValues); 141 | System.out.println(oggValues.toString()); 142 | long endDate = new Date().getTime(); 143 | System.out.println("插入完成:" + (endDate - startDate)); 144 | 145 | // update results 146 | // update offsets where the end of existing offsets 147 | // matches 148 | // the beginning of this batch of offsets 149 | // assert that offsets were updated correctly 150 | 151 | try { 152 | HBaseUtils.putData(datatable, offsetFamily, offsetFamily, BeanUtil.objectToMap(offsetRange)); 153 | } catch (Exception e) { 154 | throw new MessageException("object与map转化", e); 155 | } 156 | System.out.println("add data Success!"); 157 | // end your transaction 158 | } 159 | System.out.println("the RDD records counts is " + v1.count()); 160 | 161 | } 162 | }); 163 | 164 | // 6. 启动执行 165 | jsc.start(); 166 | // 7. 
等待执行停止,如有异常直接抛出并关闭 167 | jsc.awaitTermination(); 168 | } 169 | } 170 | -------------------------------------------------------------------------------- /src/main/java/com/lm/spark/SparkStreamingKafka2.java: -------------------------------------------------------------------------------- 1 | package com.lm.spark; 2 | 3 | import java.io.Serializable; 4 | import java.util.ArrayList; 5 | import java.util.Arrays; 6 | import java.util.Collection; 7 | import java.util.Date; 8 | import java.util.HashMap; 9 | import java.util.List; 10 | import java.util.Map; 11 | 12 | import org.apache.hadoop.hbase.client.ResultScanner; 13 | import org.apache.kafka.clients.consumer.ConsumerRecord; 14 | import org.apache.kafka.common.TopicPartition; 15 | import org.apache.kafka.common.serialization.StringDeserializer; 16 | import org.apache.spark.SparkConf; 17 | import org.apache.spark.api.java.JavaRDD; 18 | import org.apache.spark.api.java.function.VoidFunction2; 19 | import org.apache.spark.streaming.Durations; 20 | import org.apache.spark.streaming.Time; 21 | import org.apache.spark.streaming.api.java.JavaInputDStream; 22 | import org.apache.spark.streaming.api.java.JavaStreamingContext; 23 | import org.apache.spark.streaming.kafka010.ConsumerStrategies; 24 | import org.apache.spark.streaming.kafka010.HasOffsetRanges; 25 | import org.apache.spark.streaming.kafka010.KafkaUtils; 26 | import org.apache.spark.streaming.kafka010.LocationStrategies; 27 | import org.apache.spark.streaming.kafka010.OffsetRange; 28 | import org.slf4j.Logger; 29 | import org.slf4j.LoggerFactory; 30 | import org.springframework.beans.factory.annotation.Value; 31 | import org.springframework.stereotype.Component; 32 | 33 | import com.lm.exception.MessageException; 34 | import com.lm.kafkahbase.HBaseUtils; 35 | import com.lm.kafkahbase.OffsetHBaseUtils; 36 | import com.lm.kafkahbase.OggKafkaJsonUtils; 37 | import com.lm.kafkahbase.OggKafkaUtils; 38 | import com.lm.utils.BeanUtil; 39 | 40 | @Component 41 | public class SparkStreamingKafka2 implements Serializable { 42 | 43 | /** 44 | * 45 | */ 46 | private static final long serialVersionUID = 1L; 47 | 48 | public static Logger LOGGER = LoggerFactory.getLogger(SparkStreamingKafka2.class); 49 | 50 | @Value("${spark.appname}") 51 | private String appName; 52 | @Value("${spark.master}") 53 | private String master; 54 | @Value("${spark.seconds}") 55 | private long second; 56 | @Value("${kafka.metadata.broker.list}") 57 | private String metadataBrokerList; 58 | @Value("${kafka.auto.offset.reset}") 59 | private String autoOffsetReset; 60 | @Value("${kafka.topics}") 61 | private String kafkaTopics; 62 | @Value("${kafka.group.id}") 63 | private String kafkaGroupId; 64 | 65 | String datatable = "dfs.offset"; 66 | String offsetFamily = "topic_partition_offset"; 67 | 68 | public void processSparkStreaming() throws InterruptedException { 69 | // 1.配置sparkconf,必须要配置master 70 | SparkConf conf = new SparkConf().setAppName(appName).setMaster(master); 71 | conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); 72 | conf.set("spark.kryo.registrator", "com.lm.kryo.MyRegistrator"); 73 | conf.set("spark.kryoserializer.buffer.mb", "256"); 74 | conf.set("spark.kryoserializer.buffer.max", "512"); 75 | 76 | conf.set("spark.executor.memory", "4g"); 77 | 78 | // 2.根据sparkconf 创建JavaStreamingContext 79 | JavaStreamingContext jsc = new JavaStreamingContext(conf, Durations.seconds(second)); 80 | 81 | // 3.配置kafka 82 | Map kafkaParams = new HashMap<>(); 83 | kafkaParams.put("bootstrap.servers", 
metadataBrokerList); 84 | kafkaParams.put("key.deserializer", StringDeserializer.class); 85 | kafkaParams.put("value.deserializer", StringDeserializer.class); 86 | kafkaParams.put("group.id", kafkaGroupId); 87 | kafkaParams.put("auto.offset.reset", autoOffsetReset); 88 | kafkaParams.put("enable.auto.commit", false); 89 | 90 | // 4.kafka主题 91 | Collection topics = Arrays.asList(kafkaTopics.split(",")); 92 | 93 | // 5.创建SparkStreaming输入数据来源input Stream 94 | JavaInputDStream> stream = null; 95 | // KafkaUtils.createDirectStream(jsc, 96 | // LocationStrategies.PreferConsistent(), 97 | // ConsumerStrategies. Subscribe(topics, kafkaParams)); 98 | 99 | // 判断数据表是否存在,如果不存在则从topic首位置消费,并新建该表;如果表存在,则从表中恢复话题对应分区的消息的offset 100 | boolean isExists = HBaseUtils.isExistTable(datatable); 101 | if (isExists) { 102 | 103 | ResultScanner rs = HBaseUtils.getResultScanner(datatable, kafkaTopics); 104 | 105 | if (rs == null || !rs.iterator().hasNext()) { 106 | stream = KafkaUtils.createDirectStream(jsc, LocationStrategies.PreferConsistent(), 107 | ConsumerStrategies. Subscribe(topics, kafkaParams)); 108 | 109 | } else { 110 | 111 | Map fromOffsets = OffsetHBaseUtils.getOffset(rs); 112 | stream = KafkaUtils.createDirectStream(jsc, LocationStrategies.PreferConsistent(), 113 | ConsumerStrategies. Assign(fromOffsets.keySet(), kafkaParams, fromOffsets)); 114 | } 115 | } else { 116 | // 如果不存在TopicOffset表,则从topic首位置开始消费 117 | stream = KafkaUtils.createDirectStream(jsc, LocationStrategies.PreferConsistent(), 118 | ConsumerStrategies. Subscribe(topics, kafkaParams)); 119 | 120 | // 并创建TopicOffset表 121 | HBaseUtils.createTable(datatable, offsetFamily); 122 | 123 | System.out.println(datatable + "表已经成功创建!----------------"); 124 | } 125 | 126 | // 6.spark rdd转化和行动处理 127 | stream.foreachRDD(new VoidFunction2>, Time>() { 128 | 129 | private static final long serialVersionUID = 1L; 130 | 131 | @Override 132 | public void call(JavaRDD> v1, Time v2) { 133 | 134 | OffsetRange[] offsetRanges = ((HasOffsetRanges) v1.rdd()).offsetRanges(); 135 | for (OffsetRange offsetRange : offsetRanges) { 136 | // begin your transaction 137 | // 为了保证业务的事务性,最好把业务计算结果和offset同时进行hbase的存储,这样可以保证要么都成功,要么都失败,最终从端到端体现消费精确一次消费的意境 138 | // 存储 139 | long startDate = new Date().getTime(); 140 | List> consumerRecords = v1.collect(); 141 | 142 | List oggValues = new ArrayList<>(); 143 | for (ConsumerRecord record : consumerRecords) { 144 | oggValues.add(record.value()); 145 | } 146 | OggKafkaJsonUtils.processBatchPut(oggValues); 147 | long endDate = new Date().getTime(); 148 | System.out.println("插入完成:" + (endDate - startDate)); 149 | 150 | // update results 151 | // update offsets where the end of existing offsets 152 | // matches 153 | // the beginning of this batch of offsets 154 | // assert that offsets were updated correctly 155 | 156 | try { 157 | HBaseUtils.putData(datatable, offsetFamily, offsetFamily, BeanUtil.objectToMap(offsetRange)); 158 | } catch (Exception e) { 159 | throw new MessageException("object与map转化", e); 160 | } 161 | System.out.println("add data Success!"); 162 | // end your transaction 163 | } 164 | System.out.println("the RDD records counts is " + v1.count()); 165 | 166 | } 167 | }); 168 | 169 | // 6. 启动执行 170 | jsc.start(); 171 | // 7. 
等待执行停止,如有异常直接抛出并关闭 172 | jsc.awaitTermination(); 173 | } 174 | } 175 | -------------------------------------------------------------------------------- /src/main/java/com/lm/spark/SparkStreamingKafka3.java: -------------------------------------------------------------------------------- 1 | package com.lm.spark; 2 | 3 | import java.io.Serializable; 4 | import java.util.Arrays; 5 | import java.util.Collection; 6 | import java.util.HashMap; 7 | import java.util.List; 8 | import java.util.Map; 9 | 10 | import org.apache.hadoop.hbase.client.Put; 11 | import org.apache.hadoop.hbase.io.ImmutableBytesWritable; 12 | import org.apache.kafka.clients.consumer.ConsumerRecord; 13 | import org.apache.kafka.common.serialization.StringDeserializer; 14 | import org.apache.spark.SparkConf; 15 | import org.apache.spark.api.java.JavaPairRDD; 16 | import org.apache.spark.api.java.JavaRDD; 17 | import org.apache.spark.api.java.function.Function2; 18 | import org.apache.spark.api.java.function.PairFunction; 19 | import org.apache.spark.api.java.function.VoidFunction2; 20 | import org.apache.spark.streaming.Durations; 21 | import org.apache.spark.streaming.Time; 22 | import org.apache.spark.streaming.api.java.JavaInputDStream; 23 | import org.apache.spark.streaming.api.java.JavaPairDStream; 24 | import org.apache.spark.streaming.api.java.JavaStreamingContext; 25 | import org.apache.spark.streaming.kafka010.ConsumerStrategies; 26 | import org.apache.spark.streaming.kafka010.KafkaUtils; 27 | import org.apache.spark.streaming.kafka010.LocationStrategies; 28 | import org.slf4j.Logger; 29 | import org.slf4j.LoggerFactory; 30 | import org.springframework.beans.factory.annotation.Value; 31 | import org.springframework.stereotype.Component; 32 | 33 | import scala.Tuple2; 34 | 35 | @Component 36 | public class SparkStreamingKafka3 implements Serializable { 37 | 38 | /** 39 | * 40 | */ 41 | private static final long serialVersionUID = 1L; 42 | 43 | public static Logger LOGGER = LoggerFactory.getLogger(SparkStreamingKafka3.class); 44 | 45 | @Value("${spark.appname}") 46 | private String appName; 47 | @Value("${spark.master}") 48 | private String master; 49 | @Value("${spark.seconds}") 50 | private long second; 51 | @Value("${kafka.metadata.broker.list}") 52 | private String metadataBrokerList; 53 | @Value("${kafka.auto.offset.reset}") 54 | private String autoOffsetReset; 55 | @Value("${kafka.topics}") 56 | private String kafkaTopics; 57 | @Value("${kafka.group.id}") 58 | private String kafkaGroupId; 59 | 60 | public void processSparkStreaming() throws InterruptedException { 61 | // 1.配置sparkconf,必须要配置master 62 | SparkConf conf = new SparkConf().setAppName(appName).setMaster(master); 63 | conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); 64 | conf.set("spark.kryo.registrator", "com.lm.kryo.MyRegistrator"); 65 | 66 | // 2.根据sparkconf 创建JavaStreamingContext 67 | JavaStreamingContext jsc = new JavaStreamingContext(conf, Durations.seconds(second)); 68 | 69 | // 3.配置kafka 70 | Map kafkaParams = new HashMap<>(); 71 | kafkaParams.put("bootstrap.servers", metadataBrokerList); 72 | kafkaParams.put("key.deserializer", StringDeserializer.class); 73 | kafkaParams.put("value.deserializer", StringDeserializer.class); 74 | kafkaParams.put("group.id", kafkaGroupId); 75 | kafkaParams.put("auto.offset.reset", autoOffsetReset); 76 | kafkaParams.put("enable.auto.commit", false); 77 | 78 | // 4.kafka主题 79 | Collection topics = Arrays.asList(kafkaTopics.split(",")); 80 | 81 | // 5.创建SparkStreaming输入数据来源input Stream 82 | 
final JavaInputDStream> stream = 83 | KafkaUtils.createDirectStream(jsc, LocationStrategies.PreferConsistent(), 84 | ConsumerStrategies. Subscribe(topics, kafkaParams)); 85 | 86 | 87 | 88 | JavaPairDStream ogg = stream.mapToPair(new PairFunction, ImmutableBytesWritable, Put>() { 89 | 90 | /** 91 | * 92 | */ 93 | private static final long serialVersionUID = 1L; 94 | 95 | @Override 96 | public Tuple2 call(ConsumerRecord t) throws Exception { 97 | return null; 98 | } 99 | 100 | }); 101 | 102 | 103 | 104 | // JavaPairRDD ogg ; 105 | 106 | 107 | // 6.spark rdd转化和行动处理 108 | stream.foreachRDD(new VoidFunction2>, Time>() { 109 | 110 | private static final long serialVersionUID = 1L; 111 | 112 | @Override 113 | public void call(JavaRDD> v1, Time v2) throws Exception { 114 | 115 | List> consumerRecords = v1.collect(); 116 | 117 | } 118 | }); 119 | 120 | // 6. 启动执行 121 | jsc.start(); 122 | // 7. 等待执行停止,如有异常直接抛出并关闭 123 | jsc.awaitTermination(); 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /src/main/java/com/lm/utils/BeanUtil.java: -------------------------------------------------------------------------------- 1 | package com.lm.utils; 2 | 3 | import java.lang.reflect.Field; 4 | import java.lang.reflect.Modifier; 5 | import java.util.HashMap; 6 | import java.util.Map; 7 | 8 | /** 9 | * 描述:实体工具类 author qiaobin 2016/10/11 16:25. 10 | */ 11 | public class BeanUtil { 12 | 13 | /** 14 | * obj转map 15 | * @param obj 16 | * @return 17 | * @throws Exception 18 | */ 19 | public static Map objectToMap(Object obj) throws Exception { 20 | if (obj == null) { 21 | return null; 22 | } 23 | Map map = new HashMap(); 24 | Field[] declaredFields = obj.getClass().getDeclaredFields(); 25 | Field[] superDeclaredFields = obj.getClass().getSuperclass().getDeclaredFields(); 26 | for (Field field : declaredFields) { 27 | field.setAccessible(true); 28 | map.put(field.getName(), field.get(obj)); 29 | } 30 | for (Field field : superDeclaredFields) { 31 | field.setAccessible(true); 32 | map.put(field.getName(), field.get(obj)); 33 | } 34 | return map; 35 | } 36 | 37 | /** 38 | * map转object 39 | * @param map 40 | * @param beanClass 41 | * @return 42 | * @throws Exception 43 | */ 44 | public static Object mapToObject(Map map, Class beanClass) throws Exception { 45 | if (map == null) 46 | return null; 47 | Object obj = beanClass.newInstance(); 48 | Field[] fields = obj.getClass().getDeclaredFields(); 49 | for (Field field : fields) { 50 | int mod = field.getModifiers(); 51 | if (Modifier.isStatic(mod) || Modifier.isFinal(mod)) { 52 | continue; 53 | } 54 | field.setAccessible(true); 55 | field.set(obj, map.get(field.getName())); 56 | } 57 | Field[] superFields = obj.getClass().getSuperclass().getDeclaredFields(); 58 | for (Field field : superFields) { 59 | int mod = field.getModifiers(); 60 | if (Modifier.isStatic(mod) || Modifier.isFinal(mod)) { 61 | continue; 62 | } 63 | field.setAccessible(true); 64 | field.set(obj, map.get(field.getName())); 65 | } 66 | return obj; 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/main/java/com/lm/utils/ConstUtil.java: -------------------------------------------------------------------------------- 1 | package com.lm.utils; 2 | 3 | import java.util.HashMap; 4 | 5 | /** 6 | * 常量工具 7 | */ 8 | public class ConstUtil { 9 | public final static String KAFKA_SPLIT = "\\♫"; 10 | 11 | public final static String HBASE_FAMILY = "cf1"; 12 | 13 | public final static String STRING_NULL = "NULL"; 14 | 
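// Note (added for clarity, not in the original ConstUtil.java): KAFKA_SPLIT above is the regex
// handed to String.split() in OggKafkaUtils.getOggKafkas(); each delimited OGG message is one
// "♫"-separated string whose first five tokens form the header (status, table, created, updated, id)
// and whose remaining tokens alternate column name / column value (see OggKafkaUtils.getKafkaMap()).
// TABLE_ROWKEY below maps an HBase table name to the source column that is used as its row key.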
15 | public final static HashMap<String, String> TABLE_ROWKEY = new HashMap<>(); 16 | 17 | static{ 18 | TABLE_ROWKEY.put("dfs.order", "order_id"); 19 | } 20 | } 21 |
-------------------------------------------------------------------------------- /src/main/java/com/lm/utils/SpringUtils.java: --------------------------------------------------------------------------------
1 | package com.lm.utils; 2 | 3 | import org.springframework.context.ApplicationContext; 4 | import org.springframework.context.support.ClassPathXmlApplicationContext; 5 | 6 | final public class SpringUtils { 7 | 8 | private static ApplicationContext ac = null; 9 | 10 | private SpringUtils() { 11 | 12 | } 13 | 14 | static { 15 | ac = new ClassPathXmlApplicationContext("applicationContext.xml"); 16 | } 17 | 18 | public static ApplicationContext getApplicationContext() { 19 | // Return the cached Spring application context 20 | return ac; 21 | } 22 | 23 | }
-------------------------------------------------------------------------------- /src/main/resources/applicationContext.xml: --------------------------------------------------------------------------------
1 | 2 | 11 | 12 | 13 | 14 | 15 |
-------------------------------------------------------------------------------- /src/main/resources/config.properties: --------------------------------------------------------------------------------
1 | hbase.zk.host=10.32.19.41 2 | hbase.zk.port=2181 3 | 4 | zk.checkpoint.tmpdir=D:/tmp/oggCheckPoint/ 5 | 6 | kafka.metadata.broker.list=10.32.19.41:9092,10.32.19.42:9092,10.32.19.43:9092 7 | kafka.auto.offset.reset=earliest 8 | #latest,earliest 9 | kafka.topics=pms 10 | kafka.group.id=sparkstreaming-kafka10 11 | 12 | spark.master=local[2] 13 | spark.appname=SparkStreamingKafka 14 | spark.seconds=5 15 |
-------------------------------------------------------------------------------- /src/main/resources/hbase-site.xml: --------------------------------------------------------------------------------
1 | <?xml version="1.0"?> 2 | 3 | 23 | 24 | <configuration> 25 | 26 | <property> 27 | <name>hbase.rootdir</name> 28 | 29 | <value>hdfs://hd054:9000/hbase</value> 30 | 31 | </property> 32 | 33 | 34 | <property> 35 | <name>hbase.cluster.distributed</name> 36 | 37 | <value>true</value> 38 | 39 | </property> 40 | 41 | 42 | <property> 43 | <name>hbase.zookeeper.quorum</name> 44 | 45 | <value>10.32.19.41:2181,10.32.19.42:2181,10.32.19.43:2181</value> 46 | 47 | </property> 48 | 49 | <property> 50 | <name>hbase.zookeeper.property.clientPort</name> 51 | <value>2181</value> 52 | </property> 53 | 54 | 55 | 56 | <property> 57 | <name>dfs.replication</name> 58 | 59 | <value>1</value> 60 | 61 | </property> 62 | 63 | <property> 64 | <name>hbase.master.port</name> 65 | <value>16000</value> 66 | </property> 67 | 68 | <property> 69 | <name>hbase.master.info.port</name> 70 | <value>16010</value> 71 | </property> 72 | 73 | <property> 74 | <name>hbase.tmp.dir</name> 75 | <value>/home/winit/hbase-1.3.1/data/tmp</value> 76 | </property> 77 | 78 | 79 | <property> 80 | <name>hbase.zookeeper.property.dataDir</name> 81 | <value>/home/winit/zookeeper-3.4.8/data</value> 82 | </property> 83 | 84 | 85 | 111 | 112 | 113 | </configuration> 114 |
-------------------------------------------------------------------------------- /src/main/resources/log4j.properties: --------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License.
You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | # Set everything to be logged to the console 18 | log4j.rootCategory=info, console 19 | log4j.appender.console=org.apache.log4j.ConsoleAppender 20 | log4j.appender.console.target=System.err 21 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 22 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n 23 | 24 | # Settings to quiet third party logs that are too verbose 25 | log4j.logger.org.spark-project.jetty=WARN 26 | log4j.logger.org.spark-project.jetty.util.component.AbstractLifeCycle=ERROR 27 | log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO 28 | log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO 29 | log4j.logger.org.apache.parquet=ERROR 30 | log4j.logger.parquet=ERROR 31 | 32 | # SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support 33 | log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL 34 | log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR -------------------------------------------------------------------------------- /src/test/java/com/lm/spring_sparkstreaming_kafka10/AppTest.java: -------------------------------------------------------------------------------- 1 | package com.lm.spring_sparkstreaming_kafka10; 2 | 3 | import junit.framework.Test; 4 | import junit.framework.TestCase; 5 | import junit.framework.TestSuite; 6 | 7 | /** 8 | * Unit test for simple App. 9 | */ 10 | public class AppTest 11 | extends TestCase 12 | { 13 | /** 14 | * Create the test case 15 | * 16 | * @param testName name of the test case 17 | */ 18 | public AppTest( String testName ) 19 | { 20 | super( testName ); 21 | } 22 | 23 | /** 24 | * @return the suite of tests being tested 25 | */ 26 | public static Test suite() 27 | { 28 | return new TestSuite( AppTest.class ); 29 | } 30 | 31 | /** 32 | * Rigourous Test :-) 33 | */ 34 | public void testApp() 35 | { 36 | assertTrue( true ); 37 | } 38 | } 39 | --------------------------------------------------------------------------------
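Appendix — illustrative usage sketch (not part of the repository): the snippet below shows the shape of OGG JSON record that OggKafkaJsonUtils.processBatchPut() expects. The field names "table", "primary_keys" and "after", the row-key column order_id, the column family cf1 and the target table "dfs.order" come from the code above; the concrete values, the extra order_status column and the sample class itself are invented for the example, and running it requires the HBase cluster configured in hbase-site.xml to be reachable.

package com.lm; // hypothetical sample class, added only for illustration

import java.util.Collections;
import java.util.List;

import com.lm.kafkahbase.OggKafkaJsonUtils;

public class OggJsonSample {
    public static void main(String[] args) {
        // A made-up OGG JSON change record; processBatchPut() only reads "table",
        // "primary_keys" and "after", and uses after.order_id as the HBase row key.
        String sampleJson = "{"
                + "\"table\":\"DFS.ORDER\","
                + "\"primary_keys\":[\"order_id\"],"
                + "\"after\":{\"order_id\":\"1001\",\"order_status\":\"CREATED\"}"
                + "}";
        List<String> batch = Collections.singletonList(sampleJson);
        // Every column of "after" is written to column family cf1 of HBase table "dfs.order"
        // via HBaseUtils.batchInsert(); Kafka offsets are tracked separately by the streaming jobs.
        OggKafkaJsonUtils.processBatchPut(batch);
    }
}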