├── .gitignore
├── README.md
├── pom.xml
└── src
    └── main
        └── scala
            ├── META-INF
            │   └── MANIFEST.MF
            ├── info
            │   └── xiaohei
            │       └── spark
            │           ├── connector
            │           │   ├── HBaseEntry.scala
            │           │   ├── RelationalDbEntry.scala
            │           │   ├── hbase
            │           │   │   ├── HBaseCommonUtils.scala
            │           │   │   ├── HBaseConf.scala
            │           │   │   ├── builder
            │           │   │   │   ├── reader
            │           │   │   │   │   ├── HBaseContext.scala
            │           │   │   │   │   ├── HBaseReaderBuilder.scala
            │           │   │   │   │   ├── HBaseSaltRDD.scala
            │           │   │   │   │   └── HBaseSimpleRDD.scala
            │           │   │   │   └── writer
            │           │   │   │       ├── CollectionWriterBuilder.scala
            │           │   │   │       └── HBaseWriterBuilder.scala
            │           │   │   ├── package.scala
            │           │   │   ├── salt
            │           │   │   │   ├── SaltProducer.scala
            │           │   │   │   ├── SaltProducerConversions.scala
            │           │   │   │   └── SaltProducerFactory.scala
            │           │   │   └── transformer
            │           │   │       ├── DataTransformer.scala
            │           │   │       ├── reader
            │           │   │       │   ├── DataReader.scala
            │           │   │       │   └── DataReaderConversions.scala
            │           │   │       └── writer
            │           │   │           ├── DataWriter.scala
            │           │   │           └── DataWriterConversions.scala
            │           │   └── mysql
            │           │       ├── MysqlConf.scala
            │           │       ├── builder
            │           │       │   ├── reader
            │           │       │   │   ├── MysqlContext.scala
            │           │       │   │   └── MysqlReaderBuilder.scala
            │           │       │   └── writer
            │           │       │       └── MysqlWriterBuilder.scala
            │           │       ├── package.scala
            │           │       └── transformer
            │           │           ├── executor
            │           │           │   ├── DataExecutor.scala
            │           │           │   └── DataExecutorConversions.scala
            │           │           └── mapper
            │           │               ├── DataMapper.scala
            │           │               └── DataMapperConversions.scala
            │           └── test
            │               └── Test.scala
            └── org
                └── apache
                    └── spark
                        └── rdd
                            ├── HBaseKerberosUtil.scala
                            └── HBaseScanRDD.scala

/.gitignore:
--------------------------------------------------------------------------------
*.class
# idea
target/
deploy/
target/surefire-reports/
.idea/
*.iml
.DS_Store

# Mobile Tools for Java (J2ME)
.mtj.tmp/

# Package Files #
*.jar
*.war
*.ear

# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*

# current project
.gitignore

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Spark Database Connector

## New Features

- Writing a List to HBase now supports Kerberos authentication
- Upgraded the HBase client API to version 1.2.0

Hides the connection-handling details of the underlying databases so that read and write operations can be performed from Spark with a simple Scala API.

Tested with:

- Scala 2.11.8/2.10.5
- Spark 1.6.0
- HBase 0.98.4
- JDBC Driver 5.1.35

Currently supported:

- HBase
- MySQL

Add the Maven dependency:

```xml
<dependency>
    <groupId>info.xiaohei.www</groupId>
    <artifactId>spark-database-connector_2.11</artifactId>
    <version>1.0.0</version>
</dependency>
```

For Scala 2.10 use:

```xml
<dependency>
    <groupId>info.xiaohei.www</groupId>
    <artifactId>spark-database-connector_2.10</artifactId>
    <version>1.0.0</version>
</dependency>
```

## HBase

### Setting the HBase host

Set the HBase host address in any of the following three ways.

**1. On the spark-submit command line:**

```shell
spark-submit --conf spark.hbase.host=your-hbase-host
```

**2. In Scala code:**

```scala
val sparkConf = new SparkConf()
sparkConf.set("spark.hbase.host", "your-hbase-host")
val sc = new SparkContext(sparkConf)
```

**3. As a JVM system property:**

```shell
java -Dspark.hbase.host=your-hbase-host -jar ....
```

**Setting the path of hbase-site.xml (optional)**

If you need an hbase-site.xml file to be loaded, point to it with the following option:

```shell
spark.hbase.config=your-hbase-config-path
```

The option can be set in the same ways as above.
Note: the hbase-site.xml file must be placed on a resource path that the project can resolve; otherwise it cannot be read and the default configuration is used.
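If both options are needed, they can also be combined on a single `SparkConf` before the context is created. A minimal sketch with placeholder values (the app name, ZooKeeper quorum and config path below are illustrative only):

```scala
import org.apache.spark.{SparkConf, SparkContext}

// Placeholder values: replace with your ZooKeeper quorum and an hbase-site.xml
// that is reachable from the application.
val sparkConf = new SparkConf()
  .setAppName("spark-database-connector-demo")
  .set("spark.hbase.host", "zk1,zk2,zk3")
  .set("spark.hbase.config", "hbase-site.xml")
val sc = new SparkContext(sparkConf)
```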
### Writing data to HBase

**Import the implicit conversions:**

```scala
import info.xiaohei.spark.connector.hbase._
```

#### Writing a Spark RDD to HBase

Any Spark RDD can be written straight to HBase, for example:

```scala
val rdd = sc.parallelize(1 to 100)
  .map(i => (s"rowkey-${i.toString}", s"column1-${i.toString}", "column2"))
```

This RDD contains 100 three-element tuples. When it is written to HBase, the first element becomes the rowkey and the remaining elements become the column values, in order:

```scala
rdd.toHBase("mytable")
  .insert("col1", "col2")
  .inColumnFamily("columnFamily")
  .save()
```

(1) `toHBase` takes the name of the target table
(2) `insert` takes the names of the columns to write
(3) `inColumnFamily` takes the column family those columns belong to
(4) finally, `save` persists the RDD to HBase

If col2 belongs to a different column family than col1, qualify it when passing the column name to `insert`:

```scala
rdd.toHBase("mytable")
  .insert("col1", "otherColumnFamily:col2")
  .inColumnFamily("defaultColumnFamily")
  .save()
```

Separate the column family and the column name with a colon (:); the same notation works for any other column that needs an explicit family.

#### Writing a Scala collection/sequence to HBase

```scala
val dataList = Seq[(String, String)](
  ("00001475304346643896037", "kgJkm0euSbe"),
  ("00001475376619355219953", "kiaR40qzI8o"),
  ("00001475458728618943637", "kgCoW0hgzXO"),
  ("00001475838363931738019", "kqiHu0WNJC0")
)

// create the implicit configuration
implicit val hbaseConf = HBaseConf.createConf("hbase-host")
// if you are inside a Spark program, create it from the SparkContext instead
implicit val hbaseConf = HBaseConf.createFromSpark(sc)

dataList.toHBase("mytable")
  .insert("col1", "col2")
  .inColumnFamily("columnFamily")
  .save()
```

The usage mirrors writing an RDD to HBase. **Note: the implicit configuration must not be defined inside Spark operators such as foreachPartition.**

The call above collects the data into a list of Puts and writes the whole collection to HBase in one batch. To write through a buffer instead, pass the extra arguments of `toHBase(tableName, writeBufferSize, asynProcess)`:

```scala
dataList.toHBase("mytable"
  // size of the write buffer
  , Some(5 * 1024 * 1024)
  // use the buffered (asynchronous) write path
  , true)
  .insert("col1", "col2")
  .inColumnFamily("columnFamily")
  .save()
```

In this mode each element is mutated individually, but flushing is deferred: the batch is only sent to HBase once the write buffer fills up.

#### Adding a salt prefix to the rowkey when writing

```scala
rdd.toHBase("mytable")
  .insert("col1", "otherColumnFamily:col2")
  .inColumnFamily("defaultColumnFamily")
  // add the salt
  .withSalt(saltArray)
  .save()
```

`saltArray` is a user-defined array of strings, for example the strings "0" through "9".

Once `withSalt` is applied, every rowkey written to HBase is prefixed with one element of `saltArray`. **Note: to keep HBase partial-key scans working (rowkeys stay left-aligned), all elements of the array should have the same length.**

There are two strategies for choosing the prefix:
* 1. take the hash code of the rowkey bytes modulo the length of `saltArray` and use the element at that index as the prefix
* 2. use a random number generator to pick an index below the length of `saltArray`

The first strategy is the one currently used (see the sketch below).
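A standalone sketch of that hash-based choice, mirroring the `HashSaltProducer` logic found in this project (the salt array and rowkey below are illustrative):

```scala
// Hash the rowkey bytes, mask off the sign bit, and use the remainder as an
// index into the salt array; the chosen salt is prepended to the rowkey.
val salts: Array[String] = (0 to 9).map(_.toString).toArray

def saltFor(rowkey: Array[Byte]): String =
  salts((java.util.Arrays.hashCode(rowkey) & 0x7fffffff) % salts.length)

val rowkey = "rowkey-1".getBytes("UTF-8")
val saltedRowkey = saltFor(rowkey) + "rowkey-1" // salt prefix + original rowkey
```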
### Reading data from HBase

**Import the implicit conversions:**

```scala
import info.xiaohei.spark.connector.hbase._
```

Reads from HBase go through the SparkContext:

```scala
val hbaseRdd = sc.fromHBase[(String, String, String)]("mytable")
  .select("col1", "col2")
  .inColumnFamily("columnFamily")
  .withStartRow("startRow")
  .withEndRow("endRow")
  // when the rowkeys carry a random salt prefix, pass in the salt array and it is
  // stripped automatically; the rowkeys you get back are the original, unsalted ones
  .withSalt(saltArray)
```

(1) `fromHBase` on the SparkContext takes the table to read and requires the type of the data being read
(2) `select` takes the names of the columns to read
(3) `inColumnFamily` takes the column family those columns belong to
(4) `withStartRow` and `withEndRow` optionally bound the rowkey scan range
(5) the resulting `hbaseRdd` then supports all the usual Spark RDD operations

In the example above the type parameter of `fromHBase` is a three-element tuple while `select` reads only two columns, so the first element of the tuple is the rowkey and the remaining elements follow the column order.

If you do not need the rowkey, simply use a two-element tuple as the type parameter.

In other words: when n columns are read and the type parameter is an n-tuple, the columns map one-to-one onto the tuple elements;
when n columns are read and the type parameter is an (n+1)-tuple, the first element of the tuple is the rowkey.

When the columns live in different column families, qualify them the same way as when writing to HBase.

### SQL on HBase

Using the DataFrame API of SQLContext, SQL on HBase is easy to achieve with this component.

The `hbaseRdd` from the example above holds the data read from HBase; transform it as follows:

```scala
// build an RDD of org.apache.spark.sql.Row
val rowRdd = hbaseRdd.map(r => Row(r._1, r._2, r._3))
val sqlContext = new SQLContext(sc)
val df = sqlContext.createDataFrame(
  rowRdd,
  StructType(Array(StructField("col1", StringType), StructField("col2", StringType), StructField("col3", StringType)))
)
df.show()

df.registerTempTable("mytable")
sqlContext.sql("select col1 from mytable").show()
```

### Reading/writing HBase with a case class

The built-in implicit conversions handle primitive types and tuples; to work with a case class some extra preparation is required.

Define a case class such as:

```scala
case class MyClass(name: String, age: Int)
```

If you want to achieve the following:

```scala
val classRdd = sc.fromHBase[MyClass]("tableName")
  .select("name", "age")
  .inColumnFamily("info")

classRdd.map {
  c =>
    (c.name, c.age)
}
```

or the following:

```scala
// classRdd has type RDD[MyClass]
classRdd.toHBase("tableName")
  .insert("name", "age")
  .inColumnFamily("info")
  .save()
```

you need to additionally implement implicit methods that can handle the custom case class:

```scala
implicit def myReaderConversion: DataReader[MyClass] = new CustomDataReader[(String, Int), MyClass] {
  override def convert(data: (String, Int)): MyClass = MyClass(data._1, data._2)
}

implicit def myWriterConversion: DataWriter[MyClass] = new CustomDataWriter[MyClass, (String, Int)] {
  override def convert(data: MyClass): (String, Int) = (data.name, data.age)
}
```

Each implicit method returns a DataReader/DataWriter and overrides the `convert` method of CustomDataReader/CustomDataWriter,
converting the case class to a tuple or the tuple back to a case class.

## HBase with Kerberos authentication

In addition to the write configuration described above, the following three options must be set (see the sketch after this list):

- spark.hbase.krb.principal: the principal to authenticate as
- spark.hbase.krb.keytab: path to the keytab file (it must exist at the same path on every node)
- spark.hbase.config: path to the hbase-site.xml file

Writes to HBase will then authenticate with the Kerberos credentials provided.
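A minimal sketch of how these options could be set on a `SparkConf`; the principal and paths below are placeholders:

```scala
// Placeholder principal and paths; every node must be able to read the keytab
// and hbase-site.xml at these exact locations.
val sparkConf = new SparkConf()
  .set("spark.hbase.host", "zk1,zk2,zk3")
  .set("spark.hbase.krb.principal", "hbase-user@EXAMPLE.COM")
  .set("spark.hbase.krb.keytab", "/etc/security/keytabs/hbase-user.keytab")
  .set("spark.hbase.config", "/etc/hbase/conf/hbase-site.xml")
```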
At the moment only reading from a Kerberos-enabled HBase cluster is supported seamlessly.
Writing is more restricted; for example, an RDD has to be written through `foreachPartition`:

```scala
rdd.foreachPartition {
  data =>
    data.toList.toHBase("table").insert("columns") //...
}
```

**Note: the `toList` inside `foreachPartition` loads all data of the partition into memory; with very large partitions this can cause an OOM, in which case increase the executor memory.**

TODO: the RDD read/write interfaces do not implement Kerberos authentication yet.

## MySQL

Besides writing RDDs/collections to HBase, MySQL operations are also supported, including from ordinary (non-Spark) programs.

### Setting the connection information in the conf

**1. Inside a Spark program**

Set the following options on the SparkConf:

```scala
sparkConf
  .set("spark.mysql.host", "your-host")
  .set("spark.mysql.username", "your-username")
  .set("spark.mysql.password", "your-passwd")
  .set("spark.mysql.port", "db-port")
  .set("spark.mysql.db", "database-name")

// create the implicit MysqlConf
implicit val mysqlConf = MysqlConf.createFromSpark(sc)
```

A note on this implicit value: in operations such as `foreachPartition` or `mapPartitions` on an RDD, the default implicit conversion for MysqlConf fails because of serialization issues, so the value has to be declared explicitly; for operations that do not involve shipping it across the network this step can be omitted.

The ways of setting properties described in the HBase section also apply here.

**2. In an ordinary program**

Create a MysqlConf and set the connection properties:

```scala
// create the implicit MysqlConf
implicit val mysqlConf = MysqlConf.createConf(
  "your-host",
  "username",
  "password",
  "port",
  "db-name"
)
```

In an ordinary program the implicit MysqlConf must always be declared explicitly.

### Writing to MySQL

Import the implicit conversions:

```scala
import info.xiaohei.spark.connector.mysql._
```

After that, any Iterable can be written straight to MySQL:

```scala
list.toMysql("table-name")
  // columns to insert
  .insert("columns")
  // where condition, e.g. age=1
  .where("where-conditions")
  .save()
```

### Reading from MySQL inside a Spark program

```scala
val res = sc.fromMysql[(Int, String, Int)]("table-name")
  .select("id", "name", "age")
  .where("where-conditions")
  .get
```

### Reading from MySQL in an ordinary program

```scala
// entry point for reading a relational database from an ordinary program
val dbEntry = new RelationalDbEntry

val res = dbEntry.fromMysql[(Int, String, Int)]("table-name")
  .select("id", "name", "age")
  .where("where-conditions")
  .get
```

After creating the database entry point, the flow is the same as in Spark.

### Case class mapping

To read into or write from a custom case class, for example:

```scala
case class Model(id: Int, name: String, age: Int)
```

the procedure is much the same as in the HBase section; define the implicit conversions:

```scala
implicit def myExecutorConversion: DataExecutor[Model] = new CustomDataExecutor[Model, (Int, String, Int)]() {
  override def convert(data: Model): (Int, String, Int) = (data.id, data.name, data.age)
}

implicit def myMapperConversion: DataMapper[Model] = new CustomDataMapper[(Int, String, Int), Model]() {
  override def convert(data: (Int, String, Int)): Model = Model(data._1, data._2, data._3)
}
```

and then use them directly:

```scala
val entry = new RelationalDbEntry
val res = entry.fromMysql[Model]("test")
  .select("id", "name", "age")
  .get
res.foreach(x => println(s"id:${x.id},name:${x.name},age:${x.age}"))
```

-------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | info.xiaohei.www 8 | 9 | spark-database-connector_2.10 10 | 1.0.0 11 | 12 | 13 | UTF-8 14 | 2.10.5 15 | 2.10 16 | 17 | 18 | 1.6.0 19 | 20 | 1.2.0 21 | 5.1.35 22 | 23 | 24 | 25 | 26 | alimaven 27 | aliyun maven 28 | http://maven.aliyun.com/nexus/content/groups/public/ 29 | 30 | 31 | jcenter 32 | jcenter Repository 33 | http://jcenter.bintray.com/ 34 | 35 
| false 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | org.apache.spark 44 | spark-core_${scala.binary.version} 45 | ${spark.version} 46 | provided 47 | 48 | 49 | org.apache.spark 50 | spark-sql_${scala.binary.version} 51 | ${spark.version} 52 | 53 | 54 | org.apache.spark 55 | spark-streaming_${scala.binary.version} 56 | ${spark.version} 57 | 58 | 59 | org.apache.spark 60 | spark-mllib_${scala.binary.version} 61 | ${spark.version} 62 | 63 | 64 | 65 | 66 | org.apache.spark 67 | spark-test-tags_${scala.binary.version} 68 | ${spark.version} 69 | provided 70 | 71 | 72 | 73 | org.apache.hbase 74 | hbase-common 75 | ${hbase.version} 76 | provided 77 | 78 | 79 | 80 | org.apache.hbase 81 | hbase-client 82 | ${hbase.version} 83 | provided 84 | 85 | 86 | org.apache.hbase 87 | hbase-server 88 | ${hbase.version} 89 | provided 90 | 91 | 92 | org.apache.hbase 93 | hbase-protocol 94 | ${hbase.version} 95 | provided 96 | 97 | 98 | 99 | 100 | junit 101 | junit 102 | 4.11 103 | 104 | 105 | mysql 106 | mysql-connector-java 107 | ${mysql.version} 108 | 109 | 110 | 111 | 112 | 113 | 114 | org.apache.maven.plugins 115 | maven-gpg-plugin 116 | 1.5 117 | 118 | 119 | sign-artifacts 120 | verify 121 | 122 | sign 123 | 124 | 125 | 126 | 127 | 128 | 129 | jar 130 | 131 | org.sonatype.oss 132 | oss-parent 133 | 7 134 | 135 | 136 | 137 | 138 | The Apache Software License, Version 2.0 139 | http://www.apache.org/licenses/LICENSE-2.0.txt 140 | repo 141 | 142 | 143 | 144 | https://github.com/chubbyjiang/Spark_DB_Connector 145 | git@github.com:chubbyjiang/Spark_DB_Connector.git 146 | https://www.xiaohei.info 147 | 148 | 149 | 150 | xiaohei 151 | xiaohei.info@gmail.com 152 | https://www.xiaohei.info 153 | 154 | 155 | -------------------------------------------------------------------------------- /src/main/scala/META-INF/MANIFEST.MF: -------------------------------------------------------------------------------- 1 | Manifest-Version: 1.0 2 | Main-Class: info.xiaohei.spark.connector.hbase.Test 3 | 4 | -------------------------------------------------------------------------------- /src/main/scala/info/xiaohei/spark/connector/HBaseEntry.scala: -------------------------------------------------------------------------------- 1 | package info.xiaohei.spark.connector 2 | 3 | import info.xiaohei.spark.connector.hbase.salt.SaltProducerFactory 4 | import info.xiaohei.spark.connector.hbase.transformer.writer.SingleColumnDataWriter 5 | import org.apache.hadoop.hbase.{CellUtil, HBaseConfiguration, TableName} 6 | import org.apache.hadoop.hbase.client.{ConnectionFactory, Get} 7 | import org.apache.hadoop.hbase.util.Bytes 8 | 9 | /** 10 | * Author: xiaohei 11 | * Date: 2017/6/10 12 | * Email: xiaohei.info@gmail.com 13 | * Host: xiaohei.info 14 | */ 15 | class HBaseEntry extends Serializable { 16 | def singleQuery(tableName: String, rowkey: String, salts: Iterable[String], 17 | columnFamily: String, columns: Iterable[String]) = { 18 | val finalRowkey = if (salts.isEmpty) { 19 | rowkey 20 | } else { 21 | val saltProducer = new SaltProducerFactory[String]().getHashProducer(salts) 22 | val writer = new SingleColumnDataWriter[String] { 23 | override def writeSingleColumn(data: String): Option[Array[Byte]] = Some(Bytes.toBytes(data)) 24 | } 25 | val rawRowkey = writer.writeSingleColumn(rowkey).get 26 | saltProducer.salting(rawRowkey) + Bytes.toString(rawRowkey) 27 | } 28 | 29 | val conf = HBaseConfiguration.create() 30 | val connection = ConnectionFactory.createConnection(conf) 31 | val table = connection.getTable(TableName.valueOf(tableName)) 32 | 
val get = new Get(Bytes.toBytes(finalRowkey)) 33 | for (col <- columns) { 34 | get.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(col)) 35 | } 36 | val result = table.get(get) 37 | 38 | val res = new collection.mutable.ListBuffer[String]() 39 | val cells = result.listCells().iterator() 40 | while (cells.hasNext) { 41 | res.append(Bytes.toString(CellUtil.cloneValue(cells.next()))) 42 | } 43 | res 44 | } 45 | 46 | 47 | } 48 | -------------------------------------------------------------------------------- /src/main/scala/info/xiaohei/spark/connector/RelationalDbEntry.scala: -------------------------------------------------------------------------------- 1 | package info.xiaohei.spark.connector 2 | 3 | /** 4 | * Author: xiaohei 5 | * Date: 2017/4/10 6 | * Email: xiaohei.info@gmail.com 7 | * Host: xiaohei.info 8 | */ 9 | class RelationalDbEntry extends Serializable{ 10 | 11 | } 12 | -------------------------------------------------------------------------------- /src/main/scala/info/xiaohei/spark/connector/hbase/HBaseCommonUtils.scala: -------------------------------------------------------------------------------- 1 | package info.xiaohei.spark.connector.hbase 2 | 3 | /** 4 | * Author: xiaohei 5 | * Date: 2017/3/21 6 | * Email: xiaohei.info@gmail.com 7 | * Host: www.xiaohei.info 8 | */ 9 | private[hbase] object HBaseCommonUtils { 10 | 11 | def columnsWithFamily(defaultColumnFamily: Option[String], columns: Iterable[String]): Iterable[(String, String)] = { 12 | columns.map { 13 | c => 14 | if (c.contains(":")) { 15 | (c.substring(0, c.indexOf(":")), c.substring(c.indexOf(":") + 1)) 16 | } 17 | else if (defaultColumnFamily.isEmpty) { 18 | throw new IllegalArgumentException("Default column family is mandatory when column names are not fully qualified") 19 | } 20 | else { 21 | (defaultColumnFamily.get, c) 22 | } 23 | } 24 | } 25 | 26 | def getFullColumnNames(defaultColumnFamily: Option[String], columns: Iterable[String]): Iterable[String] = { 27 | columnsWithFamily(defaultColumnFamily, columns).map { 28 | case (f, c) => s"$f:$c" 29 | } 30 | 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/main/scala/info/xiaohei/spark/connector/hbase/HBaseConf.scala: -------------------------------------------------------------------------------- 1 | package info.xiaohei.spark.connector.hbase 2 | 3 | import org.apache.hadoop.fs.Path 4 | import org.apache.hadoop.hbase.{HBaseConfiguration, HConstants} 5 | import org.apache.spark.SparkConf 6 | 7 | /** 8 | * Author: xiaohei 9 | * Date: 2017/3/21 10 | * Email: xiaohei.info@gmail.com 11 | * Host: www.xiaohei.info 12 | */ 13 | 14 | case class HBaseConf private[hbase](hbaseHost: Option[String] 15 | , hbaseConfig: Option[String] = None 16 | , principal: Option[String] = None 17 | , keytab: Option[String] = None) { 18 | def createHadoopBaseConf() = { 19 | val conf = HBaseConfiguration.create() 20 | 21 | hbaseConfig.foreach { 22 | hbaseConfigValue => 23 | for (localConf <- hbaseConfigValue.split(",")) { 24 | //todo:路径不存在的处理 25 | conf.addResource(new Path(localConf)) 26 | } 27 | } 28 | 29 | //todo:测试两种读法 30 | // val localConfigFile = Option(getClass.getClassLoader.getResource(hbaseConfig)) 31 | // localConfigFile.foreach(c => conf.addResource(c)) 32 | 33 | hbaseHost.foreach { 34 | host => 35 | conf.set(HConstants.ZOOKEEPER_QUORUM, host) 36 | } 37 | if (conf.get(HConstants.ZOOKEEPER_QUORUM).isEmpty) { 38 | conf.set(HConstants.ZOOKEEPER_QUORUM, HBaseConf.defaultHBaseHost) 39 | } 40 | 41 | principal.foreach { 42 | 
krb => 43 | conf.set("spark.hbase.krb.principal", krb) 44 | } 45 | keytab.foreach { 46 | key => 47 | conf.set("spark.hbase.krb.keytab", key) 48 | } 49 | 50 | conf 51 | } 52 | } 53 | 54 | object HBaseConf { 55 | 56 | val defaultHBaseHost = "localhost" 57 | 58 | def createFromSpark(conf: SparkConf) = { 59 | val hbaseHost = conf.get("spark.hbase.host", null) 60 | val hbaseConfig = conf.get("spark.hbase.config", null) 61 | 62 | val principal = conf.get("spark.hbase.krb.principal", null) 63 | val keytab = conf.get("spark.hbase.krb.keytab", null) 64 | HBaseConf(Option(hbaseHost), Option(hbaseConfig), Option(principal), Option(keytab)) 65 | } 66 | 67 | def createConf(hbaseHost: String) = { 68 | HBaseConf(Option(hbaseHost)) 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/main/scala/info/xiaohei/spark/connector/hbase/builder/reader/HBaseContext.scala: -------------------------------------------------------------------------------- 1 | package info.xiaohei.spark.connector.hbase.builder.reader 2 | 3 | import org.apache.spark.SparkContext 4 | 5 | import scala.reflect.ClassTag 6 | 7 | /** 8 | * Author: xiaohei 9 | * Date: 2017/3/21 10 | * Email: xiaohei.info@gmail.com 11 | * Host: www.xiaohei.info 12 | */ 13 | private[hbase] class HBaseContext(@transient sc: SparkContext) extends Serializable { 14 | def fromHBase[R: ClassTag](tableName: String): HBaseReaderBuilder[R] = new HBaseReaderBuilder[R](sc, tableName = tableName) 15 | } 16 | 17 | trait HBaseContextConversions extends Serializable { 18 | implicit def toHBaseContext(sc: SparkContext): HBaseContext = new HBaseContext(sc) 19 | } 20 | -------------------------------------------------------------------------------- /src/main/scala/info/xiaohei/spark/connector/hbase/builder/reader/HBaseReaderBuilder.scala: -------------------------------------------------------------------------------- 1 | package info.xiaohei.spark.connector.hbase.builder.reader 2 | 3 | import info.xiaohei.spark.connector.hbase.salt.SaltProducerFactory 4 | import info.xiaohei.spark.connector.hbase.transformer.reader.DataReader 5 | import info.xiaohei.spark.connector.hbase.{HBaseCommonUtils, HBaseConf} 6 | import org.apache.hadoop.hbase.client.Result 7 | import org.apache.hadoop.hbase.io.ImmutableBytesWritable 8 | import org.apache.hadoop.hbase.mapreduce.TableInputFormat 9 | import org.apache.hadoop.security.UserGroupInformation 10 | import org.apache.spark.SparkContext 11 | import org.apache.spark.rdd.{HBaseScanRDD, NewHadoopRDD, RDD} 12 | 13 | import scala.reflect.ClassTag 14 | 15 | /** 16 | * Author: xiaohei 17 | * Date: 2017/3/21 18 | * Email: xiaohei.info@gmail.com 19 | * Host: www.xiaohei.info 20 | */ 21 | case class HBaseReaderBuilder[R: ClassTag] private[hbase]( 22 | @transient sc: SparkContext, 23 | private[hbase] val tableName: String, 24 | private[hbase] val defaultColumnFamily: Option[String] = None, 25 | private[hbase] val columns: Iterable[String] = Seq.empty, 26 | private[hbase] val startRow: Option[String] = None, 27 | private[hbase] val stopRow: Option[String] = None, 28 | private[hbase] val salts: Iterable[String] = Seq.empty 29 | ) { 30 | def select(columns: String*): HBaseReaderBuilder[R] = { 31 | require(this.columns.isEmpty, "Columns have already been set") 32 | require(columns.nonEmpty, "You should provide at least one column") 33 | this.copy(columns = columns) 34 | } 35 | 36 | def select(columns: Iterable[String]): HBaseReaderBuilder[R] = { 37 | require(this.columns.isEmpty, "Columns have already been 
set") 38 | require(columns.nonEmpty, "You should provide at least one column") 39 | this.copy(columns = columns) 40 | } 41 | 42 | def inColumnFamily(columnFamily: String): HBaseReaderBuilder[R] = { 43 | require(this.defaultColumnFamily.isEmpty, "Default column family has already been set") 44 | require(columnFamily.nonEmpty, "Invalid column family provided") 45 | this.copy(defaultColumnFamily = Some(columnFamily)) 46 | } 47 | 48 | def withStartRow(startRow: String): HBaseReaderBuilder[R] = { 49 | require(startRow.nonEmpty, s"Invalid start row '$startRow'") 50 | require(this.startRow.isEmpty, "Start row has already been set") 51 | this.copy(startRow = Some(startRow)) 52 | } 53 | 54 | def withEndRow(endRow: String): HBaseReaderBuilder[R] = { 55 | require(endRow.nonEmpty, s"Invalid stop row '$endRow'") 56 | require(this.stopRow.isEmpty, "Stop row has already been set") 57 | this.copy(stopRow = Some(endRow)) 58 | } 59 | 60 | def withSalt(salts: Iterable[String]) = { 61 | require(salts.size > 1, "Invalid salting. Two or more elements are required") 62 | require(this.salts.isEmpty, "Salting has already been set") 63 | 64 | this.copy(salts = salts) 65 | } 66 | 67 | private[hbase] def withRanges(startRow: Option[String], stopRow: Option[String]) = { 68 | copy(startRow = startRow, stopRow = stopRow) 69 | } 70 | } 71 | 72 | trait HBaseReaderBuilderConversions extends Serializable { 73 | implicit def toHBaseRDD[R: ClassTag](builder: HBaseReaderBuilder[R]) 74 | (implicit reader: DataReader[R], saltProducerFactory: SaltProducerFactory[String]): RDD[R] = { 75 | if (builder.salts.isEmpty) { 76 | toSimpleHBaseRdd(builder) 77 | } else { 78 | val saltLength = saltProducerFactory.getHashProducer(builder.salts).singleSaltength 79 | val sortedSalts = builder.salts.toList.sorted.map(Some(_)) 80 | val ranges = sortedSalts.zip(sortedSalts.drop(1) :+ None) 81 | val rddSeq = ranges.map { 82 | salt => 83 | builder.withRanges( 84 | if (builder.startRow.nonEmpty) Some(salt._1.get + builder.startRow.get) else salt._1, 85 | if (builder.stopRow.nonEmpty) Some(salt._1.get + builder.stopRow.get) else salt._2 86 | ) 87 | }.map { 88 | builder => 89 | toSimpleHBaseRdd(builder, saltLength).asInstanceOf[RDD[R]] 90 | } 91 | val sc = rddSeq.head.sparkContext 92 | new HBaseSaltRDD[R](sc, rddSeq) 93 | } 94 | } 95 | 96 | private def toSimpleHBaseRdd[R: ClassTag](builder: HBaseReaderBuilder[R], saltsLength: Int = 0) 97 | (implicit reader: DataReader[R]): HBaseSimpleRDD[R] = { 98 | val hbaseConfig = HBaseConf.createFromSpark(builder.sc.getConf).createHadoopBaseConf() 99 | hbaseConfig.set(TableInputFormat.INPUT_TABLE, builder.tableName) 100 | require(builder.columns.nonEmpty, "No columns have been defined for the operation") 101 | val columnNames = builder.columns 102 | val fullColumnNames = HBaseCommonUtils.getFullColumnNames(builder.defaultColumnFamily, columnNames) 103 | if (fullColumnNames.nonEmpty) { 104 | hbaseConfig.set(TableInputFormat.SCAN_COLUMNS, fullColumnNames.mkString(" ")) 105 | } 106 | if (builder.startRow.nonEmpty) { 107 | hbaseConfig.set(TableInputFormat.SCAN_ROW_START, builder.startRow.get) 108 | } 109 | if (builder.stopRow.nonEmpty) { 110 | hbaseConfig.set(TableInputFormat.SCAN_ROW_STOP, builder.stopRow.get) 111 | } 112 | 113 | //krb认证 114 | val rdd = if (hbaseConfig.get("spark.hbase.krb.principal") == null || hbaseConfig.get("spark.hbase.krb.keytab") == null) { 115 | //todo:asInstanceOf 116 | builder.sc.newAPIHadoopRDD(hbaseConfig 117 | , classOf[TableInputFormat] 118 | , classOf[ImmutableBytesWritable] 119 | , 
classOf[Result]) 120 | .asInstanceOf[NewHadoopRDD[ImmutableBytesWritable, Result]] 121 | } else { 122 | val principle = hbaseConfig.get("spark.hbase.krb.principal") 123 | val keytab = hbaseConfig.get("spark.hbase.krb.keytab") 124 | new HBaseScanRDD[ImmutableBytesWritable, Result](principle, keytab 125 | , builder.sc 126 | , classOf[TableInputFormat] 127 | , classOf[ImmutableBytesWritable] 128 | , classOf[Result] 129 | , hbaseConfig) 130 | } 131 | new HBaseSimpleRDD[R](rdd, builder, saltsLength) 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /src/main/scala/info/xiaohei/spark/connector/hbase/builder/reader/HBaseSaltRDD.scala: -------------------------------------------------------------------------------- 1 | package info.xiaohei.spark.connector.hbase.builder.reader 2 | 3 | import org.apache.spark.SparkContext 4 | import org.apache.spark.rdd.{RDD, UnionRDD} 5 | 6 | import scala.reflect.ClassTag 7 | 8 | /** 9 | * Author: xiaohei 10 | * Date: 2017/4/25 11 | * Email: xiaohei.info@gmail.com 12 | * Host: xiaohei.info 13 | */ 14 | class HBaseSaltRDD[R: ClassTag](sc: SparkContext, rdds: Seq[RDD[R]]) extends UnionRDD[R](sc, rdds) { 15 | 16 | } 17 | -------------------------------------------------------------------------------- /src/main/scala/info/xiaohei/spark/connector/hbase/builder/reader/HBaseSimpleRDD.scala: -------------------------------------------------------------------------------- 1 | package info.xiaohei.spark.connector.hbase.builder.reader 2 | 3 | import info.xiaohei.spark.connector.hbase.HBaseCommonUtils 4 | import info.xiaohei.spark.connector.hbase.transformer.reader.DataReader 5 | import org.apache.hadoop.hbase.CellUtil 6 | import org.apache.hadoop.hbase.client.Result 7 | import org.apache.hadoop.hbase.io.ImmutableBytesWritable 8 | import org.apache.hadoop.hbase.util.Bytes 9 | import org.apache.spark.annotation.DeveloperApi 10 | import org.apache.spark.rdd.{NewHadoopRDD, RDD} 11 | import org.apache.spark.{Partition, TaskContext} 12 | 13 | import scala.reflect.ClassTag 14 | 15 | /** 16 | * Author: xiaohei 17 | * Date: 2017/3/21 18 | * Email: xiaohei.info@gmail.com 19 | * Host: www.xiaohei.info 20 | */ 21 | //todo:SimpleHBaseRdd 22 | class HBaseSimpleRDD[R: ClassTag](hadoopHBaseRDD: RDD[(ImmutableBytesWritable, Result)], 23 | builder: HBaseReaderBuilder[R], saltsLength: Int) 24 | (implicit reader: DataReader[R]) extends RDD[R](hadoopHBaseRDD) { 25 | @DeveloperApi 26 | override def compute(split: Partition, context: TaskContext): Iterator[R] = { 27 | firstParent[(ImmutableBytesWritable, Result)].iterator(split, context) 28 | .map(e => convert(e._1, e._2)) 29 | } 30 | 31 | override protected def getPartitions: Array[Partition] = { 32 | firstParent[(ImmutableBytesWritable, Result)].partitions 33 | } 34 | 35 | private def convert(key: ImmutableBytesWritable, row: Result) = { 36 | //val columnNames = Utils.chosenColumns(builder.columns, reader.columns) 37 | require(builder.columns.nonEmpty, "No columns have been defined for the operation") 38 | val columnNames = builder.columns 39 | val columnsWithFamiy = HBaseCommonUtils.columnsWithFamily(builder.defaultColumnFamily, columnNames) 40 | val columns = columnsWithFamiy 41 | .map(t => (Bytes.toBytes(t._1), Bytes.toBytes(t._2))) 42 | .map { 43 | t => 44 | if (row.containsColumn(t._1, t._2)) { 45 | Some(CellUtil.cloneValue(row.getColumnLatestCell(t._1, t._2))) 46 | } else { 47 | None 48 | } 49 | }.toList 50 | reader.read(Some(key.get.drop(saltsLength)) :: columns) 51 | } 52 | } 53 | 
-------------------------------------------------------------------------------- /src/main/scala/info/xiaohei/spark/connector/hbase/builder/writer/CollectionWriterBuilder.scala: -------------------------------------------------------------------------------- 1 | package info.xiaohei.spark.connector.hbase.builder.writer 2 | 3 | import java.security.PrivilegedAction 4 | 5 | import info.xiaohei.spark.connector.hbase.HBaseConf 6 | import info.xiaohei.spark.connector.hbase.salt.{SaltProducer, SaltProducerFactory} 7 | import info.xiaohei.spark.connector.hbase.transformer.writer.DataWriter 8 | import org.apache.hadoop.hbase.TableName 9 | import org.apache.hadoop.hbase.client._ 10 | import org.apache.hadoop.hbase.util.Bytes 11 | import org.apache.hadoop.security.UserGroupInformation 12 | 13 | import scala.collection.JavaConversions._ 14 | 15 | /** 16 | * Author: xiaohei 17 | * Date: 2017/3/21 18 | * Email: xiaohei.info@gmail.com 19 | * Host: www.xiaohei.info 20 | */ 21 | 22 | case class CollectionWriterBuilder[C] private[hbase]( 23 | private[hbase] val hBaseConf: HBaseConf, 24 | private[hbase] val collectionData: Iterable[C], 25 | private[hbase] val tableName: String, 26 | private[hbase] val writeBufferSize: Option[Long], 27 | private[hbase] val asynProcess: Boolean, 28 | private[hbase] val defaultColumnFamily: Option[String] = None, 29 | private[hbase] val columns: Iterable[String] = Seq.empty, 30 | private[hbase] val salts: Iterable[String] = Seq.empty 31 | ) { 32 | def insert(cols: String*) = { 33 | require(this.columns.isEmpty, "Columns haven't been set") 34 | require(cols.nonEmpty, "Columns must by set,at least one") 35 | this.copy(columns = cols) 36 | } 37 | 38 | def insert(cols: Iterable[String]) = { 39 | require(this.columns.isEmpty, "Columns haven't been set") 40 | require(cols.nonEmpty, "Columns must by set,at least one") 41 | this.copy(columns = cols) 42 | } 43 | 44 | def inColumnFamily(family: String) = { 45 | require(this.defaultColumnFamily.isEmpty, "Default column family hasn't been set") 46 | require(family.nonEmpty, "Column family must provided") 47 | this.copy(defaultColumnFamily = Some(family)) 48 | } 49 | 50 | def withSalt(salts: Iterable[String]) = { 51 | require(salts.size > 1, "Invalid salting. 
Two or more elements are required") 52 | require(this.salts.isEmpty, "Salting has already been set") 53 | 54 | this.copy(salts = salts) 55 | } 56 | } 57 | 58 | private[hbase] class CollectionWriterBuildMaker[C](collectionData: Iterable[C])(implicit hBaseConf: HBaseConf) extends Serializable { 59 | def toHBase(tableName: String 60 | , writeBufferSize: Option[Long] = None 61 | , asynProcess: Boolean = false) 62 | = CollectionWriterBuilder[C](hBaseConf, collectionData, tableName, writeBufferSize, asynProcess) 63 | } 64 | 65 | private[hbase] class CollectionWriter[C](builder: CollectionWriterBuilder[C]) 66 | (implicit writer: DataWriter[C], saltProducerFactory: SaltProducerFactory[String]) extends Serializable { 67 | def save(): Unit = { 68 | //val conf = HBaseConf.createHBaseConf(builder.hbaseHost).createHadoopBaseConf() 69 | val conf = builder.hBaseConf.createHadoopBaseConf() 70 | 71 | val connection = if (conf.get("spark.hbase.krb.principal") == null || conf.get("spark.hbase.krb.keytab") == null) { 72 | ConnectionFactory.createConnection(conf) 73 | } 74 | else { 75 | UserGroupInformation.setConfiguration(conf) 76 | val ugi: UserGroupInformation = UserGroupInformation 77 | .loginUserFromKeytabAndReturnUGI(conf.get("spark.hbase.krb.principal"), conf.get("spark.hbase.krb.keytab")) 78 | UserGroupInformation.setLoginUser(ugi) 79 | ugi.doAs(new PrivilegedAction[Connection] { 80 | def run: Connection = { 81 | ConnectionFactory.createConnection(conf) 82 | } 83 | }) 84 | } 85 | 86 | 87 | val tableName = TableName.valueOf(builder.tableName) 88 | 89 | val saltProducer: Option[SaltProducer[String]] = if (builder.salts.isEmpty) None else Some(saltProducerFactory.getHashProducer(builder.salts)) 90 | 91 | def coverData(data: C): Put = { 92 | val convertedData: Iterable[Option[Array[Byte]]] = writer.write(data) 93 | if (convertedData.size < 2) { 94 | throw new IllegalArgumentException("Expected at least two converted values, the first one should be the row key") 95 | } 96 | //val columnsNames = Utils.chosenColumns(builder.columns, writer.columns) 97 | require(builder.columns.nonEmpty, "No columns have been defined for the operation") 98 | val columnNames = builder.columns 99 | val rawRowkey = convertedData.head.get 100 | val columnData = convertedData.drop(1) 101 | 102 | 103 | if (columnData.size != columnNames.size) { 104 | throw new IllegalArgumentException(s"Wrong number of columns. 
Expected ${columnNames.size} found ${columnData.size}") 105 | } 106 | val rowkey = if (saltProducer.isEmpty) rawRowkey else Bytes.toBytes(saltProducer.get.salting(rawRowkey) + Bytes.toString(rawRowkey)) 107 | val put = new Put(rowkey) 108 | columnNames.zip(columnData).foreach { 109 | case (name, Some(value)) => 110 | val family = if (name.contains(":")) Bytes.toBytes(name.substring(0, name.indexOf(":"))) else Bytes.toBytes(builder.defaultColumnFamily.get) 111 | val column = if (name.contains(":")) Bytes.toBytes(name.substring(name.indexOf(":") + 1)) else Bytes.toBytes(name) 112 | put.addColumn(family, column, value) 113 | case _ => 114 | } 115 | put 116 | } 117 | 118 | if (builder.asynProcess) { 119 | val params = new BufferedMutatorParams(tableName).writeBufferSize(builder.writeBufferSize.get) 120 | val mutator = connection.getBufferedMutator(params) 121 | builder.collectionData.foreach(data => mutator.mutate(coverData(data))) 122 | mutator.close() 123 | } else { 124 | val table = connection.getTable(tableName) 125 | val putList = builder.collectionData.map(coverData).toList 126 | table.put(putList) 127 | table.close() 128 | } 129 | connection.close() 130 | } 131 | 132 | } 133 | 134 | 135 | trait CollectionWriterBuilderConversions extends Serializable { 136 | implicit def collectionToBuildMaker[C](collectionData: Iterable[C])(implicit hBaseConf: HBaseConf): CollectionWriterBuildMaker[C] = new CollectionWriterBuildMaker[C](collectionData) 137 | 138 | implicit def collectionBuilderToWriter[C](builder: CollectionWriterBuilder[C])(implicit writer: DataWriter[C], saltProducerFactory: SaltProducerFactory[String]): CollectionWriter[C] = new CollectionWriter[C](builder) 139 | } 140 | -------------------------------------------------------------------------------- /src/main/scala/info/xiaohei/spark/connector/hbase/builder/writer/HBaseWriterBuilder.scala: -------------------------------------------------------------------------------- 1 | package info.xiaohei.spark.connector.hbase.builder.writer 2 | 3 | import info.xiaohei.spark.connector.hbase.HBaseConf 4 | import info.xiaohei.spark.connector.hbase.salt.{SaltProducer, SaltProducerFactory} 5 | import info.xiaohei.spark.connector.hbase.transformer.writer.DataWriter 6 | import org.apache.hadoop.hbase.client.Put 7 | import org.apache.hadoop.hbase.io.ImmutableBytesWritable 8 | import org.apache.hadoop.hbase.mapreduce.TableOutputFormat 9 | import org.apache.hadoop.hbase.util.Bytes 10 | import org.apache.hadoop.mapreduce.Job 11 | import org.apache.spark.rdd.RDD 12 | 13 | /** 14 | * Author: xiaohei 15 | * Date: 2017/3/21 16 | * Email: xiaohei.info@gmail.com 17 | * Host: www.xiaohei.info 18 | */ 19 | case class HBaseWriterBuilder[R] private[hbase]( 20 | private[hbase] val rdd: RDD[R], 21 | private[hbase] val tableName: String, 22 | //以下的参数通过方法动态设置 23 | private[hbase] val defaultColumnFamily: Option[String] = None, 24 | private[hbase] val columns: Iterable[String] = Seq.empty, 25 | private[hbase] val salts: Iterable[String] = Seq.empty 26 | ) 27 | extends Serializable { 28 | 29 | def insert(cols: String*) = { 30 | require(this.columns.isEmpty, "Columns haven't been set") 31 | require(cols.nonEmpty, "Columns must by set,at least one") 32 | this.copy(columns = cols) 33 | } 34 | 35 | def insert(cols: Iterable[String]) = { 36 | require(this.columns.isEmpty, "Columns haven't been set") 37 | require(cols.nonEmpty, "Columns must by set,at least one") 38 | this.copy(columns = cols) 39 | } 40 | 41 | def inColumnFamily(family: String) = { 42 | 
require(this.defaultColumnFamily.isEmpty, "Default column family hasn't been set") 43 | require(family.nonEmpty, "Column family must provided") 44 | this.copy(defaultColumnFamily = Some(family)) 45 | } 46 | 47 | def withSalt(salts: Iterable[String]) = { 48 | require(salts.size > 1, "Invalid salting. Two or more elements are required") 49 | require(this.salts.isEmpty, "Salting has already been set") 50 | 51 | this.copy(salts = salts) 52 | } 53 | } 54 | 55 | private[hbase] class HBaseWriterBuildMaker[R](rdd: RDD[R]) extends Serializable { 56 | def toHBase(tableName: String) = HBaseWriterBuilder(rdd, tableName) 57 | } 58 | 59 | private[hbase] class HBaseWriter[R](builder: HBaseWriterBuilder[R])(implicit writer: DataWriter[R] 60 | , saltProducerFactory: SaltProducerFactory[String]) extends Serializable { 61 | def save(): Unit = { 62 | val conf = HBaseConf.createFromSpark(builder.rdd.context.getConf).createHadoopBaseConf() 63 | conf.set(TableOutputFormat.OUTPUT_TABLE, builder.tableName) 64 | 65 | val job = Job.getInstance(conf) 66 | job.setOutputFormatClass(classOf[TableOutputFormat[String]]) 67 | 68 | val saltProducer: Option[SaltProducer[String]] = if (builder.salts.isEmpty) None else Some(saltProducerFactory.getHashProducer(builder.salts)) 69 | 70 | val transRdd = builder.rdd.map { 71 | data => 72 | val convertedData: Iterable[Option[Array[Byte]]] = writer.write(data) 73 | if (convertedData.size < 2) { 74 | throw new IllegalArgumentException("Expected at least two converted values, the first one should be the row key") 75 | } 76 | require(builder.columns.nonEmpty, "No columns have been defined for the operation") 77 | val columnNames = builder.columns 78 | val rawRowkey = convertedData.head.get 79 | val columnData = convertedData.drop(1) 80 | 81 | if (columnData.size != columnNames.size) { 82 | throw new IllegalArgumentException(s"Wrong number of columns. 
Expected ${columnNames.size} found ${columnData.size}") 83 | } 84 | //transform rowkey with salt 85 | val rowkey = if (saltProducer.isEmpty) rawRowkey else Bytes.toBytes(saltProducer.get.salting(rawRowkey) + Bytes.toString(rawRowkey)) 86 | val put = new Put(rowkey) 87 | columnNames.zip(columnData).foreach { 88 | case (name, Some(value)) => 89 | val family = if (name.contains(":")) Bytes.toBytes(name.substring(0, name.indexOf(":"))) else Bytes.toBytes(builder.defaultColumnFamily.get) 90 | val column = if (name.contains(":")) Bytes.toBytes(name.substring(name.indexOf(":") + 1)) else Bytes.toBytes(name) 91 | put.addColumn(family, column, value) 92 | case _ => 93 | } 94 | (new ImmutableBytesWritable, put) 95 | } 96 | transRdd.saveAsNewAPIHadoopDataset(job.getConfiguration) 97 | } 98 | } 99 | 100 | trait HBaseWriterBuilderConversions extends Serializable { 101 | 102 | implicit def rddToHBaseBuildMaker[R](rdd: RDD[R]): HBaseWriterBuildMaker[R] = new HBaseWriterBuildMaker[R](rdd) 103 | 104 | implicit def builderToWriter[R](builder: HBaseWriterBuilder[R])(implicit writer: DataWriter[R], saltProducerFactory: SaltProducerFactory[String]): HBaseWriter[R] = new HBaseWriter[R](builder) 105 | } 106 | 107 | 108 | 109 | 110 | 111 | -------------------------------------------------------------------------------- /src/main/scala/info/xiaohei/spark/connector/hbase/package.scala: -------------------------------------------------------------------------------- 1 | package info.xiaohei.spark.connector 2 | 3 | import info.xiaohei.spark.connector.hbase.builder.reader.{HBaseContextConversions, HBaseReaderBuilderConversions} 4 | import info.xiaohei.spark.connector.hbase.builder.writer.{CollectionWriterBuilderConversions, HBaseWriterBuilderConversions} 5 | import info.xiaohei.spark.connector.hbase.salt.SaltProducerConversions 6 | import info.xiaohei.spark.connector.hbase.transformer.reader.DataReaderConversions 7 | import info.xiaohei.spark.connector.hbase.transformer.writer.DataWriterConversions 8 | 9 | 10 | /** 11 | * Author: xiaohei 12 | * Date: 2017/3/21 13 | * Email: xiaohei.info@gmail.com 14 | * Host: www.xiaohei.info 15 | */ 16 | 17 | package object hbase 18 | extends HBaseWriterBuilderConversions 19 | with HBaseReaderBuilderConversions 20 | with CollectionWriterBuilderConversions 21 | with DataWriterConversions 22 | with DataReaderConversions 23 | with HBaseContextConversions 24 | with SaltProducerConversions 25 | 26 | -------------------------------------------------------------------------------- /src/main/scala/info/xiaohei/spark/connector/hbase/salt/SaltProducer.scala: -------------------------------------------------------------------------------- 1 | package info.xiaohei.spark.connector.hbase.salt 2 | 3 | import info.xiaohei.spark.connector.hbase.transformer.writer.{DataWriter, SingleColumnDataWriter} 4 | 5 | import scala.reflect.ClassTag 6 | import scala.util.Random 7 | 8 | /** 9 | * Author: xiaohei 10 | * Date: 2017/4/19 11 | * Email: xiaohei.info@gmail.com 12 | * Host: xiaohei.info 13 | */ 14 | trait SaltProducer[T] extends Serializable { 15 | 16 | def salting(rowkey: Array[Byte]): T 17 | 18 | protected def salts: Array[T] 19 | 20 | protected def verify(implicit writer: DataWriter[T]): Unit = { 21 | require(singleSaltength > 0, "salt's length must great than 0") 22 | } 23 | 24 | def singleSaltength(implicit writer: DataWriter[T]): Int = { 25 | require(writer.isInstanceOf[SingleColumnDataWriter[T]], "salt array must be composed with primitive type") 26 | 27 | val singleColumnDataWriter = 
writer.asInstanceOf[SingleColumnDataWriter[T]] 28 | salts.map(s => singleColumnDataWriter.writeSingleColumn(s)) 29 | .map(b => b.getOrElse(Array[Byte]())) 30 | .map(_.length) 31 | .foldLeft(None.asInstanceOf[Option[Int]])((size1, size2) => { 32 | if (size1.nonEmpty && size1.get != size2) { 33 | throw new IllegalArgumentException(s"salts can not use different lengths with:${size1.get},$size2") 34 | } 35 | Some(size2) 36 | }).get 37 | } 38 | } 39 | 40 | private[salt] class RandomSaltProducer[T: ClassTag](val salts: Array[T])(implicit writer: DataWriter[T]) extends SaltProducer[T]() { 41 | 42 | verify 43 | 44 | override def salting(rowkey: Array[Byte]): T = { 45 | val randomizer = new Random 46 | salts(randomizer.nextInt(salts.length)) 47 | } 48 | } 49 | 50 | private[salt] class HashSaltProducer[T: ClassTag](val salts: Array[T])(implicit writer: DataWriter[T]) extends SaltProducer[T]() { 51 | 52 | verify 53 | 54 | override def salting(rowkey: Array[Byte]): T = { 55 | salts((java.util.Arrays.hashCode(rowkey) & 0x7fffffff) % salts.length) 56 | } 57 | } 58 | 59 | //todo:ClassTag do what 60 | -------------------------------------------------------------------------------- /src/main/scala/info/xiaohei/spark/connector/hbase/salt/SaltProducerConversions.scala: -------------------------------------------------------------------------------- 1 | package info.xiaohei.spark.connector.hbase.salt 2 | 3 | import info.xiaohei.spark.connector.hbase.transformer.writer.DataWriter 4 | 5 | import scala.reflect.ClassTag 6 | 7 | /** 8 | * Author: xiaohei 9 | * Date: 2017/4/20 10 | * Email: xiaohei.info@gmail.com 11 | * Host: xiaohei.info 12 | */ 13 | trait SaltProducerConversions extends Serializable { 14 | implicit def getSaltProducerFactory[T: ClassTag](implicit writer: DataWriter[T]): SaltProducerFactory[T] = new SaltProducerFactory[T]() 15 | } 16 | -------------------------------------------------------------------------------- /src/main/scala/info/xiaohei/spark/connector/hbase/salt/SaltProducerFactory.scala: -------------------------------------------------------------------------------- 1 | package info.xiaohei.spark.connector.hbase.salt 2 | 3 | import info.xiaohei.spark.connector.hbase.transformer.writer.DataWriter 4 | 5 | import scala.reflect.ClassTag 6 | 7 | /** 8 | * Author: xiaohei 9 | * Date: 2017/4/19 10 | * Email: xiaohei.info@gmail.com 11 | * Host: xiaohei.info 12 | */ 13 | class SaltProducerFactory[T: ClassTag] extends Serializable { 14 | def getHashProducer(saltArray: Iterable[T])(implicit writer: DataWriter[T]): SaltProducer[T] = new HashSaltProducer[T](saltArray.toArray) 15 | 16 | def getRandomProducer(saltArray: Iterable[T])(implicit writer: DataWriter[T]): SaltProducer[T] = new RandomSaltProducer[T](saltArray.toArray) 17 | } 18 | -------------------------------------------------------------------------------- /src/main/scala/info/xiaohei/spark/connector/hbase/transformer/DataTransformer.scala: -------------------------------------------------------------------------------- 1 | package info.xiaohei.spark.connector.hbase.transformer 2 | 3 | /** 4 | * Author: xiaohei 5 | * Date: 2017/3/21 6 | * Email: xiaohei.info@gmail.com 7 | * Host: www.xiaohei.info 8 | */ 9 | trait DataTransformer extends Serializable { 10 | type HBaseData = Iterable[Option[Array[Byte]]] 11 | } 12 | -------------------------------------------------------------------------------- /src/main/scala/info/xiaohei/spark/connector/hbase/transformer/reader/DataReader.scala: 
-------------------------------------------------------------------------------- 1 | package info.xiaohei.spark.connector.hbase.transformer.reader 2 | 3 | import info.xiaohei.spark.connector.hbase.transformer.DataTransformer 4 | import org.apache.hadoop.hbase.util.Bytes 5 | 6 | /** 7 | * Author: xiaohei 8 | * Date: 2017/3/21 9 | * Email: xiaohei.info@gmail.com 10 | * Host: www.xiaohei.info 11 | */ 12 | 13 | trait DataReader[T] extends DataTransformer { 14 | def read(data: HBaseData): T 15 | } 16 | 17 | trait SingleColumnDataReader[T] extends DataReader[T] { 18 | 19 | override def read(data: HBaseData): T = 20 | if (data.size == 1) 21 | columnMapWithOption(data.head) 22 | else if (data.size == 2) 23 | columnMapWithOption(data.drop(1).head) 24 | else 25 | throw new IllegalArgumentException(s"Unexpected number of columns: expected 1 or 2, returned ${data.size}") 26 | 27 | 28 | def columnMapWithOption(cols: Option[Array[Byte]]) = 29 | if (cols.nonEmpty) readSingleColumn(cols.get) 30 | else throw new IllegalArgumentException("Null value assigned to concrete class. Use Option[T] instead") 31 | 32 | def readSingleColumn(cols: Array[Byte]): T 33 | } 34 | 35 | trait TupleDataReader[T <: Product] extends DataReader[T] { 36 | 37 | val n: Int 38 | 39 | override def read(data: HBaseData): T = 40 | if (data.size == n) 41 | readTupleColumn(data) 42 | else if (data.size == n + 1) 43 | readTupleColumn(data.drop(1)) 44 | else 45 | throw new IllegalArgumentException(s"Unexpected number of columns: expected $n or ${n - 1}, returned ${data.size}") 46 | 47 | def readTupleColumn(data: HBaseData): T 48 | } 49 | 50 | abstract class CustomDataReader[S, T](implicit reader: DataReader[S]) extends DataReader[T] { 51 | 52 | override def read(data: HBaseData): T = convert(reader.read(data)) 53 | 54 | def convert(data: S): T 55 | } 56 | 57 | // 58 | //trait SingleColumnDataReader[T] extends DataReader[T] { 59 | // 60 | // override def read(data: HBaseData): T = 61 | // if (data.size == 1) 62 | // columnMapWithOption(data.head) 63 | // else if (data.size == 2) 64 | // columnMapWithOption(data.drop(1).head) 65 | // else 66 | // throw new IllegalArgumentException(s"Unexpected number of columns: expected 1 or 2, returned ${data.size}") 67 | // 68 | // def columnMapWithOption(cols: Option[Array[Byte]]): T 69 | //} 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | -------------------------------------------------------------------------------- /src/main/scala/info/xiaohei/spark/connector/hbase/transformer/reader/DataReaderConversions.scala: -------------------------------------------------------------------------------- 1 | package info.xiaohei.spark.connector.hbase.transformer.reader 2 | 3 | import org.apache.hadoop.hbase.util.Bytes 4 | 5 | /** 6 | * Author: xiaohei 7 | * Date: 2017/3/26 8 | * Email: xiaohei.info@gmail.com 9 | * Host: xiaohei.info 10 | */ 11 | 12 | trait DataReaderConversions extends Serializable { 13 | 14 | // Simple types 15 | 16 | implicit def intReader: DataReader[Int] = new SingleColumnDataReader[Int] { 17 | override def readSingleColumn(cols: Array[Byte]): Int = Bytes.toInt(cols) 18 | } 19 | 20 | implicit def longReader: DataReader[Long] = new SingleColumnDataReader[Long] { 21 | override def readSingleColumn(cols: Array[Byte]): Long = Bytes.toLong(cols) 22 | } 23 | 24 | implicit def shortReader: DataReader[Short] = new SingleColumnDataReader[Short] { 25 | override def readSingleColumn(cols: Array[Byte]): Short = Bytes.toShort(cols) 26 | } 27 | 28 | implicit def doubleReader: DataReader[Double] = new 
SingleColumnDataReader[Double] { 29 | override def readSingleColumn(cols: Array[Byte]): Double = Bytes.toDouble(cols) 30 | } 31 | 32 | implicit def floatReader: DataReader[Float] = new SingleColumnDataReader[Float] { 33 | override def readSingleColumn(cols: Array[Byte]): Float = Bytes.toFloat(cols) 34 | } 35 | 36 | implicit def booleanReader: DataReader[Boolean] = new SingleColumnDataReader[Boolean] { 37 | override def readSingleColumn(cols: Array[Byte]): Boolean = Bytes.toBoolean(cols) 38 | } 39 | 40 | implicit def bigDecimalReader: DataReader[BigDecimal] = new SingleColumnDataReader[BigDecimal] { 41 | override def readSingleColumn(cols: Array[Byte]): BigDecimal = Bytes.toBigDecimal(cols) 42 | } 43 | 44 | implicit def stringReader: DataReader[String] = new SingleColumnDataReader[String] { 45 | override def readSingleColumn(cols: Array[Byte]): String = Bytes.toString(cols) 46 | } 47 | 48 | // Options 49 | 50 | implicit def optionReader[T](implicit c: DataReader[T]): DataReader[Option[T]] = new DataReader[Option[T]] { 51 | override def read(data: HBaseData): Option[T] = 52 | if (data.size != 1) throw new IllegalArgumentException(s"Unexpected number of columns: expected 1, returned ${data.size}") 53 | else { 54 | if (!classOf[SingleColumnDataReader[T]].isAssignableFrom(c.getClass)) throw new IllegalArgumentException("Option[T] can be used only with primitive values") 55 | if (data.head.nonEmpty) Some(c.read(data)) 56 | else None 57 | } 58 | } 59 | 60 | // Tuples 61 | 62 | implicit def tuple2Reader[T1, T2](implicit m1: DataReader[T1], m2: DataReader[T2]): DataReader[(T1, T2)] = new TupleDataReader[(T1, T2)] { 63 | 64 | val n = 2 65 | 66 | override def readTupleColumn(data: HBaseData) = { 67 | val h1 = data.take(1) 68 | val h2 = data.slice(1, 2) 69 | (m1.read(h1), m2.read(h2)) 70 | } 71 | } 72 | 73 | implicit def tuple3Reader[T1, T2, T3](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3]): DataReader[(T1, T2, T3)] = new TupleDataReader[(T1, T2, T3)] { 74 | 75 | val n = 3 76 | 77 | override def readTupleColumn(data: HBaseData) = { 78 | val h1 = data.take(1) 79 | val h2 = data.slice(1, 2) 80 | val h3 = data.slice(2, 3) 81 | (m1.read(h1), m2.read(h2), m3.read(h3)) 82 | } 83 | } 84 | 85 | implicit def tuple4Reader[T1, T2, T3, T4](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4]): DataReader[(T1, T2, T3, T4)] = new TupleDataReader[(T1, T2, T3, T4)] { 86 | 87 | val n = 4 88 | 89 | override def readTupleColumn(data: HBaseData) = { 90 | val h1 = data.take(1) 91 | val h2 = data.slice(1, 2) 92 | val h3 = data.slice(2, 3) 93 | val h4 = data.slice(3, 4) 94 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4)) 95 | } 96 | } 97 | 98 | implicit def tuple5Reader[T1, T2, T3, T4, T5](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5]): DataReader[(T1, T2, T3, T4, T5)] = new TupleDataReader[(T1, T2, T3, T4, T5)] { 99 | 100 | val n = 5 101 | 102 | override def readTupleColumn(data: HBaseData) = { 103 | val h1 = data.take(1) 104 | val h2 = data.slice(1, 2) 105 | val h3 = data.slice(2, 3) 106 | val h4 = data.slice(3, 4) 107 | val h5 = data.slice(4, 5) 108 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5)) 109 | } 110 | } 111 | 112 | implicit def tuple6Reader[T1, T2, T3, T4, T5, T6](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6]): DataReader[(T1, T2, T3, T4, T5, T6)] = new TupleDataReader[(T1, T2, T3, T4, 
T5, T6)] { 113 | 114 | val n = 6 115 | 116 | override def readTupleColumn(data: HBaseData) = { 117 | val h1 = data.take(1) 118 | val h2 = data.slice(1, 2) 119 | val h3 = data.slice(2, 3) 120 | val h4 = data.slice(3, 4) 121 | val h5 = data.slice(4, 5) 122 | val h6 = data.slice(5, 6) 123 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6)) 124 | } 125 | } 126 | 127 | implicit def tuple7Reader[T1, T2, T3, T4, T5, T6, T7](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6], m7: DataReader[T7]): DataReader[(T1, T2, T3, T4, T5, T6, T7)] = new TupleDataReader[(T1, T2, T3, T4, T5, T6, T7)] { 128 | 129 | val n = 7 130 | 131 | override def readTupleColumn(data: HBaseData) = { 132 | val h1 = data.take(1) 133 | val h2 = data.slice(1, 2) 134 | val h3 = data.slice(2, 3) 135 | val h4 = data.slice(3, 4) 136 | val h5 = data.slice(4, 5) 137 | val h6 = data.slice(5, 6) 138 | val h7 = data.slice(6, 7) 139 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6), m7.read(h7)) 140 | } 141 | } 142 | 143 | implicit def tuple8Reader[T1, T2, T3, T4, T5, T6, T7, T8](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6], m7: DataReader[T7], m8: DataReader[T8]): DataReader[(T1, T2, T3, T4, T5, T6, T7, T8)] = new TupleDataReader[(T1, T2, T3, T4, T5, T6, T7, T8)] { 144 | 145 | val n = 8 146 | 147 | override def readTupleColumn(data: HBaseData) = { 148 | val h1 = data.take(1) 149 | val h2 = data.slice(1, 2) 150 | val h3 = data.slice(2, 3) 151 | val h4 = data.slice(3, 4) 152 | val h5 = data.slice(4, 5) 153 | val h6 = data.slice(5, 6) 154 | val h7 = data.slice(6, 7) 155 | val h8 = data.slice(7, 8) 156 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6), m7.read(h7), m8.read(h8)) 157 | } 158 | } 159 | 160 | implicit def tuple9Reader[T1, T2, T3, T4, T5, T6, T7, T8, T9](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6], m7: DataReader[T7], m8: DataReader[T8], m9: DataReader[T9]): DataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9)] = new TupleDataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9)] { 161 | 162 | val n = 9 163 | 164 | override def readTupleColumn(data: HBaseData) = { 165 | val h1 = data.take(1) 166 | val h2 = data.slice(1, 2) 167 | val h3 = data.slice(2, 3) 168 | val h4 = data.slice(3, 4) 169 | val h5 = data.slice(4, 5) 170 | val h6 = data.slice(5, 6) 171 | val h7 = data.slice(6, 7) 172 | val h8 = data.slice(7, 8) 173 | val h9 = data.slice(8, 9) 174 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6), m7.read(h7), m8.read(h8), m9.read(h9)) 175 | } 176 | } 177 | 178 | implicit def tuple10Reader[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6], m7: DataReader[T7], m8: DataReader[T8], m9: DataReader[T9], m10: DataReader[T10]): DataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10)] = new TupleDataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10)] { 179 | 180 | val n = 10 181 | 182 | override def readTupleColumn(data: HBaseData) = { 183 | val h1 = data.take(1) 184 | val h2 = data.slice(1, 2) 185 | val h3 = data.slice(2, 3) 186 | val h4 = data.slice(3, 4) 187 | val h5 = data.slice(4, 5) 188 | val h6 = data.slice(5, 6) 189 | val h7 = data.slice(6, 7) 190 | val h8 = data.slice(7, 8) 
191 | val h9 = data.slice(8, 9) 192 | val h10 = data.slice(9, 10) 193 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6), m7.read(h7), m8.read(h8), m9.read(h9), m10.read(h10)) 194 | } 195 | } 196 | 197 | implicit def tuple11Reader[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6], m7: DataReader[T7], m8: DataReader[T8], m9: DataReader[T9], m10: DataReader[T10], m11: DataReader[T11]): DataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11)] = new TupleDataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11)] { 198 | 199 | val n = 11 200 | 201 | override def readTupleColumn(data: HBaseData) = { 202 | val h1 = data.take(1) 203 | val h2 = data.slice(1, 2) 204 | val h3 = data.slice(2, 3) 205 | val h4 = data.slice(3, 4) 206 | val h5 = data.slice(4, 5) 207 | val h6 = data.slice(5, 6) 208 | val h7 = data.slice(6, 7) 209 | val h8 = data.slice(7, 8) 210 | val h9 = data.slice(8, 9) 211 | val h10 = data.slice(9, 10) 212 | val h11 = data.slice(10, 11) 213 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6), m7.read(h7), m8.read(h8), m9.read(h9), m10.read(h10), m11.read(h11)) 214 | } 215 | } 216 | 217 | implicit def tuple12Reader[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6], m7: DataReader[T7], m8: DataReader[T8], m9: DataReader[T9], m10: DataReader[T10], m11: DataReader[T11], m12: DataReader[T12]): DataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12)] = new TupleDataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12)] { 218 | 219 | val n = 12 220 | 221 | override def readTupleColumn(data: HBaseData) = { 222 | val h1 = data.take(1) 223 | val h2 = data.slice(1, 2) 224 | val h3 = data.slice(2, 3) 225 | val h4 = data.slice(3, 4) 226 | val h5 = data.slice(4, 5) 227 | val h6 = data.slice(5, 6) 228 | val h7 = data.slice(6, 7) 229 | val h8 = data.slice(7, 8) 230 | val h9 = data.slice(8, 9) 231 | val h10 = data.slice(9, 10) 232 | val h11 = data.slice(10, 11) 233 | val h12 = data.slice(11, 12) 234 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6), m7.read(h7), m8.read(h8), m9.read(h9), m10.read(h10), m11.read(h11), m12.read(h12)) 235 | } 236 | } 237 | 238 | implicit def tuple13Reader[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6], m7: DataReader[T7], m8: DataReader[T8], m9: DataReader[T9], m10: DataReader[T10], m11: DataReader[T11], m12: DataReader[T12], m13: DataReader[T13]): DataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13)] = new TupleDataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13)] { 239 | 240 | val n = 13 241 | 242 | override def readTupleColumn(data: HBaseData) = { 243 | val h1 = data.take(1) 244 | val h2 = data.slice(1, 2) 245 | val h3 = data.slice(2, 3) 246 | val h4 = data.slice(3, 4) 247 | val h5 = data.slice(4, 5) 248 | val h6 = data.slice(5, 6) 249 | val h7 = data.slice(6, 7) 250 | val h8 = data.slice(7, 8) 251 | val h9 = data.slice(8, 9) 252 | val h10 = data.slice(9, 10) 253 | val h11 = data.slice(10, 11) 254 | val h12 = data.slice(11, 12) 255 | val h13 = data.slice(12, 13) 256 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6), m7.read(h7), 
m8.read(h8), m9.read(h9), m10.read(h10), m11.read(h11), m12.read(h12), m13.read(h13)) 257 | } 258 | } 259 | 260 | implicit def tuple14Reader[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6], m7: DataReader[T7], m8: DataReader[T8], m9: DataReader[T9], m10: DataReader[T10], m11: DataReader[T11], m12: DataReader[T12], m13: DataReader[T13], m14: DataReader[T14]): DataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14)] = new TupleDataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14)] { 261 | 262 | val n = 14 263 | 264 | override def readTupleColumn(data: HBaseData) = { 265 | val h1 = data.take(1) 266 | val h2 = data.slice(1, 2) 267 | val h3 = data.slice(2, 3) 268 | val h4 = data.slice(3, 4) 269 | val h5 = data.slice(4, 5) 270 | val h6 = data.slice(5, 6) 271 | val h7 = data.slice(6, 7) 272 | val h8 = data.slice(7, 8) 273 | val h9 = data.slice(8, 9) 274 | val h10 = data.slice(9, 10) 275 | val h11 = data.slice(10, 11) 276 | val h12 = data.slice(11, 12) 277 | val h13 = data.slice(12, 13) 278 | val h14 = data.slice(13, 14) 279 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6), m7.read(h7), m8.read(h8), m9.read(h9), m10.read(h10), m11.read(h11), m12.read(h12), m13.read(h13), m14.read(h14)) 280 | } 281 | } 282 | 283 | implicit def tuple15Reader[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6], m7: DataReader[T7], m8: DataReader[T8], m9: DataReader[T9], m10: DataReader[T10], m11: DataReader[T11], m12: DataReader[T12], m13: DataReader[T13], m14: DataReader[T14], m15: DataReader[T15]): DataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15)] = new TupleDataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15)] { 284 | 285 | val n = 15 286 | 287 | override def readTupleColumn(data: HBaseData) = { 288 | val h1 = data.take(1) 289 | val h2 = data.slice(1, 2) 290 | val h3 = data.slice(2, 3) 291 | val h4 = data.slice(3, 4) 292 | val h5 = data.slice(4, 5) 293 | val h6 = data.slice(5, 6) 294 | val h7 = data.slice(6, 7) 295 | val h8 = data.slice(7, 8) 296 | val h9 = data.slice(8, 9) 297 | val h10 = data.slice(9, 10) 298 | val h11 = data.slice(10, 11) 299 | val h12 = data.slice(11, 12) 300 | val h13 = data.slice(12, 13) 301 | val h14 = data.slice(13, 14) 302 | val h15 = data.slice(14, 15) 303 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6), m7.read(h7), m8.read(h8), m9.read(h9), m10.read(h10), m11.read(h11), m12.read(h12), m13.read(h13), m14.read(h14), m15.read(h15)) 304 | } 305 | } 306 | 307 | implicit def tuple16Reader[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6], m7: DataReader[T7], m8: DataReader[T8], m9: DataReader[T9], m10: DataReader[T10], m11: DataReader[T11], m12: DataReader[T12], m13: DataReader[T13], m14: DataReader[T14], m15: DataReader[T15], m16: DataReader[T16]): DataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16)] = new TupleDataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16)] { 308 | 309 | val n = 16 310 | 311 | override def readTupleColumn(data: HBaseData) = { 312 | 
val h1 = data.take(1) 313 | val h2 = data.slice(1, 2) 314 | val h3 = data.slice(2, 3) 315 | val h4 = data.slice(3, 4) 316 | val h5 = data.slice(4, 5) 317 | val h6 = data.slice(5, 6) 318 | val h7 = data.slice(6, 7) 319 | val h8 = data.slice(7, 8) 320 | val h9 = data.slice(8, 9) 321 | val h10 = data.slice(9, 10) 322 | val h11 = data.slice(10, 11) 323 | val h12 = data.slice(11, 12) 324 | val h13 = data.slice(12, 13) 325 | val h14 = data.slice(13, 14) 326 | val h15 = data.slice(14, 15) 327 | val h16 = data.slice(15, 16) 328 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6), m7.read(h7), m8.read(h8), m9.read(h9), m10.read(h10), m11.read(h11), m12.read(h12), m13.read(h13), m14.read(h14), m15.read(h15), m16.read(h16)) 329 | } 330 | } 331 | 332 | implicit def tuple17Reader[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6], m7: DataReader[T7], m8: DataReader[T8], m9: DataReader[T9], m10: DataReader[T10], m11: DataReader[T11], m12: DataReader[T12], m13: DataReader[T13], m14: DataReader[T14], m15: DataReader[T15], m16: DataReader[T16], m17: DataReader[T17]): DataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17)] = new TupleDataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17)] { 333 | 334 | val n = 17 335 | 336 | override def readTupleColumn(data: HBaseData) = { 337 | val h1 = data.take(1) 338 | val h2 = data.slice(1, 2) 339 | val h3 = data.slice(2, 3) 340 | val h4 = data.slice(3, 4) 341 | val h5 = data.slice(4, 5) 342 | val h6 = data.slice(5, 6) 343 | val h7 = data.slice(6, 7) 344 | val h8 = data.slice(7, 8) 345 | val h9 = data.slice(8, 9) 346 | val h10 = data.slice(9, 10) 347 | val h11 = data.slice(10, 11) 348 | val h12 = data.slice(11, 12) 349 | val h13 = data.slice(12, 13) 350 | val h14 = data.slice(13, 14) 351 | val h15 = data.slice(14, 15) 352 | val h16 = data.slice(15, 16) 353 | val h17 = data.slice(16, 17) 354 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6), m7.read(h7), m8.read(h8), m9.read(h9), m10.read(h10), m11.read(h11), m12.read(h12), m13.read(h13), m14.read(h14), m15.read(h15), m16.read(h16), m17.read(h17)) 355 | } 356 | } 357 | 358 | implicit def tuple18Reader[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6], m7: DataReader[T7], m8: DataReader[T8], m9: DataReader[T9], m10: DataReader[T10], m11: DataReader[T11], m12: DataReader[T12], m13: DataReader[T13], m14: DataReader[T14], m15: DataReader[T15], m16: DataReader[T16], m17: DataReader[T17], m18: DataReader[T18]): DataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18)] = new TupleDataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18)] { 359 | 360 | val n = 18 361 | 362 | override def readTupleColumn(data: HBaseData) = { 363 | val h1 = data.take(1) 364 | val h2 = data.slice(1, 2) 365 | val h3 = data.slice(2, 3) 366 | val h4 = data.slice(3, 4) 367 | val h5 = data.slice(4, 5) 368 | val h6 = data.slice(5, 6) 369 | val h7 = data.slice(6, 7) 370 | val h8 = data.slice(7, 8) 371 | val h9 = data.slice(8, 9) 372 | val h10 = data.slice(9, 10) 373 | val h11 = data.slice(10, 11) 374 | val h12 = data.slice(11, 12) 375 | val h13 = 
data.slice(12, 13) 376 | val h14 = data.slice(13, 14) 377 | val h15 = data.slice(14, 15) 378 | val h16 = data.slice(15, 16) 379 | val h17 = data.slice(16, 17) 380 | val h18 = data.slice(17, 18) 381 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6), m7.read(h7), m8.read(h8), m9.read(h9), m10.read(h10), m11.read(h11), m12.read(h12), m13.read(h13), m14.read(h14), m15.read(h15), m16.read(h16), m17.read(h17), m18.read(h18)) 382 | } 383 | } 384 | 385 | implicit def tuple19Reader[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6], m7: DataReader[T7], m8: DataReader[T8], m9: DataReader[T9], m10: DataReader[T10], m11: DataReader[T11], m12: DataReader[T12], m13: DataReader[T13], m14: DataReader[T14], m15: DataReader[T15], m16: DataReader[T16], m17: DataReader[T17], m18: DataReader[T18], m19: DataReader[T19]): DataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19)] = new TupleDataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19)] { 386 | 387 | val n = 19 388 | 389 | override def readTupleColumn(data: HBaseData) = { 390 | val h1 = data.take(1) 391 | val h2 = data.slice(1, 2) 392 | val h3 = data.slice(2, 3) 393 | val h4 = data.slice(3, 4) 394 | val h5 = data.slice(4, 5) 395 | val h6 = data.slice(5, 6) 396 | val h7 = data.slice(6, 7) 397 | val h8 = data.slice(7, 8) 398 | val h9 = data.slice(8, 9) 399 | val h10 = data.slice(9, 10) 400 | val h11 = data.slice(10, 11) 401 | val h12 = data.slice(11, 12) 402 | val h13 = data.slice(12, 13) 403 | val h14 = data.slice(13, 14) 404 | val h15 = data.slice(14, 15) 405 | val h16 = data.slice(15, 16) 406 | val h17 = data.slice(16, 17) 407 | val h18 = data.slice(17, 18) 408 | val h19 = data.slice(18, 19) 409 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6), m7.read(h7), m8.read(h8), m9.read(h9), m10.read(h10), m11.read(h11), m12.read(h12), m13.read(h13), m14.read(h14), m15.read(h15), m16.read(h16), m17.read(h17), m18.read(h18), m19.read(h19)) 410 | } 411 | } 412 | 413 | implicit def tuple20Reader[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6], m7: DataReader[T7], m8: DataReader[T8], m9: DataReader[T9], m10: DataReader[T10], m11: DataReader[T11], m12: DataReader[T12], m13: DataReader[T13], m14: DataReader[T14], m15: DataReader[T15], m16: DataReader[T16], m17: DataReader[T17], m18: DataReader[T18], m19: DataReader[T19], m20: DataReader[T20]): DataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20)] = new TupleDataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20)] { 414 | 415 | val n = 20 416 | 417 | override def readTupleColumn(data: HBaseData) = { 418 | val h1 = data.take(1) 419 | val h2 = data.slice(1, 2) 420 | val h3 = data.slice(2, 3) 421 | val h4 = data.slice(3, 4) 422 | val h5 = data.slice(4, 5) 423 | val h6 = data.slice(5, 6) 424 | val h7 = data.slice(6, 7) 425 | val h8 = data.slice(7, 8) 426 | val h9 = data.slice(8, 9) 427 | val h10 = data.slice(9, 10) 428 | val h11 = data.slice(10, 11) 429 | val h12 = data.slice(11, 12) 430 | val h13 = data.slice(12, 13) 431 | val h14 = data.slice(13, 14) 432 | 
val h15 = data.slice(14, 15) 433 | val h16 = data.slice(15, 16) 434 | val h17 = data.slice(16, 17) 435 | val h18 = data.slice(17, 18) 436 | val h19 = data.slice(18, 19) 437 | val h20 = data.slice(19, 20) 438 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6), m7.read(h7), m8.read(h8), m9.read(h9), m10.read(h10), m11.read(h11), m12.read(h12), m13.read(h13), m14.read(h14), m15.read(h15), m16.read(h16), m17.read(h17), m18.read(h18), m19.read(h19), m20.read(h20)) 439 | } 440 | } 441 | 442 | implicit def tuple21Reader[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6], m7: DataReader[T7], m8: DataReader[T8], m9: DataReader[T9], m10: DataReader[T10], m11: DataReader[T11], m12: DataReader[T12], m13: DataReader[T13], m14: DataReader[T14], m15: DataReader[T15], m16: DataReader[T16], m17: DataReader[T17], m18: DataReader[T18], m19: DataReader[T19], m20: DataReader[T20], m21: DataReader[T21]): DataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21)] = new TupleDataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21)] { 443 | 444 | val n = 21 445 | 446 | override def readTupleColumn(data: HBaseData) = { 447 | val h1 = data.take(1) 448 | val h2 = data.slice(1, 2) 449 | val h3 = data.slice(2, 3) 450 | val h4 = data.slice(3, 4) 451 | val h5 = data.slice(4, 5) 452 | val h6 = data.slice(5, 6) 453 | val h7 = data.slice(6, 7) 454 | val h8 = data.slice(7, 8) 455 | val h9 = data.slice(8, 9) 456 | val h10 = data.slice(9, 10) 457 | val h11 = data.slice(10, 11) 458 | val h12 = data.slice(11, 12) 459 | val h13 = data.slice(12, 13) 460 | val h14 = data.slice(13, 14) 461 | val h15 = data.slice(14, 15) 462 | val h16 = data.slice(15, 16) 463 | val h17 = data.slice(16, 17) 464 | val h18 = data.slice(17, 18) 465 | val h19 = data.slice(18, 19) 466 | val h20 = data.slice(19, 20) 467 | val h21 = data.slice(20, 21) 468 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6), m7.read(h7), m8.read(h8), m9.read(h9), m10.read(h10), m11.read(h11), m12.read(h12), m13.read(h13), m14.read(h14), m15.read(h15), m16.read(h16), m17.read(h17), m18.read(h18), m19.read(h19), m20.read(h20), m21.read(h21)) 469 | } 470 | } 471 | 472 | implicit def tuple22Reader[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6], m7: DataReader[T7], m8: DataReader[T8], m9: DataReader[T9], m10: DataReader[T10], m11: DataReader[T11], m12: DataReader[T12], m13: DataReader[T13], m14: DataReader[T14], m15: DataReader[T15], m16: DataReader[T16], m17: DataReader[T17], m18: DataReader[T18], m19: DataReader[T19], m20: DataReader[T20], m21: DataReader[T21], m22: DataReader[T22]): DataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22)] = new TupleDataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22)] { 473 | 474 | val n = 22 475 | 476 | override def readTupleColumn(data: HBaseData) = { 477 | val h1 = data.take(1) 478 | val h2 = data.slice(1, 2) 479 | val h3 = data.slice(2, 3) 480 | val h4 = data.slice(3, 4) 481 | val h5 = data.slice(4, 5) 482 | val h6 = 
data.slice(5, 6) 483 | val h7 = data.slice(6, 7) 484 | val h8 = data.slice(7, 8) 485 | val h9 = data.slice(8, 9) 486 | val h10 = data.slice(9, 10) 487 | val h11 = data.slice(10, 11) 488 | val h12 = data.slice(11, 12) 489 | val h13 = data.slice(12, 13) 490 | val h14 = data.slice(13, 14) 491 | val h15 = data.slice(14, 15) 492 | val h16 = data.slice(15, 16) 493 | val h17 = data.slice(16, 17) 494 | val h18 = data.slice(17, 18) 495 | val h19 = data.slice(18, 19) 496 | val h20 = data.slice(19, 20) 497 | val h21 = data.slice(20, 21) 498 | val h22 = data.slice(21, 22) 499 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6), m7.read(h7), m8.read(h8), m9.read(h9), m10.read(h10), m11.read(h11), m12.read(h12), m13.read(h13), m14.read(h14), m15.read(h15), m16.read(h16), m17.read(h17), m18.read(h18), m19.read(h19), m20.read(h20), m21.read(h21), m22.read(h22)) 500 | } 501 | } 502 | } 503 | 504 | 505 | -------------------------------------------------------------------------------- /src/main/scala/info/xiaohei/spark/connector/hbase/transformer/writer/DataWriter.scala: -------------------------------------------------------------------------------- 1 | package info.xiaohei.spark.connector.hbase.transformer.writer 2 | 3 | import info.xiaohei.spark.connector.hbase.transformer.DataTransformer 4 | 5 | /** 6 | * Author: xiaohei 7 | * Date: 2017/3/21 8 | * Email: xiaohei.info@gmail.com 9 | * Host: www.xiaohei.info 10 | */ 11 | trait DataWriter[T] extends DataTransformer { 12 | def write(data: T): HBaseData 13 | } 14 | 15 | trait SingleColumnDataWriter[T] extends DataWriter[T] { 16 | override def write(data: T): HBaseData = Seq(writeSingleColumn(data)) 17 | 18 | def writeSingleColumn(data: T): Option[Array[Byte]] 19 | } 20 | 21 | abstract class CustomDataWriter[S, T](implicit writer: DataWriter[T]) extends DataWriter[S] { 22 | 23 | override def write(data: S): HBaseData = writer.write(convert(data)) 24 | 25 | def convert(data: S): T 26 | } 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /src/main/scala/info/xiaohei/spark/connector/hbase/transformer/writer/DataWriterConversions.scala: -------------------------------------------------------------------------------- 1 | package info.xiaohei.spark.connector.hbase.transformer.writer 2 | 3 | import org.apache.hadoop.hbase.util.Bytes 4 | 5 | /** 6 | * Author: xiaohei 7 | * Date: 2017/3/21 8 | * Email: xiaohei.info@gmail.com 9 | * Host: www.xiaohei.info 10 | */ 11 | trait DataWriterConversions extends Serializable{ 12 | implicit def intWriter: DataWriter[Int] = new SingleColumnDataWriter[Int] { 13 | override def writeSingleColumn(data: Int): Option[Array[Byte]] = Some(Bytes.toBytes(data)) 14 | } 15 | 16 | implicit def longWriter: DataWriter[Long] = new SingleColumnDataWriter[Long] { 17 | override def writeSingleColumn(data: Long): Option[Array[Byte]] = Some(Bytes.toBytes(data)) 18 | } 19 | 20 | implicit def shortWriter: DataWriter[Short] = new SingleColumnDataWriter[Short] { 21 | override def writeSingleColumn(data: Short): Option[Array[Byte]] = Some(Bytes.toBytes(data)) 22 | } 23 | 24 | implicit def doubleWriter: DataWriter[Double] = new SingleColumnDataWriter[Double] { 25 | override def writeSingleColumn(data: Double): Option[Array[Byte]] = Some(Bytes.toBytes(data)) 26 | } 27 | 28 | implicit def floatWriter: DataWriter[Float] = new SingleColumnDataWriter[Float] { 29 | override def writeSingleColumn(data: Float): Option[Array[Byte]] = Some(Bytes.toBytes(data)) 30 | } 31 | 32 | implicit 
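`CustomDataWriter` in DataWriter.scala above is the hook for persisting your own types: convert the type into something that already has a `DataWriter` (typically a tuple) and the implicit writers in this trait do the rest. A minimal sketch, where the `UserRow` case class and its column layout are invented for illustration:

```scala
import info.xiaohei.spark.connector.hbase.transformer.writer.{CustomDataWriter, DataWriter}

case class UserRow(id: String, name: String, age: Int)

// Delegates to the implicit (String, String, Int) tuple writer: UserRow is first
// converted to a tuple and then written column by column like any other tuple.
implicit def userRowWriter(implicit tupleWriter: DataWriter[(String, String, Int)]): DataWriter[UserRow] =
  new CustomDataWriter[UserRow, (String, String, Int)] {
    override def convert(data: UserRow): (String, String, Int) = (data.id, data.name, data.age)
  }
```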
def booleanWriter: DataWriter[Boolean] = new SingleColumnDataWriter[Boolean] { 33 | override def writeSingleColumn(data: Boolean): Option[Array[Byte]] = Some(Bytes.toBytes(data)) 34 | } 35 | 36 | implicit def bigDecimalWriter: DataWriter[BigDecimal] = new SingleColumnDataWriter[BigDecimal] { 37 | override def writeSingleColumn(data: BigDecimal): Option[Array[Byte]] = Some(Bytes.toBytes(data.bigDecimal)) 38 | } 39 | 40 | implicit def stringWriter: DataWriter[String] = new SingleColumnDataWriter[String] { 41 | override def writeSingleColumn(data: String): Option[Array[Byte]] = Some(Bytes.toBytes(data)) 42 | } 43 | 44 | // Options 45 | 46 | implicit def optionWriter[T](implicit c: DataWriter[T]): DataWriter[Option[T]] = new DataWriter[Option[T]] { 47 | override def write(data: Option[T]): HBaseData = if(data.nonEmpty) c.write(data.get) else Seq(None) 48 | } 49 | 50 | // Tuples 51 | 52 | 53 | implicit def tupleWriter2[T1, T2](implicit c1: DataWriter[T1], c2: DataWriter[T2]): DataWriter[(T1, T2)] = new DataWriter[(T1, T2)] { 54 | override def write(data: (T1, T2)): HBaseData = c1.write(data._1) ++ c2.write(data._2) 55 | } 56 | 57 | implicit def tupleWriter3[T1, T2, T3](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3]): DataWriter[(T1, T2, T3)] = new DataWriter[(T1, T2, T3)] { 58 | override def write(data: (T1, T2, T3)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) 59 | } 60 | 61 | implicit def tupleWriter4[T1, T2, T3, T4](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4]): DataWriter[(T1, T2, T3, T4)] = new DataWriter[(T1, T2, T3, T4)] { 62 | override def write(data: (T1, T2, T3, T4)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) 63 | } 64 | 65 | implicit def tupleWriter5[T1, T2, T3, T4, T5](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5]): DataWriter[(T1, T2, T3, T4, T5)] = new DataWriter[(T1, T2, T3, T4, T5)] { 66 | override def write(data: (T1, T2, T3, T4, T5)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) 67 | } 68 | 69 | implicit def tupleWriter6[T1, T2, T3, T4, T5, T6](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6]): DataWriter[(T1, T2, T3, T4, T5, T6)] = new DataWriter[(T1, T2, T3, T4, T5, T6)] { 70 | override def write(data: (T1, T2, T3, T4, T5, T6)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6) 71 | } 72 | 73 | implicit def tupleWriter7[T1, T2, T3, T4, T5, T6, T7](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6], c7: DataWriter[T7]): DataWriter[(T1, T2, T3, T4, T5, T6, T7)] = new DataWriter[(T1, T2, T3, T4, T5, T6, T7)] { 74 | override def write(data: (T1, T2, T3, T4, T5, T6, T7)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6) ++ c7.write(data._7) 75 | } 76 | 77 | implicit def tupleWriter8[T1, T2, T3, T4, T5, T6, T7, T8](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6], c7: DataWriter[T7], c8: DataWriter[T8]): DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8)] = new DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8)] { 78 | override 
def write(data: (T1, T2, T3, T4, T5, T6, T7, T8)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6) ++ c7.write(data._7) ++ c8.write(data._8) 79 | } 80 | 81 | implicit def tupleWriter9[T1, T2, T3, T4, T5, T6, T7, T8, T9](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6], c7: DataWriter[T7], c8: DataWriter[T8], c9: DataWriter[T9]): DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9)] = new DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9)] { 82 | override def write(data: (T1, T2, T3, T4, T5, T6, T7, T8, T9)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6) ++ c7.write(data._7) ++ c8.write(data._8) ++ c9.write(data._9) 83 | } 84 | 85 | implicit def tupleWriter10[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6], c7: DataWriter[T7], c8: DataWriter[T8], c9: DataWriter[T9], c10: DataWriter[T10]): DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10)] = new DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10)] { 86 | override def write(data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6) ++ c7.write(data._7) ++ c8.write(data._8) ++ c9.write(data._9) ++ c10.write(data._10) 87 | } 88 | 89 | implicit def tupleWriter11[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6], c7: DataWriter[T7], c8: DataWriter[T8], c9: DataWriter[T9], c10: DataWriter[T10], c11: DataWriter[T11]): DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11)] = new DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11)] { 90 | override def write(data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6) ++ c7.write(data._7) ++ c8.write(data._8) ++ c9.write(data._9) ++ c10.write(data._10) ++ c11.write(data._11) 91 | } 92 | 93 | implicit def tupleWriter12[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6], c7: DataWriter[T7], c8: DataWriter[T8], c9: DataWriter[T9], c10: DataWriter[T10], c11: DataWriter[T11], c12: DataWriter[T12]): DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12)] = new DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12)] { 94 | override def write(data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6) ++ c7.write(data._7) ++ c8.write(data._8) ++ c9.write(data._9) ++ c10.write(data._10) ++ c11.write(data._11) ++ c12.write(data._12) 95 | } 96 | 97 | implicit def tupleWriter13[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6], c7: DataWriter[T7], c8: DataWriter[T8], c9: DataWriter[T9], c10: DataWriter[T10], c11: DataWriter[T11], c12: DataWriter[T12], c13: 
DataWriter[T13]): DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13)] = new DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13)] { 98 | override def write(data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6) ++ c7.write(data._7) ++ c8.write(data._8) ++ c9.write(data._9) ++ c10.write(data._10) ++ c11.write(data._11) ++ c12.write(data._12) ++ c13.write(data._13) 99 | } 100 | 101 | implicit def tupleWriter14[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6], c7: DataWriter[T7], c8: DataWriter[T8], c9: DataWriter[T9], c10: DataWriter[T10], c11: DataWriter[T11], c12: DataWriter[T12], c13: DataWriter[T13], c14: DataWriter[T14]): DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14)] = new DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14)] { 102 | override def write(data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6) ++ c7.write(data._7) ++ c8.write(data._8) ++ c9.write(data._9) ++ c10.write(data._10) ++ c11.write(data._11) ++ c12.write(data._12) ++ c13.write(data._13) ++ c14.write(data._14) 103 | } 104 | 105 | implicit def tupleWriter15[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6], c7: DataWriter[T7], c8: DataWriter[T8], c9: DataWriter[T9], c10: DataWriter[T10], c11: DataWriter[T11], c12: DataWriter[T12], c13: DataWriter[T13], c14: DataWriter[T14], c15: DataWriter[T15]): DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15)] = new DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15)] { 106 | override def write(data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6) ++ c7.write(data._7) ++ c8.write(data._8) ++ c9.write(data._9) ++ c10.write(data._10) ++ c11.write(data._11) ++ c12.write(data._12) ++ c13.write(data._13) ++ c14.write(data._14) ++ c15.write(data._15) 107 | } 108 | 109 | implicit def tupleWriter16[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6], c7: DataWriter[T7], c8: DataWriter[T8], c9: DataWriter[T9], c10: DataWriter[T10], c11: DataWriter[T11], c12: DataWriter[T12], c13: DataWriter[T13], c14: DataWriter[T14], c15: DataWriter[T15], c16: DataWriter[T16]): DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16)] = new DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16)] { 110 | override def write(data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6) ++ c7.write(data._7) ++ c8.write(data._8) ++ c9.write(data._9) ++ c10.write(data._10) ++ c11.write(data._11) ++ 
c12.write(data._12) ++ c13.write(data._13) ++ c14.write(data._14) ++ c15.write(data._15) ++ c16.write(data._16) 111 | } 112 | 113 | implicit def tupleWriter17[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6], c7: DataWriter[T7], c8: DataWriter[T8], c9: DataWriter[T9], c10: DataWriter[T10], c11: DataWriter[T11], c12: DataWriter[T12], c13: DataWriter[T13], c14: DataWriter[T14], c15: DataWriter[T15], c16: DataWriter[T16], c17: DataWriter[T17]): DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17)] = new DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17)] { 114 | override def write(data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6) ++ c7.write(data._7) ++ c8.write(data._8) ++ c9.write(data._9) ++ c10.write(data._10) ++ c11.write(data._11) ++ c12.write(data._12) ++ c13.write(data._13) ++ c14.write(data._14) ++ c15.write(data._15) ++ c16.write(data._16) ++ c17.write(data._17) 115 | } 116 | 117 | implicit def tupleWriter18[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6], c7: DataWriter[T7], c8: DataWriter[T8], c9: DataWriter[T9], c10: DataWriter[T10], c11: DataWriter[T11], c12: DataWriter[T12], c13: DataWriter[T13], c14: DataWriter[T14], c15: DataWriter[T15], c16: DataWriter[T16], c17: DataWriter[T17], c18: DataWriter[T18]): DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18)] = new DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18)] { 118 | override def write(data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6) ++ c7.write(data._7) ++ c8.write(data._8) ++ c9.write(data._9) ++ c10.write(data._10) ++ c11.write(data._11) ++ c12.write(data._12) ++ c13.write(data._13) ++ c14.write(data._14) ++ c15.write(data._15) ++ c16.write(data._16) ++ c17.write(data._17) ++ c18.write(data._18) 119 | } 120 | 121 | implicit def tupleWriter19[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6], c7: DataWriter[T7], c8: DataWriter[T8], c9: DataWriter[T9], c10: DataWriter[T10], c11: DataWriter[T11], c12: DataWriter[T12], c13: DataWriter[T13], c14: DataWriter[T14], c15: DataWriter[T15], c16: DataWriter[T16], c17: DataWriter[T17], c18: DataWriter[T18], c19: DataWriter[T19]): DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19)] = new DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19)] { 122 | override def write(data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6) ++ c7.write(data._7) ++ 
c8.write(data._8) ++ c9.write(data._9) ++ c10.write(data._10) ++ c11.write(data._11) ++ c12.write(data._12) ++ c13.write(data._13) ++ c14.write(data._14) ++ c15.write(data._15) ++ c16.write(data._16) ++ c17.write(data._17) ++ c18.write(data._18) ++ c19.write(data._19) 123 | } 124 | 125 | implicit def tupleWriter20[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6], c7: DataWriter[T7], c8: DataWriter[T8], c9: DataWriter[T9], c10: DataWriter[T10], c11: DataWriter[T11], c12: DataWriter[T12], c13: DataWriter[T13], c14: DataWriter[T14], c15: DataWriter[T15], c16: DataWriter[T16], c17: DataWriter[T17], c18: DataWriter[T18], c19: DataWriter[T19], c20: DataWriter[T20]): DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20)] = new DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20)] { 126 | override def write(data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6) ++ c7.write(data._7) ++ c8.write(data._8) ++ c9.write(data._9) ++ c10.write(data._10) ++ c11.write(data._11) ++ c12.write(data._12) ++ c13.write(data._13) ++ c14.write(data._14) ++ c15.write(data._15) ++ c16.write(data._16) ++ c17.write(data._17) ++ c18.write(data._18) ++ c19.write(data._19) ++ c20.write(data._20) 127 | } 128 | 129 | implicit def tupleWriter21[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6], c7: DataWriter[T7], c8: DataWriter[T8], c9: DataWriter[T9], c10: DataWriter[T10], c11: DataWriter[T11], c12: DataWriter[T12], c13: DataWriter[T13], c14: DataWriter[T14], c15: DataWriter[T15], c16: DataWriter[T16], c17: DataWriter[T17], c18: DataWriter[T18], c19: DataWriter[T19], c20: DataWriter[T20], c21: DataWriter[T21]): DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21)] = new DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21)] { 130 | override def write(data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6) ++ c7.write(data._7) ++ c8.write(data._8) ++ c9.write(data._9) ++ c10.write(data._10) ++ c11.write(data._11) ++ c12.write(data._12) ++ c13.write(data._13) ++ c14.write(data._14) ++ c15.write(data._15) ++ c16.write(data._16) ++ c17.write(data._17) ++ c18.write(data._18) ++ c19.write(data._19) ++ c20.write(data._20) ++ c21.write(data._21) 131 | } 132 | 133 | implicit def tupleWriter22[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6], c7: DataWriter[T7], c8: DataWriter[T8], c9: DataWriter[T9], c10: DataWriter[T10], c11: DataWriter[T11], c12: DataWriter[T12], c13: DataWriter[T13], c14: DataWriter[T14], c15: DataWriter[T15], c16: DataWriter[T16], c17: 
DataWriter[T17], c18: DataWriter[T18], c19: DataWriter[T19], c20: DataWriter[T20], c21: DataWriter[T21], c22: DataWriter[T22]): DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22)] = new DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22)] { 134 | override def write(data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6) ++ c7.write(data._7) ++ c8.write(data._8) ++ c9.write(data._9) ++ c10.write(data._10) ++ c11.write(data._11) ++ c12.write(data._12) ++ c13.write(data._13) ++ c14.write(data._14) ++ c15.write(data._15) ++ c16.write(data._16) ++ c17.write(data._17) ++ c18.write(data._18) ++ c19.write(data._19) ++ c20.write(data._20) ++ c21.write(data._21) ++ c22.write(data._22) 135 | } 136 | 137 | } 138 | -------------------------------------------------------------------------------- /src/main/scala/info/xiaohei/spark/connector/mysql/MysqlConf.scala: -------------------------------------------------------------------------------- 1 | package info.xiaohei.spark.connector.mysql 2 | 3 | import org.apache.spark.SparkContext 4 | 5 | /** 6 | * Author: xiaohei 7 | * Date: 2017/4/6 8 | * Email: xiaohei.info@gmail.com 9 | * Host: xiaohei.info 10 | */ 11 | 12 | private[mysql] object ConfOption extends Enumeration { 13 | type ConfOption = Value 14 | val SPARK_HBASE_HOST = Value("spark.hbase.host") 15 | val SPARK_MYSQL_HOST = Value("spark.mysql.host") 16 | val SPARK_MYSQL_USERNAME = Value("spark.mysql.username") 17 | val SPARK_MYSQL_PASSWORD = Value("spark.mysql.password") 18 | val SPARK_MYSQL_PORT = Value("spark.mysql.port") 19 | val SPARK_MYSQL_DB = Value("spark.mysql.db") 20 | } 21 | 22 | case class MysqlConf private[mysql]( 23 | private val conf: collection.mutable.Map[String, String] = collection.mutable.Map.empty 24 | ) { 25 | def getMysqlInfo(): (String, String, String) = { 26 | require(conf.nonEmpty, "mysql conf must be set") 27 | val host = conf.get(ConfOption.SPARK_MYSQL_HOST.toString) 28 | val port = conf.get(ConfOption.SPARK_MYSQL_PORT.toString) 29 | val db = conf.get(ConfOption.SPARK_MYSQL_DB.toString) 30 | val username = conf.get(ConfOption.SPARK_MYSQL_USERNAME.toString) 31 | val password = conf.get(ConfOption.SPARK_MYSQL_PASSWORD.toString) 32 | val connectStr = s"jdbc:mysql://${host.get}:${port.get}/${db.get}" 33 | require( 34 | host.isDefined && 35 | port.isDefined && 36 | db.isDefined && 37 | username.isDefined && 38 | password.isDefined, 39 | "host/port/dbname/username/password must be set in mysql conf!" 
40 | ) 41 | (connectStr, username.get, password.get) 42 | } 43 | 44 | //todo: any hidden risks when a large number of connections are created? 45 | def set(key: String, value: String): MysqlConf = { 46 | conf += key -> value 47 | this.copy(conf = conf) 48 | } 49 | } 50 | 51 | object MysqlConf { 52 | def createFromSpark(sc: SparkContext) = { 53 | val sparkConf = sc.getConf 54 | createConf( 55 | sparkConf.get(ConfOption.SPARK_MYSQL_HOST.toString), 56 | sparkConf.get(ConfOption.SPARK_MYSQL_USERNAME.toString), 57 | sparkConf.get(ConfOption.SPARK_MYSQL_PASSWORD.toString), 58 | sparkConf.get(ConfOption.SPARK_MYSQL_PORT.toString, "3306"), 59 | sparkConf.get(ConfOption.SPARK_MYSQL_DB.toString) 60 | ) 61 | } 62 | 63 | def createConf( 64 | host: String, 65 | username: String, 66 | password: String, 67 | port: String, 68 | dbName: String 69 | ) = { 70 | val collectionConf = collection.mutable.Map[String, String]( 71 | ConfOption.SPARK_MYSQL_HOST.toString -> host, 72 | ConfOption.SPARK_MYSQL_USERNAME.toString -> username, 73 | ConfOption.SPARK_MYSQL_PASSWORD.toString -> password, 74 | ConfOption.SPARK_MYSQL_PORT.toString -> port, 75 | ConfOption.SPARK_MYSQL_DB.toString -> dbName 76 | ) 77 | MysqlConf(collectionConf) 78 | } 79 | } 80 | 81 | trait MysqlConfConversions extends Serializable{ 82 | implicit def scToMysqlConf(sc: SparkContext): MysqlConf = MysqlConf.createFromSpark(sc) 83 | } 84 | -------------------------------------------------------------------------------- /src/main/scala/info/xiaohei/spark/connector/mysql/builder/reader/MysqlContext.scala: -------------------------------------------------------------------------------- 1 | package info.xiaohei.spark.connector.mysql.builder.reader 2 | 3 | import info.xiaohei.spark.connector.RelationalDbEntry 4 | import org.apache.spark.SparkContext 5 | 6 | /** 7 | * Author: xiaohei 8 | * Date: 2017/3/26 9 | * Email: xiaohei.info@gmail.com 10 | * Host: xiaohei.info 11 | */ 12 | private[mysql] class MysqlContext() extends Serializable { 13 | def fromMysql[T](tableName: String): MysqlReaderBuilder[T] = new MysqlReaderBuilder[T](tableName) 14 | } 15 | 16 | trait MysqlContextConversions extends Serializable { 17 | implicit def scToMysqlContext(sc: SparkContext): MysqlContext = new MysqlContext() 18 | 19 | implicit def entryToMysqlContext(entry: RelationalDbEntry): MysqlContext = new MysqlContext() 20 | } 21 | -------------------------------------------------------------------------------- /src/main/scala/info/xiaohei/spark/connector/mysql/builder/reader/MysqlReaderBuilder.scala: -------------------------------------------------------------------------------- 1 | package info.xiaohei.spark.connector.mysql.builder.reader 2 | 3 | import java.sql.DriverManager 4 | 5 | import info.xiaohei.spark.connector.mysql.MysqlConf 6 | import info.xiaohei.spark.connector.mysql.transformer.mapper.DataMapper 7 | 8 | /** 9 | * Author: xiaohei 10 | * Date: 2017/3/26 11 | * Email: xiaohei.info@gmail.com 12 | * Host: xiaohei.info 13 | */ 14 | case class MysqlReaderBuilder[T]( 15 | private[mysql] val tableName: String, 16 | private[mysql] val columns: Iterable[String] = Seq.empty, 17 | private[mysql] val whereConditions: Option[String] = None 18 | ) { 19 | 20 | def select(cols: String*): MysqlReaderBuilder[T] = { 21 | require(this.columns.isEmpty, "Columns have already been set") 22 | require(cols.nonEmpty, "Columns must be set, at least one") 23 | 24 | this.copy(columns = cols) 25 | } 26 | 27 | def select(cols: Iterable[String]): MysqlReaderBuilder[T] = { 28 | require(this.columns.isEmpty, "Columns have already been set") 29 | 
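A hedged sketch of how `MysqlConf`, `MysqlContext` and the reader builder above fit together; the host, credentials, table and columns are placeholders, `sc` is an existing SparkContext, the conversion of the builder into a result is `readFromMysql` just below, and the tuple `DataMapper` instances are assumed to come from DataMapperConversions (not shown in this excerpt):

```scala
import info.xiaohei.spark.connector.mysql._

// Build the conf explicitly (alternatively: MysqlConf.createFromSpark(sc)
// when the spark.mysql.* options were passed to spark-submit).
implicit val mysqlConf: MysqlConf = MysqlConf.createConf(
  "mysql-host", "username", "password", "3306", "my_db")

// scToMysqlContext turns sc into a MysqlContext, fromMysql/select/where build the
// query, and readFromMysql executes "select name,age from user where age > 18".
val users: Option[Seq[(String, Int)]] =
  sc.fromMysql[(String, Int)]("user")
    .select("name", "age")
    .where("age > 18")

users.getOrElse(Seq.empty).foreach(println)
```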
require(cols.nonEmpty, "Columns must be set, at least one") 30 | 31 | this.copy(columns = cols) 32 | } 33 | 34 | def where(conditions: String): MysqlReaderBuilder[T] = { 35 | this.copy(whereConditions = Some(conditions)) 36 | } 37 | 38 | } 39 | 40 | trait MysqlReaderBuilderConversions extends Serializable { 41 | implicit def readFromMysql[T](builder: MysqlReaderBuilder[T]) 42 | (implicit mysqlConf: MysqlConf, dataMapper: DataMapper[T]): Option[Seq[T]] = { 43 | require(builder.columns.nonEmpty, "column names must be set!") 44 | 45 | val (connectStr, username, password) = mysqlConf.getMysqlInfo() 46 | val conn = DriverManager.getConnection(connectStr, username, password) 47 | var sql = s"select ${builder.columns.mkString(",")} from ${builder.tableName}" 48 | if (builder.whereConditions.nonEmpty) { 49 | sql += s" where ${builder.whereConditions.get}" 50 | } 51 | val ps = conn.prepareStatement(sql) 52 | Class.forName("com.mysql.jdbc.Driver") 53 | try { 54 | val resultList = new collection.mutable.ListBuffer[T] 55 | val resultSet = ps.executeQuery() 56 | while (resultSet.next()) { 57 | resultList += dataMapper.map(resultSet) 58 | } 59 | Some(resultList) 60 | } 61 | catch { 62 | case e: Exception => e.printStackTrace() 63 | None 64 | } finally { 65 | if (ps != null) { 66 | ps.close() 67 | } 68 | if (conn != null) { 69 | conn.close() 70 | } 71 | } 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /src/main/scala/info/xiaohei/spark/connector/mysql/builder/writer/MysqlWriterBuilder.scala: -------------------------------------------------------------------------------- 1 | package info.xiaohei.spark.connector.mysql.builder.writer 2 | 3 | import java.sql.{DriverManager, PreparedStatement} 4 | 5 | import info.xiaohei.spark.connector.mysql.MysqlConf 6 | import info.xiaohei.spark.connector.mysql.transformer.executor.DataExecutor 7 | 8 | /** 9 | * Author: xiaohei 10 | * Date: 2017/3/26 11 | * Email: xiaohei.info@gmail.com 12 | * Host: xiaohei.info 13 | */ 14 | case class MysqlWriterBuilder[C] private[mysql]( 15 | private[mysql] val tableName: String, 16 | private[mysql] val collectionData: Iterable[C], 17 | //todo:t.productIterator.foreach{ i =>println("Value = " + i )} 18 | private[mysql] val columns: Iterable[String] = Seq.empty, 19 | //todo: to be completed 20 | private[mysql] val whereConditions: Option[String] = None 21 | ) { 22 | def insert(cols: String*) = { 23 | require(this.columns.isEmpty, "Columns have already been set") 24 | require(cols.nonEmpty, "Columns must be set, at least one") 25 | 26 | this.copy(columns = cols) 27 | } 28 | 29 | def insert(cols: Iterable[String]) = { 30 | require(this.columns.isEmpty, "Columns have already been set") 31 | require(cols.nonEmpty, "Columns must be set, at least one") 32 | 33 | this.copy(columns = cols) 34 | } 35 | 36 | def where(conditions: String) = { 37 | this.copy(whereConditions = Some(conditions)) 38 | } 39 | } 40 | 41 | private[mysql] class MysqlWriterBuildMaker[C](collectionData: Iterable[C]) 42 | extends Serializable { 43 | def toMysql(tableName: String): MysqlWriterBuilder[C] = 44 | MysqlWriterBuilder[C](tableName, collectionData) 45 | } 46 | 47 | private[mysql] class MysqlWriter[C](builder: MysqlWriterBuilder[C])(implicit mysqlConf: MysqlConf, dataExecutor: DataExecutor[C]) 48 | extends Serializable { 49 | def save(): Unit = { 50 | require(builder.columns.nonEmpty, "column names must be set!") 51 | 52 | val (connectStr, username, password) = mysqlConf.getMysqlInfo() 53 | val conn = DriverManager.getConnection(connectStr, 
username, password) 54 | 55 | var placeholder = "" 56 | for (i <- 0 until builder.columns.size) placeholder += "?," 57 | var sql = s"insert into ${builder.tableName}(${builder.columns.mkString(",")}) values(${placeholder.substring(0, placeholder.length - 1)})" 58 | if (builder.whereConditions.nonEmpty) { 59 | sql += s" where ${builder.whereConditions.get}" 60 | } 61 | val ps = conn.prepareStatement(sql) 62 | Class.forName("com.mysql.jdbc.Driver") 63 | try { 64 | builder.collectionData.foreach(x => dataExecutor.execute(ps, x)) 65 | } catch { 66 | case e: Exception => e.printStackTrace() 67 | } finally { 68 | if (ps != null) { 69 | ps.close() 70 | } 71 | if (conn != null) { 72 | conn.close() 73 | } 74 | } 75 | } 76 | } 77 | 78 | trait MysqlWriterBuilderConversions extends Serializable { 79 | implicit def mysqlCollectionToBuildMaker[C](collectionData: Iterable[C]) 80 | : MysqlWriterBuildMaker[C] = new MysqlWriterBuildMaker[C](collectionData) 81 | 82 | implicit def mysqlCollectionBuilderToWriter[C](builder: MysqlWriterBuilder[C])(implicit mysqlConf: MysqlConf, dataExecutor: DataExecutor[C]) 83 | : MysqlWriter[C] = new MysqlWriter[C](builder) 84 | } 85 | -------------------------------------------------------------------------------- /src/main/scala/info/xiaohei/spark/connector/mysql/package.scala: -------------------------------------------------------------------------------- 1 | package info.xiaohei.spark.connector 2 | 3 | import info.xiaohei.spark.connector.mysql.builder.reader.{MysqlContextConversions, MysqlReaderBuilderConversions} 4 | import info.xiaohei.spark.connector.mysql.transformer.executor.DataExecutorConversions 5 | import info.xiaohei.spark.connector.mysql.transformer.mapper.DataMapperConversions 6 | import info.xiaohei.spark.connector.mysql.builder.writer.MysqlWriterBuilderConversions 7 | 8 | /** 9 | * Author: xiaohei 10 | * Date: 2017/4/6 11 | * Email: xiaohei.info@gmail.com 12 | * Host: xiaohei.info 13 | */ 14 | package object mysql extends MysqlReaderBuilderConversions 15 | with MysqlWriterBuilderConversions 16 | with MysqlConfConversions 17 | with MysqlContextConversions 18 | with DataExecutorConversions 19 | with DataMapperConversions 20 | -------------------------------------------------------------------------------- /src/main/scala/info/xiaohei/spark/connector/mysql/transformer/executor/DataExecutor.scala: -------------------------------------------------------------------------------- 1 | package info.xiaohei.spark.connector.mysql.transformer.executor 2 | 3 | import java.sql.PreparedStatement 4 | 5 | 6 | /** 7 | * Author: xiaohei 8 | * Date: 2017/4/15 9 | * Email: xiaohei.info@gmail.com 10 | * Host: xiaohei.info 11 | */ 12 | trait DataExecutor[T] extends Serializable { 13 | var index: Int = 1 14 | 15 | def prepare(ps: PreparedStatement, data: T): Unit 16 | 17 | def execute(ps: PreparedStatement, data: T): Unit = { 18 | prepare(ps, data) 19 | ps.executeUpdate() 20 | } 21 | } 22 | 23 | abstract class CustomDataExecutor[S, T](implicit dataExecutor: DataExecutor[T]) extends DataExecutor[S] { 24 | 25 | override def prepare(ps: PreparedStatement, data: S) = dataExecutor.prepare(ps, convert(data)) 26 | 27 | def convert(data: S): T 28 | } 29 | -------------------------------------------------------------------------------- /src/main/scala/info/xiaohei/spark/connector/mysql/transformer/executor/DataExecutorConversions.scala: -------------------------------------------------------------------------------- 1 | package info.xiaohei.spark.connector.mysql.transformer.executor 2 | 3 | 
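The write path mirrors the reader: `mysqlCollectionToBuildMaker` lifts any `Iterable`, `insert` fixes the column list and `save()` binds every element through its `DataExecutor` (the tuple executors are defined in DataExecutorConversions below). A sketch with the same placeholder connection details and a hypothetical `user(name, age)` table:

```scala
import info.xiaohei.spark.connector.mysql._

implicit val mysqlConf: MysqlConf = MysqlConf.createConf(
  "mysql-host", "username", "password", "3306", "my_db")

val rows = Seq(("xiaohei", 25), ("xiaobai", 23))

// Prepares "insert into user(name,age) values(?,?)" once and then calls
// executeUpdate per element via the implicit (String, Int) DataExecutor.
rows.toMysql("user")
  .insert("name", "age")
  .save()
```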
import java.sql.PreparedStatement 4 | 5 | 6 | /** 7 | * Author: xiaohei 8 | * Date: 2017/4/15 9 | * Email: xiaohei.info@gmail.com 10 | * Host: xiaohei.info 11 | */ 12 | trait DataExecutorConversions extends Serializable { 13 | implicit def intExecutor: DataExecutor[Int] = new DataExecutor[Int] { 14 | override def prepare(ps: PreparedStatement, data: Int): Unit = { 15 | ps.setInt(index, data) 16 | } 17 | } 18 | 19 | implicit def longExecutor: DataExecutor[Long] = new DataExecutor[Long] { 20 | override def prepare(ps: PreparedStatement, data: Long): Unit = { 21 | ps.setLong(index, data) 22 | } 23 | } 24 | 25 | implicit def shortExecutor: DataExecutor[Short] = new DataExecutor[Short] { 26 | override def prepare(ps: PreparedStatement, data: Short): Unit = { 27 | ps.setShort(index, data) 28 | } 29 | } 30 | 31 | implicit def doubleExecutor: DataExecutor[Double] = new DataExecutor[Double] { 32 | override def prepare(ps: PreparedStatement, data: Double): Unit = { 33 | ps.setDouble(index, data) 34 | } 35 | } 36 | 37 | implicit def floatExecutor: DataExecutor[Float] = new DataExecutor[Float] { 38 | override def prepare(ps: PreparedStatement, data: Float): Unit = { 39 | ps.setFloat(index, data) 40 | } 41 | } 42 | 43 | implicit def booleanExecutor: DataExecutor[Boolean] = new DataExecutor[Boolean] { 44 | override def prepare(ps: PreparedStatement, data: Boolean): Unit = { 45 | ps.setBoolean(index, data) 46 | } 47 | } 48 | 49 | implicit def bigDecimalExecutor: DataExecutor[java.math.BigDecimal] = new DataExecutor[java.math.BigDecimal] { 50 | override def prepare(ps: PreparedStatement, data: java.math.BigDecimal): Unit = { 51 | ps.setBigDecimal(index, data) 52 | } 53 | } 54 | 55 | implicit def stringExecutor: DataExecutor[String] = new DataExecutor[String] { 56 | override def prepare(ps: PreparedStatement, data: String): Unit = { 57 | ps.setString(index, data) 58 | } 59 | } 60 | 61 | // Tuples 62 | 63 | //todo:index的设置方式 64 | implicit def tupleExecutor2[T1, T2](implicit e1: DataExecutor[T1], e2: DataExecutor[T2]): DataExecutor[(T1, T2)] = new DataExecutor[(T1, T2)] { 65 | override def prepare(ps: PreparedStatement, data: (T1, T2)): Unit = { 66 | e1.index = 1 67 | e1.prepare(ps, data._1) 68 | e2.index = 2 69 | e2.prepare(ps, data._2) 70 | } 71 | } 72 | 73 | implicit def tupleExecutor3[T1, T2, T3](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3]): DataExecutor[(T1, T2, T3)] = new DataExecutor[(T1, T2, T3)] { 74 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3)): Unit = { 75 | e1.index = 1 76 | e1.prepare(ps, data._1) 77 | e2.index = 2 78 | e2.prepare(ps, data._2) 79 | e3.index = 3 80 | e3.prepare(ps, data._3) 81 | } 82 | } 83 | 84 | implicit def tupleExecutor4[T1, T2, T3, T4](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4]): DataExecutor[(T1, T2, T3, T4)] = new DataExecutor[(T1, T2, T3, T4)] { 85 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4)): Unit = { 86 | e1.index = 1 87 | e1.prepare(ps, data._1) 88 | e2.index = 2 89 | e2.prepare(ps, data._2) 90 | e3.index = 3 91 | e3.prepare(ps, data._3) 92 | e4.index = 4 93 | e4.prepare(ps, data._4) 94 | } 95 | } 96 | 97 | implicit def tupleExecutor5[T1, T2, T3, T4, T5](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5]): DataExecutor[(T1, T2, T3, T4, T5)] = new DataExecutor[(T1, T2, T3, T4, T5)] { 98 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5)): Unit = { 99 | 
e1.index = 1 100 | e1.prepare(ps, data._1) 101 | e2.index = 2 102 | e2.prepare(ps, data._2) 103 | e3.index = 3 104 | e3.prepare(ps, data._3) 105 | e4.index = 4 106 | e4.prepare(ps, data._4) 107 | e5.index = 5 108 | e5.prepare(ps, data._5) 109 | } 110 | } 111 | 112 | implicit def tupleExecutor6[T1, T2, T3, T4, T5, T6](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6]): DataExecutor[(T1, T2, T3, T4, T5, T6)] = new DataExecutor[(T1, T2, T3, T4, T5, T6)] { 113 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6)): Unit = { 114 | e1.index = 1 115 | e1.prepare(ps, data._1) 116 | e2.index = 2 117 | e2.prepare(ps, data._2) 118 | e3.index = 3 119 | e3.prepare(ps, data._3) 120 | e4.index = 4 121 | e4.prepare(ps, data._4) 122 | e5.index = 5 123 | e5.prepare(ps, data._5) 124 | e6.index = 6 125 | e6.prepare(ps, data._6) 126 | } 127 | } 128 | 129 | implicit def tupleExecutor7[T1, T2, T3, T4, T5, T6, T7](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6], e7: DataExecutor[T7]): DataExecutor[(T1, T2, T3, T4, T5, T6, T7)] = new DataExecutor[(T1, T2, T3, T4, T5, T6, T7)] { 130 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6, T7)): Unit = { 131 | e1.index = 1 132 | e1.prepare(ps, data._1) 133 | e2.index = 2 134 | e2.prepare(ps, data._2) 135 | e3.index = 3 136 | e3.prepare(ps, data._3) 137 | e4.index = 4 138 | e4.prepare(ps, data._4) 139 | e5.index = 5 140 | e5.prepare(ps, data._5) 141 | e6.index = 6 142 | e6.prepare(ps, data._6) 143 | e7.index = 7 144 | e7.prepare(ps, data._7) 145 | } 146 | } 147 | 148 | implicit def tupleExecutor8[T1, T2, T3, T4, T5, T6, T7, T8](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6], e7: DataExecutor[T7], e8: DataExecutor[T8]): DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8)] = new DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8)] { 149 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6, T7, T8)): Unit = { 150 | e1.index = 1 151 | e1.prepare(ps, data._1) 152 | e2.index = 2 153 | e2.prepare(ps, data._2) 154 | e3.index = 3 155 | e3.prepare(ps, data._3) 156 | e4.index = 4 157 | e4.prepare(ps, data._4) 158 | e5.index = 5 159 | e5.prepare(ps, data._5) 160 | e6.index = 6 161 | e6.prepare(ps, data._6) 162 | e7.index = 7 163 | e7.prepare(ps, data._7) 164 | e8.index = 8 165 | e8.prepare(ps, data._8) 166 | } 167 | } 168 | 169 | implicit def tupleExecutor9[T1, T2, T3, T4, T5, T6, T7, T8, T9](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6], e7: DataExecutor[T7], e8: DataExecutor[T8], e9: DataExecutor[T9]): DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9)] = new DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9)] { 170 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6, T7, T8, T9)): Unit = { 171 | e1.index = 1 172 | e1.prepare(ps, data._1) 173 | e2.index = 2 174 | e2.prepare(ps, data._2) 175 | e3.index = 3 176 | e3.prepare(ps, data._3) 177 | e4.index = 4 178 | e4.prepare(ps, data._4) 179 | e5.index = 5 180 | e5.prepare(ps, data._5) 181 | e6.index = 6 182 | e6.prepare(ps, data._6) 183 | e7.index = 7 184 | e7.prepare(ps, data._7) 185 | e8.index = 8 186 | e8.prepare(ps, data._8) 187 | e9.index = 9 188 | e9.prepare(ps, data._9) 189 | } 190 | } 
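The tuple executors above can also back custom types through `CustomDataExecutor` (defined in DataExecutor.scala earlier): convert the value to a tuple and the matching tuple executor performs the parameter binding. The `Person` case class below is purely illustrative:

```scala
import info.xiaohei.spark.connector.mysql.transformer.executor.{CustomDataExecutor, DataExecutor}

case class Person(name: String, age: Int)

// prepare() delegates to tupleExecutor2, which ends up calling
// ps.setString(1, name) and ps.setInt(2, age) before each executeUpdate.
implicit def personExecutor(implicit tupleExecutor: DataExecutor[(String, Int)]): DataExecutor[Person] =
  new CustomDataExecutor[Person, (String, Int)] {
    override def convert(data: Person): (String, Int) = (data.name, data.age)
  }

// With this in scope, a Seq[Person] can be written with the same
// toMysql("user").insert("name", "age").save() chain as a tuple collection.
```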
191 | 192 | implicit def tupleExecutor10[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6], e7: DataExecutor[T7], e8: DataExecutor[T8], e9: DataExecutor[T9], e10: DataExecutor[T10]): DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10)] = new DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10)] { 193 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10)): Unit = { 194 | e1.index = 1 195 | e1.prepare(ps, data._1) 196 | e2.index = 2 197 | e2.prepare(ps, data._2) 198 | e3.index = 3 199 | e3.prepare(ps, data._3) 200 | e4.index = 4 201 | e4.prepare(ps, data._4) 202 | e5.index = 5 203 | e5.prepare(ps, data._5) 204 | e6.index = 6 205 | e6.prepare(ps, data._6) 206 | e7.index = 7 207 | e7.prepare(ps, data._7) 208 | e8.index = 8 209 | e8.prepare(ps, data._8) 210 | e9.index = 9 211 | e9.prepare(ps, data._9) 212 | e10.index = 10 213 | e10.prepare(ps, data._10) 214 | } 215 | } 216 | 217 | implicit def tupleExecutor11[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6], e7: DataExecutor[T7], e8: DataExecutor[T8], e9: DataExecutor[T9], e10: DataExecutor[T10], e11: DataExecutor[T11]): DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11)] = new DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11)] { 218 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11)): Unit = { 219 | e1.index = 1 220 | e1.prepare(ps, data._1) 221 | e2.index = 2 222 | e2.prepare(ps, data._2) 223 | e3.index = 3 224 | e3.prepare(ps, data._3) 225 | e4.index = 4 226 | e4.prepare(ps, data._4) 227 | e5.index = 5 228 | e5.prepare(ps, data._5) 229 | e6.index = 6 230 | e6.prepare(ps, data._6) 231 | e7.index = 7 232 | e7.prepare(ps, data._7) 233 | e8.index = 8 234 | e8.prepare(ps, data._8) 235 | e9.index = 9 236 | e9.prepare(ps, data._9) 237 | e10.index = 10 238 | e10.prepare(ps, data._10) 239 | e11.index = 11 240 | e11.prepare(ps, data._11) 241 | } 242 | } 243 | 244 | implicit def tupleExecutor12[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6], e7: DataExecutor[T7], e8: DataExecutor[T8], e9: DataExecutor[T9], e10: DataExecutor[T10], e11: DataExecutor[T11], e12: DataExecutor[T12]): DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12)] = new DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12)] { 245 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12)): Unit = { 246 | e1.index = 1 247 | e1.prepare(ps, data._1) 248 | e2.index = 2 249 | e2.prepare(ps, data._2) 250 | e3.index = 3 251 | e3.prepare(ps, data._3) 252 | e4.index = 4 253 | e4.prepare(ps, data._4) 254 | e5.index = 5 255 | e5.prepare(ps, data._5) 256 | e6.index = 6 257 | e6.prepare(ps, data._6) 258 | e7.index = 7 259 | e7.prepare(ps, data._7) 260 | e8.index = 8 261 | e8.prepare(ps, data._8) 262 | e9.index = 9 263 | e9.prepare(ps, data._9) 264 | e10.index = 10 265 | e10.prepare(ps, data._10) 266 | e11.index = 11 267 | e11.prepare(ps, data._11) 268 | e12.index = 12 269 | e12.prepare(ps, data._12) 270 | } 271 | } 272 | 273 | implicit def tupleExecutor13[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, 
T13](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6], e7: DataExecutor[T7], e8: DataExecutor[T8], e9: DataExecutor[T9], e10: DataExecutor[T10], e11: DataExecutor[T11], e12: DataExecutor[T12], e13: DataExecutor[T13]): DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13)] = new DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13)] { 274 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13)): Unit = { 275 | e1.index = 1 276 | e1.prepare(ps, data._1) 277 | e2.index = 2 278 | e2.prepare(ps, data._2) 279 | e3.index = 3 280 | e3.prepare(ps, data._3) 281 | e4.index = 4 282 | e4.prepare(ps, data._4) 283 | e5.index = 5 284 | e5.prepare(ps, data._5) 285 | e6.index = 6 286 | e6.prepare(ps, data._6) 287 | e7.index = 7 288 | e7.prepare(ps, data._7) 289 | e8.index = 8 290 | e8.prepare(ps, data._8) 291 | e9.index = 9 292 | e9.prepare(ps, data._9) 293 | e10.index = 10 294 | e10.prepare(ps, data._10) 295 | e11.index = 11 296 | e11.prepare(ps, data._11) 297 | e12.index = 12 298 | e12.prepare(ps, data._12) 299 | e13.index = 13 300 | e13.prepare(ps, data._13) 301 | } 302 | } 303 | 304 | implicit def tupleExecutor14[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6], e7: DataExecutor[T7], e8: DataExecutor[T8], e9: DataExecutor[T9], e10: DataExecutor[T10], e11: DataExecutor[T11], e12: DataExecutor[T12], e13: DataExecutor[T13], e14: DataExecutor[T14]): DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14)] = new DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14)] { 305 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14)): Unit = { 306 | e1.index = 1 307 | e1.prepare(ps, data._1) 308 | e2.index = 2 309 | e2.prepare(ps, data._2) 310 | e3.index = 3 311 | e3.prepare(ps, data._3) 312 | e4.index = 4 313 | e4.prepare(ps, data._4) 314 | e5.index = 5 315 | e5.prepare(ps, data._5) 316 | e6.index = 6 317 | e6.prepare(ps, data._6) 318 | e7.index = 7 319 | e7.prepare(ps, data._7) 320 | e8.index = 8 321 | e8.prepare(ps, data._8) 322 | e9.index = 9 323 | e9.prepare(ps, data._9) 324 | e10.index = 10 325 | e10.prepare(ps, data._10) 326 | e11.index = 11 327 | e11.prepare(ps, data._11) 328 | e12.index = 12 329 | e12.prepare(ps, data._12) 330 | e13.index = 13 331 | e13.prepare(ps, data._13) 332 | e14.index = 14 333 | e14.prepare(ps, data._14) 334 | } 335 | } 336 | 337 | implicit def tupleExecutor15[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6], e7: DataExecutor[T7], e8: DataExecutor[T8], e9: DataExecutor[T9], e10: DataExecutor[T10], e11: DataExecutor[T11], e12: DataExecutor[T12], e13: DataExecutor[T13], e14: DataExecutor[T14], e15: DataExecutor[T15]): DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15)] = new DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15)] { 338 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15)): Unit = { 339 | e1.index = 1 340 | e1.prepare(ps, data._1) 341 | e2.index = 2 342 | e2.prepare(ps, data._2) 
343 | e3.index = 3 344 | e3.prepare(ps, data._3) 345 | e4.index = 4 346 | e4.prepare(ps, data._4) 347 | e5.index = 5 348 | e5.prepare(ps, data._5) 349 | e6.index = 6 350 | e6.prepare(ps, data._6) 351 | e7.index = 7 352 | e7.prepare(ps, data._7) 353 | e8.index = 8 354 | e8.prepare(ps, data._8) 355 | e9.index = 9 356 | e9.prepare(ps, data._9) 357 | e10.index = 10 358 | e10.prepare(ps, data._10) 359 | e11.index = 11 360 | e11.prepare(ps, data._11) 361 | e12.index = 12 362 | e12.prepare(ps, data._12) 363 | e13.index = 13 364 | e13.prepare(ps, data._13) 365 | e14.index = 14 366 | e14.prepare(ps, data._14) 367 | e15.index = 15 368 | e15.prepare(ps, data._15) 369 | } 370 | } 371 | 372 | implicit def tupleExecutor16[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6], e7: DataExecutor[T7], e8: DataExecutor[T8], e9: DataExecutor[T9], e10: DataExecutor[T10], e11: DataExecutor[T11], e12: DataExecutor[T12], e13: DataExecutor[T13], e14: DataExecutor[T14], e15: DataExecutor[T15], e16: DataExecutor[T16]): DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16)] = new DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16)] { 373 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16)): Unit = { 374 | e1.index = 1 375 | e1.prepare(ps, data._1) 376 | e2.index = 2 377 | e2.prepare(ps, data._2) 378 | e3.index = 3 379 | e3.prepare(ps, data._3) 380 | e4.index = 4 381 | e4.prepare(ps, data._4) 382 | e5.index = 5 383 | e5.prepare(ps, data._5) 384 | e6.index = 6 385 | e6.prepare(ps, data._6) 386 | e7.index = 7 387 | e7.prepare(ps, data._7) 388 | e8.index = 8 389 | e8.prepare(ps, data._8) 390 | e9.index = 9 391 | e9.prepare(ps, data._9) 392 | e10.index = 10 393 | e10.prepare(ps, data._10) 394 | e11.index = 11 395 | e11.prepare(ps, data._11) 396 | e12.index = 12 397 | e12.prepare(ps, data._12) 398 | e13.index = 13 399 | e13.prepare(ps, data._13) 400 | e14.index = 14 401 | e14.prepare(ps, data._14) 402 | e15.index = 15 403 | e15.prepare(ps, data._15) 404 | e16.index = 16 405 | e16.prepare(ps, data._16) 406 | } 407 | } 408 | 409 | implicit def tupleExecutor17[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6], e7: DataExecutor[T7], e8: DataExecutor[T8], e9: DataExecutor[T9], e10: DataExecutor[T10], e11: DataExecutor[T11], e12: DataExecutor[T12], e13: DataExecutor[T13], e14: DataExecutor[T14], e15: DataExecutor[T15], e16: DataExecutor[T16], e17: DataExecutor[T17]): DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17)] = new DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17)] { 410 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17)): Unit = { 411 | e1.index = 1 412 | e1.prepare(ps, data._1) 413 | e2.index = 2 414 | e2.prepare(ps, data._2) 415 | e3.index = 3 416 | e3.prepare(ps, data._3) 417 | e4.index = 4 418 | e4.prepare(ps, data._4) 419 | e5.index = 5 420 | e5.prepare(ps, data._5) 421 | e6.index = 6 422 | e6.prepare(ps, data._6) 423 | e7.index = 7 424 | e7.prepare(ps, data._7) 425 | e8.index = 8 426 | e8.prepare(ps, data._8) 
427 | e9.index = 9 428 | e9.prepare(ps, data._9) 429 | e10.index = 10 430 | e10.prepare(ps, data._10) 431 | e11.index = 11 432 | e11.prepare(ps, data._11) 433 | e12.index = 12 434 | e12.prepare(ps, data._12) 435 | e13.index = 13 436 | e13.prepare(ps, data._13) 437 | e14.index = 14 438 | e14.prepare(ps, data._14) 439 | e15.index = 15 440 | e15.prepare(ps, data._15) 441 | e16.index = 16 442 | e16.prepare(ps, data._16) 443 | e17.index = 17 444 | e17.prepare(ps, data._17) 445 | } 446 | } 447 | 448 | implicit def tupleExecutor18[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6], e7: DataExecutor[T7], e8: DataExecutor[T8], e9: DataExecutor[T9], e10: DataExecutor[T10], e11: DataExecutor[T11], e12: DataExecutor[T12], e13: DataExecutor[T13], e14: DataExecutor[T14], e15: DataExecutor[T15], e16: DataExecutor[T16], e17: DataExecutor[T17], e18: DataExecutor[T18]): DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18)] = new DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18)] { 449 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18)): Unit = { 450 | e1.index = 1 451 | e1.prepare(ps, data._1) 452 | e2.index = 2 453 | e2.prepare(ps, data._2) 454 | e3.index = 3 455 | e3.prepare(ps, data._3) 456 | e4.index = 4 457 | e4.prepare(ps, data._4) 458 | e5.index = 5 459 | e5.prepare(ps, data._5) 460 | e6.index = 6 461 | e6.prepare(ps, data._6) 462 | e7.index = 7 463 | e7.prepare(ps, data._7) 464 | e8.index = 8 465 | e8.prepare(ps, data._8) 466 | e9.index = 9 467 | e9.prepare(ps, data._9) 468 | e10.index = 10 469 | e10.prepare(ps, data._10) 470 | e11.index = 11 471 | e11.prepare(ps, data._11) 472 | e12.index = 12 473 | e12.prepare(ps, data._12) 474 | e13.index = 13 475 | e13.prepare(ps, data._13) 476 | e14.index = 14 477 | e14.prepare(ps, data._14) 478 | e15.index = 15 479 | e15.prepare(ps, data._15) 480 | e16.index = 16 481 | e16.prepare(ps, data._16) 482 | e17.index = 17 483 | e17.prepare(ps, data._17) 484 | e18.index = 18 485 | e18.prepare(ps, data._18) 486 | } 487 | } 488 | 489 | implicit def tupleExecutor19[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6], e7: DataExecutor[T7], e8: DataExecutor[T8], e9: DataExecutor[T9], e10: DataExecutor[T10], e11: DataExecutor[T11], e12: DataExecutor[T12], e13: DataExecutor[T13], e14: DataExecutor[T14], e15: DataExecutor[T15], e16: DataExecutor[T16], e17: DataExecutor[T17], e18: DataExecutor[T18], e19: DataExecutor[T19]): DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19)] = new DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19)] { 490 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19)): Unit = { 491 | e1.index = 1 492 | e1.prepare(ps, data._1) 493 | e2.index = 2 494 | e2.prepare(ps, data._2) 495 | e3.index = 3 496 | e3.prepare(ps, data._3) 497 | e4.index = 4 498 | e4.prepare(ps, data._4) 499 | e5.index = 5 500 | e5.prepare(ps, data._5) 501 | e6.index = 6 502 | e6.prepare(ps, data._6) 503 
| e7.index = 7 504 | e7.prepare(ps, data._7) 505 | e8.index = 8 506 | e8.prepare(ps, data._8) 507 | e9.index = 9 508 | e9.prepare(ps, data._9) 509 | e10.index = 10 510 | e10.prepare(ps, data._10) 511 | e11.index = 11 512 | e11.prepare(ps, data._11) 513 | e12.index = 12 514 | e12.prepare(ps, data._12) 515 | e13.index = 13 516 | e13.prepare(ps, data._13) 517 | e14.index = 14 518 | e14.prepare(ps, data._14) 519 | e15.index = 15 520 | e15.prepare(ps, data._15) 521 | e16.index = 16 522 | e16.prepare(ps, data._16) 523 | e17.index = 17 524 | e17.prepare(ps, data._17) 525 | e18.index = 18 526 | e18.prepare(ps, data._18) 527 | e19.index = 19 528 | e19.prepare(ps, data._19) 529 | } 530 | } 531 | 532 | implicit def tupleExecutor20[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6], e7: DataExecutor[T7], e8: DataExecutor[T8], e9: DataExecutor[T9], e10: DataExecutor[T10], e11: DataExecutor[T11], e12: DataExecutor[T12], e13: DataExecutor[T13], e14: DataExecutor[T14], e15: DataExecutor[T15], e16: DataExecutor[T16], e17: DataExecutor[T17], e18: DataExecutor[T18], e19: DataExecutor[T19], e20: DataExecutor[T20]): DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20)] = new DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20)] { 533 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20)): Unit = { 534 | e1.index = 1 535 | e1.prepare(ps, data._1) 536 | e2.index = 2 537 | e2.prepare(ps, data._2) 538 | e3.index = 3 539 | e3.prepare(ps, data._3) 540 | e4.index = 4 541 | e4.prepare(ps, data._4) 542 | e5.index = 5 543 | e5.prepare(ps, data._5) 544 | e6.index = 6 545 | e6.prepare(ps, data._6) 546 | e7.index = 7 547 | e7.prepare(ps, data._7) 548 | e8.index = 8 549 | e8.prepare(ps, data._8) 550 | e9.index = 9 551 | e9.prepare(ps, data._9) 552 | e10.index = 10 553 | e10.prepare(ps, data._10) 554 | e11.index = 11 555 | e11.prepare(ps, data._11) 556 | e12.index = 12 557 | e12.prepare(ps, data._12) 558 | e13.index = 13 559 | e13.prepare(ps, data._13) 560 | e14.index = 14 561 | e14.prepare(ps, data._14) 562 | e15.index = 15 563 | e15.prepare(ps, data._15) 564 | e16.index = 16 565 | e16.prepare(ps, data._16) 566 | e17.index = 17 567 | e17.prepare(ps, data._17) 568 | e18.index = 18 569 | e18.prepare(ps, data._18) 570 | e19.index = 19 571 | e19.prepare(ps, data._19) 572 | e20.index = 20 573 | e20.prepare(ps, data._20) 574 | } 575 | } 576 | 577 | implicit def tupleExecutor21[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6], e7: DataExecutor[T7], e8: DataExecutor[T8], e9: DataExecutor[T9], e10: DataExecutor[T10], e11: DataExecutor[T11], e12: DataExecutor[T12], e13: DataExecutor[T13], e14: DataExecutor[T14], e15: DataExecutor[T15], e16: DataExecutor[T16], e17: DataExecutor[T17], e18: DataExecutor[T18], e19: DataExecutor[T19], e20: DataExecutor[T20], e21: DataExecutor[T21]): DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21)] = new DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, 
T18, T19, T20, T21)] { 578 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21)): Unit = { 579 | e1.index = 1 580 | e1.prepare(ps, data._1) 581 | e2.index = 2 582 | e2.prepare(ps, data._2) 583 | e3.index = 3 584 | e3.prepare(ps, data._3) 585 | e4.index = 4 586 | e4.prepare(ps, data._4) 587 | e5.index = 5 588 | e5.prepare(ps, data._5) 589 | e6.index = 6 590 | e6.prepare(ps, data._6) 591 | e7.index = 7 592 | e7.prepare(ps, data._7) 593 | e8.index = 8 594 | e8.prepare(ps, data._8) 595 | e9.index = 9 596 | e9.prepare(ps, data._9) 597 | e10.index = 10 598 | e10.prepare(ps, data._10) 599 | e11.index = 11 600 | e11.prepare(ps, data._11) 601 | e12.index = 12 602 | e12.prepare(ps, data._12) 603 | e13.index = 13 604 | e13.prepare(ps, data._13) 605 | e14.index = 14 606 | e14.prepare(ps, data._14) 607 | e15.index = 15 608 | e15.prepare(ps, data._15) 609 | e16.index = 16 610 | e16.prepare(ps, data._16) 611 | e17.index = 17 612 | e17.prepare(ps, data._17) 613 | e18.index = 18 614 | e18.prepare(ps, data._18) 615 | e19.index = 19 616 | e19.prepare(ps, data._19) 617 | e20.index = 20 618 | e20.prepare(ps, data._20) 619 | e21.index = 21 620 | e21.prepare(ps, data._21) 621 | } 622 | } 623 | 624 | implicit def tupleExecutor22[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6], e7: DataExecutor[T7], e8: DataExecutor[T8], e9: DataExecutor[T9], e10: DataExecutor[T10], e11: DataExecutor[T11], e12: DataExecutor[T12], e13: DataExecutor[T13], e14: DataExecutor[T14], e15: DataExecutor[T15], e16: DataExecutor[T16], e17: DataExecutor[T17], e18: DataExecutor[T18], e19: DataExecutor[T19], e20: DataExecutor[T20], e21: DataExecutor[T21], e22: DataExecutor[T22]): DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22)] = new DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22)] { 625 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22)): Unit = { 626 | e1.index = 1 627 | e1.prepare(ps, data._1) 628 | e2.index = 2 629 | e2.prepare(ps, data._2) 630 | e3.index = 3 631 | e3.prepare(ps, data._3) 632 | e4.index = 4 633 | e4.prepare(ps, data._4) 634 | e5.index = 5 635 | e5.prepare(ps, data._5) 636 | e6.index = 6 637 | e6.prepare(ps, data._6) 638 | e7.index = 7 639 | e7.prepare(ps, data._7) 640 | e8.index = 8 641 | e8.prepare(ps, data._8) 642 | e9.index = 9 643 | e9.prepare(ps, data._9) 644 | e10.index = 10 645 | e10.prepare(ps, data._10) 646 | e11.index = 11 647 | e11.prepare(ps, data._11) 648 | e12.index = 12 649 | e12.prepare(ps, data._12) 650 | e13.index = 13 651 | e13.prepare(ps, data._13) 652 | e14.index = 14 653 | e14.prepare(ps, data._14) 654 | e15.index = 15 655 | e15.prepare(ps, data._15) 656 | e16.index = 16 657 | e16.prepare(ps, data._16) 658 | e17.index = 17 659 | e17.prepare(ps, data._17) 660 | e18.index = 18 661 | e18.prepare(ps, data._18) 662 | e19.index = 19 663 | e19.prepare(ps, data._19) 664 | e20.index = 20 665 | e20.prepare(ps, data._20) 666 | e21.index = 21 667 | e21.prepare(ps, data._21) 668 | e22.index = 22 669 | e22.prepare(ps, data._22) 670 | } 671 | } 672 | } 673 | 
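The conversions above are plain JDBC typeclass instances: each tuple executor assigns a 1-based placeholder index to its element executors and then delegates to the matching `PreparedStatement` setter. Below is a minimal sketch (not a file in this repository) of how the implicits resolve; it assumes only what is visible here plus the `DataExecutor[T]` trait's mutable `index` and `prepare` members, and the `user(name, age)` table and object name are hypothetical:

```scala
import java.sql.{Connection, PreparedStatement}

import info.xiaohei.spark.connector.mysql.transformer.executor.{DataExecutor, DataExecutorConversions}

// Illustrative only: mixing in DataExecutorConversions brings the implicit
// executors into scope so the (String, Int) tuple instance can be summoned.
object ExecutorSketch extends DataExecutorConversions {
  def insertUser(conn: Connection, row: (String, Int)): Int = {
    // Hypothetical statement; the two placeholders line up with the tuple.
    val ps: PreparedStatement =
      conn.prepareStatement("INSERT INTO user(name, age) VALUES (?, ?)")
    val executor = implicitly[DataExecutor[(String, Int)]]
    executor.prepare(ps, row) // binds ? #1 via setString, ? #2 via setInt
    try ps.executeUpdate() finally ps.close()
  }
}
```

In the library itself these instances are presumably pulled in by the builder layer (`MysqlWriterBuilder`) rather than summoned by hand; the sketch only shows the assign-index-then-delegate pattern that every `tupleExecutorN` repeats.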
-------------------------------------------------------------------------------- /src/main/scala/info/xiaohei/spark/connector/mysql/transformer/mapper/DataMapper.scala: -------------------------------------------------------------------------------- 1 | package info.xiaohei.spark.connector.mysql.transformer.mapper 2 | 3 | import java.sql.ResultSet 4 | 5 | /** 6 | * Author: xiaohei 7 | * Date: 2017/4/16 8 | * Email: xiaohei.info@gmail.com 9 | * Host: xiaohei.info 10 | */ 11 | trait DataMapper[T] extends Serializable { 12 | var index: Int = 1 13 | 14 | def map(resultSet: ResultSet): T 15 | } 16 | 17 | abstract class CustomDataMapper[S, T](implicit dataMapper: DataMapper[S]) extends DataMapper[T] { 18 | 19 | override def map(resultSet: ResultSet): T = convert(dataMapper.map(resultSet)) 20 | 21 | def convert(data: S): T 22 | } -------------------------------------------------------------------------------- /src/main/scala/info/xiaohei/spark/connector/mysql/transformer/mapper/DataMapperConversions.scala: -------------------------------------------------------------------------------- 1 | package info.xiaohei.spark.connector.mysql.transformer.mapper 2 | 3 | import java.math.BigDecimal 4 | import java.sql.ResultSet 5 | 6 | /** 7 | * Author: xiaohei 8 | * Date: 2017/4/16 9 | * Email: xiaohei.info@gmail.com 10 | * Host: xiaohei.info 11 | */ 12 | trait DataMapperConversions extends Serializable { 13 | implicit def intMapper: DataMapper[Int] = new DataMapper[Int] { 14 | override def map(resultSet: ResultSet): Int = { 15 | resultSet.getInt(index) 16 | } 17 | } 18 | 19 | implicit def longMapper: DataMapper[Long] = new DataMapper[Long] { 20 | override def map(resultSet: ResultSet): Long = { 21 | resultSet.getLong(index) 22 | } 23 | } 24 | 25 | implicit def shortMapper: DataMapper[Short] = new DataMapper[Short] { 26 | override def map(resultSet: ResultSet): Short = { 27 | resultSet.getShort(index) 28 | } 29 | } 30 | 31 | implicit def doubleMapper: DataMapper[Double] = new DataMapper[Double] { 32 | override def map(resultSet: ResultSet): Double = { 33 | resultSet.getDouble(index) 34 | } 35 | } 36 | 37 | implicit def floatMapper: DataMapper[Float] = new DataMapper[Float] { 38 | override def map(resultSet: ResultSet): Float = { 39 | resultSet.getFloat(index) 40 | } 41 | } 42 | 43 | implicit def booleanMapper: DataMapper[Boolean] = new DataMapper[Boolean] { 44 | override def map(resultSet: ResultSet): Boolean = { 45 | resultSet.getBoolean(index) 46 | } 47 | } 48 | 49 | implicit def bigDecimalMapper: DataMapper[java.math.BigDecimal] = new DataMapper[java.math.BigDecimal] { 50 | override def map(resultSet: ResultSet): BigDecimal = { 51 | resultSet.getBigDecimal(index) 52 | } 53 | } 54 | 55 | implicit def stringMapper: DataMapper[String] = new DataMapper[String] { 56 | override def map(resultSet: ResultSet): String = { 57 | resultSet.getString(index) 58 | } 59 | } 60 | 61 | implicit def tupleMapper2[T1, T2](implicit m1: DataMapper[T1], m2: DataMapper[T2]): DataMapper[(T1, T2)] = new DataMapper[(T1, T2)] { 62 | override def map(resultSet: ResultSet): (T1, T2) = { 63 | m1.index = 1 64 | m2.index = 2 65 | (m1.map(resultSet), m2.map(resultSet)) 66 | } 67 | } 68 | 69 | implicit def tupleMapper3[T1, T2, T3](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3]): DataMapper[(T1, T2, T3)] = new DataMapper[(T1, T2, T3)] { 70 | override def map(resultSet: ResultSet): (T1, T2, T3) = { 71 | m1.index = 1 72 | m2.index = 2 73 | m3.index = 3 74 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet)) 75 | } 
76 | } 77 | 78 | 79 | implicit def tupleMapper4[T1, T2, T3, T4](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4]): DataMapper[(T1, T2, T3, T4)] = new DataMapper[(T1, T2, T3, T4)] { 80 | override def map(resultSet: ResultSet): (T1, T2, T3, T4) = { 81 | m1.index = 1 82 | m2.index = 2 83 | m3.index = 3 84 | m4.index = 4 85 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet)) 86 | } 87 | } 88 | 89 | implicit def tupleMapper5[T1, T2, T3, T4, T5](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5]): DataMapper[(T1, T2, T3, T4, T5)] = new DataMapper[(T1, T2, T3, T4, T5)] { 90 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5) = { 91 | m1.index = 1 92 | m2.index = 2 93 | m3.index = 3 94 | m4.index = 4 95 | m5.index = 5 96 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet)) 97 | } 98 | } 99 | 100 | implicit def tupleMapper6[T1, T2, T3, T4, T5, T6](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6]): DataMapper[(T1, T2, T3, T4, T5, T6)] = new DataMapper[(T1, T2, T3, T4, T5, T6)] { 101 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6) = { 102 | m1.index = 1 103 | m2.index = 2 104 | m3.index = 3 105 | m4.index = 4 106 | m5.index = 5 107 | m6.index = 6 108 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet)) 109 | } 110 | } 111 | 112 | implicit def tupleMapper7[T1, T2, T3, T4, T5, T6, T7](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6], m7: DataMapper[T7]): DataMapper[(T1, T2, T3, T4, T5, T6, T7)] = new DataMapper[(T1, T2, T3, T4, T5, T6, T7)] { 113 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6, T7) = { 114 | m1.index = 1 115 | m2.index = 2 116 | m3.index = 3 117 | m4.index = 4 118 | m5.index = 5 119 | m6.index = 6 120 | m7.index = 7 121 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet), m7.map(resultSet)) 122 | } 123 | } 124 | 125 | implicit def tupleMapper8[T1, T2, T3, T4, T5, T6, T7, T8](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6], m7: DataMapper[T7], m8: DataMapper[T8]): DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8)] = new DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8)] { 126 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6, T7, T8) = { 127 | m1.index = 1 128 | m2.index = 2 129 | m3.index = 3 130 | m4.index = 4 131 | m5.index = 5 132 | m6.index = 6 133 | m7.index = 7 134 | m8.index = 8 135 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet), m7.map(resultSet), m8.map(resultSet)) 136 | } 137 | } 138 | 139 | implicit def tupleMapper9[T1, T2, T3, T4, T5, T6, T7, T8, T9](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6], m7: DataMapper[T7], m8: DataMapper[T8], m9: DataMapper[T9]): DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9)] = new DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9)] { 140 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6, T7, T8, T9) = { 141 | m1.index = 1 142 | m2.index = 2 143 | m3.index = 3 144 | m4.index = 4 145 | m5.index = 5 146 | 
m6.index = 6 147 | m7.index = 7 148 | m8.index = 8 149 | m9.index = 9 150 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet), m7.map(resultSet), m8.map(resultSet), m9.map(resultSet)) 151 | } 152 | } 153 | 154 | implicit def tupleMapper10[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6], m7: DataMapper[T7], m8: DataMapper[T8], m9: DataMapper[T9], m10: DataMapper[T10]): DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10)] = new DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10)] { 155 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10) = { 156 | m1.index = 1 157 | m2.index = 2 158 | m3.index = 3 159 | m4.index = 4 160 | m5.index = 5 161 | m6.index = 6 162 | m7.index = 7 163 | m8.index = 8 164 | m9.index = 9 165 | m10.index = 10 166 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet), m7.map(resultSet), m8.map(resultSet), m9.map(resultSet), m10.map(resultSet)) 167 | } 168 | } 169 | 170 | implicit def tupleMapper11[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6], m7: DataMapper[T7], m8: DataMapper[T8], m9: DataMapper[T9], m10: DataMapper[T10], m11: DataMapper[T11]): DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11)] = new DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11)] { 171 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11) = { 172 | m1.index = 1 173 | m2.index = 2 174 | m3.index = 3 175 | m4.index = 4 176 | m5.index = 5 177 | m6.index = 6 178 | m7.index = 7 179 | m8.index = 8 180 | m9.index = 9 181 | m10.index = 10 182 | m11.index = 11 183 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet), m7.map(resultSet), m8.map(resultSet), m9.map(resultSet), m10.map(resultSet), m11.map(resultSet)) 184 | } 185 | } 186 | 187 | implicit def tupleMapper12[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6], m7: DataMapper[T7], m8: DataMapper[T8], m9: DataMapper[T9], m10: DataMapper[T10], m11: DataMapper[T11], m12: DataMapper[T12]): DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12)] = new DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12)] { 188 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12) = { 189 | m1.index = 1 190 | m2.index = 2 191 | m3.index = 3 192 | m4.index = 4 193 | m5.index = 5 194 | m6.index = 6 195 | m7.index = 7 196 | m8.index = 8 197 | m9.index = 9 198 | m10.index = 10 199 | m11.index = 11 200 | m12.index = 12 201 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet), m7.map(resultSet), m8.map(resultSet), m9.map(resultSet), m10.map(resultSet), m11.map(resultSet), m12.map(resultSet)) 202 | } 203 | } 204 | 205 | implicit def tupleMapper13[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6], m7: DataMapper[T7], m8: DataMapper[T8], m9: DataMapper[T9], m10: DataMapper[T10], m11: 
DataMapper[T11], m12: DataMapper[T12], m13: DataMapper[T13]): DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13)] = new DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13)] { 206 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13) = { 207 | m1.index = 1 208 | m2.index = 2 209 | m3.index = 3 210 | m4.index = 4 211 | m5.index = 5 212 | m6.index = 6 213 | m7.index = 7 214 | m8.index = 8 215 | m9.index = 9 216 | m10.index = 10 217 | m11.index = 11 218 | m12.index = 12 219 | m13.index = 13 220 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet), m7.map(resultSet), m8.map(resultSet), m9.map(resultSet), m10.map(resultSet), m11.map(resultSet), m12.map(resultSet), m13.map(resultSet)) 221 | } 222 | } 223 | 224 | implicit def tupleMapper14[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6], m7: DataMapper[T7], m8: DataMapper[T8], m9: DataMapper[T9], m10: DataMapper[T10], m11: DataMapper[T11], m12: DataMapper[T12], m13: DataMapper[T13], m14: DataMapper[T14]): DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14)] = new DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14)] { 225 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14) = { 226 | m1.index = 1 227 | m2.index = 2 228 | m3.index = 3 229 | m4.index = 4 230 | m5.index = 5 231 | m6.index = 6 232 | m7.index = 7 233 | m8.index = 8 234 | m9.index = 9 235 | m10.index = 10 236 | m11.index = 11 237 | m12.index = 12 238 | m13.index = 13 239 | m14.index = 14 240 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet), m7.map(resultSet), m8.map(resultSet), m9.map(resultSet), m10.map(resultSet), m11.map(resultSet), m12.map(resultSet), m13.map(resultSet), m14.map(resultSet)) 241 | } 242 | } 243 | 244 | implicit def tupleMapper15[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6], m7: DataMapper[T7], m8: DataMapper[T8], m9: DataMapper[T9], m10: DataMapper[T10], m11: DataMapper[T11], m12: DataMapper[T12], m13: DataMapper[T13], m14: DataMapper[T14], m15: DataMapper[T15]): DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15)] = new DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15)] { 245 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15) = { 246 | m1.index = 1 247 | m2.index = 2 248 | m3.index = 3 249 | m4.index = 4 250 | m5.index = 5 251 | m6.index = 6 252 | m7.index = 7 253 | m8.index = 8 254 | m9.index = 9 255 | m10.index = 10 256 | m11.index = 11 257 | m12.index = 12 258 | m13.index = 13 259 | m14.index = 14 260 | m15.index = 15 261 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet), m7.map(resultSet), m8.map(resultSet), m9.map(resultSet), m10.map(resultSet), m11.map(resultSet), m12.map(resultSet), m13.map(resultSet), m14.map(resultSet), m15.map(resultSet)) 262 | } 263 | } 264 | 265 | implicit def tupleMapper16[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16](implicit m1: DataMapper[T1], m2: 
DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6], m7: DataMapper[T7], m8: DataMapper[T8], m9: DataMapper[T9], m10: DataMapper[T10], m11: DataMapper[T11], m12: DataMapper[T12], m13: DataMapper[T13], m14: DataMapper[T14], m15: DataMapper[T15], m16: DataMapper[T16]): DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16)] = new DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16)] { 266 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16) = { 267 | m1.index = 1 268 | m2.index = 2 269 | m3.index = 3 270 | m4.index = 4 271 | m5.index = 5 272 | m6.index = 6 273 | m7.index = 7 274 | m8.index = 8 275 | m9.index = 9 276 | m10.index = 10 277 | m11.index = 11 278 | m12.index = 12 279 | m13.index = 13 280 | m14.index = 14 281 | m15.index = 15 282 | m16.index = 16 283 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet), m7.map(resultSet), m8.map(resultSet), m9.map(resultSet), m10.map(resultSet), m11.map(resultSet), m12.map(resultSet), m13.map(resultSet), m14.map(resultSet), m15.map(resultSet), m16.map(resultSet)) 284 | } 285 | } 286 | 287 | implicit def tupleMapper17[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6], m7: DataMapper[T7], m8: DataMapper[T8], m9: DataMapper[T9], m10: DataMapper[T10], m11: DataMapper[T11], m12: DataMapper[T12], m13: DataMapper[T13], m14: DataMapper[T14], m15: DataMapper[T15], m16: DataMapper[T16], m17: DataMapper[T17]): DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17)] = new DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17)] { 288 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17) = { 289 | m1.index = 1 290 | m2.index = 2 291 | m3.index = 3 292 | m4.index = 4 293 | m5.index = 5 294 | m6.index = 6 295 | m7.index = 7 296 | m8.index = 8 297 | m9.index = 9 298 | m10.index = 10 299 | m11.index = 11 300 | m12.index = 12 301 | m13.index = 13 302 | m14.index = 14 303 | m15.index = 15 304 | m16.index = 16 305 | m17.index = 17 306 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet), m7.map(resultSet), m8.map(resultSet), m9.map(resultSet), m10.map(resultSet), m11.map(resultSet), m12.map(resultSet), m13.map(resultSet), m14.map(resultSet), m15.map(resultSet), m16.map(resultSet), m17.map(resultSet)) 307 | } 308 | } 309 | 310 | implicit def tupleMapper18[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6], m7: DataMapper[T7], m8: DataMapper[T8], m9: DataMapper[T9], m10: DataMapper[T10], m11: DataMapper[T11], m12: DataMapper[T12], m13: DataMapper[T13], m14: DataMapper[T14], m15: DataMapper[T15], m16: DataMapper[T16], m17: DataMapper[T17], m18: DataMapper[T18]): DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18)] = new DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18)] { 311 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, 
T14, T15, T16, T17, T18) = { 312 | m1.index = 1 313 | m2.index = 2 314 | m3.index = 3 315 | m4.index = 4 316 | m5.index = 5 317 | m6.index = 6 318 | m7.index = 7 319 | m8.index = 8 320 | m9.index = 9 321 | m10.index = 10 322 | m11.index = 11 323 | m12.index = 12 324 | m13.index = 13 325 | m14.index = 14 326 | m15.index = 15 327 | m16.index = 16 328 | m17.index = 17 329 | m18.index = 18 330 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet), m7.map(resultSet), m8.map(resultSet), m9.map(resultSet), m10.map(resultSet), m11.map(resultSet), m12.map(resultSet), m13.map(resultSet), m14.map(resultSet), m15.map(resultSet), m16.map(resultSet), m17.map(resultSet), m18.map(resultSet)) 331 | } 332 | } 333 | 334 | implicit def tupleMapper19[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6], m7: DataMapper[T7], m8: DataMapper[T8], m9: DataMapper[T9], m10: DataMapper[T10], m11: DataMapper[T11], m12: DataMapper[T12], m13: DataMapper[T13], m14: DataMapper[T14], m15: DataMapper[T15], m16: DataMapper[T16], m17: DataMapper[T17], m18: DataMapper[T18], m19: DataMapper[T19]): DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19)] = new DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19)] { 335 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19) = { 336 | m1.index = 1 337 | m2.index = 2 338 | m3.index = 3 339 | m4.index = 4 340 | m5.index = 5 341 | m6.index = 6 342 | m7.index = 7 343 | m8.index = 8 344 | m9.index = 9 345 | m10.index = 10 346 | m11.index = 11 347 | m12.index = 12 348 | m13.index = 13 349 | m14.index = 14 350 | m15.index = 15 351 | m16.index = 16 352 | m17.index = 17 353 | m18.index = 18 354 | m19.index = 19 355 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet), m7.map(resultSet), m8.map(resultSet), m9.map(resultSet), m10.map(resultSet), m11.map(resultSet), m12.map(resultSet), m13.map(resultSet), m14.map(resultSet), m15.map(resultSet), m16.map(resultSet), m17.map(resultSet), m18.map(resultSet), m19.map(resultSet)) 356 | } 357 | } 358 | 359 | implicit def tupleMapper20[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6], m7: DataMapper[T7], m8: DataMapper[T8], m9: DataMapper[T9], m10: DataMapper[T10], m11: DataMapper[T11], m12: DataMapper[T12], m13: DataMapper[T13], m14: DataMapper[T14], m15: DataMapper[T15], m16: DataMapper[T16], m17: DataMapper[T17], m18: DataMapper[T18], m19: DataMapper[T19], m20: DataMapper[T20]): DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20)] = new DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20)] { 360 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20) = { 361 | m1.index = 1 362 | m2.index = 2 363 | m3.index = 3 364 | m4.index = 4 365 | m5.index = 5 366 | m6.index = 6 367 | m7.index = 7 368 | m8.index = 8 369 | m9.index = 9 370 | m10.index = 10 371 | m11.index = 11 372 | 
m12.index = 12 373 | m13.index = 13 374 | m14.index = 14 375 | m15.index = 15 376 | m16.index = 16 377 | m17.index = 17 378 | m18.index = 18 379 | m19.index = 19 380 | m20.index = 20 381 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet), m7.map(resultSet), m8.map(resultSet), m9.map(resultSet), m10.map(resultSet), m11.map(resultSet), m12.map(resultSet), m13.map(resultSet), m14.map(resultSet), m15.map(resultSet), m16.map(resultSet), m17.map(resultSet), m18.map(resultSet), m19.map(resultSet), m20.map(resultSet)) 382 | } 383 | } 384 | 385 | implicit def tupleMapper21[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6], m7: DataMapper[T7], m8: DataMapper[T8], m9: DataMapper[T9], m10: DataMapper[T10], m11: DataMapper[T11], m12: DataMapper[T12], m13: DataMapper[T13], m14: DataMapper[T14], m15: DataMapper[T15], m16: DataMapper[T16], m17: DataMapper[T17], m18: DataMapper[T18], m19: DataMapper[T19], m20: DataMapper[T20], m21: DataMapper[T21]): DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21)] = new DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21)] { 386 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21) = { 387 | m1.index = 1 388 | m2.index = 2 389 | m3.index = 3 390 | m4.index = 4 391 | m5.index = 5 392 | m6.index = 6 393 | m7.index = 7 394 | m8.index = 8 395 | m9.index = 9 396 | m10.index = 10 397 | m11.index = 11 398 | m12.index = 12 399 | m13.index = 13 400 | m14.index = 14 401 | m15.index = 15 402 | m16.index = 16 403 | m17.index = 17 404 | m18.index = 18 405 | m19.index = 19 406 | m20.index = 20 407 | m21.index = 21 408 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet), m7.map(resultSet), m8.map(resultSet), m9.map(resultSet), m10.map(resultSet), m11.map(resultSet), m12.map(resultSet), m13.map(resultSet), m14.map(resultSet), m15.map(resultSet), m16.map(resultSet), m17.map(resultSet), m18.map(resultSet), m19.map(resultSet), m20.map(resultSet), m21.map(resultSet)) 409 | } 410 | } 411 | 412 | implicit def tupleMapper22[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6], m7: DataMapper[T7], m8: DataMapper[T8], m9: DataMapper[T9], m10: DataMapper[T10], m11: DataMapper[T11], m12: DataMapper[T12], m13: DataMapper[T13], m14: DataMapper[T14], m15: DataMapper[T15], m16: DataMapper[T16], m17: DataMapper[T17], m18: DataMapper[T18], m19: DataMapper[T19], m20: DataMapper[T20], m21: DataMapper[T21], m22: DataMapper[T22]): DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22)] = new DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22)] { 413 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22) = { 414 | m1.index = 1 415 | m2.index = 2 416 | m3.index = 3 417 | m4.index = 4 418 | m5.index = 5 419 | m6.index = 6 420 | m7.index = 7 
421 | m8.index = 8 422 | m9.index = 9 423 | m10.index = 10 424 | m11.index = 11 425 | m12.index = 12 426 | m13.index = 13 427 | m14.index = 14 428 | m15.index = 15 429 | m16.index = 16 430 | m17.index = 17 431 | m18.index = 18 432 | m19.index = 19 433 | m20.index = 20 434 | m21.index = 21 435 | m22.index = 22 436 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet), m7.map(resultSet), m8.map(resultSet), m9.map(resultSet), m10.map(resultSet), m11.map(resultSet), m12.map(resultSet), m13.map(resultSet), m14.map(resultSet), m15.map(resultSet), m16.map(resultSet), m17.map(resultSet), m18.map(resultSet), m19.map(resultSet), m20.map(resultSet), m21.map(resultSet), m22.map(resultSet)) 437 | } 438 | } 439 | } 440 | -------------------------------------------------------------------------------- /src/main/scala/info/xiaohei/spark/test/Test.scala: -------------------------------------------------------------------------------- 1 | package info.xiaohei.spark.test 2 | 3 | /** 4 | * Author: xiaohei 5 | * Date: 2017/5/10 6 | * Email: xiaohei.info@gmail.com 7 | * Host: xiaohei.info 8 | */ 9 | object Test { 10 | 11 | } 12 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/rdd/HBaseKerberosUtil.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.rdd 2 | 3 | import java.io.IOException 4 | import java.security.PrivilegedExceptionAction 5 | 6 | import org.apache.hadoop.security.UserGroupInformation 7 | import org.apache.spark.util.SerializableConfiguration 8 | 9 | /** 10 | * Author: xiaohei 11 | * Date: 2017/7/7 12 | * Email: xiaohei.info@gmail.com 13 | * Host: xiaohei.info 14 | */ 15 | object HBaseKerberosUtil { 16 | @throws[IOException] 17 | def ugiDoAs[A](conf: SerializableConfiguration, principle: String, keytab: String, func: () => A): A = { 18 | UserGroupInformation.setConfiguration(conf.value) 19 | val ugi: UserGroupInformation = UserGroupInformation 20 | .loginUserFromKeytabAndReturnUGI(principle, keytab) 21 | UserGroupInformation.setLoginUser(ugi) 22 | ugi.checkTGTAndReloginFromKeytab() 23 | ugi.doAs(new PrivilegedExceptionAction[A] { 24 | def run: A = { 25 | func() 26 | } 27 | }) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/rdd/HBaseScanRDD.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.rdd 2 | 3 | import java.io.EOFException 4 | import java.text.SimpleDateFormat 5 | import java.util.Date 6 | 7 | import org.apache.hadoop.conf.{Configurable, Configuration} 8 | import org.apache.hadoop.io.Writable 9 | import org.apache.hadoop.mapred.JobConf 10 | import org.apache.hadoop.mapreduce._ 11 | import org.apache.hadoop.mapreduce.lib.input.{CombineFileSplit, FileSplit} 12 | import org.apache.spark._ 13 | import org.apache.spark.annotation.DeveloperApi 14 | import org.apache.spark.deploy.SparkHadoopUtil 15 | import org.apache.spark.executor.{DataReadMethod, InputMetrics} 16 | import org.apache.spark.mapreduce.SparkHadoopMapReduceUtil 17 | import org.apache.spark.rdd.NewHadoopRDD.NewHadoopMapPartitionsWithSplitRDD 18 | import org.apache.spark.storage.StorageLevel 19 | import org.apache.spark.util.{SerializableConfiguration, ShutdownHookManager} 20 | 21 | import scala.reflect.ClassTag 22 | 23 | class HBaseScanRDD[K, V]( 24 | principle: String, 25 | keytab: String, 26 | 
sc: SparkContext, 27 | inputFormatClass: Class[_ <: InputFormat[K, V]], 28 | keyClass: Class[K], 29 | valueClass: Class[V], 30 | @transient private val _conf: Configuration) 31 | extends RDD[(K, V)](sc, Nil) 32 | with SparkHadoopMapReduceUtil 33 | with Logging { 34 | // A Hadoop Configuration can be about 10 KB, which is pretty big, so broadcast it 35 | private val confBroadcast = sc.broadcast(new SerializableConfiguration(_conf)) 36 | // private val serializableConf = new SerializableWritable(_conf) 37 | private val jobTrackerId: String = { 38 | val formatter = new SimpleDateFormat("yyyyMMddHHmm") 39 | formatter.format(new Date()) 40 | } 41 | @transient protected val jobId = new JobID(jobTrackerId, id) 42 | private val shouldCloneJobConf = sparkContext.conf.getBoolean("spark.hadoop.cloneConf", defaultValue = false) 43 | private val ignoreCorruptFiles = 44 | sparkContext.conf.getBoolean("spark.files.ignoreCorruptFiles", defaultValue = true) 45 | 46 | def getConf: Configuration = { 47 | val conf: Configuration = confBroadcast.value.value 48 | if (shouldCloneJobConf) { 49 | // Hadoop Configuration objects are not thread-safe, which may lead to various problems if 50 | // one job modifies a configuration while another reads it (SPARK-2546, SPARK-10611). This 51 | // problem occurs somewhat rarely because most jobs treat the configuration as though it's 52 | // immutable. One solution, implemented here, is to clone the Configuration object. 53 | // Unfortunately, this clone can be very expensive. To avoid unexpected performance 54 | // regressions for workloads and Hadoop versions that do not suffer from these thread-safety 55 | // issues, this cloning is disabled by default. 56 | NewHadoopRDD.CONFIGURATION_INSTANTIATION_LOCK.synchronized { 57 | logDebug("Cloning Hadoop Configuration") 58 | // The Configuration passed in is actually a JobConf and possibly contains credentials. 59 | // To keep those credentials properly we have to create a new JobConf not a Configuration. 
60 | if (conf.isInstanceOf[JobConf]) { 61 | new JobConf(conf) 62 | } else { 63 | new Configuration(conf) 64 | } 65 | } 66 | } else { 67 | conf 68 | } 69 | } 70 | 71 | override def getPartitions: Array[Partition] = { 72 | val inputFormat = inputFormatClass.newInstance 73 | inputFormat match { 74 | case configurable: Configurable => 75 | configurable.setConf(_conf) 76 | case _ => 77 | } 78 | val jobContext = newJobContext(_conf, jobId) 79 | val rawSplits = HBaseKerberosUtil.ugiDoAs(confBroadcast.value, principle, keytab, () => { 80 | inputFormat.getSplits(jobContext).toArray 81 | }: Array[Object]) 82 | val result = new Array[Partition](rawSplits.length) 83 | for (i <- rawSplits.indices) { 84 | result(i) = new NewHadoopPartition(id, i, rawSplits(i).asInstanceOf[InputSplit with Writable]) 85 | } 86 | result 87 | } 88 | 89 | override def compute(theSplit: Partition, context: TaskContext): InterruptibleIterator[(K, V)] = { 90 | val iter = new Iterator[(K, V)] { 91 | val split: NewHadoopPartition = theSplit.asInstanceOf[NewHadoopPartition] 92 | logInfo("Input split: " + split.serializableHadoopSplit) 93 | val conf: Configuration = getConf 94 | val inputMetrics: InputMetrics = context.taskMetrics 95 | .getInputMetricsForReadMethod(DataReadMethod.Hadoop) 96 | // Sets the thread local variable for the file's name 97 | split.serializableHadoopSplit.value match { 98 | case fs: FileSplit => SqlNewHadoopRDDState.setInputFileName(fs.getPath.toString) 99 | case _ => SqlNewHadoopRDDState.unsetInputFileName() 100 | } 101 | // Find a function that will return the FileSystem bytes read by this thread. Do this before 102 | // creating RecordReader, because RecordReader's constructor might read some bytes 103 | val bytesReadCallback: Option[() => Long] = inputMetrics.bytesReadCallback.orElse { 104 | split.serializableHadoopSplit.value match { 105 | case _: FileSplit | _: CombineFileSplit => 106 | SparkHadoopUtil.get.getFSBytesReadOnThreadCallback() 107 | case _ => None 108 | } 109 | } 110 | inputMetrics.setBytesReadCallback(f = bytesReadCallback) 111 | val format: InputFormat[K, V] = inputFormatClass.newInstance 112 | format match { 113 | case configurable: Configurable => 114 | configurable.setConf(conf) 115 | case _ => 116 | } 117 | val attemptId: TaskAttemptID = newTaskAttemptID(jobTrackerId, id, isMap = true, split.index, 0) 118 | val hadoopAttemptContext: TaskAttemptContext = newTaskAttemptContext(conf, attemptId) 119 | private var reader = HBaseKerberosUtil.ugiDoAs(confBroadcast.value, principle, keytab, () => { 120 | val _reader = format.createRecordReader( 121 | split.serializableHadoopSplit.value, hadoopAttemptContext) 122 | _reader.initialize(split.serializableHadoopSplit.value, hadoopAttemptContext) 123 | _reader 124 | }: RecordReader[K, V]) 125 | // Register an on-task-completion callback to close the input stream. 126 | context.addTaskCompletionListener(context => close()) 127 | var havePair = false 128 | var finished = false 129 | var recordsSinceMetricsUpdate = 0 130 | 131 | override def hasNext: Boolean = { 132 | if (!finished && !havePair) { 133 | try { 134 | finished = !reader.nextKeyValue 135 | } catch { 136 | case _: EOFException if ignoreCorruptFiles => finished = true 137 | } 138 | if (finished) { 139 | // Close and release the reader here; close() will also be called when the task 140 | // completes, but for tasks that read from many files, it helps to release the 141 | // resources early. 
142 | close() 143 | } 144 | havePair = !finished 145 | } 146 | !finished 147 | } 148 | 149 | override def next(): (K, V) = { 150 | if (!hasNext) { 151 | throw new java.util.NoSuchElementException("End of stream") 152 | } 153 | havePair = false 154 | if (!finished) { 155 | inputMetrics.incRecordsRead(1) 156 | } 157 | (reader.getCurrentKey, reader.getCurrentValue) 158 | } 159 | 160 | private def close() { 161 | if (reader != null) { 162 | SqlNewHadoopRDDState.unsetInputFileName() 163 | // Close the reader and release it. Note: it's very important that we don't close the 164 | // reader more than once, since that exposes us to MAPREDUCE-5918 when running against 165 | // Hadoop 1.x and older Hadoop 2.x releases. That bug can lead to non-deterministic 166 | // corruption issues when reading compressed input. 167 | try { 168 | reader.close() 169 | } catch { 170 | case e: Exception => 171 | if (!ShutdownHookManager.inShutdown()) { 172 | logWarning("Exception in RecordReader.close()", e) 173 | } 174 | } finally { 175 | reader = null 176 | } 177 | if (bytesReadCallback.isDefined) { 178 | inputMetrics.updateBytesRead() 179 | } else if (split.serializableHadoopSplit.value.isInstanceOf[FileSplit] || 180 | split.serializableHadoopSplit.value.isInstanceOf[CombineFileSplit]) { 181 | // If we can't get the bytes read from the FS stats, fall back to the split size, 182 | // which may be inaccurate. 183 | try { 184 | inputMetrics.incBytesRead(split.serializableHadoopSplit.value.getLength) 185 | } catch { 186 | case e: java.io.IOException => 187 | logWarning("Unable to get input size to set InputMetrics for task", e) 188 | } 189 | } 190 | } 191 | } 192 | } 193 | new InterruptibleIterator(context, iter) 194 | } 195 | 196 | /** Maps over a partition, providing the InputSplit that was used as the base of the partition. */ 197 | @DeveloperApi 198 | def mapPartitionsWithInputSplit[U: ClassTag]( 199 | f: (InputSplit, Iterator[(K, V)]) => Iterator[U], 200 | preservesPartitioning: Boolean = false): RDD[U] = { 201 | new NewHadoopMapPartitionsWithSplitRDD(this, f, preservesPartitioning) 202 | } 203 | 204 | override def getPreferredLocations(hsplit: Partition): Seq[String] = { 205 | val split = hsplit.asInstanceOf[NewHadoopPartition].serializableHadoopSplit.value 206 | val locs = HadoopRDD.SPLIT_INFO_REFLECTIONS match { 207 | case Some(c) => 208 | try { 209 | val infos = c.newGetLocationInfo.invoke(split).asInstanceOf[Array[AnyRef]] 210 | Some(HadoopRDD.convertSplitLocationInfo(infos)) 211 | } catch { 212 | case e: Exception => 213 | logDebug("Failed to use InputSplit#getLocationInfo.", e) 214 | None 215 | } 216 | case None => None 217 | } 218 | locs.getOrElse(split.getLocations.filter(_ != "localhost")) 219 | } 220 | 221 | override def persist(storageLevel: StorageLevel): this.type = { 222 | if (storageLevel.deserialized) { 223 | logWarning("Caching NewHadoopRDDs as deserialized objects usually leads to undesired" + 224 | " behavior because Hadoop's RecordReader reuses the same Writable object for all records." + 225 | " Use a map transformation to make copies of the records.") 226 | } 227 | super.persist(storageLevel) 228 | } 229 | } --------------------------------------------------------------------------------
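`HBaseScanRDD` mirrors Spark's `NewHadoopRDD`, but it routes split calculation (`getSplits`) and record-reader creation through `HBaseKerberosUtil.ugiDoAs`, so both partition planning on the driver and record reading inside tasks happen under a freshly logged-in UGI. A rough usage sketch, assuming a kerberized cluster whose client settings (e.g. `hbase-site.xml`) are already on the classpath, an existing `SparkContext`, and placeholder principal, keytab and table values (the `principle` spelling follows the constructor above):

```scala
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.spark.SparkContext
import org.apache.spark.rdd.HBaseScanRDD

object KerberizedScanSketch {
  // Counts the rows of an HBase table through the Kerberos-aware scan RDD.
  def countRows(sc: SparkContext): Long = {
    val hbaseConf = HBaseConfiguration.create()
    hbaseConf.set(TableInputFormat.INPUT_TABLE, "mytable") // placeholder table name
    val rdd = new HBaseScanRDD[ImmutableBytesWritable, Result](
      "hbase/_HOST@EXAMPLE.COM",            // principle: placeholder Kerberos principal
      "/etc/security/keytabs/hbase.keytab", // keytab: placeholder path
      sc,
      classOf[TableInputFormat],
      classOf[ImmutableBytesWritable],
      classOf[Result],
      hbaseConf)
    rdd.count()
  }
}
```

Whether `HBaseReaderBuilder` wires this RDD up for you is outside this section; the sketch only exercises the constructor directly.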