├── .gitignore
├── README.md
├── pom.xml
└── src
    └── main
        └── scala
            ├── META-INF
            │   └── MANIFEST.MF
            ├── info
            │   └── xiaohei
            │       └── spark
            │           └── connector
            │               ├── HBaseEntry.scala
            │               ├── RelationalDbEntry.scala
            │               ├── hbase
            │               │   ├── HBaseCommonUtils.scala
            │               │   ├── HBaseConf.scala
            │               │   ├── builder
            │               │   │   ├── reader
            │               │   │   │   ├── HBaseContext.scala
            │               │   │   │   ├── HBaseReaderBuilder.scala
            │               │   │   │   ├── HBaseSaltRDD.scala
            │               │   │   │   └── HBaseSimpleRDD.scala
            │               │   │   └── writer
            │               │   │       ├── CollectionWriterBuilder.scala
            │               │   │       └── HBaseWriterBuilder.scala
            │               │   ├── package.scala
            │               │   ├── salt
            │               │   │   ├── SaltProducer.scala
            │               │   │   ├── SaltProducerConversions.scala
            │               │   │   └── SaltProducerFactory.scala
            │               │   └── transformer
            │               │       ├── DataTransformer.scala
            │               │       ├── reader
            │               │       │   ├── DataReader.scala
            │               │       │   └── DataReaderConversions.scala
            │               │       └── writer
            │               │           ├── DataWriter.scala
            │               │           └── DataWriterConversions.scala
            │               ├── mysql
            │               │   ├── MysqlConf.scala
            │               │   ├── builder
            │               │   │   ├── reader
            │               │   │   │   ├── MysqlContext.scala
            │               │   │   │   └── MysqlReaderBuilder.scala
            │               │   │   └── writer
            │               │   │       └── MysqlWriterBuilder.scala
            │               │   ├── package.scala
            │               │   └── transformer
            │               │       ├── executor
            │               │       │   ├── DataExecutor.scala
            │               │       │   └── DataExecutorConversions.scala
            │               │       └── mapper
            │               │           ├── DataMapper.scala
            │               │           └── DataMapperConversions.scala
            │               └── test
            │                   └── Test.scala
            └── org
                └── apache
                    └── spark
                        └── rdd
                            ├── HBaseKerberosUtil.scala
                            └── HBaseScanRDD.scala
/.gitignore:
--------------------------------------------------------------------------------
1 | *.class
2 | # idea
3 | target/
4 | deploy/
5 | target/surefire-reports/
6 | .idea/
7 | *.iml
8 | .DS_Store
9 |
10 | # Mobile Tools for Java (J2ME)
11 | .mtj.tmp/
12 |
13 | # Package Files #
14 | *.jar
15 | *.war
16 | *.ear
17 |
18 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
19 | hs_err_pid*
20 |
21 | # current project
22 | .gitignore
23 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Spark Database Connector

## New Features

- Writing a List to HBase now supports Kerberos authentication
- Upgraded the HBase client API to version 1.2.0

Hides the connection details of the various databases so that database reads and writes in Spark can be handled easily through a Scala API.

Tested environment:

- Scala 2.11.8/2.10.5
- Spark 1.6.0
- HBase 0.98.4
- JDBC Driver 5.1.35

Currently supported:

- HBase
- MySQL

Add the Maven dependency:

```xml
<dependency>
    <groupId>info.xiaohei.www</groupId>
    <artifactId>spark-database-connector_2.11</artifactId>
    <version>1.0.0</version>
</dependency>
```

For Scala 2.10 use:

```xml
<dependency>
    <groupId>info.xiaohei.www</groupId>
    <artifactId>spark-database-connector_2.10</artifactId>
    <version>1.0.0</version>
</dependency>
```

## HBase

### Set the HBase host

Set the HBase host address in any of the following three ways.

**1. On the spark-submit command line:**

```shell
spark-submit --conf spark.hbase.host=your-hbase-host
```

**2. In Scala code:**

```scala
val sparkConf = new SparkConf()
sparkConf.set("spark.hbase.host", "your-hbase-host")
val sc = new SparkContext(sparkConf)
```

**3. As a JVM system property:**

```shell
java -Dspark.hbase.host=your-hbase-host -jar ....
```

**Set the path of the hbase-site.xml file (optional)**

If you need an hbase-site.xml file to be read, point to it with the following option:

```shell
spark.hbase.config=your-hbase-config-path
```

This option can be set in the same ways as above.
Note: the hbase-site.xml file must be on a resource path that the project can see; otherwise it cannot be read and the default configuration is used.
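
For example, a minimal sketch of setting this option from Scala code; the path below is only a placeholder:

```scala
val sparkConf = new SparkConf()
  .set("spark.hbase.host", "your-hbase-host")
  //placeholder path; use the location of your own hbase-site.xml
  .set("spark.hbase.config", "/etc/hbase/conf/hbase-site.xml")
val sc = new SparkContext(sparkConf)
```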

### Writing data to HBase

**Import the implicit conversions:**

```scala
import info.xiaohei.spark.connector.hbase._
```

#### Writing a Spark RDD to HBase

Any Spark RDD can be written to HBase directly, for example:

```scala
val rdd = sc.parallelize(1 to 100)
  .map(i => (s"rowkey-${i.toString}", s"column1-${i.toString}", "column2"))
```

This RDD contains 100 three-element tuples. When writing to HBase, the first element becomes the rowkey and the remaining elements become the column values, in order:

```scala
rdd.toHBase("mytable")
  .insert("col1", "col2")
  .inColumnFamily("columnFamily")
  .save()
```

(1) `toHBase` on the RDD takes the name of the target table
(2) `insert` takes the names of the columns to insert
(3) `inColumnFamily` takes the column family these columns belong to
(4) finally, `save` writes the RDD to HBase

If col2 and col1 live in different column families, the family can be specified per column when passing the names to `insert`:

```scala
rdd.toHBase("mytable")
  .insert("col1", "otherColumnFamily:col2")
  .inColumnFamily("defaultColumnFamily")
  .save()
```

Separate the column family and the column name with a colon (:); any other column that needs its own family is specified the same way.

#### Writing a Scala collection/sequence to HBase

```scala
val dataList = Seq[(String, String)](
  ("00001475304346643896037", "kgJkm0euSbe"),
  ("00001475376619355219953", "kiaR40qzI8o"),
  ("00001475458728618943637", "kgCoW0hgzXO"),
  ("00001475838363931738019", "kqiHu0WNJC0")
)

//create the implicit variable
implicit val hbaseConf = HBaseConf.createConf("hbase-host")
//if you are inside a Spark program you can create it from the SparkConf instead
implicit val hbaseConf = HBaseConf.createFromSpark(sc.getConf)

dataList.toHBase("mytable")
  .insert("col1", "col2")
  .inColumnFamily("columnFamily")
  .save()
```

The usage is similar to writing an RDD to HBase. **Note: the implicit variable must not be defined inside Spark operators such as foreachPartition** (see the sketch below).
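
A minimal sketch of the intended pattern, reusing the RDD of triples and the table/column names from above: the implicit HBaseConf is created once on the driver and only used inside the closure.

```scala
//define the implicit HBaseConf on the driver, outside of any RDD operator
implicit val hbaseConf = HBaseConf.createFromSpark(sc.getConf)

rdd.foreachPartition {
  partition =>
    //the implicit defined above is captured here; do not create it inside the closure
    partition.toList.toHBase("mytable")
      .insert("col1", "col2")
      .inColumnFamily("columnFamily")
      .save()
}
```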

The approach above uses a put list on the HBase table and writes all of the data in the collection in a single batch. If you want to go through a write buffer instead, pass two extra parameters to `toHBase` (they correspond to `writeBufferSize` and `asynProcess` of `CollectionWriterBuildMaker`):

```scala
dataList.toHBase("mytable"
  //size of the write buffer
  , Some(5L * 1024 * 1024)
  //write through a buffer instead of a single put list
  , asynProcess = true)
  .insert("col1", "col2")
  .inColumnFamily("columnFamily")
  .save()
```

With this approach every element in the collection is written individually, but the writes are buffered, so data is only flushed to HBase in batches once the write buffer is full.

#### Adding a salt prefix to the rowkey when writing

```scala
rdd.toHBase("mytable")
  .insert("col1", "otherColumnFamily:col2")
  .inColumnFamily("defaultColumnFamily")
  //add the salt
  .withSalt(saltArray)
  .save()
```

saltArray is an array of strings defined by the user, for example simply the strings "0" through "9".

After calling `withSalt`, a string chosen from saltArray is prepended to the rowkey when writing to HBase. **Note: to better support HBase partial key scans (rowkeys are left-aligned), all elements of the array should have the same length.**

There are two ways the salt can be chosen:
* 1. Take the (non-negative) hash code of the current rowkey modulo the length of saltArray and use the element at that index as the salt prefix for the rowkey
* 2. Use a random number generator to pick an index smaller than the length of saltArray and take the element at that index

The first way is what is currently used (see the sketch below for a possible saltArray).
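
A minimal sketch of such a salt array, using the single-character strings "0" through "9" (all of equal length, as required):

```scala
//ten single-character salts of equal length
val saltArray: Iterable[String] = (0 to 9).map(_.toString)
```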

### Reading data from HBase

**Import the implicit conversions:**

```scala
import info.xiaohei.spark.connector.hbase._
```

Reading from HBase goes through the SparkContext (sc):

```scala
val hbaseRdd = sc.fromHBase[(String, String, String)]("mytable")
  .select("col1", "col2")
  .inColumnFamily("columnFamily")
  .withStartRow("startRow")
  .withEndRow("endRow")
  //when the rowkeys carry a random salt prefix, pass in the salt array and it is stripped automatically
  //the rowkeys you get back are the original ones, without the salt prefix
  .withSalt(saltArray)
```

(1) `fromHBase` on sc takes the name of the table to read; its type parameter describes the type of the data being read
(2) `select` takes the names of the columns to read
(3) `inColumnFamily` takes the column family these columns belong to
(4) `withStartRow` and `withEndRow` set the rowkey scan range; both are optional
(5) after that, any Spark RDD operation can be applied to hbaseRdd

In the example above, the type parameter of `fromHBase` is a three-element tuple, but `select` only reads two columns. Therefore the first element of the tuple is the rowkey and the remaining elements map to the columns in order.

When you do not need the rowkey, simply change the type parameter of `fromHBase` to a two-element tuple (see the sketch below).

In other words, when n columns are read and the type parameter is an n-tuple, the columns map one-to-one onto the tuple elements;
when n columns are read and the type parameter is an (n+1)-tuple, the first element of the tuple is the rowkey.

When the columns live in different column families, specify the families the same way as when writing to HBase.
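
For example, a minimal sketch that reads the same two columns but does not return the rowkey:

```scala
//two columns selected and a two-element tuple type, so no rowkey is returned
val noRowkeyRdd = sc.fromHBase[(String, String)]("mytable")
  .select("col1", "col2")
  .inColumnFamily("columnFamily")
```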

### SQL on HBase

With the DataFrame API of SQLContext, SQL on HBase is easy to build on top of this connector.

hbaseRdd in the example above holds the data read from HBase; transform that RDD as follows:

```scala
//create an RDD of org.apache.spark.sql.Row
val rowRdd = hbaseRdd.map(r => Row(r._1, r._2, r._3))
val sqlContext = new SQLContext(sc)
val df = sqlContext.createDataFrame(
  rowRdd,
  StructType(Array(StructField("col1", StringType), StructField("col2", StringType), StructField("col3", StringType)))
)
df.show()

df.registerTempTable("mytable")
sqlContext.sql("select col1 from mytable").show()
```

### Reading/querying HBase data with a case class

The built-in implicit conversions handle primitive types and tuples. If you want to work with a case class, some extra preparation is needed.

Define a case class such as:

```scala
case class MyClass(name: String, age: Int)
```

If you want to be able to do this:

```scala
val classRdd = sc.fromHBase[MyClass]("tableName")
  .select("name", "age")
  .inColumnFamily("info")

classRdd.map {
  c =>
    (c.name, c.age)
}
```

or this:

```scala
//classRdd has type RDD[MyClass]
classRdd.toHBase("tableName")
  .insert("name", "age")
  .inColumnFamily("info")
  .save()
```

you have to provide implicit methods that can convert the custom case class:

```scala
implicit def myReaderConversion: DataReader[MyClass] = new CustomDataReader[(String, Int), MyClass] {
  override def convert(data: (String, Int)): MyClass = MyClass(data._1, data._2)
}

implicit def myWriterConversion: DataWriter[MyClass] = new CustomDataWriter[MyClass, (String, Int)] {
  override def convert(data: MyClass): (String, Int) = (data.name, data.age)
}
```

Each implicit method returns a DataReader/DataWriter and overrides the convert method of CustomDataReader/CustomDataWriter,
converting the case class into a tuple or the tuple back into the case class.

## HBase with Kerberos authentication

In addition to the configuration needed to write to HBase described above, the following three settings have to be provided:

- spark.hbase.krb.principal: the principal to authenticate as
- spark.hbase.krb.keytab: path to the keytab file (it must exist on every node, at the same path)
- spark.hbase.config: path to the hbase-site.xml file

The provided Kerberos information is used for authentication when writing to HBase; a minimal sketch of setting these values is shown below.
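
A minimal sketch of supplying these settings through SparkConf; the principal and paths below are placeholders:

```scala
val sparkConf = new SparkConf()
  .set("spark.hbase.host", "your-hbase-host")
  //placeholder principal and paths
  .set("spark.hbase.krb.principal", "user@EXAMPLE.COM")
  .set("spark.hbase.krb.keytab", "/path/to/user.keytab")
  .set("spark.hbase.config", "/path/to/hbase-site.xml")
```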

Currently only reading from a Kerberos-enabled HBase is supported seamlessly.
Writing has some restrictions; for example, use the RDD's foreachPartition to load the data:

```scala
rdd.foreachPartition {
  data =>
    data.toList.toHBase("table").insert("columns") //...
}
```

**Note: the toList call inside foreachPartition loads all of the partition's data into memory. If the amount of data is very large this can cause an OOM; increase the executor memory in that case.**

TODO: the RDD read/write interfaces do not implement Kerberos authentication yet.

## MySQL

Besides writing RDDs/collections to HBase, MySQL operations are also available, even from plain (non-Spark) programs.

### Setting the connection information in the conf

**1. Inside a Spark program**

Set the following in the SparkConf:

```scala
sparkConf
  .set("spark.mysql.host", "your-host")
  .set("spark.mysql.username", "your-username")
  .set("spark.mysql.password", "your-passwd")
  .set("spark.mysql.port", "db-port")
  .set("spark.mysql.db", "database-name")

//create the implicit MysqlConf variable
implicit val mysqlConf = MysqlConf.createFromSpark(sc)
```

About this implicit variable: inside RDD operations such as foreachPartition or mapPartitions, the default implicit conversion to MysqlConf runs into serialization problems, so the variable has to be declared explicitly as shown above; for operations that do not involve sending data over the network, this step can be omitted (see the sketch below).
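
A minimal sketch of that pattern, using the same placeholder names as the examples below: the implicit MysqlConf is declared explicitly on the driver and only used inside the closure.

```scala
//declare the implicit MysqlConf explicitly so it can be captured by the closure
implicit val mysqlConf = MysqlConf.createFromSpark(sc)

rdd.foreachPartition {
  data =>
    data.toList.toMysql("table-name")
      .insert("columns")
      .save()
}
```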

The ways of setting properties described in the HBase section also apply here.

**2. Inside a plain program**

Create a MysqlConf and set the relevant properties:

```scala
//create the implicit MysqlConf variable
implicit val mysqlConf = MysqlConf.createConf(
  "your-host",
  "username",
  "password",
  "port",
  "db-name"
)
```

When working in a plain program, the implicit MysqlConf variable must always be declared explicitly.

### Writing to MySQL

Import the implicit conversions:

```scala
import info.xiaohei.spark.connector.mysql._
```

After that, any Iterable can be written to MySQL directly:

```scala
list.toMysql("table-name")
  //names of the columns to insert
  .insert("columns")
  //where condition, e.g. age=1
  .where("where-conditions")
  .save()
```
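
For example, a minimal sketch assuming a table named `user` with columns `name` and `age`, an implicit MysqlConf in scope, and that `insert` accepts several column names in the same way as the HBase builders do:

```scala
//"user", "name" and "age" are placeholder table/column names
val users = Seq(("Tom", 18), ("Jerry", 20))
users.toMysql("user")
  .insert("name", "age")
  .save()
```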


### Reading data from MySQL inside a Spark program

```scala
val res = sc.fromMysql[(Int, String, Int)]("table-name")
  .select("id", "name", "age")
  .where("where-conditions")
  .get
```

### Reading data from MySQL in a plain program

```scala
//entry point for reading relational databases from a plain program
val dbEntry = new RelationalDbEntry

val res = dbEntry.fromMysql[(Int, String, Int)]("table-name")
  .select("id", "name", "age")
  .where("where-conditions")
  .get
```

Once the database entry point has been created, the rest of the flow is the same as in Spark.

### case class mapping

If you want to read/write MySQL with a custom case class, for example:

```scala
case class Model(id: Int, name: String, age: Int)
```

the basic flow is the same as in the HBase section: define the implicit conversions:

```scala
implicit def myExecutorConversion: DataExecutor[Model] = new CustomDataExecutor[Model, (Int, String, Int)]() {
  override def convert(data: Model): (Int, String, Int) = (data.id, data.name, data.age)
}

implicit def myMapperConversion: DataMapper[Model] = new CustomDataMapper[(Int, String, Int), Model]() {
  override def convert(data: (Int, String, Int)): Model = Model(data._1, data._2, data._3)
}
```

After that they can be used directly:

```scala
val entry = new RelationalDbEntry
val res = entry.fromMysql[Model]("test")
  .select("id", "name", "age")
  .get
res.foreach(x => println(s"id:${x.id},name:${x.name},age:${x.age}"))
```
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>info.xiaohei.www</groupId>
    <artifactId>spark-database-connector_2.10</artifactId>
    <version>1.0.0</version>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <scala.version>2.10.5</scala.version>
        <scala.binary.version>2.10</scala.binary.version>
        <spark.version>1.6.0</spark.version>
        <hbase.version>1.2.0</hbase.version>
        <mysql.version>5.1.35</mysql.version>
    </properties>

    <repositories>
        <repository>
            <id>alimaven</id>
            <name>aliyun maven</name>
            <url>http://maven.aliyun.com/nexus/content/groups/public/</url>
        </repository>
        <repository>
            <id>jcenter</id>
            <name>jcenter Repository</name>
            <url>http://jcenter.bintray.com/</url>
            <snapshots>
                <enabled>false</enabled>
            </snapshots>
        </repository>
    </repositories>

    <dependencies>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_${scala.binary.version}</artifactId>
            <version>${spark.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_${scala.binary.version}</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_${scala.binary.version}</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-mllib_${scala.binary.version}</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-test-tags_${scala.binary.version}</artifactId>
            <version>${spark.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-common</artifactId>
            <version>${hbase.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>${hbase.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>${hbase.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-protocol</artifactId>
            <version>${hbase.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.11</version>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>${mysql.version}</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-gpg-plugin</artifactId>
                <version>1.5</version>
                <executions>
                    <execution>
                        <id>sign-artifacts</id>
                        <phase>verify</phase>
                        <goals>
                            <goal>sign</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>

    <packaging>jar</packaging>
    <parent>
        <groupId>org.sonatype.oss</groupId>
        <artifactId>oss-parent</artifactId>
        <version>7</version>
    </parent>

    <licenses>
        <license>
            <name>The Apache Software License, Version 2.0</name>
            <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
            <distribution>repo</distribution>
        </license>
    </licenses>

    <scm>
        <url>https://github.com/chubbyjiang/Spark_DB_Connector</url>
        <connection>git@github.com:chubbyjiang/Spark_DB_Connector.git</connection>
    </scm>
    <url>https://www.xiaohei.info</url>

    <developers>
        <developer>
            <name>xiaohei</name>
            <email>xiaohei.info@gmail.com</email>
            <url>https://www.xiaohei.info</url>
        </developer>
    </developers>
</project>
--------------------------------------------------------------------------------
/src/main/scala/META-INF/MANIFEST.MF:
--------------------------------------------------------------------------------
1 | Manifest-Version: 1.0
2 | Main-Class: info.xiaohei.spark.connector.hbase.Test
3 |
4 |
--------------------------------------------------------------------------------
/src/main/scala/info/xiaohei/spark/connector/HBaseEntry.scala:
--------------------------------------------------------------------------------
1 | package info.xiaohei.spark.connector
2 |
3 | import info.xiaohei.spark.connector.hbase.salt.SaltProducerFactory
4 | import info.xiaohei.spark.connector.hbase.transformer.writer.SingleColumnDataWriter
5 | import org.apache.hadoop.hbase.{CellUtil, HBaseConfiguration, TableName}
6 | import org.apache.hadoop.hbase.client.{ConnectionFactory, Get}
7 | import org.apache.hadoop.hbase.util.Bytes
8 |
9 | /**
10 | * Author: xiaohei
11 | * Date: 2017/6/10
12 | * Email: xiaohei.info@gmail.com
13 | * Host: xiaohei.info
14 | */
15 | class HBaseEntry extends Serializable {
16 | def singleQuery(tableName: String, rowkey: String, salts: Iterable[String],
17 | columnFamily: String, columns: Iterable[String]) = {
18 | val finalRowkey = if (salts.isEmpty) {
19 | rowkey
20 | } else {
21 | val saltProducer = new SaltProducerFactory[String]().getHashProducer(salts)
22 | val writer = new SingleColumnDataWriter[String] {
23 | override def writeSingleColumn(data: String): Option[Array[Byte]] = Some(Bytes.toBytes(data))
24 | }
25 | val rawRowkey = writer.writeSingleColumn(rowkey).get
26 | saltProducer.salting(rawRowkey) + Bytes.toString(rawRowkey)
27 | }
28 |
29 | val conf = HBaseConfiguration.create()
30 | val connection = ConnectionFactory.createConnection(conf)
31 | val table = connection.getTable(TableName.valueOf(tableName))
32 | val get = new Get(Bytes.toBytes(finalRowkey))
33 | for (col <- columns) {
34 | get.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(col))
35 | }
36 | val result = table.get(get)
37 |
38 | val res = new collection.mutable.ListBuffer[String]()
39 | val cells = result.listCells().iterator()
40 | while (cells.hasNext) {
41 | res.append(Bytes.toString(CellUtil.cloneValue(cells.next())))
42 | }
43 | res
44 | }
45 |
46 |
47 | }
48 |
--------------------------------------------------------------------------------
/src/main/scala/info/xiaohei/spark/connector/RelationalDbEntry.scala:
--------------------------------------------------------------------------------
1 | package info.xiaohei.spark.connector
2 |
3 | /**
4 | * Author: xiaohei
5 | * Date: 2017/4/10
6 | * Email: xiaohei.info@gmail.com
7 | * Host: xiaohei.info
8 | */
9 | class RelationalDbEntry extends Serializable{
10 |
11 | }
12 |
--------------------------------------------------------------------------------
/src/main/scala/info/xiaohei/spark/connector/hbase/HBaseCommonUtils.scala:
--------------------------------------------------------------------------------
1 | package info.xiaohei.spark.connector.hbase
2 |
3 | /**
4 | * Author: xiaohei
5 | * Date: 2017/3/21
6 | * Email: xiaohei.info@gmail.com
7 | * Host: www.xiaohei.info
8 | */
9 | private[hbase] object HBaseCommonUtils {
10 |
11 | def columnsWithFamily(defaultColumnFamily: Option[String], columns: Iterable[String]): Iterable[(String, String)] = {
12 | columns.map {
13 | c =>
14 | if (c.contains(":")) {
15 | (c.substring(0, c.indexOf(":")), c.substring(c.indexOf(":") + 1))
16 | }
17 | else if (defaultColumnFamily.isEmpty) {
18 | throw new IllegalArgumentException("Default column family is mandatory when column names are not fully qualified")
19 | }
20 | else {
21 | (defaultColumnFamily.get, c)
22 | }
23 | }
24 | }
25 |
26 | def getFullColumnNames(defaultColumnFamily: Option[String], columns: Iterable[String]): Iterable[String] = {
27 | columnsWithFamily(defaultColumnFamily, columns).map {
28 | case (f, c) => s"$f:$c"
29 | }
30 |
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/src/main/scala/info/xiaohei/spark/connector/hbase/HBaseConf.scala:
--------------------------------------------------------------------------------
1 | package info.xiaohei.spark.connector.hbase
2 |
3 | import org.apache.hadoop.fs.Path
4 | import org.apache.hadoop.hbase.{HBaseConfiguration, HConstants}
5 | import org.apache.spark.SparkConf
6 |
7 | /**
8 | * Author: xiaohei
9 | * Date: 2017/3/21
10 | * Email: xiaohei.info@gmail.com
11 | * Host: www.xiaohei.info
12 | */
13 |
14 | case class HBaseConf private[hbase](hbaseHost: Option[String]
15 | , hbaseConfig: Option[String] = None
16 | , principal: Option[String] = None
17 | , keytab: Option[String] = None) {
18 | def createHadoopBaseConf() = {
19 | val conf = HBaseConfiguration.create()
20 |
21 | hbaseConfig.foreach {
22 | hbaseConfigValue =>
23 | for (localConf <- hbaseConfigValue.split(",")) {
24 | //todo: handle the case where the path does not exist
25 | conf.addResource(new Path(localConf))
26 | }
27 | }
28 |
29 | //todo: test both ways of reading the config file
30 | // val localConfigFile = Option(getClass.getClassLoader.getResource(hbaseConfig))
31 | // localConfigFile.foreach(c => conf.addResource(c))
32 |
33 | hbaseHost.foreach {
34 | host =>
35 | conf.set(HConstants.ZOOKEEPER_QUORUM, host)
36 | }
37 | if (conf.get(HConstants.ZOOKEEPER_QUORUM).isEmpty) {
38 | conf.set(HConstants.ZOOKEEPER_QUORUM, HBaseConf.defaultHBaseHost)
39 | }
40 |
41 | principal.foreach {
42 | krb =>
43 | conf.set("spark.hbase.krb.principal", krb)
44 | }
45 | keytab.foreach {
46 | key =>
47 | conf.set("spark.hbase.krb.keytab", key)
48 | }
49 |
50 | conf
51 | }
52 | }
53 |
54 | object HBaseConf {
55 |
56 | val defaultHBaseHost = "localhost"
57 |
58 | def createFromSpark(conf: SparkConf) = {
59 | val hbaseHost = conf.get("spark.hbase.host", null)
60 | val hbaseConfig = conf.get("spark.hbase.config", null)
61 |
62 | val principal = conf.get("spark.hbase.krb.principal", null)
63 | val keytab = conf.get("spark.hbase.krb.keytab", null)
64 | HBaseConf(Option(hbaseHost), Option(hbaseConfig), Option(principal), Option(keytab))
65 | }
66 |
67 | def createConf(hbaseHost: String) = {
68 | HBaseConf(Option(hbaseHost))
69 | }
70 | }
71 |
--------------------------------------------------------------------------------
/src/main/scala/info/xiaohei/spark/connector/hbase/builder/reader/HBaseContext.scala:
--------------------------------------------------------------------------------
1 | package info.xiaohei.spark.connector.hbase.builder.reader
2 |
3 | import org.apache.spark.SparkContext
4 |
5 | import scala.reflect.ClassTag
6 |
7 | /**
8 | * Author: xiaohei
9 | * Date: 2017/3/21
10 | * Email: xiaohei.info@gmail.com
11 | * Host: www.xiaohei.info
12 | */
13 | private[hbase] class HBaseContext(@transient sc: SparkContext) extends Serializable {
14 | def fromHBase[R: ClassTag](tableName: String): HBaseReaderBuilder[R] = new HBaseReaderBuilder[R](sc, tableName = tableName)
15 | }
16 |
17 | trait HBaseContextConversions extends Serializable {
18 | implicit def toHBaseContext(sc: SparkContext): HBaseContext = new HBaseContext(sc)
19 | }
20 |
--------------------------------------------------------------------------------
/src/main/scala/info/xiaohei/spark/connector/hbase/builder/reader/HBaseReaderBuilder.scala:
--------------------------------------------------------------------------------
1 | package info.xiaohei.spark.connector.hbase.builder.reader
2 |
3 | import info.xiaohei.spark.connector.hbase.salt.SaltProducerFactory
4 | import info.xiaohei.spark.connector.hbase.transformer.reader.DataReader
5 | import info.xiaohei.spark.connector.hbase.{HBaseCommonUtils, HBaseConf}
6 | import org.apache.hadoop.hbase.client.Result
7 | import org.apache.hadoop.hbase.io.ImmutableBytesWritable
8 | import org.apache.hadoop.hbase.mapreduce.TableInputFormat
9 | import org.apache.hadoop.security.UserGroupInformation
10 | import org.apache.spark.SparkContext
11 | import org.apache.spark.rdd.{HBaseScanRDD, NewHadoopRDD, RDD}
12 |
13 | import scala.reflect.ClassTag
14 |
15 | /**
16 | * Author: xiaohei
17 | * Date: 2017/3/21
18 | * Email: xiaohei.info@gmail.com
19 | * Host: www.xiaohei.info
20 | */
21 | case class HBaseReaderBuilder[R: ClassTag] private[hbase](
22 | @transient sc: SparkContext,
23 | private[hbase] val tableName: String,
24 | private[hbase] val defaultColumnFamily: Option[String] = None,
25 | private[hbase] val columns: Iterable[String] = Seq.empty,
26 | private[hbase] val startRow: Option[String] = None,
27 | private[hbase] val stopRow: Option[String] = None,
28 | private[hbase] val salts: Iterable[String] = Seq.empty
29 | ) {
30 | def select(columns: String*): HBaseReaderBuilder[R] = {
31 | require(this.columns.isEmpty, "Columns have already been set")
32 | require(columns.nonEmpty, "You should provide at least one column")
33 | this.copy(columns = columns)
34 | }
35 |
36 | def select(columns: Iterable[String]): HBaseReaderBuilder[R] = {
37 | require(this.columns.isEmpty, "Columns have already been set")
38 | require(columns.nonEmpty, "You should provide at least one column")
39 | this.copy(columns = columns)
40 | }
41 |
42 | def inColumnFamily(columnFamily: String): HBaseReaderBuilder[R] = {
43 | require(this.defaultColumnFamily.isEmpty, "Default column family has already been set")
44 | require(columnFamily.nonEmpty, "Invalid column family provided")
45 | this.copy(defaultColumnFamily = Some(columnFamily))
46 | }
47 |
48 | def withStartRow(startRow: String): HBaseReaderBuilder[R] = {
49 | require(startRow.nonEmpty, s"Invalid start row '$startRow'")
50 | require(this.startRow.isEmpty, "Start row has already been set")
51 | this.copy(startRow = Some(startRow))
52 | }
53 |
54 | def withEndRow(endRow: String): HBaseReaderBuilder[R] = {
55 | require(endRow.nonEmpty, s"Invalid stop row '$endRow'")
56 | require(this.stopRow.isEmpty, "Stop row has already been set")
57 | this.copy(stopRow = Some(endRow))
58 | }
59 |
60 | def withSalt(salts: Iterable[String]) = {
61 | require(salts.size > 1, "Invalid salting. Two or more elements are required")
62 | require(this.salts.isEmpty, "Salting has already been set")
63 |
64 | this.copy(salts = salts)
65 | }
66 |
67 | private[hbase] def withRanges(startRow: Option[String], stopRow: Option[String]) = {
68 | copy(startRow = startRow, stopRow = stopRow)
69 | }
70 | }
71 |
72 | trait HBaseReaderBuilderConversions extends Serializable {
73 | implicit def toHBaseRDD[R: ClassTag](builder: HBaseReaderBuilder[R])
74 | (implicit reader: DataReader[R], saltProducerFactory: SaltProducerFactory[String]): RDD[R] = {
75 | if (builder.salts.isEmpty) {
76 | toSimpleHBaseRdd(builder)
77 | } else {
78 | val saltLength = saltProducerFactory.getHashProducer(builder.salts).singleSaltength
79 | val sortedSalts = builder.salts.toList.sorted.map(Some(_))
80 | val ranges = sortedSalts.zip(sortedSalts.drop(1) :+ None)
81 | val rddSeq = ranges.map {
82 | salt =>
83 | builder.withRanges(
84 | if (builder.startRow.nonEmpty) Some(salt._1.get + builder.startRow.get) else salt._1,
85 | if (builder.stopRow.nonEmpty) Some(salt._1.get + builder.stopRow.get) else salt._2
86 | )
87 | }.map {
88 | builder =>
89 | toSimpleHBaseRdd(builder, saltLength).asInstanceOf[RDD[R]]
90 | }
91 | val sc = rddSeq.head.sparkContext
92 | new HBaseSaltRDD[R](sc, rddSeq)
93 | }
94 | }
95 |
96 | private def toSimpleHBaseRdd[R: ClassTag](builder: HBaseReaderBuilder[R], saltsLength: Int = 0)
97 | (implicit reader: DataReader[R]): HBaseSimpleRDD[R] = {
98 | val hbaseConfig = HBaseConf.createFromSpark(builder.sc.getConf).createHadoopBaseConf()
99 | hbaseConfig.set(TableInputFormat.INPUT_TABLE, builder.tableName)
100 | require(builder.columns.nonEmpty, "No columns have been defined for the operation")
101 | val columnNames = builder.columns
102 | val fullColumnNames = HBaseCommonUtils.getFullColumnNames(builder.defaultColumnFamily, columnNames)
103 | if (fullColumnNames.nonEmpty) {
104 | hbaseConfig.set(TableInputFormat.SCAN_COLUMNS, fullColumnNames.mkString(" "))
105 | }
106 | if (builder.startRow.nonEmpty) {
107 | hbaseConfig.set(TableInputFormat.SCAN_ROW_START, builder.startRow.get)
108 | }
109 | if (builder.stopRow.nonEmpty) {
110 | hbaseConfig.set(TableInputFormat.SCAN_ROW_STOP, builder.stopRow.get)
111 | }
112 |
113 | //Kerberos authentication
114 | val rdd = if (hbaseConfig.get("spark.hbase.krb.principal") == null || hbaseConfig.get("spark.hbase.krb.keytab") == null) {
115 | //todo:asInstanceOf
116 | builder.sc.newAPIHadoopRDD(hbaseConfig
117 | , classOf[TableInputFormat]
118 | , classOf[ImmutableBytesWritable]
119 | , classOf[Result])
120 | .asInstanceOf[NewHadoopRDD[ImmutableBytesWritable, Result]]
121 | } else {
122 | val principle = hbaseConfig.get("spark.hbase.krb.principal")
123 | val keytab = hbaseConfig.get("spark.hbase.krb.keytab")
124 | new HBaseScanRDD[ImmutableBytesWritable, Result](principle, keytab
125 | , builder.sc
126 | , classOf[TableInputFormat]
127 | , classOf[ImmutableBytesWritable]
128 | , classOf[Result]
129 | , hbaseConfig)
130 | }
131 | new HBaseSimpleRDD[R](rdd, builder, saltsLength)
132 | }
133 | }
134 |
--------------------------------------------------------------------------------
/src/main/scala/info/xiaohei/spark/connector/hbase/builder/reader/HBaseSaltRDD.scala:
--------------------------------------------------------------------------------
1 | package info.xiaohei.spark.connector.hbase.builder.reader
2 |
3 | import org.apache.spark.SparkContext
4 | import org.apache.spark.rdd.{RDD, UnionRDD}
5 |
6 | import scala.reflect.ClassTag
7 |
8 | /**
9 | * Author: xiaohei
10 | * Date: 2017/4/25
11 | * Email: xiaohei.info@gmail.com
12 | * Host: xiaohei.info
13 | */
14 | class HBaseSaltRDD[R: ClassTag](sc: SparkContext, rdds: Seq[RDD[R]]) extends UnionRDD[R](sc, rdds) {
15 |
16 | }
17 |
--------------------------------------------------------------------------------
/src/main/scala/info/xiaohei/spark/connector/hbase/builder/reader/HBaseSimpleRDD.scala:
--------------------------------------------------------------------------------
1 | package info.xiaohei.spark.connector.hbase.builder.reader
2 |
3 | import info.xiaohei.spark.connector.hbase.HBaseCommonUtils
4 | import info.xiaohei.spark.connector.hbase.transformer.reader.DataReader
5 | import org.apache.hadoop.hbase.CellUtil
6 | import org.apache.hadoop.hbase.client.Result
7 | import org.apache.hadoop.hbase.io.ImmutableBytesWritable
8 | import org.apache.hadoop.hbase.util.Bytes
9 | import org.apache.spark.annotation.DeveloperApi
10 | import org.apache.spark.rdd.{NewHadoopRDD, RDD}
11 | import org.apache.spark.{Partition, TaskContext}
12 |
13 | import scala.reflect.ClassTag
14 |
15 | /**
16 | * Author: xiaohei
17 | * Date: 2017/3/21
18 | * Email: xiaohei.info@gmail.com
19 | * Host: www.xiaohei.info
20 | */
21 | //todo:SimpleHBaseRdd
22 | class HBaseSimpleRDD[R: ClassTag](hadoopHBaseRDD: RDD[(ImmutableBytesWritable, Result)],
23 | builder: HBaseReaderBuilder[R], saltsLength: Int)
24 | (implicit reader: DataReader[R]) extends RDD[R](hadoopHBaseRDD) {
25 | @DeveloperApi
26 | override def compute(split: Partition, context: TaskContext): Iterator[R] = {
27 | firstParent[(ImmutableBytesWritable, Result)].iterator(split, context)
28 | .map(e => convert(e._1, e._2))
29 | }
30 |
31 | override protected def getPartitions: Array[Partition] = {
32 | firstParent[(ImmutableBytesWritable, Result)].partitions
33 | }
34 |
35 | private def convert(key: ImmutableBytesWritable, row: Result) = {
36 | //val columnNames = Utils.chosenColumns(builder.columns, reader.columns)
37 | require(builder.columns.nonEmpty, "No columns have been defined for the operation")
38 | val columnNames = builder.columns
39 | val columnsWithFamily = HBaseCommonUtils.columnsWithFamily(builder.defaultColumnFamily, columnNames)
40 | val columns = columnsWithFamily
41 | .map(t => (Bytes.toBytes(t._1), Bytes.toBytes(t._2)))
42 | .map {
43 | t =>
44 | if (row.containsColumn(t._1, t._2)) {
45 | Some(CellUtil.cloneValue(row.getColumnLatestCell(t._1, t._2)))
46 | } else {
47 | None
48 | }
49 | }.toList
50 | reader.read(Some(key.get.drop(saltsLength)) :: columns)
51 | }
52 | }
53 |
--------------------------------------------------------------------------------
/src/main/scala/info/xiaohei/spark/connector/hbase/builder/writer/CollectionWriterBuilder.scala:
--------------------------------------------------------------------------------
1 | package info.xiaohei.spark.connector.hbase.builder.writer
2 |
3 | import java.security.PrivilegedAction
4 |
5 | import info.xiaohei.spark.connector.hbase.HBaseConf
6 | import info.xiaohei.spark.connector.hbase.salt.{SaltProducer, SaltProducerFactory}
7 | import info.xiaohei.spark.connector.hbase.transformer.writer.DataWriter
8 | import org.apache.hadoop.hbase.TableName
9 | import org.apache.hadoop.hbase.client._
10 | import org.apache.hadoop.hbase.util.Bytes
11 | import org.apache.hadoop.security.UserGroupInformation
12 |
13 | import scala.collection.JavaConversions._
14 |
15 | /**
16 | * Author: xiaohei
17 | * Date: 2017/3/21
18 | * Email: xiaohei.info@gmail.com
19 | * Host: www.xiaohei.info
20 | */
21 |
22 | case class CollectionWriterBuilder[C] private[hbase](
23 | private[hbase] val hBaseConf: HBaseConf,
24 | private[hbase] val collectionData: Iterable[C],
25 | private[hbase] val tableName: String,
26 | private[hbase] val writeBufferSize: Option[Long],
27 | private[hbase] val asynProcess: Boolean,
28 | private[hbase] val defaultColumnFamily: Option[String] = None,
29 | private[hbase] val columns: Iterable[String] = Seq.empty,
30 | private[hbase] val salts: Iterable[String] = Seq.empty
31 | ) {
32 | def insert(cols: String*) = {
33 | require(this.columns.isEmpty, "Columns have already been set")
34 | require(cols.nonEmpty, "Columns must be set, at least one")
35 | this.copy(columns = cols)
36 | }
37 |
38 | def insert(cols: Iterable[String]) = {
39 | require(this.columns.isEmpty, "Columns have already been set")
40 | require(cols.nonEmpty, "Columns must be set, at least one")
41 | this.copy(columns = cols)
42 | }
43 |
44 | def inColumnFamily(family: String) = {
45 | require(this.defaultColumnFamily.isEmpty, "Default column family has already been set")
46 | require(family.nonEmpty, "Column family must be provided")
47 | this.copy(defaultColumnFamily = Some(family))
48 | }
49 |
50 | def withSalt(salts: Iterable[String]) = {
51 | require(salts.size > 1, "Invalid salting. Two or more elements are required")
52 | require(this.salts.isEmpty, "Salting has already been set")
53 |
54 | this.copy(salts = salts)
55 | }
56 | }
57 |
58 | private[hbase] class CollectionWriterBuildMaker[C](collectionData: Iterable[C])(implicit hBaseConf: HBaseConf) extends Serializable {
59 | def toHBase(tableName: String
60 | , writeBufferSize: Option[Long] = None
61 | , asynProcess: Boolean = false)
62 | = CollectionWriterBuilder[C](hBaseConf, collectionData, tableName, writeBufferSize, asynProcess)
63 | }
64 |
65 | private[hbase] class CollectionWriter[C](builder: CollectionWriterBuilder[C])
66 | (implicit writer: DataWriter[C], saltProducerFactory: SaltProducerFactory[String]) extends Serializable {
67 | def save(): Unit = {
68 | //val conf = HBaseConf.createHBaseConf(builder.hbaseHost).createHadoopBaseConf()
69 | val conf = builder.hBaseConf.createHadoopBaseConf()
70 |
71 | val connection = if (conf.get("spark.hbase.krb.principal") == null || conf.get("spark.hbase.krb.keytab") == null) {
72 | ConnectionFactory.createConnection(conf)
73 | }
74 | else {
75 | UserGroupInformation.setConfiguration(conf)
76 | val ugi: UserGroupInformation = UserGroupInformation
77 | .loginUserFromKeytabAndReturnUGI(conf.get("spark.hbase.krb.principal"), conf.get("spark.hbase.krb.keytab"))
78 | UserGroupInformation.setLoginUser(ugi)
79 | ugi.doAs(new PrivilegedAction[Connection] {
80 | def run: Connection = {
81 | ConnectionFactory.createConnection(conf)
82 | }
83 | })
84 | }
85 |
86 |
87 | val tableName = TableName.valueOf(builder.tableName)
88 |
89 | val saltProducer: Option[SaltProducer[String]] = if (builder.salts.isEmpty) None else Some(saltProducerFactory.getHashProducer(builder.salts))
90 |
91 | def coverData(data: C): Put = {
92 | val convertedData: Iterable[Option[Array[Byte]]] = writer.write(data)
93 | if (convertedData.size < 2) {
94 | throw new IllegalArgumentException("Expected at least two converted values, the first one should be the row key")
95 | }
96 | //val columnsNames = Utils.chosenColumns(builder.columns, writer.columns)
97 | require(builder.columns.nonEmpty, "No columns have been defined for the operation")
98 | val columnNames = builder.columns
99 | val rawRowkey = convertedData.head.get
100 | val columnData = convertedData.drop(1)
101 |
102 |
103 | if (columnData.size != columnNames.size) {
104 | throw new IllegalArgumentException(s"Wrong number of columns. Expected ${columnNames.size} found ${columnData.size}")
105 | }
106 | val rowkey = if (saltProducer.isEmpty) rawRowkey else Bytes.toBytes(saltProducer.get.salting(rawRowkey) + Bytes.toString(rawRowkey))
107 | val put = new Put(rowkey)
108 | columnNames.zip(columnData).foreach {
109 | case (name, Some(value)) =>
110 | val family = if (name.contains(":")) Bytes.toBytes(name.substring(0, name.indexOf(":"))) else Bytes.toBytes(builder.defaultColumnFamily.get)
111 | val column = if (name.contains(":")) Bytes.toBytes(name.substring(name.indexOf(":") + 1)) else Bytes.toBytes(name)
112 | put.addColumn(family, column, value)
113 | case _ =>
114 | }
115 | put
116 | }
117 |
118 | if (builder.asynProcess) {
119 | val params = new BufferedMutatorParams(tableName).writeBufferSize(builder.writeBufferSize.get)
120 | val mutator = connection.getBufferedMutator(params)
121 | builder.collectionData.foreach(data => mutator.mutate(coverData(data)))
122 | mutator.close()
123 | } else {
124 | val table = connection.getTable(tableName)
125 | val putList = builder.collectionData.map(coverData).toList
126 | table.put(putList)
127 | table.close()
128 | }
129 | connection.close()
130 | }
131 |
132 | }
133 |
134 |
135 | trait CollectionWriterBuilderConversions extends Serializable {
136 | implicit def collectionToBuildMaker[C](collectionData: Iterable[C])(implicit hBaseConf: HBaseConf): CollectionWriterBuildMaker[C] = new CollectionWriterBuildMaker[C](collectionData)
137 |
138 | implicit def collectionBuilderToWriter[C](builder: CollectionWriterBuilder[C])(implicit writer: DataWriter[C], saltProducerFactory: SaltProducerFactory[String]): CollectionWriter[C] = new CollectionWriter[C](builder)
139 | }
140 |
--------------------------------------------------------------------------------
/src/main/scala/info/xiaohei/spark/connector/hbase/builder/writer/HBaseWriterBuilder.scala:
--------------------------------------------------------------------------------
1 | package info.xiaohei.spark.connector.hbase.builder.writer
2 |
3 | import info.xiaohei.spark.connector.hbase.HBaseConf
4 | import info.xiaohei.spark.connector.hbase.salt.{SaltProducer, SaltProducerFactory}
5 | import info.xiaohei.spark.connector.hbase.transformer.writer.DataWriter
6 | import org.apache.hadoop.hbase.client.Put
7 | import org.apache.hadoop.hbase.io.ImmutableBytesWritable
8 | import org.apache.hadoop.hbase.mapreduce.TableOutputFormat
9 | import org.apache.hadoop.hbase.util.Bytes
10 | import org.apache.hadoop.mapreduce.Job
11 | import org.apache.spark.rdd.RDD
12 |
13 | /**
14 | * Author: xiaohei
15 | * Date: 2017/3/21
16 | * Email: xiaohei.info@gmail.com
17 | * Host: www.xiaohei.info
18 | */
19 | case class HBaseWriterBuilder[R] private[hbase](
20 | private[hbase] val rdd: RDD[R],
21 | private[hbase] val tableName: String,
22 | //the following parameters are set dynamically via the builder methods
23 | private[hbase] val defaultColumnFamily: Option[String] = None,
24 | private[hbase] val columns: Iterable[String] = Seq.empty,
25 | private[hbase] val salts: Iterable[String] = Seq.empty
26 | )
27 | extends Serializable {
28 |
29 | def insert(cols: String*) = {
30 | require(this.columns.isEmpty, "Columns have already been set")
31 | require(cols.nonEmpty, "Columns must be set, at least one")
32 | this.copy(columns = cols)
33 | }
34 |
35 | def insert(cols: Iterable[String]) = {
36 | require(this.columns.isEmpty, "Columns have already been set")
37 | require(cols.nonEmpty, "Columns must be set, at least one")
38 | this.copy(columns = cols)
39 | }
40 |
41 | def inColumnFamily(family: String) = {
42 | require(this.defaultColumnFamily.isEmpty, "Default column family has already been set")
43 | require(family.nonEmpty, "Column family must be provided")
44 | this.copy(defaultColumnFamily = Some(family))
45 | }
46 |
47 | def withSalt(salts: Iterable[String]) = {
48 | require(salts.size > 1, "Invalid salting. Two or more elements are required")
49 | require(this.salts.isEmpty, "Salting has already been set")
50 |
51 | this.copy(salts = salts)
52 | }
53 | }
54 |
55 | private[hbase] class HBaseWriterBuildMaker[R](rdd: RDD[R]) extends Serializable {
56 | def toHBase(tableName: String) = HBaseWriterBuilder(rdd, tableName)
57 | }
58 |
59 | private[hbase] class HBaseWriter[R](builder: HBaseWriterBuilder[R])(implicit writer: DataWriter[R]
60 | , saltProducerFactory: SaltProducerFactory[String]) extends Serializable {
61 | def save(): Unit = {
62 | val conf = HBaseConf.createFromSpark(builder.rdd.context.getConf).createHadoopBaseConf()
63 | conf.set(TableOutputFormat.OUTPUT_TABLE, builder.tableName)
64 |
65 | val job = Job.getInstance(conf)
66 | job.setOutputFormatClass(classOf[TableOutputFormat[String]])
67 |
68 | val saltProducer: Option[SaltProducer[String]] = if (builder.salts.isEmpty) None else Some(saltProducerFactory.getHashProducer(builder.salts))
69 |
70 | val transRdd = builder.rdd.map {
71 | data =>
72 | val convertedData: Iterable[Option[Array[Byte]]] = writer.write(data)
73 | if (convertedData.size < 2) {
74 | throw new IllegalArgumentException("Expected at least two converted values, the first one should be the row key")
75 | }
76 | require(builder.columns.nonEmpty, "No columns have been defined for the operation")
77 | val columnNames = builder.columns
78 | val rawRowkey = convertedData.head.get
79 | val columnData = convertedData.drop(1)
80 |
81 | if (columnData.size != columnNames.size) {
82 | throw new IllegalArgumentException(s"Wrong number of columns. Expected ${columnNames.size} found ${columnData.size}")
83 | }
84 | //transform rowkey with salt
85 | val rowkey = if (saltProducer.isEmpty) rawRowkey else Bytes.toBytes(saltProducer.get.salting(rawRowkey) + Bytes.toString(rawRowkey))
86 | val put = new Put(rowkey)
87 | columnNames.zip(columnData).foreach {
88 | case (name, Some(value)) =>
89 | val family = if (name.contains(":")) Bytes.toBytes(name.substring(0, name.indexOf(":"))) else Bytes.toBytes(builder.defaultColumnFamily.get)
90 | val column = if (name.contains(":")) Bytes.toBytes(name.substring(name.indexOf(":") + 1)) else Bytes.toBytes(name)
91 | put.addColumn(family, column, value)
92 | case _ =>
93 | }
94 | (new ImmutableBytesWritable, put)
95 | }
96 | transRdd.saveAsNewAPIHadoopDataset(job.getConfiguration)
97 | }
98 | }
99 |
100 | trait HBaseWriterBuilderConversions extends Serializable {
101 |
102 | implicit def rddToHBaseBuildMaker[R](rdd: RDD[R]): HBaseWriterBuildMaker[R] = new HBaseWriterBuildMaker[R](rdd)
103 |
104 | implicit def builderToWriter[R](builder: HBaseWriterBuilder[R])(implicit writer: DataWriter[R], saltProducerFactory: SaltProducerFactory[String]): HBaseWriter[R] = new HBaseWriter[R](builder)
105 | }
106 |
107 |
108 |
109 |
110 |
111 |
--------------------------------------------------------------------------------
/src/main/scala/info/xiaohei/spark/connector/hbase/package.scala:
--------------------------------------------------------------------------------
1 | package info.xiaohei.spark.connector
2 |
3 | import info.xiaohei.spark.connector.hbase.builder.reader.{HBaseContextConversions, HBaseReaderBuilderConversions}
4 | import info.xiaohei.spark.connector.hbase.builder.writer.{CollectionWriterBuilderConversions, HBaseWriterBuilderConversions}
5 | import info.xiaohei.spark.connector.hbase.salt.SaltProducerConversions
6 | import info.xiaohei.spark.connector.hbase.transformer.reader.DataReaderConversions
7 | import info.xiaohei.spark.connector.hbase.transformer.writer.DataWriterConversions
8 |
9 |
10 | /**
11 | * Author: xiaohei
12 | * Date: 2017/3/21
13 | * Email: xiaohei.info@gmail.com
14 | * Host: www.xiaohei.info
15 | */
16 |
17 | package object hbase
18 | extends HBaseWriterBuilderConversions
19 | with HBaseReaderBuilderConversions
20 | with CollectionWriterBuilderConversions
21 | with DataWriterConversions
22 | with DataReaderConversions
23 | with HBaseContextConversions
24 | with SaltProducerConversions
25 |
26 |
--------------------------------------------------------------------------------
/src/main/scala/info/xiaohei/spark/connector/hbase/salt/SaltProducer.scala:
--------------------------------------------------------------------------------
1 | package info.xiaohei.spark.connector.hbase.salt
2 |
3 | import info.xiaohei.spark.connector.hbase.transformer.writer.{DataWriter, SingleColumnDataWriter}
4 |
5 | import scala.reflect.ClassTag
6 | import scala.util.Random
7 |
8 | /**
9 | * Author: xiaohei
10 | * Date: 2017/4/19
11 | * Email: xiaohei.info@gmail.com
12 | * Host: xiaohei.info
13 | */
14 | trait SaltProducer[T] extends Serializable {
15 |
16 | def salting(rowkey: Array[Byte]): T
17 |
18 | protected def salts: Array[T]
19 |
20 | protected def verify(implicit writer: DataWriter[T]): Unit = {
21 | require(singleSaltength > 0, "salt's length must be greater than 0")
22 | }
23 |
24 | def singleSaltength(implicit writer: DataWriter[T]): Int = {
25 | require(writer.isInstanceOf[SingleColumnDataWriter[T]], "salt array must be composed with primitive type")
26 |
27 | val singleColumnDataWriter = writer.asInstanceOf[SingleColumnDataWriter[T]]
28 | salts.map(s => singleColumnDataWriter.writeSingleColumn(s))
29 | .map(b => b.getOrElse(Array[Byte]()))
30 | .map(_.length)
31 | .foldLeft(None.asInstanceOf[Option[Int]])((size1, size2) => {
32 | if (size1.nonEmpty && size1.get != size2) {
33 | throw new IllegalArgumentException(s"salts cannot have different lengths: ${size1.get}, $size2")
34 | }
35 | Some(size2)
36 | }).get
37 | }
38 | }
39 |
40 | private[salt] class RandomSaltProducer[T: ClassTag](val salts: Array[T])(implicit writer: DataWriter[T]) extends SaltProducer[T]() {
41 |
42 | verify
43 |
44 | override def salting(rowkey: Array[Byte]): T = {
45 | val randomizer = new Random
46 | salts(randomizer.nextInt(salts.length))
47 | }
48 | }
49 |
50 | private[salt] class HashSaltProducer[T: ClassTag](val salts: Array[T])(implicit writer: DataWriter[T]) extends SaltProducer[T]() {
51 |
52 | verify
53 |
54 | override def salting(rowkey: Array[Byte]): T = {
55 | salts((java.util.Arrays.hashCode(rowkey) & 0x7fffffff) % salts.length)
56 | }
57 | }
58 |
59 | //todo:ClassTag do what
60 |
--------------------------------------------------------------------------------
/src/main/scala/info/xiaohei/spark/connector/hbase/salt/SaltProducerConversions.scala:
--------------------------------------------------------------------------------
1 | package info.xiaohei.spark.connector.hbase.salt
2 |
3 | import info.xiaohei.spark.connector.hbase.transformer.writer.DataWriter
4 |
5 | import scala.reflect.ClassTag
6 |
7 | /**
8 | * Author: xiaohei
9 | * Date: 2017/4/20
10 | * Email: xiaohei.info@gmail.com
11 | * Host: xiaohei.info
12 | */
13 | trait SaltProducerConversions extends Serializable {
14 | implicit def getSaltProducerFactory[T: ClassTag](implicit writer: DataWriter[T]): SaltProducerFactory[T] = new SaltProducerFactory[T]()
15 | }
16 |
--------------------------------------------------------------------------------
/src/main/scala/info/xiaohei/spark/connector/hbase/salt/SaltProducerFactory.scala:
--------------------------------------------------------------------------------
1 | package info.xiaohei.spark.connector.hbase.salt
2 |
3 | import info.xiaohei.spark.connector.hbase.transformer.writer.DataWriter
4 |
5 | import scala.reflect.ClassTag
6 |
7 | /**
8 | * Author: xiaohei
9 | * Date: 2017/4/19
10 | * Email: xiaohei.info@gmail.com
11 | * Host: xiaohei.info
12 | */
13 | class SaltProducerFactory[T: ClassTag] extends Serializable {
14 | def getHashProducer(saltArray: Iterable[T])(implicit writer: DataWriter[T]): SaltProducer[T] = new HashSaltProducer[T](saltArray.toArray)
15 |
16 | def getRandomProducer(saltArray: Iterable[T])(implicit writer: DataWriter[T]): SaltProducer[T] = new RandomSaltProducer[T](saltArray.toArray)
17 | }
18 |
--------------------------------------------------------------------------------
/src/main/scala/info/xiaohei/spark/connector/hbase/transformer/DataTransformer.scala:
--------------------------------------------------------------------------------
1 | package info.xiaohei.spark.connector.hbase.transformer
2 |
3 | /**
4 | * Author: xiaohei
5 | * Date: 2017/3/21
6 | * Email: xiaohei.info@gmail.com
7 | * Host: www.xiaohei.info
8 | */
9 | trait DataTransformer extends Serializable {
10 | type HBaseData = Iterable[Option[Array[Byte]]]
11 | }
12 |
--------------------------------------------------------------------------------
/src/main/scala/info/xiaohei/spark/connector/hbase/transformer/reader/DataReader.scala:
--------------------------------------------------------------------------------
1 | package info.xiaohei.spark.connector.hbase.transformer.reader
2 |
3 | import info.xiaohei.spark.connector.hbase.transformer.DataTransformer
4 | import org.apache.hadoop.hbase.util.Bytes
5 |
6 | /**
7 | * Author: xiaohei
8 | * Date: 2017/3/21
9 | * Email: xiaohei.info@gmail.com
10 | * Host: www.xiaohei.info
11 | */
12 |
13 | trait DataReader[T] extends DataTransformer {
14 | def read(data: HBaseData): T
15 | }
16 |
17 | trait SingleColumnDataReader[T] extends DataReader[T] {
18 |
19 | override def read(data: HBaseData): T =
20 | if (data.size == 1)
21 | columnMapWithOption(data.head)
22 | else if (data.size == 2)
23 | columnMapWithOption(data.drop(1).head)
24 | else
25 | throw new IllegalArgumentException(s"Unexpected number of columns: expected 1 or 2, returned ${data.size}")
26 |
27 |
28 | def columnMapWithOption(cols: Option[Array[Byte]]) =
29 | if (cols.nonEmpty) readSingleColumn(cols.get)
30 | else throw new IllegalArgumentException("Null value assigned to concrete class. Use Option[T] instead")
31 |
32 | def readSingleColumn(cols: Array[Byte]): T
33 | }
34 |
35 | trait TupleDataReader[T <: Product] extends DataReader[T] {
36 |
37 | val n: Int
38 |
39 | override def read(data: HBaseData): T =
40 | if (data.size == n)
41 | readTupleColumn(data)
42 | else if (data.size == n + 1)
43 | readTupleColumn(data.drop(1))
44 | else
45 | throw new IllegalArgumentException(s"Unexpected number of columns: expected $n or ${n + 1}, returned ${data.size}")
46 |
47 | def readTupleColumn(data: HBaseData): T
48 | }
49 |
50 | abstract class CustomDataReader[S, T](implicit reader: DataReader[S]) extends DataReader[T] {
51 |
52 | override def read(data: HBaseData): T = convert(reader.read(data))
53 |
54 | def convert(data: S): T
55 | }
56 |
57 | //
58 | //trait SingleColumnDataReader[T] extends DataReader[T] {
59 | //
60 | // override def read(data: HBaseData): T =
61 | // if (data.size == 1)
62 | // columnMapWithOption(data.head)
63 | // else if (data.size == 2)
64 | // columnMapWithOption(data.drop(1).head)
65 | // else
66 | // throw new IllegalArgumentException(s"Unexpected number of columns: expected 1 or 2, returned ${data.size}")
67 | //
68 | // def columnMapWithOption(cols: Option[Array[Byte]]): T
69 | //}
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
--------------------------------------------------------------------------------
/src/main/scala/info/xiaohei/spark/connector/hbase/transformer/reader/DataReaderConversions.scala:
--------------------------------------------------------------------------------
1 | package info.xiaohei.spark.connector.hbase.transformer.reader
2 |
3 | import org.apache.hadoop.hbase.util.Bytes
4 |
5 | /**
6 | * Author: xiaohei
7 | * Date: 2017/3/26
8 | * Email: xiaohei.info@gmail.com
9 | * Host: xiaohei.info
10 | */
11 |
12 | trait DataReaderConversions extends Serializable {
13 |
14 | // Simple types
15 |
16 | implicit def intReader: DataReader[Int] = new SingleColumnDataReader[Int] {
17 | override def readSingleColumn(cols: Array[Byte]): Int = Bytes.toInt(cols)
18 | }
19 |
20 | implicit def longReader: DataReader[Long] = new SingleColumnDataReader[Long] {
21 | override def readSingleColumn(cols: Array[Byte]): Long = Bytes.toLong(cols)
22 | }
23 |
24 | implicit def shortReader: DataReader[Short] = new SingleColumnDataReader[Short] {
25 | override def readSingleColumn(cols: Array[Byte]): Short = Bytes.toShort(cols)
26 | }
27 |
28 | implicit def doubleReader: DataReader[Double] = new SingleColumnDataReader[Double] {
29 | override def readSingleColumn(cols: Array[Byte]): Double = Bytes.toDouble(cols)
30 | }
31 |
32 | implicit def floatReader: DataReader[Float] = new SingleColumnDataReader[Float] {
33 | override def readSingleColumn(cols: Array[Byte]): Float = Bytes.toFloat(cols)
34 | }
35 |
36 | implicit def booleanReader: DataReader[Boolean] = new SingleColumnDataReader[Boolean] {
37 | override def readSingleColumn(cols: Array[Byte]): Boolean = Bytes.toBoolean(cols)
38 | }
39 |
40 | implicit def bigDecimalReader: DataReader[BigDecimal] = new SingleColumnDataReader[BigDecimal] {
41 | override def readSingleColumn(cols: Array[Byte]): BigDecimal = Bytes.toBigDecimal(cols)
42 | }
43 |
44 | implicit def stringReader: DataReader[String] = new SingleColumnDataReader[String] {
45 | override def readSingleColumn(cols: Array[Byte]): String = Bytes.toString(cols)
46 | }
47 |
48 | // Options
49 |
50 | implicit def optionReader[T](implicit c: DataReader[T]): DataReader[Option[T]] = new DataReader[Option[T]] {
51 | override def read(data: HBaseData): Option[T] =
52 | if (data.size != 1) throw new IllegalArgumentException(s"Unexpected number of columns: expected 1, returned ${data.size}")
53 | else {
54 | if (!classOf[SingleColumnDataReader[T]].isAssignableFrom(c.getClass)) throw new IllegalArgumentException("Option[T] can be used only with primitive values")
55 | if (data.head.nonEmpty) Some(c.read(data))
56 | else None
57 | }
58 | }
59 |
60 | // Tuples
61 |
62 | implicit def tuple2Reader[T1, T2](implicit m1: DataReader[T1], m2: DataReader[T2]): DataReader[(T1, T2)] = new TupleDataReader[(T1, T2)] {
63 |
64 | val n = 2
65 |
66 | override def readTupleColumn(data: HBaseData) = {
67 | val h1 = data.take(1)
68 | val h2 = data.slice(1, 2)
69 | (m1.read(h1), m2.read(h2))
70 | }
71 | }
72 |
73 | implicit def tuple3Reader[T1, T2, T3](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3]): DataReader[(T1, T2, T3)] = new TupleDataReader[(T1, T2, T3)] {
74 |
75 | val n = 3
76 |
77 | override def readTupleColumn(data: HBaseData) = {
78 | val h1 = data.take(1)
79 | val h2 = data.slice(1, 2)
80 | val h3 = data.slice(2, 3)
81 | (m1.read(h1), m2.read(h2), m3.read(h3))
82 | }
83 | }
84 |
85 | implicit def tuple4Reader[T1, T2, T3, T4](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4]): DataReader[(T1, T2, T3, T4)] = new TupleDataReader[(T1, T2, T3, T4)] {
86 |
87 | val n = 4
88 |
89 | override def readTupleColumn(data: HBaseData) = {
90 | val h1 = data.take(1)
91 | val h2 = data.slice(1, 2)
92 | val h3 = data.slice(2, 3)
93 | val h4 = data.slice(3, 4)
94 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4))
95 | }
96 | }
97 |
98 | implicit def tuple5Reader[T1, T2, T3, T4, T5](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5]): DataReader[(T1, T2, T3, T4, T5)] = new TupleDataReader[(T1, T2, T3, T4, T5)] {
99 |
100 | val n = 5
101 |
102 | override def readTupleColumn(data: HBaseData) = {
103 | val h1 = data.take(1)
104 | val h2 = data.slice(1, 2)
105 | val h3 = data.slice(2, 3)
106 | val h4 = data.slice(3, 4)
107 | val h5 = data.slice(4, 5)
108 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5))
109 | }
110 | }
111 |
112 | implicit def tuple6Reader[T1, T2, T3, T4, T5, T6](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6]): DataReader[(T1, T2, T3, T4, T5, T6)] = new TupleDataReader[(T1, T2, T3, T4, T5, T6)] {
113 |
114 | val n = 6
115 |
116 | override def readTupleColumn(data: HBaseData) = {
117 | val h1 = data.take(1)
118 | val h2 = data.slice(1, 2)
119 | val h3 = data.slice(2, 3)
120 | val h4 = data.slice(3, 4)
121 | val h5 = data.slice(4, 5)
122 | val h6 = data.slice(5, 6)
123 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6))
124 | }
125 | }
126 |
127 | implicit def tuple7Reader[T1, T2, T3, T4, T5, T6, T7](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6], m7: DataReader[T7]): DataReader[(T1, T2, T3, T4, T5, T6, T7)] = new TupleDataReader[(T1, T2, T3, T4, T5, T6, T7)] {
128 |
129 | val n = 7
130 |
131 | override def readTupleColumn(data: HBaseData) = {
132 | val h1 = data.take(1)
133 | val h2 = data.slice(1, 2)
134 | val h3 = data.slice(2, 3)
135 | val h4 = data.slice(3, 4)
136 | val h5 = data.slice(4, 5)
137 | val h6 = data.slice(5, 6)
138 | val h7 = data.slice(6, 7)
139 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6), m7.read(h7))
140 | }
141 | }
142 |
143 | implicit def tuple8Reader[T1, T2, T3, T4, T5, T6, T7, T8](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6], m7: DataReader[T7], m8: DataReader[T8]): DataReader[(T1, T2, T3, T4, T5, T6, T7, T8)] = new TupleDataReader[(T1, T2, T3, T4, T5, T6, T7, T8)] {
144 |
145 | val n = 8
146 |
147 | override def readTupleColumn(data: HBaseData) = {
148 | val h1 = data.take(1)
149 | val h2 = data.slice(1, 2)
150 | val h3 = data.slice(2, 3)
151 | val h4 = data.slice(3, 4)
152 | val h5 = data.slice(4, 5)
153 | val h6 = data.slice(5, 6)
154 | val h7 = data.slice(6, 7)
155 | val h8 = data.slice(7, 8)
156 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6), m7.read(h7), m8.read(h8))
157 | }
158 | }
159 |
160 | implicit def tuple9Reader[T1, T2, T3, T4, T5, T6, T7, T8, T9](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6], m7: DataReader[T7], m8: DataReader[T8], m9: DataReader[T9]): DataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9)] = new TupleDataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9)] {
161 |
162 | val n = 9
163 |
164 | override def readTupleColumn(data: HBaseData) = {
165 | val h1 = data.take(1)
166 | val h2 = data.slice(1, 2)
167 | val h3 = data.slice(2, 3)
168 | val h4 = data.slice(3, 4)
169 | val h5 = data.slice(4, 5)
170 | val h6 = data.slice(5, 6)
171 | val h7 = data.slice(6, 7)
172 | val h8 = data.slice(7, 8)
173 | val h9 = data.slice(8, 9)
174 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6), m7.read(h7), m8.read(h8), m9.read(h9))
175 | }
176 | }
177 |
178 | implicit def tuple10Reader[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6], m7: DataReader[T7], m8: DataReader[T8], m9: DataReader[T9], m10: DataReader[T10]): DataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10)] = new TupleDataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10)] {
179 |
180 | val n = 10
181 |
182 | override def readTupleColumn(data: HBaseData) = {
183 | val h1 = data.take(1)
184 | val h2 = data.slice(1, 2)
185 | val h3 = data.slice(2, 3)
186 | val h4 = data.slice(3, 4)
187 | val h5 = data.slice(4, 5)
188 | val h6 = data.slice(5, 6)
189 | val h7 = data.slice(6, 7)
190 | val h8 = data.slice(7, 8)
191 | val h9 = data.slice(8, 9)
192 | val h10 = data.slice(9, 10)
193 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6), m7.read(h7), m8.read(h8), m9.read(h9), m10.read(h10))
194 | }
195 | }
196 |
197 | implicit def tuple11Reader[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6], m7: DataReader[T7], m8: DataReader[T8], m9: DataReader[T9], m10: DataReader[T10], m11: DataReader[T11]): DataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11)] = new TupleDataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11)] {
198 |
199 | val n = 11
200 |
201 | override def readTupleColumn(data: HBaseData) = {
202 | val h1 = data.take(1)
203 | val h2 = data.slice(1, 2)
204 | val h3 = data.slice(2, 3)
205 | val h4 = data.slice(3, 4)
206 | val h5 = data.slice(4, 5)
207 | val h6 = data.slice(5, 6)
208 | val h7 = data.slice(6, 7)
209 | val h8 = data.slice(7, 8)
210 | val h9 = data.slice(8, 9)
211 | val h10 = data.slice(9, 10)
212 | val h11 = data.slice(10, 11)
213 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6), m7.read(h7), m8.read(h8), m9.read(h9), m10.read(h10), m11.read(h11))
214 | }
215 | }
216 |
217 | implicit def tuple12Reader[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6], m7: DataReader[T7], m8: DataReader[T8], m9: DataReader[T9], m10: DataReader[T10], m11: DataReader[T11], m12: DataReader[T12]): DataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12)] = new TupleDataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12)] {
218 |
219 | val n = 12
220 |
221 | override def readTupleColumn(data: HBaseData) = {
222 | val h1 = data.take(1)
223 | val h2 = data.slice(1, 2)
224 | val h3 = data.slice(2, 3)
225 | val h4 = data.slice(3, 4)
226 | val h5 = data.slice(4, 5)
227 | val h6 = data.slice(5, 6)
228 | val h7 = data.slice(6, 7)
229 | val h8 = data.slice(7, 8)
230 | val h9 = data.slice(8, 9)
231 | val h10 = data.slice(9, 10)
232 | val h11 = data.slice(10, 11)
233 | val h12 = data.slice(11, 12)
234 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6), m7.read(h7), m8.read(h8), m9.read(h9), m10.read(h10), m11.read(h11), m12.read(h12))
235 | }
236 | }
237 |
238 | implicit def tuple13Reader[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6], m7: DataReader[T7], m8: DataReader[T8], m9: DataReader[T9], m10: DataReader[T10], m11: DataReader[T11], m12: DataReader[T12], m13: DataReader[T13]): DataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13)] = new TupleDataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13)] {
239 |
240 | val n = 13
241 |
242 | override def readTupleColumn(data: HBaseData) = {
243 | val h1 = data.take(1)
244 | val h2 = data.slice(1, 2)
245 | val h3 = data.slice(2, 3)
246 | val h4 = data.slice(3, 4)
247 | val h5 = data.slice(4, 5)
248 | val h6 = data.slice(5, 6)
249 | val h7 = data.slice(6, 7)
250 | val h8 = data.slice(7, 8)
251 | val h9 = data.slice(8, 9)
252 | val h10 = data.slice(9, 10)
253 | val h11 = data.slice(10, 11)
254 | val h12 = data.slice(11, 12)
255 | val h13 = data.slice(12, 13)
256 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6), m7.read(h7), m8.read(h8), m9.read(h9), m10.read(h10), m11.read(h11), m12.read(h12), m13.read(h13))
257 | }
258 | }
259 |
260 | implicit def tuple14Reader[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6], m7: DataReader[T7], m8: DataReader[T8], m9: DataReader[T9], m10: DataReader[T10], m11: DataReader[T11], m12: DataReader[T12], m13: DataReader[T13], m14: DataReader[T14]): DataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14)] = new TupleDataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14)] {
261 |
262 | val n = 14
263 |
264 | override def readTupleColumn(data: HBaseData) = {
265 | val h1 = data.take(1)
266 | val h2 = data.slice(1, 2)
267 | val h3 = data.slice(2, 3)
268 | val h4 = data.slice(3, 4)
269 | val h5 = data.slice(4, 5)
270 | val h6 = data.slice(5, 6)
271 | val h7 = data.slice(6, 7)
272 | val h8 = data.slice(7, 8)
273 | val h9 = data.slice(8, 9)
274 | val h10 = data.slice(9, 10)
275 | val h11 = data.slice(10, 11)
276 | val h12 = data.slice(11, 12)
277 | val h13 = data.slice(12, 13)
278 | val h14 = data.slice(13, 14)
279 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6), m7.read(h7), m8.read(h8), m9.read(h9), m10.read(h10), m11.read(h11), m12.read(h12), m13.read(h13), m14.read(h14))
280 | }
281 | }
282 |
283 | implicit def tuple15Reader[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6], m7: DataReader[T7], m8: DataReader[T8], m9: DataReader[T9], m10: DataReader[T10], m11: DataReader[T11], m12: DataReader[T12], m13: DataReader[T13], m14: DataReader[T14], m15: DataReader[T15]): DataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15)] = new TupleDataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15)] {
284 |
285 | val n = 15
286 |
287 | override def readTupleColumn(data: HBaseData) = {
288 | val h1 = data.take(1)
289 | val h2 = data.slice(1, 2)
290 | val h3 = data.slice(2, 3)
291 | val h4 = data.slice(3, 4)
292 | val h5 = data.slice(4, 5)
293 | val h6 = data.slice(5, 6)
294 | val h7 = data.slice(6, 7)
295 | val h8 = data.slice(7, 8)
296 | val h9 = data.slice(8, 9)
297 | val h10 = data.slice(9, 10)
298 | val h11 = data.slice(10, 11)
299 | val h12 = data.slice(11, 12)
300 | val h13 = data.slice(12, 13)
301 | val h14 = data.slice(13, 14)
302 | val h15 = data.slice(14, 15)
303 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6), m7.read(h7), m8.read(h8), m9.read(h9), m10.read(h10), m11.read(h11), m12.read(h12), m13.read(h13), m14.read(h14), m15.read(h15))
304 | }
305 | }
306 |
307 | implicit def tuple16Reader[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6], m7: DataReader[T7], m8: DataReader[T8], m9: DataReader[T9], m10: DataReader[T10], m11: DataReader[T11], m12: DataReader[T12], m13: DataReader[T13], m14: DataReader[T14], m15: DataReader[T15], m16: DataReader[T16]): DataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16)] = new TupleDataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16)] {
308 |
309 | val n = 16
310 |
311 | override def readTupleColumn(data: HBaseData) = {
312 | val h1 = data.take(1)
313 | val h2 = data.slice(1, 2)
314 | val h3 = data.slice(2, 3)
315 | val h4 = data.slice(3, 4)
316 | val h5 = data.slice(4, 5)
317 | val h6 = data.slice(5, 6)
318 | val h7 = data.slice(6, 7)
319 | val h8 = data.slice(7, 8)
320 | val h9 = data.slice(8, 9)
321 | val h10 = data.slice(9, 10)
322 | val h11 = data.slice(10, 11)
323 | val h12 = data.slice(11, 12)
324 | val h13 = data.slice(12, 13)
325 | val h14 = data.slice(13, 14)
326 | val h15 = data.slice(14, 15)
327 | val h16 = data.slice(15, 16)
328 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6), m7.read(h7), m8.read(h8), m9.read(h9), m10.read(h10), m11.read(h11), m12.read(h12), m13.read(h13), m14.read(h14), m15.read(h15), m16.read(h16))
329 | }
330 | }
331 |
332 | implicit def tuple17Reader[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6], m7: DataReader[T7], m8: DataReader[T8], m9: DataReader[T9], m10: DataReader[T10], m11: DataReader[T11], m12: DataReader[T12], m13: DataReader[T13], m14: DataReader[T14], m15: DataReader[T15], m16: DataReader[T16], m17: DataReader[T17]): DataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17)] = new TupleDataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17)] {
333 |
334 | val n = 17
335 |
336 | override def readTupleColumn(data: HBaseData) = {
337 | val h1 = data.take(1)
338 | val h2 = data.slice(1, 2)
339 | val h3 = data.slice(2, 3)
340 | val h4 = data.slice(3, 4)
341 | val h5 = data.slice(4, 5)
342 | val h6 = data.slice(5, 6)
343 | val h7 = data.slice(6, 7)
344 | val h8 = data.slice(7, 8)
345 | val h9 = data.slice(8, 9)
346 | val h10 = data.slice(9, 10)
347 | val h11 = data.slice(10, 11)
348 | val h12 = data.slice(11, 12)
349 | val h13 = data.slice(12, 13)
350 | val h14 = data.slice(13, 14)
351 | val h15 = data.slice(14, 15)
352 | val h16 = data.slice(15, 16)
353 | val h17 = data.slice(16, 17)
354 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6), m7.read(h7), m8.read(h8), m9.read(h9), m10.read(h10), m11.read(h11), m12.read(h12), m13.read(h13), m14.read(h14), m15.read(h15), m16.read(h16), m17.read(h17))
355 | }
356 | }
357 |
358 | implicit def tuple18Reader[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6], m7: DataReader[T7], m8: DataReader[T8], m9: DataReader[T9], m10: DataReader[T10], m11: DataReader[T11], m12: DataReader[T12], m13: DataReader[T13], m14: DataReader[T14], m15: DataReader[T15], m16: DataReader[T16], m17: DataReader[T17], m18: DataReader[T18]): DataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18)] = new TupleDataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18)] {
359 |
360 | val n = 18
361 |
362 | override def readTupleColumn(data: HBaseData) = {
363 | val h1 = data.take(1)
364 | val h2 = data.slice(1, 2)
365 | val h3 = data.slice(2, 3)
366 | val h4 = data.slice(3, 4)
367 | val h5 = data.slice(4, 5)
368 | val h6 = data.slice(5, 6)
369 | val h7 = data.slice(6, 7)
370 | val h8 = data.slice(7, 8)
371 | val h9 = data.slice(8, 9)
372 | val h10 = data.slice(9, 10)
373 | val h11 = data.slice(10, 11)
374 | val h12 = data.slice(11, 12)
375 | val h13 = data.slice(12, 13)
376 | val h14 = data.slice(13, 14)
377 | val h15 = data.slice(14, 15)
378 | val h16 = data.slice(15, 16)
379 | val h17 = data.slice(16, 17)
380 | val h18 = data.slice(17, 18)
381 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6), m7.read(h7), m8.read(h8), m9.read(h9), m10.read(h10), m11.read(h11), m12.read(h12), m13.read(h13), m14.read(h14), m15.read(h15), m16.read(h16), m17.read(h17), m18.read(h18))
382 | }
383 | }
384 |
385 | implicit def tuple19Reader[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6], m7: DataReader[T7], m8: DataReader[T8], m9: DataReader[T9], m10: DataReader[T10], m11: DataReader[T11], m12: DataReader[T12], m13: DataReader[T13], m14: DataReader[T14], m15: DataReader[T15], m16: DataReader[T16], m17: DataReader[T17], m18: DataReader[T18], m19: DataReader[T19]): DataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19)] = new TupleDataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19)] {
386 |
387 | val n = 19
388 |
389 | override def readTupleColumn(data: HBaseData) = {
390 | val h1 = data.take(1)
391 | val h2 = data.slice(1, 2)
392 | val h3 = data.slice(2, 3)
393 | val h4 = data.slice(3, 4)
394 | val h5 = data.slice(4, 5)
395 | val h6 = data.slice(5, 6)
396 | val h7 = data.slice(6, 7)
397 | val h8 = data.slice(7, 8)
398 | val h9 = data.slice(8, 9)
399 | val h10 = data.slice(9, 10)
400 | val h11 = data.slice(10, 11)
401 | val h12 = data.slice(11, 12)
402 | val h13 = data.slice(12, 13)
403 | val h14 = data.slice(13, 14)
404 | val h15 = data.slice(14, 15)
405 | val h16 = data.slice(15, 16)
406 | val h17 = data.slice(16, 17)
407 | val h18 = data.slice(17, 18)
408 | val h19 = data.slice(18, 19)
409 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6), m7.read(h7), m8.read(h8), m9.read(h9), m10.read(h10), m11.read(h11), m12.read(h12), m13.read(h13), m14.read(h14), m15.read(h15), m16.read(h16), m17.read(h17), m18.read(h18), m19.read(h19))
410 | }
411 | }
412 |
413 | implicit def tuple20Reader[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6], m7: DataReader[T7], m8: DataReader[T8], m9: DataReader[T9], m10: DataReader[T10], m11: DataReader[T11], m12: DataReader[T12], m13: DataReader[T13], m14: DataReader[T14], m15: DataReader[T15], m16: DataReader[T16], m17: DataReader[T17], m18: DataReader[T18], m19: DataReader[T19], m20: DataReader[T20]): DataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20)] = new TupleDataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20)] {
414 |
415 | val n = 20
416 |
417 | override def readTupleColumn(data: HBaseData) = {
418 | val h1 = data.take(1)
419 | val h2 = data.slice(1, 2)
420 | val h3 = data.slice(2, 3)
421 | val h4 = data.slice(3, 4)
422 | val h5 = data.slice(4, 5)
423 | val h6 = data.slice(5, 6)
424 | val h7 = data.slice(6, 7)
425 | val h8 = data.slice(7, 8)
426 | val h9 = data.slice(8, 9)
427 | val h10 = data.slice(9, 10)
428 | val h11 = data.slice(10, 11)
429 | val h12 = data.slice(11, 12)
430 | val h13 = data.slice(12, 13)
431 | val h14 = data.slice(13, 14)
432 | val h15 = data.slice(14, 15)
433 | val h16 = data.slice(15, 16)
434 | val h17 = data.slice(16, 17)
435 | val h18 = data.slice(17, 18)
436 | val h19 = data.slice(18, 19)
437 | val h20 = data.slice(19, 20)
438 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6), m7.read(h7), m8.read(h8), m9.read(h9), m10.read(h10), m11.read(h11), m12.read(h12), m13.read(h13), m14.read(h14), m15.read(h15), m16.read(h16), m17.read(h17), m18.read(h18), m19.read(h19), m20.read(h20))
439 | }
440 | }
441 |
442 | implicit def tuple21Reader[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6], m7: DataReader[T7], m8: DataReader[T8], m9: DataReader[T9], m10: DataReader[T10], m11: DataReader[T11], m12: DataReader[T12], m13: DataReader[T13], m14: DataReader[T14], m15: DataReader[T15], m16: DataReader[T16], m17: DataReader[T17], m18: DataReader[T18], m19: DataReader[T19], m20: DataReader[T20], m21: DataReader[T21]): DataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21)] = new TupleDataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21)] {
443 |
444 | val n = 21
445 |
446 | override def readTupleColumn(data: HBaseData) = {
447 | val h1 = data.take(1)
448 | val h2 = data.slice(1, 2)
449 | val h3 = data.slice(2, 3)
450 | val h4 = data.slice(3, 4)
451 | val h5 = data.slice(4, 5)
452 | val h6 = data.slice(5, 6)
453 | val h7 = data.slice(6, 7)
454 | val h8 = data.slice(7, 8)
455 | val h9 = data.slice(8, 9)
456 | val h10 = data.slice(9, 10)
457 | val h11 = data.slice(10, 11)
458 | val h12 = data.slice(11, 12)
459 | val h13 = data.slice(12, 13)
460 | val h14 = data.slice(13, 14)
461 | val h15 = data.slice(14, 15)
462 | val h16 = data.slice(15, 16)
463 | val h17 = data.slice(16, 17)
464 | val h18 = data.slice(17, 18)
465 | val h19 = data.slice(18, 19)
466 | val h20 = data.slice(19, 20)
467 | val h21 = data.slice(20, 21)
468 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6), m7.read(h7), m8.read(h8), m9.read(h9), m10.read(h10), m11.read(h11), m12.read(h12), m13.read(h13), m14.read(h14), m15.read(h15), m16.read(h16), m17.read(h17), m18.read(h18), m19.read(h19), m20.read(h20), m21.read(h21))
469 | }
470 | }
471 |
472 | implicit def tuple22Reader[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22](implicit m1: DataReader[T1], m2: DataReader[T2], m3: DataReader[T3], m4: DataReader[T4], m5: DataReader[T5], m6: DataReader[T6], m7: DataReader[T7], m8: DataReader[T8], m9: DataReader[T9], m10: DataReader[T10], m11: DataReader[T11], m12: DataReader[T12], m13: DataReader[T13], m14: DataReader[T14], m15: DataReader[T15], m16: DataReader[T16], m17: DataReader[T17], m18: DataReader[T18], m19: DataReader[T19], m20: DataReader[T20], m21: DataReader[T21], m22: DataReader[T22]): DataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22)] = new TupleDataReader[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22)] {
473 |
474 | val n = 22
475 |
476 | override def readTupleColumn(data: HBaseData) = {
477 | val h1 = data.take(1)
478 | val h2 = data.slice(1, 2)
479 | val h3 = data.slice(2, 3)
480 | val h4 = data.slice(3, 4)
481 | val h5 = data.slice(4, 5)
482 | val h6 = data.slice(5, 6)
483 | val h7 = data.slice(6, 7)
484 | val h8 = data.slice(7, 8)
485 | val h9 = data.slice(8, 9)
486 | val h10 = data.slice(9, 10)
487 | val h11 = data.slice(10, 11)
488 | val h12 = data.slice(11, 12)
489 | val h13 = data.slice(12, 13)
490 | val h14 = data.slice(13, 14)
491 | val h15 = data.slice(14, 15)
492 | val h16 = data.slice(15, 16)
493 | val h17 = data.slice(16, 17)
494 | val h18 = data.slice(17, 18)
495 | val h19 = data.slice(18, 19)
496 | val h20 = data.slice(19, 20)
497 | val h21 = data.slice(20, 21)
498 | val h22 = data.slice(21, 22)
499 | (m1.read(h1), m2.read(h2), m3.read(h3), m4.read(h4), m5.read(h5), m6.read(h6), m7.read(h7), m8.read(h8), m9.read(h9), m10.read(h10), m11.read(h11), m12.read(h12), m13.read(h13), m14.read(h14), m15.read(h15), m16.read(h16), m17.read(h17), m18.read(h18), m19.read(h19), m20.read(h20), m21.read(h21), m22.read(h22))
500 | }
501 | }
502 | }
503 |
504 |
505 |
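506 | // Note: each tupleN reader above treats the incoming HBaseData as a positional
507 | // sequence of cells: it splits the sequence into n one-element slices and hands
508 | // each slice to the matching single-column DataReader, so the order of the
509 | // columns being read is expected to match the tuple's component order.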
--------------------------------------------------------------------------------
/src/main/scala/info/xiaohei/spark/connector/hbase/transformer/writer/DataWriter.scala:
--------------------------------------------------------------------------------
1 | package info.xiaohei.spark.connector.hbase.transformer.writer
2 |
3 | import info.xiaohei.spark.connector.hbase.transformer.DataTransformer
4 |
5 | /**
6 | * Author: xiaohei
7 | * Date: 2017/3/21
8 | * Email: xiaohei.info@gmail.com
9 | * Host: www.xiaohei.info
10 | */
11 | trait DataWriter[T] extends DataTransformer {
12 | def write(data: T): HBaseData
13 | }
14 |
15 | trait SingleColumnDataWriter[T] extends DataWriter[T] {
16 | override def write(data: T): HBaseData = Seq(writeSingleColumn(data))
17 |
18 | def writeSingleColumn(data: T): Option[Array[Byte]]
19 | }
20 |
21 | abstract class CustomDataWriter[S, T](implicit writer: DataWriter[T]) extends DataWriter[S] {
22 |
23 | override def write(data: S): HBaseData = writer.write(convert(data))
24 |
25 | def convert(data: S): T
26 | }
27 |
28 |
29 |
30 |
31 |
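32 | // Illustrative sketch (not part of the original source): CustomDataWriter lets a
33 | // user type piggyback on an existing writer via a conversion, e.g. for a
34 | // hypothetical case class Person(name: String, age: Int):
35 | //   class PersonWriter extends CustomDataWriter[Person, (String, Int)] {
36 | //     override def convert(data: Person): (String, Int) = (data.name, data.age)
37 | //   }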
--------------------------------------------------------------------------------
/src/main/scala/info/xiaohei/spark/connector/hbase/transformer/writer/DataWriterConversions.scala:
--------------------------------------------------------------------------------
1 | package info.xiaohei.spark.connector.hbase.transformer.writer
2 |
3 | import org.apache.hadoop.hbase.util.Bytes
4 |
5 | /**
6 | * Author: xiaohei
7 | * Date: 2017/3/21
8 | * Email: xiaohei.info@gmail.com
9 | * Host: www.xiaohei.info
10 | */
11 | trait DataWriterConversions extends Serializable{
12 | implicit def intWriter: DataWriter[Int] = new SingleColumnDataWriter[Int] {
13 | override def writeSingleColumn(data: Int): Option[Array[Byte]] = Some(Bytes.toBytes(data))
14 | }
15 |
16 | implicit def longWriter: DataWriter[Long] = new SingleColumnDataWriter[Long] {
17 | override def writeSingleColumn(data: Long): Option[Array[Byte]] = Some(Bytes.toBytes(data))
18 | }
19 |
20 | implicit def shortWriter: DataWriter[Short] = new SingleColumnDataWriter[Short] {
21 | override def writeSingleColumn(data: Short): Option[Array[Byte]] = Some(Bytes.toBytes(data))
22 | }
23 |
24 | implicit def doubleWriter: DataWriter[Double] = new SingleColumnDataWriter[Double] {
25 | override def writeSingleColumn(data: Double): Option[Array[Byte]] = Some(Bytes.toBytes(data))
26 | }
27 |
28 | implicit def floatWriter: DataWriter[Float] = new SingleColumnDataWriter[Float] {
29 | override def writeSingleColumn(data: Float): Option[Array[Byte]] = Some(Bytes.toBytes(data))
30 | }
31 |
32 | implicit def booleanWriter: DataWriter[Boolean] = new SingleColumnDataWriter[Boolean] {
33 | override def writeSingleColumn(data: Boolean): Option[Array[Byte]] = Some(Bytes.toBytes(data))
34 | }
35 |
36 | implicit def bigDecimalWriter: DataWriter[BigDecimal] = new SingleColumnDataWriter[BigDecimal] {
37 | override def writeSingleColumn(data: BigDecimal): Option[Array[Byte]] = Some(Bytes.toBytes(data.bigDecimal))
38 | }
39 |
40 | implicit def stringWriter: DataWriter[String] = new SingleColumnDataWriter[String] {
41 | override def writeSingleColumn(data: String): Option[Array[Byte]] = Some(Bytes.toBytes(data))
42 | }
43 |
44 | // Options
45 |
46 | implicit def optionWriter[T](implicit c: DataWriter[T]): DataWriter[Option[T]] = new DataWriter[Option[T]] {
47 | override def write(data: Option[T]): HBaseData = if(data.nonEmpty) c.write(data.get) else Seq(None)
48 | }
49 |
50 | // Tuples
51 |
52 |
53 | implicit def tupleWriter2[T1, T2](implicit c1: DataWriter[T1], c2: DataWriter[T2]): DataWriter[(T1, T2)] = new DataWriter[(T1, T2)] {
54 | override def write(data: (T1, T2)): HBaseData = c1.write(data._1) ++ c2.write(data._2)
55 | }
56 |
57 | implicit def tupleWriter3[T1, T2, T3](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3]): DataWriter[(T1, T2, T3)] = new DataWriter[(T1, T2, T3)] {
58 | override def write(data: (T1, T2, T3)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3)
59 | }
60 |
61 | implicit def tupleWriter4[T1, T2, T3, T4](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4]): DataWriter[(T1, T2, T3, T4)] = new DataWriter[(T1, T2, T3, T4)] {
62 | override def write(data: (T1, T2, T3, T4)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4)
63 | }
64 |
65 | implicit def tupleWriter5[T1, T2, T3, T4, T5](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5]): DataWriter[(T1, T2, T3, T4, T5)] = new DataWriter[(T1, T2, T3, T4, T5)] {
66 | override def write(data: (T1, T2, T3, T4, T5)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5)
67 | }
68 |
69 | implicit def tupleWriter6[T1, T2, T3, T4, T5, T6](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6]): DataWriter[(T1, T2, T3, T4, T5, T6)] = new DataWriter[(T1, T2, T3, T4, T5, T6)] {
70 | override def write(data: (T1, T2, T3, T4, T5, T6)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6)
71 | }
72 |
73 | implicit def tupleWriter7[T1, T2, T3, T4, T5, T6, T7](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6], c7: DataWriter[T7]): DataWriter[(T1, T2, T3, T4, T5, T6, T7)] = new DataWriter[(T1, T2, T3, T4, T5, T6, T7)] {
74 | override def write(data: (T1, T2, T3, T4, T5, T6, T7)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6) ++ c7.write(data._7)
75 | }
76 |
77 | implicit def tupleWriter8[T1, T2, T3, T4, T5, T6, T7, T8](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6], c7: DataWriter[T7], c8: DataWriter[T8]): DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8)] = new DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8)] {
78 | override def write(data: (T1, T2, T3, T4, T5, T6, T7, T8)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6) ++ c7.write(data._7) ++ c8.write(data._8)
79 | }
80 |
81 | implicit def tupleWriter9[T1, T2, T3, T4, T5, T6, T7, T8, T9](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6], c7: DataWriter[T7], c8: DataWriter[T8], c9: DataWriter[T9]): DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9)] = new DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9)] {
82 | override def write(data: (T1, T2, T3, T4, T5, T6, T7, T8, T9)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6) ++ c7.write(data._7) ++ c8.write(data._8) ++ c9.write(data._9)
83 | }
84 |
85 | implicit def tupleWriter10[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6], c7: DataWriter[T7], c8: DataWriter[T8], c9: DataWriter[T9], c10: DataWriter[T10]): DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10)] = new DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10)] {
86 | override def write(data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6) ++ c7.write(data._7) ++ c8.write(data._8) ++ c9.write(data._9) ++ c10.write(data._10)
87 | }
88 |
89 | implicit def tupleWriter11[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6], c7: DataWriter[T7], c8: DataWriter[T8], c9: DataWriter[T9], c10: DataWriter[T10], c11: DataWriter[T11]): DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11)] = new DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11)] {
90 | override def write(data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6) ++ c7.write(data._7) ++ c8.write(data._8) ++ c9.write(data._9) ++ c10.write(data._10) ++ c11.write(data._11)
91 | }
92 |
93 | implicit def tupleWriter12[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6], c7: DataWriter[T7], c8: DataWriter[T8], c9: DataWriter[T9], c10: DataWriter[T10], c11: DataWriter[T11], c12: DataWriter[T12]): DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12)] = new DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12)] {
94 | override def write(data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6) ++ c7.write(data._7) ++ c8.write(data._8) ++ c9.write(data._9) ++ c10.write(data._10) ++ c11.write(data._11) ++ c12.write(data._12)
95 | }
96 |
97 | implicit def tupleWriter13[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6], c7: DataWriter[T7], c8: DataWriter[T8], c9: DataWriter[T9], c10: DataWriter[T10], c11: DataWriter[T11], c12: DataWriter[T12], c13: DataWriter[T13]): DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13)] = new DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13)] {
98 | override def write(data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6) ++ c7.write(data._7) ++ c8.write(data._8) ++ c9.write(data._9) ++ c10.write(data._10) ++ c11.write(data._11) ++ c12.write(data._12) ++ c13.write(data._13)
99 | }
100 |
101 | implicit def tupleWriter14[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6], c7: DataWriter[T7], c8: DataWriter[T8], c9: DataWriter[T9], c10: DataWriter[T10], c11: DataWriter[T11], c12: DataWriter[T12], c13: DataWriter[T13], c14: DataWriter[T14]): DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14)] = new DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14)] {
102 | override def write(data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6) ++ c7.write(data._7) ++ c8.write(data._8) ++ c9.write(data._9) ++ c10.write(data._10) ++ c11.write(data._11) ++ c12.write(data._12) ++ c13.write(data._13) ++ c14.write(data._14)
103 | }
104 |
105 | implicit def tupleWriter15[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6], c7: DataWriter[T7], c8: DataWriter[T8], c9: DataWriter[T9], c10: DataWriter[T10], c11: DataWriter[T11], c12: DataWriter[T12], c13: DataWriter[T13], c14: DataWriter[T14], c15: DataWriter[T15]): DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15)] = new DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15)] {
106 | override def write(data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6) ++ c7.write(data._7) ++ c8.write(data._8) ++ c9.write(data._9) ++ c10.write(data._10) ++ c11.write(data._11) ++ c12.write(data._12) ++ c13.write(data._13) ++ c14.write(data._14) ++ c15.write(data._15)
107 | }
108 |
109 | implicit def tupleWriter16[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6], c7: DataWriter[T7], c8: DataWriter[T8], c9: DataWriter[T9], c10: DataWriter[T10], c11: DataWriter[T11], c12: DataWriter[T12], c13: DataWriter[T13], c14: DataWriter[T14], c15: DataWriter[T15], c16: DataWriter[T16]): DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16)] = new DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16)] {
110 | override def write(data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6) ++ c7.write(data._7) ++ c8.write(data._8) ++ c9.write(data._9) ++ c10.write(data._10) ++ c11.write(data._11) ++ c12.write(data._12) ++ c13.write(data._13) ++ c14.write(data._14) ++ c15.write(data._15) ++ c16.write(data._16)
111 | }
112 |
113 | implicit def tupleWriter17[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6], c7: DataWriter[T7], c8: DataWriter[T8], c9: DataWriter[T9], c10: DataWriter[T10], c11: DataWriter[T11], c12: DataWriter[T12], c13: DataWriter[T13], c14: DataWriter[T14], c15: DataWriter[T15], c16: DataWriter[T16], c17: DataWriter[T17]): DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17)] = new DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17)] {
114 | override def write(data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6) ++ c7.write(data._7) ++ c8.write(data._8) ++ c9.write(data._9) ++ c10.write(data._10) ++ c11.write(data._11) ++ c12.write(data._12) ++ c13.write(data._13) ++ c14.write(data._14) ++ c15.write(data._15) ++ c16.write(data._16) ++ c17.write(data._17)
115 | }
116 |
117 | implicit def tupleWriter18[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6], c7: DataWriter[T7], c8: DataWriter[T8], c9: DataWriter[T9], c10: DataWriter[T10], c11: DataWriter[T11], c12: DataWriter[T12], c13: DataWriter[T13], c14: DataWriter[T14], c15: DataWriter[T15], c16: DataWriter[T16], c17: DataWriter[T17], c18: DataWriter[T18]): DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18)] = new DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18)] {
118 | override def write(data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6) ++ c7.write(data._7) ++ c8.write(data._8) ++ c9.write(data._9) ++ c10.write(data._10) ++ c11.write(data._11) ++ c12.write(data._12) ++ c13.write(data._13) ++ c14.write(data._14) ++ c15.write(data._15) ++ c16.write(data._16) ++ c17.write(data._17) ++ c18.write(data._18)
119 | }
120 |
121 | implicit def tupleWriter19[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6], c7: DataWriter[T7], c8: DataWriter[T8], c9: DataWriter[T9], c10: DataWriter[T10], c11: DataWriter[T11], c12: DataWriter[T12], c13: DataWriter[T13], c14: DataWriter[T14], c15: DataWriter[T15], c16: DataWriter[T16], c17: DataWriter[T17], c18: DataWriter[T18], c19: DataWriter[T19]): DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19)] = new DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19)] {
122 | override def write(data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6) ++ c7.write(data._7) ++ c8.write(data._8) ++ c9.write(data._9) ++ c10.write(data._10) ++ c11.write(data._11) ++ c12.write(data._12) ++ c13.write(data._13) ++ c14.write(data._14) ++ c15.write(data._15) ++ c16.write(data._16) ++ c17.write(data._17) ++ c18.write(data._18) ++ c19.write(data._19)
123 | }
124 |
125 | implicit def tupleWriter20[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6], c7: DataWriter[T7], c8: DataWriter[T8], c9: DataWriter[T9], c10: DataWriter[T10], c11: DataWriter[T11], c12: DataWriter[T12], c13: DataWriter[T13], c14: DataWriter[T14], c15: DataWriter[T15], c16: DataWriter[T16], c17: DataWriter[T17], c18: DataWriter[T18], c19: DataWriter[T19], c20: DataWriter[T20]): DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20)] = new DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20)] {
126 | override def write(data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6) ++ c7.write(data._7) ++ c8.write(data._8) ++ c9.write(data._9) ++ c10.write(data._10) ++ c11.write(data._11) ++ c12.write(data._12) ++ c13.write(data._13) ++ c14.write(data._14) ++ c15.write(data._15) ++ c16.write(data._16) ++ c17.write(data._17) ++ c18.write(data._18) ++ c19.write(data._19) ++ c20.write(data._20)
127 | }
128 |
129 | implicit def tupleWriter21[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6], c7: DataWriter[T7], c8: DataWriter[T8], c9: DataWriter[T9], c10: DataWriter[T10], c11: DataWriter[T11], c12: DataWriter[T12], c13: DataWriter[T13], c14: DataWriter[T14], c15: DataWriter[T15], c16: DataWriter[T16], c17: DataWriter[T17], c18: DataWriter[T18], c19: DataWriter[T19], c20: DataWriter[T20], c21: DataWriter[T21]): DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21)] = new DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21)] {
130 | override def write(data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6) ++ c7.write(data._7) ++ c8.write(data._8) ++ c9.write(data._9) ++ c10.write(data._10) ++ c11.write(data._11) ++ c12.write(data._12) ++ c13.write(data._13) ++ c14.write(data._14) ++ c15.write(data._15) ++ c16.write(data._16) ++ c17.write(data._17) ++ c18.write(data._18) ++ c19.write(data._19) ++ c20.write(data._20) ++ c21.write(data._21)
131 | }
132 |
133 | implicit def tupleWriter22[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22](implicit c1: DataWriter[T1], c2: DataWriter[T2], c3: DataWriter[T3], c4: DataWriter[T4], c5: DataWriter[T5], c6: DataWriter[T6], c7: DataWriter[T7], c8: DataWriter[T8], c9: DataWriter[T9], c10: DataWriter[T10], c11: DataWriter[T11], c12: DataWriter[T12], c13: DataWriter[T13], c14: DataWriter[T14], c15: DataWriter[T15], c16: DataWriter[T16], c17: DataWriter[T17], c18: DataWriter[T18], c19: DataWriter[T19], c20: DataWriter[T20], c21: DataWriter[T21], c22: DataWriter[T22]): DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22)] = new DataWriter[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22)] {
134 | override def write(data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22)): HBaseData = c1.write(data._1) ++ c2.write(data._2) ++ c3.write(data._3) ++ c4.write(data._4) ++ c5.write(data._5) ++ c6.write(data._6) ++ c7.write(data._7) ++ c8.write(data._8) ++ c9.write(data._9) ++ c10.write(data._10) ++ c11.write(data._11) ++ c12.write(data._12) ++ c13.write(data._13) ++ c14.write(data._14) ++ c15.write(data._15) ++ c16.write(data._16) ++ c17.write(data._17) ++ c18.write(data._18) ++ c19.write(data._19) ++ c20.write(data._20) ++ c21.write(data._21) ++ c22.write(data._22)
135 | }
136 |
137 | }
138 |
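139 | // Note: optionWriter emits Seq(None) when the Option is empty, so the cell keeps
140 | // its position inside the HBaseData sequence; the tupleN writers then simply
141 | // concatenate the per-field sequences in order, mirroring the positional tuple
142 | // readers in DataReaderConversions.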
--------------------------------------------------------------------------------
/src/main/scala/info/xiaohei/spark/connector/mysql/MysqlConf.scala:
--------------------------------------------------------------------------------
1 | package info.xiaohei.spark.connector.mysql
2 |
3 | import org.apache.spark.SparkContext
4 |
5 | /**
6 | * Author: xiaohei
7 | * Date: 2017/4/6
8 | * Email: xiaohei.info@gmail.com
9 | * Host: xiaohei.info
10 | */
11 |
12 | private[mysql] object ConfOption extends Enumeration {
13 | type ConfOption = Value
14 | val SPARK_HBASE_HOST = Value("spark.hbase.host")
15 | val SPARK_MYSQL_HOST = Value("spark.mysql.host")
16 | val SPARK_MYSQL_USERNAME = Value("spark.mysql.username")
17 | val SPARK_MYSQL_PASSWORD = Value("spark.mysql.password")
18 | val SPARK_MYSQL_PORT = Value("spark.mysql.port")
19 | val SPARK_MYSQL_DB = Value("spark.mysql.db")
20 | }
21 |
22 | case class MysqlConf private[mysql](
23 | private val conf: collection.mutable.Map[String, String] = collection.mutable.Map.empty
24 | ) {
25 |   def getMysqlInfo(): (String, String, String) = {
26 |     require(conf.nonEmpty, "mysql conf must be set")
27 |     val host = conf.get(ConfOption.SPARK_MYSQL_HOST.toString)
28 |     val port = conf.get(ConfOption.SPARK_MYSQL_PORT.toString)
29 |     val db = conf.get(ConfOption.SPARK_MYSQL_DB.toString)
30 |     val username = conf.get(ConfOption.SPARK_MYSQL_USERNAME.toString)
31 |     val password = conf.get(ConfOption.SPARK_MYSQL_PASSWORD.toString)
32 |     require(
33 |       host.isDefined &&
34 |         port.isDefined &&
35 |         db.isDefined &&
36 |         username.isDefined &&
37 |         password.isDefined,
38 |       "host/port/dbname/username/password must be set in mysql conf!"
39 |     )
40 |     val connectStr = s"jdbc:mysql://${host.get}:${port.get}/${db.get}"
41 |     (connectStr, username.get, password.get)
42 |   }
43 |
44 |   //todo: check whether this is risky when a large number of connections are created
45 | def set(key: String, value: String): MysqlConf = {
46 | conf += key -> value
47 | this.copy(conf = conf)
48 | }
49 | }
50 |
51 | object MysqlConf {
52 | def createFromSpark(sc: SparkContext) = {
53 | val sparkConf = sc.getConf
54 | createConf(
55 | sparkConf.get(ConfOption.SPARK_MYSQL_HOST.toString),
56 | sparkConf.get(ConfOption.SPARK_MYSQL_USERNAME.toString),
57 | sparkConf.get(ConfOption.SPARK_MYSQL_PASSWORD.toString),
58 | sparkConf.get(ConfOption.SPARK_MYSQL_PORT.toString, "3306"),
59 | sparkConf.get(ConfOption.SPARK_MYSQL_DB.toString)
60 | )
61 | }
62 |
63 | def createConf(
64 | host: String,
65 | username: String,
66 |                  password: String,
67 | port: String,
68 | dbName: String
69 | ) = {
70 | val collectionConf = collection.mutable.Map[String, String](
71 | ConfOption.SPARK_MYSQL_HOST.toString -> host,
72 | ConfOption.SPARK_MYSQL_USERNAME.toString -> username,
73 |       ConfOption.SPARK_MYSQL_PASSWORD.toString -> password,
74 | ConfOption.SPARK_MYSQL_PORT.toString -> port,
75 | ConfOption.SPARK_MYSQL_DB.toString -> dbName
76 | )
77 | MysqlConf(collectionConf)
78 | }
79 | }
80 |
81 | trait MysqlConfConversions extends Serializable{
82 | implicit def scToMysqlConf(sc: SparkContext): MysqlConf = MysqlConf.createFromSpark(sc)
83 | }
84 |
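85 | // Minimal usage sketch (illustrative values only, not from the original source):
86 | //   implicit val mysqlConf: MysqlConf =
87 | //     MysqlConf.createConf("localhost", "user", "secret", "3306", "test_db")
88 | // Alternatively, when the spark.mysql.* options are set on the SparkConf, the
89 | // implicit conversion in MysqlConfConversions builds the conf from the
90 | // SparkContext directly.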
--------------------------------------------------------------------------------
/src/main/scala/info/xiaohei/spark/connector/mysql/builder/reader/MysqlContext.scala:
--------------------------------------------------------------------------------
1 | package info.xiaohei.spark.connector.mysql.builder.reader
2 |
3 | import info.xiaohei.spark.connector.RelationalDbEntry
4 | import org.apache.spark.SparkContext
5 |
6 | /**
7 | * Author: xiaohei
8 | * Date: 2017/3/26
9 | * Email: xiaohei.info@gmail.com
10 | * Host: xiaohei.info
11 | */
12 | private[mysql] class MysqlContext() extends Serializable {
13 | def fromMysql[T](tableName: String): MysqlReaderBuilder[T] = new MysqlReaderBuilder[T](tableName)
14 | }
15 |
16 | trait MysqlContextConversions extends Serializable {
17 | implicit def scToMysqlContext(sc: SparkContext): MysqlContext = new MysqlContext()
18 |
19 | implicit def entryToMysqlContext(entry: RelationalDbEntry): MysqlContext = new MysqlContext()
20 | }
21 |
--------------------------------------------------------------------------------
/src/main/scala/info/xiaohei/spark/connector/mysql/builder/reader/MysqlReaderBuilder.scala:
--------------------------------------------------------------------------------
1 | package info.xiaohei.spark.connector.mysql.builder.reader
2 |
3 | import java.sql.DriverManager
4 |
5 | import info.xiaohei.spark.connector.mysql.MysqlConf
6 | import info.xiaohei.spark.connector.mysql.transformer.mapper.DataMapper
7 |
8 | /**
9 | * Author: xiaohei
10 | * Date: 2017/3/26
11 | * Email: xiaohei.info@gmail.com
12 | * Host: xiaohei.info
13 | */
14 | case class MysqlReaderBuilder[T](
15 | private[mysql] val tableName: String,
16 | private[mysql] val columns: Iterable[String] = Seq.empty,
17 | private[mysql] val whereConditions: Option[String] = None
18 | ) {
19 |
20 | def select(cols: String*): MysqlReaderBuilder[T] = {
21 |     require(this.columns.isEmpty, "Columns have already been set")
22 |     require(cols.nonEmpty, "Columns must be set, at least one")
23 |
24 | this.copy(columns = cols)
25 | }
26 |
27 | def select(cols: Iterable[String]): MysqlReaderBuilder[T] = {
28 |     require(this.columns.isEmpty, "Columns have already been set")
29 |     require(cols.nonEmpty, "Columns must be set, at least one")
30 |
31 | this.copy(columns = cols)
32 | }
33 |
34 | def where(conditions: String): MysqlReaderBuilder[T] = {
35 | this.copy(whereConditions = Some(conditions))
36 | }
37 |
38 | }
39 |
40 | trait MysqlReaderBuilderConversions extends Serializable {
41 | implicit def readFromMysql[T](builder: MysqlReaderBuilder[T])
42 | (implicit mysqlConf: MysqlConf, dataMapper: DataMapper[T]): Option[Seq[T]] = {
43 | require(builder.columns.nonEmpty, "column names must be set!")
44 |
45 |     val (connectStr, username, password) = mysqlConf.getMysqlInfo()
46 |     Class.forName("com.mysql.jdbc.Driver")
47 |     val conn = DriverManager.getConnection(connectStr, username, password)
48 |     var sql = s"select ${builder.columns.mkString(",")} from ${builder.tableName}"
49 |     if (builder.whereConditions.nonEmpty) {
50 |       sql += s" where ${builder.whereConditions.get}"
51 |     }
52 |     val ps = conn.prepareStatement(sql)
53 |     try {
54 | val resultList = new collection.mutable.ListBuffer[T]
55 | val resultSet = ps.executeQuery()
56 | while (resultSet.next()) {
57 | resultList += dataMapper.map(resultSet)
58 | }
59 | Some(resultList)
60 | }
61 |     catch {
62 |       case e: Exception =>
63 |         e.printStackTrace(); None
64 | } finally {
65 | if (ps != null) {
66 | ps.close()
67 | }
68 | if (conn != null) {
69 | conn.close()
70 | }
71 | }
72 | }
73 | }
74 |
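75 | // Note: readFromMysql is triggered implicitly when a MysqlReaderBuilder is used
76 | // where an Option[Seq[T]] is expected, and it runs the query eagerly on the
77 | // driver. Illustrative sketch, assuming an implicit MysqlConf and a DataMapper
78 | // for the tuple type are in scope ("entry" and "user_table" are hypothetical):
79 | //   val rows: Option[Seq[(String, Int)]] =
80 | //     entry.fromMysql[(String, Int)]("user_table").select("name", "age")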
--------------------------------------------------------------------------------
/src/main/scala/info/xiaohei/spark/connector/mysql/builder/writer/MysqlWriterBuilder.scala:
--------------------------------------------------------------------------------
1 | package info.xiaohei.spark.connector.mysql.builder.writer
2 |
3 | import java.sql.{DriverManager, PreparedStatement}
4 |
5 | import info.xiaohei.spark.connector.mysql.MysqlConf
6 | import info.xiaohei.spark.connector.mysql.transformer.executor.DataExecutor
7 |
8 | /**
9 | * Author: xiaohei
10 | * Date: 2017/3/26
11 | * Email: xiaohei.info@gmail.com
12 | * Host: xiaohei.info
13 | */
14 | case class MysqlWriterBuilder[C] private[mysql](
15 | private[mysql] val tableName: String,
16 | private[mysql] val collectionData: Iterable[C],
17 | //todo:t.productIterator.foreach{ i =>println("Value = " + i )}
18 | private[mysql] val columns: Iterable[String] = Seq.empty,
19 |                                       //todo: to be improved
20 | private[mysql] val whereConditions: Option[String] = None
21 | ) {
22 | def insert(cols: String*) = {
23 |     require(this.columns.isEmpty, "Columns have already been set")
24 |     require(cols.nonEmpty, "Columns must be set, at least one")
25 |
26 | this.copy(columns = cols)
27 | }
28 |
29 | def insert(cols: Iterable[String]) = {
30 |     require(this.columns.isEmpty, "Columns have already been set")
31 |     require(cols.nonEmpty, "Columns must be set, at least one")
32 |
33 | this.copy(columns = cols)
34 | }
35 |
36 | def where(conditions: String) = {
37 | this.copy(whereConditions = Some(conditions))
38 | }
39 | }
40 |
41 | private[mysql] class MysqlWriterBuildMaker[C](collectionData: Iterable[C])
42 | extends Serializable {
43 | def toMysql(tableName: String): MysqlWriterBuilder[C] =
44 | MysqlWriterBuilder[C](tableName, collectionData)
45 | }
46 |
47 | private[mysql] class MysqlWriter[C](builder: MysqlWriterBuilder[C])(implicit mysqlConf: MysqlConf, dataExecutor: DataExecutor[C])
48 | extends Serializable {
49 | def save(): Unit = {
50 | require(builder.columns.nonEmpty, "column names must be set!")
51 |
52 |     val (connectStr, username, password) = mysqlConf.getMysqlInfo()
53 |     Class.forName("com.mysql.jdbc.Driver")
54 |     val conn = DriverManager.getConnection(connectStr, username, password)
55 |
56 |     // one "?" placeholder per column, e.g. "?,?,?" for three columns
57 |     val placeholder = Seq.fill(builder.columns.size)("?").mkString(",")
58 |     var sql = s"insert into ${builder.tableName}(${builder.columns.mkString(",")}) values($placeholder)"
59 |     if (builder.whereConditions.nonEmpty) {
60 |       sql += s" where ${builder.whereConditions.get}"
61 |     }
62 |     val ps = conn.prepareStatement(sql)
63 |     try {
64 | builder.collectionData.foreach(x => dataExecutor.execute(ps, x))
65 | } catch {
66 | case e: Exception => e.printStackTrace()
67 | } finally {
68 | if (ps != null) {
69 | ps.close()
70 | }
71 | if (conn != null) {
72 | conn.close()
73 | }
74 | }
75 | }
76 | }
77 |
78 | trait MysqlWriterBuilderConversions extends Serializable {
79 | implicit def mysqlCollectionToBuildMaker[C](collectionData: Iterable[C])
80 | : MysqlWriterBuildMaker[C] = new MysqlWriterBuildMaker[C](collectionData)
81 |
82 | implicit def mysqlCollectionBuilderToWriter[C](builder: MysqlWriterBuilder[C])(implicit mysqlConf: MysqlConf, dataExecutor: DataExecutor[C])
83 | : MysqlWriter[C] = new MysqlWriter[C](builder)
84 | }
85 |
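86 | // Note: save() opens a single connection and prepared statement, then calls
87 | // DataExecutor.execute (which ends in executeUpdate) once per element of
88 | // collectionData; inserts are issued one by one rather than as a JDBC batch.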
--------------------------------------------------------------------------------
/src/main/scala/info/xiaohei/spark/connector/mysql/package.scala:
--------------------------------------------------------------------------------
1 | package info.xiaohei.spark.connector
2 |
3 | import info.xiaohei.spark.connector.mysql.builder.reader.{MysqlContextConversions, MysqlReaderBuilderConversions}
4 | import info.xiaohei.spark.connector.mysql.transformer.executor.DataExecutorConversions
5 | import info.xiaohei.spark.connector.mysql.transformer.mapper.DataMapperConversions
6 | import info.xiaohei.spark.connector.mysql.builder.writer.MysqlWriterBuilderConversions
7 | import info.xiaohei.spark.connector.mysql.MysqlConfConversions
8 | /**
9 | * Author: xiaohei
10 | * Date: 2017/4/6
11 | * Email: xiaohei.info@gmail.com
12 | * Host: xiaohei.info
13 | */
14 | package object mysql extends MysqlReaderBuilderConversions
15 | with MysqlWriterBuilderConversions
16 | with MysqlConfConversions
17 | with MysqlContextConversions
18 | with DataExecutorConversions
19 | with DataMapperConversions
20 |
--------------------------------------------------------------------------------
/src/main/scala/info/xiaohei/spark/connector/mysql/transformer/executor/DataExecutor.scala:
--------------------------------------------------------------------------------
1 | package info.xiaohei.spark.connector.mysql.transformer.executor
2 |
3 | import java.sql.PreparedStatement
4 |
5 |
6 | /**
7 | * Author: xiaohei
8 | * Date: 2017/4/15
9 | * Email: xiaohei.info@gmail.com
10 | * Host: xiaohei.info
11 | */
12 | trait DataExecutor[T] extends Serializable {
13 | var index: Int = 1
14 |
15 | def prepare(ps: PreparedStatement, data: T): Unit
16 |
17 | def execute(ps: PreparedStatement, data: T): Unit = {
18 | prepare(ps, data)
19 | ps.executeUpdate()
20 | }
21 | }
22 |
23 | abstract class CustomDataExecutor[S, T](implicit dataExecutor: DataExecutor[T]) extends DataExecutor[S] {
24 |
25 | override def prepare(ps: PreparedStatement, data: S) = dataExecutor.prepare(ps, convert(data))
26 |
27 | def convert(data: S): T
28 | }
29 |
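30 | // Note: `index` is the 1-based JDBC parameter position that prepare() binds to.
31 | // The tuple executors in DataExecutorConversions mutate it on each component
32 | // executor right before delegating, which is why it is a var here (see the todo
33 | // on how `index` is set in DataExecutorConversions).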
--------------------------------------------------------------------------------
/src/main/scala/info/xiaohei/spark/connector/mysql/transformer/executor/DataExecutorConversions.scala:
--------------------------------------------------------------------------------
1 | package info.xiaohei.spark.connector.mysql.transformer.executor
2 |
3 | import java.sql.PreparedStatement
4 |
5 |
6 | /**
7 | * Author: xiaohei
8 | * Date: 2017/4/15
9 | * Email: xiaohei.info@gmail.com
10 | * Host: xiaohei.info
11 | */
12 | trait DataExecutorConversions extends Serializable {
13 | implicit def intExecutor: DataExecutor[Int] = new DataExecutor[Int] {
14 | override def prepare(ps: PreparedStatement, data: Int): Unit = {
15 | ps.setInt(index, data)
16 | }
17 | }
18 |
19 | implicit def longExecutor: DataExecutor[Long] = new DataExecutor[Long] {
20 | override def prepare(ps: PreparedStatement, data: Long): Unit = {
21 | ps.setLong(index, data)
22 | }
23 | }
24 |
25 | implicit def shortExecutor: DataExecutor[Short] = new DataExecutor[Short] {
26 | override def prepare(ps: PreparedStatement, data: Short): Unit = {
27 | ps.setShort(index, data)
28 | }
29 | }
30 |
31 | implicit def doubleExecutor: DataExecutor[Double] = new DataExecutor[Double] {
32 | override def prepare(ps: PreparedStatement, data: Double): Unit = {
33 | ps.setDouble(index, data)
34 | }
35 | }
36 |
37 | implicit def floatExecutor: DataExecutor[Float] = new DataExecutor[Float] {
38 | override def prepare(ps: PreparedStatement, data: Float): Unit = {
39 | ps.setFloat(index, data)
40 | }
41 | }
42 |
43 | implicit def booleanExecutor: DataExecutor[Boolean] = new DataExecutor[Boolean] {
44 | override def prepare(ps: PreparedStatement, data: Boolean): Unit = {
45 | ps.setBoolean(index, data)
46 | }
47 | }
48 |
49 | implicit def bigDecimalExecutor: DataExecutor[java.math.BigDecimal] = new DataExecutor[java.math.BigDecimal] {
50 | override def prepare(ps: PreparedStatement, data: java.math.BigDecimal): Unit = {
51 | ps.setBigDecimal(index, data)
52 | }
53 | }
54 |
55 | implicit def stringExecutor: DataExecutor[String] = new DataExecutor[String] {
56 | override def prepare(ps: PreparedStatement, data: String): Unit = {
57 | ps.setString(index, data)
58 | }
59 | }
60 |
61 | // Tuples
62 |
63 |     //todo: how `index` is set
64 | implicit def tupleExecutor2[T1, T2](implicit e1: DataExecutor[T1], e2: DataExecutor[T2]): DataExecutor[(T1, T2)] = new DataExecutor[(T1, T2)] {
65 | override def prepare(ps: PreparedStatement, data: (T1, T2)): Unit = {
66 | e1.index = 1
67 | e1.prepare(ps, data._1)
68 | e2.index = 2
69 | e2.prepare(ps, data._2)
70 | }
71 | }
72 |
73 | implicit def tupleExecutor3[T1, T2, T3](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3]): DataExecutor[(T1, T2, T3)] = new DataExecutor[(T1, T2, T3)] {
74 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3)): Unit = {
75 | e1.index = 1
76 | e1.prepare(ps, data._1)
77 | e2.index = 2
78 | e2.prepare(ps, data._2)
79 | e3.index = 3
80 | e3.prepare(ps, data._3)
81 | }
82 | }
83 |
84 | implicit def tupleExecutor4[T1, T2, T3, T4](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4]): DataExecutor[(T1, T2, T3, T4)] = new DataExecutor[(T1, T2, T3, T4)] {
85 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4)): Unit = {
86 | e1.index = 1
87 | e1.prepare(ps, data._1)
88 | e2.index = 2
89 | e2.prepare(ps, data._2)
90 | e3.index = 3
91 | e3.prepare(ps, data._3)
92 | e4.index = 4
93 | e4.prepare(ps, data._4)
94 | }
95 | }
96 |
97 | implicit def tupleExecutor5[T1, T2, T3, T4, T5](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5]): DataExecutor[(T1, T2, T3, T4, T5)] = new DataExecutor[(T1, T2, T3, T4, T5)] {
98 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5)): Unit = {
99 | e1.index = 1
100 | e1.prepare(ps, data._1)
101 | e2.index = 2
102 | e2.prepare(ps, data._2)
103 | e3.index = 3
104 | e3.prepare(ps, data._3)
105 | e4.index = 4
106 | e4.prepare(ps, data._4)
107 | e5.index = 5
108 | e5.prepare(ps, data._5)
109 | }
110 | }
111 |
112 | implicit def tupleExecutor6[T1, T2, T3, T4, T5, T6](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6]): DataExecutor[(T1, T2, T3, T4, T5, T6)] = new DataExecutor[(T1, T2, T3, T4, T5, T6)] {
113 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6)): Unit = {
114 | e1.index = 1
115 | e1.prepare(ps, data._1)
116 | e2.index = 2
117 | e2.prepare(ps, data._2)
118 | e3.index = 3
119 | e3.prepare(ps, data._3)
120 | e4.index = 4
121 | e4.prepare(ps, data._4)
122 | e5.index = 5
123 | e5.prepare(ps, data._5)
124 | e6.index = 6
125 | e6.prepare(ps, data._6)
126 | }
127 | }
128 |
129 | implicit def tupleExecutor7[T1, T2, T3, T4, T5, T6, T7](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6], e7: DataExecutor[T7]): DataExecutor[(T1, T2, T3, T4, T5, T6, T7)] = new DataExecutor[(T1, T2, T3, T4, T5, T6, T7)] {
130 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6, T7)): Unit = {
131 | e1.index = 1
132 | e1.prepare(ps, data._1)
133 | e2.index = 2
134 | e2.prepare(ps, data._2)
135 | e3.index = 3
136 | e3.prepare(ps, data._3)
137 | e4.index = 4
138 | e4.prepare(ps, data._4)
139 | e5.index = 5
140 | e5.prepare(ps, data._5)
141 | e6.index = 6
142 | e6.prepare(ps, data._6)
143 | e7.index = 7
144 | e7.prepare(ps, data._7)
145 | }
146 | }
147 |
148 | implicit def tupleExecutor8[T1, T2, T3, T4, T5, T6, T7, T8](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6], e7: DataExecutor[T7], e8: DataExecutor[T8]): DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8)] = new DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8)] {
149 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6, T7, T8)): Unit = {
150 | e1.index = 1
151 | e1.prepare(ps, data._1)
152 | e2.index = 2
153 | e2.prepare(ps, data._2)
154 | e3.index = 3
155 | e3.prepare(ps, data._3)
156 | e4.index = 4
157 | e4.prepare(ps, data._4)
158 | e5.index = 5
159 | e5.prepare(ps, data._5)
160 | e6.index = 6
161 | e6.prepare(ps, data._6)
162 | e7.index = 7
163 | e7.prepare(ps, data._7)
164 | e8.index = 8
165 | e8.prepare(ps, data._8)
166 | }
167 | }
168 |
169 | implicit def tupleExecutor9[T1, T2, T3, T4, T5, T6, T7, T8, T9](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6], e7: DataExecutor[T7], e8: DataExecutor[T8], e9: DataExecutor[T9]): DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9)] = new DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9)] {
170 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6, T7, T8, T9)): Unit = {
171 | e1.index = 1
172 | e1.prepare(ps, data._1)
173 | e2.index = 2
174 | e2.prepare(ps, data._2)
175 | e3.index = 3
176 | e3.prepare(ps, data._3)
177 | e4.index = 4
178 | e4.prepare(ps, data._4)
179 | e5.index = 5
180 | e5.prepare(ps, data._5)
181 | e6.index = 6
182 | e6.prepare(ps, data._6)
183 | e7.index = 7
184 | e7.prepare(ps, data._7)
185 | e8.index = 8
186 | e8.prepare(ps, data._8)
187 | e9.index = 9
188 | e9.prepare(ps, data._9)
189 | }
190 | }
191 |
192 | implicit def tupleExecutor10[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6], e7: DataExecutor[T7], e8: DataExecutor[T8], e9: DataExecutor[T9], e10: DataExecutor[T10]): DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10)] = new DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10)] {
193 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10)): Unit = {
194 | e1.index = 1
195 | e1.prepare(ps, data._1)
196 | e2.index = 2
197 | e2.prepare(ps, data._2)
198 | e3.index = 3
199 | e3.prepare(ps, data._3)
200 | e4.index = 4
201 | e4.prepare(ps, data._4)
202 | e5.index = 5
203 | e5.prepare(ps, data._5)
204 | e6.index = 6
205 | e6.prepare(ps, data._6)
206 | e7.index = 7
207 | e7.prepare(ps, data._7)
208 | e8.index = 8
209 | e8.prepare(ps, data._8)
210 | e9.index = 9
211 | e9.prepare(ps, data._9)
212 | e10.index = 10
213 | e10.prepare(ps, data._10)
214 | }
215 | }
216 |
217 | implicit def tupleExecutor11[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6], e7: DataExecutor[T7], e8: DataExecutor[T8], e9: DataExecutor[T9], e10: DataExecutor[T10], e11: DataExecutor[T11]): DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11)] = new DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11)] {
218 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11)): Unit = {
219 | e1.index = 1
220 | e1.prepare(ps, data._1)
221 | e2.index = 2
222 | e2.prepare(ps, data._2)
223 | e3.index = 3
224 | e3.prepare(ps, data._3)
225 | e4.index = 4
226 | e4.prepare(ps, data._4)
227 | e5.index = 5
228 | e5.prepare(ps, data._5)
229 | e6.index = 6
230 | e6.prepare(ps, data._6)
231 | e7.index = 7
232 | e7.prepare(ps, data._7)
233 | e8.index = 8
234 | e8.prepare(ps, data._8)
235 | e9.index = 9
236 | e9.prepare(ps, data._9)
237 | e10.index = 10
238 | e10.prepare(ps, data._10)
239 | e11.index = 11
240 | e11.prepare(ps, data._11)
241 | }
242 | }
243 |
244 | implicit def tupleExecutor12[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6], e7: DataExecutor[T7], e8: DataExecutor[T8], e9: DataExecutor[T9], e10: DataExecutor[T10], e11: DataExecutor[T11], e12: DataExecutor[T12]): DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12)] = new DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12)] {
245 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12)): Unit = {
246 | e1.index = 1
247 | e1.prepare(ps, data._1)
248 | e2.index = 2
249 | e2.prepare(ps, data._2)
250 | e3.index = 3
251 | e3.prepare(ps, data._3)
252 | e4.index = 4
253 | e4.prepare(ps, data._4)
254 | e5.index = 5
255 | e5.prepare(ps, data._5)
256 | e6.index = 6
257 | e6.prepare(ps, data._6)
258 | e7.index = 7
259 | e7.prepare(ps, data._7)
260 | e8.index = 8
261 | e8.prepare(ps, data._8)
262 | e9.index = 9
263 | e9.prepare(ps, data._9)
264 | e10.index = 10
265 | e10.prepare(ps, data._10)
266 | e11.index = 11
267 | e11.prepare(ps, data._11)
268 | e12.index = 12
269 | e12.prepare(ps, data._12)
270 | }
271 | }
272 |
273 | implicit def tupleExecutor13[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6], e7: DataExecutor[T7], e8: DataExecutor[T8], e9: DataExecutor[T9], e10: DataExecutor[T10], e11: DataExecutor[T11], e12: DataExecutor[T12], e13: DataExecutor[T13]): DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13)] = new DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13)] {
274 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13)): Unit = {
275 | e1.index = 1
276 | e1.prepare(ps, data._1)
277 | e2.index = 2
278 | e2.prepare(ps, data._2)
279 | e3.index = 3
280 | e3.prepare(ps, data._3)
281 | e4.index = 4
282 | e4.prepare(ps, data._4)
283 | e5.index = 5
284 | e5.prepare(ps, data._5)
285 | e6.index = 6
286 | e6.prepare(ps, data._6)
287 | e7.index = 7
288 | e7.prepare(ps, data._7)
289 | e8.index = 8
290 | e8.prepare(ps, data._8)
291 | e9.index = 9
292 | e9.prepare(ps, data._9)
293 | e10.index = 10
294 | e10.prepare(ps, data._10)
295 | e11.index = 11
296 | e11.prepare(ps, data._11)
297 | e12.index = 12
298 | e12.prepare(ps, data._12)
299 | e13.index = 13
300 | e13.prepare(ps, data._13)
301 | }
302 | }
303 |
304 | implicit def tupleExecutor14[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6], e7: DataExecutor[T7], e8: DataExecutor[T8], e9: DataExecutor[T9], e10: DataExecutor[T10], e11: DataExecutor[T11], e12: DataExecutor[T12], e13: DataExecutor[T13], e14: DataExecutor[T14]): DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14)] = new DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14)] {
305 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14)): Unit = {
306 | e1.index = 1
307 | e1.prepare(ps, data._1)
308 | e2.index = 2
309 | e2.prepare(ps, data._2)
310 | e3.index = 3
311 | e3.prepare(ps, data._3)
312 | e4.index = 4
313 | e4.prepare(ps, data._4)
314 | e5.index = 5
315 | e5.prepare(ps, data._5)
316 | e6.index = 6
317 | e6.prepare(ps, data._6)
318 | e7.index = 7
319 | e7.prepare(ps, data._7)
320 | e8.index = 8
321 | e8.prepare(ps, data._8)
322 | e9.index = 9
323 | e9.prepare(ps, data._9)
324 | e10.index = 10
325 | e10.prepare(ps, data._10)
326 | e11.index = 11
327 | e11.prepare(ps, data._11)
328 | e12.index = 12
329 | e12.prepare(ps, data._12)
330 | e13.index = 13
331 | e13.prepare(ps, data._13)
332 | e14.index = 14
333 | e14.prepare(ps, data._14)
334 | }
335 | }
336 |
337 | implicit def tupleExecutor15[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6], e7: DataExecutor[T7], e8: DataExecutor[T8], e9: DataExecutor[T9], e10: DataExecutor[T10], e11: DataExecutor[T11], e12: DataExecutor[T12], e13: DataExecutor[T13], e14: DataExecutor[T14], e15: DataExecutor[T15]): DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15)] = new DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15)] {
338 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15)): Unit = {
339 | e1.index = 1
340 | e1.prepare(ps, data._1)
341 | e2.index = 2
342 | e2.prepare(ps, data._2)
343 | e3.index = 3
344 | e3.prepare(ps, data._3)
345 | e4.index = 4
346 | e4.prepare(ps, data._4)
347 | e5.index = 5
348 | e5.prepare(ps, data._5)
349 | e6.index = 6
350 | e6.prepare(ps, data._6)
351 | e7.index = 7
352 | e7.prepare(ps, data._7)
353 | e8.index = 8
354 | e8.prepare(ps, data._8)
355 | e9.index = 9
356 | e9.prepare(ps, data._9)
357 | e10.index = 10
358 | e10.prepare(ps, data._10)
359 | e11.index = 11
360 | e11.prepare(ps, data._11)
361 | e12.index = 12
362 | e12.prepare(ps, data._12)
363 | e13.index = 13
364 | e13.prepare(ps, data._13)
365 | e14.index = 14
366 | e14.prepare(ps, data._14)
367 | e15.index = 15
368 | e15.prepare(ps, data._15)
369 | }
370 | }
371 |
372 | implicit def tupleExecutor16[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6], e7: DataExecutor[T7], e8: DataExecutor[T8], e9: DataExecutor[T9], e10: DataExecutor[T10], e11: DataExecutor[T11], e12: DataExecutor[T12], e13: DataExecutor[T13], e14: DataExecutor[T14], e15: DataExecutor[T15], e16: DataExecutor[T16]): DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16)] = new DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16)] {
373 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16)): Unit = {
374 | e1.index = 1
375 | e1.prepare(ps, data._1)
376 | e2.index = 2
377 | e2.prepare(ps, data._2)
378 | e3.index = 3
379 | e3.prepare(ps, data._3)
380 | e4.index = 4
381 | e4.prepare(ps, data._4)
382 | e5.index = 5
383 | e5.prepare(ps, data._5)
384 | e6.index = 6
385 | e6.prepare(ps, data._6)
386 | e7.index = 7
387 | e7.prepare(ps, data._7)
388 | e8.index = 8
389 | e8.prepare(ps, data._8)
390 | e9.index = 9
391 | e9.prepare(ps, data._9)
392 | e10.index = 10
393 | e10.prepare(ps, data._10)
394 | e11.index = 11
395 | e11.prepare(ps, data._11)
396 | e12.index = 12
397 | e12.prepare(ps, data._12)
398 | e13.index = 13
399 | e13.prepare(ps, data._13)
400 | e14.index = 14
401 | e14.prepare(ps, data._14)
402 | e15.index = 15
403 | e15.prepare(ps, data._15)
404 | e16.index = 16
405 | e16.prepare(ps, data._16)
406 | }
407 | }
408 |
409 | implicit def tupleExecutor17[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6], e7: DataExecutor[T7], e8: DataExecutor[T8], e9: DataExecutor[T9], e10: DataExecutor[T10], e11: DataExecutor[T11], e12: DataExecutor[T12], e13: DataExecutor[T13], e14: DataExecutor[T14], e15: DataExecutor[T15], e16: DataExecutor[T16], e17: DataExecutor[T17]): DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17)] = new DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17)] {
410 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17)): Unit = {
411 | e1.index = 1
412 | e1.prepare(ps, data._1)
413 | e2.index = 2
414 | e2.prepare(ps, data._2)
415 | e3.index = 3
416 | e3.prepare(ps, data._3)
417 | e4.index = 4
418 | e4.prepare(ps, data._4)
419 | e5.index = 5
420 | e5.prepare(ps, data._5)
421 | e6.index = 6
422 | e6.prepare(ps, data._6)
423 | e7.index = 7
424 | e7.prepare(ps, data._7)
425 | e8.index = 8
426 | e8.prepare(ps, data._8)
427 | e9.index = 9
428 | e9.prepare(ps, data._9)
429 | e10.index = 10
430 | e10.prepare(ps, data._10)
431 | e11.index = 11
432 | e11.prepare(ps, data._11)
433 | e12.index = 12
434 | e12.prepare(ps, data._12)
435 | e13.index = 13
436 | e13.prepare(ps, data._13)
437 | e14.index = 14
438 | e14.prepare(ps, data._14)
439 | e15.index = 15
440 | e15.prepare(ps, data._15)
441 | e16.index = 16
442 | e16.prepare(ps, data._16)
443 | e17.index = 17
444 | e17.prepare(ps, data._17)
445 | }
446 | }
447 |
448 | implicit def tupleExecutor18[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6], e7: DataExecutor[T7], e8: DataExecutor[T8], e9: DataExecutor[T9], e10: DataExecutor[T10], e11: DataExecutor[T11], e12: DataExecutor[T12], e13: DataExecutor[T13], e14: DataExecutor[T14], e15: DataExecutor[T15], e16: DataExecutor[T16], e17: DataExecutor[T17], e18: DataExecutor[T18]): DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18)] = new DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18)] {
449 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18)): Unit = {
450 | e1.index = 1
451 | e1.prepare(ps, data._1)
452 | e2.index = 2
453 | e2.prepare(ps, data._2)
454 | e3.index = 3
455 | e3.prepare(ps, data._3)
456 | e4.index = 4
457 | e4.prepare(ps, data._4)
458 | e5.index = 5
459 | e5.prepare(ps, data._5)
460 | e6.index = 6
461 | e6.prepare(ps, data._6)
462 | e7.index = 7
463 | e7.prepare(ps, data._7)
464 | e8.index = 8
465 | e8.prepare(ps, data._8)
466 | e9.index = 9
467 | e9.prepare(ps, data._9)
468 | e10.index = 10
469 | e10.prepare(ps, data._10)
470 | e11.index = 11
471 | e11.prepare(ps, data._11)
472 | e12.index = 12
473 | e12.prepare(ps, data._12)
474 | e13.index = 13
475 | e13.prepare(ps, data._13)
476 | e14.index = 14
477 | e14.prepare(ps, data._14)
478 | e15.index = 15
479 | e15.prepare(ps, data._15)
480 | e16.index = 16
481 | e16.prepare(ps, data._16)
482 | e17.index = 17
483 | e17.prepare(ps, data._17)
484 | e18.index = 18
485 | e18.prepare(ps, data._18)
486 | }
487 | }
488 |
489 | implicit def tupleExecutor19[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6], e7: DataExecutor[T7], e8: DataExecutor[T8], e9: DataExecutor[T9], e10: DataExecutor[T10], e11: DataExecutor[T11], e12: DataExecutor[T12], e13: DataExecutor[T13], e14: DataExecutor[T14], e15: DataExecutor[T15], e16: DataExecutor[T16], e17: DataExecutor[T17], e18: DataExecutor[T18], e19: DataExecutor[T19]): DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19)] = new DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19)] {
490 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19)): Unit = {
491 | e1.index = 1
492 | e1.prepare(ps, data._1)
493 | e2.index = 2
494 | e2.prepare(ps, data._2)
495 | e3.index = 3
496 | e3.prepare(ps, data._3)
497 | e4.index = 4
498 | e4.prepare(ps, data._4)
499 | e5.index = 5
500 | e5.prepare(ps, data._5)
501 | e6.index = 6
502 | e6.prepare(ps, data._6)
503 | e7.index = 7
504 | e7.prepare(ps, data._7)
505 | e8.index = 8
506 | e8.prepare(ps, data._8)
507 | e9.index = 9
508 | e9.prepare(ps, data._9)
509 | e10.index = 10
510 | e10.prepare(ps, data._10)
511 | e11.index = 11
512 | e11.prepare(ps, data._11)
513 | e12.index = 12
514 | e12.prepare(ps, data._12)
515 | e13.index = 13
516 | e13.prepare(ps, data._13)
517 | e14.index = 14
518 | e14.prepare(ps, data._14)
519 | e15.index = 15
520 | e15.prepare(ps, data._15)
521 | e16.index = 16
522 | e16.prepare(ps, data._16)
523 | e17.index = 17
524 | e17.prepare(ps, data._17)
525 | e18.index = 18
526 | e18.prepare(ps, data._18)
527 | e19.index = 19
528 | e19.prepare(ps, data._19)
529 | }
530 | }
531 |
532 | implicit def tupleExecutor20[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6], e7: DataExecutor[T7], e8: DataExecutor[T8], e9: DataExecutor[T9], e10: DataExecutor[T10], e11: DataExecutor[T11], e12: DataExecutor[T12], e13: DataExecutor[T13], e14: DataExecutor[T14], e15: DataExecutor[T15], e16: DataExecutor[T16], e17: DataExecutor[T17], e18: DataExecutor[T18], e19: DataExecutor[T19], e20: DataExecutor[T20]): DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20)] = new DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20)] {
533 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20)): Unit = {
534 | e1.index = 1
535 | e1.prepare(ps, data._1)
536 | e2.index = 2
537 | e2.prepare(ps, data._2)
538 | e3.index = 3
539 | e3.prepare(ps, data._3)
540 | e4.index = 4
541 | e4.prepare(ps, data._4)
542 | e5.index = 5
543 | e5.prepare(ps, data._5)
544 | e6.index = 6
545 | e6.prepare(ps, data._6)
546 | e7.index = 7
547 | e7.prepare(ps, data._7)
548 | e8.index = 8
549 | e8.prepare(ps, data._8)
550 | e9.index = 9
551 | e9.prepare(ps, data._9)
552 | e10.index = 10
553 | e10.prepare(ps, data._10)
554 | e11.index = 11
555 | e11.prepare(ps, data._11)
556 | e12.index = 12
557 | e12.prepare(ps, data._12)
558 | e13.index = 13
559 | e13.prepare(ps, data._13)
560 | e14.index = 14
561 | e14.prepare(ps, data._14)
562 | e15.index = 15
563 | e15.prepare(ps, data._15)
564 | e16.index = 16
565 | e16.prepare(ps, data._16)
566 | e17.index = 17
567 | e17.prepare(ps, data._17)
568 | e18.index = 18
569 | e18.prepare(ps, data._18)
570 | e19.index = 19
571 | e19.prepare(ps, data._19)
572 | e20.index = 20
573 | e20.prepare(ps, data._20)
574 | }
575 | }
576 |
577 | implicit def tupleExecutor21[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6], e7: DataExecutor[T7], e8: DataExecutor[T8], e9: DataExecutor[T9], e10: DataExecutor[T10], e11: DataExecutor[T11], e12: DataExecutor[T12], e13: DataExecutor[T13], e14: DataExecutor[T14], e15: DataExecutor[T15], e16: DataExecutor[T16], e17: DataExecutor[T17], e18: DataExecutor[T18], e19: DataExecutor[T19], e20: DataExecutor[T20], e21: DataExecutor[T21]): DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21)] = new DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21)] {
578 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21)): Unit = {
579 | e1.index = 1
580 | e1.prepare(ps, data._1)
581 | e2.index = 2
582 | e2.prepare(ps, data._2)
583 | e3.index = 3
584 | e3.prepare(ps, data._3)
585 | e4.index = 4
586 | e4.prepare(ps, data._4)
587 | e5.index = 5
588 | e5.prepare(ps, data._5)
589 | e6.index = 6
590 | e6.prepare(ps, data._6)
591 | e7.index = 7
592 | e7.prepare(ps, data._7)
593 | e8.index = 8
594 | e8.prepare(ps, data._8)
595 | e9.index = 9
596 | e9.prepare(ps, data._9)
597 | e10.index = 10
598 | e10.prepare(ps, data._10)
599 | e11.index = 11
600 | e11.prepare(ps, data._11)
601 | e12.index = 12
602 | e12.prepare(ps, data._12)
603 | e13.index = 13
604 | e13.prepare(ps, data._13)
605 | e14.index = 14
606 | e14.prepare(ps, data._14)
607 | e15.index = 15
608 | e15.prepare(ps, data._15)
609 | e16.index = 16
610 | e16.prepare(ps, data._16)
611 | e17.index = 17
612 | e17.prepare(ps, data._17)
613 | e18.index = 18
614 | e18.prepare(ps, data._18)
615 | e19.index = 19
616 | e19.prepare(ps, data._19)
617 | e20.index = 20
618 | e20.prepare(ps, data._20)
619 | e21.index = 21
620 | e21.prepare(ps, data._21)
621 | }
622 | }
623 |
624 | implicit def tupleExecutor22[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22](implicit e1: DataExecutor[T1], e2: DataExecutor[T2], e3: DataExecutor[T3], e4: DataExecutor[T4], e5: DataExecutor[T5], e6: DataExecutor[T6], e7: DataExecutor[T7], e8: DataExecutor[T8], e9: DataExecutor[T9], e10: DataExecutor[T10], e11: DataExecutor[T11], e12: DataExecutor[T12], e13: DataExecutor[T13], e14: DataExecutor[T14], e15: DataExecutor[T15], e16: DataExecutor[T16], e17: DataExecutor[T17], e18: DataExecutor[T18], e19: DataExecutor[T19], e20: DataExecutor[T20], e21: DataExecutor[T21], e22: DataExecutor[T22]): DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22)] = new DataExecutor[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22)] {
625 | override def prepare(ps: PreparedStatement, data: (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22)): Unit = {
626 | e1.index = 1
627 | e1.prepare(ps, data._1)
628 | e2.index = 2
629 | e2.prepare(ps, data._2)
630 | e3.index = 3
631 | e3.prepare(ps, data._3)
632 | e4.index = 4
633 | e4.prepare(ps, data._4)
634 | e5.index = 5
635 | e5.prepare(ps, data._5)
636 | e6.index = 6
637 | e6.prepare(ps, data._6)
638 | e7.index = 7
639 | e7.prepare(ps, data._7)
640 | e8.index = 8
641 | e8.prepare(ps, data._8)
642 | e9.index = 9
643 | e9.prepare(ps, data._9)
644 | e10.index = 10
645 | e10.prepare(ps, data._10)
646 | e11.index = 11
647 | e11.prepare(ps, data._11)
648 | e12.index = 12
649 | e12.prepare(ps, data._12)
650 | e13.index = 13
651 | e13.prepare(ps, data._13)
652 | e14.index = 14
653 | e14.prepare(ps, data._14)
654 | e15.index = 15
655 | e15.prepare(ps, data._15)
656 | e16.index = 16
657 | e16.prepare(ps, data._16)
658 | e17.index = 17
659 | e17.prepare(ps, data._17)
660 | e18.index = 18
661 | e18.prepare(ps, data._18)
662 | e19.index = 19
663 | e19.prepare(ps, data._19)
664 | e20.index = 20
665 | e20.prepare(ps, data._20)
666 | e21.index = 21
667 | e21.prepare(ps, data._21)
668 | e22.index = 22
669 | e22.prepare(ps, data._22)
670 | }
671 | }
672 | }
673 |
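The tuple executors above only bind parameters positionally. A custom type can reuse them through `CustomDataExecutor` by first converting the value into a tuple. Below is a minimal, illustrative sketch (the `User` case class and `UserExecutor` are hypothetical, not part of this source tree); instantiating it requires the implicits from `DataExecutorConversions` to be in implicit scope:

```scala
import info.xiaohei.spark.connector.mysql.transformer.executor.{CustomDataExecutor, DataExecutor}

// Hypothetical entity, used only to show the conversion direction (S = User, T = tuple).
case class User(id: Int, name: String)

// Delegates to the (Int, String) tuple executor: id -> parameter 1, name -> parameter 2.
class UserExecutor(implicit e: DataExecutor[(Int, String)])
  extends CustomDataExecutor[User, (Int, String)] {
  override def convert(data: User): (Int, String) = (data.id, data.name)
}
```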
--------------------------------------------------------------------------------
/src/main/scala/info/xiaohei/spark/connector/mysql/transformer/mapper/DataMapper.scala:
--------------------------------------------------------------------------------
1 | package info.xiaohei.spark.connector.mysql.transformer.mapper
2 |
3 | import java.sql.ResultSet
4 |
5 | /**
6 | * Author: xiaohei
7 | * Date: 2017/4/16
8 | * Email: xiaohei.info@gmail.com
9 | * Host: xiaohei.info
10 | */
11 | trait DataMapper[T] extends Serializable {
12 | var index: Int = 1
13 |
14 | def map(resultSet: ResultSet): T
15 | }
16 |
17 | abstract class CustomDataMapper[S, T](implicit dataMapper: DataMapper[S]) extends DataMapper[T] {
18 |
19 | override def map(resultSet: ResultSet): T = convert(dataMapper.map(resultSet))
20 |
21 | def convert(data: S): T
22 | }
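`CustomDataMapper` is the mirror image on the read side: the built-in mappers turn the `ResultSet` into a tuple, and `convert` turns that tuple into a richer type. A minimal sketch with the same hypothetical `User` class (again, not part of this source tree):

```scala
import info.xiaohei.spark.connector.mysql.transformer.mapper.{CustomDataMapper, DataMapper}

// Hypothetical entity; here S is the tuple read from the ResultSet and T is User.
case class User(id: Int, name: String)

class UserMapper(implicit m: DataMapper[(Int, String)])
  extends CustomDataMapper[(Int, String), User] {
  override def convert(data: (Int, String)): User = User(data._1, data._2)
}
```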
--------------------------------------------------------------------------------
/src/main/scala/info/xiaohei/spark/connector/mysql/transformer/mapper/DataMapperConversions.scala:
--------------------------------------------------------------------------------
1 | package info.xiaohei.spark.connector.mysql.transformer.mapper
2 |
3 | import java.math.BigDecimal
4 | import java.sql.ResultSet
5 |
6 | /**
7 | * Author: xiaohei
8 | * Date: 2017/4/16
9 | * Email: xiaohei.info@gmail.com
10 | * Host: xiaohei.info
11 | */
12 | trait DataMapperConversions extends Serializable {
13 | implicit def intMapper: DataMapper[Int] = new DataMapper[Int] {
14 | override def map(resultSet: ResultSet): Int = {
15 | resultSet.getInt(index)
16 | }
17 | }
18 |
19 | implicit def longMapper: DataMapper[Long] = new DataMapper[Long] {
20 | override def map(resultSet: ResultSet): Long = {
21 | resultSet.getLong(index)
22 | }
23 | }
24 |
25 | implicit def shortMapper: DataMapper[Short] = new DataMapper[Short] {
26 | override def map(resultSet: ResultSet): Short = {
27 | resultSet.getShort(index)
28 | }
29 | }
30 |
31 | implicit def doubleMapper: DataMapper[Double] = new DataMapper[Double] {
32 | override def map(resultSet: ResultSet): Double = {
33 | resultSet.getDouble(index)
34 | }
35 | }
36 |
37 | implicit def floatMapper: DataMapper[Float] = new DataMapper[Float] {
38 | override def map(resultSet: ResultSet): Float = {
39 | resultSet.getFloat(index)
40 | }
41 | }
42 |
43 | implicit def booleanMapper: DataMapper[Boolean] = new DataMapper[Boolean] {
44 | override def map(resultSet: ResultSet): Boolean = {
45 | resultSet.getBoolean(index)
46 | }
47 | }
48 |
49 | implicit def bigDecimalMapper: DataMapper[java.math.BigDecimal] = new DataMapper[java.math.BigDecimal] {
50 | override def map(resultSet: ResultSet): BigDecimal = {
51 | resultSet.getBigDecimal(index)
52 | }
53 | }
54 |
55 | implicit def stringMapper: DataMapper[String] = new DataMapper[String] {
56 | override def map(resultSet: ResultSet): String = {
57 | resultSet.getString(index)
58 | }
59 | }
60 |
61 | implicit def tupleMapper2[T1, T2](implicit m1: DataMapper[T1], m2: DataMapper[T2]): DataMapper[(T1, T2)] = new DataMapper[(T1, T2)] {
62 | override def map(resultSet: ResultSet): (T1, T2) = {
63 | m1.index = 1
64 | m2.index = 2
65 | (m1.map(resultSet), m2.map(resultSet))
66 | }
67 | }
68 |
69 | implicit def tupleMapper3[T1, T2, T3](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3]): DataMapper[(T1, T2, T3)] = new DataMapper[(T1, T2, T3)] {
70 | override def map(resultSet: ResultSet): (T1, T2, T3) = {
71 | m1.index = 1
72 | m2.index = 2
73 | m3.index = 3
74 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet))
75 | }
76 | }
77 |
78 |
79 | implicit def tupleMapper4[T1, T2, T3, T4](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4]): DataMapper[(T1, T2, T3, T4)] = new DataMapper[(T1, T2, T3, T4)] {
80 | override def map(resultSet: ResultSet): (T1, T2, T3, T4) = {
81 | m1.index = 1
82 | m2.index = 2
83 | m3.index = 3
84 | m4.index = 4
85 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet))
86 | }
87 | }
88 |
89 | implicit def tupleMapper5[T1, T2, T3, T4, T5](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5]): DataMapper[(T1, T2, T3, T4, T5)] = new DataMapper[(T1, T2, T3, T4, T5)] {
90 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5) = {
91 | m1.index = 1
92 | m2.index = 2
93 | m3.index = 3
94 | m4.index = 4
95 | m5.index = 5
96 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet))
97 | }
98 | }
99 |
100 | implicit def tupleMapper6[T1, T2, T3, T4, T5, T6](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6]): DataMapper[(T1, T2, T3, T4, T5, T6)] = new DataMapper[(T1, T2, T3, T4, T5, T6)] {
101 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6) = {
102 | m1.index = 1
103 | m2.index = 2
104 | m3.index = 3
105 | m4.index = 4
106 | m5.index = 5
107 | m6.index = 6
108 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet))
109 | }
110 | }
111 |
112 | implicit def tupleMapper7[T1, T2, T3, T4, T5, T6, T7](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6], m7: DataMapper[T7]): DataMapper[(T1, T2, T3, T4, T5, T6, T7)] = new DataMapper[(T1, T2, T3, T4, T5, T6, T7)] {
113 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6, T7) = {
114 | m1.index = 1
115 | m2.index = 2
116 | m3.index = 3
117 | m4.index = 4
118 | m5.index = 5
119 | m6.index = 6
120 | m7.index = 7
121 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet), m7.map(resultSet))
122 | }
123 | }
124 |
125 | implicit def tupleMapper8[T1, T2, T3, T4, T5, T6, T7, T8](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6], m7: DataMapper[T7], m8: DataMapper[T8]): DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8)] = new DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8)] {
126 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6, T7, T8) = {
127 | m1.index = 1
128 | m2.index = 2
129 | m3.index = 3
130 | m4.index = 4
131 | m5.index = 5
132 | m6.index = 6
133 | m7.index = 7
134 | m8.index = 8
135 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet), m7.map(resultSet), m8.map(resultSet))
136 | }
137 | }
138 |
139 | implicit def tupleMapper9[T1, T2, T3, T4, T5, T6, T7, T8, T9](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6], m7: DataMapper[T7], m8: DataMapper[T8], m9: DataMapper[T9]): DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9)] = new DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9)] {
140 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6, T7, T8, T9) = {
141 | m1.index = 1
142 | m2.index = 2
143 | m3.index = 3
144 | m4.index = 4
145 | m5.index = 5
146 | m6.index = 6
147 | m7.index = 7
148 | m8.index = 8
149 | m9.index = 9
150 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet), m7.map(resultSet), m8.map(resultSet), m9.map(resultSet))
151 | }
152 | }
153 |
154 | implicit def tupleMapper10[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6], m7: DataMapper[T7], m8: DataMapper[T8], m9: DataMapper[T9], m10: DataMapper[T10]): DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10)] = new DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10)] {
155 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10) = {
156 | m1.index = 1
157 | m2.index = 2
158 | m3.index = 3
159 | m4.index = 4
160 | m5.index = 5
161 | m6.index = 6
162 | m7.index = 7
163 | m8.index = 8
164 | m9.index = 9
165 | m10.index = 10
166 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet), m7.map(resultSet), m8.map(resultSet), m9.map(resultSet), m10.map(resultSet))
167 | }
168 | }
169 |
170 | implicit def tupleMapper11[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6], m7: DataMapper[T7], m8: DataMapper[T8], m9: DataMapper[T9], m10: DataMapper[T10], m11: DataMapper[T11]): DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11)] = new DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11)] {
171 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11) = {
172 | m1.index = 1
173 | m2.index = 2
174 | m3.index = 3
175 | m4.index = 4
176 | m5.index = 5
177 | m6.index = 6
178 | m7.index = 7
179 | m8.index = 8
180 | m9.index = 9
181 | m10.index = 10
182 | m11.index = 11
183 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet), m7.map(resultSet), m8.map(resultSet), m9.map(resultSet), m10.map(resultSet), m11.map(resultSet))
184 | }
185 | }
186 |
187 | implicit def tupleMapper12[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6], m7: DataMapper[T7], m8: DataMapper[T8], m9: DataMapper[T9], m10: DataMapper[T10], m11: DataMapper[T11], m12: DataMapper[T12]): DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12)] = new DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12)] {
188 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12) = {
189 | m1.index = 1
190 | m2.index = 2
191 | m3.index = 3
192 | m4.index = 4
193 | m5.index = 5
194 | m6.index = 6
195 | m7.index = 7
196 | m8.index = 8
197 | m9.index = 9
198 | m10.index = 10
199 | m11.index = 11
200 | m12.index = 12
201 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet), m7.map(resultSet), m8.map(resultSet), m9.map(resultSet), m10.map(resultSet), m11.map(resultSet), m12.map(resultSet))
202 | }
203 | }
204 |
205 | implicit def tupleMapper13[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6], m7: DataMapper[T7], m8: DataMapper[T8], m9: DataMapper[T9], m10: DataMapper[T10], m11: DataMapper[T11], m12: DataMapper[T12], m13: DataMapper[T13]): DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13)] = new DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13)] {
206 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13) = {
207 | m1.index = 1
208 | m2.index = 2
209 | m3.index = 3
210 | m4.index = 4
211 | m5.index = 5
212 | m6.index = 6
213 | m7.index = 7
214 | m8.index = 8
215 | m9.index = 9
216 | m10.index = 10
217 | m11.index = 11
218 | m12.index = 12
219 | m13.index = 13
220 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet), m7.map(resultSet), m8.map(resultSet), m9.map(resultSet), m10.map(resultSet), m11.map(resultSet), m12.map(resultSet), m13.map(resultSet))
221 | }
222 | }
223 |
224 | implicit def tupleMapper14[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6], m7: DataMapper[T7], m8: DataMapper[T8], m9: DataMapper[T9], m10: DataMapper[T10], m11: DataMapper[T11], m12: DataMapper[T12], m13: DataMapper[T13], m14: DataMapper[T14]): DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14)] = new DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14)] {
225 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14) = {
226 | m1.index = 1
227 | m2.index = 2
228 | m3.index = 3
229 | m4.index = 4
230 | m5.index = 5
231 | m6.index = 6
232 | m7.index = 7
233 | m8.index = 8
234 | m9.index = 9
235 | m10.index = 10
236 | m11.index = 11
237 | m12.index = 12
238 | m13.index = 13
239 | m14.index = 14
240 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet), m7.map(resultSet), m8.map(resultSet), m9.map(resultSet), m10.map(resultSet), m11.map(resultSet), m12.map(resultSet), m13.map(resultSet), m14.map(resultSet))
241 | }
242 | }
243 |
244 | implicit def tupleMapper15[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6], m7: DataMapper[T7], m8: DataMapper[T8], m9: DataMapper[T9], m10: DataMapper[T10], m11: DataMapper[T11], m12: DataMapper[T12], m13: DataMapper[T13], m14: DataMapper[T14], m15: DataMapper[T15]): DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15)] = new DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15)] {
245 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15) = {
246 | m1.index = 1
247 | m2.index = 2
248 | m3.index = 3
249 | m4.index = 4
250 | m5.index = 5
251 | m6.index = 6
252 | m7.index = 7
253 | m8.index = 8
254 | m9.index = 9
255 | m10.index = 10
256 | m11.index = 11
257 | m12.index = 12
258 | m13.index = 13
259 | m14.index = 14
260 | m15.index = 15
261 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet), m7.map(resultSet), m8.map(resultSet), m9.map(resultSet), m10.map(resultSet), m11.map(resultSet), m12.map(resultSet), m13.map(resultSet), m14.map(resultSet), m15.map(resultSet))
262 | }
263 | }
264 |
265 | implicit def tupleMapper16[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6], m7: DataMapper[T7], m8: DataMapper[T8], m9: DataMapper[T9], m10: DataMapper[T10], m11: DataMapper[T11], m12: DataMapper[T12], m13: DataMapper[T13], m14: DataMapper[T14], m15: DataMapper[T15], m16: DataMapper[T16]): DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16)] = new DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16)] {
266 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16) = {
267 | m1.index = 1
268 | m2.index = 2
269 | m3.index = 3
270 | m4.index = 4
271 | m5.index = 5
272 | m6.index = 6
273 | m7.index = 7
274 | m8.index = 8
275 | m9.index = 9
276 | m10.index = 10
277 | m11.index = 11
278 | m12.index = 12
279 | m13.index = 13
280 | m14.index = 14
281 | m15.index = 15
282 | m16.index = 16
283 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet), m7.map(resultSet), m8.map(resultSet), m9.map(resultSet), m10.map(resultSet), m11.map(resultSet), m12.map(resultSet), m13.map(resultSet), m14.map(resultSet), m15.map(resultSet), m16.map(resultSet))
284 | }
285 | }
286 |
287 | implicit def tupleMapper17[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6], m7: DataMapper[T7], m8: DataMapper[T8], m9: DataMapper[T9], m10: DataMapper[T10], m11: DataMapper[T11], m12: DataMapper[T12], m13: DataMapper[T13], m14: DataMapper[T14], m15: DataMapper[T15], m16: DataMapper[T16], m17: DataMapper[T17]): DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17)] = new DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17)] {
288 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17) = {
289 | m1.index = 1
290 | m2.index = 2
291 | m3.index = 3
292 | m4.index = 4
293 | m5.index = 5
294 | m6.index = 6
295 | m7.index = 7
296 | m8.index = 8
297 | m9.index = 9
298 | m10.index = 10
299 | m11.index = 11
300 | m12.index = 12
301 | m13.index = 13
302 | m14.index = 14
303 | m15.index = 15
304 | m16.index = 16
305 | m17.index = 17
306 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet), m7.map(resultSet), m8.map(resultSet), m9.map(resultSet), m10.map(resultSet), m11.map(resultSet), m12.map(resultSet), m13.map(resultSet), m14.map(resultSet), m15.map(resultSet), m16.map(resultSet), m17.map(resultSet))
307 | }
308 | }
309 |
310 | implicit def tupleMapper18[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6], m7: DataMapper[T7], m8: DataMapper[T8], m9: DataMapper[T9], m10: DataMapper[T10], m11: DataMapper[T11], m12: DataMapper[T12], m13: DataMapper[T13], m14: DataMapper[T14], m15: DataMapper[T15], m16: DataMapper[T16], m17: DataMapper[T17], m18: DataMapper[T18]): DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18)] = new DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18)] {
311 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18) = {
312 | m1.index = 1
313 | m2.index = 2
314 | m3.index = 3
315 | m4.index = 4
316 | m5.index = 5
317 | m6.index = 6
318 | m7.index = 7
319 | m8.index = 8
320 | m9.index = 9
321 | m10.index = 10
322 | m11.index = 11
323 | m12.index = 12
324 | m13.index = 13
325 | m14.index = 14
326 | m15.index = 15
327 | m16.index = 16
328 | m17.index = 17
329 | m18.index = 18
330 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet), m7.map(resultSet), m8.map(resultSet), m9.map(resultSet), m10.map(resultSet), m11.map(resultSet), m12.map(resultSet), m13.map(resultSet), m14.map(resultSet), m15.map(resultSet), m16.map(resultSet), m17.map(resultSet), m18.map(resultSet))
331 | }
332 | }
333 |
334 | implicit def tupleMapper19[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6], m7: DataMapper[T7], m8: DataMapper[T8], m9: DataMapper[T9], m10: DataMapper[T10], m11: DataMapper[T11], m12: DataMapper[T12], m13: DataMapper[T13], m14: DataMapper[T14], m15: DataMapper[T15], m16: DataMapper[T16], m17: DataMapper[T17], m18: DataMapper[T18], m19: DataMapper[T19]): DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19)] = new DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19)] {
335 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19) = {
336 | m1.index = 1
337 | m2.index = 2
338 | m3.index = 3
339 | m4.index = 4
340 | m5.index = 5
341 | m6.index = 6
342 | m7.index = 7
343 | m8.index = 8
344 | m9.index = 9
345 | m10.index = 10
346 | m11.index = 11
347 | m12.index = 12
348 | m13.index = 13
349 | m14.index = 14
350 | m15.index = 15
351 | m16.index = 16
352 | m17.index = 17
353 | m18.index = 18
354 | m19.index = 19
355 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet), m7.map(resultSet), m8.map(resultSet), m9.map(resultSet), m10.map(resultSet), m11.map(resultSet), m12.map(resultSet), m13.map(resultSet), m14.map(resultSet), m15.map(resultSet), m16.map(resultSet), m17.map(resultSet), m18.map(resultSet), m19.map(resultSet))
356 | }
357 | }
358 |
359 | implicit def tupleMapper20[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6], m7: DataMapper[T7], m8: DataMapper[T8], m9: DataMapper[T9], m10: DataMapper[T10], m11: DataMapper[T11], m12: DataMapper[T12], m13: DataMapper[T13], m14: DataMapper[T14], m15: DataMapper[T15], m16: DataMapper[T16], m17: DataMapper[T17], m18: DataMapper[T18], m19: DataMapper[T19], m20: DataMapper[T20]): DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20)] = new DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20)] {
360 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20) = {
361 | m1.index = 1
362 | m2.index = 2
363 | m3.index = 3
364 | m4.index = 4
365 | m5.index = 5
366 | m6.index = 6
367 | m7.index = 7
368 | m8.index = 8
369 | m9.index = 9
370 | m10.index = 10
371 | m11.index = 11
372 | m12.index = 12
373 | m13.index = 13
374 | m14.index = 14
375 | m15.index = 15
376 | m16.index = 16
377 | m17.index = 17
378 | m18.index = 18
379 | m19.index = 19
380 | m20.index = 20
381 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet), m7.map(resultSet), m8.map(resultSet), m9.map(resultSet), m10.map(resultSet), m11.map(resultSet), m12.map(resultSet), m13.map(resultSet), m14.map(resultSet), m15.map(resultSet), m16.map(resultSet), m17.map(resultSet), m18.map(resultSet), m19.map(resultSet), m20.map(resultSet))
382 | }
383 | }
384 |
385 | implicit def tupleMapper21[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6], m7: DataMapper[T7], m8: DataMapper[T8], m9: DataMapper[T9], m10: DataMapper[T10], m11: DataMapper[T11], m12: DataMapper[T12], m13: DataMapper[T13], m14: DataMapper[T14], m15: DataMapper[T15], m16: DataMapper[T16], m17: DataMapper[T17], m18: DataMapper[T18], m19: DataMapper[T19], m20: DataMapper[T20], m21: DataMapper[T21]): DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21)] = new DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21)] {
386 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21) = {
387 | m1.index = 1
388 | m2.index = 2
389 | m3.index = 3
390 | m4.index = 4
391 | m5.index = 5
392 | m6.index = 6
393 | m7.index = 7
394 | m8.index = 8
395 | m9.index = 9
396 | m10.index = 10
397 | m11.index = 11
398 | m12.index = 12
399 | m13.index = 13
400 | m14.index = 14
401 | m15.index = 15
402 | m16.index = 16
403 | m17.index = 17
404 | m18.index = 18
405 | m19.index = 19
406 | m20.index = 20
407 | m21.index = 21
408 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet), m7.map(resultSet), m8.map(resultSet), m9.map(resultSet), m10.map(resultSet), m11.map(resultSet), m12.map(resultSet), m13.map(resultSet), m14.map(resultSet), m15.map(resultSet), m16.map(resultSet), m17.map(resultSet), m18.map(resultSet), m19.map(resultSet), m20.map(resultSet), m21.map(resultSet))
409 | }
410 | }
411 |
412 | implicit def tupleMapper22[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22](implicit m1: DataMapper[T1], m2: DataMapper[T2], m3: DataMapper[T3], m4: DataMapper[T4], m5: DataMapper[T5], m6: DataMapper[T6], m7: DataMapper[T7], m8: DataMapper[T8], m9: DataMapper[T9], m10: DataMapper[T10], m11: DataMapper[T11], m12: DataMapper[T12], m13: DataMapper[T13], m14: DataMapper[T14], m15: DataMapper[T15], m16: DataMapper[T16], m17: DataMapper[T17], m18: DataMapper[T18], m19: DataMapper[T19], m20: DataMapper[T20], m21: DataMapper[T21], m22: DataMapper[T22]): DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22)] = new DataMapper[(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22)] {
413 | override def map(resultSet: ResultSet): (T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22) = {
414 | m1.index = 1
415 | m2.index = 2
416 | m3.index = 3
417 | m4.index = 4
418 | m5.index = 5
419 | m6.index = 6
420 | m7.index = 7
421 | m8.index = 8
422 | m9.index = 9
423 | m10.index = 10
424 | m11.index = 11
425 | m12.index = 12
426 | m13.index = 13
427 | m14.index = 14
428 | m15.index = 15
429 | m16.index = 16
430 | m17.index = 17
431 | m18.index = 18
432 | m19.index = 19
433 | m20.index = 20
434 | m21.index = 21
435 | m22.index = 22
436 | (m1.map(resultSet), m2.map(resultSet), m3.map(resultSet), m4.map(resultSet), m5.map(resultSet), m6.map(resultSet), m7.map(resultSet), m8.map(resultSet), m9.map(resultSet), m10.map(resultSet), m11.map(resultSet), m12.map(resultSet), m13.map(resultSet), m14.map(resultSet), m15.map(resultSet), m16.map(resultSet), m17.map(resultSet), m18.map(resultSet), m19.map(resultSet), m20.map(resultSet), m21.map(resultSet), m22.map(resultSet))
437 | }
438 | }
439 | }
440 |
--------------------------------------------------------------------------------
/src/main/scala/info/xiaohei/spark/test/Test.scala:
--------------------------------------------------------------------------------
1 | package info.xiaohei.spark.test
2 |
3 | /**
4 | * Author: xiaohei
5 | * Date: 2017/5/10
6 | * Email: xiaohei.info@gmail.com
7 | * Host: xiaohei.info
8 | */
9 | object Test {
10 |
11 | }
12 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/rdd/HBaseKerberosUtil.scala:
--------------------------------------------------------------------------------
1 | package org.apache.spark.rdd
2 |
3 | import java.io.IOException
4 | import java.security.PrivilegedExceptionAction
5 |
6 | import org.apache.hadoop.security.UserGroupInformation
7 | import org.apache.spark.util.SerializableConfiguration
8 |
9 | /**
10 | * Author: xiaohei
11 | * Date: 2017/7/7
12 | * Email: xiaohei.info@gmail.com
13 | * Host: xiaohei.info
14 | */
15 | object HBaseKerberosUtil {
16 | @throws[IOException]
17 | def ugiDoAs[A](conf: SerializableConfiguration, principle: String, keytab: String, func: () => A): A = {
18 | UserGroupInformation.setConfiguration(conf.value)
19 | val ugi: UserGroupInformation = UserGroupInformation
20 | .loginUserFromKeytabAndReturnUGI(principle, keytab)
21 | UserGroupInformation.setLoginUser(ugi)
22 | ugi.checkTGTAndReloginFromKeytab()
23 | ugi.doAs(new PrivilegedExceptionAction[A] {
24 | def run: A = {
25 | func()
26 | }
27 | })
28 | }
29 | }
30 |
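A rough usage sketch, not taken from the source: the principal and keytab path below are placeholders, and because `SerializableConfiguration` is `private[spark]` in this Spark version, the calling code has to live under the `org.apache.spark` package, just like this utility.

```scala
package org.apache.spark.rdd

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.security.UserGroupInformation
import org.apache.spark.util.SerializableConfiguration

// Hypothetical example object; replace the principal/keytab with real values for your cluster.
object KerberosExample {
  def main(args: Array[String]): Unit = {
    val conf = new SerializableConfiguration(new Configuration())
    // Any Hadoop/HBase call placed inside the function runs as the keytab user.
    val user = HBaseKerberosUtil.ugiDoAs[String](conf, "user@EXAMPLE.COM", "/path/to/user.keytab",
      () => UserGroupInformation.getCurrentUser.getUserName)
    println(s"Authenticated as: $user")
  }
}
```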
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/rdd/HBaseScanRDD.scala:
--------------------------------------------------------------------------------
1 | package org.apache.spark.rdd
2 |
3 | import java.io.EOFException
4 | import java.text.SimpleDateFormat
5 | import java.util.Date
6 |
7 | import org.apache.hadoop.conf.{Configurable, Configuration}
8 | import org.apache.hadoop.io.Writable
9 | import org.apache.hadoop.mapred.JobConf
10 | import org.apache.hadoop.mapreduce._
11 | import org.apache.hadoop.mapreduce.lib.input.{CombineFileSplit, FileSplit}
12 | import org.apache.spark._
13 | import org.apache.spark.annotation.DeveloperApi
14 | import org.apache.spark.deploy.SparkHadoopUtil
15 | import org.apache.spark.executor.{DataReadMethod, InputMetrics}
16 | import org.apache.spark.mapreduce.SparkHadoopMapReduceUtil
17 | import org.apache.spark.rdd.NewHadoopRDD.NewHadoopMapPartitionsWithSplitRDD
18 | import org.apache.spark.storage.StorageLevel
19 | import org.apache.spark.util.{SerializableConfiguration, ShutdownHookManager}
20 |
21 | import scala.reflect.ClassTag
22 |
23 | class HBaseScanRDD[K, V](
24 | principle: String,
25 | keytab: String,
26 | sc: SparkContext,
27 | inputFormatClass: Class[_ <: InputFormat[K, V]],
28 | keyClass: Class[K],
29 | valueClass: Class[V],
30 | @transient private val _conf: Configuration)
31 | extends RDD[(K, V)](sc, Nil)
32 | with SparkHadoopMapReduceUtil
33 | with Logging {
34 | // A Hadoop Configuration can be about 10 KB, which is pretty big, so broadcast it
35 | private val confBroadcast = sc.broadcast(new SerializableConfiguration(_conf))
36 | // private val serializableConf = new SerializableWritable(_conf)
37 | private val jobTrackerId: String = {
38 | val formatter = new SimpleDateFormat("yyyyMMddHHmm")
39 | formatter.format(new Date())
40 | }
41 | @transient protected val jobId = new JobID(jobTrackerId, id)
42 | private val shouldCloneJobConf = sparkContext.conf.getBoolean("spark.hadoop.cloneConf", defaultValue = false)
43 | private val ignoreCorruptFiles =
44 | sparkContext.conf.getBoolean("spark.files.ignoreCorruptFiles", defaultValue = true)
45 |
46 | def getConf: Configuration = {
47 | val conf: Configuration = confBroadcast.value.value
48 | if (shouldCloneJobConf) {
49 | // Hadoop Configuration objects are not thread-safe, which may lead to various problems if
50 | // one job modifies a configuration while another reads it (SPARK-2546, SPARK-10611). This
51 | // problem occurs somewhat rarely because most jobs treat the configuration as though it's
52 | // immutable. One solution, implemented here, is to clone the Configuration object.
53 | // Unfortunately, this clone can be very expensive. To avoid unexpected performance
54 | // regressions for workloads and Hadoop versions that do not suffer from these thread-safety
55 | // issues, this cloning is disabled by default.
56 | NewHadoopRDD.CONFIGURATION_INSTANTIATION_LOCK.synchronized {
57 | logDebug("Cloning Hadoop Configuration")
58 | // The Configuration passed in is actually a JobConf and possibly contains credentials.
59 | // To keep those credentials properly we have to create a new JobConf not a Configuration.
60 | if (conf.isInstanceOf[JobConf]) {
61 | new JobConf(conf)
62 | } else {
63 | new Configuration(conf)
64 | }
65 | }
66 | } else {
67 | conf
68 | }
69 | }
70 |
71 | override def getPartitions: Array[Partition] = {
72 | val inputFormat = inputFormatClass.newInstance
73 | inputFormat match {
74 | case configurable: Configurable =>
75 | configurable.setConf(_conf)
76 | case _ =>
77 | }
78 | val jobContext = newJobContext(_conf, jobId)
79 | val rawSplits = HBaseKerberosUtil.ugiDoAs(confBroadcast.value, principle, keytab, () => {
80 | inputFormat.getSplits(jobContext).toArray
81 | }: Array[Object])
82 | val result = new Array[Partition](rawSplits.length)
83 | for (i <- rawSplits.indices) {
84 | result(i) = new NewHadoopPartition(id, i, rawSplits(i).asInstanceOf[InputSplit with Writable])
85 | }
86 | result
87 | }
88 |
89 | override def compute(theSplit: Partition, context: TaskContext): InterruptibleIterator[(K, V)] = {
90 | val iter = new Iterator[(K, V)] {
91 | val split: NewHadoopPartition = theSplit.asInstanceOf[NewHadoopPartition]
92 | logInfo("Input split: " + split.serializableHadoopSplit)
93 | val conf: Configuration = getConf
94 | val inputMetrics: InputMetrics = context.taskMetrics
95 | .getInputMetricsForReadMethod(DataReadMethod.Hadoop)
96 | // Sets the thread local variable for the file's name
97 | split.serializableHadoopSplit.value match {
98 | case fs: FileSplit => SqlNewHadoopRDDState.setInputFileName(fs.getPath.toString)
99 | case _ => SqlNewHadoopRDDState.unsetInputFileName()
100 | }
101 | // Find a function that will return the FileSystem bytes read by this thread. Do this before
102 | // creating RecordReader, because RecordReader's constructor might read some bytes
103 | val bytesReadCallback: Option[() => Long] = inputMetrics.bytesReadCallback.orElse {
104 | split.serializableHadoopSplit.value match {
105 | case _: FileSplit | _: CombineFileSplit =>
106 | SparkHadoopUtil.get.getFSBytesReadOnThreadCallback()
107 | case _ => None
108 | }
109 | }
110 | inputMetrics.setBytesReadCallback(bytesReadCallback)
111 | val format: InputFormat[K, V] = inputFormatClass.newInstance
112 | format match {
113 | case configurable: Configurable =>
114 | configurable.setConf(conf)
115 | case _ =>
116 | }
117 | val attemptId: TaskAttemptID = newTaskAttemptID(jobTrackerId, id, isMap = true, split.index, 0)
118 | val hadoopAttemptContext: TaskAttemptContext = newTaskAttemptContext(conf, attemptId)
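119 | // Create and initialize the RecordReader inside the same Kerberos doAs block, so the
119 | // actual HBase scan on this executor is performed with the keytab-based login.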
119 | private var reader = HBaseKerberosUtil.ugiDoAs(confBroadcast.value, principle, keytab, () => {
120 | val _reader = format.createRecordReader(
121 | split.serializableHadoopSplit.value, hadoopAttemptContext)
122 | _reader.initialize(split.serializableHadoopSplit.value, hadoopAttemptContext)
123 | _reader
124 | }: RecordReader[K, V])
125 | // Register an on-task-completion callback to close the input stream.
126 | context.addTaskCompletionListener(_ => close())
127 | var havePair = false
128 | var finished = false
129 | var recordsSinceMetricsUpdate = 0
130 |
131 | override def hasNext: Boolean = {
132 | if (!finished && !havePair) {
133 | try {
134 | finished = !reader.nextKeyValue
135 | } catch {
136 | case _: EOFException if ignoreCorruptFiles => finished = true
137 | }
138 | if (finished) {
139 | // Close and release the reader here; close() will also be called when the task
140 | // completes, but for tasks that read from many files, it helps to release the
141 | // resources early.
142 | close()
143 | }
144 | havePair = !finished
145 | }
146 | !finished
147 | }
148 |
149 | override def next(): (K, V) = {
150 | if (!hasNext) {
151 | throw new java.util.NoSuchElementException("End of stream")
152 | }
153 | havePair = false
154 | if (!finished) {
155 | inputMetrics.incRecordsRead(1)
156 | }
157 | (reader.getCurrentKey, reader.getCurrentValue)
158 | }
159 |
160 | private def close() {
161 | if (reader != null) {
162 | SqlNewHadoopRDDState.unsetInputFileName()
163 | // Close the reader and release it. Note: it's very important that we don't close the
164 | // reader more than once, since that exposes us to MAPREDUCE-5918 when running against
165 | // Hadoop 1.x and older Hadoop 2.x releases. That bug can lead to non-deterministic
166 | // corruption issues when reading compressed input.
167 | try {
168 | reader.close()
169 | } catch {
170 | case e: Exception =>
171 | if (!ShutdownHookManager.inShutdown()) {
172 | logWarning("Exception in RecordReader.close()", e)
173 | }
174 | } finally {
175 | reader = null
176 | }
177 | if (bytesReadCallback.isDefined) {
178 | inputMetrics.updateBytesRead()
179 | } else if (split.serializableHadoopSplit.value.isInstanceOf[FileSplit] ||
180 | split.serializableHadoopSplit.value.isInstanceOf[CombineFileSplit]) {
181 | // If we can't get the bytes read from the FS stats, fall back to the split size,
182 | // which may be inaccurate.
183 | try {
184 | inputMetrics.incBytesRead(split.serializableHadoopSplit.value.getLength)
185 | } catch {
186 | case e: java.io.IOException =>
187 | logWarning("Unable to get input size to set InputMetrics for task", e)
188 | }
189 | }
190 | }
191 | }
192 | }
193 | new InterruptibleIterator(context, iter)
194 | }
195 |
196 | /** Maps over a partition, providing the InputSplit that was used as the base of the partition. */
197 | @DeveloperApi
198 | def mapPartitionsWithInputSplit[U: ClassTag](
199 | f: (InputSplit, Iterator[(K, V)]) => Iterator[U],
200 | preservesPartitioning: Boolean = false): RDD[U] = {
201 | new NewHadoopMapPartitionsWithSplitRDD(this, f, preservesPartitioning)
202 | }
203 |
204 | override def getPreferredLocations(hsplit: Partition): Seq[String] = {
205 | val split = hsplit.asInstanceOf[NewHadoopPartition].serializableHadoopSplit.value
206 | val locs = HadoopRDD.SPLIT_INFO_REFLECTIONS match {
207 | case Some(c) =>
208 | try {
209 | val infos = c.newGetLocationInfo.invoke(split).asInstanceOf[Array[AnyRef]]
210 | Some(HadoopRDD.convertSplitLocationInfo(infos))
211 | } catch {
212 | case e: Exception =>
213 | logDebug("Failed to use InputSplit#getLocationInfo.", e)
214 | None
215 | }
216 | case None => None
217 | }
218 | locs.getOrElse(split.getLocations.filter(_ != "localhost"))
219 | }
220 |
221 | override def persist(storageLevel: StorageLevel): this.type = {
222 | if (storageLevel.deserialized) {
223 | logWarning("Caching NewHadoopRDDs as deserialized objects usually leads to undesired" +
224 | " behavior because Hadoop's RecordReader reuses the same Writable object for all records." +
225 | " Use a map transformation to make copies of the records.")
226 | }
227 | super.persist(storageLevel)
228 | }
229 | }
--------------------------------------------------------------------------------