├── .cache ├── SparkOnHBase.Design.Doc.docx ├── .settings ├── org.eclipse.jdt.ui.prefs ├── org.eclipse.m2e.core.prefs ├── org.eclipse.core.resources.prefs └── org.eclipse.jdt.core.prefs ├── README.md ├── src └── main │ ├── scala │ └── org │ │ └── apache │ │ ├── spark │ │ └── SparkHadoopMapReduceUtilExtended.scala │ │ └── hadoop │ │ └── hbase │ │ └── spark │ │ ├── ByteArrayWrapper.scala │ │ ├── ByteArrayComparable.scala │ │ ├── FamilyHFileWriteOptions.scala │ │ ├── KeyFamilyQualifier.scala │ │ ├── BulkLoadPartitioner.scala │ │ ├── FamiliesQualifiersValues.scala │ │ ├── example │ │ ├── hbasecontext │ │ │ ├── HBaseDistributedScanExample.scala │ │ │ ├── HBaseBulkDeleteExample.scala │ │ │ ├── HBaseStreamingBulkPutExample.scala │ │ │ ├── HBaseBulkPutExampleFromFile.scala │ │ │ ├── HBaseBulkPutExample.scala │ │ │ ├── HBaseBulkPutTimestampExample.scala │ │ │ └── HBaseBulkGetExample.scala │ │ └── rdd │ │ │ ├── HBaseBulkDeleteExample.scala │ │ │ ├── HBaseBulkPutExample.scala │ │ │ ├── HBaseBulkGetExample.scala │ │ │ ├── HBaseMapPartitionExample.scala │ │ │ └── HBaseForeachPartitionExample.scala │ │ ├── ColumnFamilyQualifierMapKeyWrapper.scala │ │ ├── HBaseScanRDD.scala │ │ ├── HBaseDStreamFunctions.scala │ │ ├── DynamicLogicExpression.scala │ │ ├── HBaseRDDFunctions.scala │ │ └── JavaHBaseContext.scala │ └── java │ └── org │ └── apache │ └── hadoop │ └── hbase │ └── spark │ └── SparkSQLPushDownFilter.java ├── .project ├── .classpath ├── pom.unittest.but.no.cluster.xml ├── LICENSE.txt └── pom.xml /.cache: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmalaska/SparkOnHBase/HEAD/.cache -------------------------------------------------------------------------------- /SparkOnHBase.Design.Doc.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmalaska/SparkOnHBase/HEAD/SparkOnHBase.Design.Doc.docx -------------------------------------------------------------------------------- /.settings/org.eclipse.jdt.ui.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | formatter_profile=_ted 3 | formatter_settings_version=12 4 | -------------------------------------------------------------------------------- /.settings/org.eclipse.m2e.core.prefs: -------------------------------------------------------------------------------- 1 | activeProfiles= 2 | eclipse.preferences.version=1 3 | resolveWorkspaceProjects=true 4 | version=1 5 | -------------------------------------------------------------------------------- /.settings/org.eclipse.core.resources.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | encoding//src/main/java=UTF-8 3 | encoding//src/test/java=UTF-8 4 | encoding/=UTF-8 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SparkOnHBase 2 | ## Overview 3 | This is a back port of the HBase Spark Module but this has fixes to work on kerberos 4 | 5 | Documentation on how to use this code is at the following link 6 | 7 | https://hbase.apache.org/book.html#spark -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/SparkHadoopMapReduceUtilExtended.scala: -------------------------------------------------------------------------------- 1 | package 
org.apache.spark 2 | 3 | import org.apache.spark.mapreduce.SparkHadoopMapReduceUtil 4 | 5 | trait SparkHadoopMapReduceUtilExtended extends SparkHadoopMapReduceUtil{ 6 | 7 | } -------------------------------------------------------------------------------- /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | spark.hbase 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.m2e.core.maven2Builder 10 | 11 | 12 | 13 | 14 | org.scala-ide.sdt.core.scalabuilder 15 | 16 | 17 | 18 | 19 | 20 | org.scala-ide.sdt.core.scalanature 21 | org.eclipse.jdt.core.javanature 22 | org.eclipse.m2e.core.maven2Nature 23 | 24 | 25 | -------------------------------------------------------------------------------- /.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/hadoop/hbase/spark/ByteArrayWrapper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.hadoop.hbase.spark 18 | 19 | import java.io.Serializable 20 | 21 | import org.apache.hadoop.hbase.util.Bytes 22 | 23 | /** 24 | * This is a wrapper over a byte array so it can work as 25 | * a key in a hashMap 26 | * 27 | * @param value The Byte Array value 28 | */ 29 | class ByteArrayWrapper (var value:Array[Byte]) 30 | extends Comparable[ByteArrayWrapper] with Serializable { 31 | override def compareTo(valueOther: ByteArrayWrapper): Int = { 32 | Bytes.compareTo(value,valueOther.value) 33 | } 34 | override def equals(o2: Any): Boolean = { 35 | o2 match { 36 | case wrapper: ByteArrayWrapper => 37 | Bytes.equals(value, wrapper.value) 38 | case _ => 39 | false 40 | } 41 | } 42 | override def hashCode():Int = { 43 | Bytes.hashCode(value) 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/hadoop/hbase/spark/ByteArrayComparable.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. 
You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 18 | */ 19 | 20 | package org.apache.hadoop.hbase.spark 21 | 22 | import org.apache.hadoop.hbase.util.Bytes 23 | 24 | class ByteArrayComparable(val bytes:Array[Byte], val offset:Int = 0, var length:Int = -1) 25 | extends Comparable[ByteArrayComparable] { 26 | 27 | if (length == -1) { 28 | length = bytes.length 29 | } 30 | 31 | override def compareTo(o: ByteArrayComparable): Int = { 32 | Bytes.compareTo(bytes, offset, length, o.bytes, o.offset, o.length) 33 | } 34 | 35 | override def hashCode(): Int = { 36 | Bytes.hashCode(bytes, offset, length) 37 | } 38 | 39 | override def equals (obj: Any): Boolean = { 40 | obj match { 41 | case b: ByteArrayComparable => 42 | Bytes.equals(bytes, offset, length, b.bytes, b.offset, b.length) 43 | case _ => 44 | false 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/hadoop/hbase/spark/FamilyHFileWriteOptions.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import java.io.Serializable 21 | 22 | /** 23 | * This object will hold optional data for how a given column family's 24 | * writer will work 25 | * 26 | * @param compression String to define the Compression to be used in the HFile 27 | * @param bloomType String to define the bloom type to be used in the HFile 28 | * @param blockSize The block size to be used in the HFile 29 | * @param dataBlockEncoding String to define the data block encoding to be used 30 | * in the HFile 31 | */ 32 | class FamilyHFileWriteOptions( val compression:String, 33 | val bloomType: String, 34 | val blockSize: Int, 35 | val dataBlockEncoding: String) extends Serializable 36 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/hadoop/hbase/spark/KeyFamilyQualifier.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import java.io.Serializable 21 | 22 | import org.apache.hadoop.hbase.util.Bytes 23 | 24 | /** 25 | * This is the key to be used for sorting and shuffling. 26 | * 27 | * We will only partition on the rowKey but we will sort on all three 28 | * 29 | * @param rowKey Record RowKey 30 | * @param family Record ColumnFamily 31 | * @param qualifier Cell Qualifier 32 | */ 33 | class KeyFamilyQualifier(val rowKey:Array[Byte], val family:Array[Byte], val qualifier:Array[Byte]) 34 | extends Comparable[KeyFamilyQualifier] with Serializable { 35 | override def compareTo(o: KeyFamilyQualifier): Int = { 36 | var result = Bytes.compareTo(rowKey, o.rowKey) 37 | if (result == 0) { 38 | result = Bytes.compareTo(family, o.family) 39 | if (result == 0) result = Bytes.compareTo(qualifier, o.qualifier) 40 | } 41 | result 42 | } 43 | override def toString: String = { 44 | Bytes.toString(rowKey) + ":" + Bytes.toString(family) + ":" + Bytes.toString(qualifier) 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/hadoop/hbase/spark/BulkLoadPartitioner.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import java.util 21 | import java.util.Comparator 22 | 23 | import org.apache.hadoop.hbase.util.Bytes 24 | import org.apache.spark.Partitioner 25 | 26 | /** 27 | * A Partitioner implementation that will separate records to different 28 | * HBase Regions based on region splits 29 | * 30 | * @param startKeys The start keys for the given table 31 | */ 32 | class BulkLoadPartitioner(startKeys:Array[Array[Byte]]) 33 | extends Partitioner { 34 | 35 | override def numPartitions: Int = startKeys.length 36 | 37 | override def getPartition(key: Any): Int = { 38 | 39 | val comparator: Comparator[Array[Byte]] = new Comparator[Array[Byte]] { 40 | override def compare(o1: Array[Byte], o2: Array[Byte]): Int = { 41 | Bytes.compareTo(o1, o2) 42 | } 43 | } 44 | 45 | val rowKey:Array[Byte] = 46 | key match { 47 | case qualifier: KeyFamilyQualifier => 48 | qualifier.rowKey 49 | case wrapper: ByteArrayWrapper => 50 | wrapper.value 51 | case _ => 52 | key.asInstanceOf[Array[Byte]] 53 | } 54 | val partition = util.Arrays.binarySearch(startKeys, rowKey, comparator) 55 | if (partition < 0) partition * -1 + -2 56 | else partition 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/hadoop/hbase/spark/FamiliesQualifiersValues.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.hadoop.hbase.spark 18 | 19 | import java.util 20 | 21 | /** 22 | * This object is a clean way to store and sort all cells that will be bulk 23 | * loaded into a single row 24 | */ 25 | class FamiliesQualifiersValues extends Serializable { 26 | //Tree maps are used because we need the results to 27 | // be sorted when we read them 28 | val familyMap = new util.TreeMap[ByteArrayWrapper, 29 | util.TreeMap[ByteArrayWrapper, Array[Byte]]]() 30 | 31 | //normally in a row there are more columns then 32 | //column families this wrapper is reused for column 33 | //family look ups 34 | val reusableWrapper = new ByteArrayWrapper(null) 35 | 36 | /** 37 | * Adds a new cell to an existing row 38 | * @param family HBase column family 39 | * @param qualifier HBase column qualifier 40 | * @param value HBase cell value 41 | */ 42 | def += (family: Array[Byte], qualifier: Array[Byte], value: Array[Byte]): Unit = { 43 | 44 | reusableWrapper.value = family 45 | 46 | var qualifierValues = familyMap.get(reusableWrapper) 47 | 48 | if (qualifierValues == null) { 49 | qualifierValues = new util.TreeMap[ByteArrayWrapper, Array[Byte]]() 50 | familyMap.put(new ByteArrayWrapper(family), qualifierValues) 51 | } 52 | 53 | qualifierValues.put(new ByteArrayWrapper(qualifier), value) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseDistributedScanExample.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.hadoop.hbase.spark.example.hbasecontext 18 | 19 | import org.apache.hadoop.hbase.spark.HBaseContext 20 | import org.apache.spark.SparkContext 21 | import org.apache.hadoop.hbase.{TableName, HBaseConfiguration} 22 | import org.apache.hadoop.hbase.util.Bytes 23 | import org.apache.hadoop.hbase.client.Scan 24 | import org.apache.spark.SparkConf 25 | /** 26 | * This is a simple example of scanning records from HBase 27 | * with the hbaseRDD function. 
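 *
 * A minimal sketch of the call pattern used below (a sketch only:
 * "myTable" is a placeholder table name and `sc` is an existing
 * SparkContext):
 * {{{
 *   val hbaseContext = new HBaseContext(sc, HBaseConfiguration.create())
 *   val scan = new Scan()
 *   scan.setCaching(100)
 *   val rdd = hbaseContext.hbaseRDD(TableName.valueOf("myTable"), scan)
 * }}}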
28 | */ 29 | object HBaseDistributedScanExample { 30 | def main(args: Array[String]) { 31 | if (args.length < 1) { 32 | println("GenerateGraphs {tableName}") 33 | return 34 | } 35 | 36 | val tableName = args(0) 37 | 38 | val sparkConf = new SparkConf().setAppName("HBaseDistributedScanExample " + tableName ) 39 | val sc = new SparkContext(sparkConf) 40 | 41 | try { 42 | val conf = HBaseConfiguration.create() 43 | 44 | val hbaseContext = new HBaseContext(sc, conf) 45 | 46 | val scan = new Scan() 47 | scan.setCaching(100) 48 | 49 | val getRdd = hbaseContext.hbaseRDD(TableName.valueOf(tableName), scan) 50 | 51 | getRdd.foreach(v => println(Bytes.toString(v._1.get()))) 52 | 53 | println("Length: " + getRdd.map(r => r._1.copyBytes()).collect().length); 54 | 55 | //.collect().foreach(v => println(Bytes.toString(v._1.get()))) 56 | } finally { 57 | sc.stop() 58 | } 59 | } 60 | 61 | } -------------------------------------------------------------------------------- /src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseBulkDeleteExample.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext 19 | 20 | import org.apache.hadoop.hbase.spark.HBaseContext 21 | import org.apache.spark.SparkContext 22 | import org.apache.hadoop.hbase.{TableName, HBaseConfiguration} 23 | import org.apache.hadoop.hbase.util.Bytes 24 | import org.apache.hadoop.hbase.client.Delete 25 | import org.apache.spark.SparkConf 26 | 27 | /** 28 | * This is a simple example of deleting records in HBase 29 | * with the bulkDelete function. 
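 *
 * A minimal sketch of the call used below (a sketch only: `rdd` is an
 * RDD[Array[Byte]] of row keys, "myTable" is a placeholder, and the final
 * argument is assumed to be the batch size):
 * {{{
 *   val hbaseContext = new HBaseContext(sc, HBaseConfiguration.create())
 *   hbaseContext.bulkDelete[Array[Byte]](rdd,
 *     TableName.valueOf("myTable"),
 *     rowKey => new Delete(rowKey),
 *     4)
 * }}}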
30 | */ 31 | object HBaseBulkDeleteExample { 32 | def main(args: Array[String]) { 33 | if (args.length < 1) { 34 | println("HBaseBulkDeletesExample {tableName} ") 35 | return 36 | } 37 | 38 | val tableName = args(0) 39 | 40 | val sparkConf = new SparkConf().setAppName("HBaseBulkDeleteExample " + tableName) 41 | val sc = new SparkContext(sparkConf) 42 | try { 43 | //[Array[Byte]] 44 | val rdd = sc.parallelize(Array( 45 | Bytes.toBytes("1"), 46 | Bytes.toBytes("2"), 47 | Bytes.toBytes("3"), 48 | Bytes.toBytes("4"), 49 | Bytes.toBytes("5") 50 | )) 51 | 52 | val conf = HBaseConfiguration.create() 53 | 54 | val hbaseContext = new HBaseContext(sc, conf) 55 | hbaseContext.bulkDelete[Array[Byte]](rdd, 56 | TableName.valueOf(tableName), 57 | putRecord => new Delete(putRecord), 58 | 4) 59 | } finally { 60 | sc.stop() 61 | } 62 | } 63 | } -------------------------------------------------------------------------------- /src/main/scala/org/apache/hadoop/hbase/spark/example/rdd/HBaseBulkDeleteExample.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.hadoop.hbase.spark.example.rdd 18 | 19 | import org.apache.hadoop.hbase.client.Delete 20 | import org.apache.hadoop.hbase.{TableName, HBaseConfiguration} 21 | import org.apache.hadoop.hbase.spark.HBaseContext 22 | import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._ 23 | import org.apache.hadoop.hbase.util.Bytes 24 | 25 | import org.apache.spark.{SparkContext, SparkConf} 26 | 27 | /** 28 | * This is a simple example of deleting records in HBase 29 | * with the bulkDelete function. 
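 *
 * This variant relies on the implicit conversions pulled in by
 * `import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._`, so the call
 * hangs off the RDD itself. A minimal sketch ("myTable" is a placeholder):
 * {{{
 *   import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
 *   rdd.hbaseBulkDelete(hbaseContext, TableName.valueOf("myTable"),
 *     rowKey => new Delete(rowKey), 4)
 * }}}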
30 | */ 31 | object HBaseBulkDeleteExample { 32 | def main(args: Array[String]) { 33 | if (args.length < 1) { 34 | println("HBaseBulkDeletesExample {tableName} ") 35 | return 36 | } 37 | 38 | val tableName = args(0) 39 | 40 | val sparkConf = new SparkConf().setAppName("HBaseBulkDeleteExample " + tableName) 41 | val sc = new SparkContext(sparkConf) 42 | try { 43 | //[Array[Byte]] 44 | val rdd = sc.parallelize(Array( 45 | Bytes.toBytes("1"), 46 | Bytes.toBytes("2"), 47 | Bytes.toBytes("3"), 48 | Bytes.toBytes("4"), 49 | Bytes.toBytes("5") 50 | )) 51 | 52 | val conf = HBaseConfiguration.create() 53 | 54 | val hbaseContext = new HBaseContext(sc, conf) 55 | 56 | rdd.hbaseBulkDelete(hbaseContext, TableName.valueOf(tableName), 57 | putRecord => new Delete(putRecord), 58 | 4) 59 | 60 | } finally { 61 | sc.stop() 62 | } 63 | } 64 | } -------------------------------------------------------------------------------- /src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseStreamingBulkPutExample.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext 19 | 20 | import org.apache.hadoop.hbase.spark.HBaseContext 21 | import org.apache.spark.SparkContext 22 | import org.apache.hadoop.hbase.{TableName, HBaseConfiguration} 23 | import org.apache.hadoop.hbase.util.Bytes 24 | import org.apache.hadoop.hbase.client.Put 25 | import org.apache.spark.streaming.StreamingContext 26 | import org.apache.spark.streaming.Seconds 27 | import org.apache.spark.SparkConf 28 | 29 | /** 30 | * This is a simple example of BulkPut with Spark Streaming 31 | */ 32 | object HBaseStreamingBulkPutExample { 33 | def main(args: Array[String]) { 34 | if (args.length < 4) { 35 | println("HBaseStreamingBulkPutExample " + 36 | "{host} {port} {tableName} {columnFamily}") 37 | return 38 | } 39 | 40 | val host = args(0) 41 | val port = args(1) 42 | val tableName = args(2) 43 | val columnFamily = args(3) 44 | 45 | val sparkConf = new SparkConf().setAppName("HBaseBulkPutTimestampExample " + 46 | tableName + " " + columnFamily) 47 | val sc = new SparkContext(sparkConf) 48 | try { 49 | val ssc = new StreamingContext(sc, Seconds(1)) 50 | 51 | val lines = ssc.socketTextStream(host, port.toInt) 52 | 53 | val conf = HBaseConfiguration.create() 54 | 55 | val hbaseContext = new HBaseContext(sc, conf) 56 | 57 | hbaseContext.streamBulkPut[String](lines, 58 | TableName.valueOf(tableName), 59 | (putRecord) => { 60 | if (putRecord.length() > 0) { 61 | val put = new Put(Bytes.toBytes(putRecord)) 62 | put.addColumn(Bytes.toBytes("c"), Bytes.toBytes("foo"), Bytes.toBytes("bar")) 63 | put 64 | } else { 65 | null 66 | } 67 | }) 68 | ssc.start() 69 | ssc.awaitTerminationOrTimeout(60000) 70 | } finally { 71 | sc.stop() 72 | } 73 | } 74 | } -------------------------------------------------------------------------------- /src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseBulkPutExampleFromFile.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext 19 | 20 | import org.apache.hadoop.hbase.spark.HBaseContext 21 | import org.apache.spark.SparkContext 22 | import org.apache.hadoop.hbase.{TableName, HBaseConfiguration} 23 | import org.apache.hadoop.hbase.util.Bytes 24 | import org.apache.hadoop.hbase.client.Put 25 | import org.apache.hadoop.mapred.TextInputFormat 26 | import org.apache.hadoop.io.LongWritable 27 | import org.apache.hadoop.io.Text 28 | import org.apache.spark.SparkConf 29 | 30 | /** 31 | * This is a simple example of putting records in HBase 32 | * with the bulkPut function. 
In this example we are 33 | * getting the put information from a file 34 | */ 35 | object HBaseBulkPutExampleFromFile { 36 | def main(args: Array[String]) { 37 | if (args.length < 3) { 38 | println("HBaseBulkPutExampleFromFile {tableName} {columnFamily} {inputFile}") 39 | return 40 | } 41 | 42 | val tableName = args(0) 43 | val columnFamily = args(1) 44 | val inputFile = args(2) 45 | 46 | val sparkConf = new SparkConf().setAppName("HBaseBulkPutExampleFromFile " + 47 | tableName + " " + columnFamily + " " + inputFile) 48 | val sc = new SparkContext(sparkConf) 49 | 50 | try { 51 | var rdd = sc.hadoopFile( 52 | inputFile, 53 | classOf[TextInputFormat], 54 | classOf[LongWritable], 55 | classOf[Text]).map(v => { 56 | System.out.println("reading-" + v._2.toString) 57 | v._2.toString 58 | }) 59 | 60 | val conf = HBaseConfiguration.create() 61 | 62 | val hbaseContext = new HBaseContext(sc, conf) 63 | hbaseContext.bulkPut[String](rdd, 64 | TableName.valueOf(tableName), 65 | (putRecord) => { 66 | System.out.println("hbase-" + putRecord) 67 | val put = new Put(Bytes.toBytes("Value- " + putRecord)) 68 | put.addColumn(Bytes.toBytes("c"), Bytes.toBytes("1"), 69 | Bytes.toBytes(putRecord.length())) 70 | put 71 | }); 72 | } finally { 73 | sc.stop() 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/hadoop/hbase/spark/example/rdd/HBaseBulkPutExample.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.hadoop.hbase.spark.example.rdd 19 | 20 | import org.apache.hadoop.hbase.client.Put 21 | import org.apache.hadoop.hbase.spark.HBaseContext 22 | import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._ 23 | import org.apache.hadoop.hbase.util.Bytes 24 | import org.apache.hadoop.hbase.{HBaseConfiguration, TableName} 25 | import org.apache.spark.{SparkConf, SparkContext} 26 | 27 | /** 28 | * This is a simple example of putting records in HBase 29 | * with the bulkPut function. 
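 *
 * A minimal sketch of the implicit-function form used below, where each
 * RDD element is a (rowKey, Array[(family, qualifier, value)]) tuple and
 * "myTable" is a placeholder:
 * {{{
 *   import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
 *   rdd.hbaseBulkPut(hbaseContext, TableName.valueOf("myTable"), record => {
 *     val put = new Put(record._1)
 *     record._2.foreach(v => put.addColumn(v._1, v._2, v._3))
 *     put
 *   })
 * }}}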
30 | */ 31 | object HBaseBulkPutExample { 32 | def main(args: Array[String]) { 33 | if (args.length < 2) { 34 | println("HBaseBulkPutExample {tableName} {columnFamily}") 35 | return 36 | } 37 | 38 | val tableName = args(0) 39 | val columnFamily = args(1) 40 | 41 | val sparkConf = new SparkConf().setAppName("HBaseBulkPutExample " + 42 | tableName + " " + columnFamily) 43 | val sc = new SparkContext(sparkConf) 44 | 45 | try { 46 | //[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])] 47 | val rdd = sc.parallelize(Array( 48 | (Bytes.toBytes("1"), 49 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))), 50 | (Bytes.toBytes("2"), 51 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))), 52 | (Bytes.toBytes("3"), 53 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))), 54 | (Bytes.toBytes("4"), 55 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))), 56 | (Bytes.toBytes("5"), 57 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5")))) 58 | )) 59 | 60 | val conf = HBaseConfiguration.create() 61 | 62 | val hbaseContext = new HBaseContext(sc, conf) 63 | 64 | rdd.hbaseBulkPut(hbaseContext, TableName.valueOf(tableName), 65 | (putRecord) => { 66 | val put = new Put(putRecord._1) 67 | putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, 68 | putValue._3)) 69 | put 70 | }) 71 | 72 | } finally { 73 | sc.stop() 74 | } 75 | } 76 | } -------------------------------------------------------------------------------- /src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseBulkPutExample.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext 19 | 20 | import org.apache.hadoop.hbase.spark.HBaseContext 21 | import org.apache.spark.SparkContext 22 | import org.apache.hadoop.hbase.{TableName, HBaseConfiguration} 23 | import org.apache.hadoop.hbase.util.Bytes 24 | import org.apache.hadoop.hbase.client.Put 25 | import org.apache.spark.SparkConf 26 | 27 | /** 28 | * This is a simple example of putting records in HBase 29 | * with the bulkPut function. 
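 *
 * A minimal sketch of the bulkPut call used below, where each RDD element
 * is a (rowKey, Array[(family, qualifier, value)]) tuple and "myTable" is
 * a placeholder:
 * {{{
 *   hbaseContext.bulkPut[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])](
 *     rdd,
 *     TableName.valueOf("myTable"),
 *     record => {
 *       val put = new Put(record._1)
 *       record._2.foreach(v => put.addColumn(v._1, v._2, v._3))
 *       put
 *     })
 * }}}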
30 | */ 31 | object HBaseBulkPutExample { 32 | def main(args: Array[String]) { 33 | if (args.length < 2) { 34 | println("HBaseBulkPutExample {tableName} {columnFamily}") 35 | return 36 | } 37 | 38 | val tableName = args(0) 39 | val columnFamily = args(1) 40 | 41 | val sparkConf = new SparkConf().setAppName("HBaseBulkPutExample " + 42 | tableName + " " + columnFamily) 43 | val sc = new SparkContext(sparkConf) 44 | 45 | try { 46 | //[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])] 47 | val rdd = sc.parallelize(Array( 48 | (Bytes.toBytes("1"), 49 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))), 50 | (Bytes.toBytes("2"), 51 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))), 52 | (Bytes.toBytes("3"), 53 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))), 54 | (Bytes.toBytes("4"), 55 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))), 56 | (Bytes.toBytes("5"), 57 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5")))) 58 | )) 59 | 60 | val conf = HBaseConfiguration.create() 61 | 62 | val hbaseContext = new HBaseContext(sc, conf) 63 | hbaseContext.bulkPut[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])](rdd, 64 | TableName.valueOf(tableName), 65 | (putRecord) => { 66 | val put = new Put(putRecord._1) 67 | putRecord._2.foreach((putValue) => 68 | put.addColumn(putValue._1, putValue._2, putValue._3)) 69 | put 70 | }); 71 | } finally { 72 | sc.stop() 73 | } 74 | } 75 | } -------------------------------------------------------------------------------- /src/main/scala/org/apache/hadoop/hbase/spark/example/rdd/HBaseBulkGetExample.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.hadoop.hbase.spark.example.rdd 18 | 19 | import org.apache.hadoop.hbase.client.{Result, Get} 20 | import org.apache.hadoop.hbase.{CellUtil, TableName, HBaseConfiguration} 21 | import org.apache.hadoop.hbase.spark.HBaseContext 22 | import org.apache.hadoop.hbase.util.Bytes 23 | import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._ 24 | import org.apache.spark.{SparkContext, SparkConf} 25 | 26 | /** 27 | * This is a simple example of getting records in HBase 28 | * with the bulkGet function. 
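 *
 * A minimal sketch of the hbaseBulkGet call used below: the type parameter
 * is the mapped result type, 2 is assumed to be the batch size, and the
 * two functions build the Get and convert each Result ("myTable" is a
 * placeholder):
 * {{{
 *   import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
 *   val values = rdd.hbaseBulkGet[String](hbaseContext,
 *     TableName.valueOf("myTable"), 2,
 *     rowKey => new Get(rowKey),
 *     (result: Result) => Bytes.toString(result.getRow))
 * }}}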
29 | */ 30 | object HBaseBulkGetExample { 31 | def main(args: Array[String]) { 32 | if (args.length < 1) { 33 | println("HBaseBulkGetExample {tableName}") 34 | return 35 | } 36 | 37 | val tableName = args(0) 38 | 39 | val sparkConf = new SparkConf().setAppName("HBaseBulkGetExample " + tableName) 40 | val sc = new SparkContext(sparkConf) 41 | 42 | try { 43 | 44 | //[(Array[Byte])] 45 | val rdd = sc.parallelize(Array( 46 | Bytes.toBytes("1"), 47 | Bytes.toBytes("2"), 48 | Bytes.toBytes("3"), 49 | Bytes.toBytes("4"), 50 | Bytes.toBytes("5"), 51 | Bytes.toBytes("6"), 52 | Bytes.toBytes("7"))) 53 | 54 | val conf = HBaseConfiguration.create() 55 | 56 | val hbaseContext = new HBaseContext(sc, conf) 57 | 58 | val getRdd = rdd.hbaseBulkGet[String](hbaseContext, TableName.valueOf(tableName), 2, 59 | record => { 60 | System.out.println("making Get") 61 | new Get(record) 62 | }, 63 | (result: Result) => { 64 | 65 | val it = result.listCells().iterator() 66 | val b = new StringBuilder 67 | 68 | b.append(Bytes.toString(result.getRow) + ":") 69 | 70 | while (it.hasNext) { 71 | val cell = it.next() 72 | val q = Bytes.toString(CellUtil.cloneQualifier(cell)) 73 | if (q.equals("counter")) { 74 | b.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")") 75 | } else { 76 | b.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")") 77 | } 78 | } 79 | b.toString() 80 | }) 81 | 82 | getRdd.collect().foreach(v => println(v)) 83 | 84 | } finally { 85 | sc.stop() 86 | } 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseBulkPutTimestampExample.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext 19 | 20 | import org.apache.hadoop.hbase.spark.HBaseContext 21 | import org.apache.spark.SparkContext 22 | import org.apache.hadoop.hbase.{TableName, HBaseConfiguration} 23 | import org.apache.hadoop.hbase.util.Bytes 24 | import org.apache.hadoop.hbase.client.Put 25 | import org.apache.spark.SparkConf 26 | 27 | /** 28 | * This is a simple example of putting records in HBase 29 | * with the bulkPut function. 
In this example we are 30 | * also setting the timestamp in the put 31 | */ 32 | object HBaseBulkPutTimestampExample { 33 | def main(args: Array[String]) { 34 | if (args.length < 2) { 35 | System.out.println("HBaseBulkPutTimestampExample {tableName} {columnFamily}") 36 | return 37 | } 38 | 39 | val tableName = args(0) 40 | val columnFamily = args(1) 41 | 42 | val sparkConf = new SparkConf().setAppName("HBaseBulkPutTimestampExample " + 43 | tableName + " " + columnFamily) 44 | val sc = new SparkContext(sparkConf) 45 | 46 | try { 47 | 48 | val rdd = sc.parallelize(Array( 49 | (Bytes.toBytes("6"), 50 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))), 51 | (Bytes.toBytes("7"), 52 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))), 53 | (Bytes.toBytes("8"), 54 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))), 55 | (Bytes.toBytes("9"), 56 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))), 57 | (Bytes.toBytes("10"), 58 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5")))))) 59 | 60 | val conf = HBaseConfiguration.create() 61 | 62 | val timeStamp = System.currentTimeMillis() 63 | 64 | val hbaseContext = new HBaseContext(sc, conf) 65 | hbaseContext.bulkPut[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])](rdd, 66 | TableName.valueOf(tableName), 67 | (putRecord) => { 68 | val put = new Put(putRecord._1) 69 | putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, 70 | timeStamp, putValue._3)) 71 | put 72 | }) 73 | } finally { 74 | sc.stop() 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/hadoop/hbase/spark/example/rdd/HBaseMapPartitionExample.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.apache.hadoop.hbase.spark.example.rdd 19 | 20 | import org.apache.hadoop.hbase.client.Get 21 | import org.apache.hadoop.hbase.{TableName, HBaseConfiguration} 22 | import org.apache.hadoop.hbase.spark.HBaseContext 23 | import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._ 24 | import org.apache.hadoop.hbase.util.Bytes 25 | import org.apache.spark.{SparkContext, SparkConf} 26 | 27 | /** 28 | * This is a simple example of using the mapPartitions 29 | * method with a HBase connection 30 | */ 31 | object HBaseMapPartitionExample { 32 | def main(args: Array[String]) { 33 | if (args.length < 1) { 34 | println("HBaseBulkGetExample {tableName}") 35 | return 36 | } 37 | 38 | val tableName = args(0) 39 | 40 | val sparkConf = new SparkConf().setAppName("HBaseBulkGetExample " + tableName) 41 | val sc = new SparkContext(sparkConf) 42 | 43 | try { 44 | 45 | //[(Array[Byte])] 46 | val rdd = sc.parallelize(Array( 47 | Bytes.toBytes("1"), 48 | Bytes.toBytes("2"), 49 | Bytes.toBytes("3"), 50 | Bytes.toBytes("4"), 51 | Bytes.toBytes("5"), 52 | Bytes.toBytes("6"), 53 | Bytes.toBytes("7"))) 54 | 55 | val conf = HBaseConfiguration.create() 56 | 57 | val hbaseContext = new HBaseContext(sc, conf) 58 | 59 | val getRdd = rdd.hbaseMapPartitions[String](hbaseContext, (it, connection) => { 60 | val table = connection.getTable(TableName.valueOf(tableName)) 61 | it.map{r => 62 | //batching would be faster. This is just an example 63 | val result = table.get(new Get(r)) 64 | val it = result.listCells().iterator() 65 | val b = new StringBuilder 66 | b.append(Bytes.toString(result.getRow) + ":") 67 | while (it.hasNext) { 68 | val cell = it.next() 69 | val q = Bytes.toString(cell.getQualifierArray) 70 | if (q.equals("counter")) { 71 | b.append("(" + q + "," + Bytes.toLong(cell.getValueArray) + ")") 72 | } else { 73 | b.append("(" + q + "," + Bytes.toString(cell.getValueArray) + ")") 74 | } 75 | } 76 | b.toString() 77 | } 78 | }) 79 | 80 | getRdd.collect().foreach(v => println(v)) 81 | 82 | } finally { 83 | sc.stop() 84 | } 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseBulkGetExample.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext 19 | 20 | import org.apache.hadoop.hbase.spark.HBaseContext 21 | import org.apache.spark.SparkContext 22 | import org.apache.hadoop.hbase.{CellUtil, TableName, HBaseConfiguration} 23 | import org.apache.hadoop.hbase.util.Bytes 24 | import org.apache.hadoop.hbase.client.Get 25 | import org.apache.hadoop.hbase.client.Result 26 | import org.apache.spark.SparkConf 27 | 28 | /** 29 | * This is a simple example of getting records in HBase 30 | * with the bulkGet function. 31 | */ 32 | object HBaseBulkGetExample { 33 | def main(args: Array[String]) { 34 | if (args.length < 1) { 35 | println("HBaseBulkGetExample {tableName}") 36 | return 37 | } 38 | 39 | val tableName = args(0) 40 | 41 | val sparkConf = new SparkConf().setAppName("HBaseBulkGetExample " + tableName) 42 | val sc = new SparkContext(sparkConf) 43 | 44 | try { 45 | 46 | //[(Array[Byte])] 47 | val rdd = sc.parallelize(Array( 48 | Bytes.toBytes("1"), 49 | Bytes.toBytes("2"), 50 | Bytes.toBytes("3"), 51 | Bytes.toBytes("4"), 52 | Bytes.toBytes("5"), 53 | Bytes.toBytes("6"), 54 | Bytes.toBytes("7"))) 55 | 56 | val conf = HBaseConfiguration.create() 57 | 58 | val hbaseContext = new HBaseContext(sc, conf) 59 | 60 | val getRdd = hbaseContext.bulkGet[Array[Byte], String]( 61 | TableName.valueOf(tableName), 62 | 2, 63 | rdd, 64 | record => { 65 | System.out.println("making Get") 66 | new Get(record) 67 | }, 68 | (result: Result) => { 69 | 70 | val it = result.listCells().iterator() 71 | val b = new StringBuilder 72 | 73 | b.append(Bytes.toString(result.getRow) + ":") 74 | 75 | while (it.hasNext) { 76 | val cell = it.next() 77 | val q = Bytes.toString(CellUtil.cloneQualifier(cell)) 78 | if (q.equals("counter")) { 79 | b.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")") 80 | } else { 81 | b.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")") 82 | } 83 | } 84 | b.toString() 85 | }) 86 | 87 | getRdd.collect().foreach(v => println(v)) 88 | 89 | } finally { 90 | sc.stop() 91 | } 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/hadoop/hbase/spark/example/rdd/HBaseForeachPartitionExample.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.apache.hadoop.hbase.spark.example.rdd 19 | 20 | import org.apache.hadoop.hbase.client.Put 21 | import org.apache.hadoop.hbase.{TableName, HBaseConfiguration} 22 | import org.apache.hadoop.hbase.spark.HBaseContext 23 | import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._ 24 | import org.apache.hadoop.hbase.util.Bytes 25 | import org.apache.spark.{SparkContext, SparkConf} 26 | 27 | /** 28 | * This is a simple example of using the foreachPartition 29 | * method with a HBase connection 30 | */ 31 | object HBaseForeachPartitionExample { 32 | def main(args: Array[String]) { 33 | if (args.length < 2) { 34 | println("HBaseBulkPutExample {tableName} {columnFamily}") 35 | return 36 | } 37 | 38 | val tableName = args(0) 39 | val columnFamily = args(1) 40 | 41 | val sparkConf = new SparkConf().setAppName("HBaseBulkPutExample " + 42 | tableName + " " + columnFamily) 43 | val sc = new SparkContext(sparkConf) 44 | 45 | try { 46 | //[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])] 47 | val rdd = sc.parallelize(Array( 48 | (Bytes.toBytes("1"), 49 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))), 50 | (Bytes.toBytes("2"), 51 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))), 52 | (Bytes.toBytes("3"), 53 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))), 54 | (Bytes.toBytes("4"), 55 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))), 56 | (Bytes.toBytes("5"), 57 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5")))) 58 | )) 59 | 60 | val conf = HBaseConfiguration.create() 61 | 62 | val hbaseContext = new HBaseContext(sc, conf) 63 | 64 | 65 | rdd.hbaseForeachPartition(hbaseContext, 66 | (it, connection) => { 67 | val m = connection.getBufferedMutator(TableName.valueOf(tableName)) 68 | 69 | it.foreach(r => { 70 | val put = new Put(r._1) 71 | r._2.foreach((putValue) => 72 | put.addColumn(putValue._1, putValue._2, putValue._3)) 73 | m.mutate(put) 74 | }) 75 | m.flush() 76 | m.close() 77 | }) 78 | 79 | } finally { 80 | sc.stop() 81 | } 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/hadoop/hbase/spark/ColumnFamilyQualifierMapKeyWrapper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import org.apache.hadoop.hbase.util.Bytes 21 | 22 | /** 23 | * A wrapper class that will allow both columnFamily and qualifier to 24 | * be the key of a hashMap. 
Also allow for finding the value in a hashmap 25 | * with out cloning the HBase value from the HBase Cell object 26 | * @param columnFamily ColumnFamily byte array 27 | * @param columnFamilyOffSet Offset of columnFamily value in the array 28 | * @param columnFamilyLength Length of the columnFamily value in the columnFamily array 29 | * @param qualifier Qualifier byte array 30 | * @param qualifierOffSet Offset of qualifier value in the array 31 | * @param qualifierLength Length of the qualifier value with in the array 32 | */ 33 | class ColumnFamilyQualifierMapKeyWrapper(val columnFamily:Array[Byte], 34 | val columnFamilyOffSet:Int, 35 | val columnFamilyLength:Int, 36 | val qualifier:Array[Byte], 37 | val qualifierOffSet:Int, 38 | val qualifierLength:Int) 39 | extends Serializable{ 40 | 41 | override def equals(other:Any): Boolean = { 42 | val otherWrapper = other.asInstanceOf[ColumnFamilyQualifierMapKeyWrapper] 43 | 44 | Bytes.compareTo(columnFamily, 45 | columnFamilyOffSet, 46 | columnFamilyLength, 47 | otherWrapper.columnFamily, 48 | otherWrapper.columnFamilyOffSet, 49 | otherWrapper.columnFamilyLength) == 0 && Bytes.compareTo(qualifier, 50 | qualifierOffSet, 51 | qualifierLength, 52 | otherWrapper.qualifier, 53 | otherWrapper.qualifierOffSet, 54 | otherWrapper.qualifierLength) == 0 55 | } 56 | 57 | override def hashCode():Int = { 58 | Bytes.hashCode(columnFamily, columnFamilyOffSet, columnFamilyLength) + 59 | Bytes.hashCode(qualifier, qualifierOffSet, qualifierLength) 60 | } 61 | 62 | def cloneColumnFamily():Array[Byte] = { 63 | val resultArray = new Array[Byte](columnFamilyLength) 64 | System.arraycopy(columnFamily, columnFamilyOffSet, resultArray, 0, columnFamilyLength) 65 | resultArray 66 | } 67 | 68 | def cloneQualifier():Array[Byte] = { 69 | val resultArray = new Array[Byte](qualifierLength) 70 | System.arraycopy(qualifier, qualifierOffSet, resultArray, 0, qualifierLength) 71 | resultArray 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/hadoop/hbase/spark/HBaseScanRDD.scala: -------------------------------------------------------------------------------- 1 | package org.apache.hadoop.hbase.spark 2 | 3 | import org.apache.hadoop.hbase.io.ImmutableBytesWritable 4 | import org.apache.spark.deploy.SparkHadoopUtil 5 | import org.apache.spark.{ SparkContext, TaskContext } 6 | import org.apache.spark.broadcast.Broadcast 7 | import org.apache.spark.SerializableWritable 8 | import org.apache.hadoop.conf.Configuration 9 | import org.apache.hadoop.security.Credentials 10 | import org.apache.spark.rdd.RDD 11 | import org.apache.spark.Partition 12 | import org.apache.spark.InterruptibleIterator 13 | import org.apache.hadoop.hbase.mapreduce.TableInputFormat 14 | import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil 15 | import org.apache.hadoop.hbase.client.{Result, Scan} 16 | import org.apache.hadoop.mapreduce.Job 17 | import org.apache.spark.SparkHadoopMapReduceUtilExtended 18 | import org.apache.spark.Logging 19 | import org.apache.hadoop.mapreduce.JobID 20 | import org.apache.hadoop.io.Writable 21 | import org.apache.hadoop.mapreduce.InputSplit 22 | import java.text.SimpleDateFormat 23 | import java.util.Date 24 | import java.util.ArrayList 25 | import org.apache.hadoop.security.UserGroupInformation 26 | import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod 27 | import org.apache.hadoop.hbase.mapreduce.IdentityTableMapper 28 | 29 | class HBaseScanRDD (@transient sc: SparkContext, 30 | 
@transient tableName: String, 31 | @transient scan: Scan, 32 | val configBroadcast: Broadcast[SerializableWritable[Configuration]], 33 | val credentialsConf: Broadcast[SerializableWritable[Credentials]]) 34 | extends RDD[(ImmutableBytesWritable, Result)](sc, Nil) 35 | with SparkHadoopMapReduceUtilExtended 36 | with Logging { 37 | 38 | @transient var appliedCredentials = false 39 | 40 | /// 41 | @transient val jobTransient = new Job(configBroadcast.value.value, "ExampleRead"); 42 | TableMapReduceUtil.initTableMapperJob( 43 | tableName, // input HBase table name 44 | scan, // Scan instance to control CF and attribute selection 45 | classOf[IdentityTableMapper], // mapper 46 | null, // mapper output key 47 | null, // mapper output value 48 | jobTransient); 49 | 50 | @transient val jobConfigurationTrans = jobTransient.getConfiguration() 51 | jobConfigurationTrans.set(TableInputFormat.INPUT_TABLE, tableName) 52 | val jobConfigBroadcast = sc.broadcast(new SerializableWritable(jobConfigurationTrans)) 53 | //// 54 | 55 | private val jobTrackerId: String = { 56 | val formatter = new SimpleDateFormat("yyyyMMddHHmm") 57 | formatter.format(new Date()) 58 | } 59 | 60 | @transient protected val jobId = new JobID(jobTrackerId, id) 61 | 62 | override def getPartitions: Array[Partition] = { 63 | 64 | addCreds 65 | 66 | val tableInputFormat = new TableInputFormat 67 | tableInputFormat.setConf(jobConfigBroadcast.value.value) 68 | 69 | val jobContext = newJobContext(jobConfigBroadcast.value.value, jobId) 70 | val rawSplits = tableInputFormat.getSplits(jobContext).toArray 71 | val result = new Array[Partition](rawSplits.size) 72 | for (i <- 0 until rawSplits.size) { 73 | result(i) = new NewHadoopPartition(id, i, rawSplits(i).asInstanceOf[InputSplit with Writable]) 74 | } 75 | 76 | result 77 | } 78 | 79 | override def compute(theSplit: Partition, context: TaskContext): InterruptibleIterator[(ImmutableBytesWritable, Result)] = { 80 | 81 | addCreds 82 | applyCreds 83 | 84 | val iter = new Iterator[(ImmutableBytesWritable, Result)] { 85 | 86 | addCreds 87 | applyCreds 88 | 89 | val split = theSplit.asInstanceOf[NewHadoopPartition] 90 | logInfo("Input split: " + split.serializableHadoopSplit) 91 | val conf = jobConfigBroadcast.value.value 92 | 93 | val attemptId = newTaskAttemptID(jobTrackerId, id, isMap = true, split.index, 0) 94 | val hadoopAttemptContext = newTaskAttemptContext(conf, attemptId) 95 | val format = new TableInputFormat 96 | format.setConf(conf) 97 | 98 | val reader = format.createRecordReader( 99 | split.serializableHadoopSplit.value, hadoopAttemptContext) 100 | reader.initialize(split.serializableHadoopSplit.value, hadoopAttemptContext) 101 | 102 | // Register an on-task-completion callback to close the input stream. 
103 | context.addOnCompleteCallback(() => close()) 104 | var havePair = false 105 | var finished = false 106 | 107 | override def hasNext: Boolean = { 108 | if (!finished && !havePair) { 109 | finished = !reader.nextKeyValue 110 | havePair = !finished 111 | } 112 | !finished 113 | } 114 | 115 | override def next(): (ImmutableBytesWritable, Result) = { 116 | if (!hasNext) { 117 | throw new java.util.NoSuchElementException("End of stream") 118 | } 119 | havePair = false 120 | 121 | val writableKey = new ImmutableBytesWritable(reader.getCurrentKey.copyBytes()) 122 | 123 | (writableKey, reader.getCurrentValue) 124 | } 125 | 126 | private def close() { 127 | try { 128 | reader.close() 129 | } catch { 130 | case e: Exception => logWarning("Exception in RecordReader.close()", e) 131 | } 132 | } 133 | } 134 | new InterruptibleIterator(context, iter) 135 | } 136 | 137 | def addCreds { 138 | val creds = SparkHadoopUtil.get.getCurrentUserCredentials() 139 | 140 | val ugi = UserGroupInformation.getCurrentUser() 141 | ugi.addCredentials(creds) 142 | // specify that this is a proxy user 143 | ugi.setAuthenticationMethod(AuthenticationMethod.PROXY) 144 | } 145 | 146 | def applyCreds[T]{ 147 | val credentials = SparkHadoopUtil.get.getCurrentUserCredentials() 148 | 149 | if (!appliedCredentials && credentials != null) { 150 | appliedCredentials = true 151 | 152 | @transient val ugi = UserGroupInformation.getCurrentUser 153 | ugi.addCredentials(credentials) 154 | // specify that this is a proxy user 155 | ugi.setAuthenticationMethod(AuthenticationMethod.PROXY) 156 | 157 | ugi.addCredentials(credentialsConf.value.value) 158 | } 159 | } 160 | 161 | private[spark] class NewHadoopPartition( 162 | rddId: Int, 163 | val index: Int, 164 | @transient rawSplit: InputSplit with Writable) 165 | extends Partition { 166 | 167 | val serializableHadoopSplit = new SerializableWritable(rawSplit) 168 | 169 | override def hashCode(): Int = 41 * (41 + rddId) + index 170 | } 171 | } 172 | -------------------------------------------------------------------------------- /pom.unittest.but.no.cluster.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.cloudera.sa 6 | spark.hbase 7 | 0.0.1-SNAPSHOT 8 | jar 9 | 10 | spark.hbase 11 | http://maven.apache.org 12 | 13 | 14 | UTF-8 15 | 16 | 17 | 18 | 19 | org.scala-lang 20 | scala-compiler 21 | 2.10.4 22 | 23 | 24 | org.scalatest 25 | scalatest_2.10 26 | 2.1.5 27 | 28 | 29 | org.apache.spark 30 | spark-core_2.10 31 | 1.0.0-cdh5.1.0 32 | 33 | 34 | org.apache.spark 35 | spark-streaming_2.10 36 | 1.0.0-cdh5.1.0 37 | test-jar 38 | tests 39 | test 40 | 41 | 42 | org.apache.spark 43 | spark-streaming_2.10 44 | 1.0.0-cdh5.1.0 45 | 46 | 47 | org.apache.hbase 48 | hbase-client 49 | 0.98.1-cdh5.1.0 50 | 51 | 52 | org.apache.hbase 53 | hbase-client 54 | 0.98.1-cdh5.1.0 55 | test-jar 56 | tests 57 | test 58 | 59 | 60 | org.apache.hbase 61 | hbase-server 62 | 0.98.1-cdh5.1.0 63 | 64 | 65 | org.apache.hbase 66 | hbase-server 67 | 0.98.1-cdh5.1.0 68 | test-jar 69 | tests 70 | 71 | 72 | org.apache.hbase 73 | hbase-hadoop2-compat 74 | 0.98.1-cdh5.1.0 75 | runtime 76 | 77 | 78 | org.apache.hbase 79 | hbase-hadoop2-compat 80 | 0.98.1-cdh5.1.0 81 | test-jar 82 | tests 83 | test 84 | 85 | 86 | org.apache.hbase 87 | hbase-common 88 | 0.98.1-cdh5.1.0 89 | 90 | 91 | org.apache.hbase 92 | hbase-common 93 | 0.98.1-cdh5.1.0 94 | test-jar 95 | tests 96 | test 97 | 98 | 99 | org.apache.hbase 100 | hbase-hadoop-compat 101 | 0.98.1-cdh5.1.0 102 | test 103 | 
104 | 105 | org.apache.hbase 106 | hbase-hadoop-compat 107 | 0.98.1-cdh5.1.0 108 | test-jar 109 | tests 110 | test 111 | 112 | 113 | 114 | 115 | maven-hadoop 116 | Hadoop Releases 117 | https://repository.cloudera.com/content/repositories/releases/ 118 | 119 | 120 | 121 | 122 | target/scala/classes 123 | target/scala/test-classes 124 | 125 | 126 | org.apache.maven.plugins 127 | maven-surefire-plugin 128 | 2.17 129 | 130 | 131 | org.apache.maven.surefire 132 | surefire-junit47 133 | 2.17 134 | 135 | 136 | 137 | 138 | org.scalatest 139 | scalatest-maven-plugin 140 | 1.0 141 | 142 | . 143 | 144 | 145 | 146 | test 147 | 148 | test 149 | 150 | 151 | 152 | 153 | 154 | 155 | org.scala-tools 156 | maven-scala-plugin 157 | 158 | 159 | compile 160 | 161 | compile 162 | 163 | compile 164 | 165 | 166 | test-compile 167 | 168 | testCompile 169 | 170 | test-compile 171 | 172 | 173 | process-resources 174 | 175 | compile 176 | 177 | 178 | 179 | 180 | 181 | org.apache.maven.plugins 182 | maven-shade-plugin 183 | 2.2 184 | 185 | false 186 | target/SparkHBase.jar 187 | 188 | 189 | *:* 190 | 191 | 192 | 193 | 194 | *:* 195 | 196 | META-INF/*.SF 197 | META-INF/*.DSA 198 | META-INF/*.RSA 199 | 200 | 201 | 202 | 203 | 204 | 205 | package 206 | 207 | shade 208 | 209 | 210 | 211 | 213 | 215 | reference.conf 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/hadoop/hbase/spark/HBaseDStreamFunctions.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.hadoop.hbase.spark 18 | 19 | import org.apache.hadoop.hbase.TableName 20 | import org.apache.hadoop.hbase.client._ 21 | import org.apache.hadoop.hbase.io.ImmutableBytesWritable 22 | import org.apache.spark.streaming.dstream.DStream 23 | 24 | import scala.reflect.ClassTag 25 | 26 | /** 27 | * HBaseDStreamFunctions contains a set of implicit functions that can be 28 | * applied to a Spark DStream so that we can easily interact with HBase 29 | */ 30 | object HBaseDStreamFunctions { 31 | 32 | /** 33 | * These are implicit methods for a DStream that contains any type of 34 | * data. 35 | * 36 | * @param dStream This is for dStreams of any type 37 | * @tparam T Type T 38 | */ 39 | implicit class GenericHBaseDStreamFunctions[T](val dStream: DStream[T]) { 40 | 41 | /** 42 | * Implicit method that gives easy access to HBaseContext's bulk 43 | * put. This will not return a new Stream. 
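// --- Hypothetical usage sketch (names are placeholders, not from this repo) -----
// Shows the streaming bulk put described here: every (rowKey, value) pair in the
// stream becomes a Put. Assumes an existing HBaseContext `hbaseContext` and a
// DStream[(String, String)] `pairStream`; table "t1" and family "c" are made up.
import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.spark.HBaseDStreamFunctions._
import org.apache.hadoop.hbase.util.Bytes

pairStream.hbaseBulkPut(hbaseContext, TableName.valueOf("t1"), pair => {
  val put = new Put(Bytes.toBytes(pair._1))
  put.addColumn(Bytes.toBytes("c"), Bytes.toBytes("q"), Bytes.toBytes(pair._2))
  put
})
// ---------------------------------------------------------------------------------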
Think of it like a foreach 44 | * 45 | * @param hc The hbaseContext object to identify which 46 | * HBase cluster connection to use 47 | * @param tableName The tableName that the put will be sent to 48 | * @param f The function that will turn the DStream values 49 | * into HBase Put objects. 50 | */ 51 | def hbaseBulkPut(hc: HBaseContext, 52 | tableName: TableName, 53 | f: (T) => Put): Unit = { 54 | hc.streamBulkPut(dStream, tableName, f) 55 | } 56 | 57 | /** 58 | * Implicit method that gives easy access to HBaseContext's bulk 59 | * get. This will return a new DStream. Think about it as a DStream map 60 | * function. In that every DStream value will get a new value out of 61 | * HBase. That new value will populate the newly generated DStream. 62 | * 63 | * @param hc The hbaseContext object to identify which 64 | * HBase cluster connection to use 65 | * @param tableName The tableName that the put will be sent to 66 | * @param batchSize How many gets to execute in a single batch 67 | * @param f The function that will turn the RDD values 68 | * in HBase Get objects 69 | * @param convertResult The function that will convert a HBase 70 | * Result object into a value that will go 71 | * into the resulting DStream 72 | * @tparam R The type of Object that will be coming 73 | * out of the resulting DStream 74 | * @return A resulting DStream with type R objects 75 | */ 76 | def hbaseBulkGet[R: ClassTag](hc: HBaseContext, 77 | tableName: TableName, 78 | batchSize:Int, f: (T) => Get, convertResult: (Result) => R): 79 | DStream[R] = { 80 | hc.streamBulkGet[T, R](tableName, batchSize, dStream, f, convertResult) 81 | } 82 | 83 | /** 84 | * Implicit method that gives easy access to HBaseContext's bulk 85 | * get. This will return a new DStream. Think about it as a DStream map 86 | * function. In that every DStream value will get a new value out of 87 | * HBase. That new value will populate the newly generated DStream. 88 | * 89 | * @param hc The hbaseContext object to identify which 90 | * HBase cluster connection to use 91 | * @param tableName The tableName that the put will be sent to 92 | * @param batchSize How many gets to execute in a single batch 93 | * @param f The function that will turn the RDD values 94 | * in HBase Get objects 95 | * @return A resulting DStream with type R objects 96 | */ 97 | def hbaseBulkGet(hc: HBaseContext, 98 | tableName: TableName, batchSize:Int, 99 | f: (T) => Get): DStream[(ImmutableBytesWritable, Result)] = { 100 | hc.streamBulkGet[T, (ImmutableBytesWritable, Result)]( 101 | tableName, batchSize, dStream, f, 102 | result => (new ImmutableBytesWritable(result.getRow), result)) 103 | } 104 | 105 | /** 106 | * Implicit method that gives easy access to HBaseContext's bulk 107 | * Delete. This will not return a new DStream. 108 | * 109 | * @param hc The hbaseContext object to identify which HBase 110 | * cluster connection to use 111 | * @param tableName The tableName that the deletes will be sent to 112 | * @param f The function that will convert the DStream value into 113 | * a HBase Delete Object 114 | * @param batchSize The number of Deletes to be sent in a single batch 115 | */ 116 | def hbaseBulkDelete(hc: HBaseContext, 117 | tableName: TableName, 118 | f:(T) => Delete, batchSize:Int): Unit = { 119 | hc.streamBulkDelete(dStream, tableName, f, batchSize) 120 | } 121 | 122 | /** 123 | * Implicit method that gives easy access to HBaseContext's 124 | * foreachPartition method. 
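// --- Hypothetical sketch of the streaming bulk get documented above -------------
// Each row key in `keyStream` (an assumed DStream[String]) becomes a Get; the
// batch size of 10 and the table name "t1" are placeholders. The converted
// Results populate a brand new DStream, as described above.
import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.hbase.client.Get
import org.apache.hadoop.hbase.spark.HBaseDStreamFunctions._
import org.apache.hadoop.hbase.util.Bytes

val rowAsString = keyStream.hbaseBulkGet[String](
  hbaseContext, TableName.valueOf("t1"), 10,
  rowKey => new Get(Bytes.toBytes(rowKey)),
  result => Bytes.toString(result.getRow))
// ---------------------------------------------------------------------------------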
This will ack very much like a normal DStream 125 | * foreach method but for the fact that you will now have a HBase connection 126 | * while iterating through the values. 127 | * 128 | * @param hc The hbaseContext object to identify which HBase 129 | * cluster connection to use 130 | * @param f This function will get an iterator for a Partition of an 131 | * DStream along with a connection object to HBase 132 | */ 133 | def hbaseForeachPartition(hc: HBaseContext, 134 | f: (Iterator[T], Connection) => Unit): Unit = { 135 | hc.streamForeachPartition(dStream, f) 136 | } 137 | 138 | /** 139 | * Implicit method that gives easy access to HBaseContext's 140 | * mapPartitions method. This will ask very much like a normal DStream 141 | * map partitions method but for the fact that you will now have a 142 | * HBase connection while iterating through the values 143 | * 144 | * @param hc The hbaseContext object to identify which HBase 145 | * cluster connection to use 146 | * @param f This function will get an iterator for a Partition of an 147 | * DStream along with a connection object to HBase 148 | * @tparam R This is the type of objects that will go into the resulting 149 | * DStream 150 | * @return A resulting DStream of type R 151 | */ 152 | def hbaseMapPartitions[R: ClassTag](hc: HBaseContext, 153 | f: (Iterator[T], Connection) => Iterator[R]): 154 | DStream[R] = { 155 | hc.streamMapPartitions(dStream, f) 156 | } 157 | } 158 | } 159 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Apache License 2 | 3 | Version 2.0, January 2004 4 | 5 | http://www.apache.org/licenses/ 6 | 7 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 8 | 9 | 1. Definitions. 10 | 11 | "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. 16 | 17 | "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. 18 | 19 | "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. 20 | 21 | "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. 22 | 23 | "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). 
24 | 25 | "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 26 | 27 | "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." 28 | 29 | "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 30 | 31 | 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 32 | 33 | 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 34 | 35 | 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: 36 | 37 | You must give any other recipients of the Work or Derivative Works a copy of this License; and 38 | 39 | You must cause any modified files to carry prominent notices stating that You changed the files; and 40 | 41 | You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and 42 | 43 | If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 44 | 45 | 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 46 | 47 | 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 48 | 49 | 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 50 | 51 | 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 52 | 53 | 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. 54 | 55 | END OF TERMS AND CONDITIONS 56 | 57 | APPENDIX: How to apply the Apache License to your work 58 | To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 59 | 60 |    Copyright [yyyy] [name of copyright owner] 61 | 62 |    Licensed under the Apache License, Version 2.0 (the "License"); 63 |    you may not use this file except in compliance with the License. 64 |    You may obtain a copy of the License at 65 | 66 |        http://www.apache.org/licenses/LICENSE-2.0 67 | 68 |    Unless required by applicable law or agreed to in writing, software 69 |    distributed under the License is distributed on an "AS IS" BASIS, 70 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 71 |    See the License for the specific language governing permissions and 72 |    limitations under the License. 73 | -------------------------------------------------------------------------------- /src/main/java/org/apache/hadoop/hbase/spark/SparkSQLPushDownFilter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.hadoop.hbase.spark; 19 | 20 | import com.google.protobuf.ByteString; 21 | import com.google.protobuf.InvalidProtocolBufferException; 22 | import org.apache.commons.logging.Log; 23 | import org.apache.commons.logging.LogFactory; 24 | import org.apache.hadoop.hbase.Cell; 25 | import org.apache.hadoop.hbase.exceptions.DeserializationException; 26 | import org.apache.hadoop.hbase.filter.FilterBase; 27 | import org.apache.hadoop.hbase.spark.protobuf.generated.FilterProtos; 28 | import org.apache.hadoop.hbase.util.ByteStringer; 29 | import org.apache.hadoop.hbase.util.Bytes; 30 | import scala.collection.mutable.MutableList; 31 | 32 | import java.io.IOException; 33 | import java.util.HashMap; 34 | import java.util.List; 35 | import java.util.Map; 36 | 37 | /** 38 | * This filter will push down all qualifier logic given to us 39 | * by SparkSQL so that we have make the filters at the region server level 40 | * and avoid sending the data back to the client to be filtered. 41 | */ 42 | public class SparkSQLPushDownFilter extends FilterBase{ 43 | protected static final Log log = LogFactory.getLog(SparkSQLPushDownFilter.class); 44 | 45 | //The following values are populated with protobuffer 46 | DynamicLogicExpression dynamicLogicExpression; 47 | byte[][] valueFromQueryArray; 48 | HashMap> 49 | currentCellToColumnIndexMap; 50 | 51 | //The following values are transient 52 | HashMap columnToCurrentRowValueMap = null; 53 | 54 | static final byte[] rowKeyFamily = new byte[0]; 55 | static final byte[] rowKeyQualifier = Bytes.toBytes("key"); 56 | 57 | public SparkSQLPushDownFilter(DynamicLogicExpression dynamicLogicExpression, 58 | byte[][] valueFromQueryArray, 59 | HashMap> 61 | currentCellToColumnIndexMap) { 62 | this.dynamicLogicExpression = dynamicLogicExpression; 63 | this.valueFromQueryArray = valueFromQueryArray; 64 | this.currentCellToColumnIndexMap = currentCellToColumnIndexMap; 65 | } 66 | 67 | public SparkSQLPushDownFilter(DynamicLogicExpression dynamicLogicExpression, 68 | byte[][] valueFromQueryArray, 69 | MutableList columnDefinitions) { 70 | this.dynamicLogicExpression = dynamicLogicExpression; 71 | this.valueFromQueryArray = valueFromQueryArray; 72 | 73 | //generate family qualifier to index mapping 74 | this.currentCellToColumnIndexMap = 75 | new HashMap<>(); 76 | 77 | for (int i = 0; i < columnDefinitions.size(); i++) { 78 | SchemaQualifierDefinition definition = columnDefinitions.get(i).get(); 79 | 80 | ByteArrayComparable familyByteComparable = 81 | new ByteArrayComparable(definition.columnFamilyBytes(), 82 | 0, definition.columnFamilyBytes().length); 83 | 84 | HashMap qualifierIndexMap = 85 | currentCellToColumnIndexMap.get(familyByteComparable); 86 | 87 | if (qualifierIndexMap == null) { 88 | qualifierIndexMap = new HashMap<>(); 89 | currentCellToColumnIndexMap.put(familyByteComparable, qualifierIndexMap); 90 | } 91 | ByteArrayComparable qualifierByteComparable = 92 | new ByteArrayComparable(definition.qualifierBytes(), 0, 93 | definition.qualifierBytes().length); 94 | 95 | qualifierIndexMap.put(qualifierByteComparable, definition.columnName()); 96 | } 97 | } 98 | 99 | @Override 100 | public ReturnCode filterKeyValue(Cell c) throws IOException { 101 | 102 | //If the map RowValueMap is empty then we need to populate 103 | // the row key 104 | if (columnToCurrentRowValueMap == null) { 105 | columnToCurrentRowValueMap = new HashMap<>(); 106 
| HashMap qualifierColumnMap = 107 | currentCellToColumnIndexMap.get( 108 | new ByteArrayComparable(rowKeyFamily, 0, rowKeyFamily.length)); 109 | 110 | if (qualifierColumnMap != null) { 111 | String rowKeyColumnName = 112 | qualifierColumnMap.get( 113 | new ByteArrayComparable(rowKeyQualifier, 0, 114 | rowKeyQualifier.length)); 115 | //Make sure that the rowKey is part of the where clause 116 | if (rowKeyColumnName != null) { 117 | columnToCurrentRowValueMap.put(rowKeyColumnName, 118 | new ByteArrayComparable(c.getRowArray(), 119 | c.getRowOffset(), c.getRowLength())); 120 | } 121 | } 122 | } 123 | 124 | //Always populate the column value into the RowValueMap 125 | ByteArrayComparable currentFamilyByteComparable = 126 | new ByteArrayComparable(c.getFamilyArray(), 127 | c.getFamilyOffset(), 128 | c.getFamilyLength()); 129 | 130 | HashMap qualifierColumnMap = 131 | currentCellToColumnIndexMap.get( 132 | currentFamilyByteComparable); 133 | 134 | if (qualifierColumnMap != null) { 135 | 136 | String columnName = 137 | qualifierColumnMap.get( 138 | new ByteArrayComparable(c.getQualifierArray(), 139 | c.getQualifierOffset(), 140 | c.getQualifierLength())); 141 | 142 | if (columnName != null) { 143 | columnToCurrentRowValueMap.put(columnName, 144 | new ByteArrayComparable(c.getValueArray(), 145 | c.getValueOffset(), c.getValueLength())); 146 | } 147 | } 148 | 149 | return ReturnCode.INCLUDE; 150 | } 151 | 152 | 153 | @Override 154 | public boolean filterRow() throws IOException { 155 | 156 | try { 157 | boolean result = 158 | dynamicLogicExpression.execute(columnToCurrentRowValueMap, 159 | valueFromQueryArray); 160 | columnToCurrentRowValueMap = null; 161 | return !result; 162 | } catch (Throwable e) { 163 | log.error("Error running dynamic logic on row", e); 164 | } 165 | return false; 166 | } 167 | 168 | 169 | /** 170 | * @param pbBytes A pb serialized instance 171 | * @return An instance of SparkSQLPushDownFilter 172 | * @throws org.apache.hadoop.hbase.exceptions.DeserializationException 173 | */ 174 | @SuppressWarnings("unused") 175 | public static SparkSQLPushDownFilter parseFrom(final byte[] pbBytes) 176 | throws DeserializationException { 177 | 178 | FilterProtos.SQLPredicatePushDownFilter proto; 179 | try { 180 | proto = FilterProtos.SQLPredicatePushDownFilter.parseFrom(pbBytes); 181 | } catch (InvalidProtocolBufferException e) { 182 | throw new DeserializationException(e); 183 | } 184 | 185 | //Load DynamicLogicExpression 186 | DynamicLogicExpression dynamicLogicExpression = 187 | DynamicLogicExpressionBuilder.build(proto.getDynamicLogicExpression()); 188 | 189 | //Load valuesFromQuery 190 | final List valueFromQueryArrayList = proto.getValueFromQueryArrayList(); 191 | byte[][] valueFromQueryArray = new byte[valueFromQueryArrayList.size()][]; 192 | for (int i = 0; i < valueFromQueryArrayList.size(); i++) { 193 | valueFromQueryArray[i] = valueFromQueryArrayList.get(i).toByteArray(); 194 | } 195 | 196 | //Load mapping from HBase family/qualifier to Spark SQL columnName 197 | HashMap> 198 | currentCellToColumnIndexMap = new HashMap<>(); 199 | 200 | for (FilterProtos.SQLPredicatePushDownCellToColumnMapping 201 | sqlPredicatePushDownCellToColumnMapping : 202 | proto.getCellToColumnMappingList()) { 203 | 204 | byte[] familyArray = 205 | sqlPredicatePushDownCellToColumnMapping.getColumnFamily().toByteArray(); 206 | ByteArrayComparable familyByteComparable = 207 | new ByteArrayComparable(familyArray, 0, familyArray.length); 208 | HashMap qualifierMap = 209 | 
currentCellToColumnIndexMap.get(familyByteComparable); 210 | 211 | if (qualifierMap == null) { 212 | qualifierMap = new HashMap<>(); 213 | currentCellToColumnIndexMap.put(familyByteComparable, qualifierMap); 214 | } 215 | byte[] qualifierArray = 216 | sqlPredicatePushDownCellToColumnMapping.getQualifier().toByteArray(); 217 | 218 | ByteArrayComparable qualifierByteComparable = 219 | new ByteArrayComparable(qualifierArray, 0 ,qualifierArray.length); 220 | 221 | qualifierMap.put(qualifierByteComparable, 222 | sqlPredicatePushDownCellToColumnMapping.getColumnName()); 223 | } 224 | 225 | return new SparkSQLPushDownFilter(dynamicLogicExpression, 226 | valueFromQueryArray, currentCellToColumnIndexMap); 227 | } 228 | 229 | /** 230 | * @return The filter serialized using pb 231 | */ 232 | public byte[] toByteArray() { 233 | 234 | FilterProtos.SQLPredicatePushDownFilter.Builder builder = 235 | FilterProtos.SQLPredicatePushDownFilter.newBuilder(); 236 | 237 | FilterProtos.SQLPredicatePushDownCellToColumnMapping.Builder columnMappingBuilder = 238 | FilterProtos.SQLPredicatePushDownCellToColumnMapping.newBuilder(); 239 | 240 | builder.setDynamicLogicExpression(dynamicLogicExpression.toExpressionString()); 241 | for (byte[] valueFromQuery: valueFromQueryArray) { 242 | builder.addValueFromQueryArray(ByteStringer.wrap(valueFromQuery)); 243 | } 244 | 245 | for (Map.Entry> 246 | familyEntry : currentCellToColumnIndexMap.entrySet()) { 247 | for (Map.Entry qualifierEntry : 248 | familyEntry.getValue().entrySet()) { 249 | columnMappingBuilder.setColumnFamily( 250 | ByteStringer.wrap(familyEntry.getKey().bytes())); 251 | columnMappingBuilder.setQualifier( 252 | ByteStringer.wrap(qualifierEntry.getKey().bytes())); 253 | columnMappingBuilder.setColumnName(qualifierEntry.getValue()); 254 | builder.addCellToColumnMapping(columnMappingBuilder.build()); 255 | } 256 | } 257 | 258 | return builder.build().toByteArray(); 259 | } 260 | } 261 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/hadoop/hbase/spark/DynamicLogicExpression.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import java.util 21 | 22 | import org.apache.hadoop.hbase.util.Bytes 23 | 24 | /** 25 | * Dynamic logic for SQL push down logic there is an instance for most 26 | * common operations and a pass through for other operations not covered here 27 | * 28 | * Logic can be nested with And or Or operators. 
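// --- Hypothetical sketch (column names, indices, and values are made up) --------
// Builds the nested tree described here and round-trips it through its string
// form, then evaluates it the way the filter above does: current row values are
// keyed by column name and compared against query-supplied byte arrays. The
// integers in the expression are indices into that array, not literals. The
// (bytes, offset, length) constructor of ByteArrayComparable follows its use
// elsewhere in this repo.
import org.apache.hadoop.hbase.util.Bytes

val expr = new AndLogicExpression(
  new LessThanLogicExpression("colA", 0),
  new EqualLogicExpression("colB", 1, isNot = false))
val exprString = expr.toExpressionString            // "( colA < 0 AND colB == 1 )"
val rebuilt = DynamicLogicExpressionBuilder.build(exprString)

val row = new java.util.HashMap[String, ByteArrayComparable]()
val a = Bytes.toBytes(3)
val b = Bytes.toBytes(1)
row.put("colA", new ByteArrayComparable(a, 0, a.length))
row.put("colB", new ByteArrayComparable(b, 0, b.length))
val valuesFromQuery: Array[Array[Byte]] = Array(Bytes.toBytes(10), Bytes.toBytes(1))
rebuilt.execute(row, valuesFromQuery)               // true: 3 < 10 and 1 == 1 (byte-wise)
// ---------------------------------------------------------------------------------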
29 | * 30 | * A logic tree can be written out as a string and reconstructed from that string 31 | * 32 | */ 33 | trait DynamicLogicExpression { 34 | def execute(columnToCurrentRowValueMap: util.HashMap[String, ByteArrayComparable], 35 | valueFromQueryValueArray:Array[Array[Byte]]): Boolean 36 | def toExpressionString: String = { 37 | val strBuilder = new StringBuilder 38 | appendToExpression(strBuilder) 39 | strBuilder.toString() 40 | } 41 | def appendToExpression(strBuilder:StringBuilder) 42 | } 43 | 44 | class AndLogicExpression (val leftExpression:DynamicLogicExpression, 45 | val rightExpression:DynamicLogicExpression) 46 | extends DynamicLogicExpression{ 47 | override def execute(columnToCurrentRowValueMap: 48 | util.HashMap[String, ByteArrayComparable], 49 | valueFromQueryValueArray:Array[Array[Byte]]): Boolean = { 50 | leftExpression.execute(columnToCurrentRowValueMap, valueFromQueryValueArray) && 51 | rightExpression.execute(columnToCurrentRowValueMap, valueFromQueryValueArray) 52 | } 53 | 54 | override def appendToExpression(strBuilder: StringBuilder): Unit = { 55 | strBuilder.append("( ") 56 | strBuilder.append(leftExpression.toExpressionString) 57 | strBuilder.append(" AND ") 58 | strBuilder.append(rightExpression.toExpressionString) 59 | strBuilder.append(" )") 60 | } 61 | } 62 | 63 | class OrLogicExpression (val leftExpression:DynamicLogicExpression, 64 | val rightExpression:DynamicLogicExpression) 65 | extends DynamicLogicExpression{ 66 | override def execute(columnToCurrentRowValueMap: 67 | util.HashMap[String, ByteArrayComparable], 68 | valueFromQueryValueArray:Array[Array[Byte]]): Boolean = { 69 | leftExpression.execute(columnToCurrentRowValueMap, valueFromQueryValueArray) || 70 | rightExpression.execute(columnToCurrentRowValueMap, valueFromQueryValueArray) 71 | } 72 | override def appendToExpression(strBuilder: StringBuilder): Unit = { 73 | strBuilder.append("( ") 74 | strBuilder.append(leftExpression.toExpressionString) 75 | strBuilder.append(" OR ") 76 | strBuilder.append(rightExpression.toExpressionString) 77 | strBuilder.append(" )") 78 | } 79 | } 80 | 81 | class EqualLogicExpression (val columnName:String, 82 | val valueFromQueryIndex:Int, 83 | val isNot:Boolean) extends DynamicLogicExpression{ 84 | override def execute(columnToCurrentRowValueMap: 85 | util.HashMap[String, ByteArrayComparable], 86 | valueFromQueryValueArray:Array[Array[Byte]]): Boolean = { 87 | val currentRowValue = columnToCurrentRowValueMap.get(columnName) 88 | val valueFromQuery = valueFromQueryValueArray(valueFromQueryIndex) 89 | 90 | currentRowValue != null && 91 | Bytes.equals(valueFromQuery, 92 | 0, valueFromQuery.length, currentRowValue.bytes, 93 | currentRowValue.offset, currentRowValue.length) != isNot 94 | } 95 | override def appendToExpression(strBuilder: StringBuilder): Unit = { 96 | val command = if (isNot) "!=" else "==" 97 | strBuilder.append(columnName + " " + command + " " + valueFromQueryIndex) 98 | } 99 | } 100 | 101 | class IsNullLogicExpression (val columnName:String, 102 | val isNot:Boolean) extends DynamicLogicExpression{ 103 | override def execute(columnToCurrentRowValueMap: 104 | util.HashMap[String, ByteArrayComparable], 105 | valueFromQueryValueArray:Array[Array[Byte]]): Boolean = { 106 | val currentRowValue = columnToCurrentRowValueMap.get(columnName) 107 | 108 | (currentRowValue == null) != isNot 109 | } 110 | override def appendToExpression(strBuilder: StringBuilder): Unit = { 111 | val command = if (isNot) "isNotNull" else "isNull" 112 | strBuilder.append(columnName + " 
" + command) 113 | } 114 | } 115 | 116 | class GreaterThanLogicExpression (val columnName:String, 117 | val valueFromQueryIndex:Int) 118 | extends DynamicLogicExpression{ 119 | override def execute(columnToCurrentRowValueMap: 120 | util.HashMap[String, ByteArrayComparable], 121 | valueFromQueryValueArray:Array[Array[Byte]]): Boolean = { 122 | val currentRowValue = columnToCurrentRowValueMap.get(columnName) 123 | val valueFromQuery = valueFromQueryValueArray(valueFromQueryIndex) 124 | 125 | currentRowValue != null && 126 | Bytes.compareTo(currentRowValue.bytes, 127 | currentRowValue.offset, currentRowValue.length, valueFromQuery, 128 | 0, valueFromQuery.length) > 0 129 | } 130 | override def appendToExpression(strBuilder: StringBuilder): Unit = { 131 | strBuilder.append(columnName + " > " + valueFromQueryIndex) 132 | } 133 | } 134 | 135 | class GreaterThanOrEqualLogicExpression (val columnName:String, 136 | val valueFromQueryIndex:Int) 137 | extends DynamicLogicExpression{ 138 | override def execute(columnToCurrentRowValueMap: 139 | util.HashMap[String, ByteArrayComparable], 140 | valueFromQueryValueArray:Array[Array[Byte]]): Boolean = { 141 | val currentRowValue = columnToCurrentRowValueMap.get(columnName) 142 | val valueFromQuery = valueFromQueryValueArray(valueFromQueryIndex) 143 | 144 | currentRowValue != null && 145 | Bytes.compareTo(currentRowValue.bytes, 146 | currentRowValue.offset, currentRowValue.length, valueFromQuery, 147 | 0, valueFromQuery.length) >= 0 148 | } 149 | override def appendToExpression(strBuilder: StringBuilder): Unit = { 150 | strBuilder.append(columnName + " >= " + valueFromQueryIndex) 151 | } 152 | } 153 | 154 | class LessThanLogicExpression (val columnName:String, 155 | val valueFromQueryIndex:Int) 156 | extends DynamicLogicExpression{ 157 | override def execute(columnToCurrentRowValueMap: 158 | util.HashMap[String, ByteArrayComparable], 159 | valueFromQueryValueArray:Array[Array[Byte]]): Boolean = { 160 | val currentRowValue = columnToCurrentRowValueMap.get(columnName) 161 | val valueFromQuery = valueFromQueryValueArray(valueFromQueryIndex) 162 | 163 | currentRowValue != null && 164 | Bytes.compareTo(currentRowValue.bytes, 165 | currentRowValue.offset, currentRowValue.length, valueFromQuery, 166 | 0, valueFromQuery.length) < 0 167 | } 168 | 169 | override def appendToExpression(strBuilder: StringBuilder): Unit = { 170 | strBuilder.append(columnName + " < " + valueFromQueryIndex) 171 | } 172 | } 173 | 174 | class LessThanOrEqualLogicExpression (val columnName:String, 175 | val valueFromQueryIndex:Int) 176 | extends DynamicLogicExpression{ 177 | override def execute(columnToCurrentRowValueMap: 178 | util.HashMap[String, ByteArrayComparable], 179 | valueFromQueryValueArray:Array[Array[Byte]]): Boolean = { 180 | val currentRowValue = columnToCurrentRowValueMap.get(columnName) 181 | val valueFromQuery = valueFromQueryValueArray(valueFromQueryIndex) 182 | 183 | currentRowValue != null && 184 | Bytes.compareTo(currentRowValue.bytes, 185 | currentRowValue.offset, currentRowValue.length, valueFromQuery, 186 | 0, valueFromQuery.length) <= 0 187 | } 188 | 189 | override def appendToExpression(strBuilder: StringBuilder): Unit = { 190 | strBuilder.append(columnName + " <= " + valueFromQueryIndex) 191 | } 192 | } 193 | 194 | class PassThroughLogicExpression() extends DynamicLogicExpression { 195 | override def execute(columnToCurrentRowValueMap: 196 | util.HashMap[String, ByteArrayComparable], 197 | valueFromQueryValueArray: Array[Array[Byte]]): Boolean = true 198 | 199 | 
override def appendToExpression(strBuilder: StringBuilder): Unit = { 200 | strBuilder.append("Pass") 201 | } 202 | } 203 | 204 | object DynamicLogicExpressionBuilder { 205 | def build(expressionString:String): DynamicLogicExpression = { 206 | 207 | val expressionAndOffset = build(expressionString.split(' '), 0) 208 | expressionAndOffset._1 209 | } 210 | 211 | private def build(expressionArray:Array[String], 212 | offSet:Int): (DynamicLogicExpression, Int) = { 213 | if (expressionArray(offSet).equals("(")) { 214 | val left = build(expressionArray, offSet + 1) 215 | val right = build(expressionArray, left._2 + 1) 216 | if (expressionArray(left._2).equals("AND")) { 217 | (new AndLogicExpression(left._1, right._1), right._2 + 1) 218 | } else if (expressionArray(left._2).equals("OR")) { 219 | (new OrLogicExpression(left._1, right._1), right._2 + 1) 220 | } else { 221 | throw new Throwable("Unknown gate:" + expressionArray(left._2)) 222 | } 223 | } else { 224 | val command = expressionArray(offSet + 1) 225 | if (command.equals("<")) { 226 | (new LessThanLogicExpression(expressionArray(offSet), 227 | expressionArray(offSet + 2).toInt), offSet + 3) 228 | } else if (command.equals("<=")) { 229 | (new LessThanOrEqualLogicExpression(expressionArray(offSet), 230 | expressionArray(offSet + 2).toInt), offSet + 3) 231 | } else if (command.equals(">")) { 232 | (new GreaterThanLogicExpression(expressionArray(offSet), 233 | expressionArray(offSet + 2).toInt), offSet + 3) 234 | } else if (command.equals(">=")) { 235 | (new GreaterThanOrEqualLogicExpression(expressionArray(offSet), 236 | expressionArray(offSet + 2).toInt), offSet + 3) 237 | } else if (command.equals("==")) { 238 | (new EqualLogicExpression(expressionArray(offSet), 239 | expressionArray(offSet + 2).toInt, false), offSet + 3) 240 | } else if (command.equals("!=")) { 241 | (new EqualLogicExpression(expressionArray(offSet), 242 | expressionArray(offSet + 2).toInt, true), offSet + 3) 243 | } else if (command.equals("isNull")) { 244 | (new IsNullLogicExpression(expressionArray(offSet), false), offSet + 2) 245 | } else if (command.equals("isNotNull")) { 246 | (new IsNullLogicExpression(expressionArray(offSet), true), offSet + 2) 247 | } else if (command.equals("Pass")) { 248 | (new PassThroughLogicExpression, offSet + 2) 249 | } else { 250 | throw new Throwable("Unknown logic command:" + command) 251 | } 252 | } 253 | } 254 | } -------------------------------------------------------------------------------- /src/main/scala/org/apache/hadoop/hbase/spark/HBaseRDDFunctions.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import java.util 21 | 22 | import org.apache.hadoop.hbase.{HConstants, TableName} 23 | import org.apache.hadoop.hbase.client._ 24 | import org.apache.hadoop.hbase.io.ImmutableBytesWritable 25 | import org.apache.spark.rdd.RDD 26 | 27 | import scala.reflect.ClassTag 28 | 29 | /** 30 | * HBaseRDDFunctions contains a set of implicit functions that can be 31 | * applied to a Spark RDD so that we can easily interact with HBase 32 | */ 33 | object HBaseRDDFunctions 34 | { 35 | 36 | /** 37 | * These are implicit methods for a RDD that contains any type of 38 | * data. 39 | * 40 | * @param rdd This is for rdd of any type 41 | * @tparam T This is any type 42 | */ 43 | implicit class GenericHBaseRDDFunctions[T](val rdd: RDD[T]) { 44 | 45 | /** 46 | * Implicit method that gives easy access to HBaseContext's bulk 47 | * put. This will not return a new RDD. Think of it like a foreach 48 | * 49 | * @param hc The hbaseContext object to identify which 50 | * HBase cluster connection to use 51 | * @param tableName The tableName that the put will be sent to 52 | * @param f The function that will turn the RDD values 53 | * into HBase Put objects. 54 | */ 55 | def hbaseBulkPut(hc: HBaseContext, 56 | tableName: TableName, 57 | f: (T) => Put): Unit = { 58 | hc.bulkPut(rdd, tableName, f) 59 | } 60 | 61 | /** 62 | * Implicit method that gives easy access to HBaseContext's bulk 63 | * get. This will return a new RDD. Think about it as a RDD map 64 | * function. In that every RDD value will get a new value out of 65 | * HBase. That new value will populate the newly generated RDD. 66 | * 67 | * @param hc The hbaseContext object to identify which 68 | * HBase cluster connection to use 69 | * @param tableName The tableName that the put will be sent to 70 | * @param batchSize How many gets to execute in a single batch 71 | * @param f The function that will turn the RDD values 72 | * in HBase Get objects 73 | * @param convertResult The function that will convert a HBase 74 | * Result object into a value that will go 75 | * into the resulting RDD 76 | * @tparam R The type of Object that will be coming 77 | * out of the resulting RDD 78 | * @return A resulting RDD with type R objects 79 | */ 80 | def hbaseBulkGet[R: ClassTag](hc: HBaseContext, 81 | tableName: TableName, batchSize:Int, 82 | f: (T) => Get, convertResult: (Result) => R): RDD[R] = { 83 | hc.bulkGet[T, R](tableName, batchSize, rdd, f, convertResult) 84 | } 85 | 86 | /** 87 | * Implicit method that gives easy access to HBaseContext's bulk 88 | * get. This will return a new RDD. Think about it as a RDD map 89 | * function. In that every RDD value will get a new value out of 90 | * HBase. That new value will populate the newly generated RDD. 
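// --- Hypothetical sketch of the RDD bulk get described above --------------------
// Assumes an existing SparkContext `sc` and HBaseContext `hbaseContext`; table
// "t1" and the batch size of 2 are placeholders. Every row key in the RDD becomes
// a Get, and every Result is converted into a String in the new RDD.
import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.hbase.client.Get
import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
import org.apache.hadoop.hbase.util.Bytes

val keyRdd = sc.parallelize(Array(Bytes.toBytes("1"), Bytes.toBytes("2"), Bytes.toBytes("3")))
val values = keyRdd.hbaseBulkGet[String](
  hbaseContext, TableName.valueOf("t1"), 2,
  rowKey => new Get(rowKey),
  result => Bytes.toString(result.getRow))
// ---------------------------------------------------------------------------------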
91 | * 92 | * @param hc The hbaseContext object to identify which 93 | * HBase cluster connection to use 94 | * @param tableName The tableName that the put will be sent to 95 | * @param batchSize How many gets to execute in a single batch 96 | * @param f The function that will turn the RDD values 97 | * in HBase Get objects 98 | * @return A resulting RDD with type R objects 99 | */ 100 | def hbaseBulkGet(hc: HBaseContext, 101 | tableName: TableName, batchSize:Int, 102 | f: (T) => Get): RDD[(ImmutableBytesWritable, Result)] = { 103 | hc.bulkGet[T, (ImmutableBytesWritable, Result)](tableName, 104 | batchSize, rdd, f, 105 | result => if (result != null && result.getRow != null) { 106 | (new ImmutableBytesWritable(result.getRow), result) 107 | } else { 108 | null 109 | }) 110 | } 111 | 112 | /** 113 | * Implicit method that gives easy access to HBaseContext's bulk 114 | * Delete. This will not return a new RDD. 115 | * 116 | * @param hc The hbaseContext object to identify which HBase 117 | * cluster connection to use 118 | * @param tableName The tableName that the deletes will be sent to 119 | * @param f The function that will convert the RDD value into 120 | * a HBase Delete Object 121 | * @param batchSize The number of Deletes to be sent in a single batch 122 | */ 123 | def hbaseBulkDelete(hc: HBaseContext, 124 | tableName: TableName, f:(T) => Delete, batchSize:Int): Unit = { 125 | hc.bulkDelete(rdd, tableName, f, batchSize) 126 | } 127 | 128 | /** 129 | * Implicit method that gives easy access to HBaseContext's 130 | * foreachPartition method. This will ack very much like a normal RDD 131 | * foreach method but for the fact that you will now have a HBase connection 132 | * while iterating through the values. 133 | * 134 | * @param hc The hbaseContext object to identify which HBase 135 | * cluster connection to use 136 | * @param f This function will get an iterator for a Partition of an 137 | * RDD along with a connection object to HBase 138 | */ 139 | def hbaseForeachPartition(hc: HBaseContext, 140 | f: (Iterator[T], Connection) => Unit): Unit = { 141 | hc.foreachPartition(rdd, f) 142 | } 143 | 144 | /** 145 | * Implicit method that gives easy access to HBaseContext's 146 | * mapPartitions method. This will ask very much like a normal RDD 147 | * map partitions method but for the fact that you will now have a 148 | * HBase connection while iterating through the values 149 | * 150 | * @param hc The hbaseContext object to identify which HBase 151 | * cluster connection to use 152 | * @param f This function will get an iterator for a Partition of an 153 | * RDD along with a connection object to HBase 154 | * @tparam R This is the type of objects that will go into the resulting 155 | * RDD 156 | * @return A resulting RDD of type R 157 | */ 158 | def hbaseMapPartitions[R: ClassTag](hc: HBaseContext, 159 | f: (Iterator[T], Connection) => Iterator[R]): 160 | RDD[R] = { 161 | hc.mapPartitions[T,R](rdd, f) 162 | } 163 | 164 | /** 165 | * Spark Implementation of HBase Bulk load for wide rows or when 166 | * values are not already combined at the time of the map process 167 | * 168 | * A Spark Implementation of HBase Bulk load 169 | * 170 | * This will take the content from an existing RDD then sort and shuffle 171 | * it with respect to region splits. The result of that sort and shuffle 172 | * will be written to HFiles. 173 | * 174 | * After this function is executed the user will have to call 175 | * LoadIncrementalHFiles.doBulkLoad(...) 
to move the files into HBase 176 | * 177 | * Also note this version of bulk load is different from past versions in 178 | * that it includes the qualifier as part of the sort process. The 179 | * reason for this is to be able to support rows with a very large number 180 | * of columns. 181 | * 182 | * @param tableName The HBase table we are loading into 183 | * @param flatMap A flatMap function that will make every row in the RDD 184 | * into N cells for the bulk load 185 | * @param stagingDir The location on the FileSystem to bulk load into 186 | * @param familyHFileWriteOptionsMap Options that will define how the HFile for a 187 | * column family is written 188 | * @param compactionExclude Compaction excluded for the HFiles 189 | * @param maxSize Max size for the HFiles before they roll 190 | */ 191 | def hbaseBulkLoad(hc: HBaseContext, 192 | tableName: TableName, 193 | flatMap: (T) => Iterator[(KeyFamilyQualifier, Array[Byte])], 194 | stagingDir:String, 195 | familyHFileWriteOptionsMap: 196 | util.Map[Array[Byte], FamilyHFileWriteOptions] = 197 | new util.HashMap[Array[Byte], FamilyHFileWriteOptions](), 198 | compactionExclude: Boolean = false, 199 | maxSize:Long = HConstants.DEFAULT_MAX_FILE_SIZE):Unit = { 200 | hc.bulkLoad(rdd, tableName, 201 | flatMap, stagingDir, familyHFileWriteOptionsMap, 202 | compactionExclude, maxSize) 203 | } 204 | 205 | /** 206 | * Implicit method that gives easy access to HBaseContext's 207 | * bulkLoadThinRows method. 208 | * 209 | * Spark Implementation of HBase Bulk load for short rows, somewhere less than 210 | * 1000 columns. This bulk load should be faster for tables with thinner 211 | * rows than the other Spark implementation of bulk load that puts only one 212 | * value into a record going into a shuffle. 213 | * 214 | * This will take the content from an existing RDD then sort and shuffle 215 | * it with respect to region splits. The result of that sort and shuffle 216 | * will be written to HFiles. 217 | * 218 | * After this function is executed the user will have to call 219 | * LoadIncrementalHFiles.doBulkLoad(...) to move the files into HBase 220 | * 221 | * In this implementation only the rowKey is given to the shuffle as the key 222 | * and all the columns are already linked to the RowKey before the shuffle 223 | * stage.
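// --- Hypothetical bulk-load sketch (paths, table, and column names are made up) --
// Turns each (rowKey, value) pair of an assumed RDD[(String, String)] `rdd` into a
// single cell for the HFile writer, using the hbaseBulkLoad method defined above.
// The (rowKey, family, qualifier) constructor of KeyFamilyQualifier is assumed
// here; that class is not shown in this excerpt.
import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
import org.apache.hadoop.hbase.util.Bytes

rdd.hbaseBulkLoad(hbaseContext, TableName.valueOf("t1"),
  pair => {
    val family = Bytes.toBytes("f")
    val qualifier = Bytes.toBytes("q")
    Seq((new KeyFamilyQualifier(Bytes.toBytes(pair._1), family, qualifier),
      Bytes.toBytes(pair._2))).iterator
  },
  "/tmp/bulkLoadStagingDir")
// LoadIncrementalHFiles.doBulkLoad(...) is then run to move the generated HFiles
// into HBase, as the comment above notes.
// ---------------------------------------------------------------------------------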
The sorting of the qualifier is done in memory out side of the 224 | * shuffle stage 225 | * 226 | * @param tableName The HBase table we are loading into 227 | * @param mapFunction A function that will convert the RDD records to 228 | * the key value format used for the shuffle to prep 229 | * for writing to the bulk loaded HFiles 230 | * @param stagingDir The location on the FileSystem to bulk load into 231 | * @param familyHFileWriteOptionsMap Options that will define how the HFile for a 232 | * column family is written 233 | * @param compactionExclude Compaction excluded for the HFiles 234 | * @param maxSize Max size for the HFiles before they roll 235 | */ 236 | def hbaseBulkLoadThinRows(hc: HBaseContext, 237 | tableName: TableName, 238 | mapFunction: (T) => 239 | (ByteArrayWrapper, FamiliesQualifiersValues), 240 | stagingDir:String, 241 | familyHFileWriteOptionsMap: 242 | util.Map[Array[Byte], FamilyHFileWriteOptions] = 243 | new util.HashMap[Array[Byte], FamilyHFileWriteOptions](), 244 | compactionExclude: Boolean = false, 245 | maxSize:Long = HConstants.DEFAULT_MAX_FILE_SIZE):Unit = { 246 | hc.bulkLoadThinRows(rdd, tableName, 247 | mapFunction, stagingDir, familyHFileWriteOptionsMap, 248 | compactionExclude, maxSize) 249 | } 250 | } 251 | } 252 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/hadoop/hbase/spark/JavaHBaseContext.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import org.apache.hadoop.conf.Configuration 21 | import org.apache.hadoop.hbase.TableName 22 | import org.apache.hadoop.hbase.client.{Connection, Delete, Get, Put, Result, Scan} 23 | import org.apache.hadoop.hbase.io.ImmutableBytesWritable 24 | import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} 25 | import org.apache.spark.api.java.function.{FlatMapFunction, Function, VoidFunction} 26 | import org.apache.spark.streaming.api.java.JavaDStream 27 | 28 | import scala.collection.JavaConversions._ 29 | import scala.reflect.ClassTag 30 | 31 | /** 32 | * This is the Java Wrapper over HBaseContext which is written in 33 | * Scala. 
* This class will be used by developers that want to
 34 | * work with Spark or Spark Streaming in Java
 35 | *
 36 | * @param jsc This is the JavaSparkContext that we will wrap
 37 | * @param config This is the config information for our HBase cluster
 38 | */
 39 | class JavaHBaseContext(@transient jsc: JavaSparkContext,
 40 | @transient config: Configuration) extends Serializable {
 41 | val hbaseContext = new HBaseContext(jsc.sc, config)
 42 |
 43 | /**
 44 | * A simple enrichment of the traditional Spark javaRdd foreachPartition.
 45 | * This function differs from the original in that it offers the
 46 | * developer access to an already connected HConnection object.
 47 | *
 48 | * Note: Do not close the HConnection object. All HConnection
 49 | * management is handled outside this method.
 50 | *
 51 | * @param javaRdd Original javaRdd with data to iterate over
 52 | * @param f Function to be given an iterator to iterate through
 53 | * the RDD values and an HConnection object to interact
 54 | * with HBase
 55 | */
 56 | def foreachPartition[T](javaRdd: JavaRDD[T],
 57 | f: VoidFunction[(java.util.Iterator[T], Connection)]) = {
 58 |
 59 | hbaseContext.foreachPartition(javaRdd.rdd,
 60 | (it: Iterator[T], conn: Connection) => {
 61 | f.call((it, conn))
 62 | })
 63 | }
 64 |
 65 | /**
 66 | * A simple enrichment of the traditional Spark Streaming dStream foreach.
 67 | * This function differs from the original in that it offers the
 68 | * developer access to an already connected HConnection object.
 69 | *
 70 | * Note: Do not close the HConnection object. All HConnection
 71 | * management is handled outside this method.
 72 | *
 73 | * @param javaDstream Original DStream with data to iterate over
 74 | * @param f Function to be given an iterator to iterate through
 75 | * the JavaDStream values and an HConnection object to
 76 | * interact with HBase
 77 | */
 78 | def foreachPartition[T](javaDstream: JavaDStream[T],
 79 | f: VoidFunction[(Iterator[T], Connection)]) = {
 80 | hbaseContext.foreachPartition(javaDstream.dstream,
 81 | (it: Iterator[T], conn: Connection) => f.call(it, conn))
 82 | }
 83 |
 84 | /**
 85 | * A simple enrichment of the traditional Spark JavaRDD mapPartition.
 86 | * This function differs from the original in that it offers the
 87 | * developer access to an already connected HConnection object.
 88 | *
 89 | * Note: Do not close the HConnection object. All HConnection
 90 | * management is handled outside this method.
 91 | *
 92 | * Note: Make sure to partition correctly to avoid memory issues when
 93 | * getting data from HBase.
 94 | *
 95 | * @param javaRdd Original JavaRdd with data to iterate over
 96 | * @param f Function to be given an iterator to iterate through
 97 | * the RDD values and an HConnection object to interact
 98 | * with HBase
 99 | * @return Returns a new RDD generated by the user-defined
 100 | * function just like normal mapPartition
 101 | */
 102 | def mapPartitions[T, R](javaRdd: JavaRDD[T],
 103 | f: FlatMapFunction[(java.util.Iterator[T],
 104 | Connection), R]): JavaRDD[R] = {
 105 |
 106 | def fn = (it: Iterator[T], conn: Connection) =>
 107 | asScalaIterator(
 108 | f.call((asJavaIterator(it), conn)).iterator()
 109 | )
 110 |
 111 | JavaRDD.fromRDD(hbaseContext.mapPartitions(javaRdd.rdd,
 112 | (iterator: Iterator[T], connection: Connection) =>
 113 | fn(iterator, connection))(fakeClassTag[R]))(fakeClassTag[R])
 114 | }
 115 |
 116 | /**
 117 | * A simple enrichment of the traditional Spark Streaming JavaDStream
 118 | * mapPartition.
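 *
 * A usage sketch for the RDD `foreachPartition` method above, written in Scala
 * against the Java API for consistency with this file. The `javaHBaseContext`
 * instance, the `JavaRDD[String]` named `javaRdd`, and the table name "t1" are
 * assumptions for illustration only:
 *
 * {{{
 * import org.apache.hadoop.hbase.TableName
 * import org.apache.hadoop.hbase.client.{Connection, Put}
 * import org.apache.hadoop.hbase.util.Bytes
 * import org.apache.spark.api.java.function.VoidFunction
 *
 * javaHBaseContext.foreachPartition(javaRdd,
 *   new VoidFunction[(java.util.Iterator[String], Connection)] {
 *     override def call(t: (java.util.Iterator[String], Connection)): Unit = {
 *       val (it, conn) = t
 *       val table = conn.getTable(TableName.valueOf("t1"))
 *       while (it.hasNext) {
 *         val rowKey = Bytes.toBytes(it.next())
 *         // write each value back as a single cell in family "f", qualifier "q"
 *         table.put(new Put(rowKey).addColumn(Bytes.toBytes("f"), Bytes.toBytes("q"), rowKey))
 *       }
 *       table.close()
 *     }
 *   })
 * }}}
 *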
 119 | *
 120 | * This function differs from the original in that it offers the
 121 | * developer access to an already connected HConnection object.
 122 | *
 123 | * Note: Do not close the HConnection object. All HConnection
 124 | * management is handled outside this method.
 125 | *
 126 | * Note: Make sure to partition correctly to avoid memory issues when
 127 | * getting data from HBase.
 128 | *
 129 | * @param javaDstream Original JavaDStream with data to iterate over
 130 | * @param mp Function to be given an iterator to iterate through
 131 | * the JavaDStream values and an HConnection object to
 132 | * interact with HBase
 133 | * @return Returns a new JavaDStream generated by the user-defined
 134 | * function just like normal mapPartition
 135 | */
 136 | def streamMap[T, U](javaDstream: JavaDStream[T],
 137 | mp: Function[(Iterator[T], Connection), Iterator[U]]):
 138 | JavaDStream[U] = {
 139 | JavaDStream.fromDStream(hbaseContext.streamMapPartitions(javaDstream.dstream,
 140 | (it: Iterator[T], conn: Connection) =>
 141 | mp.call(it, conn))(fakeClassTag[U]))(fakeClassTag[U])
 142 | }
 143 |
 144 | /**
 145 | * A simple abstraction over the HBaseContext.foreachPartition method.
 146 | *
 147 | * It allows additional support for a user to take a JavaRDD
 148 | * and generate puts and send them to HBase.
 149 | * The complexity of managing the HConnection is
 150 | * removed from the developer.
 151 | *
 152 | * @param javaRdd Original JavaRDD with data to iterate over
 153 | * @param tableName The name of the table to put into
 154 | * @param f Function to convert a value in the JavaRDD
 155 | * to an HBase Put
 156 | */
 157 | def bulkPut[T](javaRdd: JavaRDD[T],
 158 | tableName: TableName,
 159 | f: Function[(T), Put]) {
 160 |
 161 | hbaseContext.bulkPut(javaRdd.rdd, tableName, (t: T) => f.call(t))
 162 | }
 163 |
 164 | /**
 165 | * A simple abstraction over the HBaseContext.streamMapPartition method.
 166 | *
 167 | * It allows additional support for a user to take a JavaDStream and
 168 | * generate puts and send them to HBase.
 169 | *
 170 | * The complexity of managing the HConnection is
 171 | * removed from the developer.
 172 | *
 173 | * @param javaDstream Original DStream with data to iterate over
 174 | * @param tableName The name of the table to put into
 175 | * @param f Function to convert a value in
 176 | * the JavaDStream to an HBase Put
 177 | */
 178 | def streamBulkPut[T](javaDstream: JavaDStream[T],
 179 | tableName: TableName,
 180 | f: Function[T, Put]) = {
 181 | hbaseContext.streamBulkPut(javaDstream.dstream,
 182 | tableName,
 183 | (t: T) => f.call(t))
 184 | }
 185 |
 186 | /**
 187 | * A simple abstraction over the HBaseContext.foreachPartition method.
 188 | *
 189 | * It allows additional support for a user to take a JavaRDD and
 190 | * generate Deletes and send them to HBase.
 191 | *
 192 | * The complexity of managing the HConnection is
 193 | * removed from the developer.
 194 | *
 195 | * @param javaRdd Original JavaRDD with data to iterate over
 196 | * @param tableName The name of the table to delete from
 197 | * @param f Function to convert a value in the JavaRDD to an
 198 | * HBase Delete
 199 | * @param batchSize The number of deletes to batch before sending to HBase
 200 | */
 201 | def bulkDelete[T](javaRdd: JavaRDD[T], tableName: TableName,
 202 | f: Function[T, Delete], batchSize: Integer) {
 203 | hbaseContext.bulkDelete(javaRdd.rdd, tableName, (t: T) => f.call(t), batchSize)
 204 | }
 205 |
 206 | /**
 207 | * A simple abstraction over the HBaseContext.streamBulkMutation method.
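 *
 * A usage sketch for the `bulkPut` method above. The `javaHBaseContext`
 * instance, the `JavaRDD[(Array[Byte], Array[Byte])]` of (rowKey, value) pairs
 * named `javaRdd`, and the table/column names are illustrative assumptions:
 *
 * {{{
 * import org.apache.hadoop.hbase.TableName
 * import org.apache.hadoop.hbase.client.Put
 * import org.apache.hadoop.hbase.util.Bytes
 * import org.apache.spark.api.java.function.Function
 *
 * javaHBaseContext.bulkPut(javaRdd, TableName.valueOf("t1"),
 *   new Function[(Array[Byte], Array[Byte]), Put] {
 *     override def call(record: (Array[Byte], Array[Byte])): Put = {
 *       // one Put per RDD record, storing the value under family "f", qualifier "q"
 *       new Put(record._1).addColumn(Bytes.toBytes("f"), Bytes.toBytes("q"), record._2)
 *     }
 *   })
 * }}}
 *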
 208 | *
 209 | * It allows additional support for a user to take a JavaDStream and
 210 | * generate Deletes and send them to HBase.
 211 | *
 212 | * The complexity of managing the HConnection is
 213 | * removed from the developer.
 214 | *
 215 | * @param javaDStream Original DStream with data to iterate over
 216 | * @param tableName The name of the table to delete from
 217 | * @param f Function to convert a value in the JavaDStream to an
 218 | * HBase Delete
 219 | * @param batchSize The number of deletes to be sent at once
 220 | */
 221 | def streamBulkDelete[T](javaDStream: JavaDStream[T],
 222 | tableName: TableName,
 223 | f: Function[T, Delete],
 224 | batchSize: Integer) = {
 225 | hbaseContext.streamBulkDelete(javaDStream.dstream, tableName,
 226 | (t: T) => f.call(t),
 227 | batchSize)
 228 | }
 229 |
 230 | /**
 231 | * A simple abstraction over the HBaseContext.mapPartition method.
 232 | *
 233 | * It allows additional support for a user to take a JavaRDD and generate a
 234 | * new RDD based on Gets and the results they bring back from HBase.
 235 | *
 236 | * @param tableName The name of the table to get from
 237 | * @param batchSize batch size of how many gets to retrieve in a single fetch
 238 | * @param javaRdd Original JavaRDD with data to iterate over
 239 | * @param makeGet Function to convert a value in the JavaRDD to an
 240 | * HBase Get
 241 | * @param convertResult This will convert the HBase Result object to
 242 | * whatever the user wants to put in the resulting
 243 | * JavaRDD
 244 | * @return New JavaRDD that is created by the Get to HBase
 245 | */
 246 | def bulkGet[T, U](tableName: TableName,
 247 | batchSize: Integer,
 248 | javaRdd: JavaRDD[T],
 249 | makeGet: Function[T, Get],
 250 | convertResult: Function[Result, U]): JavaRDD[U] = {
 251 |
 252 | JavaRDD.fromRDD(hbaseContext.bulkGet[T, U](tableName,
 253 | batchSize,
 254 | javaRdd.rdd,
 255 | (t: T) => makeGet.call(t),
 256 | (r: Result) => {
 257 | convertResult.call(r)
 258 | })(fakeClassTag[U]))(fakeClassTag[U])
 259 |
 260 | }
 261 |
 262 | /**
 263 | * A simple abstraction over the HBaseContext.streamMap method.
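 *
 * A usage sketch for the `bulkGet` method above. The `javaHBaseContext`
 * instance, the `JavaRDD[Array[Byte]]` of row keys named `rowKeyRdd`, the
 * batch size of 2 and the table name "t1" are illustrative assumptions:
 *
 * {{{
 * import org.apache.hadoop.hbase.TableName
 * import org.apache.hadoop.hbase.client.{Get, Result}
 * import org.apache.hadoop.hbase.util.Bytes
 * import org.apache.spark.api.java.function.Function
 *
 * val resultRdd = javaHBaseContext.bulkGet[Array[Byte], String](
 *   TableName.valueOf("t1"), 2, rowKeyRdd,
 *   new Function[Array[Byte], Get] {
 *     override def call(rowKey: Array[Byte]): Get = new Get(rowKey)
 *   },
 *   new Function[Result, String] {
 *     // return just the row key of each Result as a String
 *     override def call(result: Result): String = Bytes.toString(result.getRow)
 *   })
 * }}}
 *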
 264 | *
 265 | * It allows additional support for a user to take a DStream and
 266 | * generate a new DStream based on Gets and the results
 267 | * they bring back from HBase.
 268 | *
 269 | *
 270 | * @param tableName The name of the table to get from
 271 | * @param batchSize The number of gets to be batched together
 272 | * @param javaDStream Original DStream with data to iterate over
 273 | * @param makeGet Function to convert a value in the JavaDStream to an
 274 | * HBase Get
 275 | * @param convertResult This will convert the HBase Result object to
 276 | * whatever the user wants to put in the resulting
 277 | * JavaDStream
 278 | * @return New JavaDStream that is created by the Get to HBase
 279 | */
 280 | def streamBulkGet[T, U](tableName: TableName,
 281 | batchSize: Integer,
 282 | javaDStream: JavaDStream[T],
 283 | makeGet: Function[T, Get],
 284 | convertResult: Function[Result, U]): JavaDStream[U] = {
 285 | JavaDStream.fromDStream(hbaseContext.streamBulkGet(tableName,
 286 | batchSize,
 287 | javaDStream.dstream,
 288 | (t: T) => makeGet.call(t),
 289 | (r: Result) => convertResult.call(r))(fakeClassTag[U]))(fakeClassTag[U])
 290 | }
 291 |
 292 | /**
 293 | * This function will use the native HBase TableInputFormat with the
 294 | * given scan object to generate a new JavaRDD.
 295 | *
 296 | * @param tableName The name of the table to scan
 297 | * @param scans The HBase scan object to use to read data from HBase
 298 | * @param f Function to convert a Result object from HBase into
 299 | * what the user wants in the final generated JavaRDD
 300 | * @return New JavaRDD with results from scan
 301 | */
 302 | def hbaseRDD[U](tableName: TableName,
 303 | scans: Scan,
 304 | f: Function[(ImmutableBytesWritable, Result), U]):
 305 | JavaRDD[U] = {
 306 | JavaRDD.fromRDD(
 307 | hbaseContext.hbaseRDD[U](tableName,
 308 | scans,
 309 | (v: (ImmutableBytesWritable, Result)) =>
 310 | f.call(v._1, v._2))(fakeClassTag[U]))(fakeClassTag[U])
 311 | }
 312 |
 313 | /**
 314 | * An overloaded version of HBaseContext hbaseRDD that defines the
 315 | * type of the resulting JavaRDD
 316 | *
 317 | * @param tableName The name of the table to scan
 318 | * @param scans The HBase scan object to use to read data from HBase
 319 | * @return New JavaRDD with results from scan
 320 | */
 321 | def hbaseRDD(tableName: TableName,
 322 | scans: Scan):
 323 | JavaRDD[(ImmutableBytesWritable, Result)] = {
 324 | JavaRDD.fromRDD(hbaseContext.hbaseRDD(tableName, scans))
 325 | }
 326 |
 327 | /**
 328 | * Produces a ClassTag[T], which is actually just a casted ClassTag[AnyRef].
 329 | *
 330 | * This method is used to keep ClassTags out of the external Java API, as the Java compiler
 331 | * cannot produce them automatically. While this ClassTag-faking does please the compiler,
 332 | * it can cause problems at runtime if the Scala API relies on ClassTags for correctness.
 333 | *
 334 | * Often, though, a ClassTag[AnyRef] will not lead to incorrect behavior,
 335 | * just worse performance or security issues.
 336 | * For instance, an Array[AnyRef] can hold any type T,
 337 | * but may lose primitive
 338 | * specialization.
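 *
 * A usage sketch for the `hbaseRDD` distributed-scan methods above. The
 * `javaHBaseContext` instance and the table name "t1" are illustrative
 * assumptions:
 *
 * {{{
 * import org.apache.hadoop.hbase.TableName
 * import org.apache.hadoop.hbase.client.Scan
 *
 * val scan = new Scan()
 * scan.setCaching(100)
 * val scanRdd = javaHBaseContext.hbaseRDD(TableName.valueOf("t1"), scan)
 * println("rows scanned: " + scanRdd.count())
 * }}}
 *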
339 | */ 340 | private[spark] 341 | def fakeClassTag[T]: ClassTag[T] = ClassTag.AnyRef.asInstanceOf[ClassTag[T]] 342 | } 343 | -------------------------------------------------------------------------------- /.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5 3 | org.eclipse.jdt.core.compiler.compliance=1.5 4 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning 5 | org.eclipse.jdt.core.compiler.source=1.5 6 | org.eclipse.jdt.core.formatter.align_type_members_on_columns=false 7 | org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression=16 8 | org.eclipse.jdt.core.formatter.alignment_for_arguments_in_annotation=0 9 | org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant=16 10 | org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call=16 11 | org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation=16 12 | org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression=16 13 | org.eclipse.jdt.core.formatter.alignment_for_assignment=0 14 | org.eclipse.jdt.core.formatter.alignment_for_binary_expression=16 15 | org.eclipse.jdt.core.formatter.alignment_for_compact_if=16 16 | org.eclipse.jdt.core.formatter.alignment_for_conditional_expression=80 17 | org.eclipse.jdt.core.formatter.alignment_for_enum_constants=0 18 | org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer=16 19 | org.eclipse.jdt.core.formatter.alignment_for_method_declaration=0 20 | org.eclipse.jdt.core.formatter.alignment_for_multiple_fields=16 21 | org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration=16 22 | org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration=16 23 | org.eclipse.jdt.core.formatter.alignment_for_resources_in_try=80 24 | org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation=16 25 | org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration=16 26 | org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration=16 27 | org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration=16 28 | org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration=16 29 | org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration=16 30 | org.eclipse.jdt.core.formatter.alignment_for_union_type_in_multicatch=16 31 | org.eclipse.jdt.core.formatter.blank_lines_after_imports=1 32 | org.eclipse.jdt.core.formatter.blank_lines_after_package=1 33 | org.eclipse.jdt.core.formatter.blank_lines_before_field=0 34 | org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration=0 35 | org.eclipse.jdt.core.formatter.blank_lines_before_imports=1 36 | org.eclipse.jdt.core.formatter.blank_lines_before_member_type=1 37 | org.eclipse.jdt.core.formatter.blank_lines_before_method=1 38 | org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk=1 39 | org.eclipse.jdt.core.formatter.blank_lines_before_package=0 40 | org.eclipse.jdt.core.formatter.blank_lines_between_import_groups=1 41 | org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations=1 42 | org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration=end_of_line 43 | org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration=end_of_line 44 | 
org.eclipse.jdt.core.formatter.brace_position_for_array_initializer=end_of_line 45 | org.eclipse.jdt.core.formatter.brace_position_for_block=end_of_line 46 | org.eclipse.jdt.core.formatter.brace_position_for_block_in_case=end_of_line 47 | org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration=end_of_line 48 | org.eclipse.jdt.core.formatter.brace_position_for_enum_constant=end_of_line 49 | org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration=end_of_line 50 | org.eclipse.jdt.core.formatter.brace_position_for_method_declaration=end_of_line 51 | org.eclipse.jdt.core.formatter.brace_position_for_switch=end_of_line 52 | org.eclipse.jdt.core.formatter.brace_position_for_type_declaration=end_of_line 53 | org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment=false 54 | org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment=false 55 | org.eclipse.jdt.core.formatter.comment.format_block_comments=true 56 | org.eclipse.jdt.core.formatter.comment.format_header=false 57 | org.eclipse.jdt.core.formatter.comment.format_html=true 58 | org.eclipse.jdt.core.formatter.comment.format_javadoc_comments=true 59 | org.eclipse.jdt.core.formatter.comment.format_line_comments=true 60 | org.eclipse.jdt.core.formatter.comment.format_source_code=true 61 | org.eclipse.jdt.core.formatter.comment.indent_parameter_description=true 62 | org.eclipse.jdt.core.formatter.comment.indent_root_tags=true 63 | org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags=insert 64 | org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter=insert 65 | org.eclipse.jdt.core.formatter.comment.line_length=80 66 | org.eclipse.jdt.core.formatter.comment.new_lines_at_block_boundaries=true 67 | org.eclipse.jdt.core.formatter.comment.new_lines_at_javadoc_boundaries=true 68 | org.eclipse.jdt.core.formatter.comment.preserve_white_space_between_code_and_line_comments=false 69 | org.eclipse.jdt.core.formatter.compact_else_if=true 70 | org.eclipse.jdt.core.formatter.continuation_indentation=2 71 | org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer=2 72 | org.eclipse.jdt.core.formatter.disabling_tag=@formatter\:off 73 | org.eclipse.jdt.core.formatter.enabling_tag=@formatter\:on 74 | org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line=false 75 | org.eclipse.jdt.core.formatter.format_line_comment_starting_on_first_column=true 76 | org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header=true 77 | org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header=true 78 | org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header=true 79 | org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header=true 80 | org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases=true 81 | org.eclipse.jdt.core.formatter.indent_empty_lines=false 82 | org.eclipse.jdt.core.formatter.indent_statements_compare_to_block=true 83 | org.eclipse.jdt.core.formatter.indent_statements_compare_to_body=true 84 | org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases=true 85 | org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch=false 86 | org.eclipse.jdt.core.formatter.indentation.size=2 87 | org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_field=insert 88 | org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable=insert 89 | 
org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_method=insert 90 | org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_package=insert 91 | org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter=do not insert 92 | org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_type=insert 93 | org.eclipse.jdt.core.formatter.insert_new_line_after_label=do not insert 94 | org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer=do not insert 95 | org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing=do not insert 96 | org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement=do not insert 97 | org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer=do not insert 98 | org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement=do not insert 99 | org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement=do not insert 100 | org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement=do not insert 101 | org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration=insert 102 | org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration=insert 103 | org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block=insert 104 | org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant=insert 105 | org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration=insert 106 | org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body=insert 107 | org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration=insert 108 | org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter=insert 109 | org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator=insert 110 | org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation=do not insert 111 | org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration=do not insert 112 | org.eclipse.jdt.core.formatter.insert_space_after_binary_operator=insert 113 | org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments=insert 114 | org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters=insert 115 | org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block=insert 116 | org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast=insert 117 | org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert=insert 118 | org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case=insert 119 | org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional=insert 120 | org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for=insert 121 | org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement=insert 122 | org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression=insert 123 | org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation=insert 124 | org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer=insert 125 | org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters=insert 126 | org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws=insert 127 | org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments=insert 128 | 
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations=insert 129 | org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments=insert 130 | org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments=insert 131 | org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits=insert 132 | org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters=insert 133 | org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws=insert 134 | org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments=insert 135 | org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations=insert 136 | org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations=insert 137 | org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference=insert 138 | org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces=insert 139 | org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments=insert 140 | org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters=insert 141 | org.eclipse.jdt.core.formatter.insert_space_after_ellipsis=insert 142 | org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference=do not insert 143 | org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments=do not insert 144 | org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters=do not insert 145 | org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer=insert 146 | org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression=do not insert 147 | org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference=do not insert 148 | org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation=do not insert 149 | org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast=do not insert 150 | org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch=do not insert 151 | org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration=do not insert 152 | org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant=do not insert 153 | org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for=do not insert 154 | org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if=do not insert 155 | org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration=do not insert 156 | org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation=do not insert 157 | org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression=do not insert 158 | org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch=do not insert 159 | org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized=do not insert 160 | org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_try=do not insert 161 | org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while=do not insert 162 | org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator=do not insert 163 | org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator=do not insert 164 | 
org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional=insert 165 | org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard=do not insert 166 | org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for=insert 167 | org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_try_resources=insert 168 | org.eclipse.jdt.core.formatter.insert_space_after_unary_operator=do not insert 169 | org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter=insert 170 | org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator=insert 171 | org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration=insert 172 | org.eclipse.jdt.core.formatter.insert_space_before_binary_operator=insert 173 | org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference=do not insert 174 | org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments=do not insert 175 | org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters=do not insert 176 | org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer=insert 177 | org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression=do not insert 178 | org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference=do not insert 179 | org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation=do not insert 180 | org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast=do not insert 181 | org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch=do not insert 182 | org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration=do not insert 183 | org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant=do not insert 184 | org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for=do not insert 185 | org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if=do not insert 186 | org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration=do not insert 187 | org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation=do not insert 188 | org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression=do not insert 189 | org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch=do not insert 190 | org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized=do not insert 191 | org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_try=do not insert 192 | org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while=do not insert 193 | org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert=insert 194 | org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case=do not insert 195 | org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional=insert 196 | org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default=do not insert 197 | org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for=insert 198 | org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement=do not insert 199 | org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression=do not insert 200 | org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation=do not insert 201 | 
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer=do not insert 202 | org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters=do not insert 203 | org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws=do not insert 204 | org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments=do not insert 205 | org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations=do not insert 206 | org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments=do not insert 207 | org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments=do not insert 208 | org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits=do not insert 209 | org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters=do not insert 210 | org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws=do not insert 211 | org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments=do not insert 212 | org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations=do not insert 213 | org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations=do not insert 214 | org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference=do not insert 215 | org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces=do not insert 216 | org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments=do not insert 217 | org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters=do not insert 218 | org.eclipse.jdt.core.formatter.insert_space_before_ellipsis=do not insert 219 | org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference=do not insert 220 | org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments=do not insert 221 | org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters=do not insert 222 | org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration=insert 223 | org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration=insert 224 | org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer=insert 225 | org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block=insert 226 | org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration=insert 227 | org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant=insert 228 | org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration=insert 229 | org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration=insert 230 | org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch=insert 231 | org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration=insert 232 | org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression=do not insert 233 | org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference=do not insert 234 | org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference=do not insert 235 | 
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation=do not insert 236 | org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration=do not insert 237 | org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch=insert 238 | org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration=do not insert 239 | org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant=do not insert 240 | org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for=insert 241 | org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if=insert 242 | org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration=do not insert 243 | org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation=do not insert 244 | org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression=do not insert 245 | org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch=insert 246 | org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized=insert 247 | org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_try=insert 248 | org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while=insert 249 | org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return=insert 250 | org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw=insert 251 | org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator=do not insert 252 | org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator=do not insert 253 | org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional=insert 254 | org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard=do not insert 255 | org.eclipse.jdt.core.formatter.insert_space_before_semicolon=do not insert 256 | org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for=do not insert 257 | org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_try_resources=do not insert 258 | org.eclipse.jdt.core.formatter.insert_space_before_unary_operator=do not insert 259 | org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference=do not insert 260 | org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer=do not insert 261 | org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression=do not insert 262 | org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration=do not insert 263 | org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration=do not insert 264 | org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant=do not insert 265 | org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration=do not insert 266 | org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation=do not insert 267 | org.eclipse.jdt.core.formatter.join_lines_in_comments=true 268 | org.eclipse.jdt.core.formatter.join_wrapped_lines=true 269 | org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line=false 270 | org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line=false 271 | org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line=false 272 | 
org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line=false 273 | org.eclipse.jdt.core.formatter.lineSplit=80 274 | org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column=false 275 | org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column=false 276 | org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body=0 277 | org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve=1 278 | org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line=true 279 | org.eclipse.jdt.core.formatter.tabulation.char=space 280 | org.eclipse.jdt.core.formatter.tabulation.size=2 281 | org.eclipse.jdt.core.formatter.use_on_off_tags=false 282 | org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations=false 283 | org.eclipse.jdt.core.formatter.wrap_before_binary_operator=true 284 | org.eclipse.jdt.core.formatter.wrap_before_or_operator_multicatch=true 285 | org.eclipse.jdt.core.formatter.wrap_outer_expressions_when_nested=true 286 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | com.cloudera.sa.sparkonhbase 8 | patientproject 9 | 1.0-SNAPSHOT 10 | 11 | 12 | 1.7 13 | 1.7 14 | 1.5.0-cdh5.5.0 15 | 1.0.0-cdh5.5.0 16 | 2.6.0-cdh5.5.0 17 | 0.8.2.0-kafka-1.4.0 18 | 1.6.0-cdh5.5.0 19 | 1.5.0-cdh5.5.0 20 | 21 | 22 | 23 | 24 | org.joda 25 | joda-convert 26 | 1.8 27 | 28 | 29 | junit 30 | junit 31 | 4.11 32 | test 33 | 34 | 35 | org.scala-lang 36 | scala-compiler 37 | 2.10.4 38 | 39 | 40 | org.scalatest 41 | scalatest_2.10 42 | 2.1.5 43 | 44 | 45 | org.apache.spark 46 | spark-core_2.10 47 | ${spark.version} 48 | 49 | 50 | org.apache.spark 51 | spark-sql_2.10 52 | ${spark.version} 53 | 54 | 55 | org.apache.spark 56 | spark-hive_2.10 57 | ${spark.version} 58 | 59 | 60 | org.apache.spark 61 | spark-streaming_2.10 62 | ${spark.version} 63 | 64 | 65 | org.apache.spark 66 | spark-streaming_2.10 67 | ${spark.version} 68 | test-jar 69 | tests 70 | test 71 | 72 | 73 | org.apache.hbase 74 | hbase-server 75 | ${hbase.version} 76 | 77 | 78 | org.apache.hbase 79 | hbase-server 80 | ${hbase.version} 81 | test-jar 82 | 83 | 84 | org.apache.hadoop 85 | hadoop-client 86 | ${hadoop.version} 87 | 88 | 89 | log4j 90 | log4j 91 | 92 | 93 | javax.servlet 94 | servlet-api 95 | 96 | 97 | javax.servlet.jsp 98 | jsp-api 99 | 100 | 101 | org.jruby 102 | jruby-complete 103 | 104 | 105 | org.jboss.netty 106 | netty 107 | 108 | 109 | io.netty 110 | netty 111 | 112 | 113 | 114 | 115 | org.apache.hadoop 116 | hadoop-common 117 | ${hadoop.version} 118 | 119 | 120 | log4j 121 | log4j 122 | 123 | 124 | javax.servlet 125 | servlet-api 126 | 127 | 128 | javax.servlet.jsp 129 | jsp-api 130 | 131 | 132 | org.jruby 133 | jruby-complete 134 | 135 | 136 | org.jboss.netty 137 | netty 138 | 139 | 140 | io.netty 141 | netty 142 | 143 | 144 | 145 | 146 | 147 | org.apache.hadoop 148 | hadoop-common 149 | ${hadoop.version} 150 | test-jar 151 | test 152 | 153 | 154 | log4j 155 | log4j 156 | 157 | 158 | javax.servlet 159 | servlet-api 160 | 161 | 162 | javax.servlet.jsp 163 | jsp-api 164 | 165 | 166 | org.jruby 167 | jruby-complete 168 | 169 | 170 | org.jboss.netty 171 | netty 172 | 173 | 174 | io.netty 175 | netty 176 | 177 | 178 | 179 | 180 | 181 | org.apache.hadoop 182 | hadoop-hdfs 183 | ${hadoop.version} 184 | test-jar 185 | test 186 | 187 | 188 | log4j 189 | log4j 190 | 191 | 192 | javax.servlet 193 | servlet-api 194 | 195 | 196 
| javax.servlet.jsp 197 | jsp-api 198 | 199 | 200 | org.jruby 201 | jruby-complete 202 | 203 | 204 | org.jboss.netty 205 | netty 206 | 207 | 208 | io.netty 209 | netty 210 | 211 | 212 | 213 | 214 | 215 | org.apache.hbase 216 | hbase-client 217 | ${hbase.version} 218 | 219 | 220 | log4j 221 | log4j 222 | 223 | 224 | org.apache.thrift 225 | thrift 226 | 227 | 228 | org.jruby 229 | jruby-complete 230 | 231 | 232 | org.slf4j 233 | slf4j-log4j12 234 | 235 | 236 | org.mortbay.jetty 237 | jsp-2.1 238 | 239 | 240 | org.mortbay.jetty 241 | jsp-api-2.1 242 | 243 | 244 | org.mortbay.jetty 245 | servlet-api-2.5 246 | 247 | 248 | com.sun.jersey 249 | jersey-core 250 | 251 | 252 | com.sun.jersey 253 | jersey-json 254 | 255 | 256 | com.sun.jersey 257 | jersey-server 258 | 259 | 260 | org.mortbay.jetty 261 | jetty 262 | 263 | 264 | org.mortbay.jetty 265 | jetty-util 266 | 267 | 268 | tomcat 269 | jasper-runtime 270 | 271 | 272 | tomcat 273 | jasper-compiler 274 | 275 | 276 | org.jruby 277 | jruby-complete 278 | 279 | 280 | org.jboss.netty 281 | netty 282 | 283 | 284 | io.netty 285 | netty 286 | 287 | 288 | 289 | 290 | 291 | org.apache.hbase 292 | hbase-protocol 293 | ${hbase.version} 294 | 295 | 296 | 297 | org.apache.hbase 298 | hbase-annotations 299 | ${hbase.version} 300 | test-jar 301 | test 302 | 303 | 304 | 305 | org.apache.hbase 306 | hbase-hadoop-compat 307 | ${hbase.version} 308 | test 309 | test-jar 310 | 311 | 312 | log4j 313 | log4j 314 | 315 | 316 | org.apache.thrift 317 | thrift 318 | 319 | 320 | org.jruby 321 | jruby-complete 322 | 323 | 324 | org.slf4j 325 | slf4j-log4j12 326 | 327 | 328 | org.mortbay.jetty 329 | jsp-2.1 330 | 331 | 332 | org.mortbay.jetty 333 | jsp-api-2.1 334 | 335 | 336 | org.mortbay.jetty 337 | servlet-api-2.5 338 | 339 | 340 | com.sun.jersey 341 | jersey-core 342 | 343 | 344 | com.sun.jersey 345 | jersey-json 346 | 347 | 348 | com.sun.jersey 349 | jersey-server 350 | 351 | 352 | org.mortbay.jetty 353 | jetty 354 | 355 | 356 | org.mortbay.jetty 357 | jetty-util 358 | 359 | 360 | tomcat 361 | jasper-runtime 362 | 363 | 364 | tomcat 365 | jasper-compiler 366 | 367 | 368 | org.jruby 369 | jruby-complete 370 | 371 | 372 | org.jboss.netty 373 | netty 374 | 375 | 376 | io.netty 377 | netty 378 | 379 | 380 | 381 | 382 | 383 | org.apache.hbase 384 | hbase-hadoop2-compat 385 | ${hbase.version} 386 | test 387 | test-jar 388 | 389 | 390 | log4j 391 | log4j 392 | 393 | 394 | org.apache.thrift 395 | thrift 396 | 397 | 398 | org.jruby 399 | jruby-complete 400 | 401 | 402 | org.slf4j 403 | slf4j-log4j12 404 | 405 | 406 | org.mortbay.jetty 407 | jsp-2.1 408 | 409 | 410 | org.mortbay.jetty 411 | jsp-api-2.1 412 | 413 | 414 | org.mortbay.jetty 415 | servlet-api-2.5 416 | 417 | 418 | com.sun.jersey 419 | jersey-core 420 | 421 | 422 | com.sun.jersey 423 | jersey-json 424 | 425 | 426 | com.sun.jersey 427 | jersey-server 428 | 429 | 430 | org.mortbay.jetty 431 | jetty 432 | 433 | 434 | org.mortbay.jetty 435 | jetty-util 436 | 437 | 438 | tomcat 439 | jasper-runtime 440 | 441 | 442 | tomcat 443 | jasper-compiler 444 | 445 | 446 | org.jruby 447 | jruby-complete 448 | 449 | 450 | org.jboss.netty 451 | netty 452 | 453 | 454 | io.netty 455 | netty 456 | 457 | 458 | 459 | 460 | com.google.protobuf 461 | protobuf-java 462 | 2.5.0 463 | 464 | 465 | commons-logging 466 | commons-logging 467 | 1.2 468 | 469 | 470 | org.kohsuke 471 | wordnet-random-name 472 | 1.3 473 | 474 | 475 | org.apache.kafka 476 | kafka-clients 477 | ${kafka.version} 478 | 479 | 480 | org.apache.flume.flume-ng-sinks 481 | 
flume-ng-hbase-sink 482 | ${flume.version} 483 | 484 | 485 | 486 | com.twitter 487 | parquet-common 488 | ${parquet.version} 489 | 490 | 491 | com.twitter 492 | parquet-encoding 493 | ${parquet.version} 494 | 495 | 496 | com.twitter 497 | parquet-column 498 | ${parquet.version} 499 | 500 | 501 | com.twitter 502 | parquet-hadoop 503 | ${parquet.version} 504 | 505 | 506 | org.reflections 507 | reflections 508 | 0.9.10 509 | 510 | 511 | 512 | 513 | 514 | cloudera 515 | https://repository.cloudera.com/artifactory/cloudera-repos/ 516 | 517 | 518 | 519 | 520 | 521 | cloudera 522 | https://repository.cloudera.com/artifactory/cloudera-repos/ 523 | 524 | 525 | 526 | 527 | target/scala/classes 528 | target/scala/test-classes 529 | 530 | 531 | org.apache.maven.plugins 532 | maven-compiler-plugin 533 | 534 | 1.7 535 | 1.7 536 | 537 | 538 | 539 | org.apache.maven.plugins 540 | maven-surefire-plugin 541 | 2.17 542 | 543 | 544 | org.apache.maven.surefire 545 | surefire-junit47 546 | 2.17 547 | 548 | 549 | 550 | 551 | org.scalatest 552 | scalatest-maven-plugin 553 | 1.0 554 | 555 | . 556 | 557 | 558 | 559 | test 560 | 561 | test 562 | 563 | 564 | 565 | 566 | 567 | org.scala-tools 568 | maven-scala-plugin 569 | 570 | 571 | compile 572 | 573 | compile 574 | 575 | compile 576 | 577 | 578 | test-compile 579 | 580 | testCompile 581 | 582 | test-compile 583 | 584 | 585 | process-resources 586 | 587 | compile 588 | 589 | 590 | 591 | 592 | 593 | org.apache.maven.plugins 594 | maven-shade-plugin 595 | 2.2 596 | 597 | false 598 | target/SparkOnHBase.jar 599 | 600 | 601 | *:* 602 | 603 | 604 | 605 | 606 | *:* 607 | 608 | META-INF/*.SF 609 | META-INF/*.DSA 610 | META-INF/*.RSA 611 | 612 | 613 | 614 | 615 | 616 | 617 | package 618 | 619 | shade 620 | 621 | 622 | 623 | 625 | 627 | reference.conf 628 | 629 | 630 | 631 | 632 | 633 | 634 | 635 | 636 | --------------------------------------------------------------------------------