├── .asf.yaml ├── .gitignore ├── CHANGELOG.md ├── LICENSE.txt ├── NOTICE.txt ├── README.md ├── RELEASENOTES.md ├── bin ├── hbase-connectors ├── hbase-connectors-config.sh └── hbase-connectors-daemon.sh ├── conf └── log4j.properties ├── dev-support ├── .scalafmt.conf ├── code-coverage │ ├── README.md │ └── run-coverage.sh ├── eclipse.importorder ├── hbase_eclipse_formatter.xml ├── jenkins │ ├── Dockerfile │ ├── Jenkinsfile │ ├── gather_machine_environment.sh │ ├── hbase-personality.sh │ └── jenkins_precommit_github_yetus.sh └── license-header ├── hbase-connectors-assembly ├── pom.xml └── src │ └── main │ ├── assembly │ ├── connector-components.xml │ └── hbase-connectors-bin.xml │ └── resources │ ├── META-INF │ └── LEGAL │ └── supplemental-models.xml ├── kafka ├── README.md ├── conf │ └── kafka-route-rules.xml ├── hbase-kafka-model │ ├── pom.xml │ └── src │ │ └── main │ │ └── avro │ │ └── HbaseKafkaEvent.avro ├── hbase-kafka-proxy │ ├── pom.xml │ └── src │ │ ├── main │ │ └── java │ │ │ └── org │ │ │ └── apache │ │ │ └── hadoop │ │ │ └── hbase │ │ │ └── kafka │ │ │ ├── DropRule.java │ │ │ ├── DumpToStringListener.java │ │ │ ├── KafkaBridgeConnection.java │ │ │ ├── KafkaProxy.java │ │ │ ├── KafkaTableForBridge.java │ │ │ ├── Rule.java │ │ │ ├── TopicRoutingRules.java │ │ │ └── TopicRule.java │ │ └── test │ │ └── java │ │ └── org │ │ └── apache │ │ └── hadoop │ │ └── hbase │ │ └── kafka │ │ ├── ProducerForTesting.java │ │ ├── TestDropRule.java │ │ ├── TestProcessMutations.java │ │ ├── TestQualifierMatching.java │ │ └── TestRouteRules.java └── pom.xml ├── pom.xml ├── spark ├── README.md ├── hbase-spark-it │ ├── pom.xml │ └── src │ │ └── test │ │ ├── java │ │ └── org │ │ │ └── apache │ │ │ └── hadoop │ │ │ └── hbase │ │ │ └── spark │ │ │ └── IntegrationTestSparkBulkLoad.java │ │ └── resources │ │ └── hbase-site.xml ├── hbase-spark-protocol-shaded │ └── pom.xml ├── hbase-spark-protocol │ ├── pom.xml │ └── src │ │ └── main │ │ └── protobuf │ │ └── SparkFilter.proto ├── hbase-spark │ ├── README.md │ ├── pom.xml │ └── src │ │ ├── main │ │ ├── java │ │ │ └── org │ │ │ │ └── apache │ │ │ │ └── hadoop │ │ │ │ └── hbase │ │ │ │ └── spark │ │ │ │ ├── SparkSQLPushDownFilter.java │ │ │ │ └── example │ │ │ │ └── hbasecontext │ │ │ │ ├── JavaHBaseBulkDeleteExample.java │ │ │ │ ├── JavaHBaseBulkGetExample.java │ │ │ │ ├── JavaHBaseBulkLoadExample.java │ │ │ │ ├── JavaHBaseBulkPutExample.java │ │ │ │ ├── JavaHBaseDistributedScan.java │ │ │ │ ├── JavaHBaseMapGetPutExample.java │ │ │ │ └── JavaHBaseStreamingBulkPutExample.java │ │ └── scala │ │ │ └── org │ │ │ └── apache │ │ │ └── hadoop │ │ │ └── hbase │ │ │ └── spark │ │ │ ├── BulkLoadPartitioner.scala │ │ │ ├── ByteArrayComparable.scala │ │ │ ├── ByteArrayWrapper.scala │ │ │ ├── ColumnFamilyQualifierMapKeyWrapper.scala │ │ │ ├── DefaultSource.scala │ │ │ ├── DynamicLogicExpression.scala │ │ │ ├── FamiliesQualifiersValues.scala │ │ │ ├── FamilyHFileWriteOptions.scala │ │ │ ├── HBaseConnectionCache.scala │ │ │ ├── HBaseContext.scala │ │ │ ├── HBaseDStreamFunctions.scala │ │ │ ├── HBaseRDDFunctions.scala │ │ │ ├── JavaHBaseContext.scala │ │ │ ├── KeyFamilyQualifier.scala │ │ │ ├── Logging.scala │ │ │ ├── NewHBaseRDD.scala │ │ │ ├── datasources │ │ │ ├── Bound.scala │ │ │ ├── DataTypeParserWrapper.scala │ │ │ ├── HBaseResources.scala │ │ │ ├── HBaseSparkConf.scala │ │ │ ├── HBaseTableCatalog.scala │ │ │ ├── HBaseTableScanRDD.scala │ │ │ ├── JavaBytesEncoder.scala │ │ │ ├── NaiveEncoder.scala │ │ │ ├── SchemaConverters.scala │ │ │ ├── SerDes.scala │ │ │ ├── 
SerializableConfiguration.scala │ │ │ ├── Utils.scala │ │ │ └── package.scala │ │ │ └── example │ │ │ ├── datasources │ │ │ ├── AvroSource.scala │ │ │ ├── DataType.scala │ │ │ └── HBaseSource.scala │ │ │ ├── hbasecontext │ │ │ ├── HBaseBulkDeleteExample.scala │ │ │ ├── HBaseBulkGetExample.scala │ │ │ ├── HBaseBulkPutExample.scala │ │ │ ├── HBaseBulkPutExampleFromFile.scala │ │ │ ├── HBaseBulkPutTimestampExample.scala │ │ │ ├── HBaseDistributedScanExample.scala │ │ │ └── HBaseStreamingBulkPutExample.scala │ │ │ └── rdd │ │ │ ├── HBaseBulkDeleteExample.scala │ │ │ ├── HBaseBulkGetExample.scala │ │ │ ├── HBaseBulkPutExample.scala │ │ │ ├── HBaseForeachPartitionExample.scala │ │ │ └── HBaseMapPartitionExample.scala │ │ └── test │ │ ├── java │ │ └── org │ │ │ └── apache │ │ │ └── hadoop │ │ │ └── hbase │ │ │ └── spark │ │ │ ├── TestJavaHBaseContext.java │ │ │ └── TestJavaHBaseContextForLargeRows.java │ │ ├── resources │ │ ├── hbase-site.xml │ │ └── log4j.properties │ │ └── scala │ │ └── org │ │ └── apache │ │ └── hadoop │ │ └── hbase │ │ └── spark │ │ ├── BulkLoadSuite.scala │ │ ├── DefaultSourceSuite.scala │ │ ├── DynamicLogicExpressionSuite.scala │ │ ├── HBaseCatalogSuite.scala │ │ ├── HBaseConnectionCacheSuite.scala │ │ ├── HBaseContextSuite.scala │ │ ├── HBaseDStreamFunctionsSuite.scala │ │ ├── HBaseRDDFunctionsSuite.scala │ │ ├── HBaseTestSource.scala │ │ ├── PartitionFilterSuite.scala │ │ ├── StartsWithSuite.scala │ │ └── TableOutputFormatSuite.scala └── pom.xml └── test-reporting └── pom.xml /.asf.yaml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # This file controls the integration of HBase project with ASF infrastructure. Refer to 18 | # https://cwiki.apache.org/confluence/display/INFRA/.asf.yaml+features+for+git+repositories for 19 | # details. Be careful when changing the contents of this file since it may affect many developers 20 | # of the project and make sure to discuss the changes with dev@ before committing. 
21 | 22 | github: 23 | description: "Apache HBase Connectors" 24 | homepage: https://hbase.apache.org/ 25 | labels: 26 | - database 27 | - java 28 | - hbase 29 | features: 30 | wiki: false 31 | issues: false 32 | projects: false 33 | enabled_merge_buttons: 34 | squash: true 35 | merge: false 36 | rebase: true 37 | autolink_jira: HBASE 38 | notifications: 39 | commits: commits@hbase.apache.org 40 | issues: issues@hbase.apache.org 41 | pullrequests: issues@hbase.apache.org 42 | jira_options: link label 43 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.externalToolBuilders 2 | .project 3 | *.settings/ 4 | .DS_Store 5 | .classpath 6 | /build 7 | /.idea/ 8 | /logs 9 | *target/ 10 | *.orig 11 | *~ 12 | hbase-*/test 13 | *.iws 14 | *.iml 15 | *.ipr 16 | patchprocess/ 17 | dependency-reduced-pom.xml 18 | .flattened-pom.xml 19 | link_report/ 20 | linklint-*.zip 21 | linklint/ 22 | .checkstyle 23 | **/.checkstyle 24 | -------------------------------------------------------------------------------- /NOTICE.txt: -------------------------------------------------------------------------------- 1 | Apache HBase - Connectors 2 | Copyright 2019 The Apache Software Foundation 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 18 | 19 | # hbase-connectors 20 | 21 | Connectors for [Apache HBase™](https://hbase.apache.org) 22 | 23 | * [Kafka Proxy](https://github.com/apache/hbase-connectors/tree/master/kafka) 24 | * [Spark](https://github.com/apache/hbase-connectors/tree/master/spark) 25 | -------------------------------------------------------------------------------- /bin/hbase-connectors-config.sh: -------------------------------------------------------------------------------- 1 | # 2 | #/** 3 | # * Licensed to the Apache Software Foundation (ASF) under one 4 | # * or more contributor license agreements. See the NOTICE file 5 | # * distributed with this work for additional information 6 | # * regarding copyright ownership. The ASF licenses this file 7 | # * to you under the Apache License, Version 2.0 (the 8 | # * "License"); you may not use this file except in compliance 9 | # * with the License. You may obtain a copy of the License at 10 | # * 11 | # * http://www.apache.org/licenses/LICENSE-2.0 12 | # * 13 | # * Unless required by applicable law or agreed to in writing, software 14 | # * distributed under the License is distributed on an "AS IS" BASIS, 15 | # * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # * See the License for the specific language governing permissions and 17 | # * limitations under the License. 18 | # */ 19 | 20 | # included in all the hbase connector scripts with source command 21 | # should not be executable directly 22 | # also should not be passed any arguments, since we need original $* 23 | # Modelled after $HADOOP_HOME/bin/hadoop-env.sh. 
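# --- Illustrative note (not part of the original script) ---
# A launcher in bin/, such as hbase-connectors-daemon.sh, is expected to load
# this file with the "source"/"." builtin rather than executing it, roughly:
#
#   bin=$(cd "$(dirname "$0")" >/dev/null && pwd)
#   . "$bin"/hbase-connectors-config.sh
#
# Sourcing it without extra arguments leaves the caller's original "$*"/"$@"
# intact, so the option parsing below operates on the launcher's own arguments.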
24 | 25 | # resolve links - "${BASH_SOURCE-$0}" may be a softlink 26 | 27 | this="${BASH_SOURCE-$0}" 28 | while [ -h "$this" ]; do 29 | ls=`ls -ld "$this"` 30 | link=`expr "$ls" : '.*-> \(.*\)$'` 31 | if expr "$link" : '.*/.*' > /dev/null; then 32 | this="$link" 33 | else 34 | this=`dirname "$this"`/"$link" 35 | fi 36 | done 37 | 38 | # convert relative path to absolute path 39 | bin=`dirname "$this"` 40 | script=`basename "$this"` 41 | bin=`cd "$bin">/dev/null; pwd` 42 | this="$bin/$script" 43 | 44 | # the root of the hbase connector installation 45 | if [ -z "$HBASE_CONNECTOR_HOME" ]; then 46 | export HBASE_CONNECTOR_HOME=`dirname "$this"`/.. 47 | fi 48 | 49 | #check to see if the conf dir or hbase home are given as an optional arguments 50 | while [ $# -gt 1 ] 51 | do 52 | if [ "--config" = "$1" ] 53 | then 54 | shift 55 | confdir=$1 56 | shift 57 | HBASE_CONF_DIR=$confdir 58 | elif [ "--autostart-window-size" = "$1" ] 59 | then 60 | shift 61 | AUTOSTART_WINDOW_SIZE=$(( $1 + 0 )) 62 | if [ $AUTOSTART_WINDOW_SIZE -lt 0 ]; then 63 | echo "Invalid value for --autostart-window-size, should be a positive integer" 64 | exit 1 65 | fi 66 | shift 67 | elif [ "--autostart-window-retry-limit" = "$1" ] 68 | then 69 | shift 70 | AUTOSTART_WINDOW_RETRY_LIMIT=$(( $1 + 0 )) 71 | if [ $AUTOSTART_WINDOW_RETRY_LIMIT -lt 0 ]; then 72 | echo "Invalid value for --autostart-window-retry-limit, should be a positive integer" 73 | exit 1 74 | fi 75 | shift 76 | elif [ "--internal-classpath" = "$1" ] 77 | then 78 | shift 79 | # shellcheck disable=SC2034 80 | INTERNAL_CLASSPATH="true" 81 | elif [ "--debug" = "$1" ] 82 | then 83 | shift 84 | # shellcheck disable=SC2034 85 | DEBUG="true" 86 | else 87 | # Presume we are at end of options and break 88 | break 89 | fi 90 | done 91 | 92 | 93 | 94 | # Allow alternate hbase connector conf dir location. 95 | HBASE_CONNECTOR_CONF_DIR="${HBASE_CONNECTOR_CONF_DIR:-$HBASE_CONNECTOR_HOME/conf}" 96 | 97 | 98 | if [ -n "$HBASE_CONNECTOR_JMX_BASE" ] && [ -z "$HBASE_CONNECTOR_JMX_OPTS" ]; then 99 | HBASE_CONNECTOR_JMX_OPTS="$HBASE_CONNECTOR_JMX_BASE" 100 | fi 101 | 102 | 103 | # Source the hbase-connector-env.sh only if it has not already been done. HBASE_CONNECTOR_ENV_INIT keeps track of it. 104 | if [ -z "$HBASE_CONNECTOR_ENV_INIT" ] && [ -f "${HBASE_CONNECTOR_CONF_DIR}/hbase-connector-env.sh" ]; then 105 | . "${HBASE_CONNECTOR_CONF_DIR}/hbase-connector-env.sh" 106 | export HBASE_CONNECTOR_ENV_INIT="true" 107 | fi 108 | 109 | # Newer versions of glibc use an arena memory allocator that causes virtual 110 | # memory usage to explode. Tune the variable down to prevent vmem explosion. 111 | export MALLOC_ARENA_MAX=${MALLOC_ARENA_MAX:-4} 112 | 113 | 114 | # Now having JAVA_HOME defined is required 115 | if [ -z "$JAVA_HOME" ]; then 116 | cat 1>&2 < http://www.oracle.com/technetwork/java/javase/downloads | 122 | | | 123 | | HBase Connectors requires Java 1.8 or later. | 124 | +======================================================================+ 125 | EOF 126 | exit 1 127 | fi 128 | -------------------------------------------------------------------------------- /conf/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. 
The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Define some default values that can be overridden by system properties 18 | hbase.connector.root.logger=INFO,console 19 | hbase.connector.log.dir=. 20 | hbase.connector.log.file=hbase-connector.log 21 | hbase.connector.log.level=INFO 22 | 23 | # Define the root logger to the system property "hbase.connector.root.logger". 24 | log4j.rootLogger=${hbase.connector.root.logger} 25 | 26 | # Logging Threshold 27 | log4j.threshold=ALL 28 | 29 | # 30 | # Daily Rolling File Appender 31 | # 32 | log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender 33 | log4j.appender.DRFA.File=${hbase.connector.log.dir}/${hbase.connector.log.file} 34 | 35 | # Rollver at midnight 36 | log4j.appender.DRFA.DatePattern=.yyyy-MM-dd 37 | 38 | # 30-day backup 39 | #log4j.appender.DRFA.MaxBackupIndex=30 40 | log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout 41 | 42 | # Pattern format: Date LogLevel LoggerName LogMessage 43 | log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %.1000m%n 44 | 45 | # Rolling File Appender properties 46 | hbase.connector.log.maxfilesize=256MB 47 | hbase.connector.log.maxbackupindex=20 48 | 49 | # Rolling File Appender 50 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 51 | log4j.appender.RFA.File=${hbase.connector.log.dir}/${hbase.connector.log.file} 52 | 53 | log4j.appender.RFA.MaxFileSize=${hbase.connector.log.maxfilesize} 54 | log4j.appender.RFA.MaxBackupIndex=${hbase.connector.log.maxbackupindex} 55 | 56 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 57 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %.1000m%n 58 | 59 | 60 | # 61 | # Null Appender 62 | # 63 | log4j.appender.NullAppender=org.apache.log4j.varia.NullAppender 64 | 65 | # 66 | # console 67 | # Add "console" to rootlogger above if you want to use this 68 | # 69 | log4j.appender.console=org.apache.log4j.ConsoleAppender 70 | log4j.appender.console.target=System.err 71 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 72 | log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %.1000m%n 73 | 74 | log4j.appender.asyncconsole=org.apache.hadoop.hbase.AsyncConsoleAppender 75 | log4j.appender.asyncconsole.target=System.err 76 | 77 | # Custom Logging levels 78 | 79 | 80 | 81 | log4j.logger.org.apache.hadoop.hbase.kafka=INFO 82 | 83 | #this is a debugging tool 84 | log4j.logger.org.apache.hadoop.hbase.kafka.DumpToStringListener=DEBUG 85 | 86 | 87 | 88 | log4j.logger.org.apache.hadoop.metrics2.impl.MetricsConfig=WARN 89 | log4j.logger.org.apache.hadoop.metrics2.impl.MetricsSinkAdapter=WARN 90 | log4j.logger.org.apache.hadoop.metrics2.impl.MetricsSystemImpl=WARN 91 | -------------------------------------------------------------------------------- /dev-support/.scalafmt.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software 
Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Template based off apache spark: https://github.com/apache/spark/blob/master/dev/.scalafmt.conf 19 | # Align settings 20 | align = none 21 | align.openParenDefnSite = false 22 | align.openParenCallSite = false 23 | align.tokens = [] 24 | 25 | # Rewrites 26 | rewrite.rules = [Imports] 27 | 28 | # Imports 29 | rewrite.imports.sort = scalastyle 30 | rewrite.imports.groups = [ 31 | [".*"], 32 | ["org.apache.hbase.thirdparty\\..*"], 33 | ["org.apache.hadoop.hbase.shaded\\..*"] 34 | ] 35 | rewrite.imports.contiguousGroups = no 36 | importSelectors = "singleLine" 37 | 38 | # Newlines 39 | newlines.beforeCurlyLambdaParams = multiline 40 | newlines.afterCurlyLambdaParams = squash 41 | danglingParentheses.preset = false 42 | optIn.configStyleArguments = false 43 | 44 | # Scaladoc 45 | docstrings.style = Asterisk 46 | # See https://github.com/scalameta/scalafmt/issues/1387 47 | docstrings.wrap = no 48 | 49 | # Max column 50 | maxColumn = 100 51 | 52 | # Version 53 | runner.dialect = scala212 54 | version = 3.7.12 55 | -------------------------------------------------------------------------------- /dev-support/code-coverage/README.md: -------------------------------------------------------------------------------- 1 | 18 | 19 | # Code analysis 20 | 21 | The `run-coverage.sh` script runs maven with the coverage profile which generates the test coverage data for both java 22 | and scala classes. 23 | If the required parameters are given it also runs the sonar analysis and uploads the results to the given SonarQube 24 | Server. 25 | 26 | ## Running code analysis 27 | 28 | After running the script the code coverage results are generated under the `test-reporting/target/code-coverage/` 29 | folder. 30 | The JaCoCo code coverage library generated reports can be found under the `jacoco-reports` folder and the SCoverage 31 | generated results can be found under the `scoverage-reports` folder. 32 | 33 | Here is how you can generate the code coverage reports: 34 | 35 | ```./dev-support/code-coverage/run-coverage.sh``` 36 | 37 | ## Publishing coverage results to SonarQube 38 | 39 | The required parameters for publishing the results to SonarQube are: 40 | 41 | - host URL, 42 | - login credentials, 43 | - project key 44 | 45 | The project name is an optional parameter. 
46 | 47 | Here is an example command for running and publishing the coverage data: 48 | 49 | ```./dev-support/code-coverage/run-coverage.sh -l ProjectCredentials -u https://exampleserver.com -k Project_Key -n Project_Name``` 50 | -------------------------------------------------------------------------------- /dev-support/code-coverage/run-coverage.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | 19 | usage() { 20 | echo 21 | echo "options:" 22 | echo "-h Display help" 23 | echo "-u SonarQube Host URL" 24 | echo "-l SonarQube Login Credentials" 25 | echo "-k SonarQube Project Key" 26 | echo "-n SonarQube Project Name" 27 | echo 28 | echo "Important:" 29 | echo " The required parameters for publishing the coverage results to SonarQube:" 30 | echo " - Host URL" 31 | echo " - Login Credentials" 32 | echo " - Project Key" 33 | echo 34 | } 35 | 36 | execute() { 37 | SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" 38 | MAIN_POM="${SCRIPT_DIR}/../../pom.xml" 39 | 40 | mvn -B -e -f "$MAIN_POM" clean install -DskipTests -DskipShade -Pcoverage 41 | 42 | mvn -B -e -f "$MAIN_POM" package -fn -Pcoverage 43 | 44 | # If the required parameters are given, the code coverage results are uploaded to the SonarQube Server 45 | if [ -n "$SONAR_LOGIN" ] && [ -n "$SONAR_PROJECT_KEY" ] && [ -n "$SONAR_URL" ]; then 46 | mvn -B -e -Pcoverage sonar:sonar -Dsonar.host.url="$SONAR_URL" -Dsonar.login="$SONAR_LOGIN" \ 47 | -Dsonar.projectKey="$SONAR_PROJECT_KEY" -Dsonar.projectName="$SONAR_PROJECT_NAME" 48 | fi 49 | } 50 | 51 | while getopts ":u:l:k:n:h" option; do 52 | case $option in 53 | u) SONAR_URL=${OPTARG:-} ;; 54 | l) SONAR_LOGIN=${OPTARG:-} ;; 55 | k) SONAR_PROJECT_KEY=${OPTARG:-} ;; 56 | n) SONAR_PROJECT_NAME=${OPTARG:-} ;; 57 | h) # Display usage 58 | usage 59 | exit 60 | ;; 61 | \?) # Invalid option 62 | echo "Error: Invalid option" 63 | exit 64 | ;; 65 | esac 66 | done 67 | 68 | # Start code analysis 69 | execute 70 | -------------------------------------------------------------------------------- /dev-support/eclipse.importorder: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. 
You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | #Organize Import Order 16 | 3=org.apache.hadoop.hbase.shaded 17 | 2=org.apache.hbase.thirdparty 18 | 1= 19 | 0=\# 20 | -------------------------------------------------------------------------------- /dev-support/jenkins/Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Dockerfile for hbase-connectors pre-commit build. 18 | # https://builds.apache.org/job/PreCommit-HBASE-CONNECTORS-Build 19 | 20 | FROM maven:3.6-jdk-8 21 | 22 | # hadolint ignore=DL3008 23 | RUN apt-get -q update && apt-get -q install --no-install-recommends -y \ 24 | git \ 25 | rsync \ 26 | shellcheck \ 27 | wget && \ 28 | apt-get clean && \ 29 | rm -rf /var/lib/apt/lists/* 30 | 31 | ### 32 | # Avoid out of memory errors in builds 33 | ### 34 | ENV MAVEN_OPTS -Xmx3g 35 | 36 | CMD ["/bin/bash"] 37 | 38 | ### 39 | # Everything past this point is either not needed for testing or breaks Yetus. 40 | # So tell Yetus not to read the rest of the file: 41 | # YETUS CUT HERE 42 | ### 43 | -------------------------------------------------------------------------------- /dev-support/jenkins/gather_machine_environment.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 
18 | 19 | set -e 20 | function usage { 21 | echo "Usage: ${0} /path/for/output/dir" 22 | echo "" 23 | echo " Gather info about a build machine that test harnesses should poll before running." 24 | echo " presumes you'll then archive the passed output dir." 25 | 26 | exit 1 27 | } 28 | 29 | if [ "$#" -lt 1 ]; then 30 | usage 31 | fi 32 | 33 | 34 | declare output=$1 35 | 36 | if [ ! -d "${output}" ] || [ ! -w "${output}" ]; then 37 | echo "Specified output directory must exist and be writable." >&2 38 | exit 1 39 | fi 40 | 41 | echo "getting machine specs, find in ${BUILD_URL}/artifact/${output}/" 42 | echo "JAVA_HOME: ${JAVA_HOME}" >"${output}/java_home" 2>&1 || true 43 | ls -l "${JAVA_HOME}" >"${output}/java_home_ls" 2>&1 || true 44 | echo "MAVEN_HOME: ${MAVEN_HOME}" >"${output}/mvn_home" 2>&1 || true 45 | mvn --offline --version >"${output}/mvn_version" 2>&1 || true 46 | cat /proc/cpuinfo >"${output}/cpuinfo" 2>&1 || true 47 | cat /proc/meminfo >"${output}/meminfo" 2>&1 || true 48 | cat /proc/diskstats >"${output}/diskstats" 2>&1 || true 49 | cat /sys/block/sda/stat >"${output}/sys-block-sda-stat" 2>&1 || true 50 | df -h >"${output}/df-h" 2>&1 || true 51 | ps -Aww >"${output}/ps-Aww" 2>&1 || true 52 | ifconfig -a >"${output}/ifconfig-a" 2>&1 || true 53 | lsblk -ta >"${output}/lsblk-ta" 2>&1 || true 54 | lsblk -fa >"${output}/lsblk-fa" 2>&1 || true 55 | ulimit -a >"${output}/ulimit-a" 2>&1 || true 56 | uptime >"${output}/uptime" 2>&1 || true 57 | hostname -a >"${output}/hostname-a" 2>&1 || true 58 | -------------------------------------------------------------------------------- /dev-support/jenkins/jenkins_precommit_github_yetus.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | 19 | set -e 20 | 21 | # place ourselves in the directory containing the hbase and yetus checkouts 22 | cd "$(dirname "$0")/../.." 23 | echo "executing from $(pwd)" 24 | 25 | if [[ "true" = "${DEBUG}" ]]; then 26 | set -x 27 | printenv 2>&1 | sort 28 | fi 29 | 30 | declare -i missing_env=0 31 | declare -a required_envs=( 32 | # these ENV variables define the required API with Jenkinsfile_GitHub 33 | "ARCHIVE_PATTERN_LIST" 34 | "BUILD_URL_ARTIFACTS" 35 | "DOCKERFILE" 36 | "GITHUB_PASSWORD" 37 | "GITHUB_USER" 38 | "PATCHDIR" 39 | "PLUGINS" 40 | "SET_JAVA_HOME" 41 | "SOURCEDIR" 42 | "YETUSDIR" 43 | "PERSONALITY" 44 | ) 45 | # Validate params 46 | for required_env in "${required_envs[@]}"; do 47 | if [ -z "${!required_env}" ]; then 48 | echo "[ERROR] Required environment variable '${required_env}' is not set." 
49 | missing_env=${missing_env}+1 50 | fi 51 | done 52 | 53 | if [ ${missing_env} -gt 0 ]; then 54 | echo "[ERROR] Please set the required environment variables before invoking. If this error is " \ 55 | "on Jenkins, then please file a JIRA about the error." 56 | exit 1 57 | fi 58 | 59 | # TODO (HBASE-23900): cannot assume test-patch runs directly from sources 60 | TESTPATCHBIN="${YETUSDIR}/precommit/src/main/shell/test-patch.sh" 61 | 62 | # this must be clean for every run 63 | rm -rf "${PATCHDIR}" 64 | mkdir -p "${PATCHDIR}" 65 | 66 | # Gather machine information 67 | mkdir "${PATCHDIR}/machine" 68 | "${SOURCEDIR}/dev-support/jenkins/gather_machine_environment.sh" "${PATCHDIR}/machine" 69 | 70 | # enable debug output for yetus 71 | if [[ "true" = "${DEBUG}" ]]; then 72 | YETUS_ARGS+=("--debug") 73 | fi 74 | # If we're doing docker, make sure we don't accidentally pollute the image with a host java path 75 | if [ -n "${JAVA_HOME}" ]; then 76 | unset JAVA_HOME 77 | fi 78 | YETUS_ARGS+=("--patch-dir=${PATCHDIR}") 79 | # where the source is located 80 | YETUS_ARGS+=("--basedir=${SOURCEDIR}") 81 | YETUS_ARGS+=("--project=hbase-connectors") 82 | YETUS_ARGS+=("--personality=${PERSONALITY}") 83 | # lots of different output formats 84 | YETUS_ARGS+=("--brief-report-file=${PATCHDIR}/brief.txt") 85 | YETUS_ARGS+=("--console-report-file=${PATCHDIR}/console.txt") 86 | YETUS_ARGS+=("--html-report-file=${PATCHDIR}/report.html") 87 | # enable writing back to Github 88 | YETUS_ARGS+=("--github-password=${GITHUB_PASSWORD}") 89 | YETUS_ARGS+=("--github-user=${GITHUB_USER}") 90 | # auto-kill any surefire stragglers during unit test runs 91 | YETUS_ARGS+=("--reapermode=kill") 92 | # set relatively high limits for ASF machines 93 | # changing these to higher values may cause problems 94 | # with other jobs on systemd-enabled machines 95 | YETUS_ARGS+=("--dockermemlimit=20g") 96 | # -1 spotbugs issues that show up prior to the patch being applied 97 | YETUS_ARGS+=("--spotbugs-strict-precheck") 98 | # rsync these files back into the archive dir 99 | YETUS_ARGS+=("--archive-list=${ARCHIVE_PATTERN_LIST}") 100 | # URL for user-side presentation in reports and such to our artifacts 101 | YETUS_ARGS+=("--build-url-artifacts=${BUILD_URL_ARTIFACTS}") 102 | # plugins to enable 103 | YETUS_ARGS+=("--plugins=${PLUGINS}") 104 | YETUS_ARGS+=("--tests-filter=test4tests") 105 | # run in docker mode and specifically point to our 106 | # Dockerfile since we don't want to use the auto-pulled version. 107 | YETUS_ARGS+=("--docker") 108 | YETUS_ARGS+=("--dockerfile=${DOCKERFILE}") 109 | YETUS_ARGS+=("--mvn-custom-repos") 110 | YETUS_ARGS+=("--java-home=${SET_JAVA_HOME}") 111 | # effectively treat dev-support as a custom maven module 112 | YETUS_ARGS+=("--skip-dirs=dev-support") 113 | # help keep the ASF boxes clean 114 | YETUS_ARGS+=("--sentinel") 115 | # use emoji vote so it is easier to find the broken line 116 | YETUS_ARGS+=("--github-use-emoji-vote") 117 | YETUS_ARGS+=("--github-repo=apache/hbase-connectors") 118 | 119 | echo "Launching yetus with command line:" 120 | echo "${TESTPATCHBIN} ${YETUS_ARGS[*]}" 121 | 122 | /usr/bin/env bash "${TESTPATCHBIN}" "${YETUS_ARGS[@]}" 123 | -------------------------------------------------------------------------------- /dev-support/license-header: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. 
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | -------------------------------------------------------------------------------- /hbase-connectors-assembly/src/main/assembly/connector-components.xml: -------------------------------------------------------------------------------- 1 | 2 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | ${project.basedir}/../conf 28 | conf 29 | 0644 30 | 0755 31 | 32 | 33 | 34 | 35 | ${project.basedir}/../bin 36 | bin 37 | 38 | hbase-connectors 39 | hbase-connectors-config.sh 40 | hbase-connectors-daemon.sh 41 | 42 | 0755 43 | 0755 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /hbase-connectors-assembly/src/main/assembly/hbase-connectors-bin.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 24 | bin 25 | 26 | tar.gz 27 | 28 | hbase-connectors-${revision} 29 | 30 | src/main/assembly/connector-components.xml 31 | 32 | 33 | 34 | true 35 | 36 | org.apache.hbase.connectors.spark:hbase-spark-it 37 | 38 | 39 | false 40 | lib 41 | 42 | 43 | 44 | org.apache.yetus:audience-annotations 45 | org.slf4j:slf4j-api 46 | org.slf4j:slf4j-log4j12 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | ${project.build.directory}/maven-shared-archive-resources/META-INF/LICENSE 56 | . 57 | LICENSE.txt 58 | unix 59 | 60 | 61 | ${project.build.directory}/maven-shared-archive-resources/META-INF/NOTICE 62 | . 63 | NOTICE.txt 64 | unix 65 | 66 | 67 | ${basedir}/src/main/resources/META-INF/LEGAL 68 | . 
69 | LEGAL 70 | unix 71 | 72 | 73 | ../README.md 74 | ${file.separator} 75 | 76 | 77 | ../CHANGELOG.md 78 | ${file.separator} 79 | 80 | 81 | ../RELEASENOTES.md 82 | ${file.separator} 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /hbase-connectors-assembly/src/main/resources/META-INF/LEGAL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/hbase-connectors/6544291a4c87a8b8c99bcc05dd64f4692e87f1f3/hbase-connectors-assembly/src/main/resources/META-INF/LEGAL -------------------------------------------------------------------------------- /hbase-connectors-assembly/src/main/resources/supplemental-models.xml: -------------------------------------------------------------------------------- 1 | 2 | 20 | 23 | 24 | 25 | 26 | 27 | 28 | javax.xml.stream 29 | stax-api 30 | 31 | 32 | CDDL 1.1 33 | https://github.com/javaee/activation/blob/master/LICENSE.txt 34 | repo 35 | 36 | 37 | 38 | 39 | 40 | 41 | org.codehaus.jettison 42 | jettison 43 | 1.1 44 | 45 | 46 | Apache License, Version 2.0 47 | http://www.apache.org/licenses/LICENSE-2.0.txt 48 | repo 49 | 50 | 51 | 52 | 53 | 54 | 55 | org.bouncycastle 56 | bcprov-jdk18on 57 | 1.78.1 58 | 59 | 60 | 61 | MIT License 62 | http://www.opensource.org/licenses/mit-license.php 63 | repo 64 | 65 | Copyright (c) 2000 - 2018 The Legion of the Bouncy Castle Inc. (https://www.bouncycastle.org) 66 | 67 | 68 | 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /kafka/conf/kafka-route-rules.xml: -------------------------------------------------------------------------------- 1 | 20 | 21 | 22 | 64 | 65 | -------------------------------------------------------------------------------- /kafka/hbase-kafka-model/src/main/avro/HbaseKafkaEvent.avro: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | *
12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | {"namespace": "org.apache.hadoop.hbase.kafka", 20 | "type": "record", 21 | "name": "HBaseKafkaEvent", 22 | "fields": [ 23 | {"name": "key", "type": "bytes"}, 24 | {"name": "timestamp", "type": "long" }, 25 | {"name": "delete", "type": "boolean" }, 26 | {"name": "value", "type": "bytes"}, 27 | {"name": "qualifier", "type": "bytes"}, 28 | {"name": "family", "type": "bytes"}, 29 | {"name": "table", "type": "bytes"} 30 | ] 31 | } 32 | -------------------------------------------------------------------------------- /kafka/hbase-kafka-proxy/src/main/java/org/apache/hadoop/hbase/kafka/DropRule.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.kafka; 19 | 20 | import org.apache.yetus.audience.InterfaceAudience; 21 | 22 | /** 23 | * Rule that indicates the Cell should not be replicated 24 | */ 25 | @InterfaceAudience.Private 26 | public class DropRule extends Rule { 27 | public DropRule() { 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /kafka/hbase-kafka-proxy/src/main/java/org/apache/hadoop/hbase/kafka/DumpToStringListener.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.kafka; 19 | 20 | import java.time.Duration; 21 | import java.util.Arrays; 22 | import java.util.Iterator; 23 | import java.util.Properties; 24 | import java.util.stream.Collectors; 25 | import org.apache.avro.io.BinaryDecoder; 26 | import org.apache.avro.io.DecoderFactory; 27 | import org.apache.avro.specific.SpecificDatumReader; 28 | import org.apache.hadoop.hbase.util.Bytes; 29 | import org.apache.hadoop.hbase.util.VersionInfo; 30 | import org.apache.kafka.clients.consumer.ConsumerRecord; 31 | import org.apache.kafka.clients.consumer.ConsumerRecords; 32 | import org.apache.kafka.clients.consumer.KafkaConsumer; 33 | import org.apache.kafka.common.serialization.ByteArrayDeserializer; 34 | import org.apache.yetus.audience.InterfaceAudience; 35 | import org.slf4j.Logger; 36 | import org.slf4j.LoggerFactory; 37 | 38 | import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine; 39 | import org.apache.hbase.thirdparty.org.apache.commons.cli.DefaultParser; 40 | import org.apache.hbase.thirdparty.org.apache.commons.cli.HelpFormatter; 41 | import org.apache.hbase.thirdparty.org.apache.commons.cli.Options; 42 | import org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException; 43 | 44 | /** 45 | * connects to kafka and reads from the passed in topics. Parses each message into an avro object 46 | * and dumps it to the console. 47 | */ 48 | @InterfaceAudience.Private 49 | public final class DumpToStringListener { 50 | private static final Logger LOG = LoggerFactory.getLogger(DumpToStringListener.class); 51 | 52 | private DumpToStringListener() { 53 | } 54 | 55 | public static void main(String[] args) { 56 | LOG.info("***** STARTING service '" + DumpToStringListener.class.getSimpleName() + "' *****"); 57 | VersionInfo.logVersion(); 58 | 59 | Options options = new Options(); 60 | options.addRequiredOption("k", "kafkabrokers", true, "Kafka Brokers " + "(comma delimited)"); 61 | options.addRequiredOption("t", "kafkatopics", true, 62 | "Kafka Topics " + "to subscribe to (comma delimited)"); 63 | CommandLine commandLine = null; 64 | 65 | try { 66 | commandLine = new DefaultParser().parse(options, args); 67 | } catch (ParseException e) { 68 | LOG.error("Could not parse: ", e); 69 | printUsageAndExit(options, -1); 70 | } 71 | 72 | SpecificDatumReader dreader = 73 | new SpecificDatumReader<>(HBaseKafkaEvent.SCHEMA$); 74 | 75 | String topic = commandLine.getOptionValue('t'); 76 | Properties props = new Properties(); 77 | props.put("bootstrap.servers", commandLine.getOptionValue('k')); 78 | props.put("group.id", "hbase kafka test tool"); 79 | props.put("key.deserializer", ByteArrayDeserializer.class.getName()); 80 | props.put("value.deserializer", ByteArrayDeserializer.class.getName()); 81 | 82 | try (KafkaConsumer consumer = new KafkaConsumer<>(props)) { 83 | consumer.subscribe(Arrays.stream(topic.split(",")).collect(Collectors.toList())); 84 | 85 | while (true) { 86 | ConsumerRecords records = consumer.poll(Duration.ofMillis(10000)); 87 | Iterator> it = records.iterator(); 88 | while (it.hasNext()) { 89 | ConsumerRecord record = it.next(); 90 | BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(record.value(), null); 91 | try { 92 | HBaseKafkaEvent event = dreader.read(null, decoder); 93 | LOG.debug("key :" + Bytes.toString(record.key()) + " value " + event); 94 | } catch (Exception e) { 95 | throw new RuntimeException(e); 96 | } 97 | } 98 | } 99 | } 100 | } 101 | 102 | private static void printUsageAndExit(Options options, int exitCode) { 
103 | HelpFormatter formatter = new HelpFormatter(); 104 | formatter.printHelp("hbase " + DumpToStringListener.class.getName(), "", options, 105 | "\n[--kafkabrokers ] " 106 | + "[-k ] \n", 107 | true); 108 | System.exit(exitCode); 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /kafka/hbase-kafka-proxy/src/main/java/org/apache/hadoop/hbase/kafka/TopicRule.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.kafka; 19 | 20 | import java.util.Arrays; 21 | import java.util.HashSet; 22 | import java.util.Set; 23 | import java.util.stream.Collectors; 24 | import org.apache.yetus.audience.InterfaceAudience; 25 | 26 | /** 27 | * If the Cell matches the rule returns the configured topics. 28 | */ 29 | @InterfaceAudience.Private 30 | public class TopicRule extends Rule { 31 | private Set topics = new HashSet<>(); 32 | 33 | public TopicRule(String topics) { 34 | this.topics.addAll(Arrays.stream(topics.split(",")).collect(Collectors.toList())); 35 | } 36 | 37 | public Set getTopics() { 38 | return topics; 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /kafka/hbase-kafka-proxy/src/test/java/org/apache/hadoop/hbase/kafka/ProducerForTesting.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.kafka; 19 | 20 | import java.util.ArrayList; 21 | import java.util.HashMap; 22 | import java.util.List; 23 | import java.util.Map; 24 | import java.util.concurrent.Future; 25 | import org.apache.avro.io.BinaryDecoder; 26 | import org.apache.avro.io.DecoderFactory; 27 | import org.apache.avro.specific.SpecificDatumReader; 28 | import org.apache.kafka.clients.producer.MockProducer; 29 | import org.apache.kafka.clients.producer.ProducerRecord; 30 | import org.apache.kafka.clients.producer.RecordMetadata; 31 | import org.apache.kafka.test.MockSerializer; 32 | 33 | /** 34 | * Mocks Kafka producer for testing 35 | */ 36 | public class ProducerForTesting extends MockProducer { 37 | Map> messages = new HashMap<>(); 38 | SpecificDatumReader dreader = new SpecificDatumReader<>(HBaseKafkaEvent.SCHEMA$); 39 | 40 | public ProducerForTesting() { 41 | super(true, new MockSerializer(), new MockSerializer()); 42 | } 43 | 44 | public Map> getMessages() { 45 | return messages; 46 | } 47 | 48 | @Override 49 | public Future send(ProducerRecord producerRecord) { 50 | try { 51 | BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(producerRecord.value(), null); 52 | HBaseKafkaEvent event = dreader.read(null, decoder); 53 | if (!messages.containsKey(producerRecord.topic())) { 54 | messages.put(producerRecord.topic(), new ArrayList<>()); 55 | } 56 | messages.get(producerRecord.topic()).add(event); 57 | return super.send(producerRecord); 58 | } catch (Exception e) { 59 | throw new RuntimeException(e); 60 | } 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /kafka/hbase-kafka-proxy/src/test/java/org/apache/hadoop/hbase/kafka/TestProcessMutations.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.kafka; 19 | 20 | import java.io.ByteArrayInputStream; 21 | import java.nio.charset.StandardCharsets; 22 | import java.util.ArrayList; 23 | import java.util.List; 24 | import org.apache.hadoop.conf.Configuration; 25 | import org.apache.hadoop.hbase.TableName; 26 | import org.apache.hadoop.hbase.client.Put; 27 | import org.apache.hadoop.hbase.client.Row; 28 | import org.apache.hadoop.hbase.client.Table; 29 | import org.apache.hadoop.hbase.testclassification.SmallTests; 30 | import org.junit.Assert; 31 | import org.junit.Before; 32 | import org.junit.Test; 33 | import org.junit.experimental.categories.Category; 34 | 35 | /** 36 | * Test that mutations are getting published to the topic 37 | */ 38 | @Category(SmallTests.class) 39 | public class TestProcessMutations { 40 | private static final String ROUTE_RULE1 = 41 | ""; 42 | 43 | ProducerForTesting myTestingProducer; 44 | 45 | @Before 46 | public void setup() { 47 | this.myTestingProducer = new ProducerForTesting(); 48 | } 49 | 50 | @Test 51 | public void testSendMessage() { 52 | TopicRoutingRules rules = new TopicRoutingRules(); 53 | try { 54 | rules.parseRules(new ByteArrayInputStream(ROUTE_RULE1.getBytes(StandardCharsets.UTF_8))); 55 | Configuration conf = new Configuration(); 56 | KafkaBridgeConnection connection = new KafkaBridgeConnection(conf, rules, myTestingProducer); 57 | long zeTimestamp = System.currentTimeMillis(); 58 | Put put = new Put("key1".getBytes(StandardCharsets.UTF_8), zeTimestamp); 59 | put.addColumn("FAMILY".getBytes(StandardCharsets.UTF_8), 60 | "not foo".getBytes(StandardCharsets.UTF_8), 61 | "VALUE should NOT pass".getBytes(StandardCharsets.UTF_8)); 62 | put.addColumn("FAMILY".getBytes(StandardCharsets.UTF_8), 63 | "foo".getBytes(StandardCharsets.UTF_8), 64 | "VALUE should pass".getBytes(StandardCharsets.UTF_8)); 65 | Table myTable = connection.getTable(TableName.valueOf("MyNamespace:MyTable")); 66 | List rows = new ArrayList<>(); 67 | rows.add(put); 68 | myTable.batch(rows, new Object[0]); 69 | 70 | Assert.assertFalse(myTestingProducer.getMessages().isEmpty()); 71 | } catch (Exception e) { 72 | Assert.fail(e.getMessage()); 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /kafka/hbase-kafka-proxy/src/test/java/org/apache/hadoop/hbase/kafka/TestQualifierMatching.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.kafka; 19 | 20 | import java.nio.charset.StandardCharsets; 21 | import org.apache.hadoop.hbase.testclassification.SmallTests; 22 | import org.junit.Assert; 23 | import org.junit.Test; 24 | import org.junit.experimental.categories.Category; 25 | 26 | /** 27 | * Make sure match rules work 28 | */ 29 | @Category(SmallTests.class) 30 | public class TestQualifierMatching { 31 | 32 | @Test 33 | public void testMatchQualfier() { 34 | DropRule rule = new DropRule(); 35 | rule.setQualifier("data".getBytes(StandardCharsets.UTF_8)); 36 | Assert.assertTrue(rule.qualifierMatch("data".getBytes(StandardCharsets.UTF_8))); 37 | 38 | rule = new DropRule(); 39 | rule.setQualifier("data1".getBytes(StandardCharsets.UTF_8)); 40 | Assert.assertFalse(rule.qualifierMatch("data".getBytes(StandardCharsets.UTF_8))); 41 | 42 | // if not set, it is a wildcard 43 | rule = new DropRule(); 44 | Assert.assertTrue(rule.qualifierMatch("data".getBytes(StandardCharsets.UTF_8))); 45 | } 46 | 47 | @Test 48 | public void testStartWithQualifier() { 49 | DropRule rule = new DropRule(); 50 | rule.setQualifier("data*".getBytes(StandardCharsets.UTF_8)); 51 | Assert.assertTrue(rule.isQualifierStartsWith()); 52 | Assert.assertFalse(rule.isQualifierEndsWith()); 53 | 54 | Assert.assertTrue(rule.qualifierMatch("data".getBytes(StandardCharsets.UTF_8))); 55 | Assert.assertTrue(rule.qualifierMatch("data1".getBytes(StandardCharsets.UTF_8))); 56 | Assert.assertTrue(rule.qualifierMatch("datafoobar".getBytes(StandardCharsets.UTF_8))); 57 | Assert.assertFalse(rule.qualifierMatch("datfoobar".getBytes(StandardCharsets.UTF_8))); 58 | Assert.assertFalse(rule.qualifierMatch("d".getBytes(StandardCharsets.UTF_8))); 59 | Assert.assertFalse(rule.qualifierMatch("".getBytes(StandardCharsets.UTF_8))); 60 | } 61 | 62 | @Test 63 | public void testEndsWithQualifier() { 64 | DropRule rule = new DropRule(); 65 | rule.setQualifier("*data".getBytes(StandardCharsets.UTF_8)); 66 | Assert.assertFalse(rule.isQualifierStartsWith()); 67 | Assert.assertTrue(rule.isQualifierEndsWith()); 68 | 69 | Assert.assertTrue(rule.qualifierMatch("data".getBytes(StandardCharsets.UTF_8))); 70 | Assert.assertTrue(rule.qualifierMatch("1data".getBytes(StandardCharsets.UTF_8))); 71 | Assert.assertTrue(rule.qualifierMatch("foobardata".getBytes(StandardCharsets.UTF_8))); 72 | Assert.assertFalse(rule.qualifierMatch("foobardat".getBytes(StandardCharsets.UTF_8))); 73 | Assert.assertFalse(rule.qualifierMatch("d".getBytes(StandardCharsets.UTF_8))); 74 | Assert.assertFalse(rule.qualifierMatch("".getBytes(StandardCharsets.UTF_8))); 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /kafka/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 22 | 4.0.0 23 | 24 | org.apache.hbase.connectors 25 | hbase-connectors 26 | ${revision} 27 | ../ 28 | 29 | kafka 30 | pom 31 | Apache HBase - Kafka 32 | Kafka Proxy for Apache HBase 33 | 34 | hbase-kafka-model 35 | hbase-kafka-proxy 36 | 37 | 38 | ${hbase-hadoop2.version} 39 | 40 | 41 | 42 | 43 | org.apache.avro 44 | avro 45 | ${avro.version} 46 | 47 | 48 | org.apache.hbase.connectors.kafka 49 | hbase-kafka-model 50 | ${revision} 51 | 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /spark/README.md: -------------------------------------------------------------------------------- 1 | 18 | 19 | # Apache HBase™ Spark Connector 20 | 21 | ## Spark, Scala and Configurable 
Options 22 | 23 | To generate an artifact for a different [Spark version](https://mvnrepository.com/artifact/org.apache.spark/spark-core) and/or [Scala version](https://www.scala-lang.org/download/all.html), 24 | [Hadoop version](https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-core), or [HBase version](https://mvnrepository.com/artifact/org.apache.hbase/hbase), pass command-line options as follows (changing version numbers appropriately): 25 | 26 | ``` 27 | $ mvn -Dspark.version=3.1.2 -Dscala.version=2.12.10 -Dhadoop-three.version=3.2.0 -Dscala.binary.version=2.12 -Dhbase.version=2.4.8 clean install 28 | ``` 29 | 30 | ## Configuration and Installation 31 | **Client-side** (Spark) configuration: 32 | - The HBase configuration file `hbase-site.xml` should be made available to Spark; it can be copied to `$SPARK_CONF_DIR` (default is `$SPARK_HOME/conf`). 33 | 34 | **Server-side** (HBase region servers) configuration: 35 | - The following jars need to be in the CLASSPATH of the HBase region servers: 36 | - scala-library, hbase-spark, and hbase-spark-protocol-shaded. 37 | - The server-side configuration is needed for column filter pushdown. 38 | - If you cannot perform the server-side configuration, consider using `.option("hbase.spark.pushdown.columnfilter", false)` (see the read sketch below). 39 | - The Scala library version must match the Scala version (2.11 or 2.12) used for compiling the connector. 40 | -------------------------------------------------------------------------------- /spark/hbase-spark-it/src/test/resources/hbase-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 23 | 24 | 25 | hbase.defaults.for.version.skip 26 | true 27 | 28 | 29 | hbase.hconnection.threads.keepalivetime 30 | 3 31 | 32 | 33 | -------------------------------------------------------------------------------- /spark/hbase-spark-protocol-shaded/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 21 | 22 | 4.0.0 23 | 24 | 25 | org.apache.hbase.connectors 26 | spark 27 | ${revision} 28 | ../ 29 | 30 | 31 | org.apache.hbase.connectors.spark 32 | hbase-spark-protocol-shaded 33 | Apache HBase - Spark Protocol (Shaded) 34 | 35 | 36 | 41 | 42 | org.apache.hbase.connectors.spark 43 | hbase-spark-protocol 44 | true 45 | 46 | 47 | org.apache.hbase.thirdparty 48 | hbase-shaded-protobuf 49 | ${hbase-thirdparty.version} 50 | 51 | 52 | 53 | 54 | 55 | 56 | org.apache.maven.plugins 57 | maven-shade-plugin 58 | 59 | 60 | 61 | shade 62 | 63 | package 64 | 65 | true 66 | true 67 | 69 | false 70 | 71 | 72 | com.google.protobuf 73 | org.apache.hbase.thirdparty.com.google.protobuf 74 | 75 | 76 | 77 | 78 | com.google.protobuf:protobuf-java 79 | org.apache.hbase.thirdparty:* 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /spark/hbase-spark-protocol/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 21 | 22 | 4.0.0 23 | 24 | 25 | org.apache.hbase.connectors 26 | spark 27 | ${revision} 28 | ../ 29 | 30 | 31 | org.apache.hbase.connectors.spark 32 | hbase-spark-protocol 33 | Apache HBase - Spark Protocol 34 | 35 | 36 | 37 | com.google.protobuf 38 | protobuf-java 39 | ${thirdparty.protobuf.version} 40 | 41 | 42 | 43 | 44 | 45 | 46 | org.apache.maven.plugins 47 | maven-compiler-plugin 48 | 49 | 50 | org.xolstice.maven.plugins 51 | protobuf-maven-plugin 52 | 53 | 54 | compile-protoc 55 | 56 | compile 57 | 58 | generate-sources 59
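The spark/README.md above notes that column filter pushdown can be disabled on the client when the server-side jars are not installed. As a rough read sketch of that configuration using the connector's Spark SQL data source: the table name `table1`, column family `cf1`, and the catalog layout are assumptions for illustration only, not taken from this repository, and `hbase-site.xml` is assumed to be visible to Spark as described in the README.

```
import org.apache.hadoop.hbase.spark.datasources.HBaseTableCatalog
import org.apache.spark.sql.SparkSession

object HBaseReadSketch {
  // Catalog for an assumed table "table1": the row key exposed as "col0",
  // plus one string column "col1" in the assumed column family "cf1".
  val catalog: String =
    """{
      |  "table":   {"namespace": "default", "name": "table1"},
      |  "rowkey":  "key",
      |  "columns": {
      |    "col0": {"cf": "rowkey", "col": "key",  "type": "string"},
      |    "col1": {"cf": "cf1",    "col": "col1", "type": "string"}
      |  }
      |}""".stripMargin

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("HBaseReadSketch").getOrCreate()

    val df = spark.read
      .format("org.apache.hadoop.hbase.spark")
      .option(HBaseTableCatalog.tableCatalog, catalog)
      // Client-side fallback when the region servers do not carry the connector jars.
      .option("hbase.spark.pushdown.columnfilter", false)
      .load()

    df.show()
    spark.stop()
  }
}
```

With the server-side jars installed, the `hbase.spark.pushdown.columnfilter` option can be omitted so that column filters are pushed down to the region servers.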
| 60 | 61 | 62 | 63 | org.apache.maven.plugins 64 | maven-source-plugin 65 | 66 | 67 | attach-sources 68 | 69 | jar 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /spark/hbase-spark-protocol/src/main/protobuf/SparkFilter.proto: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | // This file contains protocol buffers that are used for Spark filters 20 | // over in the hbase-spark module 21 | package hbase.pb; 22 | 23 | option java_package = "org.apache.hadoop.hbase.spark.protobuf.generated"; 24 | option java_outer_classname = "SparkFilterProtos"; 25 | option java_generic_services = true; 26 | option java_generate_equals_and_hash = true; 27 | option optimize_for = SPEED; 28 | 29 | message SQLPredicatePushDownCellToColumnMapping { 30 | required bytes column_family = 1; 31 | required bytes qualifier = 2; 32 | required string column_name = 3; 33 | } 34 | 35 | message SQLPredicatePushDownFilter { 36 | required string dynamic_logic_expression = 1; 37 | repeated bytes value_from_query_array = 2; 38 | repeated SQLPredicatePushDownCellToColumnMapping cell_to_column_mapping = 3; 39 | optional string encoderClassName = 4; 40 | } 41 | -------------------------------------------------------------------------------- /spark/hbase-spark/README.md: -------------------------------------------------------------------------------- 1 | 18 | 19 | ## ON PROTOBUFS 20 | This Maven module has the core protobuf definition files ('.proto') used by the HBase 21 | Spark connector, including its tests. 22 | 23 | Generation of Java files from the protobuf .proto files included here is done as 24 | part of the build. 25 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseBulkDeleteExample.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License.
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext; 19 | 20 | import java.util.ArrayList; 21 | import java.util.List; 22 | import org.apache.hadoop.conf.Configuration; 23 | import org.apache.hadoop.hbase.HBaseConfiguration; 24 | import org.apache.hadoop.hbase.TableName; 25 | import org.apache.hadoop.hbase.client.Delete; 26 | import org.apache.hadoop.hbase.spark.JavaHBaseContext; 27 | import org.apache.hadoop.hbase.util.Bytes; 28 | import org.apache.spark.SparkConf; 29 | import org.apache.spark.api.java.JavaRDD; 30 | import org.apache.spark.api.java.JavaSparkContext; 31 | import org.apache.spark.api.java.function.Function; 32 | import org.apache.yetus.audience.InterfaceAudience; 33 | 34 | /** 35 | * This is a simple example of deleting records in HBase with the bulkDelete function. 36 | */ 37 | @InterfaceAudience.Private 38 | final public class JavaHBaseBulkDeleteExample { 39 | 40 | private JavaHBaseBulkDeleteExample() { 41 | } 42 | 43 | public static void main(String[] args) { 44 | if (args.length < 1) { 45 | System.out.println("JavaHBaseBulkDeleteExample {tableName}"); 46 | return; 47 | } 48 | 49 | String tableName = args[0]; 50 | 51 | SparkConf sparkConf = new SparkConf().setAppName("JavaHBaseBulkDeleteExample " + tableName); 52 | JavaSparkContext jsc = new JavaSparkContext(sparkConf); 53 | 54 | try { 55 | List list = new ArrayList<>(5); 56 | list.add(Bytes.toBytes("1")); 57 | list.add(Bytes.toBytes("2")); 58 | list.add(Bytes.toBytes("3")); 59 | list.add(Bytes.toBytes("4")); 60 | list.add(Bytes.toBytes("5")); 61 | 62 | JavaRDD rdd = jsc.parallelize(list); 63 | 64 | Configuration conf = HBaseConfiguration.create(); 65 | 66 | JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf); 67 | 68 | hbaseContext.bulkDelete(rdd, TableName.valueOf(tableName), new DeleteFunction(), 4); 69 | } finally { 70 | jsc.stop(); 71 | } 72 | 73 | } 74 | 75 | public static class DeleteFunction implements Function { 76 | private static final long serialVersionUID = 1L; 77 | 78 | public Delete call(byte[] v) throws Exception { 79 | return new Delete(v); 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseBulkGetExample.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext; 19 | 20 | import java.util.ArrayList; 21 | import java.util.Iterator; 22 | import java.util.List; 23 | import org.apache.hadoop.conf.Configuration; 24 | import org.apache.hadoop.hbase.Cell; 25 | import org.apache.hadoop.hbase.HBaseConfiguration; 26 | import org.apache.hadoop.hbase.TableName; 27 | import org.apache.hadoop.hbase.client.Get; 28 | import org.apache.hadoop.hbase.client.Result; 29 | import org.apache.hadoop.hbase.spark.JavaHBaseContext; 30 | import org.apache.hadoop.hbase.util.Bytes; 31 | import org.apache.spark.SparkConf; 32 | import org.apache.spark.api.java.JavaRDD; 33 | import org.apache.spark.api.java.JavaSparkContext; 34 | import org.apache.spark.api.java.function.Function; 35 | import org.apache.yetus.audience.InterfaceAudience; 36 | 37 | /** 38 | * This is a simple example of getting records in HBase with the bulkGet function. 39 | */ 40 | @InterfaceAudience.Private 41 | final public class JavaHBaseBulkGetExample { 42 | 43 | private JavaHBaseBulkGetExample() { 44 | } 45 | 46 | public static void main(String[] args) { 47 | if (args.length < 1) { 48 | System.out.println("JavaHBaseBulkGetExample {tableName}"); 49 | return; 50 | } 51 | 52 | String tableName = args[0]; 53 | 54 | SparkConf sparkConf = new SparkConf().setAppName("JavaHBaseBulkGetExample " + tableName); 55 | JavaSparkContext jsc = new JavaSparkContext(sparkConf); 56 | 57 | try { 58 | List list = new ArrayList<>(5); 59 | list.add(Bytes.toBytes("1")); 60 | list.add(Bytes.toBytes("2")); 61 | list.add(Bytes.toBytes("3")); 62 | list.add(Bytes.toBytes("4")); 63 | list.add(Bytes.toBytes("5")); 64 | 65 | JavaRDD rdd = jsc.parallelize(list); 66 | 67 | Configuration conf = HBaseConfiguration.create(); 68 | 69 | JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf); 70 | 71 | hbaseContext.bulkGet(TableName.valueOf(tableName), 2, rdd, new GetFunction(), 72 | new ResultFunction()); 73 | } finally { 74 | jsc.stop(); 75 | } 76 | } 77 | 78 | public static class GetFunction implements Function { 79 | 80 | private static final long serialVersionUID = 1L; 81 | 82 | public Get call(byte[] v) throws Exception { 83 | return new Get(v); 84 | } 85 | } 86 | 87 | public static class ResultFunction implements Function { 88 | 89 | private static final long serialVersionUID = 1L; 90 | 91 | public String call(Result result) throws Exception { 92 | Iterator it = result.listCells().iterator(); 93 | StringBuilder b = new StringBuilder(); 94 | 95 | b.append(Bytes.toString(result.getRow())).append(":"); 96 | 97 | while (it.hasNext()) { 98 | Cell cell = it.next(); 99 | String q = Bytes.toString(cell.getQualifierArray()); 100 | if (q.equals("counter")) { 101 | b.append("(").append(Bytes.toString(cell.getQualifierArray())).append(",") 102 | .append(Bytes.toLong(cell.getValueArray())).append(")"); 103 | } else { 104 | b.append("(").append(Bytes.toString(cell.getQualifierArray())).append(",") 105 | .append(Bytes.toString(cell.getValueArray())).append(")"); 106 | } 107 | } 108 | return b.toString(); 109 | } 110 | } 111 | } 112 | 
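For comparison with the Java bulk get example above, here is a minimal Scala sketch of the same pattern built on `HBaseContext.bulkGet`; the table name `bulkGetTableTest` and the `local[2]` master URL are assumptions for illustration only.

```
import org.apache.hadoop.hbase.client.{Get, Result}
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.spark.{SparkConf, SparkContext}

object HBaseBulkGetSketch {
  def main(args: Array[String]): Unit = {
    val tableName = "bulkGetTableTest" // assumed table name
    val sparkConf = new SparkConf()
      .setAppName("HBaseBulkGetSketch " + tableName)
      .setMaster("local[2]") // assumption: run locally for the sketch
    val sc = new SparkContext(sparkConf)
    try {
      // Row keys to fetch, mirroring the Java example above.
      val rdd = sc.parallelize(Seq(Bytes.toBytes("1"), Bytes.toBytes("2"), Bytes.toBytes("3")))
      val hbaseContext = new HBaseContext(sc, HBaseConfiguration.create())

      // Batch size of 2, one Get per row key, and each Result rendered as its row key string.
      val getRdd = hbaseContext.bulkGet[Array[Byte], String](
        TableName.valueOf(tableName),
        2,
        rdd,
        (record: Array[Byte]) => new Get(record),
        (result: Result) => Bytes.toString(result.getRow))

      getRdd.collect().foreach(println)
    } finally {
      sc.stop()
    }
  }
}
```

The connector's own Scala examples under the example/hbasecontext package follow this same structure.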
-------------------------------------------------------------------------------- /spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseBulkLoadExample.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext; 19 | 20 | import java.util.ArrayList; 21 | import java.util.HashMap; 22 | import java.util.List; 23 | import org.apache.hadoop.conf.Configuration; 24 | import org.apache.hadoop.hbase.HBaseConfiguration; 25 | import org.apache.hadoop.hbase.HConstants; 26 | import org.apache.hadoop.hbase.TableName; 27 | import org.apache.hadoop.hbase.spark.FamilyHFileWriteOptions; 28 | import org.apache.hadoop.hbase.spark.JavaHBaseContext; 29 | import org.apache.hadoop.hbase.spark.KeyFamilyQualifier; 30 | import org.apache.hadoop.hbase.util.Bytes; 31 | import org.apache.hadoop.hbase.util.Pair; 32 | import org.apache.spark.SparkConf; 33 | import org.apache.spark.api.java.JavaRDD; 34 | import org.apache.spark.api.java.JavaSparkContext; 35 | import org.apache.spark.api.java.function.Function; 36 | import org.apache.yetus.audience.InterfaceAudience; 37 | 38 | /** 39 | * Run this example using command below: SPARK_HOME/bin/spark-submit --master local[2] --class 40 | * org.apache.hadoop.hbase.spark.example.hbasecontext.JavaHBaseBulkLoadExample 41 | * path/to/hbase-spark.jar {path/to/output/HFiles} This example will output put hfiles in 42 | * {path/to/output/HFiles}, and user can run 'hbase 43 | * org.apache.hadoop.hbase.tool.LoadIncrementalHFiles' to load the HFiles into table to verify this 44 | * example. 
45 | */ 46 | @InterfaceAudience.Private 47 | final public class JavaHBaseBulkLoadExample { 48 | private JavaHBaseBulkLoadExample() { 49 | } 50 | 51 | public static void main(String[] args) { 52 | if (args.length < 1) { 53 | System.out.println("JavaHBaseBulkLoadExample " + "{outputPath}"); 54 | return; 55 | } 56 | 57 | String tableName = "bulkload-table-test"; 58 | String columnFamily1 = "f1"; 59 | String columnFamily2 = "f2"; 60 | 61 | SparkConf sparkConf = new SparkConf().setAppName("JavaHBaseBulkLoadExample " + tableName); 62 | JavaSparkContext jsc = new JavaSparkContext(sparkConf); 63 | 64 | try { 65 | List list = new ArrayList(); 66 | // row1 67 | list.add("1," + columnFamily1 + ",b,1"); 68 | // row3 69 | list.add("3," + columnFamily1 + ",a,2"); 70 | list.add("3," + columnFamily1 + ",b,1"); 71 | list.add("3," + columnFamily2 + ",a,1"); 72 | /* row2 */ 73 | list.add("2," + columnFamily2 + ",a,3"); 74 | list.add("2," + columnFamily2 + ",b,3"); 75 | 76 | JavaRDD rdd = jsc.parallelize(list); 77 | 78 | Configuration conf = HBaseConfiguration.create(); 79 | JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf); 80 | 81 | hbaseContext.bulkLoad(rdd, TableName.valueOf(tableName), new BulkLoadFunction(), args[0], 82 | new HashMap(), false, HConstants.DEFAULT_MAX_FILE_SIZE); 83 | } finally { 84 | jsc.stop(); 85 | } 86 | } 87 | 88 | public static class BulkLoadFunction 89 | implements Function> { 90 | @Override 91 | public Pair call(String v1) throws Exception { 92 | if (v1 == null) { 93 | return null; 94 | } 95 | 96 | String[] strs = v1.split(","); 97 | if (strs.length != 4) { 98 | return null; 99 | } 100 | 101 | KeyFamilyQualifier kfq = new KeyFamilyQualifier(Bytes.toBytes(strs[0]), 102 | Bytes.toBytes(strs[1]), Bytes.toBytes(strs[2])); 103 | return new Pair(kfq, Bytes.toBytes(strs[3])); 104 | } 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseBulkPutExample.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext; 19 | 20 | import java.util.ArrayList; 21 | import java.util.List; 22 | import org.apache.hadoop.conf.Configuration; 23 | import org.apache.hadoop.hbase.HBaseConfiguration; 24 | import org.apache.hadoop.hbase.TableName; 25 | import org.apache.hadoop.hbase.client.Put; 26 | import org.apache.hadoop.hbase.spark.JavaHBaseContext; 27 | import org.apache.hadoop.hbase.util.Bytes; 28 | import org.apache.spark.SparkConf; 29 | import org.apache.spark.api.java.JavaRDD; 30 | import org.apache.spark.api.java.JavaSparkContext; 31 | import org.apache.spark.api.java.function.Function; 32 | import org.apache.yetus.audience.InterfaceAudience; 33 | 34 | /** 35 | * This is a simple example of putting records in HBase with the bulkPut function. 36 | */ 37 | @InterfaceAudience.Private 38 | final public class JavaHBaseBulkPutExample { 39 | 40 | private JavaHBaseBulkPutExample() { 41 | } 42 | 43 | public static void main(String[] args) { 44 | if (args.length < 2) { 45 | System.out.println("JavaHBaseBulkPutExample " + "{tableName} {columnFamily}"); 46 | return; 47 | } 48 | 49 | String tableName = args[0]; 50 | String columnFamily = args[1]; 51 | 52 | SparkConf sparkConf = new SparkConf().setAppName("JavaHBaseBulkPutExample " + tableName); 53 | JavaSparkContext jsc = new JavaSparkContext(sparkConf); 54 | 55 | try { 56 | List list = new ArrayList<>(5); 57 | list.add("1," + columnFamily + ",a,1"); 58 | list.add("2," + columnFamily + ",a,2"); 59 | list.add("3," + columnFamily + ",a,3"); 60 | list.add("4," + columnFamily + ",a,4"); 61 | list.add("5," + columnFamily + ",a,5"); 62 | 63 | JavaRDD rdd = jsc.parallelize(list); 64 | 65 | Configuration conf = HBaseConfiguration.create(); 66 | 67 | JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf); 68 | 69 | hbaseContext.bulkPut(rdd, TableName.valueOf(tableName), new PutFunction()); 70 | } finally { 71 | jsc.stop(); 72 | } 73 | } 74 | 75 | public static class PutFunction implements Function { 76 | 77 | private static final long serialVersionUID = 1L; 78 | 79 | public Put call(String v) throws Exception { 80 | String[] cells = v.split(","); 81 | Put put = new Put(Bytes.toBytes(cells[0])); 82 | 83 | put.addColumn(Bytes.toBytes(cells[1]), Bytes.toBytes(cells[2]), Bytes.toBytes(cells[3])); 84 | return put; 85 | } 86 | 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseDistributedScan.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext; 19 | 20 | import java.util.List; 21 | import org.apache.hadoop.conf.Configuration; 22 | import org.apache.hadoop.hbase.HBaseConfiguration; 23 | import org.apache.hadoop.hbase.TableName; 24 | import org.apache.hadoop.hbase.client.Result; 25 | import org.apache.hadoop.hbase.client.Scan; 26 | import org.apache.hadoop.hbase.io.ImmutableBytesWritable; 27 | import org.apache.hadoop.hbase.spark.JavaHBaseContext; 28 | import org.apache.hadoop.hbase.util.Bytes; 29 | import org.apache.spark.SparkConf; 30 | import org.apache.spark.api.java.JavaRDD; 31 | import org.apache.spark.api.java.JavaSparkContext; 32 | import org.apache.spark.api.java.function.Function; 33 | import org.apache.yetus.audience.InterfaceAudience; 34 | import scala.Tuple2; 35 | 36 | /** 37 | * This is a simple example of scanning records from HBase with the hbaseRDD function. 38 | */ 39 | @InterfaceAudience.Private 40 | final public class JavaHBaseDistributedScan { 41 | 42 | private JavaHBaseDistributedScan() { 43 | } 44 | 45 | public static void main(String[] args) { 46 | if (args.length < 1) { 47 | System.out.println("JavaHBaseDistributedScan {tableName}"); 48 | return; 49 | } 50 | 51 | String tableName = args[0]; 52 | 53 | SparkConf sparkConf = new SparkConf().setAppName("JavaHBaseDistributedScan " + tableName); 54 | JavaSparkContext jsc = new JavaSparkContext(sparkConf); 55 | 56 | try { 57 | Configuration conf = HBaseConfiguration.create(); 58 | 59 | JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf); 60 | 61 | Scan scan = new Scan(); 62 | scan.setCaching(100); 63 | 64 | JavaRDD> javaRdd = 65 | hbaseContext.hbaseRDD(TableName.valueOf(tableName), scan); 66 | 67 | List results = javaRdd.map(new ScanConvertFunction()).collect(); 68 | 69 | System.out.println("Result Size: " + results.size()); 70 | } finally { 71 | jsc.stop(); 72 | } 73 | } 74 | 75 | private static class ScanConvertFunction 76 | implements Function, String> { 77 | @Override 78 | public String call(Tuple2 v1) throws Exception { 79 | return Bytes.toString(v1._1().copyBytes()); 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseMapGetPutExample.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext; 19 | 20 | import java.util.ArrayList; 21 | import java.util.Iterator; 22 | import java.util.List; 23 | import org.apache.hadoop.conf.Configuration; 24 | import org.apache.hadoop.hbase.HBaseConfiguration; 25 | import org.apache.hadoop.hbase.TableName; 26 | import org.apache.hadoop.hbase.client.BufferedMutator; 27 | import org.apache.hadoop.hbase.client.Connection; 28 | import org.apache.hadoop.hbase.client.Get; 29 | import org.apache.hadoop.hbase.client.Put; 30 | import org.apache.hadoop.hbase.client.Result; 31 | import org.apache.hadoop.hbase.client.Table; 32 | import org.apache.hadoop.hbase.spark.JavaHBaseContext; 33 | import org.apache.hadoop.hbase.util.Bytes; 34 | import org.apache.spark.SparkConf; 35 | import org.apache.spark.api.java.JavaRDD; 36 | import org.apache.spark.api.java.JavaSparkContext; 37 | import org.apache.spark.api.java.function.Function; 38 | import org.apache.spark.api.java.function.VoidFunction; 39 | import org.apache.yetus.audience.InterfaceAudience; 40 | import scala.Tuple2; 41 | 42 | /** 43 | * This is a simple example of using the foreachPartition method with a HBase connection 44 | */ 45 | @InterfaceAudience.Private 46 | final public class JavaHBaseMapGetPutExample { 47 | 48 | private JavaHBaseMapGetPutExample() { 49 | } 50 | 51 | public static void main(String[] args) { 52 | if (args.length < 1) { 53 | System.out.println("JavaHBaseBulkGetExample {tableName}"); 54 | return; 55 | } 56 | 57 | final String tableName = args[0]; 58 | 59 | SparkConf sparkConf = new SparkConf().setAppName("JavaHBaseBulkGetExample " + tableName); 60 | JavaSparkContext jsc = new JavaSparkContext(sparkConf); 61 | 62 | try { 63 | List list = new ArrayList<>(5); 64 | list.add(Bytes.toBytes("1")); 65 | list.add(Bytes.toBytes("2")); 66 | list.add(Bytes.toBytes("3")); 67 | list.add(Bytes.toBytes("4")); 68 | list.add(Bytes.toBytes("5")); 69 | 70 | JavaRDD rdd = jsc.parallelize(list); 71 | Configuration conf = HBaseConfiguration.create(); 72 | 73 | JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf); 74 | 75 | hbaseContext.foreachPartition(rdd, new VoidFunction, Connection>>() { 76 | public void call(Tuple2, Connection> t) throws Exception { 77 | Table table = t._2().getTable(TableName.valueOf(tableName)); 78 | BufferedMutator mutator = t._2().getBufferedMutator(TableName.valueOf(tableName)); 79 | 80 | while (t._1().hasNext()) { 81 | byte[] b = t._1().next(); 82 | Result r = table.get(new Get(b)); 83 | if (r.getExists()) { 84 | mutator.mutate(new Put(b)); 85 | } 86 | } 87 | 88 | mutator.flush(); 89 | mutator.close(); 90 | table.close(); 91 | } 92 | }); 93 | } finally { 94 | jsc.stop(); 95 | } 96 | } 97 | 98 | public static class GetFunction implements Function { 99 | private static final long serialVersionUID = 1L; 100 | 101 | public Get call(byte[] v) throws Exception { 102 | return new Get(v); 103 | } 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseStreamingBulkPutExample.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext; 19 | 20 | import org.apache.hadoop.conf.Configuration; 21 | import org.apache.hadoop.hbase.HBaseConfiguration; 22 | import org.apache.hadoop.hbase.TableName; 23 | import org.apache.hadoop.hbase.client.Put; 24 | import org.apache.hadoop.hbase.spark.JavaHBaseContext; 25 | import org.apache.hadoop.hbase.util.Bytes; 26 | import org.apache.spark.SparkConf; 27 | import org.apache.spark.api.java.JavaSparkContext; 28 | import org.apache.spark.api.java.function.Function; 29 | import org.apache.spark.streaming.Duration; 30 | import org.apache.spark.streaming.api.java.JavaReceiverInputDStream; 31 | import org.apache.spark.streaming.api.java.JavaStreamingContext; 32 | import org.apache.yetus.audience.InterfaceAudience; 33 | 34 | /** 35 | * This is a simple example of BulkPut with Spark Streaming 36 | */ 37 | @InterfaceAudience.Private 38 | final public class JavaHBaseStreamingBulkPutExample { 39 | 40 | private JavaHBaseStreamingBulkPutExample() { 41 | } 42 | 43 | public static void main(String[] args) { 44 | if (args.length < 4) { 45 | System.out.println("JavaHBaseBulkPutExample " + "{host} {port} {tableName}"); 46 | return; 47 | } 48 | 49 | String host = args[0]; 50 | String port = args[1]; 51 | String tableName = args[2]; 52 | 53 | SparkConf sparkConf = new SparkConf() 54 | .setAppName("JavaHBaseStreamingBulkPutExample " + tableName + ":" + port + ":" + tableName); 55 | 56 | JavaSparkContext jsc = new JavaSparkContext(sparkConf); 57 | 58 | try { 59 | JavaStreamingContext jssc = new JavaStreamingContext(jsc, new Duration(1000)); 60 | 61 | JavaReceiverInputDStream javaDstream = 62 | jssc.socketTextStream(host, Integer.parseInt(port)); 63 | 64 | Configuration conf = HBaseConfiguration.create(); 65 | 66 | JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf); 67 | 68 | hbaseContext.streamBulkPut(javaDstream, TableName.valueOf(tableName), new PutFunction()); 69 | } finally { 70 | jsc.stop(); 71 | } 72 | } 73 | 74 | public static class PutFunction implements Function { 75 | 76 | private static final long serialVersionUID = 1L; 77 | 78 | public Put call(String v) throws Exception { 79 | String[] part = v.split(","); 80 | Put put = new Put(Bytes.toBytes(part[0])); 81 | 82 | put.addColumn(Bytes.toBytes(part[1]), Bytes.toBytes(part[2]), Bytes.toBytes(part[3])); 83 | return put; 84 | } 85 | 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/BulkLoadPartitioner.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import java.util 21 | import java.util.Comparator 22 | import org.apache.hadoop.hbase.util.Bytes 23 | import org.apache.spark.Partitioner 24 | import org.apache.yetus.audience.InterfaceAudience 25 | 26 | /** 27 | * A Partitioner implementation that will separate records to different 28 | * HBase Regions based on region splits 29 | * 30 | * @param startKeys The start keys for the given table 31 | */ 32 | @InterfaceAudience.Public 33 | class BulkLoadPartitioner(startKeys: Array[Array[Byte]]) extends Partitioner { 34 | // when table not exist, startKeys = Byte[0][] 35 | override def numPartitions: Int = if (startKeys.length == 0) 1 else startKeys.length 36 | 37 | override def getPartition(key: Any): Int = { 38 | 39 | val comparator: Comparator[Array[Byte]] = new Comparator[Array[Byte]] { 40 | override def compare(o1: Array[Byte], o2: Array[Byte]): Int = { 41 | Bytes.compareTo(o1, o2) 42 | } 43 | } 44 | 45 | val rowKey: Array[Byte] = 46 | key match { 47 | case qualifier: KeyFamilyQualifier => 48 | qualifier.rowKey 49 | case wrapper: ByteArrayWrapper => 50 | wrapper.value 51 | case _ => 52 | key.asInstanceOf[Array[Byte]] 53 | } 54 | var partition = util.Arrays.binarySearch(startKeys, rowKey, comparator) 55 | if (partition < 0) 56 | partition = partition * -1 + -2 57 | if (partition < 0) 58 | partition = 0 59 | partition 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/ByteArrayComparable.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import org.apache.hadoop.hbase.util.Bytes 21 | import org.apache.yetus.audience.InterfaceAudience 22 | 23 | @InterfaceAudience.Public 24 | class ByteArrayComparable(val bytes: Array[Byte], val offset: Int = 0, var length: Int = -1) 25 | extends Comparable[ByteArrayComparable] { 26 | 27 | if (length == -1) { 28 | length = bytes.length 29 | } 30 | 31 | override def compareTo(o: ByteArrayComparable): Int = { 32 | Bytes.compareTo(bytes, offset, length, o.bytes, o.offset, o.length) 33 | } 34 | 35 | override def hashCode(): Int = { 36 | Bytes.hashCode(bytes, offset, length) 37 | } 38 | 39 | override def equals(obj: Any): Boolean = { 40 | obj match { 41 | case b: ByteArrayComparable => 42 | Bytes.equals(bytes, offset, length, b.bytes, b.offset, b.length) 43 | case _ => 44 | false 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/ByteArrayWrapper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import java.io.Serializable 21 | import org.apache.hadoop.hbase.util.Bytes 22 | import org.apache.yetus.audience.InterfaceAudience 23 | 24 | /** 25 | * This is a wrapper over a byte array so it can work as 26 | * a key in a hashMap 27 | * 28 | * @param value The Byte Array value 29 | */ 30 | @InterfaceAudience.Public 31 | class ByteArrayWrapper(var value: Array[Byte]) 32 | extends Comparable[ByteArrayWrapper] 33 | with Serializable { 34 | override def compareTo(valueOther: ByteArrayWrapper): Int = { 35 | Bytes.compareTo(value, valueOther.value) 36 | } 37 | override def equals(o2: Any): Boolean = { 38 | o2 match { 39 | case wrapper: ByteArrayWrapper => 40 | Bytes.equals(value, wrapper.value) 41 | case _ => 42 | false 43 | } 44 | } 45 | override def hashCode(): Int = { 46 | Bytes.hashCode(value) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/ColumnFamilyQualifierMapKeyWrapper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import org.apache.hadoop.hbase.util.Bytes 21 | import org.apache.yetus.audience.InterfaceAudience 22 | 23 | /** 24 | * A wrapper class that will allow both columnFamily and qualifier to 25 | * be the key of a hashMap. Also allow for finding the value in a hashmap 26 | * with out cloning the HBase value from the HBase Cell object 27 | * @param columnFamily ColumnFamily byte array 28 | * @param columnFamilyOffSet Offset of columnFamily value in the array 29 | * @param columnFamilyLength Length of the columnFamily value in the columnFamily array 30 | * @param qualifier Qualifier byte array 31 | * @param qualifierOffSet Offset of qualifier value in the array 32 | * @param qualifierLength Length of the qualifier value with in the array 33 | */ 34 | @InterfaceAudience.Public 35 | class ColumnFamilyQualifierMapKeyWrapper( 36 | val columnFamily: Array[Byte], 37 | val columnFamilyOffSet: Int, 38 | val columnFamilyLength: Int, 39 | val qualifier: Array[Byte], 40 | val qualifierOffSet: Int, 41 | val qualifierLength: Int) 42 | extends Serializable { 43 | 44 | override def equals(other: Any): Boolean = { 45 | val otherWrapper = other.asInstanceOf[ColumnFamilyQualifierMapKeyWrapper] 46 | 47 | Bytes.compareTo( 48 | columnFamily, 49 | columnFamilyOffSet, 50 | columnFamilyLength, 51 | otherWrapper.columnFamily, 52 | otherWrapper.columnFamilyOffSet, 53 | otherWrapper.columnFamilyLength) == 0 && Bytes.compareTo( 54 | qualifier, 55 | qualifierOffSet, 56 | qualifierLength, 57 | otherWrapper.qualifier, 58 | otherWrapper.qualifierOffSet, 59 | otherWrapper.qualifierLength) == 0 60 | } 61 | 62 | override def hashCode(): Int = { 63 | Bytes.hashCode(columnFamily, columnFamilyOffSet, columnFamilyLength) + 64 | Bytes.hashCode(qualifier, qualifierOffSet, qualifierLength) 65 | } 66 | 67 | def cloneColumnFamily(): Array[Byte] = { 68 | val resultArray = new Array[Byte](columnFamilyLength) 69 | System.arraycopy(columnFamily, columnFamilyOffSet, resultArray, 0, columnFamilyLength) 70 | resultArray 71 | } 72 | 73 | def cloneQualifier(): Array[Byte] = { 74 | val resultArray = new Array[Byte](qualifierLength) 75 | System.arraycopy(qualifier, qualifierOffSet, resultArray, 0, qualifierLength) 76 | resultArray 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/FamiliesQualifiersValues.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import java.util 21 | import org.apache.yetus.audience.InterfaceAudience; 22 | 23 | /** 24 | * This object is a clean way to store and sort all cells that will be bulk 25 | * loaded into a single row 26 | */ 27 | @InterfaceAudience.Public 28 | class FamiliesQualifiersValues extends Serializable { 29 | // Tree maps are used because we need the results to 30 | // be sorted when we read them 31 | val familyMap = new util.TreeMap[ByteArrayWrapper, util.TreeMap[ByteArrayWrapper, Array[Byte]]]() 32 | 33 | // normally in a row there are more columns then 34 | // column families this wrapper is reused for column 35 | // family look ups 36 | val reusableWrapper = new ByteArrayWrapper(null) 37 | 38 | /** 39 | * Adds a new cell to an existing row 40 | * @param family HBase column family 41 | * @param qualifier HBase column qualifier 42 | * @param value HBase cell value 43 | */ 44 | def +=(family: Array[Byte], qualifier: Array[Byte], value: Array[Byte]): Unit = { 45 | 46 | reusableWrapper.value = family 47 | 48 | var qualifierValues = familyMap.get(reusableWrapper) 49 | 50 | if (qualifierValues == null) { 51 | qualifierValues = new util.TreeMap[ByteArrayWrapper, Array[Byte]]() 52 | familyMap.put(new ByteArrayWrapper(family), qualifierValues) 53 | } 54 | 55 | qualifierValues.put(new ByteArrayWrapper(qualifier), value) 56 | } 57 | 58 | /** 59 | * A wrapper for "+=" method above, can be used by Java 60 | * @param family HBase column family 61 | * @param qualifier HBase column qualifier 62 | * @param value HBase cell value 63 | */ 64 | def add(family: Array[Byte], qualifier: Array[Byte], value: Array[Byte]): Unit = { 65 | this += (family, qualifier, value) 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/FamilyHFileWriteOptions.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import java.io.Serializable 21 | import org.apache.yetus.audience.InterfaceAudience; 22 | 23 | /** 24 | * This object will hold optional data for how a given column family's 25 | * writer will work 26 | * 27 | * @param compression String to define the Compression to be used in the HFile 28 | * @param bloomType String to define the bloom type to be used in the HFile 29 | * @param blockSize The block size to be used in the HFile 30 | * @param dataBlockEncoding String to define the data block encoding to be used 31 | * in the HFile 32 | */ 33 | @InterfaceAudience.Public 34 | class FamilyHFileWriteOptions( 35 | val compression: String, 36 | val bloomType: String, 37 | val blockSize: Int, 38 | val dataBlockEncoding: String) 39 | extends Serializable 40 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/KeyFamilyQualifier.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import java.io.Serializable 21 | import org.apache.hadoop.hbase.util.Bytes 22 | import org.apache.yetus.audience.InterfaceAudience 23 | 24 | /** 25 | * This is the key to be used for sorting and shuffling. 26 | * 27 | * We will only partition on the rowKey but we will sort on all three 28 | * 29 | * @param rowKey Record RowKey 30 | * @param family Record ColumnFamily 31 | * @param qualifier Cell Qualifier 32 | */ 33 | @InterfaceAudience.Public 34 | class KeyFamilyQualifier( 35 | val rowKey: Array[Byte], 36 | val family: Array[Byte], 37 | val qualifier: Array[Byte]) 38 | extends Comparable[KeyFamilyQualifier] 39 | with Serializable { 40 | override def compareTo(o: KeyFamilyQualifier): Int = { 41 | var result = Bytes.compareTo(rowKey, o.rowKey) 42 | if (result == 0) { 43 | result = Bytes.compareTo(family, o.family) 44 | if (result == 0) result = Bytes.compareTo(qualifier, o.qualifier) 45 | } 46 | result 47 | } 48 | override def toString: String = { 49 | Bytes.toString(rowKey) + ":" + Bytes.toString(family) + ":" + Bytes.toString(qualifier) 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/Logging.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import org.apache.yetus.audience.InterfaceAudience 21 | import org.slf4j.Logger 22 | import org.slf4j.LoggerFactory 23 | import org.slf4j.impl.StaticLoggerBinder 24 | 25 | /** 26 | * Utility trait for classes that want to log data. Creates a SLF4J logger for the class and allows 27 | * logging messages at different levels using methods that only evaluate parameters lazily if the 28 | * log level is enabled. 29 | * Logging is private in Spark 2.0 30 | * This is to isolate incompatibilties across Spark releases. 31 | */ 32 | @InterfaceAudience.Private 33 | trait Logging { 34 | 35 | // Make the log field transient so that objects with Logging can 36 | // be serialized and used on another machine 37 | @transient private var log_ : Logger = null 38 | 39 | // Method to get the logger name for this object 40 | protected def logName = { 41 | // Ignore trailing $'s in the class names for Scala objects 42 | this.getClass.getName.stripSuffix("$") 43 | } 44 | 45 | // Method to get or create the logger for this object 46 | protected def log: Logger = { 47 | if (log_ == null) { 48 | initializeLogIfNecessary(false) 49 | log_ = LoggerFactory.getLogger(logName) 50 | } 51 | log_ 52 | } 53 | 54 | // Log methods that take only a String 55 | protected def logInfo(msg: => String) { 56 | if (log.isInfoEnabled) log.info(msg) 57 | } 58 | 59 | protected def logDebug(msg: => String) { 60 | if (log.isDebugEnabled) log.debug(msg) 61 | } 62 | 63 | protected def logTrace(msg: => String) { 64 | if (log.isTraceEnabled) log.trace(msg) 65 | } 66 | 67 | protected def logWarning(msg: => String) { 68 | if (log.isWarnEnabled) log.warn(msg) 69 | } 70 | 71 | protected def logError(msg: => String) { 72 | if (log.isErrorEnabled) log.error(msg) 73 | } 74 | 75 | // Log methods that take Throwables (Exceptions/Errors) too 76 | protected def logInfo(msg: => String, throwable: Throwable) { 77 | if (log.isInfoEnabled) log.info(msg, throwable) 78 | } 79 | 80 | protected def logDebug(msg: => String, throwable: Throwable) { 81 | if (log.isDebugEnabled) log.debug(msg, throwable) 82 | } 83 | 84 | protected def logTrace(msg: => String, throwable: Throwable) { 85 | if (log.isTraceEnabled) log.trace(msg, throwable) 86 | } 87 | 88 | protected def logWarning(msg: => String, throwable: Throwable) { 89 | if (log.isWarnEnabled) log.warn(msg, throwable) 90 | } 91 | 92 | protected def logError(msg: => String, throwable: Throwable) { 93 | if (log.isErrorEnabled) log.error(msg, throwable) 94 | } 95 | 96 | protected def initializeLogIfNecessary(isInterpreter: Boolean): Unit = { 97 | if (!Logging.initialized) { 98 | Logging.initLock.synchronized { 99 | if (!Logging.initialized) { 100 | initializeLogging(isInterpreter) 101 | } 102 | } 103 | } 104 | } 105 | 106 | private def initializeLogging(isInterpreter: Boolean): Unit = { 107 | // Don't use a logger in here, as this is itself occurring 
during initialization of a logger 108 | // If Log4j 1.2 is being used, but is not initialized, load a default properties file 109 | val binderClass = StaticLoggerBinder.getSingleton.getLoggerFactoryClassStr 110 | Logging.initialized = true 111 | 112 | // Force a call into slf4j to initialize it. Avoids this happening from multiple threads 113 | // and triggering this: http://mailman.qos.ch/pipermail/slf4j-dev/2010-April/002956.html 114 | log 115 | } 116 | } 117 | 118 | private object Logging { 119 | @volatile private var initialized = false 120 | val initLock = new Object() 121 | } 122 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/NewHBaseRDD.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import org.apache.hadoop.conf.Configuration 21 | import org.apache.hadoop.mapreduce.InputFormat 22 | import org.apache.spark.{InterruptibleIterator, Partition, SparkContext, TaskContext} 23 | import org.apache.spark.rdd.NewHadoopRDD 24 | import org.apache.yetus.audience.InterfaceAudience 25 | 26 | @InterfaceAudience.Public 27 | class NewHBaseRDD[K, V]( 28 | @transient val sc: SparkContext, 29 | @transient val inputFormatClass: Class[_ <: InputFormat[K, V]], 30 | @transient val keyClass: Class[K], 31 | @transient val valueClass: Class[V], 32 | @transient private val __conf: Configuration, 33 | val hBaseContext: HBaseContext) 34 | extends NewHadoopRDD(sc, inputFormatClass, keyClass, valueClass, __conf) { 35 | 36 | override def compute(theSplit: Partition, context: TaskContext): InterruptibleIterator[(K, V)] = { 37 | super.compute(theSplit, context) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/Bound.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark.datasources 19 | 20 | import org.apache.hadoop.hbase.spark.hbase._ 21 | import org.apache.yetus.audience.InterfaceAudience 22 | 23 | /** 24 | * The Bound represent the boudary for the scan 25 | * 26 | * @param b The byte array of the bound 27 | * @param inc inclusive or not. 28 | */ 29 | @InterfaceAudience.Private 30 | case class Bound(b: Array[Byte], inc: Boolean) 31 | // The non-overlapping ranges we need to scan, if lower is equal to upper, it is a get request 32 | 33 | @InterfaceAudience.Private 34 | case class Range(lower: Option[Bound], upper: Option[Bound]) 35 | 36 | @InterfaceAudience.Private 37 | object Range { 38 | def apply(region: HBaseRegion): Range = { 39 | Range( 40 | region.start.map(Bound(_, true)), 41 | if (region.end.get.size == 0) { 42 | None 43 | } else { 44 | region.end.map((Bound(_, false))) 45 | }) 46 | } 47 | } 48 | 49 | @InterfaceAudience.Private 50 | object Ranges { 51 | // We assume that 52 | // 1. r.lower.inc is true, and r.upper.inc is false 53 | // 2. for each range in rs, its upper.inc is false 54 | def and(r: Range, rs: Seq[Range]): Seq[Range] = { 55 | rs.flatMap { 56 | s => 57 | val lower = s.lower 58 | .map { 59 | x => 60 | // the scan has lower bound 61 | r.lower 62 | .map { 63 | y => 64 | // the region has lower bound 65 | if (ord.compare(x.b, y.b) < 0) { 66 | // scan lower bound is smaller than region server lower bound 67 | Some(y) 68 | } else { 69 | // scan low bound is greater or equal to region server lower bound 70 | Some(x) 71 | } 72 | } 73 | .getOrElse(Some(x)) 74 | } 75 | .getOrElse(r.lower) 76 | 77 | val upper = s.upper 78 | .map { 79 | x => 80 | // the scan has upper bound 81 | r.upper 82 | .map { 83 | y => 84 | // the region has upper bound 85 | if (ord.compare(x.b, y.b) >= 0) { 86 | // scan upper bound is larger than server upper bound 87 | // but region server scan stop is exclusive. It is OK here. 
88 | Some(y) 89 | } else { 90 | // scan upper bound is less or equal to region server upper bound 91 | Some(x) 92 | } 93 | } 94 | .getOrElse(Some(x)) 95 | } 96 | .getOrElse(r.upper) 97 | 98 | val c = lower 99 | .map { 100 | case x => 101 | upper 102 | .map { 103 | case y => 104 | ord.compare(x.b, y.b) 105 | } 106 | .getOrElse(-1) 107 | } 108 | .getOrElse(-1) 109 | if (c < 0) { 110 | Some(Range(lower, upper)) 111 | } else { 112 | None 113 | } 114 | }.seq 115 | } 116 | } 117 | 118 | @InterfaceAudience.Private 119 | object Points { 120 | def and(r: Range, ps: Seq[Array[Byte]]): Seq[Array[Byte]] = { 121 | ps.flatMap { 122 | p => 123 | if (ord.compare(r.lower.get.b, p) <= 0) { 124 | // if region lower bound is less or equal to the point 125 | if (r.upper.isDefined) { 126 | // if region upper bound is defined 127 | if (ord.compare(r.upper.get.b, p) > 0) { 128 | // if the upper bound is greater than the point (because upper bound is exclusive) 129 | Some(p) 130 | } else { 131 | None 132 | } 133 | } else { 134 | // if the region upper bound is not defined (infinity) 135 | Some(p) 136 | } 137 | } else { 138 | None 139 | } 140 | } 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/DataTypeParserWrapper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark.datasources 19 | 20 | import org.apache.spark.sql.catalyst.parser.CatalystSqlParser 21 | import org.apache.spark.sql.types.DataType 22 | import org.apache.yetus.audience.InterfaceAudience 23 | 24 | @InterfaceAudience.Private 25 | trait DataTypeParser { 26 | def parse(dataTypeString: String): DataType 27 | } 28 | 29 | @InterfaceAudience.Private 30 | object DataTypeParserWrapper extends DataTypeParser { 31 | def parse(dataTypeString: String): DataType = CatalystSqlParser.parseDataType(dataTypeString) 32 | } 33 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/HBaseResources.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
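For illustration, the intersection performed by Ranges.and and Points.and above (in Bound.scala) can be exercised directly: each region range is clipped against the predicate range, and empty intersections are dropped. A minimal sketch, assuming the hbase-spark module is on the classpath; the object name RangesAndSketch and the sample row keys are illustrative only.

import org.apache.hadoop.hbase.spark.datasources.{Bound, Range, Ranges}
import org.apache.hadoop.hbase.util.Bytes

object RangesAndSketch {
  def main(args: Array[String]): Unit = {
    // Predicate-derived scan range [row010, row020): lower inclusive, upper exclusive,
    // matching the assumptions documented on Ranges.and.
    val predicate = Range(
      Some(Bound(Bytes.toBytes("row010"), inc = true)),
      Some(Bound(Bytes.toBytes("row020"), inc = false)))
    // Two region ranges with exclusive upper bounds; the first region starts at the empty key.
    val regions = Seq(
      Range(
        Some(Bound(Bytes.toBytes(""), inc = true)),
        Some(Bound(Bytes.toBytes("row015"), inc = false))),
      Range(
        Some(Bound(Bytes.toBytes("row015"), inc = true)),
        Some(Bound(Bytes.toBytes("row030"), inc = false))))
    // Expected output: [row010, row015) and [row015, row020).
    Ranges.and(predicate, regions).foreach {
      r =>
        val lo = r.lower.map(b => Bytes.toString(b.b)).getOrElse("-inf")
        val hi = r.upper.map(b => Bytes.toString(b.b)).getOrElse("+inf")
        println(s"[$lo, $hi)")
    }
  }
}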
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark.datasources 19 | 20 | import org.apache.hadoop.hbase.TableName 21 | import org.apache.hadoop.hbase.client._ 22 | import org.apache.hadoop.hbase.spark.{HBaseConnectionCache, HBaseConnectionKey, HBaseRelation, SmartConnection} 23 | import org.apache.yetus.audience.InterfaceAudience 24 | import scala.language.implicitConversions 25 | 26 | // Resource and ReferencedResources are defined for extensibility, 27 | // e.g., consolidate scan and bulkGet in the future work. 28 | 29 | // User has to invoke release explicitly to release the resource, 30 | // and potentially parent resources 31 | @InterfaceAudience.Private 32 | trait Resource { 33 | def release(): Unit 34 | } 35 | 36 | @InterfaceAudience.Private 37 | case class ScanResource(tbr: TableResource, rs: ResultScanner) extends Resource { 38 | def release() { 39 | rs.close() 40 | tbr.release() 41 | } 42 | } 43 | 44 | @InterfaceAudience.Private 45 | case class GetResource(tbr: TableResource, rs: Array[Result]) extends Resource { 46 | def release() { 47 | tbr.release() 48 | } 49 | } 50 | 51 | @InterfaceAudience.Private 52 | trait ReferencedResource { 53 | var count: Int = 0 54 | def init(): Unit 55 | def destroy(): Unit 56 | def acquire() = synchronized { 57 | try { 58 | count += 1 59 | if (count == 1) { 60 | init() 61 | } 62 | } catch { 63 | case e: Throwable => 64 | release() 65 | throw e 66 | } 67 | } 68 | 69 | def release() = synchronized { 70 | count -= 1 71 | if (count == 0) { 72 | destroy() 73 | } 74 | } 75 | 76 | def releaseOnException[T](func: => T): T = { 77 | acquire() 78 | val ret = { 79 | try { 80 | func 81 | } catch { 82 | case e: Throwable => 83 | release() 84 | throw e 85 | } 86 | } 87 | ret 88 | } 89 | } 90 | 91 | @InterfaceAudience.Private 92 | case class TableResource(relation: HBaseRelation) extends ReferencedResource { 93 | var connection: SmartConnection = _ 94 | var table: Table = _ 95 | 96 | override def init(): Unit = { 97 | connection = HBaseConnectionCache.getConnection(relation.hbaseConf) 98 | table = connection.getTable(TableName.valueOf(relation.tableName)) 99 | } 100 | 101 | override def destroy(): Unit = { 102 | if (table != null) { 103 | table.close() 104 | table = null 105 | } 106 | if (connection != null) { 107 | connection.close() 108 | connection = null 109 | } 110 | } 111 | 112 | def getScanner(scan: Scan): ScanResource = releaseOnException { 113 | ScanResource(this, table.getScanner(scan)) 114 | } 115 | 116 | def get(list: java.util.List[org.apache.hadoop.hbase.client.Get]) = releaseOnException { 117 | GetResource(this, table.get(list)) 118 | } 119 | } 120 | 121 | @InterfaceAudience.Private 122 | case class RegionResource(relation: HBaseRelation) extends ReferencedResource { 123 | var connection: SmartConnection = _ 124 | var rl: RegionLocator = _ 125 | val regions = releaseOnException { 126 | val keys = rl.getStartEndKeys 127 | keys.getFirst 128 | .zip(keys.getSecond) 129 | .zipWithIndex 130 | .map( 131 | x => 132 | HBaseRegion( 133 | x._2, 134 | Some(x._1._1), 135 | Some(x._1._2), 136 | 
Some(rl.getRegionLocation(x._1._1).getHostname))) 137 | } 138 | 139 | override def init(): Unit = { 140 | connection = HBaseConnectionCache.getConnection(relation.hbaseConf) 141 | rl = connection.getRegionLocator(TableName.valueOf(relation.tableName)) 142 | } 143 | 144 | override def destroy(): Unit = { 145 | if (rl != null) { 146 | rl.close() 147 | rl = null 148 | } 149 | if (connection != null) { 150 | connection.close() 151 | connection = null 152 | } 153 | } 154 | } 155 | 156 | @InterfaceAudience.Private 157 | object HBaseResources { 158 | implicit def ScanResToScan(sr: ScanResource): ResultScanner = { 159 | sr.rs 160 | } 161 | 162 | implicit def GetResToResult(gr: GetResource): Array[Result] = { 163 | gr.rs 164 | } 165 | 166 | implicit def TableResToTable(tr: TableResource): Table = { 167 | tr.table 168 | } 169 | 170 | implicit def RegionResToRegions(rr: RegionResource): Seq[HBaseRegion] = { 171 | rr.regions 172 | } 173 | } 174 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/HBaseSparkConf.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark.datasources 19 | 20 | import org.apache.yetus.audience.InterfaceAudience; 21 | 22 | /** 23 | * This is the hbase configuration. User can either set them in SparkConf, which 24 | * will take effect globally, or configure it per table, which will overwrite the value 25 | * set in SparkConf. If not set, the default value will take effect. 26 | */ 27 | @InterfaceAudience.Public 28 | object HBaseSparkConf { 29 | 30 | /** 31 | * Set to false to disable server-side caching of blocks for this scan, 32 | * false by default, since full table scans generate too much BC churn. 33 | */ 34 | val QUERY_CACHEBLOCKS = "hbase.spark.query.cacheblocks" 35 | val DEFAULT_QUERY_CACHEBLOCKS = false 36 | 37 | /** The number of rows for caching that will be passed to scan. */ 38 | val QUERY_CACHEDROWS = "hbase.spark.query.cachedrows" 39 | 40 | /** Set the maximum number of values to return for each call to next() in scan. */ 41 | val QUERY_BATCHSIZE = "hbase.spark.query.batchsize" 42 | 43 | /** The number of BulkGets send to HBase. */ 44 | val BULKGET_SIZE = "hbase.spark.bulkget.size" 45 | val DEFAULT_BULKGET_SIZE = 1000 46 | 47 | /** Set to specify the location of hbase configuration file. 
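Since the keys above are plain string properties, they can also be supplied per read through the DataFrame options map, overriding anything set globally in SparkConf. A minimal sketch, assuming a table catalog JSON string cat like the one in the HBaseSource example later in this listing; the helper name readWithOptions and the chosen option values are illustrative only.

import org.apache.hadoop.hbase.spark.datasources.{HBaseSparkConf, HBaseTableCatalog}
import org.apache.spark.sql.{DataFrame, SQLContext}

// Hypothetical helper: read an HBase-backed DataFrame with per-table settings,
// here 1000 rows of scanner caching, block cache disabled, and bulk gets of 100.
def readWithOptions(sqlContext: SQLContext, cat: String): DataFrame =
  sqlContext.read
    .options(Map(
      HBaseTableCatalog.tableCatalog -> cat,
      HBaseSparkConf.QUERY_CACHEDROWS -> "1000",
      HBaseSparkConf.QUERY_CACHEBLOCKS -> "false",
      HBaseSparkConf.BULKGET_SIZE -> "100"))
    .format("org.apache.hadoop.hbase.spark")
    .load()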
*/ 48 | val HBASE_CONFIG_LOCATION = "hbase.spark.config.location" 49 | 50 | /** Set to specify whether create or use latest cached HBaseContext */ 51 | val USE_HBASECONTEXT = "hbase.spark.use.hbasecontext" 52 | val DEFAULT_USE_HBASECONTEXT = true 53 | 54 | /** Pushdown the filter to data source engine to increase the performance of queries. */ 55 | val PUSHDOWN_COLUMN_FILTER = "hbase.spark.pushdown.columnfilter" 56 | val DEFAULT_PUSHDOWN_COLUMN_FILTER = true 57 | 58 | /** Class name of the encoder, which encode data types from Spark to HBase bytes. */ 59 | val QUERY_ENCODER = "hbase.spark.query.encoder" 60 | val DEFAULT_QUERY_ENCODER = classOf[NaiveEncoder].getCanonicalName 61 | 62 | /** The timestamp used to filter columns with a specific timestamp. */ 63 | val TIMESTAMP = "hbase.spark.query.timestamp" 64 | 65 | /** The starting timestamp used to filter columns with a specific range of versions. */ 66 | val TIMERANGE_START = "hbase.spark.query.timerange.start" 67 | 68 | /** The ending timestamp used to filter columns with a specific range of versions. */ 69 | val TIMERANGE_END = "hbase.spark.query.timerange.end" 70 | 71 | /** The maximum number of version to return. */ 72 | val MAX_VERSIONS = "hbase.spark.query.maxVersions" 73 | 74 | /** Delayed time to close hbase-spark connection when no reference to this connection, in milliseconds. */ 75 | val DEFAULT_CONNECTION_CLOSE_DELAY = 10 * 60 * 1000 76 | } 77 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/JavaBytesEncoder.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark.datasources 19 | 20 | import org.apache.hadoop.hbase.HBaseInterfaceAudience 21 | import org.apache.hadoop.hbase.spark.Logging 22 | import org.apache.hadoop.hbase.spark.datasources.JavaBytesEncoder.JavaBytesEncoder 23 | import org.apache.spark.sql.types._ 24 | import org.apache.yetus.audience.InterfaceAudience 25 | import org.apache.yetus.audience.InterfaceStability 26 | 27 | /** 28 | * The ranges for the data type whose size is known. Whether the bound is inclusive 29 | * or exclusive is undefind, and upper to the caller to decide. 30 | * 31 | * @param low: the lower bound of the range. 32 | * @param upper: the upper bound of the range. 33 | */ 34 | @InterfaceAudience.LimitedPrivate(Array(HBaseInterfaceAudience.SPARK)) 35 | @InterfaceStability.Evolving 36 | case class BoundRange(low: Array[Byte], upper: Array[Byte]) 37 | 38 | /** 39 | * The class identifies the ranges for a java primitive type. 
The caller needs 40 | * to decide the bound is either inclusive or exclusive on its own. 41 | * information 42 | * 43 | * @param less: the set of ranges for LessThan/LessOrEqualThan 44 | * @param greater: the set of ranges for GreaterThan/GreaterThanOrEqualTo 45 | * @param value: the byte array of the original value 46 | */ 47 | @InterfaceAudience.LimitedPrivate(Array(HBaseInterfaceAudience.SPARK)) 48 | @InterfaceStability.Evolving 49 | case class BoundRanges(less: Array[BoundRange], greater: Array[BoundRange], value: Array[Byte]) 50 | 51 | /** 52 | * The trait to support plugin architecture for different encoder/decoder. 53 | * encode is used for serializing the data type to byte array and the filter is 54 | * used to filter out the unnecessary records. 55 | */ 56 | @InterfaceAudience.LimitedPrivate(Array(HBaseInterfaceAudience.SPARK)) 57 | @InterfaceStability.Evolving 58 | trait BytesEncoder { 59 | def encode(dt: DataType, value: Any): Array[Byte] 60 | 61 | /** 62 | * The function performing real filtering operations. The format of filterBytes depends on the 63 | * implementation of the BytesEncoder. 64 | * 65 | * @param input: the current input byte array that needs to be filtered out 66 | * @param offset1: the starting offset of the input byte array. 67 | * @param length1: the length of the input byte array. 68 | * @param filterBytes: the byte array provided by query condition. 69 | * @param offset2: the starting offset in the filterBytes. 70 | * @param length2: the length of the bytes in the filterBytes 71 | * @param ops: The operation of the filter operator. 72 | * @return true: the record satisfies the predicates 73 | * false: the record does not satisfy the predicates. 74 | */ 75 | def filter( 76 | input: Array[Byte], 77 | offset1: Int, 78 | length1: Int, 79 | filterBytes: Array[Byte], 80 | offset2: Int, 81 | length2: Int, 82 | ops: JavaBytesEncoder): Boolean 83 | 84 | /** 85 | * Currently, it is used for partition pruning. 86 | * As for some codec, the order may be inconsistent between java primitive 87 | * type and its byte array. We may have to split the predicates on some 88 | * of the java primitive type into multiple predicates. 89 | * 90 | * For example in naive codec, some of the java primitive types have to be 91 | * split into multiple predicates, and union these predicates together to 92 | * make the predicates be performed correctly. 93 | * For example, if we have "COLUMN < 2", we will transform it into 94 | * "0 <= COLUMN < 2 OR Integer.MIN_VALUE <= COLUMN <= -1" 95 | */ 96 | def ranges(in: Any): Option[BoundRanges] 97 | } 98 | 99 | @InterfaceAudience.LimitedPrivate(Array(HBaseInterfaceAudience.SPARK)) 100 | @InterfaceStability.Evolving 101 | object JavaBytesEncoder extends Enumeration with Logging { 102 | type JavaBytesEncoder = Value 103 | val Greater, GreaterEqual, Less, LessEqual, Equal, Unknown = Value 104 | 105 | /** 106 | * create the encoder/decoder 107 | * 108 | * @param clsName: the class name of the encoder/decoder class 109 | * @return the instance of the encoder plugin. 
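A minimal sketch of the plugin API described above, assuming the hbase-spark module is on the classpath; the object name EncoderSketch and the predicate value 2 are illustrative only. It obtains the default encoder through JavaBytesEncoder.create and prints the byte ranges produced for an Int predicate such as COLUMN < 2.

import org.apache.hadoop.hbase.spark.datasources.{HBaseSparkConf, JavaBytesEncoder}
import org.apache.hadoop.hbase.util.Bytes

object EncoderSketch {
  def main(args: Array[String]): Unit = {
    // create falls back to NaiveEncoder if the class name cannot be instantiated.
    val encoder = JavaBytesEncoder.create(HBaseSparkConf.DEFAULT_QUERY_ENCODER)
    // For a signed Int under the naive codec, a predicate like COLUMN < 2 is expected
    // to split into two byte ranges, because byte order differs from numeric order.
    encoder.ranges(2).foreach {
      br =>
        br.less.foreach(r =>
          println(s"less: ${Bytes.toStringBinary(r.low)} .. ${Bytes.toStringBinary(r.upper)}"))
        br.greater.foreach(r =>
          println(s"greater: ${Bytes.toStringBinary(r.low)} .. ${Bytes.toStringBinary(r.upper)}"))
    }
  }
}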
110 | */ 111 | def create(clsName: String): BytesEncoder = { 112 | try { 113 | Class.forName(clsName).newInstance.asInstanceOf[BytesEncoder] 114 | } catch { 115 | case _: Throwable => 116 | logWarning(s"$clsName cannot be initiated, falling back to naive encoder") 117 | new NaiveEncoder() 118 | } 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/SerDes.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark.datasources 19 | 20 | import org.apache.hadoop.hbase.util.Bytes 21 | import org.apache.yetus.audience.InterfaceAudience 22 | 23 | // TODO: This is not really used in code. 24 | @InterfaceAudience.Public 25 | trait SerDes { 26 | def serialize(value: Any): Array[Byte] 27 | def deserialize(bytes: Array[Byte], start: Int, end: Int): Any 28 | } 29 | 30 | // TODO: This is not really used in code. 31 | @InterfaceAudience.Private 32 | class DoubleSerDes extends SerDes { 33 | override def serialize(value: Any): Array[Byte] = Bytes.toBytes(value.asInstanceOf[Double]) 34 | override def deserialize(bytes: Array[Byte], start: Int, end: Int): Any = { 35 | Bytes.toDouble(bytes, start) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/SerializableConfiguration.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
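Although the SerDes trait above is marked as unused, a round trip through DoubleSerDes shows the intended contract; a minimal sketch, with the value 3.14 chosen arbitrarily.

import org.apache.hadoop.hbase.spark.datasources.DoubleSerDes

val serde = new DoubleSerDes
val bytes = serde.serialize(3.14)                      // 8-byte encoding via Bytes.toBytes
val back = serde.deserialize(bytes, 0, bytes.length)   // returns Any, here a Double
assert(back == 3.14)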
17 | */ 18 | package org.apache.hadoop.hbase.spark.datasources 19 | 20 | import java.io.{IOException, ObjectInputStream, ObjectOutputStream} 21 | import org.apache.hadoop.conf.Configuration 22 | import org.apache.yetus.audience.InterfaceAudience 23 | import scala.util.control.NonFatal 24 | 25 | @InterfaceAudience.Private 26 | class SerializableConfiguration(@transient var value: Configuration) extends Serializable { 27 | private def writeObject(out: ObjectOutputStream): Unit = tryOrIOException { 28 | out.defaultWriteObject() 29 | value.write(out) 30 | } 31 | 32 | private def readObject(in: ObjectInputStream): Unit = tryOrIOException { 33 | value = new Configuration(false) 34 | value.readFields(in) 35 | } 36 | 37 | def tryOrIOException(block: => Unit) { 38 | try { 39 | block 40 | } catch { 41 | case e: IOException => throw e 42 | case NonFatal(t) => throw new IOException(t) 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/Utils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark.datasources 19 | 20 | import java.sql.{Date, Timestamp} 21 | import org.apache.hadoop.hbase.spark.AvroSerdes 22 | import org.apache.hadoop.hbase.util.Bytes 23 | import org.apache.spark.sql.types._ 24 | import org.apache.spark.unsafe.types.UTF8String 25 | import org.apache.yetus.audience.InterfaceAudience; 26 | 27 | @InterfaceAudience.Private 28 | object Utils { 29 | 30 | /** 31 | * Parses the hbase field to it's corresponding 32 | * scala type which can then be put into a Spark GenericRow 33 | * which is then automatically converted by Spark. 
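Hadoop's Configuration is not java-serializable, which is why the SerializableConfiguration wrapper above ships it through write()/readFields(). A minimal sketch of broadcasting an HBase configuration to executors with it, assuming an existing SparkContext; the helper name broadcastConf and the property queried on the executor are illustrative only.

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.spark.datasources.SerializableConfiguration
import org.apache.spark.SparkContext

def broadcastConf(sc: SparkContext): Unit = {
  val wrapped = new SerializableConfiguration(HBaseConfiguration.create())
  val bcConf = sc.broadcast(wrapped)
  sc.parallelize(1 to 4).foreach {
    _ =>
      // On the executor, .value.value yields a deserialized Configuration copy;
      // the println goes to the executor's stdout.
      val conf = bcConf.value.value
      println(conf.get("hbase.zookeeper.quorum"))
  }
}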
34 | */ 35 | def hbaseFieldToScalaType(f: Field, src: Array[Byte], offset: Int, length: Int): Any = { 36 | if (f.exeSchema.isDefined) { 37 | // If we have avro schema defined, use it to get record, and then convert them to catalyst data type 38 | val m = AvroSerdes.deserialize(src, f.exeSchema.get) 39 | val n = f.avroToCatalyst.map(_(m)) 40 | n.get 41 | } else { 42 | // Fall back to atomic type 43 | f.dt match { 44 | case BooleanType => src(offset) != 0 45 | case ByteType => src(offset) 46 | case ShortType => Bytes.toShort(src, offset) 47 | case IntegerType => Bytes.toInt(src, offset) 48 | case LongType => Bytes.toLong(src, offset) 49 | case FloatType => Bytes.toFloat(src, offset) 50 | case DoubleType => Bytes.toDouble(src, offset) 51 | case DateType => new Date(Bytes.toLong(src, offset)) 52 | case TimestampType => new Timestamp(Bytes.toLong(src, offset)) 53 | case StringType => Bytes.toString(src, offset, length) 54 | case BinaryType => 55 | val newArray = new Array[Byte](length) 56 | System.arraycopy(src, offset, newArray, 0, length) 57 | newArray 58 | case _: DecimalType => Bytes.toBigDecimal(src, offset, length) 59 | // TODO: SparkSqlSerializer.deserialize[Any](src) 60 | case _ => throw new Exception(s"unsupported data type ${f.dt}") 61 | } 62 | } 63 | } 64 | 65 | // convert input to data type 66 | def toBytes(input: Any, field: Field): Array[Byte] = { 67 | if (field.schema.isDefined) { 68 | // Here we assume the top level type is structType 69 | val record = field.catalystToAvro(input) 70 | AvroSerdes.serialize(record, field.schema.get) 71 | } else { 72 | field.dt match { 73 | case BooleanType => Bytes.toBytes(input.asInstanceOf[Boolean]) 74 | case ByteType => Array(input.asInstanceOf[Number].byteValue) 75 | case ShortType => Bytes.toBytes(input.asInstanceOf[Number].shortValue) 76 | case IntegerType => Bytes.toBytes(input.asInstanceOf[Number].intValue) 77 | case LongType => Bytes.toBytes(input.asInstanceOf[Number].longValue) 78 | case FloatType => Bytes.toBytes(input.asInstanceOf[Number].floatValue) 79 | case DoubleType => Bytes.toBytes(input.asInstanceOf[Number].doubleValue) 80 | case DateType | TimestampType => Bytes.toBytes(input.asInstanceOf[java.util.Date].getTime) 81 | case StringType => Bytes.toBytes(input.toString) 82 | case BinaryType => input.asInstanceOf[Array[Byte]] 83 | case _: DecimalType => Bytes.toBytes(input.asInstanceOf[java.math.BigDecimal]) 84 | case _ => throw new Exception(s"unsupported data type ${field.dt}") 85 | } 86 | } 87 | } 88 | 89 | // increment Byte array's value by 1 90 | def incrementByteArray(array: Array[Byte]): Array[Byte] = { 91 | if (array.length == 0) { 92 | return null 93 | } 94 | var index = -1 // index of the byte we have to increment 95 | var a = array.length - 1 96 | 97 | while (a >= 0) { 98 | if (array(a) != (-1).toByte) { 99 | index = a 100 | a = -1 // break from the loop because we found a non -1 element 101 | } 102 | a = a - 1 103 | } 104 | 105 | if (index < 0) { 106 | return null 107 | } 108 | val returnArray = new Array[Byte](array.length) 109 | 110 | for (a <- 0 until index) { 111 | returnArray(a) = array(a) 112 | } 113 | returnArray(index) = (array(index) + 1).toByte 114 | for (a <- index + 1 until array.length) { 115 | returnArray(a) = 0.toByte 116 | } 117 | 118 | returnArray 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/package.scala: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import org.apache.hadoop.hbase.util.Bytes 21 | import scala.math.Ordering 22 | 23 | // TODO: add @InterfaceAudience.Private if https://issues.scala-lang.org/browse/SI-3600 is resolved 24 | package object hbase { 25 | type HBaseType = Array[Byte] 26 | def bytesMin = new Array[Byte](0) 27 | def bytesMax = null 28 | val ByteMax = -1.asInstanceOf[Byte] 29 | val ByteMin = 0.asInstanceOf[Byte] 30 | val ord: Ordering[HBaseType] = new Ordering[HBaseType] { 31 | def compare(x: Array[Byte], y: Array[Byte]): Int = { 32 | return Bytes.compareTo(x, y) 33 | } 34 | } 35 | // Do not use BinaryType.ordering 36 | implicit val order: Ordering[HBaseType] = ord 37 | 38 | } 39 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/datasources/HBaseSource.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
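The package object above exposes Bytes.compareTo both as the explicit ord and as an implicit Ordering, so row keys sort in their on-disk lexicographic order rather than through BinaryType.ordering. A minimal sketch, with the sample keys chosen arbitrarily.

import org.apache.hadoop.hbase.spark.hbase._
import org.apache.hadoop.hbase.util.Bytes

val keys: Seq[HBaseType] = Seq(Bytes.toBytes("row010"), Bytes.toBytes("row002"), Bytes.toBytes("row001"))
// The implicit Ordering[HBaseType] from the package object drives sorted().
val sortedKeys = keys.sorted
sortedKeys.foreach(k => println(Bytes.toString(k)))   // row001, row002, row010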
17 | */ 18 | package org.apache.hadoop.hbase.spark.example.datasources 19 | 20 | import org.apache.hadoop.hbase.spark.datasources.HBaseTableCatalog 21 | import org.apache.spark.SparkConf 22 | import org.apache.spark.SparkContext 23 | import org.apache.spark.sql.DataFrame 24 | import org.apache.spark.sql.SQLContext 25 | import org.apache.yetus.audience.InterfaceAudience 26 | 27 | @InterfaceAudience.Private 28 | case class HBaseRecord( 29 | col0: String, 30 | col1: Boolean, 31 | col2: Double, 32 | col3: Float, 33 | col4: Int, 34 | col5: Long, 35 | col6: Short, 36 | col7: String, 37 | col8: Byte) 38 | 39 | @InterfaceAudience.Private 40 | object HBaseRecord { 41 | def apply(i: Int): HBaseRecord = { 42 | val s = s"""row${"%03d".format(i)}""" 43 | HBaseRecord( 44 | s, 45 | i % 2 == 0, 46 | i.toDouble, 47 | i.toFloat, 48 | i, 49 | i.toLong, 50 | i.toShort, 51 | s"String$i extra", 52 | i.toByte) 53 | } 54 | } 55 | 56 | @InterfaceAudience.Private 57 | object HBaseSource { 58 | val cat = s"""{ 59 | |"table":{"namespace":"default", "name":"HBaseSourceExampleTable"}, 60 | |"rowkey":"key", 61 | |"columns":{ 62 | |"col0":{"cf":"rowkey", "col":"key", "type":"string"}, 63 | |"col1":{"cf":"cf1", "col":"col1", "type":"boolean"}, 64 | |"col2":{"cf":"cf2", "col":"col2", "type":"double"}, 65 | |"col3":{"cf":"cf3", "col":"col3", "type":"float"}, 66 | |"col4":{"cf":"cf4", "col":"col4", "type":"int"}, 67 | |"col5":{"cf":"cf5", "col":"col5", "type":"bigint"}, 68 | |"col6":{"cf":"cf6", "col":"col6", "type":"smallint"}, 69 | |"col7":{"cf":"cf7", "col":"col7", "type":"string"}, 70 | |"col8":{"cf":"cf8", "col":"col8", "type":"tinyint"} 71 | |} 72 | |}""".stripMargin 73 | 74 | def main(args: Array[String]) { 75 | val sparkConf = new SparkConf().setAppName("HBaseSourceExample") 76 | val sc = new SparkContext(sparkConf) 77 | val sqlContext = new SQLContext(sc) 78 | 79 | import sqlContext.implicits._ 80 | 81 | def withCatalog(cat: String): DataFrame = { 82 | sqlContext.read 83 | .options(Map(HBaseTableCatalog.tableCatalog -> cat)) 84 | .format("org.apache.hadoop.hbase.spark") 85 | .load() 86 | } 87 | 88 | val data = (0 to 255).map { i => HBaseRecord(i) } 89 | 90 | sc.parallelize(data) 91 | .toDF 92 | .write 93 | .options(Map(HBaseTableCatalog.tableCatalog -> cat, HBaseTableCatalog.newTable -> "5")) 94 | .format("org.apache.hadoop.hbase.spark") 95 | .save() 96 | 97 | val df = withCatalog(cat) 98 | df.show() 99 | df.filter($"col0" <= "row005") 100 | .select($"col0", $"col1") 101 | .show 102 | df.filter($"col0" === "row005" || $"col0" <= "row005") 103 | .select($"col0", $"col1") 104 | .show 105 | df.filter($"col0" > "row250") 106 | .select($"col0", $"col1") 107 | .show 108 | df.registerTempTable("table1") 109 | val c = sqlContext.sql("select count(col1) from table1 where col0 < 'row050'") 110 | c.show() 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseBulkDeleteExample.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext 19 | 20 | import org.apache.hadoop.hbase.HBaseConfiguration 21 | import org.apache.hadoop.hbase.TableName 22 | import org.apache.hadoop.hbase.client.Delete 23 | import org.apache.hadoop.hbase.spark.HBaseContext 24 | import org.apache.hadoop.hbase.util.Bytes 25 | import org.apache.spark.SparkConf 26 | import org.apache.spark.SparkContext 27 | import org.apache.yetus.audience.InterfaceAudience 28 | 29 | /** 30 | * This is a simple example of deleting records in HBase 31 | * with the bulkDelete function. 32 | */ 33 | @InterfaceAudience.Private 34 | object HBaseBulkDeleteExample { 35 | def main(args: Array[String]) { 36 | if (args.length < 1) { 37 | println("HBaseBulkDeleteExample {tableName} missing an argument") 38 | return 39 | } 40 | 41 | val tableName = args(0) 42 | 43 | val sparkConf = new SparkConf().setAppName("HBaseBulkDeleteExample " + tableName) 44 | val sc = new SparkContext(sparkConf) 45 | try { 46 | // [Array[Byte]] 47 | val rdd = sc.parallelize( 48 | Array( 49 | Bytes.toBytes("1"), 50 | Bytes.toBytes("2"), 51 | Bytes.toBytes("3"), 52 | Bytes.toBytes("4"), 53 | Bytes.toBytes("5"))) 54 | 55 | val conf = HBaseConfiguration.create() 56 | 57 | val hbaseContext = new HBaseContext(sc, conf) 58 | hbaseContext.bulkDelete[Array[Byte]]( 59 | rdd, 60 | TableName.valueOf(tableName), 61 | putRecord => new Delete(putRecord), 62 | 4) 63 | } finally { 64 | sc.stop() 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseBulkGetExample.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext 19 | 20 | import org.apache.hadoop.hbase.CellUtil 21 | import org.apache.hadoop.hbase.HBaseConfiguration 22 | import org.apache.hadoop.hbase.TableName 23 | import org.apache.hadoop.hbase.client.Get 24 | import org.apache.hadoop.hbase.client.Result 25 | import org.apache.hadoop.hbase.spark.HBaseContext 26 | import org.apache.hadoop.hbase.util.Bytes 27 | import org.apache.spark.SparkConf 28 | import org.apache.spark.SparkContext 29 | import org.apache.yetus.audience.InterfaceAudience 30 | 31 | /** 32 | * This is a simple example of getting records from HBase 33 | * with the bulkGet function. 34 | */ 35 | @InterfaceAudience.Private 36 | object HBaseBulkGetExample { 37 | def main(args: Array[String]) { 38 | if (args.length < 1) { 39 | println("HBaseBulkGetExample {tableName} missing an argument") 40 | return 41 | } 42 | 43 | val tableName = args(0) 44 | 45 | val sparkConf = new SparkConf().setAppName("HBaseBulkGetExample " + tableName) 46 | val sc = new SparkContext(sparkConf) 47 | 48 | try { 49 | 50 | // [(Array[Byte])] 51 | val rdd = sc.parallelize( 52 | Array( 53 | Bytes.toBytes("1"), 54 | Bytes.toBytes("2"), 55 | Bytes.toBytes("3"), 56 | Bytes.toBytes("4"), 57 | Bytes.toBytes("5"), 58 | Bytes.toBytes("6"), 59 | Bytes.toBytes("7"))) 60 | 61 | val conf = HBaseConfiguration.create() 62 | 63 | val hbaseContext = new HBaseContext(sc, conf) 64 | 65 | val getRdd = hbaseContext.bulkGet[Array[Byte], String]( 66 | TableName.valueOf(tableName), 67 | 2, 68 | rdd, 69 | record => { 70 | System.out.println("making Get") 71 | new Get(record) 72 | }, 73 | (result: Result) => { 74 | 75 | val it = result.listCells().iterator() 76 | val b = new StringBuilder 77 | 78 | b.append(Bytes.toString(result.getRow) + ":") 79 | 80 | while (it.hasNext) { 81 | val cell = it.next() 82 | val q = Bytes.toString(CellUtil.cloneQualifier(cell)) 83 | if (q.equals("counter")) { 84 | b.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")") 85 | } else { 86 | b.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")") 87 | } 88 | } 89 | b.toString() 90 | }) 91 | 92 | getRdd 93 | .collect() 94 | .foreach(v => println(v)) 95 | 96 | } finally { 97 | sc.stop() 98 | } 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseBulkPutExample.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext 19 | 20 | import org.apache.hadoop.hbase.HBaseConfiguration 21 | import org.apache.hadoop.hbase.TableName 22 | import org.apache.hadoop.hbase.client.Put 23 | import org.apache.hadoop.hbase.spark.HBaseContext 24 | import org.apache.hadoop.hbase.util.Bytes 25 | import org.apache.spark.SparkConf 26 | import org.apache.spark.SparkContext 27 | import org.apache.yetus.audience.InterfaceAudience 28 | 29 | /** 30 | * This is a simple example of putting records in HBase 31 | * with the bulkPut function. 32 | */ 33 | @InterfaceAudience.Private 34 | object HBaseBulkPutExample { 35 | def main(args: Array[String]) { 36 | if (args.length < 2) { 37 | println("HBaseBulkPutExample {tableName} {columnFamily} are missing an arguments") 38 | return 39 | } 40 | 41 | val tableName = args(0) 42 | val columnFamily = args(1) 43 | 44 | val sparkConf = new SparkConf().setAppName( 45 | "HBaseBulkPutExample " + 46 | tableName + " " + columnFamily) 47 | val sc = new SparkContext(sparkConf) 48 | 49 | try { 50 | // [(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])] 51 | val rdd = sc.parallelize( 52 | Array( 53 | ( 54 | Bytes.toBytes("1"), 55 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))), 56 | ( 57 | Bytes.toBytes("2"), 58 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))), 59 | ( 60 | Bytes.toBytes("3"), 61 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))), 62 | ( 63 | Bytes.toBytes("4"), 64 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))), 65 | ( 66 | Bytes.toBytes("5"), 67 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5")))))) 68 | 69 | val conf = HBaseConfiguration.create() 70 | 71 | val hbaseContext = new HBaseContext(sc, conf) 72 | hbaseContext.bulkPut[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])]( 73 | rdd, 74 | TableName.valueOf(tableName), 75 | (putRecord) => { 76 | val put = new Put(putRecord._1) 77 | putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, putValue._3)) 78 | put 79 | }); 80 | } finally { 81 | sc.stop() 82 | } 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseBulkPutExampleFromFile.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext 19 | 20 | import org.apache.hadoop.hbase.HBaseConfiguration 21 | import org.apache.hadoop.hbase.TableName 22 | import org.apache.hadoop.hbase.client.Put 23 | import org.apache.hadoop.hbase.spark.HBaseContext 24 | import org.apache.hadoop.hbase.util.Bytes 25 | import org.apache.hadoop.io.LongWritable 26 | import org.apache.hadoop.io.Text 27 | import org.apache.hadoop.mapred.TextInputFormat 28 | import org.apache.spark.SparkConf 29 | import org.apache.spark.SparkContext 30 | import org.apache.yetus.audience.InterfaceAudience 31 | 32 | /** 33 | * This is a simple example of putting records in HBase 34 | * with the bulkPut function. In this example we are 35 | * getting the put information from a file 36 | */ 37 | @InterfaceAudience.Private 38 | object HBaseBulkPutExampleFromFile { 39 | def main(args: Array[String]) { 40 | if (args.length < 3) { 41 | println( 42 | "HBaseBulkPutExampleFromFile {tableName} {columnFamily} {inputFile} are missing an argument") 43 | return 44 | } 45 | 46 | val tableName = args(0) 47 | val columnFamily = args(1) 48 | val inputFile = args(2) 49 | 50 | val sparkConf = new SparkConf().setAppName( 51 | "HBaseBulkPutExampleFromFile " + 52 | tableName + " " + columnFamily + " " + inputFile) 53 | val sc = new SparkContext(sparkConf) 54 | 55 | try { 56 | var rdd = sc 57 | .hadoopFile(inputFile, classOf[TextInputFormat], classOf[LongWritable], classOf[Text]) 58 | .map( 59 | v => { 60 | System.out.println("reading-" + v._2.toString) 61 | v._2.toString 62 | }) 63 | 64 | val conf = HBaseConfiguration.create() 65 | 66 | val hbaseContext = new HBaseContext(sc, conf) 67 | hbaseContext.bulkPut[String]( 68 | rdd, 69 | TableName.valueOf(tableName), 70 | (putRecord) => { 71 | System.out.println("hbase-" + putRecord) 72 | val put = new Put(Bytes.toBytes("Value- " + putRecord)) 73 | put.addColumn(Bytes.toBytes("c"), Bytes.toBytes("1"), Bytes.toBytes(putRecord.length())) 74 | put 75 | }); 76 | } finally { 77 | sc.stop() 78 | } 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseBulkPutTimestampExample.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext 19 | 20 | import org.apache.hadoop.hbase.{HBaseConfiguration, TableName} 21 | import org.apache.hadoop.hbase.client.Put 22 | import org.apache.hadoop.hbase.spark.HBaseContext 23 | import org.apache.hadoop.hbase.util.Bytes 24 | import org.apache.spark.SparkConf 25 | import org.apache.spark.SparkContext 26 | import org.apache.yetus.audience.InterfaceAudience 27 | 28 | /** 29 | * This is a simple example of putting records in HBase 30 | * with the bulkPut function. In this example we are 31 | * also setting the timestamp in the put 32 | */ 33 | @InterfaceAudience.Private 34 | object HBaseBulkPutTimestampExample { 35 | def main(args: Array[String]) { 36 | if (args.length < 2) { 37 | System.out.println( 38 | "HBaseBulkPutTimestampExample {tableName} {columnFamily} are missing an argument") 39 | return 40 | } 41 | 42 | val tableName = args(0) 43 | val columnFamily = args(1) 44 | 45 | val sparkConf = new SparkConf().setAppName( 46 | "HBaseBulkPutTimestampExample " + 47 | tableName + " " + columnFamily) 48 | val sc = new SparkContext(sparkConf) 49 | 50 | try { 51 | 52 | val rdd = sc.parallelize( 53 | Array( 54 | ( 55 | Bytes.toBytes("6"), 56 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))), 57 | ( 58 | Bytes.toBytes("7"), 59 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))), 60 | ( 61 | Bytes.toBytes("8"), 62 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))), 63 | ( 64 | Bytes.toBytes("9"), 65 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))), 66 | ( 67 | Bytes.toBytes("10"), 68 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5")))))) 69 | 70 | val conf = HBaseConfiguration.create() 71 | 72 | val timeStamp = System.currentTimeMillis() 73 | 74 | val hbaseContext = new HBaseContext(sc, conf) 75 | hbaseContext.bulkPut[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])]( 76 | rdd, 77 | TableName.valueOf(tableName), 78 | (putRecord) => { 79 | val put = new Put(putRecord._1) 80 | putRecord._2.foreach( 81 | (putValue) => put.addColumn(putValue._1, putValue._2, timeStamp, putValue._3)) 82 | put 83 | }) 84 | } finally { 85 | sc.stop() 86 | } 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseDistributedScanExample.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext 19 | 20 | import org.apache.hadoop.hbase.HBaseConfiguration 21 | import org.apache.hadoop.hbase.TableName 22 | import org.apache.hadoop.hbase.client.Scan 23 | import org.apache.hadoop.hbase.spark.HBaseContext 24 | import org.apache.hadoop.hbase.util.Bytes 25 | import org.apache.spark.SparkConf 26 | import org.apache.spark.SparkContext 27 | import org.apache.yetus.audience.InterfaceAudience 28 | 29 | /** 30 | * This is a simple example of scanning records from HBase 31 | * with the hbaseRDD function in Distributed fashion. 32 | */ 33 | @InterfaceAudience.Private 34 | object HBaseDistributedScanExample { 35 | def main(args: Array[String]) { 36 | if (args.length < 1) { 37 | println("HBaseDistributedScanExample {tableName} missing an argument") 38 | return 39 | } 40 | 41 | val tableName = args(0) 42 | 43 | val sparkConf = new SparkConf().setAppName("HBaseDistributedScanExample " + tableName) 44 | val sc = new SparkContext(sparkConf) 45 | 46 | try { 47 | val conf = HBaseConfiguration.create() 48 | 49 | val hbaseContext = new HBaseContext(sc, conf) 50 | 51 | val scan = new Scan() 52 | scan.setCaching(100) 53 | 54 | val getRdd = hbaseContext.hbaseRDD(TableName.valueOf(tableName), scan) 55 | 56 | getRdd.foreach(v => println(Bytes.toString(v._1.get()))) 57 | 58 | println( 59 | "Length: " + getRdd 60 | .map(r => r._1.copyBytes()) 61 | .collect() 62 | .length); 63 | } finally { 64 | sc.stop() 65 | } 66 | } 67 | 68 | } 69 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseStreamingBulkPutExample.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext 19 | 20 | import org.apache.hadoop.hbase.HBaseConfiguration 21 | import org.apache.hadoop.hbase.TableName 22 | import org.apache.hadoop.hbase.client.Put 23 | import org.apache.hadoop.hbase.spark.HBaseContext 24 | import org.apache.hadoop.hbase.util.Bytes 25 | import org.apache.spark.SparkConf 26 | import org.apache.spark.SparkContext 27 | import org.apache.spark.streaming.Seconds 28 | import org.apache.spark.streaming.StreamingContext 29 | import org.apache.yetus.audience.InterfaceAudience 30 | 31 | /** 32 | * This is a simple example of BulkPut with Spark Streaming 33 | */ 34 | @InterfaceAudience.Private 35 | object HBaseStreamingBulkPutExample { 36 | def main(args: Array[String]) { 37 | if (args.length < 4) { 38 | println( 39 | "HBaseStreamingBulkPutExample " + 40 | "{host} {port} {tableName} {columnFamily} are missing an argument") 41 | return 42 | } 43 | 44 | val host = args(0) 45 | val port = args(1) 46 | val tableName = args(2) 47 | val columnFamily = args(3) 48 | 49 | val sparkConf = new SparkConf().setAppName( 50 | "HBaseStreamingBulkPutExample " + 51 | tableName + " " + columnFamily) 52 | val sc = new SparkContext(sparkConf) 53 | try { 54 | val ssc = new StreamingContext(sc, Seconds(1)) 55 | 56 | val lines = ssc.socketTextStream(host, port.toInt) 57 | 58 | val conf = HBaseConfiguration.create() 59 | 60 | val hbaseContext = new HBaseContext(sc, conf) 61 | 62 | hbaseContext.streamBulkPut[String]( 63 | lines, 64 | TableName.valueOf(tableName), 65 | (putRecord) => { 66 | if (putRecord.length() > 0) { 67 | val put = new Put(Bytes.toBytes(putRecord)) 68 | put.addColumn(Bytes.toBytes("c"), Bytes.toBytes("foo"), Bytes.toBytes("bar")) 69 | put 70 | } else { 71 | null 72 | } 73 | }) 74 | ssc.start() 75 | ssc.awaitTerminationOrTimeout(60000) 76 | } finally { 77 | sc.stop() 78 | } 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/rdd/HBaseBulkDeleteExample.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.spark.example.rdd 19 | 20 | import org.apache.hadoop.hbase.HBaseConfiguration 21 | import org.apache.hadoop.hbase.TableName 22 | import org.apache.hadoop.hbase.client.Delete 23 | import org.apache.hadoop.hbase.spark.HBaseContext 24 | import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._ 25 | import org.apache.hadoop.hbase.util.Bytes 26 | import org.apache.spark.SparkConf 27 | import org.apache.spark.SparkContext 28 | import org.apache.yetus.audience.InterfaceAudience 29 | 30 | /** 31 | * This is a simple example of deleting records in HBase 32 | * with the bulkDelete function. 33 | */ 34 | @InterfaceAudience.Private 35 | object HBaseBulkDeleteExample { 36 | def main(args: Array[String]) { 37 | if (args.length < 1) { 38 | println("HBaseBulkDeleteExample {tableName} are missing an argument") 39 | return 40 | } 41 | 42 | val tableName = args(0) 43 | 44 | val sparkConf = new SparkConf().setAppName("HBaseBulkDeleteExample " + tableName) 45 | val sc = new SparkContext(sparkConf) 46 | try { 47 | // [Array[Byte]] 48 | val rdd = sc.parallelize( 49 | Array( 50 | Bytes.toBytes("1"), 51 | Bytes.toBytes("2"), 52 | Bytes.toBytes("3"), 53 | Bytes.toBytes("4"), 54 | Bytes.toBytes("5"))) 55 | 56 | val conf = HBaseConfiguration.create() 57 | 58 | val hbaseContext = new HBaseContext(sc, conf) 59 | 60 | rdd.hbaseBulkDelete( 61 | hbaseContext, 62 | TableName.valueOf(tableName), 63 | putRecord => new Delete(putRecord), 64 | 4) 65 | 66 | } finally { 67 | sc.stop() 68 | } 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/rdd/HBaseBulkGetExample.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark.example.rdd 19 | 20 | import org.apache.hadoop.hbase.CellUtil 21 | import org.apache.hadoop.hbase.HBaseConfiguration 22 | import org.apache.hadoop.hbase.TableName 23 | import org.apache.hadoop.hbase.client.Get 24 | import org.apache.hadoop.hbase.client.Result 25 | import org.apache.hadoop.hbase.spark.HBaseContext 26 | import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._ 27 | import org.apache.hadoop.hbase.util.Bytes 28 | import org.apache.spark.SparkConf 29 | import org.apache.spark.SparkContext 30 | import org.apache.yetus.audience.InterfaceAudience 31 | 32 | /** 33 | * This is a simple example of getting records from HBase 34 | * with the bulkGet function. 
35 | */ 36 | @InterfaceAudience.Private 37 | object HBaseBulkGetExample { 38 | def main(args: Array[String]) { 39 | if (args.length < 1) { 40 | println("HBaseBulkGetExample {tableName} is missing an argument") 41 | return 42 | } 43 | 44 | val tableName = args(0) 45 | 46 | val sparkConf = new SparkConf().setAppName("HBaseBulkGetExample " + tableName) 47 | val sc = new SparkContext(sparkConf) 48 | 49 | try { 50 | 51 | // [(Array[Byte])] 52 | val rdd = sc.parallelize( 53 | Array( 54 | Bytes.toBytes("1"), 55 | Bytes.toBytes("2"), 56 | Bytes.toBytes("3"), 57 | Bytes.toBytes("4"), 58 | Bytes.toBytes("5"), 59 | Bytes.toBytes("6"), 60 | Bytes.toBytes("7"))) 61 | 62 | val conf = HBaseConfiguration.create() 63 | 64 | val hbaseContext = new HBaseContext(sc, conf) 65 | 66 | val getRdd = rdd.hbaseBulkGet[String]( 67 | hbaseContext, 68 | TableName.valueOf(tableName), 69 | 2, 70 | record => { 71 | System.out.println("making Get") 72 | new Get(record) 73 | }, 74 | (result: Result) => { 75 | 76 | val it = result.listCells().iterator() 77 | val b = new StringBuilder 78 | 79 | b.append(Bytes.toString(result.getRow) + ":") 80 | 81 | while (it.hasNext) { 82 | val cell = it.next() 83 | val q = Bytes.toString(CellUtil.cloneQualifier(cell)) 84 | if (q.equals("counter")) { 85 | b.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")") 86 | } else { 87 | b.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")") 88 | } 89 | } 90 | b.toString() 91 | }) 92 | 93 | getRdd 94 | .collect() 95 | .foreach(v => println(v)) 96 | 97 | } finally { 98 | sc.stop() 99 | } 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/rdd/HBaseBulkPutExample.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark.example.rdd 19 | 20 | import org.apache.hadoop.hbase.HBaseConfiguration 21 | import org.apache.hadoop.hbase.TableName 22 | import org.apache.hadoop.hbase.client.Put 23 | import org.apache.hadoop.hbase.spark.HBaseContext 24 | import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._ 25 | import org.apache.hadoop.hbase.util.Bytes 26 | import org.apache.spark.SparkConf 27 | import org.apache.spark.SparkContext 28 | import org.apache.yetus.audience.InterfaceAudience 29 | 30 | /** 31 | * This is a simple example of putting records in HBase 32 | * with the bulkPut function. 
33 | */ 34 | @InterfaceAudience.Private 35 | object HBaseBulkPutExample { 36 | def main(args: Array[String]) { 37 | if (args.length < 2) { 38 | println("HBaseBulkPutExample {tableName} {columnFamily} are missing arguments") 39 | return 40 | } 41 | 42 | val tableName = args(0) 43 | val columnFamily = args(1) 44 | 45 | val sparkConf = new SparkConf().setAppName( 46 | "HBaseBulkPutExample " + 47 | tableName + " " + columnFamily) 48 | val sc = new SparkContext(sparkConf) 49 | 50 | try { 51 | // [(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])] 52 | val rdd = sc.parallelize( 53 | Array( 54 | ( 55 | Bytes.toBytes("1"), 56 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))), 57 | ( 58 | Bytes.toBytes("2"), 59 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))), 60 | ( 61 | Bytes.toBytes("3"), 62 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))), 63 | ( 64 | Bytes.toBytes("4"), 65 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))), 66 | ( 67 | Bytes.toBytes("5"), 68 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5")))))) 69 | 70 | val conf = HBaseConfiguration.create() 71 | 72 | val hbaseContext = new HBaseContext(sc, conf) 73 | 74 | rdd.hbaseBulkPut( 75 | hbaseContext, 76 | TableName.valueOf(tableName), 77 | (putRecord) => { 78 | val put = new Put(putRecord._1) 79 | putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, putValue._3)) 80 | put 81 | }) 82 | 83 | } finally { 84 | sc.stop() 85 | } 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/rdd/HBaseForeachPartitionExample.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License.
17 | */ 18 | package org.apache.hadoop.hbase.spark.example.rdd 19 | 20 | import org.apache.hadoop.hbase.HBaseConfiguration 21 | import org.apache.hadoop.hbase.TableName 22 | import org.apache.hadoop.hbase.client.Put 23 | import org.apache.hadoop.hbase.spark.HBaseContext 24 | import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._ 25 | import org.apache.hadoop.hbase.util.Bytes 26 | import org.apache.spark.SparkConf 27 | import org.apache.spark.SparkContext 28 | import org.apache.yetus.audience.InterfaceAudience 29 | 30 | /** 31 | * This is a simple example of using the foreachPartition 32 | * method with an HBase connection 33 | */ 34 | @InterfaceAudience.Private 35 | object HBaseForeachPartitionExample { 36 | def main(args: Array[String]) { 37 | if (args.length < 2) { 38 | println("HBaseForeachPartitionExample {tableName} {columnFamily} are missing arguments") 39 | return 40 | } 41 | 42 | val tableName = args(0) 43 | val columnFamily = args(1) 44 | 45 | val sparkConf = new SparkConf().setAppName( 46 | "HBaseForeachPartitionExample " + 47 | tableName + " " + columnFamily) 48 | val sc = new SparkContext(sparkConf) 49 | 50 | try { 51 | // [(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])] 52 | val rdd = sc.parallelize( 53 | Array( 54 | ( 55 | Bytes.toBytes("1"), 56 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))), 57 | ( 58 | Bytes.toBytes("2"), 59 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))), 60 | ( 61 | Bytes.toBytes("3"), 62 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))), 63 | ( 64 | Bytes.toBytes("4"), 65 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))), 66 | ( 67 | Bytes.toBytes("5"), 68 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5")))))) 69 | 70 | val conf = HBaseConfiguration.create() 71 | 72 | val hbaseContext = new HBaseContext(sc, conf) 73 | 74 | rdd.hbaseForeachPartition( 75 | hbaseContext, 76 | (it, connection) => { 77 | val m = connection.getBufferedMutator(TableName.valueOf(tableName)) 78 | 79 | it.foreach( 80 | r => { 81 | val put = new Put(r._1) 82 | r._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, putValue._3)) 83 | m.mutate(put) 84 | }) 85 | m.flush() 86 | m.close() 87 | }) 88 | 89 | } finally { 90 | sc.stop() 91 | } 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/rdd/HBaseMapPartitionExample.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License.
17 | */ 18 | package org.apache.hadoop.hbase.spark.example.rdd 19 | 20 | import org.apache.hadoop.hbase.HBaseConfiguration 21 | import org.apache.hadoop.hbase.TableName 22 | import org.apache.hadoop.hbase.client.Get 23 | import org.apache.hadoop.hbase.spark.HBaseContext 24 | import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._ 25 | import org.apache.hadoop.hbase.util.Bytes 26 | import org.apache.spark.SparkConf 27 | import org.apache.spark.SparkContext 28 | import org.apache.yetus.audience.InterfaceAudience 29 | 30 | /** 31 | * This is a simple example of using the mapPartitions 32 | * method with an HBase connection 33 | */ 34 | @InterfaceAudience.Private 35 | object HBaseMapPartitionExample { 36 | def main(args: Array[String]) { 37 | if (args.length < 1) { 38 | println("HBaseMapPartitionExample {tableName} is missing an argument") 39 | return 40 | } 41 | 42 | val tableName = args(0) 43 | 44 | val sparkConf = new SparkConf().setAppName("HBaseMapPartitionExample " + tableName) 45 | val sc = new SparkContext(sparkConf) 46 | 47 | try { 48 | 49 | // [(Array[Byte])] 50 | val rdd = sc.parallelize( 51 | Array( 52 | Bytes.toBytes("1"), 53 | Bytes.toBytes("2"), 54 | Bytes.toBytes("3"), 55 | Bytes.toBytes("4"), 56 | Bytes.toBytes("5"), 57 | Bytes.toBytes("6"), 58 | Bytes.toBytes("7"))) 59 | 60 | val conf = HBaseConfiguration.create() 61 | 62 | val hbaseContext = new HBaseContext(sc, conf) 63 | 64 | val getRdd = rdd.hbaseMapPartitions[String]( 65 | hbaseContext, 66 | (it, connection) => { 67 | val table = connection.getTable(TableName.valueOf(tableName)) 68 | it.map { 69 | r => 70 | // batching would be faster. This is just an example 71 | val result = table.get(new Get(r)) 72 | 73 | val it = result.listCells().iterator() 74 | val b = new StringBuilder 75 | 76 | b.append(Bytes.toString(result.getRow) + ":") 77 | 78 | while (it.hasNext) { 79 | val cell = it.next() 80 | val q = Bytes.toString(cell.getQualifierArray, cell.getQualifierOffset, cell.getQualifierLength) // use offset/length: the cell's backing array may be shared 81 | if (q.equals("counter")) { 82 | b.append("(" + q + "," + Bytes.toLong(cell.getValueArray, cell.getValueOffset) + ")") 83 | } else { 84 | b.append("(" + q + "," + Bytes.toString(cell.getValueArray, cell.getValueOffset, cell.getValueLength) + ")") 85 | } 86 | } 87 | b.toString() 88 | } 89 | }) 90 | 91 | getRdd 92 | .collect() 93 | .foreach(v => println(v)) 94 | 95 | } finally { 96 | sc.stop() 97 | } 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/test/java/org/apache/hadoop/hbase/spark/TestJavaHBaseContextForLargeRows.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License.
17 | */ 18 | package org.apache.hadoop.hbase.spark; 19 | 20 | import java.io.IOException; 21 | import java.util.ArrayList; 22 | import java.util.Arrays; 23 | import java.util.List; 24 | import org.apache.hadoop.conf.Configuration; 25 | import org.apache.hadoop.hbase.HBaseClassTestRule; 26 | import org.apache.hadoop.hbase.TableName; 27 | import org.apache.hadoop.hbase.client.Admin; 28 | import org.apache.hadoop.hbase.client.Connection; 29 | import org.apache.hadoop.hbase.client.ConnectionFactory; 30 | import org.apache.hadoop.hbase.client.Put; 31 | import org.apache.hadoop.hbase.client.Table; 32 | import org.apache.hadoop.hbase.testclassification.MediumTests; 33 | import org.apache.hadoop.hbase.testclassification.MiscTests; 34 | import org.apache.hadoop.hbase.util.Bytes; 35 | import org.apache.spark.api.java.JavaSparkContext; 36 | import org.junit.BeforeClass; 37 | import org.junit.ClassRule; 38 | import org.junit.experimental.categories.Category; 39 | 40 | @Category({ MiscTests.class, MediumTests.class }) 41 | public class TestJavaHBaseContextForLargeRows extends TestJavaHBaseContext { 42 | 43 | @ClassRule 44 | public static final HBaseClassTestRule TIMEOUT = 45 | HBaseClassTestRule.forClass(TestJavaHBaseContextForLargeRows.class); 46 | 47 | @BeforeClass 48 | public static void setUpBeforeClass() throws Exception { 49 | JSC = new JavaSparkContext("local", "JavaHBaseContextSuite"); 50 | 51 | init(); 52 | } 53 | 54 | protected void populateTableWithMockData(Configuration conf, TableName tableName) 55 | throws IOException { 56 | try (Connection conn = ConnectionFactory.createConnection(conf); 57 | Table table = conn.getTable(tableName); Admin admin = conn.getAdmin()) { 58 | 59 | List puts = new ArrayList<>(5); 60 | 61 | for (int i = 1; i < 6; i++) { 62 | Put put = new Put(Bytes.toBytes(Integer.toString(i))); 63 | // We are trying to generate a large row value here 64 | char[] chars = new char[1024 * 1024 * 2]; 65 | // adding '0' to convert int to char 66 | Arrays.fill(chars, (char) (i + '0')); 67 | put.addColumn(columnFamily, columnFamily, Bytes.toBytes(String.valueOf(chars))); 68 | puts.add(put); 69 | } 70 | table.put(puts); 71 | admin.flush(tableName); 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Define some default values that can be overridden by system properties 18 | hbase.root.logger=INFO,FA 19 | hbase.log.dir=. 20 | hbase.log.file=hbase.log 21 | 22 | # Define the root logger to the system property "hbase.root.logger". 
23 | log4j.rootLogger=${hbase.root.logger} 24 | 25 | # Logging Threshold 26 | log4j.threshold=ALL 27 | 28 | # 29 | # Daily Rolling File Appender 30 | # 31 | log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender 32 | log4j.appender.DRFA.File=${hbase.log.dir}/${hbase.log.file} 33 | 34 | # Rollver at midnight 35 | log4j.appender.DRFA.DatePattern=.yyyy-MM-dd 36 | 37 | # 30-day backup 38 | #log4j.appender.DRFA.MaxBackupIndex=30 39 | log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout 40 | # Debugging Pattern format 41 | log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %C{2}(%L): %m%n 42 | 43 | 44 | # 45 | # console 46 | # Add "console" to rootlogger above if you want to use this 47 | # 48 | log4j.appender.console=org.apache.log4j.ConsoleAppender 49 | log4j.appender.console.target=System.err 50 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 51 | log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %C{2}(%L): %m%n 52 | 53 | #File Appender 54 | log4j.appender.FA=org.apache.log4j.FileAppender 55 | log4j.appender.FA.append=false 56 | log4j.appender.FA.file=target/log-output.txt 57 | log4j.appender.FA.layout=org.apache.log4j.PatternLayout 58 | log4j.appender.FA.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %C{2}(%L): %m%n 59 | log4j.appender.FA.Threshold = INFO 60 | 61 | # Custom Logging levels 62 | 63 | #log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG 64 | 65 | log4j.logger.org.apache.hadoop=WARN 66 | log4j.logger.org.apache.zookeeper=ERROR 67 | log4j.logger.org.apache.hadoop.hbase=DEBUG 68 | 69 | #These settings are workarounds against spurious logs from the minicluster. 70 | #See HBASE-4709 71 | log4j.logger.org.apache.hadoop.metrics2.impl.MetricsConfig=WARN 72 | log4j.logger.org.apache.hadoop.metrics2.impl.MetricsSinkAdapter=WARN 73 | log4j.logger.org.apache.hadoop.metrics2.impl.MetricsSystemImpl=WARN 74 | log4j.logger.org.apache.hadoop.metrics2.util.MBeans=WARN 75 | # Enable this to get detailed connection error/retry logging. 76 | # log4j.logger.org.apache.hadoop.hbase.client.ConnectionImplementation=TRACE 77 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseCatalogSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import org.apache.hadoop.hbase.spark.datasources.{DataTypeParserWrapper, DoubleSerDes, HBaseTableCatalog} 21 | import org.apache.hadoop.hbase.util.Bytes 22 | import org.apache.spark.sql.types._ 23 | import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} 24 | 25 | class HBaseCatalogSuite 26 | extends FunSuite 27 | with BeforeAndAfterEach 28 | with BeforeAndAfterAll 29 | with Logging { 30 | 31 | val map = s"""MAP>""" 32 | val array = s"""array>""" 33 | val arrayMap = s"""MAp>""" 34 | val catalog = s"""{ 35 | |"table":{"namespace":"default", "name":"htable"}, 36 | |"rowkey":"key1:key2", 37 | |"columns":{ 38 | |"col1":{"cf":"rowkey", "col":"key1", "type":"string"}, 39 | |"col2":{"cf":"rowkey", "col":"key2", "type":"double"}, 40 | |"col3":{"cf":"cf1", "col":"col2", "type":"binary"}, 41 | |"col4":{"cf":"cf1", "col":"col3", "type":"timestamp"}, 42 | |"col5":{"cf":"cf1", "col":"col4", "type":"double", "serdes":"${classOf[ 43 | DoubleSerDes].getName}"}, 44 | |"col6":{"cf":"cf1", "col":"col5", "type":"$map"}, 45 | |"col7":{"cf":"cf1", "col":"col6", "type":"$array"}, 46 | |"col8":{"cf":"cf1", "col":"col7", "type":"$arrayMap"}, 47 | |"col9":{"cf":"cf1", "col":"col8", "type":"date"}, 48 | |"col10":{"cf":"cf1", "col":"col9", "type":"timestamp"} 49 | |} 50 | |}""".stripMargin 51 | val parameters = Map(HBaseTableCatalog.tableCatalog -> catalog) 52 | val t = HBaseTableCatalog(parameters) 53 | 54 | def checkDataType(dataTypeString: String, expectedDataType: DataType): Unit = { 55 | test(s"parse ${dataTypeString.replace("\n", "")}") { 56 | assert(DataTypeParserWrapper.parse(dataTypeString) === expectedDataType) 57 | } 58 | } 59 | test("basic") { 60 | assert(t.getField("col1").isRowKey == true) 61 | assert(t.getPrimaryKey == "key1") 62 | assert(t.getField("col3").dt == BinaryType) 63 | assert(t.getField("col4").dt == TimestampType) 64 | assert(t.getField("col5").dt == DoubleType) 65 | assert(t.getField("col5").serdes != None) 66 | assert(t.getField("col4").serdes == None) 67 | assert(t.getField("col1").isRowKey) 68 | assert(t.getField("col2").isRowKey) 69 | assert(!t.getField("col3").isRowKey) 70 | assert(t.getField("col2").length == Bytes.SIZEOF_DOUBLE) 71 | assert(t.getField("col1").length == -1) 72 | assert(t.getField("col8").length == -1) 73 | assert(t.getField("col9").dt == DateType) 74 | assert(t.getField("col10").dt == TimestampType) 75 | } 76 | 77 | checkDataType(map, t.getField("col6").dt) 78 | 79 | checkDataType(array, t.getField("col7").dt) 80 | 81 | checkDataType(arrayMap, t.getField("col8").dt) 82 | 83 | test("convert") { 84 | val m = Map( 85 | "hbase.columns.mapping" -> 86 | "KEY_FIELD STRING :key, A_FIELD STRING c:a, B_FIELD DOUBLE c:b, C_FIELD BINARY c:c,", 87 | "hbase.table" -> "NAMESPACE:TABLE") 88 | val map = HBaseTableCatalog.convert(m) 89 | val json = map.get(HBaseTableCatalog.tableCatalog).get 90 | val parameters = Map(HBaseTableCatalog.tableCatalog -> json) 91 | val t = HBaseTableCatalog(parameters) 92 | assert(t.namespace === "NAMESPACE") 93 | assert(t.name == "TABLE") 94 | assert(t.getField("KEY_FIELD").isRowKey) 95 | assert(DataTypeParserWrapper.parse("STRING") === t.getField("A_FIELD").dt) 96 | assert(!t.getField("A_FIELD").isRowKey) 97 | assert(DataTypeParserWrapper.parse("DOUBLE") === t.getField("B_FIELD").dt) 98 | assert(DataTypeParserWrapper.parse("BINARY") === t.getField("C_FIELD").dt) 99 | } 100 | 101 | test("compatibility") { 102 | val m = Map( 103 | "hbase.columns.mapping" -> 104 | "KEY_FIELD STRING 
:key, A_FIELD STRING c:a, B_FIELD DOUBLE c:b, C_FIELD BINARY c:c,", 105 | "hbase.table" -> "t1") 106 | val t = HBaseTableCatalog(m) 107 | assert(t.namespace === "default") 108 | assert(t.name == "t1") 109 | assert(t.getField("KEY_FIELD").isRowKey) 110 | assert(DataTypeParserWrapper.parse("STRING") === t.getField("A_FIELD").dt) 111 | assert(!t.getField("A_FIELD").isRowKey) 112 | assert(DataTypeParserWrapper.parse("DOUBLE") === t.getField("B_FIELD").dt) 113 | assert(DataTypeParserWrapper.parse("BINARY") === t.getField("C_FIELD").dt) 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseTestSource.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import org.apache.hadoop.hbase.spark.datasources.HBaseSparkConf 21 | import org.apache.spark.SparkEnv 22 | import org.apache.spark.rdd.RDD 23 | import org.apache.spark.sql.{Row, SQLContext} 24 | import org.apache.spark.sql.sources._ 25 | import org.apache.spark.sql.types._ 26 | 27 | class HBaseTestSource extends RelationProvider { 28 | override def createRelation( 29 | sqlContext: SQLContext, 30 | parameters: Map[String, String]): BaseRelation = { 31 | DummyScan( 32 | parameters("cacheSize").toInt, 33 | parameters("batchNum").toInt, 34 | parameters("blockCacheingEnable").toBoolean, 35 | parameters("rowNum").toInt)(sqlContext) 36 | } 37 | } 38 | 39 | case class DummyScan(cacheSize: Int, batchNum: Int, blockCachingEnable: Boolean, rowNum: Int)( 40 | @transient val sqlContext: SQLContext) 41 | extends BaseRelation 42 | with TableScan { 43 | private def sparkConf = SparkEnv.get.conf 44 | override def schema: StructType = 45 | StructType(StructField("i", IntegerType, nullable = false) :: Nil) 46 | 47 | override def buildScan(): RDD[Row] = sqlContext.sparkContext 48 | .parallelize(0 until rowNum) 49 | .map(Row(_)) 50 | .map { 51 | x => 52 | if (sparkConf.getInt(HBaseSparkConf.QUERY_BATCHSIZE, -1) != batchNum || 53 | sparkConf.getInt(HBaseSparkConf.QUERY_CACHEDROWS, -1) != cacheSize || 54 | sparkConf.getBoolean(HBaseSparkConf.QUERY_CACHEBLOCKS, false) != blockCachingEnable) { 55 | throw new Exception("HBase Spark configuration cannot be set properly") 56 | } 57 | x 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/StartsWithSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or 
more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import org.apache.hadoop.hbase.spark.datasources.Utils 21 | import org.apache.hadoop.hbase.util.Bytes 22 | import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} 23 | 24 | class StartsWithSuite extends FunSuite with BeforeAndAfterEach with BeforeAndAfterAll with Logging { 25 | 26 | test("simple1") { 27 | val t = new Array[Byte](2) 28 | t(0) = 1.toByte 29 | t(1) = 2.toByte 30 | 31 | val expected = new Array[Byte](2) 32 | expected(0) = 1.toByte 33 | expected(1) = 3.toByte 34 | 35 | val res = Utils.incrementByteArray(t) 36 | assert(res.sameElements(expected)) 37 | } 38 | 39 | test("simple2") { 40 | val t = new Array[Byte](1) 41 | t(0) = 87.toByte 42 | 43 | val expected = new Array[Byte](1) 44 | expected(0) = 88.toByte 45 | 46 | val res = Utils.incrementByteArray(t) 47 | assert(res.sameElements(expected)) 48 | } 49 | 50 | test("overflow1") { 51 | val t = new Array[Byte](2) 52 | t(0) = 1.toByte 53 | t(1) = (-1).toByte 54 | 55 | val expected = new Array[Byte](2) 56 | expected(0) = 2.toByte 57 | expected(1) = 0.toByte 58 | 59 | val res = Utils.incrementByteArray(t) 60 | 61 | assert(res.sameElements(expected)) 62 | } 63 | 64 | test("overflow2") { 65 | val t = new Array[Byte](2) 66 | t(0) = (-1).toByte 67 | t(1) = (-1).toByte 68 | 69 | val expected = null 70 | 71 | val res = Utils.incrementByteArray(t) 72 | 73 | assert(res == expected) 74 | } 75 | 76 | test("max-min-value") { 77 | val t = new Array[Byte](2) 78 | t(0) = 1.toByte 79 | t(1) = (127).toByte 80 | 81 | val expected = new Array[Byte](2) 82 | expected(0) = 1.toByte 83 | expected(1) = (-128).toByte 84 | 85 | val res = Utils.incrementByteArray(t) 86 | assert(res.sameElements(expected)) 87 | } 88 | 89 | test("complicated") { 90 | val imput = "row005" 91 | val expectedOutput = "row006" 92 | 93 | val t = Bytes.toBytes(imput) 94 | val expected = Bytes.toBytes(expectedOutput) 95 | 96 | val res = Utils.incrementByteArray(t) 97 | assert(res.sameElements(expected)) 98 | } 99 | 100 | } 101 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/TableOutputFormatSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import java.text.SimpleDateFormat 21 | import java.util.{Date, Locale} 22 | import org.apache.hadoop.hbase.{HBaseTestingUtility, TableName, TableNotFoundException} 23 | import org.apache.hadoop.hbase.mapreduce.TableOutputFormat 24 | import org.apache.hadoop.hbase.util.Bytes 25 | import org.apache.hadoop.mapreduce.{Job, TaskAttemptID, TaskType} 26 | import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl 27 | import org.apache.spark.{SparkConf, SparkContext} 28 | import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} 29 | import scala.util.{Failure, Success, Try} 30 | 31 | // Unit tests for HBASE-20521: change get configuration(TableOutputFormat.conf) object first sequence from jobContext to getConf 32 | // this suite contains two tests, one for normal case(getConf return null, use jobContext), create new TableOutputformat object without init TableOutputFormat.conf object, 33 | // configuration object inside checkOutputSpecs came from jobContext. 34 | // The other one(getConf return conf object) we manually call "setConf" to init TableOutputFormat.conf, for making it more straight forward, we specify a nonexistent table 35 | // name in conf object, checkOutputSpecs will then throw TableNotFoundException exception 36 | class TableOutputFormatSuite 37 | extends FunSuite 38 | with BeforeAndAfterEach 39 | with BeforeAndAfterAll 40 | with Logging { 41 | @transient var sc: SparkContext = null 42 | var TEST_UTIL = new HBaseTestingUtility 43 | 44 | val tableName = "TableOutputFormatTest" 45 | val tableNameTest = "NonExistentTable" 46 | val columnFamily = "cf" 47 | 48 | override protected def beforeAll(): Unit = { 49 | TEST_UTIL.startMiniCluster 50 | 51 | logInfo(" - minicluster started") 52 | try { 53 | TEST_UTIL.deleteTable(TableName.valueOf(tableName)) 54 | } catch { 55 | case e: Exception => logInfo(" - no table " + tableName + " found") 56 | } 57 | 58 | TEST_UTIL.createTable(TableName.valueOf(tableName), Bytes.toBytes(columnFamily)) 59 | logInfo(" - created table") 60 | 61 | // set "validateOutputSpecs" true anyway, force to validate output spec 62 | val sparkConf = new SparkConf() 63 | .setMaster("local") 64 | .setAppName("test") 65 | 66 | sc = new SparkContext(sparkConf) 67 | } 68 | 69 | override protected def afterAll(): Unit = { 70 | logInfo(" - delete table: " + tableName) 71 | TEST_UTIL.deleteTable(TableName.valueOf(tableName)) 72 | logInfo(" - shutting down minicluster") 73 | TEST_UTIL.shutdownMiniCluster() 74 | 75 | TEST_UTIL.cleanupTestDir() 76 | sc.stop() 77 | } 78 | 79 | def getJobContext() = { 80 | val hConf = TEST_UTIL.getConfiguration 81 | hConf.set(TableOutputFormat.OUTPUT_TABLE, tableName) 82 | val job = Job.getInstance(hConf) 83 | job.setOutputFormatClass(classOf[TableOutputFormat[String]]) 84 | 85 | val jobTrackerId = new SimpleDateFormat("yyyyMMddHHmmss", Locale.US).format(new Date()) 86 | val jobAttemptId = new TaskAttemptID(jobTrackerId, 1, TaskType.MAP, 0, 0) 87 | new TaskAttemptContextImpl(job.getConfiguration, jobAttemptId) 88 | } 89 | 90 | // Mock up 
jobContext object and execute actions in "write" function 91 | // from "org.apache.spark.internal.io.SparkHadoopMapReduceWriter" 92 | // this case should run normally without any exceptions 93 | test( 94 | "TableOutputFormat.checkOutputSpecs test without setConf called, should return true and without exceptions") { 95 | val jobContext = getJobContext() 96 | val format = jobContext.getOutputFormatClass 97 | val jobFormat = format.newInstance 98 | Try { 99 | jobFormat.checkOutputSpecs(jobContext) 100 | } match { 101 | case Success(_) => assert(true) 102 | case Failure(_) => assert(false) 103 | } 104 | } 105 | 106 | // Set configuration externally, checkOutputSpec should use configuration object set by "SetConf" method 107 | // rather than jobContext, this case should throw "TableNotFoundException" exception 108 | test( 109 | "TableOutputFormat.checkOutputSpecs test with setConf called, should throw TableNotFoundException") { 110 | val jobContext = getJobContext() 111 | val format = jobContext.getOutputFormatClass 112 | val jobFormat = format.newInstance 113 | 114 | val hConf = TEST_UTIL.getConfiguration 115 | hConf.set(TableOutputFormat.OUTPUT_TABLE, tableNameTest) 116 | jobFormat.asInstanceOf[TableOutputFormat[String]].setConf(hConf) 117 | Try { 118 | jobFormat.checkOutputSpecs(jobContext) 119 | } match { 120 | case Success(_) => assert(false) 121 | case Failure(e: Exception) => { 122 | if (e.isInstanceOf[TableNotFoundException]) 123 | assert(true) 124 | else 125 | assert(false) 126 | } 127 | case _ => None 128 | } 129 | } 130 | 131 | } 132 | -------------------------------------------------------------------------------- /test-reporting/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 23 | 4.0.0 24 | 25 | 26 | hbase-connectors 27 | org.apache.hbase.connectors 28 | ${revision} 29 | 30 | 31 | test-reporting 32 | pom 33 | ${revision} 34 | Test Reporting 35 | Test Reporting for Apache HBase Connectors 36 | 37 | 38 | 39 | ${project.parent.basedir} 40 | 41 | 42 | 43 | 44 | 45 | org.apache.hbase.connectors.kafka 46 | hbase-kafka-proxy 47 | ${revision} 48 | 49 | 50 | org.apache.hbase.connectors.spark 51 | hbase-spark 52 | ${revision} 53 | 54 | 55 | 56 | org.scala-lang 57 | scala-library 58 | 59 | 60 | 61 | org.scala-lang 62 | scalap 63 | 64 | 65 | com.google.code.findbugs 66 | jsr305 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | org.apache.maven.plugins 76 | maven-surefire-plugin 77 | ${surefire.version} 78 | 79 | ${argLine} -Xms256m -Xmx2048m 80 | 1 81 | random 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | org.jacoco 90 | jacoco-maven-plugin 91 | 92 | 93 | report 94 | 95 | report-aggregate 96 | 97 | package 98 | 99 | ${jacocoReportDir} 100 | ${project.build.sourceEncoding} 101 | ${project.reporting.outputEncoding} 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | --------------------------------------------------------------------------------
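
The Scala examples above exercise the RDD-level API (hbaseBulkPut, hbaseBulkDelete, hbaseBulkGet, hbaseForeachPartition, hbaseMapPartitions), while HBaseCatalogSuite covers the JSON catalog used by the connector's SQL data source. As a rough illustration of how a downstream application might tie the two together, here is a minimal sketch that reads an HBase table as a DataFrame using a catalog in the same JSON shape as HBaseCatalogSuite; the table name, column layout, and SparkSession wiring are assumptions for illustration only, not files from this repository.

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.spark.datasources.HBaseTableCatalog
import org.apache.spark.sql.SparkSession

object HBaseDataFrameReadSketch {
  // Hypothetical catalog: string rowkey "key1" plus one double column in family cf1,
  // following the JSON layout exercised in HBaseCatalogSuite.
  val catalog =
    s"""{
       |"table":{"namespace":"default", "name":"htable"},
       |"rowkey":"key1",
       |"columns":{
       |"col1":{"cf":"rowkey", "col":"key1", "type":"string"},
       |"col2":{"cf":"cf1", "col":"col2", "type":"double"}
       |}
       |}""".stripMargin

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("HBaseDataFrameReadSketch").getOrCreate()
    // Like the examples above, create an HBaseContext first so the data source can reuse it.
    val hbaseContext = new HBaseContext(spark.sparkContext, HBaseConfiguration.create())
    // Read the table through the connector's relation provider and push a simple filter down.
    val df = spark.read
      .options(Map(HBaseTableCatalog.tableCatalog -> catalog))
      .format("org.apache.hadoop.hbase.spark")
      .load()
    df.filter(df("col2") > 1.0).show()
    spark.stop()
  }
}

Such a job would typically be launched with spark-submit, with the hbase-spark connector jar on the application classpath and hbase-site.xml visible to both the driver and the executors.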