├── .asf.yaml ├── .gitignore ├── CHANGELOG.md ├── LICENSE.txt ├── NOTICE.txt ├── README.md ├── RELEASENOTES.md ├── bin ├── hbase-connectors ├── hbase-connectors-config.sh └── hbase-connectors-daemon.sh ├── conf └── log4j.properties ├── dev-support ├── .scalafmt.conf ├── code-coverage │ ├── README.md │ └── run-coverage.sh ├── eclipse.importorder ├── hbase_eclipse_formatter.xml ├── jenkins │ ├── Dockerfile │ ├── Jenkinsfile │ ├── gather_machine_environment.sh │ ├── hbase-personality.sh │ └── jenkins_precommit_github_yetus.sh └── license-header ├── hbase-connectors-assembly ├── pom.xml └── src │ └── main │ ├── assembly │ ├── connector-components.xml │ └── hbase-connectors-bin.xml │ └── resources │ ├── META-INF │ └── LEGAL │ └── supplemental-models.xml ├── kafka ├── README.md ├── conf │ └── kafka-route-rules.xml ├── hbase-kafka-model │ ├── pom.xml │ └── src │ │ └── main │ │ └── avro │ │ └── HbaseKafkaEvent.avro ├── hbase-kafka-proxy │ ├── pom.xml │ └── src │ │ ├── main │ │ └── java │ │ │ └── org │ │ │ └── apache │ │ │ └── hadoop │ │ │ └── hbase │ │ │ └── kafka │ │ │ ├── DropRule.java │ │ │ ├── DumpToStringListener.java │ │ │ ├── KafkaBridgeConnection.java │ │ │ ├── KafkaProxy.java │ │ │ ├── KafkaTableForBridge.java │ │ │ ├── Rule.java │ │ │ ├── TopicRoutingRules.java │ │ │ └── TopicRule.java │ │ └── test │ │ └── java │ │ └── org │ │ └── apache │ │ └── hadoop │ │ └── hbase │ │ └── kafka │ │ ├── ProducerForTesting.java │ │ ├── TestDropRule.java │ │ ├── TestProcessMutations.java │ │ ├── TestQualifierMatching.java │ │ └── TestRouteRules.java └── pom.xml ├── pom.xml ├── spark ├── README.md ├── hbase-spark-it │ ├── pom.xml │ └── src │ │ └── test │ │ ├── java │ │ └── org │ │ │ └── apache │ │ │ └── hadoop │ │ │ └── hbase │ │ │ └── spark │ │ │ └── IntegrationTestSparkBulkLoad.java │ │ └── resources │ │ └── hbase-site.xml ├── hbase-spark-protocol-shaded │ └── pom.xml ├── hbase-spark-protocol │ ├── pom.xml │ └── src │ │ └── main │ │ └── protobuf │ │ └── SparkFilter.proto ├── hbase-spark │ ├── README.md │ ├── pom.xml │ └── src │ │ ├── main │ │ ├── java │ │ │ └── org │ │ │ │ └── apache │ │ │ │ └── hadoop │ │ │ │ └── hbase │ │ │ │ └── spark │ │ │ │ ├── SparkSQLPushDownFilter.java │ │ │ │ └── example │ │ │ │ └── hbasecontext │ │ │ │ ├── JavaHBaseBulkDeleteExample.java │ │ │ │ ├── JavaHBaseBulkGetExample.java │ │ │ │ ├── JavaHBaseBulkLoadExample.java │ │ │ │ ├── JavaHBaseBulkPutExample.java │ │ │ │ ├── JavaHBaseDistributedScan.java │ │ │ │ ├── JavaHBaseMapGetPutExample.java │ │ │ │ └── JavaHBaseStreamingBulkPutExample.java │ │ └── scala │ │ │ └── org │ │ │ └── apache │ │ │ └── hadoop │ │ │ └── hbase │ │ │ └── spark │ │ │ ├── BulkLoadPartitioner.scala │ │ │ ├── ByteArrayComparable.scala │ │ │ ├── ByteArrayWrapper.scala │ │ │ ├── ColumnFamilyQualifierMapKeyWrapper.scala │ │ │ ├── DefaultSource.scala │ │ │ ├── DynamicLogicExpression.scala │ │ │ ├── FamiliesQualifiersValues.scala │ │ │ ├── FamilyHFileWriteOptions.scala │ │ │ ├── HBaseConnectionCache.scala │ │ │ ├── HBaseContext.scala │ │ │ ├── HBaseDStreamFunctions.scala │ │ │ ├── HBaseRDDFunctions.scala │ │ │ ├── JavaHBaseContext.scala │ │ │ ├── KeyFamilyQualifier.scala │ │ │ ├── Logging.scala │ │ │ ├── NewHBaseRDD.scala │ │ │ ├── datasources │ │ │ ├── Bound.scala │ │ │ ├── DataTypeParserWrapper.scala │ │ │ ├── HBaseResources.scala │ │ │ ├── HBaseSparkConf.scala │ │ │ ├── HBaseTableCatalog.scala │ │ │ ├── HBaseTableScanRDD.scala │ │ │ ├── JavaBytesEncoder.scala │ │ │ ├── NaiveEncoder.scala │ │ │ ├── SchemaConverters.scala │ │ │ ├── SerDes.scala │ │ │ ├── 
SerializableConfiguration.scala │ │ │ ├── Utils.scala │ │ │ └── package.scala │ │ │ └── example │ │ │ ├── datasources │ │ │ ├── AvroSource.scala │ │ │ ├── DataType.scala │ │ │ └── HBaseSource.scala │ │ │ ├── hbasecontext │ │ │ ├── HBaseBulkDeleteExample.scala │ │ │ ├── HBaseBulkGetExample.scala │ │ │ ├── HBaseBulkPutExample.scala │ │ │ ├── HBaseBulkPutExampleFromFile.scala │ │ │ ├── HBaseBulkPutTimestampExample.scala │ │ │ ├── HBaseDistributedScanExample.scala │ │ │ └── HBaseStreamingBulkPutExample.scala │ │ │ └── rdd │ │ │ ├── HBaseBulkDeleteExample.scala │ │ │ ├── HBaseBulkGetExample.scala │ │ │ ├── HBaseBulkPutExample.scala │ │ │ ├── HBaseForeachPartitionExample.scala │ │ │ └── HBaseMapPartitionExample.scala │ │ └── test │ │ ├── java │ │ └── org │ │ │ └── apache │ │ │ └── hadoop │ │ │ └── hbase │ │ │ └── spark │ │ │ ├── TestJavaHBaseContext.java │ │ │ └── TestJavaHBaseContextForLargeRows.java │ │ ├── resources │ │ ├── hbase-site.xml │ │ └── log4j.properties │ │ └── scala │ │ └── org │ │ └── apache │ │ └── hadoop │ │ └── hbase │ │ └── spark │ │ ├── BulkLoadSuite.scala │ │ ├── DefaultSourceSuite.scala │ │ ├── DynamicLogicExpressionSuite.scala │ │ ├── HBaseCatalogSuite.scala │ │ ├── HBaseConnectionCacheSuite.scala │ │ ├── HBaseContextSuite.scala │ │ ├── HBaseDStreamFunctionsSuite.scala │ │ ├── HBaseRDDFunctionsSuite.scala │ │ ├── HBaseTestSource.scala │ │ ├── PartitionFilterSuite.scala │ │ ├── StartsWithSuite.scala │ │ └── TableOutputFormatSuite.scala └── pom.xml └── test-reporting └── pom.xml /.asf.yaml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # This file controls the integration of HBase project with ASF infrastructure. Refer to 18 | # https://cwiki.apache.org/confluence/display/INFRA/.asf.yaml+features+for+git+repositories for 19 | # details. Be careful when changing the contents of this file since it may affect many developers 20 | # of the project and make sure to discuss the changes with dev@ before committing. 
21 | 22 | github: 23 | description: "Apache HBase Connectors" 24 | homepage: https://hbase.apache.org/ 25 | labels: 26 | - database 27 | - java 28 | - hbase 29 | features: 30 | wiki: false 31 | issues: false 32 | projects: false 33 | enabled_merge_buttons: 34 | squash: true 35 | merge: false 36 | rebase: true 37 | autolink_jira: HBASE 38 | notifications: 39 | commits: commits@hbase.apache.org 40 | issues: issues@hbase.apache.org 41 | pullrequests: issues@hbase.apache.org 42 | jira_options: link label 43 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.externalToolBuilders 2 | .project 3 | *.settings/ 4 | .DS_Store 5 | .classpath 6 | /build 7 | /.idea/ 8 | /logs 9 | *target/ 10 | *.orig 11 | *~ 12 | hbase-*/test 13 | *.iws 14 | *.iml 15 | *.ipr 16 | patchprocess/ 17 | dependency-reduced-pom.xml 18 | .flattened-pom.xml 19 | link_report/ 20 | linklint-*.zip 21 | linklint/ 22 | .checkstyle 23 | **/.checkstyle 24 | -------------------------------------------------------------------------------- /NOTICE.txt: -------------------------------------------------------------------------------- 1 | Apache HBase - Connectors 2 | Copyright 2019 The Apache Software Foundation 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 18 | 19 | # hbase-connectors 20 | 21 | Connectors for [Apache HBase™](https://hbase.apache.org) 22 | 23 | * [Kafka Proxy](https://github.com/apache/hbase-connectors/tree/master/kafka) 24 | * [Spark](https://github.com/apache/hbase-connectors/tree/master/spark) 25 | -------------------------------------------------------------------------------- /bin/hbase-connectors-config.sh: -------------------------------------------------------------------------------- 1 | # 2 | #/** 3 | # * Licensed to the Apache Software Foundation (ASF) under one 4 | # * or more contributor license agreements. See the NOTICE file 5 | # * distributed with this work for additional information 6 | # * regarding copyright ownership. The ASF licenses this file 7 | # * to you under the Apache License, Version 2.0 (the 8 | # * "License"); you may not use this file except in compliance 9 | # * with the License. You may obtain a copy of the License at 10 | # * 11 | # * http://www.apache.org/licenses/LICENSE-2.0 12 | # * 13 | # * Unless required by applicable law or agreed to in writing, software 14 | # * distributed under the License is distributed on an "AS IS" BASIS, 15 | # * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # * See the License for the specific language governing permissions and 17 | # * limitations under the License. 18 | # */ 19 | 20 | # included in all the hbase connector scripts with source command 21 | # should not be executable directly 22 | # also should not be passed any arguments, since we need original $* 23 | # Modelled after $HADOOP_HOME/bin/hadoop-env.sh. 
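# --- Illustrative note (not part of the original script) ---
# A launcher in bin/, such as hbase-connectors-daemon.sh, is expected to load
# this file with the "source"/"." builtin rather than executing it, roughly:
#
#   bin=$(cd "$(dirname "$0")" >/dev/null && pwd)
#   . "$bin"/hbase-connectors-config.sh
#
# Sourcing it without extra arguments leaves the caller's original "$*"/"$@"
# intact, so the option parsing below operates on the launcher's own arguments.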
24 | 25 | # resolve links - "${BASH_SOURCE-$0}" may be a softlink 26 | 27 | this="${BASH_SOURCE-$0}" 28 | while [ -h "$this" ]; do 29 | ls=`ls -ld "$this"` 30 | link=`expr "$ls" : '.*-> \(.*\)$'` 31 | if expr "$link" : '.*/.*' > /dev/null; then 32 | this="$link" 33 | else 34 | this=`dirname "$this"`/"$link" 35 | fi 36 | done 37 | 38 | # convert relative path to absolute path 39 | bin=`dirname "$this"` 40 | script=`basename "$this"` 41 | bin=`cd "$bin">/dev/null; pwd` 42 | this="$bin/$script" 43 | 44 | # the root of the hbase connector installation 45 | if [ -z "$HBASE_CONNECTOR_HOME" ]; then 46 | export HBASE_CONNECTOR_HOME=`dirname "$this"`/.. 47 | fi 48 | 49 | #check to see if the conf dir or hbase home are given as an optional arguments 50 | while [ $# -gt 1 ] 51 | do 52 | if [ "--config" = "$1" ] 53 | then 54 | shift 55 | confdir=$1 56 | shift 57 | HBASE_CONF_DIR=$confdir 58 | elif [ "--autostart-window-size" = "$1" ] 59 | then 60 | shift 61 | AUTOSTART_WINDOW_SIZE=$(( $1 + 0 )) 62 | if [ $AUTOSTART_WINDOW_SIZE -lt 0 ]; then 63 | echo "Invalid value for --autostart-window-size, should be a positive integer" 64 | exit 1 65 | fi 66 | shift 67 | elif [ "--autostart-window-retry-limit" = "$1" ] 68 | then 69 | shift 70 | AUTOSTART_WINDOW_RETRY_LIMIT=$(( $1 + 0 )) 71 | if [ $AUTOSTART_WINDOW_RETRY_LIMIT -lt 0 ]; then 72 | echo "Invalid value for --autostart-window-retry-limit, should be a positive integer" 73 | exit 1 74 | fi 75 | shift 76 | elif [ "--internal-classpath" = "$1" ] 77 | then 78 | shift 79 | # shellcheck disable=SC2034 80 | INTERNAL_CLASSPATH="true" 81 | elif [ "--debug" = "$1" ] 82 | then 83 | shift 84 | # shellcheck disable=SC2034 85 | DEBUG="true" 86 | else 87 | # Presume we are at end of options and break 88 | break 89 | fi 90 | done 91 | 92 | 93 | 94 | # Allow alternate hbase connector conf dir location. 95 | HBASE_CONNECTOR_CONF_DIR="${HBASE_CONNECTOR_CONF_DIR:-$HBASE_CONNECTOR_HOME/conf}" 96 | 97 | 98 | if [ -n "$HBASE_CONNECTOR_JMX_BASE" ] && [ -z "$HBASE_CONNECTOR_JMX_OPTS" ]; then 99 | HBASE_CONNECTOR_JMX_OPTS="$HBASE_CONNECTOR_JMX_BASE" 100 | fi 101 | 102 | 103 | # Source the hbase-connector-env.sh only if it has not already been done. HBASE_CONNECTOR_ENV_INIT keeps track of it. 104 | if [ -z "$HBASE_CONNECTOR_ENV_INIT" ] && [ -f "${HBASE_CONNECTOR_CONF_DIR}/hbase-connector-env.sh" ]; then 105 | . "${HBASE_CONNECTOR_CONF_DIR}/hbase-connector-env.sh" 106 | export HBASE_CONNECTOR_ENV_INIT="true" 107 | fi 108 | 109 | # Newer versions of glibc use an arena memory allocator that causes virtual 110 | # memory usage to explode. Tune the variable down to prevent vmem explosion. 111 | export MALLOC_ARENA_MAX=${MALLOC_ARENA_MAX:-4} 112 | 113 | 114 | # Now having JAVA_HOME defined is required 115 | if [ -z "$JAVA_HOME" ]; then 116 | cat 1>&2 < http://www.oracle.com/technetwork/java/javase/downloads | 122 | | | 123 | | HBase Connectors requires Java 1.8 or later. | 124 | +======================================================================+ 125 | EOF 126 | exit 1 127 | fi 128 | -------------------------------------------------------------------------------- /conf/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. 
The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Define some default values that can be overridden by system properties 18 | hbase.connector.root.logger=INFO,console 19 | hbase.connector.log.dir=. 20 | hbase.connector.log.file=hbase-connector.log 21 | hbase.connector.log.level=INFO 22 | 23 | # Define the root logger to the system property "hbase.connector.root.logger". 24 | log4j.rootLogger=${hbase.connector.root.logger} 25 | 26 | # Logging Threshold 27 | log4j.threshold=ALL 28 | 29 | # 30 | # Daily Rolling File Appender 31 | # 32 | log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender 33 | log4j.appender.DRFA.File=${hbase.connector.log.dir}/${hbase.connector.log.file} 34 | 35 | # Rollver at midnight 36 | log4j.appender.DRFA.DatePattern=.yyyy-MM-dd 37 | 38 | # 30-day backup 39 | #log4j.appender.DRFA.MaxBackupIndex=30 40 | log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout 41 | 42 | # Pattern format: Date LogLevel LoggerName LogMessage 43 | log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %.1000m%n 44 | 45 | # Rolling File Appender properties 46 | hbase.connector.log.maxfilesize=256MB 47 | hbase.connector.log.maxbackupindex=20 48 | 49 | # Rolling File Appender 50 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 51 | log4j.appender.RFA.File=${hbase.connector.log.dir}/${hbase.connector.log.file} 52 | 53 | log4j.appender.RFA.MaxFileSize=${hbase.connector.log.maxfilesize} 54 | log4j.appender.RFA.MaxBackupIndex=${hbase.connector.log.maxbackupindex} 55 | 56 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 57 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %.1000m%n 58 | 59 | 60 | # 61 | # Null Appender 62 | # 63 | log4j.appender.NullAppender=org.apache.log4j.varia.NullAppender 64 | 65 | # 66 | # console 67 | # Add "console" to rootlogger above if you want to use this 68 | # 69 | log4j.appender.console=org.apache.log4j.ConsoleAppender 70 | log4j.appender.console.target=System.err 71 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 72 | log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %.1000m%n 73 | 74 | log4j.appender.asyncconsole=org.apache.hadoop.hbase.AsyncConsoleAppender 75 | log4j.appender.asyncconsole.target=System.err 76 | 77 | # Custom Logging levels 78 | 79 | 80 | 81 | log4j.logger.org.apache.hadoop.hbase.kafka=INFO 82 | 83 | #this is a debugging tool 84 | log4j.logger.org.apache.hadoop.hbase.kafka.DumpToStringListener=DEBUG 85 | 86 | 87 | 88 | log4j.logger.org.apache.hadoop.metrics2.impl.MetricsConfig=WARN 89 | log4j.logger.org.apache.hadoop.metrics2.impl.MetricsSinkAdapter=WARN 90 | log4j.logger.org.apache.hadoop.metrics2.impl.MetricsSystemImpl=WARN 91 | -------------------------------------------------------------------------------- /dev-support/.scalafmt.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software 
Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Template based off apache spark: https://github.com/apache/spark/blob/master/dev/.scalafmt.conf 19 | # Align settings 20 | align = none 21 | align.openParenDefnSite = false 22 | align.openParenCallSite = false 23 | align.tokens = [] 24 | 25 | # Rewrites 26 | rewrite.rules = [Imports] 27 | 28 | # Imports 29 | rewrite.imports.sort = scalastyle 30 | rewrite.imports.groups = [ 31 | [".*"], 32 | ["org.apache.hbase.thirdparty\\..*"], 33 | ["org.apache.hadoop.hbase.shaded\\..*"] 34 | ] 35 | rewrite.imports.contiguousGroups = no 36 | importSelectors = "singleLine" 37 | 38 | # Newlines 39 | newlines.beforeCurlyLambdaParams = multiline 40 | newlines.afterCurlyLambdaParams = squash 41 | danglingParentheses.preset = false 42 | optIn.configStyleArguments = false 43 | 44 | # Scaladoc 45 | docstrings.style = Asterisk 46 | # See https://github.com/scalameta/scalafmt/issues/1387 47 | docstrings.wrap = no 48 | 49 | # Max column 50 | maxColumn = 100 51 | 52 | # Version 53 | runner.dialect = scala212 54 | version = 3.7.12 55 | -------------------------------------------------------------------------------- /dev-support/code-coverage/README.md: -------------------------------------------------------------------------------- 1 | 18 | 19 | # Code analysis 20 | 21 | The `run-coverage.sh` script runs maven with the coverage profile which generates the test coverage data for both java 22 | and scala classes. 23 | If the required parameters are given it also runs the sonar analysis and uploads the results to the given SonarQube 24 | Server. 25 | 26 | ## Running code analysis 27 | 28 | After running the script the code coverage results are generated under the `test-reporting/target/code-coverage/` 29 | folder. 30 | The JaCoCo code coverage library generated reports can be found under the `jacoco-reports` folder and the SCoverage 31 | generated results can be found under the `scoverage-reports` folder. 32 | 33 | Here is how you can generate the code coverage reports: 34 | 35 | ```./dev-support/code-coverage/run-coverage.sh``` 36 | 37 | ## Publishing coverage results to SonarQube 38 | 39 | The required parameters for publishing the results to SonarQube are: 40 | 41 | - host URL, 42 | - login credentials, 43 | - project key 44 | 45 | The project name is an optional parameter. 
46 | 47 | Here is an example command for running and publishing the coverage data: 48 | 49 | ```./dev-support/code-coverage/run-coverage.sh -l ProjectCredentials -u https://exampleserver.com -k Project_Key -n Project_Name``` 50 | -------------------------------------------------------------------------------- /dev-support/code-coverage/run-coverage.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | 19 | usage() { 20 | echo 21 | echo "options:" 22 | echo "-h Display help" 23 | echo "-u SonarQube Host URL" 24 | echo "-l SonarQube Login Credentials" 25 | echo "-k SonarQube Project Key" 26 | echo "-n SonarQube Project Name" 27 | echo 28 | echo "Important:" 29 | echo " The required parameters for publishing the coverage results to SonarQube:" 30 | echo " - Host URL" 31 | echo " - Login Credentials" 32 | echo " - Project Key" 33 | echo 34 | } 35 | 36 | execute() { 37 | SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" 38 | MAIN_POM="${SCRIPT_DIR}/../../pom.xml" 39 | 40 | mvn -B -e -f "$MAIN_POM" clean install -DskipTests -DskipShade -Pcoverage 41 | 42 | mvn -B -e -f "$MAIN_POM" package -fn -Pcoverage 43 | 44 | # If the required parameters are given, the code coverage results are uploaded to the SonarQube Server 45 | if [ -n "$SONAR_LOGIN" ] && [ -n "$SONAR_PROJECT_KEY" ] && [ -n "$SONAR_URL" ]; then 46 | mvn -B -e -Pcoverage sonar:sonar -Dsonar.host.url="$SONAR_URL" -Dsonar.login="$SONAR_LOGIN" \ 47 | -Dsonar.projectKey="$SONAR_PROJECT_KEY" -Dsonar.projectName="$SONAR_PROJECT_NAME" 48 | fi 49 | } 50 | 51 | while getopts ":u:l:k:n:h" option; do 52 | case $option in 53 | u) SONAR_URL=${OPTARG:-} ;; 54 | l) SONAR_LOGIN=${OPTARG:-} ;; 55 | k) SONAR_PROJECT_KEY=${OPTARG:-} ;; 56 | n) SONAR_PROJECT_NAME=${OPTARG:-} ;; 57 | h) # Display usage 58 | usage 59 | exit 60 | ;; 61 | \?) # Invalid option 62 | echo "Error: Invalid option" 63 | exit 64 | ;; 65 | esac 66 | done 67 | 68 | # Start code analysis 69 | execute 70 | -------------------------------------------------------------------------------- /dev-support/eclipse.importorder: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. 
You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | #Organize Import Order 16 | 3=org.apache.hadoop.hbase.shaded 17 | 2=org.apache.hbase.thirdparty 18 | 1= 19 | 0=\# 20 | -------------------------------------------------------------------------------- /dev-support/jenkins/Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Dockerfile for hbase-connectors pre-commit build. 18 | # https://builds.apache.org/job/PreCommit-HBASE-CONNECTORS-Build 19 | 20 | FROM maven:3.6-jdk-8 21 | 22 | # hadolint ignore=DL3008 23 | RUN apt-get -q update && apt-get -q install --no-install-recommends -y \ 24 | git \ 25 | rsync \ 26 | shellcheck \ 27 | wget && \ 28 | apt-get clean && \ 29 | rm -rf /var/lib/apt/lists/* 30 | 31 | ### 32 | # Avoid out of memory errors in builds 33 | ### 34 | ENV MAVEN_OPTS -Xmx3g 35 | 36 | CMD ["/bin/bash"] 37 | 38 | ### 39 | # Everything past this point is either not needed for testing or breaks Yetus. 40 | # So tell Yetus not to read the rest of the file: 41 | # YETUS CUT HERE 42 | ### 43 | -------------------------------------------------------------------------------- /dev-support/jenkins/gather_machine_environment.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 
18 | 19 | set -e 20 | function usage { 21 | echo "Usage: ${0} /path/for/output/dir" 22 | echo "" 23 | echo " Gather info about a build machine that test harnesses should poll before running." 24 | echo " presumes you'll then archive the passed output dir." 25 | 26 | exit 1 27 | } 28 | 29 | if [ "$#" -lt 1 ]; then 30 | usage 31 | fi 32 | 33 | 34 | declare output=$1 35 | 36 | if [ ! -d "${output}" ] || [ ! -w "${output}" ]; then 37 | echo "Specified output directory must exist and be writable." >&2 38 | exit 1 39 | fi 40 | 41 | echo "getting machine specs, find in ${BUILD_URL}/artifact/${output}/" 42 | echo "JAVA_HOME: ${JAVA_HOME}" >"${output}/java_home" 2>&1 || true 43 | ls -l "${JAVA_HOME}" >"${output}/java_home_ls" 2>&1 || true 44 | echo "MAVEN_HOME: ${MAVEN_HOME}" >"${output}/mvn_home" 2>&1 || true 45 | mvn --offline --version >"${output}/mvn_version" 2>&1 || true 46 | cat /proc/cpuinfo >"${output}/cpuinfo" 2>&1 || true 47 | cat /proc/meminfo >"${output}/meminfo" 2>&1 || true 48 | cat /proc/diskstats >"${output}/diskstats" 2>&1 || true 49 | cat /sys/block/sda/stat >"${output}/sys-block-sda-stat" 2>&1 || true 50 | df -h >"${output}/df-h" 2>&1 || true 51 | ps -Aww >"${output}/ps-Aww" 2>&1 || true 52 | ifconfig -a >"${output}/ifconfig-a" 2>&1 || true 53 | lsblk -ta >"${output}/lsblk-ta" 2>&1 || true 54 | lsblk -fa >"${output}/lsblk-fa" 2>&1 || true 55 | ulimit -a >"${output}/ulimit-a" 2>&1 || true 56 | uptime >"${output}/uptime" 2>&1 || true 57 | hostname -a >"${output}/hostname-a" 2>&1 || true 58 | -------------------------------------------------------------------------------- /dev-support/jenkins/jenkins_precommit_github_yetus.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | 19 | set -e 20 | 21 | # place ourselves in the directory containing the hbase and yetus checkouts 22 | cd "$(dirname "$0")/../.." 23 | echo "executing from $(pwd)" 24 | 25 | if [[ "true" = "${DEBUG}" ]]; then 26 | set -x 27 | printenv 2>&1 | sort 28 | fi 29 | 30 | declare -i missing_env=0 31 | declare -a required_envs=( 32 | # these ENV variables define the required API with Jenkinsfile_GitHub 33 | "ARCHIVE_PATTERN_LIST" 34 | "BUILD_URL_ARTIFACTS" 35 | "DOCKERFILE" 36 | "GITHUB_PASSWORD" 37 | "GITHUB_USER" 38 | "PATCHDIR" 39 | "PLUGINS" 40 | "SET_JAVA_HOME" 41 | "SOURCEDIR" 42 | "YETUSDIR" 43 | "PERSONALITY" 44 | ) 45 | # Validate params 46 | for required_env in "${required_envs[@]}"; do 47 | if [ -z "${!required_env}" ]; then 48 | echo "[ERROR] Required environment variable '${required_env}' is not set." 
49 | missing_env=${missing_env}+1 50 | fi 51 | done 52 | 53 | if [ ${missing_env} -gt 0 ]; then 54 | echo "[ERROR] Please set the required environment variables before invoking. If this error is " \ 55 | "on Jenkins, then please file a JIRA about the error." 56 | exit 1 57 | fi 58 | 59 | # TODO (HBASE-23900): cannot assume test-patch runs directly from sources 60 | TESTPATCHBIN="${YETUSDIR}/precommit/src/main/shell/test-patch.sh" 61 | 62 | # this must be clean for every run 63 | rm -rf "${PATCHDIR}" 64 | mkdir -p "${PATCHDIR}" 65 | 66 | # Gather machine information 67 | mkdir "${PATCHDIR}/machine" 68 | "${SOURCEDIR}/dev-support/jenkins/gather_machine_environment.sh" "${PATCHDIR}/machine" 69 | 70 | # enable debug output for yetus 71 | if [[ "true" = "${DEBUG}" ]]; then 72 | YETUS_ARGS+=("--debug") 73 | fi 74 | # If we're doing docker, make sure we don't accidentally pollute the image with a host java path 75 | if [ -n "${JAVA_HOME}" ]; then 76 | unset JAVA_HOME 77 | fi 78 | YETUS_ARGS+=("--patch-dir=${PATCHDIR}") 79 | # where the source is located 80 | YETUS_ARGS+=("--basedir=${SOURCEDIR}") 81 | YETUS_ARGS+=("--project=hbase-connectors") 82 | YETUS_ARGS+=("--personality=${PERSONALITY}") 83 | # lots of different output formats 84 | YETUS_ARGS+=("--brief-report-file=${PATCHDIR}/brief.txt") 85 | YETUS_ARGS+=("--console-report-file=${PATCHDIR}/console.txt") 86 | YETUS_ARGS+=("--html-report-file=${PATCHDIR}/report.html") 87 | # enable writing back to Github 88 | YETUS_ARGS+=("--github-password=${GITHUB_PASSWORD}") 89 | YETUS_ARGS+=("--github-user=${GITHUB_USER}") 90 | # auto-kill any surefire stragglers during unit test runs 91 | YETUS_ARGS+=("--reapermode=kill") 92 | # set relatively high limits for ASF machines 93 | # changing these to higher values may cause problems 94 | # with other jobs on systemd-enabled machines 95 | YETUS_ARGS+=("--dockermemlimit=20g") 96 | # -1 spotbugs issues that show up prior to the patch being applied 97 | YETUS_ARGS+=("--spotbugs-strict-precheck") 98 | # rsync these files back into the archive dir 99 | YETUS_ARGS+=("--archive-list=${ARCHIVE_PATTERN_LIST}") 100 | # URL for user-side presentation in reports and such to our artifacts 101 | YETUS_ARGS+=("--build-url-artifacts=${BUILD_URL_ARTIFACTS}") 102 | # plugins to enable 103 | YETUS_ARGS+=("--plugins=${PLUGINS}") 104 | YETUS_ARGS+=("--tests-filter=test4tests") 105 | # run in docker mode and specifically point to our 106 | # Dockerfile since we don't want to use the auto-pulled version. 107 | YETUS_ARGS+=("--docker") 108 | YETUS_ARGS+=("--dockerfile=${DOCKERFILE}") 109 | YETUS_ARGS+=("--mvn-custom-repos") 110 | YETUS_ARGS+=("--java-home=${SET_JAVA_HOME}") 111 | # effectively treat dev-support as a custom maven module 112 | YETUS_ARGS+=("--skip-dirs=dev-support") 113 | # help keep the ASF boxes clean 114 | YETUS_ARGS+=("--sentinel") 115 | # use emoji vote so it is easier to find the broken line 116 | YETUS_ARGS+=("--github-use-emoji-vote") 117 | YETUS_ARGS+=("--github-repo=apache/hbase-connectors") 118 | 119 | echo "Launching yetus with command line:" 120 | echo "${TESTPATCHBIN} ${YETUS_ARGS[*]}" 121 | 122 | /usr/bin/env bash "${TESTPATCHBIN}" "${YETUS_ARGS[@]}" 123 | -------------------------------------------------------------------------------- /dev-support/license-header: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. 
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | -------------------------------------------------------------------------------- /hbase-connectors-assembly/src/main/assembly/connector-components.xml: -------------------------------------------------------------------------------- 1 | 2 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | ${project.basedir}/../conf 28 | conf 29 | 0644 30 | 0755 31 | 32 | 33 | 34 | 35 | ${project.basedir}/../bin 36 | bin 37 | 38 | hbase-connectors 39 | hbase-connectors-config.sh 40 | hbase-connectors-daemon.sh 41 | 42 | 0755 43 | 0755 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /hbase-connectors-assembly/src/main/assembly/hbase-connectors-bin.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 24 | bin 25 | 26 | tar.gz 27 | 28 | hbase-connectors-${revision} 29 | 30 | src/main/assembly/connector-components.xml 31 | 32 | 33 | 34 | true 35 | 36 | org.apache.hbase.connectors.spark:hbase-spark-it 37 | 38 | 39 | false 40 | lib 41 | 42 | 43 | 44 | org.apache.yetus:audience-annotations 45 | org.slf4j:slf4j-api 46 | org.slf4j:slf4j-log4j12 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | ${project.build.directory}/maven-shared-archive-resources/META-INF/LICENSE 56 | . 57 | LICENSE.txt 58 | unix 59 | 60 | 61 | ${project.build.directory}/maven-shared-archive-resources/META-INF/NOTICE 62 | . 63 | NOTICE.txt 64 | unix 65 | 66 | 67 | ${basedir}/src/main/resources/META-INF/LEGAL 68 | . 
69 | LEGAL 70 | unix 71 | 72 | 73 | ../README.md 74 | ${file.separator} 75 | 76 | 77 | ../CHANGELOG.md 78 | ${file.separator} 79 | 80 | 81 | ../RELEASENOTES.md 82 | ${file.separator} 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /hbase-connectors-assembly/src/main/resources/META-INF/LEGAL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/hbase-connectors/6544291a4c87a8b8c99bcc05dd64f4692e87f1f3/hbase-connectors-assembly/src/main/resources/META-INF/LEGAL -------------------------------------------------------------------------------- /hbase-connectors-assembly/src/main/resources/supplemental-models.xml: -------------------------------------------------------------------------------- 1 | 2 | 20 | 23 | 24 | 25 | 26 | 27 | 28 | javax.xml.stream 29 | stax-api 30 | 31 | 32 | CDDL 1.1 33 | https://github.com/javaee/activation/blob/master/LICENSE.txt 34 | repo 35 | 36 | 37 | 38 | 39 | 40 | 41 | org.codehaus.jettison 42 | jettison 43 | 1.1 44 | 45 | 46 | Apache License, Version 2.0 47 | http://www.apache.org/licenses/LICENSE-2.0.txt 48 | repo 49 | 50 | 51 | 52 | 53 | 54 | 55 | org.bouncycastle 56 | bcprov-jdk18on 57 | 1.78.1 58 | 59 | 60 | 61 | MIT License 62 | http://www.opensource.org/licenses/mit-license.php 63 | repo 64 | 65 | Copyright (c) 2000 - 2018 The Legion of the Bouncy Castle Inc. (https://www.bouncycastle.org) 66 | 67 | 68 | 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /kafka/conf/kafka-route-rules.xml: -------------------------------------------------------------------------------- 1 | 20 | 21 | 22 | 64 | 65 | -------------------------------------------------------------------------------- /kafka/hbase-kafka-model/src/main/avro/HbaseKafkaEvent.avro: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | *
12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | {"namespace": "org.apache.hadoop.hbase.kafka", 20 | "type": "record", 21 | "name": "HBaseKafkaEvent", 22 | "fields": [ 23 | {"name": "key", "type": "bytes"}, 24 | {"name": "timestamp", "type": "long" }, 25 | {"name": "delete", "type": "boolean" }, 26 | {"name": "value", "type": "bytes"}, 27 | {"name": "qualifier", "type": "bytes"}, 28 | {"name": "family", "type": "bytes"}, 29 | {"name": "table", "type": "bytes"} 30 | ] 31 | } 32 | -------------------------------------------------------------------------------- /kafka/hbase-kafka-proxy/src/main/java/org/apache/hadoop/hbase/kafka/DropRule.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.kafka; 19 | 20 | import org.apache.yetus.audience.InterfaceAudience; 21 | 22 | /** 23 | * Rule that indicates the Cell should not be replicated 24 | */ 25 | @InterfaceAudience.Private 26 | public class DropRule extends Rule { 27 | public DropRule() { 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /kafka/hbase-kafka-proxy/src/main/java/org/apache/hadoop/hbase/kafka/DumpToStringListener.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.kafka; 19 | 20 | import java.time.Duration; 21 | import java.util.Arrays; 22 | import java.util.Iterator; 23 | import java.util.Properties; 24 | import java.util.stream.Collectors; 25 | import org.apache.avro.io.BinaryDecoder; 26 | import org.apache.avro.io.DecoderFactory; 27 | import org.apache.avro.specific.SpecificDatumReader; 28 | import org.apache.hadoop.hbase.util.Bytes; 29 | import org.apache.hadoop.hbase.util.VersionInfo; 30 | import org.apache.kafka.clients.consumer.ConsumerRecord; 31 | import org.apache.kafka.clients.consumer.ConsumerRecords; 32 | import org.apache.kafka.clients.consumer.KafkaConsumer; 33 | import org.apache.kafka.common.serialization.ByteArrayDeserializer; 34 | import org.apache.yetus.audience.InterfaceAudience; 35 | import org.slf4j.Logger; 36 | import org.slf4j.LoggerFactory; 37 | 38 | import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine; 39 | import org.apache.hbase.thirdparty.org.apache.commons.cli.DefaultParser; 40 | import org.apache.hbase.thirdparty.org.apache.commons.cli.HelpFormatter; 41 | import org.apache.hbase.thirdparty.org.apache.commons.cli.Options; 42 | import org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException; 43 | 44 | /** 45 | * connects to kafka and reads from the passed in topics. Parses each message into an avro object 46 | * and dumps it to the console. 47 | */ 48 | @InterfaceAudience.Private 49 | public final class DumpToStringListener { 50 | private static final Logger LOG = LoggerFactory.getLogger(DumpToStringListener.class); 51 | 52 | private DumpToStringListener() { 53 | } 54 | 55 | public static void main(String[] args) { 56 | LOG.info("***** STARTING service '" + DumpToStringListener.class.getSimpleName() + "' *****"); 57 | VersionInfo.logVersion(); 58 | 59 | Options options = new Options(); 60 | options.addRequiredOption("k", "kafkabrokers", true, "Kafka Brokers " + "(comma delimited)"); 61 | options.addRequiredOption("t", "kafkatopics", true, 62 | "Kafka Topics " + "to subscribe to (comma delimited)"); 63 | CommandLine commandLine = null; 64 | 65 | try { 66 | commandLine = new DefaultParser().parse(options, args); 67 | } catch (ParseException e) { 68 | LOG.error("Could not parse: ", e); 69 | printUsageAndExit(options, -1); 70 | } 71 | 72 | SpecificDatumReader dreader = 73 | new SpecificDatumReader<>(HBaseKafkaEvent.SCHEMA$); 74 | 75 | String topic = commandLine.getOptionValue('t'); 76 | Properties props = new Properties(); 77 | props.put("bootstrap.servers", commandLine.getOptionValue('k')); 78 | props.put("group.id", "hbase kafka test tool"); 79 | props.put("key.deserializer", ByteArrayDeserializer.class.getName()); 80 | props.put("value.deserializer", ByteArrayDeserializer.class.getName()); 81 | 82 | try (KafkaConsumer consumer = new KafkaConsumer<>(props)) { 83 | consumer.subscribe(Arrays.stream(topic.split(",")).collect(Collectors.toList())); 84 | 85 | while (true) { 86 | ConsumerRecords records = consumer.poll(Duration.ofMillis(10000)); 87 | Iterator> it = records.iterator(); 88 | while (it.hasNext()) { 89 | ConsumerRecord record = it.next(); 90 | BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(record.value(), null); 91 | try { 92 | HBaseKafkaEvent event = dreader.read(null, decoder); 93 | LOG.debug("key :" + Bytes.toString(record.key()) + " value " + event); 94 | } catch (Exception e) { 95 | throw new RuntimeException(e); 96 | } 97 | } 98 | } 99 | } 100 | } 101 | 102 | private static void printUsageAndExit(Options options, int exitCode) { 
103 | HelpFormatter formatter = new HelpFormatter(); 104 | formatter.printHelp("hbase " + DumpToStringListener.class.getName(), "", options, 105 | "\n[--kafkabrokers ] " 106 | + "[-k ] \n", 107 | true); 108 | System.exit(exitCode); 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /kafka/hbase-kafka-proxy/src/main/java/org/apache/hadoop/hbase/kafka/TopicRule.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.kafka; 19 | 20 | import java.util.Arrays; 21 | import java.util.HashSet; 22 | import java.util.Set; 23 | import java.util.stream.Collectors; 24 | import org.apache.yetus.audience.InterfaceAudience; 25 | 26 | /** 27 | * If the Cell matches the rule returns the configured topics. 28 | */ 29 | @InterfaceAudience.Private 30 | public class TopicRule extends Rule { 31 | private Set topics = new HashSet<>(); 32 | 33 | public TopicRule(String topics) { 34 | this.topics.addAll(Arrays.stream(topics.split(",")).collect(Collectors.toList())); 35 | } 36 | 37 | public Set getTopics() { 38 | return topics; 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /kafka/hbase-kafka-proxy/src/test/java/org/apache/hadoop/hbase/kafka/ProducerForTesting.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.kafka; 19 | 20 | import java.util.ArrayList; 21 | import java.util.HashMap; 22 | import java.util.List; 23 | import java.util.Map; 24 | import java.util.concurrent.Future; 25 | import org.apache.avro.io.BinaryDecoder; 26 | import org.apache.avro.io.DecoderFactory; 27 | import org.apache.avro.specific.SpecificDatumReader; 28 | import org.apache.kafka.clients.producer.MockProducer; 29 | import org.apache.kafka.clients.producer.ProducerRecord; 30 | import org.apache.kafka.clients.producer.RecordMetadata; 31 | import org.apache.kafka.test.MockSerializer; 32 | 33 | /** 34 | * Mocks Kafka producer for testing 35 | */ 36 | public class ProducerForTesting extends MockProducer { 37 | Map> messages = new HashMap<>(); 38 | SpecificDatumReader dreader = new SpecificDatumReader<>(HBaseKafkaEvent.SCHEMA$); 39 | 40 | public ProducerForTesting() { 41 | super(true, new MockSerializer(), new MockSerializer()); 42 | } 43 | 44 | public Map> getMessages() { 45 | return messages; 46 | } 47 | 48 | @Override 49 | public Future send(ProducerRecord producerRecord) { 50 | try { 51 | BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(producerRecord.value(), null); 52 | HBaseKafkaEvent event = dreader.read(null, decoder); 53 | if (!messages.containsKey(producerRecord.topic())) { 54 | messages.put(producerRecord.topic(), new ArrayList<>()); 55 | } 56 | messages.get(producerRecord.topic()).add(event); 57 | return super.send(producerRecord); 58 | } catch (Exception e) { 59 | throw new RuntimeException(e); 60 | } 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /kafka/hbase-kafka-proxy/src/test/java/org/apache/hadoop/hbase/kafka/TestProcessMutations.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.kafka; 19 | 20 | import java.io.ByteArrayInputStream; 21 | import java.nio.charset.StandardCharsets; 22 | import java.util.ArrayList; 23 | import java.util.List; 24 | import org.apache.hadoop.conf.Configuration; 25 | import org.apache.hadoop.hbase.TableName; 26 | import org.apache.hadoop.hbase.client.Put; 27 | import org.apache.hadoop.hbase.client.Row; 28 | import org.apache.hadoop.hbase.client.Table; 29 | import org.apache.hadoop.hbase.testclassification.SmallTests; 30 | import org.junit.Assert; 31 | import org.junit.Before; 32 | import org.junit.Test; 33 | import org.junit.experimental.categories.Category; 34 | 35 | /** 36 | * Test that mutations are getting published to the topic 37 | */ 38 | @Category(SmallTests.class) 39 | public class TestProcessMutations { 40 | private static final String ROUTE_RULE1 = 41 | ""; 42 | 43 | ProducerForTesting myTestingProducer; 44 | 45 | @Before 46 | public void setup() { 47 | this.myTestingProducer = new ProducerForTesting(); 48 | } 49 | 50 | @Test 51 | public void testSendMessage() { 52 | TopicRoutingRules rules = new TopicRoutingRules(); 53 | try { 54 | rules.parseRules(new ByteArrayInputStream(ROUTE_RULE1.getBytes(StandardCharsets.UTF_8))); 55 | Configuration conf = new Configuration(); 56 | KafkaBridgeConnection connection = new KafkaBridgeConnection(conf, rules, myTestingProducer); 57 | long zeTimestamp = System.currentTimeMillis(); 58 | Put put = new Put("key1".getBytes(StandardCharsets.UTF_8), zeTimestamp); 59 | put.addColumn("FAMILY".getBytes(StandardCharsets.UTF_8), 60 | "not foo".getBytes(StandardCharsets.UTF_8), 61 | "VALUE should NOT pass".getBytes(StandardCharsets.UTF_8)); 62 | put.addColumn("FAMILY".getBytes(StandardCharsets.UTF_8), 63 | "foo".getBytes(StandardCharsets.UTF_8), 64 | "VALUE should pass".getBytes(StandardCharsets.UTF_8)); 65 | Table myTable = connection.getTable(TableName.valueOf("MyNamespace:MyTable")); 66 | List rows = new ArrayList<>(); 67 | rows.add(put); 68 | myTable.batch(rows, new Object[0]); 69 | 70 | Assert.assertFalse(myTestingProducer.getMessages().isEmpty()); 71 | } catch (Exception e) { 72 | Assert.fail(e.getMessage()); 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /kafka/hbase-kafka-proxy/src/test/java/org/apache/hadoop/hbase/kafka/TestQualifierMatching.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.kafka; 19 | 20 | import java.nio.charset.StandardCharsets; 21 | import org.apache.hadoop.hbase.testclassification.SmallTests; 22 | import org.junit.Assert; 23 | import org.junit.Test; 24 | import org.junit.experimental.categories.Category; 25 | 26 | /** 27 | * Make sure match rules work 28 | */ 29 | @Category(SmallTests.class) 30 | public class TestQualifierMatching { 31 | 32 | @Test 33 | public void testMatchQualfier() { 34 | DropRule rule = new DropRule(); 35 | rule.setQualifier("data".getBytes(StandardCharsets.UTF_8)); 36 | Assert.assertTrue(rule.qualifierMatch("data".getBytes(StandardCharsets.UTF_8))); 37 | 38 | rule = new DropRule(); 39 | rule.setQualifier("data1".getBytes(StandardCharsets.UTF_8)); 40 | Assert.assertFalse(rule.qualifierMatch("data".getBytes(StandardCharsets.UTF_8))); 41 | 42 | // if not set, it is a wildcard 43 | rule = new DropRule(); 44 | Assert.assertTrue(rule.qualifierMatch("data".getBytes(StandardCharsets.UTF_8))); 45 | } 46 | 47 | @Test 48 | public void testStartWithQualifier() { 49 | DropRule rule = new DropRule(); 50 | rule.setQualifier("data*".getBytes(StandardCharsets.UTF_8)); 51 | Assert.assertTrue(rule.isQualifierStartsWith()); 52 | Assert.assertFalse(rule.isQualifierEndsWith()); 53 | 54 | Assert.assertTrue(rule.qualifierMatch("data".getBytes(StandardCharsets.UTF_8))); 55 | Assert.assertTrue(rule.qualifierMatch("data1".getBytes(StandardCharsets.UTF_8))); 56 | Assert.assertTrue(rule.qualifierMatch("datafoobar".getBytes(StandardCharsets.UTF_8))); 57 | Assert.assertFalse(rule.qualifierMatch("datfoobar".getBytes(StandardCharsets.UTF_8))); 58 | Assert.assertFalse(rule.qualifierMatch("d".getBytes(StandardCharsets.UTF_8))); 59 | Assert.assertFalse(rule.qualifierMatch("".getBytes(StandardCharsets.UTF_8))); 60 | } 61 | 62 | @Test 63 | public void testEndsWithQualifier() { 64 | DropRule rule = new DropRule(); 65 | rule.setQualifier("*data".getBytes(StandardCharsets.UTF_8)); 66 | Assert.assertFalse(rule.isQualifierStartsWith()); 67 | Assert.assertTrue(rule.isQualifierEndsWith()); 68 | 69 | Assert.assertTrue(rule.qualifierMatch("data".getBytes(StandardCharsets.UTF_8))); 70 | Assert.assertTrue(rule.qualifierMatch("1data".getBytes(StandardCharsets.UTF_8))); 71 | Assert.assertTrue(rule.qualifierMatch("foobardata".getBytes(StandardCharsets.UTF_8))); 72 | Assert.assertFalse(rule.qualifierMatch("foobardat".getBytes(StandardCharsets.UTF_8))); 73 | Assert.assertFalse(rule.qualifierMatch("d".getBytes(StandardCharsets.UTF_8))); 74 | Assert.assertFalse(rule.qualifierMatch("".getBytes(StandardCharsets.UTF_8))); 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /kafka/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 22 | 4.0.0 23 | 24 | org.apache.hbase.connectors 25 | hbase-connectors 26 | ${revision} 27 | ../ 28 | 29 | kafka 30 | pom 31 | Apache HBase - Kafka 32 | Kafka Proxy for Apache HBase 33 | 34 | hbase-kafka-model 35 | hbase-kafka-proxy 36 | 37 | 38 | ${hbase-hadoop2.version} 39 | 40 | 41 | 42 | 43 | org.apache.avro 44 | avro 45 | ${avro.version} 46 | 47 | 48 | org.apache.hbase.connectors.kafka 49 | hbase-kafka-model 50 | ${revision} 51 | 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /spark/README.md: -------------------------------------------------------------------------------- 1 | 18 | 19 | # Apache HBase™ Spark Connector 20 | 21 | ## Spark, Scala and Configurable 
Options 22 | 23 | To generate an artifact for a different [Spark version](https://mvnrepository.com/artifact/org.apache.spark/spark-core) and/or [Scala version](https://www.scala-lang.org/download/all.html), 24 | [Hadoop version](https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-core), or [HBase version](https://mvnrepository.com/artifact/org.apache.hbase/hbase), pass command-line options as follows (changing version numbers appropriately): 25 | 26 | ``` 27 | $ mvn -Dspark.version=3.1.2 -Dscala.version=2.12.10 -Dhadoop-three.version=3.2.0 -Dscala.binary.version=2.12 -Dhbase.version=2.4.8 clean install 28 | ``` 29 | 30 | ## Configuration and Installation 31 | **Client-side** (Spark) configuration: 32 | - The HBase configuration file `hbase-site.xml` should be made available to Spark; it can be copied to `$SPARK_CONF_DIR` (default is `$SPARK_HOME/conf`). 33 | 34 | **Server-side** (HBase region servers) configuration: 35 | - The following jars need to be in the CLASSPATH of the HBase region servers: 36 | - scala-library, hbase-spark, and hbase-spark-protocol-shaded. 37 | - The server-side configuration is needed for column filter pushdown. 38 | - If you cannot perform the server-side configuration, consider using `.option("hbase.spark.pushdown.columnfilter", false)` (see the read sketch below). 39 | - The Scala library version must match the Scala version (2.11 or 2.12) used for compiling the connector. 40 | -------------------------------------------------------------------------------- /spark/hbase-spark-it/src/test/resources/hbase-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 23 | 24 | 25 | hbase.defaults.for.version.skip 26 | true 27 | 28 | 29 | hbase.hconnection.threads.keepalivetime 30 | 3 31 | 32 | 33 | -------------------------------------------------------------------------------- /spark/hbase-spark-protocol-shaded/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 21 | 22 | 4.0.0 23 | 24 | 25 | org.apache.hbase.connectors 26 | spark 27 | ${revision} 28 | ../ 29 | 30 | 31 | org.apache.hbase.connectors.spark 32 | hbase-spark-protocol-shaded 33 | Apache HBase - Spark Protocol (Shaded) 34 | 35 | 36 | 41 | 42 | org.apache.hbase.connectors.spark 43 | hbase-spark-protocol 44 | true 45 | 46 | 47 | org.apache.hbase.thirdparty 48 | hbase-shaded-protobuf 49 | ${hbase-thirdparty.version} 50 | 51 | 52 | 53 | 54 | 55 | 56 | org.apache.maven.plugins 57 | maven-shade-plugin 58 | 59 | 60 | 61 | shade 62 | 63 | package 64 | 65 | true 66 | true 67 | 69 | false 70 | 71 | 72 | com.google.protobuf 73 | org.apache.hbase.thirdparty.com.google.protobuf 74 | 75 | 76 | 77 | 78 | com.google.protobuf:protobuf-java 79 | org.apache.hbase.thirdparty:* 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /spark/hbase-spark-protocol/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 21 | 22 | 4.0.0 23 | 24 | 25 | org.apache.hbase.connectors 26 | spark 27 | ${revision} 28 | ../ 29 | 30 | 31 | org.apache.hbase.connectors.spark 32 | hbase-spark-protocol 33 | Apache HBase - Spark Protocol 34 | 35 | 36 | 37 | com.google.protobuf 38 | protobuf-java 39 | ${thirdparty.protobuf.version} 40 | 41 | 42 | 43 | 44 | 45 | 46 | org.apache.maven.plugins 47 | maven-compiler-plugin 48 | 49 | 50 | org.xolstice.maven.plugins 51 | protobuf-maven-plugin 52 | 53 | 54 | compile-protoc 55 | 56 | compile 57 | 58 | generate-sources 59
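The spark/README.md above notes that column filter pushdown can be disabled on the client when the server-side jars are not installed. As a rough read sketch of that configuration using the connector's Spark SQL data source: the table name `table1`, column family `cf1`, and the catalog layout are assumptions for illustration only, not taken from this repository, and `hbase-site.xml` is assumed to be visible to Spark as described in the README.

```
import org.apache.hadoop.hbase.spark.datasources.HBaseTableCatalog
import org.apache.spark.sql.SparkSession

object HBaseReadSketch {
  // Catalog for an assumed table "table1": the row key exposed as "col0",
  // plus one string column "col1" in the assumed column family "cf1".
  val catalog: String =
    """{
      |  "table":   {"namespace": "default", "name": "table1"},
      |  "rowkey":  "key",
      |  "columns": {
      |    "col0": {"cf": "rowkey", "col": "key",  "type": "string"},
      |    "col1": {"cf": "cf1",    "col": "col1", "type": "string"}
      |  }
      |}""".stripMargin

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("HBaseReadSketch").getOrCreate()

    val df = spark.read
      .format("org.apache.hadoop.hbase.spark")
      .option(HBaseTableCatalog.tableCatalog, catalog)
      // Client-side fallback when the region servers do not carry the connector jars.
      .option("hbase.spark.pushdown.columnfilter", false)
      .load()

    df.show()
    spark.stop()
  }
}
```

With the server-side jars installed, the `hbase.spark.pushdown.columnfilter` option can be omitted so that column filters are pushed down to the region servers.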
| 60 | 61 | 62 | 63 | org.apache.maven.plugins 64 | maven-source-plugin 65 | 66 | 67 | attach-sources 68 | 69 | jar 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /spark/hbase-spark-protocol/src/main/protobuf/SparkFilter.proto: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | // This file contains protocol buffers that are used for Spark filters 20 | // over in the hbase-spark module 21 | package hbase.pb; 22 | 23 | option java_package = "org.apache.hadoop.hbase.spark.protobuf.generated"; 24 | option java_outer_classname = "SparkFilterProtos"; 25 | option java_generic_services = true; 26 | option java_generate_equals_and_hash = true; 27 | option optimize_for = SPEED; 28 | 29 | message SQLPredicatePushDownCellToColumnMapping { 30 | required bytes column_family = 1; 31 | required bytes qualifier = 2; 32 | required string column_name = 3; 33 | } 34 | 35 | message SQLPredicatePushDownFilter { 36 | required string dynamic_logic_expression = 1; 37 | repeated bytes value_from_query_array = 2; 38 | repeated SQLPredicatePushDownCellToColumnMapping cell_to_column_mapping = 3; 39 | optional string encoderClassName = 4; 40 | } 41 | -------------------------------------------------------------------------------- /spark/hbase-spark/README.md: -------------------------------------------------------------------------------- 1 | 18 | 19 | ## ON PROTOBUFS 20 | This Maven module has the core protobuf definition files ('.proto') used by the HBase 21 | Spark connector, including its tests. 22 | 23 | Generation of Java files from the protobuf .proto files included here is done as 24 | part of the build. 25 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseBulkDeleteExample.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License.
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext; 19 | 20 | import java.util.ArrayList; 21 | import java.util.List; 22 | import org.apache.hadoop.conf.Configuration; 23 | import org.apache.hadoop.hbase.HBaseConfiguration; 24 | import org.apache.hadoop.hbase.TableName; 25 | import org.apache.hadoop.hbase.client.Delete; 26 | import org.apache.hadoop.hbase.spark.JavaHBaseContext; 27 | import org.apache.hadoop.hbase.util.Bytes; 28 | import org.apache.spark.SparkConf; 29 | import org.apache.spark.api.java.JavaRDD; 30 | import org.apache.spark.api.java.JavaSparkContext; 31 | import org.apache.spark.api.java.function.Function; 32 | import org.apache.yetus.audience.InterfaceAudience; 33 | 34 | /** 35 | * This is a simple example of deleting records in HBase with the bulkDelete function. 36 | */ 37 | @InterfaceAudience.Private 38 | final public class JavaHBaseBulkDeleteExample { 39 | 40 | private JavaHBaseBulkDeleteExample() { 41 | } 42 | 43 | public static void main(String[] args) { 44 | if (args.length < 1) { 45 | System.out.println("JavaHBaseBulkDeleteExample {tableName}"); 46 | return; 47 | } 48 | 49 | String tableName = args[0]; 50 | 51 | SparkConf sparkConf = new SparkConf().setAppName("JavaHBaseBulkDeleteExample " + tableName); 52 | JavaSparkContext jsc = new JavaSparkContext(sparkConf); 53 | 54 | try { 55 | List list = new ArrayList<>(5); 56 | list.add(Bytes.toBytes("1")); 57 | list.add(Bytes.toBytes("2")); 58 | list.add(Bytes.toBytes("3")); 59 | list.add(Bytes.toBytes("4")); 60 | list.add(Bytes.toBytes("5")); 61 | 62 | JavaRDD rdd = jsc.parallelize(list); 63 | 64 | Configuration conf = HBaseConfiguration.create(); 65 | 66 | JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf); 67 | 68 | hbaseContext.bulkDelete(rdd, TableName.valueOf(tableName), new DeleteFunction(), 4); 69 | } finally { 70 | jsc.stop(); 71 | } 72 | 73 | } 74 | 75 | public static class DeleteFunction implements Function { 76 | private static final long serialVersionUID = 1L; 77 | 78 | public Delete call(byte[] v) throws Exception { 79 | return new Delete(v); 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseBulkGetExample.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext; 19 | 20 | import java.util.ArrayList; 21 | import java.util.Iterator; 22 | import java.util.List; 23 | import org.apache.hadoop.conf.Configuration; 24 | import org.apache.hadoop.hbase.Cell; 25 | import org.apache.hadoop.hbase.HBaseConfiguration; 26 | import org.apache.hadoop.hbase.TableName; 27 | import org.apache.hadoop.hbase.client.Get; 28 | import org.apache.hadoop.hbase.client.Result; 29 | import org.apache.hadoop.hbase.spark.JavaHBaseContext; 30 | import org.apache.hadoop.hbase.util.Bytes; 31 | import org.apache.spark.SparkConf; 32 | import org.apache.spark.api.java.JavaRDD; 33 | import org.apache.spark.api.java.JavaSparkContext; 34 | import org.apache.spark.api.java.function.Function; 35 | import org.apache.yetus.audience.InterfaceAudience; 36 | 37 | /** 38 | * This is a simple example of getting records in HBase with the bulkGet function. 39 | */ 40 | @InterfaceAudience.Private 41 | final public class JavaHBaseBulkGetExample { 42 | 43 | private JavaHBaseBulkGetExample() { 44 | } 45 | 46 | public static void main(String[] args) { 47 | if (args.length < 1) { 48 | System.out.println("JavaHBaseBulkGetExample {tableName}"); 49 | return; 50 | } 51 | 52 | String tableName = args[0]; 53 | 54 | SparkConf sparkConf = new SparkConf().setAppName("JavaHBaseBulkGetExample " + tableName); 55 | JavaSparkContext jsc = new JavaSparkContext(sparkConf); 56 | 57 | try { 58 | List list = new ArrayList<>(5); 59 | list.add(Bytes.toBytes("1")); 60 | list.add(Bytes.toBytes("2")); 61 | list.add(Bytes.toBytes("3")); 62 | list.add(Bytes.toBytes("4")); 63 | list.add(Bytes.toBytes("5")); 64 | 65 | JavaRDD rdd = jsc.parallelize(list); 66 | 67 | Configuration conf = HBaseConfiguration.create(); 68 | 69 | JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf); 70 | 71 | hbaseContext.bulkGet(TableName.valueOf(tableName), 2, rdd, new GetFunction(), 72 | new ResultFunction()); 73 | } finally { 74 | jsc.stop(); 75 | } 76 | } 77 | 78 | public static class GetFunction implements Function { 79 | 80 | private static final long serialVersionUID = 1L; 81 | 82 | public Get call(byte[] v) throws Exception { 83 | return new Get(v); 84 | } 85 | } 86 | 87 | public static class ResultFunction implements Function { 88 | 89 | private static final long serialVersionUID = 1L; 90 | 91 | public String call(Result result) throws Exception { 92 | Iterator it = result.listCells().iterator(); 93 | StringBuilder b = new StringBuilder(); 94 | 95 | b.append(Bytes.toString(result.getRow())).append(":"); 96 | 97 | while (it.hasNext()) { 98 | Cell cell = it.next(); 99 | String q = Bytes.toString(cell.getQualifierArray()); 100 | if (q.equals("counter")) { 101 | b.append("(").append(Bytes.toString(cell.getQualifierArray())).append(",") 102 | .append(Bytes.toLong(cell.getValueArray())).append(")"); 103 | } else { 104 | b.append("(").append(Bytes.toString(cell.getQualifierArray())).append(",") 105 | .append(Bytes.toString(cell.getValueArray())).append(")"); 106 | } 107 | } 108 | return b.toString(); 109 | } 110 | } 111 | } 112 | 
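For comparison with the Java bulk get example above, here is a minimal Scala sketch of the same pattern built on `HBaseContext.bulkGet`; the table name `bulkGetTableTest` and the `local[2]` master URL are assumptions for illustration only.

```
import org.apache.hadoop.hbase.client.{Get, Result}
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.spark.{SparkConf, SparkContext}

object HBaseBulkGetSketch {
  def main(args: Array[String]): Unit = {
    val tableName = "bulkGetTableTest" // assumed table name
    val sparkConf = new SparkConf()
      .setAppName("HBaseBulkGetSketch " + tableName)
      .setMaster("local[2]") // assumption: run locally for the sketch
    val sc = new SparkContext(sparkConf)
    try {
      // Row keys to fetch, mirroring the Java example above.
      val rdd = sc.parallelize(Seq(Bytes.toBytes("1"), Bytes.toBytes("2"), Bytes.toBytes("3")))
      val hbaseContext = new HBaseContext(sc, HBaseConfiguration.create())

      // Batch size of 2, one Get per row key, and each Result rendered as its row key string.
      val getRdd = hbaseContext.bulkGet[Array[Byte], String](
        TableName.valueOf(tableName),
        2,
        rdd,
        (record: Array[Byte]) => new Get(record),
        (result: Result) => Bytes.toString(result.getRow))

      getRdd.collect().foreach(println)
    } finally {
      sc.stop()
    }
  }
}
```

The connector's own Scala examples under the example/hbasecontext package follow this same structure.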
-------------------------------------------------------------------------------- /spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseBulkLoadExample.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext; 19 | 20 | import java.util.ArrayList; 21 | import java.util.HashMap; 22 | import java.util.List; 23 | import org.apache.hadoop.conf.Configuration; 24 | import org.apache.hadoop.hbase.HBaseConfiguration; 25 | import org.apache.hadoop.hbase.HConstants; 26 | import org.apache.hadoop.hbase.TableName; 27 | import org.apache.hadoop.hbase.spark.FamilyHFileWriteOptions; 28 | import org.apache.hadoop.hbase.spark.JavaHBaseContext; 29 | import org.apache.hadoop.hbase.spark.KeyFamilyQualifier; 30 | import org.apache.hadoop.hbase.util.Bytes; 31 | import org.apache.hadoop.hbase.util.Pair; 32 | import org.apache.spark.SparkConf; 33 | import org.apache.spark.api.java.JavaRDD; 34 | import org.apache.spark.api.java.JavaSparkContext; 35 | import org.apache.spark.api.java.function.Function; 36 | import org.apache.yetus.audience.InterfaceAudience; 37 | 38 | /** 39 | * Run this example using command below: SPARK_HOME/bin/spark-submit --master local[2] --class 40 | * org.apache.hadoop.hbase.spark.example.hbasecontext.JavaHBaseBulkLoadExample 41 | * path/to/hbase-spark.jar {path/to/output/HFiles} This example will output put hfiles in 42 | * {path/to/output/HFiles}, and user can run 'hbase 43 | * org.apache.hadoop.hbase.tool.LoadIncrementalHFiles' to load the HFiles into table to verify this 44 | * example. 
45 | */ 46 | @InterfaceAudience.Private 47 | final public class JavaHBaseBulkLoadExample { 48 | private JavaHBaseBulkLoadExample() { 49 | } 50 | 51 | public static void main(String[] args) { 52 | if (args.length < 1) { 53 | System.out.println("JavaHBaseBulkLoadExample " + "{outputPath}"); 54 | return; 55 | } 56 | 57 | String tableName = "bulkload-table-test"; 58 | String columnFamily1 = "f1"; 59 | String columnFamily2 = "f2"; 60 | 61 | SparkConf sparkConf = new SparkConf().setAppName("JavaHBaseBulkLoadExample " + tableName); 62 | JavaSparkContext jsc = new JavaSparkContext(sparkConf); 63 | 64 | try { 65 | List list = new ArrayList(); 66 | // row1 67 | list.add("1," + columnFamily1 + ",b,1"); 68 | // row3 69 | list.add("3," + columnFamily1 + ",a,2"); 70 | list.add("3," + columnFamily1 + ",b,1"); 71 | list.add("3," + columnFamily2 + ",a,1"); 72 | /* row2 */ 73 | list.add("2," + columnFamily2 + ",a,3"); 74 | list.add("2," + columnFamily2 + ",b,3"); 75 | 76 | JavaRDD rdd = jsc.parallelize(list); 77 | 78 | Configuration conf = HBaseConfiguration.create(); 79 | JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf); 80 | 81 | hbaseContext.bulkLoad(rdd, TableName.valueOf(tableName), new BulkLoadFunction(), args[0], 82 | new HashMap(), false, HConstants.DEFAULT_MAX_FILE_SIZE); 83 | } finally { 84 | jsc.stop(); 85 | } 86 | } 87 | 88 | public static class BulkLoadFunction 89 | implements Function> { 90 | @Override 91 | public Pair call(String v1) throws Exception { 92 | if (v1 == null) { 93 | return null; 94 | } 95 | 96 | String[] strs = v1.split(","); 97 | if (strs.length != 4) { 98 | return null; 99 | } 100 | 101 | KeyFamilyQualifier kfq = new KeyFamilyQualifier(Bytes.toBytes(strs[0]), 102 | Bytes.toBytes(strs[1]), Bytes.toBytes(strs[2])); 103 | return new Pair(kfq, Bytes.toBytes(strs[3])); 104 | } 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseBulkPutExample.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext; 19 | 20 | import java.util.ArrayList; 21 | import java.util.List; 22 | import org.apache.hadoop.conf.Configuration; 23 | import org.apache.hadoop.hbase.HBaseConfiguration; 24 | import org.apache.hadoop.hbase.TableName; 25 | import org.apache.hadoop.hbase.client.Put; 26 | import org.apache.hadoop.hbase.spark.JavaHBaseContext; 27 | import org.apache.hadoop.hbase.util.Bytes; 28 | import org.apache.spark.SparkConf; 29 | import org.apache.spark.api.java.JavaRDD; 30 | import org.apache.spark.api.java.JavaSparkContext; 31 | import org.apache.spark.api.java.function.Function; 32 | import org.apache.yetus.audience.InterfaceAudience; 33 | 34 | /** 35 | * This is a simple example of putting records in HBase with the bulkPut function. 36 | */ 37 | @InterfaceAudience.Private 38 | final public class JavaHBaseBulkPutExample { 39 | 40 | private JavaHBaseBulkPutExample() { 41 | } 42 | 43 | public static void main(String[] args) { 44 | if (args.length < 2) { 45 | System.out.println("JavaHBaseBulkPutExample " + "{tableName} {columnFamily}"); 46 | return; 47 | } 48 | 49 | String tableName = args[0]; 50 | String columnFamily = args[1]; 51 | 52 | SparkConf sparkConf = new SparkConf().setAppName("JavaHBaseBulkPutExample " + tableName); 53 | JavaSparkContext jsc = new JavaSparkContext(sparkConf); 54 | 55 | try { 56 | List list = new ArrayList<>(5); 57 | list.add("1," + columnFamily + ",a,1"); 58 | list.add("2," + columnFamily + ",a,2"); 59 | list.add("3," + columnFamily + ",a,3"); 60 | list.add("4," + columnFamily + ",a,4"); 61 | list.add("5," + columnFamily + ",a,5"); 62 | 63 | JavaRDD rdd = jsc.parallelize(list); 64 | 65 | Configuration conf = HBaseConfiguration.create(); 66 | 67 | JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf); 68 | 69 | hbaseContext.bulkPut(rdd, TableName.valueOf(tableName), new PutFunction()); 70 | } finally { 71 | jsc.stop(); 72 | } 73 | } 74 | 75 | public static class PutFunction implements Function { 76 | 77 | private static final long serialVersionUID = 1L; 78 | 79 | public Put call(String v) throws Exception { 80 | String[] cells = v.split(","); 81 | Put put = new Put(Bytes.toBytes(cells[0])); 82 | 83 | put.addColumn(Bytes.toBytes(cells[1]), Bytes.toBytes(cells[2]), Bytes.toBytes(cells[3])); 84 | return put; 85 | } 86 | 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseDistributedScan.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext; 19 | 20 | import java.util.List; 21 | import org.apache.hadoop.conf.Configuration; 22 | import org.apache.hadoop.hbase.HBaseConfiguration; 23 | import org.apache.hadoop.hbase.TableName; 24 | import org.apache.hadoop.hbase.client.Result; 25 | import org.apache.hadoop.hbase.client.Scan; 26 | import org.apache.hadoop.hbase.io.ImmutableBytesWritable; 27 | import org.apache.hadoop.hbase.spark.JavaHBaseContext; 28 | import org.apache.hadoop.hbase.util.Bytes; 29 | import org.apache.spark.SparkConf; 30 | import org.apache.spark.api.java.JavaRDD; 31 | import org.apache.spark.api.java.JavaSparkContext; 32 | import org.apache.spark.api.java.function.Function; 33 | import org.apache.yetus.audience.InterfaceAudience; 34 | import scala.Tuple2; 35 | 36 | /** 37 | * This is a simple example of scanning records from HBase with the hbaseRDD function. 38 | */ 39 | @InterfaceAudience.Private 40 | final public class JavaHBaseDistributedScan { 41 | 42 | private JavaHBaseDistributedScan() { 43 | } 44 | 45 | public static void main(String[] args) { 46 | if (args.length < 1) { 47 | System.out.println("JavaHBaseDistributedScan {tableName}"); 48 | return; 49 | } 50 | 51 | String tableName = args[0]; 52 | 53 | SparkConf sparkConf = new SparkConf().setAppName("JavaHBaseDistributedScan " + tableName); 54 | JavaSparkContext jsc = new JavaSparkContext(sparkConf); 55 | 56 | try { 57 | Configuration conf = HBaseConfiguration.create(); 58 | 59 | JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf); 60 | 61 | Scan scan = new Scan(); 62 | scan.setCaching(100); 63 | 64 | JavaRDD> javaRdd = 65 | hbaseContext.hbaseRDD(TableName.valueOf(tableName), scan); 66 | 67 | List results = javaRdd.map(new ScanConvertFunction()).collect(); 68 | 69 | System.out.println("Result Size: " + results.size()); 70 | } finally { 71 | jsc.stop(); 72 | } 73 | } 74 | 75 | private static class ScanConvertFunction 76 | implements Function, String> { 77 | @Override 78 | public String call(Tuple2 v1) throws Exception { 79 | return Bytes.toString(v1._1().copyBytes()); 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseMapGetPutExample.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext; 19 | 20 | import java.util.ArrayList; 21 | import java.util.Iterator; 22 | import java.util.List; 23 | import org.apache.hadoop.conf.Configuration; 24 | import org.apache.hadoop.hbase.HBaseConfiguration; 25 | import org.apache.hadoop.hbase.TableName; 26 | import org.apache.hadoop.hbase.client.BufferedMutator; 27 | import org.apache.hadoop.hbase.client.Connection; 28 | import org.apache.hadoop.hbase.client.Get; 29 | import org.apache.hadoop.hbase.client.Put; 30 | import org.apache.hadoop.hbase.client.Result; 31 | import org.apache.hadoop.hbase.client.Table; 32 | import org.apache.hadoop.hbase.spark.JavaHBaseContext; 33 | import org.apache.hadoop.hbase.util.Bytes; 34 | import org.apache.spark.SparkConf; 35 | import org.apache.spark.api.java.JavaRDD; 36 | import org.apache.spark.api.java.JavaSparkContext; 37 | import org.apache.spark.api.java.function.Function; 38 | import org.apache.spark.api.java.function.VoidFunction; 39 | import org.apache.yetus.audience.InterfaceAudience; 40 | import scala.Tuple2; 41 | 42 | /** 43 | * This is a simple example of using the foreachPartition method with a HBase connection 44 | */ 45 | @InterfaceAudience.Private 46 | final public class JavaHBaseMapGetPutExample { 47 | 48 | private JavaHBaseMapGetPutExample() { 49 | } 50 | 51 | public static void main(String[] args) { 52 | if (args.length < 1) { 53 | System.out.println("JavaHBaseBulkGetExample {tableName}"); 54 | return; 55 | } 56 | 57 | final String tableName = args[0]; 58 | 59 | SparkConf sparkConf = new SparkConf().setAppName("JavaHBaseBulkGetExample " + tableName); 60 | JavaSparkContext jsc = new JavaSparkContext(sparkConf); 61 | 62 | try { 63 | List list = new ArrayList<>(5); 64 | list.add(Bytes.toBytes("1")); 65 | list.add(Bytes.toBytes("2")); 66 | list.add(Bytes.toBytes("3")); 67 | list.add(Bytes.toBytes("4")); 68 | list.add(Bytes.toBytes("5")); 69 | 70 | JavaRDD rdd = jsc.parallelize(list); 71 | Configuration conf = HBaseConfiguration.create(); 72 | 73 | JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf); 74 | 75 | hbaseContext.foreachPartition(rdd, new VoidFunction, Connection>>() { 76 | public void call(Tuple2, Connection> t) throws Exception { 77 | Table table = t._2().getTable(TableName.valueOf(tableName)); 78 | BufferedMutator mutator = t._2().getBufferedMutator(TableName.valueOf(tableName)); 79 | 80 | while (t._1().hasNext()) { 81 | byte[] b = t._1().next(); 82 | Result r = table.get(new Get(b)); 83 | if (r.getExists()) { 84 | mutator.mutate(new Put(b)); 85 | } 86 | } 87 | 88 | mutator.flush(); 89 | mutator.close(); 90 | table.close(); 91 | } 92 | }); 93 | } finally { 94 | jsc.stop(); 95 | } 96 | } 97 | 98 | public static class GetFunction implements Function { 99 | private static final long serialVersionUID = 1L; 100 | 101 | public Get call(byte[] v) throws Exception { 102 | return new Get(v); 103 | } 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/example/hbasecontext/JavaHBaseStreamingBulkPutExample.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext; 19 | 20 | import org.apache.hadoop.conf.Configuration; 21 | import org.apache.hadoop.hbase.HBaseConfiguration; 22 | import org.apache.hadoop.hbase.TableName; 23 | import org.apache.hadoop.hbase.client.Put; 24 | import org.apache.hadoop.hbase.spark.JavaHBaseContext; 25 | import org.apache.hadoop.hbase.util.Bytes; 26 | import org.apache.spark.SparkConf; 27 | import org.apache.spark.api.java.JavaSparkContext; 28 | import org.apache.spark.api.java.function.Function; 29 | import org.apache.spark.streaming.Duration; 30 | import org.apache.spark.streaming.api.java.JavaReceiverInputDStream; 31 | import org.apache.spark.streaming.api.java.JavaStreamingContext; 32 | import org.apache.yetus.audience.InterfaceAudience; 33 | 34 | /** 35 | * This is a simple example of BulkPut with Spark Streaming 36 | */ 37 | @InterfaceAudience.Private 38 | final public class JavaHBaseStreamingBulkPutExample { 39 | 40 | private JavaHBaseStreamingBulkPutExample() { 41 | } 42 | 43 | public static void main(String[] args) { 44 | if (args.length < 4) { 45 | System.out.println("JavaHBaseBulkPutExample " + "{host} {port} {tableName}"); 46 | return; 47 | } 48 | 49 | String host = args[0]; 50 | String port = args[1]; 51 | String tableName = args[2]; 52 | 53 | SparkConf sparkConf = new SparkConf() 54 | .setAppName("JavaHBaseStreamingBulkPutExample " + tableName + ":" + port + ":" + tableName); 55 | 56 | JavaSparkContext jsc = new JavaSparkContext(sparkConf); 57 | 58 | try { 59 | JavaStreamingContext jssc = new JavaStreamingContext(jsc, new Duration(1000)); 60 | 61 | JavaReceiverInputDStream javaDstream = 62 | jssc.socketTextStream(host, Integer.parseInt(port)); 63 | 64 | Configuration conf = HBaseConfiguration.create(); 65 | 66 | JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf); 67 | 68 | hbaseContext.streamBulkPut(javaDstream, TableName.valueOf(tableName), new PutFunction()); 69 | } finally { 70 | jsc.stop(); 71 | } 72 | } 73 | 74 | public static class PutFunction implements Function { 75 | 76 | private static final long serialVersionUID = 1L; 77 | 78 | public Put call(String v) throws Exception { 79 | String[] part = v.split(","); 80 | Put put = new Put(Bytes.toBytes(part[0])); 81 | 82 | put.addColumn(Bytes.toBytes(part[1]), Bytes.toBytes(part[2]), Bytes.toBytes(part[3])); 83 | return put; 84 | } 85 | 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/BulkLoadPartitioner.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import java.util 21 | import java.util.Comparator 22 | import org.apache.hadoop.hbase.util.Bytes 23 | import org.apache.spark.Partitioner 24 | import org.apache.yetus.audience.InterfaceAudience 25 | 26 | /** 27 | * A Partitioner implementation that will separate records to different 28 | * HBase Regions based on region splits 29 | * 30 | * @param startKeys The start keys for the given table 31 | */ 32 | @InterfaceAudience.Public 33 | class BulkLoadPartitioner(startKeys: Array[Array[Byte]]) extends Partitioner { 34 | // when table not exist, startKeys = Byte[0][] 35 | override def numPartitions: Int = if (startKeys.length == 0) 1 else startKeys.length 36 | 37 | override def getPartition(key: Any): Int = { 38 | 39 | val comparator: Comparator[Array[Byte]] = new Comparator[Array[Byte]] { 40 | override def compare(o1: Array[Byte], o2: Array[Byte]): Int = { 41 | Bytes.compareTo(o1, o2) 42 | } 43 | } 44 | 45 | val rowKey: Array[Byte] = 46 | key match { 47 | case qualifier: KeyFamilyQualifier => 48 | qualifier.rowKey 49 | case wrapper: ByteArrayWrapper => 50 | wrapper.value 51 | case _ => 52 | key.asInstanceOf[Array[Byte]] 53 | } 54 | var partition = util.Arrays.binarySearch(startKeys, rowKey, comparator) 55 | if (partition < 0) 56 | partition = partition * -1 + -2 57 | if (partition < 0) 58 | partition = 0 59 | partition 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/ByteArrayComparable.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import org.apache.hadoop.hbase.util.Bytes 21 | import org.apache.yetus.audience.InterfaceAudience 22 | 23 | @InterfaceAudience.Public 24 | class ByteArrayComparable(val bytes: Array[Byte], val offset: Int = 0, var length: Int = -1) 25 | extends Comparable[ByteArrayComparable] { 26 | 27 | if (length == -1) { 28 | length = bytes.length 29 | } 30 | 31 | override def compareTo(o: ByteArrayComparable): Int = { 32 | Bytes.compareTo(bytes, offset, length, o.bytes, o.offset, o.length) 33 | } 34 | 35 | override def hashCode(): Int = { 36 | Bytes.hashCode(bytes, offset, length) 37 | } 38 | 39 | override def equals(obj: Any): Boolean = { 40 | obj match { 41 | case b: ByteArrayComparable => 42 | Bytes.equals(bytes, offset, length, b.bytes, b.offset, b.length) 43 | case _ => 44 | false 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/ByteArrayWrapper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import java.io.Serializable 21 | import org.apache.hadoop.hbase.util.Bytes 22 | import org.apache.yetus.audience.InterfaceAudience 23 | 24 | /** 25 | * This is a wrapper over a byte array so it can work as 26 | * a key in a hashMap 27 | * 28 | * @param value The Byte Array value 29 | */ 30 | @InterfaceAudience.Public 31 | class ByteArrayWrapper(var value: Array[Byte]) 32 | extends Comparable[ByteArrayWrapper] 33 | with Serializable { 34 | override def compareTo(valueOther: ByteArrayWrapper): Int = { 35 | Bytes.compareTo(value, valueOther.value) 36 | } 37 | override def equals(o2: Any): Boolean = { 38 | o2 match { 39 | case wrapper: ByteArrayWrapper => 40 | Bytes.equals(value, wrapper.value) 41 | case _ => 42 | false 43 | } 44 | } 45 | override def hashCode(): Int = { 46 | Bytes.hashCode(value) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/ColumnFamilyQualifierMapKeyWrapper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import org.apache.hadoop.hbase.util.Bytes 21 | import org.apache.yetus.audience.InterfaceAudience 22 | 23 | /** 24 | * A wrapper class that will allow both columnFamily and qualifier to 25 | * be the key of a hashMap. Also allow for finding the value in a hashmap 26 | * with out cloning the HBase value from the HBase Cell object 27 | * @param columnFamily ColumnFamily byte array 28 | * @param columnFamilyOffSet Offset of columnFamily value in the array 29 | * @param columnFamilyLength Length of the columnFamily value in the columnFamily array 30 | * @param qualifier Qualifier byte array 31 | * @param qualifierOffSet Offset of qualifier value in the array 32 | * @param qualifierLength Length of the qualifier value with in the array 33 | */ 34 | @InterfaceAudience.Public 35 | class ColumnFamilyQualifierMapKeyWrapper( 36 | val columnFamily: Array[Byte], 37 | val columnFamilyOffSet: Int, 38 | val columnFamilyLength: Int, 39 | val qualifier: Array[Byte], 40 | val qualifierOffSet: Int, 41 | val qualifierLength: Int) 42 | extends Serializable { 43 | 44 | override def equals(other: Any): Boolean = { 45 | val otherWrapper = other.asInstanceOf[ColumnFamilyQualifierMapKeyWrapper] 46 | 47 | Bytes.compareTo( 48 | columnFamily, 49 | columnFamilyOffSet, 50 | columnFamilyLength, 51 | otherWrapper.columnFamily, 52 | otherWrapper.columnFamilyOffSet, 53 | otherWrapper.columnFamilyLength) == 0 && Bytes.compareTo( 54 | qualifier, 55 | qualifierOffSet, 56 | qualifierLength, 57 | otherWrapper.qualifier, 58 | otherWrapper.qualifierOffSet, 59 | otherWrapper.qualifierLength) == 0 60 | } 61 | 62 | override def hashCode(): Int = { 63 | Bytes.hashCode(columnFamily, columnFamilyOffSet, columnFamilyLength) + 64 | Bytes.hashCode(qualifier, qualifierOffSet, qualifierLength) 65 | } 66 | 67 | def cloneColumnFamily(): Array[Byte] = { 68 | val resultArray = new Array[Byte](columnFamilyLength) 69 | System.arraycopy(columnFamily, columnFamilyOffSet, resultArray, 0, columnFamilyLength) 70 | resultArray 71 | } 72 | 73 | def cloneQualifier(): Array[Byte] = { 74 | val resultArray = new Array[Byte](qualifierLength) 75 | System.arraycopy(qualifier, qualifierOffSet, resultArray, 0, qualifierLength) 76 | resultArray 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/FamiliesQualifiersValues.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import java.util 21 | import org.apache.yetus.audience.InterfaceAudience; 22 | 23 | /** 24 | * This object is a clean way to store and sort all cells that will be bulk 25 | * loaded into a single row 26 | */ 27 | @InterfaceAudience.Public 28 | class FamiliesQualifiersValues extends Serializable { 29 | // Tree maps are used because we need the results to 30 | // be sorted when we read them 31 | val familyMap = new util.TreeMap[ByteArrayWrapper, util.TreeMap[ByteArrayWrapper, Array[Byte]]]() 32 | 33 | // normally in a row there are more columns then 34 | // column families this wrapper is reused for column 35 | // family look ups 36 | val reusableWrapper = new ByteArrayWrapper(null) 37 | 38 | /** 39 | * Adds a new cell to an existing row 40 | * @param family HBase column family 41 | * @param qualifier HBase column qualifier 42 | * @param value HBase cell value 43 | */ 44 | def +=(family: Array[Byte], qualifier: Array[Byte], value: Array[Byte]): Unit = { 45 | 46 | reusableWrapper.value = family 47 | 48 | var qualifierValues = familyMap.get(reusableWrapper) 49 | 50 | if (qualifierValues == null) { 51 | qualifierValues = new util.TreeMap[ByteArrayWrapper, Array[Byte]]() 52 | familyMap.put(new ByteArrayWrapper(family), qualifierValues) 53 | } 54 | 55 | qualifierValues.put(new ByteArrayWrapper(qualifier), value) 56 | } 57 | 58 | /** 59 | * A wrapper for "+=" method above, can be used by Java 60 | * @param family HBase column family 61 | * @param qualifier HBase column qualifier 62 | * @param value HBase cell value 63 | */ 64 | def add(family: Array[Byte], qualifier: Array[Byte], value: Array[Byte]): Unit = { 65 | this += (family, qualifier, value) 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/FamilyHFileWriteOptions.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import java.io.Serializable 21 | import org.apache.yetus.audience.InterfaceAudience; 22 | 23 | /** 24 | * This object will hold optional data for how a given column family's 25 | * writer will work 26 | * 27 | * @param compression String to define the Compression to be used in the HFile 28 | * @param bloomType String to define the bloom type to be used in the HFile 29 | * @param blockSize The block size to be used in the HFile 30 | * @param dataBlockEncoding String to define the data block encoding to be used 31 | * in the HFile 32 | */ 33 | @InterfaceAudience.Public 34 | class FamilyHFileWriteOptions( 35 | val compression: String, 36 | val bloomType: String, 37 | val blockSize: Int, 38 | val dataBlockEncoding: String) 39 | extends Serializable 40 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/KeyFamilyQualifier.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import java.io.Serializable 21 | import org.apache.hadoop.hbase.util.Bytes 22 | import org.apache.yetus.audience.InterfaceAudience 23 | 24 | /** 25 | * This is the key to be used for sorting and shuffling. 26 | * 27 | * We will only partition on the rowKey but we will sort on all three 28 | * 29 | * @param rowKey Record RowKey 30 | * @param family Record ColumnFamily 31 | * @param qualifier Cell Qualifier 32 | */ 33 | @InterfaceAudience.Public 34 | class KeyFamilyQualifier( 35 | val rowKey: Array[Byte], 36 | val family: Array[Byte], 37 | val qualifier: Array[Byte]) 38 | extends Comparable[KeyFamilyQualifier] 39 | with Serializable { 40 | override def compareTo(o: KeyFamilyQualifier): Int = { 41 | var result = Bytes.compareTo(rowKey, o.rowKey) 42 | if (result == 0) { 43 | result = Bytes.compareTo(family, o.family) 44 | if (result == 0) result = Bytes.compareTo(qualifier, o.qualifier) 45 | } 46 | result 47 | } 48 | override def toString: String = { 49 | Bytes.toString(rowKey) + ":" + Bytes.toString(family) + ":" + Bytes.toString(qualifier) 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/Logging.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import org.apache.yetus.audience.InterfaceAudience 21 | import org.slf4j.Logger 22 | import org.slf4j.LoggerFactory 23 | import org.slf4j.impl.StaticLoggerBinder 24 | 25 | /** 26 | * Utility trait for classes that want to log data. Creates a SLF4J logger for the class and allows 27 | * logging messages at different levels using methods that only evaluate parameters lazily if the 28 | * log level is enabled. 29 | * Logging is private in Spark 2.0 30 | * This is to isolate incompatibilties across Spark releases. 31 | */ 32 | @InterfaceAudience.Private 33 | trait Logging { 34 | 35 | // Make the log field transient so that objects with Logging can 36 | // be serialized and used on another machine 37 | @transient private var log_ : Logger = null 38 | 39 | // Method to get the logger name for this object 40 | protected def logName = { 41 | // Ignore trailing $'s in the class names for Scala objects 42 | this.getClass.getName.stripSuffix("$") 43 | } 44 | 45 | // Method to get or create the logger for this object 46 | protected def log: Logger = { 47 | if (log_ == null) { 48 | initializeLogIfNecessary(false) 49 | log_ = LoggerFactory.getLogger(logName) 50 | } 51 | log_ 52 | } 53 | 54 | // Log methods that take only a String 55 | protected def logInfo(msg: => String) { 56 | if (log.isInfoEnabled) log.info(msg) 57 | } 58 | 59 | protected def logDebug(msg: => String) { 60 | if (log.isDebugEnabled) log.debug(msg) 61 | } 62 | 63 | protected def logTrace(msg: => String) { 64 | if (log.isTraceEnabled) log.trace(msg) 65 | } 66 | 67 | protected def logWarning(msg: => String) { 68 | if (log.isWarnEnabled) log.warn(msg) 69 | } 70 | 71 | protected def logError(msg: => String) { 72 | if (log.isErrorEnabled) log.error(msg) 73 | } 74 | 75 | // Log methods that take Throwables (Exceptions/Errors) too 76 | protected def logInfo(msg: => String, throwable: Throwable) { 77 | if (log.isInfoEnabled) log.info(msg, throwable) 78 | } 79 | 80 | protected def logDebug(msg: => String, throwable: Throwable) { 81 | if (log.isDebugEnabled) log.debug(msg, throwable) 82 | } 83 | 84 | protected def logTrace(msg: => String, throwable: Throwable) { 85 | if (log.isTraceEnabled) log.trace(msg, throwable) 86 | } 87 | 88 | protected def logWarning(msg: => String, throwable: Throwable) { 89 | if (log.isWarnEnabled) log.warn(msg, throwable) 90 | } 91 | 92 | protected def logError(msg: => String, throwable: Throwable) { 93 | if (log.isErrorEnabled) log.error(msg, throwable) 94 | } 95 | 96 | protected def initializeLogIfNecessary(isInterpreter: Boolean): Unit = { 97 | if (!Logging.initialized) { 98 | Logging.initLock.synchronized { 99 | if (!Logging.initialized) { 100 | initializeLogging(isInterpreter) 101 | } 102 | } 103 | } 104 | } 105 | 106 | private def initializeLogging(isInterpreter: Boolean): Unit = { 107 | // Don't use a logger in here, as this is itself occurring 
during initialization of a logger 108 | // If Log4j 1.2 is being used, but is not initialized, load a default properties file 109 | val binderClass = StaticLoggerBinder.getSingleton.getLoggerFactoryClassStr 110 | Logging.initialized = true 111 | 112 | // Force a call into slf4j to initialize it. Avoids this happening from multiple threads 113 | // and triggering this: http://mailman.qos.ch/pipermail/slf4j-dev/2010-April/002956.html 114 | log 115 | } 116 | } 117 | 118 | private object Logging { 119 | @volatile private var initialized = false 120 | val initLock = new Object() 121 | } 122 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/NewHBaseRDD.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import org.apache.hadoop.conf.Configuration 21 | import org.apache.hadoop.mapreduce.InputFormat 22 | import org.apache.spark.{InterruptibleIterator, Partition, SparkContext, TaskContext} 23 | import org.apache.spark.rdd.NewHadoopRDD 24 | import org.apache.yetus.audience.InterfaceAudience 25 | 26 | @InterfaceAudience.Public 27 | class NewHBaseRDD[K, V]( 28 | @transient val sc: SparkContext, 29 | @transient val inputFormatClass: Class[_ <: InputFormat[K, V]], 30 | @transient val keyClass: Class[K], 31 | @transient val valueClass: Class[V], 32 | @transient private val __conf: Configuration, 33 | val hBaseContext: HBaseContext) 34 | extends NewHadoopRDD(sc, inputFormatClass, keyClass, valueClass, __conf) { 35 | 36 | override def compute(theSplit: Partition, context: TaskContext): InterruptibleIterator[(K, V)] = { 37 | super.compute(theSplit, context) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/Bound.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark.datasources 19 | 20 | import org.apache.hadoop.hbase.spark.hbase._ 21 | import org.apache.yetus.audience.InterfaceAudience 22 | 23 | /** 24 | * The Bound represent the boudary for the scan 25 | * 26 | * @param b The byte array of the bound 27 | * @param inc inclusive or not. 28 | */ 29 | @InterfaceAudience.Private 30 | case class Bound(b: Array[Byte], inc: Boolean) 31 | // The non-overlapping ranges we need to scan, if lower is equal to upper, it is a get request 32 | 33 | @InterfaceAudience.Private 34 | case class Range(lower: Option[Bound], upper: Option[Bound]) 35 | 36 | @InterfaceAudience.Private 37 | object Range { 38 | def apply(region: HBaseRegion): Range = { 39 | Range( 40 | region.start.map(Bound(_, true)), 41 | if (region.end.get.size == 0) { 42 | None 43 | } else { 44 | region.end.map((Bound(_, false))) 45 | }) 46 | } 47 | } 48 | 49 | @InterfaceAudience.Private 50 | object Ranges { 51 | // We assume that 52 | // 1. r.lower.inc is true, and r.upper.inc is false 53 | // 2. for each range in rs, its upper.inc is false 54 | def and(r: Range, rs: Seq[Range]): Seq[Range] = { 55 | rs.flatMap { 56 | s => 57 | val lower = s.lower 58 | .map { 59 | x => 60 | // the scan has lower bound 61 | r.lower 62 | .map { 63 | y => 64 | // the region has lower bound 65 | if (ord.compare(x.b, y.b) < 0) { 66 | // scan lower bound is smaller than region server lower bound 67 | Some(y) 68 | } else { 69 | // scan low bound is greater or equal to region server lower bound 70 | Some(x) 71 | } 72 | } 73 | .getOrElse(Some(x)) 74 | } 75 | .getOrElse(r.lower) 76 | 77 | val upper = s.upper 78 | .map { 79 | x => 80 | // the scan has upper bound 81 | r.upper 82 | .map { 83 | y => 84 | // the region has upper bound 85 | if (ord.compare(x.b, y.b) >= 0) { 86 | // scan upper bound is larger than server upper bound 87 | // but region server scan stop is exclusive. It is OK here. 
88 | Some(y) 89 | } else { 90 | // scan upper bound is less or equal to region server upper bound 91 | Some(x) 92 | } 93 | } 94 | .getOrElse(Some(x)) 95 | } 96 | .getOrElse(r.upper) 97 | 98 | val c = lower 99 | .map { 100 | case x => 101 | upper 102 | .map { 103 | case y => 104 | ord.compare(x.b, y.b) 105 | } 106 | .getOrElse(-1) 107 | } 108 | .getOrElse(-1) 109 | if (c < 0) { 110 | Some(Range(lower, upper)) 111 | } else { 112 | None 113 | } 114 | }.seq 115 | } 116 | } 117 | 118 | @InterfaceAudience.Private 119 | object Points { 120 | def and(r: Range, ps: Seq[Array[Byte]]): Seq[Array[Byte]] = { 121 | ps.flatMap { 122 | p => 123 | if (ord.compare(r.lower.get.b, p) <= 0) { 124 | // if region lower bound is less or equal to the point 125 | if (r.upper.isDefined) { 126 | // if region upper bound is defined 127 | if (ord.compare(r.upper.get.b, p) > 0) { 128 | // if the upper bound is greater than the point (because upper bound is exclusive) 129 | Some(p) 130 | } else { 131 | None 132 | } 133 | } else { 134 | // if the region upper bound is not defined (infinity) 135 | Some(p) 136 | } 137 | } else { 138 | None 139 | } 140 | } 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/DataTypeParserWrapper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark.datasources 19 | 20 | import org.apache.spark.sql.catalyst.parser.CatalystSqlParser 21 | import org.apache.spark.sql.types.DataType 22 | import org.apache.yetus.audience.InterfaceAudience 23 | 24 | @InterfaceAudience.Private 25 | trait DataTypeParser { 26 | def parse(dataTypeString: String): DataType 27 | } 28 | 29 | @InterfaceAudience.Private 30 | object DataTypeParserWrapper extends DataTypeParser { 31 | def parse(dataTypeString: String): DataType = CatalystSqlParser.parseDataType(dataTypeString) 32 | } 33 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/HBaseResources.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
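For illustration, the intersection performed by Ranges.and and Points.and above (in Bound.scala) can be exercised directly: each region range is clipped against the predicate range, and empty intersections are dropped. A minimal sketch, assuming the hbase-spark module is on the classpath; the object name RangesAndSketch and the sample row keys are illustrative only.

import org.apache.hadoop.hbase.spark.datasources.{Bound, Range, Ranges}
import org.apache.hadoop.hbase.util.Bytes

object RangesAndSketch {
  def main(args: Array[String]): Unit = {
    // Predicate-derived scan range [row010, row020): lower inclusive, upper exclusive,
    // matching the assumptions documented on Ranges.and.
    val predicate = Range(
      Some(Bound(Bytes.toBytes("row010"), inc = true)),
      Some(Bound(Bytes.toBytes("row020"), inc = false)))
    // Two region ranges with exclusive upper bounds; the first region starts at the empty key.
    val regions = Seq(
      Range(
        Some(Bound(Bytes.toBytes(""), inc = true)),
        Some(Bound(Bytes.toBytes("row015"), inc = false))),
      Range(
        Some(Bound(Bytes.toBytes("row015"), inc = true)),
        Some(Bound(Bytes.toBytes("row030"), inc = false))))
    // Expected output: [row010, row015) and [row015, row020).
    Ranges.and(predicate, regions).foreach {
      r =>
        val lo = r.lower.map(b => Bytes.toString(b.b)).getOrElse("-inf")
        val hi = r.upper.map(b => Bytes.toString(b.b)).getOrElse("+inf")
        println(s"[$lo, $hi)")
    }
  }
}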
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark.datasources 19 | 20 | import org.apache.hadoop.hbase.TableName 21 | import org.apache.hadoop.hbase.client._ 22 | import org.apache.hadoop.hbase.spark.{HBaseConnectionCache, HBaseConnectionKey, HBaseRelation, SmartConnection} 23 | import org.apache.yetus.audience.InterfaceAudience 24 | import scala.language.implicitConversions 25 | 26 | // Resource and ReferencedResources are defined for extensibility, 27 | // e.g., consolidate scan and bulkGet in the future work. 28 | 29 | // User has to invoke release explicitly to release the resource, 30 | // and potentially parent resources 31 | @InterfaceAudience.Private 32 | trait Resource { 33 | def release(): Unit 34 | } 35 | 36 | @InterfaceAudience.Private 37 | case class ScanResource(tbr: TableResource, rs: ResultScanner) extends Resource { 38 | def release() { 39 | rs.close() 40 | tbr.release() 41 | } 42 | } 43 | 44 | @InterfaceAudience.Private 45 | case class GetResource(tbr: TableResource, rs: Array[Result]) extends Resource { 46 | def release() { 47 | tbr.release() 48 | } 49 | } 50 | 51 | @InterfaceAudience.Private 52 | trait ReferencedResource { 53 | var count: Int = 0 54 | def init(): Unit 55 | def destroy(): Unit 56 | def acquire() = synchronized { 57 | try { 58 | count += 1 59 | if (count == 1) { 60 | init() 61 | } 62 | } catch { 63 | case e: Throwable => 64 | release() 65 | throw e 66 | } 67 | } 68 | 69 | def release() = synchronized { 70 | count -= 1 71 | if (count == 0) { 72 | destroy() 73 | } 74 | } 75 | 76 | def releaseOnException[T](func: => T): T = { 77 | acquire() 78 | val ret = { 79 | try { 80 | func 81 | } catch { 82 | case e: Throwable => 83 | release() 84 | throw e 85 | } 86 | } 87 | ret 88 | } 89 | } 90 | 91 | @InterfaceAudience.Private 92 | case class TableResource(relation: HBaseRelation) extends ReferencedResource { 93 | var connection: SmartConnection = _ 94 | var table: Table = _ 95 | 96 | override def init(): Unit = { 97 | connection = HBaseConnectionCache.getConnection(relation.hbaseConf) 98 | table = connection.getTable(TableName.valueOf(relation.tableName)) 99 | } 100 | 101 | override def destroy(): Unit = { 102 | if (table != null) { 103 | table.close() 104 | table = null 105 | } 106 | if (connection != null) { 107 | connection.close() 108 | connection = null 109 | } 110 | } 111 | 112 | def getScanner(scan: Scan): ScanResource = releaseOnException { 113 | ScanResource(this, table.getScanner(scan)) 114 | } 115 | 116 | def get(list: java.util.List[org.apache.hadoop.hbase.client.Get]) = releaseOnException { 117 | GetResource(this, table.get(list)) 118 | } 119 | } 120 | 121 | @InterfaceAudience.Private 122 | case class RegionResource(relation: HBaseRelation) extends ReferencedResource { 123 | var connection: SmartConnection = _ 124 | var rl: RegionLocator = _ 125 | val regions = releaseOnException { 126 | val keys = rl.getStartEndKeys 127 | keys.getFirst 128 | .zip(keys.getSecond) 129 | .zipWithIndex 130 | .map( 131 | x => 132 | HBaseRegion( 133 | x._2, 134 | Some(x._1._1), 135 | Some(x._1._2), 136 | 
Some(rl.getRegionLocation(x._1._1).getHostname))) 137 | } 138 | 139 | override def init(): Unit = { 140 | connection = HBaseConnectionCache.getConnection(relation.hbaseConf) 141 | rl = connection.getRegionLocator(TableName.valueOf(relation.tableName)) 142 | } 143 | 144 | override def destroy(): Unit = { 145 | if (rl != null) { 146 | rl.close() 147 | rl = null 148 | } 149 | if (connection != null) { 150 | connection.close() 151 | connection = null 152 | } 153 | } 154 | } 155 | 156 | @InterfaceAudience.Private 157 | object HBaseResources { 158 | implicit def ScanResToScan(sr: ScanResource): ResultScanner = { 159 | sr.rs 160 | } 161 | 162 | implicit def GetResToResult(gr: GetResource): Array[Result] = { 163 | gr.rs 164 | } 165 | 166 | implicit def TableResToTable(tr: TableResource): Table = { 167 | tr.table 168 | } 169 | 170 | implicit def RegionResToRegions(rr: RegionResource): Seq[HBaseRegion] = { 171 | rr.regions 172 | } 173 | } 174 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/HBaseSparkConf.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark.datasources 19 | 20 | import org.apache.yetus.audience.InterfaceAudience; 21 | 22 | /** 23 | * This is the hbase configuration. User can either set them in SparkConf, which 24 | * will take effect globally, or configure it per table, which will overwrite the value 25 | * set in SparkConf. If not set, the default value will take effect. 26 | */ 27 | @InterfaceAudience.Public 28 | object HBaseSparkConf { 29 | 30 | /** 31 | * Set to false to disable server-side caching of blocks for this scan, 32 | * false by default, since full table scans generate too much BC churn. 33 | */ 34 | val QUERY_CACHEBLOCKS = "hbase.spark.query.cacheblocks" 35 | val DEFAULT_QUERY_CACHEBLOCKS = false 36 | 37 | /** The number of rows for caching that will be passed to scan. */ 38 | val QUERY_CACHEDROWS = "hbase.spark.query.cachedrows" 39 | 40 | /** Set the maximum number of values to return for each call to next() in scan. */ 41 | val QUERY_BATCHSIZE = "hbase.spark.query.batchsize" 42 | 43 | /** The number of BulkGets send to HBase. */ 44 | val BULKGET_SIZE = "hbase.spark.bulkget.size" 45 | val DEFAULT_BULKGET_SIZE = 1000 46 | 47 | /** Set to specify the location of hbase configuration file. 
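Since the keys above are plain string properties, they can also be supplied per read through the DataFrame options map, overriding anything set globally in SparkConf. A minimal sketch, assuming a table catalog JSON string cat like the one in the HBaseSource example later in this listing; the helper name readWithOptions and the chosen option values are illustrative only.

import org.apache.hadoop.hbase.spark.datasources.{HBaseSparkConf, HBaseTableCatalog}
import org.apache.spark.sql.{DataFrame, SQLContext}

// Hypothetical helper: read an HBase-backed DataFrame with per-table settings,
// here 1000 rows of scanner caching, block cache disabled, and bulk gets of 100.
def readWithOptions(sqlContext: SQLContext, cat: String): DataFrame =
  sqlContext.read
    .options(Map(
      HBaseTableCatalog.tableCatalog -> cat,
      HBaseSparkConf.QUERY_CACHEDROWS -> "1000",
      HBaseSparkConf.QUERY_CACHEBLOCKS -> "false",
      HBaseSparkConf.BULKGET_SIZE -> "100"))
    .format("org.apache.hadoop.hbase.spark")
    .load()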
*/ 48 | val HBASE_CONFIG_LOCATION = "hbase.spark.config.location" 49 | 50 | /** Set to specify whether create or use latest cached HBaseContext */ 51 | val USE_HBASECONTEXT = "hbase.spark.use.hbasecontext" 52 | val DEFAULT_USE_HBASECONTEXT = true 53 | 54 | /** Pushdown the filter to data source engine to increase the performance of queries. */ 55 | val PUSHDOWN_COLUMN_FILTER = "hbase.spark.pushdown.columnfilter" 56 | val DEFAULT_PUSHDOWN_COLUMN_FILTER = true 57 | 58 | /** Class name of the encoder, which encode data types from Spark to HBase bytes. */ 59 | val QUERY_ENCODER = "hbase.spark.query.encoder" 60 | val DEFAULT_QUERY_ENCODER = classOf[NaiveEncoder].getCanonicalName 61 | 62 | /** The timestamp used to filter columns with a specific timestamp. */ 63 | val TIMESTAMP = "hbase.spark.query.timestamp" 64 | 65 | /** The starting timestamp used to filter columns with a specific range of versions. */ 66 | val TIMERANGE_START = "hbase.spark.query.timerange.start" 67 | 68 | /** The ending timestamp used to filter columns with a specific range of versions. */ 69 | val TIMERANGE_END = "hbase.spark.query.timerange.end" 70 | 71 | /** The maximum number of version to return. */ 72 | val MAX_VERSIONS = "hbase.spark.query.maxVersions" 73 | 74 | /** Delayed time to close hbase-spark connection when no reference to this connection, in milliseconds. */ 75 | val DEFAULT_CONNECTION_CLOSE_DELAY = 10 * 60 * 1000 76 | } 77 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/JavaBytesEncoder.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark.datasources 19 | 20 | import org.apache.hadoop.hbase.HBaseInterfaceAudience 21 | import org.apache.hadoop.hbase.spark.Logging 22 | import org.apache.hadoop.hbase.spark.datasources.JavaBytesEncoder.JavaBytesEncoder 23 | import org.apache.spark.sql.types._ 24 | import org.apache.yetus.audience.InterfaceAudience 25 | import org.apache.yetus.audience.InterfaceStability 26 | 27 | /** 28 | * The ranges for the data type whose size is known. Whether the bound is inclusive 29 | * or exclusive is undefind, and upper to the caller to decide. 30 | * 31 | * @param low: the lower bound of the range. 32 | * @param upper: the upper bound of the range. 33 | */ 34 | @InterfaceAudience.LimitedPrivate(Array(HBaseInterfaceAudience.SPARK)) 35 | @InterfaceStability.Evolving 36 | case class BoundRange(low: Array[Byte], upper: Array[Byte]) 37 | 38 | /** 39 | * The class identifies the ranges for a java primitive type. 
The caller needs 40 | * to decide the bound is either inclusive or exclusive on its own. 41 | * information 42 | * 43 | * @param less: the set of ranges for LessThan/LessOrEqualThan 44 | * @param greater: the set of ranges for GreaterThan/GreaterThanOrEqualTo 45 | * @param value: the byte array of the original value 46 | */ 47 | @InterfaceAudience.LimitedPrivate(Array(HBaseInterfaceAudience.SPARK)) 48 | @InterfaceStability.Evolving 49 | case class BoundRanges(less: Array[BoundRange], greater: Array[BoundRange], value: Array[Byte]) 50 | 51 | /** 52 | * The trait to support plugin architecture for different encoder/decoder. 53 | * encode is used for serializing the data type to byte array and the filter is 54 | * used to filter out the unnecessary records. 55 | */ 56 | @InterfaceAudience.LimitedPrivate(Array(HBaseInterfaceAudience.SPARK)) 57 | @InterfaceStability.Evolving 58 | trait BytesEncoder { 59 | def encode(dt: DataType, value: Any): Array[Byte] 60 | 61 | /** 62 | * The function performing real filtering operations. The format of filterBytes depends on the 63 | * implementation of the BytesEncoder. 64 | * 65 | * @param input: the current input byte array that needs to be filtered out 66 | * @param offset1: the starting offset of the input byte array. 67 | * @param length1: the length of the input byte array. 68 | * @param filterBytes: the byte array provided by query condition. 69 | * @param offset2: the starting offset in the filterBytes. 70 | * @param length2: the length of the bytes in the filterBytes 71 | * @param ops: The operation of the filter operator. 72 | * @return true: the record satisfies the predicates 73 | * false: the record does not satisfy the predicates. 74 | */ 75 | def filter( 76 | input: Array[Byte], 77 | offset1: Int, 78 | length1: Int, 79 | filterBytes: Array[Byte], 80 | offset2: Int, 81 | length2: Int, 82 | ops: JavaBytesEncoder): Boolean 83 | 84 | /** 85 | * Currently, it is used for partition pruning. 86 | * As for some codec, the order may be inconsistent between java primitive 87 | * type and its byte array. We may have to split the predicates on some 88 | * of the java primitive type into multiple predicates. 89 | * 90 | * For example in naive codec, some of the java primitive types have to be 91 | * split into multiple predicates, and union these predicates together to 92 | * make the predicates be performed correctly. 93 | * For example, if we have "COLUMN < 2", we will transform it into 94 | * "0 <= COLUMN < 2 OR Integer.MIN_VALUE <= COLUMN <= -1" 95 | */ 96 | def ranges(in: Any): Option[BoundRanges] 97 | } 98 | 99 | @InterfaceAudience.LimitedPrivate(Array(HBaseInterfaceAudience.SPARK)) 100 | @InterfaceStability.Evolving 101 | object JavaBytesEncoder extends Enumeration with Logging { 102 | type JavaBytesEncoder = Value 103 | val Greater, GreaterEqual, Less, LessEqual, Equal, Unknown = Value 104 | 105 | /** 106 | * create the encoder/decoder 107 | * 108 | * @param clsName: the class name of the encoder/decoder class 109 | * @return the instance of the encoder plugin. 
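A minimal sketch of the plugin API described above, assuming the hbase-spark module is on the classpath; the object name EncoderSketch and the predicate value 2 are illustrative only. It obtains the default encoder through JavaBytesEncoder.create and prints the byte ranges produced for an Int predicate such as COLUMN < 2.

import org.apache.hadoop.hbase.spark.datasources.{HBaseSparkConf, JavaBytesEncoder}
import org.apache.hadoop.hbase.util.Bytes

object EncoderSketch {
  def main(args: Array[String]): Unit = {
    // create falls back to NaiveEncoder if the class name cannot be instantiated.
    val encoder = JavaBytesEncoder.create(HBaseSparkConf.DEFAULT_QUERY_ENCODER)
    // For a signed Int under the naive codec, a predicate like COLUMN < 2 is expected
    // to split into two byte ranges, because byte order differs from numeric order.
    encoder.ranges(2).foreach {
      br =>
        br.less.foreach(r =>
          println(s"less: ${Bytes.toStringBinary(r.low)} .. ${Bytes.toStringBinary(r.upper)}"))
        br.greater.foreach(r =>
          println(s"greater: ${Bytes.toStringBinary(r.low)} .. ${Bytes.toStringBinary(r.upper)}"))
    }
  }
}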
110 | */ 111 | def create(clsName: String): BytesEncoder = { 112 | try { 113 | Class.forName(clsName).newInstance.asInstanceOf[BytesEncoder] 114 | } catch { 115 | case _: Throwable => 116 | logWarning(s"$clsName cannot be initiated, falling back to naive encoder") 117 | new NaiveEncoder() 118 | } 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/SerDes.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark.datasources 19 | 20 | import org.apache.hadoop.hbase.util.Bytes 21 | import org.apache.yetus.audience.InterfaceAudience 22 | 23 | // TODO: This is not really used in code. 24 | @InterfaceAudience.Public 25 | trait SerDes { 26 | def serialize(value: Any): Array[Byte] 27 | def deserialize(bytes: Array[Byte], start: Int, end: Int): Any 28 | } 29 | 30 | // TODO: This is not really used in code. 31 | @InterfaceAudience.Private 32 | class DoubleSerDes extends SerDes { 33 | override def serialize(value: Any): Array[Byte] = Bytes.toBytes(value.asInstanceOf[Double]) 34 | override def deserialize(bytes: Array[Byte], start: Int, end: Int): Any = { 35 | Bytes.toDouble(bytes, start) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/SerializableConfiguration.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
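Although the SerDes trait above is marked as unused, a round trip through DoubleSerDes shows the intended contract; a minimal sketch, with the value 3.14 chosen arbitrarily.

import org.apache.hadoop.hbase.spark.datasources.DoubleSerDes

val serde = new DoubleSerDes
val bytes = serde.serialize(3.14)                      // 8-byte encoding via Bytes.toBytes
val back = serde.deserialize(bytes, 0, bytes.length)   // returns Any, here a Double
assert(back == 3.14)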
17 | */ 18 | package org.apache.hadoop.hbase.spark.datasources 19 | 20 | import java.io.{IOException, ObjectInputStream, ObjectOutputStream} 21 | import org.apache.hadoop.conf.Configuration 22 | import org.apache.yetus.audience.InterfaceAudience 23 | import scala.util.control.NonFatal 24 | 25 | @InterfaceAudience.Private 26 | class SerializableConfiguration(@transient var value: Configuration) extends Serializable { 27 | private def writeObject(out: ObjectOutputStream): Unit = tryOrIOException { 28 | out.defaultWriteObject() 29 | value.write(out) 30 | } 31 | 32 | private def readObject(in: ObjectInputStream): Unit = tryOrIOException { 33 | value = new Configuration(false) 34 | value.readFields(in) 35 | } 36 | 37 | def tryOrIOException(block: => Unit) { 38 | try { 39 | block 40 | } catch { 41 | case e: IOException => throw e 42 | case NonFatal(t) => throw new IOException(t) 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/Utils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark.datasources 19 | 20 | import java.sql.{Date, Timestamp} 21 | import org.apache.hadoop.hbase.spark.AvroSerdes 22 | import org.apache.hadoop.hbase.util.Bytes 23 | import org.apache.spark.sql.types._ 24 | import org.apache.spark.unsafe.types.UTF8String 25 | import org.apache.yetus.audience.InterfaceAudience; 26 | 27 | @InterfaceAudience.Private 28 | object Utils { 29 | 30 | /** 31 | * Parses the hbase field to it's corresponding 32 | * scala type which can then be put into a Spark GenericRow 33 | * which is then automatically converted by Spark. 
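Hadoop's Configuration is not java-serializable, which is why the SerializableConfiguration wrapper above ships it through write()/readFields(). A minimal sketch of broadcasting an HBase configuration to executors with it, assuming an existing SparkContext; the helper name broadcastConf and the property queried on the executor are illustrative only.

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.spark.datasources.SerializableConfiguration
import org.apache.spark.SparkContext

def broadcastConf(sc: SparkContext): Unit = {
  val wrapped = new SerializableConfiguration(HBaseConfiguration.create())
  val bcConf = sc.broadcast(wrapped)
  sc.parallelize(1 to 4).foreach {
    _ =>
      // On the executor, .value.value yields a deserialized Configuration copy;
      // the println goes to the executor's stdout.
      val conf = bcConf.value.value
      println(conf.get("hbase.zookeeper.quorum"))
  }
}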
34 | */ 35 | def hbaseFieldToScalaType(f: Field, src: Array[Byte], offset: Int, length: Int): Any = { 36 | if (f.exeSchema.isDefined) { 37 | // If we have avro schema defined, use it to get record, and then convert them to catalyst data type 38 | val m = AvroSerdes.deserialize(src, f.exeSchema.get) 39 | val n = f.avroToCatalyst.map(_(m)) 40 | n.get 41 | } else { 42 | // Fall back to atomic type 43 | f.dt match { 44 | case BooleanType => src(offset) != 0 45 | case ByteType => src(offset) 46 | case ShortType => Bytes.toShort(src, offset) 47 | case IntegerType => Bytes.toInt(src, offset) 48 | case LongType => Bytes.toLong(src, offset) 49 | case FloatType => Bytes.toFloat(src, offset) 50 | case DoubleType => Bytes.toDouble(src, offset) 51 | case DateType => new Date(Bytes.toLong(src, offset)) 52 | case TimestampType => new Timestamp(Bytes.toLong(src, offset)) 53 | case StringType => Bytes.toString(src, offset, length) 54 | case BinaryType => 55 | val newArray = new Array[Byte](length) 56 | System.arraycopy(src, offset, newArray, 0, length) 57 | newArray 58 | case _: DecimalType => Bytes.toBigDecimal(src, offset, length) 59 | // TODO: SparkSqlSerializer.deserialize[Any](src) 60 | case _ => throw new Exception(s"unsupported data type ${f.dt}") 61 | } 62 | } 63 | } 64 | 65 | // convert input to data type 66 | def toBytes(input: Any, field: Field): Array[Byte] = { 67 | if (field.schema.isDefined) { 68 | // Here we assume the top level type is structType 69 | val record = field.catalystToAvro(input) 70 | AvroSerdes.serialize(record, field.schema.get) 71 | } else { 72 | field.dt match { 73 | case BooleanType => Bytes.toBytes(input.asInstanceOf[Boolean]) 74 | case ByteType => Array(input.asInstanceOf[Number].byteValue) 75 | case ShortType => Bytes.toBytes(input.asInstanceOf[Number].shortValue) 76 | case IntegerType => Bytes.toBytes(input.asInstanceOf[Number].intValue) 77 | case LongType => Bytes.toBytes(input.asInstanceOf[Number].longValue) 78 | case FloatType => Bytes.toBytes(input.asInstanceOf[Number].floatValue) 79 | case DoubleType => Bytes.toBytes(input.asInstanceOf[Number].doubleValue) 80 | case DateType | TimestampType => Bytes.toBytes(input.asInstanceOf[java.util.Date].getTime) 81 | case StringType => Bytes.toBytes(input.toString) 82 | case BinaryType => input.asInstanceOf[Array[Byte]] 83 | case _: DecimalType => Bytes.toBytes(input.asInstanceOf[java.math.BigDecimal]) 84 | case _ => throw new Exception(s"unsupported data type ${field.dt}") 85 | } 86 | } 87 | } 88 | 89 | // increment Byte array's value by 1 90 | def incrementByteArray(array: Array[Byte]): Array[Byte] = { 91 | if (array.length == 0) { 92 | return null 93 | } 94 | var index = -1 // index of the byte we have to increment 95 | var a = array.length - 1 96 | 97 | while (a >= 0) { 98 | if (array(a) != (-1).toByte) { 99 | index = a 100 | a = -1 // break from the loop because we found a non -1 element 101 | } 102 | a = a - 1 103 | } 104 | 105 | if (index < 0) { 106 | return null 107 | } 108 | val returnArray = new Array[Byte](array.length) 109 | 110 | for (a <- 0 until index) { 111 | returnArray(a) = array(a) 112 | } 113 | returnArray(index) = (array(index) + 1).toByte 114 | for (a <- index + 1 until array.length) { 115 | returnArray(a) = 0.toByte 116 | } 117 | 118 | returnArray 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/package.scala: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import org.apache.hadoop.hbase.util.Bytes 21 | import scala.math.Ordering 22 | 23 | // TODO: add @InterfaceAudience.Private if https://issues.scala-lang.org/browse/SI-3600 is resolved 24 | package object hbase { 25 | type HBaseType = Array[Byte] 26 | def bytesMin = new Array[Byte](0) 27 | def bytesMax = null 28 | val ByteMax = -1.asInstanceOf[Byte] 29 | val ByteMin = 0.asInstanceOf[Byte] 30 | val ord: Ordering[HBaseType] = new Ordering[HBaseType] { 31 | def compare(x: Array[Byte], y: Array[Byte]): Int = { 32 | return Bytes.compareTo(x, y) 33 | } 34 | } 35 | // Do not use BinaryType.ordering 36 | implicit val order: Ordering[HBaseType] = ord 37 | 38 | } 39 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/datasources/HBaseSource.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
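The package object above exposes Bytes.compareTo both as the explicit ord and as an implicit Ordering, so row keys sort in their on-disk lexicographic order rather than through BinaryType.ordering. A minimal sketch, with the sample keys chosen arbitrarily.

import org.apache.hadoop.hbase.spark.hbase._
import org.apache.hadoop.hbase.util.Bytes

val keys: Seq[HBaseType] = Seq(Bytes.toBytes("row010"), Bytes.toBytes("row002"), Bytes.toBytes("row001"))
// The implicit Ordering[HBaseType] from the package object drives sorted().
val sortedKeys = keys.sorted
sortedKeys.foreach(k => println(Bytes.toString(k)))   // row001, row002, row010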
17 | */ 18 | package org.apache.hadoop.hbase.spark.example.datasources 19 | 20 | import org.apache.hadoop.hbase.spark.datasources.HBaseTableCatalog 21 | import org.apache.spark.SparkConf 22 | import org.apache.spark.SparkContext 23 | import org.apache.spark.sql.DataFrame 24 | import org.apache.spark.sql.SQLContext 25 | import org.apache.yetus.audience.InterfaceAudience 26 | 27 | @InterfaceAudience.Private 28 | case class HBaseRecord( 29 | col0: String, 30 | col1: Boolean, 31 | col2: Double, 32 | col3: Float, 33 | col4: Int, 34 | col5: Long, 35 | col6: Short, 36 | col7: String, 37 | col8: Byte) 38 | 39 | @InterfaceAudience.Private 40 | object HBaseRecord { 41 | def apply(i: Int): HBaseRecord = { 42 | val s = s"""row${"%03d".format(i)}""" 43 | HBaseRecord( 44 | s, 45 | i % 2 == 0, 46 | i.toDouble, 47 | i.toFloat, 48 | i, 49 | i.toLong, 50 | i.toShort, 51 | s"String$i extra", 52 | i.toByte) 53 | } 54 | } 55 | 56 | @InterfaceAudience.Private 57 | object HBaseSource { 58 | val cat = s"""{ 59 | |"table":{"namespace":"default", "name":"HBaseSourceExampleTable"}, 60 | |"rowkey":"key", 61 | |"columns":{ 62 | |"col0":{"cf":"rowkey", "col":"key", "type":"string"}, 63 | |"col1":{"cf":"cf1", "col":"col1", "type":"boolean"}, 64 | |"col2":{"cf":"cf2", "col":"col2", "type":"double"}, 65 | |"col3":{"cf":"cf3", "col":"col3", "type":"float"}, 66 | |"col4":{"cf":"cf4", "col":"col4", "type":"int"}, 67 | |"col5":{"cf":"cf5", "col":"col5", "type":"bigint"}, 68 | |"col6":{"cf":"cf6", "col":"col6", "type":"smallint"}, 69 | |"col7":{"cf":"cf7", "col":"col7", "type":"string"}, 70 | |"col8":{"cf":"cf8", "col":"col8", "type":"tinyint"} 71 | |} 72 | |}""".stripMargin 73 | 74 | def main(args: Array[String]) { 75 | val sparkConf = new SparkConf().setAppName("HBaseSourceExample") 76 | val sc = new SparkContext(sparkConf) 77 | val sqlContext = new SQLContext(sc) 78 | 79 | import sqlContext.implicits._ 80 | 81 | def withCatalog(cat: String): DataFrame = { 82 | sqlContext.read 83 | .options(Map(HBaseTableCatalog.tableCatalog -> cat)) 84 | .format("org.apache.hadoop.hbase.spark") 85 | .load() 86 | } 87 | 88 | val data = (0 to 255).map { i => HBaseRecord(i) } 89 | 90 | sc.parallelize(data) 91 | .toDF 92 | .write 93 | .options(Map(HBaseTableCatalog.tableCatalog -> cat, HBaseTableCatalog.newTable -> "5")) 94 | .format("org.apache.hadoop.hbase.spark") 95 | .save() 96 | 97 | val df = withCatalog(cat) 98 | df.show() 99 | df.filter($"col0" <= "row005") 100 | .select($"col0", $"col1") 101 | .show 102 | df.filter($"col0" === "row005" || $"col0" <= "row005") 103 | .select($"col0", $"col1") 104 | .show 105 | df.filter($"col0" > "row250") 106 | .select($"col0", $"col1") 107 | .show 108 | df.registerTempTable("table1") 109 | val c = sqlContext.sql("select count(col1) from table1 where col0 < 'row050'") 110 | c.show() 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseBulkDeleteExample.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext 19 | 20 | import org.apache.hadoop.hbase.HBaseConfiguration 21 | import org.apache.hadoop.hbase.TableName 22 | import org.apache.hadoop.hbase.client.Delete 23 | import org.apache.hadoop.hbase.spark.HBaseContext 24 | import org.apache.hadoop.hbase.util.Bytes 25 | import org.apache.spark.SparkConf 26 | import org.apache.spark.SparkContext 27 | import org.apache.yetus.audience.InterfaceAudience 28 | 29 | /** 30 | * This is a simple example of deleting records in HBase 31 | * with the bulkDelete function. 32 | */ 33 | @InterfaceAudience.Private 34 | object HBaseBulkDeleteExample { 35 | def main(args: Array[String]) { 36 | if (args.length < 1) { 37 | println("HBaseBulkDeleteExample {tableName} missing an argument") 38 | return 39 | } 40 | 41 | val tableName = args(0) 42 | 43 | val sparkConf = new SparkConf().setAppName("HBaseBulkDeleteExample " + tableName) 44 | val sc = new SparkContext(sparkConf) 45 | try { 46 | // [Array[Byte]] 47 | val rdd = sc.parallelize( 48 | Array( 49 | Bytes.toBytes("1"), 50 | Bytes.toBytes("2"), 51 | Bytes.toBytes("3"), 52 | Bytes.toBytes("4"), 53 | Bytes.toBytes("5"))) 54 | 55 | val conf = HBaseConfiguration.create() 56 | 57 | val hbaseContext = new HBaseContext(sc, conf) 58 | hbaseContext.bulkDelete[Array[Byte]]( 59 | rdd, 60 | TableName.valueOf(tableName), 61 | putRecord => new Delete(putRecord), 62 | 4) 63 | } finally { 64 | sc.stop() 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseBulkGetExample.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext 19 | 20 | import org.apache.hadoop.hbase.CellUtil 21 | import org.apache.hadoop.hbase.HBaseConfiguration 22 | import org.apache.hadoop.hbase.TableName 23 | import org.apache.hadoop.hbase.client.Get 24 | import org.apache.hadoop.hbase.client.Result 25 | import org.apache.hadoop.hbase.spark.HBaseContext 26 | import org.apache.hadoop.hbase.util.Bytes 27 | import org.apache.spark.SparkConf 28 | import org.apache.spark.SparkContext 29 | import org.apache.yetus.audience.InterfaceAudience 30 | 31 | /** 32 | * This is a simple example of getting records from HBase 33 | * with the bulkGet function. 34 | */ 35 | @InterfaceAudience.Private 36 | object HBaseBulkGetExample { 37 | def main(args: Array[String]) { 38 | if (args.length < 1) { 39 | println("HBaseBulkGetExample {tableName} missing an argument") 40 | return 41 | } 42 | 43 | val tableName = args(0) 44 | 45 | val sparkConf = new SparkConf().setAppName("HBaseBulkGetExample " + tableName) 46 | val sc = new SparkContext(sparkConf) 47 | 48 | try { 49 | 50 | // [(Array[Byte])] 51 | val rdd = sc.parallelize( 52 | Array( 53 | Bytes.toBytes("1"), 54 | Bytes.toBytes("2"), 55 | Bytes.toBytes("3"), 56 | Bytes.toBytes("4"), 57 | Bytes.toBytes("5"), 58 | Bytes.toBytes("6"), 59 | Bytes.toBytes("7"))) 60 | 61 | val conf = HBaseConfiguration.create() 62 | 63 | val hbaseContext = new HBaseContext(sc, conf) 64 | 65 | val getRdd = hbaseContext.bulkGet[Array[Byte], String]( 66 | TableName.valueOf(tableName), 67 | 2, 68 | rdd, 69 | record => { 70 | System.out.println("making Get") 71 | new Get(record) 72 | }, 73 | (result: Result) => { 74 | 75 | val it = result.listCells().iterator() 76 | val b = new StringBuilder 77 | 78 | b.append(Bytes.toString(result.getRow) + ":") 79 | 80 | while (it.hasNext) { 81 | val cell = it.next() 82 | val q = Bytes.toString(CellUtil.cloneQualifier(cell)) 83 | if (q.equals("counter")) { 84 | b.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")") 85 | } else { 86 | b.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")") 87 | } 88 | } 89 | b.toString() 90 | }) 91 | 92 | getRdd 93 | .collect() 94 | .foreach(v => println(v)) 95 | 96 | } finally { 97 | sc.stop() 98 | } 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseBulkPutExample.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext 19 | 20 | import org.apache.hadoop.hbase.HBaseConfiguration 21 | import org.apache.hadoop.hbase.TableName 22 | import org.apache.hadoop.hbase.client.Put 23 | import org.apache.hadoop.hbase.spark.HBaseContext 24 | import org.apache.hadoop.hbase.util.Bytes 25 | import org.apache.spark.SparkConf 26 | import org.apache.spark.SparkContext 27 | import org.apache.yetus.audience.InterfaceAudience 28 | 29 | /** 30 | * This is a simple example of putting records in HBase 31 | * with the bulkPut function. 32 | */ 33 | @InterfaceAudience.Private 34 | object HBaseBulkPutExample { 35 | def main(args: Array[String]) { 36 | if (args.length < 2) { 37 | println("HBaseBulkPutExample {tableName} {columnFamily} are missing an arguments") 38 | return 39 | } 40 | 41 | val tableName = args(0) 42 | val columnFamily = args(1) 43 | 44 | val sparkConf = new SparkConf().setAppName( 45 | "HBaseBulkPutExample " + 46 | tableName + " " + columnFamily) 47 | val sc = new SparkContext(sparkConf) 48 | 49 | try { 50 | // [(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])] 51 | val rdd = sc.parallelize( 52 | Array( 53 | ( 54 | Bytes.toBytes("1"), 55 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))), 56 | ( 57 | Bytes.toBytes("2"), 58 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))), 59 | ( 60 | Bytes.toBytes("3"), 61 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))), 62 | ( 63 | Bytes.toBytes("4"), 64 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))), 65 | ( 66 | Bytes.toBytes("5"), 67 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5")))))) 68 | 69 | val conf = HBaseConfiguration.create() 70 | 71 | val hbaseContext = new HBaseContext(sc, conf) 72 | hbaseContext.bulkPut[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])]( 73 | rdd, 74 | TableName.valueOf(tableName), 75 | (putRecord) => { 76 | val put = new Put(putRecord._1) 77 | putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, putValue._3)) 78 | put 79 | }); 80 | } finally { 81 | sc.stop() 82 | } 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseBulkPutExampleFromFile.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext 19 | 20 | import org.apache.hadoop.hbase.HBaseConfiguration 21 | import org.apache.hadoop.hbase.TableName 22 | import org.apache.hadoop.hbase.client.Put 23 | import org.apache.hadoop.hbase.spark.HBaseContext 24 | import org.apache.hadoop.hbase.util.Bytes 25 | import org.apache.hadoop.io.LongWritable 26 | import org.apache.hadoop.io.Text 27 | import org.apache.hadoop.mapred.TextInputFormat 28 | import org.apache.spark.SparkConf 29 | import org.apache.spark.SparkContext 30 | import org.apache.yetus.audience.InterfaceAudience 31 | 32 | /** 33 | * This is a simple example of putting records in HBase 34 | * with the bulkPut function. In this example we are 35 | * getting the put information from a file 36 | */ 37 | @InterfaceAudience.Private 38 | object HBaseBulkPutExampleFromFile { 39 | def main(args: Array[String]) { 40 | if (args.length < 3) { 41 | println( 42 | "HBaseBulkPutExampleFromFile {tableName} {columnFamily} {inputFile} are missing an argument") 43 | return 44 | } 45 | 46 | val tableName = args(0) 47 | val columnFamily = args(1) 48 | val inputFile = args(2) 49 | 50 | val sparkConf = new SparkConf().setAppName( 51 | "HBaseBulkPutExampleFromFile " + 52 | tableName + " " + columnFamily + " " + inputFile) 53 | val sc = new SparkContext(sparkConf) 54 | 55 | try { 56 | var rdd = sc 57 | .hadoopFile(inputFile, classOf[TextInputFormat], classOf[LongWritable], classOf[Text]) 58 | .map( 59 | v => { 60 | System.out.println("reading-" + v._2.toString) 61 | v._2.toString 62 | }) 63 | 64 | val conf = HBaseConfiguration.create() 65 | 66 | val hbaseContext = new HBaseContext(sc, conf) 67 | hbaseContext.bulkPut[String]( 68 | rdd, 69 | TableName.valueOf(tableName), 70 | (putRecord) => { 71 | System.out.println("hbase-" + putRecord) 72 | val put = new Put(Bytes.toBytes("Value- " + putRecord)) 73 | put.addColumn(Bytes.toBytes("c"), Bytes.toBytes("1"), Bytes.toBytes(putRecord.length())) 74 | put 75 | }); 76 | } finally { 77 | sc.stop() 78 | } 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseBulkPutTimestampExample.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext 19 | 20 | import org.apache.hadoop.hbase.{HBaseConfiguration, TableName} 21 | import org.apache.hadoop.hbase.client.Put 22 | import org.apache.hadoop.hbase.spark.HBaseContext 23 | import org.apache.hadoop.hbase.util.Bytes 24 | import org.apache.spark.SparkConf 25 | import org.apache.spark.SparkContext 26 | import org.apache.yetus.audience.InterfaceAudience 27 | 28 | /** 29 | * This is a simple example of putting records in HBase 30 | * with the bulkPut function. In this example we are 31 | * also setting the timestamp in the put 32 | */ 33 | @InterfaceAudience.Private 34 | object HBaseBulkPutTimestampExample { 35 | def main(args: Array[String]) { 36 | if (args.length < 2) { 37 | System.out.println( 38 | "HBaseBulkPutTimestampExample {tableName} {columnFamily} are missing an argument") 39 | return 40 | } 41 | 42 | val tableName = args(0) 43 | val columnFamily = args(1) 44 | 45 | val sparkConf = new SparkConf().setAppName( 46 | "HBaseBulkPutTimestampExample " + 47 | tableName + " " + columnFamily) 48 | val sc = new SparkContext(sparkConf) 49 | 50 | try { 51 | 52 | val rdd = sc.parallelize( 53 | Array( 54 | ( 55 | Bytes.toBytes("6"), 56 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))), 57 | ( 58 | Bytes.toBytes("7"), 59 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))), 60 | ( 61 | Bytes.toBytes("8"), 62 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))), 63 | ( 64 | Bytes.toBytes("9"), 65 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))), 66 | ( 67 | Bytes.toBytes("10"), 68 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5")))))) 69 | 70 | val conf = HBaseConfiguration.create() 71 | 72 | val timeStamp = System.currentTimeMillis() 73 | 74 | val hbaseContext = new HBaseContext(sc, conf) 75 | hbaseContext.bulkPut[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])]( 76 | rdd, 77 | TableName.valueOf(tableName), 78 | (putRecord) => { 79 | val put = new Put(putRecord._1) 80 | putRecord._2.foreach( 81 | (putValue) => put.addColumn(putValue._1, putValue._2, timeStamp, putValue._3)) 82 | put 83 | }) 84 | } finally { 85 | sc.stop() 86 | } 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseDistributedScanExample.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext 19 | 20 | import org.apache.hadoop.hbase.HBaseConfiguration 21 | import org.apache.hadoop.hbase.TableName 22 | import org.apache.hadoop.hbase.client.Scan 23 | import org.apache.hadoop.hbase.spark.HBaseContext 24 | import org.apache.hadoop.hbase.util.Bytes 25 | import org.apache.spark.SparkConf 26 | import org.apache.spark.SparkContext 27 | import org.apache.yetus.audience.InterfaceAudience 28 | 29 | /** 30 | * This is a simple example of scanning records from HBase 31 | * with the hbaseRDD function in Distributed fashion. 32 | */ 33 | @InterfaceAudience.Private 34 | object HBaseDistributedScanExample { 35 | def main(args: Array[String]) { 36 | if (args.length < 1) { 37 | println("HBaseDistributedScanExample {tableName} missing an argument") 38 | return 39 | } 40 | 41 | val tableName = args(0) 42 | 43 | val sparkConf = new SparkConf().setAppName("HBaseDistributedScanExample " + tableName) 44 | val sc = new SparkContext(sparkConf) 45 | 46 | try { 47 | val conf = HBaseConfiguration.create() 48 | 49 | val hbaseContext = new HBaseContext(sc, conf) 50 | 51 | val scan = new Scan() 52 | scan.setCaching(100) 53 | 54 | val getRdd = hbaseContext.hbaseRDD(TableName.valueOf(tableName), scan) 55 | 56 | getRdd.foreach(v => println(Bytes.toString(v._1.get()))) 57 | 58 | println( 59 | "Length: " + getRdd 60 | .map(r => r._1.copyBytes()) 61 | .collect() 62 | .length); 63 | } finally { 64 | sc.stop() 65 | } 66 | } 67 | 68 | } 69 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/hbasecontext/HBaseStreamingBulkPutExample.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.spark.example.hbasecontext 19 | 20 | import org.apache.hadoop.hbase.HBaseConfiguration 21 | import org.apache.hadoop.hbase.TableName 22 | import org.apache.hadoop.hbase.client.Put 23 | import org.apache.hadoop.hbase.spark.HBaseContext 24 | import org.apache.hadoop.hbase.util.Bytes 25 | import org.apache.spark.SparkConf 26 | import org.apache.spark.SparkContext 27 | import org.apache.spark.streaming.Seconds 28 | import org.apache.spark.streaming.StreamingContext 29 | import org.apache.yetus.audience.InterfaceAudience 30 | 31 | /** 32 | * This is a simple example of BulkPut with Spark Streaming 33 | */ 34 | @InterfaceAudience.Private 35 | object HBaseStreamingBulkPutExample { 36 | def main(args: Array[String]) { 37 | if (args.length < 4) { 38 | println( 39 | "HBaseStreamingBulkPutExample " + 40 | "{host} {port} {tableName} {columnFamily} are missing an argument") 41 | return 42 | } 43 | 44 | val host = args(0) 45 | val port = args(1) 46 | val tableName = args(2) 47 | val columnFamily = args(3) 48 | 49 | val sparkConf = new SparkConf().setAppName( 50 | "HBaseStreamingBulkPutExample " + 51 | tableName + " " + columnFamily) 52 | val sc = new SparkContext(sparkConf) 53 | try { 54 | val ssc = new StreamingContext(sc, Seconds(1)) 55 | 56 | val lines = ssc.socketTextStream(host, port.toInt) 57 | 58 | val conf = HBaseConfiguration.create() 59 | 60 | val hbaseContext = new HBaseContext(sc, conf) 61 | 62 | hbaseContext.streamBulkPut[String]( 63 | lines, 64 | TableName.valueOf(tableName), 65 | (putRecord) => { 66 | if (putRecord.length() > 0) { 67 | val put = new Put(Bytes.toBytes(putRecord)) 68 | put.addColumn(Bytes.toBytes("c"), Bytes.toBytes("foo"), Bytes.toBytes("bar")) 69 | put 70 | } else { 71 | null 72 | } 73 | }) 74 | ssc.start() 75 | ssc.awaitTerminationOrTimeout(60000) 76 | } finally { 77 | sc.stop() 78 | } 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/rdd/HBaseBulkDeleteExample.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.spark.example.rdd 19 | 20 | import org.apache.hadoop.hbase.HBaseConfiguration 21 | import org.apache.hadoop.hbase.TableName 22 | import org.apache.hadoop.hbase.client.Delete 23 | import org.apache.hadoop.hbase.spark.HBaseContext 24 | import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._ 25 | import org.apache.hadoop.hbase.util.Bytes 26 | import org.apache.spark.SparkConf 27 | import org.apache.spark.SparkContext 28 | import org.apache.yetus.audience.InterfaceAudience 29 | 30 | /** 31 | * This is a simple example of deleting records in HBase 32 | * with the bulkDelete function. 33 | */ 34 | @InterfaceAudience.Private 35 | object HBaseBulkDeleteExample { 36 | def main(args: Array[String]) { 37 | if (args.length < 1) { 38 | println("HBaseBulkDeleteExample {tableName} are missing an argument") 39 | return 40 | } 41 | 42 | val tableName = args(0) 43 | 44 | val sparkConf = new SparkConf().setAppName("HBaseBulkDeleteExample " + tableName) 45 | val sc = new SparkContext(sparkConf) 46 | try { 47 | // [Array[Byte]] 48 | val rdd = sc.parallelize( 49 | Array( 50 | Bytes.toBytes("1"), 51 | Bytes.toBytes("2"), 52 | Bytes.toBytes("3"), 53 | Bytes.toBytes("4"), 54 | Bytes.toBytes("5"))) 55 | 56 | val conf = HBaseConfiguration.create() 57 | 58 | val hbaseContext = new HBaseContext(sc, conf) 59 | 60 | rdd.hbaseBulkDelete( 61 | hbaseContext, 62 | TableName.valueOf(tableName), 63 | putRecord => new Delete(putRecord), 64 | 4) 65 | 66 | } finally { 67 | sc.stop() 68 | } 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/rdd/HBaseBulkGetExample.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark.example.rdd 19 | 20 | import org.apache.hadoop.hbase.CellUtil 21 | import org.apache.hadoop.hbase.HBaseConfiguration 22 | import org.apache.hadoop.hbase.TableName 23 | import org.apache.hadoop.hbase.client.Get 24 | import org.apache.hadoop.hbase.client.Result 25 | import org.apache.hadoop.hbase.spark.HBaseContext 26 | import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._ 27 | import org.apache.hadoop.hbase.util.Bytes 28 | import org.apache.spark.SparkConf 29 | import org.apache.spark.SparkContext 30 | import org.apache.yetus.audience.InterfaceAudience 31 | 32 | /** 33 | * This is a simple example of getting records from HBase 34 | * with the bulkGet function. 
35 | */ 36 | @InterfaceAudience.Private 37 | object HBaseBulkGetExample { 38 | def main(args: Array[String]) { 39 | if (args.length < 1) { 40 | println("HBaseBulkGetExample {tableName} is missing an argument") 41 | return 42 | } 43 | 44 | val tableName = args(0) 45 | 46 | val sparkConf = new SparkConf().setAppName("HBaseBulkGetExample " + tableName) 47 | val sc = new SparkContext(sparkConf) 48 | 49 | try { 50 | 51 | // [(Array[Byte])] 52 | val rdd = sc.parallelize( 53 | Array( 54 | Bytes.toBytes("1"), 55 | Bytes.toBytes("2"), 56 | Bytes.toBytes("3"), 57 | Bytes.toBytes("4"), 58 | Bytes.toBytes("5"), 59 | Bytes.toBytes("6"), 60 | Bytes.toBytes("7"))) 61 | 62 | val conf = HBaseConfiguration.create() 63 | 64 | val hbaseContext = new HBaseContext(sc, conf) 65 | 66 | val getRdd = rdd.hbaseBulkGet[String]( 67 | hbaseContext, 68 | TableName.valueOf(tableName), 69 | 2, 70 | record => { 71 | System.out.println("making Get") 72 | new Get(record) 73 | }, 74 | (result: Result) => { 75 | 76 | val it = result.listCells().iterator() 77 | val b = new StringBuilder 78 | 79 | b.append(Bytes.toString(result.getRow) + ":") 80 | 81 | while (it.hasNext) { 82 | val cell = it.next() 83 | val q = Bytes.toString(CellUtil.cloneQualifier(cell)) 84 | if (q.equals("counter")) { 85 | b.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")") 86 | } else { 87 | b.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")") 88 | } 89 | } 90 | b.toString() 91 | }) 92 | 93 | getRdd 94 | .collect() 95 | .foreach(v => println(v)) 96 | 97 | } finally { 98 | sc.stop() 99 | } 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/rdd/HBaseBulkPutExample.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark.example.rdd 19 | 20 | import org.apache.hadoop.hbase.HBaseConfiguration 21 | import org.apache.hadoop.hbase.TableName 22 | import org.apache.hadoop.hbase.client.Put 23 | import org.apache.hadoop.hbase.spark.HBaseContext 24 | import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._ 25 | import org.apache.hadoop.hbase.util.Bytes 26 | import org.apache.spark.SparkConf 27 | import org.apache.spark.SparkContext 28 | import org.apache.yetus.audience.InterfaceAudience 29 | 30 | /** 31 | * This is a simple example of putting records in HBase 32 | * with the bulkPut function. 
33 | */ 34 | @InterfaceAudience.Private 35 | object HBaseBulkPutExample { 36 | def main(args: Array[String]) { 37 | if (args.length < 2) { 38 | println("HBaseBulkPutExample {tableName} {columnFamily} are missing arguments") 39 | return 40 | } 41 | 42 | val tableName = args(0) 43 | val columnFamily = args(1) 44 | 45 | val sparkConf = new SparkConf().setAppName( 46 | "HBaseBulkPutExample " + 47 | tableName + " " + columnFamily) 48 | val sc = new SparkContext(sparkConf) 49 | 50 | try { 51 | // [(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])] 52 | val rdd = sc.parallelize( 53 | Array( 54 | ( 55 | Bytes.toBytes("1"), 56 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))), 57 | ( 58 | Bytes.toBytes("2"), 59 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))), 60 | ( 61 | Bytes.toBytes("3"), 62 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))), 63 | ( 64 | Bytes.toBytes("4"), 65 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))), 66 | ( 67 | Bytes.toBytes("5"), 68 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5")))))) 69 | 70 | val conf = HBaseConfiguration.create() 71 | 72 | val hbaseContext = new HBaseContext(sc, conf) 73 | 74 | rdd.hbaseBulkPut( 75 | hbaseContext, 76 | TableName.valueOf(tableName), 77 | (putRecord) => { 78 | val put = new Put(putRecord._1) 79 | putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, putValue._3)) 80 | put 81 | }) 82 | 83 | } finally { 84 | sc.stop() 85 | } 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/rdd/HBaseForeachPartitionExample.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License.
17 | */ 18 | package org.apache.hadoop.hbase.spark.example.rdd 19 | 20 | import org.apache.hadoop.hbase.HBaseConfiguration 21 | import org.apache.hadoop.hbase.TableName 22 | import org.apache.hadoop.hbase.client.Put 23 | import org.apache.hadoop.hbase.spark.HBaseContext 24 | import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._ 25 | import org.apache.hadoop.hbase.util.Bytes 26 | import org.apache.spark.SparkConf 27 | import org.apache.spark.SparkContext 28 | import org.apache.yetus.audience.InterfaceAudience 29 | 30 | /** 31 | * This is a simple example of using the foreachPartition 32 | * method with an HBase connection 33 | */ 34 | @InterfaceAudience.Private 35 | object HBaseForeachPartitionExample { 36 | def main(args: Array[String]) { 37 | if (args.length < 2) { 38 | println("HBaseForeachPartitionExample {tableName} {columnFamily} are missing arguments") 39 | return 40 | } 41 | 42 | val tableName = args(0) 43 | val columnFamily = args(1) 44 | 45 | val sparkConf = new SparkConf().setAppName( 46 | "HBaseForeachPartitionExample " + 47 | tableName + " " + columnFamily) 48 | val sc = new SparkContext(sparkConf) 49 | 50 | try { 51 | // [(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])] 52 | val rdd = sc.parallelize( 53 | Array( 54 | ( 55 | Bytes.toBytes("1"), 56 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))), 57 | ( 58 | Bytes.toBytes("2"), 59 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))), 60 | ( 61 | Bytes.toBytes("3"), 62 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))), 63 | ( 64 | Bytes.toBytes("4"), 65 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))), 66 | ( 67 | Bytes.toBytes("5"), 68 | Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5")))))) 69 | 70 | val conf = HBaseConfiguration.create() 71 | 72 | val hbaseContext = new HBaseContext(sc, conf) 73 | 74 | rdd.hbaseForeachPartition( 75 | hbaseContext, 76 | (it, connection) => { 77 | val m = connection.getBufferedMutator(TableName.valueOf(tableName)) 78 | 79 | it.foreach( 80 | r => { 81 | val put = new Put(r._1) 82 | r._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, putValue._3)) 83 | m.mutate(put) 84 | }) 85 | m.flush() 86 | m.close() 87 | }) 88 | 89 | } finally { 90 | sc.stop() 91 | } 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/example/rdd/HBaseMapPartitionExample.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License.
17 | */ 18 | package org.apache.hadoop.hbase.spark.example.rdd 19 | 20 | import org.apache.hadoop.hbase.HBaseConfiguration 21 | import org.apache.hadoop.hbase.TableName 22 | import org.apache.hadoop.hbase.client.Get 23 | import org.apache.hadoop.hbase.spark.HBaseContext 24 | import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._ 25 | import org.apache.hadoop.hbase.util.Bytes 26 | import org.apache.spark.SparkConf 27 | import org.apache.spark.SparkContext 28 | import org.apache.yetus.audience.InterfaceAudience 29 | 30 | /** 31 | * This is a simple example of using the mapPartitions 32 | * method with an HBase connection 33 | */ 34 | @InterfaceAudience.Private 35 | object HBaseMapPartitionExample { 36 | def main(args: Array[String]) { 37 | if (args.length < 1) { 38 | println("HBaseMapPartitionExample {tableName} is missing an argument") 39 | return 40 | } 41 | 42 | val tableName = args(0) 43 | 44 | val sparkConf = new SparkConf().setAppName("HBaseMapPartitionExample " + tableName) 45 | val sc = new SparkContext(sparkConf) 46 | 47 | try { 48 | 49 | // [(Array[Byte])] 50 | val rdd = sc.parallelize( 51 | Array( 52 | Bytes.toBytes("1"), 53 | Bytes.toBytes("2"), 54 | Bytes.toBytes("3"), 55 | Bytes.toBytes("4"), 56 | Bytes.toBytes("5"), 57 | Bytes.toBytes("6"), 58 | Bytes.toBytes("7"))) 59 | 60 | val conf = HBaseConfiguration.create() 61 | 62 | val hbaseContext = new HBaseContext(sc, conf) 63 | 64 | val getRdd = rdd.hbaseMapPartitions[String]( 65 | hbaseContext, 66 | (it, connection) => { 67 | val table = connection.getTable(TableName.valueOf(tableName)) 68 | it.map { 69 | r => 70 | // batching would be faster. This is just an example 71 | val result = table.get(new Get(r)) 72 | 73 | val it = result.listCells().iterator() 74 | val b = new StringBuilder 75 | 76 | b.append(Bytes.toString(result.getRow) + ":") 77 | 78 | while (it.hasNext) { 79 | val cell = it.next() 80 | val q = Bytes.toString(cell.getQualifierArray, cell.getQualifierOffset, cell.getQualifierLength) // use offset/length: the cell's backing array may be shared 81 | if (q.equals("counter")) { 82 | b.append("(" + q + "," + Bytes.toLong(cell.getValueArray, cell.getValueOffset) + ")") 83 | } else { 84 | b.append("(" + q + "," + Bytes.toString(cell.getValueArray, cell.getValueOffset, cell.getValueLength) + ")") 85 | } 86 | } 87 | b.toString() 88 | } 89 | }) 90 | 91 | getRdd 92 | .collect() 93 | .foreach(v => println(v)) 94 | 95 | } finally { 96 | sc.stop() 97 | } 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/test/java/org/apache/hadoop/hbase/spark/TestJavaHBaseContextForLargeRows.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License.
17 | */ 18 | package org.apache.hadoop.hbase.spark; 19 | 20 | import java.io.IOException; 21 | import java.util.ArrayList; 22 | import java.util.Arrays; 23 | import java.util.List; 24 | import org.apache.hadoop.conf.Configuration; 25 | import org.apache.hadoop.hbase.HBaseClassTestRule; 26 | import org.apache.hadoop.hbase.TableName; 27 | import org.apache.hadoop.hbase.client.Admin; 28 | import org.apache.hadoop.hbase.client.Connection; 29 | import org.apache.hadoop.hbase.client.ConnectionFactory; 30 | import org.apache.hadoop.hbase.client.Put; 31 | import org.apache.hadoop.hbase.client.Table; 32 | import org.apache.hadoop.hbase.testclassification.MediumTests; 33 | import org.apache.hadoop.hbase.testclassification.MiscTests; 34 | import org.apache.hadoop.hbase.util.Bytes; 35 | import org.apache.spark.api.java.JavaSparkContext; 36 | import org.junit.BeforeClass; 37 | import org.junit.ClassRule; 38 | import org.junit.experimental.categories.Category; 39 | 40 | @Category({ MiscTests.class, MediumTests.class }) 41 | public class TestJavaHBaseContextForLargeRows extends TestJavaHBaseContext { 42 | 43 | @ClassRule 44 | public static final HBaseClassTestRule TIMEOUT = 45 | HBaseClassTestRule.forClass(TestJavaHBaseContextForLargeRows.class); 46 | 47 | @BeforeClass 48 | public static void setUpBeforeClass() throws Exception { 49 | JSC = new JavaSparkContext("local", "JavaHBaseContextSuite"); 50 | 51 | init(); 52 | } 53 | 54 | protected void populateTableWithMockData(Configuration conf, TableName tableName) 55 | throws IOException { 56 | try (Connection conn = ConnectionFactory.createConnection(conf); 57 | Table table = conn.getTable(tableName); Admin admin = conn.getAdmin()) { 58 | 59 | List puts = new ArrayList<>(5); 60 | 61 | for (int i = 1; i < 6; i++) { 62 | Put put = new Put(Bytes.toBytes(Integer.toString(i))); 63 | // We are trying to generate a large row value here 64 | char[] chars = new char[1024 * 1024 * 2]; 65 | // adding '0' to convert int to char 66 | Arrays.fill(chars, (char) (i + '0')); 67 | put.addColumn(columnFamily, columnFamily, Bytes.toBytes(String.valueOf(chars))); 68 | puts.add(put); 69 | } 70 | table.put(puts); 71 | admin.flush(tableName); 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Define some default values that can be overridden by system properties 18 | hbase.root.logger=INFO,FA 19 | hbase.log.dir=. 20 | hbase.log.file=hbase.log 21 | 22 | # Define the root logger to the system property "hbase.root.logger". 
23 | log4j.rootLogger=${hbase.root.logger} 24 | 25 | # Logging Threshold 26 | log4j.threshold=ALL 27 | 28 | # 29 | # Daily Rolling File Appender 30 | # 31 | log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender 32 | log4j.appender.DRFA.File=${hbase.log.dir}/${hbase.log.file} 33 | 34 | # Rollver at midnight 35 | log4j.appender.DRFA.DatePattern=.yyyy-MM-dd 36 | 37 | # 30-day backup 38 | #log4j.appender.DRFA.MaxBackupIndex=30 39 | log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout 40 | # Debugging Pattern format 41 | log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %C{2}(%L): %m%n 42 | 43 | 44 | # 45 | # console 46 | # Add "console" to rootlogger above if you want to use this 47 | # 48 | log4j.appender.console=org.apache.log4j.ConsoleAppender 49 | log4j.appender.console.target=System.err 50 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 51 | log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %C{2}(%L): %m%n 52 | 53 | #File Appender 54 | log4j.appender.FA=org.apache.log4j.FileAppender 55 | log4j.appender.FA.append=false 56 | log4j.appender.FA.file=target/log-output.txt 57 | log4j.appender.FA.layout=org.apache.log4j.PatternLayout 58 | log4j.appender.FA.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %C{2}(%L): %m%n 59 | log4j.appender.FA.Threshold = INFO 60 | 61 | # Custom Logging levels 62 | 63 | #log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG 64 | 65 | log4j.logger.org.apache.hadoop=WARN 66 | log4j.logger.org.apache.zookeeper=ERROR 67 | log4j.logger.org.apache.hadoop.hbase=DEBUG 68 | 69 | #These settings are workarounds against spurious logs from the minicluster. 70 | #See HBASE-4709 71 | log4j.logger.org.apache.hadoop.metrics2.impl.MetricsConfig=WARN 72 | log4j.logger.org.apache.hadoop.metrics2.impl.MetricsSinkAdapter=WARN 73 | log4j.logger.org.apache.hadoop.metrics2.impl.MetricsSystemImpl=WARN 74 | log4j.logger.org.apache.hadoop.metrics2.util.MBeans=WARN 75 | # Enable this to get detailed connection error/retry logging. 76 | # log4j.logger.org.apache.hadoop.hbase.client.ConnectionImplementation=TRACE 77 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseCatalogSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import org.apache.hadoop.hbase.spark.datasources.{DataTypeParserWrapper, DoubleSerDes, HBaseTableCatalog} 21 | import org.apache.hadoop.hbase.util.Bytes 22 | import org.apache.spark.sql.types._ 23 | import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} 24 | 25 | class HBaseCatalogSuite 26 | extends FunSuite 27 | with BeforeAndAfterEach 28 | with BeforeAndAfterAll 29 | with Logging { 30 | 31 | val map = s"""MAP>""" 32 | val array = s"""array>""" 33 | val arrayMap = s"""MAp>""" 34 | val catalog = s"""{ 35 | |"table":{"namespace":"default", "name":"htable"}, 36 | |"rowkey":"key1:key2", 37 | |"columns":{ 38 | |"col1":{"cf":"rowkey", "col":"key1", "type":"string"}, 39 | |"col2":{"cf":"rowkey", "col":"key2", "type":"double"}, 40 | |"col3":{"cf":"cf1", "col":"col2", "type":"binary"}, 41 | |"col4":{"cf":"cf1", "col":"col3", "type":"timestamp"}, 42 | |"col5":{"cf":"cf1", "col":"col4", "type":"double", "serdes":"${classOf[ 43 | DoubleSerDes].getName}"}, 44 | |"col6":{"cf":"cf1", "col":"col5", "type":"$map"}, 45 | |"col7":{"cf":"cf1", "col":"col6", "type":"$array"}, 46 | |"col8":{"cf":"cf1", "col":"col7", "type":"$arrayMap"}, 47 | |"col9":{"cf":"cf1", "col":"col8", "type":"date"}, 48 | |"col10":{"cf":"cf1", "col":"col9", "type":"timestamp"} 49 | |} 50 | |}""".stripMargin 51 | val parameters = Map(HBaseTableCatalog.tableCatalog -> catalog) 52 | val t = HBaseTableCatalog(parameters) 53 | 54 | def checkDataType(dataTypeString: String, expectedDataType: DataType): Unit = { 55 | test(s"parse ${dataTypeString.replace("\n", "")}") { 56 | assert(DataTypeParserWrapper.parse(dataTypeString) === expectedDataType) 57 | } 58 | } 59 | test("basic") { 60 | assert(t.getField("col1").isRowKey == true) 61 | assert(t.getPrimaryKey == "key1") 62 | assert(t.getField("col3").dt == BinaryType) 63 | assert(t.getField("col4").dt == TimestampType) 64 | assert(t.getField("col5").dt == DoubleType) 65 | assert(t.getField("col5").serdes != None) 66 | assert(t.getField("col4").serdes == None) 67 | assert(t.getField("col1").isRowKey) 68 | assert(t.getField("col2").isRowKey) 69 | assert(!t.getField("col3").isRowKey) 70 | assert(t.getField("col2").length == Bytes.SIZEOF_DOUBLE) 71 | assert(t.getField("col1").length == -1) 72 | assert(t.getField("col8").length == -1) 73 | assert(t.getField("col9").dt == DateType) 74 | assert(t.getField("col10").dt == TimestampType) 75 | } 76 | 77 | checkDataType(map, t.getField("col6").dt) 78 | 79 | checkDataType(array, t.getField("col7").dt) 80 | 81 | checkDataType(arrayMap, t.getField("col8").dt) 82 | 83 | test("convert") { 84 | val m = Map( 85 | "hbase.columns.mapping" -> 86 | "KEY_FIELD STRING :key, A_FIELD STRING c:a, B_FIELD DOUBLE c:b, C_FIELD BINARY c:c,", 87 | "hbase.table" -> "NAMESPACE:TABLE") 88 | val map = HBaseTableCatalog.convert(m) 89 | val json = map.get(HBaseTableCatalog.tableCatalog).get 90 | val parameters = Map(HBaseTableCatalog.tableCatalog -> json) 91 | val t = HBaseTableCatalog(parameters) 92 | assert(t.namespace === "NAMESPACE") 93 | assert(t.name == "TABLE") 94 | assert(t.getField("KEY_FIELD").isRowKey) 95 | assert(DataTypeParserWrapper.parse("STRING") === t.getField("A_FIELD").dt) 96 | assert(!t.getField("A_FIELD").isRowKey) 97 | assert(DataTypeParserWrapper.parse("DOUBLE") === t.getField("B_FIELD").dt) 98 | assert(DataTypeParserWrapper.parse("BINARY") === t.getField("C_FIELD").dt) 99 | } 100 | 101 | test("compatibility") { 102 | val m = Map( 103 | "hbase.columns.mapping" -> 104 | "KEY_FIELD STRING 
:key, A_FIELD STRING c:a, B_FIELD DOUBLE c:b, C_FIELD BINARY c:c,", 105 | "hbase.table" -> "t1") 106 | val t = HBaseTableCatalog(m) 107 | assert(t.namespace === "default") 108 | assert(t.name == "t1") 109 | assert(t.getField("KEY_FIELD").isRowKey) 110 | assert(DataTypeParserWrapper.parse("STRING") === t.getField("A_FIELD").dt) 111 | assert(!t.getField("A_FIELD").isRowKey) 112 | assert(DataTypeParserWrapper.parse("DOUBLE") === t.getField("B_FIELD").dt) 113 | assert(DataTypeParserWrapper.parse("BINARY") === t.getField("C_FIELD").dt) 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseTestSource.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import org.apache.hadoop.hbase.spark.datasources.HBaseSparkConf 21 | import org.apache.spark.SparkEnv 22 | import org.apache.spark.rdd.RDD 23 | import org.apache.spark.sql.{Row, SQLContext} 24 | import org.apache.spark.sql.sources._ 25 | import org.apache.spark.sql.types._ 26 | 27 | class HBaseTestSource extends RelationProvider { 28 | override def createRelation( 29 | sqlContext: SQLContext, 30 | parameters: Map[String, String]): BaseRelation = { 31 | DummyScan( 32 | parameters("cacheSize").toInt, 33 | parameters("batchNum").toInt, 34 | parameters("blockCacheingEnable").toBoolean, 35 | parameters("rowNum").toInt)(sqlContext) 36 | } 37 | } 38 | 39 | case class DummyScan(cacheSize: Int, batchNum: Int, blockCachingEnable: Boolean, rowNum: Int)( 40 | @transient val sqlContext: SQLContext) 41 | extends BaseRelation 42 | with TableScan { 43 | private def sparkConf = SparkEnv.get.conf 44 | override def schema: StructType = 45 | StructType(StructField("i", IntegerType, nullable = false) :: Nil) 46 | 47 | override def buildScan(): RDD[Row] = sqlContext.sparkContext 48 | .parallelize(0 until rowNum) 49 | .map(Row(_)) 50 | .map { 51 | x => 52 | if (sparkConf.getInt(HBaseSparkConf.QUERY_BATCHSIZE, -1) != batchNum || 53 | sparkConf.getInt(HBaseSparkConf.QUERY_CACHEDROWS, -1) != cacheSize || 54 | sparkConf.getBoolean(HBaseSparkConf.QUERY_CACHEBLOCKS, false) != blockCachingEnable) { 55 | throw new Exception("HBase Spark configuration cannot be set properly") 56 | } 57 | x 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/StartsWithSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or 
more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import org.apache.hadoop.hbase.spark.datasources.Utils 21 | import org.apache.hadoop.hbase.util.Bytes 22 | import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} 23 | 24 | class StartsWithSuite extends FunSuite with BeforeAndAfterEach with BeforeAndAfterAll with Logging { 25 | 26 | test("simple1") { 27 | val t = new Array[Byte](2) 28 | t(0) = 1.toByte 29 | t(1) = 2.toByte 30 | 31 | val expected = new Array[Byte](2) 32 | expected(0) = 1.toByte 33 | expected(1) = 3.toByte 34 | 35 | val res = Utils.incrementByteArray(t) 36 | assert(res.sameElements(expected)) 37 | } 38 | 39 | test("simple2") { 40 | val t = new Array[Byte](1) 41 | t(0) = 87.toByte 42 | 43 | val expected = new Array[Byte](1) 44 | expected(0) = 88.toByte 45 | 46 | val res = Utils.incrementByteArray(t) 47 | assert(res.sameElements(expected)) 48 | } 49 | 50 | test("overflow1") { 51 | val t = new Array[Byte](2) 52 | t(0) = 1.toByte 53 | t(1) = (-1).toByte 54 | 55 | val expected = new Array[Byte](2) 56 | expected(0) = 2.toByte 57 | expected(1) = 0.toByte 58 | 59 | val res = Utils.incrementByteArray(t) 60 | 61 | assert(res.sameElements(expected)) 62 | } 63 | 64 | test("overflow2") { 65 | val t = new Array[Byte](2) 66 | t(0) = (-1).toByte 67 | t(1) = (-1).toByte 68 | 69 | val expected = null 70 | 71 | val res = Utils.incrementByteArray(t) 72 | 73 | assert(res == expected) 74 | } 75 | 76 | test("max-min-value") { 77 | val t = new Array[Byte](2) 78 | t(0) = 1.toByte 79 | t(1) = (127).toByte 80 | 81 | val expected = new Array[Byte](2) 82 | expected(0) = 1.toByte 83 | expected(1) = (-128).toByte 84 | 85 | val res = Utils.incrementByteArray(t) 86 | assert(res.sameElements(expected)) 87 | } 88 | 89 | test("complicated") { 90 | val imput = "row005" 91 | val expectedOutput = "row006" 92 | 93 | val t = Bytes.toBytes(imput) 94 | val expected = Bytes.toBytes(expectedOutput) 95 | 96 | val res = Utils.incrementByteArray(t) 97 | assert(res.sameElements(expected)) 98 | } 99 | 100 | } 101 | -------------------------------------------------------------------------------- /spark/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/TableOutputFormatSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.hbase.spark 19 | 20 | import java.text.SimpleDateFormat 21 | import java.util.{Date, Locale} 22 | import org.apache.hadoop.hbase.{HBaseTestingUtility, TableName, TableNotFoundException} 23 | import org.apache.hadoop.hbase.mapreduce.TableOutputFormat 24 | import org.apache.hadoop.hbase.util.Bytes 25 | import org.apache.hadoop.mapreduce.{Job, TaskAttemptID, TaskType} 26 | import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl 27 | import org.apache.spark.{SparkConf, SparkContext} 28 | import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} 29 | import scala.util.{Failure, Success, Try} 30 | 31 | // Unit tests for HBASE-20521: change get configuration(TableOutputFormat.conf) object first sequence from jobContext to getConf 32 | // this suite contains two tests, one for normal case(getConf return null, use jobContext), create new TableOutputformat object without init TableOutputFormat.conf object, 33 | // configuration object inside checkOutputSpecs came from jobContext. 34 | // The other one(getConf return conf object) we manually call "setConf" to init TableOutputFormat.conf, for making it more straight forward, we specify a nonexistent table 35 | // name in conf object, checkOutputSpecs will then throw TableNotFoundException exception 36 | class TableOutputFormatSuite 37 | extends FunSuite 38 | with BeforeAndAfterEach 39 | with BeforeAndAfterAll 40 | with Logging { 41 | @transient var sc: SparkContext = null 42 | var TEST_UTIL = new HBaseTestingUtility 43 | 44 | val tableName = "TableOutputFormatTest" 45 | val tableNameTest = "NonExistentTable" 46 | val columnFamily = "cf" 47 | 48 | override protected def beforeAll(): Unit = { 49 | TEST_UTIL.startMiniCluster 50 | 51 | logInfo(" - minicluster started") 52 | try { 53 | TEST_UTIL.deleteTable(TableName.valueOf(tableName)) 54 | } catch { 55 | case e: Exception => logInfo(" - no table " + tableName + " found") 56 | } 57 | 58 | TEST_UTIL.createTable(TableName.valueOf(tableName), Bytes.toBytes(columnFamily)) 59 | logInfo(" - created table") 60 | 61 | // set "validateOutputSpecs" true anyway, force to validate output spec 62 | val sparkConf = new SparkConf() 63 | .setMaster("local") 64 | .setAppName("test") 65 | 66 | sc = new SparkContext(sparkConf) 67 | } 68 | 69 | override protected def afterAll(): Unit = { 70 | logInfo(" - delete table: " + tableName) 71 | TEST_UTIL.deleteTable(TableName.valueOf(tableName)) 72 | logInfo(" - shutting down minicluster") 73 | TEST_UTIL.shutdownMiniCluster() 74 | 75 | TEST_UTIL.cleanupTestDir() 76 | sc.stop() 77 | } 78 | 79 | def getJobContext() = { 80 | val hConf = TEST_UTIL.getConfiguration 81 | hConf.set(TableOutputFormat.OUTPUT_TABLE, tableName) 82 | val job = Job.getInstance(hConf) 83 | job.setOutputFormatClass(classOf[TableOutputFormat[String]]) 84 | 85 | val jobTrackerId = new SimpleDateFormat("yyyyMMddHHmmss", Locale.US).format(new Date()) 86 | val jobAttemptId = new TaskAttemptID(jobTrackerId, 1, TaskType.MAP, 0, 0) 87 | new TaskAttemptContextImpl(job.getConfiguration, jobAttemptId) 88 | } 89 | 90 | // Mock up 
jobContext object and execute actions in "write" function 91 | // from "org.apache.spark.internal.io.SparkHadoopMapReduceWriter" 92 | // this case should run normally without any exceptions 93 | test( 94 | "TableOutputFormat.checkOutputSpecs test without setConf called, should return true and without exceptions") { 95 | val jobContext = getJobContext() 96 | val format = jobContext.getOutputFormatClass 97 | val jobFormat = format.newInstance 98 | Try { 99 | jobFormat.checkOutputSpecs(jobContext) 100 | } match { 101 | case Success(_) => assert(true) 102 | case Failure(_) => assert(false) 103 | } 104 | } 105 | 106 | // Set configuration externally, checkOutputSpec should use configuration object set by "SetConf" method 107 | // rather than jobContext, this case should throw "TableNotFoundException" exception 108 | test( 109 | "TableOutputFormat.checkOutputSpecs test with setConf called, should throw TableNotFoundException") { 110 | val jobContext = getJobContext() 111 | val format = jobContext.getOutputFormatClass 112 | val jobFormat = format.newInstance 113 | 114 | val hConf = TEST_UTIL.getConfiguration 115 | hConf.set(TableOutputFormat.OUTPUT_TABLE, tableNameTest) 116 | jobFormat.asInstanceOf[TableOutputFormat[String]].setConf(hConf) 117 | Try { 118 | jobFormat.checkOutputSpecs(jobContext) 119 | } match { 120 | case Success(_) => assert(false) 121 | case Failure(e: Exception) => { 122 | if (e.isInstanceOf[TableNotFoundException]) 123 | assert(true) 124 | else 125 | assert(false) 126 | } 127 | case _ => None 128 | } 129 | } 130 | 131 | } 132 | -------------------------------------------------------------------------------- /test-reporting/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 23 | 4.0.0 24 | 25 | 26 | hbase-connectors 27 | org.apache.hbase.connectors 28 | ${revision} 29 | 30 | 31 | test-reporting 32 | pom 33 | ${revision} 34 | Test Reporting 35 | Test Reporting for Apache HBase Connectors 36 | 37 | 38 | 39 | ${project.parent.basedir} 40 | 41 | 42 | 43 | 44 | 45 | org.apache.hbase.connectors.kafka 46 | hbase-kafka-proxy 47 | ${revision} 48 | 49 | 50 | org.apache.hbase.connectors.spark 51 | hbase-spark 52 | ${revision} 53 | 54 | 55 | 56 | org.scala-lang 57 | scala-library 58 | 59 | 60 | 61 | org.scala-lang 62 | scalap 63 | 64 | 65 | com.google.code.findbugs 66 | jsr305 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | org.apache.maven.plugins 76 | maven-surefire-plugin 77 | ${surefire.version} 78 | 79 | ${argLine} -Xms256m -Xmx2048m 80 | 1 81 | random 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | org.jacoco 90 | jacoco-maven-plugin 91 | 92 | 93 | report 94 | 95 | report-aggregate 96 | 97 | package 98 | 99 | ${jacocoReportDir} 100 | ${project.build.sourceEncoding} 101 | ${project.reporting.outputEncoding} 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | --------------------------------------------------------------------------------
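
The Scala examples above exercise the RDD-level API (hbaseBulkPut, hbaseBulkDelete, hbaseBulkGet, hbaseForeachPartition, hbaseMapPartitions), while HBaseCatalogSuite covers the JSON catalog used by the connector's SQL data source. As a rough illustration of how a downstream application might tie the two together, here is a minimal sketch that reads an HBase table as a DataFrame using a catalog in the same JSON shape as HBaseCatalogSuite; the table name, column layout, and SparkSession wiring are assumptions for illustration only, not files from this repository.

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.spark.datasources.HBaseTableCatalog
import org.apache.spark.sql.SparkSession

object HBaseDataFrameReadSketch {
  // Hypothetical catalog: string rowkey "key1" plus one double column in family cf1,
  // following the JSON layout exercised in HBaseCatalogSuite.
  val catalog =
    s"""{
       |"table":{"namespace":"default", "name":"htable"},
       |"rowkey":"key1",
       |"columns":{
       |"col1":{"cf":"rowkey", "col":"key1", "type":"string"},
       |"col2":{"cf":"cf1", "col":"col2", "type":"double"}
       |}
       |}""".stripMargin

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("HBaseDataFrameReadSketch").getOrCreate()
    // Like the examples above, create an HBaseContext first so the data source can reuse it.
    val hbaseContext = new HBaseContext(spark.sparkContext, HBaseConfiguration.create())
    // Read the table through the connector's relation provider and push a simple filter down.
    val df = spark.read
      .options(Map(HBaseTableCatalog.tableCatalog -> catalog))
      .format("org.apache.hadoop.hbase.spark")
      .load()
    df.filter(df("col2") > 1.0).show()
    spark.stop()
  }
}

Such a job would typically be launched with spark-submit, with the hbase-spark connector jar on the application classpath and hbase-site.xml visible to both the driver and the executors.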