├── spark ├── .gitignore ├── contrib │ ├── checkstyle.xml │ └── import-control.xml ├── run.sh └── README.md ├── .gitignore ├── NOTICE ├── src ├── main │ ├── java │ │ └── org │ │ │ └── apache │ │ │ └── accumulo │ │ │ └── examples │ │ │ ├── constraints │ │ │ ├── ConstraintsCommon.java │ │ │ ├── MaxMutationSize.java │ │ │ └── NumericValueConstraint.java │ │ │ ├── bloom │ │ │ ├── BloomCommon.java │ │ │ ├── BloomFiltersNotFound.java │ │ │ ├── BloomBatchScanner.java │ │ │ └── BloomFilters.java │ │ │ ├── cli │ │ │ ├── ScannerOpts.java │ │ │ ├── ClientOnRequiredTable.java │ │ │ ├── ClientOnDefaultTable.java │ │ │ ├── Help.java │ │ │ ├── ClientOpts.java │ │ │ └── BatchWriterOpts.java │ │ │ ├── client │ │ │ ├── Flush.java │ │ │ ├── CountingVerifyingReceiver.java │ │ │ ├── ReadWriteExample.java │ │ │ └── SequentialBatchWriter.java │ │ │ ├── Common.java │ │ │ ├── helloworld │ │ │ ├── Read.java │ │ │ └── Insert.java │ │ │ ├── filedata │ │ │ ├── KeyUtil.java │ │ │ ├── FileDataQuery.java │ │ │ ├── VisibilityCombiner.java │ │ │ ├── ChunkInputFormat.java │ │ │ └── CharacterHistogram.java │ │ │ ├── mapreduce │ │ │ ├── bulk │ │ │ │ ├── SetupTable.java │ │ │ │ └── VerifyIngest.java │ │ │ ├── RegexExample.java │ │ │ ├── RowHash.java │ │ │ ├── TableToFile.java │ │ │ ├── NGramIngest.java │ │ │ └── WordCount.java │ │ │ ├── shard │ │ │ ├── Reverse.java │ │ │ ├── Index.java │ │ │ └── Query.java │ │ │ ├── util │ │ │ └── FormatUtil.java │ │ │ └── combiner │ │ │ └── StatsCombiner.java │ └── resources │ │ └── log4j.properties └── test │ ├── resources │ └── log4j.properties │ └── java │ └── org │ └── apache │ └── accumulo │ └── examples │ ├── filedata │ └── KeyUtilTest.java │ ├── constraints │ ├── NumericValueConstraintTest.java │ └── AlphaNumKeyConstraintTest.java │ ├── mapreduce │ └── MapReduceIT.java │ └── dirlist │ └── CountIT.java ├── .asf.yaml ├── CONTRIBUTING.md ├── contrib ├── checkstyle.xml └── import-control.xml ├── bin ├── build ├── runmr └── runex ├── conf ├── log4j2.properties └── env.sh.example ├── docs ├── helloworld.md ├── release-testing.md ├── bulkIngest.md ├── rowhash.md ├── tracing.md ├── tabletofile.md ├── regex.md ├── constraints.md ├── isolation.md ├── terasort.md ├── filedata.md ├── batch.md ├── wordcount.md ├── reservations.md ├── uniquecols.md ├── classpath.md ├── export.md ├── combiner.md ├── client.md └── shard.md └── .github └── workflows └── maven.yaml /spark/.gitignore: -------------------------------------------------------------------------------- 1 | /.classpath 2 | /.project 3 | /.settings/ 4 | /target/ 5 | /*.iml 6 | /.idea 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.classpath 2 | /.project 3 | /.settings/ 4 | /target/ 5 | /*.iml 6 | /.idea 7 | /examples.conf 8 | /conf/env.sh 9 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Apache Accumulo Examples 2 | Copyright 2019 The Apache Software Foundation 3 | 4 | This product includes software developed at 5 | The Apache Software Foundation (http://www.apache.org/). 
6 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/constraints/ConstraintsCommon.java: -------------------------------------------------------------------------------- 1 | package org.apache.accumulo.examples.constraints; 2 | 3 | import org.apache.accumulo.examples.Common; 4 | 5 | public enum ConstraintsCommon { 6 | ; 7 | public static final String CONSTRAINTS_TABLE = Common.NAMESPACE + ".testConstraints"; 8 | public static final String CONSTRAINT_VIOLATED_MSG = "Constraint violated: {}"; 9 | } 10 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/bloom/BloomCommon.java: -------------------------------------------------------------------------------- 1 | package org.apache.accumulo.examples.bloom; 2 | 3 | import org.apache.accumulo.examples.Common; 4 | 5 | enum BloomCommon { 6 | ; 7 | public static final String BLOOM_TEST1_TABLE = Common.NAMESPACE + ".bloom_test1"; 8 | public static final String BLOOM_TEST2_TABLE = Common.NAMESPACE + ".bloom_test2"; 9 | public static final String BLOOM_TEST3_TABLE = Common.NAMESPACE + ".bloom_test3"; 10 | public static final String BLOOM_TEST4_TABLE = Common.NAMESPACE + ".bloom_test4"; 11 | 12 | public static final String BLOOM_ENABLED_PROPERTY = "table.bloom.enabled"; 13 | } 14 | -------------------------------------------------------------------------------- /src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | log4j.rootLogger=INFO, CA 17 | log4j.appender.CA=org.apache.log4j.ConsoleAppender 18 | log4j.appender.CA.layout=org.apache.log4j.PatternLayout 19 | log4j.appender.CA.layout.ConversionPattern=[%t] %-5p %c %x - %m%n 20 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/cli/ScannerOpts.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License.
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.accumulo.examples.cli; 18 | 19 | import com.beust.jcommander.Parameter; 20 | 21 | public class ScannerOpts { 22 | @Parameter(names = "--scanBatchSize", 23 | description = "the number of key-values to pull during a scan") 24 | public int scanBatchSize = 1000; 25 | } 26 | -------------------------------------------------------------------------------- /.asf.yaml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | # 19 | 20 | # https://cwiki.apache.org/confluence/display/INFRA/git+-+.asf.yaml+features 21 | 22 | github: 23 | description: "Apache Accumulo Examples" 24 | homepage: https://accumulo.apache.org 25 | labels: 26 | - accumulo 27 | - big-data 28 | - hacktoberfest 29 | features: 30 | wiki: false 31 | issues: true 32 | projects: true 33 | 34 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/cli/ClientOnRequiredTable.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.accumulo.examples.cli; 18 | 19 | import com.beust.jcommander.Parameter; 20 | 21 | public class ClientOnRequiredTable extends ClientOpts { 22 | @Parameter(names = {"-t", "--table"}, required = true, description = "table to use") 23 | private String tableName; 24 | 25 | public String getTableName() { 26 | return tableName; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 17 | 18 | # Contributing to Accumulo Examples 19 | 20 | Contributions to Accumulo Examples can be made by creating a pull request to this repo 21 | on GitHub. 22 | 23 | Before creating a pull request, run `mvn clean verify` and run through the instructions 24 | for any example that was changed. 25 | 26 | For general instructions on contributing to Accumulo projects, check out the 27 | [Accumulo Contributor guide][contribute]. 28 | 29 | [contribute]: https://accumulo.apache.org/contributor/ 30 | -------------------------------------------------------------------------------- /contrib/checkstyle.xml: -------------------------------------------------------------------------------- 1 | 2 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /spark/contrib/checkstyle.xml: -------------------------------------------------------------------------------- 1 | 2 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /bin/build: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env bash 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one or more 4 | # contributor license agreements. See the NOTICE file distributed with 5 | # this work for additional information regarding copyright ownership. 6 | # The ASF licenses this file to You under the Apache License, Version 2.0 7 | # (the "License"); you may not use this file except in compliance with 8 | # the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | bin_dir=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) 19 | ex_home=$( cd "$( dirname "$bin_dir" )" && pwd ) 20 | 21 | if [ -f "$ex_home/conf/env.sh" ]; then 22 | . "$ex_home"/conf/env.sh 23 | else 24 | . "$ex_home"/conf/env.sh.example 25 | fi 26 | 27 | if [[ ! -f "$EXAMPLES_JAR_PATH" ]]; then 28 | echo "Building $EXAMPLES_JAR_PATH" 29 | cd "${ex_home}" || exit 1 30 | mvn clean verify -P create-shade-jar -D skipTests -D accumulo.version="$ACCUMULO_VERSION" -D hadoop.version="$HADOOP_VERSION" -D zookeeper.version="$ZOOKEEPER_VERSION" 31 | fi 32 | -------------------------------------------------------------------------------- /conf/log4j2.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. 
See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | status = info 17 | dest = err 18 | name = AccumuloExampleConfig 19 | monitorInterval = 30 20 | 21 | appender.console.type = Console 22 | appender.console.name = STDERR 23 | appender.console.target = SYSTEM_ERR 24 | appender.console.layout.type = PatternLayout 25 | appender.console.layout.pattern = %style{%d{ISO8601}}{dim,cyan} %style{[}{red}%style{%-8c{2}}{dim,blue}%style{]}{red} %highlight{%-5p}%style{:}{red} %m%n 26 | 27 | logger.zookeeper.name = org.apache.zookeeper 28 | logger.zookeeper.level = error 29 | 30 | rootLogger.level = info 31 | rootLogger.appenderRef.console.ref = STDERR 32 | 33 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/cli/ClientOnDefaultTable.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.accumulo.examples.cli; 18 | 19 | import com.beust.jcommander.Parameter; 20 | 21 | public class ClientOnDefaultTable extends ClientOpts { 22 | @Parameter(names = "--table", description = "table to use") 23 | private String tableName; 24 | 25 | public ClientOnDefaultTable(String table) { 26 | this.tableName = table; 27 | } 28 | 29 | public String getTableName() { 30 | return tableName; 31 | } 32 | 33 | public void setTableName(String tableName) { 34 | this.tableName = tableName; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /spark/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one or more 4 | # contributor license agreements. See the NOTICE file distributed with 5 | # this work for additional information regarding copyright ownership. 6 | # The ASF licenses this file to You under the Apache License, Version 2.0 7 | # (the "License"); you may not use this file except in compliance with 8 | # the License. 
You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | 19 | if [[ -z "$1" || -z "$2" ]]; then 20 | echo "Usage: ./run.sh [bulk|batch] /path/to/accumulo-client.properties" 21 | exit 1 22 | fi 23 | 24 | JAR=./target/accumulo-spark-shaded.jar 25 | if [[ ! -f $JAR ]]; then 26 | mvn clean package -P create-shade-jar 27 | fi 28 | 29 | if [[ -z "$SPARK_HOME" ]]; then 30 | echo "SPARK_HOME must be set!" 31 | exit 1 32 | fi 33 | 34 | if [[ -z "$HADOOP_CONF_DIR" ]]; then 35 | echo "HADOOP_CONF_DIR must be set!" 36 | exit 1 37 | fi 38 | 39 | "$SPARK_HOME"/bin/spark-submit \ 40 | --class org.apache.accumulo.spark.CopyPlus5K \ 41 | --master yarn \ 42 | --deploy-mode client \ 43 | "$JAR" \ 44 | "$1" "$2" 45 | -------------------------------------------------------------------------------- /src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | log4j.rootLogger=WARN, CA 17 | log4j.appender.CA=org.apache.log4j.ConsoleAppender 18 | log4j.appender.CA.layout=org.apache.log4j.PatternLayout 19 | log4j.appender.CA.layout.ConversionPattern=[%t] %-5p %c %x - %m%n 20 | 21 | log4j.logger.org.apache.accumulo.core.clientImpl.ServerClient=ERROR 22 | log4j.logger.org.apache.hadoop.mapred=ERROR 23 | log4j.logger.org.apache.hadoop.mapreduce=ERROR 24 | log4j.logger.org.apache.hadoop.metrics2=ERROR 25 | log4j.logger.org.apache.hadoop.util.ProcessTree=ERROR 26 | log4j.logger.org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter=ERROR 27 | log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR 28 | log4j.logger.org.apache.zookeeper.ClientCnxn=FATAL 29 | -------------------------------------------------------------------------------- /bin/runmr: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env bash 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one or more 4 | # contributor license agreements. See the NOTICE file distributed with 5 | # this work for additional information regarding copyright ownership. 6 | # The ASF licenses this file to You under the Apache License, Version 2.0 7 | # (the "License"); you may not use this file except in compliance with 8 | # the License.
You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | bin_dir=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) 19 | ex_home=$( cd "$( dirname "$bin_dir" )" && pwd ) 20 | 21 | function print_usage() { 22 | cat <<EOF 23 | Usage: runmr <classname> args.. 24 | 25 | EOF 26 | } 27 | 28 | if [ -z "$1" ]; then 29 | echo "ERROR: <classname> needs to be set" 30 | print_usage 31 | exit 1 32 | fi 33 | 34 | if [ -f "$ex_home/conf/env.sh" ]; then 35 | . "$ex_home"/conf/env.sh 36 | else 37 | . "$ex_home"/conf/env.sh.example 38 | fi 39 | 40 | # Build shaded jar (if not built already) 41 | "$ex_home"/bin/build 42 | 43 | export HADOOP_USE_CLIENT_CLASSLOADER=true 44 | "$HADOOP_HOME"/bin/yarn jar "$EXAMPLES_JAR_PATH" "org.apache.accumulo.examples.$1" "${@:2}" 45 | -------------------------------------------------------------------------------- /bin/runex: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env bash 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one or more 4 | # contributor license agreements. See the NOTICE file distributed with 5 | # this work for additional information regarding copyright ownership. 6 | # The ASF licenses this file to You under the Apache License, Version 2.0 7 | # (the "License"); you may not use this file except in compliance with 8 | # the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | bin_dir=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) 19 | ex_home=$( cd "$( dirname "$bin_dir" )" && pwd ) 20 | 21 | function print_usage() { 22 | cat <<EOF 23 | Usage: runex <classname> args.. 24 | 25 | EOF 26 | } 27 | 28 | if [ -z "$1" ]; then 29 | echo "ERROR: <classname> needs to be set" 30 | print_usage 31 | exit 1 32 | fi 33 | 34 | if [ -f "$ex_home/conf/env.sh" ]; then 35 | . "$ex_home"/conf/env.sh 36 | else 37 | . "$ex_home"/conf/env.sh.example 38 | fi 39 | 40 | # Build shaded jar (if not built already) 41 | "$ex_home"/bin/build 42 | 43 | export CLASSPATH="$EXAMPLES_JAR_PATH:$ex_home/conf:$ACCUMULO_HOME/lib/*:$CLASSPATH" 44 | java "org.apache.accumulo.examples.$1" "${@:2}" 45 | 46 | -------------------------------------------------------------------------------- /conf/env.sh.example: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License.
You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # General 17 | # ======= 18 | 19 | ## Hadoop installation 20 | export HADOOP_HOME="${HADOOP_HOME:-/path/to/hadoop}" 21 | ## Accumulo installation 22 | export ACCUMULO_HOME="${ACCUMULO_HOME:-/path/to/accumulo}" 23 | ## Path to Accumulo client properties 24 | export ACCUMULO_CLIENT_PROPS="$ACCUMULO_HOME/conf/accumulo-client.properties" 25 | 26 | # Configuration 27 | # ============= 28 | conf_dir=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) 29 | 30 | # Shaded jar 31 | # =============== 32 | # Versions set below will be what is included in the shaded jar 33 | export ACCUMULO_VERSION="`$ACCUMULO_HOME/bin/accumulo version`" 34 | export HADOOP_VERSION="`$HADOOP_HOME/bin/hadoop version | head -n1 | awk '{print $2}'`" 35 | export ZOOKEEPER_VERSION=3.7.0 36 | # Path to the shaded examples jar 37 | ex_home=$( cd "$( dirname "$conf_dir" )" && pwd ) 38 | export EXAMPLES_JAR_PATH="${ex_home}/target/accumulo-examples-shaded.jar" 39 | -------------------------------------------------------------------------------- /src/test/java/org/apache/accumulo/examples/filedata/KeyUtilTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.accumulo.examples.filedata; 18 | 19 | import static org.junit.jupiter.api.Assertions.assertEquals; 20 | 21 | import org.apache.hadoop.io.Text; 22 | import org.junit.jupiter.api.Test; 23 | 24 | public class KeyUtilTest { 25 | public static void checkSeps(String...
s) { 26 | Text t = KeyUtil.buildNullSepText(s); 27 | String[] rets = KeyUtil.splitNullSepText(t); 28 | 29 | int length = 0; 30 | for (String str : s) 31 | length += str.length(); 32 | assertEquals(t.getLength(), length + s.length - 1); 33 | assertEquals(rets.length, s.length); 34 | for (int i = 0; i < s.length; i++) 35 | assertEquals(s[i], rets[i]); 36 | } 37 | 38 | @Test 39 | public void testNullSep() { 40 | checkSeps("abc", "d", "", "efgh"); 41 | checkSeps("ab", ""); 42 | checkSeps("abcde"); 43 | checkSeps(""); 44 | checkSeps("", ""); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/client/Flush.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.accumulo.examples.client; 18 | 19 | import org.apache.accumulo.core.client.AccumuloClient; 20 | import org.apache.accumulo.core.client.AccumuloException; 21 | import org.apache.accumulo.core.client.AccumuloSecurityException; 22 | import org.apache.accumulo.core.client.TableNotFoundException; 23 | import org.apache.accumulo.examples.cli.ClientOnRequiredTable; 24 | 25 | /** 26 | * Simple example for using tableOperations() (like create, delete, flush, etc). 
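* A flush forces data currently held in tablet server memory to be written to files in HDFS; passing wait=true (as this example does) makes the call block until the flush finishes.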
27 | */ 28 | public class Flush { 29 | 30 | public static void main(String[] args) 31 | throws AccumuloSecurityException, AccumuloException, TableNotFoundException { 32 | ClientOnRequiredTable opts = new ClientOnRequiredTable(); 33 | opts.parseArgs(Flush.class.getName(), args); 34 | try (AccumuloClient client = opts.createAccumuloClient()) { 35 | client.tableOperations().flush(opts.getTableName(), null, null, true); 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /docs/helloworld.md: -------------------------------------------------------------------------------- 1 | 17 | # Apache Accumulo Hello World Example 18 | 19 | This tutorial uses the following Java classes: 20 | 21 | * [Insert.java] - Inserts 10K rows (50K entries) into accumulo with each row having 5 entries 22 | * [Read.java] - Reads data between two rows 23 | 24 | Inserts data with a BatchWriter: 25 | 26 | $ ./bin/runex helloworld.Insert 27 | 28 | On the accumulo status page at the URL below (you may need to replace 'localhost' with the name or IP of your server), you should see 50K entries. 29 | 30 | http://localhost:9995/ 31 | 32 | To view the entries, use the shell (run `accumulo shell -u username -p password` to access it) to scan the table: 33 | 34 | username@instance> table examples.hellotable 35 | username@instance examples.hellotable> scan 36 | 37 | You can also use a Java class to scan the table: 38 | 39 | $ ./bin/runex helloworld.Read 40 | 41 | [Insert.java]: ../src/main/java/org/apache/accumulo/examples/helloworld/Insert.java 42 | [Read.java]: ../src/main/java/org/apache/accumulo/examples/helloworld/Read.java 43 | -------------------------------------------------------------------------------- /contrib/import-control.xml: -------------------------------------------------------------------------------- 1 | 2 | 16 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /spark/README.md: -------------------------------------------------------------------------------- 1 | 17 | # Apache Accumulo Spark Example 18 | 19 | ## Requirements 20 | 21 | * Accumulo 2.0+ 22 | * Hadoop YARN installed & `HADOOP_CONF_DIR` set in environment 23 | * Spark installed & `SPARK_HOME` set in environment 24 | 25 | ## Spark example 26 | 27 | The [CopyPlus5K] example will create an Accumulo table called `spark_example_input` 28 | and write 100 key/value entries into Accumulo with the values `0..99`. It then launches 29 | a Spark application that does the following: 30 | 31 | * Read data from the `spark_example_input` table using `AccumuloInputFormat` 32 | * Add 5000 to each value 33 | * Write the data to a new Accumulo table (called `spark_example_output`) using one of 34 | two methods: 35 | 1. **Bulk import** - Write data to an RFile in HDFS using `AccumuloFileOutputFormat` and 36 | bulk import it into the Accumulo table 37 | 2. **Batchwriter** - Creates a `BatchWriter` in Spark code to write to the table. 38 | 39 | This application can be run using the command: 40 | 41 | ./run.sh batch /path/to/accumulo-client.properties 42 | 43 | Change `batch` to `bulk` to use the bulk import method.
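For orientation, the essence of the `batch` method is just a `BatchWriter` copying incremented values from one table to the other. A minimal sketch of that step with the Spark plumbing omitted (the `propsPath` argument is a stand-in for your client properties file):

```java
import java.util.Map.Entry;

import org.apache.accumulo.core.client.Accumulo;
import org.apache.accumulo.core.client.AccumuloClient;
import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.security.Authorizations;

public class BatchMethodSketch {
  public static void main(String[] args) throws Exception {
    String propsPath = args[0]; // path to accumulo-client.properties
    try (AccumuloClient client = Accumulo.newClient().from(propsPath).build();
        Scanner scanner = client.createScanner("spark_example_input", Authorizations.EMPTY);
        BatchWriter writer = client.createBatchWriter("spark_example_output")) {
      for (Entry<Key,Value> entry : scanner) {
        // add 5000 to each value and write it to the output table
        long incremented = Long.parseLong(entry.getValue().toString()) + 5000;
        Mutation m = new Mutation(entry.getKey().getRow());
        m.put(entry.getKey().getColumnFamily(), entry.getKey().getColumnQualifier(),
            new Value(Long.toString(incremented).getBytes()));
        writer.addMutation(m);
      }
    }
  }
}
```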
44 | 45 | [CopyPlus5K]: src/main/java/org/apache/accumulo/spark/CopyPlus5K.java 46 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/cli/Help.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.accumulo.examples.cli; 18 | 19 | import com.beust.jcommander.JCommander; 20 | import com.beust.jcommander.Parameter; 21 | import com.beust.jcommander.ParameterException; 22 | 23 | public class Help { 24 | @Parameter(names = {"-h", "-?", "--help", "-help"}, help = true) 25 | public boolean help = false; 26 | 27 | public void parseArgs(String programName, String[] args, Object... others) { 28 | JCommander commander = new JCommander(); 29 | commander.addObject(this); 30 | for (Object other : others) 31 | commander.addObject(other); 32 | commander.setProgramName(programName); 33 | try { 34 | commander.parse(args); 35 | } catch (ParameterException ex) { 36 | commander.usage(); 37 | exitWithError(ex.getMessage(), 1); 38 | } 39 | if (help) { 40 | commander.usage(); 41 | exit(0); 42 | } 43 | } 44 | 45 | public void exit(int status) { 46 | System.exit(status); 47 | } 48 | 49 | public void exitWithError(String message, int status) { 50 | System.err.println(message); 51 | exit(status); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /docs/release-testing.md: -------------------------------------------------------------------------------- 1 | 17 | # Apache Accumulo Release Testing 18 | 19 | This repository contains an integration test (IT) that runs all of the 20 | examples. This can be used for testing Accumulo release candidates (RC). To 21 | run the IT against an RC, add the following to `~/.m2/settings.xml`, changing 22 | `XXXX` to the proper id for a given RC. 23 | 24 | ```xml 25 | <profiles> 26 | <profile> 27 | <id>rcAccumulo</id> 28 | <repositories> 29 | <repository> 30 | <id>accrc</id> 31 | <name>accrcp</name> 32 | <url>https://repository.apache.org/content/repositories/orgapacheaccumulo-XXXX</url> 33 | </repository> 34 | </repositories> 35 | <pluginRepositories> 36 | <pluginRepository> 37 | <id>accrcp</id> 38 | <name>accrcp</name> 39 | <url>https://repository.apache.org/content/repositories/orgapacheaccumulo-XXXX</url> 40 | </pluginRepository> 41 | </pluginRepositories> 42 | </profile> 43 | </profiles> 44 | ``` 45 | 46 | After adding that, you can run the following command in this repository to run the IT. 47 | 48 | ``` 49 | mvn clean verify -PrcAccumulo -Daccumulo.version=$ACCUMULO_RC_VERSION 50 | ``` 51 | -------------------------------------------------------------------------------- /docs/bulkIngest.md: -------------------------------------------------------------------------------- 1 | 17 | # Apache Accumulo Bulk Ingest Example 18 | 19 | This is an example of how to bulk ingest data into Accumulo using MapReduce.
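For context, once MapReduce has written RFiles to a directory in HDFS, the client-side import itself is a single call in the Accumulo 2.x bulk import API. A hedged sketch (the RFile directory argument is hypothetical; the table name is the one this example creates):

```java
import org.apache.accumulo.core.client.Accumulo;
import org.apache.accumulo.core.client.AccumuloClient;

public class BulkLoadSketch {
  public static void main(String[] args) throws Exception {
    // args[0]: client properties file; args[1]: HDFS directory holding the RFiles
    try (AccumuloClient client = Accumulo.newClient().from(args[0]).build()) {
      // One call bulk-loads every RFile in the directory into the table.
      client.tableOperations().importDirectory(args[1]).to("examples.test_bulk").load();
    }
  }
}
```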
20 | 21 | This tutorial uses the following Java classes. 22 | 23 | * [SetupTable.java] - creates the table, 'examples.test_bulk', and sets two split points. 24 | * [BulkIngestExample.java] - creates some data to ingest and then ingests the data using MapReduce 25 | * [VerifyIngest.java] - checks that the data was ingested 26 | 27 | The following commands show how to run this example. This example creates a 28 | table called examples.test_bulk which has two initial split points. Then 1000 rows of 29 | test data are created in HDFS. After that the 1000 rows are ingested into 30 | Accumulo. Then we verify the 1000 rows are in Accumulo. 31 | 32 | $ ./bin/runex mapreduce.bulk.SetupTable 33 | $ ./bin/runmr mapreduce.bulk.BulkIngestExample 34 | $ ./bin/runex mapreduce.bulk.VerifyIngest 35 | 36 | [SetupTable.java]: ../src/main/java/org/apache/accumulo/examples/mapreduce/bulk/SetupTable.java 37 | [BulkIngestExample.java]: ../src/main/java/org/apache/accumulo/examples/mapreduce/bulk/BulkIngestExample.java 38 | [VerifyIngest.java]: ../src/main/java/org/apache/accumulo/examples/mapreduce/bulk/VerifyIngest.java 39 | -------------------------------------------------------------------------------- /docs/rowhash.md: -------------------------------------------------------------------------------- 1 | 17 | # Apache Accumulo RowHash Example 18 | 19 | This example shows a simple map/reduce job that reads from an accumulo table and 20 | writes back into that table. 21 | 22 | To run this example you will need some data in a table. The following will 23 | put a trivial amount of data into accumulo using the accumulo shell: 24 | 25 | $ accumulo shell 26 | username@instance> createnamespace examples 27 | username@instance> createtable examples.rowhash 28 | username@instance examples.rowhash> insert a-row cf cq value 29 | username@instance examples.rowhash> insert b-row cf cq value 30 | username@instance examples.rowhash> quit 31 | 32 | The RowHash class will insert a hash for each row in the table that contains the 33 | specified column. Here's how you run the map/reduce job: 34 | 35 | $ ./bin/runmr mapreduce.RowHash -t examples.rowhash --column cf:cq 36 | 37 | Now we can scan the table and see the hashes: 38 | 39 | $ accumulo shell 40 | username@instance> scan -t examples.rowhash 41 | a-row cf:cq [] value 42 | a-row cf-HASHTYPE:cq-MD5BASE64 [] IGPBYI1uC6+AJJxC4r5YBA== 43 | b-row cf:cq [] value 44 | b-row cf-HASHTYPE:cq-MD5BASE64 [] IGPBYI1uC6+AJJxC4r5YBA== 45 | username@instance> 46 | -------------------------------------------------------------------------------- /docs/tracing.md: -------------------------------------------------------------------------------- 1 | 17 | # Apache Accumulo Tracing Example 18 | 19 | This tutorial uses the [TracingExample.java] Java class to create an OpenTelemetry 20 | span in the TracingExample application and then create and read entries from Accumulo. 21 | Tracing output should show up in logs for the TracingExample class and the Accumulo client, 22 | and logs for the Accumulo server processes. 23 | 24 | ## Setup 25 | 26 | 1. Download the OpenTelemetry [JavaAgent] jar file and place it into the `/path/to/accumulo/lib/` directory. 27 | 2. Add the property `general.opentelemetry.enabled=true` to accumulo.properties 28 | 3. Set the following environment variables in your environment: 29 | 30 | JAVA_TOOL_OPTIONS="-javaagent:/path/to/accumulo/lib/opentelemetry-javaagent-1.12.1.jar" 31 | OTEL_TRACES_EXPORTER="logging" 32 | 33 | ## Run the Example 34 | 35 | 1. Start Accumulo 36 | 2.
Run the Tracing Example: 37 | 38 | $ ./bin/runex client.TracingExample --createtable --deletetable --create --read --table traceTest 39 | 40 | [JavaAgent]: https://search.maven.org/remotecontent?filepath=io/opentelemetry/javaagent/opentelemetry-javaagent/1.12.1/opentelemetry-javaagent-1.12.1.jar 41 | [TracingExample.java]: ../src/main/java/org/apache/accumulo/examples/client/TracingExample.java 42 | -------------------------------------------------------------------------------- /spark/contrib/import-control.xml: -------------------------------------------------------------------------------- 1 | 2 | 16 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /docs/tabletofile.md: -------------------------------------------------------------------------------- 1 | 17 | # Apache Accumulo Table-to-File Example 18 | 19 | This example uses mapreduce to extract specified columns from an existing table. 20 | 21 | To run this example you will need some data in a table. The following will 22 | put a trivial amount of data into accumulo using the accumulo shell: 23 | 24 | $ accumulo shell 25 | root@instance> createnamespace examples 26 | root@instance> createtable examples.input 27 | root@instance examples.input> insert dog cf cq dogvalue 28 | root@instance examples.input> insert cat cf cq catvalue 29 | root@instance examples.input> insert junk family qualifier junkvalue 30 | root@instance examples.input> quit 31 | 32 | The TableToFile class configures a map-only job to read the specified columns and 33 | writes the key/value pairs to a file in HDFS. 34 | 35 | The following will extract the rows containing the column "cf:cq": 36 | 37 | $ ./bin/runmr mapreduce.TableToFile -t examples.input --columns cf:cq --output /tmp/output 38 | 39 | $ hadoop fs -ls /tmp/output 40 | Found 2 items 41 | -rw-r--r-- 3 root supergroup 0 2021-05-04 10:32 /tmp/output/_SUCCESS 42 | -rw-r--r-- 3 root supergroup 44 2021-05-04 10:32 /tmp/output/part-m-00000 43 | 44 | We can see the output of our little map-reduce job: 45 | 46 | $ hadoop fs -text /tmp/output/part-m-00000 47 | cat cf:cq [] catvalue 48 | dog cf:cq [] dogvalue -------------------------------------------------------------------------------- /docs/regex.md: -------------------------------------------------------------------------------- 1 | 17 | # Apache Accumulo Regex Example 18 | 19 | This example uses mapreduce and accumulo to find items using regular expressions. 20 | This is accomplished using a map-only mapreduce job and a scan-time iterator. 21 | 22 | To run this example you will need some data in a table. The following will 23 | put a trivial amount of data into accumulo using the accumulo shell: 24 | 25 | $ accumulo shell 26 | username@instance> createnamespace examples 27 | username@instance> createtable examples.regex 28 | username@instance examples.regex> insert dogrow dogcf dogcq dogvalue 29 | username@instance examples.regex> insert catrow catcf catcq catvalue 30 | username@instance examples.regex> quit 31 | 32 | The RegexExample class sets an iterator on the scanner. This does pattern matching 33 | against each key/value in accumulo, and only returns matching items. It will do this 34 | in parallel and will store the results in files in HDFS.
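The same scan-time filtering also works outside MapReduce. A minimal sketch that attaches the regex iterator to a plain `Scanner` (the `propsPath` argument is a stand-in for your client properties file; table and pattern are the ones used above):

```java
import java.util.Map.Entry;

import org.apache.accumulo.core.client.Accumulo;
import org.apache.accumulo.core.client.AccumuloClient;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.user.RegExFilter;
import org.apache.accumulo.core.security.Authorizations;

public class RegexScanSketch {
  public static void main(String[] args) throws Exception {
    String propsPath = args[0]; // path to accumulo-client.properties
    IteratorSetting regex = new IteratorSetting(30, "regex", RegExFilter.class);
    // Match rows starting with "dog"; null leaves the other fields unconstrained.
    RegExFilter.setRegexs(regex, "dog.*", null, null, null, false);
    try (AccumuloClient client = Accumulo.newClient().from(propsPath).build();
        Scanner scanner = client.createScanner("examples.regex", Authorizations.EMPTY)) {
      scanner.addScanIterator(regex);
      for (Entry<Key,Value> entry : scanner) {
        System.out.println(entry.getKey() + " " + entry.getValue());
      }
    }
  }
}
```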
35 | 36 | The following will search for any rows in the input table that start with "dog": 37 | 38 | $ ./bin/runmr mapreduce.RegexExample -t examples.regex --rowRegex 'dog.*' --output /tmp/output 39 | 40 | $ hdfs dfs -ls /tmp/output 41 | Found 2 items 42 | -rw-r--r-- 1 username supergroup 0 2013-01-10 14:11 /tmp/output/_SUCCESS 43 | -rw-r--r-- 1 username supergroup 51 2013-01-10 14:10 /tmp/output/part-m-00000 44 | 45 | We can see the output of our little map-reduce job: 46 | 47 | $ hdfs dfs -cat /tmp/output/part-m-00000 48 | dogrow dogcf:dogcq [] 1357844987994 false dogvalue 49 | -------------------------------------------------------------------------------- /src/test/java/org/apache/accumulo/examples/constraints/NumericValueConstraintTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.accumulo.examples.constraints; 18 | 19 | import static org.junit.jupiter.api.Assertions.assertEquals; 20 | import static org.junit.jupiter.api.Assertions.assertNull; 21 | 22 | import org.apache.accumulo.core.data.Mutation; 23 | import org.apache.accumulo.core.data.Value; 24 | import org.junit.jupiter.api.Test; 25 | 26 | import com.google.common.collect.Iterables; 27 | 28 | public class NumericValueConstraintTest { 29 | 30 | private final NumericValueConstraint nvc = new NumericValueConstraint(); 31 | 32 | @Test 33 | public void testCheck() { 34 | Mutation goodMutation = new Mutation("r"); 35 | goodMutation.put("cf", "cq", new Value("1234".getBytes())); 36 | assertNull(nvc.check(null, goodMutation)); 37 | 38 | // Check that multiple bad mutations result in one violation only 39 | Mutation badMutation = new Mutation("r"); 40 | badMutation.put("cf", "cq", new Value("foo1234".getBytes())); 41 | badMutation.put("cf2", "cq2", new Value("foo1234".getBytes())); 42 | assertEquals(NumericValueConstraint.NON_NUMERIC_VALUE, 43 | Iterables.getOnlyElement(nvc.check(null, badMutation)).shortValue()); 44 | } 45 | 46 | @Test 47 | public void testGetViolationDescription() { 48 | assertEquals(NumericValueConstraint.VIOLATION_MESSAGE, 49 | nvc.getViolationDescription(NumericValueConstraint.NON_NUMERIC_VALUE)); 50 | assertNull(nvc.getViolationDescription((short) 2)); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/Common.java: -------------------------------------------------------------------------------- 1 | package org.apache.accumulo.examples; 2 | 3 | import org.apache.accumulo.core.client.AccumuloClient; 4 | import org.apache.accumulo.core.client.AccumuloException; 5 | import
org.apache.accumulo.core.client.AccumuloSecurityException; 6 | import org.apache.accumulo.core.client.NamespaceExistsException; 7 | import org.apache.accumulo.core.client.TableExistsException; 8 | import org.apache.accumulo.core.client.admin.NewTableConfiguration; 9 | import org.slf4j.Logger; 10 | import org.slf4j.LoggerFactory; 11 | 12 | public class Common { 13 | 14 | private static final Logger log = LoggerFactory.getLogger(Common.class); 15 | 16 | public static final String NAMESPACE = "examples"; 17 | 18 | public static final String TABLE_EXISTS_MSG = "Table already exists. User may wish to delete " 19 | + "existing table and re-run example. Table name: "; 20 | public static final String NAMESPACE_EXISTS_MSG = "Namespace already exists. User can ignore " 21 | + "this message and continue. Namespace: "; 22 | 23 | /** 24 | * Create a table within the supplied namespace. 25 | * 26 | * The incoming table name is expected to have the form "namespace.tablename". If the namespace 27 | * portion of the name is blank then the table is created outside of a namespace. 28 | * 29 | * @param client 30 | * AccumuloClient instance 31 | * @param table 32 | * The name of the table to be created 33 | */ 34 | public static void createTableWithNamespace(final AccumuloClient client, final String table) 35 | throws AccumuloException, AccumuloSecurityException { 36 | createTableWithNamespace(client, table, new NewTableConfiguration()); 37 | } 38 | 39 | public static void createTableWithNamespace(final AccumuloClient client, final String table, 40 | final NewTableConfiguration newTableConfig) 41 | throws AccumuloException, AccumuloSecurityException { 42 | String[] name = table.split("\\."); 43 | if (name.length == 2 && !name[0].isEmpty()) { 44 | try { 45 | client.namespaceOperations().create(name[0]); 46 | } catch (NamespaceExistsException e) { 47 | log.info(NAMESPACE_EXISTS_MSG + name[0]); 48 | } 49 | } 50 | try { 51 | client.tableOperations().create(table, newTableConfig); 52 | } catch (TableExistsException e) { 53 | log.warn(TABLE_EXISTS_MSG + table); 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/helloworld/Read.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.accumulo.examples.helloworld; 18 | 19 | import java.util.Map.Entry; 20 | 21 | import org.apache.accumulo.core.client.Accumulo; 22 | import org.apache.accumulo.core.client.AccumuloClient; 23 | import org.apache.accumulo.core.client.Scanner; 24 | import org.apache.accumulo.core.client.TableNotFoundException; 25 | import org.apache.accumulo.core.data.Key; 26 | import org.apache.accumulo.core.data.Range; 27 | import org.apache.accumulo.core.data.Value; 28 | import org.apache.accumulo.core.security.Authorizations; 29 | import org.apache.accumulo.examples.cli.ClientOpts; 30 | import org.slf4j.Logger; 31 | import org.slf4j.LoggerFactory; 32 | 33 | /** 34 | * Reads all data between two rows 35 | */ 36 | public class Read { 37 | 38 | private static final Logger log = LoggerFactory.getLogger(Read.class); 39 | 40 | public static void main(String[] args) throws TableNotFoundException { 41 | ClientOpts opts = new ClientOpts(); 42 | opts.parseArgs(Read.class.getName(), args); 43 | 44 | try (AccumuloClient client = Accumulo.newClient().from(opts.getClientPropsPath()).build(); 45 | Scanner scan = client.createScanner(Insert.HELLO_TABLE, Authorizations.EMPTY)) { 46 | scan.setRange(new Range(new Key("row_0"), new Key("row_1002"))); 47 | for (Entry<Key,Value> e : scan) { 48 | Key key = e.getKey(); 49 | log.trace(key.getRow() + " " + key.getColumnFamily() + " " + key.getColumnQualifier() + " " 50 | + e.getValue()); 51 | } 52 | log.info("Scan complete"); 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/filedata/KeyUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.accumulo.examples.filedata; 18 | 19 | import java.util.ArrayList; 20 | 21 | import org.apache.hadoop.io.Text; 22 | 23 | /** 24 | * A utility for creating and parsing null-byte separated strings into/from Text objects. 25 | */ 26 | public class KeyUtil { 27 | public static final byte[] nullbyte = new byte[] {0}; 28 | 29 | /** 30 | * Join some number of strings using a null byte separator into a text object. 31 | * 32 | * @param s 33 | * strings 34 | * @return a text object containing the strings separated by null bytes 35 | */ 36 | public static Text buildNullSepText(String... s) { 37 | Text t = new Text(s[0]); 38 | for (int i = 1; i < s.length; i++) { 39 | t.append(nullbyte, 0, 1); 40 | t.append(s[i].getBytes(), 0, s[i].length()); 41 | } 42 | return t; 43 | } 44 | 45 | /** 46 | * Split a text object using a null byte separator into an array of strings.
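* Empty strings between consecutive null bytes are preserved, making this the exact inverse of {@link #buildNullSepText(String...)}.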
47 | * 48 | * @param t 49 | * null-byte separated text object 50 | * @return an array of strings 51 | */ 52 | public static String[] splitNullSepText(Text t) { 53 | ArrayList<String> s = new ArrayList<>(); 54 | byte[] b = t.getBytes(); 55 | int lastindex = 0; 56 | for (int i = 0; i < t.getLength(); i++) { 57 | if (b[i] == (byte) 0) { 58 | s.add(new String(b, lastindex, i - lastindex)); 59 | lastindex = i + 1; 60 | } 61 | } 62 | s.add(new String(b, lastindex, t.getLength() - lastindex)); 63 | return s.toArray(new String[s.size()]); 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /docs/constraints.md: -------------------------------------------------------------------------------- 1 | 17 | # Apache Accumulo Constraints Example 18 | 19 | This tutorial uses the following Java classes, which can be found in org.apache.accumulo.examples.constraints: 20 | 21 | * [AlphaNumKeyConstraint.java] - a constraint that requires alphanumeric keys 22 | * [NumericValueConstraint.java] - a constraint that requires numeric string values 23 | * [MaxMutationSize.java] - a constraint that limits the size of mutations accepted into a table 24 | 25 | AlphaNumKeyConstraint prevents insertion of keys containing characters other than a-z, A-Z, and 0-9. 26 | NumericValueConstraint prevents insertion of values containing characters other than 0-9. The examples create mutations 27 | that violate these constraints, causing an exception to be thrown. 28 | 29 | $ ./bin/runex constraints.AlphaNumKeyConstraint 30 | $ ./bin/runex constraints.NumericValueConstraint 31 | 32 | The MaxMutationSize constraint will force the table to reject any mutation that is larger than 1/256th of the 33 | working memory of the tablet server. The following example attempts to ingest a single row with a million columns, 34 | which exceeds the memory limit. Depending on the amount of Java heap your tserver(s) are given, you may have to 35 | increase the number of columns provided to see the failure. 36 | 37 | $ ./bin/runex constraints.MaxMutationSize 38 | 39 | [AlphaNumKeyConstraint.java]: ../src/main/java/org/apache/accumulo/examples/constraints/AlphaNumKeyConstraint.java 40 | [NumericValueConstraint.java]: ../src/main/java/org/apache/accumulo/examples/constraints/NumericValueConstraint.java 41 | [MaxMutationSize.java]: ../src/main/java/org/apache/accumulo/examples/constraints/MaxMutationSize.java 42 | 43 | 44 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/client/CountingVerifyingReceiver.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License.
16 | */ 17 | package org.apache.accumulo.examples.client; 18 | 19 | import static java.nio.charset.StandardCharsets.UTF_8; 20 | 21 | import java.util.Arrays; 22 | import java.util.HashMap; 23 | 24 | import org.apache.accumulo.core.data.Key; 25 | import org.apache.accumulo.core.data.Value; 26 | import org.slf4j.Logger; 27 | import org.slf4j.LoggerFactory; 28 | 29 | /** 30 | * Internal class used to verify validity of data read. 31 | */ 32 | class CountingVerifyingReceiver { 33 | private static final Logger log = LoggerFactory.getLogger(CountingVerifyingReceiver.class); 34 | 35 | long count = 0; 36 | int expectedValueSize = 0; 37 | final HashMap<String,Boolean> expectedRows; 38 | 39 | CountingVerifyingReceiver(HashMap<String,Boolean> expectedRows, int expectedValueSize) { 40 | this.expectedRows = expectedRows; 41 | this.expectedValueSize = expectedValueSize; 42 | } 43 | 44 | public void receive(Key key, Value value) { 45 | 46 | String row = key.getRow().toString(); 47 | long rowid = Integer.parseInt(row.split("_")[1]); 48 | 49 | byte[] expectedValue = RandomBatchWriter.createValue(rowid, expectedValueSize); 50 | 51 | if (!Arrays.equals(expectedValue, value.get())) { 52 | log.error("Got unexpected value for " + key + " expected : " 53 | + new String(expectedValue, UTF_8) + " got : " + new String(value.get(), UTF_8)); 54 | 55 | } 56 | 57 | if (!expectedRows.containsKey(key.getRow().toString())) { 58 | log.error("Got unexpected key " + key); 59 | } else { 60 | expectedRows.put(key.getRow().toString(), true); 61 | } 62 | 63 | count++; 64 | } 65 | } -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/mapreduce/bulk/SetupTable.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License.
16 | */ 17 | package org.apache.accumulo.examples.mapreduce.bulk; 18 | 19 | import java.util.TreeSet; 20 | import java.util.stream.Collectors; 21 | import java.util.stream.Stream; 22 | 23 | import org.apache.accumulo.core.client.Accumulo; 24 | import org.apache.accumulo.core.client.AccumuloClient; 25 | import org.apache.accumulo.core.client.AccumuloException; 26 | import org.apache.accumulo.core.client.AccumuloSecurityException; 27 | import org.apache.accumulo.core.client.TableNotFoundException; 28 | import org.apache.accumulo.core.client.admin.NewTableConfiguration; 29 | import org.apache.accumulo.examples.Common; 30 | import org.apache.accumulo.examples.cli.ClientOpts; 31 | import org.apache.hadoop.io.Text; 32 | 33 | public final class SetupTable { 34 | 35 | static final String BULK_INGEST_TABLE = Common.NAMESPACE + ".test_bulk"; 36 | 37 | private SetupTable() {} 38 | 39 | public static void main(String[] args) 40 | throws AccumuloSecurityException, TableNotFoundException, AccumuloException { 41 | 42 | final Stream<String> splits = Stream.of("row_00000333", "row_00000666"); 43 | ClientOpts opts = new ClientOpts(); 44 | opts.parseArgs(SetupTable.class.getName(), args); 45 | 46 | try (AccumuloClient client = Accumulo.newClient().from(opts.getClientPropsPath()).build()) { 47 | // create a table with initial partitions 48 | TreeSet<Text> initialPartitions = splits.map(Text::new) 49 | .collect(Collectors.toCollection(TreeSet::new)); 50 | Common.createTableWithNamespace(client, BULK_INGEST_TABLE, 51 | new NewTableConfiguration().withSplits(initialPartitions)); 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /docs/isolation.md: -------------------------------------------------------------------------------- 1 | 17 | # Apache Accumulo Isolation Example 18 | 19 | Accumulo has an isolated scanner that ensures partial changes to rows are not 20 | seen. Isolation is documented in ../docs/isolation.html and the user manual. 21 | 22 | InterferenceTest is a simple example that shows the effects of scanning with 23 | and without isolation. This program starts two threads. One thread 24 | continually updates all the values in a row to be the same thing, but 25 | different from what it used to be. The other thread continually scans the 26 | table and checks that all values in a row are the same. Without isolation the 27 | scanning thread will sometimes see different values, which is the result of 28 | reading the row at the same time a mutation is changing the row. 29 | 30 | Below, InterferenceTest is run without isolation enabled for 50,000 iterations 31 | and it reports problems. 32 | 33 | 34 | $ accumulo shell -u <username> -p <password> -e 'createnamespace examples' 35 | $ ./bin/runex isolation.InterferenceTest -t examples.isotest --iterations 50000 36 | ERROR Columns in row 053 had multiple values [53, 4553] 37 | ERROR Columns in row 061 had multiple values [561, 61] 38 | ERROR Columns in row 070 had multiple values [570, 1070] 39 | ERROR Columns in row 079 had multiple values [1079, 1579] 40 | ERROR Columns in row 088 had multiple values [2588, 1588] 41 | ERROR Columns in row 106 had multiple values [2606, 3106] 42 | ERROR Columns in row 115 had multiple values [4615, 3115] 43 | finished 44 | 45 | Below, InterferenceTest is run with isolation enabled for 50,000 iterations and 46 | it reports no problems. A minimal sketch of how isolation is enabled on a scanner follows, then the run. 
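The isolation guarantee that InterferenceTest exercises comes from wrapping a normal Scanner in an IsolatedScanner. The following is a minimal sketch of that idea, not code taken from the example itself; it assumes an existing AccumuloClient named `client` and the `examples.isotest` table used above.

```java
import java.util.Map.Entry;

import org.apache.accumulo.core.client.AccumuloClient;
import org.apache.accumulo.core.client.IsolatedScanner;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.security.Authorizations;

class IsolatedScanSketch {
  static void scanRowsAtomically(AccumuloClient client) throws TableNotFoundException {
    // IsolatedScanner buffers each row so a row is never observed while a
    // concurrent mutation is half-applied to it.
    try (Scanner scanner = new IsolatedScanner(
        client.createScanner("examples.isotest", Authorizations.EMPTY))) {
      for (Entry<Key,Value> entry : scanner) {
        // every column seen here for a given row comes from one consistent view
      }
    }
  }
}
```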
47 | 48 | $ ./bin/runex isolation.InterferenceTest -t examples.isotest --iterations 50000 --isolated 49 | finished 50 | 51 | 52 | -------------------------------------------------------------------------------- /.github/workflows/maven.yaml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | # 19 | 20 | # This workflow will build a Java project with Maven 21 | # For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-maven 22 | 23 | name: QA 24 | 25 | on: 26 | push: 27 | branches: [ '*' ] 28 | pull_request: 29 | branches: [ '*' ] 30 | 31 | jobs: 32 | mvn: 33 | strategy: 34 | matrix: 35 | profile: 36 | - {name: 'verify', args: 'verify'} 37 | fail-fast: false 38 | timeout-minutes: 60 39 | runs-on: ubuntu-latest 40 | steps: 41 | - uses: actions/checkout@v4 42 | - name: Set up JDK 17 43 | uses: actions/setup-java@v4 44 | with: 45 | distribution: adopt 46 | java-version: 17 47 | cache: 'maven' 48 | - name: Build with Maven (${{ matrix.profile.name }}) 49 | run: mvn -B -V -e -ntp "-Dstyle.color=always" ${{ matrix.profile.args }} 50 | env: 51 | MAVEN_OPTS: -Djansi.force=true 52 | - name: Upload unit test results 53 | if: ${{ failure() }} 54 | uses: actions/upload-artifact@v4 55 | with: 56 | name: surefire-reports-${{ matrix.profile.name }} 57 | path: ./**/target/surefire-reports/ 58 | if-no-files-found: ignore 59 | - name: Upload integration test results 60 | if: ${{ failure() }} 61 | uses: actions/upload-artifact@v4 62 | with: 63 | name: failsafe-reports-${{ matrix.profile.name }} 64 | path: ./**/target/failsafe-reports/ 65 | if-no-files-found: ignore 66 | - name: Upload mini test logs 67 | if: ${{ failure() }} 68 | uses: actions/upload-artifact@v4 69 | with: 70 | name: mini-tests-logs-${{ matrix.profile.name }} 71 | path: ./**/target/**/mini-tests/**/logs/ 72 | if-no-files-found: ignore 73 | 74 | -------------------------------------------------------------------------------- /src/test/java/org/apache/accumulo/examples/constraints/AlphaNumKeyConstraintTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.accumulo.examples.constraints; 18 | 19 | import static org.junit.jupiter.api.Assertions.assertEquals; 20 | import static org.junit.jupiter.api.Assertions.assertNull; 21 | 22 | import org.apache.accumulo.core.data.Mutation; 23 | import org.apache.accumulo.core.data.Value; 24 | import org.junit.jupiter.api.Test; 25 | 26 | import com.google.common.collect.ImmutableList; 27 | 28 | public class AlphaNumKeyConstraintTest { 29 | 30 | private final AlphaNumKeyConstraint ankc = new AlphaNumKeyConstraint(); 31 | 32 | @Test 33 | public void test() { 34 | Mutation goodMutation = new Mutation("Row1"); 35 | goodMutation.put("Colf2", "ColQ3", new Value("value".getBytes())); 36 | assertNull(ankc.check(null, goodMutation)); 37 | 38 | // Check that violations are in row, cf, cq order 39 | Mutation badMutation = new Mutation("Row#1"); 40 | badMutation.put("Colf$2", "Colq%3", new Value("value".getBytes())); 41 | assertEquals( 42 | ImmutableList.of(AlphaNumKeyConstraint.NON_ALPHA_NUM_ROW, 43 | AlphaNumKeyConstraint.NON_ALPHA_NUM_COLF, AlphaNumKeyConstraint.NON_ALPHA_NUM_COLQ), 44 | ankc.check(null, badMutation)); 45 | } 46 | 47 | @Test 48 | public void testGetViolationDescription() { 49 | assertEquals(AlphaNumKeyConstraint.ROW_VIOLATION_MESSAGE, 50 | ankc.getViolationDescription(AlphaNumKeyConstraint.NON_ALPHA_NUM_ROW)); 51 | assertEquals(AlphaNumKeyConstraint.COLF_VIOLATION_MESSAGE, 52 | ankc.getViolationDescription(AlphaNumKeyConstraint.NON_ALPHA_NUM_COLF)); 53 | assertEquals(AlphaNumKeyConstraint.COLQ_VIOLATION_MESSAGE, 54 | ankc.getViolationDescription(AlphaNumKeyConstraint.NON_ALPHA_NUM_COLQ)); 55 | assertNull(ankc.getViolationDescription((short) 4)); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/shard/Reverse.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.accumulo.examples.shard; 18 | 19 | import java.util.Map.Entry; 20 | 21 | import org.apache.accumulo.core.client.Accumulo; 22 | import org.apache.accumulo.core.client.AccumuloClient; 23 | import org.apache.accumulo.core.client.BatchWriter; 24 | import org.apache.accumulo.core.client.Scanner; 25 | import org.apache.accumulo.core.data.Key; 26 | import org.apache.accumulo.core.data.Mutation; 27 | import org.apache.accumulo.core.data.Value; 28 | import org.apache.accumulo.core.security.Authorizations; 29 | import org.apache.accumulo.examples.cli.ClientOpts; 30 | import org.apache.hadoop.io.Text; 31 | 32 | import com.beust.jcommander.Parameter; 33 | 34 | /** 35 | * The program reads an Accumulo table written by {@link Index} and writes out to another table. It 36 | * writes out a mapping of documents to terms. The document to term mapping is used by 37 | * {@link ContinuousQuery}. 38 | */ 39 | public class Reverse { 40 | 41 | static class Opts extends ClientOpts { 42 | 43 | @Parameter(names = "--shardTable", description = "name of the shard table") 44 | String shardTable; 45 | 46 | @Parameter(names = "--doc2Term", description = "name of the doc2Term table") 47 | String doc2TermTable; 48 | } 49 | 50 | public static void main(String[] args) throws Exception { 51 | Opts opts = new Opts(); 52 | opts.parseArgs(Reverse.class.getName(), args); 53 | 54 | try (AccumuloClient client = Accumulo.newClient().from(opts.getClientPropsPath()).build(); 55 | Scanner scanner = client.createScanner(opts.shardTable, Authorizations.EMPTY); 56 | BatchWriter bw = client.createBatchWriter(opts.doc2TermTable)) { 57 | for (Entry<Key,Value> entry : scanner) { 58 | Key key = entry.getKey(); 59 | Mutation m = new Mutation(key.getColumnQualifier()); 60 | m.put(key.getColumnFamily(), new Text(), new Value(new byte[0])); 61 | bw.addMutation(m); 62 | } 63 | } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /docs/terasort.md: -------------------------------------------------------------------------------- 1 | 17 | # Apache Accumulo Terasort Example 18 | 19 | This example uses map/reduce to generate random input data that will 20 | be sorted by storing it into Accumulo. It uses data very similar to the 21 | Hadoop terasort benchmark. 22 | 23 | First, make sure the 'examples' namespace exists. If it already exists, the error message can be 24 | ignored. 
25 | 26 | $ accumulo shell -u root -p secret -e 'createnamespace examples' 27 | 28 | This example is run with arguments describing the amount of data: 29 | 30 | $ ./bin/runmr mapreduce.TeraSortIngest --count 10 --minKeySize 10 --maxKeySize 10 \ 31 | --minValueSize 78 --maxValueSize 78 --table examples.sort --splits 10 32 | 33 | After the map reduce job completes, scan the data: 34 | 35 | $ accumulo shell 36 | username@instance> scan -t examples.sort 37 | +l-$$OE/ZH c: 4 [] GGGGGGGGGGWWWWWWWWWWMMMMMMMMMMCCCCCCCCCCSSSSSSSSSSIIIIIIIIIIYYYYYYYYYYOOOOOOOO 38 | ,C)wDw//u= c: 10 [] CCCCCCCCCCSSSSSSSSSSIIIIIIIIIIYYYYYYYYYYOOOOOOOOOOEEEEEEEEEEUUUUUUUUUUKKKKKKKK 39 | 75@~?'WdUF c: 1 [] IIIIIIIIIIYYYYYYYYYYOOOOOOOOOOEEEEEEEEEEUUUUUUUUUUKKKKKKKKKKAAAAAAAAAAQQQQQQQQ 40 | ;L+!2rT~hd c: 8 [] MMMMMMMMMMCCCCCCCCCCSSSSSSSSSSIIIIIIIIIIYYYYYYYYYYOOOOOOOOOOEEEEEEEEEEUUUUUUUU 41 | LsS8)|.ZLD c: 5 [] OOOOOOOOOOEEEEEEEEEEUUUUUUUUUUKKKKKKKKKKAAAAAAAAAAQQQQQQQQQQGGGGGGGGGGWWWWWWWW 42 | M^*dDE;6^< c: 9 [] UUUUUUUUUUKKKKKKKKKKAAAAAAAAAAQQQQQQQQQQGGGGGGGGGGWWWWWWWWWWMMMMMMMMMMCCCCCCCC 43 | ^Eu) 
-------------------------------------------------------------------------------- /docs/filedata.md: -------------------------------------------------------------------------------- 1 | 17 | # Apache Accumulo File System Archive Example (Data Only) 18 | 19 | This example archives file data into an Accumulo table. Files with duplicate data are only stored once. 20 | The example has the following classes: 21 | 22 | * CharacterHistogram - A MapReduce that computes a histogram of byte frequency for each file and stores the histogram alongside the file data. An example use of the ChunkInputFormat. 23 | * ChunkCombiner - An Iterator that dedupes file data and sets their visibilities to a combined visibility based on current references to the file data. 24 | * ChunkInputFormat - An Accumulo InputFormat that provides keys containing file info (List<Entry<Key,Value>>) and values with an InputStream over the file (ChunkInputStream). 25 | * ChunkInputStream - An input stream over file data stored in Accumulo. 26 | * FileDataIngest - Takes a list of files and archives them into Accumulo keyed on hashes of the files. 27 | * FileDataQuery - Retrieves file data based on the hash of the file. (Used by the dirlist.Viewer.) 28 | * KeyUtil - A utility for creating and parsing null-byte separated strings into/from Text objects. 29 | * VisibilityCombiner - A utility for merging visibilities into the form (VIS1)|(VIS2)|... 30 | 31 | This example is coupled with the [dirlist example][dirlist]. 32 | 33 | If you haven't already run the [dirlist example][dirlist], ingest a file with FileDataIngest. 34 | 35 | $ ./bin/runex filedata.FileDataIngest -t examples.dataTable --auths exampleVis --chunk 1000 /path/to/accumulo/README.md 36 | 37 | Open the accumulo shell and look at the data. The row is the MD5 hash of the file, which you can 38 | verify by running a command such as 'md5sum' on the file. Note that in order to scan 39 | examples.dataTable, the class org.apache.accumulo.examples.filedata.ChunkCombiner must be on 40 | your classpath, or the accumulo-examples-shaded.jar should be moved to the accumulo lib directory. 41 | 42 | > scan -t examples.dataTable 43 | 44 | Run the CharacterHistogram MapReduce to add some information about the file. 45 | 46 | $ ./bin/runmr filedata.CharacterHistogram -t examples.dataTable --auths exampleVis --vis exampleVis 47 | 48 | Scan again to see the histogram stored in the 'info' column family. A short sketch of retrieving file data programmatically follows, then the scan. 
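Programmatic retrieval is also possible with the FileDataQuery class, whose source appears later in this document. This is a hypothetical sketch, not part of the example programs: the `client` variable and the hash argument are assumptions, while the table name and authorizations mirror the commands above.

```java
import org.apache.accumulo.core.client.AccumuloClient;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.accumulo.examples.filedata.FileDataQuery;

class FileDataQuerySketch {
  // Returns up to the first 100 bytes of the archived file whose row key
  // (the MD5 hash of its contents) is md5Hash.
  static String preview(AccumuloClient client, String md5Hash) throws Exception {
    FileDataQuery query = new FileDataQuery(client, "examples.dataTable",
        new Authorizations("exampleVis"));
    return query.getSomeData(md5Hash, 100);
  }
}
```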
49 | 50 | > scan -t examples.dataTable 51 | 52 | [dirlist]: dirlist.md 53 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/filedata/FileDataQuery.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.accumulo.examples.filedata; 18 | 19 | import java.io.IOException; 20 | import java.util.ArrayList; 21 | import java.util.List; 22 | import java.util.Map.Entry; 23 | 24 | import org.apache.accumulo.core.client.AccumuloClient; 25 | import org.apache.accumulo.core.client.Scanner; 26 | import org.apache.accumulo.core.client.TableNotFoundException; 27 | import org.apache.accumulo.core.data.Key; 28 | import org.apache.accumulo.core.data.Range; 29 | import org.apache.accumulo.core.data.Value; 30 | import org.apache.accumulo.core.security.Authorizations; 31 | 32 | import com.google.common.collect.Iterators; 33 | import com.google.common.collect.PeekingIterator; 34 | 35 | /** 36 | * Retrieves file data based on the hash of the file. Used by the 37 | * {@link org.apache.accumulo.examples.dirlist.Viewer}. See README.dirlist for instructions. 
38 | */ 39 | public class FileDataQuery { 40 | final List<Entry<Key,Value>> lastRefs; 41 | private final ChunkInputStream cis; 42 | Scanner scanner; 43 | 44 | public FileDataQuery(AccumuloClient client, String tableName, Authorizations auths) 45 | throws TableNotFoundException { 46 | lastRefs = new ArrayList<>(); 47 | cis = new ChunkInputStream(); 48 | scanner = client.createScanner(tableName, auths); 49 | } 50 | 51 | public List<Entry<Key,Value>> getLastRefs() { 52 | return lastRefs; 53 | } 54 | 55 | public ChunkInputStream getData(String hash) throws IOException { 56 | scanner.setRange(new Range(hash)); 57 | scanner.setBatchSize(1); 58 | lastRefs.clear(); 59 | PeekingIterator<Entry<Key,Value>> pi = Iterators.peekingIterator(scanner.iterator()); 60 | if (pi.hasNext()) { 61 | while (!pi.peek().getKey().getColumnFamily().equals(FileDataIngest.CHUNK_CF)) { 62 | lastRefs.add(pi.peek()); 63 | pi.next(); 64 | } 65 | } 66 | cis.clear(); 67 | cis.setSource(pi); 68 | return cis; 69 | } 70 | 71 | public String getSomeData(String hash, int numBytes) throws IOException { 72 | ChunkInputStream is = getData(hash); 73 | byte[] buf = new byte[numBytes]; 74 | if (is.read(buf) >= 0) { 75 | return new String(buf); 76 | } else { 77 | return ""; 78 | } 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/bloom/BloomFiltersNotFound.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.accumulo.examples.bloom; 18 | 19 | import static org.apache.accumulo.examples.bloom.BloomFilters.writeData; 20 | 21 | import java.util.Map; 22 | 23 | import org.apache.accumulo.core.client.Accumulo; 24 | import org.apache.accumulo.core.client.AccumuloClient; 25 | import org.apache.accumulo.core.client.AccumuloException; 26 | import org.apache.accumulo.core.client.AccumuloSecurityException; 27 | import org.apache.accumulo.core.client.TableNotFoundException; 28 | import org.apache.accumulo.core.client.admin.NewTableConfiguration; 29 | import org.apache.accumulo.examples.Common; 30 | import org.apache.accumulo.examples.cli.ClientOpts; 31 | import org.slf4j.Logger; 32 | import org.slf4j.LoggerFactory; 33 | 34 | public class BloomFiltersNotFound { 35 | 36 | private static final Logger log = LoggerFactory.getLogger(BloomFiltersNotFound.class); 37 | 38 | public static void main(String[] args) 39 | throws AccumuloException, AccumuloSecurityException, TableNotFoundException { 40 | ClientOpts opts = new ClientOpts(); 41 | opts.parseArgs(BloomFiltersNotFound.class.getName(), args); 42 | 43 | try (AccumuloClient client = Accumulo.newClient().from(opts.getClientPropsPath()).build()) { 44 | Map<String,String> props = Map.of(BloomCommon.BLOOM_ENABLED_PROPERTY, "true"); 45 | var newTableConfig = new NewTableConfiguration().setProperties(props); 46 | 47 | Common.createTableWithNamespace(client, BloomCommon.BLOOM_TEST3_TABLE); 48 | Common.createTableWithNamespace(client, BloomCommon.BLOOM_TEST4_TABLE, newTableConfig); 49 | 50 | writeAndFlush(BloomCommon.BLOOM_TEST3_TABLE, client); 51 | writeAndFlush(BloomCommon.BLOOM_TEST4_TABLE, client); 52 | 53 | BloomBatchScanner.scan(client, BloomCommon.BLOOM_TEST3_TABLE, 8); 54 | BloomBatchScanner.scan(client, BloomCommon.BLOOM_TEST4_TABLE, 8); 55 | } 56 | } 57 | 58 | private static void writeAndFlush(String tableName, AccumuloClient client) 59 | throws TableNotFoundException, AccumuloException, AccumuloSecurityException { 60 | log.info("Writing data to {}", tableName); 61 | writeData(client, tableName, 7); 62 | client.tableOperations().flush(tableName, null, null, true); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /docs/batch.md: -------------------------------------------------------------------------------- 1 | 17 | # Apache Accumulo Batch Writing and Scanning Example 18 | 19 | This is an example of how to use the BatchWriter and BatchScanner. 20 | 21 | This tutorial uses the following Java classes. 22 | 23 | * [SequentialBatchWriter.java] - writes mutations with sequential rows and random values 24 | * [RandomBatchScanner.java] - reads random rows and verifies their values 25 | 26 | Run `SequentialBatchWriter` to add 10000 entries with random 50-byte values to Accumulo. 27 | 28 | $ ./bin/runex client.SequentialBatchWriter 29 | 30 | Verify data was ingested by scanning the table using the Accumulo shell: 31 | 32 | $ accumulo shell 33 | root@instance> table examples.batch 34 | root@instance examples.batch> scan 35 | 36 | Run `RandomBatchScanner` to perform 1000 random queries and verify the results. A minimal sketch of the underlying BatchScanner pattern is shown below, before the run. 
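RandomBatchScanner's parallel lookups are built on a BatchScanner. The following is a minimal sketch of that pattern, not the example's actual code; it assumes an existing AccumuloClient named `client`, and the row keys are illustrative only.

```java
import java.util.List;
import java.util.Map.Entry;

import org.apache.accumulo.core.client.AccumuloClient;
import org.apache.accumulo.core.client.BatchScanner;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.security.Authorizations;

class BatchScanSketch {
  static void lookup(AccumuloClient client) throws TableNotFoundException {
    // 10 query threads; results for the requested ranges arrive in no
    // particular order, which is what makes the lookups fast.
    try (BatchScanner bs = client.createBatchScanner("examples.batch",
        Authorizations.EMPTY, 10)) {
      bs.setRanges(List.of(Range.exact("row_0000000042"), Range.exact("row_0000004711")));
      for (Entry<Key,Value> entry : bs) {
        // verify entry.getValue() against the expected value for its row
      }
    }
  }
}
```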
37 | 38 | $ ./bin/runex client.RandomBatchScanner 39 | 16:04:05,950 [examples.client.RandomBatchScanner] INFO : Generating 1000 random ranges for BatchScanner to read 40 | 16:04:06,020 [examples.client.RandomBatchScanner] INFO : Reading ranges using BatchScanner 41 | 16:04:06,283 [examples.client.RandomBatchScanner] TRACE: 100 lookups 42 | 16:04:06,290 [examples.client.RandomBatchScanner] TRACE: 200 lookups 43 | 16:04:06,294 [examples.client.RandomBatchScanner] TRACE: 300 lookups 44 | 16:04:06,297 [examples.client.RandomBatchScanner] TRACE: 400 lookups 45 | 16:04:06,301 [examples.client.RandomBatchScanner] TRACE: 500 lookups 46 | 16:04:06,304 [examples.client.RandomBatchScanner] TRACE: 600 lookups 47 | 16:04:06,307 [examples.client.RandomBatchScanner] TRACE: 700 lookups 48 | 16:04:06,309 [examples.client.RandomBatchScanner] TRACE: 800 lookups 49 | 16:04:06,316 [examples.client.RandomBatchScanner] TRACE: 900 lookups 50 | 16:04:06,320 [examples.client.RandomBatchScanner] TRACE: 1000 lookups 51 | 16:04:06,330 [examples.client.RandomBatchScanner] INFO : Scan finished! 3246.75 lookups/sec, 0.31 secs, 1000 results 52 | 16:04:06,331 [examples.client.RandomBatchScanner] INFO : All expected rows were scanned 53 | 54 | [SequentialBatchWriter.java]: ../src/main/java/org/apache/accumulo/examples/client/SequentialBatchWriter.java 55 | [RandomBatchWriter.java]: ../src/main/java/org/apache/accumulo/examples/client/RandomBatchWriter.java 56 | [RandomBatchScanner.java]: ../src/main/java/org/apache/accumulo/examples/client/RandomBatchScanner.java 57 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/filedata/VisibilityCombiner.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.accumulo.examples.filedata; 18 | 19 | import java.util.TreeSet; 20 | 21 | import org.apache.accumulo.core.data.ByteSequence; 22 | 23 | /** 24 | * A utility for merging visibilities into the form {@code (VIS1)|(VIS2)|...|(VISN)}. Used by the 25 | * {@link ChunkCombiner}. 
26 | */ 27 | public class VisibilityCombiner { 28 | 29 | private final TreeSet<String> visibilities = new TreeSet<>(); 30 | 31 | void add(ByteSequence cv) { 32 | if (cv.length() == 0) 33 | return; 34 | 35 | int depth = 0; 36 | int offset = 0; 37 | 38 | for (int i = 0; i < cv.length(); i++) { 39 | switch (cv.byteAt(i)) { 40 | case '(': 41 | depth++; 42 | break; 43 | case ')': 44 | depth--; 45 | if (depth < 0) 46 | throw new IllegalArgumentException("Invalid vis " + cv); 47 | break; 48 | case '|': 49 | if (depth == 0) { 50 | insert(cv.subSequence(offset, i)); 51 | offset = i + 1; 52 | } 53 | 54 | break; 55 | } 56 | } 57 | 58 | insert(cv.subSequence(offset, cv.length())); 59 | 60 | if (depth != 0) 61 | throw new IllegalArgumentException("Invalid vis " + cv); 62 | 63 | } 64 | 65 | private void insert(ByteSequence cv) { 66 | 67 | String cvs = cv.toString(); 68 | 69 | if (cvs.charAt(0) != '(') 70 | cvs = "(" + cvs + ")"; 71 | else { 72 | int depth = 0; 73 | int depthZeroCloses = 0; 74 | for (int i = 0; i < cv.length(); i++) { 75 | switch (cv.byteAt(i)) { 76 | case '(': 77 | depth++; 78 | break; 79 | case ')': 80 | depth--; 81 | if (depth == 0) 82 | depthZeroCloses++; 83 | break; 84 | } 85 | } 86 | 87 | if (depthZeroCloses > 1) 88 | cvs = "(" + cvs + ")"; 89 | } 90 | 91 | visibilities.add(cvs); 92 | } 93 | 94 | byte[] get() { 95 | StringBuilder sb = new StringBuilder(); 96 | String sep = ""; 97 | for (String cvs : visibilities) { 98 | sb.append(sep); 99 | sep = "|"; 100 | sb.append(cvs); 101 | } 102 | 103 | return sb.toString().getBytes(); 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /docs/wordcount.md: -------------------------------------------------------------------------------- 1 | 17 | # Apache Accumulo Word Count example 18 | 19 | The WordCount example ([WordCount.java]) uses MapReduce and Accumulo to compute 20 | word counts for a set of documents. This is accomplished using a map-only MapReduce 21 | job and an Accumulo table with combiners. 22 | 23 | To run this example, create a directory in HDFS containing text files. You can 24 | use the Accumulo README for data: 25 | 26 | $ hdfs dfs -mkdir /wc 27 | $ hdfs dfs -copyFromLocal /path/to/accumulo/README.md /wc/README.md 28 | 29 | Verify that the file was created: 30 | 31 | $ hdfs dfs -ls /wc 32 | 33 | Next, run the WordCount MapReduce job with your HDFS input directory: 34 | 35 | $ ./bin/runmr mapreduce.WordCount -i /wc 36 | 37 | [WordCount.java] creates an Accumulo table named `examples.wordcount` with a SummingCombiner iterator 38 | attached to it. It runs a map-only M/R job that reads the specified HDFS directory containing text files and 39 | writes word counts to the Accumulo table. 40 | 41 | After the MapReduce job completes, query the Accumulo table to see word counts. 42 | 43 | $ accumulo shell 44 | username@instance> table examples.wordcount 45 | username@instance examples.wordcount> scan -b the 46 | the count:20080906 [] 75 47 | their count:20080906 [] 2 48 | them count:20080906 [] 1 49 | then count:20080906 [] 1 50 | ... 51 | 52 | When the WordCount MapReduce job was run above, the client properties were serialized 53 | into the MapReduce configuration. This is insecure if the properties contain sensitive 54 | information like passwords. A more secure option is to store accumulo-client.properties 55 | in HDFS and run the job with the `-d` option. 
This will configure the MapReduce job 56 | to obtain the client properties from HDFS: 57 | 58 | $ hdfs dfs -mkdir /user 59 | $ hdfs dfs -mkdir /user/myuser 60 | $ hdfs dfs -copyFromLocal /path/to/accumulo/conf/accumulo-client.properties /user/myuser/ 61 | $ ./bin/runmr mapreduce.WordCount -i /wc -t examples.wordcount2 -d /user/myuser/accumulo-client.properties 62 | 63 | After the MapReduce job completes, query the `examples.wordcount2` table. The results should 64 | be the same as before: 65 | 66 | $ accumulo shell 67 | username@instance> table examples.wordcount2 68 | username@instance examples.wordcount2> scan -b the 69 | the count:20080906 [] 75 70 | their count:20080906 [] 2 71 | ... 72 | 73 | 74 | [WordCount.java]: ../src/main/java/org/apache/accumulo/examples/mapreduce/WordCount.java 75 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/cli/ClientOpts.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.accumulo.examples.cli; 18 | 19 | import java.nio.file.Paths; 20 | import java.util.Properties; 21 | 22 | import org.apache.accumulo.core.client.Accumulo; 23 | import org.apache.accumulo.core.client.AccumuloClient; 24 | import org.apache.accumulo.core.security.Authorizations; 25 | import org.apache.accumulo.core.security.ColumnVisibility; 26 | import org.apache.hadoop.conf.Configuration; 27 | 28 | import com.beust.jcommander.IStringConverter; 29 | import com.beust.jcommander.Parameter; 30 | 31 | public class ClientOpts extends Help { 32 | 33 | public static class AuthConverter implements IStringConverter<Authorizations> { 34 | @Override 35 | public Authorizations convert(String value) { 36 | return new Authorizations(value.split(",")); 37 | } 38 | } 39 | 40 | public static class VisibilityConverter implements IStringConverter<ColumnVisibility> { 41 | @Override 42 | public ColumnVisibility convert(String value) { 43 | return new ColumnVisibility(value); 44 | } 45 | } 46 | 47 | @Parameter(names = {"-c", "--conf"}, description = "Path to accumulo-client.properties." 
48 | + " If not set, defaults to path set by env variable ACCUMULO_CLIENT_PROPS.") 49 | private String propsPath = null; 50 | 51 | @Parameter(names = {"-auths", "--auths"}, converter = AuthConverter.class, 52 | description = "the authorizations to use when reading or writing") 53 | public Authorizations auths = Authorizations.EMPTY; 54 | 55 | private Properties cachedProps = null; 56 | 57 | public AccumuloClient createAccumuloClient() { 58 | return Accumulo.newClient().from(getClientPropsPath()).build(); 59 | } 60 | 61 | public String getClientPropsPath() { 62 | if (propsPath == null) { 63 | propsPath = System.getenv("ACCUMULO_CLIENT_PROPS"); 64 | if (propsPath == null) { 65 | throw new IllegalArgumentException("accumulo-client.properties must be set!"); 66 | } 67 | if (!Paths.get(propsPath).toFile().exists()) { 68 | throw new IllegalArgumentException(propsPath + " does not exist!"); 69 | } 70 | } 71 | return propsPath; 72 | } 73 | 74 | public Properties getClientProperties() { 75 | if (cachedProps == null) { 76 | cachedProps = Accumulo.newClientProperties().from(getClientPropsPath()).build(); 77 | } 78 | return cachedProps; 79 | } 80 | 81 | public Configuration getHadoopConfig() { 82 | Configuration config = new Configuration(); 83 | config.set("mapreduce.job.classloader", "true"); 84 | return config; 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /docs/reservations.md: -------------------------------------------------------------------------------- 1 | 17 | # Apache Accumulo Reservations Example 18 | 19 | This example shows running a simple reservation system implemented using 20 | conditional mutations. This system guarantees that only one concurrent user can 21 | reserve a resource. The example's reserve command allows multiple users to be 22 | specified. When this is done, it creates a separate reservation thread for each 23 | user. In the example below, threads are spun up for alice, bob, eve, mallory, 24 | and trent to reserve room06 on 20140101. Bob ends up getting the reservation 25 | and everyone else is put on a wait list. The example code will take any string 26 | for what, when and who. A sketch of the conditional-mutation idea behind this guarantee follows, then a full session. 
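The following is a simplified sketch of the idea, not the ARS implementation itself; the table, row, and column layout mirror the scan output shown at the end of this example, while the `client` variable and the new reservation values are assumptions made for illustration.

```java
import org.apache.accumulo.core.client.AccumuloClient;
import org.apache.accumulo.core.client.ConditionalWriter;
import org.apache.accumulo.core.client.ConditionalWriterConfig;
import org.apache.accumulo.core.data.Condition;
import org.apache.accumulo.core.data.ConditionalMutation;

class ReservationSketch {
  static boolean tryReserve(AccumuloClient client) throws Exception {
    // The mutation succeeds only if tx:seq still holds the value we read
    // (here "6"); a concurrent writer bumping the sequence causes REJECTED.
    ConditionalMutation cm = new ConditionalMutation("room06:20140101");
    cm.addCondition(new Condition("tx", "seq").setValue("6"));
    cm.put("tx", "seq", "7");
    cm.put("res", "0005", "dave");
    try (ConditionalWriter cw = client.createConditionalWriter("examples.ars",
        new ConditionalWriterConfig())) {
      return cw.write(cm).getStatus() == ConditionalWriter.Status.ACCEPTED;
    }
  }
}
```

When the condition no longer holds because another thread changed tx:seq, the caller re-reads the row and retries, which is exactly the retry loop described at the end of this example.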
27 | 28 | $ /path/to/accumulo org.apache.accumulo.server.util.ListInstances 29 | 30 | Instance Name | Instance ID | Master 31 | ---------------------+--------------------------------------+------------------------------- 32 | | 9f8f2a97-432f-4e66-b153-861e2a1ca246 | localhost:9999 33 | 34 | $ /path/to/accumulo shell -u root -p secret -e "createnamespace examples" 35 | $ /path/to/accumulo shell -u root -p secret -e "createtable examples.ars" 36 | $ ./bin/runex reservations.ARS 37 | >connect localhost root secret examples.ars 38 | connected 39 | > 40 | Commands : 41 | reserve <what> <when> {who} 42 | cancel <what> <when> <who> 43 | list <what> <when> 44 | >reserve room06 20140101 alice bob eve mallory trent 45 | bob : RESERVED 46 | mallory : WAIT_LISTED 47 | alice : WAIT_LISTED 48 | trent : WAIT_LISTED 49 | eve : WAIT_LISTED 50 | >list room06 20140101 51 | Reservation holder : bob 52 | Wait list : [mallory, alice, trent, eve] 53 | >cancel room06 20140101 alice 54 | >cancel room06 20140101 bob 55 | >list room06 20140101 56 | Reservation holder : mallory 57 | Wait list : [trent, eve] 58 | >quit 59 | 60 | Scanning the table in the Accumulo shell after running the example shows the 61 | following: 62 | 63 | root@test16> table examples.ars 64 | root@test16 examples.ars> scan 65 | room06:20140101 res:0001 [] mallory 66 | room06:20140101 res:0003 [] trent 67 | room06:20140101 res:0004 [] eve 68 | room06:20140101 tx:seq [] 6 69 | 70 | The tx:seq column is incremented for each update to the row, allowing for 71 | detection of concurrent changes. For an update to go through, the sequence 72 | number must not have changed since the data was read. If it does change, 73 | the conditional mutation will fail and the example code will retry. 74 | 75 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/filedata/ChunkInputFormat.java: -------------------------------------------------------------------------------- 1 | /// * 2 | // * Licensed to the Apache Software Foundation (ASF) under one or more 3 | // * contributor license agreements. See the NOTICE file distributed with 4 | // * this work for additional information regarding copyright ownership. 5 | // * The ASF licenses this file to You under the Apache License, Version 2.0 6 | // * (the "License"); you may not use this file except in compliance with 7 | // * the License. You may obtain a copy of the License at 8 | // * 9 | // * http://www.apache.org/licenses/LICENSE-2.0 10 | // * 11 | // * Unless required by applicable law or agreed to in writing, software 12 | // * distributed under the License is distributed on an "AS IS" BASIS, 13 | // * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // * See the License for the specific language governing permissions and 15 | // * limitations under the License. 
16 | // */ 17 | // package org.apache.accumulo.examples.filedata; 18 | // 19 | // import java.io.IOException; 20 | // import java.io.InputStream; 21 | // import java.util.ArrayList; 22 | // import java.util.List; 23 | // import java.util.Map.Entry; 24 | // 25 | // import org.apache.accumulo.core.data.Key; 26 | // import org.apache.accumulo.core.data.Value; 27 | // import org.apache.accumulo.examples.util.FormatUtil; 28 | // import org.apache.hadoop.mapreduce.InputSplit; 29 | // import org.apache.hadoop.mapreduce.RecordReader; 30 | // import org.apache.hadoop.mapreduce.TaskAttemptContext; 31 | // 32 | // import com.google.common.collect.Iterators; 33 | // import com.google.common.collect.PeekingIterator; 34 | // 35 | /// ** 36 | // * An InputFormat that turns the file data ingested with {@link FileDataIngest} into an 37 | /// InputStream 38 | // * using {@link ChunkInputStream}. Mappers used with this InputFormat must close the InputStream. 39 | // */ 40 | // @SuppressWarnings("deprecation") 41 | // public class ChunkInputFormat extends 42 | // org.apache.accumulo.core.client.mapreduce.InputFormatBase<List<Entry<Key,Value>>,InputStream> { 43 | // @Override 44 | // public RecordReader<List<Entry<Key,Value>>,InputStream> createRecordReader(InputSplit split, 45 | // TaskAttemptContext context) { 46 | // return new RecordReaderBase<>() { 47 | // private PeekingIterator<Entry<Key,Value>> peekingScannerIterator; 48 | // 49 | // @Override 50 | // public void initialize(InputSplit inSplit, TaskAttemptContext attempt) throws IOException { 51 | // super.initialize(inSplit, attempt); 52 | // peekingScannerIterator = Iterators.peekingIterator(scannerIterator); 53 | // currentK = new ArrayList<>(); 54 | // currentV = new ChunkInputStream(); 55 | // } 56 | // 57 | // @Override 58 | // public boolean nextKeyValue() throws IOException { 59 | // log.debug("nextKeyValue called"); 60 | // 61 | // currentK.clear(); 62 | // if (peekingScannerIterator.hasNext()) { 63 | // ++numKeysRead; 64 | // Entry<Key,Value> entry = peekingScannerIterator.peek(); 65 | // while (!entry.getKey().getColumnFamily().equals(FileDataIngest.CHUNK_CF)) { 66 | // currentK.add(entry); 67 | // peekingScannerIterator.next(); 68 | // if (!peekingScannerIterator.hasNext()) { 69 | // return true; 70 | // } 71 | // entry = peekingScannerIterator.peek(); 72 | // } 73 | // currentKey = entry.getKey(); 74 | // ((ChunkInputStream) currentV).setSource(peekingScannerIterator); 75 | // if (log.isTraceEnabled()) { 76 | // log.trace("Processing key/value pair: " + FormatUtil.formatTableEntry(entry, true)); 77 | // } 78 | // 79 | // return true; 80 | // } 81 | // return false; 82 | // } 83 | // }; 84 | // } 85 | // } 86 | -------------------------------------------------------------------------------- /docs/uniquecols.md: -------------------------------------------------------------------------------- 1 | 17 | # Apache Accumulo Unique Columns example 18 | 19 | The UniqueColumns example ([UniqueColumns.java]) computes the unique set 20 | of column families and column qualifiers in a table. It also demonstrates 21 | how a MapReduce job can directly read a table's files from HDFS. 22 | 23 | Create a table and add rows that all have identical column family and column 24 | qualifiers. 
25 | 26 | ``` 27 | $ /path/to/accumulo shell -u username -p secret 28 | username@instance> createnamespace examples 29 | username@instance> createtable examples.unique 30 | username@instance examples.unique> insert row1 fam1 qual1 v1 31 | username@instance examples.unique> insert row2 fam1 qual1 v2 32 | username@instance examples.unique> insert row3 fam1 qual1 v3 33 | ``` 34 | 35 | Exit the Accumulo shell and run the UniqueColumns MapReduce job against 36 | this table. Note that if the output file already exists in HDFS, it will 37 | need to be deleted. 38 | 39 | ``` 40 | $ ./bin/runmr mapreduce.UniqueColumns --table examples.unique --reducers 1 --output /tmp/unique 41 | ``` 42 | 43 | When the MapReduce job completes, examine the output. 44 | 45 | ``` 46 | $ hdfs dfs -cat /tmp/unique/part-r-00000 47 | cf:fam1 48 | cq:qual1 49 | ``` 50 | 51 | The output displays the unique column family and column qualifier values. In 52 | this case since all rows use the same values, there are only two values output. 53 | 54 | Note that since the example used only one reducer, all output will be contained 55 | within the single `part-r-00000` file. If more than one reducer is used, the output 56 | will be spread among various `part-r-xxxxx` files. 57 | 58 | Go back to the shell and add some additional entries. 59 | 60 | ```text 61 | $ /path/to/accumulo shell -u username -p secret 62 | username@instance> table examples.unique 63 | username@instance examples.unique> insert row1 fam2 qual2 v2 64 | username@instance examples.unique> insert row1 fam3 qual2 v2 65 | username@instance examples.unique> insert row1 fam2 qual2 v2 66 | username@instance examples.unique> insert row2 fam2 qual2 v2 67 | username@instance examples.unique> insert row3 fam2 qual2 v2 68 | username@instance examples.unique> insert row3 fam3 qual3 v2 69 | username@instance examples.unique> insert row3 fam3 qual4 v2 70 | ``` 71 | 72 | Re-running the command will now find any additional unique column values. 73 | 74 | ```text 75 | $ hdfs dfs -rm -r -f /tmp/unique 76 | $ ./bin/runmr mapreduce.UniqueColumns --table examples.unique --reducers 1 --output /tmp/unique 77 | $ hdfs dfs -cat /tmp/unique/part-r-00000 78 | cf:fam1 79 | cf:fam2 80 | cf:fam3 81 | cq:qual1 82 | cq:qual2 83 | cq:qual3 84 | cq:qual4 85 | ``` 86 | 87 | The output now includes the additional column values that were added during the last batch of inserts. 88 | 89 | 90 | [UniqueColumns.java]: ../src/main/java/org/apache/accumulo/examples/mapreduce/UniqueColumns.java 91 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/client/ReadWriteExample.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.accumulo.examples.client; 18 | 19 | import java.util.Map.Entry; 20 | 21 | import org.apache.accumulo.core.client.Accumulo; 22 | import org.apache.accumulo.core.client.AccumuloClient; 23 | import org.apache.accumulo.core.client.AccumuloException; 24 | import org.apache.accumulo.core.client.AccumuloSecurityException; 25 | import org.apache.accumulo.core.client.BatchWriter; 26 | import org.apache.accumulo.core.client.Scanner; 27 | import org.apache.accumulo.core.client.TableNotFoundException; 28 | import org.apache.accumulo.core.data.Key; 29 | import org.apache.accumulo.core.data.Mutation; 30 | import org.apache.accumulo.core.data.Value; 31 | import org.apache.accumulo.core.security.Authorizations; 32 | import org.apache.accumulo.examples.Common; 33 | import org.apache.accumulo.examples.cli.ClientOpts; 34 | import org.slf4j.Logger; 35 | import org.slf4j.LoggerFactory; 36 | 37 | public class ReadWriteExample { 38 | 39 | private static final Logger log = LoggerFactory.getLogger(ReadWriteExample.class); 40 | 41 | private static final String READWRITE_TABLE = Common.NAMESPACE + ".readwrite"; 42 | 43 | private ReadWriteExample() {} 44 | 45 | public static void main(String[] args) throws AccumuloSecurityException, AccumuloException { 46 | ClientOpts opts = new ClientOpts(); 47 | opts.parseArgs(ReadWriteExample.class.getName(), args); 48 | 49 | try (AccumuloClient client = Accumulo.newClient().from(opts.getClientPropsPath()).build()) { 50 | Common.createTableWithNamespace(client, READWRITE_TABLE); 51 | // write data 52 | try (BatchWriter writer = client.createBatchWriter(READWRITE_TABLE)) { 53 | for (int i = 0; i < 10; i++) { 54 | Mutation m = new Mutation("hello" + i); 55 | m.put("cf", "cq", new Value("world" + i)); 56 | writer.addMutation(m); 57 | } 58 | } catch (TableNotFoundException e) { 59 | log.error("Could not find table {}: {}", e.getTableName(), e.getMessage()); 60 | System.exit(1); 61 | } 62 | 63 | // read data 64 | try (Scanner scanner = client.createScanner(READWRITE_TABLE, Authorizations.EMPTY)) { 65 | for (Entry entry : scanner) { 66 | log.info("{} -> {}", entry.getKey().toString(), entry.getValue().toString()); 67 | } 68 | } catch (TableNotFoundException e) { 69 | log.error("Could not find table {}: {}", e.getTableName(), e.getMessage()); 70 | System.exit(1); 71 | } 72 | 73 | // delete table 74 | try { 75 | client.tableOperations().delete(READWRITE_TABLE); 76 | } catch (TableNotFoundException e) { 77 | log.error("Unable to delete table '{}': {}", e.getTableName(), e.getMessage()); 78 | } 79 | } 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/mapreduce/bulk/VerifyIngest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.accumulo.examples.mapreduce.bulk; 18 | 19 | import java.util.Iterator; 20 | import java.util.Map.Entry; 21 | 22 | import org.apache.accumulo.core.client.Accumulo; 23 | import org.apache.accumulo.core.client.AccumuloClient; 24 | import org.apache.accumulo.core.client.Scanner; 25 | import org.apache.accumulo.core.client.TableNotFoundException; 26 | import org.apache.accumulo.core.data.Key; 27 | import org.apache.accumulo.core.data.Range; 28 | import org.apache.accumulo.core.data.Value; 29 | import org.apache.accumulo.core.security.Authorizations; 30 | import org.apache.accumulo.examples.cli.ClientOpts; 31 | import org.slf4j.Logger; 32 | import org.slf4j.LoggerFactory; 33 | 34 | public final class VerifyIngest { 35 | 36 | private static final Logger log = LoggerFactory.getLogger(VerifyIngest.class); 37 | private static final String ROW_FORMAT = "row_%010d"; 38 | private static final String VALUE_FORMAT = "value_%010d"; 39 | 40 | private VerifyIngest() {} 41 | 42 | public static void main(String[] args) throws TableNotFoundException { 43 | 44 | ClientOpts opts = new ClientOpts(); 45 | opts.parseArgs(VerifyIngest.class.getName(), args); 46 | 47 | try (AccumuloClient client = Accumulo.newClient().from(opts.getClientPropsPath()).build(); 48 | Scanner scanner = client.createScanner(SetupTable.BULK_INGEST_TABLE, 49 | Authorizations.EMPTY)) { 50 | 51 | scanner.setRange(new Range(String.format(ROW_FORMAT, 0), null)); 52 | 53 | Iterator<Entry<Key,Value>> si = scanner.iterator(); 54 | 55 | boolean ok = true; 56 | 57 | for (int i = 0; i < BulkIngestExample.numRows; i++) { 58 | 59 | if (si.hasNext()) { 60 | Entry<Key,Value> entry = si.next(); 61 | 62 | if (!entry.getKey().getRow().toString().equals(String.format(ROW_FORMAT, i))) { 63 | String formattedRow = String.format(ROW_FORMAT, i); 64 | log.error("unexpected row key {}; expected {}", entry.getKey().getRow(), formattedRow); 65 | ok = false; 66 | } 67 | 68 | if (!entry.getValue().toString().equals(String.format(VALUE_FORMAT, i))) { 69 | var formattedValue = String.format(VALUE_FORMAT, i); 70 | log.error("unexpected value {}; expected {}", entry.getValue(), formattedValue); 71 | ok = false; 72 | } 73 | 74 | } else { 75 | var formattedRow = String.format(ROW_FORMAT, i); 76 | log.error("no more rows, expected {}", formattedRow); 77 | ok = false; 78 | break; 79 | } 80 | } 81 | 82 | if (ok) { 83 | log.info("Data verification succeeded!"); 84 | System.exit(0); 85 | } else { 86 | log.info("Data verification failed!"); 87 | System.exit(1); 88 | } 89 | } 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /docs/classpath.md: -------------------------------------------------------------------------------- 1 | 17 | # Apache Accumulo Classpath Example 18 | 19 | This example shows how to use per table classpaths. The example leverages a 20 | test jar which contains a Filter that suppresses rows containing "foo". The 21 | example shows copying the FooFilter.jar into HDFS and then making an Accumulo 22 | table reference that jar. 
For this example, a directory, `/user1/lib`, is 23 | assumed to exist in HDFS. 24 | 25 | Create `/user1/lib` in HDFS if it does not exist. 26 | 27 | hadoop fs -mkdir -p /user1/lib 28 | 29 | Execute the following command in the shell. Note that the `FooFilter.jar` 30 | is located within the Accumulo source distribution. 31 | 32 | $ hadoop fs -copyFromLocal /path/to/accumulo/test/src/main/resources/org/apache/accumulo/test/FooFilter.jar /user1/lib 33 | 34 | Execute the following in the Accumulo shell to set up the classpath context 35 | 36 | root@uno> config -s general.vfs.context.classpath.cx1=hdfs://<namenode host>:<port>/user1/lib/[^.].*.jar 37 | 38 | Create a namespace and table 39 | 40 | root@uno> createnamespace examples 41 | root@uno> createtable examples.nofoo 42 | 43 | The following command makes this table use the configured classpath context 44 | 45 | root@uno examples.nofoo> config -t examples.nofoo -s table.class.loader.context=cx1 46 | 47 | The following command configures an iterator that's in FooFilter.jar 48 | 49 | root@uno examples.nofoo> setiter -n foofilter -p 10 -scan -minc -majc -class org.apache.accumulo.test.FooFilter 50 | Filter accepts or rejects each Key/Value pair 51 | ----------> set FooFilter parameter negate, default false keeps k/v that pass accept method, true rejects k/v that pass accept method: false 52 | 53 | The commands below show the filter is working. 54 | 55 | root@uno examples.nofoo> insert foo1 f1 q1 v1 56 | root@uno examples.nofoo> insert noo1 f1 q1 v2 57 | root@uno examples.nofoo> scan 58 | noo1 f1:q1 [] v2 59 | root@uno examples.nofoo> 60 | 61 | Below, an attempt is made to add the FooFilter to a table that's not configured 62 | to use the classpath context cx1. This fails until the table is configured to 63 | use cx1. 64 | 65 | root@uno examples.nofoo> createtable examples.nofootwo 66 | root@uno examples.nofootwo> setiter -n foofilter -p 10 -scan -minc -majc -class org.apache.accumulo.test.FooFilter 67 | 2013-05-03 12:49:35,943 [shell.Shell] ERROR: org.apache.accumulo.shell.ShellCommandException: Command could 68 | not be initialized (Unable to load org.apache.accumulo.test.FooFilter; class not found.) 69 | root@uno examples.nofootwo> config -t examples.nofootwo -s table.class.loader.context=cx1 70 | root@uno examples.nofootwo> setiter -n foofilter -p 10 -scan -minc -majc -class org.apache.accumulo.test.FooFilter 71 | Filter accepts or rejects each Key/Value pair 72 | ----------> set FooFilter parameter negate, default false keeps k/v that pass accept method, true rejects k/v that pass accept method: false 73 | 74 | 75 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/util/FormatUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.accumulo.examples.util; 18 | 19 | import java.util.Map; 20 | 21 | import org.apache.accumulo.core.data.Key; 22 | import org.apache.accumulo.core.data.Value; 23 | import org.apache.accumulo.core.security.ColumnVisibility; 24 | import org.apache.hadoop.io.Text; 25 | 26 | public final class FormatUtil { 27 | 28 | /** 29 | * Format and return the specified table entry as a human-readable String suitable for logging. 30 | *
31 | * If {@code includeTimestamp} is true, the entry will be formatted as:<br>
32 | * {@literal <row> <columnFamily>:<columnQualifier> <visibility> <timestamp>\t<value>}<br>
33 | * If false, the entry will be formatted as:<br>
34 | * {@literal <row> <columnFamily>:<columnQualifier> <visibility>\t<value>}<br>
35 | * Examples:<br>
36 | * {@literal a ~chunk:\x00\x00\x00d\x00\x00\x00\x00 [A&B] 9223372036854775807 asdfjkl;}<br> 37 | * {@literal a ~chunk:\x00\x00\x00d\x00\x00\x00\x00 [A&B] asdfjkl;} 38 | * 39 | * @param entry 40 | * the table entry to format 41 | * @param includeTimestamp 42 | * if true, include the timestamp in the returned result 43 | * @return the specified entry as a formatted String, or null if the entry is null 44 | */ 45 | public static String formatTableEntry(final Map.Entry<Key,Value> entry, 46 | final boolean includeTimestamp) { 47 | if (entry == null) { 48 | return null; 49 | } 50 | 51 | Key key = entry.getKey(); 52 | StringBuilder sb = new StringBuilder(); 53 | Text buffer = new Text(); 54 | 55 | // Append row. 56 | appendBytes(sb, key.getRow(buffer).getBytes()).append(" "); 57 | 58 | // Append column family. 59 | appendBytes(sb, key.getColumnFamily().getBytes()).append(":"); 60 | 61 | // Append column qualifier. 62 | appendBytes(sb, key.getColumnQualifier().getBytes()).append(" "); 63 | 64 | // Append visibility and timestamp. 65 | sb.append(new ColumnVisibility(key.getColumnVisibility(buffer))); 66 | 67 | if (includeTimestamp) { 68 | sb.append(" ").append(entry.getKey().getTimestamp()); 69 | } 70 | 71 | // Append value. 72 | Value value = entry.getValue(); 73 | if (value != null && value.getSize() > 0) { 74 | sb.append("\t"); 75 | appendBytes(sb, value.get()); 76 | } 77 | return sb.toString(); 78 | } 79 | 80 | private static StringBuilder appendBytes(final StringBuilder sb, final byte[] ba) { 81 | for (byte b : ba) { 82 | int c = 0xff & b; 83 | if (c == '\\') { 84 | sb.append("\\\\"); 85 | } else if (c >= 32 && c <= 126) { 86 | sb.append((char) c); 87 | } else { 88 | sb.append("\\x").append(String.format("%02X", c)); 89 | } 90 | } 91 | return sb; 92 | } 93 | 94 | private FormatUtil() { 95 | throw new UnsupportedOperationException(); 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/cli/BatchWriterOpts.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.accumulo.examples.cli; 18 | 19 | import java.time.Duration; 20 | import java.util.concurrent.TimeUnit; 21 | 22 | import org.apache.accumulo.core.client.BatchWriterConfig; 23 | 24 | import com.beust.jcommander.IStringConverter; 25 | import com.beust.jcommander.Parameter; 26 | 27 | public class BatchWriterOpts { 28 | private static final BatchWriterConfig BWDEFAULTS = new BatchWriterConfig(); 29 | 30 | public static class TimeConverter implements IStringConverter<Long> { 31 | @Override 32 | public Long convert(String value) { 33 | if (value.matches("[0-9]+")) 34 | value = "PT" + value + "S"; // if only numbers then assume seconds 35 | return Duration.parse(value).toMillis(); 36 | } 37 | } 38 | 39 | public static class MemoryConverter implements IStringConverter<Long> { 40 | @Override 41 | public Long convert(String str) { 42 | try { 43 | char lastChar = str.charAt(str.length() - 1); 44 | int multiplier = 0; 45 | switch (Character.toUpperCase(lastChar)) { 46 | case 'G': 47 | multiplier += 10; // intentional fall-through: each larger suffix adds another factor of 1024 (2^10) 48 | case 'M': 49 | multiplier += 10; 50 | case 'K': 51 | multiplier += 10; 52 | case 'B': 53 | break; 54 | default: 55 | return Long.parseLong(str); 56 | } 57 | return Long.parseLong(str.substring(0, str.length() - 1)) << multiplier; 58 | } catch (Exception ex) { 59 | throw new IllegalArgumentException( 60 | "The value '" + str + "' is not a valid memory setting. A valid value would be a number " 61 | + "possibly followed by an optional 'G', 'M', 'K', or 'B'."); 62 | } 63 | } 64 | } 65 | 66 | @Parameter(names = "--batchThreads", 67 | description = "Number of threads to use when writing large batches") 68 | public Integer batchThreads = BWDEFAULTS.getMaxWriteThreads(); 69 | 70 | @Parameter(names = "--batchLatency", converter = TimeConverter.class, 71 | description = "The maximum time to wait before flushing data to servers when writing") 72 | public Long batchLatency = BWDEFAULTS.getMaxLatency(TimeUnit.MILLISECONDS); 73 | 74 | @Parameter(names = "--batchMemory", converter = MemoryConverter.class, 75 | description = "memory used to batch data when writing") 76 | public Long batchMemory = BWDEFAULTS.getMaxMemory(); 77 | 78 | @Parameter(names = "--batchTimeout", converter = TimeConverter.class, 79 | description = "timeout used to fail a batch write") 80 | public Long batchTimeout = BWDEFAULTS.getTimeout(TimeUnit.MILLISECONDS); 81 | 82 | public BatchWriterConfig getBatchWriterConfig() { 83 | BatchWriterConfig config = new BatchWriterConfig(); 84 | config.setMaxWriteThreads(this.batchThreads); 85 | config.setMaxLatency(this.batchLatency, TimeUnit.MILLISECONDS); 86 | config.setMaxMemory(this.batchMemory); 87 | config.setTimeout(this.batchTimeout, TimeUnit.MILLISECONDS); 88 | return config; 89 | } 90 | 91 | } 92 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/mapreduce/RegexExample.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License.
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.accumulo.examples.mapreduce; 18 | 19 | import java.io.IOException; 20 | 21 | import org.apache.accumulo.core.client.IteratorSetting; 22 | import org.apache.accumulo.core.data.Key; 23 | import org.apache.accumulo.core.data.Value; 24 | import org.apache.accumulo.core.iterators.user.RegExFilter; 25 | import org.apache.accumulo.examples.cli.ClientOpts; 26 | import org.apache.accumulo.hadoop.mapreduce.AccumuloInputFormat; 27 | import org.apache.hadoop.fs.Path; 28 | import org.apache.hadoop.mapreduce.Job; 29 | import org.apache.hadoop.mapreduce.Mapper; 30 | import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; 31 | import org.slf4j.Logger; 32 | import org.slf4j.LoggerFactory; 33 | 34 | import com.beust.jcommander.Parameter; 35 | 36 | public class RegexExample { 37 | 38 | private static final Logger log = LoggerFactory.getLogger(RegexExample.class); 39 | 40 | public static class RegexMapper extends Mapper<Key,Value,Key,Value> { 41 | @Override 42 | public void map(Key row, Value data, Context context) throws IOException, InterruptedException { 43 | context.write(row, data); 44 | } 45 | } 46 | 47 | static class Opts extends ClientOpts { 48 | @Parameter(names = {"-t", "--table"}, required = true, description = "table to use") 49 | String tableName; 50 | @Parameter(names = "--rowRegex") 51 | String rowRegex; 52 | @Parameter(names = "--columnFamilyRegex") 53 | String columnFamilyRegex; 54 | @Parameter(names = "--columnQualifierRegex") 55 | String columnQualifierRegex; 56 | @Parameter(names = "--valueRegex") 57 | String valueRegex; 58 | @Parameter(names = "--output", required = true) 59 | String destination; 60 | } 61 | 62 | public static void main(String[] args) throws Exception { 63 | Opts opts = new Opts(); 64 | opts.parseArgs(RegexExample.class.getName(), args); 65 | 66 | Job job = Job.getInstance(opts.getHadoopConfig()); 67 | job.setJobName(RegexExample.class.getSimpleName()); 68 | job.setJarByClass(RegexExample.class); 69 | 70 | job.setInputFormatClass(AccumuloInputFormat.class); 71 | 72 | IteratorSetting regex = new IteratorSetting(50, "regex", RegExFilter.class); 73 | RegExFilter.setRegexs(regex, opts.rowRegex, opts.columnFamilyRegex, opts.columnQualifierRegex, 74 | opts.valueRegex, false); 75 | 76 | AccumuloInputFormat.configure().clientProperties(opts.getClientProperties()) 77 | .table(opts.tableName).addIterator(regex).store(job); 78 | 79 | job.setMapperClass(RegexMapper.class); 80 | job.setMapOutputKeyClass(Key.class); 81 | job.setMapOutputValueClass(Value.class); 82 | job.setNumReduceTasks(0); 83 | job.setOutputFormatClass(TextOutputFormat.class); 84 | TextOutputFormat.setOutputPath(job, new Path(opts.destination)); 85 | 86 | log.info("setRowRegex: " + opts.rowRegex); 87 | log.info("setColumnFamilyRegex: " + opts.columnFamilyRegex); 88 | log.info("setColumnQualifierRegex: " + opts.columnQualifierRegex); 89 | log.info("setValueRegex: " + opts.valueRegex); 90 | 91 | System.exit(job.waitForCompletion(true) ?
0 : 1); 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/mapreduce/RowHash.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.accumulo.examples.mapreduce; 18 | 19 | import java.io.IOException; 20 | import java.util.Base64; 21 | import java.util.Collections; 22 | 23 | import org.apache.accumulo.core.client.IteratorSetting; 24 | import org.apache.accumulo.core.data.Key; 25 | import org.apache.accumulo.core.data.Mutation; 26 | import org.apache.accumulo.core.data.Value; 27 | import org.apache.accumulo.examples.cli.ClientOpts; 28 | import org.apache.accumulo.hadoop.mapreduce.AccumuloInputFormat; 29 | import org.apache.accumulo.hadoop.mapreduce.AccumuloOutputFormat; 30 | import org.apache.accumulo.hadoop.mapreduce.InputFormatBuilder; 31 | import org.apache.hadoop.io.MD5Hash; 32 | import org.apache.hadoop.io.Text; 33 | import org.apache.hadoop.mapreduce.Job; 34 | import org.apache.hadoop.mapreduce.Mapper; 35 | 36 | import com.beust.jcommander.Parameter; 37 | 38 | public class RowHash { 39 | 40 | /** 41 | * The Mapper class that, for each input entry, writes a mutation containing the Base64-encoded MD5 hash of the entry's value. 42 | */ 43 | public static class HashDataMapper extends Mapper<Key,Value,Text,Mutation> { 44 | @Override 45 | public void map(Key row, Value data, Context context) throws IOException, InterruptedException { 46 | Mutation m = new Mutation(row.getRow()); 47 | m.put("cf-HASHTYPE", "cq-MD5BASE64", 48 | new Value(Base64.getEncoder().encode(MD5Hash.digest(data.toString()).getDigest()))); 49 | context.write(null, m); 50 | context.progress(); 51 | } 52 | 53 | @Override 54 | public void setup(Context job) {} 55 | } 56 | 57 | private static class Opts extends ClientOpts { 58 | @Parameter(names = {"-t", "--table"}, required = true, description = "table to use") 59 | String tableName; 60 | @Parameter(names = "--column", required = true) 61 | String column; 62 | } 63 | 64 | public static void main(String[] args) throws Exception { 65 | Opts opts = new Opts(); 66 | opts.parseArgs(RowHash.class.getName(), args); 67 | 68 | Job job = Job.getInstance(opts.getHadoopConfig()); 69 | job.setJobName(RowHash.class.getName()); 70 | job.setJarByClass(RowHash.class); 71 | job.setInputFormatClass(AccumuloInputFormat.class); 72 | InputFormatBuilder.InputFormatOptions<Job> inputOpts = AccumuloInputFormat.configure() 73 | .clientProperties(opts.getClientProperties()).table(opts.tableName); 74 | 75 | String col = opts.column; 76 | int idx = col.indexOf(":"); 77 | String cf = idx < 0 ? col : col.substring(0, idx); 78 | String cq = idx < 0 ?
null : col.substring(idx + 1); 79 | if (cf.length() > 0) { 80 | inputOpts.fetchColumns(Collections.singleton(new IteratorSetting.Column(cf, cq))); 81 | } 82 | inputOpts.store(job); 83 | 84 | job.setMapperClass(HashDataMapper.class); 85 | job.setMapOutputKeyClass(Text.class); 86 | job.setMapOutputValueClass(Mutation.class); 87 | job.setNumReduceTasks(0); 88 | 89 | job.setOutputFormatClass(AccumuloOutputFormat.class); 90 | AccumuloOutputFormat.configure().clientProperties(opts.getClientProperties()) 91 | .defaultTable(opts.tableName).store(job); 92 | 93 | System.exit(job.waitForCompletion(true) ? 0 : 1); 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/constraints/MaxMutationSize.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.accumulo.examples.constraints; 18 | 19 | import java.util.Collections; 20 | import java.util.List; 21 | 22 | import org.apache.accumulo.core.client.Accumulo; 23 | import org.apache.accumulo.core.client.AccumuloClient; 24 | import org.apache.accumulo.core.client.AccumuloException; 25 | import org.apache.accumulo.core.client.AccumuloSecurityException; 26 | import org.apache.accumulo.core.client.BatchWriter; 27 | import org.apache.accumulo.core.client.MutationsRejectedException; 28 | import org.apache.accumulo.core.client.TableNotFoundException; 29 | import org.apache.accumulo.core.data.Mutation; 30 | import org.apache.accumulo.core.data.Value; 31 | import org.apache.accumulo.core.data.constraints.Constraint; 32 | import org.apache.accumulo.examples.Common; 33 | import org.apache.accumulo.examples.cli.ClientOpts; 34 | import org.slf4j.Logger; 35 | import org.slf4j.LoggerFactory; 36 | 37 | /** 38 | * Ensure that mutations are a reasonable size: we must be able to fit several in memory at a time. 
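 * <p>
 * For example, with a 4 GiB max heap the MAX_SIZE limit below works out to 4 GiB >> 8 = 16 MiB per
 * mutation, so roughly 256 maximum-size mutations can be held in memory at once.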
39 | */ 40 | public class MaxMutationSize implements Constraint { 41 | 42 | private static final Logger log = LoggerFactory.getLogger(MaxMutationSize.class); 43 | 44 | static final long MAX_SIZE = Runtime.getRuntime().maxMemory() >> 8; 45 | static final List<Short> empty = Collections.emptyList(); 46 | static final List<Short> violations = Collections.singletonList((short) 0); 47 | 48 | @Override 49 | public String getViolationDescription(short violationCode) { 50 | return String.format("mutation exceeded maximum size of %d", MAX_SIZE); 51 | } 52 | 53 | @Override 54 | public List<Short> check(Environment env, Mutation mutation) { 55 | if (mutation.estimatedMemoryUsed() < MAX_SIZE) 56 | return empty; 57 | return violations; 58 | } 59 | 60 | public static void main(String[] args) 61 | throws AccumuloException, AccumuloSecurityException, TableNotFoundException { 62 | ClientOpts opts = new ClientOpts(); 63 | opts.parseArgs(MaxMutationSize.class.getName(), args); 64 | 65 | try (AccumuloClient client = Accumulo.newClient().from(opts.getClientPropsPath()).build()) { 66 | Common.createTableWithNamespace(client, ConstraintsCommon.CONSTRAINTS_TABLE); 67 | 68 | /* 69 | * Add the {@link MaxMutationSize} constraint to the table. Be sure to use the fully qualified 70 | * class name 71 | */ 72 | int num = client.tableOperations().addConstraint(ConstraintsCommon.CONSTRAINTS_TABLE, 73 | "org.apache.accumulo.examples.constraints.MaxMutationSize"); 74 | 75 | log.info("Attempting to write a lot of mutations to testConstraints"); 76 | try (BatchWriter bw = client.createBatchWriter(ConstraintsCommon.CONSTRAINTS_TABLE)) { 77 | Mutation m = new Mutation("r1"); 78 | for (int i = 0; i < 1_000_000; i++) 79 | m.put("cf" + i % 5000, "cq" + i, new Value(("value" + i).getBytes())); 80 | bw.addMutation(m); 81 | } catch (MutationsRejectedException e) { 82 | e.getConstraintViolationSummaries() 83 | .forEach(m -> log.error(ConstraintsCommon.CONSTRAINT_VIOLATED_MSG, m.constrainClass)); 84 | } 85 | client.tableOperations().removeConstraint(ConstraintsCommon.CONSTRAINTS_TABLE, num); 86 | } 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/bloom/BloomBatchScanner.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License.
16 | */ 17 | package org.apache.accumulo.examples.bloom; 18 | 19 | import static org.apache.accumulo.examples.client.RandomBatchWriter.abs; 20 | 21 | import java.util.HashMap; 22 | import java.util.HashSet; 23 | import java.util.Map.Entry; 24 | import java.util.Random; 25 | 26 | import org.apache.accumulo.core.client.Accumulo; 27 | import org.apache.accumulo.core.client.AccumuloClient; 28 | import org.apache.accumulo.core.client.BatchScanner; 29 | import org.apache.accumulo.core.client.TableNotFoundException; 30 | import org.apache.accumulo.core.data.Key; 31 | import org.apache.accumulo.core.data.Range; 32 | import org.apache.accumulo.core.data.Value; 33 | import org.apache.accumulo.core.security.Authorizations; 34 | import org.apache.accumulo.examples.cli.ClientOpts; 35 | import org.slf4j.Logger; 36 | import org.slf4j.LoggerFactory; 37 | 38 | /** 39 | * Simple example for reading random batches of data from Accumulo. 40 | */ 41 | public final class BloomBatchScanner { 42 | 43 | private static final Logger log = LoggerFactory.getLogger(BloomBatchScanner.class); 44 | 45 | private BloomBatchScanner() {} 46 | 47 | public static void main(String[] args) throws TableNotFoundException { 48 | ClientOpts opts = new ClientOpts(); 49 | opts.parseArgs(BloomBatchScanner.class.getName(), args); 50 | 51 | try (AccumuloClient client = Accumulo.newClient().from(opts.getClientPropsPath()).build()) { 52 | scan(client, BloomCommon.BLOOM_TEST1_TABLE, 7); 53 | scan(client, BloomCommon.BLOOM_TEST2_TABLE, 7); 54 | } 55 | } 56 | 57 | static void scan(AccumuloClient client, String tableName, int seed) 58 | throws TableNotFoundException { 59 | Random r = new Random(seed); 60 | HashSet<Range> ranges = new HashSet<>(); 61 | HashMap<String,Boolean> expectedRows = new HashMap<>(); 62 | while (ranges.size() < 500) { 63 | long rowId = abs(r.nextLong()) % 1_000_000_000; 64 | String row = String.format("row_%010d", rowId); 65 | ranges.add(new Range(row)); 66 | expectedRows.put(row, false); 67 | } 68 | 69 | long t1 = System.currentTimeMillis(); 70 | long results = 0; 71 | long lookups = ranges.size(); 72 | 73 | log.info("Scanning {} with seed {}", tableName, seed); 74 | try (BatchScanner scan = client.createBatchScanner(tableName, Authorizations.EMPTY, 20)) { 75 | scan.setRanges(ranges); 76 | for (Entry<Key,Value> entry : scan) { 77 | Key key = entry.getKey(); 78 | if (expectedRows.containsKey(key.getRow().toString())) { 79 | expectedRows.put(key.getRow().toString(), true); 80 | } else { 81 | log.info("Encountered unexpected key: {}", key); 82 | } 83 | results++; 84 | } 85 | } 86 | long t2 = System.currentTimeMillis(); 87 | log.info(String.format("Scan finished! %6.2f lookups/sec, %.2f secs, %d results", 88 | lookups / ((t2 - t1) / 1000.0), ((t2 - t1) / 1000.0), results)); 89 | 90 | int count = 0; 91 | for (Entry<String,Boolean> entry : expectedRows.entrySet()) { 92 | if (!entry.getValue()) { 93 | count++; 94 | } 95 | } 96 | if (count > 0) 97 | log.info("Did not find {} expected rows", count); 98 | else 99 | log.info("All expected rows were scanned"); 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/client/SequentialBatchWriter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.accumulo.examples.client; 18 | 19 | import java.util.Random; 20 | 21 | import org.apache.accumulo.core.client.Accumulo; 22 | import org.apache.accumulo.core.client.AccumuloClient; 23 | import org.apache.accumulo.core.client.AccumuloException; 24 | import org.apache.accumulo.core.client.AccumuloSecurityException; 25 | import org.apache.accumulo.core.client.BatchWriter; 26 | import org.apache.accumulo.core.client.TableNotFoundException; 27 | import org.apache.accumulo.core.data.Mutation; 28 | import org.apache.accumulo.core.data.Value; 29 | import org.apache.accumulo.examples.Common; 30 | import org.apache.accumulo.examples.cli.ClientOpts; 31 | import org.slf4j.Logger; 32 | import org.slf4j.LoggerFactory; 33 | 34 | import com.beust.jcommander.Parameter; 35 | 36 | /** 37 | * Simple example for writing random data in sequential order to Accumulo. 38 | */ 39 | public final class SequentialBatchWriter { 40 | 41 | private static final Logger log = LoggerFactory.getLogger(SequentialBatchWriter.class); 42 | 43 | static final String BATCH_TABLE = Common.NAMESPACE + ".batch"; 44 | 45 | private SequentialBatchWriter() {} 46 | 47 | public static Value createValue(long rowId, int size) { 48 | Random r = new Random(rowId); 49 | byte[] value = new byte[size]; 50 | 51 | r.nextBytes(value); 52 | 53 | // transform to printable chars 54 | for (int j = 0; j < value.length; j++) { 55 | value[j] = (byte) (((0xff & value[j]) % 92) + ' '); 56 | } 57 | 58 | return new Value(value); 59 | } 60 | 61 | static class Opts extends ClientOpts { 62 | @Parameter(names = {"-t"}, description = "table to use") 63 | public String tableName = BATCH_TABLE; 64 | 65 | @Parameter(names = {"--start"}, description = "starting row") 66 | public Integer start = 0; 67 | 68 | @Parameter(names = {"--num"}, description = "number of rows") 69 | public Integer num = 10_000; 70 | 71 | @Parameter(names = {"--size"}, description = "size of values") 72 | public Integer size = 50; 73 | } 74 | 75 | /** 76 | * Writes a configurable number of entries (default 10,000) to Accumulo using a {@link BatchWriter}. The rows of the 77 | * entries will be sequential, starting from a configurable row (default 0). The column family will be "foo" and the 78 | * column qualifier "1". The values will be random printable-byte arrays of a configurable size (default 50 bytes).
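 * <p>
 * Example invocation (a sketch using this repo's bin/runex launcher; the flag names come from the Opts
 * class above, and -t defaults to BATCH_TABLE, i.e. Common.NAMESPACE + ".batch"):
 * {@code ./bin/runex client.SequentialBatchWriter --num 10000 --size 50}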
79 | */ 80 | public static void main(String[] args) 81 | throws AccumuloException, AccumuloSecurityException, TableNotFoundException { 82 | Opts opts = new Opts(); 83 | opts.parseArgs(SequentialBatchWriter.class.getName(), args); 84 | 85 | try (AccumuloClient client = Accumulo.newClient().from(opts.getClientPropsPath()).build()) { 86 | Common.createTableWithNamespace(client, opts.tableName); 87 | try (BatchWriter bw = client.createBatchWriter(opts.tableName)) { 88 | for (int i = 0; i < opts.num; i++) { 89 | int row = i + opts.start; 90 | Mutation m = new Mutation(String.format("row_%010d", row)); 91 | // create a random value that is a function of row id for verification purposes 92 | m.put("foo", "1", createValue(row, opts.size)); 93 | bw.addMutation(m); 94 | if (i % 1000 == 0) { 95 | log.trace("wrote {} entries", i); 96 | } 97 | } 98 | } 99 | } 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/bloom/BloomFilters.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.accumulo.examples.bloom; 18 | 19 | import java.util.HashMap; 20 | import java.util.Map; 21 | import java.util.Random; 22 | 23 | import org.apache.accumulo.core.client.Accumulo; 24 | import org.apache.accumulo.core.client.AccumuloClient; 25 | import org.apache.accumulo.core.client.AccumuloException; 26 | import org.apache.accumulo.core.client.AccumuloSecurityException; 27 | import org.apache.accumulo.core.client.BatchWriter; 28 | import org.apache.accumulo.core.client.MutationsRejectedException; 29 | import org.apache.accumulo.core.client.TableNotFoundException; 30 | import org.apache.accumulo.core.client.admin.NewTableConfiguration; 31 | import org.apache.accumulo.core.data.Mutation; 32 | import org.apache.accumulo.core.security.ColumnVisibility; 33 | import org.apache.accumulo.examples.Common; 34 | import org.apache.accumulo.examples.cli.ClientOpts; 35 | import org.apache.accumulo.examples.client.RandomBatchWriter; 36 | import org.slf4j.Logger; 37 | import org.slf4j.LoggerFactory; 38 | 39 | public final class BloomFilters { 40 | 41 | private static final Logger log = LoggerFactory.getLogger(BloomFilters.class); 42 | 43 | private BloomFilters() {} 44 | 45 | public static void main(String[] args) 46 | throws AccumuloException, AccumuloSecurityException, TableNotFoundException { 47 | 48 | ClientOpts opts = new ClientOpts(); 49 | opts.parseArgs(BloomFilters.class.getName(), args); 50 | 51 | try (AccumuloClient client = Accumulo.newClient().from(opts.getClientPropsPath()).build()) { 52 | Map<String,String> table1props = Map.of("table.compaction.major.ratio", "7"); 53 | 54 | Map<String,String> table2props = new HashMap<>(table1props); 55 | table2props.put(BloomCommon.BLOOM_ENABLED_PROPERTY, "true"); 56 | 57 | Common.createTableWithNamespace(client, BloomCommon.BLOOM_TEST1_TABLE, 58 | new NewTableConfiguration().setProperties(table1props)); 59 | Common.createTableWithNamespace(client, BloomCommon.BLOOM_TEST2_TABLE, 60 | new NewTableConfiguration().setProperties(table2props)); 61 | 62 | writeAndFlushData(BloomCommon.BLOOM_TEST1_TABLE, client); 63 | writeAndFlushData(BloomCommon.BLOOM_TEST2_TABLE, client); 64 | } 65 | } 66 | 67 | // Write a million rows 3 times flushing files to disk separately 68 | private static void writeAndFlushData(final String tableName, final AccumuloClient client) 69 | throws TableNotFoundException, AccumuloSecurityException, AccumuloException { 70 | log.info("Writing data to {}", tableName); 71 | writeData(client, tableName, 7); 72 | client.tableOperations().flush(tableName, null, null, true); 73 | writeData(client, tableName, 8); 74 | client.tableOperations().flush(tableName, null, null, true); 75 | writeData(client, tableName, 9); 76 | client.tableOperations().flush(tableName, null, null, true); 77 | } 78 | 79 | // write a million random rows 80 | static void writeData(AccumuloClient client, String tableName, int seed) 81 | throws TableNotFoundException, MutationsRejectedException { 82 | Random r = new Random(seed); 83 | try (BatchWriter bw = client.createBatchWriter(tableName)) { 84 | for (int x = 0; x < 1_000_000; x++) { 85 | long rowId = RandomBatchWriter.abs(r.nextLong()) % 1_000_000_000; 86 | Mutation m = RandomBatchWriter.createMutation(rowId, 50, new ColumnVisibility()); 87 | bw.addMutation(m); 88 | } 89 | } 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/shard/Index.java: -------------------------------------------------------------------------------- 1 | /* 2 | *
Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.accumulo.examples.shard; 18 | 19 | import java.io.File; 20 | import java.io.FileReader; 21 | import java.util.ArrayList; 22 | import java.util.HashSet; 23 | import java.util.List; 24 | 25 | import org.apache.accumulo.core.client.Accumulo; 26 | import org.apache.accumulo.core.client.AccumuloClient; 27 | import org.apache.accumulo.core.client.BatchWriter; 28 | import org.apache.accumulo.core.data.Mutation; 29 | import org.apache.accumulo.core.data.Value; 30 | import org.apache.accumulo.examples.cli.ClientOpts; 31 | 32 | import com.beust.jcommander.Parameter; 33 | 34 | /** 35 | * This program indexes a set of documents given on the command line into a shard table. 36 | * 37 | * What it writes to the table is row = partition id, column family = term, column qualifier = 38 | * document id. 39 | */ 40 | public class Index { 41 | 42 | static String genPartition(int partition) { 43 | return String.format("%08x", Math.abs(partition)); 44 | } 45 | 46 | public static void index(int numPartitions, String docId, String doc, String splitRegex, 47 | BatchWriter bw) throws Exception { 48 | 49 | String[] tokens = doc.split(splitRegex); 50 | 51 | String partition = genPartition(doc.hashCode() % numPartitions); 52 | 53 | Mutation m = new Mutation(partition); 54 | 55 | HashSet<String> tokensSeen = new HashSet<>(); 56 | 57 | for (String token : tokens) { 58 | token = token.toLowerCase(); 59 | 60 | if (!tokensSeen.contains(token)) { 61 | tokensSeen.add(token); 62 | m.put(token, docId, new Value(new byte[0])); 63 | } 64 | } 65 | 66 | if (m.size() > 0) 67 | bw.addMutation(m); 68 | } 69 | 70 | public static void index(int numPartitions, File src, String splitRegex, BatchWriter bw) 71 | throws Exception { 72 | if (src.isDirectory()) { 73 | File[] files = src.listFiles(); 74 | if (files != null) { 75 | for (File child : files) { 76 | index(numPartitions, child, splitRegex, bw); 77 | } 78 | } 79 | } else { 80 | 81 | StringBuilder sb = new StringBuilder(); 82 | 83 | try (FileReader fr = new FileReader(src)) { 84 | 85 | char[] data = new char[4096]; 86 | int len; 87 | while ((len = fr.read(data)) != -1) { 88 | sb.append(data, 0, len); 89 | } 90 | 91 | } 92 | 93 | index(numPartitions, src.getAbsolutePath(), sb.toString(), splitRegex, bw); 94 | } 95 | 96 | } 97 | 98 | static class IndexOpts extends ClientOpts { 99 | 100 | @Parameter(names = {"-t", "--table"}, required = true, description = "table to use") 101 | private String tableName; 102 | 103 | @Parameter(names = "--partitions", required = true, 104 | description = "the number of shards to create") 105 | int partitions; 106 | 107 | @Parameter(required = true, description = "<file> { <file> ...
}") 108 | List<String> files = new ArrayList<>(); 109 | } 110 | 111 | public static void main(String[] args) throws Exception { 112 | IndexOpts opts = new IndexOpts(); 113 | opts.parseArgs(Index.class.getName(), args); 114 | 115 | String splitRegex = "\\W+"; 116 | 117 | try (AccumuloClient client = Accumulo.newClient().from(opts.getClientPropsPath()).build(); 118 | BatchWriter bw = client.createBatchWriter(opts.tableName)) { 119 | for (String filename : opts.files) { 120 | index(opts.partitions, new File(filename), splitRegex, bw); 121 | } 122 | } 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /docs/export.md: -------------------------------------------------------------------------------- 1 | 17 | # Apache Accumulo Export/Import Example 18 | 19 | Accumulo provides a mechanism to export and import tables. This example shows 20 | how to use this feature. 21 | 22 | The shell session below shows creating a table, inserting data, and exporting 23 | the table. A table must be offline to export it, and it should remain offline 24 | for the duration of the distcp. An easy way to take a table offline without 25 | interrupting access to it is to clone it and take the clone offline. 26 | 27 | root@test15> createnamespace examples 28 | root@test15> createtable examples.table1 29 | root@test15 examples.table1> insert a cf1 cq1 v1 30 | root@test15 examples.table1> insert h cf1 cq1 v2 31 | root@test15 examples.table1> insert z cf1 cq1 v3 32 | root@test15 examples.table1> insert z cf1 cq2 v4 33 | root@test15 examples.table1> addsplits -t examples.table1 b r 34 | root@test15 examples.table1> scan 35 | a cf1:cq1 [] v1 36 | h cf1:cq1 [] v2 37 | z cf1:cq1 [] v3 38 | z cf1:cq2 [] v4 39 | root@test15 examples.table1> config -t examples.table1 -s table.split.threshold=100M 40 | root@test15 examples.table1> clonetable examples.table1 examples.table1_exp 41 | root@test15 examples.table1> offline examples.table1_exp 42 | root@test15 examples.table1> exporttable -t examples.table1_exp /tmp/table1_export 43 | root@test15 examples.table1> quit 44 | 45 | After executing the export command, a few files are created in the HDFS dir. 46 | One of the files is a list of files to distcp as shown below. 47 | 48 | $ hadoop fs -ls /tmp/table1_export 49 | Found 2 items 50 | -rw-r--r-- 3 user supergroup 162 2012-07-25 09:56 /tmp/table1_export/distcp.txt 51 | -rw-r--r-- 3 user supergroup 821 2012-07-25 09:56 /tmp/table1_export/exportMetadata.zip 52 | $ hadoop fs -cat /tmp/table1_export/distcp.txt 53 | hdfs://n1.example.com:6093/accumulo/tables/3/default_tablet/F0000000.rf 54 | hdfs://n1.example.com:6093/tmp/table1_export/exportMetadata.zip 55 | 56 | Before the table can be imported, it must be copied using distcp. After the 57 | distcp completes, the cloned table may be deleted. 58 | 59 | $ hadoop distcp -f /tmp/table1_export/distcp.txt /tmp/table1_export_dest 60 | 61 | The Accumulo shell session below shows importing the table and inspecting it. 62 | The data, splits, config, and logical time information for the table were 63 | preserved.
64 | 65 | root@test15> importtable examples.table1_copy /tmp/table1_export_dest 66 | root@test15> table examples.table1_copy 67 | root@test15 examples.table1_copy> scan 68 | a cf1:cq1 [] v1 69 | h cf1:cq1 [] v2 70 | z cf1:cq1 [] v3 71 | z cf1:cq2 [] v4 72 | root@test15 examples.table1_copy> getsplits -t examples.table1_copy 73 | b 74 | r 75 | root@test15> config -t examples.table1_copy -f split 76 | ---------+--------------------------+------------------------------------------- 77 | SCOPE | NAME | VALUE 78 | ---------+--------------------------+------------------------------------------- 79 | default | table.split.threshold .. | 1G 80 | table | @override ........... | 100M 81 | ---------+--------------------------+------------------------------------------- 82 | root@test15> tables -l 83 | accumulo.metadata => !0 84 | accumulo.root => +r 85 | examples.table1_copy => 5 86 | trace => 1 87 | root@test15> scan -t accumulo.metadata -b 5 -c srv:time 88 | 5;b srv:time [] M1343224500467 89 | 5;r srv:time [] M1343224500467 90 | 5< srv:time [] M1343224500467 91 | 92 | 93 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/mapreduce/TableToFile.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License.
16 | */ 17 | package org.apache.accumulo.examples.mapreduce; 18 | 19 | import java.io.IOException; 20 | import java.util.AbstractMap.SimpleImmutableEntry; 21 | import java.util.ArrayList; 22 | import java.util.List; 23 | import java.util.Map; 24 | 25 | import org.apache.accumulo.core.client.IteratorSetting; 26 | import org.apache.accumulo.core.data.Key; 27 | import org.apache.accumulo.core.data.Value; 28 | import org.apache.accumulo.examples.cli.ClientOpts; 29 | import org.apache.accumulo.examples.util.FormatUtil; 30 | import org.apache.accumulo.hadoop.mapreduce.AccumuloInputFormat; 31 | import org.apache.accumulo.hadoop.mapreduce.InputFormatBuilder; 32 | import org.apache.hadoop.fs.Path; 33 | import org.apache.hadoop.io.NullWritable; 34 | import org.apache.hadoop.io.Text; 35 | import org.apache.hadoop.mapreduce.Job; 36 | import org.apache.hadoop.mapreduce.Mapper; 37 | import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; 38 | 39 | import com.beust.jcommander.Parameter; 40 | 41 | /** 42 | * Takes a table and outputs the specified columns to a set of part files on HDFS 43 | */ 44 | public class TableToFile { 45 | 46 | static class Opts extends ClientOpts { 47 | @Parameter(names = {"-t", "--table"}, required = true, description = "table to use") 48 | String tableName; 49 | @Parameter(names = "--output", required = true, description = "output directory") 50 | String output; 51 | @Parameter(names = "--columns", description = "columns to extract, in cf:cq{,cf:cq,...} form") 52 | String columns = ""; 53 | } 54 | 55 | /** 56 | * The Mapper class that, given a table entry, generates the appropriate output line. 57 | */ 58 | public static class TTFMapper extends Mapper<Key,Value,NullWritable,Text> { 59 | @Override 60 | public void map(Key row, Value data, Context context) throws IOException, InterruptedException { 61 | Map.Entry<Key,Value> entry = new SimpleImmutableEntry<>(row, data); 62 | context.write(NullWritable.get(), new Text(FormatUtil.formatTableEntry(entry, false))); 63 | context.setStatus("Outputted Value"); 64 | } 65 | } 66 | 67 | public static void main(String[] args) throws Exception { 68 | Opts opts = new Opts(); 69 | opts.parseArgs(TableToFile.class.getName(), args); 70 | 71 | List<IteratorSetting.Column> columnsToFetch = new ArrayList<>(); 72 | for (String col : opts.columns.split(",")) { 73 | int idx = col.indexOf(":"); 74 | String cf = idx < 0 ? col : col.substring(0, idx); 75 | String cq = idx < 0 ? null : col.substring(idx + 1); 76 | if (!cf.isEmpty()) 77 | columnsToFetch.add(new IteratorSetting.Column(cf, cq)); 78 | } 79 | 80 | Job job = Job.getInstance(opts.getHadoopConfig()); 81 | job.setJobName(TableToFile.class.getSimpleName() + "_" + System.currentTimeMillis()); 82 | job.setJarByClass(TableToFile.class); 83 | job.setInputFormatClass(AccumuloInputFormat.class); 84 | InputFormatBuilder.InputFormatOptions<Job> inputOpts = AccumuloInputFormat.configure() 85 | .clientProperties(opts.getClientProperties()).table(opts.tableName); 86 | if (!columnsToFetch.isEmpty()) { 87 | inputOpts.fetchColumns(columnsToFetch); 88 | } 89 | inputOpts.store(job); 90 | job.setMapperClass(TTFMapper.class); 91 | job.setMapOutputKeyClass(NullWritable.class); 92 | job.setMapOutputValueClass(Text.class); 93 | job.setNumReduceTasks(0); 94 | job.setOutputFormatClass(TextOutputFormat.class); 95 | TextOutputFormat.setOutputPath(job, new Path(opts.output)); 96 | 97 | System.exit(job.waitForCompletion(true) ?
0 : 1); 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/shard/Query.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.accumulo.examples.shard; 18 | 19 | import java.util.ArrayList; 20 | import java.util.Collections; 21 | import java.util.List; 22 | import java.util.Map.Entry; 23 | 24 | import org.apache.accumulo.core.client.Accumulo; 25 | import org.apache.accumulo.core.client.AccumuloClient; 26 | import org.apache.accumulo.core.client.BatchScanner; 27 | import org.apache.accumulo.core.client.IteratorSetting; 28 | import org.apache.accumulo.core.client.sample.SamplerConfiguration; 29 | import org.apache.accumulo.core.data.Key; 30 | import org.apache.accumulo.core.data.Range; 31 | import org.apache.accumulo.core.data.Value; 32 | import org.apache.accumulo.core.iterators.user.IntersectingIterator; 33 | import org.apache.accumulo.core.security.Authorizations; 34 | import org.apache.accumulo.examples.cli.ClientOpts; 35 | import org.apache.hadoop.io.Text; 36 | 37 | import com.beust.jcommander.Parameter; 38 | 39 | /** 40 | * This program queries a set of terms in the shard table (populated by {@link Index}) using the 41 | * {@link IntersectingIterator}. 42 | */ 43 | public class Query { 44 | 45 | static class QueryOpts extends ClientOpts { 46 | 47 | @Parameter(description = " <term> { <term> ... }") 48 | List<String> terms = new ArrayList<>(); 49 | 50 | @Parameter(names = {"-t", "--table"}, required = true, description = "table to use") 51 | String tableName; 52 | 53 | @Parameter(names = {"--sample"}, 54 | description = "Do queries against sample, useful when sample is built using column qualifier") 55 | boolean useSample = false; 56 | 57 | @Parameter(names = {"--sampleCutoff"}, 58 | description = "Use sample data to determine if a query might return a number of documents over the cutoff.
This check is per tablet.") 59 | Integer sampleCutoff = null; 60 | } 61 | 62 | public static List<String> query(BatchScanner bs, List<String> terms, Integer cutoff) { 63 | 64 | Text[] columns = new Text[terms.size()]; 65 | int i = 0; 66 | for (String term : terms) { 67 | columns[i++] = new Text(term); 68 | } 69 | 70 | IteratorSetting ii; 71 | 72 | if (cutoff != null) { 73 | ii = new IteratorSetting(20, "ii", CutoffIntersectingIterator.class); 74 | CutoffIntersectingIterator.setCutoff(ii, cutoff); 75 | } else { 76 | ii = new IteratorSetting(20, "ii", IntersectingIterator.class); 77 | } 78 | 79 | IntersectingIterator.setColumnFamilies(ii, columns); 80 | bs.addScanIterator(ii); 81 | bs.setRanges(Collections.singleton(new Range())); 82 | List<String> result = new ArrayList<>(); 83 | for (Entry<Key,Value> entry : bs) { 84 | result.add(entry.getKey().getColumnQualifier().toString()); 85 | } 86 | return result; 87 | } 88 | 89 | public static void main(String[] args) throws Exception { 90 | QueryOpts opts = new QueryOpts(); 91 | opts.parseArgs(Query.class.getName(), args); 92 | 93 | try (AccumuloClient client = Accumulo.newClient().from(opts.getClientPropsPath()).build(); 94 | BatchScanner bs = client.createBatchScanner(opts.tableName, Authorizations.EMPTY, 10)) { 95 | if (opts.useSample) { 96 | SamplerConfiguration samplerConfig = client.tableOperations() 97 | .getSamplerConfiguration(opts.tableName); 98 | CutoffIntersectingIterator.validateSamplerConfig( 99 | client.tableOperations().getSamplerConfiguration(opts.tableName)); 100 | bs.setSamplerConfiguration(samplerConfig); 101 | } 102 | for (String entry : query(bs, opts.terms, opts.sampleCutoff)) { 103 | System.out.println(" " + entry); 104 | } 105 | } 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /docs/combiner.md: -------------------------------------------------------------------------------- 1 | 17 | # Apache Accumulo Combiner Example 18 | 19 | This tutorial uses the following Java class, which can be found in org.apache.accumulo.examples.combiner: 20 | 21 | * [StatsCombiner.java] - a combiner that calculates max, min, sum, and count 22 | 23 | This is a simple combiner example. To build this example run Maven and then 24 | copy the produced jar into the accumulo lib dir. This is already done in the 25 | tar distribution. 26 | 27 | $ bin/accumulo shell -u username 28 | Enter current password for 'username'@'instance': *** 29 | 30 | Shell - Apache Accumulo Interactive Shell 31 | - 32 | - version: 2.1.0-SNAPSHOT 33 | - instance name: instance 34 | - instance id: 00000000-0000-0000-0000-000000000000 35 | - 36 | - type 'help' for a list of available commands 37 | - 38 | username@instance> createnamespace examples 39 | username@instance> createtable examples.runners 40 | username@instance examples.runners> setiter -t examples.runners -p 10 -scan -minc -majc -n decStats -class org.apache.accumulo.examples.combiner.StatsCombiner 41 | Combiner that keeps track of min, max, sum, and count 42 | ----------> set StatsCombiner parameter all, set to true to apply Combiner to every column, otherwise leave blank. if true, columns option will be ignored.: 43 | ----------> set StatsCombiner parameter columns, <col fam>[:<col qual>]{,<col fam>[:<col qual>]} escape non-alphanum chars using %<hex>.: stat 44 | ----------> set StatsCombiner parameter reduceOnFullCompactionOnly, If true, only reduce on full major compactions. Defaults to false.
: 45 | ----------> set StatsCombiner parameter radix, radix/base of the numbers: 10 46 | username@instance examples.runners> setiter -t examples.runners -p 11 -scan -minc -majc -n hexStats -class org.apache.accumulo.examples.combiner.StatsCombiner 47 | Combiner that keeps track of min, max, sum, and count 48 | ----------> set StatsCombiner parameter all, set to true to apply Combiner to every column, otherwise leave blank. if true, columns option will be ignored.: 49 | ----------> set StatsCombiner parameter columns, <col fam>[:<col qual>]{,<col fam>[:<col qual>]} escape non-alphanum chars using %<hex>.: hstat 50 | ----------> set StatsCombiner parameter reduceOnFullCompactionOnly, If true, only reduce on full major compactions. Defaults to false. : 51 | ----------> set StatsCombiner parameter radix, radix/base of the numbers: 16 52 | username@instance examples.runners> insert 123456 name first Joe 53 | username@instance examples.runners> insert 123456 stat marathon 240 54 | username@instance examples.runners> scan 55 | 123456 name:first [] Joe 56 | 123456 stat:marathon [] 240,240,240,1 57 | username@instance examples.runners> insert 123456 stat marathon 230 58 | username@instance examples.runners> insert 123456 stat marathon 220 59 | username@instance examples.runners> scan 60 | 123456 name:first [] Joe 61 | 123456 stat:marathon [] 220,240,690,3 62 | username@instance examples.runners> insert 123456 hstat virtualMarathon 6a 63 | username@instance examples.runners> insert 123456 hstat virtualMarathon 6b 64 | username@instance examples.runners> scan 65 | 123456 hstat:virtualMarathon [] 6a,6b,d5,2 66 | 123456 name:first [] Joe 67 | 123456 stat:marathon [] 220,240,690,3 68 | 69 | In this example a table is created, and the example stats combiner is applied to 70 | the column families stat and hstat. The stats combiner computes min, max, sum, and 71 | count. It can be configured to use a different base or radix. In the example 72 | above the column family stat is configured for base 10 and the column family 73 | hstat is configured for base 16. 74 | 75 | [StatsCombiner.java]: ../src/main/java/org/apache/accumulo/examples/combiner/StatsCombiner.java 76 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/constraints/NumericValueConstraint.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License.
16 | */ 17 | package org.apache.accumulo.examples.constraints; 18 | 19 | import java.util.Collection; 20 | import java.util.List; 21 | 22 | import org.apache.accumulo.core.client.Accumulo; 23 | import org.apache.accumulo.core.client.AccumuloClient; 24 | import org.apache.accumulo.core.client.AccumuloException; 25 | import org.apache.accumulo.core.client.AccumuloSecurityException; 26 | import org.apache.accumulo.core.client.BatchWriter; 27 | import org.apache.accumulo.core.client.MutationsRejectedException; 28 | import org.apache.accumulo.core.client.TableNotFoundException; 29 | import org.apache.accumulo.core.data.ColumnUpdate; 30 | import org.apache.accumulo.core.data.Mutation; 31 | import org.apache.accumulo.core.data.Value; 32 | import org.apache.accumulo.core.data.constraints.Constraint; 33 | import org.apache.accumulo.examples.Common; 34 | import org.apache.accumulo.examples.cli.ClientOpts; 35 | import org.slf4j.Logger; 36 | import org.slf4j.LoggerFactory; 37 | 38 | /** 39 | * This class is an accumulo constraint that ensures values are numeric strings. 40 | */ 41 | public class NumericValueConstraint implements Constraint { 42 | 43 | private static final Logger log = LoggerFactory.getLogger(NumericValueConstraint.class); 44 | 45 | static final short NON_NUMERIC_VALUE = 1; 46 | static final String VIOLATION_MESSAGE = "Value is not numeric"; 47 | 48 | private static final List<Short> VIOLATION_LIST = List.of(NON_NUMERIC_VALUE); 49 | 50 | private boolean isNumeric(byte[] bytes) { 51 | for (byte b : bytes) { 52 | boolean ok = (b >= '0' && b <= '9'); 53 | if (!ok) 54 | return false; 55 | } 56 | return true; 57 | } 58 | 59 | @Override 60 | public List<Short> check(Environment env, Mutation mutation) { 61 | Collection<ColumnUpdate> updates = mutation.getUpdates(); 62 | 63 | for (ColumnUpdate columnUpdate : updates) { 64 | if (!isNumeric(columnUpdate.getValue())) 65 | return VIOLATION_LIST; 66 | } 67 | return null; 68 | } 69 | 70 | @Override 71 | public String getViolationDescription(short violationCode) { 72 | if (violationCode == NON_NUMERIC_VALUE) { 73 | return VIOLATION_MESSAGE; 74 | } 75 | return null; 76 | } 77 | 78 | public static void main(String[] args) 79 | throws AccumuloException, AccumuloSecurityException, TableNotFoundException { 80 | ClientOpts opts = new ClientOpts(); 81 | opts.parseArgs(NumericValueConstraint.class.getName(), args); 82 | 83 | try (AccumuloClient client = Accumulo.newClient().from(opts.getClientPropsPath()).build()) { 84 | Common.createTableWithNamespace(client, ConstraintsCommon.CONSTRAINTS_TABLE); 85 | 86 | /* 87 | * Add the {@link NumericValueConstraint} constraint to the table.
Be sure to use the fully 88 | * qualified class name 89 | */ 90 | int num = client.tableOperations().addConstraint(ConstraintsCommon.CONSTRAINTS_TABLE, 91 | "org.apache.accumulo.examples.constraints.NumericValueConstraint"); 92 | 93 | log.info("Attempting to write non-numeric data to testConstraints"); 94 | try (BatchWriter bw = client.createBatchWriter(ConstraintsCommon.CONSTRAINTS_TABLE)) { 95 | Mutation m = new Mutation("r1"); 96 | m.put("cf1", "cq1", new Value(("value1--$$@@%%").getBytes())); 97 | bw.addMutation(m); 98 | } catch (MutationsRejectedException e) { 99 | e.getConstraintViolationSummaries() 100 | .forEach(m -> log.error(ConstraintsCommon.CONSTRAINT_VIOLATED_MSG, m.constrainClass)); 101 | } 102 | client.tableOperations().removeConstraint(ConstraintsCommon.CONSTRAINTS_TABLE, num); 103 | } 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/combiner/StatsCombiner.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.accumulo.examples.combiner; 18 | 19 | import java.io.IOException; 20 | import java.util.Iterator; 21 | import java.util.Map; 22 | 23 | import org.apache.accumulo.core.client.IteratorSetting; 24 | import org.apache.accumulo.core.data.Key; 25 | import org.apache.accumulo.core.data.Value; 26 | import org.apache.accumulo.core.iterators.Combiner; 27 | import org.apache.accumulo.core.iterators.IteratorEnvironment; 28 | import org.apache.accumulo.core.iterators.SortedKeyValueIterator; 29 | 30 | /** 31 | * This combiner calculates the max, min, sum, and count of long integers represented as strings in 32 | * values. It stores the result in a comma-separated value of the form min,max,sum,count. If such a 33 | * value is encountered while combining, its information is incorporated into the running 34 | * calculations of min, max, sum, and count. See {@link Combiner} for more information on which 35 | * values are combined together. 
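 * <p>
 * For example, combining a plain value {@literal "240"} with running stats {@literal "220,230,450,2"}
 * yields {@literal "220,240,690,3"}: min(220,240)=220, max(230,240)=240, 450+240=690, 2+1=3. This is
 * consistent with the shell session shown in docs/combiner.md.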
36 | */ 37 | public class StatsCombiner extends Combiner { 38 | 39 | public static final String RADIX_OPTION = "radix"; 40 | 41 | private int radix = 10; 42 | 43 | @Override 44 | public Value reduce(Key key, Iterator<Value> iter) { 45 | 46 | long min = Long.MAX_VALUE; 47 | long max = Long.MIN_VALUE; 48 | long sum = 0; 49 | long count = 0; 50 | 51 | while (iter.hasNext()) { 52 | String[] stats = iter.next().toString().split(","); 53 | 54 | if (stats.length == 1) { 55 | long val = Long.parseLong(stats[0], radix); 56 | min = Math.min(val, min); 57 | max = Math.max(val, max); 58 | sum += val; 59 | count += 1; 60 | } else { 61 | min = Math.min(Long.parseLong(stats[0], radix), min); 62 | max = Math.max(Long.parseLong(stats[1], radix), max); 63 | sum += Long.parseLong(stats[2], radix); 64 | count += Long.parseLong(stats[3], radix); 65 | } 66 | } 67 | 68 | String ret = Long.toString(min, radix) + "," + Long.toString(max, radix) + "," 69 | + Long.toString(sum, radix) + "," + Long.toString(count, radix); 70 | return new Value(ret.getBytes()); 71 | } 72 | 73 | @Override 74 | public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options, 75 | IteratorEnvironment env) throws IOException { 76 | super.init(source, options, env); 77 | 78 | if (options.containsKey(RADIX_OPTION)) 79 | radix = Integer.parseInt(options.get(RADIX_OPTION)); 80 | else 81 | radix = 10; 82 | } 83 | 84 | @Override 85 | public IteratorOptions describeOptions() { 86 | IteratorOptions io = super.describeOptions(); 87 | io.setName("statsCombiner"); 88 | io.setDescription("Combiner that keeps track of min, max, sum, and count"); 89 | io.addNamedOption(RADIX_OPTION, "radix/base of the numbers"); 90 | return io; 91 | } 92 | 93 | @Override 94 | public boolean validateOptions(Map<String,String> options) { 95 | if (!super.validateOptions(options)) 96 | return false; 97 | 98 | if (options.containsKey(RADIX_OPTION) && !options.get(RADIX_OPTION).matches("\\d+")) 99 | throw new IllegalArgumentException( 100 | "invalid option " + RADIX_OPTION + ":" + options.get(RADIX_OPTION)); 101 | 102 | return true; 103 | } 104 | 105 | /** 106 | * A convenience method for setting the expected base/radix of the numbers 107 | * 108 | * @param iterConfig 109 | * Iterator settings to configure 110 | * @param base 111 | * The expected base/radix of the numbers. 112 | */ 113 | public static void setRadix(IteratorSetting iterConfig, int base) { 114 | iterConfig.addOption(RADIX_OPTION, base + ""); 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /src/test/java/org/apache/accumulo/examples/mapreduce/MapReduceIT.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License.
16 | */ 17 | package org.apache.accumulo.examples.mapreduce; 18 | 19 | import static org.junit.jupiter.api.Assertions.assertEquals; 20 | 21 | import java.security.MessageDigest; 22 | import java.time.Duration; 23 | import java.util.Base64; 24 | import java.util.Collections; 25 | import java.util.Map.Entry; 26 | import java.util.Properties; 27 | 28 | import org.apache.accumulo.core.client.Accumulo; 29 | import org.apache.accumulo.core.client.AccumuloClient; 30 | import org.apache.accumulo.core.client.BatchWriter; 31 | import org.apache.accumulo.core.client.Scanner; 32 | import org.apache.accumulo.core.conf.ClientProperty; 33 | import org.apache.accumulo.core.conf.Property; 34 | import org.apache.accumulo.core.data.Key; 35 | import org.apache.accumulo.core.data.Mutation; 36 | import org.apache.accumulo.core.data.Value; 37 | import org.apache.accumulo.core.security.Authorizations; 38 | import org.apache.accumulo.examples.ExamplesIT; 39 | import org.apache.accumulo.miniclusterImpl.MiniAccumuloClusterImpl; 40 | import org.apache.accumulo.miniclusterImpl.MiniAccumuloConfigImpl; 41 | import org.apache.accumulo.test.functional.ConfigurableMacBase; 42 | import org.apache.hadoop.conf.Configuration; 43 | import org.junit.jupiter.api.Test; 44 | 45 | public class MapReduceIT extends ConfigurableMacBase { 46 | 47 | @Override 48 | protected Duration defaultTimeout() { 49 | return Duration.ofMinutes(1); 50 | } 51 | 52 | @Override 53 | protected void configure(MiniAccumuloConfigImpl cfg, Configuration hadoopCoreSite) { 54 | cfg.setProperty(Property.TSERV_NATIVEMAP_ENABLED, "false"); 55 | } 56 | 57 | public static final String hadoopTmpDirArg = "-Dhadoop.tmp.dir=" + System.getProperty("user.dir") 58 | + "/target/hadoop-tmp"; 59 | 60 | static final String tablename = "mapredf"; 61 | static final String input_cf = "cf-HASHTYPE"; 62 | static final String input_cq = "cq-NOTHASHED"; 63 | static final String input_cfcq = input_cf + ":" + input_cq; 64 | static final String output_cq = "cq-MD4BASE64"; 65 | static final String output_cfcq = input_cf + ":" + output_cq; 66 | 67 | @Test 68 | public void test() throws Exception { 69 | String confFile = System.getProperty("user.dir") + "/target/accumulo-client.properties"; 70 | Properties props = getClientProperties(); 71 | String instance = ClientProperty.INSTANCE_NAME.getValue(props); 72 | String keepers = ClientProperty.INSTANCE_ZOOKEEPERS.getValue(props); 73 | ExamplesIT.writeClientPropsFile(confFile, instance, keepers, "root", ROOT_PASSWORD); 74 | try (AccumuloClient client = Accumulo.newClient().from(props).build()) { 75 | client.tableOperations().create(tablename); 76 | try (BatchWriter bw = client.createBatchWriter(tablename)) { 77 | for (int i = 0; i < 10; i++) { 78 | Mutation m = new Mutation("" + i); 79 | m.put(input_cf, input_cq, "row" + i); 80 | bw.addMutation(m); 81 | } 82 | } 83 | MiniAccumuloClusterImpl.ProcessInfo hash = getCluster().exec(RowHash.class, 84 | Collections.singletonList(hadoopTmpDirArg), "-c", confFile, "-t", tablename, "--column", 85 | input_cfcq); 86 | assertEquals(0, hash.getProcess().waitFor()); 87 | 88 | try (Scanner s = client.createScanner(tablename, Authorizations.EMPTY)) { 89 | s.fetchColumn(input_cf, output_cq); 90 | int i = 0; 91 | MessageDigest md = MessageDigest.getInstance("MD5"); 92 | for (Entry<Key,Value> entry : s) { 93 | byte[] check = Base64.getEncoder().encode(md.digest(("row" + i).getBytes())); 94 | assertEquals(entry.getValue().toString(), new String(check)); 95 | i++; 96 | } 97 | } 98 | } 99 | } 100 | } 101 |
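The expected values in the scan above can be reproduced outside the test. Below is a minimal standalone sketch of the MD5-then-Base64 transformation that RowHash applies to each value; the class name RowHashCheck is illustrative only and not part of the examples:

    import java.nio.charset.StandardCharsets;
    import java.security.MessageDigest;
    import java.util.Base64;

    // Hypothetical helper mirroring the assertion in MapReduceIT#test: hash each
    // value the way RowHash does, then Base64-encode the digest for comparison.
    public class RowHashCheck {
        public static void main(String[] args) throws Exception {
            MessageDigest md = MessageDigest.getInstance("MD5");
            for (int i = 0; i < 10; i++) {
                String value = "row" + i; // same values the test writes under cf-HASHTYPE:cq-NOTHASHED
                String digest = Base64.getEncoder()
                        .encodeToString(md.digest(value.getBytes(StandardCharsets.UTF_8)));
                // MessageDigest.digest() resets md, so it can be reused in the next iteration
                System.out.println(value + " -> " + digest);
            }
        }
    }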
-------------------------------------------------------------------------------- /docs/client.md: -------------------------------------------------------------------------------- 1 | 17 | # Apache Accumulo Client Examples 18 | 19 | The following Java classes are examples of the Accumulo client API: 20 | 21 | * [RowOperations.java] - reads and writes rows 22 | * [ReadWriteExample.java] - creates a table, writes to it, and reads from it 23 | 24 | [RowOperations.java] demonstrates how to read, write and delete rows using the BatchWriter and Scanner: 25 | 26 | $ ./bin/runex client.RowOperations 27 | [examples.client.RowOperations] INFO : This is only row2 28 | [examples.client.RowOperations] INFO : Key: row2 col:1 [] 1523301597006 false Value: v1 29 | [examples.client.RowOperations] INFO : Key: row2 col:2 [] 1523301597006 false Value: v2 30 | [examples.client.RowOperations] INFO : Key: row2 col:3 [] 1523301597006 false Value: v3 31 | [examples.client.RowOperations] INFO : This is everything 32 | [examples.client.RowOperations] INFO : Key: row1 col:1 [] 1523301597006 false Value: v1 33 | [examples.client.RowOperations] INFO : Key: row1 col:2 [] 1523301597006 false Value: v2 34 | [examples.client.RowOperations] INFO : Key: row1 col:3 [] 1523301597006 false Value: v3 35 | [examples.client.RowOperations] INFO : Key: row2 col:1 [] 1523301597006 false Value: v1 36 | [examples.client.RowOperations] INFO : Key: row2 col:2 [] 1523301597006 false Value: v2 37 | [examples.client.RowOperations] INFO : Key: row2 col:3 [] 1523301597006 false Value: v3 38 | [examples.client.RowOperations] INFO : Key: row3 col:1 [] 1523301597006 false Value: v1 39 | [examples.client.RowOperations] INFO : Key: row3 col:2 [] 1523301597006 false Value: v2 40 | [examples.client.RowOperations] INFO : Key: row3 col:3 [] 1523301597006 false Value: v3 41 | [examples.client.RowOperations] INFO : This is row1 and row3 42 | [examples.client.RowOperations] INFO : Key: row1 col:1 [] 1523301597006 false Value: v1 43 | [examples.client.RowOperations] INFO : Key: row1 col:2 [] 1523301597006 false Value: v2 44 | [examples.client.RowOperations] INFO : Key: row1 col:3 [] 1523301597006 false Value: v3 45 | [examples.client.RowOperations] INFO : Key: row3 col:1 [] 1523301597006 false Value: v1 46 | [examples.client.RowOperations] INFO : Key: row3 col:2 [] 1523301597006 false Value: v2 47 | [examples.client.RowOperations] INFO : Key: row3 col:3 [] 1523301597006 false Value: v3 48 | [examples.client.RowOperations] INFO : This is just row3 49 | [examples.client.RowOperations] INFO : Key: row3 col:1 [] 1523301597006 false Value: v1 50 | [examples.client.RowOperations] INFO : Key: row3 col:2 [] 1523301597006 false Value: v2 51 | [examples.client.RowOperations] INFO : Key: row3 col:3 [] 1523301597006 false Value: v3 52 | 53 | To create a table, write to it and read from it: 54 | 55 | $ ./bin/runex client.ReadWriteExample 56 | [examples.client.ReadWriteExample] INFO : hello0 cf:cq [] 1523306675130 false -> world0 57 | [examples.client.ReadWriteExample] INFO : hello1 cf:cq [] 1523306675130 false -> world1 58 | [examples.client.ReadWriteExample] INFO : hello2 cf:cq [] 1523306675130 false -> world2 59 | [examples.client.ReadWriteExample] INFO : hello3 cf:cq [] 1523306675130 false -> world3 60 | [examples.client.ReadWriteExample] INFO : hello4 cf:cq [] 1523306675130 false -> world4 61 | [examples.client.ReadWriteExample] INFO : hello5 cf:cq [] 1523306675130 false -> world5 62 | [examples.client.ReadWriteExample] INFO : hello6 cf:cq [] 1523306675130 false -> 
world6 63 | [examples.client.ReadWriteExample] INFO : hello7 cf:cq [] 1523306675130 false -> world7 64 | [examples.client.ReadWriteExample] INFO : hello8 cf:cq [] 1523306675130 false -> world8 65 | [examples.client.ReadWriteExample] INFO : hello9 cf:cq [] 1523306675130 false -> world9 66 | 67 | [Flush.java]: ../src/main/java/org/apache/accumulo/examples/client/Flush.java 68 | [RowOperations.java]: ../src/main/java/org/apache/accumulo/examples/client/RowOperations.java 69 | [ReadWriteExample.java]: ../src/main/java/org/apache/accumulo/examples/client/ReadWriteExample.java 70 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/filedata/CharacterHistogram.java: -------------------------------------------------------------------------------- 1 | /// * 2 | // * Licensed to the Apache Software Foundation (ASF) under one or more 3 | // * contributor license agreements. See the NOTICE file distributed with 4 | // * this work for additional information regarding copyright ownership. 5 | // * The ASF licenses this file to You under the Apache License, Version 2.0 6 | // * (the "License"); you may not use this file except in compliance with 7 | // * the License. You may obtain a copy of the License at 8 | // * 9 | // * http://www.apache.org/licenses/LICENSE-2.0 10 | // * 11 | // * Unless required by applicable law or agreed to in writing, software 12 | // * distributed under the License is distributed on an "AS IS" BASIS, 13 | // * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // * See the License for the specific language governing permissions and 15 | // * limitations under the License. 16 | // */ 17 | // package org.apache.accumulo.examples.filedata; 18 | // 19 | // import java.io.IOException; 20 | // import java.io.InputStream; 21 | // import java.util.Arrays; 22 | // import java.util.List; 23 | // import java.util.Map.Entry; 24 | // import java.util.Properties; 25 | // 26 | // import org.apache.accumulo.core.client.security.tokens.PasswordToken; 27 | // import org.apache.accumulo.core.data.Key; 28 | // import org.apache.accumulo.core.data.Mutation; 29 | // import org.apache.accumulo.core.data.Value; 30 | // import org.apache.accumulo.core.iterators.user.SummingArrayCombiner; 31 | // import org.apache.accumulo.core.security.ColumnVisibility; 32 | // import org.apache.accumulo.examples.cli.ClientOpts; 33 | // import org.apache.accumulo.hadoop.mapreduce.AccumuloOutputFormat; 34 | // import org.apache.hadoop.io.Text; 35 | // import org.apache.hadoop.mapreduce.Job; 36 | // import org.apache.hadoop.mapreduce.Mapper; 37 | // 38 | // import com.beust.jcommander.Parameter; 39 | // 40 | /// ** 41 | // * A MapReduce that computes a histogram of byte frequency for each file and stores the histogram 42 | // * alongside the file data. The {@link ChunkInputFormat} is used to read the file data from 43 | // * Accumulo. 
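// * For example, a chunk containing the three bytes "aaa" yields a 256-entry histogram whose entry for 0x61 ('a') is 3; the encoded histogram is written back under the info:hist column alongside the file data.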
44 | // */ 45 | // public class CharacterHistogram { 46 | // 47 | // private static final String VIS = "vis"; 48 | // 49 | // public static class HistMapper extends Mapper<List<Entry<Key,Value>>,InputStream,Text,Mutation> { 50 | // private ColumnVisibility cv; 51 | // 52 | // @Override 53 | // public void map(List<Entry<Key,Value>> k, InputStream v, Context context) 54 | // throws IOException, InterruptedException { 55 | // Long[] hist = new Long[256]; 56 | // Arrays.fill(hist, 0L); 57 | // int b = v.read(); 58 | // while (b >= 0) { 59 | // hist[b] += 1L; 60 | // b = v.read(); 61 | // } 62 | // v.close(); 63 | // Mutation m = new Mutation(k.get(0).getKey().getRow()); 64 | // m.put("info", "hist", cv, 65 | // new Value(SummingArrayCombiner.STRING_ARRAY_ENCODER.encode(Arrays.asList(hist)))); 66 | // context.write(new Text(), m); 67 | // } 68 | // 69 | // @Override 70 | // protected void setup(Context context) { 71 | // cv = new ColumnVisibility(context.getConfiguration().get(VIS, "")); 72 | // } 73 | // } 74 | // 75 | // static class Opts extends ClientOpts { 76 | // @Parameter(names = {"-t", "--table"}, required = true, description = "table to use") 77 | // String tableName; 78 | // @Parameter(names = "--vis") 79 | // String visibilities = ""; 80 | // } 81 | // 82 | // @SuppressWarnings("deprecation") 83 | // public static void main(String[] args) throws Exception { 84 | // Opts opts = new Opts(); 85 | // opts.parseArgs(CharacterHistogram.class.getName(), args); 86 | // 87 | // Job job = Job.getInstance(opts.getHadoopConfig()); 88 | // job.setJobName(CharacterHistogram.class.getSimpleName()); 89 | // job.setJarByClass(CharacterHistogram.class); 90 | // job.setInputFormatClass(ChunkInputFormat.class); 91 | // job.getConfiguration().set(VIS, opts.visibilities); 92 | // job.setMapperClass(HistMapper.class); 93 | // job.setMapOutputKeyClass(Text.class); 94 | // job.setMapOutputValueClass(Mutation.class); 95 | // 96 | // job.setNumReduceTasks(0); 97 | // 98 | // Properties props = opts.getClientProperties(); 99 | // ChunkInputFormat.setZooKeeperInstance(job, props.getProperty("instance.name"), 100 | // props.getProperty("instance.zookeepers")); 101 | // PasswordToken token = new PasswordToken(props.getProperty("auth.token")); 102 | // ChunkInputFormat.setConnectorInfo(job, props.getProperty("auth.principal"), token); 103 | // ChunkInputFormat.setInputTableName(job, opts.tableName); 104 | // ChunkInputFormat.setScanAuthorizations(job, opts.auths); 105 | // 106 | // job.setOutputFormatClass(AccumuloOutputFormat.class); 107 | // AccumuloOutputFormat.configure().clientProperties(opts.getClientProperties()) 108 | // .defaultTable(opts.tableName).createTables(true).store(job); 109 | // 110 | // System.exit(job.waitForCompletion(true) ? 0 : 1); 111 | // } 112 | // } 113 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/mapreduce/NGramIngest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License.
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.accumulo.examples.mapreduce; 18 | 19 | import java.io.IOException; 20 | import java.util.SortedSet; 21 | import java.util.TreeSet; 22 | import java.util.stream.Collectors; 23 | import java.util.stream.Stream; 24 | 25 | import org.apache.accumulo.core.client.AccumuloClient; 26 | import org.apache.accumulo.core.client.admin.NewTableConfiguration; 27 | import org.apache.accumulo.core.data.Mutation; 28 | import org.apache.accumulo.core.data.Value; 29 | import org.apache.accumulo.examples.Common; 30 | import org.apache.accumulo.examples.cli.ClientOpts; 31 | import org.apache.accumulo.hadoop.mapreduce.AccumuloOutputFormat; 32 | import org.apache.hadoop.fs.Path; 33 | import org.apache.hadoop.io.LongWritable; 34 | import org.apache.hadoop.io.Text; 35 | import org.apache.hadoop.mapreduce.Job; 36 | import org.apache.hadoop.mapreduce.Mapper; 37 | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 38 | import org.slf4j.Logger; 39 | import org.slf4j.LoggerFactory; 40 | 41 | import com.beust.jcommander.Parameter; 42 | 43 | /** 44 | * Map job to ingest n-gram files from 45 | * http://storage.googleapis.com/books/ngrams/books/datasetsv2.html 46 | */ 47 | public class NGramIngest { 48 | 49 | private static final Logger log = LoggerFactory.getLogger(NGramIngest.class); 50 | 51 | static class Opts extends ClientOpts { 52 | @Parameter(names = {"-t", "--table"}, required = true, description = "table to use") 53 | String tableName; 54 | @Parameter(names = {"-i", "--input"}, required = true, description = "HDFS input directory") 55 | String inputDirectory; 56 | } 57 | 58 | static class NGramMapper extends Mapper<LongWritable,Text,Text,Mutation> { 59 | 60 | @Override 61 | protected void map(LongWritable location, Text value, Context context) 62 | throws IOException, InterruptedException { 63 | String[] parts = value.toString().split("\\t"); 64 | if (parts.length >= 4) { 65 | Mutation m = new Mutation(parts[0]); 66 | m.put(parts[1], String.format("%010d", Long.parseLong(parts[2])), 67 | new Value(parts[3].trim().getBytes())); 68 | context.write(null, m); 69 | } 70 | } 71 | } 72 | 73 | public static void main(String[] args) throws Exception { 74 | Opts opts = new Opts(); 75 | opts.parseArgs(NGramIngest.class.getName(), args); 76 | 77 | Job job = Job.getInstance(opts.getHadoopConfig()); 78 | job.setJobName(NGramIngest.class.getSimpleName()); 79 | job.setJarByClass(NGramIngest.class); 80 | 81 | job.setInputFormatClass(TextInputFormat.class); 82 | job.setOutputFormatClass(AccumuloOutputFormat.class); 83 | AccumuloOutputFormat.configure().clientProperties(opts.getClientProperties()) 84 | .defaultTable(opts.tableName).store(job); 85 | 86 | job.setMapperClass(NGramMapper.class); 87 | job.setMapOutputKeyClass(Text.class); 88 | job.setMapOutputValueClass(Mutation.class); 89 | 90 | job.setNumReduceTasks(0); 91 | job.setSpeculativeExecution(false); 92 | 93 | try (AccumuloClient client = opts.createAccumuloClient()) { 94 | if (!client.tableOperations().exists(opts.tableName)) { 95 | String[] numbers = "1 2 3 4 5 6 7 8 9".split("\\s"); 96 | String[] lower = "a b c d e f g h i j k l
m n o p q r s t u v w x y z".split("\\s"); 97 | String[] upper = "A B C D E F G H I J K L M N O P Q R S T U V W X Y Z".split("\\s"); 98 | 99 | SortedSet<Text> splits = Stream.of(numbers, lower, upper).flatMap(Stream::of).map(Text::new) 100 | .collect(Collectors.toCollection(TreeSet::new)); 101 | 102 | var newTableConfig = new NewTableConfiguration().withSplits(splits); 103 | 104 | log.info("Creating table " + opts.tableName); 105 | Common.createTableWithNamespace(client, opts.tableName, newTableConfig); 106 | } 107 | } 108 | 109 | TextInputFormat.addInputPath(job, new Path(opts.inputDirectory)); 110 | System.exit(job.waitForCompletion(true) ? 0 : 1); 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /src/main/java/org/apache/accumulo/examples/mapreduce/WordCount.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.accumulo.examples.mapreduce; 18 | 19 | import java.io.IOException; 20 | import java.text.SimpleDateFormat; 21 | import java.util.Collections; 22 | import java.util.Date; 23 | 24 | import org.apache.accumulo.core.client.AccumuloClient; 25 | import org.apache.accumulo.core.client.IteratorSetting; 26 | import org.apache.accumulo.core.client.admin.NewTableConfiguration; 27 | import org.apache.accumulo.core.data.Mutation; 28 | import org.apache.accumulo.core.iterators.user.SummingCombiner; 29 | import org.apache.accumulo.examples.Common; 30 | import org.apache.accumulo.examples.cli.ClientOpts; 31 | import org.apache.accumulo.hadoop.mapreduce.AccumuloOutputFormat; 32 | import org.apache.hadoop.fs.Path; 33 | import org.apache.hadoop.io.LongWritable; 34 | import org.apache.hadoop.io.Text; 35 | import org.apache.hadoop.mapreduce.Job; 36 | import org.apache.hadoop.mapreduce.Mapper; 37 | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 38 | import org.slf4j.Logger; 39 | import org.slf4j.LoggerFactory; 40 | 41 | import com.beust.jcommander.Parameter; 42 | 43 | /** 44 | * A simple MapReduce job that inserts word counts into Accumulo.
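 * For example, mapping the line "the quick the" emits the mutations the -> count:<today> = "1" (twice) and quick -> count:<today> = "1"; the SummingCombiner attached at table creation aggregates these to 2 and 1 at scan time.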
See docs/wordcount.md 45 | */ 46 | public final class WordCount { 47 | 48 | private static final Logger log = LoggerFactory.getLogger(WordCount.class); 49 | 50 | private WordCount() {} 51 | 52 | static class Opts extends ClientOpts { 53 | @Parameter(names = {"-t", "--table"}, description = "Name of output Accumulo table") 54 | String tableName = Common.NAMESPACE + ".wordcount"; 55 | @Parameter(names = {"-i", "--input"}, required = true, description = "HDFS input directory") 56 | String inputDirectory; 57 | @Parameter(names = {"-d", "--dfsPath"}, 58 | description = "HDFS Path where accumulo-client.properties exists") 59 | String hdfsPath; 60 | } 61 | 62 | public static class MapClass extends Mapper<LongWritable,Text,Text,Mutation> { 63 | @Override 64 | public void map(LongWritable key, Text value, Context output) throws IOException { 65 | String today = new SimpleDateFormat("yyyyMMdd").format(new Date()); 66 | String[] words = value.toString().split("\\s+"); 67 | 68 | for (String word : words) { 69 | Mutation mutation = new Mutation(word); 70 | mutation.at().family("count").qualifier(today).put("1"); 71 | 72 | try { 73 | output.write(null, mutation); 74 | } catch (InterruptedException e) { 75 | log.error("Could not write mutation to Context.", e); 76 | } 77 | } 78 | } 79 | } 80 | 81 | public static void main(String[] args) throws Exception { 82 | Opts opts = new Opts(); 83 | opts.parseArgs(WordCount.class.getName(), args); 84 | 85 | // Create Accumulo table with Summing iterator attached 86 | try (AccumuloClient client = opts.createAccumuloClient()) { 87 | IteratorSetting is = new IteratorSetting(10, SummingCombiner.class); 88 | SummingCombiner.setColumns(is, 89 | Collections.singletonList(new IteratorSetting.Column("count"))); 90 | SummingCombiner.setEncodingType(is, SummingCombiner.Type.STRING); 91 | Common.createTableWithNamespace(client, opts.tableName, 92 | new NewTableConfiguration().attachIterator(is)); 93 | } 94 | 95 | // Create M/R job 96 | Job job = Job.getInstance(opts.getHadoopConfig()); 97 | job.setJobName(WordCount.class.getName()); 98 | job.setJarByClass(WordCount.class); 99 | job.setInputFormatClass(TextInputFormat.class); 100 | TextInputFormat.setInputPaths(job, new Path(opts.inputDirectory)); 101 | 102 | job.setMapperClass(MapClass.class); 103 | job.setNumReduceTasks(0); 104 | job.setOutputFormatClass(AccumuloOutputFormat.class); 105 | job.setOutputKeyClass(Text.class); 106 | job.setOutputValueClass(Mutation.class); 107 | 108 | if (opts.hdfsPath != null) { 109 | AccumuloOutputFormat.configure().clientPropertiesPath(opts.hdfsPath) 110 | .defaultTable(opts.tableName).store(job); 111 | } else { 112 | AccumuloOutputFormat.configure().clientProperties(opts.getClientProperties()) 113 | .defaultTable(opts.tableName).store(job); 114 | } 115 | System.exit(job.waitForCompletion(true) ? 0 : 1); 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /src/test/java/org/apache/accumulo/examples/dirlist/CountIT.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License.
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.accumulo.examples.dirlist; 18 | 19 | import static org.junit.jupiter.api.Assertions.assertEquals; 20 | import static org.junit.jupiter.api.Assertions.assertFalse; 21 | 22 | import java.util.ArrayList; 23 | import java.util.Map.Entry; 24 | 25 | import org.apache.accumulo.core.client.Accumulo; 26 | import org.apache.accumulo.core.client.AccumuloClient; 27 | import org.apache.accumulo.core.client.BatchWriter; 28 | import org.apache.accumulo.core.client.BatchWriterConfig; 29 | import org.apache.accumulo.core.client.Scanner; 30 | import org.apache.accumulo.core.conf.Property; 31 | import org.apache.accumulo.core.data.Key; 32 | import org.apache.accumulo.core.data.Value; 33 | import org.apache.accumulo.core.security.Authorizations; 34 | import org.apache.accumulo.core.security.ColumnVisibility; 35 | import org.apache.accumulo.core.util.Pair; 36 | import org.apache.accumulo.examples.cli.BatchWriterOpts; 37 | import org.apache.accumulo.examples.cli.ScannerOpts; 38 | import org.apache.accumulo.miniclusterImpl.MiniAccumuloConfigImpl; 39 | import org.apache.accumulo.test.functional.ConfigurableMacBase; 40 | import org.apache.hadoop.conf.Configuration; 41 | import org.junit.jupiter.api.AfterEach; 42 | import org.junit.jupiter.api.BeforeEach; 43 | import org.junit.jupiter.api.Test; 44 | 45 | public class CountIT extends ConfigurableMacBase { 46 | 47 | private AccumuloClient client; 48 | private String tableName; 49 | 50 | @Override 51 | protected void configure(MiniAccumuloConfigImpl cfg, Configuration hadoopCoreSite) { 52 | cfg.setProperty(Property.TSERV_NATIVEMAP_ENABLED, "false"); 53 | } 54 | 55 | @BeforeEach 56 | public void setupInstance() throws Exception { 57 | tableName = getUniqueNames(1)[0]; 58 | client = Accumulo.newClient().from(getClientProperties()).build(); 59 | client.tableOperations().create(tableName); 60 | try (BatchWriter bw = client.createBatchWriter(tableName, new BatchWriterConfig())) { 61 | ColumnVisibility cv = new ColumnVisibility(); 62 | // / has 1 dir 63 | // /local has 2 dirs 1 file 64 | // /local/user1 has 2 files 65 | bw.addMutation(Ingest.buildMutation(cv, "/local", true, false, true, 272, 12345, null)); 66 | bw.addMutation(Ingest.buildMutation(cv, "/local/user1", true, false, true, 272, 12345, null)); 67 | bw.addMutation(Ingest.buildMutation(cv, "/local/user2", true, false, true, 272, 12345, null)); 68 | bw.addMutation( 69 | Ingest.buildMutation(cv, "/local/file", false, false, false, 1024, 12345, null)); 70 | bw.addMutation( 71 | Ingest.buildMutation(cv, "/local/file", false, false, false, 1024, 23456, null)); 72 | bw.addMutation( 73 | Ingest.buildMutation(cv, "/local/user1/file1", false, false, false, 2024, 12345, null)); 74 | bw.addMutation( 75 | Ingest.buildMutation(cv, "/local/user1/file2", false, false, false, 1028, 23456, null)); 76 | } 77 | } 78 | 79 | @AfterEach 80 | public void teardown() { 81 | client.close(); 82 | } 83 | 84 | @Test 85 | public void test() throws Exception { 86 | 87 | ScannerOpts scanOpts = new ScannerOpts(); 88 | BatchWriterOpts bwOpts = new BatchWriterOpts(); 89 | FileCount 
fc = new FileCount(client, tableName, Authorizations.EMPTY, new ColumnVisibility(), 90 | scanOpts, bwOpts); 91 | 92 | ArrayList<Pair<String,String>> expected = new ArrayList<>(); 93 | expected.add(new Pair<>(QueryUtil.getRow("").toString(), "1,0,3,3")); 94 | expected.add(new Pair<>(QueryUtil.getRow("/local").toString(), "2,1,2,3")); 95 | expected.add(new Pair<>(QueryUtil.getRow("/local/user1").toString(), "0,2,0,2")); 96 | expected.add(new Pair<>(QueryUtil.getRow("/local/user2").toString(), "0,0,0,0")); 97 | 98 | int actualCount = 0; 99 | try (Scanner scanner = client.createScanner(tableName, new Authorizations())) { 100 | scanner.fetchColumn("dir", "counts"); 101 | assertFalse(scanner.iterator().hasNext()); 102 | 103 | fc.run(); 104 | 105 | for (Entry<Key,Value> e : scanner) { 106 | assertEquals(e.getKey().getRow().toString(), expected.get(actualCount).getFirst()); 107 | assertEquals(e.getValue().toString(), expected.get(actualCount).getSecond()); 108 | actualCount++; 109 | } 110 | } 111 | assertEquals(expected.size(), actualCount); 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /docs/shard.md: -------------------------------------------------------------------------------- 1 | 17 | # Apache Accumulo Shard Example 18 | 19 | Accumulo has an iterator called the intersecting iterator which supports querying a term index that is partitioned by 20 | document, or "sharded". This example shows how to use the intersecting iterator through these four programs: 21 | 22 | * [Index.java] - Indexes a set of text files into an Accumulo table. 23 | * [Query.java] - Finds documents containing a given set of terms. 24 | * [Reverse.java] - Reads the index table and writes a map of documents to terms into another table. 25 | * [ContinuousQuery.java] - Uses the table populated by Reverse.java to select N random terms per document. Then it continuously and randomly queries those terms. 26 | 27 | To run these example programs, create two tables like below. 28 | 29 | username@instance> createnamespace examples 30 | username@instance> createtable examples.shard 31 | username@instance examples.shard> createtable examples.doc2term 32 | 33 | After creating the tables, index some files. The following command indexes all the Java files in the Accumulo source code. 34 | 35 | $ find /path/to/accumulo/core -name "*.java" | xargs ./bin/runex shard.Index -t examples.shard --partitions 30 36 | 37 | The following command queries the index to find all files containing 'foo' and 'bar'.
38 | 39 | $ ./bin/runex shard.Query -t examples.shard foo bar 40 | /path/to/accumulo/core/src/test/java/org/apache/accumulo/core/spi/balancer/BaseHostRegexTableLoadBalancerTest.java 41 | /path/to/accumulo/core/src/test/java/org/apache/accumulo/core/iterators/user/WholeRowIteratorTest.java 42 | /path/to/accumulo/core/src/test/java/org/apache/accumulo/core/iteratorsImpl/IteratorConfigUtilTest.java 43 | /path/to/accumulo/core/src/test/java/org/apache/accumulo/core/data/KeyBuilderTest.java 44 | /path/to/accumulo/core/src/test/java/org/apache/accumulo/core/spi/balancer/HostRegexTableLoadBalancerReconfigurationTest.java 45 | /path/to/accumulo/core/src/test/java/org/apache/accumulo/core/security/ColumnVisibilityTest.java 46 | /path/to/accumulo/core/src/test/java/org/apache/accumulo/core/summary/SummaryCollectionTest.java 47 | /path/to/accumulo/core/src/test/java/org/apache/accumulo/core/spi/balancer/HostRegexTableLoadBalancerTest.java 48 | /path/to/accumulo/core/src/test/java/org/apache/accumulo/core/client/IteratorSettingTest.java 49 | /path/to/accumulo/core/src/test/java/org/apache/accumulo/core/data/KeyExtentTest.java 50 | /path/to/accumulo/core/src/test/java/org/apache/accumulo/core/security/VisibilityEvaluatorTest.java 51 | /path/to/accumulo/core/src/test/java/org/apache/accumulo/core/iterators/user/TransformingIteratorTest.java 52 | /path/to/accumulo/core/src/test/java/org/apache/accumulo/core/client/admin/NewTableConfigurationTest.java 53 | /path/to/accumulo/core/src/test/java/org/apache/accumulo/core/conf/HadoopCredentialProviderTest.java 54 | /path/to/accumulo/core/src/test/java/org/apache/accumulo/core/clientImpl/TableOperationsHelperTest.java 55 | /path/to/accumulo/core/src/test/java/org/apache/accumulo/core/iterators/user/WholeColumnFamilyIteratorTest.java 56 | 57 | To run ContinuousQuery, first run Reverse.java to populate the `examples.doc2term` table. 58 | 59 | $ ./bin/runex shard.Reverse --shardTable examples.shard --doc2Term examples.doc2term 60 | 61 | Below, ContinuousQuery is run with 5 terms: it selects 5 random terms from each document, then repeatedly picks one 62 | document's set of 5 terms at random and queries for them, printing the number of matching documents and the query time in seconds. 63 | 64 | $ ./bin/runex shard.ContinuousQuery --shardTable examples.shard --doc2Term examples.doc2term --terms 5 65 | [string, protected, sizeopt, cache, build] 1 0.084 66 | [public, these, exception, to, as] 25 0.267 67 | [by, encodeprevendrow, 0, work, as] 4 0.056 68 | [except, to, a, limitations, one] 969 0.197 69 | [copy, as, asf, version, is] 969 0.341 70 | [core, class, may, regarding, without] 862 0.437 71 | [max_data_to_print, default_visibility_cache_size, use, accumulo_export_info, fate] 1 0.066 72 | 73 | 74 | [Index.java]: ../src/main/java/org/apache/accumulo/examples/shard/Index.java 75 | [Query.java]: ../src/main/java/org/apache/accumulo/examples/shard/Query.java 76 | [Reverse.java]: ../src/main/java/org/apache/accumulo/examples/shard/Reverse.java 77 | [ContinuousQuery.java]: ../src/main/java/org/apache/accumulo/examples/shard/ContinuousQuery.java 78 | --------------------------------------------------------------------------------
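Query.java drives the search shown in docs/shard.md with Accumulo's IntersectingIterator. A minimal sketch of that pattern, assuming an already-built AccumuloClient and the examples.shard table created earlier; the class name ShardQuerySketch and the thread count are illustrative, not part of the examples:

    import java.util.Collections;
    import java.util.Map.Entry;

    import org.apache.accumulo.core.client.AccumuloClient;
    import org.apache.accumulo.core.client.BatchScanner;
    import org.apache.accumulo.core.client.IteratorSetting;
    import org.apache.accumulo.core.data.Key;
    import org.apache.accumulo.core.data.Range;
    import org.apache.accumulo.core.data.Value;
    import org.apache.accumulo.core.iterators.user.IntersectingIterator;
    import org.apache.accumulo.core.security.Authorizations;
    import org.apache.hadoop.io.Text;

    public class ShardQuerySketch {
        // Print every document that contains all of the given terms.
        public static void queryTerms(AccumuloClient client, String... terms) throws Exception {
            try (BatchScanner bs = client.createBatchScanner("examples.shard", Authorizations.EMPTY, 10)) {
                Text[] columns = new Text[terms.length];
                for (int i = 0; i < terms.length; i++) {
                    columns[i] = new Text(terms[i]);
                }
                // Terms are stored as column families; the iterator ANDs them within each shard (row).
                IteratorSetting ii = new IteratorSetting(20, "ii", IntersectingIterator.class);
                IntersectingIterator.setColumnFamilies(ii, columns);
                bs.addScanIterator(ii);
                bs.setRanges(Collections.singleton(new Range())); // search all shards
                for (Entry<Key,Value> entry : bs) {
                    // the index stores the document id in the column qualifier
                    System.out.println(entry.getKey().getColumnQualifier());
                }
            }
        }
    }

This mirrors the layout Index.java writes: each row is a shard id, each column family a term, and each column qualifier a document id.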