├── spark
│   ├── .gitignore
│   ├── contrib
│   │   ├── checkstyle.xml
│   │   └── import-control.xml
│   ├── run.sh
│   └── README.md
├── .gitignore
├── NOTICE
├── src
│   ├── main
│   │   ├── java
│   │   │   └── org
│   │   │       └── apache
│   │   │           └── accumulo
│   │   │               └── examples
│   │   │                   ├── constraints
│   │   │                   │   ├── ConstraintsCommon.java
│   │   │                   │   ├── MaxMutationSize.java
│   │   │                   │   └── NumericValueConstraint.java
│   │   │                   ├── bloom
│   │   │                   │   ├── BloomCommon.java
│   │   │                   │   ├── BloomFiltersNotFound.java
│   │   │                   │   ├── BloomBatchScanner.java
│   │   │                   │   └── BloomFilters.java
│   │   │                   ├── cli
│   │   │                   │   ├── ScannerOpts.java
│   │   │                   │   ├── ClientOnRequiredTable.java
│   │   │                   │   ├── ClientOnDefaultTable.java
│   │   │                   │   ├── Help.java
│   │   │                   │   ├── ClientOpts.java
│   │   │                   │   └── BatchWriterOpts.java
│   │   │                   ├── client
│   │   │                   │   ├── Flush.java
│   │   │                   │   ├── CountingVerifyingReceiver.java
│   │   │                   │   ├── ReadWriteExample.java
│   │   │                   │   └── SequentialBatchWriter.java
│   │   │                   ├── Common.java
│   │   │                   ├── helloworld
│   │   │                   │   ├── Read.java
│   │   │                   │   └── Insert.java
│   │   │                   ├── filedata
│   │   │                   │   ├── KeyUtil.java
│   │   │                   │   ├── FileDataQuery.java
│   │   │                   │   ├── VisibilityCombiner.java
│   │   │                   │   ├── ChunkInputFormat.java
│   │   │                   │   └── CharacterHistogram.java
│   │   │                   ├── mapreduce
│   │   │                   │   ├── bulk
│   │   │                   │   │   ├── SetupTable.java
│   │   │                   │   │   └── VerifyIngest.java
│   │   │                   │   ├── RegexExample.java
│   │   │                   │   ├── RowHash.java
│   │   │                   │   ├── TableToFile.java
│   │   │                   │   ├── NGramIngest.java
│   │   │                   │   └── WordCount.java
│   │   │                   ├── shard
│   │   │                   │   ├── Reverse.java
│   │   │                   │   ├── Index.java
│   │   │                   │   └── Query.java
│   │   │                   ├── util
│   │   │                   │   └── FormatUtil.java
│   │   │                   └── combiner
│   │   │                       └── StatsCombiner.java
│   │   └── resources
│   │       └── log4j.properties
│   └── test
│       ├── resources
│       │   └── log4j.properties
│       └── java
│           └── org
│               └── apache
│                   └── accumulo
│                       └── examples
│                           ├── filedata
│                           │   └── KeyUtilTest.java
│                           ├── constraints
│                           │   ├── NumericValueConstraintTest.java
│                           │   └── AlphaNumKeyConstraintTest.java
│                           ├── mapreduce
│                           │   └── MapReduceIT.java
│                           └── dirlist
│                               └── CountIT.java
├── .asf.yaml
├── CONTRIBUTING.md
├── contrib
│   ├── checkstyle.xml
│   └── import-control.xml
├── bin
│   ├── build
│   ├── runmr
│   └── runex
├── conf
│   ├── log4j2.properties
│   └── env.sh.example
├── docs
│   ├── helloworld.md
│   ├── release-testing.md
│   ├── bulkIngest.md
│   ├── rowhash.md
│   ├── tracing.md
│   ├── tabletofile.md
│   ├── regex.md
│   ├── constraints.md
│   ├── isolation.md
│   ├── terasort.md
│   ├── filedata.md
│   ├── batch.md
│   ├── wordcount.md
│   ├── reservations.md
│   ├── uniquecols.md
│   ├── classpath.md
│   ├── export.md
│   ├── combiner.md
│   ├── client.md
│   └── shard.md
└── .github
    └── workflows
        └── maven.yaml
/spark/.gitignore:
--------------------------------------------------------------------------------
1 | /.classpath
2 | /.project
3 | /.settings/
4 | /target/
5 | /*.iml
6 | /.idea
7 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /.classpath
2 | /.project
3 | /.settings/
4 | /target/
5 | /*.iml
6 | /.idea
7 | /examples.conf
8 | /conf/env.sh
9 |
--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
1 | Apache Accumulo Examples
2 | Copyright 2019 The Apache Software Foundation
3 |
4 | This product includes software developed at
5 | The Apache Software Foundation (http://www.apache.org/).
6 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/constraints/ConstraintsCommon.java:
--------------------------------------------------------------------------------
1 | package org.apache.accumulo.examples.constraints;
2 |
3 | import org.apache.accumulo.examples.Common;
4 |
5 | public enum ConstraintsCommon {
6 | ;
7 | public static final String CONSTRAINTS_TABLE = Common.NAMESPACE + ".testConstraints";
8 | public static final String CONSTRAINT_VIOLATED_MSG = "Constraint violated: {}";
9 | }
10 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/bloom/BloomCommon.java:
--------------------------------------------------------------------------------
1 | package org.apache.accumulo.examples.bloom;
2 |
3 | import org.apache.accumulo.examples.Common;
4 |
5 | enum BloomCommon {
6 | ;
7 | public static final String BLOOM_TEST1_TABLE = Common.NAMESPACE + ".bloom_test1";
8 | public static final String BLOOM_TEST2_TABLE = Common.NAMESPACE + ".bloom_test2";
9 | public static final String BLOOM_TEST3_TABLE = Common.NAMESPACE + ".bloom_test3";
10 | public static final String BLOOM_TEST4_TABLE = Common.NAMESPACE + ".bloom_test4";
11 |
12 | public static final String BLOOM_ENABLED_PROPERTY = "table.bloom.enabled";
13 | }
14 |
--------------------------------------------------------------------------------
/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one or more
2 | # contributor license agreements. See the NOTICE file distributed with
3 | # this work for additional information regarding copyright ownership.
4 | # The ASF licenses this file to You under the Apache License, Version 2.0
5 | # (the "License"); you may not use this file except in compliance with
6 | # the License. You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | log4j.rootLogger=INFO, CA
17 | log4j.appender.CA=org.apache.log4j.ConsoleAppender
18 | log4j.appender.CA.layout=org.apache.log4j.PatternLayout
19 | log4j.appender.CA.layout.ConversionPattern=[%t] %-5p %c %x - %m%n
20 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/cli/ScannerOpts.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.cli;
18 |
19 | import com.beust.jcommander.Parameter;
20 |
21 | public class ScannerOpts {
22 | @Parameter(names = "--scanBatchSize",
23 | description = "the number of key-values to pull during a scan")
24 | public int scanBatchSize = 1000;
25 | }
26 |
--------------------------------------------------------------------------------
/.asf.yaml:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed to the Apache Software Foundation (ASF) under one
3 | # or more contributor license agreements. See the NOTICE file
4 | # distributed with this work for additional information
5 | # regarding copyright ownership. The ASF licenses this file
6 | # to you under the Apache License, Version 2.0 (the
7 | # "License"); you may not use this file except in compliance
8 | # with the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing,
13 | # software distributed under the License is distributed on an
14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | # KIND, either express or implied. See the License for the
16 | # specific language governing permissions and limitations
17 | # under the License.
18 | #
19 |
20 | # https://cwiki.apache.org/confluence/display/INFRA/git+-+.asf.yaml+features
21 |
22 | github:
23 | description: "Apache Accumulo Examples"
24 | homepage: https://accumulo.apache.org
25 | labels:
26 | - accumulo
27 | - big-data
28 | - hacktoberfest
29 | features:
30 | wiki: false
31 | issues: true
32 | projects: true
33 |
34 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/cli/ClientOnRequiredTable.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.cli;
18 |
19 | import com.beust.jcommander.Parameter;
20 |
21 | public class ClientOnRequiredTable extends ClientOpts {
22 | @Parameter(names = {"-t", "--table"}, required = true, description = "table to use")
23 | private String tableName;
24 |
25 | public String getTableName() {
26 | return tableName;
27 | }
28 | }
29 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 |
17 |
18 | # Contributing to Accumulo Examples
19 |
20 | Contributions to Accumulo Examples can be made by creating a pull request to this repo
21 | on GitHub.
22 |
23 | Before creating a pull request, run `mvn clean verify` and run through the instructions
24 | for any example that was changed.
25 |
26 | For general instructions on contributing to Accumulo projects, check out the
27 | [Accumulo Contributor guide][contribute].
28 |
29 | [contribute]: https://accumulo.apache.org/contributor/
30 |
--------------------------------------------------------------------------------
/contrib/checkstyle.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/spark/contrib/checkstyle.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/bin/build:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env bash
2 |
3 | # Licensed to the Apache Software Foundation (ASF) under one or more
4 | # contributor license agreements. See the NOTICE file distributed with
5 | # this work for additional information regarding copyright ownership.
6 | # The ASF licenses this file to You under the Apache License, Version 2.0
7 | # (the "License"); you may not use this file except in compliance with
8 | # the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | bin_dir=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
19 | ex_home=$( cd "$( dirname "$bin_dir" )" && pwd )
20 |
21 | if [ -f "$ex_home/conf/env.sh" ]; then
22 | . "$ex_home"/conf/env.sh
23 | else
24 | . "$ex_home"/conf/env.sh.example
25 | fi
26 |
27 | if [[ ! -f "$EXAMPLES_JAR_PATH" ]]; then
28 | echo "Building $EXAMPLES_JAR_PATH"
29 | cd "${ex_home}" || exit 1
30 | mvn clean verify -P create-shade-jar -D skipTests -D accumulo.version="$ACCUMULO_VERSION" -D hadoop.version="$HADOOP_VERSION" -D zookeeper.version="$ZOOKEEPER_VERSION"
31 | fi
32 |
--------------------------------------------------------------------------------
/conf/log4j2.properties:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one or more
2 | # contributor license agreements. See the NOTICE file distributed with
3 | # this work for additional information regarding copyright ownership.
4 | # The ASF licenses this file to You under the Apache License, Version 2.0
5 | # (the "License"); you may not use this file except in compliance with
6 | # the License. You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | status = info
17 | dest = err
18 | name = AccumuloExampleConfig
19 | monitorInterval = 30
20 |
21 | appender.console.type = Console
22 | appender.console.name = STDERR
23 | appender.console.target = SYSTEM_ERR
24 | appender.console.layout.type = PatternLayout
25 | appender.console.layout.pattern = %style{%d{ISO8601}}{dim,cyan} %style{[}{red}%style{%-8c{2}}{dim,blue}%style{]}{red} %highlight{%-5p}%style{:}{red} %m%n
26 |
27 | logger.zookeeper.name = org.apache.zookeeper
28 | logger.zookeeper.level = error
29 |
30 | rootLogger.level = info
31 | rootLogger.appenderRef.console.ref = STDERR
32 |
33 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/cli/ClientOnDefaultTable.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.cli;
18 |
19 | import com.beust.jcommander.Parameter;
20 |
21 | public class ClientOnDefaultTable extends ClientOpts {
22 | @Parameter(names = "--table", description = "table to use")
23 | private String tableName;
24 |
25 | public ClientOnDefaultTable(String table) {
26 | this.tableName = table;
27 | }
28 |
29 | public String getTableName() {
30 | return tableName;
31 | }
32 |
33 | public void setTableName(String tableName) {
34 | this.tableName = tableName;
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/spark/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Licensed to the Apache Software Foundation (ASF) under one or more
4 | # contributor license agreements. See the NOTICE file distributed with
5 | # this work for additional information regarding copyright ownership.
6 | # The ASF licenses this file to You under the Apache License, Version 2.0
7 | # (the "License"); you may not use this file except in compliance with
8 | # the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 |
19 | if [[ -z "$1" || -z "$2" ]]; then
20 | echo "Usage: ./run.sh [bulk|batch] /path/to/accumulo-client.properties"
21 | exit 1
22 | fi
23 |
24 | JAR=./target/accumulo-spark-shaded.jar
25 | if [[ ! -f $JAR ]]; then
26 | mvn clean package -P create-shade-jar
27 | fi
28 |
29 | if [[ -z "$SPARK_HOME" ]]; then
30 | echo "SPARK_HOME must be set!"
31 | exit 1
32 | fi
33 |
34 | if [[ -z "$HADOOP_CONF_DIR" ]]; then
35 | echo "HADOOP_CONF_DIR must be set!"
36 | exit 1
37 | fi
38 |
39 | "$SPARK_HOME"/bin/spark-submit \
40 | --class org.apache.accumulo.spark.CopyPlus5K \
41 | --master yarn \
42 | --deploy-mode client \
43 | $JAR \
44 | $1 $2
45 |
--------------------------------------------------------------------------------
/src/test/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one or more
2 | # contributor license agreements. See the NOTICE file distributed with
3 | # this work for additional information regarding copyright ownership.
4 | # The ASF licenses this file to You under the Apache License, Version 2.0
5 | # (the "License"); you may not use this file except in compliance with
6 | # the License. You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | log4j.rootLogger=WARN, CA
17 | log4j.appender.CA=org.apache.log4j.ConsoleAppender
18 | log4j.appender.CA.layout=org.apache.log4j.PatternLayout
19 | log4j.appender.CA.layout.ConversionPattern=[%t] %-5p %c %x - %m%n
20 |
21 | log4j.logger.org.apache.accumulo.core.clientImpl.ServerClient=ERROR
22 | log4j.logger.org.apache.hadoop.mapred=ERROR
23 | log4j.logger.org.apache.hadoop.mapreduce=ERROR
24 | log4j.logger.org.apache.hadoop.metrics2=ERROR
25 | log4j.logger.org.apache.hadoop.util.ProcessTree=ERROR
26 | log4j.logger.org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter=ERROR
27 | log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR
28 | log4j.logger.org.apache.zookeeper.ClientCnxn=FATAL
29 |
--------------------------------------------------------------------------------
/bin/runmr:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env bash
2 |
3 | # Licensed to the Apache Software Foundation (ASF) under one or more
4 | # contributor license agreements. See the NOTICE file distributed with
5 | # this work for additional information regarding copyright ownership.
6 | # The ASF licenses this file to You under the Apache License, Version 2.0
7 | # (the "License"); you may not use this file except in compliance with
8 | # the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | bin_dir=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
19 | ex_home=$( cd "$( dirname "$bin_dir" )" && pwd )
20 |
21 | function print_usage() {
22 | cat <<EOF
23 | 
24 | Usage: runmr <application> args..
25 | EOF
26 | }
27 |
28 | if [ -z "$1" ]; then
29 | echo "ERROR: needs to be set"
30 | print_usage
31 | exit 1
32 | fi
33 |
34 | if [ -f "$ex_home/conf/env.sh" ]; then
35 | . "$ex_home"/conf/env.sh
36 | else
37 | . "$ex_home"/conf/env.sh.example
38 | fi
39 |
40 | # Build shaded jar (if not built already)
41 | "$ex_home"/bin/build
42 |
43 | export HADOOP_USE_CLIENT_CLASSLOADER=true
44 | "$HADOOP_HOME"/bin/yarn jar $EXAMPLES_JAR_PATH "org.apache.accumulo.examples.$1" ${*:2}
45 |
--------------------------------------------------------------------------------
/bin/runex:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env bash
2 |
3 | # Licensed to the Apache Software Foundation (ASF) under one or more
4 | # contributor license agreements. See the NOTICE file distributed with
5 | # this work for additional information regarding copyright ownership.
6 | # The ASF licenses this file to You under the Apache License, Version 2.0
7 | # (the "License"); you may not use this file except in compliance with
8 | # the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | bin_dir=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
19 | ex_home=$( cd "$( dirname "$bin_dir" )" && pwd )
20 |
21 | function print_usage() {
22 | cat <<EOF
23 | 
24 | Usage: runex <application> args..
25 | EOF
26 | }
27 |
28 | if [ -z "$1" ]; then
29 | echo "ERROR: needs to be set"
30 | print_usage
31 | exit 1
32 | fi
33 |
34 | if [ -f "$ex_home/conf/env.sh" ]; then
35 | . "$ex_home"/conf/env.sh
36 | else
37 | . "$ex_home"/conf/env.sh.example
38 | fi
39 |
40 | # Build shaded jar (if not built already)
41 | "$ex_home"/bin/build
42 |
43 | export CLASSPATH="$EXAMPLES_JAR_PATH:$ex_home/conf:$ACCUMULO_HOME/lib/*:$CLASSPATH"
44 | java "org.apache.accumulo.examples.$1" ${*:2}
45 |
46 |
--------------------------------------------------------------------------------
/conf/env.sh.example:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one or more
2 | # contributor license agreements. See the NOTICE file distributed with
3 | # this work for additional information regarding copyright ownership.
4 | # The ASF licenses this file to You under the Apache License, Version 2.0
5 | # (the "License"); you may not use this file except in compliance with
6 | # the License. You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # General
17 | # =======
18 |
19 | ## Hadoop installation
20 | export HADOOP_HOME="${HADOOP_HOME:-/path/to/hadoop}"
21 | ## Accumulo installation
22 | export ACCUMULO_HOME="${ACCUMULO_HOME:-/path/to/accumulo}"
23 | ## Path to Accumulo client properties
24 | export ACCUMULO_CLIENT_PROPS="$ACCUMULO_HOME/conf/accumulo-client.properties"
25 |
26 | # Configuration
27 | # =============
28 | conf_dir=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
29 |
30 | # Shaded jar
31 | # ===============
32 | # Versions set below will be what is included in the shaded jar
33 | export ACCUMULO_VERSION="`$ACCUMULO_HOME/bin/accumulo version`"
34 | export HADOOP_VERSION="`$HADOOP_HOME/bin/hadoop version | head -n1 | awk '{print $2}'`"
35 | export ZOOKEEPER_VERSION=3.7.0
36 | # Path to shaded test jar
37 | ex_home=$( cd "$( dirname "$conf_dir" )" && pwd )
38 | export EXAMPLES_JAR_PATH="${ex_home}/target/accumulo-examples-shaded.jar"
39 |
--------------------------------------------------------------------------------
/src/test/java/org/apache/accumulo/examples/filedata/KeyUtilTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.filedata;
18 |
19 | import static org.junit.jupiter.api.Assertions.assertEquals;
20 |
21 | import org.apache.hadoop.io.Text;
22 | import org.junit.jupiter.api.Test;
23 |
24 | public class KeyUtilTest {
25 | public static void checkSeps(String... s) {
26 | Text t = KeyUtil.buildNullSepText(s);
27 | String[] rets = KeyUtil.splitNullSepText(t);
28 |
29 | int length = 0;
30 | for (String str : s)
31 | length += str.length();
32 | assertEquals(t.getLength(), length + s.length - 1);
33 | assertEquals(rets.length, s.length);
34 | for (int i = 0; i < s.length; i++)
35 | assertEquals(s[i], rets[i]);
36 | }
37 |
38 | @Test
39 | public void testNullSep() {
40 | checkSeps("abc", "d", "", "efgh");
41 | checkSeps("ab", "");
42 | checkSeps("abcde");
43 | checkSeps("");
44 | checkSeps("", "");
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/client/Flush.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.client;
18 |
19 | import org.apache.accumulo.core.client.AccumuloClient;
20 | import org.apache.accumulo.core.client.AccumuloException;
21 | import org.apache.accumulo.core.client.AccumuloSecurityException;
22 | import org.apache.accumulo.core.client.TableNotFoundException;
23 | import org.apache.accumulo.examples.cli.ClientOnRequiredTable;
24 |
25 | /**
26 | * Simple example for using tableOperations() (like create, delete, flush, etc).
27 | */
28 | public class Flush {
29 |
30 | public static void main(String[] args)
31 | throws AccumuloSecurityException, AccumuloException, TableNotFoundException {
32 | ClientOnRequiredTable opts = new ClientOnRequiredTable();
33 | opts.parseArgs(Flush.class.getName(), args);
34 | try (AccumuloClient client = opts.createAccumuloClient()) {
35 | client.tableOperations().flush(opts.getTableName(), null, null, true);
36 | }
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/docs/helloworld.md:
--------------------------------------------------------------------------------
1 |
17 | # Apache Accumulo Hello World Example
18 |
19 | This tutorial uses the following Java classes:
20 |
21 | * [Insert.java] - Inserts 10K rows (50K entries) into Accumulo, with each row having 5 entries
22 | * [Read.java] - Reads data between two rows
23 |
24 | Inserts data with a BatchWriter:
25 |
26 | $ ./bin/runex helloworld.Insert
27 |
28 | On the Accumulo monitor page at the URL below (you may need to replace 'localhost' with the name or IP of your server), you should see 50K entries
29 |
30 | http://localhost:9995/
31 |
32 | To view the entries, use the shell (run `accumulo shell -u username -p password` to access it) to scan the table:
33 |
34 | username@instance> table examples.hellotable
35 | username@instance examples.hellotable> scan
36 |
37 | You can also use a Java class to scan the table:
38 |
39 | $ ./bin/runex helloworld.Read
40 |
41 | [Insert.java]: ../src/main/java/org/apache/accumulo/examples/helloworld/Insert.java
42 | [Read.java]: ../src/main/java/org/apache/accumulo/examples/helloworld/Read.java
43 |
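44 | For reference, the write path is just a `BatchWriter` adding `Mutation`s. Below is a
45 | minimal sketch (not the exact contents of [Insert.java]; the client properties path
46 | is an assumption):
47 | 
48 | ```java
49 | import org.apache.accumulo.core.client.Accumulo;
50 | import org.apache.accumulo.core.client.AccumuloClient;
51 | import org.apache.accumulo.core.client.BatchWriter;
52 | import org.apache.accumulo.core.data.Mutation;
53 | 
54 | public class InsertSketch {
55 |   public static void main(String[] args) throws Exception {
56 |     try (AccumuloClient client = Accumulo.newClient()
57 |         .from("/path/to/accumulo-client.properties").build();
58 |         BatchWriter writer = client.createBatchWriter("examples.hellotable")) {
59 |       // 10K rows with 5 entries each = 50K entries, as described above
60 |       for (int i = 0; i < 10_000; i++) {
61 |         Mutation m = new Mutation("row_" + i);
62 |         for (int j = 0; j < 5; j++) {
63 |           m.put("colfam", "colqual_" + j, "value_" + j);
64 |         }
65 |         writer.addMutation(m);
66 |       }
67 |     }
68 |   }
69 | }
70 | ```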
--------------------------------------------------------------------------------
/contrib/import-control.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/spark/README.md:
--------------------------------------------------------------------------------
1 |
17 | # Apache Accumulo Spark Example
18 |
19 | ## Requirements
20 |
21 | * Accumulo 2.0+
22 | * Hadoop YARN installed & `HADOOP_CONF_DIR` set in environment
23 | * Spark installed & `SPARK_HOME` set in environment
24 |
25 | ## Spark example
26 |
27 | The [CopyPlus5K] example will create an Accumulo table called `spark_example_input`
28 | and write 100 key/value entries into Accumulo with the values `0..99`. It then launches
29 | a Spark application that does the following:
30 |
31 | * Read data from `spark_example_input` table using `AccumuloInputFormat`
32 | * Add 5000 to each value
33 | * Write the data to a new Accumulo table (called `spark_example_output`) using one of
34 | two methods.
35 | 1. **Bulk import** - Write the data to an RFile in HDFS using `AccumuloFileOutputFormat` and
36 | bulk import it into the Accumulo table.
37 | 2. **Batchwriter** - Create a `BatchWriter` in Spark code and write to the table directly.
38 |
39 | This application can be run using the command:
40 |
41 | ./run.sh batch /path/to/accumulo-client.properties
42 |
43 | Change `batch` to `bulk` to use the bulk import method.
44 |
45 | [CopyPlus5K]: src/main/java/org/apache/accumulo/spark/CopyPlus5K.java
46 |
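47 | The read side of [CopyPlus5K] follows the standard Hadoop input pattern. Below is a
48 | minimal sketch of loading an Accumulo table into a Spark pair RDD (a sketch only, not
49 | the example's exact code; the client properties path is an assumption):
50 | 
51 | ```java
52 | import java.util.Properties;
53 | 
54 | import org.apache.accumulo.core.client.Accumulo;
55 | import org.apache.accumulo.core.data.Key;
56 | import org.apache.accumulo.core.data.Value;
57 | import org.apache.accumulo.hadoop.mapreduce.AccumuloInputFormat;
58 | import org.apache.hadoop.mapreduce.Job;
59 | import org.apache.spark.SparkConf;
60 | import org.apache.spark.api.java.JavaPairRDD;
61 | import org.apache.spark.api.java.JavaSparkContext;
62 | 
63 | public class ReadSketch {
64 |   public static void main(String[] args) throws Exception {
65 |     JavaSparkContext sc = new JavaSparkContext(new SparkConf().setAppName("ReadSketch"));
66 |     Properties props = Accumulo.newClientProperties()
67 |         .from("/path/to/accumulo-client.properties").build();
68 |     // Configure the input format, then hand its configuration to Spark
69 |     Job job = Job.getInstance();
70 |     AccumuloInputFormat.configure().clientProperties(props)
71 |         .table("spark_example_input").store(job);
72 |     JavaPairRDD<Key,Value> data = sc.newAPIHadoopRDD(job.getConfiguration(),
73 |         AccumuloInputFormat.class, Key.class, Value.class);
74 |     System.out.println("entries read: " + data.count());
75 |   }
76 | }
77 | ```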
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/cli/Help.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.cli;
18 |
19 | import com.beust.jcommander.JCommander;
20 | import com.beust.jcommander.Parameter;
21 | import com.beust.jcommander.ParameterException;
22 |
23 | public class Help {
24 | @Parameter(names = {"-h", "-?", "--help", "-help"}, help = true)
25 | public boolean help = false;
26 |
27 | public void parseArgs(String programName, String[] args, Object... others) {
28 | JCommander commander = new JCommander();
29 | commander.addObject(this);
30 | for (Object other : others)
31 | commander.addObject(other);
32 | commander.setProgramName(programName);
33 | try {
34 | commander.parse(args);
35 | } catch (ParameterException ex) {
36 | commander.usage();
37 | exitWithError(ex.getMessage(), 1);
38 | }
39 | if (help) {
40 | commander.usage();
41 | exit(0);
42 | }
43 | }
44 |
45 | public void exit(int status) {
46 | System.exit(status);
47 | }
48 |
49 | public void exitWithError(String message, int status) {
50 | System.err.println(message);
51 | exit(status);
52 | }
53 | }
54 |
--------------------------------------------------------------------------------
/docs/release-testing.md:
--------------------------------------------------------------------------------
1 |
17 | # Apache Accumulo Release Testing
18 |
19 | This repository contains an integration test (IT) that runs all of the
20 | examples. This can be used for testing Accumulo release candidates (RC). To
21 | run the IT against an RC, add the following to `~/.m2/settings.xml`, changing
22 | `XXXX` to the proper id for a given RC.
23 |
24 | ```xml
25 | <profiles>
26 |   <profile>
27 |     <id>rcAccumulo</id>
28 |     <repositories>
29 |       <repository>
30 |         <id>accrc</id>
31 |         <name>accrcp</name>
32 |         <url>https://repository.apache.org/content/repositories/orgapacheaccumulo-XXXX</url>
33 |       </repository>
34 |     </repositories>
35 |     <pluginRepositories>
36 |       <pluginRepository>
37 |         <id>accrcp</id>
38 |         <name>accrcp</name>
39 |         <url>https://repository.apache.org/content/repositories/orgapacheaccumulo-XXXX</url>
40 |       </pluginRepository>
41 |     </pluginRepositories>
42 |   </profile>
43 | </profiles>
44 | ```
45 |
46 | After adding that, you can run the following command in this repository to run the IT.
47 |
48 | ```
49 | mvn clean verify -PrcAccumulo -Daccumulo.version=$ACCUMULO_RC_VERSION
50 | ```
51 |
--------------------------------------------------------------------------------
/docs/bulkIngest.md:
--------------------------------------------------------------------------------
1 |
17 | # Apache Accumulo Bulk Ingest Example
18 |
19 | This is an example of how to bulk ingest data into Accumulo using MapReduce.
20 |
21 | This tutorial uses the following Java classes.
22 |
23 | * [SetupTable.java] - creates the table, 'examples.test_bulk', and sets two split points.
24 | * [BulkIngestExample.java] - creates some data to ingest and then ingests the data using MapReduce
25 | * [VerifyIngest.java] - checks that the data was ingested
26 |
27 | The following commands show how to run this example. This example creates a
28 | table called examples.test_bulk which has two initial split points. Then 1000 rows of
29 | test data are created in HDFS. After that the 1000 rows are ingested into
30 | Accumulo. Then we verify the 1000 rows are in Accumulo.
31 |
32 | $ ./bin/runex mapreduce.bulk.SetupTable
33 | $ ./bin/runmr mapreduce.bulk.BulkIngestExample
34 | $ ./bin/runex mapreduce.bulk.VerifyIngest
35 |
36 | [SetupTable.java]: ../src/main/java/org/apache/accumulo/examples/mapreduce/bulk/SetupTable.java
37 | [BulkIngestExample.java]: ../src/main/java/org/apache/accumulo/examples/mapreduce/bulk/BulkIngestExample.java
38 | [VerifyIngest.java]: ../src/main/java/org/apache/accumulo/examples/mapreduce/bulk/VerifyIngest.java
39 |
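40 | For context, the load step in [BulkIngestExample.java] ends with Accumulo's bulk import
41 | API. A minimal sketch of that call (the HDFS directory is an assumption; the RFiles must
42 | already exist there):
43 | 
44 | ```java
45 | import org.apache.accumulo.core.client.Accumulo;
46 | import org.apache.accumulo.core.client.AccumuloClient;
47 | 
48 | public class BulkLoadSketch {
49 |   public static void main(String[] args) throws Exception {
50 |     try (AccumuloClient client = Accumulo.newClient()
51 |         .from("/path/to/accumulo-client.properties").build()) {
52 |       // Load all RFiles under the directory into the table (Accumulo 2.0+ bulk import API)
53 |       client.tableOperations().importDirectory("/tmp/bulkWork/files")
54 |           .to("examples.test_bulk").load();
55 |     }
56 |   }
57 | }
58 | ```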
--------------------------------------------------------------------------------
/docs/rowhash.md:
--------------------------------------------------------------------------------
1 |
17 | # Apache Accumulo RowHash Example
18 |
19 | This example shows a simple map/reduce job that reads from an Accumulo table and
20 | writes back into that table.
21 |
22 | To run this example you will need some data in a table. The following will
23 | put a trivial amount of data into Accumulo using the Accumulo shell:
24 |
25 | $ accumulo shell
26 | username@instance> createnamespace examples
27 | username@instance> createtable examples.rowhash
28 | username@instance examples.rowhash> insert a-row cf cq value
29 | username@instance examples.rowhash> insert b-row cf cq value
30 | username@instance examples.rowhash> quit
31 |
32 | The RowHash class will insert a hash for each row in the table if it contains a
33 | specified column. Here's how to run the map/reduce job:
34 |
35 | $ ./bin/runmr mapreduce.RowHash -t examples.rowhash --column cf:cq
36 |
37 | Now we can scan the table and see the hashes:
38 |
39 | $ accumulo shell
40 | username@instance> scan -t examples.rowhash
41 | a-row cf:cq [] value
42 | a-row cf-HASHTYPE:cq-MD5BASE64 [] IGPBYI1uC6+AJJxC4r5YBA==
43 | b-row cf:cq [] value
44 | b-row cf-HASHTYPE:cq-MD5BASE64 [] IGPBYI1uC6+AJJxC4r5YBA==
45 | username@instance>
46 |
47 |
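48 | The heart of the job is a mapper that emits one extra mutation per row. A rough sketch of
49 | the map logic (simplified, not the exact code in RowHash):
50 | 
51 | ```java
52 | import java.io.IOException;
53 | import java.security.MessageDigest;
54 | import java.security.NoSuchAlgorithmException;
55 | import java.util.Base64;
56 | 
57 | import org.apache.accumulo.core.data.Key;
58 | import org.apache.accumulo.core.data.Mutation;
59 | import org.apache.accumulo.core.data.Value;
60 | import org.apache.hadoop.io.Text;
61 | import org.apache.hadoop.mapreduce.Mapper;
62 | 
63 | public class HashMapper extends Mapper<Key,Value,Text,Mutation> {
64 |   @Override
65 |   protected void map(Key key, Value value, Context context)
66 |       throws IOException, InterruptedException {
67 |     try {
68 |       // Hash the value and write it back under a parallel column family/qualifier
69 |       byte[] digest = MessageDigest.getInstance("MD5").digest(value.get());
70 |       Mutation m = new Mutation(key.getRow());
71 |       m.put(key.getColumnFamily() + "-HASHTYPE", key.getColumnQualifier() + "-MD5BASE64",
72 |           Base64.getEncoder().encodeToString(digest));
73 |       context.write(null, m); // a null table name means the job's default output table
74 |     } catch (NoSuchAlgorithmException e) {
75 |       throw new IOException(e);
76 |     }
77 |   }
78 | }
79 | ```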
--------------------------------------------------------------------------------
/docs/tracing.md:
--------------------------------------------------------------------------------
1 |
17 | # Apache Accumulo Tracing Example
18 |
19 | This tutorial uses the [TracingExample.java] Java class to create an OpenTelemetry
20 | span in the TracingExample application and then create and read entries from Accumulo.
21 | Tracing output should show up in the logs for the TracingExample class and the Accumulo client,
22 | as well as in the logs for the Accumulo server processes.
23 |
24 | ## Setup
25 |
26 | 1. Download the OpenTelemetry [JavaAgent] jar file and place it into the `/path/to/accumulo/lib/` directory.
27 | 2. Add the property `general.opentelemetry.enabled=true` to `accumulo.properties`.
28 | 3. Set the following environment variables in your environment:
29 |
30 | JAVA_TOOL_OPTIONS="-javaagent:/path/to/accumulo/lib/opentelemetry-javaagent-1.12.1.jar"
31 | OTEL_TRACES_EXPORTER="logging"
32 |
33 | ## Run the Example
34 |
35 | 1. Start Accumulo
36 | 2. Run the Tracing Example:
37 |
38 | $ ./bin/runex client.TracingExample --createtable --deletetable --create --read --table traceTest
39 |
40 | [JavaAgent]: https://search.maven.org/remotecontent?filepath=io/opentelemetry/javaagent/opentelemetry-javaagent/1.12.1/opentelemetry-javaagent-1.12.1.jar
41 | [TracingExample.java]: ../src/main/java/org/apache/accumulo/examples/client/TracingExample.java
42 |
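43 | Inside the example, spans are created through the OpenTelemetry API. A minimal sketch of
44 | the pattern (the tracer and span names here are illustrative):
45 | 
46 | ```java
47 | import io.opentelemetry.api.GlobalOpenTelemetry;
48 | import io.opentelemetry.api.trace.Span;
49 | import io.opentelemetry.api.trace.Tracer;
50 | import io.opentelemetry.context.Scope;
51 | 
52 | public class SpanSketch {
53 |   public static void main(String[] args) {
54 |     Tracer tracer = GlobalOpenTelemetry.getTracer("TracingExample");
55 |     Span span = tracer.spanBuilder("createEntries").startSpan();
56 |     try (Scope scope = span.makeCurrent()) {
57 |       // Accumulo client calls made here are attributed to this span
58 |     } finally {
59 |       span.end();
60 |     }
61 |   }
62 | }
63 | ```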
--------------------------------------------------------------------------------
/spark/contrib/import-control.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/docs/tabletofile.md:
--------------------------------------------------------------------------------
1 |
17 | # Apache Accumulo Table-to-File Example
18 |
19 | This example uses mapreduce to extract specified columns from an existing table.
20 |
21 | To run this example you will need some data in a table. The following will
22 | put a trivial amount of data into Accumulo using the Accumulo shell:
23 |
24 | $ accumulo shell
25 | root@instance> createnamespace examples
26 | root@instance> createtable examples.input
27 | root@instance examples.input> insert dog cf cq dogvalue
28 | root@instance examples.input> insert cat cf cq catvalue
29 | root@instance examples.input> insert junk family qualifier junkvalue
30 | root@instance examples.input> quit
31 |
32 | The TableToFile class configures a map-only job that reads the specified columns and
33 | writes the key/value pairs to a file in HDFS.
34 |
35 | The following will extract the rows containing the column "cf:cq":
36 |
37 | $ ./bin/runmr mapreduce.TableToFile -t examples.input --columns cf:cq --output /tmp/output
38 |
39 | $ hadoop fs -ls /tmp/output
40 | Found 2 items
41 | -rw-r--r-- 3 root supergroup 0 2021-05-04 10:32 /tmp/output/_SUCCESS
42 | -rw-r--r-- 3 root supergroup 44 2021-05-04 10:32 /tmp/output/part-m-00000
43 |
44 | We can see the output of our little map-reduce job:
45 |
46 | $ hadoop fs -text /tmp/output/part-m-00000
47 | cat cf:cq [] catvalue
48 | dog cf:cq [] dogvalue
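49 | 
50 | For comparison, the same columns can be pulled without MapReduce using a plain Scanner
51 | that fetches only the wanted column. A minimal sketch (the client properties path is an
52 | assumption):
53 | 
54 | ```java
55 | import java.util.Map;
56 | 
57 | import org.apache.accumulo.core.client.Accumulo;
58 | import org.apache.accumulo.core.client.AccumuloClient;
59 | import org.apache.accumulo.core.client.Scanner;
60 | import org.apache.accumulo.core.data.Key;
61 | import org.apache.accumulo.core.data.Value;
62 | import org.apache.accumulo.core.security.Authorizations;
63 | import org.apache.hadoop.io.Text;
64 | 
65 | public class ColumnDump {
66 |   public static void main(String[] args) throws Exception {
67 |     try (AccumuloClient client = Accumulo.newClient()
68 |         .from("/path/to/accumulo-client.properties").build();
69 |         Scanner scanner = client.createScanner("examples.input", Authorizations.EMPTY)) {
70 |       scanner.fetchColumn(new Text("cf"), new Text("cq")); // only return the cf:cq column
71 |       for (Map.Entry<Key,Value> e : scanner) {
72 |         System.out.println(e.getKey() + " " + e.getValue());
73 |       }
74 |     }
75 |   }
76 | }
77 | ```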
--------------------------------------------------------------------------------
/docs/regex.md:
--------------------------------------------------------------------------------
1 |
17 | # Apache Accumulo Regex Example
18 |
19 | This example uses MapReduce and Accumulo to find items using regular expressions.
20 | This is accomplished using a map-only mapreduce job and a scan-time iterator.
21 |
22 | To run this example you will need some data in a table. The following will
24 | put a trivial amount of data into Accumulo using the Accumulo shell:
24 |
25 | $ accumulo shell
26 | username@instance> createnamespace examples
27 | username@instance> createtable examples.regex
28 | username@instance examples.regex> insert dogrow dogcf dogcq dogvalue
29 | username@instance examples.regex> insert catrow catcf catcq catvalue
30 | username@instance examples.regex> quit
31 |
32 | The RegexExample class sets an iterator on the scanner. This does pattern matching
33 | against each key/value in Accumulo, and only returns matching items. It will do this
34 | in parallel and will store the results in files in HDFS.
35 |
36 | The following will search for any rows in the input table that start with "dog":
37 |
38 | $ ./bin/runmr mapreduce.RegexExample -t examples.regex --rowRegex 'dog.*' --output /tmp/output
39 |
40 | $ hdfs dfs -ls /tmp/output
41 | Found 2 items
42 | -rw-r--r-- 1 username supergroup 0 2013-01-10 14:11 /tmp/output/_SUCCESS
43 | -rw-r--r-- 1 username supergroup 51 2013-01-10 14:10 /tmp/output/part-m-00000
44 |
45 | We can see the output of our little map-reduce job:
46 |
47 | $ hdfs dfs -cat /tmp/output/part-m-00000
48 | dogrow dogcf:dogcq [] 1357844987994 false dogvalue
49 |
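50 | The same filtering can also be done in a plain scan, without MapReduce, by attaching the
51 | built-in `RegExFilter` iterator to a scanner. A minimal sketch (the client properties
52 | path is an assumption):
53 | 
54 | ```java
55 | import java.util.Map;
56 | 
57 | import org.apache.accumulo.core.client.Accumulo;
58 | import org.apache.accumulo.core.client.AccumuloClient;
59 | import org.apache.accumulo.core.client.IteratorSetting;
60 | import org.apache.accumulo.core.client.Scanner;
61 | import org.apache.accumulo.core.data.Key;
62 | import org.apache.accumulo.core.data.Value;
63 | import org.apache.accumulo.core.iterators.user.RegExFilter;
64 | import org.apache.accumulo.core.security.Authorizations;
65 | 
66 | public class RegexScan {
67 |   public static void main(String[] args) throws Exception {
68 |     try (AccumuloClient client = Accumulo.newClient()
69 |         .from("/path/to/accumulo-client.properties").build();
70 |         Scanner scanner = client.createScanner("examples.regex", Authorizations.EMPTY)) {
71 |       IteratorSetting setting = new IteratorSetting(30, "regex", RegExFilter.class);
72 |       // Match rows starting with "dog"; null means "don't filter on this field"
73 |       RegExFilter.setRegexs(setting, "dog.*", null, null, null, false);
74 |       scanner.addScanIterator(setting);
75 |       for (Map.Entry<Key,Value> e : scanner) {
76 |         System.out.println(e.getKey() + " " + e.getValue());
77 |       }
78 |     }
79 |   }
80 | }
81 | ```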
--------------------------------------------------------------------------------
/src/test/java/org/apache/accumulo/examples/constraints/NumericValueConstraintTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.constraints;
18 |
19 | import static org.junit.jupiter.api.Assertions.assertEquals;
20 | import static org.junit.jupiter.api.Assertions.assertNull;
21 |
22 | import org.apache.accumulo.core.data.Mutation;
23 | import org.apache.accumulo.core.data.Value;
24 | import org.junit.jupiter.api.Test;
25 |
26 | import com.google.common.collect.Iterables;
27 |
28 | public class NumericValueConstraintTest {
29 |
30 | private final NumericValueConstraint nvc = new NumericValueConstraint();
31 |
32 | @Test
33 | public void testCheck() {
34 | Mutation goodMutation = new Mutation("r");
35 | goodMutation.put("cf", "cq", new Value("1234".getBytes()));
36 | assertNull(nvc.check(null, goodMutation));
37 |
38 | // Check that multiple bad mutations result in one violation only
39 | Mutation badMutation = new Mutation("r");
40 | badMutation.put("cf", "cq", new Value("foo1234".getBytes()));
41 | badMutation.put("cf2", "cq2", new Value("foo1234".getBytes()));
42 | assertEquals(NumericValueConstraint.NON_NUMERIC_VALUE,
43 | Iterables.getOnlyElement(nvc.check(null, badMutation)).shortValue());
44 | }
45 |
46 | @Test
47 | public void testGetViolationDescription() {
48 | assertEquals(NumericValueConstraint.VIOLATION_MESSAGE,
49 | nvc.getViolationDescription(NumericValueConstraint.NON_NUMERIC_VALUE));
50 | assertNull(nvc.getViolationDescription((short) 2));
51 | }
52 | }
53 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/Common.java:
--------------------------------------------------------------------------------
1 | package org.apache.accumulo.examples;
2 |
3 | import org.apache.accumulo.core.client.AccumuloClient;
4 | import org.apache.accumulo.core.client.AccumuloException;
5 | import org.apache.accumulo.core.client.AccumuloSecurityException;
6 | import org.apache.accumulo.core.client.NamespaceExistsException;
7 | import org.apache.accumulo.core.client.TableExistsException;
8 | import org.apache.accumulo.core.client.admin.NewTableConfiguration;
9 | import org.slf4j.Logger;
10 | import org.slf4j.LoggerFactory;
11 |
12 | public class Common {
13 |
14 | private static final Logger log = LoggerFactory.getLogger(Common.class);
15 |
16 | public static final String NAMESPACE = "examples";
17 |
18 | public static final String TABLE_EXISTS_MSG = "Table already exists. User may wish to delete "
19 | + "existing table and re-run example. Table name: ";
20 | public static final String NAMESPACE_EXISTS_MSG = "Namespace already exists. User can ignore "
21 | + "this message and continue. Namespace: ";
22 |
23 | /**
24 | * Create a table within the supplied namespace.
25 | *
26 | * The incoming table name is expected to have the form "namespace.tablename". If the namespace
27 | * portion of the name is blank then the table is created outside of a namespace.
28 | *
29 | * @param client
30 | * AccumuloClient instance
31 | * @param table
32 | * The name of the table to be created
33 | */
34 | public static void createTableWithNamespace(final AccumuloClient client, final String table)
35 | throws AccumuloException, AccumuloSecurityException {
36 | createTableWithNamespace(client, table, new NewTableConfiguration());
37 | }
38 |
39 | public static void createTableWithNamespace(final AccumuloClient client, final String table,
40 | final NewTableConfiguration newTableConfig)
41 | throws AccumuloException, AccumuloSecurityException {
42 | String[] name = table.split("\\.");
43 | if (name.length == 2 && !name[0].isEmpty()) {
44 | try {
45 | client.namespaceOperations().create(name[0]);
46 | } catch (NamespaceExistsException e) {
47 | log.info(NAMESPACE_EXISTS_MSG + name[0]);
48 | }
49 | }
50 | try {
51 | client.tableOperations().create(table, newTableConfig);
52 | } catch (TableExistsException e) {
53 | log.warn(TABLE_EXISTS_MSG + table);
54 | }
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/helloworld/Read.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.helloworld;
18 |
19 | import java.util.Map.Entry;
20 |
21 | import org.apache.accumulo.core.client.Accumulo;
22 | import org.apache.accumulo.core.client.AccumuloClient;
23 | import org.apache.accumulo.core.client.Scanner;
24 | import org.apache.accumulo.core.client.TableNotFoundException;
25 | import org.apache.accumulo.core.data.Key;
26 | import org.apache.accumulo.core.data.Range;
27 | import org.apache.accumulo.core.data.Value;
28 | import org.apache.accumulo.core.security.Authorizations;
29 | import org.apache.accumulo.examples.cli.ClientOpts;
30 | import org.slf4j.Logger;
31 | import org.slf4j.LoggerFactory;
32 |
33 | /**
34 | * Reads all data between two rows
35 | */
36 | public class Read {
37 |
38 | private static final Logger log = LoggerFactory.getLogger(Read.class);
39 |
40 | public static void main(String[] args) throws TableNotFoundException {
41 | ClientOpts opts = new ClientOpts();
42 | opts.parseArgs(Read.class.getName(), args);
43 |
44 | try (AccumuloClient client = Accumulo.newClient().from(opts.getClientPropsPath()).build();
45 | Scanner scan = client.createScanner(Insert.HELLO_TABLE, Authorizations.EMPTY)) {
46 | scan.setRange(new Range(new Key("row_0"), new Key("row_1002")));
47 | for (Entry<Key,Value> e : scan) {
48 | Key key = e.getKey();
49 | log.trace(key.getRow() + " " + key.getColumnFamily() + " " + key.getColumnQualifier() + " "
50 | + e.getValue());
51 | }
52 | log.info("Scan complete");
53 | }
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/filedata/KeyUtil.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.filedata;
18 |
19 | import java.util.ArrayList;
20 |
21 | import org.apache.hadoop.io.Text;
22 |
23 | /**
24 | * A utility for creating and parsing null-byte separated strings into/from Text objects.
25 | */
26 | public class KeyUtil {
27 | public static final byte[] nullbyte = new byte[] {0};
28 |
29 | /**
30 | * Join some number of strings using a null byte separator into a text object.
31 | *
32 | * @param s
33 | * strings
34 | * @return a text object containing the strings separated by null bytes
35 | */
36 | public static Text buildNullSepText(String... s) {
37 | Text t = new Text(s[0]);
38 | for (int i = 1; i < s.length; i++) {
39 | t.append(nullbyte, 0, 1);
40 | t.append(s[i].getBytes(), 0, s[i].length());
41 | }
42 | return t;
43 | }
44 |
45 | /**
46 | * Split a text object using a null byte separator into an array of strings.
47 | *
48 | * @param t
49 | * null-byte separated text object
50 | * @return an array of strings
51 | */
52 | public static String[] splitNullSepText(Text t) {
53 | ArrayList<String> s = new ArrayList<>();
54 | byte[] b = t.getBytes();
55 | int lastindex = 0;
56 | for (int i = 0; i < t.getLength(); i++) {
57 | if (b[i] == (byte) 0) {
58 | s.add(new String(b, lastindex, i - lastindex));
59 | lastindex = i + 1;
60 | }
61 | }
62 | s.add(new String(b, lastindex, t.getLength() - lastindex));
63 | return s.toArray(new String[s.size()]);
64 | }
65 | }
66 |
--------------------------------------------------------------------------------
/docs/constraints.md:
--------------------------------------------------------------------------------
1 |
17 | # Apache Accumulo Constraints Example
18 |
19 | This tutorial uses the following Java classes, which can be found in org.apache.accumulo.examples.constraints:
20 |
21 | * [AlphaNumKeyConstraint.java] - a constraint that requires alphanumeric keys
22 | * [NumericValueConstraint.java] - a constraint that requires numeric string values
23 | * [MaxMutationSize.java] - a constraint that limits the size of mutations accepted into a table
24 |
25 | AlphaNumKeyConstraint prevents insertion of keys with characters outside the ranges a-z, A-Z, and 0-9.
26 | NumericValueConstraint prevents insertion of values with characters outside the range 0-9. The examples create mutations
27 | that violate these constraints, causing an exception to be thrown.
28 |
29 | $ ./bin/runex constraints.AlphaNumKeyConstraint
30 | $ ./bin/runex constraints.NumericValueConstraint
31 |
32 | The MaxMutationSize constraint will force the table to reject any mutation that is larger than 1/256th of the
33 | working memory of the tablet server. The following example attempts to ingest a single row with a million columns,
34 | which exceeds the memory limit. Depending on the amount of Java heap your tserver(s) are given, you may have to
35 | increase the number of columns provided to see the failure.
36 |
37 | $ ./bin/runex constraints.MaxMutationSize
38 |
39 | [AlphaNumKeyConstraint.java]: ../src/main/java/org/apache/accumulo/examples/constraints/AlphaNumKeyConstraint.java
40 | [NumericValueConstraint.java]: ../src/main/java/org/apache/accumulo/examples/constraints/NumericValueConstraint.java
41 | [MaxMutationSize.java]: ../src/main/java/org/apache/accumulo/examples/constraints/MaxMutationSize.java
42 |
43 |
44 |
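45 | A constraint is just a class implementing the Constraint interface that returns violation
46 | codes for offending mutations. A minimal hypothetical sketch (not one of the shipped
47 | examples; the package name is from Accumulo 2.1, where the interface lives in
48 | `org.apache.accumulo.core.data.constraints`):
49 | 
50 | ```java
51 | import java.util.List;
52 | 
53 | import org.apache.accumulo.core.data.Mutation;
54 | import org.apache.accumulo.core.data.constraints.Constraint;
55 | 
56 | public class MaxTenUpdatesConstraint implements Constraint {
57 | 
58 |   private static final short TOO_MANY_UPDATES = 1;
59 | 
60 |   @Override
61 |   public String getViolationDescription(short violationCode) {
62 |     return violationCode == TOO_MANY_UPDATES ? "mutation has more than 10 updates" : null;
63 |   }
64 | 
65 |   @Override
66 |   public List<Short> check(Environment env, Mutation mutation) {
67 |     // Returning null means the mutation passes this constraint
68 |     return mutation.getUpdates().size() > 10 ? List.of(TOO_MANY_UPDATES) : null;
69 |   }
70 | }
71 | ```
72 | 
73 | A constraint of this kind is enabled per table by setting a `table.constraint.<num>`
74 | property, e.g. `config -t mytable -s table.constraint.1=com.example.MaxTenUpdatesConstraint`
75 | in the shell.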
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/client/CountingVerifyingReceiver.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.client;
18 |
19 | import static java.nio.charset.StandardCharsets.UTF_8;
20 |
21 | import java.util.Arrays;
22 | import java.util.HashMap;
23 |
24 | import org.apache.accumulo.core.data.Key;
25 | import org.apache.accumulo.core.data.Value;
26 | import org.slf4j.Logger;
27 | import org.slf4j.LoggerFactory;
28 |
29 | /**
30 | * Internal class used to verify validity of data read.
31 | */
32 | class CountingVerifyingReceiver {
33 | private static final Logger log = LoggerFactory.getLogger(CountingVerifyingReceiver.class);
34 |
35 | long count = 0;
36 | int expectedValueSize = 0;
37 | final HashMap<String,Boolean> expectedRows;
38 | 
39 | CountingVerifyingReceiver(HashMap<String,Boolean> expectedRows, int expectedValueSize) {
40 | this.expectedRows = expectedRows;
41 | this.expectedValueSize = expectedValueSize;
42 | }
43 |
44 | public void receive(Key key, Value value) {
45 |
46 | String row = key.getRow().toString();
47 | long rowid = Integer.parseInt(row.split("_")[1]);
48 |
49 | byte[] expectedValue = RandomBatchWriter.createValue(rowid, expectedValueSize);
50 |
51 | if (!Arrays.equals(expectedValue, value.get())) {
52 | log.error("Got unexpected value for " + key + " expected : "
53 | + new String(expectedValue, UTF_8) + " got : " + new String(value.get(), UTF_8));
54 | }
55 |
56 | if (!expectedRows.containsKey(key.getRow().toString())) {
57 | log.error("Got unexpected key " + key);
58 | } else {
59 | expectedRows.put(key.getRow().toString(), true);
60 | }
61 |
62 | count++;
63 | }
64 | }
65 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/mapreduce/bulk/SetupTable.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.mapreduce.bulk;
18 |
19 | import java.util.TreeSet;
20 | import java.util.stream.Collectors;
21 | import java.util.stream.Stream;
22 |
23 | import org.apache.accumulo.core.client.Accumulo;
24 | import org.apache.accumulo.core.client.AccumuloClient;
25 | import org.apache.accumulo.core.client.AccumuloException;
26 | import org.apache.accumulo.core.client.AccumuloSecurityException;
27 | import org.apache.accumulo.core.client.TableNotFoundException;
28 | import org.apache.accumulo.core.client.admin.NewTableConfiguration;
29 | import org.apache.accumulo.examples.Common;
30 | import org.apache.accumulo.examples.cli.ClientOpts;
31 | import org.apache.hadoop.io.Text;
32 |
33 | public final class SetupTable {
34 |
35 | static final String BULK_INGEST_TABLE = Common.NAMESPACE + ".test_bulk";
36 |
37 | private SetupTable() {}
38 |
39 | public static void main(String[] args)
40 | throws AccumuloSecurityException, TableNotFoundException, AccumuloException {
41 |
42 | final Stream<String> splits = Stream.of("row_00000333", "row_00000666");
43 | ClientOpts opts = new ClientOpts();
44 | opts.parseArgs(SetupTable.class.getName(), args);
45 |
46 | try (AccumuloClient client = Accumulo.newClient().from(opts.getClientPropsPath()).build()) {
47 | // create a table with initial partitions
48 | TreeSet<Text> initialPartitions = splits.map(Text::new)
49 | .collect(Collectors.toCollection(TreeSet::new));
50 | Common.createTableWithNamespace(client, BULK_INGEST_TABLE,
51 | new NewTableConfiguration().withSplits(initialPartitions));
52 | }
53 | }
54 | }
55 |
--------------------------------------------------------------------------------
/docs/isolation.md:
--------------------------------------------------------------------------------
1 |
17 | # Apache Accumulo Isolation Example
18 |
19 | Accumulo has an isolated scanner that ensures partial changes to rows are not
20 | seen. Isolation is documented in ../docs/isolation.html and the user manual.
21 |
22 | InterferenceTest is a simple example that shows the effects of scanning with
23 | and without isolation. This program starts two threads. One thread
24 | continually updates all the values in a row to be the same thing, but
25 | different from what it used to be. The other thread continually scans the
26 | table and checks that all values in a row are the same. Without isolation the
27 | scanning thread will sometimes see different values, which is the result of
28 | reading the row at the same time a mutation is changing the row.
29 |
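On the client side, isolation is enabled by wrapping a regular Scanner in an
IsolatedScanner. A minimal sketch (the table name is illustrative):

```java
import java.util.Map.Entry;

import org.apache.accumulo.core.client.Accumulo;
import org.apache.accumulo.core.client.AccumuloClient;
import org.apache.accumulo.core.client.IsolatedScanner;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.security.Authorizations;

public class IsolatedRead {
  public static void main(String[] args) throws Exception {
    try (AccumuloClient client = Accumulo.newClient()
        .from("/path/to/accumulo-client.properties").build();
        Scanner scanner = new IsolatedScanner(
            client.createScanner("examples.isotest", Authorizations.EMPTY))) {
      // Rows are now read in isolation: a partially applied mutation to a row
      // is never visible to this scanner.
      for (Entry<Key,Value> entry : scanner) {
        System.out.println(entry.getKey() + " -> " + entry.getValue());
      }
    }
  }
}
```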
30 | Below, InterferenceTest is run without isolation for 50,000 iterations
31 | and it reports problems.
32 |
33 |
34 | $ accumulo shell -u <username> -p <password> -e 'createnamespace examples'
35 | $ ./bin/runex isolation.InterferenceTest -t examples.isotest --iterations 50000
36 | ERROR Columns in row 053 had multiple values [53, 4553]
37 | ERROR Columns in row 061 had multiple values [561, 61]
38 | ERROR Columns in row 070 had multiple values [570, 1070]
39 | ERROR Columns in row 079 had multiple values [1079, 1579]
40 | ERROR Columns in row 088 had multiple values [2588, 1588]
41 | ERROR Columns in row 106 had multiple values [2606, 3106]
42 | ERROR Columns in row 115 had multiple values [4615, 3115]
43 | finished
44 |
45 | Below, InterferenceTest is run with isolation enabled for 50,000 iterations and
46 | it reports no problems.
47 |
48 | $ ./bin/runex isolation.InterferenceTest -t examples.isotest --iterations 50000 --isolated
49 | finished
50 |
51 |
52 |
--------------------------------------------------------------------------------
/.github/workflows/maven.yaml:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed to the Apache Software Foundation (ASF) under one
3 | # or more contributor license agreements. See the NOTICE file
4 | # distributed with this work for additional information
5 | # regarding copyright ownership. The ASF licenses this file
6 | # to you under the Apache License, Version 2.0 (the
7 | # "License"); you may not use this file except in compliance
8 | # with the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing,
13 | # software distributed under the License is distributed on an
14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | # KIND, either express or implied. See the License for the
16 | # specific language governing permissions and limitations
17 | # under the License.
18 | #
19 |
20 | # This workflow will build a Java project with Maven
21 | # For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-maven
22 |
23 | name: QA
24 |
25 | on:
26 | push:
27 | branches: [ '*' ]
28 | pull_request:
29 | branches: [ '*' ]
30 |
31 | jobs:
32 | mvn:
33 | strategy:
34 | matrix:
35 | profile:
36 | - {name: 'verify', args: 'verify'}
37 | fail-fast: false
38 | timeout-minutes: 60
39 | runs-on: ubuntu-latest
40 | steps:
41 | - uses: actions/checkout@v4
42 | - name: Set up JDK 17
43 | uses: actions/setup-java@v4
44 | with:
45 | distribution: adopt
46 | java-version: 17
47 | cache: 'maven'
48 | - name: Build with Maven (${{ matrix.profile.name }})
49 | run: mvn -B -V -e -ntp "-Dstyle.color=always" ${{ matrix.profile.args }}
50 | env:
51 | MAVEN_OPTS: -Djansi.force=true
52 | - name: Upload unit test results
53 | if: ${{ failure() }}
54 | uses: actions/upload-artifact@v4
55 | with:
56 | name: surefire-reports-${{ matrix.profile.name }}
57 | path: ./**/target/surefire-reports/
58 | if-no-files-found: ignore
59 | - name: Upload integration test results
60 | if: ${{ failure() }}
61 | uses: actions/upload-artifact@v4
62 | with:
63 | name: failsafe-reports-${{ matrix.profile.name }}
64 | path: ./**/target/failsafe-reports/
65 | if-no-files-found: ignore
66 | - name: Upload mini test logs
67 | if: ${{ failure() }}
68 | uses: actions/upload-artifact@v4
69 | with:
70 | name: mini-tests-logs-${{ matrix.profile.name }}
71 | path: ./**/target/**/mini-tests/**/logs/
72 | if-no-files-found: ignore
73 |
74 |
--------------------------------------------------------------------------------
/src/test/java/org/apache/accumulo/examples/constraints/AlphaNumKeyConstraintTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.constraints;
18 |
19 | import static org.junit.jupiter.api.Assertions.assertEquals;
20 | import static org.junit.jupiter.api.Assertions.assertNull;
21 |
22 | import org.apache.accumulo.core.data.Mutation;
23 | import org.apache.accumulo.core.data.Value;
24 | import org.junit.jupiter.api.Test;
25 |
26 | import com.google.common.collect.ImmutableList;
27 |
28 | public class AlphaNumKeyConstraintTest {
29 |
30 | private final AlphaNumKeyConstraint ankc = new AlphaNumKeyConstraint();
31 |
32 | @Test
33 | public void test() {
34 | Mutation goodMutation = new Mutation("Row1");
35 | goodMutation.put("Colf2", "ColQ3", new Value("value".getBytes()));
36 | assertNull(ankc.check(null, goodMutation));
37 |
38 | // Check that violations are in row, cf, cq order
39 | Mutation badMutation = new Mutation("Row#1");
40 | badMutation.put("Colf$2", "Colq%3", new Value("value".getBytes()));
41 | assertEquals(
42 | ImmutableList.of(AlphaNumKeyConstraint.NON_ALPHA_NUM_ROW,
43 | AlphaNumKeyConstraint.NON_ALPHA_NUM_COLF, AlphaNumKeyConstraint.NON_ALPHA_NUM_COLQ),
44 | ankc.check(null, badMutation));
45 | }
46 |
47 | @Test
48 | public void testGetViolationDescription() {
49 | assertEquals(AlphaNumKeyConstraint.ROW_VIOLATION_MESSAGE,
50 | ankc.getViolationDescription(AlphaNumKeyConstraint.NON_ALPHA_NUM_ROW));
51 | assertEquals(AlphaNumKeyConstraint.COLF_VIOLATION_MESSAGE,
52 | ankc.getViolationDescription(AlphaNumKeyConstraint.NON_ALPHA_NUM_COLF));
53 | assertEquals(AlphaNumKeyConstraint.COLQ_VIOLATION_MESSAGE,
54 | ankc.getViolationDescription(AlphaNumKeyConstraint.NON_ALPHA_NUM_COLQ));
55 | assertNull(ankc.getViolationDescription((short) 4));
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/shard/Reverse.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.shard;
18 |
19 | import java.util.Map.Entry;
20 |
21 | import org.apache.accumulo.core.client.Accumulo;
22 | import org.apache.accumulo.core.client.AccumuloClient;
23 | import org.apache.accumulo.core.client.BatchWriter;
24 | import org.apache.accumulo.core.client.Scanner;
25 | import org.apache.accumulo.core.data.Key;
26 | import org.apache.accumulo.core.data.Mutation;
27 | import org.apache.accumulo.core.data.Value;
28 | import org.apache.accumulo.core.security.Authorizations;
29 | import org.apache.accumulo.examples.cli.ClientOpts;
30 | import org.apache.hadoop.io.Text;
31 |
32 | import com.beust.jcommander.Parameter;
33 |
34 | /**
35 | * The program reads an accumulo table written by {@link Index} and writes out to another table. It
36 | * writes out a mapping of documents to terms. The document to term mapping is used by
37 | * {@link ContinuousQuery}.
38 | */
39 | public class Reverse {
40 |
41 | static class Opts extends ClientOpts {
42 |
43 | @Parameter(names = "--shardTable", description = "name of the shard table")
44 | String shardTable;
45 |
46 | @Parameter(names = "--doc2Term", description = "name of the doc2Term table")
47 | String doc2TermTable;
48 | }
49 |
50 | public static void main(String[] args) throws Exception {
51 | Opts opts = new Opts();
52 | opts.parseArgs(Reverse.class.getName(), args);
53 |
54 | try (AccumuloClient client = Accumulo.newClient().from(opts.getClientPropsPath()).build();
55 | Scanner scanner = client.createScanner(opts.shardTable, Authorizations.EMPTY);
56 | BatchWriter bw = client.createBatchWriter(opts.doc2TermTable)) {
57 | for (Entry<Key,Value> entry : scanner) {
58 | Key key = entry.getKey();
59 | Mutation m = new Mutation(key.getColumnQualifier());
60 | m.put(key.getColumnFamily(), new Text(), new Value(new byte[0]));
61 | bw.addMutation(m);
62 | }
63 | }
64 | }
65 | }
66 |
--------------------------------------------------------------------------------
/docs/terasort.md:
--------------------------------------------------------------------------------
1 |
17 | # Apache Accumulo Terasort Example
18 |
19 | This example uses MapReduce to generate random input data that is then
20 | sorted by storing it into Accumulo. The data is very similar to that of the
21 | Hadoop TeraSort benchmark.
22 |
23 | First, make sure the 'examples' namespace exists. If it already exists, the error message can be
24 | ignored.
25 |
26 | $ accumulo shell -u root -p secret -e 'createnamespace examples'
27 |
28 | This example is run with arguments describing the amount of data:
29 |
30 | $ ./bin/runmr mapreduce.TeraSortIngest --count 10 --minKeySize 10 --maxKeySize 10 \
31 | --minValueSize 78 --maxValueSize 78 --table examples.sort --splits 10
32 |
33 | After the map reduce job completes, scan the data:
34 |
35 | $ accumulo shell
36 | username@instance> scan -t examples.sort
37 | +l-$$OE/ZH c: 4 [] GGGGGGGGGGWWWWWWWWWWMMMMMMMMMMCCCCCCCCCCSSSSSSSSSSIIIIIIIIIIYYYYYYYYYYOOOOOOOO
38 | ,C)wDw//u= c: 10 [] CCCCCCCCCCSSSSSSSSSSIIIIIIIIIIYYYYYYYYYYOOOOOOOOOOEEEEEEEEEEUUUUUUUUUUKKKKKKKK
39 | 75@~?'WdUF c: 1 [] IIIIIIIIIIYYYYYYYYYYOOOOOOOOOOEEEEEEEEEEUUUUUUUUUUKKKKKKKKKKAAAAAAAAAAQQQQQQQQ
40 | ;L+!2rT~hd c: 8 [] MMMMMMMMMMCCCCCCCCCCSSSSSSSSSSIIIIIIIIIIYYYYYYYYYYOOOOOOOOOOEEEEEEEEEEUUUUUUUU
41 | LsS8)|.ZLD c: 5 [] OOOOOOOOOOEEEEEEEEEEUUUUUUUUUUKKKKKKKKKKAAAAAAAAAAQQQQQQQQQQGGGGGGGGGGWWWWWWWW
42 | M^*dDE;6^< c: 9 [] UUUUUUUUUUKKKKKKKKKKAAAAAAAAAAQQQQQQQQQQGGGGGGGGGGWWWWWWWWWWMMMMMMMMMMCCCCCCCC
43 | ^Eu)

--------------------------------------------------------------------------------
/docs/filedata.md:
--------------------------------------------------------------------------------
17 | # Apache Accumulo File System Archive Example (Data Only)
18 |
19 | This example archives file data into an Accumulo table. Files with duplicate data are only stored once.
20 | The example has the following classes:
21 |
22 | * CharacterHistogram - A MapReduce that computes a histogram of byte frequency for each file and stores the histogram alongside the file data. An example use of the ChunkInputFormat.
23 | * ChunkCombiner - An Iterator that dedupes file data and sets their visibilities to a combined visibility based on current references to the file data.
24 | * ChunkInputFormat - An Accumulo InputFormat that provides keys containing file info (List<Entry<Key,Value>>) and values with an InputStream over the file (ChunkInputStream).
25 | * ChunkInputStream - An input stream over file data stored in Accumulo.
26 | * FileDataIngest - Takes a list of files and archives them into Accumulo keyed on hashes of the files.
27 | * FileDataQuery - Retrieves file data based on the hash of the file. (Used by the dirlist.Viewer.)
28 | * KeyUtil - A utility for creating and parsing null-byte separated strings into/from Text objects.
29 | * VisibilityCombiner - A utility for merging visibilities into the form (VIS1)|(VIS2)|...
30 |
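As a quick illustration of the query side, file contents can be read back by MD5 hash
with FileDataQuery. A minimal sketch (the hash value is illustrative):

```java
import org.apache.accumulo.core.client.Accumulo;
import org.apache.accumulo.core.client.AccumuloClient;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.accumulo.examples.filedata.FileDataQuery;

public class FetchFileData {
  public static void main(String[] args) throws Exception {
    try (AccumuloClient client = Accumulo.newClient()
        .from("/path/to/accumulo-client.properties").build()) {
      FileDataQuery query = new FileDataQuery(client, "examples.dataTable",
          new Authorizations("exampleVis"));
      // Read the first kilobyte of the file whose MD5 hash is given.
      String head = query.getSomeData("000102030405060708090a0b0c0d0e0f", 1024);
      System.out.println(head);
    }
  }
}
```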
31 | This example is coupled with the [dirlist example][dirlist].
32 |
33 | If you haven't already run the [dirlist example][dirlist], ingest a file with FileDataIngest.
34 |
35 | $ ./bin/runex filedata.FileDataIngest -t examples.dataTable --auths exampleVis --chunk 1000 /path/to/accumulo/README.md
36 |
37 | Open the accumulo shell and look at the data. The row is the MD5 hash of the file, which you can
38 | verify by running a command such as 'md5sum' on the file. Note that in order to scan the
39 | examples.dataTable the class, org.apache.accumulo.examples.filedata.ChunkCombiner, must be in
40 | your classpath, or the accumulo-examples-shaded.jar should be moved to the accumulo lib directory.
41 |
42 | > scan -t examples.dataTable
43 |
44 | Run the CharacterHistogram MapReduce to add some information about the file.
45 |
46 | $ ./bin/runmr filedata.CharacterHistogram -t examples.dataTable --auths exampleVis --vis exampleVis
47 |
48 | Scan again to see the histogram stored in the 'info' column family.
49 |
50 | > scan -t examples.dataTable
51 |
52 | [dirlist]: dirlist.md
53 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/filedata/FileDataQuery.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.filedata;
18 |
19 | import java.io.IOException;
20 | import java.util.ArrayList;
21 | import java.util.List;
22 | import java.util.Map.Entry;
23 |
24 | import org.apache.accumulo.core.client.AccumuloClient;
25 | import org.apache.accumulo.core.client.Scanner;
26 | import org.apache.accumulo.core.client.TableNotFoundException;
27 | import org.apache.accumulo.core.data.Key;
28 | import org.apache.accumulo.core.data.Range;
29 | import org.apache.accumulo.core.data.Value;
30 | import org.apache.accumulo.core.security.Authorizations;
31 |
32 | import com.google.common.collect.Iterators;
33 | import com.google.common.collect.PeekingIterator;
34 |
35 | /**
36 | * Retrieves file data based on the hash of the file. Used by the
37 | * {@link org.apache.accumulo.examples.dirlist.Viewer}. See README.dirlist for instructions.
38 | */
39 | public class FileDataQuery {
40 | final List<Entry<Key,Value>> lastRefs;
41 | private final ChunkInputStream cis;
42 | Scanner scanner;
43 |
44 | public FileDataQuery(AccumuloClient client, String tableName, Authorizations auths)
45 | throws TableNotFoundException {
46 | lastRefs = new ArrayList<>();
47 | cis = new ChunkInputStream();
48 | scanner = client.createScanner(tableName, auths);
49 | }
50 |
51 | public List<Entry<Key,Value>> getLastRefs() {
52 | return lastRefs;
53 | }
54 |
55 | public ChunkInputStream getData(String hash) throws IOException {
56 | scanner.setRange(new Range(hash));
57 | scanner.setBatchSize(1);
58 | lastRefs.clear();
59 | PeekingIterator<Entry<Key,Value>> pi = Iterators.peekingIterator(scanner.iterator());
60 | if (pi.hasNext()) {
61 | while (!pi.peek().getKey().getColumnFamily().equals(FileDataIngest.CHUNK_CF)) {
62 | lastRefs.add(pi.peek());
63 | pi.next();
64 | }
65 | }
66 | cis.clear();
67 | cis.setSource(pi);
68 | return cis;
69 | }
70 |
71 | public String getSomeData(String hash, int numBytes) throws IOException {
72 | ChunkInputStream is = getData(hash);
73 | byte[] buf = new byte[numBytes];
74 | if (is.read(buf) >= 0) {
75 | return new String(buf);
76 | } else {
77 | return "";
78 | }
79 | }
80 | }
81 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/bloom/BloomFiltersNotFound.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.bloom;
18 |
19 | import static org.apache.accumulo.examples.bloom.BloomFilters.writeData;
20 |
21 | import java.util.Map;
22 |
23 | import org.apache.accumulo.core.client.Accumulo;
24 | import org.apache.accumulo.core.client.AccumuloClient;
25 | import org.apache.accumulo.core.client.AccumuloException;
26 | import org.apache.accumulo.core.client.AccumuloSecurityException;
27 | import org.apache.accumulo.core.client.TableNotFoundException;
28 | import org.apache.accumulo.core.client.admin.NewTableConfiguration;
29 | import org.apache.accumulo.examples.Common;
30 | import org.apache.accumulo.examples.cli.ClientOpts;
31 | import org.slf4j.Logger;
32 | import org.slf4j.LoggerFactory;
33 |
34 | public class BloomFiltersNotFound {
35 |
36 | private static final Logger log = LoggerFactory.getLogger(BloomFiltersNotFound.class);
37 |
38 | public static void main(String[] args)
39 | throws AccumuloException, AccumuloSecurityException, TableNotFoundException {
40 | ClientOpts opts = new ClientOpts();
41 | opts.parseArgs(BloomFiltersNotFound.class.getName(), args);
42 |
43 | try (AccumuloClient client = Accumulo.newClient().from(opts.getClientPropsPath()).build()) {
44 | Map props = Map.of(BloomCommon.BLOOM_ENABLED_PROPERTY, "true");
45 | var newTableConfig = new NewTableConfiguration().setProperties(props);
46 |
47 | Common.createTableWithNamespace(client, BloomCommon.BLOOM_TEST3_TABLE);
48 | Common.createTableWithNamespace(client, BloomCommon.BLOOM_TEST4_TABLE, newTableConfig);
49 |
50 | writeAndFlush(BloomCommon.BLOOM_TEST3_TABLE, client);
51 | writeAndFlush(BloomCommon.BLOOM_TEST4_TABLE, client);
52 |
53 | BloomBatchScanner.scan(client, BloomCommon.BLOOM_TEST3_TABLE, 8);
54 | BloomBatchScanner.scan(client, BloomCommon.BLOOM_TEST4_TABLE, 8);
55 | }
56 | }
57 |
58 | private static void writeAndFlush(String tableName, AccumuloClient client)
59 | throws TableNotFoundException, AccumuloException, AccumuloSecurityException {
60 | log.info("Writing data to {} (bloom filters enabled)", tableName);
61 | writeData(client, tableName, 7);
62 | client.tableOperations().flush(tableName, null, null, true);
63 | }
64 | }
65 |
--------------------------------------------------------------------------------
/docs/batch.md:
--------------------------------------------------------------------------------
1 |
17 | # Apache Accumulo Batch Writing and Scanning Example
18 |
19 | This is an example of how to use the BatchWriter and BatchScanner.
20 |
21 | This tutorial uses the following Java classes.
22 |
23 | * [SequentialBatchWriter.java] - writes mutations with sequential rows and random values
24 | * [RandomBatchScanner.java] - reads random rows and verifies their values
25 |
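At their core, the two classes boil down to the client calls sketched below (this
assumes the `examples.batch` table already exists; the column names are illustrative):

```java
import java.util.Collections;
import java.util.Map.Entry;

import org.apache.accumulo.core.client.Accumulo;
import org.apache.accumulo.core.client.AccumuloClient;
import org.apache.accumulo.core.client.BatchScanner;
import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.security.Authorizations;

public class BatchSketch {
  public static void main(String[] args) throws Exception {
    try (AccumuloClient client = Accumulo.newClient()
        .from("/path/to/accumulo-client.properties").build()) {
      // Buffer and send mutations with a BatchWriter.
      try (BatchWriter writer = client.createBatchWriter("examples.batch")) {
        Mutation m = new Mutation("row_0000000001");
        m.put("cf", "cq", new Value("value"));
        writer.addMutation(m);
      }
      // Look up specific ranges in parallel with a BatchScanner (4 query threads).
      try (BatchScanner scanner = client.createBatchScanner("examples.batch",
          Authorizations.EMPTY, 4)) {
        scanner.setRanges(Collections.singleton(new Range("row_0000000001")));
        for (Entry<Key,Value> entry : scanner) {
          System.out.println(entry.getKey() + " -> " + entry.getValue());
        }
      }
    }
  }
}
```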
26 | Run `SequentialBatchWriter` to add 10,000 entries with random 50-byte values to Accumulo.
27 |
28 | $ ./bin/runex client.SequentialBatchWriter
29 |
30 | Verify data was ingested by scanning the table using the Accumulo shell:
31 |
32 | $ accumulo shell
33 | root@instance> table examples.batch
34 | root@instance examples.batch> scan
35 |
36 | Run `RandomBatchScanner` to perform 1000 random queries and verify the results.
37 |
38 | $ ./bin/runex client.RandomBatchScanner
39 | 16:04:05,950 [examples.client.RandomBatchScanner] INFO : Generating 1000 random ranges for BatchScanner to read
40 | 16:04:06,020 [examples.client.RandomBatchScanner] INFO : Reading ranges using BatchScanner
41 | 16:04:06,283 [examples.client.RandomBatchScanner] TRACE: 100 lookups
42 | 16:04:06,290 [examples.client.RandomBatchScanner] TRACE: 200 lookups
43 | 16:04:06,294 [examples.client.RandomBatchScanner] TRACE: 300 lookups
44 | 16:04:06,297 [examples.client.RandomBatchScanner] TRACE: 400 lookups
45 | 16:04:06,301 [examples.client.RandomBatchScanner] TRACE: 500 lookups
46 | 16:04:06,304 [examples.client.RandomBatchScanner] TRACE: 600 lookups
47 | 16:04:06,307 [examples.client.RandomBatchScanner] TRACE: 700 lookups
48 | 16:04:06,309 [examples.client.RandomBatchScanner] TRACE: 800 lookups
49 | 16:04:06,316 [examples.client.RandomBatchScanner] TRACE: 900 lookups
50 | 16:04:06,320 [examples.client.RandomBatchScanner] TRACE: 1000 lookups
51 | 16:04:06,330 [examples.client.RandomBatchScanner] INFO : Scan finished! 3246.75 lookups/sec, 0.31 secs, 1000 results
52 | 16:04:06,331 [examples.client.RandomBatchScanner] INFO : All expected rows were scanned
53 |
54 | [SequentialBatchWriter.java]: ../src/main/java/org/apache/accumulo/examples/client/SequentialBatchWriter.java
55 | [RandomBatchWriter.java]: ../src/main/java/org/apache/accumulo/examples/client/RandomBatchWriter.java
56 | [RandomBatchScanner.java]: ../src/main/java/org/apache/accumulo/examples/client/RandomBatchScanner.java
57 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/filedata/VisibilityCombiner.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.filedata;
18 |
19 | import java.util.TreeSet;
20 |
21 | import org.apache.accumulo.core.data.ByteSequence;
22 |
23 | /**
24 | * A utility for merging visibilities into the form {@code (VIS1)|(VIS2)|...|(VISN)}. Used by the
25 | * {@link ChunkCombiner}.
26 | */
27 | public class VisibilityCombiner {
28 |
29 | private final TreeSet<String> visibilities = new TreeSet<>();
30 |
31 | void add(ByteSequence cv) {
32 | if (cv.length() == 0)
33 | return;
34 |
35 | int depth = 0;
36 | int offset = 0;
37 |
38 | for (int i = 0; i < cv.length(); i++) {
39 | switch (cv.byteAt(i)) {
40 | case '(':
41 | depth++;
42 | break;
43 | case ')':
44 | depth--;
45 | if (depth < 0)
46 | throw new IllegalArgumentException("Invalid vis " + cv);
47 | break;
48 | case '|':
49 | if (depth == 0) {
50 | insert(cv.subSequence(offset, i));
51 | offset = i + 1;
52 | }
53 |
54 | break;
55 | }
56 | }
57 |
58 | insert(cv.subSequence(offset, cv.length()));
59 |
60 | if (depth != 0)
61 | throw new IllegalArgumentException("Invalid vis " + cv);
62 |
63 | }
64 |
65 | private void insert(ByteSequence cv) {
66 |
67 | String cvs = cv.toString();
68 |
69 | if (cvs.charAt(0) != '(')
70 | cvs = "(" + cvs + ")";
71 | else {
72 | int depth = 0;
73 | int depthZeroCloses = 0;
74 | for (int i = 0; i < cv.length(); i++) {
75 | switch (cv.byteAt(i)) {
76 | case '(':
77 | depth++;
78 | break;
79 | case ')':
80 | depth--;
81 | if (depth == 0)
82 | depthZeroCloses++;
83 | break;
84 | }
85 | }
86 |
87 | if (depthZeroCloses > 1)
88 | cvs = "(" + cvs + ")";
89 | }
90 |
91 | visibilities.add(cvs);
92 | }
93 |
94 | byte[] get() {
95 | StringBuilder sb = new StringBuilder();
96 | String sep = "";
97 | for (String cvs : visibilities) {
98 | sb.append(sep);
99 | sep = "|";
100 | sb.append(cvs);
101 | }
102 |
103 | return sb.toString().getBytes();
104 | }
105 | }
106 |
--------------------------------------------------------------------------------
/docs/wordcount.md:
--------------------------------------------------------------------------------
1 |
17 | # Apache Accumulo Word Count example
18 |
19 | The WordCount example ([WordCount.java]) uses MapReduce and Accumulo to compute
20 | word counts for a set of documents. This is accomplished using a map-only MapReduce
21 | job and an Accumulo table with combiners.
22 |
23 | To run this example, create a directory in HDFS containing text files. You can
24 | use the Accumulo README for data:
25 |
26 | $ hdfs dfs -mkdir /wc
27 | $ hdfs dfs -copyFromLocal /path/to/accumulo/README.md /wc/README.md
28 |
29 | Verify that the file was created:
30 |
31 | $ hdfs dfs -ls /wc
32 |
33 | After creating the table, run the WordCount MapReduce job with your HDFS input directory:
34 |
35 | $ ./bin/runmr mapreduce.WordCount -i /wc
36 |
37 | [WordCount.java] creates an Accumulo table named `examples.wordcount` with a SummingCombiner iterator
38 | attached to it. It runs a map-only MapReduce job that reads the specified HDFS directory of text files and
39 | writes word counts to the Accumulo table.
40 |
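Attaching such a combiner with the Java client API looks roughly like the fragment
below (the priority and column family are illustrative; see [WordCount.java] for the
actual table setup). It assumes an AccumuloClient named `client`:

```java
import java.util.Collections;

import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.iterators.LongCombiner;
import org.apache.accumulo.core.iterators.user.SummingCombiner;

// Fragment: sum values in the "count" column family as string-encoded longs.
IteratorSetting iterSetting = new IteratorSetting(10, SummingCombiner.class);
SummingCombiner.setEncodingType(iterSetting, LongCombiner.Type.STRING);
SummingCombiner.setColumns(iterSetting,
    Collections.singletonList(new IteratorSetting.Column("count")));
client.tableOperations().attachIterator("examples.wordcount", iterSetting);
```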
41 | After the MapReduce job completes, query the Accumulo table to see word counts.
42 |
43 | $ accumulo shell
44 | username@instance> table examples.wordcount
45 | username@instance examples.wordcount> scan -b the
46 | the count:20080906 [] 75
47 | their count:20080906 [] 2
48 | them count:20080906 [] 1
49 | then count:20080906 [] 1
50 | ...
51 |
52 | When the WordCount MapReduce job was run above, the client properties were serialized
53 | into the MapReduce configuration. This is insecure if the properties contain sensitive
54 | information like passwords. A more secure option is to store accumulo-client.properties
55 | in HDFS and run the job with the `-d` option. This will configure the MapReduce job
56 | to obtain the client properties from HDFS:
57 |
58 | $ hdfs dfs -mkdir /user
59 | $ hdfs dfs -mkdir /user/myuser
60 | $ hdfs dfs -copyFromLocal /path/to/accumulo/conf/accumulo-client.properties /user/myuser/
61 | $ ./bin/runmr mapreduce.WordCount -i /wc -t examples.wordcount2 -d /user/myuser/accumulo-client.properties
62 |
63 | After the MapReduce job completes, query the `examples.wordcount2` table. The results should
64 | be the same as before:
65 |
66 | $ accumulo shell
67 | username@instance> table examples.wordcount2
68 | username@instance examples.wordcount2> scan -b the
69 | the count:20080906 [] 75
70 | their count:20080906 [] 2
71 | ...
72 |
73 |
74 | [WordCount.java]: ../src/main/java/org/apache/accumulo/examples/mapreduce/WordCount.java
75 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/cli/ClientOpts.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.cli;
18 |
19 | import java.nio.file.Paths;
20 | import java.util.Properties;
21 |
22 | import org.apache.accumulo.core.client.Accumulo;
23 | import org.apache.accumulo.core.client.AccumuloClient;
24 | import org.apache.accumulo.core.security.Authorizations;
25 | import org.apache.accumulo.core.security.ColumnVisibility;
26 | import org.apache.hadoop.conf.Configuration;
27 |
28 | import com.beust.jcommander.IStringConverter;
29 | import com.beust.jcommander.Parameter;
30 |
31 | public class ClientOpts extends Help {
32 |
33 | public static class AuthConverter implements IStringConverter<Authorizations> {
34 | @Override
35 | public Authorizations convert(String value) {
36 | return new Authorizations(value.split(","));
37 | }
38 | }
39 |
40 | public static class VisibilityConverter implements IStringConverter<ColumnVisibility> {
41 | @Override
42 | public ColumnVisibility convert(String value) {
43 | return new ColumnVisibility(value);
44 | }
45 | }
46 |
47 | @Parameter(names = {"-c", "--conf"}, description = "Path to accumulo-client.properties. "
48 | + "If not set, defaults to path set by env variable ACCUMULO_CLIENT_PROPS.")
49 | private String propsPath = null;
50 |
51 | @Parameter(names = {"-auths", "--auths"}, converter = AuthConverter.class,
52 | description = "the authorizations to use when reading or writing")
53 | public Authorizations auths = Authorizations.EMPTY;
54 |
55 | private Properties cachedProps = null;
56 |
57 | public AccumuloClient createAccumuloClient() {
58 | return Accumulo.newClient().from(getClientPropsPath()).build();
59 | }
60 |
61 | public String getClientPropsPath() {
62 | if (propsPath == null) {
63 | propsPath = System.getenv("ACCUMULO_CLIENT_PROPS");
64 | if (propsPath == null) {
65 | throw new IllegalArgumentException("accumulo-client.properties must be set!");
66 | }
67 | if (!Paths.get(propsPath).toFile().exists()) {
68 | throw new IllegalArgumentException(propsPath + " does not exist!");
69 | }
70 | }
71 | return propsPath;
72 | }
73 |
74 | public Properties getClientProperties() {
75 | if (cachedProps == null) {
76 | cachedProps = Accumulo.newClientProperties().from(getClientPropsPath()).build();
77 | }
78 | return cachedProps;
79 | }
80 |
81 | public Configuration getHadoopConfig() {
82 | Configuration config = new Configuration();
83 | config.set("mapreduce.job.classloader", "true");
84 | return config;
85 | }
86 | }
87 |
--------------------------------------------------------------------------------
/docs/reservations.md:
--------------------------------------------------------------------------------
1 |
17 | # Apache Accumulo Reservations Example
18 |
19 | This example shows running a simple reservation system implemented using
20 | conditional mutations. This system guarantees that only one concurrent user can
21 | reserve a resource. The example's reserve command allows multiple users to be
22 | specified. When this is done, it creates a separate reservation thread for each
23 | user. In the example below, threads are spun up for alice, bob, eve, mallory,
24 | and trent to reserve room06 on 20140101. Bob ends up getting the reservation
25 | and everyone else is put on a wait list. The example code will take any string
26 | for what, when and who.
27 |
28 | $ /path/to/accumulo org.apache.accumulo.server.util.ListInstances
29 |
30 | Instance Name | Instance ID | Master
31 | ---------------------+--------------------------------------+-------------------------------
32 | | 9f8f2a97-432f-4e66-b153-861e2a1ca246 | localhost:9999
33 |
34 | $ /path/to/accumulo shell -u root -p secret -e "createnamespace examples"
35 | $ /path/to/accumulo shell -u root -p secret -e "createtable examples.ars"
36 | $ ./bin/runex reservations.ARS
37 | >connect localhost root secret examples.ars
38 | connected
39 | >
40 | Commands :
41 | reserve {who}
42 | cancel
43 | list
44 | >reserve room06 20140101 alice bob eve mallory trent
45 | bob : RESERVED
46 | mallory : WAIT_LISTED
47 | alice : WAIT_LISTED
48 | trent : WAIT_LISTED
49 | eve : WAIT_LISTED
50 | >list room06 20140101
51 | Reservation holder : bob
52 | Wait list : [mallory, alice, trent, eve]
53 | >cancel room06 20140101 alice
54 | >cancel room06 20140101 bob
55 | >list room06 20140101
56 | Reservation holder : mallory
57 | Wait list : [trent, eve]
58 | >quit
59 |
60 | Scanning the table in the Accumulo shell after running the example shows the
61 | following:
62 |
63 | root@test16> table examples.ars
64 | root@test16 examples.ars> scan
65 | room06:20140101 res:0001 [] mallory
66 | room06:20140101 res:0003 [] trent
67 | room06:20140101 res:0004 [] eve
68 | room06:20140101 tx:seq [] 6
69 |
70 | The tx:seq column is incremented for each update to the row allowing for
71 | detection of concurrent changes. For an update to go through, the sequence
72 | number must not have changed since the data was read. If it does change,
73 | the conditional mutation will fail and the example code will retry.
74 |
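The read-modify-write pattern behind this is a conditional mutation. A minimal sketch,
with column names following the scan output above (the sequence values and the new
reservation are illustrative):

```java
import org.apache.accumulo.core.client.Accumulo;
import org.apache.accumulo.core.client.AccumuloClient;
import org.apache.accumulo.core.client.ConditionalWriter;
import org.apache.accumulo.core.client.ConditionalWriterConfig;
import org.apache.accumulo.core.data.Condition;
import org.apache.accumulo.core.data.ConditionalMutation;
import org.apache.accumulo.core.data.Value;

public class ConditionalSketch {
  public static void main(String[] args) throws Exception {
    try (AccumuloClient client = Accumulo.newClient()
        .from("/path/to/accumulo-client.properties").build();
        ConditionalWriter writer = client.createConditionalWriter("examples.ars",
            new ConditionalWriterConfig())) {
      // Only apply the update if tx:seq still holds the value read earlier.
      ConditionalMutation cm = new ConditionalMutation("room06:20140101");
      cm.addCondition(new Condition("tx", "seq").setValue("6"));
      cm.put("tx", "seq", new Value("7"));
      cm.put("res", "0005", new Value("carol"));
      ConditionalWriter.Result result = writer.write(cm);
      if (result.getStatus() != ConditionalWriter.Status.ACCEPTED) {
        System.out.println("Lost the race; re-read the row and retry");
      }
    }
  }
}
```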
75 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/filedata/ChunkInputFormat.java:
--------------------------------------------------------------------------------
1 | /// *
2 | // * Licensed to the Apache Software Foundation (ASF) under one or more
3 | // * contributor license agreements. See the NOTICE file distributed with
4 | // * this work for additional information regarding copyright ownership.
5 | // * The ASF licenses this file to You under the Apache License, Version 2.0
6 | // * (the "License"); you may not use this file except in compliance with
7 | // * the License. You may obtain a copy of the License at
8 | // *
9 | // * http://www.apache.org/licenses/LICENSE-2.0
10 | // *
11 | // * Unless required by applicable law or agreed to in writing, software
12 | // * distributed under the License is distributed on an "AS IS" BASIS,
13 | // * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | // * See the License for the specific language governing permissions and
15 | // * limitations under the License.
16 | // */
17 | // package org.apache.accumulo.examples.filedata;
18 | //
19 | // import java.io.IOException;
20 | // import java.io.InputStream;
21 | // import java.util.ArrayList;
22 | // import java.util.List;
23 | // import java.util.Map.Entry;
24 | //
25 | // import org.apache.accumulo.core.data.Key;
26 | // import org.apache.accumulo.core.data.Value;
27 | // import org.apache.accumulo.examples.util.FormatUtil;
28 | // import org.apache.hadoop.mapreduce.InputSplit;
29 | // import org.apache.hadoop.mapreduce.RecordReader;
30 | // import org.apache.hadoop.mapreduce.TaskAttemptContext;
31 | //
32 | // import com.google.common.collect.Iterators;
33 | // import com.google.common.collect.PeekingIterator;
34 | //
35 | /// **
36 | // * An InputFormat that turns the file data ingested with {@link FileDataIngest} into an
37 | /// InputStream
38 | // * using {@link ChunkInputStream}. Mappers used with this InputFormat must close the InputStream.
39 | // */
40 | // @SuppressWarnings("deprecation")
41 | // public class ChunkInputFormat extends
42 | // org.apache.accumulo.core.client.mapreduce.InputFormatBase<List<Entry<Key,Value>>,InputStream> {
43 | // @Override
44 | // public RecordReader<List<Entry<Key,Value>>,InputStream> createRecordReader(InputSplit split,
45 | // TaskAttemptContext context) {
46 | // return new RecordReaderBase<>() {
47 | // private PeekingIterator<Entry<Key,Value>> peekingScannerIterator;
48 | //
49 | // @Override
50 | // public void initialize(InputSplit inSplit, TaskAttemptContext attempt) throws IOException {
51 | // super.initialize(inSplit, attempt);
52 | // peekingScannerIterator = Iterators.peekingIterator(scannerIterator);
53 | // currentK = new ArrayList<>();
54 | // currentV = new ChunkInputStream();
55 | // }
56 | //
57 | // @Override
58 | // public boolean nextKeyValue() throws IOException {
59 | // log.debug("nextKeyValue called");
60 | //
61 | // currentK.clear();
62 | // if (peekingScannerIterator.hasNext()) {
63 | // ++numKeysRead;
64 | // Entry<Key,Value> entry = peekingScannerIterator.peek();
65 | // while (!entry.getKey().getColumnFamily().equals(FileDataIngest.CHUNK_CF)) {
66 | // currentK.add(entry);
67 | // peekingScannerIterator.next();
68 | // if (!peekingScannerIterator.hasNext()) {
69 | // return true;
70 | // }
71 | // entry = peekingScannerIterator.peek();
72 | // }
73 | // currentKey = entry.getKey();
74 | // ((ChunkInputStream) currentV).setSource(peekingScannerIterator);
75 | // if (log.isTraceEnabled()) {
76 | // log.trace("Processing key/value pair: " + FormatUtil.formatTableEntry(entry, true));
77 | // }
78 | //
79 | // return true;
80 | // }
81 | // return false;
82 | // }
83 | // };
84 | // }
85 | // }
86 |
--------------------------------------------------------------------------------
/docs/uniquecols.md:
--------------------------------------------------------------------------------
1 |
17 | # Apache Accumulo Unique Columns example
18 |
19 | The UniqueColumns example ([UniqueColumns.java]) computes the unique set
20 | of column families and column qualifiers in a table. It also demonstrates
21 | how a MapReduce job can directly read a table's files from HDFS.
22 |
23 | Create a table and add rows that all have identical column family and column
24 | qualifiers.
25 |
26 | ```
27 | $ /path/to/accumulo shell -u username -p secret
28 | username@instance> createnamespace examples
29 | username@instance> createtable examples.unique
30 | username@instance examples.unique> insert row1 fam1 qual1 v1
31 | username@instance examples.unique> insert row2 fam1 qual1 v2
32 | username@instance examples.unique> insert row3 fam1 qual1 v3
33 | ```
34 |
35 | Exit the Accumulo shell and run the UniqueColumns MapReduce job against
36 | this table. Note that if the output file already exists in HDFS, it will
37 | need to be deleted.
38 |
39 | ```
40 | $ ./bin/runmr mapreduce.UniqueColumns --table examples.unique --reducers 1 --output /tmp/unique
41 | ```
42 |
43 | When the MapReduce job completes, examine the output.
44 |
45 | ```
46 | $ hdfs dfs -cat /tmp/unique/part-r-00000
47 | cf:fam1
48 | cq:qual1
49 | ```
50 |
51 | The output displays the unique column family and column qualifier values. In
52 | this case since all rows use the same values, there are only two values output.
53 |
54 | Note that since the example used only one reducer all output will be contained
55 | within the single `part-r-00000` file. If more than one reducer is used the output
56 | will be spread among various `part-r-xxxxx` files.
57 |
58 | Go back to the shell and add some additional entries.
59 |
60 | ```text
61 | $ /path/to/accumulo shell -u username -p secret
62 | username@instance> table examples.unique
63 | username@instance examples.unique> insert row1 fam2 qual2 v2
64 | username@instance examples.unique> insert row1 fam3 qual2 v2
65 | username@instance examples.unique> insert row1 fam2 qual2 v2
66 | username@instance examples.unique> insert row2 fam2 qual2 v2
67 | username@instance examples.unique> insert row3 fam2 qual2 v2
68 | username@instance examples.unique> insert row3 fam3 qual3 v2
69 | username@instance examples.unique> insert row3 fam3 qual4 v2
70 | ```
71 |
72 | Re-running the command will now find any additional unique column values.
73 |
74 | ```text
75 | $ hdfs dfs -rm -r -f /tmp/unique
76 | $ ./bin/runmr mapreduce.UniqueColumns --table examples.unique --reducers 1 --output /tmp/unique
77 | $ hdfs dfs -cat /tmp/unique/part-r-00000
78 | cf:fam1
79 | cf:fam2
80 | cf:fam3
81 | cq:qual1
82 | cq:qual2
83 | cq:qual3
84 | cq:qual4
85 | ```
86 |
87 | The output now includes the additional column values that were added during the last batch of inserts.
88 |
89 |
90 | [UniqueColumns.java]: ../src/main/java/org/apache/accumulo/examples/mapreduce/UniqueColumns.java
91 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/client/ReadWriteExample.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.client;
18 |
19 | import java.util.Map.Entry;
20 |
21 | import org.apache.accumulo.core.client.Accumulo;
22 | import org.apache.accumulo.core.client.AccumuloClient;
23 | import org.apache.accumulo.core.client.AccumuloException;
24 | import org.apache.accumulo.core.client.AccumuloSecurityException;
25 | import org.apache.accumulo.core.client.BatchWriter;
26 | import org.apache.accumulo.core.client.Scanner;
27 | import org.apache.accumulo.core.client.TableNotFoundException;
28 | import org.apache.accumulo.core.data.Key;
29 | import org.apache.accumulo.core.data.Mutation;
30 | import org.apache.accumulo.core.data.Value;
31 | import org.apache.accumulo.core.security.Authorizations;
32 | import org.apache.accumulo.examples.Common;
33 | import org.apache.accumulo.examples.cli.ClientOpts;
34 | import org.slf4j.Logger;
35 | import org.slf4j.LoggerFactory;
36 |
37 | public class ReadWriteExample {
38 |
39 | private static final Logger log = LoggerFactory.getLogger(ReadWriteExample.class);
40 |
41 | private static final String READWRITE_TABLE = Common.NAMESPACE + ".readwrite";
42 |
43 | private ReadWriteExample() {}
44 |
45 | public static void main(String[] args) throws AccumuloSecurityException, AccumuloException {
46 | ClientOpts opts = new ClientOpts();
47 | opts.parseArgs(ReadWriteExample.class.getName(), args);
48 |
49 | try (AccumuloClient client = Accumulo.newClient().from(opts.getClientPropsPath()).build()) {
50 | Common.createTableWithNamespace(client, READWRITE_TABLE);
51 | // write data
52 | try (BatchWriter writer = client.createBatchWriter(READWRITE_TABLE)) {
53 | for (int i = 0; i < 10; i++) {
54 | Mutation m = new Mutation("hello" + i);
55 | m.put("cf", "cq", new Value("world" + i));
56 | writer.addMutation(m);
57 | }
58 | } catch (TableNotFoundException e) {
59 | log.error("Could not find table {}: {}", e.getTableName(), e.getMessage());
60 | System.exit(1);
61 | }
62 |
63 | // read data
64 | try (Scanner scanner = client.createScanner(READWRITE_TABLE, Authorizations.EMPTY)) {
65 | for (Entry<Key,Value> entry : scanner) {
66 | log.info("{} -> {}", entry.getKey().toString(), entry.getValue().toString());
67 | }
68 | } catch (TableNotFoundException e) {
69 | log.error("Could not find table {}: {}", e.getTableName(), e.getMessage());
70 | System.exit(1);
71 | }
72 |
73 | // delete table
74 | try {
75 | client.tableOperations().delete(READWRITE_TABLE);
76 | } catch (TableNotFoundException e) {
77 | log.error("Unable to delete table '{}': {}", e.getTableName(), e.getMessage());
78 | }
79 | }
80 | }
81 | }
82 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/mapreduce/bulk/VerifyIngest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.mapreduce.bulk;
18 |
19 | import java.util.Iterator;
20 | import java.util.Map.Entry;
21 |
22 | import org.apache.accumulo.core.client.Accumulo;
23 | import org.apache.accumulo.core.client.AccumuloClient;
24 | import org.apache.accumulo.core.client.Scanner;
25 | import org.apache.accumulo.core.client.TableNotFoundException;
26 | import org.apache.accumulo.core.data.Key;
27 | import org.apache.accumulo.core.data.Range;
28 | import org.apache.accumulo.core.data.Value;
29 | import org.apache.accumulo.core.security.Authorizations;
30 | import org.apache.accumulo.examples.cli.ClientOpts;
31 | import org.slf4j.Logger;
32 | import org.slf4j.LoggerFactory;
33 |
34 | public final class VerifyIngest {
35 |
36 | private static final Logger log = LoggerFactory.getLogger(VerifyIngest.class);
37 | private static final String ROW_FORMAT = "row_%010d";
38 | private static final String VALUE_FORMAT = "value_%010d";
39 |
40 | private VerifyIngest() {}
41 |
42 | public static void main(String[] args) throws TableNotFoundException {
43 |
44 | ClientOpts opts = new ClientOpts();
45 | opts.parseArgs(VerifyIngest.class.getName(), args);
46 |
47 | try (AccumuloClient client = Accumulo.newClient().from(opts.getClientPropsPath()).build();
48 | Scanner scanner = client.createScanner(SetupTable.BULK_INGEST_TABLE,
49 | Authorizations.EMPTY)) {
50 |
51 | scanner.setRange(new Range(String.format(ROW_FORMAT, 0), null));
52 |
53 | Iterator<Entry<Key,Value>> si = scanner.iterator();
54 |
55 | boolean ok = true;
56 |
57 | for (int i = 0; i < BulkIngestExample.numRows; i++) {
58 |
59 | if (si.hasNext()) {
60 | Entry<Key,Value> entry = si.next();
61 |
62 | if (!entry.getKey().getRow().toString().equals(String.format(ROW_FORMAT, i))) {
63 | String formattedRow = String.format(ROW_FORMAT, i);
64 | log.error("unexpected row key {}; expected {}", entry.getKey().getRow(), formattedRow);
65 | ok = false;
66 | }
67 |
68 | if (!entry.getValue().toString().equals(String.format(VALUE_FORMAT, i))) {
69 | var formattedValue = String.format(VALUE_FORMAT, i);
70 | log.error("unexpected value {}; expected {}", entry.getValue(), formattedValue);
71 | ok = false;
72 | }
73 |
74 | } else {
75 | var formattedRow = String.format(ROW_FORMAT, i);
76 | log.error("no more rows, expected {}", formattedRow);
77 | ok = false;
78 | break;
79 | }
80 | }
81 |
82 | if (ok) {
83 | log.info("Data verification succeeded!");
84 | System.exit(0);
85 | } else {
86 | log.info("Data verification failed!");
87 | System.exit(1);
88 | }
89 | }
90 | }
91 | }
92 |
--------------------------------------------------------------------------------
/docs/classpath.md:
--------------------------------------------------------------------------------
1 |
17 | # Apache Accumulo Classpath Example
18 |
19 | This example shows how to use per-table classpaths. The example leverages a
20 | test jar which contains a Filter that suppresses rows containing "foo". The
21 | example shows copying FooFilter.jar into HDFS and then configuring an Accumulo
22 | table to reference that jar. For this example, a directory, `/user1/lib`, is
23 | assumed to exist in HDFS.
24 |
25 | Create `/user1/lib` in HDFS if it does not exist.
26 |
27 | hadoop fs -mkdir -p /user1/lib
28 |
29 | Execute the following command in the shell. Note that the `FooFilter.jar`
30 | is located within the Accumulo source distribution.
31 |
32 | $ hadoop fs -copyFromLocal /path/to/accumulo/test/src/main/resources/org/apache/accumulo/test/FooFilter.jar /user1/lib
33 |
34 | Execute the following in the Accumulo shell to set up the classpath context:
35 |
36 | root@uno> config -s general.vfs.context.classpath.cx1=hdfs://<namenode host>:<port>/user1/lib/[^.].*.jar
37 |
38 | Create a namespace and table
39 |
40 | root@uno> createnamespace examples
41 | root@uno> createtable examples.nofoo
42 |
43 | The following command makes this table use the configured classpath context
44 |
45 | root@uno examples.nofoo> config -t examples.nofoo -s table.class.loader.context=cx1
46 |
47 | The following command configures an iterator that's in FooFilter.jar
48 |
49 | root@uno examples.nofoo> setiter -n foofilter -p 10 -scan -minc -majc -class org.apache.accumulo.test.FooFilter
50 | Filter accepts or rejects each Key/Value pair
51 | ----------> set FooFilter parameter negate, default false keeps k/v that pass accept method, true rejects k/v that pass accept method: false
52 |
53 | The commands below show the filter is working.
54 |
55 | root@uno examples.nofoo> insert foo1 f1 q1 v1
56 | root@uno examples.nofoo> insert noo1 f1 q1 v2
57 | root@uno examples.nofoo> scan
58 | noo1 f1:q1 [] v2
59 | root@uno examples.nofoo>
60 |
61 | Below, an attempt is made to add the FooFilter to a table that's not configured
62 | to use the classpath context cx1. This fails until the table is configured to
63 | use cx1.
64 |
65 | root@uno examples.nofoo> createtable examples.nofootwo
66 | root@uno examples.nofootwo> setiter -n foofilter -p 10 -scan -minc -majc -class org.apache.accumulo.test.FooFilter
67 | 2013-05-03 12:49:35,943 [shell.Shell] ERROR: org.apache.accumulo.shell.ShellCommandException: Command could
68 | not be initialized (Unable to load org.apache.accumulo.test.FooFilter; class not found.)
69 | root@uno examples.nofootwo> config -t examples.nofootwo -s table.class.loader.context=cx1
70 | root@uno examples.nofootwo> setiter -n foofilter -p 10 -scan -minc -majc -class org.apache.accumulo.test.FooFilter
71 | Filter accepts or rejects each Key/Value pair
72 | ----------> set FooFilter parameter negate, default false keeps k/v that pass accept method, true rejects k/v that pass accept method: false
73 |
74 |
75 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/util/FormatUtil.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.util;
18 |
19 | import java.util.Map;
20 |
21 | import org.apache.accumulo.core.data.Key;
22 | import org.apache.accumulo.core.data.Value;
23 | import org.apache.accumulo.core.security.ColumnVisibility;
24 | import org.apache.hadoop.io.Text;
25 |
26 | public final class FormatUtil {
27 |
28 | /**
29 | * Format and return the specified table entry as a human-readable String suitable for logging.
30 | *
31 | * If {@code includeTimestamp} is true, the entry will be formatted as:
32 | * {@literal <row> <columnFamily>:<columnQualifier> <visibility> <timestamp>\t<value>}
33 | * If false, the entry will be formatted as:
34 | * {@literal <row> <columnFamily>:<columnQualifier> <visibility>\t<value>}
35 | * Examples:
36 | * {@literal a ~chunk:\x00\x00\x00d\x00\x00\x00\x00 [A&B] 9223372036854775807 asdfjkl;}
37 | * {@literal a ~chunk:\x00\x00\x00d\x00\x00\x00\x00 [A&B] asdfjkl;}
38 | *
39 | * @param entry
40 | * the table entry to format
41 | * @param includeTimestamp
42 | * if true, include the timestamp in the returned result
43 | * @return the specified entry as a formatted String, or null if the entry is null
44 | */
45 | public static String formatTableEntry(final Map.Entry<Key,Value> entry,
46 | final boolean includeTimestamp) {
47 | if (entry == null) {
48 | return null;
49 | }
50 |
51 | Key key = entry.getKey();
52 | StringBuilder sb = new StringBuilder();
53 | Text buffer = new Text();
54 |
55 | // Append row.
56 | appendBytes(sb, key.getRow(buffer).getBytes()).append(" ");
57 |
58 | // Append column family.
59 | appendBytes(sb, key.getColumnFamily().getBytes()).append(":");
60 |
61 | // Append column qualifier.
62 | appendBytes(sb, key.getColumnQualifier().getBytes()).append(" ");
63 |
64 | // Append visibility and timestamp.
65 | sb.append(new ColumnVisibility(key.getColumnVisibility(buffer)));
66 |
67 | if (includeTimestamp) {
68 | sb.append(" ").append(entry.getKey().getTimestamp());
69 | }
70 |
71 | // Append value.
72 | Value value = entry.getValue();
73 | if (value != null && value.getSize() > 0) {
74 | sb.append("\t");
75 | appendBytes(sb, value.get());
76 | }
77 | return sb.toString();
78 | }
79 |
80 | private static StringBuilder appendBytes(final StringBuilder sb, final byte[] ba) {
81 | for (byte b : ba) {
82 | int c = 0xff & b;
83 | if (c == '\\') {
84 | sb.append("\\\\");
85 | } else if (c >= 32 && c <= 126) {
86 | sb.append((char) c);
87 | } else {
88 | sb.append("\\x").append(String.format("%02X", c));
89 | }
90 | }
91 | return sb;
92 | }
93 |
94 | private FormatUtil() {
95 | throw new UnsupportedOperationException();
96 | }
97 | }
98 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/cli/BatchWriterOpts.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.cli;
18 |
19 | import java.time.Duration;
20 | import java.util.concurrent.TimeUnit;
21 |
22 | import org.apache.accumulo.core.client.BatchWriterConfig;
23 |
24 | import com.beust.jcommander.IStringConverter;
25 | import com.beust.jcommander.Parameter;
26 |
27 | public class BatchWriterOpts {
28 | private static final BatchWriterConfig BWDEFAULTS = new BatchWriterConfig();
29 |
30 | public static class TimeConverter implements IStringConverter<Long> {
31 | @Override
32 | public Long convert(String value) {
33 | if (value.matches("[0-9]+"))
34 | value = "PT" + value + "S"; // if only numbers then assume seconds
35 | return Duration.parse(value).toMillis();
36 | }
37 | }
38 |
39 | public static class MemoryConverter implements IStringConverter<Long> {
40 | @Override
41 | public Long convert(String str) {
42 | try {
43 | char lastChar = str.charAt(str.length() - 1);
44 | int multiplier = 0;
45 | switch (Character.toUpperCase(lastChar)) {
46 | case 'G': // intentional fall-through: 'G' accumulates three shifts of 10 bits (2^30)
47 | multiplier += 10;
48 | case 'M': // fall-through: 'M' ends up with a shift of 20 bits (2^20)
49 | multiplier += 10;
50 | case 'K': // fall-through: 'K' ends up with a shift of 10 bits (2^10)
51 | multiplier += 10;
52 | case 'B': // plain bytes; no shift
53 | break;
54 | default:
55 | return Long.parseLong(str);
56 | }
57 | return Long.parseLong(str.substring(0, str.length() - 1)) << multiplier;
58 | } catch (Exception ex) {
59 | throw new IllegalArgumentException(
60 | "The value '" + str + "' is not a valid memory setting. A valid value would a number "
61 | + "possibily followed by an optional 'G', 'M', 'K', or 'B'.");
62 | }
63 | }
64 | }
65 |
66 | @Parameter(names = "--batchThreads",
67 | description = "Number of threads to use when writing large batches")
68 | public Integer batchThreads = BWDEFAULTS.getMaxWriteThreads();
69 |
70 | @Parameter(names = "--batchLatency", converter = TimeConverter.class,
71 | description = "The maximum time to wait before flushing data to servers when writing")
72 | public Long batchLatency = BWDEFAULTS.getMaxLatency(TimeUnit.MILLISECONDS);
73 |
74 | @Parameter(names = "--batchMemory", converter = MemoryConverter.class,
75 | description = "memory used to batch data when writing")
76 | public Long batchMemory = BWDEFAULTS.getMaxMemory();
77 |
78 | @Parameter(names = "--batchTimeout", converter = TimeConverter.class,
79 | description = "timeout used to fail a batch write")
80 | public Long batchTimeout = BWDEFAULTS.getTimeout(TimeUnit.MILLISECONDS);
81 |
82 | public BatchWriterConfig getBatchWriterConfig() {
83 | BatchWriterConfig config = new BatchWriterConfig();
84 | config.setMaxWriteThreads(this.batchThreads);
85 | config.setMaxLatency(this.batchLatency, TimeUnit.MILLISECONDS);
86 | config.setMaxMemory(this.batchMemory);
87 | config.setTimeout(this.batchTimeout, TimeUnit.MILLISECONDS);
88 | return config;
89 | }
90 |
91 | }
92 |
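93 | // Usage sketch (illustrative only; the client instance, the mutation, and the table
94 | // name "examples.batch" are assumptions, not part of this class):
95 | //
96 | //   BatchWriterOpts bwOpts = new BatchWriterOpts();
97 | //   new JCommander(bwOpts).parse(args);
98 | //   try (BatchWriter bw = client.createBatchWriter("examples.batch",
99 | //       bwOpts.getBatchWriterConfig())) {
100 | //     bw.addMutation(mutation);
101 | //   }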
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/mapreduce/RegexExample.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.mapreduce;
18 |
19 | import java.io.IOException;
20 |
21 | import org.apache.accumulo.core.client.IteratorSetting;
22 | import org.apache.accumulo.core.data.Key;
23 | import org.apache.accumulo.core.data.Value;
24 | import org.apache.accumulo.core.iterators.user.RegExFilter;
25 | import org.apache.accumulo.examples.cli.ClientOpts;
26 | import org.apache.accumulo.hadoop.mapreduce.AccumuloInputFormat;
27 | import org.apache.hadoop.fs.Path;
28 | import org.apache.hadoop.mapreduce.Job;
29 | import org.apache.hadoop.mapreduce.Mapper;
30 | import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
31 | import org.slf4j.Logger;
32 | import org.slf4j.LoggerFactory;
33 |
34 | import com.beust.jcommander.Parameter;
35 |
36 | public class RegexExample {
37 |
38 | private static final Logger log = LoggerFactory.getLogger(RegexExample.class);
39 |
40 | public static class RegexMapper extends Mapper<Key,Value,Key,Value> {
41 | @Override
42 | public void map(Key row, Value data, Context context) throws IOException, InterruptedException {
43 | context.write(row, data);
44 | }
45 | }
46 |
47 | static class Opts extends ClientOpts {
48 | @Parameter(names = {"-t", "--table"}, required = true, description = "table to use")
49 | String tableName;
50 | @Parameter(names = "--rowRegex")
51 | String rowRegex;
52 | @Parameter(names = "--columnFamilyRegex")
53 | String columnFamilyRegex;
54 | @Parameter(names = "--columnQualifierRegex")
55 | String columnQualifierRegex;
56 | @Parameter(names = "--valueRegex")
57 | String valueRegex;
58 | @Parameter(names = "--output", required = true)
59 | String destination;
60 | }
61 |
62 | public static void main(String[] args) throws Exception {
63 | Opts opts = new Opts();
64 | opts.parseArgs(RegexExample.class.getName(), args);
65 |
66 | Job job = Job.getInstance(opts.getHadoopConfig());
67 | job.setJobName(RegexExample.class.getSimpleName());
68 | job.setJarByClass(RegexExample.class);
69 |
70 | job.setInputFormatClass(AccumuloInputFormat.class);
71 |
72 | IteratorSetting regex = new IteratorSetting(50, "regex", RegExFilter.class);
73 | RegExFilter.setRegexs(regex, opts.rowRegex, opts.columnFamilyRegex, opts.columnQualifierRegex,
74 | opts.valueRegex, false);
75 |
76 | AccumuloInputFormat.configure().clientProperties(opts.getClientProperties())
77 | .table(opts.tableName).addIterator(regex).store(job);
78 |
79 | job.setMapperClass(RegexMapper.class);
80 | job.setMapOutputKeyClass(Key.class);
81 | job.setMapOutputValueClass(Value.class);
82 | job.setNumReduceTasks(0);
83 | job.setOutputFormatClass(TextOutputFormat.class);
84 | TextOutputFormat.setOutputPath(job, new Path(opts.destination));
85 |
86 | log.info("setRowRegex: " + opts.rowRegex);
87 | log.info("setColumnFamilyRegex: " + opts.columnFamilyRegex);
88 | log.info("setColumnQualifierRegex: " + opts.columnQualifierRegex);
89 | log.info("setValueRegex: " + opts.valueRegex);
90 |
91 | System.exit(job.waitForCompletion(true) ? 0 : 1);
92 | }
93 | }
94 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/mapreduce/RowHash.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.mapreduce;
18 |
19 | import java.io.IOException;
20 | import java.util.Base64;
21 | import java.util.Collections;
22 |
23 | import org.apache.accumulo.core.client.IteratorSetting;
24 | import org.apache.accumulo.core.data.Key;
25 | import org.apache.accumulo.core.data.Mutation;
26 | import org.apache.accumulo.core.data.Value;
27 | import org.apache.accumulo.examples.cli.ClientOpts;
28 | import org.apache.accumulo.hadoop.mapreduce.AccumuloInputFormat;
29 | import org.apache.accumulo.hadoop.mapreduce.AccumuloOutputFormat;
30 | import org.apache.accumulo.hadoop.mapreduce.InputFormatBuilder;
31 | import org.apache.hadoop.io.MD5Hash;
32 | import org.apache.hadoop.io.Text;
33 | import org.apache.hadoop.mapreduce.Job;
34 | import org.apache.hadoop.mapreduce.Mapper;
35 |
36 | import com.beust.jcommander.Parameter;
37 |
38 | public class RowHash {
39 |
40 | /**
41 | * The Mapper class that, for each input entry, emits a mutation holding the Base64-encoded MD5 hash of the value.
42 | */
43 | public static class HashDataMapper extends Mapper<Key,Value,Text,Mutation> {
44 | @Override
45 | public void map(Key row, Value data, Context context) throws IOException, InterruptedException {
46 | Mutation m = new Mutation(row.getRow());
47 | m.put("cf-HASHTYPE", "cq-MD5BASE64",
48 | new Value(Base64.getEncoder().encode(MD5Hash.digest(data.toString()).getDigest())));
49 | context.write(null, m);
50 | context.progress();
51 | }
52 |
53 | @Override
54 | public void setup(Context job) {}
55 | }
56 |
57 | private static class Opts extends ClientOpts {
58 | @Parameter(names = {"-t", "--table"}, required = true, description = "table to use")
59 | String tableName;
60 | @Parameter(names = "--column", required = true)
61 | String column;
62 | }
63 |
64 | public static void main(String[] args) throws Exception {
65 | Opts opts = new Opts();
66 | opts.parseArgs(RowHash.class.getName(), args);
67 |
68 | Job job = Job.getInstance(opts.getHadoopConfig());
69 | job.setJobName(RowHash.class.getName());
70 | job.setJarByClass(RowHash.class);
71 | job.setInputFormatClass(AccumuloInputFormat.class);
72 | InputFormatBuilder.InputFormatOptions inputOpts = AccumuloInputFormat.configure()
73 | .clientProperties(opts.getClientProperties()).table(opts.tableName);
74 |
75 | String col = opts.column;
76 | int idx = col.indexOf(":");
77 | String cf = idx < 0 ? col : col.substring(0, idx);
78 | String cq = idx < 0 ? null : col.substring(idx + 1);
79 | if (cf.length() > 0) {
80 | inputOpts.fetchColumns(Collections.singleton(new IteratorSetting.Column(cf, cq)));
81 | }
82 | inputOpts.store(job);
83 |
84 | job.setMapperClass(HashDataMapper.class);
85 | job.setMapOutputKeyClass(Text.class);
86 | job.setMapOutputValueClass(Mutation.class);
87 | job.setNumReduceTasks(0);
88 |
89 | job.setOutputFormatClass(AccumuloOutputFormat.class);
90 | AccumuloOutputFormat.configure().clientProperties(opts.getClientProperties())
91 | .defaultTable(opts.tableName).store(job);
92 |
93 | System.exit(job.waitForCompletion(true) ? 0 : 1);
94 | }
95 | }
96 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/constraints/MaxMutationSize.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.constraints;
18 |
19 | import java.util.Collections;
20 | import java.util.List;
21 |
22 | import org.apache.accumulo.core.client.Accumulo;
23 | import org.apache.accumulo.core.client.AccumuloClient;
24 | import org.apache.accumulo.core.client.AccumuloException;
25 | import org.apache.accumulo.core.client.AccumuloSecurityException;
26 | import org.apache.accumulo.core.client.BatchWriter;
27 | import org.apache.accumulo.core.client.MutationsRejectedException;
28 | import org.apache.accumulo.core.client.TableNotFoundException;
29 | import org.apache.accumulo.core.data.Mutation;
30 | import org.apache.accumulo.core.data.Value;
31 | import org.apache.accumulo.core.data.constraints.Constraint;
32 | import org.apache.accumulo.examples.Common;
33 | import org.apache.accumulo.examples.cli.ClientOpts;
34 | import org.slf4j.Logger;
35 | import org.slf4j.LoggerFactory;
36 |
37 | /**
38 | * Ensure that mutations are a reasonable size: we must be able to fit several in memory at a time.
39 | */
40 | public class MaxMutationSize implements Constraint {
41 |
42 | private static final Logger log = LoggerFactory.getLogger(MaxMutationSize.class);
43 |
44 | static final long MAX_SIZE = Runtime.getRuntime().maxMemory() >> 8; // 1/256th of the JVM's max heap
45 | static final List<Short> empty = Collections.emptyList();
46 | static final List<Short> violations = Collections.singletonList((short) 0);
47 |
48 | @Override
49 | public String getViolationDescription(short violationCode) {
50 | return String.format("mutation exceeded maximum size of %d", MAX_SIZE);
51 | }
52 |
53 | @Override
54 | public List<Short> check(Environment env, Mutation mutation) {
55 | if (mutation.estimatedMemoryUsed() < MAX_SIZE)
56 | return empty;
57 | return violations;
58 | }
59 |
60 | public static void main(String[] args)
61 | throws AccumuloException, AccumuloSecurityException, TableNotFoundException {
62 | ClientOpts opts = new ClientOpts();
63 | opts.parseArgs(MaxMutationSize.class.getName(), args);
64 |
65 | try (AccumuloClient client = Accumulo.newClient().from(opts.getClientPropsPath()).build()) {
66 | Common.createTableWithNamespace(client, ConstraintsCommon.CONSTRAINTS_TABLE);
67 |
68 | /*
69 | * Add the {@link MaxMutationSize} constraint to the table. Be sure to use the fully qualified
70 | * class name
71 | */
72 | int num = client.tableOperations().addConstraint(ConstraintsCommon.CONSTRAINTS_TABLE,
73 | "org.apache.accumulo.examples.constraints.MaxMutationSize");
74 |
75 | log.info("Attempting to write a lot of mutations to testConstraints");
76 | try (BatchWriter bw = client.createBatchWriter(ConstraintsCommon.CONSTRAINTS_TABLE)) {
77 | Mutation m = new Mutation("r1");
78 | for (int i = 0; i < 1_000_000; i++)
79 | m.put("cf" + i % 5000, "cq" + i, new Value(("value" + i).getBytes()));
80 | bw.addMutation(m);
81 | } catch (MutationsRejectedException e) {
82 | e.getConstraintViolationSummaries()
83 | .forEach(m -> log.error(ConstraintsCommon.CONSTRAINT_VIOLATED_MSG, m.constrainClass));
84 | }
85 | client.tableOperations().removeConstraint(ConstraintsCommon.CONSTRAINTS_TABLE, num);
86 | }
87 | }
88 | }
89 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/bloom/BloomBatchScanner.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.bloom;
18 |
19 | import static org.apache.accumulo.examples.client.RandomBatchWriter.abs;
20 |
21 | import java.util.HashMap;
22 | import java.util.HashSet;
23 | import java.util.Map.Entry;
24 | import java.util.Random;
25 |
26 | import org.apache.accumulo.core.client.Accumulo;
27 | import org.apache.accumulo.core.client.AccumuloClient;
28 | import org.apache.accumulo.core.client.BatchScanner;
29 | import org.apache.accumulo.core.client.TableNotFoundException;
30 | import org.apache.accumulo.core.data.Key;
31 | import org.apache.accumulo.core.data.Range;
32 | import org.apache.accumulo.core.data.Value;
33 | import org.apache.accumulo.core.security.Authorizations;
34 | import org.apache.accumulo.examples.cli.ClientOpts;
35 | import org.slf4j.Logger;
36 | import org.slf4j.LoggerFactory;
37 |
38 | /**
39 | * Simple example for reading random batches of data from Accumulo.
40 | */
41 | public final class BloomBatchScanner {
42 |
43 | private static final Logger log = LoggerFactory.getLogger(BloomBatchScanner.class);
44 |
45 | private BloomBatchScanner() {}
46 |
47 | public static void main(String[] args) throws TableNotFoundException {
48 | ClientOpts opts = new ClientOpts();
49 | opts.parseArgs(BloomBatchScanner.class.getName(), args);
50 |
51 | try (AccumuloClient client = Accumulo.newClient().from(opts.getClientPropsPath()).build()) {
52 | scan(client, BloomCommon.BLOOM_TEST1_TABLE, 7);
53 | scan(client, BloomCommon.BLOOM_TEST2_TABLE, 7);
54 | }
55 | }
56 |
57 | static void scan(AccumuloClient client, String tableName, int seed)
58 | throws TableNotFoundException {
59 | Random r = new Random(seed);
60 | HashSet<Range> ranges = new HashSet<>();
61 | HashMap<String,Boolean> expectedRows = new HashMap<>();
62 | while (ranges.size() < 500) {
63 | long rowId = abs(r.nextLong()) % 1_000_000_000;
64 | String row = String.format("row_%010d", rowId);
65 | ranges.add(new Range(row));
66 | expectedRows.put(row, false);
67 | }
68 |
69 | long t1 = System.currentTimeMillis();
70 | long results = 0;
71 | long lookups = ranges.size();
72 |
73 | log.info("Scanning {} with seed {}", tableName, seed);
74 | try (BatchScanner scan = client.createBatchScanner(tableName, Authorizations.EMPTY, 20)) {
75 | scan.setRanges(ranges);
76 | for (Entry<Key,Value> entry : scan) {
77 | Key key = entry.getKey();
78 | if (expectedRows.containsKey(key.getRow().toString())) {
79 | expectedRows.put(key.getRow().toString(), true);
80 | } else {
81 | log.info("Encountered unexpected key: {}", key);
82 | }
83 | results++;
84 | }
85 | }
86 | long t2 = System.currentTimeMillis();
87 | log.info(String.format("Scan finished! %6.2f lookups/sec, %.2f secs, %d results",
88 | lookups / ((t2 - t1) / 1000.0), ((t2 - t1) / 1000.0), results));
89 |
90 | int count = 0;
91 | for (Entry<String,Boolean> entry : expectedRows.entrySet()) {
92 | if (!entry.getValue()) {
93 | count++;
94 | }
95 | }
96 | if (count > 0)
97 | log.info("Did not find " + count);
98 | else
99 | log.info("All expected rows were scanned");
100 | }
101 | }
102 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/client/SequentialBatchWriter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.client;
18 |
19 | import java.util.Random;
20 |
21 | import org.apache.accumulo.core.client.Accumulo;
22 | import org.apache.accumulo.core.client.AccumuloClient;
23 | import org.apache.accumulo.core.client.AccumuloException;
24 | import org.apache.accumulo.core.client.AccumuloSecurityException;
25 | import org.apache.accumulo.core.client.BatchWriter;
26 | import org.apache.accumulo.core.client.TableNotFoundException;
27 | import org.apache.accumulo.core.data.Mutation;
28 | import org.apache.accumulo.core.data.Value;
29 | import org.apache.accumulo.examples.Common;
30 | import org.apache.accumulo.examples.cli.ClientOpts;
31 | import org.slf4j.Logger;
32 | import org.slf4j.LoggerFactory;
33 |
34 | import com.beust.jcommander.Parameter;
35 |
36 | /**
37 | * Simple example for writing random data in sequential order to Accumulo.
38 | */
39 | public final class SequentialBatchWriter {
40 |
41 | private static final Logger log = LoggerFactory.getLogger(SequentialBatchWriter.class);
42 |
43 | static final String BATCH_TABLE = Common.NAMESPACE + ".batch";
44 |
45 | private SequentialBatchWriter() {}
46 |
47 | public static Value createValue(long rowId, int size) {
48 | Random r = new Random(rowId);
49 | byte[] value = new byte[size];
50 |
51 | r.nextBytes(value);
52 |
53 | // transform to printable chars
54 | for (int j = 0; j < value.length; j++) {
55 | value[j] = (byte) (((0xff & value[j]) % 92) + ' ');
56 | }
57 |
58 | return new Value(value);
59 | }
60 |
61 | static class Opts extends ClientOpts {
62 | @Parameter(names = {"-t"}, description = "table to use")
63 | public String tableName = BATCH_TABLE;
64 |
65 | @Parameter(names = {"--start"}, description = "starting row")
66 | public Integer start = 0;
67 |
68 | @Parameter(names = {"--num"}, description = "number of rows")
69 | public Integer num = 10_000;
70 |
71 | @Parameter(names = {"--size"}, description = "size of values")
72 | public Integer size = 50;
73 | }
74 |
75 | /**
76 | * Writes entries to Accumulo using a {@link BatchWriter}. The rows of the entries will be
77 | * sequential, starting at {@code --start} (default 0) and continuing for {@code --num} rows
78 | * (default 10,000). The column family will be "foo", the column qualifier "1", and the values random {@code --size}-byte arrays (default 50).
79 | */
80 | public static void main(String[] args)
81 | throws AccumuloException, AccumuloSecurityException, TableNotFoundException {
82 | Opts opts = new Opts();
83 | opts.parseArgs(SequentialBatchWriter.class.getName(), args);
84 |
85 | try (AccumuloClient client = Accumulo.newClient().from(opts.getClientPropsPath()).build()) {
86 | Common.createTableWithNamespace(client, opts.tableName);
87 | try (BatchWriter bw = client.createBatchWriter(opts.tableName)) {
88 | for (int i = 0; i < opts.num; i++) {
89 | int row = i + opts.start;
90 | Mutation m = new Mutation(String.format("row_%010d", row));
91 | // create a random value that is a function of row id for verification purposes
92 | m.put("foo", "1", createValue(row, opts.size));
93 | bw.addMutation(m);
94 | if (i % 1000 == 0) {
95 | log.trace("wrote {} entries", i);
96 | }
97 | }
98 | }
99 | }
100 | }
101 | }
102 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/bloom/BloomFilters.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.bloom;
18 |
19 | import java.util.HashMap;
20 | import java.util.Map;
21 | import java.util.Random;
22 |
23 | import org.apache.accumulo.core.client.Accumulo;
24 | import org.apache.accumulo.core.client.AccumuloClient;
25 | import org.apache.accumulo.core.client.AccumuloException;
26 | import org.apache.accumulo.core.client.AccumuloSecurityException;
27 | import org.apache.accumulo.core.client.BatchWriter;
28 | import org.apache.accumulo.core.client.MutationsRejectedException;
29 | import org.apache.accumulo.core.client.TableNotFoundException;
30 | import org.apache.accumulo.core.client.admin.NewTableConfiguration;
31 | import org.apache.accumulo.core.data.Mutation;
32 | import org.apache.accumulo.core.security.ColumnVisibility;
33 | import org.apache.accumulo.examples.Common;
34 | import org.apache.accumulo.examples.cli.ClientOpts;
35 | import org.apache.accumulo.examples.client.RandomBatchWriter;
36 | import org.slf4j.Logger;
37 | import org.slf4j.LoggerFactory;
38 |
39 | public final class BloomFilters {
40 |
41 | private static final Logger log = LoggerFactory.getLogger(BloomFilters.class);
42 |
43 | private BloomFilters() {}
44 |
45 | public static void main(String[] args)
46 | throws AccumuloException, AccumuloSecurityException, TableNotFoundException {
47 |
48 | ClientOpts opts = new ClientOpts();
49 | opts.parseArgs(BloomFilters.class.getName(), args);
50 |
51 | try (AccumuloClient client = Accumulo.newClient().from(opts.getClientPropsPath()).build()) {
52 | Map<String,String> table1props = Map.of("table.compaction.major.ratio", "7");
53 | 
54 | Map<String,String> table2props = new HashMap<>(table1props);
55 | table2props.put(BloomCommon.BLOOM_ENABLED_PROPERTY, "true");
56 |
57 | Common.createTableWithNamespace(client, BloomCommon.BLOOM_TEST1_TABLE,
58 | new NewTableConfiguration().setProperties(table1props));
59 | Common.createTableWithNamespace(client, BloomCommon.BLOOM_TEST2_TABLE,
60 | new NewTableConfiguration().setProperties(table2props));
61 |
62 | writeAndFlushData(BloomCommon.BLOOM_TEST1_TABLE, client);
63 | writeAndFlushData(BloomCommon.BLOOM_TEST2_TABLE, client);
64 | }
65 | }
66 |
67 | // Write a million rows 3 times flushing files to disk separately
68 | private static void writeAndFlushData(final String tableName, final AccumuloClient client)
69 | throws TableNotFoundException, AccumuloSecurityException, AccumuloException {
70 | log.info("Writing data to {}", tableName);
71 | writeData(client, tableName, 7);
72 | client.tableOperations().flush(tableName, null, null, true);
73 | writeData(client, tableName, 8);
74 | client.tableOperations().flush(tableName, null, null, true);
75 | writeData(client, tableName, 9);
76 | client.tableOperations().flush(tableName, null, null, true);
77 | }
78 |
79 | // write a million random rows
80 | static void writeData(AccumuloClient client, String tableName, int seed)
81 | throws TableNotFoundException, MutationsRejectedException {
82 | Random r = new Random(seed);
83 | try (BatchWriter bw = client.createBatchWriter(tableName)) {
84 | for (int x = 0; x < 1_000_000; x++) {
85 | long rowId = RandomBatchWriter.abs(r.nextLong()) % 1_000_000_000;
86 | Mutation m = RandomBatchWriter.createMutation(rowId, 50, new ColumnVisibility());
87 | bw.addMutation(m);
88 | }
89 | }
90 | }
91 | }
92 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/shard/Index.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.shard;
18 |
19 | import java.io.File;
20 | import java.io.FileReader;
21 | import java.util.ArrayList;
22 | import java.util.HashSet;
23 | import java.util.List;
24 |
25 | import org.apache.accumulo.core.client.Accumulo;
26 | import org.apache.accumulo.core.client.AccumuloClient;
27 | import org.apache.accumulo.core.client.BatchWriter;
28 | import org.apache.accumulo.core.data.Mutation;
29 | import org.apache.accumulo.core.data.Value;
30 | import org.apache.accumulo.examples.cli.ClientOpts;
31 |
32 | import com.beust.jcommander.Parameter;
33 |
34 | /**
35 | * This program indexes a set of documents given on the command line into a shard table.
36 | *
37 | * What it writes to the table is row = partition id, column family = term, column qualifier =
38 | * document id.
39 | */
40 | public class Index {
41 |
42 | static String genPartition(int partition) {
43 | return String.format("%08x", Math.abs(partition));
44 | }
45 |
46 | public static void index(int numPartitions, String docId, String doc, String splitRegex,
47 | BatchWriter bw) throws Exception {
48 |
49 | String[] tokens = doc.split(splitRegex);
50 |
51 | String partition = genPartition(doc.hashCode() % numPartitions);
52 |
53 | Mutation m = new Mutation(partition);
54 |
55 | HashSet<String> tokensSeen = new HashSet<>();
56 |
57 | for (String token : tokens) {
58 | token = token.toLowerCase();
59 |
60 | if (!tokensSeen.contains(token)) {
61 | tokensSeen.add(token);
62 | m.put(token, docId, new Value(new byte[0]));
63 | }
64 | }
65 |
66 | if (m.size() > 0)
67 | bw.addMutation(m);
68 | }
69 |
70 | public static void index(int numPartitions, File src, String splitRegex, BatchWriter bw)
71 | throws Exception {
72 | if (src.isDirectory()) {
73 | File[] files = src.listFiles();
74 | if (files != null) {
75 | for (File child : files) {
76 | index(numPartitions, child, splitRegex, bw);
77 | }
78 | }
79 | } else {
80 |
81 | StringBuilder sb = new StringBuilder();
82 |
83 | try (FileReader fr = new FileReader(src)) {
84 |
85 | char[] data = new char[4096];
86 | int len;
87 | while ((len = fr.read(data)) != -1) {
88 | sb.append(data, 0, len);
89 | }
90 |
91 | }
92 |
93 | index(numPartitions, src.getAbsolutePath(), sb.toString(), splitRegex, bw);
94 | }
95 |
96 | }
97 |
98 | static class IndexOpts extends ClientOpts {
99 |
100 | @Parameter(names = {"-t", "--table"}, required = true, description = "table to use")
101 | private String tableName;
102 |
103 | @Parameter(names = "--partitions", required = true,
104 | description = "the number of shards to create")
105 | int partitions;
106 |
107 | @Parameter(required = true, description = " { ... }")
108 | List files = new ArrayList<>();
109 | }
110 |
111 | public static void main(String[] args) throws Exception {
112 | IndexOpts opts = new IndexOpts();
113 | opts.parseArgs(Index.class.getName(), args);
114 |
115 | String splitRegex = "\\W+";
116 |
117 | try (AccumuloClient client = Accumulo.newClient().from(opts.getClientPropsPath()).build();
118 | BatchWriter bw = client.createBatchWriter(opts.tableName)) {
119 | for (String filename : opts.files) {
120 | index(opts.partitions, new File(filename), splitRegex, bw);
121 | }
122 | }
123 | }
124 | }
125 |
--------------------------------------------------------------------------------
/docs/export.md:
--------------------------------------------------------------------------------
1 |
17 | # Apache Accumulo Export/Import Example
18 |
19 | Accumulo provides a mechanism to export and import tables. This example shows
20 | how to use this feature.
21 |
22 | The shell session below shows creating a table, inserting data, and exporting
23 | the table. A table must be offline to export it, and it should remain offline
24 | for the duration of the distcp. An easy way to take a table offline without
25 | interrupting access to it is to clone it and take the clone offline.
26 |
27 | root@test15> createnamespace examples
28 | root@test15> createtable examples.table1
29 | root@test15 examples.table1> insert a cf1 cq1 v1
30 | root@test15 examples.table1> insert h cf1 cq1 v2
31 | root@test15 examples.table1> insert z cf1 cq1 v3
32 | root@test15 examples.table1> insert z cf1 cq2 v4
33 | root@test15 examples.table1> addsplits -t examples.table1 b r
34 | root@test15 examples.table1> scan
35 | a cf1:cq1 [] v1
36 | h cf1:cq1 [] v2
37 | z cf1:cq1 [] v3
38 | z cf1:cq2 [] v4
39 | root@test15 examples.table1> config -t examples.table1 -s table.split.threshold=100M
40 | root@test15 examples.table1> clonetable examples.table1 examples.table1_exp
41 | root@test15 examples.table1> offline examples.table1_exp
42 | root@test15 examples.table1> exporttable -t examples.table1_exp /tmp/table1_export
43 | root@test15 examples.table1> quit
44 |
45 | After executing the export command, a few files are created in the HDFS export
46 | directory. One of them lists the files to distcp, as shown below.
47 |
48 | $ hadoop fs -ls /tmp/table1_export
49 | Found 2 items
50 | -rw-r--r-- 3 user supergroup 162 2012-07-25 09:56 /tmp/table1_export/distcp.txt
51 | -rw-r--r-- 3 user supergroup 821 2012-07-25 09:56 /tmp/table1_export/exportMetadata.zip
52 | $ hadoop fs -cat /tmp/table1_export/distcp.txt
53 | hdfs://n1.example.com:6093/accumulo/tables/3/default_tablet/F0000000.rf
54 | hdfs://n1.example.com:6093/tmp/table1_export/exportMetadata.zip
55 |
56 | Before the table can be imported, its files must be copied using distcp. After
57 | the distcp completes, the cloned table may be deleted.
58 |
59 | $ hadoop distcp -f /tmp/table1_export/distcp.txt /tmp/table1_export_dest
60 |
61 | The Accumulo shell session below shows importing the table and inspecting it.
62 | The data, splits, config, and logical time information for the table were
63 | preserved.
64 |
65 | root@test15> importtable examples.table1_copy /tmp/table1_export_dest
66 | root@test15> table examples.table1_copy
67 | root@test15 examples.table1_copy> scan
68 | a cf1:cq1 [] v1
69 | h cf1:cq1 [] v2
70 | z cf1:cq1 [] v3
71 | z cf1:cq2 [] v4
72 | root@test15 examples.table1_copy> getsplits -t examples.table1_copy
73 | b
74 | r
75 | root@test15> config -t examples.table1_copy -f split
76 | ---------+--------------------------+-------------------------------------------
77 | SCOPE | NAME | VALUE
78 | ---------+--------------------------+-------------------------------------------
79 | default | table.split.threshold .. | 1G
80 | table | @override ........... | 100M
81 | ---------+--------------------------+-------------------------------------------
82 | root@test15> tables -l
83 | accumulo.metadata => !0
84 | accumulo.root => +r
85 | examples.table1_copy => 5
86 | trace => 1
87 | root@test15> scan -t accumulo.metadata -b 5 -c srv:time
88 | 5;b srv:time [] M1343224500467
89 | 5;r srv:time [] M1343224500467
90 | 5< srv:time [] M1343224500467
91 |
92 |
93 |
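94 | The export and import steps can also be driven from Java rather than the shell.
95 | The sketch below is illustrative (directory names match the session above) and
96 | assumes a connected AccumuloClient named client:
97 | 
98 | client.tableOperations().clone("examples.table1", "examples.table1_exp", true,
99 |     Collections.emptyMap(), Collections.emptySet());
100 | client.tableOperations().offline("examples.table1_exp", true);
101 | client.tableOperations().exportTable("examples.table1_exp", "/tmp/table1_export");
102 | // run distcp on the files listed in /tmp/table1_export/distcp.txt, then:
103 | client.tableOperations().importTable("examples.table1_copy", "/tmp/table1_export_dest");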
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/mapreduce/TableToFile.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.mapreduce;
18 |
19 | import java.io.IOException;
20 | import java.util.AbstractMap.SimpleImmutableEntry;
21 | import java.util.ArrayList;
22 | import java.util.List;
23 | import java.util.Map;
24 |
25 | import org.apache.accumulo.core.client.IteratorSetting;
26 | import org.apache.accumulo.core.data.Key;
27 | import org.apache.accumulo.core.data.Value;
28 | import org.apache.accumulo.examples.cli.ClientOpts;
29 | import org.apache.accumulo.examples.util.FormatUtil;
30 | import org.apache.accumulo.hadoop.mapreduce.AccumuloInputFormat;
31 | import org.apache.accumulo.hadoop.mapreduce.InputFormatBuilder;
32 | import org.apache.hadoop.fs.Path;
33 | import org.apache.hadoop.io.NullWritable;
34 | import org.apache.hadoop.io.Text;
35 | import org.apache.hadoop.mapreduce.Job;
36 | import org.apache.hadoop.mapreduce.Mapper;
37 | import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
38 |
39 | import com.beust.jcommander.Parameter;
40 |
41 | /**
42 | * Takes a table and outputs the specified column to a set of part files on hdfs
43 | */
44 | public class TableToFile {
45 |
46 | static class Opts extends ClientOpts {
47 | @Parameter(names = {"-t", "--table"}, required = true, description = "table to use")
48 | String tableName;
49 | @Parameter(names = "--output", required = true, description = "output directory")
50 | String output;
51 | @Parameter(names = "--columns", description = "columns to extract, in cf:cq{,cf:cq,...} form")
52 | String columns = "";
53 | }
54 |
55 | /**
56 | * The Mapper class that formats each input entry as a single line of text output.
57 | */
58 | public static class TTFMapper extends Mapper<Key,Value,NullWritable,Text> {
59 | @Override
60 | public void map(Key row, Value data, Context context) throws IOException, InterruptedException {
61 | Map.Entry<Key,Value> entry = new SimpleImmutableEntry<>(row, data);
62 | context.write(NullWritable.get(), new Text(FormatUtil.formatTableEntry(entry, false)));
63 | context.setStatus("Outputted Value");
64 | }
65 | }
66 |
67 | public static void main(String[] args) throws Exception {
68 | Opts opts = new Opts();
69 | opts.parseArgs(TableToFile.class.getName(), args);
70 |
71 | List<IteratorSetting.Column> columnsToFetch = new ArrayList<>();
72 | for (String col : opts.columns.split(",")) {
73 | int idx = col.indexOf(":");
74 | String cf = idx < 0 ? col : col.substring(0, idx);
75 | String cq = idx < 0 ? null : col.substring(idx + 1);
76 | if (!cf.isEmpty())
77 | columnsToFetch.add(new IteratorSetting.Column(cf, cq));
78 | }
79 |
80 | Job job = Job.getInstance(opts.getHadoopConfig());
81 | job.setJobName(TableToFile.class.getSimpleName() + "_" + System.currentTimeMillis());
82 | job.setJarByClass(TableToFile.class);
83 | job.setInputFormatClass(AccumuloInputFormat.class);
84 | InputFormatBuilder.InputFormatOptions inputOpts = AccumuloInputFormat.configure()
85 | .clientProperties(opts.getClientProperties()).table(opts.tableName);
86 | if (!columnsToFetch.isEmpty()) {
87 | inputOpts.fetchColumns(columnsToFetch);
88 | }
89 | inputOpts.store(job);
90 | job.setMapperClass(TTFMapper.class);
91 | job.setMapOutputKeyClass(NullWritable.class);
92 | job.setMapOutputValueClass(Text.class);
93 | job.setNumReduceTasks(0);
94 | job.setOutputFormatClass(TextOutputFormat.class);
95 | TextOutputFormat.setOutputPath(job, new Path(opts.output));
96 |
97 | System.exit(job.waitForCompletion(true) ? 0 : 1);
98 | }
99 | }
100 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/shard/Query.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.shard;
18 |
19 | import java.util.ArrayList;
20 | import java.util.Collections;
21 | import java.util.List;
22 | import java.util.Map.Entry;
23 |
24 | import org.apache.accumulo.core.client.Accumulo;
25 | import org.apache.accumulo.core.client.AccumuloClient;
26 | import org.apache.accumulo.core.client.BatchScanner;
27 | import org.apache.accumulo.core.client.IteratorSetting;
28 | import org.apache.accumulo.core.client.sample.SamplerConfiguration;
29 | import org.apache.accumulo.core.data.Key;
30 | import org.apache.accumulo.core.data.Range;
31 | import org.apache.accumulo.core.data.Value;
32 | import org.apache.accumulo.core.iterators.user.IntersectingIterator;
33 | import org.apache.accumulo.core.security.Authorizations;
34 | import org.apache.accumulo.examples.cli.ClientOpts;
35 | import org.apache.hadoop.io.Text;
36 |
37 | import com.beust.jcommander.Parameter;
38 |
39 | /**
40 | * This program queries a set of terms in the shard table (populated by {@link Index}) using the
41 | * {@link IntersectingIterator}.
42 | */
43 | public class Query {
44 |
45 | static class QueryOpts extends ClientOpts {
46 |
47 | @Parameter(description = " term { ... }")
48 | List terms = new ArrayList<>();
49 |
50 | @Parameter(names = {"-t", "--table"}, required = true, description = "table to use")
51 | String tableName;
52 |
53 | @Parameter(names = {"--sample"},
54 | description = "Do queries against sample, useful when sample is built using column qualifier")
55 | boolean useSample = false;
56 |
57 | @Parameter(names = {"--sampleCutoff"},
58 | description = "Use sample data to determine if a query might return a number of documents over the cutoff. This check is per tablet.")
59 | Integer sampleCutoff = null;
60 | }
61 |
62 | public static List<String> query(BatchScanner bs, List<String> terms, Integer cutoff) {
63 |
64 | Text[] columns = new Text[terms.size()];
65 | int i = 0;
66 | for (String term : terms) {
67 | columns[i++] = new Text(term);
68 | }
69 |
70 | IteratorSetting ii;
71 |
72 | if (cutoff != null) {
73 | ii = new IteratorSetting(20, "ii", CutoffIntersectingIterator.class);
74 | CutoffIntersectingIterator.setCutoff(ii, cutoff);
75 | } else {
76 | ii = new IteratorSetting(20, "ii", IntersectingIterator.class);
77 | }
78 |
79 | IntersectingIterator.setColumnFamilies(ii, columns);
80 | bs.addScanIterator(ii);
81 | bs.setRanges(Collections.singleton(new Range()));
82 | List<String> result = new ArrayList<>();
83 | for (Entry<Key,Value> entry : bs) {
84 | result.add(entry.getKey().getColumnQualifier().toString());
85 | }
86 | return result;
87 | }
88 |
89 | public static void main(String[] args) throws Exception {
90 | QueryOpts opts = new QueryOpts();
91 | opts.parseArgs(Query.class.getName(), args);
92 |
93 | try (AccumuloClient client = Accumulo.newClient().from(opts.getClientPropsPath()).build();
94 | BatchScanner bs = client.createBatchScanner(opts.tableName, Authorizations.EMPTY, 10)) {
95 | if (opts.useSample) {
96 | SamplerConfiguration samplerConfig = client.tableOperations()
97 | .getSamplerConfiguration(opts.tableName);
98 | // reuse the fetched sampler configuration rather than fetching it a second time
99 | CutoffIntersectingIterator.validateSamplerConfig(samplerConfig);
100 | bs.setSamplerConfiguration(samplerConfig);
101 | }
102 | for (String entry : query(bs, opts.terms, opts.sampleCutoff)) {
103 | System.out.println(" " + entry);
104 | }
105 | }
106 | }
107 | }
108 |
--------------------------------------------------------------------------------
/docs/combiner.md:
--------------------------------------------------------------------------------
1 |
17 | # Apache Accumulo Combiner Example
18 |
19 | This tutorial uses the following Java class, which can be found in org.apache.accumulo.examples.combiner:
20 |
21 | * [StatsCombiner.java] - a combiner that calculates max, min, sum, and count
22 |
23 | This is a simple combiner example. To build this example, run Maven and then
24 | copy the produced jar into the Accumulo lib dir. This is already done in the
25 | tar distribution.
26 |
27 | $ bin/accumulo shell -u username
28 | Enter current password for 'username'@'instance': ***
29 |
30 | Shell - Apache Accumulo Interactive Shell
31 | -
32 | - version: 2.1.0-SNAPSHOT
33 | - instance name: instance
34 | - instance id: 00000000-0000-0000-0000-000000000000
35 | -
36 | - type 'help' for a list of available commands
37 | -
38 | username@instance> createnamespace examples
39 | username@instance> createtable examples.runners
40 | username@instance examples.runners> setiter -t examples.runners -p 10 -scan -minc -majc -n decStats -class org.apache.accumulo.examples.combiner.StatsCombiner
41 | Combiner that keeps track of min, max, sum, and count
42 | ----------> set StatsCombiner parameter all, set to true to apply Combiner to every column, otherwise leave blank. if true, columns option will be ignored.:
43 | ----------> set StatsCombiner parameter columns, <col fam>[:<col qual>]{,<col fam>[:<col qual>]} escape non-alphanum chars using %<hex>.: stat
44 | ----------> set StatsCombiner parameter reduceOnFullCompactionOnly, If true, only reduce on full major compactions. Defaults to false. :
45 | ----------> set StatsCombiner parameter radix, radix/base of the numbers: 10
46 | username@instance examples.runners> setiter -t examples.runners -p 11 -scan -minc -majc -n hexStats -class org.apache.accumulo.examples.combiner.StatsCombiner
47 | Combiner that keeps track of min, max, sum, and count
48 | ----------> set StatsCombiner parameter all, set to true to apply Combiner to every column, otherwise leave blank. if true, columns option will be ignored.:
49 | ----------> set StatsCombiner parameter columns, <col fam>[:<col qual>]{,<col fam>[:<col qual>]} escape non-alphanum chars using %<hex>.: hstat
50 | ----------> set StatsCombiner parameter reduceOnFullCompactionOnly, If true, only reduce on full major compactions. Defaults to false. :
51 | ----------> set StatsCombiner parameter radix, radix/base of the numbers: 16
52 | username@instance examples.runners> insert 123456 name first Joe
53 | username@instance examples.runners> insert 123456 stat marathon 240
54 | username@instance examples.runners> scan
55 | 123456 name:first [] Joe
56 | 123456 stat:marathon [] 240,240,240,1
57 | username@instance examples.runners> insert 123456 stat marathon 230
58 | username@instance examples.runners> insert 123456 stat marathon 220
59 | username@instance examples.runners> scan
60 | 123456 name:first [] Joe
61 | 123456 stat:marathon [] 220,240,690,3
62 | username@instance examples.runners> insert 123456 hstat virtualMarathon 6a
63 | username@instance examples.runners> insert 123456 hstat virtualMarathon 6b
64 | username@instance examples.runners> scan
65 | 123456 hstat:virtualMarathon [] 6a,6b,d5,2
66 | 123456 name:first [] Joe
67 | 123456 stat:marathon [] 220,240,690,3
68 |
69 | In this example a table is created, and the example stats combiner is applied to
70 | the column families stat and hstat. The stats combiner computes min, max, sum, and
71 | count. It can be configured to use a different base or radix. In the example
72 | above the column family stat is configured for base 10 and the column family
73 | hstat is configured for base 16.
74 |
75 | [StatsCombiner.java]: ../src/main/java/org/apache/accumulo/examples/combiner/StatsCombiner.java
76 |
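77 | The same combiner can be attached from Java instead of the shell. A minimal
78 | sketch, assuming a connected AccumuloClient named client (the priority and
79 | column choice mirror the decStats session above):
80 | 
81 | IteratorSetting iter = new IteratorSetting(10, "decStats", StatsCombiner.class);
82 | Combiner.setColumns(iter, Collections.singletonList(new IteratorSetting.Column("stat")));
83 | iter.addOption(StatsCombiner.RADIX_OPTION, "10");
84 | client.tableOperations().attachIterator("examples.runners", iter);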
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/constraints/NumericValueConstraint.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.constraints;
18 |
19 | import java.util.Collection;
20 | import java.util.List;
21 |
22 | import org.apache.accumulo.core.client.Accumulo;
23 | import org.apache.accumulo.core.client.AccumuloClient;
24 | import org.apache.accumulo.core.client.AccumuloException;
25 | import org.apache.accumulo.core.client.AccumuloSecurityException;
26 | import org.apache.accumulo.core.client.BatchWriter;
27 | import org.apache.accumulo.core.client.MutationsRejectedException;
28 | import org.apache.accumulo.core.client.TableNotFoundException;
29 | import org.apache.accumulo.core.data.ColumnUpdate;
30 | import org.apache.accumulo.core.data.Mutation;
31 | import org.apache.accumulo.core.data.Value;
32 | import org.apache.accumulo.core.data.constraints.Constraint;
33 | import org.apache.accumulo.examples.Common;
34 | import org.apache.accumulo.examples.cli.ClientOpts;
35 | import org.slf4j.Logger;
36 | import org.slf4j.LoggerFactory;
37 |
38 | /**
39 | * This class is an Accumulo constraint that ensures values are numeric strings.
40 | */
41 | public class NumericValueConstraint implements Constraint {
42 |
43 | private static final Logger log = LoggerFactory.getLogger(NumericValueConstraint.class);
44 |
45 | static final short NON_NUMERIC_VALUE = 1;
46 | static final String VIOLATION_MESSAGE = "Value is not numeric";
47 |
48 | private static final List<Short> VIOLATION_LIST = List.of(NON_NUMERIC_VALUE);
49 |
50 | private boolean isNumeric(byte[] bytes) {
51 | for (byte b : bytes) {
52 | boolean ok = (b >= '0' && b <= '9');
53 | if (!ok)
54 | return false;
55 | }
56 | return true;
57 | }
58 |
59 | @Override
60 | public List<Short> check(Environment env, Mutation mutation) {
61 | Collection<ColumnUpdate> updates = mutation.getUpdates();
62 |
63 | for (ColumnUpdate columnUpdate : updates) {
64 | if (!isNumeric(columnUpdate.getValue()))
65 | return VIOLATION_LIST;
66 | }
67 | return null;
68 | }
69 |
70 | @Override
71 | public String getViolationDescription(short violationCode) {
72 | if (violationCode == NON_NUMERIC_VALUE) {
73 | return VIOLATION_MESSAGE;
74 | }
75 | return null;
76 | }
77 |
78 | public static void main(String[] args)
79 | throws AccumuloException, AccumuloSecurityException, TableNotFoundException {
80 | ClientOpts opts = new ClientOpts();
81 | opts.parseArgs(NumericValueConstraint.class.getName(), args);
82 |
83 | try (AccumuloClient client = Accumulo.newClient().from(opts.getClientPropsPath()).build()) {
84 | Common.createTableWithNamespace(client, ConstraintsCommon.CONSTRAINTS_TABLE);
85 |
86 | /*
87 | * Add the {@link NumericValueConstraint} constraint to the table. Be sure to use the fully
88 | * qualified class name
89 | */
90 | int num = client.tableOperations().addConstraint(ConstraintsCommon.CONSTRAINTS_TABLE,
91 | "org.apache.accumulo.examples.constraints.NumericValueConstraint");
92 |
93 | log.info("Attempting to write non-numeric data to testConstraints");
94 | try (BatchWriter bw = client.createBatchWriter(ConstraintsCommon.CONSTRAINTS_TABLE)) {
95 | Mutation m = new Mutation("r1");
96 | m.put("cf1", "cq1", new Value(("value1--$$@@%%").getBytes()));
97 | bw.addMutation(m);
98 | } catch (MutationsRejectedException e) {
99 | e.getConstraintViolationSummaries()
100 | .forEach(m -> log.error(ConstraintsCommon.CONSTRAINT_VIOLATED_MSG, m.constrainClass));
101 | }
102 | client.tableOperations().removeConstraint(ConstraintsCommon.CONSTRAINTS_TABLE, num);
103 | }
104 | }
105 | }
106 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/combiner/StatsCombiner.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.combiner;
18 |
19 | import java.io.IOException;
20 | import java.util.Iterator;
21 | import java.util.Map;
22 |
23 | import org.apache.accumulo.core.client.IteratorSetting;
24 | import org.apache.accumulo.core.data.Key;
25 | import org.apache.accumulo.core.data.Value;
26 | import org.apache.accumulo.core.iterators.Combiner;
27 | import org.apache.accumulo.core.iterators.IteratorEnvironment;
28 | import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
29 |
30 | /**
31 | * This combiner calculates the max, min, sum, and count of long integers represented as strings in
32 | * values. It stores the result in a comma-separated value of the form min,max,sum,count. If such a
33 | * value is encountered while combining, its information is incorporated into the running
34 | * calculations of min, max, sum, and count. See {@link Combiner} for more information on which
35 | * values are combined together.
36 | */
37 | public class StatsCombiner extends Combiner {
38 |
39 | public static final String RADIX_OPTION = "radix";
40 |
41 | private int radix = 10;
42 |
43 | @Override
44 | public Value reduce(Key key, Iterator<Value> iter) {
45 |
46 | long min = Long.MAX_VALUE;
47 | long max = Long.MIN_VALUE;
48 | long sum = 0;
49 | long count = 0;
50 |
51 | while (iter.hasNext()) {
52 | String[] stats = iter.next().toString().split(",");
53 |
54 | if (stats.length == 1) {
55 | long val = Long.parseLong(stats[0], radix);
56 | min = Math.min(val, min);
57 | max = Math.max(val, max);
58 | sum += val;
59 | count += 1;
60 | } else {
61 | min = Math.min(Long.parseLong(stats[0], radix), min);
62 | max = Math.max(Long.parseLong(stats[1], radix), max);
63 | sum += Long.parseLong(stats[2], radix);
64 | count += Long.parseLong(stats[3], radix);
65 | }
66 | }
67 |
68 | String ret = Long.toString(min, radix) + "," + Long.toString(max, radix) + ","
69 | + Long.toString(sum, radix) + "," + Long.toString(count, radix);
70 | return new Value(ret.getBytes());
71 | }
72 |
73 | @Override
74 | public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options,
75 | IteratorEnvironment env) throws IOException {
76 | super.init(source, options, env);
77 |
78 | if (options.containsKey(RADIX_OPTION))
79 | radix = Integer.parseInt(options.get(RADIX_OPTION));
80 | else
81 | radix = 10;
82 | }
83 |
84 | @Override
85 | public IteratorOptions describeOptions() {
86 | IteratorOptions io = super.describeOptions();
87 | io.setName("statsCombiner");
88 | io.setDescription("Combiner that keeps track of min, max, sum, and count");
89 | io.addNamedOption(RADIX_OPTION, "radix/base of the numbers");
90 | return io;
91 | }
92 |
93 | @Override
94 | public boolean validateOptions(Map<String,String> options) {
95 | if (!super.validateOptions(options))
96 | return false;
97 |
98 | if (options.containsKey(RADIX_OPTION) && !options.get(RADIX_OPTION).matches("\\d+"))
99 | throw new IllegalArgumentException(
100 | "invalid option " + RADIX_OPTION + ":" + options.get(RADIX_OPTION));
101 |
102 | return true;
103 | }
104 |
105 | /**
106 | * A convenience method for setting the expected base/radix of the numbers
107 | *
108 | * @param iterConfig
109 | * Iterator settings to configure
110 | * @param base
111 | * The expected base/radix of the numbers.
112 | */
113 | public static void setRadix(IteratorSetting iterConfig, int base) {
114 | iterConfig.addOption(RADIX_OPTION, base + "");
115 | }
116 | }
117 |
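A minimal usage sketch for attaching StatsCombiner to a table, assuming a pre-created table named `examples.stats`, an iterator name of "stats", and a priority of 10 (all illustrative placeholders, not values from this repository):

    import org.apache.accumulo.core.client.Accumulo;
    import org.apache.accumulo.core.client.AccumuloClient;
    import org.apache.accumulo.core.client.IteratorSetting;
    import org.apache.accumulo.examples.combiner.StatsCombiner;

    public class StatsCombinerSetupSketch {
      public static void main(String[] args) throws Exception {
        try (AccumuloClient client = Accumulo.newClient()
            .from("/path/to/accumulo-client.properties").build()) {
          IteratorSetting setting = new IteratorSetting(10, "stats", StatsCombiner.class);
          // Combine every column (static helper inherited from Combiner).
          StatsCombiner.setCombineAllColumns(setting, true);
          // Interpret the stored strings as base-10 longs.
          StatsCombiner.setRadix(setting, 10);
          client.tableOperations().attachIterator("examples.stats", setting);
        }
      }
    }

Once attached, scans of the table return one min,max,sum,count string per column instead of the individual inserted values.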
--------------------------------------------------------------------------------
/src/test/java/org/apache/accumulo/examples/mapreduce/MapReduceIT.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.mapreduce;
18 |
19 | import static org.junit.jupiter.api.Assertions.assertEquals;
20 |
21 | import java.security.MessageDigest;
22 | import java.time.Duration;
23 | import java.util.Base64;
24 | import java.util.Collections;
25 | import java.util.Map.Entry;
26 | import java.util.Properties;
27 |
28 | import org.apache.accumulo.core.client.Accumulo;
29 | import org.apache.accumulo.core.client.AccumuloClient;
30 | import org.apache.accumulo.core.client.BatchWriter;
31 | import org.apache.accumulo.core.client.Scanner;
32 | import org.apache.accumulo.core.conf.ClientProperty;
33 | import org.apache.accumulo.core.conf.Property;
34 | import org.apache.accumulo.core.data.Key;
35 | import org.apache.accumulo.core.data.Mutation;
36 | import org.apache.accumulo.core.data.Value;
37 | import org.apache.accumulo.core.security.Authorizations;
38 | import org.apache.accumulo.examples.ExamplesIT;
39 | import org.apache.accumulo.miniclusterImpl.MiniAccumuloClusterImpl;
40 | import org.apache.accumulo.miniclusterImpl.MiniAccumuloConfigImpl;
41 | import org.apache.accumulo.test.functional.ConfigurableMacBase;
42 | import org.apache.hadoop.conf.Configuration;
43 | import org.junit.jupiter.api.Test;
44 |
45 | public class MapReduceIT extends ConfigurableMacBase {
46 |
47 | @Override
48 | protected Duration defaultTimeout() {
49 | return Duration.ofMinutes(1);
50 | }
51 |
52 | @Override
53 | protected void configure(MiniAccumuloConfigImpl cfg, Configuration hadoopCoreSite) {
54 | cfg.setProperty(Property.TSERV_NATIVEMAP_ENABLED, "false");
55 | }
56 |
57 | public static final String hadoopTmpDirArg = "-Dhadoop.tmp.dir=" + System.getProperty("user.dir")
58 | + "/target/hadoop-tmp";
59 |
60 | static final String tablename = "mapredf";
61 | static final String input_cf = "cf-HASHTYPE";
62 | static final String input_cq = "cq-NOTHASHED";
63 | static final String input_cfcq = input_cf + ":" + input_cq;
64 | static final String output_cq = "cq-MD4BASE64";
65 | static final String output_cfcq = input_cf + ":" + output_cq;
66 |
67 | @Test
68 | public void test() throws Exception {
69 | String confFile = System.getProperty("user.dir") + "/target/accumulo-client.properties";
70 | Properties props = getClientProperties();
71 | String instance = ClientProperty.INSTANCE_NAME.getValue(props);
72 | String keepers = ClientProperty.INSTANCE_ZOOKEEPERS.getValue(props);
73 | ExamplesIT.writeClientPropsFile(confFile, instance, keepers, "root", ROOT_PASSWORD);
74 | try (AccumuloClient client = Accumulo.newClient().from(props).build()) {
75 | client.tableOperations().create(tablename);
76 | try (BatchWriter bw = client.createBatchWriter(tablename)) {
77 | for (int i = 0; i < 10; i++) {
78 | Mutation m = new Mutation("" + i);
79 | m.put(input_cf, input_cq, "row" + i);
80 | bw.addMutation(m);
81 | }
82 | }
83 | MiniAccumuloClusterImpl.ProcessInfo hash = getCluster().exec(RowHash.class,
84 | Collections.singletonList(hadoopTmpDirArg), "-c", confFile, "-t", tablename, "--column",
85 | input_cfcq);
86 | assertEquals(0, hash.getProcess().waitFor());
87 |
88 | try (Scanner s = client.createScanner(tablename, Authorizations.EMPTY)) {
89 | s.fetchColumn(input_cf, output_cq);
90 | int i = 0;
91 | MessageDigest md = MessageDigest.getInstance("MD5");
92 | for (Entry<Key,Value> entry : s) {
93 | byte[] check = Base64.getEncoder().encode(md.digest(("row" + i).getBytes()));
94 | assertEquals(entry.getValue().toString(), new String(check));
95 | i++;
96 | }
97 | }
98 | }
99 | }
100 | }
101 |
--------------------------------------------------------------------------------
/docs/client.md:
--------------------------------------------------------------------------------
1 |
17 | # Apache Accumulo Client Examples
18 |
19 | The following Java classes are examples of the Accumulo client API:
20 |
21 | * [RowOperations.java] - reads and writes rows
22 | * [ReadWriteExample.java] - creates a table, writes to it, and reads from it
23 |
24 | [RowOperations.java] demonstrates how to read, write and delete rows using the BatchWriter and Scanner:
25 |
26 | $ ./bin/runex client.RowOperations
27 | [examples.client.RowOperations] INFO : This is only row2
28 | [examples.client.RowOperations] INFO : Key: row2 col:1 [] 1523301597006 false Value: v1
29 | [examples.client.RowOperations] INFO : Key: row2 col:2 [] 1523301597006 false Value: v2
30 | [examples.client.RowOperations] INFO : Key: row2 col:3 [] 1523301597006 false Value: v3
31 | [examples.client.RowOperations] INFO : This is everything
32 | [examples.client.RowOperations] INFO : Key: row1 col:1 [] 1523301597006 false Value: v1
33 | [examples.client.RowOperations] INFO : Key: row1 col:2 [] 1523301597006 false Value: v2
34 | [examples.client.RowOperations] INFO : Key: row1 col:3 [] 1523301597006 false Value: v3
35 | [examples.client.RowOperations] INFO : Key: row2 col:1 [] 1523301597006 false Value: v1
36 | [examples.client.RowOperations] INFO : Key: row2 col:2 [] 1523301597006 false Value: v2
37 | [examples.client.RowOperations] INFO : Key: row2 col:3 [] 1523301597006 false Value: v3
38 | [examples.client.RowOperations] INFO : Key: row3 col:1 [] 1523301597006 false Value: v1
39 | [examples.client.RowOperations] INFO : Key: row3 col:2 [] 1523301597006 false Value: v2
40 | [examples.client.RowOperations] INFO : Key: row3 col:3 [] 1523301597006 false Value: v3
41 | [examples.client.RowOperations] INFO : This is row1 and row3
42 | [examples.client.RowOperations] INFO : Key: row1 col:1 [] 1523301597006 false Value: v1
43 | [examples.client.RowOperations] INFO : Key: row1 col:2 [] 1523301597006 false Value: v2
44 | [examples.client.RowOperations] INFO : Key: row1 col:3 [] 1523301597006 false Value: v3
45 | [examples.client.RowOperations] INFO : Key: row3 col:1 [] 1523301597006 false Value: v1
46 | [examples.client.RowOperations] INFO : Key: row3 col:2 [] 1523301597006 false Value: v2
47 | [examples.client.RowOperations] INFO : Key: row3 col:3 [] 1523301597006 false Value: v3
48 | [examples.client.RowOperations] INFO : This is just row3
49 | [examples.client.RowOperations] INFO : Key: row3 col:1 [] 1523301597006 false Value: v1
50 | [examples.client.RowOperations] INFO : Key: row3 col:2 [] 1523301597006 false Value: v2
51 | [examples.client.RowOperations] INFO : Key: row3 col:3 [] 1523301597006 false Value: v3
52 |
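For reference, a minimal sketch of the write-then-read pattern these examples build on, assuming a pre-created table named `examples.rowops` and an existing client properties file; the class name, path, and row/column values are placeholders:

    import java.util.Map.Entry;

    import org.apache.accumulo.core.client.Accumulo;
    import org.apache.accumulo.core.client.AccumuloClient;
    import org.apache.accumulo.core.client.BatchWriter;
    import org.apache.accumulo.core.client.Scanner;
    import org.apache.accumulo.core.data.Key;
    import org.apache.accumulo.core.data.Mutation;
    import org.apache.accumulo.core.data.Range;
    import org.apache.accumulo.core.data.Value;
    import org.apache.accumulo.core.security.Authorizations;

    public class WriteReadSketch {
      public static void main(String[] args) throws Exception {
        try (AccumuloClient client = Accumulo.newClient()
            .from("/path/to/accumulo-client.properties").build()) {
          try (BatchWriter bw = client.createBatchWriter("examples.rowops")) {
            Mutation m = new Mutation("row2");  // row ID
            m.put("col", "1", "v1");            // family, qualifier, value
            bw.addMutation(m);
          }
          try (Scanner scanner = client.createScanner("examples.rowops", Authorizations.EMPTY)) {
            scanner.setRange(Range.exact("row2")); // read back only row2
            for (Entry<Key,Value> entry : scanner) {
              System.out.println(entry.getKey() + " -> " + entry.getValue());
            }
          }
        }
      }
    }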
53 | [ReadWriteExample.java] creates a table, writes to it, and reads from it:
54 |
55 | $ ./bin/runex client.ReadWriteExample
56 | [examples.client.ReadWriteExample] INFO : hello0 cf:cq [] 1523306675130 false -> world0
57 | [examples.client.ReadWriteExample] INFO : hello1 cf:cq [] 1523306675130 false -> world1
58 | [examples.client.ReadWriteExample] INFO : hello2 cf:cq [] 1523306675130 false -> world2
59 | [examples.client.ReadWriteExample] INFO : hello3 cf:cq [] 1523306675130 false -> world3
60 | [examples.client.ReadWriteExample] INFO : hello4 cf:cq [] 1523306675130 false -> world4
61 | [examples.client.ReadWriteExample] INFO : hello5 cf:cq [] 1523306675130 false -> world5
62 | [examples.client.ReadWriteExample] INFO : hello6 cf:cq [] 1523306675130 false -> world6
63 | [examples.client.ReadWriteExample] INFO : hello7 cf:cq [] 1523306675130 false -> world7
64 | [examples.client.ReadWriteExample] INFO : hello8 cf:cq [] 1523306675130 false -> world8
65 | [examples.client.ReadWriteExample] INFO : hello9 cf:cq [] 1523306675130 false -> world9
66 |
67 | [Flush.java]: ../src/main/java/org/apache/accumulo/examples/client/Flush.java
68 | [RowOperations.java]: ../src/main/java/org/apache/accumulo/examples/client/RowOperations.java
69 | [ReadWriteExample.java]: ../src/main/java/org/apache/accumulo/examples/client/ReadWriteExample.java
70 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/filedata/CharacterHistogram.java:
--------------------------------------------------------------------------------
1 | /// *
2 | // * Licensed to the Apache Software Foundation (ASF) under one or more
3 | // * contributor license agreements. See the NOTICE file distributed with
4 | // * this work for additional information regarding copyright ownership.
5 | // * The ASF licenses this file to You under the Apache License, Version 2.0
6 | // * (the "License"); you may not use this file except in compliance with
7 | // * the License. You may obtain a copy of the License at
8 | // *
9 | // * http://www.apache.org/licenses/LICENSE-2.0
10 | // *
11 | // * Unless required by applicable law or agreed to in writing, software
12 | // * distributed under the License is distributed on an "AS IS" BASIS,
13 | // * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | // * See the License for the specific language governing permissions and
15 | // * limitations under the License.
16 | // */
17 | // package org.apache.accumulo.examples.filedata;
18 | //
19 | // import java.io.IOException;
20 | // import java.io.InputStream;
21 | // import java.util.Arrays;
22 | // import java.util.List;
23 | // import java.util.Map.Entry;
24 | // import java.util.Properties;
25 | //
26 | // import org.apache.accumulo.core.client.security.tokens.PasswordToken;
27 | // import org.apache.accumulo.core.data.Key;
28 | // import org.apache.accumulo.core.data.Mutation;
29 | // import org.apache.accumulo.core.data.Value;
30 | // import org.apache.accumulo.core.iterators.user.SummingArrayCombiner;
31 | // import org.apache.accumulo.core.security.ColumnVisibility;
32 | // import org.apache.accumulo.examples.cli.ClientOpts;
33 | // import org.apache.accumulo.hadoop.mapreduce.AccumuloOutputFormat;
34 | // import org.apache.hadoop.io.Text;
35 | // import org.apache.hadoop.mapreduce.Job;
36 | // import org.apache.hadoop.mapreduce.Mapper;
37 | //
38 | // import com.beust.jcommander.Parameter;
39 | //
40 | /// **
41 | // * A MapReduce that computes a histogram of byte frequency for each file and stores the histogram
42 | // * alongside the file data. The {@link ChunkInputFormat} is used to read the file data from
43 | // * Accumulo.
44 | // */
45 | // public class CharacterHistogram {
46 | //
47 | // private static final String VIS = "vis";
48 | //
49 | // public static class HistMapper extends Mapper<List<Entry<Key,Value>>,InputStream,Text,Mutation> {
50 | // private ColumnVisibility cv;
51 | //
52 | // @Override
53 | // public void map(List<Entry<Key,Value>> k, InputStream v, Context context)
54 | // throws IOException, InterruptedException {
55 | // Long[] hist = new Long[256];
56 | // Arrays.fill(hist, 0L);
57 | // int b = v.read();
58 | // while (b >= 0) {
59 | // hist[b] += 1L;
60 | // b = v.read();
61 | // }
62 | // v.close();
63 | // Mutation m = new Mutation(k.get(0).getKey().getRow());
64 | // m.put("info", "hist", cv,
65 | // new Value(SummingArrayCombiner.STRING_ARRAY_ENCODER.encode(Arrays.asList(hist))));
66 | // context.write(new Text(), m);
67 | // }
68 | //
69 | // @Override
70 | // protected void setup(Context context) {
71 | // cv = new ColumnVisibility(context.getConfiguration().get(VIS, ""));
72 | // }
73 | // }
74 | //
75 | // static class Opts extends ClientOpts {
76 | // @Parameter(names = {"-t", "--table"}, required = true, description = "table to use")
77 | // String tableName;
78 | // @Parameter(names = "--vis")
79 | // String visibilities = "";
80 | // }
81 | //
82 | // @SuppressWarnings("deprecation")
83 | // public static void main(String[] args) throws Exception {
84 | // Opts opts = new Opts();
85 | // opts.parseArgs(CharacterHistogram.class.getName(), args);
86 | //
87 | // Job job = Job.getInstance(opts.getHadoopConfig());
88 | // job.setJobName(CharacterHistogram.class.getSimpleName());
89 | // job.setJarByClass(CharacterHistogram.class);
90 | // job.setInputFormatClass(ChunkInputFormat.class);
91 | // job.getConfiguration().set(VIS, opts.visibilities);
92 | // job.setMapperClass(HistMapper.class);
93 | // job.setMapOutputKeyClass(Text.class);
94 | // job.setMapOutputValueClass(Mutation.class);
95 | //
96 | // job.setNumReduceTasks(0);
97 | //
98 | // Properties props = opts.getClientProperties();
99 | // ChunkInputFormat.setZooKeeperInstance(job, props.getProperty("instance.name"),
100 | // props.getProperty("instance.zookeepers"));
101 | // PasswordToken token = new PasswordToken(props.getProperty("auth.token"));
102 | // ChunkInputFormat.setConnectorInfo(job, props.getProperty("auth.principal"), token);
103 | // ChunkInputFormat.setInputTableName(job, opts.tableName);
104 | // ChunkInputFormat.setScanAuthorizations(job, opts.auths);
105 | //
106 | // job.setOutputFormatClass(AccumuloOutputFormat.class);
107 | // AccumuloOutputFormat.configure().clientProperties(opts.getClientProperties())
108 | // .defaultTable(opts.tableName).createTables(true).store(job);
109 | //
110 | // System.exit(job.waitForCompletion(true) ? 0 : 1);
111 | // }
112 | // }
113 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/mapreduce/NGramIngest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.mapreduce;
18 |
19 | import java.io.IOException;
20 | import java.util.SortedSet;
21 | import java.util.TreeSet;
22 | import java.util.stream.Collectors;
23 | import java.util.stream.Stream;
24 |
25 | import org.apache.accumulo.core.client.AccumuloClient;
26 | import org.apache.accumulo.core.client.admin.NewTableConfiguration;
27 | import org.apache.accumulo.core.data.Mutation;
28 | import org.apache.accumulo.core.data.Value;
29 | import org.apache.accumulo.examples.Common;
30 | import org.apache.accumulo.examples.cli.ClientOpts;
31 | import org.apache.accumulo.hadoop.mapreduce.AccumuloOutputFormat;
32 | import org.apache.hadoop.fs.Path;
33 | import org.apache.hadoop.io.LongWritable;
34 | import org.apache.hadoop.io.Text;
35 | import org.apache.hadoop.mapreduce.Job;
36 | import org.apache.hadoop.mapreduce.Mapper;
37 | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
38 | import org.slf4j.Logger;
39 | import org.slf4j.LoggerFactory;
40 |
41 | import com.beust.jcommander.Parameter;
42 |
43 | /**
44 | * Map job to ingest n-gram files from
45 | * http://storage.googleapis.com/books/ngrams/books/datasetsv2.html
46 | */
47 | public class NGramIngest {
48 |
49 | private static final Logger log = LoggerFactory.getLogger(NGramIngest.class);
50 |
51 | static class Opts extends ClientOpts {
52 | @Parameter(names = {"-t", "--table"}, required = true, description = "table to use")
53 | String tableName;
54 | @Parameter(names = {"-i", "--input"}, required = true, description = "HDFS input directory")
55 | String inputDirectory;
56 | }
57 |
58 | static class NGramMapper extends Mapper<LongWritable,Text,Text,Mutation> {
59 |
60 | @Override
61 | protected void map(LongWritable location, Text value, Context context)
62 | throws IOException, InterruptedException {
63 | String[] parts = value.toString().split("\\t");
64 | if (parts.length >= 4) {
65 | Mutation m = new Mutation(parts[0]);
66 | m.put(parts[1], String.format("%010d", Long.parseLong(parts[2])),
67 | new Value(parts[3].trim().getBytes()));
68 | context.write(null, m);
69 | }
70 | }
71 | }
72 |
73 | public static void main(String[] args) throws Exception {
74 | Opts opts = new Opts();
75 | opts.parseArgs(NGramIngest.class.getName(), args);
76 |
77 | Job job = Job.getInstance(opts.getHadoopConfig());
78 | job.setJobName(NGramIngest.class.getSimpleName());
79 | job.setJarByClass(NGramIngest.class);
80 |
81 | job.setInputFormatClass(TextInputFormat.class);
82 | job.setOutputFormatClass(AccumuloOutputFormat.class);
83 | AccumuloOutputFormat.configure().clientProperties(opts.getClientProperties())
84 | .defaultTable(opts.tableName).store(job);
85 |
86 | job.setMapperClass(NGramMapper.class);
87 | job.setMapOutputKeyClass(Text.class);
88 | job.setMapOutputValueClass(Mutation.class);
89 |
90 | job.setNumReduceTasks(0);
91 | job.setSpeculativeExecution(false);
92 |
93 | try (AccumuloClient client = opts.createAccumuloClient()) {
94 | if (!client.tableOperations().exists(opts.tableName)) {
95 | String[] numbers = "1 2 3 4 5 6 7 8 9".split("\\s");
96 | String[] lower = "a b c d e f g h i j k l m n o p q r s t u v w x y z".split("\\s");
97 | String[] upper = "A B C D E F G H I J K L M N O P Q R S T U V W X Y Z".split("\\s");
98 |
99 | SortedSet<Text> splits = Stream.of(numbers, lower, upper).flatMap(Stream::of).map(Text::new)
100 | .collect(Collectors.toCollection(TreeSet::new));
101 |
102 | var newTableConfig = new NewTableConfiguration().withSplits(splits);
103 |
104 | log.info("Creating table " + opts.tableName);
105 | Common.createTableWithNamespace(client, opts.tableName, newTableConfig);
106 | }
107 | }
108 |
109 | TextInputFormat.addInputPath(job, new Path(opts.inputDirectory));
110 | System.exit(job.waitForCompletion(true) ? 0 : 1);
111 | }
112 | }
113 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/accumulo/examples/mapreduce/WordCount.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.mapreduce;
18 |
19 | import java.io.IOException;
20 | import java.text.SimpleDateFormat;
21 | import java.util.Collections;
22 | import java.util.Date;
23 |
24 | import org.apache.accumulo.core.client.AccumuloClient;
25 | import org.apache.accumulo.core.client.IteratorSetting;
26 | import org.apache.accumulo.core.client.admin.NewTableConfiguration;
27 | import org.apache.accumulo.core.data.Mutation;
28 | import org.apache.accumulo.core.iterators.user.SummingCombiner;
29 | import org.apache.accumulo.examples.Common;
30 | import org.apache.accumulo.examples.cli.ClientOpts;
31 | import org.apache.accumulo.hadoop.mapreduce.AccumuloOutputFormat;
32 | import org.apache.hadoop.fs.Path;
33 | import org.apache.hadoop.io.LongWritable;
34 | import org.apache.hadoop.io.Text;
35 | import org.apache.hadoop.mapreduce.Job;
36 | import org.apache.hadoop.mapreduce.Mapper;
37 | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
38 | import org.slf4j.Logger;
39 | import org.slf4j.LoggerFactory;
40 |
41 | import com.beust.jcommander.Parameter;
42 |
43 | /**
44 | * A simple MapReduce job that inserts word counts into Accumulo. See docs/mapred.md
45 | */
46 | public final class WordCount {
47 |
48 | private static final Logger log = LoggerFactory.getLogger(WordCount.class);
49 |
50 | private WordCount() {}
51 |
52 | static class Opts extends ClientOpts {
53 | @Parameter(names = {"-t", "--table"}, description = "Name of output Accumulo table")
54 | String tableName = Common.NAMESPACE + ".wordcount";
55 | @Parameter(names = {"-i", "--input"}, required = true, description = "HDFS input directory")
56 | String inputDirectory;
57 | @Parameter(names = {"-d", "--dfsPath"},
58 | description = "HDFS Path where accumulo-client.properties exists")
59 | String hdfsPath;
60 | }
61 |
62 | public static class MapClass extends Mapper<LongWritable,Text,Text,Mutation> {
63 | @Override
64 | public void map(LongWritable key, Text value, Context output) throws IOException {
65 | String today = new SimpleDateFormat("yyyyMMdd").format(new Date());
66 | String[] words = value.toString().split("\\s+");
67 |
68 | for (String word : words) {
69 | Mutation mutation = new Mutation(word);
70 | mutation.at().family("count").qualifier(today).put("1");
71 |
72 | try {
73 | output.write(null, mutation);
74 | } catch (InterruptedException e) {
75 | log.error("Could not write mutation to Context.", e);
76 | }
77 | }
78 | }
79 | }
80 |
81 | public static void main(String[] args) throws Exception {
82 | Opts opts = new Opts();
83 | opts.parseArgs(WordCount.class.getName(), args);
84 |
85 | // Create Accumulo table with Summing iterator attached
86 | try (AccumuloClient client = opts.createAccumuloClient()) {
87 | IteratorSetting is = new IteratorSetting(10, SummingCombiner.class);
88 | SummingCombiner.setColumns(is,
89 | Collections.singletonList(new IteratorSetting.Column("count")));
90 | SummingCombiner.setEncodingType(is, SummingCombiner.Type.STRING);
91 | Common.createTableWithNamespace(client, opts.tableName,
92 | new NewTableConfiguration().attachIterator(is));
93 | }
94 |
95 | // Create M/R job
96 | Job job = Job.getInstance(opts.getHadoopConfig());
97 | job.setJobName(WordCount.class.getName());
98 | job.setJarByClass(WordCount.class);
99 | job.setInputFormatClass(TextInputFormat.class);
100 | TextInputFormat.setInputPaths(job, new Path(opts.inputDirectory));
101 |
102 | job.setMapperClass(MapClass.class);
103 | job.setNumReduceTasks(0);
104 | job.setOutputFormatClass(AccumuloOutputFormat.class);
105 | job.setOutputKeyClass(Text.class);
106 | job.setOutputValueClass(Mutation.class);
107 |
108 | if (opts.hdfsPath != null) {
109 | AccumuloOutputFormat.configure().clientPropertiesPath(opts.hdfsPath)
110 | .defaultTable(opts.tableName).store(job);
111 | } else {
112 | AccumuloOutputFormat.configure().clientProperties(opts.getClientProperties())
113 | .defaultTable(opts.tableName).store(job);
114 | }
115 | System.exit(job.waitForCompletion(true) ? 0 : 1);
116 | }
117 | }
118 |
--------------------------------------------------------------------------------
/src/test/java/org/apache/accumulo/examples/dirlist/CountIT.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package org.apache.accumulo.examples.dirlist;
18 |
19 | import static org.junit.jupiter.api.Assertions.assertEquals;
20 | import static org.junit.jupiter.api.Assertions.assertFalse;
21 |
22 | import java.util.ArrayList;
23 | import java.util.Map.Entry;
24 |
25 | import org.apache.accumulo.core.client.Accumulo;
26 | import org.apache.accumulo.core.client.AccumuloClient;
27 | import org.apache.accumulo.core.client.BatchWriter;
28 | import org.apache.accumulo.core.client.BatchWriterConfig;
29 | import org.apache.accumulo.core.client.Scanner;
30 | import org.apache.accumulo.core.conf.Property;
31 | import org.apache.accumulo.core.data.Key;
32 | import org.apache.accumulo.core.data.Value;
33 | import org.apache.accumulo.core.security.Authorizations;
34 | import org.apache.accumulo.core.security.ColumnVisibility;
35 | import org.apache.accumulo.core.util.Pair;
36 | import org.apache.accumulo.examples.cli.BatchWriterOpts;
37 | import org.apache.accumulo.examples.cli.ScannerOpts;
38 | import org.apache.accumulo.miniclusterImpl.MiniAccumuloConfigImpl;
39 | import org.apache.accumulo.test.functional.ConfigurableMacBase;
40 | import org.apache.hadoop.conf.Configuration;
41 | import org.junit.jupiter.api.AfterEach;
42 | import org.junit.jupiter.api.BeforeEach;
43 | import org.junit.jupiter.api.Test;
44 |
45 | public class CountIT extends ConfigurableMacBase {
46 |
47 | private AccumuloClient client;
48 | private String tableName;
49 |
50 | @Override
51 | protected void configure(MiniAccumuloConfigImpl cfg, Configuration hadoopCoreSite) {
52 | cfg.setProperty(Property.TSERV_NATIVEMAP_ENABLED, "false");
53 | }
54 |
55 | @BeforeEach
56 | public void setupInstance() throws Exception {
57 | tableName = getUniqueNames(1)[0];
58 | client = Accumulo.newClient().from(getClientProperties()).build();
59 | client.tableOperations().create(tableName);
60 | try (BatchWriter bw = client.createBatchWriter(tableName, new BatchWriterConfig())) {
61 | ColumnVisibility cv = new ColumnVisibility();
62 | // / has 1 dir
63 | // /local has 2 dirs 1 file
64 | // /local/user1 has 2 files
65 | bw.addMutation(Ingest.buildMutation(cv, "/local", true, false, true, 272, 12345, null));
66 | bw.addMutation(Ingest.buildMutation(cv, "/local/user1", true, false, true, 272, 12345, null));
67 | bw.addMutation(Ingest.buildMutation(cv, "/local/user2", true, false, true, 272, 12345, null));
68 | bw.addMutation(
69 | Ingest.buildMutation(cv, "/local/file", false, false, false, 1024, 12345, null));
70 | bw.addMutation(
71 | Ingest.buildMutation(cv, "/local/file", false, false, false, 1024, 23456, null));
72 | bw.addMutation(
73 | Ingest.buildMutation(cv, "/local/user1/file1", false, false, false, 2024, 12345, null));
74 | bw.addMutation(
75 | Ingest.buildMutation(cv, "/local/user1/file2", false, false, false, 1028, 23456, null));
76 | }
77 | }
78 |
79 | @AfterEach
80 | public void teardown() {
81 | client.close();
82 | }
83 |
84 | @Test
85 | public void test() throws Exception {
86 |
87 | ScannerOpts scanOpts = new ScannerOpts();
88 | BatchWriterOpts bwOpts = new BatchWriterOpts();
89 | FileCount fc = new FileCount(client, tableName, Authorizations.EMPTY, new ColumnVisibility(),
90 | scanOpts, bwOpts);
91 |
92 | ArrayList<Pair<String,String>> expected = new ArrayList<>();
93 | expected.add(new Pair<>(QueryUtil.getRow("").toString(), "1,0,3,3"));
94 | expected.add(new Pair<>(QueryUtil.getRow("/local").toString(), "2,1,2,3"));
95 | expected.add(new Pair<>(QueryUtil.getRow("/local/user1").toString(), "0,2,0,2"));
96 | expected.add(new Pair<>(QueryUtil.getRow("/local/user2").toString(), "0,0,0,0"));
97 |
98 | int actualCount = 0;
99 | try (Scanner scanner = client.createScanner(tableName, new Authorizations())) {
100 | scanner.fetchColumn("dir", "counts");
101 | assertFalse(scanner.iterator().hasNext());
102 |
103 | fc.run();
104 |
105 | for (Entry<Key,Value> e : scanner) {
106 | assertEquals(e.getKey().getRow().toString(), expected.get(actualCount).getFirst());
107 | assertEquals(e.getValue().toString(), expected.get(actualCount).getSecond());
108 | actualCount++;
109 | }
110 | }
111 | assertEquals(expected.size(), actualCount);
112 | }
113 | }
114 |
--------------------------------------------------------------------------------
/docs/shard.md:
--------------------------------------------------------------------------------
1 |
17 | # Apache Accumulo Shard Example
18 |
19 | Accumulo has an iterator called the intersecting iterator, which supports querying a term index that is partitioned by
20 | document, or "sharded". This example shows how to use the intersecting iterator through these four programs:
21 |
22 | * [Index.java] - Indexes a set of text files into an Accumulo table
23 | * [Query.java] - Finds documents containing a given set of terms.
24 | * [Reverse.java] - Reads the index table and writes a map of documents to terms into another table.
25 | * [ContinuousQuery.java] - Uses the table populated by Reverse.java to select N random terms per document. Then it continuously and randomly queries those terms.
26 |
27 | To run these example programs, create two tables as shown below.
28 |
29 | username@instance> createnamespace examples
30 | username@instance> createtable examples.shard
31 | username@instance examples.shard> createtable examples.doc2term
32 |
33 | After creating the tables, index some files. The following command indexes all the java files in the Accumulo source code.
34 |
35 | $ find /path/to/accumulo/core -name "*.java" | xargs ./bin/runex shard.Index -t examples.shard --partitions 30
36 |
37 | The following command queries the index to find all files containing 'foo' and 'bar'.
38 |
39 | $ ./bin/runex shard.Query -t examples.shard foo bar
40 | /path/to/accumulo/core/src/test/java/org/apache/accumulo/core/spi/balancer/BaseHostRegexTableLoadBalancerTest.java
41 | /path/to/accumulo/core/src/test/java/org/apache/accumulo/core/iterators/user/WholeRowIteratorTest.java
42 | /path/to/accumulo/core/src/test/java/org/apache/accumulo/core/iteratorsImpl/IteratorConfigUtilTest.java
43 | /path/to/accumulo/core/src/test/java/org/apache/accumulo/core/data/KeyBuilderTest.java
44 | /path/to/accumulo/core/src/test/java/org/apache/accumulo/core/spi/balancer/HostRegexTableLoadBalancerReconfigurationTest.java
45 | /path/to/accumulo/core/src/test/java/org/apache/accumulo/core/security/ColumnVisibilityTest.java
46 | /path/to/accumulo/core/src/test/java/org/apache/accumulo/core/summary/SummaryCollectionTest.java
47 | /path/to/accumulo/core/src/test/java/org/apache/accumulo/core/spi/balancer/HostRegexTableLoadBalancerTest.java
48 | /path/to/accumulo/core/src/test/java/org/apache/accumulo/core/client/IteratorSettingTest.java
49 | /path/to/accumulo/core/src/test/java/org/apache/accumulo/core/data/KeyExtentTest.java
50 | /path/to/accumulo/core/src/test/java/org/apache/accumulo/core/security/VisibilityEvaluatorTest.java
51 | /path/to/accumulo/core/src/test/java/org/apache/accumulo/core/iterators/user/TransformingIteratorTest.java
52 | /path/to/accumulo/core/src/test/java/org/apache/accumulo/core/client/admin/NewTableConfigurationTest.java
53 | /path/to/accumulo/core/src/test/java/org/apache/accumulo/core/conf/HadoopCredentialProviderTest.java
54 | /path/to/accumulo/core/src/test/java/org/apache/accumulo/core/clientImpl/TableOperationsHelperTest.java
55 | /path/to/accumulo/core/src/test/java/org/apache/accumulo/core/iterators/user/WholeColumnFamilyIteratorTest.java
56 |
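A query like the one above boils down to configuring the intersecting iterator on a BatchScanner across every shard. A minimal sketch of that pattern follows, assuming a client properties file exists; the class name, path, iterator priority, and thread count are arbitrary placeholders:

    import java.util.Collections;
    import java.util.Map.Entry;

    import org.apache.accumulo.core.client.Accumulo;
    import org.apache.accumulo.core.client.AccumuloClient;
    import org.apache.accumulo.core.client.BatchScanner;
    import org.apache.accumulo.core.client.IteratorSetting;
    import org.apache.accumulo.core.data.Key;
    import org.apache.accumulo.core.data.Range;
    import org.apache.accumulo.core.data.Value;
    import org.apache.accumulo.core.iterators.user.IntersectingIterator;
    import org.apache.accumulo.core.security.Authorizations;
    import org.apache.hadoop.io.Text;

    public class ShardQuerySketch {
      public static void main(String[] args) throws Exception {
        try (AccumuloClient client = Accumulo.newClient()
            .from("/path/to/accumulo-client.properties").build()) {
          // Each term to intersect is a column family in the sharded index.
          Text[] terms = {new Text("foo"), new Text("bar")};
          IteratorSetting ii = new IteratorSetting(20, "ii", IntersectingIterator.class);
          IntersectingIterator.setColumnFamilies(ii, terms);
          try (BatchScanner bs = client.createBatchScanner("examples.shard",
              Authorizations.EMPTY, 8)) {
            bs.addScanIterator(ii);
            bs.setRanges(Collections.singleton(new Range())); // search every shard
            for (Entry<Key,Value> entry : bs) {
              // The column qualifier of each match holds a document ID.
              System.out.println(entry.getKey().getColumnQualifier());
            }
          }
        }
      }
    }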
57 | In order to run ContinuousQuery, we need to run Reverse.java to populate the `examples.doc2term` table.
58 |
59 | $ ./bin/runex shard.Reverse --shardTable examples.shard --doc2Term examples.doc2term
60 |
61 | Below, ContinuousQuery is run with 5 terms, so it selects 5 random terms from each document, then repeatedly picks one
62 | of those 5-term sets at random and queries it, printing the number of matching documents and the time in seconds.
63 |
64 | $ ./bin/runex shard.ContinuousQuery --shardTable examples.shard --doc2Term examples.doc2term --terms 5
65 | [string, protected, sizeopt, cache, build] 1 0.084
66 | [public, these, exception, to, as] 25 0.267
67 | [by, encodeprevendrow, 0, work, as] 4 0.056
68 | [except, to, a, limitations, one] 969 0.197
69 | [copy, as, asf, version, is] 969 0.341
70 | [core, class, may, regarding, without] 862 0.437
71 | [max_data_to_print, default_visibility_cache_size, use, accumulo_export_info, fate] 1 0.066
72 |
73 |
74 | [Index.java]: ../src/main/java/org/apache/accumulo/examples/shard/Index.java
75 | [Query.java]: ../src/main/java/org/apache/accumulo/examples/shard/Query.java
76 | [Reverse.java]: ../src/main/java/org/apache/accumulo/examples/shard/Reverse.java
77 | [ContinuousQuery.java]: ../src/main/java/org/apache/accumulo/examples/shard/ContinuousQuery.java
78 |
--------------------------------------------------------------------------------