├── .github ├── release-drafter-config.yml └── workflows │ ├── integration.yml │ ├── release-drafter.yml │ └── version-and-release.yml ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── build ├── sbt └── sbt-launch-lib.bash ├── doc ├── cluster.md ├── configuration.md ├── dataframe.md ├── dev.md ├── getting-started.md ├── java.md ├── python.md ├── rdd.md ├── streaming.md └── structured-streaming.md ├── pom.xml ├── project ├── build.properties └── plugins.sbt ├── scalastyle-config.xml └── src ├── main ├── resources │ └── META-INF │ │ └── services │ │ └── org.apache.spark.sql.sources.DataSourceRegister └── scala │ ├── com │ └── redislabs │ │ └── provider │ │ └── redis │ │ ├── ConnectionPool.scala │ │ ├── RedisConfig.scala │ │ ├── package.scala │ │ ├── partitioner │ │ ├── RedisPartition.scala │ │ └── RedisPartitioner.scala │ │ ├── rdd │ │ └── RedisRDD.scala │ │ ├── redisFunctions.scala │ │ ├── streaming │ │ ├── RedisInputDStream.scala │ │ ├── RedisStreamReceiver.scala │ │ ├── package.scala │ │ └── redisStreamingFunctions.scala │ │ └── util │ │ ├── CollectionUtils.scala │ │ ├── ConnectionUtils.scala │ │ ├── JsonUtils.scala │ │ ├── Logging.scala │ │ ├── ParseUtils.scala │ │ ├── PipelineUtils.scala │ │ └── StreamUtils.scala │ └── org │ └── apache │ └── spark │ └── sql │ └── redis │ ├── BinaryRedisPersistence.scala │ ├── DefaultSource.scala │ ├── HashRedisPersistence.scala │ ├── RedisPersistence.scala │ ├── RedisSourceRelation.scala │ ├── redis.scala │ └── stream │ ├── RedisSource.scala │ ├── RedisSourceConfig.scala │ ├── RedisSourceOffset.scala │ ├── RedisSourceRdd.scala │ ├── RedisSourceTypes.scala │ ├── RedisStreamProvider.scala │ └── RedisStreamReader.scala └── test ├── resources ├── blog ├── log4j.properties ├── test.csv └── tls │ ├── ca.crt │ ├── ca.key │ ├── ca.txt │ ├── client.csr │ ├── clientkeystore │ ├── redis.crt │ ├── redis.dh │ └── redis.key └── scala ├── com └── redislabs │ └── provider │ └── redis │ ├── RedisBenchmarks.scala │ ├── RedisConfigSuite.scala │ ├── SparkRedisSuite.scala │ ├── SparkStreamingRedisSuite.scala │ ├── df │ ├── AclDataframeSuite.scala │ ├── BinaryDataframeSuite.scala │ ├── CsvDataframeSuite.scala │ ├── DataframeSuite.scala │ ├── FilteredDataframeSuite.scala │ ├── HashDataframeSuite.scala │ ├── RedisDataframeSuite.scala │ ├── SparkSqlSuite.scala │ ├── acl │ │ ├── AclDataframeClusterSuite.scala │ │ └── AclDataframeStandaloneSuite.scala │ ├── benchmark │ │ ├── DataframeBenchmarkSuite.scala │ │ ├── ManyValueBenchmarkSuite.scala │ │ ├── SingleValueBenchmarkSuite.scala │ │ └── cluster │ │ │ ├── BinaryModelManyValueClusterBenchmarkSuite.scala │ │ │ ├── BinaryModelSingleValueClusterBenchmarkSuite.scala │ │ │ ├── HashModelManyValueClusterBenchmarkSuite.scala │ │ │ └── HashModelSingleValueClusterBenchmarkSuite.scala │ ├── cluster │ │ ├── BinaryDataframeClusterSuite.scala │ │ ├── CsvDataframeClusterSuite.scala │ │ ├── DataframeClusterSuite.scala │ │ ├── FilteredDataframeClusterSuite.scala │ │ ├── HashDataframeClusterSuite.scala │ │ └── SparkSqlClusterSuite.scala │ └── standalone │ │ ├── BinaryDataframeStandaloneSuite.scala │ │ ├── CsvDataframeStandaloneSuite.scala │ │ ├── DataframeStandaloneSuite.scala │ │ ├── FilteredDataframeStandaloneSuite.scala │ │ ├── HashDataframeStandaloneSuite.scala │ │ └── SparkSqlStandaloneSuite.scala │ ├── env │ ├── Env.scala │ ├── RedisClusterAclEnv.scala │ ├── RedisClusterEnv.scala │ ├── RedisStandaloneAclEnv.scala │ ├── RedisStandaloneEnv.scala │ └── RedisStandaloneSSLEnv.scala │ ├── rdd │ ├── RedisKeysSuite.scala │ ├── 
RedisRddExtraSuite.scala │ ├── RedisRddSuite.scala │ ├── acl │ │ ├── RedisRDDClusterAclSuite.scala │ │ └── RedisRDDStandaloneAclSuite.scala │ ├── cluster │ │ ├── RedisKeysClusterSuite.scala │ │ ├── RedisRDDClusterSuite.scala │ │ └── RedisRddExtraClusterSuite.scala │ └── standalone │ │ ├── RedisKeysStandaloneSuite.scala │ │ ├── RedisRDDStandaloneSuite.scala │ │ └── RedisRddExtraStandaloneSuite.scala │ ├── stream │ ├── RedisXStreamSuite.scala │ ├── cluster │ │ └── RedisXStreamClusterSuite.scala │ └── standalone │ │ └── RedisXStreamStandaloneSuite.scala │ └── util │ ├── BenchmarkTest.java │ ├── CollectionUtilsTest.scala │ ├── ConnectionSSLUtilsTest.scala │ ├── ConnectionUtilsTest.scala │ ├── EntityId.scala │ ├── JsonUtilsTest.scala │ ├── Person.scala │ └── TestUtils.scala └── org └── apache └── spark └── sql └── redis ├── RedisSourceRelationTest.scala └── stream ├── RedisConsumerOffsetTest.scala ├── RedisSourceConfigSuite.scala ├── RedisSourceTest.scala ├── RedisStreamSourceSuite.scala ├── cluster └── RedisStreamSourceClusterSuite.scala └── standalone └── RedisStreamSourceStandaloneSuite.scala /.github/release-drafter-config.yml: -------------------------------------------------------------------------------- 1 | name-template: 'Version $NEXT_PATCH_VERSION🌈' 2 | tag-template: 'v$NEXT_PATCH_VERSION' 3 | categories: 4 | - title: '🚀Features' 5 | labels: 6 | - 'feature' 7 | - 'enhancement' 8 | - title: 'Bug Fixes' 9 | labels: 10 | - 'fix' 11 | - 'bugfix' 12 | - 'bug' 13 | - title: '🧰Maintenance' 14 | label: 'chore' 15 | change-template: '- $TITLE @$AUTHOR (#$NUMBER)' 16 | exclude-labels: 17 | - 'skip-changelog' 18 | template: | 19 | ## Changes 20 | 21 | $CHANGES -------------------------------------------------------------------------------- /.github/workflows/integration.yml: -------------------------------------------------------------------------------- 1 | name: Continuous Integration 2 | on: 3 | push: 4 | paths-ignore: 5 | - 'doc/**' 6 | - '**/*.md' 7 | branches: 8 | - master 9 | - '[0-9].*' 10 | pull_request: 11 | branches: 12 | - master 13 | - '[0-9].*' 14 | schedule: 15 | - cron: '0 1 * * *' # nightly build 16 | workflow_dispatch: 17 | 18 | jobs: 19 | 20 | build: 21 | name: Build and Test 22 | runs-on: ubuntu-latest 23 | steps: 24 | - name: Checkout project 25 | uses: actions/checkout@v4 26 | - name: Set Java up in the runner 27 | uses: actions/setup-java@v4 28 | with: 29 | java-version: '8' 30 | distribution: 'temurin' 31 | cache: 'maven' 32 | - name: Setup Maven 33 | uses: s4u/setup-maven-action@v1.8.0 34 | with: 35 | java-version: 8 36 | - name: Install missing dependencies to container 37 | run: | 38 | sudo apt update 39 | sudo apt install -y libssl-dev 40 | wget http://download.redis.io/releases/redis-6.0.10.tar.gz 41 | tar -xzvf redis-6.0.10.tar.gz 42 | make -C redis-6.0.10 -j`nproc` BUILD_TLS=yes 43 | - name: Maven offline 44 | run: | 45 | mvn -q dependency:go-offline 46 | - name: Run tests 47 | run: | 48 | export PATH=$PWD/redis-6.0.10/src:$PATH 49 | make test 50 | env: 51 | JVM_OPTS: -Xmx3200m 52 | TERM: dumb 53 | - name: Upload coverage reports to Codecov 54 | uses: codecov/codecov-action@v4.0.1 55 | with: 56 | token: ${{ secrets.CODECOV_TOKEN }} 57 | -------------------------------------------------------------------------------- /.github/workflows/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name: Release Drafter 2 | 3 | on: 4 | push: 5 | # branches to consider in the event; optional, defaults to all 6 | branches: 7 
| - master 8 | 9 | jobs: 10 | update_release_draft: 11 | runs-on: ubuntu-latest 12 | steps: 13 | # Drafts your next Release notes as Pull Requests are merged into "master" 14 | - uses: release-drafter/release-drafter@v5 15 | with: 16 | # (Optional) specify config name to use, relative to .github/. Default: release-drafter.yml 17 | config-name: release-drafter-config.yml 18 | env: 19 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} -------------------------------------------------------------------------------- /.github/workflows/version-and-release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | build: 9 | runs-on: ubuntu-latest 10 | 11 | steps: 12 | - uses: actions/checkout@v2 13 | 14 | - name: get version from tag 15 | id: get_version 16 | run: | 17 | realversion="${GITHUB_REF/refs\/tags\//}" 18 | realversion="${realversion//v/}" 19 | realversion=`echo ${realversion}|cut -d '-' -f 2-2` 20 | echo "::set-output name=VERSION::$realversion" 21 | 22 | - name: Set up publishing to maven central 23 | uses: actions/setup-java@v2 24 | with: 25 | java-version: '8' 26 | distribution: 'adopt' 27 | server-id: ossrh 28 | server-username: MAVEN_USERNAME 29 | server-password: MAVEN_PASSWORD 30 | 31 | - name: mvn versions 32 | run: mvn versions:set -DnewVersion=${{ steps.get_version.outputs.VERSION }} 33 | 34 | - name: Install gpg key 35 | run: | 36 | cat <(echo -e "${{ secrets.OSSH_GPG_SECRET_KEY }}") | gpg --batch --import 37 | gpg --list-secret-keys --keyid-format LONG 38 | 39 | - name: Publish 40 | run: | 41 | mvn --no-transfer-progress \ 42 | --batch-mode \ 43 | -Dgpg.passphrase='${{ secrets.OSSH_GPG_SECRET_KEY_PASSWORD }}' \ 44 | -DskipTests deploy -P release 45 | env: 46 | MAVEN_USERNAME: ${{secrets.OSSH_USERNAME}} 47 | MAVEN_PASSWORD: ${{secrets.OSSH_TOKEN}} 48 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | hs_err_pid*.log 2 | nohup.out 3 | scalastyle-output.xml 4 | .idea 5 | *.iml 6 | **/.idea 7 | */.classpath 8 | */.project 9 | */.settings 10 | */.cache 11 | */test-output/ 12 | *.log 13 | */*.versionsBackup 14 | target/ 15 | *GitIgnored* 16 | *.asc 17 | *.gpg 18 | /bin/ 19 | 20 | *.class 21 | *.log 22 | *.pyc 23 | sbt/*.jar 24 | 25 | # sbt specific 26 | .cache/ 27 | .history/ 28 | .lib/ 29 | dist/* 30 | target/ 31 | lib_managed/ 32 | src_managed/ 33 | project/boot/ 34 | project/plugins/project/ 35 | build/*.jar 36 | checkpoint-test/ 37 | 38 | # eclipse 39 | .project 40 | .classpath 41 | /.settings/ 42 | 43 | # Redis 44 | dump.rdb 45 | .DS_Store -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2015-2018, Redis Labs 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 
15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # user 2 | USER_ACL = user alice on >p1pp0 ~* +@all 3 | 4 | # STANDALONE REDIS NODE 5 | define REDIS_STANDALONE_NODE_CONF 6 | daemonize yes 7 | port 6379 8 | pidfile /tmp/redis_standalone_node_for_spark-redis.pid 9 | logfile /tmp/redis_standalone_node_for_spark-redis.log 10 | save "" 11 | appendonly no 12 | requirepass passwd 13 | $(USER_ACL) 14 | endef 15 | 16 | # STANDALONE REDIS NODE WITH SSL 17 | define REDIS_STANDALONE_NODE_CONF_SSL 18 | daemonize yes 19 | port 0 20 | pidfile /tmp/redis_standalone_node__ssl_for_spark-redis.pid 21 | logfile /tmp/redis_standalone_node_ssl_for_spark-redis.log 22 | save "" 23 | appendonly no 24 | requirepass passwd 25 | $(USER_ACL) 26 | tls-auth-clients no 27 | tls-port 6380 28 | tls-cert-file ./src/test/resources/tls/redis.crt 29 | tls-key-file ./src/test/resources/tls/redis.key 30 | tls-ca-cert-file ./src/test/resources/tls/ca.crt 31 | tls-dh-params-file ./src/test/resources/tls/redis.dh 32 | endef 33 | 34 | # CLUSTER REDIS NODES 35 | define REDIS_CLUSTER_NODE1_CONF 36 | daemonize yes 37 | port 7379 38 | $(USER_ACL) 39 | pidfile /tmp/redis_cluster_node1_for_spark-redis.pid 40 | logfile /tmp/redis_cluster_node1_for_spark-redis.log 41 | save "" 42 | appendonly no 43 | cluster-enabled yes 44 | cluster-config-file /tmp/redis_cluster_node1_for_spark-redis.conf 45 | endef 46 | 47 | define REDIS_CLUSTER_NODE2_CONF 48 | daemonize yes 49 | port 7380 50 | $(USER_ACL) 51 | pidfile /tmp/redis_cluster_node2_for_spark-redis.pid 52 | logfile /tmp/redis_cluster_node2_for_spark-redis.log 53 | save "" 54 | appendonly no 55 | cluster-enabled yes 56 | cluster-config-file /tmp/redis_cluster_node2_for_spark-redis.conf 57 | endef 58 | 59 | define REDIS_CLUSTER_NODE3_CONF 60 | daemonize yes 61 | port 7381 62 | $(USER_ACL) 63 | pidfile /tmp/redis_cluster_node3_for_spark-redis.pid 64 | logfile /tmp/redis_cluster_node3_for_spark-redis.log 65 | save "" 66 | appendonly no 67 | cluster-enabled yes 68 | cluster-config-file /tmp/redis_cluster_node3_for_spark-redis.conf 69 | endef 70 | 71 | export REDIS_STANDALONE_NODE_CONF 72 | export REDIS_STANDALONE_NODE_CONF_SSL 73 | export REDIS_CLUSTER_NODE1_CONF 74 | export REDIS_CLUSTER_NODE2_CONF 75 | export REDIS_CLUSTER_NODE3_CONF 76 | 77 | start-standalone: 78 | echo "$$REDIS_STANDALONE_NODE_CONF" | redis-server - 79 | echo "$$REDIS_STANDALONE_NODE_CONF_SSL" | 
redis-server - 80 | 81 | 82 | start-cluster: 83 | echo "$$REDIS_CLUSTER_NODE1_CONF" | redis-server - 84 | echo "$$REDIS_CLUSTER_NODE2_CONF" | redis-server - 85 | echo "$$REDIS_CLUSTER_NODE3_CONF" | redis-server - 86 | redis-cli -p 7380 cluster meet 127.0.0.1 7379 > /dev/null 87 | redis-cli -p 7381 cluster meet 127.0.0.1 7379 > /dev/null 88 | slots=$$(seq 0 2047); slots=$$(echo $$slots | tr '\n' ' '); redis-cli -p 7379 cluster addslots $$slots > /dev/null 89 | slots=$$(seq 2048 3333); slots=$$(echo $$slots | tr '\n' ' '); redis-cli -p 7380 cluster addslots $$slots > /dev/null 90 | slots=$$(seq 3334 5460); slots=$$(echo $$slots | tr '\n' ' '); redis-cli -p 7379 cluster addslots $$slots > /dev/null 91 | slots=$$(seq 5461 7777); slots=$$(echo $$slots | tr '\n' ' '); redis-cli -p 7380 cluster addslots $$slots > /dev/null 92 | slots=$$(seq 7778 9999); slots=$$(echo $$slots | tr '\n' ' '); redis-cli -p 7381 cluster addslots $$slots > /dev/null 93 | slots=$$(seq 10000 10922); slots=$$(echo $$slots | tr '\n' ' '); redis-cli -p 7380 cluster addslots $$slots > /dev/null 94 | slots=$$(seq 10923 16383); slots=$$(echo $$slots | tr '\n' ' '); redis-cli -p 7381 cluster addslots $$slots > /dev/null 95 | 96 | start: 97 | make start-standalone 98 | make start-cluster 99 | 100 | stop-standalone: 101 | kill `cat /tmp/redis_standalone_node_for_spark-redis.pid` 102 | kill `cat /tmp/redis_standalone_node__ssl_for_spark-redis.pid` 103 | 104 | stop-cluster: 105 | kill `cat /tmp/redis_cluster_node1_for_spark-redis.pid` || true 106 | kill `cat /tmp/redis_cluster_node2_for_spark-redis.pid` || true 107 | kill `cat /tmp/redis_cluster_node3_for_spark-redis.pid` || true 108 | rm -f /tmp/redis_cluster_node1_for_spark-redis.conf 109 | rm -f /tmp/redis_cluster_node2_for_spark-redis.conf 110 | rm -f /tmp/redis_cluster_node3_for_spark-redis.conf 111 | 112 | stop: 113 | make stop-standalone 114 | make stop-cluster 115 | 116 | restart: 117 | make stop 118 | make start 119 | 120 | test: 121 | make start 122 | # with --batch-mode maven doesn't print 'Progress: 125/150kB', the progress lines take up 90% of the log and causes 123 | # Travis build to fail with 'The job exceeded the maximum log length, and has been terminated' 124 | mvn clean test -B -DargLine="-Djavax.net.ssl.trustStorePassword=password -Djavax.net.ssl.trustStore=./src/test/resources/tls/clientkeystore -Djavax.net.ssl.trustStoreType=jceks" 125 | make stop 126 | 127 | benchmark: 128 | make start 129 | mvn clean test -B -Pbenchmark 130 | make stop 131 | 132 | deploy: 133 | make start 134 | mvn --batch-mode clean deploy 135 | make stop 136 | 137 | package: 138 | make start 139 | mvn --batch-mode clean package 140 | make stop 141 | 142 | .PHONY: test 143 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Integration](https://github.com/RedisLabs/spark-redis/actions/workflows/integration.yml/badge.svg?branch=master)](https://github.com/RedisLabs/spark-redis/actions/workflows/integration.yml) 2 | [![license](https://img.shields.io/github/license/RedisLabs/spark-redis.svg)](https://github.com/RedisLabs/spark-redis) 3 | [![Release](https://img.shields.io/github/release/RedisLabs/spark-redis.svg?sort=semver)](https://github.com/RedisLabs/spark-redis/releases/latest) 4 | [![Maven 
Central](https://img.shields.io/maven-central/v/com.redislabs/spark-redis_2.12?logo=redis)](https://maven-badges.herokuapp.com/maven-central/com.redislabs/spark-redis_2.12) 5 | [![Javadocs](https://www.javadoc.io/badge/com.redislabs/spark-redis_2.12.svg)](https://www.javadoc.io/doc/com.redislabs/spark-redis_2.12) 6 | [![Codecov](https://codecov.io/gh/RedisLabs/spark-redis/branch/master/graph/badge.svg)](https://codecov.io/gh/RedisLabs/spark-redis) 7 | 8 | [![Discord](https://img.shields.io/discord/697882427875393627.svg?style=social&logo=discord)](https://discord.gg/redis) 9 | [![Twitch](https://img.shields.io/twitch/status/redisinc?style=social)](https://www.twitch.tv/redisinc) 10 | [![YouTube](https://img.shields.io/youtube/channel/views/UCD78lHSwYqMlyetR0_P4Vig?style=social)](https://www.youtube.com/redisinc) 11 | [![Twitter](https://img.shields.io/twitter/follow/redisinc?style=social)](https://twitter.com/redisinc) 12 | # Spark-Redis 13 | A library for reading and writing data in [Redis](http://redis.io) using [Apache Spark](http://spark.apache.org/). 14 | 15 | Spark-Redis provides access to all of Redis' data structures - String, Hash, List, Set and Sorted Set - from Spark as RDDs. It also supports reading and writing with DataFrames and Spark SQL syntax. 16 | 17 | The library can be used both with Redis stand-alone as well as clustered databases. When used with Redis cluster, Spark-Redis is aware of its partitioning scheme and adjusts in response to resharding and node failure events. 18 | 19 | Spark-Redis also supports Spark Streaming (DStreams) and Structured Streaming. 20 | 21 | ## Version compatibility and branching 22 | 23 | The library has several branches, each corresponds to a different supported Spark version. For example, 'branch-2.3' works with any Spark 2.3.x version. 24 | The master branch contains the recent development for the next release. 25 | 26 | | Spark-Redis | Spark | Redis | Supported Scala Versions | 27 | |---------------------------------------------------------------------------|-------| ---------------- | ------------------------ | 28 | | [master](https://github.com/RedisLabs/spark-redis/) | 3.2.x | >=2.9.0 | 2.12 | 29 | | [3.0](https://github.com/RedisLabs/spark-redis/tree/branch-3.0) | 3.0.x | >=2.9.0 | 2.12 | 30 | | [2.4, 2.5, 2.6](https://github.com/RedisLabs/spark-redis/tree/branch-2.4) | 2.4.x | >=2.9.0 | 2.11, 2.12 | 31 | | [2.3](https://github.com/RedisLabs/spark-redis/tree/branch-2.3) | 2.3.x | >=2.9.0 | 2.11 | 32 | | [1.4](https://github.com/RedisLabs/spark-redis/tree/branch-1.4) | 1.4.x | | 2.10 | 33 | 34 | 35 | ## Known limitations 36 | 37 | * Java, Python and R API bindings are not provided at this time 38 | 39 | ## Additional considerations 40 | This library is a work in progress so the API may change before the official release. 41 | 42 | ## Documentation 43 | 44 | Please make sure you use documentation from the correct branch ([2.4](https://github.com/RedisLabs/spark-redis/tree/branch-2.4#documentation), [2.3](https://github.com/RedisLabs/spark-redis/tree/branch-2.3#documentation), etc). 
45 | 46 | - [Getting Started](doc/getting-started.md) 47 | - [RDD](doc/rdd.md) 48 | - [Dataframe](doc/dataframe.md) 49 | - [Streaming](doc/streaming.md) 50 | - [Structured Streaming](doc/structured-streaming.md) 51 | - [Cluster](doc/cluster.md) 52 | - [Java](doc/java.md) 53 | - [Python](doc/python.md) 54 | - [Configuration](doc/configuration.md) 55 | - [Dev environment](doc/dev.md) 56 | 57 | ## Contributing 58 | 59 | You're encouraged to contribute to the Spark-Redis project. 60 | 61 | There are two ways you can do so: 62 | 63 | ### Submit Issues 64 | 65 | If you encounter an issue while using the library, please report it via the project's [issues tracker](https://github.com/RedisLabs/spark-redis/issues). 66 | 67 | ### Author Pull Requests 68 | 69 | Code contributions to the Spark-Redis project can be made using [pull requests](https://github.com/RedisLabs/spark-redis/pulls). To submit a pull request: 70 | 71 | 1. Fork this project. 72 | 2. Make and commit your changes. 73 | 3. Submit your changes as a pull request. 74 | -------------------------------------------------------------------------------- /build/sbt: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # When creating new tests for Spark SQL Hive, the HADOOP_CLASSPATH must contain the hive jars so 4 | # that we can run Hive to generate the golden answer. This is not required for normal development 5 | # or testing. 6 | for i in $HIVE_HOME/lib/* 7 | do HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$i 8 | done 9 | export HADOOP_CLASSPATH 10 | 11 | realpath () { 12 | ( 13 | TARGET_FILE=$1 14 | 15 | cd $(dirname $TARGET_FILE) 16 | TARGET_FILE=$(basename $TARGET_FILE) 17 | 18 | COUNT=0 19 | while [ -L "$TARGET_FILE" -a $COUNT -lt 100 ] 20 | do 21 | TARGET_FILE=$(readlink $TARGET_FILE) 22 | cd $(dirname $TARGET_FILE) 23 | TARGET_FILE=$(basename $TARGET_FILE) 24 | COUNT=$(($COUNT + 1)) 25 | done 26 | 27 | echo $(pwd -P)/$TARGET_FILE 28 | ) 29 | } 30 | 31 | . $(dirname $(realpath $0))/sbt-launch-lib.bash 32 | 33 | 34 | declare -r noshare_opts="-Dsbt.global.base=project/.sbtboot -Dsbt.boot.directory=project/.boot -Dsbt.ivy.home=project/.ivy" 35 | declare -r sbt_opts_file=".sbtopts" 36 | declare -r etc_sbt_opts_file="/etc/sbt/sbtopts" 37 | 38 | usage() { 39 | cat < path to global settings/plugins directory (default: ~/.sbt) 47 | -sbt-boot path to shared boot directory (default: ~/.sbt/boot in 0.11 series) 48 | -ivy path to local Ivy repository (default: ~/.ivy2) 49 | -mem set memory options (default: $sbt_mem, which is $(get_mem_opts $sbt_mem)) 50 | -no-share use all local caches; no sharing 51 | -no-global uses global caches, but does not use global ~/.sbt directory. 52 | -jvm-debug Turn on JVM debugging, open at the given port. 
53 | -batch Disable interactive mode 54 | # sbt version (default: from project/build.properties if present, else latest release) 55 | -sbt-version use the specified version of sbt 56 | -sbt-jar use the specified jar as the sbt launcher 57 | -sbt-rc use an RC version of sbt 58 | -sbt-snapshot use a snapshot version of sbt 59 | # java version (default: java from PATH, currently $(java -version 2>&1 | grep version)) 60 | -java-home alternate JAVA_HOME 61 | # jvm options and output control 62 | JAVA_OPTS environment variable, if unset uses "$java_opts" 63 | SBT_OPTS environment variable, if unset uses "$default_sbt_opts" 64 | .sbtopts if this file exists in the current directory, it is 65 | prepended to the runner args 66 | /etc/sbt/sbtopts if this file exists, it is prepended to the runner args 67 | -Dkey=val pass -Dkey=val directly to the java runtime 68 | -J-X pass option -X directly to the java runtime 69 | (-J is stripped) 70 | -S-X add -X to sbt's scalacOptions (-J is stripped) 71 | -PmavenProfiles Enable a maven profile for the build. 72 | In the case of duplicated or conflicting options, the order above 73 | shows precedence: JAVA_OPTS lowest, command line options highest. 74 | EOM 75 | } 76 | 77 | process_my_args () { 78 | while [[ $# -gt 0 ]]; do 79 | case "$1" in 80 | -no-colors) addJava "-Dsbt.log.noformat=true" && shift ;; 81 | -no-share) addJava "$noshare_opts" && shift ;; 82 | -no-global) addJava "-Dsbt.global.base=$(pwd)/project/.sbtboot" && shift ;; 83 | -sbt-boot) require_arg path "$1" "$2" && addJava "-Dsbt.boot.directory=$2" && shift 2 ;; 84 | -sbt-dir) require_arg path "$1" "$2" && addJava "-Dsbt.global.base=$2" && shift 2 ;; 85 | -debug-inc) addJava "-Dxsbt.inc.debug=true" && shift ;; 86 | -batch) exec &2 "$@" 31 | } 32 | vlog () { 33 | [[ $verbose || $debug ]] && echoerr "$@" 34 | } 35 | dlog () { 36 | [[ $debug ]] && echoerr "$@" 37 | } 38 | 39 | acquire_sbt_jar () { 40 | SBT_VERSION=`awk -F "=" '/sbt\\.version/ {print $2}' ./project/build.properties` 41 | URL1=https://dl.bintray.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch.jar 42 | JAR=build/sbt-launch-${SBT_VERSION}.jar 43 | 44 | sbt_jar=$JAR 45 | 46 | if [[ ! -f "$sbt_jar" ]]; then 47 | # Download sbt launch jar if it hasn't been downloaded yet 48 | if [ ! -f ${JAR} ]; then 49 | # Download 50 | printf "Attempting to fetch sbt\n" 51 | JAR_DL=${JAR}.part 52 | if hash curl 2>/dev/null; then 53 | curl --fail --location --silent ${URL1} > "${JAR_DL}" &&\ 54 | mv "${JAR_DL}" "${JAR}" 55 | elif hash wget 2>/dev/null; then 56 | wget --quiet ${URL1} -O "${JAR_DL}" &&\ 57 | mv "${JAR_DL}" "${JAR}" 58 | else 59 | printf "You do not have curl or wget installed, please install sbt manually from http://www.scala-sbt.org/\n" 60 | exit -1 61 | fi 62 | fi 63 | if [ ! -f ${JAR} ]; then 64 | # We failed to download 65 | printf "Our attempt to download sbt locally to ${JAR} failed. 
Please install sbt manually from http://www.scala-sbt.org/\n" 66 | exit -1 67 | fi 68 | printf "Launching sbt from ${JAR}\n" 69 | fi 70 | } 71 | 72 | execRunner () { 73 | # print the arguments one to a line, quoting any containing spaces 74 | [[ $verbose || $debug ]] && echo "# Executing command line:" && { 75 | for arg; do 76 | if printf "%s\n" "$arg" | grep -q ' '; then 77 | printf "\"%s\"\n" "$arg" 78 | else 79 | printf "%s\n" "$arg" 80 | fi 81 | done 82 | echo "" 83 | } 84 | 85 | exec "$@" 86 | } 87 | 88 | addJava () { 89 | dlog "[addJava] arg = '$1'" 90 | java_args=( "${java_args[@]}" "$1" ) 91 | } 92 | 93 | enableProfile () { 94 | dlog "[enableProfile] arg = '$1'" 95 | maven_profiles=( "${maven_profiles[@]}" "$1" ) 96 | export SBT_MAVEN_PROFILES="${maven_profiles[@]}" 97 | } 98 | 99 | addSbt () { 100 | dlog "[addSbt] arg = '$1'" 101 | sbt_commands=( "${sbt_commands[@]}" "$1" ) 102 | } 103 | addResidual () { 104 | dlog "[residual] arg = '$1'" 105 | residual_args=( "${residual_args[@]}" "$1" ) 106 | } 107 | addDebugger () { 108 | addJava "-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=$1" 109 | } 110 | 111 | # a ham-fisted attempt to move some memory settings in concert 112 | # so they need not be dicked around with individually. 113 | get_mem_opts () { 114 | local mem=${1:-2048} 115 | local perm=$(( $mem / 4 )) 116 | (( $perm > 256 )) || perm=256 117 | (( $perm < 4096 )) || perm=4096 118 | local codecache=$(( $perm / 2 )) 119 | 120 | echo "-Xms${mem}m -Xmx${mem}m -XX:MaxPermSize=${perm}m -XX:ReservedCodeCacheSize=${codecache}m" 121 | } 122 | 123 | require_arg () { 124 | local type="$1" 125 | local opt="$2" 126 | local arg="$3" 127 | if [[ -z "$arg" ]] || [[ "${arg:0:1}" == "-" ]]; then 128 | die "$opt requires <$type> argument" 129 | fi 130 | } 131 | 132 | is_function_defined() { 133 | declare -f "$1" > /dev/null 134 | } 135 | 136 | process_args () { 137 | while [[ $# -gt 0 ]]; do 138 | case "$1" in 139 | -h|-help) usage; exit 1 ;; 140 | -v|-verbose) verbose=1 && shift ;; 141 | -d|-debug) debug=1 && shift ;; 142 | 143 | -ivy) require_arg path "$1" "$2" && addJava "-Dsbt.ivy.home=$2" && shift 2 ;; 144 | -mem) require_arg integer "$1" "$2" && sbt_mem="$2" && shift 2 ;; 145 | -jvm-debug) require_arg port "$1" "$2" && addDebugger $2 && shift 2 ;; 146 | -batch) exec 7 | 8 | com.redislabs 9 | spark-redis_2.11 10 | 2.4.2 11 | 12 | 13 | ``` 14 | 15 | Or 16 | 17 | ```xml 18 | 19 | 20 | com.redislabs 21 | spark-redis_2.12 22 | 2.4.2 23 | 24 | 25 | ``` 26 | 27 | ### SBT 28 | 29 | ```scala 30 | libraryDependencies += "com.redislabs" %% "spark-redis" % "2.4.2" 31 | ``` 32 | 33 | ### Build form source 34 | You can download the library's source and build it: 35 | ``` 36 | git clone https://github.com/RedisLabs/spark-redis.git 37 | cd spark-redis 38 | mvn clean package -DskipTests 39 | ``` 40 | 41 | ### Using the library with spark shell 42 | Add Spark-Redis to Spark with the `--jars` command line option. 
43 | 44 | ```bash 45 | $ bin/spark-shell --jars /spark-redis--jar-with-dependencies.jar 46 | ``` 47 | 48 | By default it connects to `localhost:6379` without any password, you can change the connection settings in the following manner: 49 | 50 | ```bash 51 | $ bin/spark-shell --jars /spark-redis--jar-with-dependencies.jar --conf "spark.redis.host=localhost" --conf "spark.redis.port=6379" --conf "spark.redis.auth=passwd" 52 | ``` 53 | 54 | 55 | ### Configuring connection to Redis in a self-contained application 56 | 57 | An example configuration of SparkContext with Redis configuration: 58 | 59 | ```scala 60 | import com.redislabs.provider.redis._ 61 | 62 | ... 63 | 64 | val sc = new SparkContext(new SparkConf() 65 | .setMaster("local") 66 | .setAppName("myApp") 67 | // initial redis host - can be any node in cluster mode 68 | .set("spark.redis.host", "localhost") 69 | // initial redis port 70 | .set("spark.redis.port", "6379") 71 | // optional redis AUTH password 72 | .set("spark.redis.auth", "passwd") 73 | ) 74 | ``` 75 | 76 | The SparkSession can be configured in a similar manner: 77 | 78 | ```scala 79 | val spark = SparkSession 80 | .builder() 81 | .appName("myApp") 82 | .master("local[*]") 83 | .config("spark.redis.host", "localhost") 84 | .config("spark.redis.port", "6379") 85 | .config("spark.redis.auth", "passwd") 86 | .getOrCreate() 87 | 88 | val sc = spark.sparkContext 89 | ``` 90 | 91 | ### Create RDD 92 | 93 | ```scala 94 | import com.redislabs.provider.redis._ 95 | 96 | val keysRDD = sc.fromRedisKeyPattern("foo*", 5) 97 | val keysRDD = sc.fromRedisKeys(Array("foo", "bar"), 5) 98 | ``` 99 | 100 | ### Write Dataframe 101 | 102 | ```scala 103 | df.write 104 | .format("org.apache.spark.sql.redis") 105 | .option("table", "foo") 106 | .save() 107 | ``` 108 | 109 | ### Create Stream 110 | 111 | ```scala 112 | import com.redislabs.provider.redis.streaming._ 113 | 114 | val ssc = new StreamingContext(sc, Seconds(1)) 115 | val redisStream = ssc.createRedisStream(Array("foo", "bar"), 116 | storageLevel = StorageLevel.MEMORY_AND_DISK_2) 117 | ``` 118 | -------------------------------------------------------------------------------- /doc/java.md: -------------------------------------------------------------------------------- 1 | # Using the library in Java 2 | 3 | The library is written in Scala and the API is primarily intended to be used with Scala. But you can also use the library with 4 | Java because of the Scala/Java interoperability. 5 | 6 | ## RDD 7 | 8 | Please, refer to the detailed documentation of [RDD support](rdd.md) for the full list of available features. 9 | The RDD functions are available in `RedisContext`. Example: 10 | 11 | ```java 12 | SparkConf sparkConf = new SparkConf() 13 | .setAppName("MyApp") 14 | .setMaster("local[*]") 15 | .set("spark.redis.host", "localhost") 16 | .set("spark.redis.port", "6379"); 17 | 18 | RedisConfig redisConfig = RedisConfig.fromSparkConf(sparkConf); 19 | ReadWriteConfig readWriteConfig = ReadWriteConfig.fromSparkConf(sparkConf); 20 | 21 | JavaSparkContext jsc = new JavaSparkContext(sparkConf); 22 | RedisContext redisContext = new RedisContext(jsc.sc()); 23 | 24 | JavaRDD> rdd = jsc.parallelize(Arrays.asList(Tuple2.apply("myKey", "Hello"))); 25 | int ttl = 0; 26 | 27 | redisContext.toRedisKV(rdd.rdd(), ttl, redisConfig, readWriteConfig); 28 | 29 | ``` 30 | 31 | ## Datasets and DataFrames 32 | 33 | The Dataset/DataFrame API is the same in Java and Scala. Please, refer to [DataFrame page](dataframe.md) for details. 
Here is an 34 | example with Java: 35 | 36 | ```Java 37 | public class Person { 38 | 39 | private String name; 40 | private Integer age; 41 | 42 | public Person() { 43 | } 44 | 45 | public Person(String name, Integer age) { 46 | this.name = name; 47 | this.age = age; 48 | } 49 | 50 | public String getName() { 51 | return name; 52 | } 53 | 54 | public void setName(String name) { 55 | this.name = name; 56 | } 57 | 58 | public Integer getAge() { 59 | return age; 60 | } 61 | 62 | public void setAge(Integer age) { 63 | this.age = age; 64 | } 65 | } 66 | 67 | ``` 68 | 69 | ```Java 70 | SparkSession spark = SparkSession 71 | .builder() 72 | .appName("MyApp") 73 | .master("local[*]") 74 | .config("spark.redis.host", "localhost") 75 | .config("spark.redis.port", "6379") 76 | .getOrCreate(); 77 | 78 | Dataset df = spark.createDataFrame(Arrays.asList( 79 | new Person("John", 35), 80 | new Person("Peter", 40)), Person.class); 81 | 82 | df.write() 83 | .format("org.apache.spark.sql.redis") 84 | .option("table", "person") 85 | .option("key.column", "name") 86 | .mode(SaveMode.Overwrite) 87 | .save(); 88 | ``` 89 | 90 | ## Streaming 91 | 92 | The following example demonstrates how to create a stream from Redis list `myList`. Please, refer to [Streaming](streaming.md) for more details. 93 | 94 | ```java 95 | SparkConf sparkConf = new SparkConf() 96 | .setAppName("MyApp") 97 | .setMaster("local[*]") 98 | .set("spark.redis.host", "localhost") 99 | .set("spark.redis.port", "6379"); 100 | 101 | JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(1)); 102 | 103 | RedisConfig redisConfig = new RedisConfig(new RedisEndpoint(sparkConf)); 104 | 105 | RedisStreamingContext redisStreamingContext = new RedisStreamingContext(jssc.ssc()); 106 | String[] keys = new String[]{"myList"}; 107 | RedisInputDStream> redisStream = 108 | redisStreamingContext.createRedisStream(keys, StorageLevel.MEMORY_ONLY(), redisConfig); 109 | 110 | redisStream.print(); 111 | 112 | jssc.start(); 113 | jssc.awaitTermination(); 114 | ``` -------------------------------------------------------------------------------- /doc/python.md: -------------------------------------------------------------------------------- 1 | # Python 2 | 3 | Python support is currently limited to DataFrames only. Please, refer to Scala [DataFrame documentation](dataframe.md) 4 | for the complete list of features. 5 | 6 | Here is an example: 7 | 8 | 1. Run `pyspark` providing the Spark-Redis JAR file: 9 | 10 | ```bash 11 | $ ./bin/pyspark --jars /spark-redis--jar-with-dependencies.jar 12 | ``` 13 | 14 | By default it connects to `localhost:6379` without any password, you can change the connection settings in the following manner: 15 | 16 | ```bash 17 | $ bin/pyspark --jars /spark-redis--jar-with-dependencies.jar --conf "spark.redis.host=localhost" --conf "spark.redis.port=6379" --conf "spark.redis.auth=passwd" 18 | ``` 19 | 20 | 2. Read DataFrame from JSON, write/read from Redis: 21 | ```python 22 | df = spark.read.json("examples/src/main/resources/people.json") 23 | df.write.format("org.apache.spark.sql.redis").option("table", "people").option("key.column", "name").save() 24 | loadedDf = spark.read.format("org.apache.spark.sql.redis").option("table", "people").option("key.column", "name").load() 25 | loadedDf.show() 26 | ``` 27 | 28 | 3. 
Check the data with redis-cli: 29 | 30 | ```bash 31 | 127.0.0.1:6379> hgetall people:Justin 32 | 1) "age" 33 | 2) "19" 34 | ``` 35 | 36 | The self-contained application can be configured in the following manner: 37 | 38 | ```python 39 | SparkSession\ 40 | .builder\ 41 | .appName("myApp")\ 42 | .config("spark.redis.host", "localhost")\ 43 | .config("spark.redis.port", "6379")\ 44 | .config("spark.redis.auth", "passwd")\ 45 | .getOrCreate() 46 | ``` 47 | 48 | 49 | -------------------------------------------------------------------------------- /doc/rdd.md: -------------------------------------------------------------------------------- 1 | 2 | # RDD 3 | 4 | - [The keys RDD](#the-keys-rdd) 5 | - [Reading data](#reading-data) 6 | - [Writing data](#writing-data) 7 | - [Read and write configuration options](#read-and-write-configuration-options) 8 | 9 | ### The keys RDD 10 | Since data access in Redis is based on keys, to use Spark-Redis you'll first need a keys RDD. The following example shows how to read key names from Redis into an RDD: 11 | ```scala 12 | import com.redislabs.provider.redis._ 13 | 14 | val keysRDD = sc.fromRedisKeyPattern("foo*", 5) 15 | val keysRDD = sc.fromRedisKeys(Array("foo", "bar"), 5) 16 | ``` 17 | 18 | The above example populates the keys RDD by retrieving the key names from Redis that match the given pattern (`foo*`) or the keys can be listed by an Array. Furthermore, it overrides the default setting of 3 partitions in the RDD with a new value of 5 - each partition consists of a set of Redis cluster hashslots contain the matched key names. 19 | 20 | ### Reading data 21 | 22 | Each of Redis' data types can be read into an RDD. The following snippet demonstrates reading from Redis Strings. 23 | 24 | #### Strings 25 | 26 | ```scala 27 | import com.redislabs.provider.redis._ 28 | val stringRDD = sc.fromRedisKV("keyPattern*") 29 | val stringRDD = sc.fromRedisKV(Array("foo", "bar")) 30 | ``` 31 | 32 | Once run, `stringRDD: RDD[(String, String)]` will contain the string values of all keys whose names are provided by keyPattern or `Array[String]`. 33 | 34 | #### Hashes 35 | ```scala 36 | val hashRDD = sc.fromRedisHash("keyPattern*") 37 | val hashRDD = sc.fromRedisHash(Array("foo", "bar")) 38 | ``` 39 | 40 | This will populate `hashRDD: RDD[(String, String)]` with the fields and values of the Redis Hashes, the hashes' names are provided by keyPattern or `Array[String]`. 41 | 42 | #### Lists 43 | ```scala 44 | val listRDD = sc.fromRedisList("keyPattern*") 45 | val listRDD = sc.fromRedisList(Array("foo", "bar")) 46 | ``` 47 | The contents (members) of the Redis Lists in whose names are provided by keyPattern or `Array[String]` will be stored in `listRDD: RDD[String]`. 48 | 49 | #### Sets 50 | ```scala 51 | val setRDD = sc.fromRedisSet("keyPattern*") 52 | val setRDD = sc.fromRedisSet(Array("foo", "bar")) 53 | ``` 54 | 55 | The Redis Sets' members will be written to `setRDD: RDD[String]`. 56 | 57 | #### Sorted Sets 58 | ```scala 59 | val zsetRDD = sc.fromRedisZSetWithScore("keyPattern*") 60 | val zsetRDD = sc.fromRedisZSetWithScore(Array("foo", "bar")) 61 | ``` 62 | 63 | Using `fromRedisZSetWithScore` will store in `zsetRDD: RDD[(String, Double)]` an RDD that consists of members and their scores, from the Redis Sorted Sets whose keys are provided by keyPattern or Array[String]. 
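Since `fromRedisZSetWithScore` returns an ordinary pair RDD, the result can be processed with standard Spark transformations. A minimal sketch, assuming a hypothetical `leaderboard:*` key pattern and an arbitrary score threshold:

```scala
import com.redislabs.provider.redis._

// members and scores from all sorted sets matching a hypothetical key pattern
val scored = sc.fromRedisZSetWithScore("leaderboard:*")

// keep members scoring at least 100 and print the ten highest-scoring ones
scored
  .filter { case (_, score) => score >= 100.0 }
  .sortBy({ case (_, score) => score }, ascending = false)
  .take(10)
  .foreach { case (member, score) => println(s"$member -> $score") }
```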
64 | 65 | ```scala 66 | val zsetRDD = sc.fromRedisZSet("keyPattern*") 67 | val zsetRDD = sc.fromRedisZSet(Array("foo", "bar")) 68 | ``` 69 | 70 | Using `fromRedisZSet` will store in `zsetRDD: RDD[String]`, an RDD that consists of members from the Redis Sorted Sets whose keys are provided by keyPattern or Array[String]. 71 | 72 | ```scala 73 | val startPos: Int = _ 74 | val endPos: Int = _ 75 | val zsetRDD = sc.fromRedisZRangeWithScore("keyPattern*", startPos, endPos) 76 | val zsetRDD = sc.fromRedisZRangeWithScore(Array("foo", "bar"), startPos, endPos) 77 | ``` 78 | 79 | Using `fromRedisZRangeWithScore` will store in `zsetRDD: RDD[(String, Double)]`, an RDD of members together with their scores, taken from the positions [startPos, endPos] of each Sorted Set, for the Redis Sorted Sets whose keys are provided by keyPattern or Array[String]. 80 | 81 | ```scala 82 | val startPos: Int = _ 83 | val endPos: Int = _ 84 | val zsetRDD = sc.fromRedisZRange("keyPattern*", startPos, endPos) 85 | val zsetRDD = sc.fromRedisZRange(Array("foo", "bar"), startPos, endPos) 86 | ``` 87 | 88 | Using `fromRedisZRange` will store in `zsetRDD: RDD[String]`, an RDD of members taken from the positions [startPos, endPos] of each Sorted Set, for the Redis Sorted Sets whose keys are provided by keyPattern or Array[String]. 89 | 90 | ```scala 91 | val min: Double = _ 92 | val max: Double = _ 93 | val zsetRDD = sc.fromRedisZRangeByScoreWithScore("keyPattern*", min, max) 94 | val zsetRDD = sc.fromRedisZRangeByScoreWithScore(Array("foo", "bar"), min, max) 95 | ``` 96 | 97 | Using `fromRedisZRangeByScoreWithScore` will store in `zsetRDD: RDD[(String, Double)]`, an RDD of members together with their scores, restricted to members whose scores fall within [min, max], from the Redis Sorted Sets whose keys are provided by keyPattern or Array[String]. 98 | 99 | ```scala 100 | val min: Double = _ 101 | val max: Double = _ 102 | val zsetRDD = sc.fromRedisZRangeByScore("keyPattern*", min, max) 103 | val zsetRDD = sc.fromRedisZRangeByScore(Array("foo", "bar"), min, max) 104 | ``` 105 | 106 | Using `fromRedisZRangeByScore` will store in `zsetRDD: RDD[String]`, an RDD of members whose scores fall within [min, max], from the Redis Sorted Sets whose keys are provided by keyPattern or Array[String]. 107 | 108 | ### Writing data 109 | To write data to Redis from Spark, you'll need to prepare the appropriate RDD depending on the type of data you want to write. 110 | 111 | #### Strings 112 | For String values, your RDD should consist of the key-value pairs that are to be written. Assuming that the strings RDD is called `stringRDD`, use the following snippet for writing it to Redis: 113 | 114 | ```scala 115 | sc.toRedisKV(stringRDD) 116 | ``` 117 | 118 | In order to set an expiry on the key, we can pass in the `ttl` (in seconds) as an additional argument: 119 | 120 | ```scala 121 | sc.toRedisKV(stringRDD, ttl) 122 | ``` 123 | 124 | By default, Strings won't have any expiry set. 125 | 126 | #### Hashes 127 | To store a Redis Hash, the RDD should consist of its field-value pairs. If the RDD is called `hashRDD`, the following should be used for storing it in the key name specified by `hashName`: 128 | 129 | ```scala 130 | sc.toRedisHASH(hashRDD, hashName) 131 | ``` 132 | In order to set an expiry on the key, we can pass in the `ttl` (in seconds) as an additional argument: 133 | 134 | ```scala 135 | sc.toRedisHASH(hashRDD, hashName, ttl) 136 | ``` 137 | 138 | By default, Hashes won't have any expiry set. 
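Putting the above together, a minimal sketch of writing a hash end-to-end (the key name `myHash`, the field names and the one-hour TTL are made-up examples):

```scala
import com.redislabs.provider.redis._

// field-value pairs that become the fields of a single Redis hash
val hashRDD = sc.parallelize(Seq(
  ("field1", "value1"),
  ("field2", "value2")
))

// write them to the hash stored at the (made-up) key "myHash", expiring after one hour
sc.toRedisHASH(hashRDD, "myHash", 3600)
```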
139 | 140 | Use the following to store an RDD into multiple hashes: 141 | 142 | ```scala 143 | sc.toRedisHASHes(hashRDD, ttl) 144 | ``` 145 | 146 | The `hashRDD` is an RDD of tuples (`hashname`, `map[field name, field value]`). 147 | 148 | ```scala 149 | sc.toRedisByteHASHes(hashRDD, ttl) 150 | ``` 151 | 152 | The `hashRDD` is an RDD of tuples (`hashname`, `map[field name, field value]`) represented as byte arrays. 153 | 154 | #### Lists 155 | Use the following to store an RDD in a Redis List: 156 | 157 | ```scala 158 | sc.toRedisLIST(listRDD, listName) 159 | ``` 160 | 161 | Use the following to store an RDD in a fixed-size Redis List: 162 | 163 | ```scala 164 | sc.toRedisFixedLIST(listRDD, listName, listSize) 165 | ``` 166 | 167 | The `listRDD` is an RDD that contains all of the list's string elements in order, and `listName` is the list's key name. 168 | `listSize` is an integer which specifies the size of the Redis list; it is optional, and will default to an unlimited size. 169 | 170 | Use the following to store an RDD in multiple Redis Lists: 171 | 172 | ```scala 173 | sc.toRedisLISTs(rdd) 174 | ``` 175 | 176 | The `rdd` is an RDD of tuples (`list name`, `list values`). 177 | 178 | Use the following to store an RDD of binary values in multiple Redis Lists: 179 | 180 | ```scala 181 | sc.toRedisByteLISTs(byteListRDD) 182 | ``` 183 | 184 | The `byteListRDD` is an RDD of tuples (`list name`, `list values`) represented as byte arrays. 185 | 186 | Expiry can be set on Lists by passing in an additional argument called `ttl` (in seconds) to the above methods except `toRedisFixedLIST`: 187 | ```scala 188 | sc.toRedisLIST(listRDD, listName, ttl) 189 | sc.toRedisLISTs(rdd, ttl) 190 | sc.toRedisByteLISTs(byteListRDD, ttl) 191 | ``` 192 | 193 | By default, Lists won't have any expiry set. 194 | 195 | #### Sets 196 | For storing data in a Redis Set, use `toRedisSET` as follows: 197 | 198 | ```scala 199 | sc.toRedisSET(setRDD, setName) 200 | ``` 201 | 202 | Where `setRDD` is an RDD with the set's string elements and `setName` is the name of the key for that set. 203 | 204 | In order to set an expiry on the key, we can pass in the `ttl` (in seconds) as an additional argument: 205 | 206 | ```scala 207 | sc.toRedisSET(setRDD, setName, ttl) 208 | ``` 209 | 210 | By default, Sets won't have any expiry set. 211 | 212 | #### Sorted Sets 213 | ```scala 214 | sc.toRedisZSET(zsetRDD, zsetName) 215 | ``` 216 | 217 | The above example demonstrates storing data in a Redis Sorted Set. The `zsetRDD` in the example should contain pairs of members and their scores, whereas `zsetName` is the name for that key. 218 | 219 | In order to set an expiry on the key, we can pass in the `ttl` (in seconds) as an additional argument: 220 | 221 | ```scala 222 | sc.toRedisZSET(zsetRDD, zsetName, ttl) 223 | ``` 224 | 225 | By default, Sorted Sets won't have any expiry set. 
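For illustration, a short sketch of writing a Sorted Set end-to-end (the key name `scores`, the members and the one-day TTL are made-up; the scores are passed as strings here to match the member/score pair shape — verify against the `toRedisZSET` signature of the version you use):

```scala
import com.redislabs.provider.redis._

// (member, score) pairs; scores are given as strings in this sketch
val zsetRDD = sc.parallelize(Seq(
  ("alice", "95.5"),
  ("bob", "87.0")
))

// store them under the (made-up) key "scores" with a one-day expiry
sc.toRedisZSET(zsetRDD, "scores", 86400)
```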
226 | 227 | ### Read and write configuration options 228 | 229 | Some [configuration options](configuration.md) can be overridden for a particular RDD: 230 | 231 | ```scala 232 | val readWriteConf = ReadWriteConfig(scanCount = 1000, maxPipelineSize = 1000) 233 | val rdd = sc.fromRedisKeyPattern(keyPattern)(readWriteConfig = readWriteConf) 234 | ``` 235 | 236 | or with an implicit parameter: 237 | 238 | ```scala 239 | implicit val readWriteConf = ReadWriteConfig(scanCount = 1000, maxPipelineSize = 1000) 240 | val rdd = sc.fromRedisKeyPattern(keyPattern) 241 | ``` 242 | -------------------------------------------------------------------------------- /doc/streaming.md: -------------------------------------------------------------------------------- 1 | ### Streaming 2 | 3 | Spark-Redis supports streaming data from Stream and List data structures: 4 | 5 | - [Redis Stream](#redis-stream) 6 | - [Redis List](#redis-list) 7 | 8 | ## Redis Stream 9 | 10 | To stream data from [Redis Stream](https://redis.io/topics/streams-intro) use `createRedisXStream` method (added in Spark-Redis 2.3.1): 11 | 12 | ```scala 13 | import com.redislabs.provider.redis.streaming._ 14 | import com.redislabs.provider.redis.streaming.{ConsumerConfig, StreamItem} 15 | import org.apache.spark.sql.SparkSession 16 | import org.apache.spark.streaming.dstream.InputDStream 17 | import org.apache.spark.streaming.{Seconds, StreamingContext} 18 | 19 | val spark = SparkSession.builder.appName("Redis Stream Example") 20 | .master("local[*]") 21 | .config("spark.redis.host", "localhost") 22 | .config("spark.redis.port", "6379") 23 | .getOrCreate() 24 | 25 | val ssc = new StreamingContext(spark.sparkContext, Seconds(1)) 26 | 27 | val stream = ssc.createRedisXStream(Seq(ConsumerConfig("my-stream", "my-consumer-group", "my-consumer-1"))) 28 | stream.print() 29 | 30 | ssc.start() 31 | ssc.awaitTermination() 32 | 33 | ``` 34 | 35 | It will automatically create a consumer group if it doesn't exist and will start listening for the messages in the stream. 36 | 37 | ### Stream Offset 38 | 39 | By default it pulls messages starting from the latest message. If you need to start from the earliest message or any specific position in the stream, specify the `offset` parameter: 40 | 41 | ```scala 42 | ConsumerConfig("my-stream", "my-consumer-group", "my-consumer-1", offset = Earliest) // start from '0-0' 43 | ConsumerConfig("my-stream", "my-consumer-group", "my-consumer-1", IdOffset(42, 0)) // start from '42-0' 44 | ``` 45 | 46 | Please note, Spark-Redis will attempt to create a consumer group with the specified offset, but if the consumer group already exists, 47 | it will use the existing offset. It means, for example, if you decide to re-process all the messages from the beginning, 48 | just changing the offset to `Earliest` may not be enough. You may need to either manually delete the consumer 49 | group with `XGROUP DESTROY` or modify the offset with `XGROUP SETID`. 50 | 51 | ### Receiver reliability 52 | 53 | The DStream is implemented with a [Reliable Receiver](https://spark.apache.org/docs/latest/streaming-custom-receivers.html#receiver-reliability) that acknowledges 54 | after the data has been stored in Spark. As with any other Receiver to achieve strong fault-tolerance guarantees and ensure zero data loss, you have to enable [write-ahead logs](https://spark.apache.org/docs/latest/streaming-programming-guide.html#deploying-applications) and checkpointing. 
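A minimal sketch of such a setup (the application name and checkpoint directory below are only example values):

```scala
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

// enable the receiver write-ahead log and register a checkpoint directory
val conf = new SparkConf()
  .setAppName("redis-stream-app")
  .setMaster("local[*]")
  .set("spark.redis.host", "localhost")
  .set("spark.redis.port", "6379")
  .set("spark.streaming.receiver.writeAheadLog.enable", "true")

val ssc = new StreamingContext(conf, Seconds(1))
ssc.checkpoint("/tmp/spark-redis-checkpoint") // example path; use a fault-tolerant store in production
```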
55 | 56 | The received data is stored with `StorageLevel.MEMORY_AND_DISK_2` by default. 57 | Storage level can be configured with `storageLevel` parameter, e.g.: 58 | ```scala 59 | ssc.createRedisXStream(conf, storageLevel = StorageLevel.MEMORY_AND_DISK_SER_2) 60 | ``` 61 | 62 | ### Level of Parallelism 63 | 64 | The `createRedisXStream()` takes a sequence of consumer configs, each consumer is started in a separate thread. This allows you, for example, to 65 | create a stream from multiple Redis Stream keys: 66 | 67 | ```scala 68 | ssc.createRedisXStream(Seq( 69 | ConsumerConfig("my-stream-1", "my-consumer-group-1", "my-consumer-1"), 70 | ConsumerConfig("my-stream-2", "my-consumer-group-2", "my-consumer-1") 71 | )) 72 | ``` 73 | 74 | In this example we created an input DStream that corresponds to a single receiver running in a Spark executor. The receiver will create two threads pulling 75 | data from the streams in parallel. However if receiving data becomes a bottleneck, you may want to start multiple receivers in different executors (worker machines). 76 | This can be achieved by creating multiple input DStreams and using `union` to join them together. You can read more about about it [here](https://spark.apache.org/docs/latest/streaming-programming-guide.html#level-of-parallelism-in-data-receiving). 77 | 78 | For example, the following will create two receivers pulling the data from `my-stream` and balancing the load: 79 | 80 | ```scala 81 | val streams = Seq( 82 | ssc.createRedisXStream(Seq(ConsumerConfig("my-stream", "my-consumer-group", "my-consumer-1"))), 83 | ssc.createRedisXStream(Seq(ConsumerConfig("my-stream", "my-consumer-group", "my-consumer-2"))) 84 | ) 85 | 86 | val stream = ssc.union(streams) 87 | stream.print() 88 | ``` 89 | 90 | ### Configuration 91 | 92 | If the cluster is not large enough to process data as fast as it is being received, the receiving rate can be limited: 93 | 94 | ```scala 95 | ConsumerConfig("stream", "group", "c-1", rateLimitPerConsumer = Some(100)) // 100 items per second 96 | ``` 97 | 98 | It defines the number of received items per second per consumer. 99 | 100 | Another options you can configure are `batchSize` and `block`. They define the maximum number of pulled items and time in milliseconds to wait in a `XREADGROUP` call. 101 | 102 | ```scala 103 | ConsumerConfig("stream", "group", "c-1", batchSize = 50, block = 200) 104 | ``` 105 | 106 | 107 | ## Redis List 108 | 109 | The stream can be also created from Redis' List, the data is fetched with the `blpop` command. Users are required to provide an array which stores all the List names they are interested in. The [storageLevel](http://spark.apache.org/docs/latest/streaming-programming-guide.html#data-serialization) parameter is `MEMORY_AND_DISK_SER_2` by default. 110 | 111 | The method `createRedisStream` will create a `(listName, value)` stream, but if you don't care about which list feeds the value, you can use `createRedisStreamWithoutListname` to get the only `value` stream. 
112 | 113 | Use the following to get a `(listName, value)` stream from `foo` and `bar` list 114 | 115 | ```scala 116 | import org.apache.spark.streaming.{Seconds, StreamingContext} 117 | import org.apache.spark.storage.StorageLevel 118 | import com.redislabs.provider.redis.streaming._ 119 | val ssc = new StreamingContext(sc, Seconds(1)) 120 | val redisStream = ssc.createRedisStream(Array("foo", "bar"), storageLevel = StorageLevel.MEMORY_AND_DISK_2) 121 | redisStream.print() 122 | ssc.start() 123 | ssc.awaitTermination() 124 | ``` 125 | 126 | 127 | Use the following to get a `value` stream from `foo` and `bar` list 128 | 129 | ```scala 130 | import org.apache.spark.streaming.{Seconds, StreamingContext} 131 | import org.apache.spark.storage.StorageLevel 132 | import com.redislabs.provider.redis.streaming._ 133 | val ssc = new StreamingContext(sc, Seconds(1)) 134 | val redisStream = ssc.createRedisStreamWithoutListname(Array("foo", "bar"), storageLevel = StorageLevel.MEMORY_AND_DISK_2) 135 | redisStream.print() 136 | ssc.start() 137 | ssc.awaitTermination() 138 | ``` 139 | -------------------------------------------------------------------------------- /doc/structured-streaming.md: -------------------------------------------------------------------------------- 1 | ### Structured Streaming 2 | 3 | Spark-Redis supports [Redis Stream](https://redis.io/topics/streams-intro) data structure as a source for [Structured Streaming](https://spark.apache.org/docs/latest/structured-streaming-programming-guide.html): 4 | 5 | The following example reads data from a Redis Stream `sensors` that has two fields `sensor-id` and `temperature`: 6 | 7 | ```scala 8 | val spark = SparkSession 9 | .builder 10 | .master("local[*]") 11 | .config("spark.redis.host", "localhost") 12 | .config("spark.redis.port", "6379") 13 | .getOrCreate() 14 | 15 | val sensors = spark 16 | .readStream 17 | .format("redis") // read from Redis 18 | .option("stream.keys", "sensors") // stream key 19 | .schema(StructType(Array( // stream fields 20 | StructField("sensor-id", StringType), 21 | StructField("temperature", FloatType) 22 | ))) 23 | .load() 24 | 25 | val query = sensors 26 | .writeStream 27 | .format("console") 28 | .start() 29 | 30 | query.awaitTermination() 31 | 32 | ``` 33 | 34 | You can write the following items to the stream to test it: 35 | 36 | ``` 37 | xadd sensors * sensor-id 1 temperature 28.1 38 | xadd sensors * sensor-id 2 temperature 30.5 39 | xadd sensors * sensor-id 1 temperature 28.3 40 | ``` 41 | 42 | ### Output to Redis 43 | 44 | There is no Redis Sink available, but you can leverage [`foreachBatch`](https://spark.apache.org/docs/latest/structured-streaming-programming-guide.html#foreachbatch) and [DataFrame](dataframe.md) write command to output 45 | stream into Redis. Please note, `foreachBatch` is only available starting from Spark 2.4.0. 
46 | 47 | ```scala 48 | val query = sensors 49 | .writeStream 50 | .outputMode("update") 51 | .foreachBatch { (batchDF: DataFrame, batchId: Long) => 52 | batchDF 53 | .write 54 | .format("org.apache.spark.sql.redis") 55 | .option("table", "output") 56 | .mode(SaveMode.Append) 57 | .save() 58 | } 59 | .start() 60 | 61 | query.awaitTermination() 62 | ``` 63 | 64 | After writing the following to the Redis Stream: 65 | ``` 66 | xadd sensors * sensor-id 1 temperature 28.1 67 | xadd sensors * sensor-id 2 temperature 30.5 68 | xadd sensors * sensor-id 1 temperature 28.3 69 | ``` 70 | 71 | there will be the output `keys output:*`: 72 | ``` 73 | 1) "output:b1682af092b9467cb13cfdcf7fcc9835" 74 | 2) "output:04c80769320f4edeadcce8381a6f834d" 75 | 3) "output:4f04070a2fd548fdbea441b694c8673b" 76 | ``` 77 | 78 | `hgetall output:b1682af092b9467cb13cfdcf7fcc9835`: 79 | 80 | ``` 81 | 1) "sensor-id" 82 | 2) "2" 83 | 3) "temperature" 84 | 4) "30.5" 85 | ``` 86 | 87 | Please refer to [DataFrame docs](dataframe.md) for different options (such as specifying key name) available for writing. 88 | 89 | ### Stream Offset 90 | 91 | By default it pulls messages starting from the latest message in the stream. If you need to start from the specific position in the stream, specify the `stream.offsets` parameter as a JSON string. 92 | In the following example we set offset id to be `1548083485360-0`. The group name `redis-source` is a default consumer group that Spark-Redis automatically creates to read stream. 93 | 94 | ```scala 95 | val offsets = """{"offsets":{"sensors":{"groupName":"redis-source","offset":"1548083485360-0"}}}""" 96 | 97 | ... 98 | 99 | .option("stream.offsets", offsets) 100 | ``` 101 | 102 | If you want to process the stream from the beginning, set offset id to `0-0`. 103 | 104 | ### Entry id column 105 | 106 | You can access stream entry id by adding a column `_id` to the stream schema: 107 | 108 | ```scala 109 | val sensors = spark 110 | .readStream 111 | .format("redis") 112 | .option("stream.keys", "sensors") 113 | .schema(StructType(Array( 114 | StructField("_id", StringType), // entry id 115 | StructField("sensor-id", StringType), 116 | StructField("temperature", FloatType) 117 | ))) 118 | .load() 119 | ``` 120 | 121 | The stream schema: 122 | 123 | 124 | ``` 125 | +---------------+---------+-----------+ 126 | | _id|sensor-id|temperature| 127 | +---------------+---------+-----------+ 128 | |1548083485360-0| 1| 28.1| 129 | |1548083486248-0| 2| 30.5| 130 | |1548083486944-0| 1| 28.3| 131 | +---------------+---------+-----------+ 132 | 133 | ``` 134 | 135 | ### Level of Parallelism 136 | 137 | By default Spark-Redis creates a consumer group with a single consumer. There are two options available for increasing the level of parallelism. 138 | 139 | The first approach is to create stream from multiple Redis keys. You can specify multiple keys separated by comma, e.g. 140 | `.option("stream.keys", "sensors-eu,sensors-us")`. In this case data from each key will be mapped to a Spark partition. 141 | Please note, item ordering will be preserved only within a particular Redis key (Spark partition), there is no ordering guarantees for items across different keys. 142 | 143 | With the second approach you can read data from a single Redis key with multiple consumers in parallel, e.g. `option("stream.parallelism", 4)`. Each consumer will be mapped to a Spark partition. There are no ordering guarantees in this case. 
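Written out as a complete read, the second approach could look like the sketch below, which reuses the `sensors` schema from the first example in this document:

```scala
// read a single stream key with four consumers; each consumer maps to a Spark partition
val sensors = spark
  .readStream
  .format("redis")
  .option("stream.keys", "sensors")
  .option("stream.parallelism", 4)
  .schema(StructType(Array(
    StructField("sensor-id", StringType),
    StructField("temperature", FloatType)
  )))
  .load()
```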
144 | 145 | ### Connection options 146 | 147 | Similarly to the DataFrame API, you can override connection options at the individual stream level using the following options passed to `spark.readStream`: 148 | 149 | | Name | Description | Type | Default | 150 | | -----------| -------------------------------------------------------------| ---------- | ----------- | 151 | | host | overrides `spark.redis.host` configured in SparkSession | `String` | `localhost` | 152 | | port | overrides `spark.redis.port` configured in SparkSession | `Int` | `6379` | 153 | | auth | overrides `spark.redis.auth` configured in SparkSession | `String` | - | 154 | | dbNum | overrides `spark.redis.db` configured in SparkSession | `Int` | `0` | 155 | | timeout | overrides `spark.redis.timeout` configured in SparkSession | `Int` | `2000` | 156 | 157 | 158 | ### Other configuration 159 | 160 | Spark-Redis automatically creates a consumer group with the name `spark-source` if it doesn't exist. You can customize the consumer group name with 161 | `.option("stream.group.name", "my-group")`. You can also customize the prefix of the consumer names in the consumer group with `.option("stream.consumer.prefix", "my-consumer")`. 162 | 163 | Other options you can configure are `stream.read.batch.size` and `stream.read.block`. They define the maximum number of pulled items and the time in milliseconds to wait in an `XREADGROUP` call. 164 | The default values are 100 items and 500 ms, respectively. 165 | 166 | ```scala 167 | .option("stream.read.batch.size", 200) // items 168 | .option("stream.read.block", 1000) // in milliseconds 169 | ``` 170 | 171 | ### Fault Tolerance Semantics 172 | 173 | Spark-Redis provides a replayable source, so by enabling [checkpointing](https://spark.apache.org/docs/latest/structured-streaming-programming-guide.html#recovering-from-failures-with-checkpointing) and using 174 | idempotent sinks, you can ensure end-to-end exactly-once semantics under any failure. If checkpointing is not enabled, you may lose messages. 175 | -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | # This file should only contain the version of sbt to use. 2 | sbt.version=0.13.6 3 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | // You may use this file to add plugin dependencies for sbt.
2 | addSbtPlugin("org.spark-packages" % "sbt-spark-package" % "0.2.3") 3 | 4 | resolvers += "bintray-spark-packages" at "https://dl.bintray.com/spark-packages/maven/" 5 | -------------------------------------------------------------------------------- /scalastyle-config.xml: -------------------------------------------------------------------------------- (Scalastyle standard configuration: rule definitions omitted) -------------------------------------------------------------------------------- /src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister: -------------------------------------------------------------------------------- 1 | org.apache.spark.sql.redis.stream.RedisStreamProvider 2 | -------------------------------------------------------------------------------- /src/main/scala/com/redislabs/provider/redis/ConnectionPool.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis 2 | 3 | import redis.clients.jedis.exceptions.JedisConnectionException 4 | import redis.clients.jedis.{Jedis, JedisPool, JedisPoolConfig} 5 | 6 | import java.time.Duration 7 | import java.util.concurrent.ConcurrentHashMap 8 | import scala.collection.JavaConversions._ 9 | 10 | 11 | object ConnectionPool { 12 | @transient private lazy val pools: ConcurrentHashMap[RedisEndpoint, JedisPool] = 13 | new ConcurrentHashMap[RedisEndpoint, JedisPool]() 14 | 15 | def connect(re: RedisEndpoint): Jedis = { 16 | val pool = pools.getOrElseUpdate(re, 17 | { 18 | val poolConfig: JedisPoolConfig = new JedisPoolConfig(); 19 | poolConfig.setMaxTotal(250) 20 | poolConfig.setMaxIdle(32) 21 | poolConfig.setTestOnBorrow(false) 22 | poolConfig.setTestOnReturn(false) 23 | poolConfig.setTestWhileIdle(false) 24 | poolConfig.setSoftMinEvictableIdleTime(Duration.ofMinutes(1)) 25 | poolConfig.setTimeBetweenEvictionRuns(Duration.ofSeconds(30)) 26 | poolConfig.setNumTestsPerEvictionRun(-1) 27 | 28 | new JedisPool(poolConfig, re.host, re.port, re.timeout, re.user, re.auth, re.dbNum, re.ssl) 29 | } 30 | ) 31 | var sleepTime: Int = 4 32 | var conn: Jedis = null 33 | while (conn == null) { 34 | try { 35 | conn = pool.getResource 36 | } 37 | catch { 38 | case e: JedisConnectionException if e.getCause.toString.
39 | contains("ERR max number of clients reached") => { 40 | if (sleepTime < 500) sleepTime *= 2 41 | Thread.sleep(sleepTime) 42 | } 43 | case e: Exception => throw e 44 | } 45 | } 46 | conn 47 | } 48 | } 49 | 50 | -------------------------------------------------------------------------------- /src/main/scala/com/redislabs/provider/redis/package.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider 2 | 3 | package object redis extends RedisFunctions { 4 | val RedisSslScheme: String = "rediss" 5 | val RedisDataTypeHash: String = "hash" 6 | val RedisDataTypeString: String = "string" 7 | } 8 | -------------------------------------------------------------------------------- /src/main/scala/com/redislabs/provider/redis/partitioner/RedisPartition.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.partitioner 2 | 3 | import com.redislabs.provider.redis.RedisConfig 4 | import org.apache.spark.Partition 5 | 6 | 7 | case class RedisPartition(index: Int, 8 | redisConfig: RedisConfig, 9 | slots: (Int, Int)) extends Partition 10 | -------------------------------------------------------------------------------- /src/main/scala/com/redislabs/provider/redis/partitioner/RedisPartitioner.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.partitioner 2 | -------------------------------------------------------------------------------- /src/main/scala/com/redislabs/provider/redis/streaming/RedisInputDStream.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.streaming 2 | 3 | import com.redislabs.provider.redis.RedisConfig 4 | import org.apache.curator.utils.ThreadUtils 5 | import org.apache.spark.storage.StorageLevel 6 | import org.apache.spark.streaming.StreamingContext 7 | import org.apache.spark.streaming.receiver.Receiver 8 | import org.apache.spark.streaming.dstream.ReceiverInputDStream 9 | 10 | import redis.clients.jedis._ 11 | 12 | import scala.reflect.{ClassTag, classTag} 13 | import scala.util.control.NonFatal 14 | 15 | /** 16 | * Receives messages from Redis List 17 | */ 18 | class RedisInputDStream[T: ClassTag](_ssc: StreamingContext, 19 | keys: Array[String], 20 | storageLevel: StorageLevel, 21 | redisConfig: RedisConfig, 22 | streamType: Class[T]) 23 | extends ReceiverInputDStream[T](_ssc) { 24 | def getReceiver(): Receiver[T] = { 25 | new RedisReceiver(keys, storageLevel, redisConfig, streamType) 26 | } 27 | } 28 | 29 | 30 | private class RedisReceiver[T: ClassTag](keys: Array[String], 31 | storageLevel: StorageLevel, 32 | redisConfig: RedisConfig, 33 | streamType: Class[T]) 34 | extends Receiver[T](storageLevel) { 35 | 36 | def onStart() { 37 | val executorPool = ThreadUtils.newFixedThreadPool(keys.length, "BlockLists Streaming") 38 | try { 39 | /* start a executor for each interested List */ 40 | keys.foreach{ key => 41 | executorPool.submit(new MessageHandler(redisConfig.connectionForKey(key), key)) 42 | } 43 | } finally { 44 | executorPool.shutdown() 45 | } 46 | } 47 | 48 | def onStop() { 49 | } 50 | 51 | private class MessageHandler(conn: Jedis, key: String) extends Runnable { 52 | def run() { 53 | try { 54 | while(!isStopped) { 55 | val response = conn.blpop(2, key) 56 | if (response == null || response.isEmpty) { 57 | // no-op 58 | } else if (classTag[T] == classTag[String]) { 59 | 
store(response.get(1).asInstanceOf[T]) 60 | } else if (classTag[T] == classTag[(String, String)]) { 61 | store((response.get(0), response.get(1)).asInstanceOf[T]) 62 | } else { 63 | throw new scala.Exception("Unknown Redis Streaming type") 64 | } 65 | } 66 | } catch { 67 | case NonFatal(e) => 68 | restart("Error receiving data", e) 69 | } finally { 70 | onStop() 71 | } 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/main/scala/com/redislabs/provider/redis/streaming/RedisStreamReceiver.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.streaming 2 | 3 | import java.util.AbstractMap.SimpleEntry 4 | import com.redislabs.provider.redis.util.PipelineUtils.foreachWithPipeline 5 | import com.redislabs.provider.redis.util.{Logging, StreamUtils} 6 | import com.redislabs.provider.redis.{ReadWriteConfig, RedisConfig} 7 | import org.apache.curator.utils.ThreadUtils 8 | import org.apache.spark.storage.StorageLevel 9 | import org.apache.spark.streaming.receiver.Receiver 10 | import org.sparkproject.guava.util.concurrent.RateLimiter 11 | import redis.clients.jedis.{Jedis, StreamEntry, StreamEntryID} 12 | 13 | import scala.collection.JavaConversions._ 14 | 15 | /** 16 | * Receives messages from Redis Stream 17 | */ 18 | class RedisStreamReceiver(consumersConfig: Seq[ConsumerConfig], 19 | redisConfig: RedisConfig, 20 | readWriteConfig: ReadWriteConfig, 21 | storageLevel: StorageLevel) 22 | extends Receiver[StreamItem](storageLevel) with Logging { 23 | 24 | override def onStart(): Unit = { 25 | logInfo("Starting Redis Stream Receiver") 26 | val executorPool = ThreadUtils.newFixedThreadPool(consumersConfig.size, "RedisStreamMessageHandler") 27 | try { 28 | // start consumers in separate threads 29 | for (c <- consumersConfig) { 30 | executorPool.submit(new MessageHandler(c, redisConfig, readWriteConfig)) 31 | } 32 | } finally { 33 | // terminate threads after the work is done 34 | executorPool.shutdown() 35 | } 36 | } 37 | 38 | override def onStop(): Unit = { 39 | } 40 | 41 | private class MessageHandler(conf: ConsumerConfig, 42 | redisConfig: RedisConfig, 43 | implicit val readWriteConfig: ReadWriteConfig) extends Runnable { 44 | 45 | val jedis: Jedis = redisConfig.connectionForKey(conf.streamKey) 46 | val rateLimiterOpt: Option[RateLimiter] = conf.rateLimitPerConsumer.map(r => RateLimiter.create(r)) 47 | 48 | override def run(): Unit = { 49 | logInfo(s"Starting MessageHandler $conf") 50 | try { 51 | createConsumerGroupIfNotExist() 52 | receiveUnacknowledged() 53 | receiveNewMessages() 54 | } catch { 55 | case e: Exception => 56 | restart("Error handling message. 
Restarting.", e) 57 | } 58 | } 59 | 60 | def createConsumerGroupIfNotExist(): Unit = { 61 | val entryId = conf.offset match { 62 | case Earliest => new StreamEntryID(0, 0) 63 | case Latest => StreamEntryID.LAST_ENTRY 64 | case IdOffset(v1, v2) => new StreamEntryID(v1, v2) 65 | } 66 | StreamUtils.createConsumerGroupIfNotExist(jedis, conf.streamKey, conf.groupName, entryId) 67 | } 68 | 69 | def receiveUnacknowledged(): Unit = { 70 | logInfo(s"Starting receiving unacknowledged messages for key ${conf.streamKey}") 71 | var continue = true 72 | val unackId = new SimpleEntry(conf.streamKey, new StreamEntryID(0, 0)) 73 | 74 | while (!isStopped && continue) { 75 | val response = jedis.xreadGroup( 76 | conf.groupName, 77 | conf.consumerName, 78 | conf.batchSize, 79 | conf.block, 80 | false, 81 | unackId) 82 | 83 | val unackMessagesMap = response.map(e => (e.getKey, e.getValue)).toMap 84 | val entries = unackMessagesMap(conf.streamKey) 85 | if (entries.isEmpty) { 86 | continue = false 87 | } 88 | storeAndAck(conf.streamKey, entries) 89 | } 90 | } 91 | 92 | def receiveNewMessages(): Unit = { 93 | logInfo(s"Starting receiving new messages for key ${conf.streamKey}") 94 | val newMessId = new SimpleEntry(conf.streamKey, StreamEntryID.UNRECEIVED_ENTRY) 95 | 96 | while (!isStopped) { 97 | val response = jedis.xreadGroup( 98 | conf.groupName, 99 | conf.consumerName, 100 | conf.batchSize, 101 | conf.block, 102 | false, 103 | newMessId) 104 | 105 | if (response != null) { 106 | for (streamMessages <- response) { 107 | val key = streamMessages.getKey 108 | val entries = streamMessages.getValue 109 | storeAndAck(key, entries) 110 | } 111 | } 112 | } 113 | } 114 | 115 | def storeAndAck(streamKey: String, entries: Seq[StreamEntry]): Unit = { 116 | if (entries.nonEmpty) { 117 | // limit the rate if it's enabled 118 | rateLimiterOpt.foreach(_.acquire(entries.size)) 119 | val streamItems = entriesToItems(streamKey, entries) 120 | // call store(multiple-records) to reliably store in Spark memory 121 | store(streamItems.iterator) 122 | // ack redis 123 | foreachWithPipeline(jedis, entries) { (pipeline, entry) => 124 | pipeline.xack(streamKey, conf.groupName, entry.getID) 125 | } 126 | } 127 | } 128 | 129 | def entriesToItems(key: String, entries: Seq[StreamEntry]): Seq[StreamItem] = { 130 | entries.map { e => 131 | val itemId = ItemId(e.getID.getTime, e.getID.getSequence) 132 | StreamItem(key, itemId, e.getFields.toMap) 133 | } 134 | } 135 | } 136 | 137 | } 138 | 139 | /** 140 | * @param streamKey redis stream key 141 | * @param groupName consumer group name 142 | * @param consumerName consumer name 143 | * @param offset stream offset 144 | * @param rateLimitPerConsumer maximum retrieved messages per second per single consumer 145 | * @param batchSize maximum number of pulled items in a read API call 146 | * @param block time in milliseconds to wait for data in a blocking read API call 147 | */ 148 | case class ConsumerConfig(streamKey: String, 149 | groupName: String, 150 | consumerName: String, 151 | offset: Offset = Latest, 152 | rateLimitPerConsumer: Option[Int] = None, 153 | batchSize: Int = 100, 154 | block: Long = 500) 155 | 156 | /** 157 | * Represents an offset in the stream 158 | */ 159 | sealed trait Offset 160 | 161 | /** 162 | * Latest offset, known as a '$' special id 163 | */ 164 | case object Latest extends Offset 165 | 166 | /** 167 | * Earliest offset, '0-0' id 168 | */ 169 | case object Earliest extends Offset 170 | 171 | /** 172 | * Specific id in the form of 'v1-v2' 173 | * 174 | * @param v1 
first token of the id 175 | * @param v2 second token of the id 176 | */ 177 | case class IdOffset(v1: Long, v2: Long) extends Offset 178 | 179 | /** 180 | * Item id in the form of 'v1-v2' 181 | * 182 | * @param v1 first token of the id 183 | * @param v2 second token of the id 184 | */ 185 | case class ItemId(v1: Long, v2: Long) 186 | 187 | /** 188 | * Represent an item in the stream 189 | * 190 | * @param streamKey stream key 191 | * @param id item(entry) id 192 | * @param fields key/value map of item fields 193 | */ 194 | case class StreamItem(streamKey: String, id: ItemId, fields: Map[String, String]) 195 | 196 | -------------------------------------------------------------------------------- /src/main/scala/com/redislabs/provider/redis/streaming/package.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis 2 | 3 | package object streaming extends RedisStreamingFunctions { 4 | 5 | } 6 | -------------------------------------------------------------------------------- /src/main/scala/com/redislabs/provider/redis/streaming/redisStreamingFunctions.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.streaming 2 | 3 | import com.redislabs.provider.redis.{ReadWriteConfig, RedisConfig} 4 | import org.apache.spark.storage.StorageLevel 5 | import org.apache.spark.streaming.StreamingContext 6 | import org.apache.spark.streaming.dstream.InputDStream 7 | 8 | /** 9 | * RedisStreamingContext extends StreamingContext's functionality with Redis 10 | * 11 | * @param ssc a spark StreamingContext 12 | */ 13 | class RedisStreamingContext(@transient val ssc: StreamingContext) extends Serializable { 14 | /** 15 | * @param keys an Array[String] which consists all the Lists we want to listen to 16 | * @param storageLevel the receiver' storage tragedy of received data, default as MEMORY_AND_DISK_2 17 | * @return a stream of (listname, value) 18 | */ 19 | def createRedisStream(keys: Array[String], 20 | storageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK_2) 21 | (implicit 22 | redisConfig: RedisConfig = RedisConfig.fromSparkConf(ssc.sparkContext.getConf)): 23 | RedisInputDStream[(String, String)] = { 24 | new RedisInputDStream(ssc, keys, storageLevel, redisConfig, classOf[(String, String)]) 25 | } 26 | 27 | /** 28 | * @param keys an Array[String] which consists all the Lists we want to listen to 29 | * @param storageLevel the receiver' storage tragedy of received data, default as MEMORY_AND_DISK_2 30 | * @return a stream of (value) 31 | */ 32 | def createRedisStreamWithoutListname(keys: Array[String], 33 | storageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK_2) 34 | (implicit 35 | redisConf: RedisConfig = RedisConfig.fromSparkConf(ssc.sparkContext.getConf)): 36 | RedisInputDStream[String] = { 37 | new RedisInputDStream(ssc, keys, storageLevel, redisConf, classOf[String]) 38 | } 39 | 40 | def createRedisXStream(consumersConfig: Seq[ConsumerConfig], 41 | storageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK_2) 42 | (implicit 43 | redisConfig: RedisConfig = RedisConfig.fromSparkConf(ssc.sparkContext.getConf)): 44 | InputDStream[StreamItem] = { 45 | val readWriteConfig = ReadWriteConfig.fromSparkConf(ssc.sparkContext.getConf) 46 | val receiver = new RedisStreamReceiver(consumersConfig, redisConfig, readWriteConfig, storageLevel) 47 | ssc.receiverStream(receiver) 48 | } 49 | } 50 | 51 | trait RedisStreamingFunctions { 52 | 53 | implicit def 
toRedisStreamingContext(ssc: StreamingContext): RedisStreamingContext = new RedisStreamingContext(ssc) 54 | 55 | } 56 | 57 | -------------------------------------------------------------------------------- /src/main/scala/com/redislabs/provider/redis/util/CollectionUtils.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.util 2 | 3 | import scala.collection.IterableLike 4 | import scala.collection.generic.CanBuildFrom 5 | 6 | /** 7 | * @author The Viet Nguyen 8 | */ 9 | object CollectionUtils { 10 | 11 | implicit class RichCollection[A, Repr](val xs: IterableLike[A, Repr]) extends AnyVal { 12 | 13 | def distinctBy[B, That](f: A => B)(implicit cbf: CanBuildFrom[Repr, A, That]): That = { 14 | val builder = cbf(xs.repr) 15 | val iterator = xs.iterator 16 | var set = Set[B]() 17 | while (iterator.hasNext) { 18 | val element = iterator.next 19 | val distinctField = f(element) 20 | if (!set(distinctField)) { 21 | set += distinctField 22 | builder += element 23 | } 24 | } 25 | builder.result 26 | } 27 | } 28 | 29 | } 30 | -------------------------------------------------------------------------------- /src/main/scala/com/redislabs/provider/redis/util/ConnectionUtils.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.util 2 | 3 | import java.util.{List => JList} 4 | 5 | import com.redislabs.provider.redis.RedisConfig 6 | import com.redislabs.provider.redis.util.ConnectionUtils.XINFO.{SubCommandGroups, SubCommandStream} 7 | import redis.clients.jedis.Jedis 8 | import redis.clients.jedis.commands.ProtocolCommand 9 | import redis.clients.jedis.util.SafeEncoder 10 | 11 | import scala.collection.JavaConverters._ 12 | 13 | /** 14 | * @author The Viet Nguyen 15 | */ 16 | object ConnectionUtils { 17 | 18 | def withConnection[A](conn: Jedis)(body: Jedis => A): A = { 19 | try { 20 | body(conn) 21 | } finally { 22 | conn.close() 23 | } 24 | } 25 | 26 | def withConnection[A](streamKey: String)(body: Jedis => A)(implicit redisConfig: RedisConfig): A = { 27 | withConnection(redisConfig.connectionForKey(streamKey)){ 28 | body 29 | } 30 | } 31 | 32 | implicit class JedisExt(val jedis: Jedis) extends AnyVal { 33 | 34 | //TODO: temporary solution to get latest offset while not supported by Jedis 35 | def xinfo(command: String, args: String*): Map[String, Any] = { 36 | val client = jedis.getClient 37 | val combinedArgs = command +: args 38 | client.sendCommand(XINFO, combinedArgs: _*) 39 | val response = asList(client.getOne).asScala 40 | command match { 41 | case SubCommandStream => 42 | asMap(response) 43 | case SubCommandGroups => 44 | response.map(m => asList(m)).map(_.asScala).map(asMap) 45 | .map(m => String.valueOf(m("name")) -> m).toMap 46 | } 47 | } 48 | 49 | private def asMap(seq: Seq[Any]): Map[String, Any] = { 50 | seq.grouped(2) 51 | .map { group => 52 | val key = asString(group.head) 53 | val value = group(1) match { 54 | case arr: Array[Byte] => asString(arr) 55 | case other: Any => other 56 | } 57 | key -> value 58 | }.toMap 59 | } 60 | 61 | private def asList(any: Any): JList[Any] = 62 | any.asInstanceOf[JList[Any]] 63 | 64 | private def asString(any: Any): String = 65 | new String(any.asInstanceOf[Array[Byte]]) 66 | } 67 | 68 | object XINFO extends ProtocolCommand { 69 | 70 | val SubCommandStream = "STREAM" 71 | val SubCommandGroups = "GROUPS" 72 | 73 | val LastGeneratedId = "last-generated-id" 74 | val LastDeliveredId = "last-delivered-id" 75 | 
val LastEntry = "last-entry" 76 | val EntryId = "_id" 77 | 78 | override def getRaw: Array[Byte] = SafeEncoder.encode("XINFO") 79 | } 80 | 81 | } 82 | -------------------------------------------------------------------------------- /src/main/scala/com/redislabs/provider/redis/util/JsonUtils.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.util 2 | 3 | import org.json4s.jackson.Serialization 4 | import org.json4s.{Formats, NoTypeHints} 5 | 6 | /** 7 | * @author The Viet Nguyen 8 | */ 9 | object JsonUtils { 10 | 11 | private implicit val formats: Formats = Serialization.formats(NoTypeHints) 12 | 13 | def toJson(any: AnyRef): String = { 14 | Serialization.write(any) 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /src/main/scala/com/redislabs/provider/redis/util/Logging.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.util 2 | 3 | import org.slf4j.{Logger, LoggerFactory} 4 | 5 | /** 6 | * @author The Viet Nguyen 7 | */ 8 | trait Logging { 9 | 10 | /** 11 | * This logger will likely to be used in serializable environment like Spark contexts. So, we make 12 | * it transient to avoid unnecessary serialization errors. 13 | */ 14 | @transient private var _logger: Logger = _ 15 | 16 | protected def loggerName: String = 17 | this.getClass.getName.stripSuffix("$") 18 | 19 | protected def logger: Logger = { 20 | if (_logger == null) { 21 | _logger = LoggerFactory.getLogger(loggerName) 22 | } 23 | _logger 24 | } 25 | 26 | def logInfo(msg: => String): Unit = { 27 | if (logger.isInfoEnabled) { 28 | _logger.info(msg) 29 | } 30 | } 31 | 32 | def logDebug(msg: => String): Unit = { 33 | if (logger.isDebugEnabled) { 34 | _logger.debug(msg) 35 | } 36 | } 37 | 38 | def logTrace(msg: => String): Unit = { 39 | if (logger.isTraceEnabled) { 40 | _logger.trace(msg) 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/main/scala/com/redislabs/provider/redis/util/ParseUtils.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.util 2 | 3 | import java.lang.{Boolean => JBoolean, Byte => JByte, Double => JDouble, Float => JFloat, Long => JLong, Short => JShort} 4 | 5 | import org.apache.spark.sql.types._ 6 | import redis.clients.jedis.exceptions.JedisDataException 7 | 8 | import scala.util.{Failure, Success, Try} 9 | 10 | /** 11 | * @author The Viet Nguyen 12 | */ 13 | object ParseUtils { 14 | 15 | def parseFields(value: Map[String, String], schema: StructType): Array[Any] = 16 | schema.fields.map { field => 17 | val fieldName = field.name 18 | val fieldValue = value(fieldName) 19 | parseValue(field.dataType, fieldValue) 20 | } 21 | 22 | private def parseValue(dataType: DataType, fieldValueStr: String): Any = { 23 | if (fieldValueStr == null) { 24 | null 25 | } else { 26 | parseNotNullValue(dataType, fieldValueStr) 27 | } 28 | } 29 | 30 | // scalastyle:off cyclomatic.complexity 31 | private def parseNotNullValue(dataType: DataType, fieldValueStr: String): Any = 32 | dataType match { 33 | case ByteType => JByte.parseByte(fieldValueStr) 34 | case IntegerType => Integer.parseInt(fieldValueStr) 35 | case LongType => JLong.parseLong(fieldValueStr) 36 | case FloatType => JFloat.parseFloat(fieldValueStr) 37 | case DoubleType => JDouble.parseDouble(fieldValueStr) 38 | case BooleanType => 
JBoolean.parseBoolean(fieldValueStr) 39 | case ShortType => JShort.parseShort(fieldValueStr) 40 | case DateType => java.sql.Date.valueOf(fieldValueStr) 41 | case TimestampType => java.sql.Timestamp.valueOf(fieldValueStr) 42 | case _ => fieldValueStr 43 | } 44 | 45 | private[redis] def ignoreJedisWrongTypeException[T](tried: Try[T]): Try[Option[T]] = { 46 | tried.transform(s => Success(Some(s)), { 47 | // Swallow this exception 48 | case e: JedisDataException if Option(e.getMessage).getOrElse("").contains("WRONGTYPE") => Success(None) 49 | case e: Throwable => Failure(e) 50 | }) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/scala/com/redislabs/provider/redis/util/PipelineUtils.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.util 2 | 3 | import java.util.{List => JList} 4 | 5 | import com.redislabs.provider.redis.ReadWriteConfig 6 | import redis.clients.jedis.{Jedis, Pipeline} 7 | 8 | import scala.collection.JavaConverters._ 9 | import scala.collection.{TraversableOnce, mutable} 10 | 11 | object PipelineUtils { 12 | 13 | /** 14 | * Executes a pipeline function for each item in the sequence, returns the server response. 15 | * 16 | * Ensures that a new pipeline is created if the number of operations exceeds the given maxPipelineSize 17 | * while iterating over the items. 18 | * 19 | * @param conn jedis connection 20 | * @param readWriteConfig read/write config 21 | * @param items a sequence of elements (typically keys) 22 | * @param f function to applied for each item in the sequence 23 | * @return response from the server 24 | */ 25 | def mapWithPipeline[A](conn: Jedis, items: TraversableOnce[A])(f: (Pipeline, A) => Unit) 26 | (implicit readWriteConfig: ReadWriteConfig): Seq[AnyRef] = { 27 | val totalResp = mutable.ListBuffer[JList[AnyRef]]() 28 | 29 | // iterate over items and create new pipelines periodically 30 | var i = 0 31 | var pipeline = conn.pipelined() 32 | for (x <- items) { 33 | f(pipeline, x) 34 | i = i + 1 35 | if (i % readWriteConfig.maxPipelineSize == 0) { 36 | val resp = pipeline.syncAndReturnAll() 37 | totalResp += resp 38 | pipeline = conn.pipelined() 39 | } 40 | } 41 | 42 | // sync remaining items 43 | if (i % readWriteConfig.maxPipelineSize != 0) { 44 | val resp = pipeline.syncAndReturnAll() 45 | totalResp += resp 46 | } 47 | 48 | totalResp.flatMap(_.asScala) 49 | } 50 | 51 | /** 52 | * Executes a pipeline function for each item in the sequence. No response is returned. 53 | * 54 | * Ensures that a new pipeline is created if the number of operations exceeds the given maxPipelineSize 55 | * while iterating over the items. 
56 | * 57 | * @param conn jedis connection 58 | * @param readWriteConfig read/write config 59 | * @param items a sequence of elements (typically keys) 60 | * @param f function to applied for each item in the sequence 61 | */ 62 | def foreachWithPipeline[A](conn: Jedis, items: TraversableOnce[A])(f: (Pipeline, A) => Unit) 63 | (implicit readWriteConfig: ReadWriteConfig): Unit = { 64 | // iterate over items and create new pipelines periodically 65 | var i = 0 66 | var pipeline = conn.pipelined() 67 | for (x <- items) { 68 | f(pipeline, x) 69 | i = i + 1 70 | if (i % readWriteConfig.maxPipelineSize == 0) { 71 | pipeline.sync() 72 | pipeline = conn.pipelined() 73 | } 74 | } 75 | 76 | // sync remaining items 77 | if (i % readWriteConfig.maxPipelineSize != 0) { 78 | pipeline.sync() 79 | } 80 | } 81 | 82 | /** 83 | * Executes a pipeline function for each item in the sequence. Doesn't sync and return the last pipeline after 84 | * all operations are executed. Allows to execute more operations with the returned pipeline. 85 | * The client is responsible of syncing the returned pipeline. 86 | * 87 | * Ensures that a new pipeline is created if the number of operations exceeds the given maxPipelineSize 88 | * while iterating over the items. 89 | * 90 | * @param conn jedis connection 91 | * @param readWriteConfig read/write config 92 | * @param items a sequence of elements (typically keys) 93 | * @param f function to applied for each item in the sequence 94 | * @return the last pipeline 95 | */ 96 | def foreachWithPipelineNoLastSync[A](conn: Jedis, items: TraversableOnce[A])(f: (Pipeline, A) => Unit) 97 | (implicit readWriteConfig: ReadWriteConfig): Pipeline = { 98 | // iterate over items and create new pipelines periodically 99 | var i = 0 100 | var pipeline = conn.pipelined() 101 | for (x <- items) { 102 | f(pipeline, x) 103 | i = i + 1 104 | if (i % readWriteConfig.maxPipelineSize == 0) { 105 | pipeline.sync() 106 | pipeline = conn.pipelined() 107 | } 108 | } 109 | 110 | // return pipeline, the client should sync pipeline 111 | pipeline 112 | } 113 | 114 | } 115 | -------------------------------------------------------------------------------- /src/main/scala/com/redislabs/provider/redis/util/StreamUtils.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.util 2 | 3 | import org.apache.commons.lang3.StringUtils 4 | import redis.clients.jedis.{StreamEntryID, Jedis} 5 | 6 | /** 7 | * @author The Viet Nguyen 8 | */ 9 | object StreamUtils extends Logging { 10 | 11 | val EntryIdEarliest = new StreamEntryID(0, 0) 12 | 13 | def createConsumerGroupIfNotExist(conn: Jedis, streamKey: String, groupName: String, 14 | offset: StreamEntryID): Unit = { 15 | try { 16 | conn.xgroupCreate(streamKey, groupName, offset, true) 17 | } catch { 18 | case e: Exception if StringUtils.contains(e.getMessage, "already exists") => 19 | logInfo(s"Consumer group already exists: $groupName") 20 | } 21 | } 22 | 23 | def resetConsumerGroup(conn: Jedis, streamKey: String, groupName: String, 24 | offset: StreamEntryID): Unit = { 25 | logInfo(s"Setting consumer group $groupName id to $offset") 26 | conn.xgroupSetID(streamKey, groupName, offset) 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/redis/BinaryRedisPersistence.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis 2 | 3 | import 
java.nio.charset.StandardCharsets.UTF_8 4 | 5 | import org.apache.commons.lang3.SerializationUtils 6 | import org.apache.spark.sql.Row 7 | import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema 8 | import org.apache.spark.sql.types.StructType 9 | import redis.clients.jedis.Pipeline 10 | 11 | /** 12 | * @author The Viet Nguyen 13 | */ 14 | class BinaryRedisPersistence extends RedisPersistence[Array[Byte]] { 15 | 16 | override def save(pipeline: Pipeline, key: String, value: Array[Byte], ttl: Int): Unit = { 17 | val keyBytes = key.getBytes(UTF_8) 18 | if (ttl > 0) { 19 | pipeline.setex(keyBytes, ttl.toLong, value) 20 | } else { 21 | pipeline.set(keyBytes, value) 22 | } 23 | } 24 | 25 | override def load(pipeline: Pipeline, key: String, requiredColumns: Seq[String]): Unit = 26 | pipeline.get(key.getBytes(UTF_8)) 27 | 28 | override def encodeRow(keyName: String, value: Row): Array[Byte] = { 29 | val fields = value.schema.fields.map(_.name) 30 | val valuesArray = fields.map(f => value.getAs[Any](f)) 31 | SerializationUtils.serialize(valuesArray) 32 | } 33 | 34 | override def decodeRow(keyMap: (String, String), value: Array[Byte], schema: StructType, 35 | requiredColumns: Seq[String]): Row = { 36 | val valuesArray: Array[Any] = SerializationUtils.deserialize(value) 37 | new GenericRowWithSchema(valuesArray, schema) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/redis/DefaultSource.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis 2 | 3 | import org.apache.spark.sql.SaveMode.{Append, ErrorIfExists, Ignore, Overwrite} 4 | import org.apache.spark.sql.sources.{BaseRelation, CreatableRelationProvider, RelationProvider, SchemaRelationProvider} 5 | import org.apache.spark.sql.types.StructType 6 | import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode} 7 | 8 | class DefaultSource extends RelationProvider with SchemaRelationProvider 9 | with CreatableRelationProvider { 10 | 11 | override def createRelation(sqlContext: SQLContext, 12 | parameters: Map[String, String]): BaseRelation = { 13 | new RedisSourceRelation(sqlContext, parameters, userSpecifiedSchema = None) 14 | } 15 | 16 | /** 17 | * Creates a new relation by saving the data to Redis 18 | */ 19 | override def createRelation(sqlContext: SQLContext, mode: SaveMode, 20 | parameters: Map[String, String], data: DataFrame): BaseRelation = { 21 | val relation = new RedisSourceRelation(sqlContext, parameters, userSpecifiedSchema = None) 22 | mode match { 23 | case Append => relation.insert(data, overwrite = false) 24 | case Overwrite => relation.insert(data, overwrite = true) 25 | case ErrorIfExists => 26 | if (relation.nonEmpty) { 27 | throw new IllegalStateException("SaveMode is set to ErrorIfExists and dataframe " + 28 | "already exists in Redis and contains data.") 29 | } 30 | relation.insert(data, overwrite = false) 31 | case Ignore => 32 | if (relation.isEmpty) { 33 | relation.insert(data, overwrite = false) 34 | } 35 | } 36 | 37 | relation 38 | } 39 | 40 | override def createRelation(sqlContext: SQLContext, parameters: Map[String, String], 41 | schema: StructType): BaseRelation = 42 | new RedisSourceRelation(sqlContext, parameters, userSpecifiedSchema = Some(schema)) 43 | } 44 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/redis/HashRedisPersistence.scala: 
-------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis 2 | 3 | import java.util.{List => JList} 4 | 5 | import com.redislabs.provider.redis.util.ParseUtils 6 | import org.apache.spark.sql.Row 7 | import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema 8 | import org.apache.spark.sql.types._ 9 | import redis.clients.jedis.Pipeline 10 | 11 | import scala.collection.JavaConverters._ 12 | 13 | /** 14 | * @author The Viet Nguyen 15 | */ 16 | class HashRedisPersistence extends RedisPersistence[Any] { 17 | 18 | override def save(pipeline: Pipeline, key: String, value: Any, ttl: Int): Unit = { 19 | val javaValue = value.asInstanceOf[Map[String, String]].asJava 20 | pipeline.hmset(key, javaValue) 21 | if (ttl > 0) { 22 | pipeline.expire(key, ttl.toLong) 23 | } 24 | } 25 | 26 | override def load(pipeline: Pipeline, key: String, requiredColumns: Seq[String]): Unit = { 27 | pipeline.hmget(key, requiredColumns: _*) 28 | } 29 | 30 | override def encodeRow(keyName: String, value: Row): Map[String, String] = { 31 | val fields = value.schema.fields.map(_.name) 32 | val kvMap = value.getValuesMap[Any](fields) 33 | kvMap 34 | .filter { case (_, v) => 35 | // don't store null values 36 | v != null 37 | } 38 | .filter { case (k, _) => 39 | // don't store key values 40 | k != keyName 41 | } 42 | .map { case (k, v) => 43 | k -> String.valueOf(v) 44 | } 45 | } 46 | 47 | override def decodeRow(keyMap: (String, String), value: Any, schema: StructType, 48 | requiredColumns: Seq[String]): Row = { 49 | val scalaValue = value.asInstanceOf[JList[String]].asScala 50 | val values = requiredColumns.zip(scalaValue) 51 | val results = values :+ keyMap 52 | val fieldsValue = ParseUtils.parseFields(results.toMap, schema) 53 | new GenericRowWithSchema(fieldsValue, schema) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/redis/RedisPersistence.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis 2 | 3 | import org.apache.spark.sql.Row 4 | import org.apache.spark.sql.types.StructType 5 | import redis.clients.jedis.Pipeline 6 | 7 | /** 8 | * @author The Viet Nguyen 9 | */ 10 | trait RedisPersistence[T] extends Serializable { 11 | 12 | def save(pipeline: Pipeline, key: String, value: T, ttl: Int): Unit 13 | 14 | def load(pipeline: Pipeline, key: String, requiredColumns: Seq[String]): Unit 15 | 16 | /** 17 | * Encode dataframe row before storing it in Redis. 18 | * 19 | * @param keyName field name that should be encoded in special way, e.g. in Redis keys. 20 | * @param value row to encode. 21 | * @return encoded row 22 | */ 23 | def encodeRow(keyName: String, value: Row): T 24 | 25 | /** 26 | * Decode dataframe row stored in Redis. 
27 | * 28 | * @param keyMap extracted name/value of key column from Redis key 29 | * @param value encoded row 30 | * @param schema row schema 31 | * @param requiredColumns required columns to decode 32 | * @return decoded row 33 | */ 34 | def decodeRow(keyMap: (String, String), value: T, schema: StructType, 35 | requiredColumns: Seq[String]): Row 36 | } 37 | 38 | object RedisPersistence { 39 | 40 | private val providers = 41 | Map(SqlOptionModelBinary -> new BinaryRedisPersistence(), 42 | SqlOptionModelHash -> new HashRedisPersistence()) 43 | 44 | def apply(model: String): RedisPersistence[Any] = { 45 | // use hash model by default 46 | providers.getOrElse(model, providers(SqlOptionModelHash)) 47 | .asInstanceOf[RedisPersistence[Any]] 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/redis/redis.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql 2 | 3 | /** 4 | * @author The Viet Nguyen 5 | */ 6 | package object redis { 7 | 8 | val RedisFormat = "org.apache.spark.sql.redis" 9 | 10 | val RddWriteIteratorGroupingSize = "rdd.write.iterator.grouping.size" 11 | val RddWriteIteratorGroupingSizeDefault = 1000 12 | 13 | val SqlOptionFilterKeysByType = "filter.keys.by.type" 14 | val SqlOptionNumPartitions = "partitions.number" 15 | /** 16 | * Default read operation number of partitions. 17 | */ 18 | val SqlOptionNumPartitionsDefault = 3 19 | val SqlOptionTableName = "table" 20 | val SqlOptionKeysPattern = "keys.pattern" 21 | val SqlOptionModel = "model" 22 | val SqlOptionModelBinary = "binary" 23 | val SqlOptionModelHash = "hash" 24 | val SqlOptionInferSchema = "infer.schema" 25 | val SqlOptionKeyColumn = "key.column" 26 | val SqlOptionTTL = "ttl" 27 | 28 | val SqlOptionMaxPipelineSize = "max.pipeline.size" 29 | val SqlOptionScanCount = "scan.count" 30 | 31 | val SqlOptionIteratorGroupingSize = "iterator.grouping.size" 32 | val SqlOptionIteratorGroupingSizeDefault = 1000 33 | 34 | val StreamOptionStreamKeys = "stream.keys" 35 | val StreamOptionStreamOffsets = "stream.offsets" 36 | val StreamOptionReadBatchSize = "stream.read.batch.size" 37 | val StreamOptionReadBatchSizeDefault = 100 38 | val StreamOptionReadBlock = "stream.read.block" 39 | val StreamOptionReadBlockDefault = 500 40 | val StreamOptionParallelism = "stream.parallelism" 41 | val StreamOptionGroupName = "stream.group.name" 42 | val StreamOptionConsumerPrefix = "stream.consumer.prefix" 43 | } 44 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/redis/stream/RedisSource.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis.stream 2 | 3 | import com.redislabs.provider.redis.RedisConfig 4 | import com.redislabs.provider.redis.util.CollectionUtils.RichCollection 5 | import com.redislabs.provider.redis.util.ConnectionUtils.{JedisExt, XINFO, withConnection} 6 | import com.redislabs.provider.redis.util.StreamUtils.{createConsumerGroupIfNotExist, resetConsumerGroup} 7 | import com.redislabs.provider.redis.util.{Logging, ParseUtils} 8 | import org.apache.spark.sql.catalyst.InternalRow 9 | import org.apache.spark.sql.execution.streaming.{Offset, Source} 10 | import org.apache.spark.sql.redis.stream.RedisSource._ 11 | import org.apache.spark.sql.types.StructType 12 | import org.apache.spark.sql.{DataFrame, SQLContext} 13 | import 
org.apache.spark.unsafe.types.UTF8String 14 | import redis.clients.jedis.{StreamEntryID, Jedis} 15 | 16 | import scala.collection.JavaConverters._ 17 | import scala.util.Try 18 | 19 | /** 20 | * @author The Viet Nguyen 21 | */ 22 | class RedisSource(sqlContext: SQLContext, metadataPath: String, 23 | userDefinedSchema: Option[StructType], parameters: Map[String, String]) 24 | extends Source with Logging { 25 | 26 | private val sc = sqlContext.sparkContext 27 | 28 | implicit private val redisConfig: RedisConfig = RedisConfig.fromSparkConfAndParameters(sc.getConf, parameters) 29 | 30 | private val sourceConfig = RedisSourceConfig.fromMap(parameters) 31 | 32 | private val currentSchema = userDefinedSchema.getOrElse { 33 | throw new IllegalArgumentException("Please specify schema") 34 | } 35 | 36 | /** 37 | * Called once on the source startup. Creates consumer groups and resets their offsets if needed. 38 | */ 39 | def start(): Unit = { 40 | sourceConfig.consumerConfigs.foreach { consumerConfig => 41 | val offsetsByStreamKey = sourceConfig.start.map(_.offsets).getOrElse(Map()) 42 | val streamKey = consumerConfig.streamKey 43 | val groupName = consumerConfig.groupName 44 | val configOffsetOption = offsetsByStreamKey.get(streamKey).map(_.offset).map(new StreamEntryID(_)) 45 | withConnection(streamKey) { conn => 46 | createConsumerGroupIfNotExist(conn, streamKey, groupName, configOffsetOption.getOrElse(StreamEntryID.LAST_ENTRY)) 47 | // if config offset is defined, reset to its value 48 | configOffsetOption.foreach { offset => 49 | resetConsumerGroup(conn, streamKey, groupName, offset) 50 | } 51 | } 52 | } 53 | } 54 | 55 | override def schema: StructType = currentSchema 56 | 57 | /** 58 | * Returns the maximum available offset for this source. 59 | * Returns `None` if this source has never received any data. 60 | */ 61 | override def getOffset: Option[Offset] = { 62 | val initialOffset = RedisSourceOffset(Map()) 63 | val sourceOffset = sourceConfig.consumerConfigs.foldLeft(initialOffset) { case (acc, e) => 64 | val streamKey = e.streamKey 65 | withConnection(streamKey) { conn => 66 | Try { 67 | // try to read last stream id, it will fail if doesn't exist 68 | val offsetId = streamLastId(conn, streamKey) 69 | val streamOffset = streamKey -> RedisConsumerOffset(e.groupName, offsetId) 70 | acc.copy(acc.offsets + streamOffset) 71 | } getOrElse { 72 | // stream key doesn't exist 73 | acc 74 | } 75 | } 76 | } 77 | if (sourceOffset.offsets.isEmpty) { 78 | None 79 | } else { 80 | Some(sourceOffset) 81 | } 82 | } 83 | 84 | override def getBatch(start: Option[Offset], end: Offset): DataFrame = { 85 | logInfo { 86 | s"""Getting batch... 
87 | | start: $start 88 | | end: $end 89 | """.stripMargin 90 | } 91 | val localSchema = currentSchema 92 | val offsetRanges = getOffsetRanges(start, end, sourceConfig.consumerConfigs) 93 | 94 | // if 'start' is set, reset consumer group offset to read this batch 95 | resetConsumerGroupsIfHasOffset(offsetRanges) 96 | 97 | // read data 98 | val internalRdd = new RedisSourceRdd(sc, redisConfig, offsetRanges) 99 | .map { case (id, fields) => 100 | val fieldMap = fields.asScala.toMap + ("_id" -> id.toString) 101 | val values = ParseUtils.parseFields(fieldMap, localSchema) 102 | .map { 103 | case str: String => UTF8String.fromString(str) 104 | case other: Any => other 105 | } 106 | InternalRow(values: _*) 107 | } 108 | sqlContext.internalCreateDataFrame(internalRdd, schema, isStreaming = true) 109 | } 110 | 111 | override def commit(end: Offset): Unit = { 112 | logInfo( 113 | s"""Committing offset.. 114 | | end: ${end.json()} 115 | |""".stripMargin) 116 | } 117 | 118 | override def stop(): Unit = { 119 | } 120 | 121 | private def resetConsumerGroupsIfHasOffset(offsetRanges: Seq[RedisSourceOffsetRange]): Unit = { 122 | forEachOffsetRangeWithStreamConnection(offsetRanges) { case (conn, offsetRange) => 123 | offsetRange.start.map(new StreamEntryID(_)).foreach { start => 124 | val config = offsetRange.config 125 | resetConsumerGroup(conn, config.streamKey, config.groupName, start) 126 | } 127 | } 128 | } 129 | 130 | private def forEachOffsetRangeWithStreamConnection(offsetRanges: Seq[RedisSourceOffsetRange]) 131 | (op: (Jedis, RedisSourceOffsetRange) => Unit): Unit = { 132 | offsetRanges.groupBy(_.config.streamKey).foreach { case (streamKey, subRanges) => 133 | withConnection(streamKey) { conn => 134 | subRanges.distinctBy(_.config.groupName).foreach { offsetRange => 135 | op(conn, offsetRange) 136 | } 137 | } 138 | } 139 | } 140 | 141 | } 142 | 143 | object RedisSource { 144 | 145 | def getOffsetRanges(start: Option[Offset], end: Offset, 146 | consumerConfigs: Seq[RedisConsumerConfig]): Seq[RedisSourceOffsetRange] = { 147 | 148 | val offsetStarts = start.map(RedisSourceOffset.fromOffset).map(_.offsets).getOrElse(Map()) 149 | val offsetEnds = RedisSourceOffset.fromOffset(end) 150 | val configsByStreamKey = consumerConfigs.groupBy(_.streamKey) 151 | 152 | offsetEnds.offsets.flatMap { case (streamKey, offsetEnd) => 153 | val offsetStart = offsetStarts.get(streamKey).map(_.offset) 154 | val configs = configsByStreamKey(streamKey) 155 | configs.map { c => RedisSourceOffsetRange(offsetStart, offsetEnd.offset, c) } 156 | }.toSeq 157 | } 158 | 159 | def streamLastId(conn: Jedis, streamKey: String): String = { 160 | val infoMap = conn.xinfo(XINFO.SubCommandStream, streamKey) 161 | String.valueOf(infoMap(XINFO.LastGeneratedId)) 162 | } 163 | 164 | } 165 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/redis/stream/RedisSourceConfig.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis.stream 2 | 3 | import org.apache.spark.sql 4 | import org.apache.spark.sql.redis._ 5 | 6 | /** 7 | * @author The Viet Nguyen 8 | */ 9 | case class RedisSourceConfig(consumerConfigs: Seq[RedisConsumerConfig], 10 | start: Option[RedisSourceOffset]) 11 | 12 | object RedisSourceConfig { 13 | 14 | def fromMap(config: Map[String, String]): RedisSourceConfig = { 15 | val streamKeys = config.getOrElse(StreamOptionStreamKeys, 16 | throw new IllegalArgumentException(s"Please specify 
'$StreamOptionStreamKeys'")) 17 | val start = config.get(StreamOptionStreamOffsets).map(RedisSourceOffset.fromJson) 18 | val parallelism = config.get(sql.redis.StreamOptionParallelism).map(_.toInt).getOrElse(1) 19 | val groupName = config.getOrElse(StreamOptionGroupName, "spark-source") 20 | val consumerPrefix = config.getOrElse(StreamOptionConsumerPrefix, "consumer") 21 | val batchSize = config.get(StreamOptionReadBatchSize).map(_.toInt).getOrElse(StreamOptionReadBatchSizeDefault) 22 | val block = config.get(StreamOptionReadBlock).map(_.toInt).getOrElse(StreamOptionReadBlockDefault) 23 | val consumerConfigs = streamKeys.split(",").flatMap { streamKey => 24 | (1 to parallelism).map { consumerIndex => 25 | RedisConsumerConfig(streamKey, s"$groupName", s"$consumerPrefix-$consumerIndex", batchSize, block) 26 | } 27 | } 28 | RedisSourceConfig(consumerConfigs, start) 29 | } 30 | } 31 | 32 | case class RedisConsumerConfig(streamKey: String, groupName: String, consumerName: String, 33 | batchSize: Int, block: Int) 34 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/redis/stream/RedisSourceOffset.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis.stream 2 | 3 | import com.redislabs.provider.redis.util.JsonUtils 4 | import org.apache.spark.sql.execution.streaming.{Offset, SerializedOffset} 5 | import org.json4s.jackson.Serialization 6 | import org.json4s.{Formats, NoTypeHints} 7 | 8 | /** 9 | * @param offsets A map of offset by stream key 10 | * @author The Viet Nguyen 11 | */ 12 | case class RedisSourceOffset(offsets: Map[String, RedisConsumerOffset]) extends Offset { 13 | 14 | override def json(): String = JsonUtils.toJson(this) 15 | } 16 | 17 | object RedisSourceOffset { 18 | 19 | private implicit val formats: Formats = Serialization.formats(NoTypeHints) 20 | 21 | def fromOffset(offset: Offset): RedisSourceOffset = { 22 | offset match { 23 | case o: RedisSourceOffset => o 24 | case so: SerializedOffset => fromJson(so.json) 25 | case _ => 26 | throw new IllegalArgumentException( 27 | s"Invalid conversion from offset of ${offset.getClass} to RedisSourceOffset") 28 | } 29 | 30 | fromJson(offset.json()) 31 | } 32 | 33 | def fromJson(json: String): RedisSourceOffset = { 34 | try { 35 | Serialization.read[RedisSourceOffset](json) 36 | } catch { 37 | case e: Throwable => 38 | val example = RedisSourceOffset(Map("my-stream" -> RedisConsumerOffset("redis-source", "1543674099961-0"))) 39 | val jsonExample = Serialization.write(example) 40 | throw new RuntimeException(s"Unable to parse offset json. 
Example of valid json: $jsonExample", e) 41 | } 42 | } 43 | } 44 | 45 | case class RedisConsumerOffset(groupName: String, offset: String) 46 | 47 | case class RedisSourceOffsetRange(start: Option[String], end: String, config: RedisConsumerConfig) 48 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/redis/stream/RedisSourceRdd.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis.stream 2 | 3 | import com.redislabs.provider.redis.RedisConfig 4 | import com.redislabs.provider.redis.util.ConnectionUtils.withConnection 5 | import org.apache.spark.rdd.RDD 6 | import org.apache.spark.sql.redis.stream.RedisSourceTypes.StreamEntry 7 | import org.apache.spark.{Partition, SparkContext, TaskContext} 8 | 9 | /** 10 | * RDD of EntryID -> StreamEntry.fields 11 | * 12 | * @author The Viet Nguyen 13 | */ 14 | class RedisSourceRdd(sc: SparkContext, redisConfig: RedisConfig, 15 | offsetRanges: Seq[RedisSourceOffsetRange], autoAck: Boolean = true) 16 | extends RDD[StreamEntry](sc, Nil) { 17 | 18 | override def compute(split: Partition, context: TaskContext): Iterator[StreamEntry] = { 19 | val partition = split.asInstanceOf[RedisSourceRddPartition] 20 | val offsetRange = partition.offsetRange 21 | val streamReader = new RedisStreamReader(redisConfig) 22 | streamReader.unreadStreamEntries(offsetRange) 23 | } 24 | 25 | override protected def getPartitions: Array[Partition] = { 26 | offsetRanges.zipWithIndex.map { case (e, i) => RedisSourceRddPartition(i, e) } 27 | .toArray 28 | } 29 | } 30 | 31 | case class RedisSourceRddPartition(index: Int, offsetRange: RedisSourceOffsetRange) 32 | extends Partition 33 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/redis/stream/RedisSourceTypes.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis.stream 2 | 3 | import java.util.{List => JList, Map => JMap} 4 | 5 | import redis.clients.jedis.{StreamEntryID, StreamEntry => JStreamEntry} 6 | 7 | /** 8 | * @author The Viet Nguyen 9 | */ 10 | object RedisSourceTypes { 11 | 12 | type StreamEntry = (StreamEntryID, JMap[String, String]) 13 | type StreamEntryBatch = JMap.Entry[String, JList[JStreamEntry]] 14 | type StreamEntryBatches = JList[StreamEntryBatch] 15 | } 16 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/redis/stream/RedisStreamProvider.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis.stream 2 | 3 | import com.redislabs.provider.redis.util.Logging 4 | import org.apache.spark.sql.SQLContext 5 | import org.apache.spark.sql.execution.streaming.Source 6 | import org.apache.spark.sql.sources.{DataSourceRegister, StreamSourceProvider} 7 | import org.apache.spark.sql.types.{StringType, StructField, StructType} 8 | 9 | /** 10 | * @author The Viet Nguyen 11 | */ 12 | class RedisStreamProvider extends DataSourceRegister with StreamSourceProvider with Logging { 13 | 14 | override def shortName(): String = "redis" 15 | 16 | override def sourceSchema(sqlContext: SQLContext, schema: Option[StructType], 17 | providerName: String, parameters: Map[String, String]): (String, StructType) = { 18 | providerName -> schema.getOrElse { 19 | StructType(Seq(StructField("_id", StringType))) 20 | } 21 | } 22 | 23 | 
override def createSource(sqlContext: SQLContext, metadataPath: String, 24 | schema: Option[StructType], providerName: String, 25 | parameters: Map[String, String]): Source = { 26 | val (_, ss) = sourceSchema(sqlContext, schema, providerName, parameters) 27 | val source = new RedisSource(sqlContext, metadataPath, Some(ss), parameters) 28 | source.start() 29 | source 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/redis/stream/RedisStreamReader.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis.stream 2 | 3 | import java.util.AbstractMap.SimpleEntry 4 | import java.util.{Map => JMap} 5 | 6 | import com.redislabs.provider.redis.RedisConfig 7 | import com.redislabs.provider.redis.util.ConnectionUtils.withConnection 8 | import com.redislabs.provider.redis.util.Logging 9 | import org.apache.spark.sql.redis.stream.RedisSourceTypes.{StreamEntry, StreamEntryBatch, StreamEntryBatches} 10 | import redis.clients.jedis.StreamEntryID 11 | 12 | import scala.collection.JavaConverters._ 13 | import scala.math.Ordering.Implicits._ 14 | 15 | /** 16 | * @author The Viet Nguyen 17 | */ 18 | class RedisStreamReader(redisConfig: RedisConfig) extends Logging with Serializable { 19 | 20 | def unreadStreamEntries(offsetRange: RedisSourceOffsetRange): Iterator[StreamEntry] = { 21 | val config = offsetRange.config 22 | 23 | logInfo(s"Reading entries " + 24 | s"[${config.streamKey}, ${config.groupName}, ${config.consumerName}, start=${offsetRange.start} " + 25 | s"end=${offsetRange.end}]... " 26 | ) 27 | 28 | val res = filterStreamEntries(offsetRange) { 29 | val startEntryOffset = new SimpleEntry(config.streamKey, StreamEntryID.UNRECEIVED_ENTRY) 30 | Iterator.continually { 31 | readStreamEntryBatches(offsetRange, startEntryOffset) 32 | } 33 | } 34 | res 35 | } 36 | 37 | private def readStreamEntryBatches(offsetRange: RedisSourceOffsetRange, 38 | startEntryOffset: JMap.Entry[String, StreamEntryID]): StreamEntryBatches = { 39 | val config = offsetRange.config 40 | withConnection(redisConfig.connectionForKey(config.streamKey)) { conn => 41 | // we don't need acknowledgement, if spark processing fails, it will request the same batch again 42 | val noAck = true 43 | val response = conn.xreadGroup(config.groupName, 44 | config.consumerName, 45 | config.batchSize, 46 | config.block, 47 | noAck, 48 | startEntryOffset) 49 | logDebug(s"Got entries: $response") 50 | response 51 | } 52 | } 53 | 54 | private def filterStreamEntries(offsetRange: RedisSourceOffsetRange) 55 | (streamGroups: => Iterator[StreamEntryBatches]): Iterator[StreamEntry] = { 56 | val end = new StreamEntryID(offsetRange.end) 57 | streamGroups 58 | .takeWhile { response => 59 | (response != null) && !response.isEmpty 60 | } 61 | .flatMap { response => 62 | response.asScala.iterator 63 | } 64 | .flatMap { streamEntry => 65 | flattenStreamEntries(streamEntry) 66 | } 67 | .takeWhile { case (entryId, _) => 68 | entryId <= end 69 | } 70 | } 71 | 72 | private def flattenStreamEntries(entry: StreamEntryBatch): Iterator[StreamEntry] = { 73 | entry.getValue.asScala.iterator 74 | .map { streamEntry => 75 | streamEntry.getID -> streamEntry.getFields 76 | } 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Root logger option 2 | 
log4j.rootLogger=WARN, stdout 3 | # Direct log messages to stdout 4 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 5 | #log4j.appender.stdout.Target=System.out 6 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 7 | log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n 8 | 9 | # DEBUG 10 | #log4j.logger.com.redislabs.provider.redis=DEBUG, stdout 11 | #log4j.logger.org.apache.spark.sql.redis=DEBUG, stdout 12 | log4j.logger.com.redislabs.provider.redis.util.StreamUtils=DEBUG 13 | log4j.logger.org.apache.spark.sql.redis.stream=DEBUG 14 | log4j.logger.org.apache.spark.sql.redis.stream.RedisStreamReader=INFO 15 | 16 | # 17 | #log4j.additivity.com.redislabs.provider.redis=false 18 | #log4j.additivity.org.apache.spark.sql.redis=false 19 | -------------------------------------------------------------------------------- /src/test/resources/tls/ca.crt: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIIFSzCCAzOgAwIBAgIUHs81ch3cj/DaaUu/xeRpJtCvq3MwDQYJKoZIhvcNAQEL 3 | BQAwNTETMBEGA1UECgwKUmVkaXMgVGVzdDEeMBwGA1UEAwwVQ2VydGlmaWNhdGUg 4 | QXV0aG9yaXR5MB4XDTIwMDUxNzE1MDUwMVoXDTMwMDUxNTE1MDUwMVowNTETMBEG 5 | A1UECgwKUmVkaXMgVGVzdDEeMBwGA1UEAwwVQ2VydGlmaWNhdGUgQXV0aG9yaXR5 6 | MIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAvBUjUWX3xDaFoRtmZUiH 7 | hv3HBtzQAUbpOMGqBUdm6F0/X2wa+bnmSOeAZoYuI7L2N3eBNZ6Vcd/rc0nP+PoL 8 | M3L0fDPpNtrAgxFiLzCTLdsC4AYfSt3DOIURoMCYBdKvy62IaCanrnQElIzk41hI 9 | NhWV1L1MXU3uaTw3xNXMX0pKp/Td6PYTTM4pZSDnXIzltpgOx4YXg/0MrWOLR9nS 10 | 95rv2the61zbMzf1OScOzncQcXwNAEbCvUPH5OwStNznPAxhLqhLuIJ05kKaon5U 11 | y9qQFQvbMNOuXy8Fi/yTL4ZV9EkUyWOM7iqmCHTU5VU92FkZB+glIfk5Y5DmDe5a 12 | Wsk4/BeDZdZgZShX41Dl12G5cQCErvxKmb0g41GJDwEBj4MZ3U8pJKJWj3vQC76V 13 | yTsHtrTJ5maM/ctGE7IrQfIJwrDIqU/tixkLuete8CWt8X96iwjBbpHQ0J1TCB11 14 | WDK0wTgvCOjwjvhDA7D3Bvj65BpS+BnhuC/v/julfrV5rFCiyDeWjY//od6bo1HU 15 | gHbJncdys5AlZs/FZpBmKC0Isa6JhNGE6SAZlmjn1euH8besA/qzResK+XOtTNr5 16 | pNQIA9L+Jfu66puViK8fnZb53NgG3YqbEuivUHXALbMj60JPVkwS2ZdDxY9iNvig 17 | W1RY6wv+cs5726AUKJqin18CAwEAAaNTMFEwHQYDVR0OBBYEFAFmv2JPj3Pkt7Gs 18 | ofrJ3JqLcVe5MB8GA1UdIwQYMBaAFAFmv2JPj3Pkt7GsofrJ3JqLcVe5MA8GA1Ud 19 | EwEB/wQFMAMBAf8wDQYJKoZIhvcNAQELBQADggIBALdXNzliM8y02M5kktG7a4U/ 20 | 3aAMGfa8+FimQPrBOSpRBoq39lf3sIcVMYe8HiSZoynVChR5pkPdR0a66X89UUNL 21 | EljZ7LX1oMG4hYnKq0tzu797DIAIY0fLyLbI8LnOcGFijRwtGIH/5YMLrgOwPBbD 22 | llWteVMywMhovon3nPM1S78T0cILZ6QeuoKrM8JpRhit4EWfAdcmRWqYfWtmY6eW 23 | SH4D0PQaJpDs9fsOYp0CwAMmDPQeM0EbVSe2Dl0kJh4rS2kYlCxN/ZhIv6qdgXv/ 24 | 4SAnBX5Cay+IDEuaeVn7rRuCVpouzMjA27ucZ3V56JjcL8HzB9iqzB/tnLMnzZmq 25 | /DlwrQTVDJEExXGtUDcI+cgNpH9jqD4akFTvVggRFjv70JWj3dXHgz4x2iAy7O/x 26 | d2IHI6WFVq7760EoQBTVyyJ/S2w8UVzHKAk8DMU+Y89jsUA0EPp/j7DiEeXSBWz+ 27 | ivk35QXE16kGOmDPgF2SzCHKoTEheQQyJRd2UFNNY37X6ROMlHaeoXVZ13cMENPr 28 | DNbB9h6Wi3lqP3WGteAk5uKQSZq8Q+/NgElnWls71MFKxzsIysH0nxkjbTmmZxxP 29 | C9UH611jVgWZKth4a846lDruS9lUecz8f2vfiNZbDzXVdXxfhVA5VlGyRDlZq0xM 30 | zSJm5Tn0Q1Tz4b+PMEFz 31 | -----END CERTIFICATE----- 32 | -------------------------------------------------------------------------------- /src/test/resources/tls/ca.key: -------------------------------------------------------------------------------- 1 | -----BEGIN RSA PRIVATE KEY----- 2 | MIIJKAIBAAKCAgEAvBUjUWX3xDaFoRtmZUiHhv3HBtzQAUbpOMGqBUdm6F0/X2wa 3 | +bnmSOeAZoYuI7L2N3eBNZ6Vcd/rc0nP+PoLM3L0fDPpNtrAgxFiLzCTLdsC4AYf 4 | St3DOIURoMCYBdKvy62IaCanrnQElIzk41hINhWV1L1MXU3uaTw3xNXMX0pKp/Td 5 | 6PYTTM4pZSDnXIzltpgOx4YXg/0MrWOLR9nS95rv2the61zbMzf1OScOzncQcXwN 6 | 
AEbCvUPH5OwStNznPAxhLqhLuIJ05kKaon5Uy9qQFQvbMNOuXy8Fi/yTL4ZV9EkU 7 | yWOM7iqmCHTU5VU92FkZB+glIfk5Y5DmDe5aWsk4/BeDZdZgZShX41Dl12G5cQCE 8 | rvxKmb0g41GJDwEBj4MZ3U8pJKJWj3vQC76VyTsHtrTJ5maM/ctGE7IrQfIJwrDI 9 | qU/tixkLuete8CWt8X96iwjBbpHQ0J1TCB11WDK0wTgvCOjwjvhDA7D3Bvj65BpS 10 | +BnhuC/v/julfrV5rFCiyDeWjY//od6bo1HUgHbJncdys5AlZs/FZpBmKC0Isa6J 11 | hNGE6SAZlmjn1euH8besA/qzResK+XOtTNr5pNQIA9L+Jfu66puViK8fnZb53NgG 12 | 3YqbEuivUHXALbMj60JPVkwS2ZdDxY9iNvigW1RY6wv+cs5726AUKJqin18CAwEA 13 | AQKCAgBq3Auj7K43wc5seXfU5b9yl+8jXAOmJhbN02J4+1dhf9FIstAkwFUxaK5Q 14 | Eb5XNA+l9fTodQBtoY5Rg0dxKweAJLj5dDj1nJWyIgdJzmxgqkVY6MGQtKx9CUW5 15 | spLtBAYzT5XnrsaoXGxZxi8pZ/gnGl51b1Pa0zM4gSkiYWJrZXdDM+F8wYq3oY5t 16 | UaOBtt+wvXXwMKRdrkEsphj1KrItUc8i5LoROUGlOQ7PePx1m7ow8A0m8g0koUUK 17 | MIJ0Vene+R2h96aF30DxrjfTSLl+1N/1xTW06R/yHnIlVcx+PPHLCFaSEUh57KtG 18 | tLJc1fB8YirRjjjCs6nCgwniMmEMqFNpf00YQQ9hwLl9tsc8gQKxx6EWvJt5ymOL 19 | jbOyHCOEu7pmEhwNo3+6UKUMpvMZGf9obsfA1aXrilobmuxGlM954C2LMlH+zuMJ 20 | p5sCbUvOtECcP/vKUACXkvcvseB4kfyDBEdRNYUIK9uIsdWnjJWA10WMszU6T4fH 21 | t8mePAatQsQhcHiT45vGBuVY9WixBECizK/eDuOtuxQGE4OycrYtSc77WOv2benx 22 | +E6RVoQkPt9mDpk06O23vM+asu3rrGoXogZcJVQf/3mjNbyEf1s3o0nY7DTov8Is 23 | Il7AkDJ7XcU61mzRSz4zEZm2LFc0ydsxB/gQnSmrfwEl9+egeQKCAQEA31mkcehv 24 | v4tgzEuJR9s2tRD4kI4U9IV5K+mz+7E0nrgU17devVWHnnNmSiSZOCYliiAlZJJq 25 | Le1pxzZyLv1SWvhBML7Jxqe63+uFbd3AaOUUK3/f1+KyrgZKoHfxadDdMhve0Zeo 26 | xVW3pBjpNTG3Z71FGI+X8yHMlxugQrQDZDMB4sHqRBmfsQ9HiHxNjew86UbDAmbu 27 | 5LHyovmtcYpBL07lsEIIpgnHF4B2cmvpn9TBams2L25/9WPwzCLRCZmltwyhLiHK 28 | Jrtjj7T3+VdxKCq/W94lCu+yVt4QiKd7R06u8VfbsLtOMdLjMsilTEI8HxnALHIC 29 | pRvtDlS5hrFvIwKCAQEA15OwNYARf+/DVu5vORYLpRECliDzYho9dwE6c267VF4A 30 | r+MasHnDLQiAbnjQ41KMxK5gVvJCz2Dxfy25Y9RAa/6Z4BN5r+rMQ4k2wvQwR0q1 31 | 56oPT6S2L016rlQN35vNA5njXV6KloeniGQSYAGOVjWdi0P6NPDSBdC/vdIYlZ3c 32 | 0l8KSthHNqYWwwfbZrVyKtjH9rOKNg0OcYzMvSK+wt6LibNbTorqhg4fLycwIFkj 33 | ZZZMUb1o6nZN571dKE7rz+Qx3P9MLeEqBuiuI/Wvpdx0BTM7RAAS4ayJXukA+KiO 34 | bPjuq5jxn7uaMY4xKkA7+wGyVdDl24R2ZGTqHfZQlQKCAQBdMRkVUie2Y2B7PZVF 35 | PylfeXpNTotdz4dUlEm93h2XkDVaIK/ODi7tJTdr/kNUE06ciHcxtInLKgF11rjj 36 | 9Fz1ihohTaoBUqD9p1bgFaOf4N1+nPd4K6XkSMnAlOtM/JIew1RAXahU1kQumxpL 37 | ULg299kpu1hqYRLtheBjlJrJpyLFS6YEzjA8f6SYRU1Sx1xO9XCOwwYwtDMKX2VI 38 | N6ilJaVe2t6i8Pd6TeaeLXqobuxZC+zq848+g3nIo0pXWB219/YUupKPgKa1IoxJ 39 | JHwZh2Sa32DTZFokNrntWvxsL199YexPnIeu8FBOMzwNSqGtVGBD6zpBTBqoXR+g 40 | HM7DAoIBAQCIihVQPXZYBt89XzV4/bqQaQ6vUd11ZFNL3a5M0HS8AfIDdR9BaU2y 41 | ZzEZR1JeuLlKFXwVdcnVGXy75ZUHYrcO76o8X3lyb4/CstTJc+pkLTC7s12RPyzZ 42 | FwS+B6Tl3QNj5YM5bxjuMKtu2ps8zZ9+gOTxATQndeRAJLBdJQXHNb5YTDPzpbqt 43 | JVNDYeWXzxKpirZUTfEbPPfJ+bjvjmMuf8/3fm81dw0FrUoZDoQP6QVfYWujVglu 44 | f1Hmlmy7jAkVml3usJBqerOovpUVV5ZRwiiF5qYB8t4Cq8oyH/gqhm/3G0/nxa48 45 | UpFeE8aWESssVy3B5ta/S83E1pnmMk1lAoIBACRQMEkyv3vIQZmMQzcsg0Y4FX+R 46 | f7lZ80rKbwYpwAKM4xHHXkj46jGBj0ZGHRNIR0BQqOksF8Sg29fjlXFA5G8yMlU/ 47 | hHFEfsY4dSS1VfmGoFr2kSYKJszlcybOeEM0ceuSNafZ+5X59g86g+iHZ9TsJ6ly 48 | F+cNYLIyVVCcaNB8YYiU9J20aMlzI+7Pr1ohesyb/7DwYMC78HzJO/AgvJNI477E 49 | lO/+5Ou1nJDDWRqncfOZGtnYaz6vGjVV3yWwSnVyksgyoGgExXb8+ItU5xwJValS 50 | lnkqM7ADuldyO61e7ctxJzOI9YWceYrDN38XV889umYWHB812rdBXmgGPvg= 51 | -----END RSA PRIVATE KEY----- 52 | -------------------------------------------------------------------------------- /src/test/resources/tls/ca.txt: -------------------------------------------------------------------------------- 1 | 7161BDE3516329B98ECE89BD1B3A84A165B36131 2 | -------------------------------------------------------------------------------- /src/test/resources/tls/client.csr: 
-------------------------------------------------------------------------------- 1 | -----BEGIN NEW CERTIFICATE REQUEST----- 2 | MIIEPzCCA+kCAQAwbDEQMA4GA1UEBhMHVW5rbm93bjEQMA4GA1UECBMHVW5rbm93 3 | bjEQMA4GA1UEBxMHVW5rbm93bjEOMAwGA1UEChMFcmVkaXMxDjAMBgNVBAsTBXJl 4 | ZGlzMRQwEgYDVQQDEwtyZWRpcyByZWRpczCCA0IwggI1BgcqhkjOOAQBMIICKAKC 5 | AQEAj3k12bmq6b+r7Yh6z0lRtvMuxZ47rzcY6OrElh8+/TYG50NRqcQYMzm4CefC 6 | rhxTm6dHW4XQEa24tHmHdUmEaVysDo8UszYIKKIv+icRCj1iqZNFNAmg/mlsRlj4 7 | S90ggZw3CaAQV7GVrc0AIz26VIS2KR+dZI74g0SGd5ec7AS0NKasLnXpmF3iPbAp 8 | L8ERjJ/6nYGB5zONt5K3MNe540lZL2gJmHIVORXqPWuLRlPGM0WPgDsypMLg8nKQ 9 | JW5OP4o7CDihxFDk4YwaKaN9316hQ95LZv8EkD7VzxYj4VjUh8YI6X8hHNgdyiPL 10 | bjgHZfgi40K+SEwFdjk5YBzWZwIdALr2lqaFePff3uf6Z8l3x4XvMrIzuuWAwLzV 11 | aV0CggEAFqZcWCBIUHBOdQKjl1cEDTTaOjR4wVTU5KXALSQu4E+W5h5L0JBKvayP 12 | N+6x4J8xgtI8kEPLZC+IAEFg7fnKCbMgdqecMqYn8kc+kYebosTnRL0ggVRMtVuA 13 | LDaNH6g+1InpTg+gaI4yQopceMR4xo0FJ7ccmjq7CwvhLERoljnn08502xAaZaor 14 | h/ZMaCbbPscvS1WZg0u07bAvfJDppJbTpV1TW+v8RdT2GfY/Pe27hzklwvIk4Hcx 15 | KW2oh+weR0j4fvtf3rdUhDFrIjLe5VPdrwIRKw0fAtowlzIk/ieu2oudSyki2bqL 16 | 457Z4QOmPFKBC8aIt+LtQxbh7xfb3gOCAQUAAoIBABhNEA7ZsggSRP9+M+YZPxsG 17 | HqXC+JUDPxFdt8G6LwXiLMSrDK7PRwWGY+srFpk/9XbHloJFUNMy7mTs44FikjRk 18 | Ckv9RdYxySWVe6DB8pZfRMBtwpL8EVB5H3zLzwl4bo7aSwqIGcW9vbLf9lDiAJr1 19 | tLPB7u00PYLmhLBpxsjt3IASQU7eQoHbKU1fqVFC0owPLV7eDMWXtDXW15CqcNVM 20 | RYH89GF1FVft5cyc+ezRtBumVTWfkfiypXKNemMtz8nG4XPafM4t/cwL32jeqNfj 21 | D+49rJCszRcbeWW38UUZUvrR0Pg4d/zMjweuFtxYvltOg5YQkCQ+GB4EAdpeEO2g 22 | MDAuBgkqhkiG9w0BCQ4xITAfMB0GA1UdDgQWBBRRT1L9TaDwnVyuQBHSRIfqwU6h 23 | TzANBglghkgBZQMEAwIFAANBADA+Ah0Ak8JdJGCo3g5GLlnJlf4b1wwYuLY5r26a 24 | apTxzwIdAKF297kB9IeY0JVbHKQcwyWAJzOtQO82mKCHrIo= 25 | -----END NEW CERTIFICATE REQUEST----- 26 | -------------------------------------------------------------------------------- /src/test/resources/tls/clientkeystore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RedisLabs/spark-redis/6b0a69c9701644bd03239b8bc64ac45367d23cef/src/test/resources/tls/clientkeystore -------------------------------------------------------------------------------- /src/test/resources/tls/redis.crt: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIID4jCCAcoCFHFhveNRYym5js6JvRs6hKFls2ExMA0GCSqGSIb3DQEBCwUAMDUx 3 | EzARBgNVBAoMClJlZGlzIFRlc3QxHjAcBgNVBAMMFUNlcnRpZmljYXRlIEF1dGhv 4 | cml0eTAeFw0yMDA1MTcxNTA1MDFaFw0yMTA1MTcxNTA1MDFaMCYxEzARBgNVBAoM 5 | ClJlZGlzIFRlc3QxDzANBgNVBAMMBlNlcnZlcjCCASIwDQYJKoZIhvcNAQEBBQAD 6 | ggEPADCCAQoCggEBANd6DHmb4xtWSej6EOHHNHP/zSVuQ3jYUqTzc5s1oWrmDyro 7 | rk3rnRBo1l9X6n9EESaMZyZxJFkrgnMlYNbZjy7KgKpGPP1BLDwyeBo6pYg8MRZK 8 | 0Kd0GaFqHk1WpboC9qKyhyWiD/C18+6Aq6Vm5ZcfnScanJ1WbmGZt+6NtGTOBDWu 9 | BGfCzCx/6oIkDvlebycFs7JuL1uxIaZYKfUG2bPWkVpbtbNcune6A2pTtsfMdx8U 10 | qWOW087uco3Q8QEa+rwLCDzhT6NywllGHN5ua5CgGdrYWFeH5lAuiNxvWbCaN9v8 11 | ua6+/xalOGewPixQbSgF6K0grjjg/jSdyt53BH0CAwEAATANBgkqhkiG9w0BAQsF 12 | AAOCAgEAmFgZAW4De4ushRBrzSIRa5n8Q8Nkn8DcS0ludeFLiV49gyXD9b0Nnw8w 13 | Ct+qxMMiVoWXqUUJWkfhcGQ/FbBZnDLsh3ZSHlyaMB+dXbmC5SJB9IlJsvEwjmN9 14 | RBEJ5VUwEc+OMb6w0CnG9tr//b1N5+iwvmdfapJcTradMwOe02ZJhD06Tvapol6P 15 | L4z/ErSTKJRHzBPgzeSQHoRwvYbs4GE5VGMlygEq/v8ieodH0eO0IH9Cb3wro+Yq 16 | 6+SkbtOaJKdr15DZ/zd8UgoaBGxJFJ03cBcZHgV9FYfvv9QrNxkp4mx/g4UIe9Zn 17 | 6LqJSqxoQmhLWOTitp2iW/yMmDtSo2jwi4KVx+ENO9O0NsPUP2nk6mkH1YMMVg32 18 | mmrcuqOMijQu8b1MI76mO0KY3fCmbS6d0gReEcVYBZ5aPPCFTOeZVFF3n2Iz44+h 19 | 
PrE+21UgwWYxmHQoSJ+f5378EEJYr3bY/PHOp/HEzPshlU3u++3utcGQbQRowUeF 20 | cNd/gdOwHQpw7Sx6YFUvdTs9PEexPzgwzca8mPVto9uO8T5LLyZUouG6/bPzIsj7 21 | bDeZ9o56JhpR6flgg5SvWanyUL5ihbqhIxJcQai/XbMS3h61GK9H9RCu9LoJiRMM 22 | +P2bBcsd0bqzysjbifk68UjcOSpCosLBavF/JSyAO9k2ytBzah8= 23 | -----END CERTIFICATE----- 24 | -------------------------------------------------------------------------------- /src/test/resources/tls/redis.dh: -------------------------------------------------------------------------------- 1 | -----BEGIN DH PARAMETERS----- 2 | MIIBCAKCAQEAsUMvwTsUNheculmKMUjtZZJYw8DPD1vlh3DlD1xjwShqh46ICyna 3 | fER4nKo9Lt63zNcM68w1aNgiyajbJgzemuNbRh55vh88jV4rNitqWcb0MiiuOWUc 4 | 3MjNtp6U7ttoZrMhXtiPLwEfNtkUZJZAaAnvs6MEu7BgyoVXpvt8Azckh1dpYeOt 5 | TFnmIqVMziH1R2MxIFCkxZNsL9vJtrf8K3bgBJaPFpJeTPRMS25Pg90iJ2ZJLDZU 6 | OjRuQpt7yaEGNJP3ADjQAWrSRMe2NnfJ2b2yI2CzTBQ0DOeAw731TmRw7pxIOPEu 7 | N1jPJMNsM9FLoeslpJ9neot7UrqZtCt5SwIBAg== 8 | -----END DH PARAMETERS----- 9 | -------------------------------------------------------------------------------- /src/test/resources/tls/redis.key: -------------------------------------------------------------------------------- 1 | -----BEGIN RSA PRIVATE KEY----- 2 | MIIEpAIBAAKCAQEA13oMeZvjG1ZJ6PoQ4cc0c//NJW5DeNhSpPNzmzWhauYPKuiu 3 | TeudEGjWX1fqf0QRJoxnJnEkWSuCcyVg1tmPLsqAqkY8/UEsPDJ4GjqliDwxFkrQ 4 | p3QZoWoeTValugL2orKHJaIP8LXz7oCrpWbllx+dJxqcnVZuYZm37o20ZM4ENa4E 5 | Z8LMLH/qgiQO+V5vJwWzsm4vW7Ehplgp9QbZs9aRWlu1s1y6d7oDalO2x8x3HxSp 6 | Y5bTzu5yjdDxARr6vAsIPOFPo3LCWUYc3m5rkKAZ2thYV4fmUC6I3G9ZsJo32/y5 7 | rr7/FqU4Z7A+LFBtKAXorSCuOOD+NJ3K3ncEfQIDAQABAoIBAGQFJkdIwhnNaw93 8 | DGERS9rQkZRfY3hzyaEB1NbmBSCO29mKGmbXCJg2YdPuBk6+9MsU/0iL2lh95ta2 9 | MgZpBJquqxze766LExJ6Rt+9+1qKvipf3gzsynzvulMh9lmqyHtrLWqdmJKekzVR 10 | ituDMO1+Yj9lje9G0IQDi6pzIpprYLdstQJsUoafwN8TbqPGem8Z8/ili3TIj9z/ 11 | EZuSH/Crv80ls+ptz2GiPcAxUctqAfha+3lkX+qWklTd+0B5LgHrf2agxIAb7549 12 | ZQTo4kDPDem7Rvqj4IQMHQhRdgSNYOmjidIF+XnbsdVw2rSbqMx6z+eLepuGtUQj 13 | fWDh5gECgYEA9GddWg36dF5kebvBfuFso4DSqKBip6P6cogzoI6fHP2b4Qe73hlq 14 | YiLOA3dPN/a8D0Ox3HNK8jx23EzG/JiIlBYX3RcB9+iUtg2Iixz9FiHnQkKwJFBS 15 | VnWOb4rNQqWm+5W/OxE3da5dvsQKKpcgAK9OYj84X0lH3196GsxRrIECgYEA4bNS 16 | 6J0tbWHZ6CnTSZJz3BRNFnn8WSLy6A8Yytku1DnJjODuJT0/JwxglM+sR022V7JY 17 | YCBfuueIXhqYbqCqCzLxZg21+w/3jpIZp7eDd5nY+VDnUv0PyNwGgv5rS6nEgpeL 18 | YdbDYbj0ZOLZjSDrz5OfqihNQZssfWeB+ecvCf0CgYA/s/l3EZGoZzoVKMUkhylD 19 | 7L37yeItA+axl6KtRL9gVRIeM1/aYhGChsPfz2dMlPkrmV9wsHRmczAf97sd97wR 20 | rTiHHgobTfoFAb4HVIT7EdcvRCaZMH5lnrqDhFBAAOFnTf7MLI6iE9LHeF2WAFIN 21 | G6R4ozXUUEt1g3NWLM9VAQKBgQC73b86PnKspJF0LTRg/hWQcBmGhv1k2LFmNgLF 22 | /id7oapBqIyx1Jw3jZbq5z4Yj/giYSIsyWXFtqmM4whUtUk1Ty8eanU6yJygQL44 23 | G4nDyPyQ8iXKrzgvUe3dpZZ8AZC/vxLW2qQBOKm9PBIn5epC+zcgtLEx1c8fh0Pq 24 | VuORpQKBgQDXfhxTrpYyoxJmAoLmj9IOdsE/vjlcpriaQu4hkzaN6wz79O637WBk 25 | zoaT72zYbCpWhD42yZLqQIkrgYjsht30Wut1dp/0FQSoT872aB62Q90UVKmX4TvN 26 | wODJz8mtdC5co5fjxbaUn9Zfc0LUO9KPhFd0fb6SwCthBSQ4RmBWgg== 27 | -----END RSA PRIVATE KEY----- 28 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/RedisBenchmarks.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis 2 | 3 | import java.io.{File, FileWriter, PrintWriter} 4 | import java.time.{Duration => JDuration} 5 | 6 | import com.redislabs.provider.redis.util.Logging 7 | 8 | /** 9 | * @author The Viet Nguyen 10 | */ 11 | trait RedisBenchmarks extends Logging { 12 | 13 | val benchmarkReportDir = new 
File("target/reports/benchmarks/") 14 | benchmarkReportDir.mkdirs() 15 | 16 | def time[R](tag: String)(block: => R): R = { 17 | val t0 = System.nanoTime() 18 | val result = block // call-by-name 19 | val t1 = System.nanoTime() 20 | new PrintWriter(new FileWriter(s"$benchmarkReportDir/results.txt", true)) { 21 | // scalastyle:off 22 | this.println(s"$tag, ${JDuration.ofNanos(t1 - t0)}") 23 | close() 24 | } 25 | result 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/RedisConfigSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis 2 | 3 | import org.scalatest.{FunSuite, Matchers} 4 | import redis.clients.jedis.util.JedisClusterCRC16 5 | 6 | class RedisConfigSuite extends FunSuite with Matchers { 7 | 8 | val redisStandaloneConfig = new RedisConfig(RedisEndpoint(host = "127.0.0.1", port = 6379, auth = "passwd")) 9 | val redisClusterConfig = new RedisConfig(RedisEndpoint(host = "127.0.0.1", port = 7379)) 10 | 11 | test("getNodesBySlots") { 12 | redisStandaloneConfig.getNodesBySlots(0, 16383).length shouldBe 1 13 | redisClusterConfig.getNodesBySlots(0, 16383).length shouldBe 7 14 | } 15 | 16 | test("getHost") { 17 | val key = "getHost" 18 | val slot = JedisClusterCRC16.getSlot(key) 19 | val standaloneHost = redisStandaloneConfig.getHost(key) 20 | assert(standaloneHost.startSlot <= slot && standaloneHost.endSlot >= slot) 21 | val clusterHost = redisClusterConfig.getHost(key) 22 | assert(clusterHost.startSlot <= slot && clusterHost.endSlot >= slot) 23 | } 24 | 25 | test("getNodes") { 26 | redisStandaloneConfig.getNodes(RedisEndpoint(host = "127.0.0.1", port = 6379, auth = "passwd")).length shouldBe 1 27 | redisClusterConfig.getNodes(RedisEndpoint(host = "127.0.0.1", port = 7379)).length shouldBe 7 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/SparkRedisSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis 2 | 3 | import com.redislabs.provider.redis.env.Env 4 | import com.redislabs.provider.redis.rdd.Keys 5 | import org.apache.spark.sql.{SQLContext, SQLImplicits, SparkSession} 6 | import org.scalatest.{BeforeAndAfterAll, FunSuite} 7 | 8 | /** 9 | * @author The Viet Nguyen 10 | */ 11 | trait SparkRedisSuite extends FunSuite with Env with Keys with BeforeAndAfterAll { 12 | 13 | override def beforeAll() { 14 | super.beforeAll() 15 | spark = SparkSession.builder().config(conf).getOrCreate() 16 | sc = spark.sparkContext 17 | } 18 | 19 | override def afterAll(): Unit = { 20 | spark.stop 21 | System.clearProperty("spark.driver.port") 22 | } 23 | 24 | object TestSqlImplicits extends SQLImplicits { 25 | 26 | override protected def _sqlContext: SQLContext = spark.sqlContext 27 | } 28 | 29 | } 30 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/SparkStreamingRedisSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis 2 | 3 | import com.redislabs.provider.redis.env.Env 4 | import com.redislabs.provider.redis.util.Logging 5 | import org.apache.spark.sql.SparkSession 6 | import org.apache.spark.streaming.{Seconds, StreamingContext} 7 | import org.scalatest.{BeforeAndAfterEach, FunSuite} 8 | 9 | /** 10 | * 
For Spark streaming tests we have to create the Spark and streaming contexts for each test 11 | */ 12 | trait SparkStreamingRedisSuite extends FunSuite with Env with BeforeAndAfterEach with Logging { 13 | 14 | override protected def beforeEach(): Unit = { 15 | super.beforeEach() 16 | spark = SparkSession.builder().config(conf).getOrCreate() 17 | sc = spark.sparkContext 18 | ssc = new StreamingContext(sc, Seconds(1)) 19 | } 20 | 21 | override protected def afterEach(): Unit = { 22 | ssc.stop() 23 | spark.stop 24 | System.clearProperty("spark.driver.port") 25 | super.afterEach() 26 | } 27 | 28 | } 29 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/AclDataframeSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df 2 | 3 | import com.redislabs.provider.redis.util.Person.{TableNamePrefix, data} 4 | import com.redislabs.provider.redis.util.TestUtils.{generateTableName, interceptSparkErr} 5 | import org.apache.spark.sql.redis.{RedisFormat, SqlOptionTableName} 6 | import org.scalatest.Matchers 7 | import redis.clients.jedis.exceptions.JedisConnectionException 8 | 9 | /** 10 | * Basic dataframe test with user/password authentication 11 | */ 12 | trait AclDataframeSuite extends RedisDataframeSuite with Matchers { 13 | 14 | test("save and load dataframe") { 15 | val tableName = generateTableName(TableNamePrefix) 16 | val df = spark.createDataFrame(data) 17 | df.write.format(RedisFormat) 18 | .option(SqlOptionTableName, tableName) 19 | .save() 20 | val loadedDf = spark.read.format(RedisFormat) 21 | .option(SqlOptionTableName, tableName) 22 | .load() 23 | .cache() 24 | verifyDf(loadedDf) 25 | } 26 | 27 | test("incorrect password in dataframe options") { 28 | interceptSparkErr[JedisConnectionException] { 29 | val tableName = generateTableName(TableNamePrefix) 30 | val df = spark.createDataFrame(data) 31 | df.write.format(RedisFormat) 32 | .option(SqlOptionTableName, tableName) 33 | .option("user", user) 34 | .option("auth", "wrong_password") 35 | .save() 36 | } 37 | } 38 | 39 | test("correct user/password in dataframe options") { 40 | val tableName = generateTableName(TableNamePrefix) 41 | val df = spark.createDataFrame(data) 42 | df.write.format(RedisFormat) 43 | .option(SqlOptionTableName, tableName) 44 | .option("user", user) 45 | .option("auth", userPassword) 46 | .save() 47 | 48 | val loadedDf = spark.read.format(RedisFormat) 49 | .option(SqlOptionTableName, tableName) 50 | .option("user", user) 51 | .option("auth", userPassword) 52 | .load() 53 | .cache() 54 | verifyDf(loadedDf) 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/BinaryDataframeSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df 2 | 3 | import com.redislabs.provider.redis.toRedisContext 4 | import com.redislabs.provider.redis.util.Person 5 | import com.redislabs.provider.redis.util.Person._ 6 | import com.redislabs.provider.redis.util.TestUtils._ 7 | import org.apache.commons.lang3.SerializationUtils 8 | import org.apache.spark.SparkException 9 | import org.apache.spark.sql.redis.RedisSourceRelation.tableDataKeyPattern 10 | import org.apache.spark.sql.redis._ 11 | import org.apache.spark.sql.types.{IntegerType, StructField, StructType} 12 | import org.scalatest.Matchers 13 | 14 | /** 15 |
@author The Viet Nguyen 16 | */ 17 | trait BinaryDataframeSuite extends RedisDataframeSuite with Matchers { 18 | 19 | test("save and load dataframe with binary mode") { 20 | val tableName = generateTableName(TableNamePrefix) 21 | val df = spark.createDataFrame(data) 22 | df.write.format(RedisFormat) 23 | .option(SqlOptionModel, SqlOptionModelBinary) 24 | .option(SqlOptionTableName, tableName) 25 | .save() 26 | val loadedDf = spark.read.format(RedisFormat) 27 | .option(SqlOptionModel, SqlOptionModelBinary) 28 | .option(SqlOptionTableName, tableName) 29 | .load() 30 | .cache() 31 | verifyDf(loadedDf) 32 | } 33 | 34 | test("save with binary mode and load dataframe") { 35 | val tableName = generateTableName(TableNamePrefix) 36 | val df = spark.createDataFrame(data) 37 | df.write.format(RedisFormat) 38 | .option(SqlOptionModel, SqlOptionModelBinary) 39 | .option(SqlOptionTableName, tableName) 40 | .save() 41 | interceptSparkErr[SparkException] { 42 | spark.read.format(RedisFormat) 43 | .option(SqlOptionTableName, tableName) 44 | .load() 45 | .show() 46 | } 47 | } 48 | 49 | test("save and load with binary mode dataframe") { 50 | val tableName = generateTableName(TableNamePrefix) 51 | val df = spark.createDataFrame(data) 52 | df.write.format(RedisFormat) 53 | .option(SqlOptionTableName, tableName) 54 | .save() 55 | interceptSparkErr[SparkException] { 56 | spark.read.format(RedisFormat) 57 | .option(SqlOptionModel, SqlOptionModelBinary) 58 | .option(SqlOptionTableName, tableName) 59 | .load() 60 | .show() 61 | } 62 | } 63 | 64 | test("load filtered hash keys with strings") { 65 | val tableName = generateTableName(TableNamePrefix) 66 | val df = spark.createDataFrame(data) 67 | df.write.format(RedisFormat) 68 | .option(SqlOptionTableName, tableName) 69 | .option(SqlOptionModel, SqlOptionModelHash) 70 | .save() 71 | val extraKey = RedisSourceRelation.uuid() 72 | saveMap(tableName, extraKey, Person.dataMaps.head) 73 | val loadedIds = spark.read.format(RedisFormat) 74 | .schema(Person.fullSchema) 75 | .option(SqlOptionTableName, tableName) 76 | .option(SqlOptionModel, SqlOptionModelHash) 77 | .option(SqlOptionFilterKeysByType, value = true) 78 | .load() 79 | .collect() 80 | .map { r => 81 | r.getAs[String]("_id") 82 | } 83 | loadedIds.length shouldBe 2 84 | loadedIds should not contain extraKey 85 | val countAll = sc.fromRedisKeyPattern(tableDataKeyPattern(tableName)).count() 86 | countAll shouldBe 3 87 | } 88 | 89 | test("load unfiltered hash keys with strings") { 90 | val tableName = generateTableName(TableNamePrefix) 91 | val df = spark.createDataFrame(data) 92 | df.write.format(RedisFormat) 93 | .option(SqlOptionTableName, tableName) 94 | .option(SqlOptionModel, SqlOptionModelHash) 95 | .save() 96 | saveMap(tableName, RedisSourceRelation.uuid(), Person.dataMaps.head) 97 | interceptSparkErr[SparkException] { 98 | spark.read.format(RedisFormat) 99 | .option(SqlOptionTableName, tableName) 100 | .option(SqlOptionModel, SqlOptionModelHash) 101 | .load() 102 | .collect() 103 | } 104 | } 105 | 106 | test("read dataframe by non-existing key (not pattern)") { 107 | val df = spark.read.format(RedisFormat) 108 | .option(SqlOptionKeysPattern, "some-non-existing-key") 109 | .option(SqlOptionModel, SqlOptionModelBinary) 110 | .schema(StructType(Array( 111 | StructField("id", IntegerType), 112 | StructField("value", IntegerType) 113 | ))) 114 | .load() 115 | .cache() 116 | 117 | df.show() 118 | df.count() should be (0) 119 | } 120 | 121 | def serialize(value: Map[String, String]): Array[Byte] = { 122 | val 
valuesArray = value.values.toArray 123 | SerializationUtils.serialize(valuesArray) 124 | } 125 | 126 | def saveMap(tableName: String, key: String, value: Map[String, String]): Unit 127 | } 128 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/CsvDataframeSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df 2 | 3 | import com.redislabs.provider.redis.util.TestUtils._ 4 | import org.apache.spark.sql.functions._ 5 | import org.apache.spark.sql.redis.{RedisFormat, SqlOptionKeyColumn, SqlOptionTableName} 6 | import org.scalatest.Matchers 7 | 8 | trait CsvDataframeSuite extends RedisDataframeSuite with Matchers { 9 | 10 | test("load dataframe from test.csv file, write/read from redis") { 11 | val file = getClass.getClassLoader.getResource("test.csv").getFile 12 | val df = spark.read.format("csv") 13 | .option("header", true) 14 | .option("inferSchema", true) 15 | .load(file) 16 | .withColumn("id", monotonically_increasing_id()) 17 | .cache() 18 | 19 | val tableName = generateTableName("csv-data") 20 | 21 | df.write.format(RedisFormat) 22 | .option(SqlOptionTableName, tableName) 23 | .option(SqlOptionKeyColumn, "id") 24 | .save() 25 | 26 | val loadedDf = spark.read.format(RedisFormat) 27 | .option(SqlOptionTableName, tableName) 28 | .option(SqlOptionKeyColumn, "id") 29 | .load() 30 | .cache() 31 | 32 | df.schema should be(loadedDf.schema) 33 | 34 | df.collect().toSet should be(loadedDf.collect().toSet) 35 | } 36 | 37 | } 38 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/FilteredDataframeSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df 2 | 3 | import com.redislabs.provider.redis.util.Person.{TableNamePrefix} 4 | import org.apache.spark.sql.redis.{RedisFormat, SqlOptionTableName} 5 | import org.scalatest.Matchers 6 | import com.redislabs.provider.redis.util.TestUtils._ 7 | 8 | /** 9 | * @author The Viet Nguyen 10 | */ 11 | trait FilteredDataframeSuite extends RedisDataframeSuite with Matchers { 12 | 13 | test("select none fields") { 14 | val tableName = generateTableName(TableNamePrefix) 15 | writeDf(tableName) 16 | val actualDf = spark.read.format(RedisFormat) 17 | .option(SqlOptionTableName, tableName) 18 | .load() 19 | .select() 20 | .cache() 21 | actualDf.count() shouldBe expectedDf.count() 22 | actualDf.collect().foreach { r => 23 | r.length shouldBe 0 24 | } 25 | } 26 | 27 | test("select all fields") { 28 | val tableName = generateTableName(TableNamePrefix) 29 | writeDf(tableName) 30 | val actualDf = spark.read.format(RedisFormat) 31 | .option(SqlOptionTableName, tableName) 32 | .load() 33 | .select("name", "age", "address", "salary") 34 | .cache() 35 | verifyDf(actualDf) 36 | } 37 | 38 | test("select partial fields") { 39 | val tableName = generateTableName(TableNamePrefix) 40 | writeDf(tableName) 41 | val actualDf = spark.read.format(RedisFormat) 42 | .option(SqlOptionTableName, tableName) 43 | .load() 44 | .select("name", "salary") 45 | .cache() 46 | verifyPartialDf(actualDf) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/RedisDataframeSuite.scala: -------------------------------------------------------------------------------- 1 | package 
com.redislabs.provider.redis.df 2 | 3 | import com.redislabs.provider.redis.SparkRedisSuite 4 | import com.redislabs.provider.redis.util.Person 5 | import com.redislabs.provider.redis.util.Person.data 6 | import org.apache.spark.sql.DataFrame 7 | import org.apache.spark.sql.redis.{RedisFormat, SqlOptionTableName} 8 | import org.scalatest.Matchers 9 | 10 | /** 11 | * @author The Viet Nguyen 12 | */ 13 | trait RedisDataframeSuite extends SparkRedisSuite with Matchers { 14 | 15 | import TestSqlImplicits._ 16 | 17 | lazy val expectedDf: DataFrame = Person.df(spark) 18 | 19 | def writeDf(tableName: String, options: Map[String, Any] = Map()): Unit = { 20 | val df = spark.createDataFrame(data) 21 | 22 | val initialWriter = df 23 | .write 24 | .format(RedisFormat) 25 | .option(SqlOptionTableName, tableName) 26 | 27 | val writer = options.foldLeft(initialWriter) { case (acc, (k, v)) => 28 | acc.option(k, v.toString) 29 | } 30 | writer.save() 31 | } 32 | 33 | def createTempView(tableName: String): Unit = { 34 | spark.createDataFrame(data).createOrReplaceTempView(tableName) 35 | } 36 | 37 | def loadAndVerifyDf(tableName: String, options: Map[String, Any] = Map()): Unit = { 38 | val initialReader = spark 39 | .read 40 | .format(RedisFormat) 41 | .option(SqlOptionTableName, tableName) 42 | 43 | val reader = options.foldLeft(initialReader) { case (acc, (k, v)) => 44 | acc.option(k, v.toString) 45 | } 46 | 47 | val actualDf = reader.load().cache() 48 | verifyDf(actualDf, data) 49 | } 50 | 51 | def verifyDf(actualDf: DataFrame, data: Seq[Person] = Person.data): Unit = { 52 | actualDf.show() 53 | actualDf.count() shouldBe expectedDf.count() 54 | // TODO: check nullable columns 55 | // actualDf.schema shouldBe expectedDf.schema 56 | val loadedArr = actualDf.as[Person].collect() 57 | loadedArr.sortBy(_.name) shouldBe data.toArray.sortBy(_.name) 58 | } 59 | 60 | def verifyPartialDf(actualDf: DataFrame): Unit = { 61 | actualDf.show() 62 | actualDf.count() shouldBe expectedDf.count() 63 | // TODO: check nullable columns 64 | // actualDf.schema shouldBe expectedDf.schema 65 | val loadedArr = actualDf.collect() 66 | .map(r => (r.getAs[String]("name"), r.getAs[Double]("salary"))) 67 | loadedArr.sortBy(_._1) shouldBe data.toArray.sortBy(_.name).map(p => (p.name, p.salary)) 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/SparkSqlSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df 2 | 3 | import com.redislabs.provider.redis.util.Person.generatePersonTableName 4 | import org.apache.spark.sql.redis.{RedisFormat, SqlOptionTableName} 5 | import org.scalatest.Matchers 6 | 7 | /** 8 | * @author The Viet Nguyen 9 | */ 10 | trait SparkSqlSuite extends RedisDataframeSuite with Matchers { 11 | 12 | test("create temporary view then make regular insertions") { 13 | val tableName = generatePersonTableName() 14 | spark.sql( 15 | s"""CREATE TEMPORARY VIEW $tableName (name STRING, age INT, address STRING, salary DOUBLE) 16 | | USING $RedisFormat OPTIONS (table '$tableName') 17 | |""".stripMargin) 18 | spark.sql( 19 | s"""INSERT INTO TABLE $tableName 20 | | VALUES ('John', 30, '60 Wall Street', 150.5), 21 | | ('Peter', 35, '110 Wall Street', 200.3) 22 | |""".stripMargin) 23 | val loadedDf = spark.read.format(RedisFormat) 24 | .option(SqlOptionTableName, tableName) 25 | .load() 26 | .cache() 27 | verifyDf(loadedDf) 28 | } 29 | 30 | test("create 
temporary view then make overwrite insertions when no data exists") { 31 | val tableName = generatePersonTableName() 32 | spark.sql( 33 | s"""CREATE TEMPORARY VIEW $tableName (name STRING, age INT, address STRING, salary DOUBLE) 34 | | USING $RedisFormat OPTIONS (table '$tableName') 35 | |""".stripMargin) 36 | spark.sql( 37 | s"""INSERT overwrite TABLE $tableName 38 | |SELECT * FROM VALUES ('John', 30, '60 Wall Street', 150.5), 39 | | ('Peter', 35, '110 Wall Street', 200.3) 40 | |""".stripMargin) 41 | val loadedDf = spark.read.format(RedisFormat) 42 | .option(SqlOptionTableName, tableName) 43 | .load() 44 | .cache() 45 | verifyDf(loadedDf) 46 | } 47 | 48 | test("create temporary view then make overwrite insertions when data exists") { 49 | val tableName = generatePersonTableName() 50 | spark.sql( 51 | s"""CREATE TEMPORARY VIEW $tableName (name STRING, age INT, address STRING, salary DOUBLE) 52 | | USING $RedisFormat OPTIONS (table '$tableName') 53 | |""".stripMargin) 54 | spark.sql( 55 | s"""INSERT INTO TABLE $tableName 56 | | VALUES ('Johnny', 18, '17 Home Street', 0), 57 | | ('Peter', 23, '6 Home Street', 20) 58 | |""".stripMargin) 59 | spark.sql( 60 | s"""INSERT overwrite TABLE $tableName 61 | |SELECT * FROM VALUES ('John', 30, '60 Wall Street', 150.5), 62 | | ('Peter', 35, '110 Wall Street', 200.3) 63 | |""".stripMargin) 64 | val loadedDf = spark.read.format(RedisFormat) 65 | .option(SqlOptionTableName, tableName) 66 | .load() 67 | .cache() 68 | verifyDf(loadedDf) 69 | } 70 | 71 | test("create temporary view, make regular insertions then select") { 72 | val tableName = generatePersonTableName() 73 | spark.sql( 74 | s"""CREATE TEMPORARY VIEW $tableName (name STRING, age INT, address STRING, salary DOUBLE) 75 | | USING $RedisFormat OPTIONS (table '$tableName') 76 | |""".stripMargin) 77 | spark.sql( 78 | s"""INSERT INTO TABLE $tableName 79 | | VALUES ('John', 30, '60 Wall Street', 150.5), 80 | | ('Peter', 35, '110 Wall Street', 200.3) 81 | |""".stripMargin) 82 | val loadedDf = spark.sql( 83 | s"""SELECT * FROM $tableName 84 | |""".stripMargin) 85 | verifyDf(loadedDf) 86 | } 87 | 88 | test("select from temporary view") { 89 | val tableName = generatePersonTableName() 90 | createTempView(tableName) 91 | val loadedDf = spark.sql(s"SELECT * FROM $tableName") 92 | verifyDf(loadedDf) 93 | } 94 | 95 | test("select all fields from temporary view") { 96 | val tableName = generatePersonTableName() 97 | createTempView(tableName) 98 | val loadedDf = spark.sql(s"SELECT name, age, address, salary FROM $tableName") 99 | verifyDf(loadedDf) 100 | } 101 | 102 | test("select name and salary from temporary view") { 103 | val tableName = generatePersonTableName() 104 | createTempView(tableName) 105 | val actualDf = spark.sql(s"SELECT name, salary FROM $tableName") 106 | verifyPartialDf(actualDf) 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/acl/AclDataframeClusterSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.acl 2 | 3 | import com.redislabs.provider.redis.df.AclDataframeSuite 4 | import com.redislabs.provider.redis.env.RedisClusterAclEnv 5 | 6 | class AclDataframeClusterSuite extends AclDataframeSuite with RedisClusterAclEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/acl/AclDataframeStandaloneSuite.scala: 
-------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.acl 2 | 3 | import com.redislabs.provider.redis.df.AclDataframeSuite 4 | import com.redislabs.provider.redis.env.RedisStandaloneAclEnv 5 | 6 | class AclDataframeStandaloneSuite extends AclDataframeSuite with RedisStandaloneAclEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/benchmark/DataframeBenchmarkSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.benchmark 2 | 3 | import com.redislabs.provider.redis.df.RedisDataframeSuite 4 | import com.redislabs.provider.redis.rdd.Keys 5 | import com.redislabs.provider.redis.util.Person.TableNamePrefix 6 | import com.redislabs.provider.redis.util.PipelineUtils.foreachWithPipeline 7 | import com.redislabs.provider.redis.util.TestUtils.generateTableName 8 | import com.redislabs.provider.redis.util.{Logging, Person} 9 | import com.redislabs.provider.redis.{ReadWriteConfig, RedisBenchmarks, toRedisContext} 10 | import org.apache.spark.rdd.RDD 11 | import org.apache.spark.sql.SaveMode 12 | import org.apache.spark.sql.redis._ 13 | import redis.clients.jedis.PipelineBase 14 | 15 | /** 16 | * @author The Viet Nguyen 17 | */ 18 | trait DataframeBenchmarkSuite extends RedisDataframeSuite with RedisBenchmarks with Logging { 19 | 20 | val tableName: String = generateTableName(TableNamePrefix) 21 | 22 | private val startDate = s"${System.currentTimeMillis()}" 23 | 24 | def suiteTags: String = startDate 25 | 26 | def persistentModel: String 27 | 28 | def rdd(): RDD[Person] 29 | 30 | override def afterAll(): Unit = { 31 | time(s"$suiteTags, Cleanup") { 32 | val hosts = redisConfig.hosts 33 | implicit val readWriteConfig: ReadWriteConfig = ReadWriteConfig.Default 34 | sc.fromRedisKeyPattern() 35 | .foreachPartition { p => 36 | Keys.groupKeysByNode(hosts, p) 37 | .foreach { case (n, ks) => 38 | val conn = n.connect() 39 | foreachWithPipeline(conn, ks) { (pl, k) => 40 | (pl: PipelineBase).del(k) // fix ambiguous reference to overloaded definition 41 | } 42 | conn.close() 43 | } 44 | } 45 | } 46 | super.afterAll() 47 | } 48 | 49 | test(s"$suiteTags, Write") { 50 | val df = spark.createDataFrame(rdd()) 51 | time(s"$suiteTags, Write") { 52 | df.write.format(RedisFormat) 53 | .option(SqlOptionModel, persistentModel) 54 | .option(SqlOptionTableName, tableName) 55 | .option(SqlOptionKeyColumn, "name") 56 | .mode(SaveMode.Overwrite) 57 | .save() 58 | } 59 | } 60 | 61 | test(s"$suiteTags, Read") { 62 | time(s"$suiteTags, Read") { 63 | spark.read.format(RedisFormat) 64 | .option(SqlOptionModel, persistentModel) 65 | .option(SqlOptionTableName, tableName) 66 | .option(SqlOptionNumPartitions, 8) 67 | .load() 68 | .foreach { _ => 69 | // measure read all elements 70 | } 71 | } 72 | } 73 | 74 | test(s"$suiteTags, Read all fields") { 75 | time(s"$suiteTags, Read all fields") { 76 | spark.read.format(RedisFormat) 77 | .option(SqlOptionModel, persistentModel) 78 | .option(SqlOptionTableName, tableName) 79 | .option(SqlOptionNumPartitions, 8) 80 | .load() 81 | .select("name", "age", "address", "salary") 82 | .foreach { _ => 83 | // measure read all elements 84 | } 85 | } 86 | } 87 | 88 | test(s"$suiteTags, Read 1 fields") { 89 | time(s"$suiteTags, Read 1 fields") { 90 | spark.read.format(RedisFormat) 91 | .option(SqlOptionModel, persistentModel) 92 | .option(SqlOptionTableName, tableName) 
93 | .option(SqlOptionNumPartitions, 8) 94 | .load() 95 | .select("name") 96 | .foreach { _ => 97 | // measure read all elements 98 | } 99 | } 100 | } 101 | 102 | test(s"$suiteTags, Read 0 fields") { 103 | time(s"$suiteTags, Read 0 fields") { 104 | spark.read.format(RedisFormat) 105 | .option(SqlOptionModel, persistentModel) 106 | .option(SqlOptionTableName, tableName) 107 | .option(SqlOptionNumPartitions, 8) 108 | .load() 109 | .select() 110 | .foreach { _ => 111 | // measure read all elements 112 | } 113 | } 114 | } 115 | 116 | test(s"$suiteTags, Take 10") { 117 | time(s"$suiteTags, Take 10") { 118 | spark.read.format(RedisFormat) 119 | .option(SqlOptionModel, persistentModel) 120 | .option(SqlOptionTableName, tableName) 121 | .option(SqlOptionNumPartitions, 8) 122 | .load() 123 | .take(10) 124 | } 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/benchmark/ManyValueBenchmarkSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.benchmark 2 | 3 | import com.redislabs.provider.redis.env.RedisClusterEnv 4 | import com.redislabs.provider.redis.util.Person 5 | import org.apache.spark.rdd.RDD 6 | 7 | /** 8 | * @author The Viet Nguyen 9 | */ 10 | trait ManyValueBenchmarkSuite extends DataframeBenchmarkSuite with RedisClusterEnv { 11 | 12 | private def num = 1000000 13 | 14 | override def suiteTags: String = s"${super.suiteTags}, Many:$num" 15 | 16 | override def rdd(): RDD[Person] = { 17 | val partitionsNum = 8 18 | val sectionLength = num / partitionsNum 19 | spark.sparkContext 20 | .parallelize(0 until partitionsNum, partitionsNum) 21 | .mapPartitions { 22 | _ 23 | .flatMap { i => 24 | val start = i * sectionLength 25 | val end = start + sectionLength + 1 26 | Stream.range(start, end) 27 | } 28 | .map { i => 29 | Person(s"John-$i", 30, "60 Wall Street", 150.5) 30 | } 31 | } 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/benchmark/SingleValueBenchmarkSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.benchmark 2 | 3 | import com.redislabs.provider.redis.env.RedisClusterEnv 4 | import com.redislabs.provider.redis.util.Person 5 | import org.apache.spark.rdd.RDD 6 | 7 | /** 8 | * @author The Viet Nguyen 9 | */ 10 | trait SingleValueBenchmarkSuite extends DataframeBenchmarkSuite with RedisClusterEnv { 11 | 12 | override def suiteTags: String = s"${super.suiteTags}, Single" 13 | 14 | override def rdd(): RDD[Person] = { 15 | spark.sparkContext.parallelize(Seq(Person(s"John", 30, "60 Wall Street", 150.5))) 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/benchmark/cluster/BinaryModelManyValueClusterBenchmarkSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.benchmark.cluster 2 | 3 | import com.redislabs.provider.redis.df.benchmark.ManyValueBenchmarkSuite 4 | import com.redislabs.provider.redis.env.RedisClusterEnv 5 | import com.redislabs.provider.redis.util.BenchmarkTest 6 | import org.apache.spark.sql 7 | 8 | /** 9 | * @author The Viet Nguyen 10 | */ 11 | @BenchmarkTest 12 | class BinaryModelManyValueClusterBenchmarkSuite extends 
ManyValueBenchmarkSuite 13 | with RedisClusterEnv { 14 | 15 | override def suiteTags: String = s"${super.suiteTags}, Binary" 16 | 17 | override def persistentModel: String = sql.redis.SqlOptionModelBinary 18 | } 19 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/benchmark/cluster/BinaryModelSingleValueClusterBenchmarkSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.benchmark.cluster 2 | 3 | import com.redislabs.provider.redis.df.benchmark.SingleValueBenchmarkSuite 4 | import com.redislabs.provider.redis.env.RedisClusterEnv 5 | import com.redislabs.provider.redis.util.BenchmarkTest 6 | import org.apache.spark.sql 7 | 8 | /** 9 | * @author The Viet Nguyen 10 | */ 11 | @BenchmarkTest 12 | class BinaryModelSingleValueClusterBenchmarkSuite extends SingleValueBenchmarkSuite 13 | with RedisClusterEnv { 14 | 15 | override def suiteTags: String = s"${super.suiteTags}, Binary" 16 | 17 | override def persistentModel: String = sql.redis.SqlOptionModelBinary 18 | } 19 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/benchmark/cluster/HashModelManyValueClusterBenchmarkSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.benchmark.cluster 2 | 3 | import com.redislabs.provider.redis.df.benchmark.ManyValueBenchmarkSuite 4 | import com.redislabs.provider.redis.env.RedisClusterEnv 5 | import com.redislabs.provider.redis.util.BenchmarkTest 6 | import org.apache.spark.sql.redis.SqlOptionModelHash 7 | 8 | /** 9 | * @author The Viet Nguyen 10 | */ 11 | @BenchmarkTest 12 | class HashModelManyValueClusterBenchmarkSuite extends ManyValueBenchmarkSuite 13 | with RedisClusterEnv { 14 | 15 | override def suiteTags: String = s"${super.suiteTags}, Hash" 16 | 17 | override def persistentModel: String = SqlOptionModelHash 18 | } 19 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/benchmark/cluster/HashModelSingleValueClusterBenchmarkSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.benchmark.cluster 2 | 3 | import com.redislabs.provider.redis.df.benchmark.SingleValueBenchmarkSuite 4 | import com.redislabs.provider.redis.env.RedisClusterEnv 5 | import com.redislabs.provider.redis.util.BenchmarkTest 6 | import org.apache.spark.sql.redis.SqlOptionModelHash 7 | 8 | /** 9 | * @author The Viet Nguyen 10 | */ 11 | @BenchmarkTest 12 | class HashModelSingleValueClusterBenchmarkSuite extends SingleValueBenchmarkSuite 13 | with RedisClusterEnv { 14 | 15 | override def suiteTags: String = s"${super.suiteTags}, Hash" 16 | 17 | override def persistentModel: String = SqlOptionModelHash 18 | } 19 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/cluster/BinaryDataframeClusterSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.cluster 2 | 3 | import java.nio.charset.StandardCharsets.UTF_8 4 | 5 | import com.redislabs.provider.redis.df.BinaryDataframeSuite 6 | import com.redislabs.provider.redis.env.RedisClusterEnv 7 | import 
org.apache.spark.sql.redis.RedisSourceRelation.dataKey 8 | import redis.clients.jedis.{HostAndPort, JedisCluster} 9 | 10 | /** 11 | * @author The Viet Nguyen 12 | */ 13 | class BinaryDataframeClusterSuite extends BinaryDataframeSuite with RedisClusterEnv { 14 | 15 | override def saveMap(tableName: String, key: String, value: Map[String, String]): Unit = { 16 | val host = redisConfig.initialHost 17 | val hostAndPort = new HostAndPort(host.host, host.port) 18 | val conn = new JedisCluster(hostAndPort) 19 | conn.set(dataKey(tableName, key).getBytes(UTF_8), serialize(value)) 20 | conn.close() 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/cluster/CsvDataframeClusterSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.cluster 2 | 3 | import com.redislabs.provider.redis.df.CsvDataframeSuite 4 | import com.redislabs.provider.redis.env.RedisClusterEnv 5 | 6 | class CsvDataframeClusterSuite extends CsvDataframeSuite with RedisClusterEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/cluster/DataframeClusterSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.cluster 2 | 3 | import com.redislabs.provider.redis.df.DataframeSuite 4 | import com.redislabs.provider.redis.env.RedisClusterEnv 5 | 6 | class DataframeClusterSuite extends DataframeSuite with RedisClusterEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/cluster/FilteredDataframeClusterSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.cluster 2 | 3 | import com.redislabs.provider.redis.df.{FilteredDataframeSuite, RedisDataframeSuite} 4 | import com.redislabs.provider.redis.env.RedisClusterEnv 5 | 6 | /** 7 | * @author The Viet Nguyen 8 | */ 9 | class FilteredDataframeClusterSuite extends FilteredDataframeSuite with RedisClusterEnv 10 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/cluster/HashDataframeClusterSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.cluster 2 | 3 | import com.redislabs.provider.redis.df.HashDataframeSuite 4 | import com.redislabs.provider.redis.env.RedisClusterEnv 5 | import redis.clients.jedis.{HostAndPort, JedisCluster} 6 | 7 | import scala.collection.JavaConverters._ 8 | 9 | /** 10 | * @author The Viet Nguyen 11 | */ 12 | class HashDataframeClusterSuite extends HashDataframeSuite with RedisClusterEnv { 13 | 14 | override def saveMap(tableName: String, key: String, value: Map[String, String]): Unit = { 15 | val host = redisConfig.initialHost 16 | val hostAndPort = new HostAndPort(host.host, host.port) 17 | val conn = new JedisCluster(hostAndPort) 18 | conn.hmset(tableName + ":" + key, value.asJava) 19 | conn.close() 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/cluster/SparkSqlClusterSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.cluster 
2 | 3 | import com.redislabs.provider.redis.df.SparkSqlSuite 4 | import com.redislabs.provider.redis.env.RedisClusterEnv 5 | 6 | /** 7 | * @author The Viet Nguyen 8 | */ 9 | class SparkSqlClusterSuite extends SparkSqlSuite with RedisClusterEnv 10 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/standalone/BinaryDataframeStandaloneSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.standalone 2 | 3 | import java.nio.charset.StandardCharsets.UTF_8 4 | 5 | import com.redislabs.provider.redis.df.BinaryDataframeSuite 6 | import com.redislabs.provider.redis.env.RedisStandaloneEnv 7 | import com.redislabs.provider.redis.util.ConnectionUtils.withConnection 8 | import org.apache.spark.sql.redis.RedisSourceRelation.dataKey 9 | 10 | /** 11 | * @author The Viet Nguyen 12 | */ 13 | class BinaryDataframeStandaloneSuite extends BinaryDataframeSuite with RedisStandaloneEnv { 14 | 15 | override def saveMap(tableName: String, key: String, value: Map[String, String]): Unit = { 16 | val host = redisConfig.initialHost 17 | withConnection(host.connect()) { conn => 18 | conn.set(dataKey(tableName, key).getBytes(UTF_8), serialize(value)) 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/standalone/CsvDataframeStandaloneSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.standalone 2 | 3 | import com.redislabs.provider.redis.df.CsvDataframeSuite 4 | import com.redislabs.provider.redis.env.RedisStandaloneEnv 5 | 6 | class CsvDataframeStandaloneSuite extends CsvDataframeSuite with RedisStandaloneEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/standalone/DataframeStandaloneSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.standalone 2 | 3 | import com.redislabs.provider.redis.df.DataframeSuite 4 | import com.redislabs.provider.redis.env.RedisStandaloneEnv 5 | 6 | class DataframeStandaloneSuite extends DataframeSuite with RedisStandaloneEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/standalone/FilteredDataframeStandaloneSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.standalone 2 | 3 | import com.redislabs.provider.redis.df.{FilteredDataframeSuite, RedisDataframeSuite} 4 | import com.redislabs.provider.redis.env.RedisStandaloneEnv 5 | 6 | /** 7 | * @author The Viet Nguyen 8 | */ 9 | class FilteredDataframeStandaloneSuite extends FilteredDataframeSuite with RedisStandaloneEnv 10 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/standalone/HashDataframeStandaloneSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.standalone 2 | 3 | import com.redislabs.provider.redis.df.HashDataframeSuite 4 | import com.redislabs.provider.redis.env.RedisStandaloneEnv 5 | import com.redislabs.provider.redis.util.ConnectionUtils.withConnection 6 | 7 | import 
scala.collection.JavaConverters._ 8 | 9 | /** 10 | * @author The Viet Nguyen 11 | */ 12 | class HashDataframeStandaloneSuite extends HashDataframeSuite with RedisStandaloneEnv { 13 | 14 | override def saveMap(tableName: String, key: String, value: Map[String, String]): Unit = { 15 | val host = redisConfig.initialHost 16 | withConnection(host.connect()) { conn => 17 | conn.hmset(tableName + ":" + key, value.asJava) 18 | } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/standalone/SparkSqlStandaloneSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.standalone 2 | 3 | import com.redislabs.provider.redis.df.SparkSqlSuite 4 | import com.redislabs.provider.redis.env.RedisStandaloneEnv 5 | 6 | /** 7 | * @author The Viet Nguyen 8 | */ 9 | class SparkSqlStandaloneSuite extends SparkSqlSuite with RedisStandaloneEnv 10 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/env/Env.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.env 2 | 3 | import com.redislabs.provider.redis.RedisConfig 4 | import org.apache.spark.sql.SparkSession 5 | import org.apache.spark.streaming.StreamingContext 6 | import org.apache.spark.{SparkConf, SparkContext} 7 | 8 | trait Env { 9 | 10 | val conf: SparkConf 11 | var spark: SparkSession = _ 12 | var sc: SparkContext = _ 13 | var ssc: StreamingContext = _ 14 | 15 | val redisHost = "127.0.0.1" 16 | val redisPort = 6379 17 | val redisAuth = "passwd" // password for the 'default' user (used with AUTH) 18 | 19 | // user credentials 20 | val user = "alice" 21 | val userPassword = "p1pp0" 22 | 23 | val redisConfig: RedisConfig 24 | } 25 | 26 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/env/RedisClusterAclEnv.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.env 2 | 3 | import com.redislabs.provider.redis.{RedisConfig, RedisEndpoint} 4 | import org.apache.spark.SparkConf 5 | 6 | /** 7 | * Cluster with user/password authentication 8 | */ 9 | trait RedisClusterAclEnv extends Env { 10 | 11 | override val redisPort = 7379 12 | 13 | override val conf: SparkConf = new SparkConf() 14 | .setMaster("local[*]").setAppName(getClass.getName) 15 | .set("spark.redis.host", redisHost) 16 | .set("spark.redis.port", s"$redisPort") 17 | .set("spark.redis.user", user) 18 | .set("spark.redis.auth", userPassword) 19 | .set("spark.streaming.stopGracefullyOnShutdown", "true") 20 | .set("spark.driver.bindAddress", "127.0.0.1") 21 | 22 | override val redisConfig: RedisConfig = 23 | new RedisConfig(RedisEndpoint( 24 | host = redisHost, 25 | port = redisPort, 26 | user = user, 27 | auth = userPassword)) 28 | } 29 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/env/RedisClusterEnv.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.env 2 | 3 | import com.redislabs.provider.redis.{RedisConfig, RedisEndpoint} 4 | import org.apache.spark.SparkConf 5 | 6 | /** 7 | * @author The Viet Nguyen 8 | */ 9 | trait RedisClusterEnv extends Env { 10 | 11 | override val redisPort = 7379 12
| 13 | override val conf: SparkConf = new SparkConf() 14 | .setMaster("local[*]").setAppName(getClass.getName) 15 | .set("spark.redis.host", redisHost) 16 | .set("spark.redis.port", s"$redisPort") 17 | .set("spark.streaming.stopGracefullyOnShutdown", "true") 18 | .set("spark.driver.bindAddress", "127.0.0.1") 19 | 20 | override val redisConfig: RedisConfig = new RedisConfig(RedisEndpoint(redisHost, redisPort)) 21 | } 22 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/env/RedisStandaloneAclEnv.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.env 2 | 3 | import com.redislabs.provider.redis.{RedisConfig, RedisEndpoint} 4 | import org.apache.spark.SparkConf 5 | 6 | /** 7 | * Standalone with user/password authentication 8 | */ 9 | trait RedisStandaloneAclEnv extends Env { 10 | 11 | override val conf: SparkConf = new SparkConf() 12 | .setMaster("local[*]").setAppName(getClass.getName) 13 | .set("spark.redis.host", redisHost) 14 | .set("spark.redis.port", s"$redisPort") 15 | .set("spark.redis.user", user) 16 | .set("spark.redis.auth", userPassword) 17 | .set("spark.streaming.stopGracefullyOnShutdown", "true") 18 | .set("spark.driver.bindAddress", "127.0.0.1") 19 | 20 | override val redisConfig: RedisConfig = 21 | new RedisConfig(RedisEndpoint( 22 | host = redisHost, 23 | port = redisPort, 24 | user = user, 25 | auth = userPassword)) 26 | } 27 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/env/RedisStandaloneEnv.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.env 2 | 3 | import com.redislabs.provider.redis.{RedisConfig, RedisEndpoint} 4 | import org.apache.spark.SparkConf 5 | 6 | /** 7 | * @author The Viet Nguyen 8 | */ 9 | trait RedisStandaloneEnv extends Env { 10 | 11 | override val conf: SparkConf = new SparkConf() 12 | .setMaster("local[*]").setAppName(getClass.getName) 13 | .set("spark.redis.host", redisHost) 14 | .set("spark.redis.port", s"$redisPort") 15 | .set("spark.redis.auth", redisAuth) 16 | .set("spark.streaming.stopGracefullyOnShutdown", "true") 17 | .set("spark.sql.streaming.forceDeleteTempCheckpointLocation", "true") 18 | .set("spark.driver.bindAddress", "127.0.0.1") 19 | 20 | override val redisConfig: RedisConfig = 21 | new RedisConfig(RedisEndpoint( 22 | host = redisHost, 23 | port = redisPort, 24 | auth = redisAuth)) 25 | } 26 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/env/RedisStandaloneSSLEnv.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.env 2 | 3 | import com.redislabs.provider.redis.{RedisConfig, RedisEndpoint} 4 | import org.apache.spark.SparkConf 5 | 6 | trait RedisStandaloneSSLEnv extends Env { 7 | 8 | override val redisPort = 6380 9 | 10 | override val conf: SparkConf = new SparkConf() 11 | .setMaster("local[*]").setAppName(getClass.getName) 12 | .set("spark.redis.host", redisHost) 13 | .set("spark.redis.port", s"$redisPort") 14 | .set("spark.redis.auth", redisAuth) 15 | .set("spark.redis.ssl", "true") 16 | .set("spark.streaming.stopGracefullyOnShutdown", "true") 17 | .set("spark.driver.bindAddress", "127.0.0.1") 18 | 19 | override val redisConfig: RedisConfig = 20 | new 
RedisConfig(RedisEndpoint(redisHost, redisPort, auth = redisAuth, ssl = true)) 21 | } 22 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/rdd/RedisKeysSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.rdd 2 | 3 | import com.redislabs.provider.redis.ReadWriteConfig 4 | import org.scalatest.Matchers 5 | import redis.clients.jedis.util.JedisClusterCRC16 6 | 7 | import scala.collection.JavaConverters._ 8 | 9 | trait RedisKeysSuite extends RedisRddSuite with Keys with Matchers { 10 | 11 | implicit val readWriteConfig: ReadWriteConfig = ReadWriteConfig.Default 12 | 13 | test("getKeys") { 14 | val returnedKeys = getKeys(redisConfig.hosts, 0, 1024, "*") 15 | .toArray.sorted 16 | 17 | val targetKeys = (sc.parallelize(content.split("\\W+")).collect :+ 18 | "all:words:cnt:sortedset" :+ 19 | "all:words:cnt:hash" :+ 20 | "all:words:list" :+ 21 | "all:words:set").filter(x => { 22 | val slot = JedisClusterCRC16.getSlot(x) 23 | !x.isEmpty && slot >= 0 && slot <= 1024 24 | }).distinct.sorted 25 | 26 | returnedKeys should be(targetKeys) 27 | } 28 | 29 | test("groupKeysByNode") { 30 | val allkeys = getKeys(redisConfig.hosts, 0, 16383, "*") 31 | val nodeKeysPairs = groupKeysByNode(redisConfig.hosts, allkeys) 32 | val returnedCnt = nodeKeysPairs.map { x => 33 | filterKeysByType(x._1.connect(), x._2, "string").length 34 | } 35 | .sum 36 | val targetCnt = sc.parallelize(content.split("\\W+").filter(!_.isEmpty)).distinct.count 37 | assert(returnedCnt == targetCnt) 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/rdd/RedisRddExtraSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.rdd 2 | 3 | import com.redislabs.provider.redis.util.ConnectionUtils.withConnection 4 | import org.scalatest.Matchers 5 | import com.redislabs.provider.redis._ 6 | import com.redislabs.provider.redis.util.TestUtils 7 | import redis.clients.jedis.exceptions.JedisConnectionException 8 | 9 | import scala.collection.JavaConverters._ 10 | 11 | /** 12 | * More RDD tests 13 | */ 14 | trait RedisRddExtraSuite extends SparkRedisSuite with Keys with Matchers { 15 | 16 | implicit val redisConfig: RedisConfig 17 | 18 | test("toRedisByteLISTs") { 19 | val list1 = Seq("a1", "b1", "c1") 20 | val list2 = Seq("a2", "b2", "c2") 21 | val keyValues = Seq( 22 | ("binary-list1", list1), 23 | ("binary-list2", list2) 24 | ) 25 | val keyValueBytes = keyValues.map { case (k, list) => (k.getBytes, list.map(_.getBytes())) } 26 | val rdd = sc.parallelize(keyValueBytes) 27 | sc.toRedisByteLISTs(rdd) 28 | 29 | verifyList("binary-list1", list1) 30 | verifyList("binary-list2", list2) 31 | } 32 | 33 | test("toRedisLISTs") { 34 | val list1 = Seq("a1", "b1", "c1") 35 | val list2 = Seq("a2", "b2", "c2") 36 | val keyValues = Seq( 37 | ("list1", list1), 38 | ("list2", list2) 39 | ) 40 | val rdd = sc.parallelize(keyValues) 41 | sc.toRedisLISTs(rdd) 42 | 43 | verifyList("list1", list1) 44 | verifyList("list2", list2) 45 | } 46 | 47 | test("toRedisHASHes") { 48 | val map1 = Map("k1" -> "v1", "k2" -> "v2") 49 | val map2 = Map("k3" -> "v3", "k4" -> "v4") 50 | val hashes = Seq( 51 | ("hash1", map1), 52 | ("hash2", map2) 53 | ) 54 | val rdd = sc.parallelize(hashes) 55 | sc.toRedisHASHes(rdd) 56 | 57 | verifyHash("hash1", map1) 58 | 
verifyHash("hash2", map2) 59 | } 60 | 61 | test("toRedisByteHASHes") { 62 | val map1 = Map("k1" -> "v1", "k2" -> "v2") 63 | val map2 = Map("k3" -> "v3", "k4" -> "v4") 64 | val hashes = Seq( 65 | ("hash1", map1), 66 | ("hash2", map2) 67 | ) 68 | val hashesBytes = hashes.map { case (k, hash) => (k.getBytes, hash.map { case (mapKey, mapVal) => (mapKey.getBytes, mapVal.getBytes) }) } 69 | val rdd = sc.parallelize(hashesBytes) 70 | sc.toRedisByteHASHes(rdd) 71 | 72 | verifyHash("hash1", map1) 73 | verifyHash("hash2", map2) 74 | } 75 | 76 | test("connection fails with incorrect user/pass") { 77 | assertThrows[JedisConnectionException] { 78 | new RedisConfig(RedisEndpoint( 79 | host = redisHost, 80 | port = redisPort, 81 | user = user, 82 | auth = "wrong_password")) 83 | } 84 | } 85 | 86 | test("connection with correct user/pass") { 87 | val userConfig = new RedisConfig(RedisEndpoint( 88 | host = redisHost, 89 | port = redisPort, 90 | user = user, 91 | auth = userPassword)) 92 | 93 | val someKey = TestUtils.generateRandomKey() 94 | val jedis = userConfig.connectionForKey(someKey) 95 | jedis.set(someKey, "123") 96 | jedis.get(someKey) should be("123") 97 | 98 | // test RDD operation 99 | sc.fromRedisKeyPattern(someKey)(redisConfig = userConfig) 100 | .collect()(0) should be(someKey) 101 | } 102 | 103 | def verifyList(list: String, vals: Seq[String]): Unit = { 104 | withConnection(redisConfig.getHost(list).endpoint.connect()) { conn => 105 | conn.lrange(list, 0, vals.size).asScala should be(vals.toList) 106 | } 107 | } 108 | 109 | def verifyHash(hash: String, vals: Map[String, String]): Unit = { 110 | withConnection(redisConfig.getHost(hash).endpoint.connect()) { conn => 111 | conn.hgetAll(hash).asScala should be(vals) 112 | } 113 | } 114 | 115 | } 116 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/rdd/RedisRddSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.rdd 2 | 3 | import com.redislabs.provider.redis.util.ConnectionUtils.withConnection 4 | import com.redislabs.provider.redis.{RedisConfig, SparkRedisSuite, toRedisContext} 5 | import org.scalatest.Matchers 6 | import scala.collection.JavaConverters._ 7 | 8 | import scala.io.Source.fromInputStream 9 | 10 | /** 11 | * @author The Viet Nguyen 12 | */ 13 | trait RedisRddSuite extends SparkRedisSuite with Keys with Matchers { 14 | 15 | implicit val redisConfig: RedisConfig 16 | 17 | val content: String = fromInputStream(getClass.getClassLoader.getResourceAsStream("blog")) 18 | .getLines.toArray.mkString("\n") 19 | 20 | val contentWords: Array[String] = content.split("\\W+").filter(_.nonEmpty) 21 | val zSetKey: String = "all:words:cnt:sortedset" 22 | val hashKey: String = "all:words:cnt:hash" 23 | val listKey: String = "all:words:list" 24 | val setKey: String = "all:words:set" 25 | val missingRedisKey: String = "missingRedisKey" 26 | 27 | override def beforeAll() { 28 | super.beforeAll() 29 | val wcnts = sc.parallelize(contentWords) 30 | .map { w => 31 | (w, 1) 32 | } 33 | .reduceByKey { 34 | _ + _ 35 | } 36 | .map { x => 37 | (x._1, x._2.toString) 38 | } 39 | val wds = sc.parallelize(contentWords) 40 | // Flush all the hosts 41 | redisConfig.hosts.foreach(node => { 42 | val conn = node.connect() 43 | conn.flushAll 44 | conn.close() 45 | }) 46 | sc.toRedisKV(wcnts) 47 | sc.toRedisZSET(wcnts, zSetKey) 48 | sc.toRedisHASH(wcnts, hashKey) 49 | sc.toRedisLIST(wds, listKey) 50 | 
sc.toRedisSET(wds, setKey) 51 | } 52 | 53 | test("RedisKVRDD") { 54 | val redisKVRDD = sc.fromRedisKV("*") 55 | val kvContents = redisKVRDD.sortByKey().collect 56 | val wrongTypeKeysRes = List(hashKey, zSetKey, listKey, setKey).map(sc.fromRedisKV(_).collect) 57 | val missingKeyRes = sc.fromRedisKV(missingRedisKey).collect() 58 | val wcnts = contentWords.map((_, 1)).groupBy(_._1). 59 | map(x => (x._1, x._2.map(_._2).sum.toString)).toArray.sortBy(_._1) 60 | kvContents shouldBe wcnts 61 | all(wrongTypeKeysRes) should have size 0 62 | missingKeyRes should have size 0 63 | } 64 | 65 | test("RedisZsetRDD") { 66 | val redisZSetWithScore = sc.fromRedisZSetWithScore(zSetKey) 67 | val zsetWithScore = redisZSetWithScore.sortByKey().collect 68 | 69 | val redisZSet = sc.fromRedisZSet("all:words:cnt:sortedset") 70 | val zset = redisZSet.collect.sorted 71 | 72 | val redisZRangeWithScore = sc.fromRedisZRangeWithScore(zSetKey, 0, 15) 73 | val zrangeWithScore = redisZRangeWithScore.collect.sortBy(x => (x._2, x._1)) 74 | 75 | val redisZRange = sc.fromRedisZRange(zSetKey, 0, 15) 76 | val zrange = redisZRange.collect.sorted 77 | 78 | val redisZRangeByScoreWithScore = 79 | sc.fromRedisZRangeByScoreWithScore(zSetKey, 3, 9) 80 | val zrangeByScoreWithScore = redisZRangeByScoreWithScore.collect.sortBy(x => (x._2, x._1)) 81 | 82 | val redisZRangeByScore = sc.fromRedisZRangeByScore(zSetKey, 3, 9) 83 | val zrangeByScore = redisZRangeByScore.collect.sorted 84 | 85 | val wrongTypeKeysRes = List(hashKey, setKey, listKey, contentWords(0)).map(sc.fromRedisZSetWithScore(_).collect) 86 | val missingKeyRes = sc.fromRedisZSetWithScore(missingRedisKey).collect() 87 | 88 | val wcnts = contentWords.map((_, 1)).groupBy(_._1). 89 | map(x => (x._1, x._2.map(_._2).sum.toDouble)) 90 | 91 | zsetWithScore should be(wcnts.toArray.sortBy(_._1)) 92 | zset should be(wcnts.keys.toArray.sorted) 93 | zrangeWithScore should be(wcnts.toArray.sortBy(x => (x._2, x._1)).take(16)) 94 | zrange should be(wcnts.toArray.sortBy(x => (x._2, x._1)).take(16).map(_._1)) 95 | zrangeByScoreWithScore should be(wcnts.toArray.filter(x => x._2 >= 3 && x._2 <= 9) 96 | .sortBy(x => (x._2, x._1))) 97 | zrangeByScore should be(wcnts.toArray.filter(x => x._2 >= 3 && x._2 <= 9).map(_._1).sorted) 98 | all(wrongTypeKeysRes) should have length 0 99 | missingKeyRes should have length 0 100 | } 101 | 102 | test("RedisHashRDD") { 103 | val redisHashRDD = sc.fromRedisHash(hashKey) 104 | val hashContents = redisHashRDD.sortByKey().collect 105 | val wcnts = contentWords.map((_, 1)).groupBy(_._1). 
106 | map(x => (x._1, x._2.map(_._2).sum.toString)).toArray.sortBy(_._1) 107 | val wrongTypeKeysRes = List(zSetKey, setKey, listKey, contentWords(0)).map(sc.fromRedisHash(_).collect) 108 | val missingKeyRes = sc.fromRedisHash(missingRedisKey).collect() 109 | 110 | hashContents should be(wcnts) 111 | all(wrongTypeKeysRes) should have length 0 112 | missingKeyRes should have length 0 113 | } 114 | 115 | test("RedisListRDD") { 116 | val redisListRDD = sc.fromRedisList(listKey) 117 | val listContents = redisListRDD.sortBy(x => x).collect 118 | val ws = contentWords.sorted 119 | val wrongTypeKeysRes = List(zSetKey, setKey, hashKey, contentWords(0)).map(sc.fromRedisList(_).collect) 120 | val missingKeyRes = sc.fromRedisList(missingRedisKey).collect() 121 | 122 | listContents should be(ws) 123 | all(wrongTypeKeysRes) should have length 0 124 | missingKeyRes should have length 0 125 | } 126 | 127 | test("RedisSetRDD") { 128 | val redisSetRDD = sc.fromRedisSet(setKey) 129 | val setContents = redisSetRDD.sortBy(x => x).collect 130 | val ws = content.split("\\W+").filter(!_.isEmpty).distinct.sorted 131 | val wrongTypeKeysRes = List(zSetKey, listKey, hashKey, contentWords(0)).map(sc.fromRedisSet(_).collect) 132 | val missingKeyRes = sc.fromRedisSet(missingRedisKey).collect() 133 | 134 | setContents should be(ws) 135 | all(wrongTypeKeysRes) should have length 0 136 | missingKeyRes should have length 0 137 | } 138 | 139 | test("Expire") { 140 | val expireTime = 1 141 | val prefix = s"#expire in $expireTime#:" 142 | val wcnts = sc.parallelize(contentWords).map((_, 1)). 143 | reduceByKey(_ + _).map(x => (prefix + x._1, x._2.toString)) 144 | val wds = sc.parallelize(contentWords) 145 | sc.toRedisKV(wcnts, expireTime) 146 | sc.toRedisZSET(wcnts, prefix + zSetKey, expireTime) 147 | sc.toRedisHASH(wcnts, prefix + hashKey, expireTime) 148 | sc.toRedisLIST(wds, prefix + listKey, expireTime) 149 | sc.toRedisSET(wds, prefix + setKey, expireTime) 150 | Thread.sleep(expireTime * 1000 + 1) 151 | sc.fromRedisKeyPattern(prefix + "*").count should be(0) 152 | } 153 | 154 | } 155 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/rdd/acl/RedisRDDClusterAclSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.rdd.acl 2 | 3 | import com.redislabs.provider.redis.env.RedisClusterAclEnv 4 | import com.redislabs.provider.redis.rdd.RedisRddSuite 5 | 6 | class RedisRDDClusterAclSuite extends RedisRddSuite with RedisClusterAclEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/rdd/acl/RedisRDDStandaloneAclSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.rdd.acl 2 | 3 | import com.redislabs.provider.redis.env.RedisStandaloneAclEnv 4 | import com.redislabs.provider.redis.rdd.RedisRddSuite 5 | 6 | class RedisRDDStandaloneAclSuite extends RedisRddSuite with RedisStandaloneAclEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/rdd/cluster/RedisKeysClusterSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.rdd.cluster 2 | 3 | import com.redislabs.provider.redis.env.RedisClusterEnv 4 | import com.redislabs.provider.redis.rdd.RedisKeysSuite 5 | 6 | class 
RedisKeysClusterSuite extends RedisKeysSuite with RedisClusterEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/rdd/cluster/RedisRDDClusterSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.rdd.cluster 2 | 3 | import com.redislabs.provider.redis.env.RedisClusterEnv 4 | import com.redislabs.provider.redis.rdd.RedisRddSuite 5 | 6 | class RedisRDDClusterSuite extends RedisRddSuite with RedisClusterEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/rdd/cluster/RedisRddExtraClusterSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.rdd.cluster 2 | 3 | import com.redislabs.provider.redis.env.RedisClusterEnv 4 | import com.redislabs.provider.redis.rdd.RedisRddExtraSuite 5 | 6 | class RedisRddExtraClusterSuite extends RedisRddExtraSuite with RedisClusterEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/rdd/standalone/RedisKeysStandaloneSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.rdd.standalone 2 | 3 | import com.redislabs.provider.redis.env.RedisStandaloneEnv 4 | import com.redislabs.provider.redis.rdd.RedisKeysSuite 5 | 6 | class RedisKeysStandaloneSuite extends RedisKeysSuite with RedisStandaloneEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/rdd/standalone/RedisRDDStandaloneSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.rdd.standalone 2 | 3 | import com.redislabs.provider.redis.env.RedisStandaloneEnv 4 | import com.redislabs.provider.redis.rdd.RedisRddSuite 5 | 6 | class RedisRDDStandaloneSuite extends RedisRddSuite with RedisStandaloneEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/rdd/standalone/RedisRddExtraStandaloneSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.rdd.standalone 2 | 3 | import com.redislabs.provider.redis.env.RedisStandaloneEnv 4 | import com.redislabs.provider.redis.rdd.RedisRddExtraSuite 5 | 6 | class RedisRddExtraStandaloneSuite extends RedisRddExtraSuite with RedisStandaloneEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/stream/RedisXStreamSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.stream 2 | 3 | import com.redislabs.provider.redis.streaming.{ConsumerConfig, Earliest} 4 | import com.redislabs.provider.redis.util.ConnectionUtils.withConnection 5 | import com.redislabs.provider.redis.util.TestUtils 6 | import com.redislabs.provider.redis.SparkStreamingRedisSuite 7 | import com.redislabs.provider.redis.streaming._ 8 | import org.apache.spark.storage.StorageLevel 9 | import org.scalatest.Matchers 10 | import org.scalatest.concurrent.Eventually._ 11 | import org.scalatest.time.{Millis, Span} 12 | import redis.clients.jedis.StreamEntryID 13 | 14 | import 
scala.collection.JavaConversions._ 15 | 16 | // scalastyle:off multiple.string.literals 17 | trait RedisXStreamSuite extends SparkStreamingRedisSuite with Matchers { 18 | 19 | // timeout for eventually function 20 | implicit val patienceConfig = PatienceConfig(timeout = scaled(Span(5000, Millis))) 21 | 22 | test("createRedisXStream, 1 stream, 1 consumer") { 23 | val streamKey = TestUtils.generateRandomKey() 24 | 25 | // the data can be written to the stream earlier than we start receiver, so set offset to Earliest 26 | val stream = ssc.createRedisXStream(Seq(ConsumerConfig(streamKey, "g1", "c1", Earliest)), StorageLevel.MEMORY_ONLY) 27 | 28 | val _redisConfig = redisConfig // to make closure serializable 29 | 30 | // iterate over items and save to redis list 31 | // repartition to 1 to avoid concurrent write issues 32 | stream.repartition(1).foreachRDD { rdd => 33 | rdd.foreachPartition { partition => 34 | for (item <- partition) { 35 | val listKey = s"${item.streamKey}:list" 36 | withConnection(_redisConfig.connectionForKey(listKey)) { conn => 37 | conn.rpush(listKey, s"${item.id.v1}-${item.id.v2} " + item.fields.mkString(" ")) 38 | } 39 | } 40 | } 41 | } 42 | 43 | // write to stream 44 | withConnection(redisConfig.connectionForKey(streamKey)) { conn => 45 | conn.xadd(streamKey, new StreamEntryID(1, 0), Map("a" -> "1", "z" -> "4")) 46 | conn.xadd(streamKey, new StreamEntryID(1, 1), Map("b" -> "2")) 47 | conn.xadd(streamKey, new StreamEntryID(2, 0), Map("c" -> "3")) 48 | } 49 | 50 | ssc.start() 51 | 52 | // eventually there should be items in the list 53 | val listKey = s"$streamKey:list" 54 | withConnection(redisConfig.connectionForKey(listKey)) { conn => 55 | eventually { 56 | conn.llen(listKey) shouldBe 3 57 | conn.lpop(listKey) should be("1-0 a -> 1 z -> 4") 58 | conn.lpop(listKey) should be("1-1 b -> 2") 59 | conn.lpop(listKey) should be("2-0 c -> 3") 60 | } 61 | } 62 | } 63 | 64 | test("createRedisXStream, 1 stream, 2 consumers") { 65 | val streamKey = TestUtils.generateRandomKey() 66 | 67 | // the data can be written to the stream earlier than we start receiver, so set offset to Earliest 68 | val stream = ssc.createRedisXStream(Seq( 69 | ConsumerConfig(streamKey, "g1", "c1", Earliest, batchSize = 1), 70 | ConsumerConfig(streamKey, "g1", "c2", Earliest, batchSize = 1) 71 | ), StorageLevel.MEMORY_ONLY) 72 | 73 | val _redisConfig = redisConfig // to make closure serializable 74 | 75 | // iterate over items and save to redis list 76 | // repartition to 1 to avoid concurrent write issues 77 | stream.repartition(1).foreachRDD { rdd => 78 | rdd.foreachPartition { partition => 79 | for (item <- partition) { 80 | val listKey = s"${item.streamKey}:list" 81 | withConnection(_redisConfig.connectionForKey(listKey)) { conn => 82 | conn.rpush(listKey, s"${item.id.v1}-${item.id.v2} " + item.fields.mkString(" ")) 83 | } 84 | } 85 | } 86 | } 87 | 88 | // write to stream 89 | withConnection(redisConfig.connectionForKey(streamKey)) { conn => 90 | conn.xadd(streamKey, new StreamEntryID(1, 0), Map("a" -> "1", "z" -> "4")) 91 | conn.xadd(streamKey, new StreamEntryID(1, 1), Map("b" -> "2")) 92 | conn.xadd(streamKey, new StreamEntryID(2, 0), Map("c" -> "3")) 93 | } 94 | 95 | ssc.start() 96 | 97 | // eventually there should be items in the list, the ordering is not deterministic 98 | val listKey = s"$streamKey:list" 99 | withConnection(redisConfig.connectionForKey(listKey)) { conn => 100 | eventually { 101 | conn.llen(listKey) shouldBe 3 102 | (1 to 3).map(_ => conn.lpop(listKey)).toSet shouldBe Set( 103 | 
"1-0 a -> 1 z -> 4", 104 | "1-1 b -> 2", 105 | "2-0 c -> 3" 106 | ) 107 | } 108 | } 109 | } 110 | 111 | test("createRedisXStream, 2 streams, 2 consumers") { 112 | val stream1Key = TestUtils.generateRandomKey() 113 | val stream2Key = TestUtils.generateRandomKey() 114 | 115 | logInfo("stream1Key " + stream1Key) 116 | logInfo("stream2Key " + stream2Key) 117 | 118 | // the data can be written to the stream earlier than we start receiver, so set offset to Earliest 119 | val stream = ssc.createRedisXStream(Seq( 120 | ConsumerConfig(stream1Key, "g1", "c1", Earliest, batchSize = 1), 121 | ConsumerConfig(stream2Key, "g1", "c2", Earliest, batchSize = 1) 122 | ), StorageLevel.MEMORY_ONLY) 123 | 124 | val _redisConfig = redisConfig // to make closure serializable 125 | 126 | // iterate over items and save to redis list 127 | // repartition to 1 to avoid concurrent write issues 128 | stream.repartition(1).foreachRDD { rdd => 129 | rdd.foreachPartition { partition => 130 | for (item <- partition) { 131 | val listKey = s"${item.streamKey}:list" 132 | withConnection(_redisConfig.connectionForKey(listKey)) { conn => 133 | conn.rpush(listKey, s"${item.id.v1}-${item.id.v2} " + item.fields.mkString(" ")) 134 | } 135 | } 136 | } 137 | } 138 | 139 | // write to stream 140 | withConnection(redisConfig.connectionForKey(stream1Key)) { conn => 141 | conn.xadd(stream1Key, new StreamEntryID(1, 0), Map("a" -> "1", "z" -> "4")) 142 | } 143 | withConnection(redisConfig.connectionForKey(stream2Key)) { conn => 144 | conn.xadd(stream2Key, new StreamEntryID(1, 1), Map("b" -> "2")) 145 | conn.xadd(stream2Key, new StreamEntryID(2, 0), Map("c" -> "3")) 146 | } 147 | 148 | ssc.start() 149 | 150 | // eventually there should be items in the list 151 | val list1Key = s"$stream1Key:list" 152 | withConnection(redisConfig.connectionForKey(list1Key)) { conn => 153 | eventually { 154 | conn.llen(list1Key) shouldBe 1 155 | conn.lpop(list1Key) should be("1-0 a -> 1 z -> 4") 156 | } 157 | } 158 | 159 | val list2Key = s"$stream2Key:list" 160 | withConnection(redisConfig.connectionForKey(list2Key)) { conn => 161 | eventually { 162 | conn.llen(list2Key) shouldBe 2 163 | conn.lpop(list2Key) should be("1-1 b -> 2") 164 | conn.lpop(list2Key) should be("2-0 c -> 3") 165 | } 166 | } 167 | 168 | } 169 | 170 | } 171 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/stream/cluster/RedisXStreamClusterSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.stream.cluster 2 | 3 | import com.redislabs.provider.redis.env.RedisClusterEnv 4 | import com.redislabs.provider.redis.stream.RedisXStreamSuite 5 | 6 | class RedisXStreamClusterSuite extends RedisXStreamSuite with RedisClusterEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/stream/standalone/RedisXStreamStandaloneSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.stream.standalone 2 | 3 | import com.redislabs.provider.redis.env.RedisStandaloneEnv 4 | import com.redislabs.provider.redis.stream.RedisXStreamSuite 5 | 6 | class RedisXStreamStandaloneSuite extends RedisXStreamSuite with RedisStandaloneEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/util/BenchmarkTest.java: 
-------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.util; 2 | 3 | import java.lang.annotation.ElementType; 4 | import java.lang.annotation.Retention; 5 | import java.lang.annotation.RetentionPolicy; 6 | import java.lang.annotation.Target; 7 | 8 | /** 9 | * @author The Viet Nguyen 10 | */ 11 | @org.scalatest.TagAnnotation 12 | @Retention(RetentionPolicy.RUNTIME) 13 | @Target({ElementType.METHOD, ElementType.TYPE}) 14 | public @interface BenchmarkTest { 15 | } 16 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/util/CollectionUtilsTest.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.util 2 | 3 | import com.redislabs.provider.redis.util.CollectionUtils.RichCollection 4 | import org.scalatest.{FunSuite, Matchers} 5 | 6 | /** 7 | * @author The Viet Nguyen 8 | */ 9 | class CollectionUtilsTest extends FunSuite with Matchers { 10 | 11 | test("distinctBy") { 12 | val persons = Seq(Person("John", 30, "60 Wall Street", 150.5), 13 | Person("John", 30, "18 Main Street", 150.5), Person("Peter", 35, "110 Wall Street", 200.3)) 14 | val distinctPersons = persons.distinctBy(_.name) 15 | distinctPersons shouldBe Seq(Person("John", 30, "60 Wall Street", 150.5), 16 | Person("Peter", 35, "110 Wall Street", 200.3)) 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/util/ConnectionSSLUtilsTest.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.util 2 | 3 | import com.redislabs.provider.redis.env.RedisStandaloneSSLEnv 4 | import com.redislabs.provider.redis.util.ConnectionUtils.{JedisExt, XINFO} 5 | import org.scalatest.{FunSuite, Matchers} 6 | import redis.clients.jedis.StreamEntryID 7 | 8 | import scala.collection.JavaConverters._ 9 | 10 | /** 11 | * @author The Viet Nguyen 12 | */ 13 | class ConnectionSSLUtilsTest extends FunSuite with Matchers with RedisStandaloneSSLEnv { 14 | 15 | test("xinfo") { 16 | val streamKey = TestUtils.generateRandomKey() 17 | val conn = redisConfig.connectionForKey(streamKey) 18 | val data = Map("key" -> "value").asJava 19 | val entryId = conn.xadd(streamKey, new StreamEntryID(0, 1), data) 20 | val info = conn.xinfo(XINFO.SubCommandStream, streamKey) 21 | info.get(XINFO.LastGeneratedId) shouldBe Some(entryId.toString) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/util/ConnectionUtilsTest.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.util 2 | 3 | import com.redislabs.provider.redis.env.RedisStandaloneEnv 4 | import com.redislabs.provider.redis.util.ConnectionUtils.{JedisExt, XINFO} 5 | import org.scalatest.{FunSuite, Matchers} 6 | import redis.clients.jedis.StreamEntryID 7 | 8 | import scala.collection.JavaConverters._ 9 | 10 | /** 11 | * @author The Viet Nguyen 12 | */ 13 | class ConnectionUtilsTest extends FunSuite with Matchers with RedisStandaloneEnv { 14 | 15 | test("xinfo") { 16 | val streamKey = TestUtils.generateRandomKey() 17 | val conn = redisConfig.connectionForKey(streamKey) 18 | val data = Map("key" -> "value").asJava 19 | val entryId = conn.xadd(streamKey, new StreamEntryID(0, 1), data) 20 | val 
info = conn.xinfo(XINFO.SubCommandStream, streamKey) 21 | info.get(XINFO.LastGeneratedId) shouldBe Some(entryId.toString) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/util/EntityId.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.util 2 | 3 | import org.apache.spark.sql.types._ 4 | 5 | /** 6 | * @author The Viet Nguyen 7 | */ 8 | case class EntityId(_id: String, name: String) 9 | 10 | object EntityId { 11 | 12 | val schema = StructType(Array( 13 | StructField("_id", StringType), 14 | StructField("name", StringType) 15 | )) 16 | } 17 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/util/JsonUtilsTest.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.util 2 | 3 | import org.scalatest.{FunSuite, Matchers} 4 | 5 | /** 6 | * @author The Viet Nguyen 7 | */ 8 | class JsonUtilsTest extends FunSuite with Matchers { 9 | 10 | test("testToJson") { 11 | val json = JsonUtils.toJson(Map("key" -> "value")) 12 | json shouldBe """{"key":"value"}""" 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/util/Person.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.util 2 | 3 | import com.redislabs.provider.redis.util.TestUtils._ 4 | import org.apache.spark.sql.types._ 5 | import org.apache.spark.sql.{DataFrame, SparkSession} 6 | 7 | /** 8 | * @author The Viet Nguyen 9 | */ 10 | case class Person(name: String, age: Int, address: String, salary: Double) 11 | 12 | object Person { 13 | 14 | val TableNamePrefix = "person" 15 | val KeyName = "name" 16 | 17 | val data = Seq( 18 | Person("John", 30, "60 Wall Street", 150.5), 19 | Person("Peter", 35, "110 Wall Street", 200.3) 20 | ) 21 | 22 | val dataMaps = Seq( 23 | Map("name" -> "John", "age" -> "30", "address" -> "60 Wall Street", "salary" -> "150.5"), 24 | Map("name" -> "Peter", "age" -> "35", "address" -> "110 Wall Street", "salary" -> "200.3") 25 | ) 26 | 27 | val schema = StructType(Array( 28 | StructField("name", StringType), 29 | StructField("age", IntegerType), 30 | StructField("address", StringType), 31 | StructField("salary", DoubleType) 32 | )) 33 | 34 | val fullSchema = StructType(schema.fields :+ StructField("_id", StringType)) 35 | 36 | def df(spark: SparkSession): DataFrame = spark.createDataFrame(data) 37 | 38 | def generatePersonTableName(): String = generateTableName(TableNamePrefix) 39 | 40 | def generatePersonStreamKey(): String = generatePersonTableName() 41 | 42 | } 43 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/util/TestUtils.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.util 2 | 3 | import java.util.UUID 4 | 5 | import org.scalatest.Assertions 6 | import scala.reflect.Manifest 7 | 8 | object TestUtils { 9 | 10 | def generateTableName(prefix: String): String = { 11 | // generate random table, so we can run test multiple times and not append/overwrite data 12 | prefix + UUID.randomUUID().toString.replace("-", "") 13 | } 14 | 15 | def generateRandomKey(): String = { 16 | 
UUID.randomUUID().toString.replace("-", "") 17 | } 18 | 19 | /** 20 | * A wrapper of Assertions.intercept() that suppresses spark errors in the logs. 21 | * It makes it easier to analyse unit test output. 22 | */ 23 | def interceptSparkErr[T <: AnyRef](f: => Any)(implicit manifest: Manifest[T]): T = { 24 | // turn off spark logger 25 | val logger = org.apache.log4j.Logger.getLogger("org") 26 | val levelBefore = logger.getLevel 27 | logger.setLevel(org.apache.log4j.Level.OFF) 28 | 29 | // delegate interception 30 | val interceptRes = Assertions.intercept(f) 31 | 32 | // revert logger 33 | logger.setLevel(levelBefore) 34 | 35 | interceptRes 36 | } 37 | 38 | 39 | } 40 | -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/redis/RedisSourceRelationTest.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis 2 | 3 | import org.scalatest.{FunSuite, Matchers} 4 | 5 | /** 6 | * @author The Viet Nguyen 7 | */ 8 | class RedisSourceRelationTest extends FunSuite with Matchers { 9 | 10 | test("redis key extractor with prefix pattern") { 11 | val key = RedisSourceRelation.tableKey("table*", "tablekey") 12 | key shouldBe "key" 13 | } 14 | 15 | test("redis key extractor with other patterns") { 16 | val key = RedisSourceRelation.tableKey("*table", "key") 17 | key shouldBe "key" 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/redis/stream/RedisConsumerOffsetTest.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis.stream 2 | 3 | import org.scalatest.{FunSuite, Matchers} 4 | 5 | /** 6 | * @author The Viet Nguyen 7 | */ 8 | class RedisConsumerOffsetTest extends FunSuite with Matchers { 9 | 10 | test("testFromJson") { 11 | val offset = RedisSourceOffset.fromJson( 12 | """ 13 | |{ 14 | | "offsets":{ 15 | | "mystream": { 16 | | "groupName": "group55", 17 | | "offset": "1543674099961-0" 18 | | } 19 | | } 20 | |} 21 | |""".stripMargin) 22 | offset shouldBe RedisSourceOffset(Map("mystream" -> 23 | RedisConsumerOffset("group55", "1543674099961-0"))) 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/redis/stream/RedisSourceConfigSuite.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis.stream 2 | 3 | import org.apache.spark.sql.redis._ 4 | import org.scalatest.{FunSuite, Matchers} 5 | 6 | /** 7 | * @author The Viet Nguyen 8 | */ 9 | class RedisSourceConfigSuite extends FunSuite with Matchers { 10 | 11 | val group: String = "group55" 12 | 13 | test("testFromMap") { 14 | val config = RedisSourceConfig.fromMap(Map( 15 | StreamOptionStreamKeys -> "mystream1,mystream2,mystream3", 16 | StreamOptionStreamOffsets -> 17 | s""" 18 | |{ 19 | | "offsets":{ 20 | | "mystream1": { 21 | | "groupName": "$group", 22 | | "offset": "0-10" 23 | | }, 24 | | "mystream2": { 25 | | "groupName": "$group", 26 | | "offset": "0-7" 27 | | } 28 | | } 29 | |} 30 | """.stripMargin, 31 | StreamOptionParallelism -> "2", 32 | StreamOptionGroupName -> group, 33 | StreamOptionConsumerPrefix -> "consumer" 34 | )) 35 | config shouldBe RedisSourceConfig( 36 | Seq( 37 | RedisConsumerConfig("mystream1", group, "consumer-1", 100, 500), 38 | RedisConsumerConfig("mystream1", group, "consumer-2", 100, 500), 39 |
RedisConsumerConfig("mystream2", group, "consumer-1", 100, 500), 40 | RedisConsumerConfig("mystream2", group, "consumer-2", 100, 500), 41 | RedisConsumerConfig("mystream3", group, "consumer-1", 100, 500), 42 | RedisConsumerConfig("mystream3", group, "consumer-2", 100, 500) 43 | ), 44 | Some(RedisSourceOffset(Map( 45 | "mystream1" -> RedisConsumerOffset(group, "0-10"), 46 | "mystream2" -> RedisConsumerOffset(group, "0-7") 47 | ))) 48 | ) 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/redis/stream/RedisSourceTest.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis.stream 2 | 3 | import org.scalatest.{FunSuite, Matchers, OptionValues} 4 | 5 | /** 6 | * @author The Viet Nguyen 7 | */ 8 | class RedisSourceTest extends FunSuite with Matchers with OptionValues { 9 | 10 | test("testGetOffsetRanges") { 11 | val startOffsets = RedisSourceOffset(Map("mystream" -> RedisConsumerOffset("group55", "0-0"))) 12 | val endOffsets = RedisSourceOffset(Map("mystream" -> RedisConsumerOffset("group55", "0-1"))) 13 | val consumerConfig = RedisConsumerConfig("mystream", "group55", "consumer", 1000, 100) 14 | val consumerConfigs = Seq(consumerConfig) 15 | val offsetRanges = RedisSource.getOffsetRanges(Some(startOffsets), endOffsets, consumerConfigs) 16 | offsetRanges.head shouldBe RedisSourceOffsetRange(Some("0-0"), "0-1", consumerConfig) 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/redis/stream/cluster/RedisStreamSourceClusterSuite.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis.stream.cluster 2 | 3 | import com.redislabs.provider.redis.env.RedisClusterEnv 4 | import org.apache.spark.sql.redis.stream.RedisStreamSourceSuite 5 | 6 | class RedisStreamSourceClusterSuite extends RedisStreamSourceSuite with RedisClusterEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/redis/stream/standalone/RedisStreamSourceStandaloneSuite.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis.stream.standalone 2 | 3 | import com.redislabs.provider.redis.env.RedisStandaloneEnv 4 | import org.apache.spark.sql.redis.stream.RedisStreamSourceSuite 5 | 6 | class RedisStreamSourceStandaloneSuite extends RedisStreamSourceSuite with RedisStandaloneEnv 7 | --------------------------------------------------------------------------------