├── .github ├── release-drafter-config.yml └── workflows │ ├── integration.yml │ ├── release-drafter.yml │ └── version-and-release.yml ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── build ├── sbt └── sbt-launch-lib.bash ├── doc ├── cluster.md ├── configuration.md ├── dataframe.md ├── dev.md ├── getting-started.md ├── java.md ├── python.md ├── rdd.md ├── streaming.md └── structured-streaming.md ├── pom.xml ├── project ├── build.properties └── plugins.sbt ├── scalastyle-config.xml └── src ├── main ├── resources │ └── META-INF │ │ └── services │ │ └── org.apache.spark.sql.sources.DataSourceRegister └── scala │ ├── com │ └── redislabs │ │ └── provider │ │ └── redis │ │ ├── ConnectionPool.scala │ │ ├── RedisConfig.scala │ │ ├── package.scala │ │ ├── partitioner │ │ ├── RedisPartition.scala │ │ └── RedisPartitioner.scala │ │ ├── rdd │ │ └── RedisRDD.scala │ │ ├── redisFunctions.scala │ │ ├── streaming │ │ ├── RedisInputDStream.scala │ │ ├── RedisStreamReceiver.scala │ │ ├── package.scala │ │ └── redisStreamingFunctions.scala │ │ └── util │ │ ├── CollectionUtils.scala │ │ ├── ConnectionUtils.scala │ │ ├── JsonUtils.scala │ │ ├── Logging.scala │ │ ├── ParseUtils.scala │ │ ├── PipelineUtils.scala │ │ └── StreamUtils.scala │ └── org │ └── apache │ └── spark │ └── sql │ └── redis │ ├── BinaryRedisPersistence.scala │ ├── DefaultSource.scala │ ├── HashRedisPersistence.scala │ ├── RedisPersistence.scala │ ├── RedisSourceRelation.scala │ ├── redis.scala │ └── stream │ ├── RedisSource.scala │ ├── RedisSourceConfig.scala │ ├── RedisSourceOffset.scala │ ├── RedisSourceRdd.scala │ ├── RedisSourceTypes.scala │ ├── RedisStreamProvider.scala │ └── RedisStreamReader.scala └── test ├── resources ├── blog ├── log4j.properties ├── test.csv └── tls │ ├── ca.crt │ ├── ca.key │ ├── ca.txt │ ├── client.csr │ ├── clientkeystore │ ├── redis.crt │ ├── redis.dh │ └── redis.key └── scala ├── com └── redislabs │ └── provider │ └── redis │ ├── RedisBenchmarks.scala │ ├── RedisConfigSuite.scala │ ├── SparkRedisSuite.scala │ ├── SparkStreamingRedisSuite.scala │ ├── df │ ├── AclDataframeSuite.scala │ ├── BinaryDataframeSuite.scala │ ├── CsvDataframeSuite.scala │ ├── DataframeSuite.scala │ ├── FilteredDataframeSuite.scala │ ├── HashDataframeSuite.scala │ ├── RedisDataframeSuite.scala │ ├── SparkSqlSuite.scala │ ├── acl │ │ ├── AclDataframeClusterSuite.scala │ │ └── AclDataframeStandaloneSuite.scala │ ├── benchmark │ │ ├── DataframeBenchmarkSuite.scala │ │ ├── ManyValueBenchmarkSuite.scala │ │ ├── SingleValueBenchmarkSuite.scala │ │ └── cluster │ │ │ ├── BinaryModelManyValueClusterBenchmarkSuite.scala │ │ │ ├── BinaryModelSingleValueClusterBenchmarkSuite.scala │ │ │ ├── HashModelManyValueClusterBenchmarkSuite.scala │ │ │ └── HashModelSingleValueClusterBenchmarkSuite.scala │ ├── cluster │ │ ├── BinaryDataframeClusterSuite.scala │ │ ├── CsvDataframeClusterSuite.scala │ │ ├── DataframeClusterSuite.scala │ │ ├── FilteredDataframeClusterSuite.scala │ │ ├── HashDataframeClusterSuite.scala │ │ └── SparkSqlClusterSuite.scala │ └── standalone │ │ ├── BinaryDataframeStandaloneSuite.scala │ │ ├── CsvDataframeStandaloneSuite.scala │ │ ├── DataframeStandaloneSuite.scala │ │ ├── FilteredDataframeStandaloneSuite.scala │ │ ├── HashDataframeStandaloneSuite.scala │ │ └── SparkSqlStandaloneSuite.scala │ ├── env │ ├── Env.scala │ ├── RedisClusterAclEnv.scala │ ├── RedisClusterEnv.scala │ ├── RedisStandaloneAclEnv.scala │ ├── RedisStandaloneEnv.scala │ └── RedisStandaloneSSLEnv.scala │ ├── rdd │ ├── RedisKeysSuite.scala │ ├── 
RedisRddExtraSuite.scala │ ├── RedisRddSuite.scala │ ├── acl │ │ ├── RedisRDDClusterAclSuite.scala │ │ └── RedisRDDStandaloneAclSuite.scala │ ├── cluster │ │ ├── RedisKeysClusterSuite.scala │ │ ├── RedisRDDClusterSuite.scala │ │ └── RedisRddExtraClusterSuite.scala │ └── standalone │ │ ├── RedisKeysStandaloneSuite.scala │ │ ├── RedisRDDStandaloneSuite.scala │ │ └── RedisRddExtraStandaloneSuite.scala │ ├── stream │ ├── RedisXStreamSuite.scala │ ├── cluster │ │ └── RedisXStreamClusterSuite.scala │ └── standalone │ │ └── RedisXStreamStandaloneSuite.scala │ └── util │ ├── BenchmarkTest.java │ ├── CollectionUtilsTest.scala │ ├── ConnectionSSLUtilsTest.scala │ ├── ConnectionUtilsTest.scala │ ├── EntityId.scala │ ├── JsonUtilsTest.scala │ ├── Person.scala │ └── TestUtils.scala └── org └── apache └── spark └── sql └── redis ├── RedisSourceRelationTest.scala └── stream ├── RedisConsumerOffsetTest.scala ├── RedisSourceConfigSuite.scala ├── RedisSourceTest.scala ├── RedisStreamSourceSuite.scala ├── cluster └── RedisStreamSourceClusterSuite.scala └── standalone └── RedisStreamSourceStandaloneSuite.scala /.github/release-drafter-config.yml: -------------------------------------------------------------------------------- 1 | name-template: 'Version $NEXT_PATCH_VERSION🌈' 2 | tag-template: 'v$NEXT_PATCH_VERSION' 3 | categories: 4 | - title: '🚀Features' 5 | labels: 6 | - 'feature' 7 | - 'enhancement' 8 | - title: 'Bug Fixes' 9 | labels: 10 | - 'fix' 11 | - 'bugfix' 12 | - 'bug' 13 | - title: '🧰Maintenance' 14 | label: 'chore' 15 | change-template: '- $TITLE @$AUTHOR (#$NUMBER)' 16 | exclude-labels: 17 | - 'skip-changelog' 18 | template: | 19 | ## Changes 20 | 21 | $CHANGES -------------------------------------------------------------------------------- /.github/workflows/integration.yml: -------------------------------------------------------------------------------- 1 | name: Continuous Integration 2 | on: 3 | push: 4 | paths-ignore: 5 | - 'doc/**' 6 | - '**/*.md' 7 | branches: 8 | - master 9 | - '[0-9].*' 10 | pull_request: 11 | branches: 12 | - master 13 | - '[0-9].*' 14 | schedule: 15 | - cron: '0 1 * * *' # nightly build 16 | workflow_dispatch: 17 | 18 | jobs: 19 | 20 | build: 21 | name: Build and Test 22 | runs-on: ubuntu-latest 23 | steps: 24 | - name: Checkout project 25 | uses: actions/checkout@v4 26 | - name: Set Java up in the runner 27 | uses: actions/setup-java@v4 28 | with: 29 | java-version: '8' 30 | distribution: 'temurin' 31 | cache: 'maven' 32 | - name: Setup Maven 33 | uses: s4u/setup-maven-action@v1.8.0 34 | with: 35 | java-version: 8 36 | - name: Install missing dependencies to container 37 | run: | 38 | sudo apt update 39 | sudo apt install -y libssl-dev 40 | wget http://download.redis.io/releases/redis-6.0.10.tar.gz 41 | tar -xzvf redis-6.0.10.tar.gz 42 | make -C redis-6.0.10 -j`nproc` BUILD_TLS=yes 43 | - name: Maven offline 44 | run: | 45 | mvn -q dependency:go-offline 46 | - name: Run tests 47 | run: | 48 | export PATH=$PWD/redis-6.0.10/src:$PATH 49 | make test 50 | env: 51 | JVM_OPTS: -Xmx3200m 52 | TERM: dumb 53 | - name: Upload coverage reports to Codecov 54 | uses: codecov/codecov-action@v4.0.1 55 | with: 56 | token: ${{ secrets.CODECOV_TOKEN }} 57 | -------------------------------------------------------------------------------- /.github/workflows/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name: Release Drafter 2 | 3 | on: 4 | push: 5 | # branches to consider in the event; optional, defaults to all 6 | branches: 7 
| - master 8 | 9 | jobs: 10 | update_release_draft: 11 | runs-on: ubuntu-latest 12 | steps: 13 | # Drafts your next Release notes as Pull Requests are merged into "master" 14 | - uses: release-drafter/release-drafter@v5 15 | with: 16 | # (Optional) specify config name to use, relative to .github/. Default: release-drafter.yml 17 | config-name: release-drafter-config.yml 18 | env: 19 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} -------------------------------------------------------------------------------- /.github/workflows/version-and-release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | build: 9 | runs-on: ubuntu-latest 10 | 11 | steps: 12 | - uses: actions/checkout@v2 13 | 14 | - name: get version from tag 15 | id: get_version 16 | run: | 17 | realversion="${GITHUB_REF/refs\/tags\//}" 18 | realversion="${realversion//v/}" 19 | realversion=`echo ${realversion}|cut -d '-' -f 2-2` 20 | echo "::set-output name=VERSION::$realversion" 21 | 22 | - name: Set up publishing to maven central 23 | uses: actions/setup-java@v2 24 | with: 25 | java-version: '8' 26 | distribution: 'adopt' 27 | server-id: ossrh 28 | server-username: MAVEN_USERNAME 29 | server-password: MAVEN_PASSWORD 30 | 31 | - name: mvn versions 32 | run: mvn versions:set -DnewVersion=${{ steps.get_version.outputs.VERSION }} 33 | 34 | - name: Install gpg key 35 | run: | 36 | cat <(echo -e "${{ secrets.OSSH_GPG_SECRET_KEY }}") | gpg --batch --import 37 | gpg --list-secret-keys --keyid-format LONG 38 | 39 | - name: Publish 40 | run: | 41 | mvn --no-transfer-progress \ 42 | --batch-mode \ 43 | -Dgpg.passphrase='${{ secrets.OSSH_GPG_SECRET_KEY_PASSWORD }}' \ 44 | -DskipTests deploy -P release 45 | env: 46 | MAVEN_USERNAME: ${{secrets.OSSH_USERNAME}} 47 | MAVEN_PASSWORD: ${{secrets.OSSH_TOKEN}} 48 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | hs_err_pid*.log 2 | nohup.out 3 | scalastyle-output.xml 4 | .idea 5 | *.iml 6 | **/.idea 7 | */.classpath 8 | */.project 9 | */.settings 10 | */.cache 11 | */test-output/ 12 | *.log 13 | */*.versionsBackup 14 | target/ 15 | *GitIgnored* 16 | *.asc 17 | *.gpg 18 | /bin/ 19 | 20 | *.class 21 | *.log 22 | *.pyc 23 | sbt/*.jar 24 | 25 | # sbt specific 26 | .cache/ 27 | .history/ 28 | .lib/ 29 | dist/* 30 | target/ 31 | lib_managed/ 32 | src_managed/ 33 | project/boot/ 34 | project/plugins/project/ 35 | build/*.jar 36 | checkpoint-test/ 37 | 38 | # eclipse 39 | .project 40 | .classpath 41 | /.settings/ 42 | 43 | # Redis 44 | dump.rdb 45 | .DS_Store -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2015-2018, Redis Labs 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 
15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # user 2 | USER_ACL = user alice on >p1pp0 ~* +@all 3 | 4 | # STANDALONE REDIS NODE 5 | define REDIS_STANDALONE_NODE_CONF 6 | daemonize yes 7 | port 6379 8 | pidfile /tmp/redis_standalone_node_for_spark-redis.pid 9 | logfile /tmp/redis_standalone_node_for_spark-redis.log 10 | save "" 11 | appendonly no 12 | requirepass passwd 13 | $(USER_ACL) 14 | endef 15 | 16 | # STANDALONE REDIS NODE WITH SSL 17 | define REDIS_STANDALONE_NODE_CONF_SSL 18 | daemonize yes 19 | port 0 20 | pidfile /tmp/redis_standalone_node__ssl_for_spark-redis.pid 21 | logfile /tmp/redis_standalone_node_ssl_for_spark-redis.log 22 | save "" 23 | appendonly no 24 | requirepass passwd 25 | $(USER_ACL) 26 | tls-auth-clients no 27 | tls-port 6380 28 | tls-cert-file ./src/test/resources/tls/redis.crt 29 | tls-key-file ./src/test/resources/tls/redis.key 30 | tls-ca-cert-file ./src/test/resources/tls/ca.crt 31 | tls-dh-params-file ./src/test/resources/tls/redis.dh 32 | endef 33 | 34 | # CLUSTER REDIS NODES 35 | define REDIS_CLUSTER_NODE1_CONF 36 | daemonize yes 37 | port 7379 38 | $(USER_ACL) 39 | pidfile /tmp/redis_cluster_node1_for_spark-redis.pid 40 | logfile /tmp/redis_cluster_node1_for_spark-redis.log 41 | save "" 42 | appendonly no 43 | cluster-enabled yes 44 | cluster-config-file /tmp/redis_cluster_node1_for_spark-redis.conf 45 | endef 46 | 47 | define REDIS_CLUSTER_NODE2_CONF 48 | daemonize yes 49 | port 7380 50 | $(USER_ACL) 51 | pidfile /tmp/redis_cluster_node2_for_spark-redis.pid 52 | logfile /tmp/redis_cluster_node2_for_spark-redis.log 53 | save "" 54 | appendonly no 55 | cluster-enabled yes 56 | cluster-config-file /tmp/redis_cluster_node2_for_spark-redis.conf 57 | endef 58 | 59 | define REDIS_CLUSTER_NODE3_CONF 60 | daemonize yes 61 | port 7381 62 | $(USER_ACL) 63 | pidfile /tmp/redis_cluster_node3_for_spark-redis.pid 64 | logfile /tmp/redis_cluster_node3_for_spark-redis.log 65 | save "" 66 | appendonly no 67 | cluster-enabled yes 68 | cluster-config-file /tmp/redis_cluster_node3_for_spark-redis.conf 69 | endef 70 | 71 | export REDIS_STANDALONE_NODE_CONF 72 | export REDIS_STANDALONE_NODE_CONF_SSL 73 | export REDIS_CLUSTER_NODE1_CONF 74 | export REDIS_CLUSTER_NODE2_CONF 75 | export REDIS_CLUSTER_NODE3_CONF 76 | 77 | start-standalone: 78 | echo "$$REDIS_STANDALONE_NODE_CONF" | redis-server - 79 | echo "$$REDIS_STANDALONE_NODE_CONF_SSL" | 
redis-server - 80 | 81 | 82 | start-cluster: 83 | echo "$$REDIS_CLUSTER_NODE1_CONF" | redis-server - 84 | echo "$$REDIS_CLUSTER_NODE2_CONF" | redis-server - 85 | echo "$$REDIS_CLUSTER_NODE3_CONF" | redis-server - 86 | redis-cli -p 7380 cluster meet 127.0.0.1 7379 > /dev/null 87 | redis-cli -p 7381 cluster meet 127.0.0.1 7379 > /dev/null 88 | slots=$$(seq 0 2047); slots=$$(echo $$slots | tr '\n' ' '); redis-cli -p 7379 cluster addslots $$slots > /dev/null 89 | slots=$$(seq 2048 3333); slots=$$(echo $$slots | tr '\n' ' '); redis-cli -p 7380 cluster addslots $$slots > /dev/null 90 | slots=$$(seq 3334 5460); slots=$$(echo $$slots | tr '\n' ' '); redis-cli -p 7379 cluster addslots $$slots > /dev/null 91 | slots=$$(seq 5461 7777); slots=$$(echo $$slots | tr '\n' ' '); redis-cli -p 7380 cluster addslots $$slots > /dev/null 92 | slots=$$(seq 7778 9999); slots=$$(echo $$slots | tr '\n' ' '); redis-cli -p 7381 cluster addslots $$slots > /dev/null 93 | slots=$$(seq 10000 10922); slots=$$(echo $$slots | tr '\n' ' '); redis-cli -p 7380 cluster addslots $$slots > /dev/null 94 | slots=$$(seq 10923 16383); slots=$$(echo $$slots | tr '\n' ' '); redis-cli -p 7381 cluster addslots $$slots > /dev/null 95 | 96 | start: 97 | make start-standalone 98 | make start-cluster 99 | 100 | stop-standalone: 101 | kill `cat /tmp/redis_standalone_node_for_spark-redis.pid` 102 | kill `cat /tmp/redis_standalone_node__ssl_for_spark-redis.pid` 103 | 104 | stop-cluster: 105 | kill `cat /tmp/redis_cluster_node1_for_spark-redis.pid` || true 106 | kill `cat /tmp/redis_cluster_node2_for_spark-redis.pid` || true 107 | kill `cat /tmp/redis_cluster_node3_for_spark-redis.pid` || true 108 | rm -f /tmp/redis_cluster_node1_for_spark-redis.conf 109 | rm -f /tmp/redis_cluster_node2_for_spark-redis.conf 110 | rm -f /tmp/redis_cluster_node3_for_spark-redis.conf 111 | 112 | stop: 113 | make stop-standalone 114 | make stop-cluster 115 | 116 | restart: 117 | make stop 118 | make start 119 | 120 | test: 121 | make start 122 | # with --batch-mode maven doesn't print 'Progress: 125/150kB', the progress lines take up 90% of the log and causes 123 | # Travis build to fail with 'The job exceeded the maximum log length, and has been terminated' 124 | mvn clean test -B -DargLine="-Djavax.net.ssl.trustStorePassword=password -Djavax.net.ssl.trustStore=./src/test/resources/tls/clientkeystore -Djavax.net.ssl.trustStoreType=jceks" 125 | make stop 126 | 127 | benchmark: 128 | make start 129 | mvn clean test -B -Pbenchmark 130 | make stop 131 | 132 | deploy: 133 | make start 134 | mvn --batch-mode clean deploy 135 | make stop 136 | 137 | package: 138 | make start 139 | mvn --batch-mode clean package 140 | make stop 141 | 142 | .PHONY: test 143 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Integration](https://github.com/RedisLabs/spark-redis/actions/workflows/integration.yml/badge.svg?branch=master)](https://github.com/RedisLabs/spark-redis/actions/workflows/integration.yml) 2 | [![license](https://img.shields.io/github/license/RedisLabs/spark-redis.svg)](https://github.com/RedisLabs/spark-redis) 3 | [![Release](https://img.shields.io/github/release/RedisLabs/spark-redis.svg?sort=semver)](https://github.com/RedisLabs/spark-redis/releases/latest) 4 | [![Maven 
Central](https://img.shields.io/maven-central/v/com.redislabs/spark-redis_2.12?logo=redis)](https://maven-badges.herokuapp.com/maven-central/com.redislabs/spark-redis_2.12) 5 | [![Javadocs](https://www.javadoc.io/badge/com.redislabs/spark-redis_2.12.svg)](https://www.javadoc.io/doc/com.redislabs/spark-redis_2.12) 6 | [![Codecov](https://codecov.io/gh/RedisLabs/spark-redis/branch/master/graph/badge.svg)](https://codecov.io/gh/RedisLabs/spark-redis) 7 | 8 | [![Discord](https://img.shields.io/discord/697882427875393627.svg?style=social&logo=discord)](https://discord.gg/redis) 9 | [![Twitch](https://img.shields.io/twitch/status/redisinc?style=social)](https://www.twitch.tv/redisinc) 10 | [![YouTube](https://img.shields.io/youtube/channel/views/UCD78lHSwYqMlyetR0_P4Vig?style=social)](https://www.youtube.com/redisinc) 11 | [![Twitter](https://img.shields.io/twitter/follow/redisinc?style=social)](https://twitter.com/redisinc) 12 | # Spark-Redis 13 | A library for reading and writing data in [Redis](http://redis.io) using [Apache Spark](http://spark.apache.org/). 14 | 15 | Spark-Redis provides access to all of Redis' data structures - String, Hash, List, Set and Sorted Set - from Spark as RDDs. It also supports reading and writing with DataFrames and Spark SQL syntax. 16 | 17 | The library can be used both with Redis stand-alone as well as clustered databases. When used with Redis cluster, Spark-Redis is aware of its partitioning scheme and adjusts in response to resharding and node failure events. 18 | 19 | Spark-Redis also supports Spark Streaming (DStreams) and Structured Streaming. 20 | 21 | ## Version compatibility and branching 22 | 23 | The library has several branches, each corresponds to a different supported Spark version. For example, 'branch-2.3' works with any Spark 2.3.x version. 24 | The master branch contains the recent development for the next release. 25 | 26 | | Spark-Redis | Spark | Redis | Supported Scala Versions | 27 | |---------------------------------------------------------------------------|-------| ---------------- | ------------------------ | 28 | | [master](https://github.com/RedisLabs/spark-redis/) | 3.2.x | >=2.9.0 | 2.12 | 29 | | [3.0](https://github.com/RedisLabs/spark-redis/tree/branch-3.0) | 3.0.x | >=2.9.0 | 2.12 | 30 | | [2.4, 2.5, 2.6](https://github.com/RedisLabs/spark-redis/tree/branch-2.4) | 2.4.x | >=2.9.0 | 2.11, 2.12 | 31 | | [2.3](https://github.com/RedisLabs/spark-redis/tree/branch-2.3) | 2.3.x | >=2.9.0 | 2.11 | 32 | | [1.4](https://github.com/RedisLabs/spark-redis/tree/branch-1.4) | 1.4.x | | 2.10 | 33 | 34 | 35 | ## Known limitations 36 | 37 | * Java, Python and R API bindings are not provided at this time 38 | 39 | ## Additional considerations 40 | This library is a work in progress so the API may change before the official release. 41 | 42 | ## Documentation 43 | 44 | Please make sure you use documentation from the correct branch ([2.4](https://github.com/RedisLabs/spark-redis/tree/branch-2.4#documentation), [2.3](https://github.com/RedisLabs/spark-redis/tree/branch-2.3#documentation), etc). 
45 | 46 | - [Getting Started](doc/getting-started.md) 47 | - [RDD](doc/rdd.md) 48 | - [Dataframe](doc/dataframe.md) 49 | - [Streaming](doc/streaming.md) 50 | - [Structured Streaming](doc/structured-streaming.md) 51 | - [Cluster](doc/cluster.md) 52 | - [Java](doc/java.md) 53 | - [Python](doc/python.md) 54 | - [Configuration](doc/configuration.md) 55 | - [Dev environment](doc/dev.md) 56 | 57 | ## Contributing 58 | 59 | You're encouraged to contribute to the Spark-Redis project. 60 | 61 | There are two ways you can do so: 62 | 63 | ### Submit Issues 64 | 65 | If you encounter an issue while using the library, please report it via the project's [issues tracker](https://github.com/RedisLabs/spark-redis/issues). 66 | 67 | ### Author Pull Requests 68 | 69 | Code contributions to the Spark-Redis project can be made using [pull requests](https://github.com/RedisLabs/spark-redis/pulls). To submit a pull request: 70 | 71 | 1. Fork this project. 72 | 2. Make and commit your changes. 73 | 3. Submit your changes as a pull request. 74 | -------------------------------------------------------------------------------- /build/sbt: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # When creating new tests for Spark SQL Hive, the HADOOP_CLASSPATH must contain the hive jars so 4 | # that we can run Hive to generate the golden answer. This is not required for normal development 5 | # or testing. 6 | for i in $HIVE_HOME/lib/* 7 | do HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$i 8 | done 9 | export HADOOP_CLASSPATH 10 | 11 | realpath () { 12 | ( 13 | TARGET_FILE=$1 14 | 15 | cd $(dirname $TARGET_FILE) 16 | TARGET_FILE=$(basename $TARGET_FILE) 17 | 18 | COUNT=0 19 | while [ -L "$TARGET_FILE" -a $COUNT -lt 100 ] 20 | do 21 | TARGET_FILE=$(readlink $TARGET_FILE) 22 | cd $(dirname $TARGET_FILE) 23 | TARGET_FILE=$(basename $TARGET_FILE) 24 | COUNT=$(($COUNT + 1)) 25 | done 26 | 27 | echo $(pwd -P)/$TARGET_FILE 28 | ) 29 | } 30 | 31 | . $(dirname $(realpath $0))/sbt-launch-lib.bash 32 | 33 | 34 | declare -r noshare_opts="-Dsbt.global.base=project/.sbtboot -Dsbt.boot.directory=project/.boot -Dsbt.ivy.home=project/.ivy" 35 | declare -r sbt_opts_file=".sbtopts" 36 | declare -r etc_sbt_opts_file="/etc/sbt/sbtopts" 37 | 38 | usage() { 39 | cat < path to global settings/plugins directory (default: ~/.sbt) 47 | -sbt-boot path to shared boot directory (default: ~/.sbt/boot in 0.11 series) 48 | -ivy path to local Ivy repository (default: ~/.ivy2) 49 | -mem set memory options (default: $sbt_mem, which is $(get_mem_opts $sbt_mem)) 50 | -no-share use all local caches; no sharing 51 | -no-global uses global caches, but does not use global ~/.sbt directory. 52 | -jvm-debug Turn on JVM debugging, open at the given port. 
53 | -batch Disable interactive mode 54 | # sbt version (default: from project/build.properties if present, else latest release) 55 | -sbt-version use the specified version of sbt 56 | -sbt-jar use the specified jar as the sbt launcher 57 | -sbt-rc use an RC version of sbt 58 | -sbt-snapshot use a snapshot version of sbt 59 | # java version (default: java from PATH, currently $(java -version 2>&1 | grep version)) 60 | -java-home alternate JAVA_HOME 61 | # jvm options and output control 62 | JAVA_OPTS environment variable, if unset uses "$java_opts" 63 | SBT_OPTS environment variable, if unset uses "$default_sbt_opts" 64 | .sbtopts if this file exists in the current directory, it is 65 | prepended to the runner args 66 | /etc/sbt/sbtopts if this file exists, it is prepended to the runner args 67 | -Dkey=val pass -Dkey=val directly to the java runtime 68 | -J-X pass option -X directly to the java runtime 69 | (-J is stripped) 70 | -S-X add -X to sbt's scalacOptions (-J is stripped) 71 | -PmavenProfiles Enable a maven profile for the build. 72 | In the case of duplicated or conflicting options, the order above 73 | shows precedence: JAVA_OPTS lowest, command line options highest. 74 | EOM 75 | } 76 | 77 | process_my_args () { 78 | while [[ $# -gt 0 ]]; do 79 | case "$1" in 80 | -no-colors) addJava "-Dsbt.log.noformat=true" && shift ;; 81 | -no-share) addJava "$noshare_opts" && shift ;; 82 | -no-global) addJava "-Dsbt.global.base=$(pwd)/project/.sbtboot" && shift ;; 83 | -sbt-boot) require_arg path "$1" "$2" && addJava "-Dsbt.boot.directory=$2" && shift 2 ;; 84 | -sbt-dir) require_arg path "$1" "$2" && addJava "-Dsbt.global.base=$2" && shift 2 ;; 85 | -debug-inc) addJava "-Dxsbt.inc.debug=true" && shift ;; 86 | -batch) exec &2 "$@" 31 | } 32 | vlog () { 33 | [[ $verbose || $debug ]] && echoerr "$@" 34 | } 35 | dlog () { 36 | [[ $debug ]] && echoerr "$@" 37 | } 38 | 39 | acquire_sbt_jar () { 40 | SBT_VERSION=`awk -F "=" '/sbt\\.version/ {print $2}' ./project/build.properties` 41 | URL1=https://dl.bintray.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch.jar 42 | JAR=build/sbt-launch-${SBT_VERSION}.jar 43 | 44 | sbt_jar=$JAR 45 | 46 | if [[ ! -f "$sbt_jar" ]]; then 47 | # Download sbt launch jar if it hasn't been downloaded yet 48 | if [ ! -f ${JAR} ]; then 49 | # Download 50 | printf "Attempting to fetch sbt\n" 51 | JAR_DL=${JAR}.part 52 | if hash curl 2>/dev/null; then 53 | curl --fail --location --silent ${URL1} > "${JAR_DL}" &&\ 54 | mv "${JAR_DL}" "${JAR}" 55 | elif hash wget 2>/dev/null; then 56 | wget --quiet ${URL1} -O "${JAR_DL}" &&\ 57 | mv "${JAR_DL}" "${JAR}" 58 | else 59 | printf "You do not have curl or wget installed, please install sbt manually from http://www.scala-sbt.org/\n" 60 | exit -1 61 | fi 62 | fi 63 | if [ ! -f ${JAR} ]; then 64 | # We failed to download 65 | printf "Our attempt to download sbt locally to ${JAR} failed. 
Please install sbt manually from http://www.scala-sbt.org/\n" 66 | exit -1 67 | fi 68 | printf "Launching sbt from ${JAR}\n" 69 | fi 70 | } 71 | 72 | execRunner () { 73 | # print the arguments one to a line, quoting any containing spaces 74 | [[ $verbose || $debug ]] && echo "# Executing command line:" && { 75 | for arg; do 76 | if printf "%s\n" "$arg" | grep -q ' '; then 77 | printf "\"%s\"\n" "$arg" 78 | else 79 | printf "%s\n" "$arg" 80 | fi 81 | done 82 | echo "" 83 | } 84 | 85 | exec "$@" 86 | } 87 | 88 | addJava () { 89 | dlog "[addJava] arg = '$1'" 90 | java_args=( "${java_args[@]}" "$1" ) 91 | } 92 | 93 | enableProfile () { 94 | dlog "[enableProfile] arg = '$1'" 95 | maven_profiles=( "${maven_profiles[@]}" "$1" ) 96 | export SBT_MAVEN_PROFILES="${maven_profiles[@]}" 97 | } 98 | 99 | addSbt () { 100 | dlog "[addSbt] arg = '$1'" 101 | sbt_commands=( "${sbt_commands[@]}" "$1" ) 102 | } 103 | addResidual () { 104 | dlog "[residual] arg = '$1'" 105 | residual_args=( "${residual_args[@]}" "$1" ) 106 | } 107 | addDebugger () { 108 | addJava "-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=$1" 109 | } 110 | 111 | # a ham-fisted attempt to move some memory settings in concert 112 | # so they need not be dicked around with individually. 113 | get_mem_opts () { 114 | local mem=${1:-2048} 115 | local perm=$(( $mem / 4 )) 116 | (( $perm > 256 )) || perm=256 117 | (( $perm < 4096 )) || perm=4096 118 | local codecache=$(( $perm / 2 )) 119 | 120 | echo "-Xms${mem}m -Xmx${mem}m -XX:MaxPermSize=${perm}m -XX:ReservedCodeCacheSize=${codecache}m" 121 | } 122 | 123 | require_arg () { 124 | local type="$1" 125 | local opt="$2" 126 | local arg="$3" 127 | if [[ -z "$arg" ]] || [[ "${arg:0:1}" == "-" ]]; then 128 | die "$opt requires <$type> argument" 129 | fi 130 | } 131 | 132 | is_function_defined() { 133 | declare -f "$1" > /dev/null 134 | } 135 | 136 | process_args () { 137 | while [[ $# -gt 0 ]]; do 138 | case "$1" in 139 | -h|-help) usage; exit 1 ;; 140 | -v|-verbose) verbose=1 && shift ;; 141 | -d|-debug) debug=1 && shift ;; 142 | 143 | -ivy) require_arg path "$1" "$2" && addJava "-Dsbt.ivy.home=$2" && shift 2 ;; 144 | -mem) require_arg integer "$1" "$2" && sbt_mem="$2" && shift 2 ;; 145 | -jvm-debug) require_arg port "$1" "$2" && addDebugger $2 && shift 2 ;; 146 | -batch) exec 7 | 8 | com.redislabs 9 | spark-redis_2.11 10 | 2.4.2 11 | 12 | 13 | ``` 14 | 15 | Or 16 | 17 | ```xml 18 | 19 | 20 | com.redislabs 21 | spark-redis_2.12 22 | 2.4.2 23 | 24 | 25 | ``` 26 | 27 | ### SBT 28 | 29 | ```scala 30 | libraryDependencies += "com.redislabs" %% "spark-redis" % "2.4.2" 31 | ``` 32 | 33 | ### Build form source 34 | You can download the library's source and build it: 35 | ``` 36 | git clone https://github.com/RedisLabs/spark-redis.git 37 | cd spark-redis 38 | mvn clean package -DskipTests 39 | ``` 40 | 41 | ### Using the library with spark shell 42 | Add Spark-Redis to Spark with the `--jars` command line option. 
43 | 44 | ```bash 45 | $ bin/spark-shell --jars /spark-redis--jar-with-dependencies.jar 46 | ``` 47 | 48 | By default it connects to `localhost:6379` without any password, you can change the connection settings in the following manner: 49 | 50 | ```bash 51 | $ bin/spark-shell --jars /spark-redis--jar-with-dependencies.jar --conf "spark.redis.host=localhost" --conf "spark.redis.port=6379" --conf "spark.redis.auth=passwd" 52 | ``` 53 | 54 | 55 | ### Configuring connection to Redis in a self-contained application 56 | 57 | An example configuration of SparkContext with Redis configuration: 58 | 59 | ```scala 60 | import com.redislabs.provider.redis._ 61 | 62 | ... 63 | 64 | val sc = new SparkContext(new SparkConf() 65 | .setMaster("local") 66 | .setAppName("myApp") 67 | // initial redis host - can be any node in cluster mode 68 | .set("spark.redis.host", "localhost") 69 | // initial redis port 70 | .set("spark.redis.port", "6379") 71 | // optional redis AUTH password 72 | .set("spark.redis.auth", "passwd") 73 | ) 74 | ``` 75 | 76 | The SparkSession can be configured in a similar manner: 77 | 78 | ```scala 79 | val spark = SparkSession 80 | .builder() 81 | .appName("myApp") 82 | .master("local[*]") 83 | .config("spark.redis.host", "localhost") 84 | .config("spark.redis.port", "6379") 85 | .config("spark.redis.auth", "passwd") 86 | .getOrCreate() 87 | 88 | val sc = spark.sparkContext 89 | ``` 90 | 91 | ### Create RDD 92 | 93 | ```scala 94 | import com.redislabs.provider.redis._ 95 | 96 | val keysRDD = sc.fromRedisKeyPattern("foo*", 5) 97 | val keysRDD = sc.fromRedisKeys(Array("foo", "bar"), 5) 98 | ``` 99 | 100 | ### Write Dataframe 101 | 102 | ```scala 103 | df.write 104 | .format("org.apache.spark.sql.redis") 105 | .option("table", "foo") 106 | .save() 107 | ``` 108 | 109 | ### Create Stream 110 | 111 | ```scala 112 | import com.redislabs.provider.redis.streaming._ 113 | 114 | val ssc = new StreamingContext(sc, Seconds(1)) 115 | val redisStream = ssc.createRedisStream(Array("foo", "bar"), 116 | storageLevel = StorageLevel.MEMORY_AND_DISK_2) 117 | ``` 118 | -------------------------------------------------------------------------------- /doc/java.md: -------------------------------------------------------------------------------- 1 | # Using the library in Java 2 | 3 | The library is written in Scala and the API is primarily intended to be used with Scala. But you can also use the library with 4 | Java because of the Scala/Java interoperability. 5 | 6 | ## RDD 7 | 8 | Please, refer to the detailed documentation of [RDD support](rdd.md) for the full list of available features. 9 | The RDD functions are available in `RedisContext`. Example: 10 | 11 | ```java 12 | SparkConf sparkConf = new SparkConf() 13 | .setAppName("MyApp") 14 | .setMaster("local[*]") 15 | .set("spark.redis.host", "localhost") 16 | .set("spark.redis.port", "6379"); 17 | 18 | RedisConfig redisConfig = RedisConfig.fromSparkConf(sparkConf); 19 | ReadWriteConfig readWriteConfig = ReadWriteConfig.fromSparkConf(sparkConf); 20 | 21 | JavaSparkContext jsc = new JavaSparkContext(sparkConf); 22 | RedisContext redisContext = new RedisContext(jsc.sc()); 23 | 24 | JavaRDD> rdd = jsc.parallelize(Arrays.asList(Tuple2.apply("myKey", "Hello"))); 25 | int ttl = 0; 26 | 27 | redisContext.toRedisKV(rdd.rdd(), ttl, redisConfig, readWriteConfig); 28 | 29 | ``` 30 | 31 | ## Datasets and DataFrames 32 | 33 | The Dataset/DataFrame API is the same in Java and Scala. Please, refer to [DataFrame page](dataframe.md) for details. 
Here is an 34 | example with Java: 35 | 36 | ```Java 37 | public class Person { 38 | 39 | private String name; 40 | private Integer age; 41 | 42 | public Person() { 43 | } 44 | 45 | public Person(String name, Integer age) { 46 | this.name = name; 47 | this.age = age; 48 | } 49 | 50 | public String getName() { 51 | return name; 52 | } 53 | 54 | public void setName(String name) { 55 | this.name = name; 56 | } 57 | 58 | public Integer getAge() { 59 | return age; 60 | } 61 | 62 | public void setAge(Integer age) { 63 | this.age = age; 64 | } 65 | } 66 | 67 | ``` 68 | 69 | ```Java 70 | SparkSession spark = SparkSession 71 | .builder() 72 | .appName("MyApp") 73 | .master("local[*]") 74 | .config("spark.redis.host", "localhost") 75 | .config("spark.redis.port", "6379") 76 | .getOrCreate(); 77 | 78 | Dataset df = spark.createDataFrame(Arrays.asList( 79 | new Person("John", 35), 80 | new Person("Peter", 40)), Person.class); 81 | 82 | df.write() 83 | .format("org.apache.spark.sql.redis") 84 | .option("table", "person") 85 | .option("key.column", "name") 86 | .mode(SaveMode.Overwrite) 87 | .save(); 88 | ``` 89 | 90 | ## Streaming 91 | 92 | The following example demonstrates how to create a stream from Redis list `myList`. Please, refer to [Streaming](streaming.md) for more details. 93 | 94 | ```java 95 | SparkConf sparkConf = new SparkConf() 96 | .setAppName("MyApp") 97 | .setMaster("local[*]") 98 | .set("spark.redis.host", "localhost") 99 | .set("spark.redis.port", "6379"); 100 | 101 | JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(1)); 102 | 103 | RedisConfig redisConfig = new RedisConfig(new RedisEndpoint(sparkConf)); 104 | 105 | RedisStreamingContext redisStreamingContext = new RedisStreamingContext(jssc.ssc()); 106 | String[] keys = new String[]{"myList"}; 107 | RedisInputDStream> redisStream = 108 | redisStreamingContext.createRedisStream(keys, StorageLevel.MEMORY_ONLY(), redisConfig); 109 | 110 | redisStream.print(); 111 | 112 | jssc.start(); 113 | jssc.awaitTermination(); 114 | ``` -------------------------------------------------------------------------------- /doc/python.md: -------------------------------------------------------------------------------- 1 | # Python 2 | 3 | Python support is currently limited to DataFrames only. Please, refer to Scala [DataFrame documentation](dataframe.md) 4 | for the complete list of features. 5 | 6 | Here is an example: 7 | 8 | 1. Run `pyspark` providing the Spark-Redis JAR file: 9 | 10 | ```bash 11 | $ ./bin/pyspark --jars /spark-redis--jar-with-dependencies.jar 12 | ``` 13 | 14 | By default it connects to `localhost:6379` without any password, you can change the connection settings in the following manner: 15 | 16 | ```bash 17 | $ bin/pyspark --jars /spark-redis--jar-with-dependencies.jar --conf "spark.redis.host=localhost" --conf "spark.redis.port=6379" --conf "spark.redis.auth=passwd" 18 | ``` 19 | 20 | 2. Read DataFrame from JSON, write/read from Redis: 21 | ```python 22 | df = spark.read.json("examples/src/main/resources/people.json") 23 | df.write.format("org.apache.spark.sql.redis").option("table", "people").option("key.column", "name").save() 24 | loadedDf = spark.read.format("org.apache.spark.sql.redis").option("table", "people").option("key.column", "name").load() 25 | loadedDf.show() 26 | ``` 27 | 28 | 3. 
Check the data with redis-cli: 29 | 30 | ```bash 31 | 127.0.0.1:6379> hgetall people:Justin 32 | 1) "age" 33 | 2) "19" 34 | ``` 35 | 36 | The self-contained application can be configured in the following manner: 37 | 38 | ```python 39 | SparkSession\ 40 | .builder\ 41 | .appName("myApp")\ 42 | .config("spark.redis.host", "localhost")\ 43 | .config("spark.redis.port", "6379")\ 44 | .config("spark.redis.auth", "passwd")\ 45 | .getOrCreate() 46 | ``` 47 | 48 | 49 | -------------------------------------------------------------------------------- /doc/rdd.md: -------------------------------------------------------------------------------- 1 | 2 | # RDD 3 | 4 | - [The keys RDD](#the-keys-rdd) 5 | - [Reading data](#reading-data) 6 | - [Writing data](#writing-data) 7 | - [Read and write configuration options](#read-and-write-configuration-options) 8 | 9 | ### The keys RDD 10 | Since data access in Redis is based on keys, to use Spark-Redis you'll first need a keys RDD. The following example shows how to read key names from Redis into an RDD: 11 | ```scala 12 | import com.redislabs.provider.redis._ 13 | 14 | val keysRDD = sc.fromRedisKeyPattern("foo*", 5) 15 | val keysRDD = sc.fromRedisKeys(Array("foo", "bar"), 5) 16 | ``` 17 | 18 | The above example populates the keys RDD by retrieving the key names from Redis that match the given pattern (`foo*`) or the keys can be listed by an Array. Furthermore, it overrides the default setting of 3 partitions in the RDD with a new value of 5 - each partition consists of a set of Redis cluster hashslots contain the matched key names. 19 | 20 | ### Reading data 21 | 22 | Each of Redis' data types can be read into an RDD. The following snippet demonstrates reading from Redis Strings. 23 | 24 | #### Strings 25 | 26 | ```scala 27 | import com.redislabs.provider.redis._ 28 | val stringRDD = sc.fromRedisKV("keyPattern*") 29 | val stringRDD = sc.fromRedisKV(Array("foo", "bar")) 30 | ``` 31 | 32 | Once run, `stringRDD: RDD[(String, String)]` will contain the string values of all keys whose names are provided by keyPattern or `Array[String]`. 33 | 34 | #### Hashes 35 | ```scala 36 | val hashRDD = sc.fromRedisHash("keyPattern*") 37 | val hashRDD = sc.fromRedisHash(Array("foo", "bar")) 38 | ``` 39 | 40 | This will populate `hashRDD: RDD[(String, String)]` with the fields and values of the Redis Hashes, the hashes' names are provided by keyPattern or `Array[String]`. 41 | 42 | #### Lists 43 | ```scala 44 | val listRDD = sc.fromRedisList("keyPattern*") 45 | val listRDD = sc.fromRedisList(Array("foo", "bar")) 46 | ``` 47 | The contents (members) of the Redis Lists in whose names are provided by keyPattern or `Array[String]` will be stored in `listRDD: RDD[String]`. 48 | 49 | #### Sets 50 | ```scala 51 | val setRDD = sc.fromRedisSet("keyPattern*") 52 | val setRDD = sc.fromRedisSet(Array("foo", "bar")) 53 | ``` 54 | 55 | The Redis Sets' members will be written to `setRDD: RDD[String]`. 56 | 57 | #### Sorted Sets 58 | ```scala 59 | val zsetRDD = sc.fromRedisZSetWithScore("keyPattern*") 60 | val zsetRDD = sc.fromRedisZSetWithScore(Array("foo", "bar")) 61 | ``` 62 | 63 | Using `fromRedisZSetWithScore` will store in `zsetRDD: RDD[(String, Double)]` an RDD that consists of members and their scores, from the Redis Sorted Sets whose keys are provided by keyPattern or Array[String]. 
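Since `fromRedisZSetWithScore` returns an ordinary pair RDD, the result can be processed with standard Spark transformations. A minimal sketch, assuming a hypothetical `leaderboard:*` key pattern and an arbitrary score threshold:

```scala
import com.redislabs.provider.redis._

// members and scores from all sorted sets matching a hypothetical key pattern
val scored = sc.fromRedisZSetWithScore("leaderboard:*")

// keep members scoring at least 100 and print the ten highest-scoring ones
scored
  .filter { case (_, score) => score >= 100.0 }
  .sortBy({ case (_, score) => score }, ascending = false)
  .take(10)
  .foreach { case (member, score) => println(s"$member -> $score") }
```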
64 | 65 | ```scala 66 | val zsetRDD = sc.fromRedisZSet("keyPattern*") 67 | val zsetRDD = sc.fromRedisZSet(Array("foo", "bar")) 68 | ``` 69 | 70 | Using `fromRedisZSet` will store in `zsetRDD: RDD[String]`, an RDD that consists of members from the Redis Sorted Sets whose keys are provided by keyPattern or Array[String]. 71 | 72 | ```scala 73 | val startPos: Int = _ 74 | val endPos: Int = _ 75 | val zsetRDD = sc.fromRedisZRangeWithScore("keyPattern*", startPos, endPos) 76 | val zsetRDD = sc.fromRedisZRangeWithScore(Array("foo", "bar"), startPos, endPos) 77 | ``` 78 | 79 | Using `fromRedisZRangeWithScore` will store in `zsetRDD: RDD[(String, Double)]`, an RDD of members together with their scores, taken from the positions [startPos, endPos] of each Sorted Set, for the Redis Sorted Sets whose keys are provided by keyPattern or Array[String]. 80 | 81 | ```scala 82 | val startPos: Int = _ 83 | val endPos: Int = _ 84 | val zsetRDD = sc.fromRedisZRange("keyPattern*", startPos, endPos) 85 | val zsetRDD = sc.fromRedisZRange(Array("foo", "bar"), startPos, endPos) 86 | ``` 87 | 88 | Using `fromRedisZRange` will store in `zsetRDD: RDD[String]`, an RDD of members taken from the positions [startPos, endPos] of each Sorted Set, for the Redis Sorted Sets whose keys are provided by keyPattern or Array[String]. 89 | 90 | ```scala 91 | val min: Double = _ 92 | val max: Double = _ 93 | val zsetRDD = sc.fromRedisZRangeByScoreWithScore("keyPattern*", min, max) 94 | val zsetRDD = sc.fromRedisZRangeByScoreWithScore(Array("foo", "bar"), min, max) 95 | ``` 96 | 97 | Using `fromRedisZRangeByScoreWithScore` will store in `zsetRDD: RDD[(String, Double)]`, an RDD of members together with their scores, restricted to members whose scores fall within [min, max], from the Redis Sorted Sets whose keys are provided by keyPattern or Array[String]. 98 | 99 | ```scala 100 | val min: Double = _ 101 | val max: Double = _ 102 | val zsetRDD = sc.fromRedisZRangeByScore("keyPattern*", min, max) 103 | val zsetRDD = sc.fromRedisZRangeByScore(Array("foo", "bar"), min, max) 104 | ``` 105 | 106 | Using `fromRedisZRangeByScore` will store in `zsetRDD: RDD[String]`, an RDD of members whose scores fall within [min, max], from the Redis Sorted Sets whose keys are provided by keyPattern or Array[String]. 107 | 108 | ### Writing data 109 | To write data to Redis from Spark, you'll need to prepare the appropriate RDD depending on the type of data you want to write. 110 | 111 | #### Strings 112 | For String values, your RDD should consist of the key-value pairs that are to be written. Assuming that the strings RDD is called `stringRDD`, use the following snippet for writing it to Redis: 113 | 114 | ```scala 115 | sc.toRedisKV(stringRDD) 116 | ``` 117 | 118 | In order to set an expiry on the key, we can pass in the `ttl` (in seconds) as an additional argument: 119 | 120 | ```scala 121 | sc.toRedisKV(stringRDD, ttl) 122 | ``` 123 | 124 | By default, Strings won't have any expiry set. 125 | 126 | #### Hashes 127 | To store a Redis Hash, the RDD should consist of its field-value pairs. If the RDD is called `hashRDD`, the following should be used for storing it in the key name specified by `hashName`: 128 | 129 | ```scala 130 | sc.toRedisHASH(hashRDD, hashName) 131 | ``` 132 | In order to set an expiry on the key, we can pass in the `ttl` (in seconds) as an additional argument: 133 | 134 | ```scala 135 | sc.toRedisHASH(hashRDD, hashName, ttl) 136 | ``` 137 | 138 | By default, Hashes won't have any expiry set. 
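Putting the above together, a minimal sketch of writing a hash end-to-end (the key name `myHash`, the field names and the one-hour TTL are made-up examples):

```scala
import com.redislabs.provider.redis._

// field-value pairs that become the fields of a single Redis hash
val hashRDD = sc.parallelize(Seq(
  ("field1", "value1"),
  ("field2", "value2")
))

// write them to the hash stored at the (made-up) key "myHash", expiring after one hour
sc.toRedisHASH(hashRDD, "myHash", 3600)
```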
139 | 140 | Use the following to store an RDD into multiple hashes: 141 | 142 | ```scala 143 | sc.toRedisHASHes(hashRDD, ttl) 144 | ``` 145 | 146 | The `hashRDD` is an RDD of tuples (`hashname`, `map[field name, field value]`). 147 | 148 | ```scala 149 | sc.toRedisByteHASHes(hashRDD, ttl) 150 | ``` 151 | 152 | The `hashRDD` is an RDD of tuples (`hashname`, `map[field name, field value]`) represented as byte arrays. 153 | 154 | #### Lists 155 | Use the following to store an RDD in a Redis List: 156 | 157 | ```scala 158 | sc.toRedisLIST(listRDD, listName) 159 | ``` 160 | 161 | Use the following to store an RDD in a fixed-size Redis List: 162 | 163 | ```scala 164 | sc.toRedisFixedLIST(listRDD, listName, listSize) 165 | ``` 166 | 167 | The `listRDD` is an RDD that contains all of the list's string elements in order, and `listName` is the list's key name. 168 | `listSize` is an integer which specifies the size of the Redis list; it is optional, and will default to an unlimited size. 169 | 170 | Use the following to store an RDD in multiple Redis Lists: 171 | 172 | ```scala 173 | sc.toRedisLISTs(rdd) 174 | ``` 175 | 176 | The `rdd` is an RDD of tuples (`list name`, `list values`). 177 | 178 | Use the following to store an RDD of binary values in multiple Redis Lists: 179 | 180 | ```scala 181 | sc.toRedisByteLISTs(byteListRDD) 182 | ``` 183 | 184 | The `byteListRDD` is an RDD of tuples (`list name`, `list values`) represented as byte arrays. 185 | 186 | Expiry can be set on Lists by passing in an additional argument called `ttl` (in seconds) to the above methods except `toRedisFixedLIST`: 187 | ```scala 188 | sc.toRedisLIST(listRDD, listName, ttl) 189 | sc.toRedisLISTs(rdd, ttl) 190 | sc.toRedisByteLISTs(byteListRDD, ttl) 191 | ``` 192 | 193 | By default, Lists won't have any expiry set. 194 | 195 | #### Sets 196 | For storing data in a Redis Set, use `toRedisSET` as follows: 197 | 198 | ```scala 199 | sc.toRedisSET(setRDD, setName) 200 | ``` 201 | 202 | Where `setRDD` is an RDD with the set's string elements and `setName` is the name of the key for that set. 203 | 204 | In order to set an expiry on the key, we can pass in the `ttl` (in seconds) as an additional argument: 205 | 206 | ```scala 207 | sc.toRedisSET(setRDD, setName, ttl) 208 | ``` 209 | 210 | By default, Sets won't have any expiry set. 211 | 212 | #### Sorted Sets 213 | ```scala 214 | sc.toRedisZSET(zsetRDD, zsetName) 215 | ``` 216 | 217 | The above example demonstrates storing data in a Redis Sorted Set. The `zsetRDD` in the example should contain pairs of members and their scores, whereas `zsetName` is the name for that key. 218 | 219 | In order to set an expiry on the key, we can pass in the `ttl` (in seconds) as an additional argument: 220 | 221 | ```scala 222 | sc.toRedisZSET(zsetRDD, zsetName, ttl) 223 | ``` 224 | 225 | By default, Sorted Sets won't have any expiry set. 
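For illustration, a short sketch of writing a Sorted Set end-to-end (the key name `scores`, the members and the one-day TTL are made-up; the scores are passed as strings here to match the member/score pair shape — verify against the `toRedisZSET` signature of the version you use):

```scala
import com.redislabs.provider.redis._

// (member, score) pairs; scores are given as strings in this sketch
val zsetRDD = sc.parallelize(Seq(
  ("alice", "95.5"),
  ("bob", "87.0")
))

// store them under the (made-up) key "scores" with a one-day expiry
sc.toRedisZSET(zsetRDD, "scores", 86400)
```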
226 | 227 | ### Read and write configuration options 228 | 229 | Some [configuration options](configuration.md) can be overridden for a particular RDD: 230 | 231 | ```scala 232 | val readWriteConf = ReadWriteConfig(scanCount = 1000, maxPipelineSize = 1000) 233 | val rdd = sc.fromRedisKeyPattern(keyPattern)(readWriteConfig = readWriteConf) 234 | ``` 235 | 236 | or with an implicit parameter: 237 | 238 | ```scala 239 | implicit val readWriteConf = ReadWriteConfig(scanCount = 1000, maxPipelineSize = 1000) 240 | val rdd = sc.fromRedisKeyPattern(keyPattern) 241 | ``` 242 | -------------------------------------------------------------------------------- /doc/streaming.md: -------------------------------------------------------------------------------- 1 | ### Streaming 2 | 3 | Spark-Redis supports streaming data from Stream and List data structures: 4 | 5 | - [Redis Stream](#redis-stream) 6 | - [Redis List](#redis-list) 7 | 8 | ## Redis Stream 9 | 10 | To stream data from [Redis Stream](https://redis.io/topics/streams-intro) use `createRedisXStream` method (added in Spark-Redis 2.3.1): 11 | 12 | ```scala 13 | import com.redislabs.provider.redis.streaming._ 14 | import com.redislabs.provider.redis.streaming.{ConsumerConfig, StreamItem} 15 | import org.apache.spark.sql.SparkSession 16 | import org.apache.spark.streaming.dstream.InputDStream 17 | import org.apache.spark.streaming.{Seconds, StreamingContext} 18 | 19 | val spark = SparkSession.builder.appName("Redis Stream Example") 20 | .master("local[*]") 21 | .config("spark.redis.host", "localhost") 22 | .config("spark.redis.port", "6379") 23 | .getOrCreate() 24 | 25 | val ssc = new StreamingContext(spark.sparkContext, Seconds(1)) 26 | 27 | val stream = ssc.createRedisXStream(Seq(ConsumerConfig("my-stream", "my-consumer-group", "my-consumer-1"))) 28 | stream.print() 29 | 30 | ssc.start() 31 | ssc.awaitTermination() 32 | 33 | ``` 34 | 35 | It will automatically create a consumer group if it doesn't exist and will start listening for the messages in the stream. 36 | 37 | ### Stream Offset 38 | 39 | By default it pulls messages starting from the latest message. If you need to start from the earliest message or any specific position in the stream, specify the `offset` parameter: 40 | 41 | ```scala 42 | ConsumerConfig("my-stream", "my-consumer-group", "my-consumer-1", offset = Earliest) // start from '0-0' 43 | ConsumerConfig("my-stream", "my-consumer-group", "my-consumer-1", IdOffset(42, 0)) // start from '42-0' 44 | ``` 45 | 46 | Please note, Spark-Redis will attempt to create a consumer group with the specified offset, but if the consumer group already exists, 47 | it will use the existing offset. It means, for example, if you decide to re-process all the messages from the beginning, 48 | just changing the offset to `Earliest` may not be enough. You may need to either manually delete the consumer 49 | group with `XGROUP DESTROY` or modify the offset with `XGROUP SETID`. 50 | 51 | ### Receiver reliability 52 | 53 | The DStream is implemented with a [Reliable Receiver](https://spark.apache.org/docs/latest/streaming-custom-receivers.html#receiver-reliability) that acknowledges 54 | after the data has been stored in Spark. As with any other Receiver to achieve strong fault-tolerance guarantees and ensure zero data loss, you have to enable [write-ahead logs](https://spark.apache.org/docs/latest/streaming-programming-guide.html#deploying-applications) and checkpointing. 
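A minimal sketch of such a setup (the application name and checkpoint directory below are only example values):

```scala
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

// enable the receiver write-ahead log and register a checkpoint directory
val conf = new SparkConf()
  .setAppName("redis-stream-app")
  .setMaster("local[*]")
  .set("spark.redis.host", "localhost")
  .set("spark.redis.port", "6379")
  .set("spark.streaming.receiver.writeAheadLog.enable", "true")

val ssc = new StreamingContext(conf, Seconds(1))
ssc.checkpoint("/tmp/spark-redis-checkpoint") // example path; use a fault-tolerant store in production
```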
55 | 56 | The received data is stored with `StorageLevel.MEMORY_AND_DISK_2` by default. 57 | Storage level can be configured with `storageLevel` parameter, e.g.: 58 | ```scala 59 | ssc.createRedisXStream(conf, storageLevel = StorageLevel.MEMORY_AND_DISK_SER_2) 60 | ``` 61 | 62 | ### Level of Parallelism 63 | 64 | The `createRedisXStream()` takes a sequence of consumer configs, each consumer is started in a separate thread. This allows you, for example, to 65 | create a stream from multiple Redis Stream keys: 66 | 67 | ```scala 68 | ssc.createRedisXStream(Seq( 69 | ConsumerConfig("my-stream-1", "my-consumer-group-1", "my-consumer-1"), 70 | ConsumerConfig("my-stream-2", "my-consumer-group-2", "my-consumer-1") 71 | )) 72 | ``` 73 | 74 | In this example we created an input DStream that corresponds to a single receiver running in a Spark executor. The receiver will create two threads pulling 75 | data from the streams in parallel. However if receiving data becomes a bottleneck, you may want to start multiple receivers in different executors (worker machines). 76 | This can be achieved by creating multiple input DStreams and using `union` to join them together. You can read more about about it [here](https://spark.apache.org/docs/latest/streaming-programming-guide.html#level-of-parallelism-in-data-receiving). 77 | 78 | For example, the following will create two receivers pulling the data from `my-stream` and balancing the load: 79 | 80 | ```scala 81 | val streams = Seq( 82 | ssc.createRedisXStream(Seq(ConsumerConfig("my-stream", "my-consumer-group", "my-consumer-1"))), 83 | ssc.createRedisXStream(Seq(ConsumerConfig("my-stream", "my-consumer-group", "my-consumer-2"))) 84 | ) 85 | 86 | val stream = ssc.union(streams) 87 | stream.print() 88 | ``` 89 | 90 | ### Configuration 91 | 92 | If the cluster is not large enough to process data as fast as it is being received, the receiving rate can be limited: 93 | 94 | ```scala 95 | ConsumerConfig("stream", "group", "c-1", rateLimitPerConsumer = Some(100)) // 100 items per second 96 | ``` 97 | 98 | It defines the number of received items per second per consumer. 99 | 100 | Another options you can configure are `batchSize` and `block`. They define the maximum number of pulled items and time in milliseconds to wait in a `XREADGROUP` call. 101 | 102 | ```scala 103 | ConsumerConfig("stream", "group", "c-1", batchSize = 50, block = 200) 104 | ``` 105 | 106 | 107 | ## Redis List 108 | 109 | The stream can be also created from Redis' List, the data is fetched with the `blpop` command. Users are required to provide an array which stores all the List names they are interested in. The [storageLevel](http://spark.apache.org/docs/latest/streaming-programming-guide.html#data-serialization) parameter is `MEMORY_AND_DISK_SER_2` by default. 110 | 111 | The method `createRedisStream` will create a `(listName, value)` stream, but if you don't care about which list feeds the value, you can use `createRedisStreamWithoutListname` to get the only `value` stream. 
112 | 113 | Use the following to get a `(listName, value)` stream from `foo` and `bar` list 114 | 115 | ```scala 116 | import org.apache.spark.streaming.{Seconds, StreamingContext} 117 | import org.apache.spark.storage.StorageLevel 118 | import com.redislabs.provider.redis.streaming._ 119 | val ssc = new StreamingContext(sc, Seconds(1)) 120 | val redisStream = ssc.createRedisStream(Array("foo", "bar"), storageLevel = StorageLevel.MEMORY_AND_DISK_2) 121 | redisStream.print() 122 | ssc.start() 123 | ssc.awaitTermination() 124 | ``` 125 | 126 | 127 | Use the following to get a `value` stream from `foo` and `bar` list 128 | 129 | ```scala 130 | import org.apache.spark.streaming.{Seconds, StreamingContext} 131 | import org.apache.spark.storage.StorageLevel 132 | import com.redislabs.provider.redis.streaming._ 133 | val ssc = new StreamingContext(sc, Seconds(1)) 134 | val redisStream = ssc.createRedisStreamWithoutListname(Array("foo", "bar"), storageLevel = StorageLevel.MEMORY_AND_DISK_2) 135 | redisStream.print() 136 | ssc.start() 137 | ssc.awaitTermination() 138 | ``` 139 | -------------------------------------------------------------------------------- /doc/structured-streaming.md: -------------------------------------------------------------------------------- 1 | ### Structured Streaming 2 | 3 | Spark-Redis supports [Redis Stream](https://redis.io/topics/streams-intro) data structure as a source for [Structured Streaming](https://spark.apache.org/docs/latest/structured-streaming-programming-guide.html): 4 | 5 | The following example reads data from a Redis Stream `sensors` that has two fields `sensor-id` and `temperature`: 6 | 7 | ```scala 8 | val spark = SparkSession 9 | .builder 10 | .master("local[*]") 11 | .config("spark.redis.host", "localhost") 12 | .config("spark.redis.port", "6379") 13 | .getOrCreate() 14 | 15 | val sensors = spark 16 | .readStream 17 | .format("redis") // read from Redis 18 | .option("stream.keys", "sensors") // stream key 19 | .schema(StructType(Array( // stream fields 20 | StructField("sensor-id", StringType), 21 | StructField("temperature", FloatType) 22 | ))) 23 | .load() 24 | 25 | val query = sensors 26 | .writeStream 27 | .format("console") 28 | .start() 29 | 30 | query.awaitTermination() 31 | 32 | ``` 33 | 34 | You can write the following items to the stream to test it: 35 | 36 | ``` 37 | xadd sensors * sensor-id 1 temperature 28.1 38 | xadd sensors * sensor-id 2 temperature 30.5 39 | xadd sensors * sensor-id 1 temperature 28.3 40 | ``` 41 | 42 | ### Output to Redis 43 | 44 | There is no Redis Sink available, but you can leverage [`foreachBatch`](https://spark.apache.org/docs/latest/structured-streaming-programming-guide.html#foreachbatch) and [DataFrame](dataframe.md) write command to output 45 | stream into Redis. Please note, `foreachBatch` is only available starting from Spark 2.4.0. 
46 | 47 | ```scala 48 | val query = sensors 49 | .writeStream 50 | .outputMode("update") 51 | .foreachBatch { (batchDF: DataFrame, batchId: Long) => 52 | batchDF 53 | .write 54 | .format("org.apache.spark.sql.redis") 55 | .option("table", "output") 56 | .mode(SaveMode.Append) 57 | .save() 58 | } 59 | .start() 60 | 61 | query.awaitTermination() 62 | ``` 63 | 64 | After writing the following to the Redis Stream: 65 | ``` 66 | xadd sensors * sensor-id 1 temperature 28.1 67 | xadd sensors * sensor-id 2 temperature 30.5 68 | xadd sensors * sensor-id 1 temperature 28.3 69 | ``` 70 | 71 | there will be the output `keys output:*`: 72 | ``` 73 | 1) "output:b1682af092b9467cb13cfdcf7fcc9835" 74 | 2) "output:04c80769320f4edeadcce8381a6f834d" 75 | 3) "output:4f04070a2fd548fdbea441b694c8673b" 76 | ``` 77 | 78 | `hgetall output:b1682af092b9467cb13cfdcf7fcc9835`: 79 | 80 | ``` 81 | 1) "sensor-id" 82 | 2) "2" 83 | 3) "temperature" 84 | 4) "30.5" 85 | ``` 86 | 87 | Please refer to [DataFrame docs](dataframe.md) for different options (such as specifying key name) available for writing. 88 | 89 | ### Stream Offset 90 | 91 | By default it pulls messages starting from the latest message in the stream. If you need to start from the specific position in the stream, specify the `stream.offsets` parameter as a JSON string. 92 | In the following example we set offset id to be `1548083485360-0`. The group name `redis-source` is a default consumer group that Spark-Redis automatically creates to read stream. 93 | 94 | ```scala 95 | val offsets = """{"offsets":{"sensors":{"groupName":"redis-source","offset":"1548083485360-0"}}}""" 96 | 97 | ... 98 | 99 | .option("stream.offsets", offsets) 100 | ``` 101 | 102 | If you want to process the stream from the beginning, set offset id to `0-0`. 103 | 104 | ### Entry id column 105 | 106 | You can access stream entry id by adding a column `_id` to the stream schema: 107 | 108 | ```scala 109 | val sensors = spark 110 | .readStream 111 | .format("redis") 112 | .option("stream.keys", "sensors") 113 | .schema(StructType(Array( 114 | StructField("_id", StringType), // entry id 115 | StructField("sensor-id", StringType), 116 | StructField("temperature", FloatType) 117 | ))) 118 | .load() 119 | ``` 120 | 121 | The stream schema: 122 | 123 | 124 | ``` 125 | +---------------+---------+-----------+ 126 | | _id|sensor-id|temperature| 127 | +---------------+---------+-----------+ 128 | |1548083485360-0| 1| 28.1| 129 | |1548083486248-0| 2| 30.5| 130 | |1548083486944-0| 1| 28.3| 131 | +---------------+---------+-----------+ 132 | 133 | ``` 134 | 135 | ### Level of Parallelism 136 | 137 | By default Spark-Redis creates a consumer group with a single consumer. There are two options available for increasing the level of parallelism. 138 | 139 | The first approach is to create stream from multiple Redis keys. You can specify multiple keys separated by comma, e.g. 140 | `.option("stream.keys", "sensors-eu,sensors-us")`. In this case data from each key will be mapped to a Spark partition. 141 | Please note, item ordering will be preserved only within a particular Redis key (Spark partition), there is no ordering guarantees for items across different keys. 142 | 143 | With the second approach you can read data from a single Redis key with multiple consumers in parallel, e.g. `option("stream.parallelism", 4)`. Each consumer will be mapped to a Spark partition. There are no ordering guarantees in this case. 
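Written out as a complete read, the second approach could look like the sketch below, which reuses the `sensors` schema from the first example in this document:

```scala
// read a single stream key with four consumers; each consumer maps to a Spark partition
val sensors = spark
  .readStream
  .format("redis")
  .option("stream.keys", "sensors")
  .option("stream.parallelism", 4)
  .schema(StructType(Array(
    StructField("sensor-id", StringType),
    StructField("temperature", FloatType)
  )))
  .load()
```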
144 | 145 | ### Connection options 146 | 147 | Similarly to the DataFrame API, you can override connection options at the individual stream level using the following options passed to `spark.readStream`: 148 | 149 | | Name | Description | Type | Default | 150 | | -----------| -------------------------------------------------------------| ---------- | ----------- | 151 | | host | overrides `spark.redis.host` configured in SparkSession | `String` | `localhost` | 152 | | port | overrides `spark.redis.port` configured in SparkSession | `Int` | `6379` | 153 | | auth | overrides `spark.redis.auth` configured in SparkSession | `String` | - | 154 | | dbNum | overrides `spark.redis.db` configured in SparkSession | `Int` | `0` | 155 | | timeout | overrides `spark.redis.timeout` configured in SparkSession | `Int` | `2000` | 156 | 157 | 158 | ### Other configuration 159 | 160 | Spark-Redis automatically creates a consumer group with the name `spark-source` if it doesn't exist. You can customize the consumer group name with 161 | `.option("stream.group.name", "my-group")`. You can also customize the prefix of the consumer names in the consumer group with `.option("stream.consumer.prefix", "my-consumer")`. 162 | 163 | Other options you can configure are `stream.read.batch.size` and `stream.read.block`. They define the maximum number of pulled items and the time in milliseconds to wait in an `XREADGROUP` call. 164 | The default values are 100 items and 500 ms, respectively. 165 | 166 | ```scala 167 | .option("stream.read.batch.size", 200) // items 168 | .option("stream.read.block", 1000) // in milliseconds 169 | ``` 170 | 171 | ### Fault Tolerance Semantics 172 | 173 | Spark-Redis provides a replayable source, so by enabling [checkpointing](https://spark.apache.org/docs/latest/structured-streaming-programming-guide.html#recovering-from-failures-with-checkpointing) and using 174 | idempotent sinks, you can ensure end-to-end exactly-once semantics under any failure. If checkpointing is not enabled, you may lose messages. 175 | -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | # This file should only contain the version of sbt to use. 2 | sbt.version=0.13.6 3 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | // You may use this file to add plugin dependencies for sbt.
2 | addSbtPlugin("org.spark-packages" % "sbt-spark-package" % "0.2.3") 3 | 4 | resolvers += "bintray-spark-packages" at "https://dl.bintray.com/spark-packages/maven/" 5 | -------------------------------------------------------------------------------- /scalastyle-config.xml: -------------------------------------------------------------------------------- (Scalastyle standard configuration: rule definitions omitted) -------------------------------------------------------------------------------- /src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister: -------------------------------------------------------------------------------- 1 | org.apache.spark.sql.redis.stream.RedisStreamProvider 2 | -------------------------------------------------------------------------------- /src/main/scala/com/redislabs/provider/redis/ConnectionPool.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis 2 | 3 | import redis.clients.jedis.exceptions.JedisConnectionException 4 | import redis.clients.jedis.{Jedis, JedisPool, JedisPoolConfig} 5 | 6 | import java.time.Duration 7 | import java.util.concurrent.ConcurrentHashMap 8 | import scala.collection.JavaConversions._ 9 | 10 | 11 | object ConnectionPool { 12 | @transient private lazy val pools: ConcurrentHashMap[RedisEndpoint, JedisPool] = 13 | new ConcurrentHashMap[RedisEndpoint, JedisPool]() 14 | 15 | def connect(re: RedisEndpoint): Jedis = { 16 | val pool = pools.getOrElseUpdate(re, 17 | { 18 | val poolConfig: JedisPoolConfig = new JedisPoolConfig(); 19 | poolConfig.setMaxTotal(250) 20 | poolConfig.setMaxIdle(32) 21 | poolConfig.setTestOnBorrow(false) 22 | poolConfig.setTestOnReturn(false) 23 | poolConfig.setTestWhileIdle(false) 24 | poolConfig.setSoftMinEvictableIdleTime(Duration.ofMinutes(1)) 25 | poolConfig.setTimeBetweenEvictionRuns(Duration.ofSeconds(30)) 26 | poolConfig.setNumTestsPerEvictionRun(-1) 27 | 28 | new JedisPool(poolConfig, re.host, re.port, re.timeout, re.user, re.auth, re.dbNum, re.ssl) 29 | } 30 | ) 31 | var sleepTime: Int = 4 32 | var conn: Jedis = null 33 | while (conn == null) { 34 | try { 35 | conn = pool.getResource 36 | } 37 | catch { 38 | case e: JedisConnectionException if e.getCause.toString.
39 | contains("ERR max number of clients reached") => { 40 | if (sleepTime < 500) sleepTime *= 2 41 | Thread.sleep(sleepTime) 42 | } 43 | case e: Exception => throw e 44 | } 45 | } 46 | conn 47 | } 48 | } 49 | 50 | -------------------------------------------------------------------------------- /src/main/scala/com/redislabs/provider/redis/package.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider 2 | 3 | package object redis extends RedisFunctions { 4 | val RedisSslScheme: String = "rediss" 5 | val RedisDataTypeHash: String = "hash" 6 | val RedisDataTypeString: String = "string" 7 | } 8 | -------------------------------------------------------------------------------- /src/main/scala/com/redislabs/provider/redis/partitioner/RedisPartition.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.partitioner 2 | 3 | import com.redislabs.provider.redis.RedisConfig 4 | import org.apache.spark.Partition 5 | 6 | 7 | case class RedisPartition(index: Int, 8 | redisConfig: RedisConfig, 9 | slots: (Int, Int)) extends Partition 10 | -------------------------------------------------------------------------------- /src/main/scala/com/redislabs/provider/redis/partitioner/RedisPartitioner.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.partitioner 2 | -------------------------------------------------------------------------------- /src/main/scala/com/redislabs/provider/redis/streaming/RedisInputDStream.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.streaming 2 | 3 | import com.redislabs.provider.redis.RedisConfig 4 | import org.apache.curator.utils.ThreadUtils 5 | import org.apache.spark.storage.StorageLevel 6 | import org.apache.spark.streaming.StreamingContext 7 | import org.apache.spark.streaming.receiver.Receiver 8 | import org.apache.spark.streaming.dstream.ReceiverInputDStream 9 | 10 | import redis.clients.jedis._ 11 | 12 | import scala.reflect.{ClassTag, classTag} 13 | import scala.util.control.NonFatal 14 | 15 | /** 16 | * Receives messages from Redis List 17 | */ 18 | class RedisInputDStream[T: ClassTag](_ssc: StreamingContext, 19 | keys: Array[String], 20 | storageLevel: StorageLevel, 21 | redisConfig: RedisConfig, 22 | streamType: Class[T]) 23 | extends ReceiverInputDStream[T](_ssc) { 24 | def getReceiver(): Receiver[T] = { 25 | new RedisReceiver(keys, storageLevel, redisConfig, streamType) 26 | } 27 | } 28 | 29 | 30 | private class RedisReceiver[T: ClassTag](keys: Array[String], 31 | storageLevel: StorageLevel, 32 | redisConfig: RedisConfig, 33 | streamType: Class[T]) 34 | extends Receiver[T](storageLevel) { 35 | 36 | def onStart() { 37 | val executorPool = ThreadUtils.newFixedThreadPool(keys.length, "BlockLists Streaming") 38 | try { 39 | /* start a executor for each interested List */ 40 | keys.foreach{ key => 41 | executorPool.submit(new MessageHandler(redisConfig.connectionForKey(key), key)) 42 | } 43 | } finally { 44 | executorPool.shutdown() 45 | } 46 | } 47 | 48 | def onStop() { 49 | } 50 | 51 | private class MessageHandler(conn: Jedis, key: String) extends Runnable { 52 | def run() { 53 | try { 54 | while(!isStopped) { 55 | val response = conn.blpop(2, key) 56 | if (response == null || response.isEmpty) { 57 | // no-op 58 | } else if (classTag[T] == classTag[String]) { 59 | 
store(response.get(1).asInstanceOf[T]) 60 | } else if (classTag[T] == classTag[(String, String)]) { 61 | store((response.get(0), response.get(1)).asInstanceOf[T]) 62 | } else { 63 | throw new scala.Exception("Unknown Redis Streaming type") 64 | } 65 | } 66 | } catch { 67 | case NonFatal(e) => 68 | restart("Error receiving data", e) 69 | } finally { 70 | onStop() 71 | } 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/main/scala/com/redislabs/provider/redis/streaming/RedisStreamReceiver.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.streaming 2 | 3 | import java.util.AbstractMap.SimpleEntry 4 | import com.redislabs.provider.redis.util.PipelineUtils.foreachWithPipeline 5 | import com.redislabs.provider.redis.util.{Logging, StreamUtils} 6 | import com.redislabs.provider.redis.{ReadWriteConfig, RedisConfig} 7 | import org.apache.curator.utils.ThreadUtils 8 | import org.apache.spark.storage.StorageLevel 9 | import org.apache.spark.streaming.receiver.Receiver 10 | import org.sparkproject.guava.util.concurrent.RateLimiter 11 | import redis.clients.jedis.{Jedis, StreamEntry, StreamEntryID} 12 | 13 | import scala.collection.JavaConversions._ 14 | 15 | /** 16 | * Receives messages from Redis Stream 17 | */ 18 | class RedisStreamReceiver(consumersConfig: Seq[ConsumerConfig], 19 | redisConfig: RedisConfig, 20 | readWriteConfig: ReadWriteConfig, 21 | storageLevel: StorageLevel) 22 | extends Receiver[StreamItem](storageLevel) with Logging { 23 | 24 | override def onStart(): Unit = { 25 | logInfo("Starting Redis Stream Receiver") 26 | val executorPool = ThreadUtils.newFixedThreadPool(consumersConfig.size, "RedisStreamMessageHandler") 27 | try { 28 | // start consumers in separate threads 29 | for (c <- consumersConfig) { 30 | executorPool.submit(new MessageHandler(c, redisConfig, readWriteConfig)) 31 | } 32 | } finally { 33 | // terminate threads after the work is done 34 | executorPool.shutdown() 35 | } 36 | } 37 | 38 | override def onStop(): Unit = { 39 | } 40 | 41 | private class MessageHandler(conf: ConsumerConfig, 42 | redisConfig: RedisConfig, 43 | implicit val readWriteConfig: ReadWriteConfig) extends Runnable { 44 | 45 | val jedis: Jedis = redisConfig.connectionForKey(conf.streamKey) 46 | val rateLimiterOpt: Option[RateLimiter] = conf.rateLimitPerConsumer.map(r => RateLimiter.create(r)) 47 | 48 | override def run(): Unit = { 49 | logInfo(s"Starting MessageHandler $conf") 50 | try { 51 | createConsumerGroupIfNotExist() 52 | receiveUnacknowledged() 53 | receiveNewMessages() 54 | } catch { 55 | case e: Exception => 56 | restart("Error handling message. 
Restarting.", e) 57 | } 58 | } 59 | 60 | def createConsumerGroupIfNotExist(): Unit = { 61 | val entryId = conf.offset match { 62 | case Earliest => new StreamEntryID(0, 0) 63 | case Latest => StreamEntryID.LAST_ENTRY 64 | case IdOffset(v1, v2) => new StreamEntryID(v1, v2) 65 | } 66 | StreamUtils.createConsumerGroupIfNotExist(jedis, conf.streamKey, conf.groupName, entryId) 67 | } 68 | 69 | def receiveUnacknowledged(): Unit = { 70 | logInfo(s"Starting receiving unacknowledged messages for key ${conf.streamKey}") 71 | var continue = true 72 | val unackId = new SimpleEntry(conf.streamKey, new StreamEntryID(0, 0)) 73 | 74 | while (!isStopped && continue) { 75 | val response = jedis.xreadGroup( 76 | conf.groupName, 77 | conf.consumerName, 78 | conf.batchSize, 79 | conf.block, 80 | false, 81 | unackId) 82 | 83 | val unackMessagesMap = response.map(e => (e.getKey, e.getValue)).toMap 84 | val entries = unackMessagesMap(conf.streamKey) 85 | if (entries.isEmpty) { 86 | continue = false 87 | } 88 | storeAndAck(conf.streamKey, entries) 89 | } 90 | } 91 | 92 | def receiveNewMessages(): Unit = { 93 | logInfo(s"Starting receiving new messages for key ${conf.streamKey}") 94 | val newMessId = new SimpleEntry(conf.streamKey, StreamEntryID.UNRECEIVED_ENTRY) 95 | 96 | while (!isStopped) { 97 | val response = jedis.xreadGroup( 98 | conf.groupName, 99 | conf.consumerName, 100 | conf.batchSize, 101 | conf.block, 102 | false, 103 | newMessId) 104 | 105 | if (response != null) { 106 | for (streamMessages <- response) { 107 | val key = streamMessages.getKey 108 | val entries = streamMessages.getValue 109 | storeAndAck(key, entries) 110 | } 111 | } 112 | } 113 | } 114 | 115 | def storeAndAck(streamKey: String, entries: Seq[StreamEntry]): Unit = { 116 | if (entries.nonEmpty) { 117 | // limit the rate if it's enabled 118 | rateLimiterOpt.foreach(_.acquire(entries.size)) 119 | val streamItems = entriesToItems(streamKey, entries) 120 | // call store(multiple-records) to reliably store in Spark memory 121 | store(streamItems.iterator) 122 | // ack redis 123 | foreachWithPipeline(jedis, entries) { (pipeline, entry) => 124 | pipeline.xack(streamKey, conf.groupName, entry.getID) 125 | } 126 | } 127 | } 128 | 129 | def entriesToItems(key: String, entries: Seq[StreamEntry]): Seq[StreamItem] = { 130 | entries.map { e => 131 | val itemId = ItemId(e.getID.getTime, e.getID.getSequence) 132 | StreamItem(key, itemId, e.getFields.toMap) 133 | } 134 | } 135 | } 136 | 137 | } 138 | 139 | /** 140 | * @param streamKey redis stream key 141 | * @param groupName consumer group name 142 | * @param consumerName consumer name 143 | * @param offset stream offset 144 | * @param rateLimitPerConsumer maximum retrieved messages per second per single consumer 145 | * @param batchSize maximum number of pulled items in a read API call 146 | * @param block time in milliseconds to wait for data in a blocking read API call 147 | */ 148 | case class ConsumerConfig(streamKey: String, 149 | groupName: String, 150 | consumerName: String, 151 | offset: Offset = Latest, 152 | rateLimitPerConsumer: Option[Int] = None, 153 | batchSize: Int = 100, 154 | block: Long = 500) 155 | 156 | /** 157 | * Represents an offset in the stream 158 | */ 159 | sealed trait Offset 160 | 161 | /** 162 | * Latest offset, known as a '$' special id 163 | */ 164 | case object Latest extends Offset 165 | 166 | /** 167 | * Earliest offset, '0-0' id 168 | */ 169 | case object Earliest extends Offset 170 | 171 | /** 172 | * Specific id in the form of 'v1-v2' 173 | * 174 | * @param v1 
first token of the id 175 | * @param v2 second token of the id 176 | */ 177 | case class IdOffset(v1: Long, v2: Long) extends Offset 178 | 179 | /** 180 | * Item id in the form of 'v1-v2' 181 | * 182 | * @param v1 first token of the id 183 | * @param v2 second token of the id 184 | */ 185 | case class ItemId(v1: Long, v2: Long) 186 | 187 | /** 188 | * Represent an item in the stream 189 | * 190 | * @param streamKey stream key 191 | * @param id item(entry) id 192 | * @param fields key/value map of item fields 193 | */ 194 | case class StreamItem(streamKey: String, id: ItemId, fields: Map[String, String]) 195 | 196 | -------------------------------------------------------------------------------- /src/main/scala/com/redislabs/provider/redis/streaming/package.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis 2 | 3 | package object streaming extends RedisStreamingFunctions { 4 | 5 | } 6 | -------------------------------------------------------------------------------- /src/main/scala/com/redislabs/provider/redis/streaming/redisStreamingFunctions.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.streaming 2 | 3 | import com.redislabs.provider.redis.{ReadWriteConfig, RedisConfig} 4 | import org.apache.spark.storage.StorageLevel 5 | import org.apache.spark.streaming.StreamingContext 6 | import org.apache.spark.streaming.dstream.InputDStream 7 | 8 | /** 9 | * RedisStreamingContext extends StreamingContext's functionality with Redis 10 | * 11 | * @param ssc a spark StreamingContext 12 | */ 13 | class RedisStreamingContext(@transient val ssc: StreamingContext) extends Serializable { 14 | /** 15 | * @param keys an Array[String] which consists all the Lists we want to listen to 16 | * @param storageLevel the receiver' storage tragedy of received data, default as MEMORY_AND_DISK_2 17 | * @return a stream of (listname, value) 18 | */ 19 | def createRedisStream(keys: Array[String], 20 | storageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK_2) 21 | (implicit 22 | redisConfig: RedisConfig = RedisConfig.fromSparkConf(ssc.sparkContext.getConf)): 23 | RedisInputDStream[(String, String)] = { 24 | new RedisInputDStream(ssc, keys, storageLevel, redisConfig, classOf[(String, String)]) 25 | } 26 | 27 | /** 28 | * @param keys an Array[String] which consists all the Lists we want to listen to 29 | * @param storageLevel the receiver' storage tragedy of received data, default as MEMORY_AND_DISK_2 30 | * @return a stream of (value) 31 | */ 32 | def createRedisStreamWithoutListname(keys: Array[String], 33 | storageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK_2) 34 | (implicit 35 | redisConf: RedisConfig = RedisConfig.fromSparkConf(ssc.sparkContext.getConf)): 36 | RedisInputDStream[String] = { 37 | new RedisInputDStream(ssc, keys, storageLevel, redisConf, classOf[String]) 38 | } 39 | 40 | def createRedisXStream(consumersConfig: Seq[ConsumerConfig], 41 | storageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK_2) 42 | (implicit 43 | redisConfig: RedisConfig = RedisConfig.fromSparkConf(ssc.sparkContext.getConf)): 44 | InputDStream[StreamItem] = { 45 | val readWriteConfig = ReadWriteConfig.fromSparkConf(ssc.sparkContext.getConf) 46 | val receiver = new RedisStreamReceiver(consumersConfig, redisConfig, readWriteConfig, storageLevel) 47 | ssc.receiverStream(receiver) 48 | } 49 | } 50 | 51 | trait RedisStreamingFunctions { 52 | 53 | implicit def 
toRedisStreamingContext(ssc: StreamingContext): RedisStreamingContext = new RedisStreamingContext(ssc) 54 | 55 | } 56 | 57 | -------------------------------------------------------------------------------- /src/main/scala/com/redislabs/provider/redis/util/CollectionUtils.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.util 2 | 3 | import scala.collection.IterableLike 4 | import scala.collection.generic.CanBuildFrom 5 | 6 | /** 7 | * @author The Viet Nguyen 8 | */ 9 | object CollectionUtils { 10 | 11 | implicit class RichCollection[A, Repr](val xs: IterableLike[A, Repr]) extends AnyVal { 12 | 13 | def distinctBy[B, That](f: A => B)(implicit cbf: CanBuildFrom[Repr, A, That]): That = { 14 | val builder = cbf(xs.repr) 15 | val iterator = xs.iterator 16 | var set = Set[B]() 17 | while (iterator.hasNext) { 18 | val element = iterator.next 19 | val distinctField = f(element) 20 | if (!set(distinctField)) { 21 | set += distinctField 22 | builder += element 23 | } 24 | } 25 | builder.result 26 | } 27 | } 28 | 29 | } 30 | -------------------------------------------------------------------------------- /src/main/scala/com/redislabs/provider/redis/util/ConnectionUtils.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.util 2 | 3 | import java.util.{List => JList} 4 | 5 | import com.redislabs.provider.redis.RedisConfig 6 | import com.redislabs.provider.redis.util.ConnectionUtils.XINFO.{SubCommandGroups, SubCommandStream} 7 | import redis.clients.jedis.Jedis 8 | import redis.clients.jedis.commands.ProtocolCommand 9 | import redis.clients.jedis.util.SafeEncoder 10 | 11 | import scala.collection.JavaConverters._ 12 | 13 | /** 14 | * @author The Viet Nguyen 15 | */ 16 | object ConnectionUtils { 17 | 18 | def withConnection[A](conn: Jedis)(body: Jedis => A): A = { 19 | try { 20 | body(conn) 21 | } finally { 22 | conn.close() 23 | } 24 | } 25 | 26 | def withConnection[A](streamKey: String)(body: Jedis => A)(implicit redisConfig: RedisConfig): A = { 27 | withConnection(redisConfig.connectionForKey(streamKey)){ 28 | body 29 | } 30 | } 31 | 32 | implicit class JedisExt(val jedis: Jedis) extends AnyVal { 33 | 34 | //TODO: temporary solution to get latest offset while not supported by Jedis 35 | def xinfo(command: String, args: String*): Map[String, Any] = { 36 | val client = jedis.getClient 37 | val combinedArgs = command +: args 38 | client.sendCommand(XINFO, combinedArgs: _*) 39 | val response = asList(client.getOne).asScala 40 | command match { 41 | case SubCommandStream => 42 | asMap(response) 43 | case SubCommandGroups => 44 | response.map(m => asList(m)).map(_.asScala).map(asMap) 45 | .map(m => String.valueOf(m("name")) -> m).toMap 46 | } 47 | } 48 | 49 | private def asMap(seq: Seq[Any]): Map[String, Any] = { 50 | seq.grouped(2) 51 | .map { group => 52 | val key = asString(group.head) 53 | val value = group(1) match { 54 | case arr: Array[Byte] => asString(arr) 55 | case other: Any => other 56 | } 57 | key -> value 58 | }.toMap 59 | } 60 | 61 | private def asList(any: Any): JList[Any] = 62 | any.asInstanceOf[JList[Any]] 63 | 64 | private def asString(any: Any): String = 65 | new String(any.asInstanceOf[Array[Byte]]) 66 | } 67 | 68 | object XINFO extends ProtocolCommand { 69 | 70 | val SubCommandStream = "STREAM" 71 | val SubCommandGroups = "GROUPS" 72 | 73 | val LastGeneratedId = "last-generated-id" 74 | val LastDeliveredId = "last-delivered-id" 75 | 
val LastEntry = "last-entry" 76 | val EntryId = "_id" 77 | 78 | override def getRaw: Array[Byte] = SafeEncoder.encode("XINFO") 79 | } 80 | 81 | } 82 | -------------------------------------------------------------------------------- /src/main/scala/com/redislabs/provider/redis/util/JsonUtils.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.util 2 | 3 | import org.json4s.jackson.Serialization 4 | import org.json4s.{Formats, NoTypeHints} 5 | 6 | /** 7 | * @author The Viet Nguyen 8 | */ 9 | object JsonUtils { 10 | 11 | private implicit val formats: Formats = Serialization.formats(NoTypeHints) 12 | 13 | def toJson(any: AnyRef): String = { 14 | Serialization.write(any) 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /src/main/scala/com/redislabs/provider/redis/util/Logging.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.util 2 | 3 | import org.slf4j.{Logger, LoggerFactory} 4 | 5 | /** 6 | * @author The Viet Nguyen 7 | */ 8 | trait Logging { 9 | 10 | /** 11 | * This logger will likely to be used in serializable environment like Spark contexts. So, we make 12 | * it transient to avoid unnecessary serialization errors. 13 | */ 14 | @transient private var _logger: Logger = _ 15 | 16 | protected def loggerName: String = 17 | this.getClass.getName.stripSuffix("$") 18 | 19 | protected def logger: Logger = { 20 | if (_logger == null) { 21 | _logger = LoggerFactory.getLogger(loggerName) 22 | } 23 | _logger 24 | } 25 | 26 | def logInfo(msg: => String): Unit = { 27 | if (logger.isInfoEnabled) { 28 | _logger.info(msg) 29 | } 30 | } 31 | 32 | def logDebug(msg: => String): Unit = { 33 | if (logger.isDebugEnabled) { 34 | _logger.debug(msg) 35 | } 36 | } 37 | 38 | def logTrace(msg: => String): Unit = { 39 | if (logger.isTraceEnabled) { 40 | _logger.trace(msg) 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/main/scala/com/redislabs/provider/redis/util/ParseUtils.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.util 2 | 3 | import java.lang.{Boolean => JBoolean, Byte => JByte, Double => JDouble, Float => JFloat, Long => JLong, Short => JShort} 4 | 5 | import org.apache.spark.sql.types._ 6 | import redis.clients.jedis.exceptions.JedisDataException 7 | 8 | import scala.util.{Failure, Success, Try} 9 | 10 | /** 11 | * @author The Viet Nguyen 12 | */ 13 | object ParseUtils { 14 | 15 | def parseFields(value: Map[String, String], schema: StructType): Array[Any] = 16 | schema.fields.map { field => 17 | val fieldName = field.name 18 | val fieldValue = value(fieldName) 19 | parseValue(field.dataType, fieldValue) 20 | } 21 | 22 | private def parseValue(dataType: DataType, fieldValueStr: String): Any = { 23 | if (fieldValueStr == null) { 24 | null 25 | } else { 26 | parseNotNullValue(dataType, fieldValueStr) 27 | } 28 | } 29 | 30 | // scalastyle:off cyclomatic.complexity 31 | private def parseNotNullValue(dataType: DataType, fieldValueStr: String): Any = 32 | dataType match { 33 | case ByteType => JByte.parseByte(fieldValueStr) 34 | case IntegerType => Integer.parseInt(fieldValueStr) 35 | case LongType => JLong.parseLong(fieldValueStr) 36 | case FloatType => JFloat.parseFloat(fieldValueStr) 37 | case DoubleType => JDouble.parseDouble(fieldValueStr) 38 | case BooleanType => 
JBoolean.parseBoolean(fieldValueStr) 39 | case ShortType => JShort.parseShort(fieldValueStr) 40 | case DateType => java.sql.Date.valueOf(fieldValueStr) 41 | case TimestampType => java.sql.Timestamp.valueOf(fieldValueStr) 42 | case _ => fieldValueStr 43 | } 44 | 45 | private[redis] def ignoreJedisWrongTypeException[T](tried: Try[T]): Try[Option[T]] = { 46 | tried.transform(s => Success(Some(s)), { 47 | // Swallow this exception 48 | case e: JedisDataException if Option(e.getMessage).getOrElse("").contains("WRONGTYPE") => Success(None) 49 | case e: Throwable => Failure(e) 50 | }) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/scala/com/redislabs/provider/redis/util/PipelineUtils.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.util 2 | 3 | import java.util.{List => JList} 4 | 5 | import com.redislabs.provider.redis.ReadWriteConfig 6 | import redis.clients.jedis.{Jedis, Pipeline} 7 | 8 | import scala.collection.JavaConverters._ 9 | import scala.collection.{TraversableOnce, mutable} 10 | 11 | object PipelineUtils { 12 | 13 | /** 14 | * Executes a pipeline function for each item in the sequence, returns the server response. 15 | * 16 | * Ensures that a new pipeline is created if the number of operations exceeds the given maxPipelineSize 17 | * while iterating over the items. 18 | * 19 | * @param conn jedis connection 20 | * @param readWriteConfig read/write config 21 | * @param items a sequence of elements (typically keys) 22 | * @param f function to applied for each item in the sequence 23 | * @return response from the server 24 | */ 25 | def mapWithPipeline[A](conn: Jedis, items: TraversableOnce[A])(f: (Pipeline, A) => Unit) 26 | (implicit readWriteConfig: ReadWriteConfig): Seq[AnyRef] = { 27 | val totalResp = mutable.ListBuffer[JList[AnyRef]]() 28 | 29 | // iterate over items and create new pipelines periodically 30 | var i = 0 31 | var pipeline = conn.pipelined() 32 | for (x <- items) { 33 | f(pipeline, x) 34 | i = i + 1 35 | if (i % readWriteConfig.maxPipelineSize == 0) { 36 | val resp = pipeline.syncAndReturnAll() 37 | totalResp += resp 38 | pipeline = conn.pipelined() 39 | } 40 | } 41 | 42 | // sync remaining items 43 | if (i % readWriteConfig.maxPipelineSize != 0) { 44 | val resp = pipeline.syncAndReturnAll() 45 | totalResp += resp 46 | } 47 | 48 | totalResp.flatMap(_.asScala) 49 | } 50 | 51 | /** 52 | * Executes a pipeline function for each item in the sequence. No response is returned. 53 | * 54 | * Ensures that a new pipeline is created if the number of operations exceeds the given maxPipelineSize 55 | * while iterating over the items. 
56 | * 57 | * @param conn jedis connection 58 | * @param readWriteConfig read/write config 59 | * @param items a sequence of elements (typically keys) 60 | * @param f function to applied for each item in the sequence 61 | */ 62 | def foreachWithPipeline[A](conn: Jedis, items: TraversableOnce[A])(f: (Pipeline, A) => Unit) 63 | (implicit readWriteConfig: ReadWriteConfig): Unit = { 64 | // iterate over items and create new pipelines periodically 65 | var i = 0 66 | var pipeline = conn.pipelined() 67 | for (x <- items) { 68 | f(pipeline, x) 69 | i = i + 1 70 | if (i % readWriteConfig.maxPipelineSize == 0) { 71 | pipeline.sync() 72 | pipeline = conn.pipelined() 73 | } 74 | } 75 | 76 | // sync remaining items 77 | if (i % readWriteConfig.maxPipelineSize != 0) { 78 | pipeline.sync() 79 | } 80 | } 81 | 82 | /** 83 | * Executes a pipeline function for each item in the sequence. Doesn't sync and return the last pipeline after 84 | * all operations are executed. Allows to execute more operations with the returned pipeline. 85 | * The client is responsible of syncing the returned pipeline. 86 | * 87 | * Ensures that a new pipeline is created if the number of operations exceeds the given maxPipelineSize 88 | * while iterating over the items. 89 | * 90 | * @param conn jedis connection 91 | * @param readWriteConfig read/write config 92 | * @param items a sequence of elements (typically keys) 93 | * @param f function to applied for each item in the sequence 94 | * @return the last pipeline 95 | */ 96 | def foreachWithPipelineNoLastSync[A](conn: Jedis, items: TraversableOnce[A])(f: (Pipeline, A) => Unit) 97 | (implicit readWriteConfig: ReadWriteConfig): Pipeline = { 98 | // iterate over items and create new pipelines periodically 99 | var i = 0 100 | var pipeline = conn.pipelined() 101 | for (x <- items) { 102 | f(pipeline, x) 103 | i = i + 1 104 | if (i % readWriteConfig.maxPipelineSize == 0) { 105 | pipeline.sync() 106 | pipeline = conn.pipelined() 107 | } 108 | } 109 | 110 | // return pipeline, the client should sync pipeline 111 | pipeline 112 | } 113 | 114 | } 115 | -------------------------------------------------------------------------------- /src/main/scala/com/redislabs/provider/redis/util/StreamUtils.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.util 2 | 3 | import org.apache.commons.lang3.StringUtils 4 | import redis.clients.jedis.{StreamEntryID, Jedis} 5 | 6 | /** 7 | * @author The Viet Nguyen 8 | */ 9 | object StreamUtils extends Logging { 10 | 11 | val EntryIdEarliest = new StreamEntryID(0, 0) 12 | 13 | def createConsumerGroupIfNotExist(conn: Jedis, streamKey: String, groupName: String, 14 | offset: StreamEntryID): Unit = { 15 | try { 16 | conn.xgroupCreate(streamKey, groupName, offset, true) 17 | } catch { 18 | case e: Exception if StringUtils.contains(e.getMessage, "already exists") => 19 | logInfo(s"Consumer group already exists: $groupName") 20 | } 21 | } 22 | 23 | def resetConsumerGroup(conn: Jedis, streamKey: String, groupName: String, 24 | offset: StreamEntryID): Unit = { 25 | logInfo(s"Setting consumer group $groupName id to $offset") 26 | conn.xgroupSetID(streamKey, groupName, offset) 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/redis/BinaryRedisPersistence.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis 2 | 3 | import 
java.nio.charset.StandardCharsets.UTF_8 4 | 5 | import org.apache.commons.lang3.SerializationUtils 6 | import org.apache.spark.sql.Row 7 | import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema 8 | import org.apache.spark.sql.types.StructType 9 | import redis.clients.jedis.Pipeline 10 | 11 | /** 12 | * @author The Viet Nguyen 13 | */ 14 | class BinaryRedisPersistence extends RedisPersistence[Array[Byte]] { 15 | 16 | override def save(pipeline: Pipeline, key: String, value: Array[Byte], ttl: Int): Unit = { 17 | val keyBytes = key.getBytes(UTF_8) 18 | if (ttl > 0) { 19 | pipeline.setex(keyBytes, ttl.toLong, value) 20 | } else { 21 | pipeline.set(keyBytes, value) 22 | } 23 | } 24 | 25 | override def load(pipeline: Pipeline, key: String, requiredColumns: Seq[String]): Unit = 26 | pipeline.get(key.getBytes(UTF_8)) 27 | 28 | override def encodeRow(keyName: String, value: Row): Array[Byte] = { 29 | val fields = value.schema.fields.map(_.name) 30 | val valuesArray = fields.map(f => value.getAs[Any](f)) 31 | SerializationUtils.serialize(valuesArray) 32 | } 33 | 34 | override def decodeRow(keyMap: (String, String), value: Array[Byte], schema: StructType, 35 | requiredColumns: Seq[String]): Row = { 36 | val valuesArray: Array[Any] = SerializationUtils.deserialize(value) 37 | new GenericRowWithSchema(valuesArray, schema) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/redis/DefaultSource.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis 2 | 3 | import org.apache.spark.sql.SaveMode.{Append, ErrorIfExists, Ignore, Overwrite} 4 | import org.apache.spark.sql.sources.{BaseRelation, CreatableRelationProvider, RelationProvider, SchemaRelationProvider} 5 | import org.apache.spark.sql.types.StructType 6 | import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode} 7 | 8 | class DefaultSource extends RelationProvider with SchemaRelationProvider 9 | with CreatableRelationProvider { 10 | 11 | override def createRelation(sqlContext: SQLContext, 12 | parameters: Map[String, String]): BaseRelation = { 13 | new RedisSourceRelation(sqlContext, parameters, userSpecifiedSchema = None) 14 | } 15 | 16 | /** 17 | * Creates a new relation by saving the data to Redis 18 | */ 19 | override def createRelation(sqlContext: SQLContext, mode: SaveMode, 20 | parameters: Map[String, String], data: DataFrame): BaseRelation = { 21 | val relation = new RedisSourceRelation(sqlContext, parameters, userSpecifiedSchema = None) 22 | mode match { 23 | case Append => relation.insert(data, overwrite = false) 24 | case Overwrite => relation.insert(data, overwrite = true) 25 | case ErrorIfExists => 26 | if (relation.nonEmpty) { 27 | throw new IllegalStateException("SaveMode is set to ErrorIfExists and dataframe " + 28 | "already exists in Redis and contains data.") 29 | } 30 | relation.insert(data, overwrite = false) 31 | case Ignore => 32 | if (relation.isEmpty) { 33 | relation.insert(data, overwrite = false) 34 | } 35 | } 36 | 37 | relation 38 | } 39 | 40 | override def createRelation(sqlContext: SQLContext, parameters: Map[String, String], 41 | schema: StructType): BaseRelation = 42 | new RedisSourceRelation(sqlContext, parameters, userSpecifiedSchema = Some(schema)) 43 | } 44 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/redis/HashRedisPersistence.scala: 
-------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis 2 | 3 | import java.util.{List => JList} 4 | 5 | import com.redislabs.provider.redis.util.ParseUtils 6 | import org.apache.spark.sql.Row 7 | import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema 8 | import org.apache.spark.sql.types._ 9 | import redis.clients.jedis.Pipeline 10 | 11 | import scala.collection.JavaConverters._ 12 | 13 | /** 14 | * @author The Viet Nguyen 15 | */ 16 | class HashRedisPersistence extends RedisPersistence[Any] { 17 | 18 | override def save(pipeline: Pipeline, key: String, value: Any, ttl: Int): Unit = { 19 | val javaValue = value.asInstanceOf[Map[String, String]].asJava 20 | pipeline.hmset(key, javaValue) 21 | if (ttl > 0) { 22 | pipeline.expire(key, ttl.toLong) 23 | } 24 | } 25 | 26 | override def load(pipeline: Pipeline, key: String, requiredColumns: Seq[String]): Unit = { 27 | pipeline.hmget(key, requiredColumns: _*) 28 | } 29 | 30 | override def encodeRow(keyName: String, value: Row): Map[String, String] = { 31 | val fields = value.schema.fields.map(_.name) 32 | val kvMap = value.getValuesMap[Any](fields) 33 | kvMap 34 | .filter { case (_, v) => 35 | // don't store null values 36 | v != null 37 | } 38 | .filter { case (k, _) => 39 | // don't store key values 40 | k != keyName 41 | } 42 | .map { case (k, v) => 43 | k -> String.valueOf(v) 44 | } 45 | } 46 | 47 | override def decodeRow(keyMap: (String, String), value: Any, schema: StructType, 48 | requiredColumns: Seq[String]): Row = { 49 | val scalaValue = value.asInstanceOf[JList[String]].asScala 50 | val values = requiredColumns.zip(scalaValue) 51 | val results = values :+ keyMap 52 | val fieldsValue = ParseUtils.parseFields(results.toMap, schema) 53 | new GenericRowWithSchema(fieldsValue, schema) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/redis/RedisPersistence.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis 2 | 3 | import org.apache.spark.sql.Row 4 | import org.apache.spark.sql.types.StructType 5 | import redis.clients.jedis.Pipeline 6 | 7 | /** 8 | * @author The Viet Nguyen 9 | */ 10 | trait RedisPersistence[T] extends Serializable { 11 | 12 | def save(pipeline: Pipeline, key: String, value: T, ttl: Int): Unit 13 | 14 | def load(pipeline: Pipeline, key: String, requiredColumns: Seq[String]): Unit 15 | 16 | /** 17 | * Encode dataframe row before storing it in Redis. 18 | * 19 | * @param keyName field name that should be encoded in special way, e.g. in Redis keys. 20 | * @param value row to encode. 21 | * @return encoded row 22 | */ 23 | def encodeRow(keyName: String, value: Row): T 24 | 25 | /** 26 | * Decode dataframe row stored in Redis. 
27 | * 28 | * @param keyMap extracted name/value of key column from Redis key 29 | * @param value encoded row 30 | * @param schema row schema 31 | * @param requiredColumns required columns to decode 32 | * @return decoded row 33 | */ 34 | def decodeRow(keyMap: (String, String), value: T, schema: StructType, 35 | requiredColumns: Seq[String]): Row 36 | } 37 | 38 | object RedisPersistence { 39 | 40 | private val providers = 41 | Map(SqlOptionModelBinary -> new BinaryRedisPersistence(), 42 | SqlOptionModelHash -> new HashRedisPersistence()) 43 | 44 | def apply(model: String): RedisPersistence[Any] = { 45 | // use hash model by default 46 | providers.getOrElse(model, providers(SqlOptionModelHash)) 47 | .asInstanceOf[RedisPersistence[Any]] 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/redis/redis.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql 2 | 3 | /** 4 | * @author The Viet Nguyen 5 | */ 6 | package object redis { 7 | 8 | val RedisFormat = "org.apache.spark.sql.redis" 9 | 10 | val RddWriteIteratorGroupingSize = "rdd.write.iterator.grouping.size" 11 | val RddWriteIteratorGroupingSizeDefault = 1000 12 | 13 | val SqlOptionFilterKeysByType = "filter.keys.by.type" 14 | val SqlOptionNumPartitions = "partitions.number" 15 | /** 16 | * Default read operation number of partitions. 17 | */ 18 | val SqlOptionNumPartitionsDefault = 3 19 | val SqlOptionTableName = "table" 20 | val SqlOptionKeysPattern = "keys.pattern" 21 | val SqlOptionModel = "model" 22 | val SqlOptionModelBinary = "binary" 23 | val SqlOptionModelHash = "hash" 24 | val SqlOptionInferSchema = "infer.schema" 25 | val SqlOptionKeyColumn = "key.column" 26 | val SqlOptionTTL = "ttl" 27 | 28 | val SqlOptionMaxPipelineSize = "max.pipeline.size" 29 | val SqlOptionScanCount = "scan.count" 30 | 31 | val SqlOptionIteratorGroupingSize = "iterator.grouping.size" 32 | val SqlOptionIteratorGroupingSizeDefault = 1000 33 | 34 | val StreamOptionStreamKeys = "stream.keys" 35 | val StreamOptionStreamOffsets = "stream.offsets" 36 | val StreamOptionReadBatchSize = "stream.read.batch.size" 37 | val StreamOptionReadBatchSizeDefault = 100 38 | val StreamOptionReadBlock = "stream.read.block" 39 | val StreamOptionReadBlockDefault = 500 40 | val StreamOptionParallelism = "stream.parallelism" 41 | val StreamOptionGroupName = "stream.group.name" 42 | val StreamOptionConsumerPrefix = "stream.consumer.prefix" 43 | } 44 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/redis/stream/RedisSource.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis.stream 2 | 3 | import com.redislabs.provider.redis.RedisConfig 4 | import com.redislabs.provider.redis.util.CollectionUtils.RichCollection 5 | import com.redislabs.provider.redis.util.ConnectionUtils.{JedisExt, XINFO, withConnection} 6 | import com.redislabs.provider.redis.util.StreamUtils.{createConsumerGroupIfNotExist, resetConsumerGroup} 7 | import com.redislabs.provider.redis.util.{Logging, ParseUtils} 8 | import org.apache.spark.sql.catalyst.InternalRow 9 | import org.apache.spark.sql.execution.streaming.{Offset, Source} 10 | import org.apache.spark.sql.redis.stream.RedisSource._ 11 | import org.apache.spark.sql.types.StructType 12 | import org.apache.spark.sql.{DataFrame, SQLContext} 13 | import 
org.apache.spark.unsafe.types.UTF8String 14 | import redis.clients.jedis.{StreamEntryID, Jedis} 15 | 16 | import scala.collection.JavaConverters._ 17 | import scala.util.Try 18 | 19 | /** 20 | * @author The Viet Nguyen 21 | */ 22 | class RedisSource(sqlContext: SQLContext, metadataPath: String, 23 | userDefinedSchema: Option[StructType], parameters: Map[String, String]) 24 | extends Source with Logging { 25 | 26 | private val sc = sqlContext.sparkContext 27 | 28 | implicit private val redisConfig: RedisConfig = RedisConfig.fromSparkConfAndParameters(sc.getConf, parameters) 29 | 30 | private val sourceConfig = RedisSourceConfig.fromMap(parameters) 31 | 32 | private val currentSchema = userDefinedSchema.getOrElse { 33 | throw new IllegalArgumentException("Please specify schema") 34 | } 35 | 36 | /** 37 | * Called once on the source startup. Creates consumer groups and resets their offsets if needed. 38 | */ 39 | def start(): Unit = { 40 | sourceConfig.consumerConfigs.foreach { consumerConfig => 41 | val offsetsByStreamKey = sourceConfig.start.map(_.offsets).getOrElse(Map()) 42 | val streamKey = consumerConfig.streamKey 43 | val groupName = consumerConfig.groupName 44 | val configOffsetOption = offsetsByStreamKey.get(streamKey).map(_.offset).map(new StreamEntryID(_)) 45 | withConnection(streamKey) { conn => 46 | createConsumerGroupIfNotExist(conn, streamKey, groupName, configOffsetOption.getOrElse(StreamEntryID.LAST_ENTRY)) 47 | // if config offset is defined, reset to its value 48 | configOffsetOption.foreach { offset => 49 | resetConsumerGroup(conn, streamKey, groupName, offset) 50 | } 51 | } 52 | } 53 | } 54 | 55 | override def schema: StructType = currentSchema 56 | 57 | /** 58 | * Returns the maximum available offset for this source. 59 | * Returns `None` if this source has never received any data. 60 | */ 61 | override def getOffset: Option[Offset] = { 62 | val initialOffset = RedisSourceOffset(Map()) 63 | val sourceOffset = sourceConfig.consumerConfigs.foldLeft(initialOffset) { case (acc, e) => 64 | val streamKey = e.streamKey 65 | withConnection(streamKey) { conn => 66 | Try { 67 | // try to read last stream id, it will fail if doesn't exist 68 | val offsetId = streamLastId(conn, streamKey) 69 | val streamOffset = streamKey -> RedisConsumerOffset(e.groupName, offsetId) 70 | acc.copy(acc.offsets + streamOffset) 71 | } getOrElse { 72 | // stream key doesn't exist 73 | acc 74 | } 75 | } 76 | } 77 | if (sourceOffset.offsets.isEmpty) { 78 | None 79 | } else { 80 | Some(sourceOffset) 81 | } 82 | } 83 | 84 | override def getBatch(start: Option[Offset], end: Offset): DataFrame = { 85 | logInfo { 86 | s"""Getting batch... 
87 | | start: $start 88 | | end: $end 89 | """.stripMargin 90 | } 91 | val localSchema = currentSchema 92 | val offsetRanges = getOffsetRanges(start, end, sourceConfig.consumerConfigs) 93 | 94 | // if 'start' is set, reset consumer group offset to read this batch 95 | resetConsumerGroupsIfHasOffset(offsetRanges) 96 | 97 | // read data 98 | val internalRdd = new RedisSourceRdd(sc, redisConfig, offsetRanges) 99 | .map { case (id, fields) => 100 | val fieldMap = fields.asScala.toMap + ("_id" -> id.toString) 101 | val values = ParseUtils.parseFields(fieldMap, localSchema) 102 | .map { 103 | case str: String => UTF8String.fromString(str) 104 | case other: Any => other 105 | } 106 | InternalRow(values: _*) 107 | } 108 | sqlContext.internalCreateDataFrame(internalRdd, schema, isStreaming = true) 109 | } 110 | 111 | override def commit(end: Offset): Unit = { 112 | logInfo( 113 | s"""Committing offset.. 114 | | end: ${end.json()} 115 | |""".stripMargin) 116 | } 117 | 118 | override def stop(): Unit = { 119 | } 120 | 121 | private def resetConsumerGroupsIfHasOffset(offsetRanges: Seq[RedisSourceOffsetRange]): Unit = { 122 | forEachOffsetRangeWithStreamConnection(offsetRanges) { case (conn, offsetRange) => 123 | offsetRange.start.map(new StreamEntryID(_)).foreach { start => 124 | val config = offsetRange.config 125 | resetConsumerGroup(conn, config.streamKey, config.groupName, start) 126 | } 127 | } 128 | } 129 | 130 | private def forEachOffsetRangeWithStreamConnection(offsetRanges: Seq[RedisSourceOffsetRange]) 131 | (op: (Jedis, RedisSourceOffsetRange) => Unit): Unit = { 132 | offsetRanges.groupBy(_.config.streamKey).foreach { case (streamKey, subRanges) => 133 | withConnection(streamKey) { conn => 134 | subRanges.distinctBy(_.config.groupName).foreach { offsetRange => 135 | op(conn, offsetRange) 136 | } 137 | } 138 | } 139 | } 140 | 141 | } 142 | 143 | object RedisSource { 144 | 145 | def getOffsetRanges(start: Option[Offset], end: Offset, 146 | consumerConfigs: Seq[RedisConsumerConfig]): Seq[RedisSourceOffsetRange] = { 147 | 148 | val offsetStarts = start.map(RedisSourceOffset.fromOffset).map(_.offsets).getOrElse(Map()) 149 | val offsetEnds = RedisSourceOffset.fromOffset(end) 150 | val configsByStreamKey = consumerConfigs.groupBy(_.streamKey) 151 | 152 | offsetEnds.offsets.flatMap { case (streamKey, offsetEnd) => 153 | val offsetStart = offsetStarts.get(streamKey).map(_.offset) 154 | val configs = configsByStreamKey(streamKey) 155 | configs.map { c => RedisSourceOffsetRange(offsetStart, offsetEnd.offset, c) } 156 | }.toSeq 157 | } 158 | 159 | def streamLastId(conn: Jedis, streamKey: String): String = { 160 | val infoMap = conn.xinfo(XINFO.SubCommandStream, streamKey) 161 | String.valueOf(infoMap(XINFO.LastGeneratedId)) 162 | } 163 | 164 | } 165 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/redis/stream/RedisSourceConfig.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis.stream 2 | 3 | import org.apache.spark.sql 4 | import org.apache.spark.sql.redis._ 5 | 6 | /** 7 | * @author The Viet Nguyen 8 | */ 9 | case class RedisSourceConfig(consumerConfigs: Seq[RedisConsumerConfig], 10 | start: Option[RedisSourceOffset]) 11 | 12 | object RedisSourceConfig { 13 | 14 | def fromMap(config: Map[String, String]): RedisSourceConfig = { 15 | val streamKeys = config.getOrElse(StreamOptionStreamKeys, 16 | throw new IllegalArgumentException(s"Please specify 
'$StreamOptionStreamKeys'")) 17 | val start = config.get(StreamOptionStreamOffsets).map(RedisSourceOffset.fromJson) 18 | val parallelism = config.get(sql.redis.StreamOptionParallelism).map(_.toInt).getOrElse(1) 19 | val groupName = config.getOrElse(StreamOptionGroupName, "spark-source") 20 | val consumerPrefix = config.getOrElse(StreamOptionConsumerPrefix, "consumer") 21 | val batchSize = config.get(StreamOptionReadBatchSize).map(_.toInt).getOrElse(StreamOptionReadBatchSizeDefault) 22 | val block = config.get(StreamOptionReadBlock).map(_.toInt).getOrElse(StreamOptionReadBlockDefault) 23 | val consumerConfigs = streamKeys.split(",").flatMap { streamKey => 24 | (1 to parallelism).map { consumerIndex => 25 | RedisConsumerConfig(streamKey, s"$groupName", s"$consumerPrefix-$consumerIndex", batchSize, block) 26 | } 27 | } 28 | RedisSourceConfig(consumerConfigs, start) 29 | } 30 | } 31 | 32 | case class RedisConsumerConfig(streamKey: String, groupName: String, consumerName: String, 33 | batchSize: Int, block: Int) 34 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/redis/stream/RedisSourceOffset.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis.stream 2 | 3 | import com.redislabs.provider.redis.util.JsonUtils 4 | import org.apache.spark.sql.execution.streaming.{Offset, SerializedOffset} 5 | import org.json4s.jackson.Serialization 6 | import org.json4s.{Formats, NoTypeHints} 7 | 8 | /** 9 | * @param offsets A map of offset by stream key 10 | * @author The Viet Nguyen 11 | */ 12 | case class RedisSourceOffset(offsets: Map[String, RedisConsumerOffset]) extends Offset { 13 | 14 | override def json(): String = JsonUtils.toJson(this) 15 | } 16 | 17 | object RedisSourceOffset { 18 | 19 | private implicit val formats: Formats = Serialization.formats(NoTypeHints) 20 | 21 | def fromOffset(offset: Offset): RedisSourceOffset = { 22 | offset match { 23 | case o: RedisSourceOffset => o 24 | case so: SerializedOffset => fromJson(so.json) 25 | case _ => 26 | throw new IllegalArgumentException( 27 | s"Invalid conversion from offset of ${offset.getClass} to RedisSourceOffset") 28 | } 29 | 30 | fromJson(offset.json()) 31 | } 32 | 33 | def fromJson(json: String): RedisSourceOffset = { 34 | try { 35 | Serialization.read[RedisSourceOffset](json) 36 | } catch { 37 | case e: Throwable => 38 | val example = RedisSourceOffset(Map("my-stream" -> RedisConsumerOffset("redis-source", "1543674099961-0"))) 39 | val jsonExample = Serialization.write(example) 40 | throw new RuntimeException(s"Unable to parse offset json. 
Example of valid json: $jsonExample", e) 41 | } 42 | } 43 | } 44 | 45 | case class RedisConsumerOffset(groupName: String, offset: String) 46 | 47 | case class RedisSourceOffsetRange(start: Option[String], end: String, config: RedisConsumerConfig) 48 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/redis/stream/RedisSourceRdd.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis.stream 2 | 3 | import com.redislabs.provider.redis.RedisConfig 4 | import com.redislabs.provider.redis.util.ConnectionUtils.withConnection 5 | import org.apache.spark.rdd.RDD 6 | import org.apache.spark.sql.redis.stream.RedisSourceTypes.StreamEntry 7 | import org.apache.spark.{Partition, SparkContext, TaskContext} 8 | 9 | /** 10 | * RDD of EntryID -> StreamEntry.fields 11 | * 12 | * @author The Viet Nguyen 13 | */ 14 | class RedisSourceRdd(sc: SparkContext, redisConfig: RedisConfig, 15 | offsetRanges: Seq[RedisSourceOffsetRange], autoAck: Boolean = true) 16 | extends RDD[StreamEntry](sc, Nil) { 17 | 18 | override def compute(split: Partition, context: TaskContext): Iterator[StreamEntry] = { 19 | val partition = split.asInstanceOf[RedisSourceRddPartition] 20 | val offsetRange = partition.offsetRange 21 | val streamReader = new RedisStreamReader(redisConfig) 22 | streamReader.unreadStreamEntries(offsetRange) 23 | } 24 | 25 | override protected def getPartitions: Array[Partition] = { 26 | offsetRanges.zipWithIndex.map { case (e, i) => RedisSourceRddPartition(i, e) } 27 | .toArray 28 | } 29 | } 30 | 31 | case class RedisSourceRddPartition(index: Int, offsetRange: RedisSourceOffsetRange) 32 | extends Partition 33 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/redis/stream/RedisSourceTypes.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis.stream 2 | 3 | import java.util.{List => JList, Map => JMap} 4 | 5 | import redis.clients.jedis.{StreamEntryID, StreamEntry => JStreamEntry} 6 | 7 | /** 8 | * @author The Viet Nguyen 9 | */ 10 | object RedisSourceTypes { 11 | 12 | type StreamEntry = (StreamEntryID, JMap[String, String]) 13 | type StreamEntryBatch = JMap.Entry[String, JList[JStreamEntry]] 14 | type StreamEntryBatches = JList[StreamEntryBatch] 15 | } 16 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/redis/stream/RedisStreamProvider.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis.stream 2 | 3 | import com.redislabs.provider.redis.util.Logging 4 | import org.apache.spark.sql.SQLContext 5 | import org.apache.spark.sql.execution.streaming.Source 6 | import org.apache.spark.sql.sources.{DataSourceRegister, StreamSourceProvider} 7 | import org.apache.spark.sql.types.{StringType, StructField, StructType} 8 | 9 | /** 10 | * @author The Viet Nguyen 11 | */ 12 | class RedisStreamProvider extends DataSourceRegister with StreamSourceProvider with Logging { 13 | 14 | override def shortName(): String = "redis" 15 | 16 | override def sourceSchema(sqlContext: SQLContext, schema: Option[StructType], 17 | providerName: String, parameters: Map[String, String]): (String, StructType) = { 18 | providerName -> schema.getOrElse { 19 | StructType(Seq(StructField("_id", StringType))) 20 | } 21 | } 22 | 23 | 
override def createSource(sqlContext: SQLContext, metadataPath: String, 24 | schema: Option[StructType], providerName: String, 25 | parameters: Map[String, String]): Source = { 26 | val (_, ss) = sourceSchema(sqlContext, schema, providerName, parameters) 27 | val source = new RedisSource(sqlContext, metadataPath, Some(ss), parameters) 28 | source.start() 29 | source 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/redis/stream/RedisStreamReader.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis.stream 2 | 3 | import java.util.AbstractMap.SimpleEntry 4 | import java.util.{Map => JMap} 5 | 6 | import com.redislabs.provider.redis.RedisConfig 7 | import com.redislabs.provider.redis.util.ConnectionUtils.withConnection 8 | import com.redislabs.provider.redis.util.Logging 9 | import org.apache.spark.sql.redis.stream.RedisSourceTypes.{StreamEntry, StreamEntryBatch, StreamEntryBatches} 10 | import redis.clients.jedis.StreamEntryID 11 | 12 | import scala.collection.JavaConverters._ 13 | import scala.math.Ordering.Implicits._ 14 | 15 | /** 16 | * @author The Viet Nguyen 17 | */ 18 | class RedisStreamReader(redisConfig: RedisConfig) extends Logging with Serializable { 19 | 20 | def unreadStreamEntries(offsetRange: RedisSourceOffsetRange): Iterator[StreamEntry] = { 21 | val config = offsetRange.config 22 | 23 | logInfo(s"Reading entries " + 24 | s"[${config.streamKey}, ${config.groupName}, ${config.consumerName}, start=${offsetRange.start} " + 25 | s"end=${offsetRange.end}]... " 26 | ) 27 | 28 | val res = filterStreamEntries(offsetRange) { 29 | val startEntryOffset = new SimpleEntry(config.streamKey, StreamEntryID.UNRECEIVED_ENTRY) 30 | Iterator.continually { 31 | readStreamEntryBatches(offsetRange, startEntryOffset) 32 | } 33 | } 34 | res 35 | } 36 | 37 | private def readStreamEntryBatches(offsetRange: RedisSourceOffsetRange, 38 | startEntryOffset: JMap.Entry[String, StreamEntryID]): StreamEntryBatches = { 39 | val config = offsetRange.config 40 | withConnection(redisConfig.connectionForKey(config.streamKey)) { conn => 41 | // we don't need acknowledgement, if spark processing fails, it will request the same batch again 42 | val noAck = true 43 | val response = conn.xreadGroup(config.groupName, 44 | config.consumerName, 45 | config.batchSize, 46 | config.block, 47 | noAck, 48 | startEntryOffset) 49 | logDebug(s"Got entries: $response") 50 | response 51 | } 52 | } 53 | 54 | private def filterStreamEntries(offsetRange: RedisSourceOffsetRange) 55 | (streamGroups: => Iterator[StreamEntryBatches]): Iterator[StreamEntry] = { 56 | val end = new StreamEntryID(offsetRange.end) 57 | streamGroups 58 | .takeWhile { response => 59 | (response != null) && !response.isEmpty 60 | } 61 | .flatMap { response => 62 | response.asScala.iterator 63 | } 64 | .flatMap { streamEntry => 65 | flattenStreamEntries(streamEntry) 66 | } 67 | .takeWhile { case (entryId, _) => 68 | entryId <= end 69 | } 70 | } 71 | 72 | private def flattenStreamEntries(entry: StreamEntryBatch): Iterator[StreamEntry] = { 73 | entry.getValue.asScala.iterator 74 | .map { streamEntry => 75 | streamEntry.getID -> streamEntry.getFields 76 | } 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Root logger option 2 | 
log4j.rootLogger=WARN, stdout 3 | # Direct log messages to stdout 4 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 5 | #log4j.appender.stdout.Target=System.out 6 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 7 | log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n 8 | 9 | # DEBUG 10 | #log4j.logger.com.redislabs.provider.redis=DEBUG, stdout 11 | #log4j.logger.org.apache.spark.sql.redis=DEBUG, stdout 12 | log4j.logger.com.redislabs.provider.redis.util.StreamUtils=DEBUG 13 | log4j.logger.org.apache.spark.sql.redis.stream=DEBUG 14 | log4j.logger.org.apache.spark.sql.redis.stream.RedisStreamReader=INFO 15 | 16 | # 17 | #log4j.additivity.com.redislabs.provider.redis=false 18 | #log4j.additivity.org.apache.spark.sql.redis=false 19 | -------------------------------------------------------------------------------- /src/test/resources/tls/ca.crt: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIIFSzCCAzOgAwIBAgIUHs81ch3cj/DaaUu/xeRpJtCvq3MwDQYJKoZIhvcNAQEL 3 | BQAwNTETMBEGA1UECgwKUmVkaXMgVGVzdDEeMBwGA1UEAwwVQ2VydGlmaWNhdGUg 4 | QXV0aG9yaXR5MB4XDTIwMDUxNzE1MDUwMVoXDTMwMDUxNTE1MDUwMVowNTETMBEG 5 | A1UECgwKUmVkaXMgVGVzdDEeMBwGA1UEAwwVQ2VydGlmaWNhdGUgQXV0aG9yaXR5 6 | MIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAvBUjUWX3xDaFoRtmZUiH 7 | hv3HBtzQAUbpOMGqBUdm6F0/X2wa+bnmSOeAZoYuI7L2N3eBNZ6Vcd/rc0nP+PoL 8 | M3L0fDPpNtrAgxFiLzCTLdsC4AYfSt3DOIURoMCYBdKvy62IaCanrnQElIzk41hI 9 | NhWV1L1MXU3uaTw3xNXMX0pKp/Td6PYTTM4pZSDnXIzltpgOx4YXg/0MrWOLR9nS 10 | 95rv2the61zbMzf1OScOzncQcXwNAEbCvUPH5OwStNznPAxhLqhLuIJ05kKaon5U 11 | y9qQFQvbMNOuXy8Fi/yTL4ZV9EkUyWOM7iqmCHTU5VU92FkZB+glIfk5Y5DmDe5a 12 | Wsk4/BeDZdZgZShX41Dl12G5cQCErvxKmb0g41GJDwEBj4MZ3U8pJKJWj3vQC76V 13 | yTsHtrTJ5maM/ctGE7IrQfIJwrDIqU/tixkLuete8CWt8X96iwjBbpHQ0J1TCB11 14 | WDK0wTgvCOjwjvhDA7D3Bvj65BpS+BnhuC/v/julfrV5rFCiyDeWjY//od6bo1HU 15 | gHbJncdys5AlZs/FZpBmKC0Isa6JhNGE6SAZlmjn1euH8besA/qzResK+XOtTNr5 16 | pNQIA9L+Jfu66puViK8fnZb53NgG3YqbEuivUHXALbMj60JPVkwS2ZdDxY9iNvig 17 | W1RY6wv+cs5726AUKJqin18CAwEAAaNTMFEwHQYDVR0OBBYEFAFmv2JPj3Pkt7Gs 18 | ofrJ3JqLcVe5MB8GA1UdIwQYMBaAFAFmv2JPj3Pkt7GsofrJ3JqLcVe5MA8GA1Ud 19 | EwEB/wQFMAMBAf8wDQYJKoZIhvcNAQELBQADggIBALdXNzliM8y02M5kktG7a4U/ 20 | 3aAMGfa8+FimQPrBOSpRBoq39lf3sIcVMYe8HiSZoynVChR5pkPdR0a66X89UUNL 21 | EljZ7LX1oMG4hYnKq0tzu797DIAIY0fLyLbI8LnOcGFijRwtGIH/5YMLrgOwPBbD 22 | llWteVMywMhovon3nPM1S78T0cILZ6QeuoKrM8JpRhit4EWfAdcmRWqYfWtmY6eW 23 | SH4D0PQaJpDs9fsOYp0CwAMmDPQeM0EbVSe2Dl0kJh4rS2kYlCxN/ZhIv6qdgXv/ 24 | 4SAnBX5Cay+IDEuaeVn7rRuCVpouzMjA27ucZ3V56JjcL8HzB9iqzB/tnLMnzZmq 25 | /DlwrQTVDJEExXGtUDcI+cgNpH9jqD4akFTvVggRFjv70JWj3dXHgz4x2iAy7O/x 26 | d2IHI6WFVq7760EoQBTVyyJ/S2w8UVzHKAk8DMU+Y89jsUA0EPp/j7DiEeXSBWz+ 27 | ivk35QXE16kGOmDPgF2SzCHKoTEheQQyJRd2UFNNY37X6ROMlHaeoXVZ13cMENPr 28 | DNbB9h6Wi3lqP3WGteAk5uKQSZq8Q+/NgElnWls71MFKxzsIysH0nxkjbTmmZxxP 29 | C9UH611jVgWZKth4a846lDruS9lUecz8f2vfiNZbDzXVdXxfhVA5VlGyRDlZq0xM 30 | zSJm5Tn0Q1Tz4b+PMEFz 31 | -----END CERTIFICATE----- 32 | -------------------------------------------------------------------------------- /src/test/resources/tls/ca.key: -------------------------------------------------------------------------------- 1 | -----BEGIN RSA PRIVATE KEY----- 2 | MIIJKAIBAAKCAgEAvBUjUWX3xDaFoRtmZUiHhv3HBtzQAUbpOMGqBUdm6F0/X2wa 3 | +bnmSOeAZoYuI7L2N3eBNZ6Vcd/rc0nP+PoLM3L0fDPpNtrAgxFiLzCTLdsC4AYf 4 | St3DOIURoMCYBdKvy62IaCanrnQElIzk41hINhWV1L1MXU3uaTw3xNXMX0pKp/Td 5 | 6PYTTM4pZSDnXIzltpgOx4YXg/0MrWOLR9nS95rv2the61zbMzf1OScOzncQcXwN 6 | 
AEbCvUPH5OwStNznPAxhLqhLuIJ05kKaon5Uy9qQFQvbMNOuXy8Fi/yTL4ZV9EkU 7 | yWOM7iqmCHTU5VU92FkZB+glIfk5Y5DmDe5aWsk4/BeDZdZgZShX41Dl12G5cQCE 8 | rvxKmb0g41GJDwEBj4MZ3U8pJKJWj3vQC76VyTsHtrTJ5maM/ctGE7IrQfIJwrDI 9 | qU/tixkLuete8CWt8X96iwjBbpHQ0J1TCB11WDK0wTgvCOjwjvhDA7D3Bvj65BpS 10 | +BnhuC/v/julfrV5rFCiyDeWjY//od6bo1HUgHbJncdys5AlZs/FZpBmKC0Isa6J 11 | hNGE6SAZlmjn1euH8besA/qzResK+XOtTNr5pNQIA9L+Jfu66puViK8fnZb53NgG 12 | 3YqbEuivUHXALbMj60JPVkwS2ZdDxY9iNvigW1RY6wv+cs5726AUKJqin18CAwEA 13 | AQKCAgBq3Auj7K43wc5seXfU5b9yl+8jXAOmJhbN02J4+1dhf9FIstAkwFUxaK5Q 14 | Eb5XNA+l9fTodQBtoY5Rg0dxKweAJLj5dDj1nJWyIgdJzmxgqkVY6MGQtKx9CUW5 15 | spLtBAYzT5XnrsaoXGxZxi8pZ/gnGl51b1Pa0zM4gSkiYWJrZXdDM+F8wYq3oY5t 16 | UaOBtt+wvXXwMKRdrkEsphj1KrItUc8i5LoROUGlOQ7PePx1m7ow8A0m8g0koUUK 17 | MIJ0Vene+R2h96aF30DxrjfTSLl+1N/1xTW06R/yHnIlVcx+PPHLCFaSEUh57KtG 18 | tLJc1fB8YirRjjjCs6nCgwniMmEMqFNpf00YQQ9hwLl9tsc8gQKxx6EWvJt5ymOL 19 | jbOyHCOEu7pmEhwNo3+6UKUMpvMZGf9obsfA1aXrilobmuxGlM954C2LMlH+zuMJ 20 | p5sCbUvOtECcP/vKUACXkvcvseB4kfyDBEdRNYUIK9uIsdWnjJWA10WMszU6T4fH 21 | t8mePAatQsQhcHiT45vGBuVY9WixBECizK/eDuOtuxQGE4OycrYtSc77WOv2benx 22 | +E6RVoQkPt9mDpk06O23vM+asu3rrGoXogZcJVQf/3mjNbyEf1s3o0nY7DTov8Is 23 | Il7AkDJ7XcU61mzRSz4zEZm2LFc0ydsxB/gQnSmrfwEl9+egeQKCAQEA31mkcehv 24 | v4tgzEuJR9s2tRD4kI4U9IV5K+mz+7E0nrgU17devVWHnnNmSiSZOCYliiAlZJJq 25 | Le1pxzZyLv1SWvhBML7Jxqe63+uFbd3AaOUUK3/f1+KyrgZKoHfxadDdMhve0Zeo 26 | xVW3pBjpNTG3Z71FGI+X8yHMlxugQrQDZDMB4sHqRBmfsQ9HiHxNjew86UbDAmbu 27 | 5LHyovmtcYpBL07lsEIIpgnHF4B2cmvpn9TBams2L25/9WPwzCLRCZmltwyhLiHK 28 | Jrtjj7T3+VdxKCq/W94lCu+yVt4QiKd7R06u8VfbsLtOMdLjMsilTEI8HxnALHIC 29 | pRvtDlS5hrFvIwKCAQEA15OwNYARf+/DVu5vORYLpRECliDzYho9dwE6c267VF4A 30 | r+MasHnDLQiAbnjQ41KMxK5gVvJCz2Dxfy25Y9RAa/6Z4BN5r+rMQ4k2wvQwR0q1 31 | 56oPT6S2L016rlQN35vNA5njXV6KloeniGQSYAGOVjWdi0P6NPDSBdC/vdIYlZ3c 32 | 0l8KSthHNqYWwwfbZrVyKtjH9rOKNg0OcYzMvSK+wt6LibNbTorqhg4fLycwIFkj 33 | ZZZMUb1o6nZN571dKE7rz+Qx3P9MLeEqBuiuI/Wvpdx0BTM7RAAS4ayJXukA+KiO 34 | bPjuq5jxn7uaMY4xKkA7+wGyVdDl24R2ZGTqHfZQlQKCAQBdMRkVUie2Y2B7PZVF 35 | PylfeXpNTotdz4dUlEm93h2XkDVaIK/ODi7tJTdr/kNUE06ciHcxtInLKgF11rjj 36 | 9Fz1ihohTaoBUqD9p1bgFaOf4N1+nPd4K6XkSMnAlOtM/JIew1RAXahU1kQumxpL 37 | ULg299kpu1hqYRLtheBjlJrJpyLFS6YEzjA8f6SYRU1Sx1xO9XCOwwYwtDMKX2VI 38 | N6ilJaVe2t6i8Pd6TeaeLXqobuxZC+zq848+g3nIo0pXWB219/YUupKPgKa1IoxJ 39 | JHwZh2Sa32DTZFokNrntWvxsL199YexPnIeu8FBOMzwNSqGtVGBD6zpBTBqoXR+g 40 | HM7DAoIBAQCIihVQPXZYBt89XzV4/bqQaQ6vUd11ZFNL3a5M0HS8AfIDdR9BaU2y 41 | ZzEZR1JeuLlKFXwVdcnVGXy75ZUHYrcO76o8X3lyb4/CstTJc+pkLTC7s12RPyzZ 42 | FwS+B6Tl3QNj5YM5bxjuMKtu2ps8zZ9+gOTxATQndeRAJLBdJQXHNb5YTDPzpbqt 43 | JVNDYeWXzxKpirZUTfEbPPfJ+bjvjmMuf8/3fm81dw0FrUoZDoQP6QVfYWujVglu 44 | f1Hmlmy7jAkVml3usJBqerOovpUVV5ZRwiiF5qYB8t4Cq8oyH/gqhm/3G0/nxa48 45 | UpFeE8aWESssVy3B5ta/S83E1pnmMk1lAoIBACRQMEkyv3vIQZmMQzcsg0Y4FX+R 46 | f7lZ80rKbwYpwAKM4xHHXkj46jGBj0ZGHRNIR0BQqOksF8Sg29fjlXFA5G8yMlU/ 47 | hHFEfsY4dSS1VfmGoFr2kSYKJszlcybOeEM0ceuSNafZ+5X59g86g+iHZ9TsJ6ly 48 | F+cNYLIyVVCcaNB8YYiU9J20aMlzI+7Pr1ohesyb/7DwYMC78HzJO/AgvJNI477E 49 | lO/+5Ou1nJDDWRqncfOZGtnYaz6vGjVV3yWwSnVyksgyoGgExXb8+ItU5xwJValS 50 | lnkqM7ADuldyO61e7ctxJzOI9YWceYrDN38XV889umYWHB812rdBXmgGPvg= 51 | -----END RSA PRIVATE KEY----- 52 | -------------------------------------------------------------------------------- /src/test/resources/tls/ca.txt: -------------------------------------------------------------------------------- 1 | 7161BDE3516329B98ECE89BD1B3A84A165B36131 2 | -------------------------------------------------------------------------------- /src/test/resources/tls/client.csr: 
-------------------------------------------------------------------------------- 1 | -----BEGIN NEW CERTIFICATE REQUEST----- 2 | MIIEPzCCA+kCAQAwbDEQMA4GA1UEBhMHVW5rbm93bjEQMA4GA1UECBMHVW5rbm93 3 | bjEQMA4GA1UEBxMHVW5rbm93bjEOMAwGA1UEChMFcmVkaXMxDjAMBgNVBAsTBXJl 4 | ZGlzMRQwEgYDVQQDEwtyZWRpcyByZWRpczCCA0IwggI1BgcqhkjOOAQBMIICKAKC 5 | AQEAj3k12bmq6b+r7Yh6z0lRtvMuxZ47rzcY6OrElh8+/TYG50NRqcQYMzm4CefC 6 | rhxTm6dHW4XQEa24tHmHdUmEaVysDo8UszYIKKIv+icRCj1iqZNFNAmg/mlsRlj4 7 | S90ggZw3CaAQV7GVrc0AIz26VIS2KR+dZI74g0SGd5ec7AS0NKasLnXpmF3iPbAp 8 | L8ERjJ/6nYGB5zONt5K3MNe540lZL2gJmHIVORXqPWuLRlPGM0WPgDsypMLg8nKQ 9 | JW5OP4o7CDihxFDk4YwaKaN9316hQ95LZv8EkD7VzxYj4VjUh8YI6X8hHNgdyiPL 10 | bjgHZfgi40K+SEwFdjk5YBzWZwIdALr2lqaFePff3uf6Z8l3x4XvMrIzuuWAwLzV 11 | aV0CggEAFqZcWCBIUHBOdQKjl1cEDTTaOjR4wVTU5KXALSQu4E+W5h5L0JBKvayP 12 | N+6x4J8xgtI8kEPLZC+IAEFg7fnKCbMgdqecMqYn8kc+kYebosTnRL0ggVRMtVuA 13 | LDaNH6g+1InpTg+gaI4yQopceMR4xo0FJ7ccmjq7CwvhLERoljnn08502xAaZaor 14 | h/ZMaCbbPscvS1WZg0u07bAvfJDppJbTpV1TW+v8RdT2GfY/Pe27hzklwvIk4Hcx 15 | KW2oh+weR0j4fvtf3rdUhDFrIjLe5VPdrwIRKw0fAtowlzIk/ieu2oudSyki2bqL 16 | 457Z4QOmPFKBC8aIt+LtQxbh7xfb3gOCAQUAAoIBABhNEA7ZsggSRP9+M+YZPxsG 17 | HqXC+JUDPxFdt8G6LwXiLMSrDK7PRwWGY+srFpk/9XbHloJFUNMy7mTs44FikjRk 18 | Ckv9RdYxySWVe6DB8pZfRMBtwpL8EVB5H3zLzwl4bo7aSwqIGcW9vbLf9lDiAJr1 19 | tLPB7u00PYLmhLBpxsjt3IASQU7eQoHbKU1fqVFC0owPLV7eDMWXtDXW15CqcNVM 20 | RYH89GF1FVft5cyc+ezRtBumVTWfkfiypXKNemMtz8nG4XPafM4t/cwL32jeqNfj 21 | D+49rJCszRcbeWW38UUZUvrR0Pg4d/zMjweuFtxYvltOg5YQkCQ+GB4EAdpeEO2g 22 | MDAuBgkqhkiG9w0BCQ4xITAfMB0GA1UdDgQWBBRRT1L9TaDwnVyuQBHSRIfqwU6h 23 | TzANBglghkgBZQMEAwIFAANBADA+Ah0Ak8JdJGCo3g5GLlnJlf4b1wwYuLY5r26a 24 | apTxzwIdAKF297kB9IeY0JVbHKQcwyWAJzOtQO82mKCHrIo= 25 | -----END NEW CERTIFICATE REQUEST----- 26 | -------------------------------------------------------------------------------- /src/test/resources/tls/clientkeystore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RedisLabs/spark-redis/6b0a69c9701644bd03239b8bc64ac45367d23cef/src/test/resources/tls/clientkeystore -------------------------------------------------------------------------------- /src/test/resources/tls/redis.crt: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIID4jCCAcoCFHFhveNRYym5js6JvRs6hKFls2ExMA0GCSqGSIb3DQEBCwUAMDUx 3 | EzARBgNVBAoMClJlZGlzIFRlc3QxHjAcBgNVBAMMFUNlcnRpZmljYXRlIEF1dGhv 4 | cml0eTAeFw0yMDA1MTcxNTA1MDFaFw0yMTA1MTcxNTA1MDFaMCYxEzARBgNVBAoM 5 | ClJlZGlzIFRlc3QxDzANBgNVBAMMBlNlcnZlcjCCASIwDQYJKoZIhvcNAQEBBQAD 6 | ggEPADCCAQoCggEBANd6DHmb4xtWSej6EOHHNHP/zSVuQ3jYUqTzc5s1oWrmDyro 7 | rk3rnRBo1l9X6n9EESaMZyZxJFkrgnMlYNbZjy7KgKpGPP1BLDwyeBo6pYg8MRZK 8 | 0Kd0GaFqHk1WpboC9qKyhyWiD/C18+6Aq6Vm5ZcfnScanJ1WbmGZt+6NtGTOBDWu 9 | BGfCzCx/6oIkDvlebycFs7JuL1uxIaZYKfUG2bPWkVpbtbNcune6A2pTtsfMdx8U 10 | qWOW087uco3Q8QEa+rwLCDzhT6NywllGHN5ua5CgGdrYWFeH5lAuiNxvWbCaN9v8 11 | ua6+/xalOGewPixQbSgF6K0grjjg/jSdyt53BH0CAwEAATANBgkqhkiG9w0BAQsF 12 | AAOCAgEAmFgZAW4De4ushRBrzSIRa5n8Q8Nkn8DcS0ludeFLiV49gyXD9b0Nnw8w 13 | Ct+qxMMiVoWXqUUJWkfhcGQ/FbBZnDLsh3ZSHlyaMB+dXbmC5SJB9IlJsvEwjmN9 14 | RBEJ5VUwEc+OMb6w0CnG9tr//b1N5+iwvmdfapJcTradMwOe02ZJhD06Tvapol6P 15 | L4z/ErSTKJRHzBPgzeSQHoRwvYbs4GE5VGMlygEq/v8ieodH0eO0IH9Cb3wro+Yq 16 | 6+SkbtOaJKdr15DZ/zd8UgoaBGxJFJ03cBcZHgV9FYfvv9QrNxkp4mx/g4UIe9Zn 17 | 6LqJSqxoQmhLWOTitp2iW/yMmDtSo2jwi4KVx+ENO9O0NsPUP2nk6mkH1YMMVg32 18 | mmrcuqOMijQu8b1MI76mO0KY3fCmbS6d0gReEcVYBZ5aPPCFTOeZVFF3n2Iz44+h 19 | 
PrE+21UgwWYxmHQoSJ+f5378EEJYr3bY/PHOp/HEzPshlU3u++3utcGQbQRowUeF 20 | cNd/gdOwHQpw7Sx6YFUvdTs9PEexPzgwzca8mPVto9uO8T5LLyZUouG6/bPzIsj7 21 | bDeZ9o56JhpR6flgg5SvWanyUL5ihbqhIxJcQai/XbMS3h61GK9H9RCu9LoJiRMM 22 | +P2bBcsd0bqzysjbifk68UjcOSpCosLBavF/JSyAO9k2ytBzah8= 23 | -----END CERTIFICATE----- 24 | -------------------------------------------------------------------------------- /src/test/resources/tls/redis.dh: -------------------------------------------------------------------------------- 1 | -----BEGIN DH PARAMETERS----- 2 | MIIBCAKCAQEAsUMvwTsUNheculmKMUjtZZJYw8DPD1vlh3DlD1xjwShqh46ICyna 3 | fER4nKo9Lt63zNcM68w1aNgiyajbJgzemuNbRh55vh88jV4rNitqWcb0MiiuOWUc 4 | 3MjNtp6U7ttoZrMhXtiPLwEfNtkUZJZAaAnvs6MEu7BgyoVXpvt8Azckh1dpYeOt 5 | TFnmIqVMziH1R2MxIFCkxZNsL9vJtrf8K3bgBJaPFpJeTPRMS25Pg90iJ2ZJLDZU 6 | OjRuQpt7yaEGNJP3ADjQAWrSRMe2NnfJ2b2yI2CzTBQ0DOeAw731TmRw7pxIOPEu 7 | N1jPJMNsM9FLoeslpJ9neot7UrqZtCt5SwIBAg== 8 | -----END DH PARAMETERS----- 9 | -------------------------------------------------------------------------------- /src/test/resources/tls/redis.key: -------------------------------------------------------------------------------- 1 | -----BEGIN RSA PRIVATE KEY----- 2 | MIIEpAIBAAKCAQEA13oMeZvjG1ZJ6PoQ4cc0c//NJW5DeNhSpPNzmzWhauYPKuiu 3 | TeudEGjWX1fqf0QRJoxnJnEkWSuCcyVg1tmPLsqAqkY8/UEsPDJ4GjqliDwxFkrQ 4 | p3QZoWoeTValugL2orKHJaIP8LXz7oCrpWbllx+dJxqcnVZuYZm37o20ZM4ENa4E 5 | Z8LMLH/qgiQO+V5vJwWzsm4vW7Ehplgp9QbZs9aRWlu1s1y6d7oDalO2x8x3HxSp 6 | Y5bTzu5yjdDxARr6vAsIPOFPo3LCWUYc3m5rkKAZ2thYV4fmUC6I3G9ZsJo32/y5 7 | rr7/FqU4Z7A+LFBtKAXorSCuOOD+NJ3K3ncEfQIDAQABAoIBAGQFJkdIwhnNaw93 8 | DGERS9rQkZRfY3hzyaEB1NbmBSCO29mKGmbXCJg2YdPuBk6+9MsU/0iL2lh95ta2 9 | MgZpBJquqxze766LExJ6Rt+9+1qKvipf3gzsynzvulMh9lmqyHtrLWqdmJKekzVR 10 | ituDMO1+Yj9lje9G0IQDi6pzIpprYLdstQJsUoafwN8TbqPGem8Z8/ili3TIj9z/ 11 | EZuSH/Crv80ls+ptz2GiPcAxUctqAfha+3lkX+qWklTd+0B5LgHrf2agxIAb7549 12 | ZQTo4kDPDem7Rvqj4IQMHQhRdgSNYOmjidIF+XnbsdVw2rSbqMx6z+eLepuGtUQj 13 | fWDh5gECgYEA9GddWg36dF5kebvBfuFso4DSqKBip6P6cogzoI6fHP2b4Qe73hlq 14 | YiLOA3dPN/a8D0Ox3HNK8jx23EzG/JiIlBYX3RcB9+iUtg2Iixz9FiHnQkKwJFBS 15 | VnWOb4rNQqWm+5W/OxE3da5dvsQKKpcgAK9OYj84X0lH3196GsxRrIECgYEA4bNS 16 | 6J0tbWHZ6CnTSZJz3BRNFnn8WSLy6A8Yytku1DnJjODuJT0/JwxglM+sR022V7JY 17 | YCBfuueIXhqYbqCqCzLxZg21+w/3jpIZp7eDd5nY+VDnUv0PyNwGgv5rS6nEgpeL 18 | YdbDYbj0ZOLZjSDrz5OfqihNQZssfWeB+ecvCf0CgYA/s/l3EZGoZzoVKMUkhylD 19 | 7L37yeItA+axl6KtRL9gVRIeM1/aYhGChsPfz2dMlPkrmV9wsHRmczAf97sd97wR 20 | rTiHHgobTfoFAb4HVIT7EdcvRCaZMH5lnrqDhFBAAOFnTf7MLI6iE9LHeF2WAFIN 21 | G6R4ozXUUEt1g3NWLM9VAQKBgQC73b86PnKspJF0LTRg/hWQcBmGhv1k2LFmNgLF 22 | /id7oapBqIyx1Jw3jZbq5z4Yj/giYSIsyWXFtqmM4whUtUk1Ty8eanU6yJygQL44 23 | G4nDyPyQ8iXKrzgvUe3dpZZ8AZC/vxLW2qQBOKm9PBIn5epC+zcgtLEx1c8fh0Pq 24 | VuORpQKBgQDXfhxTrpYyoxJmAoLmj9IOdsE/vjlcpriaQu4hkzaN6wz79O637WBk 25 | zoaT72zYbCpWhD42yZLqQIkrgYjsht30Wut1dp/0FQSoT872aB62Q90UVKmX4TvN 26 | wODJz8mtdC5co5fjxbaUn9Zfc0LUO9KPhFd0fb6SwCthBSQ4RmBWgg== 27 | -----END RSA PRIVATE KEY----- 28 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/RedisBenchmarks.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis 2 | 3 | import java.io.{File, FileWriter, PrintWriter} 4 | import java.time.{Duration => JDuration} 5 | 6 | import com.redislabs.provider.redis.util.Logging 7 | 8 | /** 9 | * @author The Viet Nguyen 10 | */ 11 | trait RedisBenchmarks extends Logging { 12 | 13 | val benchmarkReportDir = new 
File("target/reports/benchmarks/") 14 | benchmarkReportDir.mkdirs() 15 | 16 | def time[R](tag: String)(block: => R): R = { 17 | val t0 = System.nanoTime() 18 | val result = block // call-by-name 19 | val t1 = System.nanoTime() 20 | new PrintWriter(new FileWriter(s"$benchmarkReportDir/results.txt", true)) { 21 | // scalastyle:off 22 | this.println(s"$tag, ${JDuration.ofNanos(t1 - t0)}") 23 | close() 24 | } 25 | result 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/RedisConfigSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis 2 | 3 | import org.scalatest.{FunSuite, Matchers} 4 | import redis.clients.jedis.util.JedisClusterCRC16 5 | 6 | class RedisConfigSuite extends FunSuite with Matchers { 7 | 8 | val redisStandaloneConfig = new RedisConfig(RedisEndpoint(host = "127.0.0.1", port = 6379, auth = "passwd")) 9 | val redisClusterConfig = new RedisConfig(RedisEndpoint(host = "127.0.0.1", port = 7379)) 10 | 11 | test("getNodesBySlots") { 12 | redisStandaloneConfig.getNodesBySlots(0, 16383).length shouldBe 1 13 | redisClusterConfig.getNodesBySlots(0, 16383).length shouldBe 7 14 | } 15 | 16 | test("getHost") { 17 | val key = "getHost" 18 | val slot = JedisClusterCRC16.getSlot(key) 19 | val standaloneHost = redisStandaloneConfig.getHost(key) 20 | assert(standaloneHost.startSlot <= slot && standaloneHost.endSlot >= slot) 21 | val clusterHost = redisClusterConfig.getHost(key) 22 | assert(clusterHost.startSlot <= slot && clusterHost.endSlot >= slot) 23 | } 24 | 25 | test("getNodes") { 26 | redisStandaloneConfig.getNodes(RedisEndpoint(host = "127.0.0.1", port = 6379, auth = "passwd")).length shouldBe 1 27 | redisClusterConfig.getNodes(RedisEndpoint(host = "127.0.0.1", port = 7379)).length shouldBe 7 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/SparkRedisSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis 2 | 3 | import com.redislabs.provider.redis.env.Env 4 | import com.redislabs.provider.redis.rdd.Keys 5 | import org.apache.spark.sql.{SQLContext, SQLImplicits, SparkSession} 6 | import org.scalatest.{BeforeAndAfterAll, FunSuite} 7 | 8 | /** 9 | * @author The Viet Nguyen 10 | */ 11 | trait SparkRedisSuite extends FunSuite with Env with Keys with BeforeAndAfterAll { 12 | 13 | override def beforeAll() { 14 | super.beforeAll() 15 | spark = SparkSession.builder().config(conf).getOrCreate() 16 | sc = spark.sparkContext 17 | } 18 | 19 | override def afterAll(): Unit = { 20 | spark.stop 21 | System.clearProperty("spark.driver.port") 22 | } 23 | 24 | object TestSqlImplicits extends SQLImplicits { 25 | 26 | override protected def _sqlContext: SQLContext = spark.sqlContext 27 | } 28 | 29 | } 30 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/SparkStreamingRedisSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis 2 | 3 | import com.redislabs.provider.redis.env.Env 4 | import com.redislabs.provider.redis.util.Logging 5 | import org.apache.spark.sql.SparkSession 6 | import org.apache.spark.streaming.{Seconds, StreamingContext} 7 | import org.scalatest.{BeforeAndAfterEach, FunSuite} 8 | 9 | /** 10 | * 
For Spark streaming tests we have to create the Spark and streaming contexts for each test 11 | */ 12 | trait SparkStreamingRedisSuite extends FunSuite with Env with BeforeAndAfterEach with Logging { 13 | 14 | override protected def beforeEach(): Unit = { 15 | super.beforeEach() 16 | spark = SparkSession.builder().config(conf).getOrCreate() 17 | sc = spark.sparkContext 18 | ssc = new StreamingContext(sc, Seconds(1)) 19 | } 20 | 21 | override protected def afterEach(): Unit = { 22 | ssc.stop() 23 | spark.stop 24 | System.clearProperty("spark.driver.port") 25 | super.afterEach() 26 | } 27 | 28 | } 29 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/AclDataframeSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df 2 | 3 | import com.redislabs.provider.redis.util.Person.{TableNamePrefix, data} 4 | import com.redislabs.provider.redis.util.TestUtils.{generateTableName, interceptSparkErr} 5 | import org.apache.spark.sql.redis.{RedisFormat, SqlOptionTableName} 6 | import org.scalatest.Matchers 7 | import redis.clients.jedis.exceptions.JedisConnectionException 8 | 9 | /** 10 | * Basic dataframe test with user/password authentication 11 | */ 12 | trait AclDataframeSuite extends RedisDataframeSuite with Matchers { 13 | 14 | test("save and load dataframe") { 15 | val tableName = generateTableName(TableNamePrefix) 16 | val df = spark.createDataFrame(data) 17 | df.write.format(RedisFormat) 18 | .option(SqlOptionTableName, tableName) 19 | .save() 20 | val loadedDf = spark.read.format(RedisFormat) 21 | .option(SqlOptionTableName, tableName) 22 | .load() 23 | .cache() 24 | verifyDf(loadedDf) 25 | } 26 | 27 | test("incorrect password in dataframe options") { 28 | interceptSparkErr[JedisConnectionException] { 29 | val tableName = generateTableName(TableNamePrefix) 30 | val df = spark.createDataFrame(data) 31 | df.write.format(RedisFormat) 32 | .option(SqlOptionTableName, tableName) 33 | .option("user", user) 34 | .option("auth", "wrong_password") 35 | .save() 36 | } 37 | } 38 | 39 | test("correct user/password in dataframe options") { 40 | val tableName = generateTableName(TableNamePrefix) 41 | val df = spark.createDataFrame(data) 42 | df.write.format(RedisFormat) 43 | .option(SqlOptionTableName, tableName) 44 | .option("user", user) 45 | .option("auth", userPassword) 46 | .save() 47 | 48 | val loadedDf = spark.read.format(RedisFormat) 49 | .option(SqlOptionTableName, tableName) 50 | .option("user", user) 51 | .option("auth", userPassword) 52 | .load() 53 | .cache() 54 | verifyDf(loadedDf) 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/BinaryDataframeSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df 2 | 3 | import com.redislabs.provider.redis.toRedisContext 4 | import com.redislabs.provider.redis.util.Person 5 | import com.redislabs.provider.redis.util.Person._ 6 | import com.redislabs.provider.redis.util.TestUtils._ 7 | import org.apache.commons.lang3.SerializationUtils 8 | import org.apache.spark.SparkException 9 | import org.apache.spark.sql.redis.RedisSourceRelation.tableDataKeyPattern 10 | import org.apache.spark.sql.redis._ 11 | import org.apache.spark.sql.types.{IntegerType, StructField, StructType} 12 | import org.scalatest.Matchers 13 | 14 | /** 15 |
@author The Viet Nguyen 16 | */ 17 | trait BinaryDataframeSuite extends RedisDataframeSuite with Matchers { 18 | 19 | test("save and load dataframe with binary mode") { 20 | val tableName = generateTableName(TableNamePrefix) 21 | val df = spark.createDataFrame(data) 22 | df.write.format(RedisFormat) 23 | .option(SqlOptionModel, SqlOptionModelBinary) 24 | .option(SqlOptionTableName, tableName) 25 | .save() 26 | val loadedDf = spark.read.format(RedisFormat) 27 | .option(SqlOptionModel, SqlOptionModelBinary) 28 | .option(SqlOptionTableName, tableName) 29 | .load() 30 | .cache() 31 | verifyDf(loadedDf) 32 | } 33 | 34 | test("save with binary mode and load dataframe") { 35 | val tableName = generateTableName(TableNamePrefix) 36 | val df = spark.createDataFrame(data) 37 | df.write.format(RedisFormat) 38 | .option(SqlOptionModel, SqlOptionModelBinary) 39 | .option(SqlOptionTableName, tableName) 40 | .save() 41 | interceptSparkErr[SparkException] { 42 | spark.read.format(RedisFormat) 43 | .option(SqlOptionTableName, tableName) 44 | .load() 45 | .show() 46 | } 47 | } 48 | 49 | test("save and load with binary mode dataframe") { 50 | val tableName = generateTableName(TableNamePrefix) 51 | val df = spark.createDataFrame(data) 52 | df.write.format(RedisFormat) 53 | .option(SqlOptionTableName, tableName) 54 | .save() 55 | interceptSparkErr[SparkException] { 56 | spark.read.format(RedisFormat) 57 | .option(SqlOptionModel, SqlOptionModelBinary) 58 | .option(SqlOptionTableName, tableName) 59 | .load() 60 | .show() 61 | } 62 | } 63 | 64 | test("load filtered hash keys with strings") { 65 | val tableName = generateTableName(TableNamePrefix) 66 | val df = spark.createDataFrame(data) 67 | df.write.format(RedisFormat) 68 | .option(SqlOptionTableName, tableName) 69 | .option(SqlOptionModel, SqlOptionModelHash) 70 | .save() 71 | val extraKey = RedisSourceRelation.uuid() 72 | saveMap(tableName, extraKey, Person.dataMaps.head) 73 | val loadedIds = spark.read.format(RedisFormat) 74 | .schema(Person.fullSchema) 75 | .option(SqlOptionTableName, tableName) 76 | .option(SqlOptionModel, SqlOptionModelHash) 77 | .option(SqlOptionFilterKeysByType, value = true) 78 | .load() 79 | .collect() 80 | .map { r => 81 | r.getAs[String]("_id") 82 | } 83 | loadedIds.length shouldBe 2 84 | loadedIds should not contain extraKey 85 | val countAll = sc.fromRedisKeyPattern(tableDataKeyPattern(tableName)).count() 86 | countAll shouldBe 3 87 | } 88 | 89 | test("load unfiltered hash keys with strings") { 90 | val tableName = generateTableName(TableNamePrefix) 91 | val df = spark.createDataFrame(data) 92 | df.write.format(RedisFormat) 93 | .option(SqlOptionTableName, tableName) 94 | .option(SqlOptionModel, SqlOptionModelHash) 95 | .save() 96 | saveMap(tableName, RedisSourceRelation.uuid(), Person.dataMaps.head) 97 | interceptSparkErr[SparkException] { 98 | spark.read.format(RedisFormat) 99 | .option(SqlOptionTableName, tableName) 100 | .option(SqlOptionModel, SqlOptionModelHash) 101 | .load() 102 | .collect() 103 | } 104 | } 105 | 106 | test("read dataframe by non-existing key (not pattern)") { 107 | val df = spark.read.format(RedisFormat) 108 | .option(SqlOptionKeysPattern, "some-non-existing-key") 109 | .option(SqlOptionModel, SqlOptionModelBinary) 110 | .schema(StructType(Array( 111 | StructField("id", IntegerType), 112 | StructField("value", IntegerType) 113 | ))) 114 | .load() 115 | .cache() 116 | 117 | df.show() 118 | df.count() should be (0) 119 | } 120 | 121 | def serialize(value: Map[String, String]): Array[Byte] = { 122 | val 
valuesArray = value.values.toArray 123 | SerializationUtils.serialize(valuesArray) 124 | } 125 | 126 | def saveMap(tableName: String, key: String, value: Map[String, String]): Unit 127 | } 128 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/CsvDataframeSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df 2 | 3 | import com.redislabs.provider.redis.util.TestUtils._ 4 | import org.apache.spark.sql.functions._ 5 | import org.apache.spark.sql.redis.{RedisFormat, SqlOptionKeyColumn, SqlOptionTableName} 6 | import org.scalatest.Matchers 7 | 8 | trait CsvDataframeSuite extends RedisDataframeSuite with Matchers { 9 | 10 | test("load dataframe from test.csv file, write/read from redis") { 11 | val file = getClass.getClassLoader.getResource("test.csv").getFile 12 | val df = spark.read.format("csv") 13 | .option("header", true) 14 | .option("inferSchema", true) 15 | .load(file) 16 | .withColumn("id", monotonically_increasing_id()) 17 | .cache() 18 | 19 | val tableName = generateTableName("csv-data") 20 | 21 | df.write.format(RedisFormat) 22 | .option(SqlOptionTableName, tableName) 23 | .option(SqlOptionKeyColumn, "id") 24 | .save() 25 | 26 | val loadedDf = spark.read.format(RedisFormat) 27 | .option(SqlOptionTableName, tableName) 28 | .option(SqlOptionKeyColumn, "id") 29 | .load() 30 | .cache() 31 | 32 | df.schema should be(loadedDf.schema) 33 | 34 | df.collect().toSet should be(loadedDf.collect().toSet) 35 | } 36 | 37 | } 38 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/FilteredDataframeSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df 2 | 3 | import com.redislabs.provider.redis.util.Person.{TableNamePrefix} 4 | import org.apache.spark.sql.redis.{RedisFormat, SqlOptionTableName} 5 | import org.scalatest.Matchers 6 | import com.redislabs.provider.redis.util.TestUtils._ 7 | 8 | /** 9 | * @author The Viet Nguyen 10 | */ 11 | trait FilteredDataframeSuite extends RedisDataframeSuite with Matchers { 12 | 13 | test("select none fields") { 14 | val tableName = generateTableName(TableNamePrefix) 15 | writeDf(tableName) 16 | val actualDf = spark.read.format(RedisFormat) 17 | .option(SqlOptionTableName, tableName) 18 | .load() 19 | .select() 20 | .cache() 21 | actualDf.count() shouldBe expectedDf.count() 22 | actualDf.collect().foreach { r => 23 | r.length shouldBe 0 24 | } 25 | } 26 | 27 | test("select all fields") { 28 | val tableName = generateTableName(TableNamePrefix) 29 | writeDf(tableName) 30 | val actualDf = spark.read.format(RedisFormat) 31 | .option(SqlOptionTableName, tableName) 32 | .load() 33 | .select("name", "age", "address", "salary") 34 | .cache() 35 | verifyDf(actualDf) 36 | } 37 | 38 | test("select partial fields") { 39 | val tableName = generateTableName(TableNamePrefix) 40 | writeDf(tableName) 41 | val actualDf = spark.read.format(RedisFormat) 42 | .option(SqlOptionTableName, tableName) 43 | .load() 44 | .select("name", "salary") 45 | .cache() 46 | verifyPartialDf(actualDf) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/RedisDataframeSuite.scala: -------------------------------------------------------------------------------- 1 | package 
com.redislabs.provider.redis.df 2 | 3 | import com.redislabs.provider.redis.SparkRedisSuite 4 | import com.redislabs.provider.redis.util.Person 5 | import com.redislabs.provider.redis.util.Person.data 6 | import org.apache.spark.sql.DataFrame 7 | import org.apache.spark.sql.redis.{RedisFormat, SqlOptionTableName} 8 | import org.scalatest.Matchers 9 | 10 | /** 11 | * @author The Viet Nguyen 12 | */ 13 | trait RedisDataframeSuite extends SparkRedisSuite with Matchers { 14 | 15 | import TestSqlImplicits._ 16 | 17 | lazy val expectedDf: DataFrame = Person.df(spark) 18 | 19 | def writeDf(tableName: String, options: Map[String, Any] = Map()): Unit = { 20 | val df = spark.createDataFrame(data) 21 | 22 | val initialWriter = df 23 | .write 24 | .format(RedisFormat) 25 | .option(SqlOptionTableName, tableName) 26 | 27 | val writer = options.foldLeft(initialWriter) { case (acc, (k, v)) => 28 | acc.option(k, v.toString) 29 | } 30 | writer.save() 31 | } 32 | 33 | def createTempView(tableName: String): Unit = { 34 | spark.createDataFrame(data).createOrReplaceTempView(tableName) 35 | } 36 | 37 | def loadAndVerifyDf(tableName: String, options: Map[String, Any] = Map()): Unit = { 38 | val initialReader = spark 39 | .read 40 | .format(RedisFormat) 41 | .option(SqlOptionTableName, tableName) 42 | 43 | val reader = options.foldLeft(initialReader) { case (acc, (k, v)) => 44 | acc.option(k, v.toString) 45 | } 46 | 47 | val actualDf = reader.load().cache() 48 | verifyDf(actualDf, data) 49 | } 50 | 51 | def verifyDf(actualDf: DataFrame, data: Seq[Person] = Person.data): Unit = { 52 | actualDf.show() 53 | actualDf.count() shouldBe expectedDf.count() 54 | // TODO: check nullable columns 55 | // actualDf.schema shouldBe expectedDf.schema 56 | val loadedArr = actualDf.as[Person].collect() 57 | loadedArr.sortBy(_.name) shouldBe data.toArray.sortBy(_.name) 58 | } 59 | 60 | def verifyPartialDf(actualDf: DataFrame): Unit = { 61 | actualDf.show() 62 | actualDf.count() shouldBe expectedDf.count() 63 | // TODO: check nullable columns 64 | // actualDf.schema shouldBe expectedDf.schema 65 | val loadedArr = actualDf.collect() 66 | .map(r => (r.getAs[String]("name"), r.getAs[Double]("salary"))) 67 | loadedArr.sortBy(_._1) shouldBe data.toArray.sortBy(_.name).map(p => (p.name, p.salary)) 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/SparkSqlSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df 2 | 3 | import com.redislabs.provider.redis.util.Person.generatePersonTableName 4 | import org.apache.spark.sql.redis.{RedisFormat, SqlOptionTableName} 5 | import org.scalatest.Matchers 6 | 7 | /** 8 | * @author The Viet Nguyen 9 | */ 10 | trait SparkSqlSuite extends RedisDataframeSuite with Matchers { 11 | 12 | test("create temporary view then make regular insertions") { 13 | val tableName = generatePersonTableName() 14 | spark.sql( 15 | s"""CREATE TEMPORARY VIEW $tableName (name STRING, age INT, address STRING, salary DOUBLE) 16 | | USING $RedisFormat OPTIONS (table '$tableName') 17 | |""".stripMargin) 18 | spark.sql( 19 | s"""INSERT INTO TABLE $tableName 20 | | VALUES ('John', 30, '60 Wall Street', 150.5), 21 | | ('Peter', 35, '110 Wall Street', 200.3) 22 | |""".stripMargin) 23 | val loadedDf = spark.read.format(RedisFormat) 24 | .option(SqlOptionTableName, tableName) 25 | .load() 26 | .cache() 27 | verifyDf(loadedDf) 28 | } 29 | 30 | test("create 
temporary view then make overwrite insertions when no data exists") { 31 | val tableName = generatePersonTableName() 32 | spark.sql( 33 | s"""CREATE TEMPORARY VIEW $tableName (name STRING, age INT, address STRING, salary DOUBLE) 34 | | USING $RedisFormat OPTIONS (table '$tableName') 35 | |""".stripMargin) 36 | spark.sql( 37 | s"""INSERT overwrite TABLE $tableName 38 | |SELECT * FROM VALUES ('John', 30, '60 Wall Street', 150.5), 39 | | ('Peter', 35, '110 Wall Street', 200.3) 40 | |""".stripMargin) 41 | val loadedDf = spark.read.format(RedisFormat) 42 | .option(SqlOptionTableName, tableName) 43 | .load() 44 | .cache() 45 | verifyDf(loadedDf) 46 | } 47 | 48 | test("create temporary view then make overwrite insertions when data exists") { 49 | val tableName = generatePersonTableName() 50 | spark.sql( 51 | s"""CREATE TEMPORARY VIEW $tableName (name STRING, age INT, address STRING, salary DOUBLE) 52 | | USING $RedisFormat OPTIONS (table '$tableName') 53 | |""".stripMargin) 54 | spark.sql( 55 | s"""INSERT INTO TABLE $tableName 56 | | VALUES ('Johnny', 18, '17 Home Street', 0), 57 | | ('Peter', 23, '6 Home Street', 20) 58 | |""".stripMargin) 59 | spark.sql( 60 | s"""INSERT overwrite TABLE $tableName 61 | |SELECT * FROM VALUES ('John', 30, '60 Wall Street', 150.5), 62 | | ('Peter', 35, '110 Wall Street', 200.3) 63 | |""".stripMargin) 64 | val loadedDf = spark.read.format(RedisFormat) 65 | .option(SqlOptionTableName, tableName) 66 | .load() 67 | .cache() 68 | verifyDf(loadedDf) 69 | } 70 | 71 | test("create temporary view, make regular insertions then select") { 72 | val tableName = generatePersonTableName() 73 | spark.sql( 74 | s"""CREATE TEMPORARY VIEW $tableName (name STRING, age INT, address STRING, salary DOUBLE) 75 | | USING $RedisFormat OPTIONS (table '$tableName') 76 | |""".stripMargin) 77 | spark.sql( 78 | s"""INSERT INTO TABLE $tableName 79 | | VALUES ('John', 30, '60 Wall Street', 150.5), 80 | | ('Peter', 35, '110 Wall Street', 200.3) 81 | |""".stripMargin) 82 | val loadedDf = spark.sql( 83 | s"""SELECT * FROM $tableName 84 | |""".stripMargin) 85 | verifyDf(loadedDf) 86 | } 87 | 88 | test("select from temporary view") { 89 | val tableName = generatePersonTableName() 90 | createTempView(tableName) 91 | val loadedDf = spark.sql(s"SELECT * FROM $tableName") 92 | verifyDf(loadedDf) 93 | } 94 | 95 | test("select all fields from temporary view") { 96 | val tableName = generatePersonTableName() 97 | createTempView(tableName) 98 | val loadedDf = spark.sql(s"SELECT name, age, address, salary FROM $tableName") 99 | verifyDf(loadedDf) 100 | } 101 | 102 | test("select name and salary from temporary view") { 103 | val tableName = generatePersonTableName() 104 | createTempView(tableName) 105 | val actualDf = spark.sql(s"SELECT name, salary FROM $tableName") 106 | verifyPartialDf(actualDf) 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/acl/AclDataframeClusterSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.acl 2 | 3 | import com.redislabs.provider.redis.df.AclDataframeSuite 4 | import com.redislabs.provider.redis.env.RedisClusterAclEnv 5 | 6 | class AclDataframeClusterSuite extends AclDataframeSuite with RedisClusterAclEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/acl/AclDataframeStandaloneSuite.scala: 
-------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.acl 2 | 3 | import com.redislabs.provider.redis.df.AclDataframeSuite 4 | import com.redislabs.provider.redis.env.RedisStandaloneAclEnv 5 | 6 | class AclDataframeStandaloneSuite extends AclDataframeSuite with RedisStandaloneAclEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/benchmark/DataframeBenchmarkSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.benchmark 2 | 3 | import com.redislabs.provider.redis.df.RedisDataframeSuite 4 | import com.redislabs.provider.redis.rdd.Keys 5 | import com.redislabs.provider.redis.util.Person.TableNamePrefix 6 | import com.redislabs.provider.redis.util.PipelineUtils.foreachWithPipeline 7 | import com.redislabs.provider.redis.util.TestUtils.generateTableName 8 | import com.redislabs.provider.redis.util.{Logging, Person} 9 | import com.redislabs.provider.redis.{ReadWriteConfig, RedisBenchmarks, toRedisContext} 10 | import org.apache.spark.rdd.RDD 11 | import org.apache.spark.sql.SaveMode 12 | import org.apache.spark.sql.redis._ 13 | import redis.clients.jedis.PipelineBase 14 | 15 | /** 16 | * @author The Viet Nguyen 17 | */ 18 | trait DataframeBenchmarkSuite extends RedisDataframeSuite with RedisBenchmarks with Logging { 19 | 20 | val tableName: String = generateTableName(TableNamePrefix) 21 | 22 | private val startDate = s"${System.currentTimeMillis()}" 23 | 24 | def suiteTags: String = startDate 25 | 26 | def persistentModel: String 27 | 28 | def rdd(): RDD[Person] 29 | 30 | override def afterAll(): Unit = { 31 | time(s"$suiteTags, Cleanup") { 32 | val hosts = redisConfig.hosts 33 | implicit val readWriteConfig: ReadWriteConfig = ReadWriteConfig.Default 34 | sc.fromRedisKeyPattern() 35 | .foreachPartition { p => 36 | Keys.groupKeysByNode(hosts, p) 37 | .foreach { case (n, ks) => 38 | val conn = n.connect() 39 | foreachWithPipeline(conn, ks) { (pl, k) => 40 | (pl: PipelineBase).del(k) // fix ambiguous reference to overloaded definition 41 | } 42 | conn.close() 43 | } 44 | } 45 | } 46 | super.afterAll() 47 | } 48 | 49 | test(s"$suiteTags, Write") { 50 | val df = spark.createDataFrame(rdd()) 51 | time(s"$suiteTags, Write") { 52 | df.write.format(RedisFormat) 53 | .option(SqlOptionModel, persistentModel) 54 | .option(SqlOptionTableName, tableName) 55 | .option(SqlOptionKeyColumn, "name") 56 | .mode(SaveMode.Overwrite) 57 | .save() 58 | } 59 | } 60 | 61 | test(s"$suiteTags, Read") { 62 | time(s"$suiteTags, Read") { 63 | spark.read.format(RedisFormat) 64 | .option(SqlOptionModel, persistentModel) 65 | .option(SqlOptionTableName, tableName) 66 | .option(SqlOptionNumPartitions, 8) 67 | .load() 68 | .foreach { _ => 69 | // measure read all elements 70 | } 71 | } 72 | } 73 | 74 | test(s"$suiteTags, Read all fields") { 75 | time(s"$suiteTags, Read all fields") { 76 | spark.read.format(RedisFormat) 77 | .option(SqlOptionModel, persistentModel) 78 | .option(SqlOptionTableName, tableName) 79 | .option(SqlOptionNumPartitions, 8) 80 | .load() 81 | .select("name", "age", "address", "salary") 82 | .foreach { _ => 83 | // measure read all elements 84 | } 85 | } 86 | } 87 | 88 | test(s"$suiteTags, Read 1 fields") { 89 | time(s"$suiteTags, Read 1 fields") { 90 | spark.read.format(RedisFormat) 91 | .option(SqlOptionModel, persistentModel) 92 | .option(SqlOptionTableName, tableName) 
93 | .option(SqlOptionNumPartitions, 8) 94 | .load() 95 | .select("name") 96 | .foreach { _ => 97 | // measure read all elements 98 | } 99 | } 100 | } 101 | 102 | test(s"$suiteTags, Read 0 fields") { 103 | time(s"$suiteTags, Read 0 fields") { 104 | spark.read.format(RedisFormat) 105 | .option(SqlOptionModel, persistentModel) 106 | .option(SqlOptionTableName, tableName) 107 | .option(SqlOptionNumPartitions, 8) 108 | .load() 109 | .select() 110 | .foreach { _ => 111 | // measure read all elements 112 | } 113 | } 114 | } 115 | 116 | test(s"$suiteTags, Take 10") { 117 | time(s"$suiteTags, Take 10") { 118 | spark.read.format(RedisFormat) 119 | .option(SqlOptionModel, persistentModel) 120 | .option(SqlOptionTableName, tableName) 121 | .option(SqlOptionNumPartitions, 8) 122 | .load() 123 | .take(10) 124 | } 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/benchmark/ManyValueBenchmarkSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.benchmark 2 | 3 | import com.redislabs.provider.redis.env.RedisClusterEnv 4 | import com.redislabs.provider.redis.util.Person 5 | import org.apache.spark.rdd.RDD 6 | 7 | /** 8 | * @author The Viet Nguyen 9 | */ 10 | trait ManyValueBenchmarkSuite extends DataframeBenchmarkSuite with RedisClusterEnv { 11 | 12 | private def num = 1000000 13 | 14 | override def suiteTags: String = s"${super.suiteTags}, Many:$num" 15 | 16 | override def rdd(): RDD[Person] = { 17 | val partitionsNum = 8 18 | val sectionLength = num / partitionsNum 19 | spark.sparkContext 20 | .parallelize(0 until partitionsNum, partitionsNum) 21 | .mapPartitions { 22 | _ 23 | .flatMap { i => 24 | val start = i * sectionLength 25 | val end = start + sectionLength + 1 26 | Stream.range(start, end) 27 | } 28 | .map { i => 29 | Person(s"John-$i", 30, "60 Wall Street", 150.5) 30 | } 31 | } 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/benchmark/SingleValueBenchmarkSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.benchmark 2 | 3 | import com.redislabs.provider.redis.env.RedisClusterEnv 4 | import com.redislabs.provider.redis.util.Person 5 | import org.apache.spark.rdd.RDD 6 | 7 | /** 8 | * @author The Viet Nguyen 9 | */ 10 | trait SingleValueBenchmarkSuite extends DataframeBenchmarkSuite with RedisClusterEnv { 11 | 12 | override def suiteTags: String = s"${super.suiteTags}, Single" 13 | 14 | override def rdd(): RDD[Person] = { 15 | spark.sparkContext.parallelize(Seq(Person(s"John", 30, "60 Wall Street", 150.5))) 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/benchmark/cluster/BinaryModelManyValueClusterBenchmarkSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.benchmark.cluster 2 | 3 | import com.redislabs.provider.redis.df.benchmark.ManyValueBenchmarkSuite 4 | import com.redislabs.provider.redis.env.RedisClusterEnv 5 | import com.redislabs.provider.redis.util.BenchmarkTest 6 | import org.apache.spark.sql 7 | 8 | /** 9 | * @author The Viet Nguyen 10 | */ 11 | @BenchmarkTest 12 | class BinaryModelManyValueClusterBenchmarkSuite extends 
ManyValueBenchmarkSuite 13 | with RedisClusterEnv { 14 | 15 | override def suiteTags: String = s"${super.suiteTags}, Binary" 16 | 17 | override def persistentModel: String = sql.redis.SqlOptionModelBinary 18 | } 19 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/benchmark/cluster/BinaryModelSingleValueClusterBenchmarkSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.benchmark.cluster 2 | 3 | import com.redislabs.provider.redis.df.benchmark.SingleValueBenchmarkSuite 4 | import com.redislabs.provider.redis.env.RedisClusterEnv 5 | import com.redislabs.provider.redis.util.BenchmarkTest 6 | import org.apache.spark.sql 7 | 8 | /** 9 | * @author The Viet Nguyen 10 | */ 11 | @BenchmarkTest 12 | class BinaryModelSingleValueClusterBenchmarkSuite extends SingleValueBenchmarkSuite 13 | with RedisClusterEnv { 14 | 15 | override def suiteTags: String = s"${super.suiteTags}, Binary" 16 | 17 | override def persistentModel: String = sql.redis.SqlOptionModelBinary 18 | } 19 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/benchmark/cluster/HashModelManyValueClusterBenchmarkSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.benchmark.cluster 2 | 3 | import com.redislabs.provider.redis.df.benchmark.ManyValueBenchmarkSuite 4 | import com.redislabs.provider.redis.env.RedisClusterEnv 5 | import com.redislabs.provider.redis.util.BenchmarkTest 6 | import org.apache.spark.sql.redis.SqlOptionModelHash 7 | 8 | /** 9 | * @author The Viet Nguyen 10 | */ 11 | @BenchmarkTest 12 | class HashModelManyValueClusterBenchmarkSuite extends ManyValueBenchmarkSuite 13 | with RedisClusterEnv { 14 | 15 | override def suiteTags: String = s"${super.suiteTags}, Hash" 16 | 17 | override def persistentModel: String = SqlOptionModelHash 18 | } 19 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/benchmark/cluster/HashModelSingleValueClusterBenchmarkSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.benchmark.cluster 2 | 3 | import com.redislabs.provider.redis.df.benchmark.SingleValueBenchmarkSuite 4 | import com.redislabs.provider.redis.env.RedisClusterEnv 5 | import com.redislabs.provider.redis.util.BenchmarkTest 6 | import org.apache.spark.sql.redis.SqlOptionModelHash 7 | 8 | /** 9 | * @author The Viet Nguyen 10 | */ 11 | @BenchmarkTest 12 | class HashModelSingleValueClusterBenchmarkSuite extends SingleValueBenchmarkSuite 13 | with RedisClusterEnv { 14 | 15 | override def suiteTags: String = s"${super.suiteTags}, Hash" 16 | 17 | override def persistentModel: String = SqlOptionModelHash 18 | } 19 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/cluster/BinaryDataframeClusterSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.cluster 2 | 3 | import java.nio.charset.StandardCharsets.UTF_8 4 | 5 | import com.redislabs.provider.redis.df.BinaryDataframeSuite 6 | import com.redislabs.provider.redis.env.RedisClusterEnv 7 | import 
org.apache.spark.sql.redis.RedisSourceRelation.dataKey 8 | import redis.clients.jedis.{HostAndPort, JedisCluster} 9 | 10 | /** 11 | * @author The Viet Nguyen 12 | */ 13 | class BinaryDataframeClusterSuite extends BinaryDataframeSuite with RedisClusterEnv { 14 | 15 | override def saveMap(tableName: String, key: String, value: Map[String, String]): Unit = { 16 | val host = redisConfig.initialHost 17 | val hostAndPort = new HostAndPort(host.host, host.port) 18 | val conn = new JedisCluster(hostAndPort) 19 | conn.set(dataKey(tableName, key).getBytes(UTF_8), serialize(value)) 20 | conn.close() 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/cluster/CsvDataframeClusterSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.cluster 2 | 3 | import com.redislabs.provider.redis.df.CsvDataframeSuite 4 | import com.redislabs.provider.redis.env.RedisClusterEnv 5 | 6 | class CsvDataframeClusterSuite extends CsvDataframeSuite with RedisClusterEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/cluster/DataframeClusterSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.cluster 2 | 3 | import com.redislabs.provider.redis.df.DataframeSuite 4 | import com.redislabs.provider.redis.env.RedisClusterEnv 5 | 6 | class DataframeClusterSuite extends DataframeSuite with RedisClusterEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/cluster/FilteredDataframeClusterSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.cluster 2 | 3 | import com.redislabs.provider.redis.df.{FilteredDataframeSuite, RedisDataframeSuite} 4 | import com.redislabs.provider.redis.env.RedisClusterEnv 5 | 6 | /** 7 | * @author The Viet Nguyen 8 | */ 9 | class FilteredDataframeClusterSuite extends FilteredDataframeSuite with RedisClusterEnv 10 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/cluster/HashDataframeClusterSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.cluster 2 | 3 | import com.redislabs.provider.redis.df.HashDataframeSuite 4 | import com.redislabs.provider.redis.env.RedisClusterEnv 5 | import redis.clients.jedis.{HostAndPort, JedisCluster} 6 | 7 | import scala.collection.JavaConverters._ 8 | 9 | /** 10 | * @author The Viet Nguyen 11 | */ 12 | class HashDataframeClusterSuite extends HashDataframeSuite with RedisClusterEnv { 13 | 14 | override def saveMap(tableName: String, key: String, value: Map[String, String]): Unit = { 15 | val host = redisConfig.initialHost 16 | val hostAndPort = new HostAndPort(host.host, host.port) 17 | val conn = new JedisCluster(hostAndPort) 18 | conn.hmset(tableName + ":" + key, value.asJava) 19 | conn.close() 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/cluster/SparkSqlClusterSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.cluster 
2 | 3 | import com.redislabs.provider.redis.df.SparkSqlSuite 4 | import com.redislabs.provider.redis.env.RedisClusterEnv 5 | 6 | /** 7 | * @author The Viet Nguyen 8 | */ 9 | class SparkSqlClusterSuite extends SparkSqlSuite with RedisClusterEnv 10 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/standalone/BinaryDataframeStandaloneSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.standalone 2 | 3 | import java.nio.charset.StandardCharsets.UTF_8 4 | 5 | import com.redislabs.provider.redis.df.BinaryDataframeSuite 6 | import com.redislabs.provider.redis.env.RedisStandaloneEnv 7 | import com.redislabs.provider.redis.util.ConnectionUtils.withConnection 8 | import org.apache.spark.sql.redis.RedisSourceRelation.dataKey 9 | 10 | /** 11 | * @author The Viet Nguyen 12 | */ 13 | class BinaryDataframeStandaloneSuite extends BinaryDataframeSuite with RedisStandaloneEnv { 14 | 15 | override def saveMap(tableName: String, key: String, value: Map[String, String]): Unit = { 16 | val host = redisConfig.initialHost 17 | withConnection(host.connect()) { conn => 18 | conn.set(dataKey(tableName, key).getBytes(UTF_8), serialize(value)) 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/standalone/CsvDataframeStandaloneSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.standalone 2 | 3 | import com.redislabs.provider.redis.df.CsvDataframeSuite 4 | import com.redislabs.provider.redis.env.RedisStandaloneEnv 5 | 6 | class CsvDataframeStandaloneSuite extends CsvDataframeSuite with RedisStandaloneEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/standalone/DataframeStandaloneSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.standalone 2 | 3 | import com.redislabs.provider.redis.df.DataframeSuite 4 | import com.redislabs.provider.redis.env.RedisStandaloneEnv 5 | 6 | class DataframeStandaloneSuite extends DataframeSuite with RedisStandaloneEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/standalone/FilteredDataframeStandaloneSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.standalone 2 | 3 | import com.redislabs.provider.redis.df.{FilteredDataframeSuite, RedisDataframeSuite} 4 | import com.redislabs.provider.redis.env.RedisStandaloneEnv 5 | 6 | /** 7 | * @author The Viet Nguyen 8 | */ 9 | class FilteredDataframeStandaloneSuite extends FilteredDataframeSuite with RedisStandaloneEnv 10 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/standalone/HashDataframeStandaloneSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.standalone 2 | 3 | import com.redislabs.provider.redis.df.HashDataframeSuite 4 | import com.redislabs.provider.redis.env.RedisStandaloneEnv 5 | import com.redislabs.provider.redis.util.ConnectionUtils.withConnection 6 | 7 | import 
scala.collection.JavaConverters._ 8 | 9 | /** 10 | * @author The Viet Nguyen 11 | */ 12 | class HashDataframeStandaloneSuite extends HashDataframeSuite with RedisStandaloneEnv { 13 | 14 | override def saveMap(tableName: String, key: String, value: Map[String, String]): Unit = { 15 | val host = redisConfig.initialHost 16 | withConnection(host.connect()) { conn => 17 | conn.hmset(tableName + ":" + key, value.asJava) 18 | } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/df/standalone/SparkSqlStandaloneSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.df.standalone 2 | 3 | import com.redislabs.provider.redis.df.SparkSqlSuite 4 | import com.redislabs.provider.redis.env.RedisStandaloneEnv 5 | 6 | /** 7 | * @author The Viet Nguyen 8 | */ 9 | class SparkSqlStandaloneSuite extends SparkSqlSuite with RedisStandaloneEnv 10 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/env/Env.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.env 2 | 3 | import com.redislabs.provider.redis.RedisConfig 4 | import org.apache.spark.sql.SparkSession 5 | import org.apache.spark.streaming.StreamingContext 6 | import org.apache.spark.{SparkConf, SparkContext} 7 | 8 | trait Env { 9 | 10 | val conf: SparkConf 11 | var spark: SparkSession = _ 12 | var sc: SparkContext = _ 13 | var ssc: StreamingContext = _ 14 | 15 | val redisHost = "127.0.0.1" 16 | val redisPort = 6379 17 | val redisAuth = "passwd" // password for the 'default' user (used with AUTH) 18 | 19 | // user credentials 20 | val user = "alice" 21 | val userPassword = "p1pp0" 22 | 23 | val redisConfig: RedisConfig 24 | } 25 | 26 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/env/RedisClusterAclEnv.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.env 2 | 3 | import com.redislabs.provider.redis.{RedisConfig, RedisEndpoint} 4 | import org.apache.spark.SparkConf 5 | 6 | /** 7 | * Cluster with user/password authentication 8 | */ 9 | trait RedisClusterAclEnv extends Env { 10 | 11 | override val redisPort = 7379 12 | 13 | override val conf: SparkConf = new SparkConf() 14 | .setMaster("local[*]").setAppName(getClass.getName) 15 | .set("spark.redis.host", redisHost) 16 | .set("spark.redis.port", s"$redisPort") 17 | .set("spark.redis.user", user) 18 | .set("spark.redis.auth", userPassword) 19 | .set("spark.streaming.stopGracefullyOnShutdown", "true") 20 | .set("spark.driver.bindAddress", "127.0.0.1") 21 | 22 | override val redisConfig: RedisConfig = 23 | new RedisConfig(RedisEndpoint( 24 | host = redisHost, 25 | port = redisPort, 26 | user = user, 27 | auth = userPassword)) 28 | } 29 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/env/RedisClusterEnv.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.env 2 | 3 | import com.redislabs.provider.redis.{RedisConfig, RedisEndpoint} 4 | import org.apache.spark.SparkConf 5 | 6 | /** 7 | * @author The Viet Nguyen 8 | */ 9 | trait RedisClusterEnv extends Env { 10 | 11 | override val redisPort = 7379 12
| 13 | override val conf: SparkConf = new SparkConf() 14 | .setMaster("local[*]").setAppName(getClass.getName) 15 | .set("spark.redis.host", redisHost) 16 | .set("spark.redis.port", s"$redisPort") 17 | .set("spark.streaming.stopGracefullyOnShutdown", "true") 18 | .set("spark.driver.bindAddress", "127.0.0.1") 19 | 20 | override val redisConfig: RedisConfig = new RedisConfig(RedisEndpoint(redisHost, redisPort)) 21 | } 22 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/env/RedisStandaloneAclEnv.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.env 2 | 3 | import com.redislabs.provider.redis.{RedisConfig, RedisEndpoint} 4 | import org.apache.spark.SparkConf 5 | 6 | /** 7 | * Standalone with user/password authentication 8 | */ 9 | trait RedisStandaloneAclEnv extends Env { 10 | 11 | override val conf: SparkConf = new SparkConf() 12 | .setMaster("local[*]").setAppName(getClass.getName) 13 | .set("spark.redis.host", redisHost) 14 | .set("spark.redis.port", s"$redisPort") 15 | .set("spark.redis.user", user) 16 | .set("spark.redis.auth", userPassword) 17 | .set("spark.streaming.stopGracefullyOnShutdown", "true") 18 | .set("spark.driver.bindAddress", "127.0.0.1") 19 | 20 | override val redisConfig: RedisConfig = 21 | new RedisConfig(RedisEndpoint( 22 | host = redisHost, 23 | port = redisPort, 24 | user = user, 25 | auth = userPassword)) 26 | } 27 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/env/RedisStandaloneEnv.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.env 2 | 3 | import com.redislabs.provider.redis.{RedisConfig, RedisEndpoint} 4 | import org.apache.spark.SparkConf 5 | 6 | /** 7 | * @author The Viet Nguyen 8 | */ 9 | trait RedisStandaloneEnv extends Env { 10 | 11 | override val conf: SparkConf = new SparkConf() 12 | .setMaster("local[*]").setAppName(getClass.getName) 13 | .set("spark.redis.host", redisHost) 14 | .set("spark.redis.port", s"$redisPort") 15 | .set("spark.redis.auth", redisAuth) 16 | .set("spark.streaming.stopGracefullyOnShutdown", "true") 17 | .set("spark.sql.streaming.forceDeleteTempCheckpointLocation", "true") 18 | .set("spark.driver.bindAddress", "127.0.0.1") 19 | 20 | override val redisConfig: RedisConfig = 21 | new RedisConfig(RedisEndpoint( 22 | host = redisHost, 23 | port = redisPort, 24 | auth = redisAuth)) 25 | } 26 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/env/RedisStandaloneSSLEnv.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.env 2 | 3 | import com.redislabs.provider.redis.{RedisConfig, RedisEndpoint} 4 | import org.apache.spark.SparkConf 5 | 6 | trait RedisStandaloneSSLEnv extends Env { 7 | 8 | override val redisPort = 6380 9 | 10 | override val conf: SparkConf = new SparkConf() 11 | .setMaster("local[*]").setAppName(getClass.getName) 12 | .set("spark.redis.host", redisHost) 13 | .set("spark.redis.port", s"$redisPort") 14 | .set("spark.redis.auth", redisAuth) 15 | .set("spark.redis.ssl", "true") 16 | .set("spark.streaming.stopGracefullyOnShutdown", "true") 17 | .set("spark.driver.bindAddress", "127.0.0.1") 18 | 19 | override val redisConfig: RedisConfig = 20 | new 
RedisConfig(RedisEndpoint(redisHost, redisPort, auth = redisAuth, ssl = true)) 21 | } 22 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/rdd/RedisKeysSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.rdd 2 | 3 | import com.redislabs.provider.redis.ReadWriteConfig 4 | import org.scalatest.Matchers 5 | import redis.clients.jedis.util.JedisClusterCRC16 6 | 7 | import scala.collection.JavaConverters._ 8 | 9 | trait RedisKeysSuite extends RedisRddSuite with Keys with Matchers { 10 | 11 | implicit val readWriteConfig: ReadWriteConfig = ReadWriteConfig.Default 12 | 13 | test("getKeys") { 14 | val returnedKeys = getKeys(redisConfig.hosts, 0, 1024, "*") 15 | .toArray.sorted 16 | 17 | val targetKeys = (sc.parallelize(content.split("\\W+")).collect :+ 18 | "all:words:cnt:sortedset" :+ 19 | "all:words:cnt:hash" :+ 20 | "all:words:list" :+ 21 | "all:words:set").filter(x => { 22 | val slot = JedisClusterCRC16.getSlot(x) 23 | !x.isEmpty && slot >= 0 && slot <= 1024 24 | }).distinct.sorted 25 | 26 | returnedKeys should be(targetKeys) 27 | } 28 | 29 | test("groupKeysByNode") { 30 | val allkeys = getKeys(redisConfig.hosts, 0, 16383, "*") 31 | val nodeKeysPairs = groupKeysByNode(redisConfig.hosts, allkeys) 32 | val returnedCnt = nodeKeysPairs.map { x => 33 | filterKeysByType(x._1.connect(), x._2, "string").length 34 | } 35 | .sum 36 | val targetCnt = sc.parallelize(content.split("\\W+").filter(!_.isEmpty)).distinct.count 37 | assert(returnedCnt == targetCnt) 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/rdd/RedisRddExtraSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.rdd 2 | 3 | import com.redislabs.provider.redis.util.ConnectionUtils.withConnection 4 | import org.scalatest.Matchers 5 | import com.redislabs.provider.redis._ 6 | import com.redislabs.provider.redis.util.TestUtils 7 | import redis.clients.jedis.exceptions.JedisConnectionException 8 | 9 | import scala.collection.JavaConverters._ 10 | 11 | /** 12 | * More RDD tests 13 | */ 14 | trait RedisRddExtraSuite extends SparkRedisSuite with Keys with Matchers { 15 | 16 | implicit val redisConfig: RedisConfig 17 | 18 | test("toRedisByteLISTs") { 19 | val list1 = Seq("a1", "b1", "c1") 20 | val list2 = Seq("a2", "b2", "c2") 21 | val keyValues = Seq( 22 | ("binary-list1", list1), 23 | ("binary-list2", list2) 24 | ) 25 | val keyValueBytes = keyValues.map { case (k, list) => (k.getBytes, list.map(_.getBytes())) } 26 | val rdd = sc.parallelize(keyValueBytes) 27 | sc.toRedisByteLISTs(rdd) 28 | 29 | verifyList("binary-list1", list1) 30 | verifyList("binary-list2", list2) 31 | } 32 | 33 | test("toRedisLISTs") { 34 | val list1 = Seq("a1", "b1", "c1") 35 | val list2 = Seq("a2", "b2", "c2") 36 | val keyValues = Seq( 37 | ("list1", list1), 38 | ("list2", list2) 39 | ) 40 | val rdd = sc.parallelize(keyValues) 41 | sc.toRedisLISTs(rdd) 42 | 43 | verifyList("list1", list1) 44 | verifyList("list2", list2) 45 | } 46 | 47 | test("toRedisHASHes") { 48 | val map1 = Map("k1" -> "v1", "k2" -> "v2") 49 | val map2 = Map("k3" -> "v3", "k4" -> "v4") 50 | val hashes = Seq( 51 | ("hash1", map1), 52 | ("hash2", map2) 53 | ) 54 | val rdd = sc.parallelize(hashes) 55 | sc.toRedisHASHes(rdd) 56 | 57 | verifyHash("hash1", map1) 58 | 
verifyHash("hash2", map2) 59 | } 60 | 61 | test("toRedisByteHASHes") { 62 | val map1 = Map("k1" -> "v1", "k2" -> "v2") 63 | val map2 = Map("k3" -> "v3", "k4" -> "v4") 64 | val hashes = Seq( 65 | ("hash1", map1), 66 | ("hash2", map2) 67 | ) 68 | val hashesBytes = hashes.map { case (k, hash) => (k.getBytes, hash.map { case (mapKey, mapVal) => (mapKey.getBytes, mapVal.getBytes) }) } 69 | val rdd = sc.parallelize(hashesBytes) 70 | sc.toRedisByteHASHes(rdd) 71 | 72 | verifyHash("hash1", map1) 73 | verifyHash("hash2", map2) 74 | } 75 | 76 | test("connection fails with incorrect user/pass") { 77 | assertThrows[JedisConnectionException] { 78 | new RedisConfig(RedisEndpoint( 79 | host = redisHost, 80 | port = redisPort, 81 | user = user, 82 | auth = "wrong_password")) 83 | } 84 | } 85 | 86 | test("connection with correct user/pass") { 87 | val userConfig = new RedisConfig(RedisEndpoint( 88 | host = redisHost, 89 | port = redisPort, 90 | user = user, 91 | auth = userPassword)) 92 | 93 | val someKey = TestUtils.generateRandomKey() 94 | val jedis = userConfig.connectionForKey(someKey) 95 | jedis.set(someKey, "123") 96 | jedis.get(someKey) should be("123") 97 | 98 | // test RDD operation 99 | sc.fromRedisKeyPattern(someKey)(redisConfig = userConfig) 100 | .collect()(0) should be(someKey) 101 | } 102 | 103 | def verifyList(list: String, vals: Seq[String]): Unit = { 104 | withConnection(redisConfig.getHost(list).endpoint.connect()) { conn => 105 | conn.lrange(list, 0, vals.size).asScala should be(vals.toList) 106 | } 107 | } 108 | 109 | def verifyHash(hash: String, vals: Map[String, String]): Unit = { 110 | withConnection(redisConfig.getHost(hash).endpoint.connect()) { conn => 111 | conn.hgetAll(hash).asScala should be(vals) 112 | } 113 | } 114 | 115 | } 116 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/rdd/RedisRddSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.rdd 2 | 3 | import com.redislabs.provider.redis.util.ConnectionUtils.withConnection 4 | import com.redislabs.provider.redis.{RedisConfig, SparkRedisSuite, toRedisContext} 5 | import org.scalatest.Matchers 6 | import scala.collection.JavaConverters._ 7 | 8 | import scala.io.Source.fromInputStream 9 | 10 | /** 11 | * @author The Viet Nguyen 12 | */ 13 | trait RedisRddSuite extends SparkRedisSuite with Keys with Matchers { 14 | 15 | implicit val redisConfig: RedisConfig 16 | 17 | val content: String = fromInputStream(getClass.getClassLoader.getResourceAsStream("blog")) 18 | .getLines.toArray.mkString("\n") 19 | 20 | val contentWords: Array[String] = content.split("\\W+").filter(_.nonEmpty) 21 | val zSetKey: String = "all:words:cnt:sortedset" 22 | val hashKey: String = "all:words:cnt:hash" 23 | val listKey: String = "all:words:list" 24 | val setKey: String = "all:words:set" 25 | val missingRedisKey: String = "missingRedisKey" 26 | 27 | override def beforeAll() { 28 | super.beforeAll() 29 | val wcnts = sc.parallelize(contentWords) 30 | .map { w => 31 | (w, 1) 32 | } 33 | .reduceByKey { 34 | _ + _ 35 | } 36 | .map { x => 37 | (x._1, x._2.toString) 38 | } 39 | val wds = sc.parallelize(contentWords) 40 | // Flush all the hosts 41 | redisConfig.hosts.foreach(node => { 42 | val conn = node.connect() 43 | conn.flushAll 44 | conn.close() 45 | }) 46 | sc.toRedisKV(wcnts) 47 | sc.toRedisZSET(wcnts, zSetKey) 48 | sc.toRedisHASH(wcnts, hashKey) 49 | sc.toRedisLIST(wds, listKey) 50 | 
sc.toRedisSET(wds, setKey) 51 | } 52 | 53 | test("RedisKVRDD") { 54 | val redisKVRDD = sc.fromRedisKV("*") 55 | val kvContents = redisKVRDD.sortByKey().collect 56 | val wrongTypeKeysRes = List(hashKey, zSetKey, listKey, setKey).map(sc.fromRedisKV(_).collect) 57 | val missingKeyRes = sc.fromRedisKV(missingRedisKey).collect() 58 | val wcnts = contentWords.map((_, 1)).groupBy(_._1). 59 | map(x => (x._1, x._2.map(_._2).sum.toString)).toArray.sortBy(_._1) 60 | kvContents shouldBe wcnts 61 | all(wrongTypeKeysRes) should have size 0 62 | missingKeyRes should have size 0 63 | } 64 | 65 | test("RedisZsetRDD") { 66 | val redisZSetWithScore = sc.fromRedisZSetWithScore(zSetKey) 67 | val zsetWithScore = redisZSetWithScore.sortByKey().collect 68 | 69 | val redisZSet = sc.fromRedisZSet("all:words:cnt:sortedset") 70 | val zset = redisZSet.collect.sorted 71 | 72 | val redisZRangeWithScore = sc.fromRedisZRangeWithScore(zSetKey, 0, 15) 73 | val zrangeWithScore = redisZRangeWithScore.collect.sortBy(x => (x._2, x._1)) 74 | 75 | val redisZRange = sc.fromRedisZRange(zSetKey, 0, 15) 76 | val zrange = redisZRange.collect.sorted 77 | 78 | val redisZRangeByScoreWithScore = 79 | sc.fromRedisZRangeByScoreWithScore(zSetKey, 3, 9) 80 | val zrangeByScoreWithScore = redisZRangeByScoreWithScore.collect.sortBy(x => (x._2, x._1)) 81 | 82 | val redisZRangeByScore = sc.fromRedisZRangeByScore(zSetKey, 3, 9) 83 | val zrangeByScore = redisZRangeByScore.collect.sorted 84 | 85 | val wrongTypeKeysRes = List(hashKey, setKey, listKey, contentWords(0)).map(sc.fromRedisZSetWithScore(_).collect) 86 | val missingKeyRes = sc.fromRedisZSetWithScore(missingRedisKey).collect() 87 | 88 | val wcnts = contentWords.map((_, 1)).groupBy(_._1). 89 | map(x => (x._1, x._2.map(_._2).sum.toDouble)) 90 | 91 | zsetWithScore should be(wcnts.toArray.sortBy(_._1)) 92 | zset should be(wcnts.keys.toArray.sorted) 93 | zrangeWithScore should be(wcnts.toArray.sortBy(x => (x._2, x._1)).take(16)) 94 | zrange should be(wcnts.toArray.sortBy(x => (x._2, x._1)).take(16).map(_._1)) 95 | zrangeByScoreWithScore should be(wcnts.toArray.filter(x => x._2 >= 3 && x._2 <= 9) 96 | .sortBy(x => (x._2, x._1))) 97 | zrangeByScore should be(wcnts.toArray.filter(x => x._2 >= 3 && x._2 <= 9).map(_._1).sorted) 98 | all(wrongTypeKeysRes) should have length 0 99 | missingKeyRes should have length 0 100 | } 101 | 102 | test("RedisHashRDD") { 103 | val redisHashRDD = sc.fromRedisHash(hashKey) 104 | val hashContents = redisHashRDD.sortByKey().collect 105 | val wcnts = contentWords.map((_, 1)).groupBy(_._1). 
106 | map(x => (x._1, x._2.map(_._2).sum.toString)).toArray.sortBy(_._1) 107 | val wrongTypeKeysRes = List(zSetKey, setKey, listKey, contentWords(0)).map(sc.fromRedisHash(_).collect) 108 | val missingKeyRes = sc.fromRedisHash(missingRedisKey).collect() 109 | 110 | hashContents should be(wcnts) 111 | all(wrongTypeKeysRes) should have length 0 112 | missingKeyRes should have length 0 113 | } 114 | 115 | test("RedisListRDD") { 116 | val redisListRDD = sc.fromRedisList(listKey) 117 | val listContents = redisListRDD.sortBy(x => x).collect 118 | val ws = contentWords.sorted 119 | val wrongTypeKeysRes = List(zSetKey, setKey, hashKey, contentWords(0)).map(sc.fromRedisList(_).collect) 120 | val missingKeyRes = sc.fromRedisList(missingRedisKey).collect() 121 | 122 | listContents should be(ws) 123 | all(wrongTypeKeysRes) should have length 0 124 | missingKeyRes should have length 0 125 | } 126 | 127 | test("RedisSetRDD") { 128 | val redisSetRDD = sc.fromRedisSet(setKey) 129 | val setContents = redisSetRDD.sortBy(x => x).collect 130 | val ws = content.split("\\W+").filter(!_.isEmpty).distinct.sorted 131 | val wrongTypeKeysRes = List(zSetKey, listKey, hashKey, contentWords(0)).map(sc.fromRedisSet(_).collect) 132 | val missingKeyRes = sc.fromRedisSet(missingRedisKey).collect() 133 | 134 | setContents should be(ws) 135 | all(wrongTypeKeysRes) should have length 0 136 | missingKeyRes should have length 0 137 | } 138 | 139 | test("Expire") { 140 | val expireTime = 1 141 | val prefix = s"#expire in $expireTime#:" 142 | val wcnts = sc.parallelize(contentWords).map((_, 1)). 143 | reduceByKey(_ + _).map(x => (prefix + x._1, x._2.toString)) 144 | val wds = sc.parallelize(contentWords) 145 | sc.toRedisKV(wcnts, expireTime) 146 | sc.toRedisZSET(wcnts, prefix + zSetKey, expireTime) 147 | sc.toRedisHASH(wcnts, prefix + hashKey, expireTime) 148 | sc.toRedisLIST(wds, prefix + listKey, expireTime) 149 | sc.toRedisSET(wds, prefix + setKey, expireTime) 150 | Thread.sleep(expireTime * 1000 + 1) 151 | sc.fromRedisKeyPattern(prefix + "*").count should be(0) 152 | } 153 | 154 | } 155 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/rdd/acl/RedisRDDClusterAclSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.rdd.acl 2 | 3 | import com.redislabs.provider.redis.env.RedisClusterAclEnv 4 | import com.redislabs.provider.redis.rdd.RedisRddSuite 5 | 6 | class RedisRDDClusterAclSuite extends RedisRddSuite with RedisClusterAclEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/rdd/acl/RedisRDDStandaloneAclSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.rdd.acl 2 | 3 | import com.redislabs.provider.redis.env.RedisStandaloneAclEnv 4 | import com.redislabs.provider.redis.rdd.RedisRddSuite 5 | 6 | class RedisRDDStandaloneAclSuite extends RedisRddSuite with RedisStandaloneAclEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/rdd/cluster/RedisKeysClusterSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.rdd.cluster 2 | 3 | import com.redislabs.provider.redis.env.RedisClusterEnv 4 | import com.redislabs.provider.redis.rdd.RedisKeysSuite 5 | 6 | class 
RedisKeysClusterSuite extends RedisKeysSuite with RedisClusterEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/rdd/cluster/RedisRDDClusterSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.rdd.cluster 2 | 3 | import com.redislabs.provider.redis.env.RedisClusterEnv 4 | import com.redislabs.provider.redis.rdd.RedisRddSuite 5 | 6 | class RedisRDDClusterSuite extends RedisRddSuite with RedisClusterEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/rdd/cluster/RedisRddExtraClusterSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.rdd.cluster 2 | 3 | import com.redislabs.provider.redis.env.RedisClusterEnv 4 | import com.redislabs.provider.redis.rdd.RedisRddExtraSuite 5 | 6 | class RedisRddExtraClusterSuite extends RedisRddExtraSuite with RedisClusterEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/rdd/standalone/RedisKeysStandaloneSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.rdd.standalone 2 | 3 | import com.redislabs.provider.redis.env.RedisStandaloneEnv 4 | import com.redislabs.provider.redis.rdd.RedisKeysSuite 5 | 6 | class RedisKeysStandaloneSuite extends RedisKeysSuite with RedisStandaloneEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/rdd/standalone/RedisRDDStandaloneSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.rdd.standalone 2 | 3 | import com.redislabs.provider.redis.env.RedisStandaloneEnv 4 | import com.redislabs.provider.redis.rdd.RedisRddSuite 5 | 6 | class RedisRDDStandaloneSuite extends RedisRddSuite with RedisStandaloneEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/rdd/standalone/RedisRddExtraStandaloneSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.rdd.standalone 2 | 3 | import com.redislabs.provider.redis.env.RedisStandaloneEnv 4 | import com.redislabs.provider.redis.rdd.RedisRddExtraSuite 5 | 6 | class RedisRddExtraStandaloneSuite extends RedisRddExtraSuite with RedisStandaloneEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/stream/RedisXStreamSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.stream 2 | 3 | import com.redislabs.provider.redis.streaming.{ConsumerConfig, Earliest} 4 | import com.redislabs.provider.redis.util.ConnectionUtils.withConnection 5 | import com.redislabs.provider.redis.util.TestUtils 6 | import com.redislabs.provider.redis.SparkStreamingRedisSuite 7 | import com.redislabs.provider.redis.streaming._ 8 | import org.apache.spark.storage.StorageLevel 9 | import org.scalatest.Matchers 10 | import org.scalatest.concurrent.Eventually._ 11 | import org.scalatest.time.{Millis, Span} 12 | import redis.clients.jedis.StreamEntryID 13 | 14 | import 
scala.collection.JavaConversions._ 15 | 16 | // scalastyle:off multiple.string.literals 17 | trait RedisXStreamSuite extends SparkStreamingRedisSuite with Matchers { 18 | 19 | // timeout for eventually function 20 | implicit val patienceConfig = PatienceConfig(timeout = scaled(Span(5000, Millis))) 21 | 22 | test("createRedisXStream, 1 stream, 1 consumer") { 23 | val streamKey = TestUtils.generateRandomKey() 24 | 25 | // the data can be written to the stream earlier than we start receiver, so set offset to Earliest 26 | val stream = ssc.createRedisXStream(Seq(ConsumerConfig(streamKey, "g1", "c1", Earliest)), StorageLevel.MEMORY_ONLY) 27 | 28 | val _redisConfig = redisConfig // to make closure serializable 29 | 30 | // iterate over items and save to redis list 31 | // repartition to 1 to avoid concurrent write issues 32 | stream.repartition(1).foreachRDD { rdd => 33 | rdd.foreachPartition { partition => 34 | for (item <- partition) { 35 | val listKey = s"${item.streamKey}:list" 36 | withConnection(_redisConfig.connectionForKey(listKey)) { conn => 37 | conn.rpush(listKey, s"${item.id.v1}-${item.id.v2} " + item.fields.mkString(" ")) 38 | } 39 | } 40 | } 41 | } 42 | 43 | // write to stream 44 | withConnection(redisConfig.connectionForKey(streamKey)) { conn => 45 | conn.xadd(streamKey, new StreamEntryID(1, 0), Map("a" -> "1", "z" -> "4")) 46 | conn.xadd(streamKey, new StreamEntryID(1, 1), Map("b" -> "2")) 47 | conn.xadd(streamKey, new StreamEntryID(2, 0), Map("c" -> "3")) 48 | } 49 | 50 | ssc.start() 51 | 52 | // eventually there should be items in the list 53 | val listKey = s"$streamKey:list" 54 | withConnection(redisConfig.connectionForKey(listKey)) { conn => 55 | eventually { 56 | conn.llen(listKey) shouldBe 3 57 | conn.lpop(listKey) should be("1-0 a -> 1 z -> 4") 58 | conn.lpop(listKey) should be("1-1 b -> 2") 59 | conn.lpop(listKey) should be("2-0 c -> 3") 60 | } 61 | } 62 | } 63 | 64 | test("createRedisXStream, 1 stream, 2 consumers") { 65 | val streamKey = TestUtils.generateRandomKey() 66 | 67 | // the data can be written to the stream earlier than we start receiver, so set offset to Earliest 68 | val stream = ssc.createRedisXStream(Seq( 69 | ConsumerConfig(streamKey, "g1", "c1", Earliest, batchSize = 1), 70 | ConsumerConfig(streamKey, "g1", "c2", Earliest, batchSize = 1) 71 | ), StorageLevel.MEMORY_ONLY) 72 | 73 | val _redisConfig = redisConfig // to make closure serializable 74 | 75 | // iterate over items and save to redis list 76 | // repartition to 1 to avoid concurrent write issues 77 | stream.repartition(1).foreachRDD { rdd => 78 | rdd.foreachPartition { partition => 79 | for (item <- partition) { 80 | val listKey = s"${item.streamKey}:list" 81 | withConnection(_redisConfig.connectionForKey(listKey)) { conn => 82 | conn.rpush(listKey, s"${item.id.v1}-${item.id.v2} " + item.fields.mkString(" ")) 83 | } 84 | } 85 | } 86 | } 87 | 88 | // write to stream 89 | withConnection(redisConfig.connectionForKey(streamKey)) { conn => 90 | conn.xadd(streamKey, new StreamEntryID(1, 0), Map("a" -> "1", "z" -> "4")) 91 | conn.xadd(streamKey, new StreamEntryID(1, 1), Map("b" -> "2")) 92 | conn.xadd(streamKey, new StreamEntryID(2, 0), Map("c" -> "3")) 93 | } 94 | 95 | ssc.start() 96 | 97 | // eventually there should be items in the list, the ordering is not deterministic 98 | val listKey = s"$streamKey:list" 99 | withConnection(redisConfig.connectionForKey(listKey)) { conn => 100 | eventually { 101 | conn.llen(listKey) shouldBe 3 102 | (1 to 3).map(_ => conn.lpop(listKey)).toSet shouldBe Set( 103 | 
"1-0 a -> 1 z -> 4", 104 | "1-1 b -> 2", 105 | "2-0 c -> 3" 106 | ) 107 | } 108 | } 109 | } 110 | 111 | test("createRedisXStream, 2 streams, 2 consumers") { 112 | val stream1Key = TestUtils.generateRandomKey() 113 | val stream2Key = TestUtils.generateRandomKey() 114 | 115 | logInfo("stream1Key " + stream1Key) 116 | logInfo("stream2Key " + stream2Key) 117 | 118 | // the data can be written to the stream earlier than we start receiver, so set offset to Earliest 119 | val stream = ssc.createRedisXStream(Seq( 120 | ConsumerConfig(stream1Key, "g1", "c1", Earliest, batchSize = 1), 121 | ConsumerConfig(stream2Key, "g1", "c2", Earliest, batchSize = 1) 122 | ), StorageLevel.MEMORY_ONLY) 123 | 124 | val _redisConfig = redisConfig // to make closure serializable 125 | 126 | // iterate over items and save to redis list 127 | // repartition to 1 to avoid concurrent write issues 128 | stream.repartition(1).foreachRDD { rdd => 129 | rdd.foreachPartition { partition => 130 | for (item <- partition) { 131 | val listKey = s"${item.streamKey}:list" 132 | withConnection(_redisConfig.connectionForKey(listKey)) { conn => 133 | conn.rpush(listKey, s"${item.id.v1}-${item.id.v2} " + item.fields.mkString(" ")) 134 | } 135 | } 136 | } 137 | } 138 | 139 | // write to stream 140 | withConnection(redisConfig.connectionForKey(stream1Key)) { conn => 141 | conn.xadd(stream1Key, new StreamEntryID(1, 0), Map("a" -> "1", "z" -> "4")) 142 | } 143 | withConnection(redisConfig.connectionForKey(stream2Key)) { conn => 144 | conn.xadd(stream2Key, new StreamEntryID(1, 1), Map("b" -> "2")) 145 | conn.xadd(stream2Key, new StreamEntryID(2, 0), Map("c" -> "3")) 146 | } 147 | 148 | ssc.start() 149 | 150 | // eventually there should be items in the list 151 | val list1Key = s"$stream1Key:list" 152 | withConnection(redisConfig.connectionForKey(list1Key)) { conn => 153 | eventually { 154 | conn.llen(list1Key) shouldBe 1 155 | conn.lpop(list1Key) should be("1-0 a -> 1 z -> 4") 156 | } 157 | } 158 | 159 | val list2Key = s"$stream2Key:list" 160 | withConnection(redisConfig.connectionForKey(list2Key)) { conn => 161 | eventually { 162 | conn.llen(list2Key) shouldBe 2 163 | conn.lpop(list2Key) should be("1-1 b -> 2") 164 | conn.lpop(list2Key) should be("2-0 c -> 3") 165 | } 166 | } 167 | 168 | } 169 | 170 | } 171 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/stream/cluster/RedisXStreamClusterSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.stream.cluster 2 | 3 | import com.redislabs.provider.redis.env.RedisClusterEnv 4 | import com.redislabs.provider.redis.stream.RedisXStreamSuite 5 | 6 | class RedisXStreamClusterSuite extends RedisXStreamSuite with RedisClusterEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/stream/standalone/RedisXStreamStandaloneSuite.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.stream.standalone 2 | 3 | import com.redislabs.provider.redis.env.RedisStandaloneEnv 4 | import com.redislabs.provider.redis.stream.RedisXStreamSuite 5 | 6 | class RedisXStreamStandaloneSuite extends RedisXStreamSuite with RedisStandaloneEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/util/BenchmarkTest.java: 
-------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.util; 2 | 3 | import java.lang.annotation.ElementType; 4 | import java.lang.annotation.Retention; 5 | import java.lang.annotation.RetentionPolicy; 6 | import java.lang.annotation.Target; 7 | 8 | /** 9 | * @author The Viet Nguyen 10 | */ 11 | @org.scalatest.TagAnnotation 12 | @Retention(RetentionPolicy.RUNTIME) 13 | @Target({ElementType.METHOD, ElementType.TYPE}) 14 | public @interface BenchmarkTest { 15 | } 16 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/util/CollectionUtilsTest.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.util 2 | 3 | import com.redislabs.provider.redis.util.CollectionUtils.RichCollection 4 | import org.scalatest.{FunSuite, Matchers} 5 | 6 | /** 7 | * @author The Viet Nguyen 8 | */ 9 | class CollectionUtilsTest extends FunSuite with Matchers { 10 | 11 | test("distinctBy") { 12 | val persons = Seq(Person("John", 30, "60 Wall Street", 150.5), 13 | Person("John", 30, "18 Main Street", 150.5), Person("Peter", 35, "110 Wall Street", 200.3)) 14 | val distinctPersons = persons.distinctBy(_.name) 15 | distinctPersons shouldBe Seq(Person("John", 30, "60 Wall Street", 150.5), 16 | Person("Peter", 35, "110 Wall Street", 200.3)) 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/util/ConnectionSSLUtilsTest.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.util 2 | 3 | import com.redislabs.provider.redis.env.RedisStandaloneSSLEnv 4 | import com.redislabs.provider.redis.util.ConnectionUtils.{JedisExt, XINFO} 5 | import org.scalatest.{FunSuite, Matchers} 6 | import redis.clients.jedis.StreamEntryID 7 | 8 | import scala.collection.JavaConverters._ 9 | 10 | /** 11 | * @author The Viet Nguyen 12 | */ 13 | class ConnectionSSLUtilsTest extends FunSuite with Matchers with RedisStandaloneSSLEnv { 14 | 15 | test("xinfo") { 16 | val streamKey = TestUtils.generateRandomKey() 17 | val conn = redisConfig.connectionForKey(streamKey) 18 | val data = Map("key" -> "value").asJava 19 | val entryId = conn.xadd(streamKey, new StreamEntryID(0, 1), data) 20 | val info = conn.xinfo(XINFO.SubCommandStream, streamKey) 21 | info.get(XINFO.LastGeneratedId) shouldBe Some(entryId.toString) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/util/ConnectionUtilsTest.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.util 2 | 3 | import com.redislabs.provider.redis.env.RedisStandaloneEnv 4 | import com.redislabs.provider.redis.util.ConnectionUtils.{JedisExt, XINFO} 5 | import org.scalatest.{FunSuite, Matchers} 6 | import redis.clients.jedis.StreamEntryID 7 | 8 | import scala.collection.JavaConverters._ 9 | 10 | /** 11 | * @author The Viet Nguyen 12 | */ 13 | class ConnectionUtilsTest extends FunSuite with Matchers with RedisStandaloneEnv { 14 | 15 | test("xinfo") { 16 | val streamKey = TestUtils.generateRandomKey() 17 | val conn = redisConfig.connectionForKey(streamKey) 18 | val data = Map("key" -> "value").asJava 19 | val entryId = conn.xadd(streamKey, new StreamEntryID(0, 1), data) 20 | val 
info = conn.xinfo(XINFO.SubCommandStream, streamKey) 21 | info.get(XINFO.LastGeneratedId) shouldBe Some(entryId.toString) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/util/EntityId.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.util 2 | 3 | import org.apache.spark.sql.types._ 4 | 5 | /** 6 | * @author The Viet Nguyen 7 | */ 8 | case class EntityId(_id: String, name: String) 9 | 10 | object EntityId { 11 | 12 | val schema = StructType(Array( 13 | StructField("_id", StringType), 14 | StructField("name", StringType) 15 | )) 16 | } 17 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/util/JsonUtilsTest.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.util 2 | 3 | import org.scalatest.{FunSuite, Matchers} 4 | 5 | /** 6 | * @author The Viet Nguyen 7 | */ 8 | class JsonUtilsTest extends FunSuite with Matchers { 9 | 10 | test("testToJson") { 11 | val json = JsonUtils.toJson(Map("key" -> "value")) 12 | json shouldBe """{"key":"value"}""" 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/util/Person.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.util 2 | 3 | import com.redislabs.provider.redis.util.TestUtils._ 4 | import org.apache.spark.sql.types._ 5 | import org.apache.spark.sql.{DataFrame, SparkSession} 6 | 7 | /** 8 | * @author The Viet Nguyen 9 | */ 10 | case class Person(name: String, age: Int, address: String, salary: Double) 11 | 12 | object Person { 13 | 14 | val TableNamePrefix = "person" 15 | val KeyName = "name" 16 | 17 | val data = Seq( 18 | Person("John", 30, "60 Wall Street", 150.5), 19 | Person("Peter", 35, "110 Wall Street", 200.3) 20 | ) 21 | 22 | val dataMaps = Seq( 23 | Map("name" -> "John", "age" -> "30", "address" -> "60 Wall Street", "salary" -> "150.5"), 24 | Map("name" -> "Peter", "age" -> "35", "address" -> "110 Wall Street", "salary" -> "200.3") 25 | ) 26 | 27 | val schema = StructType(Array( 28 | StructField("name", StringType), 29 | StructField("age", IntegerType), 30 | StructField("address", StringType), 31 | StructField("salary", DoubleType) 32 | )) 33 | 34 | val fullSchema = StructType(schema.fields :+ StructField("_id", StringType)) 35 | 36 | def df(spark: SparkSession): DataFrame = spark.createDataFrame(data) 37 | 38 | def generatePersonTableName(): String = generateTableName(TableNamePrefix) 39 | 40 | def generatePersonStreamKey(): String = generatePersonTableName() 41 | 42 | } 43 | -------------------------------------------------------------------------------- /src/test/scala/com/redislabs/provider/redis/util/TestUtils.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis.util 2 | 3 | import java.util.UUID 4 | 5 | import org.scalatest.Assertions 6 | import scala.reflect.Manifest 7 | 8 | object TestUtils { 9 | 10 | def generateTableName(prefix: String): String = { 11 | // generate random table, so we can run test multiple times and not append/overwrite data 12 | prefix + UUID.randomUUID().toString.replace("-", "") 13 | } 14 | 15 | def generateRandomKey(): String = { 16 | 
UUID.randomUUID().toString.replace("-", "") 17 | } 18 | 19 | /** 20 | * A wrapper of Assertions.intercept() that suppresses spark errors in the logs. 21 | * It makes it easier to analyse unit test output. 22 | */ 23 | def interceptSparkErr[T <: AnyRef](f: => Any)(implicit manifest: Manifest[T]): T = { 24 | // turn off spark logger 25 | val logger = org.apache.log4j.Logger.getLogger("org") 26 | val levelBefore = logger.getLevel 27 | logger.setLevel(org.apache.log4j.Level.OFF) 28 | 29 | // delegate interception 30 | val interceptRes = Assertions.intercept(f) 31 | 32 | // revert logger 33 | logger.setLevel(levelBefore) 34 | 35 | interceptRes 36 | } 37 | 38 | 39 | } 40 | -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/redis/RedisSourceRelationTest.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis 2 | 3 | import org.scalatest.{FunSuite, Matchers} 4 | 5 | /** 6 | * @author The Viet Nguyen 7 | */ 8 | class RedisSourceRelationTest extends FunSuite with Matchers { 9 | 10 | test("redis key extractor with prefix pattern") { 11 | val key = RedisSourceRelation.tableKey("table*", "tablekey") 12 | key shouldBe "key" 13 | } 14 | 15 | test("redis key extractor with other patterns") { 16 | val key = RedisSourceRelation.tableKey("*table", "key") 17 | key shouldBe "key" 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/redis/stream/RedisConsumerOffsetTest.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis.stream 2 | 3 | import org.scalatest.{FunSuite, Matchers} 4 | 5 | /** 6 | * @author The Viet Nguyen 7 | */ 8 | class RedisConsumerOffsetTest extends FunSuite with Matchers { 9 | 10 | test("testFromJson") { 11 | val offset = RedisSourceOffset.fromJson( 12 | """ 13 | |{ 14 | | "offsets":{ 15 | | "mystream": { 16 | | "groupName": "group55", 17 | | "offset": "1543674099961-0" 18 | | } 19 | | } 20 | |} 21 | |""".stripMargin) 22 | offset shouldBe RedisSourceOffset(Map("mystream" -> 23 | RedisConsumerOffset("group55", "1543674099961-0"))) 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/redis/stream/RedisSourceConfigSuite.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis.stream 2 | 3 | import org.apache.spark.sql.redis._ 4 | import org.scalatest.{FunSuite, Matchers} 5 | 6 | /** 7 | * @author The Viet Nguyen 8 | */ 9 | class RedisSourceConfigSuite extends FunSuite with Matchers { 10 | 11 | val group: String = "group55" 12 | 13 | test("testFromMap") { 14 | val config = RedisSourceConfig.fromMap(Map( 15 | StreamOptionStreamKeys -> "mystream1,mystream2,mystream3", 16 | StreamOptionStreamOffsets -> 17 | s""" 18 | |{ 19 | | "offsets":{ 20 | | "mystream1": { 21 | | "groupName": "$group", 22 | | "offset": "0-10" 23 | | }, 24 | | "mystream2": { 25 | | "groupName": "$group", 26 | | "offset": "0-7" 27 | | } 28 | | } 29 | |} 30 | """.stripMargin, 31 | StreamOptionParallelism -> "2", 32 | StreamOptionGroupName -> group, 33 | StreamOptionConsumerPrefix -> "consumer" 34 | )) 35 | config shouldBe RedisSourceConfig( 36 | Seq( 37 | RedisConsumerConfig("mystream1", group, "consumer-1", 100, 500), 38 | RedisConsumerConfig("mystream1", group, "consumer-2", 100, 500), 39 |
RedisConsumerConfig("mystream2", group, "consumer-1", 100, 500), 40 | RedisConsumerConfig("mystream2", group, "consumer-2", 100, 500), 41 | RedisConsumerConfig("mystream3", group, "consumer-1", 100, 500), 42 | RedisConsumerConfig("mystream3", group, "consumer-2", 100, 500) 43 | ), 44 | Some(RedisSourceOffset(Map( 45 | "mystream1" -> RedisConsumerOffset(group, "0-10"), 46 | "mystream2" -> RedisConsumerOffset(group, "0-7") 47 | ))) 48 | ) 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/redis/stream/RedisSourceTest.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis.stream 2 | 3 | import org.scalatest.{FunSuite, Matchers, OptionValues} 4 | 5 | /** 6 | * @author The Viet Nguyen 7 | */ 8 | class RedisSourceTest extends FunSuite with Matchers with OptionValues { 9 | 10 | test("testGetOffsetRanges") { 11 | val startOffsets = RedisSourceOffset(Map("mystream" -> RedisConsumerOffset("group55", "0-0"))) 12 | val endOffsets = RedisSourceOffset(Map("mystream" -> RedisConsumerOffset("group55", "0-1"))) 13 | val consumerConfig = RedisConsumerConfig("mystream", "group55", "consumer", 1000, 100) 14 | val consumerConfigs = Seq(consumerConfig) 15 | val offsetRanges = RedisSource.getOffsetRanges(Some(startOffsets), endOffsets, consumerConfigs) 16 | offsetRanges.head shouldBe RedisSourceOffsetRange(Some("0-0"), "0-1", consumerConfig) 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/redis/stream/cluster/RedisStreamSourceClusterSuite.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis.stream.cluster 2 | 3 | import com.redislabs.provider.redis.env.RedisClusterEnv 4 | import org.apache.spark.sql.redis.stream.RedisStreamSourceSuite 5 | 6 | class RedisStreamSourceClusterSuite extends RedisStreamSourceSuite with RedisClusterEnv 7 | -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/redis/stream/standalone/RedisStreamSourceStandaloneSuite.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.redis.stream.standalone 2 | 3 | import com.redislabs.provider.redis.env.RedisStandaloneEnv 4 | import org.apache.spark.sql.redis.stream.RedisStreamSourceSuite 5 | 6 | class RedisStreamSourceStandaloneSuite extends RedisStreamSourceSuite with RedisStandaloneEnv 7 | --------------------------------------------------------------------------------