├── .gitattributes
├── .github
    └── workflows
    │   └── ci-config.yml
├── LICENSE
├── README.md
├── app
    ├── Dockerfile
    ├── app_runner.py
    ├── models
    │   └── creditcardfraud.pb
    ├── requirements.txt
    └── script.py
├── connectinsight
    ├── Dockerfile
    ├── dbloader.py
    ├── entrypoint.sh
    ├── requirements.txt
    └── wait-for.sh
├── dataloader
    ├── Dockerfile
    ├── data
    │   └── creditcard.csv
    ├── load.py
    └── requirements.txt
├── demo_flow.png
├── demo_hash.png
├── demo_redisAI.png
├── demo_res.png
├── docker-compose.yaml
├── example_client
    ├── client.py
    └── requirements.txt
└── tests
    └── test_demo_flow.py


/.gitattributes:
--------------------------------------------------------------------------------
1 | *.csv filter=lfs diff=lfs merge=lfs -text
2 | *.pb filter=lfs diff=lfs merge=lfs -text
3 | 


--------------------------------------------------------------------------------
/.github/workflows/ci-config.yml:
--------------------------------------------------------------------------------
 1 | name: CI  # unit tests plus an end-to-end run of the docker-compose demo
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ master ]
 6 |   pull_request:
 7 |     branches: [ master ]
 8 |   schedule:
 9 |     - cron: '0 0 * * *'  # once a day at 00:00 (GitHub evaluates schedules in UTC)
10 | 
11 | jobs:
12 |   run_tests:  # runs tests/test_demo_flow.py with a RedisAI service container
13 |     runs-on: ubuntu-latest
14 |     services:
15 |       redis:
16 |         image: redislabs/redisai:latest
17 |         ports:
18 |           - 6379:6379  # publish the service port on the runner
19 | 
20 |     steps:
21 |     - uses: actions/checkout@v2
22 |     - name: Install git-lfs  # *.csv and *.pb are LFS-tracked (see .gitattributes)
23 |       run: |
24 |           git lfs install
25 |           git lfs fetch && git lfs checkout
26 |     - uses: docker://python:latest  # NOTE(review): the run steps below execute on the runner itself - confirm this step is needed
27 |     - name: Run tests
28 |       run: |
29 |         pip3 install -r example_client/requirements.txt
30 |         python3 -m unittest tests/test_demo_flow.py
31 | 
32 |   run_demo:  # brings up the full docker-compose stack and runs the example client against it
33 |     runs-on: ubuntu-latest
34 |     steps:
35 |     - uses: actions/checkout@v2
36 |     - name: Checkout model 
37 |       run: |
38 |         git lfs install 
39 |         git lfs fetch && git lfs checkout
40 |     - name: Build the docker-compose image  # -d starts the services in the background
41 |       run: docker-compose up -d
42 |     - name: Test 
43 |       run: |
44 |         pip3 install -r example_client/requirements.txt
45 |         python3 example_client/client.py
46 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2020, Redis Labs
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | 1. Redistributions of source code must retain the above copyright notice, this
10 |    list of conditions and the following disclaimer.
11 | 
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 |    this list of conditions and the following disclaimer in the documentation
14 |    and/or other materials provided with the distribution.
15 | 
16 | 3. Neither the name of the copyright holder nor the names of its
17 |    contributors may be used to endorse or promote products derived from
18 |    this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | [![license](https://img.shields.io/github/license/RedisAI/FraudDetectionDemo.svg)](https://github.com/RedisAI/FraudDetectionDemo)
  2 | [![CI](https://github.com/RedisAI/FraudDetectionDemo/actions/workflows/ci-config.yml/badge.svg)](https://github.com/RedisAI/FraudDetectionDemo/actions/workflows/ci-config.yml)
  3 | [![Forum](https://img.shields.io/badge/Forum-RedisAI-blue)](https://forum.redislabs.com/c/modules/redisai)
  4 | [![Discord](https://img.shields.io/discord/697882427875393627)](https://discord.gg/rTQm7UZ)
  5 | 
  6 | # Fraud Detection Demo    
  7 | 
  8 | This demo illustrates the key advantage of data locality using [RedisAI](https://oss.redislabs.com/redisai/). Generating a prediction from a real-time AI/ML model is a multi-step process. The steps typically include: receiving a prediction (inference) request, retrieving feature data (needed by the model), and running the inference request (feeding features into the model). In general, these steps run in multiple processes/machines. In contrast, using Redis and the RedisAI module allows you to store feature data and run the AI Model in Redis! The feature data is within easy reach of the model!
  9 | This demo simulates a fraud-detection app, which relies on a ML model returning the probability that a given transaction is fraudulent. The model uses transaction data and reference (feature) data previously stored in Redis for the customers. The entire flow is executed within Redis.
 10 | 
 11 | ## Running the Demo
 12 | To run the demo app and load the data, run the following commands:
 13 | ```
 14 | # If you don't have it already, install https://git-lfs.github.com/ (On OSX: brew install git-lfs)
 15 | $ git lfs install
 16 | 
 17 | # Clone the repository, then fetch the LFS-tracked model and data files from inside it
 18 | $ git clone https://github.com/RedisAI/FraudDetectionDemo.git
 19 | $ cd FraudDetectionDemo && git lfs fetch && git lfs checkout
 20 | 
 21 | # Launch the demo with docker-compose (may require sudo in linux)
 22 | $ docker-compose up
 23 | ```
 24 | If something went wrong, e.g. you skipped installing git-lfs, you need to force docker-compose to rebuild the containers
 25 | ```
 26 | $ docker-compose up --force-recreate --build
 27 | ```
 28 | 
 29 | ## Architecture
 30 | ### RedisAI
 31 | A Redis instance is launched with the RedisAI module loaded (by running the `redislabs/redisai:latest` docker image). RedisAI allows you to store and execute AI/ML pre-trained models in Redis. The models can be written in Pytorch, Tensorflow or any other ML framework (XGBoost, scikit-learn) that allows models to be exported to ONNX.
 32 | 
 33 | ### [Data Loader](https://github.com/RedisAI/FraudDetectionDemo/blob/master/dataloader/load.py)
 34 | Historical data of credit card transactions processed into features is loaded into Redis from [`data/creditcard.csv` file](https://media.githubusercontent.com/media/RedisAI/FraudDetectionDemo/master/dataloader/data/creditcard.csv), using redis-py (Redis' official python client). Every transaction is [stored in Redis](https://github.com/RedisAI/FraudDetectionDemo/blob/master/dataloader/load.py#L31) as a hash with the following structure:
 35 | - The key is `<time_stamp>_<index>{tag}`, where `<index>` is used to differentiate between keys which correspond to transactions that have occurred in the same timestamp, and `{tag}` is used to ensure that all hashes are stored in the same shard (when using clustered environment). 
 36 | - The hash value is a dictionary that contains 29 numeric features ("v0", "v1", ... "v28") that represent the transaction, and another field ("amount") specifying the amount of money to transfer in the transaction.
 37 | 
 38 | In addition, all the hashes' keys are [kept in a sorted set](https://github.com/RedisAI/FraudDetectionDemo/blob/master/dataloader/load.py#L34) stored in Redis whose key is `references{tag}`, and every element in it is of the form: `{hash_key_name: timestamp}`. This sorted set keeps track of the key names of these hashes sorted by time.
 39 | 
 40 | ### [App](https://github.com/RedisAI/FraudDetectionDemo/blob/master/app/app_runner.py)
 41 | The app is a basic component that [loads the fraud detection pre-trained ML model](https://github.com/RedisAI/FraudDetectionDemo/blob/master/app/app_runner.py#L16) into RedisAI, along with a [TorchScript](https://oss.redis.com/redisai/intro/#scripting) which is used for pre-processing of the data. This model was built using Tensorflow, which is one of the three ML frameworks that RedisAI supports as backends.
 42 | 
 43 | **Multiple devices:** RedisAI allows parallel execution of models on different logical devices. In this demo, the fraud detection model is loaded on 2 logical devices under two different keys: 1)`fraud_detection_model{tag}_CPU` will be associated with `CPU`, and 2)`fraud_detection_model{tag}_CPU:1`, will be associated with `CPU:1`.
 44 | Side note - If you have a multi-GPU machine with Nvidia CUDA support, you can load models that can run in parallel and associate each one with a different device (for example: `fraud_detection_model{tag}_GPU:0` and `fraud_detection_model{tag}_GPU:1`), to gain better utilization of GPU resources.
 45 | 
 46 | ### RedisInsight
 47 | [RedisInsight](https://redis.com/redis-enterprise/redis-insight/) is a desktop manager that provides an intuitive and efficient GUI for Redis, allowing you to interact with your databases, monitor, and manage your data. This demo bundles a RedisInsight container, to enable visualization and exploration of the data.  
 48 | 
 49 | ## Explore loaded reference data
 50 | Open a browser and point it to RedisInsight at http://localhost:8001 and select the preloaded redis connection.
 51 | In the *CLI* tool, execute the following command:
 52 | ```bash
 53 | >> dbsize
 54 | (integer) 1004
 55 | ```
 56 | 
 57 | The 1004 keys stand for:
 58 | - 1000 raw reference data points (hashes)
 59 | - 1 sorted set
 60 | - 1 Torch script
 61 | - 2 Tensorflow models
 62 | 
 63 | Under the *browser* tool, you can see the loaded keys. Select a key of a hash (such as`478_2{tag}`) to see an example for transaction data.
 64 | 
 65 | ![Demo hash](./demo_hash.png "Demo hash redisInsights")
 66 | 
 67 | Next, click on *RedisAI* visualization tool to see the two loaded models and the script. By selecting a script, you can see its source code. It is also possible to run models/script's function via the UI, according to [AI.MODELEXECUTE](https://oss.redis.com/redisai/commands/#aimodelexecute) and [AI.SCRIPTEXECUTE](https://oss.redis.com/redisai/commands/#aiscriptexecute) commands' format.   
 68 | 
 69 | ![Demo redisAI](./demo_redisAI.png "Demo redisAI redisInsights")
 70 | 
 71 | ## Flow
 72 | When a new transaction happens, it needs to be evaluated if it's fraudulent or not. The ML/DL models take two inputs for this, the relevant reference data and the new transaction details. The following actions are triggered:
 73 | * First, the relevant keys with respect to the new transaction time and a certain time interval are obtained via range query over the `references{tag}` sorted set. The hash values of those keys will constitute the relevant reference data.
 74 | * Next, RedisAI DAG (directed acyclic graph) object will be used to orchestrate the end-to-end prediction flow, that will be triggered upon calling the [AI.DAGEXECUTE command](https://oss.redis.com/redisai/commands/#aidagexecute). The DAG's operations are the following:
 75 |   1. [Loading](https://github.com/RedisAI/FraudDetectionDemo/blob/master/example_client/client.py#L26) the numeric representation of the new transaction as a tensor with [`AI.TENSORSET` command](https://oss.redis.com/redisai/commands/#aitensorset).
 76 |   2. Running the [`hashes_to_tensor` pre-processing function](https://github.com/RedisAI/FraudDetectionDemo/blob/master/app/script.py#L23) to create the reference data tensor. This function receives the list of relevant keys in Redis as input, and constructs a reference data tensor based on their values, which are fetched directly from Redis (using [Redis command support for TorchScript](https://oss.redis.com/redisai/commands/#redis-commands-support)).
 77 |   3. Running the two fraud-detection models over the `transaction` and `reference` inputs in parallel ([recall](https://github.com/RedisAI/FraudDetectionDemo/blob/master/README.md#L43) that each model was associated with a different logical device).
 78 |   4. Running the [`post_processing` function](https://github.com/RedisAI/FraudDetectionDemo/blob/master/app/script.py#L37) that receives both models' predictions as inputs, and outputs the average score.
 79 |   5. [Storing](https://github.com/RedisAI/FraudDetectionDemo/blob/master/example_client/client.py#L25) the output of the `post_processing` function under the key `result{tag}` in Redis.
 80 | 
 81 | Finally, the score of the transaction (i.e., the probability in which it is fraudulent) is fetched by using [`AI.TENSORGET` command](https://oss.redis.com/redisai/commands/#aitensorget).
 82 | 
 83 | ![Demo flow](./demo_flow.png "Demo flow")
 84 | 
 85 | ### Simulation
 86 | To simulate this flow with a client application connecting to redis, run the following (here you can also see an example of the simulation output):
 87 | ```bash
 88 | $ pip3 install -r example_client/requirements.txt
 89 | $ python3 example_client/client.py
 90 | 
 91 | Time interval for reference data:  (61, 270)
 92 | 
 93 | Generating a random transaction in Time=270...
 94 | [[ 2.7000000e+02 -8.2951581e-01 -4.4787747e-01  6.0682118e-01
 95 |    4.7524232e-01  2.3593563e-01  1.2634248e+00 -1.2233499e+00
 96 |    3.7027165e-01 -8.8206857e-02  8.6683428e-01  8.9412665e-01
 97 |   -4.8722781e-02 -5.2572483e-01 -1.5464017e+00 -4.5807543e-01
 98 |   -1.4910455e-01  9.2615825e-01  6.2056929e-01 -3.8565230e-02
 99 |   -1.0080141e+00  3.9647776e-01  1.6992532e+00 -9.6349031e-02
100 |   -8.6946076e-01  5.5705246e-02  2.1251810e-01  1.2321904e-01
101 |   -3.3462542e-01 -4.5848402e-01]]
102 | 
103 | Performing fraud detection prediction using reference data (use up to 10 previous transactions)...
104 | result:  [0.02251661 0.9774834 ]
105 | Transaction is fraudulent with probability 0.022516613826155663
106 | 
107 | Total execution took: 3.930330276489258 ms
108 | 
109 | 
110 | ```
111 | 
112 | In this simulation, a random time interval is generated, and a random transaction to be processed in the upper bound of this time interval is generated (represented by a tensor whose dimensions are `(1,30)`). Then, a reference data tensor whose dimensions are `(1, 256)` is prepared from the (up to) 10 most recent transactions' data, by concatenating their values and either padding the remaining space or trimming the excess (this is done in the `hashes_to_tensor` function of the Torch Script). Overall, the client runs the entire flow by calling the AI.DAGEXECUTE command, and the results are stored in Redis.    
113 | 
114 | ![Demo flow res](./demo_res.png "Demo flow result")
115 | 


--------------------------------------------------------------------------------
/app/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Image for the app container: Python 3.9 with requirements.txt installed.
 2 | FROM python:3.9
 3 | 
 4 | WORKDIR /app
 5 | # Plain copy of the build context; COPY is used because ADD's archive/URL handling is not needed.
 6 | COPY . /app
 7 | 
 8 | RUN set -ex; \
 9 |     pip install --trusted-host pypi.python.org --trusted-host pypi.org --trusted-host files.pythonhosted.org -r requirements.txt;
10 | 
11 | # The script to run and its arguments are supplied as the container command (see docker-compose.yaml).
12 | ENTRYPOINT [ "python3" ]
10 | 


--------------------------------------------------------------------------------
/app/app_runner.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | from urllib.parse import urlparse
 3 | 
 4 | from redisai import Client
 5 | 
 6 | 
 7 | class FraudDetectionApp:
 8 |     def __init__(self, con_):
 9 |         self.con = con_
10 | 
11 |     def set_script(self, path, script_key):
12 |         with open(path, 'rb') as f:
13 |             script = f.read()
14 |             self.con.scriptstore(script_key, 'CPU', script, entry_points=['hashes_to_tensor', 'post_processing'])
15 | 
16 |     def set_model(self, path, model_key):
17 |         with open(path, 'rb') as f:
18 |             model = f.read()
19 |             self.con.modelstore(model_key+'_CPU', 'TF', 'CPU', data=model, inputs=['transaction', 'reference'], outputs=['output'])
20 |             self.con.modelstore(model_key+'_CPU:1', 'TF', 'CPU:1', data=model, inputs=['transaction', 'reference'], outputs=['output'])
21 | 
22 | 
23 | if __name__ == '__main__':
24 |     parser = argparse.ArgumentParser()
25 |     parser.add_argument('-rs', '--redis_server', help='Redis URL', type=str, default='redis://127.0.0.1:6379')
26 |     args = parser.parse_args()
27 | 
28 |     # Set up redis connection
29 |     url = urlparse(args.redis_server)
30 |     conn = Client(host=url.hostname, port=url.port)
31 |     if not conn.ping():
32 |         raise Exception('Redis unavailable')
33 | 
34 |     app = FraudDetectionApp(conn)
35 |     # Set script
36 |     app.set_script('script.py', 'helper_script{tag}')
37 |     # Set models
38 |     app.set_model('../app/models/creditcardfraud.pb', 'fraud_detection_model{tag}')
39 | 


--------------------------------------------------------------------------------
/app/models/creditcardfraud.pb:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:96ef0da3497866b726ac88928ed876d9647ca6a77b80d80f08513a8daaa38e07
3 | size 12251
4 | 


--------------------------------------------------------------------------------
/app/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | pandas
3 | redisai
4 | 


--------------------------------------------------------------------------------
/app/script.py:
--------------------------------------------------------------------------------
 1 | # Build the (1, 256) reference tensor for the credit-fraud inference model from a list of tensors.
 2 | def create_ref_tensor(tensors: List[Tensor]):
 3 |     size = (1, 256)  # shape of every tensor this function returns
 4 |     # No input - return an all-zeros tensor
 5 |     if len(tensors) == 0:
 6 |         return torch.zeros(size)
 7 | 
 8 |     # Concat tensors and flatten the result into a single row
 9 |     c = torch.cat(tensors).reshape((1, -1))
10 |     s = c.size()
11 |     if s == size:
12 |         return c
13 |     elif s[1] < size[1]:
14 |         # Not enough data - copy it to the front and leave the remaining values zero
15 |         res = torch.zeros(size)
16 |         res[0, :s[1]] = c.squeeze()
17 |         return res
18 |     else:
19 |         # Too much data - keep only the first 256 values
20 |         return c[0, :size[1]].unsqueeze(0)
21 | 
22 | 
23 | def hashes_to_tensor(tensors: List[Tensor], keys: List[str], args: List[str]):  # only `keys` is read here
24 | 
25 |     # Get the hashes from redis, using at most the first 10 keys (the example client passes the most recent first)
26 |     tensors_from_hashes = []
27 |     for key in keys[:10]:
28 |         hash_values = redis.asList(redis.execute("HVALS", key))
29 |         # convert every value in the hash to a (1, 1) float tensor, and concatenate them along dim 0
30 |         tensor = [torch.tensor(float(str(v))).reshape(1, 1) for v in hash_values]
31 |         tensors_from_hashes.append(torch.cat(tensor, dim=0))
32 | 
33 |     return create_ref_tensor(tensors_from_hashes)  # padded or trimmed to (1, 256)
34 | 
35 | 
36 | # Average the first two input tensors element-wise (`keys` and `args` are not used)
37 | def post_processing(tensors: List[Tensor], keys: List[str], args: List[str]):
38 |     return (tensors[0]+tensors[1]) / 2.0
39 | 


--------------------------------------------------------------------------------
/connectinsight/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Image for the one-shot container that registers the Redis database in RedisInsight.
 2 | FROM python:3.7
 3 | 
 4 | WORKDIR /app
 5 | # Plain copy of the build context; COPY is used because ADD's archive/URL handling is not needed.
 6 | COPY . /app
 7 | 
 8 | RUN set -ex; \
 9 |     pip install --trusted-host pypi.python.org --trusted-host pypi.org --trusted-host files.pythonhosted.org -r requirements.txt;
10 | 
11 | # entrypoint.sh waits for RedisInsight and then execs the container command.
12 | ENTRYPOINT [ "bash", "entrypoint.sh" ]
10 | 


--------------------------------------------------------------------------------
/connectinsight/dbloader.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from typing import Dict, Iterable, Union
 3 | 
 4 | import requests
 5 | import click
 6 | 
 7 | 
 8 | APP_URL = os.environ.get("APP_URL", "redisinsight:8001")
 9 | 
10 | 
11 | def load_credentials():
12 |     """
13 |     Load the session token and CSRF token to make protected API calls.
14 |     """
15 |     resp = requests.get(APP_URL)
16 |     try:
17 |         csrftoken = resp.cookies['csrftoken']
18 |         sessionid = resp.cookies['sessionid']
19 |         return sessionid, csrftoken
20 |     except Exception:
21 |         # For debugging a KeyError for sessionid cookie being thrown on some builds.
22 |         print("RESP CONTENT:", resp, resp.content)
23 |         print("RESP COOKIES:", resp.cookies)
24 |         raise
25 | 
26 | 
27 | def make_add_db_api_call(name: str, host: str, port: int, session_token: str, csrf_token: str):
28 |     """
29 |     Make API call to add databases. Raises exception on error response.
30 |     """
31 |     url = f"{APP_URL}/api/instance/"
32 |     resp = requests.post(url=url,
33 |                          cookies={'csrftoken': csrf_token,
34 |                                   'sessionid': session_token},
35 |                          headers={'X-CSRFToken': csrf_token},
36 |                          json={"name": host,
37 |                                "connectionType": "STANDALONE",
38 |                                'host': host,
39 |                                'port': port})
40 |     if not resp.ok:
41 |         raise Exception("Error response:\n" + resp.content.decode())
42 | 
43 | 
44 | def add_dbs(db_urls: Iterable[str]):
45 |     """
46 |     Add databases.
47 |     """
48 |     sessionid, csrftoken = load_credentials()
49 |     for db_url in db_urls:
50 |         host, port = db_url.split(':')
51 |         port = int(port)
52 |         make_add_db_api_call(name=host, host=host, port=port, session_token=sessionid, csrf_token=csrftoken)
53 | 
54 | 
55 | @click.command()
56 | @click.option('--db_urls', required=True, help='list of db urls separated by `;`. E.g. localhost:6379;localhost:6380')
57 | def main(db_urls: str):
58 |     db_urls = db_urls.split(';')
59 |     add_dbs(db_urls)
60 | 
61 | 
62 | if __name__ == '__main__':
63 |     # pylint: disable=no-value-for-parameter
64 |     main()
65 | 


--------------------------------------------------------------------------------
/connectinsight/entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | echo "Waiting RedisInsight to start"
4 | # wait-for.sh expects host:port, so strip the scheme (e.g. http://) and any path from APP_URL.
5 | target="${APP_URL#*://}"
6 | bash ./wait-for.sh -t 20 "${target%%/*}" -- echo "RedisInsight is up"
7 | echo "Excuting..."
8 | # Hand over to the container command; exec replaces this shell, so nothing may follow it.
9 | exec "$@"
8 | 


--------------------------------------------------------------------------------
/connectinsight/requirements.txt:
--------------------------------------------------------------------------------
1 | requests
2 | click


--------------------------------------------------------------------------------
/connectinsight/wait-for.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | TIMEOUT=15  # default for -t / --timeout (seconds)
 4 | QUIET=0  # set to 1 by -q / --quiet to silence echoerr
 5 | 
 6 | echoerr() {
 7 |   if [ "$QUIET" -ne 1 ]; then printf "%s\n" "$*" 1>&2; fi
 8 | }
 9 | 
10 | # Print the usage text on stderr and exit with the status passed as $1.
11 | usage() {
12 |   exitcode="$1"
13 |   # cmdname was never assigned, so the usage line started with an empty name.
14 |   cmdname="${0##*/}"
15 |   cat << USAGE >&2
16 | Usage:
17 |   $cmdname host:port [-t timeout] [-- command args]
18 |   -q | --quiet                        Do not output any status messages
19 |   -t TIMEOUT | --timeout=timeout      Timeout in seconds, zero for no timeout
20 |   -- COMMAND ARGS                     Execute command with args after the test finishes
21 | USAGE
22 |   exit "$exitcode"
23 | }
21 | 
22 | # Probe HOST:PORT with nc once per second. On success, exec the command given as
23 | # arguments (if any) or exit 0; after TIMEOUT failed attempts, exit 1.
24 | wait_for() {
25 |   elapsed=0
26 |   # TIMEOUT=0 means wait forever, as the usage text promises; the previous
27 |   # `seq $TIMEOUT` loop ran zero times in that case and failed immediately.
28 |   while [ "$TIMEOUT" -eq 0 ] || [ "$elapsed" -lt "$TIMEOUT" ]; do
29 |     if nc -z "$HOST" "$PORT" > /dev/null 2>&1; then
30 |       if [ $# -gt 0 ] ; then
31 |         exec "$@"
32 |       fi
33 |       exit 0
34 |     fi
35 |     sleep 1
36 |     elapsed=$((elapsed + 1))
37 |   done
38 |   echo "Operation timed out" >&2
39 |   exit 1
40 | }
38 | 
39 | while [ $# -gt 0 ]  # consume arguments until none are left or a branch breaks out
40 | do
41 |   case "$1" in
42 |     *:* )  # any argument containing a colon is taken as the host:port target
43 |     HOST=$(printf "%s\n" "$1"| cut -d : -f 1)
44 |     PORT=$(printf "%s\n" "$1"| cut -d : -f 2)
45 |     shift 1
46 |     ;;
47 |     -q | --quiet)
48 |     QUIET=1
49 |     shift 1
50 |     ;;
51 |     -t)
52 |     TIMEOUT="$2"
53 |     if [ "$TIMEOUT" = "" ]; then break; fi  # -t without a value: stop parsing
54 |     shift 2
55 |     ;;
56 |     --timeout=*)
57 |     TIMEOUT="${1#*=}"  # keep the text after the first '='
58 |     shift 1
59 |     ;;
60 |     --)
61 |     shift  # drop the separator; what is left in "$@" is the command to run
62 |     break
63 |     ;;
64 |     --help)
65 |     usage 0
66 |     ;;
67 |     *)
68 |     echoerr "Unknown argument: $1"
69 |     usage 1
70 |     ;;
71 |   esac
72 | done
73 | 
74 | if [ "$HOST" = "" -o "$PORT" = "" ]; then  # both parts of the target are required
75 |   echoerr "Error: you need to provide a host and port to test."
76 |   usage 2
77 | fi
78 | 
79 | wait_for "$@"  # remaining arguments, if any, are the command to exec once the port answers


--------------------------------------------------------------------------------
/dataloader/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Image for the data loader container: Python with requirements.txt installed.
 2 | FROM python:buster
 3 | 
 4 | WORKDIR /app
 5 | # Plain copy of the build context (including data/); COPY is used because ADD's archive/URL handling is not needed.
 6 | COPY . /app
 7 | 
 8 | RUN set -ex; \
 9 |     pip install --trusted-host pypi.python.org --trusted-host pypi.org --trusted-host files.pythonhosted.org -r requirements.txt;
10 | 
11 | # The script to run and its arguments are supplied as the container command (see docker-compose.yaml).
12 | ENTRYPOINT [ "python3" ]
10 | 


--------------------------------------------------------------------------------
/dataloader/data/creditcard.csv:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:76274b691b16a6c49d3f159c883398e03ccd6d1ee12d9d8ee38f4b4b98551a89
3 | size 150828752
4 | 


--------------------------------------------------------------------------------
/dataloader/load.py:
--------------------------------------------------------------------------------
 1 | import redis
 2 | import argparse
 3 | import pandas as pd
 4 | from urllib.parse import urlparse
 5 | 
 6 | 
 7 | class DataGenerator:
 8 |     def __init__(self, conn, path, n_samples):
 9 |         self._conn = conn
10 |         # Read csv file
11 |         df = pd.read_csv(path, nrows=n_samples)
12 |         # Remove classification
13 |         del df['Class']
14 |         self._df = df
15 | 
16 |     def generate_data(self):
17 |         # df.to_dict('records') converts the data frame to a list of dictionaries
18 |         records = self._df.to_dict('records')
19 |         key_names = {}
20 | 
21 |         for record in records:
22 |             timestamp = int(record['Time'])
23 |             timestamp = str(timestamp)
24 |             if timestamp not in key_names:
25 |                 key_names[timestamp] = 0
26 |             # Use a unique key name, which is '<time_stamp>_<index>{tag}'
27 |             hash_key_name = timestamp + '_' + str(key_names[timestamp]) + '{tag}'
28 |             key_names[timestamp] = key_names[timestamp] + 1
29 | 
30 |             # set reference raw data
31 |             self._conn.hset(hash_key_name, mapping=record)
32 | 
33 |             # add key of reference to sorted set
34 |             self._conn.zadd("references{tag}", {hash_key_name: timestamp})
35 | 
36 | 
37 | if __name__ == '__main__':
38 |     parser = argparse.ArgumentParser()
39 |     parser.add_argument('-rs', '--redis_server', help='Redis URL', type=str, default='redis://127.0.0.1:6379')
40 |     parser.add_argument('-n', '--nrows', help='Number of rows to read from input file', type=str, default=1000)
41 | 
42 |     args = parser.parse_args()
43 | 
44 |     # Set up redis connection
45 |     url = urlparse(args.redis_server)
46 |     conn = redis.Redis(host=url.hostname, port=url.port)
47 |     if not conn.ping():
48 |         raise Exception('Redis unavailable')
49 | 
50 |     # Load reference data
51 |     dg = DataGenerator(conn, "data/creditcard.csv", args.nrows)
52 |     dg.generate_data()
53 | 


--------------------------------------------------------------------------------
/dataloader/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | pandas
3 | redis
4 | 


--------------------------------------------------------------------------------
/demo_flow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RedisAI/FraudDetectionDemo/a2da2a58f2366141f9263885aa0bad7e29d4982f/demo_flow.png


--------------------------------------------------------------------------------
/demo_hash.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RedisAI/FraudDetectionDemo/a2da2a58f2366141f9263885aa0bad7e29d4982f/demo_hash.png


--------------------------------------------------------------------------------
/demo_redisAI.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RedisAI/FraudDetectionDemo/a2da2a58f2366141f9263885aa0bad7e29d4982f/demo_redisAI.png


--------------------------------------------------------------------------------
/demo_res.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RedisAI/FraudDetectionDemo/a2da2a58f2366141f9263885aa0bad7e29d4982f/demo_res.png


--------------------------------------------------------------------------------
/docker-compose.yaml:
--------------------------------------------------------------------------------
 1 | version: '3'
 2 | services:
 3 |   redis:  # Redis server image with the RedisAI module
 4 |     image: redislabs/redisai:latest
 5 |     ports:
 6 |       - "6379:6379"
 7 |   dataloader:  # runs load.py against the redis service
 8 |     build: ./dataloader
 9 |     depends_on:
10 |       - redis
11 |     command: ['load.py', '--redis_server', 'redis://redis:6379']  # appended to the image's python3 entrypoint
12 |   app:  # runs app_runner.py, which stores the helper script and the models
13 |     build: ./app
14 |     depends_on:
15 |       - redis
16 |       - dataloader
17 |     command: ['app_runner.py', '--redis_server', 'redis://redis:6379']  # appended to the image's python3 entrypoint
18 |   redisinsight:
19 |     image: redislabs/redisinsight:redisai-device-cpu-fix
20 |     depends_on:
21 |       - app
22 |     ports:
23 |       - "8001:8001"
24 |   setupredisinsight:  # one-shot helper that adds the redis service to RedisInsight
25 |     build: ./connectinsight/.
26 |     depends_on:
27 |       - redisinsight
28 |       - redis
29 |     environment:
30 |       - APP_URL=http://redisinsight:8001  # read by entrypoint.sh and dbloader.py
31 |     command: ['python', 'dbloader.py', "--db_urls=redis:6379"]  # exec'd by entrypoint.sh
32 | 


--------------------------------------------------------------------------------
/example_client/client.py:
--------------------------------------------------------------------------------
 1 | from redisai import Client
 2 | import time
 3 | import numpy as np
 4 | from random import randrange
 5 | 
 6 | 
 7 | def predict(conn, min_ts, max_ts, references_key, model_key, script_key):
 8 |     start = time.time()
 9 | 
10 |     # Create a random transaction tensor, with 'Time' set as the max_ts (to simulate a condition
11 |     # where we retrieve the latest transactions as our reference data)
12 |     print(f'\nGenerating a random transaction in Time={max_ts}...')
13 |     transaction_tensor = np.random.randn(1, 30).astype(np.float32)
14 |     transaction_tensor[0][0] = max_ts
15 |     print(transaction_tensor)
16 | 
17 |     # Find the relevant reference data (up to 10 recent transaction that occurred within the time interval)
18 |     ref_data_keys = conn.zrevrangebyscore(references_key, max_ts, min_ts)[:10]
19 | 
20 |     # Create a DAG (execution plan) for RedisAI. First, use the helper script to convert the reference data
21 |     # within the hashes into a tensor. Then run the 2 models and obtain 2 outputs,
22 |     # and finally use the helper script to take their average to be the result (and persist it in key space)
23 |     print("\nPerforming fraud detection prediction using reference data (use up to 10 previous transactions)...")
24 |     output_key_name = 'result{tag}'
25 |     dag = conn.dag(persist=[output_key_name])
26 |     dag.tensorset('transaction', transaction_tensor)
27 |     dag.scriptexecute(script_key, 'hashes_to_tensor', keys=ref_data_keys, outputs=['reference'])
28 |     dag.modelexecute(model_key+'_CPU', inputs=['transaction', 'reference'], outputs=['out_1'])
29 |     dag.modelexecute(model_key+'_CPU:1', inputs=['transaction', 'reference'], outputs=['out_2'])
30 |     dag.scriptexecute(script_key, 'post_processing', inputs=['out_1', 'out_2'], outputs=[output_key_name])
31 |     dag.execute()
32 | 
33 |     # get result
34 |     result = conn.tensorget(output_key_name)
35 |     print("result: ", result[0])
36 |     print(f'Transaction is fraudulent with probability {result[0][0]}\n')
37 |     print("Total execution took: " + str((time.time() - start) * 1000) + " ms")
38 | 
39 | 
40 | def main():
41 |     # Set up redis connection
42 |     conn = Client(host='localhost', port=6379)
43 |     if not conn.ping():
44 |         raise Exception('Redis unavailable')
45 | 
46 |     # Add '{tag}' to every key name to ensure that all keys will be mapped to te same shard in cluster environment.
47 |     references_key = "references{tag}"
48 |     min_ts = conn.zrangebyscore(references_key, "-inf", "+inf", withscores=True, start=0, num=1)[0][1]
49 |     max_ts = conn.zrevrangebyscore(references_key, "+inf", "-inf", withscores=True, start=0, num=1)[0][1]
50 | 
51 |     # Generate random time interval, and use the transactions in this time as the reference data.
52 |     min_sample_time = randrange(min_ts, max_ts)
53 |     max_sample_time = randrange(min_ts, max_ts)
54 |     if min_sample_time > max_sample_time:
55 |         min_sample_time, max_sample_time = max_sample_time, min_sample_time
56 | 
57 |     # Running a single execution
58 |     print("Time interval for reference data: ", (min_sample_time, max_sample_time))
59 |     model_key = 'fraud_detection_model{tag}'
60 |     script_key = 'helper_script{tag}'
61 |     predict(conn, min_sample_time, max_sample_time, references_key, model_key, script_key)
62 | 
63 | 
64 | if __name__ == '__main__':  # run the demo only when this file is executed as a script
65 |     main()
66 | 


--------------------------------------------------------------------------------
/example_client/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | pandas
3 | redisai
4 | 


--------------------------------------------------------------------------------
/tests/test_demo_flow.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | from multiprocessing import Process
  3 | import numpy as np
  4 | 
  5 | from redisai import Client
  6 | from dataloader import load
  7 | from app import app_runner
  8 | 
  9 | 
 10 | class FraudDetectionDemoTest(unittest.TestCase):
 11 |     redisai_conn = Client(host='localhost', port=6379)
 12 |     n_samples = 1000
 13 |     dg = load.DataGenerator(redisai_conn, "dataloader/data/creditcard.csv", n_samples)
 14 | 
 15 |     model_key = 'fraud_detection_model{tag}'
 16 |     script_key = 'helper_script{tag}'
 17 |     references_key = 'references{tag}'
 18 | 
 19 |     def tearDown(self):
 20 |         self.redisai_conn.flushall()
 21 | 
 22 |     def test_data_generator_from_csv(self):
 23 |         # Load 1000 rows from the data csv
 24 |         self.dg.generate_data()
 25 |         assert (self.redisai_conn.dbsize() == self.n_samples + 1)  # key for every sample + the references sorted set
 26 | 
 27 |         # Expect all hash keys to be stored in the sorted set
 28 |         all_keys = self.redisai_conn.zrange(self.references_key, 0, 1001)
 29 |         assert (len(all_keys) == self.n_samples)
 30 | 
 31 |         # Expect 2 keys with with score=0 (which correspond to transactions in Time=0)
 32 |         keys = self.redisai_conn.zrange(self.references_key, 0, 1)
 33 |         assert (len(keys) == 2)
 34 | 
 35 |         # Assert the keys of the hash are the columns names
 36 |         first_hash = self.redisai_conn.hgetall(keys[0])
 37 |         hash_keys = list(first_hash.keys())
 38 |         assert (len(hash_keys) == len(self.dg._df.columns))
 39 |         for i in range(len(hash_keys)):
 40 |             assert (hash_keys[i].decode() == self.dg._df.columns[i])
 41 | 
 42 |         # Assert the values of the hash are the first row values
 43 |         for key in hash_keys:
 44 |             assert (first_hash[key].decode() == str(self.dg._df[key.decode()][0]))
 45 | 
 46 |     def test_hashes_to_tensor(self):
 47 |         app = app_runner.FraudDetectionApp(self.redisai_conn)
 48 | 
 49 |         # Set the script in Redis
 50 |         app.set_script('app/script.py', self.script_key)
 51 | 
 52 |         # Create a tensor from the first 10 hashes by using the script (store it in redis)
 53 |         self.dg.generate_data()
 54 |         hashes_keys = self.redisai_conn.zrange(self.references_key, 0, 9)
 55 |         assert (len(hashes_keys) == 10)
 56 |         output_key = 'out_tensor{tag}'
 57 |         self.redisai_conn.scriptexecute(self.script_key, 'hashes_to_tensor', keys=hashes_keys, outputs=[output_key])
 58 | 
 59 |         # get the result and verify it's meta data
 60 |         result = self.redisai_conn.tensorget(output_key, meta_only=True)
 61 |         assert (result['dtype'] == 'FLOAT')
 62 |         assert (result['shape'] == [1, 256])
 63 | 
 64 |     def test_entire_flow_multiple_clients(self):
 65 |         app = app_runner.FraudDetectionApp(self.redisai_conn)
 66 |         app.set_script('app/script.py', 'helper_script{tag}')
 67 |         app.set_model('app/models/creditcardfraud.pb', 'fraud_detection_model{tag}')
 68 |         self.dg.generate_data()
 69 | 
 70 |         def run_demo(output_key_name):
 71 |             # Create a random transaction tensor in Time=100
 72 |             transaction_tensor = np.random.randn(1, 30).astype(np.float32)
 73 |             transaction_tensor[0] = 100
 74 | 
 75 |             # Find the relevant reference data (transaction that occurred within the past 60 seconds)
 76 |             ref_data_keys = self.redisai_conn.zrangebyscore(self.references_key, 40, 100)
 77 | 
 78 |             # Create a DAG (execution plan) for RedisAI. First, use the helper script to convert the reference data
 79 |             # within the hashes into a tensor. Then run the model twice and obtain 2 outputs,
 80 |             # and finally use the helper script to take their average to be the result (and persist it in key space)
 81 |             dag = self.redisai_conn.dag(persist=[output_key_name])
 82 |             dag.tensorset('transaction', transaction_tensor)
 83 |             dag.scriptexecute(self.script_key, 'hashes_to_tensor', keys=ref_data_keys, outputs=['reference'])
 84 |             dag.modelexecute(self.model_key+'_CPU', inputs=['transaction', 'reference'], outputs=['out_1'])
 85 |             dag.modelexecute(self.model_key+'_CPU:1', inputs=['transaction', 'reference'], outputs=['out_2'])
 86 |             dag.scriptexecute(self.script_key, 'post_processing', inputs=['out_1', 'out_2'], outputs=[output_key_name])
 87 |             dag.execute()
 88 | 
 89 |         # run the demo scenario from multiple clients
 90 |         n_clients = 10
 91 |         clients = []
 92 |         for i in range(n_clients):
 93 |             client_output = 'result{tag}' + str(i)
 94 |             p = Process(target=run_demo(client_output))
 95 |             p.start()
 96 |             clients.append(p)
 97 |         # Wait for all clients to finish
 98 |         [c.join() for c in clients]
 99 | 
100 |         # assert valid results
101 |         for i in range(n_clients):
102 |             client_output = 'result{tag}' + str(i)
103 |             result = np.reshape(self.redisai_conn.tensorget(client_output), -1)
104 |             assert (result.shape == (2,))
105 |             assert ((0 <= result.all()) & (result.all() <= 100))
106 | 
107 | 
108 | if __name__ == '__main__':  # lets this file be run directly, besides `python -m unittest`
109 |     unittest.main()
110 | 


--------------------------------------------------------------------------------