├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── redis3
    ├── __init__.py
    └── redis3.py
├── requirements.txt
├── serverless
    ├── app.py
    └── serverless.yml
├── setup.py
└── src
    ├── playground.py
    ├── run_tests.py
    └── utils.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 | 
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 | 
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 | 
119 | # SageMath parsed files
120 | *.sage.py
121 | 
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 | 
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 | 
135 | # Rope project settings
136 | .ropeproject
137 | 
138 | # mkdocs documentation
139 | /site
140 | 
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 | 
146 | # Pyre type checker
147 | .pyre/
148 | 
149 | # pytype static type analyzer
150 | .pytype/
151 | 
152 | # Cython debug symbols
153 | cython_debug/
154 | 
155 | # PyCharm
156 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
159 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 | 
162 | .serverless/
163 | 
164 | node_modules/
165 | 
166 | *.json


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM public.ecr.aws/lambda/python:3.10
 2 | 
 3 | # install the new, updated boto3 to leverage the new buckets
 4 | RUN pip3 install --upgrade pip && pip3 install boto3==1.33.2 --target "${LAMBDA_TASK_ROOT}" && pip3 install redis==5.0.1 --target "${LAMBDA_TASK_ROOT}"
 5 | 
 6 | COPY redis3/redis3.py ${LAMBDA_TASK_ROOT}
 7 | COPY serverless/app.py ${LAMBDA_TASK_ROOT}
 8 | 
 9 | # Set the CMD to the lambda handler
10 | CMD [ "app.lambda_handler" ]


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 Bauplan Labs
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # redis3
  2 | 
  3 | A one-afternoon implementation of (a small set of) redis-like primitives with s3 Express.
  4 | 
  5 | ## Overview
  6 | 
  7 | AWS recently announced [s3 Express](https://aws.amazon.com/it/s3/storage-classes/express-one-zone/), a new type of s3 buckets tailored for low-latency access targeting (especially) small files, frequently accessed. In AWS own [words](https://aws.amazon.com/it/blogs/aws/new-amazon-s3-express-one-zone-high-performance-storage-class/): "This new storage class can handle objects of any size, but is especially awesome for smaller objects. (...) Because of s3 Express One Zone’s consistent very low latency, small objects can be read up to 10x faster compared to s3 Standard."
  8 | 
  9 | Even the pricing is geared towards small objects: "you pay $0.16/GB/month in the US East (N. Virginia) Region (...) and an additional per-GB fee for the portion of any request that exceeds 512 KB."
 10 | 
 11 | One of the primary (even if far from the only) use cases for things like Redis is consistent performance for key-value queries: you set 'bar' for the key 'foo', and then you retrieve it by asking 'foo' later. While Redis (and other key-value stores) are indubitably faster than s3 (even in its new express version), they are incredibly more costly: these are some prices (in US East) as of Dec 2023.
 12 | 
 13 | | Service | Monthly Cost ($) |
 14 | | ------------- | ------------- |
 15 | | S3 Express (1 GB) | 0.16 |
 16 | | cache.t4g.micro (0.5 GB)  | 11.52 (0.016 / hour) |
 17 | | cache.t4g.small (1.37 GB) | 23.04 (0.032 / hour) |
 18 | 
 19 | In this small experiment we set out to investigate which type of performance / cost trade-off is now unlocked thanks to the new s3 Express option: since we mostly care about key-value queries for "small objects", can we build a redis-like client entirely backed by s3 Express?
 20 | 
 21 | ## 1-min hello-world
 22 | 
 23 | If you have [AWS credentials](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html) set up so that you can run arbitrary s3 and s3 express stuff on your account, you can run this in one minute, by creating a virtual env and cd-ing into `src`:  
 24 | 
 25 | ```shell
 26 |  python3 -m venv venv
 27 |  source venv/bin/activate
 28 |  pip install .
 29 |  python
 30 | ```
 31 | 
 32 | In the Python REPL, you can now do (compare to [redis-py](https://github.com/redis/redis-py)):
 33 | 
 34 | ```shell
 35 | >>> from redis3.redis3 import redis3Client
 36 | >>> r = redis3Client(cache_name='mytestcache', db=0)
 37 | >>> r.set('foo', 'bar')
 38 | True
 39 | >>> r.get('foo')
 40 | 'bar'
 41 | ```
 42 | 
 43 | Note that:
 44 | 
 45 | * `my-cache-name` will be used (together with an availability zone, default to `use1-az5` as it assumes you are stuck with `us-east-1` like the rest of us) to produce a bucket like `redis3-mytestcache--use1-az5--x-s3`, which needs to be unique in the region as per s3 naming rules;
 46 | * `redis3Client` uses the boto3 client behind the scenes, so the usual authentication rules apply (credential file, environment variables or passing `aws_access_key_id` and the like as `**kwargs`).
 47 | 
 48 | If you want to see more ops, you can run `playground.py` with your own `my-cache-name` as argument:
 49 | 
 50 | ```shell
 51 | cd src
 52 | python playground.py my-cache-name
 53 | ```
 54 | 
 55 | ## 5-min explanation
 56 | 
 57 | Sometimes we want a full-fledged NoSQL store (no shortage of that!), sometimes we just want to set some value somewhere, possibly namespaced in some way, and get it back at a later time. Object storage like s3 was never fast and reliable enough in first byte latency to be an actual contender, until the release of s3 Express, which, for key-value type of queries, proposes a novel price/latency trade-off compared to more traditional solutions (Redis, dynamo etc.).
 58 | 
 59 | `redis3` is a 100 LOC (or whatever) class that puts together a redis-py interface to s3 Express, easy to be used as a slowish, but infinite and cheap cache (no worries about provisioning a larger instance, or evicting keys); `redis3` now implements GET and SET, namespaced by a database integer (Redis-like), plus few other commands, such as a version of MGET and MSET "suited" to object storage - i.e. it cannot be an atomic operation, but it runs in parallel through a thread pool, allowing to SET / GET many values with one command relatively fast (from my local machine - a pretty decent Mac in US East -, getting 25 keys with MGET takes 0.1286s, 50 takes 0.1362s and 100 takes 0.1960s). When instantiating the client (e.g. `redis3Client(cache_name='mytestcache', db=0)`) you can specify a `db` as a namespacing device, exactly as it happens in Redis (there is no limitation to `16` for the number of db of course).
 60 | 
 61 | | Redis Command | redis3 Command | Intended Semantics |
 62 | | ------------- | ------------- | ------------- |
 63 | | GET  | `get(key)` | get the value from a string key |
 64 | | SET  | `set(key, value)`  | set a string value for a key |
 65 | | MGET | `mget(keys)` | get multiple keys in parallel |
 66 | | MSET | `mset(keys, values)`  | set multiple values for keys in parallel |
 67 | | KEYS | `keys(starts_with)`  | list all keys in the current db |
 68 | | DEL | `delete(key)`  |  delete the key (no error is thrown if key does not exist) |
 69 | 
 70 | Note that redis (which, btw, runs single-threaded in-memory for a reason) can offer not only 316136913 more commands, but also atomicity guarantees (INCR, WATCH, etc.) that object storage cannot (s3 offers however [strong read-after-write consistency](https://aws.amazon.com/it/s3/consistency/): after a successful write of a new object, any subsequent read - including listin keys - request receives the latest version of the object). On the other hand, a s3-backed cache can offer more concurrent troughput at no additional effort, a truly "serverless experience" and a "thin client" which falls back on standard AWS libraries, inheriting automatically all security policies you can think of (e.g. since "db" in redis3 are just folder in an express bucket, access can controlled at that level by leveraging the usual IAM magic).
 71 | 
 72 | ## Running some tests
 73 | 
 74 | Some more (horribly repetitive) code to test the difference between s3 express and normal s3 (plus some tests to actually make sure the client behaves as it should) can be run here:
 75 | 
 76 | ```shell
 77 | cd src
 78 | python run_tests.py my-cache-name
 79 | ```
 80 | 
 81 | With EC2s, you can specify at creation the same availability zone as the s3 cache and run a comparison of normal buckets vs express in the best possible (in theory) latency conditions (vs a free [Redis](https://redis.com/) instance in us-east-1 as baseline comparison). My manual runs on a throw-away EC2 (k=100) gave the following results (in seconds):
 82 | 
 83 | | Test | Standard Bucket (s) | Express Bucket (s) | Redis Labs |
 84 | | ------------- | ------------- | ------------- | ------------- |
 85 | | GET (avg) | 0.016 | 0.005  | 0.001  |
 86 | | GET (median) | 0.014  | 0.005  | 0.0009  |
 87 | | GET (95th latency) | 0.027  | 0.005  | 0.002  | 
 88 | 
 89 | TL;DR: an express bucket is not just 3x faster in the average case, but significantly more reliable in the tail. Redis is still much faster than both, but (remember) it is also much more expensive.
 90 | 
 91 | Note: don't take these tests too seriously!
 92 | 
 93 | ### Bonus: a lambda-based use-case
 94 | 
 95 | If you know the [serverless framework](https://www.serverless.com/framework/) and have it avalaible on your machine, you can publish a lambda function that performs some (horribly repetitive) tests to evaluate AWS-lambda-to-s3 latency. Note that:
 96 | 
 97 | * on top of serverless, you will need Docker, as the `boto3` version inside Lamdbas is too old and does not support s3 express buckets yet;
 98 | * after deployment, you need to make sure the lambda role created for the function can access the s3 resources backing up the cache. Note that s3 express policies are [a bit of a drag](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-express-security-iam-identity-policies.html), so beware.
 99 | 
100 | If you feel adventurous and ready to fight IAM roles, then do:
101 | 
102 | ```shell
103 | cd serverless
104 | serverless deploy
105 | ```
106 | 
107 | (if you don't, you can just trust my numbers below!).
108 | 
109 | At the end, you'll get and endpoint such as `https://xxx.execute-api.us-east-1.amazonaws.com/dev/test?k=50&cache=mytestcache` that you can open in your browser to trigger the tests (`k` and `cache` are optional - check `app.py` for the defaults). One request will generate something like this, i.e. a comparison of _k_ ops in s3 express vs normal vs Redis Labs:
110 | 
111 | ```json
112 | {
113 |     "metadata": {
114 |         "timeMs": 7373,
115 |         "epochMs": 1701377819320,
116 |         "eventId": "971ca40d-8f50-4c27-a816-76bb7df292c4",
117 |         "inputK": 50
118 |     },
119 |     "data": {
120 |         "set_time_mean": 0.011164916356404623,
121 |         "set_time_median": 0.009434223175048828,
122 |         "get_time_mean": 0.006322011947631836,
123 |         "get_time_median": 0.006218910217285156,
124 |         "set_time_mean_s3": 0.026339941024780274,
125 |         "set_time_median_s3": 0.024151086807250977,
126 |         "get_time_mean_s3": 0.019532273610432943,
127 |         "get_time_median_s3": 0.016076326370239258,
128 |         "set_time_mean_redis": 0.0018777799606323241,
129 |         "set_time_median_redis": 0.000904083251953125,
130 |         "set_time_mean_many": 0.406483252843221,
131 |         "set_time_median_many": 0.3329179286956787,
132 |         "get_time_mean_many": 0.31602056821187335,
133 |         "get_time_median_many": 0.3195207118988037
134 |     }
135 | }
136 | ```
137 | 
138 | In this particular example, with _k=50_, setting a key with s3 Express is ~10ms, and 6 to get it back, vs ~25 and 18 from standard s3. Setting 50 keys at once with multi-threading takes ~400ms, while reading them back ~300. Not bad!
139 | 
140 | ## TO-DOs, misc. notes and all that jazz
141 | 
142 | * Since the only real dependency is boto3 and AWS access, make it easier to configure the client wrt AWS would be nice: right now, I've mostly running either in a DEV environment with semi-god IAM access, or in a carefully crafted IAM-role attached to the lambda;
143 | * if this is useful, move to poetry and auto-deploy to PyPyi would make it easy to just start using all around repos;
144 | * if lambda-based latency benchmarks are useful, built in the `serverless.yml` the proper AWS permission so that the deployment becomes seamless (now the entire serverless part is really manual, ad hoc and redundant).
145 | 
146 | Everything is left as an exercise to the reader.
147 | 
148 | ## License
149 | 
150 | This code is released "As Is", with no guarantees whatsover, under the MIT license. This was a fun coding excercise in-between serious tasks, and should be taken with the appropriate dose of sense of humour.
151 | 


--------------------------------------------------------------------------------
/redis3/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BauplanLabs/redis3/2375efa204a21867768e61913294771c8538f8d1/redis3/__init__.py


--------------------------------------------------------------------------------
/redis3/redis3.py:
--------------------------------------------------------------------------------
  1 | import boto3
  2 | import botocore
  3 | from time import time
  4 | import concurrent.futures
  5 | 
  6 | 
  7 | class redis3Client():
  8 |     
  9 |     def __init__(
 10 |         self, 
 11 |         cache_name: str, 
 12 |         db: int = 0, 
 13 |         availability_zone: str = 'use1-az5',
 14 |         bucket_prefix: str = 'redis3',
 15 |         verbose: bool = False,
 16 |         **kwargs
 17 |         ):
 18 |         """
 19 |         Store inside the class the s3 client, the cache name, the db number
 20 |         that will be used for all the ops. Note that you can pass credentials
 21 |         to boto3 at init phase using kwargs when instantiating the class.
 22 |         
 23 |         You can also override the default bucket prefix by passing a different
 24 |         bucket_prefix.
 25 |         """
 26 |         init_start_time = time()
 27 |         self.bucket_prefix = bucket_prefix
 28 |         
 29 |         # setup basic class attributes and objects
 30 |         self._s3_client = boto3.client('s3', **kwargs)
 31 |         self.bucket_name = self._get_bucket_from_cache_name(
 32 |             availability_zone,
 33 |             cache_name
 34 |             )
 35 |         self.db = db
 36 |         self._cache_name = cache_name
 37 |         self._availability_zone = availability_zone
 38 |         self._verbose = verbose
 39 |         try:
 40 |             if verbose:
 41 |                 print("Trying to create bucket {} in AZ {}".format(self.bucket_name, self._availability_zone))
 42 |             
 43 |             r = self._s3_client.create_bucket(
 44 |                 Bucket=self.bucket_name,
 45 |                 CreateBucketConfiguration={
 46 |                     'Location': {
 47 |                         'Type': 'AvailabilityZone',
 48 |                         'Name': self._availability_zone
 49 |                     },
 50 |                     'Bucket': {
 51 |                         'DataRedundancy': 'SingleAvailabilityZone',
 52 |                         'Type': 'Directory'
 53 |                     }
 54 |                 },
 55 |             )
 56 |         except botocore.exceptions.ClientError as e:
 57 |             # if the bucket already exists, just use it
 58 |             if e.response['Error']['Code'] == "BucketAlreadyOwnedByYou":
 59 |                 if self._verbose:
 60 |                     print("Bucket {} already exists. Using it as cache".format(self.bucket_name))
 61 |             else:
 62 |                 raise e    
 63 |             
 64 |         if self._verbose:
 65 |             print("Init completed in {:.4f}s".format(time() - init_start_time))
 66 |             
 67 |         return None
 68 |     
 69 |     @property
 70 |     def db(self):
 71 |         """
 72 |         Return the db for the cache (i.e. this is a prefix in the bucket)
 73 |         """
 74 |         return self._db
 75 |     
 76 |     @db.setter
 77 |     def db(self, value):
 78 |         """
 79 |         Set the db for the cache (i.e. this is a prefix in the bucket)
 80 |         """
 81 |         try:
 82 |             self._db = int(value)
 83 |         except ValueError:
 84 |             print('db must be an integer or something that can be casted as such, got {}'.format(value))
 85 |             raise ValueError
 86 |     
 87 |     @property
 88 |     def bucket_name(self):
 89 |         """
 90 |         Return the name of the bucket used to back the cache
 91 |         """
 92 |         return self._bucket_name
 93 |     
 94 |     @bucket_name.setter
 95 |     def bucket_name(self, value):
 96 |         """
 97 |         Set the name of the bucket used to back the cache
 98 |         """
 99 |         self._bucket_name = value
100 |     
101 |     def _get_bucket_from_cache_name(self, availability_zone: str, cache_name: str):
102 |         """
103 |         Produce a distinct bucket name from the cache name supplied by the user.
104 |         
105 |         Note that we need to comply with the following naming rules:
106 |         
107 |         https://docs.aws.amazon.com/AmazonS3/latest/userguide/directory-bucket-naming-rules.html
108 |         """
109 |         return '{}-{}--{}--x-s3'.format(self.bucket_prefix, cache_name, availability_zone)
110 |     
111 |     def _get_object_key_from_key_name(self, key: str):
112 |         """
113 |         Make sure that the key is prefixed with the db number as 
114 |         a natural namespacing of the keys
115 |         """
116 |         return '{}/{}'.format(self.db, key)
117 |     
118 |     def set(self, key: str, value: str):
119 |         """
120 |         Redis SET equivalent: set a string value for a given string key.
121 |         
122 |         Note that if you want to store a JSON object, you need to serialize it
123 |         to a string first.
124 |         
125 |         Ref: https://redis.io/commands/set/
126 |         """
127 |         assert isinstance(value, str), "Expected value to be a string, got {}".format(type(value))
128 |         _key = self._get_object_key_from_key_name(key)
129 |         try:
130 |             r = self._s3_client.put_object(
131 |                 Bucket=self.bucket_name,
132 |                 Key=_key,
133 |                 Body=value
134 |                 )
135 |         except botocore.exceptions.ClientError as e:
136 |             if self._verbose:
137 |                 print("!!! Failed operation: error code {}".format(e.response['Error']['Code']))
138 |                 
139 |             raise e
140 |         # if put_object succeeded, return True    
141 |         return True
142 |     
143 |     def get(self, key: str):
144 |         """
145 |         Redis GET equivalent: get a string value for a given string key.
146 |         
147 |         It returns None if the key doesn't exist.
148 |         
149 |         Ref: https://redis.io/commands/get/
150 |         
151 |         """
152 |         _key = self._get_object_key_from_key_name(key)
153 |         try:
154 |             r = self._s3_client.get_object(
155 |                 Bucket=self.bucket_name,
156 |                 Key=_key,
157 |                 )
158 |             # if get_object succeeded, return the value
159 |             if self._verbose:
160 |                 print("{} last modified on {}".format(_key, r['LastModified']))
161 |             
162 |             return r['Body'].read().decode('utf-8')
163 |         except botocore.exceptions.ClientError as e:
164 |             # this is where we handle the case where the key doesn't exist
165 |             if e.response['Error']['Code'] == "NoSuchKey":
166 |                 return None
167 |             if self._verbose:
168 |                 print("!!! Failed operation: error code {}".format(e.response['Error']['Code']))
169 |                 
170 |             raise e
171 |         
172 |     def mset(self, keys: list, values: list):
173 |         """
174 |         Set multiple keys to multiple values. 
175 |         Note that it's a threaded execution of set() for each key, so the return value
176 |         can be True (success) or the command may fail if any error occurs.
177 |         
178 |         Note that this is not an atomic operation and there is now way to know
179 |         which keys existed and which didn't.
180 |         
181 |         Ref: https://redis.io/commands/mset/
182 |         """
183 |                 
184 |         results = []
185 |         with concurrent.futures.ThreadPoolExecutor() as executor:
186 |             futures = {}
187 |             for ctr, (k, v) in enumerate(zip(keys, values)):
188 |                 futures[executor.submit(self.set, key=k, value=v)] = ctr
189 |             for future in concurrent.futures.as_completed(futures):
190 |                 try:
191 |                     results.append((future.result(), futures[future]))
192 |                 except Exception as ex:
193 |                     raise ex
194 |                 
195 |         results, _ = zip(*sorted(results, key=lambda x: x[1]))
196 |                 
197 |         return list(results)
198 |         
199 |     def mget(self, keys: list):
200 |         """
201 |         Return the values associated with the specified keys.
202 |         Note that it's a threaded execution of get() for each key, so the return value
203 |         can be a string (success), a None (no key found) or the command may fail if 
204 |         any error occurs.
205 |         
206 |         Note that this is not an atomic operation.
207 |         
208 |         Ref: https://redis.io/commands/mget/
209 |         """
210 |         values = []
211 |         with concurrent.futures.ThreadPoolExecutor() as executor:
212 |             futures = {}
213 |             for ctr, k in enumerate(keys):
214 |                 futures[executor.submit(self.get, key=k)] = ctr
215 |             for future in concurrent.futures.as_completed(futures):
216 |                 try:
217 |                     values.append((future.result(), futures[future]))
218 |                 except Exception as ex:
219 |                     raise ex
220 |                 
221 |         values, _ = zip(*sorted(values, key=lambda x: x[1]))
222 |                 
223 |         return list(values)
224 |     
225 |     def keys(self, starts_with=None):
226 |         """
227 |         Return all the keys matching the specified pattern in the current db, modeled
228 |         after the Redis "KEYS pattern" command (usual caveat on atomicity 
229 |         applies).
230 |         
231 |         This is a generator function, so you can use it like:
232 |         
233 |         for key in my_client.keys():
234 |             print(key)
235 |         
236 |         Ref: https://redis.io/commands/keys/
237 |         """
238 |         
239 |         return self._get_matching_s3_keys(
240 |             self.bucket_name, 
241 |             # for express, only prefixes that end in a delimiter ( /) are supported.
242 |             '{}/'.format(self.db), 
243 |             starts_with
244 |             )
245 | 
246 |     def _get_matching_s3_keys(self, bucket, prefix, pattern):
247 |         """
248 |         Code gently inspired by: https://alexwlchan.net/2017/listing-s3-keys/
249 |         """
250 |         kwargs = {'Bucket': bucket}
251 |         if prefix:
252 |             kwargs['Prefix'] = prefix
253 |         while True:
254 |             resp = self._s3_client.list_objects_v2(**kwargs)
255 |             for obj in resp['Contents']:
256 |                 key = obj['Key']
257 |                 # we want to make sure keys start with the prefix (i.e. the db number)
258 |                 assert key.startswith(prefix)
259 |                 # if no pattern is specified or the key starts with the pattern
260 |                 if pattern is None or key.startswith(pattern):
261 |                     yield key[len(prefix):]
262 | 
263 |             # The S3 API is paginated, so we pass the continuation token into the next response
264 |             try:
265 |                 kwargs['ContinuationToken'] = resp['NextContinuationToken']
266 |             except KeyError:
267 |                 break
268 |             
269 |     def delete(self, key: str):
270 |         """
271 |         Delete a key in the current database (a non-existent key gets ignored
272 |         as the AWS boto client won't raise any error). We use "delete" to avoid confliucts
273 |         with the Python keyword "del".
274 | 
275 |         Ref: https://redis.io/commands/del/
276 |         """
277 |         _key = self._get_object_key_from_key_name(key)
278 |         r = self._s3_client.delete_object(
279 |                 Bucket=self.bucket_name,
280 |                 Key=_key,
281 |                 )
282 |             
283 |         return True
284 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | boto3==1.33.2
2 | tqdm==4.66.1


--------------------------------------------------------------------------------
/serverless/app.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 
  3 | Simple lambda function to test redis3 and s3 performance when running compute next to storage.
  4 | All of this is hacky / manual / one-off testing, but it's a start to get a more realistic
  5 | sense of potential performance gains.
  6 | 
  7 | Note that for this to work you need to make the AWS lambda role (created by serverless)
  8 | aware of the bucket that is underlying the redis3 cache.
  9 | 
 10 | S3 express policies are a drag, so beware:
 11 | 
 12 | https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-express-security-iam-identity-policies.html
 13 | 
 14 | Note that to make it work, I had to actually copy the ARN for the bucket from the s3 console.
 15 | 
 16 | Finally the AWS lambda role should also have access to buckets with the pattern used by normal
 17 | s3 client to do the comparison, e.g.:
 18 | 
 19 | bucket_name = "redis3-test-{}".format(uuid.uuid4())
 20 | 
 21 | Note 2: we included a redis client in the lambda function to test the performance of 
 22 | actual Redis on Redis Lab in us-east-1: this is done for the purpose of the comparison
 23 | in the Medium blog post, so treat the code as a throw-away example (you will need to provide your own
 24 | credentials for Redis and spin up your own free Redis instance on Redis Labs).
 25 | 
 26 | """
 27 | 
 28 | 
 29 | import time
 30 | import uuid
 31 | import boto3
 32 | import json
 33 | from redis3 import redis3Client
 34 | import redis
 35 | from statistics import mean, median
 36 | 
 37 | 
 38 | # default number of keys to try to set / get
 39 | DEFAULT_K = 50
 40 | # check that the version of boto3 supports the s3 express feature
 41 | print("Boto3 version: {}".format(boto3.__version__))
 42 | 
 43 | 
 44 | def wrap_response(body):
 45 |     """
 46 |     Just make sure the response is in the right format given this is a HTTP GET
 47 |     """
 48 |     return {
 49 |         "statusCode": 200,
 50 |         "body": json.dumps(body),
 51 |         "headers": {
 52 |             "Content-Type": "application/json"
 53 |         }
 54 |     }
 55 | 
 56 | 
 57 | def run_redis_tests(
 58 |     key_list: list,
 59 |     val_list: list
 60 | ):
 61 |     my_client = redis.Redis(
 62 |         host='redis-xxx.cloud.redislabs.com', 
 63 |         password='mypwd',
 64 |         port=14665, 
 65 |         db=0)
 66 |     
 67 |     set_times = []
 68 |     for k, v in zip(key_list, val_list):
 69 |         s_set_time = time.time()
 70 |         r = my_client.set(k, v)
 71 |         set_times.append(time.time() - s_set_time)
 72 |         
 73 |     get_times = []
 74 |     for k, v in zip(key_list, val_list):
 75 |         s_get_time = time.time()
 76 |         r = my_client.get(k)
 77 |         get_times.append(time.time() - s_get_time) 
 78 |     
 79 |     return get_times, set_times
 80 |     
 81 | 
 82 | def run_redis3_many_keys_tests(
 83 |     cache_name: str,
 84 |     key_list: list,
 85 |     val_list: list
 86 | ):
 87 |     my_client = redis3Client(cache_name=cache_name, db=0, verbose=False)
 88 |     
 89 |     set_times = []
 90 |     get_times = []
 91 |     # do it few times to get a sense of the performance
 92 |     for _ in range(3):
 93 |         s_set_time = time.time()
 94 |         r = my_client.mset(key_list, val_list)
 95 |         set_times.append(time.time() - s_set_time)
 96 |         s_get_time = time.time()
 97 |         r = my_client.mget(key_list)
 98 |         get_times.append(time.time() - s_get_time)
 99 |         assert r == val_list, "Expected {}, got {}".format(val_list, r)
100 |     
101 |     return get_times, set_times
102 | 
103 | 
104 | def run_redis3_tests(
105 |     cache_name: str,
106 |     key_list: list,
107 |     val_list: list
108 | ):
109 |     my_client = redis3Client(cache_name=cache_name, db=0, verbose=False)
110 |     
111 |     set_times = []
112 |     for k, v in zip(key_list, val_list):
113 |         s_set_time = time.time()
114 |         r = my_client.set(k, v)
115 |         set_times.append(time.time() - s_set_time)
116 |         
117 |     get_times = []
118 |     for k, v in zip(key_list, val_list):
119 |         s_get_time = time.time()
120 |         r = my_client.get(k)
121 |         get_times.append(time.time() - s_get_time)
122 |         assert r == v, "Expected {}, got {}".format(v, r)    
123 |     
124 |     return get_times, set_times
125 | 
126 | 
127 | def run_s3_tests(
128 |     key_list: list,
129 |     val_list: list
130 | ):
131 |     # we assume the lambda role has access to buckets with the name redis3-test-*
132 |     s3_client = boto3.client('s3')
133 |     # create a bucket with a temp name
134 |     bucket_name = "redis3-test-{}".format(uuid.uuid4())
135 |     s3_client.create_bucket(Bucket=bucket_name)
136 |     # set all keys + values
137 |     set_times = []
138 |     for i in range(len(key_list)):
139 |         start = time.time()
140 |         s3_client.put_object(Bucket=bucket_name, Key=key_list[i], Body=val_list[i])
141 |         set_times.append(time.time() - start)
142 |     # read them back
143 |     get_times = []
144 |     for i in range(len(key_list)):
145 |         start = time.time()
146 |         obj = s3_client.get_object(Bucket=bucket_name, Key=key_list[i])
147 |         v = obj['Body'].read().decode('utf-8')
148 |         get_times.append(time.time() - start)
149 |         assert v == val_list[i], "Expected {}, got {}".format(val_list[i], v)
150 |         
151 |     # loop over all keys and delete them (otherwise we can't delete the bucket)
152 |     for key in key_list:
153 |         s3_client.delete_object(Bucket=bucket_name, Key=key)
154 |     
155 |     # delete the bucket
156 |     s3_client.delete_bucket(Bucket=bucket_name)
157 |         
158 |     return get_times, set_times
159 | 
160 | 
161 | def lambda_handler(event, context):
162 |     """
163 |     
164 |     Simple lambda function to test redis3 and s3 performance.
165 |     
166 |     No error checking, no fancy stuff, just throw-away code to get some get / set 
167 |     performance numbers.
168 |     
169 |     """
170 |     start = time.time()
171 |     data = {}
172 |     # debug
173 |     print(event)
174 |     query_args = event.get('queryStringParameters', None)
175 |     # set a default input_k 
176 |     cnt_k = query_args['k'] if query_args and 'k' in query_args else DEFAULT_K
177 |     key_list = ['playground_{}'.format(i) for i in range(cnt_k)]
178 |     val_list = ['bar_{}'.format(i) for i in range(cnt_k)]
179 |     # set a default cache name
180 |     cache_name = query_args['cache'] if query_args and 'cache' in query_args else 'mytestcache'
181 |     get_times, set_times = run_redis3_tests(cache_name, key_list, val_list)
182 |     # add some stats to the data object we return
183 |     data['set_times'] = set_times
184 |     data['set_time_mean'] = mean(set_times)
185 |     data['set_time_median'] = median(set_times)
186 |     data['get_times'] = get_times
187 |     data['get_time_mean'] = mean(get_times)
188 |     data['get_time_median'] = median(get_times)
189 |     # run some basic ops in s3
190 |     get_times, set_times = run_s3_tests(key_list, val_list)
191 |     # add some stats to the data object we return
192 |     data['set_times_s3'] = set_times
193 |     data['set_time_mean_s3'] = mean(set_times)
194 |     data['set_time_median_s3'] = median(set_times)
195 |     data['get_times_s3'] = get_times
196 |     data['get_time_mean_s3'] = mean(get_times)
197 |     data['get_time_median_s3'] = median(get_times)
198 |     # run some basic ops in s3
199 |     get_times, set_times = run_redis_tests(key_list, val_list)
200 |     # add some stats to the data object we return
201 |     data['set_times_redis'] = set_times
202 |     data['set_time_mean_redis'] = mean(set_times)
203 |     data['set_time_median_redis'] = median(set_times)
204 |     data['get_times_redis'] = get_times
205 |     data['get_time_mean_redis'] = mean(get_times)
206 |     data['get_time_median_redis'] = median(get_times)
207 |     # finally test the redis3 client with many keys at once
208 |     get_times, set_times = run_redis3_many_keys_tests(cache_name, key_list, val_list)
209 |     data['set_times_many'] = set_times
210 |     data['set_time_mean_many'] = mean(set_times)
211 |     data['set_time_median_many'] = median(set_times)
212 |     data['get_times_many'] = get_times
213 |     data['get_time_mean_many'] = mean(get_times)
214 |     data['get_time_median_many'] = median(get_times)
215 |       
216 |     body = {
217 |         "metadata": {
218 |             "timeMs": int((time.time() - start) * 1000.0),
219 |             "epochMs": int(time.time() * 1000),
220 |             "eventId": str(uuid.uuid4()),
221 |             'inputK': cnt_k,
222 |         },
223 |         "data": data
224 |     }
225 | 
226 |     return wrap_response(body)


--------------------------------------------------------------------------------
/serverless/serverless.yml:
--------------------------------------------------------------------------------
 1 | service: redis3-lambda-performance
 2 | 
 3 | provider:
 4 |   name: aws
 5 |   timeout: 30
 6 |   architecture: arm64
 7 |   ecr:
 8 |     images:
 9 |       redis3test:
10 |         path: ../
11 |         platform: linux/arm64
12 | 
13 | 
14 | functions:
15 |   myredis3test:
16 |     image:
17 |       name: redis3test
18 |     events:
19 |       - http:
20 |           path: test
21 |           method: get
22 | 
23 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | """The setup script."""
 4 | 
 5 | from setuptools import find_packages, setup
 6 | 
 7 | with open("README.md") as readme_file:
 8 |     readme = readme_file.read()
 9 | 
10 | with open("requirements.txt") as f:
11 |     requirements = f.read().splitlines()
12 | 
13 | setup(
14 | 
15 |     author="redis3",
16 |     author_email='jacopo.tagliabue@bauplanlabs.com',
17 |     python_requires='>=3.9',
18 |     classifiers=[
19 |         "Development Status :: 2 - Pre-Alpha",
20 |         "Intended Audience :: Developers",
21 |         "License :: OSI Approved :: MIT License",
22 |         "Natural Language :: English",
23 |         "Programming Language :: Python :: 3.9",
24 |         "Programming Language :: Python :: 3.10",
25 |         "Programming Language :: Python :: 3.11",
26 |     ],
27 |     description="redis3",
28 |     install_requires=requirements,
29 |     license="MIT license",
30 |     long_description=readme,
31 |     long_description_content_type="text/x-rst",
32 |     include_package_data=True,
33 |     keywords="reclist",
34 |     name="reclist",
35 |     packages=find_packages(include=["redis3", "redis3.*"]),
36 | 
37 |     url='https://github.com/BauplanLabs/redis3',
38 |     version='0.0.2',
39 |     zip_safe=False,
40 |     extras_require={},
41 | )


--------------------------------------------------------------------------------
/src/playground.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 
  3 | This is a playground script that shows how to use the redis3Client class
  4 | by performing some basic operations on the "cache".
  5 | 
  6 | To cover edge cases and benchmarking, you can inspect and run run_tests.py
  7 | instead.
  8 | 
  9 | Note that redis3 assumes your interpreter can run:
 10 | 
 11 | s3_client = boto3.client('s3')
 12 | 
 13 | (and all the other boto3 calls in redis3.py) either through a local AWS credentials file, ENVs etc. 
 14 | (you can also modify this script to pass credentials to boto3 using kwargs for redis3Client).
 15 | 
 16 | For reference on AWS credentials and boto3, check this: 
 17 | https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html
 18 | 
 19 | """
 20 | 
 21 | from redis3.redis3 import redis3Client
 22 | from datetime import datetime
 23 | from utils import measure_func
 24 | import json
 25 | 
 26 | 
 27 | @measure_func
 28 | def set_key_with_timing(client):
 29 |     return client.set('foo', 'bar')
 30 | 
 31 | 
 32 | @measure_func
 33 | def get_key_with_timing(client):
 34 |     return client.get('foo')
 35 | 
 36 | 
 37 | @measure_func
 38 | def set_keys_with_timing(client, size=50):
 39 |     _list = ['playground_{}'.format(i) for i in range(size)]
 40 |     return client.mset(_list, _list)
 41 | 
 42 | 
 43 | @measure_func
 44 | def get_keys_with_timing(client, size=50):
 45 |     key_list = ['playground_{}'.format(i) for i in range(size)]
 46 |     return client.mget(key_list)
 47 | 
 48 | 
 49 | def run_playground(
 50 |     cache_name: str
 51 | ):
 52 |     # say hi
 53 |     print("Started playground at {}\n".format(datetime.now()))
 54 |     
 55 |     # first, instantiate redis3Client and check all is well
 56 |     # we set verbose to True to see what's going on under the hood as this
 57 |     # a playground script
 58 |     my_client = redis3Client(cache_name=cache_name, db=0, verbose=True)
 59 |     # for debugging purposes, print the name of the bucket used to back the cache
 60 |     print("Using bucket {} as cache".format(my_client.bucket_name))
 61 |     
 62 |     # now, some basic ops
 63 |     
 64 |     # set a key and get it back
 65 |     r = my_client.set('foo', 'bar')
 66 |     print(r)
 67 |     assert r is True, "Expected True, got {}".format(r)
 68 |     r = my_client.get('foo')
 69 |     print(r) 
 70 |     # overwrite the key and get it back
 71 |     r = my_client.set('foo', 'bar2')
 72 |     # store something more complex, as long as you can serialize it to a string
 73 |     # e.g. dump it to a JSON string
 74 |     my_obj = { 'k_{}'.format(i): 'v_{}'.format(i) for i in range(5) }
 75 |     r = my_client.set('foo_dic', json.dumps(my_obj))
 76 |     r = json.loads(my_client.get('foo_dic'))
 77 |     print("Json keys: {}".format(list(r.keys())))
 78 |     # get a key that doesn't exist
 79 |     r = my_client.get('baz')
 80 |     assert r is None, "Expected None, got {}".format(r)
 81 |     # set a list of keys and get them back in one go
 82 |     key_list = ['playground_{}'.format(i) for i in range(5)]
 83 |     val_list = ['bar_{}'.format(i) for i in range(5)]
 84 |     r = my_client.mset(key_list, val_list)
 85 |     val_list_back = my_client.mget(key_list)  
 86 |     print("Got back {} values".format(len(val_list_back)))
 87 |     # use the keys command to get all keys in the cache
 88 |     all_keys_in_db = list([k for k in my_client.keys()])
 89 |     print("Found {} keys in cache, first three: {}".format(len(all_keys_in_db), all_keys_in_db[:3]))
 90 |     # delete one
 91 |     r = my_client.delete(all_keys_in_db[0])
 92 |     # finally, do the same ops, wrapped in a timing decorator
 93 |     # to avoid spamming the console, we 'manually' toggle verbose off
 94 |     my_client._verbose = False
 95 |     
 96 |     r = set_key_with_timing(my_client)
 97 |     r = get_key_with_timing(my_client)
 98 |     r = get_keys_with_timing(my_client)
 99 |     r = set_keys_with_timing(my_client)
100 |     
101 |     # how does the many ops scale with more keys?
102 |     for i in [25, 50, 100, 500, 1000]:
103 |         print("\nRunning ops with {} keys".format(i))
104 |         r = set_keys_with_timing(my_client, size=i)
105 |         r = get_keys_with_timing(my_client, size=i)
106 |     
107 |     # say bye
108 |     print("\nFinished playground at {}. See you, s3ace cowboy".format(datetime.now()))
109 |     return
110 | 
111 | 
112 | if __name__ == "__main__":
113 |     import sys 
114 |     # make sure we have a cache name
115 |     assert len(sys.argv) == 2, "Please provide a cache name"
116 |     cache_name = sys.argv[1]
117 |     run_playground(cache_name=cache_name)


--------------------------------------------------------------------------------
/src/run_tests.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 
  3 | Simple script to run some tests on the cache: some functional tests, and some performance tests,
  4 | in which we compare standard S3 buckets with the express bucket.
  5 | 
  6 | """
  7 | 
  8 | import boto3
  9 | import uuid
 10 | from time import time
 11 | from statistics import median, mean
 12 | from tqdm import tqdm
 13 | from datetime import datetime
 14 | from redis3.redis3 import redis3Client
 15 | import math
 16 | import json
 17 | import uuid
 18 | 
 19 | 
 20 | def print_test_info(
 21 |     timing_list: list,
 22 |     target_percentile: int = 95
 23 | ):
 24 |     def percentile(input, q):
 25 |         """
 26 |         I don't want to import numpy just for this
 27 |         """
 28 |         data_sorted = sorted(input)
 29 |         
 30 |         return data_sorted[math.ceil(q / 100 * len(data_sorted))]
 31 | 
 32 |     print("Average time: {}".format(mean(timing_list)))
 33 |     print("Median time: {}".format(median(timing_list)))
 34 |     print("95th percentile time: {}".format(percentile(timing_list, target_percentile)))
 35 |     return
 36 | 
 37 | 
 38 | def run_normal_bucket_tests(
 39 |     test_keys: list, # list of keys to set / get
 40 |     test_values: list,
 41 |     **kwargs
 42 | ):
 43 |     print("\nStart of testing standard buckets at {}\n".format(datetime.now()))
 44 |     # start a client
 45 |     s3_client = boto3.client('s3', **kwargs)
 46 |     # create a bucket with a temp name
 47 |     # bucket creation is not part of the benchmark
 48 |     bucket_name = "redis3-test-{}".format(uuid.uuid4())
 49 |     s3_client.create_bucket(Bucket=bucket_name)
 50 |     # set all keys + values
 51 |     set_times = []
 52 |     for i in tqdm(range(len(test_keys))):
 53 |         start = time()
 54 |         s3_client.put_object(Bucket=bucket_name, Key=test_keys[i], Body=test_values[i])
 55 |         set_times.append(time() - start)
 56 |     # print out average and median set times
 57 |     print_test_info(set_times, target_percentile=95)
 58 |     # read them back
 59 |     get_times = []
 60 |     for i in tqdm(range(len(test_keys))):
 61 |         start = time()
 62 |         obj = s3_client.get_object(Bucket=bucket_name, Key=test_keys[i])
 63 |         v = obj['Body'].read().decode('utf-8')
 64 |         get_times.append(time() - start)
 65 |         # check we get the right value back!
 66 |         assert v == test_values[i], "Expected {}, got {}".format(test_values[i], v)
 67 |     # print out average and median set times
 68 |     print_test_info(get_times, target_percentile=95)
 69 |     
 70 |     # loop over all keys and delete them (otherwise we can't delete the bucket)
 71 |     for test_key in tqdm(test_keys):
 72 |         # TODO: we should totally parallelize this
 73 |         s3_client.delete_object(Bucket=bucket_name, Key=test_key)
 74 |     
 75 |     # delete the bucket
 76 |     s3_client.delete_bucket(Bucket=bucket_name)
 77 |     
 78 |     print("\nEnd of testing standard buckets at {}\n".format(datetime.now()))
 79 |     
 80 |     return
 81 |     
 82 | 
 83 | def run_cache_tests(
 84 |     test_keys: list, # list of keys to set / get
 85 |     test_values: list,
 86 |     cache_name: str, # name of the cache to use
 87 |     **kwargs
 88 | ):
 89 |     print("\nStart of testing the cache at {}\n".format(datetime.now()))
 90 |     my_client = redis3Client(cache_name=cache_name, db=0, verbose=False, **kwargs)
 91 |     # set all keys + values
 92 |     set_times = []
 93 |     for i in tqdm(range(len(test_keys))):
 94 |         start = time()
 95 |         my_client.set(test_keys[i], test_values[i])
 96 |         set_times.append(time() - start)
 97 |     # print out average and median set times
 98 |     print_test_info(set_times, target_percentile=95)
 99 |     # read them back
100 |     get_times = []
101 |     for i in tqdm(range(len(test_keys))):
102 |         start = time()
103 |         v = my_client.get(test_keys[i])
104 |         get_times.append(time() - start)
105 |         # check we get the right value back!
106 |         assert v == test_values[i], "Expected {}, got {}".format(test_values[i], v)
107 |     # print out average and median set times
108 |     print_test_info(get_times, target_percentile=95)
109 | 
110 |     # end
111 |     print("\nEnd of testing the cache at {}\n".format(datetime.now()))
112 |     
113 |     return 
114 | 
115 | def run_functional_tests(
116 |     cache_name: str, # name of the cache to use
117 |     **kwargs
118 | ):
119 |     my_client = redis3Client(cache_name=cache_name, db=0, verbose=False, **kwargs)
120 | 
121 |     # set a key and get it back
122 |     r = my_client.set('foo', 'bar')
123 |     assert r is True, "Expected True, got {}".format(r)
124 |     r = my_client.get('foo')
125 |     assert r == 'bar', "Expected 'bar', got {}".format(r)
126 |     assert isinstance(r, str), "Expected a string, got {}".format(type(r))
127 |     # overwrite the key and get it back
128 |     r = my_client.set('foo', 'bar2')
129 |     assert my_client.get('foo') == 'bar2', "Expected 'bar2', got {}".format(r)
130 |     # store something more complex, as long as you can serialize it to a string
131 |     my_obj = { 'k_{}'.format(i): 'v_{}'.format(i) for i in range(5) }
132 |     r = my_client.set('foo_dic', json.dumps(my_obj))
133 |     r = json.loads(my_client.get('foo_dic'))
134 |     assert r['k_0'] == 'v_0', "Expected 'v_0', got {}".format(r['k_0'])
135 |     # get a key that doesn't exist by randomly picking a uuid
136 |     r = my_client.get(str(uuid.uuid4()))
137 |     assert r is None, "Expected None, got {}".format(r)
138 |     # set a list of keys and get them back in one go
139 |     key_list = ['playground_{}'.format(i) for i in range(5)]
140 |     val_list = ['bar_{}'.format(i) for i in range(5)]
141 |     r = my_client.mset(key_list, val_list)
142 |     assert all(r), "Expected all True, got {}".format(r)
143 |     val_list_back = my_client.mget(key_list)    
144 |     assert val_list_back == val_list, "Expected {}, got {}".format(val_list, val_list_back)
145 |     # use the keys command to get all keys in the cache
146 |     all_keys_in_db = list([k for k in my_client.keys()])
147 |     print("Found {} keys in cache, first three: {}".format(len(all_keys_in_db), all_keys_in_db[:3]))
148 |     # delete one
149 |     r = my_client.delete(all_keys_in_db[0])
150 |     assert r is True, "Expected True, got {}".format(r)
151 |     # delete one that does not exist by getting a random string
152 |     # it should be ignored and get True back again
153 |     r = my_client.delete(str(uuid.uuid4()))
154 |     assert r is True, "Expected True, got {}".format(r)
155 |     # switch to a different bucket by passing a non-int (should get an error)
156 |     try:
157 |         my_client.db = 'ciao'
158 |     except ValueError:
159 |         pass
160 |     # now switch db for real to a magic number
161 |     my_client.db = "100"
162 |     # set a key and list all keys in the cache (should be only one)
163 |     my_client.set('foo_100', 'bar_100')
164 |     all_keys_in_db = list([k for k in my_client.keys()])
165 |     assert len(all_keys_in_db) == 1, "Expected 1 key, got {}".format(len(all_keys_in_db))
166 |     assert all_keys_in_db[0] == 'foo_100', "Expected 'foo_100', got {}".format(all_keys_in_db[0])
167 |     # finally delete the key and check it's gone
168 |     r = my_client.delete('foo_100')
169 |     # do it twice, nothings should happen
170 |     r = my_client.delete('foo_100')
171 |     # now, try to get it back, it should return None
172 |     r = my_client.get('foo_100')
173 |     assert r is None, "Expected None, got {}".format(r)
174 |     # end
175 |     print("\nEnd of functional tests {}\n".format(datetime.now()))
176 |     
177 |     return
178 | 
179 | def run_tests(
180 |     cache_name: str, # name of the cache to use
181 |     k: int, # number of keys to set / get during tests
182 |     **kwargs
183 | ):
184 |     print("Started testing at {}\n".format(datetime.now()))
185 |     # first, run some functional cache tests
186 |     run_functional_tests(cache_name, **kwargs)
187 |     # if nothing fails, create a list of keys and values for perf. testing
188 |     test_keys = ['foo_{}'.format(i) for i in range(k)]
189 |     test_values = ['bar_{}'.format(i) for i in range(k)]
190 |     # test performance of a normal bucket
191 |     run_normal_bucket_tests(test_keys, test_values, **kwargs)
192 |     # test performance of the cache
193 |     run_cache_tests(test_keys, test_values, cache_name, **kwargs)
194 |     print("\n====> Now running the tests again with 5x keys and values <====\n")
195 |     test_keys = ['foo_{}'.format(i) for i in range(k * 5)]
196 |     test_values = ['bar_{}'.format(i) for i in range(k * 5)]
197 |     run_normal_bucket_tests(test_keys, test_values, **kwargs)
198 |     run_cache_tests(test_keys, test_values, cache_name, **kwargs)
199 | 
200 |     print("\nFinished testing at {}. See you, s3ace cowboy".format(datetime.now()))
201 |     return
202 | 
203 | 
204 | if __name__ == "__main__":
205 |     import sys 
206 |     # make sure we have a cache name
207 |     assert len(sys.argv) == 2, "Please provide a cache name"
208 |     cache_name = sys.argv[1]
209 |     # note that k < 100 will create a problem with the percentile function
210 |     run_tests(cache_name, k=100)
211 |     
212 |     # note that you can provide AWS crednetials through the credential file in the machine,
213 |     # or through env variables or as kwargs, just as you would do with any instance
214 |     # of boto3.client: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html
215 |     #aws_client_auth = { "aws_access_key_id": "", "aws_secret_access_key": "" }
216 |     #run_tests(cache_name, k=100, **aws_client_auth)
217 |     
218 |    


--------------------------------------------------------------------------------
/src/utils.py:
--------------------------------------------------------------------------------
 1 | from time import time
 2 | 
 3 | 
 4 | def measure_func(func):
 5 |     # this wrapper shows the execution time of the function object passed
 6 |     def wrap_func(*args, **kwargs):
 7 |         t1 = time()
 8 |         result = func(*args, **kwargs)
 9 |         t2 = time()
10 |         result_to_print = result if not isinstance(result, list) else result[:2]
11 |         print(f'{func.__name__!r} executed in {(t2-t1):.4f}s, with result: {result_to_print}')
12 |         return result
13 |     return wrap_func


--------------------------------------------------------------------------------