├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── redis3 ├── __init__.py └── redis3.py ├── requirements.txt ├── serverless ├── app.py └── serverless.yml ├── setup.py └── src ├── playground.py ├── run_tests.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | 162 | .serverless/ 163 | 164 | node_modules/ 165 | 166 | *.json -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/lambda/python:3.10 2 | 3 | # install the new, updated boto3 to leverage the new buckets 4 | RUN pip3 install --upgrade pip && pip3 install boto3==1.33.2 --target "${LAMBDA_TASK_ROOT}" && pip3 install redis==5.0.1 --target "${LAMBDA_TASK_ROOT}" 5 | 6 | COPY redis3/redis3.py ${LAMBDA_TASK_ROOT} 7 | COPY serverless/app.py ${LAMBDA_TASK_ROOT} 8 | 9 | # Set the CMD to the lambda handler 10 | CMD [ "app.lambda_handler" ] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Bauplan Labs 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # redis3 2 | 3 | A one-afternoon implementation of (a small set of) redis-like primitives with s3 Express. 
4 | 5 | ## Overview 6 | 7 | AWS recently announced [s3 Express](https://aws.amazon.com/it/s3/storage-classes/express-one-zone/), a new type of s3 bucket tailored for low-latency access to small, frequently accessed files. In AWS's own [words](https://aws.amazon.com/it/blogs/aws/new-amazon-s3-express-one-zone-high-performance-storage-class/): "This new storage class can handle objects of any size, but is especially awesome for smaller objects. (...) Because of s3 Express One Zone’s consistent very low latency, small objects can be read up to 10x faster compared to s3 Standard." 8 | 9 | Even the pricing is geared towards small objects: "you pay $0.16/GB/month in the US East (N. Virginia) Region (...) and an additional per-GB fee for the portion of any request that exceeds 512 KB." 10 | 11 | One of the primary (even if far from the only) use cases for something like Redis is consistent performance on key-value queries: you set 'bar' for the key 'foo', and then you retrieve it by asking for 'foo' later. While Redis (and other key-value stores) are indubitably faster than s3 (even in its new Express flavor), they are also far more costly: these are some prices (in US East) as of Dec 2023. 12 | 13 | | Service | Monthly Cost ($) | 14 | | ------------- | ------------- | 15 | | S3 Express (1 GB) | 0.16 | 16 | | cache.t4g.micro (0.5 GB) | 11.52 (0.016 / hour) | 17 | | cache.t4g.small (1.37 GB) | 23.04 (0.032 / hour) | 18 | 19 | In this small experiment we set out to investigate what kind of performance / cost trade-off is now unlocked by the new s3 Express option: since we mostly care about key-value queries for "small objects", can we build a redis-like client entirely backed by s3 Express? 20 | 21 | ## 1-min hello-world 22 | 23 | If you have [AWS credentials](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html) set up so that you can run arbitrary s3 and s3 Express operations on your account, you can run this in one minute by creating a virtual env and installing the package from the repository root: 24 | 25 | ```shell 26 | python3 -m venv venv 27 | source venv/bin/activate 28 | pip install . 29 | python 30 | ``` 31 | 32 | In the Python REPL, you can now do (compare to [redis-py](https://github.com/redis/redis-py)): 33 | 34 | ```shell 35 | >>> from redis3.redis3 import redis3Client 36 | >>> r = redis3Client(cache_name='mytestcache', db=0) 37 | >>> r.set('foo', 'bar') 38 | True 39 | >>> r.get('foo') 40 | 'bar' 41 | ``` 42 | 43 | Note that: 44 | 45 | * the cache name (`mytestcache` above) will be used (together with an availability zone, defaulting to `use1-az5` as it assumes you are stuck with `us-east-1` like the rest of us) to produce a bucket like `redis3-mytestcache--use1-az5--x-s3`, which needs to be unique in the region as per s3 naming rules; 46 | * `redis3Client` uses the boto3 client behind the scenes, so the usual authentication rules apply (credential file, environment variables, or passing `aws_access_key_id` and the like as `**kwargs`). 47 | 
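For example, if you want to pass credentials explicitly instead of relying on the default credential chain, a minimal sketch looks like this (the key values and region are placeholders; any extra keyword argument is simply forwarded to `boto3.client('s3', ...)`):

```python
from redis3.redis3 import redis3Client

# extra kwargs are passed straight to boto3.client('s3', ...),
# so the usual boto3 credential parameters work (values below are placeholders)
r = redis3Client(
    cache_name='mytestcache',
    db=0,
    aws_access_key_id='YOUR_ACCESS_KEY_ID',
    aws_secret_access_key='YOUR_SECRET_ACCESS_KEY',
    region_name='us-east-1',
)
r.set('foo', 'bar')
```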
48 | If you want to see more ops, you can run `playground.py` with your own cache name as argument: 49 | 50 | ```shell 51 | cd src 52 | python playground.py my-cache-name 53 | ``` 54 | 55 | ## 5-min explanation 56 | 57 | Sometimes we want a full-fledged NoSQL store (no shortage of those!), sometimes we just want to set some value somewhere, possibly namespaced in some way, and get it back at a later time. Object storage like s3 was never fast and reliable enough in first-byte latency to be an actual contender, until the release of s3 Express, which, for key-value queries, proposes a novel price/latency trade-off compared to more traditional solutions (Redis, DynamoDB, etc.). 58 | 59 | `redis3` is a ~100 LOC class that puts a redis-py-like interface on top of s3 Express, easy to use as a slowish but infinite and cheap cache (no worries about provisioning a larger instance or evicting keys); `redis3` currently implements GET and SET, namespaced by a database integer (Redis-like), plus a few other commands, such as versions of MGET and MSET "suited" to object storage - i.e. they cannot be atomic operations, but they run in parallel through a thread pool, allowing you to SET / GET many values with one command relatively fast (from my local machine - a pretty decent Mac in US East - getting 25 keys with MGET takes 0.1286s, 50 takes 0.1362s and 100 takes 0.1960s). When instantiating the client (e.g. `redis3Client(cache_name='mytestcache', db=0)`) you can specify a `db` as a namespacing device, exactly as in Redis (with no limit of `16` databases, of course). See the short sketch after the table below for the commands in action. 60 | 61 | | Redis Command | redis3 Command | Intended Semantics | 62 | | ------------- | ------------- | ------------- | 63 | | GET | `get(key)` | get the value from a string key | 64 | | SET | `set(key, value)` | set a string value for a key | 65 | | MGET | `mget(keys)` | get multiple keys in parallel | 66 | | MSET | `mset(keys, values)` | set multiple values for keys in parallel | 67 | | KEYS | `keys(starts_with)` | list all keys in the current db | 68 | | DEL | `delete(key)` | delete the key (no error is thrown if key does not exist) | 69 | 70 | Note that Redis (which, btw, runs single-threaded in-memory for a reason) can offer not only 316136913 more commands, but also atomicity guarantees (INCR, WATCH, etc.) that object storage cannot (s3 does offer, however, [strong read-after-write consistency](https://aws.amazon.com/it/s3/consistency/): after a successful write of a new object, any subsequent read request - including listing keys - receives the latest version of the object). On the other hand, an s3-backed cache can offer more concurrent throughput at no additional effort, a truly "serverless experience" and a "thin client" that falls back on standard AWS libraries, automatically inheriting all the security policies you can think of (e.g. since "dbs" in redis3 are just folders in an express bucket, access can be controlled at that level by leveraging the usual IAM magic). 71 | 
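As a quick illustration of the commands above, here is a minimal sketch (assuming the `mytestcache` cache from the hello-world; keys and values are made up):

```python
from redis3.redis3 import redis3Client

# db acts as a namespace: with db=1, keys live under the "1/" prefix in the bucket
r = redis3Client(cache_name='mytestcache', db=1)

# threaded multi-set / multi-get (not atomic, see the note above)
r.mset(['k_0', 'k_1', 'k_2'], ['v_0', 'v_1', 'v_2'])
print(r.mget(['k_0', 'k_1', 'k_2']))  # ['v_0', 'v_1', 'v_2']

# keys() is a generator over the keys in the current db (optionally filtered by prefix)
for key in r.keys(starts_with='k_'):
    print(key)

# delete() is idempotent: deleting a non-existent key just returns True
r.delete('k_0')
```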
72 | ## Running some tests 73 | 74 | Some more (horribly repetitive) code to test the difference between s3 Express and normal s3 (plus some tests to actually make sure the client behaves as it should) can be run here: 75 | 76 | ```shell 77 | cd src 78 | python run_tests.py my-cache-name 79 | ``` 80 | 81 | With an EC2 instance, you can pick at creation time the same availability zone as the s3 cache and run a comparison of normal vs express buckets in the best possible (in theory) latency conditions (with a free [Redis](https://redis.com/) instance in us-east-1 as a baseline). My manual runs on a throw-away EC2 (k=100) gave the following results (in seconds): 82 | 83 | | Test | Standard Bucket (s) | Express Bucket (s) | Redis Labs (s) | 84 | | ------------- | ------------- | ------------- | ------------- | 85 | | GET (avg) | 0.016 | 0.005 | 0.001 | 86 | | GET (median) | 0.014 | 0.005 | 0.0009 | 87 | | GET (95th latency) | 0.027 | 0.005 | 0.002 | 88 | 89 | TL;DR: an express bucket is not just 3x faster in the average case, but also significantly more reliable in the tail. Redis is still much faster than both, but (remember) it is also much more expensive. 90 | 91 | Note: don't take these tests too seriously! 92 | 93 | ### Bonus: a lambda-based use-case 94 | 95 | If you know the [serverless framework](https://www.serverless.com/framework/) and have it available on your machine, you can deploy a lambda function that performs some (horribly repetitive) tests to evaluate AWS-lambda-to-s3 latency. Note that: 96 | 97 | * on top of serverless, you will need Docker, as the `boto3` version inside Lambdas is too old and does not support s3 Express buckets yet; 98 | * after deployment, you need to make sure the lambda role created for the function can access the s3 resources backing the cache. Note that s3 Express policies are [a bit of a drag](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-express-security-iam-identity-policies.html), so beware. 99 | 100 | If you feel adventurous and ready to fight IAM roles, then do: 101 | 102 | ```shell 103 | cd serverless 104 | serverless deploy 105 | ``` 106 | 107 | (if you don't, you can just trust my numbers below!). 108 | 109 | At the end, you'll get an endpoint such as `https://xxx.execute-api.us-east-1.amazonaws.com/dev/test?k=50&cache=mytestcache` that you can open in your browser to trigger the tests (`k` and `cache` are optional - check `app.py` for the defaults). One request will generate something like this, i.e. a comparison of _k_ ops in s3 Express vs normal s3 vs Redis Labs: 110 | 111 | ```json 112 | { 113 | "metadata": { 114 | "timeMs": 7373, 115 | "epochMs": 1701377819320, 116 | "eventId": "971ca40d-8f50-4c27-a816-76bb7df292c4", 117 | "inputK": 50 118 | }, 119 | "data": { 120 | "set_time_mean": 0.011164916356404623, 121 | "set_time_median": 0.009434223175048828, 122 | "get_time_mean": 0.006322011947631836, 123 | "get_time_median": 0.006218910217285156, 124 | "set_time_mean_s3": 0.026339941024780274, 125 | "set_time_median_s3": 0.024151086807250977, 126 | "get_time_mean_s3": 0.019532273610432943, 127 | "get_time_median_s3": 0.016076326370239258, 128 | "set_time_mean_redis": 0.0018777799606323241, 129 | "set_time_median_redis": 0.000904083251953125, 130 | "set_time_mean_many": 0.406483252843221, 131 | "set_time_median_many": 0.3329179286956787, 132 | "get_time_mean_many": 0.31602056821187335, 133 | "get_time_median_many": 0.3195207118988037 134 | } 135 | } 136 | ``` 137 | 138 | In this particular example, with _k=50_, setting a key with s3 Express takes ~10ms and getting it back ~6ms, vs ~25ms and ~18ms with standard s3. Setting 50 keys at once with multi-threading takes ~400ms, reading them back ~300ms. Not bad! 139 | 
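If you prefer the command line to the browser, the same test can be fired with `curl` (the host below is the placeholder from above; use the actual endpoint printed by `serverless deploy`):

```shell
curl "https://xxx.execute-api.us-east-1.amazonaws.com/dev/test?k=50&cache=mytestcache"
```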
140 | ## TO-DOs, misc. notes and all that jazz 141 | 142 | * Since the only real dependency is boto3 and AWS access, making it easier to configure the client wrt AWS would be nice: right now, I've mostly been running either in a DEV environment with semi-god IAM access, or with a carefully crafted IAM role attached to the lambda; 143 | * if this is useful, moving to poetry and auto-deploying to PyPI would make it easy to just start using it across repos; 144 | * if lambda-based latency benchmarks are useful, building the proper AWS permissions into `serverless.yml` would make the deployment seamless (right now the entire serverless part is really manual, ad hoc and redundant). 145 | 146 | Everything is left as an exercise to the reader. 147 | 148 | ## License 149 | 150 | This code is released "As Is", with no guarantees whatsoever, under the MIT license. This was a fun coding exercise in-between serious tasks, and should be taken with the appropriate dose of sense of humour. 151 | -------------------------------------------------------------------------------- /redis3/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BauplanLabs/redis3/2375efa204a21867768e61913294771c8538f8d1/redis3/__init__.py -------------------------------------------------------------------------------- /redis3/redis3.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import botocore 3 | from time import time 4 | import concurrent.futures 5 | 6 | 7 | class redis3Client(): 8 | 9 | def __init__( 10 | self, 11 | cache_name: str, 12 | db: int = 0, 13 | availability_zone: str = 'use1-az5', 14 | bucket_prefix: str = 'redis3', 15 | verbose: bool = False, 16 | **kwargs 17 | ): 18 | """ 19 | Store inside the class the s3 client, the cache name, the db number 20 | that will be used for all the ops. Note that you can pass credentials 21 | to boto3 at init phase using kwargs when instantiating the class. 22 | 23 | You can also override the default bucket prefix by passing a different 24 | bucket_prefix. 25 | """ 26 | init_start_time = time() 27 | self.bucket_prefix = bucket_prefix 28 | 29 | # setup basic class attributes and objects 30 | self._s3_client = boto3.client('s3', **kwargs) 31 | self.bucket_name = self._get_bucket_from_cache_name( 32 | availability_zone, 33 | cache_name 34 | ) 35 | self.db = db 36 | self._cache_name = cache_name 37 | self._availability_zone = availability_zone 38 | self._verbose = verbose 39 | try: 40 | if verbose: 41 | print("Trying to create bucket {} in AZ {}".format(self.bucket_name, self._availability_zone)) 42 | 43 | r = self._s3_client.create_bucket( 44 | Bucket=self.bucket_name, 45 | CreateBucketConfiguration={ 46 | 'Location': { 47 | 'Type': 'AvailabilityZone', 48 | 'Name': self._availability_zone 49 | }, 50 | 'Bucket': { 51 | 'DataRedundancy': 'SingleAvailabilityZone', 52 | 'Type': 'Directory' 53 | } 54 | }, 55 | ) 56 | except botocore.exceptions.ClientError as e: 57 | # if the bucket already exists, just use it 58 | if e.response['Error']['Code'] == "BucketAlreadyOwnedByYou": 59 | if self._verbose: 60 | print("Bucket {} already exists. Using it as cache".format(self.bucket_name)) 61 | else: 62 | raise e 63 | 64 | if self._verbose: 65 | print("Init completed in {:.4f}s".format(time() - init_start_time)) 66 | 67 | return None 68 | 69 | @property 70 | def db(self): 71 | """ 72 | Return the db for the cache (i.e. 
this is a prefix in the bucket) 73 | """ 74 | return self._db 75 | 76 | @db.setter 77 | def db(self, value): 78 | """ 79 | Set the db for the cache (i.e. this is a prefix in the bucket) 80 | """ 81 | try: 82 | self._db = int(value) 83 | except ValueError: 84 | print('db must be an integer or something that can be casted as such, got {}'.format(value)) 85 | raise ValueError 86 | 87 | @property 88 | def bucket_name(self): 89 | """ 90 | Return the name of the bucket used to back the cache 91 | """ 92 | return self._bucket_name 93 | 94 | @bucket_name.setter 95 | def bucket_name(self, value): 96 | """ 97 | Set the name of the bucket used to back the cache 98 | """ 99 | self._bucket_name = value 100 | 101 | def _get_bucket_from_cache_name(self, availability_zone: str, cache_name: str): 102 | """ 103 | Produce a distinct bucket name from the cache name supplied by the user. 104 | 105 | Note that we need to comply with the following naming rules: 106 | 107 | https://docs.aws.amazon.com/AmazonS3/latest/userguide/directory-bucket-naming-rules.html 108 | """ 109 | return '{}-{}--{}--x-s3'.format(self.bucket_prefix, cache_name, availability_zone) 110 | 111 | def _get_object_key_from_key_name(self, key: str): 112 | """ 113 | Make sure that the key is prefixed with the db number as 114 | a natural namespacing of the keys 115 | """ 116 | return '{}/{}'.format(self.db, key) 117 | 118 | def set(self, key: str, value: str): 119 | """ 120 | Redis SET equivalent: set a string value for a given string key. 121 | 122 | Note that if you want to store a JSON object, you need to serialize it 123 | to a string first. 124 | 125 | Ref: https://redis.io/commands/set/ 126 | """ 127 | assert isinstance(value, str), "Expected value to be a string, got {}".format(type(value)) 128 | _key = self._get_object_key_from_key_name(key) 129 | try: 130 | r = self._s3_client.put_object( 131 | Bucket=self.bucket_name, 132 | Key=_key, 133 | Body=value 134 | ) 135 | except botocore.exceptions.ClientError as e: 136 | if self._verbose: 137 | print("!!! Failed operation: error code {}".format(e.response['Error']['Code'])) 138 | 139 | raise e 140 | # if put_object succeeded, return True 141 | return True 142 | 143 | def get(self, key: str): 144 | """ 145 | Redis GET equivalent: get a string value for a given string key. 146 | 147 | It returns None if the key doesn't exist. 148 | 149 | Ref: https://redis.io/commands/get/ 150 | 151 | """ 152 | _key = self._get_object_key_from_key_name(key) 153 | try: 154 | r = self._s3_client.get_object( 155 | Bucket=self.bucket_name, 156 | Key=_key, 157 | ) 158 | # if get_object succeeded, return the value 159 | if self._verbose: 160 | print("{} last modified on {}".format(_key, r['LastModified'])) 161 | 162 | return r['Body'].read().decode('utf-8') 163 | except botocore.exceptions.ClientError as e: 164 | # this is where we handle the case where the key doesn't exist 165 | if e.response['Error']['Code'] == "NoSuchKey": 166 | return None 167 | if self._verbose: 168 | print("!!! Failed operation: error code {}".format(e.response['Error']['Code'])) 169 | 170 | raise e 171 | 172 | def mset(self, keys: list, values: list): 173 | """ 174 | Set multiple keys to multiple values. 175 | Note that it's a threaded execution of set() for each key, so the return value 176 | can be True (success) or the command may fail if any error occurs. 177 | 178 | Note that this is not an atomic operation and there is now way to know 179 | which keys existed and which didn't. 
180 | 181 | Ref: https://redis.io/commands/mset/ 182 | """ 183 | 184 | results = [] 185 | with concurrent.futures.ThreadPoolExecutor() as executor: 186 | futures = {} 187 | for ctr, (k, v) in enumerate(zip(keys, values)): 188 | futures[executor.submit(self.set, key=k, value=v)] = ctr 189 | for future in concurrent.futures.as_completed(futures): 190 | try: 191 | results.append((future.result(), futures[future])) 192 | except Exception as ex: 193 | raise ex 194 | 195 | results, _ = zip(*sorted(results, key=lambda x: x[1])) 196 | 197 | return list(results) 198 | 199 | def mget(self, keys: list): 200 | """ 201 | Return the values associated with the specified keys. 202 | Note that it's a threaded execution of get() for each key, so the return value 203 | can be a string (success), a None (no key found) or the command may fail if 204 | any error occurs. 205 | 206 | Note that this is not an atomic operation. 207 | 208 | Ref: https://redis.io/commands/mget/ 209 | """ 210 | values = [] 211 | with concurrent.futures.ThreadPoolExecutor() as executor: 212 | futures = {} 213 | for ctr, k in enumerate(keys): 214 | futures[executor.submit(self.get, key=k)] = ctr 215 | for future in concurrent.futures.as_completed(futures): 216 | try: 217 | values.append((future.result(), futures[future])) 218 | except Exception as ex: 219 | raise ex 220 | 221 | values, _ = zip(*sorted(values, key=lambda x: x[1])) 222 | 223 | return list(values) 224 | 225 | def keys(self, starts_with=None): 226 | """ 227 | Return all the keys matching the specified pattern in the current db, modeled 228 | after the Redis "KEYS pattern" command (usual caveat on atomicity 229 | applies). 230 | 231 | This is a generator function, so you can use it like: 232 | 233 | for key in my_client.keys(): 234 | print(key) 235 | 236 | Ref: https://redis.io/commands/keys/ 237 | """ 238 | 239 | return self._get_matching_s3_keys( 240 | self.bucket_name, 241 | # for express, only prefixes that end in a delimiter ( /) are supported. 242 | '{}/'.format(self.db), 243 | starts_with 244 | ) 245 | 246 | def _get_matching_s3_keys(self, bucket, prefix, pattern): 247 | """ 248 | Code gently inspired by: https://alexwlchan.net/2017/listing-s3-keys/ 249 | """ 250 | kwargs = {'Bucket': bucket} 251 | if prefix: 252 | kwargs['Prefix'] = prefix 253 | while True: 254 | resp = self._s3_client.list_objects_v2(**kwargs) 255 | for obj in resp['Contents']: 256 | key = obj['Key'] 257 | # we want to make sure keys start with the prefix (i.e. the db number) 258 | assert key.startswith(prefix) 259 | # if no pattern is specified or the key starts with the pattern 260 | if pattern is None or key.startswith(pattern): 261 | yield key[len(prefix):] 262 | 263 | # The S3 API is paginated, so we pass the continuation token into the next response 264 | try: 265 | kwargs['ContinuationToken'] = resp['NextContinuationToken'] 266 | except KeyError: 267 | break 268 | 269 | def delete(self, key: str): 270 | """ 271 | Delete a key in the current database (a non-existent key gets ignored 272 | as the AWS boto client won't raise any error). We use "delete" to avoid confliucts 273 | with the Python keyword "del". 
274 | 275 | Ref: https://redis.io/commands/del/ 276 | """ 277 | _key = self._get_object_key_from_key_name(key) 278 | r = self._s3_client.delete_object( 279 | Bucket=self.bucket_name, 280 | Key=_key, 281 | ) 282 | 283 | return True 284 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.33.2 2 | tqdm==4.66.1 -------------------------------------------------------------------------------- /serverless/app.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Simple lambda function to test redis3 and s3 performance when running compute next to storage. 4 | All of this is hacky / manual / one-off testing, but it's a start to get a more realistic 5 | sense of potential performance gains. 6 | 7 | Note that for this to work you need to make the AWS lambda role (created by serverless) 8 | aware of the bucket that is underlying the redis3 cache. 9 | 10 | S3 express policies are a drag, so beware: 11 | 12 | https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-express-security-iam-identity-policies.html 13 | 14 | Note that to make it work, I had to actually copy the ARN for the bucket from the s3 console. 15 | 16 | Finally the AWS lambda role should also have access to buckets with the pattern used by normal 17 | s3 client to do the comparison, e.g.: 18 | 19 | bucket_name = "redis3-test-{}".format(uuid.uuid4()) 20 | 21 | Note 2: we included a redis client in the lambda function to test the performance of 22 | actual Redis on Redis Lab in us-east-1: this is done for the purpose of the comparison 23 | in the Medium blog post, so treat the code as a throw-away example (you will need to provide your own 24 | credentials for Redis and spin up your own free Redis instance on Redis Labs). 
25 | 26 | """ 27 | 28 | 29 | import time 30 | import uuid 31 | import boto3 32 | import json 33 | from redis3 import redis3Client 34 | import redis 35 | from statistics import mean, median 36 | 37 | 38 | # default number of keys to try to set / get 39 | DEFAULT_K = 50 40 | # check that the version of boto3 supports the s3 express feature 41 | print("Boto3 version: {}".format(boto3.__version__)) 42 | 43 | 44 | def wrap_response(body): 45 | """ 46 | Just make sure the response is in the right format given this is a HTTP GET 47 | """ 48 | return { 49 | "statusCode": 200, 50 | "body": json.dumps(body), 51 | "headers": { 52 | "Content-Type": "application/json" 53 | } 54 | } 55 | 56 | 57 | def run_redis_tests( 58 | key_list: list, 59 | val_list: list 60 | ): 61 | my_client = redis.Redis( 62 | host='redis-xxx.cloud.redislabs.com', 63 | password='mypwd', 64 | port=14665, 65 | db=0) 66 | 67 | set_times = [] 68 | for k, v in zip(key_list, val_list): 69 | s_set_time = time.time() 70 | r = my_client.set(k, v) 71 | set_times.append(time.time() - s_set_time) 72 | 73 | get_times = [] 74 | for k, v in zip(key_list, val_list): 75 | s_get_time = time.time() 76 | r = my_client.get(k) 77 | get_times.append(time.time() - s_get_time) 78 | 79 | return get_times, set_times 80 | 81 | 82 | def run_redis3_many_keys_tests( 83 | cache_name: str, 84 | key_list: list, 85 | val_list: list 86 | ): 87 | my_client = redis3Client(cache_name=cache_name, db=0, verbose=False) 88 | 89 | set_times = [] 90 | get_times = [] 91 | # do it few times to get a sense of the performance 92 | for _ in range(3): 93 | s_set_time = time.time() 94 | r = my_client.mset(key_list, val_list) 95 | set_times.append(time.time() - s_set_time) 96 | s_get_time = time.time() 97 | r = my_client.mget(key_list) 98 | get_times.append(time.time() - s_get_time) 99 | assert r == val_list, "Expected {}, got {}".format(val_list, r) 100 | 101 | return get_times, set_times 102 | 103 | 104 | def run_redis3_tests( 105 | cache_name: str, 106 | key_list: list, 107 | val_list: list 108 | ): 109 | my_client = redis3Client(cache_name=cache_name, db=0, verbose=False) 110 | 111 | set_times = [] 112 | for k, v in zip(key_list, val_list): 113 | s_set_time = time.time() 114 | r = my_client.set(k, v) 115 | set_times.append(time.time() - s_set_time) 116 | 117 | get_times = [] 118 | for k, v in zip(key_list, val_list): 119 | s_get_time = time.time() 120 | r = my_client.get(k) 121 | get_times.append(time.time() - s_get_time) 122 | assert r == v, "Expected {}, got {}".format(v, r) 123 | 124 | return get_times, set_times 125 | 126 | 127 | def run_s3_tests( 128 | key_list: list, 129 | val_list: list 130 | ): 131 | # we assume the lambda role has access to buckets with the name redis3-test-* 132 | s3_client = boto3.client('s3') 133 | # create a bucket with a temp name 134 | bucket_name = "redis3-test-{}".format(uuid.uuid4()) 135 | s3_client.create_bucket(Bucket=bucket_name) 136 | # set all keys + values 137 | set_times = [] 138 | for i in range(len(key_list)): 139 | start = time.time() 140 | s3_client.put_object(Bucket=bucket_name, Key=key_list[i], Body=val_list[i]) 141 | set_times.append(time.time() - start) 142 | # read them back 143 | get_times = [] 144 | for i in range(len(key_list)): 145 | start = time.time() 146 | obj = s3_client.get_object(Bucket=bucket_name, Key=key_list[i]) 147 | v = obj['Body'].read().decode('utf-8') 148 | get_times.append(time.time() - start) 149 | assert v == val_list[i], "Expected {}, got {}".format(val_list[i], v) 150 | 151 | # loop over all keys and 
delete them (otherwise we can't delete the bucket) 152 | for key in key_list: 153 | s3_client.delete_object(Bucket=bucket_name, Key=key) 154 | 155 | # delete the bucket 156 | s3_client.delete_bucket(Bucket=bucket_name) 157 | 158 | return get_times, set_times 159 | 160 | 161 | def lambda_handler(event, context): 162 | """ 163 | 164 | Simple lambda function to test redis3 and s3 performance. 165 | 166 | No error checking, no fancy stuff, just throw-away code to get some get / set 167 | performance numbers. 168 | 169 | """ 170 | start = time.time() 171 | data = {} 172 | # debug 173 | print(event) 174 | query_args = event.get('queryStringParameters', None) 175 | # set a default input_k 176 | cnt_k = query_args['k'] if query_args and 'k' in query_args else DEFAULT_K 177 | key_list = ['playground_{}'.format(i) for i in range(cnt_k)] 178 | val_list = ['bar_{}'.format(i) for i in range(cnt_k)] 179 | # set a default cache name 180 | cache_name = query_args['cache'] if query_args and 'cache' in query_args else 'mytestcache' 181 | get_times, set_times = run_redis3_tests(cache_name, key_list, val_list) 182 | # add some stats to the data object we return 183 | data['set_times'] = set_times 184 | data['set_time_mean'] = mean(set_times) 185 | data['set_time_median'] = median(set_times) 186 | data['get_times'] = get_times 187 | data['get_time_mean'] = mean(get_times) 188 | data['get_time_median'] = median(get_times) 189 | # run some basic ops in s3 190 | get_times, set_times = run_s3_tests(key_list, val_list) 191 | # add some stats to the data object we return 192 | data['set_times_s3'] = set_times 193 | data['set_time_mean_s3'] = mean(set_times) 194 | data['set_time_median_s3'] = median(set_times) 195 | data['get_times_s3'] = get_times 196 | data['get_time_mean_s3'] = mean(get_times) 197 | data['get_time_median_s3'] = median(get_times) 198 | # run some basic ops in s3 199 | get_times, set_times = run_redis_tests(key_list, val_list) 200 | # add some stats to the data object we return 201 | data['set_times_redis'] = set_times 202 | data['set_time_mean_redis'] = mean(set_times) 203 | data['set_time_median_redis'] = median(set_times) 204 | data['get_times_redis'] = get_times 205 | data['get_time_mean_redis'] = mean(get_times) 206 | data['get_time_median_redis'] = median(get_times) 207 | # finally test the redis3 client with many keys at once 208 | get_times, set_times = run_redis3_many_keys_tests(cache_name, key_list, val_list) 209 | data['set_times_many'] = set_times 210 | data['set_time_mean_many'] = mean(set_times) 211 | data['set_time_median_many'] = median(set_times) 212 | data['get_times_many'] = get_times 213 | data['get_time_mean_many'] = mean(get_times) 214 | data['get_time_median_many'] = median(get_times) 215 | 216 | body = { 217 | "metadata": { 218 | "timeMs": int((time.time() - start) * 1000.0), 219 | "epochMs": int(time.time() * 1000), 220 | "eventId": str(uuid.uuid4()), 221 | 'inputK': cnt_k, 222 | }, 223 | "data": data 224 | } 225 | 226 | return wrap_response(body) -------------------------------------------------------------------------------- /serverless/serverless.yml: -------------------------------------------------------------------------------- 1 | service: redis3-lambda-performance 2 | 3 | provider: 4 | name: aws 5 | timeout: 30 6 | architecture: arm64 7 | ecr: 8 | images: 9 | redis3test: 10 | path: ../ 11 | platform: linux/arm64 12 | 13 | 14 | functions: 15 | myredis3test: 16 | image: 17 | name: redis3test 18 | events: 19 | - http: 20 | path: test 21 | method: get 22 | 23 | 
-------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """The setup script.""" 4 | 5 | from setuptools import find_packages, setup 6 | 7 | with open("README.md") as readme_file: 8 | readme = readme_file.read() 9 | 10 | with open("requirements.txt") as f: 11 | requirements = f.read().splitlines() 12 | 13 | setup( 14 | 15 | author="redis3", 16 | author_email='jacopo.tagliabue@bauplanlabs.com', 17 | python_requires='>=3.9', 18 | classifiers=[ 19 | "Development Status :: 2 - Pre-Alpha", 20 | "Intended Audience :: Developers", 21 | "License :: OSI Approved :: MIT License", 22 | "Natural Language :: English", 23 | "Programming Language :: Python :: 3.9", 24 | "Programming Language :: Python :: 3.10", 25 | "Programming Language :: Python :: 3.11", 26 | ], 27 | description="redis3", 28 | install_requires=requirements, 29 | license="MIT license", 30 | long_description=readme, 31 | long_description_content_type="text/markdown", 32 | include_package_data=True, 33 | keywords="redis3", 34 | name="redis3", 35 | packages=find_packages(include=["redis3", "redis3.*"]), 36 | 37 | url='https://github.com/BauplanLabs/redis3', 38 | version='0.0.2', 39 | zip_safe=False, 40 | extras_require={}, 41 | ) -------------------------------------------------------------------------------- /src/playground.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | This is a playground script that shows how to use the redis3Client class 4 | by performing some basic operations on the "cache". 5 | 6 | To cover edge cases and benchmarking, you can inspect and run run_tests.py 7 | instead. 8 | 9 | Note that redis3 assumes your interpreter can run: 10 | 11 | s3_client = boto3.client('s3') 12 | 13 | (and all the other boto3 calls in redis3.py) either through a local AWS credentials file, environment variables, etc. 14 | (you can also modify this script to pass credentials to boto3 using kwargs for redis3Client). 
15 | 16 | For reference on AWS credentials and boto3, check this: 17 | https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html 18 | 19 | """ 20 | 21 | from redis3.redis3 import redis3Client 22 | from datetime import datetime 23 | from utils import measure_func 24 | import json 25 | 26 | 27 | @measure_func 28 | def set_key_with_timing(client): 29 | return client.set('foo', 'bar') 30 | 31 | 32 | @measure_func 33 | def get_key_with_timing(client): 34 | return client.get('foo') 35 | 36 | 37 | @measure_func 38 | def set_keys_with_timing(client, size=50): 39 | _list = ['playground_{}'.format(i) for i in range(size)] 40 | return client.mset(_list, _list) 41 | 42 | 43 | @measure_func 44 | def get_keys_with_timing(client, size=50): 45 | key_list = ['playground_{}'.format(i) for i in range(size)] 46 | return client.mget(key_list) 47 | 48 | 49 | def run_playground( 50 | cache_name: str 51 | ): 52 | # say hi 53 | print("Started playground at {}\n".format(datetime.now())) 54 | 55 | # first, instantiate redis3Client and check all is well 56 | # we set verbose to True to see what's going on under the hood as this 57 | # a playground script 58 | my_client = redis3Client(cache_name=cache_name, db=0, verbose=True) 59 | # for debugging purposes, print the name of the bucket used to back the cache 60 | print("Using bucket {} as cache".format(my_client.bucket_name)) 61 | 62 | # now, some basic ops 63 | 64 | # set a key and get it back 65 | r = my_client.set('foo', 'bar') 66 | print(r) 67 | assert r is True, "Expected True, got {}".format(r) 68 | r = my_client.get('foo') 69 | print(r) 70 | # overwrite the key and get it back 71 | r = my_client.set('foo', 'bar2') 72 | # store something more complex, as long as you can serialize it to a string 73 | # e.g. dump it to a JSON string 74 | my_obj = { 'k_{}'.format(i): 'v_{}'.format(i) for i in range(5) } 75 | r = my_client.set('foo_dic', json.dumps(my_obj)) 76 | r = json.loads(my_client.get('foo_dic')) 77 | print("Json keys: {}".format(list(r.keys()))) 78 | # get a key that doesn't exist 79 | r = my_client.get('baz') 80 | assert r is None, "Expected None, got {}".format(r) 81 | # set a list of keys and get them back in one go 82 | key_list = ['playground_{}'.format(i) for i in range(5)] 83 | val_list = ['bar_{}'.format(i) for i in range(5)] 84 | r = my_client.mset(key_list, val_list) 85 | val_list_back = my_client.mget(key_list) 86 | print("Got back {} values".format(len(val_list_back))) 87 | # use the keys command to get all keys in the cache 88 | all_keys_in_db = list([k for k in my_client.keys()]) 89 | print("Found {} keys in cache, first three: {}".format(len(all_keys_in_db), all_keys_in_db[:3])) 90 | # delete one 91 | r = my_client.delete(all_keys_in_db[0]) 92 | # finally, do the same ops, wrapped in a timing decorator 93 | # to avoid spamming the console, we 'manually' toggle verbose off 94 | my_client._verbose = False 95 | 96 | r = set_key_with_timing(my_client) 97 | r = get_key_with_timing(my_client) 98 | r = get_keys_with_timing(my_client) 99 | r = set_keys_with_timing(my_client) 100 | 101 | # how does the many ops scale with more keys? 102 | for i in [25, 50, 100, 500, 1000]: 103 | print("\nRunning ops with {} keys".format(i)) 104 | r = set_keys_with_timing(my_client, size=i) 105 | r = get_keys_with_timing(my_client, size=i) 106 | 107 | # say bye 108 | print("\nFinished playground at {}. 
See you, s3ace cowboy".format(datetime.now())) 109 | return 110 | 111 | 112 | if __name__ == "__main__": 113 | import sys 114 | # make sure we have a cache name 115 | assert len(sys.argv) == 2, "Please provide a cache name" 116 | cache_name = sys.argv[1] 117 | run_playground(cache_name=cache_name) -------------------------------------------------------------------------------- /src/run_tests.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Simple script to run some tests on the cache: some functional tests, and some performance tests, 4 | in which we compare standard S3 buckets with the express bucket. 5 | 6 | """ 7 | 8 | import boto3 9 | import uuid 10 | from time import time 11 | from statistics import median, mean 12 | from tqdm import tqdm 13 | from datetime import datetime 14 | from redis3.redis3 import redis3Client 15 | import math 16 | import json 17 | import uuid 18 | 19 | 20 | def print_test_info( 21 | timing_list: list, 22 | target_percentile: int = 95 23 | ): 24 | def percentile(input, q): 25 | """ 26 | I don't want to import numpy just for this 27 | """ 28 | data_sorted = sorted(input) 29 | 30 | return data_sorted[math.ceil(q / 100 * len(data_sorted))] 31 | 32 | print("Average time: {}".format(mean(timing_list))) 33 | print("Median time: {}".format(median(timing_list))) 34 | print("95th percentile time: {}".format(percentile(timing_list, target_percentile))) 35 | return 36 | 37 | 38 | def run_normal_bucket_tests( 39 | test_keys: list, # list of keys to set / get 40 | test_values: list, 41 | **kwargs 42 | ): 43 | print("\nStart of testing standard buckets at {}\n".format(datetime.now())) 44 | # start a client 45 | s3_client = boto3.client('s3', **kwargs) 46 | # create a bucket with a temp name 47 | # bucket creation is not part of the benchmark 48 | bucket_name = "redis3-test-{}".format(uuid.uuid4()) 49 | s3_client.create_bucket(Bucket=bucket_name) 50 | # set all keys + values 51 | set_times = [] 52 | for i in tqdm(range(len(test_keys))): 53 | start = time() 54 | s3_client.put_object(Bucket=bucket_name, Key=test_keys[i], Body=test_values[i]) 55 | set_times.append(time() - start) 56 | # print out average and median set times 57 | print_test_info(set_times, target_percentile=95) 58 | # read them back 59 | get_times = [] 60 | for i in tqdm(range(len(test_keys))): 61 | start = time() 62 | obj = s3_client.get_object(Bucket=bucket_name, Key=test_keys[i]) 63 | v = obj['Body'].read().decode('utf-8') 64 | get_times.append(time() - start) 65 | # check we get the right value back! 
66 | assert v == test_values[i], "Expected {}, got {}".format(test_values[i], v) 67 | # print out average and median set times 68 | print_test_info(get_times, target_percentile=95) 69 | 70 | # loop over all keys and delete them (otherwise we can't delete the bucket) 71 | for test_key in tqdm(test_keys): 72 | # TODO: we should totally parallelize this 73 | s3_client.delete_object(Bucket=bucket_name, Key=test_key) 74 | 75 | # delete the bucket 76 | s3_client.delete_bucket(Bucket=bucket_name) 77 | 78 | print("\nEnd of testing standard buckets at {}\n".format(datetime.now())) 79 | 80 | return 81 | 82 | 83 | def run_cache_tests( 84 | test_keys: list, # list of keys to set / get 85 | test_values: list, 86 | cache_name: str, # name of the cache to use 87 | **kwargs 88 | ): 89 | print("\nStart of testing the cache at {}\n".format(datetime.now())) 90 | my_client = redis3Client(cache_name=cache_name, db=0, verbose=False, **kwargs) 91 | # set all keys + values 92 | set_times = [] 93 | for i in tqdm(range(len(test_keys))): 94 | start = time() 95 | my_client.set(test_keys[i], test_values[i]) 96 | set_times.append(time() - start) 97 | # print out average and median set times 98 | print_test_info(set_times, target_percentile=95) 99 | # read them back 100 | get_times = [] 101 | for i in tqdm(range(len(test_keys))): 102 | start = time() 103 | v = my_client.get(test_keys[i]) 104 | get_times.append(time() - start) 105 | # check we get the right value back! 106 | assert v == test_values[i], "Expected {}, got {}".format(test_values[i], v) 107 | # print out average and median set times 108 | print_test_info(get_times, target_percentile=95) 109 | 110 | # end 111 | print("\nEnd of testing the cache at {}\n".format(datetime.now())) 112 | 113 | return 114 | 115 | def run_functional_tests( 116 | cache_name: str, # name of the cache to use 117 | **kwargs 118 | ): 119 | my_client = redis3Client(cache_name=cache_name, db=0, verbose=False, **kwargs) 120 | 121 | # set a key and get it back 122 | r = my_client.set('foo', 'bar') 123 | assert r is True, "Expected True, got {}".format(r) 124 | r = my_client.get('foo') 125 | assert r == 'bar', "Expected 'bar', got {}".format(r) 126 | assert isinstance(r, str), "Expected a string, got {}".format(type(r)) 127 | # overwrite the key and get it back 128 | r = my_client.set('foo', 'bar2') 129 | assert my_client.get('foo') == 'bar2', "Expected 'bar2', got {}".format(r) 130 | # store something more complex, as long as you can serialize it to a string 131 | my_obj = { 'k_{}'.format(i): 'v_{}'.format(i) for i in range(5) } 132 | r = my_client.set('foo_dic', json.dumps(my_obj)) 133 | r = json.loads(my_client.get('foo_dic')) 134 | assert r['k_0'] == 'v_0', "Expected 'v_0', got {}".format(r['k_0']) 135 | # get a key that doesn't exist by randomly picking a uuid 136 | r = my_client.get(str(uuid.uuid4())) 137 | assert r is None, "Expected None, got {}".format(r) 138 | # set a list of keys and get them back in one go 139 | key_list = ['playground_{}'.format(i) for i in range(5)] 140 | val_list = ['bar_{}'.format(i) for i in range(5)] 141 | r = my_client.mset(key_list, val_list) 142 | assert all(r), "Expected all True, got {}".format(r) 143 | val_list_back = my_client.mget(key_list) 144 | assert val_list_back == val_list, "Expected {}, got {}".format(val_list, val_list_back) 145 | # use the keys command to get all keys in the cache 146 | all_keys_in_db = list([k for k in my_client.keys()]) 147 | print("Found {} keys in cache, first three: {}".format(len(all_keys_in_db), all_keys_in_db[:3])) 
148 | # delete one 149 | r = my_client.delete(all_keys_in_db[0]) 150 | assert r is True, "Expected True, got {}".format(r) 151 | # delete one that does not exist by getting a random string 152 | # it should be ignored and get True back again 153 | r = my_client.delete(str(uuid.uuid4())) 154 | assert r is True, "Expected True, got {}".format(r) 155 | # switch to a different bucket by passing a non-int (should get an error) 156 | try: 157 | my_client.db = 'ciao' 158 | except ValueError: 159 | pass 160 | # now switch db for real to a magic number 161 | my_client.db = "100" 162 | # set a key and list all keys in the cache (should be only one) 163 | my_client.set('foo_100', 'bar_100') 164 | all_keys_in_db = list([k for k in my_client.keys()]) 165 | assert len(all_keys_in_db) == 1, "Expected 1 key, got {}".format(len(all_keys_in_db)) 166 | assert all_keys_in_db[0] == 'foo_100', "Expected 'foo_100', got {}".format(all_keys_in_db[0]) 167 | # finally delete the key and check it's gone 168 | r = my_client.delete('foo_100') 169 | # do it twice, nothings should happen 170 | r = my_client.delete('foo_100') 171 | # now, try to get it back, it should return None 172 | r = my_client.get('foo_100') 173 | assert r is None, "Expected None, got {}".format(r) 174 | # end 175 | print("\nEnd of functional tests {}\n".format(datetime.now())) 176 | 177 | return 178 | 179 | def run_tests( 180 | cache_name: str, # name of the cache to use 181 | k: int, # number of keys to set / get during tests 182 | **kwargs 183 | ): 184 | print("Started testing at {}\n".format(datetime.now())) 185 | # first, run some functional cache tests 186 | run_functional_tests(cache_name, **kwargs) 187 | # if nothing fails, create a list of keys and values for perf. testing 188 | test_keys = ['foo_{}'.format(i) for i in range(k)] 189 | test_values = ['bar_{}'.format(i) for i in range(k)] 190 | # test performance of a normal bucket 191 | run_normal_bucket_tests(test_keys, test_values, **kwargs) 192 | # test performance of the cache 193 | run_cache_tests(test_keys, test_values, cache_name, **kwargs) 194 | print("\n====> Now running the tests again with 5x keys and values <====\n") 195 | test_keys = ['foo_{}'.format(i) for i in range(k * 5)] 196 | test_values = ['bar_{}'.format(i) for i in range(k * 5)] 197 | run_normal_bucket_tests(test_keys, test_values, **kwargs) 198 | run_cache_tests(test_keys, test_values, cache_name, **kwargs) 199 | 200 | print("\nFinished testing at {}. 
See you, s3ace cowboy".format(datetime.now())) 201 | return 202 | 203 | 204 | if __name__ == "__main__": 205 | import sys 206 | # make sure we have a cache name 207 | assert len(sys.argv) == 2, "Please provide a cache name" 208 | cache_name = sys.argv[1] 209 | # note that k < 100 will create a problem with the percentile function 210 | run_tests(cache_name, k=100) 211 | 212 | # note that you can provide AWS credentials through the credential file on the machine, 213 | # or through env variables or as kwargs, just as you would do with any instance 214 | # of boto3.client: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html 215 | #aws_client_auth = { "aws_access_key_id": "", "aws_secret_access_key": "" } 216 | #run_tests(cache_name, k=100, **aws_client_auth) 217 | 218 | -------------------------------------------------------------------------------- /src/utils.py: -------------------------------------------------------------------------------- 1 | from time import time 2 | 3 | 4 | def measure_func(func): 5 | # this wrapper shows the execution time of the function object passed 6 | def wrap_func(*args, **kwargs): 7 | t1 = time() 8 | result = func(*args, **kwargs) 9 | t2 = time() 10 | result_to_print = result if not isinstance(result, list) else result[:2] 11 | print(f'{func.__name__!r} executed in {(t2-t1):.4f}s, with result: {result_to_print}') 12 | return result 13 | return wrap_func --------------------------------------------------------------------------------