├── .coveragerc ├── .github └── workflows │ └── python-package.yml ├── .gitignore ├── .readthedocs.yaml ├── CONTRIBUTING.md ├── EXAMPLES.md ├── LICENSE ├── Makefile ├── README.md ├── awsimple ├── __init__.py ├── __version__.py ├── aws.py ├── cache.py ├── dynamodb.py ├── dynamodb_miv.py ├── logs.py ├── mock.py ├── py.typed ├── s3.py ├── sns.py └── sqs.py ├── coverage.xml ├── doc ├── awsimple_sf_python_6_21.pdf ├── awsimple_sf_python_6_21.pptx ├── flake8_report.txt └── notes.txt ├── doc_source ├── aws_access.rst ├── conf.py ├── coverage.txt ├── dynamodb_access.rst ├── index.rst ├── quick_start_guide.rst ├── requirements.txt ├── s3_access.rst ├── sns_access.rst ├── sqs_access.rst ├── thank_you.rst └── user_guide.rst ├── examples ├── aws_access_test.py ├── derived_access_class.py ├── dynamodb_partition_and_sort.py ├── dynamodb_partition_only.py ├── make_venv.bat ├── make_venv.sh ├── read_s3_object.py ├── requirements-examples.txt ├── run_examples.bat ├── run_examples.sh └── write_read_s3_object.py ├── make_venv_dev.bat ├── make_venv_dev.sh ├── mypy.ini ├── pyproject.toml ├── requirements-dev.txt ├── scripts ├── blackify.bat ├── coverage.bat ├── doc_coverage_updater.py ├── pypi.bat ├── pytest.bat ├── run_flake8.bat ├── run_mypy.bat ├── run_sphinx.bat └── start_localstack.bat ├── setup.py └── test_awsimple ├── 280px-PNG_transparency_demonstration_1.png ├── __init__.py ├── conftest.py ├── const.py ├── dict_is_close.py ├── sqs_drain.py ├── test_aws_test.py ├── test_c_dynamodb_create_table.py ├── test_dynamodb.py ├── test_dynamodb_delete.py ├── test_dynamodb_delete_all_items.py ├── test_dynamodb_get_item.py ├── test_dynamodb_item_not_found.py ├── test_dynamodb_miv_ui.py ├── test_dynamodb_primary_key_as_number.py ├── test_dynamodb_query.py ├── test_dynamodb_query_kwargs.py ├── test_dynamodb_scan_cache.py ├── test_dynamodb_scan_table_as_dict.py ├── test_dynamodb_secondary_index.py ├── test_dynamodb_table_not_found.py ├── test_dynamodb_upsert.py ├── test_get_account_id.py ├── test_get_configuration_information.py ├── test_logs.py ├── test_lru_cache_helpers.py ├── test_mock.py ├── test_most_recent_error.py ├── test_s3_bucket.py ├── test_s3_bucket_not_found.py ├── test_s3_delete.py ├── test_s3_dir.py ├── test_s3_does_not_exist.py ├── test_s3_empty_bucket.py ├── test_s3_file_transfer.py ├── test_s3_keys.py ├── test_s3_list_buckets.py ├── test_s3_multiple_transfers.py ├── test_s3_object_floats.py ├── test_s3_public_readable.py ├── test_s3_python_object.py ├── test_s3_string.py ├── test_s3_transfer_lines.py ├── test_serializable.py ├── test_sns_create.py ├── test_sns_publish.py ├── test_sqs_create_and_delete_queue.py ├── test_sqs_get_arn.py ├── test_sqs_messages.py ├── test_sqs_messages_available_and_purge.py ├── test_sqs_queue_exists.py ├── test_sqs_receive_nothing.py ├── test_sqs_user_provided_timeout.py └── tst_paths.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = 3 | *venv/* 4 | test_*/* 5 | 6 | branch = True 7 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python package 5 | 6 | on: 7 | push: 8 | branches: [ main ] 9 | pull_request: 10 
| branches: [ main ] 11 | workflow_dispatch: 12 | branches: [ main ] 13 | 14 | jobs: 15 | build: 16 | 17 | runs-on: ubuntu-latest 18 | strategy: 19 | matrix: 20 | python-version: ["3.12"] 21 | 22 | steps: 23 | - uses: actions/checkout@v4 24 | - name: Set up Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v4 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | - name: Install dependencies 29 | run: | 30 | python -m pip install --upgrade pip 31 | python -m pip install flake8 pytest 32 | if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi 33 | 34 | # was for pytest-fly, but couldn't get CI to install a compatible libEGL 35 | # - name: Install libEGL dependency 36 | # run: | 37 | # sudo apt-get update 38 | # sudo apt-get install -y libegl-mesa0 libgl1 39 | 40 | - name: Lint with flake8 41 | run: | 42 | # stop the build if there are Python syntax errors or undefined names 43 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 44 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 45 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 46 | 47 | - name: Test with pytest 48 | run: | 49 | pytest -s test_awsimple 50 | 51 | # - name: "Upload coverage to Codecov" 52 | # uses: codecov/codecov-action@v1 53 | # with: 54 | # fail_ci_if_error: true 55 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | 49 | # codecov 50 | # coverage.xml 51 | 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # jca 135 | .idea/ 136 | cache/ 137 | temp/ 138 | # the real one is just LICENSE but some systems want both 139 | LICENSE.txt 140 | cov/ 141 | big_last_run.txt 142 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Build documentation in the docs/ directory with Sphinx 9 | sphinx: 10 | configuration: doc_source/conf.py 11 | 12 | # Optionally build your docs in additional formats such as PDF 13 | formats: 14 | - pdf 15 | 16 | # Optionally set the version of Python and requirements required to build your docs 17 | python: 18 | version: 3.8 19 | install: 20 | - requirements: doc_source/requirements.txt 21 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to awsimple 2 | 3 | Hi! First of all, thank you for contributing. :heart: 4 | 5 | All of the usual sorts of contributions are welcome: bug reports, patches, and feedback. 6 | Feel free to [browse existing issues](https://github.com/jamesabel/awsimple/issues) or [create a new one](https://github.com/jamesabel/awsimple/issues/new). 7 | 8 | 9 | ## Got a problem? 10 | 11 | You're welcome to [create an issue](https://github.com/jamesabel/awsimple/issues/new), but please [search existing ones](https://github.com/jamesabel/awsimple/issues) first to see if it's been discussed before. 12 | 13 | 14 | ## Want to submit some code or docs? 15 | 16 | Great! 17 | 18 | If you're interested in tackling an [existing issue](https://github.com/jamesabel/awsimple/issues), comment on one to make sure you're on the right track. 19 | If it's an idea you have or a problem not captured in an issue, [create one](https://github.com/jamesabel/awsimple/issues/new) and let's align. 20 | 21 | 22 | ### Dev setup 23 | 24 | Requirements: 25 | 26 | - Python 3.8 or higher 27 | 28 | Fork the repo to make your own personal copy, and then git clone your fork onto your computer. 29 | 30 | Once you have that, install project dependencies with: 31 | 32 | #### Mac / Linux 33 | ``` 34 | source make_venv_dev.sh 35 | source ./venv/bin/activate 36 | ``` 37 | 38 | #### Windows 39 | ``` 40 | make_venv_dev.bat 41 | .\venv\Scripts\activate.bat 42 | ``` 43 | 44 | At this point you should be able to make changes to the codebase and run things. 45 |
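You can also run the test suite the same way the CI workflow does (see `.github/workflows/python-package.yml`). Note that the tests use real AWS services unless you set the moto mock or localstack environment variables described in the README:

```
pytest -s test_awsimple
```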
-------------------------------------------------------------------------------- /EXAMPLES.md: -------------------------------------------------------------------------------- 1 | # Running the examples for awsimple 2 | 3 | There are four examples in the example folder. When run, these examples should 4 | - check that you have access to AWS through the AWS CLI 5 | - write a file called "hello.txt" to the S3 bucket awsimple-test-bucket-{random_number} 6 | - Note: It is strongly recommended to change the bucket name before you run this, but it will work without the change 7 | - read the file from the S3 bucket awsimple-test-bucket-{random_number} 8 | 9 | 10 | 11 | ### 1. Make the Virtual Environment and activate it 12 | 13 | #### Mac / Linux 14 | ``` 15 | source make_venv.sh 16 | source ./venv/bin/activate 17 | ``` 18 | 19 | #### Windows 20 | ``` 21 | make_venv.bat 22 | .\venv\Scripts\activate.bat 23 | ``` 24 | 25 | ### 2. Check your AWS profile and create a test user named "testawsimple" with read/write access to S3. 26 | 27 | Your default AWS profile should be set up before you run the examples. The examples use a test user named "testawsimple". You should create this user before running the examples. 28 | 29 | ``` 30 | aws configure 31 | ``` 32 |
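After configuration, the "testawsimple" profile entry in your AWS credentials file (typically `~/.aws/credentials`) looks roughly like this sketch (placeholder values, not real keys):

```
[testawsimple]
aws_access_key_id = <your access key id>
aws_secret_access_key = <your secret access key>
```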
33 | ### 3. Run the examples 34 | 35 | #### Mac / Linux 36 | ``` 37 | source run_examples.sh 38 | ``` 39 | 40 | #### Windows 41 | ``` 42 | run_examples.bat 43 | ``` 44 | 45 | 46 | 47 | ### Got a problem? 48 | 49 | You're welcome to [create an issue](https://github.com/jamesabel/awsimple/issues/new), but please [search existing ones](https://github.com/jamesabel/awsimple/issues) first to see if it's been discussed before. 50 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020-2021 James Abel 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = doc_source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |


25 | 26 | # AWSimple 27 | 28 | *(pronounced A-W-Simple)* 29 | 30 | Simple API for basic AWS services such as S3 (Simple Storage Service), DynamoDB (a NoSQL database), SNS (Simple Notification Service), 31 | and SQS (Simple Queuing Service). 32 | 33 | Project featured on [PythonBytes Podcast Episode #224](https://pythonbytes.fm/episodes/show/224/join-us-on-a-python-adventure-back-to-1977). 34 | 35 | Full documentation available on [Read the Docs](https://awsimple.readthedocs.io/). 36 | 37 | ### Features: 38 | 39 | - Simple Object Oriented API on top of boto3 40 | 41 | - One-line S3 file write, read, and delete 42 | 43 | - Automatic S3 retries 44 | 45 | - Locally cached S3 accesses 46 | 47 | - True file hashing (SHA512) for S3 files (S3's etag is not a true file hash) 48 | 49 | - DynamoDB full table scans (with local cache option) 50 | 51 | - DynamoDB secondary indexes 52 | 53 | - Built-in pagination (e.g. for DynamoDB table scans and queries). Always get everything you asked for. 54 | 55 | - Can automatically set SQS timeouts based on runtime data (can also be user-specified) 56 | 57 | - Supports moto mock and localstack. Handy for testing and CI. 58 | 59 | 60 | ## Usage 61 | 62 | pip install awsimple 63 | 64 | ## Examples 65 | 66 | The example folder has several examples you can customize and run. Instructions are available in [examples](EXAMPLES.md). 67 | 68 | ### S3 69 | 70 | # print string contents of an existing S3 object 71 | s = S3Access(profile_name="testawsimple", bucket_name="testawsimple").read_string("helloworld.txt") 72 | print(s) 73 | 74 | ### DynamoDB 75 | 76 | dynamodb_access = DynamoDBAccess(profile_name="testawsimple", table_name="testawsimple") 77 | 78 | # put an item into DynamoDB 79 | dynamodb_access.put_item({"id": "batman", "city": "Gotham"}) 80 | 81 | # now get it back 82 | item = dynamodb_access.get_item("id", "batman") 83 | print(item["city"]) # Gotham 84 | 85 | ## Introduction 86 | 87 | `awsimple` is a simple interface into basic AWS services such as S3 (Simple Storage Service) and 88 | DynamoDB (a simple NoSQL database). It has a set of higher level default settings and behavior 89 | that should cover many basic usage models. 90 | 91 | ## Discussion 92 | 93 | AWS's "serverless" resources offer many benefits. You only pay for what you use, easily scale, 94 | and generally have high performance and availability. 95 | 96 | While AWS has many varied services with extensive flexibility, using it for more straightforward 97 | applications is sometimes a daunting task. There are access modes that are probably not required 98 | and some default behaviors are not best for common usages. `awsimple` aims to create a higher 99 | level API to AWS services (such as S3, DynamoDB, SNS, and SQS) to improve programmer productivity. 100 | 101 | 102 | ## S3 103 | 104 | `awsimple` calculates the local file hash (SHA512) and inserts it into the S3 object metadata. This is used 105 | to test for file equivalency. 106 | 107 | ## Caching 108 | 109 | S3 objects and DynamoDB tables can be cached locally to reduce network traffic, minimize AWS costs, 110 | and potentially offer a speedup. 111 | 112 | DynamoDB cached table scans are particularly useful for tables that are infrequently updated. 113 |
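For example, S3 downloads can go through the local cache (a minimal sketch, reusing the placeholder profile/bucket names from the examples above; `download_cached` returns an `S3DownloadStatus` with `success`, `cache_hit`, and `cache_write` fields):

    from pathlib import Path
    from awsimple import S3Access

    s3_access = S3Access(profile_name="testawsimple", bucket_name="testawsimple")
    status = s3_access.download_cached("helloworld.txt", Path("helloworld.txt"))
    print(status.cache_hit)  # True once the object is in the local cache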
114 | ## What `awsimple` Is Not 115 | 116 | - `awsimple` is not necessarily the most memory and CPU efficient 117 | 118 | - `awsimple` does not provide cost monitoring hooks 119 | 120 | - `awsimple` does not provide all the options and features that the regular AWS API (e.g. boto3) does 121 | 122 | ## Updates/Releases 123 | 124 | 3.x.x - Cache life for cached DynamoDB scans is now based on most recent table modification time (kept in a separate 125 | table). Explicit cache life is no longer required (parameter has been removed). 126 | 127 | ## Testing using moto mock and localstack 128 | 129 | moto mock-ing can improve performance and reduce AWS costs. `awsimple` supports both moto mock and localstack. 130 | In general, it's recommended to develop with mock and finally test with the real AWS services. 131 | 132 | Select via environment variables: 133 | 134 | - AWSIMPLE_USE_MOTO_MOCK=1 # use moto 135 | - AWSIMPLE_USE_LOCALSTACK=1 # use localstack 136 | 137 | ### Test Time 138 | 139 | | Method | Test Time (seconds) | Speedup (or slowdown) | Comment | 140 | |------------|---------------------|-----------------------|-----------------| 141 | | AWS | 462.65 | 1x | baseline | 142 | | mock | 40.46 | 11x | faster than AWS | 143 | | localstack | 2246.82 | 0.2x | slower than AWS | 144 | 145 | System: Intel® Core™ i7 CPU @ 3.47GHz, 32 GB RAM 146 | 147 | ## Contributing 148 | 149 | Contributions are welcome, and more information is available in the [contributing guide](CONTRIBUTING.md). -------------------------------------------------------------------------------- /awsimple/__init__.py: -------------------------------------------------------------------------------- 1 | from .__version__ import __application_name__, __version__, __author__, __title__ 2 | from .mock import use_moto_mock_env_var, is_mock, use_localstack_env_var, is_using_localstack 3 | from .aws import AWSAccess, AWSimpleException, boto_error_to_string 4 | from .cache import get_disk_free, get_directory_size, lru_cache_write, CacheAccess, CACHE_DIR_ENV_VAR 5 | from .dynamodb import DynamoDBAccess, dict_to_dynamodb, DBItemNotFound, DynamoDBTableNotFound, dynamodb_to_json, dynamodb_to_dict, QuerySelection, DictKey, convert_serializable_special_cases 6 | from .dynamodb import KeyType, aws_name_to_key_type 7 | from .dynamodb_miv import DynamoDBMIVUI, miv_string, get_time_us, miv_us_to_timestamp 8 | from .s3 import S3Access, S3DownloadStatus, S3ObjectMetadata, BucketNotFound 9 | from .sqs import SQSAccess, SQSPollAccess, aws_sqs_long_poll_max_wait_time, aws_sqs_max_messages 10 | from .sns import SNSAccess 11 | from .logs import LogsAccess 12 | -------------------------------------------------------------------------------- /awsimple/__version__.py: -------------------------------------------------------------------------------- 1 | __application_name__ = "awsimple" 2 | __title__ = __application_name__ 3 | __author__ = "abel" 4 | __version__ = "3.6.1" 5 | __author_email__ = "j@abel.co" 6 | __url__ = "https://github.com/jamesabel/awsimple" 7 | __download_url__ = "https://github.com/jamesabel/awsimple" 8 | __description__ = "Simple AWS API for S3, DynamoDB, SNS, and SQS" 9 | -------------------------------------------------------------------------------- /awsimple/aws.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Union, Any 3 | from logging import getLogger 4 | 5 | from typeguard import typechecked 6 | 7 | from boto3.session import Session 8 | from botocore.credentials import Credentials 9 | 10 | from awsimple import __application_name__, is_mock, is_using_localstack 11 | 12 | log = getLogger(__application_name__) 13 | 14 | 15 | class AWSimpleException(Exception): 16 | pass 17 | 18 | 19 | def boto_error_to_string(boto_error) -> Union[str,
None]: 20 | if (response := boto_error.response) is None: 21 | most_recent_error = str(boto_error) 22 | else: 23 | if (response_error := response.get("Error")) is None: 24 | most_recent_error = None 25 | else: 26 | most_recent_error = response_error.get("Code") 27 | return most_recent_error 28 | 29 | 30 | class AWSAccess: 31 | @typechecked() 32 | def __init__( 33 | self, 34 | resource_name: Union[str, None] = None, 35 | profile_name: Union[str, None] = None, 36 | aws_access_key_id: Union[str, None] = None, 37 | aws_secret_access_key: Union[str, None] = None, 38 | region_name: Union[str, None] = None, 39 | ): 40 | """ 41 | AWSAccess - takes care of basic AWS access (e.g. session, client, resource), getting some basic AWS information, and mock support for testing. 42 | 43 | :param resource_name: AWS resource name (e.g. s3, dynamodb, sqs, sns, etc.). Can be None if just testing the connection. 44 | 45 | # Provide either: profile name or access key ID/secret access key pair 46 | 47 | :param profile_name: AWS profile name 48 | :param aws_access_key_id: AWS access key (required if secret_access_key given) 49 | :param aws_secret_access_key: AWS secret access key (required if access_key_id given) 50 | :param region_name: AWS region (may be optional - see AWS docs) 51 | """ 52 | 53 | import boto3 # import here to facilitate mocking 54 | 55 | self.resource_name = resource_name 56 | self.profile_name = profile_name 57 | self.aws_access_key_id = aws_access_key_id 58 | self.aws_secret_access_key = aws_secret_access_key 59 | self.region_name = region_name 60 | 61 | # string representation of AWS most recent error code 62 | self.most_recent_error = None # type: Union[str, None] 63 | 64 | self._moto_mock = None 65 | self._aws_keys_save = {} 66 | 67 | # use keys in AWS config 68 | # https://docs.aws.amazon.com/cli/latest/userguide/cli-config-files.html 69 | kwargs = {} 70 | for k in ["profile_name", "aws_access_key_id", "aws_secret_access_key", "region_name"]: 71 | if getattr(self, k) is not None: 72 | kwargs[k] = getattr(self, k) 73 | self.session = boto3.session.Session(**kwargs) 74 | 75 | self.client = None # type: Any 76 | if is_mock(): 77 | # moto mock AWS 78 | for aws_key in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_SECURITY_TOKEN", "AWS_SESSION_TOKEN"]: 79 | self._aws_keys_save[aws_key] = os.environ.get(aws_key) # will be None if not set 80 | os.environ[aws_key] = "testing" 81 | 82 | from moto import mock_aws 83 | 84 | self._moto_mock = mock_aws() 85 | self._moto_mock.start() 86 | region = "us-east-1" 87 | if self.resource_name == "logs" or self.resource_name is None: 88 | # logs don't have a resource 89 | self.resource = None 90 | else: 91 | self.resource = boto3.resource(self.resource_name, region_name=region) # type: ignore 92 | if self.resource_name is None: 93 | self.client = None 94 | else: 95 | self.client = boto3.client(self.resource_name, region_name=region) # type: ignore 96 | if self.resource_name == "s3": 97 | assert self.resource is not None 98 | self.resource.create_bucket(Bucket="testawsimple") # todo: put this in the test code 99 | elif is_using_localstack(): 100 | self.aws_access_key_id = "test" 101 | self.aws_secret_access_key = "test" 102 | self.region_name = "us-west-2" 103 | if self.resource_name is not None: 104 | if self.resource_name == "logs": 105 | # logs don't have resource 106 | self.resource = None 107 | else: 108 | self.resource = boto3.resource(self.resource_name, endpoint_url=self._get_localstack_endpoint_url()) # type: ignore 109 | self.client = 
boto3.client(self.resource_name, endpoint_url=self._get_localstack_endpoint_url()) # type: ignore 110 | elif self.resource_name is None: 111 | # just the session, but not the client or resource 112 | self.client = None 113 | self.resource = None 114 | else: 115 | self.client = self.session.client(self.resource_name, config=self._get_config()) # type: ignore 116 | if self.resource_name == "logs" or self.resource_name == "rds": 117 | # logs and rds don't have a resource 118 | self.resource = None 119 | else: 120 | self.resource = self.session.resource(self.resource_name, config=self._get_config()) # type: ignore 121 | 122 | def _get_localstack_endpoint_url(self) -> str | None: 123 | endpoint_url = "http://localhost:4566" # default localstack endpoint 124 | return endpoint_url 125 | 126 | def _get_config(self): 127 | from botocore.config import Config # import here to facilitate mocking 128 | 129 | timeout = 60 * 60 # AWS default is 60, which is too short for some uses and/or connections 130 | return Config(connect_timeout=timeout, read_timeout=timeout) 131 | 132 | @typechecked() 133 | def get_region(self) -> Union[str, None]: 134 | """ 135 | Get current selected AWS region 136 | 137 | :return: region string 138 | """ 139 | return self.session.region_name 140 | 141 | def get_access_key(self) -> Union[str, None]: 142 | """ 143 | Get current access key string 144 | 145 | :return: access key 146 | """ 147 | _session = self.session 148 | assert isinstance(_session, Session) # for mypy 149 | _credentials = _session.get_credentials() 150 | assert isinstance(_credentials, Credentials) # for mypy 151 | access_key = _credentials.access_key 152 | return access_key 153 | 154 | def get_account_id(self): 155 | """ 156 | Get AWS account ID *** HAS BEEN REMOVED *** 157 | 158 | :return: account ID 159 | """ 160 | raise NotImplementedError(".get_account_id() has been removed") 161 | 162 | def test(self) -> bool: 163 | """ 164 | Basic connection/capability test 165 | 166 | :return: True if connection OK 167 | """ 168 | 169 | resources = self.session.get_available_resources() # boto3 will throw an error if there's an issue here 170 | if self.resource_name is not None and self.resource_name not in resources: 171 | raise PermissionError(self.resource_name) # we don't have permission to the specified resource 172 | return True # if we got here, we were successful 173 | 174 | def is_mocked(self) -> bool: 175 | """ 176 | Return True if currently mocking the AWS interface (e.g. for testing). 
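For example (a sketch; the AWSIMPLE_USE_MOTO_MOCK environment variable must be set to "1" before the first awsimple object is constructed):

            S3Access(bucket_name="testawsimple").is_mocked()  # True under moto mock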
177 | 178 | :return: True if mocked 179 | """ 180 | return self._moto_mock is not None 181 | 182 | def clear_most_recent_error(self): 183 | self.most_recent_error = None 184 | 185 | def __del__(self): 186 | if self._moto_mock is not None: 187 | # if mocking, put everything back 188 | 189 | for aws_key, value in self._aws_keys_save.items(): 190 | if value is None: 191 | del os.environ[aws_key] 192 | else: 193 | os.environ[aws_key] = value 194 | 195 | self._moto_mock.stop() 196 | self._moto_mock = None # mock is "done" 197 | -------------------------------------------------------------------------------- /awsimple/cache.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from shutil import disk_usage, copy2 3 | import os 4 | import math 5 | from typing import Union 6 | from logging import getLogger 7 | 8 | from typeguard import typechecked 9 | from appdirs import user_cache_dir 10 | 11 | from awsimple import __application_name__, __author__, AWSAccess, AWSimpleException 12 | 13 | log = getLogger(__application_name__) 14 | 15 | CACHE_DIR_ENV_VAR = f"{__application_name__}_CACHE_DIR".upper() 16 | 17 | 18 | @typechecked() 19 | def get_disk_free(path: Path = Path(".")) -> int: 20 | total, used, free = disk_usage(Path(path).absolute().anchor) 21 | log.info(f"{total=} {used=} {free=}") 22 | return free 23 | 24 | 25 | @typechecked() 26 | def get_directory_size(path: Path) -> int: 27 | size = 0 28 | for p in path.glob("*"): 29 | if p.is_file(): 30 | size += os.path.getsize(p) 31 | elif p.is_dir(): 32 | size += get_directory_size(p) 33 | return size 34 | 35 | 36 | @typechecked() 37 | def lru_cache_write(new_data: Union[Path, bytes], cache_dir: Path, cache_file_name: str, max_absolute_cache_size: Union[int, None] = None, max_free_portion: Union[float, None] = None) -> bool: 38 | """ 39 | free up space in the LRU cache to make room for the new file 40 | :param new_data: path to new file or a bytes object we want to put in the cache 41 | :param cache_dir: cache directory 42 | :param cache_file_name: file name to write in cache 43 | :param max_absolute_cache_size: max absolute cache size (or None if not specified) 44 | :param max_free_portion: max portion of disk free space the cache is allowed to consume (e.g. 
0.1 to take up to 10% of free disk space) 45 | :return: True if wrote to cache 46 | """ 47 | 48 | least_recently_used_path = None 49 | least_recently_used_access_time = None 50 | least_recently_used_size = None 51 | wrote_to_cache = False 52 | 53 | try: 54 | max_free_absolute = max_free_portion * get_disk_free() if max_free_portion is not None else None 55 | values = [v for v in [max_free_absolute, max_absolute_cache_size] if v is not None] 56 | max_cache_size = min(values) if len(values) > 0 else None 57 | log.info(f"{max_cache_size=}") 58 | 59 | if isinstance(new_data, Path): 60 | new_size = os.path.getsize(new_data) 61 | elif isinstance(new_data, bytes): 62 | new_size = len(new_data) 63 | else: 64 | raise RuntimeError 65 | 66 | if max_cache_size is None: 67 | is_room = True # no limit 68 | elif new_size > max_cache_size: 69 | log.info(f"{new_data=} {new_size=} is larger than the cache itself {max_cache_size=}") 70 | is_room = False # new file will never fit so don't try to evict to make room for it 71 | else: 72 | cache_size = get_directory_size(cache_dir) 73 | overage = (cache_size + new_size) - max_cache_size 74 | 75 | # cache eviction 76 | while overage > 0: 77 | starting_overage = overage 78 | 79 | # find the least recently used file 80 | least_recently_used_path = None 81 | least_recently_used_access_time = None 82 | least_recently_used_size = None 83 | for file_path in cache_dir.rglob("*"): 84 | access_time = os.path.getatime(file_path) 85 | if least_recently_used_path is None or least_recently_used_access_time is None or access_time < least_recently_used_access_time: 86 | least_recently_used_path = file_path 87 | least_recently_used_access_time = access_time 88 | least_recently_used_size = os.path.getsize(file_path) 89 | 90 | if least_recently_used_path is not None: 91 | log.debug(f"evicting {least_recently_used_path=} {least_recently_used_access_time=} {least_recently_used_size=}") 92 | least_recently_used_path.unlink() 93 | if least_recently_used_size is None: 94 | raise AWSimpleException(f"{least_recently_used_size=}") # evicted file size unknown - should not happen 95 | else: 96 | overage -= least_recently_used_size 97 | 98 | if overage == starting_overage: 99 | # tried to free up space but were unsuccessful, so give up 100 | overage = 0 101 | 102 | # determine if we have room for the new file 103 | is_room = get_directory_size(cache_dir) + new_size <= max_cache_size 104 | 105 | if is_room: 106 | cache_dir.mkdir(parents=True, exist_ok=True) 107 | cache_dest = Path(cache_dir, cache_file_name) 108 | if isinstance(new_data, Path): 109 | log.info(f"caching {new_data} to {cache_dest=}") 110 | copy2(new_data, cache_dest) 111 | wrote_to_cache = True 112 | elif isinstance(new_data, bytes): 113 | log.info(f"caching {len(new_data)}B to {cache_dest=}") 114 | with cache_dest.open("wb") as f: 115 | f.write(new_data) 116 | wrote_to_cache = True 117 | else: 118 | raise RuntimeError 119 | else: 120 | log.info(f"no room for {new_data=}") 121 | 122 | except (FileNotFoundError, IOError, PermissionError) as e: 123 | log.debug(f"{least_recently_used_path=} {least_recently_used_access_time=} {least_recently_used_size=} {e}", stack_info=True, exc_info=True) 124 | 125 | return wrote_to_cache 126 | 127 | 128 | class CacheAccess(AWSAccess): 129 | def __init__( 130 | self, 131 | resource_name: str, 132 | cache_dir: Union[Path, None] = None, 133 | cache_life: float = math.inf, 134 | cache_max_absolute: int = round(1e9), 135 | cache_max_of_free: float = 0.05, 136 | mtime_abs_tol: float = 10.0, 137 | use_env_var_cache_dir: bool = False, 138 | **kwargs, 139 | ):
140 | """ 141 | AWS Access for cacheables 142 | 143 | :param cache_dir: dir for cache 144 | :param cache_life: life of cache (in seconds) 145 | :param cache_max_absolute: max size of cache 146 | :param cache_max_of_free: max portion of disk free space the cache will consume 147 | :param mtime_abs_tol: window in seconds where a modification time will be considered equal 148 | :param use_env_var_cache_dir: set to True to attempt to use environmental variable for the cache dir (user must explicitly set this to use env var for cache dir) 149 | """ 150 | 151 | self.use_env_var_cache_dir = use_env_var_cache_dir 152 | if cache_dir is not None: 153 | self.cache_dir = cache_dir # passing cache dir in takes precedent 154 | elif self.use_env_var_cache_dir and (cache_dir_from_env_var := os.environ.get(CACHE_DIR_ENV_VAR)) is not None: 155 | self.cache_dir = Path(cache_dir_from_env_var.strip()) 156 | else: 157 | self.cache_dir = Path(user_cache_dir(__application_name__, __author__), "aws", resource_name) 158 | 159 | self.cache_life = cache_life # seconds 160 | self.cache_max_absolute = cache_max_absolute # max absolute cache size 161 | self.cache_max_of_free = cache_max_of_free # max portion of the disk's free space this LRU cache will take 162 | self.cache_retries = 10 # cache upload retries 163 | self.mtime_abs_tol = mtime_abs_tol # file modification times within this cache window (in seconds) are considered equivalent 164 | 165 | super().__init__(resource_name, **kwargs) 166 | -------------------------------------------------------------------------------- /awsimple/dynamodb_miv.py: -------------------------------------------------------------------------------- 1 | from typing import Union, Type 2 | import time 3 | from copy import deepcopy 4 | from logging import getLogger 5 | from decimal import Decimal 6 | 7 | from typeguard import typechecked 8 | from boto3.dynamodb.conditions import Key 9 | 10 | from awsimple import DynamoDBAccess, DBItemNotFound, __application_name__ 11 | 12 | miv_string = "mivui" # monotonically increasing value roughly based on uS (microseconds) since the epoch, as an integer 13 | 14 | log = getLogger(__application_name__) 15 | 16 | 17 | @typechecked() 18 | def get_time_us() -> int: 19 | """ 20 | Get the current time in uS (microseconds) since the epoch as an int. 21 | :return: time in uS since the epoch 22 | """ 23 | return int(round(time.time() * 1e6)) 24 | 25 | 26 | @typechecked() 27 | def miv_us_to_timestamp(miv_ui: Union[int, Decimal]) -> float: 28 | """ 29 | Convert a miv uS int back to regular timestamp since epoch in seconds. 30 | :param miv_ui: MIV in uS as an int 31 | :return: regular time since epoch in seconds (as a float) 32 | """ 33 | return float(miv_ui) / 1e6 34 | 35 | 36 | class DynamoDBMIVUI(DynamoDBAccess): 37 | """ 38 | DynamoDB with a MIV UI (monotonically increasing value in uS since the epoch as an integer) as the "sort" key of the primary key pair. Useful for ordered puts and gets to DynamoDB, 39 | and enables get-ing the most senior item. 40 | 41 | One of the complaints about DynamoDB is that it doesn't have "automatic indexing" and/or "automatic timestamp". While this isn't automatic indexing per se, it does provide for 42 | ordered writes for a given primary partition (hash) key, and does so via a monotonically increasing value roughly based on time (essentially an automatic timestamp), which in 43 | some cases may be even more useful. 
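Example (a minimal sketch; the table name is a placeholder, and the constructor arguments follow the DynamoDBAccess usage shown in the README):

        db = DynamoDBMIVUI(table_name="testawsimple")
        db.create_table("id")  # the sort key is the miv ("mivui") and is managed automatically
        db.put_item({"id": "batman", "city": "Gotham"})  # miv value is filled in from the current time in uS
        latest = db.get_most_senior_item("id", "batman")  # the most recently put item for this partition value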
44 | """ 45 | 46 | @typechecked() 47 | def create_table( # type: ignore 48 | self, 49 | partition_key: str, 50 | secondary_index: Union[str, None] = None, 51 | partition_key_type: Union[Type[str], Type[int], Type[bool]] = str, 52 | secondary_key_type: Union[Type[str], Type[int], Type[bool]] = str, 53 | ) -> bool: 54 | return super().create_table(partition_key, miv_string, secondary_index, partition_key_type, int, secondary_key_type) 55 | 56 | @typechecked() 57 | def put_item(self, item: dict, time_us: Union[int, None] = None): 58 | """ 59 | Put (write) a DynamoDB table item with the miv automatically filled in. 60 | 61 | :param item: item 62 | :param time_us: optional time in uS to use (otherwise current time is used) 63 | """ 64 | assert self.resource is not None 65 | table = self.resource.Table(self.table_name) 66 | 67 | # Determine new miv. The miv is an int to avoid comparison or specification problems that can arise with floats. For example, when it comes time to delete an item. 68 | if time_us is None: 69 | # get the miv for the existing entries 70 | partition_key = self.get_primary_partition_key() 71 | partition_value = item[partition_key] 72 | try: 73 | existing_most_senior_item = self.get_most_senior_item(partition_key, partition_value) 74 | existing_miv_ui = existing_most_senior_item[miv_string] 75 | except DBItemNotFound: 76 | existing_miv_ui = None 77 | 78 | current_time_us = get_time_us() 79 | if existing_miv_ui is None or current_time_us > existing_miv_ui: 80 | new_miv_ui = current_time_us 81 | else: 82 | # the prior writer seems to be from the future (from our perspective), so just increment the existing miv by the smallest increment and go with that 83 | new_miv_ui = existing_miv_ui + 1 84 | else: 85 | new_miv_ui = time_us 86 | 87 | # make the new item with the new miv and put it into the DB table 88 | new_item = deepcopy(item) 89 | new_item[miv_string] = new_miv_ui 90 | table.put_item(Item=new_item) 91 | 92 | @typechecked() 93 | def get_most_senior_item(self, partition_key: str, partition_value: Union[str, int]) -> dict: 94 | """ 95 | Get the most senior (greatest miv value) item for a given primary partition (hash) key. Raises DBItemNotFound if it doesn't exist. 96 | :return: most senior item 97 | """ 98 | assert self.resource is not None 99 | table = self.resource.Table(self.table_name) 100 | # just get the one most senior item 101 | response = table.query(KeyConditionExpression=Key(partition_key).eq(partition_value), ScanIndexForward=False, Limit=1) 102 | if (items := response.get("Items")) is None or len(items) < 1: 103 | raise DBItemNotFound(f"{partition_key=},{partition_value=}") 104 | item = items[0] # we asked for exactly one 105 | return item 106 | -------------------------------------------------------------------------------- /awsimple/logs.py: -------------------------------------------------------------------------------- 1 | import time 2 | import getpass 3 | import platform 4 | from functools import lru_cache 5 | from typing import Union 6 | from pathlib import Path 7 | from datetime import datetime 8 | 9 | from awsimple import AWSAccess 10 | 11 | 12 | @lru_cache() 13 | def get_user_name() -> str: 14 | return getpass.getuser() 15 | 16 | 17 | @lru_cache() 18 | def get_computer_name() -> str: 19 | return platform.node() 20 | 21 | 22 | class LogsAccess(AWSAccess): 23 | """ 24 | Perform logging to AWS using CloudWatch Logs 25 | """ 26 | 27 | def __init__(self, log_group: str, **kwargs): 28 | """ 29 | Log to AWS CloudWatch. 
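Example (a sketch; the log group name is a placeholder - .put() creates the group and stream on first use if needed):

            logs_access = LogsAccess("my-log-group")
            logs_access.put("application started")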
30 | :param log_group: AWS CloudWatch log group 31 | :param kwargs: other kwargs (e.g. for authentication) 32 | """ 33 | super().__init__("logs", **kwargs) 34 | self.log_group = log_group 35 | self._upload_sequence_token = None # type: Union[str, None] 36 | 37 | def put(self, message: str): 38 | """ 39 | Log a message. 40 | :param message: message as a string 41 | """ 42 | try: 43 | self._put(message) 44 | success = True 45 | except self.client.exceptions.ResourceNotFoundException: 46 | success = False 47 | if not success: 48 | # log group and stream does not appear to exist, so make them 49 | try: 50 | self.client.create_log_group(logGroupName=self.log_group) 51 | self.client.put_retention_policy(logGroupName=self.log_group, retentionInDays=self.get_retention_in_days()) 52 | except self.client.exceptions.ResourceAlreadyExistsException: 53 | pass 54 | self.client.create_log_stream(logGroupName=self.log_group, logStreamName=self.get_stream_name()) 55 | self._put(message) 56 | 57 | def _put(self, message: str): 58 | """ 59 | Perform the put log event. Internal method to enable try/except in the regular .put() method. 60 | :param message: message as a string 61 | """ 62 | 63 | # if self._upload_sequence_token is None: 64 | # we don't yet have the sequence token, so try to get it from AWS 65 | stream_name = self.get_stream_name() 66 | if self._upload_sequence_token is None: 67 | log_streams_description = self.client.describe_log_streams(logGroupName=self.log_group) 68 | if (log_streams := log_streams_description.get("logStreams")) is not None and len(log_streams) > 0: 69 | for log_stream in log_streams: 70 | if log_stream["logStreamName"] == stream_name: 71 | self._upload_sequence_token = log_stream.get("uploadSequenceToken") 72 | 73 | # timestamp defined by AWS to be mS since epoch 74 | log_events = [{"timestamp": int(round(time.time() * 1000)), "message": message}] 75 | try: 76 | if self._upload_sequence_token is None: 77 | put_response = self.client.put_log_events(logGroupName=self.log_group, logStreamName=stream_name, logEvents=log_events) 78 | else: 79 | put_response = self.client.put_log_events(logGroupName=self.log_group, logStreamName=stream_name, logEvents=log_events, sequenceToken=self._upload_sequence_token) 80 | except self.client.exceptions.InvalidSequenceTokenException as e: 81 | # something went terribly wrong in logging, so write what happened somewhere safe 82 | with Path(Path.home(), "awsimple_exception.txt").open("w") as f: 83 | f.write(f"{datetime.now().astimezone().isoformat()},{self.log_group=},{stream_name=},{self._upload_sequence_token=},{e}\n") 84 | put_response = None 85 | 86 | if put_response is None: 87 | self._upload_sequence_token = None 88 | else: 89 | self._upload_sequence_token = put_response.get("nextSequenceToken") 90 | 91 | def get_stream_name(self) -> str: 92 | """ 93 | Get the stream name. User of this class can override this method to use a different stream name. 94 | :return: stream name string 95 | """ 96 | return f"{get_computer_name()}-{get_user_name()}" 97 | 98 | def get_retention_in_days(self) -> int: 99 | """ 100 | Define the log retention in days. User of this class can override this method to use a different retention period (only used when log group is created). 
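For example, a subclass can shorten the retention period (a sketch):

            class ShortRetentionLogsAccess(LogsAccess):
                def get_retention_in_days(self) -> int:
                    return 30  # days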
101 | :return: retention time in days as an integer 102 | """ 103 | return 365 104 | -------------------------------------------------------------------------------- /awsimple/mock.py: -------------------------------------------------------------------------------- 1 | import os 2 | from functools import cache 3 | 4 | from tobool import to_bool_strict 5 | 6 | use_moto_mock_env_var = "AWSIMPLE_USE_MOTO_MOCK" 7 | use_localstack_env_var = "AWSIMPLE_USE_LOCALSTACK" 8 | 9 | 10 | @cache 11 | def is_mock() -> bool: 12 | """ 13 | Is using moto mock? 14 | :return: True if using moto mock. 15 | """ 16 | return to_bool_strict(os.environ.get(use_moto_mock_env_var, "0")) 17 | 18 | 19 | @cache 20 | def is_using_localstack() -> bool: 21 | """ 22 | Is using localstack? 23 | :return: True if using localstack. 24 | """ 25 | return to_bool_strict(os.environ.get(use_localstack_env_var, "0")) 26 | -------------------------------------------------------------------------------- /awsimple/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesabel/awsimple/8157ed3d9501fcbc56382f148385144ed73a307e/awsimple/py.typed -------------------------------------------------------------------------------- /awsimple/s3.py: -------------------------------------------------------------------------------- 1 | """ 2 | S3 Access 3 | """ 4 | 5 | import os 6 | import shutil 7 | import time 8 | from math import isclose 9 | from pathlib import Path 10 | from dataclasses import dataclass 11 | from datetime import datetime 12 | from typing import Dict, List, Union 13 | import json 14 | from logging import getLogger 15 | 16 | import boto3 17 | from botocore.client import Config 18 | from botocore.exceptions import ClientError, EndpointConnectionError, ConnectionClosedError, SSLError 19 | from boto3.s3.transfer import TransferConfig 20 | from s3transfer import S3UploadFailedError 21 | import urllib3.exceptions 22 | from typeguard import typechecked 23 | from hashy import get_string_sha512, get_file_sha512, get_bytes_sha512, get_dls_sha512 24 | from yasf import sf 25 | 26 | from awsimple import CacheAccess, __application_name__, lru_cache_write, AWSimpleException, convert_serializable_special_cases 27 | 28 | # Use this project's name as a prefix to avoid string collisions. Use dashes instead of underscore since that's AWS's convention. 29 | sha512_string = f"{__application_name__}-sha512" 30 | 31 | json_extension = ".json" 32 | 33 | log = getLogger(__application_name__) 34 | 35 | connection_errors = (S3UploadFailedError, ClientError, EndpointConnectionError, SSLError, urllib3.exceptions.ProtocolError, ConnectionClosedError) 36 | 37 | 38 | class BucketNotFound(AWSimpleException): 39 | def __init__(self, bucket_name): 40 | self.bucket_name = bucket_name 41 | self.message = "Bucket not found" 42 | super().__init__(self.message) 43 | 44 | def __str__(self): 45 | return f"{self.bucket_name=} {self.message}" 46 | 47 | 48 | @dataclass 49 | class S3DownloadStatus: 50 | success: bool = False 51 | cache_hit: Union[bool, None] = None 52 | cache_write: Union[bool, None] = None 53 | 54 | 55 | @dataclass 56 | class S3ObjectMetadata: 57 | bucket: str 58 | key: str 59 | size: int 60 | mtime: datetime 61 | etag: str # generally not used 62 | sha512: Union[str, None] # hex string - only entries written with awsimple will have this 63 | url: str # URL of S3 object 64 | 65 | def get_sha512(self) -> str: 66 | """ 67 | Get hash used to compare S3 objects. 
If the SHA512 is available (recommended), then use that. If not (e.g. an S3 object wasn't written with AWSimple), create a "substitute" 68 | SHA512 hash that should change if the object contents change. 69 | :return: SHA512 hash (as string) 70 | """ 71 | if (sha512_value := self.sha512) is None: 72 | # round timestamp to seconds to try to avoid possible small deltas when dealing with time and floats 73 | mtime_as_int = int(round(self.mtime.timestamp())) 74 | metadata_list = [self.bucket, self.key, self.size, mtime_as_int] 75 | if self.etag is not None and len(self.etag) > 0: 76 | metadata_list.append(self.etag) 77 | sha512_value = get_dls_sha512(metadata_list) 78 | 79 | return sha512_value 80 | 81 | 82 | @typechecked() 83 | def serializable_object_to_json_as_bytes(json_serializable_object: Union[List, Dict]) -> bytes: 84 | return bytes(json.dumps(json_serializable_object, default=convert_serializable_special_cases).encode("UTF-8")) 85 | 86 | 87 | def _get_json_key(s3_key: str): 88 | """ 89 | get JSON key given an s3_key that may not have the .json extension 90 | :param s3_key: s3 key, potentially without the extension 91 | :return: JSON S3 key 92 | """ 93 | if not s3_key.endswith(json_extension): 94 | s3_key = f"{s3_key}{json_extension}" 95 | return s3_key 96 | 97 | 98 | class S3Access(CacheAccess): 99 | @typechecked() 100 | def __init__(self, bucket_name: Union[str, None] = None, **kwargs): 101 | """ 102 | S3 Access 103 | 104 | :param bucket_name: S3 bucket name 105 | :param kwargs: kwargs 106 | """ 107 | self.bucket_name = bucket_name 108 | self.retry_sleep_time = 3.0 # seconds 109 | self.retry_count = 10 110 | self.public_readable = False 111 | self.download_status = S3DownloadStatus() 112 | super().__init__(resource_name="s3", **kwargs) 113 | 114 | def get_s3_transfer_config(self) -> TransferConfig: 115 | # workaround threading issue https://github.com/boto/s3transfer/issues/197 116 | # derived class can overload this if a different config is desired 117 | s3_transfer_config = TransferConfig(use_threads=False) 118 | return s3_transfer_config 119 | 120 | @typechecked() 121 | def set_public_readable(self, public_readable: bool): 122 | self.public_readable = public_readable 123 | 124 | @typechecked() 125 | def bucket_list(self) -> list: 126 | """ 127 | list out all buckets 128 | (not called list_buckets() since that's used in boto3 but this returns a list of bucket strings not a list of dicts) 129 | 130 | :return: list of buckets 131 | """ 132 | return [b["Name"] for b in self.client.list_buckets()["Buckets"]] 133 | 134 | @typechecked() 135 | def read_string(self, s3_key: str) -> str: 136 | """ 137 | Read contents of an S3 object as a string 138 | 139 | :param s3_key: S3 key 140 | :return: S3 object as a string 141 | """ 142 | log.debug(f"reading {self.bucket_name}/{s3_key}") 143 | assert self.resource is not None 144 | return self.resource.Object(self.bucket_name, s3_key).get()["Body"].read().decode() 145 | 146 | @typechecked() 147 | def read_lines(self, s3_key: str) -> List[str]: 148 | """ 149 | Read contents of an S3 object as a list of strings 150 | 151 | :param s3_key: S3 key 152 | :return: a list of strings 153 | """ 154 | return self.read_string(s3_key).splitlines() 155 | 156 | @typechecked() 157 | def write_string(self, input_str: str, s3_key: str): 158 | """ 159 | Write a string to an S3 object 160 | 161 | :param input_str: input string 162 | :param s3_key: S3 key 163 | """ 164 | log.debug(f"writing {self.bucket_name}/{s3_key}") 165 | assert self.resource is not None 166 | 
self.resource.Object(self.bucket_name, s3_key).put(Body=input_str, Metadata={sha512_string: get_string_sha512(input_str)}) 167 | 168 | @typechecked() 169 | def write_lines(self, input_lines: List[str], s3_key: str): 170 | """ 171 | Write a list of strings to an S3 bucket 172 | 173 | :param input_lines: a list of strings 174 | :param s3_key: S3 key 175 | """ 176 | self.write_string("\n".join(input_lines), s3_key) 177 | 178 | @typechecked() 179 | def delete_object(self, s3_key: str): 180 | """ 181 | Delete an S3 object 182 | 183 | :param s3_key: S3 key 184 | """ 185 | log.info(f"deleting {self.bucket_name}/{s3_key}") 186 | assert self.resource is not None 187 | self.resource.Object(self.bucket_name, s3_key).delete() 188 | 189 | @typechecked() 190 | def upload(self, file_path: Union[str, Path], s3_key: str, force: bool = False) -> bool: 191 | """ 192 | Upload a file to an S3 object 193 | 194 | :param file_path: path to file to upload 195 | :param s3_key: S3 key 196 | :param force: True to force the upload, even if the file hash matches the S3 contents 197 | :return: True if uploaded 198 | """ 199 | 200 | log.info(f'S3 upload : "{file_path}" to {self.bucket_name}/{s3_key}') 201 | 202 | if isinstance(file_path, str): 203 | file_path = Path(file_path) 204 | 205 | file_mtime = os.path.getmtime(file_path) 206 | file_sha512 = get_file_sha512(file_path) 207 | if force: 208 | upload_flag = True 209 | else: 210 | if self.object_exists(s3_key): 211 | s3_object_metadata = self.get_s3_object_metadata(s3_key) 212 | log.info(f"{s3_object_metadata=}") 213 | if s3_object_metadata.get_sha512() is not None and file_sha512 is not None: 214 | # use the hash provided by awsimple, if it exists 215 | upload_flag = file_sha512 != s3_object_metadata.get_sha512() 216 | else: 217 | # if not, use mtime 218 | upload_flag = not isclose(file_mtime, s3_object_metadata.mtime.timestamp(), abs_tol=self.mtime_abs_tol) 219 | else: 220 | upload_flag = True 221 | 222 | uploaded_flag = False 223 | if upload_flag: 224 | log.info(f"local file : {file_sha512=},force={force} - uploading") 225 | 226 | transfer_retry_count = 0 227 | while not uploaded_flag and transfer_retry_count < self.retry_count: 228 | extra_args = {"Metadata": {sha512_string: file_sha512}} 229 | if self.public_readable: 230 | extra_args["ACL"] = "public-read" # type: ignore 231 | log.info(f"{extra_args=}") 232 | 233 | try: 234 | self.client.upload_file(str(file_path), self.bucket_name, s3_key, ExtraArgs=extra_args, Config=self.get_s3_transfer_config()) 235 | uploaded_flag = True 236 | except connection_errors as e: 237 | log.warning(f"{file_path} to {self.bucket_name}:{s3_key} : {transfer_retry_count=} : {e}") 238 | time.sleep(self.retry_sleep_time) 239 | except RuntimeError as e: 240 | log.error(f"{file_path} to {self.bucket_name}:{s3_key} : {transfer_retry_count=} : {e}") 241 | time.sleep(self.retry_sleep_time) 242 | 243 | transfer_retry_count += 1 244 | 245 | else: 246 | log.info(f"file hash of {file_sha512} is the same as is already on S3 and force={force} - not uploading") 247 | 248 | return uploaded_flag 249 | 250 | @typechecked() 251 | def upload_object_as_json(self, json_serializable_object: Union[List, Dict], s3_key: str, force=False) -> bool: 252 | """ 253 | Upload a serializable Python object to an S3 object 254 | 255 | :param json_serializable_object: serializable object 256 | :param s3_key: S3 key 257 | :param force: True to force the upload, even if the file hash matches the S3 contents 258 | :return: True if uploaded 259 | """ 260 | 261 | s3_key = 
_get_json_key(s3_key) 262 | json_as_bytes = serializable_object_to_json_as_bytes(json_serializable_object) 263 | json_sha512 = get_bytes_sha512(json_as_bytes) 264 | upload_flag = True 265 | if not force and self.object_exists(s3_key): 266 | s3_object_metadata = self.get_s3_object_metadata(s3_key) 267 | log.info(f"{s3_object_metadata=}") 268 | if s3_object_metadata.get_sha512() is not None and json_sha512 is not None: 269 | # use the hash provided by awsimple, if it exists 270 | upload_flag = json_sha512 != s3_object_metadata.get_sha512() 271 | 272 | uploaded_flag = False 273 | if upload_flag: 274 | log.info(f"{json_sha512=},force={force} - uploading") 275 | 276 | transfer_retry_count = 0 277 | while not uploaded_flag and transfer_retry_count < self.retry_count: 278 | meta_data = {sha512_string: json_sha512} 279 | log.info(f"{meta_data=}") 280 | assert self.resource is not None 281 | try: 282 | s3_object = self.resource.Object(self.bucket_name, s3_key) 283 | if self.public_readable: 284 | s3_object.put(Body=json_as_bytes, Metadata=meta_data, ACL="public-read") 285 | else: 286 | s3_object.put(Body=json_as_bytes, Metadata=meta_data) 287 | uploaded_flag = True 288 | except connection_errors as e: 289 | log.warning(f"{self.bucket_name}:{s3_key} : {transfer_retry_count=} : {e}") 290 | transfer_retry_count += 1 291 | time.sleep(self.retry_sleep_time) 292 | 293 | else: 294 | log.info(f"file hash of {json_sha512} is the same as is already on S3 and force={force} - not uploading") 295 | 296 | return uploaded_flag 297 |
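    # Usage sketch for the JSON helpers above (bucket and key names are placeholders;
    # the .json extension is appended to the key automatically):
    #
    #   s3_access = S3Access(bucket_name="testawsimple")
    #   s3_access.upload_object_as_json({"id": "batman", "city": "Gotham"}, "hero")  # stored as hero.json
    #   hero = s3_access.download_object_as_json("hero")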
298 | @typechecked() 299 | def download(self, s3_key: str, dest_path: Union[str, Path]) -> bool: 300 | """ 301 | Download an S3 object 302 | 303 | :param s3_key: S3 key 304 | :param dest_path: destination file or directory path. If the path is a directory, the file will be downloaded to that directory with the same name as the S3 key. 305 | :return: True if downloaded successfully 306 | """ 307 | 308 | if isinstance(dest_path, str): 309 | log.info(f"{dest_path} is not a Path object. Non-Path objects will be deprecated in the future") 310 | 311 | dest_path = Path(dest_path) # normalize to Path (no-op if already a Path) 312 | if dest_path.is_dir(): 313 | dest_path = Path(dest_path, s3_key) 314 | 315 | log.info(f'S3 download : {self.bucket_name}:{s3_key} to "{dest_path}" ("{Path(dest_path).absolute()}")') 316 | 317 | Path(dest_path).parent.mkdir(parents=True, exist_ok=True) 318 | 319 | transfer_retry_count = 0 320 | success = False 321 | while not success and transfer_retry_count < self.retry_count: 322 | try: 323 | log.debug(sf("calling client.download_file()", bucket_name=self.bucket_name, s3_key=s3_key, dest_path=dest_path)) 324 | self.client.download_file(self.bucket_name, s3_key, dest_path) 325 | log.debug(sf("S3 client.download_file() complete", bucket_name=self.bucket_name, s3_key=s3_key, dest_path=dest_path)) 326 | s3_object_metadata = self.get_s3_object_metadata(s3_key) 327 | log.debug(sf("S3 object metadata", s3_object_metadata=s3_object_metadata)) 328 | mtime_ts = s3_object_metadata.mtime.timestamp() 329 | os.utime(dest_path, (mtime_ts, mtime_ts)) # set the file mtime to the mtime in S3 330 | success = True 331 | except connection_errors as e: 332 | # ProtocolError can happen for a broken connection 333 | log.warning(f"{self.bucket_name}/{s3_key} to {dest_path} ({Path(dest_path).absolute()}) : {transfer_retry_count=} : {e}") 334 | time.sleep(self.retry_sleep_time) 335 | transfer_retry_count += 1 336 | log.debug(sf(transfer_retry_count=transfer_retry_count, success=success, bucket_name=self.bucket_name, s3_key=s3_key, dest_path=dest_path)) 337 | return success 338 | 339 | @typechecked() 340 | def download_cached(self, s3_key: str, dest_path: Path) -> S3DownloadStatus: 341 | """ 342 | download from AWS S3 with caching 343 | 344 | :param dest_path: destination full path or directory. If the path is a directory, the file will be downloaded to that directory with the same name as the S3 key.
345 | :param dest_path: destination full path or directory. If the path is a directory, the file will be downloaded to that directory with the same name as the S3 key. 346 | :return: S3DownloadStatus instance 347 | """ 348 | 349 | if dest_path.is_dir(): 350 | dest_path = Path(dest_path, s3_key) 351 | log.info(f'S3 download_cached : {self.bucket_name}:{s3_key} to "{dest_path}" ("{dest_path.absolute()}")') 352 | 353 | self.download_status = S3DownloadStatus() # init 354 | 355 | s3_object_metadata = self.get_s3_object_metadata(s3_key) 356 | 357 | sha512 = s3_object_metadata.get_sha512() 358 | cache_path = Path(self.cache_dir, sha512) 359 | log.debug(f"{cache_path}") 360 | 361 | if cache_path.exists(): 362 | log.info(f"{self.bucket_name}/{s3_key} cache hit : copying {cache_path=} to {dest_path=} ({dest_path.absolute()})") 363 | self.download_status.cache_hit = True 364 | self.download_status.success = True 365 | dest_path.parent.mkdir(parents=True, exist_ok=True) 366 | shutil.copy2(cache_path, dest_path) 367 | else: 368 | self.download_status.cache_hit = False 369 | 370 | if not self.download_status.cache_hit: 371 | log.info(f"{self.bucket_name=}/{s3_key=} cache miss : {dest_path=} ({dest_path.absolute()})") 372 | self.download(s3_key, dest_path) 373 | self.cache_dir.mkdir(parents=True, exist_ok=True) 374 | self.download_status.cache_write = lru_cache_write(dest_path, self.cache_dir, sha512, self.cache_max_absolute, self.cache_max_of_free) 375 | self.download_status.success = True 376 | 377 | return self.download_status 378 | 379 | @typechecked() 380 | def download_object_as_json(self, s3_key: str) -> Union[List, Dict]: 381 | s3_key = _get_json_key(s3_key) 382 | assert self.resource is not None 383 | s3_object = self.resource.Object(self.bucket_name, s3_key) 384 | body = s3_object.get()["Body"].read().decode("utf-8") 385 | obj = json.loads(body) 386 | return obj 387 | 388 | @typechecked() 389 | def download_object_as_json_cached(self, s3_key: str) -> Union[List, Dict]: 390 | """ 391 | download object from AWS S3 with caching 392 | 393 | :param s3_key: S3 key of source 394 | :return: the deserialized object (a list or dict) 395 | """ 396 | object_from_json = None 397 | 398 | s3_key = _get_json_key(s3_key) 399 | 400 | self.download_status = S3DownloadStatus() # init 401 | 402 | s3_object_metadata = self.get_s3_object_metadata(s3_key) 403 | 404 | sha512 = s3_object_metadata.get_sha512() 405 | cache_path = Path(self.cache_dir, sha512) 406 | log.debug(f"{cache_path}") 407 | 408 | if cache_path.exists(): 409 | log.info(f"{self.bucket_name}/{s3_key} cache hit : using {cache_path=}") 410 | self.download_status.cache_hit = True 411 | self.download_status.success = True 412 | with cache_path.open("rb") as f: 413 | object_from_json = json.loads(f.read()) 414 | else: 415 | self.download_status.cache_hit = False 416 | 417 | if not self.download_status.cache_hit: 418 | log.info(f"{self.bucket_name=}/{s3_key=} cache miss") 419 | assert self.resource is not None 420 | s3_object = self.resource.Object(self.bucket_name, s3_key) 421 | body = s3_object.get()["Body"].read() 422 | object_from_json = json.loads(body) 423 | self.download_status.cache_write = lru_cache_write(body, self.cache_dir, sha512, self.cache_max_absolute, self.cache_max_of_free) 424 | self.download_status.success = True 425 | 426 | if object_from_json is None: 427 | raise RuntimeError(s3_key) 428 | 429 | return object_from_json 430 | 431 | @typechecked() 432 | def get_s3_object_url(self, s3_key: str) -> str: 433 | """ 434 | Get S3 object URL 435 | 436 | :param s3_key: S3 key 437 | :return: object URL 438 | """ 439 | bucket_location = 
self.client.get_bucket_location(Bucket=self.bucket_name) 440 | location = bucket_location["LocationConstraint"] 441 | url = f"https://{self.bucket_name}.s3-{location}.amazonaws.com/{s3_key}" 442 | return url 443 | 444 | @typechecked() 445 | def get_s3_object_metadata(self, s3_key: str) -> S3ObjectMetadata: 446 | """ 447 | Get S3 object metadata 448 | 449 | :param s3_key: S3 key 450 | :return: S3ObjectMetadata (an AWSimpleException is raised if the object does not exist) 451 | """ 452 | assert self.resource is not None 453 | bucket_resource = self.resource.Bucket(self.bucket_name) 454 | if self.object_exists(s3_key): 455 | bucket_object = bucket_resource.Object(s3_key) 456 | assert isinstance(self.bucket_name, str) # mainly for mypy 457 | s3_object_metadata = S3ObjectMetadata( 458 | self.bucket_name, 459 | s3_key, 460 | bucket_object.content_length, 461 | bucket_object.last_modified, 462 | bucket_object.e_tag[1:-1].lower(), 463 | bucket_object.metadata.get(sha512_string), 464 | self.get_s3_object_url(s3_key), 465 | ) 466 | 467 | else: 468 | raise AWSimpleException(f"{self.bucket_name=} {s3_key=} does not exist") 469 | log.debug(f"{s3_object_metadata=}") 470 | return s3_object_metadata 471 | 472 | @typechecked() 473 | def object_exists(self, s3_key: str) -> bool: 474 | """ 475 | determine if an s3 object exists 476 | 477 | :param s3_key: the S3 object key 478 | :return: True if object exists 479 | """ 480 | assert self.resource is not None 481 | bucket_resource = self.resource.Bucket(self.bucket_name) 482 | objs = list(bucket_resource.objects.filter(Prefix=s3_key)) 483 | object_exists = len(objs) > 0 and objs[0].key == s3_key 484 | log.debug(f"{self.bucket_name}:{s3_key} : {object_exists=}") 485 | return object_exists 486 | 487 | @typechecked() 488 | def bucket_exists(self) -> bool: 489 | """ 490 | Test if S3 bucket exists 491 | 492 | :return: True if bucket exists 493 | """ 494 | 495 | # use a "custom" config so that .head_bucket() doesn't take a really long time if the bucket does not exist 496 | config = Config(connect_timeout=5, retries={"max_attempts": 3, "mode": "standard"}) 497 | s3 = boto3.client("s3", config=config) 498 | assert self.bucket_name is not None 499 | try: 500 | s3.head_bucket(Bucket=self.bucket_name) 501 | exists = True 502 | except ClientError as e: 503 | log.info(f"{self.bucket_name=} {e=}") 504 | exists = False 505 | return exists 506 | 507 | @typechecked() 508 | def create_bucket(self) -> bool: 509 | """ 510 | create S3 bucket 511 | 512 | :return: True if bucket created 513 | """ 514 | 515 | # this is ugly, but create_bucket needs to be told the region explicitly (it doesn't just take it from the config) 516 | if (region := self.get_region()) is None: 517 | raise RuntimeError("no region given (check ~/.aws/config)") 518 | else: 519 | location = {"LocationConstraint": region} 520 | 521 | created = False 522 | if not self.bucket_exists(): 523 | try: 524 | if self.public_readable: 525 | self.client.create_bucket(Bucket=self.bucket_name, CreateBucketConfiguration=location, ACL="public-read") 526 | else: 527 | self.client.create_bucket(Bucket=self.bucket_name, CreateBucketConfiguration=location) 528 | self.client.get_waiter("bucket_exists").wait(Bucket=self.bucket_name) 529 | created = True 530 | except ClientError as e: 531 | log.warning(f"{self.bucket_name=} {e=}") 532 | return created 533 | 534 | @typechecked() 535 | def delete_bucket(self) -> bool: 536 | """ 537 | delete S3 bucket 538 | 539 | :return: True if bucket deleted (False if didn't exist in the first place) 540 | """ 541 | try: 542 | 
self.client.delete_bucket(Bucket=self.bucket_name) 543 | deleted = True 544 | except ClientError as e: 545 | log.info(f"{self.bucket_name=} {e=}") # does not exist 546 | deleted = False 547 | return deleted 548 | 549 | @typechecked() 550 | def dir(self, prefix: str = "") -> Dict[str, S3ObjectMetadata]: 551 | """ 552 | Do a "directory" of an S3 bucket where the returned dict key is the S3 key and the value is an S3ObjectMetadata object. 553 | 554 | Use the faster .keys() method if all you need are the keys. 555 | 556 | :param prefix: only do a dir on objects that have this prefix in their keys (omit for all objects) 557 | :return: a dict where key is the S3 key and the value is S3ObjectMetadata 558 | """ 559 | directory = {} 560 | if self.bucket_exists(): 561 | paginator = self.client.get_paginator("list_objects_v2") 562 | for page in paginator.paginate(Bucket=self.bucket_name, Prefix=prefix): 563 | # deal with empty bucket 564 | for content in page.get("Contents", []): 565 | s3_key = content.get("Key") 566 | directory[s3_key] = self.get_s3_object_metadata(s3_key) 567 | else: 568 | raise BucketNotFound(self.bucket_name) 569 | return directory 570 | 571 | def keys(self, prefix: str = "") -> List[str]: 572 | """ 573 | List all the keys in this S3 Bucket. 574 | 575 | Note that this should be faster than .dir() if all you need are the keys and not the metadata. 576 | 577 | :param prefix: only list objects that have this prefix in their keys (omit for all objects) 578 | :return: a sorted list of all the keys in this S3 Bucket (sorted for consistency) 579 | """ 580 | keys = [] 581 | if self.bucket_exists(): 582 | paginator = self.client.get_paginator("list_objects_v2") 583 | for page in paginator.paginate(Bucket=self.bucket_name, Prefix=prefix): 584 | # deal with empty bucket 585 | for content in page.get("Contents", []): 586 | s3_key = content.get("Key") 587 | keys.append(s3_key) 588 | else: 589 | raise BucketNotFound(self.bucket_name) 590 | keys.sort() 591 | return keys 592 | -------------------------------------------------------------------------------- /awsimple/sns.py: -------------------------------------------------------------------------------- 1 | """ 2 | SNS Access 3 | """ 4 | 5 | from typing import Union, Dict 6 | 7 | from typeguard import typechecked 8 | 9 | from awsimple import AWSAccess, SQSAccess 10 | 11 | 12 | class SNSAccess(AWSAccess): 13 | @typechecked() 14 | def __init__(self, topic_name: str, **kwargs): 15 | """ 16 | SNS Access 17 | 18 | :param topic_name: SNS topic name 19 | :param kwargs: kwargs to send to base class 20 | """ 21 | super().__init__(resource_name="sns", **kwargs) 22 | self.topic_name = topic_name 23 | 24 | def get_topic(self): 25 | """ 26 | gets the SNS Topic instance associated with this object's topic name 27 | 28 | :return: sns.Topic instance, or None if the topic does not exist 29 | 30 | """ 31 | topic = None 32 | for t in self.resource.topics.all(): 33 | if t.arn.split(":")[-1] == self.topic_name: 34 | topic = t 35 | return topic 36 | 37 | @typechecked() 38 | def get_arn(self) -> str: 39 | """ 40 | get topic ARN from this object's topic name 41 | 42 | :return: topic ARN 43 | 44 | """ 45 | return self.get_topic().arn 46 | 47 | @typechecked() 48 | def create_topic(self) -> str: 49 | """ 50 | create an SNS topic 51 | 52 | :return: the SNS topic's arn 53 | """ 54 | response = self.client.create_topic(Name=self.topic_name, Attributes={"DisplayName": self.topic_name}) 55 | # todo: see if there are any waiters for SNS topic creation 56 | # 
https://stackoverflow.com/questions/50818327/aws-sns-and-waiter-functions-for-boto3 57 | return response["TopicArn"] 58 | 59 | def delete_topic(self): 60 | """ 61 | delete SNS topic 62 | 63 | """ 64 | self.client.delete_topic(TopicArn=self.get_arn()) 65 | 66 | @typechecked() 67 | def subscribe(self, subscriber: Union[str, SQSAccess]) -> str: 68 | """ 69 | Subscribe to an SNS topic 70 | 71 | :param subscriber: email address or SQS queue (SQSAccess instance) 72 | :return: subscription ARN 73 | """ 74 | if isinstance(subscriber, str) and "@" in subscriber: 75 | # email 76 | endpoint = subscriber 77 | protocol = "email" 78 | elif isinstance(subscriber, SQSAccess): 79 | # 'hooks up' provided SQS queue to this SNS topic 80 | subscriber.add_permission(self.get_arn()) 81 | endpoint = subscriber.get_arn() 82 | protocol = "sqs" 83 | else: 84 | raise ValueError(f"{subscriber=}") 85 | response = self.client.subscribe(TopicArn=self.get_arn(), Protocol=protocol, Endpoint=endpoint, ReturnSubscriptionArn=True) 86 | return response["SubscriptionArn"] 87 | 88 | @typechecked() 89 | def publish(self, message: str, subject: Union[str, None] = None, attributes: Union[dict, None] = None) -> str: 90 | """ 91 | publish to an existing SNS topic 92 | 93 | :param message: message string 94 | :param subject: subject string 95 | :param attributes: message attributes (see AWS SNS documentation on SNS MessageAttributes) 96 | :return: message ID 97 | """ 98 | topic = self.get_topic() 99 | kwargs = {"Message": message} # type: Dict[str, Union[str, dict]] 100 | if subject is not None: 101 | kwargs["Subject"] = subject 102 | if attributes is not None: 103 | kwargs["MessageAttributes"] = attributes 104 | response = topic.publish(**kwargs) 105 | return response["MessageId"] 106 | -------------------------------------------------------------------------------- /awsimple/sqs.py: -------------------------------------------------------------------------------- 1 | """ 2 | SQS Access 3 | """ 4 | 5 | from dataclasses import dataclass 6 | from typing import List, Any, Dict, Union 7 | import time 8 | import statistics 9 | from datetime import timedelta 10 | from pathlib import Path 11 | import json 12 | from logging import getLogger 13 | 14 | from botocore.exceptions import ClientError, HTTPClientError 15 | from typeguard import typechecked 16 | import appdirs 17 | 18 | from awsimple import AWSAccess, __application_name__, __author__, boto_error_to_string 19 | 20 | log = getLogger(__application_name__) 21 | 22 | 23 | @dataclass 24 | class SQSMessage: 25 | """ 26 | SQS Message 27 | """ 28 | 29 | message: str # payload 30 | _m: Any # AWS message itself (from boto3) 31 | _q: Any # SQSAccess instance 32 | 33 | def delete(self): 34 | self._m.delete() # boto3 35 | self._q._update_response_history(self.get_id()) 36 | 37 | def get_id(self): 38 | return self._m.message_id 39 | 40 | def get_aws_message(self): 41 | # get the native AWS message 42 | return self._m 43 | 44 | 45 | # AWS defaults 46 | aws_sqs_long_poll_max_wait_time = 20 # seconds 47 | aws_sqs_max_messages = 10 48 | 49 | 50 | class SQSAccess(AWSAccess): 51 | @typechecked() 52 | def __init__(self, queue_name: str, immediate_delete: bool = True, visibility_timeout: Union[int, None] = None, minimum_visibility_timeout: int = 0, **kwargs): 53 | """ 54 | SQS access 55 | 56 | :param queue_name: queue name 57 | :param immediate_delete: True to immediately delete read message(s) upon receipt, False to require the user to call delete() on the received message 58 | :param visibility_timeout: visibility timeout (if explicitly given) - set 
to None to automatically attempt to determine the timeout 59 | :param minimum_visibility_timeout: visibility timeout will be at least this long (do not set if visibility_timeout set) 60 | :param kwargs: kwargs to send to base class 61 | """ 62 | super().__init__(resource_name="sqs", **kwargs) 63 | self.queue_name = queue_name 64 | 65 | # visibility timeout 66 | self.immediate_delete = immediate_delete # True to immediately delete messages 67 | self.user_provided_timeout = visibility_timeout # the queue will retry a message (make it visible again) if it is not deleted within this time 68 | self.user_provided_minimum_timeout = minimum_visibility_timeout # the timeout will be at least this long 69 | self.auto_timeout_multiplier = 10.0 # for automatic timeout calculations, multiply this by the median run time to get the timeout 70 | 71 | self.sqs_call_wait_time = 0 # short (0) or long poll (> 0, usually 20) 72 | self.queue = None # since this requires a call to AWS, this will be set only when needed 73 | 74 | self.immediate_delete_timeout: int = 30 # seconds 75 | self.minimum_nominal_work_time = 1.0 # minimum work time in seconds so we don't time out too quickly, e.g. in case the user doesn't actually do any work 76 | 77 | # receive/delete times for messages (auto_delete set to False) 78 | self.response_history = {} # type: Dict[Any, Any] 79 | 80 | # We write the history out as a file so don't make this too big. We take the median (for the nominal run time) so make this big enough to tolerate a fair number of outliers. 81 | self.max_history = 20 82 | 83 | def _get_queue(self): 84 | if self.queue is None: 85 | try: 86 | queue = self.resource.get_queue_by_name(QueueName=self.queue_name) 87 | except self.client.exceptions.QueueDoesNotExist as e: 88 | log.debug(f"{self.queue_name},{e=}") 89 | queue = None 90 | except self.client.exceptions.ClientError as e: 91 | error_code = e.response["Error"].get("Code") 92 | if "NonExistentQueue" in error_code: 93 | log.debug(f"{self.queue_name},{e=},{error_code=}") 94 | queue = None 95 | else: 96 | # other errors (e.g. connection errors, etc.) 
97 | raise 98 | 99 | if queue is not None: 100 | # kludge: when mocking with moto we can get a dict back instead of a Queue - treat that as not being able to get the queue 101 | queue_type = type(queue) 102 | queue_type_string = str(queue_type) 103 | if "dict" in queue_type_string: 104 | log.warning(f"could not get Queue {self.queue_name}") 105 | else: 106 | self.queue = queue 107 | 108 | return self.queue 109 | 110 | @typechecked() 111 | def _get_response_history_file_path(self) -> Path: 112 | """ 113 | get response history file path 114 | 115 | :return: response history file path 116 | """ 117 | p = Path(appdirs.user_data_dir(__application_name__, __author__), "response", f"{self.queue_name}.json") 118 | log.debug(f'response history file path : "{p}"') 119 | return p 120 | 121 | @typechecked() 122 | def create_queue(self) -> str: 123 | """ 124 | create SQS queue 125 | 126 | :return: queue URL 127 | """ 128 | response = self.client.create_queue(QueueName=self.queue_name) 129 | url = response.get("QueueUrl", "") 130 | return url 131 | 132 | def delete_queue(self): 133 | """ 134 | delete queue 135 | """ 136 | if (queue := self._get_queue()) is None: 137 | log.warning(f"could not get queue {self.queue_name}") 138 | else: 139 | queue.delete() 140 | 141 | @typechecked() 142 | def exists(self) -> bool: 143 | """ 144 | test if SQS queue exists 145 | 146 | :return: True if exists 147 | """ 148 | return self._get_queue() is not None 149 | 150 | def calculate_nominal_work_time(self) -> float: 151 | response_times = [] 152 | for begin, end in self.response_history.values(): 153 | if end is not None: 154 | response_times.append(end - begin) 155 | nominal_work_time = max(statistics.median(response_times), self.minimum_nominal_work_time) # enforce a minimum in case the measured work time is very short 156 | log.debug(f"{nominal_work_time=}") 157 | return nominal_work_time 158 | 159 | def calculate_visibility_timeout(self) -> int: 160 | if self.user_provided_timeout is None: 161 | if self.immediate_delete: 162 | visibility_timeout = self.immediate_delete_timeout # we immediately delete the message so this doesn't need to be very long 163 | else: 164 | visibility_timeout = max(self.user_provided_minimum_timeout, round(self.auto_timeout_multiplier * self.calculate_nominal_work_time())) 165 | else: 166 | if self.immediate_delete: 167 | # if we immediately delete the message it doesn't make sense for the user to try to specify the timeout 168 | raise ValueError(f"nonsensical values: {self.user_provided_timeout=} and {self.immediate_delete=}") 169 | elif self.user_provided_minimum_timeout > 0: 170 | raise ValueError(f"do not specify both timeout ({self.user_provided_timeout}) and minimum_timeout {self.user_provided_minimum_timeout}") 171 | else: 172 | visibility_timeout = self.user_provided_timeout # timeout explicitly given by the user 173 | 174 | return visibility_timeout 175 | 176 | @typechecked() 177 | def _receive(self, max_number_of_messages_parameter: Union[int, None] = None) -> List[SQSMessage]: 178 | if self.user_provided_timeout is None and not self.immediate_delete: 179 | # read in response history (and initialize it if it doesn't exist) 180 | try: 181 | with open(self._get_response_history_file_path()) as f: 182 | self.response_history = json.load(f) 183 | except FileNotFoundError: 184 | pass 185 | except IOError as e: 186 | log.warning(f'IOError : "{self._get_response_history_file_path()}" : {e}') 187 | except json.JSONDecodeError as e: 188 | log.warning(f'JSONDecodeError : "{self._get_response_history_file_path()}" : {e}') 189 | if len(self.response_history) == 0: 190 | now = time.time() 
191 | self.response_history[None] = (now, now + timedelta(hours=1).total_seconds()) # we have no history, so the initial nominal run time is a long time 192 | 193 | # receive the message(s) 194 | messages = [] # type: List[Any] 195 | continue_to_receive = True 196 | call_wait_time = self.sqs_call_wait_time # first time through may be long poll, but after that it's a short poll 197 | 198 | while continue_to_receive: 199 | aws_messages = None 200 | 201 | if max_number_of_messages_parameter is None: 202 | max_number_of_messages = aws_sqs_max_messages 203 | else: 204 | max_number_of_messages = max_number_of_messages_parameter - len(messages) # how many left to do 205 | 206 | try: 207 | if (queue := self._get_queue()) is None: 208 | log.warning(f"could not get queue {self.queue_name}") 209 | else: 210 | aws_messages = queue.receive_messages( 211 | MaxNumberOfMessages=min(max_number_of_messages, aws_sqs_max_messages), VisibilityTimeout=self.calculate_visibility_timeout(), WaitTimeSeconds=call_wait_time 212 | ) 213 | 214 | for m in aws_messages: 215 | if self.immediate_delete: 216 | m.delete() 217 | elif self.user_provided_timeout is None: 218 | # keep history of message processing times for user deletes, by AWS's message id 219 | self.response_history[m.message_id] = [time.time(), None] # start (finish will be filled in upon delete) 220 | 221 | # if history is too large, delete the oldest 222 | while len(self.response_history) > self.max_history: 223 | oldest = None 224 | for handle, start_finish in self.response_history.items(): 225 | if oldest is None or start_finish[0] < self.response_history[oldest][0]: 226 | oldest = handle 227 | del self.response_history[oldest] 228 | 229 | messages.append(SQSMessage(m.body, m, self)) 230 | 231 | except (ClientError, HTTPClientError) as e: 232 | # Usually we don't catch boto3 exceptions, but during a long poll a quick internet disruption can raise an exception that we'd like to avoid. 
233 | log.debug(f"{self.queue_name=} {e}") 234 | self.most_recent_error = boto_error_to_string(e) 235 | 236 | call_wait_time = 0 # now, short polls 237 | 238 | if aws_messages is None or len(aws_messages) == 0 or (max_number_of_messages_parameter is not None and len(messages) >= max_number_of_messages_parameter): 239 | continue_to_receive = False 240 | 241 | return messages 242 | 243 | @typechecked() 244 | def receive_message(self) -> Union[SQSMessage, None]: 245 | """ 246 | receive SQS message from this queue 247 | :return: one SQSMessage if one available, else None 248 | """ 249 | 250 | messages = self._receive(1) 251 | message_count = len(messages) 252 | if message_count == 0: 253 | message = None 254 | elif message_count == 1: 255 | message = messages[0] 256 | else: 257 | raise RuntimeError(f"{message_count=}") 258 | return message 259 | 260 | @typechecked() 261 | def receive_messages(self, max_messages: Union[int, None] = None) -> List[SQSMessage]: 262 | """ 263 | receive a (possibly empty) list of SQS messages from this queue 264 | 265 | :param max_messages: maximum number of messages to receive (None for all available messages) 266 | :return: list of messages 267 | """ 268 | return self._receive(max_messages) 269 | 270 | def _update_response_history(self, message_id: str): 271 | """ 272 | update response history 273 | 274 | :param message_id: message ID 275 | """ 276 | # update response history 277 | if not self.immediate_delete and self.user_provided_timeout is None and message_id in self.response_history: 278 | self.response_history[message_id][1] = time.time() # set finish time 279 | 280 | # save to file 281 | file_path = self._get_response_history_file_path() 282 | file_path.parent.mkdir(parents=True, exist_ok=True) 283 | try: 284 | with open(self._get_response_history_file_path(), "w") as f: 285 | json.dump(self.response_history, f, indent=4) 286 | except IOError as e: 287 | log.info(f'"{file_path}" : {e}') 288 | 289 | @typechecked() 290 | def send(self, message: str): 291 | """ 292 | Send SQS message. If the queue doesn't exist, it will be created. 293 | 294 | :param message: message string 295 | """ 296 | if (queue := self._get_queue()) is None: 297 | log.info(f"could not get queue {self.queue_name} - creating it") 298 | self.create_queue() 299 | # ensure the queue has indeed been created 300 | count = 0 301 | while not self.exists() and count < 100: 302 | time.sleep(3) 303 | count += 1 304 | if (queue := self._get_queue()) is None: 305 | log.error(f"could not create queue {self.queue_name}") 306 | if queue is not None: 307 | queue.send_message(MessageBody=message) 308 | 309 | @typechecked() 310 | def get_arn(self) -> str: 311 | """ 312 | get SQS ARN 313 | 314 | :return: ARN string 315 | """ 316 | if (queue := self._get_queue()) is None: 317 | log.warning(f"could not get queue {self.queue_name}") 318 | arn = "" 319 | else: 320 | arn = queue.attributes["QueueArn"] 321 | return arn 322 | 323 | @typechecked() 324 | def add_permission(self, source_arn: str): 325 | """ 326 | allow source (e.g. SNS topic) to send to this SQS queue 327 | 328 | :param source_arn: source arn (e.g. 
SNS topic ARN) 329 | 330 | """ 331 | 332 | # a little brute-force, but this is the only way I could assign SQS policy to accept messages from SNS 333 | policy = { 334 | "Version": "2012-10-17", 335 | "Statement": [{"Effect": "Allow", "Principal": "*", "Action": "SQS:SendMessage", "Resource": self.get_arn(), "Condition": {"StringEquals": {"aws:SourceArn": source_arn}}}], 336 | } 337 | 338 | policy_string = json.dumps(policy) 339 | log.info(f"{policy_string=}") 340 | if (queue := self._get_queue()) is None: 341 | log.warning(f"could not get queue {self.queue_name}") 342 | else: 343 | self.client.set_queue_attributes(QueueUrl=queue.url, Attributes={"Policy": policy_string}) 344 | 345 | def purge(self): 346 | """ 347 | purge all messages in the queue 348 | """ 349 | if (queue := self._get_queue()) is None: 350 | log.warning(f"could not get queue {self.queue_name}") 351 | else: 352 | self.client.purge_queue(QueueUrl=queue.url) 353 | 354 | def messages_available(self) -> int: 355 | """ 356 | return number of messages available 357 | :return: number of messages available 358 | """ 359 | key = "ApproximateNumberOfMessages" 360 | if (queue := self._get_queue()) is None: 361 | log.warning(f"could not get queue {self.queue_name}") 362 | number_of_messages_available = 0 363 | else: 364 | response = self.client.get_queue_attributes(QueueUrl=queue.url, AttributeNames=[key]) 365 | number_of_messages_available = int(response["Attributes"][key]) 366 | return number_of_messages_available 367 | 368 | 369 | class SQSPollAccess(SQSAccess): 370 | def __init__(self, queue_name: str, **kwargs): 371 | super().__init__(queue_name, **kwargs) 372 | self.sqs_call_wait_time = aws_sqs_long_poll_max_wait_time 373 | -------------------------------------------------------------------------------- /doc/awsimple_sf_python_6_21.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesabel/awsimple/8157ed3d9501fcbc56382f148385144ed73a307e/doc/awsimple_sf_python_6_21.pdf -------------------------------------------------------------------------------- /doc/awsimple_sf_python_6_21.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesabel/awsimple/8157ed3d9501fcbc56382f148385144ed73a307e/doc/awsimple_sf_python_6_21.pptx -------------------------------------------------------------------------------- /doc/flake8_report.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesabel/awsimple/8157ed3d9501fcbc56382f148385144ed73a307e/doc/flake8_report.txt -------------------------------------------------------------------------------- /doc/notes.txt: -------------------------------------------------------------------------------- 1 | 2 | - AWS IAM permissions will be required to use awsimple. S3, DynamoDB, SQS and SNS have AWS-managed full-access policies (e.g. AmazonS3FullAccess) that can be used, 3 | or you can set up your own to align with your particular usage model. 4 | -------------------------------------------------------------------------------- /doc_source/aws_access.rst: -------------------------------------------------------------------------------- 1 | 2 | AWSAccess 3 | ========= 4 | 5 | .. 
automodule:: awsimple.aws 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /doc_source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | sys.path.insert(0, os.path.abspath('..')) 16 | sys.path.insert(0, os.path.abspath('.')) 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'awsimple' 21 | copyright = '2021, abel' 22 | author = 'abel' 23 | 24 | 25 | # -- General configuration --------------------------------------------------- 26 | 27 | # Add any Sphinx extension module names here, as strings. They can be 28 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 29 | # ones. 30 | extensions = ['sphinx.ext.autosectionlabel', 'sphinx.ext.autodoc'] 31 | 32 | # Add any paths that contain templates here, relative to this directory. 33 | templates_path = ['_templates'] 34 | 35 | # List of patterns, relative to source directory, that match files and 36 | # directories to ignore when looking for source files. 37 | # This pattern also affects html_static_path and html_extra_path. 38 | exclude_patterns = [] 39 | 40 | 41 | # -- Options for HTML output ------------------------------------------------- 42 | 43 | # The theme to use for HTML and HTML Help pages. See the documentation for 44 | # a list of builtin themes. 45 | # 46 | html_theme = 'alabaster' 47 | 48 | # Add any paths that contain custom static files (such as style sheets) here, 49 | # relative to this directory. They are copied after the builtin static files, 50 | # so a file named "default.css" will overwrite the builtin "default.css". 51 | html_static_path = ['_static'] 52 | 53 | # use __init__ docstrings 54 | autoclass_content = 'both' 55 | -------------------------------------------------------------------------------- /doc_source/coverage.txt: -------------------------------------------------------------------------------- 1 | Test coverage: 83.85% -------------------------------------------------------------------------------- /doc_source/dynamodb_access.rst: -------------------------------------------------------------------------------- 1 | 2 | DynamoDBAccess 3 | ============== 4 | 5 | .. automodule:: awsimple.dynamodb 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /doc_source/index.rst: -------------------------------------------------------------------------------- 1 | AWSimple - a simple AWS API 2 | =========================== 3 | 4 | *(pronounced A-W-Simple)* 5 | 6 | AWSimple provides a simple, object-oriented interface into four AWS "serverless" cloud services: 7 | 8 | - S3 - Binary object storage. Analogous to storing files in the cloud. 
9 | - DynamoDB - A NoSQL database to put, get, and query dictionary-like objects. 10 | - SQS - Queuing service for sending and receiving messages. 11 | - SNS - Notification service to send messages to a variety of destinations including emails, SMS messages, and SQS queues. 12 | 13 | `AWSimple` also provides some additional features: 14 | 15 | - True file hashing (SHA512) for S3 files. 16 | - Locally cached S3 accesses. 17 | - DynamoDB full table scans (with local cache option). 18 | - Built-in pagination. 19 | 20 | If you're new to `AWSimple`, check out the :ref:`Quick Start Guide`. Also check out the 21 | `examples `_. 22 | 23 | .. toctree:: 24 | :maxdepth: 2 25 | 26 | quick_start_guide 27 | user_guide 28 | aws_access 29 | s3_access 30 | dynamodb_access 31 | sns_access 32 | sqs_access 33 | thank_you 34 | 35 | 36 | Testing 37 | ------- 38 | .. include:: coverage.txt 39 | 40 | 41 | Indices and tables 42 | ================== 43 | * :ref:`genindex` 44 | * :ref:`modindex` 45 | * :ref:`search` 46 | 47 | 48 | The `AWSimple documentation `_ is hosted on `Read the Docs `_ . 49 | -------------------------------------------------------------------------------- /doc_source/quick_start_guide.rst: -------------------------------------------------------------------------------- 1 | 2 | Quick Start Guide 3 | ================= 4 | 5 | 6 | Installation 7 | ------------ 8 | 9 | Install `awsimple` from PyPI: 10 | 11 | `pip install awsimple` 12 | 13 | AWS's IAM 14 | --------- 15 | 16 | First you need to determine how you're going to access AWS, which is through AWS's IAM (Identity and Access Management). There are two ways: 17 | 18 | - `Use keys directly`: your AWS Access Key and AWS Secret Access Key are passed directly into AWSimple. 19 | - `Use an AWS profile`: An `.aws` directory in your home directory contains CONFIG and CREDENTIALS files with profiles that hold your Access Key and Secret Access Key. 20 | 21 | For development, the profile method is recommended. This way your secrets are kept out of your repository and 22 | application. In fact, if you put your secrets in a `default` profile, you don't have to tell AWSimple anything about your 23 | credentials at all since they will be used from the default location and profile. 24 | 25 | For applications, you may not want to use an `.aws` directory with profiles. Rather, you pass in keys via some secure mechanism defined by 26 | your particular application. 27 | 28 | Note that **AWS credentials must be properly managed and kept secret**, just as you would do for any other site where money is concerned. 29 | There are little to no mechanisms in AWS to stop improper use of AWS resources. While billing alerts can and should be used, these are "after the fact" and 30 | will not necessarily prevent billing surprises. 31 | 32 | See the AWS documentation on `configuration files `_ for more information. 33 | 34 | Testing the AWS Connection 35 | -------------------------- 36 | 37 | Now that you have your AWS IAM configured, let's test it out: 38 | 39 | .. code:: python 40 | 41 | from awsimple import AWSAccess 42 | 43 | # In this example we're using the default IAM profile (in ~/.aws/credentials and ~/.aws/config) 44 | print(AWSAccess().test()) # Should be 'True' 45 | 46 | 47 | If everything worked OK, this code will output `True` and you can go on to the next section. 
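If you are not using the `default` profile, you can name one explicitly. Below is a minimal sketch (the `testawsimple` profile name is just an example - use a profile name that exists in your own `.aws` files):

.. code:: python

    from awsimple import AWSAccess

    # use a specific (non-default) profile from ~/.aws/credentials and ~/.aws/config
    print(AWSAccess(profile_name="testawsimple").test())  # Should be 'True'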
48 | 49 | Creating, Writing and Reading an S3 Bucket Object 50 | ------------------------------------------------- 51 | 52 | Assuming your IAM configuration allows you to create an AWS S3 bucket and object, let's do that now. 53 | 54 | .. code:: python 55 | 56 | from awsimple import S3Access 57 | 58 | # bucket names are globally unique, so change this bucket name to something unique to you 59 | s3_access = S3Access("james-abel-awsimple-test-bucket") 60 | 61 | # let's first make sure the bucket exists 62 | s3_access.create_bucket() 63 | 64 | # the S3 key is the name of the object in the S3 bucket, somewhat analogous to a file name 65 | s3_key = "hello.txt" 66 | 67 | # write our message to S3 68 | s3_access.write_string("hello world", s3_key) 69 | 70 | # will output "hello world" 71 | print(s3_access.read_string(s3_key)) 72 | -------------------------------------------------------------------------------- /doc_source/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3 2 | typeguard 3 | hashy 4 | dictim 5 | appdirs 6 | ismain 7 | tobool 8 | -------------------------------------------------------------------------------- /doc_source/s3_access.rst: -------------------------------------------------------------------------------- 1 | 2 | S3Access 3 | ======== 4 | 5 | .. automodule:: awsimple.s3 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /doc_source/sns_access.rst: -------------------------------------------------------------------------------- 1 | 2 | SNSAccess 3 | ========= 4 | 5 | .. automodule:: awsimple.sns 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /doc_source/sqs_access.rst: -------------------------------------------------------------------------------- 1 | 2 | SQSAccess 3 | ========= 4 | 5 | .. automodule:: awsimple.sqs 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /doc_source/thank_you.rst: -------------------------------------------------------------------------------- 1 | Thank You 2 | ========= 3 | 4 | Special thanks to Phebe Polk for code and documentation reviews. 5 | -------------------------------------------------------------------------------- /doc_source/user_guide.rst: -------------------------------------------------------------------------------- 1 | 2 | AWSimple User Guide 3 | =================== 4 | 5 | AWSimple provides high-level, object-oriented access to common AWS "serverless" services such as 6 | :ref:`S3`, :ref:`DynamoDB`, :ref:`SNS`, and :ref:`SQS`. AWSimple uses AWS' 7 | `boto3 `_ "under the hood" for AWS access. 8 | 9 | Setting up your AWS Account 10 | --------------------------- 11 | In order to use AWSimple, or any other AWS software for that matter, you need an AWS account and one or more AWS "programmatic users" created via the 12 | `AWS IAM (Identity and Access Management) console `_. This user guide assumes you have a basic understanding of AWS IAM. 13 | This programmatic user will need to be given appropriate permissions to the AWS resources you wish to use. IAM provides you with an `access key` and 14 | `secret access key` for a programmatic user. You must also select an AWS `region` (i.e. roughly where the actual AWS servers that you'll be using 15 | are located). 
These keys must be provided to AWSimple in order to access AWS resources. 16 | 17 | IMHO, at least for the purposes of initial development, you probably don't have to worry too much about fine-tuning your region. Pick a region reasonably 18 | close and go with that for a while. AWS's global network is pretty good, so just get close at first and you can optimize later. Many permissions and/or 19 | access issues can arise when you inadvertently try to access an unintended region. 20 | 21 | During development, it is recommended that these keys be placed in the AWS `credentials` and `config` files (no file extension) in the `.aws` directory 22 | under a `profile`. See `AWS configuration files `_ for directions on how to 23 | configure your credentials and config files. In fact, initially you can assign a programmatic user's keys to the `[default]` profile, so you don't have to 24 | pass any credentials or region to AWSimple. 25 | 26 | For production, the `access key`, `secret access key`, and `region` can be provided to AWSimple directly, in a manner that is appropriate for your application. 27 | 28 | Note that **AWS credentials must be properly managed and kept secret**, just as you would do for any other site where money is concerned. 29 | There are little to no mechanisms in AWS to stop improper use of AWS resources. While billing alerts can and should be used, these are "after the fact" and 30 | will not necessarily prevent billing surprises. 31 | 32 | See the AWS documentation on `configuration files `_ for more information. 33 | 34 | Testing your AWS Account 35 | ~~~~~~~~~~~~~~~~~~~~~~~~ 36 | Dealing with IAM and permissions can be tedious and difficult to test. If they are wrong, you merely get a permissions error. To help debug permissions, 37 | AWSimple has a test feature to make sure you have the basic IAM setup working: 38 | 39 | .. code:: python 40 | 41 | from awsimple import AWSAccess 42 | 43 | # In this example we're using the default 44 | # IAM profile (in ~/.aws/credentials and ~/.aws/config) 45 | print(AWSAccess().test()) # Should be 'True' 46 | 47 | If this prints `True`, you at least have properly configured your programmatic user for AWSimple to use. 48 | 49 | Services accessible with AWSimple 50 | --------------------------------- 51 | AWSimple offers access into :ref:`S3`, :ref:`DynamoDB`, :ref:`SNS`, and :ref:`SQS`. 52 | 53 | 54 | S3 55 | -- 56 | S3 is probably one of the most popular AWS services. S3 is based on `buckets` and `objects` within those buckets. Again, AWSimple assumes a basic 57 | knowledge of S3, but refer to the `S3 documentation `_ if you are unfamiliar with S3. 58 | 59 | AWSimple provides the ability to create and delete S3 buckets, and write and read S3 bucket objects. In addition, a few helper methods exist, 60 | such as listing buckets and bucket objects. 61 | 62 | S3 create bucket 63 | ~~~~~~~~~~~~~~~~~~ 64 | Before you can use a bucket, it needs to be created. A bucket can be created with the AWS console, but here we'll do it programmatically with AWSimple: 65 | 66 | .. code:: python 67 | 68 | from awsimple import S3Access 69 | 70 | # bucket names are globally unique, so change this bucket name to something unique to you 71 | s3_access = S3Access("james-abel-awsimple-test-bucket") 72 | s3_access.create_bucket() 73 | 74 | Each S3Access instance is associated with a specific bucket. 75 | 76 | S3 write 77 | ~~~~~~~~ 78 | Now let's write an object to the bucket we just created: 79 | 80 | .. 
code:: python 81 | 82 | # the S3 key is the name of the object in the S3 bucket, somewhat analogous to a file name 83 | s3_key = "hello.txt" 84 | 85 | # write our "hello world" message to S3 86 | s3_access.write_string("hello world", s3_key) 87 | 88 | S3 read 89 | ~~~~~~~ 90 | And finally let's read the object back: 91 | 92 | .. code:: python 93 | 94 | # will print "hello world" 95 | print(s3_access.read_string(s3_key)) 96 | 97 | S3 Caching 98 | ~~~~~~~~~~ 99 | AWSimple can use local caching to reduce network traffic, which in turn can reduce costs and speed up applications. A file hash (SHA512) is 100 | used to ensure file content equivalency. 
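As a sketch of what a cached download looks like (`download_cached()` returns an `S3DownloadStatus` instance; the bucket and key names below are placeholders - use your own):

.. code:: python

    from pathlib import Path

    from awsimple import S3Access

    s3_access = S3Access("james-abel-awsimple-test-bucket")

    # the first download comes from S3 and populates the local cache
    status = s3_access.download_cached("hello.txt", Path("hello.txt"))
    print(status.cache_hit)  # False on a cold cache

    # a later download of the unchanged object can be served from the local cache
    status = s3_access.download_cached("hello.txt", Path("hello.txt"))
    print(status.cache_hit)  # True if the cached copy was used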
101 | 102 | DynamoDB 103 | -------- 104 | DynamoDB is a "NoSQL" (AKA document based) database. It is a "serverless" service that offers an `On Demand` option. DynamoDB is made up 105 | of `tables`, and each table can store a collection of `items`. These `items` are similar to JSON objects, and can 106 | be created from Python dictionaries, with restrictions similar to those for making a Python dictionary serializable to JSON. For these Python dicts, 107 | DynamoDB allows you to store and retrieve those dicts to and from the cloud simply and quickly, and there is a 108 | `free tier `_. 109 | 110 | Probably the trickiest part is selecting the `primary key`. The `primary key` is what defines the uniqueness of an item. 111 | See `AWS docs on primary key design `_ for details. 112 | The basic idea is that the primary key must be unique to that item and is composed of either a single `partition` (or hash) key or a combination of 113 | `partition` and `sort` (or range) keys. Those keys are often either strings or numbers, although boolean is also allowed. Secondary 114 | keys and indexes are also supported and can be used for queries. 115 | 116 | The default type for partition and sort keys is a string (`str`), but numbers (using `int`) and booleans (using `bool`) can also be specified. 117 | 118 | DynamoDB - Partition Key 119 | ~~~~~~~~~~~~~~~~~~~~~~~~ 120 | The code below shows how you can use the simple primary key (no sort key) in a table to put and get items. This also illustrates the 121 | flexibility of a NoSQL database. Fields can be added after the table creation, as long as the primary key does not change. 122 | 123 | .. code:: python 124 | 125 | dynamodb_access = DynamoDBAccess("users_example", profile_name="testawsimple") 126 | 127 | # we're only using email as a partition key in our primary key (no sort key). emails are unique to each user. 128 | dynamodb_access.create_table("email") 129 | 130 | # add our first user using email, first and last name. Initially, we may think that's all we need. 131 | dynamodb_access.put_item({"email": "victor@victorwooten.com", "first_name": "Victor", "last_name": "Wooten"}) 132 | 133 | # oh no. No one knows who "John Jones" is, they only know "John Paul Jones", so we need to add a middle name. 134 | # Luckily we are using a NoSQL database, so we just add "middle_name" in a new key/value pair. No database migration needed. 135 | dynamodb_access.put_item({"email": "john@ledzeppelin.com", "first_name": "John", "middle_name": "Paul", "last_name": "Jones"}) 136 | 137 | # oh no again. No one knows who "Gordon Matthew Thomas Sumner" is either, even with 2 middle names! All they know is "Sting". 138 | # We need to add a nickname. No problem since we're using a NoSQL database. 139 | dynamodb_access.put_item( 140 | { 141 | "email": "sting@thepolice.com", 142 | "first_name": "Gordon", 143 | "middle_name": "Matthew", 144 | "middle_name_2": "Thomas", 145 | "last_name": "Sumner", 146 | "nickname": "Sting", 147 | } 148 | ) 149 | 150 | # look up user info for one of our users 151 | item = dynamodb_access.get_item("email", "john@ledzeppelin.com") # this is a "get" since we're using a key and will always get back exactly one item 152 | 153 | DynamoDB - Partition and Sort Keys 154 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 155 | Below is an example of using a `composite` primary key, which is comprised of a `partition` key and a `sort` key. 156 | 157 | .. code:: python 158 | 159 | dynamodb_access = DynamoDBAccess("musical_instruments_example", profile_name="testawsimple") 160 | 161 | # Our primary key is a composite of partition (manufacturer) and sort (serial_number). 162 | # For a particular manufacturer, serial numbers define exactly one instrument (for this example we are assuming a serial number can be represented as an 163 | # integer and doesn't have to be a string). 164 | dynamodb_access.create_table("manufacturer", "serial_number", sort_key_type=int) 165 | 166 | # we have to convert float to a Decimal for DynamoDB 167 | dynamodb_access.put_item(dict_to_dynamodb({"manufacturer": "Gibson", "serial_number": 1234, "model": "Ripper", "year": 1983, "price": 1299.50})) 168 | dynamodb_access.put_item(dict_to_dynamodb({"manufacturer": "Gibson", "serial_number": 5678, "model": "Thunderbird", "year": 1977, "price": 2399.50})) 169 | dynamodb_access.put_item( 170 | dict_to_dynamodb( 171 | { 172 | "manufacturer": "Fender", 173 | "serial_number": 1234, 174 | "model": "Precision", 175 | "year": 2008, 176 | "price": 1800.0, 177 | } # same serial number as the Gibson Ripper, but that's OK since this is Fender 178 | ) 179 | ) 180 | 181 | # get all the Gibson instruments 182 | item = dynamodb_access.query("manufacturer", "Gibson") # this can (and will in this case) be multiple items 183 | pprint(item) 184 | 185 | DynamoDB Secondary Indexes 186 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 187 | You can add `secondary` indexes to a DynamoDB table in order to do a query on fields you didn't put in the original primary keys. 188 | This is very similar to adding indexes in a relational database, which is often done to speed up queries. Adding a secondary index 189 | can also be done after table creation to facilitate new data, which is very handy when not all data is known at table 190 | creation time. 191 | 192 | DynamoDB Scanning and Caching 193 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 194 | Sometimes you want an entire table to do some sort of search or data-mining on. AWS provides a `scan` capability, which is available 195 | in awsimple's `DynamoDBAccess.scan_table()` method, but it reads the entire table for each scan, which can be slow and/or costly. In order 196 | to reduce cost and increase speed, AWSimple offers a cached table scan via `DynamoDBAccess.scan_table_cached()` for tables that the 197 | user *knows* are static or at least very slowly changing. Of course, it's up to the user of awsimple to determine which method to use - the 198 | regular or cached version. 199 | 200 | For convenience, AWSimple also looks at the table's item count to determine if the cached scan needs to invalidate the cache. This can be 201 | useful if you know a table is only added to (thus the item count will change when it's updated) and you only try to access the table some time 202 | after the update. As of this writing the table item count is updated roughly every 6 hours. For example, you may use DynamoDB to store 203 | clinical trial data that is updated in a human time frame - e.g. weekly or even monthly, and once the trial is closed the data may never change. 204 | These sorts of situations may be appropriate for cached table scans. Of course it's up to the programmer to ensure this caching is appropriate 205 | for their use case. If not, use the regular `scan_table()` (albeit with the cost and performance implications). 206 | 
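A minimal sketch of the two scan flavors (the table name is from the examples above; `scan_table_cached()` and the `cache_hit` attribute are used just as in the bundled examples):

.. code:: python

    from awsimple import DynamoDBAccess

    dynamodb_access = DynamoDBAccess("users_example", profile_name="testawsimple")

    # always reads the entire table from AWS
    all_items = dynamodb_access.scan_table()

    # may be served from the local cache - use only for tables known to be static or slowly changing
    all_items = dynamodb_access.scan_table_cached()
    print(dynamodb_access.cache_hit)  # True if the local cache was used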
207 | SNS 208 | --- 209 | SNS is AWS's Notification service for messages. SNS can create notifications for a variety of endpoints, including emails, text messages and 210 | :ref:`SQS` queues. SNS can also be "connected" to other AWS services such as S3 so that S3 events (e.g. writes) can cause an SNS notification. 211 | 212 | SQS 213 | --- 214 | SQS is AWS's queuing service. Messages can be placed in queues (either programmatically or "connected" to other AWS services like SNS). 215 | Programs can poll SQS queues to get messages to operate on. SQS queues can be immediately read (and return nothing if no messages are available) 216 | or `long polled` to wait for an incoming message to act on. 217 | 
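Neither service needs much code with AWSimple. Below is a sketch that wires an SQS queue to an SNS topic and publishes a message through it (the topic and queue names are placeholders; the calls are the `SNSAccess` and `SQSPollAccess` methods documented in the API sections):

.. code:: python

    from awsimple import SNSAccess, SQSPollAccess

    queue = SQSPollAccess("awsimple-example-queue")
    queue.create_queue()

    topic = SNSAccess("awsimple-example-topic")
    topic.create_topic()
    topic.subscribe(queue)  # allow the SNS topic to send to the SQS queue

    topic.publish("hello world", subject="greeting")

    # long poll the queue; receive_message() returns None if no message arrives in time
    message = queue.receive_message()
    if message is not None:
        # for SNS-sourced messages the body is a JSON envelope containing the published text
        print(message.message)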
-------------------------------------------------------------------------------- /examples/aws_access_test.py: -------------------------------------------------------------------------------- 1 | from awsimple import AWSAccess 2 | 3 | # In this example we're using the default profile 4 | print(AWSAccess().test()) # Should be 'True' 5 | -------------------------------------------------------------------------------- /examples/derived_access_class.py: -------------------------------------------------------------------------------- 1 | from ismain import is_main 2 | from os import getlogin 3 | 4 | from awsimple import S3Access 5 | 6 | profile_name = "testawsimple" # all of my derived classes use this AWS profile name 7 | 8 | 9 | class MyS3Access(S3Access): 10 | """ 11 | MyS3Access class takes care of IAM via a profile name 12 | """ 13 | 14 | def __init__(self, bucket: str, **kwargs): 15 | # define the profile name, but pass all other optional arguments to the base class 16 | super().__init__(bucket, profile_name=profile_name, **kwargs) 17 | 18 | 19 | def read_s3_object(): 20 | # profile_name provided by MyS3Access 21 | # bucket names are globally unique, so change this bucket name to something unique to you 22 | s3_access = MyS3Access(f"awsimple-test-bucket-{getlogin()}") # bucket name (for this example we assume it already exists) 23 | print(s3_access.read_string("hello.txt")) # hello.txt is the S3 object key 24 | 25 | 26 | if is_main(): 27 | read_s3_object() 28 | -------------------------------------------------------------------------------- /examples/dynamodb_partition_and_sort.py: -------------------------------------------------------------------------------- 1 | import time 2 | from pprint import pprint 3 | 4 | from ismain import is_main 5 | 6 | from awsimple import DynamoDBAccess, dict_to_dynamodb 7 | 8 | 9 | def musical_instruments_example(): 10 | """ 11 | This example shows how to use DynamoDB to keep a table of musical instruments. 12 | 13 | """ 14 | 15 | dynamodb_access = DynamoDBAccess("musical_instruments_example", profile_name="testawsimple", cache_life=60) # short cache life for development 16 | 17 | # Our primary key is a composite of partition (manufacturer) and sort (serial_number). 18 | # For a particular manufacturer, serial numbers define exactly one instrument (for this example we are assuming a serial number can be represented as an 19 | # integer and doesn't have to be a string). 20 | dynamodb_access.create_table("manufacturer", "serial_number", sort_key_type=int) 21 | 22 | # we have to convert float to a Decimal for DynamoDB 23 | dynamodb_access.put_item(dict_to_dynamodb({"manufacturer": "Gibson", "serial_number": 1234, "model": "Ripper", "year": 1983, "price": 1299.50})) 24 | dynamodb_access.put_item(dict_to_dynamodb({"manufacturer": "Gibson", "serial_number": 5678, "model": "Thunderbird", "year": 1977, "price": 2399.50})) 25 | dynamodb_access.put_item( 26 | dict_to_dynamodb( 27 | { 28 | "manufacturer": "Fender", 29 | "serial_number": 1234, 30 | "model": "Precision", 31 | "year": 2008, 32 | "price": 1800.0, 33 | } # same serial number as the Gibson Ripper, but that's OK since this is Fender 34 | ) 35 | ) 36 | 37 | # get all the Gibson instruments 38 | start = time.time() 39 | item = dynamodb_access.query("manufacturer", "Gibson") # this can (and will in this case) be multiple items 40 | end = time.time() 41 | pprint(item) 42 | print(f"query took {end-start} seconds") # nominal 0.1 to 0.15 seconds 43 | print() 44 | 45 | # get the entire inventory 46 | start = time.time() 47 | all_items = dynamodb_access.scan_table_cached() # use cached if the table is large and *only* if we know our table is slowly or never changing 48 | end = time.time() 49 | pprint(all_items) 50 | print(f"scan took {end-start} seconds ({dynamodb_access.cache_hit=})") # always fast for this small data set, but caching can offer a speedup for large tables 51 | 52 | 53 | if is_main(): 54 | musical_instruments_example() 55 | -------------------------------------------------------------------------------- /examples/dynamodb_partition_only.py: -------------------------------------------------------------------------------- 1 | import time 2 | from pprint import pprint 3 | from ismain import is_main 4 | 5 | from awsimple import DynamoDBAccess 6 | 7 | 8 | def users_example(): 9 | """ 10 | This example shows how to use DynamoDB to keep a table of users. This also illustrates the flexibility of NoSQL in that we can 11 | simply add fields at any time. 12 | 13 | """ 14 | 15 | dynamodb_access = DynamoDBAccess("users_example", profile_name="testawsimple") 16 | 17 | # we're only using email as a partition key in our primary key (no sort key). emails are unique to each user. 18 | dynamodb_access.create_table("email") 19 | 20 | # add our first user using email, first and last name. Initially, we may think that's all we need. 21 | dynamodb_access.put_item({"email": "victor@victorwooten.com", "first_name": "Victor", "last_name": "Wooten"}) 22 | 23 | # oh no. No one knows who "John Jones" is, they only know "John Paul Jones", so we need to add a middle name. 24 | # Luckily we are using a NoSQL database, so we just add "middle_name" in a new key/value pair. No database migration needed. 25 | dynamodb_access.put_item({"email": "john@ledzeppelin.com", "first_name": "John", "middle_name": "Paul", "last_name": "Jones"}) 26 | 27 | # oh no again. No one knows who "Gordon Matthew Thomas Sumner" is either, even with 2 middle names! All they know is "Sting". 28 | # We need to add a nickname. No problem since we're using a NoSQL database. 
29 | dynamodb_access.put_item( 30 | { 31 | "email": "sting@thepolice.com", 32 | "first_name": "Gordon", 33 | "middle_name": "Matthew", 34 | "middle_name_2": "Thomas", 35 | "last_name": "Sumner", 36 | "nickname": "Sting", 37 | } 38 | ) 39 | 40 | # look up user info for one of our users 41 | start = time.time() 42 | item = dynamodb_access.get_item("email", "john@ledzeppelin.com") # this is a "get" since we're using a key and will always get back exactly one item 43 | end = time.time() 44 | 45 | pprint(item) 46 | print(f"took {end-start} seconds") # should take just a fraction of a second. 0.05 seconds was a nominal value on our test system. 47 | 48 | 49 | if is_main(): 50 | users_example() 51 | -------------------------------------------------------------------------------- /examples/make_venv.bat: -------------------------------------------------------------------------------- 1 | rmdir /S /Q venv 2 | c:"\Program Files\Python39\python.exe" -m venv --clear venv 3 | venv\Scripts\python.exe -m pip install --upgrade pip 4 | venv\Scripts\pip3 install -U setuptools 5 | venv\Scripts\pip3 install -U -r requirements-examples.txt 6 | -------------------------------------------------------------------------------- /examples/make_venv.sh: -------------------------------------------------------------------------------- 1 | python3 -m venv --clear venv 2 | source ./venv/bin/activate 3 | python -m pip install -U -r requirements-examples.txt 4 | deactivate 5 | -------------------------------------------------------------------------------- /examples/read_s3_object.py: -------------------------------------------------------------------------------- 1 | from ismain import is_main 2 | 3 | from awsimple import S3Access 4 | 5 | 6 | def read_s3_object(): 7 | s3_access = S3Access("testawsimple") 8 | print(s3_access.read_string("helloworld.txt")) 9 | 10 | 11 | if is_main(): 12 | read_s3_object() 13 | -------------------------------------------------------------------------------- /examples/requirements-examples.txt: -------------------------------------------------------------------------------- 1 | awsimple 2 | ismain 3 | -------------------------------------------------------------------------------- /examples/run_examples.bat: -------------------------------------------------------------------------------- 1 | call venv\Scripts\activate.bat 2 | python -m aws_access_test 3 | python -m write_read_s3_object 4 | python -m derived_access_class 5 | python -m dynamodb_partition_only 6 | python -m dynamodb_partition_and_sort 7 | deactivate 8 | -------------------------------------------------------------------------------- /examples/run_examples.sh: -------------------------------------------------------------------------------- 1 | source venv/bin/activate 2 | python -m aws_access_test 3 | python -m write_read_s3_object 4 | python -m derived_access_class 5 | deactivate 6 | -------------------------------------------------------------------------------- /examples/write_read_s3_object.py: -------------------------------------------------------------------------------- 1 | from awsimple import S3Access 2 | from os import getlogin 3 | 4 | # the S3 key is the name of the object in the S3 bucket, somewhat analogous to a file name 5 | s3_key = "hello.txt" 6 | 7 | # setup the s3_access object 8 | s3_access = S3Access(f"awsimple-test-bucket-{getlogin()}") # bucket names are globally unique, so change this bucket name to something unique to you 9 | 10 | 11 | # let's first make sure the bucket exists 12 | 
s3_access.create_bucket() 13 | 14 | # write our message to S3 15 | s3_access.write_string("hello world", s3_key) 16 | 17 | 18 | # will output "hello world" 19 | print(s3_access.read_string(s3_key)) 20 | -------------------------------------------------------------------------------- /make_venv_dev.bat: -------------------------------------------------------------------------------- 1 | rmdir /S /Q venv 2 | c:"\Program Files\Python313\python.exe" -m venv --clear venv 3 | venv\Scripts\python.exe -m pip install --upgrade pip 4 | venv\Scripts\pip3 install -U setuptools 5 | venv\Scripts\pip3 install -U -r requirements-dev.txt 6 | -------------------------------------------------------------------------------- /make_venv_dev.sh: -------------------------------------------------------------------------------- 1 | python3 -m venv --clear venv 2 | source ./venv/bin/activate 3 | python -m pip install -U -r requirements-dev.txt 4 | deactivate 5 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | 3 | [mypy-moto] 4 | ignore_errors = True 5 | ignore_missing_imports = True 6 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 192 3 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | # 2 | # awsimple requirements 3 | hashy 4 | boto3 5 | typeguard<3 6 | dictim 7 | appdirs 8 | tobool 9 | urllib3 10 | python-dateutil 11 | yasf 12 | # 13 | # examples 14 | ismain 15 | # 16 | # packaging 17 | twine 18 | wheel 19 | # 20 | # testing 21 | pytest 22 | moto[dynamodb,s3,sns,sqs] 23 | # moto apparently requires docker 24 | docker 25 | localstack 26 | localstack-client 27 | awscli 28 | awscli-local 29 | coverage 30 | pytest-cov 31 | pytest-pycharm 32 | pytest-socket 33 | pytest-xdist 34 | pillow 35 | requests 36 | # 37 | # formatting and linting 38 | black 39 | flake8 40 | mypy 41 | boto3-stubs[s3,dynamodb,sqs,sns] 42 | appdirs-stubs 43 | types-urllib3 44 | types-Pillow 45 | types-python-dateutil 46 | # 47 | # documentation 48 | sphinx 49 | -------------------------------------------------------------------------------- /scripts/blackify.bat: -------------------------------------------------------------------------------- 1 | pushd . 2 | cd .. 3 | call venv\Scripts\activate.bat 4 | python -m black -l 192 awsimple test_awsimple setup.py examples 5 | call deactivate 6 | popd 7 | -------------------------------------------------------------------------------- /scripts/coverage.bat: -------------------------------------------------------------------------------- 1 | pushd . 2 | cd .. 
3 | set PYTHONPATH=%CD% 4 | set AWSIMPLE_USE_MOTO_MOCK=0 5 | mkdir cov 6 | venv\Scripts\pytest.exe --cov-report=html --cov-report=xml:cov\coverage.xml --cov --ignore=examples 7 | venv\Scripts\python.exe scripts\doc_coverage_updater.py 8 | set PYTHONPATH= 9 | set AWSIMPLE_USE_MOTO_MOCK= 10 | popd 11 | -------------------------------------------------------------------------------- /scripts/doc_coverage_updater.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from xml.etree import ElementTree 3 | 4 | from ismain import is_main 5 | 6 | 7 | def doc_coverage_updater(): 8 | attributes = ElementTree.parse(Path("cov", "coverage.xml")).getroot().attrib 9 | numerator = float(attributes["lines-covered"]) + float(attributes["branches-covered"]) 10 | denominator = float(attributes["lines-valid"]) + float(attributes["branches-valid"]) 11 | coverage = numerator / denominator 12 | Path("doc_source", "coverage.txt").write_text(f"Test coverage: {coverage:.2%}") 13 | 14 | 15 | if is_main(): 16 | doc_coverage_updater() 17 | -------------------------------------------------------------------------------- /scripts/pypi.bat: -------------------------------------------------------------------------------- 1 | pushd . 2 | cd .. 3 | rmdir /S /Q awsimple.egg-info 4 | rmdir /S /Q build 5 | rmdir /S /Q dist 6 | copy /Y LICENSE LICENSE.txt 7 | call venv\Scripts\activate.bat 8 | python.exe setup.py bdist_wheel 9 | twine upload dist/* 10 | rmdir /S /Q awsimple.egg-info 11 | rmdir /S /Q build 12 | call deactivate 13 | popd 14 | -------------------------------------------------------------------------------- /scripts/pytest.bat: -------------------------------------------------------------------------------- 1 | REM run pytest with mocking (the run without mocking is currently commented out below) 2 | pushd . 3 | cd .. 4 | call venv\Scripts\activate.bat 5 | set PYTHONPATH=%CD% 6 | python -m pytest -s test_awsimple --cov-report xml:coverage.xml --cov-report html --cov=.\awsimple 7 | REM 8 | REM set AWSIMPLE_USE_MOTO_MOCK=0 9 | REM python -m pytest 10 | REM 11 | set PYTHONPATH= 12 | set AWSIMPLE_USE_MOTO_MOCK= 13 | popd 14 | -------------------------------------------------------------------------------- /scripts/run_flake8.bat: -------------------------------------------------------------------------------- 1 | pushd . 2 | cd .. 3 | del doc\flake8_report.txt 4 | call venv\Scripts\activate.bat 5 | REM 6 | REM E402 module level import not at top of file 7 | REM F401 imported but unused 8 | REM W503 line break before binary operator (black puts this in) 9 | REM E203 whitespace before ':' (black puts this in and may be controversial) 10 | REM E501 line too long 11 | flake8 --output-file doc\flake8_report.txt --ignore=E402,F401,W503,E203,E501 --tee awsimple 12 | call deactivate 13 | popd 14 | -------------------------------------------------------------------------------- /scripts/run_mypy.bat: -------------------------------------------------------------------------------- 1 | pushd . 2 | cd .. 3 | call venv\Scripts\activate.bat 4 | mypy -m awsimple 5 | mypy -m test_awsimple 6 | call deactivate 7 | popd 8 | -------------------------------------------------------------------------------- /scripts/run_sphinx.bat: -------------------------------------------------------------------------------- 1 | pushd . 2 | cd .. 
3 | call venv\Scripts\activate.bat 4 | sphinx-build -M html doc_source build 5 | call deactivate 6 | popd 7 | -------------------------------------------------------------------------------- /scripts/start_localstack.bat: -------------------------------------------------------------------------------- 1 | pushd . 2 | cd .. 3 | venv\Scripts\python.exe -m localstack.cli.main start 4 | popd 5 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from setuptools import setup 4 | 5 | from awsimple.__version__ import __version__, __title__, __author__, __author_email__, __url__, __download_url__, __description__ 6 | 7 | readme_file_path = os.path.join("README.md") # use the actual file name case so this also works on case-sensitive filesystems 8 | 9 | with open(readme_file_path, encoding="utf-8") as f: 10 | long_description = "\n" + f.read() 11 | 12 | setup( 13 | name=__title__, 14 | description=__description__, 15 | long_description=long_description, 16 | long_description_content_type="text/markdown", 17 | version=__version__, 18 | author=__author__, 19 | author_email=__author_email__, 20 | license="MIT License", 21 | url=__url__, 22 | download_url=__download_url__, 23 | keywords=["aws", "cloud", "storage", "database", "dynamodb", "s3"], 24 | packages=[__title__], 25 | package_data={__title__: [readme_file_path, "py.typed"]}, 26 | install_requires=["boto3", "typeguard<3", "hashy>=0.1.1", "dictim", "appdirs", "tobool", "urllib3", "python-dateutil", "yasf"], 27 | project_urls={"Documentation": "https://awsimple.readthedocs.io/"}, 28 | classifiers=[], 29 | python_requires=">3.10", 30 | ) 31 | -------------------------------------------------------------------------------- /test_awsimple/280px-PNG_transparency_demonstration_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesabel/awsimple/8157ed3d9501fcbc56382f148385144ed73a307e/test_awsimple/280px-PNG_transparency_demonstration_1.png -------------------------------------------------------------------------------- /test_awsimple/__init__.py: -------------------------------------------------------------------------------- 1 | from .const import id_str, test_awsimple_str, never_change_file_name, never_change_file_size 2 | from .tst_paths import temp_dir, cache_dir 3 | from .dict_is_close import dict_is_close 4 | from .sqs_drain import drain 5 | -------------------------------------------------------------------------------- /test_awsimple/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | from pathlib import Path 4 | import logging 5 | 6 | from botocore.exceptions import EndpointConnectionError 7 | 8 | from awsimple import is_mock, use_moto_mock_env_var, S3Access, is_using_localstack, dynamodb 9 | 10 | from test_awsimple import test_awsimple_str, temp_dir, cache_dir 11 | 12 | mock_env_var = os.environ.get(use_moto_mock_env_var) 13 | 14 | if mock_env_var is None: 15 | # facilitates CI by using mocking by default 16 | os.environ[use_moto_mock_env_var] = "1" 17 | 18 | # if using non-local pytest, create the credentials and config files dynamically 19 | aws_credentials_and_config_dir = Path(Path.home(), ".aws") 20 | aws_credentials_file = Path(aws_credentials_and_config_dir, "credentials") 21 | aws_config_file = Path(aws_credentials_and_config_dir, "config") 22 | if is_mock(): 23 | dynamodb.get_accommodated_clock_skew = lambda: 0.0 # no clock
skew for mock (better for CI) 24 | if not aws_credentials_and_config_dir.exists(): 25 | aws_credentials_and_config_dir.mkdir(parents=True, exist_ok=True) 26 | if not aws_credentials_file.exists(): 27 | credential_strings = [ 28 | "[default]\naws_access_key_id=AAAAAAAAAAAAAAAAAAAA\naws_secret_access_key=AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", 29 | f"[{test_awsimple_str}]\naws_access_key_id=AAAAAAAAAAAAAAAAAAAA\naws_secret_access_key=AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", 30 | ] 31 | aws_credentials_file.write_text("\n".join(credential_strings)) 32 | if not aws_config_file.exists(): 33 | config_strings = ["[profile default]\nregion=us-west-2", f"[profile {test_awsimple_str}]\nregion=us-west-2"] 34 | aws_config_file.write_text("\n".join(config_strings)) 35 | else: 36 | dynamodb.get_accommodated_clock_skew = lambda: 1.0 # faster than the default so tests don't take too much time 37 | 38 | 39 | class TestAWSimpleLoggingHandler(logging.Handler): 40 | def emit(self, record): 41 | print(record.getMessage()) 42 | assert False 43 | 44 | 45 | @pytest.fixture(scope="session", autouse=True) 46 | def session_fixture(): 47 | temp_dir.mkdir(parents=True, exist_ok=True) 48 | cache_dir.mkdir(parents=True, exist_ok=True) 49 | 50 | # add a handler that will fail the test (via assert) on ERROR or greater 51 | test_handler = TestAWSimpleLoggingHandler() 52 | test_handler.setLevel(logging.ERROR) 53 | logging.getLogger().addHandler(test_handler) 54 | 55 | print(f"{is_mock()=},{is_using_localstack()=}") 56 | 57 | 58 | @pytest.fixture(scope="module") 59 | def s3_access(): 60 | _s3_access = S3Access(profile_name=test_awsimple_str, bucket_name=test_awsimple_str, cache_dir=cache_dir) 61 | return _s3_access 62 | 63 | 64 | @pytest.fixture(scope="session", autouse=True) 65 | def test_localstack(): 66 | if is_using_localstack(): 67 | # just try anything to see if localstack is running 68 | _s3_access = S3Access(profile_name=test_awsimple_str, bucket_name=test_awsimple_str, cache_dir=cache_dir) 69 | try: 70 | _s3_access.bucket_list() 71 | except EndpointConnectionError: 72 | pytest.exit(f"{is_using_localstack()=} and localstack is not running - please run scripts/start_localstack.bat") 73 | -------------------------------------------------------------------------------- /test_awsimple/const.py: -------------------------------------------------------------------------------- 1 | id_str = "id" 2 | test_awsimple_str = "testawsimple" # underscores not allowed for S3 buckets 3 | 4 | never_change_file_name = "never_change.txt" 5 | never_change_file_size = 65 6 | -------------------------------------------------------------------------------- /test_awsimple/dict_is_close.py: -------------------------------------------------------------------------------- 1 | from typing import Union, List 2 | from math import isinf, isnan, nan, inf 3 | 4 | from typeguard import typechecked 5 | 6 | rel_tol_default = 1e-09 7 | abs_tol_default = 0.0 8 | 9 | # todo: put this in PyPI as its own package. Even though dictdiffer exists, this is slightly different ...
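# Illustrative usage (hypothetical values, shown only to document the intent; not executed):
#
#   a = {"x": 0.1 + 0.2}
#   b = {"x": 0.3}
#   a == b                # False: 0.1 + 0.2 is 0.30000000000000004 in binary floating point
#   dict_is_close(a, b)   # True: the values agree within the default relative tolerance of 1e-09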
10 | 11 | 12 | class ValueDivergence: 13 | @typechecked() 14 | def __init__(self, label: Union[str, None], value): 15 | self.label = label 16 | self.value = value 17 | 18 | def __repr__(self): 19 | v = str(self.value) 20 | if self.label is None: 21 | s = v 22 | else: 23 | s = self.label + ":" + v 24 | return s 25 | 26 | def __eq__(self, other): 27 | return self.label == other.label and self.value == other.value 28 | 29 | def to_sort(self): 30 | if isinstance(self.value, float) or isinstance(self.value, int): 31 | return self.value 32 | else: 33 | return 0.0 # for strings, etc. just use 0.0 to sort 34 | 35 | 36 | class ValueDivergences: 37 | @typechecked() 38 | def __init__(self, max_divergences: int = 10): 39 | self.max_divergences = max_divergences 40 | self.divergences = [] # type: List[ValueDivergence] 41 | self.hit_max_divergences_flag = False 42 | 43 | def __repr__(self): 44 | return self.divergences.__repr__() 45 | 46 | def __len__(self): 47 | return len(self.divergences) 48 | 49 | @typechecked() 50 | def add(self, divergence: ValueDivergence): 51 | if not any([d == divergence for d in self.divergences]): 52 | self.divergences.append(divergence) 53 | self.divergences.sort(key=lambda x: x.to_sort()) 54 | if len(self.divergences) > self.max_divergences: 55 | self.divergences.pop() 56 | self.hit_max_divergences_flag = True 57 | 58 | def get(self): 59 | return self.divergences 60 | 61 | def max_value(self): 62 | mv = None 63 | if len(self.divergences) > 0: 64 | mv = self.divergences[-1].value 65 | if not (isinstance(mv, float) or isinstance(mv, int)): 66 | mv = 0.0 67 | return mv 68 | 69 | def max_label(self): 70 | ml = None 71 | if len(self.divergences) > 0: 72 | ml = self.divergences[-1].label 73 | return ml 74 | 75 | def hit_max_divergences(self): 76 | # is max incomplete? 77 | return self.hit_max_divergences_flag 78 | 79 | 80 | class DictIsClose: 81 | """ 82 | Like doing x == y for a dict, except if there are floats then use math.isclose() 83 | """ 84 | 85 | @typechecked() 86 | def __init__(self, x, y, rel_tol: Union[float, None] = None, abs_tol: Union[float, None] = None, divergences: Union[ValueDivergences, None] = None): 87 | self._x = x 88 | self._y = y 89 | self._rel_tol = rel_tol 90 | self._abs_tol = abs_tol 91 | self.divergences = ValueDivergences() if divergences is None else divergences # create a fresh instance per object to avoid the shared mutable default argument pitfall 92 | self._is_close_flag = self._dict_is_close(self._x, self._y, self._rel_tol, self._abs_tol, None) 93 | 94 | def __repr__(self): 95 | return self.divergences.__repr__() 96 | 97 | @typechecked() 98 | def _is_close_number(self, a: Union[float, int], b: Union[float, int], rel_tol: float, abs_tol: float, value_label: Union[str, None]): 99 | """ 100 | similar to math.isclose() except it keeps track of which values have the greatest difference 101 | :param a: first input 102 | :param b: second input 103 | :param rel_tol: relative tolerance 104 | :param abs_tol: absolute tolerance 105 | :return: 106 | """ 107 | 108 | # handle NaN, INF. Matches math.isclose() .
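# For reference, math.isclose(a, b, rel_tol=r, abs_tol=t) is equivalent to
# abs(a - b) <= max(r * max(abs(a), abs(b)), t), with the special cases that NaN is never close to
# anything (including itself) and +inf/-inf are close only to themselves; the code below mirrors that.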
109 | divergence_value = 0.0 110 | if isnan(a) or isnan(b): 111 | is_close_flag = False 112 | divergence_value = nan 113 | elif isinf(a) and isinf(b): 114 | is_close_flag = a == b # handles both +INF and -INF 115 | if not is_close_flag: 116 | divergence_value = inf 117 | elif isinf(a) or isinf(b): 118 | is_close_flag = False # only one or the other is (positive or negative) infinity 119 | divergence_value = inf 120 | elif isinf(rel_tol) or isinf(abs_tol): 121 | is_close_flag = True 122 | else: 123 | # is_close_flag is same as: 124 | # abs(a-b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol) 125 | divergence_value = abs(a - b) - max(rel_tol * max(abs(a), abs(b)), abs_tol) # if > 0.0, values are *not* close 126 | is_close_flag = divergence_value <= 0.0 127 | 128 | if not is_close_flag and divergence_value is not None and (self.divergences.max_value() is None or divergence_value > self.divergences.max_value()): 129 | self.divergences.add(ValueDivergence(value_label, divergence_value)) 130 | 131 | return is_close_flag 132 | 133 | @typechecked() 134 | def _dict_is_close(self, x, y, rel_tol: Union[float, None], abs_tol: Union[float, None], parent_label: Union[str, None]): 135 | if rel_tol is None or isnan(rel_tol): 136 | rel_tol = rel_tol_default 137 | if abs_tol is None: 138 | abs_tol = abs_tol_default 139 | 140 | if (isinstance(x, float) or isinstance(x, int)) and (isinstance(y, float) or isinstance(y, int)): 141 | is_close_flag = self._is_close_number(x, y, rel_tol, abs_tol, parent_label) 142 | elif isinstance(x, dict) and isinstance(y, dict): 143 | is_close_flags = [] 144 | if set(x.keys()) == set(y.keys()): 145 | for k in x: 146 | # keys can be things other than strings, e.g. int 147 | str_k = str(k) 148 | if parent_label is None: 149 | label = str_k 150 | else: 151 | label = parent_label + "." 
+ str_k 152 | 153 | is_close_flag = self._dict_is_close(x[k], y[k], rel_tol, abs_tol, label) 154 | is_close_flags.append(is_close_flag) 155 | is_close_flag = all(is_close_flags) 156 | else: 157 | is_close_flag = x == y # everything else that can be evaluated with == such as strings 158 | if not is_close_flag: 159 | self.divergences.add(ValueDivergence(parent_label, str(x))) 160 | 161 | return is_close_flag 162 | 163 | def is_close(self): 164 | return self._is_close_flag 165 | 166 | 167 | @typechecked() 168 | def dict_is_close(x, y, rel_tol: Union[float, None] = None, abs_tol: Union[float, None] = None): 169 | """ 170 | 171 | Like doing x == y for a dict, except if there are floats then use math.isclose() 172 | 173 | :param x: input x 174 | :param y: input y 175 | :param rel_tol: relative tolerance to pass to math.isclose 176 | :param abs_tol: absolute tolerance to pass to math.isclose 177 | :return: True if dictionaries match and float values are close 178 | 179 | """ 180 | return DictIsClose(x, y, rel_tol, abs_tol).is_close() 181 | -------------------------------------------------------------------------------- /test_awsimple/sqs_drain.py: -------------------------------------------------------------------------------- 1 | import time 2 | from pprint import pprint 3 | 4 | from awsimple import SQSAccess 5 | 6 | from test_awsimple import test_awsimple_str 7 | 8 | 9 | def drain(): 10 | # drain existing messages 11 | q = SQSAccess(test_awsimple_str, profile_name=test_awsimple_str) 12 | q.create_queue() # just in case it doesn't exist 13 | while len(messages := q.receive_messages()) > 0: 14 | print("existing:") 15 | pprint(messages) 16 | time.sleep(0.1) 17 | print() 18 | -------------------------------------------------------------------------------- /test_awsimple/test_aws_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from botocore.exceptions import ProfileNotFound 4 | 5 | from awsimple import AWSAccess, S3Access, DynamoDBAccess, SQSAccess, is_mock 6 | 7 | from test_awsimple import test_awsimple_str 8 | 9 | 10 | def test_aws_test(): 11 | # test the test() method (basic AWS connection) 12 | 13 | # these should work 14 | if not is_mock(): 15 | assert AWSAccess(profile_name=test_awsimple_str).test() 16 | assert S3Access(test_awsimple_str, profile_name=test_awsimple_str).test() 17 | assert DynamoDBAccess(test_awsimple_str, profile_name=test_awsimple_str).test() 18 | assert SQSAccess(test_awsimple_str, profile_name=test_awsimple_str).test() 19 | 20 | if not is_mock(): 21 | # this (non-existent) profile doesn't have access at all 22 | with pytest.raises(ProfileNotFound): 23 | AWSAccess(profile_name="IAmNotAProfile").test() 24 | -------------------------------------------------------------------------------- /test_awsimple/test_c_dynamodb_create_table.py: -------------------------------------------------------------------------------- 1 | from pprint import pprint 2 | 3 | from awsimple import DynamoDBAccess 4 | from test_awsimple import test_awsimple_str 5 | 6 | 7 | def test_dynamodb_create_table(): 8 | table_name = f"{test_awsimple_str}temp" 9 | 10 | dynamodb_access = DynamoDBAccess(table_name, profile_name=test_awsimple_str) 11 | 12 | dynamodb_access.create_table("id") 13 | assert dynamodb_access.table_exists() # create_table has a waiter so the table should exist at this point 14 | 15 | dynamodb_access.put_item({"id": "me", "value": 1}) 16 | 17 | table_data = dynamodb_access.scan_table_cached() 18 | pprint(table_data) 19 | assert 
table_data[0]["id"] == "me" 20 | assert table_data[0]["value"] == 1 21 | assert len(table_data) == 1 22 | assert len(dynamodb_access.scan_table_cached(invalidate_cache=True)) == 1 23 | 24 | dynamodb_access.delete_table() 25 | assert not dynamodb_access.delete_table() # delete_table has a waiter so the table should exist at this point 26 | -------------------------------------------------------------------------------- /test_awsimple/test_dynamodb.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import decimal 4 | from collections import OrderedDict, defaultdict 5 | import math 6 | import datetime 7 | from datetime import timedelta, timezone 8 | import pickle 9 | from pathlib import Path 10 | import time 11 | 12 | from PIL import Image 13 | from ismain import is_main 14 | from dictim import dictim 15 | 16 | from awsimple import dict_to_dynamodb, DynamoDBAccess, is_mock, is_using_localstack, KeyType 17 | from awsimple.dynamodb import get_accommodated_clock_skew 18 | from test_awsimple import dict_is_close, test_awsimple_str, id_str 19 | 20 | dict_id = "test" 21 | 22 | # source: 23 | # https://en.wikipedia.org/wiki/Portable_Network_Graphics 24 | # https://en.wikipedia.org/wiki/File:PNG_transparency_demonstration_1.png 25 | png_image = Image.open(os.path.join("test_awsimple", "280px-PNG_transparency_demonstration_1.png")) 26 | 27 | od = OrderedDict() 28 | od["a"] = 1 29 | od["b"] = 2 30 | 31 | dd = defaultdict(int) 32 | dd[1] = 2 33 | 34 | sample_input = { 35 | id_str: dict_id, 36 | "sample1": "Test Data", 37 | "sample2": 2.0, 38 | "sample3": True, 39 | "sample4": int(1), 40 | "sample5": None, 41 | "sample6": {"test": True}, 42 | "sample7": ["Hello", "World"], 43 | "sample8": [9, 10], 44 | "od": od, 45 | "dd": dd, 46 | "DecimalInt": decimal.Decimal(42), 47 | "DecimalFloat": decimal.Decimal(2.0) / decimal.Decimal(3.0), 48 | "a_tuple": (1, 2, 3), 49 | 42: "my_key_is_an_int", 50 | "difficult_floats": [math.pi, math.e, 0.6], 51 | "difficult_ints": [sys.maxsize], 52 | "image": png_image, 53 | "test_date_time": datetime.datetime.fromtimestamp(1559679535, tz=timezone.utc), # 2019-06-04T20:18:55+00:00 54 | "zero_len_string": "", 55 | "dictim": dictim({"HI": dictim({"there": 1})}), # nested 56 | } 57 | 58 | 59 | def check_table_contents(contents): 60 | with open(os.path.join("cache", f"{test_awsimple_str}.pickle"), "rb") as f: 61 | assert dict_is_close(sample_input, contents[0]) 62 | assert dict_is_close(sample_input, pickle.load(f)[0]) 63 | 64 | 65 | def test_get_table_names(): 66 | if is_mock() or is_using_localstack(): 67 | dynamodb_access = DynamoDBAccess(test_awsimple_str, profile_name=test_awsimple_str) # for mock we have to make the table 68 | dynamodb_access.create_table(id_str) # have to create the table on the fly for mocking 69 | else: 70 | dynamodb_access = DynamoDBAccess(profile_name=test_awsimple_str) # since we're only going to get the existing table names, we don't have to provide a table name 71 | dynamodb_tables = dynamodb_access.get_table_names() 72 | print(dynamodb_tables) 73 | assert len(dynamodb_tables) > 0 74 | assert test_awsimple_str in dynamodb_tables 75 | 76 | 77 | def test_dynamodb(): 78 | dynamodb_dict = dict_to_dynamodb(sample_input) 79 | 80 | assert dynamodb_dict["sample1"] == "Test Data" 81 | assert math.isclose(float(dynamodb_dict["sample2"]), decimal.Decimal(2.0)) 82 | assert dynamodb_dict["sample3"] is True 83 | assert dynamodb_dict["sample5"] is None 84 | assert dynamodb_dict["sample6"] == {"test": 
True} 85 | assert dynamodb_dict["sample7"] == ["Hello", "World"] 86 | assert dynamodb_dict["sample8"] == [decimal.Decimal(9), decimal.Decimal(10)] 87 | assert dynamodb_dict["DecimalInt"] == decimal.Decimal(42) 88 | assert dynamodb_dict["DecimalFloat"] == decimal.Decimal(2.0) / decimal.Decimal(3.0) 89 | assert dynamodb_dict["a_tuple"] == [1, 2, 3] 90 | assert dynamodb_dict["42"] == "my_key_is_an_int" # test conversion of an int key to a string 91 | assert dynamodb_dict["test_date_time"] == "2019-06-04T20:18:55+00:00" 92 | assert dynamodb_dict["zero_len_string"] is None 93 | 94 | # while dictim is case-insensitive, when we convert to dict for DynamoDB it becomes case-sensitive 95 | assert list(dynamodb_dict["dictim"]["HI"])[0] == "there" 96 | assert dynamodb_dict["dictim"]["HI"]["there"] == 1 # actually Decimal(1) 97 | assert dynamodb_dict["dictim"].get("hi") is None # we're back to case sensitivity 98 | 99 | # start with a cache life of 1 second to ensure there is no cache hit 100 | dynamodb_access = DynamoDBAccess(profile_name=test_awsimple_str, table_name=test_awsimple_str, cache_dir=Path("cache"), cache_life=timedelta(seconds=1).total_seconds()) 101 | dynamodb_access.create_table(id_str) 102 | dynamodb_access.put_item(dynamodb_dict) 103 | time.sleep(get_accommodated_clock_skew()) 104 | 105 | sample_from_db = dynamodb_access.get_item(id_str, dict_id) 106 | assert sample_from_db == dynamodb_dict # make sure we get back exactly what we wrote 107 | 108 | table_contents = dynamodb_access.scan_table_cached() 109 | assert not dynamodb_access.cache_hit 110 | check_table_contents(table_contents) 111 | 112 | table_contents = dynamodb_access.scan_table() 113 | check_table_contents(table_contents) 114 | 115 | if is_using_localstack(): 116 | dynamodb_access.cache_life = 600.0 # localstack can take a while ... 
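# The second cached scan below should now be served from the local pickle file (written under cache_dir by the
# earlier scan_table_cached() call) rather than from DynamoDB, which is what the cache_hit assertion verifies.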
117 | table_contents = dynamodb_access.scan_table_cached() 118 | assert dynamodb_access.cache_hit 119 | check_table_contents(table_contents) 120 | 121 | assert dynamodb_access.get_primary_keys_dict() == {KeyType.partition: id_str} 122 | 123 | 124 | if is_main(): 125 | test_dynamodb() 126 | -------------------------------------------------------------------------------- /test_awsimple/test_dynamodb_delete.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from awsimple import DynamoDBAccess, DBItemNotFound 4 | 5 | from test_awsimple import test_awsimple_str, id_str 6 | 7 | 8 | def test_dynamodb_delete(): 9 | dynamodb_access = DynamoDBAccess(profile_name=test_awsimple_str, table_name=test_awsimple_str) 10 | dynamodb_access.create_table(id_str) 11 | test_id = "deleter" 12 | item_value = {id_str: test_id, "color": "blue"} 13 | dynamodb_access.put_item(item_value) 14 | assert dynamodb_access.get_item(id_str, test_id) == item_value # check that it's set 15 | dynamodb_access.delete_item(id_str, test_id) 16 | with pytest.raises(DBItemNotFound): 17 | print(dynamodb_access.get_item(id_str, test_id)) # check that it's deleted 18 | -------------------------------------------------------------------------------- /test_awsimple/test_dynamodb_delete_all_items.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from awsimple import dict_to_dynamodb, DynamoDBAccess, is_mock 4 | 5 | from test_awsimple import id_str, test_awsimple_str 6 | 7 | 8 | def test_dynamodb_delete_all_items(): 9 | table_name = "awsimple-delete-test" # this test is the only thing we'll use this table for 10 | 11 | dynamodb_access = DynamoDBAccess(table_name, profile_name=test_awsimple_str) 12 | dynamodb_access.create_table(id_str) 13 | dynamodb_access.put_item(dict_to_dynamodb({id_str: "me", "answer": 42})) 14 | dynamodb_access.put_item(dict_to_dynamodb({id_str: "you", "question": 0})) 15 | while len(table_contents := dynamodb_access.scan_table()) != 2: 16 | print(f"waiting for the put ...{table_contents}") 17 | time.sleep(1) # DynamoDB is "eventually consistent" 18 | rows_deleted = dynamodb_access.delete_all_items() 19 | assert rows_deleted == 2 20 | while len(table_contents := dynamodb_access.scan_table()) != 0: 21 | print(f"waiting for the delete all items ...{table_contents}") 22 | time.sleep(1) # DynamoDB is "eventually consistent" 23 | -------------------------------------------------------------------------------- /test_awsimple/test_dynamodb_get_item.py: -------------------------------------------------------------------------------- 1 | from awsimple import DynamoDBAccess 2 | 3 | from test_awsimple import test_awsimple_str, id_str 4 | 5 | 6 | def test_dynamodb_get_item(): 7 | test_id = "test_id" 8 | dynamodb_access = DynamoDBAccess(profile_name=test_awsimple_str, table_name=test_awsimple_str) 9 | dynamodb_access.create_table(id_str) 10 | dynamodb_access.delete_item(id_str, test_id) # make sure the item doesn't exist 11 | 12 | item_value = {id_str: test_id, "color": "blue"} 13 | dynamodb_access.upsert_item(id_str, test_id, item={"color": "blue"}) # insert 14 | assert dynamodb_access.get_item(id_str, test_id) == item_value # check that it's set 15 | assert dynamodb_access.get_item(partition_value=test_id) == item_value # check that it's set 16 | -------------------------------------------------------------------------------- /test_awsimple/test_dynamodb_item_not_found.py: 
-------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from awsimple import DynamoDBAccess, DBItemNotFound 4 | 5 | from test_awsimple import test_awsimple_str, id_str 6 | 7 | 8 | def test_dynamodb_item_not_found(): 9 | dynamodb_access = DynamoDBAccess(test_awsimple_str, profile_name=test_awsimple_str) 10 | dynamodb_access.create_table(id_str) 11 | with pytest.raises(DBItemNotFound): 12 | dynamodb_access.get_item(id_str, "I will never ever exist") 13 | -------------------------------------------------------------------------------- /test_awsimple/test_dynamodb_miv_ui.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from awsimple import DynamoDBMIVUI, miv_string, get_time_us, miv_us_to_timestamp 4 | 5 | 6 | def test_dynamodb_miv_ui(): 7 | test_name = "test_dynamodb_miv_ui" 8 | primary_partition_key = "id" 9 | id_value = "me" 10 | input_data = {primary_partition_key: id_value} 11 | 12 | dynamodb_miv_ui = DynamoDBMIVUI(test_name) 13 | dynamodb_miv_ui.create_table(primary_partition_key) # use default of str 14 | dynamodb_miv_ui.put_item(input_data) 15 | dynamodb_miv_ui.put_item(input_data) 16 | output_data = dynamodb_miv_ui.get_most_senior_item(primary_partition_key, id_value) 17 | print(output_data) 18 | assert output_data[primary_partition_key] == id_value 19 | miv_value = output_data[miv_string] 20 | assert miv_value <= get_time_us() # basic check for miv value 21 | difference = time.time() - miv_us_to_timestamp(miv_value) 22 | print(f"{difference=} seconds") 23 | assert 0 < difference < 100 # check that we can convert the MIV back to time in seconds since epoch 24 | -------------------------------------------------------------------------------- /test_awsimple/test_dynamodb_primary_key_as_number.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from pprint import pprint 3 | 4 | from awsimple import DynamoDBAccess, dynamodb_to_dict 5 | from ismain import is_main 6 | 7 | from test_awsimple import test_awsimple_str 8 | 9 | 10 | def test_dynamodb_sort_as_number(): 11 | dynamodb_access = DynamoDBAccess(profile_name=test_awsimple_str, table_name=f"{test_awsimple_str}_sort_as_number", cache_dir=Path("cache")) 12 | dynamodb_access.create_table("id", "year", sort_key_type=int) # sort key as number 13 | input_item = {"id": "me", "year": 1999, "out_of_time": False} 14 | dynamodb_access.put_item(input_item) 15 | item = dynamodb_access.get_item("id", "me", "year", 1999) 16 | output_item = dynamodb_to_dict(item) 17 | pprint(item) 18 | assert input_item == output_item 19 | dynamodb_access.delete_table() 20 | 21 | 22 | def test_dynamodb_partition_as_number(): 23 | dynamodb_access = DynamoDBAccess(profile_name=test_awsimple_str, table_name=f"{test_awsimple_str}_partition_as_number", cache_dir=Path("cache")) 24 | dynamodb_access.create_table("year", "id", partition_key_type=int) # partition key as number 25 | input_item = {"id": "me", "year": 1999, "out_of_time": False} 26 | dynamodb_access.put_item(input_item) 27 | item = dynamodb_access.get_item("id", "me", "year", 1999) 28 | pprint(item) 29 | assert input_item == dynamodb_to_dict(item) 30 | 31 | item = dynamodb_access.query("year", 1999)[0] # only use the partition key (no sort key) 32 | pprint(item) 33 | assert input_item == dynamodb_to_dict(item) 34 | 35 | dynamodb_access.delete_table() 36 | 37 | 38 | if is_main(): 39 | test_dynamodb_sort_as_number() 40 | 
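# A note on the dynamodb_to_dict() conversions above: boto3 returns every DynamoDB number as a
# decimal.Decimal, so a round-trip comparison against the original input needs a conversion step.
# A minimal sketch of the idea (a standalone illustration, not awsimple's actual implementation):
#
#   from decimal import Decimal
#
#   def plain(value):
#       # recursively turn Decimal back into int/float so {"year": Decimal("1999")} == {"year": 1999}
#       if isinstance(value, Decimal):
#           return int(value) if value == value.to_integral_value() else float(value)
#       if isinstance(value, dict):
#           return {key: plain(item) for key, item in value.items()}
#       if isinstance(value, list):
#           return [plain(item) for item in value]
#       return value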
-------------------------------------------------------------------------------- /test_awsimple/test_dynamodb_query.py: -------------------------------------------------------------------------------- 1 | from ismain import is_main 2 | 3 | from awsimple import DynamoDBAccess, QuerySelection 4 | 5 | from test_awsimple import test_awsimple_str 6 | 7 | 8 | def test_dynamodb_query(): 9 | table_name = "testawsimpleps" # ps = both partition and sort 10 | 11 | dynamodb_access = DynamoDBAccess(profile_name=test_awsimple_str, table_name=table_name) 12 | dynamodb_access.create_table("id", "name") 13 | 14 | # three entries for "me" 15 | dynamodb_access.put_item({"id": "me", "name": "james", "answer": 13}) # this will be the "first" one 16 | dynamodb_access.put_item({"id": "me", "name": "james abel", "answer": 1}) 17 | dynamodb_access.put_item({"id": "me", "name": "zzz", "answer": 99}) # this will be the "last" one 18 | 19 | dynamodb_access.put_item({"id": "notme", "name": "notjames", "answer": 42}) 20 | 21 | response = dynamodb_access.query("id", "me") # partition only 22 | assert len(response) == 3 23 | 24 | response = dynamodb_access.query("id", "me", "name", "james") # partition and sort 25 | assert len(response) == 1 26 | 27 | response = dynamodb_access.query_begins_with("id", "me", "name", "james a") # begins with 28 | assert len(response) == 1 29 | response = dynamodb_access.query_begins_with("id", "me", "name", "jame") 30 | assert len(response) == 2 31 | 32 | response = dynamodb_access.query("id", "idonotexist") # does not exist 33 | assert len(response) == 0 34 | 35 | response = dynamodb_access.query_one("id", "me", QuerySelection.highest) 36 | assert response["answer"] == 99 37 | assert response["name"] == "zzz" # the "last" entry, as sorted by sort key 38 | 39 | response = dynamodb_access.query_one("id", "me", QuerySelection.lowest) 40 | assert response["answer"] == 13 41 | assert response["name"] == "james" # the "first" entry, as sorted by sort key 42 | 43 | response = dynamodb_access.query_one("id", "idonotexist", QuerySelection.lowest) 44 | assert response is None 45 | 46 | 47 | if is_main(): 48 | test_dynamodb_query() 49 | -------------------------------------------------------------------------------- /test_awsimple/test_dynamodb_query_kwargs.py: -------------------------------------------------------------------------------- 1 | from ismain import is_main 2 | 3 | from awsimple import DynamoDBAccess, QuerySelection 4 | 5 | from test_awsimple import test_awsimple_str 6 | 7 | 8 | def test_dynamodb_query_kwargs(): 9 | table_name = "testawsimpleps" # ps = both partition and sort 10 | 11 | dynamodb_access = DynamoDBAccess(profile_name=test_awsimple_str, table_name=table_name) 12 | dynamodb_access.create_table("id", "name") 13 | 14 | # three entries for "me" 15 | dynamodb_access.put_item({"id": "me", "name": "james", "answer": 13}) # this will be the "first" one 16 | dynamodb_access.put_item({"id": "me", "name": "james abel", "answer": 1}) 17 | dynamodb_access.put_item({"id": "me", "name": "zzz", "answer": 99}) # this will be the "last" one 18 | 19 | dynamodb_access.put_item({"id": "notme", "name": "notjames", "answer": 42}) 20 | 21 | response = dynamodb_access.query(id="me") # partition only 22 | assert len(response) == 3 23 | 24 | response = dynamodb_access.query(id="me", name="james") # partition and sort 25 | assert len(response) == 1 26 | 27 | response = dynamodb_access.query_begins_with(id="me", name="james a") # begins with 28 | assert len(response) == 1 29 | response = 
dynamodb_access.query_begins_with(id="me", name="jame") 30 | assert len(response) == 2 31 | 32 | response = dynamodb_access.query(id="idonotexist") # does not exist 33 | assert len(response) == 0 34 | 35 | response = dynamodb_access.query_one(partition_value="me") # highest is default 36 | assert response["answer"] == 99 37 | assert response["name"] == "zzz" # the "last" entry, as sorted by sort key 38 | 39 | response = dynamodb_access.query_one(partition_value="me", direction=QuerySelection.highest) 40 | assert response["answer"] == 99 41 | assert response["name"] == "zzz" # the "last" entry, as sorted by sort key 42 | 43 | response = dynamodb_access.query_one(partition_value="me", direction=QuerySelection.lowest) 44 | assert response["answer"] == 13 45 | assert response["name"] == "james" # the "first" entry, as sorted by sort key 46 | 47 | response = dynamodb_access.query_one(partition_value="idonotexist", direction=QuerySelection.lowest) 48 | assert response is None 49 | 50 | 51 | if is_main(): 52 | test_dynamodb_query_kwargs() 53 | -------------------------------------------------------------------------------- /test_awsimple/test_dynamodb_scan_cache.py: -------------------------------------------------------------------------------- 1 | def test_dynamodb_scan_cache_cache_life(): 2 | """ 3 | Test that we can properly do a DynamoDB cached scan using the cache life. 4 | """ 5 | ... 6 | 7 | 8 | def test_dynamodb_scan_cache_mtime(): 9 | """ 10 | Test that we can properly do a DynamoDB cached scan using the mtime (metadata). 11 | """ 12 | ... 13 | -------------------------------------------------------------------------------- /test_awsimple/test_dynamodb_scan_table_as_dict.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from datetime import timedelta 3 | from decimal import Decimal 4 | import time 5 | 6 | from awsimple import DynamoDBAccess 7 | from awsimple.dynamodb import get_accommodated_clock_skew 8 | from test_awsimple import test_awsimple_str, id_str 9 | 10 | 11 | def check_scan_table(table_contents: dict, expected_contents: dict): 12 | keys = list(table_contents.keys()) 13 | # for real AWS I may have other things in this table 14 | assert "a" in keys 15 | assert "b" in keys 16 | assert "c" in keys 17 | # check sort 18 | for key_index in range(0, len(keys) - 1): 19 | assert keys[key_index + 1] > keys[key_index] 20 | # only test for what we just put in - there may be other rows in the table in the real AWS 21 | for k, v in expected_contents.items(): 22 | assert table_contents[k] == v 23 | 24 | 25 | def test_dynamodb_scan_table_as_dict(): 26 | dynamodb_access = DynamoDBAccess(profile_name=test_awsimple_str, table_name=test_awsimple_str, cache_dir=Path("cache"), cache_life=timedelta(seconds=10).total_seconds()) 27 | dynamodb_access.create_table(id_str) 28 | dynamodb_access.put_item({id_str: "b", "value": 1}) # will be sorted in a different order than we're inputting 29 | dynamodb_access.put_item({id_str: "c", "value": 3}) 30 | dynamodb_access.put_item({id_str: "a", "value": 2}) 31 | time.sleep(get_accommodated_clock_skew()) 32 | 33 | expected_contents = {"a": {"id": "a", "value": Decimal("2")}, "b": {"id": "b", "value": Decimal("1")}, "c": {"id": "c", "value": Decimal("3")}} 34 | table_contents = dynamodb_access.scan_table_as_dict() 35 | check_scan_table(table_contents, expected_contents) 36 | 37 | table_contents = dynamodb_access.scan_table_cached_as_dict() 38 | check_scan_table(table_contents, expected_contents) 39 
| 40 | table_contents = dynamodb_access.scan_table_cached_as_dict() 41 | assert dynamodb_access.cache_hit 42 | check_scan_table(table_contents, expected_contents) 43 | 44 | table_contents = dynamodb_access.scan_table_cached_as_dict(sort_key=lambda x: x[id_str]) # test sort_key 45 | check_scan_table(table_contents, expected_contents) 46 | -------------------------------------------------------------------------------- /test_awsimple/test_dynamodb_secondary_index.py: -------------------------------------------------------------------------------- 1 | from awsimple import DynamoDBAccess, DictKey 2 | from copy import deepcopy 3 | 4 | from test_awsimple import test_awsimple_str, id_str 5 | 6 | 7 | def test_dynamodb_secondary_index(): 8 | table_name = f"{test_awsimple_str}2" 9 | table = DynamoDBAccess(table_name) 10 | 11 | sort_key = "id2" 12 | secondary_index = "id3" 13 | table.create_table(id_str, sort_key, secondary_index) 14 | 15 | item = {id_str: "me", sort_key: "myself", secondary_index: "i"} 16 | table.put_item(item) 17 | 18 | item2 = deepcopy(item) 19 | item2[sort_key] = "moi même" # also test unicode! 20 | item2[secondary_index] = "je" 21 | table.put_item(item2) 22 | 23 | query_results = table.query(id_str, "me") 24 | print(f"{query_results=}") 25 | assert len(query_results) == 2 # just the partition key should provide us with both rows 26 | 27 | # with (only) the secondary index (in DynamoDB you can't mix primary and secondary indexes) 28 | assert table.query(secondary_index, "je") == [item2] 29 | assert table.query(id3="je") == [item2] 30 | 31 | expected_contents = { 32 | DictKey(partition="me", sort="moi même"): {"id": "me", "id2": "moi même", "id3": "je"}, 33 | DictKey(partition="me", sort="myself"): {"id": "me", "id2": "myself", "id3": "i"}, 34 | } 35 | contents = table.scan_table_cached_as_dict() 36 | assert contents == expected_contents 37 | assert list(contents.keys()) == [DictKey(partition="me", sort="moi même"), DictKey(partition="me", sort="myself")] 38 | 39 | table.delete_table() 40 | 41 | 42 | def test_dynamodb_secondary_index_int(): 43 | table_name = f"{test_awsimple_str}3" 44 | table = DynamoDBAccess(table_name) 45 | 46 | sort_key = "id2" 47 | secondary_index = "num" 48 | table.create_table(id_str, sort_key, secondary_index, secondary_key_type=int) # secondary index as an int 49 | 50 | table.put_item({id_str: "me", sort_key: "myself", secondary_index: 1}) 51 | table.put_item({id_str: "me", sort_key: "moi", secondary_index: 2}) 52 | 53 | query_results = table.query(id_str, "me") 54 | print(f"{query_results=}") 55 | assert len(query_results) == 2 # just the partition key should provide us with both rows 56 | table.delete_table() 57 | -------------------------------------------------------------------------------- /test_awsimple/test_dynamodb_table_not_found.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from awsimple import DynamoDBAccess, DynamoDBTableNotFound 4 | 5 | from test_awsimple import test_awsimple_str 6 | 7 | 8 | def test_dynamodb_table_not_found_put_item(): 9 | with pytest.raises(DynamoDBTableNotFound): 10 | dynamodb_access = DynamoDBAccess("does_not_exist", profile_name=test_awsimple_str) 11 | dynamodb_access.put_item(item={}) # table won't exist 12 | 13 | 14 | def test_dynamodb_table_not_found_upsert_item(): 15 | with pytest.raises(DynamoDBTableNotFound): 16 | dynamodb_access = DynamoDBAccess("does_not_exist", profile_name=test_awsimple_str) 17 | dynamodb_access.upsert_item(item={}) # table won't 
exist 18 | 19 | 20 | def test_dynamodb_table_not_found_get_item(): 21 | with pytest.raises(DynamoDBTableNotFound): 22 | dynamodb_access = DynamoDBAccess("does_not_exist", profile_name=test_awsimple_str) 23 | dynamodb_access.get_item("dummy", "dummy") # table won't exist 24 | -------------------------------------------------------------------------------- /test_awsimple/test_dynamodb_upsert.py: -------------------------------------------------------------------------------- 1 | from awsimple import DynamoDBAccess 2 | 3 | from test_awsimple import test_awsimple_str, id_str 4 | 5 | 6 | def test_dynamodb_upsert(): 7 | dynamodb_access = DynamoDBAccess(profile_name=test_awsimple_str, table_name=test_awsimple_str) 8 | dynamodb_access.create_table(id_str) 9 | test_id = "upserter" 10 | dynamodb_access.delete_item(id_str, test_id) # make sure the item doesn't exist 11 | 12 | item_value = {id_str: test_id, "color": "blue"} 13 | dynamodb_access.upsert_item(id_str, test_id, item={"color": "blue"}) # insert 14 | assert dynamodb_access.get_item(id_str, test_id) == item_value # check that it's set 15 | 16 | item_value["my_size"] = 9 17 | dynamodb_access.upsert_item(id_str, test_id, item={"my_size": 9}) # update with new data 18 | assert dynamodb_access.get_item(id_str, test_id) == item_value # check that it's set to the new value 19 | 20 | item_value["my_size"] = 10 21 | dynamodb_access.upsert_item(id_str, test_id, item={"my_size": 10}) # update existing data 22 | assert dynamodb_access.get_item(id_str, test_id) == item_value # check that it's set to the new value 23 | -------------------------------------------------------------------------------- /test_awsimple/test_get_account_id.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ismain import is_main 4 | 5 | from awsimple import AWSAccess 6 | 7 | 8 | def test_get_account_id(): 9 | 10 | with pytest.raises(NotImplementedError): 11 | aws_access = AWSAccess() 12 | account_id = aws_access.get_account_id() 13 | assert len(account_id) >= 12 # currently all account IDs are 12 numeric digits, but allow for them to increase in size (but still be only digits) 14 | assert account_id.isdigit() 15 | print(account_id) 16 | 17 | 18 | if is_main(): 19 | test_get_account_id() 20 | -------------------------------------------------------------------------------- /test_awsimple/test_get_configuration_information.py: -------------------------------------------------------------------------------- 1 | from awsimple import AWSAccess, is_mock 2 | 3 | from test_awsimple import test_awsimple_str 4 | 5 | 6 | def test_get_access_key(): 7 | if not is_mock(): 8 | # todo: get this to work with mocking 9 | access_key = AWSAccess(profile_name=test_awsimple_str).get_access_key() 10 | print(f"{access_key=}") 11 | print(f"{len(access_key)=}") 12 | # https://docs.aws.amazon.com/IAM/latest/APIReference/API_AccessKey.html 13 | assert len(access_key) >= 16 # as of this writing, the access key length was 20 14 | 15 | 16 | def test_get_region(): 17 | if not is_mock(): 18 | # todo: get this to work with mocking 19 | region = AWSAccess(profile_name=test_awsimple_str).get_region() 20 | print(f"{region=}") 21 | print(f"{len(region)=}") 22 | assert len(region) >= 5 # make sure we get back something 23 | -------------------------------------------------------------------------------- /test_awsimple/test_logs.py: -------------------------------------------------------------------------------- 1 | from awsimple import LogsAccess 2 | 3 | 
from test_awsimple import test_awsimple_str 4 | 5 | 6 | def test_logs(): 7 | logs_access = LogsAccess(test_awsimple_str) 8 | logs_access.put("my first log test") 9 | logs_access.put("my second log test") 10 | 11 | logs_access = LogsAccess(test_awsimple_str) 12 | logs_access.put("my third log test") 13 | logs_access.put("my fourth log test") 14 | -------------------------------------------------------------------------------- /test_awsimple/test_lru_cache_helpers.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from awsimple import get_disk_free, get_directory_size, is_mock 4 | 5 | 6 | def test_disk_free(): 7 | free = get_disk_free() 8 | print(f"{free=:,}") 9 | assert free > 1e9 # assume we have some reasonable amount free 10 | 11 | 12 | def test_get_directory_size(): 13 | venv = Path("venv") 14 | if venv.exists(): 15 | # doesn't work with Linux CI 16 | size = get_directory_size(venv) # just use the venv as something that's relatively large and multiple directory levels 17 | print(f"{size=:,}") 18 | assert size >= 50000000 # 94,302,709 on 8/21/20, so assume it's not going to get a lot smaller 19 | -------------------------------------------------------------------------------- /test_awsimple/test_mock.py: -------------------------------------------------------------------------------- 1 | from awsimple import is_mock, S3Access 2 | 3 | from test_awsimple import test_awsimple_str 4 | 5 | 6 | def test_mock(): 7 | s3_access = S3Access(test_awsimple_str) 8 | assert is_mock() == s3_access.is_mocked() # make sure that the AWSAccess instance is actually using mocking 9 | -------------------------------------------------------------------------------- /test_awsimple/test_most_recent_error.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from awsimple import SQSAccess, is_mock, is_using_localstack 4 | 5 | from test_awsimple import test_awsimple_str, drain 6 | from pytest_socket import disable_socket, enable_socket 7 | 8 | 9 | def test_most_recent_error(): 10 | message_contents = "hi" 11 | 12 | drain() 13 | 14 | queue = SQSAccess(test_awsimple_str) 15 | queue.create_queue() 16 | queue.send(message_contents) 17 | 18 | if not is_mock(): 19 | # emulate a short internet disruption 20 | disable_socket() 21 | 22 | time.sleep(3) 23 | message = queue.receive_message() 24 | if not is_mock() and not is_using_localstack(): 25 | # doesn't work with moto or localstack :( 26 | assert message.message == message_contents 27 | 28 | if not is_mock(): 29 | enable_socket() 30 | 31 | if is_mock(): 32 | assert queue.most_recent_error is None 33 | else: 34 | print(f"{queue.most_recent_error=}") # disable_socket() doesn't seem to work for this case - somehow we get the message anyway 35 | 36 | drain() 37 | -------------------------------------------------------------------------------- /test_awsimple/test_s3_bucket.py: -------------------------------------------------------------------------------- 1 | import time 2 | from logging import getLogger 3 | 4 | import pytest 5 | from awsimple import S3Access, BucketNotFound 6 | 7 | from test_awsimple import test_awsimple_str 8 | 9 | test_bucket_name = f"{test_awsimple_str}temp" # temp bucket that will be created and deleted 10 | 11 | log = getLogger(__name__) 12 | 13 | 14 | def test_s3_bucket(): 15 | s3_access = S3Access(test_bucket_name, profile_name=test_awsimple_str) # use non-keyword parameter for bucket_name 16 | created = 
s3_access.create_bucket() # may already exist 17 | log.info(f"{created=}") 18 | 19 | # wait for bucket to exist 20 | timeout_count = 100 21 | while not (bucket_exists := s3_access.bucket_exists()) and timeout_count > 0: 22 | time.sleep(3) 23 | timeout_count -= 1 24 | 25 | log.info(f"{bucket_exists=}") 26 | 27 | assert s3_access.bucket_exists() 28 | 29 | assert not s3_access.create_bucket() # already exists 30 | assert s3_access.delete_bucket() 31 | 32 | # wait for bucket to get deleted 33 | timeout_count = 100 34 | while s3_access.bucket_exists() and timeout_count > 0: 35 | time.sleep(3) # wait for bucket to be deleted 36 | timeout_count -= 1 37 | 38 | assert not s3_access.bucket_exists() 39 | assert not s3_access.delete_bucket() # was nothing to delete 40 | 41 | 42 | def test_s3_bucket_not_found(): 43 | with pytest.raises(BucketNotFound): 44 | s3_access = S3Access("IDoNotExist") 45 | s3_access.dir() 46 | -------------------------------------------------------------------------------- /test_awsimple/test_s3_bucket_not_found.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from awsimple import S3Access, BucketNotFound 4 | 5 | from test_awsimple import test_awsimple_str 6 | 7 | 8 | def test_s3_bucket_not_found(): 9 | s3_access = S3Access(profile_name=test_awsimple_str, bucket_name="doesnotexist") 10 | with pytest.raises(BucketNotFound): 11 | s3_access.keys() 12 | -------------------------------------------------------------------------------- /test_awsimple/test_s3_delete.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | def test_s3_delete(s3_access): 5 | test_string = "hi" 6 | s3_key = "hi.txt" 7 | s3_access.write_string(test_string, s3_key) # will create if the bucket doesn't exist 8 | assert s3_access.read_string(s3_key) == test_string 9 | s3_access.delete_object(s3_key) 10 | with pytest.raises(s3_access.client.exceptions.NoSuchKey): 11 | s3_access.read_string(s3_key) 12 | -------------------------------------------------------------------------------- /test_awsimple/test_s3_dir.py: -------------------------------------------------------------------------------- 1 | from pprint import pprint 2 | from pathlib import Path 3 | 4 | from awsimple import S3Access 5 | 6 | from test_awsimple import test_awsimple_str, temp_dir 7 | 8 | 9 | def test_s3_dir(): 10 | s3_access = S3Access(test_awsimple_str, profile_name=test_awsimple_str) # use non-keyword parameter for bucket_name 11 | 12 | # set up 13 | s3_access.create_bucket() # may already exist 14 | test_file_name = "test.txt" 15 | test_file_path = Path(temp_dir, test_file_name) 16 | test_file_path.open("w").write("hello world") 17 | s3_access.upload(test_file_path, test_file_name) # may already be in S3 18 | 19 | s3_dir = s3_access.dir() 20 | pprint(s3_dir) 21 | md = s3_dir[test_file_name] 22 | assert md.key == test_file_name 23 | assert md.sha512 == "309ecc489c12d6eb4cc40f50c902f2b4d0ed77ee511a7c7a9bcd3ca86d4cd86f989dd35bc5ff499670da34255b45b0cfd830e81f605dcf7dc5542e93ae9cd76f" # "hello world" 24 | 25 | 26 | def test_s3_dir_prefix(): 27 | s3_access = S3Access(test_awsimple_str, profile_name=test_awsimple_str) # use non-keyword parameter for bucket_name 28 | 29 | # set up 30 | s3_access.create_bucket() # may already exist 31 | test_file_name = "test.txt" 32 | test_file_path = Path(temp_dir, test_file_name) 33 | test_file_path.open("w").write("hello world") 34 | s3_access.upload(test_file_path, test_file_name) # may already be 
in S3 35 | 36 | s3_dir = s3_access.dir("test") 37 | pprint(s3_dir) 38 | md = s3_dir[test_file_name] 39 | assert md.key == test_file_name 40 | assert md.sha512 == "309ecc489c12d6eb4cc40f50c902f2b4d0ed77ee511a7c7a9bcd3ca86d4cd86f989dd35bc5ff499670da34255b45b0cfd830e81f605dcf7dc5542e93ae9cd76f" # "hello world" 41 | -------------------------------------------------------------------------------- /test_awsimple/test_s3_does_not_exist.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from awsimple import S3Access, AWSimpleException 3 | 4 | from test_awsimple import test_awsimple_str 5 | 6 | 7 | def test_s3_object_does_not_exist(): 8 | i_do_not_exist_key = "i-do-not-exist" 9 | 10 | s3_access = S3Access(profile_name=test_awsimple_str, bucket_name=test_awsimple_str) # keyword parameter for bucket_name 11 | assert s3_access.bucket_exists() # make sure the bucket exists 12 | with pytest.raises(s3_access.client.exceptions.NoSuchKey): 13 | s3_access.read_string(i_do_not_exist_key) 14 | 15 | with pytest.raises(AWSimpleException): 16 | s3_access.get_s3_object_metadata(i_do_not_exist_key) 17 | -------------------------------------------------------------------------------- /test_awsimple/test_s3_empty_bucket.py: -------------------------------------------------------------------------------- 1 | import time 2 | import platform 3 | import getpass 4 | 5 | from awsimple import S3Access 6 | 7 | 8 | def test_s3_empty_bucket(): 9 | bucket_name = f"emptybuckettest{platform.node()}{getpass.getuser()}".lower() # must be globally unique when using real S3 10 | print(f"{bucket_name=}") 11 | s3_access = S3Access(bucket_name) 12 | s3_access.create_bucket() 13 | assert s3_access.bucket_exists() 14 | assert len(s3_access.dir()) == 0 15 | s3_access.delete_bucket() 16 | -------------------------------------------------------------------------------- /test_awsimple/test_s3_file_transfer.py: -------------------------------------------------------------------------------- 1 | import time 2 | from datetime import timedelta 3 | from pathlib import Path 4 | from math import isclose 5 | import os 6 | from shutil import rmtree 7 | from logging import getLogger 8 | 9 | from awsimple import S3Access, get_directory_size, is_mock, is_using_localstack 10 | from test_awsimple import test_awsimple_str, never_change_file_name, temp_dir, cache_dir 11 | 12 | big_file_name = "big.txt" 13 | big_file_max_size = round(100e6) # should be large enough to do a multipart upload and would time out with default AWS timeouts (we use longer timeouts than the defaults) 14 | 15 | # real AWS 16 | never_change_size = 67 17 | never_change_mtime = 1636830116.0 18 | never_change_etag = "e3cb2ac8d7d4a8339ea3653f4f155ab4" 19 | 20 | log = getLogger(__name__) 21 | 22 | 23 | def test_get_never_change_metadata(s3_access) -> None: 24 | global never_change_size, never_change_mtime, never_change_etag 25 | 26 | if is_mock() or is_using_localstack(): 27 | # mocking always starts with nothing so we need to "upload" this file, but use boto3 so we don't write awsimple's SHA512. 28 | # localstack is similar in that we need to ensure we make the file.
29 | 30 | test_file_path = Path(temp_dir, never_change_file_name) 31 | never_change_file_contents = "modification Aug 21, 2020 at 2:51 PM PT\nnever change this file\n" 32 | test_file_path.open("w").write(never_change_file_contents) 33 | s3_access.client.upload_file(str(test_file_path), test_awsimple_str, never_change_file_name) # no awsimple SHA512 34 | 35 | keys = [obj["Key"] for obj in s3_access.client.list_objects_v2(Bucket=test_awsimple_str)["Contents"]] 36 | assert never_change_file_name in keys 37 | 38 | metadata = s3_access.get_s3_object_metadata(never_change_file_name) 39 | never_change_mtime = metadata.mtime.timestamp() 40 | never_change_etag = metadata.etag 41 | never_change_size = metadata.size 42 | 43 | 44 | def test_s3_read_string(s3_access): 45 | test_string = str(time.time()) # so it changes between tests 46 | 47 | # s3_access.create_bucket() # may already exist 48 | s3_access.write_string(test_string, test_awsimple_str) 49 | assert s3_access.read_string(test_awsimple_str) == test_string 50 | 51 | 52 | def test_s3_big_file_upload(s3_access): 53 | # test big file upload (e.g. that we don't get a timeout) 54 | # this is run before the cache tests (hence the function name) 55 | 56 | big_last_run_file_path = Path("big_last_run.txt") 57 | big_last_run_file_path.parent.mkdir(exist_ok=True, parents=True) 58 | last_run = 0.0 59 | if not (is_mock() or is_using_localstack()): 60 | # avoid large frequent file uploads with real AWS 61 | try: 62 | last_run = float(big_last_run_file_path.open().read().strip()) 63 | except FileNotFoundError: 64 | pass 65 | 66 | # only run once a day max since it takes so long 67 | if last_run + timedelta(days=1).total_seconds() < time.time(): 68 | big_file_path = Path(temp_dir, big_file_name) 69 | size = big_file_max_size / 1000 # start with something small 70 | while size < big_file_max_size: 71 | size *= 2 # get bigger on each iteration 72 | size = min(big_file_max_size, size) # make sure at the end we do one of max size 73 | with big_file_path.open("w") as f: 74 | f.truncate(round(size)) # this quickly makes a (sparse) file filled with zeros 75 | start = time.time() 76 | s3_access.upload(big_file_path, big_file_name) 77 | log.info(f"{time.time() - start},{size:.0f}") 78 | 79 | big_last_run_file_path.open("w").write(str(time.time())) 80 | else: 81 | log.info(f"last run {time.time() - last_run} seconds ago so not running now") 82 | 83 | 84 | def test_s3_upload(s3_access): 85 | test_file_name = "test.txt" 86 | test_file_path = Path(temp_dir, test_file_name) 87 | test_file_path.open("w").write("hello world") 88 | assert s3_access.upload(test_file_path, test_file_name, force=True) 89 | time.sleep(3) 90 | assert s3_access.object_exists(test_file_name) 91 | 92 | 93 | def test_s3_z_metadata(s3_access): 94 | # does not work for mock todo: fix 95 | test_file_name = "test.txt" 96 | s3_object_metadata = s3_access.get_s3_object_metadata(test_file_name) 97 | # "hello world" uploaded with awsimple 98 | assert s3_object_metadata.sha512 == "309ecc489c12d6eb4cc40f50c902f2b4d0ed77ee511a7c7a9bcd3ca86d4cd86f989dd35bc5ff499670da34255b45b0cfd830e81f605dcf7dc5542e93ae9cd76f" 99 | assert s3_object_metadata.size == 11 100 | 101 | 102 | def test_s3_download_dest_full_path(s3_access): 103 | dest_path = Path(temp_dir, never_change_file_name) 104 | dest_path.unlink(missing_ok=True) 105 | success = s3_access.download(never_change_file_name, dest_path) # dest is a full path 106 | assert success 107 | assert dest_path.exists() 108 | assert isclose(os.path.getmtime(dest_path), 
never_change_mtime, rel_tol=0.0, abs_tol=3.0) 109 | 110 | 111 | def test_s3_download_dest_dir(s3_access): 112 | dest_path = Path(temp_dir, never_change_file_name) 113 | dest_path.unlink(missing_ok=True) 114 | success = s3_access.download(never_change_file_name, temp_dir) # dest is a directory 115 | assert success 116 | assert dest_path.exists() 117 | assert isclose(os.path.getmtime(dest_path), never_change_mtime, rel_tol=0.0, abs_tol=3.0) 118 | 119 | 120 | def test_s3_metadata_not_uploaded_with_awsimple(s3_access): 121 | bucket_dir = s3_access.dir() 122 | assert len(bucket_dir) > 0 123 | assert bucket_dir["never_change.txt"].size == never_change_size 124 | s3_object_metadata = s3_access.get_s3_object_metadata(never_change_file_name) 125 | mtime_epoch = s3_object_metadata.mtime.timestamp() 126 | assert isclose(mtime_epoch, never_change_mtime, rel_tol=0.0, abs_tol=3.0) # SWAG 127 | assert s3_object_metadata.etag == never_change_etag 128 | assert s3_object_metadata.sha512 is None # not uploaded with awsimple 129 | assert s3_object_metadata.size == never_change_size 130 | 131 | 132 | def _s3_download(dest: Path, s3_access): 133 | """ 134 | :param dest: directory or file path to download to 135 | :param s3_access: S3Access 136 | """ 137 | dest_path = Path(temp_dir, never_change_file_name) # expect file to be downloaded here 138 | # start with empty cache 139 | rmtree(cache_dir, ignore_errors=True) 140 | cache_dir.mkdir(parents=True, exist_ok=True) 141 | dest_path.unlink(missing_ok=True) 142 | download_status = s3_access.download_cached(never_change_file_name, dest) 143 | assert download_status.success 144 | assert not download_status.cache_hit 145 | assert download_status.cache_write 146 | assert dest_path.exists() 147 | # download cached 148 | dest_path.unlink() 149 | download_status = s3_access.download_cached(never_change_file_name, dest) 150 | assert download_status.success 151 | assert download_status.cache_hit 152 | assert not download_status.cache_write 153 | assert dest_path.exists() 154 | 155 | # with warm cache 156 | dest_path.unlink() 157 | download_status = s3_access.download_cached(never_change_file_name, dest) 158 | assert download_status.success 159 | assert download_status.cache_hit 160 | assert dest_path.exists() 161 | 162 | 163 | def _s3_download_big(dest: Path, s3_access): 164 | # download big file with normal cache size 165 | cache_size = get_directory_size(cache_dir) 166 | assert cache_size < 1000 # big file not in cache 167 | big_file_path = Path(temp_dir, big_file_name) 168 | download_status = s3_access.download_cached(big_file_name, dest) 169 | assert download_status.success 170 | assert not download_status.cache_hit 171 | assert download_status.cache_write 172 | assert big_file_path.exists() 173 | cache_size = get_directory_size(cache_dir) 174 | assert cache_size > 1000 # big file is in cache 175 | 176 | 177 | def test_s3_download_cached(s3_access): 178 | _s3_download(Path(temp_dir, never_change_file_name), s3_access) # small file with no AWSimple SHA512 179 | _s3_download_big(Path(temp_dir, big_file_name), s3_access) 180 | 181 | 182 | def test_s3_download_cached_dir(s3_access): 183 | _s3_download(temp_dir, s3_access) 184 | _s3_download_big(temp_dir, s3_access) 185 | 186 | 187 | def test_cache_eviction(s3_access): 188 | # force cache eviction 189 | cache_max = 100 190 | eviction_dir = Path(temp_dir, "eviction") 191 | eviction_cache = Path(eviction_dir, "cache") 192 | s3_access_cache_eviction = S3Access(profile_name=test_awsimple_str, bucket_name=test_awsimple_str, 
cache_dir=eviction_cache, cache_max_absolute=cache_max) 193 | size = 50 194 | rmtree(eviction_dir, ignore_errors=True) 195 | while size <= 2 * cache_max: 196 | file_name = f"t{size}.txt" 197 | source_file_path = Path(eviction_dir, "source", file_name) 198 | source_file_path.parent.mkdir(parents=True, exist_ok=True) 199 | 200 | # upload 201 | with source_file_path.open("w") as f: 202 | f.truncate(round(size)) # this quickly makes a (sparse) file filled with zeros 203 | s3_access_cache_eviction.upload(source_file_path, file_name) 204 | 205 | dest_path = Path(eviction_dir, "dest", file_name) 206 | 207 | # cold download 208 | status_cold = s3_access_cache_eviction.download_cached(file_name, dest_path) 209 | assert not status_cold.cache_hit 210 | if size <= cache_max: 211 | assert status_cold.cache_write 212 | 213 | # warm download 214 | assert dest_path.exists() 215 | status_warm = s3_access_cache_eviction.download_cached(file_name, dest_path) 216 | if size <= cache_max: 217 | assert status_warm.cache_hit 218 | assert not status_warm.cache_write 219 | assert dest_path.exists() 220 | 221 | # make sure cache stays within max size limit 222 | cache_size = get_directory_size(eviction_cache) 223 | assert cache_size <= cache_max # make sure we stay within bounds 224 | 225 | size *= 2 226 | -------------------------------------------------------------------------------- /test_awsimple/test_s3_keys.py: -------------------------------------------------------------------------------- 1 | from pprint import pprint 2 | from pathlib import Path 3 | 4 | from awsimple import S3Access 5 | 6 | from test_awsimple import test_awsimple_str, temp_dir 7 | 8 | 9 | def test_s3_keys(): 10 | s3_access = S3Access(test_awsimple_str, profile_name=test_awsimple_str) # use non-keyword parameter for bucket_name 11 | 12 | # set up 13 | s3_access.create_bucket() # may already exist 14 | test_file_name = "test.txt" 15 | test_file_name_2 = "test2.txt" 16 | test_file_path = Path(temp_dir, test_file_name) 17 | test_file_path.open("w").write("hello world") 18 | s3_access.upload(test_file_path, test_file_name_2) # may already be in S3 19 | s3_access.upload(test_file_path, test_file_name) # may already be in S3 20 | 21 | s3_keys = s3_access.keys() 22 | pprint(s3_keys) 23 | # for real AWS I may have other objects in the test bucket 24 | assert test_file_name in s3_keys 25 | assert test_file_name_2 in s3_keys 26 | 27 | 28 | def test_s3_keys_prefix(): 29 | s3_access = S3Access(test_awsimple_str, profile_name=test_awsimple_str) # use non-keyword parameter for bucket_name 30 | 31 | # set up 32 | s3_access.create_bucket() # may already exist 33 | test_file_name = "test.txt" 34 | test_file_name_2 = "test2.txt" 35 | test_file_path = Path(temp_dir, test_file_name) 36 | test_file_path.open("w").write("hello world") 37 | s3_access.upload(test_file_path, test_file_name_2) # may already be in S3 38 | s3_access.upload(test_file_path, test_file_name) # may already be in S3 39 | 40 | s3_keys = s3_access.keys("test2") 41 | pprint(s3_keys) 42 | # for real AWS I may have other objects in the test bucket 43 | assert test_file_name not in s3_keys 44 | assert test_file_name_2 in s3_keys 45 | -------------------------------------------------------------------------------- /test_awsimple/test_s3_list_buckets.py: -------------------------------------------------------------------------------- 1 | from pprint import pprint 2 | 3 | from awsimple import S3Access 4 | 5 | from test_awsimple import test_awsimple_str 6 | 7 | 8 | def test_s3_list_buckets(): 9 | 
bucket_names = S3Access().bucket_list() 10 | pprint(bucket_names) 11 | assert test_awsimple_str in bucket_names 12 | -------------------------------------------------------------------------------- /test_awsimple/test_s3_multiple_transfers.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import pytest 3 | from shutil import rmtree 4 | 5 | from awsimple import AWSimpleException, is_mock 6 | 7 | from test_awsimple import temp_dir, cache_dir 8 | 9 | 10 | def check_file_contents(file_path: Path, expected_contents: str): 11 | with file_path.open() as f: 12 | file_contents = f.read() 13 | assert file_contents == expected_contents 14 | 15 | 16 | def test_s3_multiple_transfers(s3_access): 17 | s3_paths = {} 18 | rmtree(temp_dir) 19 | for test_string in ["a", "b"]: 20 | s3_paths[test_string] = {} 21 | for mode in ["in", "out"]: 22 | p = Path(temp_dir, mode, f"{test_string}.txt") 23 | p.parent.mkdir(parents=True, exist_ok=True) 24 | if mode == "in": 25 | with p.open("w") as f: 26 | f.write(test_string) 27 | s3_paths[test_string][mode] = p 28 | 29 | if is_mock(): 30 | with pytest.raises(AWSimpleException): 31 | s3_access.download_cached("a", s3_paths["a"]["out"]) # won't exist at first if mocked 32 | 33 | # upload and download file 34 | s3_access.upload(s3_paths["a"]["in"], "a") 35 | download_status = s3_access.download_cached("a", s3_paths["a"]["out"]) 36 | assert download_status.success 37 | assert not download_status.cache_hit 38 | assert download_status.cache_write 39 | check_file_contents(s3_paths["a"]["out"], "a") 40 | 41 | # upload a different file to the same key and check that we get the contents of that new file 42 | s3_access.upload(s3_paths["b"]["in"], "a") 43 | download_status = s3_access.download_cached("a", s3_paths["a"]["out"]) 44 | assert download_status.success 45 | assert not download_status.cache_hit 46 | assert download_status.cache_write 47 | check_file_contents(s3_paths["a"]["out"], "b") 48 | 49 | # cached download 50 | download_status = s3_access.download_cached("a", s3_paths["a"]["out"]) 51 | assert download_status.success 52 | assert download_status.cache_hit 53 | assert not download_status.cache_write 54 | check_file_contents(s3_paths["a"]["out"], "b") 55 | 56 | # put "a" back and just use regular download (not cached) 57 | s3_access.upload(s3_paths["a"]["in"], "a") 58 | assert s3_access.download("a", s3_paths["a"]["out"]) 59 | check_file_contents(s3_paths["a"]["out"], "a") 60 | 61 | # write something else to that key 62 | s3_access.write_string("c", "a") 63 | assert s3_access.read_string("a") == "c" 64 | 65 | # now upload and download an object 66 | test_dict = {"z": 3} 67 | s3_access.upload_object_as_json(test_dict, "a") 68 | downloaded_dict = s3_access.download_object_as_json("a") 69 | assert test_dict == downloaded_dict 70 | downloaded_dict = s3_access.download_object_as_json_cached("a") 71 | assert test_dict == downloaded_dict 72 | 73 | assert len(list(cache_dir.glob("*"))) == 3 # there should be 3 entries in the cache at this point 74 | -------------------------------------------------------------------------------- /test_awsimple/test_s3_object_floats.py: -------------------------------------------------------------------------------- 1 | def test_s3_object_floats(s3_access): 2 | object_with_floats = {"0.1": 2.3456789e-11} 3 | s3_key = "a" 4 | s3_access.upload_object_as_json(object_with_floats, s3_key) 5 | s3_object = s3_access.download_object_as_json_cached(s3_key) 6 | print(s3_object) 7 | assert s3_object == object_with_floats
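    # the exact equality above holds because Python's json module round-trips floats exactly
    # (repr-based shortest representation); minimal sketch of the same property, independent of awsimple:
    #   import json
    #   json.loads(json.dumps({"0.1": 2.3456789e-11})) == {"0.1": 2.3456789e-11}  # True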
8 | -------------------------------------------------------------------------------- /test_awsimple/test_s3_public_readable.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import time 3 | 4 | from awsimple import S3Access, is_using_localstack 5 | from requests import get 6 | 7 | from test_awsimple import test_awsimple_str, temp_dir 8 | 9 | 10 | def test_s3_upload(): 11 | contents = "I am public readable" 12 | s3_access = S3Access(profile_name=test_awsimple_str, bucket_name=test_awsimple_str) 13 | s3_access.set_public_readable(True) 14 | test_file_name = "public_readable.txt" 15 | test_file_path = Path(temp_dir, test_file_name) 16 | test_file_path.open("w").write(contents) 17 | assert s3_access.upload(test_file_path, test_file_name, force=True) 18 | count = 0 19 | while not s3_access.object_exists(test_file_name) and count < 100: 20 | time.sleep(1) 21 | count += 1 22 | assert s3_access.object_exists(test_file_name) 23 | 24 | # read from the URL to see if the contents are publicly readable 25 | metadata = s3_access.get_s3_object_metadata(test_file_name) 26 | if not is_using_localstack(): 27 | # localstack doesn't provide URL-based access 28 | object_contents = get(metadata.url).content.decode("utf-8") 29 | assert object_contents == contents 30 | -------------------------------------------------------------------------------- /test_awsimple/test_s3_python_object.py: -------------------------------------------------------------------------------- 1 | from awsimple import S3Access 2 | 3 | from test_awsimple import test_awsimple_str 4 | 5 | 6 | def test_s3_python_object(): 7 | my_dict_a = {"a": 1} 8 | my_dict_b = {"b": 2} 9 | my_list = [1, 2, 3] 10 | my_complex_dict = {"1": 2, "my_list": [0, 9], "my_dict": {"z": -1, "w": -2}} 11 | 12 | s3_key = "my_object" 13 | s3_access = S3Access(profile_name=test_awsimple_str, bucket_name=test_awsimple_str) 14 | 15 | for my_object in (my_dict_a, my_dict_b, my_list, my_complex_dict): 16 | s3_access.upload_object_as_json(my_object, s3_key) 17 | 18 | my_dict_from_s3 = s3_access.download_object_as_json(s3_key) 19 | assert my_object == my_dict_from_s3 20 | 21 | my_dict_from_s3 = s3_access.download_object_as_json_cached(s3_key) 22 | assert my_object == my_dict_from_s3 23 | my_dict_from_s3 = s3_access.download_object_as_json_cached(s3_key) # this will be the cached version 24 | assert my_object == my_dict_from_s3 25 | assert s3_access.download_status.cache_hit 26 | -------------------------------------------------------------------------------- /test_awsimple/test_s3_string.py: -------------------------------------------------------------------------------- 1 | from awsimple import S3Access 2 | 3 | from test_awsimple import test_awsimple_str 4 | 5 | 6 | def test_s3_string(): 7 | s3_access = S3Access(test_awsimple_str) 8 | s3_access.write_string(test_awsimple_str, test_awsimple_str) 9 | d = s3_access.dir() 10 | metadata = d[test_awsimple_str] 11 | assert metadata.size == len(test_awsimple_str) 12 | assert metadata.key == test_awsimple_str # the contents are the same as the key 13 | # https://passwordsgenerator.net/sha512-hash-generator/ 14 | assert metadata.sha512.lower() == "D16764F12E4D13555A88372CFE702EF8AE07F24A3FFCEDE6E1CDC8B7BFC2B18EC3468A7752A09F100C9F24EA2BC77566A08972019FC04CF75AB3A64B475BDFA3".lower() 15 | -------------------------------------------------------------------------------- /test_awsimple/test_s3_transfer_lines.py: 
-------------------------------------------------------------------------------- 1 | def test_s3_transfer_lines(s3_access): 2 | s3_key = "a" 3 | lines = ["1", "2"] 4 | s3_access.write_lines(lines, s3_key) 5 | read_lines = s3_access.read_lines(s3_key) 6 | assert lines == read_lines 7 | -------------------------------------------------------------------------------- /test_awsimple/test_serializable.py: -------------------------------------------------------------------------------- 1 | from decimal import Decimal 2 | from enum import Enum 3 | from pathlib import Path 4 | from math import pi, isclose 5 | 6 | from PIL import Image 7 | 8 | from awsimple import dict_to_dynamodb, dynamodb_to_dict 9 | 10 | 11 | class TstClass(Enum): 12 | a = 1 13 | b = 2 14 | 15 | 16 | def test_make_serializable(): 17 | values = { 18 | "d": Decimal(1.0), 19 | "s": "s", 20 | "bool": True, 21 | "a": TstClass.a, 22 | "b": TstClass.b, 23 | "binary": b"\0\1", 24 | "ni": -100, # negative integer 25 | "nbi": -100000000000000000000000000000000000, # negative big integer 26 | "pi": pi, 27 | } 28 | values["image"] = Image.open(Path("test_awsimple", "280px-PNG_transparency_demonstration_1.png")) 29 | values = dict_to_dynamodb(values) 30 | serial_values = dynamodb_to_dict(values) 31 | assert serial_values["d"] == 1.0 32 | assert serial_values["s"] == "s" 33 | assert serial_values["bool"] is True 34 | assert serial_values["a"] == "a" 35 | assert serial_values["b"] == "b" 36 | image_size = len(serial_values["image"]) 37 | assert image_size == 141233 or image_size == 140065 # depending on the version of Pillow 38 | assert serial_values["binary"] == "b'\\x00\\x01'" 39 | assert isinstance(serial_values["ni"], int) 40 | assert isinstance(serial_values["nbi"], float) # ends up being a float, even though we'd prefer it as an int 41 | assert isclose(serial_values["pi"], pi) 42 | -------------------------------------------------------------------------------- /test_awsimple/test_sns_create.py: -------------------------------------------------------------------------------- 1 | from ismain import is_main 2 | 3 | from awsimple import SNSAccess 4 | 5 | from test_awsimple import test_awsimple_str 6 | 7 | 8 | def test_sns_create(): 9 | sns_access = SNSAccess(test_awsimple_str) 10 | sns_access.create_topic() 11 | 12 | 13 | if is_main(): 14 | test_sns_create() 15 | -------------------------------------------------------------------------------- /test_awsimple/test_sns_publish.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from awsimple import SNSAccess, SQSPollAccess, is_mock 4 | 5 | from test_awsimple import test_awsimple_str, drain 6 | 7 | 8 | def test_sns_publish(): 9 | drain() 10 | 11 | sqs_access = SQSPollAccess(test_awsimple_str) # queue that will subscribe to this topic and we'll read from at the end to test the propagation from SNS to SQS 12 | sqs_access.create_queue() 13 | sns_access = SNSAccess(test_awsimple_str) # our test SNS topic 14 | 15 | sns_access.create_topic() # this can set the permissions, which can take a while to propagate so it might fail the first time through 16 | 17 | subscription_arn = sns_access.subscribe(sqs_access) # subscribe the SQS queue to the SNS topic 18 | print(f"{subscription_arn=}") 19 | 20 | # put in your actual email and run this at least once: 21 | # sns_access.subscribe("me@mydomain.com") 22 | 23 | message_string = "This is a test for awsimple." 
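    # the assertion at the end relies on standard SNS-to-SQS delivery: the SQS body is a JSON
    # notification envelope with the published text under "Message" and the subject under "Subject",
    # roughly {"Type": "Notification", "Subject": ..., "Message": ..., ...} (abridged, assumed shape),
    # hence the json.loads(...)["Message"] below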
24 | subject_string = "awsimple test" 25 | # doesn't work with moto :( 26 | if not is_mock(): 27 | message_id = sns_access.publish(message_string, subject_string) 28 | print(f"{message_id=}") 29 | assert message_id is not None and len(message_id) > 0 30 | 31 | message = json.loads(sqs_access.receive_message().message) 32 | returned_message_string = message["Message"] 33 | print(f"{returned_message_string=}") 34 | assert returned_message_string == message_string 35 | -------------------------------------------------------------------------------- /test_awsimple/test_sqs_create_and_delete_queue.py: -------------------------------------------------------------------------------- 1 | from awsimple import SQSAccess, is_using_localstack 2 | 3 | from test_awsimple import test_awsimple_str 4 | 5 | 6 | def test_sqs_create_and_delete_queue(): 7 | # have to wait 60 seconds from delete to (re)creation so don't use the same queue name as other tests 8 | queue_name = "createdelete" 9 | q = SQSAccess(queue_name, profile_name=test_awsimple_str) 10 | url = q.create_queue() 11 | print(url) 12 | 13 | if not is_using_localstack(): 14 | # something like https://us-west-2.queue.amazonaws.com/076966278319/createdelete 15 | assert len(url) > 10 16 | assert url.endswith(queue_name) 17 | assert url.startswith("https://") 18 | assert "aws" in url 19 | 20 | q.delete_queue() 21 | -------------------------------------------------------------------------------- /test_awsimple/test_sqs_get_arn.py: -------------------------------------------------------------------------------- 1 | from ismain import is_main 2 | 3 | from awsimple import SQSAccess 4 | 5 | from test_awsimple import test_awsimple_str 6 | 7 | 8 | def test_sqs_get_arn(): 9 | sqs_access = SQSAccess(test_awsimple_str) 10 | sqs_access.create_queue() 11 | arn = sqs_access.get_arn() 12 | 13 | # e.g. 
arn:aws:sqs:us-west-2:123456789012:testawsimple 14 | print(f"{arn=}") 15 | 16 | assert arn.startswith("arn:aws:sqs:") 17 | # AWS region and account number are in the middle 18 | assert arn.endswith(f":{test_awsimple_str}") 19 | 20 | 21 | if is_main(): 22 | test_sqs_get_arn() 23 | -------------------------------------------------------------------------------- /test_awsimple/test_sqs_messages.py: -------------------------------------------------------------------------------- 1 | from pprint import pprint 2 | import time 3 | import math 4 | 5 | from awsimple import SQSAccess, SQSPollAccess, is_using_localstack 6 | 7 | from test_awsimple import test_awsimple_str, drain 8 | 9 | send_message = "hi" 10 | 11 | 12 | def test_sqs_immediate_delete(): 13 | drain() 14 | 15 | q = SQSAccess(test_awsimple_str, profile_name=test_awsimple_str) 16 | q.create_queue() 17 | 18 | send_time = time.time() 19 | q.send(send_message) 20 | time.sleep(0.1) 21 | 22 | while (receive_message := q.receive_message()) is None: 23 | time.sleep(0.1) 24 | print(receive_message) 25 | assert receive_message.message == send_message 26 | print(f"took {time.time() - send_time} seconds") 27 | 28 | 29 | def test_sqs_poll_immediate_delete(): 30 | drain() 31 | 32 | q = SQSPollAccess(test_awsimple_str, profile_name=test_awsimple_str) 33 | q.create_queue() 34 | 35 | send_time = time.time() 36 | q.send(send_message) 37 | 38 | receive_message = q.receive_message() # will long poll so we expect the message to be available within one call 39 | assert receive_message is not None 40 | print(receive_message) 41 | assert receive_message.message == send_message 42 | print(f"took {time.time() - send_time} seconds") 43 | 44 | 45 | def test_sqs_poll_user_delete(): 46 | work_time = 3.0 47 | 48 | drain() 49 | 50 | # populate the run time history 51 | queue = SQSAccess(test_awsimple_str, immediate_delete=False, profile_name=test_awsimple_str) 52 | queue.create_queue() 53 | queue._get_response_history_file_path().unlink(missing_ok=True) 54 | queue.max_history = 5 # test that we can delete old history values by using a very small history 55 | for value in range(0, queue.max_history): 56 | print(value) 57 | queue.send(str(value)) 58 | while len(messages := queue.receive_messages()) > 0: 59 | time.sleep(work_time) 60 | pprint(messages) 61 | for m in messages: 62 | print(f"deleting {m.message}") 63 | m.delete() 64 | 65 | # now do it long-poll style 66 | poll_queue = SQSPollAccess(test_awsimple_str, immediate_delete=False, profile_name=test_awsimple_str) 67 | poll_queue.create_queue() 68 | 69 | print("sending test message") 70 | send_time = time.time() 71 | poll_queue.send(send_message) 72 | 73 | receive_message = poll_queue.receive_message() # will long poll so we expect the message to be available within one call 74 | assert receive_message is not None 75 | print(receive_message.message) 76 | assert receive_message.message == send_message 77 | time.sleep(work_time) # do some work 78 | print(f"took {time.time() - send_time} seconds") 79 | receive_message.delete() 80 | 81 | nominal_work_time = poll_queue.calculate_nominal_work_time() 82 | print(f"{work_time=}, calculated {nominal_work_time=}") 83 | if not is_using_localstack(): 84 | # localstack can be slow 85 | assert math.isclose(nominal_work_time, work_time, rel_tol=0.5, abs_tol=1.0) # fairly wide tolerance 86 | 87 | 88 | def test_sqs_n_messages(): 89 | """ 90 | test for a specific number of messages to be returned 91 | """ 92 | 93 | drain() 94 | 95 | message = "hi" 96 | queue = 
SQSAccess(test_awsimple_str) 97 | queue.create_queue() 98 | 99 | # more than we'll try to take out, and more than the AWS max per call 100 | for _ in range(0, 14): 101 | queue.send(message) 102 | time.sleep(10.0) # wait for messages to become available 103 | 104 | received = queue.receive_messages(11) # just over the AWS max per call of 10 105 | assert len(received) == 11 106 | 107 | drain() # clean up unreceived messages 108 | -------------------------------------------------------------------------------- /test_awsimple/test_sqs_messages_available_and_purge.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from awsimple import SQSAccess, is_mock 4 | 5 | from test_awsimple import test_awsimple_str, drain 6 | 7 | 8 | def wait_for_n_messages_available(queue: SQSAccess, expected_number_of_messages: int): 9 | time_out = 0 10 | while (messages_available := queue.messages_available()) != expected_number_of_messages and time_out < 60: 11 | time_out += 1 12 | time.sleep(1.0) 13 | assert messages_available == expected_number_of_messages 14 | 15 | 16 | def test_sqs_message_available_and_purge(): 17 | if not is_mock(): 18 | drain() 19 | 20 | queue = SQSAccess(test_awsimple_str) 21 | queue.create_queue() 22 | 23 | wait_for_n_messages_available(queue, 0) 24 | 25 | for number_of_messages in range(1, 5): 26 | queue.send(str(number_of_messages)) 27 | wait_for_n_messages_available(queue, number_of_messages) 28 | 29 | queue.purge() 30 | wait_for_n_messages_available(queue, 0) 31 | -------------------------------------------------------------------------------- /test_awsimple/test_sqs_queue_exists.py: -------------------------------------------------------------------------------- 1 | from awsimple import SQSAccess, is_mock 2 | 3 | from test_awsimple import test_awsimple_str 4 | 5 | 6 | def test_sqs_queue_exists(): 7 | q = SQSAccess(test_awsimple_str) 8 | q.create_queue() 9 | queue_exists = q.exists() 10 | # doesn't work with moto :( 11 | if not is_mock(): 12 | assert queue_exists 13 | queue_exists = SQSAccess("IDoNotExist").exists() 14 | assert not queue_exists 15 | -------------------------------------------------------------------------------- /test_awsimple/test_sqs_receive_nothing.py: -------------------------------------------------------------------------------- 1 | import time 2 | import math 3 | 4 | from awsimple import SQSAccess, SQSPollAccess, aws_sqs_long_poll_max_wait_time, is_mock, is_using_localstack 5 | 6 | from test_awsimple import test_awsimple_str, drain 7 | 8 | margin = 3.0 9 | rel_tol = 0.2 10 | 11 | 12 | def test_sqs_receive_nothing(): 13 | drain() 14 | start = time.time() 15 | queue = SQSAccess(test_awsimple_str) # will return immediately 16 | assert queue.receive_message() is None 17 | assert len(queue.receive_messages()) == 0 18 | t = time.time() - start 19 | print(f"{t=}") 20 | if is_using_localstack(): 21 | assert t < 100.0 # local stack is slow 22 | else: 23 | assert t < 3.0 # "immediate" 24 | 25 | 26 | def test_sqs_receive_nothing_poll_one(): 27 | if not is_mock(): 28 | drain() 29 | start = time.time() 30 | queue = SQSPollAccess(test_awsimple_str) # will return in AWS SQS max wait time (e.g. 
20 sec) 31 | queue.create_queue() 32 | assert queue.receive_message() is None 33 | 34 | t = time.time() - start 35 | print(f"{t=}") 36 | assert math.isclose(t, aws_sqs_long_poll_max_wait_time + margin, rel_tol=rel_tol, abs_tol=margin) 37 | 38 | 39 | def test_sqs_receive_nothing_poll_many(): 40 | if not is_mock(): 41 | drain() 42 | start = time.time() 43 | queue = SQSPollAccess(test_awsimple_str) # will return in AWS SQS max wait time (e.g. 20 sec) 44 | queue.create_queue() 45 | assert len(queue.receive_messages()) == 0 46 | 47 | t = time.time() - start 48 | print(f"{t=}") 49 | assert math.isclose(t, aws_sqs_long_poll_max_wait_time + margin, rel_tol=rel_tol, abs_tol=margin) 50 | -------------------------------------------------------------------------------- /test_awsimple/test_sqs_user_provided_timeout.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import time 3 | 4 | from awsimple import SQSPollAccess, SQSAccess, is_mock, is_using_localstack 5 | 6 | from test_awsimple import test_awsimple_str, drain 7 | 8 | 9 | def test_user_provided_timeout(): 10 | if not is_mock(): 11 | drain() 12 | 13 | send_message = "hello" 14 | work_time = 2.0 15 | 16 | qp = SQSPollAccess(test_awsimple_str, visibility_timeout=round(10.0 * work_time), immediate_delete=False, profile_name=test_awsimple_str) 17 | qp.create_queue() 18 | qp.send(send_message) 19 | time.sleep(1.0) 20 | receive_message = qp.receive_message() 21 | assert receive_message.message == send_message 22 | 23 | q = SQSAccess(test_awsimple_str, profile_name=test_awsimple_str) 24 | q.create_queue() 25 | assert q.receive_message() is None # make sure the message is now invisible 26 | 27 | if not is_mock(): 28 | receive_message.delete() # not working for mock todo: fix 29 | 30 | assert q.receive_message() is None 31 | 32 | 33 | def test_user_provided_minimum_timeout(): 34 | if not is_mock(): 35 | drain() 36 | 37 | send_message = "hello" 38 | work_time = 2.0 39 | 40 | qp = SQSPollAccess(test_awsimple_str, minimum_visibility_timeout=round(10.0 * work_time), immediate_delete=False, profile_name=test_awsimple_str) 41 | qp.create_queue() 42 | qp.send(send_message) 43 | time.sleep(1.0) 44 | receive_message = qp.receive_message() 45 | assert receive_message.message == send_message 46 | 47 | q = SQSAccess(test_awsimple_str, profile_name=test_awsimple_str) 48 | q.create_queue() 49 | assert q.receive_message() is None # make sure the message is now invisible 50 | 51 | if not is_mock(): 52 | receive_message.delete() # not working for mock todo: fix 53 | 54 | assert q.receive_message() is None 55 | 56 | 57 | def test_actually_timeout(): 58 | if not is_mock(): 59 | drain() 60 | 61 | send_message = "hello" 62 | if is_using_localstack(): 63 | # localstack is slow 64 | work_time = 500.0 65 | else: 66 | work_time = 5.0 67 | 68 | qp = SQSPollAccess(test_awsimple_str, visibility_timeout=round(0.5 * work_time), immediate_delete=False, profile_name=test_awsimple_str) 69 | qp.create_queue() 70 | qp.send(send_message) 71 | time.sleep(1.0) 72 | receive_message = qp.receive_message() 73 | assert receive_message.message == send_message # got it once 74 | 75 | q = SQSAccess(test_awsimple_str, profile_name=test_awsimple_str) 76 | assert q.receive_message() is None # make sure the message is now invisible 77 | time.sleep(work_time) # will take "too long", so message should be available again on next receive_message 78 | 79 | if not is_mock(): 80 | # not working for mock todo: fix 81 | assert qp.receive_message().message 
== send_message 82 | receive_message.delete() # now we delete it 83 | 84 | assert q.receive_message() is None 85 | 86 | 87 | def test_user_provided_timeout_nonsensical_parameters(): 88 | if not is_mock(): 89 | drain() 90 | 91 | send_message = "hello" 92 | work_time = 2.0 93 | 94 | q = SQSPollAccess(test_awsimple_str, visibility_timeout=round(10.0 * work_time), profile_name=test_awsimple_str) 95 | q.create_queue() 96 | q.send(send_message) 97 | with pytest.raises(ValueError): 98 | q.receive_message() 99 | -------------------------------------------------------------------------------- /test_awsimple/tst_paths.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | temp_dir = Path("temp") 4 | cache_dir = Path(temp_dir, "cache") 5 | --------------------------------------------------------------------------------
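The SQS tests above all exercise the same visibility-timeout pattern, distilled here into a minimal sketch (the queue name and the work step are illustrative, not part of the repository; only awsimple calls that appear in these tests are used):

from awsimple import SQSPollAccess

queue = SQSPollAccess("testawsimple", visibility_timeout=60, immediate_delete=False)
queue.create_queue()
queue.send("work item")
message = queue.receive_message()  # long poll; the received message becomes invisible to other consumers
if message is not None:
    handle(message.message)  # hypothetical work step; must finish within visibility_timeout seconds
    message.delete()  # delete only after the work succeeds, otherwise the message becomes visible again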