├── .coveragerc
├── .github
│   └── workflows
│       └── python-package.yml
├── .gitignore
├── .readthedocs.yaml
├── CONTRIBUTING.md
├── EXAMPLES.md
├── LICENSE
├── Makefile
├── README.md
├── awsimple
│   ├── __init__.py
│   ├── __version__.py
│   ├── aws.py
│   ├── cache.py
│   ├── dynamodb.py
│   ├── dynamodb_miv.py
│   ├── logs.py
│   ├── mock.py
│   ├── py.typed
│   ├── s3.py
│   ├── sns.py
│   └── sqs.py
├── coverage.xml
├── doc
│   ├── awsimple_sf_python_6_21.pdf
│   ├── awsimple_sf_python_6_21.pptx
│   ├── flake8_report.txt
│   └── notes.txt
├── doc_source
│   ├── aws_access.rst
│   ├── conf.py
│   ├── coverage.txt
│   ├── dynamodb_access.rst
│   ├── index.rst
│   ├── quick_start_guide.rst
│   ├── requirements.txt
│   ├── s3_access.rst
│   ├── sns_access.rst
│   ├── sqs_access.rst
│   ├── thank_you.rst
│   └── user_guide.rst
├── examples
│   ├── aws_access_test.py
│   ├── derived_access_class.py
│   ├── dynamodb_partition_and_sort.py
│   ├── dynamodb_partition_only.py
│   ├── make_venv.bat
│   ├── make_venv.sh
│   ├── read_s3_object.py
│   ├── requirements-examples.txt
│   ├── run_examples.bat
│   ├── run_examples.sh
│   └── write_read_s3_object.py
├── make_venv_dev.bat
├── make_venv_dev.sh
├── mypy.ini
├── pyproject.toml
├── requirements-dev.txt
├── scripts
│   ├── blackify.bat
│   ├── coverage.bat
│   ├── doc_coverage_updater.py
│   ├── pypi.bat
│   ├── pytest.bat
│   ├── run_flake8.bat
│   ├── run_mypy.bat
│   ├── run_sphinx.bat
│   └── start_localstack.bat
├── setup.py
└── test_awsimple
    ├── 280px-PNG_transparency_demonstration_1.png
    ├── __init__.py
    ├── conftest.py
    ├── const.py
    ├── dict_is_close.py
    ├── sqs_drain.py
    ├── test_aws_test.py
    ├── test_c_dynamodb_create_table.py
    ├── test_dynamodb.py
    ├── test_dynamodb_delete.py
    ├── test_dynamodb_delete_all_items.py
    ├── test_dynamodb_get_item.py
    ├── test_dynamodb_item_not_found.py
    ├── test_dynamodb_miv_ui.py
    ├── test_dynamodb_primary_key_as_number.py
    ├── test_dynamodb_query.py
    ├── test_dynamodb_query_kwargs.py
    ├── test_dynamodb_scan_cache.py
    ├── test_dynamodb_scan_table_as_dict.py
    ├── test_dynamodb_secondary_index.py
    ├── test_dynamodb_table_not_found.py
    ├── test_dynamodb_upsert.py
    ├── test_get_account_id.py
    ├── test_get_configuration_information.py
    ├── test_logs.py
    ├── test_lru_cache_helpers.py
    ├── test_mock.py
    ├── test_most_recent_error.py
    ├── test_s3_bucket.py
    ├── test_s3_bucket_not_found.py
    ├── test_s3_delete.py
    ├── test_s3_dir.py
    ├── test_s3_does_not_exist.py
    ├── test_s3_empty_bucket.py
    ├── test_s3_file_transfer.py
    ├── test_s3_keys.py
    ├── test_s3_list_buckets.py
    ├── test_s3_multiple_transfers.py
    ├── test_s3_object_floats.py
    ├── test_s3_public_readable.py
    ├── test_s3_python_object.py
    ├── test_s3_string.py
    ├── test_s3_transfer_lines.py
    ├── test_serializable.py
    ├── test_sns_create.py
    ├── test_sns_publish.py
    ├── test_sqs_create_and_delete_queue.py
    ├── test_sqs_get_arn.py
    ├── test_sqs_messages.py
    ├── test_sqs_messages_available_and_purge.py
    ├── test_sqs_queue_exists.py
    ├── test_sqs_receive_nothing.py
    ├── test_sqs_user_provided_timeout.py
    └── tst_paths.py
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | omit =
3 |     *venv/*
4 |     test_*/*
5 |
6 | branch = True
7 |
--------------------------------------------------------------------------------
/.github/workflows/python-package.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 |
4 | name: Python package
5 |
6 | on:
7 |   push:
8 |     branches: [ main ]
9 |   pull_request:
10 |     branches: [ main ]
11 |   workflow_dispatch:
12 |     branches: [ main ]
13 |
14 | jobs:
15 |   build:
16 |
17 |     runs-on: ubuntu-latest
18 |     strategy:
19 |       matrix:
20 |         python-version: ["3.12"]
21 |
22 |     steps:
23 |     - uses: actions/checkout@v4
24 |     - name: Set up Python ${{ matrix.python-version }}
25 |       uses: actions/setup-python@v4
26 |       with:
27 |         python-version: ${{ matrix.python-version }}
28 |     - name: Install dependencies
29 |       run: |
30 |         python -m pip install --upgrade pip
31 |         python -m pip install flake8 pytest
32 |         if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi
33 |
34 |     # was for pytest-fly, but couldn't get CI to install a compatible libEGL
35 |     # - name: Install libEGL dependency
36 |     #   run: |
37 |     #     sudo apt-get update
38 |     #     sudo apt-get install -y libegl-mesa0 libgl1
39 |
40 |     - name: Lint with flake8
41 |       run: |
42 |         # stop the build if there are Python syntax errors or undefined names
43 |         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
44 |         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
45 |         flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
46 |
47 |     - name: Test with pytest
48 |       run: |
49 |         pytest -s test_awsimple
50 |
51 |     # - name: "Upload coverage to Codecov"
52 |     #   uses: codecov/codecov-action@v1
53 |     #   with:
54 |     #     fail_ci_if_error: true
55 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 |
49 | # codecov
50 | # coverage.xml
51 |
52 | *.cover
53 | *.py,cover
54 | .hypothesis/
55 | .pytest_cache/
56 |
57 | # Translations
58 | *.mo
59 | *.pot
60 |
61 | # Django stuff:
62 | *.log
63 | local_settings.py
64 | db.sqlite3
65 | db.sqlite3-journal
66 |
67 | # Flask stuff:
68 | instance/
69 | .webassets-cache
70 |
71 | # Scrapy stuff:
72 | .scrapy
73 |
74 | # Sphinx documentation
75 | docs/_build/
76 |
77 | # PyBuilder
78 | target/
79 |
80 | # Jupyter Notebook
81 | .ipynb_checkpoints
82 |
83 | # IPython
84 | profile_default/
85 | ipython_config.py
86 |
87 | # pyenv
88 | .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
98 | __pypackages__/
99 |
100 | # Celery stuff
101 | celerybeat-schedule
102 | celerybeat.pid
103 |
104 | # SageMath parsed files
105 | *.sage.py
106 |
107 | # Environments
108 | .env
109 | .venv
110 | env/
111 | venv/
112 | ENV/
113 | env.bak/
114 | venv.bak/
115 |
116 | # Spyder project settings
117 | .spyderproject
118 | .spyproject
119 |
120 | # Rope project settings
121 | .ropeproject
122 |
123 | # mkdocs documentation
124 | /site
125 |
126 | # mypy
127 | .mypy_cache/
128 | .dmypy.json
129 | dmypy.json
130 |
131 | # Pyre type checker
132 | .pyre/
133 |
134 | # jca
135 | .idea/
136 | cache/
137 | temp/
138 | # the real one is just LICENSE but some systems want both
139 | LICENSE.txt
140 | cov/
141 | big_last_run.txt
142 |
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | # .readthedocs.yaml
2 | # Read the Docs configuration file
3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
4 |
5 | # Required
6 | version: 2
7 |
8 | # Build documentation from the doc_source/ directory with Sphinx
9 | sphinx:
10 |   configuration: doc_source/conf.py
11 |
12 | # Optionally build your docs in additional formats such as PDF
13 | formats:
14 |   - pdf
15 |
16 | # Optionally set the version of Python and requirements required to build your docs
17 | python:
18 |   version: 3.8
19 |   install:
20 |     - requirements: doc_source/requirements.txt
21 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to awsimple
2 |
3 | Hi! First of all, thank you for contributing. :heart:
4 |
5 | All of the usual sorts of contributions are welcome: bug reports, patches, and feedback.
6 | Feel free to [browse existing issues](https://github.com/jamesabel/awsimple/issues) or [create a new one](https://github.com/jamesabel/awsimple/issues/new).
7 |
8 |
9 | ## Got a problem?
10 |
11 | You're welcome to [create an issue](https://github.com/jamesabel/awsimple/issues/new), but please [search existing ones](https://github.com/jamesabel/awsimple/issues) first to see if it's been discussed before.
12 |
13 |
14 | ## Want to submit some code or docs?
15 |
16 | Great!
17 |
18 | If you're interested in tackling an [existing issue](https://github.com/jamesabel/awsimple/issues), comment on one to make sure you're on the right track.
19 | If it's an idea you have or a problem not captured in an issue, [create one](https://github.com/jamesabel/awsimple/issues/new) and let's align.
20 |
21 |
22 | ### Dev setup
23 |
24 | Requirements:
25 |
26 | - Python 3.8 or higher
27 |
28 | Fork the repo to make your own personal copy, then `git clone` your fork onto your computer.
29 |
30 | Once you have that, install project dependencies with:
31 |
32 | #### Mac / Linux
33 | ```
34 | source make_venv_dev.sh
35 | source venv/bin/activate
36 | ```
37 |
38 | #### Windows
39 | ```
40 | make_venv_dev.bat
41 | .\venv\Scripts\activate.bat
42 | ```
43 |
44 | At this point you should be able to make changes to the codebase and run things.
45 |
--------------------------------------------------------------------------------
/EXAMPLES.md:
--------------------------------------------------------------------------------
1 | # Running the examples for awsimple
2 |
3 | There are four examples in the examples folder. When run, these examples should
4 | - check that you have access to AWS through the AWS CLI
5 | - write a file called "hello.txt" to the S3 bucket awsimple-test-bucket-{random_number}
6 |   - Note: it is strongly recommended to change the bucket name before you run this (S3 bucket names are globally unique), but it will work without the change
7 | - read the file back from the S3 bucket awsimple-test-bucket-{random_number}
8 |
9 |
10 |
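   | At their core, the write/read examples boil down to something like this minimal sketch (the bucket name is a placeholder and the bucket is assumed to already exist):
   |
   | ```
   | from awsimple import S3Access
   |
   | s3_access = S3Access(profile_name="testawsimple", bucket_name="awsimple-test-bucket-0")
   | s3_access.write_string("hello", "hello.txt")  # write "hello.txt" to the bucket
   | print(s3_access.read_string("hello.txt"))     # read it back
   | ```
   |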
11 | ### 1. Make the Virtual Environment and activate it
12 |
13 | #### Mac / Linux
14 | ```
15 | source make_venv.sh
16 | source venv/bin/activate
17 | ```
18 |
19 | #### Windows
20 | ```
21 | make_venv.bat
22 | .\venv\Scripts\activate.bat
23 | ```
24 |
25 | ### 2. Check your AWS profile and create a test user named "testawsimple" with read/write access to S3
26 |
27 | Your default AWS profile should be set up before you run the examples. The examples use a test user named "testawsimple". You should create this user before running the examples.
28 |
29 | ```
30 | aws configure
31 | ```
32 |
33 | ### 3. Run the examples
34 |
35 | #### Mac / Linux
36 | ```
37 | source run_examples.sh
38 | ```
39 |
40 | #### Windows
41 | ```
42 | run_examples.bat
43 | ```
44 |
45 |
46 |
47 | ### Got a problem?
48 |
49 | You're welcome to [create an issue](https://github.com/jamesabel/awsimple/issues/new), but please [search existing ones](https://github.com/jamesabel/awsimple/issues) first to see if it's been discussed before.
50 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020-2021 James Abel
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = doc_source
9 | BUILDDIR = build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
26 | # AWSimple
27 |
28 | *(pronounced A-W-Simple)*
29 |
30 | Simple API for basic AWS services such as S3 (Simple Storage Service), DynamoDB (a NoSQL database), SNS (Simple Notification Service),
31 | and SQS (Simple Queue Service).
32 |
33 | Project featured on [PythonBytes Podcast Episode #224](https://pythonbytes.fm/episodes/show/224/join-us-on-a-python-adventure-back-to-1977).
34 |
35 | Full documentation is available on [Read the Docs](https://awsimple.readthedocs.io/).
36 |
37 | ### Features:
38 |
39 | - Simple Object Oriented API on top of boto3
40 |
41 | - One-line S3 file write, read, and delete
42 |
43 | - Automatic S3 retries
44 |
45 | - Locally cached S3 accesses
46 |
47 | - True file hashing (SHA512) for S3 files (S3's etag is not a true file hash)
48 |
49 | - DynamoDB full table scans (with local cache option)
50 |
51 | - DynamoDB secondary indexes
52 |
53 | - Built-in pagination (e.g. for DynamoDB table scans and queries). Always get everything you asked for.
54 |
55 | - Can automatically set SQS timeouts based on runtime data (can also be user-specified)
56 |
57 | - Supports moto mock and localstack. Handy for testing and CI.
58 |
59 |
60 | ## Usage
61 |
62 | pip install awsimple
63 |
64 | ## Examples
65 |
66 | The examples folder has several examples you can customize and run. Instructions are available in [EXAMPLES.md](EXAMPLES.md).
67 |
68 | ### S3
69 |
70 |     from awsimple import S3Access
   |
   |     # print string contents of an existing S3 object
71 |     s = S3Access(profile_name="testawsimple", bucket_name="testawsimple").read_string("helloworld.txt")
72 |     print(s)
73 |
74 | ### DynamoDB
75 |
76 |     from awsimple import DynamoDBAccess
   |
   |     dynamodb_access = DynamoDBAccess(profile_name="testawsimple", table_name="testawsimple")
77 |
78 |     # put an item into DynamoDB
79 |     dynamodb_access.put_item({"id": "batman", "city": "Gotham"})
80 |
81 |     # now get it back
82 |     item = dynamodb_access.get_item("id", "batman")
83 |     print(item["city"])  # Gotham
84 |
85 | ## Introduction
86 |
87 | `awsimple` is a simple interface into basic AWS services such as S3 (Simple Storage Service) and
88 | DynamoDB (a simple NoSQL database). It has a set of higher level default settings and behavior
89 | that should cover many basic usage models.
90 |
91 | ## Discussion
92 |
93 | AWS's "serverless" resources offer many benefits. You only pay for what you use, easily scale,
94 | and generally have high performance and availability.
95 |
96 | While AWS has many varied services with extensive flexibility, using it for more straightforward
97 | applications can be a daunting task. There are access modes that are probably not required,
98 | and some default behaviors are not best for common usages. `awsimple` aims to provide a higher
99 | level API to AWS services (such as S3, DynamoDB, SNS, and SQS) to improve programmer productivity.
100 |
101 |
102 | ## S3
103 |
104 | `awsimple` calculates the local file hash (SHA512) and inserts it into the S3 object metadata. This is used
105 | to test for file equivalency.
106 |
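   | For example, a repeated `upload()` of an unchanged file is skipped because the stored hash matches. A minimal sketch (the profile, bucket, and file names are placeholders):
   |
   |     from pathlib import Path
   |     from awsimple import S3Access
   |
   |     s3_access = S3Access(profile_name="testawsimple", bucket_name="testawsimple")
   |     s3_access.upload(Path("my_file.txt"), "my_file.txt")  # uploads, storing the awsimple-sha512 metadata
   |     s3_access.upload(Path("my_file.txt"), "my_file.txt")  # hash matches - returns False, nothing uploaded
   |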
107 | ## Caching
108 |
109 | S3 objects and DynamoDB tables can be cached locally to reduce network traffic, minimize AWS costs,
110 | and potentially offer a speedup.
111 |
112 | DynamoDB cached table scans are particularly useful for tables that are infrequently updated.
113 |
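   | For example, `download_cached()` reports whether the local cache was hit. A minimal sketch (names are placeholders):
   |
   |     from pathlib import Path
   |     from awsimple import S3Access
   |
   |     s3_access = S3Access(profile_name="testawsimple", bucket_name="testawsimple")
   |     status = s3_access.download_cached("helloworld.txt", Path("downloads"))
   |     print(status.cache_hit)  # False on the first download, True on subsequent calls
   |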
114 | ## What `awsimple` Is Not
115 |
116 | - `awsimple` is not necessarily the most memory and CPU efficient
117 |
118 | - `awsimple` does not provide cost monitoring hooks
119 |
120 | - `awsimple` does not provide all the options and features that the regular AWS API (e.g. boto3) does
121 |
122 | ## Updates/Releases
123 |
124 | 3.x.x - Cache life for cached DynamoDB scans is now based on the most recent table modification time (kept in a separate
125 | table). Explicit cache life is no longer required (the parameter has been removed).
126 |
127 | ## Testing using moto mock and localstack
128 |
129 | moto mocking can improve performance and reduce AWS costs. `awsimple` supports both moto mock and localstack.
130 | In general, it's recommended to develop with moto mock and do final testing with the real AWS services.
131 |
132 | Select via environment variables:
133 |
134 | - AWSIMPLE_USE_MOTO_MOCK=1 # use moto
135 | - AWSIMPLE_USE_LOCALSTACK=1 # use localstack
136 |
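   | For example, to run code (or tests) against the moto mock instead of real AWS, set the environment variable before any awsimple classes are instantiated. A minimal sketch:
   |
   |     import os
   |
   |     os.environ["AWSIMPLE_USE_MOTO_MOCK"] = "1"  # must be set before awsimple sessions are created
   |
   |     from awsimple import S3Access
   |
   |     s3_access = S3Access(bucket_name="testawsimple")  # backed by the moto mock, not real AWS
   |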
137 | ### Test Time
138 |
139 | | Method | Test Time (seconds) | Speedup (or slowdown) | Comment |
140 | |------------|---------------------|-----------------------|-----------------|
141 | | AWS | 462.65 | 1x | baseline |
142 | | mock | 40.46 | 11x | faster than AWS |
143 | | localstack | 2246.82 | 0.2x | slower than AWS |
144 |
145 | System: Intel® Core™ i7 CPU @ 3.47GHz, 32 GB RAM
146 |
147 | ## Contributing
148 |
149 | Contributions are welcome, and more information is available in the [contributing guide](CONTRIBUTING.md).
--------------------------------------------------------------------------------
/awsimple/__init__.py:
--------------------------------------------------------------------------------
1 | from .__version__ import __application_name__, __version__, __author__, __title__
2 | from .mock import use_moto_mock_env_var, is_mock, use_localstack_env_var, is_using_localstack
3 | from .aws import AWSAccess, AWSimpleException, boto_error_to_string
4 | from .cache import get_disk_free, get_directory_size, lru_cache_write, CacheAccess, CACHE_DIR_ENV_VAR
5 | from .dynamodb import DynamoDBAccess, dict_to_dynamodb, DBItemNotFound, DynamoDBTableNotFound, dynamodb_to_json, dynamodb_to_dict, QuerySelection, DictKey, convert_serializable_special_cases
6 | from .dynamodb import KeyType, aws_name_to_key_type
7 | from .dynamodb_miv import DynamoDBMIVUI, miv_string, get_time_us, miv_us_to_timestamp
8 | from .s3 import S3Access, S3DownloadStatus, S3ObjectMetadata, BucketNotFound
9 | from .sqs import SQSAccess, SQSPollAccess, aws_sqs_long_poll_max_wait_time, aws_sqs_max_messages
10 | from .sns import SNSAccess
11 | from .logs import LogsAccess
12 |
--------------------------------------------------------------------------------
/awsimple/__version__.py:
--------------------------------------------------------------------------------
1 | __application_name__ = "awsimple"
2 | __title__ = __application_name__
3 | __author__ = "abel"
4 | __version__ = "3.6.1"
5 | __author_email__ = "j@abel.co"
6 | __url__ = "https://github.com/jamesabel/awsimple"
7 | __download_url__ = "https://github.com/jamesabel/awsimple"
8 | __description__ = "Simple AWS API for S3, DynamoDB, SNS, and SQS"
9 |
--------------------------------------------------------------------------------
/awsimple/aws.py:
--------------------------------------------------------------------------------
1 | import os
2 | from typing import Union, Any
3 | from logging import getLogger
4 |
5 | from typeguard import typechecked
6 |
7 | from boto3.session import Session
8 | from botocore.credentials import Credentials
9 |
10 | from awsimple import __application_name__, is_mock, is_using_localstack
11 |
12 | log = getLogger(__application_name__)
13 |
14 |
15 | class AWSimpleException(Exception):
16 | pass
17 |
18 |
19 | def boto_error_to_string(boto_error) -> Union[str, None]:
20 | if (response := boto_error.response) is None:
21 | most_recent_error = str(boto_error)
22 | else:
23 | if (response_error := response.get("Error")) is None:
24 | most_recent_error = None
25 | else:
26 | most_recent_error = response_error.get("Code")
27 | return most_recent_error
28 |
29 |
30 | class AWSAccess:
31 | @typechecked()
32 | def __init__(
33 | self,
34 | resource_name: Union[str, None] = None,
35 | profile_name: Union[str, None] = None,
36 | aws_access_key_id: Union[str, None] = None,
37 | aws_secret_access_key: Union[str, None] = None,
38 | region_name: Union[str, None] = None,
39 | ):
40 | """
41 | AWSAccess - takes care of basic AWS access (e.g. session, client, resource), getting some basic AWS information, and mock support for testing.
42 |
43 | :param resource_name: AWS resource name (e.g. s3, dynamodb, sqs, sns, etc.). Can be None if just testing the connection.
44 |
45 | # Provide either: profile name or access key ID/secret access key pair
46 |
47 | :param profile_name: AWS profile name
48 | :param aws_access_key_id: AWS access key (required if secret_access_key given)
49 | :param aws_secret_access_key: AWS secret access key (required if access_key_id given)
50 | :param region_name: AWS region (may be optional - see AWS docs)
51 | """
52 |
53 | import boto3 # import here to facilitate mocking
54 |
55 | self.resource_name = resource_name
56 | self.profile_name = profile_name
57 | self.aws_access_key_id = aws_access_key_id
58 | self.aws_secret_access_key = aws_secret_access_key
59 | self.region_name = region_name
60 |
61 | # string representation of AWS most recent error code
62 | self.most_recent_error = None # type: Union[str, None]
63 |
64 | self._moto_mock = None
65 | self._aws_keys_save = {}
66 |
67 | # use keys in AWS config
68 | # https://docs.aws.amazon.com/cli/latest/userguide/cli-config-files.html
69 | kwargs = {}
70 | for k in ["profile_name", "aws_access_key_id", "aws_secret_access_key", "region_name"]:
71 | if getattr(self, k) is not None:
72 | kwargs[k] = getattr(self, k)
73 | self.session = boto3.session.Session(**kwargs)
74 |
75 | self.client = None # type: Any
76 | if is_mock():
77 | # moto mock AWS
78 | for aws_key in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_SECURITY_TOKEN", "AWS_SESSION_TOKEN"]:
79 | self._aws_keys_save[aws_key] = os.environ.get(aws_key) # will be None if not set
80 | os.environ[aws_key] = "testing"
81 |
82 | from moto import mock_aws
83 |
84 | self._moto_mock = mock_aws()
85 | self._moto_mock.start()
86 | region = "us-east-1"
87 | if self.resource_name == "logs" or self.resource_name is None:
88 | # logs don't have a resource
89 | self.resource = None
90 | else:
91 | self.resource = boto3.resource(self.resource_name, region_name=region) # type: ignore
92 | if self.resource_name is None:
93 | self.client = None
94 | else:
95 | self.client = boto3.client(self.resource_name, region_name=region) # type: ignore
96 | if self.resource_name == "s3":
97 | assert self.resource is not None
98 | self.resource.create_bucket(Bucket="testawsimple") # todo: put this in the test code
99 | elif is_using_localstack():
100 | self.aws_access_key_id = "test"
101 | self.aws_secret_access_key = "test"
102 | self.region_name = "us-west-2"
103 | if self.resource_name is not None:
104 | if self.resource_name == "logs":
105 | # logs don't have resource
106 | self.resource = None
107 | else:
108 | self.resource = boto3.resource(self.resource_name, endpoint_url=self._get_localstack_endpoint_url()) # type: ignore
109 | self.client = boto3.client(self.resource_name, endpoint_url=self._get_localstack_endpoint_url()) # type: ignore
110 | elif self.resource_name is None:
111 | # just the session, but not the client or resource
112 | self.client = None
113 | self.resource = None
114 | else:
115 | self.client = self.session.client(self.resource_name, config=self._get_config()) # type: ignore
116 | if self.resource_name == "logs" or self.resource_name == "rds":
117 | # logs and rds don't have a resource
118 | self.resource = None
119 | else:
120 | self.resource = self.session.resource(self.resource_name, config=self._get_config()) # type: ignore
121 |
122 | def _get_localstack_endpoint_url(self) -> str | None:
123 | endpoint_url = "http://localhost:4566" # default localstack endpoint
124 | return endpoint_url
125 |
126 | def _get_config(self):
127 | from botocore.config import Config # import here to facilitate mocking
128 |
129 | timeout = 60 * 60 # AWS default is 60, which is too short for some uses and/or connections
130 | return Config(connect_timeout=timeout, read_timeout=timeout)
131 |
132 | @typechecked()
133 | def get_region(self) -> Union[str, None]:
134 | """
135 | Get current selected AWS region
136 |
137 | :return: region string
138 | """
139 | return self.session.region_name
140 |
141 | def get_access_key(self) -> Union[str, None]:
142 | """
143 | Get current access key string
144 |
145 | :return: access key
146 | """
147 | _session = self.session
148 | assert isinstance(_session, Session) # for mypy
149 | _credentials = _session.get_credentials()
150 | assert isinstance(_credentials, Credentials) # for mypy
151 | access_key = _credentials.access_key
152 | return access_key
153 |
154 | def get_account_id(self):
155 | """
156 | Get AWS account ID *** HAS BEEN REMOVED ***
157 |
158 | :return: account ID
159 | """
160 | raise NotImplementedError(".get_account_id() has been removed")
161 |
162 | def test(self) -> bool:
163 | """
164 | Basic connection/capability test
165 |
166 | :return: True if connection OK
167 | """
168 |
169 | resources = self.session.get_available_resources() # boto3 will throw an error if there's an issue here
170 | if self.resource_name is not None and self.resource_name not in resources:
171 | raise PermissionError(self.resource_name) # we don't have permission to the specified resource
172 | return True # if we got here, we were successful
173 |
174 | def is_mocked(self) -> bool:
175 | """
176 | Return True if currently mocking the AWS interface (e.g. for testing).
177 |
178 | :return: True if mocked
179 | """
180 | return self._moto_mock is not None
181 |
182 | def clear_most_recent_error(self):
183 | self.most_recent_error = None
184 |
185 | def __del__(self):
186 | if self._moto_mock is not None:
187 | # if mocking, put everything back
188 |
189 | for aws_key, value in self._aws_keys_save.items():
190 | if value is None:
191 | del os.environ[aws_key]
192 | else:
193 | os.environ[aws_key] = value
194 |
195 | self._moto_mock.stop()
196 | self._moto_mock = None # mock is "done"
197 |
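   | # Usage sketch (the profile name is illustrative):
   | #   aws_access = AWSAccess(resource_name="s3", profile_name="testawsimple")
   | #   aws_access.test()        # basic connection/capability check
   | #   aws_access.get_region()  # e.g. "us-west-2"
   |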
--------------------------------------------------------------------------------
/awsimple/cache.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from shutil import disk_usage, copy2
3 | import os
4 | import math
5 | from typing import Union
6 | from logging import getLogger
7 |
8 | from typeguard import typechecked
9 | from appdirs import user_cache_dir
10 |
11 | from awsimple import __application_name__, __author__, AWSAccess, AWSimpleException
12 |
13 | log = getLogger(__application_name__)
14 |
15 | CACHE_DIR_ENV_VAR = f"{__application_name__}_CACHE_DIR".upper()
16 |
17 |
18 | @typechecked()
19 | def get_disk_free(path: Path = Path(".")) -> int:
20 | total, used, free = disk_usage(Path(path).absolute().anchor)
21 | log.info(f"{total=} {used=} {free=}")
22 | return free
23 |
24 |
25 | @typechecked()
26 | def get_directory_size(path: Path) -> int:
27 | size = 0
28 | for p in path.glob("*"):
29 | if p.is_file():
30 | size += os.path.getsize(p)
31 | elif p.is_dir():
32 | size += get_directory_size(p)
33 | return size
34 |
35 |
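   | # lru_cache_write usage sketch (names are illustrative):
   | #   lru_cache_write(Path("new_file.bin"), Path("cache_dir"), "some_cache_key", max_free_portion=0.1)
   | # writes new_file.bin into cache_dir under the name "some_cache_key", evicting least recently used files as needed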
36 | @typechecked()
37 | def lru_cache_write(new_data: Union[Path, bytes], cache_dir: Path, cache_file_name: str, max_absolute_cache_size: Union[int, None] = None, max_free_portion: Union[float, None] = None) -> bool:
38 | """
39 | write new data into the LRU cache, evicting least recently used files as needed to make room
40 | :param new_data: path to new file or a bytes object we want to put in the cache
41 | :param cache_dir: cache directory
42 | :param cache_file_name: file name to write in cache
43 | :param max_absolute_cache_size: max absolute cache size (or None if not specified)
44 | :param max_free_portion: max portion of disk free space the cache is allowed to consume (e.g. 0.1 to take up to 10% of free disk space)
45 | :return: True if the data was written to the cache
46 | """
47 |
48 | least_recently_used_path = None
49 | least_recently_used_access_time = None
50 | least_recently_used_size = None
51 | wrote_to_cache = False
52 |
53 | try:
54 | max_free_absolute = max_free_portion * get_disk_free() if max_free_portion is not None else None
55 | values = [v for v in [max_free_absolute, max_absolute_cache_size] if v is not None]
56 | max_cache_size = min(values) if len(values) > 0 else None
57 | log.info(f"{max_cache_size=}")
58 |
59 | if isinstance(new_data, Path):
60 | new_size = os.path.getsize(new_data)
61 | elif isinstance(new_data, bytes):
62 | new_size = len(new_data)
63 | else:
64 | raise RuntimeError
65 |
66 | if max_cache_size is None:
67 | is_room = True # no limit
68 | elif new_size > max_cache_size:
69 | log.info(f"{new_data=} {new_size=} is larger than the cache itself {max_cache_size=}")
70 | is_room = False # new file will never fit so don't try to evict to make room for it
71 | else:
72 | cache_size = get_directory_size(cache_dir)
73 | overage = (cache_size + new_size) - max_cache_size
74 |
75 | # cache eviction
76 | while overage > 0:
77 | starting_overage = overage
78 |
79 | # find the least recently used file
80 | least_recently_used_path = None
81 | least_recently_used_access_time = None
82 | least_recently_used_size = None
83 | for file_path in cache_dir.rglob("*"):
84 | access_time = os.path.getatime(file_path)
85 | if least_recently_used_path is None or least_recently_used_access_time is None or access_time < least_recently_used_access_time:
86 | least_recently_used_path = file_path
87 | least_recently_used_access_time = access_time
88 | least_recently_used_size = os.path.getsize(file_path)
89 |
90 | if least_recently_used_path is not None:
91 | log.debug(f"evicting {least_recently_used_path=} {least_recently_used_access_time=} {least_recently_used_size=}")
92 | least_recently_used_path.unlink()
93 | if least_recently_used_size is None:
94 | raise AWSimpleException(f"{least_recently_used_size=}")
95 | else:
96 | overage -= least_recently_used_size
97 |
98 | if overage == starting_overage:
99 | # tried to free up space but were unsuccessful, so give up
100 | overage = 0
101 |
102 | # determine if we have room for the new file
103 | is_room = get_directory_size(cache_dir) + new_size <= max_cache_size
104 |
105 | if is_room:
106 | cache_dir.mkdir(parents=True, exist_ok=True)
107 | cache_dest = Path(cache_dir, cache_file_name)
108 | if isinstance(new_data, Path):
109 | log.info(f"caching {new_data} to {cache_dest=}")
110 | copy2(new_data, cache_dest)
111 | wrote_to_cache = True
112 | elif isinstance(new_data, bytes):
113 | log.info(f"caching {len(new_data)}B to {cache_dest=}")
114 | with cache_dest.open("wb") as f:
115 | f.write(new_data)
116 | wrote_to_cache = True
117 | else:
118 | raise RuntimeError
119 | else:
120 | log.info(f"no room for {new_data=}")
121 |
122 | except (FileNotFoundError, IOError, PermissionError) as e:
123 | log.debug(f"{least_recently_used_path=} {least_recently_used_access_time=} {least_recently_used_size=} {e}", stack_info=True, exc_info=True)
124 |
125 | return wrote_to_cache
126 |
127 |
128 | class CacheAccess(AWSAccess):
129 | def __init__(
130 | self,
131 | resource_name: str,
132 | cache_dir: Union[Path, None] = None,
133 | cache_life: float = math.inf,
134 | cache_max_absolute: int = round(1e9),
135 | cache_max_of_free: float = 0.05,
136 | mtime_abs_tol: float = 10.0,
137 | use_env_var_cache_dir: bool = False,
138 | **kwargs,
139 | ):
140 | """
141 | AWS Access for cacheables
142 |
143 | :param cache_dir: dir for cache
144 | :param cache_life: life of cache (in seconds)
145 | :param cache_max_absolute: max absolute size of cache (in bytes)
146 | :param cache_max_of_free: max portion of disk free space the cache will consume
147 | :param mtime_abs_tol: window in seconds within which modification times are considered equal
148 | :param use_env_var_cache_dir: set to True to use the AWSIMPLE_CACHE_DIR environment variable for the cache dir (the user must explicitly enable this)
149 | """
150 |
151 | self.use_env_var_cache_dir = use_env_var_cache_dir
152 | if cache_dir is not None:
153 | self.cache_dir = cache_dir # passing cache dir in takes precedence
154 | elif self.use_env_var_cache_dir and (cache_dir_from_env_var := os.environ.get(CACHE_DIR_ENV_VAR)) is not None:
155 | self.cache_dir = Path(cache_dir_from_env_var.strip())
156 | else:
157 | self.cache_dir = Path(user_cache_dir(__application_name__, __author__), "aws", resource_name)
158 |
159 | self.cache_life = cache_life # seconds
160 | self.cache_max_absolute = cache_max_absolute # max absolute cache size
161 | self.cache_max_of_free = cache_max_of_free # max portion of the disk's free space this LRU cache will take
162 | self.cache_retries = 10 # cache upload retries
163 | self.mtime_abs_tol = mtime_abs_tol # file modification times within this cache window (in seconds) are considered equivalent
164 |
165 | super().__init__(resource_name, **kwargs)
166 |
--------------------------------------------------------------------------------
/awsimple/dynamodb_miv.py:
--------------------------------------------------------------------------------
1 | from typing import Union, Type
2 | import time
3 | from copy import deepcopy
4 | from logging import getLogger
5 | from decimal import Decimal
6 |
7 | from typeguard import typechecked
8 | from boto3.dynamodb.conditions import Key
9 |
10 | from awsimple import DynamoDBAccess, DBItemNotFound, __application_name__
11 |
12 | miv_string = "mivui" # monotonically increasing value roughly based on uS (microseconds) since the epoch, as an integer
13 |
14 | log = getLogger(__application_name__)
15 |
16 |
17 | @typechecked()
18 | def get_time_us() -> int:
19 | """
20 | Get the current time in uS (microseconds) since the epoch as an int.
21 | :return: time in uS since the epoch
22 | """
23 | return int(round(time.time() * 1e6))
24 |
25 |
26 | @typechecked()
27 | def miv_us_to_timestamp(miv_ui: Union[int, Decimal]) -> float:
28 | """
29 | Convert a miv uS int back to regular timestamp since epoch in seconds.
30 | :param miv_ui: MIV in uS as an int
31 | :return: regular time since epoch in seconds (as a float)
32 | """
33 | return float(miv_ui) / 1e6
34 |
35 |
36 | class DynamoDBMIVUI(DynamoDBAccess):
37 | """
38 | DynamoDB with a MIV UI (monotonically increasing value in uS since the epoch as an integer) as the "sort" key of the primary key pair. Useful for ordered puts and gets to DynamoDB,
39 | and enables get-ing the most senior item.
40 |
41 | One of the complaints about DynamoDB is that it doesn't have "automatic indexing" and/or "automatic timestamp". While this isn't automatic indexing per se, it does provide for
42 | ordered writes for a given primary partition (hash) key, and does so via a monotonically increasing value roughly based on time (essentially an automatic timestamp), which in
43 | some cases may be even more useful.
44 | """
45 |
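   | # Usage sketch (the table name, keys, and values are illustrative):
   | #   db = DynamoDBMIVUI(table_name="example", profile_name="testawsimple")
   | #   db.create_table("id")                          # the sort key is automatically the miv ("mivui")
   | #   db.put_item({"id": "sensor-1", "value": 17})   # the miv (time-based, monotonic) is filled in automatically
   | #   latest = db.get_most_senior_item("id", "sensor-1")
   |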
46 | @typechecked()
47 | def create_table( # type: ignore
48 | self,
49 | partition_key: str,
50 | secondary_index: Union[str, None] = None,
51 | partition_key_type: Union[Type[str], Type[int], Type[bool]] = str,
52 | secondary_key_type: Union[Type[str], Type[int], Type[bool]] = str,
53 | ) -> bool:
54 | return super().create_table(partition_key, miv_string, secondary_index, partition_key_type, int, secondary_key_type)
55 |
56 | @typechecked()
57 | def put_item(self, item: dict, time_us: Union[int, None] = None):
58 | """
59 | Put (write) a DynamoDB table item with the miv automatically filled in.
60 |
61 | :param item: item
62 | :param time_us: optional time in uS to use (otherwise current time is used)
63 | """
64 | assert self.resource is not None
65 | table = self.resource.Table(self.table_name)
66 |
67 | # Determine new miv. The miv is an int to avoid comparison or specification problems that can arise with floats. For example, when it comes time to delete an item.
68 | if time_us is None:
69 | # get the miv for the existing entries
70 | partition_key = self.get_primary_partition_key()
71 | partition_value = item[partition_key]
72 | try:
73 | existing_most_senior_item = self.get_most_senior_item(partition_key, partition_value)
74 | existing_miv_ui = existing_most_senior_item[miv_string]
75 | except DBItemNotFound:
76 | existing_miv_ui = None
77 |
78 | current_time_us = get_time_us()
79 | if existing_miv_ui is None or current_time_us > existing_miv_ui:
80 | new_miv_ui = current_time_us
81 | else:
82 | # the prior writer seems to be from the future (from our perspective), so just increment the existing miv by the smallest increment and go with that
83 | new_miv_ui = existing_miv_ui + 1
84 | else:
85 | new_miv_ui = time_us
86 |
87 | # make the new item with the new miv and put it into the DB table
88 | new_item = deepcopy(item)
89 | new_item[miv_string] = new_miv_ui
90 | table.put_item(Item=new_item)
91 |
92 | @typechecked()
93 | def get_most_senior_item(self, partition_key: str, partition_value: Union[str, int]) -> dict:
94 | """
95 | Get the most senior (greatest miv value) item for a given primary partition (hash) key. Raises DBItemNotFound if it doesn't exist.
96 | :return: most senior item
97 | """
98 | assert self.resource is not None
99 | table = self.resource.Table(self.table_name)
100 | # just get the one most senior item
101 | response = table.query(KeyConditionExpression=Key(partition_key).eq(partition_value), ScanIndexForward=False, Limit=1)
102 | if (items := response.get("Items")) is None or len(items) < 1:
103 | raise DBItemNotFound(f"{partition_key=},{partition_value=}")
104 | item = items[0] # we asked for exactly one
105 | return item
106 |
--------------------------------------------------------------------------------
/awsimple/logs.py:
--------------------------------------------------------------------------------
1 | import time
2 | import getpass
3 | import platform
4 | from functools import lru_cache
5 | from typing import Union
6 | from pathlib import Path
7 | from datetime import datetime
8 |
9 | from awsimple import AWSAccess
10 |
11 |
12 | @lru_cache()
13 | def get_user_name() -> str:
14 | return getpass.getuser()
15 |
16 |
17 | @lru_cache()
18 | def get_computer_name() -> str:
19 | return platform.node()
20 |
21 |
22 | class LogsAccess(AWSAccess):
23 | """
24 | Perform logging to AWS using CloudWatch Logs
25 | """
26 |
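   | # Usage sketch (the log group name is illustrative):
   | #   logs_access = LogsAccess("my-application")
   | #   logs_access.put("hello from awsimple")  # creates the log group/stream on first use if needed
   |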
27 | def __init__(self, log_group: str, **kwargs):
28 | """
29 | Log to AWS CloudWatch.
30 | :param log_group: AWS CloudWatch log group
31 | :param kwargs: other kwargs (e.g. for authentication)
32 | """
33 | super().__init__("logs", **kwargs)
34 | self.log_group = log_group
35 | self._upload_sequence_token = None # type: Union[str, None]
36 |
37 | def put(self, message: str):
38 | """
39 | Log a message.
40 | :param message: message as a string
41 | """
42 | try:
43 | self._put(message)
44 | success = True
45 | except self.client.exceptions.ResourceNotFoundException:
46 | success = False
47 | if not success:
48 | # log group and stream does not appear to exist, so make them
49 | try:
50 | self.client.create_log_group(logGroupName=self.log_group)
51 | self.client.put_retention_policy(logGroupName=self.log_group, retentionInDays=self.get_retention_in_days())
52 | except self.client.exceptions.ResourceAlreadyExistsException:
53 | pass
54 | self.client.create_log_stream(logGroupName=self.log_group, logStreamName=self.get_stream_name())
55 | self._put(message)
56 |
57 | def _put(self, message: str):
58 | """
59 | Perform the put log event. Internal method to enable try/except in the regular .put() method.
60 | :param message: message as a string
61 | """
62 |
63 | # if we don't yet have the sequence token, try to get it from AWS
65 | stream_name = self.get_stream_name()
66 | if self._upload_sequence_token is None:
67 | log_streams_description = self.client.describe_log_streams(logGroupName=self.log_group)
68 | if (log_streams := log_streams_description.get("logStreams")) is not None and len(log_streams) > 0:
69 | for log_stream in log_streams:
70 | if log_stream["logStreamName"] == stream_name:
71 | self._upload_sequence_token = log_stream.get("uploadSequenceToken")
72 |
73 | # timestamp defined by AWS to be mS since epoch
74 | log_events = [{"timestamp": int(round(time.time() * 1000)), "message": message}]
75 | try:
76 | if self._upload_sequence_token is None:
77 | put_response = self.client.put_log_events(logGroupName=self.log_group, logStreamName=stream_name, logEvents=log_events)
78 | else:
79 | put_response = self.client.put_log_events(logGroupName=self.log_group, logStreamName=stream_name, logEvents=log_events, sequenceToken=self._upload_sequence_token)
80 | except self.client.exceptions.InvalidSequenceTokenException as e:
81 | # something went terribly wrong in logging, so write what happened somewhere safe
82 | with Path(Path.home(), "awsimple_exception.txt").open("w") as f:
83 | f.write(f"{datetime.now().astimezone().isoformat()},{self.log_group=},{stream_name=},{self._upload_sequence_token=},{e}\n")
84 | put_response = None
85 |
86 | if put_response is None:
87 | self._upload_sequence_token = None
88 | else:
89 | self._upload_sequence_token = put_response.get("nextSequenceToken")
90 |
91 | def get_stream_name(self) -> str:
92 | """
93 | Get the stream name. User of this class can override this method to use a different stream name.
94 | :return: stream name string
95 | """
96 | return f"{get_computer_name()}-{get_user_name()}"
97 |
98 | def get_retention_in_days(self) -> int:
99 | """
100 | Define the log retention in days. User of this class can override this method to use a different retention period (only used when log group is created).
101 | :return: retention time in days as an integer
102 | """
103 | return 365
104 |
--------------------------------------------------------------------------------
/awsimple/mock.py:
--------------------------------------------------------------------------------
1 | import os
2 | from functools import cache
3 |
4 | from tobool import to_bool_strict
5 |
6 | use_moto_mock_env_var = "AWSIMPLE_USE_MOTO_MOCK"
7 | use_localstack_env_var = "AWSIMPLE_USE_LOCALSTACK"
8 |
9 |
10 | @cache
11 | def is_mock() -> bool:
12 | """
13 | Is using moto mock?
14 | :return: True if using moto mock.
15 | """
16 | return to_bool_strict(os.environ.get(use_moto_mock_env_var, "0"))
17 |
18 |
19 | @cache
20 | def is_using_localstack() -> bool:
21 | """
22 | Is using localstack?
23 | :return: True if using localstack.
24 | """
25 | return to_bool_strict(os.environ.get(use_localstack_env_var, "0"))
26 |
--------------------------------------------------------------------------------
/awsimple/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesabel/awsimple/8157ed3d9501fcbc56382f148385144ed73a307e/awsimple/py.typed
--------------------------------------------------------------------------------
/awsimple/s3.py:
--------------------------------------------------------------------------------
1 | """
2 | S3 Access
3 | """
4 |
5 | import os
6 | import shutil
7 | import time
8 | from math import isclose
9 | from pathlib import Path
10 | from dataclasses import dataclass
11 | from datetime import datetime
12 | from typing import Dict, List, Union
13 | import json
14 | from logging import getLogger
15 |
16 | import boto3
17 | from botocore.client import Config
18 | from botocore.exceptions import ClientError, EndpointConnectionError, ConnectionClosedError, SSLError
19 | from boto3.s3.transfer import TransferConfig
20 | from s3transfer import S3UploadFailedError
21 | import urllib3.exceptions
22 | from typeguard import typechecked
23 | from hashy import get_string_sha512, get_file_sha512, get_bytes_sha512, get_dls_sha512
24 | from yasf import sf
25 |
26 | from awsimple import CacheAccess, __application_name__, lru_cache_write, AWSimpleException, convert_serializable_special_cases
27 |
28 | # Use this project's name as a prefix to avoid string collisions. Use dashes instead of underscore since that's AWS's convention.
29 | sha512_string = f"{__application_name__}-sha512"
30 |
31 | json_extension = ".json"
32 |
33 | log = getLogger(__application_name__)
34 |
35 | connection_errors = (S3UploadFailedError, ClientError, EndpointConnectionError, SSLError, urllib3.exceptions.ProtocolError, ConnectionClosedError)
36 |
37 |
38 | class BucketNotFound(AWSimpleException):
39 | def __init__(self, bucket_name):
40 | self.bucket_name = bucket_name
41 | self.message = "Bucket not found"
42 | super().__init__(self.message)
43 |
44 | def __str__(self):
45 | return f"{self.bucket_name=} {self.message}"
46 |
47 |
48 | @dataclass
49 | class S3DownloadStatus:
50 | success: bool = False
51 | cache_hit: Union[bool, None] = None
52 | cache_write: Union[bool, None] = None
53 |
54 |
55 | @dataclass
56 | class S3ObjectMetadata:
57 | bucket: str
58 | key: str
59 | size: int
60 | mtime: datetime
61 | etag: str # generally not used
62 | sha512: Union[str, None] # hex string - only entries written with awsimple will have this
63 | url: str # URL of S3 object
64 |
65 | def get_sha512(self) -> str:
66 | """
67 | Get hash used to compare S3 objects. If the SHA512 is available (recommended), then use that. If not (e.g. an S3 object wasn't written with AWSimple), create a "substitute"
68 | SHA512 hash that should change if the object contents change.
69 | :return: SHA512 hash (as string)
70 | """
71 | if (sha512_value := self.sha512) is None:
72 | # round timestamp to seconds to try to avoid possible small deltas when dealing with time and floats
73 | mtime_as_int = int(round(self.mtime.timestamp()))
74 | metadata_list = [self.bucket, self.key, self.size, mtime_as_int]
75 | if self.etag is not None and len(self.etag) > 0:
76 | metadata_list.append(self.etag)
77 | sha512_value = get_dls_sha512(metadata_list)
78 |
79 | return sha512_value
80 |
81 |
82 | @typechecked()
83 | def serializable_object_to_json_as_bytes(json_serializable_object: Union[List, Dict]) -> bytes:
84 | return bytes(json.dumps(json_serializable_object, default=convert_serializable_special_cases).encode("UTF-8"))
85 |
86 |
87 | def _get_json_key(s3_key: str):
88 | """
89 | get JSON key given an s3_key that may not have the .json extension
90 | :param s3_key: s3 key, potentially without the extension
91 | :return: JSON S3 key
92 | """
93 | if not s3_key.endswith(json_extension):
94 | s3_key = f"{s3_key}{json_extension}"
95 | return s3_key
96 |
97 |
98 | class S3Access(CacheAccess):
99 | @typechecked()
100 | def __init__(self, bucket_name: Union[str, None] = None, **kwargs):
101 | """
102 | S3 Access
103 |
104 | :param bucket_name: S3 bucket name
105 | :param kwargs: additional kwargs passed on to CacheAccess and AWSAccess (e.g. profile_name, cache_dir)
106 | """
107 | self.bucket_name = bucket_name
108 | self.retry_sleep_time = 3.0 # seconds
109 | self.retry_count = 10
110 | self.public_readable = False
111 | self.download_status = S3DownloadStatus()
112 | super().__init__(resource_name="s3", **kwargs)
113 |
114 | def get_s3_transfer_config(self) -> TransferConfig:
115 | # workaround threading issue https://github.com/boto/s3transfer/issues/197
116 | # derived class can overload this if a different config is desired
117 | s3_transfer_config = TransferConfig(use_threads=False)
118 | return s3_transfer_config
119 |
120 | @typechecked()
121 | def set_public_readable(self, public_readable: bool):
122 | self.public_readable = public_readable
123 |
124 | @typechecked()
125 | def bucket_list(self) -> list:
126 | """
127 | list out all buckets
128 | (not called list_buckets() since that's used in boto3 but this returns a list of bucket strings not a list of dicts)
129 |
130 | :return: list of buckets
131 | """
132 | return [b["Name"] for b in self.client.list_buckets()["Buckets"]]
133 |
134 | @typechecked()
135 | def read_string(self, s3_key: str) -> str:
136 | """
137 | Read contents of an S3 object as a string
138 |
139 | :param s3_key: S3 key
140 | :return: S3 object as a string
141 | """
142 | log.debug(f"reading {self.bucket_name}/{s3_key}")
143 | assert self.resource is not None
144 | return self.resource.Object(self.bucket_name, s3_key).get()["Body"].read().decode()
145 |
146 | @typechecked()
147 | def read_lines(self, s3_key: str) -> List[str]:
148 | """
149 | Read contents of an S3 object as a list of strings
150 |
151 | :param s3_key: S3 key
152 | :return: a list of strings
153 | """
154 | return self.read_string(s3_key).splitlines()
155 |
156 | @typechecked()
157 | def write_string(self, input_str: str, s3_key: str):
158 | """
159 | Write a string to an S3 object
160 |
161 | :param input_str: input string
162 | :param s3_key: S3 key
163 | """
164 | log.debug(f"writing {self.bucket_name}/{s3_key}")
165 | assert self.resource is not None
166 | self.resource.Object(self.bucket_name, s3_key).put(Body=input_str, Metadata={sha512_string: get_string_sha512(input_str)})
167 |
168 | @typechecked()
169 | def write_lines(self, input_lines: List[str], s3_key: str):
170 | """
171 | Write a list of strings to an S3 bucket
172 |
173 | :param input_lines: a list of strings
174 | :param s3_key: S3 key
175 | """
176 | self.write_string("\n".join(input_lines), s3_key)
177 |
178 | @typechecked()
179 | def delete_object(self, s3_key: str):
180 | """
181 | Delete an S3 object
182 |
183 | :param s3_key: S3 key
184 | """
185 | log.info(f"deleting {self.bucket_name}/{s3_key}")
186 | assert self.resource is not None
187 | self.resource.Object(self.bucket_name, s3_key).delete()
188 |
189 | @typechecked()
190 | def upload(self, file_path: Union[str, Path], s3_key: str, force: bool = False) -> bool:
191 | """
192 | Upload a file to an S3 object
193 |
194 | :param file_path: path to file to upload
195 | :param s3_key: S3 key
196 | :param force: True to force the upload, even if the file hash matches the S3 contents
197 | :return: True if uploaded
198 | """
199 |
200 | log.info(f'S3 upload : "{file_path}" to {self.bucket_name}/{s3_key}')
201 |
202 | if isinstance(file_path, str):
203 | file_path = Path(file_path)
204 |
205 | file_mtime = os.path.getmtime(file_path)
206 | file_sha512 = get_file_sha512(file_path)
207 | if force:
208 | upload_flag = True
209 | else:
210 | if self.object_exists(s3_key):
211 | s3_object_metadata = self.get_s3_object_metadata(s3_key)
212 | log.info(f"{s3_object_metadata=}")
213 | if s3_object_metadata.get_sha512() is not None and file_sha512 is not None:
214 | # use the hash provided by awsimple, if it exists
215 | upload_flag = file_sha512 != s3_object_metadata.get_sha512()
216 | else:
217 | # if not, use mtime
218 | upload_flag = not isclose(file_mtime, s3_object_metadata.mtime.timestamp(), abs_tol=self.mtime_abs_tol)
219 | else:
220 | upload_flag = True
221 |
222 | uploaded_flag = False
223 | if upload_flag:
224 | log.info(f"local file : {file_sha512=},force={force} - uploading")
225 |
226 | transfer_retry_count = 0
227 | while not uploaded_flag and transfer_retry_count < self.retry_count:
228 | extra_args = {"Metadata": {sha512_string: file_sha512}}
229 | if self.public_readable:
230 | extra_args["ACL"] = "public-read" # type: ignore
231 | log.info(f"{extra_args=}")
232 |
233 | try:
234 | self.client.upload_file(str(file_path), self.bucket_name, s3_key, ExtraArgs=extra_args, Config=self.get_s3_transfer_config())
235 | uploaded_flag = True
236 | except connection_errors as e:
237 | log.warning(f"{file_path} to {self.bucket_name}:{s3_key} : {transfer_retry_count=} : {e}")
238 | time.sleep(self.retry_sleep_time)
239 | except RuntimeError as e:
240 | log.error(f"{file_path} to {self.bucket_name}:{s3_key} : {transfer_retry_count=} : {e}")
241 | time.sleep(self.retry_sleep_time)
242 |
243 | transfer_retry_count += 1
244 |
245 | else:
246 | log.info(f"file hash of {file_sha512} is the same as is already on S3 and force={force} - not uploading")
247 |
248 | return uploaded_flag
249 |
250 | @typechecked()
251 | def upload_object_as_json(self, json_serializable_object: Union[List, Dict], s3_key: str, force=False) -> bool:
252 | """
253 | Upload a serializable Python object to an S3 object
254 |
255 | :param json_serializable_object: serializable object
256 | :param s3_key: S3 key
257 | :param force: True to force the upload, even if the file hash matches the S3 contents
258 | :return: True if uploaded
259 | """
260 |
261 | s3_key = _get_json_key(s3_key)
262 | json_as_bytes = serializable_object_to_json_as_bytes(json_serializable_object)
263 | json_sha512 = get_bytes_sha512(json_as_bytes)
264 | upload_flag = True
265 | if not force and self.object_exists(s3_key):
266 | s3_object_metadata = self.get_s3_object_metadata(s3_key)
267 | log.info(f"{s3_object_metadata=}")
268 | if s3_object_metadata.get_sha512() is not None and json_sha512 is not None:
269 | # use the hash provided by awsimple, if it exists
270 | upload_flag = json_sha512 != s3_object_metadata.get_sha512()
271 |
272 | uploaded_flag = False
273 | if upload_flag:
274 | log.info(f"{json_sha512=},force={force} - uploading")
275 |
276 | transfer_retry_count = 0
277 | while not uploaded_flag and transfer_retry_count < self.retry_count:
278 | meta_data = {sha512_string: json_sha512}
279 | log.info(f"{meta_data=}")
280 | assert self.resource is not None
281 | try:
282 | s3_object = self.resource.Object(self.bucket_name, s3_key)
283 | if self.public_readable:
284 | s3_object.put(Body=json_as_bytes, Metadata=meta_data, ACL="public-read")
285 | else:
286 | s3_object.put(Body=json_as_bytes, Metadata=meta_data)
287 | uploaded_flag = True
288 | except connection_errors as e:
289 | log.warning(f"{self.bucket_name}:{s3_key} : {transfer_retry_count=} : {e}")
290 | transfer_retry_count += 1
291 | time.sleep(self.retry_sleep_time)
292 |
293 | else:
294 | log.info(f"file hash of {json_sha512} is the same as is already on S3 and force={force} - not uploading")
295 |
296 | return uploaded_flag
297 |
298 | @typechecked()
299 | def download(self, s3_key: str, dest_path: Union[str, Path]) -> bool:
300 | """
301 | Download an S3 object
302 |
303 | :param s3_key: S3 key
304 | :param dest_path: destination file or directory path. If the path is a directory, the file will be downloaded to that directory with the same name as the S3 key.
305 | :return: True if downloaded successfully
306 | """
307 |
308 | if isinstance(dest_path, str):
309 |     log.info(f"{dest_path} is not a Path object. Passing a str dest_path is deprecated and will be removed in the future - use a Path object")
    |     dest_path = Path(dest_path)  # convert so the rest of this method can rely on a Path
310 |
311 | assert isinstance(dest_path, Path)
312 | if dest_path.is_dir():
313 | dest_path = Path(dest_path, s3_key)
314 |
315 | log.info(f'S3 download : {self.bucket_name}:{s3_key} to "{dest_path}" ("{Path(dest_path).absolute()}")')
316 |
317 | Path(dest_path).parent.mkdir(parents=True, exist_ok=True)
318 |
319 | transfer_retry_count = 0
320 | success = False
321 | while not success and transfer_retry_count < self.retry_count:
322 | try:
323 | log.debug(sf("calling client.download_file()", bucket_name=self.bucket_name, s3_key=s3_key, dest_path=dest_path))
324 | self.client.download_file(self.bucket_name, s3_key, dest_path)
325 | log.debug(sf("S3 client.download_file() complete", bucket_name=self.bucket_name, s3_key=s3_key, dest_path=dest_path))
326 | s3_object_metadata = self.get_s3_object_metadata(s3_key)
327 | log.debug(sf("S3 object metadata", s3_object_metadata=s3_object_metadata))
328 | mtime_ts = s3_object_metadata.mtime.timestamp()
329 | os.utime(dest_path, (mtime_ts, mtime_ts)) # set the file mtime to the mtime in S3
330 | success = True
331 | except connection_errors as e:
332 | # ProtocolError can happen for a broken connection
333 | log.warning(f"{self.bucket_name}/{s3_key} to {dest_path} ({Path(dest_path).absolute()}) : {transfer_retry_count=} : {e}")
334 | time.sleep(self.retry_sleep_time)
335 | transfer_retry_count += 1
336 | log.debug(sf(transfer_retry_count=transfer_retry_count, success=success, bucket_name=self.bucket_name, s3_key=s3_key, dest_path=dest_path))
337 | return success
338 |
339 | @typechecked()
340 | def download_cached(self, s3_key: str, dest_path: Path) -> S3DownloadStatus:
341 | """
342 | download from AWS S3 with caching
343 |
344 |         :param s3_key: S3 key of source
345 |         :param dest_path: destination full path or directory. If the path is a directory, the file will be downloaded to that directory with the same name as the S3 key.
346 | :return: S3DownloadStatus instance
347 | """
348 |
349 | if dest_path.is_dir():
350 | dest_path = Path(dest_path, s3_key)
351 | log.info(f'S3 download_cached : {self.bucket_name}:{s3_key} to "{dest_path}" ("{dest_path.absolute()}")')
352 |
353 | self.download_status = S3DownloadStatus() # init
354 |
355 | s3_object_metadata = self.get_s3_object_metadata(s3_key)
356 |
357 | sha512 = s3_object_metadata.get_sha512()
358 | cache_path = Path(self.cache_dir, sha512)
359 |         log.debug(f"{cache_path=}")
360 |
361 | if cache_path.exists():
362 | log.info(f"{self.bucket_name}/{s3_key} cache hit : copying {cache_path=} to {dest_path=} ({dest_path.absolute()})")
363 | self.download_status.cache_hit = True
364 | self.download_status.success = True
365 | dest_path.parent.mkdir(parents=True, exist_ok=True)
366 | shutil.copy2(cache_path, dest_path)
367 | else:
368 | self.download_status.cache_hit = False
369 |
370 | if not self.download_status.cache_hit:
371 | log.info(f"{self.bucket_name=}/{s3_key=} cache miss : {dest_path=} ({dest_path.absolute()})")
372 | self.download(s3_key, dest_path)
373 | self.cache_dir.mkdir(parents=True, exist_ok=True)
374 | self.download_status.cache_write = lru_cache_write(dest_path, self.cache_dir, sha512, self.cache_max_absolute, self.cache_max_of_free)
375 | self.download_status.success = True
376 |
377 | return self.download_status
378 |
379 | @typechecked()
380 | def download_object_as_json(self, s3_key: str) -> Union[List, Dict]:
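    |         """
    |         download an object from AWS S3 and deserialize it from JSON
    | 
    |         :param s3_key: S3 key of source
    |         :return: deserialized object (list or dict)
    |         """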
381 | s3_key = _get_json_key(s3_key)
382 | assert self.resource is not None
383 | s3_object = self.resource.Object(self.bucket_name, s3_key)
384 | body = s3_object.get()["Body"].read().decode("utf-8")
385 | obj = json.loads(body)
386 | return obj
387 |
388 | @typechecked()
389 | def download_object_as_json_cached(self, s3_key: str) -> Union[List, Dict]:
390 | """
391 | download object from AWS S3 with caching
392 |
393 | :param s3_key: S3 key of source
395 |     :return: deserialized object (list or dict)
395 | """
396 | object_from_json = None
397 |
398 | s3_key = _get_json_key(s3_key)
399 |
400 | self.download_status = S3DownloadStatus() # init
401 |
402 | s3_object_metadata = self.get_s3_object_metadata(s3_key)
403 |
404 | sha512 = s3_object_metadata.get_sha512()
405 | cache_path = Path(self.cache_dir, sha512)
406 |         log.debug(f"{cache_path=}")
407 |
408 | if cache_path.exists():
409 | log.info(f"{self.bucket_name}/{s3_key} cache hit : using {cache_path=}")
410 | self.download_status.cache_hit = True
411 | self.download_status.success = True
412 | with cache_path.open("rb") as f:
413 | object_from_json = json.loads(f.read())
414 | else:
415 | self.download_status.cache_hit = False
416 |
417 | if not self.download_status.cache_hit:
418 |             log.info(f"{self.bucket_name=}/{s3_key=} cache miss")
419 | assert self.resource is not None
420 | s3_object = self.resource.Object(self.bucket_name, s3_key)
421 | body = s3_object.get()["Body"].read()
422 | object_from_json = json.loads(body)
423 | self.download_status.cache_write = lru_cache_write(body, self.cache_dir, sha512, self.cache_max_absolute, self.cache_max_of_free)
424 | self.download_status.success = True
425 |
426 | if object_from_json is None:
427 | raise RuntimeError(s3_key)
428 |
429 | return object_from_json
430 |
431 | @typechecked()
432 | def get_s3_object_url(self, s3_key: str) -> str:
433 | """
434 | Get S3 object URL
435 |
436 | :param s3_key: S3 key
437 | :return: object URL
438 | """
439 | bucket_location = self.client.get_bucket_location(Bucket=self.bucket_name)
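    |         # note: get_bucket_location() returns a LocationConstraint of None for buckets in the us-east-1 region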
440 | location = bucket_location["LocationConstraint"]
441 | url = f"https://{self.bucket_name}.s3-{location}.amazonaws.com/{s3_key}"
442 | return url
443 |
444 | @typechecked()
445 | def get_s3_object_metadata(self, s3_key: str) -> S3ObjectMetadata:
446 | """
447 | Get S3 object metadata
448 |
449 | :param s3_key: S3 key
450 |         :return: S3ObjectMetadata (raises AWSimpleException if the object does not exist)
451 | """
452 | assert self.resource is not None
453 | bucket_resource = self.resource.Bucket(self.bucket_name)
454 | if self.object_exists(s3_key):
455 | bucket_object = bucket_resource.Object(s3_key)
456 | assert isinstance(self.bucket_name, str) # mainly for mypy
457 | s3_object_metadata = S3ObjectMetadata(
458 | self.bucket_name,
459 | s3_key,
460 | bucket_object.content_length,
461 | bucket_object.last_modified,
462 | bucket_object.e_tag[1:-1].lower(),
463 | bucket_object.metadata.get(sha512_string),
464 | self.get_s3_object_url(s3_key),
465 | )
466 |
467 | else:
468 | raise AWSimpleException(f"{self.bucket_name=} {s3_key=} does not exist")
469 | log.debug(f"{s3_object_metadata=}")
470 | return s3_object_metadata
471 |
472 | @typechecked()
473 | def object_exists(self, s3_key: str) -> bool:
474 | """
475 | determine if an s3 object exists
476 |
477 | :param s3_key: the S3 object key
478 | :return: True if object exists
479 | """
480 | assert self.resource is not None
481 | bucket_resource = self.resource.Bucket(self.bucket_name)
482 | objs = list(bucket_resource.objects.filter(Prefix=s3_key))
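    |         # the prefix filter can also match longer keys, so require an exact key match as well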
483 | object_exists = len(objs) > 0 and objs[0].key == s3_key
484 | log.debug(f"{self.bucket_name}:{s3_key} : {object_exists=}")
485 | return object_exists
486 |
487 | @typechecked()
488 | def bucket_exists(self) -> bool:
489 | """
490 | Test if S3 bucket exists
491 |
492 | :return: True if bucket exists
493 | """
494 |
495 | # use a "custom" config so that .head_bucket() doesn't take a really long time if the bucket does not exist
496 | config = Config(connect_timeout=5, retries={"max_attempts": 3, "mode": "standard"})
497 | s3 = boto3.client("s3", config=config)
498 | assert self.bucket_name is not None
499 | try:
500 | s3.head_bucket(Bucket=self.bucket_name)
501 | exists = True
502 | except ClientError as e:
503 |             log.info(f"{self.bucket_name=} {e=}")
504 | exists = False
505 | return exists
506 |
507 | @typechecked()
508 | def create_bucket(self) -> bool:
509 | """
510 | create S3 bucket
511 |
512 | :return: True if bucket created
513 | """
514 |
515 | # this is ugly, but create_bucket needs to be told the region explicitly (it doesn't just take it from the config)
516 | if (region := self.get_region()) is None:
517 |             raise RuntimeError("no region given (check ~/.aws/config)")
518 | else:
519 | location = {"LocationConstraint": region}
520 |
521 | created = False
522 | if not self.bucket_exists():
523 | try:
524 | if self.public_readable:
525 | self.client.create_bucket(Bucket=self.bucket_name, CreateBucketConfiguration=location, ACL="public-read")
526 | else:
527 | self.client.create_bucket(Bucket=self.bucket_name, CreateBucketConfiguration=location)
528 | self.client.get_waiter("bucket_exists").wait(Bucket=self.bucket_name)
529 | created = True
530 | except ClientError as e:
531 | log.warning(f"{self.bucket_name=} {e=}")
532 | return created
533 |
534 | @typechecked()
535 | def delete_bucket(self) -> bool:
536 | """
537 | delete S3 bucket
538 |
539 | :return: True if bucket deleted (False if didn't exist in the first place)
540 | """
541 | try:
542 | self.client.delete_bucket(Bucket=self.bucket_name)
543 | deleted = True
544 | except ClientError as e:
545 |             log.info(f"{self.bucket_name=} {e=}") # does not exist
546 | deleted = False
547 | return deleted
548 |
549 | @typechecked()
550 | def dir(self, prefix: str = "") -> Dict[str, S3ObjectMetadata]:
551 | """
552 | Do a "directory" of an S3 bucket where the returned dict key is the S3 key and the value is an S3ObjectMetadata object.
553 |
554 | Use the faster .keys() method if all you need are the keys.
555 |
556 | :param prefix: only do a dir on objects that have this prefix in their keys (omit for all objects)
557 | :return: a dict where key is the S3 key and the value is S3ObjectMetadata
558 | """
559 | directory = {}
560 | if self.bucket_exists():
561 | paginator = self.client.get_paginator("list_objects_v2")
562 | for page in paginator.paginate(Bucket=self.bucket_name, Prefix=prefix):
563 | # deal with empty bucket
564 | for content in page.get("Contents", []):
565 | s3_key = content.get("Key")
566 | directory[s3_key] = self.get_s3_object_metadata(s3_key)
567 | else:
568 | raise BucketNotFound(self.bucket_name)
569 | return directory
570 |
571 | def keys(self, prefix: str = "") -> List[str]:
572 | """
573 | List all the keys in this S3 Bucket.
574 |
575 | Note that this should be faster than .dir() if all you need are the keys and not the metadata.
576 |
577 |         :param prefix: only include keys that begin with this prefix (omit for all keys)
578 | :return: a sorted list of all the keys in this S3 Bucket (sorted for consistency)
579 | """
580 | keys = []
581 | if self.bucket_exists():
582 | paginator = self.client.get_paginator("list_objects_v2")
583 | for page in paginator.paginate(Bucket=self.bucket_name, Prefix=prefix):
584 | # deal with empty bucket
585 | for content in page.get("Contents", []):
586 | s3_key = content.get("Key")
587 | keys.append(s3_key)
588 | else:
589 | raise BucketNotFound(self.bucket_name)
590 | keys.sort()
591 | return keys
592 |
--------------------------------------------------------------------------------
/awsimple/sns.py:
--------------------------------------------------------------------------------
1 | """
2 | SNS Access
3 | """
4 |
5 | from typing import Union, Dict
6 |
7 | from typeguard import typechecked
8 |
9 | from awsimple import AWSAccess, SQSAccess
10 |
11 |
12 | class SNSAccess(AWSAccess):
13 | @typechecked()
14 | def __init__(self, topic_name: str, **kwargs):
15 | """
16 | SNS Access
17 |
18 | :param topic_name: SNS topic
19 | :param kwargs: kwargs
20 | """
21 | super().__init__(resource_name="sns", **kwargs)
22 | self.topic_name = topic_name
23 |
24 | def get_topic(self):
25 | """
26 | gets the associated SNS Topic instance
27 |
28 |         :return: sns.Topic instance, or None if the topic does not exist
29 |                  (the topic name was provided at construction)
30 | """
31 | topic = None
32 | for t in self.resource.topics.all():
33 | if t.arn.split(":")[-1] == self.topic_name:
34 | topic = t
35 | return topic
36 |
37 | @typechecked()
38 | def get_arn(self) -> str:
39 | """
40 |         get the topic ARN (for the topic name given at construction)
41 | 
42 |         :raises AttributeError: if the topic does not exist
43 | :return: topic ARN
44 | """
45 | return self.get_topic().arn
46 |
47 | @typechecked()
48 | def create_topic(self) -> str:
49 | """
50 | create an SNS topic
51 |
52 | :return: the SNS topic's arn
53 | """
54 | response = self.client.create_topic(Name=self.topic_name, Attributes={"DisplayName": self.topic_name})
55 | # todo: see if there are any waiters for SNS topic creation
56 | # https://stackoverflow.com/questions/50818327/aws-sns-and-waiter-functions-for-boto3
57 | return response["TopicArn"]
58 |
59 | def delete_topic(self):
60 | """
61 | delete SNS topic
62 |
63 | """
64 | self.client.delete_topic(TopicArn=self.get_arn())
65 |
66 | @typechecked()
67 | def subscribe(self, subscriber: Union[str, SQSAccess]) -> str:
68 | """
69 | Subscribe to an SNS topic
70 |
71 | :param subscriber: email or SQS queue
72 | :return: subscription ARN
73 | """
74 | if isinstance(subscriber, str) and "@" in subscriber:
75 | # email
76 | endpoint = subscriber
77 | protocol = "email"
78 | elif isinstance(subscriber, SQSAccess):
79 | # 'hooks up' provided SQS queue to this SNS topic
80 | subscriber.add_permission(self.get_arn())
81 | endpoint = subscriber.get_arn()
82 | protocol = "sqs"
83 | else:
84 | raise ValueError(f"{subscriber=}")
85 | response = self.client.subscribe(TopicArn=self.get_arn(), Protocol=protocol, Endpoint=endpoint, ReturnSubscriptionArn=True)
86 | return response["SubscriptionArn"]
87 |
88 | @typechecked()
89 | def publish(self, message: str, subject: Union[str, None] = None, attributes: Union[dict, None] = None) -> str:
90 | """
91 | publish to an existing SNS topic
92 |
93 | :param message: message string
94 | :param subject: subject string
95 | :param attributes: message attributes (see AWS SNS documentation on SNS MessageAttributes)
96 | :return: message ID
97 | """
98 | topic = self.get_topic()
99 | kwargs = {"Message": message} # type: Dict[str, Union[str, dict]]
100 | if subject is not None:
101 | kwargs["Subject"] = subject
102 | if attributes is not None:
103 | kwargs["MessageAttributes"] = attributes
104 | response = topic.publish(**kwargs)
105 | return response["MessageId"]
106 |
--------------------------------------------------------------------------------
/awsimple/sqs.py:
--------------------------------------------------------------------------------
1 | """
2 | SQS Access
3 | """
4 |
5 | from dataclasses import dataclass
6 | from typing import List, Any, Dict, Union
7 | import time
8 | import statistics
9 | from datetime import timedelta
10 | from pathlib import Path
11 | import json
12 | from logging import getLogger
13 |
14 | from botocore.exceptions import ClientError, HTTPClientError
15 | from typeguard import typechecked
16 | import appdirs
17 |
18 | from awsimple import AWSAccess, __application_name__, __author__, boto_error_to_string
19 |
20 | log = getLogger(__application_name__)
21 |
22 |
23 | @dataclass
24 | class SQSMessage:
25 | """
26 | SQS Message
27 | """
28 |
29 | message: str # payload
30 | _m: Any # AWS message itself (from boto3)
31 | _q: Any # SQSAccess instance
32 |
33 | def delete(self):
34 | self._m.delete() # boto3
35 | self._q._update_response_history(self.get_id())
36 |
37 | def get_id(self):
38 | return self._m.message_id
39 |
40 | def get_aws_message(self):
41 | # get the native AWS message
42 | return self._m
43 |
44 |
45 | # AWS defaults
46 | aws_sqs_long_poll_max_wait_time = 20 # seconds
47 | aws_sqs_max_messages = 10
48 |
49 |
50 | class SQSAccess(AWSAccess):
51 | @typechecked()
52 | def __init__(self, queue_name: str, immediate_delete: bool = True, visibility_timeout: Union[int, None] = None, minimum_visibility_timeout: int = 0, **kwargs):
53 | """
54 | SQS access
55 |
56 | :param queue_name: queue name
57 | :param immediate_delete: True to immediately delete read message(s) upon receipt, False to require the user to call delete_message()
58 | :param visibility_timeout: visibility timeout (if explicitly given) - set to None to automatically attempt to determine the timeout
59 | :param minimum_visibility_timeout: visibility timeout will be at least this long (do not set if visibility_timeout set)
60 | :param kwargs: kwargs to send to base class
61 | """
62 | super().__init__(resource_name="sqs", **kwargs)
63 | self.queue_name = queue_name
64 |
65 | # visibility timeout
66 | self.immediate_delete = immediate_delete # True to immediately delete messages
67 | self.user_provided_timeout = visibility_timeout # the queue will re-try a message (make it re-visible) if not deleted within this time
68 | self.user_provided_minimum_timeout = minimum_visibility_timeout # the timeout will be at least this long
69 | self.auto_timeout_multiplier = 10.0 # for automatic timeout calculations, multiply this times the median run time to get the timeout
70 |
71 | self.sqs_call_wait_time = 0 # short (0) or long poll (> 0, usually 20)
72 | self.queue = None # since this requires a call to AWS, this will be set only when needed
73 |
74 | self.immediate_delete_timeout: int = 30 # seconds
75 | self.minimum_nominal_work_time = 1.0 # minimum work time in seconds so we don't timeout too quickly, e.g. in case the user doesn't actually do any work
76 |
77 | # receive/delete times for messages (auto_delete set to False)
78 | self.response_history = {} # type: Dict[Any, Any]
79 |
80 | # We write the history out as a file so don't make this too big. We take the median (for the nominal run time) so make this big enough to tolerate a fair number of outliers.
81 | self.max_history = 20
82 |
83 | def _get_queue(self):
84 | if self.queue is None:
85 | try:
86 | queue = self.resource.get_queue_by_name(QueueName=self.queue_name)
87 | except self.client.exceptions.QueueDoesNotExist as e:
88 | log.debug(f"{self.queue_name},{e=}")
89 | queue = None
90 | except self.client.exceptions.ClientError as e:
91 | error_code = e.response["Error"].get("Code")
92 | if "NonExistentQueue" in error_code:
93 | log.debug(f"{self.queue_name},{e=},{error_code=}")
94 | queue = None
95 | else:
96 | # other errors (e.g. connection errors, etc.)
97 | raise
98 |
99 | if queue is not None:
100 |             # kludge: when mocking with moto, a dict may come back instead of a Queue - treat that as "queue not found"
101 | queue_type = type(queue)
102 | queue_type_string = str(queue_type)
103 | if "dict" in queue_type_string:
104 | log.warning(f"could not get Queue {self.queue_name}")
105 | else:
106 | self.queue = queue
107 |
108 | return self.queue
109 |
110 | @typechecked()
111 | def _get_response_history_file_path(self) -> Path:
112 | """
113 | get response history file path
114 |
115 |         :return: response history file path
116 | """
117 | p = Path(appdirs.user_data_dir(__application_name__, __author__), "response", f"{self.queue_name}.json")
118 | log.debug(f'response history file path : "{p}"')
119 | return p
120 |
121 | @typechecked()
122 | def create_queue(self) -> str:
123 | """
124 | create SQS queue
125 |
126 | :return: queue URL
127 | """
128 | response = self.client.create_queue(QueueName=self.queue_name)
129 | url = response.get("QueueUrl", "")
130 | return url
131 |
132 | def delete_queue(self):
133 | """
134 | delete queue
135 | """
136 | if (queue := self._get_queue()) is None:
137 | log.warning(f"could not get queue {self.queue_name}")
138 | else:
139 | queue.delete()
140 |
141 | @typechecked()
142 | def exists(self) -> bool:
143 | """
144 | test if SQS queue exists
145 |
146 | :return: True if exists
147 | """
148 | return self._get_queue() is not None
149 |
150 |     def calculate_nominal_work_time(self) -> float:
151 | response_times = []
152 | for begin, end in self.response_history.values():
153 | if end is not None:
154 | response_times.append(end - begin)
155 | nominal_work_time = max(statistics.median(response_times), self.minimum_nominal_work_time) # tolerate in case the measured work is very short
156 | log.debug(f"{nominal_work_time=}")
157 | return nominal_work_time
158 |
159 | def calculate_visibility_timeout(self) -> int:
160 | if self.user_provided_timeout is None:
161 | if self.immediate_delete:
162 | visibility_timeout = self.immediate_delete_timeout # we immediately delete the message so this doesn't need to be very long
163 | else:
164 | visibility_timeout = max(self.user_provided_minimum_timeout, round(self.auto_timeout_multiplier * self.calculate_nominal_work_time()))
165 | else:
166 | if self.immediate_delete:
167 | # if we immediately delete the message it doesn't make sense for the user to try to specify the timeout
168 | raise ValueError(f"nonsensical values: {self.user_provided_timeout=} and {self.immediate_delete=}")
169 | elif self.user_provided_minimum_timeout > 0:
170 |                 raise ValueError(f"do not specify both timeout ({self.user_provided_timeout}) and minimum_timeout ({self.user_provided_minimum_timeout})")
171 | else:
172 | visibility_timeout = self.user_provided_timeout # timeout explicitly given by the user
173 |
174 | return visibility_timeout
175 |
176 | @typechecked()
177 | def _receive(self, max_number_of_messages_parameter: Union[int, None] = None) -> List[SQSMessage]:
178 | if self.user_provided_timeout is None and not self.immediate_delete:
179 | # read in response history (and initialize it if it doesn't exist)
180 | try:
181 | with open(self._get_response_history_file_path()) as f:
182 | self.response_history = json.load(f)
183 | except FileNotFoundError:
184 | pass
185 | except IOError as e:
186 | log.warning(f'IOError : "{self._get_response_history_file_path()}" : {e}')
187 | except json.JSONDecodeError as e:
188 | log.warning(f'JSONDecodeError : "{self._get_response_history_file_path()}" : {e}')
189 | if len(self.response_history) == 0:
190 | now = time.time()
191 | self.response_history[None] = (now, now + timedelta(hours=1).total_seconds()) # we have no history, so the initial nominal run time is a long time
192 |
193 | # receive the message(s)
194 | messages = [] # type: List[Any]
195 | continue_to_receive = True
196 | call_wait_time = self.sqs_call_wait_time # first time through may be long poll, but after that it's a short poll
197 |
198 | while continue_to_receive:
199 | aws_messages = None
200 |
201 | if max_number_of_messages_parameter is None:
202 | max_number_of_messages = aws_sqs_max_messages
203 | else:
204 | max_number_of_messages = max_number_of_messages_parameter - len(messages) # how many left to do
205 |
206 | try:
207 | if (queue := self._get_queue()) is None:
208 | log.warning(f"could not get queue {self.queue_name}")
209 | else:
210 | aws_messages = queue.receive_messages(
211 | MaxNumberOfMessages=min(max_number_of_messages, aws_sqs_max_messages), VisibilityTimeout=self.calculate_visibility_timeout(), WaitTimeSeconds=call_wait_time
212 | )
213 |
214 | for m in aws_messages:
215 | if self.immediate_delete:
216 | m.delete()
217 | elif self.user_provided_timeout is None:
218 | # keep history of message processing times for user deletes, by AWS's message id
219 | self.response_history[m.message_id] = [time.time(), None] # start (finish will be filled in upon delete)
220 |
221 | # if history is too large, delete the oldest
222 | while len(self.response_history) > self.max_history:
223 | oldest = None
224 | for handle, start_finish in self.response_history.items():
225 | if oldest is None or start_finish[0] < self.response_history[oldest][0]:
226 | oldest = handle
227 | del self.response_history[oldest]
228 |
229 | messages.append(SQSMessage(m.body, m, self))
230 |
231 | except (ClientError, HTTPClientError) as e:
232 | # Usually we don't catch boto3 exceptions, but during a long poll a quick internet disruption can raise an exception that we'd like to avoid.
233 | log.debug(f"{self.queue_name=} {e}")
234 | self.most_recent_error = boto_error_to_string(e)
235 |
236 | call_wait_time = 0 # now, short polls
237 |
238 | if aws_messages is None or len(aws_messages) == 0 or (max_number_of_messages_parameter is not None and len(messages) >= max_number_of_messages_parameter):
239 | continue_to_receive = False
240 |
241 | return messages
242 |
243 | @typechecked()
244 | def receive_message(self) -> Union[SQSMessage, None]:
245 | """
246 | receive SQS message from this queue
247 | :return: one SQSMessage if one available, else None
248 | """
249 |
250 | messages = self._receive(1)
251 | message_count = len(messages)
252 | if message_count == 0:
253 | message = None
254 | elif message_count == 1:
255 | message = messages[0]
256 | else:
257 | raise RuntimeError(f"{message_count=}")
258 | return message
259 |
260 | @typechecked()
261 | def receive_messages(self, max_messages: Union[int, None] = None) -> List[SQSMessage]:
262 | """
263 | receive a (possibly empty) list of SQS messages from this queue
264 |
265 | :param max_messages: maximum number of messages to receive (None for all available messages)
266 | :return: list of messages
267 | """
268 | return self._receive(max_messages)
269 |
270 | def _update_response_history(self, message_id: str):
271 | """
272 | update response history
273 |
274 | :param message_id: message ID
275 | """
276 | # update response history
277 | if not self.immediate_delete and self.user_provided_timeout is None and message_id in self.response_history:
278 | self.response_history[message_id][1] = time.time() # set finish time
279 |
280 | # save to file
281 | file_path = self._get_response_history_file_path()
282 | file_path.parent.mkdir(parents=True, exist_ok=True)
283 | try:
284 |             with open(file_path, "w") as f:
285 | json.dump(self.response_history, f, indent=4)
286 | except IOError as e:
287 | log.info(f'"{file_path}" : {e}')
288 |
289 | @typechecked()
290 | def send(self, message: str):
291 | """
292 | Send SQS message. If the queue doesn't exist, it will be created.
293 |
294 | :param message: message string
295 | """
296 | if (queue := self._get_queue()) is None:
297 | log.info(f"could not get queue {self.queue_name} - creating it")
298 | self.create_queue()
299 | # ensure the queue has indeed been created
300 | count = 0
301 | while not self.exists() and count < 100:
302 | time.sleep(3)
303 | count += 1
304 | if (queue := self._get_queue()) is None:
305 | log.error(f"could not create queue {self.queue_name}")
306 | if queue is not None:
307 | queue.send_message(MessageBody=message)
308 |
309 | @typechecked()
310 | def get_arn(self) -> str:
311 | """
312 | get SQS ARN
313 |
314 | :return: ARN string
315 | """
316 | if (queue := self._get_queue()) is None:
317 | log.warning(f"could not get queue {self.queue_name}")
318 | arn = ""
319 | else:
320 | arn = queue.attributes["QueueArn"]
321 | return arn
322 |
323 | @typechecked()
324 | def add_permission(self, source_arn: str):
325 | """
326 | allow source (e.g. SNS topic) to send to this SQS queue
327 |
328 |         :param source_arn: source ARN (e.g. an SNS topic ARN)
329 |
330 | """
331 |
332 | # a little brute-force, but this is the only way I could assign SQS policy to accept messages from SNS
333 | policy = {
334 | "Version": "2012-10-17",
335 | "Statement": [{"Effect": "Allow", "Principal": "*", "Action": "SQS:SendMessage", "Resource": self.get_arn(), "Condition": {"StringEquals": {"aws:SourceArn": source_arn}}}],
336 | }
337 |
338 | policy_string = json.dumps(policy)
339 | log.info(f"{policy_string=}")
340 | if (queue := self._get_queue()) is None:
341 | log.warning(f"could not get queue {self.queue_name}")
342 | else:
343 | self.client.set_queue_attributes(QueueUrl=queue.url, Attributes={"Policy": policy_string})
344 |
345 | def purge(self):
346 | """
347 | purge all messages in the queue
348 | """
349 | if (queue := self._get_queue()) is None:
350 | log.warning(f"could not get queue {self.queue_name}")
351 | else:
352 | self.client.purge_queue(QueueUrl=queue.url)
353 |
354 | def messages_available(self) -> int:
355 | """
356 | return number of messages available
357 | :return: number of messages available
358 | """
359 | key = "ApproximateNumberOfMessages"
360 | if (queue := self._get_queue()) is None:
361 | log.warning(f"could not get queue {self.queue_name}")
362 | number_of_messages_available = 0
363 | else:
364 | response = self.client.get_queue_attributes(QueueUrl=queue.url, AttributeNames=[key])
365 | number_of_messages_available = int(response["Attributes"][key])
366 | return number_of_messages_available
367 |
368 |
369 | class SQSPollAccess(SQSAccess):
370 | def __init__(self, queue_name: str, **kwargs):
371 | super().__init__(queue_name, **kwargs)
372 | self.sqs_call_wait_time = aws_sqs_long_poll_max_wait_time
373 |
--------------------------------------------------------------------------------
/doc/awsimple_sf_python_6_21.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesabel/awsimple/8157ed3d9501fcbc56382f148385144ed73a307e/doc/awsimple_sf_python_6_21.pdf
--------------------------------------------------------------------------------
/doc/awsimple_sf_python_6_21.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesabel/awsimple/8157ed3d9501fcbc56382f148385144ed73a307e/doc/awsimple_sf_python_6_21.pptx
--------------------------------------------------------------------------------
/doc/flake8_report.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesabel/awsimple/8157ed3d9501fcbc56382f148385144ed73a307e/doc/flake8_report.txt
--------------------------------------------------------------------------------
/doc/notes.txt:
--------------------------------------------------------------------------------
1 |
2 | - AWS IAM permissions will be required to use awsimple. S3, DynamoDB, SQS and SNS each have an AWS-managed "FullAccess" policy (e.g. AmazonS3FullAccess) that can be used,
3 | or you can set up your own policies to align with your particular usage model.
4 |
--------------------------------------------------------------------------------
/doc_source/aws_access.rst:
--------------------------------------------------------------------------------
1 |
2 | AWSAccess
3 | =========
4 |
5 | .. automodule:: awsimple.aws
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/doc_source/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
6 |
7 | # -- Path setup --------------------------------------------------------------
8 |
9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | import os
14 | import sys
15 | sys.path.insert(0, os.path.abspath('..'))
16 | sys.path.insert(0, os.path.abspath('.'))
17 |
18 | # -- Project information -----------------------------------------------------
19 |
20 | project = 'awsimple'
21 | copyright = '2021, abel'
22 | author = 'abel'
23 |
24 |
25 | # -- General configuration ---------------------------------------------------
26 |
27 | # Add any Sphinx extension module names here, as strings. They can be
28 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
29 | # ones.
30 | extensions = ['sphinx.ext.autosectionlabel', 'sphinx.ext.autodoc']
31 |
32 | # Add any paths that contain templates here, relative to this directory.
33 | templates_path = ['_templates']
34 |
35 | # List of patterns, relative to source directory, that match files and
36 | # directories to ignore when looking for source files.
37 | # This pattern also affects html_static_path and html_extra_path.
38 | exclude_patterns = []
39 |
40 |
41 | # -- Options for HTML output -------------------------------------------------
42 |
43 | # The theme to use for HTML and HTML Help pages. See the documentation for
44 | # a list of builtin themes.
45 | #
46 | html_theme = 'alabaster'
47 |
48 | # Add any paths that contain custom static files (such as style sheets) here,
49 | # relative to this directory. They are copied after the builtin static files,
50 | # so a file named "default.css" will overwrite the builtin "default.css".
51 | html_static_path = ['_static']
52 |
53 | # use __init__ docstrings
54 | autoclass_content = 'both'
55 |
--------------------------------------------------------------------------------
/doc_source/coverage.txt:
--------------------------------------------------------------------------------
1 | Test coverage: 83.85%
--------------------------------------------------------------------------------
/doc_source/dynamodb_access.rst:
--------------------------------------------------------------------------------
1 |
2 | DynamoDBAccess
3 | ==============
4 |
5 | .. automodule:: awsimple.dynamodb
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/doc_source/index.rst:
--------------------------------------------------------------------------------
1 | AWSimple - a simple AWS API
2 | ===========================
3 |
4 | *(pronounced A-W-Simple)*
5 |
6 | AWSimple provides a simple, object-oriented interface into four AWS "serverless" cloud services:
7 |
8 | - S3 - Binary object storage. Analogous to storing files in the cloud.
9 | - DynamoDB - A NoSQL database to put, get, and query dictionary-like objects.
10 | - SQS - Queuing service for sending and receiving messages.
11 | - SNS - Notification service to send messages to a variety of destinations including emails, SMS messages, and SQS queues.
12 |
13 | `AWSimple` also provides some additional features:
14 |
15 | - True file hashing (SHA512) for S3 files.
16 | - Locally cached S3 accesses.
17 | - DynamoDB full table scans (with local cache option).
18 | - Built-in pagination.
19 |
20 | If you're new to `AWSimple`, check out the :ref:`Quick Start Guide`. Also check out the
21 | `examples <https://github.com/jamesabel/awsimple/tree/main/examples>`_.
22 |
23 | .. toctree::
24 | :maxdepth: 2
25 |
26 | quick_start_guide
27 | user_guide
28 | aws_access
29 | s3_access
30 | dynamodb_access
31 | sns_access
32 | sqs_access
33 | thank_you
34 |
35 |
36 | Testing
37 | -------
38 | .. include:: coverage.txt
39 |
40 |
41 | Indices and tables
42 | ==================
43 | * :ref:`genindex`
44 | * :ref:`modindex`
45 | * :ref:`search`
46 |
47 |
48 | The `AWSimple documentation <https://awsimple.readthedocs.io/>`_ is hosted on `Read the Docs <https://readthedocs.org/>`_.
49 |
--------------------------------------------------------------------------------
/doc_source/quick_start_guide.rst:
--------------------------------------------------------------------------------
1 |
2 | Quick Start Guide
3 | =================
4 |
5 |
6 | Installation
7 | ------------
8 |
9 | Install `awsimple` from PyPI:
10 |
11 | `pip install awsimple`
12 |
13 | AWS's IAM
14 | ---------
15 |
16 | First you need to determine how you're going to access AWS, which is through AWS's IAM (Identity and Access Management). There are two ways:
17 |
18 | - `Use keys directly`: your AWS Access Key and AWS Secret Access Key are passed directly into AWSimple.
19 | - `Use an AWS profile`: an `.aws` directory in your home directory contains CONFIG and CREDENTIALS files, whose profiles hold your Access Key and Secret Access Key.
20 |
21 | For development, the profile method is recommended. This way your secrets are kept out of your repository and
22 | application. In fact, if you put your secrets in a `default` profile, you don't have to tell AWSimple anything about your
23 | credentials at all since they will be used from the default location and profile.
24 |
25 | For applications, you may not want to use an `.aws` directory with profiles. Rather, you pass in keys via some secure mechanism defined by
26 | your particular application.
27 |
28 | Note that **AWS credentials must be properly managed and kept secret**, just as you would do for any other site where money is concerned.
29 | There are few built-in mechanisms in AWS to stop improper use of AWS resources. While billing alerts can and should be used, these are "after the fact" and
30 | will not necessarily prevent billing surprises.
31 |
32 | See the AWS documentation on `configuration files <https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html>`_ for more information.
33 |
34 | Testing the AWS Connection
35 | --------------------------
36 |
37 | Now that you have your AWS IAM configured, let's test it out:
38 |
39 | .. code:: python
40 |
41 | from awsimple import AWSAccess
42 |
43 | # In this example we're using the default IAM profile (in ~/.aws/credentials and ~/.aws/config)
44 | print(AWSAccess().test()) # Should be 'True'
45 |
46 |
47 | If everything worked OK, this code will output `True` and you can go on to the next section.
48 |
49 | Creating, Writing and Reading an S3 Bucket Object
50 | -------------------------------------------------
51 |
52 | Assuming your IAM configuration allows you to create an AWS S3 bucket and object, let's do that now.
53 |
54 | .. code:: python
55 |
56 | from awsimple import S3Access
57 |
58 | # bucket names are globally unique, so change this bucket name to something unique to you
59 | s3_access = S3Access("james-abel-awsimple-test-bucket")
60 |
61 | # let's first make sure the bucket exists
62 | s3_access.create_bucket()
63 |
64 | # the S3 key is the name of the object in the S3 bucket, somewhat analogous to a file name
65 | s3_key = "hello.txt"
66 |
67 | # write our message to S3
68 | s3_access.write_string("hello world", s3_key)
69 |
70 | # will output "hello world"
71 | print(s3_access.read_string(s3_key))
72 |
--------------------------------------------------------------------------------
/doc_source/requirements.txt:
--------------------------------------------------------------------------------
1 | boto3
2 | typeguard
3 | hashy
4 | dictim
5 | appdirs
6 | ismain
7 | tobool
8 |
--------------------------------------------------------------------------------
/doc_source/s3_access.rst:
--------------------------------------------------------------------------------
1 |
2 | S3Access
3 | ========
4 |
5 | .. automodule:: awsimple.s3
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/doc_source/sns_access.rst:
--------------------------------------------------------------------------------
1 |
2 | SNSAccess
3 | =========
4 |
5 | .. automodule:: awsimple.sns
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/doc_source/sqs_access.rst:
--------------------------------------------------------------------------------
1 |
2 | SQSAccess
3 | =========
4 |
5 | .. automodule:: awsimple.sqs
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/doc_source/thank_you.rst:
--------------------------------------------------------------------------------
1 | Thank You
2 | =========
3 |
4 | Special thanks to Phebe Polk for code and documentation reviews.
5 |
--------------------------------------------------------------------------------
/doc_source/user_guide.rst:
--------------------------------------------------------------------------------
1 |
2 | AWSimple User Guide
3 | ===================
4 |
5 | AWSimple provides high-level, object-oriented access to common AWS "serverless" services such as
6 | :ref:`S3`, :ref:`DynamoDB`, :ref:`SNS`, and :ref:`SQS`. AWSimple uses AWS's
7 | `boto3 <https://github.com/boto/boto3>`_ "under the hood" for AWS access.
8 |
9 | Setting up your AWS Account
10 | ---------------------------
11 | In order to use AWSimple, or any other AWS software for that matter, you need an AWS account and one or more AWS "programmatic users" created via the
12 | `AWS IAM (Identity and Access Management) console <https://console.aws.amazon.com/iam/>`_. This user guide assumes you have a basic understanding of AWS IAM.
13 | This programmatic user will need to be given appropriate permissions to the AWS resources you wish to use. IAM provides you with an `access key` and
14 | `secret access key` for a programmatic user. You must also select an AWS `region` (i.e. roughly where the actual AWS servers that you'll be using
15 | are located). These keys must be provided to AWSimple in order to access AWS resources.
16 |
17 | IMHO, at least for the purposes of initial development, you probably don't have to worry too much about fine-tuning your region. Pick a region reasonably
18 | close and go with that for a while. AWS's global network is pretty good, so just get close at first and you can optimize later. Many permissions and/or
19 | access issues can arise when you inadvertently try to access an unintended region.
20 |
21 | During development, it is recommended that these keys be placed in the AWS `credentials` and `config` files (no file extension) in the `.aws` directory
22 | under a `profile`. See `AWS configuration files <https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html>`_ for directions on how to
23 | configure your credentials and config files. In fact, initially you can assign a programmatic user's keys to the `[default]` profile, so you don't have to
24 | pass any credentials or region in to AWSimple.
25 |
26 | For production, the `access key`, `secret access key`, and `region` can be provided to AWSimple directly, in a manner that is appropriate for your application.
27 |
28 | Note that **AWS credentials must be properly managed and kept secret**, just as you would do for any other site where money is concerned.
29 | There are few built-in mechanisms in AWS to stop improper use of AWS resources. While billing alerts can and should be used, these are "after the fact" and
30 | will not necessarily prevent billing surprises.
31 |
32 | See the AWS documentation on `configuration files <https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html>`_ for more information.
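    | 
    | As a minimal sketch (the bucket and profile names here are placeholders; ``profile_name`` is the keyword argument used throughout these docs and the examples):
    | 
    | .. code:: python
    | 
    |     from awsimple import S3Access
    | 
    |     # use a named profile from ~/.aws/credentials and ~/.aws/config
    |     s3_access = S3Access("my-test-bucket", profile_name="testawsimple")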
33 |
34 | Testing your AWS Account
35 | ~~~~~~~~~~~~~~~~~~~~~~~~
36 | Dealing with IAM and permissions can be tedious and difficult to test - if something is wrong, you merely get a permissions error. To help debug permissions,
37 | AWSimple has a test feature to make sure you have the basic IAM setup working:
38 |
39 | .. code:: python
40 |
41 | from awsimple import AWSAccess
42 |
43 | # In this example we're using the default
44 | # IAM profile (in ~/.aws/credentials and ~/.aws/config)
45 | print(AWSAccess().test()) # Should be 'True'
46 |
47 | If this prints `True`, you at least have properly configured your programmatic user for AWSimple to use.
48 |
49 | Services accessible with AWSimple
50 | ---------------------------------
51 | AWSimple offers access into :ref:`S3`, :ref:`DynamoDB`, :ref:`SNS`, and :ref:`SQS`.
52 |
53 |
54 | S3
55 | --
56 | S3 is probably one of the most popular AWS services. S3 is based on `buckets` and `objects` within those buckets. Again, AWSimple assumes a basic
57 | knowledge of S3, but refer to the `S3 documentation <https://docs.aws.amazon.com/s3/>`_ if you are unfamiliar with S3.
58 |
59 | AWSimple provides the ability to create and delete S3 buckets, and to write and read S3 bucket objects. In addition, a few helper methods exist,
60 | such as listing buckets and bucket objects.
61 |
62 | S3 create bucket
63 | ~~~~~~~~~~~~~~~~~~
64 | Before you can use a bucket, it needs to be created. A bucket can be created with the AWS console, but here we'll do it programmatically with AWSimple:
65 |
66 | .. code:: python
67 |
68 | from awsimple import S3Access
69 |
70 | # bucket names are globally unique, so change this bucket name to something unique to you
71 | s3_access = S3Access("james-abel-awsimple-test-bucket")
72 | s3_access.create_bucket()
73 |
74 | Each S3Access instance is associated with a specific bucket.
75 |
76 | S3 write
77 | ~~~~~~~~
78 | Now let's write an object to the bucket we just created:
79 |
80 | .. code:: python
81 |
82 | # the S3 key is the name of the object in the S3 bucket, somewhat analogous to a file name
83 | s3_key = "hello.txt"
84 |
85 | # write our "hello world" message to S3
86 | s3_access.write_string("hello world", s3_key)
87 |
88 | S3 read
89 | ~~~~~~~
90 | And finally let's read the object back:
91 |
92 | .. code:: python
93 |
94 | # will print "hello world"
95 | print(s3_access.read_string(s3_key))
96 |
97 | S3 Caching
98 | ~~~~~~~~~~
99 | AWSimple can use local caching to reduce network traffic, which in turn can reduce costs and speed up applications. A file hash (SHA512) is
100 | used to ensure file content equivalency.
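    | 
    | Continuing the example above, a short sketch of a cached download (``download_cached()`` returns a status object whose ``cache_hit`` field tells you whether the local cache was used; the destination file name is arbitrary):
    | 
    | .. code:: python
    | 
    |     from pathlib import Path
    | 
    |     # the first call downloads from S3 and populates the local cache;
    |     # later calls with unchanged content are served from the cache
    |     status = s3_access.download_cached(s3_key, Path("hello_from_s3.txt"))
    |     print(status.cache_hit)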
101 |
102 | DynamoDB
103 | --------
104 | DynamoDB is a "NoSQL" (AKA document-based) database. It is a "serverless" service that offers an `On Demand` option. DynamoDB is made up
105 | of `tables`, and each table can store a collection of `items`. These `items` are similar to JSON objects and can
106 | be created from Python dictionaries, with restrictions similar to those for converting a Python dictionary to JSON. For these Python dicts,
107 | DynamoDB allows you to store and retrieve them to and from the cloud simply and quickly, and there is a
108 | `free tier <https://aws.amazon.com/free/>`_.
109 |
110 | Probably the trickiest part is selecting the `primary key`. The `primary key` is what defines the uniqueness of an item.
111 | See `AWS docs on primary key design <https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/bp-partition-key-design.html>`_ for details.
112 | The basic idea is that the primary key must be unique to that item and is composed of either a single `partition` (or hash) key or a combination of
113 | a `partition` key and a `sort` (or range) key. Those keys are often either strings or numbers, although boolean is also allowed. Secondary
114 | keys and indexes are also supported and can be used for queries.
115 |
116 | The default type for partition and sort keys is a string (`str`), but numbers (using `int`) and booleans (using `bool`) can also be specified.
117 |
118 | DynamoDB - Partition Key
119 | ~~~~~~~~~~~~~~~~~~~~~~~~
120 | The code below shows how you can use the simple primary key (no sort key) in a table to put and get items. This also illustrates the
121 | flexibility of a NoSQL database. Fields can be added after the table creation, as long as the primary key does not change.
122 |
123 | .. code:: python
124 |
125 | dynamodb_access = DynamoDBAccess("users_example", profile_name="testawsimple")
126 |
127 | # we're only using email as a partition key in our primary key (no sort key). emails are unique to each user.
128 | dynamodb_access.create_table("email")
129 |
130 | # add our first user using email, first and last name. Initially, we may think that's all we need.
131 | dynamodb_access.put_item({"email": "victor@victorwooten.com", "first_name": "Victor", "last_name": "Wooten"})
132 |
133 | # oh no. No one knows who "John Jones" is, they only know "John Paul Jones", so we need to add a middle name.
134 | # Luckily we are using a NoSQL database, so we just add "middle_name" in a new key/value pair. No database migration needed.
135 | dynamodb_access.put_item({"email": "john@ledzeppelin.com", "first_name": "John", "middle_name": "Paul", "last_name": "Jones"})
136 |
137 | # oh no again. No one knows who "Gordon Matthew Thomas Sumner" is either, even with 2 middle names! All they know is "Sting".
138 | # We need to add a nickname. No problem since we're using a NoSQL database.
139 | dynamodb_access.put_item(
140 | {
141 | "email": "sting@thepolice.com",
142 | "first_name": "Gordon",
143 | "middle_name": "Matthew",
144 | "middle_name_2": "Thomas",
145 | "last_name": "Sumner",
146 | "nickname": "Sting",
147 | }
148 | )
149 |
150 | # look up user info for one of our users
151 | item = dynamodb_access.get_item("email", "john@ledzeppelin.com") # this is a "get" since we're using a key and will always get back exactly one item
152 |
153 | DynamoDB - Partition and Sort Keys
154 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
155 | Below is an example of using a `composite` primary key, which is comprised of a `partition` key and a `sort` key.
156 |
157 | .. code:: python
158 |
159 | dynamodb_access = DynamoDBAccess("musical_instruments_example", profile_name="testawsimple")
160 |
161 | # Our primary key is a composite of partition (manufacturer) and sort (serial_number).
162 | # For a particular manufacturer, serial numbers define exactly one instrument (for this example we are assuming a serial number can be represented as an
163 | # integer and doesn't have to be a string).
164 | dynamodb_access.create_table("manufacturer", "serial_number", sort_key_type=int)
165 |
166 | # we have to convert float to a Decimal for DynamoDB
167 | dynamodb_access.put_item(dict_to_dynamodb({"manufacturer": "Gibson", "serial_number": 1234, "model": "Ripper", "year": 1983, "price": 1299.50}))
168 | dynamodb_access.put_item(dict_to_dynamodb({"manufacturer": "Gibson", "serial_number": 5678, "model": "Thunderbird", "year": 1977, "price": 2399.50}))
169 | dynamodb_access.put_item(
170 | dict_to_dynamodb(
171 | {
172 | "manufacturer": "Fender",
173 | "serial_number": 1234,
174 | "model": "Precision",
175 | "year": 2008,
176 | "price": 1800.0,
177 | } # same serial number as the Gibson Ripper, but that's OK since this is Fender
178 | )
179 | )
180 |
181 | # get all the Gibson instruments
182 | item = dynamodb_access.query("manufacturer", "Gibson") # this can (and will in this case) be multiple items
183 | pprint(item)
184 |
185 | DynamoDB Secondary Indexes
186 | ~~~~~~~~~~~~~~~~~~~~~~~~~~
187 | You can add `secondary` indexes to a DynamoDB table in order to query on fields that are not part of the original primary key.
188 | This is very similar to adding indexes in a relational database, which is often done to speed up queries. A secondary index
189 | can also be added after table creation to accommodate new fields, which is very handy when not all data is known at table
190 | creation time.
191 |
192 | DynamoDB Scanning and Caching
193 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
194 | Sometimes you want an entire table in order to do some sort of search or data mining on it. AWS provides a `scan` capability, available
195 | in awsimple's `DynamoDBAccess.scan_table()` method, but it reads the entire table on each call, which can be slow and/or costly. In order
196 | to reduce cost and increase speed, AWSimple offers a cached table scan via `DynamoDBAccess.scan_table_cached()` for tables that the
197 | user *knows* are static or at least slowly changing. Of course, it's up to the user of awsimple to determine which method to use - the
198 | regular or cached version.
199 |
200 | For convenience, AWSimple also looks at the table's item count to determine if the cached scan needs to invalidate the cache. This can be
201 | useful if you know a table is only added to (thus the item count will change when it's updated) and you only try to access the table some time
202 | after the update. As of this writing the table item count is updated roughly every 6 hours. For example, you may use DynamoDB to store
203 | clinical trial data that is updated in a human time frame - e.g. weekly or even monthly, and once the trial is closed the data may never change.
204 | These sorts of situations may be appropriate for cached table scans. Of course it's up to the programmer to ensure this caching is appropriate
205 | for their use case. If not, use the regular `scan_table()` (albeit with the cost and performance implications).
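    | 
    | A minimal sketch (the table name is a placeholder; ``scan_table_cached()`` and ``scan_table()`` are the methods discussed above):
    | 
    | .. code:: python
    | 
    |     from awsimple import DynamoDBAccess
    | 
    |     dynamodb_access = DynamoDBAccess("clinical_trial_data")
    | 
    |     # cached: fast and cheap, appropriate only for static or slowly changing tables
    |     items = dynamodb_access.scan_table_cached()
    | 
    |     # uncached: always reads the full table from AWS (slower and potentially costlier, but always current)
    |     items = dynamodb_access.scan_table()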
206 |
207 | SNS
208 | ---
209 | SNS is AWS's Notification service for messages. SNS can create notifications for a variety of endpoints, including emails, text messages and
210 | :ref:`SQS` queues. SNS can also be "connected" to other AWS services such as S3, so that S3 events (e.g. writes) generate notifications.
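    | 
    | A short sketch (the topic name and email address are placeholders; the methods are from AWSimple's ``SNSAccess``):
    | 
    | .. code:: python
    | 
    |     from awsimple import SNSAccess
    | 
    |     sns_access = SNSAccess("my_topic")
    |     sns_access.create_topic()
    |     sns_access.subscribe("me@example.com")  # email subscriptions must be confirmed by the recipient
    |     message_id = sns_access.publish("something happened", subject="my subject")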
211 |
212 | SQS
213 | ---
214 | SQS is AWS's queuing service. Messages can be placed in queues (either programmatically or "connected" to other AWS services like SNS).
215 | Programs can poll SQS queues to get messages to operate on. SQS queues can be read immediately (returning nothing if no messages are available)
216 | or `long polled` to wait for an incoming message to act on.
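    | 
    | For example (the queue name is a placeholder; ``SQSPollAccess`` long polls, while plain ``SQSAccess`` returns immediately):
    | 
    | .. code:: python
    | 
    |     from awsimple import SQSAccess, SQSPollAccess
    | 
    |     SQSAccess("my_queue").send("hello")
    | 
    |     # long poll: waits (up to AWS's 20 second maximum) for a message to arrive
    |     message = SQSPollAccess("my_queue").receive_message()
    |     if message is not None:
    |         print(message.message)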
217 |
--------------------------------------------------------------------------------
/examples/aws_access_test.py:
--------------------------------------------------------------------------------
1 | from awsimple import AWSAccess
2 |
3 | # In this example we're using the default profile
4 | print(AWSAccess().test()) # Should be 'True'
5 |
--------------------------------------------------------------------------------
/examples/derived_access_class.py:
--------------------------------------------------------------------------------
1 | from ismain import is_main
2 | from os import getlogin
3 |
4 | from awsimple import S3Access
5 |
6 | profile_name = "testawsimple" # all of my derived classes use this AWS profile name
7 |
8 |
9 | class MyS3Access(S3Access):
10 | """
11 | MyS3Access class takes care of IAM via a profile name
12 | """
13 |
14 | def __init__(self, bucket: str, **kwargs):
15 | # define the profile name, but pass all other optional arguments to the base class
16 | super().__init__(bucket, profile_name=profile_name, **kwargs)
17 |
18 |
19 | def read_s3_object():
20 |     # profile_name provided by MyS3Access
21 | # bucket names are globally unique, so change this bucket name to something unique to you
22 | s3_access = MyS3Access(f"awsimple-test-bucket-{getlogin()}") # bucket name (for this example we assume it already exists)
23 | print(s3_access.read_string("hello.txt")) # hello.txt is the S3 object key
24 |
25 |
26 | if is_main():
27 | read_s3_object()
28 |
--------------------------------------------------------------------------------
/examples/dynamodb_partition_and_sort.py:
--------------------------------------------------------------------------------
1 | import time
2 | from pprint import pprint
3 |
4 | from ismain import is_main
5 |
6 | from awsimple import DynamoDBAccess, dict_to_dynamodb
7 |
8 |
9 | def musical_instruments_example():
10 | """
11 | This example shows how to use DynamoDB to keep a table of musical instruments.
12 |
13 | """
14 |
15 | dynamodb_access = DynamoDBAccess("musical_instruments_example", profile_name="testawsimple", cache_life=60) # short cache life for development
16 |
17 | # Our primary key is a composite of partition (manufacturer) and sort (serial_number).
18 | # For a particular manufacturer, serial numbers define exactly one instrument (for this example we are assuming a serial number can be represented as an
19 | # integer and doesn't have to be a string).
20 | dynamodb_access.create_table("manufacturer", "serial_number", sort_key_type=int)
21 |
22 | # we have to convert float to a Decimal for DynamoDB
23 | dynamodb_access.put_item(dict_to_dynamodb({"manufacturer": "Gibson", "serial_number": 1234, "model": "Ripper", "year": 1983, "price": 1299.50}))
24 | dynamodb_access.put_item(dict_to_dynamodb({"manufacturer": "Gibson", "serial_number": 5678, "model": "Thunderbird", "year": 1977, "price": 2399.50}))
25 | dynamodb_access.put_item(
26 | dict_to_dynamodb(
27 | {
28 | "manufacturer": "Fender",
29 | "serial_number": 1234,
30 | "model": "Precision",
31 | "year": 2008,
32 | "price": 1800.0,
33 | } # same serial number as the Gibson Ripper, but that's OK since this is Fender
34 | )
35 | )
36 |
37 | # get all the Gibson instruments
38 | start = time.time()
39 | item = dynamodb_access.query("manufacturer", "Gibson") # this can (and will in this case) be multiple items
40 | end = time.time()
41 | pprint(item)
42 | print(f"query took {end-start} seconds") # nominal 0.1 to 0.15 seconds
43 | print()
44 |
45 | # get the entire inventory
46 | start = time.time()
47 | all_items = dynamodb_access.scan_table_cached() # use cached if the table is large and *only* if we know our table is slowly or never changing
48 | end = time.time()
49 | pprint(all_items)
50 | print(f"scan took {end-start} seconds ({dynamodb_access.cache_hit=})") # always fast for this small data set, but caching can offer a speedup for large tables
51 |
52 |
53 | if is_main():
54 | musical_instruments_example()
55 |
--------------------------------------------------------------------------------
/examples/dynamodb_partition_only.py:
--------------------------------------------------------------------------------
1 | import time
2 | from pprint import pprint
3 | from ismain import is_main
4 |
5 | from awsimple import DynamoDBAccess
6 |
7 |
8 | def users_example():
9 | """
10 | This example shows how to use DynamoDB to keep a table of users. This also illustrates the flexibility of NoSQL in that we can
11 | simply add fields at any time.
12 |
13 | """
14 |
15 | dynamodb_access = DynamoDBAccess("users_example", profile_name="testawsimple")
16 |
17 | # we're only using email as a partition key in our primary key (no sort key). emails are unique to each user.
18 | dynamodb_access.create_table("email")
19 |
20 | # add our first user using email, first and last name. Initially, we may think that's all we need.
21 | dynamodb_access.put_item({"email": "victor@victorwooten.com", "first_name": "Victor", "last_name": "Wooten"})
22 |
23 | # oh no. No one knows who "John Jones" is, they only know "John Paul Jones", so we need to add a middle name.
24 | # Luckily we are using a NoSQL database, so we just add "middle_name" in a new key/value pair. No database migration needed.
25 | dynamodb_access.put_item({"email": "john@ledzeppelin.com", "first_name": "John", "middle_name": "Paul", "last_name": "Jones"})
26 |
27 | # oh no again. No one knows who "Gordon Matthew Thomas Sumner" is either, even with 2 middle names! All they know is "Sting".
28 | # We need to add a nickname. No problem since we're using a NoSQL database.
29 | dynamodb_access.put_item(
30 | {
31 | "email": "sting@thepolice.com",
32 | "first_name": "Gordon",
33 | "middle_name": "Matthew",
34 | "middle_name_2": "Thomas",
35 | "last_name": "Sumner",
36 | "nickname": "Sting",
37 | }
38 | )
39 |
40 | # look up user info for one of our users
41 | start = time.time()
42 | item = dynamodb_access.get_item("email", "john@ledzeppelin.com") # this is a "get" since we're addressing by the full primary key, so we always get back exactly one item
43 | end = time.time()
44 |
45 | pprint(item)
46 | print(f"took {end-start} seconds") # should take just a fraction of a second. 0.05 seconds was a nominal value on our test system.
47 |
48 |
49 | if is_main():
50 | users_example()
51 |
--------------------------------------------------------------------------------
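A partition-only get like the one above corresponds roughly to a raw boto3 get_item call; a minimal sketch (assuming the "users_example" table already exists and AWS credentials are configured):

import boto3

table = boto3.resource("dynamodb").Table("users_example")

# a get on the full primary key (here just the partition key "email") returns at most one item
response = table.get_item(Key={"email": "john@ledzeppelin.com"})
print(response.get("Item"))

--------------------------------------------------------------------------------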
/examples/make_venv.bat:
--------------------------------------------------------------------------------
1 | rmdir /S /Q venv
2 | c:"\Program Files\Python39\python.exe" -m venv --clear venv
3 | venv\Scripts\python.exe -m pip install --upgrade pip
4 | venv\Scripts\pip3 install -U setuptools
5 | venv\Scripts\pip3 install -U -r requirements-examples.txt
6 |
--------------------------------------------------------------------------------
/examples/make_venv.sh:
--------------------------------------------------------------------------------
1 | python3 -m venv --clear venv
2 | source ./venv/bin/activate
3 | python -m pip install -U -r requirements-examples.txt
4 | deactivate
5 |
--------------------------------------------------------------------------------
/examples/read_s3_object.py:
--------------------------------------------------------------------------------
1 | from ismain import is_main
2 |
3 | from awsimple import S3Access
4 |
5 |
6 | def read_s3_object():
7 | s3_access = S3Access("testawsimple")
8 | print(s3_access.read_string("helloworld.txt"))
9 |
10 |
11 | if is_main():
12 | read_s3_object()
13 |
--------------------------------------------------------------------------------
/examples/requirements-examples.txt:
--------------------------------------------------------------------------------
1 | awsimple
2 | ismain
3 |
--------------------------------------------------------------------------------
/examples/run_examples.bat:
--------------------------------------------------------------------------------
1 | call venv\Scripts\activate.bat
2 | python -m aws_access_test
3 | python -m write_read_s3_object
4 | python -m derived_access_class
5 | python -m dynamodb_partition_only
6 | python -m dynamodb_partition_and_sort
7 | deactivate
8 |
--------------------------------------------------------------------------------
/examples/run_examples.sh:
--------------------------------------------------------------------------------
1 | source venv/bin/activate
2 | python -m aws_access_test
3 | python -m write_read_s3_object
4 | python -m derived_access_class
5 | python -m dynamodb_partition_only
6 | python -m dynamodb_partition_and_sort
7 | deactivate
8 |
--------------------------------------------------------------------------------
/examples/write_read_s3_object.py:
--------------------------------------------------------------------------------
1 | from awsimple import S3Access
2 | from os import getlogin
3 |
4 | # the S3 key is the name of the object in the S3 bucket, somewhat analogous to a file name
5 | s3_key = "hello.txt"
6 |
7 | # set up the s3_access object
8 | s3_access = S3Access(f"awsimple-test-bucket-{getlogin()}") # bucket names are globally unique, so change this bucket name to something unique to you
9 |
10 |
11 | # let's first make sure the bucket exists
12 | s3_access.create_bucket()
13 |
14 | # write our message to S3
15 | s3_access.write_string("hello world", s3_key)
16 |
17 |
18 | # will output "hello world"
19 | print(s3_access.read_string(s3_key))
20 |
--------------------------------------------------------------------------------
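write_string and read_string roughly wrap boto3's put_object and get_object; a sketch of the equivalent raw calls (assuming an existing bucket and configured credentials — the bucket name here is a placeholder):

import boto3

s3_client = boto3.client("s3")
bucket_name = "awsimple-test-bucket-placeholder"

# write the string as the body of an S3 object, then read it back
s3_client.put_object(Bucket=bucket_name, Key="hello.txt", Body="hello world".encode())
body = s3_client.get_object(Bucket=bucket_name, Key="hello.txt")["Body"].read()
print(body.decode())  # hello world

--------------------------------------------------------------------------------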
/make_venv_dev.bat:
--------------------------------------------------------------------------------
1 | rmdir /S /Q venv
2 | c:"\Program Files\Python313\python.exe" -m venv --clear venv
3 | venv\Scripts\python.exe -m pip install --upgrade pip
4 | venv\Scripts\pip3 install -U setuptools
5 | venv\Scripts\pip3 install -U -r requirements-dev.txt
6 |
--------------------------------------------------------------------------------
/make_venv_dev.sh:
--------------------------------------------------------------------------------
1 | python3 -m venv --clear venv
2 | source ./venv/bin/activate
3 | python -m pip install -U -r requirements-dev.txt
4 | deactivate
5 |
--------------------------------------------------------------------------------
/mypy.ini:
--------------------------------------------------------------------------------
1 | [mypy]
2 |
3 | [mypy-moto]
4 | ignore_errors = True
5 | ignore_missing_imports = True
6 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.black]
2 | line-length = 192
3 |
--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | #
2 | # awsimple requirements
3 | hashy
4 | boto3
5 | typeguard<3
6 | dictim
7 | appdirs
8 | tobool
9 | urllib3
10 | python-dateutil
11 | yasf
12 | #
13 | # examples
14 | ismain
15 | #
16 | # packaging
17 | twine
18 | wheel
19 | #
20 | # testing
21 | pytest
22 | moto[dynamodb,s3,sns,sqs]
23 | # moto apparently requires docker
24 | docker
25 | localstack
26 | localstack-client
27 | awscli
28 | awscli-local
29 | coverage
30 | pytest-cov
31 | pytest-pycharm
32 | pytest-socket
33 | pytest-xdist
34 | pillow
35 | requests
36 | #
37 | # formatting and linting
38 | black
39 | flake8
40 | mypy
41 | boto3-stubs[s3,dynamodb,sqs,sns]
42 | appdirs-stubs
43 | types-urllib3
44 | types-Pillow
45 | types-python-dateutil
46 | #
47 | # documentation
48 | sphinx
49 |
--------------------------------------------------------------------------------
/scripts/blackify.bat:
--------------------------------------------------------------------------------
1 | pushd .
2 | cd ..
3 | call venv\Scripts\activate.bat
4 | python -m black -l 192 awsimple test_awsimple setup.py examples
5 | call deactivate
6 | popd
7 |
--------------------------------------------------------------------------------
/scripts/coverage.bat:
--------------------------------------------------------------------------------
1 | pushd .
2 | cd ..
3 | set PYTHONPATH=%CD%
4 | set AWSIMPLE_USE_MOTO_MOCK=0
5 | mkdir cov
6 | venv\Scripts\pytest.exe --cov-report=html --cov-report=xml:cov\coverage.xml --cov --ignore=examples
7 | venv\Scripts\python.exe scripts\doc_coverage_updater.py
8 | set PYTHONPATH=
9 | set AWSIMPLE_USE_MOTO_MOCK=
10 | popd
11 |
--------------------------------------------------------------------------------
/scripts/doc_coverage_updater.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from xml.etree import ElementTree
3 |
4 | from ismain import is_main
5 |
6 |
7 | def doc_coverage_updater():
8 | attributes = ElementTree.parse(Path("cov", "coverage.xml")).getroot().attrib
9 | numerator = float(attributes["lines-covered"]) + float(attributes["branches-covered"])
10 | denominator = float(attributes["lines-valid"]) + float(attributes["branches-valid"])
11 | coverage = numerator / denominator
12 | Path("doc_source", "coverage.txt").write_text(f"Test coverage: {coverage:.2%}")
13 |
14 |
15 | if is_main():
16 | doc_coverage_updater()
17 |
--------------------------------------------------------------------------------
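The script above folds line and branch coverage into a single percentage. With hypothetical coverage.xml attribute values, the arithmetic works out like this:

# hypothetical attribute values from coverage.xml
attributes = {"lines-covered": "900", "branches-covered": "150", "lines-valid": "1000", "branches-valid": "200"}
numerator = float(attributes["lines-covered"]) + float(attributes["branches-covered"])  # 1050.0
denominator = float(attributes["lines-valid"]) + float(attributes["branches-valid"])  # 1200.0
print(f"Test coverage: {numerator / denominator:.2%}")  # Test coverage: 87.50%

--------------------------------------------------------------------------------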
/scripts/pypi.bat:
--------------------------------------------------------------------------------
1 | pushd .
2 | cd ..
3 | rmdir /S /Q awsimple.egg-info
4 | rmdir /S /Q build
5 | rmdir /S /Q dist
6 | copy /Y LICENSE LICENSE.txt
7 | call venv\Scripts\activate.bat
8 | python.exe setup.py bdist_wheel
9 | twine upload dist/*
10 | rmdir /S /Q awsimple.egg-info
11 | rmdir /S /Q build
12 | call deactivate
13 | popd
14 |
--------------------------------------------------------------------------------
/scripts/pytest.bat:
--------------------------------------------------------------------------------
1 | REM run pytest with and without mocking
2 | pushd .
3 | cd ..
4 | call venv\Scripts\activate.bat
5 | set PYTHONPATH=%CD%
6 | python -m pytest -s test_awsimple --cov-report xml:coverage.xml --cov-report html --cov=.\awsimple
7 | REM
8 | REM set AWSIMPLE_USE_MOTO_MOCK=0
9 | REM python -m pytest
10 | REM
11 | set PYTHONPATH=
12 | set AWSIMPLE_USE_MOTO_MOCK=
13 | popd
14 |
--------------------------------------------------------------------------------
/scripts/run_flake8.bat:
--------------------------------------------------------------------------------
1 | pushd .
2 | cd ..
3 | del doc\flake8_report.txt
4 | call venv\Scripts\activate.bat
5 | REM
6 | REM E402 module level import not at top of file
7 | REM F401 imported but unused
8 | REM W503 line break before binary operator (black puts this in)
9 | REM E203 whitespace before ':' (black puts this in and may be controversial)
10 | REM E501 line too long
11 | flake8 --output-file doc\flake8_report.txt --ignore=E402,F401,W503,E203,E501 --tee awsimple
12 | call deactivate
13 | popd
14 |
--------------------------------------------------------------------------------
/scripts/run_mypy.bat:
--------------------------------------------------------------------------------
1 | pushd .
2 | cd ..
3 | call venv\Scripts\activate.bat
4 | mypy -m awsimple
5 | mypy -m test_awsimple
6 | call deactivate
7 | popd
8 |
--------------------------------------------------------------------------------
/scripts/run_sphinx.bat:
--------------------------------------------------------------------------------
1 | pushd .
2 | cd ..
3 | call venv\Scripts\activate.bat
4 | sphinx-build -M html doc_source build
5 | call deactivate
6 | popd
7 |
--------------------------------------------------------------------------------
/scripts/start_localstack.bat:
--------------------------------------------------------------------------------
1 | pushd .
2 | cd ..
3 | venv\Scripts\python.exe -m localstack.cli.main start
4 | popd
5 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from setuptools import setup
4 |
5 | from awsimple.__version__ import __version__, __title__, __author__, __author_email__, __url__, __download_url__, __description__
6 |
7 | readme_file_path = os.path.join("README.md")  # the repository file is README.md (case matters on case-sensitive filesystems)
8 |
9 | with open(readme_file_path, encoding="utf-8") as f:
10 | long_description = "\n" + f.read()
11 |
12 | setup(
13 | name=__title__,
14 | description=__description__,
15 | long_description=long_description,
16 | long_description_content_type="text/markdown",
17 | version=__version__,
18 | author=__author__,
19 | author_email=__author_email__,
20 | license="MIT License",
21 | url=__url__,
22 | download_url=__download_url__,
23 | keywords=["aws", "cloud", "storage", "database", "dynamodb", "s3"],
24 | packages=[__title__],
25 | package_data={__title__: [readme_file_path, "py.typed"]},
26 | install_requires=["boto3", "typeguard<3", "hashy>=0.1.1", "dictim", "appdirs", "tobool", "urllib3", "python-dateutil", "yasf"],
27 | project_urls={"Documentation": "https://awsimple.readthedocs.io/"},
28 | classifiers=[],
29 | python_requires=">3.10",
30 | )
31 |
--------------------------------------------------------------------------------
/test_awsimple/280px-PNG_transparency_demonstration_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamesabel/awsimple/8157ed3d9501fcbc56382f148385144ed73a307e/test_awsimple/280px-PNG_transparency_demonstration_1.png
--------------------------------------------------------------------------------
/test_awsimple/__init__.py:
--------------------------------------------------------------------------------
1 | from .const import id_str, test_awsimple_str, never_change_file_name, never_change_file_size
2 | from .tst_paths import temp_dir, cache_dir
3 | from .dict_is_close import dict_is_close
4 | from .sqs_drain import drain
5 |
--------------------------------------------------------------------------------
/test_awsimple/conftest.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pytest
3 | from pathlib import Path
4 | import logging
5 |
6 | from botocore.exceptions import EndpointConnectionError
7 |
8 | from awsimple import is_mock, use_moto_mock_env_var, S3Access, is_using_localstack, dynamodb
9 |
10 | from test_awsimple import test_awsimple_str, temp_dir, cache_dir
11 |
12 | mock_env_var = os.environ.get(use_moto_mock_env_var)
13 |
14 | if mock_env_var is None:
15 | # facilitates CI by using mocking by default
16 | os.environ[use_moto_mock_env_var] = "1"
17 |
18 | # if running without an existing local AWS setup (e.g. in CI), create the credentials and config files dynamically
19 | aws_credentials_and_config_dir = Path(Path.home(), ".aws")
20 | aws_credentials_file = Path(aws_credentials_and_config_dir, "credentials")
21 | aws_config_file = Path(aws_credentials_and_config_dir, "config")
22 | if is_mock():
23 | dynamodb.get_accommodated_clock_skew = lambda: 0.0 # no clock skew for mock (better for CI)
24 | if not aws_credentials_and_config_dir.exists():
25 | aws_credentials_and_config_dir.mkdir(parents=True, exist_ok=True)
26 | if not aws_credentials_file.exists():
27 | credential_strings = [
28 | "[default]\naws_access_key_id=AAAAAAAAAAAAAAAAAAAA\naws_secret_access_key=AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
29 | f"[{test_awsimple_str}]\naws_access_key_id=AAAAAAAAAAAAAAAAAAAA\naws_secret_access_key=AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
30 | ]
31 | aws_credentials_file.write_text("\n".join(credential_strings))
32 | if not aws_config_file.exists():
33 | config_strings = ["[profile default]\nregion=us-west-2", f"[profile {test_awsimple_str}]\nregion=us-west-2"]
34 | aws_config_file.write_text("\n".join(config_strings))
35 | else:
36 | dynamodb.get_accommodated_clock_skew = lambda: 1.0 # faster than the default so tests don't take too much time
37 |
38 |
39 | class TestAWSimpleLoggingHandler(logging.Handler):
40 | def emit(self, record):
41 | print(record.getMessage())
42 | assert False # fail the test if anything logs at ERROR or above
43 |
44 |
45 | @pytest.fixture(scope="session", autouse=True)
46 | def session_fixture():
47 | temp_dir.mkdir(parents=True, exist_ok=True)
48 | cache_dir.mkdir(parents=True, exist_ok=True)
49 |
50 | # add handler that will throw an assert on ERROR or greater
51 | test_handler = TestAWSimpleLoggingHandler()
52 | test_handler.setLevel(logging.ERROR)
53 | logging.getLogger().addHandler(test_handler)
54 |
55 | print(f"{is_mock()=},{is_using_localstack()=}")
56 |
57 |
58 | @pytest.fixture(scope="module")
59 | def s3_access():
60 | _s3_access = S3Access(profile_name=test_awsimple_str, bucket_name=test_awsimple_str, cache_dir=cache_dir)
61 | return _s3_access
62 |
63 |
64 | @pytest.fixture(scope="session", autouse=True)
65 | def test_localstack():
66 | if is_using_localstack():
67 | # just try anything to see if localstack is running
68 | _s3_access = S3Access(profile_name=test_awsimple_str, bucket_name=test_awsimple_str, cache_dir=cache_dir)
69 | try:
70 | _s3_access.bucket_list()
71 | except EndpointConnectionError:
72 | pytest.exit(f"{is_using_localstack()=} and localstack is not running - please run scripts/start_localstack.bat")
73 |
--------------------------------------------------------------------------------
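conftest.py defaults to moto mocking so the suite can run in CI without a real AWS account, and scripts/coverage.bat and scripts/pytest.bat flip the same switch the other way with AWSIMPLE_USE_MOTO_MOCK=0. A minimal sketch of selecting the backend programmatically (set the variable before exercising awsimple):

import os

os.environ["AWSIMPLE_USE_MOTO_MOCK"] = "1"  # "1" = mock AWS with moto, "0" = real AWS (or localstack)

from awsimple import is_mock

print(f"{is_mock()=}")  # expected to report the mocked backend

--------------------------------------------------------------------------------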
/test_awsimple/const.py:
--------------------------------------------------------------------------------
1 | id_str = "id"
2 | test_awsimple_str = "testawsimple" # underscores not allowed for S3 buckets
3 |
4 | never_change_file_name = "never_change.txt"
5 | never_change_file_size = 65
6 |
--------------------------------------------------------------------------------
/test_awsimple/dict_is_close.py:
--------------------------------------------------------------------------------
1 | from typing import Union, List
2 | from math import isinf, isnan, nan, inf
3 |
4 | from typeguard import typechecked
5 |
6 | rel_tol_default = 1e-09
7 | abs_tol_default = 0.0
8 |
9 | # todo: put this in PyPI as its own package. Even though dictdiffer exists, this is slightly different ...
10 |
11 |
12 | class ValueDivergence:
13 | @typechecked()
14 | def __init__(self, label: Union[str, None], value):
15 | self.label = label
16 | self.value = value
17 |
18 | def __repr__(self):
19 | v = str(self.value)
20 | if self.label is None:
21 | s = v
22 | else:
23 | s = self.label + ":" + v
24 | return s
25 |
26 | def __eq__(self, other):
27 | return self.label == other.label and self.value == other.value
28 |
29 | def to_sort(self):
30 | if isinstance(self.value, float) or isinstance(self.value, int):
31 | return self.value
32 | else:
33 | return 0.0 # for strings, etc. just use 0.0 to sort
34 |
35 |
36 | class ValueDivergences:
37 | @typechecked()
38 | def __init__(self, max_divergences: int = 10):
39 | self.max_divergences = max_divergences
40 | self.divergences = [] # type: List[ValueDivergence]
41 | self.hit_max_divergences_flag = False
42 |
43 | def __repr__(self):
44 | return self.divergences.__repr__()
45 |
46 | def __len__(self):
47 | return len(self.divergences)
48 |
49 | @typechecked()
50 | def add(self, divergence: ValueDivergence):
51 | if not any([d == divergence for d in self.divergences]):
52 | self.divergences.append(divergence)
53 | self.divergences.sort(key=lambda x: x.to_sort())
54 | if len(self.divergences) > self.max_divergences:
55 | self.divergences.pop()
56 | self.hit_max_divergences_flag = True
57 |
58 | def get(self):
59 | return self.divergences
60 |
61 | def max_value(self):
62 | mv = None
63 | if len(self.divergences) > 0:
64 | mv = self.divergences[-1].value
65 | if not (isinstance(mv, float) or isinstance(mv, int)):
66 | mv = 0.0
67 | return mv
68 |
69 | def max_label(self):
70 | ml = None
71 | if len(self.divergences) > 0:
72 | ml = self.divergences[-1].label
73 | return ml
74 |
75 | def hit_max_divergences(self):
76 | # did we drop divergences because max_divergences was reached (i.e., is the list incomplete)?
77 | return self.hit_max_divergences_flag
78 |
79 |
80 | class DictIsClose:
81 | """
82 | Like doing x == y for a dict, except if there are floats then use math.isclose()
83 | """
84 |
85 | @typechecked()
86 | def __init__(self, x, y, rel_tol: Union[float, None] = None, abs_tol: Union[float, None] = None, divergences: Union[ValueDivergences, None] = None):
87 | self._x = x
88 | self._y = y
89 | self._rel_tol = rel_tol
90 | self._abs_tol = abs_tol
91 | self.divergences = divergences if divergences is not None else ValueDivergences()  # avoid sharing a mutable default argument across instances
92 | self._is_close_flag = self._dict_is_close(self._x, self._y, self._rel_tol, self._abs_tol, None)
93 |
94 | def __repr__(self):
95 | return self.divergences.__repr__()
96 |
97 | @typechecked()
98 | def _is_close_number(self, a: Union[float, int], b: Union[float, int], rel_tol: float, abs_tol: float, value_label: Union[str, None]):
99 | """
100 | similar to math.isclose() except it keeps track of which values have the greatest difference
101 | :param a: first input
102 | :param b: second input
103 | :param rel_tol: relative tolerance
104 | :param abs_tol: absolute tolerance
105 | :return:
106 | """
107 |
108 | # handle NaN and INF; matches math.isclose()
109 | divergence_value = 0.0
110 | if isnan(a) or isnan(b):
111 | is_close_flag = False
112 | divergence_value = nan
113 | elif isinf(a) and isinf(b):
114 | is_close_flag = a == b # handles both +INF and -INF
115 | if not is_close_flag:
116 | divergence_value = inf
117 | elif isinf(a) or isinf(b):
118 | is_close_flag = False # only one or the other is (positive or negative) infinity
119 | divergence_value = inf
120 | elif isinf(rel_tol) or isinf(abs_tol):
121 | is_close_flag = True
122 | else:
123 | # is_close_flag is same as:
124 | # abs(a-b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol)
125 | divergence_value = abs(a - b) - max(rel_tol * max(abs(a), abs(b)), abs_tol) # if > 0.0, values are *not* close
126 | is_close_flag = divergence_value <= 0.0
127 |
128 | if not is_close_flag and divergence_value is not None and (self.divergences.max_value() is None or divergence_value > self.divergences.max_value()):
129 | self.divergences.add(ValueDivergence(value_label, divergence_value))
130 |
131 | return is_close_flag
132 |
133 | @typechecked()
134 | def _dict_is_close(self, x, y, rel_tol: Union[float, None], abs_tol: Union[float, None], parent_label: Union[str, None]):
135 | if rel_tol is None or isnan(rel_tol):
136 | rel_tol = rel_tol_default
137 | if abs_tol is None:
138 | abs_tol = abs_tol_default
139 |
140 | if (isinstance(x, float) or isinstance(x, int)) and (isinstance(y, float) or isinstance(y, int)):
141 | is_close_flag = self._is_close_number(x, y, rel_tol, abs_tol, parent_label)
142 | elif isinstance(x, dict) and isinstance(y, dict):
143 | # seed with the key-set comparison: all([]) is True, so dicts with differing keys would otherwise (incorrectly) compare as close
144 | is_close_flags = [set(x.keys()) == set(y.keys())]
145 | if is_close_flags[0]:
146 | for k in x:
147 | # keys can be things other than strings, e.g. int
148 | str_k = str(k)
149 | if parent_label is None:
150 | label = str_k
151 | else:
152 | label = parent_label + "." + str_k
153 | is_close_flag = self._dict_is_close(x[k], y[k], rel_tol, abs_tol, label)
154 | is_close_flags.append(is_close_flag)
155 | is_close_flag = all(is_close_flags)
156 | else:
157 | is_close_flag = x == y # everything else that can be evaluated with == such as strings
158 | if not is_close_flag:
159 | self.divergences.add(ValueDivergence(parent_label, str(x)))
160 |
161 | return is_close_flag
162 |
163 | def is_close(self):
164 | return self._is_close_flag
165 |
166 |
167 | @typechecked()
168 | def dict_is_close(x, y, rel_tol: Union[float, None] = None, abs_tol: Union[float, None] = None):
169 | """
170 |
171 | Like doing x == y for a dict, except if there are floats then use math.isclose()
172 |
173 | :param x: input x
174 | :param y: input y
175 | :param rel_tol: relative tolerance to pass to math.isclose()
176 | :param abs_tol: absolute tolerance to pass to math.isclose()
177 | :return: True if dictionaries match and float values are close
178 |
179 | """
180 | return DictIsClose(x, y, rel_tol, abs_tol).is_close()
181 |
--------------------------------------------------------------------------------
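A short usage sketch of dict_is_close (assuming it is run from the repository root so that test_awsimple is importable):

from test_awsimple import dict_is_close

x = {"id": "a", "nested": {"value": 0.1 + 0.2}}  # 0.30000000000000004
y = {"id": "a", "nested": {"value": 0.3}}

assert x != y  # plain equality fails on the binary float artifact
assert dict_is_close(x, y)  # close within the default rel_tol of 1e-09
assert not dict_is_close(x, {"id": "a", "nested": {"value": 0.31}})

--------------------------------------------------------------------------------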
/test_awsimple/sqs_drain.py:
--------------------------------------------------------------------------------
1 | import time
2 | from pprint import pprint
3 |
4 | from awsimple import SQSAccess
5 |
6 | from test_awsimple import test_awsimple_str
7 |
8 |
9 | def drain():
10 | # drain existing messages
11 | q = SQSAccess(test_awsimple_str, profile_name=test_awsimple_str)
12 | q.create_queue() # just in case it doesn't exist
13 | while len(messages := q.receive_messages()) > 0:
14 | print("existing:")
15 | pprint(messages)
16 | time.sleep(0.1)
17 | print()
18 |
--------------------------------------------------------------------------------
/test_awsimple/test_aws_test.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from botocore.exceptions import ProfileNotFound
4 |
5 | from awsimple import AWSAccess, S3Access, DynamoDBAccess, SQSAccess, is_mock
6 |
7 | from test_awsimple import test_awsimple_str
8 |
9 |
10 | def test_aws_test():
11 | # test the test() method (basic AWS connection)
12 |
13 | # these should work
14 | if not is_mock():
15 | assert AWSAccess(profile_name=test_awsimple_str).test()
16 | assert S3Access(test_awsimple_str, profile_name=test_awsimple_str).test()
17 | assert DynamoDBAccess(test_awsimple_str, profile_name=test_awsimple_str).test()
18 | assert SQSAccess(test_awsimple_str, profile_name=test_awsimple_str).test()
19 |
20 | if not is_mock():
21 | # this (non-existent) profile doesn't have access at all
22 | with pytest.raises(ProfileNotFound):
23 | AWSAccess(profile_name="IAmNotAProfile").test()
24 |
--------------------------------------------------------------------------------
/test_awsimple/test_c_dynamodb_create_table.py:
--------------------------------------------------------------------------------
1 | from pprint import pprint
2 |
3 | from awsimple import DynamoDBAccess
4 | from test_awsimple import test_awsimple_str
5 |
6 |
7 | def test_dynamodb_create_table():
8 | table_name = f"{test_awsimple_str}temp"
9 |
10 | dynamodb_access = DynamoDBAccess(table_name, profile_name=test_awsimple_str)
11 |
12 | dynamodb_access.create_table("id")
13 | assert dynamodb_access.table_exists() # create_table has a waiter so the table should exist at this point
14 |
15 | dynamodb_access.put_item({"id": "me", "value": 1})
16 |
17 | table_data = dynamodb_access.scan_table_cached()
18 | pprint(table_data)
19 | assert table_data[0]["id"] == "me"
20 | assert table_data[0]["value"] == 1
21 | assert len(table_data) == 1
22 | assert len(dynamodb_access.scan_table_cached(invalidate_cache=True)) == 1
23 |
24 | dynamodb_access.delete_table()
25 | assert not dynamodb_access.delete_table() # delete_table has a waiter, so the table is already gone and the second delete returns False
26 |
--------------------------------------------------------------------------------
/test_awsimple/test_dynamodb.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import decimal
4 | from collections import OrderedDict, defaultdict
5 | import math
6 | import datetime
7 | from datetime import timedelta, timezone
8 | import pickle
9 | from pathlib import Path
10 | import time
11 |
12 | from PIL import Image
13 | from ismain import is_main
14 | from dictim import dictim
15 |
16 | from awsimple import dict_to_dynamodb, DynamoDBAccess, is_mock, is_using_localstack, KeyType
17 | from awsimple.dynamodb import get_accommodated_clock_skew
18 | from test_awsimple import dict_is_close, test_awsimple_str, id_str
19 |
20 | dict_id = "test"
21 |
22 | # source:
23 | # https://en.wikipedia.org/wiki/Portable_Network_Graphics
24 | # https://en.wikipedia.org/wiki/File:PNG_transparency_demonstration_1.png
25 | png_image = Image.open(os.path.join("test_awsimple", "280px-PNG_transparency_demonstration_1.png"))
26 |
27 | od = OrderedDict()
28 | od["a"] = 1
29 | od["b"] = 2
30 |
31 | dd = defaultdict(int)
32 | dd[1] = 2
33 |
34 | sample_input = {
35 | id_str: dict_id,
36 | "sample1": "Test Data",
37 | "sample2": 2.0,
38 | "sample3": True,
39 | "sample4": int(1),
40 | "sample5": None,
41 | "sample6": {"test": True},
42 | "sample7": ["Hello", "World"],
43 | "sample8": [9, 10],
44 | "od": od,
45 | "dd": dd,
46 | "DecimalInt": decimal.Decimal(42),
47 | "DecimalFloat": decimal.Decimal(2.0) / decimal.Decimal(3.0),
48 | "a_tuple": (1, 2, 3),
49 | 42: "my_key_is_an_int",
50 | "difficult_floats": [math.pi, math.e, 0.6],
51 | "difficult_ints": [sys.maxsize],
52 | "image": png_image,
53 | "test_date_time": datetime.datetime.fromtimestamp(1559679535, tz=timezone.utc), # 2019-06-04T20:18:55+00:00
54 | "zero_len_string": "",
55 | "dictim": dictim({"HI": dictim({"there": 1})}), # nested
56 | }
57 |
58 |
59 | def check_table_contents(contents):
60 | with open(os.path.join("cache", f"{test_awsimple_str}.pickle"), "rb") as f:
61 | assert dict_is_close(sample_input, contents[0])
62 | assert dict_is_close(sample_input, pickle.load(f)[0])
63 |
64 |
65 | def test_get_table_names():
66 | if is_mock() or is_using_localstack():
67 | dynamodb_access = DynamoDBAccess(test_awsimple_str, profile_name=test_awsimple_str) # for mock we have to make the table
68 | dynamodb_access.create_table(id_str) # have to create the table on the fly for mocking
69 | else:
70 | dynamodb_access = DynamoDBAccess(profile_name=test_awsimple_str) # since we're only going to get the existing table names, we don't have to provide a table name
71 | dynamodb_tables = dynamodb_access.get_table_names()
72 | print(dynamodb_tables)
73 | assert len(dynamodb_tables) > 0
74 | assert test_awsimple_str in dynamodb_tables
75 |
76 |
77 | def test_dynamodb():
78 | dynamodb_dict = dict_to_dynamodb(sample_input)
79 |
80 | assert dynamodb_dict["sample1"] == "Test Data"
81 | assert math.isclose(float(dynamodb_dict["sample2"]), decimal.Decimal(2.0))
82 | assert dynamodb_dict["sample3"] is True
83 | assert dynamodb_dict["sample5"] is None
84 | assert dynamodb_dict["sample6"] == {"test": True}
85 | assert dynamodb_dict["sample7"] == ["Hello", "World"]
86 | assert dynamodb_dict["sample8"] == [decimal.Decimal(9), decimal.Decimal(10)]
87 | assert dynamodb_dict["DecimalInt"] == decimal.Decimal(42)
88 | assert dynamodb_dict["DecimalFloat"] == decimal.Decimal(2.0) / decimal.Decimal(3.0)
89 | assert dynamodb_dict["a_tuple"] == [1, 2, 3]
90 | assert dynamodb_dict["42"] == "my_key_is_an_int" # test conversion of an int key to a string
91 | assert dynamodb_dict["test_date_time"] == "2019-06-04T20:18:55+00:00"
92 | assert dynamodb_dict["zero_len_string"] is None
93 |
94 | # while dictim is case-insensitive, when we convert to dict for DynamoDB it becomes case-sensitive
95 | assert list(dynamodb_dict["dictim"]["HI"])[0] == "there"
96 | assert dynamodb_dict["dictim"]["HI"]["there"] == 1 # actually Decimal(1)
97 | assert dynamodb_dict["dictim"].get("hi") is None # we're back to case sensitivity
98 |
99 | # start with a cache life of 1 second to ensure there is no cache hit
100 | dynamodb_access = DynamoDBAccess(profile_name=test_awsimple_str, table_name=test_awsimple_str, cache_dir=Path("cache"), cache_life=timedelta(seconds=1).total_seconds())
101 | dynamodb_access.create_table(id_str)
102 | dynamodb_access.put_item(dynamodb_dict)
103 | time.sleep(get_accommodated_clock_skew())
104 |
105 | sample_from_db = dynamodb_access.get_item(id_str, dict_id)
106 | assert sample_from_db == dynamodb_dict # make sure we get back exactly what we wrote
107 |
108 | table_contents = dynamodb_access.scan_table_cached()
109 | assert not dynamodb_access.cache_hit
110 | check_table_contents(table_contents)
111 |
112 | table_contents = dynamodb_access.scan_table()
113 | check_table_contents(table_contents)
114 |
115 | if is_using_localstack():
116 | dynamodb_access.cache_life = 600.0 # localstack can take a while ...
117 | table_contents = dynamodb_access.scan_table_cached()
118 | assert dynamodb_access.cache_hit
119 | check_table_contents(table_contents)
120 |
121 | assert dynamodb_access.get_primary_keys_dict() == {KeyType.partition: id_str}
122 |
123 |
124 | if is_main():
125 | test_dynamodb()
126 |
--------------------------------------------------------------------------------
/test_awsimple/test_dynamodb_delete.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from awsimple import DynamoDBAccess, DBItemNotFound
4 |
5 | from test_awsimple import test_awsimple_str, id_str
6 |
7 |
8 | def test_dynamodb_delete():
9 | dynamodb_access = DynamoDBAccess(profile_name=test_awsimple_str, table_name=test_awsimple_str)
10 | dynamodb_access.create_table(id_str)
11 | test_id = "deleter"
12 | item_value = {id_str: test_id, "color": "blue"}
13 | dynamodb_access.put_item(item_value)
14 | assert dynamodb_access.get_item(id_str, test_id) == item_value # check that it's set
15 | dynamodb_access.delete_item(id_str, test_id)
16 | with pytest.raises(DBItemNotFound):
17 | print(dynamodb_access.get_item(id_str, test_id)) # check that it's deleted
18 |
--------------------------------------------------------------------------------
/test_awsimple/test_dynamodb_delete_all_items.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | from awsimple import dict_to_dynamodb, DynamoDBAccess, is_mock
4 |
5 | from test_awsimple import id_str, test_awsimple_str
6 |
7 |
8 | def test_dynamodb_delete_all_items():
9 | table_name = "awsimple-delete-test" # this test is the only thing we'll use this table for
10 |
11 | dynamodb_access = DynamoDBAccess(table_name, profile_name=test_awsimple_str)
12 | dynamodb_access.create_table(id_str)
13 | dynamodb_access.put_item(dict_to_dynamodb({id_str: "me", "answer": 42}))
14 | dynamodb_access.put_item(dict_to_dynamodb({id_str: "you", "question": 0}))
15 | while len(table_contents := dynamodb_access.scan_table()) != 2:
16 | print(f"waiting for the put ...{table_contents}")
17 | time.sleep(1) # DynamoDB is "eventually consistent"
18 | rows_deleted = dynamodb_access.delete_all_items()
19 | assert rows_deleted == 2
20 | while len(table_contents := dynamodb_access.scan_table()) != 0:
21 | print(f"waiting for the delete all items ...{table_contents}")
22 | time.sleep(1) # DynamoDB is "eventually consistent"
23 |
--------------------------------------------------------------------------------
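The polling loops above run unbounded; a bounded variant of the same eventual-consistency wait could look like this (a hypothetical helper, not part of the test suite):

import time


def wait_for_row_count(scan, expected: int, timeout: float = 60.0) -> None:
    """Poll an eventually consistent scan until it returns the expected row count, or raise after the timeout."""
    deadline = time.monotonic() + timeout
    while len(scan()) != expected:
        if time.monotonic() > deadline:
            raise TimeoutError(f"table did not reach {expected} rows within {timeout} seconds")
        time.sleep(1)  # DynamoDB is "eventually consistent"

Called as wait_for_row_count(dynamodb_access.scan_table, 2), it would replace the unbounded while loops above.

--------------------------------------------------------------------------------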
/test_awsimple/test_dynamodb_get_item.py:
--------------------------------------------------------------------------------
1 | from awsimple import DynamoDBAccess
2 |
3 | from test_awsimple import test_awsimple_str, id_str
4 |
5 |
6 | def test_dynamodb_get_item():
7 | test_id = "test_id"
8 | dynamodb_access = DynamoDBAccess(profile_name=test_awsimple_str, table_name=test_awsimple_str)
9 | dynamodb_access.create_table(id_str)
10 | dynamodb_access.delete_item(id_str, test_id) # make sure the item doesn't exist
11 |
12 | item_value = {id_str: test_id, "color": "blue"}
13 | dynamodb_access.upsert_item(id_str, test_id, item={"color": "blue"}) # insert
14 | assert dynamodb_access.get_item(id_str, test_id) == item_value # check that it's set
15 | assert dynamodb_access.get_item(partition_value=test_id) == item_value # check that it's set
16 |
--------------------------------------------------------------------------------
/test_awsimple/test_dynamodb_item_not_found.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from awsimple import DynamoDBAccess, DBItemNotFound
4 |
5 | from test_awsimple import test_awsimple_str, id_str
6 |
7 |
8 | def test_dynamodb_item_not_found():
9 | dynamodb_access = DynamoDBAccess(test_awsimple_str, profile_name=test_awsimple_str)
10 | dynamodb_access.create_table(id_str)
11 | with pytest.raises(DBItemNotFound):
12 | dynamodb_access.get_item(id_str, "I will never ever exist")
13 |
--------------------------------------------------------------------------------
/test_awsimple/test_dynamodb_miv_ui.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | from awsimple import DynamoDBMIVUI, miv_string, get_time_us, miv_us_to_timestamp
4 |
5 |
6 | def test_dynamodb_miv_ui():
7 | test_name = "test_dynamodb_miv_ui"
8 | primary_partition_key = "id"
9 | id_value = "me"
10 | input_data = {primary_partition_key: id_value}
11 |
12 | dynamodb_miv_ui = DynamoDBMIVUI(test_name)
13 | dynamodb_miv_ui.create_table(primary_partition_key) # use default of str
14 | dynamodb_miv_ui.put_item(input_data)
15 | dynamodb_miv_ui.put_item(input_data)
16 | output_data = dynamodb_miv_ui.get_most_senior_item(primary_partition_key, id_value)
17 | print(output_data)
18 | assert output_data[primary_partition_key] == id_value
19 | miv_value = output_data[miv_string]
20 | assert miv_value <= get_time_us() # basic check for miv value
21 | difference = time.time() - miv_us_to_timestamp(miv_value)
22 | print(f"{difference=} seconds")
23 | assert 0 < difference < 100 # check that we can convert the MIV back to time in seconds since epoch
24 |
--------------------------------------------------------------------------------
/test_awsimple/test_dynamodb_primary_key_as_number.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from pprint import pprint
3 |
4 | from awsimple import DynamoDBAccess, dynamodb_to_dict
5 | from ismain import is_main
6 |
7 | from test_awsimple import test_awsimple_str
8 |
9 |
10 | def test_dynamodb_sort_as_number():
11 | dynamodb_access = DynamoDBAccess(profile_name=test_awsimple_str, table_name=f"{test_awsimple_str}_sort_as_number", cache_dir=Path("cache"))
12 | dynamodb_access.create_table("id", "year", sort_key_type=int) # sort key as number
13 | input_item = {"id": "me", "year": 1999, "out_of_time": False}
14 | dynamodb_access.put_item(input_item)
15 | item = dynamodb_access.get_item("id", "me", "year", 1999)
16 | output_item = dynamodb_to_dict(item)
17 | pprint(item)
18 | assert input_item == output_item
19 | dynamodb_access.delete_table()
20 |
21 |
22 | def test_dynamodb_partition_as_number():
23 | dynamodb_access = DynamoDBAccess(profile_name=test_awsimple_str, table_name=f"{test_awsimple_str}_partition_as_number", cache_dir=Path("cache"))
24 | dynamodb_access.create_table("year", "id", partition_key_type=int) # partition key as number
25 | input_item = {"id": "me", "year": 1999, "out_of_time": False}
26 | dynamodb_access.put_item(input_item)
27 | item = dynamodb_access.get_item("id", "me", "year", 1999)
28 | pprint(item)
29 | assert input_item == dynamodb_to_dict(item)
30 |
31 | item = dynamodb_access.query("year", 1999)[0] # only use the partition key (no sort key)
32 | pprint(item)
33 | assert input_item == dynamodb_to_dict(item)
34 |
35 | dynamodb_access.delete_table()
36 |
37 |
38 | if is_main():
39 | test_dynamodb_sort_as_number()
40 |
--------------------------------------------------------------------------------
/test_awsimple/test_dynamodb_query.py:
--------------------------------------------------------------------------------
1 | from ismain import is_main
2 |
3 | from awsimple import DynamoDBAccess, QuerySelection
4 |
5 | from test_awsimple import test_awsimple_str
6 |
7 |
8 | def test_dynamodb_query():
9 | table_name = "testawsimpleps" # ps = both partition and sort
10 |
11 | dynamodb_access = DynamoDBAccess(profile_name=test_awsimple_str, table_name=table_name)
12 | dynamodb_access.create_table("id", "name")
13 |
14 | # three entries for "me"
15 | dynamodb_access.put_item({"id": "me", "name": "james", "answer": 13}) # this will be the "first" one
16 | dynamodb_access.put_item({"id": "me", "name": "james abel", "answer": 1})
17 | dynamodb_access.put_item({"id": "me", "name": "zzz", "answer": 99}) # this will be the "last" one
18 |
19 | dynamodb_access.put_item({"id": "notme", "name": "notjames", "answer": 42})
20 |
21 | response = dynamodb_access.query("id", "me") # partition only
22 | assert len(response) == 3
23 |
24 | response = dynamodb_access.query("id", "me", "name", "james") # partition and sort
25 | assert len(response) == 1
26 |
27 | response = dynamodb_access.query_begins_with("id", "me", "name", "james a") # begins with
28 | assert len(response) == 1
29 | response = dynamodb_access.query_begins_with("id", "me", "name", "jame")
30 | assert len(response) == 2
31 |
32 | response = dynamodb_access.query("id", "idonotexist") # does not exist
33 | assert len(response) == 0
34 |
35 | response = dynamodb_access.query_one("id", "me", QuerySelection.highest)
36 | assert response["answer"] == 99
37 | assert response["name"] == "zzz" # the "last" entry, as sorted by sort key
38 |
39 | response = dynamodb_access.query_one("id", "me", QuerySelection.lowest)
40 | assert response["answer"] == 13
41 | assert response["name"] == "james" # the "first" entry, as sorted by sort key
42 |
43 | response = dynamodb_access.query_one("id", "idonotexist", QuerySelection.lowest)
44 | assert response is None
45 |
46 |
47 | if is_main():
48 | test_dynamodb_query()
49 |
--------------------------------------------------------------------------------
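query_begins_with presumably maps onto a boto3 KeyConditionExpression; the raw equivalent of the "jame" query above looks roughly like this (a sketch assuming the same "testawsimpleps" table and configured credentials):

import boto3
from boto3.dynamodb.conditions import Key

table = boto3.resource("dynamodb").Table("testawsimpleps")

# the partition key must match exactly; begins_with applies only to the sort key
response = table.query(KeyConditionExpression=Key("id").eq("me") & Key("name").begins_with("jame"))
print(response["Items"])  # the "james" and "james abel" items

--------------------------------------------------------------------------------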
/test_awsimple/test_dynamodb_query_kwargs.py:
--------------------------------------------------------------------------------
1 | from ismain import is_main
2 |
3 | from awsimple import DynamoDBAccess, QuerySelection
4 |
5 | from test_awsimple import test_awsimple_str
6 |
7 |
8 | def test_dynamodb_query_kwargs():
9 | table_name = "testawsimpleps" # ps = both partition and sort
10 |
11 | dynamodb_access = DynamoDBAccess(profile_name=test_awsimple_str, table_name=table_name)
12 | dynamodb_access.create_table("id", "name")
13 |
14 | # three entries for "me"
15 | dynamodb_access.put_item({"id": "me", "name": "james", "answer": 13}) # this will be the "first" one
16 | dynamodb_access.put_item({"id": "me", "name": "james abel", "answer": 1})
17 | dynamodb_access.put_item({"id": "me", "name": "zzz", "answer": 99}) # this will be the "last" one
18 |
19 | dynamodb_access.put_item({"id": "notme", "name": "notjames", "answer": 42})
20 |
21 | response = dynamodb_access.query(id="me") # partition only
22 | assert len(response) == 3
23 |
24 | response = dynamodb_access.query(id="me", name="james") # partition and sort
25 | assert len(response) == 1
26 |
27 | response = dynamodb_access.query_begins_with(id="me", name="james a") # begins with
28 | assert len(response) == 1
29 | response = dynamodb_access.query_begins_with(id="me", name="jame")
30 | assert len(response) == 2
31 |
32 | response = dynamodb_access.query(id="idonotexist") # does not exist
33 | assert len(response) == 0
34 |
35 | response = dynamodb_access.query_one(partition_value="me") # highest is default
36 | assert response["answer"] == 99
37 | assert response["name"] == "zzz" # the "last" entry, as sorted by sort key
38 |
39 | response = dynamodb_access.query_one(partition_value="me", direction=QuerySelection.highest)
40 | assert response["answer"] == 99
41 | assert response["name"] == "zzz" # the "last" entry, as sorted by sort key
42 |
43 | response = dynamodb_access.query_one(partition_value="me", direction=QuerySelection.lowest)
44 | assert response["answer"] == 13
45 | assert response["name"] == "james" # the "first" entry, as sorted by sort key
46 |
47 | response = dynamodb_access.query_one(partition_value="idonotexist", direction=QuerySelection.lowest)
48 | assert response is None
49 |
50 |
51 | if is_main():
52 | test_dynamodb_query_kwargs()
53 |
--------------------------------------------------------------------------------
/test_awsimple/test_dynamodb_scan_cache.py:
--------------------------------------------------------------------------------
1 | def test_dynamodb_scan_cache_cache_life():
2 | """
3 | Test that we can properly do a DynamoDB cached scan using the cache life.
4 | """
5 | ...
6 |
7 |
8 | def test_dynamodb_scan_cache_mtime():
9 | """
10 | Test that we can properly do a DynamoDB cached scan using the mtime (metadata).
11 | """
12 | ...
13 |
--------------------------------------------------------------------------------
/test_awsimple/test_dynamodb_scan_table_as_dict.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from datetime import timedelta
3 | from decimal import Decimal
4 | import time
5 |
6 | from awsimple import DynamoDBAccess
7 | from awsimple.dynamodb import get_accommodated_clock_skew
8 | from test_awsimple import test_awsimple_str, id_str
9 |
10 |
11 | def check_scan_table(table_contents: dict, expected_contents: dict):
12 | keys = list(table_contents.keys())
13 | # for real AWS I may have other things in this table
14 | assert "a" in keys
15 | assert "b" in keys
16 | assert "c" in keys
17 | # check sort
18 | for key_index in range(0, len(keys) - 1):
19 | assert keys[key_index + 1] > keys[key_index]
20 | # only test for what we just put in - there may be other rows in the table in the real AWS
21 | for k, v in expected_contents.items():
22 | assert table_contents[k] == v
23 |
24 |
25 | def test_dynamodb_scan_table_as_dict():
26 | dynamodb_access = DynamoDBAccess(profile_name=test_awsimple_str, table_name=test_awsimple_str, cache_dir=Path("cache"), cache_life=timedelta(seconds=10).total_seconds())
27 | dynamodb_access.create_table(id_str)
28 | dynamodb_access.put_item({id_str: "b", "value": 1}) # will be sorted in a different order than we're inputting
29 | dynamodb_access.put_item({id_str: "c", "value": 3})
30 | dynamodb_access.put_item({id_str: "a", "value": 2})
31 | time.sleep(get_accommodated_clock_skew())
32 |
33 | expected_contents = {"a": {"id": "a", "value": Decimal("2")}, "b": {"id": "b", "value": Decimal("1")}, "c": {"id": "c", "value": Decimal("3")}}
34 | table_contents = dynamodb_access.scan_table_as_dict()
35 | check_scan_table(table_contents, expected_contents)
36 |
37 | table_contents = dynamodb_access.scan_table_cached_as_dict()
38 | check_scan_table(table_contents, expected_contents)
39 |
40 | table_contents = dynamodb_access.scan_table_cached_as_dict()
41 | assert dynamodb_access.cache_hit
42 | check_scan_table(table_contents, expected_contents)
43 |
44 | table_contents = dynamodb_access.scan_table_cached_as_dict(sort_key=lambda x: x[id_str]) # test sort_key
45 | check_scan_table(table_contents, expected_contents)
46 |
--------------------------------------------------------------------------------
/test_awsimple/test_dynamodb_secondary_index.py:
--------------------------------------------------------------------------------
1 | from awsimple import DynamoDBAccess, DictKey
2 | from copy import deepcopy
3 |
4 | from test_awsimple import test_awsimple_str, id_str
5 |
6 |
7 | def test_dynamodb_secondary_index():
8 | table_name = f"{test_awsimple_str}2"
9 | table = DynamoDBAccess(table_name)
10 |
11 | sort_key = "id2"
12 | secondary_index = "id3"
13 | table.create_table(id_str, sort_key, secondary_index)
14 |
15 | item = {id_str: "me", sort_key: "myself", secondary_index: "i"}
16 | table.put_item(item)
17 |
18 | item2 = deepcopy(item)
19 | item2[sort_key] = "moi même" # also test unicode!
20 | item2[secondary_index] = "je"
21 | table.put_item(item2)
22 |
23 | query_results = table.query(id_str, "me")
24 | print(f"{query_results=}")
25 | assert len(query_results) == 2 # just the partition key should provide us with both rows
26 |
27 | # query with (only) the secondary index (in DynamoDB you can't mix primary and secondary indexes in a single query)
28 | assert table.query(secondary_index, "je") == [item2]
29 | assert table.query(id3="je") == [item2]
30 |
31 | expected_contents = {
32 | DictKey(partition="me", sort="moi même"): {"id": "me", "id2": "moi même", "id3": "je"},
33 | DictKey(partition="me", sort="myself"): {"id": "me", "id2": "myself", "id3": "i"},
34 | }
35 | contents = table.scan_table_cached_as_dict()
36 | assert contents == expected_contents
37 | assert list(contents.keys()) == [DictKey(partition="me", sort="moi même"), DictKey(partition="me", sort="myself")]
38 |
39 | table.delete_table()
40 |
41 |
42 | def test_dynamodb_secondary_index_int():
43 | table_name = f"{test_awsimple_str}3"
44 | table = DynamoDBAccess(table_name)
45 |
46 | sort_key = "id2"
47 | secondary_index = "num"
48 | table.create_table(id_str, sort_key, secondary_index, secondary_key_type=int) # secondary index as an int
49 |
50 | table.put_item({id_str: "me", sort_key: "myself", secondary_index: 1})
51 | table.put_item({id_str: "me", sort_key: "moi", secondary_index: 2})
52 |
53 | query_results = table.query(id_str, "me")
54 | print(f"{query_results=}")
55 | assert len(query_results) == 2 # just the partition key should provide us with both rows
56 | table.delete_table()
57 |
--------------------------------------------------------------------------------
/test_awsimple/test_dynamodb_table_not_found.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from awsimple import DynamoDBAccess, DynamoDBTableNotFound
4 |
5 | from test_awsimple import test_awsimple_str
6 |
7 |
8 | def test_dynamodb_table_not_found_put_item():
9 | with pytest.raises(DynamoDBTableNotFound):
10 | dynamodb_access = DynamoDBAccess("does_not_exist", profile_name=test_awsimple_str)
11 | dynamodb_access.put_item(item={}) # table won't exist
12 |
13 |
14 | def test_dynamodb_table_not_found_upsert_item():
15 | with pytest.raises(DynamoDBTableNotFound):
16 | dynamodb_access = DynamoDBAccess("does_not_exist", profile_name=test_awsimple_str)
17 | dynamodb_access.upsert_item(item={}) # table won't exist
18 |
19 |
20 | def test_dynamodb_table_not_found_get_item():
21 | with pytest.raises(DynamoDBTableNotFound):
22 | dynamodb_access = DynamoDBAccess("does_not_exist", profile_name=test_awsimple_str)
23 | dynamodb_access.get_item("dummy", "dummy") # table won't exist
24 |
--------------------------------------------------------------------------------
/test_awsimple/test_dynamodb_upsert.py:
--------------------------------------------------------------------------------
1 | from awsimple import DynamoDBAccess
2 |
3 | from test_awsimple import test_awsimple_str, id_str
4 |
5 |
6 | def test_dynamodb_upsert():
7 | dynamodb_access = DynamoDBAccess(profile_name=test_awsimple_str, table_name=test_awsimple_str)
8 | dynamodb_access.create_table(id_str)
9 | test_id = "upserter"
10 | dynamodb_access.delete_item(id_str, test_id) # make sure the item doesn't exist
11 |
12 | item_value = {id_str: test_id, "color": "blue"}
13 | dynamodb_access.upsert_item(id_str, test_id, item={"color": "blue"}) # insert
14 | assert dynamodb_access.get_item(id_str, test_id) == item_value # check that it's set
15 |
16 | item_value["my_size"] = 9
17 | dynamodb_access.upsert_item(id_str, test_id, item={"my_size": 9}) # update with new data
18 | assert dynamodb_access.get_item(id_str, test_id) == item_value # check that it's set to the new value
19 |
20 | item_value["my_size"] = 10
21 | dynamodb_access.upsert_item(id_str, test_id, item={"my_size": 10}) # update existing data
22 | assert dynamodb_access.get_item(id_str, test_id) == item_value # check that it's set to the new value
23 |
--------------------------------------------------------------------------------
/test_awsimple/test_get_account_id.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from ismain import is_main
4 |
5 | from awsimple import AWSAccess
6 |
7 |
8 | def test_get_account_id():
9 |
10 | with pytest.raises(NotImplementedError):
11 | aws_access = AWSAccess()
12 | account_id = aws_access.get_account_id()
13 | assert len(account_id) >= 12 # currently all account IDs are 12 numeric digits, but allow for them to increase in size (but still be only digits)
14 | assert account_id.isdigit()
15 | print(account_id)
16 |
17 |
18 | if is_main():
19 | test_get_account_id()
20 |
--------------------------------------------------------------------------------
/test_awsimple/test_get_configuration_information.py:
--------------------------------------------------------------------------------
1 | from awsimple import AWSAccess, is_mock
2 |
3 | from test_awsimple import test_awsimple_str
4 |
5 |
6 | def test_get_access_key():
7 | if not is_mock():
8 | # todo: get this to work with mocking
9 | access_key = AWSAccess(profile_name=test_awsimple_str).get_access_key()
10 | print(f"{access_key=}")
11 | print(f"{len(access_key)=}")
12 | # https://docs.aws.amazon.com/IAM/latest/APIReference/API_AccessKey.html
13 | assert len(access_key) >= 16 # as of this writing, the access key length was 20
14 |
15 |
16 | def test_get_region():
17 | if not is_mock():
18 | # todo: get this to work with mocking
19 | region = AWSAccess(profile_name=test_awsimple_str).get_region()
20 | print(f"{region=}")
21 | print(f"{len(region)=}")
22 | assert len(region) >= 5 # make sure we get back something
23 |
--------------------------------------------------------------------------------
/test_awsimple/test_logs.py:
--------------------------------------------------------------------------------
1 | from awsimple import LogsAccess
2 |
3 | from test_awsimple import test_awsimple_str
4 |
5 |
6 | def test_logs():
7 | logs_access = LogsAccess(test_awsimple_str)
8 | logs_access.put("my first log test")
9 | logs_access.put("my second log test")
10 |
11 | logs_access = LogsAccess(test_awsimple_str)
12 | logs_access.put("my third log test")
13 | logs_access.put("my fourth log test")
14 |
--------------------------------------------------------------------------------
/test_awsimple/test_lru_cache_helpers.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | from awsimple import get_disk_free, get_directory_size, is_mock
4 |
5 |
6 | def test_disk_free():
7 | free = get_disk_free()
8 | print(f"{free=:,}")
9 | assert free > 1e9 # assume we have some reasonable amount free
10 |
11 |
12 | def test_get_directory_size():
13 | venv = Path("venv")
14 | if venv.exists():
15 | # doesn't work with Linux CI
16 | size = get_directory_size(venv) # just use the venv as something that's relatively large and multiple directory levels
17 | print(f"{size=:,}")
18 | assert size >= 50000000 # 94,302,709 on 8/21/20, so assume it's not going to get a lot smaller
19 |
--------------------------------------------------------------------------------
/test_awsimple/test_mock.py:
--------------------------------------------------------------------------------
1 | from awsimple import is_mock, S3Access
2 |
3 | from test_awsimple import test_awsimple_str
4 |
5 |
6 | def test_mock():
7 | s3_access = S3Access(test_awsimple_str)
8 | assert is_mock() == s3_access.is_mocked() # make sure that the AWSAccess instance is actually using mocking
9 |
--------------------------------------------------------------------------------
/test_awsimple/test_most_recent_error.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | from awsimple import SQSAccess, is_mock, is_using_localstack
4 |
5 | from test_awsimple import test_awsimple_str, drain
6 | from pytest_socket import disable_socket, enable_socket
7 |
8 |
9 | def test_most_recent_error():
10 | message_contents = "hi"
11 |
12 | drain()
13 |
14 | queue = SQSAccess(test_awsimple_str)
15 | queue.create_queue()
16 | queue.send(message_contents)
17 |
18 | if not is_mock():
19 | # emulate a short internet disruption
20 | disable_socket()
21 |
22 | time.sleep(3)
23 | message = queue.receive_message()
24 | if not is_mock() and not is_using_localstack():
25 | # doesn't work with moto nor localstack :(
26 | assert message.message == message_contents
27 |
28 | if not is_mock():
29 | enable_socket()
30 |
31 | if is_mock():
32 | assert queue.most_recent_error is None
33 | else:
34 | print(f"{queue.most_recent_error=}") # disable_socket() doesn't seem to work for this case - somehow we get the message anyway
35 |
36 | drain()
37 |
--------------------------------------------------------------------------------
/test_awsimple/test_s3_bucket.py:
--------------------------------------------------------------------------------
1 | import time
2 | from logging import getLogger
3 |
4 | import pytest
5 | from awsimple import S3Access, BucketNotFound
6 |
7 | from test_awsimple import test_awsimple_str
8 |
9 | test_bucket_name = f"{test_awsimple_str}temp" # temp bucket that will be created and deleted
10 |
11 | log = getLogger(__name__)
12 |
13 |
14 | def test_s3_bucket():
15 | s3_access = S3Access(test_bucket_name, profile_name=test_awsimple_str) # use non-keyword parameter for bucket_name
16 | created = s3_access.create_bucket() # may already exist
17 | log.info(f"{created=}")
18 |
19 | # wait for bucket to exist
20 | timeout_count = 100
21 | while not (bucket_exists := s3_access.bucket_exists()) and timeout_count > 0:
22 | time.sleep(3)
23 | timeout_count -= 1
24 |
25 | log.info(f"{bucket_exists=}")
26 |
27 | assert s3_access.bucket_exists()
28 |
29 | assert not s3_access.create_bucket() # already exists
30 | assert s3_access.delete_bucket()
31 |
32 | # wait for bucket to get deleted
33 | timeout_count = 100
34 | while s3_access.bucket_exists() and timeout_count > 0:
35 | time.sleep(3) # wait for bucket to be deleted
36 | timeout_count -= 1
37 |
38 | assert not s3_access.bucket_exists()
39 | assert not s3_access.delete_bucket() # was nothing to delete
40 |
41 |
42 | def test_s3_bucket_not_found():
43 | with pytest.raises(BucketNotFound):
44 | s3_access = S3Access("IDoNotExist")
45 | s3_access.dir()
46 |
--------------------------------------------------------------------------------
/test_awsimple/test_s3_bucket_not_found.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from awsimple import S3Access, BucketNotFound
4 |
5 | from test_awsimple import test_awsimple_str
6 |
7 |
8 | def test_s3_bucket_not_found():
9 | s3_access = S3Access(profile_name=test_awsimple_str, bucket_name="doesnotexist")
10 | with pytest.raises(BucketNotFound):
11 | s3_access.keys()
12 |
--------------------------------------------------------------------------------
/test_awsimple/test_s3_delete.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 |
4 | def test_s3_delete(s3_access):
5 | test_string = "hi"
6 | s3_key = "hi.txt"
7 | s3_access.write_string(test_string, s3_key) # will create if the bucket doesn't exist
8 | assert s3_access.read_string(s3_key) == test_string
9 | s3_access.delete_object(s3_key)
10 | with pytest.raises(s3_access.client.exceptions.NoSuchKey):
11 | s3_access.read_string(s3_key)
12 |
--------------------------------------------------------------------------------
/test_awsimple/test_s3_dir.py:
--------------------------------------------------------------------------------
1 | from pprint import pprint
2 | from pathlib import Path
3 |
4 | from awsimple import S3Access
5 |
6 | from test_awsimple import test_awsimple_str, temp_dir
7 |
8 |
9 | def test_s3_dir():
10 | s3_access = S3Access(test_awsimple_str, profile_name=test_awsimple_str) # use non-keyword parameter for bucket_name
11 |
12 | # set up
13 | s3_access.create_bucket() # may already exist
14 | test_file_name = "test.txt"
15 | test_file_path = Path(temp_dir, test_file_name)
16 | test_file_path.open("w").write("hello world")
17 | s3_access.upload(test_file_path, test_file_name) # may already be in S3
18 |
19 | s3_dir = s3_access.dir()
20 | pprint(s3_dir)
21 | md = s3_dir[test_file_name]
22 | assert md.key == test_file_name
23 | assert md.sha512 == "309ecc489c12d6eb4cc40f50c902f2b4d0ed77ee511a7c7a9bcd3ca86d4cd86f989dd35bc5ff499670da34255b45b0cfd830e81f605dcf7dc5542e93ae9cd76f" # "hello world"
24 |
25 |
26 | def test_s3_dir_prefix():
27 | s3_access = S3Access(test_awsimple_str, profile_name=test_awsimple_str) # use non-keyword parameter for bucket_name
28 |
29 | # set up
30 | s3_access.create_bucket() # may already exist
31 | test_file_name = "test.txt"
32 | test_file_path = Path(temp_dir, test_file_name)
33 | test_file_path.open("w").write("hello world")
34 | s3_access.upload(test_file_path, test_file_name) # may already be in S3
35 |
36 | s3_dir = s3_access.dir("test")
37 | pprint(s3_dir)
38 | md = s3_dir[test_file_name]
39 | assert md.key == test_file_name
40 | assert md.sha512 == "309ecc489c12d6eb4cc40f50c902f2b4d0ed77ee511a7c7a9bcd3ca86d4cd86f989dd35bc5ff499670da34255b45b0cfd830e81f605dcf7dc5542e93ae9cd76f" # "hello world"
41 |
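# The asserted digest is simply the SHA-512 of the uploaded bytes; it can be
# reproduced locally with the standard library (no AWS access needed):
import hashlib

assert hashlib.sha512(b"hello world").hexdigest() == (
    "309ecc489c12d6eb4cc40f50c902f2b4d0ed77ee511a7c7a9bcd3ca86d4cd86f"
    "989dd35bc5ff499670da34255b45b0cfd830e81f605dcf7dc5542e93ae9cd76f"
)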
--------------------------------------------------------------------------------
/test_awsimple/test_s3_does_not_exist.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from awsimple import S3Access, AWSimpleException
3 |
4 | from test_awsimple import test_awsimple_str
5 |
6 |
7 | def test_s3_object_does_not_exist():
8 | i_do_not_exist_key = "i-do-not-exist"
9 |
10 | s3_access = S3Access(profile_name=test_awsimple_str, bucket_name=test_awsimple_str) # keyword parameter for bucket_name
11 | assert s3_access.bucket_exists() # make sure the bucket exists
12 | with pytest.raises(s3_access.client.exceptions.NoSuchKey):
13 | s3_access.read_string(i_do_not_exist_key)
14 |
15 | with pytest.raises(AWSimpleException):
16 | s3_access.get_s3_object_metadata(i_do_not_exist_key)
17 |
--------------------------------------------------------------------------------
/test_awsimple/test_s3_empty_bucket.py:
--------------------------------------------------------------------------------
1 | import platform
2 | import getpass
3 |
4 | from awsimple import S3Access
5 |
6 |
7 | def test_s3_empty_bucket():
8 |     bucket_name = f"emptybuckettest{platform.node()}{getpass.getuser()}".lower()  # must be globally unique when using real S3
9 |     print(f"{bucket_name=}")
10 |     s3_access = S3Access(bucket_name)
11 |     s3_access.create_bucket()
12 |     assert s3_access.bucket_exists()
13 |     assert len(s3_access.dir()) == 0
14 |     s3_access.delete_bucket()
15 |
--------------------------------------------------------------------------------
/test_awsimple/test_s3_file_transfer.py:
--------------------------------------------------------------------------------
1 | import time
2 | from datetime import timedelta
3 | from pathlib import Path
4 | from math import isclose
5 | import os
6 | from shutil import rmtree
7 | from logging import getLogger
8 |
9 | from awsimple import S3Access, get_directory_size, is_mock, is_using_localstack
10 | from test_awsimple import test_awsimple_str, never_change_file_name, temp_dir, cache_dir
11 |
12 | big_file_name = "big.txt"
13 | big_file_max_size = round(100e6)  # large enough to force a multipart upload; it would time out with default AWS timeouts (awsimple uses longer-than-default timeouts)
14 |
15 | # real AWS
16 | never_change_size = 67
17 | never_change_mtime = 1636830116.0
18 | never_change_etag = "e3cb2ac8d7d4a8339ea3653f4f155ab4"
19 |
20 | log = getLogger(__name__)
21 |
22 |
23 | def test_get_never_change_metadata(s3_access) -> None:
24 | global never_change_size, never_change_mtime, never_change_etag
25 |
26 | if is_mock() or is_using_localstack():
27 |         # mocking always starts with nothing so we need to "upload" this file, but use boto3 so we don't write awsimple's SHA512.
28 | # localstack is similar in that we need to ensure we make the file.
29 |
30 | test_file_path = Path(temp_dir, never_change_file_name)
31 | never_change_file_contents = "modification Aug 21, 2020 at 2:51 PM PT\nnever change this file\n"
32 | test_file_path.open("w").write(never_change_file_contents)
33 | s3_access.client.upload_file(str(test_file_path), test_awsimple_str, never_change_file_name) # no awsimple SHA512
34 |
35 | keys = [obj["Key"] for obj in s3_access.client.list_objects_v2(Bucket=test_awsimple_str)["Contents"]]
36 | assert never_change_file_name in keys
37 |
38 | metadata = s3_access.get_s3_object_metadata(never_change_file_name)
39 | never_change_mtime = metadata.mtime.timestamp()
40 | never_change_etag = metadata.etag
41 | never_change_size = metadata.size
42 |
43 |
44 | def test_s3_read_string(s3_access):
45 | test_string = str(time.time()) # so it changes between tests
46 |
47 | # s3_access.create_bucket() # may already exist
48 | s3_access.write_string(test_string, test_awsimple_str)
49 | assert s3_access.read_string(test_awsimple_str) == test_string
50 |
51 |
52 | def test_s3_big_file_upload(s3_access):
53 | # test big file upload (e.g. that we don't get a timeout)
54 | # this is run before the cache tests (hence the function name)
55 |
56 | big_last_run_file_path = Path("big_last_run.txt")
57 | big_last_run_file_path.parent.mkdir(exist_ok=True, parents=True)
58 | last_run = 0.0
59 | if not (is_mock() or is_using_localstack()):
60 | # avoid large frequent file uploads with real AWS
61 | try:
62 | last_run = float(big_last_run_file_path.open().read().strip())
63 | except FileNotFoundError:
64 | pass
65 |
66 | # only run once a day max since it takes so long
67 | if last_run + timedelta(days=1).total_seconds() < time.time():
68 | big_file_path = Path(temp_dir, big_file_name)
69 | size = big_file_max_size / 1000 # start with something small
70 | while size < big_file_max_size:
71 | size *= 2 # get bigger on each iteration
72 | size = min(big_file_max_size, size) # make sure at the end we do one of max size
73 | with big_file_path.open("w") as f:
74 | f.truncate(round(size)) # this quickly makes a (sparse) file filled with zeros
75 | start = time.time()
76 | s3_access.upload(big_file_path, big_file_name)
77 | log.info(f"{time.time() - start},{size:.0f}")
78 |
79 | big_last_run_file_path.open("w").write(str(time.time()))
80 | else:
81 | log.info(f"last run {time.time() - last_run} seconds ago so not running now")
82 |
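# f.truncate(n) in the loop above extends a file to n bytes without writing
# data, which most filesystems store as a sparse file (fast to create, little
# disk used). Standalone demo (the file name is illustrative; Path and
# temp_dir are imported above):
demo_path = Path(temp_dir, "sparse_demo.bin")
with demo_path.open("w") as demo_file:
    demo_file.truncate(10_000_000)  # ~10 MB logical size
assert demo_path.stat().st_size == 10_000_000
demo_path.unlink()  # clean up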
83 |
84 | def test_s3_upload(s3_access):
85 | test_file_name = "test.txt"
86 | test_file_path = Path(temp_dir, test_file_name)
87 | test_file_path.open("w").write("hello world")
88 | assert s3_access.upload(test_file_path, test_file_name, force=True)
89 | time.sleep(3)
90 | assert s3_access.object_exists(test_file_name)
91 |
92 |
93 | def test_s3_z_metadata(s3_access):
94 |     # does not work for mock (todo: fix)
95 | test_file_name = "test.txt"
96 | s3_object_metadata = s3_access.get_s3_object_metadata(test_file_name)
97 | # "hello world" uploaded with awsimple
98 | assert s3_object_metadata.sha512 == "309ecc489c12d6eb4cc40f50c902f2b4d0ed77ee511a7c7a9bcd3ca86d4cd86f989dd35bc5ff499670da34255b45b0cfd830e81f605dcf7dc5542e93ae9cd76f"
99 | assert s3_object_metadata.size == 11
100 |
101 |
102 | def test_s3_download_dest_full_path(s3_access):
103 | dest_path = Path(temp_dir, never_change_file_name)
104 | dest_path.unlink(missing_ok=True)
105 | success = s3_access.download(never_change_file_name, dest_path) # dest is a full path
106 | assert success
107 | assert dest_path.exists()
108 | assert isclose(os.path.getmtime(dest_path), never_change_mtime, rel_tol=0.0, abs_tol=3.0)
109 |
110 |
111 | def test_s3_download_dest_dir(s3_access):
112 | dest_path = Path(temp_dir, never_change_file_name)
113 | dest_path.unlink(missing_ok=True)
114 | success = s3_access.download(never_change_file_name, temp_dir) # dest is a directory
115 | assert success
116 | assert dest_path.exists()
117 | assert isclose(os.path.getmtime(dest_path), never_change_mtime, rel_tol=0.0, abs_tol=3.0)
118 |
119 |
120 | def test_s3_metadata_not_uploaded_with_awsimple(s3_access):
121 | bucket_dir = s3_access.dir()
122 | assert len(bucket_dir) > 0
123 |     assert bucket_dir[never_change_file_name].size == never_change_size
124 | s3_object_metadata = s3_access.get_s3_object_metadata(never_change_file_name)
125 | mtime_epoch = s3_object_metadata.mtime.timestamp()
126 |     assert isclose(mtime_epoch, never_change_mtime, rel_tol=0.0, abs_tol=3.0)  # abs_tol=3.0 is a rough guess (SWAG)
127 | assert s3_object_metadata.etag == never_change_etag
128 | assert s3_object_metadata.sha512 is None # not uploaded with awsimple
129 | assert s3_object_metadata.size == never_change_size
130 |
131 |
132 | def _s3_download(dest: Path, s3_access):
133 | """
134 | :param dest: directory or file path to download to
135 | :param s3_access: S3Access
136 | """
137 | dest_path = Path(temp_dir, never_change_file_name) # expect file to be downloaded here
138 | # start with empty cache
139 | rmtree(cache_dir, ignore_errors=True)
140 | cache_dir.mkdir(parents=True, exist_ok=True)
141 | dest_path.unlink(missing_ok=True)
142 | download_status = s3_access.download_cached(never_change_file_name, dest)
143 | assert download_status.success
144 | assert not download_status.cache_hit
145 | assert download_status.cache_write
146 | assert dest_path.exists()
147 | # download cached
148 | dest_path.unlink()
149 | download_status = s3_access.download_cached(never_change_file_name, dest)
150 | assert download_status.success
151 | assert download_status.cache_hit
152 | assert not download_status.cache_write
153 | assert dest_path.exists()
154 |
155 | # with warm cache
156 | dest_path.unlink()
157 | download_status = s3_access.download_cached(never_change_file_name, dest)
158 | assert download_status.success
159 | assert download_status.cache_hit
160 | assert dest_path.exists()
161 |
162 |
163 | def _s3_download_big(dest: Path, s3_access):
164 | # download big file with normal cache size
165 | cache_size = get_directory_size(cache_dir)
166 | assert cache_size < 1000 # big file not in cache
167 | big_file_path = Path(temp_dir, big_file_name)
168 | download_status = s3_access.download_cached(big_file_name, dest)
169 | assert download_status.success
170 | assert not download_status.cache_hit
171 | assert download_status.cache_write
172 | assert big_file_path.exists()
173 | cache_size = get_directory_size(cache_dir)
174 | assert cache_size > 1000 # big file is in cache
175 |
176 |
177 | def test_s3_download_cached(s3_access):
178 | _s3_download(Path(temp_dir, never_change_file_name), s3_access) # small file with no AWSimple SHA512
179 | _s3_download_big(Path(temp_dir, big_file_name), s3_access)
180 |
181 |
182 | def test_s3_download_cached_dir(s3_access):
183 | _s3_download(temp_dir, s3_access)
184 | _s3_download_big(temp_dir, s3_access)
185 |
186 |
187 | def test_cache_eviction(s3_access):
188 | # force cache eviction
189 | cache_max = 100
190 | eviction_dir = Path(temp_dir, "eviction")
191 | eviction_cache = Path(eviction_dir, "cache")
192 | s3_access_cache_eviction = S3Access(profile_name=test_awsimple_str, bucket_name=test_awsimple_str, cache_dir=eviction_cache, cache_max_absolute=cache_max)
193 | size = 50
194 | rmtree(eviction_dir, ignore_errors=True)
195 | while size <= 2 * cache_max:
196 | file_name = f"t{size}.txt"
197 | source_file_path = Path(eviction_dir, "source", file_name)
198 | source_file_path.parent.mkdir(parents=True, exist_ok=True)
199 |
200 | # upload
201 | with source_file_path.open("w") as f:
202 | f.truncate(round(size)) # this quickly makes a (sparse) file filled with zeros
203 | s3_access_cache_eviction.upload(source_file_path, file_name)
204 |
205 | dest_path = Path(eviction_dir, "dest", file_name)
206 |
207 | # cold download
208 | status_cold = s3_access_cache_eviction.download_cached(file_name, dest_path)
209 | assert not status_cold.cache_hit
210 | if size <= cache_max:
211 | assert status_cold.cache_write
212 |
213 | # warm download
214 | assert dest_path.exists()
215 | status_warm = s3_access_cache_eviction.download_cached(file_name, dest_path)
216 | if size <= cache_max:
217 | assert status_warm.cache_hit
218 | assert not status_warm.cache_write
219 | assert dest_path.exists()
220 |
221 | # make sure cache stays within max size limit
222 | cache_size = get_directory_size(eviction_cache)
223 | assert cache_size <= cache_max # make sure we stay within bounds
224 |
225 | size *= 2
226 |
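# Expected behavior of the eviction loop above with cache_max=100: sizes run
# 50, 100, 200 bytes; the 50- and 100-byte downloads write to the cache, the
# 200-byte object exceeds cache_max (so the test makes no cache_write claim
# for it), and get_directory_size(eviction_cache) never exceeds 100 bytes.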
--------------------------------------------------------------------------------
/test_awsimple/test_s3_keys.py:
--------------------------------------------------------------------------------
1 | from pprint import pprint
2 | from pathlib import Path
3 |
4 | from awsimple import S3Access
5 |
6 | from test_awsimple import test_awsimple_str, temp_dir
7 |
8 |
9 | def test_s3_keys():
10 | s3_access = S3Access(test_awsimple_str, profile_name=test_awsimple_str) # use non-keyword parameter for bucket_name
11 |
12 | # set up
13 | s3_access.create_bucket() # may already exist
14 | test_file_name = "test.txt"
15 | test_file_name_2 = "test2.txt"
16 | test_file_path = Path(temp_dir, test_file_name)
17 | test_file_path.open("w").write("hello world")
18 | s3_access.upload(test_file_path, test_file_name_2) # may already be in S3
19 | s3_access.upload(test_file_path, test_file_name) # may already be in S3
20 |
21 | s3_keys = s3_access.keys()
22 | pprint(s3_keys)
23 | # for real AWS I may have other objects in the test bucket
24 | assert test_file_name in s3_keys
25 | assert test_file_name_2 in s3_keys
26 |
27 |
28 | def test_s3_keys_prefix():
29 | s3_access = S3Access(test_awsimple_str, profile_name=test_awsimple_str) # use non-keyword parameter for bucket_name
30 |
31 | # set up
32 | s3_access.create_bucket() # may already exist
33 | test_file_name = "test.txt"
34 | test_file_name_2 = "test2.txt"
35 | test_file_path = Path(temp_dir, test_file_name)
36 | test_file_path.open("w").write("hello world")
37 | s3_access.upload(test_file_path, test_file_name_2) # may already be in S3
38 | s3_access.upload(test_file_path, test_file_name) # may already be in S3
39 |
40 | s3_keys = s3_access.keys("test2")
41 | pprint(s3_keys)
42 | # for real AWS I may have other objects in the test bucket
43 | assert test_file_name not in s3_keys
44 | assert test_file_name_2 in s3_keys
45 |
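# keys(prefix) filters by S3 key prefix, so keys("test2") matches "test2.txt"
# but not "test.txt". Equivalent client-side filter (sketch):
s3_access_sketch = S3Access(test_awsimple_str, profile_name=test_awsimple_str)
matching_keys = [key for key in s3_access_sketch.keys() if key.startswith("test2")]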
--------------------------------------------------------------------------------
/test_awsimple/test_s3_list_buckets.py:
--------------------------------------------------------------------------------
1 | from pprint import pprint
2 |
3 | from awsimple import S3Access
4 |
5 | from test_awsimple import test_awsimple_str
6 |
7 |
8 | def test_s3_list_buckets():
9 | bucket_names = S3Access().bucket_list()
10 | pprint(bucket_names)
11 | assert test_awsimple_str in bucket_names
12 |
--------------------------------------------------------------------------------
/test_awsimple/test_s3_multiple_transfers.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | import pytest
3 | from shutil import rmtree
4 |
5 | from awsimple import AWSimpleException, is_mock
6 |
7 | from test_awsimple import temp_dir, cache_dir
8 |
9 |
10 | def check_file_contents(file_path: Path, expected_contents: str):
11 | with file_path.open() as f:
12 | file_contents = f.read()
13 | assert file_contents == expected_contents
14 |
15 |
16 | def test_s3_multiple_transfers(s3_access):
17 | s3_paths = {}
18 | rmtree(temp_dir)
19 | for test_string in ["a", "b"]:
20 | s3_paths[test_string] = {}
21 | for mode in ["in", "out"]:
22 | p = Path(temp_dir, mode, f"{test_string}.txt")
23 | p.parent.mkdir(parents=True, exist_ok=True)
24 | if mode == "in":
25 | with p.open("w") as f:
26 | f.write(test_string)
27 | s3_paths[test_string][mode] = p
28 |
29 | if is_mock():
30 | with pytest.raises(AWSimpleException):
31 | s3_access.download_cached("a", s3_paths["a"]["out"]) # won't exist at first if mocked
32 |
33 | # upload and download file
34 | s3_access.upload(s3_paths["a"]["in"], "a")
35 | download_status = s3_access.download_cached("a", s3_paths["a"]["out"])
36 | assert download_status.success
37 | assert not download_status.cache_hit
38 | assert download_status.cache_write
39 | check_file_contents(s3_paths["a"]["out"], "a")
40 |
41 |     # upload a different file to the same key and check that we get the contents of that new file
42 | s3_access.upload(s3_paths["b"]["in"], "a")
43 | download_status = s3_access.download_cached("a", s3_paths["a"]["out"])
44 | assert download_status.success
45 | assert not download_status.cache_hit
46 | assert download_status.cache_write
47 | check_file_contents(s3_paths["a"]["out"], "b")
48 |
49 | # cached download
50 | download_status = s3_access.download_cached("a", s3_paths["a"]["out"])
51 | assert download_status.success
52 | assert download_status.cache_hit
53 | assert not download_status.cache_write
54 | check_file_contents(s3_paths["a"]["out"], "b")
55 |
56 | # put "a" back and just use regular download (not cached)
57 | s3_access.upload(s3_paths["a"]["in"], "a")
58 | assert s3_access.download("a", s3_paths["a"]["out"])
59 | check_file_contents(s3_paths["a"]["out"], "a")
60 |
61 |     # write something else to that key
62 | s3_access.write_string("c", "a")
63 | assert s3_access.read_string("a") == "c"
64 |
65 | # now upload and download an object
66 | test_dict = {"z": 3}
67 | s3_access.upload_object_as_json(test_dict, "a")
68 | downloaded_dict = s3_access.download_object_as_json("a")
69 | assert test_dict == downloaded_dict
70 | downloaded_dict = s3_access.download_object_as_json_cached("a")
71 | assert test_dict == downloaded_dict
72 |
73 | assert len(list(cache_dir.glob("*"))) == 3 # there should be 3 entries in the cache at this point
74 |
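# Why exactly 3 cache entries: three downloads above wrote to the cache - key
# "a" with contents "a", key "a" with contents "b", and the JSON object
# {"z": 3} - so the cache evidently keeps one entry per distinct object
# version, while the plain download() and read_string() calls bypass it.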
--------------------------------------------------------------------------------
/test_awsimple/test_s3_object_floats.py:
--------------------------------------------------------------------------------
1 | def test_s3_object_floats(s3_access):
2 | object_with_floats = {"0.1": 2.3456789e-11}
3 | s3_key = "a"
4 | s3_access.upload_object_as_json(object_with_floats, s3_key)
5 | s3_object = s3_access.download_object_as_json_cached(s3_key)
6 | print(s3_object)
7 | assert s3_object == object_with_floats
8 |
--------------------------------------------------------------------------------
/test_awsimple/test_s3_public_readable.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | import time
3 |
4 | from awsimple import S3Access, is_using_localstack
5 | from requests import get
6 |
7 | from test_awsimple import test_awsimple_str, temp_dir
8 |
9 |
10 | def test_s3_public_readable():
11 | contents = "I am public readable"
12 | s3_access = S3Access(profile_name=test_awsimple_str, bucket_name=test_awsimple_str)
13 | s3_access.set_public_readable(True)
14 | test_file_name = "public_readable.txt"
15 | test_file_path = Path(temp_dir, test_file_name)
16 | test_file_path.open("w").write(contents)
17 | assert s3_access.upload(test_file_path, test_file_name, force=True)
18 | count = 0
19 | while not s3_access.object_exists(test_file_name) and count < 100:
20 | time.sleep(1)
21 | count += 1
22 | assert s3_access.object_exists(test_file_name)
23 |
24 | # read from the URL to see if the contents are public readable
25 | metadata = s3_access.get_s3_object_metadata(test_file_name)
26 | if not is_using_localstack():
27 | # localstack doesn't provide URL based access
28 | object_contents = get(metadata.url).content.decode("utf-8")
29 | assert object_contents == contents
30 |
--------------------------------------------------------------------------------
/test_awsimple/test_s3_python_object.py:
--------------------------------------------------------------------------------
1 | from awsimple import S3Access
2 |
3 | from test_awsimple import test_awsimple_str
4 |
5 |
6 | def test_s3_python_object():
7 | my_dict_a = {"a": 1}
8 | my_dict_b = {"b": 2}
9 | my_list = [1, 2, 3]
10 | my_complex_dict = {"1": 2, "my_list": [0, 9], "my_dict": {"z": -1, "w": -2}}
11 |
12 | s3_key = "my_object"
13 | s3_access = S3Access(profile_name=test_awsimple_str, bucket_name=test_awsimple_str)
14 |
15 | for my_object in (my_dict_a, my_dict_b, my_list, my_complex_dict):
16 | s3_access.upload_object_as_json(my_object, s3_key)
17 |
18 | my_dict_from_s3 = s3_access.download_object_as_json(s3_key)
19 | assert my_object == my_dict_from_s3
20 |
21 | my_dict_from_s3 = s3_access.download_object_as_json_cached(s3_key)
22 | assert my_object == my_dict_from_s3
23 | my_dict_from_s3 = s3_access.download_object_as_json_cached(s3_key) # this will be the cached version
24 | assert my_object == my_dict_from_s3
25 | assert s3_access.download_status.cache_hit
26 |
--------------------------------------------------------------------------------
/test_awsimple/test_s3_string.py:
--------------------------------------------------------------------------------
1 | from awsimple import S3Access
2 |
3 | from test_awsimple import test_awsimple_str
4 |
5 |
6 | def test_s3_string():
7 | s3_access = S3Access(test_awsimple_str)
8 | s3_access.write_string(test_awsimple_str, test_awsimple_str)
9 | d = s3_access.dir()
10 | metadata = d[test_awsimple_str]
11 | assert metadata.size == len(test_awsimple_str)
12 | assert metadata.key == test_awsimple_str # the contents are the same as the key
13 | # https://passwordsgenerator.net/sha512-hash-generator/
14 | assert metadata.sha512.lower() == "D16764F12E4D13555A88372CFE702EF8AE07F24A3FFCEDE6E1CDC8B7BFC2B18EC3468A7752A09F100C9F24EA2BC77566A08972019FC04CF75AB3A64B475BDFA3".lower()
15 |
--------------------------------------------------------------------------------
/test_awsimple/test_s3_transfer_lines.py:
--------------------------------------------------------------------------------
1 | def test_s3_transfer_lines(s3_access):
2 | s3_key = "a"
3 | lines = ["1", "2"]
4 | s3_access.write_lines(lines, s3_key)
5 | read_lines = s3_access.read_lines(s3_key)
6 | assert lines == read_lines
7 |
--------------------------------------------------------------------------------
/test_awsimple/test_serializable.py:
--------------------------------------------------------------------------------
1 | from decimal import Decimal
2 | from enum import Enum
3 | from pathlib import Path
4 | from math import pi, isclose
5 |
6 | from PIL import Image
7 |
8 | from awsimple import dict_to_dynamodb, dynamodb_to_dict
9 |
10 |
11 | class TstClass(Enum):
12 | a = 1
13 | b = 2
14 |
15 |
16 | def test_make_serializable():
17 | values = {
18 | "d": Decimal(1.0),
19 | "s": "s",
20 | "bool": True,
21 | "a": TstClass.a,
22 | "b": TstClass.b,
23 | "binary": b"\0\1",
24 | "ni": -100, # negative integer
25 | "nbi": -100000000000000000000000000000000000, # negative big integer
26 | "pi": pi,
27 | }
28 | values["image"] = Image.open(Path("test_awsimple", "280px-PNG_transparency_demonstration_1.png"))
29 | values = dict_to_dynamodb(values)
30 | serial_values = dynamodb_to_dict(values)
31 | assert serial_values["d"] == 1.0
32 | assert serial_values["s"] == "s"
33 | assert serial_values["bool"] is True
34 | assert serial_values["a"] == "a"
35 | assert serial_values["b"] == "b"
36 | image_size = len(serial_values["image"])
37 | assert image_size == 141233 or image_size == 140065 # depending on the version of Pillow
38 | assert serial_values["binary"] == "b'\\x00\\x01'"
39 | assert isinstance(serial_values["ni"], int)
40 | assert isinstance(serial_values["nbi"], float) # ends up being a float, even though we'd prefer it as an int
41 | assert isclose(serial_values["pi"], pi)
42 |
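# Minimal round-trip sketch (no AWS access needed; mirrors the asserts above):
round_trip = dynamodb_to_dict(dict_to_dynamodb({"pi": pi, "flag": True}))
assert isclose(round_trip["pi"], pi)
assert round_trip["flag"] is True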
--------------------------------------------------------------------------------
/test_awsimple/test_sns_create.py:
--------------------------------------------------------------------------------
1 | from ismain import is_main
2 |
3 | from awsimple import SNSAccess
4 |
5 | from test_awsimple import test_awsimple_str
6 |
7 |
8 | def test_sns_create():
9 | sns_access = SNSAccess(test_awsimple_str)
10 | sns_access.create_topic()
11 |
12 |
13 | if is_main():
14 | test_sns_create()
15 |
--------------------------------------------------------------------------------
/test_awsimple/test_sns_publish.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | from awsimple import SNSAccess, SQSPollAccess, is_mock
4 |
5 | from test_awsimple import test_awsimple_str, drain
6 |
7 |
8 | def test_sns_publish():
9 | drain()
10 |
11 |     sqs_access = SQSPollAccess(test_awsimple_str)  # queue that will subscribe to this topic; we'll read from it at the end to test propagation from SNS to SQS
12 | sqs_access.create_queue()
13 | sns_access = SNSAccess(test_awsimple_str) # our test SNS topic
14 |
15 | sns_access.create_topic() # this can set the permissions, which can take a while to propagate so it might fail the first time through
16 |
17 | subscription_arn = sns_access.subscribe(sqs_access) # subscribe the SQS queue to the SNS topic
18 | print(f"{subscription_arn=}")
19 |
20 | # put in your actual email and run this at least once:
21 | # sns_access.subscribe("me@mydomain.com")
22 |
23 | message_string = "This is a test for awsimple."
24 | subject_string = "awsimple test"
25 | # doesn't work with moto :(
26 | if not is_mock():
27 | message_id = sns_access.publish(message_string, subject_string)
28 | print(f"{message_id=}")
29 | assert message_id is not None and len(message_id) > 0
30 |
31 | message = json.loads(sqs_access.receive_message().message)
32 | returned_message_string = message["Message"]
33 | print(f"{returned_message_string=}")
34 | assert returned_message_string == message_string
35 |
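# The SQS body delivered through an SNS subscription is a JSON envelope: the
# published text sits under "Message", next to standard SNS notification
# fields such as "Subject" and "TopicArn", hence json.loads(...)["Message"]
# above. Sketch, assuming a message is waiting on the subscribed queue:
envelope = json.loads(SQSPollAccess(test_awsimple_str).receive_message().message)
print(envelope["Message"], envelope.get("Subject"))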
--------------------------------------------------------------------------------
/test_awsimple/test_sqs_create_and_delete_queue.py:
--------------------------------------------------------------------------------
1 | from awsimple import SQSAccess, is_using_localstack
2 |
3 | from test_awsimple import test_awsimple_str
4 |
5 |
6 | def test_sqs_create_and_delete_queue():
7 | # have to wait 60 seconds from delete to (re)creation so don't use the same queue name as other tests
8 | queue_name = "createdelete"
9 | q = SQSAccess(queue_name, profile_name=test_awsimple_str)
10 | url = q.create_queue()
11 | print(url)
12 |
13 | if not is_using_localstack():
14 | # something like https://us-west-2.queue.amazonaws.com/076966278319/createdelete
15 | assert len(url) > 10
16 | assert url.endswith(queue_name)
17 | assert url.startswith("https://")
18 | assert "aws" in url
19 |
20 | q.delete_queue()
21 |
--------------------------------------------------------------------------------
/test_awsimple/test_sqs_get_arn.py:
--------------------------------------------------------------------------------
1 | from ismain import is_main
2 |
3 | from awsimple import SQSAccess
4 |
5 | from test_awsimple import test_awsimple_str
6 |
7 |
8 | def test_sqs_get_arn():
9 | sqs_access = SQSAccess(test_awsimple_str)
10 | sqs_access.create_queue()
11 | arn = sqs_access.get_arn()
12 |
13 | # e.g. arn:aws:sqs:us-west-2:123456789012:testawsimple
14 | print(f"{arn=}")
15 |
16 | assert arn.startswith("arn:aws:sqs:")
17 |     # AWS region and account number are in the middle
18 | assert arn.endswith(f":{test_awsimple_str}")
19 |
20 |
21 | if is_main():
22 | test_sqs_get_arn()
23 |
--------------------------------------------------------------------------------
/test_awsimple/test_sqs_messages.py:
--------------------------------------------------------------------------------
1 | from pprint import pprint
2 | import time
3 | import math
4 |
5 | from awsimple import SQSAccess, SQSPollAccess, is_using_localstack
6 |
7 | from test_awsimple import test_awsimple_str, drain
8 |
9 | send_message = "hi"
10 |
11 |
12 | def test_sqs_immediate_delete():
13 | drain()
14 |
15 | q = SQSAccess(test_awsimple_str, profile_name=test_awsimple_str)
16 | q.create_queue()
17 |
18 | send_time = time.time()
19 | q.send(send_message)
20 | time.sleep(0.1)
21 |
22 | while (receive_message := q.receive_message()) is None:
23 | time.sleep(0.1)
24 | print(receive_message)
25 | assert receive_message.message == send_message
26 | print(f"took {time.time() - send_time} seconds")
27 |
28 |
29 | def test_sqs_poll_immediate_delete():
30 | drain()
31 |
32 | q = SQSPollAccess(test_awsimple_str, profile_name=test_awsimple_str)
33 | q.create_queue()
34 |
35 | send_time = time.time()
36 | q.send(send_message)
37 |
38 | receive_message = q.receive_message() # will long poll so we expect the message to be available within one call
39 | assert receive_message is not None
40 | print(receive_message)
41 | assert receive_message.message == send_message
42 | print(f"took {time.time() - send_time} seconds")
43 |
44 |
45 | def test_sqs_poll_user_delete():
46 | work_time = 3.0
47 |
48 | drain()
49 |
50 | # populate the run time history
51 | queue = SQSAccess(test_awsimple_str, immediate_delete=False, profile_name=test_awsimple_str)
52 | queue.create_queue()
53 | queue._get_response_history_file_path().unlink(missing_ok=True)
54 | queue.max_history = 5 # test that we can delete old history values by using a very small history
55 | for value in range(0, queue.max_history):
56 | print(value)
57 | queue.send(str(value))
58 | while len(messages := queue.receive_messages()) > 0:
59 | time.sleep(work_time)
60 | pprint(messages)
61 | for m in messages:
62 | print(f"deleting {m.message}")
63 | m.delete()
64 |
65 | # now do a long poll style
66 | poll_queue = SQSPollAccess(test_awsimple_str, immediate_delete=False, profile_name=test_awsimple_str)
67 | poll_queue.create_queue()
68 |
69 | print("sending test message")
70 | send_time = time.time()
71 | poll_queue.send(send_message)
72 |
73 | receive_message = poll_queue.receive_message() # will long poll so we expect the message to be available within one call
74 | assert receive_message is not None
75 | print(receive_message.message)
76 | assert receive_message.message == send_message
77 | time.sleep(work_time) # do some work
78 | print(f"took {time.time() - send_time} seconds")
79 | receive_message.delete()
80 |
81 | nominal_work_time = poll_queue.calculate_nominal_work_time()
82 | print(f"{work_time=}, calculated {nominal_work_time=}")
83 | if not is_using_localstack():
84 | # localstack can be slow
85 | assert math.isclose(nominal_work_time, work_time, rel_tol=0.5, abs_tol=1.0) # fairly wide tolerance
86 |
87 |
88 | def test_sqs_n_messages():
89 | """
90 | test for a specific number of messages to be returned
91 | """
92 |
93 | drain()
94 |
95 | message = "hi"
96 | queue = SQSAccess(test_awsimple_str)
97 | queue.create_queue()
98 |
99 | # more than we'll try to take out, and more than the AWS max per call
100 | for _ in range(0, 14):
101 | queue.send(message)
102 | time.sleep(10.0) # wait for messages to become available
103 |
104 | received = queue.receive_messages(11) # just over the AWS max per call of 10
105 | assert len(received) == 11
106 |
107 | drain() # clean up unreceived messages
108 |
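# Short poll vs. long poll, as exercised above: SQSAccess.receive_message()
# returns promptly (possibly None), so callers poll in a loop, while
# SQSPollAccess.receive_message() long-polls, so a single call normally
# suffices. Minimal sketch:
quick = SQSAccess(test_awsimple_str).receive_message()        # may be None immediately
patient = SQSPollAccess(test_awsimple_str).receive_message()  # waits up to the long-poll max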
--------------------------------------------------------------------------------
/test_awsimple/test_sqs_messages_available_and_purge.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | from awsimple import SQSAccess, is_mock
4 |
5 | from test_awsimple import test_awsimple_str, drain
6 |
7 |
8 | def wait_for_n_messages_available(queue: SQSAccess, expected_number_of_messages: int):
9 | time_out = 0
10 | while (messages_available := queue.messages_available()) != expected_number_of_messages and time_out < 60:
11 | time_out += 1
12 | time.sleep(1.0)
13 | assert messages_available == expected_number_of_messages
14 |
15 |
16 | def test_sqs_message_available_and_purge():
17 | if not is_mock():
18 | drain()
19 |
20 | queue = SQSAccess(test_awsimple_str)
21 | queue.create_queue()
22 |
23 | wait_for_n_messages_available(queue, 0)
24 |
25 | for number_of_messages in range(1, 5):
26 | queue.send(str(number_of_messages))
27 | wait_for_n_messages_available(queue, number_of_messages)
28 |
29 | queue.purge()
30 | wait_for_n_messages_available(queue, 0)
31 |
--------------------------------------------------------------------------------
/test_awsimple/test_sqs_queue_exists.py:
--------------------------------------------------------------------------------
1 | from awsimple import SQSAccess, is_mock
2 |
3 | from test_awsimple import test_awsimple_str
4 |
5 |
6 | def test_sqs_queue_exists():
7 | q = SQSAccess(test_awsimple_str)
8 | q.create_queue()
9 | queue_exists = q.exists()
10 | # doesn't work with moto :(
11 | if not is_mock():
12 | assert queue_exists
13 | queue_exists = SQSAccess("IDoNotExist").exists()
14 | assert not queue_exists
15 |
--------------------------------------------------------------------------------
/test_awsimple/test_sqs_receive_nothing.py:
--------------------------------------------------------------------------------
1 | import time
2 | import math
3 |
4 | from awsimple import SQSAccess, SQSPollAccess, aws_sqs_long_poll_max_wait_time, is_mock, is_using_localstack
5 |
6 | from test_awsimple import test_awsimple_str, drain
7 |
8 | margin = 3.0
9 | rel_tol = 0.2
10 |
11 |
12 | def test_sqs_receive_nothing():
13 | drain()
14 | start = time.time()
15 | queue = SQSAccess(test_awsimple_str) # will return immediately
16 | assert queue.receive_message() is None
17 | assert len(queue.receive_messages()) == 0
18 | t = time.time() - start
19 | print(f"{t=}")
20 | if is_using_localstack():
21 |         assert t < 100.0  # localstack is slow
22 | else:
23 | assert t < 3.0 # "immediate"
24 |
25 |
26 | def test_sqs_receive_nothing_poll_one():
27 | if not is_mock():
28 | drain()
29 | start = time.time()
30 | queue = SQSPollAccess(test_awsimple_str) # will return in AWS SQS max wait time (e.g. 20 sec)
31 | queue.create_queue()
32 | assert queue.receive_message() is None
33 |
34 | t = time.time() - start
35 | print(f"{t=}")
36 | assert math.isclose(t, aws_sqs_long_poll_max_wait_time + margin, rel_tol=rel_tol, abs_tol=margin)
37 |
38 |
39 | def test_sqs_receive_nothing_poll_many():
40 | if not is_mock():
41 | drain()
42 | start = time.time()
43 | queue = SQSPollAccess(test_awsimple_str) # will return in AWS SQS max wait time (e.g. 20 sec)
44 | queue.create_queue()
45 | assert len(queue.receive_messages()) == 0
46 |
47 | t = time.time() - start
48 | print(f"{t=}")
49 | assert math.isclose(t, aws_sqs_long_poll_max_wait_time + margin, rel_tol=rel_tol, abs_tol=margin)
50 |
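# Worked tolerance for the asserts above, assuming the AWS long-poll max wait
# is 20 s: expected = 20 + 3 = 23, and isclose(t, 23, rel_tol=0.2, abs_tol=3.0)
# accepts |t - 23| <= max(0.2 * max(t, 23), 3.0), i.e. roughly 18.4 s to
# 28.75 s.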
--------------------------------------------------------------------------------
/test_awsimple/test_sqs_user_provided_timeout.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import time
3 |
4 | from awsimple import SQSPollAccess, SQSAccess, is_mock, is_using_localstack
5 |
6 | from test_awsimple import test_awsimple_str, drain
7 |
8 |
9 | def test_user_provided_timeout():
10 | if not is_mock():
11 | drain()
12 |
13 | send_message = "hello"
14 | work_time = 2.0
15 |
16 | qp = SQSPollAccess(test_awsimple_str, visibility_timeout=round(10.0 * work_time), immediate_delete=False, profile_name=test_awsimple_str)
17 | qp.create_queue()
18 | qp.send(send_message)
19 | time.sleep(1.0)
20 | receive_message = qp.receive_message()
21 | assert receive_message.message == send_message
22 |
23 | q = SQSAccess(test_awsimple_str, profile_name=test_awsimple_str)
24 | q.create_queue()
25 | assert q.receive_message() is None # make sure the message is now invisible
26 |
27 | if not is_mock():
28 |         receive_message.delete()  # not working for mock (todo: fix)
29 |
30 | assert q.receive_message() is None
31 |
32 |
33 | def test_user_provided_minimum_timeout():
34 | if not is_mock():
35 | drain()
36 |
37 | send_message = "hello"
38 | work_time = 2.0
39 |
40 | qp = SQSPollAccess(test_awsimple_str, minimum_visibility_timeout=round(10.0 * work_time), immediate_delete=False, profile_name=test_awsimple_str)
41 | qp.create_queue()
42 | qp.send(send_message)
43 | time.sleep(1.0)
44 | receive_message = qp.receive_message()
45 | assert receive_message.message == send_message
46 |
47 | q = SQSAccess(test_awsimple_str, profile_name=test_awsimple_str)
48 | q.create_queue()
49 | assert q.receive_message() is None # make sure the message is now invisible
50 |
51 | if not is_mock():
52 |         receive_message.delete()  # not working for mock (todo: fix)
53 |
54 | assert q.receive_message() is None
55 |
56 |
57 | def test_actually_timeout():
58 | if not is_mock():
59 | drain()
60 |
61 | send_message = "hello"
62 | if is_using_localstack():
63 | # localstack is slow
64 | work_time = 500.0
65 | else:
66 | work_time = 5.0
67 |
68 | qp = SQSPollAccess(test_awsimple_str, visibility_timeout=round(0.5 * work_time), immediate_delete=False, profile_name=test_awsimple_str)
69 | qp.create_queue()
70 | qp.send(send_message)
71 | time.sleep(1.0)
72 | receive_message = qp.receive_message()
73 | assert receive_message.message == send_message # got it once
74 |
75 | q = SQSAccess(test_awsimple_str, profile_name=test_awsimple_str)
76 | assert q.receive_message() is None # make sure the message is now invisible
77 | time.sleep(work_time) # will take "too long", so message should be available again on next receive_message
78 |
79 | if not is_mock():
80 |         # not working for mock (todo: fix)
81 | assert qp.receive_message().message == send_message
82 | receive_message.delete() # now we delete it
83 |
84 | assert q.receive_message() is None
85 |
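# Visibility-timeout lifecycle exercised by the three tests above:
# 1. receive_message() hides the message for visibility_timeout seconds
# 2. delete() within that window removes the message for good
# 3. if the window lapses first ("work took too long"), the message becomes
#    receivable again, which is why test_actually_timeout sees it twice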
86 |
87 | def test_user_provided_timeout_nonsensical_parameters():
88 | if not is_mock():
89 | drain()
90 |
91 | send_message = "hello"
92 | work_time = 2.0
93 |
94 | q = SQSPollAccess(test_awsimple_str, visibility_timeout=round(10.0 * work_time), profile_name=test_awsimple_str)
95 | q.create_queue()
96 | q.send(send_message)
97 | with pytest.raises(ValueError):
98 | q.receive_message()
99 |
--------------------------------------------------------------------------------
/test_awsimple/tst_paths.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | temp_dir = Path("temp")
4 | cache_dir = Path(temp_dir, "cache")
5 |
--------------------------------------------------------------------------------