├── .env ├── .github ├── pull_request_template.md └── workflows │ ├── publish.yml │ ├── test-publish.yml │ └── test.yml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── docs ├── Makefile ├── make.bat └── source │ ├── buckets.rst │ ├── conf.py │ ├── demo.gif │ ├── index.rst │ ├── introduction.rst │ ├── objects.rst │ └── requirements.txt ├── pyproject.toml ├── readthedocs.yaml ├── s3_tools ├── __init__.py ├── buckets │ ├── __init__.py │ ├── check.py │ ├── create.py │ ├── delete.py │ └── list.py ├── objects │ ├── __init__.py │ ├── check.py │ ├── copy.py │ ├── delete.py │ ├── download.py │ ├── list.py │ ├── move.py │ ├── presigned_url.py │ ├── read.py │ ├── upload.py │ └── write.py └── utils.py ├── tests ├── __init__.py ├── resources │ ├── empty.data │ └── mock_file.csv └── unit │ ├── buckets │ ├── test_check_bucket.py │ ├── test_create_bucket.py │ ├── test_delete_bucket.py │ └── test_list_buckets.py │ ├── conftest.py │ ├── objects │ ├── test_check_objects.py │ ├── test_copy_objects.py │ ├── test_delete_objects.py │ ├── test_download_objects.py │ ├── test_list_objects.py │ ├── test_move_objects.py │ ├── test_presigned_url.py │ ├── test_read_objects.py │ ├── test_upload_objects.py │ └── test_write_objects.py │ └── test_utils.py └── uv.lock /.env: -------------------------------------------------------------------------------- 1 | PYTHONPATH=. 2 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ## :pencil: Changelog: 2 | 3 | - :rotating_light: Breaking Changes: 4 | 5 | - :sparkles: Features: 6 | 7 | - :lock: Security: 8 | 9 | - :hammer_and_wrench: Improvements: 10 | 11 | - :bug: Bug Fixes: 12 | 13 | - :recycle: Refactoring: 14 | 15 | - :vertical_traffic_light: Test: 16 | 17 | - :books: Documentation: 18 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will publish a AWS S3 Tools Package when a release is created 2 | 3 | name: Publish 4 | 5 | on: 6 | release: 7 | types: 8 | - published 9 | 10 | jobs: 11 | publish: 12 | 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - name: Checkout code 17 | uses: actions/checkout@v4 18 | 19 | - name: Set up Python 20 | uses: actions/setup-python@v5 21 | with: 22 | python-version: '3.11' 23 | 24 | - name: Setup uv 25 | uses: astral-sh/setup-uv@v5 26 | with: 27 | version: "0.5.18" 28 | 29 | - name: Install dependencies 30 | run: uv sync 31 | 32 | - name: Build and publish 33 | env: 34 | UV_PUBLISH_TOKEN: ${{ secrets.PYPI_TOKEN }} 35 | run: | 36 | uv build 37 | uv publish 38 | -------------------------------------------------------------------------------- /.github/workflows/test-publish.yml: -------------------------------------------------------------------------------- 1 | name: Test Publish 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | env: 7 | UV_SYSTEM_PYTHON: 1 8 | 9 | jobs: 10 | 11 | publish: 12 | 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - name: Checkout code 17 | uses: actions/checkout@v4 18 | 19 | - name: Set up Python 20 | uses: actions/setup-python@v5 21 | with: 22 | python-version: '3.11' 23 | 24 | - name: Setup uv 25 | uses: astral-sh/setup-uv@v5 26 | with: 27 | version: "0.5.18" 28 | 29 | - name: Install all dependencies 30 | run: uv sync --extra progress 31 | 32 | - name: Test publish to PyPI 33 | 
env: 34 | UV_PUBLISH_TOKEN: ${{ secrets.TEST_PYPI_TOKEN }} 35 | UV_PUBLISH_URL: "https://test.pypi.org/legacy/" 36 | run: | 37 | uv build 38 | uv publish 39 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | 3 | name: Tests 4 | 5 | on: 6 | workflow_dispatch: 7 | 8 | pull_request: 9 | branches: 10 | - main 11 | 12 | env: 13 | UV_SYSTEM_PYTHON: 1 14 | 15 | jobs: 16 | 17 | coverage: 18 | 19 | runs-on: ubuntu-latest 20 | 21 | steps: 22 | - name: Checkout code 23 | uses: actions/checkout@v4 24 | 25 | - name: Set up Python 26 | uses: actions/setup-python@v5 27 | with: 28 | python-version: '3.11' 29 | 30 | - name: Setup uv 31 | uses: astral-sh/setup-uv@v5 32 | with: 33 | version: "0.5.18" 34 | 35 | - name: Install all dependencies 36 | run: uv sync --extra progress 37 | 38 | - name: Run Coverage 39 | run: make coverage 40 | 41 | - name: Upload test results to Codecov 42 | uses: codecov/test-results-action@v1 43 | with: 44 | file: ./junit.xml 45 | token: ${{ secrets.CODECOV_TOKEN }} 46 | 47 | - name: Upload coverage to Codecov 48 | uses: codecov/codecov-action@v5 49 | with: 50 | files: ./coverage.xml 51 | token: ${{ secrets.CODECOV_TOKEN }} 52 | verbose: true 53 | 54 | python-checks: 55 | 56 | runs-on: ubuntu-latest 57 | strategy: 58 | matrix: 59 | python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] 60 | 61 | steps: 62 | - name: Checkout code 63 | uses: actions/checkout@v4 64 | 65 | - name: Setup uv 66 | uses: astral-sh/setup-uv@v5 67 | with: 68 | version: "0.5.18" 69 | python-version: ${{ matrix.python-version }} 70 | 71 | - name: Install dependencies 72 | run: uv sync 73 | 74 | - name: Static tests 75 | run: make static-tests 76 | 77 | - name: Unit tests with minimal dependencies 78 | run: make unit-tests 79 | 80 | - name: Unit tests with extra dependencies 81 | run: | 82 | uv sync --extra progress 83 | make unit-tests 84 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | __pycache__/ 3 | 4 | # Distribution / packaging 5 | build/ 6 | dist/ 7 | 8 | # Unit test / coverage reports 9 | .coverage 10 | coverage.xml 11 | junit.xml 12 | .pytest_cache/ 13 | .mypy_cache/ 14 | .ruff_cache 15 | .pdm-build 16 | 17 | # Sphinx documentation 18 | docs/_build/ 19 | docs/source/_build 20 | 21 | # Environments 22 | .venv 23 | 24 | # IDE 25 | .vscode* 26 | 27 | # Others 28 | .DS_Store 29 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | When contributing to this repository, please first discuss the change you wish to make via issue, 4 | email, or any other method with the maintainers of this repository before making a change. 5 | 6 | Please note we have a code of conduct, please follow it in all your interactions with the project. 7 | 8 | ## Contributing to the Codebase 9 | 10 | The code is hosted on [GitHub](https://github.com/dlite-tools/aws-s3-tools), 11 | so you will need to use [Git](http://git-scm.com/) to fork and clone the project, 12 | and make changes to the codebase. 
Once you have obtained a copy of the code, 13 | you should create a development environment that is separate from your existing 14 | Python environment so that you can make and test changes without compromising your 15 | own work environment. 16 | 17 | ### Creating a Python environment 18 | 19 | To create an isolated development environment: 20 | 21 | * Install [uv](https://docs.astral.sh/uv/) 22 | * Make sure that you have cloned the repository 23 | * Go to the project source directory 24 | * Build the environment by running `uv sync` 25 | 26 | ### Run the test suite locally 27 | 28 | Before submitting your changes for review, make sure to check that your changes 29 | do not break any tests by running: 30 | 31 | ```shell 32 | make static-tests 33 | make unit-tests 34 | ``` 35 | 36 | Do not forget to create new tests to cover the code alterations. 37 | 38 | ### Pull Request Process 39 | 40 | 1. Ensure any install or build dependencies are removed before the end of the build. 41 | 2. Update the [README.md](README.md) with details of changes to the interface; this includes new environment variables, exposed ports, useful file locations and container parameters. 42 | 3. Increase the version numbers in any example files and the [README.md](README.md) to the new version that this Pull Request would represent. The versioning scheme we use is [semantic versioning](http://semver.org/). 43 | 44 | ## Code of Conduct 45 | 46 | ### Our Pledge 47 | 48 | In the interest of fostering an open and welcoming environment, we as 49 | contributors and maintainers pledge to making participation in our project and 50 | our community a harassment-free experience for everyone, regardless of age, body 51 | size, disability, ethnicity, gender identity and expression, level of experience, 52 | nationality, personal appearance, race, religion, or sexual identity and 53 | orientation. 54 | 55 | ### Our Standards 56 | 57 | Examples of behavior that contributes to creating a positive environment 58 | include: 59 | 60 | * Using welcoming and inclusive language 61 | * Being respectful of differing viewpoints and experiences 62 | * Gracefully accepting constructive criticism 63 | * Focusing on what is best for the community 64 | * Showing empathy towards other community members 65 | 66 | Examples of unacceptable behavior by participants include: 67 | 68 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 69 | * Trolling, insulting/derogatory comments, and personal or political attacks 70 | * Public or private harassment 71 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 72 | * Other conduct which could reasonably be considered inappropriate in a professional setting 73 | 74 | ### Our Responsibilities 75 | 76 | Project maintainers are responsible for clarifying the standards of acceptable 77 | behavior and are expected to take appropriate and fair corrective action in 78 | response to any instances of unacceptable behavior. 79 | 80 | Project maintainers have the right and responsibility to remove, edit, or 81 | reject comments, commits, code, wiki edits, issues, and other contributions 82 | that are not aligned to this Code of Conduct, or to ban temporarily or 83 | permanently any contributor for other behaviors that they deem inappropriate, 84 | threatening, offensive, or harmful. 
85 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Daniel Ferrari 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PACKAGE=s3_tools 2 | UNIT_TESTS=tests/unit 3 | 4 | static-tests: 5 | ###### Running linter analysis ###### 6 | uv run ruff check $(PACKAGE) $(UNIT_TESTS) 7 | 8 | ###### Running static type analysis ###### 9 | uv run mypy $(PACKAGE) $(UNIT_TESTS) 10 | 11 | unit-tests: 12 | ###### Running unit tests ###### 13 | uv run pytest -v $(UNIT_TESTS) 14 | 15 | coverage: 16 | ###### Running coverage analysis ###### 17 | uv run pytest --cov=$(PACKAGE) --cov-report=term-missing --cov-report=xml --junitxml=junit.xml 18 | 19 | build-docs: 20 | ###### Build documentation ###### 21 | uv run make -C docs html 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AWS S3 Tools 2 | 3 | ![MIT License](https://img.shields.io/pypi/l/aws-s3-tools) 4 | [![Test](https://github.com/dlite-tools/aws-s3-tools/actions/workflows/test.yml/badge.svg)](https://github.com/dlite-tools/aws-s3-tools/actions/workflows/test.yml) 5 | [![codecov](https://codecov.io/gh/dlite-tools/aws-s3-tools/branch/main/graph/badge.svg?token=YRM26tZexs)](https://codecov.io/gh/dlite-tools/aws-s3-tools) 6 | ![Documentation Status](https://readthedocs.org/projects/aws-s3-tools/badge/?version=latest) 7 | ![Package Version](https://img.shields.io/pypi/v/aws-s3-tools) 8 | ![Python Version](https://img.shields.io/pypi/pyversions/aws-s3-tools) 9 | 10 | AWS S3 Tools is a Python package to make it easier to interact with S3 objects, where you can: 11 | 12 | - List S3 bucket content 13 | - Check if an S3 object exists 14 | - Download/upload S3 objects to/from local files 15 | - Read/write S3 objects into/from Python variables 16 | - Delete/move/copy S3 objects 17 | 18 | The AWS S3 authentication is done via boto3 package, via environment variables, aws config file, or parameters. 
19 | All S3 object functions in this package can take AWS Session authentication through the `aws_auth` parameter, a dictionary with the schema below (not all fields are required). 20 | To learn more about the AWS authentication mechanism, [read the boto3 documentation](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html). 21 | 22 | ```python 23 | aws_auth = { 24 | 'region_name': 'REGION', 25 | 'aws_access_key_id': 'ACCESS_KEY', 26 | 'aws_secret_access_key': 'SECRET_KEY', 27 | 'aws_session_token': 'SESSION_TOKEN', 28 | 'profile_name': 'PROFILE_NAME' 29 | } 30 | ``` 31 | 32 | --- 33 | 34 | ## Installation 35 | 36 | You can install AWS S3 Tools from PyPI with `pip` or your favorite package manager: 37 | 38 | ```shell 39 | pip install aws-s3-tools 40 | ``` 41 | 42 | Add the ``-U`` switch to update to the current version, if AWS S3 Tools is already installed. 43 | 44 | If you want to use the **progress bar** feature when downloading or uploading, 45 | you need to install an extra dependency. 46 | 47 | ```shell 48 | pip install aws-s3-tools[progress] 49 | ``` 50 | 51 | --- 52 | 53 | ## Usage 54 | 55 | [The full documentation can be found here](https://aws-s3-tools.readthedocs.io/en/latest/index.html). 56 | 57 | ```python 58 | from s3_tools import object_exists 59 | 60 | if object_exists("my-bucket", "s3-prefix/object.data"): 61 |     ...  # Your code goes here 62 | else: 63 |     print("Object not found") 64 | ``` 65 | 66 | Example using the progress bar: 67 | 68 | ```python 69 | from s3_tools import upload_folder_to_prefix 70 | 71 | result = upload_folder_to_prefix( 72 | bucket='dlite-tools', 73 | prefix='aws-s3-tools', 74 | search_str='*.py', 75 | threads=2, 76 | folder='s3_tools', 77 | show_progress=True 78 | ) 79 | ``` 80 | 81 | Progress bar when running the code above: 82 | 83 | ![Progress bar gif](docs/source/demo.gif) 84 | 85 | --- 86 | 87 | ## Contributions 88 | 89 | All contributions, bug reports, bug fixes, documentation improvements, 90 | enhancements and ideas are welcome. 91 | 92 | A detailed overview of how to contribute can be found in the 93 | [contributing guide](CONTRIBUTING.md) 94 | on GitHub. 95 | 96 | --- 97 | 98 | ## Issues 99 | 100 | Go [here](https://github.com/dlite-tools/aws-s3-tools/issues) to submit feature 101 | requests or bug reports. 102 | 103 | --- 104 | 105 | ## License and Credits 106 | 107 | `AWS S3 Tools` is licensed under the [MIT license](LICENSE) and is written and 108 | maintained by: 109 | 110 | - Daniel Ferrari ([@FerrariDG](https://github.com/FerrariDG)) 111 | - Carlos Alves ([@cmalves](https://github.com/cmalves)) 112 | - Tomás Osório ([@tomassosorio](https://github.com/tomassosorio/)) 113 | 114 | --- 115 | 116 | ## Acknowledgement 117 | 118 | The idea for these functions comes from an amazing team that I worked with. This repo refactors and documents that work to make it public for everyone. 
119 | 120 | Many thanks to: 121 | 122 | - [Anabela Nogueira](https://www.linkedin.com/in/abnogueira/) 123 | - [Carlos Alves](https://www.linkedin.com/in/carlosmalves/) 124 | - [João Machado](https://www.linkedin.com/in/machadojpf/) 125 | - [Renato Dantas](https://www.linkedin.com/in/renatomoura/) 126 | - [Ricardo Garcia](https://www.linkedin.com/in/ricardo-g-oliveira/) 127 | - [Tomás Osório](https://www.linkedin.com/in/tomas-osorio/) 128 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/source/buckets.rst: -------------------------------------------------------------------------------- 1 | S3 Buckets 2 | ========== 3 | 4 | All functionalities to deal with AWS S3 Buckets. 5 | 6 | Check 7 | ----- 8 | 9 | .. automodule:: s3_tools.buckets.check 10 | :members: 11 | :undoc-members: 12 | :show-inheritance: 13 | 14 | Create 15 | ------ 16 | 17 | .. automodule:: s3_tools.buckets.create 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | Delete 23 | ------ 24 | 25 | .. automodule:: s3_tools.buckets.delete 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | 30 | List 31 | ---- 32 | 33 | .. automodule:: s3_tools.buckets.list 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 
2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | 13 | import os 14 | import sys 15 | 16 | import toml 17 | 18 | sys.path.insert(0, os.path.abspath('../..')) 19 | 20 | # -- Project information ----------------------------------------------------- 21 | 22 | with open('../../pyproject.toml') as f: 23 | pyproject = toml.load(f) 24 | 25 | project = pyproject['project']['name'] 26 | copyright = pyproject['tool']['aws-s3-tools']['copyright'] 27 | author = pyproject['project']['authors'][0]['name'] 28 | 29 | # The full version, including alpha/beta/rc tags 30 | release = pyproject['project']['version'] 31 | 32 | 33 | # -- General configuration --------------------------------------------------- 34 | 35 | # Add any Sphinx extension module names here, as strings. They can be 36 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 37 | # ones. 38 | extensions = [ 39 | 'sphinx.ext.autodoc', 40 | 'sphinx.ext.viewcode', 41 | 'sphinx.ext.napoleon', 42 | 'sphinx.ext.autosummary' 43 | ] 44 | 45 | # Add any paths that contain templates here, relative to this directory. 46 | templates_path = ['_templates'] 47 | 48 | # List of patterns, relative to source directory, that match files and 49 | # directories to ignore when looking for source files. 50 | # This pattern also affects html_static_path and html_extra_path. 51 | exclude_patterns = [] 52 | 53 | 54 | # -- Options for HTML output ------------------------------------------------- 55 | 56 | # The theme to use for HTML and HTML Help pages. See the documentation for 57 | # a list of builtin themes. 58 | # 59 | html_theme = 'sphinx_rtd_theme' 60 | 61 | # Add any paths that contain custom static files (such as style sheets) here, 62 | # relative to this directory. They are copied after the builtin static files, 63 | # so a file named "default.css" will overwrite the builtin "default.css". 64 | html_static_path = ['_static'] 65 | 66 | # doc types to build 67 | sphinx_enable_epub_build = False 68 | sphinx_enable_pdf_build = False 69 | exclude_patterns = ["_build", "Thumbs.db", ".*", "~*", "*~", "*#"] 70 | -------------------------------------------------------------------------------- /docs/source/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlite-tools/aws-s3-tools/d329642a44749d3a22d600a3dba7bbd24efa21b4/docs/source/demo.gif -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. aws-s3-tools documentation master file, created by 2 | sphinx-quickstart on Tue Feb 16 18:45:12 2021. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to AWS S3 Tools Documentation 7 | ===================================== 8 | 9 | .. 
toctree:: 10 | :maxdepth: 1 11 | :caption: Contents 12 | 13 | introduction.rst 14 | buckets.rst 15 | objects.rst 16 | 17 | 18 | Indices and tables 19 | ================== 20 | 21 | * :ref:`genindex` 22 | * :ref:`modindex` 23 | * :ref:`search` 24 | -------------------------------------------------------------------------------- /docs/source/introduction.rst: -------------------------------------------------------------------------------- 1 | Introduction 2 | ============ 3 | 4 | AWS S3 Tools is a Python package to make it easier to interact with S3 objects, where you can: 5 | 6 | - List S3 bucket content 7 | - Check if an S3 object exists 8 | - Retrieve an S3 object's metadata 9 | - Download/upload S3 objects to/from local files 10 | - Read/write S3 objects into/from Python variables 11 | - Delete/Move S3 objects 12 | 13 | The AWS S3 authentication is done via boto3 package, via environment variables, aws config file, or parameters. 14 | All S3 object functions in this package can take AWS Session authentication through the `aws_auth` parameter, a dictionary with the schema below (not all fields are required). 15 | To learn more about the AWS authentication mechanism, `read the boto3 documentation <https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html>`_. 16 | 17 | .. code-block:: python 18 | 19 | aws_auth = { 20 | 'region_name': 'REGION', 21 | 'aws_access_key_id': 'ACCESS_KEY', 22 | 'aws_secret_access_key': 'SECRET_KEY', 23 | 'aws_session_token': 'SESSION_TOKEN', 24 | 'profile_name': 'PROFILE_NAME', 25 | } 26 | 27 | Installation 28 | ------------ 29 | 30 | You can install AWS S3 Tools from PyPI with `pip` or your favorite package manager:: 31 | 32 | pip install aws-s3-tools 33 | 34 | Add the ``-U`` switch to update to the current version, if AWS S3 Tools is already installed. 35 | 36 | If you want to use the **progress bar** feature when downloading or uploading, you need to install an extra dependency:: 37 | 38 | pip install aws-s3-tools[progress] 39 | 40 | 41 | Usage 42 | ----- 43 | 44 | Simple example: 45 | 46 | .. code-block:: python 47 | 48 | from s3_tools import object_exists 49 | 50 | if object_exists("my-bucket", "s3-prefix/object.data"): 51 | # Do magic 52 | pass 53 | else: 54 | print("Object not found") 55 | 56 | Using the progress bar: 57 | 58 | .. code-block:: python 59 | 60 | from s3_tools import upload_folder_to_prefix 61 | 62 | result = upload_folder_to_prefix( 63 | bucket='dlite-tools', 64 | prefix='aws-s3-tools', 65 | search_str='*.py', 66 | threads=2, 67 | folder='s3_tools', 68 | show_progress=True 69 | ) 70 | 71 | .. image:: ./demo.gif 72 | :alt: Animated GIF with progress bar 73 | -------------------------------------------------------------------------------- /docs/source/objects.rst: -------------------------------------------------------------------------------- 1 | S3 Objects 2 | ========== 3 | 4 | All functionalities to deal with AWS S3 Objects. 5 | 6 | Check 7 | ----- 8 | 9 | .. automodule:: s3_tools.objects.check 10 | :members: 11 | :undoc-members: 12 | :show-inheritance: 13 | 14 | Copy 15 | ----- 16 | 17 | .. automodule:: s3_tools.objects.copy 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | Delete 23 | ------ 24 | 25 | .. automodule:: s3_tools.objects.delete 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | 30 | Download 31 | -------- 32 | 33 | .. automodule:: s3_tools.objects.download 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | List 39 | ---- 40 | 41 | .. 
automodule:: s3_tools.objects.list 42 | :members: 43 | :undoc-members: 44 | :show-inheritance: 45 | 46 | Move 47 | ---- 48 | 49 | .. automodule:: s3_tools.objects.move 50 | :members: 51 | :undoc-members: 52 | :show-inheritance: 53 | 54 | Presigned URL 55 | ------------- 56 | 57 | .. automodule:: s3_tools.objects.presigned_url 58 | :members: 59 | :undoc-members: 60 | :show-inheritance: 61 | 62 | Read 63 | ---- 64 | 65 | .. automodule:: s3_tools.objects.read 66 | :members: 67 | :undoc-members: 68 | :show-inheritance: 69 | 70 | Upload 71 | ------ 72 | 73 | .. automodule:: s3_tools.objects.upload 74 | :members: 75 | :undoc-members: 76 | :show-inheritance: 77 | 78 | Write 79 | ----- 80 | 81 | .. automodule:: s3_tools.objects.write 82 | :members: 83 | :undoc-members: 84 | :show-inheritance: 85 | -------------------------------------------------------------------------------- /docs/source/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3 == 1.24.2 2 | ujson == 5.4.0 3 | toml == 0.10.2 4 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["pdm-backend"] 3 | build-backend = "pdm.backend" 4 | 5 | [project] 6 | name = "aws-s3-tools" 7 | version = "0.3.3" 8 | description = "AWS S3 tools package" 9 | readme = "README.md" 10 | requires-python = "<4.0,>=3.8" 11 | authors = [ 12 | {name = "Daniel Ferrari"}, 13 | ] 14 | maintainers = [ 15 | {name = "Daniel Ferrari, Carlos Alves, Tomás Osório"}, 16 | ] 17 | classifiers = [ 18 | "Development Status :: 5 - Production/Stable", 19 | "Intended Audience :: Developers", 20 | "License :: Freely Distributable", 21 | "License :: OSI Approved :: MIT License", 22 | "Natural Language :: English", 23 | "Programming Language :: Python :: 3.8", 24 | "Programming Language :: Python :: 3.9", 25 | "Programming Language :: Python :: 3.10", 26 | "Programming Language :: Python :: 3.11", 27 | "Programming Language :: Python :: 3.12", 28 | "Topic :: Software Development :: Libraries :: Python Modules", 29 | "Topic :: Utilities", 30 | ] 31 | keywords = [ 32 | "aws", 33 | "s3", 34 | "tools", 35 | "package", 36 | ] 37 | license = {text = "MIT"} 38 | dependencies = [ 39 | "boto3<2.0,>=1.35", 40 | "ujson<6,>=5", 41 | ] 42 | 43 | [project.urls] 44 | homepage = "https://github.com/dlite-tools/aws-s3-tools" 45 | repository = "https://github.com/dlite-tools/aws-s3-tools" 46 | documentation = "https://aws-s3-tools.readthedocs.io/en/latest/index.html" 47 | 48 | [project.optional-dependencies] 49 | progress = [ 50 | "rich<14,>=13", 51 | ] 52 | 53 | [dependency-groups] 54 | dev = [ 55 | "moto[s3]", 56 | "mypy", 57 | "pytest", 58 | "pytest-cov", 59 | "requests", 60 | "ruff", 61 | "sphinx", 62 | "sphinx-rtd-theme", 63 | "toml", 64 | "types-requests", 65 | "types-ujson", 66 | ] 67 | 68 | [tool.pdm.build] 69 | includes = ["s3_tools"] 70 | 71 | [tool.ruff] 72 | line-length = 120 73 | indent-width = 4 74 | target-version = "py311" 75 | src = ["s3_tools", "tests/unit"] 76 | 77 | [tool.ruff.format] 78 | quote-style = "double" 79 | indent-style = "space" 80 | 81 | [tool.ruff.lint] 82 | select = [ 83 | "D", # docstring 84 | "F", # flake8 85 | "E", # pycodestyle 86 | "W", # pycodestyle 87 | "I001", # isort 88 | "C90", # mccabe (complexity) 89 | ] 90 | 91 | [tool.ruff.lint.pydocstyle] 92 | convention = "numpy" 93 | 94 | [tool.ruff.lint.per-file-ignores] 95 | "__init__.py" = ["F401", 
"D104"] 96 | "test_*.py" = ["D10"] 97 | "conftest.py" = ["D10"] 98 | 99 | [tool.ruff.lint.mccabe] 100 | max-complexity = 10 101 | 102 | [tool.mypy] 103 | python_version = "3.11" 104 | ignore_missing_imports = true 105 | warn_unused_configs = true 106 | check_untyped_defs = true 107 | allow_redefinition = true 108 | strict_optional = false 109 | 110 | [tool.pytest.ini_options] 111 | filterwarnings = "ignore::DeprecationWarning" 112 | 113 | [tool.coverage.run] 114 | branch = true 115 | source = ["s3_tools"] 116 | 117 | [tool.aws-s3-tools] 118 | copyright = "2021, Daniel Ferrari" 119 | -------------------------------------------------------------------------------- /readthedocs.yaml: -------------------------------------------------------------------------------- 1 | 2 | version: 2 3 | sphinx: 4 | configuration: docs/source/conf.py 5 | build: 6 | image: latest 7 | python: 8 | version: 3.7 9 | install: 10 | - requirements: docs/source/requirements.txt 11 | -------------------------------------------------------------------------------- /s3_tools/__init__.py: -------------------------------------------------------------------------------- 1 | """AWS S3 Tools.""" 2 | from s3_tools.buckets.check import ( 3 | bucket_exists, 4 | ) 5 | from s3_tools.buckets.create import ( 6 | create_bucket, 7 | ) 8 | from s3_tools.buckets.delete import ( 9 | delete_bucket, 10 | ) 11 | from s3_tools.buckets.list import ( 12 | list_buckets, 13 | ) 14 | from s3_tools.objects.check import ( 15 | object_exists, 16 | object_metadata, 17 | ) 18 | from s3_tools.objects.copy import ( 19 | copy_keys, 20 | copy_object, 21 | copy_prefix, 22 | ) 23 | from s3_tools.objects.delete import ( 24 | delete_keys, 25 | delete_object, 26 | delete_prefix, 27 | ) 28 | from s3_tools.objects.download import ( 29 | download_key_to_file, 30 | download_keys_to_files, 31 | download_prefix_to_folder, 32 | ) 33 | from s3_tools.objects.list import ( 34 | list_objects, 35 | ) 36 | from s3_tools.objects.move import ( 37 | move_keys, 38 | move_object, 39 | ) 40 | from s3_tools.objects.presigned_url import ( 41 | get_presigned_download_url, 42 | get_presigned_upload_url, 43 | get_presigned_url, 44 | ) 45 | from s3_tools.objects.read import ( 46 | read_object_to_bytes, 47 | read_object_to_dict, 48 | read_object_to_text, 49 | ) 50 | from s3_tools.objects.upload import ( 51 | upload_file_to_key, 52 | upload_files_to_keys, 53 | upload_folder_to_prefix, 54 | ) 55 | from s3_tools.objects.write import ( 56 | write_object_from_bytes, 57 | write_object_from_dict, 58 | write_object_from_text, 59 | ) 60 | -------------------------------------------------------------------------------- /s3_tools/buckets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlite-tools/aws-s3-tools/d329642a44749d3a22d600a3dba7bbd24efa21b4/s3_tools/buckets/__init__.py -------------------------------------------------------------------------------- /s3_tools/buckets/check.py: -------------------------------------------------------------------------------- 1 | """Check S3 bucket.""" 2 | from typing import Dict 3 | 4 | import boto3 5 | from botocore.exceptions import ClientError 6 | 7 | 8 | def bucket_exists(bucket: str, aws_auth: Dict[str, str] = {}) -> bool: 9 | """Check if a bucket exists. 10 | 11 | Parameters 12 | ---------- 13 | bucket : str 14 | Bucket name to be checked. 15 | 16 | aws_auth: Dict[str, str] 17 | Contains AWS credentials, by default is empty. 
18 | 19 | Returns 20 | ------- 21 | bool 22 | True if the bucket exists, otherwise False. 23 | 24 | Raises 25 | ------ 26 | Exception 27 | Any problem with the request is raised. 28 | 29 | Example 30 | ------- 31 | >>> bucket_exists("myBucket") 32 | True 33 | """ 34 | session = boto3.session.Session(**aws_auth) 35 | s3 = session.client("s3") 36 | 37 | try: 38 | s3.head_bucket(Bucket=bucket) 39 | except Exception as error: 40 | if isinstance(error, ClientError) and (error.response["Error"]["Code"] == "404"): 41 | return False 42 | 43 | raise error # Raise anything different from Not Found 44 | 45 | return True 46 | -------------------------------------------------------------------------------- /s3_tools/buckets/create.py: -------------------------------------------------------------------------------- 1 | """Create S3 Bucket.""" 2 | from typing import Dict 3 | 4 | import boto3 5 | 6 | 7 | def create_bucket(name: str, configs: Dict[str, str] = {}, aws_auth: Dict[str, str] = {}) -> bool: 8 | """Create an S3 bucket. 9 | 10 | Parameters 11 | ---------- 12 | name : str 13 | Name of the bucket to create. 14 | 15 | configs : Dict[str, str] 16 | Bucket configurations, by default is empty. 17 | To know more about it check boto3 documentation. 18 | 19 | aws_auth : Dict[str, str] 20 | Contains AWS credentials, by default is empty. 21 | 22 | Returns 23 | ------- 24 | bool 25 | True if the bucket was created, False otherwise. 26 | 27 | Examples 28 | -------- 29 | >>> create_bucket("myBucket") 30 | True 31 | 32 | """ 33 | session = boto3.session.Session(**aws_auth) 34 | s3 = session.client("s3") 35 | 36 | response = s3.create_bucket(Bucket=name, **configs) 37 | 38 | return response['ResponseMetadata']['HTTPStatusCode'] == 200 39 | -------------------------------------------------------------------------------- /s3_tools/buckets/delete.py: -------------------------------------------------------------------------------- 1 | """Delete S3 bucket.""" 2 | from typing import Dict 3 | 4 | import boto3 5 | from botocore.exceptions import ClientError 6 | 7 | 8 | def delete_bucket(name: str, aws_auth: Dict[str, str] = {}) -> bool: 9 | """Delete an S3 bucket. 10 | 11 | Parameters 12 | ---------- 13 | name : str 14 | Name of the bucket to delete. 15 | 16 | aws_auth : Dict[str, str], optional 17 | Contains AWS credentials, by default {} 18 | 19 | Returns 20 | ------- 21 | bool 22 | True if the bucket was deleted, False otherwise. 23 | 24 | Raises 25 | ------ 26 | Exception 27 | Any problem with the request is raised. 28 | 29 | Examples 30 | -------- 31 | >>> delete_bucket("myBucket") 32 | True 33 | 34 | """ 35 | session = boto3.session.Session(**aws_auth) 36 | s3 = session.client("s3") 37 | 38 | try: 39 | response = s3.delete_bucket(Bucket=name) 40 | except Exception as error: 41 | if isinstance(error, ClientError) and (error.response["Error"]["Code"] == "NoSuchBucket"): 42 | return False 43 | else: 44 | raise error 45 | 46 | return response['ResponseMetadata']['HTTPStatusCode'] == 204 47 | -------------------------------------------------------------------------------- /s3_tools/buckets/list.py: -------------------------------------------------------------------------------- 1 | """List S3 Buckets.""" 2 | import fnmatch 3 | from typing import Dict, List, Optional 4 | 5 | import boto3 6 | 7 | 8 | def list_buckets(search_str: Optional[str] = None, aws_auth: Dict[str, str] = {}) -> List[str]: 9 | """Retrieve the list of buckets from AWS S3 filtered by search string. 
10 | 11 | Parameters 12 | ---------- 13 | search_str: str 14 | Basic search string to filter out buckets on result (uses Unix shell-style wildcards), by default is None. 15 | For more about the search check "fnmatch" package. 16 | 17 | aws_auth: Dict[str, str] 18 | Contains AWS credentials, by default is empty. 19 | 20 | Returns 21 | ------- 22 | List[str] 23 | List of bucket names filtered. 24 | 25 | Examples 26 | -------- 27 | >>> list_buckets() 28 | [ "myRawData", "myProcessedData", "myFinalData"] 29 | 30 | >>> list_buckets(search_str="*Raw*") 31 | [ "myRawData" ] 32 | 33 | """ 34 | session = boto3.session.Session(**aws_auth) 35 | s3 = session.client("s3") 36 | 37 | response = s3.list_buckets() 38 | 39 | buckets = [bucket["Name"] for bucket in response["Buckets"]] 40 | 41 | return buckets if not search_str else fnmatch.filter(buckets, search_str) 42 | -------------------------------------------------------------------------------- /s3_tools/objects/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlite-tools/aws-s3-tools/d329642a44749d3a22d600a3dba7bbd24efa21b4/s3_tools/objects/__init__.py -------------------------------------------------------------------------------- /s3_tools/objects/check.py: -------------------------------------------------------------------------------- 1 | """Check objects on S3 bucket.""" 2 | from pathlib import Path 3 | from typing import ( 4 | Any, 5 | Dict, 6 | Union, 7 | ) 8 | 9 | import boto3 10 | from botocore.exceptions import ClientError 11 | 12 | 13 | def object_exists(bucket: str, key: Union[str, Path], aws_auth: Dict[str, str] = {}) -> bool: 14 | """Check if an object exists for a given bucket and key. 15 | 16 | Parameters 17 | ---------- 18 | bucket : str 19 | Bucket name where the object is stored. 20 | 21 | key : Union[str, Path] 22 | Full key for the object. 23 | 24 | aws_auth: Dict[str, str] 25 | Contains AWS credentials, by default is empty. 26 | 27 | Returns 28 | ------- 29 | bool 30 | True if the object exists, otherwise False. 31 | 32 | Raises 33 | ------ 34 | Exception 35 | Any problem with the request is raised. 36 | 37 | Example 38 | ------- 39 | >>> object_exists("myBucket", "myFiles/music.mp3") 40 | True 41 | """ 42 | session = boto3.session.Session(**aws_auth) 43 | s3 = session.client("s3") 44 | 45 | try: 46 | s3.head_object(Bucket=bucket, Key=Path(key).as_posix()) 47 | except Exception as error: 48 | if isinstance(error, ClientError) and (error.response["Error"]["Code"] == "404"): 49 | return False 50 | 51 | raise error # Raise anything different from Not Found 52 | 53 | return True 54 | 55 | 56 | def object_metadata(bucket: str, key: Union[str, Path], aws_auth: Dict[str, str] = {}) -> Dict[str, Any]: 57 | """Get metadata from an S3 object. 58 | 59 | Parameters 60 | ---------- 61 | bucket : str 62 | Bucket name where the object is stored. 63 | 64 | key : Union[str, Path] 65 | Full key for the object. 66 | 67 | aws_auth: Dict[str, str] 68 | Contains AWS credentials, by default is empty. 69 | 70 | Returns 71 | ------- 72 | Dict[str, Any] 73 | Metadata from the object. 74 | 75 | Raises 76 | ------ 77 | Exception 78 | Any problem with the request is raised. 
79 | 80 | Example 81 | ------- 82 | >>> object_metadata("myBucket", "myFiles/music.mp3") 83 | { 84 | 'ResponseMetadata': {}, 85 | 'AcceptRanges': 'bytes', 86 | 'LastModified': datetime.datetime(2020, 10, 31, 20, 46, 13, tzinfo=tzutc()), 87 | 'ContentLength': 123456, 88 | 'ETag': '"1234567890abcdef1234567890abcdef"', 89 | 'ContentType': 'audio/mpeg', 90 | 'Metadata': {} 91 | } 92 | """ 93 | session = boto3.session.Session(**aws_auth) 94 | s3 = session.client("s3") 95 | 96 | try: 97 | return s3.head_object(Bucket=bucket, Key=Path(key).as_posix()) 98 | except Exception as error: 99 | if isinstance(error, ClientError) and (error.response["Error"]["Code"] == "404"): 100 | return {} 101 | 102 | raise error # Raise anything different from Not Found 103 | -------------------------------------------------------------------------------- /s3_tools/objects/copy.py: -------------------------------------------------------------------------------- 1 | """Copy S3 objects.""" 2 | from concurrent import futures 3 | from pathlib import Path 4 | from typing import ( 5 | Dict, 6 | List, 7 | Optional, 8 | Tuple, 9 | Union, 10 | ) 11 | 12 | import boto3 13 | 14 | from s3_tools.objects.list import list_objects 15 | 16 | 17 | def copy_object( 18 | source_bucket: str, 19 | source_key: Union[str, Path], 20 | destination_bucket: str, 21 | destination_key: Union[str, Path], 22 | aws_auth: Dict[str, str] = {} 23 | ) -> None: 24 | """Copy S3 object from source bucket and key to destination. 25 | 26 | Parameters 27 | ---------- 28 | source_bucket : str 29 | S3 bucket where the object is stored. 30 | 31 | source_key : Union[str, Path] 32 | S3 key where the object is referenced. 33 | 34 | destination_bucket : str 35 | S3 destination bucket. 36 | 37 | destination_key : Union[str, Path] 38 | S3 destination key. 39 | 40 | aws_auth: Dict[str, str] 41 | Contains AWS credentials, by default is empty. 42 | 43 | Examples 44 | -------- 45 | >>> copy_object( 46 | ... source_bucket='bucket', 47 | ... source_key='myFiles/song.mp3', 48 | ... destination_bucket='bucket', 49 | ... destination_key='myMusic/song.mp3', 50 | ... ) 51 | 52 | """ 53 | session = boto3.session.Session(**aws_auth) 54 | s3 = session.resource("s3") 55 | 56 | s3.meta.client.copy( 57 | {'Bucket': source_bucket, 'Key': Path(source_key).as_posix()}, 58 | destination_bucket, 59 | Path(destination_key).as_posix() 60 | ) 61 | 62 | 63 | def copy_keys( 64 | source_bucket: str, 65 | source_keys: List[Union[str, Path]], 66 | destination_bucket: str, 67 | destination_keys: List[Union[str, Path]], 68 | threads: int = 5, 69 | aws_auth: Dict[str, str] = {} 70 | ) -> None: 71 | """Copy a list of S3 objects from source bucket to destination. 72 | 73 | Parameters 74 | ---------- 75 | source_bucket : str 76 | S3 bucket where the objects are stored. 77 | 78 | source_keys : List[Union[str, Path]] 79 | S3 keys where the objects are referenced. 80 | 81 | destination_bucket : str 82 | S3 destination bucket. 83 | 84 | destination_keys : List[Union[str, Path]] 85 | S3 destination keys. 86 | 87 | threads : int, optional 88 | Number of parallel uploads, by default 5. 89 | 90 | aws_auth: Dict[str, str] 91 | Contains AWS credentials, by default is empty. 92 | 93 | Raises 94 | ------ 95 | IndexError 96 | When the source_keys and destination_keys have different length. 97 | 98 | ValueError 99 | When the keys list is empty. 100 | 101 | Examples 102 | -------- 103 | >>> copy_keys( 104 | ... source_bucket='bucket', 105 | ... source_keys=[ 106 | ... 'myFiles/song.mp3', 107 | ... 
Path('myFiles/photo.jpg'), 108 | ... ], 109 | ... destination_bucket='bucket', 110 | ... destination_keys=[ 111 | ... Path('myMusic/song.mp3'), 112 | ... 'myPhotos/photo.jpg', 113 | ... ] 114 | ... ) 115 | 116 | """ 117 | if len(source_keys) != len(destination_keys): 118 | raise IndexError("Key lists must have the same length") 119 | 120 | if len(source_keys) == 0: 121 | raise ValueError("Key list length must be greater than zero") 122 | 123 | with futures.ThreadPoolExecutor(max_workers=threads) as executor: 124 | executors = ( 125 | executor.submit(copy_object, source_bucket, source, destination_bucket, destination, aws_auth) 126 | for source, destination in zip(source_keys, destination_keys) 127 | ) 128 | 129 | for ex in executors: 130 | ex.result() 131 | 132 | 133 | def copy_prefix( 134 | source_bucket: str, 135 | source_prefix: Union[str, Path], 136 | destination_bucket: str, 137 | change_prefix: Optional[Tuple[Union[str, Path], Union[str, Path]]] = None, 138 | filter_keys: Optional[str] = None, 139 | threads: int = 5, 140 | aws_auth: Dict[str, str] = {} 141 | ) -> None: 142 | """Copy S3 objects from source bucket to destination based on prefix filter. 143 | 144 | Parameters 145 | ---------- 146 | source_bucket : str 147 | S3 bucket where the objects are stored. 148 | 149 | source_prefix : Union[str, Path] 150 | S3 prefix where the objects are referenced. 151 | 152 | destination_bucket : str 153 | S3 destination bucket. 154 | 155 | change_prefix : Tuple[Union[str, Path], Union[str, Path]], optional 156 | Text to be replaced in keys prefixes, by default is None. 157 | The first element is the text to be replaced, the second is the replacement text. 158 | 159 | filter_keys : str, optional 160 | Basic search string to filter out keys on result (uses Unix shell-style wildcards), by default is None. 161 | For more about the search check "fnmatch" package. 162 | 163 | threads : int, optional 164 | Number of parallel uploads, by default 5. 165 | 166 | aws_auth: Dict[str, str] 167 | Contains AWS credentials, by default is empty. 168 | 169 | Examples 170 | -------- 171 | >>> copy_prefix( 172 | ... source_bucket='MyBucket', 173 | ... source_prefix='myFiles', 174 | ... destination_bucket='OtherBucket', 175 | ... filter_keys='*images*', 176 | ... change_prefix=('myFiles', 'backup') 177 | ... ) 178 | 179 | """ 180 | source_keys = list_objects( 181 | bucket=source_bucket, 182 | prefix=source_prefix, 183 | search_str=filter_keys, 184 | aws_auth=aws_auth 185 | ) 186 | 187 | destination_keys = source_keys if change_prefix is None else [ 188 | Path(key).as_posix().replace( 189 | Path(change_prefix[0]).as_posix(), 190 | Path(change_prefix[1]).as_posix() 191 | ) 192 | for key in source_keys 193 | ] 194 | 195 | copy_keys( 196 | source_bucket=source_bucket, 197 | source_keys=source_keys, 198 | destination_bucket=destination_bucket, 199 | destination_keys=destination_keys, 200 | threads=threads, 201 | aws_auth=aws_auth 202 | ) 203 | -------------------------------------------------------------------------------- /s3_tools/objects/delete.py: -------------------------------------------------------------------------------- 1 | """Delete objects from S3 bucket.""" 2 | from pathlib import Path 3 | from typing import ( 4 | Dict, 5 | List, 6 | Optional, 7 | Union, 8 | ) 9 | 10 | import boto3 11 | 12 | from s3_tools.objects.list import list_objects 13 | 14 | 15 | def delete_object(bucket: str, key: Union[str, Path], aws_auth: Dict[str, str] = {}) -> None: 16 | """Delete a given object from S3 bucket. 
17 | 18 | Parameters 19 | ---------- 20 | bucket: str 21 | AWS S3 bucket where the object is stored. 22 | 23 | key: Union[str, Path] 24 | Key for the object that will be deleted. 25 | 26 | aws_auth: Dict[str, str] 27 | Contains AWS credentials, by default is empty. 28 | 29 | Examples 30 | -------- 31 | >>> delete_object(bucket="myBucket", key="myData/myFile.data") 32 | 33 | """ 34 | session = boto3.session.Session(**aws_auth) 35 | s3 = session.client("s3") 36 | s3.delete_object(Bucket=bucket, Key=Path(key).as_posix()) 37 | 38 | 39 | def delete_prefix( 40 | bucket: str, 41 | prefix: Union[str, Path], 42 | dry_run: bool = True, 43 | aws_auth: Dict[str, str] = {} 44 | ) -> Optional[List[Union[str, Path]]]: 45 | """Delete all objects under the given prefix from S3 bucket. 46 | 47 | Parameters 48 | ---------- 49 | bucket: str 50 | AWS S3 bucket where the objects are stored. 51 | 52 | prefix: Union[str, Path] 53 | Prefix where the objects are under. 54 | 55 | dry_run: bool 56 | If True will not delete the objects. 57 | 58 | aws_auth: Dict[str, str] 59 | Contains AWS credentials, by default is empty. 60 | 61 | Returns 62 | ------- 63 | List[Union[str, Path]] 64 | List of S3 keys to be deleted if dry_run True, else None. 65 | 66 | Examples 67 | -------- 68 | >>> delete_prefix(bucket="myBucket", prefix="myData") 69 | [ 70 | "myData/myMusic/awesome.mp3", 71 | "myData/myDocs/paper.doc" 72 | ] 73 | 74 | >>> delete_prefix(bucket="myBucket", prefix=Path("myData"), dry_run=False) 75 | 76 | """ 77 | keys = list_objects(bucket, prefix, aws_auth=aws_auth) 78 | 79 | if dry_run: 80 | return [key for key in keys] 81 | 82 | for key in keys: 83 | delete_object(bucket, key, aws_auth) 84 | 85 | return None 86 | 87 | 88 | def delete_keys(bucket: str, keys: List[Union[str, Path]], dry_run: bool = True, aws_auth: Dict[str, str] = {}) -> None: 89 | """Delete all objects in the keys list from S3 bucket. 90 | 91 | Parameters 92 | ---------- 93 | bucket: str 94 | AWS S3 bucket where the objects are stored. 95 | 96 | keys: List[Union[str, Path]] 97 | List of object keys. 98 | 99 | dry_run: bool 100 | If True will not delete the objects. 101 | 102 | aws_auth: Dict[str, str] 103 | Contains AWS credentials, by default is empty. 104 | 105 | Examples 106 | -------- 107 | >>> delete_keys( 108 | ... bucket="myBucket", 109 | ... keys=[ 110 | ... "myData/myMusic/awesome.mp3", 111 | ... Path("myData/myDocs/paper.doc") 112 | ... ], 113 | ... dry_run=False 114 | ... ) 115 | 116 | """ 117 | if dry_run: 118 | return 119 | 120 | for key in keys: 121 | delete_object(bucket, key, aws_auth) 122 | -------------------------------------------------------------------------------- /s3_tools/objects/download.py: -------------------------------------------------------------------------------- 1 | """Download S3 objects to files.""" 2 | from concurrent import futures 3 | from pathlib import Path 4 | from typing import ( 5 | Any, 6 | Dict, 7 | List, 8 | Optional, 9 | Tuple, 10 | Union, 11 | ) 12 | 13 | import boto3 14 | 15 | from s3_tools.objects.list import list_objects 16 | from s3_tools.utils import ( 17 | _create_progress_bar, 18 | _get_future_output, 19 | ) 20 | 21 | 22 | def download_key_to_file( 23 | bucket: str, 24 | key: Union[str, Path], 25 | local_filename: Union[str, Path], 26 | progress=None, # type: ignore # No import if extra not installed 27 | task_id: int = -1, 28 | aws_auth: Dict[str, str] = {}, 29 | extra_args: Dict[str, str] = {}, 30 | ) -> bool: 31 | """Retrieve one object from AWS S3 bucket and store into local disk. 
32 | 33 | Parameters 34 | ---------- 35 | bucket: str 36 | AWS S3 bucket where the object is stored. 37 | 38 | key: Union[str, Path] 39 | Key where the object is stored. 40 | 41 | local_filename: Union[str, Path] 42 | Local file where the data will be downloaded to. 43 | 44 | progress: rich.Progress 45 | Instance of a rich Progress bar, by default None. 46 | 47 | task_id: int 48 | Task ID on the progress bar to be updated, by default -1. 49 | 50 | aws_auth: Dict[str, str] 51 | Contains AWS credentials, by default is empty. 52 | 53 | extra_args: Dict[str, str] 54 | Extra arguments to be passed to the boto3 download_file method, by default is empty. 55 | Allowed download arguments: 56 | https://boto3.amazonaws.com/v1/documentation/api/latest/reference/customizations/s3.html#boto3.s3.transfer.S3Transfer.ALLOWED_DOWNLOAD_ARGS 57 | 58 | Returns 59 | ------- 60 | bool 61 | True if the local file exists. 62 | 63 | Examples 64 | -------- 65 | >>> download_key_to_file( 66 | ... bucket="myBucket", 67 | ... key="myData/myFile.data", 68 | ... local_filename="theFile.data", 69 | ... ) 70 | True 71 | 72 | """ 73 | session = boto3.session.Session(**aws_auth) 74 | s3 = session.client("s3") 75 | Path(local_filename).parent.mkdir(parents=True, exist_ok=True) 76 | s3.download_file( 77 | Bucket=bucket, 78 | Key=Path(key).as_posix(), 79 | Filename=Path(local_filename).as_posix(), 80 | ExtraArgs=extra_args, 81 | ) 82 | if progress: 83 | progress.update(task_id, advance=1) 84 | return Path(local_filename).exists() 85 | 86 | 87 | def download_keys_to_files( 88 | bucket: str, 89 | keys_paths: List[Tuple[Union[str, Path], Union[str, Path]]], 90 | threads: int = 5, 91 | show_progress: bool = False, 92 | aws_auth: Dict[str, str] = {}, 93 | as_paths: bool = False, 94 | default_extra_args: Dict[str, str] = {}, 95 | extra_args_per_key: List[Dict[str, str]] = [], 96 | ) -> List[Tuple[Union[str, Path], Union[str, Path], Any]]: 97 | """Download list of objects to specific paths. 98 | 99 | Parameters 100 | ---------- 101 | bucket: str 102 | AWS S3 bucket where the objects are stored. 103 | 104 | keys_paths: List[Tuple[Union[str, Path], Union[str, Path]]] 105 | List with a tuple of S3 key to be downloaded and local path to be stored. 106 | e.g. [ 107 | ("S3_Key", "Local_Path"), 108 | (Path("S3_Key"), "Local_Path"), 109 | ("S3_Key", Path("Local_Path")), 110 | (Path("S3_Key"), Path("Local_Path")), 111 | ] 112 | 113 | threads: int 114 | Number of parallel downloads, by default 5. 115 | 116 | show_progress: bool 117 | Show progress bar on console, by default False. 118 | (Need to install extra [progress] to be used) 119 | 120 | aws_auth: Dict[str, str] 121 | Contains AWS credentials, by default is empty. 122 | 123 | as_paths: bool 124 | If True, the keys are returned as Path objects, otherwise as strings, by default is False. 125 | 126 | default_extra_args: Dict[str, str] 127 | Extra arguments to be passed to the boto3 download_file method, by default is empty. 128 | The extra arguments will be applied to all S3 keys. 129 | 130 | extra_args_per_key: List[Dict[str, str]] 131 | Extra arguments to be passed for each S3 key to the boto3 download_file method, by default is empty. 132 | The default extra arguments will be merged with the extra arguments passed for each key. 133 | 134 | Returns 135 | ------- 136 | List[Tuple] 137 | A list with tuples formed by the "S3_Key", "Local_Path", and the result of the download. 138 | If successful will have True, if not will contain the error message. 
139 | Attention, the output list may not follow the same input order. 140 | 141 | Examples 142 | -------- 143 | >>> download_keys_to_files( 144 | ... bucket="myBucket", 145 | ... keys_paths=[ 146 | ... ("myData/myFile.data", "MyFiles/myFile.data"), 147 | ... ("myData/myMusic/awesome.mp3", "MyFiles/myMusic/awesome.mp3"), 148 | ... ("myData/myDocs/paper.doc", "MyFiles/myDocs/paper.doc"), 149 | ... ] 150 | ... ) 151 | [ 152 | ("myData/myMusic/awesome.mp3", "MyFiles/myMusic/awesome.mp3", True), 153 | ("myData/myDocs/paper.doc", "MyFiles/myDocs/paper.doc", True), 154 | ("myData/myFile.data", "MyFiles/myFile.data", True), 155 | ] 156 | 157 | """ 158 | if len(extra_args_per_key) != 0 and len(extra_args_per_key) != len(keys_paths): 159 | raise ValueError("The length of extra_args_per_key must be the same as keys_paths.") 160 | 161 | extra_arguments = [{}] * len(keys_paths) if len(extra_args_per_key) == 0 else extra_args_per_key 162 | 163 | if show_progress: 164 | progress, task_id = _create_progress_bar("Downloading", len(keys_paths)) 165 | progress.start() 166 | progress.start_task(task_id) 167 | else: 168 | progress, task_id = None, -1 169 | 170 | with futures.ThreadPoolExecutor(max_workers=threads) as executor: 171 | # Create a dictionary to map the future execution with the (S3 key, Local filename) 172 | # dict = {future: values} 173 | executions = { 174 | executor.submit( 175 | download_key_to_file, 176 | bucket, 177 | s3_key, 178 | filename, 179 | progress, 180 | task_id, 181 | aws_auth, 182 | {**default_extra_args, **extra_args}, 183 | ): {"s3": s3_key, "fn": filename} 184 | for (s3_key, filename), extra_args in zip(keys_paths, extra_arguments) 185 | } 186 | 187 | output = [ 188 | (executions[future]["s3"], executions[future]["fn"], _get_future_output(future)) 189 | for future in futures.as_completed(executions) 190 | ] 191 | 192 | if show_progress: 193 | progress.stop() 194 | 195 | if as_paths: 196 | output = [(Path(key), Path(fn), result) for key, fn, result in output] 197 | else: 198 | output = [(Path(key).as_posix(), Path(fn).as_posix(), result) for key, fn, result in output] 199 | 200 | return output 201 | 202 | 203 | def download_prefix_to_folder( 204 | bucket: str, 205 | prefix: Union[str, Path], 206 | folder: Union[str, Path], 207 | search_str: Optional[str] = None, 208 | remove_prefix: bool = True, 209 | threads: int = 5, 210 | show_progress: bool = False, 211 | aws_auth: Dict[str, str] = {}, 212 | as_paths: bool = False, 213 | default_extra_args: Dict[str, str] = {}, 214 | ) -> List[Tuple[Union[str, Path], Union[str, Path], Any]]: 215 | """Download objects to local folder. 216 | 217 | Function to retrieve all files under a prefix on S3 and store them into local folder. 218 | 219 | Parameters 220 | ---------- 221 | bucket: str 222 | AWS S3 bucket where the objects are stored. 223 | 224 | prefix: Union[str, Path] 225 | Prefix where the objects are under. 226 | 227 | folder: Union[str, Path] 228 | Local folder path where files will be stored. 229 | 230 | search_str: str 231 | Basic search string to filter out keys on result (uses Unix shell-style wildcards), by default is None. 232 | For more about the search check "fnmatch" package. 233 | 234 | remove_prefix: bool 235 | If True will remove the the prefix when writing to local folder. 236 | The remaining "folders" on the key will be created on the local folder. 237 | 238 | threads: int 239 | Number of parallel downloads, by default 5. 240 | 241 | show_progress: bool 242 | Show progress bar on console, by default False. 
243 | (Need to install extra [progress] to be used) 244 | 245 | aws_auth: Dict[str, str] 246 | Contains AWS credentials, by default is empty. 247 | 248 | as_paths: bool 249 | If True, the keys are returned as Path objects, otherwise as strings, by default is False. 250 | 251 | default_extra_args: Dict[str, str] 252 | Extra arguments to be passed to the boto3 download_file method, by default is empty. 253 | The extra arguments will be applied to all S3 keys. 254 | 255 | Returns 256 | ------- 257 | List[Tuple] 258 | A list with tuples formed by the "S3_Key", "Local_Path", and the result of the download. 259 | If successful will have True, if not will contain the error message. 260 | 261 | Examples 262 | -------- 263 | >>> download_prefix_to_folder( 264 | ... bucket="myBucket", 265 | ... prefix="myData", 266 | ... folder="myFiles", 267 | ... ) 268 | [ 269 | ("myData/myFile.data", "MyFiles/myFile.data", True), 270 | ("myData/myMusic/awesome.mp3", "MyFiles/myMusic/awesome.mp3", True), 271 | ("myData/myDocs/paper.doc", "MyFiles/myDocs/paper.doc", True), 272 | ] 273 | 274 | """ 275 | s3_keys = list_objects( 276 | bucket=bucket, 277 | prefix=prefix, 278 | search_str=search_str, 279 | aws_auth=aws_auth, 280 | as_paths=as_paths, 281 | ) 282 | 283 | keys_paths: List[Tuple[Union[str, Path], Union[str, Path]]] = [( 284 | key, 285 | "{}/{}".format( 286 | Path(folder).as_posix(), 287 | Path(key).as_posix().replace(Path(prefix).as_posix(), "")[1:] if remove_prefix else key 288 | ) 289 | ) for key in s3_keys] 290 | 291 | return download_keys_to_files(bucket, keys_paths, threads, show_progress, aws_auth, as_paths, default_extra_args) 292 | -------------------------------------------------------------------------------- /s3_tools/objects/list.py: -------------------------------------------------------------------------------- 1 | """List S3 bucket objects.""" 2 | import fnmatch 3 | from pathlib import Path 4 | from typing import ( 5 | Dict, 6 | List, 7 | Optional, 8 | Union, 9 | ) 10 | 11 | import boto3 12 | 13 | 14 | def list_objects( 15 | bucket: str, 16 | prefix: Union[str, Path] = "", 17 | search_str: Optional[str] = None, 18 | max_keys: int = 1000, 19 | aws_auth: Dict[str, str] = {}, 20 | as_paths: bool = False, 21 | ) -> List[Union[str, Path]]: 22 | """Retrieve the list of objects from AWS S3 bucket under a given prefix and search string. 23 | 24 | Parameters 25 | ---------- 26 | bucket: str 27 | AWS S3 bucket where the objects are stored. 28 | 29 | prefix: Union[str, Path] 30 | Prefix where the objects are under. 31 | 32 | search_str: str 33 | Basic search string to filter out keys on result (uses Unix shell-style wildcards), by default is None. 34 | For more about the search check "fnmatch" package. 35 | 36 | max_keys: int 37 | Max number of keys to have pagination. 38 | 39 | aws_auth: Dict[str, str] 40 | Contains AWS credentials, by default is empty. 41 | 42 | as_paths: bool 43 | If True, the keys are returned as Path objects, otherwise as strings, by default is False. 44 | 45 | Returns 46 | ------- 47 | List[Union[str, Path]] 48 | List of keys inside the bucket, under the path, and filtered. 
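
    Notes
    -----
    Pagination is handled internally with list_objects_v2 continuation tokens,
    so max_keys only sets the page size of each request and does not limit the
    total number of keys returned.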
49 | 50 | Examples 51 | -------- 52 | >>> list_objects(bucket="myBucket", prefix="myData") 53 | [ 54 | "myData/myFile.data", 55 | "myData/myMusic/awesome.mp3", 56 | "myData/myDocs/paper.doc" 57 | ] 58 | 59 | >>> list_objects(bucket="myBucket", prefix="myData", search_str="*paper*", as_paths=True) 60 | [ 61 | Path("myData/myDocs/paper.doc") 62 | ] 63 | 64 | """ 65 | continuation_token: Optional[str] = None 66 | keys = [] 67 | 68 | session = boto3.session.Session(**aws_auth) 69 | s3 = session.client("s3") 70 | 71 | while True: 72 | list_kwargs = { 73 | "Bucket": bucket, 74 | "Prefix": Path(prefix).as_posix(), 75 | "MaxKeys": max_keys 76 | } 77 | if continuation_token: 78 | list_kwargs["ContinuationToken"] = continuation_token 79 | 80 | response = s3.list_objects_v2(**list_kwargs) 81 | if "Contents" in response: 82 | keys.extend([obj["Key"] for obj in response["Contents"]]) 83 | 84 | if not response.get("NextContinuationToken"): 85 | break 86 | 87 | continuation_token = response.get("NextContinuationToken") 88 | 89 | if isinstance(search_str, str): 90 | keys = fnmatch.filter(keys, search_str) 91 | 92 | return keys if not as_paths else [Path(key) for key in keys] 93 | -------------------------------------------------------------------------------- /s3_tools/objects/move.py: -------------------------------------------------------------------------------- 1 | """Move S3 objects.""" 2 | from concurrent import futures 3 | from pathlib import Path 4 | from typing import ( 5 | Dict, 6 | List, 7 | Union, 8 | ) 9 | 10 | import boto3 11 | 12 | from s3_tools.objects.delete import delete_object 13 | 14 | 15 | def move_object( 16 | source_bucket: str, 17 | source_key: Union[str, Path], 18 | destination_bucket: str, 19 | destination_key: Union[str, Path], 20 | aws_auth: Dict[str, str] = {}, 21 | ) -> None: 22 | """Move S3 object from source bucket and key to destination. 23 | 24 | Parameters 25 | ---------- 26 | source_bucket : str 27 | S3 bucket where the object is stored. 28 | 29 | source_key : Union[str, Path] 30 | S3 key where the object is referenced. 31 | 32 | destination_bucket : str 33 | S3 destination bucket. 34 | 35 | destination_key : Union[str, Path] 36 | S3 destination key. 37 | 38 | aws_auth: Dict[str, str] 39 | Contains AWS credentials, by default is empty. 40 | 41 | Examples 42 | -------- 43 | >>> move_object( 44 | ... source_bucket='bucket', 45 | ... source_key='myFiles/song.mp3', 46 | ... destination_bucket='bucket', 47 | ... destination_key='myMusic/song.mp3', 48 | ... ) 49 | 50 | """ 51 | session = boto3.session.Session(**aws_auth) 52 | s3 = session.resource("s3") 53 | 54 | s3.meta.client.copy( 55 | {'Bucket': source_bucket, 'Key': Path(source_key).as_posix()}, 56 | destination_bucket, 57 | Path(destination_key).as_posix(), 58 | ) 59 | 60 | delete_object(source_bucket, source_key, aws_auth) 61 | 62 | 63 | def move_keys( 64 | source_bucket: str, 65 | source_keys: List[Union[str, Path]], 66 | destination_bucket: str, 67 | destination_keys: List[Union[str, Path]], 68 | threads: int = 5, 69 | aws_auth: Dict[str, str] = {}, 70 | ) -> None: 71 | """Move a list of S3 objects from source bucket to destination. 72 | 73 | Parameters 74 | ---------- 75 | source_bucket : str 76 | S3 bucket where the objects are stored. 77 | 78 | source_keys : List[Union[str, Path]] 79 | S3 keys where the objects are referenced. 80 | 81 | destination_bucket : str 82 | S3 destination bucket. 83 | 84 | destination_keys : List[Union[str, Path]] 85 | S3 destination keys. 
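        Must have the same length as source_keys, as keys are paired by position.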
86 | 87 | threads : int, optional 88 | Number of parallel uploads, by default 5. 89 | 90 | aws_auth: Dict[str, str] 91 | Contains AWS credentials, by default is empty. 92 | 93 | Raises 94 | ------ 95 | IndexError 96 | When the source_keys and destination_keys have different length. 97 | 98 | ValueError 99 | When the keys list is empty. 100 | 101 | Examples 102 | -------- 103 | >>> move_keys( 104 | ... source_bucket='bucket', 105 | ... source_keys=[ 106 | ... 'myFiles/song.mp3', 107 | ... 'myFiles/photo.jpg', 108 | ... ], 109 | ... destination_bucket='bucket', 110 | ... destination_keys=[ 111 | ... 'myMusic/song.mp3', 112 | ... 'myPhotos/photo.jpg', 113 | ... ], 114 | ... ) 115 | 116 | """ 117 | if len(source_keys) != len(destination_keys): 118 | raise IndexError("Key lists must have the same length") 119 | 120 | if len(source_keys) == 0: 121 | raise ValueError("Key list length must be greater than zero") 122 | 123 | with futures.ThreadPoolExecutor(max_workers=threads) as executor: 124 | executors = ( 125 | executor.submit(move_object, source_bucket, source, destination_bucket, destination, aws_auth) 126 | for source, destination in zip(source_keys, destination_keys) 127 | ) 128 | 129 | for ex in executors: 130 | ex.result() 131 | -------------------------------------------------------------------------------- /s3_tools/objects/presigned_url.py: -------------------------------------------------------------------------------- 1 | """Create presigned URL for S3 bucket objects.""" 2 | from pathlib import Path 3 | from typing import ( 4 | Dict, 5 | Optional, 6 | Union, 7 | ) 8 | 9 | import boto3 10 | 11 | 12 | def get_presigned_url( 13 | client_method: str, 14 | method_parameters: Optional[dict] = None, 15 | http_method: Optional[str] = None, 16 | expiration: int = 300, 17 | aws_auth: Dict[str, str] = {}, 18 | ) -> str: 19 | """Generate a presigned URL to invoke an S3.Client method. 20 | 21 | Parameters 22 | ---------- 23 | client_method: str 24 | Name of the S3.Client method, e.g., 'list_buckets'. 25 | 26 | method_parameters: Optional[dict] 27 | Dictionary of parameters to send to the method. 28 | 29 | expiration: int 30 | Time in seconds for the presigned URL to remain valid, default 5 minutes. 31 | 32 | http_method: Optional[str] 33 | HTTP method to use, e.g., GET, POST. If not specified, will automatically be select the appropriate method. 34 | 35 | aws_auth: Dict[str, str] 36 | Contains AWS credentials, by default is empty. 37 | 38 | Returns 39 | ------- 40 | str 41 | Presigned URL. 42 | 43 | Raises 44 | ------ 45 | Exception 46 | Any problem with the request is raised. 47 | 48 | Examples 49 | -------- 50 | >>> get_presigned_url( 51 | ... client_method='list_objects', 52 | ... method_parameters={'Bucket': 'myBucket'}, 53 | ... ) 54 | https://myBucket.s3.amazonaws.com/?encoding-type=url&AWSAccessKeyId=ASI&Signature=5JLAcSKQ%3D&x-amz-security-token=FwoGZXIvY%&Expires=1646759818 55 | 56 | """ 57 | session = boto3.session.Session(**aws_auth) 58 | s3 = session.client("s3") 59 | 60 | try: 61 | response = s3.generate_presigned_url( 62 | ClientMethod=client_method, 63 | Params=method_parameters, 64 | ExpiresIn=expiration, 65 | HttpMethod=http_method, 66 | ) 67 | except Exception as error: 68 | raise error 69 | 70 | return response 71 | 72 | 73 | def get_presigned_download_url( 74 | bucket: str, 75 | key: Union[str, Path], 76 | expiration: int = 300, 77 | aws_auth: Dict[str, str] = {}, 78 | ) -> str: 79 | """Generate a presigned URL to download an S3 object. 
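
    The URL embeds a signature and expiry time, so any plain HTTP client can GET
    the object for up to expiration seconds without holding AWS credentials.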
80 | 81 | Parameters 82 | ---------- 83 | bucket: str 84 | AWS S3 bucket where the object is stored. 85 | 86 | key: Union[str, Path] 87 | Key for the object that will be downloaded. 88 | 89 | expiration: int 90 | Time in seconds for the presigned URL to remain valid, default 5 minutes. 91 | 92 | aws_auth: Dict[str, str] 93 | Contains AWS credentials, by default is empty. 94 | 95 | Returns 96 | ------- 97 | str 98 | Presigned URL. 99 | 100 | Raises 101 | ------ 102 | Exception 103 | Any problem with the request is raised. 104 | 105 | Examples 106 | -------- 107 | >>> import requests # To install: pip install requests 108 | >>> url = get_presigned_download_url( 109 | ... bucket='myBucket', 110 | ... key='myData/myFile.data', 111 | ... ) 112 | >>> response = requests.get(url) 113 | 114 | """ 115 | return get_presigned_url( 116 | client_method='get_object', 117 | method_parameters={'Bucket': bucket, 'Key': Path(key).as_posix()}, 118 | expiration=expiration, 119 | aws_auth=aws_auth, 120 | ) 121 | 122 | 123 | def get_presigned_upload_url( 124 | bucket: str, 125 | key: Union[str, Path], 126 | fields: Optional[dict] = None, 127 | conditions: Optional[list] = None, 128 | expiration: int = 300, 129 | aws_auth: Dict[str, str] = {}, 130 | ) -> dict: 131 | """Generate a presigned URL S3 POST request to upload a file. 132 | 133 | Parameters 134 | ---------- 135 | bucket: str 136 | AWS S3 bucket where the object will be stored. 137 | 138 | key: Union[str, Path] 139 | Key for the object that will will be stored. 140 | 141 | fields: Optional[dict] 142 | Dictionary of prefilled form fields. 143 | 144 | conditions: Optional[list] 145 | List of conditions to include in the policy. 146 | 147 | expiration: int 148 | Time in seconds for the presigned URL to remain valid, default 5 minutes. 149 | 150 | aws_auth: Dict[str, str] 151 | Contains AWS credentials, by default is empty. 152 | 153 | Returns 154 | ------- 155 | dict 156 | A dictionary with two elements: url and fields. 157 | Url is the url to post to. 158 | Fields is a dictionary filled with the form fields and respective values to use when submitting the post. 159 | 160 | Raises 161 | ------ 162 | Exception 163 | Any problem with the request is raised. 164 | 165 | Examples 166 | -------- 167 | >>> import requests # To install: pip install requests 168 | >>> response = get_presigned_upload_url( 169 | ... bucket='myBucket', 170 | ... key='myData/myFile.data', 171 | ... ) 172 | >>> with open('myFile.data', 'rb') as f: 173 | ... files = {'file': ('myFile.data', f)} 174 | ... 
http_response = requests.post(response['url'], data=response['fields'], files=files) 175 | 176 | """ 177 | if key is None: 178 | raise AttributeError("Key is required.") 179 | 180 | session = boto3.session.Session(**aws_auth) 181 | s3 = session.client("s3") 182 | 183 | try: 184 | response = s3.generate_presigned_post( 185 | Bucket=bucket, 186 | Key=Path(key).as_posix(), 187 | Fields=fields, 188 | Conditions=conditions, 189 | ExpiresIn=expiration, 190 | ) 191 | except Exception as error: 192 | raise error 193 | 194 | return response 195 | -------------------------------------------------------------------------------- /s3_tools/objects/read.py: -------------------------------------------------------------------------------- 1 | """Read S3 objects into variables.""" 2 | from pathlib import Path 3 | from typing import ( 4 | Any, 5 | Dict, 6 | Union, 7 | ) 8 | 9 | import boto3 10 | import ujson 11 | 12 | 13 | def read_object_to_bytes(bucket: str, key: Union[str, Path], aws_auth: Dict[str, str] = {}) -> bytes: 14 | """Retrieve one object from AWS S3 bucket as a byte array. 15 | 16 | Parameters 17 | ---------- 18 | bucket: str 19 | AWS S3 bucket where the object is stored. 20 | 21 | key: Union[str, Path] 22 | Key where the object is stored. 23 | 24 | aws_auth: Dict[str, str] 25 | Contains AWS credentials, by default is empty. 26 | 27 | Returns 28 | ------- 29 | bytes 30 | Object content as bytes. 31 | 32 | Examples 33 | -------- 34 | >>> read_object_to_bytes( 35 | ... bucket="myBucket", 36 | ... key="myData/myFile.data", 37 | ... ) 38 | b"The file content" 39 | 40 | """ 41 | session = boto3.session.Session(**aws_auth) 42 | s3 = session.client("s3") 43 | obj = s3.get_object(Bucket=bucket, Key=Path(key).as_posix()) 44 | 45 | return obj["Body"].read() 46 | 47 | 48 | def read_object_to_text(bucket: str, key: Union[str, Path], aws_auth: Dict[str, str] = {}) -> str: 49 | """Retrieve one object from AWS S3 bucket as a string. 50 | 51 | Parameters 52 | ---------- 53 | bucket: str 54 | AWS S3 bucket where the object is stored. 55 | 56 | key: Union[str, Path] 57 | Key where the object is stored. 58 | 59 | aws_auth: Dict[str, str] 60 | Contains AWS credentials, by default is empty. 61 | 62 | Returns 63 | ------- 64 | str 65 | Object content as string. 66 | 67 | Examples 68 | -------- 69 | >>> read_object_to_text( 70 | ... bucket="myBucket", 71 | ... key="myData/myFile.data" 72 | ... ) 73 | "The file content" 74 | 75 | """ 76 | data = read_object_to_bytes(bucket, key, aws_auth) 77 | return data.decode("utf-8") 78 | 79 | 80 | def read_object_to_dict(bucket: str, key: Union[str, Path], aws_auth: Dict[str, str] = {}) -> Dict[Any, Any]: 81 | """Retrieve one object from AWS S3 bucket as a dictionary. 82 | 83 | Parameters 84 | ---------- 85 | bucket: str 86 | AWS S3 bucket where the object is stored. 87 | 88 | key: Union[str, Path] 89 | Key where the object is stored. 90 | 91 | aws_auth: Dict[str, str] 92 | Contains AWS credentials, by default is empty. 93 | 94 | Returns 95 | ------- 96 | Dict[Any, Any] 97 | Object content as dictionary. 98 | 99 | Examples 100 | -------- 101 | >>> read_object_to_dict( 102 | ... bucket="myBucket", 103 | ... key="myData/myFile.json", 104 | ... 
) 105 | {"key": "value", "1": "text"} 106 | 107 | """ 108 | data = read_object_to_bytes(bucket, key, aws_auth) 109 | return ujson.loads(data.decode("utf-8")) 110 | -------------------------------------------------------------------------------- /s3_tools/objects/upload.py: -------------------------------------------------------------------------------- 1 | """Upload files to S3 bucket.""" 2 | from concurrent import futures 3 | from pathlib import Path 4 | from typing import ( 5 | Any, 6 | Dict, 7 | List, 8 | Tuple, 9 | Union, 10 | ) 11 | 12 | import boto3 13 | 14 | from s3_tools.utils import ( 15 | _create_progress_bar, 16 | _get_future_output, 17 | ) 18 | 19 | 20 | def upload_file_to_key( 21 | bucket: str, 22 | key: Union[str, Path], 23 | local_filename: Union[str, Path], 24 | progress=None, # type: ignore # No import if extra not installed 25 | task_id: int = -1, 26 | aws_auth: Dict[str, str] = {}, 27 | extra_args: Dict[str, Any] = {}, 28 | ) -> str: 29 | """Upload one file from local disk and store into AWS S3 bucket. 30 | 31 | Parameters 32 | ---------- 33 | bucket: str 34 | AWS S3 bucket where the object will be stored. 35 | 36 | key: Union[str, Path] 37 | Key where the object will be stored. 38 | 39 | local_filename: Union[str, Path] 40 | Local file from where the data will be uploaded. 41 | 42 | progress: rich.Progress 43 | Instance of a rich Progress bar, by default None. 44 | 45 | task_id: int 46 | Task ID on the progress bar to be updated, by default -1. 47 | 48 | aws_auth: Dict[str, str] 49 | Contains AWS credentials, by default is empty. 50 | 51 | extra_args: Dict[str, Any] 52 | Extra arguments to be passed to the boto3 upload_file method, by default is empty. 53 | Allowed upload arguments: 54 | https://boto3.amazonaws.com/v1/documentation/api/latest/reference/customizations/s3.html#boto3.s3.transfer.S3Transfer.ALLOWED_UPLOAD_ARGS 55 | 56 | Returns 57 | ------- 58 | str 59 | The S3 full URL to the file. 60 | 61 | Examples 62 | -------- 63 | >>> upload_file_to_key( 64 | ... bucket="myBucket", 65 | ... key="myFiles/music.mp3", 66 | ... local_filename="files/music.mp3", 67 | ... ) 68 | http://s3.amazonaws.com/myBucket/myFiles/music.mp3 69 | 70 | """ 71 | session = boto3.session.Session(**aws_auth) 72 | s3 = session.client("s3") 73 | s3.upload_file( 74 | Bucket=bucket, 75 | Key=Path(key).as_posix(), 76 | Filename=Path(local_filename).as_posix(), 77 | ExtraArgs=extra_args, 78 | ) 79 | if progress: 80 | progress.update(task_id, advance=1) 81 | return "{}/{}/{}".format(s3.meta.endpoint_url, bucket, key) 82 | 83 | 84 | def upload_files_to_keys( 85 | bucket: str, 86 | paths_keys: List[Tuple[Union[str, Path], Union[str, Path]]], 87 | threads: int = 5, 88 | show_progress: bool = False, 89 | aws_auth: Dict[str, str] = {}, 90 | as_paths: bool = False, 91 | default_extra_args: Dict[str, str] = {}, 92 | extra_args_per_key: List[Dict[str, str]] = [], 93 | ) -> List[Tuple[Union[str, Path], Union[str, Path], Any]]: 94 | """Upload list of files to specific objects. 95 | 96 | Parameters 97 | ---------- 98 | bucket : str 99 | AWS S3 bucket where the objects will be stored. 100 | 101 | paths_keys : List[Tuple[Union[str, Path], Union[str, Path]]] 102 | List with a tuple of local path to be uploaded and S3 key destination. 103 | e.g. [("Local_Path", "S3_Key"), ("Local_Path", "S3_Key")] 104 | 105 | threads : int, optional 106 | Number of parallel uploads, by default 5. 107 | 108 | show_progress: bool 109 | Show progress bar on console, by default False. 
110 | (Need to install extra [progress] to be used) 111 | 112 | aws_auth: Dict[str, str] 113 | Contains AWS credentials, by default is empty. 114 | 115 | as_paths: bool 116 | If True, the keys are returned as Path objects, otherwise as strings, by default is False. 117 | 118 | default_extra_args: Dict[str, str] 119 | Extra arguments to be passed to the boto3 upload_file method, by default is empty. 120 | The extra arguments will be applied to all S3 keys. 121 | 122 | extra_args_per_key: List[Dict[str, str]] 123 | Extra arguments to be passed for each S3 key to the boto3 upload_file method, by default is empty. 124 | The default extra arguments will be merged with the extra arguments passed for each key. 125 | 126 | Returns 127 | ------- 128 | List[Tuple[Union[str, Path], Union[str, Path], Any]] 129 | A list with tuples formed by the "Local_Path", "S3_Key", and the result of the upload. 130 | If successful will have True, if not will contain the error message. 131 | Attention, the output list may not follow the same input order. 132 | 133 | Raises 134 | ------ 135 | ValueError 136 | extra_args_per_key when used must have the same length of paths_keys. 137 | 138 | Examples 139 | -------- 140 | >>> upload_files_to_keys( 141 | ... bucket="myBucket", 142 | ... paths_keys=[ 143 | ... ("MyFiles/myFile.data", "myData/myFile.data"), 144 | ... ("MyFiles/myMusic/awesome.mp3", "myData/myMusic/awesome.mp3"), 145 | ... ("MyFiles/myDocs/paper.doc", "myData/myDocs/paper.doc"), 146 | ... ], 147 | ... ) 148 | [ 149 | ("MyFiles/myMusic/awesome.mp3", "myData/myMusic/awesome.mp3", True), 150 | ("MyFiles/myDocs/paper.doc", "myData/myDocs/paper.doc", True), 151 | ("MyFiles/myFile.data", "myData/myFile.data", True), 152 | ] 153 | 154 | """ 155 | if len(extra_args_per_key) != 0 and len(extra_args_per_key) != len(paths_keys): 156 | raise ValueError("The length of extra_args_per_key must be the same as paths_keys.") 157 | 158 | extra_arguments = [{}] * len(paths_keys) if len(extra_args_per_key) == 0 else extra_args_per_key 159 | 160 | if show_progress: 161 | progress, task_id = _create_progress_bar("Uploading", len(paths_keys)) 162 | progress.start() 163 | progress.start_task(task_id) 164 | else: 165 | progress, task_id = None, -1 166 | 167 | with futures.ThreadPoolExecutor(max_workers=threads) as executor: 168 | # Create a dictionary to map the future execution with the (S3 key, Local filename) 169 | # dict = {future: values} 170 | executions = { 171 | executor.submit( 172 | upload_file_to_key, 173 | bucket, 174 | s3_key, 175 | filename, 176 | progress, 177 | task_id, 178 | aws_auth, 179 | {**default_extra_args, **extra_args}, 180 | ): {"s3": s3_key, "fn": filename} 181 | for (filename, s3_key), extra_args in zip(paths_keys, extra_arguments) 182 | } 183 | 184 | output = [ 185 | (executions[future]["fn"], executions[future]["s3"], _get_future_output(future)) 186 | for future in futures.as_completed(executions) 187 | ] 188 | 189 | if show_progress: 190 | progress.stop() 191 | 192 | if as_paths: 193 | output = [(Path(key), Path(fn), result) for key, fn, result in output] 194 | 195 | return output 196 | 197 | 198 | def upload_folder_to_prefix( 199 | bucket: str, 200 | prefix: Union[str, Path], 201 | folder: Union[str, Path], 202 | search_str: str = "*", 203 | threads: int = 5, 204 | show_progress: bool = False, 205 | aws_auth: Dict[str, str] = {}, 206 | as_paths: bool = False, 207 | default_extra_args: Dict[str, str] = {}, 208 | ) -> List[Tuple[Union[str, Path], Union[str, Path], Any]]: 209 | """Upload local folder to 
a S3 prefix. 210 | 211 | Function to upload all files for a given folder (recursive) 212 | and store them into a S3 bucket under a prefix. 213 | The local folder structure will be replicated into S3. 214 | 215 | Parameters 216 | ---------- 217 | bucket : str 218 | AWS S3 bucket where the object will be stored. 219 | 220 | prefix : Union[str, Path] 221 | Prefix where the objects will be under. 222 | 223 | folder : Union[str, Path] 224 | Local folder path where files are stored. 225 | Prefer to use the full path for the folder. 226 | 227 | search_str : str. 228 | A match string to select all the files to upload, by default "*". 229 | The string follows the rglob function pattern from the pathlib package. 230 | 231 | threads : int, optional 232 | Number of parallel uploads, by default 5 233 | 234 | show_progress: bool 235 | Show progress bar on console, by default False. 236 | (Need to install extra [progress] to be used) 237 | 238 | aws_auth: Dict[str, str] 239 | Contains AWS credentials, by default is empty. 240 | 241 | as_paths: bool 242 | If True, the keys are returned as Path objects, otherwise as strings, by default is False. 243 | 244 | default_extra_args: Dict[str, str] 245 | Extra arguments to be passed to the boto3 upload_file method, by default is empty. 246 | The extra arguments will be applied to all S3 keys. 247 | 248 | Returns 249 | ------- 250 | List[Tuple[Union[str, Path], Union[str, Path], Any]] 251 | A list with tuples formed by the "Local_Path", "S3_Key", and the result of the upload. 252 | If successful will have True, if not will contain the error message. 253 | 254 | Examples 255 | -------- 256 | >>> upload_folder_to_prefix( 257 | ... bucket="myBucket", 258 | ... prefix="myFiles", 259 | ... folder="/usr/files", 260 | ... ) 261 | [ 262 | ("/usr/files/music.mp3", "myFiles/music.mp3", True), 263 | ("/usr/files/awesome.wav", "myFiles/awesome.wav", True), 264 | ("/usr/files/data/metadata.json", "myFiles/data/metadata.json", True), 265 | ] 266 | 267 | """ 268 | paths = [p for p in Path(folder).rglob(search_str) if p.is_file()] 269 | 270 | paths_keys: List[Tuple[Union[str, Path], Union[str, Path]]] = [ 271 | ( 272 | p.as_posix(), 273 | Path(prefix).joinpath(p.relative_to(Path(folder))).as_posix(), # S3 key 274 | ) 275 | for p in paths 276 | ] 277 | 278 | return upload_files_to_keys(bucket, paths_keys, threads, show_progress, aws_auth, as_paths, default_extra_args) 279 | -------------------------------------------------------------------------------- /s3_tools/objects/write.py: -------------------------------------------------------------------------------- 1 | """Write variables into S3 objects.""" 2 | import json 3 | from typing import Dict 4 | 5 | import boto3 6 | 7 | 8 | def write_object_from_bytes(bucket: str, key: str, data: bytes, aws_auth: Dict[str, str] = {}) -> str: 9 | """Upload a bytes object to an object into AWS S3 bucket. 10 | 11 | Parameters 12 | ---------- 13 | bucket: str 14 | AWS S3 bucket where the object will be stored. 15 | 16 | key: str 17 | Key where the object will be stored. 18 | 19 | data: bytes 20 | The object data to be uploaded to AWS S3. 21 | 22 | aws_auth: Dict[str, str] 23 | Contains AWS credentials, by default is empty. 24 | 25 | Returns 26 | ------- 27 | str 28 | The S3 full URL to the file. 29 | 30 | Raises 31 | ------ 32 | TypeError 33 | If data is not a bytes type. 34 | 35 | Examples 36 | -------- 37 | >>> data = bytes("String to bytes", "utf-8") 38 | >>> write_object_from_bytes( 39 | ... bucket="myBucket", 40 | ... 
key="myFiles/file.data", 41 | ... data=data 42 | ... ) 43 | http://s3.amazonaws.com/myBucket/myFiles/file.data 44 | 45 | """ 46 | if not isinstance(data, bytes): 47 | raise TypeError("Object data must be bytes type") 48 | 49 | session = boto3.session.Session(**aws_auth) 50 | s3 = session.client("s3") 51 | s3.put_object(Bucket=bucket, Key=key, Body=data) 52 | return "{}/{}/{}".format(s3.meta.endpoint_url, bucket, key) 53 | 54 | 55 | def write_object_from_text(bucket: str, key: str, data: str, aws_auth: Dict[str, str] = {}) -> str: 56 | """Upload a string to an object into AWS S3 bucket. 57 | 58 | Parameters 59 | ---------- 60 | bucket: str 61 | AWS S3 bucket where the object will be stored. 62 | 63 | key: str 64 | Key where the object will be stored. 65 | 66 | data: str 67 | The object data to be uploaded to AWS S3. 68 | 69 | aws_auth: Dict[str, str] 70 | Contains AWS credentials, by default is empty. 71 | 72 | Returns 73 | ------- 74 | str 75 | The S3 full URL to the file. 76 | 77 | Raises 78 | ------ 79 | TypeError 80 | If data is not a str type. 81 | 82 | Examples 83 | -------- 84 | >>> data = "A very very not so long text" 85 | >>> write_object_from_text( 86 | ... bucket="myBucket", 87 | ... key="myFiles/file.txt", 88 | ... data=data 89 | ... ) 90 | http://s3.amazonaws.com/myBucket/myFiles/file.txt 91 | 92 | """ 93 | if not isinstance(data, str): 94 | raise TypeError("Object data must be string type") 95 | 96 | return write_object_from_bytes(bucket, key, data.encode(), aws_auth) 97 | 98 | 99 | def write_object_from_dict(bucket: str, key: str, data: Dict, aws_auth: Dict[str, str] = {}) -> str: 100 | """Upload a dictionary to an object into AWS S3 bucket. 101 | 102 | Parameters 103 | ---------- 104 | bucket: str 105 | AWS S3 bucket where the object will be stored. 106 | 107 | key: str 108 | Key where the object will be stored. 109 | 110 | data: dict 111 | The object data to be uploaded to AWS S3. 112 | 113 | aws_auth: Dict[str, str] 114 | Contains AWS credentials, by default is empty. 115 | 116 | Returns 117 | ------- 118 | str 119 | The S3 full URL to the file. 120 | 121 | Raises 122 | ------ 123 | TypeError 124 | If `data` is not a dict type. 125 | 126 | Examples 127 | -------- 128 | >>> data = {"key": "value", "1": "text"} 129 | >>> write_object_from_dict( 130 | ... bucket="myBucket", 131 | ... key="myFiles/file.json", 132 | ... data=data 133 | ... ) 134 | http://s3.amazonaws.com/myBucket/myFiles/file.json 135 | 136 | """ 137 | if not isinstance(data, dict): 138 | raise TypeError("Object data must be dictionary type") 139 | 140 | return write_object_from_bytes(bucket, key, json.dumps(data).encode(), aws_auth) 141 | -------------------------------------------------------------------------------- /s3_tools/utils.py: -------------------------------------------------------------------------------- 1 | """General utilities.""" 2 | from concurrent import futures 3 | from typing import Any 4 | 5 | 6 | def _get_future_output(future: futures.Future) -> Any: 7 | """Get a futures.Future result or exception message. 8 | 9 | Parameters 10 | ---------- 11 | future : futures.Future 12 | An async callable method. 13 | 14 | Returns 15 | ------- 16 | Any 17 | If execution has no error will return the value returned by the callable method, 18 | else will return the exception message. 
19 | """ 20 | try: 21 | return future.result() 22 | except Exception as e: 23 | return repr(e) 24 | 25 | 26 | def _create_progress_bar(description: str, length: int): 27 | """Create a console progress bar using 'rich' package. 28 | 29 | Parameters 30 | ---------- 31 | description : str 32 | Progress bar description. 33 | length : int 34 | Progress bar length. 35 | 36 | Returns 37 | ------- 38 | Tuple[Optional[Progress], Optional[int]] 39 | The progress bar object and the task ID associated. 40 | (Need to install extra [progress] to be used) 41 | """ 42 | try: 43 | from rich.progress import BarColumn, Progress, TextColumn 44 | 45 | progress = Progress( 46 | TextColumn("[bold blue]{task.description}", justify="right"), 47 | BarColumn(bar_width=None), 48 | TextColumn("[bold blue][{task.completed}/{task.total}]"), 49 | ) 50 | 51 | task_id = progress.add_task( 52 | description=description, 53 | total=length, 54 | start=False 55 | ) 56 | 57 | except ImportError: 58 | print("Missing extra dependency to use progress bar." 59 | " Please run 'pip install aws-s3-tools[progress]'.") 60 | raise 61 | 62 | return progress, task_id 63 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlite-tools/aws-s3-tools/d329642a44749d3a22d600a3dba7bbd24efa21b4/tests/__init__.py -------------------------------------------------------------------------------- /tests/resources/empty.data: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlite-tools/aws-s3-tools/d329642a44749d3a22d600a3dba7bbd24efa21b4/tests/resources/empty.data -------------------------------------------------------------------------------- /tests/resources/mock_file.csv: -------------------------------------------------------------------------------- 1 | purpose,known,found 2 | already,tide,ground 3 | sitting,ball,none 4 | meal,exciting,expect 5 | -------------------------------------------------------------------------------- /tests/unit/buckets/test_check_bucket.py: -------------------------------------------------------------------------------- 1 | """Unit tests for check bucket.""" 2 | import pytest 3 | from botocore.exceptions import ParamValidationError 4 | from s3_tools import bucket_exists 5 | from tests.unit.conftest import BUCKET_NAME, create_bucket 6 | 7 | 8 | class TestCheck: 9 | 10 | def test_check_nonexisting_bucket(self, s3_client): 11 | response = bucket_exists(BUCKET_NAME) 12 | 13 | assert response is False 14 | 15 | def test_check_existing_bucket(self, s3_client): 16 | with create_bucket(s3_client, BUCKET_NAME): 17 | response = bucket_exists(BUCKET_NAME) 18 | 19 | assert response is True 20 | 21 | def test_check_invalid_param(self, s3_client): 22 | with pytest.raises(ParamValidationError): 23 | bucket_exists("") 24 | -------------------------------------------------------------------------------- /tests/unit/buckets/test_create_bucket.py: -------------------------------------------------------------------------------- 1 | """Unit tests for create bucket.""" 2 | import pytest 3 | from botocore.exceptions import ParamValidationError 4 | from s3_tools import create_bucket 5 | from tests.unit.conftest import BUCKET_NAME 6 | 7 | 8 | class TestCreate: 9 | 10 | def test_create_bucket(self, s3_client): 11 | response = create_bucket(BUCKET_NAME) 12 | 13 | assert response is True 14 | 15 | def 
test_create_bucket_invalid_name(self, s3_client): 16 | 17 | with pytest.raises(ParamValidationError): 18 | create_bucket('') 19 | 20 | def test_create_duplicated_bucket(self, s3_client): 21 | create_bucket(BUCKET_NAME) 22 | 23 | response = create_bucket(BUCKET_NAME) 24 | 25 | assert response is True 26 | -------------------------------------------------------------------------------- /tests/unit/buckets/test_delete_bucket.py: -------------------------------------------------------------------------------- 1 | """Unit tests for create bucket.""" 2 | import pytest 3 | from botocore.exceptions import ParamValidationError 4 | from s3_tools import ( 5 | create_bucket, 6 | delete_bucket, 7 | ) 8 | from tests.unit.conftest import BUCKET_NAME 9 | 10 | 11 | class TestDelete: 12 | 13 | def test_delete_bucket(self, s3_client): 14 | 15 | create_bucket(BUCKET_NAME) 16 | response = delete_bucket(BUCKET_NAME) 17 | 18 | assert response is True 19 | 20 | def test_create_bucket_invalid_name(self, s3_client): 21 | 22 | with pytest.raises(ParamValidationError): 23 | delete_bucket('') 24 | 25 | def test_delete_nonexisting_bucket(self, s3_client): 26 | response = delete_bucket(BUCKET_NAME) 27 | 28 | assert response is False 29 | -------------------------------------------------------------------------------- /tests/unit/buckets/test_list_buckets.py: -------------------------------------------------------------------------------- 1 | """Unit tests for list buckets.""" 2 | from s3_tools import list_buckets 3 | from tests.unit.conftest import create_buckets 4 | 5 | 6 | class TestList: 7 | 8 | def test_list_buckets_empty(self, s3_client): 9 | buckets = list_buckets() 10 | 11 | assert len(buckets) == 0 12 | 13 | def test_list_buckets(self, s3_client): 14 | with create_buckets(s3_client, ['bucketA', 'bucketB', 'bucketC']): 15 | buckets = list_buckets() 16 | 17 | assert len(buckets) == 3 18 | 19 | def test_list_buckets_with_filter(self, s3_client): 20 | with create_buckets(s3_client, ['bucketA', 'bucketB', 'bucketC', 'ThisIsTheBucket']): 21 | buckets = list_buckets('*IsThe*') 22 | 23 | assert len(buckets) == 1 24 | -------------------------------------------------------------------------------- /tests/unit/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | from contextlib import contextmanager 3 | from pathlib import Path 4 | from typing import ( 5 | List, 6 | Union, 7 | ) 8 | 9 | import boto3 10 | import pytest 11 | from moto import mock_aws 12 | 13 | BUCKET_NAME = "mock" 14 | FILENAME = "tests/resources/mock_file.csv" 15 | EMPTY_FILE = "tests/resources/empty.data" 16 | 17 | 18 | @pytest.fixture(scope="module") 19 | def aws_credentials(): 20 | os.environ["AWS_DEFAULT_REGION"] = "us-east-1" 21 | os.environ["AWS_ACCESS_KEY_ID"] = "test" 22 | os.environ["AWS_SECRET_ACCESS_KEY"] = "test" 23 | os.environ["AWS_SECURITY_TOKEN"] = "test" 24 | os.environ["AWS_SESSION_TOKEN"] = "test" 25 | 26 | 27 | @pytest.yield_fixture(scope="module") 28 | def s3_client(aws_credentials): 29 | with mock_aws(): 30 | session = boto3.session.Session() 31 | s3 = session.client("s3") 32 | yield s3 33 | 34 | 35 | @contextmanager 36 | def create_buckets(s3_client, names): 37 | for name in names: 38 | s3_client.create_bucket(Bucket=name) 39 | yield 40 | for name in names: 41 | s3_client.delete_bucket(Bucket=name) 42 | 43 | 44 | @contextmanager 45 | def create_bucket(s3_client, bucket, key=None, data=None, keys_paths=[]): 46 | s3_client.create_bucket(Bucket=bucket) 47 | 48 | if key and data: 
49 | s3_client.put_object(Bucket=bucket, Key=Path(key).as_posix(), Body=data) 50 | 51 | for key, fn in keys_paths: 52 | s3_client.upload_file( 53 | Bucket=bucket, 54 | Key=Path(key).as_posix(), 55 | Filename=Path(fn).as_posix() 56 | ) 57 | 58 | yield 59 | 60 | response = s3_client.list_objects_v2(Bucket=bucket) 61 | if "Contents" in response: 62 | for obj in response["Contents"]: 63 | s3_client.delete_object(Bucket=bucket, Key=obj["Key"]) 64 | 65 | s3_client.delete_bucket(Bucket=bucket) 66 | 67 | 68 | def create_files(as_path: bool = False) -> List[Union[str, Path]]: 69 | """Create folder structure. 70 | 71 | The function creates a folder structure with files under a path. 72 | 73 | Parameters 74 | ---------- 75 | as_path: bool 76 | If True, the keys are returned as Path objects, otherwise as strings, by default is False. 77 | 78 | 79 | Returns 80 | ------- 81 | List[Union[str, Path]] 82 | Path to all files/folders created. 83 | 84 | Examples 85 | -------- 86 | >>> create_files() 87 | [ 88 | 'root_folder/file.root', 89 | 'root_folder/folderA/file.A2', 90 | 'root_folder/folderC/folderC1/file.CC1' 91 | ] 92 | 93 | """ 94 | files = { 95 | "TEST_ROOT_A/file.root": "This file is in the root folder", 96 | "TEST_ROOT_A/folderA/file.A1": "This file is in the folder A - file A1", 97 | "TEST_ROOT_A/folderA/file.A2": "This file is in the folder A - file A2", 98 | "TEST_ROOT_A/folderB": "", 99 | "TEST_ROOT_A/folderC/folderD/file.D1": "This file is in the folder D - file D1", 100 | } 101 | 102 | for key, content in files.items(): 103 | fn = Path(key) 104 | if len(content) > 0: 105 | fn.parent.mkdir(parents=True, exist_ok=True) 106 | fn.open('w').write(content) 107 | else: 108 | fn.mkdir(parents=True, exist_ok=True) 109 | 110 | return [ 111 | Path(key) if as_path else key 112 | for key, content in files.items() 113 | if len(content) > 0 114 | ] 115 | -------------------------------------------------------------------------------- /tests/unit/objects/test_check_objects.py: -------------------------------------------------------------------------------- 1 | """Unit tests for check module.""" 2 | from pathlib import Path 3 | 4 | import pytest 5 | from botocore.exceptions import ClientError 6 | from s3_tools import ( 7 | object_exists, 8 | object_metadata, 9 | ) 10 | from tests.unit.conftest import BUCKET_NAME, FILENAME, create_bucket 11 | 12 | 13 | class TestCheck: 14 | 15 | def test_check_nonexisting_bucket(self, s3_client): 16 | with pytest.raises(ClientError): 17 | object_exists(BUCKET_NAME, "prefix/key.csv") 18 | 19 | @pytest.mark.parametrize("key", ["prefix/key.csv", Path("prefix/key.csv/")]) 20 | def test_check_nonexisting_object(self, s3_client, key): 21 | with create_bucket(s3_client, BUCKET_NAME): 22 | response = object_exists(BUCKET_NAME, key) 23 | 24 | assert response is False 25 | 26 | @pytest.mark.parametrize("key", ["prefix/key.csv", Path("prefix/key.csv/")]) 27 | def test_check_existing_object(self, s3_client, key): 28 | with create_bucket(s3_client, BUCKET_NAME, keys_paths=[(key, FILENAME)]): 29 | response = object_exists(BUCKET_NAME, key) 30 | 31 | assert response is True 32 | 33 | 34 | class TestMetadata: 35 | 36 | def test_metadata_nonexisting_bucket(self, s3_client): 37 | with pytest.raises(ClientError): 38 | object_metadata(BUCKET_NAME, "prefix/key.csv") 39 | 40 | @pytest.mark.parametrize("key", ["prefix/key.csv", Path("prefix/key.csv/")]) 41 | def test_metadata_nonexisting_object(self, s3_client, key): 42 | with create_bucket(s3_client, BUCKET_NAME): 43 | response = 
object_metadata(BUCKET_NAME, key) 44 | 45 | assert response == {} 46 | 47 | @pytest.mark.parametrize("key", ["prefix/key.csv", Path("prefix/key.csv/")]) 48 | def test_metadata_existing_object(self, s3_client, key): 49 | with create_bucket(s3_client, BUCKET_NAME, keys_paths=[(key, FILENAME)]): 50 | response = object_metadata(BUCKET_NAME, key) 51 | 52 | assert response['ContentLength'] == 79 53 | assert response['ContentType'] == 'binary/octet-stream' 54 | -------------------------------------------------------------------------------- /tests/unit/objects/test_copy_objects.py: -------------------------------------------------------------------------------- 1 | """Unit tests for copy module.""" 2 | from pathlib import Path 3 | 4 | import pytest 5 | from botocore.exceptions import ClientError 6 | from s3_tools import ( 7 | copy_keys, 8 | copy_object, 9 | copy_prefix, 10 | list_objects, 11 | object_exists, 12 | ) 13 | from tests.unit.conftest import BUCKET_NAME, FILENAME, create_bucket 14 | 15 | 16 | class TestCopy: 17 | 18 | source_key = "prefix/object" 19 | destination_key = "new-prefix/new-object" 20 | destination_bucket = "another-bucket" 21 | 22 | source_keys = [f"prefix/object_0{i}" for i in range(1, 5)] 23 | destination_keys = [f"new-prefix/new-object_0{i}" for i in range(1, 5)] 24 | 25 | source_keys_path = [Path(key) for key in source_keys] 26 | destination_keys_path = [Path(key) for key in destination_keys] 27 | 28 | def test_copy_from_nonexisting_bucket(self, s3_client): 29 | try: 30 | copy_object("Bucket", self.source_key, "Bucket", self.destination_key) 31 | except ClientError as e: 32 | error = e.response["Error"]["Code"] 33 | 34 | assert error == "NoSuchBucket" 35 | 36 | @pytest.mark.parametrize("source,destination", [ 37 | (source_key, destination_key), 38 | (Path(source_key), Path(destination_key)), 39 | (source_key, Path(destination_key)), 40 | (Path(source_key), destination_key), 41 | ]) 42 | def test_copy_from_nonexisting_key(self, s3_client, source, destination): 43 | with create_bucket(s3_client, BUCKET_NAME): 44 | try: 45 | copy_object(BUCKET_NAME, source, BUCKET_NAME, destination) 46 | except ClientError as e: 47 | error = e.response["Error"]["Code"] 48 | 49 | assert error == "404" 50 | 51 | @pytest.mark.parametrize("source,destination", [ 52 | (source_key, destination_key), 53 | (Path(source_key), Path(destination_key)), 54 | (source_key, Path(destination_key)), 55 | (Path(source_key), destination_key), 56 | ]) 57 | def test_copy_inside_bucket(self, s3_client, source, destination): 58 | with create_bucket(s3_client, BUCKET_NAME, keys_paths=[(self.source_key, FILENAME)]): 59 | source_before = object_exists(BUCKET_NAME, source) 60 | dest_before = object_exists(BUCKET_NAME, destination) 61 | 62 | copy_object(BUCKET_NAME, source, BUCKET_NAME, destination) 63 | 64 | source_after = object_exists(BUCKET_NAME, source) 65 | dest_after = object_exists(BUCKET_NAME, destination) 66 | 67 | assert source_before is True and dest_before is False 68 | assert source_after is True and dest_after is True 69 | 70 | @pytest.mark.parametrize("source,destination", [ 71 | (source_key, destination_key), 72 | (Path(source_key), Path(destination_key)), 73 | (source_key, Path(destination_key)), 74 | (Path(source_key), destination_key), 75 | ]) 76 | def test_copy_between_bucket(self, s3_client, source, destination): 77 | with create_bucket(s3_client, BUCKET_NAME, keys_paths=[(self.source_key, FILENAME)]), \ 78 | create_bucket(s3_client, self.destination_bucket): 79 | 80 | source_before = 
object_exists(BUCKET_NAME, source) 81 | dest_before = object_exists(self.destination_bucket, destination) 82 | 83 | copy_object(BUCKET_NAME, source, self.destination_bucket, destination) 84 | 85 | source_after = object_exists(BUCKET_NAME, source) 86 | dest_after = object_exists(self.destination_bucket, destination) 87 | 88 | assert source_before is True and dest_before is False 89 | assert source_after is True and dest_after is True 90 | 91 | def test_copy_to_nonexisting_bucket(self, s3_client): 92 | with create_bucket(s3_client, BUCKET_NAME, keys_paths=[(self.source_key, FILENAME)]): 93 | try: 94 | copy_object(BUCKET_NAME, self.source_key, self.destination_bucket, self.destination_key) 95 | except ClientError as e: 96 | error = e.response["Error"]["Code"] 97 | 98 | assert error == "NoSuchBucket" 99 | 100 | def test_copy_list_length_zero(self, s3_client): 101 | with create_bucket(s3_client, BUCKET_NAME), pytest.raises(ValueError): 102 | copy_keys(BUCKET_NAME, [], self.destination_bucket, []) 103 | 104 | def test_copy_list_different_length(self, s3_client): 105 | with create_bucket(s3_client, BUCKET_NAME), pytest.raises(IndexError): 106 | copy_keys(BUCKET_NAME, [self.source_key], self.destination_bucket, []) 107 | 108 | @pytest.mark.parametrize("source,destination", [ 109 | (source_keys, destination_keys), 110 | (source_keys_path, destination_keys_path), 111 | (source_keys, destination_keys_path), 112 | (source_keys_path, destination_keys), 113 | ]) 114 | def test_copy_list_inside_bucket(self, s3_client, source, destination): 115 | keys_paths = [(key, FILENAME) for key in source] 116 | with create_bucket(s3_client, BUCKET_NAME, keys_paths=keys_paths): 117 | source_before = [object_exists(BUCKET_NAME, key) for key in source] 118 | dest_before = [object_exists(BUCKET_NAME, key) for key in destination] 119 | 120 | copy_keys(BUCKET_NAME, source, BUCKET_NAME, destination) 121 | 122 | source_after = [object_exists(BUCKET_NAME, key) for key in source] 123 | dest_after = [object_exists(BUCKET_NAME, key) for key in destination] 124 | 125 | assert all(source_before) is True and all(dest_before) is False 126 | assert all(source_after) is True and all(dest_after) is True 127 | 128 | @pytest.mark.parametrize("source,prefix", [ 129 | (source_keys, "prefix"), 130 | (source_keys_path, Path("prefix")), 131 | (source_keys, Path("prefix")), 132 | (source_keys_path, "prefix"), 133 | ]) 134 | def test_copy_prefix_without_replacement(self, s3_client, source, prefix): 135 | keys_paths = [(key, FILENAME) for key in source] 136 | with create_bucket(s3_client, BUCKET_NAME, keys_paths=keys_paths), \ 137 | create_bucket(s3_client, self.destination_bucket): 138 | 139 | source_before = [object_exists(BUCKET_NAME, key) for key in source] 140 | dest_before = list_objects(self.destination_bucket, prefix=prefix) 141 | 142 | copy_prefix(BUCKET_NAME, prefix, self.destination_bucket, filter_keys='*02*') 143 | 144 | source_after = [object_exists(BUCKET_NAME, key) for key in source] 145 | dest_after = list_objects(self.destination_bucket, prefix=prefix) 146 | 147 | assert all(source_before) is True and all(source_after) is True 148 | assert len(dest_before) == 0 and len(dest_after) == 1 149 | 150 | @pytest.mark.parametrize("source,prefix,new_prefix", [ 151 | (source_keys, "prefix", "files"), 152 | (source_keys, Path("prefix"), "files"), 153 | (source_keys, "prefix", Path("files")), 154 | (source_keys_path, Path("prefix"), Path("files")), 155 | (source_keys_path, "prefix", Path("files")), 156 | (source_keys_path, Path("prefix"), 
"files"), 157 | ]) 158 | def test_copy_prefix_with_replacement(self, s3_client, source, prefix, new_prefix): 159 | keys_paths = [(key, FILENAME) for key in source] 160 | with create_bucket(s3_client, BUCKET_NAME, keys_paths=keys_paths), \ 161 | create_bucket(s3_client, self.destination_bucket): 162 | 163 | source_before = [object_exists(BUCKET_NAME, key) for key in source] 164 | dest_before = list_objects(self.destination_bucket, prefix=new_prefix) 165 | 166 | copy_prefix(BUCKET_NAME, prefix, self.destination_bucket, (prefix, new_prefix), '*') 167 | 168 | source_after = [object_exists(BUCKET_NAME, key) for key in source] 169 | dest_after_new_prefix = list_objects(self.destination_bucket, prefix=new_prefix) 170 | dest_after_old_prefix = list_objects(self.destination_bucket, prefix=prefix) 171 | 172 | assert all(source_before) is True and all(source_after) is True 173 | assert len(dest_before) == 0 and len(dest_after_old_prefix) == 0 and len(dest_after_new_prefix) == 4 174 | -------------------------------------------------------------------------------- /tests/unit/objects/test_delete_objects.py: -------------------------------------------------------------------------------- 1 | """Unit tests for delete module.""" 2 | from pathlib import Path 3 | 4 | import pytest 5 | from botocore.exceptions import ClientError 6 | from s3_tools import delete_keys, delete_object, delete_prefix, object_exists 7 | from tests.unit.conftest import BUCKET_NAME, FILENAME, create_bucket 8 | 9 | 10 | class TestDelete: 11 | 12 | files = [(f"prefix/mock_{i}.csv", FILENAME) for i in range(4)] 13 | keys = [key for key, fn in files] 14 | keys_path = [Path(key) for key in keys] 15 | 16 | def test_delete_nonexisting_bucket(self, s3_client): 17 | try: 18 | delete_object(BUCKET_NAME, "prefix/object") 19 | except ClientError as e: 20 | error = e.response["Error"]["Code"] 21 | 22 | assert error == "NoSuchBucket" 23 | 24 | @pytest.mark.parametrize("key", ["prefix/object", Path("prefix/object")]) 25 | def test_delete_nonexisting_object(self, s3_client, key): 26 | with create_bucket(s3_client, BUCKET_NAME): 27 | before = object_exists(BUCKET_NAME, key) 28 | delete_object(BUCKET_NAME, key) 29 | after = object_exists(BUCKET_NAME, key) 30 | 31 | assert before is False 32 | assert after is False 33 | 34 | @pytest.mark.parametrize("key", ["prefix/object", Path("prefix/object")]) 35 | def test_delete_existing_object(self, s3_client, key): 36 | with create_bucket(s3_client, BUCKET_NAME, keys_paths=[(key, FILENAME)]): 37 | before = object_exists(BUCKET_NAME, key) 38 | delete_object(BUCKET_NAME, key) 39 | after = object_exists(BUCKET_NAME, key) 40 | 41 | assert before is True 42 | assert after is False 43 | 44 | @pytest.mark.parametrize("keys", [keys, keys_path]) 45 | def test_delete_keys(self, s3_client, keys): 46 | with create_bucket(s3_client, BUCKET_NAME, keys_paths=self.files): 47 | before = [object_exists(BUCKET_NAME, key) for key in keys] 48 | delete_keys(BUCKET_NAME, keys, False) 49 | after = [object_exists(BUCKET_NAME, key) for key in keys] 50 | 51 | assert all(before) is True 52 | assert all(after) is False 53 | 54 | @pytest.mark.parametrize("keys", [keys, keys_path]) 55 | def test_delete_prefix(self, s3_client, keys): 56 | with create_bucket(s3_client, BUCKET_NAME, keys_paths=self.files): 57 | before = [object_exists(BUCKET_NAME, key) for key in keys] 58 | delete_prefix(BUCKET_NAME, "prefix", False) 59 | after = [object_exists(BUCKET_NAME, key) for key in keys] 60 | 61 | assert all(before) is True 62 | assert all(after) is 
False 63 | 64 | @pytest.mark.parametrize("keys", [keys, keys_path]) 65 | def test_delete_prefix_dry_run(self, s3_client, keys): 66 | with create_bucket(s3_client, BUCKET_NAME, keys_paths=self.files): 67 | before = [object_exists(BUCKET_NAME, key) for key in keys] 68 | delete_prefix(BUCKET_NAME, "prefix", True) 69 | after = [object_exists(BUCKET_NAME, key) for key in keys] 70 | 71 | assert all(before) is True 72 | assert all(after) is True 73 | 74 | @pytest.mark.parametrize("keys", [keys, keys_path]) 75 | def test_delete_keys_dry_run(self, s3_client, keys): 76 | with create_bucket(s3_client, BUCKET_NAME, keys_paths=self.files): 77 | before = [object_exists(BUCKET_NAME, key) for key in keys] 78 | delete_keys(BUCKET_NAME, keys, True) 79 | after = [object_exists(BUCKET_NAME, key) for key in keys] 80 | 81 | assert all(before) is True 82 | assert all(after) is True 83 | -------------------------------------------------------------------------------- /tests/unit/objects/test_download_objects.py: -------------------------------------------------------------------------------- 1 | """Unit tests for download module.""" 2 | import shutil 3 | from filecmp import dircmp 4 | from pathlib import Path 5 | 6 | import pytest 7 | from botocore.exceptions import ClientError 8 | from s3_tools import ( 9 | download_key_to_file, 10 | download_keys_to_files, 11 | download_prefix_to_folder, 12 | ) 13 | from tests.unit.conftest import ( 14 | BUCKET_NAME, 15 | EMPTY_FILE, 16 | FILENAME, 17 | create_bucket, 18 | create_files, 19 | ) 20 | 21 | 22 | class TestDownload: 23 | fn_test = FILENAME + ".tests" 24 | key = "prefix/object" 25 | 26 | create = [(f"prefix/mock_{i}.csv", FILENAME) for i in range(4)] 27 | create_path = [(Path(key), Path(fn)) for key, fn in create] 28 | 29 | download = [(f"prefix/mock_{i}.csv", f"{FILENAME}.{i}") for i in range(4)] 30 | download_path = [(Path(key), Path(fn)) for key, fn in download] 31 | 32 | root_folder = "TEST_ROOT_A" 33 | 34 | def test_download_nonexisting_bucket(self, s3_client): 35 | try: 36 | download_key_to_file(BUCKET_NAME, self.key, self.fn_test) 37 | except ClientError as e: 38 | error = e.response["Error"]["Code"] 39 | 40 | assert error == "NoSuchBucket" 41 | 42 | @pytest.mark.parametrize("key,fn", [ 43 | (key, fn_test), 44 | (key, Path(fn_test)), 45 | (Path(key), fn_test), 46 | (Path(key), Path(fn_test)), 47 | ]) 48 | def test_download_nonexisting_object(self, s3_client, key, fn): 49 | with create_bucket(s3_client, BUCKET_NAME): 50 | try: 51 | download_key_to_file(BUCKET_NAME, key, fn) 52 | except ClientError as e: 53 | error = e.response["Error"]["Code"] 54 | 55 | assert error == "404" 56 | 57 | @pytest.mark.parametrize("key,fn", [ 58 | (key, fn_test), 59 | (key, Path(fn_test)), 60 | (Path(key), fn_test), 61 | (Path(key), Path(fn_test)), 62 | ]) 63 | def test_download_object(self, s3_client, key, fn): 64 | with create_bucket(s3_client, BUCKET_NAME, keys_paths=[(key, FILENAME)]): 65 | before = Path(fn).exists() 66 | response = download_key_to_file(BUCKET_NAME, key, fn) 67 | after = Path(fn).exists() 68 | 69 | Path(fn).unlink() 70 | assert before is False 71 | assert after is True 72 | assert response is True 73 | 74 | @pytest.mark.parametrize("key,fn", [ 75 | (key, fn_test), 76 | (key, Path(fn_test)), 77 | (Path(key), fn_test), 78 | (Path(key), Path(fn_test)), 79 | ]) 80 | def test_download_empty_object(self, s3_client, key, fn): 81 | with create_bucket(s3_client, BUCKET_NAME, keys_paths=[(key, EMPTY_FILE)]): 82 | before = Path(fn).exists() 83 | response = 
download_key_to_file(BUCKET_NAME, key, fn) 84 | after = Path(fn).exists() 85 | 86 | Path(fn).unlink() 87 | assert before is False 88 | assert after is True 89 | assert response is True 90 | 91 | @pytest.mark.parametrize('show,create,download,as_paths', [ 92 | (False, create, download, True), 93 | (False, create, download_path, True), 94 | (True, create_path, download, False), 95 | (True, create_path, download_path, False), 96 | ]) 97 | def test_download_keys_to_files(self, s3_client, show, create, download, as_paths): 98 | if show: 99 | pytest.importorskip("rich") 100 | 101 | with create_bucket(s3_client, BUCKET_NAME, keys_paths=create): 102 | before = [Path(fn).exists() for key, fn in download] 103 | response = download_keys_to_files( 104 | bucket=BUCKET_NAME, 105 | keys_paths=download, 106 | show_progress=show, 107 | as_paths=as_paths, 108 | ) 109 | after = [Path(fn).exists() for key, fn in download] 110 | 111 | for key, fn in download: 112 | Path(fn).unlink() 113 | 114 | assert all(before) is False 115 | assert all(after) is True 116 | assert all(r[2] for r in response) is True 117 | 118 | if as_paths: 119 | assert all(Path in type(r[0]).__bases__ for r in response) is True 120 | else: 121 | assert all(type(r[0]) is str for r in response) is True 122 | 123 | @pytest.mark.parametrize('prefix,folder,as_paths', [ 124 | ("test_prefix", "test_folder", False), 125 | ("test_prefix", Path("test_folder"), False), 126 | (Path("test_prefix"), "test_folder", True), 127 | (Path("test_prefix"), Path("test_folder"), True), 128 | ]) 129 | def test_download_prefix_to_folder(self, s3_client, prefix, folder, as_paths): 130 | 131 | paths = create_files(as_paths) 132 | 133 | lst = [( 134 | Path(p).as_posix().replace(self.root_folder, Path(prefix).as_posix()), p) 135 | for p in paths 136 | ] 137 | 138 | with create_bucket(s3_client, BUCKET_NAME, keys_paths=lst): 139 | response = download_prefix_to_folder(BUCKET_NAME, prefix, folder) 140 | 141 | result = dircmp(self.root_folder, folder) 142 | 143 | # "Folder B" exists only on root_folder (empty folder is not upload to S3) 144 | test_1_Root = len(result.left_only) == 1 and len(result.right_only) == 0 145 | 146 | # Both root folders have 1 file and 2 dirs in common 147 | test_2_Root = len(result.common) == 3 and len(result.diff_files) == 0 148 | 149 | # Both "Folder A" folders have 2 files and 0 dirs in common 150 | folderA = result.subdirs['folderA'] 151 | test_1_FolderA = len(folderA.common) == 2 and len(folderA.diff_files) == 0 152 | 153 | # Both "Folder D" folders have 1 file and 0 dirs in common 154 | folderD = result.subdirs['folderC'].subdirs['folderD'] 155 | test_1_FolderD = len(folderD.common) == 1 and len(folderD.diff_files) == 0 156 | 157 | shutil.rmtree(self.root_folder) 158 | shutil.rmtree(folder) 159 | 160 | assert test_1_Root and test_2_Root and test_1_FolderA and test_1_FolderD 161 | assert len(response) == 4 162 | 163 | def test_download_not_enough_arguments(self): 164 | 165 | with pytest.raises(ValueError): 166 | download_keys_to_files(BUCKET_NAME, self.download, extra_args_per_key=[{'arg': 'value'}]) # type: ignore 167 | -------------------------------------------------------------------------------- /tests/unit/objects/test_list_objects.py: -------------------------------------------------------------------------------- 1 | """Unit tests for list module.""" 2 | from pathlib import Path 3 | 4 | import pytest 5 | from botocore.exceptions import ClientError 6 | from s3_tools import list_objects 7 | from tests.unit.conftest import BUCKET_NAME, 
FILENAME, create_bucket 8 | 9 | 10 | class TestList: 11 | 12 | def test_list_nonexisting_bucket(self, s3_client): 13 | try: 14 | list_objects(BUCKET_NAME) 15 | except ClientError as e: 16 | error = e.response["Error"]["Code"] 17 | 18 | assert error == "NoSuchBucket" 19 | 20 | @pytest.mark.parametrize("prefix", ["prefix", Path("prefix")]) 21 | def test_list_empty_bucket(self, s3_client, prefix): 22 | with create_bucket(s3_client, BUCKET_NAME): 23 | keys = list_objects(BUCKET_NAME, prefix) 24 | 25 | assert len(keys) == 0 26 | 27 | def test_list_bucket(self, s3_client): 28 | lst = [(f"prefix/mock_{i}.csv", FILENAME) for i in range(1)] 29 | 30 | with create_bucket(s3_client, BUCKET_NAME, keys_paths=lst): 31 | keys = list_objects(BUCKET_NAME, "prefix") 32 | 33 | assert len(keys) == 1 34 | assert keys[0] == lst[0][0] 35 | 36 | def test_list_bucket_with_pagination(self, s3_client): 37 | lst = [(f"prefix/mock_{i}.csv", FILENAME) for i in range(10)] 38 | 39 | with create_bucket(s3_client, BUCKET_NAME, keys_paths=lst): 40 | keys = list_objects(BUCKET_NAME, "prefix", max_keys=3) 41 | 42 | assert len(keys) == 10 43 | 44 | @pytest.mark.parametrize("prefix", ["prefix", Path("prefix")]) 45 | def test_list_bucket_return_as_path(self, s3_client, prefix): 46 | lst = [(f"prefix/mock_{i}.csv", FILENAME) for i in range(1)] 47 | 48 | with create_bucket(s3_client, BUCKET_NAME, keys_paths=lst): 49 | keys = list_objects(BUCKET_NAME, prefix, as_paths=True) 50 | 51 | assert len(keys) == 1 52 | assert keys[0] == Path(lst[0][0]) 53 | -------------------------------------------------------------------------------- /tests/unit/objects/test_move_objects.py: -------------------------------------------------------------------------------- 1 | """Unit tests for move module.""" 2 | from pathlib import Path 3 | 4 | import pytest 5 | from botocore.exceptions import ClientError 6 | from s3_tools import ( 7 | move_keys, 8 | move_object, 9 | object_exists, 10 | ) 11 | from tests.unit.conftest import ( 12 | BUCKET_NAME, 13 | FILENAME, 14 | create_bucket, 15 | ) 16 | 17 | 18 | class TestMove: 19 | 20 | source_key = "prefix/object" 21 | destination_key = "new-prefix/new-object" 22 | destination_bucket = "another-bucket" 23 | 24 | source_keys = [f"prefix/object_0{i}" for i in range(1, 5)] 25 | destination_keys = [f"new-prefix/new-object_0{i}" for i in range(1, 5)] 26 | 27 | source_keys_path = [Path(key) for key in source_keys] 28 | destination_keys_path = [Path(key) for key in destination_keys] 29 | 30 | def test_move_from_nonexisting_bucket(self, s3_client): 31 | try: 32 | move_object("Bucket", "key", "Bucket", "new-key") 33 | except ClientError as e: 34 | error = e.response["Error"]["Code"] 35 | 36 | assert error == "NoSuchBucket" 37 | 38 | def test_move_from_nonexisting_key(self, s3_client): 39 | with create_bucket(s3_client, BUCKET_NAME): 40 | try: 41 | move_object(BUCKET_NAME, "key", BUCKET_NAME, "new-key") 42 | except ClientError as e: 43 | error = e.response["Error"]["Code"] 44 | 45 | assert error == "404" 46 | 47 | @pytest.mark.parametrize("source,destination", [ 48 | (source_key, destination_key), 49 | (source_key, Path(destination_key)), 50 | (Path(source_key), destination_key), 51 | (Path(source_key), Path(destination_key)), 52 | ]) 53 | def test_move_inside_bucket(self, s3_client, source, destination): 54 | with create_bucket(s3_client, BUCKET_NAME, keys_paths=[(source, FILENAME)]): 55 | source_before = object_exists(BUCKET_NAME, source) 56 | dest_before = object_exists(BUCKET_NAME, destination) 57 | 58 | 
move_object(BUCKET_NAME, source, BUCKET_NAME, destination) 59 | 60 | source_after = object_exists(BUCKET_NAME, source) 61 | dest_after = object_exists(BUCKET_NAME, destination) 62 | 63 | assert source_before is True and dest_before is False 64 | assert source_after is False and dest_after is True 65 | 66 | @pytest.mark.parametrize("source,destination", [ 67 | (source_key, destination_key), 68 | (source_key, Path(destination_key)), 69 | (Path(source_key), destination_key), 70 | (Path(source_key), Path(destination_key)), 71 | ]) 72 | def test_move_between_bucket(self, s3_client, source, destination): 73 | with create_bucket(s3_client, BUCKET_NAME, keys_paths=[(source, FILENAME)]), \ 74 | create_bucket(s3_client, self.destination_bucket): 75 | 76 | source_before = object_exists(BUCKET_NAME, source) 77 | dest_before = object_exists(self.destination_bucket, destination) 78 | 79 | move_object(BUCKET_NAME, source, self.destination_bucket, destination) 80 | 81 | source_after = object_exists(BUCKET_NAME, source) 82 | dest_after = object_exists(self.destination_bucket, destination) 83 | 84 | assert source_before is True and dest_before is False 85 | assert source_after is False and dest_after is True 86 | 87 | def test_move_to_nonexisting_bucket(self, s3_client): 88 | with create_bucket(s3_client, BUCKET_NAME, keys_paths=[(self.source_key, FILENAME)]): 89 | try: 90 | move_object(BUCKET_NAME, self.source_key, self.destination_bucket, self.destination_key) 91 | except ClientError as e: 92 | error = e.response["Error"]["Code"] 93 | 94 | assert error == "NoSuchBucket" 95 | 96 | def test_move_list_length_zero(self, s3_client): 97 | with create_bucket(s3_client, BUCKET_NAME), pytest.raises(ValueError): 98 | move_keys(BUCKET_NAME, [], self.destination_bucket, []) 99 | 100 | def test_move_list_different_length(self, s3_client): 101 | with create_bucket(s3_client, BUCKET_NAME), pytest.raises(IndexError): 102 | move_keys(BUCKET_NAME, [self.source_key], self.destination_bucket, []) 103 | 104 | @pytest.mark.parametrize("source,destination", [ 105 | (source_keys, destination_keys), 106 | (source_keys, destination_keys_path), 107 | (source_keys_path, destination_keys), 108 | (source_keys_path, destination_keys_path), 109 | ]) 110 | def test_move_list_inside_bucket(self, s3_client, source, destination): 111 | keys_paths = [(key, FILENAME) for key in source] 112 | with create_bucket(s3_client, BUCKET_NAME, keys_paths=keys_paths): 113 | source_before = [object_exists(BUCKET_NAME, key) for key in source] 114 | dest_before = [object_exists(BUCKET_NAME, key) for key in destination] 115 | 116 | move_keys(BUCKET_NAME, source, BUCKET_NAME, destination) 117 | 118 | source_after = [object_exists(BUCKET_NAME, key) for key in source] 119 | dest_after = [object_exists(BUCKET_NAME, key) for key in destination] 120 | 121 | assert all(source_before) is True and all(dest_before) is False 122 | assert all(source_after) is False and all(dest_after) is True 123 | -------------------------------------------------------------------------------- /tests/unit/objects/test_presigned_url.py: -------------------------------------------------------------------------------- 1 | """Unit tests for presigned_url module.""" 2 | from pathlib import Path 3 | 4 | import pytest 5 | import requests 6 | from botocore.exceptions import ParamValidationError 7 | from s3_tools import ( 8 | get_presigned_download_url, 9 | get_presigned_upload_url, 10 | get_presigned_url, 11 | object_exists, 12 | ) 13 | from tests.unit.conftest import ( 14 | BUCKET_NAME, 15 | 
FILENAME, 16 | create_bucket, 17 | ) 18 | 19 | 20 | class TestPresignedUrl: 21 | fn_test = FILENAME + ".tests" 22 | key = "prefix/object" 23 | 24 | def test_download_nonexisting_object_with_presigned_url(self, s3_client): 25 | with create_bucket(s3_client, BUCKET_NAME), pytest.raises(requests.exceptions.HTTPError): 26 | url = get_presigned_download_url(bucket=BUCKET_NAME, key=self.key) 27 | response = requests.get(url) 28 | response.raise_for_status() 29 | 30 | assert url is not None 31 | assert response.status_code == 404 32 | 33 | @pytest.mark.parametrize("key", [key, Path(key)]) 34 | def test_download_object_with_presigned_url(self, s3_client, key): 35 | with create_bucket(s3_client, BUCKET_NAME, keys_paths=[(key, FILENAME)]): 36 | url = get_presigned_download_url(bucket=BUCKET_NAME, key=key) 37 | response = requests.get(url) 38 | 39 | assert url is not None 40 | assert response.status_code == 200 41 | 42 | def test_list_bucket_objects_with_presigned_url(self, s3_client): 43 | with create_bucket(s3_client, BUCKET_NAME, keys_paths=[(self.key, FILENAME)]): 44 | url = get_presigned_url(client_method='list_objects', method_parameters={'Bucket': BUCKET_NAME}) 45 | response = requests.get(url) 46 | 47 | assert url is not None 48 | assert response.status_code == 200 49 | 50 | def test_invalid_request_with_presigned_url(self, s3_client): 51 | with create_bucket(s3_client, BUCKET_NAME), pytest.raises(ParamValidationError): 52 | url = get_presigned_url(client_method='list_objects') 53 | requests.get(url) 54 | 55 | @pytest.mark.parametrize("key", [key, Path(key)]) 56 | def test_upload_objects_with_presigned_url(self, s3_client, key): 57 | with create_bucket(s3_client, BUCKET_NAME): 58 | before = object_exists(BUCKET_NAME, self.key) 59 | response = get_presigned_upload_url(bucket=BUCKET_NAME, key=key) 60 | print(response) 61 | with open(FILENAME, 'rb') as f: 62 | files = {'file': (Path(key).as_posix(), f)} 63 | post_response = requests.post(response['url'], data=response['fields'], files=files) 64 | 65 | after = object_exists(BUCKET_NAME, key) 66 | 67 | assert response is not None 68 | assert post_response.status_code == 204 69 | assert before is False 70 | assert after is True 71 | 72 | @pytest.mark.parametrize("key", [None, int]) 73 | def test_invalid_upload_objects_with_presigned_url(self, s3_client, key): 74 | with pytest.raises((AttributeError, TypeError)): 75 | get_presigned_upload_url(bucket=BUCKET_NAME, key=key) 76 | -------------------------------------------------------------------------------- /tests/unit/objects/test_read_objects.py: -------------------------------------------------------------------------------- 1 | """Unit tests for read module.""" 2 | import json 3 | from pathlib import Path 4 | 5 | import pytest 6 | from botocore.exceptions import ClientError 7 | from s3_tools import ( 8 | read_object_to_bytes, 9 | read_object_to_dict, 10 | read_object_to_text, 11 | ) 12 | from tests.unit.conftest import ( 13 | BUCKET_NAME, 14 | EMPTY_FILE, 15 | create_bucket, 16 | ) 17 | 18 | 19 | class TestRead: 20 | key = "prefix/object" 21 | 22 | def test_read_nonexisting_bucket(self, s3_client): 23 | try: 24 | read_object_to_bytes(BUCKET_NAME, self.key) 25 | except ClientError as e: 26 | error = e.response["Error"]["Code"] 27 | 28 | assert error == "NoSuchBucket" 29 | 30 | def test_read_nonexisting_object(self, s3_client): 31 | with create_bucket(s3_client, BUCKET_NAME): 32 | try: 33 | read_object_to_bytes(BUCKET_NAME, self.key) 34 | except ClientError as e: 35 | error = 
e.response["Error"]["Code"] 36 | 37 | assert error == "NoSuchKey" 38 | 39 | @pytest.mark.parametrize("key", [key, Path(key)]) 40 | def test_read_from_empty_bytes(self, s3_client, key): 41 | expected_obj = bytes() 42 | 43 | with create_bucket(s3_client, BUCKET_NAME, keys_paths=[(key, EMPTY_FILE)]): 44 | obj = read_object_to_bytes(BUCKET_NAME, key) 45 | 46 | assert expected_obj == obj 47 | 48 | @pytest.mark.parametrize("key", [key, Path(key)]) 49 | def test_read_to_bytes(self, s3_client, key): 50 | expected_obj = bytes("Just a test string converted to bytes", "utf-8") 51 | 52 | with create_bucket(s3_client, BUCKET_NAME, key=key, data=expected_obj): 53 | obj = read_object_to_bytes(BUCKET_NAME, key) 54 | 55 | assert expected_obj == obj 56 | 57 | @pytest.mark.parametrize("key", [key, Path(key)]) 58 | def test_read_to_dict(self, s3_client, key): 59 | expected_obj = {"key": "value"} 60 | 61 | with create_bucket(s3_client, BUCKET_NAME, key=key, data=json.dumps(expected_obj)): 62 | obj = read_object_to_dict(BUCKET_NAME, key) 63 | 64 | assert expected_obj == obj 65 | 66 | @pytest.mark.parametrize("key", [key, Path(key)]) 67 | def test_read_to_text(self, s3_client, key): 68 | expected_obj = "Just a test string" 69 | 70 | with create_bucket(s3_client, BUCKET_NAME, key=key, data=expected_obj): 71 | obj = read_object_to_text(BUCKET_NAME, key) 72 | 73 | assert expected_obj == obj 74 | -------------------------------------------------------------------------------- /tests/unit/objects/test_upload_objects.py: -------------------------------------------------------------------------------- 1 | """Unit tests for upload module.""" 2 | import shutil 3 | from pathlib import Path 4 | 5 | import pytest 6 | from boto3.exceptions import S3UploadFailedError 7 | from s3_tools import ( 8 | object_exists, 9 | object_metadata, 10 | upload_file_to_key, 11 | upload_files_to_keys, 12 | upload_folder_to_prefix, 13 | ) 14 | from tests.unit.conftest import ( 15 | BUCKET_NAME, 16 | EMPTY_FILE, 17 | FILENAME, 18 | create_bucket, 19 | create_files, 20 | ) 21 | 22 | 23 | class TestUpload: 24 | key = "prefix/object" 25 | root_folder = 'TEST_ROOT_A' 26 | 27 | keys = [(FILENAME, f"prefix/mock_{i}.csv") for i in range(4)] 28 | keys_paths = [(Path(fn), Path(key)) for fn, key in keys] 29 | 30 | def test_upload_nonexisting_bucket(self, s3_client): 31 | with pytest.raises(S3UploadFailedError): 32 | upload_file_to_key(BUCKET_NAME, self.key, FILENAME) 33 | 34 | def test_upload_nonexisting_file(self, s3_client): 35 | with create_bucket(s3_client, BUCKET_NAME): 36 | with pytest.raises(FileNotFoundError): 37 | upload_file_to_key(BUCKET_NAME, self.key, "/tmp/nonexisting.file") 38 | 39 | @pytest.mark.parametrize("key", [key, Path(key)]) 40 | def test_upload_file(self, s3_client, key): 41 | with create_bucket(s3_client, BUCKET_NAME): 42 | before = object_exists(BUCKET_NAME, key) 43 | upload_file_to_key(BUCKET_NAME, key, FILENAME) 44 | after = object_exists(BUCKET_NAME, key) 45 | 46 | assert before is False 47 | assert after is True 48 | 49 | @pytest.mark.parametrize("key", [key, Path(key)]) 50 | def test_upload_empty_file(self, s3_client, key): 51 | with create_bucket(s3_client, BUCKET_NAME): 52 | before = object_exists(BUCKET_NAME, key) 53 | upload_file_to_key(BUCKET_NAME, key, EMPTY_FILE) 54 | after = object_exists(BUCKET_NAME, key) 55 | 56 | assert before is False 57 | assert after is True 58 | 59 | @pytest.mark.parametrize('keys,show', [ 60 | (keys, False), 61 | (keys_paths, True), 62 | ]) 63 | def test_upload_files_to_keys(self, s3_client, 
keys, show): 64 | if show: 65 | pytest.importorskip("rich") 66 | 67 | with create_bucket(s3_client, BUCKET_NAME): 68 | before = [object_exists(BUCKET_NAME, key) for fn, key in keys] 69 | upload_files_to_keys(BUCKET_NAME, keys, show_progress=show) 70 | after = [object_exists(BUCKET_NAME, key) for fn, key in keys] 71 | 72 | assert all(before) is False 73 | assert all(after) is True 74 | 75 | @pytest.mark.parametrize('prefix,as_path', [ 76 | ("prefix", False), 77 | (Path("prefix"), True), 78 | ]) 79 | def test_upload_folder_to_prefix(self, s3_client, prefix, as_path): 80 | paths = create_files(as_path) 81 | 82 | with create_bucket(s3_client, BUCKET_NAME): 83 | response = upload_folder_to_prefix(BUCKET_NAME, prefix, self.root_folder, as_paths=as_path) 84 | 85 | shutil.rmtree(self.root_folder) 86 | 87 | assert len(response) == 4 88 | # The response must contain all paths 89 | assert not set(paths) ^ set(r[0] for r in response) 90 | 91 | def test_upload_not_enough_arguments(self): 92 | 93 | with pytest.raises(ValueError): 94 | upload_files_to_keys(BUCKET_NAME, self.keys_paths, extra_args_per_key=[{'arg': 'value'}]) # type: ignore 95 | 96 | @pytest.mark.parametrize("key", [key, Path(key)]) 97 | def test_update_with_arguments(self, s3_client, key): 98 | with create_bucket(s3_client, BUCKET_NAME): 99 | before = object_exists(BUCKET_NAME, key) 100 | upload_file_to_key( 101 | BUCKET_NAME, 102 | key, 103 | FILENAME, 104 | extra_args={ 105 | 'Metadata': {'key': 'valueA'}, 106 | 'ContentType': 'text/csv', 107 | 'Tagging': 'tagA=valueA&tagB=valueB', 108 | } 109 | ) 110 | after = object_exists(BUCKET_NAME, key) 111 | metadata = object_metadata(BUCKET_NAME, key) 112 | 113 | assert before is False 114 | assert after is True 115 | 116 | assert metadata['Metadata']['key'] == 'valueA' 117 | assert metadata['ContentType'] == 'text/csv' 118 | assert metadata['ResponseMetadata']['HTTPHeaders']['x-amz-tagging-count'] == '2' 119 | -------------------------------------------------------------------------------- /tests/unit/objects/test_write_objects.py: -------------------------------------------------------------------------------- 1 | """Unit tests for write module.""" 2 | import pytest 3 | from botocore.exceptions import ClientError 4 | from s3_tools import object_exists, write_object_from_bytes, write_object_from_dict, write_object_from_text 5 | from tests.unit.conftest import BUCKET_NAME, create_bucket 6 | 7 | 8 | class TestWrite: 9 | key = "prefix/object" 10 | 11 | def test_write_nonexisting_bucket(self, s3_client): 12 | obj = bytes("Just a test string converted to bytes", 'utf-8') 13 | try: 14 | write_object_from_bytes(BUCKET_NAME, self.key, obj) 15 | except ClientError as e: 16 | error = e.response["Error"]["Code"] 17 | 18 | assert error == "NoSuchBucket" 19 | 20 | def test_write_from_empty_bytes(self, s3_client): 21 | obj = bytes() 22 | 23 | with create_bucket(s3_client, BUCKET_NAME): 24 | url = write_object_from_bytes(BUCKET_NAME, self.key, obj) 25 | exists = object_exists(BUCKET_NAME, self.key) 26 | 27 | assert url == f"https://s3.amazonaws.com/{BUCKET_NAME}/{self.key}" 28 | assert exists is True 29 | 30 | def test_write_from_bytes(self, s3_client): 31 | obj = bytes("Just a test string converted to bytes", 'utf-8') 32 | 33 | with create_bucket(s3_client, BUCKET_NAME): 34 | url = write_object_from_bytes(BUCKET_NAME, self.key, obj) 35 | exists = object_exists(BUCKET_NAME, self.key) 36 | 37 | assert url == f"https://s3.amazonaws.com/{BUCKET_NAME}/{self.key}" 38 | assert exists is True 39 | 40 | def
test_write_from_bytes_wrong_format(self, s3_client): 41 | with create_bucket(s3_client, BUCKET_NAME): 42 | with pytest.raises(TypeError): 43 | write_object_from_bytes(BUCKET_NAME, self.key, 10) # type: ignore 44 | 45 | def test_write_from_dict(self, s3_client): 46 | obj = {"key": "value"} 47 | 48 | with create_bucket(s3_client, BUCKET_NAME): 49 | url = write_object_from_dict(BUCKET_NAME, self.key, obj) 50 | exists = object_exists(BUCKET_NAME, self.key) 51 | 52 | assert url == f"https://s3.amazonaws.com/{BUCKET_NAME}/{self.key}" 53 | assert exists is True 54 | 55 | def test_write_from_dict_wrong_format(self, s3_client): 56 | with create_bucket(s3_client, BUCKET_NAME): 57 | with pytest.raises(TypeError): 58 | write_object_from_dict(BUCKET_NAME, self.key, 10) # type: ignore 59 | 60 | def test_write_from_text(self, s3_client): 61 | obj = "Just a test string" 62 | 63 | with create_bucket(s3_client, BUCKET_NAME): 64 | url = write_object_from_text(BUCKET_NAME, self.key, obj) 65 | exists = object_exists(BUCKET_NAME, self.key) 66 | 67 | assert url == f"https://s3.amazonaws.com/{BUCKET_NAME}/{self.key}" 68 | assert exists is True 69 | 70 | def test_write_from_text_wrong_format(self, s3_client): 71 | with create_bucket(s3_client, BUCKET_NAME): 72 | with pytest.raises(TypeError): 73 | write_object_from_text(BUCKET_NAME, self.key, 10) # type: ignore 74 | -------------------------------------------------------------------------------- /tests/unit/test_utils.py: -------------------------------------------------------------------------------- 1 | """Unit tests for utils module.""" 2 | import builtins 3 | from concurrent import futures 4 | 5 | import pytest 6 | from s3_tools.utils import _create_progress_bar, _get_future_output 7 | 8 | 9 | @pytest.fixture 10 | def hide_available_pkg(monkeypatch): 11 | import_orig = builtins.__import__ 12 | 13 | def mocked_import(name, *args, **kwargs): 14 | if "rich." in name: 15 | raise ModuleNotFoundError() 16 | return import_orig(name, *args, **kwargs) 17 | 18 | monkeypatch.setattr(builtins, "__import__", mocked_import) 19 | 20 | 21 | class TestFuture: 22 | 23 | def foo(self, number): 24 | return 1 / number 25 | 26 | def test_get_future(self): 27 | 28 | with futures.ThreadPoolExecutor(max_workers=2) as executor: 29 | executions = { 30 | executor.submit(self.foo, i): {"number": i} 31 | for i in range(5) 32 | } 33 | 34 | responses = [ 35 | (executions[future]["number"], _get_future_output(future)) 36 | for future in futures.as_completed(executions) 37 | ] 38 | 39 | assert len(responses) == 5 40 | assert sorted(responses)[0][1] == "ZeroDivisionError('division by zero')" 41 | 42 | 43 | class TestProgressBar: 44 | 45 | @pytest.mark.usefixtures("hide_available_pkg") 46 | def test_if_no_package(self): 47 | with pytest.raises(ModuleNotFoundError): 48 | _create_progress_bar("Test", 10) 49 | 50 | def test_create_progress_bar(self): 51 | pytest.importorskip("rich") 52 | 53 | progress, task_id = _create_progress_bar("Test", 10) 54 | print(progress, task_id) 55 | 56 | assert len(progress.columns) == 3 57 | assert task_id == 0 58 | assert progress.tasks[task_id].total == 10 59 | --------------------------------------------------------------------------------