├── .cruft.json ├── .github ├── dependabot.yml └── workflows │ ├── benchmarks.yaml │ ├── release.yaml │ ├── tests.yaml │ └── update-template.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.rst ├── dvc_s3 ├── __init__.py └── tests │ ├── __init__.py │ ├── benchmarks.py │ ├── cloud.py │ ├── conftest.py │ ├── fixtures.py │ ├── test_dvc.py │ ├── test_s3.py │ └── test_utils.py └── pyproject.toml /.cruft.json: -------------------------------------------------------------------------------- 1 | { 2 | "template": "https://github.com/iterative/cookiecutter-dvc-plugin/", 3 | "commit": "13683f4eea044c4feb459503c391394f80e3d635", 4 | "checkout": null, 5 | "context": { 6 | "cookiecutter": { 7 | "plugin_name": "s3", 8 | "_template": "https://github.com/iterative/cookiecutter-dvc-plugin/", 9 | "_commit": "13683f4eea044c4feb459503c391394f80e3d635" 10 | } 11 | }, 12 | "directory": null, 13 | "skip": [ 14 | ".git" 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | updates: 4 | - directory: "/" 5 | package-ecosystem: "pip" 6 | schedule: 7 | interval: "weekly" 8 | labels: 9 | - "maintenance" 10 | 11 | - directory: "/" 12 | package-ecosystem: "github-actions" 13 | schedule: 14 | interval: "weekly" 15 | labels: 16 | - "maintenance" 17 | -------------------------------------------------------------------------------- /.github/workflows/benchmarks.yaml: -------------------------------------------------------------------------------- 1 | name: Benchmarks 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | workflow_dispatch: 8 | inputs: 9 | dataset: 10 | description: "Dataset Size" 11 | required: false 12 | default: "small" 13 | type: choice 14 | options: 15 | - tiny 16 | - small 17 | - large 18 | - mnist 19 | 20 | env: 21 | DVC_TEST: "true" 22 | FORCE_COLOR: "1" 23 | DATASET: ${{ 
github.event.inputs.dataset || ( github.event_name == 'schedule' && 'mnist' || 'small' ) }} 24 | 25 | concurrency: 26 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 27 | cancel-in-progress: true 28 | 29 | permissions: 30 | contents: read 31 | pull-requests: write 32 | 33 | jobs: 34 | bench: 35 | timeout-minutes: 45 36 | runs-on: ${{ matrix.os }} 37 | strategy: 38 | fail-fast: false 39 | matrix: 40 | os: [ubuntu-latest] 41 | pyv: ["3.12"] 42 | 43 | steps: 44 | - uses: iterative/setup-cml@v3 45 | - uses: actions/checkout@v4 46 | with: 47 | fetch-depth: 0 48 | path: dvc-s3 49 | 50 | - uses: actions/checkout@v4 51 | with: 52 | fetch-depth: 0 53 | repository: iterative/dvc 54 | path: dvc 55 | 56 | - uses: actions/checkout@v4 57 | with: 58 | fetch-depth: 0 59 | repository: iterative/dvc-bench 60 | path: dvc-bench 61 | 62 | - name: Set up Python 63 | uses: actions/setup-python@v5 64 | with: 65 | python-version: ${{ matrix.pyv }} 66 | cache: 'pip' 67 | cache-dependency-path: | 68 | pyproject.toml 69 | dvc/pyproject.toml 70 | 71 | - name: install 72 | run: pip install -e "./dvc[testing]" -e "./dvc-s3[tests]" 73 | 74 | - name: run benchmarks 75 | timeout-minutes: 180 76 | working-directory: dvc-s3/ 77 | run: > 78 | pytest --benchmark-save benchmarks-s3 --benchmark-group-by func 79 | dvc_s3/tests/benchmarks.py 80 | --dvc-revs main,3.10.0,2.58.2 81 | --dataset ${DATASET} 82 | --dvc-install-deps s3 83 | --dvc-bench-repo ../dvc-bench --dvc-repo ../dvc 84 | 85 | - name: upload raw results 86 | uses: actions/upload-artifact@v4 87 | with: 88 | name: .benchmarks 89 | path: dvc-s3/.benchmarks 90 | 91 | - name: create md 92 | env: 93 | REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }} 94 | if: ${{ github.event_name == 'pull_request' && ! 
github.event.pull_request.head.repo.fork }} 95 | run: | 96 | echo '```' > report.md 97 | PY_COLORS=0 pytest-benchmark compare --group-by func --sort name >> report.md 98 | echo '```' >> report.md 99 | cml comment create report.md 100 | working-directory: dvc-s3 101 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | permissions: 8 | contents: read 9 | 10 | jobs: 11 | build: 12 | environment: pypi 13 | permissions: 14 | contents: read 15 | id-token: write 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v4 19 | with: 20 | fetch-depth: 0 21 | - uses: actions/setup-python@v5 22 | with: 23 | python-version: 3.13 24 | - uses: astral-sh/setup-uv@v6 25 | - run: uv pip install twine --system 26 | - run: uv build 27 | - run: twine check dist/* 28 | - name: Publish packages to PyPI 29 | uses: pypa/gh-action-pypi-publish@release/v1 30 | -------------------------------------------------------------------------------- /.github/workflows/tests.yaml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | schedule: 8 | - cron: '5 1 * * *' # every day at 01:05 9 | workflow_dispatch: 10 | 11 | concurrency: 12 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 13 | cancel-in-progress: true 14 | 15 | permissions: 16 | contents: read 17 | 18 | jobs: 19 | tests: 20 | timeout-minutes: 45 21 | runs-on: ${{ matrix.os }} 22 | strategy: 23 | fail-fast: false 24 | matrix: 25 | os: [ubuntu-latest, windows-latest, macos-latest] 26 | pyv: ["3.9", "3.10", "3.11", "3.12", "3.13"] 27 | 28 | steps: 29 | - uses: actions/checkout@v4 30 | with: 31 | fetch-depth: 0 32 | 33 | - name: Set up Python 34 | uses: actions/setup-python@v5 35 | with: 
36 | python-version: ${{ matrix.pyv }} 37 | 38 | - uses: astral-sh/setup-uv@v6 39 | with: 40 | enable-cache: true 41 | cache-dependency-glob: pyproject.toml 42 | 43 | - name: install 44 | run: uv pip install -e ".[dev]" "dvc[testing] @ git+https://github.com/iterative/dvc" --system 45 | 46 | - name: lint 47 | timeout-minutes: 10 48 | uses: pre-commit/action@v3.0.1 49 | 50 | - name: run tests 51 | timeout-minutes: 15 52 | run: pytest -v -n=auto --cov-report=xml --cov-report=term 53 | 54 | - name: upload coverage report 55 | uses: codecov/codecov-action@v5 56 | -------------------------------------------------------------------------------- /.github/workflows/update-template.yaml: -------------------------------------------------------------------------------- 1 | name: Update template 2 | 3 | on: 4 | schedule: 5 | - cron: '5 1 * * *' # every day at 01:05 6 | workflow_dispatch: 7 | 8 | jobs: 9 | update: 10 | permissions: 11 | contents: write 12 | pull-requests: write 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v4 16 | - name: Install deps 17 | run: pip install cruft 18 | - name: Update template 19 | id: update 20 | run: | 21 | cruft update -y 22 | { echo "changes<<GR_EOF"; git diff; echo "GR_EOF"; } >> "$GITHUB_OUTPUT" 23 | - name: Create PR 24 | if: ${{ steps.update.outputs.changes != '' }} 25 | uses: peter-evans/create-pull-request@v7 26 | with: 27 | commit-message: update template 28 | title: update template 29 | token: ${{ github.token }} 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | 
share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | /dvc_s3/_dvc_s3_version.py 132 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | ci: 2 | skip: [mypy] 3 | 4 | repos: 5 | - hooks: 6 | - id: no rej 7 | name: Check for .rej files 8 | entry: .rej files found, fix conflicts from these rejected files. 9 | language: fail 10 | files: \.rej$ 11 | repo: local 12 | - repo: https://github.com/astral-sh/ruff-pre-commit 13 | rev: 'v0.11.13' 14 | hooks: 15 | - id: ruff 16 | args: [--fix, --exit-non-zero-on-fix] 17 | - id: ruff-format 18 | - repo: https://github.com/pre-commit/pre-commit-hooks 19 | rev: v5.0.0 20 | hooks: 21 | - id: check-toml 22 | - id: check-yaml 23 | - hooks: 24 | - id: codespell 25 | args: 26 | - --ignore-words-list 27 | - ba,datas,fo,uptodate 28 | repo: https://github.com/codespell-project/codespell 29 | rev: v2.4.1 30 | - repo: local 31 | hooks: 32 | - id: mypy 33 | name: mypy 34 | entry: mypy 35 | files: ^dvc_s3/ 36 | language: system 37 | types: [python] 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND 
DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | dvc-s3 2 | ====== 3 | 4 | s3 plugin for dvc 5 | 6 | Tests 7 | ----- 8 | 9 | By default tests will be run against moto (via pytest-servers). 10 | To run against real S3, set ``DVC_TEST_AWS_REPO_BUCKET`` with an AWS bucket name. 
11 | -------------------------------------------------------------------------------- /dvc_s3/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import threading 3 | from collections import defaultdict 4 | from typing import Any, ClassVar, Optional 5 | from urllib.parse import parse_qs, urlencode, urlsplit, urlunsplit 6 | 7 | from funcy import first, wrap_prop 8 | 9 | from dvc.utils.objects import cached_property 10 | from dvc_objects.fs.base import ObjectFileSystem 11 | from dvc_objects.fs.errors import ConfigError 12 | 13 | _AWS_CONFIG_PATH = os.path.join(os.path.expanduser("~"), ".aws", "config") 14 | 15 | 16 | # https://github.com/aws/aws-cli/blob/5aa599949f60b6af554fd5714d7161aa272716f7/awscli/customizations/s3/utils.py 17 | MULTIPLIERS = { 18 | "kb": 1024, 19 | "mb": 1024**2, 20 | "gb": 1024**3, 21 | "tb": 1024**4, 22 | "kib": 1024, 23 | "mib": 1024**2, 24 | "gib": 1024**3, 25 | "tib": 1024**4, 26 | } 27 | 28 | 29 | def human_readable_to_bytes(value: str) -> int: 30 | value = value.lower() 31 | suffix = "" 32 | if value.endswith(tuple(MULTIPLIERS.keys())): 33 | size = 2 34 | size += value[-2] == "i" # KiB, MiB etc 35 | value, suffix = value[:-size], value[-size:] 36 | 37 | multiplier = MULTIPLIERS.get(suffix, 1) 38 | return int(value) * multiplier 39 | 40 | 41 | # pylint:disable=abstract-method 42 | class S3FileSystem(ObjectFileSystem): 43 | protocol = "s3" 44 | REQUIRES: ClassVar[dict[str, str]] = {"s3fs": "s3fs", "boto3": "boto3"} 45 | PARAM_CHECKSUM = "etag" 46 | 47 | VERSION_ID_KEY = "versionId" 48 | 49 | _GRANTS: ClassVar[dict[str, str]] = { 50 | "grant_full_control": "GrantFullControl", 51 | "grant_read": "GrantRead", 52 | "grant_read_acp": "GrantReadACP", 53 | "grant_write_acp": "GrantWriteACP", 54 | } 55 | 56 | _TRANSFER_CONFIG_ALIASES: ClassVar[dict[str, str]] = { 57 | "max_queue_size": "max_io_queue", 58 | "max_concurrent_requests": "max_concurrency", 59 | "multipart_threshold": 
"multipart_threshold", 60 | "multipart_chunksize": "multipart_chunksize", 61 | } 62 | 63 | def getcwd(self): 64 | return self.fs.root_marker 65 | 66 | @classmethod 67 | def split_version(cls, path: str) -> tuple[str, Optional[str]]: 68 | parts = list(urlsplit(path)) 69 | query = parse_qs(parts[3]) 70 | if cls.VERSION_ID_KEY in query: 71 | version_id = first(query[cls.VERSION_ID_KEY]) 72 | del query[cls.VERSION_ID_KEY] 73 | parts[3] = urlencode(query) 74 | else: 75 | version_id = None 76 | return urlunsplit(parts), version_id 77 | 78 | @classmethod 79 | def join_version(cls, path: str, version_id: Optional[str]) -> str: 80 | parts = list(urlsplit(path)) 81 | query = parse_qs(parts[3]) 82 | if cls.VERSION_ID_KEY in query: 83 | raise ValueError("path already includes a version query") 84 | parts[3] = f"{cls.VERSION_ID_KEY}={version_id}" if version_id else "" 85 | return urlunsplit(parts) 86 | 87 | @classmethod 88 | def version_path(cls, path: str, version_id: Optional[str]) -> str: 89 | path, _ = cls.split_version(path) 90 | return cls.join_version(path, version_id) 91 | 92 | @classmethod 93 | def coalesce_version( 94 | cls, path: str, version_id: Optional[str] 95 | ) -> tuple[str, Optional[str]]: 96 | path, path_version_id = cls.split_version(path) 97 | versions = {ver for ver in (version_id, path_version_id) if ver} 98 | if len(versions) > 1: 99 | raise ValueError("Path version mismatch: '{path}', '{version_id}'") 100 | return path, (versions.pop() if versions else None) 101 | 102 | @classmethod 103 | def _get_kwargs_from_urls(cls, urlpath: str) -> dict[str, Any]: 104 | ret = super()._get_kwargs_from_urls(urlpath) 105 | url_query = ret.get("url_query") 106 | if url_query is not None: 107 | parsed = parse_qs(url_query) 108 | if "versionId" in parsed: 109 | ret["version_aware"] = True 110 | return ret 111 | 112 | def _split_s3_config(self, s3_config): 113 | """Splits the general s3 config into 2 different config 114 | objects, one for transfer.TransferConfig and other 
is the 115 | general session config""" 116 | 117 | from boto3.s3.transfer import TransferConfig 118 | 119 | config, transfer_config = {}, {} 120 | for key, value in s3_config.items(): 121 | if key in self._TRANSFER_CONFIG_ALIASES: 122 | if key in {"multipart_chunksize", "multipart_threshold"}: 123 | # cast human readable sizes (like 24MiB) to integers 124 | value = human_readable_to_bytes(value) 125 | else: 126 | value = int(value) 127 | transfer_config[self._TRANSFER_CONFIG_ALIASES[key]] = value 128 | else: 129 | config[key] = value 130 | 131 | # pylint: disable=attribute-defined-outside-init 132 | self._transfer_config = TransferConfig(**transfer_config) 133 | return config 134 | 135 | def _load_aws_config_file(self, profile): 136 | from botocore.configloader import load_config 137 | 138 | # pylint: disable=attribute-defined-outside-init 139 | self._transfer_config = None 140 | config_path = os.environ.get("AWS_CONFIG_FILE", _AWS_CONFIG_PATH) 141 | if not os.path.exists(config_path): 142 | return {} 143 | 144 | config = load_config(config_path) 145 | profile_config = config["profiles"].get(profile or "default") 146 | if not profile_config: 147 | return {} 148 | 149 | s3_config = profile_config.get("s3", {}) 150 | return self._split_s3_config(s3_config) 151 | 152 | def _prepare_credentials(self, **config): 153 | import base64 154 | 155 | from flatten_dict import flatten, unflatten 156 | from s3fs.utils import SSEParams 157 | 158 | login_info = defaultdict(dict) 159 | 160 | login_info["version_aware"] = config.get("version_aware", False) 161 | 162 | # credentials 163 | login_info["key"] = config.get("access_key_id") 164 | login_info["secret"] = config.get("secret_access_key") 165 | login_info["token"] = config.get("session_token") 166 | 167 | # session configuration 168 | login_info["profile"] = config.get("profile") 169 | login_info["use_ssl"] = config.get("use_ssl", True) 170 | login_info["anon"] = config.get("allow_anonymous_login") 171 | 172 | # extra client 
configuration 173 | client = login_info["client_kwargs"] 174 | client["region_name"] = config.get("region") 175 | client["endpoint_url"] = config.get("endpointurl") 176 | client["verify"] = config.get("ssl_verify") 177 | 178 | # timeout configuration 179 | config_kwargs = login_info["config_kwargs"] 180 | config_kwargs["read_timeout"] = config.get("read_timeout") 181 | config_kwargs["connect_timeout"] = config.get("connect_timeout") 182 | 183 | # encryptions 184 | additional = login_info["s3_additional_kwargs"] 185 | sse_customer_key = None 186 | if config.get("sse_customer_key"): 187 | if config.get("sse_kms_key_id"): 188 | raise ConfigError( 189 | "`sse_kms_key_id` and `sse_customer_key` AWS S3 config " 190 | "options are mutually exclusive" 191 | ) 192 | sse_customer_key = base64.b64decode(config.get("sse_customer_key")) 193 | sse_customer_algorithm = config.get("sse_customer_algorithm") 194 | if not sse_customer_algorithm and sse_customer_key: 195 | sse_customer_algorithm = "AES256" 196 | sse_params = SSEParams( 197 | server_side_encryption=config.get("sse"), 198 | sse_customer_algorithm=sse_customer_algorithm, 199 | sse_customer_key=sse_customer_key, 200 | sse_kms_key_id=config.get("sse_kms_key_id"), 201 | ) 202 | additional.update(sse_params.to_kwargs()) 203 | additional["ACL"] = config.get("acl") 204 | for grant_option, grant_key in self._GRANTS.items(): 205 | if config.get(grant_option): 206 | if additional["ACL"]: 207 | raise ConfigError( 208 | "`acl` and `grant_*` AWS S3 config options " 209 | "are mutually exclusive" 210 | ) 211 | additional[grant_key] = config[grant_option] 212 | 213 | # config kwargs 214 | session_config = login_info["config_kwargs"] 215 | session_config["s3"] = self._load_aws_config_file(login_info["profile"]) 216 | 217 | shared_creds = config.get("credentialpath") 218 | if shared_creds: 219 | os.environ.setdefault("AWS_SHARED_CREDENTIALS_FILE", shared_creds) 220 | 221 | if ( 222 | client["region_name"] is None 223 | and 
session_config["s3"].get("region_name") is None 224 | and os.getenv("AWS_REGION") is None 225 | ): 226 | # Enable bucket region caching 227 | login_info["cache_regions"] = config.get("cache_regions", True) 228 | 229 | config_path = config.get("configpath") 230 | if config_path: 231 | os.environ.setdefault("AWS_CONFIG_FILE", config_path) 232 | 233 | d = flatten(login_info, reducer="dot") 234 | return unflatten( 235 | {key: value for key, value in d.items() if value is not None}, 236 | splitter="dot", 237 | ) 238 | 239 | @wrap_prop(threading.Lock()) 240 | @cached_property 241 | def fs(self): 242 | from s3fs import S3FileSystem as _S3FileSystem 243 | 244 | s3_filesystem = _S3FileSystem(**self.fs_args) 245 | s3_filesystem.connect() 246 | 247 | return s3_filesystem 248 | 249 | @classmethod 250 | def _strip_protocol(cls, path: str) -> str: 251 | from fsspec.utils import infer_storage_options 252 | 253 | return infer_storage_options(path)["path"] 254 | 255 | def unstrip_protocol(self, path): 256 | return "s3://" + path.lstrip("/") 257 | -------------------------------------------------------------------------------- /dvc_s3/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iterative/dvc-s3/3c9970ff709d9e3269b983a71e52a34b62f68c8d/dvc_s3/tests/__init__.py -------------------------------------------------------------------------------- /dvc_s3/tests/benchmarks.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=unused-import 2 | from dvc.testing.benchmarks.cli.stories.use_cases.test_sharing import ( 3 | test_sharing as test_sharing_s3, # noqa: F401 4 | ) 5 | -------------------------------------------------------------------------------- /dvc_s3/tests/cloud.py: -------------------------------------------------------------------------------- 1 | import locale 2 | import os 3 | import uuid 4 | 5 | from funcy import cached_property 6 | 7 | 
from dvc.testing.cloud import Cloud 8 | from dvc.testing.path_info import CloudURLInfo 9 | 10 | 11 | class S3(Cloud, CloudURLInfo): 12 | @property 13 | def config(self): 14 | return {"url": self.url} 15 | 16 | @staticmethod 17 | def _get_storagepath(): 18 | bucket = os.environ.get("DVC_TEST_AWS_REPO_BUCKET") 19 | assert bucket 20 | return bucket + "/" + "dvc_test_caches" + "/" + str(uuid.uuid4()) 21 | 22 | @staticmethod 23 | def get_url(): 24 | return "s3://" + S3._get_storagepath() 25 | 26 | @cached_property 27 | def _s3(self): 28 | import boto3 29 | 30 | return boto3.client( 31 | "s3", 32 | aws_access_key_id=self.config.get("access_key_id"), 33 | aws_secret_access_key=self.config.get("secret_access_key"), 34 | aws_session_token=self.config.get("session_token"), 35 | endpoint_url=self.config.get("endpointurl"), 36 | region_name=self.config.get("region"), 37 | ) 38 | 39 | def is_file(self): 40 | from botocore.exceptions import ClientError 41 | 42 | if self.path.endswith("/"): 43 | return False 44 | 45 | try: 46 | self._s3.head_object(Bucket=self.bucket, Key=self.path) 47 | except ClientError as exc: 48 | if exc.response["Error"]["Code"] != "404": 49 | raise 50 | return False 51 | 52 | return True 53 | 54 | def is_dir(self): 55 | path = (self / "").path 56 | resp = self._s3.list_objects(Bucket=self.bucket, Prefix=path) 57 | return bool(resp.get("Contents")) 58 | 59 | def exists(self): 60 | return self.is_file() or self.is_dir() 61 | 62 | def mkdir(self, mode=0o777, parents=False, exist_ok=False): 63 | assert mode == 0o777 64 | assert parents 65 | 66 | def write_bytes(self, contents): 67 | self._s3.put_object(Bucket=self.bucket, Key=self.path, Body=contents) 68 | 69 | def unlink(self, missing_ok: bool = False) -> None: 70 | if not self.exists(): 71 | if not missing_ok: 72 | raise FileNotFoundError(str(self)) 73 | return 74 | self._s3.delete_object(Bucket=self.bucket, Key=self.path) 75 | 76 | def rmdir(self, recursive: bool = True) -> None: 77 | if not self.is_dir(): 
78 | raise NotADirectoryError(str(self)) 79 | 80 | path = (self / "").path 81 | resp = self._s3.list_objects(Bucket=self.bucket, Prefix=path) 82 | entries = resp.get("Contents") 83 | if not entries: 84 | return 85 | 86 | if not recursive: 87 | raise OSError(f"Not recursive and directory not empty: {self}") 88 | 89 | for entry in entries: 90 | self._s3.delete_object(Bucket=self.bucket, Key=entry["Key"]) 91 | 92 | def read_bytes(self): 93 | data = self._s3.get_object(Bucket=self.bucket, Key=self.path) 94 | return data["Body"].read() 95 | 96 | def read_text(self, encoding=None, errors=None): 97 | if not encoding: 98 | encoding = locale.getpreferredencoding(False) 99 | assert errors is None 100 | return self.read_bytes().decode(encoding) 101 | 102 | @property 103 | def fs_path(self): 104 | return self.bucket + "/" + self.path.lstrip("/") 105 | 106 | 107 | class FakeS3(S3): 108 | """Fake S3 client that is supposed to be using a mock server's endpoint""" 109 | 110 | def __init__(self, *args, config: dict, **kwargs): 111 | super().__init__(*args, **kwargs) 112 | self._config = config 113 | 114 | def __truediv__(self, key): 115 | ret = super().__truediv__(key) 116 | ret._config = self._config 117 | return ret 118 | 119 | @property 120 | def config(self): 121 | return { 122 | "url": self.url, 123 | "endpointurl": self._config["endpoint_url"], 124 | "access_key_id": self._config["aws_access_key_id"], 125 | "secret_access_key": self._config["aws_secret_access_key"], 126 | "session_token": self._config["aws_session_token"], 127 | "region": self._config["region_name"], 128 | } 129 | 130 | def get_url(self): # pylint: disable=arguments-differ 131 | return str(self) 132 | -------------------------------------------------------------------------------- /dvc_s3/tests/conftest.py: -------------------------------------------------------------------------------- 1 | from dvc.testing.fixtures import * # noqa: F403 2 | 3 | from .fixtures import * # noqa: F403 4 | 
-------------------------------------------------------------------------------- /dvc_s3/tests/fixtures.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | 5 | from .cloud import S3, FakeS3 6 | 7 | 8 | @pytest.fixture 9 | # pylint: disable-next=redefined-outer-name,unused-argument 10 | def make_s3(request): 11 | def _make_s3(): 12 | if os.environ.get("DVC_TEST_AWS_REPO_BUCKET"): 13 | return S3(S3.get_url()) 14 | tmp_s3_path = request.getfixturevalue("tmp_s3_path") 15 | s3_server = request.getfixturevalue("s3_server") 16 | return FakeS3(str(tmp_s3_path).rstrip("/"), config=s3_server) 17 | 18 | return _make_s3 19 | 20 | 21 | @pytest.fixture 22 | # pylint: disable-next=redefined-outer-name,unused-argument 23 | def make_s3_version_aware(versioning, tmp_s3_path, s3_server): 24 | def _make_s3(): 25 | return FakeS3(str(tmp_s3_path).rstrip("/"), config=s3_server) 26 | 27 | return _make_s3 28 | 29 | 30 | @pytest.fixture 31 | def s3(make_s3): # pylint: disable=redefined-outer-name 32 | return make_s3() 33 | 34 | 35 | @pytest.fixture 36 | def cloud(make_cloud): 37 | return make_cloud(typ="s3") 38 | 39 | 40 | @pytest.fixture 41 | def remote(make_remote): 42 | return make_remote(name="upstream", typ="s3") 43 | 44 | 45 | @pytest.fixture 46 | def remote_version_aware(make_remote_version_aware): 47 | return make_remote_version_aware(name="upstream", typ="s3") 48 | 49 | 50 | @pytest.fixture 51 | def remote_worktree(make_remote_worktree): 52 | return make_remote_worktree(name="upstream", typ="s3") 53 | 54 | 55 | @pytest.fixture 56 | def workspace(make_workspace): 57 | return make_workspace(name="workspace", typ="s3") 58 | -------------------------------------------------------------------------------- /dvc_s3/tests/test_dvc.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from dvc.testing.api_tests import ( # noqa: F401 4 | TestAPI, 5 | ) 6 | from 
dvc.testing.remote_tests import (  # noqa: F401
    TestRemote,
    TestRemoteVersionAware,
)
from dvc.testing.workspace_tests import TestGetUrl as _TestGetUrl
from dvc.testing.workspace_tests import TestImport as _TestImport
from dvc.testing.workspace_tests import (  # noqa: F401
    TestImportURLVersionAware,
)
from dvc.testing.workspace_tests import TestLsUrl as _TestLsUrl
from dvc.testing.workspace_tests import TestToRemote as _TestToRemote


class TestImport(_TestImport):
    """S3 parametrization of dvc's shared import test suite."""

    @pytest.fixture
    def stage_md5(self):
        # Expected md5 of the generated stage for the shared test data.
        return "ffe462bbb08432b7a1c3985fcf82ad3a"

    @pytest.fixture
    def is_object_storage(self):
        # S3 is object storage, so directory imports behave accordingly.
        return True

    @pytest.fixture
    def dir_md5(self):
        # Expected .dir md5 for the imported directory fixture.
        return "ec602a6ba97b2dd07bd6d2cd89674a60.dir"


# The remaining suites run unchanged against the s3 fixtures from fixtures.py.
class TestLsUrl(_TestLsUrl):
    pass


class TestGetUrl(_TestGetUrl):
    pass


class TestToRemote(_TestToRemote):
    pass
--------------------------------------------------------------------------------
/dvc_s3/tests/test_s3.py:
--------------------------------------------------------------------------------
import os

import pytest

from dvc.fs import ConfigError
from dvc_s3 import S3FileSystem

# Shared dummy remote settings used across the tests below.
bucket_name = "bucket-name"
prefix = "some/prefix"
url = f"s3://{bucket_name}/{prefix}"
key_id = "key-id"
key_secret = "key-secret"
session_token = "session-token"


@pytest.fixture(autouse=True, name="grants")
def fixture_grants():
    return {
        "grant_read": "id=read-permission-id,id=other-read-permission-id",
        "grant_read_acp": "id=read-acp-permission-id",
        "grant_write_acp": "id=write-acp-permission-id",
        "grant_full_control": "id=full-control-permission-id",
    }


def test_verify_ssl_default_param():
    # Without an endpoint or ssl_verify, no client_kwargs are produced.
    config = {"url": url}
    fs = S3FileSystem(**config)

    assert "client_kwargs" not in fs.fs_args

    config = {
        "url": url,
        "endpointurl": "https://my.custom.s3:1234",
    }
    fs = S3FileSystem(**config)

    assert "verify" not in fs.fs_args["client_kwargs"]


def test_s3_config_credentialpath(monkeypatch):
    # Accessing fs_args populates AWS_* environment variables from the config.
    environment = {}
    monkeypatch.setattr(os, "environ", environment)

    config = {"url": url, "credentialpath": "somewhere"}
    S3FileSystem(**config).fs_args  # noqa: B018
    assert environment["AWS_SHARED_CREDENTIALS_FILE"] == "somewhere"
    environment.clear()

    config = {"url": url, "configpath": "somewhere"}
    S3FileSystem(**config).fs_args  # noqa: B018
    assert environment["AWS_CONFIG_FILE"] == "somewhere"
    environment.clear()

    config = {
        "url": url,
        "credentialpath": "somewhere",
        "configpath": "elsewhere",
    }
    S3FileSystem(**config).fs_args  # noqa: B018
    assert environment["AWS_SHARED_CREDENTIALS_FILE"] == "somewhere"
    assert environment["AWS_CONFIG_FILE"] == "elsewhere"
    environment.clear()


def test_ssl_verify_bool_param():
    config = {"url": url, "ssl_verify": False}
    fs = S3FileSystem(**config)

    assert fs.fs_args["client_kwargs"]["verify"] == config["ssl_verify"]


def test_ssl_verify_path_param():
    # ssl_verify may also be a path to a CA bundle.
    config = {"url": url, "ssl_verify": "/path/to/custom/cabundle.pem"}
    fs = S3FileSystem(**config)

    assert fs.fs_args["client_kwargs"]["verify"] == config["ssl_verify"]


def test_ssl_verify_none_param():
    # ssl_verify=None is treated the same as leaving it unset.
    config = {"url": url, "ssl_verify": None}
    fs = S3FileSystem(**config)

    assert "client_kwargs" not in fs.fs_args

    config = {
        "url": url,
        "endpointurl": "https://my.custom.s3:1234",
        "ssl_verify": None,
    }
    fs = S3FileSystem(**config)

    assert "verify" not in fs.fs_args["client_kwargs"]


def test_grants():
    # grant_* options are translated to S3 Grant* extra args.
    config = {
        "url": url,
        "grant_read": "id=read-permission-id,id=other-read-permission-id",
        "grant_read_acp": "id=read-acp-permission-id",
        "grant_write_acp": "id=write-acp-permission-id",
        "grant_full_control": "id=full-control-permission-id",
    }
    fs = S3FileSystem(**config)

    extra_args = fs.fs_args["s3_additional_kwargs"]
    assert (
        extra_args["GrantRead"] == "id=read-permission-id,id=other-read-permission-id"
    )
    assert extra_args["GrantReadACP"] == "id=read-acp-permission-id"
    assert extra_args["GrantWriteACP"] == "id=write-acp-permission-id"
    assert extra_args["GrantFullControl"] == "id=full-control-permission-id"


def test_grants_mutually_exclusive_acl_error(grants):
    # Combining "acl" with any "grant_*" option must raise a ConfigError.
    for grant_option, grant_value in grants.items():
        config = {"url": url, "acl": "public-read", grant_option: grant_value}

        fs = S3FileSystem(**config)
        with pytest.raises(ConfigError):
            fs.fs_args  # noqa: B018


def test_sse_kms_key_id():
    fs = S3FileSystem(url=url, sse_kms_key_id="key")
    assert fs.fs_args["s3_additional_kwargs"]["SSEKMSKeyId"] == "key"


def test_key_id_and_secret():
    # Explicit credentials map onto s3fs's key/secret/token arguments.
    fs = S3FileSystem(
        url=url,
        access_key_id=key_id,
        secret_access_key=key_secret,
        session_token=session_token,
    )
    assert fs.fs_args["key"] == key_id
    assert fs.fs_args["secret"] == key_secret
    assert fs.fs_args["token"] == session_token
--------------------------------------------------------------------------------
/dvc_s3/tests/test_utils.py:
--------------------------------------------------------------------------------
import pytest

from dvc_s3 import human_readable_to_bytes

# Binary (1024-based) size multipliers used by the conversion tests.
KB = 1024
MB = KB**2
GB = KB**3
TB = KB**4


@pytest.mark.parametrize(
    "test_input, expected",
    [
        ("10", 10),
        ("10 ", 10),
        ("1kb", 1 * KB),
        ("2kb", 2 * KB),
        ("1000mib", 1000 * MB),
        ("20gB", 20 * GB),
        ("10Tib", 10 * TB),
    ],
)
def test_conversions_human_readable_to_bytes(test_input, expected):
    # Unit suffixes are case-insensitive; "kb" and "kib" both mean 1024.
    assert human_readable_to_bytes(test_input) == expected


@pytest.mark.parametrize("invalid_input", ["foo", "10XB", "1000Pb", "fooMiB"])
def test_conversions_human_readable_to_bytes_invalid(invalid_input):
    # Unknown units or non-numeric values must raise ValueError.
    with pytest.raises(ValueError):  # noqa: PT011
        human_readable_to_bytes(invalid_input)
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
[build-system]
requires = ["setuptools>=48", "wheel", "setuptools_scm[toml]>=6.3.1"]
build-backend = "setuptools.build_meta"

[tool.setuptools_scm]
write_to = "dvc_s3/_dvc_s3_version.py"

[project]
name = "dvc-s3"
description = "s3 plugin for dvc"
readme = "README.rst"
keywords = [
    "dvc",
    "s3",
]
license = { text = "Apache License 2.0" }
maintainers = [{ name = "Iterative", email = "support@dvc.org" }]
authors = [{ name = "Iterative", email = "support@dvc.org" }]
requires-python = ">=3.9"
classifiers = [
    "Development Status :: 4 - Beta",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
]
dynamic = ["version"]
dependencies = [
    "dvc",
    "s3fs>=2024.12.0",
    "aiobotocore[boto3]>=2.5.0",
    "flatten_dict>=0.4.1,<1",
]

[project.optional-dependencies]
dev = [
    "dvc-s3[tests]",
    "mypy==1.16.0",
]
tests = [
    "dvc[testing]",
    "pytest>=7,<9",
    "pytest-cov>=4.1.0",
    "pytest-xdist>=3.2",
    "pytest-servers[s3]>=0.4.0",
]

[project.urls]
Documentation = "https://dvc.org/doc"
Source =
"https://github.com/iterative/dvc-s3" 53 | 54 | [tool.pytest.ini_options] 55 | log_level = "debug" 56 | addopts = "-ra -pdvc.testing.plugin" 57 | markers = [ 58 | "needs_internet: Might need network access for the tests", 59 | ] 60 | 61 | [tool.mypy] 62 | # Error output 63 | show_column_numbers = true 64 | show_error_codes = true 65 | show_error_context = true 66 | show_traceback = true 67 | pretty = true 68 | # See https://mypy.readthedocs.io/en/latest/running_mypy.html#missing-imports. 69 | ignore_missing_imports = true 70 | check_untyped_defs = false 71 | # Warnings 72 | warn_no_return = true 73 | warn_redundant_casts = true 74 | warn_unreachable = true 75 | files = ["dvc_s3"] 76 | 77 | [tool.ruff] 78 | output-format = "full" 79 | show-fixes = true 80 | 81 | [tool.ruff.lint] 82 | ignore = [ 83 | "N818", "S101", "ISC001", "PT004", "PT007", "RET502", "RET503", "SIM105", "SIM108", "SIM117", 84 | "TRY003", "TRY300", "PLR2004", "PLW2901", "LOG007", 85 | ] 86 | select = [ 87 | "F", "E", "W", "C90", "I", "N", "UP", "YTT", "ASYNC", "S", "BLE", "B", "A", "C4", "T10", 88 | "EXE", "ISC", "ICN", "G", "INP", "PIE", "T20", "PYI", "PT", "Q", "RSE", "RET", 89 | "SLOT", "SIM", "TID", "TCH", "ARG", "PGH", "PLC", "PLE", "PLR", "PLW", "TRY", 90 | "FLY", "PERF101", "LOG", "RUF", "RUF022", "RUF023", "RUF024", "RUF025", "RUF026", 91 | ] 92 | preview = true 93 | explicit-preview-rules = true 94 | 95 | [tool.ruff.lint.flake8-pytest-style] 96 | fixture-parentheses = false 97 | mark-parentheses = false 98 | parametrize-names-type = "csv" 99 | raises-extend-require-match-for = ["dvc.exceptions.DvcException"] 100 | 101 | [tool.ruff.lint.flake8-tidy-imports] 102 | 103 | [tool.ruff.lint.flake8-type-checking] 104 | strict = true 105 | 106 | [tool.ruff.lint.flake8-unused-arguments] 107 | ignore-variadic-names = true 108 | 109 | [tool.ruff.lint.isort] 110 | known-first-party = ["dvc", "dvc_data", "dvc_objects"] 111 | 112 | [tool.ruff.lint.pep8-naming] 113 | extend-ignore-names = ["M", "SCM"] 114 
| 115 | [tool.ruff.lint.pylint] 116 | max-args = 10 117 | 118 | [tool.ruff.lint.per-file-ignores] 119 | "dvc_s3/tests/**" = ["S", "ARG001", "ARG002", "TRY002", "TRY301"] 120 | --------------------------------------------------------------------------------