├── src
└── obspec
│ ├── py.typed
│ ├── _version.py
│ ├── _meta.py
│ ├── _head.py
│ ├── _delete.py
│ ├── _rename.py
│ ├── _copy.py
│ ├── __init__.py
│ ├── _attributes.py
│ ├── exceptions.py
│ ├── _list.py
│ ├── _put.py
│ └── _get.py
├── docs
├── index.md
├── CHANGELOG.md
├── blog
│ ├── index.md
│ ├── .authors.yml
│ └── posts
│ │ └── introducing-obspec.md
├── api
│ ├── meta.md
│ ├── exceptions.md
│ ├── copy.md
│ ├── head.md
│ ├── delete.md
│ ├── rename.md
│ ├── attributes.md
│ ├── put.md
│ ├── list.md
│ └── get.md
└── overrides
│ ├── main.html
│ └── stylesheets
│ └── extra.css
├── CHANGELOG.md
├── tests
└── test-get.yml
├── .pre-commit-config.yaml
├── LICENSE
├── README.md
├── pyproject.toml
├── .github
└── workflows
│ ├── test-python.yml
│ └── publish-docs.yml
├── .gitignore
└── mkdocs.yml
/src/obspec/py.typed:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | ../README.md
--------------------------------------------------------------------------------
/docs/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | ../CHANGELOG.md
--------------------------------------------------------------------------------
/docs/blog/index.md:
--------------------------------------------------------------------------------
1 | # Blog
2 |
--------------------------------------------------------------------------------
/docs/api/meta.md:
--------------------------------------------------------------------------------
1 | # Meta
2 |
3 | ::: obspec.ObjectMeta
4 |
--------------------------------------------------------------------------------
/docs/api/exceptions.md:
--------------------------------------------------------------------------------
1 | # Exceptions
2 |
3 | ::: obspec.exceptions
4 |
--------------------------------------------------------------------------------
/docs/api/copy.md:
--------------------------------------------------------------------------------
1 | # Copy
2 |
3 | ::: obspec.Copy
4 | ::: obspec.CopyAsync
5 |
--------------------------------------------------------------------------------
/docs/api/head.md:
--------------------------------------------------------------------------------
1 | # Head
2 |
3 | ::: obspec.Head
4 | ::: obspec.HeadAsync
5 |
--------------------------------------------------------------------------------
/docs/api/delete.md:
--------------------------------------------------------------------------------
1 | # Delete
2 |
3 | ::: obspec.Delete
4 | ::: obspec.DeleteAsync
5 |
--------------------------------------------------------------------------------
/docs/api/rename.md:
--------------------------------------------------------------------------------
1 | # Rename
2 |
3 | ::: obspec.Rename
4 | ::: obspec.RenameAsync
5 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | ## [0.1.0] - 2025-06-25
4 |
5 | - Initial release.
6 |
--------------------------------------------------------------------------------
/docs/api/attributes.md:
--------------------------------------------------------------------------------
1 | # Attributes
2 |
3 | ::: obspec.Attribute
4 | ::: obspec.Attributes
5 |
--------------------------------------------------------------------------------
/docs/api/put.md:
--------------------------------------------------------------------------------
1 | # Put
2 |
3 | ::: obspec.Put
4 | ::: obspec.PutAsync
5 | ::: obspec.PutResult
6 | ::: obspec.UpdateVersion
7 | ::: obspec.PutMode
8 |
--------------------------------------------------------------------------------
/docs/blog/.authors.yml:
--------------------------------------------------------------------------------
1 | authors:
2 | kylebarron:
3 | name: Kyle Barron
4 | description: Creator
5 | avatar: https://github.com/kylebarron.png
6 |
--------------------------------------------------------------------------------
/docs/api/list.md:
--------------------------------------------------------------------------------
1 | # List
2 |
3 | ::: obspec.List
4 | ::: obspec.ListAsync
5 | ::: obspec.ListWithDelimiter
6 | ::: obspec.ListWithDelimiterAsync
7 | ::: obspec.ListResult
8 | ::: obspec.ListChunkType_co
9 |
--------------------------------------------------------------------------------
/src/obspec/_version.py:
--------------------------------------------------------------------------------
1 | from importlib.metadata import PackageNotFoundError, version
2 |
# Start from the placeholder used when obspec is imported from a source tree
# that has not been installed, then replace it with the installed
# distribution's version when package metadata is available.
__version__ = "uninstalled"
try:
    __version__ = version("obspec")
except PackageNotFoundError:
    # No installed distribution named "obspec": keep the placeholder.
    pass
7 |
--------------------------------------------------------------------------------
/docs/overrides/main.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 |
3 | {% block outdated %}
4 | You're not viewing the latest version.
5 | <a href="{{ '../' ~ base_url }}">
6 | <strong>Click here to go to latest.</strong>
7 | </a>
8 | {% endblock %}
9 |
--------------------------------------------------------------------------------
/docs/api/get.md:
--------------------------------------------------------------------------------
1 | # Get
2 |
3 | ::: obspec.Get
4 | ::: obspec.GetAsync
5 | ::: obspec.GetRange
6 | ::: obspec.GetRangeAsync
7 | ::: obspec.GetRanges
8 | ::: obspec.GetRangesAsync
9 | ::: obspec.GetOptions
10 | ::: obspec.GetResult
11 | ::: obspec.GetResultAsync
12 | ::: obspec.OffsetRange
13 | ::: obspec.SuffixRange
14 |
--------------------------------------------------------------------------------
/tests/test-get.yml:
--------------------------------------------------------------------------------
1 | # yaml-language-server: $schema=https://raw.githubusercontent.com/typeddjango/pytest-mypy-plugins/master/pytest_mypy_plugins/schema.json
2 | - case: accepts_get
3 | main: |
4 | import sys
5 |
6 | from typing_extensions import assert_type
7 |
8 | from obspec import Get
9 |
10 | if sys.version_info >= (3, 12):
11 | from collections.abc import Buffer
12 | else:
13 | from typing_extensions import Buffer
14 |
15 |
16 | def accepts_get(client: Get) -> None:
17 | resp = client.get("path/to/file")
18 | assert_type(resp.range, tuple[int, int])
19 | for chunk in resp:
20 | assert_type(chunk, Buffer)
21 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | # See https://pre-commit.com for more information
2 | # See https://pre-commit.com/hooks.html for more hooks
3 |
4 | # Default to Python 3
5 | default_language_version:
6 | python: python3
7 |
8 | # Run hooks at the pre-commit stage only (add pre-push here to also run on push)
9 | default_stages: [pre-commit]
10 |
11 | repos:
12 | - repo: https://github.com/pre-commit/pre-commit-hooks
13 | rev: v5.0.0
14 | hooks:
15 | - id: trailing-whitespace
16 | - id: end-of-file-fixer
17 | - id: check-added-large-files
18 | args: ["--maxkb=500"]
19 |
20 | - repo: https://github.com/astral-sh/ruff-pre-commit
21 | rev: v0.9.6
22 | hooks:
23 | - id: ruff
24 | args: ["--fix"]
25 | - id: ruff-format
26 |
--------------------------------------------------------------------------------
/src/obspec/_meta.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import TYPE_CHECKING, TypedDict
4 |
5 | if TYPE_CHECKING:
6 | from datetime import datetime
7 |
8 |
class ObjectMeta(TypedDict):
    """The metadata that describes an object."""

    path: str
    """The full path to the object."""

    last_modified: datetime
    """The last modified time."""

    size: int
    """The size in bytes of the object."""

    e_tag: str | None
    """The unique identifier for the object.

    See <https://datatracker.ietf.org/doc/html/rfc9110#name-etag>.
    """

    version: str | None
    """A version indicator for this object."""
29 |
--------------------------------------------------------------------------------
/src/obspec/_head.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import TYPE_CHECKING, Protocol
4 |
5 | if TYPE_CHECKING:
6 | from ._meta import ObjectMeta
7 |
8 |
class Head(Protocol):
    """Structural protocol for objects that expose a synchronous `head` method."""

    def head(self, path: str) -> ObjectMeta:
        """Look up the metadata of the object at the given location.

        Args:
            path: The path within the store to retrieve.

        Returns:
            The metadata describing the object at `path`.

        """
21 |
22 |
class HeadAsync(Protocol):
    """Structural protocol for objects that expose an async `head_async` method."""

    async def head_async(self, path: str) -> ObjectMeta:
        """Asynchronous counterpart of `head`.

        See [Head][obspec.Head] for the full documentation.
        """
30 |
--------------------------------------------------------------------------------
/docs/overrides/stylesheets/extra.css:
--------------------------------------------------------------------------------
1 | :root,
2 | [data-md-color-scheme="default"] {
3 | /* --md-primary-fg-color: #cf3f02;
4 | --md-default-fg-color: #443f3f; */
5 | --boxShadowD: 0px 12px 24px 0px rgba(68, 63, 63, 0.08),
6 | 0px 0px 4px 0px rgba(68, 63, 63, 0.08);
7 | }
8 | body {
9 | margin: 0;
10 | padding: 0;
11 | /* font-size: 16px; */
12 | }
13 | h1,
14 | h2,
15 | h3,
16 | h4,
17 | h5,
18 | h6 {
19 | font-family: var(--md-heading-font);
20 | font-weight: bold;
21 | }
22 | .md-typeset h1,
23 | .md-typeset h2 {
24 | font-weight: normal;
25 | color: var(--md-default-fg-color);
26 | }
27 | .md-typeset h3,
28 | .md-typeset h4 {
29 | font-weight: bold;
30 | color: var(--md-default-fg-color);
31 | }
32 | .md-button,
33 | .md-typeset .md-button {
34 | font-family: var(--md-heading-font);
35 | }
36 | .md-content .supheading {
37 | font-family: var(--md-heading-font);
38 | text-transform: uppercase;
39 | color: var(--md-primary-fg-color);
40 | font-size: 0.75rem;
41 | font-weight: bold;
42 | }
43 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2025 Development Seed
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/src/obspec/_delete.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import TYPE_CHECKING, Protocol
4 |
5 | if TYPE_CHECKING:
6 | from collections.abc import Sequence
7 |
8 |
class Delete(Protocol):
    """Structural protocol for objects that expose a synchronous `delete` method."""

    def delete(self, paths: str | Sequence[str]) -> None:
        """Delete the object at the specified location(s).

        Args:
            paths: The path or paths within the store to delete.

                When supported by the underlying store, this method will use
                bulk operations that delete more than one object per request.

                If an object does not exist, the result may be an error or a
                success, depending on the behavior of the underlying store.
                For example, local filesystems, GCP, and Azure return an error,
                while S3 and in-memory stores report success.

        """
25 |
26 |
class DeleteAsync(Protocol):
    """Structural protocol for objects that expose an async `delete_async` method."""

    async def delete_async(self, paths: str | Sequence[str]) -> None:
        """Asynchronous counterpart of `delete`.

        See [Delete][obspec.Delete] for the full documentation.
        """
33 |
--------------------------------------------------------------------------------
/src/obspec/_rename.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import Protocol
4 |
5 |
class Rename(Protocol):
    """Structural protocol for objects that expose a synchronous `rename` method."""

    def rename(self, from_: str, to: str, *, overwrite: bool = True) -> None:
        """Move an object to a different path within the same object store.

        By default, this is implemented as a copy to the destination followed
        by a delete of the source. When deleting the source, it may not check
        that it is still the same object that was originally copied.

        Args:
            from_: Source path.
            to: Destination path.

        Keyword Args:
            overwrite: When `True`, an object already present at the destination
                is overwritten. When `False`, an error is returned if the
                destination already has an object.

        """
24 |
25 |
class RenameAsync(Protocol):
    """Structural protocol for objects that expose an async `rename_async` method."""

    async def rename_async(
        self,
        from_: str,
        to: str,
        *,
        overwrite: bool = True,
    ) -> None:
        """Asynchronous counterpart of `rename`.

        See [Rename][obspec.Rename] for the full documentation.
        """
38 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # obspec
2 |
3 | A Python protocol for interfacing with object storage.
4 |
5 | [Read the release post.](https://developmentseed.org/obspec/latest/blog/2025/06/25/introducing-obspec-a-python-protocol-for-interfacing-with-object-storage/)
6 |
7 | It's designed to abstract away the complexities of different object storage providers while acknowledging that object storage is _not a filesystem_. The Python protocols present more similarities to HTTP requests than Python file objects.
8 |
9 | ## Implementations
10 |
11 | The primary implementation of obspec is [obstore](https://developmentseed.org/obstore/latest/), and the obspec protocol was designed around the obstore API.
12 |
13 | ## Utilities
14 |
15 | Utilities that build on top of obspec are planned. Potential examples include:
16 |
17 | - Globbing: an implementation of `glob()` similar to [`fsspec.glob`](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.glob) that uses `obspec` primitives.
18 | - Caching: wrappers around `Get`/`GetRange`/`GetRanges` that store a cache of bytes.
19 |
20 | Because these utilities operate on generic obspec protocols, they can be used immediately with any obspec backend, including future ones.
21 |
--------------------------------------------------------------------------------
/src/obspec/_copy.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import Protocol
4 |
5 |
class Copy(Protocol):
    """Structural protocol for objects that expose a synchronous `copy` method."""

    def copy(self, from_: str, to: str, *, overwrite: bool = True) -> None:
        """Copy an object to another path within the same object store.

        Args:
            from_: Source path.
            to: Destination path.

        Keyword Args:
            overwrite: When `True`, an object already present at the destination
                is overwritten.

                When `False`, the copy happens only if the destination is empty,
                and an error is returned if the destination already has an
                object. The operation is performed atomically if the underlying
                object storage supports it; if atomic operations are not
                supported by the underlying object storage (like S3), an error
                is returned.

        """
26 |
27 |
class CopyAsync(Protocol):
    """Structural protocol for objects that expose an async `copy_async` method."""

    async def copy_async(
        self,
        from_: str,
        to: str,
        *,
        overwrite: bool = True,
    ) -> None:
        """Asynchronous counterpart of `copy`.

        See [Copy][obspec.Copy] for the full documentation.
        """
40 |
--------------------------------------------------------------------------------
/src/obspec/__init__.py:
--------------------------------------------------------------------------------
1 | """Object storage protocol definitions for Python."""
2 |
3 | from ._attributes import Attribute, Attributes
4 | from ._copy import Copy, CopyAsync
5 | from ._delete import Delete, DeleteAsync
6 | from ._get import (
7 | Get,
8 | GetAsync,
9 | GetOptions,
10 | GetRange,
11 | GetRangeAsync,
12 | GetRanges,
13 | GetRangesAsync,
14 | GetResult,
15 | GetResultAsync,
16 | OffsetRange,
17 | SuffixRange,
18 | )
19 | from ._head import Head, HeadAsync
20 | from ._list import (
21 | List,
22 | ListAsync,
23 | ListChunkType_co,
24 | ListResult,
25 | ListWithDelimiter,
26 | ListWithDelimiterAsync,
27 | )
28 | from ._meta import ObjectMeta
29 | from ._put import Put, PutAsync, PutMode, PutResult, UpdateVersion
30 | from ._rename import Rename, RenameAsync
31 | from ._version import __version__
32 |
33 | __all__ = [
34 | "Attribute",
35 | "Attributes",
36 | "Copy",
37 | "CopyAsync",
38 | "Delete",
39 | "DeleteAsync",
40 | "Get",
41 | "GetAsync",
42 | "GetOptions",
43 | "GetRange",
44 | "GetRangeAsync",
45 | "GetRanges",
46 | "GetRangesAsync",
47 | "GetResult",
48 | "GetResultAsync",
49 | "Head",
50 | "HeadAsync",
51 | "List",
52 | "ListAsync",
53 | "ListChunkType_co",
54 | "ListResult",
55 | "ListWithDelimiter",
56 | "ListWithDelimiterAsync",
57 | "ObjectMeta",
58 | "OffsetRange",
59 | "Put",
60 | "PutAsync",
61 | "PutMode",
62 | "PutResult",
63 | "Rename",
64 | "RenameAsync",
65 | "SuffixRange",
66 | "UpdateVersion",
67 | "__version__",
68 | ]
69 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "obspec"
3 | version = "0.1.0"
4 | description = "Object storage interface definitions for Python."
5 | license = "MIT"
6 | readme = "README.md"
7 | authors = [{ name = "Kyle Barron", email = "kyle@developmentseed.org" }]
8 | requires-python = ">=3.9"
9 | dependencies = ["typing-extensions; python_version < '3.12'"]
10 | keywords = []
11 | classifiers = [
12 | "Development Status :: 4 - Beta",
13 | "License :: OSI Approved :: MIT License",
14 | "Operating System :: OS Independent",
15 | "Programming Language :: Python :: 3 :: Only",
16 | "Programming Language :: Python :: 3",
17 | ]
18 |
19 | [project.urls]
20 | homepage = "https://developmentseed.org/obspec/latest/"
21 | documentation = "https://developmentseed.org/obspec/latest/"
22 | repository = "https://github.com/developmentseed/obspec"
23 | issues = "https://github.com/developmentseed/obspec/issues"
24 | changelog = "https://github.com/developmentseed/obspec/blob/main/CHANGELOG.md"
25 |
26 | [build-system]
27 | requires = ["hatchling"]
28 | build-backend = "hatchling.build"
29 |
30 | [tool.uv]
31 | dev-dependencies = [
32 | "griffe>=1.7.3",
33 | "ipykernel>=6.29.5",
34 | "mike>=2.1.3",
35 | "mkdocs-material[imaging]>=9.6.14",
36 | "mkdocs>=1.6.1",
37 | "mkdocstrings>=0.29.1",
38 | "mkdocstrings-python>=1.16.11",
39 | "pytest>=8.3.3",
40 | "ruff>=0.11.10",
41 | "pytest-mypy-plugins>=3.2.0",
42 | "mypy>=1.15.0",
43 | ]
44 |
45 | [tool.ruff.lint]
46 | select = ["ALL"]
47 | ignore = [
48 | "PYI051", # redundant-literal-union
49 | ]
50 |
51 | [tool.ruff.lint.pydocstyle]
52 | convention = "google"
53 |
54 | [tool.pytest.ini_options]
55 | addopts = "-v"
56 | testpaths = ["tests"]
57 |
--------------------------------------------------------------------------------
/.github/workflows/test-python.yml:
--------------------------------------------------------------------------------
1 | name: Python
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | pull_request:
8 |
9 | permissions:
10 | contents: read
11 |
12 | concurrency:
13 | group: ${{ github.workflow }}-${{ github.ref }}
14 | cancel-in-progress: true
15 |
16 | jobs:
17 | pre-commit:
18 | name: Run pre-commit on Python code
19 | runs-on: ubuntu-latest
20 | steps:
21 | - uses: actions/checkout@v4
22 |
23 | - uses: actions/setup-python@v5
24 | with:
25 | python-version: "3.11"
26 |
27 | # Use ruff-action so we get annotations in the GitHub UI
28 | - uses: astral-sh/ruff-action@v3
29 |
30 | - name: Cache pre-commit virtualenvs
31 | uses: actions/cache@v4
32 | with:
33 | path: ~/.cache/pre-commit
34 | key: pre-commit-3|${{ hashFiles('.pre-commit-config.yaml') }}
35 |
36 | - name: run pre-commit
37 | run: |
38 | python -m pip install pre-commit
39 | pre-commit run --all-files
40 |
41 | test-python:
42 | name: Build and test Python
43 | runs-on: ubuntu-latest
44 | strategy:
45 | fail-fast: true
46 | matrix:
47 | python-version: ["3.9", "3.10", "3.11", "3.12"]
48 | steps:
49 | - uses: actions/checkout@v4
50 |
51 | - name: Install uv
52 | uses: astral-sh/setup-uv@v5
53 | with:
54 | enable-cache: true
55 | version: "0.5.x"
56 |
57 | - name: Set up Python
58 | run: uv python install ${{ matrix.python-version }}
59 |
60 | - name: Run python tests
61 | run: |
62 | uv run pytest tests
63 |
64 | # Ensure docs build without warnings
65 | - name: Check docs
66 | if: "${{ matrix.python-version == 3.11 }}"
67 | run: uv run mkdocs build --strict
68 |
--------------------------------------------------------------------------------
/src/obspec/_attributes.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import sys
4 | from typing import Literal, Union
5 |
6 | if sys.version_info >= (3, 10):
7 | from typing import TypeAlias
8 | else:
9 | from typing_extensions import TypeAlias
10 |
11 | Attribute: TypeAlias = Union[
12 | Literal[
13 | "Content-Disposition",
14 | "Content-Encoding",
15 | "Content-Language",
16 | "Content-Type",
17 | "Cache-Control",
18 | ],
19 | str,
20 | ]
21 | """Additional object attribute types.
22 |
23 | - `"Content-Disposition"`: Specifies how the object should be handled by a browser.
24 |
25 | See [Content-Disposition](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Disposition).
26 |
27 | - `"Content-Encoding"`: Specifies the encodings applied to the object.
28 |
29 | See [Content-Encoding](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Encoding).
30 |
31 | - `"Content-Language"`: Specifies the language of the object.
32 |
33 | See [Content-Language](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Language).
34 |
35 | - `"Content-Type"`: Specifies the MIME type of the object.
36 |
37 | This takes precedence over any client configuration.
38 |
39 | See [Content-Type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type).
40 |
41 | - `"Cache-Control"`: Overrides cache control policy of the object.
42 |
43 | See [Cache-Control](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control).
44 |
45 | Any other string key specifies a user-defined metadata field for the object.
46 | """
47 |
48 | Attributes: TypeAlias = dict[Attribute, str]
49 | """Additional attributes of an object
50 |
51 | Attributes can be specified in [`Put`][obspec.Put]/[`PutAsync`][obspec.PutAsync] and
52 | retrieved from [`Get`][obspec.Get]/[`GetAsync`][obspec.GetAsync].
53 |
54 | Unlike ObjectMeta, Attributes are not returned by listing APIs
55 | """
56 |
--------------------------------------------------------------------------------
/.github/workflows/publish-docs.yml:
--------------------------------------------------------------------------------
1 | name: Publish Python docs
2 |
3 | # Only run on new tags starting with `v`
4 | on:
5 | push:
6 | tags:
7 | - "v*"
8 | workflow_dispatch:
9 |
10 | # https://stackoverflow.com/a/77412363
11 | permissions:
12 | contents: write
13 | pages: write
14 |
15 | jobs:
16 | build:
17 | name: Deploy Python docs
18 | runs-on: ubuntu-latest
19 | # Used for configuring social plugin in mkdocs.yml
20 | # Unclear if this is always set in github actions
21 | env:
22 | CI: "TRUE"
23 | steps:
24 | - uses: actions/checkout@v4
25 | # We need to additionally fetch the gh-pages branch for mike deploy
26 | with:
27 | fetch-depth: 0
28 |
29 | - name: Install a specific version of uv
30 | uses: astral-sh/setup-uv@v5
31 | with:
32 | enable-cache: true
33 | version: "0.5.x"
34 |
35 | - name: Set up Python 3.11
36 | run: uv python install 3.11
37 |
38 | - name: Install dependencies
39 | run: uv sync
40 |
41 | - name: Deploy docs
42 | env:
43 | GIT_COMMITTER_NAME: CI
44 | GIT_COMMITTER_EMAIL: ci-bot@example.com
45 | run: |
46 | # Get most recent git tag
47 | # https://stackoverflow.com/a/7261049
48 | # https://stackoverflow.com/a/3867811
49 | # We don't use {{github.ref_name}} because if triggered manually, it
50 | # will be a branch name instead of a tag version.
51 | VERSION=$(git describe --tags --abbrev=0)
52 |
53 | # Only publish the docs as the latest version if the git tag contains no
54 | # letters after the first character
55 | # (usually the git tag will have v as the first character)
56 | # Note that `cut` character positions are 1-based
57 | if echo $VERSION | cut -c 2- | grep -q "[A-Za-z]"; then
58 | echo "Is beta version"
59 | # For beta versions publish but don't set as latest
60 | uv run mike deploy $VERSION --update-aliases --push
61 | else
62 | echo "Is NOT beta version"
63 | uv run mike deploy $VERSION latest --update-aliases --push
64 | fi
65 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # UV
98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | #uv.lock
102 |
103 | # poetry
104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105 | # This is especially recommended for binary packages to ensure reproducibility, and is more
106 | # commonly ignored for libraries.
107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108 | #poetry.lock
109 |
110 | # pdm
111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112 | #pdm.lock
113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114 | # in version control.
115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116 | .pdm.toml
117 | .pdm-python
118 | .pdm-build/
119 |
120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121 | __pypackages__/
122 |
123 | # Celery stuff
124 | celerybeat-schedule
125 | celerybeat.pid
126 |
127 | # SageMath parsed files
128 | *.sage.py
129 |
130 | # Environments
131 | .env
132 | .venv
133 | env/
134 | venv/
135 | ENV/
136 | env.bak/
137 | venv.bak/
138 |
139 | # Spyder project settings
140 | .spyderproject
141 | .spyproject
142 |
143 | # Rope project settings
144 | .ropeproject
145 |
146 | # mkdocs documentation
147 | /site
148 |
149 | # mypy
150 | .mypy_cache/
151 | .dmypy.json
152 | dmypy.json
153 |
154 | # Pyre type checker
155 | .pyre/
156 |
157 | # pytype static type analyzer
158 | .pytype/
159 |
160 | # Cython debug symbols
161 | cython_debug/
162 |
163 | # PyCharm
164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166 | # and can be added to the global gitignore or merged into this file. For a more nuclear
167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168 | #.idea/
169 |
170 | # PyPI configuration file
171 | .pypirc
172 |
--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_name: obspec
2 | repo_name: developmentseed/obspec
3 | repo_url: https://github.com/developmentseed/obspec
4 | site_description: Object storage interface definitions for Python.
5 | site_author: Development Seed
6 | # Note: trailing slash recommended with mike:
7 | # https://squidfunk.github.io/mkdocs-material/setup/setting-up-versioning/#publishing-a-new-version
8 | site_url: https://developmentseed.org/obspec/
9 | docs_dir: docs
10 |
11 | extra:
12 | social:
13 | - icon: "fontawesome/brands/github"
14 | link: "https://github.com/developmentseed"
15 | - icon: "fontawesome/brands/twitter"
16 | link: "https://twitter.com/developmentseed"
17 | - icon: "fontawesome/brands/linkedin"
18 | link: "https://www.linkedin.com/company/development-seed"
19 | version:
20 | alias: true
21 | provider: mike
22 |
23 | nav:
24 | - "index.md"
25 | # - User Guide:
26 | # - getting-started.md
27 | # - cookbook.md
28 | # - authentication.md
29 | # - integrations.md
30 | # - performance.md
31 | # - fsspec.md
32 | - Blog:
33 | - blog/index.md
34 | - API Reference:
35 | - api/copy.md
36 | - api/delete.md
37 | - api/get.md
38 | - api/head.md
39 | - api/list.md
40 | - api/meta.md
41 | - api/put.md
42 | - api/rename.md
43 | - api/attributes.md
44 | - api/exceptions.md
45 | - CHANGELOG.md
46 |
47 | watch:
48 | - src/obspec
49 | - docs
50 |
51 | theme:
52 | language: en
53 | name: material
54 | custom_dir: docs/overrides
55 | # logo: assets/logo_no_text.png
56 | palette:
57 | # Palette toggle for automatic mode
58 | - media: "(prefers-color-scheme)"
59 | toggle:
60 | icon: material/brightness-auto
61 | name: Switch to light mode
62 |
63 | # Palette toggle for light mode
64 | - media: "(prefers-color-scheme: light)"
65 | primary: blue grey
66 | # accent: deep orange
67 | toggle:
68 | icon: material/brightness-7
69 | name: Switch to dark mode
70 |
71 | # Palette toggle for dark mode
72 | - media: "(prefers-color-scheme: dark)"
73 | scheme: slate
74 | primary: blue grey
75 | # accent: deep orange
76 | toggle:
77 | icon: material/brightness-4
78 | name: Switch to system preference
79 |
80 | font:
81 | text: Roboto
82 | code: Roboto Mono
83 |
84 | features:
85 | - content.code.annotate
86 | - content.code.copy
87 | - navigation.indexes
88 | - navigation.instant
89 | - navigation.tracking
90 | - search.suggest
91 | - search.share
92 |
93 | extra_css:
94 | - overrides/stylesheets/extra.css
95 |
96 | plugins:
97 | - blog
98 | - search
99 | - social:
100 | enabled: !ENV [CI, false]
101 | - mike:
102 | alias_type: "copy"
103 | canonical_version: "latest"
104 | - mkdocstrings:
105 | enable_inventory: true
106 | handlers:
107 | python:
108 | paths: [src/obspec]
109 | options:
110 | # We set allow_inspection: false so that all docstrings are collected by
111 | # static analysis of the Python sources rather than by importing the package.
112 | allow_inspection: false
113 | docstring_section_style: list
114 | docstring_style: google
115 | line_length: 80
116 | separate_signature: true
117 | show_root_heading: true
118 | show_signature_annotations: true
119 | show_source: false
120 | show_symbol_type_toc: true
121 | signature_crossrefs: true
122 |
123 | inventories:
124 | - https://docs.python.org/3/objects.inv
125 | - https://filesystem-spec.readthedocs.io/en/latest/objects.inv
126 |
127 | # https://github.com/developmentseed/titiler/blob/50934c929cca2fa8d3c408d239015f8da429c6a8/docs/mkdocs.yml#L115-L140
128 | markdown_extensions:
129 | - admonition
130 | - attr_list
131 | - codehilite:
132 | guess_lang: false
133 | - def_list
134 | - footnotes
135 | - md_in_html
136 | - pymdownx.arithmatex
137 | - pymdownx.betterem
138 | - pymdownx.caret:
139 | insert: false
140 | - pymdownx.details
141 | - pymdownx.emoji:
142 | emoji_index: !!python/name:material.extensions.emoji.twemoji
143 | emoji_generator: !!python/name:material.extensions.emoji.to_svg
144 | - pymdownx.escapeall:
145 | hardbreak: true
146 | nbsp: true
147 | - pymdownx.magiclink:
148 | hide_protocol: true
149 | repo_url_shortener: true
150 | - pymdownx.smartsymbols
151 | - pymdownx.superfences
152 | - pymdownx.tasklist:
153 | custom_checkbox: true
154 | - pymdownx.tilde
155 | - toc:
156 | permalink: true
157 |
--------------------------------------------------------------------------------
/src/obspec/exceptions.py:
--------------------------------------------------------------------------------
1 | """Common exceptions.
2 |
3 | Users writing generic code with obspec may wish to catch common exceptions. For example,
4 | a user might wish to perform a head request but allow for the case where the object does
5 | not exist.
6 |
7 | Common exceptions pose a challenge for obspec. In general obspec strives to use
8 | [structural subtyping (protocols) rather than nominal subtyping
9 | (subclassing)][mypy_subtyping]. This is because protocols allow for implementations to
10 | have no knowledge of or dependency on a shared base library (obspec) while still being
11 | able to use the same interface.
12 |
13 | [mypy_subtyping]: https://mypy.readthedocs.io/en/stable/protocols.html
14 |
15 | However, structural subtyping does not work for exceptions: when you use `except
16 | Exception`, that uses an `isinstance` check under the hood.
17 |
18 | As a workaround, we **define well-known names** for exceptions and expect external
19 | implementations to use the same names.
20 |
21 | # Obspec users
22 |
23 | Use the [`map_exception`][obspec.exceptions.map_exception] function in this module to
24 | convert from an implementation-defined exception to an obspec-defined exception.
25 |
26 | ```py
27 | from obspec import Head
28 | from obspec.exceptions import NotFoundError, map_exception
29 |
30 |
31 | def check_if_exists(client: Head, path: str) -> bool:
32 | \"\"\"Check if a file exists at the given location.
33 |
34 | Returns True if the file exists, False otherwise.
35 | \"\"\"
36 | try:
37 | client.head(path)
38 | except Exception as e:
39 | if isinstance(map_exception(e), NotFoundError):
40 | return False
41 |
42 | raise
43 |
44 | return True
45 | ```
46 |
47 | !!! note
48 | If you don't care about catching exceptions, you can ignore this module entirely.
49 |
50 | # Obspec implementors
51 |
52 | Create your own exceptions but ensure you use the **same names** for your own exceptions
53 | as defined in this module.
54 |
55 | You may also have other exceptions that are not defined here, but any of your
56 | exceptions that logically fall under the purview of the exceptions defined here
57 | should be given the same name as the corresponding exception in this module.
58 |
59 | """
60 |
61 | from __future__ import annotations
62 |
63 | import builtins
64 | from typing import TypeVar
65 |
66 |
class BaseError(Exception):
    """The base obspec exception from which all other errors subclass."""


class NotFoundError(FileNotFoundError, BaseError):
    """Error when the object is not found at given location.

    Subclasses from the built-in [FileNotFoundError][] as well as `BaseError`.
    """


class InvalidPathError(BaseError):
    """Error for invalid path."""


class NotSupportedError(BaseError):
    """Error when the attempted operation is not supported."""


class AlreadyExistsError(BaseError):
    """Error when the object already exists."""


class PreconditionError(BaseError):
    """Error when the required conditions failed for the operation."""


class NotModifiedError(BaseError):
    """Error when the object at the location isn't modified."""


class NotImplementedError(BaseError, builtins.NotImplementedError):  # noqa: A001
    """Error when an operation is not implemented.

    Subclasses from the built-in [NotImplementedError][].
    """


class PermissionDeniedError(BaseError):
    """Error when the used credentials don't have enough permission to perform the requested operation."""  # noqa: E501


class UnauthenticatedError(BaseError):
    """Error when the used credentials lack valid authentication."""


_name_mapping: dict[str, type[BaseError]] = {
    FileNotFoundError.__name__: NotFoundError,
    NotFoundError.__name__: NotFoundError,
    InvalidPathError.__name__: InvalidPathError,
    NotSupportedError.__name__: NotSupportedError,
    AlreadyExistsError.__name__: AlreadyExistsError,
    PreconditionError.__name__: PreconditionError,
    NotModifiedError.__name__: NotModifiedError,
    NotImplementedError.__name__: NotImplementedError,
    PermissionDeniedError.__name__: PermissionDeniedError,
    UnauthenticatedError.__name__: UnauthenticatedError,
}
"""A mapping from well-known names to obspec-defined exception classes.
"""

ExceptionType = TypeVar("ExceptionType", bound=Exception)
"""Type variable for an exception type, bound to `Exception`."""


def map_exception(exception: ExceptionType) -> ExceptionType | BaseError:
    """Map an implementation-defined exception to an obspec-defined exception by name.

    The lookup is keyed on the *class name* of `exception` only; no `isinstance`
    relationship with obspec is required of the implementation's exception.

    Args:
        exception: The exception instance raised by an obspec implementation.

    Returns:
        - `exception` itself when its class name is not a well-known name, or when
          it already is an instance of the matching obspec class.
        - Otherwise a new instance of the matching obspec class built from
          `exception.args`, with the original exception attached as `__cause__` so
          that it is not lost if the mapped exception is raised or logged.

    """
    target_class = _name_mapping.get(type(exception).__name__)

    # Unknown name, or nothing to convert: hand the caller's object straight back.
    if target_class is None or isinstance(exception, target_class):
        return exception

    mapped = target_class(*exception.args)
    # Keep the implementation's exception reachable for debugging.
    mapped.__cause__ = exception
    return mapped
141 |
--------------------------------------------------------------------------------
/src/obspec/_list.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import sys
4 | from typing import TYPE_CHECKING, Generic, Protocol, TypeVar
5 |
6 | # Note: we need to use the typing-extensions typed dict because we also parametrize over
7 | # a generic
8 | # https://stackoverflow.com/a/79300271
9 | if sys.version_info >= (3, 11):
10 | from typing import TypedDict
11 | else:
12 | from typing_extensions import TypedDict
13 |
14 | if TYPE_CHECKING:
15 | from collections.abc import AsyncIterator, Iterator, Sequence
16 |
17 | from ._meta import ObjectMeta
18 |
19 |
20 | ListChunkType_co = TypeVar("ListChunkType_co", covariant=True)
21 | """The data structure used for holding list results."""
22 |
23 |
24 | class ListResult(TypedDict, Generic[ListChunkType_co]):
25 | """Result of a `list_with_delimiter` call.
26 |
27 | Includes objects, prefixes (directories) and a token for the next set of results.
28 | Individual result sets may be limited to 1,000 objects based on the underlying
29 | object storage's limitations.
30 | """
31 |
32 | common_prefixes: Sequence[str]
33 | """Prefixes that are common (like directories)"""
34 |
35 | objects: ListChunkType_co
36 | """Object metadata for the listing"""
37 |
38 |
class List(Protocol):
    def list(
        self,
        prefix: str | None = None,
        *,
        offset: str | None = None,
    ) -> Iterator[Sequence[ObjectMeta]]:
        """List all the objects with the given prefix.

        Prefixes are evaluated on a path segment basis, i.e. `foo/bar/` is a prefix of
        `foo/bar/x` but not of `foo/bar_baz/x`. List is recursive, i.e. `foo/bar/more/x`
        will be included.

        **Examples**:

        Synchronously iterate through list results:

        ```py
        import obspec

        def upload_files(client: obspec.Put):
            for i in range(100):
                client.put(f"file{i}.txt", b"foo")

        def list_files(client: obspec.List):
            stream = client.list()
            for list_result in stream:
                print(list_result[0])
                # {'path': 'file0.txt', 'last_modified': datetime.datetime(2024, 10, 23, 19, 19, 28, 781723, tzinfo=datetime.timezone.utc), 'size': 3, 'e_tag': '0', 'version': None}
                break
        ```

        !!! note
            The order of returned [`ObjectMeta`][obspec.ObjectMeta] is not
            guaranteed

        Args:
            prefix: The prefix within the store to use for listing. Defaults to None.

        Keyword Args:
            offset: If provided, list all the objects with the given prefix and a
                location greater than `offset`. Defaults to `None`.

        Returns:
            An iterator of list results. Each item yielded by the iterator is a
            sequence (one chunk) of [`ObjectMeta`][obspec.ObjectMeta].

        """  # noqa: E501
        ...
87 |
88 |
class ListAsync(Protocol):
    def list_async(
        self,
        prefix: str | None = None,
        *,
        offset: str | None = None,
    ) -> AsyncIterator[Sequence[ObjectMeta]]:
        """List all the objects with the given prefix.

        Note that this method itself is **not async**. It's a synchronous method but
        returns an **async iterator**.

        Refer to [obspec.List][obspec.List] for more information about list semantics.

        **Examples**:

        Asynchronously iterate through list results. Just change `for` to `async for`:

        ```py
        import obspec

        async def list_files(client: obspec.ListAsync):
            stream = client.list_async()
            async for list_result in stream:
                print(list_result[2])
                # {'path': 'file10.txt', 'last_modified': datetime.datetime(2024, 10, 23, 19, 21, 46, 224725, tzinfo=datetime.timezone.utc), 'size': 3, 'e_tag': '10', 'version': None}
                break
        ```

        !!! note
            The order of returned [`ObjectMeta`][obspec.ObjectMeta] is not
            guaranteed

        Args:
            prefix: The prefix within the store to use for listing. Defaults to None.

        Keyword Args:
            offset: If provided, list all the objects with the given prefix and a
                location greater than `offset`. Defaults to `None`.

        Returns:
            An async iterator of list results. Each item yielded by the iterator is
            a sequence (one chunk) of [`ObjectMeta`][obspec.ObjectMeta].

        """  # noqa: E501
        ...
131 |
132 |
class ListWithDelimiter(Protocol):
    def list_with_delimiter(
        self,
        prefix: str | None = None,
    ) -> ListResult[Sequence[ObjectMeta]]:
        """List objects under a prefix using an implementation specific delimiter.

        In addition to object metadata, the common prefixes (directories) are
        returned.

        Prefix matching works per path segment: `foo/bar/` is a prefix of
        `foo/bar/x` but not of `foo/bar_baz/x`. Unlike a plain list, this call is
        not recursive, so `foo/bar/more/x` will **not** be included.

        !!! note

            Any prefix supplied to this `prefix` parameter will **not** be stripped off
            the paths in the result.

        Args:
            prefix: The prefix within the store to use for listing. Defaults to None.

        Returns:
            ListResult

        """
        ...
161 |
162 |
class ListWithDelimiterAsync(Protocol):
    async def list_with_delimiter_async(
        self,
        prefix: str | None = None,
    ) -> ListResult[Sequence[ObjectMeta]]:
        """Asynchronous counterpart of `list_with_delimiter`.

        The semantics are documented on
        [ListWithDelimiter][obspec.ListWithDelimiter].
        """
        ...
174 |
--------------------------------------------------------------------------------
/src/obspec/_put.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import IO, TYPE_CHECKING, Literal, Protocol, TypedDict, Union
4 |
5 | if TYPE_CHECKING:
6 | import sys
7 | from collections.abc import AsyncIterable, AsyncIterator, Iterable, Iterator
8 | from pathlib import Path
9 |
10 | from ._attributes import Attributes
11 |
12 | if sys.version_info >= (3, 10):
13 | from typing import TypeAlias
14 | else:
15 | from typing_extensions import TypeAlias
16 |
17 | if sys.version_info >= (3, 12):
18 | from collections.abc import Buffer
19 | else:
20 | from typing_extensions import Buffer
21 |
22 |
class UpdateVersion(TypedDict, total=False):
    """Uniquely identifies the version of an object that should be updated.

    Which combination of `e_tag` and `version` a store relies on for conditional
    updates differs between stores, so applications are advised to preserve both.
    """

    e_tag: str | None
    """The unique identifier for the newly created object."""

    version: str | None
    """A version indicator for the newly created object."""
38 |
39 |
PutMode: TypeAlias = Union[Literal["create", "overwrite"], UpdateVersion]
"""Preconditions that apply to a put operation.

Three modes exist:

- Overwrite: an atomic write that replaces whatever object is present at the
  provided path.
- Create: an atomic write that returns an error if an object already exists at the
  provided path.
- Update: an atomic write that only happens if the current version of the object
  matches the provided [`UpdateVersion`][obspec.UpdateVersion]; otherwise an error is
  returned.

A string value must be one of:

- `"overwrite"`
- `"create"`

A `dict` value must meet the criteria of
[`UpdateVersion`][obspec.UpdateVersion].
"""
60 |
61 |
class PutResult(TypedDict):
    """What a put request returns."""

    e_tag: str | None
    """The unique identifier for the newly created object."""

    version: str | None
    """A version indicator for the newly created object."""
74 |
75 |
class Put(Protocol):
    def put(  # noqa: PLR0913
        self,
        path: str,
        file: IO[bytes] | Path | bytes | Buffer | Iterator[Buffer] | Iterable[Buffer],
        *,
        attributes: Attributes | None = None,
        tags: dict[str, str] | None = None,
        mode: PutMode | None = None,
        use_multipart: bool | None = None,
        chunk_size: int = ...,
        max_concurrency: int = ...,
    ) -> PutResult:
        """Save the provided bytes to the specified location.

        The operation is guaranteed to be atomic, it will either successfully write the
        entirety of `file` to `path`, or fail. No clients should be able to observe
        a partially written object.

        Args:
            path: The path within the store for where to save the file.
            file: The object to upload. Supports various input:

                - A file-like object opened in binary read mode
                - A [`Path`][pathlib.Path] to a local file
                - A [`bytes`][] object.
                - Any object implementing the Python [buffer
                  protocol](https://docs.python.org/3/c-api/buffer.html) (includes `bytes`
                  but also `memoryview`, numpy arrays, and more).
                - An iterator or iterable of objects implementing the Python buffer
                  protocol.

        Keyword Args:
            mode: Configure the [`PutMode`][obspec.PutMode] for this operation. Refer
                to the [`PutMode`][obspec.PutMode] docstring for more information.

                If this is provided and is not `"overwrite"`, a non-multipart upload
                will be performed. Defaults to `"overwrite"`.
            attributes: Provide a set of `Attributes`. Defaults to `None`.
            tags: Provide tags for this object. Defaults to `None`.
            use_multipart: Whether to force using a multipart upload.

                If `True`, the upload will always use a multipart upload, even if the
                length of the file is less than `chunk_size`. If `False`, the upload
                will never use a multipart upload, and the entire input will be
                materialized in memory as part of the upload. If `None`, the
                implementation will choose whether to use a multipart upload based on
                the length of the file and `chunk_size`.

                Defaults to `None`.
            chunk_size: The size of chunks to use within each part of the multipart
                upload. The default is allowed to be implementation-specific.
            max_concurrency: The maximum number of chunks to upload concurrently. This
                impacts the memory usage of large file uploads. The default is allowed
                to be implementation-specific.

        Returns:
            A [`PutResult`][obspec.PutResult] with the `e_tag` and `version` of the
            newly created object.

        """
        ...
134 |
135 |
class PutAsync(Protocol):
    async def put_async(  # noqa: PLR0913
        self,
        path: str,
        file: IO[bytes]
        | Path
        | bytes
        | Buffer
        | AsyncIterator[Buffer]
        | AsyncIterable[Buffer]
        | Iterator[Buffer]
        | Iterable[Buffer],
        *,
        attributes: Attributes | None = None,
        tags: dict[str, str] | None = None,
        mode: PutMode | None = None,
        use_multipart: bool | None = None,
        chunk_size: int = ...,
        max_concurrency: int = ...,
    ) -> PutResult:
        """Asynchronous counterpart of `put`.

        See [`Put`][obspec.Put] for the full documentation. On top of everything the
        synchronous `put` accepts for `file`, this method **also supports an async
        iterator or iterable** of objects implementing the Python buffer protocol.

        As a consequence the result of `get_async` can, for example, be handed
        directly to `put_async`; the request is then streamed through Python while
        the put operation runs:

        ```py
        from obspec import GetAsync, PutAsync

        async def streaming_copy(
            fetch_client: GetAsync,
            put_client: PutAsync,
            path1: str,
            path2: str,
        ):
            # This only constructs the stream, it doesn't materialize the data in memory
            resp = await fetch_client.get_async(path1)
            # A streaming upload is created to copy the file to path2
            await put_client.put_async(path2, resp)
        ```
        """
        ...
182 |
--------------------------------------------------------------------------------
/src/obspec/_get.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import sys
4 | from collections.abc import AsyncIterable, Iterable
5 | from typing import TYPE_CHECKING, Protocol, TypedDict
6 |
7 | if sys.version_info >= (3, 12):
8 | from collections.abc import Buffer
9 | else:
10 | from typing_extensions import Buffer
11 |
12 | if TYPE_CHECKING:
13 | from collections.abc import Sequence
14 | from datetime import datetime
15 |
16 | from ._attributes import Attributes
17 | from ._meta import ObjectMeta
18 |
19 |
class OffsetRange(TypedDict):
    """A range request for every byte from a given byte offset onwards."""

    offset: int
    """The byte offset for the offset range request."""
25 |
26 |
class SuffixRange(TypedDict):
    """A range request for up to the last `n` bytes."""

    suffix: int
    """The number of bytes from the suffix to request."""
32 |
33 |
class GetOptions(TypedDict, total=False):
    """Options for a get request.

    All options are optional.
    """

    if_match: str | None
    """
    Request will succeed if the `ObjectMeta::e_tag` matches.

    See RFC 9110, section 13.1.1 (`If-Match`).

    Examples:

    ```text
    If-Match: "xyzzy"
    If-Match: "xyzzy", "r2d2xxxx", "c3piozzzz"
    If-Match: *
    ```
    """

    if_none_match: str | None
    """
    Request will succeed if the `ObjectMeta::e_tag` does not match.

    See RFC 9110, section 13.1.2 (`If-None-Match`).

    Examples:

    ```text
    If-None-Match: "xyzzy"
    If-None-Match: "xyzzy", "r2d2xxxx", "c3piozzzz"
    If-None-Match: *
    ```
    """

    if_unmodified_since: datetime | None
    """
    Request will succeed if the object has not been modified since.

    Some stores, such as S3, will only return `NotModified` for exact
    timestamp matches, instead of for any timestamp greater than or equal.

    See RFC 9110, section 13.1.4 (`If-Unmodified-Since`).
    """

    if_modified_since: datetime | None
    """
    Request will succeed if the object has been modified since.

    See RFC 9110, section 13.1.3 (`If-Modified-Since`).
    """

    range: tuple[int, int] | Sequence[int] | OffsetRange | SuffixRange
    """
    Request transfer of only the specified range of bytes.

    The semantics of this attribute are:

    - `(int, int)`: Request a specific range of bytes `(start, end)`.

        If the given range is zero-length or starts after the end of the object, an
        error will be returned. Additionally, if the range ends after the end of the
        object, the entire remainder of the object will be returned. Otherwise, the
        exact requested range will be returned.

        The `end` offset is _exclusive_.

    - `{"offset": int}`: Request all bytes starting from a given byte offset.

        This is equivalent to `bytes={int}-` as an HTTP header.

    - `{"suffix": int}`: Request the last `int` bytes. Note that here, `int` is _the
        size of the request_, not the byte offset. This is equivalent to `bytes=-{int}`
        as an HTTP header.

    See RFC 9110, section 14.2 (`Range`).
    """

    version: str | None
    """
    Request a particular object version
    """

    head: bool
    """
    Request transfer of no content

    See RFC 9110, section 9.3.2 (`HEAD`).
    """
124 |
125 |
class GetResult(Iterable[Buffer], Protocol):
    """Result for a get request.

    Call the `buffer` method to materialize the entire buffer, or iterate over the
    result to stream it.

    **Example:**

    ```py
    from obspec import Get

    def streaming_download(client: Get, path: str):
        resp = client.get(path)
        for buffer in resp:
            print(len(memoryview(buffer)))
    ```
    """

    @property
    def attributes(self) -> Attributes:
        """Additional object attributes."""
        ...

    def buffer(self) -> Buffer:
        """Collect the data into a `Buffer` object.

        The returned object implements the Python buffer protocol. Pass it to
        [`bytes`][] to copy the buffer to Python memory.
        """
        ...

    @property
    def meta(self) -> ObjectMeta:
        """The ObjectMeta for this object."""
        ...

    @property
    def range(self) -> tuple[int, int]:
        """The range of bytes returned by this request.

        Note that this is `(start, stop)` **not** `(start, length)`.
        """
        ...
169 |
170 |
class GetResultAsync(AsyncIterable[Buffer], Protocol):
    """Result for an async get request.

    You can materialize the entire buffer by calling the `buffer_async` method or you
    can stream the result by asynchronously iterating over it.

    **Example:**

    ```py
    from obspec import GetAsync

    async def streaming_download(client: GetAsync, path: str):
        resp = await client.get_async(path)
        async for buffer in resp:
            print(len(memoryview(buffer)))
    ```
    """

    @property
    def attributes(self) -> Attributes:
        """Additional object attributes."""
        ...

    async def buffer_async(self) -> Buffer:
        """Collect the data into a `Buffer` object.

        This implements the Python buffer protocol. You can copy the buffer to Python
        memory by passing to [`bytes`][].
        """
        ...

    @property
    def meta(self) -> ObjectMeta:
        """The ObjectMeta for this object."""
        ...

    @property
    def range(self) -> tuple[int, int]:
        """The range of bytes returned by this request.

        Note that this is `(start, stop)` **not** `(start, length)`.

        """
        ...
215 |
216 |
class Get(Protocol):
    def get(
        self,
        path: str,
        *,
        options: GetOptions | None = None,
    ) -> GetResult:
        """Fetch the bytes stored at the specified location.

        Args:
            path: The path within the store to retrieve.
            options: options for accessing the file. Defaults to None.

        Returns:
            GetResult

        """
        ...
235 |
236 |
class GetAsync(Protocol):
    async def get_async(
        self,
        path: str,
        *,
        options: GetOptions | None = None,
    ) -> GetResultAsync:
        """Asynchronous counterpart of `get`.

        The semantics are documented on [Get][obspec.Get].
        """
        ...
249 |
250 |
class GetRange(Protocol):
    def get_range(
        self,
        path: str,
        *,
        start: int,
        end: int | None = None,
        length: int | None = None,
    ) -> Buffer:
        """Fetch the bytes of a single byte range at the specified location.

        An error will be returned if the given range is zero-length or starts after
        the end of the object. If the range ends after the end of the object, the
        entire remainder of the object will be returned. In every other case the
        exact requested range will be returned.

        Args:
            path: The path within the store to retrieve.

        Keyword Args:
            start: The start of the byte range.
            end: The end of the byte range (exclusive). Either `end` or `length` must be
                non-None.
            length: The number of bytes of the byte range. Either `end` or `length` must
                be non-None.

        Returns:
            A `Buffer` object implementing the Python buffer protocol.

        """
        ...
282 |
283 |
class GetRangeAsync(Protocol):
    async def get_range_async(
        self,
        path: str,
        *,
        start: int,
        end: int | None = None,
        length: int | None = None,
    ) -> Buffer:
        """Asynchronous counterpart of `get_range`.

        The semantics are documented on [GetRange][obspec.GetRange].
        """
        ...
298 |
299 |
class GetRanges(Protocol):
    def get_ranges(
        self,
        path: str,
        *,
        starts: Sequence[int],
        ends: Sequence[int] | None = None,
        lengths: Sequence[int] | None = None,
    ) -> Sequence[Buffer]:
        """Return the bytes stored at the specified location in the given byte ranges.

        The choice of how to implement multiple range requests is implementation
        specific.

        Args:
            path: The path within the store to retrieve.

        Keyword Args:
            starts: A sequence of `int` where each offset starts.
            ends: A sequence of `int` where each offset ends (exclusive). Either `ends`
                or `lengths` must be non-None.
            lengths: A sequence of `int` with the number of bytes of each byte range.
                Either `ends` or `lengths` must be non-None.

        Returns:
            A sequence of `Buffer`, one for each range, each implementing the Python
            buffer protocol.

        """
        ...
330 |
331 |
class GetRangesAsync(Protocol):
    async def get_ranges_async(
        self,
        path: str,
        *,
        starts: Sequence[int],
        ends: Sequence[int] | None = None,
        lengths: Sequence[int] | None = None,
    ) -> Sequence[Buffer]:
        """Asynchronous counterpart of `get_ranges`.

        The semantics are documented on [GetRanges][obspec.GetRanges].
        """
        ...
346 |
--------------------------------------------------------------------------------
/docs/blog/posts/introducing-obspec.md:
--------------------------------------------------------------------------------
1 | ---
2 | draft: false
3 | date: 2025-06-25
4 | categories:
5 | - Release
6 | authors:
7 | - kylebarron
8 | ---
9 |
10 | # Introducing Obspec: A Python protocol for interfacing with object storage
11 |
12 | Obspec defines a minimal, transparent Python interface to read, write, and modify data on object storage.
13 |
14 | It's designed to abstract away the complexities of different object storage providers while acknowledging that object storage is _not a filesystem_. The Python protocols present more similarities to HTTP requests than Python file objects.
15 |
16 |
17 |
18 | The primary existing Python specification used for object storage is [fsspec](https://filesystem-spec.readthedocs.io/en/latest/), which defines a filesystem-like interface based around Python file-like objects.
19 |
20 | However this presents an impedance mismatch: **object storage is not a filesystem** and does not have the same semantics as filesystems. This leads to surprising behavior, poor performance, and integration complexity.
21 |
22 | ## File-like, stateful APIs add ambiguity
23 |
24 | Fsspec has significant layers of caching to try to make object storage behave _like_ a filesystem, but this also causes unpredictable results.
25 |
26 | ### Fsspec: Opaque list caching
27 |
28 | Take the following example. Is the list request cached? How many requests are made, one or two? What happens if the remote data changes? Will the second list automatically reflect new data?
29 |
30 | ```py
31 | from time import sleep
32 | from fsspec import AbstractFileSystem
33 |
34 | def list_files_twice(fs: AbstractFileSystem):
35 | fs.ls("s3://mybucket")
36 | sleep(5)
37 | fs.ls("s3://mybucket")
38 | ```
39 |
40 | Because [`AbstractFileSystem.ls`][fsspec.spec.AbstractFileSystem.ls] returns a _fully-materialized_ `list` and there can be thousands of items in a bucket, fsspec implementations tend to use some sort of internal caching. Furthermore, the specification explicitly allows for caching by defining a keyword argument named `refresh`. But the API documentation for `ls` [doesn't say][fsspec.spec.AbstractFileSystem.ls] what the default for `refresh` is (only that you _may_ explicitly pass `refresh=True|False` to force a behavior).
41 |
42 | You have to read implementation-specific source code to find out that, in the case of [`s3fs`](https://github.com/fsspec/s3fs), the fsspec implementation for S3, the [default is `refresh=False`](https://github.com/fsspec/s3fs/blob/ec57f88c057dfd29fa1db80db423832fbfa4832a/s3fs/core.py#L1021). So in the case of `s3fs`, the list call _is cached_, only one HTTP request is made, and the second call to `ls` will not reflect new data without an explicit call to `refresh=True`.
43 |
44 | But the design of the abstraction means that it's very difficult for generic code operating on the abstract base class to infer from the function signature how many HTTP requests will be made by most implementations.
45 |
46 | ### Obspec: Streaming list
47 |
48 | In contrast, obspec relies on iterators wherever possible. The [`obspec.List`][] protocol returns an iterator of metadata about files, which enables stateless implementations that map much more closely to the underlying HTTP requests.
49 |
50 | ```py
51 | from time import sleep
52 | from obspec import List
53 |
54 | def list_files_twice(client: List):
55 | list_items = list(client.list("prefix"))
56 | sleep(5)
57 | list_items = list(client.list("prefix"))
58 | ```
59 |
60 | There's no internal caching, a set of possibly-multiple requests is made for each call to `list`, and each call to `list` will reflect the latest state of the bucket.
61 |
62 | ### Fsspec: Opaque file downloads
63 |
64 | Consider the options fsspec provides for downloading data. Fsspec doesn't have a method to stream a file download into memory, so your options are:
65 |
66 | 1. Materialize the entire file in memory, which is not practical for large files.
67 | 2. Make targeted range requests, which requires you to know the byte ranges you want to download and requires multiple HTTP calls.
68 | 3. Use a file-like object, where it is not clear how many HTTP requests will be made or how caching works.
69 | 4. Download to a local file, which incurs overhead of writing to disk and then reading back into memory.
70 |
71 | Suppose we choose option 3, using a file-like object. It's fully opaque how many requests are being made:
72 |
73 | ```py
74 | from fsspec import AbstractFileSystem
75 |
76 | def iterate_over_file_object(fs: AbstractFileSystem, path: str):
77 | with fs.open(path) as f:
78 | for line in f:
79 | print(line.strip())
80 | ```
81 |
82 | ### Obspec: Streaming download
83 |
84 | By mapping more closely to the underlying HTTP requests, obspec makes it clearer what HTTP requests are happening under the hood. [`obspec.Get`][] allows for streaming a file download via a Python iterator:
85 |
86 | ```py
87 | from obspec import Get
88 |
89 | def download_file(client: Get):
90 | response = client.get("my-file.txt")
91 | for buffer in response:
92 | # Process each buffer chunk as needed
93 | print(f"Received buffer of size: {len(memoryview(buffer))} bytes")
94 | ```
95 |
96 | In this case, only one HTTP request is made, and you can start processing the data as it arrives without needing to materialize the entire file in memory.
97 |
98 | ### Support for functionality not native to filesystems
99 |
100 | Obspec allows for functionality not native to filesystems, such as preconditions (fetch if unmodified) and atomic multipart uploads.
101 |
102 | ## Native Async support
103 |
104 | Fsspec was originally designed for synchronous I/O. Async support was bolted on via async versions of methods, but the core architecture is still sync-first and the async support is relatively sparsely documented.
105 |
106 | The async support in fsspec is intentionally hidden away: all async operations are named with a leading underscore and in effect "private" and not designed to be visible by most users. Additionally some "async" calls in fsspec just use `loop.run_in_executor(...)` to perform the work in a thread in the background.
107 |
108 | In 2025, the Python async ecosystem has progressed to the point where an interface should provide **first-class support for async code**. All obspec functionality is defined in matching sync and async protocols with clear separation between the two.
109 |
110 | ## API Surface
111 |
112 | The fsspec API surface is _quite large_. [`AbstractFileSystem`][fsspec.spec.AbstractFileSystem] defines around 10 public attributes and 56 public methods. [`AbstractBufferedFile`][fsspec.spec.AbstractBufferedFile] defines around 20 public methods. And that's not including the async implementation in [`AsyncFileSystem`][fsspec.asyn.AsyncFileSystem].
113 |
114 | Aside from making it difficult for backends to implement the full surface area, this also means it's common to hit `NotImplementedError` at runtime when a backend doesn't support the method you're using.
115 |
116 | Obspec has a **much smaller API surface** than fsspec, which makes it easier to understand, implement, and compose. Obspec has just 10 core methods with synchronous and asynchronous variants:
117 |
118 | - [`copy`][obspec.Copy]/[`copy_async`][obspec.CopyAsync]: Copy an object within the same store.
119 | - [`delete`][obspec.Delete]/[`delete_async`][obspec.DeleteAsync]: Delete an object.
120 | - [`get`][obspec.Get]/[`get_async`][obspec.GetAsync]: Download a file, returning an iterator or async iterator of buffers.
121 | - [`get_range`][obspec.GetRange]/[`get_range_async`][obspec.GetRangeAsync]: Get a single byte range.
122 | - [`get_ranges`][obspec.GetRanges]/[`get_ranges_async`][obspec.GetRangesAsync]: Get multiple byte ranges.
123 | - [`head`][obspec.Head]/[`head_async`][obspec.HeadAsync]: Access file metadata.
124 | - [`list`][obspec.List]/[`list_async`][obspec.ListAsync]: List objects, returning an iterator or async iterator of metadata.
125 | - [`list_with_delimiter`][obspec.ListWithDelimiter]/[`list_with_delimiter_async`][obspec.ListWithDelimiterAsync]: List objects within a specific directory.
126 | - [`put`][obspec.Put]/[`put_async`][obspec.PutAsync]: Upload a file, buffer, or iterable of buffers.
127 | - [`rename`][obspec.Rename]/[`rename_async`][obspec.RenameAsync]: Move an object from one path to another within the same store.
128 |
129 | This smaller API surface also means that it's much rarer to get a runtime `NotImplementedError`.
130 |
131 | ## Static typing support
132 |
133 | Fsspec hardly has any support for static typing, which makes it hard for a user to know they're using the interface correctly.
134 |
135 | Obspec is **fully statically typed**. This provides excellent in-editor documentation and autocompletion, as well as static warnings when the interface is used incorrectly.
136 |
137 |
141 |
142 | ## Protocols & duck typing, not subclassing
143 |
144 | Python defines two types of subtyping: [nominal and structural subtyping](https://docs.python.org/3/library/typing.html#nominal-vs-structural-subtyping).
145 |
146 | In essence, _nominal_ subtyping means _subclassing_. Class `A` is a nominal subtype of class `B` if `A` subclasses from `B`. _Structural_ subtyping means _duck typing_. Class `A` is a structural subtype of class `B` if `A` "looks like" `B`, that is, it _conforms to the same shape_ as `B`.
147 |
148 | Using structural subtyping means that the libraries in an ecosystem don't need to have any knowledge of or dependency on each other, as long as they strictly and accurately implement the same duck-typed interface.
149 |
150 | For example, an `Iterable` is a protocol. You don't need to subclass from a base `Iterable` class in order to make your type iterable. Instead, if you define an `__iter__` dunder method on your class, it _automatically becomes iterable_ because Python has a convention that if you see an `__iter__` method, you can call it to iterate over a sequence.
151 |
152 | As another example, the [Buffer Protocol](https://docs.python.org/3/c-api/buffer.html) is a protocol to enable zero-copy exchange of binary data between Python libraries. Unlike `Iterable`, this is a protocol that is inaccessible in user Python code and only accessible at the C level, but it's still a protocol. Numpy can create arrays that view a buffer via the buffer protocol, even when Numpy has no prior knowledge of the library that produces the buffer.
153 |
154 | Obspec relies on structural subtyping to provide flexibility to implementors while not requiring them to take an explicit dependency on obspec, which would be required to subclass from obspec using nominal subtyping.
155 |
156 | ## Existing implementations
157 |
158 | [Obstore](https://developmentseed.org/obstore/latest/) is the primary existing implementation of obspec. Indeed, obspec's API is essentially a simplified formalization of obstore's existing API.
159 |
160 | We'd like to see additional future first-party and third-party implementations of the obspec protocol.
161 |
162 | ## Example: Caching wrapper
163 |
164 | Obspec does not have any built-in caching logic. This is a deliberate design choice to keep the interface simple and predictable. Caching can be implemented as a wrapper around obspec, allowing users to choose their caching strategy without complicating the core interface.
165 |
166 | Here we have a very simple example of this approach. `SimpleCache` is a wrapper class around something implementing the `GetRange` protocol. The `SimpleCache` manages caching logic itself _outside the underlying `GetRange` backend_. But since `SimpleCache` also implements `GetRange`, it can be used wherever `GetRange` is expected.
167 |
168 | ```py
169 | from __future__ import annotations
170 | from typing_extensions import Buffer
171 | from obspec import GetRange
172 |
173 | class SimpleCache(GetRange):
174 | """A simple cache for synchronous range requests that never evicts data."""
175 |
176 | def __init__(self, client: GetRange):
177 | self.client = client
178 | self.cache: dict[tuple[str, int, int | None, int | None], Buffer] = {}
179 |
180 | def get_range(
181 | self,
182 | path: str,
183 | *,
184 | start: int,
185 | end: int | None = None,
186 | length: int | None = None,
187 | ) -> Buffer:
188 | cache_key = (path, start, end, length)
189 | if cache_key in self.cache:
190 | return self.cache[cache_key]
191 |
192 | response = self.client.get_range(
193 | path,
194 | start=start,
195 | end=end,
196 | length=length,
197 | )
198 | self.cache[cache_key] = response
199 | return response
200 | ```
201 |
202 | Of course, a real implementation would be smarter than just caching the exact byte range, and might use something like block caching.
203 |
204 | Now if `GetRange` is expected to be used like so:
205 |
206 | ```py
207 | def my_function(client: GetRange, path: str, *, start: int, end: int):
208 | buffer = client.get_range(path, start=start, end=end)
209 | # Do something with the buffer
210 | print(len(memoryview(buffer)))
211 | ```
212 |
213 | Then a user can seamlessly insert the `SimpleCache` in the middle. The second request will be served from the cache and will not reach the `S3Store`:
214 |
215 | ```py
216 | from obstore.store import S3Store
217 |
218 | store = S3Store("bucket")
219 | caching_wrapper = SimpleCache(store)
220 | my_function(caching_wrapper, "path.txt", start=0, end=10)
221 | my_function(caching_wrapper, "path.txt", start=0, end=10)
222 | ```
223 |
224 | ## Usage for downstream libraries
225 |
226 | Not all backends will necessarily support all features. Obspec is defined as a set of _independent_ protocols to allow libraries depending on obspec to verify that obspec implementations provide all required functionality.
227 |
228 | In particular, Python allows you to [intersect protocols](https://typing.python.org/en/latest/spec/protocol.html#unions-and-intersections-of-protocols). Thus, you should use the most minimal methods required for your use case, **creating your own subclassed protocol** with just what you need.
229 |
230 | ```py
231 | from typing import Protocol
232 | from obspec import Delete, Get, List, Put
233 |
234 |
235 | class MyCustomObspecProtocol(Delete, Get, List, Put, Protocol):
236 | """
237 | My custom protocol with functionality required in a downstream library.
238 | """
239 | ```
240 |
241 | Then use that protocol generically:
242 |
243 | ```py
244 | def do_something(backend: MyCustomObspecProtocol):
245 | backend.put("path.txt", b"hello world!")
246 |
247 | files = list(backend.list())
248 | assert any(file["path"] == "path.txt" for file in files)
249 |
250 | assert memoryview(backend.get("path.txt").buffer()) == b"hello world!"
251 |
252 | backend.delete("path.txt")
253 |
254 | files = list(backend.list())
255 | assert not any(file["path"] == "path.txt" for file in files)
256 | ```
257 |
258 | Defining the most minimal interface you require widens the set of possible backends that can implement your interface. For example, making a range request is possible by any HTTP client, but a list call may have semantics not defined in the HTTP specification. So by only requiring, say, `Get` and `GetRange` you allow more implementations to be used with your program.
259 |
260 | Alternatively, if you only require a single method, there's no need to create your own custom protocol, and you can use the obspec protocol directly.
261 |
262 | ### Example: Cloud-Optimized GeoTIFF reader
263 |
264 | A [Cloud-Optimized GeoTIFF (COG)](https://cogeo.org/) reader might only require range requests:
265 |
266 | ```py
267 | from typing import Protocol
268 | from obspec import GetRange, GetRanges
269 |
270 | class CloudOptimizedGeoTiffReader(GetRange, GetRanges, Protocol):
271 | """Protocol with necessary methods to read a Cloud-Optimized GeoTIFF file."""
272 |
273 | def read_cog_header(backend: CloudOptimizedGeoTiffReader, path: str):
274 | # Make request for first 32KB of file
275 | header_bytes = backend.get_range(path, start=0, end=32 * 1024)
276 | # TODO: parse information from header
277 | raise NotImplementedError
278 |
279 | def read_cog_image(backend: CloudOptimizedGeoTiffReader, path: str):
280 | header = read_cog_header(backend, path)
281 | # TODO: read image data from file.
282 | ```
283 |
284 | An _async_ Cloud-Optimized GeoTIFF reader might instead subclass from obspec's async methods:
285 |
286 | ```py
287 | from typing import Protocol
288 | from obspec import GetRangeAsync, GetRangesAsync
289 |
290 | class AsyncCloudOptimizedGeoTiffReader(GetRangeAsync, GetRangesAsync, Protocol):
291 | """Necessary methods to asynchronously read a Cloud-Optimized GeoTIFF file."""
292 |
293 | async def read_cog_header(backend: AsyncCloudOptimizedGeoTiffReader, path: str):
294 | # Make request for first 32KB of file
295 | header_bytes = await backend.get_range_async(path, start=0, end=32 * 1024)
296 | # TODO: parse information from header
297 | raise NotImplementedError
298 |
299 | async def read_cog_image(backend: AsyncCloudOptimizedGeoTiffReader, path: str):
300 | header = await read_cog_header(backend, path)
301 | # TODO: read image data from file.
302 | ```
303 |
--------------------------------------------------------------------------------