├── .github
│   └── workflows
│       ├── pypi-publish.yml
│       ├── run_examples_test.yml
│       ├── static_analysis.yml
│       └── unit_tests.yml
├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── Makefile
├── README.md
├── assets
│   └── ODP-SDK.png
├── docs
│   ├── Makefile
│   ├── make.bat
│   ├── requirements-docs.txt
│   └── source
│       ├── _static
│       │   ├── .gitkeep
│       │   └── style.css
│       ├── _templates
│       │   └── .gitkeep
│       ├── conf.py
│       ├── img
│       │   ├── odp-favicon-rgb-blueandwhite.png
│       │   ├── odp-logo-rgb-blueandblack.png
│       │   └── odp-logo-rgb-blueandwhite.png
│       ├── index.rst
│       └── odp.rst
├── examples
│   ├── README.md
│   ├── catalog_client_example.py
│   ├── catalog_oqs_query_example.py
│   ├── observables_example.py
│   ├── raw_client_example.py
│   ├── raw_client_file_example.py
│   ├── tabular_client_example.py
│   ├── tabular_geography.py
│   └── workspace_examples
│       ├── raw-roundtrip.ipynb
│       └── tabular-roundtrip.ipynb
├── poetry.lock
├── pyproject.toml
├── scripts
│   └── migrate_local_deps.py
├── src
│   ├── dto
│   │   ├── README.md
│   │   ├── odp
│   │   │   └── dto
│   │   │       ├── __init__.py
│   │   │       ├── catalog
│   │   │       │   ├── __init__.py
│   │   │       │   ├── _rg.py
│   │   │       │   ├── data_collection.py
│   │   │       │   ├── dataset.py
│   │   │       │   └── observable.py
│   │   │       ├── common
│   │   │       │   ├── __init__.py
│   │   │       │   ├── contact_info.py
│   │   │       │   └── license.py
│   │   │       ├── metadata.py
│   │   │       ├── registry
│   │   │       │   ├── __init__.py
│   │   │       │   ├── _rg.py
│   │   │       │   └── observable_class.py
│   │   │       ├── resource.py
│   │   │       ├── resource_registry.py
│   │   │       ├── resource_status.py
│   │   │       └── validators.py
│   │   ├── pyproject.toml
│   │   └── tests
│   │       └── test_dto
│   │           ├── __init__.py
│   │           ├── conftest.py
│   │           ├── test_dto_base.py
│   │           ├── test_resource_registry.py
│   │           ├── test_validators.py
│   │           └── utils.py
│   └── sdk
│       ├── README.md
│       ├── odp
│       │   └── client
│       │       ├── __init__.py
│       │       ├── auth.py
│       │       ├── client.py
│       │       ├── dto
│       │       │   ├── __init__.py
│       │       │   ├── file_dto.py
│       │       │   ├── table_spec.py
│       │       │   └── tabular_store.py
│       │       ├── exc.py
│       │       ├── http_client.py
│       │       ├── raw_storage_client.py
│       │       ├── resource_client.py
│       │       ├── tabular_storage_client.py
│       │       ├── tabular_storage_v2_client.py
│       │       ├── tabular_v2
│       │       │   ├── __init__.py
│       │       │   ├── big
│       │       │   │   ├── __init__.py
│       │       │   │   ├── big.py
│       │       │   │   ├── buffer.py
│       │       │   │   ├── local.py
│       │       │   │   └── remote.py
│       │       │   ├── bsquare
│       │       │   │   ├── __init__.py
│       │       │   │   ├── bsquare.py
│       │       │   │   └── query.py
│       │       │   ├── client
│       │       │   │   ├── __init__.py
│       │       │   │   ├── client.py
│       │       │   │   ├── table_cursor.py
│       │       │   │   ├── table_tx.py
│       │       │   │   └── tablehandler.py
│       │       │   └── util
│       │       │       ├── __init__.py
│       │       │       ├── cache.py
│       │       │       ├── exp.py
│       │       │       ├── reader.py
│       │       │       └── util.py
│       │       └── utils
│       │           ├── __init__.py
│       │           ├── geometry_conversion.py
│       │           ├── json.py
│       │           ├── ndjson.py
│       │           └── package_utils.py
│       ├── odp_sdk
│       │   └── __init__.py
│       ├── pyproject.toml
│       └── tests
│           └── test_sdk
│               ├── __init__.py
│               ├── conftest.py
│               ├── fixtures
│               │   ├── __init__.py
│               │   ├── auth_fixtures.py
│               │   ├── dto_fixtures.py
│               │   ├── jwt_fixtures.py
│               │   ├── odp_http_client_fixtures.py
│               │   ├── request_fixtures.py
│               │   └── time_fixtures.py
│               ├── test_auth
│               │   ├── __init__.py
│               │   ├── test_azure_token_provider.py
│               │   ├── test_get_default_token_provider.py
│               │   ├── test_jwks_token_provider.py
│               │   └── test_odp_workspace_token_provider.py
│               ├── test_http_client.py
│               ├── test_raw_storage_client.py
│               ├── test_resource_client.py
│               ├── test_tabular_storage_client.py
│               └── test_utils
│                   ├── __init__.py
│                   ├── test_dto.py
│                   ├── test_geometry_conversion.py
│                   ├── test_ndjson.py
│                   └── test_package_utils.py
└── tests
    └── test_examples
        ├── __init__.py
        ├── conftest.py
        ├── test_catalog_client_example.py
        ├── test_catalog_oqs_query_example.py
        ├── test_observables_example.py
        ├── test_raw_client_example.py
        ├── test_tabular_client_example.py
        ├── test_tabular_geography.py
        └── test_tabular_v2_client_example.py
/.github/workflows/pypi-publish.yml:
--------------------------------------------------------------------------------
1 | name: Build & Release
2 |
3 | on:
4 | release:
5 | types: [published]
6 |
7 | jobs:
8 | build:
9 | runs-on: ubuntu-latest
10 | steps:
11 | - uses: actions/checkout@v3
12 |
13 | - name: Set up Python
14 | uses: actions/setup-python@v2
15 | with:
16 | python-version: "3.10"
17 |
18 | - name: Install packages
19 | run: |
20 | python -m pip install --upgrade pip build poetry
21 | python -m poetry install --only helper-scripts
22 |
23 | - name: Build a binary wheel and a source tarball
24 | run: |
25 | make version
26 | make build
27 |
28 | - name: Publish build artifacts
29 | uses: actions/upload-artifact@v3
30 | with:
31 | name: built-package
32 | path: "./src/**/dist"
33 |
34 | publish-release:
35 | name: Publish release to PyPI
36 | needs: [build]
37 | environment: "prod"
38 | runs-on: ubuntu-latest
39 | strategy:
40 | matrix:
41 | package_path:
42 | - "sdk"
43 | - "dto"
44 |
45 | steps:
46 | - name: Download build artifacts
47 | uses: actions/download-artifact@v3
48 | with:
49 | name: built-package
50 | path: "./build"
51 |
52 | - name: List out files
53 | run: |
54 | ls -lA ./build
55 |
56 |       - name: List out package files
57 | run: |
58 | ls -lA ./build/${{ matrix.package_path }}/dist
59 |
60 | - name: Publish distribution to PyPI
61 | uses: pypa/gh-action-pypi-publish@release/v1
62 | with:
63 | password: ${{ secrets.PYPI_TOKEN }}
64 | packages-dir: "./build/${{ matrix.package_path }}/dist"
65 | verbose: true
--------------------------------------------------------------------------------
/.github/workflows/run_examples_test.yml:
--------------------------------------------------------------------------------
1 | name: Run examples test
2 |
3 | on:
4 | push:
5 |
6 | jobs:
7 | examples-test:
8 | name: Test examples
9 | runs-on: ubuntu-latest
10 | env:
11 | ODCAT_AUTH_CLIENT_ID: ${{ secrets.ODCAT_AUTH_CLIENT_ID }}
12 | ODCAT_AUTH_CLIENT_SECRET: ${{ secrets.ODCAT_AUTH_CLIENT_SECRET }}
13 | ODCAT_AUTH_AUDIENCE: ${{ secrets.ODCAT_AUTH_AUDIENCE }}
14 | strategy:
15 | matrix:
16 | python-version:
17 | - "3.10"
18 | - "3.11"
19 | - "3.12"
20 | steps:
21 | - uses: actions/checkout@v3
22 |
23 | - name: Set up Python
24 | uses: actions/setup-python@v2
25 | with:
26 | python-version: ${{ matrix.python-version }}
27 |
28 | - name: Install poetry
29 | run: |
30 | python -m pip install --upgrade pip build poetry
31 |
32 | - name: Install packages
33 | run: |
34 | poetry install
35 |
36 | - name: Add Mask
37 | run: |
38 | echo "::add-mask::${{ env.ODCAT_AUTH_CLIENT_ID }}"
39 | echo "::add-mask::${{ env.ODCAT_AUTH_CLIENT_SECRET }}"
40 | echo "::add-mask::${{ env.ODCAT_AUTH_AUDIENCE }}"
41 |
42 | - name: Run tests
43 | run: |
44 | poetry run pytest tests/test_examples
45 | env:
46 | ODCAT_BASE_URL: https://odcat.dev.hubocean.io
47 | ODCAT_AUTH_SCOPE: https://oceandataplatform.onmicrosoft.com/odcat-dev/.default
48 | GITHUB_SHA: ${{ github.sha }}
--------------------------------------------------------------------------------
/.github/workflows/static_analysis.yml:
--------------------------------------------------------------------------------
1 | # yanked from https://github.com/PrefectHQ/prefect-collection-template/blob/main/%7B%7Bcookiecutter.collection_name%7D%7D/.github/workflows/static_analysis.yml
2 | name: Static analysis
3 |
4 | on: [pull_request]
5 |
6 | jobs:
7 | pre-commit-checks:
8 | name: Pre-commit checks
9 | runs-on: ubuntu-latest
10 | strategy:
11 | matrix:
12 | python-version:
13 | - "3.9"
14 | - "3.10"
15 | - "3.11"
16 | - "3.12"
17 |
18 | steps:
19 | - uses: actions/checkout@v3
20 | with:
21 | persist-credentials: false
22 |
23 | - name: Set up Python
24 | uses: actions/setup-python@v4
25 | with:
26 | python-version: ${{ matrix.python-version }}
27 |
28 | - name: Install pre-commit
29 | run: |
30 | python -m pip install --upgrade pip pre-commit
31 |
32 | - name: Run pre-commit
33 | run: |
34 | pre-commit run --show-diff-on-failure --color=always --all-files
35 |
--------------------------------------------------------------------------------
/.github/workflows/unit_tests.yml:
--------------------------------------------------------------------------------
1 | name: Run unit tests
2 |
3 | on:
4 | push:
5 |
6 | jobs:
7 | unit-tests:
8 | name: Unit tests
9 | runs-on: ubuntu-latest
10 | strategy:
11 | matrix:
12 | python-version:
13 | - "3.9"
14 | - "3.10"
15 | - "3.11"
16 | - "3.12"
17 |
18 | steps:
19 | - uses: actions/checkout@v3
20 |
21 | - name: Print current working directory
22 | run: pwd
23 |
24 | - name: Set up Python
25 | uses: actions/setup-python@v2
26 | with:
27 | python-version: ${{ matrix.python-version }}
28 |
29 | - name: Install poetry
30 | run: |
31 | python -m pip install --upgrade pip build poetry
32 |
33 | - name: Install packages
34 | run: |
35 | poetry install
36 |
37 | - name: Run tests
38 | run: |
39 | poetry run pytest src/sdk/tests
40 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 | version.txt
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 | cover/
54 |
55 | # Translations
56 | *.mo
57 | *.pot
58 |
59 | # Django stuff:
60 | *.log
61 | local_settings.py
62 | db.sqlite3
63 | db.sqlite3-journal
64 |
65 | # Flask stuff:
66 | instance/
67 | .webassets-cache
68 |
69 | # Scrapy stuff:
70 | .scrapy
71 |
72 | # Sphinx documentation
73 | docs/_build/
74 |
75 | # PyBuilder
76 | .pybuilder/
77 | target/
78 |
79 | # Jupyter Notebook
80 | .ipynb_checkpoints
81 |
82 | # IPython
83 | profile_default/
84 | ipython_config.py
85 |
86 | # pyenv
87 | # For a library or package, you might want to ignore these files since the code is
88 | # intended to run in multiple environments; otherwise, check them in:
89 | # .python-version
90 |
91 | # pipenv
92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
95 | # install all needed dependencies.
96 | #Pipfile.lock
97 |
98 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
99 | __pypackages__/
100 |
101 | # Celery stuff
102 | celerybeat-schedule
103 | celerybeat.pid
104 |
105 | # SageMath parsed files
106 | *.sage.py
107 |
108 | # Environments
109 | .env
110 | .venv
111 | env/
112 | venv/
113 | ENV/
114 | env.bak/
115 | venv.bak/
116 |
117 | # Spyder project settings
118 | .spyderproject
119 | .spyproject
120 |
121 | # Rope project settings
122 | .ropeproject
123 |
124 | # mkdocs documentation
125 | /site
126 |
127 | # mypy
128 | .mypy_cache/
129 | .dmypy.json
130 | dmypy.json
131 |
132 | # Pyre type checker
133 | .pyre/
134 |
135 | # pytype static type analyzer
136 | .pytype/
137 |
138 | # Cython debug symbols
139 | cython_debug/
140 |
141 | #
142 | # IDE
143 | #
144 | /.idea
145 | .DS_Store
146 | /bin
147 | *.swp
148 | .token_cache.bin
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/pre-commit/pre-commit-hooks
3 | rev: "v4.4.0"
4 | hooks:
5 | - id: check-ast
6 | - repo: https://github.com/pycqa/autoflake
7 | rev: v2.0.1
8 | hooks:
9 | - id: autoflake
10 | args:
11 | [
12 | --remove-all-unused-imports,
13 | --in-place,
14 | --ignore-init-module-imports,
15 | ]
16 | - repo: https://github.com/pycqa/isort
17 | rev: 5.12.0
18 | hooks:
19 | - id: isort
20 | - repo: https://github.com/psf/black
21 | rev: 23.1.0
22 | hooks:
23 | - id: black
24 | args: [--line-length=120]
25 | language_version: python3.10
26 | - repo: https://github.com/pycqa/flake8
27 | rev: 6.0.0
28 | hooks:
29 | - id: flake8
30 | additional_dependencies:
31 | - "flake8-pyproject"
32 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2023 HUB Ocean
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining
4 | a copy of this software and associated documentation files (the
5 | "Software"), to deal in the Software without restriction, including
6 | without limitation the rights to use, copy, modify, merge, publish,
7 | distribute, sublicense, and/or sell copies of the Software, and to
8 | permit persons to whom the Software is furnished to do so, subject to
9 | the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be
12 | included in all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # Description: Makefile for building and publishing the SDK
2 |
3 | # External tools
4 | POETRY := poetry
5 | MD5SUM := md5sum
6 | TAR := tar
7 | GIT := git
8 | PYTHON := python3
9 |
10 | # Subprojects
11 | SUBPROJECTS := src/sdk src/dto
12 | DIST_DIRS := $(SUBPROJECTS:%=%/dist)
13 | PYPROJECTS := $(SUBPROJECTS:%=%/pyproject.toml)
14 | MD5S := $(DIST_DIRS:%=%/md5.published)
15 | VERSIONS := $(SUBPROJECTS:%=%/version.txt)
16 |
17 | # Get the current version from the git tags
18 | CURRENT_VERSION := $(shell $(GIT) describe --tags --abbrev=0)
19 |
20 | #
21 | # Rules
22 | #
23 |
24 | # Build the distribution
25 | %/dist: %/pyproject.toml
26 | cd $(dir $@) && $(POETRY) build
27 |
28 | # Create the md5 hash of the distribution
29 | %/dist/md5: %/dist
30 | $(TAR) -cf - $(dir $@) | $(MD5SUM) > $@
31 |
32 | # Publish the distribution
33 | %/dist/md5.published: %/dist/md5
34 | cd $(dir $@) && $(POETRY) publish --dry-run
35 | cp $< $@
36 |
37 | # Update the version in the pyproject.toml
38 | %/version.txt: %/pyproject.toml
39 | echo "Poetry version: $(CURRENT_VERSION)"
40 | $(POETRY) run python scripts/migrate_local_deps.py $(CURRENT_VERSION) $< --overwrite
41 | cd $(dir $<) && $(POETRY) version $(CURRENT_VERSION)
42 | echo $(CURRENT_VERSION) > $@
43 |
44 | # Update the version in all subprojects
45 | version: $(VERSIONS)
46 | $(POETRY) update odp-sdk odp-dto
47 |
48 | # Build all subprojects
49 | build: $(DIST_DIRS)
50 |
51 | # Publish all subprojects
52 | publish: $(MD5S)
53 |
54 | # Clean up
55 | clean:
56 | rm -vrf $(DIST_DIRS)
57 | rm -f $(VERSIONS)
58 |
59 | # Default target
60 | all: build
61 |
62 | # Phony targets
63 | .PHONY: build publish version clean all
64 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | # ODP Python SDK
7 |
8 | Connect to the Ocean Data Platform with Python through the Python SDK. Download queried ocean data easily and efficiently into data frames, ready for exploration and further processing in your data science projects.
9 |
10 | ## Documentation
11 |
12 | [WIP]
13 |
14 | ## Installation
15 |
16 | Use the package manager [pip](https://pip.pypa.io/en/stable/) to install the Ocean Data Platform Python SDK.
17 |
18 | ```bash
19 | pip3 install odp_sdk
20 | ```
21 |
22 | ## Usage
23 |
24 | *Note: Accessing the Ocean Data Platform requires an authorized account. Contact ODP to acquire one.*
25 |
26 | ```python
27 | from odp_sdk.client import OdpClient
28 |
29 | client = OdpClient()
30 |
31 | for item in client.catalog.list():
32 | print(item)
33 | ```
34 |
35 | Examples can be found in /examples.
36 |
--------------------------------------------------------------------------------
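
The README stops at listing the catalog; below is a minimal sketch of the promised next step (loading tabular data into a pandas DataFrame), built only on calls that appear in `examples/tabular_client_example.py` and `examples/raw_client_file_example.py`. The dataset name `catalog.hubocean.io/dataset/my-dataset` is a placeholder.

```python
import pandas as pd

from odp.client import OdpClient

client = OdpClient()

# Placeholder qualified name: replace with a tabular dataset you have access to.
dataset = client.catalog.get("catalog.hubocean.io/dataset/my-dataset")

# select_as_list() returns the rows as a list of dicts, which pandas can
# consume directly for exploration and further processing.
rows = client.tabular.select_as_list(dataset)
df = pd.DataFrame(rows)
print(df.head())
```
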
/assets/ODP-SDK.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/C4IROcean-archive/odp-sdk-python/cfe9d937e620dfa8f35e6e92646d9625de5a0de9/assets/ODP-SDK.png
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = source
9 | BUILDDIR = build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.http://sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/docs/requirements-docs.txt:
--------------------------------------------------------------------------------
1 | cognite-sdk>=1.3
2 | cmocean>=2.0
3 | geojson>=2.5.0
4 | geopandas>=0.8.1
5 | matplotlib>=3.2.2
6 | MetPy>=0.12.1
7 | numpy>=1.19.0
8 | pandas>=1.0.5
9 | python-dateutil>=2.8.1
10 | scipy>=1.5.0
11 | seaborn>=0.10.1
12 | tqdm>=4.49.0
13 | pygeos>=0.8
14 | descartes>=1.1.0
15 |
--------------------------------------------------------------------------------
/docs/source/_static/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/C4IROcean-archive/odp-sdk-python/cfe9d937e620dfa8f35e6e92646d9625de5a0de9/docs/source/_static/.gitkeep
--------------------------------------------------------------------------------
/docs/source/_static/style.css:
--------------------------------------------------------------------------------
1 | /* Sidebar header (and topbar for mobile) */
2 |
3 | .wy-side-nav-search, .wy-nav-top {
4 | background-color: #0A1530;
5 | }
6 | /* Sidebar */
7 | .wy-nav-side {
8 | background-color: #0A1530;
9 | color: #FD5D16;
10 | }
11 |
12 | .wy-nav-content-wrap {
13 | background: #eff6fa;
14 | }
--------------------------------------------------------------------------------
/docs/source/_templates/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/C4IROcean-archive/odp-sdk-python/cfe9d937e620dfa8f35e6e92646d9625de5a0de9/docs/source/_templates/.gitkeep
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
6 |
7 | # -- Path setup --------------------------------------------------------------
8 |
9 | import sys
10 |
11 | # If extensions (or modules to document with autodoc) are in another directory,
12 | # add these directories to sys.path here. If the directory is relative to the
13 | # documentation root, use os.path.abspath to make it absolute, like shown here.
14 | #
15 | from os import path
16 |
17 | sys.path.insert(0, path.abspath(path.join(path.dirname(__file__), "../../")))
18 | sys.path.insert(0, path.abspath(path.join(path.dirname(__file__), "../../examples")))
19 |
20 |
21 | # -- Project information -----------------------------------------------------
22 |
23 | project = "ODP Python SDK"
24 | copyright = "2020, C4IR/Ocean Data Foundation"
25 | author = "C4IR/Ocean Data Foundation"
26 |
27 | version = "0.3.9"
28 |
29 | # The full version, including alpha/beta/rc tags
30 | release = version
31 |
32 |
33 | # -- General configuration ---------------------------------------------------
34 |
35 | # Add any Sphinx extension module names here, as strings. They can be
36 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
37 | # ones.
38 | extensions = ["sphinx.ext.autodoc", "sphinx.ext.autosummary", "sphinx.ext.napoleon", "sphinx_rtd_theme"]
39 |
40 | autodoc_mock_imports = ["cartopy"]
41 |
42 | autosummary_generate = True
43 |
44 | # Add any paths that contain templates here, relative to this directory.
45 | templates_path = ["_templates"]
46 |
47 | html_favicon = "img/odp-favicon-rgb-blueandwhite.png"
48 | html_logo = "img/odp-logo-rgb-blueandwhite.png"
49 |
50 | # List of patterns, relative to source directory, that match files and
51 | # directories to ignore when looking for source files.
52 | # This pattern also affects html_static_path and html_extra_path.
53 | exclude_patterns = []
54 |
55 |
56 | # -- Options for HTML output -------------------------------------------------
57 |
58 | # The theme to use for HTML and HTML Help pages. See the documentation for
59 | # a list of builtin themes.
60 | #
61 | html_theme = "sphinx_rtd_theme"
62 |
63 | # Add any paths that contain custom static files (such as style sheets) here,
64 | # relative to this directory. They are copied after the builtin static files,
65 | # so a file named "default.css" will overwrite the builtin "default.css".
66 | html_static_path = ["_static"]
67 |
68 | # html_context = {
69 | # "css_files": ["_static/style.css"]
70 | # }
71 |
72 | master_doc = "index"
73 |
74 |
75 | def setup(app):
76 | app.add_css_file("style.css")
77 |
--------------------------------------------------------------------------------
/docs/source/img/odp-favicon-rgb-blueandwhite.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/C4IROcean-archive/odp-sdk-python/cfe9d937e620dfa8f35e6e92646d9625de5a0de9/docs/source/img/odp-favicon-rgb-blueandwhite.png
--------------------------------------------------------------------------------
/docs/source/img/odp-logo-rgb-blueandblack.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/C4IROcean-archive/odp-sdk-python/cfe9d937e620dfa8f35e6e92646d9625de5a0de9/docs/source/img/odp-logo-rgb-blueandblack.png
--------------------------------------------------------------------------------
/docs/source/img/odp-logo-rgb-blueandwhite.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/C4IROcean-archive/odp-sdk-python/cfe9d937e620dfa8f35e6e92646d9625de5a0de9/docs/source/img/odp-logo-rgb-blueandwhite.png
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | .. ODP Python SDK documentation master file, created by
2 | sphinx-quickstart on Wed Sep 30 13:21:32 2020.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Welcome to ODP Python SDK documentation
7 | ==========================================
8 |
9 |
10 | .. contents::
11 | :local:
12 |
13 | Installation
14 | ^^^^^^^^^^^^
15 |
16 | To install this package:
17 |
18 | .. code:: bash
19 |
20 | $ pip install odp_sdk
21 |
22 | To upgrade this package:
23 |
24 | .. code:: bash
25 |
26 | $ pip install -U odp_sdk
27 |
28 | *Note*: Utility functions available in CastFunctions.py and DataStatsFunctions.py are not included in the pip-installed package and have to be downloaded separately.
29 |
30 | Contents
31 | ^^^^^^^^
32 |
33 | .. toctree::
34 | odp
35 |
36 |
--------------------------------------------------------------------------------
/docs/source/odp.rst:
--------------------------------------------------------------------------------
1 | Quickstart
2 | ==========
3 |
4 | Authenticate
5 | ------------
6 |
7 | In order to use the ODP SDK, you need to authenticate using your provided API key. This is achieved by setting the
8 | `api_key` argument when instantiating `ODPClient`:
9 |
10 | .. code:: python
11 |
12 | from odp_sdk import ODPClient
13 | client = ODPClient(api_key="")
14 |
15 | You can also set the `COGNITE_API_KEY` environment variable:
16 |
17 | .. code:: bash
18 |
19 | $ export COGNITE_API_KEY=
20 |
21 | Download Ocean Data
22 | -------------------
23 |
24 | Downloading ocean data is very easy once you have instantiated the `ODPClient`. The data is then returned as a
25 | Pandas DataFrame_:
26 |
27 | .. code:: python
28 |
29 | df = client.casts(longitude=[-25, 35], latitude=[50, 80], timespan=["2018-06-01", "2018-06-30"])
30 |
31 | .. _DataFrame: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
32 |
33 | It is also possible to specify what parameters to download:
34 |
35 | .. code:: python
36 |
37 | df = client.casts(
38 | longitude = [-25, 35],
39 | latitude = [50, 80],
40 | timespan = ["2018-06-01", "2018-06-30"],
41 |         parameters = ["date", "lon", "lat", "z", "Temperature", "Salinity"]
42 | )
43 |
44 | In some instances, some filtering is necessary before downloading the data. This is achieved by first
45 | listing the available casts:
46 |
47 | .. code:: python
48 |
49 | casts = client.get_available_casts(
50 | longitude = [-25, 35],
51 | latitude = [50, 80],
52 | timespan = ["2018-06-01", "2018-06-30"],
53 |         metadata_parameters = ["extId", "date", "time", "lat", "lon", "country", "Platform", "dataset_code"]
54 | )
55 |
56 | Then apply any desirable filters before downloading the data:
57 |
58 | .. code:: python
59 |
60 | casts_norway = casts[casts.country == "NORWAY"]
61 | df = client.download_data_from_casts(casts_norway.extId.tolist(),
62 |         parameters=["date", "lat", "lon", "z", "Temperature", "Salinity"])
63 |
64 | You can also download the cast metadata:
65 |
66 | .. code:: python
67 |
68 | df = client.get_metadata(casts_norway.extId.tolist())
69 |
70 | API
71 | ===
72 | ODPClient
73 | ---------
74 | .. autoclass:: odp_sdk.ODPClient
75 | :members:
76 | :member-order: bysource
77 |
78 | Utilities
79 | =========
80 |
81 | Advanced Helper Functions
82 | -------------------------
83 |
84 | .. py:currentmodule:: Examples
85 |
86 | Interpolate Casts to Z
87 | ^^^^^^^^^^^^^^^^^^^^^^
88 |
89 | .. automethod:: UtilityFunctions.interpolate_casts_to_z
90 |
91 | Interpolate Casts to grid
92 | ^^^^^^^^^^^^^^^^^^^^^^^^^
93 | .. automethod:: UtilityFunctions.interpolate_to_grid
94 |
95 | Interpolate profile
96 | ^^^^^^^^^^^^^^^^^^^
97 | .. automethod:: UtilityFunctions.interpolate_profile
98 |
99 | Plot Casts
100 | ^^^^^^^^^^
101 | .. automethod:: UtilityFunctions.plot_casts
102 |
103 | Plot Grid
104 | ^^^^^^^^^
105 | .. automethod:: UtilityFunctions.plot_grid
106 |
107 | Get Units
108 | ^^^^^^^^^
109 | .. automethod:: UtilityFunctions.get_units
110 |
111 | Plot percentage of nulls for each variable in variable list
112 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
113 | .. automethod:: UtilityFunctions.plot_nulls
114 |
115 | Plot metadata-statistics
116 | ^^^^^^^^^^^^^^^^^^^^^^^^
117 | .. automethod:: UtilityFunctions.plot_meta_stats
118 |
119 | Plot distribution of values
120 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^
121 | .. automethod:: UtilityFunctions.plot_distributions
122 |
123 | Plot casts belonging to specific dataset
124 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
125 | .. automethod:: UtilityFunctions.plot_datasets
126 |
127 | Internal Helper Functions
128 | ^^^^^^^^^^^^^^^^^^^^^^^^^
129 | .. automethod:: UtilityFunctions.geo_map
130 | .. automethod:: UtilityFunctions.missing_values
131 |
132 | Geographic Utilities
133 | --------------------
134 |
135 | Convert Latitude and Longitude to Geo-Index
136 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
137 | .. automethod:: odp_sdk.utils.gcs_to_index
138 |
139 | Convert Latitude and Longitude to grid-coordinates
140 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
141 | .. automethod:: odp_sdk.utils.gcs_to_grid
142 |
143 | Convert Geo-Index to grid-coordinates
144 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
145 | .. automethod:: odp_sdk.utils.index_to_grid
146 |
147 | Convert Geo-Index to Latitude and Longitude
148 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
149 | .. automethod:: odp_sdk.utils.index_to_gcs
150 |
151 | Get all grid-coordinates within a rectangle
152 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
153 | .. automethod:: odp_sdk.utils.grid_rect_members
154 |
155 | Get all Geo-Indices within a rectangle
156 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
157 | .. automethod:: odp_sdk.utils.index_rect_members
158 |
--------------------------------------------------------------------------------
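
Putting the quickstart steps above together, here is a minimal end-to-end sketch using only the legacy `ODPClient` calls shown in odp.rst (`get_available_casts`, `download_data_from_casts`, `get_metadata`); the API key is a placeholder.

```python
from odp_sdk import ODPClient

client = ODPClient(api_key="YOUR-API-KEY")  # placeholder key

# List the available casts for a region and time span, filter them by
# country, then download data and metadata for the matching casts only.
casts = client.get_available_casts(
    longitude=[-25, 35],
    latitude=[50, 80],
    timespan=["2018-06-01", "2018-06-30"],
    metadata_parameters=["extId", "date", "lat", "lon", "country"],
)
casts_norway = casts[casts.country == "NORWAY"]

df = client.download_data_from_casts(
    casts_norway.extId.tolist(),
    parameters=["date", "lat", "lon", "z", "Temperature", "Salinity"],
)
meta = client.get_metadata(casts_norway.extId.tolist())
print(df.head())
print(meta)
```
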
/examples/README.md:
--------------------------------------------------------------------------------
1 | ## Examples
2 |
3 | ### catalog_client_example.py
4 |
5 | - Listing datasets
6 | - Creating a dataset
7 | - Querying for a dataset
8 | - Deleting a dataset
9 |
10 | ### raw_client_example.py
11 |
12 | - Creating a dataset
13 | - Uploading a file to a dataset
14 | - Listing files in a dataset
15 | - Downloading a file from a dataset
16 | - Deleting a file from a dataset
17 | - Deleting a dataset
18 |
19 | ### tabular_client_example.py
20 |
21 | - Creating a dataset
22 | - Creating a schema for a dataset
23 | - Inserting data into a dataset
24 | - Querying for data in a dataset
25 | - Updating data in a dataset
26 | - Deleting data from a dataset
27 | - Deleting a schema from a dataset
28 | - Deleting a dataset
29 |
30 | ### observables_example.py
31 |
32 | - Listing observables
33 | - Creating an observable
34 | - Querying for an observable
35 | - Querying for observables using a geolocation filter
36 | - Deleting an observable
37 |
38 | ### tabular_geography.py
39 |
40 | - Creating dataset with geographical positions
41 | - Creating schema with partitioning
42 |
43 | ## Workspace Examples
44 |
45 | ### raw-roundtrip.ipynb
46 |
47 | - Demonstrates the process of uploading a file to a dataset, downloading the file, and deleting the file.
48 | - In Jupyter notebook format for ease of use in ODP workspaces.
49 |
50 | ### tabular-roundtrip.ipynb
51 |
52 | - Demonstrates the process of creating a dataset, creating a schema for the dataset, inserting data into the dataset,
53 | querying for data in the dataset, updating data in the dataset, deleting data from the dataset, deleting the schema
54 | from the dataset, and deleting the dataset.
55 | - In Jupyter notebook format for ease of use in ODP workspaces.
--------------------------------------------------------------------------------
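
All of the example scripts below share the same setup; here is a minimal sketch of that common boilerplate, assuming credentials are resolvable from the environment as the scripts themselves note.

```python
from odp.client import OdpClient

# The token provider is chosen based on the environment, so no credentials
# are passed explicitly (see the note at the top of each example script).
client = OdpClient()

# personalize_name() is used throughout the examples to derive a per-user
# resource name; the exact prefixing scheme is an implementation detail.
dataset_name = client.personalize_name("sdk-example")
print(dataset_name)
```
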
/examples/catalog_client_example.py:
--------------------------------------------------------------------------------
1 | from odp.client import OdpClient
2 | from odp.dto import Metadata
3 | from odp.dto.catalog import DatasetDto, DatasetSpec
4 | from odp.dto.common.contact_info import ContactInfo
5 |
6 | # Instantiate the client without specifying a token provider.
7 | # The token provider will be set based on the environment.
8 | client = OdpClient()
9 |
10 | print("Datasets in the catalog:")
11 |
12 | # List all resources in the catalog
13 | for item in client.catalog.list():
14 | print(item)
15 |
16 | # Declare a dataset manifest to add to the catalog
17 | manifest = DatasetDto(
18 | metadata=Metadata(
19 | name=client.personalize_name("sdk-manifest-creation-example"),
20 | ),
21 | spec=DatasetSpec(
22 | storage_controller="registry.hubocean.io/storageController/storage-tabular",
23 | storage_class="registry.hubocean.io/storageClass/tabular",
24 | maintainer=ContactInfo(
25 | contact="User McUsername ",
26 | organisation="Organisation Name",
27 | ),
28 | ),
29 | )
30 |
31 | # The dataset is created in the catalog.
32 | manifest = client.catalog.create(manifest)
33 |
34 | # Fetch the manifest from the catalog using the UUID
35 | print("Fetching the manifest from the catalog using the UUID")
36 |
37 | fetched_manifest = client.catalog.get(manifest.metadata.uuid)
38 | print(fetched_manifest)
39 |
40 | # Clean up
41 | print("Cleaning up")
42 |
43 | client.catalog.delete(manifest)
44 |
45 | print("Done")
46 |
--------------------------------------------------------------------------------
/examples/catalog_oqs_query_example.py:
--------------------------------------------------------------------------------
1 | from odp.client import OdpClient
2 | from odp.dto.catalog import DataCollectionDto
3 |
4 | # Instantiate the client without specifying a token provider.
5 | # The token provider will be set based on the environment.
6 | client = OdpClient()
7 |
8 | # List all resources matching a given query
9 |
10 | oqs_filter = {
11 |     "#EQUALS": [  # EQUALS is used here to compare two values
12 |         "$kind",  # The first value is the kind from the metadata, prefixed with a dollar sign.
13 | "catalog.hubocean.io/dataCollection", # And this is the value to compare with
14 | ]
15 | }
16 |
17 | print("Listing all data collections:")
18 |
19 | for item in client.catalog.list(oqs_filter):
20 | print(item)
21 |
22 | # If we know the type of the resource we are querying,
23 | # we can use the `tp` parameter to assert the type of the returned resources.
24 |
25 | print("Listing all data collections:")
26 |
27 | for item in client.catalog.list(oqs_filter, tp=DataCollectionDto, assert_type=True):
28 | print(item)
29 |
--------------------------------------------------------------------------------
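
The filter above matches on `$kind` alone; OQS predicates can also be combined. Below is a sketch of a compound filter, reusing only operators and field paths that appear in `examples/observables_example.py` (`#AND`, `#EQUALS`, `#WITHIN`).

```python
from odp.client import OdpClient
from odp.dto.catalog import ObservableDto

client = OdpClient()

# Restrict to observables by kind, then narrow down to a specific observable
# class; the two predicates are joined with #AND.
compound_filter = {
    "#AND": [
        {"#EQUALS": ["$kind", "catalog.hubocean.io/observable"]},
        {"#WITHIN": ["$spec.observable_class", ["catalog.hubocean.io/observableClass/static-observable"]]},
    ]
}

for item in client.catalog.list(compound_filter, tp=ObservableDto, assert_type=True):
    print(item)
```
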
/examples/observables_example.py:
--------------------------------------------------------------------------------
1 | from odp.client import OdpClient
2 | from odp.dto import Metadata
3 | from odp.dto.catalog import ObservableDto, ObservableSpec
4 |
5 | # Instantiate the client without specifying a token provider.
6 | # The token provider will be set based on the environment.
7 | client = OdpClient()
8 |
9 | created_manifests = []
10 |
11 | # List observables in the catalog
12 | observable_filter = {"#EQUALS": ["$kind", "catalog.hubocean.io/observable"]}
13 |
14 | # If we know the type of the resource we are querying,
15 | # we can use the `tp` parameter to assert the type of the returned resources.
16 |
17 | print("List of observables in the catalog:")
18 |
19 | for item in client.catalog.list(observable_filter, tp=ObservableDto, assert_type=True):
20 | print(item)
21 |
22 | # Declare a new observable to be added to the data catalog
23 |
24 | print("Creating a sample observable in the catalog")
25 |
26 | manifest = ObservableDto(
27 | metadata=Metadata(
28 | name=client.personalize_name("sdk-observable-example"),
29 | display_name="Test Observable for time",
30 | description="A test observable for time",
31 | labels={"hubocean.io/test": True},
32 | ),
33 | spec=ObservableSpec(
34 | ref="catalog.hubocean.io/dataset/test-dataset",
35 | observable_class="catalog.hubocean.io/observableClass/static-coverage",
36 | details={"value": [0, 1684147082], "attribute": "test"},
37 | ),
38 | )
39 |
40 | # The observable is created in the catalog.
41 | # The return value is the full manifest of the created observable.
42 | manifest = client.catalog.create(manifest)
43 | created_manifests.append(manifest)
44 |
45 | # An example query to search for observables in certain geometries
46 | observable_geometry_filter = {
47 | "#AND": [
48 | {"#EQUALS": ["$kind", "catalog.hubocean.io/observable"]},
49 | {
50 | "#ST_INTERSECTS": [
51 | "$spec.details.value",
52 | {
53 | "type": "Polygon",
54 | "coordinates": [
55 | [
56 | [-73.981200, 40.764950],
57 | [-73.980600, 40.764000],
58 | [-73.979800, 40.764450],
59 | [-73.980400, 40.765400],
60 | [-73.981200, 40.764950],
61 | ]
62 | ],
63 | },
64 | ]
65 | },
66 | ]
67 | }
68 |
69 | print("List of observables in the catalog:")
70 |
71 | # List all observables in the catalog that intersect with the geometry
72 | for item in client.catalog.list(observable_geometry_filter):
73 | print(item)
74 |
75 |
76 | print("Adding more sample observables in the catalog")
77 |
78 | # Create static observables to filter
79 | manifest = ObservableDto(
80 | metadata=Metadata(
81 | name=client.personalize_name("sdk-example-small-value"),
82 | display_name="SDK Example Small Value",
83 | description="An observable that emits a small value",
84 | labels={"hubocean.io/test": True},
85 | ),
86 | spec=ObservableSpec(
87 | ref="catalog.hubocean.io/dataset/test-dataset",
88 | observable_class="catalog.hubocean.io/observableClass/static-observable",
89 | details={"value": 1, "attribute": "test"},
90 | ),
91 | )
92 |
93 | manifest = client.catalog.create(manifest)
94 | created_manifests.append(manifest)
95 |
96 | manifest = ObservableDto(
97 | metadata=Metadata(
98 | name=client.personalize_name("sdk-example-large-value"),
99 | display_name="SDK Example Large Value",
100 | description="An observable that emits a large value",
101 | labels={"hubocean.io/test": True},
102 | ),
103 | spec=ObservableSpec(
104 | ref="catalog.hubocean.io/dataset/test-dataset",
105 | observable_class="catalog.hubocean.io/observableClass/static-observable",
106 | details={"value": 3, "attribute": "test"},
107 | ),
108 | )
109 |
110 | manifest = client.catalog.create(manifest)
111 | created_manifests.append(manifest)
112 |
113 |
114 | # An example query to search for observables within a certain value range
115 | observable_range_filter = {
116 | "#AND": [
117 | {"#WITHIN": ["$spec.observable_class", ["catalog.hubocean.io/observableClass/static-observable"]]},
118 | {"#GREATER_THAN_OR_EQUALS": ["$spec.details.value", "2"]},
119 | ]
120 | }
121 |
122 | print("List of observables in the catalog:")
123 |
124 | # List all static observables in the catalog that match the range filter
125 | for item in client.catalog.list(observable_range_filter):
126 | print(item)
127 |
128 | print("Cleaning up")
129 |
130 | # Clean up
131 | for man in created_manifests:
132 | client.catalog.delete(man)
133 |
134 | print("Done")
135 |
--------------------------------------------------------------------------------
/examples/raw_client_example.py:
--------------------------------------------------------------------------------
1 | from odp.client import OdpClient
2 | from odp.client.dto.file_dto import FileMetadataDto
3 | from odp.dto import Metadata
4 | from odp.dto.catalog import DataCollectionDto, DataCollectionSpec, DatasetDto, DatasetSpec
5 | from odp.dto.common.contact_info import ContactInfo
6 | from odp.dto.common.license import License
7 |
8 | # Instantiate the client without specifying a token provider.
9 | # The token provider will be set based on the environment.
10 | client = OdpClient()
11 |
12 | data_collection_name = "collection-manifest-example"
13 |
14 | collection = DataCollectionDto(
15 | metadata=Metadata(
16 | name=data_collection_name,
17 | display_name="collection-example",
18 | description="A test data collection",
19 | ),
20 | spec=DataCollectionSpec(
21 | published_by=ContactInfo(
22 | contact="User McUsername ",
23 | organisation="Organisation Name",
24 | ),
25 | published_date="2019-06-19T06:00:00",
26 | website="https://hubocean.earth",
27 | license=License(
28 | name="proprietary",
29 | full_text="This is a very strict legal text describing the data license.",
30 | href="www.wikipedia.org",
31 | ),
32 | tags=[],
33 | ),
34 | )
35 |
36 | collection = client.catalog.create(collection)
37 | print("Collection was created")
38 |
39 | # Declare a dataset manifest to add to the catalog
40 |
41 | print("Creating sample dataset")
42 |
43 | dataset = DatasetDto(
44 | metadata=Metadata(
45 | name=client.personalize_name("sdk-raw-example"),
46 | display_name="SDK Raw Example",
47 | description="A test dataset for raw data",
48 | labels={"hubocean.io/test": True},
49 | ),
50 | spec=DatasetSpec(
51 | data_collection=f"catalog.hubocean.io/dataCollection/{data_collection_name}",
52 | storage_controller="registry.hubocean.io/storageController/storage-raw-cdffs",
53 | storage_class="registry.hubocean.io/storageClass/raw",
54 | maintainer=ContactInfo(
55 | contact="User McUsername ",
56 | organisation="Organisation Name",
57 | ),
58 | ),
59 | )
60 |
61 | # The dataset is created in the catalog.
62 | dataset = client.catalog.create(dataset)
63 |
64 | # Creating and uploading a file.
65 | file_dto = client.raw.create_file(
66 | resource_dto=dataset,
67 | file_metadata_dto=FileMetadataDto(
68 | name="test.txt",
69 | mime_type="text/plain",
70 | ),
71 | contents=b"Hello, World!",
72 | )
73 |
74 | print("List of files in the dataset:")
75 |
76 | for file in client.raw.list(dataset):
77 | print(file)
78 |
79 | # Download file
80 | print("Downloading the file")
81 |
82 | client.raw.download_file(dataset, file_dto, "test.txt")
83 |
84 | # Clean up
85 | print("Cleaning up")
86 |
87 | client.raw.delete_file(dataset, file_dto)
88 | client.catalog.delete(dataset)
89 | client.catalog.delete(collection)
90 |
91 | print("Done")
92 |
--------------------------------------------------------------------------------
/examples/raw_client_file_example.py:
--------------------------------------------------------------------------------
1 | from odp.client import OdpClient
2 | from odp.client.dto.file_dto import FileMetadataDto
3 | from odp.client.exc import OdpFileAlreadyExistsError, OdpResourceExistsError
4 | from odp.dto import Metadata
5 | from odp.dto.catalog import DatasetDto, DatasetSpec
6 | from odp.dto.common.contact_info import ContactInfo
7 |
8 | # Instantiate the client without specifying a token provider.
9 | # The token provider will be set based on the environment.
10 | client = OdpClient()
11 |
12 | # Declare a dataset manifest to add to the catalog
13 |
14 | print("Creating sample dataset")
15 |
16 | dataset = DatasetDto(
17 | metadata=Metadata(
18 | name=client.personalize_name("sdk-raw-example"),
19 | display_name="SDK Raw Example",
20 | description="A test dataset for raw data",
21 | labels={"hubocean.io/test": True},
22 | ),
23 | spec=DatasetSpec(
24 | storage_controller="registry.hubocean.io/storageController/storage-raw-cdffs",
25 | storage_class="registry.hubocean.io/storageClass/raw",
26 | maintainer=ContactInfo(
27 | contact="User McUsername ",
28 | organisation="Organisation Name",
29 | ),
30 | ),
31 | )
32 |
33 | # The dataset is created in the catalog.
34 | try:
35 | dataset = client.catalog.create(dataset)
36 | print("Resource created successfully:", dataset)
37 | except OdpResourceExistsError:
38 | print("Dataset already exists. Getting existing dataset")
39 | dataset = client.catalog.get("catalog.hubocean.io/dataset/" + dataset.metadata.name)
40 | print(dataset)
41 |
42 | # Creating and uploading an existing file.
43 | path_to_file = "test.txt"
44 | file_metadata_dto = None
45 | file_dto = None
46 | try:
47 | with open(path_to_file, "rb") as data:
48 | file_metadata_dto = FileMetadataDto(
49 | name=data.name,
50 | mime_type="text/plain", # Update mime type of the file
51 | )
52 | file_dto = client.raw.create_file(
53 | resource_dto=dataset,
54 | file_metadata_dto=file_metadata_dto,
55 | contents=data.read(),
56 | )
57 | except OdpFileAlreadyExistsError:
58 | print("File already exists. Getting metadata of existing file")
59 | file_dto = client.raw.get_file_metadata(dataset, file_metadata_dto)
60 |
61 | print("List of files in the dataset:")
62 |
63 | for file in client.raw.list(dataset):
64 | print(file)
65 |
66 | # Download file
67 | print("Downloading the file:")
68 |
69 | client.raw.download_file(dataset, file_dto, "test.txt")
70 |
71 | # Clean up
72 | print("Cleaning up")
73 |
74 | client.raw.delete_file(dataset, file_dto)
75 | client.catalog.delete(dataset)
76 |
77 | print("Done")
78 |
--------------------------------------------------------------------------------
/examples/tabular_client_example.py:
--------------------------------------------------------------------------------
1 | from odp.client import OdpClient
2 | from odp.client.dto.table_spec import TableSpec
3 | from odp.client.exc import OdpResourceNotFoundError
4 | from odp.dto import Metadata
5 | from odp.dto.catalog import DataCollectionDto, DataCollectionSpec, DatasetDto, DatasetSpec
6 | from odp.dto.common.contact_info import ContactInfo
7 | from odp.dto.common.license import License
8 |
9 | # Instantiate the client without specifying a token provider.
10 | # The token provider will be set based on the environment.
11 | client = OdpClient()
12 |
13 | data_collection_name = "collection-manifest-example"
14 |
15 | collection = DataCollectionDto(
16 | metadata=Metadata(
17 | name=data_collection_name,
18 | display_name="collection-example",
19 | description="A test data collection",
20 | ),
21 | spec=DataCollectionSpec(
22 | published_by=ContactInfo(
23 | contact="User McUsername ",
24 | organisation="Organisation Name",
25 | ),
26 | published_date="2019-06-19T06:00:00",
27 | website="https://hubocean.earth",
28 | license=License(
29 | name="proprietary",
30 | full_text="This is a very strict legal text describing the data license.",
31 | href="www.wikipedia.org",
32 | ),
33 | tags=[],
34 | ),
35 | )
36 |
37 | collection = client.catalog.create(collection)
38 | print("Collection was created")
39 |
40 | # Declare a dataset manifest to add to the catalog
41 |
42 | print("Creating sample dataset")
43 |
44 | dataset = DatasetDto(
45 | metadata=Metadata(
46 | name=client.personalize_name("sdk-tabular-example"),
47 | display_name="SDK Tabular Example",
48 | description="A test dataset for tabular data",
49 | labels={"hubocean.io/test": True},
50 | ),
51 | spec=DatasetSpec(
52 | data_collection=f"catalog.hubocean.io/dataCollection/{data_collection_name}",
53 | storage_controller="registry.hubocean.io/storageController/storage-tabular",
54 | storage_class="registry.hubocean.io/storageClass/tabular",
55 | maintainer=ContactInfo(
56 | contact="User McUsername ",
57 | organisation="Organisation Name",
58 | ),
59 | ),
60 | )
61 |
62 | # The dataset is created in the catalog.
63 | dataset = client.catalog.create(dataset)
64 |
65 | # Create a table spec to create the schema in tabular client
66 | print("Creating table spec")
67 |
68 | mt_table_spec = client.tabular.create_schema(
69 | resource_dto=dataset, table_spec=TableSpec(table_schema={"Data": {"type": "string"}})
70 | )
71 |
72 | # Insert data into the table
73 | test_data = [{"Data": "Test"}, {"Data": "Test1"}]
74 | print(f"Inserting {len(test_data)} rows into the table")
75 |
76 | client.tabular.write(resource_dto=dataset, data=test_data)
77 |
78 | # Query the data as a list
79 | print("Querying data from the table as a list")
80 | our_data = client.tabular.select_as_list(dataset)
81 |
82 | print("Data query result:")
83 | print(f"{our_data}\n")
84 |
85 | # To update the data filters must be declared
86 | update_filters = {"#EQUALS": ["$Data", "Test"]}
87 | new_data = [{"Data": "Test Updated"}]
88 |
89 | print("Updating data in the table")
90 | client.tabular.update(
91 | resource_dto=dataset,
92 | data=new_data,
93 | filter_query=update_filters,
94 | )
95 |
96 | result = client.tabular.select_as_list(dataset)
97 |
98 | print(f"Data read back:\n{result}") # noqa: E231
99 |
100 | # Delete the data with another filter
101 | delete_filters = {"#EQUALS": ["$Data", "Test1"]}
102 | print("Deleting data in the table")
103 |
104 | client.tabular.delete(resource_dto=dataset, filter_query=delete_filters)
105 | result = client.tabular.select_as_list(dataset)
106 |
107 | print(f"Data read back:\n{result}") # noqa: E231
108 |
109 | # Clean up
110 |
111 | print("Cleaning up")
112 |
113 | # Delete the schema
114 | client.tabular.delete_schema(dataset, delete_data=True)
115 |
116 | # Reading the schema of a dataset without a schema will result in an error
117 | try:
118 | client.tabular.get_schema(dataset)
119 | except OdpResourceNotFoundError as e:
120 | print("Schema not found error since it is deleted")
121 | print(e)
122 |
123 | print("Deleting dataset")
124 |
125 | # Delete the dataset and collection
126 | client.catalog.delete(dataset)
127 | client.catalog.delete(collection)
128 |
129 | print("Done")
130 |
--------------------------------------------------------------------------------
/examples/tabular_geography.py:
--------------------------------------------------------------------------------
1 | from odp.client import OdpClient
2 | from odp.client.dto.table_spec import TableSpec
3 | from odp.client.dto.tabular_store import TablePartitioningSpec
4 | from odp.dto import Metadata
5 | from odp.dto.catalog import DatasetDto, DatasetSpec
6 | from odp.dto.common.contact_info import ContactInfo
7 |
8 | client = OdpClient()
9 |
10 | # Create a new manifest to add to the catalog
11 | dataset = DatasetDto(
12 | metadata=Metadata(
13 | name=client.personalize_name("st_within_example"),
14 | display_name="ST_WITHIN Example",
15 | description="A test dataset for ST_WITHIN query",
16 | labels={"hubocean.io/test": True},
17 | ),
18 | spec=DatasetSpec(
19 | storage_controller="registry.hubocean.io/storageController/storage-tabular",
20 | storage_class="registry.hubocean.io/storageClass/tabular",
21 | maintainer=ContactInfo(
22 | contact="User McUsername ",
23 | organisation="Organisation Name",
24 | ),
25 | ),
26 | )
27 |
28 | # The dataset is created in the catalog.
29 | dataset = client.catalog.create(dataset)
30 |
31 | print("Dataset created successfully")
32 |
33 | table_schema = {"name": {"type": "string"}, "location": {"type": "geometry"}}
34 | partitioning = [TablePartitioningSpec(columns=["location"], transformer_name="geohash", args=[2])]
35 |
36 | my_table_spec = TableSpec(table_schema=table_schema, partitioning=partitioning)
37 |
38 | client.tabular.create_schema(
39 |     resource_dto=dataset, table_spec=my_table_spec
40 | )
41 |
42 | print("Table spec created successfully")
43 |
44 | data = [
45 | {"name": "Oslo", "location": {"type": "Point", "coordinates": [10.74609, 59.91273]}},
46 | {"name": "New York", "location": {"type": "Point", "coordinates": [-74.005974, 40.712776]}},
47 | {"name": "Los Angeles", "location": {"type": "Point", "coordinates": [-118.243683, 34.052235]}},
48 | {"name": "London", "location": {"type": "Point", "coordinates": [-0.127758, 51.507351]}},
49 | {"name": "Tokyo", "location": {"type": "Point", "coordinates": [139.691711, 35.689487]}},
50 | {"name": "Paris", "location": {"type": "Point", "coordinates": [2.352222, 48.856613]}},
51 | {"name": "Berlin", "location": {"type": "Point", "coordinates": [13.404954, 52.520008]}},
52 | {"name": "Moscow", "location": {"type": "Point", "coordinates": [37.617298, 55.755825]}},
53 | {"name": "Beijing", "location": {"type": "Point", "coordinates": [116.407394, 39.904202]}},
54 | {"name": "Mexico City", "location": {"type": "Point", "coordinates": [-99.133209, 19.432608]}},
55 | {"name": "São Paulo", "location": {"type": "Point", "coordinates": [-46.633308, -23.55052]}},
56 | {"name": "Buenos Aires", "location": {"type": "Point", "coordinates": [-58.381592, -34.603722]}},
57 | {"name": "New Delhi", "location": {"type": "Point", "coordinates": [77.209023, 28.613939]}},
58 | {"name": "Sydney", "location": {"type": "Point", "coordinates": [151.209296, -33.86882]}},
59 | {"name": "San Francisco", "location": {"type": "Point", "coordinates": [-122.419418, 37.774929]}},
60 | {"name": "Johannesburg", "location": {"type": "Point", "coordinates": [28.047305, -26.204103]}},
61 | {"name": "Chicago", "location": {"type": "Point", "coordinates": [-87.629799, 41.878113]}},
62 | {"name": "Melbourne", "location": {"type": "Point", "coordinates": [144.963058, -37.813628]}},
63 | {"name": "Edinburgh", "location": {"type": "Point", "coordinates": [-3.188267, 55.953251]}},
64 | {"name": "Stockholm", "location": {"type": "Point", "coordinates": [18.068581, 59.329323]}},
65 | {"name": "Ottawa", "location": {"type": "Point", "coordinates": [-75.697193, 45.42153]}},
66 | {"name": "Hong Kong", "location": {"type": "Point", "coordinates": [114.109497, 22.396428]}},
67 | {"name": "Jakarta", "location": {"type": "Point", "coordinates": [106.845599, -6.208763]}},
68 | {"name": "Cairo", "location": {"type": "Point", "coordinates": [31.235712, 30.04442]}},
69 | {"name": "Budapest", "location": {"type": "Point", "coordinates": [19.040236, 47.497913]}},
70 | {"name": "Christchurch", "location": {"type": "Point", "coordinates": [172.636225, -43.532054]}},
71 | {"name": "Manila", "location": {"type": "Point", "coordinates": [120.98422, 14.599512]}},
72 | {"name": "Bangkok", "location": {"type": "Point", "coordinates": [100.501765, 13.756331]}},
73 | {"name": "Rome", "location": {"type": "Point", "coordinates": [12.496366, 41.902783]}},
74 | {"name": "Shanghai", "location": {"type": "Point", "coordinates": [121.473702, 31.23039]}},
75 | {"name": "Rio de Janeiro", "location": {"type": "Point", "coordinates": [-43.172897, -22.906847]}},
76 | {"name": "Madrid", "location": {"type": "Point", "coordinates": [-3.70379, 40.416775]}},
77 | {"name": "Nairobi", "location": {"type": "Point", "coordinates": [36.821946, -1.292066]}},
78 | {"name": "Toronto", "location": {"type": "Point", "coordinates": [-79.383186, 43.653225]}},
79 | {"name": "Fortaleza", "location": {"type": "Point", "coordinates": [-38.526669, -3.731862]}},
80 | {"name": "Tehran", "location": {"type": "Point", "coordinates": [51.388973, 35.6895]}},
81 | {"name": "Brasília", "location": {"type": "Point", "coordinates": [-47.882166, -15.794229]}},
82 | {"name": "Bogotá", "location": {"type": "Point", "coordinates": [-74.072092, 4.710989]}},
83 | ]
84 |
85 | print(f"Inserting {len(data)} rows into the table")
86 | client.tabular.write(resource_dto=dataset, data=data)
87 | print("Data inserted and partitioned")
88 |
89 | print("Querying for cities in Europe")
90 | europe_list = client.tabular.select_as_list(
91 | resource_dto=dataset,
92 | filter_query={
93 | "#ST_WITHIN": [
94 | "$location", # <- Name of column to perform geographic query against.
95 | {
96 | "type": "Polygon", # This is a rough polygon encompassing Europe.
97 | "coordinates": [
98 | [
99 | [37.02028908997249, 70.9411520317463],
100 | [-24.834125592956013, 70.9411520317463],
101 | [-24.834125592956013, 35.753296916825306],
102 | [37.02028908997249, 35.753296916825306],
103 | [37.02028908997249, 70.9411520317463],
104 | ]
105 | ],
106 | },
107 | ]
108 | },
109 | )
110 |
111 | print("Cities in Europe:")
112 | for city in europe_list:
113 | print(city.get("name"))
114 |
115 | # Clean up
116 | print("Cleaning up")
117 |
118 | client.tabular.delete_schema(dataset)
119 | client.catalog.delete(dataset)
120 |
121 | print("Done")
122 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | description = "ODP Python SDK project"
3 | authors = ["Thomas Li Fredriksen "]
4 | license = "MIT"
5 | readme = "README.md"
6 | packages = []
7 | package-mode = false
8 |
9 | [tool.poetry.dependencies]
10 | python = "^3.9"
11 | odp-dto = { path = "./src/dto", develop = true }
12 | odp-sdk = { path = "./src/sdk", develop = true }
13 | jupyter = "^1.0.0"
14 | pyarrow = "^18.1.0"
15 |
16 | [tool.poetry.group.dev.dependencies]
17 | python-dotenv = "^1.0.1"
18 | pytest = "^7.4.3"
19 | coverage = "^7.3.2"
20 | flake8-pyproject = "^1.2.2"
21 | responses = "^0.23.1"
22 | pandas = "^2.1.4"
23 |
24 | [tool.poetry.group.helper-scripts]
25 | optional = true
26 |
27 | [tool.poetry.group.helper-scripts.dependencies]
28 | typer = "^0.12.3"
29 | tomli = "^2.0.1"
30 | tomli-w = "^1.0.0"
31 |
32 | [build-system]
33 | requires = ["poetry-core"]
34 | build-backend = "poetry.core.masonry.api"
35 |
36 |
37 | [tool.pytest.ini_options]
38 | log_cli = "true"
39 | asyncio_mode = "auto"
40 | log_level = "INFO"
41 | log_format = "%(asctime)s %(levelname)s %(message)s [%(filename)s:%(lineno)d]"
42 | log_date_format = "%Y-%m-%d %H:%M:%S"
43 | testpaths = [
44 | "src/sdk/tests",
45 | "src/dto/tests",
46 | ]
47 |
48 | [tool.flake8]
49 | ignore = ["E203", "E731", "W503"]
50 | per-file-ignores = ["*/__init__.py:F401", "tests/*:F841", "*/tests/*:F841"]
51 | max-line-length = 120
52 | count = true
53 |
54 | [tool.isort]
55 | line_length=120 # corresponds to -w flag
56 | multi_line_output=3 # corresponds to -m flag
57 | include_trailing_comma=true # corresponds to -tc flag
58 | profile="black"
59 | known_local_folder="src,tests"
60 |
61 | [tool.poetry.extras]
62 | pandas = ["pandas"]
63 |
--------------------------------------------------------------------------------
/scripts/migrate_local_deps.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import Annotated, Dict, List, Optional, Union
3 |
4 | import tomli
5 | import tomli_w
6 | import typer
7 |
8 | app = typer.Typer()
9 |
10 |
11 | def _update_local_version(
12 | dep: Dict[str, Union[str, Dict[str, str]]], version_overrides: Dict[str, str], default_version_tag: str
13 | ) -> Dict[str, Union[str, Dict[str, str]]]:
14 | ret = {}
15 |
16 | for dep_name, dep_info in dep.items():
17 | if isinstance(dep_info, str):
18 | ret[dep_name] = dep_info
19 | continue
20 |
21 | dep_info = dep_info.copy()
22 | ret[dep_name] = dep_info
23 |
24 | pth = dep_info.get("path")
25 | if pth is None:
26 | continue
27 |
28 | dep_info.pop("path")
29 | dep_info.pop("develop", None)
30 |
31 | dep_info["version"] = version_overrides.get(pth, default_version_tag)
32 |
33 | return ret
34 |
35 |
36 | @app.command()
37 | def migrate(
38 | default_version_tag: Annotated[str, typer.Argument(help="Default version tag to use for local dependencies")],
39 | src_file: Annotated[Path, typer.Argument(help="Path to the file to migrate")],
40 | dest_file: Annotated[
41 | Optional[Path], typer.Argument(help="Path to the destination file, defaults to the source file if not set")
42 | ] = None, # noqa: E501
43 | dry_run: Annotated[bool, typer.Option(help="Run in dry-run mode")] = False,
44 | overwrite: Annotated[bool, typer.Option(help="Overwrite the destination file if it exists")] = False,
45 | version_tag: Annotated[List[str], typer.Option(help="Version tags to use for local dependencies")] = [],
46 | ):
47 | try:
48 | version_overrides = {k: v for k, v in (x.split("=") for x in version_tag)}
49 | except ValueError as e:
50 | typer.echo(f"Invalid version tag: {e}")
51 | raise typer.Exit(code=1)
52 |
53 | print(f"Version overrides: {version_overrides}")
54 |
55 | if not src_file.suffix and src_file.is_dir():
56 | typer.echo("Directory detected, looking for pyproject.toml")
57 | src_file /= "pyproject.toml"
58 | elif src_file.suffix != ".toml":
59 | typer.echo("Only TOML files are supported: {}".format(src_file))
60 | raise typer.Exit(code=1)
61 |
62 | if not dest_file and not dry_run:
63 | if not overwrite:
64 | typer.echo(
65 | "Destination file not set. Using source file as destination but overwrite-flag is not set. Please set the destination file or use the --overwrite flag." # noqa: E501
66 | )
67 |             raise typer.Exit(code=1)
68 |
69 | typer.echo("Destination file not set. Using source file as destination.")
70 | dest_file = src_file
71 |
72 | if dest_file and not dest_file.suffix and dest_file.is_dir():
73 | dest_file /= "pyproject.toml"
74 | elif dest_file and dest_file.suffix != ".toml":
75 | typer.echo("Only TOML files are supported")
76 | raise typer.Exit(code=1)
77 |
78 | if not src_file.exists():
79 | typer.echo(f"Source file {src_file} does not exist")
80 | raise typer.Exit(code=1)
81 |
82 | if dest_file and dest_file.exists() and not overwrite:
83 | typer.echo(f"Destination file {dest_file} exists and overwrite flag is not set")
84 | raise typer.Exit(code=1)
85 |
86 | with src_file.open("rb") as f:
87 | data = tomli.load(f)
88 |
89 | try:
90 | poetry_base = data["tool"]["poetry"]
91 | except KeyError:
92 | typer.echo("No poetry section found in the source file")
93 | raise typer.Exit(code=1)
94 |
95 | for key in ["dependencies", "dev-dependencies", "optional-dependencies"]:
96 | if key in poetry_base:
97 | poetry_base[key] = _update_local_version(poetry_base[key], version_overrides, default_version_tag)
98 |
99 | for group in poetry_base.get("group", []):
100 | try:
101 |             poetry_base["group"][group]["dependencies"] = _update_local_version(
102 |                 poetry_base["group"][group]["dependencies"], version_overrides, default_version_tag
103 | )
104 | except KeyError:
105 | pass
106 |
107 | if dry_run:
108 | typer.echo("Dry-run mode, not writing to file")
109 | typer.echo(tomli_w.dumps(data))
110 | raise typer.Exit(code=0)
111 |
112 | with dest_file.open("wb+") as f:
113 | tomli_w.dump(data, f)
114 |
115 |
116 | if __name__ == "__main__":
117 | app()
118 |
--------------------------------------------------------------------------------
/src/dto/README.md:
--------------------------------------------------------------------------------
1 | # ODP Data Transfer Objects (DTOs)
2 |
3 | ## Documentation
4 |
5 | https://docs.hubocean.earth
6 |
7 | ## Installation
8 |
9 | ```shell
10 | pip install odp-dto
11 | ```
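12 | 
13 | ## Usage
14 | 
15 | A minimal sketch of building a dataset manifest with the DTOs. The storage class and contact values below are illustrative placeholders, not values required by the library:
16 | 
17 | ```python
18 | from odp.dto import Metadata
19 | from odp.dto.catalog import DatasetDto, DatasetSpec
20 | from odp.dto.common.contact_info import ContactInfo
21 | 
22 | # Kind and version are filled in automatically for registered spec types
23 | dataset = DatasetDto(
24 |     metadata=Metadata(name="my-dataset"),
25 |     spec=DatasetSpec(
26 |         storage_class="registry.hubocean.io/storageClass/tabular",  # placeholder value
27 |         maintainer=ContactInfo(contact="Jane Doe <jane.doe@example.com>"),  # placeholder contact
28 |     ),
29 | )
30 | 
31 | print(dataset.qualified_name)  # catalog.hubocean.io/dataset/my-dataset
32 | ```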
--------------------------------------------------------------------------------
/src/dto/odp/dto/__init__.py:
--------------------------------------------------------------------------------
1 | from .catalog import * # noqa: F401, F403
2 | from .metadata import Metadata
3 | from .resource import GenericResourceDto, ResourceDto, ResourceSpecABC, ResourceSpecT, get_resource_spec_type
4 | from .resource_registry import * # noqa: F401, F403
5 | from .resource_registry import DEFAULT_RESOURCE_REGISTRY, ResourceRegistry, ResourceRegistryEntry, kind
6 | from .resource_status import ResourceStatus
7 |
--------------------------------------------------------------------------------
/src/dto/odp/dto/catalog/__init__.py:
--------------------------------------------------------------------------------
1 | from odp.dto.resource import ResourceDto
2 |
3 | from .data_collection import DataCollectionSpec, Distribution
4 | from .dataset import Attribute, Citation, DatasetSpec
5 | from .observable import ObservableSpec
6 |
7 | # Convenience type aliases
8 | DataCollectionDto = ResourceDto[DataCollectionSpec]
9 | DatasetDto = ResourceDto[DatasetSpec]
10 | ObservableDto = ResourceDto[ObservableSpec]
11 |
12 | del ResourceDto
13 |
--------------------------------------------------------------------------------
/src/dto/odp/dto/catalog/_rg.py:
--------------------------------------------------------------------------------
1 | """Resource group of the data catalog"""
2 |
3 | CATALOG_RESOURCE_GROUP = "catalog.hubocean.io"
4 |
--------------------------------------------------------------------------------
/src/dto/odp/dto/catalog/data_collection.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | from typing import Any, Optional
3 |
4 | from pydantic import BaseModel
5 |
6 | from ..common.contact_info import ContactInfo
7 | from ..common.license import License
8 | from ..resource import ResourceSpecABC
9 | from ..resource_registry import kind
10 | from ._rg import CATALOG_RESOURCE_GROUP
11 |
12 |
13 | class Distribution(BaseModel):
14 | """Distribution information"""
15 |
16 | published_by: ContactInfo
17 | """Publisher information"""
18 |
19 | published_date: datetime
20 |     """Date of first publication"""
21 |
22 | website: str
23 | """Distribution website"""
24 |
25 | license: Optional[License] = None
26 | """Dataset license information"""
27 |
28 |
29 | @kind(CATALOG_RESOURCE_GROUP, "dataCollection", "v1alpha1")
30 | class DataCollectionSpec(ResourceSpecABC):
31 | """Data collection specification model"""
32 |
33 | distribution: Optional[Distribution] = None
34 | """Information on how the dataset was distributed"""
35 |
36 | tags: set[str]
37 | """Tags for the dataset"""
38 |
39 | facets: Optional[dict[str, Any]] = None
40 | """Facets for the dataset"""
41 |
--------------------------------------------------------------------------------
/src/dto/odp/dto/catalog/dataset.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, List, Optional, Set
2 |
3 | from pydantic import BaseModel, Field
4 |
5 | from ..common.contact_info import ContactInfo
6 | from ..resource import ResourceSpecABC
7 | from ..resource_registry import kind
8 | from ._rg import CATALOG_RESOURCE_GROUP
9 |
10 |
11 | class Citation(BaseModel):
12 | """Citation information"""
13 |
14 | cite_as: Optional[str] = None
15 | """Directions on how to cite the dataset"""
16 |
17 | doi: Optional[str] = None
18 |
19 |
20 | class Attribute(BaseModel):
21 | """Dataset attribute"""
22 |
23 | name: str
24 | """Attribute name. This can be a column name in a table, a dimension in an array, etc."""
25 |
26 | description: Optional[str] = None
27 | """Attribute description"""
28 |
29 | traits: list[str]
30 | """List of traits. Traits are used to describe the attribute in more detail.
31 |
32 | Traits are based on Microsoft Common Data Model (CDM) traits. See the [CDM documentation]
33 | (https://learn.microsoft.com/en-us/common-data-model/sdk/trait-concepts-and-use-cases#what-are-traits)
34 | for more information.
35 | """
36 |
37 |
38 | @kind(CATALOG_RESOURCE_GROUP, "dataset", "v1alpha3")
39 | class DatasetSpec(ResourceSpecABC):
40 | """Dataset specification model"""
41 |
42 | storage_class: str
43 | """Storage class qualified name"""
44 |
45 | storage_controller: Optional[str] = None
46 | """Storage controller qualified name"""
47 |
48 | data_collection: Optional[str] = None
49 | """Data collection qualified name"""
50 |
51 | maintainer: ContactInfo
52 | """Active maintainer information"""
53 |
54 | citation: Optional[Citation] = None
55 | """Citation information"""
56 |
57 | documentation: List[str] = Field(default_factory=list)
58 | """Links to any relevant documentation for the dataset"""
59 |
60 | facets: Optional[Dict[str, Any]] = None
61 | """Facets for the dataset"""
62 |
63 | tags: Set[str] = Field(default_factory=set)
64 | """Tags for the dataset"""
65 |
66 | attributes: List[Attribute] = Field(default_factory=list)
67 | """Dataset attributes"""
68 |
--------------------------------------------------------------------------------
/src/dto/odp/dto/catalog/observable.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict
2 |
3 | from ..resource import ResourceSpecABC
4 | from ..resource_registry import kind
5 | from ._rg import CATALOG_RESOURCE_GROUP
6 |
7 |
8 | @kind(CATALOG_RESOURCE_GROUP, "observable", "v1alpha2")
9 | class ObservableSpec(ResourceSpecABC):
10 | observable_class: str
11 | """Observable class"""
12 |
13 | ref: str
14 | """Qualified name of the associated dataset or data collection"""
15 |
16 | details: Dict[str, Any]
17 | """Full observable object"""
18 |
--------------------------------------------------------------------------------
/src/dto/odp/dto/common/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/C4IROcean-archive/odp-sdk-python/cfe9d937e620dfa8f35e6e92646d9625de5a0de9/src/dto/odp/dto/common/__init__.py
--------------------------------------------------------------------------------
/src/dto/odp/dto/common/contact_info.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | from pydantic import BaseModel
4 |
5 |
6 | class ContactInfo(BaseModel):
7 | """Contact information for a user"""
8 |
9 | contact: str
10 |     """Contact in the form `Firstname Lastname <email>`"""
11 |
12 | organisation: Optional[str] = None
13 | """Organisation name"""
14 |
--------------------------------------------------------------------------------
/src/dto/odp/dto/common/license.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | from pydantic import BaseModel
4 |
5 |
6 | class License(BaseModel):
7 | """Data license information"""
8 |
9 | name: str
10 | """License name. Can be set to `Proprietary` for proprietary licenses"""
11 |
12 | href: Optional[str] = None
13 | """HREF to license text"""
14 |
15 | full_text: Optional[str] = None
16 | """Full license text"""
17 |
--------------------------------------------------------------------------------
/src/dto/odp/dto/metadata.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Optional
2 | from uuid import UUID
3 |
4 | from pydantic import BaseModel, Field
5 |
6 |
7 | class Metadata(BaseModel):
8 | """Resource manifest metadata"""
9 |
10 | name: str
11 | """Resource name. Must consist of alphanumeric characters, dashes or underscores and must start
12 | with an alphanumeric character"""
13 |
14 | display_name: Optional[str] = None
15 | """Human-friendly name"""
16 |
17 | description: Optional[str] = None
18 | """Resource description"""
19 |
20 | uuid: Optional[UUID] = None
21 | """System-assigned unique identifier"""
22 |
23 | labels: Dict = Field(default_factory=dict)
24 | """Resource labels"""
25 |
26 | owner: Optional[UUID] = None
27 | """Owner of the resource"""
28 |
--------------------------------------------------------------------------------
/src/dto/odp/dto/registry/__init__.py:
--------------------------------------------------------------------------------
1 | from .observable_class import ObservableClassSpec
2 |
--------------------------------------------------------------------------------
/src/dto/odp/dto/registry/_rg.py:
--------------------------------------------------------------------------------
1 | """Resource group of the ODP registry"""
2 |
3 | REGISTRY_RESOURCE_GROUP = "registry.hubocean.io"
4 |
--------------------------------------------------------------------------------
/src/dto/odp/dto/registry/observable_class.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict
2 |
3 | from ..resource import ResourceSpecABC
4 |
5 |
6 | class ObservableClassSpec(ResourceSpecABC):
7 | """Observable class specification model"""
8 |
9 | observable_schema: Dict[str, Any]
10 | """JSON schema for the observable class"""
11 |
--------------------------------------------------------------------------------
/src/dto/odp/dto/resource.py:
--------------------------------------------------------------------------------
1 | from abc import ABC
2 | from typing import Annotated, ClassVar, Generic, Optional, Type, TypeVar, Union, cast
3 | from uuid import UUID
4 |
5 | from pydantic import BaseModel
6 | from pydantic.functional_validators import AfterValidator
7 |
8 | from .metadata import Metadata
9 | from .resource_status import ResourceStatus
10 | from .validators import validate_resource_kind, validate_resource_version
11 |
12 |
13 | class ResourceSpecABC(BaseModel, ABC):
14 | """ResourceSpecABC is an abstract base class for resource specification."""
15 |
16 | __kind__: ClassVar[str]
17 | __manifest_version__: ClassVar[str]
18 |
19 |
20 | ResourceSpecT = Union[dict, ResourceSpecABC]
21 |
22 | T = TypeVar("T", bound=ResourceSpecT)
23 |
24 |
25 | class ResourceDto(BaseModel, Generic[T]):
26 | """Resource Data Transmission Object (DTO) representing a resource manifest"""
27 |
28 | kind: Annotated[str, AfterValidator(validate_resource_kind)] = None
29 | """kind is the kind of the resource."""
30 |
31 | version: Annotated[str, AfterValidator(validate_resource_version)] = None
32 | """version is the version of the resource."""
33 |
34 | metadata: Metadata
35 | """metadata is the metadata of the resource."""
36 |
37 | status: Optional[ResourceStatus] = None
38 | """status is the status of the resource."""
39 |
40 | spec: T
41 |
42 | def __init__(self, **data):
43 | spec = data.pop("spec")
44 |
45 | if hasattr(spec, "__kind__") and "kind" not in data:
46 | data["kind"] = spec.__kind__
47 | if hasattr(spec, "__manifest_version__") and "version" not in data:
48 | data["version"] = spec.__manifest_version__
49 |
50 | super().__init__(**data, spec=spec)
51 |
52 | @classmethod
53 | def is_generic(cls) -> bool:
54 | return isinstance(get_resource_spec_type(cls), dict)
55 |
56 | @property
57 | def qualified_name(self) -> str:
58 | return self.get_qualified_name()
59 |
60 | @property
61 | def uuid(self) -> UUID:
62 | return self.get_uuid()
63 |
64 | def get_qualified_name(self) -> str:
65 | """Get the resource qualified name
66 |
67 | The qualified name is the kind and resource name joined by a slash: `{kind}/{metadata.name}`
68 |
69 | Returns:
70 | Qualified name
71 | """
72 | return f"{self.kind}/{self.metadata.name}"
73 |
74 | def get_uuid(self) -> Optional[UUID]:
75 | """Get the resource UUID
76 |
77 | Returns:
78 | Resource UUID if it is set, `None` otherwise
79 | """
80 | return self.metadata.uuid
81 |
82 | def get_ref(self) -> Union[UUID, str]:
83 | """Get a valid reference to the resource
84 |
85 | Returns:
86 | The resource UUID if it is set, the qualified name otherwise
87 | """
88 | return self.get_uuid() or self.get_qualified_name()
89 |
90 |
91 | def get_resource_spec_type(cls: Union[Type[ResourceDto[T]], ResourceDto[T]]) -> Type[T]:
92 | """Get the resource spec type
93 |
94 | Args:
95 | cls: ResourceDto class or instance
96 |
97 | Returns:
98 | The resource spec type
99 | """
100 | if isinstance(cls, type) and issubclass(cls, ResourceDto):
101 | tp = cls.model_fields["spec"].annotation
102 | else:
103 | tp = type(cls.spec)
104 | return cast(Type[T], tp)
105 |
106 |
107 | GenericResourceDto = ResourceDto[dict]
108 |
--------------------------------------------------------------------------------
/src/dto/odp/dto/resource_registry.py:
--------------------------------------------------------------------------------
1 | from typing import Annotated, Callable, Dict, Optional, Tuple, Type, TypeVar, cast
2 |
3 | from pydantic import BaseModel, Field
4 | from pydantic.functional_validators import BeforeValidator
5 |
6 | from .resource import Metadata, ResourceDto, ResourceSpecABC, ResourceSpecT, ResourceStatus, get_resource_spec_type
7 | from .validators import validate_resource_kind, validate_resource_version
8 |
9 | T = TypeVar("T", bound=ResourceSpecT)
10 |
11 |
12 | class ResourceRegistryEntry(BaseModel):
13 | """ResourceRegistryEntry is a registry entry for a resource."""
14 |
15 | resource_kind: Annotated[str, BeforeValidator(validate_resource_kind)]
16 |     """resource_kind is the kind of the resource."""
17 |
18 | resource_version: Annotated[str, BeforeValidator(validate_resource_version)]
19 |     """resource_version is the version of the resource, in the form v<N> or v<N>(alpha|beta)<M>, e.g. v1alpha1"""
20 |
21 | def __hash__(self):
22 | return hash((self.resource_kind, self.resource_version))
23 |
24 |
25 | class ResourceRegistry(BaseModel):
26 | """Registry used to register and lookup resource definitions."""
27 |
28 | entries: Dict[ResourceRegistryEntry, Type[ResourceSpecABC]] = Field(default_factory=dict)
29 | """entries is a list of resource registry entries."""
30 |
31 | def add(self, entry: ResourceRegistryEntry, spec: Type[ResourceSpecABC]) -> None:
32 | """add adds a resource to the registry."""
33 | if entry in self.entries:
34 | raise ValueError(f"resource {entry.resource_kind} ({entry.resource_version}) already exists")
35 | self.entries[entry] = spec
36 |
37 | def get_resource_cls(self, kind: str, version: str) -> Type[ResourceSpecABC]:
38 | """Returns the resource spec class for the given kind and version.
39 |
40 | Args:
41 | kind: kind is the kind of the resource.
42 | version: version is the version of the resource.
43 |
44 | Returns:
45 | Type[ResourceSpecABC]: the resource spec class.
46 | """
47 | entry = ResourceRegistryEntry(resource_kind=kind, resource_version=version)
48 | try:
49 | return self.entries[entry]
50 | except KeyError as e:
51 | raise KeyError(f"resource {kind} ({version}) not found") from e
52 |
53 | def factory(self, kind: str, version: str, data: dict) -> ResourceSpecABC:
54 | """factory creates a resource spec object for the given kind and version.
55 |
56 | Args:
57 | kind: kind is the kind of the resource.
58 | version: version is the version of the resource.
59 | data: data is the resource data.
60 |
61 | Returns:
62 | ResourceSpecABC: the resource spec object.
63 | """
64 | cls = self.get_resource_cls(kind, version)
65 | return cls(**data)
66 |
67 | def factory_cast(self, t: Type[T], kind: str, version: str, data: dict, assert_type: bool = True) -> T:
68 | """Convenience method to create a resource spec object and cast it to the given type.
69 |
70 | Args:
71 | t: Type to cast to.
72 | kind: kind is the kind of the resource.
73 | version: version is the version of the resource.
74 | data: data is the resource data.
75 | assert_type: Whether to assert the type before returning
76 |
77 | Returns:
78 | T: the resource spec object.
79 | """
80 | ret = self.factory(kind, version, data)
81 | if assert_type and not isinstance(ret, t):
82 | raise ValueError(f"Expected type {t.__name__}, got {type(ret).__name__}")
83 |         return cast(T, ret)
84 |
85 | def _resource_factory_prototype(self, manifest: dict) -> Tuple[str, str, Metadata, Optional[ResourceStatus], dict]:
86 | try:
87 | kind = manifest["kind"]
88 | version = manifest["version"]
89 | metadata = manifest["metadata"]
90 | status = manifest.get("status")
91 | spec = manifest["spec"]
92 | except KeyError as e:
93 | raise ValueError("Invalid resource manifest") from e
94 |
95 | return (kind, version, Metadata.parse_obj(metadata), ResourceStatus.parse_obj(status) if status else None, spec)
96 |
97 | def resource_factory(self, manifest: dict, raise_unknown: bool = True) -> ResourceDto:
98 | """Convert a manifest to a ResourceDto object.
99 |
100 | Args:
101 | manifest: Resource manifest.
102 | raise_unknown: Whether to raise an exception if the resource kind is unknown.
103 |
104 | Returns:
105 | Parsed ResourceDto object.
106 | """
107 | kind, version, metadata, status, spec_dict = self._resource_factory_prototype(manifest)
108 |
109 | try:
110 | spec = self.factory(kind, version, spec_dict)
111 | except KeyError:
112 | if raise_unknown:
113 | raise
114 | spec = spec_dict
115 |
116 |         return ResourceDto(kind=kind, version=version, metadata=metadata, status=status, spec=spec)
117 |
118 | def resource_factory_cast(
119 | self, t: Type[ResourceDto[T]], manifest: dict, raise_unknown: bool = True, assert_type: bool = True
120 | ) -> ResourceDto[T]:
121 | """Convenience method to create a ResourceDto object and cast it to the given type.
122 |
123 | Args:
124 | t: Type to cast to.
125 | manifest: manifest is the resource data.
126 | raise_unknown: Whether to raise an exception if the resource kind is unknown.
127 | assert_type: Whether to assert the type before returning
128 | """
129 | kind, version, metadata, status, spec_dict = self._resource_factory_prototype(manifest)
130 |
131 | spec_tp = get_resource_spec_type(t)
132 | try:
133 | spec = self.factory_cast(spec_tp, kind, version, spec_dict)
134 | except KeyError:
135 | if raise_unknown:
136 | raise
137 | elif issubclass(spec_tp, ResourceSpecABC):
138 | spec = spec_tp.parse_obj(spec_dict)
139 | else:
140 | spec = spec_dict
141 |
142 | ret = ResourceDto(kind=kind, version=version, metadata=metadata, status=status, spec=spec)
143 | return cast(ResourceDto[T], ret)
144 |
145 |
146 | DEFAULT_RESOURCE_REGISTRY = ResourceRegistry()
147 | """Globally default resource registry."""
148 |
149 |
150 | def kind(
151 | resource_group: str,
152 | resource_type: str,
153 | resource_version: str,
154 | registry: ResourceRegistry = DEFAULT_RESOURCE_REGISTRY,
155 | ) -> Callable[[Type[ResourceSpecABC]], Type[ResourceSpecABC]]:
156 | """kind is a decorator for resource specification classes to register them in the resource registry.
157 |
158 | Args:
159 | resource_group: resource_group is the group of the resource.
160 | resource_type: resource_type is the kind of the resource.
161 |         resource_version: resource_version is the version of the resource, in the form v<N> or v<N>(alpha|beta)<M>
162 | registry: registry is the resource registry to register the resource in.
163 |
164 | Returns:
165 | Callable[[Type[ResourceSpecABC]], Type[ResourceSpecABC]]: a decorator function.
166 | """
167 |
168 | def inner(cls: Type[ResourceSpecABC]) -> Type[ResourceSpecABC]:
169 | kind = f"{resource_group}/{resource_type}"
170 |
171 | cls.__kind__ = kind
172 | cls.__manifest_version__ = resource_version
173 |
174 | registry.add(
175 | ResourceRegistryEntry(resource_kind=kind, resource_version=resource_version),
176 | cls,
177 | )
178 |
179 | return cls
180 |
181 | return inner
182 |
--------------------------------------------------------------------------------
/src/dto/odp/dto/resource_status.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | from typing import Optional
3 | from uuid import UUID
4 |
5 | from pydantic import BaseModel, Field
6 |
7 |
8 | class ResourceStatus(BaseModel):
9 | """Resource status model"""
10 |
11 | num_updates: int = Field(default=0, ge=0)
12 |     """Number of times the manifest has been updated"""
13 |
14 | created_time: datetime
15 | """Created timestamp"""
16 |
17 | created_by: UUID
18 | """UUID of user that created the resource"""
19 |
20 | updated_time: datetime
21 | """Last updated timestamp"""
22 |
23 | updated_by: UUID
24 | """UUID of user that updated the resource"""
25 |
26 | deleted_time: Optional[datetime] = None
27 | """Deleted timestamp - used for soft-delete"""
28 |
29 | deleted_by: Optional[UUID] = None
30 | """UUID of user that deleted the resource"""
31 |
--------------------------------------------------------------------------------
/src/dto/odp/dto/validators.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | RX_RESOURCE_NAME = re.compile(r"[a-zA-Z0-9][a-zA-Z0-9\-_\.]*")
4 | RX_RESOURCE_KIND = re.compile(r"^(?:[a-zA-Z0-9][a-zA-Z0-9\-_\.]*)\/(?:[a-zA-Z0-9][a-zA-Z0-9\-_\.]*)$")
5 | RX_RESOURCE_VERSION = re.compile(r"^v[0-9]+(?:(?:alpha|beta)[0-9]+)?$")
6 |
7 |
8 | def validate_resource_version(val: str) -> str:
9 | if not RX_RESOURCE_VERSION.match(val):
10 | raise ValueError(f"Invalid resource version: {val}")
11 | return val
12 |
13 |
14 | def validate_resource_kind(val: str) -> str:
15 | if not RX_RESOURCE_KIND.match(val):
16 | raise ValueError(f"Invalid resource kind: {val}")
17 | return val
18 |
19 |
20 | def validate_resource_name(val: str) -> str:
21 | if not RX_RESOURCE_NAME.match(val):
22 | raise ValueError(f"Invalid resource component: {val}")
23 | return val
24 |
--------------------------------------------------------------------------------
/src/dto/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "odp-dto"
3 | version = "0.4.10"
4 | description = "ODP Data Transfer Object"
5 | authors = ["Thomas Li Fredriksen "]
6 | license = "MIT"
7 | readme = "README.md"
8 | packages = [
9 | {include="odp"},
10 | ]
11 |
12 | [tool.poetry.dependencies]
13 | python = "^3.9"
14 | pydantic = "^2.4.2"
15 |
16 | [build-system]
17 | requires = ["poetry-core>=1.0.0"]
18 | build-backend = "poetry.core.masonry.api"
--------------------------------------------------------------------------------
/src/dto/tests/test_dto/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/C4IROcean-archive/odp-sdk-python/cfe9d937e620dfa8f35e6e92646d9625de5a0de9/src/dto/tests/test_dto/__init__.py
--------------------------------------------------------------------------------
/src/dto/tests/test_dto/conftest.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from odp.dto import ResourceRegistry, ResourceRegistryEntry
3 |
4 | from .utils import MockSpec, SimpleSpec
5 |
6 |
7 | @pytest.fixture
8 | def empty_resource_registry() -> ResourceRegistry:
9 | return ResourceRegistry()
10 |
11 |
12 | @pytest.fixture
13 | def resource_registry(empty_resource_registry: ResourceRegistry) -> ResourceRegistry:
14 | empty_resource_registry.add(
15 | ResourceRegistryEntry(
16 | resource_kind="test.hubocean.io/mock",
17 | resource_version="v1alpha1",
18 | ),
19 | MockSpec,
20 | )
21 |
22 | empty_resource_registry.add(
23 | ResourceRegistryEntry(
24 | resource_kind="test.hubocean.io/simple",
25 | resource_version="v1alpha1",
26 | ),
27 | SimpleSpec,
28 | )
29 |
30 | return empty_resource_registry
31 |
--------------------------------------------------------------------------------
/src/dto/tests/test_dto/test_dto_base.py:
--------------------------------------------------------------------------------
1 | from odp.dto import Metadata, ResourceDto
2 |
3 | from .utils import TESTS_RESOURCE_REGISTRY, MockSpec, SimpleSpec
4 |
5 | MockDto = ResourceDto[MockSpec]
6 |
7 |
8 | def test_default_test_resource_registry():
9 | cls = TESTS_RESOURCE_REGISTRY.get_resource_cls("test.hubocean.io/mock", "v1alpha1")
10 | assert cls == MockSpec
11 |
12 | cls = TESTS_RESOURCE_REGISTRY.get_resource_cls("test.hubocean.io/simple", "v1alpha1")
13 | assert cls == SimpleSpec
14 |
15 |
16 | def test_dunders():
17 | assert MockSpec.__kind__ == "test.hubocean.io/mock"
18 | assert MockSpec.__manifest_version__ == "v1alpha1"
19 |
20 | assert SimpleSpec.__kind__ == "test.hubocean.io/simple"
21 | assert SimpleSpec.__manifest_version__ == "v1alpha1"
22 |
23 |
24 | def test_init_use_registered_kind_and_version():
25 | # Users should not need to provide the kind and version for a registered resource kind
26 | s = MockDto(metadata=Metadata(name="foo"), spec=MockSpec())
27 |
28 | assert s.metadata.name == "foo"
29 | assert isinstance(s.spec, MockSpec)
30 |
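31 | 
32 | # Illustrative example of parsing a raw manifest through the registry; the manifest
33 | # field values below are arbitrary placeholders chosen to satisfy SimpleSpec.
34 | def test_resource_factory_example():
35 |     manifest = {
36 |         "kind": "test.hubocean.io/simple",
37 |         "version": "v1alpha1",
38 |         "metadata": {"name": "simple-example"},
39 |         "spec": {"some_str": "foo", "some_int": 2},
40 |     }
41 | 
42 |     dto = TESTS_RESOURCE_REGISTRY.resource_factory(manifest)
43 | 
44 |     assert isinstance(dto.spec, SimpleSpec)
45 |     assert dto.qualified_name == "test.hubocean.io/simple/simple-example"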
--------------------------------------------------------------------------------
/src/dto/tests/test_dto/test_validators.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | import pytest
4 | from odp.dto.validators import validate_resource_kind, validate_resource_version
5 |
6 |
7 | @pytest.mark.parametrize(
8 | "test_value,expected",
9 | [
10 | ("hubocean.io/testGroup", True),
11 | ("catalog.hubocean.io/testGroup", True),
12 | ("function.domain.com/testGroup", True),
13 | ("hubocean.io/testGroup/testProject", False),
14 | ("foobar", False),
15 | ],
16 | )
17 | def test_validate_kind(test_value: Optional[str], expected: bool):
18 | if expected:
19 | assert test_value == validate_resource_kind(test_value)
20 | else:
21 | with pytest.raises(ValueError):
22 | validate_resource_kind(test_value)
23 |
24 |
25 | @pytest.mark.parametrize(
26 | "test_value,expected",
27 | [
28 | ("v1alpha1", True),
29 | ("v1beta1", True),
30 | ("v2", True),
31 | ("v3alpha2", True),
32 | ("v1", True),
33 | ("v1alpha", False),
34 | ("v1beta", False),
35 | ("v1alpha1beta1", False),
36 | ("foobar", False),
37 | ("v100", True),
38 | ("v99999999", True),
39 | ("v1545325alpha6546464564", True),
40 | ],
41 | )
42 | def test_validate_resource_version(test_value: Optional[str], expected: bool):
43 | if expected:
44 | assert test_value == validate_resource_version(test_value)
45 | else:
46 | with pytest.raises(ValueError):
47 | validate_resource_version(test_value)
48 |
--------------------------------------------------------------------------------
/src/dto/tests/test_dto/utils.py:
--------------------------------------------------------------------------------
1 | from typing import Annotated
2 |
3 | from odp.dto import ResourceRegistry, ResourceSpecABC, kind
4 | from pydantic import Field
5 | from pydantic.functional_validators import BeforeValidator
6 |
7 | TESTS_RESOURCE_REGISTRY = ResourceRegistry()
8 |
9 |
10 | def _validate_starts_with(s: str, p: str) -> str:
11 | if not s.startswith(p):
12 | raise ValueError(f"string does not start with {p}")
13 | return s
14 |
15 |
16 | @kind("test.hubocean.io", "mock", "v1alpha1", TESTS_RESOURCE_REGISTRY)
17 | class MockSpec(ResourceSpecABC):
18 | pass
19 |
20 |
21 | @kind("test.hubocean.io", "simple", "v1alpha1", TESTS_RESOURCE_REGISTRY)
22 | class SimpleSpec(ResourceSpecABC):
23 | some_str: str
24 | some_int: int = Field(..., ge=1)
25 |
26 |
27 | class UnregisteredSpec(ResourceSpecABC):
28 | some_float: float
29 | some_validated_str: Annotated[str, BeforeValidator(lambda s: _validate_starts_with(s, "foo"))]
30 |
--------------------------------------------------------------------------------
/src/sdk/README.md:
--------------------------------------------------------------------------------
1 | # ODP Python SDK
2 |
3 | Connect to the Ocean Data Platform with Python through the Python SDK. Download queried ocean data easily and efficiently into data frames, for exploration and further processing in your data science projects.
4 |
5 | ## Documentation
6 |
7 | https://docs.hubocean.earth
8 |
9 | ## Installation
10 |
11 | ```shell
12 | pip install odp-sdk
13 | ```
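14 | 
15 | ## Usage
16 | 
17 | A minimal sketch, assuming credentials are available to the default token provider; the dataset name below is a placeholder:
18 | 
19 | ```python
20 | from odp.client import OdpClient
21 | 
22 | # Authenticates using the default token provider
23 | client = OdpClient()
24 | 
25 | # Catalog, raw storage and tabular storage clients are exposed as properties
26 | print(client.catalog, client.raw, client.tabular)
27 | 
28 | # Personalize a resource name with a user-specific suffix
29 | print(client.personalize_name("my-dataset"))
30 | ```
31 | 
32 | See the `examples/` directory for complete, runnable examples.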
--------------------------------------------------------------------------------
/src/sdk/odp/client/__init__.py:
--------------------------------------------------------------------------------
1 | from .client import OdpClient
2 |
--------------------------------------------------------------------------------
/src/sdk/odp/client/client.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 | from uuid import UUID
3 |
4 | from pydantic import BaseModel, Field, PrivateAttr
5 |
6 | from ..dto import DatasetDto
7 | from .auth import TokenProvider, get_default_token_provider
8 | from .http_client import OdpHttpClient
9 | from .raw_storage_client import OdpRawStorageClient
10 | from .resource_client import OdpResourceClient
11 | from .tabular_storage_client import OdpTabularStorageClient
12 | from .tabular_storage_v2_client import ClientAuthorization
13 | from .tabular_v2.client import TableHandler
14 |
15 |
16 | class OdpClient(BaseModel):
17 | """Client for the ODP API"""
18 |
19 | base_url: str = "https://api.hubocean.earth"
20 | token_provider: TokenProvider = Field(default_factory=get_default_token_provider)
21 |
22 | _http_client: OdpHttpClient = PrivateAttr()
23 | _catalog_client: OdpResourceClient = PrivateAttr()
24 | _raw_storage_client: OdpRawStorageClient = PrivateAttr()
25 | _tabular_storage_client: OdpTabularStorageClient = PrivateAttr()
26 |
27 | def __init__(self, **data):
28 | super().__init__(**data)
29 |
30 | self._http_client = OdpHttpClient(base_url=self.base_url, token_provider=self.token_provider)
31 | self._catalog_client = OdpResourceClient(http_client=self._http_client, resource_endpoint="/catalog")
32 | self._raw_storage_client = OdpRawStorageClient(http_client=self._http_client)
33 | self._tabular_storage_client = OdpTabularStorageClient(http_client=self._http_client)
34 | self._tabular_storage_v2_client = ClientAuthorization(
35 | base_url=self.base_url, token_provider=self.token_provider
36 | )
37 |
38 | def personalize_name(self, name: str, fmt: Optional[str] = None) -> str:
39 | """Personalize a name by adding a postfix unique to the user
40 |
41 | Args:
42 | name: The name to personalize
43 | fmt: Used to override the default format string. Should be a python format-string with placeholders
44 | for the variables `uid` and `name`. For example: `"{uid}-{name}"`
45 |
46 | Returns:
47 | The personalized name
48 | """
49 | fmt = fmt or "{name}-{uid}"
50 | uid = self.token_provider.get_user_id()
51 |
52 | # Attempt to simplify the UID by only using the node part of the UUID
53 | try:
54 | uid = UUID(uid).node
55 | except ValueError:
56 | # User ID is not a valid UUID, use it as-is
57 | pass
58 |
59 | return fmt.format(uid=uid, name=name)
60 |
61 | @property
62 | def resource_store(self):
63 | # TODO: Implement resource store
64 | raise NotImplementedError("Resource store not implemented")
65 |
66 | @property
67 | def catalog(self) -> OdpResourceClient:
68 | return self._catalog_client
69 |
70 | @property
71 | def iam(self):
72 | # TODO: Implement IAM controller
73 | raise NotImplementedError("IAM not implemented")
74 |
75 | @property
76 | def registry(self):
77 | # TODO: Implement registry/core controller
78 | raise NotImplementedError("Registry not implemented")
79 |
80 | @property
81 | def raw(self) -> OdpRawStorageClient:
82 | return self._raw_storage_client
83 |
84 | @property
85 | def tabular(self) -> OdpTabularStorageClient:
86 | return self._tabular_storage_client
87 |
88 | def table_v2(self, dataset_dto: DatasetDto) -> TableHandler:
89 | return self._tabular_storage_v2_client.table(str(dataset_dto.uuid))
90 |
--------------------------------------------------------------------------------
/src/sdk/odp/client/dto/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/C4IROcean-archive/odp-sdk-python/cfe9d937e620dfa8f35e6e92646d9625de5a0de9/src/sdk/odp/client/dto/__init__.py
--------------------------------------------------------------------------------
/src/sdk/odp/client/dto/file_dto.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | from typing import Any, Dict, Optional, Union
3 | from uuid import UUID
4 |
5 | from pydantic import BaseModel, Field, field_validator
6 |
7 |
8 | class FileMetadataDto(BaseModel):
9 | """File Metadata Model."""
10 |
11 | name: str
12 | mime_type: Optional[str] = None
13 | dataset: Optional[UUID] = None
14 | metadata: Dict[str, Union[bool, int, str]] = Field(default_factory=dict)
15 | geo_location: Optional[Any] = None
16 | size_bytes: Optional[int] = None
17 | checksum: Optional[str] = None
18 | created_time: Optional[datetime] = None
19 | modified_time: Optional[datetime] = None
20 | deleted_time: Optional[datetime] = None
21 |
22 | @field_validator("name")
23 | def lstrip_name(cls, v):
24 | if v.startswith("/"):
25 | raise ValueError("name cannot start with '/'. Absolute paths are not allowed.")
26 | return v
27 |
--------------------------------------------------------------------------------
/src/sdk/odp/client/dto/table_spec.py:
--------------------------------------------------------------------------------
1 | from typing import List, Literal, Optional
2 | from uuid import UUID
3 |
4 | from pydantic import BaseModel, model_validator
5 |
6 | from .tabular_store import TablePartitioningSpec
7 |
8 |
9 | class TableSpec(BaseModel):
10 | table_schema: dict
11 | partitioning: Optional[List[TablePartitioningSpec]] = None
12 |
13 |
14 | class StageDataPoints(BaseModel):
15 | """Model for update data point endpoint."""
16 |
17 | action: Literal["create", "commit"]
18 | stage_id: Optional[UUID]
19 |
20 | @model_validator(mode="before")
21 | def _validate_action(cls, values):
22 | if values.get("action") == "create" and values.get("stage_id"):
23 | raise ValueError("stage id cannot be issued with create action")
24 | elif values.get("action") == "commit" and not values.get("stage_id"):
25 | raise ValueError("stage id must be issued with commit action")
26 |
27 | return values
28 |
--------------------------------------------------------------------------------
/src/sdk/odp/client/dto/tabular_store.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime, timedelta
2 | from typing import List, Literal, Optional, Union
3 | from uuid import UUID, uuid4
4 |
5 | from pydantic import BaseModel
6 |
7 |
8 | class TablePartitioningSpec(BaseModel):
9 | columns: List[str]
10 | transformer_name: str
11 | args: Optional[List[Union[int, float, str]]] = None
12 |
13 | def serialize(self) -> bytes:
14 | return self.json().encode("utf-8")
15 |
16 |
17 | class TableStage(BaseModel):
18 | stage_id: UUID
19 | status: Literal["active", "commit", "commit-failed", "delete"]
20 | created_time: datetime
21 | expiry_time: datetime
22 | updated_time: Optional[datetime] = None
23 |
24 | error: Optional[str] = None
25 | error_info: Optional[dict] = None
26 |
27 | def serialize(self) -> bytes:
28 | return self.json(exclude_unset=True, exclude_none=True).encode("utf-8")
29 |
30 | @classmethod
31 | def generate(cls, expiry_time: timedelta) -> "TableStage":
32 | now = datetime.now()
33 |
34 | return cls(stage_id=uuid4(), status="active", created_time=now, expiry_time=now + expiry_time)
35 |
36 | def dict(self, **kwargs) -> "DictStrAny": # noqa: F821
37 | exclude_unset = kwargs.pop("exclude_unset", True)
38 | return super().dict(exclude_unset=exclude_unset, **kwargs)
39 |
--------------------------------------------------------------------------------
/src/sdk/odp/client/exc.py:
--------------------------------------------------------------------------------
1 | """This module contains the set of ODP SDK exceptions."""
2 |
3 |
4 | class OdpError(Exception):
5 | """Base class for exceptions in this module."""
6 |
7 |
8 | class OdpAuthError(OdpError):
9 | """Exception raised for authentication errors."""
10 |
11 |
12 | class OdpUnauthorizedError(OdpError):
13 | """Exception raised for unauthorized requests."""
14 |
15 |
16 | class OdpForbiddenError(OdpError):
17 | """Exception raised for forbidden requests."""
18 |
19 |
20 | class OdpTokenValidationError(OdpError):
21 | """Exception raised for invalid tokens."""
22 |
23 |
24 | class OdpResourceNotFoundError(OdpError):
25 | """Exception raised when a resource is not found."""
26 |
27 |
28 | class OdpResourceExistsError(OdpError):
29 | """Exception raised when a resource already exists."""
30 |
31 |
32 | class OdpValidationError(OdpError):
33 |     """Exception raised when a resource fails validation."""
34 |
35 |
36 | class OdpFileNotFoundError(OdpError):
37 | """Exception raised when a file is not found."""
38 |
39 |
40 | class OdpFileAlreadyExistsError(OdpError):
41 | """File already exists"""
42 |
43 |
44 | class OpenTableStageInvalidAction(OdpError):
45 |     """Exception raised when a table is deleted while it still has active stage sessions."""
46 |
--------------------------------------------------------------------------------
/src/sdk/odp/client/tabular_storage_v2_client.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Optional, Union
2 |
3 | from odp.client.auth import TokenProvider
4 | from odp.client.tabular_v2.client import Client
5 |
6 |
7 | class ClientAuthorization(Client):
8 | def __init__(self, base_url, token_provider: TokenProvider):
9 | if base_url.endswith(":8888"):
10 | base_url = base_url.replace(":8888", ":31337")
11 | super().__init__(base_url)
12 | self.token_provider = token_provider
13 |
14 | def _request(
15 | self,
16 | path: str,
17 | data: Union[Dict, bytes, None] = None,
18 | params: Optional[Dict] = None,
19 | headers: Optional[Dict] = None,
20 | ) -> Client.Response:
21 | headers = headers or {}
22 | headers["Authorization"] = self.token_provider.get_token()
23 | return super()._request(path, data, params, headers)
24 |
--------------------------------------------------------------------------------
/src/sdk/odp/client/tabular_v2/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/C4IROcean-archive/odp-sdk-python/cfe9d937e620dfa8f35e6e92646d9625de5a0de9/src/sdk/odp/client/tabular_v2/__init__.py
--------------------------------------------------------------------------------
/src/sdk/odp/client/tabular_v2/big/__init__.py:
--------------------------------------------------------------------------------
1 | from .big import BigCol, convert_schema_outward, inner_exp
2 | from .buffer import Buffer, convert_schema_inward
3 | from .local import LocalBigCol
4 | from .remote import RemoteBigCol
5 |
--------------------------------------------------------------------------------
/src/sdk/odp/client/tabular_v2/big/big.py:
--------------------------------------------------------------------------------
1 | import json
2 | import logging
3 | from abc import abstractmethod
4 | from typing import Iterable, Optional
5 |
6 | import pyarrow as pa
7 | from odp.client.tabular_v2.util.exp import BinOp, Field, Op, Parens, Scalar, UnaryOp
8 |
9 | SMALL_MAX = 256
10 | STR_LIMIT = 128 # when to start using a reference
11 | STR_MIN = 12 # what to keep as prefix in the reference
12 | MAX_BIGFILE_SIZE = 64 * 1024 * 1024 # max size of a big file
13 |
14 |
15 | def convert_schema_outward(schema: pa.Schema) -> pa.Schema:
16 | """drops the .ref fields"""
17 | out = []
18 | for name in schema.names:
19 | field: pa.Field = schema.field(name)
20 | if name.endswith(".ref") and name[:-4] in schema.names and field.type == pa.string():
21 | continue # skip
22 | out.append(field)
23 | return pa.schema(out)
24 |
25 |
26 | class BigCol:
27 | def __init__(self):
28 | pass
29 |
30 | @abstractmethod
31 | def fetch(self, md5: str) -> bytes:
32 | """fetch data, called often, should cache"""
33 | raise NotImplementedError()
34 |
35 | @abstractmethod
36 | def upload(self, md5: str, data: Iterable[bytes]):
37 | """upload data"""
38 | raise NotImplementedError()
39 |
40 | def decode(self, batch: pa.RecordBatch) -> pa.RecordBatch:
41 | cache = {} # FIXME: can this use too much memory?
42 | outer_schema = convert_schema_outward(batch.schema)
43 |
44 | refs = []
45 | for name in outer_schema.names:
46 | if name.endswith(".ref"):
47 | refs.append(name)
48 |
49 | if not refs:
50 | return batch.select(outer_schema.names)
51 |
52 | def decode_by_row(row):
53 | for name in refs:
54 | ref = row[name]
55 | if not ref:
56 | continue
57 |
58 | target = name[:-4]
59 | big_id, start, size = ref.split(":")
60 | start = int(start)
61 | size = int(size)
62 | if big_id in cache:
63 | data = cache[big_id]
64 | else:
65 | data = self.fetch(big_id)
66 | cache[big_id] = data
67 | if isinstance(row[name], str): # the field must contain the prefix, from which we infer the type
68 | row[target] = data[start : start + size].decode("utf-8")
69 | else:
70 | row[target] = data[start : start + size]
71 | return row
72 |
73 | df = batch.to_pandas()
74 | df = df.apply(decode_by_row, axis=1)
75 | return pa.RecordBatch.from_pandas(df, schema=outer_schema)
76 |
77 |
78 | def inner_exp(schema: pa.Schema, op: Optional[Op]) -> Optional[Op]:
79 | if op is None:
80 | return None
81 |
82 | fields = []
83 | for name in schema.names:
84 | field: pa.Field = schema.field(name)
85 | if field.type != pa.string() and field.type != pa.binary():
86 | continue
87 | if field.metadata and b"big" in field.metadata:
88 | fields.append(name)
89 |
90 | # TODO don't use the visitor, instead parse manually and use negation context
91 | def visitor(neg: bool, op: Op) -> Op:
92 | if isinstance(op, Field):
93 | return op
94 | if isinstance(op, Scalar):
95 | return op
96 | if isinstance(op, Parens):
97 | op.exp = visitor(neg, op.exp)
98 | return op
99 | if isinstance(op, UnaryOp):
100 | if op.prefix in ["~", "not", "!", "invert"]:
101 |                 return UnaryOp(prefix=op.prefix, exp=visitor(not neg, op.exp), suffix=op.suffix)
102 | return op
103 | if isinstance(op, BinOp):
104 | op = BinOp(left=visitor(neg, op.left), op=op.op, right=visitor(neg, op.right))
105 | if isinstance(op.left, Field):
106 | if str(op.left) in fields:
107 | return _inner_exp_binop(neg, op.left, op.op, op.right)
108 | return op
109 | elif isinstance(op.right, Field):
110 | try:
111 | op = op.flip()
112 | except NotImplementedError:
113 | logging.warning("can't flip big-col expression: %s", op)
114 | return Scalar(src="True", type="bool")
115 | return visitor(neg, op)
116 | else:
117 | return op
118 | raise ValueError(f"can't convert big-col expression: {type(op)}")
119 |
120 | op = visitor(False, op)
121 | logging.info("big: inner_exp: %s", repr(op))
122 | return op
123 |
124 |
125 | def _inner_exp_binop_str(neg: bool, field: Field, op: str, right: str) -> Op:
126 | if len(right) > STR_MIN:
127 | a = right[:STR_MIN]
128 | b = right[: STR_MIN - 1] + chr(ord(right[STR_MIN - 1]) + 1)
129 | logging.info("big: str: %s .. %s", json.dumps(a), json.dumps(b))
130 |
131 | if op == "==":
132 | if neg:
133 | return Scalar.from_py(False)
134 | return BinOp(
135 | left=BinOp(
136 | left=Scalar.from_py(a),
137 | op="<",
138 | right=field,
139 | ),
140 | op="and",
141 | right=BinOp(
142 | left=field,
143 | op="<",
144 | right=Scalar.from_py(b),
145 | ),
146 | )
147 | elif op == "!=":
148 | if neg:
149 | return Scalar.from_py(False)
150 | else:
151 | return Scalar.from_py(True)
152 | elif op == ">" or op == ">=":
153 | return BinOp(
154 | left=field,
155 | op=op,
156 | right=Scalar.from_py(a),
157 | )
158 | elif op == "<" or op == "<=":
159 | return BinOp(
160 | left=field,
161 | op=op,
162 | right=Scalar.from_py(b),
163 | )
164 | else:
165 | return BinOp(
166 | left=field,
167 | op=op,
168 | right=Scalar.from_py(right),
169 | )
170 | logging.error("can't convert big-col expression: %s %s %s", field, op, right)
171 | raise ValueError("can't convert big-col expression")
172 |
173 |
174 | def _inner_exp_binop(neg: bool, left: Field, op: str, right: Op) -> Op:
175 | if isinstance(right, Scalar):
176 | v = right.to_py()
177 | if isinstance(v, str):
178 | return _inner_exp_binop_str(neg, left, op, v)
179 | else:
180 |             raise ValueError(f"can't convert big-col expression for scalar {right}")
181 |     raise ValueError(f"can't convert big-col expression: {left} {op} {right}")
182 |
--------------------------------------------------------------------------------
/src/sdk/odp/client/tabular_v2/big/buffer.py:
--------------------------------------------------------------------------------
1 | import uuid
2 | from threading import Lock
3 | from typing import Optional
4 |
5 | import pandas as pd
6 | import pyarrow as pa
7 | from odp.client.tabular_v2.big import BigCol
8 | from odp.client.tabular_v2.big.big import MAX_BIGFILE_SIZE, SMALL_MAX, STR_LIMIT, STR_MIN
9 |
10 |
11 | def convert_schema_inward(schema: pa.Schema) -> pa.Schema:
12 | """add .ref fields for columns marked with big, helper only used by create()"""
13 | return Buffer(None).with_outer_schema(schema).inner_schema
14 |
15 |
16 | class Buffer:
17 | def __init__(self, parent: Optional[BigCol]):
18 | self.data = []
19 | self.size = 0
20 | self.next_id = uuid.uuid4().hex
21 | self.lock = Lock()
22 | self.parent = parent
23 | self.big_fields = []
24 | self.small_fields = []
25 | self.inner_schema = None
26 |
27 | def with_inner_schema(self, inner_schema: pa.Schema):
28 | self.inner_schema = inner_schema
29 | for name in inner_schema.names:
30 | if name.endswith(".ref"):
31 | continue
32 | f = inner_schema.field(name)
33 | if f.type != pa.string() and f.type != pa.binary():
34 | continue
35 | meta = f.metadata
36 | if meta and b"big" in meta:
37 | self.big_fields.append(name)
38 | else:
39 | self.small_fields.append(name)
40 | return self
41 |
42 | def with_outer_schema(self, outer_schema: pa.Schema) -> "Buffer":
43 | fields = []
44 | for name in outer_schema.names:
45 | field: pa.Field = outer_schema.field(name)
46 | fields.append(field)
47 | if field.type != pa.string() and field.type != pa.binary():
48 | continue
49 | meta = field.metadata
50 | if meta and b"big" in meta:
51 | fields.append(pa.field(name + ".ref", pa.string()))
52 | self.big_fields.append(name)
53 | else:
54 | self.small_fields.append(name)
55 | self.inner_schema = pa.schema(fields)
56 | return self
57 |
58 | def encode(self, batch: pa.RecordBatch):
59 | # TODO: avoid pandas?
60 | df: pd.DataFrame = batch.to_pandas()
61 | out = df.apply(self.append, axis=1)
62 | return pa.RecordBatch.from_pandas(out, schema=self.inner_schema)
63 |
64 | def append(self, row):
65 | for name in self.small_fields:
66 | data = row[name]
67 | if data is None:
68 | continue
69 | if len(data) > SMALL_MAX:
70 | raise ValueError(f"field {name} is too long: “{len(data)}”")
71 |
72 | for name in self.big_fields:
73 | row[name + ".ref"] = None
74 | data = row[name]
75 | if data is None:
76 | continue
77 | if isinstance(data, str):
78 | data = data.encode("utf-8") # convert to bytes
79 | size = len(data) # size in bytes
80 | if size < STR_LIMIT:
81 | continue
82 | with self.lock:
83 | ref = f"{self.next_id}:{self.size}:{size}" # noqa # ref to the current position
84 | self.data.append(data) # append the new data to the buffer
85 | self.size += size # update the size of the buffer
86 | if self.size > MAX_BIGFILE_SIZE: # too much data? flush
87 | self.parent.upload(self.next_id, self.data)
88 | self.next_id = uuid.uuid4().hex
89 | self.data = []
90 | self.size = 0
91 | row[name + ".ref"] = ref
92 | row[name] = row[name][0:STR_MIN]
93 | return row
94 |
95 | def flush(self):
96 | with self.lock:
97 | if self.size > 0:
98 | self.parent.upload(self.next_id, self.data)
99 | self.data = []
100 | self.size = 0
101 | self.next_id = uuid.uuid4().hex
102 |
--------------------------------------------------------------------------------
/src/sdk/odp/client/tabular_v2/big/local.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | from typing import Iterable
4 |
5 | from odp.client.tabular_v2.big.big import BigCol
6 |
7 |
8 | class LocalBigCol(BigCol):
9 | def __init__(self, root: str):
10 | super().__init__()
11 | self.root = root
12 | os.makedirs(self.root, exist_ok=True)
13 |
14 | def fetch(self, big_id: str) -> bytes:
15 | logging.info("downloading %s", big_id)
16 | with open(f"{self.root}/{big_id}.big", "rb") as f:
17 | return f.read()
18 |
19 | def upload(self, big_id: str, data: Iterable[bytes]):
20 | logging.info("uploading %s", big_id)
21 | with open(f"{self.root}/{big_id}.big", "wb") as f:
22 | for d in data:
23 | f.write(d)
24 |
--------------------------------------------------------------------------------
/src/sdk/odp/client/tabular_v2/big/remote.py:
--------------------------------------------------------------------------------
1 | import gzip
2 | import io
3 | import logging
4 | from typing import Callable, Iterable
5 |
6 | from odp.client.tabular_v2.big.big import BigCol
7 | from odp.client.tabular_v2.util.cache import Cache
8 |
9 |
10 | class RemoteBigCol(BigCol):
11 | def __init__(
12 | self,
13 | uploader: Callable[[str, bytes], None],
14 | downloader: Callable[[str], bytes],
15 | root_cache: str,
16 | ):
17 | super().__init__()
18 | self.cache = Cache(root_cache)
19 | self.uploader = uploader
20 | self.downloader = downloader
21 | # TODO: make sure to not fill up the disk?
22 |
23 | def fetch(self, bid: str) -> bytes:
24 | with self.cache.key("big." + bid) as e:
25 | if not e.exists():
26 | logging.info("fetching %s", bid)
27 | comp = self.downloader(bid)
28 | e.set(comp)
29 | else:
30 | logging.info("cache hit %s", bid)
31 | comp = e.get()
32 | # if exists, use the cached version
33 | return gzip.decompress(comp)
34 |
35 | def upload(self, bid: str, data: Iterable[bytes]):
36 | with self.cache.key("big." + bid) as e:
37 | buf = io.BytesIO()
38 | with gzip.GzipFile(fileobj=buf, mode="wb") as f:
39 | for d in data:
40 | f.write(d)
41 | comp = buf.getvalue()
42 | self.uploader(bid, comp)
43 | e.set(comp)
44 |
--------------------------------------------------------------------------------
/src/sdk/odp/client/tabular_v2/bsquare/__init__.py:
--------------------------------------------------------------------------------
1 | from .bsquare import convert_query, convert_schema_inward, convert_schema_outward, decode, encode
2 |
--------------------------------------------------------------------------------
/src/sdk/odp/client/tabular_v2/bsquare/bsquare.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from typing import Optional
3 |
4 | import pyarrow as pa
5 | import shapely
6 | from odp.client.tabular_v2.bsquare.query import _QueryContext
7 | from odp.client.tabular_v2.util.exp import Op
8 |
9 |
10 | def convert_schema_inward(outer_schema: pa.Schema) -> pa.Schema:
11 | out = []
12 | for name in outer_schema.names:
13 | f = outer_schema.field(name)
14 | if f.metadata and b"isGeometry" in f.metadata:
15 | meta = f.metadata
16 | if b"index" in meta:
17 | new_meta = meta.copy()
18 | del new_meta[b"index"]
19 | f = f.with_metadata(new_meta)
20 | out.append(f)
21 | out.append(pa.field(name + ".x", pa.float64(), True, metadata=meta))
22 | out.append(pa.field(name + ".y", pa.float64(), True, metadata=meta))
23 | out.append(pa.field(name + ".q", pa.float64(), True, metadata=meta))
24 | else:
25 | out.append(f)
26 | return pa.schema(out)
27 |
28 |
29 | # convert the inner_schema to outer_schema
30 | def convert_schema_outward(inner_schema: pa.Schema) -> pa.Schema:
31 | geo_indexes = set()
32 |
33 | def is_subfield(schema: pa.Schema, f: pa.Field) -> bool:
34 | if "." not in f.name:
35 | return False
36 | left, right = f.name.rsplit(".", 1)
37 | if left not in schema.names:
38 | return False
39 | if schema.field(left).metadata and b"isGeometry" not in schema.field(left).metadata:
40 | return False
41 | if f.metadata and b"index" in f.metadata:
42 | geo_indexes.add(left)
43 | return True
44 |
45 | # create a new schema with only the fields that are not subfields
46 | fields = []
47 | for names in inner_schema.names:
48 | f = inner_schema.field(names)
49 | if not is_subfield(inner_schema, f):
50 | fields.append(f)
51 |
52 | # add back the "index" to the main field (which was removed when creating the subfields)
53 | for i, f in enumerate(fields):
54 | if f.name in geo_indexes:
55 | meta = f.metadata
56 | meta[b"index"] = b"1"
57 | fields[i] = f.with_metadata(meta)
58 | return pa.schema(fields)
59 |
60 |
61 | # convert outer query to inner query using bsquare in .x, .y and .q
62 | def convert_query(outer_schema: pa.Schema, outer_query: Optional[Op]) -> Optional[Op]:
63 | if outer_query is None:
64 | return None
65 |
66 | geo_fields = []
67 | for f in outer_schema:
68 | if f.metadata and b"isGeometry" in f.metadata:
69 | geo_fields.append(f.name)
70 |
71 | return _QueryContext(geo_fields).convert(outer_query)
72 |
73 |
74 | def decode(b: pa.RecordBatch) -> pa.RecordBatch:
75 | outer_schema = convert_schema_outward(b.schema)
76 | if b.num_rows == 0:
77 | return pa.RecordBatch.from_pylist([], schema=outer_schema)
78 | list = pa.Table.from_batches([b], schema=b.schema).select(outer_schema.names).to_batches()
79 | if len(list) != 1:
80 | raise ValueError("expected exactly one batch")
81 | return list[0]
82 |
83 |
84 | def encode(b: pa.RecordBatch) -> pa.RecordBatch:
85 | logging.info("bsquare encoding %d rows", b.num_rows)
86 | inner_schema = convert_schema_inward(b.schema)
87 | geo_names = []
88 | for name in b.schema.names:
89 | f = b.schema.field(name)
90 | if f.metadata and b"isGeometry" in f.metadata:
91 | geo_names.append(name)
92 |
93 |     # we encode row by row to make it simple to create multiple columns
94 | def _encode(row):
95 | for name in geo_names:
96 | if name in row and row[name] is not None:
97 | val = row[name]
98 | if isinstance(val, str):
99 | val = shapely.from_wkt(val)
100 | elif isinstance(val, bytes):
101 | val = shapely.from_wkb(val)
102 | else:
103 | raise ValueError(f"Unsupported type: {type(val)}")
104 | min_x, min_y, max_x, max_y = val.bounds
105 | row[name + ".x"] = (min_x + max_x) / 2
106 | row[name + ".y"] = (min_y + max_y) / 2
107 | row[name + ".q"] = max(max_x - min_x, max_y - min_y) / 2
108 | else:
109 | row[name + ".x"] = None
110 | row[name + ".y"] = None
111 | row[name + ".q"] = None
112 | return row
113 |
114 | d = b.to_pandas()
115 | for geo_name in geo_names:
116 | d[geo_name + ".x"] = None
117 | d[geo_name + ".y"] = None
118 | d[geo_name + ".q"] = None
119 | d = d.apply(func=_encode, axis=1)
120 | return pa.RecordBatch.from_pandas(d, schema=inner_schema)
121 |
122 |
123 | class BSquare:
124 |     geometry_fields = ["{col_name}.x", "{col_name}.y", "{col_name}.q"]  # name templates for the derived bounding-square columns
125 | 
126 |     def __init__(self, inner_schema: Optional[pa.Schema] = None):
127 |         # NOTE: the geo fields could be derived from inner_schema here and cached
128 |         self._inner_schema = inner_schema
129 |         self._geo_fields = []  # FIXME: derive this from the schema, then cache it
130 |
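A small worked sketch of the bounding-square encoding above: a column whose field metadata contains b"isGeometry" gets derived .x/.y/.q columns holding the bounding-box centre and half of the larger extent, and decode() drops them again. The schema and polygon are illustrative:

import pyarrow as pa

from odp.client.tabular_v2.bsquare import bsquare

outer_schema = pa.schema([pa.field("area", pa.string(), metadata={b"isGeometry": b"1"})])
batch = pa.RecordBatch.from_pylist(
    [{"area": "POLYGON ((0 0, 0 2, 4 2, 4 0, 0 0))"}], schema=outer_schema
)

inner = bsquare.encode(batch)
row = inner.to_pylist()[0]
# bounds are (0, 0, 4, 2): centre (2.0, 1.0), half of the larger extent is 2.0
assert (row["area.x"], row["area.y"], row["area.q"]) == (2.0, 1.0, 2.0)

outer = bsquare.decode(inner)  # the derived columns are dropped again
assert outer.schema.names == ["area"]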
--------------------------------------------------------------------------------
/src/sdk/odp/client/tabular_v2/bsquare/query.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import logging
3 | from typing import Optional
4 |
5 | import shapely
6 | from odp.client.tabular_v2.util import exp
7 |
8 |
9 | class _QueryContext:
10 | def __init__(self, geo_fields: list[str]):
11 | self.geo_fields = geo_fields
12 | self.negate = False
13 |
14 | def neg(self) -> "_QueryContext":
15 | c = copy.copy(self)
16 |         c.negate = not c.negate
17 | return c
18 |
19 | def is_geo_field(self, op: Optional[exp.Op]) -> bool:
20 | if isinstance(op, exp.Field):
21 | return op.name in self.geo_fields
22 | return False
23 |
24 | def convert(self, op: Optional[exp.Op]) -> Optional[exp.Op]:
25 | if op is None:
26 | return None
27 | if isinstance(op, exp.Parens):
28 | inner = self.convert(op.exp)
29 | if isinstance(inner, exp.Parens):
30 | return inner
31 | return exp.Parens(inner)
32 | if isinstance(op, exp.BinOp):
33 | if self.is_geo_field(op.left) or self.is_geo_field(op.right):
34 | if op.op in ["intersects", "contains", "within", "=="]:
35 | return self._convert_intersect(op)
36 | elif op.op in ["intersects", "contains", "within"]:
37 | raise ValueError(f"can't do '{op.op}' on non-geo fields")
38 | left = self.convert(op.left)
39 | right = self.convert(op.right)
40 | return exp.BinOp(left=left, op=op.op, right=right)
41 | if isinstance(op, exp.Field):
42 | return op
43 | if isinstance(op, exp.Scalar):
44 | return op
45 | if isinstance(op, exp.UnaryOp):
46 | cur = self
47 | if op.prefix == "~":
48 | cur = self.neg()
49 | return exp.UnaryOp(prefix=op.prefix, exp=cur.convert(op.exp), suffix=op.suffix)
50 | if isinstance(op, exp.Func):
51 | cur = self
52 | if op.name == "invert":
53 | cur = self.neg()
54 | args = [cur.convert(a) for a in op.args]
55 | return exp.Func(name=op.name, args=args)
56 | raise ValueError(f"can't convert {op}: {type(op)}")
57 |
58 | def _convert_intersect(self, op: exp.BinOp) -> exp.Op:
59 | if isinstance(op.left, exp.Field):
60 | if isinstance(op.right, exp.Scalar):
61 | geo = shapely.from_wkt(op.right.to_py())
62 | return self._intersect_field(op.left, geo)
63 | # if isinstance(op.right, exp.Field):
64 | # return exp.Scalar.from_py(~self.negate)
65 |
66 | if isinstance(op.right, exp.Field):
67 | if isinstance(op.left, exp.Scalar):
68 | geo = shapely.from_wkt(op.left.to_py())
69 | return self._intersect_field(op.right, geo)
70 |
71 | raise ValueError(f"unsupported: {type(op.left)} {op.op} {type(op.right)}")
72 |
73 | def _intersect_field(self, field: exp.Field, geo: shapely.Geometry) -> exp.Op:
74 | logging.info("intersecting field '%s' with '%s'", field, geo)
75 | fx = exp.Field(name=field.name + ".x")
76 | fy = exp.Field(name=field.name + ".y")
77 | fq = exp.Field(name=field.name + ".q")
78 | x0, y0, x1, y1 = shapely.bounds(geo).tolist()
79 | if self.negate:
80 | xop = exp.BinOp(
81 | exp.Parens(exp.BinOp(exp.BinOp(fx, "-", fq), ">=", exp.Scalar.from_py(x0))),
82 | "and",
83 | exp.Parens(exp.BinOp(exp.BinOp(fx, "+", fq), "<=", exp.Scalar.from_py(x1))),
84 | )
85 | yop = exp.BinOp(
86 | exp.Parens(exp.BinOp(exp.BinOp(fy, "-", fq), ">=", exp.Scalar.from_py(y0))),
87 | "and",
88 | exp.Parens(exp.BinOp(exp.BinOp(fy, "+", fq), "<=", exp.Scalar.from_py(y1))),
89 | )
90 | else:
91 | xop = exp.BinOp(
92 | exp.Parens(exp.BinOp(exp.BinOp(fx, "+", fq), ">=", exp.Scalar.from_py(x0))),
93 | "and",
94 | exp.Parens(exp.BinOp(exp.BinOp(fx, "-", fq), "<=", exp.Scalar.from_py(x1))),
95 | )
96 | yop = exp.BinOp(
97 | exp.Parens(exp.BinOp(exp.BinOp(fy, "+", fq), ">=", exp.Scalar.from_py(y0))),
98 | "and",
99 | exp.Parens(exp.BinOp(exp.BinOp(fy, "-", fq), "<=", exp.Scalar.from_py(y1))),
100 | )
101 | return exp.Parens(exp.BinOp(xop, "and", yop))
102 |
103 |
104 | def test_query():
105 | op = exp.parse("color == 'red' and not (area intersect 'POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))')")
106 | logging.info("'%s'...", op)
107 | c = _QueryContext(["area"])
108 | op2 = c.convert(op)
109 | logging.info("'%s'...", op2)
110 | assert "color == 'red'" in str(op2)
111 | assert "area.x - area.q >= 0" in str(op2) # inverted sign
112 |
113 | # check that raises exception if intersect with no geo field
114 | try:
115 | op = exp.parse("other_field intersect 'POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))'")
116 | c = _QueryContext([])
117 | c.convert(op)
118 | except ValueError as e:
119 | assert "intersect" in str(e)
120 | else:
121 | assert False
122 |
--------------------------------------------------------------------------------
/src/sdk/odp/client/tabular_v2/client/__init__.py:
--------------------------------------------------------------------------------
1 | from .client import Client
2 | from .table_cursor import Cursor
3 | from .table_tx import Transaction
4 | from .tablehandler import TableHandler
5 |
--------------------------------------------------------------------------------
/src/sdk/odp/client/tabular_v2/client/client.py:
--------------------------------------------------------------------------------
1 | import io
2 | import logging
3 | from typing import TYPE_CHECKING, Dict, Iterator, Optional, Union
4 |
5 | import requests
6 | from odp.client.tabular_v2.util.reader import Iter2Reader
7 |
8 | if TYPE_CHECKING:
9 | from odp.client.tabular_v2.client import TableHandler
10 |
11 |
12 | class Client:
13 | def __init__(self, base_url: str):
14 | self._base_url = base_url
15 |
16 | class Response:
17 | # Abstraction for response object, shared between http client and test client
18 | def __init__(self, res: Union[requests.Response, Iterator[bytes], Dict, bytes]):
19 | if isinstance(res, requests.Response):
20 | if res.status_code == 204:
21 | raise FileNotFoundError(res.text)
22 | res.raise_for_status()
23 | logging.info("response: %s", res)
24 | self.res = res
25 |
26 | def reader(self):
27 | if isinstance(self.res, bytes):
28 | return io.BytesIO(self.res)
29 | if isinstance(self.res, Iterator):
30 | return Iter2Reader(self.res)
31 | return self.res.raw
32 |
33 | def iter(self) -> Iterator[bytes]:
34 | if isinstance(self.res, bytes):
35 | return iter([self.res])
36 | if isinstance(self.res, Iterator):
37 | return self.res
38 | return self.res.iter_content()
39 |
40 | def all(self) -> bytes:
41 | if isinstance(self.res, bytes):
42 | return self.res
43 | if isinstance(self.res, Iterator):
44 | return b"".join(self.res)
45 | return self.res.content
46 |
47 | def json(self) -> dict:
48 | if isinstance(self.res, dict):
49 | return self.res
50 | return self.res.json()
51 |
52 | def _request(
53 | self,
54 | path: str,
55 | data: Union[Dict, bytes, None] = None,
56 | params: Optional[Dict] = None,
57 | headers: Optional[Dict] = None,
58 | ) -> Response:
59 | logging.info("ktable: REQ %s %s (%d bytes)", path, params, len(data) if data else 0)
60 | if isinstance(data, dict):
61 | res = requests.post(self._base_url + path, headers=headers, params=params, json=data, stream=True)
62 | elif isinstance(data, bytes):
63 | res = requests.post(self._base_url + path, headers=headers, params=params, data=data, stream=True)
64 | elif isinstance(data, Iterator):
65 | res = requests.post(self._base_url + path, headers=headers, params=params, data=data, stream=True)
66 | elif data is None:
67 | res = requests.post(self._base_url + path, headers=headers, params=params, stream=True)
68 | else:
69 | raise ValueError(f"unexpected type {type(data)}")
70 | logging.info("response: %s", res.status_code)
71 | return self.Response(res)
72 |
73 | # @lru_cache(maxsize=10)
74 | def table(self, table_id: str) -> "TableHandler":
75 | from odp.client.tabular_v2.client.tablehandler import TableHandler
76 |
77 | return TableHandler(self, table_id)
78 |
--------------------------------------------------------------------------------
/src/sdk/odp/client/tabular_v2/client/table_cursor.py:
--------------------------------------------------------------------------------
1 | from typing import Callable, Iterator
2 |
3 | import pyarrow as pa
4 |
5 |
6 | class CursorException(Exception):
7 | """Raised when the client is required to connect again with the given cursor to fetch more data"""
8 |
9 | def __init__(self, cursor: str):
10 | self.cursor = cursor
11 |
12 |
13 | class Cursor:
14 | def __init__(
15 | self,
16 | scanner: Callable[[str], Iterator[pa.RecordBatch]],
17 | ):
18 | self.scanner = scanner
19 |
20 | def batches(self) -> Iterator[pa.RecordBatch]:
21 | cursor = ""
22 | while True:
23 | try:
24 | for b in self.scanner(cursor):
25 | yield b
26 | except CursorException as e:
27 | cursor = e.cursor
28 |                 continue  # got a new cursor: restart the scan from it
29 | break
30 |
31 | def rows(self) -> Iterator[dict]:
32 | for b in self.batches():
33 | for row in b.to_pylist():
34 | yield row
35 |
36 | def pages(self, size: int = 0) -> Iterator[list[dict]]:
37 | if size < 1: # page based on what we get
38 | for b in self.batches():
39 |                 yield b.to_pylist()
40 | return
41 |
42 | # page based on page_size
43 | buf: list[dict] = []
44 | for b in self.batches():
45 |             buf.extend(b.to_pylist())
46 | while len(buf) >= size:
47 | yield buf[:size]
48 | buf = buf[size:]
49 | if len(buf) > 0:
50 | yield buf
51 |
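A minimal sketch of the cursor protocol above: the scanner yields record batches and raises CursorException when the server returns a continuation cursor, and Cursor transparently reconnects from it. The scanner and data are illustrative:

from typing import Iterator

import pyarrow as pa

from odp.client.tabular_v2.client.table_cursor import Cursor, CursorException


def scanner(cursor: str) -> Iterator[pa.RecordBatch]:
    if cursor == "":
        yield pa.RecordBatch.from_pylist([{"n": 1}, {"n": 2}])
        raise CursorException("page-2")  # ask the caller to reconnect from "page-2"
    yield pa.RecordBatch.from_pylist([{"n": 3}])


assert [r["n"] for r in Cursor(scanner=scanner).rows()] == [1, 2, 3]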
--------------------------------------------------------------------------------
/src/sdk/odp/client/tabular_v2/client/table_tx.py:
--------------------------------------------------------------------------------
1 | import io
2 | import logging
3 | from typing import Dict, Iterator, List, Union
4 |
5 | import pyarrow as pa
6 | from odp.client.tabular_v2 import big
7 | from odp.client.tabular_v2.bsquare import bsquare
8 | from odp.client.tabular_v2.client.table_cursor import CursorException
9 | from odp.client.tabular_v2.client.tablehandler import TableHandler
10 | from odp.client.tabular_v2.util import exp
11 | from odp.client.tabular_v2.util.reader import Iter2Reader
12 |
13 |
14 | class Transaction:
15 | def __init__(self, table: TableHandler, tx_id: str):
16 | if not tx_id:
17 | raise ValueError("tx_id must not be empty")
18 | self._table = table
19 | self._id = tx_id
20 | self._buf: list[pa.RecordBatch] = []
21 | self._buf_rows = 0
22 | self._big_buf: big.Buffer = big.Buffer(table._bigcol).with_inner_schema(table._inner_schema)
23 | self._old_rid = None
24 |
25 | def select(self, query: Union[exp.Op, str, None] = None) -> Iterator[dict]:
26 | for row in self._table.select(query).rows():
27 | yield row
28 |
29 | def replace(self, query: Union[exp.Op, str, None] = None) -> Iterator[dict]:
30 | """perform a two-step replace:
31 | rows that don't match the query are kept.
32 | rows that match are removed and sent to the caller.
33 | the caller might insert them again or do something else.
34 | """
35 | if query is None:
36 | raise ValueError("For your own safety, please provide a query like 1==1")
37 | assert self._buf_rows == 0 # FIXME: handle buffered data in replace/select
38 | if isinstance(query, str):
39 | query = exp.parse(query)
40 | inner_query = bsquare.convert_query(self._table._outer_schema, query)
41 | inner_query = big.inner_exp(self._table._inner_schema, inner_query)
42 | inner_query = str(inner_query.pyarrow())
43 |
44 | def scanner(cursor: str) -> Iterator[pa.RecordBatch]:
45 | res = self._table._client._request(
46 | path="/api/table/v2/replace",
47 | params={
48 | "table_id": self._table._id,
49 | "tx_id": self._id,
50 | },
51 | data={
52 | "query": inner_query,
53 | "cursor": cursor,
54 | },
55 | )
56 | r = Iter2Reader(res.iter())
57 | r = pa.ipc.RecordBatchStreamReader(r)
58 | for bm in r.iter_batches_with_custom_metadata():
59 | if bm.custom_metadata:
60 | meta = bm.custom_metadata
61 | if b"cursor" in meta:
62 | raise CursorException(meta[b"cursor"].decode())
63 | if b"error" in meta:
64 | raise ValueError("remote: " + meta[b"error"].decode())
65 | if bm.batch:
66 | yield bm.batch
67 |
68 | from odp.client.tabular_v2.client import Cursor
69 |
70 | for b in Cursor(scanner=scanner).batches():
71 | b = self._table._bigcol.decode(b) # TODO(oha): use buffer for partial big files not uploaded
72 | b = bsquare.decode(b)
73 | tab = pa.Table.from_batches([b], schema=self._table._outer_schema)
74 | for b2 in tab.filter(~query.pyarrow()).to_batches():
75 | if b2.num_rows > 0:
76 | self.insert(b2)
77 |
78 | for b2 in tab.filter(query.pyarrow()).to_batches():
79 | for row in b2.to_pylist():
80 | yield row
81 |
82 | def delete(self, query: Union[exp.Op, str, None] = None) -> int:
83 | ct = 0
84 | for _ in self.replace(query):
85 | ct += 1
86 | return ct
87 |
88 | def flush(self):
89 | logging.info("flushing to stage %s", self._id)
90 | if len(self._buf) == 0:
91 | return
92 | buf = io.BytesIO()
93 | w = pa.ipc.RecordBatchStreamWriter(buf, self._table._inner_schema)
94 | for b in self._buf:
95 | if isinstance(b, list):
96 | b = pa.RecordBatch.from_pylist(b, schema=self._table._outer_schema)
97 | b = bsquare.encode(b)
98 | b = self._big_buf.encode(b)
99 | w.write_batch(b)
100 | w.close()
101 | self._table._client._request(
102 | path="/api/table/v2/insert",
103 | params={
104 | "table_id": self._table._id,
105 | "tx_id": self._id,
106 | },
107 | data=buf.getvalue(),
108 | ).json()
109 | self._buf = []
110 | self._buf_rows = 0
111 |
112 | def insert(self, data: Union[Dict, List[Dict], pa.RecordBatch]):
113 | """queue data to be inserted on flush()"""
114 | if isinstance(data, dict):
115 | data = [data]
116 | if isinstance(data, list):
117 | # we expand the last list if it's already a list
118 | last = self._buf[-1] if self._buf else None
119 | if last and isinstance(last, list):
120 | last.extend(data)
121 | else:
122 | self._buf.append(data)
123 | self._buf_rows += len(data)
124 | elif isinstance(data, pa.RecordBatch):
125 | self._buf.append(data)
126 | self._buf_rows += data.num_rows
127 | else:
128 | raise ValueError(f"unexpected type {type(data)}")
129 |
130 | if self._buf_rows > 10_000:
131 | self.flush()
132 |
--------------------------------------------------------------------------------
/src/sdk/odp/client/tabular_v2/util/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/C4IROcean-archive/odp-sdk-python/cfe9d937e620dfa8f35e6e92646d9625de5a0de9/src/sdk/odp/client/tabular_v2/util/__init__.py
--------------------------------------------------------------------------------
/src/sdk/odp/client/tabular_v2/util/cache.py:
--------------------------------------------------------------------------------
1 | import base64
2 | import logging
3 | import os
4 | import threading
5 | from time import time
6 | from typing import Optional
7 |
8 | from odp.client.tabular_v2.util.util import size2human
9 |
10 |
11 | class Cache:
12 | class Entry:
13 | def __init__(self, key: str, cache: "Cache"):
14 | self.key = key
15 | self.lock = threading.Lock()
16 | self.cache = cache
17 | self.filename = base64.b64encode(key.encode()).decode()
18 | self.size = 0
19 |
20 | def set(self, value: bytes) -> bool:
21 | if len(value) > self.cache.max_entry_size:
22 | return False
23 |
24 | self.cache._make_space(len(value))
25 | self.cache.tot_bytes -= self.size # if replacing, this will be non-zero
26 | self.size = len(value)
27 | self.cache.tot_bytes += self.size
28 |
29 | with open(os.path.join(self.cache.root_folder, self.filename), "wb") as f:
30 | f.write(value)
31 |
32 | def exists(self) -> bool:
33 | filename = os.path.join(self.cache.root_folder, self.filename)
34 | return os.path.exists(filename)
35 |
36 | def age(self) -> float:
37 | return time() - os.path.getctime(os.path.join(self.cache.root_folder, self.filename))
38 |
39 | def get(self, max_age: Optional[float] = None) -> Optional[bytes]:
40 | try:
41 | if max_age is not None:
42 | if self.age() > max_age: # expired
43 | logging.info("expired %s (age: %.f > %.f)", self.key, self.age(), max_age)
44 | # TODO remove?
45 | return None
46 | with open(os.path.join(self.cache.root_folder, self.filename), "rb") as f:
47 | return f.read()
48 | except FileNotFoundError:
49 | return None
50 |
51 | def touch(self):
52 | file_path = os.path.join(self.cache.root_folder, self.filename)
53 | if not os.path.exists(file_path):
54 | return
55 | os.utime(file_path)
56 |
57 | def unlink(self):
58 | try:
59 | os.unlink(os.path.join(self.cache.root_folder, self.filename))
60 | except FileNotFoundError:
61 | logging.info(
62 | "removing but already gone: %s (%s)", self.key, os.path.join(self.cache.root_folder, self.filename)
63 | )
64 |
65 | def __enter__(self):
66 | self.lock.acquire()
67 | return self
68 |
69 | def __exit__(self, exc_type, exc_val, exc_tb):
70 | self.lock.release()
71 |
72 | def __init__(self, folder: str, max_entries=100, max_bytes=64 * 1024 * 1024):
73 | self.lock = threading.Lock()
74 | self.cache = []
75 | self.root_folder = folder
76 | self.max_entries = max_entries
77 | self.max_bytes = max_bytes
78 | self.max_entry_size = max_bytes // 16
79 | self.tot_bytes = 0
80 |
81 | os.makedirs(self.root_folder, exist_ok=True)
82 | # list files by mtime
83 | files = sorted(
84 | os.listdir(self.root_folder), key=lambda file: os.path.getmtime(os.path.join(self.root_folder, file))
85 | )
86 | for f in files:
87 | key = base64.b64decode(f.encode()).decode()
88 | e = Cache.Entry(key, self)
89 | size = os.path.getsize(os.path.join(self.root_folder, f))
90 | self.tot_bytes += size
91 | assert f == e.filename
92 | # logging.debug("recovered %s file %s", size2human(size), key)
93 | self.cache.append(e)
94 |
95 | self._make_space(0)
96 | # logging.debug("recovered %d files for a total of %s", len(self.cache), size2human(self.tot_bytes))
97 |
98 | def _make_space(self, space_needed):
99 | with self.lock:
100 | while self.tot_bytes + space_needed > self.max_bytes:
101 | # FIXME: Needs to be properly handled
102 | if len(self.cache) == 0:
103 | self.tot_bytes = 0
104 | return
105 | e = self.cache.pop(0)
106 | try:
107 | size = os.path.getsize(os.path.join(self.root_folder, e.filename))
108 | self.tot_bytes -= size
109 | e.unlink()
110 | logging.info("evicted %s file %s", size2human(size), e.key)
111 | except FileNotFoundError:
112 | logging.info("evicted but already gone: %s", e.key)
113 |
114 | def remove(self, key):
115 | with self.lock:
116 | for e in self.cache:
117 | if e.key == key:
118 | self.cache.remove(e)
119 | e.unlink()
120 | self.tot_bytes -= e.size
121 |
122 | def key(self, key):
123 | with self.lock:
124 | for e in self.cache:
125 | if e.key == key:
126 | self.cache.remove(e)
127 | self.cache.append(e) # move to end
128 | return e
129 | if len(self.cache) >= self.max_entries:
130 | e = self.cache.pop(0)
131 | self.tot_bytes -= e.size
132 | e.unlink()
133 | e = Cache.Entry(key, self)
134 | self.cache.append(e)
135 | return e
136 |
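A minimal usage sketch of the Cache above, mirroring how RemoteBigCol uses it: key() returns a locked entry that can be checked, filled and read back. The directory, key and sizes are illustrative:

import tempfile

from odp.client.tabular_v2.util.cache import Cache

with tempfile.TemporaryDirectory() as tmp:
    cache = Cache(tmp, max_entries=10, max_bytes=1024 * 1024)
    with cache.key("big.abc") as e:
        if not e.exists():
            e.set(b"expensive-to-compute bytes")  # older entries are evicted if space runs out
        data = e.get()
    assert data == b"expensive-to-compute bytes"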
--------------------------------------------------------------------------------
/src/sdk/odp/client/tabular_v2/util/reader.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from typing import Iterator
3 |
4 |
5 | class Reader:
6 | def read(self, size: int = -1) -> bytes:
7 | raise NotImplementedError()
8 |
9 |
10 | class Writer:
11 | def write(self, data: bytes):
12 | raise NotImplementedError()
13 |
14 | def close(self):
15 | pass
16 |
17 |
18 | class Iter2Reader(Reader):
19 | """
20 | convert a byte iterator to a file-like object
21 | reads will attempt to read the next bytes from the iterator when needed
22 |
23 | FIXME: seems broken when used with real cases, avoid using
24 | """
25 |
26 | def __init__(self, i: Iterator[bytes]):
27 | self.iter = i
28 | self.closed = False
29 | self.buf = b""
30 |
31 | def preload(self):
32 | if not self.iter:
33 | return self
34 | try:
35 | self.buf += next(self.iter)
36 | except StopIteration:
37 | self.iter = None
38 | return self
39 |
40 | def read_some(self) -> bytes:
41 | if not self.buf:
42 | self.preload()
43 | out = self.buf
44 | self.buf = b""
45 | logging.debug("read %d", len(out))
46 | return out
47 |
48 | def read(self, size: int = -1) -> bytes:
49 | logging.debug("reading...")
50 | if size < 0:
51 | return self.read_some()
52 | while len(self.buf) < size:
53 | if not self.iter:
54 | break
55 | self.preload()
56 | ret = self.buf[:size]
57 | self.buf = self.buf[len(ret) :]
58 | logging.debug("read %d", len(ret))
59 | return ret
60 |
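A minimal sketch of Iter2Reader, which the clients above use to feed a byte iterator (such as a streamed HTTP response) into readers that expect a file-like object; the chunks are illustrative:

from odp.client.tabular_v2.util.reader import Iter2Reader

r = Iter2Reader(iter([b"hello ", b"world"]))
assert r.read(5) == b"hello"
assert r.read() == b" "  # read() without a size only drains what is already buffered
assert r.read() == b"world"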
--------------------------------------------------------------------------------
/src/sdk/odp/client/tabular_v2/util/util.py:
--------------------------------------------------------------------------------
1 | from math import log2
2 |
3 | IEC_UNITS = ["KiB", "MiB", "GiB", "TiB"]
4 |
5 |
6 | def size2human(size: int) -> str:
7 | if size == 0:
8 | return "0B"
9 | p = int(log2(size) // 10.0)
10 |
11 | if p < 1:
12 | return f"{size}B"
13 | if p > len(IEC_UNITS):
14 | p = len(IEC_UNITS)
15 | converted_size = size / 1024**p
16 | return f"{converted_size:.1f}{IEC_UNITS[p - 1]}" # noqa
17 |
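A few illustrative values for size2human as defined above:

from odp.client.tabular_v2.util.util import size2human

assert size2human(0) == "0B"
assert size2human(512) == "512B"
assert size2human(2048) == "2.0KiB"
assert size2human(3 * 1024 * 1024) == "3.0MiB"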
--------------------------------------------------------------------------------
/src/sdk/odp/client/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .geometry_conversion import convert_geometry
2 | from .package_utils import get_version
3 |
--------------------------------------------------------------------------------
/src/sdk/odp/client/utils/geometry_conversion.py:
--------------------------------------------------------------------------------
1 | import json
2 | from typing import Optional, Union
3 |
4 | import geojson
5 | from shapely import wkb, wkt
6 | from shapely.geometry import shape
7 |
8 |
9 | def convert_geometry(
10 | data: Union[str, dict, list, bytes], result_geometry: str, rounding_precision: Optional[int] = None
11 | ):
12 | if result_geometry == "wkb":
13 | return _convert_geometry_to_wkb(data)
14 | elif result_geometry == "wkt":
15 | return _convert_geometry_to_wkt(data, rounding_precision)
16 | elif result_geometry == "geojson":
17 | if _is_geojson(data):
18 | return data
19 | return _convert_geometry_to_geojson(data)
20 |
21 |
22 | def _convert_geometry_to_wkb(data: Union[str, dict, list]):
23 | if _is_geojson(data):
24 | return _convert_geojson_to_wkb(data)
25 | if isinstance(data, str):
26 | try:
27 | return _convert_wkt_to_wkb(data)
28 | except Exception:
29 | pass
30 | elif isinstance(data, dict):
31 | for key in data:
32 | value = data[key]
33 | data[key] = _convert_geometry_to_wkb(value)
34 | elif isinstance(data, list):
35 | for i, row in enumerate(data):
36 | try:
37 | data[i] = _convert_geometry_to_wkb(row)
38 | except Exception:
39 | continue
40 | return data
41 |
42 |
43 | def _convert_geometry_to_wkt(data: Union[str, dict, list, bytes], rounding_precision: Optional[int] = None):
44 | if _is_geojson(data):
45 | return _convert_geojson_to_wkt(data)
46 | if isinstance(data, (str, bytes)):
47 | try:
48 | return _convert_wkb_to_wkt(data, rounding_precision)
49 | except Exception:
50 | pass
51 | elif isinstance(data, dict):
52 | for key in data:
53 | value = data[key]
54 | data[key] = _convert_geometry_to_wkt(value, rounding_precision)
55 | elif isinstance(data, list):
56 | for i, row in enumerate(data):
57 | try:
58 | data[i] = _convert_geometry_to_wkt(row, rounding_precision)
59 | except Exception:
60 | continue
61 | return data
62 |
63 |
64 | def _convert_geometry_to_geojson(data: Union[str, dict, list, bytes]):
65 | if isinstance(data, str):
66 | try:
67 | if _is_wkt(data):
68 | return _convert_wkt_to_geojson(data)
69 | else:
70 | return _convert_wkb_to_geojson(data)
71 | except Exception:
72 | pass
73 | elif isinstance(data, bytes):
74 | try:
75 | return _convert_wkb_to_geojson(data)
76 | except Exception:
77 | pass
78 | elif isinstance(data, dict):
79 | for key in data:
80 | value = data[key]
81 | data[key] = _convert_geometry_to_geojson(value)
82 | elif isinstance(data, list):
83 | for i, row in enumerate(data):
84 | try:
85 | data[i] = _convert_geometry_to_geojson(row)
86 | except Exception:
87 | continue
88 | return data
89 |
90 |
91 | def _convert_geojson_to_wkb(geojson_data: Union[dict, str]) -> bytes:
92 | if isinstance(geojson_data, dict):
93 | geojson_data = json.dumps(geojson_data)
94 | geo = geojson.loads(geojson_data)
95 | return shape(geo).wkb
96 |
97 |
98 | def _convert_geojson_to_wkt(geojson_data: Union[dict, str]) -> str:
99 | if isinstance(geojson_data, dict):
100 | geojson_data = json.dumps(geojson_data)
101 | geo = geojson.loads(geojson_data)
102 | return shape(geo).wkt
103 |
104 |
105 | def _convert_wkb_to_geojson(wkb_data: Union[bytes, str]) -> dict:
106 | geo = wkb.loads(wkb_data)
107 | return geojson.Feature(geometry=geo, properties={}).geometry
108 |
109 |
110 | def _convert_wkb_to_wkt(wkb_data: Union[bytes, str], rounding_precision: Optional[int] = None) -> str:
111 | if rounding_precision:
112 | return wkt.dumps(wkb.loads(wkb_data), rounding_precision=rounding_precision)
113 | return wkt.dumps(wkb.loads(wkb_data))
114 |
115 |
116 | def _convert_wkt_to_geojson(wkt_data: str) -> dict:
117 | geo = wkt.loads(wkt_data)
118 | return geojson.Feature(geometry=geo, properties={}).geometry
119 |
120 |
121 | def _convert_wkt_to_wkb(wkt_data: str) -> bytes:
122 | return wkb.dumps(wkt.loads(wkt_data))
123 |
124 |
125 | def _is_geojson(data) -> bool:
126 | if isinstance(data, dict):
127 | return len(data.keys()) == 2 and "type" in data and "coordinates" in data
128 | elif isinstance(data, str):
129 | try:
130 | return _is_geojson(json.loads(data))
131 | except Exception:
132 | return False
133 | return False
134 |
135 |
136 | def _is_wkt(data: str) -> bool:
137 | # Cheap way of checking if the value is a WKT string
138 | # Simply see if the first character is the first letter of a WKT-Object:
139 | # P: Point, Polygon
140 | # L: LineString
141 | # M: MultiPoint, MultiPolygon, MultiLineString
142 | # G: GeometryCollection
143 | return data[0].upper() in {"P", "L", "M", "G"}
144 |
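A minimal sketch of convert_geometry as defined above; it also walks nested dicts and lists and converts their geometry values in place. The sample geometries are illustrative:

from odp.client.utils.geometry_conversion import convert_geometry

geojson_point = {"type": "Point", "coordinates": [10.0, 59.0]}
wkb_bytes = convert_geometry(geojson_point, "wkb")  # GeoJSON dict -> WKB bytes
assert convert_geometry(wkb_bytes, "wkt", rounding_precision=1) == "POINT (10.0 59.0)"
assert convert_geometry("POINT (10 59)", "geojson")["type"] == "Point"  # WKT -> GeoJSON dict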
--------------------------------------------------------------------------------
/src/sdk/odp/client/utils/json.py:
--------------------------------------------------------------------------------
1 | from typing import IO, Any, Callable, Dict, List, Optional, Protocol, Type, Union
2 |
3 | JsonType = Union[None, int, str, bool, List["JsonType"], Dict[str, "JsonType"]]
4 |
5 |
6 | class JsonParser(Protocol):
7 | """JSON serialization/deserialization interface"""
8 |
9 | @staticmethod
10 | def load(
11 | fp,
12 | decoded_type: Type,
13 | cast_decimal: bool = True,
14 | cls: Optional[Type] = None,
15 | parse_float: Optional[Callable[[str], float]] = None,
16 | parse_int: Optional[Callable[[str], int]] = None,
17 | parse_constant: Optional[Callable[[str], JsonType]] = None,
18 | **kwargs
19 | ) -> JsonType:
20 | ...
21 |
22 | @staticmethod
23 | def loads(
24 | s: str,
25 | cast_decimal: bool = True,
26 | cls: Optional[Type] = None,
27 | parse_float: Optional[Callable[[str], float]] = None,
28 | parse_int: Optional[Callable[[str], int]] = None,
29 | parse_constant: Optional[Callable[[str], JsonType]] = None,
30 | **kwargs
31 | ) -> JsonType:
32 | ...
33 |
34 | @staticmethod
35 | def dump(
36 | obj: Any,
37 | fp: IO,
38 | skipkeys: bool = False,
39 | ensure_ascii: bool = True,
40 | check_circular: bool = True,
41 | allow_nan: bool = True,
42 | cls: Optional[Type] = None,
43 | indent=None,
44 | separators=None,
45 | default=None,
46 | sort_keys=False,
47 | **kwargs
48 | ):
49 | ...
50 |
51 | @staticmethod
52 | def dumps(
53 | obj: Any,
54 | skipkeys: bool = False,
55 | ensure_ascii: bool = True,
56 | check_circular: bool = True,
57 | allow_nan: bool = True,
58 | cls: Optional[Type] = None,
59 | indent: Optional[int] = None,
60 | separators: Optional[str] = None,
61 | default: Optional[Callable[[str], str]] = None,
62 | sort_keys: bool = False,
63 | **kwargs
64 | ) -> str:
65 | ...
66 |
--------------------------------------------------------------------------------
/src/sdk/odp/client/utils/ndjson.py:
--------------------------------------------------------------------------------
1 | import json
2 | from collections import deque
3 | from io import StringIO
4 | from typing import IO, Deque, Iterable, Optional, Sized, Union, cast
5 | from warnings import warn
6 |
7 | from .json import JsonParser, JsonType
8 |
9 |
10 | def parse_ndjson(iter: Iterable[bytes]) -> Iterable:
11 | """
12 | Parse NDJSON from an iterable of bytes
13 | returns an iterator of parsed JSON objects
14 | """
15 | buf = b""
16 | for s in iter:
17 | buf += s
18 | lines = buf.split(b"\n")
19 | buf = lines[-1]
20 | for line in lines[:-1]:
21 | yield json.loads(line)
22 |
23 | if buf:
24 | yield json.loads(buf)
25 |
26 |
27 | BacklogDataT = Union[Iterable[str], Sized]
28 | DEFAULT_JSON_PARSER = cast(JsonParser, json)
29 |
30 |
31 | class NdJsonParser:
32 | """Newline delimited JSON parser
33 |
34 | Parses NDJSON from a file-like object or a string.
35 | """
36 |
37 | def __init__(
38 | self,
39 | s: Union[str, bytes, None] = None,
40 | fp: Union[IO, Iterable[bytes], None] = None,
41 | json_parser: JsonParser = DEFAULT_JSON_PARSER,
42 | ):
43 | """Initialize the parser
44 |
45 | Args:
46 | s: String to parse, either this or 'fp' must be set
47 | fp: File-like object to parse, either this or 's' must be set
48 | json_parser: JSON parser to use, defaults to the standard `json` module
49 | """
50 | self.json_parser = json_parser
51 | self.line = []
52 | self.delimiter_stack: Deque[str] = deque()
53 | self.backlog: Optional[BacklogDataT] = None
54 |
55 | if s and fp:
56 |             raise ValueError("Either 's' or 'fp' must be set, but not both")
57 | elif not s and not fp:
58 | raise ValueError("Either 's' or 'fp' must be set")
59 |
60 | if fp:
61 | self.fb = fp
62 | elif isinstance(s, str):
63 | self.fb = StringIO(s)
64 | else:
65 | self.fb = StringIO(s.decode())
66 |
67 | def _consume_line(self) -> JsonType:
68 | """Consume a line from the file-like object
69 |
70 | Returns:
71 | Parsed JSON object
72 | """
73 | if self.delimiter_stack:
74 | warn("Attempting to parse NDJSON line while the delimiter stack was non-empty")
75 |
76 | obj = self.json_parser.loads("".join(self.line))
77 | self.line = []
78 | self.delimiter_stack.clear()
79 |
80 | return obj
81 |
82 | def _have_backlog(self) -> bool:
83 | return self.backlog is not None
84 |
85 | def _backlog_data(self, data: BacklogDataT):
86 | self.backlog = data
87 |
88 | def _consume_backlog(self) -> BacklogDataT:
89 | if self.backlog is None:
90 | raise ValueError("No backlog data to consume")
91 | data = self.backlog
92 | self.backlog = None
93 | return data
94 |
95 | def _load_next(self) -> BacklogDataT:
96 | if self._have_backlog():
97 | return self._consume_backlog()
98 | ret = next(self.fb)
99 | if isinstance(ret, bytes):
100 | return ret.decode()
101 | return ret
102 |
103 | def __iter__(self) -> Iterable[JsonType]:
104 | return cast(Iterable[JsonType], self)
105 |
106 | def __next__(self) -> JsonType:
107 | while True:
108 | try:
109 | s = self._load_next()
110 | except StopIteration:
111 | if len(self.line) > 0:
112 | return self._consume_line()
113 | raise
114 |
115 | for idx, c in enumerate(s):
116 | c = chr(c) if isinstance(c, int) else c
117 | last_delimiter = self.delimiter_stack[-1] if self.delimiter_stack else None
118 |
119 | in_quote = last_delimiter in {"'", '"', "\\"}
120 |
121 | if c == "\n" and not in_quote:
122 | if idx + 1 < len(s):
123 | self.backlog = s[idx + 1 :]
124 | return self._consume_line()
125 |
126 | self.line.append(c)
127 | if in_quote:
128 | if last_delimiter == "\\":
129 | self.delimiter_stack.pop()
130 | elif c == "\\":
131 | self.delimiter_stack.append(c)
132 | elif c == last_delimiter:
133 | self.delimiter_stack.pop()
134 |
135 | continue
136 |
137 | is_quote = c in {"'", '"'}
138 | if is_quote:
139 | self.delimiter_stack.append(c)
140 | continue
141 |
142 | is_opening_bracket = c in {"{", "["}
143 |
144 | if is_opening_bracket:
145 | self.delimiter_stack.append(c)
146 | continue
147 |
148 | in_bracket = last_delimiter in {"{", "["}
149 | is_closing_bracket = c in {"}", "]"}
150 |
151 | if is_closing_bracket:
152 | if not in_bracket:
153 | raise ValueError(f"Got unexpected delimiter: {c}")
154 |
155 | if last_delimiter == "{" and c == "}":
156 | self.delimiter_stack.pop()
157 | elif last_delimiter == "[" and c == "]":
158 | self.delimiter_stack.pop()
159 | else:
160 | raise ValueError(f"Got unexpected delimiter: {c}")
161 |
162 |
163 | def load(fp: IO, json_parser: JsonParser = DEFAULT_JSON_PARSER) -> Iterable[JsonType]:
164 | """Load NDJSON from a file-like object
165 |
166 | Args:
167 | fp: File-like object
168 | json_parser: JSON parser to use
169 |
170 | Returns:
171 | Iterable of parsed JSON objects
172 | """
173 | return iter(NdJsonParser(fp=fp, json_parser=json_parser))
174 |
175 |
176 | def loads(s: Union[str, bytes], json_parser: JsonParser = DEFAULT_JSON_PARSER) -> Iterable[JsonType]:
177 | """Load NDJSON from a string
178 |
179 | Args:
180 | s: String
181 | json_parser: JSON parser to use
182 |
183 | Returns:
184 | Iterable of parsed JSON objects
185 | """
186 | return iter(NdJsonParser(s=s, json_parser=json_parser))
187 |
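A minimal usage sketch of the NDJSON helpers above; the records are illustrative:

from odp.client.utils import ndjson

records = list(ndjson.loads('{"id": 1}\n{"id": 2, "tags": ["a", "b"]}'))
assert records == [{"id": 1}, {"id": 2, "tags": ["a", "b"]}]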
--------------------------------------------------------------------------------
/src/sdk/odp/client/utils/package_utils.py:
--------------------------------------------------------------------------------
1 | from importlib.metadata import version
2 |
3 |
4 | def get_version():
5 | try:
6 | return str(version("odp-sdk"))
7 | except Exception as e:
8 | print(e)
9 | return ""
10 |
--------------------------------------------------------------------------------
/src/sdk/odp_sdk/__init__.py:
--------------------------------------------------------------------------------
1 | import warnings
2 |
3 | from odp.sdk import * # noqa: F401, F403
4 |
5 | warnings.warn("odp_sdk is deprecated, please import odp.client instead", DeprecationWarning)
6 |
7 | del warnings
8 |
--------------------------------------------------------------------------------
/src/sdk/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "odp-sdk"
3 | version = "0.4.10"
4 | description = "ODP Python SDK"
5 | authors = ["Thomas Li Fredriksen "]
6 | license = "MIT"
7 | readme = "README.md"
8 | packages = [
9 | {include="odp"},
10 | {include="odp_sdk"},
11 | ]
12 |
13 |
14 | [tool.poetry.dependencies]
15 | python = "^3.9"
16 | pydantic = "^2.4.2"
17 | odp-dto = { path = "../dto", develop = true }
18 | cryptography = ">=41.0.5,<43.0.0"
19 | pyjwt = "^2.8.0"
20 | msal = "^1.24.1"
21 | msal-extensions = "^1.1.0"
22 | pandas = "^2.1.4"
23 | shapely = "^2.0.4"
24 | geojson = "^3.1.0"
25 | validators = "^0.28.3"
26 | lark="^1.2.2"
27 | pyarrow = "^18.1.0"
28 |
29 | [build-system]
30 | requires = ["poetry-core>=1.0.0"]
31 | build-backend = "poetry.core.masonry.api"
--------------------------------------------------------------------------------
/src/sdk/tests/test_sdk/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/C4IROcean-archive/odp-sdk-python/cfe9d937e620dfa8f35e6e92646d9625de5a0de9/src/sdk/tests/test_sdk/__init__.py
--------------------------------------------------------------------------------
/src/sdk/tests/test_sdk/conftest.py:
--------------------------------------------------------------------------------
1 | from .fixtures import * # noqa: F401, F403
2 |
--------------------------------------------------------------------------------
/src/sdk/tests/test_sdk/fixtures/__init__.py:
--------------------------------------------------------------------------------
1 | from .auth_fixtures import * # noqa: F401, F403
2 | from .dto_fixtures import * # noqa: F401, F403
3 | from .jwt_fixtures import * # noqa: F401, F403
4 | from .odp_http_client_fixtures import * # noqa: F401, F403
5 | from .request_fixtures import * # noqa: F401, F403
6 | from .time_fixtures import * # noqa: F401, F403
7 |
--------------------------------------------------------------------------------
/src/sdk/tests/test_sdk/fixtures/auth_fixtures.py:
--------------------------------------------------------------------------------
1 | import json
2 | import random
3 | import time
4 | from typing import Callable
5 |
6 | import jwt
7 | import pytest
8 | import responses
9 | from cryptography.hazmat.primitives.asymmetric import rsa
10 | from odp.client.auth import AzureTokenProvider, OdpWorkspaceTokenProvider
11 | from pydantic import SecretStr
12 |
13 | __all__ = [
14 | "odp_workspace_token_provider",
15 | "azure_token_provider",
16 | "mock_token_response_body",
17 | "mock_token_response_callback",
18 | ]
19 |
20 | ALGORITHM = "RS256"
21 | PUBLIC_KEY_ID = "sample-key-id"
22 |
23 | MOCK_SIDECAR_URL = "http://token_endpoint.local"
24 | MOCK_CLIENT_ID = SecretStr("foo")
25 | MOCK_CLIENT_SECRET = SecretStr("bar")
26 | MOCK_TOKEN_URI = "http://token_uri.local"
27 | MOCK_ISSUER = "http://issuer.local"
28 | MOCK_AUDIENCE = "audience"
29 |
30 |
31 | @pytest.fixture()
32 | def odp_workspace_token_provider() -> OdpWorkspaceTokenProvider:
33 | with responses.RequestsMock() as rsps:
34 | rsps.add(
35 | responses.POST,
36 | MOCK_SIDECAR_URL,
37 | json={
38 | "token": "Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCIsImtpZCI6ImVTMEtuOHRWNkpweHVnVGRXWVJTX2x5VlBpTFBPRHhxNmxjNlI0clE4NmsifQ.eyJzdWIiOiIwMDAwMDAwMC0wMDAwLTAwMDAtMDAwMC0wMDAwNDk5NjAyZDIiLCJuYW1lIjoiSm9obiBEb2UiLCJpYXQiOjE1MTYyMzkwMjJ9.tky9z3_WE0YSbg7mXUq-Wl9b0Xo_Hrd6nVVHfRGSHNI" # noqa: E501
39 | }, # noqa: E501
40 | )
41 |
42 | yield OdpWorkspaceTokenProvider(token_uri=MOCK_SIDECAR_URL)
43 |
44 |
45 | def encode_token(payload: dict, private_key: rsa.RSAPrivateKey) -> str:
46 | return jwt.encode(
47 | payload=payload,
48 | key=private_key, # The private key created in the previous step
49 | algorithm=ALGORITHM,
50 | headers={
51 | "kid": PUBLIC_KEY_ID,
52 | },
53 | )
54 |
55 |
56 | @pytest.fixture()
57 | def mock_token_response_callback(rsa_private_key) -> Callable[[], str]:
58 | def _cb():
59 | t = int(time.time())
60 | claims = {
61 | "sub": "123",
62 | "iss": MOCK_ISSUER,
63 | "aud": MOCK_AUDIENCE,
64 | "iat": t,
65 | "exp": t + 3600,
66 | "nonce": random.randint(0, 1000000),
67 | }
68 |
69 | token = encode_token(claims, rsa_private_key)
70 |
71 | return json.dumps(
72 | {
73 | "access_token": token,
74 | }
75 | )
76 |
77 | return _cb
78 |
79 |
80 | @pytest.fixture()
81 | def mock_token_response_body(mock_token_response_callback: Callable[[], str]) -> str:
82 | return mock_token_response_callback()
83 |
84 |
85 | @pytest.fixture()
86 | def azure_token_provider() -> AzureTokenProvider:
87 | return AzureTokenProvider(
88 | client_id=MOCK_CLIENT_ID,
89 | client_secret=MOCK_CLIENT_SECRET,
90 | token_uri=MOCK_TOKEN_URI,
91 | )
92 |
--------------------------------------------------------------------------------
/src/sdk/tests/test_sdk/fixtures/dto_fixtures.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | from uuid import uuid4
3 |
4 | import pytest
5 | from odp.client.dto.table_spec import TableSpec
6 | from odp.client.dto.tabular_store import TableStage
7 | from odp.dto import DatasetDto, DatasetSpec, Metadata
8 |
9 | __all__ = [
10 | "raw_resource_dto",
11 | "tabular_resource_dto",
12 | "table_spec",
13 | "table_stage",
14 | ]
15 |
16 | from odp.dto.common.contact_info import ContactInfo
17 |
18 |
19 | @pytest.fixture()
20 | def raw_resource_dto() -> DatasetDto:
21 | name = "test_dataset"
22 | uuid = uuid4()
23 | return DatasetDto(
24 | metadata=Metadata(name=name, uuid=uuid),
25 | spec=DatasetSpec(
26 | storage_class="registry.hubocean.io/storageClass/raw",
27 | maintainer=ContactInfo(
28 | organisation="HUB Ocean", contact="Name McNameson "
29 | ),
30 | documentation=["https://oceandata.earth"],
31 | tags={"test", "hubocean"},
32 | ),
33 | )
34 |
35 |
36 | @pytest.fixture()
37 | def tabular_resource_dto() -> DatasetDto:
38 | name = "test_dataset"
39 | uuid = uuid4()
40 |
41 | return DatasetDto(
42 | metadata=Metadata(name=name, uuid=uuid),
43 | spec=DatasetSpec(
44 | storage_class="registry.hubocean.io/storageClass/tabular",
45 | maintainer=ContactInfo(
46 | organisation="HUB Ocean", contact="Name McNameson "
47 | ),
48 | documentation=["https://oceandata.earth"],
49 | tags={"test", "hubocean"},
50 | ),
51 | )
52 |
53 |
54 | @pytest.fixture()
55 | def table_spec():
56 | table_schema = {
57 | "CatalogNumber": {"type": "long"},
58 | "Location": {"type": "geometry"},
59 | }
60 |
61 | return TableSpec(table_schema=table_schema)
62 |
63 |
64 | @pytest.fixture()
65 | def table_stage():
66 | return TableStage(
67 | stage_id=uuid4(), status="active", created_time=datetime.datetime.now(), expiry_time=datetime.MAXYEAR
68 | )
69 |
--------------------------------------------------------------------------------
/src/sdk/tests/test_sdk/fixtures/jwt_fixtures.py:
--------------------------------------------------------------------------------
1 | import json
2 | import random
3 | import time
4 | from typing import Union
5 |
6 | import jwt
7 | import pytest
8 | import requests
9 | import responses
10 | from cryptography.hazmat.primitives.asymmetric import rsa
11 | from jwt.utils import to_base64url_uint
12 | from odp.client.auth import JwtTokenProvider
13 |
14 | __all__ = [
15 | "rsa_public_private_key_pair",
16 | "rsa_public_key",
17 | "rsa_private_key",
18 | "jwt_response",
19 | "auth_response",
20 | "jwt_token_provider",
21 | ]
22 |
23 | ALGORITHM = "RS256"
24 | PUBLIC_KEY_ID = "sample-key-id"
25 |
26 | MOCK_TOKEN_ENDPOINT = "http://token_endpoint.local"
27 | MOCK_JWKS_ENDPOINT = "http://jwks_endpoint.local"
28 | MOCK_ISSUER = "http://issuer.local"
29 | MOCK_SCOPE = ["scope1"]
30 | MOCK_AUDIENCE = "audience"
31 |
32 |
33 | class MockTokenProvider(JwtTokenProvider):
34 | audience: str = MOCK_AUDIENCE
35 | """IDP token audience"""
36 |
37 | scope: list[str] = MOCK_SCOPE
38 | """IDP token scope"""
39 |
40 | def get_jwks_uri(self) -> str:
41 | return MOCK_JWKS_ENDPOINT
42 |
43 | def authenticate(self) -> dict[str, str]:
44 | res = requests.post(
45 | MOCK_TOKEN_ENDPOINT,
46 | data={
47 | "grant_type": "client_credentials",
48 | "client_id": "foo",
49 | "client_secret": "bar",
50 | "audience": self.audience,
51 | "scope": " ".join(self.scope),
52 | },
53 | )
54 |
55 | res.raise_for_status()
56 | return res.json()
57 |
58 |
59 | @pytest.fixture(scope="session")
60 | def rsa_public_private_key_pair() -> tuple[rsa.RSAPublicKey, rsa.RSAPrivateKey]:
61 | private_key = rsa.generate_private_key(public_exponent=65537, key_size=2048)
62 | public_key = private_key.public_key()
63 | return public_key, private_key
64 |
65 |
66 | @pytest.fixture(scope="session")
67 | def rsa_public_key(rsa_public_private_key_pair) -> rsa.RSAPublicKey:
68 | public_key, _ = rsa_public_private_key_pair
69 | return public_key
70 |
71 |
72 | @pytest.fixture(scope="session")
73 | def rsa_private_key(rsa_public_private_key_pair) -> rsa.RSAPrivateKey:
74 | _, private_key = rsa_public_private_key_pair
75 | return private_key
76 |
77 |
78 | def jwt_response(mock, rsa_public_key: rsa.RSAPublicKey):
79 | mock.add(
80 | responses.GET,
81 | MOCK_JWKS_ENDPOINT,
82 | json={
83 | "keys": [
84 | {
85 | "kty": "RSA",
86 | "use": "sig",
87 | "kid": PUBLIC_KEY_ID,
88 | "n": to_base64url_uint(rsa_public_key.public_numbers().n).decode("utf-8"),
89 | "e": to_base64url_uint(rsa_public_key.public_numbers().e).decode("utf-8"),
90 | "issuer": MOCK_ISSUER,
91 | }
92 | ]
93 | },
94 | )
95 |
96 |
97 | def auth_response(mock, rsa_private_key: rsa.RSAPrivateKey):
98 | def token_callback(request: requests.Request) -> tuple[int, dict, Union[str, bytes]]:
99 | t = int(time.time())
100 | claims = {
101 | "sub": "123",
102 | "iss": MOCK_ISSUER,
103 | "aud": MOCK_AUDIENCE,
104 | "iat": t,
105 | "exp": t + 3600,
106 | "nonce": random.randint(0, 1000000),
107 | }
108 |
109 | token = encode_token(claims, rsa_private_key)
110 | return (
111 | 200,
112 | {},
113 | json.dumps(
114 | {
115 | "access_token": token,
116 | }
117 | ),
118 | )
119 |
120 | mock.add_callback(responses.POST, MOCK_TOKEN_ENDPOINT, callback=token_callback, content_type="application/json")
121 |
122 |
123 | def encode_token(payload: dict, private_key: rsa.RSAPrivateKey) -> str:
124 | return jwt.encode(
125 | payload=payload,
126 | key=private_key, # The private key created in the previous step
127 | algorithm=ALGORITHM,
128 | headers={
129 | "kid": PUBLIC_KEY_ID,
130 | },
131 | )
132 |
133 |
134 | @pytest.fixture()
135 | def jwt_token_provider(
136 | request_mock: responses.RequestsMock,
137 | rsa_public_key: rsa.RSAPublicKey,
138 | rsa_private_key: rsa.RSAPrivateKey,
139 | ) -> JwtTokenProvider:
140 | auth_response(request_mock, rsa_private_key)
141 | jwt_response(request_mock, rsa_public_key)
142 |
143 | yield MockTokenProvider()
144 |
--------------------------------------------------------------------------------
/src/sdk/tests/test_sdk/fixtures/odp_http_client_fixtures.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from odp.client.auth import TokenProvider
3 | from odp.client.http_client import OdpHttpClient
4 |
5 | __all__ = [
6 | "mock_odp_endpoint",
7 | "http_client",
8 | ]
9 |
10 |
11 | @pytest.fixture(scope="session")
12 | def mock_odp_endpoint() -> str:
13 | return "http://odp.local"
14 |
15 |
16 | @pytest.fixture
17 | def http_client(mock_odp_endpoint: str, jwt_token_provider: TokenProvider) -> OdpHttpClient:
18 | return OdpHttpClient(base_url=mock_odp_endpoint, token_provider=jwt_token_provider)
19 |
--------------------------------------------------------------------------------
/src/sdk/tests/test_sdk/fixtures/request_fixtures.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import responses
3 |
4 |
5 | @pytest.fixture
6 | def request_mock() -> responses.RequestsMock:
7 | with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps:
8 | yield rsps
9 |
--------------------------------------------------------------------------------
/src/sdk/tests/test_sdk/fixtures/time_fixtures.py:
--------------------------------------------------------------------------------
1 | import time
2 | from unittest.mock import patch
3 |
4 | import pytest
5 |
6 |
7 | @pytest.fixture(autouse=True)
8 | def mock_sleep(request: pytest.FixtureRequest):
9 | if request.node.get_closest_marker("mock_sleep"):
10 | with patch.object(time, "sleep", lambda x: None):
11 | yield
12 | else:
13 | yield
14 |
15 |
16 | class MockTime:
17 | def __init__(self, use_time: float):
18 | self.use_time = use_time
19 |
20 | def get_time(self) -> float:
21 | return self.use_time
22 |
23 | def __enter__(self):
24 | self.patcher = patch.object(time, "time", lambda: self.use_time)
25 | self.patcher.start()
26 |
27 | def __exit__(self, exc_type, exc_val, exc_tb):
28 | self.patcher.stop()
29 |
30 | def advance(self, seconds: float):
31 | self.use_time += seconds
32 |
33 |
34 | @pytest.fixture(autouse=True)
35 | def mock_time(request: pytest.FixtureRequest):
36 | if marker := request.node.get_closest_marker("mock_time"):
37 | use_time = marker.kwargs.get("use_time", 1560926388)
38 | mock_timer = MockTime(use_time)
39 |
40 | with mock_timer:
41 | yield mock_timer
42 | else:
43 | yield None
44 |
--------------------------------------------------------------------------------
/src/sdk/tests/test_sdk/test_auth/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/C4IROcean-archive/odp-sdk-python/cfe9d937e620dfa8f35e6e92646d9625de5a0de9/src/sdk/tests/test_sdk/test_auth/__init__.py
--------------------------------------------------------------------------------
/src/sdk/tests/test_sdk/test_auth/test_azure_token_provider.py:
--------------------------------------------------------------------------------
1 | from typing import Callable
2 |
3 | import pytest
4 | import responses
5 | from odp.client.auth import AzureTokenProvider
6 |
7 |
8 | def test_get_token(azure_token_provider: AzureTokenProvider, mock_token_response_body: str):
9 | with responses.RequestsMock() as rsps:
10 | rsps.add(
11 | responses.POST,
12 | azure_token_provider.token_uri,
13 | body=mock_token_response_body,
14 | )
15 | access_token = azure_token_provider.get_token()
16 |
17 | assert rsps.assert_call_count(azure_token_provider.token_uri, 1)
18 | assert access_token
19 |
20 |
21 | def test_get_token_reuse(azure_token_provider: AzureTokenProvider, mock_token_response_body: str):
22 | with responses.RequestsMock() as rsps:
23 | rsps.add(
24 | responses.POST,
25 | azure_token_provider.token_uri,
26 | body=mock_token_response_body,
27 | )
28 | access_token = azure_token_provider.get_token()
29 |
30 | assert rsps.assert_call_count(azure_token_provider.token_uri, 1)
31 | assert access_token
32 |
33 | new_access_token = azure_token_provider.get_token()
34 |
35 | assert rsps.assert_call_count(azure_token_provider.token_uri, 1)
36 | assert access_token == new_access_token
37 |
38 |
39 | @pytest.mark.mock_time(use_time=123)
40 | def test_get_token_renew(
41 | azure_token_provider: AzureTokenProvider, mock_token_response_callback: Callable[[], str], mock_time
42 | ):
43 | with responses.RequestsMock() as rsps:
44 | rsps.add_callback(
45 | responses.POST,
46 | azure_token_provider.token_uri,
47 | callback=lambda _: (200, {}, mock_token_response_callback()),
48 | content_type="application/json",
49 | )
50 |
51 | access_token = azure_token_provider.get_token()
52 | assert access_token
53 | assert rsps.assert_call_count(azure_token_provider.token_uri, 1)
54 |
55 | mock_time.advance(3600)
56 |
57 | new_access_token = azure_token_provider.get_token()
58 | assert rsps.assert_call_count(azure_token_provider.token_uri, 2)
59 | assert new_access_token
60 | assert new_access_token != access_token
61 |
62 |
63 | @pytest.mark.mock_time(use_time=123)
64 | def test_get_token_renew_before_leeway(
65 | azure_token_provider: AzureTokenProvider, mock_token_response_callback: Callable[[], str], mock_time
66 | ):
67 | with responses.RequestsMock() as rsps:
68 | rsps.add_callback(
69 | responses.POST,
70 | azure_token_provider.token_uri,
71 | callback=lambda _: (200, {}, mock_token_response_callback()),
72 | content_type="application/json",
73 | )
74 |
75 | access_token = azure_token_provider.get_token()
76 | assert access_token
77 | assert rsps.assert_call_count(azure_token_provider.token_uri, 1)
78 |
79 | mock_time.advance(3600 - (azure_token_provider.token_exp_lee_way + 1))
80 |
81 | new_access_token = azure_token_provider.get_token()
82 |
83 | assert rsps.assert_call_count(azure_token_provider.token_uri, 1)
84 | assert new_access_token == access_token
85 |
86 |
87 | @pytest.mark.mock_time(use_time=123)
88 | def test_get_token_renew_after_leeway(
89 | azure_token_provider: AzureTokenProvider, mock_token_response_callback: Callable[[], str], mock_time
90 | ):
91 | with responses.RequestsMock() as rsps:
92 | rsps.add_callback(
93 | responses.POST,
94 | azure_token_provider.token_uri,
95 | callback=lambda _: (200, {}, mock_token_response_callback()),
96 | content_type="application/json",
97 | )
98 |
99 | access_token = azure_token_provider.get_token()
100 | assert access_token
101 | assert rsps.assert_call_count(azure_token_provider.token_uri, 1)
102 |
103 | mock_time.advance(3600 - (azure_token_provider.token_exp_lee_way - 1))
104 |
105 | new_access_token = azure_token_provider.get_token()
106 | assert rsps.assert_call_count(azure_token_provider.token_uri, 2)
107 | assert new_access_token
108 | assert new_access_token != access_token
109 |
--------------------------------------------------------------------------------
/src/sdk/tests/test_sdk/test_auth/test_get_default_token_provider.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from odp.client.auth import (
3 | AzureTokenProvider,
4 | HardcodedTokenProvider,
5 | InteractiveTokenProvider,
6 | OdpWorkspaceTokenProvider,
7 | get_default_token_provider,
8 | )
9 | from odp.client.exc import OdpAuthError
10 | from odp.client.utils import get_version
11 |
12 |
13 | @pytest.fixture(scope="function")
14 | def clean_env(monkeypatch):
15 | """Clean environment variables for each test. Some environment variables have priority over others while choosing
16 | the authentication method so all of them need to be cleaned before the relevant ones are set in tests."""
17 | monkeypatch.delenv("ODP_ACCESS_TOKEN", raising=False)
18 | monkeypatch.delenv("JUPYTERHUB_API_TOKEN", raising=False)
19 | monkeypatch.delenv("ODP_CLIENT_SECRET", raising=False)
20 |
21 |
22 | def test_interactive_auth():
23 | auth = get_default_token_provider()
24 | assert isinstance(auth, InteractiveTokenProvider)
25 | assert auth.user_agent == f"odp-sdk/{get_version()} (Interactive)"
26 |
27 |
28 | def test_hardcoded_auth(monkeypatch):
29 | monkeypatch.setenv("ODP_ACCESS_TOKEN", "Test")
30 | auth = get_default_token_provider()
31 | assert isinstance(auth, HardcodedTokenProvider)
32 | assert auth.user_agent == f"odp-sdk/{get_version()} (Hardcoded)"
33 |
34 |
35 | def test_workspace_auth(monkeypatch):
36 | monkeypatch.setenv("JUPYTERHUB_API_TOKEN", "Test")
37 | auth = get_default_token_provider()
38 | assert isinstance(auth, OdpWorkspaceTokenProvider)
39 | assert auth.user_agent == f"odp-sdk/{get_version()} (Workspaces)"
40 |
41 |
42 | def test_azure_auth(monkeypatch):
43 | monkeypatch.setenv("ODP_CLIENT_SECRET", "Test")
44 | auth = get_default_token_provider()
45 | assert isinstance(auth, AzureTokenProvider)
46 | assert auth.user_agent == f"odp-sdk/{get_version()} (Azure)"
47 |
48 |
49 | def test_auth_error():
50 | with pytest.raises(OdpAuthError):
51 | get_default_token_provider(fallback=False)
52 |
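53 | # Selection logic exercised above, for reference (environment variable -> provider):
54 | #   ODP_ACCESS_TOKEN     -> HardcodedTokenProvider
55 | #   JUPYTERHUB_API_TOKEN -> OdpWorkspaceTokenProvider
56 | #   ODP_CLIENT_SECRET    -> AzureTokenProvider
57 | #   none of the above    -> InteractiveTokenProvider, or OdpAuthError when fallback=False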
--------------------------------------------------------------------------------
/src/sdk/tests/test_sdk/test_auth/test_jwks_token_provider.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import responses
3 | from odp.client.auth import JwtTokenProvider
4 | from test_sdk.fixtures.jwt_fixtures import MOCK_TOKEN_ENDPOINT
5 |
6 |
7 | def test_authenticate(jwt_token_provider: JwtTokenProvider):
8 | access_token = jwt_token_provider.authenticate()
9 | assert access_token
10 |
11 | new_access_token = jwt_token_provider.authenticate()
12 | assert access_token != new_access_token
13 |
14 |
15 | def test_get_token_novalidate(jwt_token_provider: JwtTokenProvider):
16 | expected_prefix = "Bearer "
17 | jwt_token_provider.validate_token = False
18 |
19 | access_token = jwt_token_provider.get_token()
20 |
21 | assert access_token.startswith(expected_prefix)
22 |
23 | # The token should be cached and reused
24 | new_access_token = jwt_token_provider.get_token()
25 |
26 | assert new_access_token.startswith(expected_prefix)
27 | assert access_token == new_access_token
28 |
29 |
30 | def test_get_token_validate(jwt_token_provider: JwtTokenProvider):
31 | expected_prefix = "Bearer "
32 |
33 | jwt_token_provider.validate_token = True
34 |
35 | access_token = jwt_token_provider.get_token()
36 |
37 | assert access_token.startswith(expected_prefix)
38 |
39 | # The token should be cached and reused
40 | new_access_token = jwt_token_provider.get_token()
41 |
42 | assert new_access_token.startswith(expected_prefix)
43 | assert access_token == new_access_token
44 |
45 |
46 | @pytest.mark.mock_time(use_time=123)
47 | def test_renew_token(jwt_token_provider: JwtTokenProvider, request_mock: responses.RequestsMock, mock_time):
48 | request_mock.assert_call_count(MOCK_TOKEN_ENDPOINT, 0)
49 |
50 | access_token = jwt_token_provider.get_token()
51 | assert access_token
52 | request_mock.assert_call_count(MOCK_TOKEN_ENDPOINT, 1)
53 |
54 | new_access_token = jwt_token_provider.get_token()
55 | assert access_token == new_access_token
56 | request_mock.assert_call_count(MOCK_TOKEN_ENDPOINT, 1)
57 |
58 | mock_time.advance(3600)
59 |
60 | new_access_token = jwt_token_provider.get_token()
61 | assert access_token != new_access_token
62 | request_mock.assert_call_count(MOCK_TOKEN_ENDPOINT, 2)
63 |
64 |
65 | @pytest.mark.mock_time(use_time=123)
66 | def test_renew_token_before_leeway(
67 | jwt_token_provider: JwtTokenProvider, request_mock: responses.RequestsMock, mock_time
68 | ):
69 | request_mock.assert_call_count(MOCK_TOKEN_ENDPOINT, 0)
70 |
71 | access_token = jwt_token_provider.get_token()
72 | assert access_token
73 | request_mock.assert_call_count(MOCK_TOKEN_ENDPOINT, 1)
74 |
75 | mock_time.advance(3600 - (jwt_token_provider.token_exp_lee_way + 1))
76 |
77 | new_access_token = jwt_token_provider.get_token()
78 | assert access_token == new_access_token
79 | request_mock.assert_call_count(MOCK_TOKEN_ENDPOINT, 1)
80 |
81 |
82 | @pytest.mark.mock_time(use_time=123)
83 | def test_renew_token_after_leeway(
84 | jwt_token_provider: JwtTokenProvider, request_mock: responses.RequestsMock, mock_time
85 | ):
86 | request_mock.assert_call_count(MOCK_TOKEN_ENDPOINT, 0)
87 |
88 | access_token = jwt_token_provider.get_token()
89 | assert request_mock.assert_call_count(MOCK_TOKEN_ENDPOINT, 1)
90 | assert access_token
91 |
92 | mock_time.advance(3600 - (jwt_token_provider.token_exp_lee_way - 1))
93 |
94 | new_access_token = jwt_token_provider.get_token()
95 | assert request_mock.assert_call_count(MOCK_TOKEN_ENDPOINT, 2)
96 | assert new_access_token
97 | assert access_token != new_access_token
98 |
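99 | # As with the Azure provider tests, these assume the mocked JWT is valid for 3600 seconds: the
100 | # cached token is returned while more than `token_exp_lee_way` seconds remain, and a new call to
101 | # MOCK_TOKEN_ENDPOINT is made once the remaining lifetime falls inside that leeway window.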
--------------------------------------------------------------------------------
/src/sdk/tests/test_sdk/test_auth/test_odp_workspace_token_provider.py:
--------------------------------------------------------------------------------
1 | from odp.client.auth import OdpWorkspaceTokenProvider
2 |
3 |
4 | def test_get_token(odp_workspace_token_provider: OdpWorkspaceTokenProvider):
5 | access_token = odp_workspace_token_provider.get_token()
6 |
7 | assert access_token
8 | assert access_token.startswith("Bearer")
9 |
--------------------------------------------------------------------------------
/src/sdk/tests/test_sdk/test_http_client.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import responses
3 | from odp.client.auth import TokenProvider
4 | from odp.client.http_client import OdpHttpClient
5 | from test_sdk.fixtures.jwt_fixtures import MOCK_TOKEN_ENDPOINT
6 |
7 |
8 | def test_request_relative(http_client: OdpHttpClient, request_mock: responses.RequestsMock):
9 | request_mock.add(responses.GET, f"{http_client.base_url}/foobar", status=200)
10 |
11 | res = http_client.get("/foobar")
12 | res.raise_for_status()
13 |
14 | assert res.status_code == 200
15 |
16 |
17 | def test_request_absolute(http_client: OdpHttpClient, request_mock: responses.RequestsMock):
18 | test_url = "http://someurl.local"
19 |
20 | assert test_url != http_client.base_url
21 |
22 | request_mock.add(responses.GET, test_url, status=200)
23 |
24 | res = http_client.get(test_url)
25 | res.raise_for_status()
26 |
27 | assert res.status_code == 200
28 |
29 |
30 | def test_request_has_auth_token(http_client: OdpHttpClient, request_mock: responses.RequestsMock):
31 | def _on_request(request):
32 | assert "Authorization" in request.headers
33 |
34 | auth_header = request.headers["Authorization"]
35 | assert auth_header is not None
36 | assert auth_header.startswith("Bearer ")
37 |
38 | return (200, {}, None)
39 |
40 | request_mock.add_callback(
41 | responses.GET,
42 | f"{http_client.base_url}/foobar",
43 | callback=_on_request,
44 | )
45 |
46 | http_client.get("/foobar")
47 |
48 |
49 | def test_request_reuse_auth_token(http_client: OdpHttpClient, request_mock: responses.RequestsMock):
50 | request_mock.add(responses.GET, f"{http_client.base_url}/foobar", status=200)
51 |
52 | res = http_client.get("/foobar")
53 | res.raise_for_status()
54 |
55 | assert res.status_code == 200
56 | assert request_mock.assert_call_count(MOCK_TOKEN_ENDPOINT, 1)
57 |
58 | res = http_client.get("/foobar")
59 | res.raise_for_status()
60 |
61 | assert res.status_code == 200
62 | assert request_mock.assert_call_count(MOCK_TOKEN_ENDPOINT, 1)
63 |
64 |
65 | @pytest.mark.mock_time(use_time=123)
66 | def test_request_renew_auth_token(http_client: OdpHttpClient, request_mock: responses.RequestsMock, mock_time):
67 | request_mock.add(responses.GET, f"{http_client.base_url}/foobar", status=200)
68 |
69 | res = http_client.get("/foobar")
70 | res.raise_for_status()
71 |
72 | assert res.status_code == 200
73 | assert request_mock.assert_call_count(MOCK_TOKEN_ENDPOINT, 1)
74 |
75 | mock_time.advance(3600)
76 |
77 | res = http_client.get("/foobar")
78 | res.raise_for_status()
79 |
80 | assert res.status_code == 200
81 | assert request_mock.assert_call_count(MOCK_TOKEN_ENDPOINT, 2)
82 |
83 |
84 | def test_custom_user_agent(http_client: OdpHttpClient, request_mock: responses.RequestsMock):
85 | custom_user_agent = "my-custom-user-agent"
86 |
87 | http_client.custom_user_agent = custom_user_agent
88 |
89 | test_url = "http://someurl.local"
90 |
91 | assert test_url != http_client.base_url
92 |
93 | request_mock.add(responses.GET, test_url, status=200)
94 |
95 | res = http_client.get(test_url)
96 | res.raise_for_status()
97 |
98 | assert res.status_code == 200
99 |
100 | assert request_mock.calls[1].request.headers["User-Agent"] == custom_user_agent
101 |
102 |
103 | @pytest.mark.parametrize(
104 | "url, expected",
105 | [
106 | ("http://localhost:8888", True),
107 | ("localhost:8888", False),
108 | ("foo.bar", False),
109 | ("https://foo.bar.com", True),
110 | ("not a valid url", False),
111 | ],
112 | )
113 | def test_http_client_url(jwt_token_provider: TokenProvider, url: str, expected: bool):
114 | try:
115 | http_client = OdpHttpClient(base_url=url, token_provider=jwt_token_provider)
116 | assert http_client.base_url == url and expected
117 | except ValueError:
118 | assert not expected
119 |
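120 | # Minimal usage sketch of the behaviour covered above (base URL and provider names are assumed,
121 | # taken from the test fixtures rather than a real configuration):
122 | #
123 | #   client = OdpHttpClient(base_url="https://api.hubocean.earth", token_provider=token_provider)
124 | #   client.custom_user_agent = "my-app/1.0"  # sent as the User-Agent header
125 | #   res = client.get("/foobar")              # relative paths are resolved against base_url
126 | #   res.raise_for_status()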
--------------------------------------------------------------------------------
/src/sdk/tests/test_sdk/test_raw_storage_client.py:
--------------------------------------------------------------------------------
1 | import json
2 | import uuid
3 | from datetime import datetime
4 | from pathlib import Path
5 |
6 | import pytest
7 | import responses
8 | from odp.client.dto.file_dto import FileMetadataDto
9 | from odp.client.exc import OdpFileNotFoundError
10 | from odp.client.http_client import OdpHttpClient
11 | from odp.client.raw_storage_client import OdpRawStorageClient
12 | from odp.dto import DatasetDto
13 |
14 |
15 | @pytest.fixture()
16 | def raw_storage_client(http_client: OdpHttpClient) -> OdpRawStorageClient:
17 | return OdpRawStorageClient(http_client=http_client, raw_storage_endpoint="/data")
18 |
19 |
20 | def test_get_file_metadata_success(
21 | raw_storage_client: OdpRawStorageClient, raw_resource_dto: DatasetDto, request_mock: responses.RequestsMock
22 | ):
23 | rand_uuid = uuid.uuid4()
24 | time_now = datetime.now()
25 | file_meta = FileMetadataDto(
26 | name="file.zip",
27 | mime_type="application/zip",
28 | dataset=rand_uuid,
29 | metadata={"name": "sdk-raw-example"},
30 | geo_location="Somewhere",
31 | size_bytes=123456789,
32 | checksum="asdf",
33 | created_time=time_now,
34 | modified_time=time_now,
35 | deleted_time=time_now,
36 | )
37 |
38 | request_mock.add(
39 | responses.GET,
40 | f"{raw_storage_client.raw_storage_url}/{raw_resource_dto.metadata.uuid}/{file_meta.name}/metadata",
41 | body=file_meta.model_dump_json(),
42 | status=200,
43 | content_type="application/json",
44 | )
45 |
46 | result = raw_storage_client.get_file_metadata(raw_resource_dto, file_meta)
47 |
48 | assert result.name == "file.zip"
49 | assert result.mime_type == "application/zip"
50 | assert result.dataset == rand_uuid
51 | assert result.metadata == {"name": "sdk-raw-example"}
52 | assert result.geo_location == "Somewhere"
53 | assert result.size_bytes == 123456789
54 | assert result.checksum == "asdf"
55 | assert result.created_time == time_now
56 | assert result.modified_time == time_now
57 | assert result.deleted_time == time_now
58 |
59 |
60 | def test_get_file_metadata_not_found(
61 | raw_storage_client: OdpRawStorageClient,
62 | raw_resource_dto: DatasetDto,
63 | request_mock: responses.RequestsMock,
64 | ):
65 | file_meta = FileMetadataDto(name="file.zip", mime_type="application/zip")
66 |
67 | request_mock.add(
68 | responses.GET,
69 | f"{raw_storage_client.raw_storage_url}/{raw_resource_dto.metadata.uuid}/{file_meta.name}/metadata",
70 | status=404,
71 | )
72 |
73 | with pytest.raises(OdpFileNotFoundError):
74 | raw_storage_client.get_file_metadata(raw_resource_dto, file_meta)
75 |
76 |
77 | def test_list_files_success(
78 | raw_storage_client: OdpRawStorageClient,
79 | raw_resource_dto: DatasetDto,
80 | request_mock: responses.RequestsMock,
81 | ):
82 | file_metadata = FileMetadataDto(name="file.zip", mime_type="application/zip")
83 |
84 | request_mock.add(
85 | responses.POST,
86 | f"{raw_storage_client.raw_storage_url}/{raw_resource_dto.metadata.uuid}/list",
87 | json={
88 | "results": [json.loads(file_metadata.model_dump_json())],
89 | "next": None,
90 | "num_results": 1,
91 | },
92 | status=200,
93 | content_type="application/json",
94 | )
95 |
96 | metadata_filter = {"name": file_metadata.name}
97 |
98 | result = raw_storage_client.list(raw_resource_dto, metadata_filter=metadata_filter)
99 |
100 | first_item = next(iter(result))
101 |
102 | assert first_item.name == file_metadata.name
103 | assert first_item.mime_type == file_metadata.mime_type
104 |
105 |
106 | def test_create_file_success(
107 | raw_storage_client: OdpRawStorageClient,
108 | raw_resource_dto: DatasetDto,
109 | request_mock: responses.RequestsMock,
110 | ):
111 | file_metadata = FileMetadataDto(
112 | name="new_file.txt",
113 | mime_type="text/plain",
114 | )
115 |
116 | request_mock.add(
117 | responses.POST,
118 | f"{raw_storage_client.raw_storage_url}/{raw_resource_dto.metadata.uuid}",
119 | status=200,
120 | json=json.loads(file_metadata.model_dump_json()),
121 | content_type="application/json",
122 | )
123 |
124 | request_mock.add(
125 | responses.GET,
126 | f"{raw_storage_client.raw_storage_url}/{raw_resource_dto.metadata.uuid}/{file_metadata.name}/metadata",
127 | json=json.loads(file_metadata.model_dump_json()),
128 | status=200,
129 | content_type="application/json",
130 | )
131 |
132 | result = raw_storage_client.create_file(raw_resource_dto, file_metadata_dto=file_metadata, contents=None)
133 |
134 | assert result.name == file_metadata.name
135 | assert result.mime_type == "text/plain"
136 |
137 |
138 | def test_download_file_save(
139 | raw_storage_client: OdpRawStorageClient,
140 | raw_resource_dto: DatasetDto,
141 | tmp_path: Path,
142 | request_mock: responses.RequestsMock,
143 | ):
144 | file_data = b"Sample file content"
145 | save_path = tmp_path / "downloaded_file.txt"
146 |
147 | file_metadata = FileMetadataDto(name="test_file.txt", mime_type="text/plain")
148 |
149 | request_mock.add(
150 | responses.GET,
151 | f"{raw_storage_client.raw_storage_url}/{raw_resource_dto.metadata.uuid}/{file_metadata.name}",
152 | body=file_data,
153 | status=200,
154 | )
155 |
156 | raw_storage_client.download_file(raw_resource_dto, file_metadata, save_path=str(save_path))
157 |
158 | with open(save_path, "rb") as file:
159 | saved_data = file.read()
160 |
161 | assert saved_data == file_data
162 |
163 |
164 | def test_delete_file_not_found(
165 | raw_storage_client: OdpRawStorageClient,
166 | raw_resource_dto: DatasetDto,
167 | request_mock: responses.RequestsMock,
168 | ):
169 | file_metadata = FileMetadataDto(name="test_file.txt", mime_type="text/plain")
170 |
171 | request_mock.add(
172 | responses.DELETE,
173 | f"{raw_storage_client.raw_storage_url}/{raw_resource_dto.metadata.uuid}/{file_metadata.name}",
174 | status=404, # Assuming status code 404 indicates file not found
175 | )
176 |
177 | with pytest.raises(OdpFileNotFoundError):
178 | raw_storage_client.delete_file(raw_resource_dto, file_metadata)
179 |
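180 | # The mocked response in test_list_files_success mirrors the paginated wire format the client
181 | # consumes: {"results": [...], "next": <cursor or None>, "num_results": N}. list() yields
182 | # FileMetadataDto entries, which is why the test reads the first item with next(iter(result)).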
--------------------------------------------------------------------------------
/src/sdk/tests/test_sdk/test_resource_client.py:
--------------------------------------------------------------------------------
1 | import json
2 | from datetime import datetime
3 | from uuid import UUID, uuid4
4 |
5 | import pytest
6 | import responses
7 | from odp.client.resource_client import OdpResourceClient
8 | from odp.dto import Metadata, ResourceDto, ResourceStatus
9 |
10 |
11 | @pytest.fixture()
12 | def resource_client(http_client) -> OdpResourceClient:
13 | return OdpResourceClient(http_client=http_client, resource_endpoint="/foobar")
14 |
15 |
16 | def test_get_resource_by_uuid(
17 | resource_client: OdpResourceClient,
18 | request_mock: responses.RequestsMock,
19 | ):
20 | kind = "test.hubocean.io/testType"
21 | version = "v1alpha1"
22 | name = "test"
23 | uuid = uuid4()
24 |
25 | request_mock.add(
26 | responses.GET,
27 | f"{resource_client.resource_url}/{uuid}",
28 | body=ResourceDto(
29 | kind=kind,
30 | version=version,
31 | metadata=Metadata(name=name, uuid=uuid),
32 | status=ResourceStatus(
33 | num_updates=0,
34 | created_time=datetime.fromisoformat("2021-01-01T00:00:00+00:00"),
35 | created_by=uuid4(),
36 | updated_time=datetime.fromisoformat("2021-01-01T00:00:00+00:00"),
37 | updated_by=uuid4(),
38 | ),
39 | spec={},
40 | ).model_dump_json(),
41 | status=200,
42 | content_type="application/json",
43 | )
44 |
45 | manifest = resource_client.get(uuid)
46 |
47 | assert manifest.kind == kind
48 | assert manifest.version == version
49 | assert manifest.metadata.name == name
50 |
51 |
52 | def test_get_resource_by_qname(
53 | resource_client: OdpResourceClient,
54 | request_mock: responses.RequestsMock,
55 | ):
56 | kind = "test.hubocean.io/testType"
57 | version = "v1alpha1"
58 | name = "test"
59 | uuid = uuid4()
60 |
61 | request_mock.add(
62 | responses.GET,
63 | f"{resource_client.resource_url}/{kind}/{name}",
64 | body=ResourceDto(
65 | kind=kind,
66 | version=version,
67 | metadata=Metadata(name=name, uuid=uuid),
68 | status=ResourceStatus(
69 | num_updates=0,
70 | created_time=datetime.fromisoformat("2021-01-01T00:00:00+00:00"),
71 | created_by=uuid4(),
72 | updated_time=datetime.fromisoformat("2021-01-01T00:00:00+00:00"),
73 | updated_by=uuid4(),
74 | ),
75 | spec={},
76 | ).model_dump_json(),
77 | status=200,
78 | content_type="application/json",
79 | )
80 |
81 | manifest = resource_client.get(f"{kind}/{name}")
82 |
83 | assert manifest.kind == kind
84 | assert manifest.version == version
85 | assert manifest.metadata.name == name
86 | assert manifest.metadata.uuid == uuid
88 |
89 |
90 | def test_create_resource(
91 | resource_client: OdpResourceClient,
92 | request_mock: responses.RequestsMock,
93 | ):
94 | def _on_create_request(request):
95 | manifest = json.loads(request.body)
96 |
97 | # Ensure that the status and uuid are not set. If they are present, they must have a null value
98 | assert manifest.get("status", None) is None
99 | assert manifest["metadata"].get("uuid", None) is None
100 |
101 | t = datetime.now().isoformat()
102 | created_by = str(UUID(int=0))
103 | manifest["metadata"]["uuid"] = str(uuid4())
104 | manifest["metadata"].setdefault("owner", created_by)
105 | manifest["status"] = {
106 | "num_updates": 0,
107 | "created_by": created_by,
108 | "created_time": t,
109 | "updated_by": created_by,
110 | "updated_time": t,
111 | }
112 |
113 | return (201, {}, json.dumps(manifest))
114 |
115 | resource_manifest = ResourceDto(
116 | kind="test.hubocean.io/testType",
117 | version="v1alpha1",
118 | metadata=Metadata(name="foobar"),
119 | spec=dict(),
120 | )
121 |
122 | request_mock.add_callback(
123 | responses.POST,
124 | f"{resource_client.resource_url}",
125 | callback=_on_create_request,
126 | content_type="application/json",
127 | )
128 |
129 | populated_manifest = resource_client.create(resource_manifest)
130 |
131 | assert isinstance(populated_manifest, ResourceDto)
132 | assert populated_manifest.metadata.uuid is not None
133 | assert populated_manifest.status is not None
134 | assert populated_manifest.status.num_updates == 0
135 | assert populated_manifest.kind == resource_manifest.kind
136 | assert populated_manifest.metadata.name == resource_manifest.metadata.name
137 |
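138 | # _on_create_request stands in for the resource service: it checks that the client does not send
139 | # uuid/status on create and echoes the manifest back with those fields populated, which is what
140 | # the assertions on populated_manifest rely on.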
--------------------------------------------------------------------------------
/src/sdk/tests/test_sdk/test_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/C4IROcean-archive/odp-sdk-python/cfe9d937e620dfa8f35e6e92646d9625de5a0de9/src/sdk/tests/test_sdk/test_utils/__init__.py
--------------------------------------------------------------------------------
/src/sdk/tests/test_sdk/test_utils/test_dto.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from odp.client.dto.file_dto import FileMetadataDto
3 |
4 |
5 | @pytest.mark.parametrize(
6 | "file_name, correct",
7 | [("test.txt", True), ("foo/bar/test2.txt", True), ("/test.txt", False), ("/foo/bar/test2.txt", False)],
8 | )
9 | def test_file_dto_names(file_name, correct):
10 | if correct:
11 | file_metadata = FileMetadataDto(name=file_name)
12 | assert file_metadata.name == file_name
13 | else:
14 | with pytest.raises(ValueError):
15 | FileMetadataDto(name=file_name)
16 |
--------------------------------------------------------------------------------
/src/sdk/tests/test_sdk/test_utils/test_ndjson.py:
--------------------------------------------------------------------------------
1 | from textwrap import dedent
2 |
3 | from odp.client.utils.ndjson import NdJsonParser
4 |
5 |
6 | def test_parse_ndjson_simple():
7 | test_str = dedent(
8 | """
9 | {"name": "Alice", "age": 30}
10 | {"name": "Bob", "age": 25}
11 | {"name": "Charlie", "age": 35}
12 | """
13 | ).strip()
14 |
15 | ndjson_parser = NdJsonParser(s=test_str)
16 |
17 | parsed_rows = list(iter(ndjson_parser))
18 |
19 | assert isinstance(parsed_rows, list)
20 | assert len(parsed_rows) == 3
21 |
22 | assert parsed_rows[0]["name"] == "Alice"
23 | assert parsed_rows[0]["age"] == 30
24 | assert parsed_rows[1]["name"] == "Bob"
25 | assert parsed_rows[1]["age"] == 25
26 | assert parsed_rows[2]["name"] == "Charlie"
27 | assert parsed_rows[2]["age"] == 35
28 |
29 |
30 | def test_parse_ndjson_binary_simple():
31 | test_str = (
32 | dedent(
33 | """
34 | {"product_id": 1, "name": "Widget", "price": 10.99}
35 | {"product_id": 2, "name": "Gadget", "price": 19.99}
36 | {"product_id": 3, "name": "Tool", "price": 15.49}
37 | """
38 | )
39 | .strip()
40 | .encode("utf-8")
41 | )
42 |
43 | ndjson_parser = NdJsonParser(s=test_str)
44 |
45 | parsed_rows = list(iter(ndjson_parser))
46 |
47 | assert isinstance(parsed_rows, list)
48 | assert len(parsed_rows) == 3
49 |
50 | assert parsed_rows[0]["product_id"] == 1
51 | assert parsed_rows[0]["name"] == "Widget"
52 | assert parsed_rows[0]["price"] == 10.99
53 | assert parsed_rows[1]["product_id"] == 2
54 | assert parsed_rows[1]["name"] == "Gadget"
55 | assert parsed_rows[1]["price"] == 19.99
56 | assert parsed_rows[2]["product_id"] == 3
57 | assert parsed_rows[2]["name"] == "Tool"
58 | assert parsed_rows[2]["price"] == 15.49
59 |
60 |
61 | def test_parse_ndjson_special_characters():
62 | test_str = dedent(
63 | """
64 | {"fruits": ["apple", "banana", "cherry"], "description": "Delicious & healthy 🍏🍌🍒"}
65 | {"colors": ["red", "green", "blue"], "symbols": ["@#$%^&*()_+!"]}
66 | {"languages": ["English", "Español", "Français"], "special_chars": "ñçüëł"}
67 | """
68 | ).strip()
69 |
70 | ndjson_parser = NdJsonParser(s=test_str)
71 | parsed_rows = list(iter(ndjson_parser))
72 |
73 | assert isinstance(parsed_rows, list)
74 | assert len(parsed_rows) == 3
75 |
76 | assert parsed_rows[0]["fruits"] == ["apple", "banana", "cherry"]
77 | assert parsed_rows[0]["description"] == "Delicious & healthy 🍏🍌🍒"
78 | assert parsed_rows[1]["colors"] == ["red", "green", "blue"]
79 | assert parsed_rows[1]["symbols"] == ["@#$%^&*()_+!"]
80 | assert parsed_rows[2]["languages"] == ["English", "Español", "Français"]
81 | assert parsed_rows[2]["special_chars"] == "ñçüëł"
82 |
83 |
84 | def test_parse_ndjson_embedded_json():
85 | test_str = dedent(
86 | """
87 | {"content": "Nested objects: {\\\"key1\\\": \\\"value1\\\", \\\"key2\\\": \\\"value2\\\"}"}
88 | {"config": "{ \\\"param1\\\": [1, 2, 3], \\\"param2\\\": {\\\"a\\\": true, \\\"b\\\": false} }"}
89 | {"formula": "Mathematical expressions: {\\\"equation\\\": \\\"x^2 + y^2 = r^2\\\"}"}
90 | """
91 | ).strip()
92 |
93 | ndjson_parser = NdJsonParser(s=test_str)
94 | parsed_rows = list(iter(ndjson_parser))
95 |
96 | assert isinstance(parsed_rows, list)
97 | assert len(parsed_rows) == 3
98 |
99 | assert parsed_rows[0]["content"] == 'Nested objects: {"key1": "value1", "key2": "value2"}'
100 | assert parsed_rows[1]["config"] == '{ "param1": [1, 2, 3], "param2": {"a": true, "b": false} }'
101 | assert parsed_rows[2]["formula"] == 'Mathematical expressions: {"equation": "x^2 + y^2 = r^2"}'
102 |
103 |
104 | def test_parse_ndjson_wkt_simple():
105 | test_str = (
106 | dedent(
107 | """
108 | {"product_id": 1, "name": "Widget", "geo": "POINT(0 0)"}
109 | {"product_id": 2, "name": "Gadget", "geo": "POINT(0 1)"}
110 | {"product_id": 3, "name": "Tool", "geo": "POINT(0 2)"}
111 | """
112 | )
113 | .strip()
114 | .encode("utf-8")
115 | )
116 |
117 | ndjson_parser = NdJsonParser(s=test_str)
118 |
119 | parsed_rows = list(iter(ndjson_parser))
120 |
121 | assert isinstance(parsed_rows, list)
122 | assert len(parsed_rows) == 3
123 |
124 | assert parsed_rows[0]["product_id"] == 1
125 | assert parsed_rows[0]["name"] == "Widget"
126 | assert parsed_rows[0]["geo"] == "POINT(0 0)"
127 | assert parsed_rows[1]["product_id"] == 2
128 | assert parsed_rows[1]["name"] == "Gadget"
129 | assert parsed_rows[1]["geo"] == "POINT(0 1)"
130 | assert parsed_rows[2]["product_id"] == 3
131 | assert parsed_rows[2]["name"] == "Tool"
132 | assert parsed_rows[2]["geo"] == "POINT(0 2)"
133 |
--------------------------------------------------------------------------------
/src/sdk/tests/test_sdk/test_utils/test_package_utils.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | from odp.client.utils import get_version
4 |
5 |
6 | def test_get_version():
7 | assert re.match(r"^(\d+\.)?(\d+\.)?(\d+)$", get_version())
8 |
--------------------------------------------------------------------------------
/tests/test_examples/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/C4IROcean-archive/odp-sdk-python/cfe9d937e620dfa8f35e6e92646d9625de5a0de9/tests/test_examples/__init__.py
--------------------------------------------------------------------------------
/tests/test_examples/conftest.py:
--------------------------------------------------------------------------------
1 | import os
2 | import uuid
3 | from typing import Callable, Tuple
4 |
5 | import pytest
6 | from dotenv import load_dotenv
7 | from odp.client import OdpClient
8 | from odp.client.auth import AzureTokenProvider
9 | from odp.client.exc import OdpResourceNotFoundError
10 | from pydantic import SecretStr
11 |
12 |
13 | @pytest.fixture(scope="session")
14 | def dotenv() -> None:
15 | load_dotenv()
16 |
17 |
18 | @pytest.fixture(scope="session")
19 | def token_provider(dotenv) -> AzureTokenProvider:
20 | return AzureTokenProvider(
21 | authority=os.getenv(
22 | "ODCAT_AUTH_AUTHORITY",
23 | "https://oceandataplatform.b2clogin.com/755f6e58-74f0-4a07-a599-f7479b9669ab/v2.0/",
24 | ),
25 | client_id=SecretStr(os.getenv("ODCAT_AUTH_CLIENT_ID")),
26 | client_secret=SecretStr(os.getenv("ODCAT_AUTH_CLIENT_SECRET")),
27 | audience=os.getenv("ODCAT_AUTH_AUDIENCE", "a2e4df44-ed57-4673-8824-548256b92543"),
28 | tenant_id=os.getenv("ODCAT_AUTH_TENANT_ID", "755f6e58-74f0-4a07-a599-f7479b9669ab"),
29 | token_uri=os.getenv(
30 | "ODCAT_AUTH_TOKEN_ENDPOINT",
31 | "https://oceandataplatform.b2clogin.com/oceandataplatform.onmicrosoft.com/b2c_1a_signup_signin_custom/oauth2/v2.0/token", # noqa: E501
32 | ),
33 | jwks_uri=os.getenv(
34 | "ODCAT_AUTH_JWKS_URI",
35 | "https://oceandataplatform.b2clogin.com/oceandataplatform.onmicrosoft.com/b2c_1a_signup_signin_custom/discovery/v2.0/keys", # noqa: E501
36 | ),
37 | scope=[os.getenv("ODCAT_AUTH_SCOPE", "https://oceandataplatform.onmicrosoft.com/odcat/.default")],
38 | )
39 |
40 |
41 | @pytest.fixture(scope="session")
42 | def odp_client(token_provider: AzureTokenProvider) -> OdpClient:
43 | base_url = os.getenv("ODCAT_BASE_URL", "https://api.hubocean.earth")
44 |
45 | return OdpClient(
46 | base_url=base_url,
47 | token_provider=token_provider,
48 | )
49 |
50 |
51 | def delete_element(func: Callable, *args, **kwargs) -> None:
52 | try:
53 | func(*args, **kwargs)
54 | except OdpResourceNotFoundError:
55 | pass
56 |
57 |
58 | @pytest.fixture
59 | def odp_client_test_uuid(odp_client: OdpClient) -> Tuple[OdpClient, uuid.UUID]:
60 | test_uuid = uuid.uuid4()
61 | yield odp_client, test_uuid
62 |
63 | # Clean up: delete files/schemas and catalog manifests for every resource labeled with this test's uuid
64 | for manifest in odp_client.catalog.list({"#EQUALS": ["$metadata.labels.test_uuid", str(test_uuid)]}):
65 | storage_class = getattr(manifest.spec, "storage_class", "")
66 | if "raw" in storage_class:
67 | for file in odp_client.raw.list(manifest):
68 | delete_element(odp_client.raw.delete_file, manifest, file)
69 | if os.path.exists(os.path.basename(file.name)):
70 | os.remove(os.path.basename(file.name))
71 | if "tabular" in storage_class:
72 | delete_element(odp_client.tabular.delete_schema, manifest, True)
73 | delete_element(odp_client.catalog.delete, manifest.metadata.uuid)
74 |
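75 | # The example tests below run against a live deployment (ODCAT_BASE_URL defaults to
76 | # https://api.hubocean.earth). ODCAT_AUTH_CLIENT_ID and ODCAT_AUTH_CLIENT_SECRET have no defaults
77 | # above and must be supplied (for example via a .env file picked up by load_dotenv); the remaining
78 | # ODCAT_* variables fall back to the public defaults shown in the token_provider fixture.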
--------------------------------------------------------------------------------
/tests/test_examples/test_catalog_client_example.py:
--------------------------------------------------------------------------------
1 | import random
2 | import string
3 | from typing import Tuple
4 | from uuid import UUID
5 |
6 | from odp.client import OdpClient
7 | from odp.client.resource_client import OdpResourceClient
8 | from odp.dto import DatasetDto, DatasetSpec, ResourceDto
9 |
10 |
11 | def test_catalog_client(odp_client_test_uuid: Tuple[OdpClient, UUID]):
12 | catalog_client = odp_client_test_uuid[0].catalog
13 | assert isinstance(catalog_client, OdpResourceClient)
14 |
15 | for item in catalog_client.list():
16 | assert isinstance(item, ResourceDto)
17 |
18 | manifest = DatasetDto(
19 | **{
20 | "kind": "catalog.hubocean.io/dataset",
21 | "version": "v1alpha3",
22 | "metadata": {
23 | "name": "".join(random.choices(string.ascii_lowercase + string.digits, k=20)),
24 | "labels": {"test_uuid": odp_client_test_uuid[1]},
25 | },
26 | "spec": {
27 | "storage_controller": "registry.hubocean.io/storageController/storage-tabular",
28 | "storage_class": "registry.hubocean.io/storageClass/tabular",
29 | "maintainer": {"contact": "Just Me <just.me@example.com>"},  # <-- strict "Name <email>" syntax; placeholder address
30 | },
31 | }
32 | )
33 |
34 | manifest = catalog_client.create(manifest)
35 | assert isinstance(manifest.spec, DatasetSpec)
36 |
37 | fetched_manifest = catalog_client.get(manifest.metadata.uuid, tp=DatasetDto)
38 | assert isinstance(fetched_manifest.spec, DatasetSpec)
39 |
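40 | # Passing tp=DatasetDto to catalog_client.get appears to re-fetch the manifest with its spec
41 | # parsed into DatasetSpec rather than a generic ResourceDto spec, which the final assertion checks.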
--------------------------------------------------------------------------------
/tests/test_examples/test_catalog_oqs_query_example.py:
--------------------------------------------------------------------------------
1 | from odp.client import OdpClient
2 | from odp.dto import ResourceDto
3 |
4 |
5 | def test_catalog_oqs_query(odp_client: OdpClient):
6 | oqs_filter = {
7 | "#EQUALS": [
8 | "$kind",
9 | "catalog.hubocean.io/dataCollection",
10 | ]
11 | }
12 |
13 | for item in odp_client.catalog.list(oqs_filter):
14 | assert isinstance(item, ResourceDto)
15 |
16 | assert list(odp_client.catalog.list(oqs_filter)) != []
17 |
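18 | # The filter above uses the catalog's OQS query syntax. Other operators used in this test suite
19 | # include #AND, #WITHIN, #GREATER_THAN_OR_EQUALS, #ST_INTERSECTS and #ST_WITHIN, e.g.
20 | # (illustrative only):
21 | #
22 | #   {"#AND": [{"#EQUALS": ["$kind", "catalog.hubocean.io/observable"]},
23 | #             {"#GREATER_THAN_OR_EQUALS": ["$spec.details.value", 2]}]}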
--------------------------------------------------------------------------------
/tests/test_examples/test_observables_example.py:
--------------------------------------------------------------------------------
1 | import random
2 | import string
3 | from typing import Tuple
4 | from uuid import UUID
5 |
6 | from odp.client import OdpClient
7 | from odp.dto import ObservableDto, ObservableSpec
8 |
9 |
10 | def test_observables(odp_client_test_uuid: Tuple[OdpClient, UUID]):
11 | catalog_client = odp_client_test_uuid[0].catalog
12 |
13 | observable_filter = {"#EQUALS": ["$kind", "catalog.hubocean.io/observable"]}
14 |
15 | for item in catalog_client.list(observable_filter):
16 | assert isinstance(item.spec, ObservableSpec)
17 |
18 | observable_manifest = ObservableDto(
19 | **{
20 | "kind": "catalog.hubocean.io/observable",
21 | "version": "v1alpha2",
22 | "metadata": {
23 | "name": "".join(random.choices(string.ascii_lowercase + string.digits, k=20)),
24 | "display_name": "Test Observable for time",
25 | "description": "A test observable for time",
26 | "labels": {"hubocean.io/test": True, "test_uuid": odp_client_test_uuid[1]},
27 | },
28 | "spec": {
29 | "ref": "catalog.hubocean.io/dataset/test-dataset",
30 | "observable_class": "catalog.hubocean.io/observableClass/static-geometric-coverage",
31 | "details": {"value": {"type": "Point", "coordinates": [-73.981200, 40.764950]}, "attribute": "test"},
32 | },
33 | }
34 | )
35 |
36 | observable_manifest = catalog_client.create(observable_manifest)
37 | assert isinstance(observable_manifest.spec, ObservableSpec)
38 |
39 | fetched_manifest = catalog_client.get(observable_manifest.metadata.uuid)
40 | assert isinstance(fetched_manifest.spec, ObservableSpec)
41 |
42 | observable_geometry_filter = {
43 | "#AND": [
44 | {"#EQUALS": ["$kind", "catalog.hubocean.io/observable"]},
45 | {
46 | "#ST_INTERSECTS": [
47 | "$spec.details.value",
48 | {
49 | "type": "Polygon",
50 | "coordinates": [
51 | [
52 | [-73.981200, 40.764950],
53 | [-73.980600, 40.764000],
54 | [-73.979800, 40.764450],
55 | [-73.980400, 40.765400],
56 | [-73.981200, 40.764950],
57 | ]
58 | ],
59 | },
60 | ]
61 | },
62 | ]
63 | }
64 |
65 | for item in catalog_client.list(observable_geometry_filter):
66 | assert isinstance(item.spec, ObservableSpec)
67 | assert [observable for observable in catalog_client.list(observable_geometry_filter)] != []
68 |
69 | static_manifest_small = ObservableDto(
70 | **{
71 | "kind": "catalog.hubocean.io/observable",
72 | "version": "v1alpha2",
73 | "metadata": {
74 | "name": "".join(random.choices(string.ascii_lowercase + string.digits, k=20)),
75 | "display_name": "SDK Example Small Value",
76 | "description": "An observable that emits a small value",
77 | "labels": {"hubocean.io/test": True, "test_uuid": odp_client_test_uuid[1]},
78 | },
79 | "spec": {
80 | "ref": "catalog.hubocean.io/dataset/test-dataset",
81 | "observable_class": "catalog.hubocean.io/observableClass/static-observable",
82 | "details": {"value": 1, "attribute": "test"},
83 | },
84 | }
85 | )
86 |
87 | catalog_client.create(static_manifest_small)
88 |
89 | static_manifest_large = ObservableDto(
90 | **{
91 | "kind": "catalog.hubocean.io/observable",
92 | "version": "v1alpha2",
93 | "metadata": {
94 | "name": "".join(random.choices(string.ascii_lowercase + string.digits, k=20)),
95 | "display_name": "SDK Example Large Value",
96 | "description": "An observable that emits a large value",
97 | "labels": {"hubocean.io/test": True, "test_uuid": odp_client_test_uuid[1]},
98 | },
99 | "spec": {
100 | "ref": "catalog.hubocean.io/dataset/test-dataset",
101 | "observable_class": "catalog.hubocean.io/observableClass/static-observable",
102 | "details": {"value": 3, "attribute": "test"},
103 | },
104 | }
105 | )
106 |
107 | catalog_client.create(static_manifest_large)
108 |
109 | observable_range_filter = {
110 | "#AND": [
111 | {"#WITHIN": ["$spec.observable_class", ["catalog.hubocean.io/observableClass/static-observable"]]},
112 | {"#GREATER_THAN_OR_EQUALS": ["$spec.details.value", 2]},
113 | ]
114 | }
115 |
116 | list_observables = []
117 | for item in catalog_client.list(observable_range_filter):
118 | assert isinstance(item.spec, ObservableSpec)
119 | list_observables.append(item)
120 |
121 | assert list_observables != []
122 |
--------------------------------------------------------------------------------
/tests/test_examples/test_raw_client_example.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | import string
4 | from typing import Tuple
5 | from uuid import UUID
6 |
7 | import pytest
8 | from odp.client import OdpClient
9 | from odp.client.dto.file_dto import FileMetadataDto
10 | from odp.dto import DatasetDto, DatasetSpec
11 |
12 |
13 | @pytest.mark.parametrize("file_name", ["test.txt", "foo/bar/test2.txt"])
14 | def test_raw_client(odp_client_test_uuid: Tuple[OdpClient, UUID], file_name):
15 | my_dataset = DatasetDto(
16 | **{
17 | "kind": "catalog.hubocean.io/dataset",
18 | "version": "v1alpha3",
19 | "metadata": {
20 | "name": "".join(random.choices(string.ascii_lowercase + string.digits, k=20)),
21 | "labels": {"test_uuid": odp_client_test_uuid[1]},
22 | },
23 | "spec": {
24 | "storage_controller": "registry.hubocean.io/storageController/storage-raw-cdffs",
25 | "storage_class": "registry.hubocean.io/storageClass/raw",
26 | "maintainer": {"contact": "Just Me <just.me@example.com>"},  # <-- strict "Name <email>" syntax; placeholder address
27 | },
28 | }
29 | )
30 |
31 | my_dataset = odp_client_test_uuid[0].catalog.create(my_dataset)
32 | assert isinstance(my_dataset.spec, DatasetSpec)
33 |
34 | file_dto = odp_client_test_uuid[0].raw.create_file(
35 | resource_dto=my_dataset,
36 | file_metadata_dto=FileMetadataDto(**{"name": file_name, "mime_type": "text/plain"}),
37 | contents=b"Hello, World!",
38 | )
39 |
40 | for file in odp_client_test_uuid[0].raw.list(my_dataset):
41 | assert isinstance(file, FileMetadataDto)
42 | assert list(odp_client_test_uuid[0].raw.list(my_dataset)) != []
43 |
44 | save_path = os.path.basename(file_name)
45 | odp_client_test_uuid[0].raw.download_file(my_dataset, file_dto, save_path)
46 | assert os.path.exists(save_path)
47 |
--------------------------------------------------------------------------------
/tests/test_examples/test_tabular_client_example.py:
--------------------------------------------------------------------------------
1 | import random
2 | import string
3 | from typing import Tuple
4 | from uuid import UUID
5 |
6 | from odp.client import OdpClient
7 | from odp.client.dto.table_spec import TableSpec
8 | from odp.client.exc import OdpResourceNotFoundError
9 | from odp.dto import DatasetDto, DatasetSpec
10 |
11 |
12 | def test_tabular_client(odp_client_test_uuid: Tuple[OdpClient, UUID]):
13 | my_dataset = DatasetDto(
14 | **{
15 | "kind": "catalog.hubocean.io/dataset",
16 | "version": "v1alpha3",
17 | "metadata": {
18 | "name": "".join(random.choices(string.ascii_lowercase + string.digits, k=20)),
19 | "labels": {"test_uuid": odp_client_test_uuid[1]},
20 | },
21 | "spec": {
22 | "storage_controller": "registry.hubocean.io/storageController/storage-tabular",
23 | "storage_class": "registry.hubocean.io/storageClass/tabular",
24 | "maintainer": {"contact": "Just Me <just.me@example.com>"},  # placeholder address; the field expects "Name <email>"
25 | },
26 | }
27 | )
28 |
29 | my_dataset = odp_client_test_uuid[0].catalog.create(my_dataset)
30 | assert isinstance(my_dataset.spec, DatasetSpec)
31 |
32 | table_schema = {"Data": {"type": "string"}}
33 | my_table_spec = TableSpec(table_schema=table_schema)
34 |
35 | my_table_spec = odp_client_test_uuid[0].tabular.create_schema(resource_dto=my_dataset, table_spec=my_table_spec)
36 | assert isinstance(my_table_spec, TableSpec)
37 |
38 | test_data = [{"Data": "Test"}, {"Data": "Test1"}]
39 | odp_client_test_uuid[0].tabular.write(resource_dto=my_dataset, data=test_data)
40 |
41 | our_data = odp_client_test_uuid[0].tabular.select_as_list(my_dataset)
42 | assert len(our_data) == 2
43 |
44 | our_data = list(odp_client_test_uuid[0].tabular.select_as_stream(my_dataset))
45 | assert len(our_data) == 2
46 |
47 | update_filters = {"#EQUALS": ["$Data", "Test"]}
48 | new_data = [{"Data": "Test Updated"}]
49 | odp_client_test_uuid[0].tabular.update(
50 | resource_dto=my_dataset,
51 | data=new_data,
52 | filter_query=update_filters,
53 | )
54 |
55 | result = odp_client_test_uuid[0].tabular.select_as_list(my_dataset)
56 | assert len(result) == 2
57 |
58 | delete_filters = {"#EQUALS": ["$Data", "Test1"]}
59 | odp_client_test_uuid[0].tabular.delete(resource_dto=my_dataset, filter_query=delete_filters)
60 | result = odp_client_test_uuid[0].tabular.select_as_list(my_dataset)
61 | assert len(result) == 1
62 |
63 | odp_client_test_uuid[0].tabular.delete_schema(my_dataset)
64 |
65 | try:
66 | odp_client_test_uuid[0].tabular.get_schema(my_dataset)
67 | except OdpResourceNotFoundError as e:
68 | print("Schema not found error since it is deleted")
69 | print(e)
70 |
71 | odp_client_test_uuid[0].catalog.delete(my_dataset)
72 |
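73 | # The round trip above uses the v1 tabular client: create_schema -> write -> select_as_list /
74 | # select_as_stream -> update / delete with an OQS filter_query -> delete_schema. The same flow
75 | # against the v2 client appears in test_tabular_v2_client_example.py.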
--------------------------------------------------------------------------------
/tests/test_examples/test_tabular_geography.py:
--------------------------------------------------------------------------------
1 | import random
2 | import string
3 | from typing import Tuple
4 | from uuid import UUID
5 |
6 | from odp.client import OdpClient
7 | from odp.client.dto.table_spec import TableSpec
8 | from odp.dto import DatasetDto
9 |
10 |
11 | def test_tabular_geography(odp_client_test_uuid: Tuple[OdpClient, UUID]):
12 | manifest = DatasetDto(
13 | **{
14 | "kind": "catalog.hubocean.io/dataset",
15 | "version": "v1alpha3",
16 | "metadata": {
17 | "name": "".join(random.choices(string.ascii_lowercase + string.digits, k=20)),
18 | "labels": {"test_uuid": odp_client_test_uuid[1]},
19 | },
20 | "spec": {
21 | "storage_controller": "registry.hubocean.io/storageController/storage-tabular",
22 | "storage_class": "registry.hubocean.io/storageClass/tabular",
23 | "maintainer": {"contact": "Just Me <just.me@example.com>"},  # <-- strict "Name <email>" syntax; placeholder address
24 | },
25 | }
26 | )
27 |
28 | manifest = odp_client_test_uuid[0].catalog.create(manifest)
29 |
30 | table_schema = {"name": {"type": "string"}, "location": {"type": "geometry"}}
31 |
32 | partitioning = [{"columns": ["location"], "transformer_name": "geohash", "args": [2]}]
33 |
34 | my_table_spec = TableSpec(table_schema=table_schema, partitioning=partitioning)
35 |
36 | my_table_spec = odp_client_test_uuid[0].tabular.create_schema(resource_dto=manifest, table_spec=my_table_spec)
37 |
38 | data = [
39 | {"name": "Oslo", "location": {"type": "Point", "coordinates": [10.74609, 59.91273]}},
40 | {"name": "New York", "location": {"type": "Point", "coordinates": [-74.005974, 40.712776]}},
41 | {"name": "Los Angeles", "location": {"type": "Point", "coordinates": [-118.243683, 34.052235]}},
42 | {"name": "London", "location": {"type": "Point", "coordinates": [-0.127758, 51.507351]}},
43 | {"name": "Tokyo", "location": {"type": "Point", "coordinates": [139.691711, 35.689487]}},
44 | {"name": "Paris", "location": {"type": "Point", "coordinates": [2.352222, 48.856613]}},
45 | {"name": "Berlin", "location": {"type": "Point", "coordinates": [13.404954, 52.520008]}},
46 | {"name": "Moscow", "location": {"type": "Point", "coordinates": [37.617298, 55.755825]}},
47 | {"name": "Beijing", "location": {"type": "Point", "coordinates": [116.407394, 39.904202]}},
48 | {"name": "Mexico City", "location": {"type": "Point", "coordinates": [-99.133209, 19.432608]}},
49 | {"name": "São Paulo", "location": {"type": "Point", "coordinates": [-46.633308, -23.55052]}},
50 | {"name": "Buenos Aires", "location": {"type": "Point", "coordinates": [-58.381592, -34.603722]}},
51 | {"name": "New Delhi", "location": {"type": "Point", "coordinates": [77.209023, 28.613939]}},
52 | {"name": "Sydney", "location": {"type": "Point", "coordinates": [151.209296, -33.86882]}},
53 | {"name": "San Francisco", "location": {"type": "Point", "coordinates": [-122.419418, 37.774929]}},
54 | {"name": "Johannesburg", "location": {"type": "Point", "coordinates": [28.047305, -26.204103]}},
55 | {"name": "Chicago", "location": {"type": "Point", "coordinates": [-87.629799, 41.878113]}},
56 | {"name": "Melbourne", "location": {"type": "Point", "coordinates": [144.963058, -37.813628]}},
57 | {"name": "Edinburgh", "location": {"type": "Point", "coordinates": [-3.188267, 55.953251]}},
58 | {"name": "Stockholm", "location": {"type": "Point", "coordinates": [18.068581, 59.329323]}},
59 | {"name": "Ottawa", "location": {"type": "Point", "coordinates": [-75.697193, 45.42153]}},
60 | {"name": "Hong Kong", "location": {"type": "Point", "coordinates": [114.109497, 22.396428]}},
61 | {"name": "Jakarta", "location": {"type": "Point", "coordinates": [106.845599, -6.208763]}},
62 | {"name": "Cairo", "location": {"type": "Point", "coordinates": [31.235712, 30.04442]}},
63 | {"name": "Budapest", "location": {"type": "Point", "coordinates": [19.040236, 47.497913]}},
64 | {"name": "Christchurch", "location": {"type": "Point", "coordinates": [172.636225, -43.532054]}},
65 | {"name": "Manila", "location": {"type": "Point", "coordinates": [120.98422, 14.599512]}},
66 | {"name": "Bangkok", "location": {"type": "Point", "coordinates": [100.501765, 13.756331]}},
67 | {"name": "Rome", "location": {"type": "Point", "coordinates": [12.496366, 41.902783]}},
68 | {"name": "Shanghai", "location": {"type": "Point", "coordinates": [121.473702, 31.23039]}},
69 | {"name": "Rio de Janeiro", "location": {"type": "Point", "coordinates": [-43.172897, -22.906847]}},
70 | {"name": "Madrid", "location": {"type": "Point", "coordinates": [-3.70379, 40.416775]}},
71 | {"name": "Nairobi", "location": {"type": "Point", "coordinates": [36.821946, -1.292066]}},
72 | {"name": "Toronto", "location": {"type": "Point", "coordinates": [-79.383186, 43.653225]}},
73 | {"name": "Fortaleza", "location": {"type": "Point", "coordinates": [-38.526669, -3.731862]}},
74 | {"name": "Tehran", "location": {"type": "Point", "coordinates": [51.388973, 35.6895]}},
75 | {"name": "Brasília", "location": {"type": "Point", "coordinates": [-47.882166, -15.794229]}},
76 | {"name": "Bogotá", "location": {"type": "Point", "coordinates": [-74.072092, 4.710989]}},
77 | ]
78 |
79 | odp_client_test_uuid[0].tabular.write(resource_dto=manifest, data=data)
80 |
81 | europe_list = odp_client_test_uuid[0].tabular.select_as_list(
82 | resource_dto=manifest,
83 | filter_query={
84 | "#ST_WITHIN": [
85 | "$location",
86 | {
87 | "type": "Polygon",
88 | "coordinates": [
89 | [
90 | [37.02028908997249, 70.9411520317463],
91 | [-24.834125592956013, 70.9411520317463],
92 | [-24.834125592956013, 35.753296916825306],
93 | [37.02028908997249, 35.753296916825306],
94 | [37.02028908997249, 70.9411520317463],
95 | ]
96 | ],
97 | },
98 | ]
99 | },
100 | )
101 |
102 | expected_cities = ["Paris", "London", "Edinburgh", "Budapest", "Stockholm", "Oslo", "Berlin", "Rome", "Madrid"]
103 | for city in europe_list:
104 | assert city.get("name") in expected_cities
105 | assert len(europe_list) == len(expected_cities)
106 |
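107 | # The #ST_WITHIN filter above keeps only rows whose "location" geometry lies inside the polygon,
108 | # here a rough bounding box over Europe, so only the European cities from the inserted data are
109 | # expected in the result.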
--------------------------------------------------------------------------------
/tests/test_examples/test_tabular_v2_client_example.py:
--------------------------------------------------------------------------------
1 | import random
2 | import string
3 | from typing import Tuple
4 | from uuid import UUID
5 |
6 | import pyarrow as pa
7 | from odp.client import OdpClient
8 | from odp.client.exc import OdpResourceNotFoundError
9 | from odp.client.tabular_v2.util import exp
10 | from odp.dto import DatasetDto, DatasetSpec
11 |
12 |
13 | def test_tabular_client(odp_client_test_uuid: Tuple[OdpClient, UUID]):
14 | my_dataset = DatasetDto(
15 | **{
16 | "kind": "catalog.hubocean.io/dataset",
17 | "version": "v1alpha3",
18 | "metadata": {
19 | "name": "".join(random.choices(string.ascii_lowercase + string.digits, k=20)),
20 | "labels": {"test_uuid": odp_client_test_uuid[1]},
21 | },
22 | "spec": {
23 | "storage_controller": "registry.hubocean.io/storageController/storage-tabular",
24 | "storage_class": "registry.hubocean.io/storageClass/tabular",
25 | "maintainer": {"contact": "Just Me <just.me@example.com>"},  # placeholder address; the field expects "Name <email>"
26 | },
27 | }
28 | )
29 |
30 | my_dataset = odp_client_test_uuid[0].catalog.create(my_dataset)
31 | assert isinstance(my_dataset.spec, DatasetSpec)
32 |
33 | table = odp_client_test_uuid[0].table_v2(my_dataset)
34 |
35 | table_schema = pa.schema({"Data": pa.string()})
36 | table.create(table_schema)
37 |
38 | assert table.schema() is not None
39 |
40 | test_data = [{"Data": "Test"}, {"Data": "Test1"}]
41 | with table as tx:
42 | tx.insert(test_data)
43 |
44 | our_data = list(table.select().rows())
45 | assert len(our_data) == 2
46 |
47 | our_data = list(table.select().batches())
48 | assert len(our_data) == 1
49 | assert our_data[0].num_rows == 2
50 |
51 | update_filters = exp.parse("Data == 'Test'")
52 | new_data = [{"Data": "Test Updated"}]
53 | with table as tx:
54 | tx.delete(update_filters)
55 | tx.insert(new_data)
56 |
57 | result = list(table.select().rows())
58 | assert new_data[0] in result
59 | assert len(result) == 2
60 |
61 | delete_filters = exp.parse("Data == 'Test1'")
62 | with table as tx:
63 | tx.delete(delete_filters)
64 | result = list(table.select().rows())
65 | assert len(result) == 1
66 |
67 | table.drop()
68 |
69 | try:
70 | table.select()
71 | except OdpResourceNotFoundError as e:
72 | print("Schema not found error since it is deleted")
73 | print(e)
74 |
75 | odp_client_test_uuid[0].catalog.delete(my_dataset)
76 |
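77 | # In the v2 client, entering the table as a context manager yields a transaction object; the
78 | # inserts and deletes issued on `tx` above are presumably applied together when the block exits.
79 | # Reads go through table.select(), which exposes results either row-wise via .rows() or as
80 | # pyarrow record batches via .batches().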
--------------------------------------------------------------------------------