├── .github └── workflows │ ├── pypi-publish.yml │ ├── run_examples_test.yml │ ├── static_analysis.yml │ └── unit_tests.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── Makefile ├── README.md ├── assets └── ODP-SDK.png ├── docs ├── Makefile ├── make.bat ├── requirements-docs.txt └── source │ ├── _static │ ├── .gitkeep │ └── style.css │ ├── _templates │ └── .gitkeep │ ├── conf.py │ ├── img │ ├── odp-favicon-rgb-blueandwhite.png │ ├── odp-logo-rgb-blueandblack.png │ └── odp-logo-rgb-blueandwhite.png │ ├── index.rst │ └── odp.rst ├── examples ├── README.md ├── catalog_client_example.py ├── catalog_oqs_query_example.py ├── observables_example.py ├── raw_client_example.py ├── raw_client_file_example.py ├── tabular_client_example.py ├── tabular_geography.py └── workspace_examples │ ├── raw-roundtrip.ipynb │ └── tabular-roundtrip.ipynb ├── poetry.lock ├── pyproject.toml ├── scripts └── migrate_local_deps.py ├── src ├── dto │ ├── README.md │ ├── odp │ │ └── dto │ │ │ ├── __init__.py │ │ │ ├── catalog │ │ │ ├── __init__.py │ │ │ ├── _rg.py │ │ │ ├── data_collection.py │ │ │ ├── dataset.py │ │ │ └── observable.py │ │ │ ├── common │ │ │ ├── __init__.py │ │ │ ├── contact_info.py │ │ │ └── license.py │ │ │ ├── metadata.py │ │ │ ├── registry │ │ │ ├── __init__.py │ │ │ ├── _rg.py │ │ │ └── observable_class.py │ │ │ ├── resource.py │ │ │ ├── resource_registry.py │ │ │ ├── resource_status.py │ │ │ └── validators.py │ ├── pyproject.toml │ └── tests │ │ └── test_dto │ │ ├── __init__.py │ │ ├── conftest.py │ │ ├── test_dto_base.py │ │ ├── test_resource_registry.py │ │ ├── test_validators.py │ │ └── utils.py └── sdk │ ├── README.md │ ├── odp │ └── client │ │ ├── __init__.py │ │ ├── auth.py │ │ ├── client.py │ │ ├── dto │ │ ├── __init__.py │ │ ├── file_dto.py │ │ ├── table_spec.py │ │ └── tabular_store.py │ │ ├── exc.py │ │ ├── http_client.py │ │ ├── raw_storage_client.py │ │ ├── resource_client.py │ │ ├── tabular_storage_client.py │ │ ├── tabular_storage_v2_client.py │ │ ├── tabular_v2 │ │ ├── __init__.py │ │ ├── big │ │ │ ├── __init__.py │ │ │ ├── big.py │ │ │ ├── buffer.py │ │ │ ├── local.py │ │ │ └── remote.py │ │ ├── bsquare │ │ │ ├── __init__.py │ │ │ ├── bsquare.py │ │ │ └── query.py │ │ ├── client │ │ │ ├── __init__.py │ │ │ ├── client.py │ │ │ ├── table_cursor.py │ │ │ ├── table_tx.py │ │ │ └── tablehandler.py │ │ └── util │ │ │ ├── __init__.py │ │ │ ├── cache.py │ │ │ ├── exp.py │ │ │ ├── reader.py │ │ │ └── util.py │ │ └── utils │ │ ├── __init__.py │ │ ├── geometry_conversion.py │ │ ├── json.py │ │ ├── ndjson.py │ │ └── package_utils.py │ ├── odp_sdk │ └── __init__.py │ ├── pyproject.toml │ └── tests │ └── test_sdk │ ├── __init__.py │ ├── conftest.py │ ├── fixtures │ ├── __init__.py │ ├── auth_fixtures.py │ ├── dto_fixtures.py │ ├── jwt_fixtures.py │ ├── odp_http_client_fixtures.py │ ├── request_fixtures.py │ └── time_fixtures.py │ ├── test_auth │ ├── __init__.py │ ├── test_azure_token_provider.py │ ├── test_get_default_token_provider.py │ ├── test_jwks_token_provider.py │ └── test_odp_workspace_token_provider.py │ ├── test_http_client.py │ ├── test_raw_storage_client.py │ ├── test_resource_client.py │ ├── test_tabular_storage_client.py │ └── test_utils │ ├── __init__.py │ ├── test_dto.py │ ├── test_geometry_conversion.py │ ├── test_ndjson.py │ └── test_package_utils.py └── tests └── test_examples ├── __init__.py ├── conftest.py ├── test_catalog_client_example.py ├── test_catalog_oqs_query_example.py ├── test_observables_example.py ├── test_raw_client_example.py ├── 
test_tabular_client_example.py ├── test_tabular_geography.py └── test_tabular_v2_client_example.py /.github/workflows/pypi-publish.yml: -------------------------------------------------------------------------------- 1 | name: Build & Release 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | build: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v3 12 | 13 | - name: Set up Python 14 | uses: actions/setup-python@v2 15 | with: 16 | python-version: "3.10" 17 | 18 | - name: Install packages 19 | run: | 20 | python -m pip install --upgrade pip build poetry 21 | python -m poetry install --only helper-scripts 22 | 23 | - name: Build a binary wheel and a source tarball 24 | run: | 25 | make version 26 | make build 27 | 28 | - name: Publish build artifacts 29 | uses: actions/upload-artifact@v3 30 | with: 31 | name: built-package 32 | path: "./src/**/dist" 33 | 34 | publish-release: 35 | name: Publish release to PyPI 36 | needs: [build] 37 | environment: "prod" 38 | runs-on: ubuntu-latest 39 | strategy: 40 | matrix: 41 | package_path: 42 | - "sdk" 43 | - "dto" 44 | 45 | steps: 46 | - name: Download build artifacts 47 | uses: actions/download-artifact@v3 48 | with: 49 | name: built-package 50 | path: "./build" 51 | 52 | - name: List out files 53 | run: | 54 | ls -lA ./build 55 | 56 | - name: List out files 57 | run: | 58 | ls -lA ./build/${{ matrix.package_path }}/dist 59 | 60 | - name: Publish distribution to PyPI 61 | uses: pypa/gh-action-pypi-publish@release/v1 62 | with: 63 | password: ${{ secrets.PYPI_TOKEN }} 64 | packages-dir: "./build/${{ matrix.package_path }}/dist" 65 | verbose: true -------------------------------------------------------------------------------- /.github/workflows/run_examples_test.yml: -------------------------------------------------------------------------------- 1 | name: Run examples test 2 | 3 | on: 4 | push: 5 | 6 | jobs: 7 | examples-test: 8 | name: Test examples 9 | runs-on: ubuntu-latest 10 | env: 11 | ODCAT_AUTH_CLIENT_ID: ${{ secrets.ODCAT_AUTH_CLIENT_ID }} 12 | ODCAT_AUTH_CLIENT_SECRET: ${{ secrets.ODCAT_AUTH_CLIENT_SECRET }} 13 | ODCAT_AUTH_AUDIENCE: ${{ secrets.ODCAT_AUTH_AUDIENCE }} 14 | strategy: 15 | matrix: 16 | python-version: 17 | - "3.10" 18 | - "3.11" 19 | - "3.12" 20 | steps: 21 | - uses: actions/checkout@v3 22 | 23 | - name: Set up Python 24 | uses: actions/setup-python@v2 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | 28 | - name: Install poetry 29 | run: | 30 | python -m pip install --upgrade pip build poetry 31 | 32 | - name: Install packages 33 | run: | 34 | poetry install 35 | 36 | - name: Add Mask 37 | run: | 38 | echo "::add-mask::${{ env.ODCAT_AUTH_CLIENT_ID }}" 39 | echo "::add-mask::${{ env.ODCAT_AUTH_CLIENT_SECRET }}" 40 | echo "::add-mask::${{ env.ODCAT_AUTH_AUDIENCE }}" 41 | 42 | - name: Run tests 43 | run: | 44 | poetry run pytest tests/test_examples 45 | env: 46 | ODCAT_BASE_URL: https://odcat.dev.hubocean.io 47 | ODCAT_AUTH_SCOPE: https://oceandataplatform.onmicrosoft.com/odcat-dev/.default 48 | GITHUB_SHA: ${{ github.sha }} -------------------------------------------------------------------------------- /.github/workflows/static_analysis.yml: -------------------------------------------------------------------------------- 1 | # yanked from https://github.com/PrefectHQ/prefect-collection-template/blob/main/%7B%7Bcookiecutter.collection_name%7D%7D/.github/workflows/static_analysis.yml 2 | name: Static analysis 3 | 4 | on: [pull_request] 5 | 6 | jobs: 7 | pre-commit-checks: 8 | 
name: Pre-commit checks 9 | runs-on: ubuntu-latest 10 | strategy: 11 | matrix: 12 | python-version: 13 | - "3.9" 14 | - "3.10" 15 | - "3.11" 16 | - "3.12" 17 | 18 | steps: 19 | - uses: actions/checkout@v3 20 | with: 21 | persist-credentials: false 22 | 23 | - name: Set up Python 24 | uses: actions/setup-python@v4 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | 28 | - name: Install pre-commit 29 | run: | 30 | python -m pip install --upgrade pip pre-commit 31 | 32 | - name: Run pre-commit 33 | run: | 34 | pre-commit run --show-diff-on-failure --color=always --all-files 35 | -------------------------------------------------------------------------------- /.github/workflows/unit_tests.yml: -------------------------------------------------------------------------------- 1 | name: Run unit tests 2 | 3 | on: 4 | push: 5 | 6 | jobs: 7 | unit-tests: 8 | name: Unit tests 9 | runs-on: ubuntu-latest 10 | strategy: 11 | matrix: 12 | python-version: 13 | - "3.9" 14 | - "3.10" 15 | - "3.11" 16 | - "3.12" 17 | 18 | steps: 19 | - uses: actions/checkout@v3 20 | 21 | - name: Print current working directory 22 | run: pwd 23 | 24 | - name: Set up Python 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | 29 | - name: Install poetry 30 | run: | 31 | python -m pip install --upgrade pip build poetry 32 | 33 | - name: Install packages 34 | run: | 35 | poetry install 36 | 37 | - name: Run tests 38 | run: | 39 | poetry run pytest src/sdk/tests 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | version.txt 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | cover/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | .pybuilder/ 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | # For a library or package, you might want to ignore these files since the code is 88 | # intended to run in multiple environments; otherwise, check them in: 89 | # .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 99 | __pypackages__/ 100 | 101 | # Celery stuff 102 | celerybeat-schedule 103 | celerybeat.pid 104 | 105 | # SageMath parsed files 106 | *.sage.py 107 | 108 | # Environments 109 | .env 110 | .venv 111 | env/ 112 | venv/ 113 | ENV/ 114 | env.bak/ 115 | venv.bak/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | .dmypy.json 130 | dmypy.json 131 | 132 | # Pyre type checker 133 | .pyre/ 134 | 135 | # pytype static type analyzer 136 | .pytype/ 137 | 138 | # Cython debug symbols 139 | cython_debug/ 140 | 141 | # 142 | # IDE 143 | # 144 | /.idea 145 | .DS_Store 146 | /bin 147 | *.swp 148 | .token_cache.bin -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: "v4.4.0" 4 | hooks: 5 | - id: check-ast 6 | - repo: https://github.com/pycqa/autoflake 7 | rev: v2.0.1 8 | hooks: 9 | - id: autoflake 10 | args: 11 | [ 12 | --remove-all-unused-imports, 13 | --in-place, 14 | --ignore-init-module-imports, 15 | ] 16 | - repo: https://github.com/pycqa/isort 17 | rev: 5.12.0 18 | hooks: 19 | - id: isort 20 | - repo: https://github.com/psf/black 21 | rev: 23.1.0 22 | hooks: 23 | - id: black 24 | args: [--line-length=120] 25 | language_version: python3.10 26 | - repo: https://github.com/pycqa/flake8 27 | rev: 6.0.0 28 | hooks: 29 | - id: flake8 30 | additional_dependencies: 31 | - "flake8-pyproject" 32 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2023 HUB Ocean 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Description: Makefile for building and publishing the SDK 2 | 3 | # External tools 4 | POETRY := poetry 5 | MD5SUM := md5sum 6 | TAR := tar 7 | GIT := git 8 | PYTHON := python3 9 | 10 | # Subprojects 11 | SUBPROJECTS := src/sdk src/dto 12 | DIST_DIRS := $(SUBPROJECTS:%=%/dist) 13 | PYPROJECTS := $(SUBPROJECTS:%=%/pyproject.toml) 14 | MD5S := $(DIST_DIRS:%=%/md5.published) 15 | VERSIONS := $(SUBPROJECTS:%=%/version.txt) 16 | 17 | # Get the current version from the git tags 18 | CURRENT_VERSION := $(shell $(GIT) describe --tags --abbrev=0) 19 | 20 | # 21 | # Rules 22 | # 23 | 24 | # Build the distribution 25 | %/dist: %/pyproject.toml 26 | cd $(dir $@) && $(POETRY) build 27 | 28 | # Create the md5 hash of the distribution 29 | %/dist/md5: %/dist 30 | $(TAR) -cf - $(dir $@) | $(MD5SUM) > $@ 31 | 32 | # Publish the distribution 33 | %/dist/md5.published: %/dist/md5 34 | cd $(dir $@) && $(POETRY) publish --dry-run 35 | cp $< $@ 36 | 37 | # Update the version in the pyproject.toml 38 | %/version.txt: %/pyproject.toml 39 | echo "Poetry version: $(CURRENT_VERSION)" 40 | $(POETRY) run python scripts/migrate_local_deps.py $(CURRENT_VERSION) $< --overwrite 41 | cd $(dir $<) && $(POETRY) version $(CURRENT_VERSION) 42 | echo $(CURRENT_VERSION) > $@ 43 | 44 | # Update the version in all subprojects 45 | version: $(VERSIONS) 46 | $(POETRY) update odp-sdk odp-dto 47 | 48 | # Build all subprojects 49 | build: $(DIST_DIRS) 50 | 51 | # Publish all subprojects 52 | publish: $(MD5S) 53 | 54 | # Clean up 55 | clean: 56 | rm -vrf $(DIST_DIRS) 57 | rm -f $(VERSIONS) 58 | 59 | # Default target 60 | all: build 61 | 62 | # Phony targets 63 | .PHONY: build publish version clean all 64 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | ODP SDK logo 3 | 4 | 5 | 6 | # ODP Python SDK 7 | 8 | Connect to the Ocean Data Platform with Python through the Python SDK. Download queried ocean data efficiently into data frames for easy exploration and further processing in your data science project. 9 | 10 | ## Documentation 11 | 12 | [WIP] 13 | 14 | ## Installation 15 | 16 | Use the package manager [pip](https://pip.pypa.io/en/stable/) to install the Ocean Data Platform Python SDK. 17 | 18 | ```bash 19 | pip3 install odp_sdk 20 | ``` 21 | 22 | ## Usage 23 | 24 | *Note: Accessing the Ocean Data Platform requires an authorized account. Contact ODP to request one.* 25 | 26 | ```python 27 | from odp_sdk.client import OdpClient 28 | 29 | client = OdpClient() 30 | 31 | for item in client.catalog.list(): 32 | print(item) 33 | ``` 34 | 35 | Examples can be found in /examples.
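For instance, creating, fetching, and deleting a dataset manifest looks like this — a condensed sketch of examples/catalog_client_example.py (the examples import from `odp.client` and `odp.dto`; the storage references and contact details below are placeholder values taken from that example):

```python
from odp.client import OdpClient
from odp.dto import Metadata
from odp.dto.catalog import DatasetDto, DatasetSpec
from odp.dto.common.contact_info import ContactInfo

# The token provider is resolved from the environment when none is given.
client = OdpClient()

# Declare a dataset manifest and add it to the catalog
manifest = DatasetDto(
    metadata=Metadata(name=client.personalize_name("sdk-manifest-creation-example")),
    spec=DatasetSpec(
        storage_controller="registry.hubocean.io/storageController/storage-tabular",
        storage_class="registry.hubocean.io/storageClass/tabular",
        maintainer=ContactInfo(contact="User McUsername", organisation="Organisation Name"),  # placeholder contact
    ),
)
manifest = client.catalog.create(manifest)

# Fetch the manifest back by UUID, then clean up
print(client.catalog.get(manifest.metadata.uuid))
client.catalog.delete(manifest)
```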
36 | -------------------------------------------------------------------------------- /assets/ODP-SDK.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/C4IROcean-archive/odp-sdk-python/cfe9d937e620dfa8f35e6e92646d9625de5a0de9/assets/ODP-SDK.png -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements-docs.txt: -------------------------------------------------------------------------------- 1 | cognite-sdk>=1.3 2 | cmocean>=2.0 3 | geojson>=2.5.0 4 | geopandas>=0.8.1 5 | matplotlib>=3.2.2 6 | MetPy>=0.12.1 7 | numpy>=1.19.0 8 | pandas>=1.0.5 9 | python-dateutil>=2.8.1 10 | scipy>=1.5.0 11 | seaborn>=0.10.1 12 | tqdm>=4.49.0 13 | pygeos>=0.8 14 | descartes>=1.1.0 15 | -------------------------------------------------------------------------------- /docs/source/_static/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/C4IROcean-archive/odp-sdk-python/cfe9d937e620dfa8f35e6e92646d9625de5a0de9/docs/source/_static/.gitkeep -------------------------------------------------------------------------------- /docs/source/_static/style.css: -------------------------------------------------------------------------------- 1 | /* Sidebar header (and topbar for mobile) */ 2 | 3 | .wy-side-nav-search, .wy-nav-top { 4 | background-color: #0A1530; 5 | } 6 | /* Sidebar */ 7 | .wy-nav-side { 8 | background-color: #0A1530; 9 | color: #FD5D16; 10 | } 11 | 12 | .wy-nav-content-wrap { 13 | background: #eff6fa; 14 | } -------------------------------------------------------------------------------- /docs/source/_templates/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/C4IROcean-archive/odp-sdk-python/cfe9d937e620dfa8f35e6e92646d9625de5a0de9/docs/source/_templates/.gitkeep -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | import sys 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 14 | # 15 | from os import path 16 | 17 | sys.path.insert(0, path.abspath(path.join(path.dirname(__file__), "../../"))) 18 | sys.path.insert(0, path.abspath(path.join(path.dirname(__file__), "../../examples"))) 19 | 20 | 21 | # -- Project information ----------------------------------------------------- 22 | 23 | project = "ODP Python SDK" 24 | copyright = "2020, C4IR/Ocean Data Foundation" 25 | author = "C4IR/Ocean Data Foundation" 26 | 27 | version = "0.3.9" 28 | 29 | # The full version, including alpha/beta/rc tags 30 | release = version 31 | 32 | 33 | # -- General configuration --------------------------------------------------- 34 | 35 | # Add any Sphinx extension module names here, as strings. They can be 36 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 37 | # ones. 
38 | extensions = ["sphinx.ext.autodoc", "sphinx.ext.autosummary", "sphinx.ext.napoleon", "sphinx_rtd_theme"] 39 | 40 | autodoc_mock_imports = ["cartopy"] 41 | 42 | autosummary_generate = True 43 | 44 | # Add any paths that contain templates here, relative to this directory. 45 | templates_path = ["_templates"] 46 | 47 | html_favicon = "img/odp-favicon-rgb-blueandwhite.png" 48 | html_logo = "img/odp-logo-rgb-blueandwhite.png" 49 | 50 | # List of patterns, relative to source directory, that match files and 51 | # directories to ignore when looking for source files. 52 | # This pattern also affects html_static_path and html_extra_path. 53 | exclude_patterns = [] 54 | 55 | 56 | # -- Options for HTML output ------------------------------------------------- 57 | 58 | # The theme to use for HTML and HTML Help pages. See the documentation for 59 | # a list of builtin themes. 60 | # 61 | html_theme = "sphinx_rtd_theme" 62 | 63 | # Add any paths that contain custom static files (such as style sheets) here, 64 | # relative to this directory. They are copied after the builtin static files, 65 | # so a file named "default.css" will overwrite the builtin "default.css". 66 | html_static_path = ["_static"] 67 | 68 | # html_context = { 69 | # "css_files": ["_static/style.css"] 70 | # } 71 | 72 | master_doc = "index" 73 | 74 | 75 | def setup(app): 76 | app.add_css_file("style.css") 77 | -------------------------------------------------------------------------------- /docs/source/img/odp-favicon-rgb-blueandwhite.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/C4IROcean-archive/odp-sdk-python/cfe9d937e620dfa8f35e6e92646d9625de5a0de9/docs/source/img/odp-favicon-rgb-blueandwhite.png -------------------------------------------------------------------------------- /docs/source/img/odp-logo-rgb-blueandblack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/C4IROcean-archive/odp-sdk-python/cfe9d937e620dfa8f35e6e92646d9625de5a0de9/docs/source/img/odp-logo-rgb-blueandblack.png -------------------------------------------------------------------------------- /docs/source/img/odp-logo-rgb-blueandwhite.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/C4IROcean-archive/odp-sdk-python/cfe9d937e620dfa8f35e6e92646d9625de5a0de9/docs/source/img/odp-logo-rgb-blueandwhite.png -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. ODP Python SDK documentation master file, created by 2 | sphinx-quickstart on Wed Sep 30 13:21:32 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to ODP Python SDK documentation 7 | ========================================== 8 | 9 | 10 | .. contents:: 11 | :local: 12 | 13 | Installation 14 | ^^^^^^^^^^^^ 15 | 16 | To install this package: 17 | 18 | .. code:: bash 19 | 20 | $ pip install odp_sdk 21 | 22 | To upgrade this package: 23 | 24 | .. code:: bash 25 | 26 | $ pip install -U odp_sdk 27 | 28 | *Note*: Utility functions available in CastFunctions.py and DataStatsFunctions.py are not included in the pip install package and have to be downloaded separately. 29 | 30 | Contents 31 | ^^^^^^^^ 32 | 33 | ..
toctree:: 34 | odp 35 | 36 | -------------------------------------------------------------------------------- /docs/source/odp.rst: -------------------------------------------------------------------------------- 1 | Quickstart 2 | ========== 3 | 4 | Authenticate 5 | ------------ 6 | 7 | In order to use the ODP SDK, you need to authenticate using your provided API-key. This is achieved by setting the 8 | `api_key`-argument when instantiating `ODPClient`: 9 | 10 | .. code:: python 11 | 12 | from odp_sdk import ODPClient 13 | client = ODPClient(api_key="") 14 | 15 | You can also set the `COGNITE_API_KEY` environment variable: 16 | 17 | .. code:: bash 18 | 19 | $ export COGNITE_API_KEY= 20 | 21 | Download Ocean Data 22 | ------------------- 23 | 24 | Downloading ocean data is very easy once you have instantiated the `ODPClient`. The data is then returned as a 25 | Pandas DataFrame_. 26 | 27 | .. code:: python 28 | 29 | df = client.casts(longitude=[-25, 35], latitude=[50, 80], timespan=["2018-06-01", "2018-06-30"]) 30 | 31 | .. _DataFrame: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html 32 | 33 | It is also possible to specify what parameters to download: 34 | 35 | .. code:: python 36 | 37 | df = client.casts( 38 | longitude = [-25, 35], 39 | latitude = [50, 80], 40 | timespan = ["2018-06-01", "2018-06-30"], 41 | parameters = ["date", "lon", "lat", "z", "Temperature", "Salinity"] 42 | ) 43 | 44 | In some instances, some filtering is necessary before downloading the data. This is achieved by first 45 | listing the available casts: 46 | 47 | .. code:: python 48 | 49 | casts = client.get_available_casts( 50 | longitude = [-25, 35], 51 | latitude = [50, 80], 52 | timespan = ["2018-06-01", "2018-06-30"], 53 | metadata_parameters = ["extId", "date", "time", "lat", "lon", "country", "Platform", "dataset_code"] 54 | ) 55 | 56 | Then apply any desirable filters before downloading the data: 57 | 58 | .. code:: python 59 | 60 | casts_norway = casts[casts.country == "NORWAY"] 61 | df = client.download_data_from_casts(casts_norway.extId.tolist(), 62 | parameters=["date", "lat", "lon", "z", "Temperature", "Salinity"]) 63 | 64 | You can also download the cast metadata: 65 | 66 | .. code:: python 67 | 68 | df = client.get_metadata(casts_norway.extId.tolist()) 69 | 70 | API 71 | === 72 | ODPClient 73 | --------- 74 | .. autoclass:: odp_sdk.ODPClient 75 | :members: 76 | :member-order: bysource 77 | 78 | Utilities 79 | ========= 80 | 81 | Advanced Helper Functions 82 | ------------------------- 83 | 84 | .. py:currentmodule:: Examples 85 | 86 | Interpolate Casts to Z 87 | ^^^^^^^^^^^^^^^^^^^^^^ 88 | 89 | .. automethod:: UtilityFunctions.interpolate_casts_to_z 90 | 91 | Interpolate Casts to grid 92 | ^^^^^^^^^^^^^^^^^^^^^^^^^ 93 | .. automethod:: UtilityFunctions.interpolate_to_grid 94 | 95 | Interpolate profile 96 | ^^^^^^^^^^^^^^^^^^^ 97 | .. automethod:: UtilityFunctions.interpolate_profile 98 | 99 | Plot Casts 100 | ^^^^^^^^^^ 101 | .. automethod:: UtilityFunctions.plot_casts 102 | 103 | Plot Grid 104 | ^^^^^^^^^ 105 | .. automethod:: UtilityFunctions.plot_grid 106 | 107 | Get Units 108 | ^^^^^^^^^ 109 | .. automethod:: UtilityFunctions.get_units 110 | 111 | Plot percentage of nulls for each variable in variable list 112 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 113 | .. automethod:: UtilityFunctions.plot_nulls 114 | 115 | Plot metadata-statistics 116 | ^^^^^^^^^^^^^^^^^^^^^^^^ 117 | ..
automethod:: UtilityFunctions.plot_meta_stats 118 | 119 | Plot distribution of values 120 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ 121 | .. automethod:: UtilityFunctions.plot_distributions 122 | 123 | Plot casts belonging to specific dataset 124 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 125 | .. automethod:: UtilityFunctions.plot_datasets 126 | 127 | Internal Helper Functions 128 | ^^^^^^^^^^^^^^^^^^^^^^^^^ 129 | .. automethod:: UtilityFunctions.geo_map 130 | .. automethod:: UtilityFunctions.missing_values 131 | 132 | Geographic Utilities 133 | -------------------- 134 | 135 | Convert Latitude and Longitude to Geo-Index 136 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 137 | .. automethod:: odp_sdk.utils.gcs_to_index 138 | 139 | Convert Latitude and Longitude to grid-coordinates 140 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 141 | .. automethod:: odp_sdk.utils.gcs_to_grid 142 | 143 | Convert Geo-Index to grid-coordinates 144 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 145 | .. automethod:: odp_sdk.utils.index_to_grid 146 | 147 | Convert Geo-Index to Latitude and Longitude 148 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 149 | .. automethod:: odp_sdk.utils.index_to_gcs 150 | 151 | Get all grid-coordinates within a rectangle 152 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 153 | .. automethod:: odp_sdk.utils.grid_rect_members 154 | 155 | Get all Geo-Indices within a rectangle 156 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 157 | .. automethod:: odp_sdk.utils.index_rect_members 158 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | ## Examples 2 | 3 | ### catalog_client_example.py 4 | 5 | - Listing datasets 6 | - Creating a dataset 7 | - Querying for a dataset 8 | - Deleting a dataset 9 | 10 | ### raw_client_example.py 11 | 12 | - Creating a dataset 13 | - Uploading a file to a dataset 14 | - Listing files in a dataset 15 | - Downloading a file from a dataset 16 | - Deleting a file from a dataset 17 | - Deleting a dataset 18 | 19 | ### tabular_client_example.py 20 | 21 | - Creating a dataset 22 | - Creating a schema for a dataset 23 | - Inserting data into a dataset 24 | - Querying for data in a dataset 25 | - Updating data in a dataset 26 | - Deleting data from a dataset 27 | - Deleting a schema from a dataset 28 | - Deleting a dataset 29 | 30 | ### observables_example.py 31 | 32 | - Listing observables 33 | - Creating an observable 34 | - Querying for an observable 35 | - Querying for observables using a geolocation filter 36 | - Deleting an observable 37 | 38 | ## tabular_geography.py 39 | 40 | - Creating dataset with geographical positions 41 | - Creating schema with partitioning 42 | 43 | ## Workspace Examples 44 | 45 | ### raw-roundtrip.ipynb 46 | 47 | - Demonstrates the process of uploading a file to a dataset, downloading the file, and deleting the file. 48 | - In Jupyter notebook format for ease of use in ODP workspaces. 49 | 50 | ### tabular-roundtrip.ipynb 51 | 52 | - Demonstrates the process of creating a dataset, creating a schema for the dataset, inserting data into the dataset, 53 | querying for data in the dataset, updating data in the dataset, deleting data from the dataset, deleting the schema 54 | from the dataset, and deleting the dataset. 55 | - In Jupyter notebook format for ease of use in ODP workspaces. 
-------------------------------------------------------------------------------- /examples/catalog_client_example.py: -------------------------------------------------------------------------------- 1 | from odp.client import OdpClient 2 | from odp.dto import Metadata 3 | from odp.dto.catalog import DatasetDto, DatasetSpec 4 | from odp.dto.common.contact_info import ContactInfo 5 | 6 | # Instantiate the client without specifying a token provider. 7 | # The token provider will be set based on the environment. 8 | client = OdpClient() 9 | 10 | print("Datasets in the catalog:") 11 | 12 | # List all resources in the catalog 13 | for item in client.catalog.list(): 14 | print(item) 15 | 16 | # Declare a dataset manifest to add to the catalog 17 | manifest = DatasetDto( 18 | metadata=Metadata( 19 | name=client.personalize_name("sdk-manifest-creation-example"), 20 | ), 21 | spec=DatasetSpec( 22 | storage_controller="registry.hubocean.io/storageController/storage-tabular", 23 | storage_class="registry.hubocean.io/storageClass/tabular", 24 | maintainer=ContactInfo( 25 | contact="User McUsername ", 26 | organisation="Organisation Name", 27 | ), 28 | ), 29 | ) 30 | 31 | # The dataset is created in the catalog. 32 | manifest = client.catalog.create(manifest) 33 | 34 | # Fetch the manifest from the catalog using the UUID 35 | print("Fetching the manifest from the catalog using the UUID") 36 | 37 | fetched_manifest = client.catalog.get(manifest.metadata.uuid) 38 | print(fetched_manifest) 39 | 40 | # Clean up 41 | print("Cleaning up") 42 | 43 | client.catalog.delete(manifest) 44 | 45 | print("Done") 46 | -------------------------------------------------------------------------------- /examples/catalog_oqs_query_example.py: -------------------------------------------------------------------------------- 1 | from odp.client import OdpClient 2 | from odp.dto.catalog import DataCollectionDto 3 | 4 | # Instantiate the client without specifying a token provider. 5 | # The token provider will be set based on the environment. 6 | client = OdpClient() 7 | 8 | # List all resources matching a given query 9 | 10 | oqs_filter = { 11 | "#EQUALS": [ # EQUALS is used here to compare two values 12 | "$kind", # The first value is the kind from the metadata, prefixed with a dollar sign. 13 | "catalog.hubocean.io/dataCollection", # And this is the value to compare with 14 | ] 15 | } 16 | 17 | print("Listing all data collections:") 18 | 19 | for item in client.catalog.list(oqs_filter): 20 | print(item) 21 | 22 | # If we know the type of the resource we are querying, 23 | # we can use the `tp` parameter to assert the type of the returned resources. 24 | 25 | print("Listing all data collections:") 26 | 27 | for item in client.catalog.list(oqs_filter, tp=DataCollectionDto, assert_type=True): 28 | print(item) 29 | -------------------------------------------------------------------------------- /examples/observables_example.py: -------------------------------------------------------------------------------- 1 | from odp.client import OdpClient 2 | from odp.dto import Metadata 3 | from odp.dto.catalog import ObservableDto, ObservableSpec 4 | 5 | # Instantiate the client without specifying a token provider. 6 | # The token provider will be set based on the environment.
7 | client = OdpClient() 8 | 9 | created_manifests = [] 10 | 11 | # List observables in the catalog 12 | observable_filter = {"#EQUALS": ["$kind", "catalog.hubocean.io/observable"]} 13 | 14 | # If we know the type of the resource we are querying, 15 | # we can use the `tp` parameter to assert the type of the returned resources. 16 | 17 | print("List of observables in the catalog:") 18 | 19 | for item in client.catalog.list(observable_filter, tp=ObservableDto, assert_type=True): 20 | print(item) 21 | 22 | # Declare a new observable to be added to the data catalog 23 | 24 | print("Creating a sample observable in the catalog") 25 | 26 | manifest = ObservableDto( 27 | metadata=Metadata( 28 | name=client.personalize_name("sdk-observable-example"), 29 | display_name="Test Observable for time", 30 | description="A test observable for time", 31 | labels={"hubocean.io/test": True}, 32 | ), 33 | spec=ObservableSpec( 34 | ref="catalog.hubocean.io/dataset/test-dataset", 35 | observable_class="catalog.hubocean.io/observableClass/static-coverage", 36 | details={"value": [0, 1684147082], "attribute": "test"}, 37 | ), 38 | ) 39 | 40 | # The observable is created in the catalog. 41 | # The return value is the full manifest of the created observable. 42 | manifest = client.catalog.create(manifest) 43 | created_manifests.append(manifest) 44 | 45 | # An example query to search for observables in certain geometries 46 | observable_geometry_filter = { 47 | "#AND": [ 48 | {"#EQUALS": ["$kind", "catalog.hubocean.io/observable"]}, 49 | { 50 | "#ST_INTERSECTS": [ 51 | "$spec.details.value", 52 | { 53 | "type": "Polygon", 54 | "coordinates": [ 55 | [ 56 | [-73.981200, 40.764950], 57 | [-73.980600, 40.764000], 58 | [-73.979800, 40.764450], 59 | [-73.980400, 40.765400], 60 | [-73.981200, 40.764950], 61 | ] 62 | ], 63 | }, 64 | ] 65 | }, 66 | ] 67 | } 68 | 69 | print("List of observables in the catalog:") 70 | 71 | # List all observables in the catalog that intersect with the geometry 72 | for item in client.catalog.list(observable_geometry_filter): 73 | print(item) 74 | 75 | 76 | print("Adding more sample observables in the catalog") 77 | 78 | # Create static observables to filter 79 | manifest = ObservableDto( 80 | metadata=Metadata( 81 | name=client.personalize_name("sdk-example-small-value"), 82 | display_name="SDK Example Small Value", 83 | description="An observable that emits a small value", 84 | labels={"hubocean.io/test": True}, 85 | ), 86 | spec=ObservableSpec( 87 | ref="catalog.hubocean.io/dataset/test-dataset", 88 | observable_class="catalog.hubocean.io/observableClass/static-observable", 89 | details={"value": 1, "attribute": "test"}, 90 | ), 91 | ) 92 | 93 | manifest = client.catalog.create(manifest) 94 | created_manifests.append(manifest) 95 | 96 | manifest = ObservableDto( 97 | metadata=Metadata( 98 | name=client.personalize_name("sdk-example-large-value"), 99 | display_name="SDK Example Large Value", 100 | description="An observable that emits a large value", 101 | labels={"hubocean.io/test": True}, 102 | ), 103 | spec=ObservableSpec( 104 | ref="catalog.hubocean.io/dataset/test-dataset", 105 | observable_class="catalog.hubocean.io/observableClass/static-observable", 106 | details={"value": 3, "attribute": "test"}, 107 | ), 108 | ) 109 | 110 | manifest = client.catalog.create(manifest) 111 | created_manifests.append(manifest) 112 | 113 | 114 | # An example query to search for observables in certain range 115 | observable_range_filter = { 116 | "#AND": [ 117 | {"#WITHIN": ["$spec.observable_class", 
["catalog.hubocean.io/observableClass/static-observable"]]}, 118 | {"#GREATER_THAN_OR_EQUALS": ["$spec.details.value", "2"]}, 119 | ] 120 | } 121 | 122 | print("List of observables in the catalog:") 123 | 124 | # List all observables in the catalog that intersect with the geometry 125 | for item in client.catalog.list(observable_range_filter): 126 | print(item) 127 | 128 | print("Cleaning up") 129 | 130 | # Clean up 131 | for man in created_manifests: 132 | client.catalog.delete(man) 133 | 134 | print("Done") 135 | -------------------------------------------------------------------------------- /examples/raw_client_example.py: -------------------------------------------------------------------------------- 1 | from odp.client import OdpClient 2 | from odp.client.dto.file_dto import FileMetadataDto 3 | from odp.dto import Metadata 4 | from odp.dto.catalog import DataCollectionDto, DataCollectionSpec, DatasetDto, DatasetSpec 5 | from odp.dto.common.contact_info import ContactInfo 6 | from odp.dto.common.license import License 7 | 8 | # Instantiate the client without specifying a token provider. 9 | # The token provider will be set based on the environment. 10 | client = OdpClient() 11 | 12 | data_collection_name = "collection-manifest-example" 13 | 14 | collection = DataCollectionDto( 15 | metadata=Metadata( 16 | name=data_collection_name, 17 | display_name="collection-example", 18 | description="A test data collection", 19 | ), 20 | spec=DataCollectionSpec( 21 | published_by=ContactInfo( 22 | contact="User McUsername ", 23 | organisation="Organisation Name", 24 | ), 25 | published_date="2019-06-19T06:00:00", 26 | website="https://hubocean.earth", 27 | license=License( 28 | name="proprietary", 29 | full_text="This is a very strict legal text describing the data license.", 30 | href="www.wikipedia.org", 31 | ), 32 | tags=[], 33 | ), 34 | ) 35 | 36 | collection = client.catalog.create(collection) 37 | print("Collection was created") 38 | 39 | # Declare a dataset manifest to add to the catalog 40 | 41 | print("Creating sample dataset") 42 | 43 | dataset = DatasetDto( 44 | metadata=Metadata( 45 | name=client.personalize_name("sdk-raw-example"), 46 | display_name="SDK Raw Example", 47 | description="A test dataset for raw data", 48 | labels={"hubocean.io/test": True}, 49 | ), 50 | spec=DatasetSpec( 51 | data_collection=f"catalog.hubocean.io/dataCollection/{data_collection_name}", 52 | storage_controller="registry.hubocean.io/storageController/storage-raw-cdffs", 53 | storage_class="registry.hubocean.io/storageClass/raw", 54 | maintainer=ContactInfo( 55 | contact="User McUsername ", 56 | organisation="Organisation Name", 57 | ), 58 | ), 59 | ) 60 | 61 | # The dataset is created in the catalog. 62 | dataset = client.catalog.create(dataset) 63 | 64 | # Creating and uploading a file. 
65 | file_dto = client.raw.create_file( 66 | resource_dto=dataset, 67 | file_metadata_dto=FileMetadataDto( 68 | name="test.txt", 69 | mime_type="text/plain", 70 | ), 71 | contents=b"Hello, World!", 72 | ) 73 | 74 | print("List of files in the dataset:") 75 | 76 | for file in client.raw.list(dataset): 77 | print(file) 78 | 79 | # Download file 80 | print("Downloading the file") 81 | 82 | client.raw.download_file(dataset, file_dto, "test.txt") 83 | 84 | # Clean up 85 | print("Cleaning up") 86 | 87 | client.raw.delete_file(dataset, file_dto) 88 | client.catalog.delete(dataset) 89 | client.catalog.delete(collection) 90 | 91 | print("Done") 92 | -------------------------------------------------------------------------------- /examples/raw_client_file_example.py: -------------------------------------------------------------------------------- 1 | from odp.client import OdpClient 2 | from odp.client.dto.file_dto import FileMetadataDto 3 | from odp.client.exc import OdpFileAlreadyExistsError, OdpResourceExistsError 4 | from odp.dto import Metadata 5 | from odp.dto.catalog import DatasetDto, DatasetSpec 6 | from odp.dto.common.contact_info import ContactInfo 7 | 8 | # Instantiate the client without specifying a token provider. 9 | # The token provider will be set based on the environment. 10 | client = OdpClient() 11 | 12 | # Declare a dataset manifest to add to the catalog 13 | 14 | print("Creating sample dataset") 15 | 16 | dataset = DatasetDto( 17 | metadata=Metadata( 18 | name=client.personalize_name("sdk-raw-example"), 19 | display_name="SDK Raw Example", 20 | description="A test dataset for raw data", 21 | labels={"hubocean.io/test": True}, 22 | ), 23 | spec=DatasetSpec( 24 | storage_controller="registry.hubocean.io/storageController/storage-raw-cdffs", 25 | storage_class="registry.hubocean.io/storageClass/raw", 26 | maintainer=ContactInfo( 27 | contact="User McUsername ", 28 | organisation="Organisation Name", 29 | ), 30 | ), 31 | ) 32 | 33 | # The dataset is created in the catalog. 34 | try: 35 | dataset = client.catalog.create(dataset) 36 | print("Resource created successfully:", dataset) 37 | except OdpResourceExistsError: 38 | print("Dataset already exists. Getting existing dataset") 39 | dataset = client.catalog.get("catalog.hubocean.io/dataset/" + dataset.metadata.name) 40 | print(dataset) 41 | 42 | # Creating and uploading an existing file. 43 | path_to_file = "test.txt" 44 | file_metadata_dto = None 45 | file_dto = None 46 | try: 47 | with open(path_to_file, "rb") as data: 48 | file_metadata_dto = FileMetadataDto( 49 | name=data.name, 50 | mime_type="text/plain", # Update mime type of the file 51 | ) 52 | file_dto = client.raw.create_file( 53 | resource_dto=dataset, 54 | file_metadata_dto=file_metadata_dto, 55 | contents=data.read(), 56 | ) 57 | except OdpFileAlreadyExistsError: 58 | print("File already exists. 
Getting metadata of existing file") 59 | file_dto = client.raw.get_file_metadata(dataset, file_metadata_dto) 60 | 61 | print("List of files in the dataset:") 62 | 63 | for file in client.raw.list(dataset): 64 | print(file) 65 | 66 | # Download file 67 | print("Downloading the file:") 68 | 69 | client.raw.download_file(dataset, file_dto, "test.txt") 70 | 71 | # Clean up 72 | print("Cleaning up") 73 | 74 | client.raw.delete_file(dataset, file_dto) 75 | client.catalog.delete(dataset) 76 | 77 | print("Done") 78 | -------------------------------------------------------------------------------- /examples/tabular_client_example.py: -------------------------------------------------------------------------------- 1 | from odp.client import OdpClient 2 | from odp.client.dto.table_spec import TableSpec 3 | from odp.client.exc import OdpResourceNotFoundError 4 | from odp.dto import Metadata 5 | from odp.dto.catalog import DataCollectionDto, DataCollectionSpec, DatasetDto, DatasetSpec 6 | from odp.dto.common.contact_info import ContactInfo 7 | from odp.dto.common.license import License 8 | 9 | # Instantiate the client without specifying a token provider. 10 | # The token provider will be set based on the environment. 11 | client = OdpClient() 12 | 13 | data_collection_name = "collection-manifest-example" 14 | 15 | collection = DataCollectionDto( 16 | metadata=Metadata( 17 | name=data_collection_name, 18 | display_name="collection-example", 19 | description="A test data collection", 20 | ), 21 | spec=DataCollectionSpec( 22 | published_by=ContactInfo( 23 | contact="User McUsername ", 24 | organisation="Organisation Name", 25 | ), 26 | published_date="2019-06-19T06:00:00", 27 | website="https://hubocean.earth", 28 | license=License( 29 | name="proprietary", 30 | full_text="This is a very strict legal text describing the data license.", 31 | href="www.wikipedia.org", 32 | ), 33 | tags=[], 34 | ), 35 | ) 36 | 37 | collection = client.catalog.create(collection) 38 | print("Collection was created") 39 | 40 | # Declare a dataset manifest to add to the catalog 41 | 42 | print("Creating sample dataset") 43 | 44 | dataset = DatasetDto( 45 | metadata=Metadata( 46 | name=client.personalize_name("sdk-tabular-example"), 47 | display_name="SDK Tabular Example", 48 | description="A test dataset for tabular data", 49 | labels={"hubocean.io/test": True}, 50 | ), 51 | spec=DatasetSpec( 52 | data_collection=f"catalog.hubocean.io/dataCollection/{data_collection_name}", 53 | storage_controller="registry.hubocean.io/storageController/storage-tabular", 54 | storage_class="registry.hubocean.io/storageClass/tabular", 55 | maintainer=ContactInfo( 56 | contact="User McUsername ", 57 | organisation="Organisation Name", 58 | ), 59 | ), 60 | ) 61 | 62 | # The dataset is created in the catalog. 
63 | dataset = client.catalog.create(dataset) 64 | 65 | # Create a table spec to create the schema in tabular client 66 | print("Creating table spec") 67 | 68 | mt_table_spec = client.tabular.create_schema( 69 | resource_dto=dataset, table_spec=TableSpec(table_schema={"Data": {"type": "string"}}) 70 | ) 71 | 72 | # Insert data into the table 73 | test_data = [{"Data": "Test"}, {"Data": "Test1"}] 74 | print(f"Inserting {len(test_data)} rows into the table") 75 | 76 | client.tabular.write(resource_dto=dataset, data=test_data) 77 | 78 | # Query the data as a list 79 | print("Querying data from the table as a list") 80 | our_data = client.tabular.select_as_list(dataset) 81 | 82 | print("Data query result:") 83 | print(f"{our_data}\n") 84 | 85 | # To update the data filters must be declared 86 | update_filters = {"#EQUALS": ["$Data", "Test"]} 87 | new_data = [{"Data": "Test Updated"}] 88 | 89 | print("Updating data in the table") 90 | client.tabular.update( 91 | resource_dto=dataset, 92 | data=new_data, 93 | filter_query=update_filters, 94 | ) 95 | 96 | result = client.tabular.select_as_list(dataset) 97 | 98 | print(f"Data read back:\n{result}") # noqa: E231 99 | 100 | # Delete the data with another filter 101 | delete_filters = {"#EQUALS": ["$Data", "Test1"]} 102 | print("Deleting data in the table") 103 | 104 | client.tabular.delete(resource_dto=dataset, filter_query=delete_filters) 105 | result = client.tabular.select_as_list(dataset) 106 | 107 | print(f"Data read back:\n{result}") # noqa: E231 108 | 109 | # Clean up 110 | 111 | print("Cleaning up") 112 | 113 | # Delete the schema 114 | client.tabular.delete_schema(dataset, delete_data=True) 115 | 116 | # Reading the schema of a dataset without a schema will result in an error 117 | try: 118 | client.tabular.get_schema(dataset) 119 | except OdpResourceNotFoundError as e: 120 | print("Schema not found error since it is deleted") 121 | print(e) 122 | 123 | print("Deleting dataset") 124 | 125 | # Delete the dataset and collection 126 | client.catalog.delete(dataset) 127 | client.catalog.delete(collection) 128 | 129 | print("Done") 130 | -------------------------------------------------------------------------------- /examples/tabular_geography.py: -------------------------------------------------------------------------------- 1 | from odp.client import OdpClient 2 | from odp.client.dto.table_spec import TableSpec 3 | from odp.client.dto.tabular_store import TablePartitioningSpec 4 | from odp.dto import Metadata 5 | from odp.dto.catalog import DatasetDto, DatasetSpec 6 | from odp.dto.common.contact_info import ContactInfo 7 | 8 | client = OdpClient() 9 | 10 | # Create a new manifest to add to the catalog 11 | dataset = DatasetDto( 12 | metadata=Metadata( 13 | name=client.personalize_name("st_within_example"), 14 | display_name="ST_WITHIN Example", 15 | description="A test dataset for ST_WITHIN query", 16 | labels={"hubocean.io/test": True}, 17 | ), 18 | spec=DatasetSpec( 19 | storage_controller="registry.hubocean.io/storageController/storage-tabular", 20 | storage_class="registry.hubocean.io/storageClass/tabular", 21 | maintainer=ContactInfo( 22 | contact="User McUsername ", 23 | organisation="Organisation Name", 24 | ), 25 | ), 26 | ) 27 | 28 | # The dataset is created in the catalog. 
29 | dataset = client.catalog.create(dataset) 30 | 31 | print("Dataset created successfully") 32 | 33 | table_schema = {"name": {"type": "string"}, "location": {"type": "geometry"}} 34 | partitioning = [TablePartitioningSpec(columns=["location"], transformer_name="geohash", args=[2])] 35 | 36 | my_table_spec = TableSpec(table_schema=table_schema, partitioning=partitioning) 37 | 38 | client.tabular.create_schema( 39 | resource_dto=dataset, table_spec=TableSpec(table_schema=table_schema, partitioning=partitioning) 40 | ) 41 | 42 | print("Table spec created successfully") 43 | 44 | data = [ 45 | {"name": "Oslo", "location": {"type": "Point", "coordinates": [10.74609, 59.91273]}}, 46 | {"name": "New York", "location": {"type": "Point", "coordinates": [-74.005974, 40.712776]}}, 47 | {"name": "Los Angeles", "location": {"type": "Point", "coordinates": [-118.243683, 34.052235]}}, 48 | {"name": "London", "location": {"type": "Point", "coordinates": [-0.127758, 51.507351]}}, 49 | {"name": "Tokyo", "location": {"type": "Point", "coordinates": [139.691711, 35.689487]}}, 50 | {"name": "Paris", "location": {"type": "Point", "coordinates": [2.352222, 48.856613]}}, 51 | {"name": "Berlin", "location": {"type": "Point", "coordinates": [13.404954, 52.520008]}}, 52 | {"name": "Moscow", "location": {"type": "Point", "coordinates": [37.617298, 55.755825]}}, 53 | {"name": "Beijing", "location": {"type": "Point", "coordinates": [116.407394, 39.904202]}}, 54 | {"name": "Mexico City", "location": {"type": "Point", "coordinates": [-99.133209, 19.432608]}}, 55 | {"name": "São Paulo", "location": {"type": "Point", "coordinates": [-46.633308, -23.55052]}}, 56 | {"name": "Buenos Aires", "location": {"type": "Point", "coordinates": [-58.381592, -34.603722]}}, 57 | {"name": "New Delhi", "location": {"type": "Point", "coordinates": [77.209023, 28.613939]}}, 58 | {"name": "Sydney", "location": {"type": "Point", "coordinates": [151.209296, -33.86882]}}, 59 | {"name": "San Francisco", "location": {"type": "Point", "coordinates": [-122.419418, 37.774929]}}, 60 | {"name": "Johannesburg", "location": {"type": "Point", "coordinates": [28.047305, -26.204103]}}, 61 | {"name": "Chicago", "location": {"type": "Point", "coordinates": [-87.629799, 41.878113]}}, 62 | {"name": "Melbourne", "location": {"type": "Point", "coordinates": [144.963058, -37.813628]}}, 63 | {"name": "Edinburgh", "location": {"type": "Point", "coordinates": [-3.188267, 55.953251]}}, 64 | {"name": "Stockholm", "location": {"type": "Point", "coordinates": [18.068581, 59.329323]}}, 65 | {"name": "Ottawa", "location": {"type": "Point", "coordinates": [-75.697193, 45.42153]}}, 66 | {"name": "Hong Kong", "location": {"type": "Point", "coordinates": [114.109497, 22.396428]}}, 67 | {"name": "Jakarta", "location": {"type": "Point", "coordinates": [106.845599, -6.208763]}}, 68 | {"name": "Cairo", "location": {"type": "Point", "coordinates": [31.235712, 30.04442]}}, 69 | {"name": "Budapest", "location": {"type": "Point", "coordinates": [19.040236, 47.497913]}}, 70 | {"name": "Christchurch", "location": {"type": "Point", "coordinates": [172.636225, -43.532054]}}, 71 | {"name": "Manila", "location": {"type": "Point", "coordinates": [120.98422, 14.599512]}}, 72 | {"name": "Bangkok", "location": {"type": "Point", "coordinates": [100.501765, 13.756331]}}, 73 | {"name": "Rome", "location": {"type": "Point", "coordinates": [12.496366, 41.902783]}}, 74 | {"name": "Shanghai", "location": {"type": "Point", "coordinates": [121.473702, 31.23039]}}, 75 | {"name": "Rio de Janeiro", 
"location": {"type": "Point", "coordinates": [-43.172897, -22.906847]}}, 76 | {"name": "Madrid", "location": {"type": "Point", "coordinates": [-3.70379, 40.416775]}}, 77 | {"name": "Nairobi", "location": {"type": "Point", "coordinates": [36.821946, -1.292066]}}, 78 | {"name": "Toronto", "location": {"type": "Point", "coordinates": [-79.383186, 43.653225]}}, 79 | {"name": "Fortaleza", "location": {"type": "Point", "coordinates": [-38.526669, -3.731862]}}, 80 | {"name": "Tehran", "location": {"type": "Point", "coordinates": [51.388973, 35.6895]}}, 81 | {"name": "Brasília", "location": {"type": "Point", "coordinates": [-47.882166, -15.794229]}}, 82 | {"name": "Bogotá", "location": {"type": "Point", "coordinates": [-74.072092, 4.710989]}}, 83 | ] 84 | 85 | print(f"Inserting {len(data)} rows into the table") 86 | client.tabular.write(resource_dto=dataset, data=data) 87 | print("Data inserted and partitioned") 88 | 89 | print("Querying for cities in Europe") 90 | europe_list = client.tabular.select_as_list( 91 | resource_dto=dataset, 92 | filter_query={ 93 | "#ST_WITHIN": [ 94 | "$location", # <- Name of column to perform geographic query against. 95 | { 96 | "type": "Polygon", # This is a rough polygon encompassing Europe. 97 | "coordinates": [ 98 | [ 99 | [37.02028908997249, 70.9411520317463], 100 | [-24.834125592956013, 70.9411520317463], 101 | [-24.834125592956013, 35.753296916825306], 102 | [37.02028908997249, 35.753296916825306], 103 | [37.02028908997249, 70.9411520317463], 104 | ] 105 | ], 106 | }, 107 | ] 108 | }, 109 | ) 110 | 111 | print("Cities in Europe:") 112 | for city in europe_list: 113 | print(city.get("name")) 114 | 115 | # Clean up 116 | print("Cleaning up") 117 | 118 | client.tabular.delete_schema(dataset) 119 | client.catalog.delete(dataset) 120 | 121 | print("Done") 122 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | description = "ODP Python SDK project" 3 | authors = ["Thomas Li Fredriksen "] 4 | license = "MIT" 5 | readme = "README.md" 6 | packages = [] 7 | package-mode = false 8 | 9 | [tool.poetry.dependencies] 10 | python = "^3.9" 11 | odp-dto = { path = "./src/dto", develop = true } 12 | odp-sdk = { path = "./src/sdk", develop = true } 13 | jupyter = "^1.0.0" 14 | pyarrow = "^18.1.0" 15 | 16 | [tool.poetry.group.dev.dependencies] 17 | python-dotenv = "^1.0.1" 18 | pytest = "^7.4.3" 19 | coverage = "^7.3.2" 20 | flake8-pyproject = "^1.2.2" 21 | responses = "^0.23.1" 22 | pandas = "^2.1.4" 23 | 24 | [tool.poetry.group.helper-scripts] 25 | optional = true 26 | 27 | [tool.poetry.group.helper-scripts.dependencies] 28 | typer = "^0.12.3" 29 | tomli = "^2.0.1" 30 | tomli-w = "^1.0.0" 31 | 32 | [build-system] 33 | requires = ["poetry-core"] 34 | build-backend = "poetry.core.masonry.api" 35 | 36 | 37 | [tool.pytest.ini_options] 38 | log_cli = "true" 39 | asyncio_mode = "auto" 40 | log_level = "INFO" 41 | log_format = "%(asctime)s %(levelname)s %(message)s [%(filename)s:%(lineno)d]" 42 | log_date_format = "%Y-%m-%d %H:%M:%S" 43 | testpaths = [ 44 | "src/sdk/tests", 45 | "src/dto/tests", 46 | ] 47 | 48 | [tool.flake8] 49 | ignore = ["E203", "E731", "W503"] 50 | per-file-ignores = ["*/__init__.py:F401", "tests/*:F841", "*/tests/*:F841"] 51 | max-line-length = 120 52 | count = true 53 | 54 | [tool.isort] 55 | line_length=120 # corresponds to -w flag 56 | multi_line_output=3 # corresponds to -m flag 57 | 
include_trailing_comma=true # corresponds to -tc flag 58 | profile="black" 59 | known_local_folder="src,tests" 60 | 61 | [tool.poetry.extras] 62 | pandas = ["pandas"] 63 | -------------------------------------------------------------------------------- /scripts/migrate_local_deps.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Annotated, Dict, List, Optional, Union 3 | 4 | import tomli 5 | import tomli_w 6 | import typer 7 | 8 | app = typer.Typer() 9 | 10 | 11 | def _update_local_version( 12 | dep: Dict[str, Union[str, Dict[str, str]]], version_overrides: Dict[str, str], default_version_tag: str 13 | ) -> Dict[str, Union[str, Dict[str, str]]]: 14 | ret = {} 15 | 16 | for dep_name, dep_info in dep.items(): 17 | if isinstance(dep_info, str): 18 | ret[dep_name] = dep_info 19 | continue 20 | 21 | dep_info = dep_info.copy() 22 | ret[dep_name] = dep_info 23 | 24 | pth = dep_info.get("path") 25 | if pth is None: 26 | continue 27 | 28 | dep_info.pop("path") 29 | dep_info.pop("develop", None) 30 | 31 | dep_info["version"] = version_overrides.get(pth, default_version_tag) 32 | 33 | return ret 34 | 35 | 36 | @app.command() 37 | def migrate( 38 | default_version_tag: Annotated[str, typer.Argument(help="Default version tag to use for local dependencies")], 39 | src_file: Annotated[Path, typer.Argument(help="Path to the file to migrate")], 40 | dest_file: Annotated[ 41 | Optional[Path], typer.Argument(help="Path to the destination file, defaults to the source file if not set") 42 | ] = None, # noqa: E501 43 | dry_run: Annotated[bool, typer.Option(help="Run in dry-run mode")] = False, 44 | overwrite: Annotated[bool, typer.Option(help="Overwrite the destination file if it exists")] = False, 45 | version_tag: Annotated[List[str], typer.Option(help="Version tags to use for local dependencies")] = [], 46 | ): 47 | try: 48 | version_overrides = {k: v for k, v in (x.split("=") for x in version_tag)} 49 | except ValueError as e: 50 | typer.echo(f"Invalid version tag: {e}") 51 | raise typer.Exit(code=1) 52 | 53 | print(f"Version overrides: {version_overrides}") 54 | 55 | if not src_file.suffix and src_file.is_dir(): 56 | typer.echo("Directory detected, looking for pyproject.toml") 57 | src_file /= "pyproject.toml" 58 | elif src_file.suffix != ".toml": 59 | typer.echo("Only TOML files are supported: {}".format(src_file)) 60 | raise typer.Exit(code=1) 61 | 62 | if not dest_file and not dry_run: 63 | if not overwrite: 64 | typer.echo( 65 | "Destination file not set. Using source file as destination but overwrite-flag is not set. Please set the destination file or use the --overwrite flag." # noqa: E501 66 | ) 67 | typer.Exit(code=1) 68 | 69 | typer.echo("Destination file not set. 
Using source file as destination.") 70 | dest_file = src_file 71 | 72 | if dest_file and not dest_file.suffix and dest_file.is_dir(): 73 | dest_file /= "pyproject.toml" 74 | elif dest_file and dest_file.suffix != ".toml": 75 | typer.echo("Only TOML files are supported") 76 | raise typer.Exit(code=1) 77 | 78 | if not src_file.exists(): 79 | typer.echo(f"Source file {src_file} does not exist") 80 | raise typer.Exit(code=1) 81 | 82 | if dest_file and dest_file.exists() and not overwrite: 83 | typer.echo(f"Destination file {dest_file} exists and overwrite flag is not set") 84 | raise typer.Exit(code=1) 85 | 86 | with src_file.open("rb") as f: 87 | data = tomli.load(f) 88 | 89 | try: 90 | poetry_base = data["tool"]["poetry"] 91 | except KeyError: 92 | typer.echo("No poetry section found in the source file") 93 | raise typer.Exit(code=1) 94 | 95 | for key in ["dependencies", "dev-dependencies", "optional-dependencies"]: 96 | if key in poetry_base: 97 | poetry_base[key] = _update_local_version(poetry_base[key], version_overrides, default_version_tag) 98 | 99 | for group in poetry_base.get("group", []): 100 | try: 101 | poetry_base[group]["dependencies"] = _update_local_version( 102 | poetry_base[group]["dependencies"], version_overrides, default_version_tag 103 | ) 104 | except KeyError: 105 | pass 106 | 107 | if dry_run: 108 | typer.echo("Dry-run mode, not writing to file") 109 | typer.echo(tomli_w.dumps(data)) 110 | raise typer.Exit(code=0) 111 | 112 | with dest_file.open("wb+") as f: 113 | tomli_w.dump(data, f) 114 | 115 | 116 | if __name__ == "__main__": 117 | app() 118 | -------------------------------------------------------------------------------- /src/dto/README.md: -------------------------------------------------------------------------------- 1 | # ODP Data Transfer Objects (DTOs) 2 | 3 | ## Documentation 4 | 5 | https://docs.hubocean.earth 6 | 7 | ## Installation 8 | 9 | ```shell 10 | pip install odp-dto 11 | ``` -------------------------------------------------------------------------------- /src/dto/odp/dto/__init__.py: -------------------------------------------------------------------------------- 1 | from .catalog import * # noqa: F401, F403 2 | from .metadata import Metadata 3 | from .resource import GenericResourceDto, ResourceDto, ResourceSpecABC, ResourceSpecT, get_resource_spec_type 4 | from .resource_registry import * # noqa: F401, F403 5 | from .resource_registry import DEFAULT_RESOURCE_REGISTRY, ResourceRegistry, ResourceRegistryEntry, kind 6 | from .resource_status import ResourceStatus 7 | -------------------------------------------------------------------------------- /src/dto/odp/dto/catalog/__init__.py: -------------------------------------------------------------------------------- 1 | from odp.dto.resource import ResourceDto 2 | 3 | from .data_collection import DataCollectionSpec, Distribution 4 | from .dataset import Attribute, Citation, DatasetSpec 5 | from .observable import ObservableSpec 6 | 7 | # Convenience type aliases 8 | DataCollectionDto = ResourceDto[DataCollectionSpec] 9 | DatasetDto = ResourceDto[DatasetSpec] 10 | ObservableDto = ResourceDto[ObservableSpec] 11 | 12 | del ResourceDto 13 | -------------------------------------------------------------------------------- /src/dto/odp/dto/catalog/_rg.py: -------------------------------------------------------------------------------- 1 | """Resource group of the data catalog""" 2 | 3 | CATALOG_RESOURCE_GROUP = "catalog.hubocean.io" 4 | 
-------------------------------------------------------------------------------- /src/dto/odp/dto/catalog/data_collection.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from typing import Any, Optional 3 | 4 | from pydantic import BaseModel 5 | 6 | from ..common.contact_info import ContactInfo 7 | from ..common.license import License 8 | from ..resource import ResourceSpecABC 9 | from ..resource_registry import kind 10 | from ._rg import CATALOG_RESOURCE_GROUP 11 | 12 | 13 | class Distribution(BaseModel): 14 | """Distribution information""" 15 | 16 | published_by: ContactInfo 17 | """Publisher information""" 18 | 19 | published_date: datetime 20 | """Date of first published""" 21 | 22 | website: str 23 | """Distribution website""" 24 | 25 | license: Optional[License] = None 26 | """Dataset license information""" 27 | 28 | 29 | @kind(CATALOG_RESOURCE_GROUP, "dataCollection", "v1alpha1") 30 | class DataCollectionSpec(ResourceSpecABC): 31 | """Data collection specification model""" 32 | 33 | distribution: Optional[Distribution] = None 34 | """Information on how the dataset was distributed""" 35 | 36 | tags: set[str] 37 | """Tags for the dataset""" 38 | 39 | facets: Optional[dict[str, Any]] = None 40 | """Facets for the dataset""" 41 | -------------------------------------------------------------------------------- /src/dto/odp/dto/catalog/dataset.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional, Set 2 | 3 | from pydantic import BaseModel, Field 4 | 5 | from ..common.contact_info import ContactInfo 6 | from ..resource import ResourceSpecABC 7 | from ..resource_registry import kind 8 | from ._rg import CATALOG_RESOURCE_GROUP 9 | 10 | 11 | class Citation(BaseModel): 12 | """Citation information""" 13 | 14 | cite_as: Optional[str] = None 15 | """Directions on how to cite the dataset""" 16 | 17 | doi: Optional[str] = None 18 | 19 | 20 | class Attribute(BaseModel): 21 | """Dataset attribute""" 22 | 23 | name: str 24 | """Attribute name. This can be a column name in a table, a dimension in an array, etc.""" 25 | 26 | description: Optional[str] = None 27 | """Attribute description""" 28 | 29 | traits: list[str] 30 | """List of traits. Traits are used to describe the attribute in more detail. 31 | 32 | Traits are based on Microsoft Common Data Model (CDM) traits. See the [CDM documentation] 33 | (https://learn.microsoft.com/en-us/common-data-model/sdk/trait-concepts-and-use-cases#what-are-traits) 34 | for more information. 
35 | """ 36 | 37 | 38 | @kind(CATALOG_RESOURCE_GROUP, "dataset", "v1alpha3") 39 | class DatasetSpec(ResourceSpecABC): 40 | """Dataset specification model""" 41 | 42 | storage_class: str 43 | """Storage class qualified name""" 44 | 45 | storage_controller: Optional[str] = None 46 | """Storage controller qualified name""" 47 | 48 | data_collection: Optional[str] = None 49 | """Data collection qualified name""" 50 | 51 | maintainer: ContactInfo 52 | """Active maintainer information""" 53 | 54 | citation: Optional[Citation] = None 55 | """Citation information""" 56 | 57 | documentation: List[str] = Field(default_factory=list) 58 | """Links to any relevant documentation for the dataset""" 59 | 60 | facets: Optional[Dict[str, Any]] = None 61 | """Facets for the dataset""" 62 | 63 | tags: Set[str] = Field(default_factory=set) 64 | """Tags for the dataset""" 65 | 66 | attributes: List[Attribute] = Field(default_factory=list) 67 | """Dataset attributes""" 68 | -------------------------------------------------------------------------------- /src/dto/odp/dto/catalog/observable.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | from ..resource import ResourceSpecABC 4 | from ..resource_registry import kind 5 | from ._rg import CATALOG_RESOURCE_GROUP 6 | 7 | 8 | @kind(CATALOG_RESOURCE_GROUP, "observable", "v1alpha2") 9 | class ObservableSpec(ResourceSpecABC): 10 | observable_class: str 11 | """Observable class""" 12 | 13 | ref: str 14 | """Qualified name of the associated dataset or data collection""" 15 | 16 | details: Dict[str, Any] 17 | """Full observable object""" 18 | -------------------------------------------------------------------------------- /src/dto/odp/dto/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/C4IROcean-archive/odp-sdk-python/cfe9d937e620dfa8f35e6e92646d9625de5a0de9/src/dto/odp/dto/common/__init__.py -------------------------------------------------------------------------------- /src/dto/odp/dto/common/contact_info.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class ContactInfo(BaseModel): 7 | """Contact information for a user""" 8 | 9 | contact: str 10 | """Contact in the form `Firstname Lastname `""" 11 | 12 | organisation: Optional[str] = None 13 | """Organisation name""" 14 | -------------------------------------------------------------------------------- /src/dto/odp/dto/common/license.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class License(BaseModel): 7 | """Data license information""" 8 | 9 | name: str 10 | """License name. Can be set to `Proprietary` for proprietary licenses""" 11 | 12 | href: Optional[str] = None 13 | """HREF to license text""" 14 | 15 | full_text: Optional[str] = None 16 | """Full license text""" 17 | -------------------------------------------------------------------------------- /src/dto/odp/dto/metadata.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Optional 2 | from uuid import UUID 3 | 4 | from pydantic import BaseModel, Field 5 | 6 | 7 | class Metadata(BaseModel): 8 | """Resource manifest metadata""" 9 | 10 | name: str 11 | """Resource name. 
Must consist of alphanumeric characters, dashes or underscores and must start 12 | with an alphanumeric character""" 13 | 14 | display_name: Optional[str] = None 15 | """Human-friendly name""" 16 | 17 | description: Optional[str] = None 18 | """Resource description""" 19 | 20 | uuid: Optional[UUID] = None 21 | """System-assigned unique identifier""" 22 | 23 | labels: Dict = Field(default_factory=dict) 24 | """Resource labels""" 25 | 26 | owner: Optional[UUID] = None 27 | """Owner of the resource""" 28 | -------------------------------------------------------------------------------- /src/dto/odp/dto/registry/__init__.py: -------------------------------------------------------------------------------- 1 | from .observable_class import ObservableClassSpec 2 | -------------------------------------------------------------------------------- /src/dto/odp/dto/registry/_rg.py: -------------------------------------------------------------------------------- 1 | """Resource group of the ODP registry""" 2 | 3 | REGISTRY_RESOURCE_GROUP = "registry.hubocean.io" 4 | -------------------------------------------------------------------------------- /src/dto/odp/dto/registry/observable_class.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | from ..resource import ResourceSpecABC 4 | 5 | 6 | class ObservableClassSpec(ResourceSpecABC): 7 | """Observable class specification model""" 8 | 9 | observable_schema: Dict[str, Any] 10 | """JSON schema for the observable class""" 11 | -------------------------------------------------------------------------------- /src/dto/odp/dto/resource.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | from typing import Annotated, ClassVar, Generic, Optional, Type, TypeVar, Union, cast 3 | from uuid import UUID 4 | 5 | from pydantic import BaseModel 6 | from pydantic.functional_validators import AfterValidator 7 | 8 | from .metadata import Metadata 9 | from .resource_status import ResourceStatus 10 | from .validators import validate_resource_kind, validate_resource_version 11 | 12 | 13 | class ResourceSpecABC(BaseModel, ABC): 14 | """ResourceSpecABC is an abstract base class for resource specification.""" 15 | 16 | __kind__: ClassVar[str] 17 | __manifest_version__: ClassVar[str] 18 | 19 | 20 | ResourceSpecT = Union[dict, ResourceSpecABC] 21 | 22 | T = TypeVar("T", bound=ResourceSpecT) 23 | 24 | 25 | class ResourceDto(BaseModel, Generic[T]): 26 | """Resource Data Transmission Object (DTO) representing a resource manifest""" 27 | 28 | kind: Annotated[str, AfterValidator(validate_resource_kind)] = None 29 | """kind is the kind of the resource.""" 30 | 31 | version: Annotated[str, AfterValidator(validate_resource_version)] = None 32 | """version is the version of the resource.""" 33 | 34 | metadata: Metadata 35 | """metadata is the metadata of the resource.""" 36 | 37 | status: Optional[ResourceStatus] = None 38 | """status is the status of the resource.""" 39 | 40 | spec: T 41 | 42 | def __init__(self, **data): 43 | spec = data.pop("spec") 44 | 45 | if hasattr(spec, "__kind__") and "kind" not in data: 46 | data["kind"] = spec.__kind__ 47 | if hasattr(spec, "__manifest_version__") and "version" not in data: 48 | data["version"] = spec.__manifest_version__ 49 | 50 | super().__init__(**data, spec=spec) 51 | 52 | @classmethod 53 | def is_generic(cls) -> bool: 54 | return isinstance(get_resource_spec_type(cls), dict) 55 | 56 | @property 57 | def 
qualified_name(self) -> str: 58 | return self.get_qualified_name() 59 | 60 | @property 61 | def uuid(self) -> UUID: 62 | return self.get_uuid() 63 | 64 | def get_qualified_name(self) -> str: 65 | """Get the resource qualified name 66 | 67 | The qualified name is the kind and resource name joined by a slash: `{kind}/{metadata.name}` 68 | 69 | Returns: 70 | Qualified name 71 | """ 72 | return f"{self.kind}/{self.metadata.name}" 73 | 74 | def get_uuid(self) -> Optional[UUID]: 75 | """Get the resource UUID 76 | 77 | Returns: 78 | Resource UUID if it is set, `None` otherwise 79 | """ 80 | return self.metadata.uuid 81 | 82 | def get_ref(self) -> Union[UUID, str]: 83 | """Get a valid reference to the resource 84 | 85 | Returns: 86 | The resource UUID if it is set, the qualified name otherwise 87 | """ 88 | return self.get_uuid() or self.get_qualified_name() 89 | 90 | 91 | def get_resource_spec_type(cls: Union[Type[ResourceDto[T]], ResourceDto[T]]) -> Type[T]: 92 | """Get the resource spec type 93 | 94 | Args: 95 | cls: ResourceDto class or instance 96 | 97 | Returns: 98 | The resource spec type 99 | """ 100 | if isinstance(cls, type) and issubclass(cls, ResourceDto): 101 | tp = cls.model_fields["spec"].annotation 102 | else: 103 | tp = type(cls.spec) 104 | return cast(Type[T], tp) 105 | 106 | 107 | GenericResourceDto = ResourceDto[dict] 108 | -------------------------------------------------------------------------------- /src/dto/odp/dto/resource_registry.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated, Callable, Dict, Optional, Tuple, Type, TypeVar, cast 2 | 3 | from pydantic import BaseModel, Field 4 | from pydantic.functional_validators import BeforeValidator 5 | 6 | from .resource import Metadata, ResourceDto, ResourceSpecABC, ResourceSpecT, ResourceStatus, get_resource_spec_type 7 | from .validators import validate_resource_kind, validate_resource_version 8 | 9 | T = TypeVar("T", bound=ResourceSpecT) 10 | 11 | 12 | class ResourceRegistryEntry(BaseModel): 13 | """ResourceRegistryEntry is a registry entry for a resource.""" 14 | 15 | resource_kind: Annotated[str, BeforeValidator(validate_resource_kind)] 16 | """resource_type is the kind of the resource.""" 17 | 18 | resource_version: Annotated[str, BeforeValidator(validate_resource_version)] 19 | """resource_version is the version of the resource. in the form v(alpha|beta)""" 20 | 21 | def __hash__(self): 22 | return hash((self.resource_kind, self.resource_version)) 23 | 24 | 25 | class ResourceRegistry(BaseModel): 26 | """Registry used to register and lookup resource definitions.""" 27 | 28 | entries: Dict[ResourceRegistryEntry, Type[ResourceSpecABC]] = Field(default_factory=dict) 29 | """entries is a list of resource registry entries.""" 30 | 31 | def add(self, entry: ResourceRegistryEntry, spec: Type[ResourceSpecABC]) -> None: 32 | """add adds a resource to the registry.""" 33 | if entry in self.entries: 34 | raise ValueError(f"resource {entry.resource_kind} ({entry.resource_version}) already exists") 35 | self.entries[entry] = spec 36 | 37 | def get_resource_cls(self, kind: str, version: str) -> Type[ResourceSpecABC]: 38 | """Returns the resource spec class for the given kind and version. 39 | 40 | Args: 41 | kind: kind is the kind of the resource. 42 | version: version is the version of the resource. 43 | 44 | Returns: 45 | Type[ResourceSpecABC]: the resource spec class. 
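
        Raises:
            KeyError: If no resource spec class is registered for the given kind and version.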
46 | """ 47 | entry = ResourceRegistryEntry(resource_kind=kind, resource_version=version) 48 | try: 49 | return self.entries[entry] 50 | except KeyError as e: 51 | raise KeyError(f"resource {kind} ({version}) not found") from e 52 | 53 | def factory(self, kind: str, version: str, data: dict) -> ResourceSpecABC: 54 | """factory creates a resource spec object for the given kind and version. 55 | 56 | Args: 57 | kind: kind is the kind of the resource. 58 | version: version is the version of the resource. 59 | data: data is the resource data. 60 | 61 | Returns: 62 | ResourceSpecABC: the resource spec object. 63 | """ 64 | cls = self.get_resource_cls(kind, version) 65 | return cls(**data) 66 | 67 | def factory_cast(self, t: Type[T], kind: str, version: str, data: dict, assert_type: bool = True) -> T: 68 | """Convenience method to create a resource spec object and cast it to the given type. 69 | 70 | Args: 71 | t: Type to cast to. 72 | kind: kind is the kind of the resource. 73 | version: version is the version of the resource. 74 | data: data is the resource data. 75 | assert_type: Whether to assert the type before returning 76 | 77 | Returns: 78 | T: the resource spec object. 79 | """ 80 | ret = self.factory(kind, version, data) 81 | if assert_type and not isinstance(ret, t): 82 | raise ValueError(f"Expected type {t.__name__}, got {type(ret).__name__}") 83 | return cast(T, self.factory(kind, version, data)) 84 | 85 | def _resource_factory_prototype(self, manifest: dict) -> Tuple[str, str, Metadata, Optional[ResourceStatus], dict]: 86 | try: 87 | kind = manifest["kind"] 88 | version = manifest["version"] 89 | metadata = manifest["metadata"] 90 | status = manifest.get("status") 91 | spec = manifest["spec"] 92 | except KeyError as e: 93 | raise ValueError("Invalid resource manifest") from e 94 | 95 | return (kind, version, Metadata.parse_obj(metadata), ResourceStatus.parse_obj(status) if status else None, spec) 96 | 97 | def resource_factory(self, manifest: dict, raise_unknown: bool = True) -> ResourceDto: 98 | """Convert a manifest to a ResourceDto object. 99 | 100 | Args: 101 | manifest: Resource manifest. 102 | raise_unknown: Whether to raise an exception if the resource kind is unknown. 103 | 104 | Returns: 105 | Parsed ResourceDto object. 106 | """ 107 | kind, version, metadata, status, spec_dict = self._resource_factory_prototype(manifest) 108 | 109 | try: 110 | spec = self.factory(kind, version, spec_dict) 111 | except KeyError: 112 | if raise_unknown: 113 | raise 114 | spec = spec_dict 115 | 116 | return ResourceDto(kind=kind, version=version, metadata=Metadata.parse_obj(metadata), status=status, spec=spec) 117 | 118 | def resource_factory_cast( 119 | self, t: Type[ResourceDto[T]], manifest: dict, raise_unknown: bool = True, assert_type: bool = True 120 | ) -> ResourceDto[T]: 121 | """Convenience method to create a ResourceDto object and cast it to the given type. 122 | 123 | Args: 124 | t: Type to cast to. 125 | manifest: manifest is the resource data. 126 | raise_unknown: Whether to raise an exception if the resource kind is unknown. 
127 | assert_type: Whether to assert the type before returning 128 | """ 129 | kind, version, metadata, status, spec_dict = self._resource_factory_prototype(manifest) 130 | 131 | spec_tp = get_resource_spec_type(t) 132 | try: 133 | spec = self.factory_cast(spec_tp, kind, version, spec_dict) 134 | except KeyError: 135 | if raise_unknown: 136 | raise 137 | elif issubclass(spec_tp, ResourceSpecABC): 138 | spec = spec_tp.parse_obj(spec_dict) 139 | else: 140 | spec = spec_dict 141 | 142 | ret = ResourceDto(kind=kind, version=version, metadata=metadata, status=status, spec=spec) 143 | return cast(ResourceDto[T], ret) 144 | 145 | 146 | DEFAULT_RESOURCE_REGISTRY = ResourceRegistry() 147 | """Globally default resource registry.""" 148 | 149 | 150 | def kind( 151 | resource_group: str, 152 | resource_type: str, 153 | resource_version: str, 154 | registry: ResourceRegistry = DEFAULT_RESOURCE_REGISTRY, 155 | ) -> Callable[[Type[ResourceSpecABC]], Type[ResourceSpecABC]]: 156 | """kind is a decorator for resource specification classes to register them in the resource registry. 157 | 158 | Args: 159 | resource_group: resource_group is the group of the resource. 160 | resource_type: resource_type is the kind of the resource. 161 | resource_version: resource_version is the version of the resource. in the form v(alpha|beta) 162 | registry: registry is the resource registry to register the resource in. 163 | 164 | Returns: 165 | Callable[[Type[ResourceSpecABC]], Type[ResourceSpecABC]]: a decorator function. 166 | """ 167 | 168 | def inner(cls: Type[ResourceSpecABC]) -> Type[ResourceSpecABC]: 169 | kind = f"{resource_group}/{resource_type}" 170 | 171 | cls.__kind__ = kind 172 | cls.__manifest_version__ = resource_version 173 | 174 | registry.add( 175 | ResourceRegistryEntry(resource_kind=kind, resource_version=resource_version), 176 | cls, 177 | ) 178 | 179 | return cls 180 | 181 | return inner 182 | -------------------------------------------------------------------------------- /src/dto/odp/dto/resource_status.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from typing import Optional 3 | from uuid import UUID 4 | 5 | from pydantic import BaseModel, Field 6 | 7 | 8 | class ResourceStatus(BaseModel): 9 | """Resource status model""" 10 | 11 | num_updates: int = Field(default=0, ge=0) 12 | """Number of time the manifest has been updated""" 13 | 14 | created_time: datetime 15 | """Created timestamp""" 16 | 17 | created_by: UUID 18 | """UUID of user that created the resource""" 19 | 20 | updated_time: datetime 21 | """Last updated timestamp""" 22 | 23 | updated_by: UUID 24 | """UUID of user that updated the resource""" 25 | 26 | deleted_time: Optional[datetime] = None 27 | """Deleted timestamp - used for soft-delete""" 28 | 29 | deleted_by: Optional[UUID] = None 30 | """UUID of user that deleted the resource""" 31 | -------------------------------------------------------------------------------- /src/dto/odp/dto/validators.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | RX_RESOURCE_NAME = re.compile(r"[a-zA-Z0-9][a-zA-Z0-9\-_\.]*") 4 | RX_RESOURCE_KIND = re.compile(r"^(?:[a-zA-Z0-9][a-zA-Z0-9\-_\.]*)\/(?:[a-zA-Z0-9][a-zA-Z0-9\-_\.]*)$") 5 | RX_RESOURCE_VERSION = re.compile(r"^v[0-9]+(?:(?:alpha|beta)[0-9]+)?$") 6 | 7 | 8 | def validate_resource_version(val: str) -> str: 9 | if not RX_RESOURCE_VERSION.match(val): 10 | raise ValueError(f"Invalid resource version: {val}") 11 | return 
val 12 | 13 | 14 | def validate_resource_kind(val: str) -> str: 15 | if not RX_RESOURCE_KIND.match(val): 16 | raise ValueError(f"Invalid resource kind: {val}") 17 | return val 18 | 19 | 20 | def validate_resource_name(val: str) -> str: 21 | if not RX_RESOURCE_NAME.match(val): 22 | raise ValueError(f"Invalid resource component: {val}") 23 | return val 24 | -------------------------------------------------------------------------------- /src/dto/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "odp-dto" 3 | version = "0.4.10" 4 | description = "ODP Data Transfer Object" 5 | authors = ["Thomas Li Fredriksen "] 6 | license = "MIT" 7 | readme = "README.md" 8 | packages = [ 9 | {include="odp"}, 10 | ] 11 | 12 | [tool.poetry.dependencies] 13 | python = "^3.9" 14 | pydantic = "^2.4.2" 15 | 16 | [build-system] 17 | requires = ["poetry-core>=1.0.0"] 18 | build-backend = "poetry.core.masonry.api" -------------------------------------------------------------------------------- /src/dto/tests/test_dto/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/C4IROcean-archive/odp-sdk-python/cfe9d937e620dfa8f35e6e92646d9625de5a0de9/src/dto/tests/test_dto/__init__.py -------------------------------------------------------------------------------- /src/dto/tests/test_dto/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from odp.dto import ResourceRegistry, ResourceRegistryEntry 3 | 4 | from .utils import MockSpec, SimpleSpec 5 | 6 | 7 | @pytest.fixture 8 | def empty_resource_registry() -> ResourceRegistry: 9 | return ResourceRegistry() 10 | 11 | 12 | @pytest.fixture 13 | def resource_registry(empty_resource_registry: ResourceRegistry) -> ResourceRegistry: 14 | empty_resource_registry.add( 15 | ResourceRegistryEntry( 16 | resource_kind="test.hubocean.io/mock", 17 | resource_version="v1alpha1", 18 | ), 19 | MockSpec, 20 | ) 21 | 22 | empty_resource_registry.add( 23 | ResourceRegistryEntry( 24 | resource_kind="test.hubocean.io/simple", 25 | resource_version="v1alpha1", 26 | ), 27 | SimpleSpec, 28 | ) 29 | 30 | return empty_resource_registry 31 | -------------------------------------------------------------------------------- /src/dto/tests/test_dto/test_dto_base.py: -------------------------------------------------------------------------------- 1 | from odp.dto import Metadata, ResourceDto 2 | 3 | from .utils import TESTS_RESOURCE_REGISTRY, MockSpec, SimpleSpec 4 | 5 | MockDto = ResourceDto[MockSpec] 6 | 7 | 8 | def test_default_test_resource_registry(): 9 | cls = TESTS_RESOURCE_REGISTRY.get_resource_cls("test.hubocean.io/mock", "v1alpha1") 10 | assert cls == MockSpec 11 | 12 | cls = TESTS_RESOURCE_REGISTRY.get_resource_cls("test.hubocean.io/simple", "v1alpha1") 13 | assert cls == SimpleSpec 14 | 15 | 16 | def test_dunders(): 17 | assert MockSpec.__kind__ == "test.hubocean.io/mock" 18 | assert MockSpec.__manifest_version__ == "v1alpha1" 19 | 20 | assert SimpleSpec.__kind__ == "test.hubocean.io/simple" 21 | assert SimpleSpec.__manifest_version__ == "v1alpha1" 22 | 23 | 24 | def test_init_use_registered_kind_and_version(): 25 | # Users should not need to provide the kind and version for a registered resource kind 26 | s = MockDto(metadata=Metadata(name="foo"), spec=MockSpec()) 27 | 28 | assert s.metadata.name == "foo" 29 | assert isinstance(s.spec, MockSpec) 30 | 
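The registry lookups exercised in the tests above are also what drives manifest parsing: `ResourceRegistry.resource_factory` turns a plain manifest dict into a typed `ResourceDto`. A small sketch against the default registry follows; the storage class and contact are placeholder values, not taken from this repository:

```python
from odp.dto import DEFAULT_RESOURCE_REGISTRY
from odp.dto.catalog import DatasetSpec

manifest = {
    "kind": "catalog.hubocean.io/dataset",
    "version": "v1alpha3",
    "metadata": {"name": "sdk-docs-example"},
    "spec": {
        "storage_class": "registry.hubocean.io/storageClass/tabular",  # placeholder
        "maintainer": {"contact": "Jane Doe <jane.doe@example.com>"},  # placeholder
    },
}

dto = DEFAULT_RESOURCE_REGISTRY.resource_factory(manifest)
assert isinstance(dto.spec, DatasetSpec)  # spec dict is parsed into the registered class
assert dto.get_qualified_name() == "catalog.hubocean.io/dataset/sdk-docs-example"
```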
-------------------------------------------------------------------------------- /src/dto/tests/test_dto/test_validators.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import pytest 4 | from odp.dto.validators import validate_resource_kind, validate_resource_version 5 | 6 | 7 | @pytest.mark.parametrize( 8 | "test_value,expected", 9 | [ 10 | ("hubocean.io/testGroup", True), 11 | ("catalog.hubocean.io/testGroup", True), 12 | ("function.domain.com/testGroup", True), 13 | ("hubocean.io/testGroup/testProject", False), 14 | ("foobar", False), 15 | ], 16 | ) 17 | def test_validate_kind(test_value: Optional[str], expected: bool): 18 | if expected: 19 | assert test_value == validate_resource_kind(test_value) 20 | else: 21 | with pytest.raises(ValueError): 22 | validate_resource_kind(test_value) 23 | 24 | 25 | @pytest.mark.parametrize( 26 | "test_value,expected", 27 | [ 28 | ("v1alpha1", True), 29 | ("v1beta1", True), 30 | ("v2", True), 31 | ("v3alpha2", True), 32 | ("v1", True), 33 | ("v1alpha", False), 34 | ("v1beta", False), 35 | ("v1alpha1beta1", False), 36 | ("foobar", False), 37 | ("v100", True), 38 | ("v99999999", True), 39 | ("v1545325alpha6546464564", True), 40 | ], 41 | ) 42 | def test_validate_resource_version(test_value: Optional[str], expected: bool): 43 | if expected: 44 | assert test_value == validate_resource_version(test_value) 45 | else: 46 | with pytest.raises(ValueError): 47 | validate_resource_version(test_value) 48 | -------------------------------------------------------------------------------- /src/dto/tests/test_dto/utils.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | 3 | from odp.dto import ResourceRegistry, ResourceSpecABC, kind 4 | from pydantic import Field 5 | from pydantic.functional_validators import BeforeValidator 6 | 7 | TESTS_RESOURCE_REGISTRY = ResourceRegistry() 8 | 9 | 10 | def _validate_starts_with(s: str, p: str) -> str: 11 | if not s.startswith(p): 12 | raise ValueError(f"string does not start with {p}") 13 | return s 14 | 15 | 16 | @kind("test.hubocean.io", "mock", "v1alpha1", TESTS_RESOURCE_REGISTRY) 17 | class MockSpec(ResourceSpecABC): 18 | pass 19 | 20 | 21 | @kind("test.hubocean.io", "simple", "v1alpha1", TESTS_RESOURCE_REGISTRY) 22 | class SimpleSpec(ResourceSpecABC): 23 | some_str: str 24 | some_int: int = Field(..., ge=1) 25 | 26 | 27 | class UnregisteredSpec(ResourceSpecABC): 28 | some_float: float 29 | some_validated_str: Annotated[str, BeforeValidator(lambda s: _validate_starts_with(s, "foo"))] 30 | -------------------------------------------------------------------------------- /src/sdk/README.md: -------------------------------------------------------------------------------- 1 | # ODP Python SDK 2 | 3 | Connect to the Ocean Data Platform with Python through the Python SDK. Download queried ocean data easily and efficiently into data frames, for easy exploring and further processing in your data science project. 
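A minimal first-connection sketch, assuming credentials are available to the default token provider (for example when running inside an ODP workspace):

```python
from odp.client import OdpClient

client = OdpClient()  # talks to https://api.hubocean.earth by default

# Catalog resources are managed through `client.catalog` and tabular data through
# `client.tabular`; the scripts under `examples/` show complete round-trips.
print(client.personalize_name("quickstart"))
```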
4 | 5 | ## Documentation 6 | 7 | https://docs.hubocean.earth 8 | 9 | ## Installation 10 | 11 | ```shell 12 | pip install odp-sdk 13 | ``` -------------------------------------------------------------------------------- /src/sdk/odp/client/__init__.py: -------------------------------------------------------------------------------- 1 | from .client import OdpClient 2 | -------------------------------------------------------------------------------- /src/sdk/odp/client/client.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from uuid import UUID 3 | 4 | from pydantic import BaseModel, Field, PrivateAttr 5 | 6 | from ..dto import DatasetDto 7 | from .auth import TokenProvider, get_default_token_provider 8 | from .http_client import OdpHttpClient 9 | from .raw_storage_client import OdpRawStorageClient 10 | from .resource_client import OdpResourceClient 11 | from .tabular_storage_client import OdpTabularStorageClient 12 | from .tabular_storage_v2_client import ClientAuthorization 13 | from .tabular_v2.client import TableHandler 14 | 15 | 16 | class OdpClient(BaseModel): 17 | """Client for the ODP API""" 18 | 19 | base_url: str = "https://api.hubocean.earth" 20 | token_provider: TokenProvider = Field(default_factory=get_default_token_provider) 21 | 22 | _http_client: OdpHttpClient = PrivateAttr() 23 | _catalog_client: OdpResourceClient = PrivateAttr() 24 | _raw_storage_client: OdpRawStorageClient = PrivateAttr() 25 | _tabular_storage_client: OdpTabularStorageClient = PrivateAttr() 26 | 27 | def __init__(self, **data): 28 | super().__init__(**data) 29 | 30 | self._http_client = OdpHttpClient(base_url=self.base_url, token_provider=self.token_provider) 31 | self._catalog_client = OdpResourceClient(http_client=self._http_client, resource_endpoint="/catalog") 32 | self._raw_storage_client = OdpRawStorageClient(http_client=self._http_client) 33 | self._tabular_storage_client = OdpTabularStorageClient(http_client=self._http_client) 34 | self._tabular_storage_v2_client = ClientAuthorization( 35 | base_url=self.base_url, token_provider=self.token_provider 36 | ) 37 | 38 | def personalize_name(self, name: str, fmt: Optional[str] = None) -> str: 39 | """Personalize a name by adding a postfix unique to the user 40 | 41 | Args: 42 | name: The name to personalize 43 | fmt: Used to override the default format string. Should be a python format-string with placeholders 44 | for the variables `uid` and `name`.
For example: `"{uid}-{name}"` 45 | 46 | Returns: 47 | The personalized name 48 | """ 49 | fmt = fmt or "{name}-{uid}" 50 | uid = self.token_provider.get_user_id() 51 | 52 | # Attempt to simplify the UID by only using the node part of the UUID 53 | try: 54 | uid = UUID(uid).node 55 | except ValueError: 56 | # User ID is not a valid UUID, use it as-is 57 | pass 58 | 59 | return fmt.format(uid=uid, name=name) 60 | 61 | @property 62 | def resource_store(self): 63 | # TODO: Implement resource store 64 | raise NotImplementedError("Resource store not implemented") 65 | 66 | @property 67 | def catalog(self) -> OdpResourceClient: 68 | return self._catalog_client 69 | 70 | @property 71 | def iam(self): 72 | # TODO: Implement IAM controller 73 | raise NotImplementedError("IAM not implemented") 74 | 75 | @property 76 | def registry(self): 77 | # TODO: Implement registry/core controller 78 | raise NotImplementedError("Registry not implemented") 79 | 80 | @property 81 | def raw(self) -> OdpRawStorageClient: 82 | return self._raw_storage_client 83 | 84 | @property 85 | def tabular(self) -> OdpTabularStorageClient: 86 | return self._tabular_storage_client 87 | 88 | def table_v2(self, dataset_dto: DatasetDto) -> TableHandler: 89 | return self._tabular_storage_v2_client.table(str(dataset_dto.uuid)) 90 | -------------------------------------------------------------------------------- /src/sdk/odp/client/dto/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/C4IROcean-archive/odp-sdk-python/cfe9d937e620dfa8f35e6e92646d9625de5a0de9/src/sdk/odp/client/dto/__init__.py -------------------------------------------------------------------------------- /src/sdk/odp/client/dto/file_dto.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from typing import Any, Dict, Optional, Union 3 | from uuid import UUID 4 | 5 | from pydantic import BaseModel, Field, field_validator 6 | 7 | 8 | class FileMetadataDto(BaseModel): 9 | """File Metadata Model.""" 10 | 11 | name: str 12 | mime_type: Optional[str] = None 13 | dataset: Optional[UUID] = None 14 | metadata: Dict[str, Union[bool, int, str]] = Field(default_factory=dict) 15 | geo_location: Optional[Any] = None 16 | size_bytes: Optional[int] = None 17 | checksum: Optional[str] = None 18 | created_time: Optional[datetime] = None 19 | modified_time: Optional[datetime] = None 20 | deleted_time: Optional[datetime] = None 21 | 22 | @field_validator("name") 23 | def lstrip_name(cls, v): 24 | if v.startswith("/"): 25 | raise ValueError("name cannot start with '/'. 
Absolute paths are not allowed.") 26 | return v 27 | -------------------------------------------------------------------------------- /src/sdk/odp/client/dto/table_spec.py: -------------------------------------------------------------------------------- 1 | from typing import List, Literal, Optional 2 | from uuid import UUID 3 | 4 | from pydantic import BaseModel, model_validator 5 | 6 | from .tabular_store import TablePartitioningSpec 7 | 8 | 9 | class TableSpec(BaseModel): 10 | table_schema: dict 11 | partitioning: Optional[List[TablePartitioningSpec]] = None 12 | 13 | 14 | class StageDataPoints(BaseModel): 15 | """Model for update data point endpoint.""" 16 | 17 | action: Literal["create", "commit"] 18 | stage_id: Optional[UUID] 19 | 20 | @model_validator(mode="before") 21 | def _validate_action(cls, values): 22 | if values.get("action") == "create" and values.get("stage_id"): 23 | raise ValueError("stage id cannot be issued with create action") 24 | elif values.get("action") == "commit" and not values.get("stage_id"): 25 | raise ValueError("stage id must be issued with commit action") 26 | 27 | return values 28 | -------------------------------------------------------------------------------- /src/sdk/odp/client/dto/tabular_store.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timedelta 2 | from typing import List, Literal, Optional, Union 3 | from uuid import UUID, uuid4 4 | 5 | from pydantic import BaseModel 6 | 7 | 8 | class TablePartitioningSpec(BaseModel): 9 | columns: List[str] 10 | transformer_name: str 11 | args: Optional[List[Union[int, float, str]]] = None 12 | 13 | def serialize(self) -> bytes: 14 | return self.json().encode("utf-8") 15 | 16 | 17 | class TableStage(BaseModel): 18 | stage_id: UUID 19 | status: Literal["active", "commit", "commit-failed", "delete"] 20 | created_time: datetime 21 | expiry_time: datetime 22 | updated_time: Optional[datetime] = None 23 | 24 | error: Optional[str] = None 25 | error_info: Optional[dict] = None 26 | 27 | def serialize(self) -> bytes: 28 | return self.json(exclude_unset=True, exclude_none=True).encode("utf-8") 29 | 30 | @classmethod 31 | def generate(cls, expiry_time: timedelta) -> "TableStage": 32 | now = datetime.now() 33 | 34 | return cls(stage_id=uuid4(), status="active", created_time=now, expiry_time=now + expiry_time) 35 | 36 | def dict(self, **kwargs) -> "DictStrAny": # noqa: F821 37 | exclude_unset = kwargs.pop("exclude_unset", True) 38 | return super().dict(exclude_unset=exclude_unset, **kwargs) 39 | -------------------------------------------------------------------------------- /src/sdk/odp/client/exc.py: -------------------------------------------------------------------------------- 1 | """This module contains the set of ODP SDK exceptions.""" 2 | 3 | 4 | class OdpError(Exception): 5 | """Base class for exceptions in this module.""" 6 | 7 | 8 | class OdpAuthError(OdpError): 9 | """Exception raised for authentication errors.""" 10 | 11 | 12 | class OdpUnauthorizedError(OdpError): 13 | """Exception raised for unauthorized requests.""" 14 | 15 | 16 | class OdpForbiddenError(OdpError): 17 | """Exception raised for forbidden requests.""" 18 | 19 | 20 | class OdpTokenValidationError(OdpError): 21 | """Exception raised for invalid tokens.""" 22 | 23 | 24 | class OdpResourceNotFoundError(OdpError): 25 | """Exception raised when a resource is not found.""" 26 | 27 | 28 | class OdpResourceExistsError(OdpError): 29 | """Exception raised when a resource already 
exists.""" 30 | 31 | 32 | class OdpValidationError(OdpError): 33 | """Exception raised when a resource is not found.""" 34 | 35 | 36 | class OdpFileNotFoundError(OdpError): 37 | """Exception raised when a file is not found.""" 38 | 39 | 40 | class OdpFileAlreadyExistsError(OdpError): 41 | """File already exists""" 42 | 43 | 44 | class OpenTableStageInvalidAction(OdpError): 45 | """Exception when table is getting deleted and it has active sessions.""" 46 | -------------------------------------------------------------------------------- /src/sdk/odp/client/tabular_storage_v2_client.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Optional, Union 2 | 3 | from odp.client.auth import TokenProvider 4 | from odp.client.tabular_v2.client import Client 5 | 6 | 7 | class ClientAuthorization(Client): 8 | def __init__(self, base_url, token_provider: TokenProvider): 9 | if base_url.endswith(":8888"): 10 | base_url = base_url.replace(":8888", ":31337") 11 | super().__init__(base_url) 12 | self.token_provider = token_provider 13 | 14 | def _request( 15 | self, 16 | path: str, 17 | data: Union[Dict, bytes, None] = None, 18 | params: Optional[Dict] = None, 19 | headers: Optional[Dict] = None, 20 | ) -> Client.Response: 21 | headers = headers or {} 22 | headers["Authorization"] = self.token_provider.get_token() 23 | return super()._request(path, data, params, headers) 24 | -------------------------------------------------------------------------------- /src/sdk/odp/client/tabular_v2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/C4IROcean-archive/odp-sdk-python/cfe9d937e620dfa8f35e6e92646d9625de5a0de9/src/sdk/odp/client/tabular_v2/__init__.py -------------------------------------------------------------------------------- /src/sdk/odp/client/tabular_v2/big/__init__.py: -------------------------------------------------------------------------------- 1 | from .big import BigCol, convert_schema_outward, inner_exp 2 | from .buffer import Buffer, convert_schema_inward 3 | from .local import LocalBigCol 4 | from .remote import RemoteBigCol 5 | -------------------------------------------------------------------------------- /src/sdk/odp/client/tabular_v2/big/big.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | from abc import abstractmethod 4 | from typing import Iterable, Optional 5 | 6 | import pyarrow as pa 7 | from odp.client.tabular_v2.util.exp import BinOp, Field, Op, Parens, Scalar, UnaryOp 8 | 9 | SMALL_MAX = 256 10 | STR_LIMIT = 128 # when to start using a reference 11 | STR_MIN = 12 # what to keep as prefix in the reference 12 | MAX_BIGFILE_SIZE = 64 * 1024 * 1024 # max size of a big file 13 | 14 | 15 | def convert_schema_outward(schema: pa.Schema) -> pa.Schema: 16 | """drops the .ref fields""" 17 | out = [] 18 | for name in schema.names: 19 | field: pa.Field = schema.field(name) 20 | if name.endswith(".ref") and name[:-4] in schema.names and field.type == pa.string(): 21 | continue # skip 22 | out.append(field) 23 | return pa.schema(out) 24 | 25 | 26 | class BigCol: 27 | def __init__(self): 28 | pass 29 | 30 | @abstractmethod 31 | def fetch(self, md5: str) -> bytes: 32 | """fetch data, called often, should cache""" 33 | raise NotImplementedError() 34 | 35 | @abstractmethod 36 | def upload(self, md5: str, data: Iterable[bytes]): 37 | """upload data""" 38 | raise NotImplementedError() 39 
| 40 | def decode(self, batch: pa.RecordBatch) -> pa.RecordBatch: 41 | cache = {} # FIXME: can this use too much memory? 42 | outer_schema = convert_schema_outward(batch.schema) 43 | 44 | refs = [] 45 | for name in outer_schema.names: 46 | if name.endswith(".ref"): 47 | refs.append(name) 48 | 49 | if not refs: 50 | return batch.select(outer_schema.names) 51 | 52 | def decode_by_row(row): 53 | for name in refs: 54 | ref = row[name] 55 | if not ref: 56 | continue 57 | 58 | target = name[:-4] 59 | big_id, start, size = ref.split(":") 60 | start = int(start) 61 | size = int(size) 62 | if big_id in cache: 63 | data = cache[big_id] 64 | else: 65 | data = self.fetch(big_id) 66 | cache[big_id] = data 67 | if isinstance(row[name], str): # the field must contain the prefix, from which we infer the type 68 | row[target] = data[start : start + size].decode("utf-8") 69 | else: 70 | row[target] = data[start : start + size] 71 | return row 72 | 73 | df = batch.to_pandas() 74 | df = df.apply(decode_by_row, axis=1) 75 | return pa.RecordBatch.from_pandas(df, schema=outer_schema) 76 | 77 | 78 | def inner_exp(schema: pa.Schema, op: Optional[Op]) -> Optional[Op]: 79 | if op is None: 80 | return None 81 | 82 | fields = [] 83 | for name in schema.names: 84 | field: pa.Field = schema.field(name) 85 | if field.type != pa.string() and field.type != pa.binary(): 86 | continue 87 | if field.metadata and b"big" in field.metadata: 88 | fields.append(name) 89 | 90 | # TODO don't use the visitor, instead parse manually and use negation context 91 | def visitor(neg: bool, op: Op) -> Op: 92 | if isinstance(op, Field): 93 | return op 94 | if isinstance(op, Scalar): 95 | return op 96 | if isinstance(op, Parens): 97 | op.exp = visitor(neg, op.exp) 98 | return op 99 | if isinstance(op, UnaryOp): 100 | if op.prefix in ["~", "not", "!", "invert"]: 101 | return UnaryOp(prefix=op.prefix, exp=visitor(~neg, op.exp), suffix=op.suffix) 102 | return op 103 | if isinstance(op, BinOp): 104 | op = BinOp(left=visitor(neg, op.left), op=op.op, right=visitor(neg, op.right)) 105 | if isinstance(op.left, Field): 106 | if str(op.left) in fields: 107 | return _inner_exp_binop(neg, op.left, op.op, op.right) 108 | return op 109 | elif isinstance(op.right, Field): 110 | try: 111 | op = op.flip() 112 | except NotImplementedError: 113 | logging.warning("can't flip big-col expression: %s", op) 114 | return Scalar(src="True", type="bool") 115 | return visitor(neg, op) 116 | else: 117 | return op 118 | raise ValueError(f"can't convert big-col expression: {type(op)}") 119 | 120 | op = visitor(False, op) 121 | logging.info("big: inner_exp: %s", repr(op)) 122 | return op 123 | 124 | 125 | def _inner_exp_binop_str(neg: bool, field: Field, op: str, right: str) -> Op: 126 | if len(right) > STR_MIN: 127 | a = right[:STR_MIN] 128 | b = right[: STR_MIN - 1] + chr(ord(right[STR_MIN - 1]) + 1) 129 | logging.info("big: str: %s .. 
%s", json.dumps(a), json.dumps(b)) 130 | 131 | if op == "==": 132 | if neg: 133 | return Scalar.from_py(False) 134 | return BinOp( 135 | left=BinOp( 136 | left=Scalar.from_py(a), 137 | op="<", 138 | right=field, 139 | ), 140 | op="and", 141 | right=BinOp( 142 | left=field, 143 | op="<", 144 | right=Scalar.from_py(b), 145 | ), 146 | ) 147 | elif op == "!=": 148 | if neg: 149 | return Scalar.from_py(False) 150 | else: 151 | return Scalar.from_py(True) 152 | elif op == ">" or op == ">=": 153 | return BinOp( 154 | left=field, 155 | op=op, 156 | right=Scalar.from_py(a), 157 | ) 158 | elif op == "<" or op == "<=": 159 | return BinOp( 160 | left=field, 161 | op=op, 162 | right=Scalar.from_py(b), 163 | ) 164 | else: 165 | return BinOp( 166 | left=field, 167 | op=op, 168 | right=Scalar.from_py(right), 169 | ) 170 | logging.error("can't convert big-col expression: %s %s %s", field, op, right) 171 | raise ValueError("can't convert big-col expression") 172 | 173 | 174 | def _inner_exp_binop(neg: bool, left: Field, op: str, right: Op) -> Op: 175 | if isinstance(right, Scalar): 176 | v = right.to_py() 177 | if isinstance(v, str): 178 | return _inner_exp_binop_str(neg, left, op, v) 179 | else: 180 | raise ValueError("can't convert big-col expression for scalar %s", right) 181 | raise ValueError("can't convert big-col expression: %s %s %s", left, op, right) 182 | -------------------------------------------------------------------------------- /src/sdk/odp/client/tabular_v2/big/buffer.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | from threading import Lock 3 | from typing import Optional 4 | 5 | import pandas as pd 6 | import pyarrow as pa 7 | from odp.client.tabular_v2.big import BigCol 8 | from odp.client.tabular_v2.big.big import MAX_BIGFILE_SIZE, SMALL_MAX, STR_LIMIT, STR_MIN 9 | 10 | 11 | def convert_schema_inward(schema: pa.Schema) -> pa.Schema: 12 | """add .ref fields for columns marked with big, helper only used by create()""" 13 | return Buffer(None).with_outer_schema(schema).inner_schema 14 | 15 | 16 | class Buffer: 17 | def __init__(self, parent: Optional[BigCol]): 18 | self.data = [] 19 | self.size = 0 20 | self.next_id = uuid.uuid4().hex 21 | self.lock = Lock() 22 | self.parent = parent 23 | self.big_fields = [] 24 | self.small_fields = [] 25 | self.inner_schema = None 26 | 27 | def with_inner_schema(self, inner_schema: pa.Schema): 28 | self.inner_schema = inner_schema 29 | for name in inner_schema.names: 30 | if name.endswith(".ref"): 31 | continue 32 | f = inner_schema.field(name) 33 | if f.type != pa.string() and f.type != pa.binary(): 34 | continue 35 | meta = f.metadata 36 | if meta and b"big" in meta: 37 | self.big_fields.append(name) 38 | else: 39 | self.small_fields.append(name) 40 | return self 41 | 42 | def with_outer_schema(self, outer_schema: pa.Schema) -> "Buffer": 43 | fields = [] 44 | for name in outer_schema.names: 45 | field: pa.Field = outer_schema.field(name) 46 | fields.append(field) 47 | if field.type != pa.string() and field.type != pa.binary(): 48 | continue 49 | meta = field.metadata 50 | if meta and b"big" in meta: 51 | fields.append(pa.field(name + ".ref", pa.string())) 52 | self.big_fields.append(name) 53 | else: 54 | self.small_fields.append(name) 55 | self.inner_schema = pa.schema(fields) 56 | return self 57 | 58 | def encode(self, batch: pa.RecordBatch): 59 | # TODO: avoid pandas? 
60 | df: pd.DataFrame = batch.to_pandas() 61 | out = df.apply(self.append, axis=1) 62 | return pa.RecordBatch.from_pandas(out, schema=self.inner_schema) 63 | 64 | def append(self, row): 65 | for name in self.small_fields: 66 | data = row[name] 67 | if data is None: 68 | continue 69 | if len(data) > SMALL_MAX: 70 | raise ValueError(f"field {name} is too long: “{len(data)}”") 71 | 72 | for name in self.big_fields: 73 | row[name + ".ref"] = None 74 | data = row[name] 75 | if data is None: 76 | continue 77 | if isinstance(data, str): 78 | data = data.encode("utf-8") # convert to bytes 79 | size = len(data) # size in bytes 80 | if size < STR_LIMIT: 81 | continue 82 | with self.lock: 83 | ref = f"{self.next_id}:{self.size}:{size}" # noqa # ref to the current position 84 | self.data.append(data) # append the new data to the buffer 85 | self.size += size # update the size of the buffer 86 | if self.size > MAX_BIGFILE_SIZE: # too much data? flush 87 | self.parent.upload(self.next_id, self.data) 88 | self.next_id = uuid.uuid4().hex 89 | self.data = [] 90 | self.size = 0 91 | row[name + ".ref"] = ref 92 | row[name] = row[name][0:STR_MIN] 93 | return row 94 | 95 | def flush(self): 96 | with self.lock: 97 | if self.size > 0: 98 | self.parent.upload(self.next_id, self.data) 99 | self.data = [] 100 | self.size = 0 101 | self.next_id = uuid.uuid4().hex 102 | -------------------------------------------------------------------------------- /src/sdk/odp/client/tabular_v2/big/local.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from typing import Iterable 4 | 5 | from odp.client.tabular_v2.big.big import BigCol 6 | 7 | 8 | class LocalBigCol(BigCol): 9 | def __init__(self, root: str): 10 | super().__init__() 11 | self.root = root 12 | os.makedirs(self.root, exist_ok=True) 13 | 14 | def fetch(self, big_id: str) -> bytes: 15 | logging.info("downloading %s", big_id) 16 | with open(f"{self.root}/{big_id}.big", "rb") as f: 17 | return f.read() 18 | 19 | def upload(self, big_id: str, data: Iterable[bytes]): 20 | logging.info("uploading %s", big_id) 21 | with open(f"{self.root}/{big_id}.big", "wb") as f: 22 | for d in data: 23 | f.write(d) 24 | -------------------------------------------------------------------------------- /src/sdk/odp/client/tabular_v2/big/remote.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | import io 3 | import logging 4 | from typing import Callable, Iterable 5 | 6 | from odp.client.tabular_v2.big.big import BigCol 7 | from odp.client.tabular_v2.util.cache import Cache 8 | 9 | 10 | class RemoteBigCol(BigCol): 11 | def __init__( 12 | self, 13 | uploader: Callable[[str, bytes], None], 14 | downloader: Callable[[str], bytes], 15 | root_cache: str, 16 | ): 17 | super().__init__() 18 | self.cache = Cache(root_cache) 19 | self.uploader = uploader 20 | self.downloader = downloader 21 | # TODO: make sure to not fill up the disk? 22 | 23 | def fetch(self, bid: str) -> bytes: 24 | with self.cache.key("big." + bid) as e: 25 | if not e.exists(): 26 | logging.info("fetching %s", bid) 27 | comp = self.downloader(bid) 28 | e.set(comp) 29 | else: 30 | logging.info("cache hit %s", bid) 31 | comp = e.get() 32 | # if exists, use the cached version 33 | return gzip.decompress(comp) 34 | 35 | def upload(self, bid: str, data: Iterable[bytes]): 36 | with self.cache.key("big." 
+ bid) as e: 37 | buf = io.BytesIO() 38 | with gzip.GzipFile(fileobj=buf, mode="wb") as f: 39 | for d in data: 40 | f.write(d) 41 | comp = buf.getvalue() 42 | self.uploader(bid, comp) 43 | e.set(comp) 44 | -------------------------------------------------------------------------------- /src/sdk/odp/client/tabular_v2/bsquare/__init__.py: -------------------------------------------------------------------------------- 1 | from .bsquare import convert_query, convert_schema_inward, convert_schema_outward, decode, encode 2 | -------------------------------------------------------------------------------- /src/sdk/odp/client/tabular_v2/bsquare/bsquare.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Optional 3 | 4 | import pyarrow as pa 5 | import shapely 6 | from odp.client.tabular_v2.bsquare.query import _QueryContext 7 | from odp.client.tabular_v2.util.exp import Op 8 | 9 | 10 | def convert_schema_inward(outer_schema: pa.Schema) -> pa.Schema: 11 | out = [] 12 | for name in outer_schema.names: 13 | f = outer_schema.field(name) 14 | if f.metadata and b"isGeometry" in f.metadata: 15 | meta = f.metadata 16 | if b"index" in meta: 17 | new_meta = meta.copy() 18 | del new_meta[b"index"] 19 | f = f.with_metadata(new_meta) 20 | out.append(f) 21 | out.append(pa.field(name + ".x", pa.float64(), True, metadata=meta)) 22 | out.append(pa.field(name + ".y", pa.float64(), True, metadata=meta)) 23 | out.append(pa.field(name + ".q", pa.float64(), True, metadata=meta)) 24 | else: 25 | out.append(f) 26 | return pa.schema(out) 27 | 28 | 29 | # convert the inner_schema to outer_schema 30 | def convert_schema_outward(inner_schema: pa.Schema) -> pa.Schema: 31 | geo_indexes = set() 32 | 33 | def is_subfield(schema: pa.Schema, f: pa.Field) -> bool: 34 | if "." 
not in f.name: 35 | return False 36 | left, right = f.name.rsplit(".", 1) 37 | if left not in schema.names: 38 | return False 39 | if schema.field(left).metadata and b"isGeometry" not in schema.field(left).metadata: 40 | return False 41 | if f.metadata and b"index" in f.metadata: 42 | geo_indexes.add(left) 43 | return True 44 | 45 | # create a new schema with only the fields that are not subfields 46 | fields = [] 47 | for names in inner_schema.names: 48 | f = inner_schema.field(names) 49 | if not is_subfield(inner_schema, f): 50 | fields.append(f) 51 | 52 | # add back the "index" to the main field (which was removed when creating the subfields) 53 | for i, f in enumerate(fields): 54 | if f.name in geo_indexes: 55 | meta = f.metadata 56 | meta[b"index"] = b"1" 57 | fields[i] = f.with_metadata(meta) 58 | return pa.schema(fields) 59 | 60 | 61 | # convert outer query to inner query using bsquare in .x, .y and .q 62 | def convert_query(outer_schema: pa.Schema, outer_query: Optional[Op]) -> Optional[Op]: 63 | if outer_query is None: 64 | return None 65 | 66 | geo_fields = [] 67 | for f in outer_schema: 68 | if f.metadata and b"isGeometry" in f.metadata: 69 | geo_fields.append(f.name) 70 | 71 | return _QueryContext(geo_fields).convert(outer_query) 72 | 73 | 74 | def decode(b: pa.RecordBatch) -> pa.RecordBatch: 75 | outer_schema = convert_schema_outward(b.schema) 76 | if b.num_rows == 0: 77 | return pa.RecordBatch.from_pylist([], schema=outer_schema) 78 | list = pa.Table.from_batches([b], schema=b.schema).select(outer_schema.names).to_batches() 79 | if len(list) != 1: 80 | raise ValueError("expected exactly one batch") 81 | return list[0] 82 | 83 | 84 | def encode(b: pa.RecordBatch) -> pa.RecordBatch: 85 | logging.info("bsquare encoding %d rows", b.num_rows) 86 | inner_schema = convert_schema_inward(b.schema) 87 | geo_names = [] 88 | for name in b.schema.names: 89 | f = b.schema.field(name) 90 | if f.metadata and b"isGeometry" in f.metadata: 91 | geo_names.append(name) 92 | 93 | # we encode rows by rows to made it simple to create multiple columns 94 | def _encode(row): 95 | for name in geo_names: 96 | if name in row and row[name] is not None: 97 | val = row[name] 98 | if isinstance(val, str): 99 | val = shapely.from_wkt(val) 100 | elif isinstance(val, bytes): 101 | val = shapely.from_wkb(val) 102 | else: 103 | raise ValueError(f"Unsupported type: {type(val)}") 104 | min_x, min_y, max_x, max_y = val.bounds 105 | row[name + ".x"] = (min_x + max_x) / 2 106 | row[name + ".y"] = (min_y + max_y) / 2 107 | row[name + ".q"] = max(max_x - min_x, max_y - min_y) / 2 108 | else: 109 | row[name + ".x"] = None 110 | row[name + ".y"] = None 111 | row[name + ".q"] = None 112 | return row 113 | 114 | d = b.to_pandas() 115 | for geo_name in geo_names: 116 | d[geo_name + ".x"] = None 117 | d[geo_name + ".y"] = None 118 | d[geo_name + ".q"] = None 119 | d = d.apply(func=_encode, axis=1) 120 | return pa.RecordBatch.from_pandas(d, schema=inner_schema) 121 | 122 | 123 | class BSquare: 124 | geometry_fields = ["{col_name}.x", "{col_name}.y", "{col_name}.q"] # add complexity and confuse the user 125 | 126 | def __init__(self, inner_schema: Optional[pa.Schema] = None): 127 | assert not "good" 128 | self._inner_schema = inner_schema 129 | self._geo_fields = [] # FIXME do this earlier, then cash on it 130 | -------------------------------------------------------------------------------- /src/sdk/odp/client/tabular_v2/bsquare/query.py: -------------------------------------------------------------------------------- 1 | 
import copy 2 | import logging 3 | from typing import Optional 4 | 5 | import shapely 6 | from odp.client.tabular_v2.util import exp 7 | 8 | 9 | class _QueryContext: 10 | def __init__(self, geo_fields: list[str]): 11 | self.geo_fields = geo_fields 12 | self.negate = False 13 | 14 | def neg(self) -> "_QueryContext": 15 | c = copy.copy(self) 16 | c.negate = ~c.negate 17 | return c 18 | 19 | def is_geo_field(self, op: Optional[exp.Op]) -> bool: 20 | if isinstance(op, exp.Field): 21 | return op.name in self.geo_fields 22 | return False 23 | 24 | def convert(self, op: Optional[exp.Op]) -> Optional[exp.Op]: 25 | if op is None: 26 | return None 27 | if isinstance(op, exp.Parens): 28 | inner = self.convert(op.exp) 29 | if isinstance(inner, exp.Parens): 30 | return inner 31 | return exp.Parens(inner) 32 | if isinstance(op, exp.BinOp): 33 | if self.is_geo_field(op.left) or self.is_geo_field(op.right): 34 | if op.op in ["intersects", "contains", "within", "=="]: 35 | return self._convert_intersect(op) 36 | elif op.op in ["intersects", "contains", "within"]: 37 | raise ValueError(f"can't do '{op.op}' on non-geo fields") 38 | left = self.convert(op.left) 39 | right = self.convert(op.right) 40 | return exp.BinOp(left=left, op=op.op, right=right) 41 | if isinstance(op, exp.Field): 42 | return op 43 | if isinstance(op, exp.Scalar): 44 | return op 45 | if isinstance(op, exp.UnaryOp): 46 | cur = self 47 | if op.prefix == "~": 48 | cur = self.neg() 49 | return exp.UnaryOp(prefix=op.prefix, exp=cur.convert(op.exp), suffix=op.suffix) 50 | if isinstance(op, exp.Func): 51 | cur = self 52 | if op.name == "invert": 53 | cur = self.neg() 54 | args = [cur.convert(a) for a in op.args] 55 | return exp.Func(name=op.name, args=args) 56 | raise ValueError(f"can't convert {op}: {type(op)}") 57 | 58 | def _convert_intersect(self, op: exp.BinOp) -> exp.Op: 59 | if isinstance(op.left, exp.Field): 60 | if isinstance(op.right, exp.Scalar): 61 | geo = shapely.from_wkt(op.right.to_py()) 62 | return self._intersect_field(op.left, geo) 63 | # if isinstance(op.right, exp.Field): 64 | # return exp.Scalar.from_py(~self.negate) 65 | 66 | if isinstance(op.right, exp.Field): 67 | if isinstance(op.left, exp.Scalar): 68 | geo = shapely.from_wkt(op.left.to_py()) 69 | return self._intersect_field(op.right, geo) 70 | 71 | raise ValueError(f"unsupported: {type(op.left)} {op.op} {type(op.right)}") 72 | 73 | def _intersect_field(self, field: exp.Field, geo: shapely.Geometry) -> exp.Op: 74 | logging.info("intersecting field '%s' with '%s'", field, geo) 75 | fx = exp.Field(name=field.name + ".x") 76 | fy = exp.Field(name=field.name + ".y") 77 | fq = exp.Field(name=field.name + ".q") 78 | x0, y0, x1, y1 = shapely.bounds(geo).tolist() 79 | if self.negate: 80 | xop = exp.BinOp( 81 | exp.Parens(exp.BinOp(exp.BinOp(fx, "-", fq), ">=", exp.Scalar.from_py(x0))), 82 | "and", 83 | exp.Parens(exp.BinOp(exp.BinOp(fx, "+", fq), "<=", exp.Scalar.from_py(x1))), 84 | ) 85 | yop = exp.BinOp( 86 | exp.Parens(exp.BinOp(exp.BinOp(fy, "-", fq), ">=", exp.Scalar.from_py(y0))), 87 | "and", 88 | exp.Parens(exp.BinOp(exp.BinOp(fy, "+", fq), "<=", exp.Scalar.from_py(y1))), 89 | ) 90 | else: 91 | xop = exp.BinOp( 92 | exp.Parens(exp.BinOp(exp.BinOp(fx, "+", fq), ">=", exp.Scalar.from_py(x0))), 93 | "and", 94 | exp.Parens(exp.BinOp(exp.BinOp(fx, "-", fq), "<=", exp.Scalar.from_py(x1))), 95 | ) 96 | yop = exp.BinOp( 97 | exp.Parens(exp.BinOp(exp.BinOp(fy, "+", fq), ">=", exp.Scalar.from_py(y0))), 98 | "and", 99 | exp.Parens(exp.BinOp(exp.BinOp(fy, "-", fq), "<=", 
exp.Scalar.from_py(y1))), 100 | ) 101 | return exp.Parens(exp.BinOp(xop, "and", yop)) 102 | 103 | 104 | def test_query(): 105 | op = exp.parse("color == 'red' and not (area intersect 'POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))')") 106 | logging.info("'%s'...", op) 107 | c = _QueryContext(["area"]) 108 | op2 = c.convert(op) 109 | logging.info("'%s'...", op2) 110 | assert "color == 'red'" in str(op2) 111 | assert "area.x - area.q >= 0" in str(op2) # inverted sign 112 | 113 | # check that raises exception if intersect with no geo field 114 | try: 115 | op = exp.parse("other_field intersect 'POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))'") 116 | c = _QueryContext([]) 117 | c.convert(op) 118 | except ValueError as e: 119 | assert "intersect" in str(e) 120 | else: 121 | assert False 122 | -------------------------------------------------------------------------------- /src/sdk/odp/client/tabular_v2/client/__init__.py: -------------------------------------------------------------------------------- 1 | from .client import Client 2 | from .table_cursor import Cursor 3 | from .table_tx import Transaction 4 | from .tablehandler import TableHandler 5 | -------------------------------------------------------------------------------- /src/sdk/odp/client/tabular_v2/client/client.py: -------------------------------------------------------------------------------- 1 | import io 2 | import logging 3 | from typing import TYPE_CHECKING, Dict, Iterator, Optional, Union 4 | 5 | import requests 6 | from odp.client.tabular_v2.util.reader import Iter2Reader 7 | 8 | if TYPE_CHECKING: 9 | from odp.client.tabular_v2.client import TableHandler 10 | 11 | 12 | class Client: 13 | def __init__(self, base_url: str): 14 | self._base_url = base_url 15 | 16 | class Response: 17 | # Abstraction for response object, shared between http client and test client 18 | def __init__(self, res: Union[requests.Response, Iterator[bytes], Dict, bytes]): 19 | if isinstance(res, requests.Response): 20 | if res.status_code == 204: 21 | raise FileNotFoundError(res.text) 22 | res.raise_for_status() 23 | logging.info("response: %s", res) 24 | self.res = res 25 | 26 | def reader(self): 27 | if isinstance(self.res, bytes): 28 | return io.BytesIO(self.res) 29 | if isinstance(self.res, Iterator): 30 | return Iter2Reader(self.res) 31 | return self.res.raw 32 | 33 | def iter(self) -> Iterator[bytes]: 34 | if isinstance(self.res, bytes): 35 | return iter([self.res]) 36 | if isinstance(self.res, Iterator): 37 | return self.res 38 | return self.res.iter_content() 39 | 40 | def all(self) -> bytes: 41 | if isinstance(self.res, bytes): 42 | return self.res 43 | if isinstance(self.res, Iterator): 44 | return b"".join(self.res) 45 | return self.res.content 46 | 47 | def json(self) -> dict: 48 | if isinstance(self.res, dict): 49 | return self.res 50 | return self.res.json() 51 | 52 | def _request( 53 | self, 54 | path: str, 55 | data: Union[Dict, bytes, None] = None, 56 | params: Optional[Dict] = None, 57 | headers: Optional[Dict] = None, 58 | ) -> Response: 59 | logging.info("ktable: REQ %s %s (%d bytes)", path, params, len(data) if data else 0) 60 | if isinstance(data, dict): 61 | res = requests.post(self._base_url + path, headers=headers, params=params, json=data, stream=True) 62 | elif isinstance(data, bytes): 63 | res = requests.post(self._base_url + path, headers=headers, params=params, data=data, stream=True) 64 | elif isinstance(data, Iterator): 65 | res = requests.post(self._base_url + path, headers=headers, params=params, data=data, stream=True) 66 | elif data is 
None: 67 | res = requests.post(self._base_url + path, headers=headers, params=params, stream=True) 68 | else: 69 | raise ValueError(f"unexpected type {type(data)}") 70 | logging.info("response: %s", res.status_code) 71 | return self.Response(res) 72 | 73 | # @lru_cache(maxsize=10) 74 | def table(self, table_id: str) -> "TableHandler": 75 | from odp.client.tabular_v2.client.tablehandler import TableHandler 76 | 77 | return TableHandler(self, table_id) 78 | -------------------------------------------------------------------------------- /src/sdk/odp/client/tabular_v2/client/table_cursor.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Iterator 2 | 3 | import pyarrow as pa 4 | 5 | 6 | class CursorException(Exception): 7 | """Raised when the client is required to connect again with the given cursor to fetch more data""" 8 | 9 | def __init__(self, cursor: str): 10 | self.cursor = cursor 11 | 12 | 13 | class Cursor: 14 | def __init__( 15 | self, 16 | scanner: Callable[[str], Iterator[pa.RecordBatch]], 17 | ): 18 | self.scanner = scanner 19 | 20 | def batches(self) -> Iterator[pa.RecordBatch]: 21 | cursor = "" 22 | while True: 23 | try: 24 | for b in self.scanner(cursor): 25 | yield b 26 | except CursorException as e: 27 | cursor = e.cursor 28 | continue # FIXME: Should not be raised? 29 | break 30 | 31 | def rows(self) -> Iterator[dict]: 32 | for b in self.batches(): 33 | for row in b.to_pylist(): 34 | yield row 35 | 36 | def pages(self, size: int = 0) -> Iterator[list[dict]]: 37 | if size < 1: # page based on what we get 38 | for b in self.batches(): 39 | yield b.to_pylist() 40 | return 41 | 42 | # page based on page_size 43 | buf: list[dict] = [] 44 | for b in self.batches(): 45 | buf.extend(b.to_pylist()) 46 | while len(buf) >= size: 47 | yield buf[:size] 48 | buf = buf[size:] 49 | if len(buf) > 0: 50 | yield buf 51 | -------------------------------------------------------------------------------- /src/sdk/odp/client/tabular_v2/client/table_tx.py: -------------------------------------------------------------------------------- 1 | import io 2 | import logging 3 | from typing import Dict, Iterator, List, Union 4 | 5 | import pyarrow as pa 6 | from odp.client.tabular_v2 import big 7 | from odp.client.tabular_v2.bsquare import bsquare 8 | from odp.client.tabular_v2.client.table_cursor import CursorException 9 | from odp.client.tabular_v2.client.tablehandler import TableHandler 10 | from odp.client.tabular_v2.util import exp 11 | from odp.client.tabular_v2.util.reader import Iter2Reader 12 | 13 | 14 | class Transaction: 15 | def __init__(self, table: TableHandler, tx_id: str): 16 | if not tx_id: 17 | raise ValueError("tx_id must not be empty") 18 | self._table = table 19 | self._id = tx_id 20 | self._buf: list[pa.RecordBatch] = [] 21 | self._buf_rows = 0 22 | self._big_buf: big.Buffer = big.Buffer(table._bigcol).with_inner_schema(table._inner_schema) 23 | self._old_rid = None 24 | 25 | def select(self, query: Union[exp.Op, str, None] = None) -> Iterator[dict]: 26 | for row in self._table.select(query).rows(): 27 | yield row 28 | 29 | def replace(self, query: Union[exp.Op, str, None] = None) -> Iterator[dict]: 30 | """perform a two-step replace: 31 | rows that don't match the query are kept. 32 | rows that match are removed and sent to the caller. 33 | the caller might insert them again or do something else.
34 | """ 35 | if query is None: 36 | raise ValueError("For your own safety, please provide a query like 1==1") 37 | assert self._buf_rows == 0 # FIXME: handle buffered data in replace/select 38 | if isinstance(query, str): 39 | query = exp.parse(query) 40 | inner_query = bsquare.convert_query(self._table._outer_schema, query) 41 | inner_query = big.inner_exp(self._table._inner_schema, inner_query) 42 | inner_query = str(inner_query.pyarrow()) 43 | 44 | def scanner(cursor: str) -> Iterator[pa.RecordBatch]: 45 | res = self._table._client._request( 46 | path="/api/table/v2/replace", 47 | params={ 48 | "table_id": self._table._id, 49 | "tx_id": self._id, 50 | }, 51 | data={ 52 | "query": inner_query, 53 | "cursor": cursor, 54 | }, 55 | ) 56 | r = Iter2Reader(res.iter()) 57 | r = pa.ipc.RecordBatchStreamReader(r) 58 | for bm in r.iter_batches_with_custom_metadata(): 59 | if bm.custom_metadata: 60 | meta = bm.custom_metadata 61 | if b"cursor" in meta: 62 | raise CursorException(meta[b"cursor"].decode()) 63 | if b"error" in meta: 64 | raise ValueError("remote: " + meta[b"error"].decode()) 65 | if bm.batch: 66 | yield bm.batch 67 | 68 | from odp.client.tabular_v2.client import Cursor 69 | 70 | for b in Cursor(scanner=scanner).batches(): 71 | b = self._table._bigcol.decode(b) # TODO(oha): use buffer for partial big files not uploaded 72 | b = bsquare.decode(b) 73 | tab = pa.Table.from_batches([b], schema=self._table._outer_schema) 74 | for b2 in tab.filter(~query.pyarrow()).to_batches(): 75 | if b2.num_rows > 0: 76 | self.insert(b2) 77 | 78 | for b2 in tab.filter(query.pyarrow()).to_batches(): 79 | for row in b2.to_pylist(): 80 | yield row 81 | 82 | def delete(self, query: Union[exp.Op, str, None] = None) -> int: 83 | ct = 0 84 | for _ in self.replace(query): 85 | ct += 1 86 | return ct 87 | 88 | def flush(self): 89 | logging.info("flushing to stage %s", self._id) 90 | if len(self._buf) == 0: 91 | return 92 | buf = io.BytesIO() 93 | w = pa.ipc.RecordBatchStreamWriter(buf, self._table._inner_schema) 94 | for b in self._buf: 95 | if isinstance(b, list): 96 | b = pa.RecordBatch.from_pylist(b, schema=self._table._outer_schema) 97 | b = bsquare.encode(b) 98 | b = self._big_buf.encode(b) 99 | w.write_batch(b) 100 | w.close() 101 | self._table._client._request( 102 | path="/api/table/v2/insert", 103 | params={ 104 | "table_id": self._table._id, 105 | "tx_id": self._id, 106 | }, 107 | data=buf.getvalue(), 108 | ).json() 109 | self._buf = [] 110 | self._buf_rows = 0 111 | 112 | def insert(self, data: Union[Dict, List[Dict], pa.RecordBatch]): 113 | """queue data to be inserted on flush()""" 114 | if isinstance(data, dict): 115 | data = [data] 116 | if isinstance(data, list): 117 | # we expand the last list if it's already a list 118 | last = self._buf[-1] if self._buf else None 119 | if last and isinstance(last, list): 120 | last.extend(data) 121 | else: 122 | self._buf.append(data) 123 | self._buf_rows += len(data) 124 | elif isinstance(data, pa.RecordBatch): 125 | self._buf.append(data) 126 | self._buf_rows += data.num_rows 127 | else: 128 | raise ValueError(f"unexpected type {type(data)}") 129 | 130 | if self._buf_rows > 10_000: 131 | self.flush() 132 | -------------------------------------------------------------------------------- /src/sdk/odp/client/tabular_v2/util/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/C4IROcean-archive/odp-sdk-python/cfe9d937e620dfa8f35e6e92646d9625de5a0de9/src/sdk/odp/client/tabular_v2/util/__init__.py -------------------------------------------------------------------------------- /src/sdk/odp/client/tabular_v2/util/cache.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import logging 3 | import os 4 | import threading 5 | from time import time 6 | from typing import Optional 7 | 8 | from odp.client.tabular_v2.util.util import size2human 9 | 10 | 11 | class Cache: 12 | class Entry: 13 | def __init__(self, key: str, cache: "Cache"): 14 | self.key = key 15 | self.lock = threading.Lock() 16 | self.cache = cache 17 | self.filename = base64.b64encode(key.encode()).decode() 18 | self.size = 0 19 | 20 | def set(self, value: bytes) -> bool: 21 | if len(value) > self.cache.max_entry_size: 22 | return False 23 | 24 | self.cache._make_space(len(value)) 25 | self.cache.tot_bytes -= self.size # if replacing, this will be non-zero 26 | self.size = len(value) 27 | self.cache.tot_bytes += self.size 28 | 29 | with open(os.path.join(self.cache.root_folder, self.filename), "wb") as f: 30 | f.write(value) 31 | 32 | def exists(self) -> bool: 33 | filename = os.path.join(self.cache.root_folder, self.filename) 34 | return os.path.exists(filename) 35 | 36 | def age(self) -> float: 37 | return time() - os.path.getctime(os.path.join(self.cache.root_folder, self.filename)) 38 | 39 | def get(self, max_age: Optional[float] = None) -> Optional[bytes]: 40 | try: 41 | if max_age is not None: 42 | if self.age() > max_age: # expired 43 | logging.info("expired %s (age: %.f > %.f)", self.key, self.age(), max_age) 44 | # TODO remove? 45 | return None 46 | with open(os.path.join(self.cache.root_folder, self.filename), "rb") as f: 47 | return f.read() 48 | except FileNotFoundError: 49 | return None 50 | 51 | def touch(self): 52 | file_path = os.path.join(self.cache.root_folder, self.filename) 53 | if not os.path.exists(file_path): 54 | return 55 | os.utime(file_path) 56 | 57 | def unlink(self): 58 | try: 59 | os.unlink(os.path.join(self.cache.root_folder, self.filename)) 60 | except FileNotFoundError: 61 | logging.info( 62 | "removing but already gone: %s (%s)", self.key, os.path.join(self.cache.root_folder, self.filename) 63 | ) 64 | 65 | def __enter__(self): 66 | self.lock.acquire() 67 | return self 68 | 69 | def __exit__(self, exc_type, exc_val, exc_tb): 70 | self.lock.release() 71 | 72 | def __init__(self, folder: str, max_entries=100, max_bytes=64 * 1024 * 1024): 73 | self.lock = threading.Lock() 74 | self.cache = [] 75 | self.root_folder = folder 76 | self.max_entries = max_entries 77 | self.max_bytes = max_bytes 78 | self.max_entry_size = max_bytes // 16 79 | self.tot_bytes = 0 80 | 81 | os.makedirs(self.root_folder, exist_ok=True) 82 | # list files by mtime 83 | files = sorted( 84 | os.listdir(self.root_folder), key=lambda file: os.path.getmtime(os.path.join(self.root_folder, file)) 85 | ) 86 | for f in files: 87 | key = base64.b64decode(f.encode()).decode() 88 | e = Cache.Entry(key, self) 89 | size = os.path.getsize(os.path.join(self.root_folder, f)) 90 | self.tot_bytes += size 91 | assert f == e.filename 92 | # logging.debug("recovered %s file %s", size2human(size), key) 93 | self.cache.append(e) 94 | 95 | self._make_space(0) 96 | # logging.debug("recovered %d files for a total of %s", len(self.cache), size2human(self.tot_bytes)) 97 | 98 | def _make_space(self, space_needed): 99 | with self.lock: 100 | 
while self.tot_bytes + space_needed > self.max_bytes: 101 | # FIXME: Needs to be properly handled 102 | if len(self.cache) == 0: 103 | self.tot_bytes = 0 104 | return 105 | e = self.cache.pop(0) 106 | try: 107 | size = os.path.getsize(os.path.join(self.root_folder, e.filename)) 108 | self.tot_bytes -= size 109 | e.unlink() 110 | logging.info("evicted %s file %s", size2human(size), e.key) 111 | except FileNotFoundError: 112 | logging.info("evicted but already gone: %s", e.key) 113 | 114 | def remove(self, key): 115 | with self.lock: 116 | for e in self.cache: 117 | if e.key == key: 118 | self.cache.remove(e) 119 | e.unlink() 120 | self.tot_bytes -= e.size 121 | 122 | def key(self, key): 123 | with self.lock: 124 | for e in self.cache: 125 | if e.key == key: 126 | self.cache.remove(e) 127 | self.cache.append(e) # move to end 128 | return e 129 | if len(self.cache) >= self.max_entries: 130 | e = self.cache.pop(0) 131 | self.tot_bytes -= e.size 132 | e.unlink() 133 | e = Cache.Entry(key, self) 134 | self.cache.append(e) 135 | return e 136 | -------------------------------------------------------------------------------- /src/sdk/odp/client/tabular_v2/util/reader.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Iterator 3 | 4 | 5 | class Reader: 6 | def read(self, size: int = -1) -> bytes: 7 | raise NotImplementedError() 8 | 9 | 10 | class Writer: 11 | def write(self, data: bytes): 12 | raise NotImplementedError() 13 | 14 | def close(self): 15 | pass 16 | 17 | 18 | class Iter2Reader(Reader): 19 | """ 20 | convert a byte iterator to a file-like object 21 | reads will attempt to read the next bytes from the iterator when needed 22 | 23 | FIXME: seems broken when used with real cases, avoid using 24 | """ 25 | 26 | def __init__(self, i: Iterator[bytes]): 27 | self.iter = i 28 | self.closed = False 29 | self.buf = b"" 30 | 31 | def preload(self): 32 | if not self.iter: 33 | return self 34 | try: 35 | self.buf += next(self.iter) 36 | except StopIteration: 37 | self.iter = None 38 | return self 39 | 40 | def read_some(self) -> bytes: 41 | if not self.buf: 42 | self.preload() 43 | out = self.buf 44 | self.buf = b"" 45 | logging.debug("read %d", len(out)) 46 | return out 47 | 48 | def read(self, size: int = -1) -> bytes: 49 | logging.debug("reading...") 50 | if size < 0: 51 | return self.read_some() 52 | while len(self.buf) < size: 53 | if not self.iter: 54 | break 55 | self.preload() 56 | ret = self.buf[:size] 57 | self.buf = self.buf[len(ret) :] 58 | logging.debug("read %d", len(ret)) 59 | return ret 60 | -------------------------------------------------------------------------------- /src/sdk/odp/client/tabular_v2/util/util.py: -------------------------------------------------------------------------------- 1 | from math import log2 2 | 3 | IEC_UNITS = ["KiB", "MiB", "GiB", "TiB"] 4 | 5 | 6 | def size2human(size: int) -> str: 7 | if size == 0: 8 | return "0B" 9 | p = int(log2(size) // 10.0) 10 | 11 | if p < 1: 12 | return f"{size}B" 13 | if p > len(IEC_UNITS): 14 | p = len(IEC_UNITS) 15 | converted_size = size / 1024**p 16 | return f"{converted_size:.1f}{IEC_UNITS[p - 1]}" # noqa 17 | -------------------------------------------------------------------------------- /src/sdk/odp/client/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .geometry_conversion import convert_geometry 2 | from .package_utils import get_version 3 | 
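A minimal usage sketch of the Cache defined in cache.py above, mirroring the "with cache.key(...) as e" pattern seen in big.py earlier; the temp folder, the "block/..." key and the stand-in payload below are invented for illustration:

from tempfile import mkdtemp

from odp.client.tabular_v2.util.cache import Cache

# a small on-disk cache: keys are base64-encoded into file names under the given folder
cache = Cache(mkdtemp(prefix="odp-cache-"), max_entries=10, max_bytes=1024 * 1024)


def load_block(block_id: str) -> bytes:
    # key() returns (or creates) an Entry and moves it to the end of the LRU list;
    # holding the entry as a context manager serializes concurrent access to the same key
    with cache.key("block/" + block_id) as e:
        data = e.get(max_age=300.0)  # treat entries older than 5 minutes as stale
        if data is None:
            data = ("payload for " + block_id).encode()  # stand-in for a real download
            e.set(data)  # may evict the least recently used entries to respect max_bytes
        return data


print(load_block("0001"))  # computed and written to disk
print(load_block("0001"))  # served from the on-disk cache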
-------------------------------------------------------------------------------- /src/sdk/odp/client/utils/geometry_conversion.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Optional, Union 3 | 4 | import geojson 5 | from shapely import wkb, wkt 6 | from shapely.geometry import shape 7 | 8 | 9 | def convert_geometry( 10 | data: Union[str, dict, list, bytes], result_geometry: str, rounding_precision: Optional[int] = None 11 | ): 12 | if result_geometry == "wkb": 13 | return _convert_geometry_to_wkb(data) 14 | elif result_geometry == "wkt": 15 | return _convert_geometry_to_wkt(data, rounding_precision) 16 | elif result_geometry == "geojson": 17 | if _is_geojson(data): 18 | return data 19 | return _convert_geometry_to_geojson(data) 20 | 21 | 22 | def _convert_geometry_to_wkb(data: Union[str, dict, list]): 23 | if _is_geojson(data): 24 | return _convert_geojson_to_wkb(data) 25 | if isinstance(data, str): 26 | try: 27 | return _convert_wkt_to_wkb(data) 28 | except Exception: 29 | pass 30 | elif isinstance(data, dict): 31 | for key in data: 32 | value = data[key] 33 | data[key] = _convert_geometry_to_wkb(value) 34 | elif isinstance(data, list): 35 | for i, row in enumerate(data): 36 | try: 37 | data[i] = _convert_geometry_to_wkb(row) 38 | except Exception: 39 | continue 40 | return data 41 | 42 | 43 | def _convert_geometry_to_wkt(data: Union[str, dict, list, bytes], rounding_precision: Optional[int] = None): 44 | if _is_geojson(data): 45 | return _convert_geojson_to_wkt(data) 46 | if isinstance(data, (str, bytes)): 47 | try: 48 | return _convert_wkb_to_wkt(data, rounding_precision) 49 | except Exception: 50 | pass 51 | elif isinstance(data, dict): 52 | for key in data: 53 | value = data[key] 54 | data[key] = _convert_geometry_to_wkt(value, rounding_precision) 55 | elif isinstance(data, list): 56 | for i, row in enumerate(data): 57 | try: 58 | data[i] = _convert_geometry_to_wkt(row, rounding_precision) 59 | except Exception: 60 | continue 61 | return data 62 | 63 | 64 | def _convert_geometry_to_geojson(data: Union[str, dict, list, bytes]): 65 | if isinstance(data, str): 66 | try: 67 | if _is_wkt(data): 68 | return _convert_wkt_to_geojson(data) 69 | else: 70 | return _convert_wkb_to_geojson(data) 71 | except Exception: 72 | pass 73 | elif isinstance(data, bytes): 74 | try: 75 | return _convert_wkb_to_geojson(data) 76 | except Exception: 77 | pass 78 | elif isinstance(data, dict): 79 | for key in data: 80 | value = data[key] 81 | data[key] = _convert_geometry_to_geojson(value) 82 | elif isinstance(data, list): 83 | for i, row in enumerate(data): 84 | try: 85 | data[i] = _convert_geometry_to_geojson(row) 86 | except Exception: 87 | continue 88 | return data 89 | 90 | 91 | def _convert_geojson_to_wkb(geojson_data: Union[dict, str]) -> bytes: 92 | if isinstance(geojson_data, dict): 93 | geojson_data = json.dumps(geojson_data) 94 | geo = geojson.loads(geojson_data) 95 | return shape(geo).wkb 96 | 97 | 98 | def _convert_geojson_to_wkt(geojson_data: Union[dict, str]) -> str: 99 | if isinstance(geojson_data, dict): 100 | geojson_data = json.dumps(geojson_data) 101 | geo = geojson.loads(geojson_data) 102 | return shape(geo).wkt 103 | 104 | 105 | def _convert_wkb_to_geojson(wkb_data: Union[bytes, str]) -> dict: 106 | geo = wkb.loads(wkb_data) 107 | return geojson.Feature(geometry=geo, properties={}).geometry 108 | 109 | 110 | def _convert_wkb_to_wkt(wkb_data: Union[bytes, str], rounding_precision: Optional[int] = None) -> str: 111 
| if rounding_precision: 112 | return wkt.dumps(wkb.loads(wkb_data), rounding_precision=rounding_precision) 113 | return wkt.dumps(wkb.loads(wkb_data)) 114 | 115 | 116 | def _convert_wkt_to_geojson(wkt_data: str) -> dict: 117 | geo = wkt.loads(wkt_data) 118 | return geojson.Feature(geometry=geo, properties={}).geometry 119 | 120 | 121 | def _convert_wkt_to_wkb(wkt_data: str) -> bytes: 122 | return wkb.dumps(wkt.loads(wkt_data)) 123 | 124 | 125 | def _is_geojson(data) -> bool: 126 | if isinstance(data, dict): 127 | return len(data.keys()) == 2 and "type" in data and "coordinates" in data 128 | elif isinstance(data, str): 129 | try: 130 | return _is_geojson(json.loads(data)) 131 | except Exception: 132 | return False 133 | return False 134 | 135 | 136 | def _is_wkt(data: str) -> bool: 137 | # Cheap way of checking if the value is a WKT string 138 | # Simply see if the first character is the first letter of a WKT-Object: 139 | # P: Point, Polygon 140 | # L: LineString 141 | # M: MultiPoint, MultiPolygon, MultiLineString 142 | # G: GeometryCollection 143 | return data[0].upper() in {"P", "L", "M", "G"} 144 | -------------------------------------------------------------------------------- /src/sdk/odp/client/utils/json.py: -------------------------------------------------------------------------------- 1 | from typing import IO, Any, Callable, Dict, List, Optional, Protocol, Type, Union 2 | 3 | JsonType = Union[None, int, str, bool, List["JsonType"], Dict[str, "JsonType"]] 4 | 5 | 6 | class JsonParser(Protocol): 7 | """JSON serialization/deserialization interface""" 8 | 9 | @staticmethod 10 | def load( 11 | fp, 12 | decoded_type: Type, 13 | cast_decimal: bool = True, 14 | cls: Optional[Type] = None, 15 | parse_float: Optional[Callable[[str], float]] = None, 16 | parse_int: Optional[Callable[[str], int]] = None, 17 | parse_constant: Optional[Callable[[str], JsonType]] = None, 18 | **kwargs 19 | ) -> JsonType: 20 | ... 21 | 22 | @staticmethod 23 | def loads( 24 | s: str, 25 | cast_decimal: bool = True, 26 | cls: Optional[Type] = None, 27 | parse_float: Optional[Callable[[str], float]] = None, 28 | parse_int: Optional[Callable[[str], int]] = None, 29 | parse_constant: Optional[Callable[[str], JsonType]] = None, 30 | **kwargs 31 | ) -> JsonType: 32 | ... 33 | 34 | @staticmethod 35 | def dump( 36 | obj: Any, 37 | fp: IO, 38 | skipkeys: bool = False, 39 | ensure_ascii: bool = True, 40 | check_circular: bool = True, 41 | allow_nan: bool = True, 42 | cls: Optional[Type] = None, 43 | indent=None, 44 | separators=None, 45 | default=None, 46 | sort_keys=False, 47 | **kwargs 48 | ): 49 | ... 50 | 51 | @staticmethod 52 | def dumps( 53 | obj: Any, 54 | skipkeys: bool = False, 55 | ensure_ascii: bool = True, 56 | check_circular: bool = True, 57 | allow_nan: bool = True, 58 | cls: Optional[Type] = None, 59 | indent: Optional[int] = None, 60 | separators: Optional[str] = None, 61 | default: Optional[Callable[[str], str]] = None, 62 | sort_keys: bool = False, 63 | **kwargs 64 | ) -> str: 65 | ... 
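A hedged usage sketch of convert_geometry from geometry_conversion.py above; the coordinates and field names are made up. The function walks dicts and lists, converting any value that parses as WKT, WKB or GeoJSON into the requested representation and passing everything else through unchanged:

from odp.client.utils.geometry_conversion import convert_geometry

# WKT string -> GeoJSON-style geometry mapping
point = convert_geometry("POINT (10.0 59.9)", result_geometry="geojson")
print(point)  # roughly {"type": "Point", "coordinates": [10.0, 59.9]}

# dicts are converted value by value; non-geometry values pass through untouched
row = {"name": "station-1", "location": "POINT (10.0 59.9)"}
row = convert_geometry(row, result_geometry="wkb")
assert isinstance(row["location"], bytes) and row["name"] == "station-1"

# GeoJSON mapping -> WKT (rounding_precision is only applied when the input is WKB)
geojson_value = {"type": "Point", "coordinates": [10.123456, 59.987654]}
print(convert_geometry(geojson_value, result_geometry="wkt"))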
66 | -------------------------------------------------------------------------------- /src/sdk/odp/client/utils/ndjson.py: -------------------------------------------------------------------------------- 1 | import json 2 | from collections import deque 3 | from io import StringIO 4 | from typing import IO, Deque, Iterable, Optional, Sized, Union, cast 5 | from warnings import warn 6 | 7 | from .json import JsonParser, JsonType 8 | 9 | 10 | def parse_ndjson(iter: Iterable[bytes]) -> Iterable: 11 | """ 12 | Parse NDJSON from an iterable of bytes 13 | returns an iterator of parsed JSON objects 14 | """ 15 | buf = b"" 16 | for s in iter: 17 | buf += s 18 | lines = buf.split(b"\n") 19 | buf = lines[-1] 20 | for line in lines[:-1]: 21 | yield json.loads(line) 22 | 23 | if buf: 24 | yield json.loads(buf) 25 | 26 | 27 | BacklogDataT = Union[Iterable[str], Sized] 28 | DEFAULT_JSON_PARSER = cast(JsonParser, json) 29 | 30 | 31 | class NdJsonParser: 32 | """Newline delimited JSON parser 33 | 34 | Parses NDJSON from a file-like object or a string. 35 | """ 36 | 37 | def __init__( 38 | self, 39 | s: Union[str, bytes, None] = None, 40 | fp: Union[IO, Iterable[bytes], None] = None, 41 | json_parser: JsonParser = DEFAULT_JSON_PARSER, 42 | ): 43 | """Initialize the parser 44 | 45 | Args: 46 | s: String to parse, either this or 'fp' must be set 47 | fp: File-like object to parse, either this or 's' must be set 48 | json_parser: JSON parser to use, defaults to the standard `json` module 49 | """ 50 | self.json_parser = json_parser 51 | self.line = [] 52 | self.delimiter_stack: Deque[str] = deque() 53 | self.backlog: Optional[BacklogDataT] = None 54 | 55 | if s and fp: 56 | raise ValueError("Either 's' or 'fp' must be set, but now both") 57 | elif not s and not fp: 58 | raise ValueError("Either 's' or 'fp' must be set") 59 | 60 | if fp: 61 | self.fb = fp 62 | elif isinstance(s, str): 63 | self.fb = StringIO(s) 64 | else: 65 | self.fb = StringIO(s.decode()) 66 | 67 | def _consume_line(self) -> JsonType: 68 | """Consume a line from the file-like object 69 | 70 | Returns: 71 | Parsed JSON object 72 | """ 73 | if self.delimiter_stack: 74 | warn("Attempting to parse NDJSON line while the delimiter stack was non-empty") 75 | 76 | obj = self.json_parser.loads("".join(self.line)) 77 | self.line = [] 78 | self.delimiter_stack.clear() 79 | 80 | return obj 81 | 82 | def _have_backlog(self) -> bool: 83 | return self.backlog is not None 84 | 85 | def _backlog_data(self, data: BacklogDataT): 86 | self.backlog = data 87 | 88 | def _consume_backlog(self) -> BacklogDataT: 89 | if self.backlog is None: 90 | raise ValueError("No backlog data to consume") 91 | data = self.backlog 92 | self.backlog = None 93 | return data 94 | 95 | def _load_next(self) -> BacklogDataT: 96 | if self._have_backlog(): 97 | return self._consume_backlog() 98 | ret = next(self.fb) 99 | if isinstance(ret, bytes): 100 | return ret.decode() 101 | return ret 102 | 103 | def __iter__(self) -> Iterable[JsonType]: 104 | return cast(Iterable[JsonType], self) 105 | 106 | def __next__(self) -> JsonType: 107 | while True: 108 | try: 109 | s = self._load_next() 110 | except StopIteration: 111 | if len(self.line) > 0: 112 | return self._consume_line() 113 | raise 114 | 115 | for idx, c in enumerate(s): 116 | c = chr(c) if isinstance(c, int) else c 117 | last_delimiter = self.delimiter_stack[-1] if self.delimiter_stack else None 118 | 119 | in_quote = last_delimiter in {"'", '"', "\\"} 120 | 121 | if c == "\n" and not in_quote: 122 | if idx + 1 < len(s): 123 | 
self.backlog = s[idx + 1 :] 124 | return self._consume_line() 125 | 126 | self.line.append(c) 127 | if in_quote: 128 | if last_delimiter == "\\": 129 | self.delimiter_stack.pop() 130 | elif c == "\\": 131 | self.delimiter_stack.append(c) 132 | elif c == last_delimiter: 133 | self.delimiter_stack.pop() 134 | 135 | continue 136 | 137 | is_quote = c in {"'", '"'} 138 | if is_quote: 139 | self.delimiter_stack.append(c) 140 | continue 141 | 142 | is_opening_bracket = c in {"{", "["} 143 | 144 | if is_opening_bracket: 145 | self.delimiter_stack.append(c) 146 | continue 147 | 148 | in_bracket = last_delimiter in {"{", "["} 149 | is_closing_bracket = c in {"}", "]"} 150 | 151 | if is_closing_bracket: 152 | if not in_bracket: 153 | raise ValueError(f"Got unexpected delimiter: {c}") 154 | 155 | if last_delimiter == "{" and c == "}": 156 | self.delimiter_stack.pop() 157 | elif last_delimiter == "[" and c == "]": 158 | self.delimiter_stack.pop() 159 | else: 160 | raise ValueError(f"Got unexpected delimiter: {c}") 161 | 162 | 163 | def load(fp: IO, json_parser: JsonParser = DEFAULT_JSON_PARSER) -> Iterable[JsonType]: 164 | """Load NDJSON from a file-like object 165 | 166 | Args: 167 | fp: File-like object 168 | json_parser: JSON parser to use 169 | 170 | Returns: 171 | Iterable of parsed JSON objects 172 | """ 173 | return iter(NdJsonParser(fp=fp, json_parser=json_parser)) 174 | 175 | 176 | def loads(s: Union[str, bytes], json_parser: JsonParser = DEFAULT_JSON_PARSER) -> Iterable[JsonType]: 177 | """Load NDJSON from a string 178 | 179 | Args: 180 | s: String 181 | json_parser: JSON parser to use 182 | 183 | Returns: 184 | Iterable of parsed JSON objects 185 | """ 186 | return iter(NdJsonParser(s=s, json_parser=json_parser)) 187 | -------------------------------------------------------------------------------- /src/sdk/odp/client/utils/package_utils.py: -------------------------------------------------------------------------------- 1 | from importlib.metadata import version 2 | 3 | 4 | def get_version(): 5 | try: 6 | return str(version("odp-sdk")) 7 | except Exception as e: 8 | print(e) 9 | return "" 10 | -------------------------------------------------------------------------------- /src/sdk/odp_sdk/__init__.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | from odp.sdk import * # noqa: F401, F403 4 | 5 | warnings.warn("odp_sdk is deprecated, please import odp.client instead", DeprecationWarning) 6 | 7 | del warnings 8 | -------------------------------------------------------------------------------- /src/sdk/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "odp-sdk" 3 | version = "0.4.10" 4 | description = "ODP Python SDK" 5 | authors = ["Thomas Li Fredriksen "] 6 | license = "MIT" 7 | readme = "README.md" 8 | packages = [ 9 | {include="odp"}, 10 | {include="odp_sdk"}, 11 | ] 12 | 13 | 14 | [tool.poetry.dependencies] 15 | python = "^3.9" 16 | pydantic = "^2.4.2" 17 | odp-dto = { path = "../dto", develop = true } 18 | cryptography = ">=41.0.5,<43.0.0" 19 | pyjwt = "^2.8.0" 20 | msal = "^1.24.1" 21 | msal-extensions = "^1.1.0" 22 | pandas = "^2.1.4" 23 | shapely = "^2.0.4" 24 | geojson = "^3.1.0" 25 | validators = "^0.28.3" 26 | lark="^1.2.2" 27 | pyarrow = "^18.1.0" 28 | 29 | [build-system] 30 | requires = ["poetry-core>=1.0.0"] 31 | build-backend = "poetry.core.masonry.api" -------------------------------------------------------------------------------- 
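A short sketch of how the NDJSON helpers in ndjson.py above behave; the payload and the chunk boundaries are invented for illustration:

from odp.client.utils import ndjson

payload = '{"id": 1, "name": "first"}\n{"id": 2, "name": "second {value}"}\n'

# loads() yields one parsed object per newline-delimited JSON value,
# tracking quotes and brackets rather than blindly splitting on newlines
rows = list(ndjson.loads(payload))
assert rows == [{"id": 1, "name": "first"}, {"id": 2, "name": "second {value}"}]

# parse_ndjson() does the same for an iterable of byte chunks,
# e.g. a streamed HTTP response body split at arbitrary points
chunks = [b'{"id": 3}\n{"id', b'": 4}\n']
assert list(ndjson.parse_ndjson(chunks)) == [{"id": 3}, {"id": 4}]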
/src/sdk/tests/test_sdk/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/C4IROcean-archive/odp-sdk-python/cfe9d937e620dfa8f35e6e92646d9625de5a0de9/src/sdk/tests/test_sdk/__init__.py -------------------------------------------------------------------------------- /src/sdk/tests/test_sdk/conftest.py: -------------------------------------------------------------------------------- 1 | from .fixtures import * # noqa: F401, F403 2 | -------------------------------------------------------------------------------- /src/sdk/tests/test_sdk/fixtures/__init__.py: -------------------------------------------------------------------------------- 1 | from .auth_fixtures import * # noqa: F401, F403 2 | from .dto_fixtures import * # noqa: F401, F403 3 | from .jwt_fixtures import * # noqa: F401, F403 4 | from .odp_http_client_fixtures import * # noqa: F401, F403 5 | from .request_fixtures import * # noqa: F401, F403 6 | from .time_fixtures import * # noqa: F401, F403 7 | -------------------------------------------------------------------------------- /src/sdk/tests/test_sdk/fixtures/auth_fixtures.py: -------------------------------------------------------------------------------- 1 | import json 2 | import random 3 | import time 4 | from typing import Callable 5 | 6 | import jwt 7 | import pytest 8 | import responses 9 | from cryptography.hazmat.primitives.asymmetric import rsa 10 | from odp.client.auth import AzureTokenProvider, OdpWorkspaceTokenProvider 11 | from pydantic import SecretStr 12 | 13 | __all__ = [ 14 | "odp_workspace_token_provider", 15 | "azure_token_provider", 16 | "mock_token_response_body", 17 | "mock_token_response_callback", 18 | ] 19 | 20 | ALGORITHM = "RS256" 21 | PUBLIC_KEY_ID = "sample-key-id" 22 | 23 | MOCK_SIDECAR_URL = "http://token_endpoint.local" 24 | MOCK_CLIENT_ID = SecretStr("foo") 25 | MOCK_CLIENT_SECRET = SecretStr("bar") 26 | MOCK_TOKEN_URI = "http://token_uri.local" 27 | MOCK_ISSUER = "http://issuer.local" 28 | MOCK_AUDIENCE = "audience" 29 | 30 | 31 | @pytest.fixture() 32 | def odp_workspace_token_provider() -> OdpWorkspaceTokenProvider: 33 | with responses.RequestsMock() as rsps: 34 | rsps.add( 35 | responses.POST, 36 | MOCK_SIDECAR_URL, 37 | json={ 38 | "token": "Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCIsImtpZCI6ImVTMEtuOHRWNkpweHVnVGRXWVJTX2x5VlBpTFBPRHhxNmxjNlI0clE4NmsifQ.eyJzdWIiOiIwMDAwMDAwMC0wMDAwLTAwMDAtMDAwMC0wMDAwNDk5NjAyZDIiLCJuYW1lIjoiSm9obiBEb2UiLCJpYXQiOjE1MTYyMzkwMjJ9.tky9z3_WE0YSbg7mXUq-Wl9b0Xo_Hrd6nVVHfRGSHNI" # noqa: E501 39 | }, # noqa: E501 40 | ) 41 | 42 | yield OdpWorkspaceTokenProvider(token_uri=MOCK_SIDECAR_URL) 43 | 44 | 45 | def encode_token(payload: dict, private_key: rsa.RSAPrivateKey) -> str: 46 | return jwt.encode( 47 | payload=payload, 48 | key=private_key, # The private key created in the previous step 49 | algorithm=ALGORITHM, 50 | headers={ 51 | "kid": PUBLIC_KEY_ID, 52 | }, 53 | ) 54 | 55 | 56 | @pytest.fixture() 57 | def mock_token_response_callback(rsa_private_key) -> Callable[[], str]: 58 | def _cb(): 59 | t = int(time.time()) 60 | claims = { 61 | "sub": "123", 62 | "iss": MOCK_ISSUER, 63 | "aud": MOCK_AUDIENCE, 64 | "iat": t, 65 | "exp": t + 3600, 66 | "nonce": random.randint(0, 1000000), 67 | } 68 | 69 | token = encode_token(claims, rsa_private_key) 70 | 71 | return json.dumps( 72 | { 73 | "access_token": token, 74 | } 75 | ) 76 | 77 | return _cb 78 | 79 | 80 | @pytest.fixture() 81 | def mock_token_response_body(mock_token_response_callback: 
Callable[[], str]) -> str: 82 | return mock_token_response_callback() 83 | 84 | 85 | @pytest.fixture() 86 | def azure_token_provider() -> AzureTokenProvider: 87 | return AzureTokenProvider( 88 | client_id=MOCK_CLIENT_ID, 89 | client_secret=MOCK_CLIENT_SECRET, 90 | token_uri=MOCK_TOKEN_URI, 91 | ) 92 | -------------------------------------------------------------------------------- /src/sdk/tests/test_sdk/fixtures/dto_fixtures.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from uuid import uuid4 3 | 4 | import pytest 5 | from odp.client.dto.table_spec import TableSpec 6 | from odp.client.dto.tabular_store import TableStage 7 | from odp.dto import DatasetDto, DatasetSpec, Metadata 8 | 9 | __all__ = [ 10 | "raw_resource_dto", 11 | "tabular_resource_dto", 12 | "table_spec", 13 | "table_stage", 14 | ] 15 | 16 | from odp.dto.common.contact_info import ContactInfo 17 | 18 | 19 | @pytest.fixture() 20 | def raw_resource_dto() -> DatasetDto: 21 | name = "test_dataset" 22 | uuid = uuid4() 23 | return DatasetDto( 24 | metadata=Metadata(name=name, uuid=uuid), 25 | spec=DatasetSpec( 26 | storage_class="registry.hubocean.io/storageClass/raw", 27 | maintainer=ContactInfo( 28 | organisation="HUB Ocean", contact="Name McNameson " 29 | ), 30 | documentation=["https://oceandata.earth"], 31 | tags={"test", "hubocean"}, 32 | ), 33 | ) 34 | 35 | 36 | @pytest.fixture() 37 | def tabular_resource_dto() -> DatasetDto: 38 | name = "test_dataset" 39 | uuid = uuid4() 40 | 41 | return DatasetDto( 42 | metadata=Metadata(name=name, uuid=uuid), 43 | spec=DatasetSpec( 44 | storage_class="registry.hubocean.io/storageClass/tabular", 45 | maintainer=ContactInfo( 46 | organisation="HUB Ocean", contact="Name McNameson " 47 | ), 48 | documentation=["https://oceandata.earth"], 49 | tags={"test", "hubocean"}, 50 | ), 51 | ) 52 | 53 | 54 | @pytest.fixture() 55 | def table_spec(): 56 | table_schema = { 57 | "CatalogNumber": {"type": "long"}, 58 | "Location": {"type": "geometry"}, 59 | } 60 | 61 | return TableSpec(table_schema=table_schema) 62 | 63 | 64 | @pytest.fixture() 65 | def table_stage(): 66 | return TableStage( 67 | stage_id=uuid4(), status="active", created_time=datetime.datetime.now(), expiry_time=datetime.MAXYEAR 68 | ) 69 | -------------------------------------------------------------------------------- /src/sdk/tests/test_sdk/fixtures/jwt_fixtures.py: -------------------------------------------------------------------------------- 1 | import json 2 | import random 3 | import time 4 | from typing import Union 5 | 6 | import jwt 7 | import pytest 8 | import requests 9 | import responses 10 | from cryptography.hazmat.primitives.asymmetric import rsa 11 | from jwt.utils import to_base64url_uint 12 | from odp.client.auth import JwtTokenProvider 13 | 14 | __all__ = [ 15 | "rsa_public_private_key_pair", 16 | "rsa_public_key", 17 | "rsa_private_key", 18 | "jwt_response", 19 | "auth_response", 20 | "jwt_token_provider", 21 | ] 22 | 23 | ALGORITHM = "RS256" 24 | PUBLIC_KEY_ID = "sample-key-id" 25 | 26 | MOCK_TOKEN_ENDPOINT = "http://token_endpoint.local" 27 | MOCK_JWKS_ENDPOINT = "http://jwks_endpoint.local" 28 | MOCK_ISSUER = "http://issuer.local" 29 | MOCK_SCOPE = ["scope1"] 30 | MOCK_AUDIENCE = "audience" 31 | 32 | 33 | class MockTokenProvider(JwtTokenProvider): 34 | audience: str = MOCK_AUDIENCE 35 | """IDP token audience""" 36 | 37 | scope: list[str] = MOCK_SCOPE 38 | """IDP token scope""" 39 | 40 | def get_jwks_uri(self) -> str: 41 | return MOCK_JWKS_ENDPOINT 42 | 
43 | def authenticate(self) -> dict[str, str]: 44 | res = requests.post( 45 | MOCK_TOKEN_ENDPOINT, 46 | data={ 47 | "grant_type": "client_credentials", 48 | "client_id": "foo", 49 | "client_secret": "bar", 50 | "audience": self.audience, 51 | "scope": " ".join(self.scope), 52 | }, 53 | ) 54 | 55 | res.raise_for_status() 56 | return res.json() 57 | 58 | 59 | @pytest.fixture(scope="session") 60 | def rsa_public_private_key_pair() -> tuple[rsa.RSAPublicKey, rsa.RSAPrivateKey]: 61 | private_key = rsa.generate_private_key(public_exponent=65537, key_size=2048) 62 | public_key = private_key.public_key() 63 | return public_key, private_key 64 | 65 | 66 | @pytest.fixture(scope="session") 67 | def rsa_public_key(rsa_public_private_key_pair) -> rsa.RSAPublicKey: 68 | public_key, _ = rsa_public_private_key_pair 69 | return public_key 70 | 71 | 72 | @pytest.fixture(scope="session") 73 | def rsa_private_key(rsa_public_private_key_pair) -> rsa.RSAPrivateKey: 74 | _, private_key = rsa_public_private_key_pair 75 | return private_key 76 | 77 | 78 | def jwt_response(mock, rsa_public_key: rsa.RSAPublicKey): 79 | mock.add( 80 | responses.GET, 81 | MOCK_JWKS_ENDPOINT, 82 | json={ 83 | "keys": [ 84 | { 85 | "kty": "RSA", 86 | "use": "sig", 87 | "kid": PUBLIC_KEY_ID, 88 | "n": to_base64url_uint(rsa_public_key.public_numbers().n).decode("utf-8"), 89 | "e": to_base64url_uint(rsa_public_key.public_numbers().e).decode("utf-8"), 90 | "issuer": MOCK_ISSUER, 91 | } 92 | ] 93 | }, 94 | ) 95 | 96 | 97 | def auth_response(mock, rsa_private_key: rsa.RSAPrivateKey): 98 | def token_callback(request: requests.Request) -> tuple[int, dict, Union[str, bytes]]: 99 | t = int(time.time()) 100 | claims = { 101 | "sub": "123", 102 | "iss": MOCK_ISSUER, 103 | "aud": MOCK_AUDIENCE, 104 | "iat": t, 105 | "exp": t + 3600, 106 | "nonce": random.randint(0, 1000000), 107 | } 108 | 109 | token = encode_token(claims, rsa_private_key) 110 | return ( 111 | 200, 112 | {}, 113 | json.dumps( 114 | { 115 | "access_token": token, 116 | } 117 | ), 118 | ) 119 | 120 | mock.add_callback(responses.POST, MOCK_TOKEN_ENDPOINT, callback=token_callback, content_type="application/json") 121 | 122 | 123 | def encode_token(payload: dict, private_key: rsa.RSAPrivateKey) -> str: 124 | return jwt.encode( 125 | payload=payload, 126 | key=private_key, # The private key created in the previous step 127 | algorithm=ALGORITHM, 128 | headers={ 129 | "kid": PUBLIC_KEY_ID, 130 | }, 131 | ) 132 | 133 | 134 | @pytest.fixture() 135 | def jwt_token_provider( 136 | request_mock: responses.RequestsMock, 137 | rsa_public_key: rsa.RSAPublicKey, 138 | rsa_private_key: rsa.RSAPrivateKey, 139 | ) -> JwtTokenProvider: 140 | auth_response(request_mock, rsa_private_key) 141 | jwt_response(request_mock, rsa_public_key) 142 | 143 | yield MockTokenProvider() 144 | -------------------------------------------------------------------------------- /src/sdk/tests/test_sdk/fixtures/odp_http_client_fixtures.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from odp.client.auth import TokenProvider 3 | from odp.client.http_client import OdpHttpClient 4 | 5 | __all__ = [ 6 | "mock_odp_endpoint", 7 | "http_client", 8 | ] 9 | 10 | 11 | @pytest.fixture(scope="session") 12 | def mock_odp_endpoint() -> str: 13 | return "http://odp.local" 14 | 15 | 16 | @pytest.fixture 17 | def http_client(mock_odp_endpoint: str, jwt_token_provider: TokenProvider) -> OdpHttpClient: 18 | return OdpHttpClient(base_url=mock_odp_endpoint, 
token_provider=jwt_token_provider) 19 | -------------------------------------------------------------------------------- /src/sdk/tests/test_sdk/fixtures/request_fixtures.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import responses 3 | 4 | 5 | @pytest.fixture 6 | def request_mock() -> responses.RequestsMock: 7 | with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps: 8 | yield rsps 9 | -------------------------------------------------------------------------------- /src/sdk/tests/test_sdk/fixtures/time_fixtures.py: -------------------------------------------------------------------------------- 1 | import time 2 | from unittest.mock import patch 3 | 4 | import pytest 5 | 6 | 7 | @pytest.fixture(autouse=True) 8 | def mock_sleep(request: pytest.FixtureRequest): 9 | if request.node.get_closest_marker("mock_sleep"): 10 | with patch.object(time, "sleep", lambda x: None): 11 | yield 12 | else: 13 | yield 14 | 15 | 16 | class MockTime: 17 | def __init__(self, use_time: float): 18 | self.use_time = use_time 19 | 20 | def get_time(self) -> float: 21 | return self.use_time 22 | 23 | def __enter__(self): 24 | self.patcher = patch.object(time, "time", lambda: self.use_time) 25 | self.patcher.start() 26 | 27 | def __exit__(self, exc_type, exc_val, exc_tb): 28 | self.patcher.stop() 29 | 30 | def advance(self, seconds: float): 31 | self.use_time += seconds 32 | 33 | 34 | @pytest.fixture(autouse=True) 35 | def mock_time(request: pytest.FixtureRequest): 36 | if marker := request.node.get_closest_marker("mock_time"): 37 | use_time = marker.kwargs.get("use_time", 1560926388) 38 | mock_timer = MockTime(use_time) 39 | 40 | with mock_timer: 41 | yield mock_timer 42 | else: 43 | yield None 44 | -------------------------------------------------------------------------------- /src/sdk/tests/test_sdk/test_auth/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/C4IROcean-archive/odp-sdk-python/cfe9d937e620dfa8f35e6e92646d9625de5a0de9/src/sdk/tests/test_sdk/test_auth/__init__.py -------------------------------------------------------------------------------- /src/sdk/tests/test_sdk/test_auth/test_azure_token_provider.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | 3 | import pytest 4 | import responses 5 | from odp.client.auth import AzureTokenProvider 6 | 7 | 8 | def test_get_token(azure_token_provider: AzureTokenProvider, mock_token_response_body: str): 9 | with responses.RequestsMock() as rsps: 10 | rsps.add( 11 | responses.POST, 12 | azure_token_provider.token_uri, 13 | body=mock_token_response_body, 14 | ) 15 | access_token = azure_token_provider.get_token() 16 | 17 | assert rsps.assert_call_count(azure_token_provider.token_uri, 1) 18 | assert access_token 19 | 20 | 21 | def test_get_token_reuse(azure_token_provider: AzureTokenProvider, mock_token_response_body: str): 22 | with responses.RequestsMock() as rsps: 23 | rsps.add( 24 | responses.POST, 25 | azure_token_provider.token_uri, 26 | body=mock_token_response_body, 27 | ) 28 | access_token = azure_token_provider.get_token() 29 | 30 | assert rsps.assert_call_count(azure_token_provider.token_uri, 1) 31 | assert access_token 32 | 33 | new_access_token = azure_token_provider.get_token() 34 | 35 | assert rsps.assert_call_count(azure_token_provider.token_uri, 1) 36 | assert access_token == new_access_token 37 | 38 | 39 | 
@pytest.mark.mock_time(use_time=123) 40 | def test_get_token_renew( 41 | azure_token_provider: AzureTokenProvider, mock_token_response_callback: Callable[[], str], mock_time 42 | ): 43 | with responses.RequestsMock() as rsps: 44 | rsps.add_callback( 45 | responses.POST, 46 | azure_token_provider.token_uri, 47 | callback=lambda _: (200, {}, mock_token_response_callback()), 48 | content_type="application/json", 49 | ) 50 | 51 | access_token = azure_token_provider.get_token() 52 | assert access_token 53 | assert rsps.assert_call_count(azure_token_provider.token_uri, 1) 54 | 55 | mock_time.advance(3600) 56 | 57 | new_access_token = azure_token_provider.get_token() 58 | assert rsps.assert_call_count(azure_token_provider.token_uri, 2) 59 | assert new_access_token 60 | assert new_access_token != access_token 61 | 62 | 63 | @pytest.mark.mock_time(use_time=123) 64 | def test_get_token_renew_before_leeway( 65 | azure_token_provider: AzureTokenProvider, mock_token_response_callback: Callable[[], str], mock_time 66 | ): 67 | with responses.RequestsMock() as rsps: 68 | rsps.add_callback( 69 | responses.POST, 70 | azure_token_provider.token_uri, 71 | callback=lambda _: (200, {}, mock_token_response_callback()), 72 | content_type="application/json", 73 | ) 74 | 75 | access_token = azure_token_provider.get_token() 76 | assert access_token 77 | assert rsps.assert_call_count(azure_token_provider.token_uri, 1) 78 | 79 | mock_time.advance(3600 - (azure_token_provider.token_exp_lee_way + 1)) 80 | 81 | new_access_token = azure_token_provider.get_token() 82 | 83 | assert rsps.assert_call_count(azure_token_provider.token_uri, 1) 84 | assert new_access_token == access_token 85 | 86 | 87 | @pytest.mark.mock_time(use_time=123) 88 | def test_get_token_renew_after_leeway( 89 | azure_token_provider: AzureTokenProvider, mock_token_response_callback: Callable[[], str], mock_time 90 | ): 91 | with responses.RequestsMock() as rsps: 92 | rsps.add_callback( 93 | responses.POST, 94 | azure_token_provider.token_uri, 95 | callback=lambda _: (200, {}, mock_token_response_callback()), 96 | content_type="application/json", 97 | ) 98 | 99 | access_token = azure_token_provider.get_token() 100 | assert access_token 101 | assert rsps.assert_call_count(azure_token_provider.token_uri, 1) 102 | 103 | mock_time.advance(3600 - (azure_token_provider.token_exp_lee_way - 1)) 104 | 105 | new_access_token = azure_token_provider.get_token() 106 | assert rsps.assert_call_count(azure_token_provider.token_uri, 2) 107 | assert new_access_token 108 | assert new_access_token != access_token 109 | -------------------------------------------------------------------------------- /src/sdk/tests/test_sdk/test_auth/test_get_default_token_provider.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from odp.client.auth import ( 3 | AzureTokenProvider, 4 | HardcodedTokenProvider, 5 | InteractiveTokenProvider, 6 | OdpWorkspaceTokenProvider, 7 | get_default_token_provider, 8 | ) 9 | from odp.client.exc import OdpAuthError 10 | from odp.client.utils import get_version 11 | 12 | 13 | @pytest.fixture(scope="function") 14 | def clean_env(monkeypatch): 15 | """Clean environment variables for each test. 
Some environment variables have priority over others while choosing 16 | the authentication method so all of them need to be cleaned before the relevant ones are set in tests.""" 17 | monkeypatch.delenv("ODP_ACCESS_TOKEN", raising=False) 18 | monkeypatch.delenv("JUPYTERHUB_API_TOKEN", raising=False) 19 | monkeypatch.delenv("ODP_CLIENT_SECRET", raising=False) 20 | 21 | 22 | def test_interactive_auth(): 23 | auth = get_default_token_provider() 24 | assert isinstance(auth, InteractiveTokenProvider) 25 | assert auth.user_agent == f"odp-sdk/{get_version()} (Interactive)" 26 | 27 | 28 | def test_hardcoded_auth(monkeypatch): 29 | monkeypatch.setenv("ODP_ACCESS_TOKEN", "Test") 30 | auth = get_default_token_provider() 31 | assert isinstance(auth, HardcodedTokenProvider) 32 | assert auth.user_agent == f"odp-sdk/{get_version()} (Hardcoded)" 33 | 34 | 35 | def test_workspace_auth(monkeypatch): 36 | monkeypatch.setenv("JUPYTERHUB_API_TOKEN", "Test") 37 | auth = get_default_token_provider() 38 | assert isinstance(auth, OdpWorkspaceTokenProvider) 39 | assert auth.user_agent == f"odp-sdk/{get_version()} (Workspaces)" 40 | 41 | 42 | def test_azure_auth(monkeypatch): 43 | monkeypatch.setenv("ODP_CLIENT_SECRET", "Test") 44 | auth = get_default_token_provider() 45 | assert isinstance(auth, AzureTokenProvider) 46 | assert auth.user_agent == f"odp-sdk/{get_version()} (Azure)" 47 | 48 | 49 | def test_auth_error(): 50 | with pytest.raises(OdpAuthError): 51 | get_default_token_provider(fallback=False) 52 | -------------------------------------------------------------------------------- /src/sdk/tests/test_sdk/test_auth/test_jwks_token_provider.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import responses 3 | from odp.client.auth import JwtTokenProvider 4 | from test_sdk.fixtures.jwt_fixtures import MOCK_TOKEN_ENDPOINT 5 | 6 | 7 | def test_authenticate(jwt_token_provider: JwtTokenProvider): 8 | access_token = jwt_token_provider.authenticate() 9 | assert access_token 10 | 11 | new_access_token = jwt_token_provider.authenticate() 12 | assert access_token != new_access_token 13 | 14 | 15 | def test_get_token_novalidate(jwt_token_provider: JwtTokenProvider): 16 | expected_prefix = "Bearer " 17 | jwt_token_provider.validate_token = False 18 | 19 | access_token = jwt_token_provider.get_token() 20 | 21 | assert access_token.startswith(expected_prefix) 22 | 23 | # The token should be cached and reused 24 | new_access_token = jwt_token_provider.get_token() 25 | 26 | assert new_access_token.startswith(expected_prefix) 27 | assert access_token == new_access_token 28 | 29 | 30 | def test_get_token_validate(jwt_token_provider: JwtTokenProvider): 31 | expected_prefix = "Bearer " 32 | 33 | jwt_token_provider.validate_token = True 34 | 35 | access_token = jwt_token_provider.get_token() 36 | 37 | assert access_token.startswith(expected_prefix) 38 | 39 | # The token should be cached and reused 40 | new_access_token = jwt_token_provider.get_token() 41 | 42 | assert new_access_token.startswith(expected_prefix) 43 | assert access_token == new_access_token 44 | 45 | 46 | @pytest.mark.mock_time(use_time=123) 47 | def test_renew_token(jwt_token_provider: JwtTokenProvider, request_mock: responses.RequestsMock, mock_time): 48 | responses.assert_call_count(MOCK_TOKEN_ENDPOINT, 0) 49 | 50 | access_token = jwt_token_provider.get_token() 51 | assert access_token 52 | request_mock.assert_call_count(MOCK_TOKEN_ENDPOINT, 1) 53 | 54 | new_access_token = jwt_token_provider.get_token() 55 
| assert access_token == new_access_token 56 | request_mock.assert_call_count(MOCK_TOKEN_ENDPOINT, 1) 57 | 58 | mock_time.advance(3600) 59 | 60 | new_access_token = jwt_token_provider.get_token() 61 | assert access_token != new_access_token 62 | request_mock.assert_call_count(MOCK_TOKEN_ENDPOINT, 2) 63 | 64 | 65 | @pytest.mark.mock_time(use_time=123) 66 | def test_renew_token_before_leeway( 67 | jwt_token_provider: JwtTokenProvider, request_mock: responses.RequestsMock, mock_time 68 | ): 69 | responses.assert_call_count(MOCK_TOKEN_ENDPOINT, 0) 70 | 71 | access_token = jwt_token_provider.get_token() 72 | assert access_token 73 | request_mock.assert_call_count(MOCK_TOKEN_ENDPOINT, 1) 74 | 75 | mock_time.advance(3600 - (jwt_token_provider.token_exp_lee_way + 1)) 76 | 77 | new_access_token = jwt_token_provider.get_token() 78 | assert access_token == new_access_token 79 | request_mock.assert_call_count(MOCK_TOKEN_ENDPOINT, 1) 80 | 81 | 82 | @pytest.mark.mock_time(use_time=123) 83 | def test_renew_token_after_leeway( 84 | jwt_token_provider: JwtTokenProvider, request_mock: responses.RequestsMock, mock_time 85 | ): 86 | responses.assert_call_count(MOCK_TOKEN_ENDPOINT, 0) 87 | 88 | access_token = jwt_token_provider.get_token() 89 | assert request_mock.assert_call_count(MOCK_TOKEN_ENDPOINT, 1) 90 | assert access_token 91 | 92 | mock_time.advance(3600 - (jwt_token_provider.token_exp_lee_way - 1)) 93 | 94 | new_access_token = jwt_token_provider.get_token() 95 | assert request_mock.assert_call_count(MOCK_TOKEN_ENDPOINT, 2) 96 | assert new_access_token 97 | assert access_token != new_access_token 98 | -------------------------------------------------------------------------------- /src/sdk/tests/test_sdk/test_auth/test_odp_workspace_token_provider.py: -------------------------------------------------------------------------------- 1 | from odp.client.auth import OdpWorkspaceTokenProvider 2 | 3 | 4 | def test_get_token(odp_workspace_token_provider: OdpWorkspaceTokenProvider): 5 | access_token = odp_workspace_token_provider.get_token() 6 | 7 | assert access_token 8 | assert access_token.startswith("Bearer") 9 | -------------------------------------------------------------------------------- /src/sdk/tests/test_sdk/test_http_client.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import responses 3 | from odp.client.auth import TokenProvider 4 | from odp.client.http_client import OdpHttpClient 5 | from test_sdk.fixtures.jwt_fixtures import MOCK_TOKEN_ENDPOINT 6 | 7 | 8 | def test_request_relative(http_client: OdpHttpClient, request_mock: responses.RequestsMock): 9 | request_mock.add(responses.GET, f"{http_client.base_url}/foobar", status=200) 10 | 11 | res = http_client.get("/foobar") 12 | res.raise_for_status() 13 | 14 | assert res.status_code == 200 15 | 16 | 17 | def test_request_absolute(http_client: OdpHttpClient, request_mock: responses.RequestsMock): 18 | test_url = "http://someurl.local" 19 | 20 | assert test_url != http_client.base_url 21 | 22 | request_mock.add(responses.GET, test_url, status=200) 23 | 24 | res = http_client.get(test_url) 25 | res.raise_for_status() 26 | 27 | assert res.status_code == 200 28 | 29 | 30 | def test_request_has_auth_token(http_client: OdpHttpClient, request_mock: responses.RequestsMock): 31 | def _on_request(request): 32 | assert "Authorization" in request.headers 33 | 34 | auth_header = request.headers["Authorization"] 35 | assert auth_header is not None 36 | assert auth_header.startswith("Bearer ") 37 | 38 
| return (200, {}, None) 39 | 40 | request_mock.add_callback( 41 | responses.GET, 42 | f"{http_client.base_url}/foobar", 43 | callback=_on_request, 44 | ) 45 | 46 | http_client.get("/foobar") 47 | 48 | 49 | def test_request_reuse_auth_token(http_client: OdpHttpClient, request_mock: responses.RequestsMock): 50 | request_mock.add(responses.GET, f"{http_client.base_url}/foobar", status=200) 51 | 52 | res = http_client.get("/foobar") 53 | res.raise_for_status() 54 | 55 | assert res.status_code == 200 56 | assert request_mock.assert_call_count(MOCK_TOKEN_ENDPOINT, 1) 57 | 58 | res = http_client.get("/foobar") 59 | res.raise_for_status() 60 | 61 | assert res.status_code == 200 62 | assert request_mock.assert_call_count(MOCK_TOKEN_ENDPOINT, 1) 63 | 64 | 65 | @pytest.mark.mock_time(use_time=123) 66 | def test_request_renew_auth_token(http_client: OdpHttpClient, request_mock: responses.RequestsMock, mock_time): 67 | request_mock.add(responses.GET, f"{http_client.base_url}/foobar", status=200) 68 | 69 | res = http_client.get("/foobar") 70 | res.raise_for_status() 71 | 72 | assert res.status_code == 200 73 | assert request_mock.assert_call_count(MOCK_TOKEN_ENDPOINT, 1) 74 | 75 | mock_time.advance(3600) 76 | 77 | res = http_client.get("/foobar") 78 | res.raise_for_status() 79 | 80 | assert res.status_code == 200 81 | assert request_mock.assert_call_count(MOCK_TOKEN_ENDPOINT, 2) 82 | 83 | 84 | def test_custom_user_agent(http_client: OdpHttpClient, request_mock: responses.RequestsMock): 85 | custom_user_agent = "my-custom-user-agent" 86 | 87 | http_client.custom_user_agent = custom_user_agent 88 | 89 | test_url = "http://someurl.local" 90 | 91 | assert test_url != http_client.base_url 92 | 93 | request_mock.add(responses.GET, test_url, status=200) 94 | 95 | res = http_client.get(test_url) 96 | res.raise_for_status() 97 | 98 | assert res.status_code == 200 99 | 100 | assert request_mock.calls[1].request.headers["User-Agent"] == custom_user_agent 101 | 102 | 103 | @pytest.mark.parametrize( 104 | "url, expected", 105 | [ 106 | ("http://localhost:8888", True), 107 | ("localhost:8888", False), 108 | ("foo.bar", False), 109 | ("https://foo.bar.com", True), 110 | ("not a valid url", False), 111 | ], 112 | ) 113 | def test_http_client_url(jwt_token_provider: TokenProvider, url: str, expected: bool): 114 | try: 115 | http_client = OdpHttpClient(base_url=url, token_provider=jwt_token_provider) 116 | assert http_client.base_url == url and expected 117 | except ValueError: 118 | assert not expected 119 | -------------------------------------------------------------------------------- /src/sdk/tests/test_sdk/test_raw_storage_client.py: -------------------------------------------------------------------------------- 1 | import json 2 | import uuid 3 | from datetime import datetime 4 | from pathlib import Path 5 | 6 | import pytest 7 | import responses 8 | from odp.client.dto.file_dto import FileMetadataDto 9 | from odp.client.exc import OdpFileNotFoundError 10 | from odp.client.http_client import OdpHttpClient 11 | from odp.client.raw_storage_client import OdpRawStorageClient 12 | from odp.dto import DatasetDto 13 | 14 | 15 | @pytest.fixture() 16 | def raw_storage_client(http_client: OdpHttpClient) -> OdpRawStorageClient: 17 | return OdpRawStorageClient(http_client=http_client, raw_storage_endpoint="/data") 18 | 19 | 20 | def test_get_file_metadata_success( 21 | raw_storage_client: OdpRawStorageClient, raw_resource_dto: DatasetDto, request_mock: responses.RequestsMock 22 | ): 23 | rand_uuid = uuid.uuid4() 24 | 
time_now = datetime.now() 25 | file_meta = FileMetadataDto( 26 | name="file.zip", 27 | mime_type="application/zip", 28 | dataset=rand_uuid, 29 | metadata={"name": "sdk-raw-example"}, 30 | geo_location="Somewhere", 31 | size_bytes=123456789, 32 | checksum="asdf", 33 | created_time=time_now, 34 | modified_time=time_now, 35 | deleted_time=time_now, 36 | ) 37 | 38 | request_mock.add( 39 | responses.GET, 40 | f"{raw_storage_client.raw_storage_url}/{raw_resource_dto.metadata.uuid}/{file_meta.name}/metadata", 41 | body=file_meta.model_dump_json(), 42 | status=200, 43 | content_type="application/json", 44 | ) 45 | 46 | result = raw_storage_client.get_file_metadata(raw_resource_dto, file_meta) 47 | 48 | assert result.name == "file.zip" 49 | assert result.mime_type == "application/zip" 50 | assert result.dataset == rand_uuid 51 | assert result.metadata == {"name": "sdk-raw-example"} 52 | assert result.geo_location == "Somewhere" 53 | assert result.size_bytes == 123456789 54 | assert result.checksum == "asdf" 55 | assert result.created_time == time_now 56 | assert result.modified_time == time_now 57 | assert result.deleted_time == time_now 58 | 59 | 60 | def test_get_file_metadata_not_found( 61 | raw_storage_client: OdpRawStorageClient, 62 | raw_resource_dto: DatasetDto, 63 | request_mock: responses.RequestsMock, 64 | ): 65 | file_meta = FileMetadataDto(name="file.zip", mime_type="application/zip") 66 | 67 | request_mock.add( 68 | responses.GET, 69 | f"{raw_storage_client.raw_storage_url}/{raw_resource_dto.metadata.uuid}/{file_meta.name}/metadata", 70 | status=404, 71 | ) 72 | 73 | with pytest.raises(OdpFileNotFoundError): 74 | raw_storage_client.get_file_metadata(raw_resource_dto, file_meta) 75 | 76 | 77 | def test_list_files_success( 78 | raw_storage_client: OdpRawStorageClient, 79 | raw_resource_dto: DatasetDto, 80 | request_mock: responses.RequestsMock, 81 | ): 82 | file_metadata = FileMetadataDto(name="file.zip", mime_type="application/zip") 83 | 84 | request_mock.add( 85 | responses.POST, 86 | f"{raw_storage_client.raw_storage_url}/{raw_resource_dto.metadata.uuid}/list", 87 | json={ 88 | "results": [json.loads(file_metadata.model_dump_json())], 89 | "next": None, 90 | "num_results": 1, 91 | }, 92 | status=200, 93 | content_type="application/json", 94 | ) 95 | 96 | metadata_filter = {"name": file_metadata.name} 97 | 98 | result = raw_storage_client.list(raw_resource_dto, metadata_filter=metadata_filter) 99 | 100 | first_item = next(iter(result)) 101 | 102 | assert first_item.name == file_metadata.name 103 | assert first_item.mime_type == file_metadata.mime_type 104 | 105 | 106 | def test_create_file_success( 107 | raw_storage_client: OdpRawStorageClient, 108 | raw_resource_dto: DatasetDto, 109 | request_mock: responses.RequestsMock, 110 | ): 111 | file_metadata = FileMetadataDto( 112 | name="new_file.txt", 113 | mime_type="text/plain", 114 | ) 115 | 116 | request_mock.add( 117 | responses.POST, 118 | f"{raw_storage_client.raw_storage_url}/{raw_resource_dto.metadata.uuid}", 119 | status=200, 120 | json=json.loads(file_metadata.model_dump_json()), 121 | content_type="application/json", 122 | ) 123 | 124 | request_mock.add( 125 | responses.GET, 126 | f"{raw_storage_client.raw_storage_url}/{raw_resource_dto.metadata.uuid}/{file_metadata.name}/metadata", 127 | json=json.loads(file_metadata.model_dump_json()), 128 | status=200, 129 | content_type="application/json", 130 | ) 131 | 132 | result = raw_storage_client.create_file(raw_resource_dto, file_metadata_dto=file_metadata, contents=None) 133 | 134 | 
assert result.name == file_metadata.name 135 | assert result.mime_type == "text/plain" 136 | 137 | 138 | def test_download_file_save( 139 | raw_storage_client: OdpRawStorageClient, 140 | raw_resource_dto: DatasetDto, 141 | tmp_path: Path, 142 | request_mock: responses.RequestsMock, 143 | ): 144 | file_data = b"Sample file content" 145 | save_path = tmp_path / "downloaded_file.txt" 146 | 147 | file_metadata = FileMetadataDto(name="test_file.txt", mime_type="text/plain") 148 | 149 | request_mock.add( 150 | responses.GET, 151 | f"{raw_storage_client.raw_storage_url}/{raw_resource_dto.metadata.uuid}/{file_metadata.name}", 152 | body=file_data, 153 | status=200, 154 | ) 155 | 156 | raw_storage_client.download_file(raw_resource_dto, file_metadata, save_path=str(save_path)) 157 | 158 | with open(save_path, "rb") as file: 159 | saved_data = file.read() 160 | 161 | assert saved_data == file_data 162 | 163 | 164 | def test_delete_file_not_found( 165 | raw_storage_client: OdpRawStorageClient, 166 | raw_resource_dto: DatasetDto, 167 | request_mock: responses.RequestsMock, 168 | ): 169 | file_metadata = FileMetadataDto(name="test_file.txt", mime_type="text/plain") 170 | 171 | request_mock.add( 172 | responses.DELETE, 173 | f"{raw_storage_client.raw_storage_url}/{raw_resource_dto.metadata.uuid}/{file_metadata.name}", 174 | status=404, # Assuming status code 404 indicates file not found 175 | ) 176 | 177 | with pytest.raises(OdpFileNotFoundError): 178 | raw_storage_client.delete_file(raw_resource_dto, file_metadata) 179 | -------------------------------------------------------------------------------- /src/sdk/tests/test_sdk/test_resource_client.py: -------------------------------------------------------------------------------- 1 | import json 2 | from datetime import datetime 3 | from uuid import UUID, uuid4 4 | 5 | import pytest 6 | import responses 7 | from odp.client.resource_client import OdpResourceClient 8 | from odp.dto import Metadata, ResourceDto, ResourceStatus 9 | 10 | 11 | @pytest.fixture() 12 | def resource_client(http_client) -> OdpResourceClient: 13 | return OdpResourceClient(http_client=http_client, resource_endpoint="/foobar") 14 | 15 | 16 | def test_get_resource_by_uuid( 17 | resource_client: OdpResourceClient, 18 | request_mock: responses.RequestsMock, 19 | ): 20 | kind = "test.hubocean.io/tesType" 21 | version = "v1alpha1" 22 | name = "test" 23 | uuid = uuid4() 24 | 25 | request_mock.add( 26 | responses.GET, 27 | f"{resource_client.resource_url}/{uuid}", 28 | body=ResourceDto( 29 | kind=kind, 30 | version=version, 31 | metadata=Metadata(name=name, uuid=uuid), 32 | status=ResourceStatus( 33 | num_updates=0, 34 | created_time=datetime.fromisoformat("2021-01-01T00:00:00+00:00"), 35 | created_by=uuid4(), 36 | updated_time=datetime.fromisoformat("2021-01-01T00:00:00+00:00"), 37 | updated_by=uuid4(), 38 | ), 39 | spec={}, 40 | ).model_dump_json(), 41 | status=200, 42 | content_type="application/json", 43 | ) 44 | 45 | manifest = resource_client.get(uuid) 46 | 47 | assert manifest.kind == kind 48 | assert manifest.version == version 49 | assert manifest.metadata.name == name 50 | 51 | 52 | def test_get_resource_by_qname( 53 | resource_client: OdpResourceClient, 54 | request_mock: responses.RequestsMock, 55 | ): 56 | kind = "test.hubocean.io/tesType" 57 | version = "v1alpha1" 58 | name = "test" 59 | uuid = uuid4() 60 | 61 | request_mock.add( 62 | responses.GET, 63 | f"{resource_client.resource_url}/{kind}/{name}", 64 | body=ResourceDto( 65 | kind=kind, 66 | version=version, 67 | 
metadata=Metadata(name=name, uuid=uuid), 68 | status=ResourceStatus( 69 | num_updates=0, 70 | created_time=datetime.fromisoformat("2021-01-01T00:00:00+00:00"), 71 | created_by=uuid4(), 72 | updated_time=datetime.fromisoformat("2021-01-01T00:00:00+00:00"), 73 | updated_by=uuid4(), 74 | ), 75 | spec={}, 76 | ).model_dump_json(), 77 | status=200, 78 | content_type="application/json", 79 | ) 80 | 81 | manifest = resource_client.get(f"{kind}/{name}") 82 | 83 | assert manifest.kind == kind 84 | assert manifest.version == version 85 | assert manifest.metadata.name == name 86 | assert manifest.metadata.uuid == uuid 87 | 88 | 89 | 90 | def test_create_resource( 91 | resource_client: OdpResourceClient, 92 | request_mock: responses.RequestsMock, 93 | ): 94 | def _on_create_request(request): 95 | manifest = json.loads(request.body) 96 | 97 | # Ensure that the status and uuid are not set. If they are present, they must have a null value 98 | assert manifest.get("status", None) is None 99 | assert manifest["metadata"].get("uuid", None) is None 100 | 101 | t = datetime.now().isoformat() 102 | created_by = str(UUID(int=0)) 103 | manifest["metadata"]["uuid"] = str(uuid4()) 104 | manifest["metadata"].setdefault("owner", created_by) 105 | manifest["status"] = { 106 | "num_updates": 0, 107 | "created_by": created_by, 108 | "created_time": t, 109 | "updated_by": created_by, 110 | "updated_time": t, 111 | } 112 | 113 | return (201, {}, json.dumps(manifest)) 114 | 115 | resource_manifest = ResourceDto( 116 | kind="test.hubocean.io/testType", 117 | version="v1alpha1", 118 | metadata=Metadata(name="foobar"), 119 | spec=dict(), 120 | ) 121 | 122 | request_mock.add_callback( 123 | responses.POST, 124 | f"{resource_client.resource_url}", 125 | callback=_on_create_request, 126 | content_type="application/json", 127 | ) 128 | 129 | populated_manifest = resource_client.create(resource_manifest) 130 | 131 | assert isinstance(populated_manifest, ResourceDto) 132 | assert populated_manifest.metadata.uuid is not None 133 | assert populated_manifest.status is not None 134 | assert populated_manifest.status.num_updates == 0 135 | assert populated_manifest.kind == resource_manifest.kind 136 | assert populated_manifest.metadata.name == resource_manifest.metadata.name 137 | -------------------------------------------------------------------------------- /src/sdk/tests/test_sdk/test_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/C4IROcean-archive/odp-sdk-python/cfe9d937e620dfa8f35e6e92646d9625de5a0de9/src/sdk/tests/test_sdk/test_utils/__init__.py -------------------------------------------------------------------------------- /src/sdk/tests/test_sdk/test_utils/test_dto.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from odp.client.dto.file_dto import FileMetadataDto 3 | 4 | 5 | @pytest.mark.parametrize( 6 | "file_name, correct", 7 | [("test.txt", True), ("foo/bar/test2.txt", True), ("/test.txt", False), ("/foo/bar/test2.txt", False)], 8 | ) 9 | def test_file_dto_names(file_name, correct): 10 | if correct: 11 | file_metadata = FileMetadataDto(name=file_name) 12 | assert file_metadata.name == file_name 13 | else: 14 | with pytest.raises(ValueError): 15 | FileMetadataDto(name=file_name) 16 | -------------------------------------------------------------------------------- /src/sdk/tests/test_sdk/test_utils/test_ndjson.py:
-------------------------------------------------------------------------------- 1 | from textwrap import dedent 2 | 3 | from odp.client.utils.ndjson import NdJsonParser 4 | 5 | 6 | def test_parse_ndjson_simple(): 7 | test_str = dedent( 8 | """ 9 | {"name": "Alice", "age": 30} 10 | {"name": "Bob", "age": 25} 11 | {"name": "Charlie", "age": 35} 12 | """ 13 | ).strip() 14 | 15 | ndjson_parser = NdJsonParser(s=test_str) 16 | 17 | parsed_rows = list(iter(ndjson_parser)) 18 | 19 | assert isinstance(parsed_rows, list) 20 | assert len(parsed_rows) == 3 21 | 22 | assert parsed_rows[0]["name"] == "Alice" 23 | assert parsed_rows[0]["age"] == 30 24 | assert parsed_rows[1]["name"] == "Bob" 25 | assert parsed_rows[1]["age"] == 25 26 | assert parsed_rows[2]["name"] == "Charlie" 27 | assert parsed_rows[2]["age"] == 35 28 | 29 | 30 | def test_parse_ndjson_binary_simple(): 31 | test_str = ( 32 | dedent( 33 | """ 34 | {"product_id": 1, "name": "Widget", "price": 10.99} 35 | {"product_id": 2, "name": "Gadget", "price": 19.99} 36 | {"product_id": 3, "name": "Tool", "price": 15.49} 37 | """ 38 | ) 39 | .strip() 40 | .encode("utf-8") 41 | ) 42 | 43 | ndjson_parser = NdJsonParser(s=test_str) 44 | 45 | parsed_rows = list(iter(ndjson_parser)) 46 | 47 | assert isinstance(parsed_rows, list) 48 | assert len(parsed_rows) == 3 49 | 50 | assert parsed_rows[0]["product_id"] == 1 51 | assert parsed_rows[0]["name"] == "Widget" 52 | assert parsed_rows[0]["price"] == 10.99 53 | assert parsed_rows[1]["product_id"] == 2 54 | assert parsed_rows[1]["name"] == "Gadget" 55 | assert parsed_rows[1]["price"] == 19.99 56 | assert parsed_rows[2]["product_id"] == 3 57 | assert parsed_rows[2]["name"] == "Tool" 58 | assert parsed_rows[2]["price"] == 15.49 59 | 60 | 61 | def test_parse_ndjson_special_characters(): 62 | test_str = dedent( 63 | """ 64 | {"fruits": ["apple", "banana", "cherry"], "description": "Delicious & healthy 🍏🍌🍒"} 65 | {"colors": ["red", "green", "blue"], "symbols": ["@#$%^&*()_+!"]} 66 | {"languages": ["English", "Español", "Français"], "special_chars": "ñçüëł"} 67 | """ 68 | ).strip() 69 | 70 | ndjson_parser = NdJsonParser(s=test_str) 71 | parsed_rows = list(iter(ndjson_parser)) 72 | 73 | assert isinstance(parsed_rows, list) 74 | assert len(parsed_rows) == 3 75 | 76 | assert parsed_rows[0]["fruits"] == ["apple", "banana", "cherry"] 77 | assert parsed_rows[0]["description"] == "Delicious & healthy 🍏🍌🍒" 78 | assert parsed_rows[1]["colors"] == ["red", "green", "blue"] 79 | assert parsed_rows[1]["symbols"] == ["@#$%^&*()_+!"] 80 | assert parsed_rows[2]["languages"] == ["English", "Español", "Français"] 81 | assert parsed_rows[2]["special_chars"] == "ñçüëł" 82 | 83 | 84 | def test_parse_ndjson_embedded_json(): 85 | test_str = dedent( 86 | """ 87 | {"content": "Nested objects: {\\\"key1\\\": \\\"value1\\\", \\\"key2\\\": \\\"value2\\\"}"} 88 | {"config": "{ \\\"param1\\\": [1, 2, 3], \\\"param2\\\": {\\\"a\\\": true, \\\"b\\\": false} }"} 89 | {"formula": "Mathematical expressions: {\\\"equation\\\": \\\"x^2 + y^2 = r^2\\\"}"} 90 | """ 91 | ).strip() 92 | 93 | ndjson_parser = NdJsonParser(s=test_str) 94 | parsed_rows = list(iter(ndjson_parser)) 95 | 96 | assert isinstance(parsed_rows, list) 97 | assert len(parsed_rows) == 3 98 | 99 | assert parsed_rows[0]["content"] == 'Nested objects: {"key1": "value1", "key2": "value2"}' 100 | assert parsed_rows[1]["config"] == '{ "param1": [1, 2, 3], "param2": {"a": true, "b": false} }' 101 | assert parsed_rows[2]["formula"] == 'Mathematical expressions: {"equation": "x^2 + y^2 = r^2"}' 
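# Note: the cases above exercise NdJsonParser with plain str input, bytes input, non-ASCII text, and escaped embedded JSON; each newline-delimited line is parsed into its own dict, and WKT geometries (next test) pass through as ordinary strings.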
102 | 103 | 104 | def test_parse_ndjson_wkt_simple(): 105 | test_str = ( 106 | dedent( 107 | """ 108 | {"product_id": 1, "name": "Widget", "geo": "POINT(0 0)"} 109 | {"product_id": 2, "name": "Gadget", "geo": "POINT(0 1)"} 110 | {"product_id": 3, "name": "Tool", "geo": "POINT(0 2)"} 111 | """ 112 | ) 113 | .strip() 114 | .encode("utf-8") 115 | ) 116 | 117 | ndjson_parser = NdJsonParser(s=test_str) 118 | 119 | parsed_rows = list(iter(ndjson_parser)) 120 | 121 | assert isinstance(parsed_rows, list) 122 | assert len(parsed_rows) == 3 123 | 124 | assert parsed_rows[0]["product_id"] == 1 125 | assert parsed_rows[0]["name"] == "Widget" 126 | assert parsed_rows[0]["geo"] == "POINT(0 0)" 127 | assert parsed_rows[1]["product_id"] == 2 128 | assert parsed_rows[1]["name"] == "Gadget" 129 | assert parsed_rows[1]["geo"] == "POINT(0 1)" 130 | assert parsed_rows[2]["product_id"] == 3 131 | assert parsed_rows[2]["name"] == "Tool" 132 | assert parsed_rows[2]["geo"] == "POINT(0 2)" 133 | -------------------------------------------------------------------------------- /src/sdk/tests/test_sdk/test_utils/test_package_utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from odp.client.utils import get_version 4 | 5 | 6 | def test_get_version(): 7 | assert re.match(r"^(\d+\.)?(\d+\.)?(\d+)$", get_version()) 8 | -------------------------------------------------------------------------------- /tests/test_examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/C4IROcean-archive/odp-sdk-python/cfe9d937e620dfa8f35e6e92646d9625de5a0de9/tests/test_examples/__init__.py -------------------------------------------------------------------------------- /tests/test_examples/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import uuid 3 | from typing import Callable, Tuple 4 | 5 | import pytest 6 | from dotenv import load_dotenv 7 | from odp.client import OdpClient 8 | from odp.client.auth import AzureTokenProvider 9 | from odp.client.exc import OdpResourceNotFoundError 10 | from pydantic import SecretStr 11 | 12 | 13 | @pytest.fixture(scope="session") 14 | def dotenv() -> None: 15 | load_dotenv() 16 | 17 | 18 | @pytest.fixture(scope="session") 19 | def token_provider(dotenv) -> AzureTokenProvider: 20 | return AzureTokenProvider( 21 | authority=os.getenv( 22 | "ODCAT_AUTH_AUTHORITY", 23 | "https://oceandataplatform.b2clogin.com/755f6e58-74f0-4a07-a599-f7479b9669ab/v2.0/", 24 | ), 25 | client_id=SecretStr(os.getenv("ODCAT_AUTH_CLIENT_ID")), 26 | client_secret=SecretStr(os.getenv("ODCAT_AUTH_CLIENT_SECRET")), 27 | audience=os.getenv("ODCAT_AUTH_AUDIENCE", "a2e4df44-ed57-4673-8824-548256b92543"), 28 | tenant_id=os.getenv("ODCAT_AUTH_TENANT_ID", "755f6e58-74f0-4a07-a599-f7479b9669ab"), 29 | token_uri=os.getenv( 30 | "ODCAT_AUTH_TOKEN_ENDPOINT", 31 | "https://oceandataplatform.b2clogin.com/oceandataplatform.onmicrosoft.com/b2c_1a_signup_signin_custom/oauth2/v2.0/token", # noqa: E501 32 | ), 33 | jwks_uri=os.getenv( 34 | "ODCAT_AUTH_JWKS_URI", 35 | "https://oceandataplatform.b2clogin.com/oceandataplatform.onmicrosoft.com/b2c_1a_signup_signin_custom/discovery/v2.0/keys", # noqa: E501 36 | ), 37 | scope=[os.getenv("ODCAT_AUTH_SCOPE", "https://oceandataplatform.onmicrosoft.com/odcat/.default")], 38 | ) 39 | 40 | 41 | @pytest.fixture(scope="session") 42 | def odp_client(token_provider: AzureTokenProvider) -> OdpClient: 43 | base_url = 
os.getenv("ODCAT_BASE_URL", "https://api.hubocean.earth") 44 | 45 | return OdpClient( 46 | base_url=base_url, 47 | token_provider=token_provider, 48 | ) 49 | 50 | 51 | def delete_element(func: Callable, *args, **kwargs) -> None: 52 | try: 53 | func(*args, **kwargs) 54 | except OdpResourceNotFoundError: 55 | pass 56 | 57 | 58 | @pytest.fixture 59 | def odp_client_test_uuid(odp_client: OdpClient) -> Tuple[OdpClient, uuid.UUID]: 60 | test_uuid = uuid.uuid4() 61 | yield odp_client, test_uuid 62 | 63 | # Clean up 64 | for manifest in odp_client.catalog.list({"#EQUALS": ["$metadata.labels.test_uuid", str(test_uuid)]}): 65 | storage_class = getattr(manifest.spec, "storage_class", "") 66 | if "raw" in storage_class: 67 | for file in odp_client.raw.list(manifest): 68 | delete_element(odp_client.raw.delete_file, manifest, file) 69 | if os.path.exists(os.path.basename(file.name)): 70 | os.remove(os.path.basename(file.name)) 71 | if "tabular" in storage_class: 72 | delete_element(odp_client.tabular.delete_schema, manifest, True) 73 | delete_element(odp_client.catalog.delete, manifest.metadata.uuid) 74 | -------------------------------------------------------------------------------- /tests/test_examples/test_catalog_client_example.py: -------------------------------------------------------------------------------- 1 | import random 2 | import string 3 | from typing import Tuple 4 | from uuid import UUID 5 | 6 | from odp.client import OdpClient 7 | from odp.client.resource_client import OdpResourceClient 8 | from odp.dto import DatasetDto, DatasetSpec, ResourceDto 9 | 10 | 11 | def test_catalog_client(odp_client_test_uuid: Tuple[OdpClient, UUID]): 12 | catalog_client = odp_client_test_uuid[0].catalog 13 | assert isinstance(catalog_client, OdpResourceClient) 14 | 15 | for item in catalog_client.list(): 16 | assert isinstance(item, ResourceDto) 17 | 18 | manifest = DatasetDto( 19 | **{ 20 | "kind": "catalog.hubocean.io/dataset", 21 | "version": "v1alpha3", 22 | "metadata": { 23 | "name": "".join(random.choices(string.ascii_lowercase + string.digits, k=20)), 24 | "labels": {"test_uuid": odp_client_test_uuid[1]}, 25 | }, 26 | "spec": { 27 | "storage_controller": "registry.hubocean.io/storageController/storage-tabular", 28 | "storage_class": "registry.hubocean.io/storageClass/tabular", 29 | "maintainer": {"contact": "Just Me "}, # <-- strict syntax here 30 | }, 31 | } 32 | ) 33 | 34 | manifest = catalog_client.create(manifest) 35 | assert isinstance(manifest.spec, DatasetSpec) 36 | 37 | fetched_manifest = catalog_client.get(manifest.metadata.uuid, tp=DatasetDto) 38 | assert isinstance(fetched_manifest.spec, DatasetSpec) 39 | -------------------------------------------------------------------------------- /tests/test_examples/test_catalog_oqs_query_example.py: -------------------------------------------------------------------------------- 1 | from odp.client import OdpClient 2 | from odp.dto import ResourceDto 3 | 4 | 5 | def test_catalog_oqs_query(odp_client: OdpClient): 6 | oqs_filter = { 7 | "#EQUALS": [ 8 | "$kind", 9 | "catalog.hubocean.io/dataCollection", 10 | ] 11 | } 12 | 13 | for item in odp_client.catalog.list(oqs_filter): 14 | assert isinstance(item, ResourceDto) 15 | 16 | assert odp_client.catalog.list(oqs_filter) != [] 17 | -------------------------------------------------------------------------------- /tests/test_examples/test_observables_example.py: -------------------------------------------------------------------------------- 1 | import random 2 | import string 3 | from typing import 
Tuple 4 | from uuid import UUID 5 | 6 | from odp.client import OdpClient 7 | from odp.dto import ObservableDto, ObservableSpec 8 | 9 | 10 | def test_observables(odp_client_test_uuid: Tuple[OdpClient, UUID]): 11 | catalog_client = odp_client_test_uuid[0].catalog 12 | 13 | observable_filter = {"#EQUALS": ["$kind", "catalog.hubocean.io/observable"]} 14 | 15 | for item in catalog_client.list(observable_filter): 16 | assert isinstance(item.spec, ObservableSpec) 17 | 18 | observable_manifest = ObservableDto( 19 | **{ 20 | "kind": "catalog.hubocean.io/observable", 21 | "version": "v1alpha2", 22 | "metadata": { 23 | "name": "".join(random.choices(string.ascii_lowercase + string.digits, k=20)), 24 | "display_name": "Test Observable for time", 25 | "description": "A test observable for time", 26 | "labels": {"hubocean.io/test": True, "test_uuid": odp_client_test_uuid[1]}, 27 | }, 28 | "spec": { 29 | "ref": "catalog.hubocean.io/dataset/test-dataset", 30 | "observable_class": "catalog.hubocean.io/observableClass/static-geometric-coverage", 31 | "details": {"value": {"type": "Point", "coordinates": [-73.981200, 40.764950]}, "attribute": "test"}, 32 | }, 33 | } 34 | ) 35 | 36 | observable_manifest = catalog_client.create(observable_manifest) 37 | assert isinstance(observable_manifest.spec, ObservableSpec) 38 | 39 | fetched_manifest = catalog_client.get(observable_manifest.metadata.uuid) 40 | assert isinstance(fetched_manifest.spec, ObservableSpec) 41 | 42 | observable_geometry_filter = { 43 | "#AND": [ 44 | {"#EQUALS": ["$kind", "catalog.hubocean.io/observable"]}, 45 | { 46 | "#ST_INTERSECTS": [ 47 | "$spec.details.value", 48 | { 49 | "type": "Polygon", 50 | "coordinates": [ 51 | [ 52 | [-73.981200, 40.764950], 53 | [-73.980600, 40.764000], 54 | [-73.979800, 40.764450], 55 | [-73.980400, 40.765400], 56 | [-73.981200, 40.764950], 57 | ] 58 | ], 59 | }, 60 | ] 61 | }, 62 | ] 63 | } 64 | 65 | for item in catalog_client.list(observable_geometry_filter): 66 | assert isinstance(item.spec, ObservableSpec) 67 | assert [observable for observable in catalog_client.list(observable_geometry_filter)] != [] 68 | 69 | static_manifest_small = ObservableDto( 70 | **{ 71 | "kind": "catalog.hubocean.io/observable", 72 | "version": "v1alpha2", 73 | "metadata": { 74 | "name": "".join(random.choices(string.ascii_lowercase + string.digits, k=20)), 75 | "display_name": "SDK Example Small Value", 76 | "description": "An observable that emits a small value", 77 | "labels": {"hubocean.io/test": True, "test_uuid": odp_client_test_uuid[1]}, 78 | }, 79 | "spec": { 80 | "ref": "catalog.hubocean.io/dataset/test-dataset", 81 | "observable_class": "catalog.hubocean.io/observableClass/static-observable", 82 | "details": {"value": 1, "attribute": "test"}, 83 | }, 84 | } 85 | ) 86 | 87 | catalog_client.create(static_manifest_small) 88 | 89 | static_manifest_large = ObservableDto( 90 | **{ 91 | "kind": "catalog.hubocean.io/observable", 92 | "version": "v1alpha2", 93 | "metadata": { 94 | "name": "".join(random.choices(string.ascii_lowercase + string.digits, k=20)), 95 | "display_name": "SDK Example Large Value", 96 | "description": "An observable that emits a large value", 97 | "labels": {"hubocean.io/test": True, "test_uuid": odp_client_test_uuid[1]}, 98 | }, 99 | "spec": { 100 | "ref": "catalog.hubocean.io/dataset/test-dataset", 101 | "observable_class": "catalog.hubocean.io/observableClass/static-observable", 102 | "details": {"value": 3, "attribute": "test"}, 103 | }, 104 | } 105 | ) 106 | 107 | 
catalog_client.create(static_manifest_large) 108 | 109 | observable_range_filter = { 110 | "#AND": [ 111 | {"#WITHIN": ["$spec.observable_class", ["catalog.hubocean.io/observableClass/static-observable"]]}, 112 | {"#GREATER_THAN_OR_EQUALS": ["$spec.details.value", 2]}, 113 | ] 114 | } 115 | 116 | list_observables = [] 117 | for item in catalog_client.list(observable_range_filter): 118 | assert isinstance(item.spec, ObservableSpec) 119 | list_observables.append(item) 120 | 121 | assert list_observables != [] 122 | -------------------------------------------------------------------------------- /tests/test_examples/test_raw_client_example.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import string 4 | from typing import Tuple 5 | from uuid import UUID 6 | 7 | import pytest 8 | from odp.client import OdpClient 9 | from odp.client.dto.file_dto import FileMetadataDto 10 | from odp.dto import DatasetDto, DatasetSpec 11 | 12 | 13 | @pytest.mark.parametrize("file_name", ["test.txt", "foo/bar/test2.txt"]) 14 | def test_raw_client(odp_client_test_uuid: Tuple[OdpClient, UUID], file_name): 15 | my_dataset = DatasetDto( 16 | **{ 17 | "kind": "catalog.hubocean.io/dataset", 18 | "version": "v1alpha3", 19 | "metadata": { 20 | "name": "".join(random.choices(string.ascii_lowercase + string.digits, k=20)), 21 | "labels": {"test_uuid": odp_client_test_uuid[1]}, 22 | }, 23 | "spec": { 24 | "storage_controller": "registry.hubocean.io/storageController/storage-raw-cdffs", 25 | "storage_class": "registry.hubocean.io/storageClass/raw", 26 | "maintainer": {"contact": "Just Me "}, # <-- strict syntax here 27 | }, 28 | } 29 | ) 30 | 31 | my_dataset = odp_client_test_uuid[0].catalog.create(my_dataset) 32 | assert isinstance(my_dataset.spec, DatasetSpec) 33 | 34 | file_dto = odp_client_test_uuid[0].raw.create_file( 35 | resource_dto=my_dataset, 36 | file_metadata_dto=FileMetadataDto(**{"name": file_name, "mime_type": "text/plain"}), 37 | contents=b"Hello, World!", 38 | ) 39 | 40 | for file in odp_client_test_uuid[0].raw.list(my_dataset): 41 | assert isinstance(file, FileMetadataDto) 42 | assert list(odp_client_test_uuid[0].raw.list(my_dataset)) != []  # materialize so an empty listing would fail 43 | 44 | save_path = os.path.basename(file_name) 45 | odp_client_test_uuid[0].raw.download_file(my_dataset, file_dto, save_path) 46 | assert os.path.exists(save_path) 47 | -------------------------------------------------------------------------------- /tests/test_examples/test_tabular_client_example.py: -------------------------------------------------------------------------------- 1 | import random 2 | import string 3 | from typing import Tuple 4 | from uuid import UUID 5 | 6 | from odp.client import OdpClient 7 | from odp.client.dto.table_spec import TableSpec 8 | from odp.client.exc import OdpResourceNotFoundError 9 | from odp.dto import DatasetDto, DatasetSpec 10 | 11 | 12 | def test_tabular_client(odp_client_test_uuid: Tuple[OdpClient, UUID]): 13 | my_dataset = DatasetDto( 14 | **{ 15 | "kind": "catalog.hubocean.io/dataset", 16 | "version": "v1alpha3", 17 | "metadata": { 18 | "name": "".join(random.choices(string.ascii_lowercase + string.digits, k=20)), 19 | "labels": {"test_uuid": odp_client_test_uuid[1]}, 20 | }, 21 | "spec": { 22 | "storage_controller": "registry.hubocean.io/storageController/storage-tabular", 23 | "storage_class": "registry.hubocean.io/storageClass/tabular", 24 | "maintainer": {"contact": "Just Me "}, 25 | }, 26 | } 27 | ) 28 | 29 | my_dataset =
odp_client_test_uuid[0].catalog.create(my_dataset) 30 | assert isinstance(my_dataset.spec, DatasetSpec) 31 | 32 | table_schema = {"Data": {"type": "string"}} 33 | my_table_spec = TableSpec(table_schema=table_schema) 34 | 35 | my_table_spec = odp_client_test_uuid[0].tabular.create_schema(resource_dto=my_dataset, table_spec=my_table_spec) 36 | assert isinstance(my_table_spec, TableSpec) 37 | 38 | test_data = [{"Data": "Test"}, {"Data": "Test1"}] 39 | odp_client_test_uuid[0].tabular.write(resource_dto=my_dataset, data=test_data) 40 | 41 | our_data = odp_client_test_uuid[0].tabular.select_as_list(my_dataset) 42 | assert len(our_data) == 2 43 | 44 | our_data = list(odp_client_test_uuid[0].tabular.select_as_stream(my_dataset)) 45 | assert len(our_data) == 2 46 | 47 | update_filters = {"#EQUALS": ["$Data", "Test"]} 48 | new_data = [{"Data": "Test Updated"}] 49 | odp_client_test_uuid[0].tabular.update( 50 | resource_dto=my_dataset, 51 | data=new_data, 52 | filter_query=update_filters, 53 | ) 54 | 55 | result = odp_client_test_uuid[0].tabular.select_as_list(my_dataset) 56 | assert len(result) == 2 57 | 58 | delete_filters = {"#EQUALS": ["$Data", "Test1"]} 59 | odp_client_test_uuid[0].tabular.delete(resource_dto=my_dataset, filter_query=delete_filters) 60 | result = odp_client_test_uuid[0].tabular.select_as_list(my_dataset) 61 | assert len(result) == 1 62 | 63 | odp_client_test_uuid[0].tabular.delete_schema(my_dataset) 64 | 65 | try: 66 | odp_client_test_uuid[0].tabular.get_schema(my_dataset) 67 | raise AssertionError("expected OdpResourceNotFoundError since the schema was deleted") 68 | except OdpResourceNotFoundError: 69 | pass  # expected: the schema was deleted above 70 | 71 | odp_client_test_uuid[0].catalog.delete(my_dataset) 72 | -------------------------------------------------------------------------------- /tests/test_examples/test_tabular_geography.py: -------------------------------------------------------------------------------- 1 | import random 2 | import string 3 | from typing import Tuple 4 | from uuid import UUID 5 | 6 | from odp.client import OdpClient 7 | from odp.client.dto.table_spec import TableSpec 8 | from odp.dto import DatasetDto 9 | 10 | 11 | def test_tabular_geography(odp_client_test_uuid: Tuple[OdpClient, UUID]): 12 | manifest = DatasetDto( 13 | **{ 14 | "kind": "catalog.hubocean.io/dataset", 15 | "version": "v1alpha3", 16 | "metadata": { 17 | "name": "".join(random.choices(string.ascii_lowercase + string.digits, k=20)), 18 | "labels": {"test_uuid": odp_client_test_uuid[1]}, 19 | }, 20 | "spec": { 21 | "storage_controller": "registry.hubocean.io/storageController/storage-tabular", 22 | "storage_class": "registry.hubocean.io/storageClass/tabular", 23 | "maintainer": {"contact": "Just Me "}, # <-- strict syntax here 24 | }, 25 | } 26 | ) 27 | 28 | manifest = odp_client_test_uuid[0].catalog.create(manifest) 29 | 30 | table_schema = {"name": {"type": "string"}, "location": {"type": "geometry"}} 31 | 32 | partitioning = [{"columns": ["location"], "transformer_name": "geohash", "args": [2]}] 33 | 34 | my_table_spec = TableSpec(table_schema=table_schema, partitioning=partitioning) 35 | 36 | my_table_spec = odp_client_test_uuid[0].tabular.create_schema(resource_dto=manifest, table_spec=my_table_spec) 37 | 38 | data = [ 39 | {"name": "Oslo", "location": {"type": "Point", "coordinates": [10.74609, 59.91273]}}, 40 | {"name": "New York", "location": {"type": "Point", "coordinates": [-74.005974, 40.712776]}}, 41 | {"name": "Los Angeles", "location": {"type": "Point", "coordinates": [-118.243683, 34.052235]}}, 42 | {"name": "London", "location":
{"type": "Point", "coordinates": [-0.127758, 51.507351]}}, 43 | {"name": "Tokyo", "location": {"type": "Point", "coordinates": [139.691711, 35.689487]}}, 44 | {"name": "Paris", "location": {"type": "Point", "coordinates": [2.352222, 48.856613]}}, 45 | {"name": "Berlin", "location": {"type": "Point", "coordinates": [13.404954, 52.520008]}}, 46 | {"name": "Moscow", "location": {"type": "Point", "coordinates": [37.617298, 55.755825]}}, 47 | {"name": "Beijing", "location": {"type": "Point", "coordinates": [116.407394, 39.904202]}}, 48 | {"name": "Mexico City", "location": {"type": "Point", "coordinates": [-99.133209, 19.432608]}}, 49 | {"name": "São Paulo", "location": {"type": "Point", "coordinates": [-46.633308, -23.55052]}}, 50 | {"name": "Buenos Aires", "location": {"type": "Point", "coordinates": [-58.381592, -34.603722]}}, 51 | {"name": "New Delhi", "location": {"type": "Point", "coordinates": [77.209023, 28.613939]}}, 52 | {"name": "Sydney", "location": {"type": "Point", "coordinates": [151.209296, -33.86882]}}, 53 | {"name": "San Francisco", "location": {"type": "Point", "coordinates": [-122.419418, 37.774929]}}, 54 | {"name": "Johannesburg", "location": {"type": "Point", "coordinates": [28.047305, -26.204103]}}, 55 | {"name": "Chicago", "location": {"type": "Point", "coordinates": [-87.629799, 41.878113]}}, 56 | {"name": "Melbourne", "location": {"type": "Point", "coordinates": [144.963058, -37.813628]}}, 57 | {"name": "Edinburgh", "location": {"type": "Point", "coordinates": [-3.188267, 55.953251]}}, 58 | {"name": "Stockholm", "location": {"type": "Point", "coordinates": [18.068581, 59.329323]}}, 59 | {"name": "Ottawa", "location": {"type": "Point", "coordinates": [-75.697193, 45.42153]}}, 60 | {"name": "Hong Kong", "location": {"type": "Point", "coordinates": [114.109497, 22.396428]}}, 61 | {"name": "Jakarta", "location": {"type": "Point", "coordinates": [106.845599, -6.208763]}}, 62 | {"name": "Cairo", "location": {"type": "Point", "coordinates": [31.235712, 30.04442]}}, 63 | {"name": "Budapest", "location": {"type": "Point", "coordinates": [19.040236, 47.497913]}}, 64 | {"name": "Christchurch", "location": {"type": "Point", "coordinates": [172.636225, -43.532054]}}, 65 | {"name": "Manila", "location": {"type": "Point", "coordinates": [120.98422, 14.599512]}}, 66 | {"name": "Bangkok", "location": {"type": "Point", "coordinates": [100.501765, 13.756331]}}, 67 | {"name": "Rome", "location": {"type": "Point", "coordinates": [12.496366, 41.902783]}}, 68 | {"name": "Shanghai", "location": {"type": "Point", "coordinates": [121.473702, 31.23039]}}, 69 | {"name": "Rio de Janeiro", "location": {"type": "Point", "coordinates": [-43.172897, -22.906847]}}, 70 | {"name": "Madrid", "location": {"type": "Point", "coordinates": [-3.70379, 40.416775]}}, 71 | {"name": "Nairobi", "location": {"type": "Point", "coordinates": [36.821946, -1.292066]}}, 72 | {"name": "Toronto", "location": {"type": "Point", "coordinates": [-79.383186, 43.653225]}}, 73 | {"name": "Fortaleza", "location": {"type": "Point", "coordinates": [-38.526669, -3.731862]}}, 74 | {"name": "Tehran", "location": {"type": "Point", "coordinates": [51.388973, 35.6895]}}, 75 | {"name": "Brasília", "location": {"type": "Point", "coordinates": [-47.882166, -15.794229]}}, 76 | {"name": "Bogotá", "location": {"type": "Point", "coordinates": [-74.072092, 4.710989]}}, 77 | ] 78 | 79 | odp_client_test_uuid[0].tabular.write(resource_dto=manifest, data=data) 80 | 81 | europe_list = odp_client_test_uuid[0].tabular.select_as_list( 82 | 
resource_dto=manifest, 83 | filter_query={ 84 | "#ST_WITHIN": [ 85 | "$location", 86 | { 87 | "type": "Polygon", 88 | "coordinates": [ 89 | [ 90 | [37.02028908997249, 70.9411520317463], 91 | [-24.834125592956013, 70.9411520317463], 92 | [-24.834125592956013, 35.753296916825306], 93 | [37.02028908997249, 35.753296916825306], 94 | [37.02028908997249, 70.9411520317463], 95 | ] 96 | ], 97 | }, 98 | ] 99 | }, 100 | ) 101 | 102 | expected_cities = ["Paris", "London", "Edinburgh", "Budapest", "Stockholm", "Oslo", "Berlin", "Rome", "Madrid"] 103 | for city in europe_list: 104 | assert city.get("name") in expected_cities 105 | assert len(europe_list) == len(expected_cities) 106 | -------------------------------------------------------------------------------- /tests/test_examples/test_tabular_v2_client_example.py: -------------------------------------------------------------------------------- 1 | import random 2 | import string 3 | from typing import Tuple 4 | from uuid import UUID 5 | 6 | import pyarrow as pa 7 | from odp.client import OdpClient 8 | from odp.client.exc import OdpResourceNotFoundError 9 | from odp.client.tabular_v2.util import exp 10 | from odp.dto import DatasetDto, DatasetSpec 11 | 12 | 13 | def test_tabular_client(odp_client_test_uuid: Tuple[OdpClient, UUID]): 14 | my_dataset = DatasetDto( 15 | **{ 16 | "kind": "catalog.hubocean.io/dataset", 17 | "version": "v1alpha3", 18 | "metadata": { 19 | "name": "".join(random.choices(string.ascii_lowercase + string.digits, k=20)), 20 | "labels": {"test_uuid": odp_client_test_uuid[1]}, 21 | }, 22 | "spec": { 23 | "storage_controller": "registry.hubocean.io/storageController/storage-tabular", 24 | "storage_class": "registry.hubocean.io/storageClass/tabular", 25 | "maintainer": {"contact": "Just Me "}, 26 | }, 27 | } 28 | ) 29 | 30 | my_dataset = odp_client_test_uuid[0].catalog.create(my_dataset) 31 | assert isinstance(my_dataset.spec, DatasetSpec) 32 | 33 | table = odp_client_test_uuid[0].table_v2(my_dataset) 34 | 35 | table_schema = pa.schema({"Data": pa.string()}) 36 | table.create(table_schema) 37 | 38 | assert table.schema() is not None 39 | 40 | test_data = [{"Data": "Test"}, {"Data": "Test1"}] 41 | with table as tx: 42 | tx.insert(test_data) 43 | 44 | our_data = list(table.select().rows()) 45 | assert len(our_data) == 2 46 | 47 | our_data = list(table.select().batches()) 48 | assert len(our_data) == 1 49 | assert our_data[0].num_rows == 2 50 | 51 | update_filters = exp.parse("Data == 'Test'") 52 | new_data = [{"Data": "Test Updated"}] 53 | with table as tx: 54 | tx.delete(update_filters) 55 | tx.insert(new_data) 56 | 57 | result = list(table.select().rows()) 58 | assert new_data[0] in result 59 | assert len(result) == 2 60 | 61 | delete_filters = exp.parse("Data == 'Test1'") 62 | with table as tx: 63 | tx.delete(delete_filters) 64 | result = list(table.select().rows()) 65 | assert len(result) == 1 66 | 67 | table.drop() 68 | 69 | try: 70 | table.select() 71 | except OdpResourceNotFoundError as e: 72 | print("Schema not found error since it is deleted") 73 | print(e) 74 | 75 | odp_client_test_uuid[0].catalog.delete(my_dataset) 76 | --------------------------------------------------------------------------------