├── images
    ├── gfs-web.png
    ├── ioos_logo.jpeg
    ├── example-image-api.png
    └── map-tile-example.png
├── examples
    └── sample_image.png
├── doc-requirements.txt
├── .gitignore
├── _config.yml
├── requirements.txt
├── xpublish
    ├── test_xpublish.py
    ├── static
    │   ├── map.css
    │   ├── map.html
    │   └── map.js
    ├── dynamic_xpublish.py
    ├── demo_rest.py
    ├── tile_router.py
    ├── main.py
    ├── dap_router.py
    ├── test_routers.py
    ├── dynamic_xpublish.md
    ├── edr_router.py
    ├── tree_router.py
    ├── test_get_chunk.ipynb
    └── wms_router.py
├── _toc.yml
├── dockerfile
├── recipes
    └── gfs-wave
    │   ├── meta.yaml
    │   ├── makezarr.py
    │   └── recipe.py
├── environment.yml
├── notes
    └── accomplishments.md
├── LICENSE
├── .github
    └── workflows
    │   └── deploy-book.yml
├── .circleci
    └── config.yml
├── project-overview.md
├── pygeoapi
    └── config.yaml
├── README.md
├── xpublish_routers
    ├── openapi.json
    └── EDR.ipynb
└── S3 bucket access.ipynb


/images/gfs-web.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/asascience-open/restful-grids/HEAD/images/gfs-web.png


--------------------------------------------------------------------------------
/images/ioos_logo.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/asascience-open/restful-grids/HEAD/images/ioos_logo.jpeg


--------------------------------------------------------------------------------
/examples/sample_image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/asascience-open/restful-grids/HEAD/examples/sample_image.png


--------------------------------------------------------------------------------
/images/example-image-api.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/asascience-open/restful-grids/HEAD/images/example-image-api.png


--------------------------------------------------------------------------------
/images/map-tile-example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/asascience-open/restful-grids/HEAD/images/map-tile-example.png


--------------------------------------------------------------------------------
/doc-requirements.txt:
--------------------------------------------------------------------------------
1 | git+https://github.com/executablebooks/jupyter-book
2 | jupyterlab
3 | ghp-import
4 | sphinxcontrib-openapi


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.nc
2 | pygeoapi/.ipynb_checkpoints
3 | .ipynb_checkpoints
4 | *.pyc
5 | pyramid/dask-worker-space
6 | pyramid/gfs-wave-resampled
7 | _build
8 | 


--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
 1 | title: Restful Grids Exploration
 2 | author: IOOS Code Springs
 3 | logo: images/ioos_logo.jpeg
 4 | execute:
 5 |   execute_notebooks: "off"
 6 | parse:
 7 |   myst_enable_extensions:
 8 |     - html_image
 9 | html:
10 |   comments:
11 |     hypothesis: true
12 | sphinx:
13 |   extra_extensions:
14 |     - sphinxcontrib.openapi
15 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | xarray
 2 | jupyterlab
 3 | dask
 4 | dask[distributed]
 5 | zarr
 6 | rioxarray
 7 | fsspec
 8 | s3fs
 9 | netCDF4
10 | pygeoapi
11 | flask_cors
12 | jinja2==3.0.3
13 | pip
14 | xpublish
15 | matplotlib
16 | rasterio
17 | Pillow
18 | cf_xarray
19 | shapely
20 | ndpyramid
21 | pyproj
22 | mercantile
23 | h5netcdf
24 | opendap_protocol


--------------------------------------------------------------------------------
/xpublish/test_xpublish.py:
--------------------------------------------------------------------------------
 1 | import xarray as xr
 2 | import xpublish
 3 | 
 4 | ds = xr.open_dataset("../datasets/ww3_72_east_coast_2022041112.nc")
 5 | 
 6 | 
 7 | # ds.rest.serve(log_level="debug")
 8 | 
 9 | rest_collection = xpublish.Rest(ds)
10 | # rest_collection = xpublish.Rest({"ww3": ds, "bio": ds})
11 | rest_collection.serve(log_level="trace", port=9005)
12 | 


--------------------------------------------------------------------------------
/_toc.yml:
--------------------------------------------------------------------------------
 1 | format: jb-book
 2 | root: README
 3 | parts:
 4 |   - caption: Xpublish Routers
 5 |     chapters:
 6 |       - glob: xpublish_routers/*
 7 |   - caption: Notes
 8 |     chapters:
 9 |       - glob: notes/*
10 |   - caption: PyGEO API
11 |     chapters:
12 |       - glob: pygeoapi/*
13 |   - caption: Xpublish Approach
14 |     chapters:
15 |       - glob: xpublish/*
16 |   - caption: NdPyramid
17 |     chapters:
18 |       - glob: pyramid/*
19 | 


--------------------------------------------------------------------------------
/dockerfile:
--------------------------------------------------------------------------------
 1 | FROM mambaorg/micromamba:latest
 2 | 
 3 | RUN --mount=type=cache,id=mamba,target=/opt/conda/pkgs,uid=1000,gid=1000 \
 4 |     --mount=type=bind,source=environment.yml,target=/tmp/environment.yml \
 5 |     micromamba install -y -n base -f /tmp/environment.yml
 6 | 
 7 | EXPOSE 9005
 8 | COPY . .
 9 | WORKDIR xpublish
10 | 
11 | #ENTRYPOINT ["python" "uvicorn" "--port" "9005" "main:app" "--reload"]
12 | #CMD ["uvicorn" "--port" "9005" "main:app" "--reload"]
13 | CMD python main.py


--------------------------------------------------------------------------------
/recipes/gfs-wave/meta.yaml:
--------------------------------------------------------------------------------
 1 | title: "GFS Wave"
 2 | description: ""
 3 | pangeo_forge_version: "0.8.2"
 4 | pangeo_notebook_version: "2021.12.02"
 5 | recipes:
 6 |   - id: riops
 7 |     object: "recipe:recipe"
 8 | provenance:
 9 |   providers:
10 |     - name: ""
11 |       description: ""
12 |       roles:
13 |         - producer
14 |         - licensor
15 |       url: https://nomads.ncep.noaa.gov/pub/data/nccf/com/gfs/
16 |   license: "CC-BY-4.0"
17 | maintainers:
18 |   - name: "James Munroe"
19 |     orcid: "0000-0002-4078-0852"
20 |     github: jmunroe
21 | bakery:
22 |   id: "pangeo-ldeo-nsf-earthcube"
23 | 


--------------------------------------------------------------------------------
/xpublish/static/map.css:
--------------------------------------------------------------------------------
 1 | body {
 2 |     margin: 0px;
 3 | }
 4 | 
 5 | .fill-window {
 6 |     height: 100%;
 7 |     position: absolute;
 8 |     left: 0;
 9 |     width: 100%;
10 |     overflow: hidden;
11 | }
12 | 
13 | .controls {
14 |     position: absolute;
15 |     right: 1em;
16 |     bottom:  2em;
17 |     width: 350px;
18 |     height: 50px;
19 |     background-color: #efefef2f;
20 |     border-radius: 8px;
21 |     padding: 8px;
22 | }
23 | 
24 | .control-row {
25 |     display: flex;
26 |     flex-direction: row;
27 |     align-items: center;
28 | }
29 | 
30 | .control-label {
31 |     color: #ffffff;
32 |     padding-right: 16px;
33 | }
34 | 
35 | .control-slider {
36 |     flex: 1;
37 | }


--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
 1 | name: code-sprint-2022
 2 | channels:
 3 |   - conda-forge
 4 |   - defaults
 5 |   - pyviz
 6 | dependencies:
 7 |   - xarray
 8 |   - numpy=1.21
 9 |   - jupyterlab
10 |   - dask
11 |   - zarr
12 |   - rioxarray
13 |   - fsspec
14 |   - s3fs
15 |   - netCDF4
16 |   - pygeoapi
17 |   - flask_cors
18 |   - jinja2=3.0.3
19 |   - shapely
20 |   - cf_xarray
21 |   - ndpyramid
22 |   - matplotlib
23 |   - httpx
24 |   - git
25 |   - pip
26 |   - hvplot
27 |   - holoviews
28 |   - bokeh
29 |   - pip:
30 |       - git+https://github.com/xarray-contrib/xpublish.git@632a720aadba39cebaf062da7043835262d9fa3d
31 |   - Pillow
32 |   - rasterio
33 |   - pyproj
34 |   - ipytree
35 |   - xesmf
36 |   - mercantile
37 |   - opendap-protocol
38 | 


--------------------------------------------------------------------------------
/notes/accomplishments.md:
--------------------------------------------------------------------------------
 1 | # Accomplishments
 2 | 
 3 | ## Day 2 - 4/27/2022
 4 | 
 5 | * Dockerized and deployed to cloud
 6 | * Improved image endpoints
 7 | * (Working) Trying to serve zarr that is dynamically chunked
 8 | * Improved documentation and use-cases
 9 | * General code improvements and decoupling
10 | 
11 | ## Day 1 - 4/26/2022
12 | 
13 | * Alex: OGC EDR API implementation for point data using xpublish
14 | * James: Pangeo Forge script for converting GFS-WAVE from GRIB to zarr
15 | * Matt: Wrote endpoint to return image tile from xarray
16 | * Max: Tested n-d pyramid of GFS-WAVE
17 | * Jonathan: created Dockerfile and updated AWS permissions
18 | 
19 | ## Goals for Tomorrow:
20 | * Work with n-d pyramid team for faster tiling data
21 | * Convert more data in the cloud
22 | 


--------------------------------------------------------------------------------
/recipes/gfs-wave/makezarr.py:
--------------------------------------------------------------------------------
 1 | from recipe import recipe
 2 | 
 3 | from pangeo_forge_recipes.storage import CacheFSSpecTarget, FSSpecTarget, MetadataTarget, StorageConfig
 4 | 
 5 | from fsspec.implementations.local import LocalFileSystem
 6 | 
 7 | import os, shutil
 8 | 
 9 | if os.path.exists('target'):
10 |     shutil.rmtree('target')
11 | 
12 | fs = LocalFileSystem()
13 | 
14 | cache = CacheFSSpecTarget(fs=fs, root_path="./cache/")
15 | target = CacheFSSpecTarget(fs=fs, root_path="./target/")
16 | 
17 | recipe.storage_config = StorageConfig(target, cache)
18 | 
19 | from pangeo_forge_recipes.recipes import setup_logging
20 | setup_logging(level="INFO")
21 | 
22 | recipe_pruned = recipe.copy_pruned(96)
23 | 
24 | recipe_function = recipe_pruned.to_function()
25 | 
26 | recipe_function()
27 | 
28 | import xarray as xr
29 | 
30 | ds = xr.open_zarr(target.get_mapper())
31 | print(ds)
32 | 


--------------------------------------------------------------------------------
/xpublish/static/map.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html>
 3 | 
 4 | <head>
 5 |   <meta charset="UTF-8">
 6 |   <meta name="viewport" content="width=device-width, initial-scale=1.0">
 7 |   <title>IOOS XPublish Viewer</title>
 8 |   <script src='https://api.tiles.mapbox.com/mapbox-gl-js/v2.8.2/mapbox-gl.js'></script>
 9 |   <link href='https://api.tiles.mapbox.com/mapbox-gl-js/v2.8.2/mapbox-gl.css' rel='stylesheet' />
10 |   <link rel="stylesheet" href="./map.css"> 
11 | </head>
12 | 
13 | <body>
14 |   <div id="map" class="fill-window"></div>
15 |   <script type='module' src="/static/map.js"></script>
16 | </body>
17 | 
18 | <div class="controls" >
19 |   <div class="control-row">
20 |       <label class="control-label" for="timestep-slider">Timestep</label>
21 |       <input class="control-slider" id="timestep-slider" name="timestep" type="range" min="0" max="72" value="0" step="1" />
22 |   </div>   
23 | </div>
24 | 
25 | </html>


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 Applied Science Associates
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/.github/workflows/deploy-book.yml:
--------------------------------------------------------------------------------
 1 | name: deploy-book
 2 | 
 3 | on:
 4 |   # Trigger the workflow on push to main branch
 5 |   push:
 6 |     branches:
 7 |       - main
 8 |   pull_request:
 9 |     branches:
10 |       - main
11 |   workflow_dispatch:
12 | 
13 | # This job installs dependencies, build the book, and pushes it to `gh-pages`
14 | jobs:
15 |   build-and-deploy-book:
16 |     runs-on: ${{ matrix.os }}
17 |     strategy:
18 |       matrix:
19 |         os: [ubuntu-latest]
20 |         python-version: [3.8]
21 |     steps:
22 |     - uses: actions/checkout@v2
23 | 
24 |     # Install dependencies
25 |     - name: Set up Python ${{ matrix.python-version }}
26 |       uses: actions/setup-python@v1
27 |       with:
28 |         python-version: ${{ matrix.python-version }}
29 |     - name: Install dependencies
30 |       run: |
31 |         pip install -r doc-requirements.txt
32 |     # Build the book
33 |     - name: Build the book
34 |       run: |
35 |         jupyter-book build .
36 |     # Deploy the book's HTML to gh-pages branch
37 |     - name: GitHub Pages action
38 |       uses: peaceiris/actions-gh-pages@v3.6.1
39 |       if: github.ref == 'refs/heads/main'
40 |       with:
41 |         github_token: ${{ secrets.GITHUB_TOKEN }}
42 |         publish_dir: _build/html
43 | 


--------------------------------------------------------------------------------
/recipes/gfs-wave/recipe.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | from datetime import datetime, date
 3 | from pangeo_forge_recipes.patterns import ConcatDim, MergeDim, FilePattern
 4 | from pangeo_forge_recipes.recipes import XarrayZarrRecipe
 5 | 
 6 | # URL
 7 | # https://nomads.ncep.noaa.gov/pub/data/nccf/com/gfs/prod/gfs.20220426/00/wave/gridded/gfswave.t00z.atlocn.0p16.f000.grib2 
 8 | 
 9 | def make_url(time):
10 | 
11 |     return (
12 |             "https://nomads.ncep.noaa.gov/pub/data/nccf/com/gfs/"
13 |             "prod/gfs.20220426/00/wave/gridded/"
14 |             f"gfswave.t00z.atlocn.0p16.f{time:03d}.grib2"
15 |         )
16 | 
17 | 
18 | # A GFS Wave forecast is every hour for 384 hours
19 | time_concat_dim = ConcatDim("time", range(384), nitems_per_file=1)
20 | 
21 | pattern = FilePattern(make_url, time_concat_dim)
22 | 
23 | def process_input(ds, filename):
24 | 
25 |     ds = ds.expand_dims('time')
26 |     return ds
27 | 
28 | recipe = XarrayZarrRecipe(file_pattern=pattern,  
29 |                           process_input=process_input,
30 |                           target_chunks={'time': 1, 'latitude':166, 'longitude':151 },
31 |                           xarray_open_kwargs={'engine': 'cfgrib'},
32 |                           copy_input_to_local_file=True
33 |                          )
34 | 


--------------------------------------------------------------------------------
/.circleci/config.yml:
--------------------------------------------------------------------------------
 1 | # Use the latest 2.1 version of CircleCI pipeline process engine.
 2 | # See: https://circleci.com/docs/2.0/configuration-reference
 3 | version: 2.1
 4 | 
 5 | orbs:
 6 |   aws-ecr: circleci/aws-ecr@8.1.1
 7 | 
 8 | # Invoke jobs via workflows
 9 | # See: https://circleci.com/docs/2.0/configuration-reference/#workflows
10 | workflows:
11 |     build_and_push_image:
12 |         jobs:
13 |           - aws-ecr/build-and-push-image:
14 |               aws-access-key-id: ACCESS_KEY_ID
15 |               aws-cli-version: latest
16 |               aws-secret-access-key: SECRET_ACCESS_KEY
17 |               context: .
18 |               create-repo: false
19 |               dockerfile: dockerfile
20 |               executor: aws-ecr/default
21 |               extra-build-args: '--compress'
22 |               no-output-timeout: 20m
23 |               path: .
24 |               platform: linux/amd64
25 |               public-registry: true
26 |               push-image: true
27 |               region: us-east-1
28 |               registry-id: REGISTRY_ID
29 |               repo: restful-grids
30 |               repo-scan-on-push: false
31 |               skip-when-tags-exist: false
32 |               tag: 'dev'
33 |               filters:
34 |                 branches:
35 |                   only: main


--------------------------------------------------------------------------------
/project-overview.md:
--------------------------------------------------------------------------------
 1 | # Goals for API 
 2 | 
 3 | ## Resources
 4 | - AWS Data - s3://ioos-code-sprint-2022
 5 | - Github Repo - https://github.com/asascience/restful-grids
 6 | - FVCOM Forecast from UMASS - https://gmri-research-data.nyc3.digitaloceanspaces.com/IOOS-code-sprint/fvcom_gom3_2022-04-10.nc
 7 | - Wave Watch 3 from Bedford Institute of Oceanography - https://gmri-research-data.nyc3.digitaloceanspaces.com/IOOS-code-sprint/ww3_72_east_coast_2022041112.nc
 8 | 
 9 | ## Current Solutions
10 | * Point to Zarr file --> user subsets
11 | 
12 | ## Goals
13 | * Getting a single point
14 | * Getting a bounding box
15 | * Query using time
16 | * Optimize data retrieval for temporal data
17 |     * Chunk by space
18 |     * Chunk by time
19 |     * Chunk by space + time
20 | 
21 | ## First Steps
22 | * What does it take to subset a point of data from a cloud hosted dataset?
23 |     * What dataset?
24 |         * GFS!!
25 |         * https://registry.opendata.aws/noaa-gfs-bdp-pds/#usageexamples
26 |     * Wave Watch + Buoy
27 | * Consider OGC API integration with xarray; see where pain points are
28 | * Try pygeoapi, but know there are existing issues
29 | * Testing 
30 | 
31 | 
32 | ## Existing solutions
33 | * OGC PyGEOAPI - https://pygeoapi.io
34 | * stack STAC - https://stackstac.readthedocs.io/en/latest/basic.html
35 | * Xpublish - https://github.com/xarray-contrib/xpublish
36 | * OGC Environment Data Retrieval - https://github.com/opengeospatial/ogcapi-environmental-data-retrieval
37 | * NetCDF subset - https://www.unidata.ucar.edu/software/tds/current/reference/NetcdfSubsetServiceReference.html
38 | * ERDDAP
39 | 
40 | ## Defining IO
41 | * In - zarr dataset
42 | * Out
43 |     * Json, binary, or text
44 |     * Provide a tile
45 | 
46 | ## Datasets
47 | * NECOFS
48 | 


--------------------------------------------------------------------------------
/xpublish/dynamic_xpublish.py:
--------------------------------------------------------------------------------
 1 | # Testing accessing datasets based on lazily loaded Pangeo Forge Zarr data
 2 | 
 3 | import fsspec
 4 | import requests
 5 | import xarray as xr
 6 | import xpublish
 7 | from xpublish import rest
 8 | 
 9 | 
10 | recipe_runs_url = "https://api.pangeo-forge.org/recipe_runs/"
11 | 
12 | 
13 | def pangeo_forge_datasets():
14 |     res = requests.get(recipe_runs_url)
15 |     return res.json()
16 | 
17 | 
18 | def pangeo_forge_with_data():
19 |     datasets = pangeo_forge_datasets()
20 |     return [r for r in datasets if r["dataset_public_url"]]
21 | 
22 | 
23 | def pangeo_forge_dataset_map():
24 |     datasets = pangeo_forge_with_data()
25 |     return {r["recipe_id"]: r["dataset_public_url"] for r in datasets}
26 | 
27 | 
28 | def get_pangeo_forge_dataset(dataset_id: str) -> xr.Dataset:
29 |     dataset_map = pangeo_forge_dataset_map()
30 |     zarr_url = dataset_map[dataset_id]
31 | 
32 |     mapper = fsspec.get_mapper(zarr_url)
33 |     ds = xr.open_zarr(mapper, consolidated=True)
34 |     return ds
35 | 
36 | 
37 | class DynamicRest(xpublish.Rest):
38 |     def __init__(self, routers=None, cache_kws=None, app_kws=None):
39 |         self._get_dataset_func = get_pangeo_forge_dataset
40 |         self._datasets = list(pangeo_forge_dataset_map().keys())
41 |         dataset_route_prefix = "/datasets/{dataset_id}"
42 | 
43 |         self._app_routers = rest._set_app_routers(routers, dataset_route_prefix)
44 | 
45 |         self._app = None
46 |         self._app_kws = {}
47 |         if app_kws is not None:
48 |             self._app_kws.update(app_kws)
49 | 
50 |         self._cache = None
51 |         self._cache_kws = {"available_bytes": 1e6}
52 |         if cache_kws is not None:
53 |             self._cache_kws.update(cache_kws)
54 | 
55 | 
56 | dynamic = DynamicRest()
57 | dynamic.serve(log_level="trace", port=9005)
58 | 


--------------------------------------------------------------------------------
/xpublish/demo_rest.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Load Pangeo-Forge and our datasets
 3 | """
 4 | import fsspec
 5 | import requests
 6 | import xarray as xr
 7 | import cf_xarray
 8 | import xpublish
 9 | from xpublish import rest
10 | 
11 | 
12 | recipe_runs_url = "https://api.pangeo-forge.org/recipe_runs/"
13 | 
14 | 
15 | def pangeo_forge_datasets_map():
16 |     res = requests.get(recipe_runs_url)
17 |     datasets = res.json()
18 |     datasets = [r for r in datasets if r["dataset_public_url"]]
19 |     return {r["recipe_id"]: r["dataset_public_url"] for r in datasets}
20 | 
21 | 
22 | def dataset_map():
23 |     datasets = pangeo_forge_datasets_map()
24 |     datasets["ww3"] = "ww3-stub"
25 |     datasets["gfs"] = "https://ioos-code-sprint-2022.s3.amazonaws.com/gfs-wave.zarr"
26 | 
27 |     return datasets
28 | 
29 | 
30 | def get_dataset(dataset_id: str) -> xr.Dataset:
31 |     if dataset_id == "ww3":
32 |         return xr.open_dataset("../datasets/ww3_72_east_coast_2022041112.nc")
33 | 
34 |     zarr_url = dataset_map()[dataset_id]
35 | 
36 |     mapper = fsspec.get_mapper(zarr_url)
37 |     ds = xr.open_zarr(mapper, consolidated=True)
38 | 
39 |     if "X" not in ds.cf.axes:
40 |         x_axis = ds[ds.cf.coordinates["longitude"][0]]
41 |         x_axis.attrs["axis"] = "X"
42 |     if "Y" not in ds.cf.axes:
43 |         y_axis = ds[ds.cf.coordinates["latitude"][0]]
44 |         y_axis.attrs["axis"] = "Y"
45 | 
46 |     return ds
47 | 
48 | 
49 | class DemoRest(xpublish.Rest):
50 |     def __init__(self, routers=None, cache_kws=None, app_kws=None):
51 |         self._get_dataset_func = get_dataset
52 |         self._datasets = list(dataset_map().keys())
53 |         dataset_route_prefix = "/datasets/{dataset_id}"
54 | 
55 |         self._app_routers = rest._set_app_routers(routers, dataset_route_prefix)
56 | 
57 |         self._app = None
58 |         self._app_kws = {}
59 |         if app_kws is not None:
60 |             self._app_kws.update(app_kws)
61 | 
62 |         self._cache = None
63 |         self._cache_kws = {"available_bytes": 1e6}
64 |         if cache_kws is not None:
65 |             self._cache_kws.update(cache_kws)
66 | 


--------------------------------------------------------------------------------
/xpublish/tile_router.py:
--------------------------------------------------------------------------------
 1 | import io
 2 | import logging
 3 | from typing import Dict, Optional
 4 | 
 5 | import numpy as np
 6 | import mercantile
 7 | import xarray as xr
 8 | from xpublish.dependencies import get_dataset
 9 | from fastapi import APIRouter, Depends, Response
10 | from rasterio.enums import Resampling
11 | from rasterio.transform import Affine
12 | from PIL import Image
13 | from matplotlib import cm
14 | 
15 | # rioxarray and cf_xarray will show as not being used but its necesary for enabling rio extensions for xarray
16 | import cf_xarray
17 | import rioxarray
18 | 
19 | 
20 | logger = logging.getLogger("api")
21 | 
22 | tile_router = APIRouter()
23 | 
24 | @tile_router.get('/{parameter}/{t}/{z}/{x}/{y}', response_class=Response)
25 | def get_image_tile(parameter: str, t: str, z: int, x: int, y: int, size: int = 256, cmap: str = None, color_range: str = None, dataset: xr.Dataset = Depends(get_dataset)):
26 |     if not dataset.rio.crs:
27 |         dataset = dataset.rio.write_crs(4326)
28 |     ds = dataset.squeeze()
29 |     bbox = mercantile.xy_bounds(x, y, z)
30 | 
31 |     dim = (2 ** z) * size
32 |     transform = Affine.translation(bbox.left, bbox.top) * Affine.scale(
33 |        (20037508.342789244 * 2) / float(dim), -(20037508.342789244 * 2) / float(dim)
34 |     )
35 | 
36 |     resampled_data = ds[parameter].rio.reproject(
37 |         'EPSG:3857', 
38 |         shape=(size, size), 
39 |         resampling=Resampling.nearest, 
40 |         transform=transform,
41 |     )
42 | 
43 |     # This is an image, so only use the timestepm that was requested
44 |     resampled_data = resampled_data.cf.sel({'T': t}).squeeze()
45 |     
46 |     # if the user has supplied a color range, use it. Otherwise autoscale
47 |     if color_range is not None:
48 |         color_range = [float(x) for x in color_range.split(',')]
49 |         min_value = color_range[0]
50 |         max_value = color_range[1]
51 |     else:
52 |         min_value = float(ds[parameter].min())
53 |         max_value = float(ds[parameter].max())
54 | 
55 |     ds_scaled = (resampled_data - min_value) / (max_value - min_value)
56 | 
57 |     # Let user pick cm from here https://predictablynoisy.com/matplotlib/gallery/color/colormap_reference.html#sphx-glr-gallery-color-colormap-reference-py
58 |     # Otherwise default to rainbow
59 |     im = Image.fromarray(np.uint8(cm.get_cmap(cmap)(ds_scaled)*255))
60 | 
61 |     image_bytes = io.BytesIO()
62 |     im.save(image_bytes, format='PNG')
63 |     image_bytes = image_bytes.getvalue()
64 | 
65 |     return Response(content=image_bytes, media_type='image/png')
66 | 


--------------------------------------------------------------------------------
/xpublish/main.py:
--------------------------------------------------------------------------------
 1 | # Run with `uvicorn --port 9005 main:app --reload`
 2 | from xpublish.routers import base_router, zarr_router
 3 | from fastapi.staticfiles import StaticFiles
 4 | 
 5 | from demo_rest import DemoRest
 6 | from edr_router import edr_router
 7 | from tree_router import tree_router
 8 | from dap_router import dap_router
 9 | from tile_router import tile_router
10 | from wms_router import wms_router
11 | 
12 | 
13 | rest = DemoRest(
14 |     routers=[
15 |         (base_router, {"tags": ["info"]}),
16 |         (edr_router, {"tags": ["edr"], "prefix": "/edr"}),
17 |         (tree_router, {"tags": ["datatree"], "prefix": "/tree"}),
18 |         (dap_router, {"tags": ["opendap"], "prefix": "/opendap"}),
19 |         (tile_router, {"tags": ["image"], "prefix": "/tile"}),
20 |         (wms_router, {"tags": ["wms"], "prefix": "/wms"}),
21 |         (zarr_router, {"tags": ["zarr"], "prefix": "/zarr"}),
22 |     ]
23 | )
24 | 
25 | app = rest.app
26 | 
27 | app.description = "Hacking on xpublish during the IOOS Code Sprint"
28 | app.title = "IOOS xpublish"
29 | 
30 | edr_description = """
31 | OGC Environmental Data Retrieval API
32 | 
33 | Currently the position query is supported, which takes a single Well Known Text point.
34 | """
35 | 
36 | datatree_description = """
37 | Dynamic generation of Zarr ndpyramid/Datatree for access from webmaps.
38 | 
39 | - [carbonplan/maps](https://carbonplan.org/blog/maps-library-release)
40 | - [xpublish#92](https://github.com/xarray-contrib/xpublish/issues/92)
41 | """
42 | 
43 | zarr_description = """
44 | Zarr access to NetCDF datasets.
45 | 
46 | Load by using an fsspec mapper
47 | 
48 | ```python
49 | mapper = fsspec.get_mapper("/datasets/{dataset_id}/zarr/")
50 | ds = xr.open_zarr(mapper, consolidated=True)
51 | ```
52 | """
53 | 
54 | app.openapi_tags = [
55 |     {"name": "info"},
56 |     {
57 |         "name": "edr",
58 |         "description": edr_description,
59 |         "externalDocs": {
60 |             "description": "OGC EDR Reference",
61 |             "url": "https://ogcapi.ogc.org/edr/",
62 |         },
63 |     },
64 |     {"name": "image", "description": "WMS-like image generation"},
65 |     {"name": "datatree", "description": datatree_description},
66 |     {"name": "opendap", "description": "OpenDAP access"},
67 |     {"name": "zarr", "description": zarr_description},
68 | ]
69 | 
70 | app.mount("/static", StaticFiles(directory="static"), name="static")
71 | 
72 | if __name__ == "__main__":
73 |     import uvicorn
74 | 
75 |     # When run directly, run in debug mode
76 |     uvicorn.run(
77 |         "main:app",
78 |         port=9005,
79 |         reload=True,
80 |         log_level="debug",
81 |         debug=True,
82 |     )
83 | 


--------------------------------------------------------------------------------
/pygeoapi/config.yaml:
--------------------------------------------------------------------------------
 1 | server:
 2 |   bind:
 3 |     host: 0.0.0.0
 4 |     port: 5002
 5 |   cors: true
 6 |   language: en-US
 7 |   manager:
 8 |     connection: /tmp/pygeoapi-process-manager.db
 9 |     name: TinyDB
10 |     output_dir: /tmp/
11 |   map:
12 |     attribution:
13 |       <a href="https://wikimediafoundation.org/wiki/Maps_Terms_of_Use">Wikimedia
14 |       maps</a> | Map data &copy; <a href="https://openstreetmap.org/copyright">OpenStreetMap
15 |       contributors</a>
16 |     url: https://maps.wikimedia.org/osm-intl/{z}/{x}/{y}.png
17 |   url: http://localhost:5002
18 | logging:
19 |   level: DEBUG
20 | metadata:
21 |   contact:
22 |     address: 195 New Hampshire Ave, Suite 240
23 |     city: Portsmouth
24 |     country: United States
25 |     email: tom@neracoos.org
26 |     name: Shyka, Tom
27 |     phone: +01-603-319-1785
28 |     position: Product & Engagement Manager
29 |     postalcode: 03801
30 |     role: pointOfContact
31 |     stateorprovince: New Hampshire
32 |     url: http://neracoos.org
33 |   identification:
34 |     description: OGC APIs for NERACOOS services
35 |     keywords:
36 |       - geospatial
37 |       - data
38 |       - api
39 |       - oceanographic
40 |     keywords_type: theme
41 |     terms_of_service: https://creativecommons.org/licenses/by/4.0/
42 |     title: data.neracoos.org
43 |     url: http://neracoos.org
44 |   license:
45 |     name: CC-BY 4.0 license
46 |     url: https://creativecommons.org/licenses/by/4.0/
47 |   provider:
48 |     name: NERACOOS
49 |     url: https://neracoos.org
50 | resources:
51 |   bio_ww3_east_coast_latest:
52 |     description:
53 |       Bedford Institute of Oceanography Wave Watch 3 72 hour forecast for
54 |       the East Coast
55 |     extents:
56 |       spatial:
57 |         bbox:
58 |           - -93.0
59 |           - 20.0
60 |           - -55.0
61 |           - 55.0
62 |         crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84
63 |       temporal:
64 |         begin: 2022-04-11 12:00:00
65 |         end: 2022-04-14 12:00:00
66 |     keywords:
67 |       - forecast
68 |       - wave
69 |     links:
70 |       - href: https://data.neracoos.org/erddap/griddap/WW3_72_EastCoast.html
71 |         rel: service-doc
72 |         title: WW3_72_EastCoast on ERDDAP
73 |         type: text/html
74 |     providers:
75 |       - data: ../datasets/ww3_72_east_coast_2022041112.nc
76 |         format:
77 |           mimetype: application/x-netcdf
78 |           name: NetCDF
79 |         name: xarray-edr
80 |         time_field: time
81 |         type: edr
82 |         x_field: longitude
83 |         y_field: latitude
84 |     title:
85 |       Bedford Institute of Oceanography Wave Watch 3 72 hour forecast for the East
86 |       Coast
87 |     type: collection
88 | 


--------------------------------------------------------------------------------
/xpublish/dap_router.py:
--------------------------------------------------------------------------------
  1 | """
  2 | OpenDAP router
  3 | """
  4 | import logging
  5 | import urllib
  6 | 
  7 | import cachey
  8 | from fastapi import APIRouter, Depends, Request, HTTPException
  9 | from fastapi.responses import StreamingResponse
 10 | import numpy as np
 11 | import opendap_protocol as dap
 12 | import xarray as xr
 13 | from xpublish.dependencies import get_cache, get_dataset
 14 | 
 15 | 
 16 | logger = logging.getLogger("uvicorn")
 17 | 
 18 | 
 19 | dap_router = APIRouter()
 20 | 
 21 | 
 22 | dtype_dap = {
 23 |     np.ubyte: dap.Byte,
 24 |     np.int16: dap.Int16,
 25 |     np.uint16: dap.UInt16,
 26 |     np.int32: dap.Int32,
 27 |     np.uint32: dap.UInt32,
 28 |     np.float32: dap.Float32,
 29 |     np.float64: dap.Float64,
 30 |     np.str_: dap.String,
 31 |     # Not a direct mapping
 32 |     np.int64: dap.Float64,
 33 | }
 34 | dtype_dap = {np.dtype(k): v for k, v in dtype_dap.items()}
 35 | 
 36 | 
 37 | def dap_dtype(da: xr.DataArray):
 38 |     """ Return a DAP type for the xr.DataArray """
 39 |     try:
 40 |         return dtype_dap[da.dtype]
 41 |     except KeyError as e:
 42 |         logger.warning(
 43 |             f"Unable to match dtype for {da.name}. Going to assume string will work for now... ({e})"
 44 |         )
 45 |         return dap.String
 46 | 
 47 | 
 48 | def dap_dimension(da: xr.DataArray) -> dap.Array:
 49 |     """ Transform an xarray dimension into a DAP dimension """
 50 |     encoded_da = xr.conventions.encode_cf_variable(da)
 51 |     dim = dap.Array(name=da.name, data=encoded_da.values, dtype=dap_dtype(encoded_da))
 52 | 
 53 |     for k, v in encoded_da.attrs.items():
 54 |         dim.append(dap.Attribute(name=k, value=v, dtype=dap.String))
 55 | 
 56 |     return dim
 57 | 
 58 | 
 59 | def dap_grid(da: xr.DataArray, dims: dict[str, dap.Array]) -> dap.Grid:
 60 |     """ Transform an xarray DataArray into a DAP Grid"""
 61 |     data_array = dap.Grid(
 62 |         name=da.name,
 63 |         data=da.astype(da.encoding["dtype"]).data,
 64 |         dtype=dap_dtype(da),
 65 |         dimensions=[dims[dim] for dim in da.dims],
 66 |     )
 67 | 
 68 |     for k, v in da.attrs.items():
 69 |         data_array.append(dap.Attribute(name=k, value=v, dtype=dap.String))
 70 | 
 71 |     return data_array
 72 | 
 73 | 
 74 | def dap_dataset(ds: xr.Dataset, name: str) -> dap.Dataset:
 75 |     """ Create a DAP Dataset for an xarray Dataset """
 76 |     dataset = dap.Dataset(name=name)
 77 | 
 78 |     dims = {}
 79 |     for dim in ds.dims:
 80 |         dims[dim] = dap_dimension(ds[dim])
 81 | 
 82 |     dataset.append(*dims.values())
 83 | 
 84 |     for var in ds.variables:
 85 |         if var not in ds.dims:
 86 |             data_array = dap_grid(ds[var], dims)
 87 |             dataset.append(data_array)
 88 | 
 89 |     for k, v in ds.attrs.items():
 90 |         dataset.append(dap.Attribute(name=k, value=v, dtype=dap.String))
 91 | 
 92 |     return dataset
 93 | 
 94 | 
 95 | def get_dap_dataset(
 96 |     dataset_id: str,
 97 |     ds: xr.Dataset = Depends(get_dataset),
 98 |     cache: cachey.Cache = Depends(get_cache),
 99 | ):
100 |     cache_key = f"opendap_dataset_{dataset_id}"
101 |     dataset = cache.get(cache_key)
102 | 
103 |     if dataset is None:
104 |         dataset = dap_dataset(ds, dataset_id)
105 | 
106 |         cache.put(cache_key, dataset, 99999)
107 | 
108 |     return dataset
109 | 
110 | 
111 | @dap_router.get(".dds")
112 | def dds_response(request: Request, dataset: dap.Dataset = Depends(get_dap_dataset)):
113 |     constraint = request.url.components[3]
114 |     return StreamingResponse(
115 |         dataset.dds(constraint=constraint), media_type="text/plain"
116 |     )
117 | 
118 | 
119 | @dap_router.get(".das")
120 | def das_response(request: Request, dataset: dap.Dataset = Depends(get_dap_dataset)):
121 |     constraint = request.url.components[3]
122 |     return StreamingResponse(
123 |         dataset.das(constraint=constraint), media_type="text/plain"
124 |     )
125 | 
126 | 
127 | @dap_router.get(".dods")
128 | def dods_response(request: Request, dataset: dap.Dataset = Depends(get_dap_dataset)):
129 |     constraint = request.url.components[3]
130 |     return StreamingResponse(
131 |         dataset.dods(constraint=constraint), media_type="application/octet-stream"
132 |     )
133 | 


--------------------------------------------------------------------------------
/xpublish/test_routers.py:
--------------------------------------------------------------------------------
  1 | from atexit import register
  2 | from cmath import isnan
  3 | from logging import getLogger
  4 | import logging
  5 | import re
  6 | from typing import Optional
  7 | import io
  8 | 
  9 | from fastapi import APIRouter, Body, Depends, HTTPException, Query, Response as FastApiResponse
 10 | from fastapi.responses import StreamingResponse
 11 | from pydantic import BaseModel, Field
 12 | from requests import Response
 13 | import xarray as xr
 14 | import cf_xarray as cfxr
 15 | import xpublish
 16 | from xpublish.dependencies import get_dataset
 17 | from xpublish.routers import base_router, zarr_router
 18 | from rasterio.enums import Resampling
 19 | from PIL import Image
 20 | from matplotlib import cm
 21 | import numpy as np
 22 | # rioxarray will show as not being used but its necesarry for enabling rio extensions for xarray
 23 | import rioxarray
 24 | 
 25 | # logger = logging.getLogger(__name__)
 26 | logger = logging.getLogger("fastapi")
 27 | 
 28 | ds = xr.open_dataset("../datasets/ww3_72_east_coast_2022041112.nc")
 29 | # We need a coordinate system to tile 
 30 | ds = ds.rio.write_crs(4326)
 31 | 
 32 | meanrouter = APIRouter()
 33 | 
 34 | 
 35 | @meanrouter.get("/{var_name}/mean")
 36 | def get_mean(var_name: str, dataset: xr.Dataset = Depends(get_dataset)):
 37 |     if var_name not in dataset.variables:
 38 |         raise HTTPException(
 39 |             status_code=404, detail=f"Variable `{var_name}` not found in dataset"
 40 |         )
 41 | 
 42 |     return float(ds[var_name].mean())
 43 | 
 44 | 
 45 | edrrouter = APIRouter()
 46 | 
 47 | 
 48 | class EDRQuery(BaseModel):
 49 |     coords: str = Field(..., title="Point in WKT format")
 50 |     z: Optional[str] = None
 51 |     datetime: Optional[str] = None
 52 |     parameters: Optional[str] = None
 53 |     crs: Optional[str] = None
 54 |     f: Optional[str] = None
 55 | 
 56 |     @property
 57 |     def point(self):
 58 |         from shapely import wkt
 59 | 
 60 |         return wkt.loads(self.coords)
 61 | 
 62 | 
 63 | def edr_query_params(
 64 |     coords: str = Query(
 65 |         ..., title="WKT Coordinates", description="Well Known Text Coordinates"
 66 |     ),
 67 |     z: Optional[str] = None,
 68 |     datetime: Optional[str] = None,
 69 |     parameters: Optional[str] = None,
 70 |     crs: Optional[str] = None,
 71 |     f: Optional[str] = None,
 72 | ):
 73 |     return EDRQuery(
 74 |         coords=coords, z=z, datetime=datetime, parameters=parameters, crs=crs, f=f
 75 |     )
 76 | 
 77 | 
 78 | # POINT(-69.35 43.72)
 79 | 
 80 | 
 81 | @edrrouter.get("/position")
 82 | def get_position(
 83 |     query: EDRQuery = Depends(edr_query_params),
 84 |     dataset: xr.Dataset = Depends(get_dataset),
 85 | ):
 86 |     ds = dataset.cf.sel(X=query.point.x, Y=query.point.y, method="nearest")
 87 | 
 88 |     if query.parameters:
 89 |         ds = ds[query.parameters.split(",")]
 90 | 
 91 |     return to_covjson(ds)
 92 | 
 93 | 
 94 | def to_covjson(ds: xr.Dataset):
 95 |     covjson = {
 96 |         "type": "Coverage",
 97 |         "domainType": "Grid",
 98 |         "domain": {"axes": {}},
 99 |         "parameters": {},
100 |         "ranges": {},
101 |     }
102 | 
103 |     for var in ds.variables:
104 |         if var not in ds.coords:
105 |             da = ds[var]
106 | 
107 |             parameter = {"type": "Parameter"}
108 | 
109 |             covjson["parameters"][var] = parameter
110 | 
111 |             cov_range = {
112 |                 "type": "NdArray",
113 |                 "dataType": str(da.dtype),
114 |                 "axisNames": da.dims,
115 |                 "shape": da.shape,
116 |                 "values": da.values.ravel().tolist(),
117 |             }
118 | 
119 |             covjson["ranges"][var] = cov_range
120 | 
121 |     return covjson
122 | 
123 | 
124 | image_router = APIRouter()
125 | 
126 | @image_router.get('/image', response_class=Response)
127 | async def get_image(bbox: str, width: int, height: int, var: str, cmap: Optional[str]=None, dataset: xr.Dataset = Depends(get_dataset)):
128 |     xmin, ymin, xmax, ymax = [float(x) for x in bbox.split(',')]
129 |     q = ds.sel({'latitude': slice(ymin, ymax), 'longitude': slice(xmin, xmax)})
130 | 
131 |     resampled_data = q[var][0][0].rio.reproject(
132 |         ds.rio.crs, 
133 |         shape=(width, height), 
134 |         resampling=Resampling.bilinear,
135 |     )
136 | 
137 |     # This is autoscaling, we can add more params to make this user controlled 
138 |     # if not min_value: 
139 |     min_value = resampled_data.min()
140 |     # if not max_value:
141 |     max_value = resampled_data.max()
142 | 
143 |     ds_scaled = (resampled_data - min_value) / (max_value - min_value)
144 | 
145 |     # Let user pick cm from here https://predictablynoisy.com/matplotlib/gallery/color/colormap_reference.html#sphx-glr-gallery-color-colormap-reference-py
146 |     # Otherwise default to rainbow
147 |     if not cmap:
148 |         cmap = 'rainbow'
149 |     im = Image.fromarray(np.uint8(cm.get_cmap(cmap)(ds_scaled)*255))
150 | 
151 |     image_bytes = io.BytesIO()
152 |     im.save(image_bytes, format='PNG')
153 |     image_bytes = image_bytes.getvalue()
154 | 
155 |     return FastApiResponse(content=image_bytes, media_type='image/png')
156 | 
157 | 
158 | # router order is important
159 | rest_collection = xpublish.Rest(
160 |     {"ww3": ds, "bio": ds}, routers=[base_router, edrrouter, meanrouter, image_router, zarr_router]
161 | )
162 | rest_collection.serve(log_level="trace", port=9005)
163 | 


--------------------------------------------------------------------------------
/xpublish/static/map.js:
--------------------------------------------------------------------------------
  1 | import * as zarr from 'https://cdn.skypack.dev/@manzt/zarr-lite';
  2 | 
  3 | mapboxgl.accessToken = 'pk.eyJ1IjoibWF0dC1pYW5udWNjaS1ycHMiLCJhIjoiY2wyaHh3cnZsMGk3YzNlcWg3bnFhcG1yZSJ9.L47O4NS5aFlWgCX0uUvgjA';
  4 | 
  5 | // From https://github.com/notenoughneon/await-semaphore/blob/master/index.ts
  6 | export class Semaphore {
  7 | 
  8 |     constructor(count) {
  9 |         this.count = count;
 10 |         this.tasks = [];
 11 |     }
 12 | 
 13 |     sched() {
 14 |         if (this.count > 0 && this.tasks.length > 0) {
 15 |             this.count--;
 16 |             let next = this.tasks.shift();
 17 |             if (next === undefined) {
 18 |                 throw "Unexpected undefined value in tasks list";
 19 |             }
 20 | 
 21 |             next();
 22 |         }
 23 |     }
 24 | 
 25 |     acquire() {
 26 |         return new Promise((res, _) => {
 27 |             var task = () => {
 28 |                 var released = false;
 29 |                 res(() => {
 30 |                     if (!released) {
 31 |                         released = true;
 32 |                         this.count++;
 33 |                         this.sched();
 34 |                     }
 35 |                 });
 36 |             };
 37 |             this.tasks.push(task);
 38 |            
 39 |             setTimeout(this.sched.bind(this), 0);
 40 |             //setImmediate(this.sched.bind(this));
 41 |         });
 42 |     }
 43 | 
 44 |     use(f) {
 45 |         return this.acquire()
 46 |         .then(release => {
 47 |             return f()
 48 |             .then((res) => {
 49 |                 release();
 50 |                 return res;
 51 |             })
 52 |             .catch((err) => {
 53 |                 release();
 54 |                 throw err;
 55 |             });
 56 |         });
 57 |     }
 58 | }
 59 | 
 60 | export class Mutex extends Semaphore {
 61 |     constructor() {
 62 |         super(1);
 63 |     }
 64 | }
 65 | 
 66 | 
 67 | class ZarrTileSource {
 68 | 
 69 |     constructor({ rootUrl, variable, initialTimestep, tileSize = 256, minZoom = 0, maxZoom = 10, bounds }) {
 70 |         this.type = 'custom';
 71 |         this.tileSize = tileSize;
 72 |         this.minZoom = minZoom; 
 73 |         this.maxZoom = maxZoom;
 74 |         this.bounds = bounds;
 75 | 
 76 |         this.rootUrl = rootUrl + `/${minZoom},${maxZoom}/${tileSize}`;
 77 |         this.variable = variable;
 78 |         this._timeIndex = initialTimestep;
 79 |     }
 80 | 
 81 |     /**
 82 |      * Get the current time index
 83 |      */
 84 |     get timeIndex() {
 85 |         return this._timeIndex;
 86 |     }
 87 | 
 88 |     /**
 89 |      * Set the time index to the given value.
 90 |      * @param {number} timeIndex
 91 |      */
 92 |     set timeIndex(newIndex) {
 93 |         this._timeIndex = newIndex;
 94 |         // TODO: For now the reload has to be triggered from user space
 95 |     }
 96 | 
 97 |     getLevelKey(level) {
 98 |         return `/${level}/${this.variable}`;
 99 |     }
100 | 
101 |     async getZarrArray(level) {
102 |         let levelKey = this.getLevelKey(level);
103 | 
104 |         const array = await this.zarrayMutex.use(async () => {
105 |             let array = this.arrayCache[levelKey];
106 | 
107 |             if (!array) {
108 |                 array = await zarr.openArray({store: this.store, path: levelKey});
109 |                 this.arrayCache[levelKey] = array;
110 |             }
111 |             return array;
112 |         });
113 | 
114 |         return array;
115 |     }
116 | 
117 |     async onAdd(map) {
118 |         this.store = new zarr.HTTPStore(this.rootUrl);
119 |         this.zarrayMutex = new Mutex();
120 |         this.chunkCache = {};
121 |         this.arrayCache = {};
122 |     }
123 | 
124 |     async loadTile({ x, y, z }) {
125 |         const array = await this.getZarrArray(z);
126 |         const chunkKey = `0.0.${x}.${y}`;
127 | 
128 |         let rawChunkData = this.chunkCache[chunkKey];
129 |         if (!rawChunkData) {
130 |             rawChunkData = await array.getRawChunk(chunkKey);
131 |             this.chunkCache[chunkKey] = rawChunkData;
132 |         }
133 | 
134 |         const width = rawChunkData.shape[rawChunkData.shape.length - 2];
135 |         const height = rawChunkData.shape[rawChunkData.shape.length - 1];
136 |         const tileSizeBytes = width * height;
137 |         const tileSliceStart = this._timeIndex * tileSizeBytes;
138 |         const tileSliceEnd = (this._timeIndex + 1) * tileSizeBytes;
139 |         const rawTileData = rawChunkData.data.slice(tileSliceStart, tileSliceEnd);
140 | 
141 |         const colorData = new Uint8ClampedArray(4 * width * height);
142 |         for (let i = 0; i < rawTileData.length; i++) {
143 |             const value = rawTileData[i];
144 |             const r = (value / 5.0) * 255;
145 |             colorData[4 * i] = r;
146 |             colorData[4 * i + 1] = 0;
147 |             colorData[4 * i + 2] = 0;
148 |             colorData[4 * i + 3] = isNaN(value) ? 0 : 255;
149 |         }
150 | 
151 |         return new ImageData(colorData, width);
152 |     };
153 | }
154 | 
155 | const map = new mapboxgl.Map({
156 |     container: document.getElementById('map'),
157 |     style: 'mapbox://styles/mapbox/dark-v8',
158 |     center: [-71, 40],
159 |     zoom: 6,
160 | });
161 | 
162 | map.on('load', () => {
163 |     map.addSource('ww3-wms', {
164 |         type: 'raster',
165 |         tileSize: 512, 
166 |         tiles: [
167 |             '/datasets/ww3/wms/?service=WMS&version=1.3.0&request=GetMap&layers=hs&crs=EPSG:3857&bbox={bbox-epsg-3857}&width=512&height=512&styles=raster/rainbow&colorscalerange=0,5&time=2022-04-12T21:00:00.00',
168 |         ]
169 |     });
170 | 
171 |     map.addLayer({
172 |         id: 'ww3-wms',
173 |         source: 'ww3-wms',
174 |         type: 'raster',
175 |         paint: {
176 |             'raster-opacity': 1.0,
177 |             'raster-fade-duration': 0,
178 |         },
179 |     });
180 | 
181 |     // map.addSource('ww3-zarr', new ZarrTileSource({
182 |     //     rootUrl: 'http://localhost:9005/datasets/ww3/tree',
183 |     //     variable: 'hs',
184 |     //     initialTimestep: 0,
185 |     //     tileSize: 256,
186 |     //     bounds: [-93.0, 20.0, -55.0, 55.0],
187 |     // }));
188 | 
189 |     // map.addLayer({
190 |     //     id: 'ww3-zarr',
191 |     //     source: 'ww3-zarr',
192 |     //     type: 'raster',
193 |     //     paint: {
194 |     //         'raster-opacity': 1.0,
195 |     //         'raster-fade-duration': 0,
196 |     //     },
197 |     // });
198 | 
199 |     // const zarrSource = map.getSource('ww3-zarr');
200 | 
201 |     // let timestepSlider = document.getElementById('timestep-slider');
202 |     // timestepSlider.oninput = e => {
203 |     //     const newTimeIndex = e.target.valueAsNumber;
204 |     //     zarrSource._implementation.timeIndex = newTimeIndex;
205 |     //     zarrSource.load();
206 |     // }
207 | 
208 | });


--------------------------------------------------------------------------------
/xpublish/dynamic_xpublish.md:
--------------------------------------------------------------------------------
  1 | # Dynamically loading datasets with xpublish
  2 | 
  3 | Currently [`xpublish.Rest`](https://xpublish.readthedocs.io/en/latest/generated/xpublish.Rest.html) requires datasets to be loaded ahead of time, but with a little subclassing, it's possible to load the datasets on demand.
  4 | 
  5 | ## Borrowing the Pangeo-Forge API
  6 | 
  7 | We attempted this with the [Pangeo-Forge](https://pangeo-forge.org/) recipe_runs API: https://api.pangeo-forge.org/recipe_runs/
  8 | 
  9 | ```json
 10 | [
 11 |   {
 12 |     "recipe_id": "noaa-oisst-avhrr-only",
 13 |     "bakery_id": 1,
 14 |     "feedstock_id": 1,
 15 |     "head_sha": "c975c63bec53029fcb299bbd98eac2abb43d2cfe",
 16 |     "version": "0.0",
 17 |     "started_at": "2022-03-04T13:27:43",
 18 |     "completed_at": "2022-03-04T13:37:43",
 19 |     "conclusion": "success",
 20 |     "status": "completed",
 21 |     "is_test": true,
 22 |     "dataset_type": "zarr",
 23 |     "dataset_public_url": "https://ncsa.osn.xsede.org/Pangeo/pangeo-forge-test/prod/recipe-run-5/pangeo-forge/staged-recipes/noaa-oisst-avhrr-only.zarr",
 24 |     "message": "{\"flow_id\": \"871c003c-e273-41d8-8440-2622492a2ead\"}",
 25 |     "id": 5
 26 |   },
 27 | ]
 28 | ```
 29 | 
 30 | ````{margin}
 31 | ```{admonition} Incomplete
 32 | 
 33 | This isn't the best representation of the datasets on Pangeo-Forge, as this API is focused around the processing steps, rather than the datasets themselves.
 34 | Therefore, some datasets are duplicated, and others may be missing when the API paginates, but it's good enough to test ideas out with.
 35 | 
 36 | ```
 37 | ````
 38 | 
 39 | With this API, we can use the `recipe_id` and the `dataset_public_url` to make a mapping of datasets that then we can use with xpublish.
 40 | 
 41 | With that we can build a mapper from `recipe_id`s to the Zarr URLs needed to load them.
 42 | 
 43 | ```py
 44 | def pangeo_forge_dataset_map():
 45 |     datasets = requests.get(recipe_runs_url)
 46 |     datasets = [r for r in datasets if r["dataset_public_url"]]
 47 |     return {r["recipe_id"]: r["dataset_public_url"] for r in datasets}
 48 | ```
 49 | 
 50 | ## Dataset Loader
 51 | 
 52 | From there, we need a function that can will take a `dataset_id` as a string, and return an xarray dataset. xpublish by default [curries a function](https://github.com/xarray-contrib/xpublish/blob/632a720aadba39cebaf062da7043835262d9fa3d/xpublish/rest.py#L16-L28) with the [datasets passed to the init method as a loader](https://github.com/xarray-contrib/xpublish/blob/632a720aadba39cebaf062da7043835262d9fa3d/xpublish/rest.py#L118), but we can get more creative and delay dataset access until needed.
 53 | 
 54 | ```py
 55 | def get_pangeo_forge_dataset(dataset_id: str) -> xr.Dataset:
 56 |     dataset_map = pangeo_forge_dataset_map()
 57 |     zarr_url = dataset_map[dataset_id]
 58 | 
 59 |     mapper = fsspec.get_mapper(zarr_url)
 60 |     ds = xr.open_zarr(mapper, consolidated=True)
 61 |     return ds
 62 | ```
 63 | 
 64 | ## Connecting it together in the `__init__` method
 65 | 
 66 | Instead of calling super in the init method and having to pass in mock info, we can override the whole init and change the signature.
 67 | 
 68 | ```py
 69 | class DynamicRest(xpublish.Rest):
 70 |     def __init__(self, routers=None, cache_kws=None, app_kws=None):
 71 |         self._get_dataset_func = get_pangeo_forge_dataset
 72 |         self._datasets = list(pangeo_forge_dataset_map().keys())
 73 |         dataset_route_prefix = "/datasets/{dataset_id}"
 74 | 
 75 |         self._app_routers = rest._set_app_routers(routers, dataset_route_prefix)
 76 | 
 77 |         self._app = None
 78 |         self._app_kws = {}
 79 |         if app_kws is not None:
 80 |             self._app_kws.update(app_kws)
 81 | 
 82 |         self._cache = None
 83 |         self._cache_kws = {"available_bytes": 1e6}
 84 |         if cache_kws is not None:
 85 |             self._cache_kws.update(cache_kws)
 86 | ```
 87 | 
 88 | The first three lines of the method are the key ones. We are setting our dataset function for the get_dataset_func, listing the ids of our datasets, and setting the prefix that we want to have multiple dataset access.
 89 | 
 90 | The rest of the method is unchanged.
 91 | 
 92 | From there, you can call `rest = DynamicRest()` or pass in routers as normal with xpublish.
 93 | 
 94 | ## What next?
 95 | 
 96 | There are a few things that could be further improved with this method.
 97 | The biggest improvement would be to cache the `dataset_id`s and datasets themselves.
 98 | 
 99 | Since both of these are used as FastAPI dependencies, they can also use dependencies themselves.
100 | 
101 | ````{margin}
102 | ```{admonition} Untested
103 | :class: warning
104 | 
105 | Use as is at your own peril.
106 | 
107 | ```
108 | ````
109 | 
110 | ```py
111 | def pangeo_forge_dataset_map(cache: cachey.Cache = Depends(get_cache)):
112 |     cache_key = "dataset_ids"
113 |     datasets = cache.get(cache_key)
114 |     if not datasets:
115 |         datasets = requests.get(recipe_runs_url)
116 |         datasets = [r for r in datasets if r["dataset_public_url"]]
117 |         datasets = {r["recipe_id"]: r["dataset_public_url"] for r in datasets}
118 |         cache.set(cache_key, datasets, NOT_TO_EXPENSIVE_CACHE_COST)
119 | 
120 |     return datasets
121 | 
122 | 
123 | def get_pangeo_forge_dataset(
124 |     dataset_id: str, 
125 |     datasets_map: dict = Depends(pangeo_forge_dataset_map),
126 |     cache: cachey.Cache = Depends(get_cache),
127 | ) -> xr.Dataset:
128 |     cache_key = f"dataset-{dataset_id}"
129 |     ds = cache.get(cache_key)
130 |     if not dataset:
131 |         zarr_url = dataset_map[dataset_id]
132 | 
133 |         mapper = fsspec.get_mapper(zarr_url)
134 |         ds = xr.open_zarr(mapper, consolidated=True)
135 | 
136 |         cache.set(cache_key, ds, EXPENSIVE_CACHE_COST)
137 | 
138 |     return ds
139 | ```
140 | 
141 | To truly use the datasets lazily, the dependency needs to be set.
142 | This isn't happening in the init method, but in [`_init_app`](https://github.com/xarray-contrib/xpublish/blob/632a720aadba39cebaf062da7043835262d9fa3d/xpublish/rest.py#L149), so we'd have to change things up a little.
143 | 
144 | ```py
145 | class DynamicRest(xpublish.Rest):
146 |     def __init__(self, routers=None, cache_kws=None, app_kws=None):
147 |         self._get_dataset_func = get_pangeo_forge_dataset
148 |         self._datasets = ["these", "are", "a", "lie"]
149 |         dataset_route_prefix = "/datasets/{dataset_id}"
150 | 
151 |         self._app_routers = rest._set_app_routers(routers, dataset_route_prefix)
152 | 
153 |         self._app = None
154 |         self._app_kws = {}
155 |         if app_kws is not None:
156 |             self._app_kws.update(app_kws)
157 | 
158 |         self._cache = None
159 |         self._cache_kws = {"available_bytes": 1e6}
160 |         if cache_kws is not None:
161 |             self._cache_kws.update(cache_kws)
162 | 
163 |     def _init_app(self):
164 |         super(self)._init_app()  # let it do the normal setup, then just re-override things
165 | 
166 |         self._app.dependency_overrides[get_dataset_ids] = pangeo_forge_dataset_map
167 | ```


--------------------------------------------------------------------------------
/xpublish/edr_router.py:
--------------------------------------------------------------------------------
  1 | """
  2 | OGC EDR router for datasets with CF convention metadata
  3 | """
  4 | import logging
  5 | from pathlib import Path
  6 | from tempfile import TemporaryDirectory
  7 | from typing import Optional
  8 | 
  9 | from fastapi import APIRouter, Depends, Response, Query, Request, HTTPException
 10 | import numpy as np
 11 | from pydantic import BaseModel, Field
 12 | import xarray as xr
 13 | from xpublish.dependencies import get_dataset
 14 | 
 15 | 
 16 | logger = logging.getLogger("uvicorn")
 17 | 
 18 | edr_router = APIRouter()
 19 | 
 20 | 
 21 | class EDRQuery(BaseModel):
 22 |     coords: str = Field(
 23 |         ..., title="Point in WKT format", description="Well Known Text coordinates"
 24 |     )
 25 |     z: Optional[str] = None
 26 |     datetime: Optional[str] = None
 27 |     parameters: Optional[str] = None
 28 |     crs: Optional[str] = None
 29 |     format: Optional[str] = None
 30 | 
 31 |     @property
 32 |     def point(self):
 33 |         from shapely import wkt
 34 | 
 35 |         return wkt.loads(self.coords)
 36 | 
 37 | 
 38 | def edr_query(
 39 |     coords: str = Query(
 40 |         ..., title="Point in WKT format", description="Well Known Text coordinates"
 41 |     ),
 42 |     z: Optional[str] = Query(
 43 |         None, title="Z axis", description="Height or depth of query"
 44 |     ),
 45 |     datetime: Optional[str] = Query(
 46 |         None,
 47 |         title="Datetime or datetime range",
 48 |         description="Query by a single ISO time or a range of ISO times. To query by a range, split the times with a slash",
 49 |     ),
 50 |     parameters: Optional[str] = Query(
 51 |         None, alias="parameter-name", description="xarray variables to query"
 52 |     ),
 53 |     crs: Optional[str] = Query(
 54 |         None, deprecated=True, description="CRS is not yet implemented"
 55 |     ),
 56 |     f: Optional[str] = Query(
 57 |         None,
 58 |         title="Response format",
 59 |         description="Data is returned as a CoverageJSON by default, but NetCDF is supported with `f=nc`, or CSV with `csv`",
 60 |     ),
 61 | ):
 62 |     return EDRQuery(
 63 |         coords=coords, z=z, datetime=datetime, parameters=parameters, crs=crs, format=f
 64 |     )
 65 | 
 66 | 
 67 | edr_query_params = set(["coords", "z", "datetime", "parameter-name", "crs", "f"])
 68 | 
 69 | 
 70 | @edr_router.get("/position", summary="Position query")
 71 | def get_position(
 72 |     request: Request,
 73 |     query: EDRQuery = Depends(edr_query),
 74 |     dataset: xr.Dataset = Depends(get_dataset),
 75 | ):
 76 |     """
 77 |     Return position data based on WKT Point(lon lat) coordinate.
 78 | 
 79 |     Extra selecting/slicing parameters can be provided as additional query strings.
 80 |     """
 81 |     try:
 82 |         ds = dataset.cf.sel(X=query.point.x, Y=query.point.y, method="nearest")
 83 |     except KeyError:
 84 |         raise HTTPException(
 85 |             status_code=404,
 86 |             detail="Dataset does not have CF Convention compliant metadata",
 87 |         )
 88 | 
 89 |     if query.z:
 90 |         ds = dataset.cf.sel(Z=query.z, method="nearest")
 91 | 
 92 |     if query.datetime:
 93 |         datetimes = query.datetime.split("/")
 94 | 
 95 |         try:
 96 |             if len(datetimes) == 1:
 97 |                 ds = ds.cf.sel(T=datetimes[0], method="nearest")
 98 |             elif len(datetimes) == 2:
 99 |                 ds = ds.cf.sel(T=slice(datetimes[0], datetimes[1]))
100 |             else:
101 |                 raise HTTPException(
102 |                     status_code=404, detail="Invalid datetimes submitted"
103 |                 )
104 |         except ValueError as e:
105 |             logger.error("Error with datetime", exc_info=1)
106 |             raise HTTPException(
107 |                 status_code=404, detail=f"Invalid datetime ({e})"
108 |             ) from e
109 | 
110 |     if query.parameters:
111 |         try:
112 |             ds = ds.cf[query.parameters.split(",")]
113 |         except KeyError as e:
114 |             raise HTTPException(status_code=404, detail=f"Invalid variable: {e}")
115 | 
116 |         logger.debug(f"Dataset filtered by query params {ds}")
117 | 
118 |     query_params = dict(request.query_params)
119 |     for query_param in request.query_params:
120 |         if query_param in edr_query_params:
121 |             del query_params[query_param]
122 | 
123 |     method = "nearest"
124 | 
125 |     for key, value in query_params.items():
126 |         split_value = value.split("/")
127 |         if len(split_value) == 1:
128 |             continue
129 |         elif len(split_value) == 2:
130 |             query_params[key] = slice(split_value[0], split_value[1])
131 |             method = None
132 |         else:
133 |             raise HTTPException(404, f"Too many values for selecting {key}")
134 | 
135 |     ds = ds.sel(query_params, method=method)
136 | 
137 |     if query.format == "nc":
138 |         with TemporaryDirectory() as tmpdir:
139 |             path = Path(tmpdir) / "position.nc"
140 |             ds.to_netcdf(path)
141 | 
142 |             with path.open("rb") as f:
143 |                 return Response(
144 |                     f.read(),
145 |                     media_type="application/netcdf",
146 |                     headers={
147 |                         "Content-Disposition": 'attachment; filename="position.nc"'
148 |                     },
149 |                 )
150 | 
151 |     if query.format == "csv":
152 |         ds = ds.squeeze()
153 |         df = ds.to_pandas()
154 |         csv = df.to_csv()
155 | 
156 |         return Response(
157 |             csv,
158 |             media_type="text/csv",
159 |             headers={"Content-Disposition": 'attachment; filename="position.csv"'},
160 |         )
161 | 
162 |     return to_covjson(ds)
163 | 
164 | 
165 | def to_covjson(ds: xr.Dataset):
166 |     """ Transform an xarray dataset to CoverageJSON """
167 | 
168 |     covjson = {
169 |         "type": "Coverage",
170 |         "domain": {
171 |             "type": "Domain",
172 |             "domainType": "Grid",
173 |             "axes": {},
174 |             "referencing": [],
175 |         },
176 |         "parameters": {},
177 |         "ranges": {},
178 |     }
179 | 
180 |     inverted_dims = invert_cf_dims(ds)
181 | 
182 |     for name, da in ds.coords.items():
183 |         if "datetime" in str(da.dtype):
184 |             values = da.dt.strftime("%Y-%m-%dT%H:%M:%S%Z").values.tolist()
185 |         else:
186 |             values = da.values
187 |             values = np.where(np.isnan(values), None, values).tolist()
188 |         try:
189 |             if not isinstance(values, list):
190 |                 values = [values.item()]
191 |             covjson["domain"]["axes"][inverted_dims.get(name, name)] = {
192 |                 "values": values
193 |             }
194 |         except (ValueError, TypeError):
195 |             pass
196 | 
197 |     for var in ds.variables:
198 |         if var not in ds.coords:
199 |             da = ds[var]
200 | 
201 |             parameter = {"type": "Parameter", "observedProperty": {}}
202 | 
203 |             try:
204 |                 parameter["description"] = {"en": da.attrs["long_name"]}
205 |                 parameter["observedProperty"]["label"] = {"en": da.attrs["long_name"]}
206 |             except KeyError:
207 |                 pass
208 | 
209 |             try:
210 |                 parameter["unit"] = {"label": {"en": da.attrs["units"]}}
211 |             except KeyError:
212 |                 pass
213 | 
214 |             covjson["parameters"][var] = parameter
215 | 
216 |             values = da.values.ravel()
217 |             if "datetime" in str(da.dtype):
218 |                 values = da.dt.strftime("%Y-%m-%dT%H:%M:%S%Z").values.tolist()
219 |                 dataType = "string"
220 |             else:
221 |                 values = np.where(np.isnan(values), None, values).tolist()
222 | 
223 |                 if da.dtype.kind in ("i", "u"):
224 |                     values = [int(v) for v in values]
225 |                     dataType = "integer"
226 |                 elif da.dtype.kind in ("f", "c"):
227 |                     dataType = "float"
228 |                 else:
229 |                     dataType = "string"
230 | 
231 |             cov_range = {
232 |                 "type": "NdArray",
233 |                 "dataType": dataType,
234 |                 "axisNames": [inverted_dims.get(dim, dim) for dim in da.dims],
235 |                 "shape": da.shape,
236 |                 "values": values,
237 |             }
238 | 
239 |             covjson["ranges"][var] = cov_range
240 | 
241 |     return covjson
242 | 
243 | 
244 | def invert_cf_dims(ds):
245 |     inverted = {}
246 |     for key, values in ds.cf.axes.items():
247 |         for value in values:
248 |             inverted[value] = key.lower()
249 |     return inverted
250 | 


--------------------------------------------------------------------------------
/xpublish/tree_router.py:
--------------------------------------------------------------------------------
  1 | from typing import Optional
  2 | 
  3 | import cachey
  4 | from fastapi import APIRouter, Depends, Response
  5 | import mercantile
  6 | from ndpyramid.utils import (
  7 |     add_metadata_and_zarr_encoding,
  8 |     get_version,
  9 |     multiscales_template,
 10 | )
 11 | import numpy as np
 12 | import xarray as xr
 13 | from xarray.backends.zarr import (
 14 |     DIMENSION_KEY,
 15 |     encode_zarr_attr_value,
 16 |     encode_zarr_variable,
 17 |     extract_zarr_variable_encoding,
 18 | )
 19 | from xpublish.dependencies import get_dataset, get_cache
 20 | from xpublish.utils.api import DATASET_ID_ATTR_KEY
 21 | from xpublish.utils.zarr import (
 22 |     jsonify_zmetadata,
 23 |     get_data_chunk,
 24 |     zarr_metadata_key,
 25 |     _extract_dataarray_zattrs,
 26 |     _extract_zarray,
 27 |     _extract_fill_value,
 28 |     encode_chunk
 29 | )
 30 | from zarr.storage import array_meta_key, attrs_key, default_compressor, group_meta_key
 31 | from rasterio.transform import Affine
 32 | from rasterio.enums import Resampling
 33 | 
 34 | 
 35 | tree_router = APIRouter()
 36 | 
 37 | 
 38 | def cache_key_for(ds: xr.Dataset, key: str):
 39 |     return ds.attrs.get(DATASET_ID_ATTR_KEY, "") + f"-tree/{key}" 
 40 | 
 41 | def cache_key_for_level(ds: xr.Dataset, key: str, level: int):
 42 |     return ds.attrs.get(DATASET_ID_ATTR_KEY, "") + f"-tree/{level}/{key}"
 43 | 
 44 | 
 45 | def extract_zarray(da: xr.DataArray, encoding: dict, dtype: np.dtype, level: int, tile_size: int):
 46 |     """ helper function to extract zarr array metadata. """
 47 | 
 48 |     pixels_per_tile = tile_size
 49 |     tile_count = 2 ** level
 50 |     pixel_count = tile_count * pixels_per_tile
 51 |     
 52 |     data_shape = list(da.shape)
 53 |     data_shape[-2:] = [pixel_count, pixel_count]
 54 |     
 55 |     chunk_shape = list(da.shape)
 56 |     chunk_shape[-2:] = [pixels_per_tile, pixels_per_tile]
 57 | 
 58 |     meta = {
 59 |         'compressor': encoding.get('compressor', da.encoding.get('compressor', default_compressor)),
 60 |         'filters': encoding.get('filters', da.encoding.get('filters', None)),
 61 |         'chunks': chunk_shape,
 62 |         'dtype': dtype.str,
 63 |         'fill_value': _extract_fill_value(da, dtype),
 64 |         'order': 'C',
 65 |         'shape': data_shape,
 66 |         'zarr_format': 2,
 67 |     }
 68 | 
 69 |     if meta['chunks'] is None:
 70 |         meta['chunks'] = da.shape
 71 | 
 72 |     # # validate chunks
 73 |     # if isinstance(da.data, dask_array_type):
 74 |     #     var_chunks = tuple([c[0] for c in da.data.chunks])
 75 |     # else:
 76 |     #     var_chunks = da.shape
 77 |     # if not var_chunks == tuple(meta['chunks']):
 78 |     #     raise ValueError('Encoding chunks do not match inferred chunks')
 79 | 
 80 |     # meta['chunks'] = list(meta['chunks'])  # return chunks as a list
 81 | 
 82 |     return meta
 83 | 
 84 | def create_tree_metadata(levels: list[int, int], tile_size: int, dataset: xr.Dataset):
 85 |     save_kwargs = {"levels": range(levels[0], levels[1]), "tile_size": tile_size}
 86 |     attrs = {
 87 |         "multiscales": multiscales_template(
 88 |             datasets=[{"path": str(i)} for i in range(levels)],
 89 |             type="reduce",
 90 |             method="pyramid_reproject",
 91 |             version=get_version(),
 92 |             kwargs=save_kwargs,
 93 |         )
 94 |     }
 95 | 
 96 |     metadata = {
 97 |         "metadata": {".zattrs": attrs, ".zgroup": {"zarr_format": 2}},
 98 |         "zarr_consolidated_format": 1,
 99 |     }
100 | 
101 |     for level in range(levels):
102 |         metadata["metadata"][f"{level}/.zgroup"] = {"zarr_format": 2}
103 | 
104 |         for key, da in dataset.variables.items():
105 |             # da needs to be resized based on level
106 |             encoded_da = encode_zarr_variable(da, name=key)
107 |             encoding = extract_zarr_variable_encoding(da)
108 |             metadata["metadata"][
109 |                 f"{level}/{key}/{attrs_key}"
110 |             ] = _extract_dataarray_zattrs(da)
111 |             metadata["metadata"][f"{level}/{key}/{array_meta_key}"] = extract_zarray(
112 |                 encoded_da, encoding, encoded_da.dtype, level
113 |             )
114 | 
115 |             # convert compressor to dict
116 |             compressor = metadata['metadata'][f'{level}/{key}/{array_meta_key}']['compressor']
117 |             if compressor is not None:
118 |                 compressor_config = metadata['metadata'][f'{level}/{key}/{array_meta_key}'][
119 |                     'compressor'
120 |                 ].get_config()
121 |                 metadata['metadata'][f'{level}/{key}/{array_meta_key}']['compressor'] = compressor_config
122 | 
123 |     return metadata
124 | 
125 | 
126 | def get_levels(levels: str = '0,30'):
127 |     """
128 |     Extracts the levels from a {min}/{max}}
129 |     """
130 |     return [int(l) for l in levels.split(',')]
131 | 
132 | 
133 | def get_tile_size(tile_size: int = 256):
134 |     """
135 |     Common dependency for the tile size in pixels 
136 |     """
137 |     return tile_size
138 | 
139 | def get_tree_metadata(
140 |     levels: int = Depends(get_levels),
141 |     tile_size: int = Depends(get_tile_size),
142 |     dataset: xr.Dataset = Depends(get_dataset),
143 |     cache: cachey.Cache = Depends(get_cache),
144 | ):
145 |     cache_key = cache_key_for(dataset, zarr_metadata_key)
146 |     metadata = cache.get(cache_key)
147 | 
148 |     if metadata is None:
149 |         metadata = create_tree_metadata(levels, tile_size, dataset)
150 | 
151 |         cache.put(cache_key, metadata, 99999)
152 | 
153 |     return metadata
154 | 
155 | def get_variable_zarray(level: int, var_name: str, tile_size: int = Depends(get_tile_size), ds: xr.Dataset = Depends(get_dataset), cache: cachey.Cache = Depends(get_cache)):
156 |     """
157 |     Returns the zarray metadata for a given level and dataarray.
158 |     """
159 |     da = ds[var_name]
160 |     encoded_da = encode_zarr_variable(da, name=var_name)
161 |     encoding = extract_zarr_variable_encoding(da)
162 | 
163 |     array_metadata = extract_zarray(encoded_da, encoding, encoded_da.dtype, level, tile_size)
164 | 
165 |     # convert compressor to dict
166 |     compressor = array_metadata['compressor']
167 |     if compressor is not None:
168 |         compressor_config = array_metadata['compressor'].get_config()
169 |         array_metadata['compressor'] = compressor_config
170 | 
171 |     return array_metadata
172 | 
173 | 
174 | @tree_router.get("/{levels}/{tile_size}/.zmetadata")
175 | def get_tree_metadata(metadata: dict = Depends(get_tree_metadata)):
176 |     return metadata
177 | 
178 | 
179 | @tree_router.get("/{levels}/{tile_size}/.zgroup")
180 | def get_top_zgroup(metadata: dict = Depends(get_tree_metadata)):
181 |     return metadata["metadata"][".zgroup"]
182 | 
183 | 
184 | @tree_router.get("/{levels}/{tile_size}/.zattrs")
185 | def get_top_zattrs(levels: int = Depends(get_levels), tile_size: int = Depends(get_tile_size)):
186 |     return {
187 |         "multiscales": multiscales_template(
188 |             datasets=[{"path": str(i)} for i in range(levels)],
189 |             type="reduce",
190 |             method="pyramid_reproject",
191 |             version=get_version(),
192 |             kwargs={"levels": levels, "tile_size": tile_size},
193 |         )
194 |     }
195 | 
196 | 
197 | @tree_router.get("/{levels}/{tile_size}/{level}/.zgroup")
198 | def get_zgroup(level: int):
199 |     return {"zarr_format": 2}
200 | 
201 | 
202 | @tree_router.get("/{levels}/{tile_size}/{level}/{var_name}/.zattrs")
203 | def get_variable_zattrs(
204 |     level: int, var_name: str, dataset = Depends(get_dataset)
205 | ):
206 |     return _extract_dataarray_zattrs(dataset[var_name])
207 | 
208 | 
209 | @tree_router.get("/{levels}/{tile_size}/{level}/{var_name}/.zarray")
210 | def get_variable_zarray(
211 |     zarray: dict = Depends(get_variable_zarray)
212 | ):
213 |     return zarray
214 | 
215 | 
216 | @tree_router.get("/{levels}/{tile_size}/{level}/{var_name}/{chunk}")
217 | def get_variable_chunk(
218 |     level: int, 
219 |     var_name: str, 
220 |     chunk: str, 
221 |     dataset: xr.Dataset = Depends(get_dataset),
222 |     tile_size: int = Depends(get_tile_size),
223 | ):
224 |     if not dataset.rio.crs:
225 |         dataset = dataset.rio.write_crs(4326)
226 |     ds = dataset.squeeze()
227 |     
228 |     # Extract the requested tile metadata
229 |     chunk_coords = [int(i) for i in chunk.split(".")]
230 |     x = chunk_coords[-2]
231 |     y = chunk_coords[-1]
232 |     z = level
233 |     
234 |     bbox = mercantile.xy_bounds(x, y, z)
235 | 
236 |     dim = (2 ** z) * tile_size
237 |     transform = Affine.translation(bbox.left, bbox.top) * Affine.scale(
238 |        (20037508.342789244 * 2) / float(dim), -(20037508.342789244 * 2) / float(dim)
239 |     )
240 | 
241 |     resampled_data = ds[var_name].rio.reproject(
242 |         'EPSG:3857', 
243 |         shape=(tile_size, tile_size), 
244 |         resampling=Resampling.cubic, 
245 |         transform=transform,
246 |     )
247 | 
248 |     resampled_data_array = np.asarray(resampled_data)
249 | 
250 |     encoded_chunk = encode_chunk(
251 |         resampled_data_array.tobytes(),                     
252 |         filters=resampled_data.encoding.get('filters', None),
253 |         compressor=resampled_data.encoding.get('compressor', default_compressor)
254 |     )
255 |     return Response(encoded_chunk, media_type='application/octet-stream')


--------------------------------------------------------------------------------
/xpublish/test_get_chunk.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [
  8 |     {
  9 |      "data": {
 10 |       "text/plain": [
 11 |        "<zarr.core.Array '/hs' (1, 73, 256, 256) float32 read-only>"
 12 |       ]
 13 |      },
 14 |      "execution_count": 1,
 15 |      "metadata": {},
 16 |      "output_type": "execute_result"
 17 |     }
 18 |    ],
 19 |    "source": [
 20 |     "%matplotlib inline\n",
 21 |     "import zarr\n",
 22 |     "import matplotlib.pyplot as plt\n",
 23 |     "\n",
 24 |     "l0 = zarr.open_group('http://0.0.0.0:9005/datasets/ww3/tree/0,12/256/0', mode='r')\n",
 25 |     "\n",
 26 |     "hs = l0['hs']\n",
 27 |     "hs"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "code",
 32 |    "execution_count": 2,
 33 |    "metadata": {},
 34 |    "outputs": [
 35 |     {
 36 |      "data": {
 37 |       "text/html": [
 38 |        "<table class=\"zarr-info\"><tbody><tr><th style=\"text-align: left\">Name</th><td style=\"text-align: left\">/hs</td></tr><tr><th style=\"text-align: left\">Type</th><td style=\"text-align: left\">zarr.core.Array</td></tr><tr><th style=\"text-align: left\">Data type</th><td style=\"text-align: left\">float32</td></tr><tr><th style=\"text-align: left\">Shape</th><td style=\"text-align: left\">(1, 73, 256, 256)</td></tr><tr><th style=\"text-align: left\">Chunk shape</th><td style=\"text-align: left\">(1, 73, 256, 256)</td></tr><tr><th style=\"text-align: left\">Order</th><td style=\"text-align: left\">C</td></tr><tr><th style=\"text-align: left\">Read-only</th><td style=\"text-align: left\">True</td></tr><tr><th style=\"text-align: left\">Compressor</th><td style=\"text-align: left\">Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)</td></tr><tr><th style=\"text-align: left\">Store type</th><td style=\"text-align: left\">zarr.storage.FSStore</td></tr><tr><th style=\"text-align: left\">No. bytes</th><td style=\"text-align: left\">19136512 (18.2M)</td></tr><tr><th style=\"text-align: left\">Chunks initialized</th><td style=\"text-align: left\">0/1</td></tr></tbody></table>"
 39 |       ],
 40 |       "text/plain": [
 41 |        "Name               : /hs\n",
 42 |        "Type               : zarr.core.Array\n",
 43 |        "Data type          : float32\n",
 44 |        "Shape              : (1, 73, 256, 256)\n",
 45 |        "Chunk shape        : (1, 73, 256, 256)\n",
 46 |        "Order              : C\n",
 47 |        "Read-only          : True\n",
 48 |        "Compressor         : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)\n",
 49 |        "Store type         : zarr.storage.FSStore\n",
 50 |        "No. bytes          : 19136512 (18.2M)\n",
 51 |        "Chunks initialized : 0/1"
 52 |       ]
 53 |      },
 54 |      "execution_count": 2,
 55 |      "metadata": {},
 56 |      "output_type": "execute_result"
 57 |     }
 58 |    ],
 59 |    "source": [
 60 |     "hs.info"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "code",
 65 |    "execution_count": 3,
 66 |    "metadata": {},
 67 |    "outputs": [
 68 |     {
 69 |      "data": {
 70 |       "text/plain": [
 71 |        "array([[nan, nan, nan, ..., nan, nan, nan],\n",
 72 |        "       [nan, nan, nan, ..., nan, nan, nan],\n",
 73 |        "       [nan, nan, nan, ..., nan, nan, nan],\n",
 74 |        "       ...,\n",
 75 |        "       [nan, nan, nan, ..., nan, nan, nan],\n",
 76 |        "       [nan, nan, nan, ..., nan, nan, nan],\n",
 77 |        "       [nan, nan, nan, ..., nan, nan, nan]], dtype=float32)"
 78 |       ]
 79 |      },
 80 |      "execution_count": 3,
 81 |      "metadata": {},
 82 |      "output_type": "execute_result"
 83 |     }
 84 |    ],
 85 |    "source": [
 86 |     "tile_data = hs[0, 0, :, :]\n",
 87 |     "tile_data"
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "markdown",
 92 |    "metadata": {},
 93 |    "source": []
 94 |   },
 95 |   {
 96 |    "cell_type": "code",
 97 |    "execution_count": 4,
 98 |    "metadata": {},
 99 |    "outputs": [
100 |     {
101 |      "data": {
102 |       "text/plain": [
103 |        "<matplotlib.image.AxesImage at 0x10e63f220>"
104 |       ]
105 |      },
106 |      "execution_count": 4,
107 |      "metadata": {},
108 |      "output_type": "execute_result"
109 |     },
110 |     {
111 |      "data": {
112 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAQYAAAD8CAYAAACVSwr3AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAQfElEQVR4nO3da4xcd33G8e8zsxfbaye+rS1fYycxBVsVBgYXKRVQUIlJVTm8SOW8QJaaylQ1EqjQ1glqyQtSKJAgtRWoRkRYCAiWAMWtKCW1kCJUQbKGkMQxIYtjOxsv9jqOb+v1XmZ/fbEnydj/Xe9md86eWe/zkVZz5j//M/v4xHp8zpk5J4oIzMxqlYoOYGaNx8VgZgkXg5klXAxmlnAxmFnCxWBmidyKQdJWSc9L6pS0O6/fY2b1pzy+xyCpDPwW+FOgC3gSuDsinqv7LzOzustrj2EL0BkRRyJiAHgE2JbT7zKzOmvK6X1XAS/VPO8C/misyUuXLo1169blFMXMAA4ePHg6ItonMjevYtAoY1ccs0jaCewEWLt2LR0dHTlFMTMASccmOjevQ4kuYE3N89XAidoJEbEnIioRUWlvn1CJmdk0yasYngQ2SFovqQXYDuzP6XeZWZ3lcigREUOSPg78D1AGHo6IQ3n8LjOrv7zOMRARPwJ+lNf7m1l+/M1HM0u4GMws4WIws4SLwcwSLgYzS7gYzCzhYjCzhIvBzBIuBjNLuBjMLOFiMLOEi8HMEi4GM0u4GMws4WIws4SLwcwSLgYzS7gYzCzhYjCzhIvBzBIuBjNLuBjMLOFiMLOEi8HMEi4GM0u4GMws4WIws4SLwcwSLgYzS7gYzCzhYjCzhIvBzBIuBjNLuBjMLNE0lZUlHQUuAFVgKCIqkhYD3wPWAUeBv4iIV6cW08ymUz32GP4kIjZHRCV7vhs4EBEbgAPZczObQfI4lNgG7M2W9wJ35vA7zCxHUy2GAH4i6aCkndnY8ojoBsgel422oqSdkjokdfT09EwxhpnV05TOMQC3RcQJScuAxyT9ZqIrRsQeYA9ApVKJKeYwszqa0h5DRJzIHk8BPwS2ACclrQDIHk9NNaSZTa9JF4OkNkkLXlsGPgQ8C+wHdmTTdgCPTjWkmU2vqRxKLAd+KOm19/lORPxY0pPAPkn3AMeBu6Ye08ym06SLISKOAG8fZfwV4INTCWVmxfI3H80s4WIws4SLwcwSLgYzS7gYzCzhYjCzhIvBzBIuBjNLuBjMLOFiMLOEi8HMEi4GM0u4GMws4WIws4SLwcwSLgYzS7gYzCzhYjCzhIvBzBIuBjNLuBjMLOFiMLOEi8HMEi4GM0u4GMws4WIws4SLYQZY9+9fLjqCzTIuhga34fMPUVoywE17vlR0FJtFXAwN7oV7/5Yjd9/HuzYd4a8PfrToODZLuBhmiJZSlf7qpP/n5GZviv+mNbA/3P9PrF90hhta+ljYMlB0HJtFvMfQYN534NOvL69ZeJZzA3P4vyO38PhLt7Cy9WxxwWxW8R5DA1n3rc+zaHErb//Pf2ThvD7ammGgWuaGBZcYGi7x8zPri45os8S4ewySHpZ0StKzNWOLJT0m6YXscVHNa/dK6pT0vKTb8wp+Pdm86yHW/duDzGkb4OKlOSyc18fyeRe4ef5p7lz9a6Tg4vm5HDm9pOioNktM5FDim8DWq8Z2AwciYgNwIHuOpI3AdmBTts5XJZXrlvY6de4tQan9MlKwcvE5KkuO87b5v2c4SrzY185gtUxTc5WB/uaio9osMW4xRMTjwJmrhrcBe7PlvcCdNeOPRER/RLwIdAJb6hP1+jU8r8rShRdpX9DL3KZBequtXBpu4W1tJ1jZepa1C8/SOmeQUmm46Kg2S0z2HMPyiOgGiIhuScuy8VXAz2vmdWVjdg2l3jLrbjzDexYeoX+4mf/u3sRfrv0ZS5ousqb5FZYtP8/FpXM43r+46Kg2S9T75KNGGYtRJ0o7gZ0Aa9eurXOMmaW8oo+B6sgR162tJ/mbdaf4wNwT/Ffvet495xjvm/sK/THMzy+3F5zUZovJFsNJSSuyvYUVwKlsvAtYUzNvNXBitDeIiD3AHoBKpTJqeVzv3vvnX+TC6ia0eh5PDa6hqTTMu25s5h1zj9JTFW9t6WaeqhwbauLE0GIOX171+jGbWZ4mWwz7gR3AF7LHR2vGvyPpIWAlsAF4Yqohr0cfevf9lG+cw+B8QSng1RZ+dXwNPe3z+e2C5fRWW7h53mmaS1XmlQY4dnkJHafWcO+mopPbbDBuMUj6LvB+YKmkLuCzjBTCPkn3AMeBuwAi4pCkfcBzwBCwKyKqOWWfsba+dTelnjOUNt1Ey9kARFOvqJ6by7FXWnm5/UYGe1v43fKlSEGE6B8qc+FMW9HRbZYYtxgi4u4xXvrgGPMfAB6YSqjryU3/8SWOfezvrhj78W++wNY/+AcU0NQfcB6iBNUWoeEyg5fbmHtOnDm/mGgJYm6VlrYBWl72x5U2PfzNx5zc+r3PUR0q0bpkiFu/9zmGTs/h6K43vu58+rbltFwYZqhVRAmaLkFpMGjqA1VhaC4MtwhVS0S5hIZbKPUX+AeyWcXFkJP2RRfoPrmQgcvNLFl8kdPnWq94/dwtUKqW6V9SpdQv5vSUQFDug+ZLQTQBMfIhz7wTgGLkfITZNPBFVDm5sfUyC27so7l1iFfPtdG2rJebv/Lg668P3hBcXjlIeUk/1UVDXFpd5dKqKhdvGqZ35Rt7EfNfChZ2DjC/u0rrq7PywxsrgPcYcjIcoqlcZaC3DQZKPHf3fVe8HjcMwlCJoYvNI3sKS/qRgqGBMsN9rbT9Pph/vI/yxX4olWg5W6L5fOsYv82svlwMORkcLvOrPxv7HOyxHbtfX17/7X8eWae3mdLFJpouiqa+Kk1n+zi/ceT6tAWdF2g505dvaLOMiyEnP/3Ag1c8X/+vDzLcMjzy3dBycOyv/v7113SqlTm/LxFlGC5D80W4tKzMcNMiepeXabkQRHOZ4SYf+dn0UETxx62VSiU6OjqKjmF2XZN0MCIqE5nrf4LMLOFiMLOEi8HMEi4GM0u4GMws4WIws4SLwcwSLgYzS7gYzCzhYjCzhIvBzBIuBjNLuBjMLOFiMLOEi8HMEi4GM0u4GMws4WIws4SLwcwSLgYzS7gYzCzhYjCzhIvBzBIuBjNLuBjMLOFiMLOEi8HMEuMWg6SHJZ2S9GzN2P2SXpb0VPZzR81r90rqlPS8pNvzCm5m+ZnIHsM3ga2jjH8lIjZnPz8CkLQR2A5sytb5qqRyvcKa2fQYtxgi4nHgzATfbxvwSET0R8SLQCewZQr5zKwAUznH8HFJT2eHGouysVXASzVzurKxhKSdkjokdfT09EwhhpnV22SL4WvALcBmoBt4MBvXKHNjtDeIiD0RUYmISnt7+yRjmFkeJlUMEXEyIqoRMQx8nTcOF7qANTVTVwMnphbRzKbbpIpB0oqapx8BXvvEYj+wXVKrpPXABuCJqUU0s+nWNN4ESd8F3g8sldQFfBZ4v6TNjBwmHAU+BhARhyTtA54DhoBdEVHNJbmZ5UYRo54CmFaVSiU6OjqKjmF2XZN0MCIqE5nrbz6aWcLFYGYJF4OZJVwMZpZwMZhZwsVgZgkXg5klXAxmlnAxmFnCxWBmCReDmSVcDGaWcDGYWcLFYGYJF4OZJVwMZpZwMZhZwsVgZgkXg5klXAxmlnAxmFnCxWBmCReDmSVcDGaWcDGYWcLFYGYJF4OZJVwMZpZwMZhZwsVgZgkXg5klXAxmlnAxmFli3GKQtEbSTyUdlnRI0iey8cWSHpP0Qva4qGadeyV1Snpe0u15/gHMrP4msscwBHwqIt4GvAfYJWkjsBs4EBEbgAPZc7LXtgObgK3AVyWV8whvZvkYtxgiojsifpktXwAOA6uAbcDebNpe4M5seRvwSET0R8SLQCewpc65zSxHb+ocg6R1wDuAXwDLI6IbRsoDWJZNWwW8VLNaVzZmZjPEhItB0nzg+8AnI+L8taaOMhajvN9OSR2SOnp6eiYaw8ymwYSKQVIzI6Xw7Yj4QTZ8UtKK7PUVwKlsvAtYU7P6auDE1e8ZEXsiohIRlfb29snmN7McTORTCQHfAA5HxEM1L+0HdmTLO4BHa8a3S2qVtB7YADxRv8hmlremCcy5Dfgo8Iykp7Kx+4AvAPsk3QMcB+4CiIhDkvYBzzHyicauiKjWO7iZ5WfcYoiInzH6eQOAD46xzgPAA1PIZWYF8jcfzSzhYjCzhIvBzBIuBjNLuBjMLOFiMLOEi8HMEi4GM0u4GMws4WIws4SLwcwSLgYzS7gYzCzhYjCzhIvBzBIuBjNLuBjMLOFiMLOEi8HMEi4GM0u4GMws4WIws4SLwcwSLgYzS7gYzCzhYjCzhIvBzBIuBjNLuBjMLOFiMLOEi8HMEi4GM0u4GMws4WIws8S4xSBpjaSfSjos6ZCkT2Tj90t6WdJT2c8dNevcK6lT0vOSbs/zD2Bm9dc0gTlDwKci4peSFgAHJT2WvfaViPhy7WRJG4HtwCZgJfC/kt4SEdV6Bjez/Iy7xxAR3RHxy2z5AnAYWHWNVbYBj0REf0S8CHQCW+oR1symx5s6xyBpHfAO4BfZ0MclPS3pYUmLsrFVwEs1q3UxSpFI2impQ1JHT0/Pm09uZrmZcDFImg98H/hkRJwHvgbcAmwGuoEHX5s6yuqRDETsiYhKRFTa29vfbG4zy9GEikFSMyOl8O2I+AFARJyMiGpEDANf543DhS5gTc3qq4ET9YtsZnmbyKcSAr4BHI6Ih2rGV9RM+wjwbLa8H9guqVXSemAD8ET9IptZ3ibyqcRtwEeBZyQ9lY3dB9wtaTMjhwlHgY8BRMQhSfuA5xj5RGOXP5Ewm1kUkRz+T38IqQfoBU4XnWUCljIzcsLMyTpTcsLMyTpazpsiYkIn9BqiGAAkdUREpegc45kpOWHmZJ0pOWHmZJ1qTn8l2swSLgYzSzRSMewpOsAEzZScMHOyzpScMHOyTilnw5xjMLPG0Uh7DGbWIAovBklbs8uzOyXtLjrP1SQdlfRMdml5Rza2WNJjkl7IHheN9z455HpY0ilJz9aMjZmryEvhx8jacJftX+MWAw21XaflVggRUdgPUAZ+B9wMtAC/BjYWmWmUjEeBpVeNfRHYnS3vBv6lgFzvBd4JPDteLmBjtm1bgfXZNi8XnPV+4NOjzC0sK7ACeGe2vAD4bZanobbrNXLWbZsWvcewBeiMiCMRMQA8wshl241uG7A3W94L3DndASLiceDMVcNj5Sr0Uvgxso6lsKwx9i0GGmq7XiPnWN50zqKLYUKXaBcsgJ9IOihpZza2PCK6YeQ/ErCssHRXGitXo27nSV+2n7erbjHQsNu1nrdCqFV0MUzoEu2C3RYR7wQ+DOyS9N6iA01CI27nKV22n6dRbjEw5tRRxqYta71vhVCr6GJo+Eu0I+JE9ngK+CEju2AnX7u6NHs8VVzCK4yVq+G2czToZfuj3WKABtyued8KoehieBLYIGm9pBZG7hW5v+BMr5PUlt3nEkltwIcYubx8P7Ajm7YDeLSYhImxcjXcpfCNeNn+WLcYoMG267TcCmE6zvaOc4b1DkbOqv4O+EzRea7KdjMjZ3N/DRx6LR+wBDgAvJA9Li4g23cZ2V0cZORfhHuulQv4TLaNnwc+3ABZvwU8Azyd/cVdUXRW4I8Z2cV+Gngq+7mj0bbrNXLWbZv6m49mlij6UMLMGpCLwcwSLgYzS7gYzCzhYjCzhIvBzBIuBjNLuBjMLPH/vPrHTQCXzP4AAAAASUVORK5CYII=",
113 |       "text/plain": [
114 |        "<Figure size 432x288 with 1 Axes>"
115 |       ]
116 |      },
117 |      "metadata": {
118 |       "needs_background": "light"
119 |      },
120 |      "output_type": "display_data"
121 |     }
122 |    ],
123 |    "source": [
124 |     "plt.imshow(tile_data)"
125 |    ]
126 |   }
127 |  ],
128 |  "metadata": {
129 |   "interpreter": {
130 |    "hash": "1b81d1d535df7769bbd10807f688dfefefc291b6f98a68417a180e56994d6783"
131 |   },
132 |   "kernelspec": {
133 |    "display_name": "Python 3.9.11 ('env': venv)",
134 |    "language": "python",
135 |    "name": "python3"
136 |   },
137 |   "language_info": {
138 |    "codemirror_mode": {
139 |     "name": "ipython",
140 |     "version": 3
141 |    },
142 |    "file_extension": ".py",
143 |    "mimetype": "text/x-python",
144 |    "name": "python",
145 |    "nbconvert_exporter": "python",
146 |    "pygments_lexer": "ipython3",
147 |    "version": "3.10.4"
148 |   },
149 |   "orig_nbformat": 4
150 |  },
151 |  "nbformat": 4,
152 |  "nbformat_minor": 2
153 | }
154 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | [![CircleCI](https://circleci.com/gh/asascience/restful-grids/tree/main.svg?style=svg)](https://circleci.com/gh/asascience/restful-grids/tree/main)
  2 | 
  3 | # restful-grids
  4 | Exploring modern RESTful services for gridded data
  5 | 
  6 | ## Resources
  7 | Use this S3 bucket for test data: `s3://ioos-code-sprint-2022`  
  8 | Several zarr datasets have been added to it. It also contains a static STAC catalog for identifying the data.
  9 | 
 10 | [Public Docker images](https://gallery.ecr.aws/m2c5k9c1/restful-grids)  
 11 | The `dev` tag is built on every main branch commit.  
 12 | To get the image:  
 13 |   
 14 | `docker pull public.ecr.aws/m2c5k9c1/restful-grids:dev`
 15 | 
 16 | Then run it:  
 17 | `docker run -d -p 9005:9005 public.ecr.aws/m2c5k9c1/restful-grids:dev`
 18 | 
 19 | In addition, you can mount a local datasets directory with the -v option:  
 20 | `docker run -d -p 9005:9005 -v /tmp/datasets:/tmp/datasets public.ecr.aws/m2c5k9c1/restful-grids:dev`
 21 | 
 22 | ## Setup
 23 | [Miniconda](https://docs.conda.io/en/latest/miniconda.html) is recommended to manage Python dependencies.  
 24 | 
 25 | In the Anaconda prompt, you can load the `environment.yml` file to configure your environment:
 26 | `conda env create -f environment.yml`
 27 | 
 28 | Once you install the environment, you will need to activate it using
 29 | 
 30 | `conda activate code-sprint-2022`
 31 | 
 32 | To update your conda environment with any new packages added or removed to the `environment.yml` file use
 33 | 
 34 | `conda env update -f environment.yml --prune`
 35 | 
 36 | Alternatively, you can install dependencies with `pip` and `virtualenv`: 
 37 | 
 38 | ```bash
 39 | virutalenv env/
 40 | source env/bin/activate
 41 | pip install -r requirements.txt
 42 | ```
 43 | 
 44 | ## Taking a Look at the Notebook Example
 45 | We have an example notebook in the `/examples` directory, which can be run using the `enivronment.yml` file
 46 | - [Link to rendered notebook](https://nbviewer.org/github/asascience/restful-grids/blob/main/examples/demo-apis.ipynb)
 47 | 
 48 | ## Running This Work-In-Progress
 49 | 
 50 | Once you install your environment, you can run your local server with the:
 51 | - Wave Watch 3 (ww3) dataset, which can be downloaded [here]()
 52 | - Global Forecast System (GFS) in Zarr format hosted on the cloud
 53 | 
 54 | Once you have your data, use following steps:
 55 | 
 56 | ### Start the Server
 57 | You can start the server using the `main.py` in the `/xpublish` directory
 58 | 
 59 | ```
 60 | cd xpublish
 61 | python main.py
 62 | ```
 63 | 
 64 | This will spin up a server, accessible using the following link (localhost:9005):
 65 | 
 66 | ```
 67 | INFO:     Uvicorn running on http://0.0.0.0:9005 (Press CTRL+C to quit)
 68 | INFO:     Started reloader process [5152] using statreload
 69 | INFO:     Started server process [5155]
 70 | INFO:     Waiting for application startup.
 71 | INFO:     Application startup complete.
 72 | ```
 73 | 
 74 | When you go to the web address, you you will see a page specifying which datasets are available
 75 | 
 76 | ```
 77 | ["ww3","gfs"]
 78 | ```
 79 | 
 80 | We can look at the GFS dataset, by adding `/datasets/gfs` to the url, which results in a web-rendered version of the dataset
 81 | 
 82 | ![GFS-web](images/gfs-web.png)
 83 | 
 84 | ### Subset a Point
 85 | 
 86 | One of the methods of accessing data is using the data point API, using something similar to the following:
 87 | 
 88 | ```
 89 | localhost:9005/datasets/ww3/edr/position?coords=POINT(-69.35%2043.72)%27&parameter-name=hs,dir,t02
 90 | ```
 91 | 
 92 | Which returns a [json](https://www.json.org/json-en.html) file with the desired data:
 93 | 
 94 | 
 95 | ```json
 96 | {"type":"Coverage","domain":{"type":"Domain","domainType":"Grid","axes":{"x":{"values":[-69.30000305175781]},"y":{"values":[43.70000076293945]},"t":{"values":["2022-04-11T12:00:00","2022-04-11T12:59:59","2022-04-11T14:00:00","2022-04-11T15:00:00","2022-04-11T15:59:59","2022-04-11T17:00:00","2022-04-11T18:00:00","2022-04-11T18:59:59","2022-04-11T20:00:00","2022-04-11T21:00:00","2022-04-11T21:59:59","2022-04-11T23:00:00","2022-04-12T00:00:00","2022-04-12T00:59:59","2022-04-12T02:00:00","2022-04-12T03:00:00","2022-04-12T03:59:59","2022-04-12T05:00:00","2022-04-12T06:00:00","2022-04-12T06:59:59","2022-04-12T08:00:00","2022-04-12T09:00:00","2022-04-12T09:59:59","2022-04-12T11:00:00","2022-04-12T12:00:00","2022-04-12T12:59:59","2022-04-12T14:00:00","2022-04-12T15:00:00","2022-04-12T15:59:59","2022-04-12T17:00:00","2022-04-12T18:00:00","2022-04-12T18:59:59","2022-04-12T20:00:00","2022-04-12T21:00:00","2022-04-12T21:59:59","2022-04-12T23:00:00","2022-04-13T00:00:00","2022-04-13T00:59:59","2022-04-13T02:00:00","2022-04-13T03:00:00","2022-04-13T03:59:59","2022-04-13T05:00:00","2022-04-13T06:00:00","2022-04-13T06:59:59","2022-04-13T08:00:00","2022-04-13T09:00:00","2022-04-13T09:59:59","2022-04-13T11:00:00","2022-04-13T12:00:00","2022-04-13T12:59:59","2022-04-13T14:00:00","2022-04-13T15:00:00","2022-04-13T15:59:59","2022-04-13T17:00:00","2022-04-13T18:00:00","2022-04-13T18:59:59","2022-04-13T20:00:00","2022-04-13T21:00:00","2022-04-13T21:59:59","2022-04-13T23:00:00","2022-04-14T00:00:00","2022-04-14T00:59:59","2022-04-14T02:00:00","2022-04-14T03:00:00","2022-04-14T03:59:59","2022-04-14T05:00:00","2022-04-14T06:00:00","2022-04-14T06:59:59","2022-04-14T08:00:00","2022-04-14T09:00:00","2022-04-14T09:59:59","2022-04-14T11:00:00","2022-04-14T12:00:00"]},"forecast_reference_time":{"values":["2022-04-11T12:00:00"]}},"referencing":[]},"parameters":{"hs":{"type":"Parameter","observedProperty":{"label":{"en":"significant height of wind and swell waves"}},"description":{"en":"significant height of wind and swell waves"},"unit":{"label":{"en":"m"}}},"dir":{"type":"Parameter","observedProperty":{"label":{"en":"wave mean direction"}},"description":{"en":"wave mean direction"},"unit":{"label":{"en":"degree"}}},"t02":{"type":"Parameter","observedProperty":{"label":{"en":"mean period T02"}},"description":{"en":"mean period T02"},"unit":{"label":{"en":"s"}}}},"ranges":{"hs":{"type":"NdArray","dataType":"float","axisNames":["forecast_reference_time","t"],"shape":[1,73],"values":[0.33467215299606323,0.3588910698890686,0.3660368025302887,0.3152061402797699,0.2875429093837738,0.33364781737327576,0.42414912581443787,0.5218766927719116,0.599566638469696,0.6628382802009583,0.6959347724914551,0.7017455697059631,0.6900897026062012,0.6990023255348206,0.7459676861763,0.8135576248168945,0.8708090782165527,0.9190717339515686,0.9822579026222229,1.0730650424957275,1.1682802438735962,1.2368590831756592,1.2590762376785278,1.2461904287338257,1.2177737951278687,1.190627098083496,1.1743522882461548,1.1686142683029175,1.168257474899292,1.1705492734909058,1.1713541746139526,1.1505155563354492,1.1002039909362793,1.029807448387146,0.9527088403701782,0.8763468265533447,0.8059961199760437,0.7473487257957458,0.6959123611450195,0.6488614678382874,0.6027891635894775,0.5554247498512268,0.5091127157211304,0.4687694013118744,0.4349559545516968,0.40602195262908936,0.3779057264328003,0.3484857380390167,0.3213227689266205,0.30005601048469543,0.2922517955303192,0.3058054745197296,0.34318259358406067,0.39665448665618896,0.4514908790588379,0.4962618947029114,0.5274868011474609,0.5485127568244934,0.5546026825904846,0.5439878106117249,0.5306615829467773,0.521487832069397,0.5167329907417297,0.513405442237854,0.5168517827987671,0.531062662601471,0.5381449460983276,0.5489262938499451,0.570189356803894,0.6079721450805664,0.6753485798835754,0.7782320976257324,0.9024170637130737]},"dir":{"type":"NdArray","dataType":"float","axisNames":["forecast_reference_time","t"],"shape":[1,73],"values":[304.64556884765625,299.618408203125,293.408203125,287.8389892578125,280.72564697265625,269.44873046875,255.81439208984375,244.49017333984375,236.51898193359375,230.26300048828125,225.9736328125,223.1942138671875,221.13653564453125,218.9971923828125,215.77105712890625,211.55718994140625,210.140380859375,211.71331787109375,214.11346435546875,215.63812255859375,215.6729736328125,214.7518310546875,212.01513671875,208.25762939453125,204.655029296875,201.95989990234375,200.77069091796875,201.060302734375,201.87841796875,202.632568359375,203.35174560546875,203.40252685546875,202.67822265625,201.50372314453125,200.7591552734375,200.6708984375,201.12451171875,202.5379638671875,204.12567138671875,205.147216796875,204.88092041015625,202.3099365234375,198.0283203125,194.3463134765625,192.36212158203125,191.99456787109375,191.7603759765625,190.14593505859375,187.52301025390625,184.47686767578125,181.40606689453125,178.68524169921875,176.647705078125,175.54791259765625,175.24810791015625,175.56658935546875,176.4949951171875,178.193603515625,178.86566162109375,178.3890380859375,177.8448486328125,177.36468505859375,177.0433349609375,176.85498046875,175.67352294921875,174.07855224609375,173.53839111328125,173.0093994140625,173.1402587890625,174.214111328125,174.9512939453125,173.96197509765625,171.16070556640625]},"t02":{"type":"NdArray","dataType":"float","axisNames":["forecast_reference_time","t"],"shape":[1,73],"values":[1.8070895671844482,2.1569175720214844,2.2606236934661865,2.272696018218994,2.1709280014038086,2.151611089706421,2.3017566204071045,2.452406644821167,2.5829691886901855,2.69464111328125,2.7830710411071777,2.8376331329345703,2.8641910552978516,2.8558714389801025,2.8827998638153076,2.953688144683838,3.059943675994873,3.1737217903137207,3.3152377605438232,3.4846651554107666,3.6484591960906982,3.765639543533325,3.8690288066864014,3.9598238468170166,4.043913841247559,4.048367500305176,3.98111629486084,3.90596079826355,3.8609814643859863,3.8333399295806885,3.807370662689209,3.8096847534179688,3.8128299713134766,3.805934190750122,3.7544338703155518,3.6700472831726074,3.5692813396453857,3.447746992111206,3.3469176292419434,3.2537217140197754,3.212505578994751,3.2340455055236816,3.3002560138702393,3.3129446506500244,3.226036310195923,3.080014944076538,2.9937071800231934,3.0099799633026123,3.108988046646118,3.356945514678955,3.7293829917907715,4.201430797576904,4.8152899742126465,5.385035037994385,5.667536735534668,5.556057453155518,5.037730693817139,4.278224468231201,4.069947719573975,4.364011287689209,4.786285877227783,5.236138820648193,5.629247188568115,5.760343074798584,4.635284900665283,3.936607837677002,3.8392491340637207,3.6843204498291016,3.5711114406585693,3.5300326347351074,3.4591054916381836,3.4388718605041504,3.4989757537841797]}}}
 97 | ```
 98 | 
 99 | ### Subset a Tile
100 | 
101 | The other method of accessing/visualizing data is through the `TileRouter`, which given a:
102 | - parameter (which field) - variable name
103 | - time - time step to view
104 | - z,x,y - tile coordinate (see [here](https://www.maptiler.com/google-maps-coordinates-tile-bounds-projection))
105 | 
106 | and optionally a:
107 | - cmap - matplotlib colormap name 
108 | - color_range - color mapping range for teh data value in the format min,max
109 | 
110 | For example, the following:
111 | 
112 | http://localhost:9005/datasets/ww3/tile/hs/2022-04-12T21:00:00.00/0/0/0?size=1024&color_range=0,2
113 | 
114 | Would result in this plot:
115 | 
116 | ![Example Image API](images/example-image-api.png)
117 | 
118 | Or visualized on a tiled map: 
119 | 
120 | ![Map Image Example](images/map-tile-example.png)
121 | 


--------------------------------------------------------------------------------
/xpublish_routers/openapi.json:
--------------------------------------------------------------------------------
1 | {"openapi":"3.0.2","info":{"title":"IOOS xpublish","description":"Hacking on xpublish during the IOOS Code Sprint","version":"0.1.0"},"paths":{"/versions":{"get":{"summary":"Get Versions","operationId":"get_versions_versions_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/datasets":{"get":{"tags":["info"],"summary":"Get Dataset Collection Keys","operationId":"get_dataset_collection_keys_datasets_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/datasets/{dataset_id}/":{"get":{"tags":["info"],"summary":"Html Representation","description":"Returns a HTML representation of the dataset.","operationId":"html_representation_datasets__dataset_id___get","parameters":[{"required":true,"schema":{"title":"Dataset Id","type":"string"},"name":"dataset_id","in":"path"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/datasets/{dataset_id}/keys":{"get":{"tags":["info"],"summary":"List Keys","operationId":"list_keys_datasets__dataset_id__keys_get","parameters":[{"required":true,"schema":{"title":"Dataset Id","type":"string"},"name":"dataset_id","in":"path"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/datasets/{dataset_id}/dict":{"get":{"tags":["info"],"summary":"To Dict","operationId":"to_dict_datasets__dataset_id__dict_get","parameters":[{"required":true,"schema":{"title":"Dataset Id","type":"string"},"name":"dataset_id","in":"path"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/datasets/{dataset_id}/info":{"get":{"tags":["info"],"summary":"Info","description":"Dataset schema (close to the NCO-JSON schema).","operationId":"info_datasets__dataset_id__info_get","parameters":[{"required":true,"schema":{"title":"Dataset Id","type":"string"},"name":"dataset_id","in":"path"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/datasets/{dataset_id}/edr/position":{"get":{"tags":["edr"],"summary":"Position query","description":"Return position data based on WKT Point(lon lat) coordinate.\n\nExtra selecting/slicing parameters can be provided as additional query strings.","operationId":"get_position_datasets__dataset_id__edr_position_get","parameters":[{"required":true,"schema":{"title":"Dataset Id","type":"string"},"name":"dataset_id","in":"path"},{"description":"Well Known Text coordinates","required":true,"schema":{"title":"Point in WKT format","type":"string","description":"Well Known Text coordinates"},"name":"coords","in":"query"},{"description":"Height or depth of query","required":false,"schema":{"title":"Z axis","type":"string","description":"Height or depth of query"},"name":"z","in":"query"},{"description":"Query by a single ISO time or a range of ISO times. To query by a range, split the times with a slash","required":false,"schema":{"title":"Datetime or datetime range","type":"string","description":"Query by a single ISO time or a range of ISO times. To query by a range, split the times with a slash"},"name":"datetime","in":"query"},{"description":"xarray variables to query","required":false,"schema":{"title":"Parameter-Name","type":"string","description":"xarray variables to query"},"name":"parameter-name","in":"query"},{"description":"CRS is not yet implemented","required":false,"deprecated":true,"schema":{"title":"Crs","type":"string","description":"CRS is not yet implemented"},"name":"crs","in":"query"},{"description":"Data is returned as a CoverageJSON by default, but NetCDF is supported with `f=nc`, or CSV with `csv`","required":false,"schema":{"title":"Response format","type":"string","description":"Data is returned as a CoverageJSON by default, but NetCDF is supported with `f=nc`, or CSV with `csv`"},"name":"f","in":"query"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/datasets/{dataset_id}/tree/.zmetadata":{"get":{"tags":["datatree"],"summary":"Get Tree Metadata","operationId":"get_tree_metadata_datasets__dataset_id__tree__zmetadata_get","parameters":[{"required":true,"schema":{"title":"Dataset Id","type":"string"},"name":"dataset_id","in":"path"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/datasets/{dataset_id}/tree/.zgroup":{"get":{"tags":["datatree"],"summary":"Get Top Zgroup","operationId":"get_top_zgroup_datasets__dataset_id__tree__zgroup_get","parameters":[{"required":true,"schema":{"title":"Dataset Id","type":"string"},"name":"dataset_id","in":"path"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/datasets/{dataset_id}/tree/.zattrs":{"get":{"tags":["datatree"],"summary":"Get Top Zattrs","operationId":"get_top_zattrs_datasets__dataset_id__tree__zattrs_get","parameters":[{"required":true,"schema":{"title":"Dataset Id","type":"string"},"name":"dataset_id","in":"path"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/datasets/{dataset_id}/tree/{level}/.zgroup":{"get":{"tags":["datatree"],"summary":"Get Zgroup","operationId":"get_zgroup_datasets__dataset_id__tree__level___zgroup_get","parameters":[{"required":true,"schema":{"title":"Level","type":"integer"},"name":"level","in":"path"},{"required":true,"schema":{"title":"Dataset Id","type":"string"},"name":"dataset_id","in":"path"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/datasets/{dataset_id}/tree/{level}/{var_name}/.zattrs":{"get":{"tags":["datatree"],"summary":"Get Variable Zattrs","operationId":"get_variable_zattrs_datasets__dataset_id__tree__level___var_name___zattrs_get","parameters":[{"required":true,"schema":{"title":"Level","type":"integer"},"name":"level","in":"path"},{"required":true,"schema":{"title":"Var Name","type":"string"},"name":"var_name","in":"path"},{"required":true,"schema":{"title":"Dataset Id","type":"string"},"name":"dataset_id","in":"path"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/datasets/{dataset_id}/tree/{level}/{var_name}/.zarray":{"get":{"tags":["datatree"],"summary":"Get Variable Zarray","operationId":"get_variable_zarray_datasets__dataset_id__tree__level___var_name___zarray_get","parameters":[{"required":true,"schema":{"title":"Level","type":"integer"},"name":"level","in":"path"},{"required":true,"schema":{"title":"Var Name","type":"string"},"name":"var_name","in":"path"},{"required":true,"schema":{"title":"Dataset Id","type":"string"},"name":"dataset_id","in":"path"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/datasets/{dataset_id}/tree/{level}/{var_name}/{chunk}":{"get":{"tags":["datatree"],"summary":"Get Variable Chunk","operationId":"get_variable_chunk_datasets__dataset_id__tree__level___var_name___chunk__get","parameters":[{"required":true,"schema":{"title":"Level","type":"integer"},"name":"level","in":"path"},{"required":true,"schema":{"title":"Var Name","type":"string"},"name":"var_name","in":"path"},{"required":true,"schema":{"title":"Chunk","type":"string"},"name":"chunk","in":"path"},{"required":true,"schema":{"title":"Dataset Id","type":"string"},"name":"dataset_id","in":"path"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/datasets/{dataset_id}/image/":{"get":{"tags":["image"],"summary":"Get Image","operationId":"get_image_datasets__dataset_id__image__get","parameters":[{"required":true,"schema":{"title":"Dataset Id","type":"string"},"name":"dataset_id","in":"path"},{"required":true,"schema":{"title":"Bbox","type":"string"},"name":"bbox","in":"query"},{"required":true,"schema":{"title":"Width","type":"integer"},"name":"width","in":"query"},{"required":true,"schema":{"title":"Height","type":"integer"},"name":"height","in":"query"},{"required":true,"schema":{"title":"Parameter","type":"string"},"name":"parameter","in":"query"},{"required":true,"schema":{"title":"Datetime","type":"string"},"name":"datetime","in":"query"},{"required":false,"schema":{"title":"Crs","type":"string"},"name":"crs","in":"query"},{"required":false,"schema":{"title":"Cmap","type":"string"},"name":"cmap","in":"query"}],"responses":{"200":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/datasets/{dataset_id}/image/tile/{parameter}/{t}/{z}/{x}/{y}":{"get":{"tags":["image"],"summary":"Get Image Tile","operationId":"get_image_tile_datasets__dataset_id__image_tile__parameter___t___z___x___y__get","parameters":[{"required":true,"schema":{"title":"Parameter","type":"string"},"name":"parameter","in":"path"},{"required":true,"schema":{"title":"T","type":"string"},"name":"t","in":"path"},{"required":true,"schema":{"title":"Z","type":"integer"},"name":"z","in":"path"},{"required":true,"schema":{"title":"X","type":"integer"},"name":"x","in":"path"},{"required":true,"schema":{"title":"Y","type":"integer"},"name":"y","in":"path"},{"required":true,"schema":{"title":"Dataset Id","type":"string"},"name":"dataset_id","in":"path"},{"required":false,"schema":{"title":"Size","type":"integer","default":256},"name":"size","in":"query"}],"responses":{"200":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/datasets/{dataset_id}/zarr/.zmetadata":{"get":{"tags":["zarr"],"summary":"Get Zmetadata","operationId":"get_zmetadata_datasets__dataset_id__zarr__zmetadata_get","parameters":[{"required":true,"schema":{"title":"Dataset Id","type":"string"},"name":"dataset_id","in":"path"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/datasets/{dataset_id}/zarr/.zgroup":{"get":{"tags":["zarr"],"summary":"Get Zgroup","operationId":"get_zgroup_datasets__dataset_id__zarr__zgroup_get","parameters":[{"required":true,"schema":{"title":"Dataset Id","type":"string"},"name":"dataset_id","in":"path"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/datasets/{dataset_id}/zarr/.zattrs":{"get":{"tags":["zarr"],"summary":"Get Zattrs","operationId":"get_zattrs_datasets__dataset_id__zarr__zattrs_get","parameters":[{"required":true,"schema":{"title":"Dataset Id","type":"string"},"name":"dataset_id","in":"path"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/datasets/{dataset_id}/zarr/{var}/{chunk}":{"get":{"tags":["zarr"],"summary":"Get Variable Chunk","description":"Get a zarr array chunk.\n\nThis will return cached responses when available.","operationId":"get_variable_chunk_datasets__dataset_id__zarr__var___chunk__get","parameters":[{"required":true,"schema":{"title":"Var","type":"string"},"name":"var","in":"path"},{"required":true,"schema":{"title":"Chunk","type":"string"},"name":"chunk","in":"path"},{"required":true,"schema":{"title":"Dataset Id","type":"string"},"name":"dataset_id","in":"path"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}}},"components":{"schemas":{"HTTPValidationError":{"title":"HTTPValidationError","type":"object","properties":{"detail":{"title":"Detail","type":"array","items":{"$ref":"#/components/schemas/ValidationError"}}}},"ValidationError":{"title":"ValidationError","required":["loc","msg","type"],"type":"object","properties":{"loc":{"title":"Location","type":"array","items":{"anyOf":[{"type":"string"},{"type":"integer"}]}},"msg":{"title":"Message","type":"string"},"type":{"title":"Error Type","type":"string"}}}}},"tags":[{"name":"info"},{"name":"edr","description":"\nOGC Environmental Data Retrieval API\n\nCurrently the position query is supported, which takes a single Well Known Text point.\n","externalDocs":{"description":"OGC EDR Reference","url":"https://ogcapi.ogc.org/edr/"}},{"name":"image","description":"WMS-like image generation"},{"name":"datatree","description":"\nDynamic generation of Zarr ndpyramid/Datatree for access from webmaps.\n\n- [carbonplan/maps](https://carbonplan.org/blog/maps-library-release)\n- [xpublish#92](https://github.com/xarray-contrib/xpublish/issues/92)\n"},{"name":"zarr","description":"\nZarr access to NetCDF datasets.\n\nLoad by using an fsspec mapper\n\n```python\nmapper = fsspec.get_mapper(\"/datasets/{dataset_id}/zarr/\")\nds = xr.open_zarr(mapper, consolidated=True)\n```\n"}]}


--------------------------------------------------------------------------------
/xpublish/wms_router.py:
--------------------------------------------------------------------------------
  1 | from cmath import isnan
  2 | import io
  3 | import logging
  4 | import xml.etree.ElementTree as ET
  5 | 
  6 | import numpy as np
  7 | import xarray as xr
  8 | from fastapi import APIRouter, Depends, HTTPException, Request, Response
  9 | from xpublish.dependencies import get_dataset
 10 | from rasterio.enums import Resampling
 11 | from rasterio.transform import Affine
 12 | from rasterio.warp import calculate_default_transform
 13 | from PIL import Image
 14 | from matplotlib import cm, colorbar
 15 | import matplotlib.pyplot as plt
 16 | 
 17 | 
 18 | # These will show as unused to the linter but they are necessary
 19 | import cf_xarray
 20 | import rioxarray
 21 | 
 22 | 
 23 | logger = logging.getLogger("api")
 24 | 
 25 | wms_router = APIRouter()
 26 | 
 27 | 
 28 | styles = [
 29 |     {
 30 |         'name': 'raster/default',
 31 |         'title': 'Raster',
 32 |         'abstract': 'The default raster styling, scaled to the given range. The palette can be overriden by replacing default with a matplotlib colormap name'
 33 |     }
 34 | ]
 35 | 
 36 | 
 37 | def lower_case_keys(d: dict) -> dict:
 38 |     return dict((k.lower(), v) for k,v in d.items())
 39 | 
 40 | 
 41 | def format_timestamp(value):
 42 |     return str(value.dt.strftime(date_format='%Y-%m-%dT%H:%M:%S').values)
 43 | 
 44 | 
 45 | def strip_float(value): 
 46 |     return float(value.values)
 47 | 
 48 | 
 49 | def round_float_values(v: list) -> list:
 50 |     return [round(x, 5) for x in v]
 51 | 
 52 | 
 53 | def create_text_element(root, name: str, text: str):
 54 |     element = ET.SubElement(root, name)
 55 |     element.text = text
 56 |     return element
 57 | 
 58 | 
 59 | def create_capability_element(root, name: str, url: str, formats: list[str]):
 60 |     cap = ET.SubElement(root, name)
 61 |     # TODO: Add more image formats
 62 |     for fmt in formats:
 63 |         create_text_element(cap, 'Format', fmt)
 64 | 
 65 |     dcp_type = ET.SubElement(cap, 'DCPType')
 66 |     http = ET.SubElement(dcp_type, 'HTTP')
 67 |     get = ET.SubElement(http, 'Get')
 68 |     get.append(ET.Element('OnlineResource', attrib={
 69 |                'xlink:type': 'simple', 'xlink:href': url}))
 70 |     return cap
 71 | 
 72 | 
 73 | def get_capabilities(dataset: xr.Dataset, request: Request):
 74 |     """
 75 |     Return the WMS capabilities for the dataset
 76 |     """
 77 |     wms_url = f'{request.base_url}{request.url.path.removeprefix("/")}'
 78 | 
 79 |     root = ET.Element('WMS_Capabilities', version='1.3.0', attrib={
 80 |                       'xmlns': 'http://www.opengis.net/wms', 'xmlns:xlink': 'http://www.w3.org/1999/xlink'})
 81 | 
 82 |     service = ET.SubElement(root, 'Service')
 83 |     create_text_element(service, 'Name', 'WMS')
 84 |     create_text_element(service, 'Title', 'IOOS XPublish WMS')
 85 |     create_text_element(service, 'Abstract', 'IOOS XPublish WMS')
 86 |     service.append(ET.Element('KeywordList'))
 87 |     service.append(ET.Element('OnlineResource', attrib={
 88 |                    'xlink:type': 'simple', 'xlink:href': 'http://www.opengis.net/spec/wms_schema_1/1.3.0'}))
 89 | 
 90 |     capability = ET.SubElement(root, 'Capability')
 91 |     request_tag = ET.SubElement(capability, 'Request')
 92 | 
 93 |     get_capabilities = create_capability_element(
 94 |         request_tag, 'GetCapabilities', wms_url, ['text/xml'])
 95 |     # TODO: Add more image formats
 96 |     get_map = create_capability_element(
 97 |         request_tag, 'GetMap', wms_url, ['image/png'])
 98 |     # TODO: Add more feature info formats
 99 |     get_feature_info = create_capability_element(
100 |         request_tag, 'GetFeatureInfo', wms_url, ['text/json'])
101 |     # TODO: Add more image formats
102 |     get_legend_graphic = create_capability_element(
103 |         request_tag, 'GetLegendGraphic', wms_url, ['image/png'])
104 | 
105 |     exeption_tag = ET.SubElement(capability, 'Exception')
106 |     exception_format = ET.SubElement(exeption_tag, 'Format')
107 |     exception_format.text = 'text/json'
108 | 
109 |     layer_tag = ET.SubElement(capability, 'Layer')
110 |     create_text_element(layer_tag, 'Title',
111 |                         dataset.attrs.get('title', 'Untitled'))
112 |     create_text_element(layer_tag, 'Description',
113 |                         dataset.attrs.get('description', 'No Description'))
114 |     create_text_element(layer_tag, 'CRS', 'EPSG:4326')
115 |     create_text_element(layer_tag, 'CRS', 'EPSG:3857')
116 |     create_text_element(layer_tag, 'CRS', 'CRS:84')
117 | 
118 |     for var in dataset.data_vars:
119 |         da = dataset[var]
120 |         attrs = da.cf.attrs
121 |         layer = ET.SubElement(layer_tag, 'Layer', attrib={'queryable': '1'})
122 |         create_text_element(layer, 'Name', var)
123 |         create_text_element(layer, 'Title', attrs['long_name'])
124 |         create_text_element(layer, 'Abstract', attrs['long_name'])
125 |         create_text_element(layer, 'CRS', 'EPSG:4326')
126 |         create_text_element(layer, 'CRS', 'EPSG:3857')
127 |         create_text_element(layer, 'CRS', 'CRS:84')
128 | 
129 |         create_text_element(layer, 'Units', attrs.get('units', ''))
130 | 
131 |         # Not sure if this can be copied, its possible variables have different extents within
132 |         # a given dataset probably
133 |         bounding_box_element = ET.SubElement(layer, 'BoundingBox', attrib={
134 |             'CRS': 'EPSG:4326',
135 |             'minx': f'{da["longitude"].min().item()}',
136 |             'miny': f'{da["latitude"].min().item()}',
137 |             'maxx': f'{da["longitude"].max().item()}',
138 |             'maxy': f'{da["latitude"].max().item()}'
139 |         })
140 | 
141 |         time_dimension_element = ET.SubElement(layer, 'Dimension', attrib={
142 |             'name': 'time',
143 |             'units': 'ISO8601',
144 |             'default': format_timestamp(da.cf['time'].min()),
145 |         })
146 |         # TODO: Add ISO duration specifier
147 |         time_dimension_element.text = f"{format_timestamp(da.cf['time'].min())}/{format_timestamp(da.cf['time'].max())}"
148 | 
149 |         style_tag = ET.SubElement(layer, 'Style')
150 | 
151 |         for style in styles:
152 |             style_element = ET.SubElement(
153 |                 style_tag, 'Style', attrib={'name': style['name']})
154 |             create_text_element(style_element, 'Title', style['title'])
155 |             create_text_element(style_element, 'Abstract', style['abstract'])
156 | 
157 |             legend_url = f'{wms_url}?service=WMS&request=GetLegendGraphic&format=image/png&width=20&height=20&layers={var}&styles={style["name"]}'
158 |             create_text_element(style_element, 'LegendURL', legend_url)
159 | 
160 |     ET.indent(root, space="\t", level=0)
161 |     return Response(ET.tostring(root).decode('utf-8'), media_type='text/xml')
162 | 
163 | 
164 | def get_map(dataset: xr.Dataset, query: dict):
165 |     """
166 |     Return the WMS map for the dataset and given parameters
167 |     """
168 |     if not dataset.rio.crs:
169 |         dataset = dataset.rio.write_crs(4326)
170 | 
171 |     ds = dataset.squeeze()
172 |     bbox = [float(x) for x in query['bbox'].split(',')]
173 |     width = int(query['width'])
174 |     height = int(query['height'])
175 |     crs = query.get('crs', None) or query.get('srs')
176 |     parameter = query['layers']
177 |     t = query.get('time')
178 |     colorscalerange = [float(x) for x in query['colorscalerange'].split(',')]
179 |     autoscale = query.get('autoscale', 'false') != 'false'
180 |     style = query['styles']
181 |     stylename, palettename = style.split('/')
182 | 
183 |     x_tile_size = bbox[2] - bbox[0]
184 |     y_tile_size = bbox[3] - bbox[1]
185 |     x_resolution = x_tile_size / float(width)
186 |     y_resolution = y_tile_size / float(height)
187 | 
188 |     # TODO: Calculate the transform
189 |     transform = Affine.translation(
190 |         bbox[0], bbox[3]) * Affine.scale(x_resolution, -y_resolution)
191 | 
192 |     resampled_data = ds[parameter].rio.reproject(
193 |         crs,
194 |         shape=(width, height),
195 |         resampling=Resampling.bilinear,
196 |         transform=transform,
197 |     )
198 | 
199 |     # This is an image, so only use the timestep that was requested
200 |     resampled_data = resampled_data.cf.sel({'T': t}).squeeze()
201 | 
202 |     # if the user has supplied a color range, use it. Otherwise autoscale
203 |     if autoscale:
204 |         min_value = float(ds[parameter].min())
205 |         max_value = float(ds[parameter].max())
206 |     else:
207 |         min_value = colorscalerange[0]
208 |         max_value = colorscalerange[1]
209 | 
210 |     ds_scaled = (resampled_data - min_value) / (max_value - min_value)
211 | 
212 |     # Let user pick cm from here https://predictablynoisy.com/matplotlib/gallery/color/colormap_reference.html#sphx-glr-gallery-color-colormap-reference-py
213 |     # Otherwise default to rainbow
214 |     if palettename == 'default':
215 |         palettename = 'rainbow'
216 |     im = Image.fromarray(np.uint8(cm.get_cmap(palettename)(ds_scaled)*255))
217 | 
218 |     image_bytes = io.BytesIO()
219 |     im.save(image_bytes, format='PNG')
220 |     image_bytes = image_bytes.getvalue()
221 | 
222 |     return Response(content=image_bytes, media_type='image/png')
223 | 
224 | 
225 | def get_feature_info(dataset: xr.Dataset, query: dict):
226 |     """
227 |     Return the WMS feature info for the dataset and given parameters
228 |     """
229 |     if not dataset.rio.crs:
230 |         dataset = dataset.rio.write_crs(4326)
231 | 
232 |     ds = dataset.squeeze()
233 | 
234 |     parameters = query['query_layers'].split(',')
235 |     times = [t.replace('Z', '') for t in query['time'].split('/')]
236 |     crs = query.get('crs', None) or query.get('srs')
237 |     bbox = [float(x) for x in query['bbox'].split(',')]
238 |     width = int(query['width'])
239 |     height = int(query['height'])
240 |     x = int(query['x'])
241 |     y = int(query['y'])
242 |     format = query['info_format']
243 | 
244 |     x_tile_size = bbox[2] - bbox[0]
245 |     y_tile_size = bbox[3] - bbox[1]
246 |     x_resolution = x_tile_size / float(width)
247 |     y_resolution = y_tile_size / float(height)
248 | 
249 |     # TODO: Calculate the transform
250 |     transform = Affine.translation(
251 |         bbox[0], bbox[3]) * Affine.scale(x_resolution, -y_resolution)
252 | 
253 |     if len(times) == 1:
254 |         ds = ds.cf.sel({'T': times[0]}).squeeze()
255 |     elif len(times) > 1: 
256 |         ds = ds.cf.sel({'T': slice(times[0], times[1])}).squeeze()
257 |     else: 
258 |         raise HTTPException(500, f"Invalid time requested: {times}")
259 |         
260 |     resampled_data = ds.rio.reproject(
261 |         crs,
262 |         shape=(width, height),
263 |         resampling=Resampling.nearest,
264 |         transform=transform,
265 |     )
266 | 
267 |     t_axis = [format_timestamp(t) for t in resampled_data.cf['T']]
268 |     x_axis = [strip_float(resampled_data.cf['X'][x])]
269 |     y_axis = [strip_float(resampled_data.cf['Y'][y])]
270 | 
271 |     parameter_info = {}
272 |     ranges = {}
273 | 
274 |     for parameter in parameters:
275 |         parameter_info[parameter] = {
276 |             'type': 'Parameter', 
277 |             'description': {
278 |                 'en': ds[parameter].cf.attrs['long_name'],
279 |             },
280 |             'observedProperty': {
281 |                 'label': {
282 |                     'en': ds[parameter].cf.attrs['long_name'],
283 |                 }, 
284 |                 'id': ds[parameter].cf.attrs['standard_name'],
285 |             }
286 |         }
287 | 
288 |         ranges[parameter] = {
289 |             'type': 'NdArray',
290 |             'dataType': 'float',
291 |             # TODO: Some fields might not have a time field? 
292 |             'axisNames': ['t', 'x', 'y'],
293 |             'shape': [len(t_axis), len(x_axis), len(y_axis)],
294 |             'values': round_float_values(resampled_data[parameter].cf.sel({'X': x_axis, 'Y': y_axis}).squeeze().values.tolist()),
295 |         }
296 | 
297 |     return {
298 |         'type': 'Coverage',
299 |         'title': {
300 |             'en': 'Extracted Profile Feature',
301 |         },
302 |         'domain': {
303 |             'type': 'Domain',
304 |             'domainType': 'PointSeries',
305 |             'axes': {
306 |                 't': {
307 |                     'values': t_axis
308 |                 },
309 |                 'x': {
310 |                     'values': x_axis
311 |                 },
312 |                 'y': {
313 |                     'values': y_axis
314 |                 }
315 |             },
316 |             'referencing': [
317 |                 {
318 |                     'coordinates': ['t'], 
319 |                     'system': {
320 |                         'type': 'TemporalRS', 
321 |                         'calendar': 'gregorian',
322 |                     }
323 |                 },
324 |                 {
325 |                     'coordinates': ['x', 'y'],
326 |                     'system': {
327 |                         'type': 'GeographicCRS',
328 |                         'id': crs,
329 |                     }
330 |                 }
331 |             ],
332 |         },
333 |         'parameters': parameter_info,
334 |         'ranges': ranges
335 |     }
336 | 
337 | 
338 | def get_legend_info(dataset: xr.Dataset, query: dict):
339 |     """
340 |     Return the WMS legend graphic for the dataset and given parameters
341 |     """
342 |     parameter = query['layers']
343 |     width: int = int(query['width'])
344 |     height: int = int(query['height'])
345 |     vertical = query.get('vertical', 'false') == 'true'
346 |     colorbaronly = query.get('colorbaronly', 'False') == 'True'
347 |     colorscalerange = [float(x) for x in query.get('colorscalerange', 'nan,nan').split(',')]
348 |     if isnan(colorscalerange[0]):
349 |         autoscale = True
350 |     else: 
351 |         autoscale = query.get('autoscale', 'false') != 'false'
352 |     style = query['styles']
353 |     stylename, palettename = style.split('/')
354 | 
355 |     ds = dataset.squeeze()
356 | 
357 |     # if the user has supplied a color range, use it. Otherwise autoscale
358 |     if autoscale:
359 |         min_value = float(ds[parameter].min())
360 |         max_value = float(ds[parameter].max())
361 |     else:
362 |         min_value = colorscalerange[0]
363 |         max_value = colorscalerange[1]
364 | 
365 |     scaled = (np.linspace(min_value, max_value, width) - min_value) / (max_value - min_value)
366 |     data = np.ones((height, width)) * scaled
367 | 
368 |     if vertical:
369 |         data = np.flipud(data.T)
370 |         data = data.reshape((height, width))
371 | 
372 |     # Let user pick cm from here https://predictablynoisy.com/matplotlib/gallery/color/colormap_reference.html#sphx-glr-gallery-color-colormap-reference-py
373 |     # Otherwise default to rainbow
374 |     if palettename == 'default':
375 |         palettename = 'rainbow'
376 |     im = Image.fromarray(np.uint8(cm.get_cmap(palettename)(data)*255))
377 | 
378 |     image_bytes = io.BytesIO()
379 |     im.save(image_bytes, format='PNG')
380 |     image_bytes = image_bytes.getvalue()
381 | 
382 |     return Response(content=image_bytes, media_type='image/png')
383 | 
384 | 
385 | @wms_router.get('/')
386 | def wms_root(request: Request, dataset: xr.Dataset = Depends(get_dataset)):
387 |     query_params = lower_case_keys(request.query_params)
388 |     method = query_params['request']
389 |     if method == 'GetCapabilities':
390 |         return get_capabilities(dataset, request)
391 |     elif method == 'GetMap':
392 |         return get_map(dataset, query_params)
393 |     elif method == 'GetFeatureInfo' or method == 'GetTimeseries':
394 |         return get_feature_info(dataset, query_params)
395 |     elif method == 'GetLegendGraphic':
396 |         return get_legend_info(dataset, query_params)
397 |     else:
398 |         raise HTTPException(
399 |             status_code=404, detail=f"{method} is not a valid option for REQUEST")
400 | 


--------------------------------------------------------------------------------
/xpublish_routers/EDR.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "c961fa90-b1b8-4626-8b0d-2206aec0b480",
  6 |    "metadata": {
  7 |     "tags": []
  8 |    },
  9 |    "source": [
 10 |     "# EDR Router\n",
 11 |     "\n",
 12 |     "The OGC [Environmental Data Retrieval](https://ogcapi.ogc.org/edr/) API is designed to be a common web mapping focused method for querying data.\n",
 13 |     "\n",
 14 |     "In `restful-grids/xpublish/edr_router.py`, we've implemented `edr_router` which currently provides an EDR position endpoint. This endpoint is especially useful for querying a time series from a gridded dataset.\n",
 15 |     "\n",
 16 |     "The default response for EDR endpoints is [CoverageJSON](https://covjson.org/), but we've also decided to support NetCDF responses if `f=nc` is added to the query parameters, or CSV with `f=csv`."
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "markdown",
 21 |    "id": "d482b0f5-2db9-46ad-ab6c-9d71deb80e55",
 22 |    "metadata": {},
 23 |    "source": [
 24 |     "## Setting up the router\n",
 25 |     "\n",
 26 |     "The edr_router expects datasets that have [CF conventions](http://cfconventions.org/) attributes that [cf-xarray](https://cf-xarray.readthedocs.io/) can read. Specifically it's looking for attributes that the `ds.cf.axes` can find `X` and `Y` axes (`Z` and `T` will also be used if found).\n",
 27 |     "\n",
 28 |     "If a dataset doesn't have full CF attributes, you can set them with `ds[X_COORD].attrs[\"axis\"] = \"X\"` and similar for the other axes.\n",
 29 |     "\n",
 30 |     "Then you can import and include `edr_router` when instantiating `xpublish.Rest` or subclass. We suggest including a prefix for routers to avoid conflicts, similar to:\n",
 31 |     "\n",
 32 |     "```py\n",
 33 |     "rest = xpublish.Rest(\n",
 34 |     "    DATASETS_DICT,\n",
 35 |     "    routers=[\n",
 36 |     "        (base_router, {\"tags\": [\"info\"]}),\n",
 37 |     "        (edr_router, {\"tags\": [\"edr\"], \"prefix\": \"/edr\"}),\n",
 38 |     "        (zarr_router, {\"tags\": [\"zarr\"], \"prefix\": \"/zarr\"}),\n",
 39 |     "    ]\n",
 40 |     ")\n",
 41 |     "```\n",
 42 |     "\n",
 43 |     "At this point you will get an EDR endpoint at `/datasets/{dataset_id}/edr/position` (or `/edr/position` if you only have a single dataset)."
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "markdown",
 48 |    "id": "9c595748-a5ac-43dc-90c6-31885f052627",
 49 |    "metadata": {},
 50 |    "source": [
 51 |     "## Making a request\n",
 52 |     "\n",
 53 |     "````{margin}\n",
 54 |     "```{admonition} A Note on WKT\n",
 55 |     "\n",
 56 |     "Well Known Text uses X Y coordinate order, or long lat for those of us who are dyslexic.\n",
 57 |     "\n",
 58 |     "So `POINT(-69.35 43.72)` gives you a point off in the Gulf of Maine where NERACOOS's buoy N should be.\n",
 59 |     "\n",
 60 |     "```\n",
 61 |     "````\n",
 62 |     "\n",
 63 |     "The minimum path and query params you need for a request is a `dataset_id` (see `/datasets/`) and a [Well Known Text point](https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry#Geometric_objects) for the `coords` query string.\n",
 64 |     "\n",
 65 |     "The endpoint will try to find the nearest values to the point."
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "markdown",
 70 |    "id": "cbcebc9c-4133-4772-b0c2-0f88e0db5882",
 71 |    "metadata": {},
 72 |    "source": [
 73 |     "### `parameter-name`\n",
 74 |     "\n",
 75 |     "We are also going to add a `parameter-name` to keep this somewhat reasonable, though that is not necessary and the endpoint will respond with all variables.\n",
 76 |     "\n",
 77 |     "Multiple parameters (variables) can also be given by comma seperating them, and due to the magic of cf-xarray, [CF standard names can also be used](https://cf-xarray.readthedocs.io/en/latest/selecting.html#by-standard-name) and will return the associated variables. (in this case `&parameter-name=sea_surface_wave_significant_height` would return the `hs` variable."
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "code",
 82 |    "execution_count": 1,
 83 |    "id": "3e9b7c32-adeb-4fba-b60c-b10e6eaf24cb",
 84 |    "metadata": {},
 85 |    "outputs": [],
 86 |    "source": [
 87 |     "import requests"
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "markdown",
 92 |    "id": "b9a021e2-69e3-4f9d-afb1-196184cefd04",
 93 |    "metadata": {},
 94 |    "source": [
 95 |     "```{margin}\n",
 96 |     "No commas between the strings means they will be recombined. This makes it a little easier to read when URLs get long.\n",
 97 |     "```"
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "code",
102 |    "execution_count": 15,
103 |    "id": "ead99a1e-375e-4f94-b333-ab1b1556984d",
104 |    "metadata": {},
105 |    "outputs": [
106 |     {
107 |      "data": {
108 |       "text/plain": [
109 |        "{'type': 'Coverage',\n",
110 |        " 'domain': {'type': 'Domain',\n",
111 |        "  'domainType': 'Grid',\n",
112 |        "  'axes': {'t': {'values': ['2022-04-11T12:00:00',\n",
113 |        "     '2022-04-11T12:59:59',\n",
114 |        "     '2022-04-11T14:00:00',\n",
115 |        "     '2022-04-11T15:00:00',\n",
116 |        "     '2022-04-11T15:59:59',\n",
117 |        "     '2022-04-11T17:00:00',\n",
118 |        "     '2022-04-11T18:00:00',\n",
119 |        "     '2022-04-11T18:59:59',\n",
120 |        "     '2022-04-11T20:00:00',\n",
121 |        "     '2022-04-11T21:00:00',\n",
122 |        "     '2022-04-11T21:59:59',\n",
123 |        "     '2022-04-11T23:00:00',\n",
124 |        "     '2022-04-12T00:00:00',\n",
125 |        "     '2022-04-12T00:59:59',\n",
126 |        "     '2022-04-12T02:00:00',\n",
127 |        "     '2022-04-12T03:00:00',\n",
128 |        "     '2022-04-12T03:59:59',\n",
129 |        "     '2022-04-12T05:00:00',\n",
130 |        "     '2022-04-12T06:00:00',\n",
131 |        "     '2022-04-12T06:59:59',\n",
132 |        "     '2022-04-12T08:00:00',\n",
133 |        "     '2022-04-12T09:00:00',\n",
134 |        "     '2022-04-12T09:59:59',\n",
135 |        "     '2022-04-12T11:00:00',\n",
136 |        "     '2022-04-12T12:00:00',\n",
137 |        "     '2022-04-12T12:59:59',\n",
138 |        "     '2022-04-12T14:00:00',\n",
139 |        "     '2022-04-12T15:00:00',\n",
140 |        "     '2022-04-12T15:59:59',\n",
141 |        "     '2022-04-12T17:00:00',\n",
142 |        "     '2022-04-12T18:00:00',\n",
143 |        "     '2022-04-12T18:59:59',\n",
144 |        "     '2022-04-12T20:00:00',\n",
145 |        "     '2022-04-12T21:00:00',\n",
146 |        "     '2022-04-12T21:59:59',\n",
147 |        "     '2022-04-12T23:00:00',\n",
148 |        "     '2022-04-13T00:00:00',\n",
149 |        "     '2022-04-13T00:59:59',\n",
150 |        "     '2022-04-13T02:00:00',\n",
151 |        "     '2022-04-13T03:00:00',\n",
152 |        "     '2022-04-13T03:59:59',\n",
153 |        "     '2022-04-13T05:00:00',\n",
154 |        "     '2022-04-13T06:00:00',\n",
155 |        "     '2022-04-13T06:59:59',\n",
156 |        "     '2022-04-13T08:00:00',\n",
157 |        "     '2022-04-13T09:00:00',\n",
158 |        "     '2022-04-13T09:59:59',\n",
159 |        "     '2022-04-13T11:00:00',\n",
160 |        "     '2022-04-13T12:00:00',\n",
161 |        "     '2022-04-13T12:59:59',\n",
162 |        "     '2022-04-13T14:00:00',\n",
163 |        "     '2022-04-13T15:00:00',\n",
164 |        "     '2022-04-13T15:59:59',\n",
165 |        "     '2022-04-13T17:00:00',\n",
166 |        "     '2022-04-13T18:00:00',\n",
167 |        "     '2022-04-13T18:59:59',\n",
168 |        "     '2022-04-13T20:00:00',\n",
169 |        "     '2022-04-13T21:00:00',\n",
170 |        "     '2022-04-13T21:59:59',\n",
171 |        "     '2022-04-13T23:00:00',\n",
172 |        "     '2022-04-14T00:00:00',\n",
173 |        "     '2022-04-14T00:59:59',\n",
174 |        "     '2022-04-14T02:00:00',\n",
175 |        "     '2022-04-14T03:00:00',\n",
176 |        "     '2022-04-14T03:59:59',\n",
177 |        "     '2022-04-14T05:00:00',\n",
178 |        "     '2022-04-14T06:00:00',\n",
179 |        "     '2022-04-14T06:59:59',\n",
180 |        "     '2022-04-14T08:00:00',\n",
181 |        "     '2022-04-14T09:00:00',\n",
182 |        "     '2022-04-14T09:59:59',\n",
183 |        "     '2022-04-14T11:00:00',\n",
184 |        "     '2022-04-14T12:00:00']},\n",
185 |        "   'forecast_reference_time': {'values': ['2022-04-11T12:00:00']}},\n",
186 |        "  'referencing': []},\n",
187 |        " 'parameters': {'hs': {'type': 'Parameter',\n",
188 |        "   'observedProperty': {'label': {'en': 'significant height of wind and swell waves'}},\n",
189 |        "   'description': {'en': 'significant height of wind and swell waves'},\n",
190 |        "   'unit': {'label': {'en': 'm'}}}},\n",
191 |        " 'ranges': {'hs': {'type': 'NdArray',\n",
192 |        "   'dataType': 'float',\n",
193 |        "   'axisNames': ['forecast_reference_time', 't'],\n",
194 |        "   'shape': [1, 73],\n",
195 |        "   'values': [0.33467215299606323,\n",
196 |        "    0.3588910698890686,\n",
197 |        "    0.3660368025302887,\n",
198 |        "    0.3152061402797699,\n",
199 |        "    0.2875429093837738,\n",
200 |        "    0.33364781737327576,\n",
201 |        "    0.42414912581443787,\n",
202 |        "    0.5218766927719116,\n",
203 |        "    0.599566638469696,\n",
204 |        "    0.6628382802009583,\n",
205 |        "    0.6959347724914551,\n",
206 |        "    0.7017455697059631,\n",
207 |        "    0.6900897026062012,\n",
208 |        "    0.6990023255348206,\n",
209 |        "    0.7459676861763,\n",
210 |        "    0.8135576248168945,\n",
211 |        "    0.8708090782165527,\n",
212 |        "    0.9190717339515686,\n",
213 |        "    0.9822579026222229,\n",
214 |        "    1.0730650424957275,\n",
215 |        "    1.1682802438735962,\n",
216 |        "    1.2368590831756592,\n",
217 |        "    1.2590762376785278,\n",
218 |        "    1.2461904287338257,\n",
219 |        "    1.2177737951278687,\n",
220 |        "    1.190627098083496,\n",
221 |        "    1.1743522882461548,\n",
222 |        "    1.1686142683029175,\n",
223 |        "    1.168257474899292,\n",
224 |        "    1.1705492734909058,\n",
225 |        "    1.1713541746139526,\n",
226 |        "    1.1505155563354492,\n",
227 |        "    1.1002039909362793,\n",
228 |        "    1.029807448387146,\n",
229 |        "    0.9527088403701782,\n",
230 |        "    0.8763468265533447,\n",
231 |        "    0.8059961199760437,\n",
232 |        "    0.7473487257957458,\n",
233 |        "    0.6959123611450195,\n",
234 |        "    0.6488614678382874,\n",
235 |        "    0.6027891635894775,\n",
236 |        "    0.5554247498512268,\n",
237 |        "    0.5091127157211304,\n",
238 |        "    0.4687694013118744,\n",
239 |        "    0.4349559545516968,\n",
240 |        "    0.40602195262908936,\n",
241 |        "    0.3779057264328003,\n",
242 |        "    0.3484857380390167,\n",
243 |        "    0.3213227689266205,\n",
244 |        "    0.30005601048469543,\n",
245 |        "    0.2922517955303192,\n",
246 |        "    0.3058054745197296,\n",
247 |        "    0.34318259358406067,\n",
248 |        "    0.39665448665618896,\n",
249 |        "    0.4514908790588379,\n",
250 |        "    0.4962618947029114,\n",
251 |        "    0.5274868011474609,\n",
252 |        "    0.5485127568244934,\n",
253 |        "    0.5546026825904846,\n",
254 |        "    0.5439878106117249,\n",
255 |        "    0.5306615829467773,\n",
256 |        "    0.521487832069397,\n",
257 |        "    0.5167329907417297,\n",
258 |        "    0.513405442237854,\n",
259 |        "    0.5168517827987671,\n",
260 |        "    0.531062662601471,\n",
261 |        "    0.5381449460983276,\n",
262 |        "    0.5489262938499451,\n",
263 |        "    0.570189356803894,\n",
264 |        "    0.6079721450805664,\n",
265 |        "    0.6753485798835754,\n",
266 |        "    0.7782320976257324,\n",
267 |        "    0.9024170637130737]}}}"
268 |       ]
269 |      },
270 |      "execution_count": 15,
271 |      "metadata": {},
272 |      "output_type": "execute_result"
273 |     }
274 |    ],
275 |    "source": [
276 |     "r = requests.get(\n",
277 |     "    \"http://0.0.0.0:9005/datasets/ww3/edr/position\"\n",
278 |     "    \"?coords=POINT(-69.35 43.72)\"\n",
279 |     "    \"&parameter-name=sea_surface_wave_significant_height\"\n",
280 |     ")\n",
281 |     "r.json()"
282 |    ]
283 |   },
284 |   {
285 |    "cell_type": "markdown",
286 |    "id": "fbbc52c0-e622-4aae-ae37-46896a0a62ec",
287 |    "metadata": {},
288 |    "source": [
289 |     "### `datetime`\n",
290 |     "\n",
291 |     "The next query param of interest to most users will be datetime. This will take either a single datetime and a range as [ISO formatted string](https://en.wikipedia.org/wiki/ISO_8601). To use a range, put a slash between the two times.\n",
292 |     "\n",
293 |     "```{admonition} The trouble with timezones\n",
294 |     ":class: warning\n",
295 |     "\n",
296 |     "The date format needs to match if the dataset is timezone aware, or not.\n",
297 |     "\n",
298 |     "```\n",
299 |     "\n",
300 |     "So we can add `&datetime=2022-04-11T12:00:00/2022-04-11T23:00:00` to our previous query to restrict down the response further."
301 |    ]
302 |   },
303 |   {
304 |    "cell_type": "code",
305 |    "execution_count": 13,
306 |    "id": "567d1fa4-eea9-4c44-b155-f4db633b1bad",
307 |    "metadata": {},
308 |    "outputs": [
309 |     {
310 |      "data": {
311 |       "text/plain": [
312 |        "{'type': 'Coverage',\n",
313 |        " 'domain': {'type': 'Domain',\n",
314 |        "  'domainType': 'Grid',\n",
315 |        "  'axes': {'t': {'values': ['2022-04-11T12:00:00',\n",
316 |        "     '2022-04-11T12:59:59',\n",
317 |        "     '2022-04-11T14:00:00',\n",
318 |        "     '2022-04-11T15:00:00',\n",
319 |        "     '2022-04-11T15:59:59',\n",
320 |        "     '2022-04-11T17:00:00',\n",
321 |        "     '2022-04-11T18:00:00',\n",
322 |        "     '2022-04-11T18:59:59',\n",
323 |        "     '2022-04-11T20:00:00',\n",
324 |        "     '2022-04-11T21:00:00',\n",
325 |        "     '2022-04-11T21:59:59',\n",
326 |        "     '2022-04-11T23:00:00']},\n",
327 |        "   'forecast_reference_time': {'values': ['2022-04-11T12:00:00']}},\n",
328 |        "  'referencing': []},\n",
329 |        " 'parameters': {'hs': {'type': 'Parameter',\n",
330 |        "   'observedProperty': {'label': {'en': 'significant height of wind and swell waves'}},\n",
331 |        "   'description': {'en': 'significant height of wind and swell waves'},\n",
332 |        "   'unit': {'label': {'en': 'm'}}}},\n",
333 |        " 'ranges': {'hs': {'type': 'NdArray',\n",
334 |        "   'dataType': 'float',\n",
335 |        "   'axisNames': ['forecast_reference_time', 't'],\n",
336 |        "   'shape': [1, 12],\n",
337 |        "   'values': [0.33467215299606323,\n",
338 |        "    0.3588910698890686,\n",
339 |        "    0.3660368025302887,\n",
340 |        "    0.3152061402797699,\n",
341 |        "    0.2875429093837738,\n",
342 |        "    0.33364781737327576,\n",
343 |        "    0.42414912581443787,\n",
344 |        "    0.5218766927719116,\n",
345 |        "    0.599566638469696,\n",
346 |        "    0.6628382802009583,\n",
347 |        "    0.6959347724914551,\n",
348 |        "    0.7017455697059631]}}}"
349 |       ]
350 |      },
351 |      "execution_count": 13,
352 |      "metadata": {},
353 |      "output_type": "execute_result"
354 |     }
355 |    ],
356 |    "source": [
357 |     "r = requests.get(\n",
358 |     "    \"http://0.0.0.0:9005/datasets/ww3/edr/position\"\n",
359 |     "    \"?coords=POINT(-69.35 43.72)\"\n",
360 |     "    \"&parameter-name=sea_surface_wave_significant_height\"\n",
361 |     "    \"&datetime=2022-04-11T12:00:00/2022-04-11T23:00:00\"\n",
362 |     ")\n",
363 |     "r.json()"
364 |    ]
365 |   },
366 |   {
367 |    "cell_type": "markdown",
368 |    "id": "f7dec3d6-61ad-4187-b2e9-e5416e3bb705",
369 |    "metadata": {},
370 |    "source": [
371 |     "### `f` for format\n",
372 |     "\n",
373 |     "While CoverageJSON is useful for browser based access, other formats can be useful in other contexts. For that the `f` query parameter can be passed.\n",
374 |     "\n",
375 |     "Currently `csv` for CSV files, and `nc` for NetCDF files have been added.\n",
376 |     "\n",
377 |     "````{margin}\n",
378 |     "```{admonition} Future formats\n",
379 |     "\n",
380 |     "Once we build a package to make the EDR router easier to install, \n",
381 |     "it could be interesting to use [entrypoints](https://amir.rachum.com/blog/2017/07/28/python-entry-points/) to support the addition of more formats.\n",
382 |     "\n",
383 |     "```\n",
384 |     "````"
385 |    ]
386 |   },
387 |   {
388 |    "cell_type": "markdown",
389 |    "id": "01e8b750-16dc-4c27-80c2-0d4cc224c2aa",
390 |    "metadata": {},
391 |    "source": [
392 |     "### Extra coordinates\n",
393 |     "\n",
394 |     "If there are extra coordinates they can also be included as query parameters. Similar to the `datetime` query param, `/` is supported for a range to slice on in place of selecting.\n",
395 |     "\n",
396 |     "For this dataset, if we used `&time=2022-04-11T12:00:00/2022-04-11T23:00:00` we would have gotten the same result as the last query."
397 |    ]
398 |   },
399 |   {
400 |    "cell_type": "code",
401 |    "execution_count": null,
402 |    "id": "a2c06284-7f9b-4c37-85b4-27d98629290c",
403 |    "metadata": {},
404 |    "outputs": [],
405 |    "source": [
406 |     "full_url = \"http://0.0.0.0:9005/datasets/ww3/edr/position?coords=POINT(-69.35 43.72)&parameter-name=sea_surface_wave_significant_height,dir,t02&datetime=2022-04-11T12:00:00/2022-04-11T23:00:00&f=csv\""
407 |    ]
408 |   },
409 |   {
410 |    "cell_type": "markdown",
411 |    "id": "54660416-c03f-4577-a3a7-bbfe2ae251e1",
412 |    "metadata": {},
413 |    "source": [
414 |     "## API Reference\n",
415 |     "\n",
416 |     "```{eval-rst}\n",
417 |     ".. openapi:: ./openapi.json\n",
418 |     "    :include:\n",
419 |     "        /datasets/{dataset_id}/edr/*\n",
420 |     "    \n",
421 |     "```"
422 |    ]
423 |   }
424 |  ],
425 |  "metadata": {
426 |   "kernelspec": {
427 |    "display_name": "restful-grids",
428 |    "language": "python",
429 |    "name": "restful-grids"
430 |   },
431 |   "language_info": {
432 |    "codemirror_mode": {
433 |     "name": "ipython",
434 |     "version": 3
435 |    },
436 |    "file_extension": ".py",
437 |    "mimetype": "text/x-python",
438 |    "name": "python",
439 |    "nbconvert_exporter": "python",
440 |    "pygments_lexer": "ipython3",
441 |    "version": "3.10.4"
442 |   }
443 |  },
444 |  "nbformat": 4,
445 |  "nbformat_minor": 5
446 | }
447 | 


--------------------------------------------------------------------------------
/S3 bucket access.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "id": "e799ad23-7448-4fc6-8519-41fcbed97b26",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "import fsspec\n",
 11 |     "import s3fs\n",
 12 |     "import xarray as xr"
 13 |    ]
 14 |   },
 15 |   {
 16 |    "cell_type": "code",
 17 |    "execution_count": 9,
 18 |    "id": "64f71339-89e9-4741-abba-2f896560be72",
 19 |    "metadata": {},
 20 |    "outputs": [],
 21 |    "source": [
 22 |     "from fsspec.registry import known_implementations"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "code",
 27 |    "execution_count": 11,
 28 |    "id": "34910a8c-2071-4f94-a520-dd737a4eab03",
 29 |    "metadata": {},
 30 |    "outputs": [
 31 |     {
 32 |      "data": {
 33 |       "text/plain": [
 34 |        "{'file': {'class': 'fsspec.implementations.local.LocalFileSystem'},\n",
 35 |        " 'memory': {'class': 'fsspec.implementations.memory.MemoryFileSystem'},\n",
 36 |        " 'dropbox': {'class': 'dropboxdrivefs.DropboxDriveFileSystem',\n",
 37 |        "  'err': 'DropboxFileSystem requires \"dropboxdrivefs\",\"requests\" and \"dropbox\" to be installed'},\n",
 38 |        " 'http': {'class': 'fsspec.implementations.http.HTTPFileSystem',\n",
 39 |        "  'err': 'HTTPFileSystem requires \"requests\" and \"aiohttp\" to be installed'},\n",
 40 |        " 'https': {'class': 'fsspec.implementations.http.HTTPFileSystem',\n",
 41 |        "  'err': 'HTTPFileSystem requires \"requests\" and \"aiohttp\" to be installed'},\n",
 42 |        " 'zip': {'class': 'fsspec.implementations.zip.ZipFileSystem'},\n",
 43 |        " 'tar': {'class': 'fsspec.implementations.tar.TarFileSystem'},\n",
 44 |        " 'gcs': {'class': 'gcsfs.GCSFileSystem',\n",
 45 |        "  'err': 'Please install gcsfs to access Google Storage'},\n",
 46 |        " 'gs': {'class': 'gcsfs.GCSFileSystem',\n",
 47 |        "  'err': 'Please install gcsfs to access Google Storage'},\n",
 48 |        " 'gdrive': {'class': 'gdrivefs.GoogleDriveFileSystem',\n",
 49 |        "  'err': 'Please install gdrivefs for access to Google Drive'},\n",
 50 |        " 'sftp': {'class': 'fsspec.implementations.sftp.SFTPFileSystem',\n",
 51 |        "  'err': 'SFTPFileSystem requires \"paramiko\" to be installed'},\n",
 52 |        " 'ssh': {'class': 'fsspec.implementations.sftp.SFTPFileSystem',\n",
 53 |        "  'err': 'SFTPFileSystem requires \"paramiko\" to be installed'},\n",
 54 |        " 'ftp': {'class': 'fsspec.implementations.ftp.FTPFileSystem'},\n",
 55 |        " 'hdfs': {'class': 'fsspec.implementations.hdfs.PyArrowHDFS',\n",
 56 |        "  'err': 'pyarrow and local java libraries required for HDFS'},\n",
 57 |        " 'arrow_hdfs': {'class': 'fsspec.implementations.arrow.HadoopFileSystem',\n",
 58 |        "  'err': 'pyarrow and local java libraries required for HDFS'},\n",
 59 |        " 'webhdfs': {'class': 'fsspec.implementations.webhdfs.WebHDFS',\n",
 60 |        "  'err': 'webHDFS access requires \"requests\" to be installed'},\n",
 61 |        " 's3': {'class': 's3fs.S3FileSystem', 'err': 'Install s3fs to access S3'},\n",
 62 |        " 's3a': {'class': 's3fs.S3FileSystem', 'err': 'Install s3fs to access S3'},\n",
 63 |        " 'wandb': {'class': 'wandbfs.WandbFS',\n",
 64 |        "  'err': 'Install wandbfs to access wandb'},\n",
 65 |        " 'oci': {'class': 'ocifs.OCIFileSystem',\n",
 66 |        "  'err': 'Install ocifs to access OCI Object Storage'},\n",
 67 |        " 'adl': {'class': 'adlfs.AzureDatalakeFileSystem',\n",
 68 |        "  'err': 'Install adlfs to access Azure Datalake Gen1'},\n",
 69 |        " 'abfs': {'class': 'adlfs.AzureBlobFileSystem',\n",
 70 |        "  'err': 'Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage'},\n",
 71 |        " 'az': {'class': 'adlfs.AzureBlobFileSystem',\n",
 72 |        "  'err': 'Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage'},\n",
 73 |        " 'cached': {'class': 'fsspec.implementations.cached.CachingFileSystem'},\n",
 74 |        " 'blockcache': {'class': 'fsspec.implementations.cached.CachingFileSystem'},\n",
 75 |        " 'filecache': {'class': 'fsspec.implementations.cached.WholeFileCacheFileSystem'},\n",
 76 |        " 'simplecache': {'class': 'fsspec.implementations.cached.SimpleCacheFileSystem'},\n",
 77 |        " 'dask': {'class': 'fsspec.implementations.dask.DaskWorkerFileSystem',\n",
 78 |        "  'err': 'Install dask distributed to access worker file system'},\n",
 79 |        " 'dbfs': {'class': 'fsspec.implementations.dbfs.DatabricksFileSystem',\n",
 80 |        "  'err': 'Install the requests package to use the DatabricksFileSystem'},\n",
 81 |        " 'github': {'class': 'fsspec.implementations.github.GithubFileSystem',\n",
 82 |        "  'err': 'Install the requests package to use the github FS'},\n",
 83 |        " 'git': {'class': 'fsspec.implementations.git.GitFileSystem',\n",
 84 |        "  'err': 'Install pygit2 to browse local git repos'},\n",
 85 |        " 'smb': {'class': 'fsspec.implementations.smb.SMBFileSystem',\n",
 86 |        "  'err': 'SMB requires \"smbprotocol\" or \"smbprotocol[kerberos]\" installed'},\n",
 87 |        " 'jupyter': {'class': 'fsspec.implementations.jupyter.JupyterFileSystem',\n",
 88 |        "  'err': 'Jupyter FS requires requests to be installed'},\n",
 89 |        " 'jlab': {'class': 'fsspec.implementations.jupyter.JupyterFileSystem',\n",
 90 |        "  'err': 'Jupyter FS requires requests to be installed'},\n",
 91 |        " 'libarchive': {'class': 'fsspec.implementations.libarchive.LibArchiveFileSystem',\n",
 92 |        "  'err': 'LibArchive requires to be installed'},\n",
 93 |        " 'reference': {'class': 'fsspec.implementations.reference.ReferenceFileSystem'}}"
 94 |       ]
 95 |      },
 96 |      "execution_count": 11,
 97 |      "metadata": {},
 98 |      "output_type": "execute_result"
 99 |     }
100 |    ],
101 |    "source": [
102 |     "known_implementations"
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "code",
107 |    "execution_count": 12,
108 |    "id": "6f14362f-0df4-4cb4-af74-39a126f56f4a",
109 |    "metadata": {},
110 |    "outputs": [
111 |     {
112 |      "data": {
113 |       "text/plain": [
114 |        "<s3fs.core.S3FileSystem at 0x183ba07f0>"
115 |       ]
116 |      },
117 |      "execution_count": 12,
118 |      "metadata": {},
119 |      "output_type": "execute_result"
120 |     }
121 |    ],
122 |    "source": [
123 |     "fs = s3fs.S3FileSystem(anon=True)\n",
124 |     "fs"
125 |    ]
126 |   },
127 |   {
128 |    "cell_type": "code",
129 |    "execution_count": 13,
130 |    "id": "ca57afe1-53bc-439f-86cd-d99bf9d675eb",
131 |    "metadata": {},
132 |    "outputs": [
133 |     {
134 |      "ename": "PermissionError",
135 |      "evalue": "Access Denied",
136 |      "output_type": "error",
137 |      "traceback": [
138 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
139 |       "\u001b[0;31mClientError\u001b[0m                               Traceback (most recent call last)",
140 |       "File \u001b[0;32m/usr/local/miniconda3/envs/code-sprint-2022/lib/python3.10/site-packages/s3fs/core.py:614\u001b[0m, in \u001b[0;36mS3FileSystem._lsdir\u001b[0;34m(self, path, refresh, max_items, delimiter, prefix)\u001b[0m\n\u001b[1;32m    613\u001b[0m dircache \u001b[38;5;241m=\u001b[39m []\n\u001b[0;32m--> 614\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m it:\n\u001b[1;32m    615\u001b[0m     dircache\u001b[38;5;241m.\u001b[39mextend(i\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCommonPrefixes\u001b[39m\u001b[38;5;124m\"\u001b[39m, []))\n",
141 |       "File \u001b[0;32m/usr/local/miniconda3/envs/code-sprint-2022/lib/python3.10/site-packages/aiobotocore/paginate.py:32\u001b[0m, in \u001b[0;36mAioPageIterator.__anext__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m     31\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m---> 32\u001b[0m     response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_request(current_kwargs)\n\u001b[1;32m     33\u001b[0m     parsed \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_extract_parsed_response(response)\n",
142 |       "File \u001b[0;32m/usr/local/miniconda3/envs/code-sprint-2022/lib/python3.10/site-packages/aiobotocore/client.py:228\u001b[0m, in \u001b[0;36mAioBaseClient._make_api_call\u001b[0;34m(self, operation_name, api_params)\u001b[0m\n\u001b[1;32m    227\u001b[0m     error_class \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mfrom_code(error_code)\n\u001b[0;32m--> 228\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m error_class(parsed_response, operation_name)\n\u001b[1;32m    229\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
143 |       "\u001b[0;31mClientError\u001b[0m: An error occurred (AccessDenied) when calling the ListObjectsV2 operation: Access Denied",
144 |       "\nThe above exception was the direct cause of the following exception:\n",
145 |       "\u001b[0;31mPermissionError\u001b[0m                           Traceback (most recent call last)",
146 |       "Input \u001b[0;32mIn [13]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mfs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mls\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mioos-code-sprint-2022\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n",
147 |       "File \u001b[0;32m/usr/local/miniconda3/envs/code-sprint-2022/lib/python3.10/site-packages/fsspec/asyn.py:85\u001b[0m, in \u001b[0;36msync_wrapper.<locals>.wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m     82\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m     83\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrapper\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m     84\u001b[0m     \u001b[38;5;28mself\u001b[39m \u001b[38;5;241m=\u001b[39m obj \u001b[38;5;129;01mor\u001b[39;00m args[\u001b[38;5;241m0\u001b[39m]\n\u001b[0;32m---> 85\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43msync\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mloop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
148 |       "File \u001b[0;32m/usr/local/miniconda3/envs/code-sprint-2022/lib/python3.10/site-packages/fsspec/asyn.py:65\u001b[0m, in \u001b[0;36msync\u001b[0;34m(loop, func, timeout, *args, **kwargs)\u001b[0m\n\u001b[1;32m     63\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m FSTimeoutError \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mreturn_result\u001b[39;00m\n\u001b[1;32m     64\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(return_result, \u001b[38;5;167;01mBaseException\u001b[39;00m):\n\u001b[0;32m---> 65\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m return_result\n\u001b[1;32m     66\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m     67\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m return_result\n",
149 |       "File \u001b[0;32m/usr/local/miniconda3/envs/code-sprint-2022/lib/python3.10/site-packages/fsspec/asyn.py:25\u001b[0m, in \u001b[0;36m_runner\u001b[0;34m(event, coro, result, timeout)\u001b[0m\n\u001b[1;32m     23\u001b[0m     coro \u001b[38;5;241m=\u001b[39m asyncio\u001b[38;5;241m.\u001b[39mwait_for(coro, timeout\u001b[38;5;241m=\u001b[39mtimeout)\n\u001b[1;32m     24\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 25\u001b[0m     result[\u001b[38;5;241m0\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m coro\n\u001b[1;32m     26\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m ex:\n\u001b[1;32m     27\u001b[0m     result[\u001b[38;5;241m0\u001b[39m] \u001b[38;5;241m=\u001b[39m ex\n",
150 |       "File \u001b[0;32m/usr/local/miniconda3/envs/code-sprint-2022/lib/python3.10/site-packages/s3fs/core.py:831\u001b[0m, in \u001b[0;36mS3FileSystem._ls\u001b[0;34m(self, path, detail, refresh)\u001b[0m\n\u001b[1;32m    829\u001b[0m     files \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_lsbuckets(refresh)\n\u001b[1;32m    830\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 831\u001b[0m     files \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_lsdir(path, refresh)\n\u001b[1;32m    832\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m files \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m path:\n\u001b[1;32m    833\u001b[0m         files \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_lsdir(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_parent(path), refresh\u001b[38;5;241m=\u001b[39mrefresh)\n",
151 |       "File \u001b[0;32m/usr/local/miniconda3/envs/code-sprint-2022/lib/python3.10/site-packages/s3fs/core.py:637\u001b[0m, in \u001b[0;36mS3FileSystem._lsdir\u001b[0;34m(self, path, refresh, max_items, delimiter, prefix)\u001b[0m\n\u001b[1;32m    635\u001b[0m         f[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mname\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m f[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mKey\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m    636\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ClientError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m--> 637\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m translate_boto_error(e)\n\u001b[1;32m    639\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m delimiter \u001b[38;5;129;01mand\u001b[39;00m files:\n\u001b[1;32m    640\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdircache[path] \u001b[38;5;241m=\u001b[39m files\n",
152 |       "\u001b[0;31mPermissionError\u001b[0m: Access Denied"
153 |      ]
154 |     }
155 |    ],
156 |    "source": [
157 |     "fs.ls('ioos-code-sprint-2022')"
158 |    ]
159 |   },
160 |   {
161 |    "cell_type": "code",
162 |    "execution_count": 8,
163 |    "id": "76ecf5b5-113a-4311-a4ca-2aaaebb5a1ef",
164 |    "metadata": {},
165 |    "outputs": [
166 |     {
167 |      "ename": "ValueError",
168 |      "evalue": "Protocol not known: s3://ioos-code-sprint-2022",
169 |      "output_type": "error",
170 |      "traceback": [
171 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
172 |       "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
173 |       "Input \u001b[0;32mIn [8]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m fs \u001b[38;5;241m=\u001b[39m \u001b[43mfsspec\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfilesystem\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43ms3://ioos-code-sprint-2022\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m      2\u001b[0m fs\n",
174 |       "File \u001b[0;32m/usr/local/miniconda3/envs/code-sprint-2022/lib/python3.10/site-packages/fsspec/registry.py:252\u001b[0m, in \u001b[0;36mfilesystem\u001b[0;34m(protocol, **storage_options)\u001b[0m\n\u001b[1;32m    246\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfilesystem\u001b[39m(protocol, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mstorage_options):\n\u001b[1;32m    247\u001b[0m     \u001b[38;5;124;03m\"\"\"Instantiate filesystems for given protocol and arguments\u001b[39;00m\n\u001b[1;32m    248\u001b[0m \n\u001b[1;32m    249\u001b[0m \u001b[38;5;124;03m    ``storage_options`` are specific to the protocol being chosen, and are\u001b[39;00m\n\u001b[1;32m    250\u001b[0m \u001b[38;5;124;03m    passed directly to the class.\u001b[39;00m\n\u001b[1;32m    251\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[0;32m--> 252\u001b[0m     \u001b[38;5;28mcls\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[43mget_filesystem_class\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprotocol\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    253\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mcls\u001b[39m(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mstorage_options)\n",
175 |       "File \u001b[0;32m/usr/local/miniconda3/envs/code-sprint-2022/lib/python3.10/site-packages/fsspec/registry.py:216\u001b[0m, in \u001b[0;36mget_filesystem_class\u001b[0;34m(protocol)\u001b[0m\n\u001b[1;32m    214\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m protocol \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m registry:\n\u001b[1;32m    215\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m protocol \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m known_implementations:\n\u001b[0;32m--> 216\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mProtocol not known: \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m protocol)\n\u001b[1;32m    217\u001b[0m     bit \u001b[38;5;241m=\u001b[39m known_implementations[protocol]\n\u001b[1;32m    218\u001b[0m     \u001b[38;5;28;01mtry\u001b[39;00m:\n",
176 |       "\u001b[0;31mValueError\u001b[0m: Protocol not known: s3://ioos-code-sprint-2022"
177 |      ]
178 |     }
179 |    ],
180 |    "source": [
181 |     "fs = fsspec.filesystem(\"s3://ioos-code-sprint-2022\")\n",
182 |     "fs"
183 |    ]
184 |   },
185 |   {
186 |    "cell_type": "code",
187 |    "execution_count": 3,
188 |    "id": "dfb529a2-dc8c-4fb3-820f-4711ca5838b6",
189 |    "metadata": {},
190 |    "outputs": [
191 |     {
192 |      "ename": "AttributeError",
193 |      "evalue": "'OpenFile' object has no attribute 'ls'",
194 |      "output_type": "error",
195 |      "traceback": [
196 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
197 |       "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
198 |       "Input \u001b[0;32mIn [3]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mfs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mls\u001b[49m()\n",
199 |       "\u001b[0;31mAttributeError\u001b[0m: 'OpenFile' object has no attribute 'ls'"
200 |      ]
201 |     }
202 |    ],
203 |    "source": [
204 |     "fs.ls()"
205 |    ]
206 |   },
207 |   {
208 |    "cell_type": "code",
209 |    "execution_count": 4,
210 |    "id": "ec344bbe-81ac-4d4a-a583-f12075baecce",
211 |    "metadata": {},
212 |    "outputs": [
213 |     {
214 |      "data": {
215 |       "text/plain": [
216 |        "['__class__',\n",
217 |        " '__del__',\n",
218 |        " '__delattr__',\n",
219 |        " '__dict__',\n",
220 |        " '__dir__',\n",
221 |        " '__doc__',\n",
222 |        " '__enter__',\n",
223 |        " '__eq__',\n",
224 |        " '__exit__',\n",
225 |        " '__format__',\n",
226 |        " '__fspath__',\n",
227 |        " '__ge__',\n",
228 |        " '__getattribute__',\n",
229 |        " '__gt__',\n",
230 |        " '__hash__',\n",
231 |        " '__init__',\n",
232 |        " '__init_subclass__',\n",
233 |        " '__le__',\n",
234 |        " '__lt__',\n",
235 |        " '__module__',\n",
236 |        " '__ne__',\n",
237 |        " '__new__',\n",
238 |        " '__reduce__',\n",
239 |        " '__reduce_ex__',\n",
240 |        " '__repr__',\n",
241 |        " '__setattr__',\n",
242 |        " '__sizeof__',\n",
243 |        " '__str__',\n",
244 |        " '__subclasshook__',\n",
245 |        " '__weakref__',\n",
246 |        " 'close',\n",
247 |        " 'compression',\n",
248 |        " 'encoding',\n",
249 |        " 'errors',\n",
250 |        " 'fobjects',\n",
251 |        " 'fs',\n",
252 |        " 'full_name',\n",
253 |        " 'mode',\n",
254 |        " 'newline',\n",
255 |        " 'open',\n",
256 |        " 'path']"
257 |       ]
258 |      },
259 |      "execution_count": 4,
260 |      "metadata": {},
261 |      "output_type": "execute_result"
262 |     }
263 |    ],
264 |    "source": [
265 |     "dir(fs)"
266 |    ]
267 |   },
268 |   {
269 |    "cell_type": "code",
270 |    "execution_count": 17,
271 |    "id": "4aa1333b-14c1-4e96-bbdc-68f1800f506d",
272 |    "metadata": {},
273 |    "outputs": [
274 |     {
275 |      "name": "stdout",
276 |      "output_type": "stream",
277 |      "text": [
278 |       "{\n",
279 |       "  \"type\": \"Catalog\",\n",
280 |       "  \"id\": \"DMAC-ZARR\",\n",
281 |       "  \"stac_version\": \"1.0.0\",\n",
282 |       "  \"description\": \"Experimental Catalog for Next-Gen DMAC\",\n",
283 |       "  \"links\": [\n",
284 |       "    {\n",
285 |       "      \"rel\": \"root\",\n",
286 |       "      \"href\": \"./catalog.json\",\n",
287 |       "      \"type\": \"application/json\"\n",
288 |       "    },\n",
289 |       "    {\n",
290 |       "      \"rel\": \"child\",\n",
291 |       "      \"href\": \"./CBOFS/collection.json\",\n",
292 |       "      \"type\": \"application/json\"\n",
293 |       "    },\n",
294 |       "    {\n",
295 |       "      \"rel\": \"child\",\n",
296 |       "      \"href\": \"./GFS/collection.json\",\n",
297 |       "      \"type\": \"application/json\"\n",
298 |       "    },\n",
299 |       "    {\n",
300 |       "      \"rel\": \"child\",\n",
301 |       "      \"href\": \"./GFSWAVE/collection.json\",\n",
302 |       "      \"type\": \"application/json\"\n",
303 |       "    },\n",
304 |       "    {\n",
305 |       "      \"rel\": \"self\",\n",
306 |       "      \"href\": \"s3://dmac-zarr/catalog.json\",\n",
307 |       "      \"type\": \"application/json\"\n",
308 |       "    }\n",
309 |       "  ],\n",
310 |       "  \"stac_extensions\": []\n",
311 |       "}\n"
312 |      ]
313 |     }
314 |    ],
315 |    "source": [
316 |     "print(fs.cat_file(\"ioos-code-sprint-2022/catalog.json\").decode(\"utf-8\"))"
317 |    ]
318 |   },
319 |   {
320 |    "cell_type": "code",
321 |    "execution_count": 19,
322 |    "id": "ca20241c-20a6-4c78-becc-441acc0e0915",
323 |    "metadata": {},
324 |    "outputs": [
325 |     {
326 |      "ename": "PermissionError",
327 |      "evalue": "Access Denied",
328 |      "output_type": "error",
329 |      "traceback": [
330 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
331 |       "\u001b[0;31mClientError\u001b[0m                               Traceback (most recent call last)",
332 |       "File \u001b[0;32m/usr/local/miniconda3/envs/code-sprint-2022/lib/python3.10/site-packages/s3fs/core.py:614\u001b[0m, in \u001b[0;36mS3FileSystem._lsdir\u001b[0;34m(self, path, refresh, max_items, delimiter, prefix)\u001b[0m\n\u001b[1;32m    613\u001b[0m dircache \u001b[38;5;241m=\u001b[39m []\n\u001b[0;32m--> 614\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m it:\n\u001b[1;32m    615\u001b[0m     dircache\u001b[38;5;241m.\u001b[39mextend(i\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCommonPrefixes\u001b[39m\u001b[38;5;124m\"\u001b[39m, []))\n",
333 |       "File \u001b[0;32m/usr/local/miniconda3/envs/code-sprint-2022/lib/python3.10/site-packages/aiobotocore/paginate.py:32\u001b[0m, in \u001b[0;36mAioPageIterator.__anext__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m     31\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m---> 32\u001b[0m     response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_request(current_kwargs)\n\u001b[1;32m     33\u001b[0m     parsed \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_extract_parsed_response(response)\n",
334 |       "File \u001b[0;32m/usr/local/miniconda3/envs/code-sprint-2022/lib/python3.10/site-packages/aiobotocore/client.py:228\u001b[0m, in \u001b[0;36mAioBaseClient._make_api_call\u001b[0;34m(self, operation_name, api_params)\u001b[0m\n\u001b[1;32m    227\u001b[0m     error_class \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mfrom_code(error_code)\n\u001b[0;32m--> 228\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m error_class(parsed_response, operation_name)\n\u001b[1;32m    229\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
335 |       "\u001b[0;31mClientError\u001b[0m: An error occurred (AccessDenied) when calling the ListObjectsV2 operation: Access Denied",
336 |       "\nThe above exception was the direct cause of the following exception:\n",
337 |       "\u001b[0;31mPermissionError\u001b[0m                           Traceback (most recent call last)",
338 |       "Input \u001b[0;32mIn [19]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mfs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mls\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mioos-code-sprint-2022\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
339 |       "File \u001b[0;32m/usr/local/miniconda3/envs/code-sprint-2022/lib/python3.10/site-packages/fsspec/asyn.py:85\u001b[0m, in \u001b[0;36msync_wrapper.<locals>.wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m     82\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m     83\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrapper\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m     84\u001b[0m     \u001b[38;5;28mself\u001b[39m \u001b[38;5;241m=\u001b[39m obj \u001b[38;5;129;01mor\u001b[39;00m args[\u001b[38;5;241m0\u001b[39m]\n\u001b[0;32m---> 85\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43msync\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mloop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
340 |       "File \u001b[0;32m/usr/local/miniconda3/envs/code-sprint-2022/lib/python3.10/site-packages/fsspec/asyn.py:65\u001b[0m, in \u001b[0;36msync\u001b[0;34m(loop, func, timeout, *args, **kwargs)\u001b[0m\n\u001b[1;32m     63\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m FSTimeoutError \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mreturn_result\u001b[39;00m\n\u001b[1;32m     64\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(return_result, \u001b[38;5;167;01mBaseException\u001b[39;00m):\n\u001b[0;32m---> 65\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m return_result\n\u001b[1;32m     66\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m     67\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m return_result\n",
341 |       "File \u001b[0;32m/usr/local/miniconda3/envs/code-sprint-2022/lib/python3.10/site-packages/fsspec/asyn.py:25\u001b[0m, in \u001b[0;36m_runner\u001b[0;34m(event, coro, result, timeout)\u001b[0m\n\u001b[1;32m     23\u001b[0m     coro \u001b[38;5;241m=\u001b[39m asyncio\u001b[38;5;241m.\u001b[39mwait_for(coro, timeout\u001b[38;5;241m=\u001b[39mtimeout)\n\u001b[1;32m     24\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 25\u001b[0m     result[\u001b[38;5;241m0\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m coro\n\u001b[1;32m     26\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m ex:\n\u001b[1;32m     27\u001b[0m     result[\u001b[38;5;241m0\u001b[39m] \u001b[38;5;241m=\u001b[39m ex\n",
342 |       "File \u001b[0;32m/usr/local/miniconda3/envs/code-sprint-2022/lib/python3.10/site-packages/s3fs/core.py:831\u001b[0m, in \u001b[0;36mS3FileSystem._ls\u001b[0;34m(self, path, detail, refresh)\u001b[0m\n\u001b[1;32m    829\u001b[0m     files \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_lsbuckets(refresh)\n\u001b[1;32m    830\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 831\u001b[0m     files \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_lsdir(path, refresh)\n\u001b[1;32m    832\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m files \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m path:\n\u001b[1;32m    833\u001b[0m         files \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_lsdir(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_parent(path), refresh\u001b[38;5;241m=\u001b[39mrefresh)\n",
343 |       "File \u001b[0;32m/usr/local/miniconda3/envs/code-sprint-2022/lib/python3.10/site-packages/s3fs/core.py:637\u001b[0m, in \u001b[0;36mS3FileSystem._lsdir\u001b[0;34m(self, path, refresh, max_items, delimiter, prefix)\u001b[0m\n\u001b[1;32m    635\u001b[0m         f[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mname\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m f[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mKey\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m    636\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ClientError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m--> 637\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m translate_boto_error(e)\n\u001b[1;32m    639\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m delimiter \u001b[38;5;129;01mand\u001b[39;00m files:\n\u001b[1;32m    640\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdircache[path] \u001b[38;5;241m=\u001b[39m files\n",
344 |       "\u001b[0;31mPermissionError\u001b[0m: Access Denied"
345 |      ]
346 |     }
347 |    ],
348 |    "source": [
349 |     "fs.ls(\"ioos-code-sprint-2022\")"
350 |    ]
351 |   }
352 |  ],
353 |  "metadata": {
354 |   "kernelspec": {
355 |    "display_name": "restful-grids",
356 |    "language": "python",
357 |    "name": "restful-grids"
358 |   },
359 |   "language_info": {
360 |    "codemirror_mode": {
361 |     "name": "ipython",
362 |     "version": 3
363 |    },
364 |    "file_extension": ".py",
365 |    "mimetype": "text/x-python",
366 |    "name": "python",
367 |    "nbconvert_exporter": "python",
368 |    "pygments_lexer": "ipython3",
369 |    "version": "3.10.4"
370 |   }
371 |  },
372 |  "nbformat": 4,
373 |  "nbformat_minor": 5
374 | }
375 | 


--------------------------------------------------------------------------------