├── .github ├── dependabot.yml └── workflows │ ├── deploy-docs.yml │ ├── pypi.yml │ └── tests.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE.txt ├── MANIFEST.in ├── README.md ├── docs ├── Makefile ├── conf.py ├── development.rst ├── index.rst ├── notebooks │ ├── Readme.md │ ├── adcp.ipynb │ ├── full.nc │ ├── hello.nc │ └── imp.nc └── requirements.txt ├── pocean ├── __init__.py ├── cf.py ├── dataset.py ├── dsg │ ├── __init__.py │ ├── profile │ │ ├── __init__.py │ │ ├── im.py │ │ └── om.py │ ├── timeseries │ │ ├── __init__.py │ │ ├── cr.py │ │ ├── im.py │ │ ├── ir.py │ │ └── om.py │ ├── timeseriesProfile │ │ ├── __init__.py │ │ ├── im.py │ │ ├── om.py │ │ └── r.py │ ├── trajectory │ │ ├── __init__.py │ │ ├── cr.py │ │ ├── im.py │ │ └── ir.py │ ├── trajectoryProfile │ │ ├── __init__.py │ │ └── cr.py │ └── utils.py ├── grid │ └── __init__.py ├── meta.py ├── tests │ ├── __init__.py │ ├── download_test_data.py │ ├── dsg │ │ ├── __init__.py │ │ ├── profile │ │ │ ├── test_profile_im.py │ │ │ └── test_profile_om.py │ │ ├── test_new.py │ │ ├── test_utils.py │ │ ├── timeseries │ │ │ ├── test_timeseries_im.py │ │ │ └── test_timeseries_om.py │ │ ├── timeseriesProfile │ │ │ ├── test_timeseriesProfile_im.py │ │ │ ├── test_timeseriesProfile_om.py │ │ │ └── test_timeseriesProfile_r.py │ │ ├── trajectory │ │ │ ├── test_trajectory_cr.py │ │ │ └── test_trajectory_im.py │ │ └── trajectoryProfile │ │ │ └── test_trajectoryProfile_cr.py │ ├── test_cf.py │ ├── test_nc.py │ └── test_utils.py └── utils.py ├── pyproject.toml ├── requirements-dev.txt └── requirements.txt /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # See https://docs.github.com/en/code-security/supply-chain-security/keeping-your-dependencies-updated-automatically/keeping-your-actions-up-to-date-with-dependabot 2 | 3 | version: 2 4 | updates: 5 | 6 | - package-ecosystem: "github-actions" 7 | directory: "/" 8 | schedule: 9 | interval: "daily" 10 | labels: 11 | - "Bot" 12 | groups: 13 | github-actions: 14 | patterns: 15 | - '*' -------------------------------------------------------------------------------- /.github/workflows/deploy-docs.yml: -------------------------------------------------------------------------------- 1 | name: Build and Deploy docs 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - main 8 | release: 9 | types: 10 | - published 11 | 12 | jobs: 13 | build-docs: 14 | runs-on: ubuntu-latest 15 | defaults: 16 | run: 17 | shell: bash -l {0} 18 | 19 | steps: 20 | - name: checkout 21 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 22 | with: 23 | fetch-depth: 0 24 | 25 | - name: Setup Micromamba 26 | uses: mamba-org/setup-micromamba@b09ef9b599704322748535812ca03efb2625677b # v2.0.5 27 | with: 28 | environment-name: TEST 29 | init-shell: bash 30 | create-args: >- 31 | python=3 --file requirements.txt 32 | --file requirements-dev.txt 33 | --file docs/requirements.txt 34 | --channel conda-forge 35 | 36 | - name: Install library 37 | run: | 38 | python -m pip install -e . 
--no-deps --force-reinstall 39 | 40 | - name: Build documentation 41 | run: | 42 | set -e 43 | pushd docs 44 | sphinx-apidoc -M -f -o api ../pocean ../pocean/tests 45 | make clean html linkcheck 46 | popd 47 | 48 | - name: Deploy 49 | if: success() && github.event_name == 'release' 50 | uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 51 | with: 52 | github_token: ${{ secrets.GITHUB_TOKEN }} 53 | publish_dir: docs/_site/html 54 | -------------------------------------------------------------------------------- /.github/workflows/pypi.yml: -------------------------------------------------------------------------------- 1 | name: Publish to PyPI 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - main 8 | release: 9 | types: 10 | - published 11 | 12 | defaults: 13 | run: 14 | shell: bash 15 | 16 | jobs: 17 | packages: 18 | runs-on: ubuntu-latest 19 | steps: 20 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 21 | 22 | - name: Set up Python 23 | uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 24 | with: 25 | python-version: "3.x" 26 | 27 | - name: Get tags 28 | run: git fetch --depth=1 origin +refs/tags/*:refs/tags/* 29 | 30 | - name: Install build tools 31 | run: | 32 | python -m pip install --upgrade build 33 | 34 | - name: Build sdist and binary wheel 35 | run: python -m build --sdist --wheel . --outdir dist 36 | 37 | - name: CheckFiles 38 | run: | 39 | ls dist 40 | python -m pip install --upgrade check-manifest 41 | check-manifest --verbose 42 | 43 | - name: Test wheels 44 | run: | 45 | cd dist && python -m pip install *.whl 46 | python -m pip install --upgrade twine 47 | python -m twine check * 48 | 49 | - name: Publish a Python distribution to PyPI 50 | if: success() && github.event_name == 'release' 51 | uses: pypa/gh-action-pypi-publish@76f52bc884231f62b9a034ebfe128415bbaabdfc # v1.12.4 52 | with: 53 | user: __token__ 54 | password: ${{ secrets.PYPI_PASSWORD }} 55 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: [main] 7 | 8 | jobs: 9 | run: 10 | runs-on: ${{ matrix.os }} 11 | strategy: 12 | matrix: 13 | python-version: [ "3.9", "3.10", "3.11", "3.12", "3.13" ] 14 | os: [ windows-latest, ubuntu-latest, macos-latest ] 15 | fail-fast: false 16 | defaults: 17 | run: 18 | shell: bash -l {0} 19 | 20 | steps: 21 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 22 | with: 23 | fetch-depth: 0 24 | 25 | - name: Setup Micromamba Python ${{ matrix.python-version }} 26 | uses: mamba-org/setup-micromamba@b09ef9b599704322748535812ca03efb2625677b # v2.0.5 27 | with: 28 | environment-name: TEST 29 | init-shell: bash 30 | create-args: >- 31 | python=${{ matrix.python-version }} 32 | --file requirements.txt 33 | --file requirements-dev.txt 34 | --channel conda-forge 35 | 36 | - name: Install library 37 | run: | 38 | python -m pip install -e . 
--no-deps --force-reinstall 39 | 40 | - name: Tests 41 | run: > 42 | python pocean/tests/download_test_data.py 43 | && python -m pytest --pyargs pocean 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | output/* 3 | *.sublime-* 4 | *.swp 5 | build/* 6 | dist/* 7 | resources/ 8 | *.egg-info* 9 | .cache 10 | docs/api 11 | docs/_site 12 | .pytest_cache/ 13 | .envrc 14 | .idea 15 | .vscode 16 | pocean/_version.py 17 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v5.0.0 4 | hooks: 5 | - id: end-of-file-fixer 6 | files: .*\.*.py 7 | - id: trailing-whitespace 8 | files: .*\.*.py 9 | - id: debug-statements 10 | - id: check-ast 11 | - id: check-added-large-files 12 | - id: check-json 13 | - id: check-merge-conflict 14 | - id: check-yaml 15 | - id: requirements-txt-fixer 16 | args: 17 | - requirements.txt 18 | - requirements-dev.txt 19 | 20 | - repo: https://github.com/astral-sh/ruff-pre-commit 21 | rev: v0.11.12 22 | hooks: 23 | - id: ruff 24 | args: ["--fix", "--show-fixes"] 25 | - id: ruff-format 26 | 27 | - repo: https://github.com/tox-dev/pyproject-fmt 28 | rev: "v2.6.0" 29 | hooks: 30 | - id: pyproject-fmt 31 | 32 | - repo: https://github.com/asottile/pyupgrade 33 | rev: v3.20.0 34 | hooks: 35 | - id: pyupgrade 36 | args: [--py38-plus] 37 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2022 Axiom Data Science 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.txt 2 | include README.md 3 | include pyproject.toml 4 | 5 | graft pocean 6 | 7 | prune .github 8 | prune *.egg-info 9 | prune docs 10 | prune pocean/tests 11 | 12 | exclude .coveragerc 13 | exclude .gitignore 14 | exclude .pre-commit-config.yaml 15 | exclude pocean/_version.py 16 | exclude ruff.toml 17 | 18 | global-exclude *.nc 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🌐 pocean-core 2 | 3 | [![Push](https://github.com/pyoceans/pocean-core/actions/workflows/tests.yml/badge.svg)](https://github.com/pyoceans/pocean-core/actions/workflows/push.yml) 4 | [![license](https://img.shields.io/github/license/pyoceans/pocean-core.svg)](https://github.com/pyoceans/pocean-core/blob/master/LICENSE.txt) 5 | [![GitHub release](https://img.shields.io/github/release/pyoceans/pocean-core/all.svg)](https://pypi.org/project/pocean-core/) 6 | 7 | 8 | 🐍 + 🌊 9 | 10 | A python framework for working with met-ocean data 11 | 12 | ## Resources 13 | + **Documentation:** 14 | + **API:** 15 | + **Source Code:** 16 | + **Git clone URL:** 17 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = pocean-core 8 | SOURCEDIR = . 9 | BUILDDIR = _site 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # pocean-core documentation build configuration file, created by 4 | # sphinx-quickstart on Fri Feb 10 16:09:19 2017. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here. 18 | # 19 | import os 20 | import sys 21 | p = os.path.abspath( 22 | os.path.dirname(os.path.dirname(__file__)) 23 | ) 24 | sys.path.insert(0, p) 25 | 26 | # -- General configuration ------------------------------------------------ 27 | 28 | # If your documentation needs a minimal Sphinx version, state it here. 29 | # 30 | # needs_sphinx = '1.0' 31 | 32 | # Add any Sphinx extension module names here, as strings. 
They can be 33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 34 | # ones. 35 | extensions = [ 36 | 'sphinx.ext.autodoc', 37 | 'sphinx.ext.autosummary', 38 | 'sphinx.ext.napoleon' 39 | ] 40 | 41 | napoleon_google_docstring = True 42 | napoleon_numpy_docstring = True 43 | napoleon_include_init_with_doc = False 44 | napoleon_include_private_with_doc = True 45 | napoleon_include_special_with_doc = False 46 | napoleon_use_admonition_for_examples = True 47 | napoleon_use_admonition_for_notes = True 48 | napoleon_use_admonition_for_references = True 49 | napoleon_use_ivar = False 50 | napoleon_use_param = True 51 | napoleon_use_keyword = True 52 | napoleon_use_rtype = True 53 | 54 | # Add any paths that contain templates here, relative to this directory. 55 | templates_path = ['_templates'] 56 | 57 | # The suffix(es) of source filenames. 58 | # You can specify multiple suffix as a list of string: 59 | # 60 | source_suffix = ['.rst'] 61 | 62 | # The master toctree document. 63 | master_doc = 'index' 64 | 65 | # General information about the project. 66 | project = 'pocean-core' 67 | copyright = '2023, Kyle Wilcox' 68 | author = 'Kyle Wilcox' 69 | 70 | # The version info for the project you're documenting, acts as replacement for 71 | # |version| and |release|, also used in various other places throughout the 72 | # built documents. 73 | # 74 | # The short X.Y version. 75 | from pocean import __version__ # noqa 76 | 77 | version = __version__ 78 | # The full version, including alpha/beta/rc tags. 79 | release = __version__ 80 | 81 | # The language for content autogenerated by Sphinx. Refer to documentation 82 | # for a list of supported languages. 83 | # 84 | # This is also used if you do content translation via gettext catalogs. 85 | # Usually you set "language" from the command line for these cases. 86 | language = "en" 87 | 88 | # List of patterns, relative to source directory, that match files and 89 | # directories to ignore when looking for source files. 90 | # This patterns also effect to html_static_path and html_extra_path 91 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 92 | 93 | # The name of the Pygments (syntax highlighting) style to use. 94 | pygments_style = 'sphinx' 95 | 96 | # If true, `todo` and `todoList` produce output, else they produce nothing. 97 | todo_include_todos = False 98 | 99 | 100 | # -- Options for HTML output ---------------------------------------------- 101 | 102 | # The theme to use for HTML and HTML Help pages. See the documentation for 103 | # a list of builtin themes. 104 | # 105 | html_theme = 'alabaster' 106 | 107 | # Theme options are theme-specific and customize the look and feel of a theme 108 | # further. For a list of options available for each theme, see the 109 | # documentation. 110 | # 111 | html_theme_options = { 112 | 'description': 'A python framework for working with met-ocean data', 113 | 'github_user': 'pyoceans', 114 | 'github_repo': 'pocean-core', 115 | 'github_button': 'true', 116 | } 117 | 118 | # Add any paths that contain custom static files (such as style sheets) here, 119 | # relative to this directory. They are copied after the builtin static files, 120 | # so a file named "default.css" will overwrite the builtin "default.css". 121 | #html_static_path = ['_static'] 122 | 123 | # If true, links to the reST sources are added to the pages. 
124 | html_show_sourcelink = False 125 | 126 | # -- Options for HTMLHelp output ------------------------------------------ 127 | 128 | # Output file base name for HTML help builder. 129 | htmlhelp_basename = 'pocean-coredoc' 130 | 131 | 132 | # -- Options for LaTeX output --------------------------------------------- 133 | 134 | latex_elements = { 135 | # The paper size ('letterpaper' or 'a4paper'). 136 | # 137 | # 'papersize': 'letterpaper', 138 | 139 | # The font size ('10pt', '11pt' or '12pt'). 140 | # 141 | # 'pointsize': '10pt', 142 | 143 | # Additional stuff for the LaTeX preamble. 144 | # 145 | # 'preamble': '', 146 | 147 | # Latex figure (float) alignment 148 | # 149 | # 'figure_align': 'htbp', 150 | } 151 | 152 | # Grouping the document tree into LaTeX files. List of tuples 153 | # (source start file, target name, title, 154 | # author, documentclass [howto, manual, or own class]). 155 | latex_documents = [ 156 | (master_doc, 'pocean-core.tex', 'pocean-core Documentation', 157 | 'Kyle Wilcox', 'manual'), 158 | ] 159 | 160 | 161 | # -- Options for manual page output --------------------------------------- 162 | 163 | # One entry per manual page. List of tuples 164 | # (source start file, name, description, authors, manual section). 165 | man_pages = [ 166 | (master_doc, 'pocean-core', 'pocean-core Documentation', 167 | [author], 1) 168 | ] 169 | 170 | 171 | # -- Options for Texinfo output ------------------------------------------- 172 | 173 | # Grouping the document tree into Texinfo files. List of tuples 174 | # (source start file, target name, title, author, 175 | # dir menu entry, description, category) 176 | texinfo_documents = [ 177 | (master_doc, 'pocean-core', 'pocean-core Documentation', 178 | author, 'pocean-core', 'A python framework for working with met-ocean data.', 179 | 'Miscellaneous'), 180 | ] 181 | -------------------------------------------------------------------------------- /docs/development.rst: -------------------------------------------------------------------------------- 1 | Development 2 | ============ 3 | 4 | Create a conda environment 5 | 6 | .. code-block:: bash 7 | 8 | conda create --name pocean310 python=3.10 --file requirements.txt --file requirements-dev.txt 9 | conda activate pocean310 10 | 11 | Running tests 12 | ------------- 13 | 14 | .. code-block:: bash 15 | 16 | # download test datasets 17 | cd pocean/tests 18 | python download_test_data.py 19 | 20 | # run test suite 21 | pytest 22 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | 🌐 pocean-core 2 | ============== 3 | 4 | 🐍 + 🌊 5 | 6 | A python framework for working with met-ocean data 7 | 8 | 9 | Documentation 10 | ============= 11 | 12 | .. 
toctree:: 13 | :maxdepth: 3 14 | :caption: Contents: 15 | 16 | api/modules 17 | development 18 | 19 | Indices and tables 20 | ================== 21 | 22 | * :ref:`genindex` 23 | * :ref:`modindex` 24 | * :ref:`search` 25 | -------------------------------------------------------------------------------- /docs/notebooks/Readme.md: -------------------------------------------------------------------------------- 1 | # Notebook examples using pocean-core 2 | -------------------------------------------------------------------------------- /docs/notebooks/adcp.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true 7 | }, 8 | "source": [ 9 | "# NRL ADCP .mat file to CF-1.6 timeSeriesProfile using pocean" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "Here we read a matlab file with minimal metadata, and write a CF-DSG 1.6 timeSeriesProfile netcdf file. We want the file to work seamlessly with ERDDAP, so we add some ERDDAP specific attributes like `cdm_timeseries_variables`, `cdm_profile_variables`, and `subsetVariables`." 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": { 23 | "collapsed": true 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "import matplotlib.pyplot as plt\n", 28 | "import pandas as pd\n", 29 | "from scipy.io import loadmat\n", 30 | "import datetime as dt\n", 31 | "import numpy as np\n", 32 | "\n", 33 | "#conda install -c conda-forge pocean-core\n", 34 | "from pocean.dsg.timeseriesProfile.om import OrthogonalMultidimensionalTimeseriesProfile" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 2, 40 | "metadata": { 41 | "collapsed": true 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "# wget http://www.satlab.hawaii.edu/onr/adria/data/moorings/nrl/Final/ADCP_matlab/VR4f.mat\n", 46 | "d = loadmat('/data/ADRIA/MOORINGS/NRL/VR4f.mat')" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 3, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "data": { 56 | "text/html": [ 57 | "
<div>\n",
 58 |         "<table border=\"1\" class=\"dataframe\">\n",
 59 |         "  <thead>\n",
 60 |         "    <tr><th></th><th>profile</th><th>station</th><th>t</th><th>un</th><th>vn</th><th>wn</th><th>x</th><th>y</th><th>z</th></tr>\n",
 61 |         "  </thead>\n",
 62 |         "  <tbody>\n",
 63 |         "    <tr><th>296338</th><td>5199</td><td>VR4F</td><td>2003-04-29 03:00:00</td><td>0.061417</td><td>0.394078</td><td>0.017406</td><td>13.0281</td><td>45.187783</td><td>28.548073</td></tr>\n",
 64 |         "    <tr><th>296339</th><td>5199</td><td>VR4F</td><td>2003-04-29 03:00:00</td><td>-0.044268</td><td>0.653439</td><td>0.003686</td><td>13.0281</td><td>45.187783</td><td>29.048073</td></tr>\n",
 65 |         "    <tr><th>296340</th><td>5199</td><td>VR4F</td><td>2003-04-29 03:00:00</td><td>0.054443</td><td>0.386804</td><td>0.004221</td><td>13.0281</td><td>45.187783</td><td>29.548073</td></tr>\n",
 66 |         "    <tr><th>296341</th><td>5199</td><td>VR4F</td><td>2003-04-29 03:00:00</td><td>0.098836</td><td>0.529064</td><td>-0.011401</td><td>13.0281</td><td>45.187783</td><td>30.048073</td></tr>\n",
 67 |         "    <tr><th>296342</th><td>5199</td><td>VR4F</td><td>2003-04-29 03:00:00</td><td>0.008518</td><td>0.550976</td><td>0.011823</td><td>13.0281</td><td>45.187783</td><td>30.548073</td></tr>\n",
 68 |         "  </tbody>\n",
 69 |         "</table>\n",
 70 |         "</div>
" 150 | ], 151 | "text/plain": [ 152 | " profile station t un vn wn \\\n", 153 | "296338 5199 VR4F 2003-04-29 03:00:00 0.061417 0.394078 0.017406 \n", 154 | "296339 5199 VR4F 2003-04-29 03:00:00 -0.044268 0.653439 0.003686 \n", 155 | "296340 5199 VR4F 2003-04-29 03:00:00 0.054443 0.386804 0.004221 \n", 156 | "296341 5199 VR4F 2003-04-29 03:00:00 0.098836 0.529064 -0.011401 \n", 157 | "296342 5199 VR4F 2003-04-29 03:00:00 0.008518 0.550976 0.011823 \n", 158 | "\n", 159 | " x y z \n", 160 | "296338 13.0281 45.187783 28.548073 \n", 161 | "296339 13.0281 45.187783 29.048073 \n", 162 | "296340 13.0281 45.187783 29.548073 \n", 163 | "296341 13.0281 45.187783 30.048073 \n", 164 | "296342 13.0281 45.187783 30.548073 " 165 | ] 166 | }, 167 | "execution_count": 3, 168 | "metadata": {}, 169 | "output_type": "execute_result" 170 | } 171 | ], 172 | "source": [ 173 | "times = [dt.datetime(2002,1,1,0,0,0) + dt.timedelta(a) for a in d['timen'].flatten()]\n", 174 | "depths = d['mdepth'].flatten()\n", 175 | "\n", 176 | "# Repeat each time for the number of depths\n", 177 | "t = np.repeat(times, len(depths))\n", 178 | "\n", 179 | "# Create a profile index, and repeat for number of depths\n", 180 | "profile = np.repeat(np.array(range(len(times)), dtype=np.int32) + 1, len(depths))\n", 181 | "\n", 182 | "# Tile the depths for each time\n", 183 | "z = np.tile(depths, len(times))\n", 184 | "\n", 185 | "df = pd.DataFrame({\n", 186 | " 't': t,\n", 187 | " 'x': 13.0281,\n", 188 | " 'y': 45.187783,\n", 189 | " 'z': z,\n", 190 | " 'un': d['un'].T.flatten()/10., # cm/s to m/s\n", 191 | " 'vn': d['vn'].T.flatten()/10., # cm/s to m/s\n", 192 | " 'wn': d['wn'].T.flatten()/10., # cm/s to m/s\n", 193 | " 'profile': profile,\n", 194 | " 'station': 'VR4F'\n", 195 | "})\n", 196 | "\n", 197 | "df.tail()" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 4, 203 | "metadata": { 204 | "collapsed": true 205 | }, 206 | "outputs": [], 207 | "source": [ 208 | "atts={\n", 209 | " 'global': {\n", 210 | " 'title': 'ADRIA02 Mooring VR4',\n", 211 | " 'summary': 'Data from bottom-mounted ADCP',\n", 212 | " 'institution': 'NRL',\n", 213 | " 'cdm_timeseries_variables': 'station',\n", 214 | " 'cdm_profile_variables': 'profile',\n", 215 | " 'subsetVariables': 'depth'\n", 216 | " },\n", 217 | " 'longitude': {\n", 218 | " 'units': 'degrees_east',\n", 219 | " 'standard_name':'longitude'\n", 220 | " },\n", 221 | " 'latitude': {\n", 222 | " 'units': 'degrees_north',\n", 223 | " 'standard_name':'latitude'\n", 224 | " },\n", 225 | " 'z': {\n", 226 | " 'units': 'm',\n", 227 | " 'standard_name': 'depth',\n", 228 | " 'positive':'down'\n", 229 | " },\n", 230 | " 'un': {\n", 231 | " 'units': 'm/s',\n", 232 | " 'standard_name':'eastward_sea_water_velocity'\n", 233 | " },\n", 234 | " 'vn': {\n", 235 | " 'units': 'm/s',\n", 236 | " 'standard_name':'northward_sea_water_velocity'\n", 237 | " },\n", 238 | " 'profile': {\n", 239 | " 'cf_role': 'profile_id'\n", 240 | " }\n", 241 | " }" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 5, 247 | "metadata": {}, 248 | "outputs": [ 249 | { 250 | "data": { 251 | "text/plain": [ 252 | "{'cdm_profile_variables': 'profile',\n", 253 | " 'cdm_timeseries_variables': 'station',\n", 254 | " 'institution': 'NRL',\n", 255 | " 'subsetVariables': 'depth',\n", 256 | " 'summary': 'Data from bottom-mounted ADCP',\n", 257 | " 'title': 'ADRIA02 Mooring VR4'}" 258 | ] 259 | }, 260 | "execution_count": 5, 261 | "metadata": {}, 262 | "output_type": "execute_result" 263 | } 264 | ], 265 
| "source": [ 266 | "atts['global']" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": 6, 272 | "metadata": { 273 | "scrolled": true 274 | }, 275 | "outputs": [ 276 | { 277 | "name": "stderr", 278 | "output_type": "stream", 279 | "text": [ 280 | "/home/richard.signell/miniconda3/envs/IOOS3/lib/python3.6/site-packages/pocean/dsg/timeseriesProfile/om.py:82: UserWarning: endian-ness of dtype and endian kwarg do not match, using endian kwarg\n", 281 | " latitude = nc.createVariable('latitude', get_dtype(df.y), ('station',))\n", 282 | "/home/richard.signell/miniconda3/envs/IOOS3/lib/python3.6/site-packages/pocean/dsg/timeseriesProfile/om.py:83: UserWarning: endian-ness of dtype and endian kwarg do not match, using endian kwarg\n", 283 | " longitude = nc.createVariable('longitude', get_dtype(df.x), ('station',))\n", 284 | "/home/richard.signell/miniconda3/envs/IOOS3/lib/python3.6/site-packages/pocean/dsg/timeseriesProfile/om.py:84: UserWarning: endian-ness of dtype and endian kwarg do not match, using endian kwarg\n", 285 | " z = nc.createVariable('z', get_dtype(df.z), ('z',))\n", 286 | "/home/richard.signell/miniconda3/envs/IOOS3/lib/python3.6/site-packages/pocean/dsg/timeseriesProfile/om.py:108: UserWarning: endian-ness of dtype and endian kwarg do not match, using endian kwarg\n", 287 | " v = nc.createVariable(var_name, get_dtype(sdf[c]), ('time', 'z', 'station'), fill_value=sdf[c].dtype.type(cls.default_fill_value))\n" 288 | ] 289 | }, 290 | { 291 | "data": { 292 | "text/plain": [ 293 | "\n", 294 | "root group (NETCDF4 data model, file format HDF5):\n", 295 | " Conventions: CF-1.6\n", 296 | " date_created: 2017-06-21T12:19:00Z\n", 297 | " featureType: timeseriesProfile\n", 298 | " cdm_data_type: TimeseriesProfile\n", 299 | " title: ADRIA02 Mooring VR4\n", 300 | " summary: Data from bottom-mounted ADCP\n", 301 | " institution: NRL\n", 302 | " cdm_timeseries_variables: station\n", 303 | " cdm_profile_variables: profile\n", 304 | " subsetVariables: depth\n", 305 | " dimensions(sizes): station(1), time(5199), z(57)\n", 306 | " variables(dimensions): int32 \u001b[4mcrs\u001b[0m(), \u001b[4mstation\u001b[0m(station), float64 \u001b[4mtime\u001b[0m(time), float64 \u001b[4mlatitude\u001b[0m(station), float64 \u001b[4mlongitude\u001b[0m(station), float64 \u001b[4mz\u001b[0m(z), int32 \u001b[4mprofile\u001b[0m(time,z,station), float64 \u001b[4mun\u001b[0m(time,z,station), float64 \u001b[4mvn\u001b[0m(time,z,station), float64 \u001b[4mwn\u001b[0m(time,z,station)\n", 307 | " groups: " 308 | ] 309 | }, 310 | "execution_count": 6, 311 | "metadata": {}, 312 | "output_type": "execute_result" 313 | } 314 | ], 315 | "source": [ 316 | "OrthogonalMultidimensionalTimeseriesProfile.from_dataframe(df, output='/data/ADRIA/MOORINGS/NRL/vr4f.nc', \n", 317 | " attributes=atts)" 318 | ] 319 | } 320 | ], 321 | "metadata": { 322 | "_draft": { 323 | "nbviewer_url": "https://gist.github.com/b2f37b7724981e80e48bd59311ac9a58" 324 | }, 325 | "gist": { 326 | "data": { 327 | "description": "erddap/adcp.ipynb", 328 | "public": true 329 | }, 330 | "id": "b2f37b7724981e80e48bd59311ac9a58" 331 | }, 332 | "kernelspec": { 333 | "display_name": "Python [conda env:IOOS3]", 334 | "language": "python", 335 | "name": "conda-env-IOOS3-py" 336 | }, 337 | "language_info": { 338 | "codemirror_mode": { 339 | "name": "ipython", 340 | "version": 3 341 | }, 342 | "file_extension": ".py", 343 | "mimetype": "text/x-python", 344 | "name": "python", 345 | "nbconvert_exporter": "python", 346 | "pygments_lexer": "ipython3", 
347 | "version": "3.6.1" 348 | } 349 | }, 350 | "nbformat": 4, 351 | "nbformat_minor": 2 352 | } 353 | -------------------------------------------------------------------------------- /docs/notebooks/full.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyoceans/pocean-core/6e4dda9c818c65b31192e8d5fb01d6bcf29a43f3/docs/notebooks/full.nc -------------------------------------------------------------------------------- /docs/notebooks/hello.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyoceans/pocean-core/6e4dda9c818c65b31192e8d5fb01d6bcf29a43f3/docs/notebooks/hello.nc -------------------------------------------------------------------------------- /docs/notebooks/imp.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyoceans/pocean-core/6e4dda9c818c65b31192e8d5fb01d6bcf29a43f3/docs/notebooks/imp.nc -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | -------------------------------------------------------------------------------- /pocean/__init__.py: -------------------------------------------------------------------------------- 1 | #!python 2 | 3 | # Package level logger 4 | import logging 5 | 6 | logger = logging.getLogger("pocean") 7 | logger.addHandler(logging.NullHandler()) 8 | 9 | try: 10 | from ._version import __version__ 11 | except ImportError: 12 | __version__ = "unknown" 13 | -------------------------------------------------------------------------------- /pocean/cf.py: -------------------------------------------------------------------------------- 1 | #!python 2 | import datetime 3 | import itertools 4 | import os 5 | import re 6 | 7 | from . import logger 8 | from .dataset import EnhancedDataset 9 | from .utils import all_subclasses, is_url 10 | 11 | datetime.UTC = datetime.timezone.utc 12 | 13 | 14 | class CFDataset(EnhancedDataset): 15 | default_fill_value = False 16 | default_time_unit = "seconds since 1990-01-01 00:00:00Z" 17 | 18 | @classmethod 19 | def load(cls, path): 20 | """Attempt to load a netCDF file as a CF compatible dataset 21 | 22 | Extended description of function. 23 | 24 | Parameters 25 | ---------- 26 | path : 27 | Path to netCDF file 28 | 29 | Returns 30 | ------- 31 | CFDataset subclass for your netCDF file 32 | 33 | Raises 34 | ------ 35 | ValueError: 36 | If no suitable class is found for your dataset 37 | 38 | """ 39 | 40 | if not is_url(path): 41 | path = os.path.realpath(path) 42 | 43 | subs = list(all_subclasses(cls)) 44 | 45 | dsg = None 46 | try: 47 | dsg = cls(path) 48 | for klass in subs: 49 | logger.debug(f"Trying {klass.__name__}...") 50 | if hasattr(klass, "is_mine"): 51 | if klass.is_mine(dsg): 52 | return klass(path) 53 | except OSError: 54 | raise 55 | finally: 56 | if hasattr(dsg, "close"): 57 | dsg.close() 58 | 59 | subnames = ", ".join([s.__name__ for s in subs]) 60 | raise ValueError(f"Could not open {path} as any type of CF Dataset. 
Tried: {subnames}.") 61 | 62 | def axes(self, name): 63 | return getattr(self, f"{name.lower()}_axes")() 64 | 65 | def t_axes(self): 66 | # If there is only one variable with the axis parameter, return it 67 | hasaxis = self.filter_by_attrs(axis=lambda x: x and str(x).lower() == "t") 68 | if len(hasaxis) == 1: 69 | return hasaxis 70 | 71 | tvars = list( 72 | set( 73 | itertools.chain( 74 | hasaxis, 75 | self.filter_by_attrs( 76 | standard_name=lambda x: x in ["time", "forecast_reference_time"] 77 | ), 78 | ) 79 | ) 80 | ) 81 | return tvars 82 | 83 | def x_axes(self): 84 | """ 85 | CF X axis will have one of the following: 86 | * The `axis` property has the value ``'X'`` 87 | * Units of longitude (see `cf.Units.islongitude` for details) 88 | * The `standard_name` property is one of ``'longitude'``, 89 | ``'projection_x_coordinate'`` or ``'grid_longitude'`` 90 | """ 91 | xnames = ["longitude", "grid_longitude", "projection_x_coordinate"] 92 | xunits = ["degrees_east", "degree_east", "degree_E", "degrees_E", "degreeE", "degreesE"] 93 | 94 | # If there is only one variable with the axis parameter, return it 95 | hasaxis = self.filter_by_attrs(axis=lambda x: x and str(x).lower() == "x") 96 | if len(hasaxis) == 1: 97 | return hasaxis 98 | 99 | xvars = list( 100 | set( 101 | itertools.chain( 102 | hasaxis, 103 | self.filter_by_attrs(standard_name=lambda x: x and str(x).lower() in xnames), 104 | self.filter_by_attrs(units=lambda x: x and str(x).lower() in xunits), 105 | ) 106 | ) 107 | ) 108 | return xvars 109 | 110 | def y_axes(self): 111 | ynames = ["latitude", "grid_latitude", "projection_y_coordinate"] 112 | yunits = ["degrees_north", "degree_north", "degree_N", "degrees_N", "degreeN", "degreesN"] 113 | 114 | # If there is only one variable with the axis parameter, return it 115 | hasaxis = self.filter_by_attrs(axis=lambda x: x and str(x).lower() == "y") 116 | if len(hasaxis) == 1: 117 | return hasaxis 118 | 119 | yvars = list( 120 | set( 121 | itertools.chain( 122 | hasaxis, 123 | self.filter_by_attrs(standard_name=lambda x: x and str(x).lower() in ynames), 124 | self.filter_by_attrs(units=lambda x: x and str(x).lower() in yunits), 125 | ) 126 | ) 127 | ) 128 | return yvars 129 | 130 | def z_axes(self): 131 | znames = [ 132 | "atmosphere_ln_pressure_coordinate", 133 | "atmosphere_sigma_coordinate", 134 | "atmosphere_hybrid_sigma_pressure_coordinate", 135 | "atmosphere_hybrid_height_coordinate", 136 | "atmosphere_sleve_coordinate", 137 | "ocean_sigma_coordinate", 138 | "ocean_s_coordinate", 139 | "ocean_s_coordinate_g1", 140 | "ocean_s_coordinate_g2", 141 | "ocean_sigma_z_coordinate", 142 | "ocean_double_sigma_coordinate", 143 | ] 144 | 145 | # If there is only one variable with the axis parameter, return it 146 | hasaxis = self.filter_by_attrs(axis=lambda x: x and str(x).lower() == "z") 147 | if len(hasaxis) == 1: 148 | return hasaxis 149 | 150 | zvars = list( 151 | set( 152 | itertools.chain( 153 | hasaxis, 154 | self.filter_by_attrs(positive=lambda x: x and str(x).lower() in ["up", "down"]), 155 | self.filter_by_attrs(standard_name=lambda x: x and str(x).lower() in znames), 156 | ) 157 | ) 158 | ) 159 | return zvars 160 | 161 | def is_valid(self, *args, **kwargs): 162 | return self.__class__.is_mine(self, *args, **kwargs) 163 | 164 | def data_vars(self): 165 | return self.filter_by_attrs( 166 | coordinates=lambda x: x is not None, 167 | units=lambda x: x is not None, 168 | standard_name=lambda x: x is not None, 169 | flag_values=lambda x: x is None, 170 | flag_masks=lambda x: x is None, 171 
| flag_meanings=lambda x: x is None, 172 | ) 173 | 174 | def ancillary_vars(self): 175 | ancillary_variables = [] 176 | for rv in self.filter_by_attrs(ancillary_variables=lambda x: x is not None): 177 | # Space separated ancillary variables 178 | for av in rv.ancillary_variables.split(" "): 179 | if av in self.variables: 180 | ancillary_variables.append(self.variables[av]) 181 | return list(set(ancillary_variables)) 182 | 183 | def nc_attributes(self): 184 | return { 185 | "global": { 186 | "Conventions": "CF-1.6", 187 | "date_created": datetime.datetime.now(datetime.UTC).strftime("%Y-%m-%dT%H:%M:00Z"), 188 | } 189 | } 190 | 191 | 192 | def cf_safe_name(name): 193 | if isinstance(name, str): 194 | if re.match("^[0-9_]", name): 195 | # Add a letter to the front 196 | name = f"v_{name}" 197 | return re.sub(r"[^_a-zA-Z0-9]", "_", name) 198 | 199 | raise ValueError(f'Could not convert "{name}" to a safe name') 200 | -------------------------------------------------------------------------------- /pocean/dataset.py: -------------------------------------------------------------------------------- 1 | #!python 2 | import warnings 3 | from collections import OrderedDict 4 | 5 | import numpy as np 6 | import simplejson as json 7 | from netCDF4 import Dataset 8 | 9 | from . import logger as L 10 | from .meta import ( 11 | MetaInterface, 12 | ncpyattributes, 13 | string_to_dtype, 14 | untype_attributes, 15 | ) 16 | from .utils import ( 17 | generic_masked, 18 | JSONEncoder, 19 | safe_attribute_typing, 20 | safe_issubdtype, 21 | ) 22 | 23 | # Attribute that need to be of the same type as the variables 24 | _TYPE_SENSITIVE_ATTRIBUTES = [ 25 | "_FillValue", 26 | "missing_value", 27 | "valid_min", 28 | "valid_max", 29 | "valid_range", 30 | "display_min", 31 | "display_max", 32 | "display_range", 33 | "colorBarMinimum", 34 | "colorBarMaximum", 35 | ] 36 | 37 | 38 | class EnhancedDataset(Dataset): 39 | def __del__(self): 40 | try: 41 | self.close() 42 | except RuntimeError: 43 | pass 44 | 45 | def close(self): 46 | if not self.isopen(): 47 | return 48 | 49 | super().close() 50 | 51 | def vatts(self, vname): 52 | d = {} 53 | var = self.variables[vname] 54 | for k in var.ncattrs(): 55 | d[k] = var.getncattr(k) 56 | return d 57 | 58 | def filter_by_attrs(self, *args, **kwargs): 59 | return self.get_variables_by_attributes(*args, **kwargs) 60 | 61 | def __apply_meta_interface__(self, meta, **kwargs): 62 | warnings.warn( 63 | "`__apply_meta_interface__` is deprecated. Use `apply_meta()` instead", 64 | DeprecationWarning, 65 | ) 66 | return self.apply_meta(meta, **kwargs) 67 | 68 | def __getattr__(self, name): 69 | if name in ["__meta_interface__", "_meta"]: 70 | warnings.warn( 71 | "`__meta_interface__` and `_meta` are deprecated. 
Use `meta()` instead", 72 | DeprecationWarning, 73 | ) 74 | return self.meta() 75 | else: 76 | return super().__getattr__(name) 77 | 78 | def apply_meta(self, *args, **kwargs): 79 | """Shortcut to the JSON object without writing any data""" 80 | kwargs["create_data"] = False 81 | return self.apply_json(*args, **kwargs) 82 | 83 | def meta(self, *args, **kwargs): 84 | """Shortcut to the JSON object without any data""" 85 | kwargs["return_data"] = False 86 | return self.json(*args, **kwargs) 87 | 88 | def json(self, return_data=True, fill_data=True): 89 | ds = OrderedDict() 90 | vs = OrderedDict() 91 | gs = ncpyattributes({ga: self.getncattr(ga) for ga in self.ncattrs()}) 92 | 93 | # Dimensions 94 | for dname, dim in self.dimensions.items(): 95 | if dim.isunlimited(): 96 | ds[dname] = None 97 | else: 98 | ds[dname] = dim.size 99 | 100 | # Variables 101 | for k, v in self.variables.items(): 102 | typed = v.dtype 103 | if isinstance(typed, np.dtype): 104 | typed = str(typed.name) 105 | elif isinstance(typed, type): 106 | typed = typed.__name__ 107 | 108 | vattrs = {va: v.getncattr(va) for va in v.ncattrs()} 109 | vardict = {"attributes": ncpyattributes(vattrs), "shape": v.dimensions, "type": typed} 110 | if return_data is True: 111 | vdata = generic_masked(v[:], attrs=vattrs) 112 | if fill_data is True: 113 | vdata = vdata.filled() 114 | vardict["data"] = vdata.tolist() 115 | 116 | vs[k] = vardict 117 | 118 | return MetaInterface(dimensions=ds, variables=vs, attributes=gs) 119 | 120 | def apply_json(self, meta, create_vars=True, create_dims=True, create_data=True): 121 | """Apply a meta interface object to a netCDF4 compatible object""" 122 | ds = meta.get("dimensions", OrderedDict()) 123 | gs = meta.get("attributes", OrderedDict()) 124 | vs = meta.get("variables", OrderedDict()) 125 | 126 | # Dimensions 127 | for dname, dsize in ds.items(): 128 | # Ignore dimension sizes less than 0 129 | if dsize and dsize < 0: 130 | continue 131 | if dname not in self.dimensions: 132 | # Don't create new dimensions 133 | if create_dims is False: 134 | continue 135 | 136 | self.createDimension(dname, size=dsize) 137 | else: 138 | dfilesize = self.dimensions[dname].size 139 | if dfilesize != dsize: 140 | L.warning( 141 | "Not changing size of dimension {}. file: {}, meta: {}".format( 142 | dname, dfilesize, dsize 143 | ) 144 | ) 145 | 146 | # Global attributes 147 | typed_gs = untype_attributes(gs) 148 | self.setncatts(typed_gs) 149 | 150 | # Variables 151 | for vname, vvalue in vs.items(): 152 | vatts = untype_attributes(vvalue.get("attributes", {})) 153 | 154 | if vname not in self.variables: 155 | # Don't create new variables 156 | if create_vars is False: 157 | continue 158 | 159 | if "shape" not in vvalue and "type" not in vvalue: 160 | L.debug(f"Skipping {vname} creation, no shape or no type defined") 161 | continue 162 | shape = vvalue.get("shape", []) # Dimension names 163 | vardtype = string_to_dtype(vvalue.get("type")) 164 | 165 | if safe_issubdtype(vardtype, np.floating): 166 | defaultfill = vardtype.type(np.nan) # We can use `nan` for floats 167 | elif vardtype.kind in ["U", "S"]: 168 | defaultfill = None # No fillvalue on VLENs 169 | else: 170 | # Use a masked value which evaluates to different things depending on the dtype 171 | # For integers is resolves to `0`. 
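                    # (Added note: np.ma.masked cast through the dtype is only used
                    # to produce a default _FillValue handed to createVariable() below;
                    # an explicit _FillValue/missing_value attribute still wins.)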
172 | defaultfill = vardtype.type(np.ma.masked) 173 | 174 | fillmiss = vatts.get("_FillValue", vatts.get("missing_value", defaultfill)) 175 | newvar = self.createVariable(vname, vardtype, dimensions=shape, fill_value=fillmiss) 176 | else: 177 | newvar = self.variables[vname] 178 | 179 | # Now assign the data if is exists 180 | if create_data is True and "data" in vvalue: 181 | # Because the JSON format can be flattened already we are just 182 | # going to always reshape the data to the variable shape 183 | data = generic_masked( 184 | np.array(vvalue["data"], dtype=newvar.dtype).flatten() 185 | ).reshape(newvar.shape) 186 | newvar[:] = data 187 | 188 | # Don't re-assign fill value attributes 189 | if "_FillValue" in vatts: 190 | del vatts["_FillValue"] 191 | if "missing_value" in vatts: 192 | del vatts["missing_value"] 193 | 194 | # Convert any attribute that need to match the variables dtype to that dtype 195 | for sattr in _TYPE_SENSITIVE_ATTRIBUTES: 196 | if sattr in vatts: 197 | vatts[sattr] = safe_attribute_typing(newvar.dtype, vatts[sattr]) 198 | 199 | newvar.setncatts(vatts) 200 | 201 | def to_json(self, *args, **kwargs): 202 | return json.dumps(self.to_dict(), *args, **kwargs) 203 | 204 | def json_attributes(self, vfuncs=None): 205 | """ 206 | vfuncs can be any callable that accepts a single argument, the 207 | Variable object, and returns a dictionary of new attributes to 208 | set. These will overwrite existing attributes 209 | """ 210 | 211 | vfuncs = vfuncs or [] 212 | 213 | js = {"global": {}} 214 | 215 | for k in self.ncattrs(): 216 | js["global"][k] = self.getncattr(k) 217 | 218 | for varname, var in self.variables.items(): 219 | js[varname] = {} 220 | for k in var.ncattrs(): 221 | z = var.getncattr(k) 222 | try: 223 | assert not np.isnan(z).all() 224 | js[varname][k] = z 225 | except AssertionError: 226 | js[varname][k] = None 227 | except TypeError: 228 | js[varname][k] = z 229 | 230 | for vf in vfuncs: 231 | try: 232 | js[varname].update(vfuncs(var)) 233 | except BaseException: 234 | L.exception("Could not apply custom variable attribute function") 235 | 236 | return json.loads(json.dumps(js, cls=JSONEncoder)) 237 | 238 | def update_attributes(self, attributes): 239 | for k, v in attributes.pop("global", {}).items(): 240 | try: 241 | self.setncattr(k, v) 242 | except BaseException: 243 | L.warning(f"Could not set global attribute {k}: {v}") 244 | 245 | for k, v in attributes.items(): 246 | if k in self.variables: 247 | for n, z in v.items(): 248 | # Don't re-assign fill value attributes 249 | if n in ["_FillValue", "missing_value"]: 250 | L.warning(f"Refusing to set {n} on {k}") 251 | continue 252 | 253 | try: 254 | self.variables[k].setncattr(n, z) 255 | except BaseException: 256 | L.warning(f"Could not set attribute {n} on {k}") 257 | self.sync() 258 | -------------------------------------------------------------------------------- /pocean/dsg/__init__.py: -------------------------------------------------------------------------------- 1 | #!python 2 | 3 | # Profile 4 | from .profile.im import IncompleteMultidimensionalProfile 5 | from .profile.om import OrthogonalMultidimensionalProfile 6 | 7 | # Timeseries 8 | from .timeseries.cr import ContiguousRaggedTimeseries 9 | from .timeseries.im import IncompleteMultidimensionalTimeseries 10 | from .timeseries.ir import IndexedRaggedTimeseries 11 | from .timeseries.om import OrthogonalMultidimensionalTimeseries 12 | from .timeseriesProfile.im import IncompleteMultidimensionalTimeseriesProfile 13 | from .timeseriesProfile.om 
import OrthogonalMultidimensionalTimeseriesProfile 14 | 15 | # TimeseriesProfile 16 | from .timeseriesProfile.r import RaggedTimeseriesProfile 17 | 18 | # Trajectory 19 | from .trajectory.cr import ContiguousRaggedTrajectory 20 | from .trajectory.im import IncompleteMultidimensionalTrajectory 21 | from .trajectory.ir import IndexedRaggedTrajectory 22 | 23 | # TrajectoryProfile 24 | from .trajectoryProfile.cr import ContiguousRaggedTrajectoryProfile 25 | 26 | # Attribute Utilities 27 | from .utils import ( 28 | get_calculated_attributes, 29 | get_creation_attributes, 30 | get_geographic_attributes, 31 | get_temporal_attributes, 32 | get_vertical_attributes, 33 | ) 34 | 35 | __all__ = [ 36 | "IncompleteMultidimensionalProfile", 37 | "OrthogonalMultidimensionalProfile", 38 | "ContiguousRaggedTrajectory", 39 | "IndexedRaggedTrajectory", 40 | "IncompleteMultidimensionalTrajectory", 41 | "ContiguousRaggedTrajectoryProfile", 42 | "ContiguousRaggedTimeseries", 43 | "IndexedRaggedTimeseries", 44 | "IncompleteMultidimensionalTimeseries", 45 | "OrthogonalMultidimensionalTimeseries", 46 | "RaggedTimeseriesProfile", 47 | "IncompleteMultidimensionalTimeseriesProfile", 48 | "OrthogonalMultidimensionalTimeseriesProfile", 49 | "get_geographic_attributes", 50 | "get_vertical_attributes", 51 | "get_temporal_attributes", 52 | "get_creation_attributes", 53 | "get_calculated_attributes", 54 | ] 55 | -------------------------------------------------------------------------------- /pocean/dsg/profile/__init__.py: -------------------------------------------------------------------------------- 1 | #!python 2 | from collections import namedtuple 3 | 4 | from shapely.geometry import LineString, Point 5 | 6 | from pocean.utils import logger as L # noqa 7 | from pocean.utils import ( 8 | unique_justseen, 9 | ) 10 | 11 | profile_meta = namedtuple("Profile", ["min_z", "max_z", "t", "x", "y", "id", "geometry"]) 12 | profiles_meta = namedtuple( 13 | "ProfileCollection", ["min_z", "max_z", "min_t", "max_t", "profiles", "geometry"] 14 | ) 15 | 16 | 17 | def profile_calculated_metadata(df, axes, geometries=True): 18 | profiles = {} 19 | for pid, pgroup in df.groupby(axes.profile): 20 | pgroup = pgroup.sort_values(axes.t) 21 | first_row = pgroup.iloc[0] 22 | profiles[pid] = profile_meta( 23 | min_z=pgroup[axes.z].min(), 24 | max_z=pgroup[axes.z].max(), 25 | t=first_row[axes.t], 26 | x=first_row[axes.x], 27 | y=first_row[axes.y], 28 | id=pid, 29 | geometry=Point(first_row[axes.x], first_row[axes.y]), 30 | ) 31 | 32 | if geometries: 33 | null_coordinates = df[axes.x].isnull() | df[axes.y].isnull() 34 | coords = list( 35 | unique_justseen( 36 | zip( 37 | df.loc[~null_coordinates, axes.x].tolist(), 38 | df.loc[~null_coordinates, axes.y].tolist(), 39 | ) 40 | ) 41 | ) 42 | else: 43 | # Calculate the geometry as the linestring between all of the profile points 44 | coords = [p.geometry for _, p in profiles.items()] 45 | 46 | geometry = None 47 | if len(coords) > 1: 48 | geometry = LineString(coords) 49 | elif len(coords) == 1: 50 | geometry = Point(coords[0]) 51 | 52 | return profiles_meta( 53 | min_z=df[axes.z].min(), 54 | max_z=df[axes.z].max(), 55 | min_t=df[axes.t].min(), 56 | max_t=df[axes.t].max(), 57 | profiles=profiles, 58 | geometry=geometry, 59 | ) 60 | -------------------------------------------------------------------------------- /pocean/dsg/profile/im.py: -------------------------------------------------------------------------------- 1 | #!python 2 | from collections import OrderedDict 3 | from copy import copy 4 | 5 
| import numpy as np 6 | import pandas as pd 7 | from cftime import date2num 8 | 9 | from pocean import logger as L # noqa 10 | from pocean.cf import cf_safe_name, CFDataset 11 | from pocean.dsg.profile import profile_calculated_metadata 12 | from pocean.utils import ( 13 | create_ncvar_from_series, 14 | dict_update, 15 | downcast_dataframe, 16 | generic_masked, 17 | get_default_axes, 18 | get_dtype, 19 | get_mapped_axes_variables, 20 | get_masked_datetime_array, 21 | get_ncdata_from_series, 22 | nativize_times, 23 | normalize_countable_array, 24 | ) 25 | 26 | 27 | class IncompleteMultidimensionalProfile(CFDataset): 28 | """ 29 | If there are the same number of levels in each profile, but they do not 30 | have the same set of vertical coordinates, one can use the incomplete 31 | multidimensional array representation, which the vertical coordinate 32 | variable is two-dimensional e.g. replacing z(z) in Example H.8, 33 | "Atmospheric sounding profiles for a common set of vertical coordinates 34 | stored in the orthogonal multidimensional array representation." with 35 | alt(profile,z). This representation also allows one to have a variable 36 | number of elements in different profiles, at the cost of some wasted space. 37 | In that case, any unused elements of the data and auxiliary coordinate 38 | variables must contain missing data values (section 9.6). 39 | """ 40 | 41 | @classmethod 42 | def is_mine(cls, dsg, strict=False): 43 | try: 44 | pvars = dsg.filter_by_attrs(cf_role="profile_id") 45 | assert len(pvars) == 1 46 | assert dsg.featureType.lower() == "profile" 47 | assert len(dsg.t_axes()) >= 1 48 | assert len(dsg.x_axes()) >= 1 49 | assert len(dsg.y_axes()) >= 1 50 | assert len(dsg.z_axes()) >= 1 51 | 52 | # Allow for string variables 53 | pvar = pvars[0] 54 | # 0 = single 55 | # 1 = array of strings/ints/bytes/etc 56 | # 2 = array of character arrays 57 | assert 0 <= len(pvar.dimensions) <= 2 58 | 59 | t = dsg.t_axes()[0] 60 | x = dsg.x_axes()[0] 61 | y = dsg.y_axes()[0] 62 | z = dsg.z_axes()[0] 63 | assert len(z.dimensions) == 2 64 | 65 | assert t.size == pvar.size 66 | assert x.size == pvar.size 67 | assert y.size == pvar.size 68 | p_dim = dsg.dimensions[pvar.dimensions[0]] 69 | z_dim = dsg.dimensions[[d for d in z.dimensions if d != p_dim.name][0]] 70 | for dv in dsg.data_vars(): 71 | assert len(dv.dimensions) in [1, 2] # dimensioned by profile or profile, z 72 | assert z_dim.name in dv.dimensions or p_dim.name in dv.dimensions 73 | assert dv.size in [z_dim.size, p_dim.size, z_dim.size * p_dim.size] 74 | 75 | except BaseException: 76 | if strict is True: 77 | raise 78 | return False 79 | 80 | return True 81 | 82 | @classmethod 83 | def from_dataframe(cls, df, output, **kwargs): 84 | axes = get_default_axes(kwargs.pop("axes", {})) 85 | daxes = axes 86 | data_columns = [d for d in df.columns if d not in axes] 87 | 88 | unlimited = kwargs.pop("unlimited", False) 89 | 90 | unique_dims = kwargs.pop("unique_dims", False) 91 | if unique_dims is True: 92 | # Rename the dimension to avoid a dimension and coordinate having the same name 93 | # which is not support in xarray 94 | changed_axes = {k: f"{v}_dim" for k, v in axes._asdict().items()} 95 | daxes = get_default_axes(changed_axes) 96 | 97 | # Downcast anything from int64 to int32 98 | # Convert any timezone aware datetimes to native UTC times 99 | df = downcast_dataframe(nativize_times(df)) 100 | 101 | with IncompleteMultidimensionalProfile(output, "w") as nc: 102 | profile_group = df.groupby(axes.profile) 103 | 104 | if unlimited is 
True: 105 | max_profiles = None 106 | else: 107 | max_profiles = df[axes.profile].unique().size 108 | nc.createDimension(daxes.profile, max_profiles) 109 | 110 | max_zs = profile_group.size().max() 111 | nc.createDimension(daxes.z, max_zs) 112 | 113 | # Metadata variables 114 | nc.createVariable("crs", "i4") 115 | 116 | profile = nc.createVariable(axes.profile, get_dtype(df[axes.profile]), (daxes.profile,)) 117 | 118 | # Create all of the variables 119 | time = nc.createVariable(axes.t, "f8", (daxes.profile,)) 120 | latitude = nc.createVariable(axes.y, get_dtype(df[axes.y]), (daxes.profile,)) 121 | longitude = nc.createVariable(axes.x, get_dtype(df[axes.x]), (daxes.profile,)) 122 | z = nc.createVariable( 123 | axes.z, 124 | get_dtype(df[axes.z]), 125 | (daxes.profile, daxes.z), 126 | fill_value=df[axes.z].dtype.type(cls.default_fill_value), 127 | ) 128 | 129 | attributes = dict_update(nc.nc_attributes(axes, daxes), kwargs.pop("attributes", {})) 130 | 131 | # Create vars based on full dataframe (to get all variables) 132 | for c in data_columns: 133 | var_name = cf_safe_name(c) 134 | if var_name not in nc.variables: 135 | v = create_ncvar_from_series( 136 | nc, 137 | var_name, 138 | (daxes.profile, daxes.z), 139 | df[c], 140 | ) 141 | attributes[var_name] = dict_update( 142 | attributes.get(var_name, {}), 143 | {"coordinates": f"{axes.t} {axes.z} {axes.x} {axes.y}"}, 144 | ) 145 | 146 | # Write values for each profile within profile_group 147 | for i, (uid, pdf) in enumerate(profile_group): 148 | profile[i] = uid 149 | 150 | time[i] = date2num(pdf[axes.t].iloc[0], units=cls.default_time_unit) 151 | latitude[i] = pdf[axes.y].iloc[0] 152 | longitude[i] = pdf[axes.x].iloc[0] 153 | 154 | zvalues = pdf[axes.z].fillna(z._FillValue).values 155 | sl = slice(0, zvalues.size) 156 | z[i, sl] = zvalues 157 | 158 | for c in data_columns: 159 | var_name = cf_safe_name(c) 160 | v = nc.variables[var_name] 161 | 162 | vvalues = get_ncdata_from_series(pdf[c], v) 163 | 164 | sl = slice(0, vvalues.size) 165 | v[i, sl] = vvalues 166 | 167 | # Set global attributes 168 | nc.update_attributes(attributes) 169 | 170 | return IncompleteMultidimensionalProfile(output, **kwargs) 171 | 172 | def calculated_metadata( 173 | self, df=None, geometries=True, clean_cols=True, clean_rows=True, **kwargs 174 | ): 175 | axes = get_default_axes(kwargs.pop("axes", {})) 176 | if df is None: 177 | df = self.to_dataframe(clean_cols=clean_cols, clean_rows=clean_rows, axes=axes) 178 | return profile_calculated_metadata(df, axes, geometries) 179 | 180 | def to_dataframe(self, clean_cols=True, clean_rows=True, **kwargs): 181 | axes = get_default_axes(kwargs.pop("axes", {})) 182 | 183 | axv = get_mapped_axes_variables(self, axes) 184 | 185 | # Multiple profiles in the file 186 | pvar = axv.profile 187 | p_dim = self.dimensions[pvar.dimensions[0]] 188 | 189 | zvar = axv.z 190 | zs = len(self.dimensions[[d for d in zvar.dimensions if d != p_dim.name][0]]) 191 | 192 | # Profiles 193 | p = normalize_countable_array(pvar) 194 | p = p.repeat(zs) 195 | 196 | # Z 197 | z = generic_masked(zvar[:].flatten(), attrs=self.vatts(zvar.name)) 198 | 199 | # T 200 | tvar = axv.t 201 | t = tvar[:].repeat(zs) 202 | nt = get_masked_datetime_array(t, tvar).flatten() 203 | 204 | # X 205 | xvar = axv.x 206 | x = generic_masked(xvar[:].repeat(zs), attrs=self.vatts(xvar.name)) 207 | 208 | # Y 209 | yvar = axv.y 210 | y = generic_masked(yvar[:].repeat(zs), attrs=self.vatts(yvar.name)) 211 | 212 | df_data = OrderedDict( 213 | [(axes.t, nt), (axes.x, x), (axes.y, y), 
(axes.z, z), (axes.profile, p)] 214 | ) 215 | 216 | building_index_to_drop = np.ones(t.size, dtype=bool) 217 | 218 | extract_vars = copy(self.variables) 219 | for ncvar in axv._asdict().values(): 220 | if ncvar is not None and ncvar.name in extract_vars: 221 | del extract_vars[ncvar.name] 222 | 223 | for i, (dnam, dvar) in enumerate(extract_vars.items()): 224 | # Profile dimension 225 | if dvar.dimensions == pvar.dimensions: 226 | vdata = generic_masked( 227 | dvar[:].repeat(zs).astype(dvar.dtype), attrs=self.vatts(dnam) 228 | ) 229 | 230 | # Profile, z dimension 231 | elif dvar.dimensions == zvar.dimensions: 232 | vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype), attrs=self.vatts(dnam)) 233 | 234 | else: 235 | vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype), attrs=self.vatts(dnam)) 236 | # Carry through size 1 variables 237 | if vdata.size == 1: 238 | if vdata[0] is np.ma.masked: 239 | L.warning(f"Skipping variable {dnam} that is completely masked") 240 | continue 241 | else: 242 | L.warning(f"Skipping variable {dnam} since it didn't match any dimension sizes") 243 | continue 244 | 245 | # Mark rows with data so we don't remove them with clear_rows 246 | if vdata.size == building_index_to_drop.size: 247 | building_index_to_drop = (building_index_to_drop == True) & (vdata.mask == True) # noqa 248 | 249 | # Handle scalars here at the end 250 | if vdata.size == 1: 251 | vdata = vdata[0] 252 | 253 | df_data[dnam] = vdata 254 | 255 | df = pd.DataFrame(df_data) 256 | 257 | # Drop all data columns with no data 258 | if clean_cols: 259 | df = df.dropna(axis=1, how="all") 260 | 261 | # Drop all data rows with no data variable data 262 | if clean_rows: 263 | df = df.iloc[~building_index_to_drop] 264 | 265 | return df 266 | 267 | def nc_attributes(self, axes, daxes): 268 | atts = super().nc_attributes() 269 | return dict_update( 270 | atts, 271 | { 272 | "global": {"featureType": "profile", "cdm_data_type": "Profile"}, 273 | axes.profile: {"cf_role": "profile_id", "long_name": "profile identifier"}, 274 | axes.x: {"axis": "X"}, 275 | axes.y: {"axis": "Y"}, 276 | axes.z: {"axis": "Z"}, 277 | axes.t: {"units": self.default_time_unit, "standard_name": "time", "axis": "T"}, 278 | }, 279 | ) 280 | -------------------------------------------------------------------------------- /pocean/dsg/profile/om.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from copy import copy 3 | 4 | import numpy as np 5 | import pandas as pd 6 | 7 | from pocean import logger as L # noqa 8 | from pocean.cf import CFDataset 9 | from pocean.dsg.profile import profile_calculated_metadata 10 | from pocean.utils import ( 11 | generic_masked, 12 | get_default_axes, 13 | get_mapped_axes_variables, 14 | get_masked_datetime_array, 15 | normalize_array, 16 | normalize_countable_array, 17 | ) 18 | 19 | 20 | class OrthogonalMultidimensionalProfile(CFDataset): 21 | """ 22 | If the profile instances have the same number of elements and the vertical 23 | coordinate values are identical for all instances, you may use the 24 | orthogonal multidimensional array representation. This has either a 25 | one-dimensional coordinate variable, z(z), provided the vertical coordinate 26 | values are ordered monotonically, or a one-dimensional auxiliary coordinate 27 | variable, alt(o), where o is the element dimension. In the former case, 28 | listing the vertical coordinate variable in the coordinates attributes of 29 | the data variables is optional. 
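
    A minimal CDL sketch of this layout (dimension and variable names are
    illustrative only, not required by this class):

        dimensions:
            profile = 2 ;
            z = 10 ;
        variables:
            int profile(profile) ;             // cf_role = "profile_id"
            double time(profile) ;
            double lon(profile) ;
            double lat(profile) ;
            double z(z) ;                      // single shared vertical axis
            double temperature(profile, z) ;   // data variable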
30 | """ 31 | 32 | @classmethod 33 | def is_mine(cls, dsg, strict=False): 34 | try: 35 | pvars = dsg.filter_by_attrs(cf_role="profile_id") 36 | assert len(pvars) == 1 37 | assert dsg.featureType.lower() == "profile" 38 | assert len(dsg.t_axes()) >= 1 39 | assert len(dsg.x_axes()) >= 1 40 | assert len(dsg.y_axes()) >= 1 41 | assert len(dsg.z_axes()) >= 1 42 | 43 | # Allow for string variables 44 | pvar = pvars[0] 45 | # 0 = single 46 | # 1 = array of strings/ints/bytes/etc 47 | # 2 = array of character arrays 48 | assert 0 <= len(pvar.dimensions) <= 2 49 | 50 | t = dsg.t_axes()[0] 51 | x = dsg.x_axes()[0] 52 | y = dsg.y_axes()[0] 53 | z = dsg.z_axes()[0] 54 | assert len(z.dimensions) == 1 55 | z_dim = dsg.dimensions[z.dimensions[0]] 56 | 57 | ps = normalize_array(pvar) 58 | is_single = False 59 | 60 | if pvar.ndim == 0: 61 | is_single = True 62 | elif pvar.ndim == 2: 63 | is_single = False 64 | elif isinstance(ps, str): 65 | # Non-dimensioned string variable 66 | is_single = True 67 | elif pvar.ndim == 1 and hasattr(ps, "dtype") and ps.dtype.kind in ["U", "S"]: 68 | is_single = True 69 | 70 | if is_single: 71 | assert t.size == 1 72 | assert x.size == 1 73 | assert y.size == 1 74 | for dv in dsg.data_vars(): 75 | assert len(dv.dimensions) == 1 76 | assert z_dim.name in dv.dimensions 77 | assert dv.size == z_dim.size 78 | else: 79 | assert t.size == pvar.size 80 | assert x.size == pvar.size 81 | assert y.size == pvar.size 82 | p_dim = dsg.dimensions[pvar.dimensions[0]] 83 | for dv in dsg.data_vars(): 84 | assert len(dv.dimensions) in [1, 2] # dimensioned by profile or profile, z 85 | assert z_dim.name in dv.dimensions or p_dim.name in dv.dimensions 86 | assert dv.size in [z_dim.size, p_dim.size, z_dim.size * p_dim.size] 87 | 88 | except BaseException: 89 | if strict is True: 90 | raise 91 | return False 92 | 93 | return True 94 | 95 | @classmethod 96 | def from_dataframe(cls, df, output, **kwargs): 97 | raise NotImplementedError 98 | 99 | def calculated_metadata( 100 | self, df=None, geometries=True, clean_cols=True, clean_rows=True, **kwargs 101 | ): 102 | axes = get_default_axes(kwargs.pop("axes", {})) 103 | if df is None: 104 | df = self.to_dataframe(clean_cols=clean_cols, clean_rows=clean_rows, axes=axes) 105 | return profile_calculated_metadata(df, axes, geometries) 106 | 107 | def to_dataframe(self, clean_cols=True, clean_rows=True, **kwargs): 108 | axes = get_default_axes(kwargs.pop("axes", {})) 109 | 110 | axv = get_mapped_axes_variables(self, axes) 111 | 112 | zvar = axv.z 113 | zs = len(self.dimensions[zvar.dimensions[0]]) 114 | 115 | # Profiles 116 | pvar = axv.profile 117 | p = normalize_countable_array(pvar) 118 | ps = p.size 119 | p = p.repeat(zs) 120 | 121 | # Z 122 | z = generic_masked(zvar[:], attrs=self.vatts(zvar.name)) 123 | try: 124 | z = np.tile(z, ps) 125 | except ValueError: 126 | z = z.flatten() 127 | 128 | # T 129 | tvar = axv.t 130 | t = tvar[:].repeat(zs) 131 | nt = get_masked_datetime_array(t, tvar).flatten() 132 | 133 | # X 134 | xvar = axv.x 135 | x = generic_masked(xvar[:].repeat(zs), attrs=self.vatts(xvar.name)) 136 | 137 | # Y 138 | yvar = axv.y 139 | y = generic_masked(yvar[:].repeat(zs), attrs=self.vatts(yvar.name)) 140 | 141 | df_data = OrderedDict( 142 | [(axes.t, nt), (axes.x, x), (axes.y, y), (axes.z, z), (axes.profile, p)] 143 | ) 144 | 145 | building_index_to_drop = np.ones(t.size, dtype=bool) 146 | 147 | # Axes variables are already processed so skip them 148 | extract_vars = copy(self.variables) 149 | for ncvar in axv._asdict().values(): 150 | if 
ncvar is not None and ncvar.name in extract_vars: 151 | del extract_vars[ncvar.name] 152 | 153 | for i, (dnam, dvar) in enumerate(extract_vars.items()): 154 | # Profile dimension 155 | if dvar.dimensions == pvar.dimensions: 156 | vdata = generic_masked( 157 | dvar[:].repeat(zs).astype(dvar.dtype), attrs=self.vatts(dnam) 158 | ) 159 | 160 | # Z dimension 161 | elif dvar.dimensions == zvar.dimensions: 162 | vdata = generic_masked( 163 | np.tile(dvar[:], ps).flatten().astype(dvar.dtype), attrs=self.vatts(dnam) 164 | ) 165 | 166 | # Profile, z dimension 167 | elif dvar.dimensions == pvar.dimensions + zvar.dimensions: 168 | vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype), attrs=self.vatts(dnam)) 169 | 170 | else: 171 | vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype), attrs=self.vatts(dnam)) 172 | # Carry through size 1 variables 173 | if vdata.size == 1: 174 | if vdata[0] is np.ma.masked: 175 | L.warning(f"Skipping variable {dnam} that is completely masked") 176 | continue 177 | else: 178 | L.warning(f"Skipping variable {dnam} since it didn't match any dimension sizes") 179 | continue 180 | 181 | # Mark rows with data so we don't remove them with clear_rows 182 | if vdata.size == building_index_to_drop.size: 183 | building_index_to_drop = (building_index_to_drop == True) & (vdata.mask == True) # noqa 184 | 185 | # Handle scalars here at the end 186 | if vdata.size == 1: 187 | vdata = vdata[0] 188 | 189 | df_data[dnam] = vdata 190 | 191 | df = pd.DataFrame(df_data) 192 | 193 | # Drop all data columns with no data 194 | if clean_cols: 195 | df = df.dropna(axis=1, how="all") 196 | 197 | # Drop all data rows with no data variable data 198 | if clean_rows: 199 | df = df.iloc[~building_index_to_drop] 200 | 201 | return df 202 | -------------------------------------------------------------------------------- /pocean/dsg/timeseries/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyoceans/pocean-core/6e4dda9c818c65b31192e8d5fb01d6bcf29a43f3/pocean/dsg/timeseries/__init__.py -------------------------------------------------------------------------------- /pocean/dsg/timeseries/cr.py: -------------------------------------------------------------------------------- 1 | #!python 2 | from pocean import logger # noqa 3 | from pocean.cf import CFDataset 4 | 5 | 6 | class ContiguousRaggedTimeseries(CFDataset): 7 | @classmethod 8 | def is_mine(cls, dsg, strict=False): 9 | try: 10 | rvars = dsg.filter_by_attrs(cf_role="timeseries_id") 11 | assert len(rvars) == 1 12 | assert dsg.featureType.lower() == "timeseries" 13 | assert len(dsg.t_axes()) >= 1 14 | assert len(dsg.x_axes()) >= 1 15 | assert len(dsg.y_axes()) >= 1 16 | 17 | o_index_vars = dsg.filter_by_attrs(sample_dimension=lambda x: x is not None) 18 | assert len(o_index_vars) == 1 19 | assert o_index_vars[0].sample_dimension in dsg.dimensions # Sample dimension 20 | 21 | # Allow for string variables 22 | rvar = rvars[0] 23 | # 0 = single 24 | # 1 = array of strings/ints/bytes/etc 25 | # 2 = array of character arrays 26 | assert 0 <= len(rvar.dimensions) <= 2 27 | except BaseException: 28 | if strict is True: 29 | raise 30 | return False 31 | 32 | return True 33 | 34 | def from_dataframe(cls, df, output, **kwargs): 35 | raise NotImplementedError 36 | 37 | def calculated_metadata(self, df=None, geometries=True, clean_cols=True, clean_rows=True): 38 | # if df is None: 39 | # df = self.to_dataframe(clean_cols=clean_cols, clean_rows=clean_rows) 40 | raise 
NotImplementedError 41 | 42 | def to_dataframe(self): 43 | raise NotImplementedError 44 | -------------------------------------------------------------------------------- /pocean/dsg/timeseries/im.py: -------------------------------------------------------------------------------- 1 | #!python 2 | from pocean import logger # noqa 3 | from pocean.cf import CFDataset 4 | 5 | 6 | class IncompleteMultidimensionalTimeseries(CFDataset): 7 | @classmethod 8 | def is_mine(cls, dsg, strict=False): 9 | try: 10 | rvars = dsg.filter_by_attrs(cf_role="timeseries_id") 11 | assert len(rvars) == 1 12 | assert dsg.featureType.lower() == "timeseries" 13 | assert len(dsg.t_axes()) >= 1 14 | assert len(dsg.x_axes()) >= 1 15 | assert len(dsg.y_axes()) >= 1 16 | 17 | # Not a CR 18 | assert not dsg.filter_by_attrs(sample_dimension=lambda x: x is not None) 19 | 20 | # Not an IR 21 | assert not dsg.filter_by_attrs(instance_dimension=lambda x: x is not None) 22 | 23 | # IM files will always have a time variable with two dimensions 24 | # because IM files are never used for files with a single station. 25 | assert len(dsg.t_axes()[0].dimensions) == 2 26 | 27 | # Allow for string variables 28 | rvar = rvars[0] 29 | # 0 = single 30 | # 1 = array of strings/ints/bytes/etc 31 | # 2 = array of character arrays 32 | assert 0 <= len(rvar.dimensions) <= 2 33 | 34 | except BaseException: 35 | if strict is True: 36 | raise 37 | return False 38 | 39 | return True 40 | 41 | def from_dataframe(cls, df, output, **kwargs): 42 | raise NotImplementedError 43 | 44 | def calculated_metadata(self, df=None, geometries=True, clean_cols=True, clean_rows=True): 45 | # if df is None: 46 | # df = self.to_dataframe(clean_cols=clean_cols, clean_rows=clean_rows) 47 | raise NotImplementedError 48 | 49 | def to_dataframe(self): 50 | raise NotImplementedError 51 | -------------------------------------------------------------------------------- /pocean/dsg/timeseries/ir.py: -------------------------------------------------------------------------------- 1 | #!python 2 | from pocean import logger # noqa 3 | from pocean.cf import CFDataset 4 | 5 | 6 | class IndexedRaggedTimeseries(CFDataset): 7 | @classmethod 8 | def is_mine(cls, dsg, strict=False): 9 | try: 10 | rvars = dsg.filter_by_attrs(cf_role="timeseries_id") 11 | assert len(rvars) == 1 12 | assert dsg.featureType.lower() == "timeseries" 13 | assert len(dsg.t_axes()) >= 1 14 | assert len(dsg.x_axes()) >= 1 15 | assert len(dsg.y_axes()) >= 1 16 | 17 | r_index_vars = dsg.filter_by_attrs(instance_dimension=lambda x: x is not None) 18 | assert len(r_index_vars) == 1 19 | assert r_index_vars[0].instance_dimension in dsg.dimensions # Station dimension 20 | 21 | # Allow for string variables 22 | rvar = rvars[0] 23 | # 0 = single 24 | # 1 = array of strings/ints/bytes/etc 25 | # 2 = array of character arrays 26 | assert 0 <= len(rvar.dimensions) <= 2 27 | 28 | except BaseException: 29 | if strict is True: 30 | raise 31 | return False 32 | 33 | return True 34 | 35 | def from_dataframe(cls, df, output, **kwargs): 36 | raise NotImplementedError 37 | 38 | def calculated_metadata(self, df=None, geometries=True, clean_cols=True, clean_rows=True): 39 | # if df is None: 40 | # df = self.to_dataframe(clean_cols=clean_cols, clean_rows=clean_rows) 41 | raise NotImplementedError 42 | 43 | def to_dataframe(self): 44 | raise NotImplementedError 45 | -------------------------------------------------------------------------------- /pocean/dsg/timeseries/om.py: 
-------------------------------------------------------------------------------- 1 | #!python 2 | from collections import OrderedDict 3 | from copy import copy 4 | 5 | import numpy as np 6 | import pandas as pd 7 | 8 | from pocean import logger as L # noqa 9 | from pocean.cf import cf_safe_name, CFDataset 10 | from pocean.utils import ( 11 | create_ncvar_from_series, 12 | dict_update, 13 | downcast_dataframe, 14 | generic_masked, 15 | get_default_axes, 16 | get_dtype, 17 | get_mapped_axes_variables, 18 | get_masked_datetime_array, 19 | get_ncdata_from_series, 20 | nativize_times, 21 | normalize_countable_array, 22 | ) 23 | 24 | 25 | class OrthogonalMultidimensionalTimeseries(CFDataset): 26 | """ 27 | H.2.1. Orthogonal multidimensional array representation of time series 28 | 29 | If the time series instances have the same number of elements and the time values are identical 30 | for all instances, you may use the orthogonal multidimensional array representation. This has 31 | either a one-dimensional coordinate variable, time(time), provided the time values are ordered 32 | monotonically, or a one-dimensional auxiliary coordinate variable, time(o), where o is the 33 | element dimension. In the former case, listing the time variable in the coordinates attributes 34 | of the data variables is optional. 35 | """ 36 | 37 | @classmethod 38 | def is_mine(cls, dsg, strict=False): 39 | try: 40 | rvars = dsg.filter_by_attrs(cf_role="timeseries_id") 41 | assert len(rvars) == 1 42 | assert dsg.featureType.lower() == "timeseries" 43 | assert len(dsg.t_axes()) >= 1 44 | assert len(dsg.x_axes()) >= 1 45 | assert len(dsg.y_axes()) >= 1 46 | 47 | # Not a CR 48 | assert not dsg.filter_by_attrs(sample_dimension=lambda x: x is not None) 49 | 50 | # Not an IR 51 | assert not dsg.filter_by_attrs(instance_dimension=lambda x: x is not None) 52 | 53 | # OM files will always have a time variable with one dimension. 
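# e.g. time(time) shared by every station, with data variables shaped
# (station, time); a two-dimensional time(station, time) would instead be
# an incomplete multidimensional (IM) timeseries. (Illustrative shapes only.)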
54 | assert len(dsg.t_axes()[0].dimensions) == 1 55 | 56 | # Allow for string variables 57 | rvar = rvars[0] 58 | # 0 = single 59 | # 1 = array of strings/ints/bytes/etc 60 | # 2 = array of character arrays 61 | assert 0 <= len(rvar.dimensions) <= 2 62 | 63 | except BaseException: 64 | if strict is True: 65 | raise 66 | return False 67 | 68 | return True 69 | 70 | @classmethod 71 | def from_dataframe(cls, df, output, **kwargs): 72 | axes = get_default_axes(kwargs.pop("axes", {})) 73 | daxes = axes 74 | data_columns = [d for d in df.columns if d not in axes] 75 | 76 | reduce_dims = kwargs.pop("reduce_dims", False) 77 | _ = kwargs.pop("unlimited", False) 78 | 79 | unique_dims = kwargs.pop("unique_dims", False) 80 | if unique_dims is True: 81 | # Rename the dimension to avoid a dimension and coordinate having the same name 82 | # which is not support in xarray 83 | changed_axes = {k: f"{v}_dim" for k, v in axes._asdict().items()} 84 | daxes = get_default_axes(changed_axes) 85 | 86 | # Downcast anything from int64 to int32 87 | # Convert any timezone aware datetimes to native UTC times 88 | df = downcast_dataframe(nativize_times(df)) 89 | 90 | with OrthogonalMultidimensionalTimeseries(output, "w") as nc: 91 | station_group = df.groupby(axes.station) 92 | num_stations = len(station_group) 93 | has_z = axes.z is not None 94 | 95 | if reduce_dims is True and num_stations == 1: 96 | # If a station, we can reduce that dimension if it is of size 1 97 | def ts(i): 98 | return np.s_[:] 99 | 100 | default_dimensions = (daxes.t,) 101 | station_dimensions = () 102 | else: 103 | 104 | def ts(i): 105 | return np.s_[i, :] 106 | 107 | default_dimensions = (daxes.station, daxes.t) 108 | station_dimensions = (daxes.station,) 109 | nc.createDimension(daxes.station, num_stations) 110 | 111 | # Set the coordinates attribute correctly 112 | coordinates = [axes.t, axes.x, axes.y] 113 | if has_z is True: 114 | coordinates.insert(1, axes.z) 115 | coordinates = " ".join(coordinates) 116 | 117 | # assume all groups are the same size and have identical times 118 | _, sdf = list(station_group)[0] 119 | t = sdf[axes.t] 120 | 121 | # Metadata variables 122 | nc.createVariable("crs", "i4") 123 | 124 | # Create all of the variables 125 | nc.createDimension(daxes.t, t.size) 126 | time = nc.createVariable(axes.t, "f8", (daxes.t,)) 127 | station = nc.createVariable( 128 | axes.station, get_dtype(df[axes.station]), station_dimensions 129 | ) 130 | latitude = nc.createVariable(axes.y, get_dtype(df[axes.y]), station_dimensions) 131 | longitude = nc.createVariable(axes.x, get_dtype(df[axes.x]), station_dimensions) 132 | if has_z is True: 133 | z = nc.createVariable( 134 | axes.z, 135 | get_dtype(df[axes.z]), 136 | station_dimensions, 137 | fill_value=df[axes.z].dtype.type(cls.default_fill_value), 138 | ) 139 | 140 | attributes = dict_update(nc.nc_attributes(axes, daxes), kwargs.pop("attributes", {})) 141 | 142 | time[:] = get_ncdata_from_series(t, time).astype("f8") 143 | 144 | # Create vars based on full dataframe (to get all variables) 145 | for c in data_columns: 146 | var_name = cf_safe_name(c) 147 | if var_name not in nc.variables: 148 | v = create_ncvar_from_series( 149 | nc, 150 | var_name, 151 | default_dimensions, 152 | df[c], 153 | ) 154 | attributes[var_name] = dict_update( 155 | attributes.get(var_name, {}), {"coordinates": coordinates} 156 | ) 157 | 158 | for i, (uid, sdf) in enumerate(station_group): 159 | station[i] = uid 160 | latitude[i] = sdf[axes.y].iloc[0] 161 | longitude[i] = sdf[axes.x].iloc[0] 162 | 163 | if 
has_z is True: 164 | # TODO: write a test for a Z with a _FillValue 165 | z[i] = sdf[axes.z].iloc[0] 166 | 167 | for c in data_columns: 168 | # Create variable if it doesn't exist 169 | var_name = cf_safe_name(c) 170 | v = nc.variables[var_name] 171 | 172 | vvalues = get_ncdata_from_series(sdf[c], v) 173 | try: 174 | v[ts(i)] = vvalues 175 | except BaseException: 176 | L.debug(f"{v.name} was not written. Likely a metadata variable") 177 | 178 | # Set global attributes 179 | nc.update_attributes(attributes) 180 | 181 | return OrthogonalMultidimensionalTimeseries(output, **kwargs) 182 | 183 | def calculated_metadata( 184 | self, df=None, geometries=True, clean_cols=True, clean_rows=True, **kwargs 185 | ): 186 | # axes = get_default_axes(kwargs.pop('axes', {})) 187 | # if df is None: 188 | # df = self.to_dataframe(clean_cols=clean_cols, clean_rows=clean_rows, axes=axes) 189 | raise NotImplementedError 190 | 191 | def to_dataframe(self, clean_cols=False, clean_rows=False, **kwargs): 192 | axes = get_default_axes(kwargs.pop("axes", {})) 193 | 194 | axv = get_mapped_axes_variables(self, axes) 195 | 196 | # T 197 | t = get_masked_datetime_array(axv.t[:], axv.t) 198 | 199 | # X 200 | x = generic_masked(axv.x[:].repeat(t.size), attrs=self.vatts(axv.x.name)) 201 | 202 | # Y 203 | y = generic_masked(axv.y[:].repeat(t.size), attrs=self.vatts(axv.y.name)) 204 | 205 | # Z 206 | if axv.z is not None: 207 | z = generic_masked(axv.z[:].repeat(t.size), attrs=self.vatts(axv.z.name)) 208 | else: 209 | z = None 210 | 211 | svar = axv.station 212 | s = normalize_countable_array(svar) 213 | s = np.repeat(s, t.size) 214 | 215 | # now repeat t per station 216 | # figure out if this is a single-station file by checking 217 | # the dimension size of the x dimension 218 | if axv.x.ndim == 1: 219 | t = np.repeat(t, len(svar)) 220 | 221 | df_data = OrderedDict( 222 | [ 223 | (axes.t, t), 224 | (axes.x, x), 225 | (axes.y, y), 226 | (axes.z, z), 227 | (axes.station, s), 228 | ] 229 | ) 230 | 231 | building_index_to_drop = np.ma.zeros(t.size, dtype=bool) 232 | 233 | # Axes variables are already processed so skip them 234 | extract_vars = copy(self.variables) 235 | for ncvar in axv._asdict().values(): 236 | if ncvar is not None and ncvar.name in extract_vars: 237 | del extract_vars[ncvar.name] 238 | 239 | for i, (dnam, dvar) in enumerate(extract_vars.items()): 240 | vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype), attrs=self.vatts(dnam)) 241 | 242 | # Carry through size 1 variables 243 | if vdata.size == 1: 244 | if vdata[0] is np.ma.masked: 245 | L.warning(f"Skipping variable {dnam} that is completely masked") 246 | continue 247 | else: 248 | if dvar[:].flatten().size != t.size: 249 | L.warning(f"Variable {dnam} is not the correct size, skipping.") 250 | continue 251 | 252 | # Mark rows with data so we don't remove them with clear_rows 253 | if vdata.size == building_index_to_drop.size: 254 | building_index_to_drop = (building_index_to_drop == True) & (vdata.mask == True) # noqa 255 | 256 | # Handle scalars here at the end 257 | if vdata.size == 1: 258 | vdata = vdata[0] 259 | 260 | df_data[dnam] = vdata 261 | 262 | df = pd.DataFrame(df_data) 263 | 264 | # Drop all data columns with no data 265 | if clean_cols: 266 | df = df.dropna(axis=1, how="all") 267 | 268 | # Drop all data rows with no data variable data 269 | if clean_rows: 270 | df = df.iloc[~building_index_to_drop] 271 | 272 | return df 273 | 274 | def nc_attributes(self, axes, daxes): 275 | atts = super().nc_attributes() 276 | return dict_update( 277 
| atts, 278 | { 279 | "global": {"featureType": "timeseries", "cdm_data_type": "Timeseries"}, 280 | axes.station: {"cf_role": "timeseries_id", "long_name": "station identifier"}, 281 | axes.t: {"units": self.default_time_unit, "standard_name": "time", "axis": "T"}, 282 | axes.y: {"axis": "Y"}, 283 | axes.x: {"axis": "X"}, 284 | axes.z: {"axis": "Z"}, 285 | }, 286 | ) 287 | -------------------------------------------------------------------------------- /pocean/dsg/timeseriesProfile/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyoceans/pocean-core/6e4dda9c818c65b31192e8d5fb01d6bcf29a43f3/pocean/dsg/timeseriesProfile/__init__.py -------------------------------------------------------------------------------- /pocean/dsg/timeseriesProfile/im.py: -------------------------------------------------------------------------------- 1 | #!python 2 | from pocean.cf import CFDataset 3 | 4 | 5 | class IncompleteMultidimensionalTimeseriesProfile(CFDataset): 6 | @classmethod 7 | def is_mine(cls, dsg, strict=False): 8 | try: 9 | assert dsg.featureType.lower() == "timeseriesprofile" 10 | assert len(dsg.t_axes()) >= 1 11 | assert len(dsg.x_axes()) >= 1 12 | assert len(dsg.y_axes()) >= 1 13 | assert len(dsg.z_axes()) >= 1 14 | 15 | zvar = dsg.z_axes()[0] 16 | assert len(zvar.dimensions) > 1 17 | 18 | # Not ragged 19 | o_index_vars = dsg.filter_by_attrs(sample_dimension=lambda x: x is not None) 20 | assert len(o_index_vars) == 0 21 | 22 | r_index_vars = dsg.filter_by_attrs(instance_dimension=lambda x: x is not None) 23 | assert len(r_index_vars) == 0 24 | 25 | except BaseException: 26 | if strict is True: 27 | raise 28 | return False 29 | 30 | return True 31 | 32 | def from_dataframe(cls, df, output, **kwargs): 33 | raise NotImplementedError 34 | 35 | def calculated_metadata(self, df=None, geometries=True, clean_cols=True, clean_rows=True): 36 | # if df is None: 37 | # df = self.to_dataframe(clean_cols=clean_cols, clean_rows=clean_rows) 38 | raise NotImplementedError 39 | 40 | def to_dataframe(self): 41 | raise NotImplementedError 42 | -------------------------------------------------------------------------------- /pocean/dsg/timeseriesProfile/om.py: -------------------------------------------------------------------------------- 1 | #!python 2 | from collections import OrderedDict 3 | from copy import copy 4 | 5 | import numpy as np 6 | import pandas as pd 7 | from cftime import date2num 8 | 9 | from pocean import logger as L # noqa 10 | from pocean.cf import cf_safe_name, CFDataset 11 | from pocean.utils import ( 12 | create_ncvar_from_series, 13 | dict_update, 14 | downcast_dataframe, 15 | generic_masked, 16 | get_default_axes, 17 | get_dtype, 18 | get_mapped_axes_variables, 19 | get_masked_datetime_array, 20 | get_ncdata_from_series, 21 | nativize_times, 22 | normalize_countable_array, 23 | ) 24 | 25 | 26 | class OrthogonalMultidimensionalTimeseriesProfile(CFDataset): 27 | @classmethod 28 | def is_mine(cls, dsg, strict=False): 29 | try: 30 | assert dsg.featureType.lower() == "timeseriesprofile" 31 | assert len(dsg.t_axes()) >= 1 32 | assert len(dsg.x_axes()) >= 1 33 | assert len(dsg.y_axes()) >= 1 34 | assert len(dsg.z_axes()) >= 1 35 | 36 | # If there is only a single set of levels and a single set of 37 | # times, then it is orthogonal. 
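# e.g. time(time) and z(z) shared across all stations, with data variables
# shaped (time, z, station); per-profile level sets would need one of the
# ragged representations rejected below. (Illustrative shapes only.)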
38 | tvar = dsg.t_axes()[0] 39 | assert len(tvar.dimensions) == 1 40 | 41 | zvar = dsg.z_axes()[0] 42 | assert len(zvar.dimensions) == 1 43 | 44 | assert tvar.dimensions != zvar.dimensions 45 | 46 | # Not ragged 47 | o_index_vars = dsg.filter_by_attrs(sample_dimension=lambda x: x is not None) 48 | assert len(o_index_vars) == 0 49 | 50 | r_index_vars = dsg.filter_by_attrs(instance_dimension=lambda x: x is not None) 51 | assert len(r_index_vars) == 0 52 | 53 | except BaseException: 54 | if strict is True: 55 | raise 56 | return False 57 | 58 | return True 59 | 60 | @classmethod 61 | def from_dataframe(cls, df, output, **kwargs): 62 | axes = get_default_axes(kwargs.pop("axes", {})) 63 | daxes = axes 64 | data_columns = [d for d in df.columns if d not in axes] 65 | 66 | reduce_dims = kwargs.pop("reduce_dims", False) 67 | unlimited = kwargs.pop("unlimited", False) 68 | 69 | unique_dims = kwargs.pop("unique_dims", False) 70 | if unique_dims is True: 71 | # Rename the dimension to avoid a dimension and coordinate having the same name 72 | # which is not supported in xarray 73 | changed_axes = {k: f"{v}_dim" for k, v in axes._asdict().items()} 74 | daxes = get_default_axes(changed_axes) 75 | 76 | # Downcast anything from int64 to int32 77 | # Convert any timezone aware datetimes to native UTC times 78 | df = downcast_dataframe(nativize_times(df)) 79 | 80 | # Make a new index that is the Cartesian product of all of the values from all of the 81 | # values of the old index. This is so don't have to iterate over anything. The full column 82 | # of data will be able to be shaped to the size of the final unique sized dimensions. 83 | index_order = [axes.t, axes.z, axes.station] 84 | df = df.set_index(index_order) 85 | df = df.reindex(pd.MultiIndex.from_product(df.index.levels, names=index_order)) 86 | 87 | unique_z = df.index.get_level_values(axes.z).unique().values 88 | unique_t = ( 89 | df.index.get_level_values(axes.t).unique().tolist() 90 | ) # tolist converts to Timestamp 91 | all_stations = df.index.get_level_values(axes.station) 92 | unique_s = all_stations.unique() 93 | 94 | with OrthogonalMultidimensionalTimeseriesProfile(output, "w") as nc: 95 | if reduce_dims is True and unique_s.size == 1: 96 | # If a singular trajectory, we can reduce that dimension if it is of size 1 97 | default_dimensions = (daxes.t, daxes.z) 98 | station_dimensions = () 99 | else: 100 | default_dimensions = (daxes.t, daxes.z, daxes.station) 101 | station_dimensions = (daxes.station,) 102 | nc.createDimension(daxes.station, unique_s.size) 103 | 104 | station = nc.createVariable(axes.station, get_dtype(unique_s), station_dimensions) 105 | latitude = nc.createVariable(axes.y, get_dtype(df[axes.y]), station_dimensions) 106 | longitude = nc.createVariable(axes.x, get_dtype(df[axes.x]), station_dimensions) 107 | # Assign over loop because VLEN variables (strings) have to be assigned by integer index 108 | # and we need to find the lat/lon based on station index 109 | for si, st in enumerate(unique_s): 110 | station[si] = st 111 | latitude[si] = df[axes.y][all_stations == st].dropna().iloc[0] 112 | longitude[si] = df[axes.x][all_stations == st].dropna().iloc[0] 113 | 114 | # Metadata variables 115 | nc.createVariable("crs", "i4") 116 | 117 | # Create all of the variables 118 | if unlimited is True: 119 | nc.createDimension(daxes.t, None) 120 | else: 121 | nc.createDimension(daxes.t, len(unique_t)) 122 | time = nc.createVariable(axes.t, "f8", (daxes.t,)) 123 | time[:] = date2num(unique_t, 
units=cls.default_time_unit).astype("f8") 124 | 125 | nc.createDimension(daxes.z, unique_z.size) 126 | z = nc.createVariable(axes.z, get_dtype(unique_z), (daxes.z,)) 127 | z[:] = unique_z 128 | 129 | attributes = dict_update(nc.nc_attributes(axes, daxes), kwargs.pop("attributes", {})) 130 | 131 | # Variables defined on only the time axis and not the depth axis 132 | detach_z_vars = kwargs.pop("detach_z", []) 133 | detach_z_columnms = [p for p in detach_z_vars if p in data_columns] 134 | for c in detach_z_columnms: 135 | var_name = cf_safe_name(c) 136 | if var_name not in nc.variables: 137 | v = create_ncvar_from_series( 138 | nc, 139 | var_name, 140 | default_dimensions[0::2], # this removes the second dimension (z) 141 | df[c], 142 | ) 143 | attributes[var_name] = dict_update( 144 | attributes.get(var_name, {}), 145 | {"coordinates": f"{axes.t} {axes.x} {axes.y}"}, 146 | ) 147 | else: 148 | v = nc.variables[var_name] 149 | 150 | # Because we need access to the fillvalues here, we ask not to return 151 | # the values with them already filled. 152 | vvalues = get_ncdata_from_series(df[c], v, fillna=False) 153 | # Reshape to the full array, with Z 154 | vvalues = vvalues.reshape(len(unique_t), unique_z.size, unique_s.size) 155 | # The Z axis is always the second axis, take the mean over that axis 156 | vvalues = np.apply_along_axis(np.nanmean, 1, vvalues).flatten() 157 | # Now reshape to the array without Z 158 | vvalues = vvalues.reshape(len(unique_t), unique_s.size) 159 | try: 160 | v[:] = vvalues.reshape(v.shape) 161 | except BaseException: 162 | L.exception(f"Failed to add {c}") 163 | continue 164 | 165 | full_columns = [f for f in data_columns if f not in detach_z_columnms] 166 | for c in full_columns: 167 | # Create variable if it doesn't exist 168 | var_name = cf_safe_name(c) 169 | if var_name not in nc.variables: 170 | v = create_ncvar_from_series( 171 | nc, 172 | var_name, 173 | default_dimensions, 174 | df[c], 175 | ) 176 | attributes[var_name] = dict_update( 177 | attributes.get(var_name, {}), 178 | {"coordinates": f"{axes.t} {axes.z} {axes.x} {axes.y}"}, 179 | ) 180 | else: 181 | v = nc.variables[var_name] 182 | 183 | vvalues = get_ncdata_from_series(df[c], v) 184 | v[:] = vvalues.reshape(v.shape) 185 | 186 | nc.update_attributes(attributes) 187 | 188 | return OrthogonalMultidimensionalTimeseriesProfile(output, **kwargs) 189 | 190 | def calculated_metadata( 191 | self, df=None, geometries=True, clean_cols=True, clean_rows=True, **kwargs 192 | ): 193 | # axes = get_default_axes(kwargs.pop('axes', {})) 194 | # if df is None: 195 | # df = self.to_dataframe(clean_cols=clean_cols, clean_rows=clean_rows, axes=axes) 196 | raise NotImplementedError 197 | 198 | def to_dataframe(self, clean_cols=True, clean_rows=True, **kwargs): 199 | axes = get_default_axes(kwargs.pop("axes", {})) 200 | 201 | axv = get_mapped_axes_variables(self, axes) 202 | 203 | svar = axv.station 204 | s = normalize_countable_array(svar) 205 | 206 | # T 207 | t = get_masked_datetime_array(axv.t[:], axv.t) 208 | n_times = t.size 209 | 210 | # X 211 | x = generic_masked(axv.x[:], attrs=self.vatts(axv.x.name)) 212 | 213 | # Y 214 | y = generic_masked(axv.y[:], attrs=self.vatts(axv.y.name)) 215 | 216 | # Z 217 | z = generic_masked(axv.z[:], attrs=self.vatts(axv.z.name)) 218 | n_z = z.size 219 | 220 | # denormalize table structure 221 | t = np.repeat(t, s.size * n_z) 222 | z = np.tile(np.repeat(z, s.size), n_times) 223 | s = np.tile(s, n_z * n_times) 224 | y = np.tile(y, n_times * n_z) 225 | x = np.tile(x, n_times * n_z) 
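# Worked example of the ordering above (illustrative): with two times, two
# depths and a single station, t -> [t0, t0, t1, t1] via np.repeat while
# z -> [z0, z1, z0, z1] via np.tile, so every row of the frame is one
# unique (t, z, station) combination matching the (time, z, station) shapes.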
226 | 227 | df_data = OrderedDict( 228 | [ 229 | (axes.t, t), 230 | (axes.x, x), 231 | (axes.y, y), 232 | (axes.z, z), 233 | (axes.station, s), 234 | ] 235 | ) 236 | 237 | building_index_to_drop = np.ones(t.size, dtype=bool) 238 | 239 | # Axes variables are already processed so skip them 240 | extract_vars = copy(self.variables) 241 | for ncvar in axv._asdict().values(): 242 | if ncvar is not None and ncvar.name in extract_vars: 243 | del extract_vars[ncvar.name] 244 | 245 | for i, (dnam, dvar) in enumerate(extract_vars.items()): 246 | vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype), attrs=self.vatts(dnam)) 247 | 248 | # Carry through size 1 variables 249 | if vdata.size == 1: 250 | if vdata[0] is np.ma.masked: 251 | L.warning(f"Skipping variable {dnam} that is completely masked") 252 | continue 253 | 254 | # Carry through profile only variables 255 | elif dvar.dimensions == axv.t.dimensions: 256 | # Make the first value valid and fill with nans 257 | vdata = vdata.repeat(n_z).reshape((n_times, n_z)) 258 | # Set everything after the first value to missing 259 | vdata[:, 1:] = np.ma.masked 260 | vdata = vdata.flatten() 261 | if vdata.size != t.size: 262 | L.warning(f"Variable {dnam} is not the correct size, skipping.") 263 | continue 264 | 265 | else: 266 | if vdata.size != t.size: 267 | L.warning(f"Variable {dnam} is not the correct size, skipping.") 268 | continue 269 | 270 | # Mark rows with data so we don't remove them with clear_rows 271 | if vdata.size == building_index_to_drop.size: 272 | building_index_to_drop = (building_index_to_drop == True) & (vdata.mask == True) # noqa 273 | 274 | # Handle scalars here at the end 275 | if vdata.size == 1: 276 | vdata = vdata[0] 277 | 278 | df_data[dnam] = vdata 279 | 280 | df = pd.DataFrame(df_data) 281 | 282 | # Drop all data columns with no data 283 | if clean_cols: 284 | df = df.dropna(axis=1, how="all") 285 | 286 | # Drop all data rows with no data variable data 287 | if clean_rows: 288 | df = df.iloc[~building_index_to_drop] 289 | 290 | return df 291 | 292 | def nc_attributes(self, axes, daxes): 293 | atts = super().nc_attributes() 294 | return dict_update( 295 | atts, 296 | { 297 | "global": { 298 | "featureType": "timeSeriesProfile", 299 | "cdm_data_type": "TimeseriesProfile", 300 | }, 301 | axes.station: {"cf_role": "timeseries_id", "long_name": "station identifier"}, 302 | axes.x: {"axis": "X"}, 303 | axes.y: {"axis": "Y"}, 304 | axes.z: {"axis": "Z"}, 305 | axes.t: {"units": self.default_time_unit, "standard_name": "time", "axis": "T"}, 306 | }, 307 | ) 308 | -------------------------------------------------------------------------------- /pocean/dsg/trajectory/__init__.py: -------------------------------------------------------------------------------- 1 | #!python 2 | from collections import namedtuple 3 | 4 | from shapely.geometry import LineString, Point 5 | 6 | from pocean.utils import ( 7 | unique_justseen, 8 | ) 9 | 10 | trajectory_meta = namedtuple("Trajectory", ["min_z", "max_z", "min_t", "max_t", "geometry"]) 11 | 12 | trajectories_meta = namedtuple( 13 | "TrajectoryCollection", ["min_z", "max_z", "min_t", "max_t", "trajectories"] 14 | ) 15 | 16 | 17 | def trajectory_calculated_metadata(df, axes, geometries=True): 18 | trajectories = {} 19 | for tid, tgroup in df.groupby(axes.trajectory): 20 | tgroup = tgroup.sort_values(axes.t) 21 | 22 | if geometries: 23 | null_coordinates = tgroup[axes.x].isnull() | tgroup[axes.y].isnull() 24 | coords = list( 25 | unique_justseen( 26 | zip( 27 | tgroup.loc[~null_coordinates, 
axes.x].tolist(), 28 | tgroup.loc[~null_coordinates, axes.y].tolist(), 29 | ) 30 | ) 31 | ) 32 | else: 33 | # Calculate the geometry as the linestring between all of the profile points 34 | first_row = tgroup.iloc[0] 35 | coords = [(first_row[axes.x], first_row[axes.y])] 36 | 37 | geometry = None 38 | if len(coords) > 1: 39 | geometry = LineString(coords) 40 | elif len(coords) == 1: 41 | geometry = Point(coords[0]) 42 | 43 | trajectories[tid] = trajectory_meta( 44 | min_z=tgroup[axes.z].min(), 45 | max_z=tgroup[axes.z].max(), 46 | min_t=tgroup[axes.t].min(), 47 | max_t=tgroup[axes.t].max(), 48 | geometry=geometry, 49 | ) 50 | 51 | return trajectories_meta( 52 | min_z=df[axes.z].min(), 53 | max_z=df[axes.z].max(), 54 | min_t=df[axes.t].min(), 55 | max_t=df[axes.t].max(), 56 | trajectories=trajectories, 57 | ) 58 | -------------------------------------------------------------------------------- /pocean/dsg/trajectory/cr.py: -------------------------------------------------------------------------------- 1 | #!python 2 | from collections import OrderedDict 3 | from copy import copy 4 | 5 | import numpy as np 6 | import pandas as pd 7 | 8 | from pocean import logger as L # noqa 9 | from pocean.cf import cf_safe_name, CFDataset 10 | from pocean.dsg.trajectory import trajectory_calculated_metadata 11 | from pocean.utils import ( 12 | create_ncvar_from_series, 13 | dict_update, 14 | downcast_dataframe, 15 | generic_masked, 16 | get_default_axes, 17 | get_dtype, 18 | get_mapped_axes_variables, 19 | get_masked_datetime_array, 20 | get_ncdata_from_series, 21 | nativize_times, 22 | normalize_countable_array, 23 | ) 24 | 25 | 26 | class ContiguousRaggedTrajectory(CFDataset): 27 | @classmethod 28 | def is_mine(cls, dsg, strict=False): 29 | try: 30 | rvars = dsg.filter_by_attrs(cf_role="trajectory_id") 31 | assert len(rvars) == 1 32 | assert dsg.featureType.lower() == "trajectory" 33 | assert len(dsg.t_axes()) >= 1 34 | assert len(dsg.x_axes()) >= 1 35 | assert len(dsg.y_axes()) >= 1 36 | assert len(dsg.z_axes()) >= 1 37 | 38 | o_index_vars = dsg.filter_by_attrs(sample_dimension=lambda x: x is not None) 39 | assert len(o_index_vars) == 1 40 | assert o_index_vars[0].sample_dimension in dsg.dimensions # Sample dimension 41 | 42 | # Allow for string variables 43 | rvar = rvars[0] 44 | # 0 = single 45 | # 1 = array of strings/ints/bytes/etc 46 | # 2 = array of character arrays 47 | assert 0 <= len(rvar.dimensions) <= 2 48 | except BaseException: 49 | if strict is True: 50 | raise 51 | return False 52 | 53 | return True 54 | 55 | @classmethod 56 | def from_dataframe(cls, df, output, **kwargs): 57 | axes = get_default_axes(kwargs.pop("axes", {})) 58 | daxes = axes 59 | 60 | # Should never be a CR file with one trajectory so we ignore the "reduce_dims" attribute 61 | _ = kwargs.pop("reduce_dims", False) # noqa 62 | unlimited = kwargs.pop("unlimited", False) 63 | 64 | unique_dims = kwargs.pop("unique_dims", False) 65 | if unique_dims is True: 66 | # Rename the dimension to avoid a dimension and coordinate having the same name 67 | # which is not support in xarray 68 | changed_axes = {k: f"{v}_dim" for k, v in axes._asdict().items()} 69 | daxes = get_default_axes(changed_axes) 70 | 71 | # Downcast anything from int64 to int32 72 | # Convert any timezone aware datetimes to native UTC times 73 | df = downcast_dataframe(nativize_times(df)) 74 | 75 | with ContiguousRaggedTrajectory(output, "w") as nc: 76 | trajectory_groups = df.groupby(axes.trajectory) 77 | unique_trajectories = 
list(trajectory_groups.groups.keys()) 78 | num_trajectories = len(unique_trajectories) 79 | nc.createDimension(daxes.trajectory, num_trajectories) 80 | trajectory = nc.createVariable( 81 | axes.trajectory, get_dtype(df[axes.trajectory]), (daxes.trajectory,) 82 | ) 83 | 84 | # Get unique obs by grouping on traj getting the max size 85 | if unlimited is True: 86 | nc.createDimension(daxes.sample, None) 87 | else: 88 | nc.createDimension(daxes.sample, len(df)) 89 | 90 | # Number of observations in each trajectory 91 | row_size = nc.createVariable("rowSize", "i4", (daxes.trajectory,)) 92 | 93 | attributes = dict_update(nc.nc_attributes(axes, daxes), kwargs.pop("attributes", {})) 94 | 95 | # Variables defined on only the trajectory axis 96 | traj_vars = kwargs.pop("traj_vars", []) 97 | traj_columns = [p for p in traj_vars if p in df.columns] 98 | for c in traj_columns: 99 | var_name = cf_safe_name(c) 100 | if var_name not in nc.variables: 101 | create_ncvar_from_series( 102 | nc, 103 | var_name, 104 | (daxes.trajectory,), 105 | df[c], 106 | ) 107 | 108 | for i, (trajid, trg) in enumerate(trajectory_groups): 109 | trajectory[i] = trajid 110 | row_size[i] = len(trg) 111 | 112 | # Save any trajectory variables using the first value found 113 | # in the column. 114 | for c in traj_columns: 115 | var_name = cf_safe_name(c) 116 | if var_name not in nc.variables: 117 | continue 118 | v = nc.variables[var_name] 119 | vvalues = get_ncdata_from_series(trg[c], v)[0] 120 | try: 121 | v[i] = vvalues 122 | except BaseException: 123 | L.exception(f"Failed to add {c}") 124 | continue 125 | 126 | # Add all of the columns based on the sample dimension. Take all columns and remove the 127 | # trajectory, rowSize and other trajectory based columns. 128 | sample_columns = [ 129 | f for f in df.columns if f not in traj_columns + ["rowSize", axes.trajectory] 130 | ] 131 | for c in sample_columns: 132 | var_name = cf_safe_name(c) 133 | if var_name not in nc.variables: 134 | v = create_ncvar_from_series( 135 | nc, 136 | var_name, 137 | (daxes.sample,), 138 | df[c], 139 | ) 140 | else: 141 | v = nc.variables[var_name] 142 | vvalues = get_ncdata_from_series(df[c], v) 143 | try: 144 | if unlimited is True: 145 | v[:] = vvalues 146 | else: 147 | v[:] = vvalues.reshape(v.shape) 148 | except BaseException: 149 | L.exception(f"Failed to add {c}") 150 | continue 151 | 152 | # Metadata variables 153 | if "crs" not in nc.variables: 154 | nc.createVariable("crs", "i4") 155 | 156 | # Set attributes 157 | nc.update_attributes(attributes) 158 | 159 | return ContiguousRaggedTrajectory(output, **kwargs) 160 | 161 | def calculated_metadata( 162 | self, df=None, geometries=True, clean_cols=True, clean_rows=True, **kwargs 163 | ): 164 | axes = get_default_axes(kwargs.pop("axes", {})) 165 | if df is None: 166 | df = self.to_dataframe(clean_cols=clean_cols, clean_rows=clean_rows, axes=axes) 167 | return trajectory_calculated_metadata(df, axes, geometries) 168 | 169 | def to_dataframe(self, clean_cols=True, clean_rows=True, **kwargs): 170 | axes = get_default_axes(kwargs.pop("axes", {})) 171 | 172 | axv = get_mapped_axes_variables(self, axes) 173 | 174 | o_index_var = self.filter_by_attrs(sample_dimension=lambda x: x is not None) 175 | if not o_index_var: 176 | raise ValueError( 177 | 'Could not find the "sample_dimension" attribute on any variables, ' 178 | "is this a valid {}?".format(self.__class__.__name__) 179 | ) 180 | else: 181 | o_index_var = o_index_var[0] 182 | o_dim = self.dimensions[o_index_var.sample_dimension] # Sample 
dimension 183 | t_dim = o_index_var.dimensions 184 | 185 | # Trajectory 186 | row_sizes = o_index_var[:] 187 | traj_data = normalize_countable_array(axv.trajectory) 188 | traj_data = np.repeat(traj_data, row_sizes) 189 | 190 | # time 191 | time_data = get_masked_datetime_array(axv.t[:], axv.t).flatten() 192 | 193 | df_data = OrderedDict([(axes.t, time_data), (axes.trajectory, traj_data)]) 194 | 195 | building_index_to_drop = np.ones(o_dim.size, dtype=bool) 196 | 197 | extract_vars = copy(self.variables) 198 | # Skip the time and row index variables 199 | del extract_vars[o_index_var.name] 200 | del extract_vars[axes.t] 201 | 202 | for i, (dnam, dvar) in enumerate(extract_vars.items()): 203 | # Trajectory dimensions 204 | if dvar.dimensions == t_dim: 205 | vdata = np.repeat(generic_masked(dvar[:], attrs=self.vatts(dnam)), row_sizes) 206 | 207 | # Sample dimensions 208 | elif dvar.dimensions == (o_dim.name,): 209 | vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype), attrs=self.vatts(dnam)) 210 | 211 | else: 212 | vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype), attrs=self.vatts(dnam)) 213 | # Carry through size 1 variables 214 | if vdata.size == 1: 215 | if vdata[0] is np.ma.masked: 216 | L.warning(f"Skipping variable {dnam} that is completely masked") 217 | continue 218 | else: 219 | L.warning(f"Skipping variable {dnam} since it didn't match any dimension sizes") 220 | continue 221 | 222 | # Mark rows with data so we don't remove them with clear_rows 223 | if vdata.size == building_index_to_drop.size: 224 | building_index_to_drop = (building_index_to_drop == True) & (vdata.mask == True) # noqa 225 | 226 | # Handle scalars here at the end 227 | if vdata.size == 1: 228 | vdata = vdata[0] 229 | 230 | df_data[dnam] = vdata 231 | 232 | df = pd.DataFrame(df_data) 233 | 234 | # Drop all data columns with no data 235 | if clean_cols: 236 | df = df.dropna(axis=1, how="all") 237 | 238 | # Drop all data rows with no data variable data 239 | if clean_rows: 240 | df = df.iloc[~building_index_to_drop] 241 | 242 | return df 243 | 244 | def nc_attributes(self, axes, daxes): 245 | atts = super().nc_attributes() 246 | return dict_update( 247 | atts, 248 | { 249 | "global": {"featureType": "trajectory", "cdm_data_type": "Trajectory"}, 250 | axes.trajectory: { 251 | "cf_role": "trajectory_id", 252 | "long_name": "trajectory identifier", 253 | "ioos_category": "identifier", 254 | }, 255 | axes.x: {"axis": "X"}, 256 | axes.y: {"axis": "Y"}, 257 | axes.z: {"axis": "Z"}, 258 | axes.t: {"units": self.default_time_unit, "standard_name": "time", "axis": "T"}, 259 | "rowSize": {"sample_dimension": daxes.sample}, 260 | }, 261 | ) 262 | -------------------------------------------------------------------------------- /pocean/dsg/trajectory/ir.py: -------------------------------------------------------------------------------- 1 | #!python 2 | from pocean.cf import CFDataset 3 | 4 | 5 | class IndexedRaggedTrajectory(CFDataset): 6 | def from_dataframe(cls, df, output, **kwargs): 7 | raise NotImplementedError 8 | 9 | def calculated_metadata(self, df=None, geometries=True, clean_cols=True, clean_rows=True): 10 | # if df is None: 11 | # df = self.to_dataframe(clean_cols=clean_cols, clean_rows=clean_rows) 12 | raise NotImplementedError 13 | 14 | def to_dataframe(self): 15 | raise NotImplementedError 16 | -------------------------------------------------------------------------------- /pocean/dsg/trajectoryProfile/__init__.py: -------------------------------------------------------------------------------- 
1 | #!python 2 | from pocean.dsg.profile import profile_calculated_metadata 3 | from pocean.dsg.trajectory import trajectories_meta 4 | 5 | 6 | def trajectory_profile_calculated_metadata(df, axes, geometries=True): 7 | trajectories = {} 8 | for tid, tgroup in df.groupby(axes.trajectory): 9 | tgroup = tgroup.sort_values(axes.t) 10 | trajectories[tid] = profile_calculated_metadata(tgroup, axes, geometries) 11 | 12 | return trajectories_meta( 13 | min_z=df[axes.z].min(), 14 | max_z=df[axes.z].max(), 15 | min_t=df[axes.t].min(), 16 | max_t=df[axes.t].max(), 17 | trajectories=trajectories, 18 | ) 19 | -------------------------------------------------------------------------------- /pocean/dsg/utils.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | import pandas as pd 4 | from shapely.geometry import ( 5 | box, 6 | LineString, 7 | Point, 8 | Polygon, 9 | ) 10 | from shapely.validation import make_valid 11 | 12 | from pocean import logger as L # noqa 13 | from pocean.utils import dict_update, get_default_axes, unique_justseen 14 | 15 | datetime.UTC = datetime.timezone.utc 16 | 17 | 18 | def get_calculated_attributes(df, axes=None, history=None): 19 | """Functions to automate netCDF attribute generation from the data itself. 20 | This is a wrapper for the other four functions, which could be called separately. 21 | 22 | :param df: data (Pandas DataFrame) 23 | :param axes: keys (x,y,z,t) are associated with actual column names (dictionary) 24 | :param history: text initializing audit trail for modifications to the original data (optional, string) 25 | :return: dictionary of global attributes 26 | """ 27 | 28 | axes = get_default_axes(axes) 29 | attrs = get_geographic_attributes(df, axes) 30 | attrs = dict_update(attrs, get_vertical_attributes(df, axes)) 31 | attrs = dict_update(attrs, get_temporal_attributes(df, axes)) 32 | attrs = dict_update(attrs, get_creation_attributes(history)) 33 | 34 | return attrs 35 | 36 | 37 | def get_geographic_attributes(df, axes=None): 38 | """Use values in a dataframe to set geographic attributes for the eventual netCDF file 39 | Attribute names come from https://www.ncei.noaa.gov/data/oceans/ncei/formats/netcdf/v2.0/index.html 40 | The coordinate reference system (CRS) is assumed to be EPSG:4326, which is WGS84 and is used with 41 | GPS satellite navigation (http://spatialreference.org/ref/epsg/wgs-84/). This is NCEI's default. 42 | Coordinate values are latitude (decimal degrees_north) and longitude (decimal degrees_east). 43 | Longitude values are limited to [-180, 180). 44 | 45 | :param df: data (Pandas DataFrame) 46 | :param axes: keys (x,y,z,t) are associated with actual column names (dictionary) 47 | :return: nested dictionary of variable and global attributes 48 | """ 49 | axes = get_default_axes(axes) 50 | 51 | carry_miny = round(float(df[axes.y].min()), 6) 52 | carry_maxy = round(float(df[axes.y].max()), 6) 53 | carry_minx = round(float(df[axes.x].min()), 6) 54 | carry_maxx = round(float(df[axes.x].max()), 6) 55 | 56 | notnull = df[axes.x].notnull() & df[axes.y].notnull() 57 | coords = list(zip(df.loc[notnull, axes.x], df.loc[notnull, axes.y])) 58 | 59 | if len(set(coords)) == 1: 60 | geoclass = Point 61 | # The set is to work around the fact that pocean 62 | # relied on a shapely<2 bug to pass a vector here instead of a point.
63 | coords = set(coords) 64 | elif len(coords) > 2: 65 | geoclass = Polygon 66 | else: 67 | geoclass = LineString 68 | 69 | p = geoclass(coords) 70 | dateline = LineString([(180, 90), (-180, -90)]) 71 | # If we cross the dateline normalize the coordinates before polygon 72 | if dateline.crosses(p): 73 | newx = (df.loc[notnull, axes.x] + 360) % 360 74 | p = geoclass(zip(newx, df.loc[notnull, axes.y])) 75 | p = make_valid(p) 76 | 77 | geometry_bbox = box(*p.bounds).wkt 78 | geometry_wkt = p.convex_hull.wkt 79 | 80 | return { 81 | "variables": { 82 | axes.y: { 83 | "attributes": { 84 | "actual_min": carry_miny, 85 | "actual_max": carry_maxy, 86 | } 87 | }, 88 | axes.x: { 89 | "attributes": { 90 | "actual_min": carry_minx, 91 | "actual_max": carry_maxx, 92 | } 93 | }, 94 | }, 95 | "attributes": { 96 | "geospatial_lat_min": carry_miny, 97 | "geospatial_lat_max": carry_maxy, 98 | "geospatial_lon_min": carry_minx, 99 | "geospatial_lon_max": carry_maxx, 100 | "geospatial_bbox": geometry_bbox, 101 | "geospatial_bounds": geometry_wkt, 102 | "geospatial_bounds_crs": "EPSG:4326", 103 | }, 104 | } 105 | 106 | 107 | def get_vertical_attributes(df, axes=None): 108 | """Use values in a dataframe to set vertical attributes for the eventual netCDF file 109 | Attribute names come from https://www.ncei.noaa.gov/data/oceans/ncei/formats/netcdf/v2.0/index.html 110 | The CRS, geospatial_bounds_vertical_crs, cannot be assumed because NCEI suggests any of 111 | * 'EPSG:5829' (instantaneous height above sea level), 112 | * 'EPSG:5831' (instantaneous depth below sea level), or 113 | * 'EPSG:5703' (NAVD88 height). 114 | Likewise, geospatial_vertical_positive cannot be assumed to be either 'up' or 'down'. 115 | Set these attributes separately according to the dataset. 116 | Note: values are cast from numpy.int to float 117 | 118 | :param df: data (Pandas DataFrame) 119 | :param axes: keys (x,y,z,t) are associated with actual column names (dictionary). z in meters. 120 | :return: nested dictionary of variable and global attributes 121 | """ 122 | axes = get_default_axes(axes) 123 | minz = round(float(df[axes.z].min()), 6) 124 | maxz = round(float(df[axes.z].max()), 6) 125 | 126 | return { 127 | "variables": { 128 | axes.z: { 129 | "attributes": { 130 | "actual_min": minz, 131 | "actual_max": maxz, 132 | } 133 | }, 134 | }, 135 | "attributes": { 136 | "geospatial_vertical_min": minz, 137 | "geospatial_vertical_max": maxz, 138 | "geospatial_vertical_units": "m", 139 | }, 140 | } 141 | 142 | 143 | def get_temporal_attributes(df, axes=None): 144 | """Use values in a dataframe to set temporal attributes for the eventual netCDF file 145 | Attribute names come from https://www.ncei.noaa.gov/data/oceans/ncei/formats/netcdf/v2.0/index.html 146 | 147 | :param df: data (Pandas DataFrame) 148 | :param axes: keys (x,y,z,t) are associated with actual column names (dictionary). z in meters. 
149 | :return: nested dictionary of variable and global attributes 150 | """ 151 | 152 | axes = get_default_axes(axes) 153 | mint = df[axes.t].min() 154 | maxt = df[axes.t].max() 155 | 156 | times = pd.DatetimeIndex(unique_justseen(df[axes.t])) 157 | dt_index_diff = times[1:] - times[:-1] 158 | dt_counts = dt_index_diff.value_counts(sort=True) 159 | 160 | if dt_counts.size > 0 and dt_counts.values[0] / (len(times) - 1) > 0.75: 161 | mode_value = dt_counts.index[0] 162 | else: 163 | # Calculate a static resolution 164 | mode_value = (maxt - mint) / len(times) 165 | 166 | return { 167 | "variables": { 168 | axes.t: { 169 | "attributes": { 170 | "actual_min": mint.strftime("%Y-%m-%dT%H:%M:%SZ"), 171 | "actual_max": maxt.strftime("%Y-%m-%dT%H:%M:%SZ"), 172 | } 173 | }, 174 | }, 175 | "attributes": { 176 | "time_coverage_start": mint.strftime("%Y-%m-%dT%H:%M:%SZ"), 177 | "time_coverage_end": maxt.strftime("%Y-%m-%dT%H:%M:%SZ"), 178 | "time_coverage_duration": (maxt - mint).round("1s").isoformat(), 179 | "time_coverage_resolution": mode_value.round("1s").isoformat(), 180 | }, 181 | } 182 | 183 | 184 | def get_creation_attributes(history=None): 185 | """Query system for netCDF file creation times 186 | 187 | :param history: text initializing audit trail for modifications to the original data (optional, string) 188 | :return: dictionary of global attributes 189 | """ 190 | nc_create_ts = datetime.datetime.now(datetime.UTC).strftime("%Y-%m-%dT%H:%M:%SZ") 191 | 192 | attrs = { 193 | "attributes": { 194 | "date_created": nc_create_ts, 195 | "date_issued": nc_create_ts, 196 | "date_modified": nc_create_ts, 197 | } 198 | } 199 | 200 | # Add in the passed in history 201 | if history is not None: 202 | attrs["attributes"]["history"] = f"{nc_create_ts} - {history}" 203 | 204 | return attrs 205 | -------------------------------------------------------------------------------- /pocean/grid/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyoceans/pocean-core/6e4dda9c818c65b31192e8d5fb01d6bcf29a43f3/pocean/grid/__init__.py -------------------------------------------------------------------------------- /pocean/meta.py: -------------------------------------------------------------------------------- 1 | #!python 2 | import os 3 | from collections import OrderedDict 4 | from collections.abc import Iterable, Mapping 5 | from copy import deepcopy 6 | 7 | import numpy as np 8 | import simplejson as json 9 | 10 | from . 
import logger 11 | 12 | 13 | class MetaInterface(Mapping): 14 | VALID_KEYS = ["dimensions", "variables", "attributes"] 15 | 16 | @classmethod 17 | def from_jsonfile(cls, jsf): 18 | if not os.path.isfile(jsf): 19 | raise ValueError(f"{jsf} is not a file") 20 | 21 | with open(jsf) as jf: 22 | return cls.from_jsonstr(jf.read()) 23 | 24 | @classmethod 25 | def from_jsonstr(cls, js): 26 | try: 27 | d = json.loads(js, object_pairs_hook=OrderedDict) 28 | except BaseException as e: 29 | raise ValueError(f"Could not parse JSON string: {e}") 30 | 31 | return cls(d) 32 | 33 | def __init__(self, *args, **kwargs): 34 | self._data = dict(*args, **kwargs) 35 | 36 | def __getitem__(self, key): 37 | return self._data[key] 38 | 39 | def __iter__(self): 40 | return iter(self._data) 41 | 42 | def __len__(self): 43 | return len(self._data) 44 | 45 | def __str__(self): 46 | return str(self._data) 47 | 48 | 49 | def safe_attribute_typing(zdtype, value): 50 | try: 51 | return zdtype.type(value) 52 | except ValueError: 53 | logger.warning(f"Could not convert {value} to type {zdtype}") 54 | return None 55 | 56 | 57 | def string_to_dtype(type_str): 58 | # int - we avoid int64 59 | if type_str in ["int", "int32", "int64", "i", "i4", "i8", "i32", "i64", "long"]: 60 | return np.dtype("int32") 61 | 62 | elif type_str in ["uint", "ui4", "ui", "uint32", "uint64", "ui64", "u4", "u8"]: 63 | return np.dtype("uint32") 64 | 65 | elif type_str in ["float", "float32", "f", "f4", "f32"]: 66 | return np.dtype("float32") 67 | 68 | elif type_str in ["double", "float64", "d", "f8", "f64"]: 69 | return np.dtype("float64") 70 | 71 | elif type_str in ["byte", "bytes8", "i1", "b", "B", "int8"]: 72 | return np.dtype("int8") 73 | 74 | elif type_str in ["ubyte", "ui1", "ubuB", "uint8"]: 75 | return np.dtype("uint8") 76 | 77 | elif type_str in ["char", "c", "string", "S1", "str", "unicode", "string8"]: 78 | return np.dtype("U") 79 | 80 | elif type_str in ["short", "s", "i2", "h", "int16"]: 81 | return np.dtype("int16") 82 | 83 | elif type_str in ["ushort", "us", "u2", "ui2", "uh", "uint16"]: 84 | return np.dtype("uint16") 85 | 86 | raise ValueError(f"Could not find dtype for {type_str}") 87 | 88 | 89 | def untype_attributes(vd): 90 | typed = OrderedDict() 91 | for k, v in vd.items(): 92 | if isinstance(v, dict): 93 | dtype = string_to_dtype(v.get("type")) 94 | vval = v.get("data") 95 | if isinstance(vval, (list, tuple)): 96 | safe = (safe_attribute_typing(dtype, x) for x in vval) 97 | typed[k] = [x for x in safe if x is not None] 98 | else: 99 | safe = safe_attribute_typing(dtype, vval) 100 | if safe is not None: 101 | typed[k] = safe 102 | else: 103 | typed[k] = v 104 | return typed 105 | 106 | 107 | def ncpyattributes(obj, verbose=True): 108 | """Converts any attributes that are not native python types to those types""" 109 | 110 | return_copy = deepcopy(obj) 111 | 112 | for k, v in obj.items(): 113 | if isinstance(v, np.ndarray): 114 | newv = v.tolist() 115 | elif hasattr(v, "dtype"): 116 | newv = v.item() 117 | else: 118 | newv = v 119 | 120 | if hasattr(v, "dtype"): 121 | newt = v.dtype.name 122 | else: 123 | if isinstance(v, Iterable) and v: 124 | # Use the type of the first one 125 | v = v[0] 126 | else: 127 | # This is likely an empty value 128 | # so just default to an empty string 129 | v = "" 130 | newt = type(v).__name__ 131 | 132 | if verbose is True: 133 | return_copy[k] = {"type": newt, "data": newv} 134 | else: 135 | return_copy[k] = newv 136 | 137 | return return_copy 138 | 
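The helpers above form a JSON-safe round trip for netCDF attributes: ncpyattributes tags each value with a numpy type name, and untype_attributes casts the values back through string_to_dtype. A minimal sketch of that round trip (hypothetical attribute values, not from this repo):

    import numpy as np

    from pocean.meta import ncpyattributes, untype_attributes

    atts = {
        "scale_factor": np.float32(0.1),              # scalar -> {'type': 'float32', 'data': ...}
        "flag_values": np.array([1, 2], dtype="i4"),  # array -> {'type': 'int32', 'data': [1, 2]}
    }
    typed = ncpyattributes(atts)    # JSON-serializable, typed form
    raw = untype_attributes(typed)  # values re-cast via string_to_dtype
    assert raw["scale_factor"] == np.float32(0.1)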
-------------------------------------------------------------------------------- /pocean/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyoceans/pocean-core/6e4dda9c818c65b31192e8d5fb01d6bcf29a43f3/pocean/tests/__init__.py -------------------------------------------------------------------------------- /pocean/tests/download_test_data.py: -------------------------------------------------------------------------------- 1 | import zipfile 2 | from pathlib import Path 3 | 4 | import pooch 5 | 6 | 7 | def download_test_data(): 8 | url = "https://github.com/pyoceans/pocean-core/releases/download" 9 | version = "2025.01" 10 | 11 | fname = pooch.retrieve( 12 | url=f"{url}/{version}/test_data.zip", 13 | known_hash="sha256:41180c6bc6017de935250c9e8c1bbb407507049baebd767692c4f74fb8d662a8", 14 | ) 15 | 16 | here = Path(__file__).resolve().parent 17 | print(fname) 18 | print(here) 19 | with zipfile.ZipFile(fname, "r") as zip_ref: 20 | zip_ref.extractall(here) 21 | 22 | 23 | if __name__ == "__main__": 24 | download_test_data() 25 | -------------------------------------------------------------------------------- /pocean/tests/dsg/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyoceans/pocean-core/6e4dda9c818c65b31192e8d5fb01d6bcf29a43f3/pocean/tests/dsg/__init__.py -------------------------------------------------------------------------------- /pocean/tests/dsg/profile/test_profile_im.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import tempfile 4 | import unittest 5 | 6 | import numpy as np 7 | import pandas as pd 8 | from dateutil.parser import parse as dtparse 9 | 10 | from pocean import logger 11 | from pocean.dsg import IncompleteMultidimensionalProfile 12 | from pocean.tests.dsg.test_new import test_is_mine 13 | 14 | logger.level = logging.DEBUG 15 | logger.handlers = [logging.StreamHandler()] 16 | 17 | 18 | class TestIMPStrings(unittest.TestCase): 19 | def setUp(self): 20 | self.df = pd.read_csv( 21 | os.path.join(os.path.dirname(__file__), "resources", "basis_2011.csv"), 22 | parse_dates=["time"], 23 | ) 24 | # self.df = pd.read_csv('resources/basis_2011.csv', parse_dates=['time']) 25 | 26 | def test_print_dtypes(self): 27 | print(self.df.dtypes) 28 | 29 | def test_write_nc(self): 30 | fid, single_tmp = tempfile.mkstemp(suffix=".nc") 31 | 32 | axes = {"t": "time", "x": "longitude", "y": "latitude", "z": "z", "profile": "stationid"} 33 | 34 | with IncompleteMultidimensionalProfile.from_dataframe( 35 | self.df, single_tmp, axes=axes, mode="a" 36 | ) as ncd: 37 | ncd.renameDimension("stationid", "profile") 38 | 39 | test_is_mine(IncompleteMultidimensionalProfile, single_tmp) # Try to load it again 40 | os.close(fid) 41 | os.remove(single_tmp) 42 | 43 | 44 | class TestIncompleteMultidimensionalProfile(unittest.TestCase): 45 | def setUp(self): 46 | self.multi = os.path.join(os.path.dirname(__file__), "resources", "im-multiple.nc") 47 | 48 | def test_imp_load(self): 49 | IncompleteMultidimensionalProfile(self.multi).close() 50 | 51 | def test_imp_dataframe(self): 52 | fid, single_tmp = tempfile.mkstemp(suffix=".nc") 53 | with IncompleteMultidimensionalProfile(self.multi) as ncd: 54 | df = ncd.to_dataframe() 55 | with IncompleteMultidimensionalProfile.from_dataframe(df, single_tmp) as result_ncd: 56 | assert "profile" in result_ncd.dimensions 57 | 
test_is_mine(IncompleteMultidimensionalProfile, single_tmp) # Try to load it again 58 | os.close(fid) 59 | os.remove(single_tmp) 60 | 61 | def test_imp_dataframe_unique_dims(self): 62 | fid, single_tmp = tempfile.mkstemp(suffix=".nc") 63 | with IncompleteMultidimensionalProfile(self.multi) as ncd: 64 | df = ncd.to_dataframe() 65 | with IncompleteMultidimensionalProfile.from_dataframe( 66 | df, single_tmp, unique_dims=True 67 | ) as result_ncd: 68 | assert "profile_dim" in result_ncd.dimensions 69 | test_is_mine(IncompleteMultidimensionalProfile, single_tmp) # Try to load it again 70 | os.close(fid) 71 | os.remove(single_tmp) 72 | 73 | def test_imp_calculated_metadata(self): 74 | with IncompleteMultidimensionalProfile(self.multi) as ncd: 75 | m = ncd.calculated_metadata() 76 | assert m.min_t == dtparse("1990-01-01 00:00:00") 77 | assert m.max_t == dtparse("1990-01-06 21:00:00") 78 | assert len(m.profiles.keys()) == 137 79 | assert np.isclose(m.profiles[0].min_z, 0.05376, atol=1e-5) 80 | assert np.isclose(m.profiles[0].max_z, 9.62958, atol=1e-5) 81 | assert m.profiles[0].t == dtparse("1990-01-01 00:00:00") 82 | assert m.profiles[0].x == 119 83 | assert m.profiles[0].y == 171 84 | 85 | assert np.isclose(m.profiles[141].min_z, 0.04196, atol=1e-5) 86 | assert np.isclose(m.profiles[141].max_z, 9.85909, atol=1e-5) 87 | assert m.profiles[141].t == dtparse("1990-01-06 21:00:00") 88 | assert m.profiles[141].x == 34 89 | assert m.profiles[141].y == 80 90 | -------------------------------------------------------------------------------- /pocean/tests/dsg/profile/test_profile_om.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import tempfile 4 | import unittest 5 | 6 | import numpy as np 7 | from dateutil.parser import parse as dtparse 8 | 9 | from pocean import logger 10 | from pocean.cf import CFDataset 11 | from pocean.dsg import OrthogonalMultidimensionalProfile 12 | from pocean.tests.dsg.test_new import test_is_mine 13 | 14 | logger.level = logging.INFO 15 | logger.handlers = [logging.StreamHandler()] 16 | 17 | 18 | class TestOrthogonalMultidimensionalProfile(unittest.TestCase): 19 | def setUp(self): 20 | self.single = os.path.join(os.path.dirname(__file__), "resources", "om-single.nc") 21 | self.multi = os.path.join(os.path.dirname(__file__), "resources", "om-multiple.nc") 22 | 23 | def test_omp_load(self): 24 | OrthogonalMultidimensionalProfile(self.single).close() 25 | OrthogonalMultidimensionalProfile(self.multi).close() 26 | 27 | def test_omp_dataframe_single(self): 28 | CFDataset.load(self.single) 29 | 30 | fid, single_tmp = tempfile.mkstemp(suffix=".nc") 31 | with OrthogonalMultidimensionalProfile(self.single) as ncd: 32 | df = ncd.to_dataframe() 33 | with self.assertRaises(NotImplementedError): 34 | with OrthogonalMultidimensionalProfile.from_dataframe(df, single_tmp) as result_ncd: 35 | assert "profile" in result_ncd.dimensions 36 | test_is_mine(OrthogonalMultidimensionalProfile, single_tmp) # Try to load it again 37 | os.close(fid) 38 | os.remove(single_tmp) 39 | 40 | def test_omp_dataframe_multi(self): 41 | CFDataset.load(self.multi) 42 | 43 | fid, multi_tmp = tempfile.mkstemp(suffix=".nc") 44 | with OrthogonalMultidimensionalProfile(self.multi) as ncd: 45 | df = ncd.to_dataframe() 46 | with self.assertRaises(NotImplementedError): 47 | with OrthogonalMultidimensionalProfile.from_dataframe(df, multi_tmp) as result_ncd: 48 | assert "profile" in result_ncd.dimensions 49 | 
test_is_mine(OrthogonalMultidimensionalProfile, multi_tmp) # Try to load it again 50 | os.close(fid) 51 | os.remove(multi_tmp) 52 | 53 | def test_omp_dataframe_multi_unique_dims(self): 54 | CFDataset.load(self.multi) 55 | 56 | fid, multi_tmp = tempfile.mkstemp(suffix=".nc") 57 | with OrthogonalMultidimensionalProfile(self.multi) as ncd: 58 | df = ncd.to_dataframe() 59 | with self.assertRaises(NotImplementedError): 60 | with OrthogonalMultidimensionalProfile.from_dataframe( 61 | df, multi_tmp, unique_dims=True 62 | ) as result_ncd: 63 | assert "profile_dim" in result_ncd.dimensions 64 | test_is_mine(OrthogonalMultidimensionalProfile, multi_tmp) # Try to load it again 65 | os.close(fid) 66 | os.remove(multi_tmp) 67 | 68 | def test_omp_calculated_metadata(self): 69 | with OrthogonalMultidimensionalProfile(self.single) as ncd: 70 | s = ncd.calculated_metadata() 71 | assert s.min_t == dtparse("2005-07-09 01:48:00") 72 | assert s.max_t == dtparse("2005-07-09 01:48:00") 73 | assert np.isclose(s.profiles[1].min_z, 0.0) 74 | assert np.isclose(s.profiles[1].max_z, 96.06) 75 | assert s.profiles[1].t == dtparse("2005-07-09 01:48:00") 76 | assert np.isclose(s.profiles[1].x, -149.3582) 77 | assert np.isclose(s.profiles[1].y, 60.0248) 78 | 79 | with OrthogonalMultidimensionalProfile(self.multi) as ncd: 80 | m = ncd.calculated_metadata() 81 | assert m.min_t == dtparse("2005-09-10 07:08:00") 82 | assert m.max_t == dtparse("2005-09-14 17:27:00") 83 | assert len(m.profiles.keys()) == 35 84 | assert np.isclose(m.profiles[2].min_z, 0.0) 85 | assert np.isclose(m.profiles[2].max_z, 499.69) 86 | assert m.profiles[2].t == dtparse("2005-09-10 07:08:00") 87 | assert np.isclose(m.profiles[2].x, -148.2182) 88 | assert np.isclose(m.profiles[2].y, 58.5395) 89 | 90 | assert np.isclose(m.profiles[37].min_z, 0.0) 91 | assert np.isclose(m.profiles[37].max_z, 292.01001) 92 | assert m.profiles[37].t == dtparse("2005-09-14 17:27:00") 93 | assert np.isclose(m.profiles[37].x, -149.468) 94 | assert np.isclose(m.profiles[37].y, 60.01) 95 | 96 | def test_json_attributes(self): 97 | ds = os.path.join(os.path.dirname(__file__), "resources", "om-1dy11.nc") 98 | om = OrthogonalMultidimensionalProfile(ds) 99 | om.to_dataframe() 100 | om.json_attributes() 101 | om.close() 102 | -------------------------------------------------------------------------------- /pocean/tests/dsg/test_new.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from os.path import dirname as dn 3 | from os.path import join as jn 4 | 5 | import pytest 6 | 7 | from pocean import logger 8 | from pocean.cf import CFDataset 9 | from pocean.dsg import * 10 | from pocean.utils import all_subclasses 11 | 12 | logger.level = logging.INFO 13 | logger.handlers = [logging.StreamHandler()] 14 | 15 | 16 | @pytest.mark.parametrize( 17 | "klass,fp", 18 | [ 19 | ( 20 | OrthogonalMultidimensionalProfile, 21 | jn(dn(__file__), "profile", "resources", "om-single.nc"), 22 | ), 23 | ( 24 | OrthogonalMultidimensionalProfile, 25 | jn(dn(__file__), "profile", "resources", "om-multiple.nc"), 26 | ), 27 | ( 28 | OrthogonalMultidimensionalProfile, 29 | jn(dn(__file__), "profile", "resources", "om-1dy11.nc"), 30 | ), 31 | ( 32 | IncompleteMultidimensionalProfile, 33 | jn(dn(__file__), "profile", "resources", "im-multiple.nc"), 34 | ), 35 | ( 36 | IncompleteMultidimensionalTrajectory, 37 | jn(dn(__file__), "trajectory", "resources", "im-single.nc"), 38 | ), 39 | ( 40 | IncompleteMultidimensionalTrajectory, 41 | jn(dn(__file__), 
"trajectory", "resources", "im-multiple.nc"), 42 | ), 43 | ( 44 | IncompleteMultidimensionalTrajectory, 45 | jn(dn(__file__), "trajectory", "resources", "im-multiple-nonstring.nc"), 46 | ), 47 | ( 48 | IncompleteMultidimensionalTrajectory, 49 | jn(dn(__file__), "trajectory", "resources", "wave-glider-int-attrs.nc"), 50 | ), 51 | (ContiguousRaggedTrajectory, jn(dn(__file__), "trajectory", "resources", "cr-multiple.nc")), 52 | (ContiguousRaggedTrajectory, jn(dn(__file__), "trajectory", "resources", "cr-oot-A.nc")), 53 | (ContiguousRaggedTrajectory, jn(dn(__file__), "trajectory", "resources", "cr-oot-B.nc")), 54 | ( 55 | ContiguousRaggedTrajectoryProfile, 56 | jn(dn(__file__), "trajectoryProfile", "resources", "cr-single.nc"), 57 | ), 58 | ( 59 | ContiguousRaggedTrajectoryProfile, 60 | jn(dn(__file__), "trajectoryProfile", "resources", "cr-multiple.nc"), 61 | ), 62 | ( 63 | ContiguousRaggedTrajectoryProfile, 64 | jn(dn(__file__), "trajectoryProfile", "resources", "cr-missing-time.nc"), 65 | ), 66 | ( 67 | IncompleteMultidimensionalTimeseries, 68 | jn(dn(__file__), "timeseries", "resources", "im-multiple.nc"), 69 | ), 70 | ( 71 | OrthogonalMultidimensionalTimeseries, 72 | jn(dn(__file__), "timeseries", "resources", "om-single.nc"), 73 | ), 74 | ( 75 | OrthogonalMultidimensionalTimeseries, 76 | jn(dn(__file__), "timeseries", "resources", "om-multiple.nc"), 77 | ), 78 | # (IndexedRaggedTimeseries, jn(dn(__file__), 'timeseries', 'resources', 'cr-multiple.nc')), 79 | # (ContiguousRaggedTimeseries, jn(dn(__file__), 'timeseries', 'resources', 'cr-multiple.nc')), 80 | ( 81 | OrthogonalMultidimensionalTimeseriesProfile, 82 | jn(dn(__file__), "timeseriesProfile", "resources", "om-multiple.nc"), 83 | ), 84 | ( 85 | IncompleteMultidimensionalTimeseriesProfile, 86 | jn(dn(__file__), "timeseriesProfile", "resources", "im-single.nc"), 87 | ), 88 | ( 89 | IncompleteMultidimensionalTimeseriesProfile, 90 | jn(dn(__file__), "timeseriesProfile", "resources", "im-multiple.nc"), 91 | ), 92 | ( 93 | RaggedTimeseriesProfile, 94 | jn(dn(__file__), "timeseriesProfile", "resources", "r-single.nc"), 95 | ), 96 | ( 97 | RaggedTimeseriesProfile, 98 | jn(dn(__file__), "timeseriesProfile", "resources", "r-multiple.nc"), 99 | ), 100 | ], 101 | ) 102 | def test_is_mine(klass, fp): 103 | with CFDataset.load(fp) as dsg: 104 | assert dsg.__class__ == klass 105 | 106 | allsubs = list(all_subclasses(CFDataset)) 107 | subs = [s for s in allsubs if s != klass] 108 | with CFDataset(fp) as dsg: 109 | logger.debug(f"\nTesting {klass.__name__}") 110 | assert klass.is_mine(dsg, strict=True) is True 111 | for s in subs: 112 | if hasattr(s, "is_mine"): 113 | logger.debug(f" * Trying {s.__name__}...") 114 | assert s.is_mine(dsg) is False 115 | -------------------------------------------------------------------------------- /pocean/tests/dsg/test_utils.py: -------------------------------------------------------------------------------- 1 | #!python 2 | import datetime 3 | import os 4 | import unittest 5 | 6 | import pandas as pd 7 | import pytest 8 | import pytz 9 | from dateutil.parser import parse as dtparse 10 | 11 | from pocean import logger as L # noqa 12 | from pocean.cf import CFDataset 13 | from pocean.dsg import utils 14 | 15 | datetime.UTC = datetime.timezone.utc 16 | 17 | # RuntimeWarning: invalid value encountered in cast is fine here. 
18 | ignore_invalid_value_cast = pytest.mark.filterwarnings("ignore::RuntimeWarning") 19 | 20 | 21 | class TestDsgUtils(unittest.TestCase): 22 | geo = pd.DataFrame({"x": [-1, -2, -3, -4], "y": [1, 2, 3, 4]}) 23 | 24 | z = pd.DataFrame( 25 | { 26 | "z": [1, 2, 3, 4], 27 | } 28 | ) 29 | 30 | times = pd.DataFrame( 31 | { 32 | "t": pd.to_datetime( 33 | [ 34 | "2018-08-19 00:00:00", 35 | "2018-08-20 00:00:00", 36 | "2018-08-21 00:00:00", 37 | "2018-08-22 00:00:00", 38 | "2018-08-23 00:00:00", 39 | "2018-08-23 00:00:05", 40 | ] 41 | ) 42 | } 43 | ) 44 | 45 | avgtimes = pd.DataFrame( 46 | { 47 | "t": pd.to_datetime( 48 | [ 49 | "2018-08-19 00:00:00", 50 | "2018-08-20 23:00:55", 51 | "2018-08-21 00:00:35", 52 | ] 53 | ) 54 | } 55 | ) 56 | 57 | def test_get_vertical_meta(self): 58 | meta = utils.get_vertical_attributes(self.z) 59 | 60 | assert meta == { 61 | "variables": { 62 | "z": { 63 | "attributes": { 64 | "actual_min": 1, 65 | "actual_max": 4, 66 | } 67 | }, 68 | }, 69 | "attributes": { 70 | "geospatial_vertical_min": 1, 71 | "geospatial_vertical_max": 4, 72 | "geospatial_vertical_units": "m", 73 | }, 74 | } 75 | 76 | def test_get_geospatial_meta(self): 77 | meta = utils.get_geographic_attributes(self.geo) 78 | 79 | assert meta == { 80 | "variables": { 81 | "y": { 82 | "attributes": { 83 | "actual_min": 1, 84 | "actual_max": 4, 85 | } 86 | }, 87 | "x": { 88 | "attributes": { 89 | "actual_min": -4, 90 | "actual_max": -1, 91 | } 92 | }, 93 | }, 94 | "attributes": { 95 | "geospatial_lat_min": 1.0, 96 | "geospatial_lat_max": 4.0, 97 | "geospatial_lon_min": -4.0, 98 | "geospatial_lon_max": -1.0, 99 | "geospatial_bbox": "POLYGON ((-1 1, -1 4, -4 4, -4 1, -1 1))", 100 | "geospatial_bounds": "LINESTRING (-1 1, -4 4)", 101 | "geospatial_bounds_crs": "EPSG:4326", 102 | }, 103 | } 104 | 105 | def test_get_temporal_meta_from_times_average(self): 106 | meta = utils.get_temporal_attributes(self.avgtimes) 107 | 108 | assert meta == { 109 | "variables": { 110 | "t": { 111 | "attributes": { 112 | "actual_min": "2018-08-19T00:00:00Z", 113 | "actual_max": "2018-08-21T00:00:35Z", 114 | } 115 | } 116 | }, 117 | "attributes": { 118 | "time_coverage_start": "2018-08-19T00:00:00Z", 119 | "time_coverage_end": "2018-08-21T00:00:35Z", 120 | "time_coverage_duration": "P2DT0H0M35S", 121 | "time_coverage_resolution": "P0DT16H0M12S", 122 | }, 123 | } 124 | 125 | def test_get_temporal_meta_from_times(self): 126 | meta = utils.get_temporal_attributes(self.times) 127 | 128 | assert meta == { 129 | "variables": { 130 | "t": { 131 | "attributes": { 132 | "actual_min": "2018-08-19T00:00:00Z", 133 | "actual_max": "2018-08-23T00:00:05Z", 134 | } 135 | } 136 | }, 137 | "attributes": { 138 | "time_coverage_start": "2018-08-19T00:00:00Z", 139 | "time_coverage_end": "2018-08-23T00:00:05Z", 140 | "time_coverage_duration": "P4DT0H0M5S", 141 | "time_coverage_resolution": "P1DT0H0M0S", 142 | }, 143 | } 144 | 145 | def test_get_creation(self): 146 | meta = utils.get_creation_attributes(history="DID THINGS") 147 | 148 | now = datetime.datetime.now(datetime.UTC).replace(tzinfo=pytz.utc) 149 | 150 | assert (now - dtparse(meta["attributes"]["date_created"])) < datetime.timedelta(minutes=1) 151 | assert (now - dtparse(meta["attributes"]["date_issued"])) < datetime.timedelta(minutes=1) 152 | assert (now - dtparse(meta["attributes"]["date_modified"])) < datetime.timedelta(minutes=1) 153 | assert "DID THINGS" in meta["attributes"]["history"] 154 | 155 | @ignore_invalid_value_cast 156 | def test_wrap_dateline(self): 157 | ncfile = os.path.join( 158 
| os.path.dirname(os.path.dirname(__file__)), "resources/wrapping_dateline.nc" 159 | ) 160 | 161 | with CFDataset.load(ncfile) as ncd: 162 | axes = { 163 | "t": "time", 164 | "z": "z", 165 | "x": "lon", 166 | "y": "lat", 167 | } 168 | df = ncd.to_dataframe(axes=axes) 169 | 170 | meta = utils.get_geographic_attributes(df, axes=axes) 171 | 172 | assert meta == { 173 | "variables": { 174 | "lat": {"attributes": {"actual_min": 61.777, "actual_max": 67.068}}, 175 | "lon": {"attributes": {"actual_min": -179.966, "actual_max": 179.858}}, 176 | }, 177 | "attributes": { 178 | "geospatial_lat_min": 61.777, 179 | "geospatial_lat_max": 67.068, 180 | "geospatial_lon_min": -179.966, 181 | "geospatial_lon_max": 179.858, 182 | "geospatial_bbox": "POLYGON ((198.669 61.777, 198.669 67.068, 174.79200000000003 67.068, 174.79200000000003 61.777, 198.669 61.777))", 183 | "geospatial_bounds": "POLYGON ((174.79200000000003 61.777, 174.92599999999993 62.206, 178.812 64.098, 192.86 67.029, 196.86 67.068, 197.094 67.044, 198.669 66.861, 187.784 64.188, 179.10799999999995 62.266, 176.16899999999998 61.862, 174.79200000000003 61.777))", 184 | "geospatial_bounds_crs": "EPSG:4326", 185 | }, 186 | } 187 | 188 | def test_wrap_small_coords(self): 189 | geo = pd.DataFrame({"x": [-1, -2], "y": [1, 2]}) 190 | 191 | meta = utils.get_geographic_attributes(geo) 192 | 193 | assert meta == { 194 | "variables": { 195 | "y": { 196 | "attributes": { 197 | "actual_min": 1, 198 | "actual_max": 2, 199 | } 200 | }, 201 | "x": { 202 | "attributes": { 203 | "actual_min": -2, 204 | "actual_max": -1, 205 | } 206 | }, 207 | }, 208 | "attributes": { 209 | "geospatial_lat_min": 1, 210 | "geospatial_lat_max": 2, 211 | "geospatial_lon_min": -2, 212 | "geospatial_lon_max": -1, 213 | "geospatial_bbox": "POLYGON ((-1 1, -1 2, -2 2, -2 1, -1 1))", 214 | "geospatial_bounds": "LINESTRING (-1 1, -2 2)", 215 | "geospatial_bounds_crs": "EPSG:4326", 216 | }, 217 | } 218 | 219 | def test_wrap_same_coords(self): 220 | geo = pd.DataFrame({"x": [-1, -1, -1], "y": [1, 1, 1]}) 221 | 222 | meta = utils.get_geographic_attributes(geo) 223 | 224 | assert meta == { 225 | "variables": { 226 | "y": { 227 | "attributes": { 228 | "actual_min": 1, 229 | "actual_max": 1, 230 | } 231 | }, 232 | "x": { 233 | "attributes": { 234 | "actual_min": -1, 235 | "actual_max": -1, 236 | } 237 | }, 238 | }, 239 | "attributes": { 240 | "geospatial_lat_min": 1, 241 | "geospatial_lat_max": 1, 242 | "geospatial_lon_min": -1, 243 | "geospatial_lon_max": -1, 244 | "geospatial_bbox": "POLYGON ((-1 1, -1 1, -1 1, -1 1))", 245 | "geospatial_bounds": "POINT (-1 1)", 246 | "geospatial_bounds_crs": "EPSG:4326", 247 | }, 248 | } 249 | -------------------------------------------------------------------------------- /pocean/tests/dsg/timeseries/test_timeseries_im.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyoceans/pocean-core/6e4dda9c818c65b31192e8d5fb01d6bcf29a43f3/pocean/tests/dsg/timeseries/test_timeseries_im.py -------------------------------------------------------------------------------- /pocean/tests/dsg/timeseries/test_timeseries_om.py: -------------------------------------------------------------------------------- 1 | #!python 2 | import logging 3 | import os 4 | import tempfile 5 | import unittest 6 | 7 | import numpy as np 8 | import pytest 9 | 10 | from pocean import logger 11 | from pocean.dsg import OrthogonalMultidimensionalTimeseries 12 | from pocean.tests.dsg.test_new import test_is_mine 13 | 14 | # 
RuntimeWarning: invalid value encountered in cast is fine here. 15 | ignore_invalid_value_cast = pytest.mark.filterwarnings("ignore::RuntimeWarning") 16 | 17 | logger.level = logging.INFO 18 | logger.handlers = [logging.StreamHandler()] 19 | 20 | 21 | class TestOrthogonalMultidimensionalTimeseries(unittest.TestCase): 22 | def setUp(self): 23 | self.single = os.path.join(os.path.dirname(__file__), "resources", "tt.nc") 24 | self.multi = os.path.join(os.path.dirname(__file__), "resources", "om-multiple.nc") 25 | self.ph = np.ma.array( 26 | [ 27 | 8.1080176, 28 | 8.11740265, 29 | 8.11924184, 30 | 8.11615471, 31 | 8.11445695, 32 | 8.11600021, 33 | 8.11903291, 34 | 8.1187229, 35 | 8.105218, 36 | 8.10998784, 37 | 8.10715445, 38 | 8.10530323, 39 | 8.11167052, 40 | 8.11142766, 41 | 8.10897461, 42 | 8.08827717, 43 | 8.11343609, 44 | 8.11746859, 45 | 8.12326458, 46 | 8.11770947, 47 | 8.09127117, 48 | 8.10770576, 49 | 8.10252467, 50 | 8.10252874, 51 | ] 52 | ) 53 | 54 | def test_omp_load(self): 55 | OrthogonalMultidimensionalTimeseries(self.single).close() 56 | OrthogonalMultidimensionalTimeseries(self.multi).close() 57 | 58 | @ignore_invalid_value_cast 59 | def test_timeseries_omt_dataframe_single(self): 60 | fid, single_tmp = tempfile.mkstemp(suffix=".nc") 61 | with OrthogonalMultidimensionalTimeseries(self.single) as s: 62 | df = s.to_dataframe() 63 | with OrthogonalMultidimensionalTimeseries.from_dataframe(df, single_tmp) as result_ncd: 64 | assert "station" in result_ncd.dimensions 65 | assert np.ma.allclose(result_ncd.variables["pH"][:].flatten(), self.ph) 66 | test_is_mine(OrthogonalMultidimensionalTimeseries, single_tmp) # Try to load it again 67 | os.close(fid) 68 | os.remove(single_tmp) 69 | 70 | def test_timeseries_omt_dataframe_multi(self): 71 | fid, single_tmp = tempfile.mkstemp(suffix=".nc") 72 | with OrthogonalMultidimensionalTimeseries(self.multi) as s: 73 | df = s.to_dataframe() 74 | with OrthogonalMultidimensionalTimeseries.from_dataframe(df, single_tmp) as result_ncd: 75 | assert "station" in result_ncd.dimensions 76 | assert np.ma.allclose( 77 | result_ncd.variables["temperature"][0, 0:7].flatten(), 78 | [18.61804, 13.2165, 39.30018, 17.00865, 24.95154, 35.99525, 24.33436], 79 | ) 80 | test_is_mine(OrthogonalMultidimensionalTimeseries, single_tmp) # Try to load it again 81 | os.close(fid) 82 | os.remove(single_tmp) 83 | 84 | @ignore_invalid_value_cast 85 | def test_timeseries_omt_dataframe_unique_dims(self): 86 | fid, single_tmp = tempfile.mkstemp(suffix=".nc") 87 | with OrthogonalMultidimensionalTimeseries(self.single) as s: 88 | df = s.to_dataframe() 89 | with OrthogonalMultidimensionalTimeseries.from_dataframe( 90 | df, single_tmp, unique_dims=True 91 | ) as result_ncd: 92 | assert "station_dim" in result_ncd.dimensions 93 | assert np.ma.allclose(result_ncd.variables["pH"][:].flatten(), self.ph) 94 | test_is_mine(OrthogonalMultidimensionalTimeseries, single_tmp) # Try to load it again 95 | os.close(fid) 96 | os.remove(single_tmp) 97 | 98 | @ignore_invalid_value_cast 99 | def test_timeseries_omt_reduce_dims(self): 100 | fid, single_tmp = tempfile.mkstemp(suffix=".nc") 101 | with OrthogonalMultidimensionalTimeseries(self.single) as s: 102 | df = s.to_dataframe() 103 | with OrthogonalMultidimensionalTimeseries.from_dataframe( 104 | df, single_tmp, reduce_dims=True 105 | ) as result_ncd: 106 | assert "station" not in result_ncd.dimensions 107 | assert np.ma.allclose(result_ncd.variables["pH"][:].flatten(), self.ph) 108 | test_is_mine(OrthogonalMultidimensionalTimeseries, 
single_tmp) # Try to load it again 109 | os.close(fid) 110 | os.remove(single_tmp) 111 | 112 | @ignore_invalid_value_cast 113 | def test_timeseries_omt_no_z(self): 114 | fid, single_tmp = tempfile.mkstemp(suffix=".nc") 115 | with OrthogonalMultidimensionalTimeseries(self.single) as s: 116 | df = s.to_dataframe() 117 | axes = {"z": None} 118 | df.drop(columns=["z"], inplace=True) 119 | with OrthogonalMultidimensionalTimeseries.from_dataframe( 120 | df, 121 | single_tmp, 122 | axes=axes, 123 | ) as result_ncd: 124 | assert "station" in result_ncd.dimensions 125 | assert "z" not in result_ncd.variables 126 | assert np.ma.allclose(result_ncd.variables["pH"][:].flatten(), self.ph) 127 | test_is_mine(OrthogonalMultidimensionalTimeseries, single_tmp) # Try to load it again 128 | os.close(fid) 129 | os.remove(single_tmp) 130 | 131 | @ignore_invalid_value_cast 132 | def test_timeseries_omt_no_z_no_station(self): 133 | fid, single_tmp = tempfile.mkstemp(suffix=".nc") 134 | with OrthogonalMultidimensionalTimeseries(self.single) as s: 135 | df = s.to_dataframe() 136 | axes = {"z": None} 137 | df.drop(columns=["z"], inplace=True) 138 | with OrthogonalMultidimensionalTimeseries.from_dataframe( 139 | df, single_tmp, axes=axes, reduce_dims=True 140 | ) as result_ncd: 141 | assert "station" not in result_ncd.dimensions 142 | assert "z" not in result_ncd.variables 143 | assert np.ma.allclose(result_ncd.variables["pH"][:].flatten(), self.ph) 144 | test_is_mine(OrthogonalMultidimensionalTimeseries, single_tmp) # Try to load it again 145 | os.close(fid) 146 | os.remove(single_tmp) 147 | 148 | @ignore_invalid_value_cast 149 | def test_supplying_attributes(self): 150 | fid, single_tmp = tempfile.mkstemp(suffix=".nc") 151 | 152 | attrs = { 153 | "y": { 154 | "_CoordinateAxisType": "Lat", 155 | "_FillValue": -9999.9, 156 | "missing_value": -9999.9, 157 | } 158 | } 159 | 160 | with OrthogonalMultidimensionalTimeseries(self.single) as s: 161 | df = s.to_dataframe() 162 | with OrthogonalMultidimensionalTimeseries.from_dataframe( 163 | df, single_tmp, attributes=attrs 164 | ) as result_ncd: 165 | assert "station" in result_ncd.dimensions 166 | assert result_ncd.variables["y"]._CoordinateAxisType == "Lat" 167 | with self.assertRaises(AttributeError): 168 | result_ncd.variables["y"].missing_value 169 | with self.assertRaises(AttributeError): 170 | result_ncd.variables["y"]._FillValue 171 | 172 | test_is_mine(OrthogonalMultidimensionalTimeseries, single_tmp) # Try to load it again 173 | os.close(fid) 174 | os.remove(single_tmp) 175 | -------------------------------------------------------------------------------- /pocean/tests/dsg/timeseriesProfile/test_timeseriesProfile_im.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyoceans/pocean-core/6e4dda9c818c65b31192e8d5fb01d6bcf29a43f3/pocean/tests/dsg/timeseriesProfile/test_timeseriesProfile_im.py -------------------------------------------------------------------------------- /pocean/tests/dsg/timeseriesProfile/test_timeseriesProfile_r.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import tempfile 4 | import unittest 5 | from datetime import datetime 6 | 7 | import netCDF4 as nc4 8 | import pandas as pd 9 | import pytest 10 | from numpy.testing import assert_array_equal as npeq 11 | 12 | from pocean import logger 13 | from pocean.cf import CFDataset 14 | from pocean.dsg import RaggedTimeseriesProfile 15 | from 
pocean.tests.dsg.test_new import test_is_mine 16 | 17 | logger.level = logging.INFO 18 | logger.handlers = [logging.StreamHandler()] 19 | 20 | # RuntimeWarning: invalid value encountered in cast is fine here. 21 | ignore_invalid_value_cast = pytest.mark.filterwarnings("ignore::RuntimeWarning") 22 | 23 | 24 | class TestRaggedTimeseriesProfile(unittest.TestCase): 25 | def test_csv_to_nc_single(self): 26 | filepath = os.path.join(os.path.dirname(__file__), "resources", "r-single.csv") 27 | 28 | df = pd.read_csv(filepath) 29 | fid, tmpfile = tempfile.mkstemp(suffix=".nc") 30 | 31 | axes = {"t": "time", "x": "lon", "y": "lat", "z": "z"} 32 | 33 | df.time = pd.to_datetime(df.time) 34 | 35 | CFDataset.default_time_unit = "hours since 2003-01-01 00:00:00Z" 36 | 37 | with RaggedTimeseriesProfile.from_dataframe(df, tmpfile, axes=axes) as result_ncd: 38 | assert "station" in result_ncd.dimensions 39 | assert result_ncd.dimensions["station"].size == 1 40 | assert "profile" in result_ncd.dimensions 41 | assert result_ncd.dimensions["profile"].size == 1 42 | 43 | check_vars = ["z", "t090C", "SP", "SA", "SR", "CT", "sigma0_CT"] 44 | for v in check_vars: 45 | npeq(result_ncd.variables[v][:], df[v].values) 46 | 47 | assert result_ncd.variables["station"][0] == df.station.iloc[0] == "CH2" 48 | assert result_ncd.variables["profile"][0] == df.profile.iloc[0] == "030617B" 49 | assert result_ncd.variables["lat"].size == 1 50 | assert result_ncd.variables["lat"].ndim == 1 # Not reduced 51 | assert result_ncd.variables["lat"][0] == df.lat.iloc[0] == 33.558 52 | assert result_ncd.variables["lon"].size == 1 53 | assert result_ncd.variables["lon"].ndim == 1 # Not reduced 54 | assert result_ncd.variables["lon"][0] == df.lon.iloc[0] == -118.405 55 | 56 | assert result_ncd.variables["time"].units == "hours since 2003-01-01 00:00:00Z" 57 | assert result_ncd.variables["time"][0] == nc4.date2num( 58 | datetime(2003, 6, 17, 10, 32, 0), units=result_ncd.variables["time"].units 59 | ) 60 | 61 | assert RaggedTimeseriesProfile.is_mine(result_ncd, strict=True) 62 | 63 | os.close(fid) 64 | os.remove(tmpfile) 65 | 66 | def test_csv_to_nc_multi(self): 67 | filepath = os.path.join(os.path.dirname(__file__), "resources", "r-multi.csv") 68 | 69 | df = pd.read_csv(filepath) 70 | fid, tmpfile = tempfile.mkstemp(suffix=".nc") 71 | 72 | axes = {"t": "time", "x": "lon", "y": "lat", "z": "z"} 73 | 74 | df.time = pd.to_datetime(df.time) 75 | 76 | with RaggedTimeseriesProfile.from_dataframe(df, tmpfile, axes=axes) as result_ncd: 77 | assert "station" in result_ncd.dimensions 78 | assert result_ncd.dimensions["station"].size == 2 79 | assert "profile" in result_ncd.dimensions 80 | assert result_ncd.dimensions["profile"].size == 5 81 | 82 | check_vars = ["z", "salinity", "sigma0"] 83 | for v in check_vars: 84 | npeq(result_ncd.variables[v][:], df[v].values) 85 | 86 | npeq(result_ncd.variables["station"][:], ["CN1", "CN2"]) 87 | npeq( 88 | result_ncd.variables["profile"][:], 89 | ["030312B", "030617B", "030702B", "030814B", "031216C"], 90 | ) 91 | assert result_ncd.variables["profile"][0] == df.profile.iloc[0] == "030312B" 92 | assert result_ncd.variables["lat"].size == 2 93 | assert result_ncd.variables["lat"].ndim == 1 # Not reduced 94 | assert result_ncd.variables["lat"][0] == df.lat.iloc[0] == 33.5 95 | assert result_ncd.variables["lon"].size == 2 96 | assert result_ncd.variables["lon"].ndim == 1 # Not reduced 97 | assert result_ncd.variables["lon"][0] == df.lon.iloc[0] == -118.4 98 | 99 | npeq(result_ncd.variables["stationIndex"][:], [0, 
0, 1, 0, 1]) 100 | 101 | npeq(result_ncd.variables["rowSize"][:], [844, 892, 893, 893, 891]) 102 | 103 | assert result_ncd.variables["time"][0] == nc4.date2num( 104 | datetime(2013, 3, 12, 10, 19, 6), units=result_ncd.variables["time"].units 105 | ) 106 | assert RaggedTimeseriesProfile.is_mine(result_ncd, strict=True) 107 | 108 | os.close(fid) 109 | os.remove(tmpfile) 110 | 111 | def test_csv_to_nc_single_timezones(self): 112 | filepath = os.path.join(os.path.dirname(__file__), "resources", "r-single.csv") 113 | 114 | df = pd.read_csv(filepath) 115 | fid, tmpfile = tempfile.mkstemp(suffix=".nc") 116 | 117 | axes = {"t": "time", "x": "lon", "y": "lat", "z": "z"} 118 | 119 | df.time = pd.to_datetime(df.time) 120 | df.time = df.time.dt.tz_localize("UTC") 121 | 122 | with RaggedTimeseriesProfile.from_dataframe(df, tmpfile, axes=axes) as result_ncd: 123 | assert "station" in result_ncd.dimensions 124 | assert result_ncd.dimensions["station"].size == 1 125 | assert "profile" in result_ncd.dimensions 126 | assert result_ncd.dimensions["profile"].size == 1 127 | 128 | check_vars = ["z", "t090C", "SP", "SA", "SR", "CT", "sigma0_CT"] 129 | for v in check_vars: 130 | npeq(result_ncd.variables[v][:], df[v].values) 131 | 132 | assert result_ncd.variables["station"][0] == df.station.iloc[0] == "CH2" 133 | assert result_ncd.variables["profile"][0] == df.profile.iloc[0] == "030617B" 134 | assert result_ncd.variables["lat"].size == 1 135 | assert result_ncd.variables["lat"].ndim == 1 # Not reduced 136 | assert result_ncd.variables["lat"][0] == df.lat.iloc[0] == 33.558 137 | assert result_ncd.variables["lon"].size == 1 138 | assert result_ncd.variables["lon"].ndim == 1 # Not reduced 139 | assert result_ncd.variables["lon"][0] == df.lon.iloc[0] == -118.405 140 | 141 | assert result_ncd.variables["time"][0] == nc4.date2num( 142 | datetime(2003, 6, 17, 10, 32, 0), units=result_ncd.variables["time"].units 143 | ) 144 | 145 | assert RaggedTimeseriesProfile.is_mine(result_ncd, strict=True) 146 | 147 | os.close(fid) 148 | os.remove(tmpfile) 149 | 150 | def test_csv_to_nc_single_reduce(self): 151 | filepath = os.path.join(os.path.dirname(__file__), "resources", "r-single.csv") 152 | 153 | df = pd.read_csv(filepath) 154 | fid, tmpfile = tempfile.mkstemp(suffix=".nc") 155 | 156 | axes = {"t": "time", "x": "lon", "y": "lat", "z": "z"} 157 | 158 | df.time = pd.to_datetime(df.time) 159 | 160 | with RaggedTimeseriesProfile.from_dataframe( 161 | df, tmpfile, axes=axes, reduce_dims=True 162 | ) as result_ncd: 163 | assert "station" not in result_ncd.dimensions 164 | assert "profile" in result_ncd.dimensions 165 | assert result_ncd.dimensions["profile"].size == 1 166 | 167 | check_vars = ["z", "t090C", "SP", "SA", "SR", "CT", "sigma0_CT"] 168 | for v in check_vars: 169 | npeq(result_ncd.variables[v][:], df[v].values) 170 | 171 | assert result_ncd.variables["station"][0] == df.station.iloc[0] == "CH2" 172 | assert result_ncd.variables["profile"][0] == df.profile.iloc[0] == "030617B" 173 | assert result_ncd.variables["lat"].size == 1 174 | assert result_ncd.variables["lat"].ndim == 0 # Reduced to 0 175 | assert result_ncd.variables["lat"][0] == df.lat.iloc[0] == 33.558 176 | assert result_ncd.variables["lon"].size == 1 177 | assert result_ncd.variables["lon"].ndim == 0 # Reduced to 0 178 | assert result_ncd.variables["lon"][0] == df.lon.iloc[0] == -118.405 179 | 180 | assert RaggedTimeseriesProfile.is_mine(result_ncd, strict=True) 181 | 182 | os.close(fid) 183 | os.remove(tmpfile) 184 | 185 | @ignore_invalid_value_cast 186 | 
def test_rtp_single(self):
187 |         filepath = os.path.join(os.path.dirname(__file__), "resources", "r-ctd-single.nc")
188 | 
189 |         with RaggedTimeseriesProfile(filepath) as ncd:
190 |             fid, tmpfile = tempfile.mkstemp(suffix=".nc")
191 |             df = ncd.to_dataframe(clean_rows=False)
192 | 
193 |             with RaggedTimeseriesProfile.from_dataframe(df, tmpfile) as result_ncd:
194 |                 assert "station" in result_ncd.dimensions
195 |             test_is_mine(RaggedTimeseriesProfile, tmpfile) # Try to load it again
196 | 
197 |             with RaggedTimeseriesProfile.from_dataframe(
198 |                 df, tmpfile, unique_dims=True
199 |             ) as result_ncd:
200 |                 assert "station_dim" in result_ncd.dimensions
201 |             test_is_mine(RaggedTimeseriesProfile, tmpfile) # Try to load it again
202 | 
203 |             with RaggedTimeseriesProfile.from_dataframe(
204 |                 df, tmpfile, reduce_dims=True
205 |             ) as result_ncd:
206 |                 # reduce_dims drops the station dimension for this single-station file
207 |                 assert "station" not in result_ncd.dimensions
208 |                 assert "profile" in result_ncd.dimensions
209 |             test_is_mine(RaggedTimeseriesProfile, tmpfile) # Try to load it again
210 | 
211 |             with RaggedTimeseriesProfile.from_dataframe(df, tmpfile, unlimited=True) as result_ncd:
212 |                 assert "station" in result_ncd.dimensions
213 |                 assert "profile" in result_ncd.dimensions
214 |                 assert result_ncd.dimensions["obs"].isunlimited() is True
215 |             test_is_mine(RaggedTimeseriesProfile, tmpfile) # Try to load it again
216 | 
217 |             with RaggedTimeseriesProfile.from_dataframe(
218 |                 df, tmpfile, reduce_dims=True, unlimited=True
219 |             ) as result_ncd:
220 |                 assert "station" not in result_ncd.dimensions
221 |                 assert "profile" in result_ncd.dimensions
222 |                 assert result_ncd.dimensions["obs"].isunlimited() is True
223 |             test_is_mine(RaggedTimeseriesProfile, tmpfile) # Try to load it again
224 | 
225 |             with RaggedTimeseriesProfile.from_dataframe(
226 |                 df, tmpfile, unique_dims=True, reduce_dims=False, unlimited=True
227 |             ) as result_ncd:
228 |                 assert "station_dim" in result_ncd.dimensions
229 |                 assert "profile_dim" in result_ncd.dimensions
230 |                 assert result_ncd.dimensions["obs_dim"].isunlimited() is True
231 |             test_is_mine(RaggedTimeseriesProfile, tmpfile) # Try to load it again
232 | 
233 |         os.close(fid)
234 |         os.remove(tmpfile)
235 | 
--------------------------------------------------------------------------------
/pocean/tests/dsg/trajectory/test_trajectory_cr.py:
--------------------------------------------------------------------------------
1 | #!python
2 | import logging
3 | import os
4 | import tempfile
5 | import unittest
6 | from os.path import dirname as dn
7 | from os.path import join as jn
8 | 
9 | import pytest
10 | 
11 | from pocean import logger
12 | from pocean.dsg import ContiguousRaggedTrajectory, get_calculated_attributes
13 | from pocean.tests.dsg.test_new import test_is_mine
14 | 
15 | logger.level = logging.INFO
16 | logger.handlers = [logging.StreamHandler()]
17 | 
18 | # RuntimeWarning: invalid value encountered in cast is fine here.
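# (Editor's note, not in the original file: a condensed sketch of the round-trip
# pattern the tests below exercise; `path` and `out_nc` are hypothetical names:
#
#     with ContiguousRaggedTrajectory(path) as ncd:
#         df = ncd.to_dataframe(axes=axes)
#     attrs = get_calculated_attributes(df, axes=axes)
#     with ContiguousRaggedTrajectory.from_dataframe(df, out_nc, axes=axes, mode="a") as out:
#         out.apply_meta(attrs)
# )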
19 | ignore_invalid_value_cast = pytest.mark.filterwarnings("ignore::RuntimeWarning") 20 | 21 | 22 | @pytest.mark.parametrize( 23 | "fp", 24 | [ 25 | # jn(dn(__file__), 'resources', 'cr-single.nc'), 26 | jn(dn(__file__), "resources", "cr-multiple.nc"), 27 | jn(dn(__file__), "resources", "cr-oot-A.nc"), 28 | jn(dn(__file__), "resources", "cr-oot-B.nc"), 29 | ], 30 | ) 31 | def test_crt_load(fp): 32 | test_is_mine(ContiguousRaggedTrajectory, fp) 33 | 34 | 35 | class TestContiguousRaggedTrajectory(unittest.TestCase): 36 | def setUp(self): 37 | self.multi = jn(dn(__file__), "resources", "cr-multiple.nc") 38 | self.oot_A = jn(dn(__file__), "resources", "cr-oot-A.nc") 39 | self.oot_B = jn(dn(__file__), "resources", "cr-oot-B.nc") 40 | 41 | def test_crt_dataframe_multiple(self): 42 | axes = { 43 | "t": "time", 44 | "x": "lon", 45 | "y": "lat", 46 | "z": "z", 47 | } 48 | fid, tmpnc = tempfile.mkstemp(suffix=".nc") 49 | with ContiguousRaggedTrajectory(self.multi) as ncd: 50 | df = ncd.to_dataframe(axes=axes) 51 | with ContiguousRaggedTrajectory.from_dataframe(df, tmpnc, axes=axes) as result_ncd: 52 | assert "trajectory" in result_ncd.dimensions 53 | test_is_mine(ContiguousRaggedTrajectory, tmpnc) # Try to load it again 54 | os.close(fid) 55 | os.remove(tmpnc) 56 | 57 | def test_crt_dataframe_multiple_unique_dims(self): 58 | axes = { 59 | "t": "time", 60 | "x": "lon", 61 | "y": "lat", 62 | "z": "z", 63 | } 64 | fid, tmpnc = tempfile.mkstemp(suffix=".nc") 65 | with ContiguousRaggedTrajectory(self.multi) as ncd: 66 | df = ncd.to_dataframe(axes=axes) 67 | with ContiguousRaggedTrajectory.from_dataframe( 68 | df, tmpnc, axes=axes, unique_dims=True 69 | ) as result_ncd: 70 | assert "trajectory_dim" in result_ncd.dimensions 71 | test_is_mine(ContiguousRaggedTrajectory, tmpnc) # Try to load it again 72 | os.close(fid) 73 | os.remove(tmpnc) 74 | 75 | def test_crt_dataframe_unlimited_dim(self): 76 | axes = { 77 | "t": "time", 78 | "x": "lon", 79 | "y": "lat", 80 | "z": "z", 81 | } 82 | fid, tmpnc = tempfile.mkstemp(suffix=".nc") 83 | with ContiguousRaggedTrajectory(self.multi) as ncd: 84 | df = ncd.to_dataframe(axes=axes) 85 | with ContiguousRaggedTrajectory.from_dataframe( 86 | df, tmpnc, axes=axes, unlimited=True, unique_dims=True 87 | ) as result_ncd: 88 | assert "trajectory_dim" in result_ncd.dimensions 89 | assert "obs_dim" in result_ncd.dimensions 90 | assert result_ncd.dimensions["obs_dim"].isunlimited() is True 91 | test_is_mine(ContiguousRaggedTrajectory, tmpnc) # Try to load it again 92 | os.close(fid) 93 | os.remove(tmpnc) 94 | 95 | @ignore_invalid_value_cast 96 | def test_crt_dataframe_oot_A(self): 97 | axes = {"t": "time", "x": "lon", "y": "lat", "z": "depth", "sample": "sample"} 98 | fid, tmpnc = tempfile.mkstemp(suffix=".nc") 99 | with ContiguousRaggedTrajectory(self.oot_A) as ncd: 100 | df = ncd.to_dataframe(axes=axes) 101 | df = df.sort_values(["trajectory", "time"]) 102 | attrs = get_calculated_attributes(df, axes=axes) 103 | 104 | with ContiguousRaggedTrajectory.from_dataframe( 105 | df, tmpnc, axes=axes, mode="a" 106 | ) as result_ncd: 107 | assert "sample" in result_ncd.dimensions 108 | assert result_ncd.dimensions["sample"].size == 6610 109 | assert "trajectory" in result_ncd.dimensions 110 | # This is removing null trajectories that have no data. Not much to do about this 111 | # because there is no way to store this empty trajectory in a dataframe. 
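            # (Editor's note: a trajectory with zero observations contributes no rows
            # to the dataframe, so from_dataframe() has nothing to rebuild it from;
            # the size asserted below counts only trajectories that carry data.)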
112 | assert result_ncd.dimensions["trajectory"].size == 507 113 | result_ncd.apply_meta(attrs) 114 | 115 | test_is_mine(ContiguousRaggedTrajectory, tmpnc) # Try to load it again 116 | 117 | os.close(fid) 118 | os.remove(tmpnc) 119 | 120 | @ignore_invalid_value_cast 121 | def test_crt_dataframe_oot_B(self): 122 | axes = { 123 | "t": "time", 124 | "x": "lon", 125 | "y": "lat", 126 | "z": "depth", 127 | } 128 | fid, tmpnc = tempfile.mkstemp(suffix=".nc") 129 | with ContiguousRaggedTrajectory(self.oot_B) as ncd: 130 | df = ncd.to_dataframe(axes=axes) 131 | df = df.sort_values(["trajectory", "time"]) 132 | attrs = get_calculated_attributes(df, axes=axes) 133 | 134 | with ContiguousRaggedTrajectory.from_dataframe( 135 | df, tmpnc, axes=axes, mode="a" 136 | ) as result_ncd: 137 | assert "obs" in result_ncd.dimensions 138 | assert result_ncd.dimensions["obs"].size == 64116 139 | assert "trajectory" in result_ncd.dimensions 140 | # This is removing null trajectories that have no data. Not much to do about this 141 | # because there is no way to store this empty trajectory in a dataframe. 142 | assert result_ncd.dimensions["trajectory"].size == 1000 143 | result_ncd.apply_meta(attrs) 144 | 145 | test_is_mine(ContiguousRaggedTrajectory, tmpnc) # Try to load it again 146 | 147 | os.close(fid) 148 | os.remove(tmpnc) 149 | -------------------------------------------------------------------------------- /pocean/tests/dsg/trajectory/test_trajectory_im.py: -------------------------------------------------------------------------------- 1 | #!python 2 | import logging 3 | import os 4 | import tempfile 5 | import unittest 6 | 7 | import numpy as np 8 | import pytest 9 | from dateutil.parser import parse as dtparse 10 | 11 | from pocean import logger 12 | from pocean.cf import CFDataset 13 | from pocean.dsg import IncompleteMultidimensionalTrajectory 14 | from pocean.tests.dsg.test_new import test_is_mine 15 | 16 | logger.level = logging.INFO 17 | logger.handlers = [logging.StreamHandler()] 18 | 19 | # RuntimeWarning: invalid value encountered in cast is fine here. 
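# (Editor's note, not in the original file: the tests below cycle through the
# from_dataframe() keyword flags; their observable effect, per the assertions,
# with a hypothetical dataframe df and output path out_nc:
#
#     klass.from_dataframe(df, out_nc)                    # default dimension names
#     klass.from_dataframe(df, out_nc, unique_dims=True)  # "trajectory_dim", "obs_dim"
#     klass.from_dataframe(df, out_nc, reduce_dims=True)  # drop "trajectory" when only one
#     klass.from_dataframe(df, out_nc, unlimited=True)    # "obs" becomes unlimited
#
# where klass is IncompleteMultidimensionalTrajectory.)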
20 | ignore_invalid_value_cast = pytest.mark.filterwarnings("ignore::RuntimeWarning") 21 | 22 | 23 | class TestIncompleteMultidimensionalTrajectory(unittest.TestCase): 24 | @ignore_invalid_value_cast 25 | def test_im_single_row(self): 26 | filepath = os.path.join(os.path.dirname(__file__), "resources", "im-singlerow.nc") 27 | 28 | with IncompleteMultidimensionalTrajectory(filepath) as s: 29 | df = s.to_dataframe(clean_rows=True) 30 | assert len(df) == 1 31 | 32 | def test_imt_multi(self): 33 | filepath = os.path.join(os.path.dirname(__file__), "resources", "im-multiple.nc") 34 | 35 | CFDataset.load(filepath).close() 36 | 37 | with IncompleteMultidimensionalTrajectory(filepath) as ncd: 38 | fid, tmpfile = tempfile.mkstemp(suffix=".nc") 39 | df = ncd.to_dataframe(clean_rows=False) 40 | 41 | with IncompleteMultidimensionalTrajectory.from_dataframe(df, tmpfile) as result_ncd: 42 | assert "trajectory" in result_ncd.dimensions 43 | test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again 44 | 45 | with IncompleteMultidimensionalTrajectory.from_dataframe( 46 | df, tmpfile, unique_dims=True 47 | ) as result_ncd: 48 | assert "trajectory_dim" in result_ncd.dimensions 49 | test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again 50 | 51 | with IncompleteMultidimensionalTrajectory.from_dataframe( 52 | df, tmpfile, reduce_dims=True 53 | ) as result_ncd: 54 | # Could not reduce dims since there was more than one trajectory 55 | assert "trajectory" in result_ncd.dimensions 56 | test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again 57 | 58 | with IncompleteMultidimensionalTrajectory.from_dataframe( 59 | df, tmpfile, unlimited=True 60 | ) as result_ncd: 61 | assert result_ncd.dimensions["obs"].isunlimited() is True 62 | test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again 63 | 64 | with IncompleteMultidimensionalTrajectory.from_dataframe( 65 | df, tmpfile, reduce_dims=True, unlimited=True 66 | ) as result_ncd: 67 | # Could not reduce dims since there was more than one trajectory 68 | assert "trajectory" in result_ncd.dimensions 69 | assert result_ncd.dimensions["obs"].isunlimited() is True 70 | test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again 71 | 72 | with IncompleteMultidimensionalTrajectory.from_dataframe( 73 | df, tmpfile, unique_dims=True, reduce_dims=True, unlimited=True 74 | ) as result_ncd: 75 | # Could not reduce dims since there was more than one trajectory 76 | assert "trajectory_dim" in result_ncd.dimensions 77 | assert result_ncd.dimensions["obs_dim"].isunlimited() is True 78 | test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again 79 | 80 | os.close(fid) 81 | os.remove(tmpfile) 82 | 83 | @ignore_invalid_value_cast 84 | def test_imt_multi_not_string(self): 85 | filepath = os.path.join(os.path.dirname(__file__), "resources", "im-multiple-nonstring.nc") 86 | 87 | CFDataset.load(filepath).close() 88 | 89 | with IncompleteMultidimensionalTrajectory(filepath) as ncd: 90 | fid, tmpfile = tempfile.mkstemp(suffix=".nc") 91 | df = ncd.to_dataframe(clean_rows=False) 92 | 93 | with IncompleteMultidimensionalTrajectory.from_dataframe(df, tmpfile) as result_ncd: 94 | assert "trajectory" in result_ncd.dimensions 95 | test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again 96 | 97 | with IncompleteMultidimensionalTrajectory.from_dataframe( 98 | df, tmpfile, reduce_dims=True 99 | ) as result_ncd: 100 | # Could 
the dims be reduced here? Yes: unlike test_imt_multi above, the trajectory dimension is dropped for the non-string file
101 |                 assert "trajectory" not in result_ncd.dimensions
102 |             test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again
103 | 
104 |             with IncompleteMultidimensionalTrajectory.from_dataframe(
105 |                 df, tmpfile, unlimited=True
106 |             ) as result_ncd:
107 |                 assert result_ncd.dimensions["obs"].isunlimited() is True
108 |             test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again
109 | 
110 |             with IncompleteMultidimensionalTrajectory.from_dataframe(
111 |                 df, tmpfile, reduce_dims=True, unlimited=True
112 |             ) as result_ncd:
113 |                 # As above, the trajectory dimension is dropped for the non-string variant
114 |                 assert "trajectory" not in result_ncd.dimensions
115 |                 assert result_ncd.dimensions["obs"].isunlimited() is True
116 |             test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again
117 | 
118 |             os.close(fid)
119 |             os.remove(tmpfile)
120 | 
121 |     def test_imt_single(self):
122 |         filepath = os.path.join(os.path.dirname(__file__), "resources", "im-single.nc")
123 | 
124 |         CFDataset.load(filepath).close()
125 | 
126 |         with IncompleteMultidimensionalTrajectory(filepath) as ncd:
127 |             fid, tmpfile = tempfile.mkstemp(suffix=".nc")
128 |             df = ncd.to_dataframe(clean_rows=False)
129 | 
130 |             with IncompleteMultidimensionalTrajectory.from_dataframe(df, tmpfile) as result_ncd:
131 |                 assert "trajectory" in result_ncd.dimensions
132 |             test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again
133 | 
134 |             with IncompleteMultidimensionalTrajectory.from_dataframe(
135 |                 df, tmpfile, reduce_dims=True
136 |             ) as result_ncd:
137 |                 # Reduced trajectory dimension
138 |                 assert "trajectory" not in result_ncd.dimensions
139 |             test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again
140 | 
141 |             with IncompleteMultidimensionalTrajectory.from_dataframe(
142 |                 df, tmpfile, unlimited=True
143 |             ) as result_ncd:
144 |                 # Unlimited obs dimension
145 |                 assert result_ncd.dimensions["obs"].isunlimited() is True
146 |             test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again
147 | 
148 |             with IncompleteMultidimensionalTrajectory.from_dataframe(
149 |                 df, tmpfile, reduce_dims=True, unlimited=True
150 |             ) as result_ncd:
151 |                 # Reduced trajectory dimension
152 |                 assert "trajectory" not in result_ncd.dimensions
153 |                 assert result_ncd.dimensions["obs"].isunlimited() is True
154 |             test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again
155 | 
156 |             os.close(fid)
157 |             os.remove(tmpfile)
158 | 
159 |     def test_imt_change_axis_names(self):
160 |         new_axis = {"t": "time", "x": "lon", "y": "lat", "z": "depth"}
161 | 
162 |         filepath = os.path.join(os.path.dirname(__file__), "resources", "im-multiple.nc")
163 |         with IncompleteMultidimensionalTrajectory(filepath) as ncd:
164 |             fid, tmpfile = tempfile.mkstemp(suffix=".nc")
165 |             df = ncd.to_dataframe(clean_rows=False, axes=new_axis)
166 | 
167 |             with IncompleteMultidimensionalTrajectory.from_dataframe(
168 |                 df, tmpfile, axes=new_axis
169 |             ) as result_ncd:
170 |                 assert "trajectory" in result_ncd.dimensions
171 |                 assert "time" in result_ncd.variables
172 |                 assert "lon" in result_ncd.variables
173 |                 assert "lat" in result_ncd.variables
174 |                 assert "depth" in result_ncd.variables
175 |             test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again
176 | 
177 |             os.close(fid)
178 |             os.remove(tmpfile)
179 | 
180 |     def test_imt_calculated_metadata_single(self):
181 | 
filepath = os.path.join(os.path.dirname(__file__), "resources", "im-single.nc") 182 | 183 | with IncompleteMultidimensionalTrajectory(filepath) as ncd: 184 | s = ncd.calculated_metadata() 185 | assert s.min_t.round("s") == dtparse("1990-01-01 00:00:00") 186 | assert s.max_t.round("s") == dtparse("1990-01-05 03:00:00") 187 | traj1 = s.trajectories["Trajectory1"] 188 | assert traj1.min_z == 0 189 | assert traj1.max_z == 99 190 | assert traj1.min_t.round("s") == dtparse("1990-01-01 00:00:00") 191 | assert traj1.max_t.round("s") == dtparse("1990-01-05 03:00:00") 192 | first_loc = traj1.geometry.coords[0] 193 | assert np.isclose(first_loc[0], -7.9336) 194 | assert np.isclose(first_loc[1], 42.00339) 195 | 196 | def test_imt_calculated_metadata_multi(self): 197 | filepath = os.path.join(os.path.dirname(__file__), "resources", "im-multiple.nc") 198 | 199 | with IncompleteMultidimensionalTrajectory(filepath) as ncd: 200 | m = ncd.calculated_metadata() 201 | assert m.min_t == dtparse("1990-01-01 00:00:00") 202 | assert m.max_t == dtparse("1990-01-02 12:00:00") 203 | assert len(m.trajectories) == 4 204 | traj0 = m.trajectories["Trajectory0"] 205 | assert traj0.min_z == 0 206 | assert traj0.max_z == 35 207 | assert traj0.min_t.round("s") == dtparse("1990-01-01 00:00:00") 208 | assert traj0.max_t.round("s") == dtparse("1990-01-02 11:00:00") 209 | first_loc = traj0.geometry.coords[0] 210 | assert np.isclose(first_loc[0], -35.07884) 211 | assert np.isclose(first_loc[1], 2.15286) 212 | 213 | traj3 = m.trajectories["Trajectory3"] 214 | assert traj3.min_z == 0 215 | assert traj3.max_z == 36 216 | assert traj3.min_t.round("s") == dtparse("1990-01-01 00:00:00") 217 | assert traj3.max_t.round("s") == dtparse("1990-01-02 12:00:00") 218 | first_loc = traj3.geometry.coords[0] 219 | assert np.isclose(first_loc[0], -73.3026) 220 | assert np.isclose(first_loc[1], 1.95761) 221 | 222 | def test_json_attributes_single(self): 223 | filepath = os.path.join(os.path.dirname(__file__), "resources", "im-single.nc") 224 | 225 | with IncompleteMultidimensionalTrajectory(filepath) as s: 226 | s.json_attributes() 227 | 228 | def test_json_attributes_multi(self): 229 | filepath = os.path.join(os.path.dirname(__file__), "resources", "im-multiple.nc") 230 | 231 | with IncompleteMultidimensionalTrajectory(filepath) as s: 232 | s.json_attributes() 233 | -------------------------------------------------------------------------------- /pocean/tests/dsg/trajectoryProfile/test_trajectoryProfile_cr.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import math 3 | import os 4 | import tempfile 5 | import unittest 6 | 7 | import numpy as np 8 | import pytest 9 | from dateutil.parser import parse as dtparse 10 | from shapely.wkt import loads as wktloads 11 | 12 | from pocean import logger as L 13 | from pocean.dsg import ContiguousRaggedTrajectoryProfile 14 | from pocean.tests.dsg.test_new import test_is_mine 15 | 16 | L.level = logging.INFO 17 | L.handlers = [logging.StreamHandler()] 18 | 19 | # RuntimeWarning: invalid value encountered in cast is fine here. 
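# (Editor's note, not in the original file: the axes dicts used below map pocean's
# logical axis keys to the per-file variable names, and the same mapping drives
# both directions of the round trip, e.g.
#
#     axes = {"t": "time", "x": "longitude", "y": "latitude", "z": "depth"}
#     df = ncd.to_dataframe(axes=axes)
#     ContiguousRaggedTrajectoryProfile.from_dataframe(df, out_nc, axes=axes)
#
# with out_nc a hypothetical output path.)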
20 | ignore_invalid_value_cast = pytest.mark.filterwarnings("ignore::RuntimeWarning") 21 | 22 | 23 | class TestContinousRaggedTrajectoryProfile(unittest.TestCase): 24 | def setUp(self): 25 | self.single = os.path.join(os.path.dirname(__file__), "resources", "cr-single.nc") 26 | self.multi = os.path.join(os.path.dirname(__file__), "resources", "cr-multiple.nc") 27 | self.missing_time = os.path.join( 28 | os.path.dirname(__file__), "resources", "cr-missing-time.nc" 29 | ) 30 | self.nan_locations = os.path.join( 31 | os.path.dirname(__file__), "resources", "cr-nan-locations.nc" 32 | ) 33 | 34 | def test_crtp_load(self): 35 | ContiguousRaggedTrajectoryProfile(self.single).close() 36 | ContiguousRaggedTrajectoryProfile(self.multi).close() 37 | ContiguousRaggedTrajectoryProfile(self.missing_time).close() 38 | 39 | @ignore_invalid_value_cast 40 | def test_crtp_dataframe_single(self): 41 | axes = { 42 | "t": "time", 43 | "x": "longitude", 44 | "y": "latitude", 45 | "z": "depth", 46 | } 47 | fid, tmpnc = tempfile.mkstemp(suffix=".nc") 48 | with ContiguousRaggedTrajectoryProfile(self.single) as ncd: 49 | df = ncd.to_dataframe(axes=axes) 50 | with ContiguousRaggedTrajectoryProfile.from_dataframe( 51 | df, tmpnc, axes=axes 52 | ) as result_ncd: 53 | assert "profile" in result_ncd.dimensions 54 | assert "trajectory" in result_ncd.dimensions 55 | test_is_mine(ContiguousRaggedTrajectoryProfile, tmpnc) # Try to load it again 56 | os.close(fid) 57 | os.remove(tmpnc) 58 | 59 | @ignore_invalid_value_cast 60 | def test_crtp_dataframe_single_unique_dims(self): 61 | axes = { 62 | "t": "time", 63 | "x": "longitude", 64 | "y": "latitude", 65 | "z": "depth", 66 | } 67 | fid, tmpnc = tempfile.mkstemp(suffix=".nc") 68 | with ContiguousRaggedTrajectoryProfile(self.single) as ncd: 69 | df = ncd.to_dataframe(axes=axes) 70 | with ContiguousRaggedTrajectoryProfile.from_dataframe( 71 | df, tmpnc, axes=axes, unique_dims=True 72 | ) as result_ncd: 73 | assert "profile_dim" in result_ncd.dimensions 74 | assert "trajectory_dim" in result_ncd.dimensions 75 | test_is_mine(ContiguousRaggedTrajectoryProfile, tmpnc) # Try to load it again 76 | os.close(fid) 77 | os.remove(tmpnc) 78 | 79 | def test_crtp_dataframe_multi(self): 80 | axes = { 81 | "t": "time", 82 | "x": "lon", 83 | "y": "lat", 84 | "z": "z", 85 | } 86 | fid, tmpnc = tempfile.mkstemp(suffix=".nc") 87 | with ContiguousRaggedTrajectoryProfile(self.multi) as ncd: 88 | df = ncd.to_dataframe(axes=axes) 89 | with ContiguousRaggedTrajectoryProfile.from_dataframe( 90 | df, tmpnc, axes=axes 91 | ) as result_ncd: 92 | assert "profile" in result_ncd.dimensions 93 | assert "trajectory" in result_ncd.dimensions 94 | test_is_mine(ContiguousRaggedTrajectoryProfile, tmpnc) # Try to load it again 95 | os.close(fid) 96 | os.remove(tmpnc) 97 | 98 | @ignore_invalid_value_cast 99 | def test_crtp_dataframe_missing_time(self): 100 | axes = { 101 | "t": "precise_time", 102 | "x": "precise_lon", 103 | "y": "precise_lat", 104 | "z": "depth", 105 | } 106 | fid, tmpnc = tempfile.mkstemp(suffix=".nc") 107 | with ContiguousRaggedTrajectoryProfile(self.missing_time) as ncd: 108 | df = ncd.to_dataframe(axes=axes) 109 | with ContiguousRaggedTrajectoryProfile.from_dataframe( 110 | df, tmpnc, axes=axes 111 | ) as result_ncd: 112 | assert "profile" in result_ncd.dimensions 113 | assert "trajectory" in result_ncd.dimensions 114 | test_is_mine(ContiguousRaggedTrajectoryProfile, tmpnc) # Try to load it again 115 | os.close(fid) 116 | os.remove(tmpnc) 117 | 118 | @ignore_invalid_value_cast 119 | def 
test_crtp_calculated_metadata_single(self): 120 | axes = { 121 | "t": "time", 122 | "x": "longitude", 123 | "y": "latitude", 124 | "z": "depth", 125 | } 126 | 127 | with ContiguousRaggedTrajectoryProfile(self.single) as st: 128 | s = st.calculated_metadata(axes=axes) 129 | assert s.min_t.round("s") == dtparse("2014-11-25 18:57:30") 130 | assert s.max_t.round("s") == dtparse("2014-11-27 07:10:30") 131 | assert len(s.trajectories) == 1 132 | traj = s.trajectories["sp025-20141125T1730"] 133 | assert traj.min_z == 0 134 | assert np.isclose(traj.max_z, 504.37827) 135 | assert traj.min_t.round("s") == dtparse("2014-11-25 18:57:30") 136 | assert traj.max_t.round("s") == dtparse("2014-11-27 07:10:30") 137 | 138 | first_loc = traj.geometry.coords[0] 139 | assert np.isclose(first_loc[0], -119.79025) 140 | assert np.isclose(first_loc[1], 34.30818) 141 | assert len(traj.profiles) == 17 142 | 143 | def test_crtp_calculated_metadata_multi(self): 144 | axes = { 145 | "t": "time", 146 | "x": "longitude", 147 | "y": "latitude", 148 | "z": "depth", 149 | } 150 | 151 | with ContiguousRaggedTrajectoryProfile(self.multi) as mt: 152 | m = mt.calculated_metadata(axes=axes) 153 | assert m.min_t.round("s") == dtparse("1990-01-01 00:00:00") 154 | assert m.max_t.round("s") == dtparse("1990-01-03 02:00:00") 155 | assert len(m.trajectories) == 5 156 | # First trajectory 157 | traj0 = m.trajectories[0] 158 | assert traj0.min_z == 0 159 | assert traj0.max_z == 43 160 | assert traj0.min_t.round("s") == dtparse("1990-01-02 05:00:00") 161 | assert traj0.max_t.round("s") == dtparse("1990-01-03 01:00:00") 162 | first_loc = traj0.geometry.coords[0] 163 | assert first_loc[0] == -60 164 | assert first_loc[1] == 53 165 | assert len(traj0.profiles) == 4 166 | assert traj0.profiles[0].t.round("s") == dtparse("1990-01-03 01:00:00") 167 | assert traj0.profiles[0].x == -60 168 | assert traj0.profiles[0].y == 49 169 | 170 | # Last trajectory 171 | traj4 = m.trajectories[4] 172 | assert traj4.min_z == 0 173 | assert traj4.max_z == 38 174 | assert traj4.min_t.round("s") == dtparse("1990-01-02 14:00:00") 175 | assert traj4.max_t.round("s") == dtparse("1990-01-02 15:00:00") 176 | first_loc = traj4.geometry.coords[0] 177 | assert first_loc[0] == -67 178 | assert first_loc[1] == 47 179 | assert len(traj4.profiles) == 4 180 | assert traj4.profiles[19].t.round("s") == dtparse("1990-01-02 14:00:00") 181 | assert traj4.profiles[19].x == -44 182 | assert traj4.profiles[19].y == 47 183 | 184 | @ignore_invalid_value_cast 185 | def test_crtp_calculated_metadata_missing_time(self): 186 | axes = { 187 | "t": "time", 188 | "x": "longitude", 189 | "y": "latitude", 190 | "z": "depth", 191 | } 192 | 193 | with ContiguousRaggedTrajectoryProfile(self.missing_time) as mmt: 194 | t = mmt.calculated_metadata(axes=axes) 195 | assert t.min_t == dtparse("2014-11-16 21:32:29.952500") 196 | assert t.max_t == dtparse("2014-11-17 07:59:08.398500") 197 | assert len(t.trajectories) == 1 198 | 199 | traj = t.trajectories["UW157-20141116T211809"] 200 | assert np.isclose(traj.min_z, 0.47928014) 201 | assert np.isclose(traj.max_z, 529.68005) 202 | assert traj.min_t == dtparse("2014-11-16 21:32:29.952500") 203 | assert traj.max_t == dtparse("2014-11-17 07:59:08.398500") 204 | 205 | first_loc = traj.geometry.coords[0] 206 | 207 | assert np.isclose(first_loc[0], -124.681526638573) 208 | assert np.isclose(first_loc[1], 43.5022166666667) 209 | assert len(traj.profiles) == 13 210 | 211 | @ignore_invalid_value_cast 212 | def test_crtp_just_missing_time(self): 213 | axes = { 214 
| "t": "time", 215 | "x": "longitude", 216 | "y": "latitude", 217 | "z": "depth", 218 | } 219 | 220 | with ContiguousRaggedTrajectoryProfile(self.missing_time) as mmt: 221 | t = mmt.calculated_metadata(axes=axes) 222 | assert t.min_t == dtparse("2014-11-16 21:32:29.952500") 223 | assert t.max_t == dtparse("2014-11-17 07:59:08.398500") 224 | assert len(t.trajectories) == 1 225 | 226 | traj = t.trajectories["UW157-20141116T211809"] 227 | assert np.isclose(traj.min_z, 0.47928014) 228 | assert np.isclose(traj.max_z, 529.68005) 229 | assert traj.min_t == dtparse("2014-11-16 21:32:29.952500") 230 | assert traj.max_t == dtparse("2014-11-17 07:59:08.398500") 231 | 232 | first_loc = traj.geometry.coords[0] 233 | assert np.isclose(first_loc[0], -124.681526638573) 234 | assert np.isclose(first_loc[1], 43.5022166666667) 235 | assert len(traj.profiles) == 13 236 | 237 | @ignore_invalid_value_cast 238 | def test_crtp_just_missing_locations(self): 239 | axes = { 240 | "t": "time", 241 | "x": "longitude", 242 | "y": "latitude", 243 | "z": "depth", 244 | } 245 | 246 | with ContiguousRaggedTrajectoryProfile(self.nan_locations) as ml: 247 | t = ml.calculated_metadata(axes=axes) 248 | assert len(t.trajectories) == 1 249 | 250 | traj = t.trajectories["clark-20150709T1803"] 251 | coords = list(wktloads(traj.geometry.wkt).coords) 252 | assert True not in [math.isnan(x) for x, y in coords] 253 | assert True not in [math.isnan(y) for x, y in coords] 254 | -------------------------------------------------------------------------------- /pocean/tests/test_cf.py: -------------------------------------------------------------------------------- 1 | #!python 2 | import logging 3 | import os 4 | import unittest 5 | 6 | from pocean import logger as L 7 | from pocean.cf import CFDataset 8 | from pocean.dsg import OrthogonalMultidimensionalTimeseries as omt 9 | 10 | L.level = logging.INFO 11 | L.handlers = [logging.StreamHandler()] 12 | 13 | 14 | class TestCFDatasetLoad(unittest.TestCase): 15 | def test_load_url(self): 16 | # File downloaded from https://geoport.usgs.esipfed.org/thredds/dodsC/silt/usgs/Projects/stellwagen/CF-1.6/ARGO_MERCHANT/1211-AA.cdf.html 17 | fname = os.path.join(os.path.dirname(__file__), "resources", "1211-AA.cdf") 18 | ncd = CFDataset.load(fname) 19 | assert omt.is_mine(ncd) is True 20 | ncd.close() 21 | 22 | def test_load_strict(self): 23 | ncfile = os.path.join( 24 | os.path.dirname(__file__), "dsg", "profile", "resources", "om-single.nc" 25 | ) 26 | 27 | ncd = CFDataset.load(ncfile) 28 | assert omt.is_mine(ncd) is False 29 | with self.assertRaises(BaseException): 30 | omt.is_mine(ncd, strict=True) 31 | ncd.close() 32 | -------------------------------------------------------------------------------- /pocean/tests/test_nc.py: -------------------------------------------------------------------------------- 1 | #!python 2 | import logging 3 | import os 4 | import tempfile 5 | import unittest 6 | 7 | from numpy import testing as npt 8 | 9 | from pocean import logger as L 10 | from pocean.cf import CFDataset 11 | from pocean.dataset import EnhancedDataset 12 | from pocean.meta import MetaInterface, ncpyattributes 13 | 14 | L.level = logging.INFO 15 | L.handlers = [logging.StreamHandler()] 16 | 17 | 18 | class TestJsonDataset(unittest.TestCase): 19 | def setUp(self): 20 | self.maxDiff = 9999 21 | self.hdl, self.ncdf = tempfile.mkstemp(prefix="pocean_test_") 22 | 23 | def tearDown(self): 24 | os.close(self.hdl) 25 | os.remove(self.ncdf) 26 | 27 | def test_lvl0_apply(self): 28 | jsf = 
os.path.join(os.path.dirname(__file__), "resources/coamps_lvl0.json") 29 | mi = MetaInterface.from_jsonfile(jsf) 30 | 31 | with EnhancedDataset(self.ncdf, "w") as ncd: 32 | ncd.apply_meta(mi) 33 | 34 | assert {k: v.size for k, v in ncd.dimensions.items()} == mi["dimensions"] 35 | 36 | fileglobatts = mi["attributes"] 37 | newglobatts = {} 38 | for nk in ncd.ncattrs(): 39 | newglobatts[nk] = ncd.getncattr(nk) 40 | 41 | self.assertDictEqual(fileglobatts, newglobatts) 42 | 43 | for k, v in ncd.variables.items(): 44 | filevaratts = mi["variables"][k]["attributes"] 45 | newvaratts = ncpyattributes(dict(v.__dict__), verbose=False) 46 | 47 | # _FillValue gets added even if it wasn't in the original attributes 48 | if "_FillValue" in newvaratts: 49 | del newvaratts["_FillValue"] 50 | 51 | if "missing_value" in filevaratts: 52 | del filevaratts["missing_value"] 53 | 54 | self.assertDictEqual(filevaratts, newvaratts) 55 | 56 | def test_lvl2_apply(self): 57 | jsf = os.path.join(os.path.dirname(__file__), "resources/coamps_lvl2.json") 58 | mi = MetaInterface.from_jsonfile(jsf) 59 | 60 | with EnhancedDataset(self.ncdf, "w") as ncd: 61 | ncd.apply_meta(mi) 62 | 63 | assert {k: v.size for k, v in ncd.dimensions.items()} == mi["dimensions"] 64 | 65 | fileglobatts = {k: v["data"] for k, v in mi["attributes"].items()} 66 | newglobatts = {} 67 | for nk in ncd.ncattrs(): 68 | newglobatts[nk] = ncd.getncattr(nk) 69 | 70 | self.assertDictEqual(fileglobatts, newglobatts) 71 | 72 | for k, v in ncd.variables.items(): 73 | filevaratts = {k: v["data"] for k, v in mi["variables"][k]["attributes"].items()} 74 | newvaratts = ncpyattributes(dict(v.__dict__), verbose=False) 75 | 76 | # _FillValue gets added even if it wasn't in the original attributes 77 | if "_FillValue" in newvaratts: 78 | del newvaratts["_FillValue"] 79 | 80 | if "missing_value" in filevaratts: 81 | del filevaratts["missing_value"] 82 | 83 | self.assertDictEqual(filevaratts, newvaratts) 84 | 85 | def test_input_output(self): 86 | ncfile = os.path.join(os.path.dirname(__file__), "resources/coamps.nc") 87 | 88 | with EnhancedDataset(ncfile, "r") as original_ncd: 89 | mi = original_ncd.meta() 90 | 91 | with EnhancedDataset(self.ncdf, "w") as ncd: 92 | ncd.apply_meta(mi) 93 | 94 | self.assertDictEqual( 95 | ncpyattributes(dict(original_ncd.__dict__)), ncpyattributes(dict(ncd.__dict__)) 96 | ) 97 | 98 | for k, v in original_ncd.variables.items(): 99 | oldatts = ncpyattributes(dict(v.__dict__)) 100 | newatts = ncpyattributes(dict(ncd.variables[k].__dict__)) 101 | 102 | # _FillValue gets added even if it wasn't in the original attributes 103 | if "_FillValue" in newatts: 104 | del newatts["_FillValue"] 105 | 106 | if "missing_value" in oldatts: 107 | del oldatts["missing_value"] 108 | 109 | self.assertDictEqual(oldatts, newatts) 110 | 111 | def test_serialize_and_reload_data(self): 112 | ncfile = os.path.join(os.path.dirname(__file__), "resources/qc-month.nc") 113 | 114 | with CFDataset(ncfile) as cfncd: 115 | # Data from netCDF variable 116 | ncdata = cfncd.variables["data1"][:] 117 | 118 | # Not filled 119 | meta = cfncd.json(return_data=True, fill_data=False) 120 | jsdata = meta["variables"]["data1"]["data"] 121 | npt.assert_array_equal(ncdata, jsdata) 122 | fhandle1, fname1 = tempfile.mkstemp() 123 | with CFDataset(fname1, "w") as newcf: 124 | newcf.apply_json(meta) 125 | with CFDataset(fname1, "r") as rcf: 126 | newncdata = rcf.variables["data1"][:] 127 | npt.assert_array_equal(ncdata, newncdata) 128 | os.close(fhandle1) 129 | os.remove(fname1) 130 | 131 
| # Filled 132 | meta = cfncd.json(return_data=True, fill_data=True) 133 | jsdata = meta["variables"]["data1"]["data"] 134 | npt.assert_array_equal(ncdata, jsdata) 135 | fhandle2, fname2 = tempfile.mkstemp() 136 | with CFDataset(fname2, "w") as newcf: 137 | newcf.apply_json(meta) 138 | 139 | with CFDataset(fname2, "r") as rcf: 140 | newncdata = rcf.variables["data1"][:] 141 | npt.assert_array_equal(ncdata, newncdata) 142 | 143 | os.close(fhandle2) 144 | os.remove(fname2) 145 | -------------------------------------------------------------------------------- /pocean/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | #!python 2 | import logging 3 | import os 4 | import shutil 5 | import tempfile 6 | import unittest 7 | 8 | import netCDF4 as nc4 9 | import numpy as np 10 | import pytest 11 | 12 | from pocean import logger 13 | from pocean.dataset import EnhancedDataset 14 | from pocean.utils import generic_masked, get_default_axes, normalize_array 15 | 16 | logger.level = logging.INFO 17 | logger.handlers = [logging.StreamHandler()] 18 | 19 | 20 | class TestUtils(unittest.TestCase): 21 | def setUp(self): 22 | self.input_file = os.path.join(os.path.dirname(__file__), "resources/coamps.nc") 23 | 24 | def test_get_default_axes(self): 25 | assert get_default_axes() == ( 26 | "trajectory", 27 | "station", 28 | "profile", 29 | "obs", 30 | "t", 31 | "x", 32 | "y", 33 | "z", 34 | ) 35 | 36 | new_defaults = { 37 | "trajectory": "a", 38 | "station": "b", 39 | "profile": "c", 40 | "sample": "h", 41 | "t": "d", 42 | "x": "e", 43 | "y": "f", 44 | "z": "g", 45 | } 46 | assert get_default_axes(new_defaults) == ( 47 | "a", 48 | "b", 49 | "c", 50 | "h", 51 | "d", 52 | "e", 53 | "f", 54 | "g", 55 | ) 56 | 57 | new_defaults = {"trajectory": "a", "station": "b", "profile": "c"} 58 | assert get_default_axes(new_defaults) == ( 59 | "a", 60 | "b", 61 | "c", 62 | "obs", 63 | "t", 64 | "x", 65 | "y", 66 | "z", 67 | ) 68 | 69 | # Time is not a valid axis key 70 | bad_defaults = {"time": "a"} 71 | with self.assertRaises(TypeError): 72 | get_default_axes(bad_defaults) 73 | 74 | # Can't have duplicate values 75 | bad_defaults = {"x": "a", "y": "a"} 76 | with self.assertRaises(ValueError): 77 | get_default_axes(bad_defaults) 78 | 79 | # but you can with the sample dimension 80 | bad_defaults = {"t": "time", "sample": "time"} 81 | assert get_default_axes(bad_defaults) == ( 82 | "trajectory", 83 | "station", 84 | "profile", 85 | "time", 86 | "time", 87 | "x", 88 | "y", 89 | "z", 90 | ) 91 | 92 | def test_single_attr_filter(self): 93 | nc = EnhancedDataset(self.input_file) 94 | grid_spacing_vars = nc.filter_by_attrs(grid_spacing="4.0 km") 95 | 96 | x = nc.variables.get("x") 97 | y = nc.variables.get("y") 98 | 99 | self.assertEqual(len(grid_spacing_vars), 2) 100 | assert x in grid_spacing_vars 101 | assert y in grid_spacing_vars 102 | 103 | def test_multiple_attr_filter(self): 104 | nc = EnhancedDataset(self.input_file) 105 | grid_spacing_vars = nc.filter_by_attrs( 106 | grid_spacing="4.0 km", standard_name="projection_y_coordinate" 107 | ) 108 | 109 | y = nc.variables.get("y") 110 | 111 | self.assertEqual(len(grid_spacing_vars), 1) 112 | assert y in grid_spacing_vars 113 | 114 | @pytest.mark.filterwarnings("ignore::UserWarning") 115 | def test_generic_masked_bad_min_max_value(self): 116 | fid, tpath = tempfile.mkstemp(suffix=".nc", prefix="pocean-test") 117 | shutil.copy2(self.input_file, tpath) 118 | 119 | with EnhancedDataset(tpath, "a") as ncd: 120 | v = 
ncd.variables["v_component_wind_true_direction_all_geometries"] 121 | v.valid_min = np.float32(0.1) 122 | v.valid_max = np.float32(0.1) 123 | r = generic_masked(v[:], attrs=ncd.vatts(v.name)) 124 | rflat = r.flatten() 125 | assert rflat[~rflat.mask].size == 0 126 | 127 | # Create a byte variable with a float valid_min and valid_max 128 | # to make sure it doesn't error 129 | b = ncd.createVariable("imabyte", "b") 130 | b.valid_min = 0 131 | b.valid_max = np.int16(600) # this is over a byte and thus invalid 132 | b[:] = 3 133 | r = generic_masked(b[:], attrs=ncd.vatts(b.name)) 134 | assert np.all(r.mask == False) # noqa 135 | 136 | b.valid_min = 0 137 | b.valid_max = 2 138 | r = generic_masked(b[:], attrs=ncd.vatts(b.name)) 139 | assert np.all(r.mask == True) # noqa 140 | 141 | c = ncd.createVariable("imanotherbyte", "f4") 142 | c.setncattr("valid_min", b"0") 143 | c.setncattr("valid_max", b"9") 144 | c[:] = 3 145 | r = generic_masked(c[:], attrs=ncd.vatts(c.name)) 146 | assert np.all(r.mask == False) # noqa 147 | 148 | c = ncd.createVariable("imarange", "f4") 149 | c.valid_range = [0.0, 2.0] 150 | c[:] = 3.0 151 | r = generic_masked(c[:], attrs=ncd.vatts(c.name)) 152 | assert np.all(r.mask == True) # noqa 153 | 154 | c.valid_range = [0.0, 2.0] 155 | c[:] = 1.0 156 | r = generic_masked(c[:], attrs=ncd.vatts(c.name)) 157 | assert np.all(r.mask == False) # noqa 158 | 159 | os.close(fid) 160 | if os.path.exists(tpath): 161 | os.remove(tpath) 162 | 163 | 164 | class TestNetcdfUtils(unittest.TestCase): 165 | def test_cf_safe_name(self): 166 | from pocean.cf import cf_safe_name 167 | 168 | self.assertEqual("foo", cf_safe_name("foo")) 169 | self.assertEqual("v_1foo", cf_safe_name("1foo")) 170 | self.assertEqual("v_1foo_99", cf_safe_name("1foo-99")) 171 | self.assertEqual("foo_99", cf_safe_name("foo-99")) 172 | self.assertEqual("foo_99_", cf_safe_name("foo(99)")) 173 | self.assertEqual("v__foo_99_", cf_safe_name("_foo(99)")) 174 | 175 | 176 | class TestNormalizeArray(unittest.TestCase): 177 | def setUp(self): 178 | self.fh, self.fp = tempfile.mkstemp(suffix=".nc", prefix="pocean_testing_") 179 | 180 | def tearDown(self): 181 | os.close(self.fh) 182 | if os.path.exists(self.fp): 183 | os.remove(self.fp) 184 | 185 | def test_normalization_of_string_arrays_netcdf4(self): 186 | thestr = "bosadfsdfkljskfusdiofu987987987om" 187 | 188 | with nc4.Dataset(self.fp, "w", format="NETCDF4") as ncd: 189 | dimsize = len(thestr) 190 | ncd.createDimension("n", dimsize) 191 | 192 | # Single str (no dimension) 193 | ncd.createVariable("single_str", str) 194 | ncd.createVariable("single_unicode_", np.str_) 195 | ncd.createVariable("single_U", " 1: 221 | v[:, :] = np.tile(nc4.stringtoarr(thestr, dimsize), dimsize).reshape( 222 | v.shape 223 | ) 224 | else: 225 | v[:] = np.tile(thestr, dimsize).reshape(v.shape) 226 | 227 | with nc4.Dataset(self.fp) as ncd: 228 | assert normalize_array(ncd.variables["single_str"]) == thestr 229 | assert normalize_array(ncd.variables["single_unicode_"]) == thestr 230 | assert normalize_array(ncd.variables["single_U"]) == thestr 231 | assert normalize_array(ncd.variables["single_S"]) == thestr 232 | 233 | assert np.all(normalize_array(ncd.variables["many_str"]) == [thestr] * len(thestr)) 234 | assert np.all(normalize_array(ncd.variables["many_unicode_"]) == [thestr] * len(thestr)) 235 | assert np.all(normalize_array(ncd.variables["many_U"]) == [thestr] * len(thestr)) 236 | assert np.all(normalize_array(ncd.variables["many_S"]) == [thestr] * len(thestr)) 237 | 238 | def 
test_normalization_of_string_arrays_netcdf3(self): 239 | thestr = "boodsfasfasdfm" 240 | 241 | with nc4.Dataset(self.fp, "w", format="NETCDF3_CLASSIC") as ncd: 242 | dimsize = len(thestr) 243 | ncd.createDimension("n", dimsize) 244 | 245 | # Single str (stored as a char array) 246 | ncd.createVariable("single_S", "S1", ("n",)) 247 | 248 | for k, v in ncd.variables.items(): 249 | if k.startswith("single_"): 250 | v[:] = nc4.stringtoarr(thestr, dimsize) 251 | 252 | # Array of str 253 | ncd.createVariable( 254 | "many_S", 255 | "S1", 256 | ( 257 | "n", 258 | "n", 259 | ), 260 | ) 261 | 262 | for k, v in ncd.variables.items(): 263 | if k.startswith("many_"): 264 | v[:, :] = np.tile(nc4.stringtoarr(thestr, dimsize), dimsize).reshape(v.shape) 265 | 266 | with nc4.Dataset(self.fp) as ncd: 267 | assert normalize_array(ncd.variables["single_S"]) == thestr 268 | assert np.all(normalize_array(ncd.variables["many_S"]) == [thestr] * dimsize) 269 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | build-backend = "setuptools.build_meta" 3 | requires = [ 4 | "setuptools>=42", 5 | "setuptools-scm[toml]>=3.4", 6 | "wheel", 7 | ] 8 | 9 | [project] 10 | name = "pocean-core" 11 | description = "A python framework for working with met-ocean data" 12 | readme = "README.md" 13 | license = { file = "LICENSE.txt" } 14 | authors = [ 15 | { name = "Kyle Wilcox", email = "kyle@axds.co" }, 16 | ] 17 | requires-python = ">=3.9" 18 | classifiers = [ 19 | "Programming Language :: Python :: 3 :: Only", 20 | "Programming Language :: Python :: 3.9", 21 | "Programming Language :: Python :: 3.10", 22 | "Programming Language :: Python :: 3.11", 23 | "Programming Language :: Python :: 3.12", 24 | "Programming Language :: Python :: 3.13", 25 | ] 26 | dynamic = [ 27 | "version", 28 | ] 29 | dependencies = [ 30 | "cftime>=1.2.1", 31 | "netcdf4", 32 | "numpy>=1.20", 33 | "pandas>=1.0.5", 34 | "python-dateutil", 35 | "pytz", 36 | "shapely>=1.8", 37 | "simplejson", 38 | ] 39 | urls.documentation = "https://pyoceans.github.io/pocean-core" 40 | urls.homepage = "https://pypi.org/project/pocean-core/" 41 | urls.repository = "https://github.com/pyoceans/pocean-core" 42 | 43 | [tool.setuptools] 44 | packages = [ 45 | "pocean", 46 | ] 47 | 48 | [tool.setuptools_scm] 49 | write_to = "pocean/_version.py" 50 | write_to_template = "__version__ = '{version}'" 51 | tag_regex = "^(?P<prefix>v)?(?P<version>[^\\+]+)(?P<suffix>.*)?$" 52 | 53 | [tool.ruff] 54 | 55 | line-length = 100 56 | 57 | exclude = [ 58 | ".git", 59 | ".git/", 60 | "__pycache__", 61 | "dist", 62 | "docs/", 63 | ] 64 | 65 | lint.select = [ 66 | "E", # pycodestyle errors 67 | "F", # pyflakes 68 | "I", # import sorting (isort) 69 | "W", # pycodestyle warnings 70 | ] 71 | 72 | lint.ignore = [ 73 | #"E265", 74 | #"E221", 75 | #"E203", 76 | #"E201", 77 | #"E124", 78 | #"E202", 79 | #"E241", 80 | #"E251", 81 | #"W504", 82 | "E501", 83 | "W291", 84 | "W293", 85 | ] 86 | 87 | lint.per-file-ignores."pocean/tests/*.py" = [ 88 | "F403", 89 | "F405", 90 | ] 91 | lint.isort.order-by-type = false 92 | 93 | [tool.pytest.ini_options] 94 | addopts = "-s -rxs -v" 95 | 96 | filterwarnings = [ 97 | "error", 98 | ] 99 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | flake8 2 | pooch 3 | pre-commit 4 | pytest 5 | 
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cftime>=1.2.1 2 | netcdf4 3 | numpy>=1.20 4 | pandas>=1.0.5 5 | python-dateutil 6 | pytz 7 | shapely>=1.8 8 | simplejson 9 | --------------------------------------------------------------------------------