├── .github ├── dependabot.yml └── workflows │ ├── deploy-docs.yml │ ├── pypi.yml │ └── tests.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE.txt ├── MANIFEST.in ├── README.md ├── docs ├── Makefile ├── conf.py ├── development.rst ├── index.rst ├── notebooks │ ├── Readme.md │ ├── adcp.ipynb │ ├── full.nc │ ├── hello.nc │ └── imp.nc └── requirements.txt ├── pocean ├── __init__.py ├── cf.py ├── dataset.py ├── dsg │ ├── __init__.py │ ├── profile │ │ ├── __init__.py │ │ ├── im.py │ │ └── om.py │ ├── timeseries │ │ ├── __init__.py │ │ ├── cr.py │ │ ├── im.py │ │ ├── ir.py │ │ └── om.py │ ├── timeseriesProfile │ │ ├── __init__.py │ │ ├── im.py │ │ ├── om.py │ │ └── r.py │ ├── trajectory │ │ ├── __init__.py │ │ ├── cr.py │ │ ├── im.py │ │ └── ir.py │ ├── trajectoryProfile │ │ ├── __init__.py │ │ └── cr.py │ └── utils.py ├── grid │ └── __init__.py ├── meta.py ├── tests │ ├── __init__.py │ ├── download_test_data.py │ ├── dsg │ │ ├── __init__.py │ │ ├── profile │ │ │ ├── test_profile_im.py │ │ │ └── test_profile_om.py │ │ ├── test_new.py │ │ ├── test_utils.py │ │ ├── timeseries │ │ │ ├── test_timeseries_im.py │ │ │ └── test_timeseries_om.py │ │ ├── timeseriesProfile │ │ │ ├── test_timeseriesProfile_im.py │ │ │ ├── test_timeseriesProfile_om.py │ │ │ └── test_timeseriesProfile_r.py │ │ ├── trajectory │ │ │ ├── test_trajectory_cr.py │ │ │ └── test_trajectory_im.py │ │ └── trajectoryProfile │ │ │ └── test_trajectoryProfile_cr.py │ ├── test_cf.py │ ├── test_nc.py │ └── test_utils.py └── utils.py ├── pyproject.toml ├── requirements-dev.txt └── requirements.txt /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # See https://docs.github.com/en/code-security/supply-chain-security/keeping-your-dependencies-updated-automatically/keeping-your-actions-up-to-date-with-dependabot 2 | 3 | version: 2 4 | updates: 5 | 6 | - package-ecosystem: "github-actions" 7 | directory: "/" 8 | schedule: 9 | interval: "daily" 10 | labels: 11 | - "Bot" 12 | groups: 13 | github-actions: 14 | patterns: 15 | - '*' -------------------------------------------------------------------------------- /.github/workflows/deploy-docs.yml: -------------------------------------------------------------------------------- 1 | name: Build and Deploy docs 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - main 8 | release: 9 | types: 10 | - published 11 | 12 | jobs: 13 | build-docs: 14 | runs-on: ubuntu-latest 15 | defaults: 16 | run: 17 | shell: bash -l {0} 18 | 19 | steps: 20 | - name: checkout 21 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 22 | with: 23 | fetch-depth: 0 24 | 25 | - name: Setup Micromamba 26 | uses: mamba-org/setup-micromamba@b09ef9b599704322748535812ca03efb2625677b # v2.0.5 27 | with: 28 | environment-name: TEST 29 | init-shell: bash 30 | create-args: >- 31 | python=3 --file requirements.txt 32 | --file requirements-dev.txt 33 | --file docs/requirements.txt 34 | --channel conda-forge 35 | 36 | - name: Install library 37 | run: | 38 | python -m pip install -e . 
--no-deps --force-reinstall 39 | 40 | - name: Build documentation 41 | run: | 42 | set -e 43 | pushd docs 44 | sphinx-apidoc -M -f -o api ../pocean ../pocean/tests 45 | make clean html linkcheck 46 | popd 47 | 48 | - name: Deploy 49 | if: success() && github.event_name == 'release' 50 | uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 51 | with: 52 | github_token: ${{ secrets.GITHUB_TOKEN }} 53 | publish_dir: docs/_site/html 54 | -------------------------------------------------------------------------------- /.github/workflows/pypi.yml: -------------------------------------------------------------------------------- 1 | name: Publish to PyPI 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - main 8 | release: 9 | types: 10 | - published 11 | 12 | defaults: 13 | run: 14 | shell: bash 15 | 16 | jobs: 17 | packages: 18 | runs-on: ubuntu-latest 19 | steps: 20 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 21 | 22 | - name: Set up Python 23 | uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 24 | with: 25 | python-version: "3.x" 26 | 27 | - name: Get tags 28 | run: git fetch --depth=1 origin +refs/tags/*:refs/tags/* 29 | 30 | - name: Install build tools 31 | run: | 32 | python -m pip install --upgrade build 33 | 34 | - name: Build sdist and binary wheel 35 | run: python -m build --sdist --wheel . --outdir dist 36 | 37 | - name: CheckFiles 38 | run: | 39 | ls dist 40 | python -m pip install --upgrade check-manifest 41 | check-manifest --verbose 42 | 43 | - name: Test wheels 44 | run: | 45 | cd dist && python -m pip install *.whl 46 | python -m pip install --upgrade twine 47 | python -m twine check * 48 | 49 | - name: Publish a Python distribution to PyPI 50 | if: success() && github.event_name == 'release' 51 | uses: pypa/gh-action-pypi-publish@76f52bc884231f62b9a034ebfe128415bbaabdfc # v1.12.4 52 | with: 53 | user: __token__ 54 | password: ${{ secrets.PYPI_PASSWORD }} 55 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: [main] 7 | 8 | jobs: 9 | run: 10 | runs-on: ${{ matrix.os }} 11 | strategy: 12 | matrix: 13 | python-version: [ "3.9", "3.10", "3.11", "3.12", "3.13" ] 14 | os: [ windows-latest, ubuntu-latest, macos-latest ] 15 | fail-fast: false 16 | defaults: 17 | run: 18 | shell: bash -l {0} 19 | 20 | steps: 21 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 22 | with: 23 | fetch-depth: 0 24 | 25 | - name: Setup Micromamba Python ${{ matrix.python-version }} 26 | uses: mamba-org/setup-micromamba@b09ef9b599704322748535812ca03efb2625677b # v2.0.5 27 | with: 28 | environment-name: TEST 29 | init-shell: bash 30 | create-args: >- 31 | python=${{ matrix.python-version }} 32 | --file requirements.txt 33 | --file requirements-dev.txt 34 | --channel conda-forge 35 | 36 | - name: Install library 37 | run: | 38 | python -m pip install -e . 
--no-deps --force-reinstall 39 | 40 | - name: Tests 41 | run: > 42 | python pocean/tests/download_test_data.py 43 | && python -m pytest --pyargs pocean 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | output/* 3 | *.sublime-* 4 | *.swp 5 | build/* 6 | dist/* 7 | resources/ 8 | *.egg-info* 9 | .cache 10 | docs/api 11 | docs/_site 12 | .pytest_cache/ 13 | .envrc 14 | .idea 15 | .vscode 16 | pocean/_version.py 17 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v5.0.0 4 | hooks: 5 | - id: end-of-file-fixer 6 | files: .*\.*.py 7 | - id: trailing-whitespace 8 | files: .*\.*.py 9 | - id: debug-statements 10 | - id: check-ast 11 | - id: check-added-large-files 12 | - id: check-json 13 | - id: check-merge-conflict 14 | - id: check-yaml 15 | - id: requirements-txt-fixer 16 | args: 17 | - requirements.txt 18 | - requirements-dev.txt 19 | 20 | - repo: https://github.com/astral-sh/ruff-pre-commit 21 | rev: v0.11.12 22 | hooks: 23 | - id: ruff 24 | args: ["--fix", "--show-fixes"] 25 | - id: ruff-format 26 | 27 | - repo: https://github.com/tox-dev/pyproject-fmt 28 | rev: "v2.6.0" 29 | hooks: 30 | - id: pyproject-fmt 31 | 32 | - repo: https://github.com/asottile/pyupgrade 33 | rev: v3.20.0 34 | hooks: 35 | - id: pyupgrade 36 | args: [--py38-plus] 37 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2022 Axiom Data Science 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.txt 2 | include README.md 3 | include pyproject.toml 4 | 5 | graft pocean 6 | 7 | prune .github 8 | prune *.egg-info 9 | prune docs 10 | prune pocean/tests 11 | 12 | exclude .coveragerc 13 | exclude .gitignore 14 | exclude .pre-commit-config.yaml 15 | exclude pocean/_version.py 16 | exclude ruff.toml 17 | 18 | global-exclude *.nc 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🌐 pocean-core 2 | 3 | [![Push](https://github.com/pyoceans/pocean-core/actions/workflows/tests.yml/badge.svg)](https://github.com/pyoceans/pocean-core/actions/workflows/push.yml) 4 | [![license](https://img.shields.io/github/license/pyoceans/pocean-core.svg)](https://github.com/pyoceans/pocean-core/blob/master/LICENSE.txt) 5 | [![GitHub release](https://img.shields.io/github/release/pyoceans/pocean-core/all.svg)](https://pypi.org/project/pocean-core/) 6 | 7 | 8 | 🐍 + 🌊 9 | 10 | A python framework for working with met-ocean data 11 | 12 | ## Resources 13 | + **Documentation:** 14 | + **API:** 15 | + **Source Code:** 16 | + **Git clone URL:** 17 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = pocean-core 8 | SOURCEDIR = . 9 | BUILDDIR = _site 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # pocean-core documentation build configuration file, created by 4 | # sphinx-quickstart on Fri Feb 10 16:09:19 2017. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here. 18 | # 19 | import os 20 | import sys 21 | p = os.path.abspath( 22 | os.path.dirname(os.path.dirname(__file__)) 23 | ) 24 | sys.path.insert(0, p) 25 | 26 | # -- General configuration ------------------------------------------------ 27 | 28 | # If your documentation needs a minimal Sphinx version, state it here. 29 | # 30 | # needs_sphinx = '1.0' 31 | 32 | # Add any Sphinx extension module names here, as strings. 
They can be 33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 34 | # ones. 35 | extensions = [ 36 | 'sphinx.ext.autodoc', 37 | 'sphinx.ext.autosummary', 38 | 'sphinx.ext.napoleon' 39 | ] 40 | 41 | napoleon_google_docstring = True 42 | napoleon_numpy_docstring = True 43 | napoleon_include_init_with_doc = False 44 | napoleon_include_private_with_doc = True 45 | napoleon_include_special_with_doc = False 46 | napoleon_use_admonition_for_examples = True 47 | napoleon_use_admonition_for_notes = True 48 | napoleon_use_admonition_for_references = True 49 | napoleon_use_ivar = False 50 | napoleon_use_param = True 51 | napoleon_use_keyword = True 52 | napoleon_use_rtype = True 53 | 54 | # Add any paths that contain templates here, relative to this directory. 55 | templates_path = ['_templates'] 56 | 57 | # The suffix(es) of source filenames. 58 | # You can specify multiple suffix as a list of string: 59 | # 60 | source_suffix = ['.rst'] 61 | 62 | # The master toctree document. 63 | master_doc = 'index' 64 | 65 | # General information about the project. 66 | project = 'pocean-core' 67 | copyright = '2023, Kyle Wilcox' 68 | author = 'Kyle Wilcox' 69 | 70 | # The version info for the project you're documenting, acts as replacement for 71 | # |version| and |release|, also used in various other places throughout the 72 | # built documents. 73 | # 74 | # The short X.Y version. 75 | from pocean import __version__ # noqa 76 | 77 | version = __version__ 78 | # The full version, including alpha/beta/rc tags. 79 | release = __version__ 80 | 81 | # The language for content autogenerated by Sphinx. Refer to documentation 82 | # for a list of supported languages. 83 | # 84 | # This is also used if you do content translation via gettext catalogs. 85 | # Usually you set "language" from the command line for these cases. 86 | language = "en" 87 | 88 | # List of patterns, relative to source directory, that match files and 89 | # directories to ignore when looking for source files. 90 | # This patterns also effect to html_static_path and html_extra_path 91 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 92 | 93 | # The name of the Pygments (syntax highlighting) style to use. 94 | pygments_style = 'sphinx' 95 | 96 | # If true, `todo` and `todoList` produce output, else they produce nothing. 97 | todo_include_todos = False 98 | 99 | 100 | # -- Options for HTML output ---------------------------------------------- 101 | 102 | # The theme to use for HTML and HTML Help pages. See the documentation for 103 | # a list of builtin themes. 104 | # 105 | html_theme = 'alabaster' 106 | 107 | # Theme options are theme-specific and customize the look and feel of a theme 108 | # further. For a list of options available for each theme, see the 109 | # documentation. 110 | # 111 | html_theme_options = { 112 | 'description': 'A python framework for working with met-ocean data', 113 | 'github_user': 'pyoceans', 114 | 'github_repo': 'pocean-core', 115 | 'github_button': 'true', 116 | } 117 | 118 | # Add any paths that contain custom static files (such as style sheets) here, 119 | # relative to this directory. They are copied after the builtin static files, 120 | # so a file named "default.css" will overwrite the builtin "default.css". 121 | #html_static_path = ['_static'] 122 | 123 | # If true, links to the reST sources are added to the pages. 
124 | html_show_sourcelink = False 125 | 126 | # -- Options for HTMLHelp output ------------------------------------------ 127 | 128 | # Output file base name for HTML help builder. 129 | htmlhelp_basename = 'pocean-coredoc' 130 | 131 | 132 | # -- Options for LaTeX output --------------------------------------------- 133 | 134 | latex_elements = { 135 | # The paper size ('letterpaper' or 'a4paper'). 136 | # 137 | # 'papersize': 'letterpaper', 138 | 139 | # The font size ('10pt', '11pt' or '12pt'). 140 | # 141 | # 'pointsize': '10pt', 142 | 143 | # Additional stuff for the LaTeX preamble. 144 | # 145 | # 'preamble': '', 146 | 147 | # Latex figure (float) alignment 148 | # 149 | # 'figure_align': 'htbp', 150 | } 151 | 152 | # Grouping the document tree into LaTeX files. List of tuples 153 | # (source start file, target name, title, 154 | # author, documentclass [howto, manual, or own class]). 155 | latex_documents = [ 156 | (master_doc, 'pocean-core.tex', 'pocean-core Documentation', 157 | 'Kyle Wilcox', 'manual'), 158 | ] 159 | 160 | 161 | # -- Options for manual page output --------------------------------------- 162 | 163 | # One entry per manual page. List of tuples 164 | # (source start file, name, description, authors, manual section). 165 | man_pages = [ 166 | (master_doc, 'pocean-core', 'pocean-core Documentation', 167 | [author], 1) 168 | ] 169 | 170 | 171 | # -- Options for Texinfo output ------------------------------------------- 172 | 173 | # Grouping the document tree into Texinfo files. List of tuples 174 | # (source start file, target name, title, author, 175 | # dir menu entry, description, category) 176 | texinfo_documents = [ 177 | (master_doc, 'pocean-core', 'pocean-core Documentation', 178 | author, 'pocean-core', 'A python framework for working with met-ocean data.', 179 | 'Miscellaneous'), 180 | ] 181 | -------------------------------------------------------------------------------- /docs/development.rst: -------------------------------------------------------------------------------- 1 | Development 2 | ============ 3 | 4 | Create a conda environment 5 | 6 | .. code-block:: bash 7 | 8 | conda create --name pocean310 python=3.10 --file requirements.txt --file requirements-dev.txt 9 | conda activate pocean310 10 | 11 | Running tests 12 | ------------- 13 | 14 | .. code-block:: bash 15 | 16 | # download test datasets 17 | cd pocean/tests 18 | python download_test_data.py 19 | 20 | # run test suite 21 | pytest 22 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | 🌐 pocean-core 2 | ============== 3 | 4 | 🐍 + 🌊 5 | 6 | A python framework for working with met-ocean data 7 | 8 | 9 | Documentation 10 | ============= 11 | 12 | .. 
toctree:: 13 | :maxdepth: 3 14 | :caption: Contents: 15 | 16 | api/modules 17 | development 18 | 19 | Indices and tables 20 | ================== 21 | 22 | * :ref:`genindex` 23 | * :ref:`modindex` 24 | * :ref:`search` 25 | -------------------------------------------------------------------------------- /docs/notebooks/Readme.md: -------------------------------------------------------------------------------- 1 | # Notebook examples using pocean-core 2 | -------------------------------------------------------------------------------- /docs/notebooks/adcp.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true 7 | }, 8 | "source": [ 9 | "# NRL ADCP .mat file to CF-1.6 timeSeriesProfile using pocean" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "Here we read a matlab file with minimal metadata, and write a CF-DSG 1.6 timeSeriesProfile netcdf file. We want the file to work seamlessly with ERDDAP, so we add some ERDDAP specific attributes like `cdm_timeseries_variables`, `cdm_profile_variables`, and `subsetVariables`." 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": { 23 | "collapsed": true 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "import matplotlib.pyplot as plt\n", 28 | "import pandas as pd\n", 29 | "from scipy.io import loadmat\n", 30 | "import datetime as dt\n", 31 | "import numpy as np\n", 32 | "\n", 33 | "#conda install -c conda-forge pocean-core\n", 34 | "from pocean.dsg.timeseriesProfile.om import OrthogonalMultidimensionalTimeseriesProfile" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 2, 40 | "metadata": { 41 | "collapsed": true 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "# wget http://www.satlab.hawaii.edu/onr/adria/data/moorings/nrl/Final/ADCP_matlab/VR4f.mat\n", 46 | "d = loadmat('/data/ADRIA/MOORINGS/NRL/VR4f.mat')" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 3, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "data": { 56 | "text/html": [ 57 | "
<div>\n",
 58 |         "<table border=\"1\" class=\"dataframe\">\n",
 59 |         "  <thead>\n",
 60 |         "    <tr><th></th><th>profile</th><th>station</th><th>t</th><th>un</th><th>vn</th><th>wn</th><th>x</th><th>y</th><th>z</th></tr>\n",
 61 |         "  </thead>\n",
 62 |         "  <tbody>\n",
 63 |         "    <tr><th>296338</th><td>5199</td><td>VR4F</td><td>2003-04-29 03:00:00</td><td>0.061417</td><td>0.394078</td><td>0.017406</td><td>13.0281</td><td>45.187783</td><td>28.548073</td></tr>\n",
 64 |         "    <tr><th>296339</th><td>5199</td><td>VR4F</td><td>2003-04-29 03:00:00</td><td>-0.044268</td><td>0.653439</td><td>0.003686</td><td>13.0281</td><td>45.187783</td><td>29.048073</td></tr>\n",
 65 |         "    <tr><th>296340</th><td>5199</td><td>VR4F</td><td>2003-04-29 03:00:00</td><td>0.054443</td><td>0.386804</td><td>0.004221</td><td>13.0281</td><td>45.187783</td><td>29.548073</td></tr>\n",
 66 |         "    <tr><th>296341</th><td>5199</td><td>VR4F</td><td>2003-04-29 03:00:00</td><td>0.098836</td><td>0.529064</td><td>-0.011401</td><td>13.0281</td><td>45.187783</td><td>30.048073</td></tr>\n",
 67 |         "    <tr><th>296342</th><td>5199</td><td>VR4F</td><td>2003-04-29 03:00:00</td><td>0.008518</td><td>0.550976</td><td>0.011823</td><td>13.0281</td><td>45.187783</td><td>30.548073</td></tr>\n",
 68 |         "  </tbody>\n",
 69 |         "</table>\n",
 70 |         "</div>
" 150 | ], 151 | "text/plain": [ 152 | " profile station t un vn wn \\\n", 153 | "296338 5199 VR4F 2003-04-29 03:00:00 0.061417 0.394078 0.017406 \n", 154 | "296339 5199 VR4F 2003-04-29 03:00:00 -0.044268 0.653439 0.003686 \n", 155 | "296340 5199 VR4F 2003-04-29 03:00:00 0.054443 0.386804 0.004221 \n", 156 | "296341 5199 VR4F 2003-04-29 03:00:00 0.098836 0.529064 -0.011401 \n", 157 | "296342 5199 VR4F 2003-04-29 03:00:00 0.008518 0.550976 0.011823 \n", 158 | "\n", 159 | " x y z \n", 160 | "296338 13.0281 45.187783 28.548073 \n", 161 | "296339 13.0281 45.187783 29.048073 \n", 162 | "296340 13.0281 45.187783 29.548073 \n", 163 | "296341 13.0281 45.187783 30.048073 \n", 164 | "296342 13.0281 45.187783 30.548073 " 165 | ] 166 | }, 167 | "execution_count": 3, 168 | "metadata": {}, 169 | "output_type": "execute_result" 170 | } 171 | ], 172 | "source": [ 173 | "times = [dt.datetime(2002,1,1,0,0,0) + dt.timedelta(a) for a in d['timen'].flatten()]\n", 174 | "depths = d['mdepth'].flatten()\n", 175 | "\n", 176 | "# Repeat each time for the number of depths\n", 177 | "t = np.repeat(times, len(depths))\n", 178 | "\n", 179 | "# Create a profile index, and repeat for number of depths\n", 180 | "profile = np.repeat(np.array(range(len(times)), dtype=np.int32) + 1, len(depths))\n", 181 | "\n", 182 | "# Tile the depths for each time\n", 183 | "z = np.tile(depths, len(times))\n", 184 | "\n", 185 | "df = pd.DataFrame({\n", 186 | " 't': t,\n", 187 | " 'x': 13.0281,\n", 188 | " 'y': 45.187783,\n", 189 | " 'z': z,\n", 190 | " 'un': d['un'].T.flatten()/10., # cm/s to m/s\n", 191 | " 'vn': d['vn'].T.flatten()/10., # cm/s to m/s\n", 192 | " 'wn': d['wn'].T.flatten()/10., # cm/s to m/s\n", 193 | " 'profile': profile,\n", 194 | " 'station': 'VR4F'\n", 195 | "})\n", 196 | "\n", 197 | "df.tail()" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 4, 203 | "metadata": { 204 | "collapsed": true 205 | }, 206 | "outputs": [], 207 | "source": [ 208 | "atts={\n", 209 | " 'global': {\n", 210 | " 'title': 'ADRIA02 Mooring VR4',\n", 211 | " 'summary': 'Data from bottom-mounted ADCP',\n", 212 | " 'institution': 'NRL',\n", 213 | " 'cdm_timeseries_variables': 'station',\n", 214 | " 'cdm_profile_variables': 'profile',\n", 215 | " 'subsetVariables': 'depth'\n", 216 | " },\n", 217 | " 'longitude': {\n", 218 | " 'units': 'degrees_east',\n", 219 | " 'standard_name':'longitude'\n", 220 | " },\n", 221 | " 'latitude': {\n", 222 | " 'units': 'degrees_north',\n", 223 | " 'standard_name':'latitude'\n", 224 | " },\n", 225 | " 'z': {\n", 226 | " 'units': 'm',\n", 227 | " 'standard_name': 'depth',\n", 228 | " 'positive':'down'\n", 229 | " },\n", 230 | " 'un': {\n", 231 | " 'units': 'm/s',\n", 232 | " 'standard_name':'eastward_sea_water_velocity'\n", 233 | " },\n", 234 | " 'vn': {\n", 235 | " 'units': 'm/s',\n", 236 | " 'standard_name':'northward_sea_water_velocity'\n", 237 | " },\n", 238 | " 'profile': {\n", 239 | " 'cf_role': 'profile_id'\n", 240 | " }\n", 241 | " }" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 5, 247 | "metadata": {}, 248 | "outputs": [ 249 | { 250 | "data": { 251 | "text/plain": [ 252 | "{'cdm_profile_variables': 'profile',\n", 253 | " 'cdm_timeseries_variables': 'station',\n", 254 | " 'institution': 'NRL',\n", 255 | " 'subsetVariables': 'depth',\n", 256 | " 'summary': 'Data from bottom-mounted ADCP',\n", 257 | " 'title': 'ADRIA02 Mooring VR4'}" 258 | ] 259 | }, 260 | "execution_count": 5, 261 | "metadata": {}, 262 | "output_type": "execute_result" 263 | } 264 | ], 265 
| "source": [ 266 | "atts['global']" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": 6, 272 | "metadata": { 273 | "scrolled": true 274 | }, 275 | "outputs": [ 276 | { 277 | "name": "stderr", 278 | "output_type": "stream", 279 | "text": [ 280 | "/home/richard.signell/miniconda3/envs/IOOS3/lib/python3.6/site-packages/pocean/dsg/timeseriesProfile/om.py:82: UserWarning: endian-ness of dtype and endian kwarg do not match, using endian kwarg\n", 281 | " latitude = nc.createVariable('latitude', get_dtype(df.y), ('station',))\n", 282 | "/home/richard.signell/miniconda3/envs/IOOS3/lib/python3.6/site-packages/pocean/dsg/timeseriesProfile/om.py:83: UserWarning: endian-ness of dtype and endian kwarg do not match, using endian kwarg\n", 283 | " longitude = nc.createVariable('longitude', get_dtype(df.x), ('station',))\n", 284 | "/home/richard.signell/miniconda3/envs/IOOS3/lib/python3.6/site-packages/pocean/dsg/timeseriesProfile/om.py:84: UserWarning: endian-ness of dtype and endian kwarg do not match, using endian kwarg\n", 285 | " z = nc.createVariable('z', get_dtype(df.z), ('z',))\n", 286 | "/home/richard.signell/miniconda3/envs/IOOS3/lib/python3.6/site-packages/pocean/dsg/timeseriesProfile/om.py:108: UserWarning: endian-ness of dtype and endian kwarg do not match, using endian kwarg\n", 287 | " v = nc.createVariable(var_name, get_dtype(sdf[c]), ('time', 'z', 'station'), fill_value=sdf[c].dtype.type(cls.default_fill_value))\n" 288 | ] 289 | }, 290 | { 291 | "data": { 292 | "text/plain": [ 293 | "\n", 294 | "root group (NETCDF4 data model, file format HDF5):\n", 295 | " Conventions: CF-1.6\n", 296 | " date_created: 2017-06-21T12:19:00Z\n", 297 | " featureType: timeseriesProfile\n", 298 | " cdm_data_type: TimeseriesProfile\n", 299 | " title: ADRIA02 Mooring VR4\n", 300 | " summary: Data from bottom-mounted ADCP\n", 301 | " institution: NRL\n", 302 | " cdm_timeseries_variables: station\n", 303 | " cdm_profile_variables: profile\n", 304 | " subsetVariables: depth\n", 305 | " dimensions(sizes): station(1), time(5199), z(57)\n", 306 | " variables(dimensions): int32 \u001b[4mcrs\u001b[0m(), \u001b[4mstation\u001b[0m(station), float64 \u001b[4mtime\u001b[0m(time), float64 \u001b[4mlatitude\u001b[0m(station), float64 \u001b[4mlongitude\u001b[0m(station), float64 \u001b[4mz\u001b[0m(z), int32 \u001b[4mprofile\u001b[0m(time,z,station), float64 \u001b[4mun\u001b[0m(time,z,station), float64 \u001b[4mvn\u001b[0m(time,z,station), float64 \u001b[4mwn\u001b[0m(time,z,station)\n", 307 | " groups: " 308 | ] 309 | }, 310 | "execution_count": 6, 311 | "metadata": {}, 312 | "output_type": "execute_result" 313 | } 314 | ], 315 | "source": [ 316 | "OrthogonalMultidimensionalTimeseriesProfile.from_dataframe(df, output='/data/ADRIA/MOORINGS/NRL/vr4f.nc', \n", 317 | " attributes=atts)" 318 | ] 319 | } 320 | ], 321 | "metadata": { 322 | "_draft": { 323 | "nbviewer_url": "https://gist.github.com/b2f37b7724981e80e48bd59311ac9a58" 324 | }, 325 | "gist": { 326 | "data": { 327 | "description": "erddap/adcp.ipynb", 328 | "public": true 329 | }, 330 | "id": "b2f37b7724981e80e48bd59311ac9a58" 331 | }, 332 | "kernelspec": { 333 | "display_name": "Python [conda env:IOOS3]", 334 | "language": "python", 335 | "name": "conda-env-IOOS3-py" 336 | }, 337 | "language_info": { 338 | "codemirror_mode": { 339 | "name": "ipython", 340 | "version": 3 341 | }, 342 | "file_extension": ".py", 343 | "mimetype": "text/x-python", 344 | "name": "python", 345 | "nbconvert_exporter": "python", 346 | "pygments_lexer": "ipython3", 
347 | "version": "3.6.1" 348 | } 349 | }, 350 | "nbformat": 4, 351 | "nbformat_minor": 2 352 | } 353 | -------------------------------------------------------------------------------- /docs/notebooks/full.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyoceans/pocean-core/6e4dda9c818c65b31192e8d5fb01d6bcf29a43f3/docs/notebooks/full.nc -------------------------------------------------------------------------------- /docs/notebooks/hello.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyoceans/pocean-core/6e4dda9c818c65b31192e8d5fb01d6bcf29a43f3/docs/notebooks/hello.nc -------------------------------------------------------------------------------- /docs/notebooks/imp.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyoceans/pocean-core/6e4dda9c818c65b31192e8d5fb01d6bcf29a43f3/docs/notebooks/imp.nc -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | -------------------------------------------------------------------------------- /pocean/__init__.py: -------------------------------------------------------------------------------- 1 | #!python 2 | 3 | # Package level logger 4 | import logging 5 | 6 | logger = logging.getLogger("pocean") 7 | logger.addHandler(logging.NullHandler()) 8 | 9 | try: 10 | from ._version import __version__ 11 | except ImportError: 12 | __version__ = "unknown" 13 | -------------------------------------------------------------------------------- /pocean/cf.py: -------------------------------------------------------------------------------- 1 | #!python 2 | import datetime 3 | import itertools 4 | import os 5 | import re 6 | 7 | from . import logger 8 | from .dataset import EnhancedDataset 9 | from .utils import all_subclasses, is_url 10 | 11 | datetime.UTC = datetime.timezone.utc 12 | 13 | 14 | class CFDataset(EnhancedDataset): 15 | default_fill_value = False 16 | default_time_unit = "seconds since 1990-01-01 00:00:00Z" 17 | 18 | @classmethod 19 | def load(cls, path): 20 | """Attempt to load a netCDF file as a CF compatible dataset 21 | 22 | Extended description of function. 23 | 24 | Parameters 25 | ---------- 26 | path : 27 | Path to netCDF file 28 | 29 | Returns 30 | ------- 31 | CFDataset subclass for your netCDF file 32 | 33 | Raises 34 | ------ 35 | ValueError: 36 | If no suitable class is found for your dataset 37 | 38 | """ 39 | 40 | if not is_url(path): 41 | path = os.path.realpath(path) 42 | 43 | subs = list(all_subclasses(cls)) 44 | 45 | dsg = None 46 | try: 47 | dsg = cls(path) 48 | for klass in subs: 49 | logger.debug(f"Trying {klass.__name__}...") 50 | if hasattr(klass, "is_mine"): 51 | if klass.is_mine(dsg): 52 | return klass(path) 53 | except OSError: 54 | raise 55 | finally: 56 | if hasattr(dsg, "close"): 57 | dsg.close() 58 | 59 | subnames = ", ".join([s.__name__ for s in subs]) 60 | raise ValueError(f"Could not open {path} as any type of CF Dataset. 
Tried: {subnames}.") 61 | 62 | def axes(self, name): 63 | return getattr(self, f"{name.lower()}_axes")() 64 | 65 | def t_axes(self): 66 | # If there is only one variable with the axis parameter, return it 67 | hasaxis = self.filter_by_attrs(axis=lambda x: x and str(x).lower() == "t") 68 | if len(hasaxis) == 1: 69 | return hasaxis 70 | 71 | tvars = list( 72 | set( 73 | itertools.chain( 74 | hasaxis, 75 | self.filter_by_attrs( 76 | standard_name=lambda x: x in ["time", "forecast_reference_time"] 77 | ), 78 | ) 79 | ) 80 | ) 81 | return tvars 82 | 83 | def x_axes(self): 84 | """ 85 | CF X axis will have one of the following: 86 | * The `axis` property has the value ``'X'`` 87 | * Units of longitude (see `cf.Units.islongitude` for details) 88 | * The `standard_name` property is one of ``'longitude'``, 89 | ``'projection_x_coordinate'`` or ``'grid_longitude'`` 90 | """ 91 | xnames = ["longitude", "grid_longitude", "projection_x_coordinate"] 92 | xunits = ["degrees_east", "degree_east", "degree_E", "degrees_E", "degreeE", "degreesE"] 93 | 94 | # If there is only one variable with the axis parameter, return it 95 | hasaxis = self.filter_by_attrs(axis=lambda x: x and str(x).lower() == "x") 96 | if len(hasaxis) == 1: 97 | return hasaxis 98 | 99 | xvars = list( 100 | set( 101 | itertools.chain( 102 | hasaxis, 103 | self.filter_by_attrs(standard_name=lambda x: x and str(x).lower() in xnames), 104 | self.filter_by_attrs(units=lambda x: x and str(x).lower() in xunits), 105 | ) 106 | ) 107 | ) 108 | return xvars 109 | 110 | def y_axes(self): 111 | ynames = ["latitude", "grid_latitude", "projection_y_coordinate"] 112 | yunits = ["degrees_north", "degree_north", "degree_N", "degrees_N", "degreeN", "degreesN"] 113 | 114 | # If there is only one variable with the axis parameter, return it 115 | hasaxis = self.filter_by_attrs(axis=lambda x: x and str(x).lower() == "y") 116 | if len(hasaxis) == 1: 117 | return hasaxis 118 | 119 | yvars = list( 120 | set( 121 | itertools.chain( 122 | hasaxis, 123 | self.filter_by_attrs(standard_name=lambda x: x and str(x).lower() in ynames), 124 | self.filter_by_attrs(units=lambda x: x and str(x).lower() in yunits), 125 | ) 126 | ) 127 | ) 128 | return yvars 129 | 130 | def z_axes(self): 131 | znames = [ 132 | "atmosphere_ln_pressure_coordinate", 133 | "atmosphere_sigma_coordinate", 134 | "atmosphere_hybrid_sigma_pressure_coordinate", 135 | "atmosphere_hybrid_height_coordinate", 136 | "atmosphere_sleve_coordinate", 137 | "ocean_sigma_coordinate", 138 | "ocean_s_coordinate", 139 | "ocean_s_coordinate_g1", 140 | "ocean_s_coordinate_g2", 141 | "ocean_sigma_z_coordinate", 142 | "ocean_double_sigma_coordinate", 143 | ] 144 | 145 | # If there is only one variable with the axis parameter, return it 146 | hasaxis = self.filter_by_attrs(axis=lambda x: x and str(x).lower() == "z") 147 | if len(hasaxis) == 1: 148 | return hasaxis 149 | 150 | zvars = list( 151 | set( 152 | itertools.chain( 153 | hasaxis, 154 | self.filter_by_attrs(positive=lambda x: x and str(x).lower() in ["up", "down"]), 155 | self.filter_by_attrs(standard_name=lambda x: x and str(x).lower() in znames), 156 | ) 157 | ) 158 | ) 159 | return zvars 160 | 161 | def is_valid(self, *args, **kwargs): 162 | return self.__class__.is_mine(self, *args, **kwargs) 163 | 164 | def data_vars(self): 165 | return self.filter_by_attrs( 166 | coordinates=lambda x: x is not None, 167 | units=lambda x: x is not None, 168 | standard_name=lambda x: x is not None, 169 | flag_values=lambda x: x is None, 170 | flag_masks=lambda x: x is None, 171 
| flag_meanings=lambda x: x is None, 172 | ) 173 | 174 | def ancillary_vars(self): 175 | ancillary_variables = [] 176 | for rv in self.filter_by_attrs(ancillary_variables=lambda x: x is not None): 177 | # Space separated ancillary variables 178 | for av in rv.ancillary_variables.split(" "): 179 | if av in self.variables: 180 | ancillary_variables.append(self.variables[av]) 181 | return list(set(ancillary_variables)) 182 | 183 | def nc_attributes(self): 184 | return { 185 | "global": { 186 | "Conventions": "CF-1.6", 187 | "date_created": datetime.datetime.now(datetime.UTC).strftime("%Y-%m-%dT%H:%M:00Z"), 188 | } 189 | } 190 | 191 | 192 | def cf_safe_name(name): 193 | if isinstance(name, str): 194 | if re.match("^[0-9_]", name): 195 | # Add a letter to the front 196 | name = f"v_{name}" 197 | return re.sub(r"[^_a-zA-Z0-9]", "_", name) 198 | 199 | raise ValueError(f'Could not convert "{name}" to a safe name') 200 | -------------------------------------------------------------------------------- /pocean/dataset.py: -------------------------------------------------------------------------------- 1 | #!python 2 | import warnings 3 | from collections import OrderedDict 4 | 5 | import numpy as np 6 | import simplejson as json 7 | from netCDF4 import Dataset 8 | 9 | from . import logger as L 10 | from .meta import ( 11 | MetaInterface, 12 | ncpyattributes, 13 | string_to_dtype, 14 | untype_attributes, 15 | ) 16 | from .utils import ( 17 | generic_masked, 18 | JSONEncoder, 19 | safe_attribute_typing, 20 | safe_issubdtype, 21 | ) 22 | 23 | # Attribute that need to be of the same type as the variables 24 | _TYPE_SENSITIVE_ATTRIBUTES = [ 25 | "_FillValue", 26 | "missing_value", 27 | "valid_min", 28 | "valid_max", 29 | "valid_range", 30 | "display_min", 31 | "display_max", 32 | "display_range", 33 | "colorBarMinimum", 34 | "colorBarMaximum", 35 | ] 36 | 37 | 38 | class EnhancedDataset(Dataset): 39 | def __del__(self): 40 | try: 41 | self.close() 42 | except RuntimeError: 43 | pass 44 | 45 | def close(self): 46 | if not self.isopen(): 47 | return 48 | 49 | super().close() 50 | 51 | def vatts(self, vname): 52 | d = {} 53 | var = self.variables[vname] 54 | for k in var.ncattrs(): 55 | d[k] = var.getncattr(k) 56 | return d 57 | 58 | def filter_by_attrs(self, *args, **kwargs): 59 | return self.get_variables_by_attributes(*args, **kwargs) 60 | 61 | def __apply_meta_interface__(self, meta, **kwargs): 62 | warnings.warn( 63 | "`__apply_meta_interface__` is deprecated. Use `apply_meta()` instead", 64 | DeprecationWarning, 65 | ) 66 | return self.apply_meta(meta, **kwargs) 67 | 68 | def __getattr__(self, name): 69 | if name in ["__meta_interface__", "_meta"]: 70 | warnings.warn( 71 | "`__meta_interface__` and `_meta` are deprecated. 
Use `meta()` instead", 72 | DeprecationWarning, 73 | ) 74 | return self.meta() 75 | else: 76 | return super().__getattr__(name) 77 | 78 | def apply_meta(self, *args, **kwargs): 79 | """Shortcut to the JSON object without writing any data""" 80 | kwargs["create_data"] = False 81 | return self.apply_json(*args, **kwargs) 82 | 83 | def meta(self, *args, **kwargs): 84 | """Shortcut to the JSON object without any data""" 85 | kwargs["return_data"] = False 86 | return self.json(*args, **kwargs) 87 | 88 | def json(self, return_data=True, fill_data=True): 89 | ds = OrderedDict() 90 | vs = OrderedDict() 91 | gs = ncpyattributes({ga: self.getncattr(ga) for ga in self.ncattrs()}) 92 | 93 | # Dimensions 94 | for dname, dim in self.dimensions.items(): 95 | if dim.isunlimited(): 96 | ds[dname] = None 97 | else: 98 | ds[dname] = dim.size 99 | 100 | # Variables 101 | for k, v in self.variables.items(): 102 | typed = v.dtype 103 | if isinstance(typed, np.dtype): 104 | typed = str(typed.name) 105 | elif isinstance(typed, type): 106 | typed = typed.__name__ 107 | 108 | vattrs = {va: v.getncattr(va) for va in v.ncattrs()} 109 | vardict = {"attributes": ncpyattributes(vattrs), "shape": v.dimensions, "type": typed} 110 | if return_data is True: 111 | vdata = generic_masked(v[:], attrs=vattrs) 112 | if fill_data is True: 113 | vdata = vdata.filled() 114 | vardict["data"] = vdata.tolist() 115 | 116 | vs[k] = vardict 117 | 118 | return MetaInterface(dimensions=ds, variables=vs, attributes=gs) 119 | 120 | def apply_json(self, meta, create_vars=True, create_dims=True, create_data=True): 121 | """Apply a meta interface object to a netCDF4 compatible object""" 122 | ds = meta.get("dimensions", OrderedDict()) 123 | gs = meta.get("attributes", OrderedDict()) 124 | vs = meta.get("variables", OrderedDict()) 125 | 126 | # Dimensions 127 | for dname, dsize in ds.items(): 128 | # Ignore dimension sizes less than 0 129 | if dsize and dsize < 0: 130 | continue 131 | if dname not in self.dimensions: 132 | # Don't create new dimensions 133 | if create_dims is False: 134 | continue 135 | 136 | self.createDimension(dname, size=dsize) 137 | else: 138 | dfilesize = self.dimensions[dname].size 139 | if dfilesize != dsize: 140 | L.warning( 141 | "Not changing size of dimension {}. file: {}, meta: {}".format( 142 | dname, dfilesize, dsize 143 | ) 144 | ) 145 | 146 | # Global attributes 147 | typed_gs = untype_attributes(gs) 148 | self.setncatts(typed_gs) 149 | 150 | # Variables 151 | for vname, vvalue in vs.items(): 152 | vatts = untype_attributes(vvalue.get("attributes", {})) 153 | 154 | if vname not in self.variables: 155 | # Don't create new variables 156 | if create_vars is False: 157 | continue 158 | 159 | if "shape" not in vvalue and "type" not in vvalue: 160 | L.debug(f"Skipping {vname} creation, no shape or no type defined") 161 | continue 162 | shape = vvalue.get("shape", []) # Dimension names 163 | vardtype = string_to_dtype(vvalue.get("type")) 164 | 165 | if safe_issubdtype(vardtype, np.floating): 166 | defaultfill = vardtype.type(np.nan) # We can use `nan` for floats 167 | elif vardtype.kind in ["U", "S"]: 168 | defaultfill = None # No fillvalue on VLENs 169 | else: 170 | # Use a masked value which evaluates to different things depending on the dtype 171 | # For integers is resolves to `0`. 
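                    # (Added note: np.ma.masked cast through the dtype is only used
                    # to produce a default _FillValue handed to createVariable() below;
                    # an explicit _FillValue/missing_value attribute still wins.)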
172 | defaultfill = vardtype.type(np.ma.masked) 173 | 174 | fillmiss = vatts.get("_FillValue", vatts.get("missing_value", defaultfill)) 175 | newvar = self.createVariable(vname, vardtype, dimensions=shape, fill_value=fillmiss) 176 | else: 177 | newvar = self.variables[vname] 178 | 179 | # Now assign the data if is exists 180 | if create_data is True and "data" in vvalue: 181 | # Because the JSON format can be flattened already we are just 182 | # going to always reshape the data to the variable shape 183 | data = generic_masked( 184 | np.array(vvalue["data"], dtype=newvar.dtype).flatten() 185 | ).reshape(newvar.shape) 186 | newvar[:] = data 187 | 188 | # Don't re-assign fill value attributes 189 | if "_FillValue" in vatts: 190 | del vatts["_FillValue"] 191 | if "missing_value" in vatts: 192 | del vatts["missing_value"] 193 | 194 | # Convert any attribute that need to match the variables dtype to that dtype 195 | for sattr in _TYPE_SENSITIVE_ATTRIBUTES: 196 | if sattr in vatts: 197 | vatts[sattr] = safe_attribute_typing(newvar.dtype, vatts[sattr]) 198 | 199 | newvar.setncatts(vatts) 200 | 201 | def to_json(self, *args, **kwargs): 202 | return json.dumps(self.to_dict(), *args, **kwargs) 203 | 204 | def json_attributes(self, vfuncs=None): 205 | """ 206 | vfuncs can be any callable that accepts a single argument, the 207 | Variable object, and returns a dictionary of new attributes to 208 | set. These will overwrite existing attributes 209 | """ 210 | 211 | vfuncs = vfuncs or [] 212 | 213 | js = {"global": {}} 214 | 215 | for k in self.ncattrs(): 216 | js["global"][k] = self.getncattr(k) 217 | 218 | for varname, var in self.variables.items(): 219 | js[varname] = {} 220 | for k in var.ncattrs(): 221 | z = var.getncattr(k) 222 | try: 223 | assert not np.isnan(z).all() 224 | js[varname][k] = z 225 | except AssertionError: 226 | js[varname][k] = None 227 | except TypeError: 228 | js[varname][k] = z 229 | 230 | for vf in vfuncs: 231 | try: 232 | js[varname].update(vfuncs(var)) 233 | except BaseException: 234 | L.exception("Could not apply custom variable attribute function") 235 | 236 | return json.loads(json.dumps(js, cls=JSONEncoder)) 237 | 238 | def update_attributes(self, attributes): 239 | for k, v in attributes.pop("global", {}).items(): 240 | try: 241 | self.setncattr(k, v) 242 | except BaseException: 243 | L.warning(f"Could not set global attribute {k}: {v}") 244 | 245 | for k, v in attributes.items(): 246 | if k in self.variables: 247 | for n, z in v.items(): 248 | # Don't re-assign fill value attributes 249 | if n in ["_FillValue", "missing_value"]: 250 | L.warning(f"Refusing to set {n} on {k}") 251 | continue 252 | 253 | try: 254 | self.variables[k].setncattr(n, z) 255 | except BaseException: 256 | L.warning(f"Could not set attribute {n} on {k}") 257 | self.sync() 258 | -------------------------------------------------------------------------------- /pocean/dsg/__init__.py: -------------------------------------------------------------------------------- 1 | #!python 2 | 3 | # Profile 4 | from .profile.im import IncompleteMultidimensionalProfile 5 | from .profile.om import OrthogonalMultidimensionalProfile 6 | 7 | # Timeseries 8 | from .timeseries.cr import ContiguousRaggedTimeseries 9 | from .timeseries.im import IncompleteMultidimensionalTimeseries 10 | from .timeseries.ir import IndexedRaggedTimeseries 11 | from .timeseries.om import OrthogonalMultidimensionalTimeseries 12 | from .timeseriesProfile.im import IncompleteMultidimensionalTimeseriesProfile 13 | from .timeseriesProfile.om 
import OrthogonalMultidimensionalTimeseriesProfile 14 | 15 | # TimeseriesProfile 16 | from .timeseriesProfile.r import RaggedTimeseriesProfile 17 | 18 | # Trajectory 19 | from .trajectory.cr import ContiguousRaggedTrajectory 20 | from .trajectory.im import IncompleteMultidimensionalTrajectory 21 | from .trajectory.ir import IndexedRaggedTrajectory 22 | 23 | # TrajectoryProfile 24 | from .trajectoryProfile.cr import ContiguousRaggedTrajectoryProfile 25 | 26 | # Attribute Utilities 27 | from .utils import ( 28 | get_calculated_attributes, 29 | get_creation_attributes, 30 | get_geographic_attributes, 31 | get_temporal_attributes, 32 | get_vertical_attributes, 33 | ) 34 | 35 | __all__ = [ 36 | "IncompleteMultidimensionalProfile", 37 | "OrthogonalMultidimensionalProfile", 38 | "ContiguousRaggedTrajectory", 39 | "IndexedRaggedTrajectory", 40 | "IncompleteMultidimensionalTrajectory", 41 | "ContiguousRaggedTrajectoryProfile", 42 | "ContiguousRaggedTimeseries", 43 | "IndexedRaggedTimeseries", 44 | "IncompleteMultidimensionalTimeseries", 45 | "OrthogonalMultidimensionalTimeseries", 46 | "RaggedTimeseriesProfile", 47 | "IncompleteMultidimensionalTimeseriesProfile", 48 | "OrthogonalMultidimensionalTimeseriesProfile", 49 | "get_geographic_attributes", 50 | "get_vertical_attributes", 51 | "get_temporal_attributes", 52 | "get_creation_attributes", 53 | "get_calculated_attributes", 54 | ] 55 | -------------------------------------------------------------------------------- /pocean/dsg/profile/__init__.py: -------------------------------------------------------------------------------- 1 | #!python 2 | from collections import namedtuple 3 | 4 | from shapely.geometry import LineString, Point 5 | 6 | from pocean.utils import logger as L # noqa 7 | from pocean.utils import ( 8 | unique_justseen, 9 | ) 10 | 11 | profile_meta = namedtuple("Profile", ["min_z", "max_z", "t", "x", "y", "id", "geometry"]) 12 | profiles_meta = namedtuple( 13 | "ProfileCollection", ["min_z", "max_z", "min_t", "max_t", "profiles", "geometry"] 14 | ) 15 | 16 | 17 | def profile_calculated_metadata(df, axes, geometries=True): 18 | profiles = {} 19 | for pid, pgroup in df.groupby(axes.profile): 20 | pgroup = pgroup.sort_values(axes.t) 21 | first_row = pgroup.iloc[0] 22 | profiles[pid] = profile_meta( 23 | min_z=pgroup[axes.z].min(), 24 | max_z=pgroup[axes.z].max(), 25 | t=first_row[axes.t], 26 | x=first_row[axes.x], 27 | y=first_row[axes.y], 28 | id=pid, 29 | geometry=Point(first_row[axes.x], first_row[axes.y]), 30 | ) 31 | 32 | if geometries: 33 | null_coordinates = df[axes.x].isnull() | df[axes.y].isnull() 34 | coords = list( 35 | unique_justseen( 36 | zip( 37 | df.loc[~null_coordinates, axes.x].tolist(), 38 | df.loc[~null_coordinates, axes.y].tolist(), 39 | ) 40 | ) 41 | ) 42 | else: 43 | # Calculate the geometry as the linestring between all of the profile points 44 | coords = [p.geometry for _, p in profiles.items()] 45 | 46 | geometry = None 47 | if len(coords) > 1: 48 | geometry = LineString(coords) 49 | elif len(coords) == 1: 50 | geometry = Point(coords[0]) 51 | 52 | return profiles_meta( 53 | min_z=df[axes.z].min(), 54 | max_z=df[axes.z].max(), 55 | min_t=df[axes.t].min(), 56 | max_t=df[axes.t].max(), 57 | profiles=profiles, 58 | geometry=geometry, 59 | ) 60 | -------------------------------------------------------------------------------- /pocean/dsg/profile/im.py: -------------------------------------------------------------------------------- 1 | #!python 2 | from collections import OrderedDict 3 | from copy import copy 4 | 5 
| import numpy as np 6 | import pandas as pd 7 | from cftime import date2num 8 | 9 | from pocean import logger as L # noqa 10 | from pocean.cf import cf_safe_name, CFDataset 11 | from pocean.dsg.profile import profile_calculated_metadata 12 | from pocean.utils import ( 13 | create_ncvar_from_series, 14 | dict_update, 15 | downcast_dataframe, 16 | generic_masked, 17 | get_default_axes, 18 | get_dtype, 19 | get_mapped_axes_variables, 20 | get_masked_datetime_array, 21 | get_ncdata_from_series, 22 | nativize_times, 23 | normalize_countable_array, 24 | ) 25 | 26 | 27 | class IncompleteMultidimensionalProfile(CFDataset): 28 | """ 29 | If there are the same number of levels in each profile, but they do not 30 | have the same set of vertical coordinates, one can use the incomplete 31 | multidimensional array representation, which the vertical coordinate 32 | variable is two-dimensional e.g. replacing z(z) in Example H.8, 33 | "Atmospheric sounding profiles for a common set of vertical coordinates 34 | stored in the orthogonal multidimensional array representation." with 35 | alt(profile,z). This representation also allows one to have a variable 36 | number of elements in different profiles, at the cost of some wasted space. 37 | In that case, any unused elements of the data and auxiliary coordinate 38 | variables must contain missing data values (section 9.6). 39 | """ 40 | 41 | @classmethod 42 | def is_mine(cls, dsg, strict=False): 43 | try: 44 | pvars = dsg.filter_by_attrs(cf_role="profile_id") 45 | assert len(pvars) == 1 46 | assert dsg.featureType.lower() == "profile" 47 | assert len(dsg.t_axes()) >= 1 48 | assert len(dsg.x_axes()) >= 1 49 | assert len(dsg.y_axes()) >= 1 50 | assert len(dsg.z_axes()) >= 1 51 | 52 | # Allow for string variables 53 | pvar = pvars[0] 54 | # 0 = single 55 | # 1 = array of strings/ints/bytes/etc 56 | # 2 = array of character arrays 57 | assert 0 <= len(pvar.dimensions) <= 2 58 | 59 | t = dsg.t_axes()[0] 60 | x = dsg.x_axes()[0] 61 | y = dsg.y_axes()[0] 62 | z = dsg.z_axes()[0] 63 | assert len(z.dimensions) == 2 64 | 65 | assert t.size == pvar.size 66 | assert x.size == pvar.size 67 | assert y.size == pvar.size 68 | p_dim = dsg.dimensions[pvar.dimensions[0]] 69 | z_dim = dsg.dimensions[[d for d in z.dimensions if d != p_dim.name][0]] 70 | for dv in dsg.data_vars(): 71 | assert len(dv.dimensions) in [1, 2] # dimensioned by profile or profile, z 72 | assert z_dim.name in dv.dimensions or p_dim.name in dv.dimensions 73 | assert dv.size in [z_dim.size, p_dim.size, z_dim.size * p_dim.size] 74 | 75 | except BaseException: 76 | if strict is True: 77 | raise 78 | return False 79 | 80 | return True 81 | 82 | @classmethod 83 | def from_dataframe(cls, df, output, **kwargs): 84 | axes = get_default_axes(kwargs.pop("axes", {})) 85 | daxes = axes 86 | data_columns = [d for d in df.columns if d not in axes] 87 | 88 | unlimited = kwargs.pop("unlimited", False) 89 | 90 | unique_dims = kwargs.pop("unique_dims", False) 91 | if unique_dims is True: 92 | # Rename the dimension to avoid a dimension and coordinate having the same name 93 | # which is not support in xarray 94 | changed_axes = {k: f"{v}_dim" for k, v in axes._asdict().items()} 95 | daxes = get_default_axes(changed_axes) 96 | 97 | # Downcast anything from int64 to int32 98 | # Convert any timezone aware datetimes to native UTC times 99 | df = downcast_dataframe(nativize_times(df)) 100 | 101 | with IncompleteMultidimensionalProfile(output, "w") as nc: 102 | profile_group = df.groupby(axes.profile) 103 | 104 | if unlimited is 
True: 105 | max_profiles = None 106 | else: 107 | max_profiles = df[axes.profile].unique().size 108 | nc.createDimension(daxes.profile, max_profiles) 109 | 110 | max_zs = profile_group.size().max() 111 | nc.createDimension(daxes.z, max_zs) 112 | 113 | # Metadata variables 114 | nc.createVariable("crs", "i4") 115 | 116 | profile = nc.createVariable(axes.profile, get_dtype(df[axes.profile]), (daxes.profile,)) 117 | 118 | # Create all of the variables 119 | time = nc.createVariable(axes.t, "f8", (daxes.profile,)) 120 | latitude = nc.createVariable(axes.y, get_dtype(df[axes.y]), (daxes.profile,)) 121 | longitude = nc.createVariable(axes.x, get_dtype(df[axes.x]), (daxes.profile,)) 122 | z = nc.createVariable( 123 | axes.z, 124 | get_dtype(df[axes.z]), 125 | (daxes.profile, daxes.z), 126 | fill_value=df[axes.z].dtype.type(cls.default_fill_value), 127 | ) 128 | 129 | attributes = dict_update(nc.nc_attributes(axes, daxes), kwargs.pop("attributes", {})) 130 | 131 | # Create vars based on full dataframe (to get all variables) 132 | for c in data_columns: 133 | var_name = cf_safe_name(c) 134 | if var_name not in nc.variables: 135 | v = create_ncvar_from_series( 136 | nc, 137 | var_name, 138 | (daxes.profile, daxes.z), 139 | df[c], 140 | ) 141 | attributes[var_name] = dict_update( 142 | attributes.get(var_name, {}), 143 | {"coordinates": f"{axes.t} {axes.z} {axes.x} {axes.y}"}, 144 | ) 145 | 146 | # Write values for each profile within profile_group 147 | for i, (uid, pdf) in enumerate(profile_group): 148 | profile[i] = uid 149 | 150 | time[i] = date2num(pdf[axes.t].iloc[0], units=cls.default_time_unit) 151 | latitude[i] = pdf[axes.y].iloc[0] 152 | longitude[i] = pdf[axes.x].iloc[0] 153 | 154 | zvalues = pdf[axes.z].fillna(z._FillValue).values 155 | sl = slice(0, zvalues.size) 156 | z[i, sl] = zvalues 157 | 158 | for c in data_columns: 159 | var_name = cf_safe_name(c) 160 | v = nc.variables[var_name] 161 | 162 | vvalues = get_ncdata_from_series(pdf[c], v) 163 | 164 | sl = slice(0, vvalues.size) 165 | v[i, sl] = vvalues 166 | 167 | # Set global attributes 168 | nc.update_attributes(attributes) 169 | 170 | return IncompleteMultidimensionalProfile(output, **kwargs) 171 | 172 | def calculated_metadata( 173 | self, df=None, geometries=True, clean_cols=True, clean_rows=True, **kwargs 174 | ): 175 | axes = get_default_axes(kwargs.pop("axes", {})) 176 | if df is None: 177 | df = self.to_dataframe(clean_cols=clean_cols, clean_rows=clean_rows, axes=axes) 178 | return profile_calculated_metadata(df, axes, geometries) 179 | 180 | def to_dataframe(self, clean_cols=True, clean_rows=True, **kwargs): 181 | axes = get_default_axes(kwargs.pop("axes", {})) 182 | 183 | axv = get_mapped_axes_variables(self, axes) 184 | 185 | # Multiple profiles in the file 186 | pvar = axv.profile 187 | p_dim = self.dimensions[pvar.dimensions[0]] 188 | 189 | zvar = axv.z 190 | zs = len(self.dimensions[[d for d in zvar.dimensions if d != p_dim.name][0]]) 191 | 192 | # Profiles 193 | p = normalize_countable_array(pvar) 194 | p = p.repeat(zs) 195 | 196 | # Z 197 | z = generic_masked(zvar[:].flatten(), attrs=self.vatts(zvar.name)) 198 | 199 | # T 200 | tvar = axv.t 201 | t = tvar[:].repeat(zs) 202 | nt = get_masked_datetime_array(t, tvar).flatten() 203 | 204 | # X 205 | xvar = axv.x 206 | x = generic_masked(xvar[:].repeat(zs), attrs=self.vatts(xvar.name)) 207 | 208 | # Y 209 | yvar = axv.y 210 | y = generic_masked(yvar[:].repeat(zs), attrs=self.vatts(yvar.name)) 211 | 212 | df_data = OrderedDict( 213 | [(axes.t, nt), (axes.x, x), (axes.y, y), 
(axes.z, z), (axes.profile, p)] 214 | ) 215 | 216 | building_index_to_drop = np.ones(t.size, dtype=bool) 217 | 218 | extract_vars = copy(self.variables) 219 | for ncvar in axv._asdict().values(): 220 | if ncvar is not None and ncvar.name in extract_vars: 221 | del extract_vars[ncvar.name] 222 | 223 | for i, (dnam, dvar) in enumerate(extract_vars.items()): 224 | # Profile dimension 225 | if dvar.dimensions == pvar.dimensions: 226 | vdata = generic_masked( 227 | dvar[:].repeat(zs).astype(dvar.dtype), attrs=self.vatts(dnam) 228 | ) 229 | 230 | # Profile, z dimension 231 | elif dvar.dimensions == zvar.dimensions: 232 | vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype), attrs=self.vatts(dnam)) 233 | 234 | else: 235 | vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype), attrs=self.vatts(dnam)) 236 | # Carry through size 1 variables 237 | if vdata.size == 1: 238 | if vdata[0] is np.ma.masked: 239 | L.warning(f"Skipping variable {dnam} that is completely masked") 240 | continue 241 | else: 242 | L.warning(f"Skipping variable {dnam} since it didn't match any dimension sizes") 243 | continue 244 | 245 | # Mark rows with data so we don't remove them with clear_rows 246 | if vdata.size == building_index_to_drop.size: 247 | building_index_to_drop = (building_index_to_drop == True) & (vdata.mask == True) # noqa 248 | 249 | # Handle scalars here at the end 250 | if vdata.size == 1: 251 | vdata = vdata[0] 252 | 253 | df_data[dnam] = vdata 254 | 255 | df = pd.DataFrame(df_data) 256 | 257 | # Drop all data columns with no data 258 | if clean_cols: 259 | df = df.dropna(axis=1, how="all") 260 | 261 | # Drop all data rows with no data variable data 262 | if clean_rows: 263 | df = df.iloc[~building_index_to_drop] 264 | 265 | return df 266 | 267 | def nc_attributes(self, axes, daxes): 268 | atts = super().nc_attributes() 269 | return dict_update( 270 | atts, 271 | { 272 | "global": {"featureType": "profile", "cdm_data_type": "Profile"}, 273 | axes.profile: {"cf_role": "profile_id", "long_name": "profile identifier"}, 274 | axes.x: {"axis": "X"}, 275 | axes.y: {"axis": "Y"}, 276 | axes.z: {"axis": "Z"}, 277 | axes.t: {"units": self.default_time_unit, "standard_name": "time", "axis": "T"}, 278 | }, 279 | ) 280 | -------------------------------------------------------------------------------- /pocean/dsg/profile/om.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from copy import copy 3 | 4 | import numpy as np 5 | import pandas as pd 6 | 7 | from pocean import logger as L # noqa 8 | from pocean.cf import CFDataset 9 | from pocean.dsg.profile import profile_calculated_metadata 10 | from pocean.utils import ( 11 | generic_masked, 12 | get_default_axes, 13 | get_mapped_axes_variables, 14 | get_masked_datetime_array, 15 | normalize_array, 16 | normalize_countable_array, 17 | ) 18 | 19 | 20 | class OrthogonalMultidimensionalProfile(CFDataset): 21 | """ 22 | If the profile instances have the same number of elements and the vertical 23 | coordinate values are identical for all instances, you may use the 24 | orthogonal multidimensional array representation. This has either a 25 | one-dimensional coordinate variable, z(z), provided the vertical coordinate 26 | values are ordered monotonically, or a one-dimensional auxiliary coordinate 27 | variable, alt(o), where o is the element dimension. In the former case, 28 | listing the vertical coordinate variable in the coordinates attributes of 29 | the data variables is optional. 
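
    A minimal CDL sketch of this layout (dimension and variable names are
    illustrative only, not required by this class):

        dimensions:
            profile = 2 ;
            z = 10 ;
        variables:
            int profile(profile) ;             // cf_role = "profile_id"
            double time(profile) ;
            double lon(profile) ;
            double lat(profile) ;
            double z(z) ;                      // single shared vertical axis
            double temperature(profile, z) ;   // data variable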
30 | """ 31 | 32 | @classmethod 33 | def is_mine(cls, dsg, strict=False): 34 | try: 35 | pvars = dsg.filter_by_attrs(cf_role="profile_id") 36 | assert len(pvars) == 1 37 | assert dsg.featureType.lower() == "profile" 38 | assert len(dsg.t_axes()) >= 1 39 | assert len(dsg.x_axes()) >= 1 40 | assert len(dsg.y_axes()) >= 1 41 | assert len(dsg.z_axes()) >= 1 42 | 43 | # Allow for string variables 44 | pvar = pvars[0] 45 | # 0 = single 46 | # 1 = array of strings/ints/bytes/etc 47 | # 2 = array of character arrays 48 | assert 0 <= len(pvar.dimensions) <= 2 49 | 50 | t = dsg.t_axes()[0] 51 | x = dsg.x_axes()[0] 52 | y = dsg.y_axes()[0] 53 | z = dsg.z_axes()[0] 54 | assert len(z.dimensions) == 1 55 | z_dim = dsg.dimensions[z.dimensions[0]] 56 | 57 | ps = normalize_array(pvar) 58 | is_single = False 59 | 60 | if pvar.ndim == 0: 61 | is_single = True 62 | elif pvar.ndim == 2: 63 | is_single = False 64 | elif isinstance(ps, str): 65 | # Non-dimensioned string variable 66 | is_single = True 67 | elif pvar.ndim == 1 and hasattr(ps, "dtype") and ps.dtype.kind in ["U", "S"]: 68 | is_single = True 69 | 70 | if is_single: 71 | assert t.size == 1 72 | assert x.size == 1 73 | assert y.size == 1 74 | for dv in dsg.data_vars(): 75 | assert len(dv.dimensions) == 1 76 | assert z_dim.name in dv.dimensions 77 | assert dv.size == z_dim.size 78 | else: 79 | assert t.size == pvar.size 80 | assert x.size == pvar.size 81 | assert y.size == pvar.size 82 | p_dim = dsg.dimensions[pvar.dimensions[0]] 83 | for dv in dsg.data_vars(): 84 | assert len(dv.dimensions) in [1, 2] # dimensioned by profile or profile, z 85 | assert z_dim.name in dv.dimensions or p_dim.name in dv.dimensions 86 | assert dv.size in [z_dim.size, p_dim.size, z_dim.size * p_dim.size] 87 | 88 | except BaseException: 89 | if strict is True: 90 | raise 91 | return False 92 | 93 | return True 94 | 95 | @classmethod 96 | def from_dataframe(cls, df, output, **kwargs): 97 | raise NotImplementedError 98 | 99 | def calculated_metadata( 100 | self, df=None, geometries=True, clean_cols=True, clean_rows=True, **kwargs 101 | ): 102 | axes = get_default_axes(kwargs.pop("axes", {})) 103 | if df is None: 104 | df = self.to_dataframe(clean_cols=clean_cols, clean_rows=clean_rows, axes=axes) 105 | return profile_calculated_metadata(df, axes, geometries) 106 | 107 | def to_dataframe(self, clean_cols=True, clean_rows=True, **kwargs): 108 | axes = get_default_axes(kwargs.pop("axes", {})) 109 | 110 | axv = get_mapped_axes_variables(self, axes) 111 | 112 | zvar = axv.z 113 | zs = len(self.dimensions[zvar.dimensions[0]]) 114 | 115 | # Profiles 116 | pvar = axv.profile 117 | p = normalize_countable_array(pvar) 118 | ps = p.size 119 | p = p.repeat(zs) 120 | 121 | # Z 122 | z = generic_masked(zvar[:], attrs=self.vatts(zvar.name)) 123 | try: 124 | z = np.tile(z, ps) 125 | except ValueError: 126 | z = z.flatten() 127 | 128 | # T 129 | tvar = axv.t 130 | t = tvar[:].repeat(zs) 131 | nt = get_masked_datetime_array(t, tvar).flatten() 132 | 133 | # X 134 | xvar = axv.x 135 | x = generic_masked(xvar[:].repeat(zs), attrs=self.vatts(xvar.name)) 136 | 137 | # Y 138 | yvar = axv.y 139 | y = generic_masked(yvar[:].repeat(zs), attrs=self.vatts(yvar.name)) 140 | 141 | df_data = OrderedDict( 142 | [(axes.t, nt), (axes.x, x), (axes.y, y), (axes.z, z), (axes.profile, p)] 143 | ) 144 | 145 | building_index_to_drop = np.ones(t.size, dtype=bool) 146 | 147 | # Axes variables are already processed so skip them 148 | extract_vars = copy(self.variables) 149 | for ncvar in axv._asdict().values(): 150 | if 
ncvar is not None and ncvar.name in extract_vars: 151 | del extract_vars[ncvar.name] 152 | 153 | for i, (dnam, dvar) in enumerate(extract_vars.items()): 154 | # Profile dimension 155 | if dvar.dimensions == pvar.dimensions: 156 | vdata = generic_masked( 157 | dvar[:].repeat(zs).astype(dvar.dtype), attrs=self.vatts(dnam) 158 | ) 159 | 160 | # Z dimension 161 | elif dvar.dimensions == zvar.dimensions: 162 | vdata = generic_masked( 163 | np.tile(dvar[:], ps).flatten().astype(dvar.dtype), attrs=self.vatts(dnam) 164 | ) 165 | 166 | # Profile, z dimension 167 | elif dvar.dimensions == pvar.dimensions + zvar.dimensions: 168 | vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype), attrs=self.vatts(dnam)) 169 | 170 | else: 171 | vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype), attrs=self.vatts(dnam)) 172 | # Carry through size 1 variables 173 | if vdata.size == 1: 174 | if vdata[0] is np.ma.masked: 175 | L.warning(f"Skipping variable {dnam} that is completely masked") 176 | continue 177 | else: 178 | L.warning(f"Skipping variable {dnam} since it didn't match any dimension sizes") 179 | continue 180 | 181 | # Mark rows with data so we don't remove them with clear_rows 182 | if vdata.size == building_index_to_drop.size: 183 | building_index_to_drop = (building_index_to_drop == True) & (vdata.mask == True) # noqa 184 | 185 | # Handle scalars here at the end 186 | if vdata.size == 1: 187 | vdata = vdata[0] 188 | 189 | df_data[dnam] = vdata 190 | 191 | df = pd.DataFrame(df_data) 192 | 193 | # Drop all data columns with no data 194 | if clean_cols: 195 | df = df.dropna(axis=1, how="all") 196 | 197 | # Drop all data rows with no data variable data 198 | if clean_rows: 199 | df = df.iloc[~building_index_to_drop] 200 | 201 | return df 202 | -------------------------------------------------------------------------------- /pocean/dsg/timeseries/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyoceans/pocean-core/6e4dda9c818c65b31192e8d5fb01d6bcf29a43f3/pocean/dsg/timeseries/__init__.py -------------------------------------------------------------------------------- /pocean/dsg/timeseries/cr.py: -------------------------------------------------------------------------------- 1 | #!python 2 | from pocean import logger # noqa 3 | from pocean.cf import CFDataset 4 | 5 | 6 | class ContiguousRaggedTimeseries(CFDataset): 7 | @classmethod 8 | def is_mine(cls, dsg, strict=False): 9 | try: 10 | rvars = dsg.filter_by_attrs(cf_role="timeseries_id") 11 | assert len(rvars) == 1 12 | assert dsg.featureType.lower() == "timeseries" 13 | assert len(dsg.t_axes()) >= 1 14 | assert len(dsg.x_axes()) >= 1 15 | assert len(dsg.y_axes()) >= 1 16 | 17 | o_index_vars = dsg.filter_by_attrs(sample_dimension=lambda x: x is not None) 18 | assert len(o_index_vars) == 1 19 | assert o_index_vars[0].sample_dimension in dsg.dimensions # Sample dimension 20 | 21 | # Allow for string variables 22 | rvar = rvars[0] 23 | # 0 = single 24 | # 1 = array of strings/ints/bytes/etc 25 | # 2 = array of character arrays 26 | assert 0 <= len(rvar.dimensions) <= 2 27 | except BaseException: 28 | if strict is True: 29 | raise 30 | return False 31 | 32 | return True 33 | 34 | def from_dataframe(cls, df, output, **kwargs): 35 | raise NotImplementedError 36 | 37 | def calculated_metadata(self, df=None, geometries=True, clean_cols=True, clean_rows=True): 38 | # if df is None: 39 | # df = self.to_dataframe(clean_cols=clean_cols, clean_rows=clean_rows) 40 | raise 
NotImplementedError 41 | 42 | def to_dataframe(self): 43 | raise NotImplementedError 44 | -------------------------------------------------------------------------------- /pocean/dsg/timeseries/im.py: -------------------------------------------------------------------------------- 1 | #!python 2 | from pocean import logger # noqa 3 | from pocean.cf import CFDataset 4 | 5 | 6 | class IncompleteMultidimensionalTimeseries(CFDataset): 7 | @classmethod 8 | def is_mine(cls, dsg, strict=False): 9 | try: 10 | rvars = dsg.filter_by_attrs(cf_role="timeseries_id") 11 | assert len(rvars) == 1 12 | assert dsg.featureType.lower() == "timeseries" 13 | assert len(dsg.t_axes()) >= 1 14 | assert len(dsg.x_axes()) >= 1 15 | assert len(dsg.y_axes()) >= 1 16 | 17 | # Not a CR 18 | assert not dsg.filter_by_attrs(sample_dimension=lambda x: x is not None) 19 | 20 | # Not an IR 21 | assert not dsg.filter_by_attrs(instance_dimension=lambda x: x is not None) 22 | 23 | # IM files will always have a time variable with two dimensions 24 | # because IM files are never used for files with a single station. 25 | assert len(dsg.t_axes()[0].dimensions) == 2 26 | 27 | # Allow for string variables 28 | rvar = rvars[0] 29 | # 0 = single 30 | # 1 = array of strings/ints/bytes/etc 31 | # 2 = array of character arrays 32 | assert 0 <= len(rvar.dimensions) <= 2 33 | 34 | except BaseException: 35 | if strict is True: 36 | raise 37 | return False 38 | 39 | return True 40 | 41 | def from_dataframe(cls, df, output, **kwargs): 42 | raise NotImplementedError 43 | 44 | def calculated_metadata(self, df=None, geometries=True, clean_cols=True, clean_rows=True): 45 | # if df is None: 46 | # df = self.to_dataframe(clean_cols=clean_cols, clean_rows=clean_rows) 47 | raise NotImplementedError 48 | 49 | def to_dataframe(self): 50 | raise NotImplementedError 51 | -------------------------------------------------------------------------------- /pocean/dsg/timeseries/ir.py: -------------------------------------------------------------------------------- 1 | #!python 2 | from pocean import logger # noqa 3 | from pocean.cf import CFDataset 4 | 5 | 6 | class IndexedRaggedTimeseries(CFDataset): 7 | @classmethod 8 | def is_mine(cls, dsg, strict=False): 9 | try: 10 | rvars = dsg.filter_by_attrs(cf_role="timeseries_id") 11 | assert len(rvars) == 1 12 | assert dsg.featureType.lower() == "timeseries" 13 | assert len(dsg.t_axes()) >= 1 14 | assert len(dsg.x_axes()) >= 1 15 | assert len(dsg.y_axes()) >= 1 16 | 17 | r_index_vars = dsg.filter_by_attrs(instance_dimension=lambda x: x is not None) 18 | assert len(r_index_vars) == 1 19 | assert r_index_vars[0].instance_dimension in dsg.dimensions # Station dimension 20 | 21 | # Allow for string variables 22 | rvar = rvars[0] 23 | # 0 = single 24 | # 1 = array of strings/ints/bytes/etc 25 | # 2 = array of character arrays 26 | assert 0 <= len(rvar.dimensions) <= 2 27 | 28 | except BaseException: 29 | if strict is True: 30 | raise 31 | return False 32 | 33 | return True 34 | 35 | def from_dataframe(cls, df, output, **kwargs): 36 | raise NotImplementedError 37 | 38 | def calculated_metadata(self, df=None, geometries=True, clean_cols=True, clean_rows=True): 39 | # if df is None: 40 | # df = self.to_dataframe(clean_cols=clean_cols, clean_rows=clean_rows) 41 | raise NotImplementedError 42 | 43 | def to_dataframe(self): 44 | raise NotImplementedError 45 | -------------------------------------------------------------------------------- /pocean/dsg/timeseries/om.py: 
-------------------------------------------------------------------------------- 1 | #!python 2 | from collections import OrderedDict 3 | from copy import copy 4 | 5 | import numpy as np 6 | import pandas as pd 7 | 8 | from pocean import logger as L # noqa 9 | from pocean.cf import cf_safe_name, CFDataset 10 | from pocean.utils import ( 11 | create_ncvar_from_series, 12 | dict_update, 13 | downcast_dataframe, 14 | generic_masked, 15 | get_default_axes, 16 | get_dtype, 17 | get_mapped_axes_variables, 18 | get_masked_datetime_array, 19 | get_ncdata_from_series, 20 | nativize_times, 21 | normalize_countable_array, 22 | ) 23 | 24 | 25 | class OrthogonalMultidimensionalTimeseries(CFDataset): 26 | """ 27 | H.2.1. Orthogonal multidimensional array representation of time series 28 | 29 | If the time series instances have the same number of elements and the time values are identical 30 | for all instances, you may use the orthogonal multidimensional array representation. This has 31 | either a one-dimensional coordinate variable, time(time), provided the time values are ordered 32 | monotonically, or a one-dimensional auxiliary coordinate variable, time(o), where o is the 33 | element dimension. In the former case, listing the time variable in the coordinates attributes 34 | of the data variables is optional. 35 | """ 36 | 37 | @classmethod 38 | def is_mine(cls, dsg, strict=False): 39 | try: 40 | rvars = dsg.filter_by_attrs(cf_role="timeseries_id") 41 | assert len(rvars) == 1 42 | assert dsg.featureType.lower() == "timeseries" 43 | assert len(dsg.t_axes()) >= 1 44 | assert len(dsg.x_axes()) >= 1 45 | assert len(dsg.y_axes()) >= 1 46 | 47 | # Not a CR 48 | assert not dsg.filter_by_attrs(sample_dimension=lambda x: x is not None) 49 | 50 | # Not an IR 51 | assert not dsg.filter_by_attrs(instance_dimension=lambda x: x is not None) 52 | 53 | # OM files will always have a time variable with one dimension. 
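# e.g. time(time) shared by every station, with data variables shaped
# (station, time); a two-dimensional time(station, time) would instead be
# an incomplete multidimensional (IM) timeseries. (Illustrative shapes only.)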
54 | assert len(dsg.t_axes()[0].dimensions) == 1 55 | 56 | # Allow for string variables 57 | rvar = rvars[0] 58 | # 0 = single 59 | # 1 = array of strings/ints/bytes/etc 60 | # 2 = array of character arrays 61 | assert 0 <= len(rvar.dimensions) <= 2 62 | 63 | except BaseException: 64 | if strict is True: 65 | raise 66 | return False 67 | 68 | return True 69 | 70 | @classmethod 71 | def from_dataframe(cls, df, output, **kwargs): 72 | axes = get_default_axes(kwargs.pop("axes", {})) 73 | daxes = axes 74 | data_columns = [d for d in df.columns if d not in axes] 75 | 76 | reduce_dims = kwargs.pop("reduce_dims", False) 77 | _ = kwargs.pop("unlimited", False) 78 | 79 | unique_dims = kwargs.pop("unique_dims", False) 80 | if unique_dims is True: 81 | # Rename the dimension to avoid a dimension and coordinate having the same name 82 | # which is not support in xarray 83 | changed_axes = {k: f"{v}_dim" for k, v in axes._asdict().items()} 84 | daxes = get_default_axes(changed_axes) 85 | 86 | # Downcast anything from int64 to int32 87 | # Convert any timezone aware datetimes to native UTC times 88 | df = downcast_dataframe(nativize_times(df)) 89 | 90 | with OrthogonalMultidimensionalTimeseries(output, "w") as nc: 91 | station_group = df.groupby(axes.station) 92 | num_stations = len(station_group) 93 | has_z = axes.z is not None 94 | 95 | if reduce_dims is True and num_stations == 1: 96 | # If a station, we can reduce that dimension if it is of size 1 97 | def ts(i): 98 | return np.s_[:] 99 | 100 | default_dimensions = (daxes.t,) 101 | station_dimensions = () 102 | else: 103 | 104 | def ts(i): 105 | return np.s_[i, :] 106 | 107 | default_dimensions = (daxes.station, daxes.t) 108 | station_dimensions = (daxes.station,) 109 | nc.createDimension(daxes.station, num_stations) 110 | 111 | # Set the coordinates attribute correctly 112 | coordinates = [axes.t, axes.x, axes.y] 113 | if has_z is True: 114 | coordinates.insert(1, axes.z) 115 | coordinates = " ".join(coordinates) 116 | 117 | # assume all groups are the same size and have identical times 118 | _, sdf = list(station_group)[0] 119 | t = sdf[axes.t] 120 | 121 | # Metadata variables 122 | nc.createVariable("crs", "i4") 123 | 124 | # Create all of the variables 125 | nc.createDimension(daxes.t, t.size) 126 | time = nc.createVariable(axes.t, "f8", (daxes.t,)) 127 | station = nc.createVariable( 128 | axes.station, get_dtype(df[axes.station]), station_dimensions 129 | ) 130 | latitude = nc.createVariable(axes.y, get_dtype(df[axes.y]), station_dimensions) 131 | longitude = nc.createVariable(axes.x, get_dtype(df[axes.x]), station_dimensions) 132 | if has_z is True: 133 | z = nc.createVariable( 134 | axes.z, 135 | get_dtype(df[axes.z]), 136 | station_dimensions, 137 | fill_value=df[axes.z].dtype.type(cls.default_fill_value), 138 | ) 139 | 140 | attributes = dict_update(nc.nc_attributes(axes, daxes), kwargs.pop("attributes", {})) 141 | 142 | time[:] = get_ncdata_from_series(t, time).astype("f8") 143 | 144 | # Create vars based on full dataframe (to get all variables) 145 | for c in data_columns: 146 | var_name = cf_safe_name(c) 147 | if var_name not in nc.variables: 148 | v = create_ncvar_from_series( 149 | nc, 150 | var_name, 151 | default_dimensions, 152 | df[c], 153 | ) 154 | attributes[var_name] = dict_update( 155 | attributes.get(var_name, {}), {"coordinates": coordinates} 156 | ) 157 | 158 | for i, (uid, sdf) in enumerate(station_group): 159 | station[i] = uid 160 | latitude[i] = sdf[axes.y].iloc[0] 161 | longitude[i] = sdf[axes.x].iloc[0] 162 | 163 | if 
has_z is True: 164 | # TODO: write a test for a Z with a _FillValue 165 | z[i] = sdf[axes.z].iloc[0] 166 | 167 | for c in data_columns: 168 | # Create variable if it doesn't exist 169 | var_name = cf_safe_name(c) 170 | v = nc.variables[var_name] 171 | 172 | vvalues = get_ncdata_from_series(sdf[c], v) 173 | try: 174 | v[ts(i)] = vvalues 175 | except BaseException: 176 | L.debug(f"{v.name} was not written. Likely a metadata variable") 177 | 178 | # Set global attributes 179 | nc.update_attributes(attributes) 180 | 181 | return OrthogonalMultidimensionalTimeseries(output, **kwargs) 182 | 183 | def calculated_metadata( 184 | self, df=None, geometries=True, clean_cols=True, clean_rows=True, **kwargs 185 | ): 186 | # axes = get_default_axes(kwargs.pop('axes', {})) 187 | # if df is None: 188 | # df = self.to_dataframe(clean_cols=clean_cols, clean_rows=clean_rows, axes=axes) 189 | raise NotImplementedError 190 | 191 | def to_dataframe(self, clean_cols=False, clean_rows=False, **kwargs): 192 | axes = get_default_axes(kwargs.pop("axes", {})) 193 | 194 | axv = get_mapped_axes_variables(self, axes) 195 | 196 | # T 197 | t = get_masked_datetime_array(axv.t[:], axv.t) 198 | 199 | # X 200 | x = generic_masked(axv.x[:].repeat(t.size), attrs=self.vatts(axv.x.name)) 201 | 202 | # Y 203 | y = generic_masked(axv.y[:].repeat(t.size), attrs=self.vatts(axv.y.name)) 204 | 205 | # Z 206 | if axv.z is not None: 207 | z = generic_masked(axv.z[:].repeat(t.size), attrs=self.vatts(axv.z.name)) 208 | else: 209 | z = None 210 | 211 | svar = axv.station 212 | s = normalize_countable_array(svar) 213 | s = np.repeat(s, t.size) 214 | 215 | # now repeat t per station 216 | # figure out if this is a single-station file by checking 217 | # the dimension size of the x dimension 218 | if axv.x.ndim == 1: 219 | t = np.repeat(t, len(svar)) 220 | 221 | df_data = OrderedDict( 222 | [ 223 | (axes.t, t), 224 | (axes.x, x), 225 | (axes.y, y), 226 | (axes.z, z), 227 | (axes.station, s), 228 | ] 229 | ) 230 | 231 | building_index_to_drop = np.ma.zeros(t.size, dtype=bool) 232 | 233 | # Axes variables are already processed so skip them 234 | extract_vars = copy(self.variables) 235 | for ncvar in axv._asdict().values(): 236 | if ncvar is not None and ncvar.name in extract_vars: 237 | del extract_vars[ncvar.name] 238 | 239 | for i, (dnam, dvar) in enumerate(extract_vars.items()): 240 | vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype), attrs=self.vatts(dnam)) 241 | 242 | # Carry through size 1 variables 243 | if vdata.size == 1: 244 | if vdata[0] is np.ma.masked: 245 | L.warning(f"Skipping variable {dnam} that is completely masked") 246 | continue 247 | else: 248 | if dvar[:].flatten().size != t.size: 249 | L.warning(f"Variable {dnam} is not the correct size, skipping.") 250 | continue 251 | 252 | # Mark rows with data so we don't remove them with clear_rows 253 | if vdata.size == building_index_to_drop.size: 254 | building_index_to_drop = (building_index_to_drop == True) & (vdata.mask == True) # noqa 255 | 256 | # Handle scalars here at the end 257 | if vdata.size == 1: 258 | vdata = vdata[0] 259 | 260 | df_data[dnam] = vdata 261 | 262 | df = pd.DataFrame(df_data) 263 | 264 | # Drop all data columns with no data 265 | if clean_cols: 266 | df = df.dropna(axis=1, how="all") 267 | 268 | # Drop all data rows with no data variable data 269 | if clean_rows: 270 | df = df.iloc[~building_index_to_drop] 271 | 272 | return df 273 | 274 | def nc_attributes(self, axes, daxes): 275 | atts = super().nc_attributes() 276 | return dict_update( 277 
| atts, 278 | { 279 | "global": {"featureType": "timeseries", "cdm_data_type": "Timeseries"}, 280 | axes.station: {"cf_role": "timeseries_id", "long_name": "station identifier"}, 281 | axes.t: {"units": self.default_time_unit, "standard_name": "time", "axis": "T"}, 282 | axes.y: {"axis": "Y"}, 283 | axes.x: {"axis": "X"}, 284 | axes.z: {"axis": "Z"}, 285 | }, 286 | ) 287 | -------------------------------------------------------------------------------- /pocean/dsg/timeseriesProfile/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyoceans/pocean-core/6e4dda9c818c65b31192e8d5fb01d6bcf29a43f3/pocean/dsg/timeseriesProfile/__init__.py -------------------------------------------------------------------------------- /pocean/dsg/timeseriesProfile/im.py: -------------------------------------------------------------------------------- 1 | #!python 2 | from pocean.cf import CFDataset 3 | 4 | 5 | class IncompleteMultidimensionalTimeseriesProfile(CFDataset): 6 | @classmethod 7 | def is_mine(cls, dsg, strict=False): 8 | try: 9 | assert dsg.featureType.lower() == "timeseriesprofile" 10 | assert len(dsg.t_axes()) >= 1 11 | assert len(dsg.x_axes()) >= 1 12 | assert len(dsg.y_axes()) >= 1 13 | assert len(dsg.z_axes()) >= 1 14 | 15 | zvar = dsg.z_axes()[0] 16 | assert len(zvar.dimensions) > 1 17 | 18 | # Not ragged 19 | o_index_vars = dsg.filter_by_attrs(sample_dimension=lambda x: x is not None) 20 | assert len(o_index_vars) == 0 21 | 22 | r_index_vars = dsg.filter_by_attrs(instance_dimension=lambda x: x is not None) 23 | assert len(r_index_vars) == 0 24 | 25 | except BaseException: 26 | if strict is True: 27 | raise 28 | return False 29 | 30 | return True 31 | 32 | def from_dataframe(cls, df, output, **kwargs): 33 | raise NotImplementedError 34 | 35 | def calculated_metadata(self, df=None, geometries=True, clean_cols=True, clean_rows=True): 36 | # if df is None: 37 | # df = self.to_dataframe(clean_cols=clean_cols, clean_rows=clean_rows) 38 | raise NotImplementedError 39 | 40 | def to_dataframe(self): 41 | raise NotImplementedError 42 | -------------------------------------------------------------------------------- /pocean/dsg/timeseriesProfile/om.py: -------------------------------------------------------------------------------- 1 | #!python 2 | from collections import OrderedDict 3 | from copy import copy 4 | 5 | import numpy as np 6 | import pandas as pd 7 | from cftime import date2num 8 | 9 | from pocean import logger as L # noqa 10 | from pocean.cf import cf_safe_name, CFDataset 11 | from pocean.utils import ( 12 | create_ncvar_from_series, 13 | dict_update, 14 | downcast_dataframe, 15 | generic_masked, 16 | get_default_axes, 17 | get_dtype, 18 | get_mapped_axes_variables, 19 | get_masked_datetime_array, 20 | get_ncdata_from_series, 21 | nativize_times, 22 | normalize_countable_array, 23 | ) 24 | 25 | 26 | class OrthogonalMultidimensionalTimeseriesProfile(CFDataset): 27 | @classmethod 28 | def is_mine(cls, dsg, strict=False): 29 | try: 30 | assert dsg.featureType.lower() == "timeseriesprofile" 31 | assert len(dsg.t_axes()) >= 1 32 | assert len(dsg.x_axes()) >= 1 33 | assert len(dsg.y_axes()) >= 1 34 | assert len(dsg.z_axes()) >= 1 35 | 36 | # If there is only a single set of levels and a single set of 37 | # times, then it is orthogonal. 
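# e.g. time(time) and z(z) shared across all stations, with data variables
# shaped (time, z, station); per-profile level sets would need one of the
# ragged representations rejected below. (Illustrative shapes only.)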
38 | tvar = dsg.t_axes()[0] 39 | assert len(tvar.dimensions) == 1 40 | 41 | zvar = dsg.z_axes()[0] 42 | assert len(zvar.dimensions) == 1 43 | 44 | assert tvar.dimensions != zvar.dimensions 45 | 46 | # Not ragged 47 | o_index_vars = dsg.filter_by_attrs(sample_dimension=lambda x: x is not None) 48 | assert len(o_index_vars) == 0 49 | 50 | r_index_vars = dsg.filter_by_attrs(instance_dimension=lambda x: x is not None) 51 | assert len(r_index_vars) == 0 52 | 53 | except BaseException: 54 | if strict is True: 55 | raise 56 | return False 57 | 58 | return True 59 | 60 | @classmethod 61 | def from_dataframe(cls, df, output, **kwargs): 62 | axes = get_default_axes(kwargs.pop("axes", {})) 63 | daxes = axes 64 | data_columns = [d for d in df.columns if d not in axes] 65 | 66 | reduce_dims = kwargs.pop("reduce_dims", False) 67 | unlimited = kwargs.pop("unlimited", False) 68 | 69 | unique_dims = kwargs.pop("unique_dims", False) 70 | if unique_dims is True: 71 | # Rename the dimension to avoid a dimension and coordinate having the same name 72 | # which is not supported in xarray 73 | changed_axes = {k: f"{v}_dim" for k, v in axes._asdict().items()} 74 | daxes = get_default_axes(changed_axes) 75 | 76 | # Downcast anything from int64 to int32 77 | # Convert any timezone aware datetimes to native UTC times 78 | df = downcast_dataframe(nativize_times(df)) 79 | 80 | # Make a new index that is the Cartesian product of all of the values from all of the 81 | # values of the old index. This is so don't have to iterate over anything. The full column 82 | # of data will be able to be shaped to the size of the final unique sized dimensions. 83 | index_order = [axes.t, axes.z, axes.station] 84 | df = df.set_index(index_order) 85 | df = df.reindex(pd.MultiIndex.from_product(df.index.levels, names=index_order)) 86 | 87 | unique_z = df.index.get_level_values(axes.z).unique().values 88 | unique_t = ( 89 | df.index.get_level_values(axes.t).unique().tolist() 90 | ) # tolist converts to Timestamp 91 | all_stations = df.index.get_level_values(axes.station) 92 | unique_s = all_stations.unique() 93 | 94 | with OrthogonalMultidimensionalTimeseriesProfile(output, "w") as nc: 95 | if reduce_dims is True and unique_s.size == 1: 96 | # If a singular trajectory, we can reduce that dimension if it is of size 1 97 | default_dimensions = (daxes.t, daxes.z) 98 | station_dimensions = () 99 | else: 100 | default_dimensions = (daxes.t, daxes.z, daxes.station) 101 | station_dimensions = (daxes.station,) 102 | nc.createDimension(daxes.station, unique_s.size) 103 | 104 | station = nc.createVariable(axes.station, get_dtype(unique_s), station_dimensions) 105 | latitude = nc.createVariable(axes.y, get_dtype(df[axes.y]), station_dimensions) 106 | longitude = nc.createVariable(axes.x, get_dtype(df[axes.x]), station_dimensions) 107 | # Assign over loop because VLEN variables (strings) have to be assigned by integer index 108 | # and we need to find the lat/lon based on station index 109 | for si, st in enumerate(unique_s): 110 | station[si] = st 111 | latitude[si] = df[axes.y][all_stations == st].dropna().iloc[0] 112 | longitude[si] = df[axes.x][all_stations == st].dropna().iloc[0] 113 | 114 | # Metadata variables 115 | nc.createVariable("crs", "i4") 116 | 117 | # Create all of the variables 118 | if unlimited is True: 119 | nc.createDimension(daxes.t, None) 120 | else: 121 | nc.createDimension(daxes.t, len(unique_t)) 122 | time = nc.createVariable(axes.t, "f8", (daxes.t,)) 123 | time[:] = date2num(unique_t, 
units=cls.default_time_unit).astype("f8") 124 | 125 | nc.createDimension(daxes.z, unique_z.size) 126 | z = nc.createVariable(axes.z, get_dtype(unique_z), (daxes.z,)) 127 | z[:] = unique_z 128 | 129 | attributes = dict_update(nc.nc_attributes(axes, daxes), kwargs.pop("attributes", {})) 130 | 131 | # Variables defined on only the time axis and not the depth axis 132 | detach_z_vars = kwargs.pop("detach_z", []) 133 | detach_z_columnms = [p for p in detach_z_vars if p in data_columns] 134 | for c in detach_z_columnms: 135 | var_name = cf_safe_name(c) 136 | if var_name not in nc.variables: 137 | v = create_ncvar_from_series( 138 | nc, 139 | var_name, 140 | default_dimensions[0::2], # this removes the second dimension (z) 141 | df[c], 142 | ) 143 | attributes[var_name] = dict_update( 144 | attributes.get(var_name, {}), 145 | {"coordinates": f"{axes.t} {axes.x} {axes.y}"}, 146 | ) 147 | else: 148 | v = nc.variables[var_name] 149 | 150 | # Because we need access to the fillvalues here, we ask not to return 151 | # the values with them already filled. 152 | vvalues = get_ncdata_from_series(df[c], v, fillna=False) 153 | # Reshape to the full array, with Z 154 | vvalues = vvalues.reshape(len(unique_t), unique_z.size, unique_s.size) 155 | # The Z axis is always the second axis, take the mean over that axis 156 | vvalues = np.apply_along_axis(np.nanmean, 1, vvalues).flatten() 157 | # Now reshape to the array without Z 158 | vvalues = vvalues.reshape(len(unique_t), unique_s.size) 159 | try: 160 | v[:] = vvalues.reshape(v.shape) 161 | except BaseException: 162 | L.exception(f"Failed to add {c}") 163 | continue 164 | 165 | full_columns = [f for f in data_columns if f not in detach_z_columnms] 166 | for c in full_columns: 167 | # Create variable if it doesn't exist 168 | var_name = cf_safe_name(c) 169 | if var_name not in nc.variables: 170 | v = create_ncvar_from_series( 171 | nc, 172 | var_name, 173 | default_dimensions, 174 | df[c], 175 | ) 176 | attributes[var_name] = dict_update( 177 | attributes.get(var_name, {}), 178 | {"coordinates": f"{axes.t} {axes.z} {axes.x} {axes.y}"}, 179 | ) 180 | else: 181 | v = nc.variables[var_name] 182 | 183 | vvalues = get_ncdata_from_series(df[c], v) 184 | v[:] = vvalues.reshape(v.shape) 185 | 186 | nc.update_attributes(attributes) 187 | 188 | return OrthogonalMultidimensionalTimeseriesProfile(output, **kwargs) 189 | 190 | def calculated_metadata( 191 | self, df=None, geometries=True, clean_cols=True, clean_rows=True, **kwargs 192 | ): 193 | # axes = get_default_axes(kwargs.pop('axes', {})) 194 | # if df is None: 195 | # df = self.to_dataframe(clean_cols=clean_cols, clean_rows=clean_rows, axes=axes) 196 | raise NotImplementedError 197 | 198 | def to_dataframe(self, clean_cols=True, clean_rows=True, **kwargs): 199 | axes = get_default_axes(kwargs.pop("axes", {})) 200 | 201 | axv = get_mapped_axes_variables(self, axes) 202 | 203 | svar = axv.station 204 | s = normalize_countable_array(svar) 205 | 206 | # T 207 | t = get_masked_datetime_array(axv.t[:], axv.t) 208 | n_times = t.size 209 | 210 | # X 211 | x = generic_masked(axv.x[:], attrs=self.vatts(axv.x.name)) 212 | 213 | # Y 214 | y = generic_masked(axv.y[:], attrs=self.vatts(axv.y.name)) 215 | 216 | # Z 217 | z = generic_masked(axv.z[:], attrs=self.vatts(axv.z.name)) 218 | n_z = z.size 219 | 220 | # denormalize table structure 221 | t = np.repeat(t, s.size * n_z) 222 | z = np.tile(np.repeat(z, s.size), n_times) 223 | s = np.tile(s, n_z * n_times) 224 | y = np.tile(y, n_times * n_z) 225 | x = np.tile(x, n_times * n_z) 
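# Worked example of the ordering above (illustrative): with two times, two
# depths and a single station, t -> [t0, t0, t1, t1] via np.repeat while
# z -> [z0, z1, z0, z1] via np.tile, so every row of the frame is one
# unique (t, z, station) combination matching the (time, z, station) shapes.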
226 | 227 | df_data = OrderedDict( 228 | [ 229 | (axes.t, t), 230 | (axes.x, x), 231 | (axes.y, y), 232 | (axes.z, z), 233 | (axes.station, s), 234 | ] 235 | ) 236 | 237 | building_index_to_drop = np.ones(t.size, dtype=bool) 238 | 239 | # Axes variables are already processed so skip them 240 | extract_vars = copy(self.variables) 241 | for ncvar in axv._asdict().values(): 242 | if ncvar is not None and ncvar.name in extract_vars: 243 | del extract_vars[ncvar.name] 244 | 245 | for i, (dnam, dvar) in enumerate(extract_vars.items()): 246 | vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype), attrs=self.vatts(dnam)) 247 | 248 | # Carry through size 1 variables 249 | if vdata.size == 1: 250 | if vdata[0] is np.ma.masked: 251 | L.warning(f"Skipping variable {dnam} that is completely masked") 252 | continue 253 | 254 | # Carry through profile only variables 255 | elif dvar.dimensions == axv.t.dimensions: 256 | # Make the first value valid and fill with nans 257 | vdata = vdata.repeat(n_z).reshape((n_times, n_z)) 258 | # Set everything after the first value to missing 259 | vdata[:, 1:] = np.ma.masked 260 | vdata = vdata.flatten() 261 | if vdata.size != t.size: 262 | L.warning(f"Variable {dnam} is not the correct size, skipping.") 263 | continue 264 | 265 | else: 266 | if vdata.size != t.size: 267 | L.warning(f"Variable {dnam} is not the correct size, skipping.") 268 | continue 269 | 270 | # Mark rows with data so we don't remove them with clear_rows 271 | if vdata.size == building_index_to_drop.size: 272 | building_index_to_drop = (building_index_to_drop == True) & (vdata.mask == True) # noqa 273 | 274 | # Handle scalars here at the end 275 | if vdata.size == 1: 276 | vdata = vdata[0] 277 | 278 | df_data[dnam] = vdata 279 | 280 | df = pd.DataFrame(df_data) 281 | 282 | # Drop all data columns with no data 283 | if clean_cols: 284 | df = df.dropna(axis=1, how="all") 285 | 286 | # Drop all data rows with no data variable data 287 | if clean_rows: 288 | df = df.iloc[~building_index_to_drop] 289 | 290 | return df 291 | 292 | def nc_attributes(self, axes, daxes): 293 | atts = super().nc_attributes() 294 | return dict_update( 295 | atts, 296 | { 297 | "global": { 298 | "featureType": "timeSeriesProfile", 299 | "cdm_data_type": "TimeseriesProfile", 300 | }, 301 | axes.station: {"cf_role": "timeseries_id", "long_name": "station identifier"}, 302 | axes.x: {"axis": "X"}, 303 | axes.y: {"axis": "Y"}, 304 | axes.z: {"axis": "Z"}, 305 | axes.t: {"units": self.default_time_unit, "standard_name": "time", "axis": "T"}, 306 | }, 307 | ) 308 | -------------------------------------------------------------------------------- /pocean/dsg/trajectory/__init__.py: -------------------------------------------------------------------------------- 1 | #!python 2 | from collections import namedtuple 3 | 4 | from shapely.geometry import LineString, Point 5 | 6 | from pocean.utils import ( 7 | unique_justseen, 8 | ) 9 | 10 | trajectory_meta = namedtuple("Trajectory", ["min_z", "max_z", "min_t", "max_t", "geometry"]) 11 | 12 | trajectories_meta = namedtuple( 13 | "TrajectoryCollection", ["min_z", "max_z", "min_t", "max_t", "trajectories"] 14 | ) 15 | 16 | 17 | def trajectory_calculated_metadata(df, axes, geometries=True): 18 | trajectories = {} 19 | for tid, tgroup in df.groupby(axes.trajectory): 20 | tgroup = tgroup.sort_values(axes.t) 21 | 22 | if geometries: 23 | null_coordinates = tgroup[axes.x].isnull() | tgroup[axes.y].isnull() 24 | coords = list( 25 | unique_justseen( 26 | zip( 27 | tgroup.loc[~null_coordinates, 
axes.x].tolist(), 28 | tgroup.loc[~null_coordinates, axes.y].tolist(), 29 | ) 30 | ) 31 | ) 32 | else: 33 | # Calculate the geometry as the linestring between all of the profile points 34 | first_row = tgroup.iloc[0] 35 | coords = [(first_row[axes.x], first_row[axes.y])] 36 | 37 | geometry = None 38 | if len(coords) > 1: 39 | geometry = LineString(coords) 40 | elif len(coords) == 1: 41 | geometry = Point(coords[0]) 42 | 43 | trajectories[tid] = trajectory_meta( 44 | min_z=tgroup[axes.z].min(), 45 | max_z=tgroup[axes.z].max(), 46 | min_t=tgroup[axes.t].min(), 47 | max_t=tgroup[axes.t].max(), 48 | geometry=geometry, 49 | ) 50 | 51 | return trajectories_meta( 52 | min_z=df[axes.z].min(), 53 | max_z=df[axes.z].max(), 54 | min_t=df[axes.t].min(), 55 | max_t=df[axes.t].max(), 56 | trajectories=trajectories, 57 | ) 58 | -------------------------------------------------------------------------------- /pocean/dsg/trajectory/cr.py: -------------------------------------------------------------------------------- 1 | #!python 2 | from collections import OrderedDict 3 | from copy import copy 4 | 5 | import numpy as np 6 | import pandas as pd 7 | 8 | from pocean import logger as L # noqa 9 | from pocean.cf import cf_safe_name, CFDataset 10 | from pocean.dsg.trajectory import trajectory_calculated_metadata 11 | from pocean.utils import ( 12 | create_ncvar_from_series, 13 | dict_update, 14 | downcast_dataframe, 15 | generic_masked, 16 | get_default_axes, 17 | get_dtype, 18 | get_mapped_axes_variables, 19 | get_masked_datetime_array, 20 | get_ncdata_from_series, 21 | nativize_times, 22 | normalize_countable_array, 23 | ) 24 | 25 | 26 | class ContiguousRaggedTrajectory(CFDataset): 27 | @classmethod 28 | def is_mine(cls, dsg, strict=False): 29 | try: 30 | rvars = dsg.filter_by_attrs(cf_role="trajectory_id") 31 | assert len(rvars) == 1 32 | assert dsg.featureType.lower() == "trajectory" 33 | assert len(dsg.t_axes()) >= 1 34 | assert len(dsg.x_axes()) >= 1 35 | assert len(dsg.y_axes()) >= 1 36 | assert len(dsg.z_axes()) >= 1 37 | 38 | o_index_vars = dsg.filter_by_attrs(sample_dimension=lambda x: x is not None) 39 | assert len(o_index_vars) == 1 40 | assert o_index_vars[0].sample_dimension in dsg.dimensions # Sample dimension 41 | 42 | # Allow for string variables 43 | rvar = rvars[0] 44 | # 0 = single 45 | # 1 = array of strings/ints/bytes/etc 46 | # 2 = array of character arrays 47 | assert 0 <= len(rvar.dimensions) <= 2 48 | except BaseException: 49 | if strict is True: 50 | raise 51 | return False 52 | 53 | return True 54 | 55 | @classmethod 56 | def from_dataframe(cls, df, output, **kwargs): 57 | axes = get_default_axes(kwargs.pop("axes", {})) 58 | daxes = axes 59 | 60 | # Should never be a CR file with one trajectory so we ignore the "reduce_dims" attribute 61 | _ = kwargs.pop("reduce_dims", False) # noqa 62 | unlimited = kwargs.pop("unlimited", False) 63 | 64 | unique_dims = kwargs.pop("unique_dims", False) 65 | if unique_dims is True: 66 | # Rename the dimension to avoid a dimension and coordinate having the same name 67 | # which is not support in xarray 68 | changed_axes = {k: f"{v}_dim" for k, v in axes._asdict().items()} 69 | daxes = get_default_axes(changed_axes) 70 | 71 | # Downcast anything from int64 to int32 72 | # Convert any timezone aware datetimes to native UTC times 73 | df = downcast_dataframe(nativize_times(df)) 74 | 75 | with ContiguousRaggedTrajectory(output, "w") as nc: 76 | trajectory_groups = df.groupby(axes.trajectory) 77 | unique_trajectories = 
list(trajectory_groups.groups.keys()) 78 | num_trajectories = len(unique_trajectories) 79 | nc.createDimension(daxes.trajectory, num_trajectories) 80 | trajectory = nc.createVariable( 81 | axes.trajectory, get_dtype(df[axes.trajectory]), (daxes.trajectory,) 82 | ) 83 | 84 | # Get unique obs by grouping on traj getting the max size 85 | if unlimited is True: 86 | nc.createDimension(daxes.sample, None) 87 | else: 88 | nc.createDimension(daxes.sample, len(df)) 89 | 90 | # Number of observations in each trajectory 91 | row_size = nc.createVariable("rowSize", "i4", (daxes.trajectory,)) 92 | 93 | attributes = dict_update(nc.nc_attributes(axes, daxes), kwargs.pop("attributes", {})) 94 | 95 | # Variables defined on only the trajectory axis 96 | traj_vars = kwargs.pop("traj_vars", []) 97 | traj_columns = [p for p in traj_vars if p in df.columns] 98 | for c in traj_columns: 99 | var_name = cf_safe_name(c) 100 | if var_name not in nc.variables: 101 | create_ncvar_from_series( 102 | nc, 103 | var_name, 104 | (daxes.trajectory,), 105 | df[c], 106 | ) 107 | 108 | for i, (trajid, trg) in enumerate(trajectory_groups): 109 | trajectory[i] = trajid 110 | row_size[i] = len(trg) 111 | 112 | # Save any trajectory variables using the first value found 113 | # in the column. 114 | for c in traj_columns: 115 | var_name = cf_safe_name(c) 116 | if var_name not in nc.variables: 117 | continue 118 | v = nc.variables[var_name] 119 | vvalues = get_ncdata_from_series(trg[c], v)[0] 120 | try: 121 | v[i] = vvalues 122 | except BaseException: 123 | L.exception(f"Failed to add {c}") 124 | continue 125 | 126 | # Add all of the columns based on the sample dimension. Take all columns and remove the 127 | # trajectory, rowSize and other trajectory based columns. 128 | sample_columns = [ 129 | f for f in df.columns if f not in traj_columns + ["rowSize", axes.trajectory] 130 | ] 131 | for c in sample_columns: 132 | var_name = cf_safe_name(c) 133 | if var_name not in nc.variables: 134 | v = create_ncvar_from_series( 135 | nc, 136 | var_name, 137 | (daxes.sample,), 138 | df[c], 139 | ) 140 | else: 141 | v = nc.variables[var_name] 142 | vvalues = get_ncdata_from_series(df[c], v) 143 | try: 144 | if unlimited is True: 145 | v[:] = vvalues 146 | else: 147 | v[:] = vvalues.reshape(v.shape) 148 | except BaseException: 149 | L.exception(f"Failed to add {c}") 150 | continue 151 | 152 | # Metadata variables 153 | if "crs" not in nc.variables: 154 | nc.createVariable("crs", "i4") 155 | 156 | # Set attributes 157 | nc.update_attributes(attributes) 158 | 159 | return ContiguousRaggedTrajectory(output, **kwargs) 160 | 161 | def calculated_metadata( 162 | self, df=None, geometries=True, clean_cols=True, clean_rows=True, **kwargs 163 | ): 164 | axes = get_default_axes(kwargs.pop("axes", {})) 165 | if df is None: 166 | df = self.to_dataframe(clean_cols=clean_cols, clean_rows=clean_rows, axes=axes) 167 | return trajectory_calculated_metadata(df, axes, geometries) 168 | 169 | def to_dataframe(self, clean_cols=True, clean_rows=True, **kwargs): 170 | axes = get_default_axes(kwargs.pop("axes", {})) 171 | 172 | axv = get_mapped_axes_variables(self, axes) 173 | 174 | o_index_var = self.filter_by_attrs(sample_dimension=lambda x: x is not None) 175 | if not o_index_var: 176 | raise ValueError( 177 | 'Could not find the "sample_dimension" attribute on any variables, ' 178 | "is this a valid {}?".format(self.__class__.__name__) 179 | ) 180 | else: 181 | o_index_var = o_index_var[0] 182 | o_dim = self.dimensions[o_index_var.sample_dimension] # Sample 
dimension 183 | t_dim = o_index_var.dimensions 184 | 185 | # Trajectory 186 | row_sizes = o_index_var[:] 187 | traj_data = normalize_countable_array(axv.trajectory) 188 | traj_data = np.repeat(traj_data, row_sizes) 189 | 190 | # time 191 | time_data = get_masked_datetime_array(axv.t[:], axv.t).flatten() 192 | 193 | df_data = OrderedDict([(axes.t, time_data), (axes.trajectory, traj_data)]) 194 | 195 | building_index_to_drop = np.ones(o_dim.size, dtype=bool) 196 | 197 | extract_vars = copy(self.variables) 198 | # Skip the time and row index variables 199 | del extract_vars[o_index_var.name] 200 | del extract_vars[axes.t] 201 | 202 | for i, (dnam, dvar) in enumerate(extract_vars.items()): 203 | # Trajectory dimensions 204 | if dvar.dimensions == t_dim: 205 | vdata = np.repeat(generic_masked(dvar[:], attrs=self.vatts(dnam)), row_sizes) 206 | 207 | # Sample dimensions 208 | elif dvar.dimensions == (o_dim.name,): 209 | vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype), attrs=self.vatts(dnam)) 210 | 211 | else: 212 | vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype), attrs=self.vatts(dnam)) 213 | # Carry through size 1 variables 214 | if vdata.size == 1: 215 | if vdata[0] is np.ma.masked: 216 | L.warning(f"Skipping variable {dnam} that is completely masked") 217 | continue 218 | else: 219 | L.warning(f"Skipping variable {dnam} since it didn't match any dimension sizes") 220 | continue 221 | 222 | # Mark rows with data so we don't remove them with clear_rows 223 | if vdata.size == building_index_to_drop.size: 224 | building_index_to_drop = (building_index_to_drop == True) & (vdata.mask == True) # noqa 225 | 226 | # Handle scalars here at the end 227 | if vdata.size == 1: 228 | vdata = vdata[0] 229 | 230 | df_data[dnam] = vdata 231 | 232 | df = pd.DataFrame(df_data) 233 | 234 | # Drop all data columns with no data 235 | if clean_cols: 236 | df = df.dropna(axis=1, how="all") 237 | 238 | # Drop all data rows with no data variable data 239 | if clean_rows: 240 | df = df.iloc[~building_index_to_drop] 241 | 242 | return df 243 | 244 | def nc_attributes(self, axes, daxes): 245 | atts = super().nc_attributes() 246 | return dict_update( 247 | atts, 248 | { 249 | "global": {"featureType": "trajectory", "cdm_data_type": "Trajectory"}, 250 | axes.trajectory: { 251 | "cf_role": "trajectory_id", 252 | "long_name": "trajectory identifier", 253 | "ioos_category": "identifier", 254 | }, 255 | axes.x: {"axis": "X"}, 256 | axes.y: {"axis": "Y"}, 257 | axes.z: {"axis": "Z"}, 258 | axes.t: {"units": self.default_time_unit, "standard_name": "time", "axis": "T"}, 259 | "rowSize": {"sample_dimension": daxes.sample}, 260 | }, 261 | ) 262 | -------------------------------------------------------------------------------- /pocean/dsg/trajectory/ir.py: -------------------------------------------------------------------------------- 1 | #!python 2 | from pocean.cf import CFDataset 3 | 4 | 5 | class IndexedRaggedTrajectory(CFDataset): 6 | def from_dataframe(cls, df, output, **kwargs): 7 | raise NotImplementedError 8 | 9 | def calculated_metadata(self, df=None, geometries=True, clean_cols=True, clean_rows=True): 10 | # if df is None: 11 | # df = self.to_dataframe(clean_cols=clean_cols, clean_rows=clean_rows) 12 | raise NotImplementedError 13 | 14 | def to_dataframe(self): 15 | raise NotImplementedError 16 | -------------------------------------------------------------------------------- /pocean/dsg/trajectoryProfile/__init__.py: -------------------------------------------------------------------------------- 
1 | #!python 2 | from pocean.dsg.profile import profile_calculated_metadata 3 | from pocean.dsg.trajectory import trajectories_meta 4 | 5 | 6 | def trajectory_profile_calculated_metadata(df, axes, geometries=True): 7 | trajectories = {} 8 | for tid, tgroup in df.groupby(axes.trajectory): 9 | tgroup = tgroup.sort_values(axes.t) 10 | trajectories[tid] = profile_calculated_metadata(tgroup, axes, geometries) 11 | 12 | return trajectories_meta( 13 | min_z=df[axes.z].min(), 14 | max_z=df[axes.z].max(), 15 | min_t=df[axes.t].min(), 16 | max_t=df[axes.t].max(), 17 | trajectories=trajectories, 18 | ) 19 | -------------------------------------------------------------------------------- /pocean/dsg/utils.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | import pandas as pd 4 | from shapely.geometry import ( 5 | box, 6 | LineString, 7 | Point, 8 | Polygon, 9 | ) 10 | from shapely.validation import make_valid 11 | 12 | from pocean import logger as L # noqa 13 | from pocean.utils import dict_update, get_default_axes, unique_justseen 14 | 15 | datetime.UTC = datetime.timezone.utc 16 | 17 | 18 | def get_calculated_attributes(df, axes=None, history=None): 19 | """Functions to automate netCDF attribute generation from the data itself. 20 | This is a wrapper for the other four functions, which could be called separately. 21 | 22 | :param df: data (Pandas DataFrame) 23 | :param axes: keys (x,y,z,t) are associated with actual column names (dictionary) 24 | :param history: text initializing audit trail for modifications to the original data (optional, string) 25 | :return: dictionary of global attributes 26 | """ 27 | 28 | axes = get_default_axes(axes) 29 | attrs = get_geographic_attributes(df, axes) 30 | attrs = dict_update(attrs, get_vertical_attributes(df, axes)) 31 | attrs = dict_update(attrs, get_temporal_attributes(df, axes)) 32 | attrs = dict_update(attrs, get_creation_attributes(history)) 33 | 34 | return attrs 35 | 36 | 37 | def get_geographic_attributes(df, axes=None): 38 | """Use values in a dataframe to set geographic attributes for the eventual netCDF file 39 | Attribute names come from https://www.ncei.noaa.gov/data/oceans/ncei/formats/netcdf/v2.0/index.html 40 | The coordinate reference system (CRS) is assumed to be EPSG:4326, which is WGS84 and is used with 41 | GPS satellite navigation (http://spatialreference.org/ref/epsg/wgs-84/). This is NCEI's default. 42 | Coordinate values are latitude (decimal degrees_north) and longitude (decimal degrees_east). 43 | Longitude values are limited to [-180, 180). 44 | 45 | :param df: data (Pandas DataFrame) 46 | :param axes: keys (x,y,z,t) are associated with actual column names (dictionary) 47 | :return: nested dictionary of variable and global attributes 48 | """ 49 | axes = get_default_axes(axes) 50 | 51 | carry_miny = round(float(df[axes.y].min()), 6) 52 | carry_maxy = round(float(df[axes.y].max()), 6) 53 | carry_minx = round(float(df[axes.x].min()), 6) 54 | carry_maxx = round(float(df[axes.x].max()), 6) 55 | 56 | notnull = df[axes.x].notnull() & df[axes.y].notnull() 57 | coords = list(zip(df.loc[notnull, axes.x], df.loc[notnull, axes.y])) 58 | 59 | if len(set(coords)) == 1: 60 | geoclass = Point 61 | # The set is to work around the fact that pocean 62 | # relied on a shapely<2 bug to pass a vector here instead of a point.
63 | coords = set(coords) 64 | elif len(coords) > 2: 65 | geoclass = Polygon 66 | else: 67 | geoclass = LineString 68 | 69 | p = geoclass(coords) 70 | dateline = LineString([(180, 90), (-180, -90)]) 71 | # If we cross the dateline normalize the coordinates before polygon 72 | if dateline.crosses(p): 73 | newx = (df.loc[notnull, axes.x] + 360) % 360 74 | p = geoclass(zip(newx, df.loc[notnull, axes.y])) 75 | p = make_valid(p) 76 | 77 | geometry_bbox = box(*p.bounds).wkt 78 | geometry_wkt = p.convex_hull.wkt 79 | 80 | return { 81 | "variables": { 82 | axes.y: { 83 | "attributes": { 84 | "actual_min": carry_miny, 85 | "actual_max": carry_maxy, 86 | } 87 | }, 88 | axes.x: { 89 | "attributes": { 90 | "actual_min": carry_minx, 91 | "actual_max": carry_maxx, 92 | } 93 | }, 94 | }, 95 | "attributes": { 96 | "geospatial_lat_min": carry_miny, 97 | "geospatial_lat_max": carry_maxy, 98 | "geospatial_lon_min": carry_minx, 99 | "geospatial_lon_max": carry_maxx, 100 | "geospatial_bbox": geometry_bbox, 101 | "geospatial_bounds": geometry_wkt, 102 | "geospatial_bounds_crs": "EPSG:4326", 103 | }, 104 | } 105 | 106 | 107 | def get_vertical_attributes(df, axes=None): 108 | """Use values in a dataframe to set vertical attributes for the eventual netCDF file 109 | Attribute names come from https://www.ncei.noaa.gov/data/oceans/ncei/formats/netcdf/v2.0/index.html 110 | The CRS, geospatial_bounds_vertical_crs, cannot be assumed because NCEI suggests any of 111 | * 'EPSG:5829' (instantaneous height above sea level), 112 | * 'EPSG:5831' (instantaneous depth below sea level), or 113 | * 'EPSG:5703' (NAVD88 height). 114 | Likewise, geospatial_vertical_positive cannot be assumed to be either 'up' or 'down'. 115 | Set these attributes separately according to the dataset. 116 | Note: values are cast from numpy.int to float 117 | 118 | :param df: data (Pandas DataFrame) 119 | :param axes: keys (x,y,z,t) are associated with actual column names (dictionary). z in meters. 120 | :return: nested dictionary of variable and global attributes 121 | """ 122 | axes = get_default_axes(axes) 123 | minz = round(float(df[axes.z].min()), 6) 124 | maxz = round(float(df[axes.z].max()), 6) 125 | 126 | return { 127 | "variables": { 128 | axes.z: { 129 | "attributes": { 130 | "actual_min": minz, 131 | "actual_max": maxz, 132 | } 133 | }, 134 | }, 135 | "attributes": { 136 | "geospatial_vertical_min": minz, 137 | "geospatial_vertical_max": maxz, 138 | "geospatial_vertical_units": "m", 139 | }, 140 | } 141 | 142 | 143 | def get_temporal_attributes(df, axes=None): 144 | """Use values in a dataframe to set temporal attributes for the eventual netCDF file 145 | Attribute names come from https://www.ncei.noaa.gov/data/oceans/ncei/formats/netcdf/v2.0/index.html 146 | 147 | :param df: data (Pandas DataFrame) 148 | :param axes: keys (x,y,z,t) are associated with actual column names (dictionary). z in meters. 
149 | :return: nested dictionary of variable and global attributes 150 | """ 151 | 152 | axes = get_default_axes(axes) 153 | mint = df[axes.t].min() 154 | maxt = df[axes.t].max() 155 | 156 | times = pd.DatetimeIndex(unique_justseen(df[axes.t])) 157 | dt_index_diff = times[1:] - times[:-1] 158 | dt_counts = dt_index_diff.value_counts(sort=True) 159 | 160 | if dt_counts.size > 0 and dt_counts.values[0] / (len(times) - 1) > 0.75: 161 | mode_value = dt_counts.index[0] 162 | else: 163 | # Calculate a static resolution 164 | mode_value = (maxt - mint) / len(times) 165 | 166 | return { 167 | "variables": { 168 | axes.t: { 169 | "attributes": { 170 | "actual_min": mint.strftime("%Y-%m-%dT%H:%M:%SZ"), 171 | "actual_max": maxt.strftime("%Y-%m-%dT%H:%M:%SZ"), 172 | } 173 | }, 174 | }, 175 | "attributes": { 176 | "time_coverage_start": mint.strftime("%Y-%m-%dT%H:%M:%SZ"), 177 | "time_coverage_end": maxt.strftime("%Y-%m-%dT%H:%M:%SZ"), 178 | "time_coverage_duration": (maxt - mint).round("1s").isoformat(), 179 | "time_coverage_resolution": mode_value.round("1s").isoformat(), 180 | }, 181 | } 182 | 183 | 184 | def get_creation_attributes(history=None): 185 | """Query system for netCDF file creation times 186 | 187 | :param history: text initializing audit trail for modifications to the original data (optional, string) 188 | :return: dictionary of global attributes 189 | """ 190 | nc_create_ts = datetime.datetime.now(datetime.UTC).strftime("%Y-%m-%dT%H:%M:%SZ") 191 | 192 | attrs = { 193 | "attributes": { 194 | "date_created": nc_create_ts, 195 | "date_issued": nc_create_ts, 196 | "date_modified": nc_create_ts, 197 | } 198 | } 199 | 200 | # Add in the passed in history 201 | if history is not None: 202 | attrs["attributes"]["history"] = f"{nc_create_ts} - {history}" 203 | 204 | return attrs 205 | -------------------------------------------------------------------------------- /pocean/grid/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyoceans/pocean-core/6e4dda9c818c65b31192e8d5fb01d6bcf29a43f3/pocean/grid/__init__.py -------------------------------------------------------------------------------- /pocean/meta.py: -------------------------------------------------------------------------------- 1 | #!python 2 | import os 3 | from collections import OrderedDict 4 | from collections.abc import Iterable, Mapping 5 | from copy import deepcopy 6 | 7 | import numpy as np 8 | import simplejson as json 9 | 10 | from . 
import logger 11 | 12 | 13 | class MetaInterface(Mapping): 14 | VALID_KEYS = ["dimensions", "variables", "attributes"] 15 | 16 | @classmethod 17 | def from_jsonfile(cls, jsf): 18 | if not os.path.isfile(jsf): 19 | raise ValueError(f"{jsf} is not a file") 20 | 21 | with open(jsf) as jf: 22 | return cls.from_jsonstr(jf.read()) 23 | 24 | @classmethod 25 | def from_jsonstr(cls, js): 26 | try: 27 | d = json.loads(js, object_pairs_hook=OrderedDict) 28 | except BaseException as e: 29 | raise ValueError(f"Could not parse JSON string: {e}") 30 | 31 | return cls(d) 32 | 33 | def __init__(self, *args, **kwargs): 34 | self._data = dict(*args, **kwargs) 35 | 36 | def __getitem__(self, key): 37 | return self._data[key] 38 | 39 | def __iter__(self): 40 | return iter(self._data) 41 | 42 | def __len__(self): 43 | return len(self._data) 44 | 45 | def __str__(self): 46 | return str(self._data) 47 | 48 | 49 | def safe_attribute_typing(zdtype, value): 50 | try: 51 | return zdtype.type(value) 52 | except ValueError: 53 | logger.warning(f"Could not convert {value} to type {zdtype}") 54 | return None 55 | 56 | 57 | def string_to_dtype(type_str): 58 | # int - we avoid int64 59 | if type_str in ["int", "int32", "int64", "i", "i4", "i8", "i32", "i64", "long"]: 60 | return np.dtype("int32") 61 | 62 | elif type_str in ["uint", "ui4", "ui", "uint32", "uint64", "ui64", "u4", "u8"]: 63 | return np.dtype("uint32") 64 | 65 | elif type_str in ["float", "float32", "f", "f4", "f32"]: 66 | return np.dtype("float32") 67 | 68 | elif type_str in ["double", "float64", "d", "f8", "f64"]: 69 | return np.dtype("float64") 70 | 71 | elif type_str in ["byte", "bytes8", "i1", "b", "B", "int8"]: 72 | return np.dtype("int8") 73 | 74 | elif type_str in ["ubyte", "ui1", "ubuB", "uint8"]: 75 | return np.dtype("uint8") 76 | 77 | elif type_str in ["char", "c", "string", "S1", "str", "unicode", "string8"]: 78 | return np.dtype("U") 79 | 80 | elif type_str in ["short", "s", "i2", "h", "int16"]: 81 | return np.dtype("int16") 82 | 83 | elif type_str in ["ushort", "us", "u2", "ui2", "uh", "uint16"]: 84 | return np.dtype("uint16") 85 | 86 | raise ValueError(f"Could not find dtype for {type_str}") 87 | 88 | 89 | def untype_attributes(vd): 90 | typed = OrderedDict() 91 | for k, v in vd.items(): 92 | if isinstance(v, dict): 93 | dtype = string_to_dtype(v.get("type")) 94 | vval = v.get("data") 95 | if isinstance(vval, (list, tuple)): 96 | safe = (safe_attribute_typing(dtype, x) for x in vval) 97 | typed[k] = [x for x in safe if x is not None] 98 | else: 99 | safe = safe_attribute_typing(dtype, vval) 100 | if safe is not None: 101 | typed[k] = safe 102 | else: 103 | typed[k] = v 104 | return typed 105 | 106 | 107 | def ncpyattributes(obj, verbose=True): 108 | """Converts any attributes that are not native python types to those types""" 109 | 110 | return_copy = deepcopy(obj) 111 | 112 | for k, v in obj.items(): 113 | if isinstance(v, np.ndarray): 114 | newv = v.tolist() 115 | elif hasattr(v, "dtype"): 116 | newv = v.item() 117 | else: 118 | newv = v 119 | 120 | if hasattr(v, "dtype"): 121 | newt = v.dtype.name 122 | else: 123 | if isinstance(v, Iterable) and v: 124 | # Use the type of the first one 125 | v = v[0] 126 | else: 127 | # This is likely an empty value 128 | # so just default to an empty string 129 | v = "" 130 | newt = type(v).__name__ 131 | 132 | if verbose is True: 133 | return_copy[k] = {"type": newt, "data": newv} 134 | else: 135 | return_copy[k] = newv 136 | 137 | return return_copy 138 | 
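The helpers above form a JSON-safe round trip for netCDF attributes: ncpyattributes tags each value with a numpy type name, and untype_attributes casts the values back through string_to_dtype. A minimal sketch of that round trip (hypothetical attribute values, not from this repo):

    import numpy as np

    from pocean.meta import ncpyattributes, untype_attributes

    atts = {
        "scale_factor": np.float32(0.1),              # scalar -> {'type': 'float32', 'data': ...}
        "flag_values": np.array([1, 2], dtype="i4"),  # array -> {'type': 'int32', 'data': [1, 2]}
    }
    typed = ncpyattributes(atts)    # JSON-serializable, typed form
    raw = untype_attributes(typed)  # values re-cast via string_to_dtype
    assert raw["scale_factor"] == np.float32(0.1)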
-------------------------------------------------------------------------------- /pocean/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyoceans/pocean-core/6e4dda9c818c65b31192e8d5fb01d6bcf29a43f3/pocean/tests/__init__.py -------------------------------------------------------------------------------- /pocean/tests/download_test_data.py: -------------------------------------------------------------------------------- 1 | import zipfile 2 | from pathlib import Path 3 | 4 | import pooch 5 | 6 | 7 | def download_test_data(): 8 | url = "https://github.com/pyoceans/pocean-core/releases/download" 9 | version = "2025.01" 10 | 11 | fname = pooch.retrieve( 12 | url=f"{url}/{version}/test_data.zip", 13 | known_hash="sha256:41180c6bc6017de935250c9e8c1bbb407507049baebd767692c4f74fb8d662a8", 14 | ) 15 | 16 | here = Path(__file__).resolve().parent 17 | print(fname) 18 | print(here) 19 | with zipfile.ZipFile(fname, "r") as zip_ref: 20 | zip_ref.extractall(here) 21 | 22 | 23 | if __name__ == "__main__": 24 | download_test_data() 25 | -------------------------------------------------------------------------------- /pocean/tests/dsg/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyoceans/pocean-core/6e4dda9c818c65b31192e8d5fb01d6bcf29a43f3/pocean/tests/dsg/__init__.py -------------------------------------------------------------------------------- /pocean/tests/dsg/profile/test_profile_im.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import tempfile 4 | import unittest 5 | 6 | import numpy as np 7 | import pandas as pd 8 | from dateutil.parser import parse as dtparse 9 | 10 | from pocean import logger 11 | from pocean.dsg import IncompleteMultidimensionalProfile 12 | from pocean.tests.dsg.test_new import test_is_mine 13 | 14 | logger.level = logging.DEBUG 15 | logger.handlers = [logging.StreamHandler()] 16 | 17 | 18 | class TestIMPStrings(unittest.TestCase): 19 | def setUp(self): 20 | self.df = pd.read_csv( 21 | os.path.join(os.path.dirname(__file__), "resources", "basis_2011.csv"), 22 | parse_dates=["time"], 23 | ) 24 | # self.df = pd.read_csv('resources/basis_2011.csv', parse_dates=['time']) 25 | 26 | def test_print_dtypes(self): 27 | print(self.df.dtypes) 28 | 29 | def test_write_nc(self): 30 | fid, single_tmp = tempfile.mkstemp(suffix=".nc") 31 | 32 | axes = {"t": "time", "x": "longitude", "y": "latitude", "z": "z", "profile": "stationid"} 33 | 34 | with IncompleteMultidimensionalProfile.from_dataframe( 35 | self.df, single_tmp, axes=axes, mode="a" 36 | ) as ncd: 37 | ncd.renameDimension("stationid", "profile") 38 | 39 | test_is_mine(IncompleteMultidimensionalProfile, single_tmp) # Try to load it again 40 | os.close(fid) 41 | os.remove(single_tmp) 42 | 43 | 44 | class TestIncompleteMultidimensionalProfile(unittest.TestCase): 45 | def setUp(self): 46 | self.multi = os.path.join(os.path.dirname(__file__), "resources", "im-multiple.nc") 47 | 48 | def test_imp_load(self): 49 | IncompleteMultidimensionalProfile(self.multi).close() 50 | 51 | def test_imp_dataframe(self): 52 | fid, single_tmp = tempfile.mkstemp(suffix=".nc") 53 | with IncompleteMultidimensionalProfile(self.multi) as ncd: 54 | df = ncd.to_dataframe() 55 | with IncompleteMultidimensionalProfile.from_dataframe(df, single_tmp) as result_ncd: 56 | assert "profile" in result_ncd.dimensions 57 | 
test_is_mine(IncompleteMultidimensionalProfile, single_tmp) # Try to load it again 58 | os.close(fid) 59 | os.remove(single_tmp) 60 | 61 | def test_imp_dataframe_unique_dims(self): 62 | fid, single_tmp = tempfile.mkstemp(suffix=".nc") 63 | with IncompleteMultidimensionalProfile(self.multi) as ncd: 64 | df = ncd.to_dataframe() 65 | with IncompleteMultidimensionalProfile.from_dataframe( 66 | df, single_tmp, unique_dims=True 67 | ) as result_ncd: 68 | assert "profile_dim" in result_ncd.dimensions 69 | test_is_mine(IncompleteMultidimensionalProfile, single_tmp) # Try to load it again 70 | os.close(fid) 71 | os.remove(single_tmp) 72 | 73 | def test_imp_calculated_metadata(self): 74 | with IncompleteMultidimensionalProfile(self.multi) as ncd: 75 | m = ncd.calculated_metadata() 76 | assert m.min_t == dtparse("1990-01-01 00:00:00") 77 | assert m.max_t == dtparse("1990-01-06 21:00:00") 78 | assert len(m.profiles.keys()) == 137 79 | assert np.isclose(m.profiles[0].min_z, 0.05376, atol=1e-5) 80 | assert np.isclose(m.profiles[0].max_z, 9.62958, atol=1e-5) 81 | assert m.profiles[0].t == dtparse("1990-01-01 00:00:00") 82 | assert m.profiles[0].x == 119 83 | assert m.profiles[0].y == 171 84 | 85 | assert np.isclose(m.profiles[141].min_z, 0.04196, atol=1e-5) 86 | assert np.isclose(m.profiles[141].max_z, 9.85909, atol=1e-5) 87 | assert m.profiles[141].t == dtparse("1990-01-06 21:00:00") 88 | assert m.profiles[141].x == 34 89 | assert m.profiles[141].y == 80 90 | -------------------------------------------------------------------------------- /pocean/tests/dsg/profile/test_profile_om.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import tempfile 4 | import unittest 5 | 6 | import numpy as np 7 | from dateutil.parser import parse as dtparse 8 | 9 | from pocean import logger 10 | from pocean.cf import CFDataset 11 | from pocean.dsg import OrthogonalMultidimensionalProfile 12 | from pocean.tests.dsg.test_new import test_is_mine 13 | 14 | logger.level = logging.INFO 15 | logger.handlers = [logging.StreamHandler()] 16 | 17 | 18 | class TestOrthogonalMultidimensionalProfile(unittest.TestCase): 19 | def setUp(self): 20 | self.single = os.path.join(os.path.dirname(__file__), "resources", "om-single.nc") 21 | self.multi = os.path.join(os.path.dirname(__file__), "resources", "om-multiple.nc") 22 | 23 | def test_omp_load(self): 24 | OrthogonalMultidimensionalProfile(self.single).close() 25 | OrthogonalMultidimensionalProfile(self.multi).close() 26 | 27 | def test_omp_dataframe_single(self): 28 | CFDataset.load(self.single) 29 | 30 | fid, single_tmp = tempfile.mkstemp(suffix=".nc") 31 | with OrthogonalMultidimensionalProfile(self.single) as ncd: 32 | df = ncd.to_dataframe() 33 | with self.assertRaises(NotImplementedError): 34 | with OrthogonalMultidimensionalProfile.from_dataframe(df, single_tmp) as result_ncd: 35 | assert "profile" in result_ncd.dimensions 36 | test_is_mine(OrthogonalMultidimensionalProfile, single_tmp) # Try to load it again 37 | os.close(fid) 38 | os.remove(single_tmp) 39 | 40 | def test_omp_dataframe_multi(self): 41 | CFDataset.load(self.multi) 42 | 43 | fid, multi_tmp = tempfile.mkstemp(suffix=".nc") 44 | with OrthogonalMultidimensionalProfile(self.multi) as ncd: 45 | df = ncd.to_dataframe() 46 | with self.assertRaises(NotImplementedError): 47 | with OrthogonalMultidimensionalProfile.from_dataframe(df, multi_tmp) as result_ncd: 48 | assert "profile" in result_ncd.dimensions 49 | 
test_is_mine(OrthogonalMultidimensionalProfile, multi_tmp) # Try to load it again 50 | os.close(fid) 51 | os.remove(multi_tmp) 52 | 53 | def test_omp_dataframe_multi_unique_dims(self): 54 | CFDataset.load(self.multi) 55 | 56 | fid, multi_tmp = tempfile.mkstemp(suffix=".nc") 57 | with OrthogonalMultidimensionalProfile(self.multi) as ncd: 58 | df = ncd.to_dataframe() 59 | with self.assertRaises(NotImplementedError): 60 | with OrthogonalMultidimensionalProfile.from_dataframe( 61 | df, multi_tmp, unique_dims=True 62 | ) as result_ncd: 63 | assert "profile_dim" in result_ncd.dimensions 64 | test_is_mine(OrthogonalMultidimensionalProfile, multi_tmp) # Try to load it again 65 | os.close(fid) 66 | os.remove(multi_tmp) 67 | 68 | def test_omp_calculated_metadata(self): 69 | with OrthogonalMultidimensionalProfile(self.single) as ncd: 70 | s = ncd.calculated_metadata() 71 | assert s.min_t == dtparse("2005-07-09 01:48:00") 72 | assert s.max_t == dtparse("2005-07-09 01:48:00") 73 | assert np.isclose(s.profiles[1].min_z, 0.0) 74 | assert np.isclose(s.profiles[1].max_z, 96.06) 75 | assert s.profiles[1].t == dtparse("2005-07-09 01:48:00") 76 | assert np.isclose(s.profiles[1].x, -149.3582) 77 | assert np.isclose(s.profiles[1].y, 60.0248) 78 | 79 | with OrthogonalMultidimensionalProfile(self.multi) as ncd: 80 | m = ncd.calculated_metadata() 81 | assert m.min_t == dtparse("2005-09-10 07:08:00") 82 | assert m.max_t == dtparse("2005-09-14 17:27:00") 83 | assert len(m.profiles.keys()) == 35 84 | assert np.isclose(m.profiles[2].min_z, 0.0) 85 | assert np.isclose(m.profiles[2].max_z, 499.69) 86 | assert m.profiles[2].t == dtparse("2005-09-10 07:08:00") 87 | assert np.isclose(m.profiles[2].x, -148.2182) 88 | assert np.isclose(m.profiles[2].y, 58.5395) 89 | 90 | assert np.isclose(m.profiles[37].min_z, 0.0) 91 | assert np.isclose(m.profiles[37].max_z, 292.01001) 92 | assert m.profiles[37].t == dtparse("2005-09-14 17:27:00") 93 | assert np.isclose(m.profiles[37].x, -149.468) 94 | assert np.isclose(m.profiles[37].y, 60.01) 95 | 96 | def test_json_attributes(self): 97 | ds = os.path.join(os.path.dirname(__file__), "resources", "om-1dy11.nc") 98 | om = OrthogonalMultidimensionalProfile(ds) 99 | om.to_dataframe() 100 | om.json_attributes() 101 | om.close() 102 | -------------------------------------------------------------------------------- /pocean/tests/dsg/test_new.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from os.path import dirname as dn 3 | from os.path import join as jn 4 | 5 | import pytest 6 | 7 | from pocean import logger 8 | from pocean.cf import CFDataset 9 | from pocean.dsg import * 10 | from pocean.utils import all_subclasses 11 | 12 | logger.level = logging.INFO 13 | logger.handlers = [logging.StreamHandler()] 14 | 15 | 16 | @pytest.mark.parametrize( 17 | "klass,fp", 18 | [ 19 | ( 20 | OrthogonalMultidimensionalProfile, 21 | jn(dn(__file__), "profile", "resources", "om-single.nc"), 22 | ), 23 | ( 24 | OrthogonalMultidimensionalProfile, 25 | jn(dn(__file__), "profile", "resources", "om-multiple.nc"), 26 | ), 27 | ( 28 | OrthogonalMultidimensionalProfile, 29 | jn(dn(__file__), "profile", "resources", "om-1dy11.nc"), 30 | ), 31 | ( 32 | IncompleteMultidimensionalProfile, 33 | jn(dn(__file__), "profile", "resources", "im-multiple.nc"), 34 | ), 35 | ( 36 | IncompleteMultidimensionalTrajectory, 37 | jn(dn(__file__), "trajectory", "resources", "im-single.nc"), 38 | ), 39 | ( 40 | IncompleteMultidimensionalTrajectory, 41 | jn(dn(__file__), 
"trajectory", "resources", "im-multiple.nc"), 42 | ), 43 | ( 44 | IncompleteMultidimensionalTrajectory, 45 | jn(dn(__file__), "trajectory", "resources", "im-multiple-nonstring.nc"), 46 | ), 47 | ( 48 | IncompleteMultidimensionalTrajectory, 49 | jn(dn(__file__), "trajectory", "resources", "wave-glider-int-attrs.nc"), 50 | ), 51 | (ContiguousRaggedTrajectory, jn(dn(__file__), "trajectory", "resources", "cr-multiple.nc")), 52 | (ContiguousRaggedTrajectory, jn(dn(__file__), "trajectory", "resources", "cr-oot-A.nc")), 53 | (ContiguousRaggedTrajectory, jn(dn(__file__), "trajectory", "resources", "cr-oot-B.nc")), 54 | ( 55 | ContiguousRaggedTrajectoryProfile, 56 | jn(dn(__file__), "trajectoryProfile", "resources", "cr-single.nc"), 57 | ), 58 | ( 59 | ContiguousRaggedTrajectoryProfile, 60 | jn(dn(__file__), "trajectoryProfile", "resources", "cr-multiple.nc"), 61 | ), 62 | ( 63 | ContiguousRaggedTrajectoryProfile, 64 | jn(dn(__file__), "trajectoryProfile", "resources", "cr-missing-time.nc"), 65 | ), 66 | ( 67 | IncompleteMultidimensionalTimeseries, 68 | jn(dn(__file__), "timeseries", "resources", "im-multiple.nc"), 69 | ), 70 | ( 71 | OrthogonalMultidimensionalTimeseries, 72 | jn(dn(__file__), "timeseries", "resources", "om-single.nc"), 73 | ), 74 | ( 75 | OrthogonalMultidimensionalTimeseries, 76 | jn(dn(__file__), "timeseries", "resources", "om-multiple.nc"), 77 | ), 78 | # (IndexedRaggedTimeseries, jn(dn(__file__), 'timeseries', 'resources', 'cr-multiple.nc')), 79 | # (ContiguousRaggedTimeseries, jn(dn(__file__), 'timeseries', 'resources', 'cr-multiple.nc')), 80 | ( 81 | OrthogonalMultidimensionalTimeseriesProfile, 82 | jn(dn(__file__), "timeseriesProfile", "resources", "om-multiple.nc"), 83 | ), 84 | ( 85 | IncompleteMultidimensionalTimeseriesProfile, 86 | jn(dn(__file__), "timeseriesProfile", "resources", "im-single.nc"), 87 | ), 88 | ( 89 | IncompleteMultidimensionalTimeseriesProfile, 90 | jn(dn(__file__), "timeseriesProfile", "resources", "im-multiple.nc"), 91 | ), 92 | ( 93 | RaggedTimeseriesProfile, 94 | jn(dn(__file__), "timeseriesProfile", "resources", "r-single.nc"), 95 | ), 96 | ( 97 | RaggedTimeseriesProfile, 98 | jn(dn(__file__), "timeseriesProfile", "resources", "r-multiple.nc"), 99 | ), 100 | ], 101 | ) 102 | def test_is_mine(klass, fp): 103 | with CFDataset.load(fp) as dsg: 104 | assert dsg.__class__ == klass 105 | 106 | allsubs = list(all_subclasses(CFDataset)) 107 | subs = [s for s in allsubs if s != klass] 108 | with CFDataset(fp) as dsg: 109 | logger.debug(f"\nTesting {klass.__name__}") 110 | assert klass.is_mine(dsg, strict=True) is True 111 | for s in subs: 112 | if hasattr(s, "is_mine"): 113 | logger.debug(f" * Trying {s.__name__}...") 114 | assert s.is_mine(dsg) is False 115 | -------------------------------------------------------------------------------- /pocean/tests/dsg/test_utils.py: -------------------------------------------------------------------------------- 1 | #!python 2 | import datetime 3 | import os 4 | import unittest 5 | 6 | import pandas as pd 7 | import pytest 8 | import pytz 9 | from dateutil.parser import parse as dtparse 10 | 11 | from pocean import logger as L # noqa 12 | from pocean.cf import CFDataset 13 | from pocean.dsg import utils 14 | 15 | datetime.UTC = datetime.timezone.utc 16 | 17 | # RuntimeWarning: invalid value encountered in cast is fine here. 
18 | ignore_invalid_value_cast = pytest.mark.filterwarnings("ignore::RuntimeWarning") 19 | 20 | 21 | class TestDsgUtils(unittest.TestCase): 22 | geo = pd.DataFrame({"x": [-1, -2, -3, -4], "y": [1, 2, 3, 4]}) 23 | 24 | z = pd.DataFrame( 25 | { 26 | "z": [1, 2, 3, 4], 27 | } 28 | ) 29 | 30 | times = pd.DataFrame( 31 | { 32 | "t": pd.to_datetime( 33 | [ 34 | "2018-08-19 00:00:00", 35 | "2018-08-20 00:00:00", 36 | "2018-08-21 00:00:00", 37 | "2018-08-22 00:00:00", 38 | "2018-08-23 00:00:00", 39 | "2018-08-23 00:00:05", 40 | ] 41 | ) 42 | } 43 | ) 44 | 45 | avgtimes = pd.DataFrame( 46 | { 47 | "t": pd.to_datetime( 48 | [ 49 | "2018-08-19 00:00:00", 50 | "2018-08-20 23:00:55", 51 | "2018-08-21 00:00:35", 52 | ] 53 | ) 54 | } 55 | ) 56 | 57 | def test_get_vertical_meta(self): 58 | meta = utils.get_vertical_attributes(self.z) 59 | 60 | assert meta == { 61 | "variables": { 62 | "z": { 63 | "attributes": { 64 | "actual_min": 1, 65 | "actual_max": 4, 66 | } 67 | }, 68 | }, 69 | "attributes": { 70 | "geospatial_vertical_min": 1, 71 | "geospatial_vertical_max": 4, 72 | "geospatial_vertical_units": "m", 73 | }, 74 | } 75 | 76 | def test_get_geospatial_meta(self): 77 | meta = utils.get_geographic_attributes(self.geo) 78 | 79 | assert meta == { 80 | "variables": { 81 | "y": { 82 | "attributes": { 83 | "actual_min": 1, 84 | "actual_max": 4, 85 | } 86 | }, 87 | "x": { 88 | "attributes": { 89 | "actual_min": -4, 90 | "actual_max": -1, 91 | } 92 | }, 93 | }, 94 | "attributes": { 95 | "geospatial_lat_min": 1.0, 96 | "geospatial_lat_max": 4.0, 97 | "geospatial_lon_min": -4.0, 98 | "geospatial_lon_max": -1.0, 99 | "geospatial_bbox": "POLYGON ((-1 1, -1 4, -4 4, -4 1, -1 1))", 100 | "geospatial_bounds": "LINESTRING (-1 1, -4 4)", 101 | "geospatial_bounds_crs": "EPSG:4326", 102 | }, 103 | } 104 | 105 | def test_get_temporal_meta_from_times_average(self): 106 | meta = utils.get_temporal_attributes(self.avgtimes) 107 | 108 | assert meta == { 109 | "variables": { 110 | "t": { 111 | "attributes": { 112 | "actual_min": "2018-08-19T00:00:00Z", 113 | "actual_max": "2018-08-21T00:00:35Z", 114 | } 115 | } 116 | }, 117 | "attributes": { 118 | "time_coverage_start": "2018-08-19T00:00:00Z", 119 | "time_coverage_end": "2018-08-21T00:00:35Z", 120 | "time_coverage_duration": "P2DT0H0M35S", 121 | "time_coverage_resolution": "P0DT16H0M12S", 122 | }, 123 | } 124 | 125 | def test_get_temporal_meta_from_times(self): 126 | meta = utils.get_temporal_attributes(self.times) 127 | 128 | assert meta == { 129 | "variables": { 130 | "t": { 131 | "attributes": { 132 | "actual_min": "2018-08-19T00:00:00Z", 133 | "actual_max": "2018-08-23T00:00:05Z", 134 | } 135 | } 136 | }, 137 | "attributes": { 138 | "time_coverage_start": "2018-08-19T00:00:00Z", 139 | "time_coverage_end": "2018-08-23T00:00:05Z", 140 | "time_coverage_duration": "P4DT0H0M5S", 141 | "time_coverage_resolution": "P1DT0H0M0S", 142 | }, 143 | } 144 | 145 | def test_get_creation(self): 146 | meta = utils.get_creation_attributes(history="DID THINGS") 147 | 148 | now = datetime.datetime.now(datetime.UTC).replace(tzinfo=pytz.utc) 149 | 150 | assert (now - dtparse(meta["attributes"]["date_created"])) < datetime.timedelta(minutes=1) 151 | assert (now - dtparse(meta["attributes"]["date_issued"])) < datetime.timedelta(minutes=1) 152 | assert (now - dtparse(meta["attributes"]["date_modified"])) < datetime.timedelta(minutes=1) 153 | assert "DID THINGS" in meta["attributes"]["history"] 154 | 155 | @ignore_invalid_value_cast 156 | def test_wrap_dateline(self): 157 | ncfile = os.path.join( 158 
| os.path.dirname(os.path.dirname(__file__)), "resources/wrapping_dateline.nc" 159 | ) 160 | 161 | with CFDataset.load(ncfile) as ncd: 162 | axes = { 163 | "t": "time", 164 | "z": "z", 165 | "x": "lon", 166 | "y": "lat", 167 | } 168 | df = ncd.to_dataframe(axes=axes) 169 | 170 | meta = utils.get_geographic_attributes(df, axes=axes) 171 | 172 | assert meta == { 173 | "variables": { 174 | "lat": {"attributes": {"actual_min": 61.777, "actual_max": 67.068}}, 175 | "lon": {"attributes": {"actual_min": -179.966, "actual_max": 179.858}}, 176 | }, 177 | "attributes": { 178 | "geospatial_lat_min": 61.777, 179 | "geospatial_lat_max": 67.068, 180 | "geospatial_lon_min": -179.966, 181 | "geospatial_lon_max": 179.858, 182 | "geospatial_bbox": "POLYGON ((198.669 61.777, 198.669 67.068, 174.79200000000003 67.068, 174.79200000000003 61.777, 198.669 61.777))", 183 | "geospatial_bounds": "POLYGON ((174.79200000000003 61.777, 174.92599999999993 62.206, 178.812 64.098, 192.86 67.029, 196.86 67.068, 197.094 67.044, 198.669 66.861, 187.784 64.188, 179.10799999999995 62.266, 176.16899999999998 61.862, 174.79200000000003 61.777))", 184 | "geospatial_bounds_crs": "EPSG:4326", 185 | }, 186 | } 187 | 188 | def test_wrap_small_coords(self): 189 | geo = pd.DataFrame({"x": [-1, -2], "y": [1, 2]}) 190 | 191 | meta = utils.get_geographic_attributes(geo) 192 | 193 | assert meta == { 194 | "variables": { 195 | "y": { 196 | "attributes": { 197 | "actual_min": 1, 198 | "actual_max": 2, 199 | } 200 | }, 201 | "x": { 202 | "attributes": { 203 | "actual_min": -2, 204 | "actual_max": -1, 205 | } 206 | }, 207 | }, 208 | "attributes": { 209 | "geospatial_lat_min": 1, 210 | "geospatial_lat_max": 2, 211 | "geospatial_lon_min": -2, 212 | "geospatial_lon_max": -1, 213 | "geospatial_bbox": "POLYGON ((-1 1, -1 2, -2 2, -2 1, -1 1))", 214 | "geospatial_bounds": "LINESTRING (-1 1, -2 2)", 215 | "geospatial_bounds_crs": "EPSG:4326", 216 | }, 217 | } 218 | 219 | def test_wrap_same_coords(self): 220 | geo = pd.DataFrame({"x": [-1, -1, -1], "y": [1, 1, 1]}) 221 | 222 | meta = utils.get_geographic_attributes(geo) 223 | 224 | assert meta == { 225 | "variables": { 226 | "y": { 227 | "attributes": { 228 | "actual_min": 1, 229 | "actual_max": 1, 230 | } 231 | }, 232 | "x": { 233 | "attributes": { 234 | "actual_min": -1, 235 | "actual_max": -1, 236 | } 237 | }, 238 | }, 239 | "attributes": { 240 | "geospatial_lat_min": 1, 241 | "geospatial_lat_max": 1, 242 | "geospatial_lon_min": -1, 243 | "geospatial_lon_max": -1, 244 | "geospatial_bbox": "POLYGON ((-1 1, -1 1, -1 1, -1 1))", 245 | "geospatial_bounds": "POINT (-1 1)", 246 | "geospatial_bounds_crs": "EPSG:4326", 247 | }, 248 | } 249 | -------------------------------------------------------------------------------- /pocean/tests/dsg/timeseries/test_timeseries_im.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyoceans/pocean-core/6e4dda9c818c65b31192e8d5fb01d6bcf29a43f3/pocean/tests/dsg/timeseries/test_timeseries_im.py -------------------------------------------------------------------------------- /pocean/tests/dsg/timeseries/test_timeseries_om.py: -------------------------------------------------------------------------------- 1 | #!python 2 | import logging 3 | import os 4 | import tempfile 5 | import unittest 6 | 7 | import numpy as np 8 | import pytest 9 | 10 | from pocean import logger 11 | from pocean.dsg import OrthogonalMultidimensionalTimeseries 12 | from pocean.tests.dsg.test_new import test_is_mine 13 | 14 | # 
RuntimeWarning: invalid value encountered in cast is fine here. 15 | ignore_invalid_value_cast = pytest.mark.filterwarnings("ignore::RuntimeWarning") 16 | 17 | logger.level = logging.INFO 18 | logger.handlers = [logging.StreamHandler()] 19 | 20 | 21 | class TestOrthogonalMultidimensionalTimeseries(unittest.TestCase): 22 | def setUp(self): 23 | self.single = os.path.join(os.path.dirname(__file__), "resources", "tt.nc") 24 | self.multi = os.path.join(os.path.dirname(__file__), "resources", "om-multiple.nc") 25 | self.ph = np.ma.array( 26 | [ 27 | 8.1080176, 28 | 8.11740265, 29 | 8.11924184, 30 | 8.11615471, 31 | 8.11445695, 32 | 8.11600021, 33 | 8.11903291, 34 | 8.1187229, 35 | 8.105218, 36 | 8.10998784, 37 | 8.10715445, 38 | 8.10530323, 39 | 8.11167052, 40 | 8.11142766, 41 | 8.10897461, 42 | 8.08827717, 43 | 8.11343609, 44 | 8.11746859, 45 | 8.12326458, 46 | 8.11770947, 47 | 8.09127117, 48 | 8.10770576, 49 | 8.10252467, 50 | 8.10252874, 51 | ] 52 | ) 53 | 54 | def test_omp_load(self): 55 | OrthogonalMultidimensionalTimeseries(self.single).close() 56 | OrthogonalMultidimensionalTimeseries(self.multi).close() 57 | 58 | @ignore_invalid_value_cast 59 | def test_timeseries_omt_dataframe_single(self): 60 | fid, single_tmp = tempfile.mkstemp(suffix=".nc") 61 | with OrthogonalMultidimensionalTimeseries(self.single) as s: 62 | df = s.to_dataframe() 63 | with OrthogonalMultidimensionalTimeseries.from_dataframe(df, single_tmp) as result_ncd: 64 | assert "station" in result_ncd.dimensions 65 | assert np.ma.allclose(result_ncd.variables["pH"][:].flatten(), self.ph) 66 | test_is_mine(OrthogonalMultidimensionalTimeseries, single_tmp) # Try to load it again 67 | os.close(fid) 68 | os.remove(single_tmp) 69 | 70 | def test_timeseries_omt_dataframe_multi(self): 71 | fid, single_tmp = tempfile.mkstemp(suffix=".nc") 72 | with OrthogonalMultidimensionalTimeseries(self.multi) as s: 73 | df = s.to_dataframe() 74 | with OrthogonalMultidimensionalTimeseries.from_dataframe(df, single_tmp) as result_ncd: 75 | assert "station" in result_ncd.dimensions 76 | assert np.ma.allclose( 77 | result_ncd.variables["temperature"][0, 0:7].flatten(), 78 | [18.61804, 13.2165, 39.30018, 17.00865, 24.95154, 35.99525, 24.33436], 79 | ) 80 | test_is_mine(OrthogonalMultidimensionalTimeseries, single_tmp) # Try to load it again 81 | os.close(fid) 82 | os.remove(single_tmp) 83 | 84 | @ignore_invalid_value_cast 85 | def test_timeseries_omt_dataframe_unique_dims(self): 86 | fid, single_tmp = tempfile.mkstemp(suffix=".nc") 87 | with OrthogonalMultidimensionalTimeseries(self.single) as s: 88 | df = s.to_dataframe() 89 | with OrthogonalMultidimensionalTimeseries.from_dataframe( 90 | df, single_tmp, unique_dims=True 91 | ) as result_ncd: 92 | assert "station_dim" in result_ncd.dimensions 93 | assert np.ma.allclose(result_ncd.variables["pH"][:].flatten(), self.ph) 94 | test_is_mine(OrthogonalMultidimensionalTimeseries, single_tmp) # Try to load it again 95 | os.close(fid) 96 | os.remove(single_tmp) 97 | 98 | @ignore_invalid_value_cast 99 | def test_timeseries_omt_reduce_dims(self): 100 | fid, single_tmp = tempfile.mkstemp(suffix=".nc") 101 | with OrthogonalMultidimensionalTimeseries(self.single) as s: 102 | df = s.to_dataframe() 103 | with OrthogonalMultidimensionalTimeseries.from_dataframe( 104 | df, single_tmp, reduce_dims=True 105 | ) as result_ncd: 106 | assert "station" not in result_ncd.dimensions 107 | assert np.ma.allclose(result_ncd.variables["pH"][:].flatten(), self.ph) 108 | test_is_mine(OrthogonalMultidimensionalTimeseries, 
single_tmp) # Try to load it again 109 | os.close(fid) 110 | os.remove(single_tmp) 111 | 112 | @ignore_invalid_value_cast 113 | def test_timeseries_omt_no_z(self): 114 | fid, single_tmp = tempfile.mkstemp(suffix=".nc") 115 | with OrthogonalMultidimensionalTimeseries(self.single) as s: 116 | df = s.to_dataframe() 117 | axes = {"z": None} 118 | df.drop(columns=["z"], inplace=True) 119 | with OrthogonalMultidimensionalTimeseries.from_dataframe( 120 | df, 121 | single_tmp, 122 | axes=axes, 123 | ) as result_ncd: 124 | assert "station" in result_ncd.dimensions 125 | assert "z" not in result_ncd.variables 126 | assert np.ma.allclose(result_ncd.variables["pH"][:].flatten(), self.ph) 127 | test_is_mine(OrthogonalMultidimensionalTimeseries, single_tmp) # Try to load it again 128 | os.close(fid) 129 | os.remove(single_tmp) 130 | 131 | @ignore_invalid_value_cast 132 | def test_timeseries_omt_no_z_no_station(self): 133 | fid, single_tmp = tempfile.mkstemp(suffix=".nc") 134 | with OrthogonalMultidimensionalTimeseries(self.single) as s: 135 | df = s.to_dataframe() 136 | axes = {"z": None} 137 | df.drop(columns=["z"], inplace=True) 138 | with OrthogonalMultidimensionalTimeseries.from_dataframe( 139 | df, single_tmp, axes=axes, reduce_dims=True 140 | ) as result_ncd: 141 | assert "station" not in result_ncd.dimensions 142 | assert "z" not in result_ncd.variables 143 | assert np.ma.allclose(result_ncd.variables["pH"][:].flatten(), self.ph) 144 | test_is_mine(OrthogonalMultidimensionalTimeseries, single_tmp) # Try to load it again 145 | os.close(fid) 146 | os.remove(single_tmp) 147 | 148 | @ignore_invalid_value_cast 149 | def test_supplying_attributes(self): 150 | fid, single_tmp = tempfile.mkstemp(suffix=".nc") 151 | 152 | attrs = { 153 | "y": { 154 | "_CoordinateAxisType": "Lat", 155 | "_FillValue": -9999.9, 156 | "missing_value": -9999.9, 157 | } 158 | } 159 | 160 | with OrthogonalMultidimensionalTimeseries(self.single) as s: 161 | df = s.to_dataframe() 162 | with OrthogonalMultidimensionalTimeseries.from_dataframe( 163 | df, single_tmp, attributes=attrs 164 | ) as result_ncd: 165 | assert "station" in result_ncd.dimensions 166 | assert result_ncd.variables["y"]._CoordinateAxisType == "Lat" 167 | with self.assertRaises(AttributeError): 168 | result_ncd.variables["y"].missing_value 169 | with self.assertRaises(AttributeError): 170 | result_ncd.variables["y"]._FillValue 171 | 172 | test_is_mine(OrthogonalMultidimensionalTimeseries, single_tmp) # Try to load it again 173 | os.close(fid) 174 | os.remove(single_tmp) 175 | -------------------------------------------------------------------------------- /pocean/tests/dsg/timeseriesProfile/test_timeseriesProfile_im.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyoceans/pocean-core/6e4dda9c818c65b31192e8d5fb01d6bcf29a43f3/pocean/tests/dsg/timeseriesProfile/test_timeseriesProfile_im.py -------------------------------------------------------------------------------- /pocean/tests/dsg/timeseriesProfile/test_timeseriesProfile_r.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import tempfile 4 | import unittest 5 | from datetime import datetime 6 | 7 | import netCDF4 as nc4 8 | import pandas as pd 9 | import pytest 10 | from numpy.testing import assert_array_equal as npeq 11 | 12 | from pocean import logger 13 | from pocean.cf import CFDataset 14 | from pocean.dsg import RaggedTimeseriesProfile 15 | from 
pocean.tests.dsg.test_new import test_is_mine 16 | 17 | logger.level = logging.INFO 18 | logger.handlers = [logging.StreamHandler()] 19 | 20 | # RuntimeWarning: invalid value encountered in cast is fine here. 21 | ignore_invalid_value_cast = pytest.mark.filterwarnings("ignore::RuntimeWarning") 22 | 23 | 24 | class TestRaggedTimeseriesProfile(unittest.TestCase): 25 | def test_csv_to_nc_single(self): 26 | filepath = os.path.join(os.path.dirname(__file__), "resources", "r-single.csv") 27 | 28 | df = pd.read_csv(filepath) 29 | fid, tmpfile = tempfile.mkstemp(suffix=".nc") 30 | 31 | axes = {"t": "time", "x": "lon", "y": "lat", "z": "z"} 32 | 33 | df.time = pd.to_datetime(df.time) 34 | 35 | CFDataset.default_time_unit = "hours since 2003-01-01 00:00:00Z" 36 | 37 | with RaggedTimeseriesProfile.from_dataframe(df, tmpfile, axes=axes) as result_ncd: 38 | assert "station" in result_ncd.dimensions 39 | assert result_ncd.dimensions["station"].size == 1 40 | assert "profile" in result_ncd.dimensions 41 | assert result_ncd.dimensions["profile"].size == 1 42 | 43 | check_vars = ["z", "t090C", "SP", "SA", "SR", "CT", "sigma0_CT"] 44 | for v in check_vars: 45 | npeq(result_ncd.variables[v][:], df[v].values) 46 | 47 | assert result_ncd.variables["station"][0] == df.station.iloc[0] == "CH2" 48 | assert result_ncd.variables["profile"][0] == df.profile.iloc[0] == "030617B" 49 | assert result_ncd.variables["lat"].size == 1 50 | assert result_ncd.variables["lat"].ndim == 1 # Not reduced 51 | assert result_ncd.variables["lat"][0] == df.lat.iloc[0] == 33.558 52 | assert result_ncd.variables["lon"].size == 1 53 | assert result_ncd.variables["lon"].ndim == 1 # Not reduced 54 | assert result_ncd.variables["lon"][0] == df.lon.iloc[0] == -118.405 55 | 56 | assert result_ncd.variables["time"].units == "hours since 2003-01-01 00:00:00Z" 57 | assert result_ncd.variables["time"][0] == nc4.date2num( 58 | datetime(2003, 6, 17, 10, 32, 0), units=result_ncd.variables["time"].units 59 | ) 60 | 61 | assert RaggedTimeseriesProfile.is_mine(result_ncd, strict=True) 62 | 63 | os.close(fid) 64 | os.remove(tmpfile) 65 | 66 | def test_csv_to_nc_multi(self): 67 | filepath = os.path.join(os.path.dirname(__file__), "resources", "r-multi.csv") 68 | 69 | df = pd.read_csv(filepath) 70 | fid, tmpfile = tempfile.mkstemp(suffix=".nc") 71 | 72 | axes = {"t": "time", "x": "lon", "y": "lat", "z": "z"} 73 | 74 | df.time = pd.to_datetime(df.time) 75 | 76 | with RaggedTimeseriesProfile.from_dataframe(df, tmpfile, axes=axes) as result_ncd: 77 | assert "station" in result_ncd.dimensions 78 | assert result_ncd.dimensions["station"].size == 2 79 | assert "profile" in result_ncd.dimensions 80 | assert result_ncd.dimensions["profile"].size == 5 81 | 82 | check_vars = ["z", "salinity", "sigma0"] 83 | for v in check_vars: 84 | npeq(result_ncd.variables[v][:], df[v].values) 85 | 86 | npeq(result_ncd.variables["station"][:], ["CN1", "CN2"]) 87 | npeq( 88 | result_ncd.variables["profile"][:], 89 | ["030312B", "030617B", "030702B", "030814B", "031216C"], 90 | ) 91 | assert result_ncd.variables["profile"][0] == df.profile.iloc[0] == "030312B" 92 | assert result_ncd.variables["lat"].size == 2 93 | assert result_ncd.variables["lat"].ndim == 1 # Not reduced 94 | assert result_ncd.variables["lat"][0] == df.lat.iloc[0] == 33.5 95 | assert result_ncd.variables["lon"].size == 2 96 | assert result_ncd.variables["lon"].ndim == 1 # Not reduced 97 | assert result_ncd.variables["lon"][0] == df.lon.iloc[0] == -118.4 98 | 99 | npeq(result_ncd.variables["stationIndex"][:], [0, 
0, 1, 0, 1]) 100 | 101 | npeq(result_ncd.variables["rowSize"][:], [844, 892, 893, 893, 891]) 102 | 103 | assert result_ncd.variables["time"][0] == nc4.date2num( 104 | datetime(2013, 3, 12, 10, 19, 6), units=result_ncd.variables["time"].units 105 | ) 106 | assert RaggedTimeseriesProfile.is_mine(result_ncd, strict=True) 107 | 108 | os.close(fid) 109 | os.remove(tmpfile) 110 | 111 | def test_csv_to_nc_single_timezones(self): 112 | filepath = os.path.join(os.path.dirname(__file__), "resources", "r-single.csv") 113 | 114 | df = pd.read_csv(filepath) 115 | fid, tmpfile = tempfile.mkstemp(suffix=".nc") 116 | 117 | axes = {"t": "time", "x": "lon", "y": "lat", "z": "z"} 118 | 119 | df.time = pd.to_datetime(df.time) 120 | df.time = df.time.dt.tz_localize("UTC") 121 | 122 | with RaggedTimeseriesProfile.from_dataframe(df, tmpfile, axes=axes) as result_ncd: 123 | assert "station" in result_ncd.dimensions 124 | assert result_ncd.dimensions["station"].size == 1 125 | assert "profile" in result_ncd.dimensions 126 | assert result_ncd.dimensions["profile"].size == 1 127 | 128 | check_vars = ["z", "t090C", "SP", "SA", "SR", "CT", "sigma0_CT"] 129 | for v in check_vars: 130 | npeq(result_ncd.variables[v][:], df[v].values) 131 | 132 | assert result_ncd.variables["station"][0] == df.station.iloc[0] == "CH2" 133 | assert result_ncd.variables["profile"][0] == df.profile.iloc[0] == "030617B" 134 | assert result_ncd.variables["lat"].size == 1 135 | assert result_ncd.variables["lat"].ndim == 1 # Not reduced 136 | assert result_ncd.variables["lat"][0] == df.lat.iloc[0] == 33.558 137 | assert result_ncd.variables["lon"].size == 1 138 | assert result_ncd.variables["lon"].ndim == 1 # Not reduced 139 | assert result_ncd.variables["lon"][0] == df.lon.iloc[0] == -118.405 140 | 141 | assert result_ncd.variables["time"][0] == nc4.date2num( 142 | datetime(2003, 6, 17, 10, 32, 0), units=result_ncd.variables["time"].units 143 | ) 144 | 145 | assert RaggedTimeseriesProfile.is_mine(result_ncd, strict=True) 146 | 147 | os.close(fid) 148 | os.remove(tmpfile) 149 | 150 | def test_csv_to_nc_single_reduce(self): 151 | filepath = os.path.join(os.path.dirname(__file__), "resources", "r-single.csv") 152 | 153 | df = pd.read_csv(filepath) 154 | fid, tmpfile = tempfile.mkstemp(suffix=".nc") 155 | 156 | axes = {"t": "time", "x": "lon", "y": "lat", "z": "z"} 157 | 158 | df.time = pd.to_datetime(df.time) 159 | 160 | with RaggedTimeseriesProfile.from_dataframe( 161 | df, tmpfile, axes=axes, reduce_dims=True 162 | ) as result_ncd: 163 | assert "station" not in result_ncd.dimensions 164 | assert "profile" in result_ncd.dimensions 165 | assert result_ncd.dimensions["profile"].size == 1 166 | 167 | check_vars = ["z", "t090C", "SP", "SA", "SR", "CT", "sigma0_CT"] 168 | for v in check_vars: 169 | npeq(result_ncd.variables[v][:], df[v].values) 170 | 171 | assert result_ncd.variables["station"][0] == df.station.iloc[0] == "CH2" 172 | assert result_ncd.variables["profile"][0] == df.profile.iloc[0] == "030617B" 173 | assert result_ncd.variables["lat"].size == 1 174 | assert result_ncd.variables["lat"].ndim == 0 # Reduced to 0 175 | assert result_ncd.variables["lat"][0] == df.lat.iloc[0] == 33.558 176 | assert result_ncd.variables["lon"].size == 1 177 | assert result_ncd.variables["lon"].ndim == 0 # Reduced to 0 178 | assert result_ncd.variables["lon"][0] == df.lon.iloc[0] == -118.405 179 | 180 | assert RaggedTimeseriesProfile.is_mine(result_ncd, strict=True) 181 | 182 | os.close(fid) 183 | os.remove(tmpfile) 184 | 185 | @ignore_invalid_value_cast 186 | 
def test_rtp_single(self):
187 |         filepath = os.path.join(os.path.dirname(__file__), "resources", "r-ctd-single.nc")
188 | 
189 |         with RaggedTimeseriesProfile(filepath) as ncd:
190 |             fid, tmpfile = tempfile.mkstemp(suffix=".nc")
191 |             df = ncd.to_dataframe(clean_rows=False)
192 | 
193 |             with RaggedTimeseriesProfile.from_dataframe(df, tmpfile) as result_ncd:
194 |                 assert "station" in result_ncd.dimensions
195 |             test_is_mine(RaggedTimeseriesProfile, tmpfile) # Try to load it again
196 | 
197 |             with RaggedTimeseriesProfile.from_dataframe(
198 |                 df, tmpfile, unique_dims=True
199 |             ) as result_ncd:
200 |                 assert "station_dim" in result_ncd.dimensions
201 |             test_is_mine(RaggedTimeseriesProfile, tmpfile) # Try to load it again
202 | 
203 |             with RaggedTimeseriesProfile.from_dataframe(
204 |                 df, tmpfile, reduce_dims=True
205 |             ) as result_ncd:
206 |                 # reduce_dims drops the station dimension for this single-station file
207 |                 assert "station" not in result_ncd.dimensions
208 |                 assert "profile" in result_ncd.dimensions
209 |             test_is_mine(RaggedTimeseriesProfile, tmpfile) # Try to load it again
210 | 
211 |             with RaggedTimeseriesProfile.from_dataframe(df, tmpfile, unlimited=True) as result_ncd:
212 |                 assert "station" in result_ncd.dimensions
213 |                 assert "profile" in result_ncd.dimensions
214 |                 assert result_ncd.dimensions["obs"].isunlimited() is True
215 |             test_is_mine(RaggedTimeseriesProfile, tmpfile) # Try to load it again
216 | 
217 |             with RaggedTimeseriesProfile.from_dataframe(
218 |                 df, tmpfile, reduce_dims=True, unlimited=True
219 |             ) as result_ncd:
220 |                 assert "station" not in result_ncd.dimensions
221 |                 assert "profile" in result_ncd.dimensions
222 |                 assert result_ncd.dimensions["obs"].isunlimited() is True
223 |             test_is_mine(RaggedTimeseriesProfile, tmpfile) # Try to load it again
224 | 
225 |             with RaggedTimeseriesProfile.from_dataframe(
226 |                 df, tmpfile, unique_dims=True, reduce_dims=False, unlimited=True
227 |             ) as result_ncd:
228 |                 assert "station_dim" in result_ncd.dimensions
229 |                 assert "profile_dim" in result_ncd.dimensions
230 |                 assert result_ncd.dimensions["obs_dim"].isunlimited() is True
231 |             test_is_mine(RaggedTimeseriesProfile, tmpfile) # Try to load it again
232 | 
233 |         os.close(fid)
234 |         os.remove(tmpfile)
235 | 
--------------------------------------------------------------------------------
/pocean/tests/dsg/trajectory/test_trajectory_cr.py:
--------------------------------------------------------------------------------
1 | #!python
2 | import logging
3 | import os
4 | import tempfile
5 | import unittest
6 | from os.path import dirname as dn
7 | from os.path import join as jn
8 | 
9 | import pytest
10 | 
11 | from pocean import logger
12 | from pocean.dsg import ContiguousRaggedTrajectory, get_calculated_attributes
13 | from pocean.tests.dsg.test_new import test_is_mine
14 | 
15 | logger.level = logging.INFO
16 | logger.handlers = [logging.StreamHandler()]
17 | 
18 | # RuntimeWarning: invalid value encountered in cast is fine here.
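# (Editor's note, not in the original file: a condensed sketch of the round-trip
# pattern the tests below exercise; `path` and `out_nc` are hypothetical names:
#
#     with ContiguousRaggedTrajectory(path) as ncd:
#         df = ncd.to_dataframe(axes=axes)
#     attrs = get_calculated_attributes(df, axes=axes)
#     with ContiguousRaggedTrajectory.from_dataframe(df, out_nc, axes=axes, mode="a") as out:
#         out.apply_meta(attrs)
# )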
19 | ignore_invalid_value_cast = pytest.mark.filterwarnings("ignore::RuntimeWarning") 20 | 21 | 22 | @pytest.mark.parametrize( 23 | "fp", 24 | [ 25 | # jn(dn(__file__), 'resources', 'cr-single.nc'), 26 | jn(dn(__file__), "resources", "cr-multiple.nc"), 27 | jn(dn(__file__), "resources", "cr-oot-A.nc"), 28 | jn(dn(__file__), "resources", "cr-oot-B.nc"), 29 | ], 30 | ) 31 | def test_crt_load(fp): 32 | test_is_mine(ContiguousRaggedTrajectory, fp) 33 | 34 | 35 | class TestContiguousRaggedTrajectory(unittest.TestCase): 36 | def setUp(self): 37 | self.multi = jn(dn(__file__), "resources", "cr-multiple.nc") 38 | self.oot_A = jn(dn(__file__), "resources", "cr-oot-A.nc") 39 | self.oot_B = jn(dn(__file__), "resources", "cr-oot-B.nc") 40 | 41 | def test_crt_dataframe_multiple(self): 42 | axes = { 43 | "t": "time", 44 | "x": "lon", 45 | "y": "lat", 46 | "z": "z", 47 | } 48 | fid, tmpnc = tempfile.mkstemp(suffix=".nc") 49 | with ContiguousRaggedTrajectory(self.multi) as ncd: 50 | df = ncd.to_dataframe(axes=axes) 51 | with ContiguousRaggedTrajectory.from_dataframe(df, tmpnc, axes=axes) as result_ncd: 52 | assert "trajectory" in result_ncd.dimensions 53 | test_is_mine(ContiguousRaggedTrajectory, tmpnc) # Try to load it again 54 | os.close(fid) 55 | os.remove(tmpnc) 56 | 57 | def test_crt_dataframe_multiple_unique_dims(self): 58 | axes = { 59 | "t": "time", 60 | "x": "lon", 61 | "y": "lat", 62 | "z": "z", 63 | } 64 | fid, tmpnc = tempfile.mkstemp(suffix=".nc") 65 | with ContiguousRaggedTrajectory(self.multi) as ncd: 66 | df = ncd.to_dataframe(axes=axes) 67 | with ContiguousRaggedTrajectory.from_dataframe( 68 | df, tmpnc, axes=axes, unique_dims=True 69 | ) as result_ncd: 70 | assert "trajectory_dim" in result_ncd.dimensions 71 | test_is_mine(ContiguousRaggedTrajectory, tmpnc) # Try to load it again 72 | os.close(fid) 73 | os.remove(tmpnc) 74 | 75 | def test_crt_dataframe_unlimited_dim(self): 76 | axes = { 77 | "t": "time", 78 | "x": "lon", 79 | "y": "lat", 80 | "z": "z", 81 | } 82 | fid, tmpnc = tempfile.mkstemp(suffix=".nc") 83 | with ContiguousRaggedTrajectory(self.multi) as ncd: 84 | df = ncd.to_dataframe(axes=axes) 85 | with ContiguousRaggedTrajectory.from_dataframe( 86 | df, tmpnc, axes=axes, unlimited=True, unique_dims=True 87 | ) as result_ncd: 88 | assert "trajectory_dim" in result_ncd.dimensions 89 | assert "obs_dim" in result_ncd.dimensions 90 | assert result_ncd.dimensions["obs_dim"].isunlimited() is True 91 | test_is_mine(ContiguousRaggedTrajectory, tmpnc) # Try to load it again 92 | os.close(fid) 93 | os.remove(tmpnc) 94 | 95 | @ignore_invalid_value_cast 96 | def test_crt_dataframe_oot_A(self): 97 | axes = {"t": "time", "x": "lon", "y": "lat", "z": "depth", "sample": "sample"} 98 | fid, tmpnc = tempfile.mkstemp(suffix=".nc") 99 | with ContiguousRaggedTrajectory(self.oot_A) as ncd: 100 | df = ncd.to_dataframe(axes=axes) 101 | df = df.sort_values(["trajectory", "time"]) 102 | attrs = get_calculated_attributes(df, axes=axes) 103 | 104 | with ContiguousRaggedTrajectory.from_dataframe( 105 | df, tmpnc, axes=axes, mode="a" 106 | ) as result_ncd: 107 | assert "sample" in result_ncd.dimensions 108 | assert result_ncd.dimensions["sample"].size == 6610 109 | assert "trajectory" in result_ncd.dimensions 110 | # This is removing null trajectories that have no data. Not much to do about this 111 | # because there is no way to store this empty trajectory in a dataframe. 
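            # (Editor's note: a trajectory with zero observations contributes no rows
            # to the dataframe, so from_dataframe() has nothing to rebuild it from;
            # the size asserted below counts only trajectories that carry data.)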
112 | assert result_ncd.dimensions["trajectory"].size == 507 113 | result_ncd.apply_meta(attrs) 114 | 115 | test_is_mine(ContiguousRaggedTrajectory, tmpnc) # Try to load it again 116 | 117 | os.close(fid) 118 | os.remove(tmpnc) 119 | 120 | @ignore_invalid_value_cast 121 | def test_crt_dataframe_oot_B(self): 122 | axes = { 123 | "t": "time", 124 | "x": "lon", 125 | "y": "lat", 126 | "z": "depth", 127 | } 128 | fid, tmpnc = tempfile.mkstemp(suffix=".nc") 129 | with ContiguousRaggedTrajectory(self.oot_B) as ncd: 130 | df = ncd.to_dataframe(axes=axes) 131 | df = df.sort_values(["trajectory", "time"]) 132 | attrs = get_calculated_attributes(df, axes=axes) 133 | 134 | with ContiguousRaggedTrajectory.from_dataframe( 135 | df, tmpnc, axes=axes, mode="a" 136 | ) as result_ncd: 137 | assert "obs" in result_ncd.dimensions 138 | assert result_ncd.dimensions["obs"].size == 64116 139 | assert "trajectory" in result_ncd.dimensions 140 | # This is removing null trajectories that have no data. Not much to do about this 141 | # because there is no way to store this empty trajectory in a dataframe. 142 | assert result_ncd.dimensions["trajectory"].size == 1000 143 | result_ncd.apply_meta(attrs) 144 | 145 | test_is_mine(ContiguousRaggedTrajectory, tmpnc) # Try to load it again 146 | 147 | os.close(fid) 148 | os.remove(tmpnc) 149 | -------------------------------------------------------------------------------- /pocean/tests/dsg/trajectory/test_trajectory_im.py: -------------------------------------------------------------------------------- 1 | #!python 2 | import logging 3 | import os 4 | import tempfile 5 | import unittest 6 | 7 | import numpy as np 8 | import pytest 9 | from dateutil.parser import parse as dtparse 10 | 11 | from pocean import logger 12 | from pocean.cf import CFDataset 13 | from pocean.dsg import IncompleteMultidimensionalTrajectory 14 | from pocean.tests.dsg.test_new import test_is_mine 15 | 16 | logger.level = logging.INFO 17 | logger.handlers = [logging.StreamHandler()] 18 | 19 | # RuntimeWarning: invalid value encountered in cast is fine here. 
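# (Editor's note, not in the original file: the tests below cycle through the
# from_dataframe() keyword flags; their observable effect, per the assertions,
# with a hypothetical dataframe df and output path out_nc:
#
#     klass.from_dataframe(df, out_nc)                    # default dimension names
#     klass.from_dataframe(df, out_nc, unique_dims=True)  # "trajectory_dim", "obs_dim"
#     klass.from_dataframe(df, out_nc, reduce_dims=True)  # drop "trajectory" when only one
#     klass.from_dataframe(df, out_nc, unlimited=True)    # "obs" becomes unlimited
#
# where klass is IncompleteMultidimensionalTrajectory.)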
20 | ignore_invalid_value_cast = pytest.mark.filterwarnings("ignore::RuntimeWarning") 21 | 22 | 23 | class TestIncompleteMultidimensionalTrajectory(unittest.TestCase): 24 | @ignore_invalid_value_cast 25 | def test_im_single_row(self): 26 | filepath = os.path.join(os.path.dirname(__file__), "resources", "im-singlerow.nc") 27 | 28 | with IncompleteMultidimensionalTrajectory(filepath) as s: 29 | df = s.to_dataframe(clean_rows=True) 30 | assert len(df) == 1 31 | 32 | def test_imt_multi(self): 33 | filepath = os.path.join(os.path.dirname(__file__), "resources", "im-multiple.nc") 34 | 35 | CFDataset.load(filepath).close() 36 | 37 | with IncompleteMultidimensionalTrajectory(filepath) as ncd: 38 | fid, tmpfile = tempfile.mkstemp(suffix=".nc") 39 | df = ncd.to_dataframe(clean_rows=False) 40 | 41 | with IncompleteMultidimensionalTrajectory.from_dataframe(df, tmpfile) as result_ncd: 42 | assert "trajectory" in result_ncd.dimensions 43 | test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again 44 | 45 | with IncompleteMultidimensionalTrajectory.from_dataframe( 46 | df, tmpfile, unique_dims=True 47 | ) as result_ncd: 48 | assert "trajectory_dim" in result_ncd.dimensions 49 | test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again 50 | 51 | with IncompleteMultidimensionalTrajectory.from_dataframe( 52 | df, tmpfile, reduce_dims=True 53 | ) as result_ncd: 54 | # Could not reduce dims since there was more than one trajectory 55 | assert "trajectory" in result_ncd.dimensions 56 | test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again 57 | 58 | with IncompleteMultidimensionalTrajectory.from_dataframe( 59 | df, tmpfile, unlimited=True 60 | ) as result_ncd: 61 | assert result_ncd.dimensions["obs"].isunlimited() is True 62 | test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again 63 | 64 | with IncompleteMultidimensionalTrajectory.from_dataframe( 65 | df, tmpfile, reduce_dims=True, unlimited=True 66 | ) as result_ncd: 67 | # Could not reduce dims since there was more than one trajectory 68 | assert "trajectory" in result_ncd.dimensions 69 | assert result_ncd.dimensions["obs"].isunlimited() is True 70 | test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again 71 | 72 | with IncompleteMultidimensionalTrajectory.from_dataframe( 73 | df, tmpfile, unique_dims=True, reduce_dims=True, unlimited=True 74 | ) as result_ncd: 75 | # Could not reduce dims since there was more than one trajectory 76 | assert "trajectory_dim" in result_ncd.dimensions 77 | assert result_ncd.dimensions["obs_dim"].isunlimited() is True 78 | test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again 79 | 80 | os.close(fid) 81 | os.remove(tmpfile) 82 | 83 | @ignore_invalid_value_cast 84 | def test_imt_multi_not_string(self): 85 | filepath = os.path.join(os.path.dirname(__file__), "resources", "im-multiple-nonstring.nc") 86 | 87 | CFDataset.load(filepath).close() 88 | 89 | with IncompleteMultidimensionalTrajectory(filepath) as ncd: 90 | fid, tmpfile = tempfile.mkstemp(suffix=".nc") 91 | df = ncd.to_dataframe(clean_rows=False) 92 | 93 | with IncompleteMultidimensionalTrajectory.from_dataframe(df, tmpfile) as result_ncd: 94 | assert "trajectory" in result_ncd.dimensions 95 | test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again 96 | 97 | with IncompleteMultidimensionalTrajectory.from_dataframe( 98 | df, tmpfile, reduce_dims=True 99 | ) as result_ncd: 100 | # Could 
the dims be reduced here? Yes: unlike test_imt_multi above, the trajectory dimension is dropped for the non-string file
101 |                 assert "trajectory" not in result_ncd.dimensions
102 |             test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again
103 | 
104 |             with IncompleteMultidimensionalTrajectory.from_dataframe(
105 |                 df, tmpfile, unlimited=True
106 |             ) as result_ncd:
107 |                 assert result_ncd.dimensions["obs"].isunlimited() is True
108 |             test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again
109 | 
110 |             with IncompleteMultidimensionalTrajectory.from_dataframe(
111 |                 df, tmpfile, reduce_dims=True, unlimited=True
112 |             ) as result_ncd:
113 |                 # As above, the trajectory dimension is dropped for the non-string variant
114 |                 assert "trajectory" not in result_ncd.dimensions
115 |                 assert result_ncd.dimensions["obs"].isunlimited() is True
116 |             test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again
117 | 
118 |             os.close(fid)
119 |             os.remove(tmpfile)
120 | 
121 |     def test_imt_single(self):
122 |         filepath = os.path.join(os.path.dirname(__file__), "resources", "im-single.nc")
123 | 
124 |         CFDataset.load(filepath).close()
125 | 
126 |         with IncompleteMultidimensionalTrajectory(filepath) as ncd:
127 |             fid, tmpfile = tempfile.mkstemp(suffix=".nc")
128 |             df = ncd.to_dataframe(clean_rows=False)
129 | 
130 |             with IncompleteMultidimensionalTrajectory.from_dataframe(df, tmpfile) as result_ncd:
131 |                 assert "trajectory" in result_ncd.dimensions
132 |             test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again
133 | 
134 |             with IncompleteMultidimensionalTrajectory.from_dataframe(
135 |                 df, tmpfile, reduce_dims=True
136 |             ) as result_ncd:
137 |                 # Reduced trajectory dimension
138 |                 assert "trajectory" not in result_ncd.dimensions
139 |             test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again
140 | 
141 |             with IncompleteMultidimensionalTrajectory.from_dataframe(
142 |                 df, tmpfile, unlimited=True
143 |             ) as result_ncd:
144 |                 # Unlimited obs dimension
145 |                 assert result_ncd.dimensions["obs"].isunlimited() is True
146 |             test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again
147 | 
148 |             with IncompleteMultidimensionalTrajectory.from_dataframe(
149 |                 df, tmpfile, reduce_dims=True, unlimited=True
150 |             ) as result_ncd:
151 |                 # Reduced trajectory dimension
152 |                 assert "trajectory" not in result_ncd.dimensions
153 |                 assert result_ncd.dimensions["obs"].isunlimited() is True
154 |             test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again
155 | 
156 |             os.close(fid)
157 |             os.remove(tmpfile)
158 | 
159 |     def test_imt_change_axis_names(self):
160 |         new_axis = {"t": "time", "x": "lon", "y": "lat", "z": "depth"}
161 | 
162 |         filepath = os.path.join(os.path.dirname(__file__), "resources", "im-multiple.nc")
163 |         with IncompleteMultidimensionalTrajectory(filepath) as ncd:
164 |             fid, tmpfile = tempfile.mkstemp(suffix=".nc")
165 |             df = ncd.to_dataframe(clean_rows=False, axes=new_axis)
166 | 
167 |             with IncompleteMultidimensionalTrajectory.from_dataframe(
168 |                 df, tmpfile, axes=new_axis
169 |             ) as result_ncd:
170 |                 assert "trajectory" in result_ncd.dimensions
171 |                 assert "time" in result_ncd.variables
172 |                 assert "lon" in result_ncd.variables
173 |                 assert "lat" in result_ncd.variables
174 |                 assert "depth" in result_ncd.variables
175 |             test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again
176 | 
177 |             os.close(fid)
178 |             os.remove(tmpfile)
179 | 
180 |     def test_imt_calculated_metadata_single(self):
181 | 
filepath = os.path.join(os.path.dirname(__file__), "resources", "im-single.nc") 182 | 183 | with IncompleteMultidimensionalTrajectory(filepath) as ncd: 184 | s = ncd.calculated_metadata() 185 | assert s.min_t.round("s") == dtparse("1990-01-01 00:00:00") 186 | assert s.max_t.round("s") == dtparse("1990-01-05 03:00:00") 187 | traj1 = s.trajectories["Trajectory1"] 188 | assert traj1.min_z == 0 189 | assert traj1.max_z == 99 190 | assert traj1.min_t.round("s") == dtparse("1990-01-01 00:00:00") 191 | assert traj1.max_t.round("s") == dtparse("1990-01-05 03:00:00") 192 | first_loc = traj1.geometry.coords[0] 193 | assert np.isclose(first_loc[0], -7.9336) 194 | assert np.isclose(first_loc[1], 42.00339) 195 | 196 | def test_imt_calculated_metadata_multi(self): 197 | filepath = os.path.join(os.path.dirname(__file__), "resources", "im-multiple.nc") 198 | 199 | with IncompleteMultidimensionalTrajectory(filepath) as ncd: 200 | m = ncd.calculated_metadata() 201 | assert m.min_t == dtparse("1990-01-01 00:00:00") 202 | assert m.max_t == dtparse("1990-01-02 12:00:00") 203 | assert len(m.trajectories) == 4 204 | traj0 = m.trajectories["Trajectory0"] 205 | assert traj0.min_z == 0 206 | assert traj0.max_z == 35 207 | assert traj0.min_t.round("s") == dtparse("1990-01-01 00:00:00") 208 | assert traj0.max_t.round("s") == dtparse("1990-01-02 11:00:00") 209 | first_loc = traj0.geometry.coords[0] 210 | assert np.isclose(first_loc[0], -35.07884) 211 | assert np.isclose(first_loc[1], 2.15286) 212 | 213 | traj3 = m.trajectories["Trajectory3"] 214 | assert traj3.min_z == 0 215 | assert traj3.max_z == 36 216 | assert traj3.min_t.round("s") == dtparse("1990-01-01 00:00:00") 217 | assert traj3.max_t.round("s") == dtparse("1990-01-02 12:00:00") 218 | first_loc = traj3.geometry.coords[0] 219 | assert np.isclose(first_loc[0], -73.3026) 220 | assert np.isclose(first_loc[1], 1.95761) 221 | 222 | def test_json_attributes_single(self): 223 | filepath = os.path.join(os.path.dirname(__file__), "resources", "im-single.nc") 224 | 225 | with IncompleteMultidimensionalTrajectory(filepath) as s: 226 | s.json_attributes() 227 | 228 | def test_json_attributes_multi(self): 229 | filepath = os.path.join(os.path.dirname(__file__), "resources", "im-multiple.nc") 230 | 231 | with IncompleteMultidimensionalTrajectory(filepath) as s: 232 | s.json_attributes() 233 | -------------------------------------------------------------------------------- /pocean/tests/dsg/trajectoryProfile/test_trajectoryProfile_cr.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import math 3 | import os 4 | import tempfile 5 | import unittest 6 | 7 | import numpy as np 8 | import pytest 9 | from dateutil.parser import parse as dtparse 10 | from shapely.wkt import loads as wktloads 11 | 12 | from pocean import logger as L 13 | from pocean.dsg import ContiguousRaggedTrajectoryProfile 14 | from pocean.tests.dsg.test_new import test_is_mine 15 | 16 | L.level = logging.INFO 17 | L.handlers = [logging.StreamHandler()] 18 | 19 | # RuntimeWarning: invalid value encountered in cast is fine here. 
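# (Editor's note, not in the original file: the axes dicts used below map pocean's
# logical axis keys to the per-file variable names, and the same mapping drives
# both directions of the round trip, e.g.
#
#     axes = {"t": "time", "x": "longitude", "y": "latitude", "z": "depth"}
#     df = ncd.to_dataframe(axes=axes)
#     ContiguousRaggedTrajectoryProfile.from_dataframe(df, out_nc, axes=axes)
#
# with out_nc a hypothetical output path.)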
20 | ignore_invalid_value_cast = pytest.mark.filterwarnings("ignore::RuntimeWarning") 21 | 22 | 23 | class TestContinousRaggedTrajectoryProfile(unittest.TestCase): 24 | def setUp(self): 25 | self.single = os.path.join(os.path.dirname(__file__), "resources", "cr-single.nc") 26 | self.multi = os.path.join(os.path.dirname(__file__), "resources", "cr-multiple.nc") 27 | self.missing_time = os.path.join( 28 | os.path.dirname(__file__), "resources", "cr-missing-time.nc" 29 | ) 30 | self.nan_locations = os.path.join( 31 | os.path.dirname(__file__), "resources", "cr-nan-locations.nc" 32 | ) 33 | 34 | def test_crtp_load(self): 35 | ContiguousRaggedTrajectoryProfile(self.single).close() 36 | ContiguousRaggedTrajectoryProfile(self.multi).close() 37 | ContiguousRaggedTrajectoryProfile(self.missing_time).close() 38 | 39 | @ignore_invalid_value_cast 40 | def test_crtp_dataframe_single(self): 41 | axes = { 42 | "t": "time", 43 | "x": "longitude", 44 | "y": "latitude", 45 | "z": "depth", 46 | } 47 | fid, tmpnc = tempfile.mkstemp(suffix=".nc") 48 | with ContiguousRaggedTrajectoryProfile(self.single) as ncd: 49 | df = ncd.to_dataframe(axes=axes) 50 | with ContiguousRaggedTrajectoryProfile.from_dataframe( 51 | df, tmpnc, axes=axes 52 | ) as result_ncd: 53 | assert "profile" in result_ncd.dimensions 54 | assert "trajectory" in result_ncd.dimensions 55 | test_is_mine(ContiguousRaggedTrajectoryProfile, tmpnc) # Try to load it again 56 | os.close(fid) 57 | os.remove(tmpnc) 58 | 59 | @ignore_invalid_value_cast 60 | def test_crtp_dataframe_single_unique_dims(self): 61 | axes = { 62 | "t": "time", 63 | "x": "longitude", 64 | "y": "latitude", 65 | "z": "depth", 66 | } 67 | fid, tmpnc = tempfile.mkstemp(suffix=".nc") 68 | with ContiguousRaggedTrajectoryProfile(self.single) as ncd: 69 | df = ncd.to_dataframe(axes=axes) 70 | with ContiguousRaggedTrajectoryProfile.from_dataframe( 71 | df, tmpnc, axes=axes, unique_dims=True 72 | ) as result_ncd: 73 | assert "profile_dim" in result_ncd.dimensions 74 | assert "trajectory_dim" in result_ncd.dimensions 75 | test_is_mine(ContiguousRaggedTrajectoryProfile, tmpnc) # Try to load it again 76 | os.close(fid) 77 | os.remove(tmpnc) 78 | 79 | def test_crtp_dataframe_multi(self): 80 | axes = { 81 | "t": "time", 82 | "x": "lon", 83 | "y": "lat", 84 | "z": "z", 85 | } 86 | fid, tmpnc = tempfile.mkstemp(suffix=".nc") 87 | with ContiguousRaggedTrajectoryProfile(self.multi) as ncd: 88 | df = ncd.to_dataframe(axes=axes) 89 | with ContiguousRaggedTrajectoryProfile.from_dataframe( 90 | df, tmpnc, axes=axes 91 | ) as result_ncd: 92 | assert "profile" in result_ncd.dimensions 93 | assert "trajectory" in result_ncd.dimensions 94 | test_is_mine(ContiguousRaggedTrajectoryProfile, tmpnc) # Try to load it again 95 | os.close(fid) 96 | os.remove(tmpnc) 97 | 98 | @ignore_invalid_value_cast 99 | def test_crtp_dataframe_missing_time(self): 100 | axes = { 101 | "t": "precise_time", 102 | "x": "precise_lon", 103 | "y": "precise_lat", 104 | "z": "depth", 105 | } 106 | fid, tmpnc = tempfile.mkstemp(suffix=".nc") 107 | with ContiguousRaggedTrajectoryProfile(self.missing_time) as ncd: 108 | df = ncd.to_dataframe(axes=axes) 109 | with ContiguousRaggedTrajectoryProfile.from_dataframe( 110 | df, tmpnc, axes=axes 111 | ) as result_ncd: 112 | assert "profile" in result_ncd.dimensions 113 | assert "trajectory" in result_ncd.dimensions 114 | test_is_mine(ContiguousRaggedTrajectoryProfile, tmpnc) # Try to load it again 115 | os.close(fid) 116 | os.remove(tmpnc) 117 | 118 | @ignore_invalid_value_cast 119 | def 
test_crtp_calculated_metadata_single(self): 120 | axes = { 121 | "t": "time", 122 | "x": "longitude", 123 | "y": "latitude", 124 | "z": "depth", 125 | } 126 | 127 | with ContiguousRaggedTrajectoryProfile(self.single) as st: 128 | s = st.calculated_metadata(axes=axes) 129 | assert s.min_t.round("s") == dtparse("2014-11-25 18:57:30") 130 | assert s.max_t.round("s") == dtparse("2014-11-27 07:10:30") 131 | assert len(s.trajectories) == 1 132 | traj = s.trajectories["sp025-20141125T1730"] 133 | assert traj.min_z == 0 134 | assert np.isclose(traj.max_z, 504.37827) 135 | assert traj.min_t.round("s") == dtparse("2014-11-25 18:57:30") 136 | assert traj.max_t.round("s") == dtparse("2014-11-27 07:10:30") 137 | 138 | first_loc = traj.geometry.coords[0] 139 | assert np.isclose(first_loc[0], -119.79025) 140 | assert np.isclose(first_loc[1], 34.30818) 141 | assert len(traj.profiles) == 17 142 | 143 | def test_crtp_calculated_metadata_multi(self): 144 | axes = { 145 | "t": "time", 146 | "x": "longitude", 147 | "y": "latitude", 148 | "z": "depth", 149 | } 150 | 151 | with ContiguousRaggedTrajectoryProfile(self.multi) as mt: 152 | m = mt.calculated_metadata(axes=axes) 153 | assert m.min_t.round("s") == dtparse("1990-01-01 00:00:00") 154 | assert m.max_t.round("s") == dtparse("1990-01-03 02:00:00") 155 | assert len(m.trajectories) == 5 156 | # First trajectory 157 | traj0 = m.trajectories[0] 158 | assert traj0.min_z == 0 159 | assert traj0.max_z == 43 160 | assert traj0.min_t.round("s") == dtparse("1990-01-02 05:00:00") 161 | assert traj0.max_t.round("s") == dtparse("1990-01-03 01:00:00") 162 | first_loc = traj0.geometry.coords[0] 163 | assert first_loc[0] == -60 164 | assert first_loc[1] == 53 165 | assert len(traj0.profiles) == 4 166 | assert traj0.profiles[0].t.round("s") == dtparse("1990-01-03 01:00:00") 167 | assert traj0.profiles[0].x == -60 168 | assert traj0.profiles[0].y == 49 169 | 170 | # Last trajectory 171 | traj4 = m.trajectories[4] 172 | assert traj4.min_z == 0 173 | assert traj4.max_z == 38 174 | assert traj4.min_t.round("s") == dtparse("1990-01-02 14:00:00") 175 | assert traj4.max_t.round("s") == dtparse("1990-01-02 15:00:00") 176 | first_loc = traj4.geometry.coords[0] 177 | assert first_loc[0] == -67 178 | assert first_loc[1] == 47 179 | assert len(traj4.profiles) == 4 180 | assert traj4.profiles[19].t.round("s") == dtparse("1990-01-02 14:00:00") 181 | assert traj4.profiles[19].x == -44 182 | assert traj4.profiles[19].y == 47 183 | 184 | @ignore_invalid_value_cast 185 | def test_crtp_calculated_metadata_missing_time(self): 186 | axes = { 187 | "t": "time", 188 | "x": "longitude", 189 | "y": "latitude", 190 | "z": "depth", 191 | } 192 | 193 | with ContiguousRaggedTrajectoryProfile(self.missing_time) as mmt: 194 | t = mmt.calculated_metadata(axes=axes) 195 | assert t.min_t == dtparse("2014-11-16 21:32:29.952500") 196 | assert t.max_t == dtparse("2014-11-17 07:59:08.398500") 197 | assert len(t.trajectories) == 1 198 | 199 | traj = t.trajectories["UW157-20141116T211809"] 200 | assert np.isclose(traj.min_z, 0.47928014) 201 | assert np.isclose(traj.max_z, 529.68005) 202 | assert traj.min_t == dtparse("2014-11-16 21:32:29.952500") 203 | assert traj.max_t == dtparse("2014-11-17 07:59:08.398500") 204 | 205 | first_loc = traj.geometry.coords[0] 206 | 207 | assert np.isclose(first_loc[0], -124.681526638573) 208 | assert np.isclose(first_loc[1], 43.5022166666667) 209 | assert len(traj.profiles) == 13 210 | 211 | @ignore_invalid_value_cast 212 | def test_crtp_just_missing_time(self): 213 | axes = { 214 
| "t": "time", 215 | "x": "longitude", 216 | "y": "latitude", 217 | "z": "depth", 218 | } 219 | 220 | with ContiguousRaggedTrajectoryProfile(self.missing_time) as mmt: 221 | t = mmt.calculated_metadata(axes=axes) 222 | assert t.min_t == dtparse("2014-11-16 21:32:29.952500") 223 | assert t.max_t == dtparse("2014-11-17 07:59:08.398500") 224 | assert len(t.trajectories) == 1 225 | 226 | traj = t.trajectories["UW157-20141116T211809"] 227 | assert np.isclose(traj.min_z, 0.47928014) 228 | assert np.isclose(traj.max_z, 529.68005) 229 | assert traj.min_t == dtparse("2014-11-16 21:32:29.952500") 230 | assert traj.max_t == dtparse("2014-11-17 07:59:08.398500") 231 | 232 | first_loc = traj.geometry.coords[0] 233 | assert np.isclose(first_loc[0], -124.681526638573) 234 | assert np.isclose(first_loc[1], 43.5022166666667) 235 | assert len(traj.profiles) == 13 236 | 237 | @ignore_invalid_value_cast 238 | def test_crtp_just_missing_locations(self): 239 | axes = { 240 | "t": "time", 241 | "x": "longitude", 242 | "y": "latitude", 243 | "z": "depth", 244 | } 245 | 246 | with ContiguousRaggedTrajectoryProfile(self.nan_locations) as ml: 247 | t = ml.calculated_metadata(axes=axes) 248 | assert len(t.trajectories) == 1 249 | 250 | traj = t.trajectories["clark-20150709T1803"] 251 | coords = list(wktloads(traj.geometry.wkt).coords) 252 | assert True not in [math.isnan(x) for x, y in coords] 253 | assert True not in [math.isnan(y) for x, y in coords] 254 | -------------------------------------------------------------------------------- /pocean/tests/test_cf.py: -------------------------------------------------------------------------------- 1 | #!python 2 | import logging 3 | import os 4 | import unittest 5 | 6 | from pocean import logger as L 7 | from pocean.cf import CFDataset 8 | from pocean.dsg import OrthogonalMultidimensionalTimeseries as omt 9 | 10 | L.level = logging.INFO 11 | L.handlers = [logging.StreamHandler()] 12 | 13 | 14 | class TestCFDatasetLoad(unittest.TestCase): 15 | def test_load_url(self): 16 | # File downloaded from https://geoport.usgs.esipfed.org/thredds/dodsC/silt/usgs/Projects/stellwagen/CF-1.6/ARGO_MERCHANT/1211-AA.cdf.html 17 | fname = os.path.join(os.path.dirname(__file__), "resources", "1211-AA.cdf") 18 | ncd = CFDataset.load(fname) 19 | assert omt.is_mine(ncd) is True 20 | ncd.close() 21 | 22 | def test_load_strict(self): 23 | ncfile = os.path.join( 24 | os.path.dirname(__file__), "dsg", "profile", "resources", "om-single.nc" 25 | ) 26 | 27 | ncd = CFDataset.load(ncfile) 28 | assert omt.is_mine(ncd) is False 29 | with self.assertRaises(BaseException): 30 | omt.is_mine(ncd, strict=True) 31 | ncd.close() 32 | -------------------------------------------------------------------------------- /pocean/tests/test_nc.py: -------------------------------------------------------------------------------- 1 | #!python 2 | import logging 3 | import os 4 | import tempfile 5 | import unittest 6 | 7 | from numpy import testing as npt 8 | 9 | from pocean import logger as L 10 | from pocean.cf import CFDataset 11 | from pocean.dataset import EnhancedDataset 12 | from pocean.meta import MetaInterface, ncpyattributes 13 | 14 | L.level = logging.INFO 15 | L.handlers = [logging.StreamHandler()] 16 | 17 | 18 | class TestJsonDataset(unittest.TestCase): 19 | def setUp(self): 20 | self.maxDiff = 9999 21 | self.hdl, self.ncdf = tempfile.mkstemp(prefix="pocean_test_") 22 | 23 | def tearDown(self): 24 | os.close(self.hdl) 25 | os.remove(self.ncdf) 26 | 27 | def test_lvl0_apply(self): 28 | jsf = 
os.path.join(os.path.dirname(__file__), "resources/coamps_lvl0.json") 29 | mi = MetaInterface.from_jsonfile(jsf) 30 | 31 | with EnhancedDataset(self.ncdf, "w") as ncd: 32 | ncd.apply_meta(mi) 33 | 34 | assert {k: v.size for k, v in ncd.dimensions.items()} == mi["dimensions"] 35 | 36 | fileglobatts = mi["attributes"] 37 | newglobatts = {} 38 | for nk in ncd.ncattrs(): 39 | newglobatts[nk] = ncd.getncattr(nk) 40 | 41 | self.assertDictEqual(fileglobatts, newglobatts) 42 | 43 | for k, v in ncd.variables.items(): 44 | filevaratts = mi["variables"][k]["attributes"] 45 | newvaratts = ncpyattributes(dict(v.__dict__), verbose=False) 46 | 47 | # _FillValue gets added even if it wasn't in the original attributes 48 | if "_FillValue" in newvaratts: 49 | del newvaratts["_FillValue"] 50 | 51 | if "missing_value" in filevaratts: 52 | del filevaratts["missing_value"] 53 | 54 | self.assertDictEqual(filevaratts, newvaratts) 55 | 56 | def test_lvl2_apply(self): 57 | jsf = os.path.join(os.path.dirname(__file__), "resources/coamps_lvl2.json") 58 | mi = MetaInterface.from_jsonfile(jsf) 59 | 60 | with EnhancedDataset(self.ncdf, "w") as ncd: 61 | ncd.apply_meta(mi) 62 | 63 | assert {k: v.size for k, v in ncd.dimensions.items()} == mi["dimensions"] 64 | 65 | fileglobatts = {k: v["data"] for k, v in mi["attributes"].items()} 66 | newglobatts = {} 67 | for nk in ncd.ncattrs(): 68 | newglobatts[nk] = ncd.getncattr(nk) 69 | 70 | self.assertDictEqual(fileglobatts, newglobatts) 71 | 72 | for k, v in ncd.variables.items(): 73 | filevaratts = {k: v["data"] for k, v in mi["variables"][k]["attributes"].items()} 74 | newvaratts = ncpyattributes(dict(v.__dict__), verbose=False) 75 | 76 | # _FillValue gets added even if it wasn't in the original attributes 77 | if "_FillValue" in newvaratts: 78 | del newvaratts["_FillValue"] 79 | 80 | if "missing_value" in filevaratts: 81 | del filevaratts["missing_value"] 82 | 83 | self.assertDictEqual(filevaratts, newvaratts) 84 | 85 | def test_input_output(self): 86 | ncfile = os.path.join(os.path.dirname(__file__), "resources/coamps.nc") 87 | 88 | with EnhancedDataset(ncfile, "r") as original_ncd: 89 | mi = original_ncd.meta() 90 | 91 | with EnhancedDataset(self.ncdf, "w") as ncd: 92 | ncd.apply_meta(mi) 93 | 94 | self.assertDictEqual( 95 | ncpyattributes(dict(original_ncd.__dict__)), ncpyattributes(dict(ncd.__dict__)) 96 | ) 97 | 98 | for k, v in original_ncd.variables.items(): 99 | oldatts = ncpyattributes(dict(v.__dict__)) 100 | newatts = ncpyattributes(dict(ncd.variables[k].__dict__)) 101 | 102 | # _FillValue gets added even if it wasn't in the original attributes 103 | if "_FillValue" in newatts: 104 | del newatts["_FillValue"] 105 | 106 | if "missing_value" in oldatts: 107 | del oldatts["missing_value"] 108 | 109 | self.assertDictEqual(oldatts, newatts) 110 | 111 | def test_serialize_and_reload_data(self): 112 | ncfile = os.path.join(os.path.dirname(__file__), "resources/qc-month.nc") 113 | 114 | with CFDataset(ncfile) as cfncd: 115 | # Data from netCDF variable 116 | ncdata = cfncd.variables["data1"][:] 117 | 118 | # Not filled 119 | meta = cfncd.json(return_data=True, fill_data=False) 120 | jsdata = meta["variables"]["data1"]["data"] 121 | npt.assert_array_equal(ncdata, jsdata) 122 | fhandle1, fname1 = tempfile.mkstemp() 123 | with CFDataset(fname1, "w") as newcf: 124 | newcf.apply_json(meta) 125 | with CFDataset(fname1, "r") as rcf: 126 | newncdata = rcf.variables["data1"][:] 127 | npt.assert_array_equal(ncdata, newncdata) 128 | os.close(fhandle1) 129 | os.remove(fname1) 130 | 131 
| # Filled 132 | meta = cfncd.json(return_data=True, fill_data=True) 133 | jsdata = meta["variables"]["data1"]["data"] 134 | npt.assert_array_equal(ncdata, jsdata) 135 | fhandle2, fname2 = tempfile.mkstemp() 136 | with CFDataset(fname2, "w") as newcf: 137 | newcf.apply_json(meta) 138 | 139 | with CFDataset(fname2, "r") as rcf: 140 | newncdata = rcf.variables["data1"][:] 141 | npt.assert_array_equal(ncdata, newncdata) 142 | 143 | os.close(fhandle2) 144 | os.remove(fname2) 145 | -------------------------------------------------------------------------------- /pocean/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | #!python 2 | import logging 3 | import os 4 | import shutil 5 | import tempfile 6 | import unittest 7 | 8 | import netCDF4 as nc4 9 | import numpy as np 10 | import pytest 11 | 12 | from pocean import logger 13 | from pocean.dataset import EnhancedDataset 14 | from pocean.utils import generic_masked, get_default_axes, normalize_array 15 | 16 | logger.level = logging.INFO 17 | logger.handlers = [logging.StreamHandler()] 18 | 19 | 20 | class TestUtils(unittest.TestCase): 21 | def setUp(self): 22 | self.input_file = os.path.join(os.path.dirname(__file__), "resources/coamps.nc") 23 | 24 | def test_get_default_axes(self): 25 | assert get_default_axes() == ( 26 | "trajectory", 27 | "station", 28 | "profile", 29 | "obs", 30 | "t", 31 | "x", 32 | "y", 33 | "z", 34 | ) 35 | 36 | new_defaults = { 37 | "trajectory": "a", 38 | "station": "b", 39 | "profile": "c", 40 | "sample": "h", 41 | "t": "d", 42 | "x": "e", 43 | "y": "f", 44 | "z": "g", 45 | } 46 | assert get_default_axes(new_defaults) == ( 47 | "a", 48 | "b", 49 | "c", 50 | "h", 51 | "d", 52 | "e", 53 | "f", 54 | "g", 55 | ) 56 | 57 | new_defaults = {"trajectory": "a", "station": "b", "profile": "c"} 58 | assert get_default_axes(new_defaults) == ( 59 | "a", 60 | "b", 61 | "c", 62 | "obs", 63 | "t", 64 | "x", 65 | "y", 66 | "z", 67 | ) 68 | 69 | # Time is not a valid axis key 70 | bad_defaults = {"time": "a"} 71 | with self.assertRaises(TypeError): 72 | get_default_axes(bad_defaults) 73 | 74 | # Can't have duplicate values 75 | bad_defaults = {"x": "a", "y": "a"} 76 | with self.assertRaises(ValueError): 77 | get_default_axes(bad_defaults) 78 | 79 | # but you can with the sample dimension 80 | bad_defaults = {"t": "time", "sample": "time"} 81 | assert get_default_axes(bad_defaults) == ( 82 | "trajectory", 83 | "station", 84 | "profile", 85 | "time", 86 | "time", 87 | "x", 88 | "y", 89 | "z", 90 | ) 91 | 92 | def test_single_attr_filter(self): 93 | nc = EnhancedDataset(self.input_file) 94 | grid_spacing_vars = nc.filter_by_attrs(grid_spacing="4.0 km") 95 | 96 | x = nc.variables.get("x") 97 | y = nc.variables.get("y") 98 | 99 | self.assertEqual(len(grid_spacing_vars), 2) 100 | assert x in grid_spacing_vars 101 | assert y in grid_spacing_vars 102 | 103 | def test_multiple_attr_filter(self): 104 | nc = EnhancedDataset(self.input_file) 105 | grid_spacing_vars = nc.filter_by_attrs( 106 | grid_spacing="4.0 km", standard_name="projection_y_coordinate" 107 | ) 108 | 109 | y = nc.variables.get("y") 110 | 111 | self.assertEqual(len(grid_spacing_vars), 1) 112 | assert y in grid_spacing_vars 113 | 114 | @pytest.mark.filterwarnings("ignore::UserWarning") 115 | def test_generic_masked_bad_min_max_value(self): 116 | fid, tpath = tempfile.mkstemp(suffix=".nc", prefix="pocean-test") 117 | shutil.copy2(self.input_file, tpath) 118 | 119 | with EnhancedDataset(tpath, "a") as ncd: 120 | v = 
ncd.variables["v_component_wind_true_direction_all_geometries"] 121 | v.valid_min = np.float32(0.1) 122 | v.valid_max = np.float32(0.1) 123 | r = generic_masked(v[:], attrs=ncd.vatts(v.name)) 124 | rflat = r.flatten() 125 | assert rflat[~rflat.mask].size == 0 126 | 127 | # Create a byte variable with a float valid_min and valid_max 128 | # to make sure it doesn't error 129 | b = ncd.createVariable("imabyte", "b") 130 | b.valid_min = 0 131 | b.valid_max = np.int16(600) # this is over a byte and thus invalid 132 | b[:] = 3 133 | r = generic_masked(b[:], attrs=ncd.vatts(b.name)) 134 | assert np.all(r.mask == False) # noqa 135 | 136 | b.valid_min = 0 137 | b.valid_max = 2 138 | r = generic_masked(b[:], attrs=ncd.vatts(b.name)) 139 | assert np.all(r.mask == True) # noqa 140 | 141 | c = ncd.createVariable("imanotherbyte", "f4") 142 | c.setncattr("valid_min", b"0") 143 | c.setncattr("valid_max", b"9") 144 | c[:] = 3 145 | r = generic_masked(c[:], attrs=ncd.vatts(c.name)) 146 | assert np.all(r.mask == False) # noqa 147 | 148 | c = ncd.createVariable("imarange", "f4") 149 | c.valid_range = [0.0, 2.0] 150 | c[:] = 3.0 151 | r = generic_masked(c[:], attrs=ncd.vatts(c.name)) 152 | assert np.all(r.mask == True) # noqa 153 | 154 | c.valid_range = [0.0, 2.0] 155 | c[:] = 1.0 156 | r = generic_masked(c[:], attrs=ncd.vatts(c.name)) 157 | assert np.all(r.mask == False) # noqa 158 | 159 | os.close(fid) 160 | if os.path.exists(tpath): 161 | os.remove(tpath) 162 | 163 | 164 | class TestNetcdfUtils(unittest.TestCase): 165 | def test_cf_safe_name(self): 166 | from pocean.cf import cf_safe_name 167 | 168 | self.assertEqual("foo", cf_safe_name("foo")) 169 | self.assertEqual("v_1foo", cf_safe_name("1foo")) 170 | self.assertEqual("v_1foo_99", cf_safe_name("1foo-99")) 171 | self.assertEqual("foo_99", cf_safe_name("foo-99")) 172 | self.assertEqual("foo_99_", cf_safe_name("foo(99)")) 173 | self.assertEqual("v__foo_99_", cf_safe_name("_foo(99)")) 174 | 175 | 176 | class TestNormalizeArray(unittest.TestCase): 177 | def setUp(self): 178 | self.fh, self.fp = tempfile.mkstemp(suffix=".nc", prefix="pocean_testing_") 179 | 180 | def tearDown(self): 181 | os.close(self.fh) 182 | if os.path.exists(self.fp): 183 | os.remove(self.fp) 184 | 185 | def test_normalization_of_string_arrays_netcdf4(self): 186 | thestr = "bosadfsdfkljskfusdiofu987987987om" 187 | 188 | with nc4.Dataset(self.fp, "w", format="NETCDF4") as ncd: 189 | dimsize = len(thestr) 190 | ncd.createDimension("n", dimsize) 191 | 192 | # Single str (no dimension) 193 | ncd.createVariable("single_str", str) 194 | ncd.createVariable("single_unicode_", np.str_) 195 | ncd.createVariable("single_U", " 1: 221 | v[:, :] = np.tile(nc4.stringtoarr(thestr, dimsize), dimsize).reshape( 222 | v.shape 223 | ) 224 | else: 225 | v[:] = np.tile(thestr, dimsize).reshape(v.shape) 226 | 227 | with nc4.Dataset(self.fp) as ncd: 228 | assert normalize_array(ncd.variables["single_str"]) == thestr 229 | assert normalize_array(ncd.variables["single_unicode_"]) == thestr 230 | assert normalize_array(ncd.variables["single_U"]) == thestr 231 | assert normalize_array(ncd.variables["single_S"]) == thestr 232 | 233 | assert np.all(normalize_array(ncd.variables["many_str"]) == [thestr] * len(thestr)) 234 | assert np.all(normalize_array(ncd.variables["many_unicode_"]) == [thestr] * len(thestr)) 235 | assert np.all(normalize_array(ncd.variables["many_U"]) == [thestr] * len(thestr)) 236 | assert np.all(normalize_array(ncd.variables["many_S"]) == [thestr] * len(thestr)) 237 | 238 | def 
test_normalization_of_string_arrays_netcdf3(self): 239 | thestr = "boodsfasfasdfm" 240 | 241 | with nc4.Dataset(self.fp, "w", format="NETCDF3_CLASSIC") as ncd: 242 | dimsize = len(thestr) 243 | ncd.createDimension("n", dimsize) 244 | 245 | # Single str (stored as a char array) 246 | ncd.createVariable("single_S", "S1", ("n",)) 247 | 248 | for k, v in ncd.variables.items(): 249 | if k.startswith("single_"): 250 | v[:] = nc4.stringtoarr(thestr, dimsize) 251 | 252 | # Array of str 253 | ncd.createVariable( 254 | "many_S", 255 | "S1", 256 | ( 257 | "n", 258 | "n", 259 | ), 260 | ) 261 | 262 | for k, v in ncd.variables.items(): 263 | if k.startswith("many_"): 264 | v[:, :] = np.tile(nc4.stringtoarr(thestr, dimsize), dimsize).reshape(v.shape) 265 | 266 | with nc4.Dataset(self.fp) as ncd: 267 | assert normalize_array(ncd.variables["single_S"]) == thestr 268 | assert np.all(normalize_array(ncd.variables["many_S"]) == [thestr] * dimsize) 269 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | build-backend = "setuptools.build_meta" 3 | requires = [ 4 | "setuptools>=42", 5 | "setuptools-scm[toml]>=3.4", 6 | "wheel", 7 | ] 8 | 9 | [project] 10 | name = "pocean-core" 11 | description = "A python framework for working with met-ocean data" 12 | readme = "README.md" 13 | license = { file = "LICENSE.txt" } 14 | authors = [ 15 | { name = "Kyle Wilcox", email = "kyle@axds.co" }, 16 | ] 17 | requires-python = ">=3.9" 18 | classifiers = [ 19 | "Programming Language :: Python :: 3 :: Only", 20 | "Programming Language :: Python :: 3.9", 21 | "Programming Language :: Python :: 3.10", 22 | "Programming Language :: Python :: 3.11", 23 | "Programming Language :: Python :: 3.12", 24 | "Programming Language :: Python :: 3.13", 25 | ] 26 | dynamic = [ 27 | "version", 28 | ] 29 | dependencies = [ 30 | "cftime>=1.2.1", 31 | "netcdf4", 32 | "numpy>=1.20", 33 | "pandas>=1.0.5", 34 | "python-dateutil", 35 | "pytz", 36 | "shapely>=1.8", 37 | "simplejson", 38 | ] 39 | urls.documentation = "https://pyoceans.github.io/pocean-core" 40 | urls.homepage = "https://pypi.org/project/pocean-core/" 41 | urls.repository = "https://github.com/pyoceans/pocean-core" 42 | 43 | [tool.setuptools] 44 | packages = [ 45 | "pocean", 46 | ] 47 | 48 | [tool.setuptools_scm] 49 | write_to = "pocean/_version.py" 50 | write_to_template = "__version__ = '{version}'" 51 | tag_regex = "^(?P<prefix>v)?(?P<version>[^\\+]+)(?P<suffix>.*)?$" 52 | 53 | [tool.ruff] 54 | 55 | line-length = 100 56 | 57 | exclude = [ 58 | ".git", 59 | ".git/", 60 | "__pycache__", 61 | "dist", 62 | "docs/", 63 | ] 64 | 65 | lint.select = [ 66 | "E", # pycodestyle errors 67 | "F", # pyflakes 68 | "I", # import sorting (isort) 69 | "W", # pycodestyle warnings 70 | ] 71 | 72 | lint.ignore = [ 73 | #"E265", 74 | #"E221", 75 | #"E203", 76 | #"E201", 77 | #"E124", 78 | #"E202", 79 | #"E241", 80 | #"E251", 81 | #"W504", 82 | "E501", 83 | "W291", 84 | "W293", 85 | ] 86 | 87 | lint.per-file-ignores."pocean/tests/*.py" = [ 88 | "F403", 89 | "F405", 90 | ] 91 | lint.isort.order-by-type = false 92 | 93 | [tool.pytest.ini_options] 94 | addopts = "-s -rxs -v" 95 | 96 | filterwarnings = [ 97 | "error", 98 | ] 99 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | flake8 2 | pooch 3 | pre-commit 4 | pytest 5 | 
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cftime>=1.2.1 2 | netcdf4 3 | numpy>=1.20 4 | pandas>=1.0.5 5 | python-dateutil 6 | pytz 7 | shapely>=1.8 8 | simplejson 9 | --------------------------------------------------------------------------------