├── .github
│   ├── dependabot.yml
│   └── workflows
│       ├── deploy-docs.yml
│       ├── pypi.yml
│       └── tests.yml
├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE.txt
├── MANIFEST.in
├── README.md
├── docs
│   ├── Makefile
│   ├── conf.py
│   ├── development.rst
│   ├── index.rst
│   ├── notebooks
│   │   ├── Readme.md
│   │   ├── adcp.ipynb
│   │   ├── full.nc
│   │   ├── hello.nc
│   │   └── imp.nc
│   └── requirements.txt
├── pocean
│   ├── __init__.py
│   ├── cf.py
│   ├── dataset.py
│   ├── dsg
│   │   ├── __init__.py
│   │   ├── profile
│   │   │   ├── __init__.py
│   │   │   ├── im.py
│   │   │   └── om.py
│   │   ├── timeseries
│   │   │   ├── __init__.py
│   │   │   ├── cr.py
│   │   │   ├── im.py
│   │   │   ├── ir.py
│   │   │   └── om.py
│   │   ├── timeseriesProfile
│   │   │   ├── __init__.py
│   │   │   ├── im.py
│   │   │   ├── om.py
│   │   │   └── r.py
│   │   ├── trajectory
│   │   │   ├── __init__.py
│   │   │   ├── cr.py
│   │   │   ├── im.py
│   │   │   └── ir.py
│   │   ├── trajectoryProfile
│   │   │   ├── __init__.py
│   │   │   └── cr.py
│   │   └── utils.py
│   ├── grid
│   │   └── __init__.py
│   ├── meta.py
│   ├── tests
│   │   ├── __init__.py
│   │   ├── download_test_data.py
│   │   ├── dsg
│   │   │   ├── __init__.py
│   │   │   ├── profile
│   │   │   │   ├── test_profile_im.py
│   │   │   │   └── test_profile_om.py
│   │   │   ├── test_new.py
│   │   │   ├── test_utils.py
│   │   │   ├── timeseries
│   │   │   │   ├── test_timeseries_im.py
│   │   │   │   └── test_timeseries_om.py
│   │   │   ├── timeseriesProfile
│   │   │   │   ├── test_timeseriesProfile_im.py
│   │   │   │   ├── test_timeseriesProfile_om.py
│   │   │   │   └── test_timeseriesProfile_r.py
│   │   │   ├── trajectory
│   │   │   │   ├── test_trajectory_cr.py
│   │   │   │   └── test_trajectory_im.py
│   │   │   └── trajectoryProfile
│   │   │       └── test_trajectoryProfile_cr.py
│   │   ├── test_cf.py
│   │   ├── test_nc.py
│   │   └── test_utils.py
│   └── utils.py
├── pyproject.toml
├── requirements-dev.txt
└── requirements.txt
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | # See https://docs.github.com/en/code-security/supply-chain-security/keeping-your-dependencies-updated-automatically/keeping-your-actions-up-to-date-with-dependabot
2 |
3 | version: 2
4 | updates:
5 |
6 | - package-ecosystem: "github-actions"
7 | directory: "/"
8 | schedule:
9 | interval: "daily"
10 | labels:
11 | - "Bot"
12 | groups:
13 | github-actions:
14 | patterns:
15 | - '*'
--------------------------------------------------------------------------------
/.github/workflows/deploy-docs.yml:
--------------------------------------------------------------------------------
1 | name: Build and Deploy docs
2 |
3 | on:
4 | pull_request:
5 | push:
6 | branches:
7 | - main
8 | release:
9 | types:
10 | - published
11 |
12 | jobs:
13 | build-docs:
14 | runs-on: ubuntu-latest
15 | defaults:
16 | run:
17 | shell: bash -l {0}
18 |
19 | steps:
20 | - name: checkout
21 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
22 | with:
23 | fetch-depth: 0
24 |
25 | - name: Setup Micromamba
26 | uses: mamba-org/setup-micromamba@b09ef9b599704322748535812ca03efb2625677b # v2.0.5
27 | with:
28 | environment-name: TEST
29 | init-shell: bash
30 | create-args: >-
31 | python=3 --file requirements.txt
32 | --file requirements-dev.txt
33 | --file docs/requirements.txt
34 | --channel conda-forge
35 |
36 | - name: Install library
37 | run: |
38 | python -m pip install -e . --no-deps --force-reinstall
39 |
40 | - name: Build documentation
41 | run: |
42 | set -e
43 | pushd docs
44 | sphinx-apidoc -M -f -o api ../pocean ../pocean/tests
45 | make clean html linkcheck
46 | popd
47 |
48 | - name: Deploy
49 | if: success() && github.event_name == 'release'
50 | uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0
51 | with:
52 | github_token: ${{ secrets.GITHUB_TOKEN }}
53 | publish_dir: docs/_site/html
54 |
--------------------------------------------------------------------------------
/.github/workflows/pypi.yml:
--------------------------------------------------------------------------------
1 | name: Publish to PyPI
2 |
3 | on:
4 | pull_request:
5 | push:
6 | branches:
7 | - main
8 | release:
9 | types:
10 | - published
11 |
12 | defaults:
13 | run:
14 | shell: bash
15 |
16 | jobs:
17 | packages:
18 | runs-on: ubuntu-latest
19 | steps:
20 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
21 |
22 | - name: Set up Python
23 | uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
24 | with:
25 | python-version: "3.x"
26 |
27 | - name: Get tags
28 | run: git fetch --depth=1 origin +refs/tags/*:refs/tags/*
29 |
30 | - name: Install build tools
31 | run: |
32 | python -m pip install --upgrade build
33 |
34 | - name: Build sdist and binary wheel
35 | run: python -m build --sdist --wheel . --outdir dist
36 |
37 | - name: CheckFiles
38 | run: |
39 | ls dist
40 | python -m pip install --upgrade check-manifest
41 | check-manifest --verbose
42 |
43 | - name: Test wheels
44 | run: |
45 | cd dist && python -m pip install *.whl
46 | python -m pip install --upgrade twine
47 | python -m twine check *
48 |
49 | - name: Publish a Python distribution to PyPI
50 | if: success() && github.event_name == 'release'
51 | uses: pypa/gh-action-pypi-publish@76f52bc884231f62b9a034ebfe128415bbaabdfc # v1.12.4
52 | with:
53 | user: __token__
54 | password: ${{ secrets.PYPI_PASSWORD }}
55 |
--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
1 | name: Tests
2 |
3 | on:
4 | pull_request:
5 | push:
6 | branches: [main]
7 |
8 | jobs:
9 | run:
10 | runs-on: ${{ matrix.os }}
11 | strategy:
12 | matrix:
13 | python-version: [ "3.9", "3.10", "3.11", "3.12", "3.13" ]
14 | os: [ windows-latest, ubuntu-latest, macos-latest ]
15 | fail-fast: false
16 | defaults:
17 | run:
18 | shell: bash -l {0}
19 |
20 | steps:
21 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
22 | with:
23 | fetch-depth: 0
24 |
25 | - name: Setup Micromamba Python ${{ matrix.python-version }}
26 | uses: mamba-org/setup-micromamba@b09ef9b599704322748535812ca03efb2625677b # v2.0.5
27 | with:
28 | environment-name: TEST
29 | init-shell: bash
30 | create-args: >-
31 | python=${{ matrix.python-version }}
32 | --file requirements.txt
33 | --file requirements-dev.txt
34 | --channel conda-forge
35 |
36 | - name: Install library
37 | run: |
38 | python -m pip install -e . --no-deps --force-reinstall
39 |
40 | - name: Tests
41 | run: >
42 | python pocean/tests/download_test_data.py
43 | && python -m pytest --pyargs pocean
44 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | output/*
3 | *.sublime-*
4 | *.swp
5 | build/*
6 | dist/*
7 | resources/
8 | *.egg-info*
9 | .cache
10 | docs/api
11 | docs/_site
12 | .pytest_cache/
13 | .envrc
14 | .idea
15 | .vscode
16 | pocean/_version.py
17 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/pre-commit/pre-commit-hooks
3 | rev: v5.0.0
4 | hooks:
5 | - id: end-of-file-fixer
6 | files: .*\.*.py
7 | - id: trailing-whitespace
8 | files: .*\.*.py
9 | - id: debug-statements
10 | - id: check-ast
11 | - id: check-added-large-files
12 | - id: check-json
13 | - id: check-merge-conflict
14 | - id: check-yaml
15 | - id: requirements-txt-fixer
16 | args:
17 | - requirements.txt
18 | - requirements-dev.txt
19 |
20 | - repo: https://github.com/astral-sh/ruff-pre-commit
21 | rev: v0.11.12
22 | hooks:
23 | - id: ruff
24 | args: ["--fix", "--show-fixes"]
25 | - id: ruff-format
26 |
27 | - repo: https://github.com/tox-dev/pyproject-fmt
28 | rev: "v2.6.0"
29 | hooks:
30 | - id: pyproject-fmt
31 |
32 | - repo: https://github.com/asottile/pyupgrade
33 | rev: v3.20.0
34 | hooks:
35 | - id: pyupgrade
36 | args: [--py38-plus]
37 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2022 Axiom Data Science
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include *.txt
2 | include README.md
3 | include pyproject.toml
4 |
5 | graft pocean
6 |
7 | prune .github
8 | prune *.egg-info
9 | prune docs
10 | prune pocean/tests
11 |
12 | exclude .coveragerc
13 | exclude .gitignore
14 | exclude .pre-commit-config.yaml
15 | exclude pocean/_version.py
16 | exclude ruff.toml
17 |
18 | global-exclude *.nc
19 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 🌐 pocean-core
2 |
3 | [Tests](https://github.com/pyoceans/pocean-core/actions/workflows/tests.yml)
4 | [License](https://github.com/pyoceans/pocean-core/blob/main/LICENSE.txt)
5 | [PyPI](https://pypi.org/project/pocean-core/)
6 |
7 |
8 | 🐍 + 🌊
9 |
10 | A python framework for working with met-ocean data
11 |
12 | ## Resources
13 | + **Documentation:** <https://pyoceans.github.io/pocean-core/>
14 | + **API:** <https://pyoceans.github.io/pocean-core/api/modules.html>
15 | + **Source Code:** <https://github.com/pyoceans/pocean-core>
16 | + **Git clone URL:** <https://github.com/pyoceans/pocean-core.git>
17 |
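18 | ## Installation
19 |
20 | ```shell
21 | # from conda-forge
22 | conda install -c conda-forge pocean-core
23 |
24 | # or from PyPI
25 | python -m pip install pocean-core
26 | ```
27 |
28 | ## Example
29 |
30 | A minimal sketch of reading a CF DSG file into a `pandas.DataFrame` (`profile.nc` is a placeholder path; any file matching one of the supported DSG types should work):
31 |
32 | ```python
33 | from pocean.cf import CFDataset
34 |
35 | # CFDataset.load sniffs the file and returns an instance of the first
36 | # DSG subclass whose is_mine() check passes
37 | with CFDataset.load("profile.nc") as ds:
38 |     df = ds.to_dataframe()
39 |
40 | print(df.head())
41 | ```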
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | SPHINXPROJ = pocean-core
8 | SOURCEDIR = .
9 | BUILDDIR = _site
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # pocean-core documentation build configuration file, created by
4 | # sphinx-quickstart on Fri Feb 10 16:09:19 2017.
5 | #
6 | # This file is execfile()d with the current directory set to its
7 | # containing dir.
8 | #
9 | # Note that not all possible configuration values are present in this
10 | # autogenerated file.
11 | #
12 | # All configuration values have a default; values that are commented out
13 | # serve to show the default.
14 |
15 | # If extensions (or modules to document with autodoc) are in another directory,
16 | # add these directories to sys.path here. If the directory is relative to the
17 | # documentation root, use os.path.abspath to make it absolute, like shown here.
18 | #
19 | import os
20 | import sys
21 | p = os.path.abspath(
22 | os.path.dirname(os.path.dirname(__file__))
23 | )
24 | sys.path.insert(0, p)
25 |
26 | # -- General configuration ------------------------------------------------
27 |
28 | # If your documentation needs a minimal Sphinx version, state it here.
29 | #
30 | # needs_sphinx = '1.0'
31 |
32 | # Add any Sphinx extension module names here, as strings. They can be
33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
34 | # ones.
35 | extensions = [
36 | 'sphinx.ext.autodoc',
37 | 'sphinx.ext.autosummary',
38 | 'sphinx.ext.napoleon'
39 | ]
40 |
41 | napoleon_google_docstring = True
42 | napoleon_numpy_docstring = True
43 | napoleon_include_init_with_doc = False
44 | napoleon_include_private_with_doc = True
45 | napoleon_include_special_with_doc = False
46 | napoleon_use_admonition_for_examples = True
47 | napoleon_use_admonition_for_notes = True
48 | napoleon_use_admonition_for_references = True
49 | napoleon_use_ivar = False
50 | napoleon_use_param = True
51 | napoleon_use_keyword = True
52 | napoleon_use_rtype = True
53 |
54 | # Add any paths that contain templates here, relative to this directory.
55 | templates_path = ['_templates']
56 |
57 | # The suffix(es) of source filenames.
58 | # You can specify multiple suffix as a list of string:
59 | #
60 | source_suffix = ['.rst']
61 |
62 | # The master toctree document.
63 | master_doc = 'index'
64 |
65 | # General information about the project.
66 | project = 'pocean-core'
67 | copyright = '2023, Kyle Wilcox'
68 | author = 'Kyle Wilcox'
69 |
70 | # The version info for the project you're documenting, acts as replacement for
71 | # |version| and |release|, also used in various other places throughout the
72 | # built documents.
73 | #
74 | # The short X.Y version.
75 | from pocean import __version__ # noqa
76 |
77 | version = __version__
78 | # The full version, including alpha/beta/rc tags.
79 | release = __version__
80 |
81 | # The language for content autogenerated by Sphinx. Refer to documentation
82 | # for a list of supported languages.
83 | #
84 | # This is also used if you do content translation via gettext catalogs.
85 | # Usually you set "language" from the command line for these cases.
86 | language = "en"
87 |
88 | # List of patterns, relative to source directory, that match files and
89 | # directories to ignore when looking for source files.
90 | # This patterns also effect to html_static_path and html_extra_path
91 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
92 |
93 | # The name of the Pygments (syntax highlighting) style to use.
94 | pygments_style = 'sphinx'
95 |
96 | # If true, `todo` and `todoList` produce output, else they produce nothing.
97 | todo_include_todos = False
98 |
99 |
100 | # -- Options for HTML output ----------------------------------------------
101 |
102 | # The theme to use for HTML and HTML Help pages. See the documentation for
103 | # a list of builtin themes.
104 | #
105 | html_theme = 'alabaster'
106 |
107 | # Theme options are theme-specific and customize the look and feel of a theme
108 | # further. For a list of options available for each theme, see the
109 | # documentation.
110 | #
111 | html_theme_options = {
112 | 'description': 'A python framework for working with met-ocean data',
113 | 'github_user': 'pyoceans',
114 | 'github_repo': 'pocean-core',
115 | 'github_button': 'true',
116 | }
117 |
118 | # Add any paths that contain custom static files (such as style sheets) here,
119 | # relative to this directory. They are copied after the builtin static files,
120 | # so a file named "default.css" will overwrite the builtin "default.css".
121 | #html_static_path = ['_static']
122 |
123 | # If true, links to the reST sources are added to the pages.
124 | html_show_sourcelink = False
125 |
126 | # -- Options for HTMLHelp output ------------------------------------------
127 |
128 | # Output file base name for HTML help builder.
129 | htmlhelp_basename = 'pocean-coredoc'
130 |
131 |
132 | # -- Options for LaTeX output ---------------------------------------------
133 |
134 | latex_elements = {
135 | # The paper size ('letterpaper' or 'a4paper').
136 | #
137 | # 'papersize': 'letterpaper',
138 |
139 | # The font size ('10pt', '11pt' or '12pt').
140 | #
141 | # 'pointsize': '10pt',
142 |
143 | # Additional stuff for the LaTeX preamble.
144 | #
145 | # 'preamble': '',
146 |
147 | # Latex figure (float) alignment
148 | #
149 | # 'figure_align': 'htbp',
150 | }
151 |
152 | # Grouping the document tree into LaTeX files. List of tuples
153 | # (source start file, target name, title,
154 | # author, documentclass [howto, manual, or own class]).
155 | latex_documents = [
156 | (master_doc, 'pocean-core.tex', 'pocean-core Documentation',
157 | 'Kyle Wilcox', 'manual'),
158 | ]
159 |
160 |
161 | # -- Options for manual page output ---------------------------------------
162 |
163 | # One entry per manual page. List of tuples
164 | # (source start file, name, description, authors, manual section).
165 | man_pages = [
166 | (master_doc, 'pocean-core', 'pocean-core Documentation',
167 | [author], 1)
168 | ]
169 |
170 |
171 | # -- Options for Texinfo output -------------------------------------------
172 |
173 | # Grouping the document tree into Texinfo files. List of tuples
174 | # (source start file, target name, title, author,
175 | # dir menu entry, description, category)
176 | texinfo_documents = [
177 | (master_doc, 'pocean-core', 'pocean-core Documentation',
178 | author, 'pocean-core', 'A python framework for working with met-ocean data.',
179 | 'Miscellaneous'),
180 | ]
181 |
--------------------------------------------------------------------------------
/docs/development.rst:
--------------------------------------------------------------------------------
1 | Development
2 | ============
3 |
4 | Create a conda environment
5 |
6 | .. code-block:: bash
7 |
8 | conda create --name pocean310 python=3.10 --file requirements.txt --file requirements-dev.txt
9 | conda activate pocean310
10 |
11 | Running tests
12 | -------------
13 |
14 | .. code-block:: bash
15 |
16 | # download test datasets
17 | cd pocean/tests
18 | python download_test_data.py
19 |
20 | # run test suite
21 | pytest
22 |
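23 | Linting
24 | -------
25 |
26 | The repository ships a ``.pre-commit-config.yaml``; a typical setup is:
27 |
28 | .. code-block:: bash
29 |
30 |     python -m pip install pre-commit
31 |     pre-commit install
32 |     pre-commit run --all-files
33 |
34 | Building the docs
35 | -----------------
36 |
37 | These are the same commands the ``deploy-docs`` workflow runs (with
38 | ``docs/requirements.txt`` installed into the environment):
39 |
40 | .. code-block:: bash
41 |
42 |     pushd docs
43 |     sphinx-apidoc -M -f -o api ../pocean ../pocean/tests
44 |     make clean html linkcheck
45 |     popd
46 |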
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | 🌐 pocean-core
2 | ==============
3 |
4 | 🐍 + 🌊
5 |
6 | A python framework for working with met-ocean data
7 |
8 |
9 | Documentation
10 | =============
11 |
12 | .. toctree::
13 | :maxdepth: 3
14 | :caption: Contents:
15 |
16 | api/modules
17 | development
18 |
19 | Indices and tables
20 | ==================
21 |
22 | * :ref:`genindex`
23 | * :ref:`modindex`
24 | * :ref:`search`
25 |
--------------------------------------------------------------------------------
/docs/notebooks/Readme.md:
--------------------------------------------------------------------------------
1 | # Notebook examples using pocean-core
2 |
--------------------------------------------------------------------------------
/docs/notebooks/adcp.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "collapsed": true
7 | },
8 | "source": [
9 | "# NRL ADCP .mat file to CF-1.6 timeSeriesProfile using pocean"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "Here we read a matlab file with minimal metadata, and write a CF-DSG 1.6 timeSeriesProfile netcdf file. We want the file to work seamlessly with ERDDAP, so we add some ERDDAP specific attributes like `cdm_timeseries_variables`, `cdm_profile_variables`, and `subsetVariables`."
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 1,
22 | "metadata": {
23 | "collapsed": true
24 | },
25 | "outputs": [],
26 | "source": [
27 | "import matplotlib.pyplot as plt\n",
28 | "import pandas as pd\n",
29 | "from scipy.io import loadmat\n",
30 | "import datetime as dt\n",
31 | "import numpy as np\n",
32 | "\n",
33 | "#conda install -c conda-forge pocean-core\n",
34 | "from pocean.dsg.timeseriesProfile.om import OrthogonalMultidimensionalTimeseriesProfile"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": 2,
40 | "metadata": {
41 | "collapsed": true
42 | },
43 | "outputs": [],
44 | "source": [
45 | "# wget http://www.satlab.hawaii.edu/onr/adria/data/moorings/nrl/Final/ADCP_matlab/VR4f.mat\n",
46 | "d = loadmat('/data/ADRIA/MOORINGS/NRL/VR4f.mat')"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": 3,
52 | "metadata": {},
53 | "outputs": [
54 | {
55 | "data": {
151 | "text/plain": [
152 | " profile station t un vn wn \\\n",
153 | "296338 5199 VR4F 2003-04-29 03:00:00 0.061417 0.394078 0.017406 \n",
154 | "296339 5199 VR4F 2003-04-29 03:00:00 -0.044268 0.653439 0.003686 \n",
155 | "296340 5199 VR4F 2003-04-29 03:00:00 0.054443 0.386804 0.004221 \n",
156 | "296341 5199 VR4F 2003-04-29 03:00:00 0.098836 0.529064 -0.011401 \n",
157 | "296342 5199 VR4F 2003-04-29 03:00:00 0.008518 0.550976 0.011823 \n",
158 | "\n",
159 | " x y z \n",
160 | "296338 13.0281 45.187783 28.548073 \n",
161 | "296339 13.0281 45.187783 29.048073 \n",
162 | "296340 13.0281 45.187783 29.548073 \n",
163 | "296341 13.0281 45.187783 30.048073 \n",
164 | "296342 13.0281 45.187783 30.548073 "
165 | ]
166 | },
167 | "execution_count": 3,
168 | "metadata": {},
169 | "output_type": "execute_result"
170 | }
171 | ],
172 | "source": [
173 | "times = [dt.datetime(2002,1,1,0,0,0) + dt.timedelta(a) for a in d['timen'].flatten()]\n",
174 | "depths = d['mdepth'].flatten()\n",
175 | "\n",
176 | "# Repeat each time for the number of depths\n",
177 | "t = np.repeat(times, len(depths))\n",
178 | "\n",
179 | "# Create a profile index, and repeat for number of depths\n",
180 | "profile = np.repeat(np.array(range(len(times)), dtype=np.int32) + 1, len(depths))\n",
181 | "\n",
182 | "# Tile the depths for each time\n",
183 | "z = np.tile(depths, len(times))\n",
184 | "\n",
185 | "df = pd.DataFrame({\n",
186 | " 't': t,\n",
187 | " 'x': 13.0281,\n",
188 | " 'y': 45.187783,\n",
189 | " 'z': z,\n",
190 | " 'un': d['un'].T.flatten()/10., # cm/s to m/s\n",
191 | " 'vn': d['vn'].T.flatten()/10., # cm/s to m/s\n",
192 | " 'wn': d['wn'].T.flatten()/10., # cm/s to m/s\n",
193 | " 'profile': profile,\n",
194 | " 'station': 'VR4F'\n",
195 | "})\n",
196 | "\n",
197 | "df.tail()"
198 | ]
199 | },
200 | {
201 | "cell_type": "code",
202 | "execution_count": 4,
203 | "metadata": {
204 | "collapsed": true
205 | },
206 | "outputs": [],
207 | "source": [
208 | "atts={\n",
209 | " 'global': {\n",
210 | " 'title': 'ADRIA02 Mooring VR4',\n",
211 | " 'summary': 'Data from bottom-mounted ADCP',\n",
212 | " 'institution': 'NRL',\n",
213 | " 'cdm_timeseries_variables': 'station',\n",
214 | " 'cdm_profile_variables': 'profile',\n",
215 | " 'subsetVariables': 'depth'\n",
216 | " },\n",
217 | " 'longitude': {\n",
218 | " 'units': 'degrees_east',\n",
219 | " 'standard_name':'longitude'\n",
220 | " },\n",
221 | " 'latitude': {\n",
222 | " 'units': 'degrees_north',\n",
223 | " 'standard_name':'latitude'\n",
224 | " },\n",
225 | " 'z': {\n",
226 | " 'units': 'm',\n",
227 | " 'standard_name': 'depth',\n",
228 | " 'positive':'down'\n",
229 | " },\n",
230 | " 'un': {\n",
231 | " 'units': 'm/s',\n",
232 | " 'standard_name':'eastward_sea_water_velocity'\n",
233 | " },\n",
234 | " 'vn': {\n",
235 | " 'units': 'm/s',\n",
236 | " 'standard_name':'northward_sea_water_velocity'\n",
237 | " },\n",
238 | " 'profile': {\n",
239 | " 'cf_role': 'profile_id'\n",
240 | " }\n",
241 | " }"
242 | ]
243 | },
244 | {
245 | "cell_type": "code",
246 | "execution_count": 5,
247 | "metadata": {},
248 | "outputs": [
249 | {
250 | "data": {
251 | "text/plain": [
252 | "{'cdm_profile_variables': 'profile',\n",
253 | " 'cdm_timeseries_variables': 'station',\n",
254 | " 'institution': 'NRL',\n",
255 | " 'subsetVariables': 'depth',\n",
256 | " 'summary': 'Data from bottom-mounted ADCP',\n",
257 | " 'title': 'ADRIA02 Mooring VR4'}"
258 | ]
259 | },
260 | "execution_count": 5,
261 | "metadata": {},
262 | "output_type": "execute_result"
263 | }
264 | ],
265 | "source": [
266 | "atts['global']"
267 | ]
268 | },
269 | {
270 | "cell_type": "code",
271 | "execution_count": 6,
272 | "metadata": {
273 | "scrolled": true
274 | },
275 | "outputs": [
276 | {
277 | "name": "stderr",
278 | "output_type": "stream",
279 | "text": [
280 | "/home/richard.signell/miniconda3/envs/IOOS3/lib/python3.6/site-packages/pocean/dsg/timeseriesProfile/om.py:82: UserWarning: endian-ness of dtype and endian kwarg do not match, using endian kwarg\n",
281 | " latitude = nc.createVariable('latitude', get_dtype(df.y), ('station',))\n",
282 | "/home/richard.signell/miniconda3/envs/IOOS3/lib/python3.6/site-packages/pocean/dsg/timeseriesProfile/om.py:83: UserWarning: endian-ness of dtype and endian kwarg do not match, using endian kwarg\n",
283 | " longitude = nc.createVariable('longitude', get_dtype(df.x), ('station',))\n",
284 | "/home/richard.signell/miniconda3/envs/IOOS3/lib/python3.6/site-packages/pocean/dsg/timeseriesProfile/om.py:84: UserWarning: endian-ness of dtype and endian kwarg do not match, using endian kwarg\n",
285 | " z = nc.createVariable('z', get_dtype(df.z), ('z',))\n",
286 | "/home/richard.signell/miniconda3/envs/IOOS3/lib/python3.6/site-packages/pocean/dsg/timeseriesProfile/om.py:108: UserWarning: endian-ness of dtype and endian kwarg do not match, using endian kwarg\n",
287 | " v = nc.createVariable(var_name, get_dtype(sdf[c]), ('time', 'z', 'station'), fill_value=sdf[c].dtype.type(cls.default_fill_value))\n"
288 | ]
289 | },
290 | {
291 | "data": {
292 | "text/plain": [
293 | "<class 'pocean.dsg.timeseriesProfile.om.OrthogonalMultidimensionalTimeseriesProfile'>\n",
294 | "root group (NETCDF4 data model, file format HDF5):\n",
295 | " Conventions: CF-1.6\n",
296 | " date_created: 2017-06-21T12:19:00Z\n",
297 | " featureType: timeseriesProfile\n",
298 | " cdm_data_type: TimeseriesProfile\n",
299 | " title: ADRIA02 Mooring VR4\n",
300 | " summary: Data from bottom-mounted ADCP\n",
301 | " institution: NRL\n",
302 | " cdm_timeseries_variables: station\n",
303 | " cdm_profile_variables: profile\n",
304 | " subsetVariables: depth\n",
305 | " dimensions(sizes): station(1), time(5199), z(57)\n",
306 | " variables(dimensions): int32 \u001b[4mcrs\u001b[0m(), \u001b[4mstation\u001b[0m(station), float64 \u001b[4mtime\u001b[0m(time), float64 \u001b[4mlatitude\u001b[0m(station), float64 \u001b[4mlongitude\u001b[0m(station), float64 \u001b[4mz\u001b[0m(z), int32 \u001b[4mprofile\u001b[0m(time,z,station), float64 \u001b[4mun\u001b[0m(time,z,station), float64 \u001b[4mvn\u001b[0m(time,z,station), float64 \u001b[4mwn\u001b[0m(time,z,station)\n",
307 | " groups: "
308 | ]
309 | },
310 | "execution_count": 6,
311 | "metadata": {},
312 | "output_type": "execute_result"
313 | }
314 | ],
315 | "source": [
316 | "OrthogonalMultidimensionalTimeseriesProfile.from_dataframe(df, output='/data/ADRIA/MOORINGS/NRL/vr4f.nc', \n",
317 | " attributes=atts)"
318 | ]
319 | }
320 | ],
321 | "metadata": {
322 | "_draft": {
323 | "nbviewer_url": "https://gist.github.com/b2f37b7724981e80e48bd59311ac9a58"
324 | },
325 | "gist": {
326 | "data": {
327 | "description": "erddap/adcp.ipynb",
328 | "public": true
329 | },
330 | "id": "b2f37b7724981e80e48bd59311ac9a58"
331 | },
332 | "kernelspec": {
333 | "display_name": "Python [conda env:IOOS3]",
334 | "language": "python",
335 | "name": "conda-env-IOOS3-py"
336 | },
337 | "language_info": {
338 | "codemirror_mode": {
339 | "name": "ipython",
340 | "version": 3
341 | },
342 | "file_extension": ".py",
343 | "mimetype": "text/x-python",
344 | "name": "python",
345 | "nbconvert_exporter": "python",
346 | "pygments_lexer": "ipython3",
347 | "version": "3.6.1"
348 | }
349 | },
350 | "nbformat": 4,
351 | "nbformat_minor": 2
352 | }
353 |
--------------------------------------------------------------------------------
/docs/notebooks/full.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pyoceans/pocean-core/6e4dda9c818c65b31192e8d5fb01d6bcf29a43f3/docs/notebooks/full.nc
--------------------------------------------------------------------------------
/docs/notebooks/hello.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pyoceans/pocean-core/6e4dda9c818c65b31192e8d5fb01d6bcf29a43f3/docs/notebooks/hello.nc
--------------------------------------------------------------------------------
/docs/notebooks/imp.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pyoceans/pocean-core/6e4dda9c818c65b31192e8d5fb01d6bcf29a43f3/docs/notebooks/imp.nc
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx
2 |
--------------------------------------------------------------------------------
/pocean/__init__.py:
--------------------------------------------------------------------------------
1 | #!python
2 |
3 | # Package level logger
4 | import logging
5 |
6 | logger = logging.getLogger("pocean")
7 | logger.addHandler(logging.NullHandler())
8 |
9 | try:
10 | from ._version import __version__
11 | except ImportError:
12 | __version__ = "unknown"
13 |
--------------------------------------------------------------------------------
/pocean/cf.py:
--------------------------------------------------------------------------------
1 | #!python
2 | import datetime
3 | import itertools
4 | import os
5 | import re
6 |
7 | from . import logger
8 | from .dataset import EnhancedDataset
9 | from .utils import all_subclasses, is_url
10 |
11 | datetime.UTC = datetime.timezone.utc
12 |
13 |
14 | class CFDataset(EnhancedDataset):
15 | default_fill_value = False
16 | default_time_unit = "seconds since 1990-01-01 00:00:00Z"
17 |
18 | @classmethod
19 | def load(cls, path):
20 | """Attempt to load a netCDF file as a CF compatible dataset
21 |
22 |         Tries each registered ``CFDataset`` subclass's ``is_mine`` check and
23 |         returns an instance of the first one that matches.
23 |
24 | Parameters
25 | ----------
26 | path :
27 | Path to netCDF file
28 |
29 | Returns
30 | -------
31 | CFDataset subclass for your netCDF file
32 |
33 | Raises
34 | ------
35 | ValueError:
36 | If no suitable class is found for your dataset
37 |
38 | """
39 |
40 | if not is_url(path):
41 | path = os.path.realpath(path)
42 |
43 | subs = list(all_subclasses(cls))
44 |
45 | dsg = None
46 | try:
47 | dsg = cls(path)
48 | for klass in subs:
49 | logger.debug(f"Trying {klass.__name__}...")
50 | if hasattr(klass, "is_mine"):
51 | if klass.is_mine(dsg):
52 | return klass(path)
53 | except OSError:
54 | raise
55 | finally:
56 | if hasattr(dsg, "close"):
57 | dsg.close()
58 |
59 | subnames = ", ".join([s.__name__ for s in subs])
60 | raise ValueError(f"Could not open {path} as any type of CF Dataset. Tried: {subnames}.")
61 |
62 | def axes(self, name):
63 | return getattr(self, f"{name.lower()}_axes")()
64 |
65 | def t_axes(self):
66 | # If there is only one variable with the axis parameter, return it
67 | hasaxis = self.filter_by_attrs(axis=lambda x: x and str(x).lower() == "t")
68 | if len(hasaxis) == 1:
69 | return hasaxis
70 |
71 | tvars = list(
72 | set(
73 | itertools.chain(
74 | hasaxis,
75 | self.filter_by_attrs(
76 | standard_name=lambda x: x in ["time", "forecast_reference_time"]
77 | ),
78 | )
79 | )
80 | )
81 | return tvars
82 |
83 | def x_axes(self):
84 | """
85 | CF X axis will have one of the following:
86 | * The `axis` property has the value ``'X'``
87 | * Units of longitude (see `cf.Units.islongitude` for details)
88 | * The `standard_name` property is one of ``'longitude'``,
89 | ``'projection_x_coordinate'`` or ``'grid_longitude'``
90 | """
91 | xnames = ["longitude", "grid_longitude", "projection_x_coordinate"]
92 | xunits = ["degrees_east", "degree_east", "degree_E", "degrees_E", "degreeE", "degreesE"]
93 |
94 | # If there is only one variable with the axis parameter, return it
95 | hasaxis = self.filter_by_attrs(axis=lambda x: x and str(x).lower() == "x")
96 | if len(hasaxis) == 1:
97 | return hasaxis
98 |
99 | xvars = list(
100 | set(
101 | itertools.chain(
102 | hasaxis,
103 | self.filter_by_attrs(standard_name=lambda x: x and str(x).lower() in xnames),
104 | self.filter_by_attrs(units=lambda x: x and str(x).lower() in xunits),
105 | )
106 | )
107 | )
108 | return xvars
109 |
110 | def y_axes(self):
111 | ynames = ["latitude", "grid_latitude", "projection_y_coordinate"]
112 | yunits = ["degrees_north", "degree_north", "degree_N", "degrees_N", "degreeN", "degreesN"]
113 |
114 | # If there is only one variable with the axis parameter, return it
115 | hasaxis = self.filter_by_attrs(axis=lambda x: x and str(x).lower() == "y")
116 | if len(hasaxis) == 1:
117 | return hasaxis
118 |
119 | yvars = list(
120 | set(
121 | itertools.chain(
122 | hasaxis,
123 | self.filter_by_attrs(standard_name=lambda x: x and str(x).lower() in ynames),
124 | self.filter_by_attrs(units=lambda x: x and str(x).lower() in yunits),
125 | )
126 | )
127 | )
128 | return yvars
129 |
130 | def z_axes(self):
131 | znames = [
132 | "atmosphere_ln_pressure_coordinate",
133 | "atmosphere_sigma_coordinate",
134 | "atmosphere_hybrid_sigma_pressure_coordinate",
135 | "atmosphere_hybrid_height_coordinate",
136 | "atmosphere_sleve_coordinate",
137 | "ocean_sigma_coordinate",
138 | "ocean_s_coordinate",
139 | "ocean_s_coordinate_g1",
140 | "ocean_s_coordinate_g2",
141 | "ocean_sigma_z_coordinate",
142 | "ocean_double_sigma_coordinate",
143 | ]
144 |
145 | # If there is only one variable with the axis parameter, return it
146 | hasaxis = self.filter_by_attrs(axis=lambda x: x and str(x).lower() == "z")
147 | if len(hasaxis) == 1:
148 | return hasaxis
149 |
150 | zvars = list(
151 | set(
152 | itertools.chain(
153 | hasaxis,
154 | self.filter_by_attrs(positive=lambda x: x and str(x).lower() in ["up", "down"]),
155 | self.filter_by_attrs(standard_name=lambda x: x and str(x).lower() in znames),
156 | )
157 | )
158 | )
159 | return zvars
160 |
161 | def is_valid(self, *args, **kwargs):
162 | return self.__class__.is_mine(self, *args, **kwargs)
163 |
164 | def data_vars(self):
165 | return self.filter_by_attrs(
166 | coordinates=lambda x: x is not None,
167 | units=lambda x: x is not None,
168 | standard_name=lambda x: x is not None,
169 | flag_values=lambda x: x is None,
170 | flag_masks=lambda x: x is None,
171 | flag_meanings=lambda x: x is None,
172 | )
173 |
174 | def ancillary_vars(self):
175 | ancillary_variables = []
176 | for rv in self.filter_by_attrs(ancillary_variables=lambda x: x is not None):
177 | # Space separated ancillary variables
178 | for av in rv.ancillary_variables.split(" "):
179 | if av in self.variables:
180 | ancillary_variables.append(self.variables[av])
181 | return list(set(ancillary_variables))
182 |
183 | def nc_attributes(self):
184 | return {
185 | "global": {
186 | "Conventions": "CF-1.6",
187 | "date_created": datetime.datetime.now(datetime.UTC).strftime("%Y-%m-%dT%H:%M:00Z"),
188 | }
189 | }
190 |
191 |
192 | def cf_safe_name(name):
193 | if isinstance(name, str):
194 | if re.match("^[0-9_]", name):
195 | # Add a letter to the front
196 | name = f"v_{name}"
197 | return re.sub(r"[^_a-zA-Z0-9]", "_", name)
198 |
199 | raise ValueError(f'Could not convert "{name}" to a safe name')
200 |
--------------------------------------------------------------------------------
/pocean/dataset.py:
--------------------------------------------------------------------------------
1 | #!python
2 | import warnings
3 | from collections import OrderedDict
4 |
5 | import numpy as np
6 | import simplejson as json
7 | from netCDF4 import Dataset
8 |
9 | from . import logger as L
10 | from .meta import (
11 | MetaInterface,
12 | ncpyattributes,
13 | string_to_dtype,
14 | untype_attributes,
15 | )
16 | from .utils import (
17 | generic_masked,
18 | JSONEncoder,
19 | safe_attribute_typing,
20 | safe_issubdtype,
21 | )
22 |
23 | # Attributes that need to be the same dtype as the variable they describe
24 | _TYPE_SENSITIVE_ATTRIBUTES = [
25 | "_FillValue",
26 | "missing_value",
27 | "valid_min",
28 | "valid_max",
29 | "valid_range",
30 | "display_min",
31 | "display_max",
32 | "display_range",
33 | "colorBarMinimum",
34 | "colorBarMaximum",
35 | ]
36 |
37 |
38 | class EnhancedDataset(Dataset):
39 | def __del__(self):
40 | try:
41 | self.close()
42 | except RuntimeError:
43 | pass
44 |
45 | def close(self):
46 | if not self.isopen():
47 | return
48 |
49 | super().close()
50 |
51 | def vatts(self, vname):
52 | d = {}
53 | var = self.variables[vname]
54 | for k in var.ncattrs():
55 | d[k] = var.getncattr(k)
56 | return d
57 |
58 | def filter_by_attrs(self, *args, **kwargs):
59 | return self.get_variables_by_attributes(*args, **kwargs)
60 |
61 | def __apply_meta_interface__(self, meta, **kwargs):
62 | warnings.warn(
63 | "`__apply_meta_interface__` is deprecated. Use `apply_meta()` instead",
64 | DeprecationWarning,
65 | )
66 | return self.apply_meta(meta, **kwargs)
67 |
68 | def __getattr__(self, name):
69 | if name in ["__meta_interface__", "_meta"]:
70 | warnings.warn(
71 | "`__meta_interface__` and `_meta` are deprecated. Use `meta()` instead",
72 | DeprecationWarning,
73 | )
74 | return self.meta()
75 | else:
76 | return super().__getattr__(name)
77 |
78 | def apply_meta(self, *args, **kwargs):
79 | """Shortcut to the JSON object without writing any data"""
80 | kwargs["create_data"] = False
81 | return self.apply_json(*args, **kwargs)
82 |
83 | def meta(self, *args, **kwargs):
84 | """Shortcut to the JSON object without any data"""
85 | kwargs["return_data"] = False
86 | return self.json(*args, **kwargs)
87 |
88 | def json(self, return_data=True, fill_data=True):
89 | ds = OrderedDict()
90 | vs = OrderedDict()
91 | gs = ncpyattributes({ga: self.getncattr(ga) for ga in self.ncattrs()})
92 |
93 | # Dimensions
94 | for dname, dim in self.dimensions.items():
95 | if dim.isunlimited():
96 | ds[dname] = None
97 | else:
98 | ds[dname] = dim.size
99 |
100 | # Variables
101 | for k, v in self.variables.items():
102 | typed = v.dtype
103 | if isinstance(typed, np.dtype):
104 | typed = str(typed.name)
105 | elif isinstance(typed, type):
106 | typed = typed.__name__
107 |
108 | vattrs = {va: v.getncattr(va) for va in v.ncattrs()}
109 | vardict = {"attributes": ncpyattributes(vattrs), "shape": v.dimensions, "type": typed}
110 | if return_data is True:
111 | vdata = generic_masked(v[:], attrs=vattrs)
112 | if fill_data is True:
113 | vdata = vdata.filled()
114 | vardict["data"] = vdata.tolist()
115 |
116 | vs[k] = vardict
117 |
118 | return MetaInterface(dimensions=ds, variables=vs, attributes=gs)
119 |
120 | def apply_json(self, meta, create_vars=True, create_dims=True, create_data=True):
121 | """Apply a meta interface object to a netCDF4 compatible object"""
122 | ds = meta.get("dimensions", OrderedDict())
123 | gs = meta.get("attributes", OrderedDict())
124 | vs = meta.get("variables", OrderedDict())
125 |
126 | # Dimensions
127 | for dname, dsize in ds.items():
128 | # Ignore dimension sizes less than 0
129 | if dsize and dsize < 0:
130 | continue
131 | if dname not in self.dimensions:
132 | # Don't create new dimensions
133 | if create_dims is False:
134 | continue
135 |
136 | self.createDimension(dname, size=dsize)
137 | else:
138 | dfilesize = self.dimensions[dname].size
139 | if dfilesize != dsize:
140 | L.warning(
141 | "Not changing size of dimension {}. file: {}, meta: {}".format(
142 | dname, dfilesize, dsize
143 | )
144 | )
145 |
146 | # Global attributes
147 | typed_gs = untype_attributes(gs)
148 | self.setncatts(typed_gs)
149 |
150 | # Variables
151 | for vname, vvalue in vs.items():
152 | vatts = untype_attributes(vvalue.get("attributes", {}))
153 |
154 | if vname not in self.variables:
155 | # Don't create new variables
156 | if create_vars is False:
157 | continue
158 |
159 | if "shape" not in vvalue and "type" not in vvalue:
160 | L.debug(f"Skipping {vname} creation, no shape or no type defined")
161 | continue
162 | shape = vvalue.get("shape", []) # Dimension names
163 | vardtype = string_to_dtype(vvalue.get("type"))
164 |
165 | if safe_issubdtype(vardtype, np.floating):
166 | defaultfill = vardtype.type(np.nan) # We can use `nan` for floats
167 | elif vardtype.kind in ["U", "S"]:
168 | defaultfill = None # No fillvalue on VLENs
169 | else:
170 | # Use a masked value which evaluates to different things depending on the dtype
171 |                     # For integers it resolves to `0`.
172 | defaultfill = vardtype.type(np.ma.masked)
173 |
174 | fillmiss = vatts.get("_FillValue", vatts.get("missing_value", defaultfill))
175 | newvar = self.createVariable(vname, vardtype, dimensions=shape, fill_value=fillmiss)
176 | else:
177 | newvar = self.variables[vname]
178 |
179 |             # Now assign the data if it exists
180 | if create_data is True and "data" in vvalue:
181 | # Because the JSON format can be flattened already we are just
182 | # going to always reshape the data to the variable shape
183 | data = generic_masked(
184 | np.array(vvalue["data"], dtype=newvar.dtype).flatten()
185 | ).reshape(newvar.shape)
186 | newvar[:] = data
187 |
188 | # Don't re-assign fill value attributes
189 | if "_FillValue" in vatts:
190 | del vatts["_FillValue"]
191 | if "missing_value" in vatts:
192 | del vatts["missing_value"]
193 |
194 |             # Convert any attributes that need to match the variable's dtype to that dtype
195 | for sattr in _TYPE_SENSITIVE_ATTRIBUTES:
196 | if sattr in vatts:
197 | vatts[sattr] = safe_attribute_typing(newvar.dtype, vatts[sattr])
198 |
199 | newvar.setncatts(vatts)
200 |
201 | def to_json(self, *args, **kwargs):
202 | return json.dumps(self.to_dict(), *args, **kwargs)
203 |
204 | def json_attributes(self, vfuncs=None):
205 | """
206 | vfuncs can be any callable that accepts a single argument, the
207 | Variable object, and returns a dictionary of new attributes to
208 | set. These will overwrite existing attributes
209 | """
210 |
211 | vfuncs = vfuncs or []
212 |
213 | js = {"global": {}}
214 |
215 | for k in self.ncattrs():
216 | js["global"][k] = self.getncattr(k)
217 |
218 | for varname, var in self.variables.items():
219 | js[varname] = {}
220 | for k in var.ncattrs():
221 | z = var.getncattr(k)
222 | try:
223 | assert not np.isnan(z).all()
224 | js[varname][k] = z
225 | except AssertionError:
226 | js[varname][k] = None
227 | except TypeError:
228 | js[varname][k] = z
229 |
230 | for vf in vfuncs:
231 | try:
232 |                     js[varname].update(vf(var))
233 | except BaseException:
234 | L.exception("Could not apply custom variable attribute function")
235 |
236 | return json.loads(json.dumps(js, cls=JSONEncoder))
237 |
238 | def update_attributes(self, attributes):
239 | for k, v in attributes.pop("global", {}).items():
240 | try:
241 | self.setncattr(k, v)
242 | except BaseException:
243 | L.warning(f"Could not set global attribute {k}: {v}")
244 |
245 | for k, v in attributes.items():
246 | if k in self.variables:
247 | for n, z in v.items():
248 | # Don't re-assign fill value attributes
249 | if n in ["_FillValue", "missing_value"]:
250 | L.warning(f"Refusing to set {n} on {k}")
251 | continue
252 |
253 | try:
254 | self.variables[k].setncattr(n, z)
255 | except BaseException:
256 | L.warning(f"Could not set attribute {n} on {k}")
257 | self.sync()
258 |
--------------------------------------------------------------------------------
/pocean/dsg/__init__.py:
--------------------------------------------------------------------------------
1 | #!python
2 |
3 | # Profile
4 | from .profile.im import IncompleteMultidimensionalProfile
5 | from .profile.om import OrthogonalMultidimensionalProfile
6 |
7 | # Timeseries
8 | from .timeseries.cr import ContiguousRaggedTimeseries
9 | from .timeseries.im import IncompleteMultidimensionalTimeseries
10 | from .timeseries.ir import IndexedRaggedTimeseries
11 | from .timeseries.om import OrthogonalMultidimensionalTimeseries
12 |
13 | # TimeseriesProfile
14 | from .timeseriesProfile.im import IncompleteMultidimensionalTimeseriesProfile
15 | from .timeseriesProfile.om import OrthogonalMultidimensionalTimeseriesProfile
16 | from .timeseriesProfile.r import RaggedTimeseriesProfile
17 |
18 | # Trajectory
19 | from .trajectory.cr import ContiguousRaggedTrajectory
20 | from .trajectory.im import IncompleteMultidimensionalTrajectory
21 | from .trajectory.ir import IndexedRaggedTrajectory
22 |
23 | # TrajectoryProfile
24 | from .trajectoryProfile.cr import ContiguousRaggedTrajectoryProfile
25 |
26 | # Attribute Utilities
27 | from .utils import (
28 | get_calculated_attributes,
29 | get_creation_attributes,
30 | get_geographic_attributes,
31 | get_temporal_attributes,
32 | get_vertical_attributes,
33 | )
34 |
35 | __all__ = [
36 | "IncompleteMultidimensionalProfile",
37 | "OrthogonalMultidimensionalProfile",
38 | "ContiguousRaggedTrajectory",
39 | "IndexedRaggedTrajectory",
40 | "IncompleteMultidimensionalTrajectory",
41 | "ContiguousRaggedTrajectoryProfile",
42 | "ContiguousRaggedTimeseries",
43 | "IndexedRaggedTimeseries",
44 | "IncompleteMultidimensionalTimeseries",
45 | "OrthogonalMultidimensionalTimeseries",
46 | "RaggedTimeseriesProfile",
47 | "IncompleteMultidimensionalTimeseriesProfile",
48 | "OrthogonalMultidimensionalTimeseriesProfile",
49 | "get_geographic_attributes",
50 | "get_vertical_attributes",
51 | "get_temporal_attributes",
52 | "get_creation_attributes",
53 | "get_calculated_attributes",
54 | ]
55 |
--------------------------------------------------------------------------------
/pocean/dsg/profile/__init__.py:
--------------------------------------------------------------------------------
1 | #!python
2 | from collections import namedtuple
3 |
4 | from shapely.geometry import LineString, Point
5 |
6 | from pocean.utils import logger as L # noqa
7 | from pocean.utils import (
8 | unique_justseen,
9 | )
10 |
11 | profile_meta = namedtuple("Profile", ["min_z", "max_z", "t", "x", "y", "id", "geometry"])
12 | profiles_meta = namedtuple(
13 | "ProfileCollection", ["min_z", "max_z", "min_t", "max_t", "profiles", "geometry"]
14 | )
15 |
16 |
17 | def profile_calculated_metadata(df, axes, geometries=True):
18 | profiles = {}
19 | for pid, pgroup in df.groupby(axes.profile):
20 | pgroup = pgroup.sort_values(axes.t)
21 | first_row = pgroup.iloc[0]
22 | profiles[pid] = profile_meta(
23 | min_z=pgroup[axes.z].min(),
24 | max_z=pgroup[axes.z].max(),
25 | t=first_row[axes.t],
26 | x=first_row[axes.x],
27 | y=first_row[axes.y],
28 | id=pid,
29 | geometry=Point(first_row[axes.x], first_row[axes.y]),
30 | )
31 |
32 | if geometries:
33 | null_coordinates = df[axes.x].isnull() | df[axes.y].isnull()
34 | coords = list(
35 | unique_justseen(
36 | zip(
37 | df.loc[~null_coordinates, axes.x].tolist(),
38 | df.loc[~null_coordinates, axes.y].tolist(),
39 | )
40 | )
41 | )
42 | else:
43 | # Calculate the geometry as the linestring between all of the profile points
44 | coords = [p.geometry for _, p in profiles.items()]
45 |
46 | geometry = None
47 | if len(coords) > 1:
48 | geometry = LineString(coords)
49 | elif len(coords) == 1:
50 | geometry = Point(coords[0])
51 |
52 | return profiles_meta(
53 | min_z=df[axes.z].min(),
54 | max_z=df[axes.z].max(),
55 | min_t=df[axes.t].min(),
56 | max_t=df[axes.t].max(),
57 | profiles=profiles,
58 | geometry=geometry,
59 | )
60 |
--------------------------------------------------------------------------------
/pocean/dsg/profile/im.py:
--------------------------------------------------------------------------------
1 | #!python
2 | from collections import OrderedDict
3 | from copy import copy
4 |
5 | import numpy as np
6 | import pandas as pd
7 | from cftime import date2num
8 |
9 | from pocean import logger as L # noqa
10 | from pocean.cf import cf_safe_name, CFDataset
11 | from pocean.dsg.profile import profile_calculated_metadata
12 | from pocean.utils import (
13 | create_ncvar_from_series,
14 | dict_update,
15 | downcast_dataframe,
16 | generic_masked,
17 | get_default_axes,
18 | get_dtype,
19 | get_mapped_axes_variables,
20 | get_masked_datetime_array,
21 | get_ncdata_from_series,
22 | nativize_times,
23 | normalize_countable_array,
24 | )
25 |
26 |
27 | class IncompleteMultidimensionalProfile(CFDataset):
28 | """
29 | If there are the same number of levels in each profile, but they do not
30 | have the same set of vertical coordinates, one can use the incomplete
31 |     multidimensional array representation, in which the vertical coordinate
32 | variable is two-dimensional e.g. replacing z(z) in Example H.8,
33 | "Atmospheric sounding profiles for a common set of vertical coordinates
34 | stored in the orthogonal multidimensional array representation." with
35 | alt(profile,z). This representation also allows one to have a variable
36 | number of elements in different profiles, at the cost of some wasted space.
37 | In that case, any unused elements of the data and auxiliary coordinate
38 | variables must contain missing data values (section 9.6).
39 | """
40 |
41 | @classmethod
42 | def is_mine(cls, dsg, strict=False):
43 | try:
44 | pvars = dsg.filter_by_attrs(cf_role="profile_id")
45 | assert len(pvars) == 1
46 | assert dsg.featureType.lower() == "profile"
47 | assert len(dsg.t_axes()) >= 1
48 | assert len(dsg.x_axes()) >= 1
49 | assert len(dsg.y_axes()) >= 1
50 | assert len(dsg.z_axes()) >= 1
51 |
52 | # Allow for string variables
53 | pvar = pvars[0]
54 | # 0 = single
55 | # 1 = array of strings/ints/bytes/etc
56 | # 2 = array of character arrays
57 | assert 0 <= len(pvar.dimensions) <= 2
58 |
59 | t = dsg.t_axes()[0]
60 | x = dsg.x_axes()[0]
61 | y = dsg.y_axes()[0]
62 | z = dsg.z_axes()[0]
63 | assert len(z.dimensions) == 2
64 |
65 | assert t.size == pvar.size
66 | assert x.size == pvar.size
67 | assert y.size == pvar.size
68 | p_dim = dsg.dimensions[pvar.dimensions[0]]
69 | z_dim = dsg.dimensions[[d for d in z.dimensions if d != p_dim.name][0]]
70 | for dv in dsg.data_vars():
71 | assert len(dv.dimensions) in [1, 2] # dimensioned by profile or profile, z
72 | assert z_dim.name in dv.dimensions or p_dim.name in dv.dimensions
73 | assert dv.size in [z_dim.size, p_dim.size, z_dim.size * p_dim.size]
74 |
75 | except BaseException:
76 | if strict is True:
77 | raise
78 | return False
79 |
80 | return True
81 |
82 | @classmethod
83 | def from_dataframe(cls, df, output, **kwargs):
84 | axes = get_default_axes(kwargs.pop("axes", {}))
85 | daxes = axes
86 | data_columns = [d for d in df.columns if d not in axes]
87 |
88 | unlimited = kwargs.pop("unlimited", False)
89 |
90 | unique_dims = kwargs.pop("unique_dims", False)
91 | if unique_dims is True:
92 | # Rename the dimension to avoid a dimension and coordinate having the same name
93 |             # which is not supported in xarray
94 | changed_axes = {k: f"{v}_dim" for k, v in axes._asdict().items()}
95 | daxes = get_default_axes(changed_axes)
96 |
97 | # Downcast anything from int64 to int32
98 | # Convert any timezone aware datetimes to native UTC times
99 | df = downcast_dataframe(nativize_times(df))
100 |
101 | with IncompleteMultidimensionalProfile(output, "w") as nc:
102 | profile_group = df.groupby(axes.profile)
103 |
104 | if unlimited is True:
105 | max_profiles = None
106 | else:
107 | max_profiles = df[axes.profile].unique().size
108 | nc.createDimension(daxes.profile, max_profiles)
109 |
110 | max_zs = profile_group.size().max()
111 | nc.createDimension(daxes.z, max_zs)
112 |
113 | # Metadata variables
114 | nc.createVariable("crs", "i4")
115 |
116 | profile = nc.createVariable(axes.profile, get_dtype(df[axes.profile]), (daxes.profile,))
117 |
118 | # Create all of the variables
119 | time = nc.createVariable(axes.t, "f8", (daxes.profile,))
120 | latitude = nc.createVariable(axes.y, get_dtype(df[axes.y]), (daxes.profile,))
121 | longitude = nc.createVariable(axes.x, get_dtype(df[axes.x]), (daxes.profile,))
122 | z = nc.createVariable(
123 | axes.z,
124 | get_dtype(df[axes.z]),
125 | (daxes.profile, daxes.z),
126 | fill_value=df[axes.z].dtype.type(cls.default_fill_value),
127 | )
128 |
129 | attributes = dict_update(nc.nc_attributes(axes, daxes), kwargs.pop("attributes", {}))
130 |
131 | # Create vars based on full dataframe (to get all variables)
132 | for c in data_columns:
133 | var_name = cf_safe_name(c)
134 | if var_name not in nc.variables:
135 | v = create_ncvar_from_series(
136 | nc,
137 | var_name,
138 | (daxes.profile, daxes.z),
139 | df[c],
140 | )
141 | attributes[var_name] = dict_update(
142 | attributes.get(var_name, {}),
143 | {"coordinates": f"{axes.t} {axes.z} {axes.x} {axes.y}"},
144 | )
145 |
146 | # Write values for each profile within profile_group
147 | for i, (uid, pdf) in enumerate(profile_group):
148 | profile[i] = uid
149 |
150 | time[i] = date2num(pdf[axes.t].iloc[0], units=cls.default_time_unit)
151 | latitude[i] = pdf[axes.y].iloc[0]
152 | longitude[i] = pdf[axes.x].iloc[0]
153 |
154 | zvalues = pdf[axes.z].fillna(z._FillValue).values
155 | sl = slice(0, zvalues.size)
156 | z[i, sl] = zvalues
157 |
158 | for c in data_columns:
159 | var_name = cf_safe_name(c)
160 | v = nc.variables[var_name]
161 |
162 | vvalues = get_ncdata_from_series(pdf[c], v)
163 |
164 | sl = slice(0, vvalues.size)
165 | v[i, sl] = vvalues
166 |
167 | # Set global attributes
168 | nc.update_attributes(attributes)
169 |
170 | return IncompleteMultidimensionalProfile(output, **kwargs)
171 |
172 | def calculated_metadata(
173 | self, df=None, geometries=True, clean_cols=True, clean_rows=True, **kwargs
174 | ):
175 | axes = get_default_axes(kwargs.pop("axes", {}))
176 | if df is None:
177 | df = self.to_dataframe(clean_cols=clean_cols, clean_rows=clean_rows, axes=axes)
178 | return profile_calculated_metadata(df, axes, geometries)
179 |
180 | def to_dataframe(self, clean_cols=True, clean_rows=True, **kwargs):
181 | axes = get_default_axes(kwargs.pop("axes", {}))
182 |
183 | axv = get_mapped_axes_variables(self, axes)
184 |
185 | # Multiple profiles in the file
186 | pvar = axv.profile
187 | p_dim = self.dimensions[pvar.dimensions[0]]
188 |
189 | zvar = axv.z
190 | zs = len(self.dimensions[[d for d in zvar.dimensions if d != p_dim.name][0]])
191 |
192 | # Profiles
193 | p = normalize_countable_array(pvar)
194 | p = p.repeat(zs)
195 |
196 | # Z
197 | z = generic_masked(zvar[:].flatten(), attrs=self.vatts(zvar.name))
198 |
199 | # T
200 | tvar = axv.t
201 | t = tvar[:].repeat(zs)
202 | nt = get_masked_datetime_array(t, tvar).flatten()
203 |
204 | # X
205 | xvar = axv.x
206 | x = generic_masked(xvar[:].repeat(zs), attrs=self.vatts(xvar.name))
207 |
208 | # Y
209 | yvar = axv.y
210 | y = generic_masked(yvar[:].repeat(zs), attrs=self.vatts(yvar.name))
211 |
212 | df_data = OrderedDict(
213 | [(axes.t, nt), (axes.x, x), (axes.y, y), (axes.z, z), (axes.profile, p)]
214 | )
215 |
216 | building_index_to_drop = np.ones(t.size, dtype=bool)
217 |
218 | extract_vars = copy(self.variables)
219 | for ncvar in axv._asdict().values():
220 | if ncvar is not None and ncvar.name in extract_vars:
221 | del extract_vars[ncvar.name]
222 |
223 | for i, (dnam, dvar) in enumerate(extract_vars.items()):
224 | # Profile dimension
225 | if dvar.dimensions == pvar.dimensions:
226 | vdata = generic_masked(
227 | dvar[:].repeat(zs).astype(dvar.dtype), attrs=self.vatts(dnam)
228 | )
229 |
230 | # Profile, z dimension
231 | elif dvar.dimensions == zvar.dimensions:
232 | vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype), attrs=self.vatts(dnam))
233 |
234 | else:
235 | vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype), attrs=self.vatts(dnam))
236 | # Carry through size 1 variables
237 | if vdata.size == 1:
238 | if vdata[0] is np.ma.masked:
239 | L.warning(f"Skipping variable {dnam} that is completely masked")
240 | continue
241 | else:
242 | L.warning(f"Skipping variable {dnam} since it didn't match any dimension sizes")
243 | continue
244 |
245 |             # Mark rows with data so we don't remove them with clean_rows
246 | if vdata.size == building_index_to_drop.size:
247 | building_index_to_drop = (building_index_to_drop == True) & (vdata.mask == True) # noqa
248 |
249 | # Handle scalars here at the end
250 | if vdata.size == 1:
251 | vdata = vdata[0]
252 |
253 | df_data[dnam] = vdata
254 |
255 | df = pd.DataFrame(df_data)
256 |
257 | # Drop all data columns with no data
258 | if clean_cols:
259 | df = df.dropna(axis=1, how="all")
260 |
261 | # Drop all data rows with no data variable data
262 | if clean_rows:
263 | df = df.iloc[~building_index_to_drop]
264 |
265 | return df
266 |
267 | def nc_attributes(self, axes, daxes):
268 | atts = super().nc_attributes()
269 | return dict_update(
270 | atts,
271 | {
272 | "global": {"featureType": "profile", "cdm_data_type": "Profile"},
273 | axes.profile: {"cf_role": "profile_id", "long_name": "profile identifier"},
274 | axes.x: {"axis": "X"},
275 | axes.y: {"axis": "Y"},
276 | axes.z: {"axis": "Z"},
277 | axes.t: {"units": self.default_time_unit, "standard_name": "time", "axis": "T"},
278 | },
279 | )
280 |
--------------------------------------------------------------------------------
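A minimal usage sketch for the class above (hypothetical output file name, invented values, and pocean's default axis column names t, x, y, z and profile):

```python
import pandas as pd

from pocean.dsg.profile.im import IncompleteMultidimensionalProfile

# Two profiles, two depth levels each; t/x/y are constant within a profile.
df = pd.DataFrame(
    {
        "t": pd.to_datetime(["2024-01-01T00:00"] * 2 + ["2024-01-01T06:00"] * 2),
        "x": [-70.1, -70.1, -70.4, -70.4],      # longitude
        "y": [42.3, 42.3, 42.5, 42.5],          # latitude
        "z": [0.0, 5.0, 0.0, 5.0],              # depth within each profile
        "profile": [1, 1, 2, 2],                # profile identifier
        "temperature": [10.1, 9.8, 10.4, 9.9],  # a data variable
    }
)

with IncompleteMultidimensionalProfile.from_dataframe(df, "profiles.nc") as ncd:
    print(ncd.to_dataframe().head())
```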
/pocean/dsg/profile/om.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 | from copy import copy
3 |
4 | import numpy as np
5 | import pandas as pd
6 |
7 | from pocean import logger as L # noqa
8 | from pocean.cf import CFDataset
9 | from pocean.dsg.profile import profile_calculated_metadata
10 | from pocean.utils import (
11 | generic_masked,
12 | get_default_axes,
13 | get_mapped_axes_variables,
14 | get_masked_datetime_array,
15 | normalize_array,
16 | normalize_countable_array,
17 | )
18 |
19 |
20 | class OrthogonalMultidimensionalProfile(CFDataset):
21 | """
22 | If the profile instances have the same number of elements and the vertical
23 | coordinate values are identical for all instances, you may use the
24 | orthogonal multidimensional array representation. This has either a
25 | one-dimensional coordinate variable, z(z), provided the vertical coordinate
26 | values are ordered monotonically, or a one-dimensional auxiliary coordinate
27 | variable, alt(o), where o is the element dimension. In the former case,
28 | listing the vertical coordinate variable in the coordinates attributes of
29 | the data variables is optional.
30 | """
31 |
32 | @classmethod
33 | def is_mine(cls, dsg, strict=False):
34 | try:
35 | pvars = dsg.filter_by_attrs(cf_role="profile_id")
36 | assert len(pvars) == 1
37 | assert dsg.featureType.lower() == "profile"
38 | assert len(dsg.t_axes()) >= 1
39 | assert len(dsg.x_axes()) >= 1
40 | assert len(dsg.y_axes()) >= 1
41 | assert len(dsg.z_axes()) >= 1
42 |
43 | # Allow for string variables
44 | pvar = pvars[0]
45 | # 0 = single
46 | # 1 = array of strings/ints/bytes/etc
47 | # 2 = array of character arrays
48 | assert 0 <= len(pvar.dimensions) <= 2
49 |
50 | t = dsg.t_axes()[0]
51 | x = dsg.x_axes()[0]
52 | y = dsg.y_axes()[0]
53 | z = dsg.z_axes()[0]
54 | assert len(z.dimensions) == 1
55 | z_dim = dsg.dimensions[z.dimensions[0]]
56 |
57 | ps = normalize_array(pvar)
58 | is_single = False
59 |
60 | if pvar.ndim == 0:
61 | is_single = True
62 | elif pvar.ndim == 2:
63 | is_single = False
64 | elif isinstance(ps, str):
65 | # Non-dimensioned string variable
66 | is_single = True
67 | elif pvar.ndim == 1 and hasattr(ps, "dtype") and ps.dtype.kind in ["U", "S"]:
68 | is_single = True
69 |
70 | if is_single:
71 | assert t.size == 1
72 | assert x.size == 1
73 | assert y.size == 1
74 | for dv in dsg.data_vars():
75 | assert len(dv.dimensions) == 1
76 | assert z_dim.name in dv.dimensions
77 | assert dv.size == z_dim.size
78 | else:
79 | assert t.size == pvar.size
80 | assert x.size == pvar.size
81 | assert y.size == pvar.size
82 | p_dim = dsg.dimensions[pvar.dimensions[0]]
83 | for dv in dsg.data_vars():
84 | assert len(dv.dimensions) in [1, 2] # dimensioned by profile or profile, z
85 | assert z_dim.name in dv.dimensions or p_dim.name in dv.dimensions
86 | assert dv.size in [z_dim.size, p_dim.size, z_dim.size * p_dim.size]
87 |
88 | except BaseException:
89 | if strict is True:
90 | raise
91 | return False
92 |
93 | return True
94 |
95 | @classmethod
96 | def from_dataframe(cls, df, output, **kwargs):
97 | raise NotImplementedError
98 |
99 | def calculated_metadata(
100 | self, df=None, geometries=True, clean_cols=True, clean_rows=True, **kwargs
101 | ):
102 | axes = get_default_axes(kwargs.pop("axes", {}))
103 | if df is None:
104 | df = self.to_dataframe(clean_cols=clean_cols, clean_rows=clean_rows, axes=axes)
105 | return profile_calculated_metadata(df, axes, geometries)
106 |
107 | def to_dataframe(self, clean_cols=True, clean_rows=True, **kwargs):
108 | axes = get_default_axes(kwargs.pop("axes", {}))
109 |
110 | axv = get_mapped_axes_variables(self, axes)
111 |
112 | zvar = axv.z
113 | zs = len(self.dimensions[zvar.dimensions[0]])
114 |
115 | # Profiles
116 | pvar = axv.profile
117 | p = normalize_countable_array(pvar)
118 | ps = p.size
119 | p = p.repeat(zs)
120 |
121 | # Z
122 | z = generic_masked(zvar[:], attrs=self.vatts(zvar.name))
123 | try:
124 | z = np.tile(z, ps)
125 | except ValueError:
126 | z = z.flatten()
127 |
128 | # T
129 | tvar = axv.t
130 | t = tvar[:].repeat(zs)
131 | nt = get_masked_datetime_array(t, tvar).flatten()
132 |
133 | # X
134 | xvar = axv.x
135 | x = generic_masked(xvar[:].repeat(zs), attrs=self.vatts(xvar.name))
136 |
137 | # Y
138 | yvar = axv.y
139 | y = generic_masked(yvar[:].repeat(zs), attrs=self.vatts(yvar.name))
140 |
141 | df_data = OrderedDict(
142 | [(axes.t, nt), (axes.x, x), (axes.y, y), (axes.z, z), (axes.profile, p)]
143 | )
144 |
145 | building_index_to_drop = np.ones(t.size, dtype=bool)
146 |
147 | # Axes variables are already processed so skip them
148 | extract_vars = copy(self.variables)
149 | for ncvar in axv._asdict().values():
150 | if ncvar is not None and ncvar.name in extract_vars:
151 | del extract_vars[ncvar.name]
152 |
153 | for i, (dnam, dvar) in enumerate(extract_vars.items()):
154 | # Profile dimension
155 | if dvar.dimensions == pvar.dimensions:
156 | vdata = generic_masked(
157 | dvar[:].repeat(zs).astype(dvar.dtype), attrs=self.vatts(dnam)
158 | )
159 |
160 | # Z dimension
161 | elif dvar.dimensions == zvar.dimensions:
162 | vdata = generic_masked(
163 | np.tile(dvar[:], ps).flatten().astype(dvar.dtype), attrs=self.vatts(dnam)
164 | )
165 |
166 | # Profile, z dimension
167 | elif dvar.dimensions == pvar.dimensions + zvar.dimensions:
168 | vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype), attrs=self.vatts(dnam))
169 |
170 | else:
171 | vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype), attrs=self.vatts(dnam))
172 | # Carry through size 1 variables
173 | if vdata.size == 1:
174 | if vdata[0] is np.ma.masked:
175 | L.warning(f"Skipping variable {dnam} that is completely masked")
176 | continue
177 | else:
178 | L.warning(f"Skipping variable {dnam} since it didn't match any dimension sizes")
179 | continue
180 |
181 |             # Mark rows with data so we don't remove them with clean_rows
182 | if vdata.size == building_index_to_drop.size:
183 | building_index_to_drop = (building_index_to_drop == True) & (vdata.mask == True) # noqa
184 |
185 | # Handle scalars here at the end
186 | if vdata.size == 1:
187 | vdata = vdata[0]
188 |
189 | df_data[dnam] = vdata
190 |
191 | df = pd.DataFrame(df_data)
192 |
193 | # Drop all data columns with no data
194 | if clean_cols:
195 | df = df.dropna(axis=1, how="all")
196 |
197 | # Drop all data rows with no data variable data
198 | if clean_rows:
199 | df = df.iloc[~building_index_to_drop]
200 |
201 | return df
202 |
--------------------------------------------------------------------------------
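The to_dataframe above denormalizes the orthogonal layout: per-profile values are repeated zs times while the shared z vector is tiled ps times, so each (profile, z) pair becomes one table row. The shape logic in isolation (invented values):

```python
import numpy as np

p = np.array(["p1", "p2"])      # profile ids (ps = 2)
z = np.array([0.0, 5.0, 10.0])  # shared vertical levels (zs = 3)

rows_profile = p.repeat(z.size)  # ['p1' 'p1' 'p1' 'p2' 'p2' 'p2']
rows_z = np.tile(z, p.size)      # [ 0.  5. 10.  0.  5. 10.]
assert rows_profile.size == rows_z.size == p.size * z.size
```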
/pocean/dsg/timeseries/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pyoceans/pocean-core/6e4dda9c818c65b31192e8d5fb01d6bcf29a43f3/pocean/dsg/timeseries/__init__.py
--------------------------------------------------------------------------------
/pocean/dsg/timeseries/cr.py:
--------------------------------------------------------------------------------
1 | #!python
2 | from pocean import logger # noqa
3 | from pocean.cf import CFDataset
4 |
5 |
6 | class ContiguousRaggedTimeseries(CFDataset):
7 | @classmethod
8 | def is_mine(cls, dsg, strict=False):
9 | try:
10 | rvars = dsg.filter_by_attrs(cf_role="timeseries_id")
11 | assert len(rvars) == 1
12 | assert dsg.featureType.lower() == "timeseries"
13 | assert len(dsg.t_axes()) >= 1
14 | assert len(dsg.x_axes()) >= 1
15 | assert len(dsg.y_axes()) >= 1
16 |
17 | o_index_vars = dsg.filter_by_attrs(sample_dimension=lambda x: x is not None)
18 | assert len(o_index_vars) == 1
19 | assert o_index_vars[0].sample_dimension in dsg.dimensions # Sample dimension
20 |
21 | # Allow for string variables
22 | rvar = rvars[0]
23 | # 0 = single
24 | # 1 = array of strings/ints/bytes/etc
25 | # 2 = array of character arrays
26 | assert 0 <= len(rvar.dimensions) <= 2
27 | except BaseException:
28 | if strict is True:
29 | raise
30 | return False
31 |
32 | return True
33 |
34 |     @classmethod
35 |     def from_dataframe(cls, df, output, **kwargs):
36 |         raise NotImplementedError
37 |
38 |     def calculated_metadata(self, df=None, geometries=True, clean_cols=True, clean_rows=True):
39 |         # if df is None:
40 |         #     df = self.to_dataframe(clean_cols=clean_cols, clean_rows=clean_rows)
41 |         raise NotImplementedError
42 |
43 |     def to_dataframe(self):
44 |         raise NotImplementedError
45 |
--------------------------------------------------------------------------------
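is_mine above keys the contiguous ragged layout on exactly one variable carrying a sample_dimension attribute that names a real dimension. The same check, sketched with plain netCDF4 (hypothetical file name):

```python
from netCDF4 import Dataset

with Dataset("timeseries.nc") as nc:
    index_vars = nc.get_variables_by_attributes(sample_dimension=lambda v: v is not None)
    looks_contiguous_ragged = (
        len(index_vars) == 1 and index_vars[0].sample_dimension in nc.dimensions
    )
```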
/pocean/dsg/timeseries/im.py:
--------------------------------------------------------------------------------
1 | #!python
2 | from pocean import logger # noqa
3 | from pocean.cf import CFDataset
4 |
5 |
6 | class IncompleteMultidimensionalTimeseries(CFDataset):
7 | @classmethod
8 | def is_mine(cls, dsg, strict=False):
9 | try:
10 | rvars = dsg.filter_by_attrs(cf_role="timeseries_id")
11 | assert len(rvars) == 1
12 | assert dsg.featureType.lower() == "timeseries"
13 | assert len(dsg.t_axes()) >= 1
14 | assert len(dsg.x_axes()) >= 1
15 | assert len(dsg.y_axes()) >= 1
16 |
17 | # Not a CR
18 | assert not dsg.filter_by_attrs(sample_dimension=lambda x: x is not None)
19 |
20 | # Not an IR
21 | assert not dsg.filter_by_attrs(instance_dimension=lambda x: x is not None)
22 |
23 | # IM files will always have a time variable with two dimensions
24 | # because IM files are never used for files with a single station.
25 | assert len(dsg.t_axes()[0].dimensions) == 2
26 |
27 | # Allow for string variables
28 | rvar = rvars[0]
29 | # 0 = single
30 | # 1 = array of strings/ints/bytes/etc
31 | # 2 = array of character arrays
32 | assert 0 <= len(rvar.dimensions) <= 2
33 |
34 | except BaseException:
35 | if strict is True:
36 | raise
37 | return False
38 |
39 | return True
40 |
41 |     @classmethod
42 |     def from_dataframe(cls, df, output, **kwargs):
43 |         raise NotImplementedError
44 |
45 |     def calculated_metadata(self, df=None, geometries=True, clean_cols=True, clean_rows=True):
46 |         # if df is None:
47 |         #     df = self.to_dataframe(clean_cols=clean_cols, clean_rows=clean_rows)
48 |         raise NotImplementedError
49 |
50 |     def to_dataframe(self):
51 |         raise NotImplementedError
52 |
--------------------------------------------------------------------------------
/pocean/dsg/timeseries/ir.py:
--------------------------------------------------------------------------------
1 | #!python
2 | from pocean import logger # noqa
3 | from pocean.cf import CFDataset
4 |
5 |
6 | class IndexedRaggedTimeseries(CFDataset):
7 | @classmethod
8 | def is_mine(cls, dsg, strict=False):
9 | try:
10 | rvars = dsg.filter_by_attrs(cf_role="timeseries_id")
11 | assert len(rvars) == 1
12 | assert dsg.featureType.lower() == "timeseries"
13 | assert len(dsg.t_axes()) >= 1
14 | assert len(dsg.x_axes()) >= 1
15 | assert len(dsg.y_axes()) >= 1
16 |
17 | r_index_vars = dsg.filter_by_attrs(instance_dimension=lambda x: x is not None)
18 | assert len(r_index_vars) == 1
19 | assert r_index_vars[0].instance_dimension in dsg.dimensions # Station dimension
20 |
21 | # Allow for string variables
22 | rvar = rvars[0]
23 | # 0 = single
24 | # 1 = array of strings/ints/bytes/etc
25 | # 2 = array of character arrays
26 | assert 0 <= len(rvar.dimensions) <= 2
27 |
28 | except BaseException:
29 | if strict is True:
30 | raise
31 | return False
32 |
33 | return True
34 |
35 |     @classmethod
36 |     def from_dataframe(cls, df, output, **kwargs):
37 |         raise NotImplementedError
38 |
39 |     def calculated_metadata(self, df=None, geometries=True, clean_cols=True, clean_rows=True):
40 |         # if df is None:
41 |         #     df = self.to_dataframe(clean_cols=clean_cols, clean_rows=clean_rows)
42 |         raise NotImplementedError
43 |
44 |     def to_dataframe(self):
45 |         raise NotImplementedError
46 |
--------------------------------------------------------------------------------
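The three timeseries layouts above (contiguous ragged, incomplete multidimensional, indexed ragged) are told apart by mutually exclusive is_mine assertions. Callers normally do not pick a class by hand; a hedged sketch, assuming the CFDataset.load helper, which tries each DSG subclass's is_mine until one matches (hypothetical file name):

```python
from pocean.cf import CFDataset

dsg = CFDataset.load("station_data.nc")  # dispatches on is_mine
print(type(dsg).__name__)                # e.g. "IndexedRaggedTimeseries"
dsg.close()
```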
/pocean/dsg/timeseries/om.py:
--------------------------------------------------------------------------------
1 | #!python
2 | from collections import OrderedDict
3 | from copy import copy
4 |
5 | import numpy as np
6 | import pandas as pd
7 |
8 | from pocean import logger as L # noqa
9 | from pocean.cf import cf_safe_name, CFDataset
10 | from pocean.utils import (
11 | create_ncvar_from_series,
12 | dict_update,
13 | downcast_dataframe,
14 | generic_masked,
15 | get_default_axes,
16 | get_dtype,
17 | get_mapped_axes_variables,
18 | get_masked_datetime_array,
19 | get_ncdata_from_series,
20 | nativize_times,
21 | normalize_countable_array,
22 | )
23 |
24 |
25 | class OrthogonalMultidimensionalTimeseries(CFDataset):
26 | """
27 | H.2.1. Orthogonal multidimensional array representation of time series
28 |
29 | If the time series instances have the same number of elements and the time values are identical
30 | for all instances, you may use the orthogonal multidimensional array representation. This has
31 | either a one-dimensional coordinate variable, time(time), provided the time values are ordered
32 | monotonically, or a one-dimensional auxiliary coordinate variable, time(o), where o is the
33 | element dimension. In the former case, listing the time variable in the coordinates attributes
34 | of the data variables is optional.
35 | """
36 |
37 | @classmethod
38 | def is_mine(cls, dsg, strict=False):
39 | try:
40 | rvars = dsg.filter_by_attrs(cf_role="timeseries_id")
41 | assert len(rvars) == 1
42 | assert dsg.featureType.lower() == "timeseries"
43 | assert len(dsg.t_axes()) >= 1
44 | assert len(dsg.x_axes()) >= 1
45 | assert len(dsg.y_axes()) >= 1
46 |
47 | # Not a CR
48 | assert not dsg.filter_by_attrs(sample_dimension=lambda x: x is not None)
49 |
50 | # Not an IR
51 | assert not dsg.filter_by_attrs(instance_dimension=lambda x: x is not None)
52 |
53 | # OM files will always have a time variable with one dimension.
54 | assert len(dsg.t_axes()[0].dimensions) == 1
55 |
56 | # Allow for string variables
57 | rvar = rvars[0]
58 | # 0 = single
59 | # 1 = array of strings/ints/bytes/etc
60 | # 2 = array of character arrays
61 | assert 0 <= len(rvar.dimensions) <= 2
62 |
63 | except BaseException:
64 | if strict is True:
65 | raise
66 | return False
67 |
68 | return True
69 |
70 | @classmethod
71 | def from_dataframe(cls, df, output, **kwargs):
72 | axes = get_default_axes(kwargs.pop("axes", {}))
73 | daxes = axes
74 | data_columns = [d for d in df.columns if d not in axes]
75 |
76 | reduce_dims = kwargs.pop("reduce_dims", False)
77 | _ = kwargs.pop("unlimited", False)
78 |
79 | unique_dims = kwargs.pop("unique_dims", False)
80 | if unique_dims is True:
81 | # Rename the dimension to avoid a dimension and coordinate having the same name
82 |             # which is not supported in xarray
83 | changed_axes = {k: f"{v}_dim" for k, v in axes._asdict().items()}
84 | daxes = get_default_axes(changed_axes)
85 |
86 | # Downcast anything from int64 to int32
87 | # Convert any timezone aware datetimes to native UTC times
88 | df = downcast_dataframe(nativize_times(df))
89 |
90 | with OrthogonalMultidimensionalTimeseries(output, "w") as nc:
91 | station_group = df.groupby(axes.station)
92 | num_stations = len(station_group)
93 | has_z = axes.z is not None
94 |
95 | if reduce_dims is True and num_stations == 1:
96 |                 # If there is only a single station, we can reduce that dimension if it is of size 1
97 | def ts(i):
98 | return np.s_[:]
99 |
100 | default_dimensions = (daxes.t,)
101 | station_dimensions = ()
102 | else:
103 |
104 | def ts(i):
105 | return np.s_[i, :]
106 |
107 | default_dimensions = (daxes.station, daxes.t)
108 | station_dimensions = (daxes.station,)
109 | nc.createDimension(daxes.station, num_stations)
110 |
111 | # Set the coordinates attribute correctly
112 | coordinates = [axes.t, axes.x, axes.y]
113 | if has_z is True:
114 | coordinates.insert(1, axes.z)
115 | coordinates = " ".join(coordinates)
116 |
117 | # assume all groups are the same size and have identical times
118 | _, sdf = list(station_group)[0]
119 | t = sdf[axes.t]
120 |
121 | # Metadata variables
122 | nc.createVariable("crs", "i4")
123 |
124 | # Create all of the variables
125 | nc.createDimension(daxes.t, t.size)
126 | time = nc.createVariable(axes.t, "f8", (daxes.t,))
127 | station = nc.createVariable(
128 | axes.station, get_dtype(df[axes.station]), station_dimensions
129 | )
130 | latitude = nc.createVariable(axes.y, get_dtype(df[axes.y]), station_dimensions)
131 | longitude = nc.createVariable(axes.x, get_dtype(df[axes.x]), station_dimensions)
132 | if has_z is True:
133 | z = nc.createVariable(
134 | axes.z,
135 | get_dtype(df[axes.z]),
136 | station_dimensions,
137 | fill_value=df[axes.z].dtype.type(cls.default_fill_value),
138 | )
139 |
140 | attributes = dict_update(nc.nc_attributes(axes, daxes), kwargs.pop("attributes", {}))
141 |
142 | time[:] = get_ncdata_from_series(t, time).astype("f8")
143 |
144 | # Create vars based on full dataframe (to get all variables)
145 | for c in data_columns:
146 | var_name = cf_safe_name(c)
147 | if var_name not in nc.variables:
148 | v = create_ncvar_from_series(
149 | nc,
150 | var_name,
151 | default_dimensions,
152 | df[c],
153 | )
154 | attributes[var_name] = dict_update(
155 | attributes.get(var_name, {}), {"coordinates": coordinates}
156 | )
157 |
158 | for i, (uid, sdf) in enumerate(station_group):
159 | station[i] = uid
160 | latitude[i] = sdf[axes.y].iloc[0]
161 | longitude[i] = sdf[axes.x].iloc[0]
162 |
163 | if has_z is True:
164 | # TODO: write a test for a Z with a _FillValue
165 | z[i] = sdf[axes.z].iloc[0]
166 |
167 | for c in data_columns:
168 | # Create variable if it doesn't exist
169 | var_name = cf_safe_name(c)
170 | v = nc.variables[var_name]
171 |
172 | vvalues = get_ncdata_from_series(sdf[c], v)
173 | try:
174 | v[ts(i)] = vvalues
175 | except BaseException:
176 | L.debug(f"{v.name} was not written. Likely a metadata variable")
177 |
178 | # Set global attributes
179 | nc.update_attributes(attributes)
180 |
181 | return OrthogonalMultidimensionalTimeseries(output, **kwargs)
182 |
183 | def calculated_metadata(
184 | self, df=None, geometries=True, clean_cols=True, clean_rows=True, **kwargs
185 | ):
186 | # axes = get_default_axes(kwargs.pop('axes', {}))
187 | # if df is None:
188 | # df = self.to_dataframe(clean_cols=clean_cols, clean_rows=clean_rows, axes=axes)
189 | raise NotImplementedError
190 |
191 | def to_dataframe(self, clean_cols=False, clean_rows=False, **kwargs):
192 | axes = get_default_axes(kwargs.pop("axes", {}))
193 |
194 | axv = get_mapped_axes_variables(self, axes)
195 |
196 | # T
197 | t = get_masked_datetime_array(axv.t[:], axv.t)
198 |
199 | # X
200 | x = generic_masked(axv.x[:].repeat(t.size), attrs=self.vatts(axv.x.name))
201 |
202 | # Y
203 | y = generic_masked(axv.y[:].repeat(t.size), attrs=self.vatts(axv.y.name))
204 |
205 | # Z
206 | if axv.z is not None:
207 | z = generic_masked(axv.z[:].repeat(t.size), attrs=self.vatts(axv.z.name))
208 | else:
209 | z = None
210 |
211 | svar = axv.station
212 | s = normalize_countable_array(svar)
213 | s = np.repeat(s, t.size)
214 |
215 |         # Now repeat t per station.
216 |         # Figure out if this is a multi-station file by checking whether
217 |         # the x variable carries a station dimension.
218 | if axv.x.ndim == 1:
219 | t = np.repeat(t, len(svar))
220 |
221 | df_data = OrderedDict(
222 | [
223 | (axes.t, t),
224 | (axes.x, x),
225 | (axes.y, y),
226 | (axes.z, z),
227 | (axes.station, s),
228 | ]
229 | )
230 |
231 |         building_index_to_drop = np.ones(t.size, dtype=bool)  # True until a data variable has values, as in the other DSG readers
232 |
233 | # Axes variables are already processed so skip them
234 | extract_vars = copy(self.variables)
235 | for ncvar in axv._asdict().values():
236 | if ncvar is not None and ncvar.name in extract_vars:
237 | del extract_vars[ncvar.name]
238 |
239 | for i, (dnam, dvar) in enumerate(extract_vars.items()):
240 | vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype), attrs=self.vatts(dnam))
241 |
242 | # Carry through size 1 variables
243 | if vdata.size == 1:
244 | if vdata[0] is np.ma.masked:
245 | L.warning(f"Skipping variable {dnam} that is completely masked")
246 | continue
247 | else:
248 | if dvar[:].flatten().size != t.size:
249 | L.warning(f"Variable {dnam} is not the correct size, skipping.")
250 | continue
251 |
252 |             # Mark rows with data so we don't remove them with clean_rows
253 | if vdata.size == building_index_to_drop.size:
254 | building_index_to_drop = (building_index_to_drop == True) & (vdata.mask == True) # noqa
255 |
256 | # Handle scalars here at the end
257 | if vdata.size == 1:
258 | vdata = vdata[0]
259 |
260 | df_data[dnam] = vdata
261 |
262 | df = pd.DataFrame(df_data)
263 |
264 | # Drop all data columns with no data
265 | if clean_cols:
266 | df = df.dropna(axis=1, how="all")
267 |
268 | # Drop all data rows with no data variable data
269 | if clean_rows:
270 | df = df.iloc[~building_index_to_drop]
271 |
272 | return df
273 |
274 | def nc_attributes(self, axes, daxes):
275 | atts = super().nc_attributes()
276 | return dict_update(
277 | atts,
278 | {
279 | "global": {"featureType": "timeseries", "cdm_data_type": "Timeseries"},
280 | axes.station: {"cf_role": "timeseries_id", "long_name": "station identifier"},
281 | axes.t: {"units": self.default_time_unit, "standard_name": "time", "axis": "T"},
282 | axes.y: {"axis": "Y"},
283 | axes.x: {"axis": "X"},
284 | axes.z: {"axis": "Z"},
285 | },
286 | )
287 |
--------------------------------------------------------------------------------
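A minimal round-trip sketch for the class above (invented values, default axis names t, x, y, z and station). As from_dataframe assumes, every station shares the same time vector:

```python
import pandas as pd

from pocean.dsg.timeseries.om import OrthogonalMultidimensionalTimeseries

times = pd.date_range("2024-01-01", periods=3, freq="1h")
df = pd.DataFrame(
    {
        "t": times.repeat(2),         # each time appears once per station
        "x": [-70.0, -71.0] * 3,
        "y": [42.0, 43.0] * 3,
        "z": [0.0, 0.0] * 3,
        "station": ["s1", "s2"] * 3,
        "temperature": [10.0, 11.0, 10.2, 11.1, 10.4, 11.3],
    }
)

with OrthogonalMultidimensionalTimeseries.from_dataframe(df, "om_ts.nc") as ncd:
    print(ncd.to_dataframe().head())
```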
/pocean/dsg/timeseriesProfile/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pyoceans/pocean-core/6e4dda9c818c65b31192e8d5fb01d6bcf29a43f3/pocean/dsg/timeseriesProfile/__init__.py
--------------------------------------------------------------------------------
/pocean/dsg/timeseriesProfile/im.py:
--------------------------------------------------------------------------------
1 | #!python
2 | from pocean.cf import CFDataset
3 |
4 |
5 | class IncompleteMultidimensionalTimeseriesProfile(CFDataset):
6 | @classmethod
7 | def is_mine(cls, dsg, strict=False):
8 | try:
9 | assert dsg.featureType.lower() == "timeseriesprofile"
10 | assert len(dsg.t_axes()) >= 1
11 | assert len(dsg.x_axes()) >= 1
12 | assert len(dsg.y_axes()) >= 1
13 | assert len(dsg.z_axes()) >= 1
14 |
15 | zvar = dsg.z_axes()[0]
16 | assert len(zvar.dimensions) > 1
17 |
18 | # Not ragged
19 | o_index_vars = dsg.filter_by_attrs(sample_dimension=lambda x: x is not None)
20 | assert len(o_index_vars) == 0
21 |
22 | r_index_vars = dsg.filter_by_attrs(instance_dimension=lambda x: x is not None)
23 | assert len(r_index_vars) == 0
24 |
25 | except BaseException:
26 | if strict is True:
27 | raise
28 | return False
29 |
30 | return True
31 |
32 |     @classmethod
33 |     def from_dataframe(cls, df, output, **kwargs):
34 |         raise NotImplementedError
35 |
36 |     def calculated_metadata(self, df=None, geometries=True, clean_cols=True, clean_rows=True):
37 |         # if df is None:
38 |         #     df = self.to_dataframe(clean_cols=clean_cols, clean_rows=clean_rows)
39 |         raise NotImplementedError
40 |
41 |     def to_dataframe(self):
42 |         raise NotImplementedError
43 |
--------------------------------------------------------------------------------
/pocean/dsg/timeseriesProfile/om.py:
--------------------------------------------------------------------------------
1 | #!python
2 | from collections import OrderedDict
3 | from copy import copy
4 |
5 | import numpy as np
6 | import pandas as pd
7 | from cftime import date2num
8 |
9 | from pocean import logger as L # noqa
10 | from pocean.cf import cf_safe_name, CFDataset
11 | from pocean.utils import (
12 | create_ncvar_from_series,
13 | dict_update,
14 | downcast_dataframe,
15 | generic_masked,
16 | get_default_axes,
17 | get_dtype,
18 | get_mapped_axes_variables,
19 | get_masked_datetime_array,
20 | get_ncdata_from_series,
21 | nativize_times,
22 | normalize_countable_array,
23 | )
24 |
25 |
26 | class OrthogonalMultidimensionalTimeseriesProfile(CFDataset):
27 | @classmethod
28 | def is_mine(cls, dsg, strict=False):
29 | try:
30 | assert dsg.featureType.lower() == "timeseriesprofile"
31 | assert len(dsg.t_axes()) >= 1
32 | assert len(dsg.x_axes()) >= 1
33 | assert len(dsg.y_axes()) >= 1
34 | assert len(dsg.z_axes()) >= 1
35 |
36 | # If there is only a single set of levels and a single set of
37 | # times, then it is orthogonal.
38 | tvar = dsg.t_axes()[0]
39 | assert len(tvar.dimensions) == 1
40 |
41 | zvar = dsg.z_axes()[0]
42 | assert len(zvar.dimensions) == 1
43 |
44 | assert tvar.dimensions != zvar.dimensions
45 |
46 | # Not ragged
47 | o_index_vars = dsg.filter_by_attrs(sample_dimension=lambda x: x is not None)
48 | assert len(o_index_vars) == 0
49 |
50 | r_index_vars = dsg.filter_by_attrs(instance_dimension=lambda x: x is not None)
51 | assert len(r_index_vars) == 0
52 |
53 | except BaseException:
54 | if strict is True:
55 | raise
56 | return False
57 |
58 | return True
59 |
60 | @classmethod
61 | def from_dataframe(cls, df, output, **kwargs):
62 | axes = get_default_axes(kwargs.pop("axes", {}))
63 | daxes = axes
64 | data_columns = [d for d in df.columns if d not in axes]
65 |
66 | reduce_dims = kwargs.pop("reduce_dims", False)
67 | unlimited = kwargs.pop("unlimited", False)
68 |
69 | unique_dims = kwargs.pop("unique_dims", False)
70 | if unique_dims is True:
71 | # Rename the dimension to avoid a dimension and coordinate having the same name
72 | # which is not supported in xarray
73 | changed_axes = {k: f"{v}_dim" for k, v in axes._asdict().items()}
74 | daxes = get_default_axes(changed_axes)
75 |
76 | # Downcast anything from int64 to int32
77 | # Convert any timezone aware datetimes to native UTC times
78 | df = downcast_dataframe(nativize_times(df))
79 |
80 |         # Make a new index that is the Cartesian product of the unique values from
81 |         # each level of the old index. This is so we don't have to iterate over anything.
82 |         # The full columns of data can then be reshaped to the final unique dimension sizes.
83 | index_order = [axes.t, axes.z, axes.station]
84 | df = df.set_index(index_order)
85 | df = df.reindex(pd.MultiIndex.from_product(df.index.levels, names=index_order))
86 |
87 | unique_z = df.index.get_level_values(axes.z).unique().values
88 | unique_t = (
89 | df.index.get_level_values(axes.t).unique().tolist()
90 | ) # tolist converts to Timestamp
91 | all_stations = df.index.get_level_values(axes.station)
92 | unique_s = all_stations.unique()
93 |
94 | with OrthogonalMultidimensionalTimeseriesProfile(output, "w") as nc:
95 | if reduce_dims is True and unique_s.size == 1:
96 |                 # If there is only a single station, we can reduce that dimension if it is of size 1
97 | default_dimensions = (daxes.t, daxes.z)
98 | station_dimensions = ()
99 | else:
100 | default_dimensions = (daxes.t, daxes.z, daxes.station)
101 | station_dimensions = (daxes.station,)
102 | nc.createDimension(daxes.station, unique_s.size)
103 |
104 | station = nc.createVariable(axes.station, get_dtype(unique_s), station_dimensions)
105 | latitude = nc.createVariable(axes.y, get_dtype(df[axes.y]), station_dimensions)
106 | longitude = nc.createVariable(axes.x, get_dtype(df[axes.x]), station_dimensions)
107 |             # Assign in a loop because VLEN variables (strings) have to be assigned by integer index
108 | # and we need to find the lat/lon based on station index
109 | for si, st in enumerate(unique_s):
110 | station[si] = st
111 | latitude[si] = df[axes.y][all_stations == st].dropna().iloc[0]
112 | longitude[si] = df[axes.x][all_stations == st].dropna().iloc[0]
113 |
114 | # Metadata variables
115 | nc.createVariable("crs", "i4")
116 |
117 | # Create all of the variables
118 | if unlimited is True:
119 | nc.createDimension(daxes.t, None)
120 | else:
121 | nc.createDimension(daxes.t, len(unique_t))
122 | time = nc.createVariable(axes.t, "f8", (daxes.t,))
123 | time[:] = date2num(unique_t, units=cls.default_time_unit).astype("f8")
124 |
125 | nc.createDimension(daxes.z, unique_z.size)
126 | z = nc.createVariable(axes.z, get_dtype(unique_z), (daxes.z,))
127 | z[:] = unique_z
128 |
129 | attributes = dict_update(nc.nc_attributes(axes, daxes), kwargs.pop("attributes", {}))
130 |
131 | # Variables defined on only the time axis and not the depth axis
132 | detach_z_vars = kwargs.pop("detach_z", [])
133 |             detach_z_columns = [p for p in detach_z_vars if p in data_columns]
134 |             for c in detach_z_columns:
135 | var_name = cf_safe_name(c)
136 | if var_name not in nc.variables:
137 | v = create_ncvar_from_series(
138 | nc,
139 | var_name,
140 | default_dimensions[0::2], # this removes the second dimension (z)
141 | df[c],
142 | )
143 | attributes[var_name] = dict_update(
144 | attributes.get(var_name, {}),
145 | {"coordinates": f"{axes.t} {axes.x} {axes.y}"},
146 | )
147 | else:
148 | v = nc.variables[var_name]
149 |
150 |                 # Because we need access to the fill values here, ask for the
151 |                 # values without them already filled in.
152 | vvalues = get_ncdata_from_series(df[c], v, fillna=False)
153 | # Reshape to the full array, with Z
154 | vvalues = vvalues.reshape(len(unique_t), unique_z.size, unique_s.size)
155 | # The Z axis is always the second axis, take the mean over that axis
156 | vvalues = np.apply_along_axis(np.nanmean, 1, vvalues).flatten()
157 | # Now reshape to the array without Z
158 | vvalues = vvalues.reshape(len(unique_t), unique_s.size)
159 | try:
160 | v[:] = vvalues.reshape(v.shape)
161 | except BaseException:
162 | L.exception(f"Failed to add {c}")
163 | continue
164 |
165 |             full_columns = [f for f in data_columns if f not in detach_z_columns]
166 | for c in full_columns:
167 | # Create variable if it doesn't exist
168 | var_name = cf_safe_name(c)
169 | if var_name not in nc.variables:
170 | v = create_ncvar_from_series(
171 | nc,
172 | var_name,
173 | default_dimensions,
174 | df[c],
175 | )
176 | attributes[var_name] = dict_update(
177 | attributes.get(var_name, {}),
178 | {"coordinates": f"{axes.t} {axes.z} {axes.x} {axes.y}"},
179 | )
180 | else:
181 | v = nc.variables[var_name]
182 |
183 | vvalues = get_ncdata_from_series(df[c], v)
184 | v[:] = vvalues.reshape(v.shape)
185 |
186 | nc.update_attributes(attributes)
187 |
188 | return OrthogonalMultidimensionalTimeseriesProfile(output, **kwargs)
189 |
190 | def calculated_metadata(
191 | self, df=None, geometries=True, clean_cols=True, clean_rows=True, **kwargs
192 | ):
193 | # axes = get_default_axes(kwargs.pop('axes', {}))
194 | # if df is None:
195 | # df = self.to_dataframe(clean_cols=clean_cols, clean_rows=clean_rows, axes=axes)
196 | raise NotImplementedError
197 |
198 | def to_dataframe(self, clean_cols=True, clean_rows=True, **kwargs):
199 | axes = get_default_axes(kwargs.pop("axes", {}))
200 |
201 | axv = get_mapped_axes_variables(self, axes)
202 |
203 | svar = axv.station
204 | s = normalize_countable_array(svar)
205 |
206 | # T
207 | t = get_masked_datetime_array(axv.t[:], axv.t)
208 | n_times = t.size
209 |
210 | # X
211 | x = generic_masked(axv.x[:], attrs=self.vatts(axv.x.name))
212 |
213 | # Y
214 | y = generic_masked(axv.y[:], attrs=self.vatts(axv.y.name))
215 |
216 | # Z
217 | z = generic_masked(axv.z[:], attrs=self.vatts(axv.z.name))
218 | n_z = z.size
219 |
220 | # denormalize table structure
221 | t = np.repeat(t, s.size * n_z)
222 | z = np.tile(np.repeat(z, s.size), n_times)
223 | s = np.tile(s, n_z * n_times)
224 | y = np.tile(y, n_times * n_z)
225 | x = np.tile(x, n_times * n_z)
226 |
227 | df_data = OrderedDict(
228 | [
229 | (axes.t, t),
230 | (axes.x, x),
231 | (axes.y, y),
232 | (axes.z, z),
233 | (axes.station, s),
234 | ]
235 | )
236 |
237 | building_index_to_drop = np.ones(t.size, dtype=bool)
238 |
239 | # Axes variables are already processed so skip them
240 | extract_vars = copy(self.variables)
241 | for ncvar in axv._asdict().values():
242 | if ncvar is not None and ncvar.name in extract_vars:
243 | del extract_vars[ncvar.name]
244 |
245 | for i, (dnam, dvar) in enumerate(extract_vars.items()):
246 | vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype), attrs=self.vatts(dnam))
247 |
248 | # Carry through size 1 variables
249 | if vdata.size == 1:
250 | if vdata[0] is np.ma.masked:
251 | L.warning(f"Skipping variable {dnam} that is completely masked")
252 | continue
253 |
254 | # Carry through profile only variables
255 | elif dvar.dimensions == axv.t.dimensions:
256 |                 # Repeat across the Z dimension, keeping only the first value valid
257 | vdata = vdata.repeat(n_z).reshape((n_times, n_z))
258 | # Set everything after the first value to missing
259 | vdata[:, 1:] = np.ma.masked
260 | vdata = vdata.flatten()
261 | if vdata.size != t.size:
262 | L.warning(f"Variable {dnam} is not the correct size, skipping.")
263 | continue
264 |
265 | else:
266 | if vdata.size != t.size:
267 | L.warning(f"Variable {dnam} is not the correct size, skipping.")
268 | continue
269 |
270 |             # Mark rows with data so we don't remove them with clean_rows
271 | if vdata.size == building_index_to_drop.size:
272 | building_index_to_drop = (building_index_to_drop == True) & (vdata.mask == True) # noqa
273 |
274 | # Handle scalars here at the end
275 | if vdata.size == 1:
276 | vdata = vdata[0]
277 |
278 | df_data[dnam] = vdata
279 |
280 | df = pd.DataFrame(df_data)
281 |
282 | # Drop all data columns with no data
283 | if clean_cols:
284 | df = df.dropna(axis=1, how="all")
285 |
286 | # Drop all data rows with no data variable data
287 | if clean_rows:
288 | df = df.iloc[~building_index_to_drop]
289 |
290 | return df
291 |
292 | def nc_attributes(self, axes, daxes):
293 | atts = super().nc_attributes()
294 | return dict_update(
295 | atts,
296 | {
297 | "global": {
298 | "featureType": "timeSeriesProfile",
299 | "cdm_data_type": "TimeseriesProfile",
300 | },
301 | axes.station: {"cf_role": "timeseries_id", "long_name": "station identifier"},
302 | axes.x: {"axis": "X"},
303 | axes.y: {"axis": "Y"},
304 | axes.z: {"axis": "Z"},
305 | axes.t: {"units": self.default_time_unit, "standard_name": "time", "axis": "T"},
306 | },
307 | )
308 |
--------------------------------------------------------------------------------
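from_dataframe above avoids per-group iteration by reindexing the table onto the full Cartesian product of (t, z, station); combinations that never occurred become NaN rows and are written as fill values. The trick in isolation (invented values, one combination missing):

```python
import pandas as pd

df = pd.DataFrame(
    {
        "t": ["2024-01-01", "2024-01-01", "2024-01-02"],
        "z": [0.0, 5.0, 0.0],
        "station": ["s1", "s1", "s1"],
        "temp": [10.0, 9.5, 10.2],
    }
).set_index(["t", "z", "station"])

full = df.reindex(pd.MultiIndex.from_product(df.index.levels, names=df.index.names))
# ('2024-01-02', 5.0, 's1') now exists, with temp == NaN
print(full)
```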
/pocean/dsg/trajectory/__init__.py:
--------------------------------------------------------------------------------
1 | #!python
2 | from collections import namedtuple
3 |
4 | from shapely.geometry import LineString, Point
5 |
6 | from pocean.utils import (
7 | unique_justseen,
8 | )
9 |
10 | trajectory_meta = namedtuple("Trajectory", ["min_z", "max_z", "min_t", "max_t", "geometry"])
11 |
12 | trajectories_meta = namedtuple(
13 | "TrajectoryCollection", ["min_z", "max_z", "min_t", "max_t", "trajectories"]
14 | )
15 |
16 |
17 | def trajectory_calculated_metadata(df, axes, geometries=True):
18 | trajectories = {}
19 | for tid, tgroup in df.groupby(axes.trajectory):
20 | tgroup = tgroup.sort_values(axes.t)
21 |
22 | if geometries:
23 | null_coordinates = tgroup[axes.x].isnull() | tgroup[axes.y].isnull()
24 | coords = list(
25 | unique_justseen(
26 | zip(
27 | tgroup.loc[~null_coordinates, axes.x].tolist(),
28 | tgroup.loc[~null_coordinates, axes.y].tolist(),
29 | )
30 | )
31 | )
32 | else:
33 |             # Without full geometries, fall back to the first trajectory point
34 | first_row = tgroup.iloc[0]
35 | coords = [(first_row[axes.x], first_row[axes.y])]
36 |
37 | geometry = None
38 | if len(coords) > 1:
39 | geometry = LineString(coords)
40 | elif len(coords) == 1:
41 | geometry = Point(coords[0])
42 |
43 | trajectories[tid] = trajectory_meta(
44 | min_z=tgroup[axes.z].min(),
45 | max_z=tgroup[axes.z].max(),
46 | min_t=tgroup[axes.t].min(),
47 | max_t=tgroup[axes.t].max(),
48 | geometry=geometry,
49 | )
50 |
51 | return trajectories_meta(
52 | min_z=df[axes.z].min(),
53 | max_z=df[axes.z].max(),
54 | min_t=df[axes.t].min(),
55 | max_t=df[axes.t].max(),
56 | trajectories=trajectories,
57 | )
58 |
--------------------------------------------------------------------------------
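A short usage sketch for trajectory_calculated_metadata (invented values, default axis names):

```python
import pandas as pd

from pocean.dsg.trajectory import trajectory_calculated_metadata
from pocean.utils import get_default_axes

df = pd.DataFrame(
    {
        "t": pd.date_range("2024-01-01", periods=3, freq="1h"),
        "x": [-70.0, -70.1, -70.2],
        "y": [42.0, 42.1, 42.3],
        "z": [0.0, 1.0, 2.0],
        "trajectory": ["glider-1"] * 3,
    }
)

meta = trajectory_calculated_metadata(df, get_default_axes({}))
print(meta.trajectories["glider-1"].geometry)  # LINESTRING (-70 42, ...)
```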
/pocean/dsg/trajectory/cr.py:
--------------------------------------------------------------------------------
1 | #!python
2 | from collections import OrderedDict
3 | from copy import copy
4 |
5 | import numpy as np
6 | import pandas as pd
7 |
8 | from pocean import logger as L # noqa
9 | from pocean.cf import cf_safe_name, CFDataset
10 | from pocean.dsg.trajectory import trajectory_calculated_metadata
11 | from pocean.utils import (
12 | create_ncvar_from_series,
13 | dict_update,
14 | downcast_dataframe,
15 | generic_masked,
16 | get_default_axes,
17 | get_dtype,
18 | get_mapped_axes_variables,
19 | get_masked_datetime_array,
20 | get_ncdata_from_series,
21 | nativize_times,
22 | normalize_countable_array,
23 | )
24 |
25 |
26 | class ContiguousRaggedTrajectory(CFDataset):
27 | @classmethod
28 | def is_mine(cls, dsg, strict=False):
29 | try:
30 | rvars = dsg.filter_by_attrs(cf_role="trajectory_id")
31 | assert len(rvars) == 1
32 | assert dsg.featureType.lower() == "trajectory"
33 | assert len(dsg.t_axes()) >= 1
34 | assert len(dsg.x_axes()) >= 1
35 | assert len(dsg.y_axes()) >= 1
36 | assert len(dsg.z_axes()) >= 1
37 |
38 | o_index_vars = dsg.filter_by_attrs(sample_dimension=lambda x: x is not None)
39 | assert len(o_index_vars) == 1
40 | assert o_index_vars[0].sample_dimension in dsg.dimensions # Sample dimension
41 |
42 | # Allow for string variables
43 | rvar = rvars[0]
44 | # 0 = single
45 | # 1 = array of strings/ints/bytes/etc
46 | # 2 = array of character arrays
47 | assert 0 <= len(rvar.dimensions) <= 2
48 | except BaseException:
49 | if strict is True:
50 | raise
51 | return False
52 |
53 | return True
54 |
55 | @classmethod
56 | def from_dataframe(cls, df, output, **kwargs):
57 | axes = get_default_axes(kwargs.pop("axes", {}))
58 | daxes = axes
59 |
60 |         # There should never be a CR file with a single trajectory, so we ignore the "reduce_dims" kwarg
61 | _ = kwargs.pop("reduce_dims", False) # noqa
62 | unlimited = kwargs.pop("unlimited", False)
63 |
64 | unique_dims = kwargs.pop("unique_dims", False)
65 | if unique_dims is True:
66 | # Rename the dimension to avoid a dimension and coordinate having the same name
67 |             # which is not supported in xarray
68 | changed_axes = {k: f"{v}_dim" for k, v in axes._asdict().items()}
69 | daxes = get_default_axes(changed_axes)
70 |
71 | # Downcast anything from int64 to int32
72 | # Convert any timezone aware datetimes to native UTC times
73 | df = downcast_dataframe(nativize_times(df))
74 |
75 | with ContiguousRaggedTrajectory(output, "w") as nc:
76 | trajectory_groups = df.groupby(axes.trajectory)
77 | unique_trajectories = list(trajectory_groups.groups.keys())
78 | num_trajectories = len(unique_trajectories)
79 | nc.createDimension(daxes.trajectory, num_trajectories)
80 | trajectory = nc.createVariable(
81 | axes.trajectory, get_dtype(df[axes.trajectory]), (daxes.trajectory,)
82 | )
83 |
84 |             # The sample dimension holds every observation across all trajectories
85 | if unlimited is True:
86 | nc.createDimension(daxes.sample, None)
87 | else:
88 | nc.createDimension(daxes.sample, len(df))
89 |
90 | # Number of observations in each trajectory
91 | row_size = nc.createVariable("rowSize", "i4", (daxes.trajectory,))
92 |
93 | attributes = dict_update(nc.nc_attributes(axes, daxes), kwargs.pop("attributes", {}))
94 |
95 | # Variables defined on only the trajectory axis
96 | traj_vars = kwargs.pop("traj_vars", [])
97 | traj_columns = [p for p in traj_vars if p in df.columns]
98 | for c in traj_columns:
99 | var_name = cf_safe_name(c)
100 | if var_name not in nc.variables:
101 | create_ncvar_from_series(
102 | nc,
103 | var_name,
104 | (daxes.trajectory,),
105 | df[c],
106 | )
107 |
108 | for i, (trajid, trg) in enumerate(trajectory_groups):
109 | trajectory[i] = trajid
110 | row_size[i] = len(trg)
111 |
112 | # Save any trajectory variables using the first value found
113 | # in the column.
114 | for c in traj_columns:
115 | var_name = cf_safe_name(c)
116 | if var_name not in nc.variables:
117 | continue
118 | v = nc.variables[var_name]
119 | vvalues = get_ncdata_from_series(trg[c], v)[0]
120 | try:
121 | v[i] = vvalues
122 | except BaseException:
123 | L.exception(f"Failed to add {c}")
124 | continue
125 |
126 | # Add all of the columns based on the sample dimension. Take all columns and remove the
127 | # trajectory, rowSize and other trajectory based columns.
128 | sample_columns = [
129 | f for f in df.columns if f not in traj_columns + ["rowSize", axes.trajectory]
130 | ]
131 | for c in sample_columns:
132 | var_name = cf_safe_name(c)
133 | if var_name not in nc.variables:
134 | v = create_ncvar_from_series(
135 | nc,
136 | var_name,
137 | (daxes.sample,),
138 | df[c],
139 | )
140 | else:
141 | v = nc.variables[var_name]
142 | vvalues = get_ncdata_from_series(df[c], v)
143 | try:
144 | if unlimited is True:
145 | v[:] = vvalues
146 | else:
147 | v[:] = vvalues.reshape(v.shape)
148 | except BaseException:
149 | L.exception(f"Failed to add {c}")
150 | continue
151 |
152 | # Metadata variables
153 | if "crs" not in nc.variables:
154 | nc.createVariable("crs", "i4")
155 |
156 | # Set attributes
157 | nc.update_attributes(attributes)
158 |
159 | return ContiguousRaggedTrajectory(output, **kwargs)
160 |
161 | def calculated_metadata(
162 | self, df=None, geometries=True, clean_cols=True, clean_rows=True, **kwargs
163 | ):
164 | axes = get_default_axes(kwargs.pop("axes", {}))
165 | if df is None:
166 | df = self.to_dataframe(clean_cols=clean_cols, clean_rows=clean_rows, axes=axes)
167 | return trajectory_calculated_metadata(df, axes, geometries)
168 |
169 | def to_dataframe(self, clean_cols=True, clean_rows=True, **kwargs):
170 | axes = get_default_axes(kwargs.pop("axes", {}))
171 |
172 | axv = get_mapped_axes_variables(self, axes)
173 |
174 | o_index_var = self.filter_by_attrs(sample_dimension=lambda x: x is not None)
175 | if not o_index_var:
176 | raise ValueError(
177 | 'Could not find the "sample_dimension" attribute on any variables, '
178 | "is this a valid {}?".format(self.__class__.__name__)
179 | )
180 | else:
181 | o_index_var = o_index_var[0]
182 | o_dim = self.dimensions[o_index_var.sample_dimension] # Sample dimension
183 | t_dim = o_index_var.dimensions
184 |
185 | # Trajectory
186 | row_sizes = o_index_var[:]
187 | traj_data = normalize_countable_array(axv.trajectory)
188 | traj_data = np.repeat(traj_data, row_sizes)
189 |
190 | # time
191 | time_data = get_masked_datetime_array(axv.t[:], axv.t).flatten()
192 |
193 | df_data = OrderedDict([(axes.t, time_data), (axes.trajectory, traj_data)])
194 |
195 | building_index_to_drop = np.ones(o_dim.size, dtype=bool)
196 |
197 | extract_vars = copy(self.variables)
198 | # Skip the time and row index variables
199 | del extract_vars[o_index_var.name]
200 | del extract_vars[axes.t]
201 |
202 | for i, (dnam, dvar) in enumerate(extract_vars.items()):
203 | # Trajectory dimensions
204 | if dvar.dimensions == t_dim:
205 | vdata = np.repeat(generic_masked(dvar[:], attrs=self.vatts(dnam)), row_sizes)
206 |
207 | # Sample dimensions
208 | elif dvar.dimensions == (o_dim.name,):
209 | vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype), attrs=self.vatts(dnam))
210 |
211 | else:
212 | vdata = generic_masked(dvar[:].flatten().astype(dvar.dtype), attrs=self.vatts(dnam))
213 | # Carry through size 1 variables
214 | if vdata.size == 1:
215 | if vdata[0] is np.ma.masked:
216 | L.warning(f"Skipping variable {dnam} that is completely masked")
217 | continue
218 | else:
219 | L.warning(f"Skipping variable {dnam} since it didn't match any dimension sizes")
220 | continue
221 |
222 |             # Mark rows with data so we don't remove them with clean_rows
223 | if vdata.size == building_index_to_drop.size:
224 | building_index_to_drop = (building_index_to_drop == True) & (vdata.mask == True) # noqa
225 |
226 | # Handle scalars here at the end
227 | if vdata.size == 1:
228 | vdata = vdata[0]
229 |
230 | df_data[dnam] = vdata
231 |
232 | df = pd.DataFrame(df_data)
233 |
234 | # Drop all data columns with no data
235 | if clean_cols:
236 | df = df.dropna(axis=1, how="all")
237 |
238 | # Drop all data rows with no data variable data
239 | if clean_rows:
240 | df = df.iloc[~building_index_to_drop]
241 |
242 | return df
243 |
244 | def nc_attributes(self, axes, daxes):
245 | atts = super().nc_attributes()
246 | return dict_update(
247 | atts,
248 | {
249 | "global": {"featureType": "trajectory", "cdm_data_type": "Trajectory"},
250 | axes.trajectory: {
251 | "cf_role": "trajectory_id",
252 | "long_name": "trajectory identifier",
253 | "ioos_category": "identifier",
254 | },
255 | axes.x: {"axis": "X"},
256 | axes.y: {"axis": "Y"},
257 | axes.z: {"axis": "Z"},
258 | axes.t: {"units": self.default_time_unit, "standard_name": "time", "axis": "T"},
259 | "rowSize": {"sample_dimension": daxes.sample},
260 | },
261 | )
262 |
--------------------------------------------------------------------------------
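to_dataframe above expands per-trajectory values onto the sample dimension with np.repeat, driven by rowSize. The bookkeeping in isolation (invented sizes):

```python
import numpy as np

row_sizes = np.array([3, 2])             # observations per trajectory (rowSize)
traj_ids = np.array(["tr1", "tr2"])      # one id per trajectory

sample_ids = np.repeat(traj_ids, row_sizes)  # ['tr1' 'tr1' 'tr1' 'tr2' 'tr2']
assert sample_ids.size == row_sizes.sum()    # matches the sample dimension size
```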
/pocean/dsg/trajectory/ir.py:
--------------------------------------------------------------------------------
1 | #!python
2 | from pocean.cf import CFDataset
3 |
4 |
5 | class IndexedRaggedTrajectory(CFDataset):
6 |     @classmethod
7 |     def from_dataframe(cls, df, output, **kwargs):
8 |         raise NotImplementedError
9 |
10 |     def calculated_metadata(self, df=None, geometries=True, clean_cols=True, clean_rows=True):
11 |         # if df is None:
12 |         #     df = self.to_dataframe(clean_cols=clean_cols, clean_rows=clean_rows)
13 |         raise NotImplementedError
14 |
15 |     def to_dataframe(self):
16 |         raise NotImplementedError
17 |
--------------------------------------------------------------------------------
/pocean/dsg/trajectoryProfile/__init__.py:
--------------------------------------------------------------------------------
1 | #!python
2 | from pocean.dsg.profile import profile_calculated_metadata
3 | from pocean.dsg.trajectory import trajectories_meta
4 |
5 |
6 | def trajectory_profile_calculated_metadata(df, axes, geometries=True):
7 | trajectories = {}
8 | for tid, tgroup in df.groupby(axes.trajectory):
9 | tgroup = tgroup.sort_values(axes.t)
10 | trajectories[tid] = profile_calculated_metadata(tgroup, axes, geometries)
11 |
12 | return trajectories_meta(
13 | min_z=df[axes.z].min(),
14 | max_z=df[axes.z].max(),
15 | min_t=df[axes.t].min(),
16 | max_t=df[axes.t].max(),
17 | trajectories=trajectories,
18 | )
19 |
--------------------------------------------------------------------------------
/pocean/dsg/utils.py:
--------------------------------------------------------------------------------
1 | import datetime
2 |
3 | import pandas as pd
4 | from shapely.geometry import (
5 | box,
6 | LineString,
7 | Point,
8 | Polygon,
9 | )
10 | from shapely.validation import make_valid
11 |
12 | from pocean import logger as L # noqa
13 | from pocean.utils import dict_update, get_default_axes, unique_justseen
14 |
15 | datetime.UTC = datetime.timezone.utc
16 |
17 |
18 | def get_calculated_attributes(df, axes=None, history=None):
19 |     """Automate netCDF attribute generation from the data itself.
20 |     This is a wrapper for the four functions below, which can also be called separately.
21 |
22 | :param df: data (Pandas DataFrame)
23 | :param axes: keys (x,y,z,t) are associated with actual column names (dictionary)
24 |     :param history: text initializing audit trail for modifications to the original data (optional, string)
25 | :return: dictionary of global attributes
26 | """
27 |
28 | axes = get_default_axes(axes)
29 | attrs = get_geographic_attributes(df, axes)
30 | attrs = dict_update(attrs, get_vertical_attributes(df, axes))
31 | attrs = dict_update(attrs, get_temporal_attributes(df, axes))
32 | attrs = dict_update(attrs, get_creation_attributes(history))
33 |
34 | return attrs
35 |
36 |
37 | def get_geographic_attributes(df, axes=None):
38 | """Use values in a dataframe to set geographic attributes for the eventual netCDF file
39 | Attribute names come from https://www.ncei.noaa.gov/data/oceans/ncei/formats/netcdf/v2.0/index.html
40 | The coordinate reference system (CRS) is assumed to be EPSG:4326, which is WGS84 and is used with
41 | GPS satellite navigation (http://spatialreference.org/ref/epsg/wgs-84/). This is NCEI's default.
42 | Coordinate values are latitude (decimal degrees_north) and longitude (decimal degrees_east).
43 | Longitude values are limited to [-180, 180).
44 |
45 | :param df: data (Pandas DataFrame)
46 | :param axes: keys (x,y,z,t) are associated with actual column names (dictionary)
47 | :return: nested dictionary of variable and global attributes
48 | """
49 | axes = get_default_axes(axes)
50 |
51 | carry_miny = round(float(df[axes.y].min()), 6)
52 | carry_maxy = round(float(df[axes.y].max()), 6)
53 | carry_minx = round(float(df[axes.x].min()), 6)
54 | carry_maxx = round(float(df[axes.x].max()), 6)
55 |
56 | notnull = df[axes.x].notnull() & df[axes.y].notnull()
57 | coords = list(zip(df.loc[notnull, axes.x], df.loc[notnull, axes.y]))
58 |
59 | if len(set(coords)) == 1:
60 | geoclass = Point
61 |         # The set is a workaround for the fact that pocean
62 |         # relied on a shapely<2 bug to pass a vector here instead of a point.
63 | coords = set(coords)
64 | elif len(coords) > 2:
65 | geoclass = Polygon
66 | else:
67 | geoclass = LineString
68 |
69 | p = geoclass(coords)
70 | dateline = LineString([(180, 90), (-180, -90)])
71 |     # If we cross the dateline, normalize the coordinates before building the geometry
72 | if dateline.crosses(p):
73 | newx = (df.loc[notnull, axes.x] + 360) % 360
74 | p = geoclass(zip(newx, df.loc[notnull, axes.y]))
75 | p = make_valid(p)
76 |
77 | geometry_bbox = box(*p.bounds).wkt
78 | geometry_wkt = p.convex_hull.wkt
79 |
80 | return {
81 | "variables": {
82 | axes.y: {
83 | "attributes": {
84 | "actual_min": carry_miny,
85 | "actual_max": carry_maxy,
86 | }
87 | },
88 | axes.x: {
89 | "attributes": {
90 | "actual_min": carry_minx,
91 | "actual_max": carry_maxx,
92 | }
93 | },
94 | },
95 | "attributes": {
96 | "geospatial_lat_min": carry_miny,
97 | "geospatial_lat_max": carry_maxy,
98 | "geospatial_lon_min": carry_minx,
99 | "geospatial_lon_max": carry_maxx,
100 | "geospatial_bbox": geometry_bbox,
101 | "geospatial_bounds": geometry_wkt,
102 | "geospatial_bounds_crs": "EPSG:4326",
103 | },
104 | }
105 |
106 |
107 | def get_vertical_attributes(df, axes=None):
108 | """Use values in a dataframe to set vertical attributes for the eventual netCDF file
109 | Attribute names come from https://www.ncei.noaa.gov/data/oceans/ncei/formats/netcdf/v2.0/index.html
110 | The CRS, geospatial_bounds_vertical_crs, cannot be assumed because NCEI suggests any of
111 | * 'EPSG:5829' (instantaneous height above sea level),
112 | * 'EPSG:5831' (instantaneous depth below sea level), or
113 | * 'EPSG:5703' (NAVD88 height).
114 | Likewise, geospatial_vertical_positive cannot be assumed to be either 'up' or 'down'.
115 | Set these attributes separately according to the dataset.
116 | Note: values are cast from numpy.int to float
117 |
118 | :param df: data (Pandas DataFrame)
119 | :param axes: keys (x,y,z,t) are associated with actual column names (dictionary). z in meters.
120 | :return: nested dictionary of variable and global attributes
121 | """
122 | axes = get_default_axes(axes)
123 | minz = round(float(df[axes.z].min()), 6)
124 | maxz = round(float(df[axes.z].max()), 6)
125 |
126 | return {
127 | "variables": {
128 | axes.z: {
129 | "attributes": {
130 | "actual_min": minz,
131 | "actual_max": maxz,
132 | }
133 | },
134 | },
135 | "attributes": {
136 | "geospatial_vertical_min": minz,
137 | "geospatial_vertical_max": maxz,
138 | "geospatial_vertical_units": "m",
139 | },
140 | }
141 |
142 |
143 | def get_temporal_attributes(df, axes=None):
144 | """Use values in a dataframe to set temporal attributes for the eventual netCDF file
145 | Attribute names come from https://www.ncei.noaa.gov/data/oceans/ncei/formats/netcdf/v2.0/index.html
146 |
147 | :param df: data (Pandas DataFrame)
148 | :param axes: keys (x,y,z,t) are associated with actual column names (dictionary). z in meters.
149 | :return: nested dictionary of variable and global attributes
150 | """
151 |
152 | axes = get_default_axes(axes)
153 | mint = df[axes.t].min()
154 | maxt = df[axes.t].max()
155 |
156 | times = pd.DatetimeIndex(unique_justseen(df[axes.t]))
157 | dt_index_diff = times[1:] - times[:-1]
158 | dt_counts = dt_index_diff.value_counts(sort=True)
159 |
160 | if dt_counts.size > 0 and dt_counts.values[0] / (len(times) - 1) > 0.75:
161 | mode_value = dt_counts.index[0]
162 | else:
163 | # Calculate a static resolution
164 | mode_value = (maxt - mint) / len(times)
165 |
166 | return {
167 | "variables": {
168 | axes.t: {
169 | "attributes": {
170 | "actual_min": mint.strftime("%Y-%m-%dT%H:%M:%SZ"),
171 | "actual_max": maxt.strftime("%Y-%m-%dT%H:%M:%SZ"),
172 | }
173 | },
174 | },
175 | "attributes": {
176 | "time_coverage_start": mint.strftime("%Y-%m-%dT%H:%M:%SZ"),
177 | "time_coverage_end": maxt.strftime("%Y-%m-%dT%H:%M:%SZ"),
178 | "time_coverage_duration": (maxt - mint).round("1s").isoformat(),
179 | "time_coverage_resolution": mode_value.round("1s").isoformat(),
180 | },
181 | }
182 |
183 |
184 | def get_creation_attributes(history=None):
185 | """Query system for netCDF file creation times
186 |
187 | :param history: text initializing audit trail for modifications to the original data (optional, string)
188 | :return: dictionary of global attributes
189 | """
190 | nc_create_ts = datetime.datetime.now(datetime.UTC).strftime("%Y-%m-%dT%H:%M:%SZ")
191 |
192 | attrs = {
193 | "attributes": {
194 | "date_created": nc_create_ts,
195 | "date_issued": nc_create_ts,
196 | "date_modified": nc_create_ts,
197 | }
198 | }
199 |
200 | # Add in the passed in history
201 | if history is not None:
202 | attrs["attributes"]["history"] = f"{nc_create_ts} - {history}"
203 |
204 | return attrs
205 |
--------------------------------------------------------------------------------
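A short usage sketch for the attribute helpers above (invented values, default axis names):

```python
import pandas as pd

from pocean.dsg.utils import get_calculated_attributes

df = pd.DataFrame(
    {
        "t": pd.date_range("2024-01-01", periods=4, freq="6h"),
        "x": [-70.0, -70.5, -71.0, -71.5],
        "y": [42.0, 42.3, 42.1, 42.6],
        "z": [0.0, 2.0, 4.0, 6.0],
    }
)

attrs = get_calculated_attributes(df, history="converted from CSV")
# attrs["attributes"] carries the geospatial_*, time_coverage_* and date_* globals;
# attrs["variables"] carries per-variable actual_min / actual_max attributes.
```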
/pocean/grid/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pyoceans/pocean-core/6e4dda9c818c65b31192e8d5fb01d6bcf29a43f3/pocean/grid/__init__.py
--------------------------------------------------------------------------------
/pocean/meta.py:
--------------------------------------------------------------------------------
1 | #!python
2 | import os
3 | from collections import OrderedDict
4 | from collections.abc import Iterable, Mapping
5 | from copy import deepcopy
6 |
7 | import numpy as np
8 | import simplejson as json
9 |
10 | from . import logger
11 |
12 |
13 | class MetaInterface(Mapping):
14 | VALID_KEYS = ["dimensions", "variables", "attributes"]
15 |
16 | @classmethod
17 | def from_jsonfile(cls, jsf):
18 | if not os.path.isfile(jsf):
19 | raise ValueError(f"{jsf} is not a file")
20 |
21 | with open(jsf) as jf:
22 | return cls.from_jsonstr(jf.read())
23 |
24 | @classmethod
25 | def from_jsonstr(cls, js):
26 | try:
27 | d = json.loads(js, object_pairs_hook=OrderedDict)
28 | except BaseException as e:
29 | raise ValueError(f"Could not parse JSON string: {e}")
30 |
31 | return cls(d)
32 |
33 | def __init__(self, *args, **kwargs):
34 | self._data = dict(*args, **kwargs)
35 |
36 | def __getitem__(self, key):
37 | return self._data[key]
38 |
39 | def __iter__(self):
40 | return iter(self._data)
41 |
42 | def __len__(self):
43 | return len(self._data)
44 |
45 | def __str__(self):
46 | return str(self._data)
47 |
48 |
49 | def safe_attribute_typing(zdtype, value):
50 | try:
51 | return zdtype.type(value)
52 | except ValueError:
53 | logger.warning(f"Could not convert {value} to type {zdtype}")
54 | return None
55 |
56 |
57 | def string_to_dtype(type_str):
58 |     # int - all signed integer aliases map to int32; int64 is avoided for broader netCDF compatibility
59 | if type_str in ["int", "int32", "int64", "i", "i4", "i8", "i32", "i64", "long"]:
60 | return np.dtype("int32")
61 |
62 | elif type_str in ["uint", "ui4", "ui", "uint32", "uint64", "ui64", "u4", "u8"]:
63 | return np.dtype("uint32")
64 |
65 | elif type_str in ["float", "float32", "f", "f4", "f32"]:
66 | return np.dtype("float32")
67 |
68 | elif type_str in ["double", "float64", "d", "f8", "f64"]:
69 | return np.dtype("float64")
70 |
71 | elif type_str in ["byte", "bytes8", "i1", "b", "B", "int8"]:
72 | return np.dtype("int8")
73 |
74 | elif type_str in ["ubyte", "ui1", "ubuB", "uint8"]:
75 | return np.dtype("uint8")
76 |
77 | elif type_str in ["char", "c", "string", "S1", "str", "unicode", "string8"]:
78 | return np.dtype("U")
79 |
80 | elif type_str in ["short", "s", "i2", "h", "int16"]:
81 | return np.dtype("int16")
82 |
83 | elif type_str in ["ushort", "us", "u2", "ui2", "uh", "uint16"]:
84 | return np.dtype("uint16")
85 |
86 | raise ValueError(f"Could not find dtype for {type_str}")
87 |
88 |
89 | def untype_attributes(vd):
90 | typed = OrderedDict()
91 | for k, v in vd.items():
92 | if isinstance(v, dict):
93 | dtype = string_to_dtype(v.get("type"))
94 | vval = v.get("data")
95 | if isinstance(vval, (list, tuple)):
96 | safe = (safe_attribute_typing(dtype, x) for x in vval)
97 | typed[k] = [x for x in safe if x is not None]
98 | else:
99 | safe = safe_attribute_typing(dtype, vval)
100 | if safe is not None:
101 | typed[k] = safe
102 | else:
103 | typed[k] = v
104 | return typed
105 |
106 |
107 | def ncpyattributes(obj, verbose=True):
108 | """Converts any attributes that are not native python types to those types"""
109 |
110 | return_copy = deepcopy(obj)
111 |
112 | for k, v in obj.items():
113 | if isinstance(v, np.ndarray):
114 | newv = v.tolist()
115 | elif hasattr(v, "dtype"):
116 | newv = v.item()
117 | else:
118 | newv = v
119 |
120 | if hasattr(v, "dtype"):
121 | newt = v.dtype.name
122 | else:
123 | if isinstance(v, Iterable) and v:
124 | # Use the type of the first one
125 | v = v[0]
126 | else:
127 | # This is likely an empty value
128 | # so just default to an empty string
129 | v = ""
130 | newt = type(v).__name__
131 |
132 | if verbose is True:
133 | return_copy[k] = {"type": newt, "data": newv}
134 | else:
135 | return_copy[k] = newv
136 |
137 | return return_copy
138 |
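A round-trip sketch (not part of the module), assuming numpy-typed attribute values: ncpyattributes with the default verbose=True emits {"type", "data"} records, untype_attributes converts them back through string_to_dtype, and MetaInterface is a read-only Mapping over the resulting document.

    import numpy as np

    from pocean.meta import MetaInterface, ncpyattributes, untype_attributes

    attrs = {
        "scale_factor": np.float32(0.5),
        "valid_range": np.array([0, 100], dtype="int32"),
    }

    typed = ncpyattributes(attrs)     # {"scale_factor": {"type": "float32", "data": 0.5}, ...}
    plain = untype_attributes(typed)  # numpy scalars/lists restored via string_to_dtype
    mi = MetaInterface({"attributes": typed})
    assert "attributes" in mi and len(mi) == 1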
--------------------------------------------------------------------------------
/pocean/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pyoceans/pocean-core/6e4dda9c818c65b31192e8d5fb01d6bcf29a43f3/pocean/tests/__init__.py
--------------------------------------------------------------------------------
/pocean/tests/download_test_data.py:
--------------------------------------------------------------------------------
1 | import zipfile
2 | from pathlib import Path
3 |
4 | import pooch
5 |
6 |
7 | def download_test_data():
8 | url = "https://github.com/pyoceans/pocean-core/releases/download"
9 | version = "2025.01"
10 |
11 | fname = pooch.retrieve(
12 | url=f"{url}/{version}/test_data.zip",
13 | known_hash="sha256:41180c6bc6017de935250c9e8c1bbb407507049baebd767692c4f74fb8d662a8",
14 | )
15 |
16 | here = Path(__file__).resolve().parent
17 | print(fname)
18 | print(here)
19 | with zipfile.ZipFile(fname, "r") as zip_ref:
20 | zip_ref.extractall(here)
21 |
22 |
23 | if __name__ == "__main__":
24 | download_test_data()
25 |
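A brief usage note: pooch.retrieve downloads into pooch's cache directory and verifies the archive against the sha256 hash above, so repeated calls skip the download. To populate the test resources before running the suite:

    from pocean.tests.download_test_data import download_test_data

    download_test_data()  # cached by pooch; re-runs verify the hash and skip the fetch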
--------------------------------------------------------------------------------
/pocean/tests/dsg/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pyoceans/pocean-core/6e4dda9c818c65b31192e8d5fb01d6bcf29a43f3/pocean/tests/dsg/__init__.py
--------------------------------------------------------------------------------
/pocean/tests/dsg/profile/test_profile_im.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | import tempfile
4 | import unittest
5 |
6 | import numpy as np
7 | import pandas as pd
8 | from dateutil.parser import parse as dtparse
9 |
10 | from pocean import logger
11 | from pocean.dsg import IncompleteMultidimensionalProfile
12 | from pocean.tests.dsg.test_new import test_is_mine
13 |
14 | logger.level = logging.DEBUG
15 | logger.handlers = [logging.StreamHandler()]
16 |
17 |
18 | class TestIMPStrings(unittest.TestCase):
19 | def setUp(self):
20 | self.df = pd.read_csv(
21 | os.path.join(os.path.dirname(__file__), "resources", "basis_2011.csv"),
22 | parse_dates=["time"],
23 | )
24 | # self.df = pd.read_csv('resources/basis_2011.csv', parse_dates=['time'])
25 |
26 | def test_print_dtypes(self):
27 | print(self.df.dtypes)
28 |
29 | def test_write_nc(self):
30 | fid, single_tmp = tempfile.mkstemp(suffix=".nc")
31 |
32 | axes = {"t": "time", "x": "longitude", "y": "latitude", "z": "z", "profile": "stationid"}
33 |
34 | with IncompleteMultidimensionalProfile.from_dataframe(
35 | self.df, single_tmp, axes=axes, mode="a"
36 | ) as ncd:
37 | ncd.renameDimension("stationid", "profile")
38 |
39 | test_is_mine(IncompleteMultidimensionalProfile, single_tmp) # Try to load it again
40 | os.close(fid)
41 | os.remove(single_tmp)
42 |
43 |
44 | class TestIncompleteMultidimensionalProfile(unittest.TestCase):
45 | def setUp(self):
46 | self.multi = os.path.join(os.path.dirname(__file__), "resources", "im-multiple.nc")
47 |
48 | def test_imp_load(self):
49 | IncompleteMultidimensionalProfile(self.multi).close()
50 |
51 | def test_imp_dataframe(self):
52 | fid, single_tmp = tempfile.mkstemp(suffix=".nc")
53 | with IncompleteMultidimensionalProfile(self.multi) as ncd:
54 | df = ncd.to_dataframe()
55 | with IncompleteMultidimensionalProfile.from_dataframe(df, single_tmp) as result_ncd:
56 | assert "profile" in result_ncd.dimensions
57 | test_is_mine(IncompleteMultidimensionalProfile, single_tmp) # Try to load it again
58 | os.close(fid)
59 | os.remove(single_tmp)
60 |
61 | def test_imp_dataframe_unique_dims(self):
62 | fid, single_tmp = tempfile.mkstemp(suffix=".nc")
63 | with IncompleteMultidimensionalProfile(self.multi) as ncd:
64 | df = ncd.to_dataframe()
65 | with IncompleteMultidimensionalProfile.from_dataframe(
66 | df, single_tmp, unique_dims=True
67 | ) as result_ncd:
68 | assert "profile_dim" in result_ncd.dimensions
69 | test_is_mine(IncompleteMultidimensionalProfile, single_tmp) # Try to load it again
70 | os.close(fid)
71 | os.remove(single_tmp)
72 |
73 | def test_imp_calculated_metadata(self):
74 | with IncompleteMultidimensionalProfile(self.multi) as ncd:
75 | m = ncd.calculated_metadata()
76 | assert m.min_t == dtparse("1990-01-01 00:00:00")
77 | assert m.max_t == dtparse("1990-01-06 21:00:00")
78 | assert len(m.profiles.keys()) == 137
79 | assert np.isclose(m.profiles[0].min_z, 0.05376, atol=1e-5)
80 | assert np.isclose(m.profiles[0].max_z, 9.62958, atol=1e-5)
81 | assert m.profiles[0].t == dtparse("1990-01-01 00:00:00")
82 | assert m.profiles[0].x == 119
83 | assert m.profiles[0].y == 171
84 |
85 | assert np.isclose(m.profiles[141].min_z, 0.04196, atol=1e-5)
86 | assert np.isclose(m.profiles[141].max_z, 9.85909, atol=1e-5)
87 | assert m.profiles[141].t == dtparse("1990-01-06 21:00:00")
88 | assert m.profiles[141].x == 34
89 | assert m.profiles[141].y == 80
90 |
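A sketch of the append-then-rename pattern from test_write_nc above, with hypothetical file and column names: mode="a" keeps the dataset writable, so the netCDF4.Dataset.renameDimension call can rename the profile dimension before the context manager closes the file.

    import pandas as pd

    from pocean.dsg import IncompleteMultidimensionalProfile

    df = pd.read_csv("stations.csv", parse_dates=["time"])  # hypothetical input
    axes = {"t": "time", "x": "longitude", "y": "latitude", "z": "z", "profile": "stationid"}

    with IncompleteMultidimensionalProfile.from_dataframe(df, "out.nc", axes=axes, mode="a") as ncd:
        ncd.renameDimension("stationid", "profile")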
--------------------------------------------------------------------------------
/pocean/tests/dsg/profile/test_profile_om.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | import tempfile
4 | import unittest
5 |
6 | import numpy as np
7 | from dateutil.parser import parse as dtparse
8 |
9 | from pocean import logger
10 | from pocean.cf import CFDataset
11 | from pocean.dsg import OrthogonalMultidimensionalProfile
12 | from pocean.tests.dsg.test_new import test_is_mine
13 |
14 | logger.level = logging.INFO
15 | logger.handlers = [logging.StreamHandler()]
16 |
17 |
18 | class TestOrthogonalMultidimensionalProfile(unittest.TestCase):
19 | def setUp(self):
20 | self.single = os.path.join(os.path.dirname(__file__), "resources", "om-single.nc")
21 | self.multi = os.path.join(os.path.dirname(__file__), "resources", "om-multiple.nc")
22 |
23 | def test_omp_load(self):
24 | OrthogonalMultidimensionalProfile(self.single).close()
25 | OrthogonalMultidimensionalProfile(self.multi).close()
26 |
27 | def test_omp_dataframe_single(self):
28 | CFDataset.load(self.single)
29 |
30 | fid, single_tmp = tempfile.mkstemp(suffix=".nc")
31 | with OrthogonalMultidimensionalProfile(self.single) as ncd:
32 | df = ncd.to_dataframe()
33 | with self.assertRaises(NotImplementedError):
34 | with OrthogonalMultidimensionalProfile.from_dataframe(df, single_tmp) as result_ncd:
35 | assert "profile" in result_ncd.dimensions
36 | test_is_mine(OrthogonalMultidimensionalProfile, single_tmp) # Try to load it again
37 | os.close(fid)
38 | os.remove(single_tmp)
39 |
40 | def test_omp_dataframe_multi(self):
41 | CFDataset.load(self.multi)
42 |
43 | fid, multi_tmp = tempfile.mkstemp(suffix=".nc")
44 | with OrthogonalMultidimensionalProfile(self.multi) as ncd:
45 | df = ncd.to_dataframe()
46 | with self.assertRaises(NotImplementedError):
47 | with OrthogonalMultidimensionalProfile.from_dataframe(df, multi_tmp) as result_ncd:
48 | assert "profile" in result_ncd.dimensions
49 | test_is_mine(OrthogonalMultidimensionalProfile, multi_tmp) # Try to load it again
50 | os.close(fid)
51 | os.remove(multi_tmp)
52 |
53 | def test_omp_dataframe_multi_unique_dims(self):
54 | CFDataset.load(self.multi)
55 |
56 | fid, multi_tmp = tempfile.mkstemp(suffix=".nc")
57 | with OrthogonalMultidimensionalProfile(self.multi) as ncd:
58 | df = ncd.to_dataframe()
59 | with self.assertRaises(NotImplementedError):
60 | with OrthogonalMultidimensionalProfile.from_dataframe(
61 | df, multi_tmp, unique_dims=True
62 | ) as result_ncd:
63 | assert "profile_dim" in result_ncd.dimensions
64 | test_is_mine(OrthogonalMultidimensionalProfile, multi_tmp) # Try to load it again
65 | os.close(fid)
66 | os.remove(multi_tmp)
67 |
68 | def test_omp_calculated_metadata(self):
69 | with OrthogonalMultidimensionalProfile(self.single) as ncd:
70 | s = ncd.calculated_metadata()
71 | assert s.min_t == dtparse("2005-07-09 01:48:00")
72 | assert s.max_t == dtparse("2005-07-09 01:48:00")
73 | assert np.isclose(s.profiles[1].min_z, 0.0)
74 | assert np.isclose(s.profiles[1].max_z, 96.06)
75 | assert s.profiles[1].t == dtparse("2005-07-09 01:48:00")
76 | assert np.isclose(s.profiles[1].x, -149.3582)
77 | assert np.isclose(s.profiles[1].y, 60.0248)
78 |
79 | with OrthogonalMultidimensionalProfile(self.multi) as ncd:
80 | m = ncd.calculated_metadata()
81 | assert m.min_t == dtparse("2005-09-10 07:08:00")
82 | assert m.max_t == dtparse("2005-09-14 17:27:00")
83 | assert len(m.profiles.keys()) == 35
84 | assert np.isclose(m.profiles[2].min_z, 0.0)
85 | assert np.isclose(m.profiles[2].max_z, 499.69)
86 | assert m.profiles[2].t == dtparse("2005-09-10 07:08:00")
87 | assert np.isclose(m.profiles[2].x, -148.2182)
88 | assert np.isclose(m.profiles[2].y, 58.5395)
89 |
90 | assert np.isclose(m.profiles[37].min_z, 0.0)
91 | assert np.isclose(m.profiles[37].max_z, 292.01001)
92 | assert m.profiles[37].t == dtparse("2005-09-14 17:27:00")
93 | assert np.isclose(m.profiles[37].x, -149.468)
94 | assert np.isclose(m.profiles[37].y, 60.01)
95 |
96 | def test_json_attributes(self):
97 | ds = os.path.join(os.path.dirname(__file__), "resources", "om-1dy11.nc")
98 | om = OrthogonalMultidimensionalProfile(ds)
99 | om.to_dataframe()
100 | om.json_attributes()
101 | om.close()
102 |
--------------------------------------------------------------------------------
/pocean/tests/dsg/test_new.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from os.path import dirname as dn
3 | from os.path import join as jn
4 |
5 | import pytest
6 |
7 | from pocean import logger
8 | from pocean.cf import CFDataset
9 | from pocean.dsg import *
10 | from pocean.utils import all_subclasses
11 |
12 | logger.level = logging.INFO
13 | logger.handlers = [logging.StreamHandler()]
14 |
15 |
16 | @pytest.mark.parametrize(
17 | "klass,fp",
18 | [
19 | (
20 | OrthogonalMultidimensionalProfile,
21 | jn(dn(__file__), "profile", "resources", "om-single.nc"),
22 | ),
23 | (
24 | OrthogonalMultidimensionalProfile,
25 | jn(dn(__file__), "profile", "resources", "om-multiple.nc"),
26 | ),
27 | (
28 | OrthogonalMultidimensionalProfile,
29 | jn(dn(__file__), "profile", "resources", "om-1dy11.nc"),
30 | ),
31 | (
32 | IncompleteMultidimensionalProfile,
33 | jn(dn(__file__), "profile", "resources", "im-multiple.nc"),
34 | ),
35 | (
36 | IncompleteMultidimensionalTrajectory,
37 | jn(dn(__file__), "trajectory", "resources", "im-single.nc"),
38 | ),
39 | (
40 | IncompleteMultidimensionalTrajectory,
41 | jn(dn(__file__), "trajectory", "resources", "im-multiple.nc"),
42 | ),
43 | (
44 | IncompleteMultidimensionalTrajectory,
45 | jn(dn(__file__), "trajectory", "resources", "im-multiple-nonstring.nc"),
46 | ),
47 | (
48 | IncompleteMultidimensionalTrajectory,
49 | jn(dn(__file__), "trajectory", "resources", "wave-glider-int-attrs.nc"),
50 | ),
51 | (ContiguousRaggedTrajectory, jn(dn(__file__), "trajectory", "resources", "cr-multiple.nc")),
52 | (ContiguousRaggedTrajectory, jn(dn(__file__), "trajectory", "resources", "cr-oot-A.nc")),
53 | (ContiguousRaggedTrajectory, jn(dn(__file__), "trajectory", "resources", "cr-oot-B.nc")),
54 | (
55 | ContiguousRaggedTrajectoryProfile,
56 | jn(dn(__file__), "trajectoryProfile", "resources", "cr-single.nc"),
57 | ),
58 | (
59 | ContiguousRaggedTrajectoryProfile,
60 | jn(dn(__file__), "trajectoryProfile", "resources", "cr-multiple.nc"),
61 | ),
62 | (
63 | ContiguousRaggedTrajectoryProfile,
64 | jn(dn(__file__), "trajectoryProfile", "resources", "cr-missing-time.nc"),
65 | ),
66 | (
67 | IncompleteMultidimensionalTimeseries,
68 | jn(dn(__file__), "timeseries", "resources", "im-multiple.nc"),
69 | ),
70 | (
71 | OrthogonalMultidimensionalTimeseries,
72 | jn(dn(__file__), "timeseries", "resources", "om-single.nc"),
73 | ),
74 | (
75 | OrthogonalMultidimensionalTimeseries,
76 | jn(dn(__file__), "timeseries", "resources", "om-multiple.nc"),
77 | ),
78 | # (IndexedRaggedTimeseries, jn(dn(__file__), 'timeseries', 'resources', 'cr-multiple.nc')),
79 | # (ContiguousRaggedTimeseries, jn(dn(__file__), 'timeseries', 'resources', 'cr-multiple.nc')),
80 | (
81 | OrthogonalMultidimensionalTimeseriesProfile,
82 | jn(dn(__file__), "timeseriesProfile", "resources", "om-multiple.nc"),
83 | ),
84 | (
85 | IncompleteMultidimensionalTimeseriesProfile,
86 | jn(dn(__file__), "timeseriesProfile", "resources", "im-single.nc"),
87 | ),
88 | (
89 | IncompleteMultidimensionalTimeseriesProfile,
90 | jn(dn(__file__), "timeseriesProfile", "resources", "im-multiple.nc"),
91 | ),
92 | (
93 | RaggedTimeseriesProfile,
94 | jn(dn(__file__), "timeseriesProfile", "resources", "r-single.nc"),
95 | ),
96 | (
97 | RaggedTimeseriesProfile,
98 | jn(dn(__file__), "timeseriesProfile", "resources", "r-multiple.nc"),
99 | ),
100 | ],
101 | )
102 | def test_is_mine(klass, fp):
103 | with CFDataset.load(fp) as dsg:
104 | assert dsg.__class__ == klass
105 |
106 | allsubs = list(all_subclasses(CFDataset))
107 | subs = [s for s in allsubs if s != klass]
108 | with CFDataset(fp) as dsg:
109 | logger.debug(f"\nTesting {klass.__name__}")
110 | assert klass.is_mine(dsg, strict=True) is True
111 | for s in subs:
112 | if hasattr(s, "is_mine"):
113 | logger.debug(f" * Trying {s.__name__}...")
114 | assert s.is_mine(dsg) is False
115 |
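A sketch of the detection contract these parametrized cases exercise, with a hypothetical path: as the assertions imply, CFDataset.load opens the file, checks which DSG subclass claims it via is_mine, and returns an instance of the single matching class.

    from pocean.cf import CFDataset

    with CFDataset.load("some-dsg-file.nc") as ncd:  # hypothetical path
        print(type(ncd).__name__)  # e.g. OrthogonalMultidimensionalProfile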
--------------------------------------------------------------------------------
/pocean/tests/dsg/test_utils.py:
--------------------------------------------------------------------------------
1 | #!python
2 | import datetime
3 | import os
4 | import unittest
5 |
6 | import pandas as pd
7 | import pytest
8 | import pytz
9 | from dateutil.parser import parse as dtparse
10 |
11 | from pocean import logger as L # noqa
12 | from pocean.cf import CFDataset
13 | from pocean.dsg import utils
14 |
15 | datetime.UTC = datetime.timezone.utc  # shim: datetime.UTC was only added in Python 3.11
16 |
17 | # RuntimeWarning: invalid value encountered in cast is fine here.
18 | ignore_invalid_value_cast = pytest.mark.filterwarnings("ignore::RuntimeWarning")
19 |
20 |
21 | class TestDsgUtils(unittest.TestCase):
22 | geo = pd.DataFrame({"x": [-1, -2, -3, -4], "y": [1, 2, 3, 4]})
23 |
24 | z = pd.DataFrame(
25 | {
26 | "z": [1, 2, 3, 4],
27 | }
28 | )
29 |
30 | times = pd.DataFrame(
31 | {
32 | "t": pd.to_datetime(
33 | [
34 | "2018-08-19 00:00:00",
35 | "2018-08-20 00:00:00",
36 | "2018-08-21 00:00:00",
37 | "2018-08-22 00:00:00",
38 | "2018-08-23 00:00:00",
39 | "2018-08-23 00:00:05",
40 | ]
41 | )
42 | }
43 | )
44 |
45 | avgtimes = pd.DataFrame(
46 | {
47 | "t": pd.to_datetime(
48 | [
49 | "2018-08-19 00:00:00",
50 | "2018-08-20 23:00:55",
51 | "2018-08-21 00:00:35",
52 | ]
53 | )
54 | }
55 | )
56 |
57 | def test_get_vertical_meta(self):
58 | meta = utils.get_vertical_attributes(self.z)
59 |
60 | assert meta == {
61 | "variables": {
62 | "z": {
63 | "attributes": {
64 | "actual_min": 1,
65 | "actual_max": 4,
66 | }
67 | },
68 | },
69 | "attributes": {
70 | "geospatial_vertical_min": 1,
71 | "geospatial_vertical_max": 4,
72 | "geospatial_vertical_units": "m",
73 | },
74 | }
75 |
76 | def test_get_geospatial_meta(self):
77 | meta = utils.get_geographic_attributes(self.geo)
78 |
79 | assert meta == {
80 | "variables": {
81 | "y": {
82 | "attributes": {
83 | "actual_min": 1,
84 | "actual_max": 4,
85 | }
86 | },
87 | "x": {
88 | "attributes": {
89 | "actual_min": -4,
90 | "actual_max": -1,
91 | }
92 | },
93 | },
94 | "attributes": {
95 | "geospatial_lat_min": 1.0,
96 | "geospatial_lat_max": 4.0,
97 | "geospatial_lon_min": -4.0,
98 | "geospatial_lon_max": -1.0,
99 | "geospatial_bbox": "POLYGON ((-1 1, -1 4, -4 4, -4 1, -1 1))",
100 | "geospatial_bounds": "LINESTRING (-1 1, -4 4)",
101 | "geospatial_bounds_crs": "EPSG:4326",
102 | },
103 | }
104 |
105 | def test_get_temporal_meta_from_times_average(self):
106 | meta = utils.get_temporal_attributes(self.avgtimes)
107 |
108 | assert meta == {
109 | "variables": {
110 | "t": {
111 | "attributes": {
112 | "actual_min": "2018-08-19T00:00:00Z",
113 | "actual_max": "2018-08-21T00:00:35Z",
114 | }
115 | }
116 | },
117 | "attributes": {
118 | "time_coverage_start": "2018-08-19T00:00:00Z",
119 | "time_coverage_end": "2018-08-21T00:00:35Z",
120 | "time_coverage_duration": "P2DT0H0M35S",
121 | "time_coverage_resolution": "P0DT16H0M12S",
122 | },
123 | }
124 |
125 | def test_get_temporal_meta_from_times(self):
126 | meta = utils.get_temporal_attributes(self.times)
127 |
128 | assert meta == {
129 | "variables": {
130 | "t": {
131 | "attributes": {
132 | "actual_min": "2018-08-19T00:00:00Z",
133 | "actual_max": "2018-08-23T00:00:05Z",
134 | }
135 | }
136 | },
137 | "attributes": {
138 | "time_coverage_start": "2018-08-19T00:00:00Z",
139 | "time_coverage_end": "2018-08-23T00:00:05Z",
140 | "time_coverage_duration": "P4DT0H0M5S",
141 | "time_coverage_resolution": "P1DT0H0M0S",
142 | },
143 | }
144 |
145 | def test_get_creation(self):
146 | meta = utils.get_creation_attributes(history="DID THINGS")
147 |
148 | now = datetime.datetime.now(datetime.UTC).replace(tzinfo=pytz.utc)
149 |
150 | assert (now - dtparse(meta["attributes"]["date_created"])) < datetime.timedelta(minutes=1)
151 | assert (now - dtparse(meta["attributes"]["date_issued"])) < datetime.timedelta(minutes=1)
152 | assert (now - dtparse(meta["attributes"]["date_modified"])) < datetime.timedelta(minutes=1)
153 | assert "DID THINGS" in meta["attributes"]["history"]
154 |
155 | @ignore_invalid_value_cast
156 | def test_wrap_dateline(self):
157 | ncfile = os.path.join(
158 | os.path.dirname(os.path.dirname(__file__)), "resources/wrapping_dateline.nc"
159 | )
160 |
161 | with CFDataset.load(ncfile) as ncd:
162 | axes = {
163 | "t": "time",
164 | "z": "z",
165 | "x": "lon",
166 | "y": "lat",
167 | }
168 | df = ncd.to_dataframe(axes=axes)
169 |
170 | meta = utils.get_geographic_attributes(df, axes=axes)
171 |
172 | assert meta == {
173 | "variables": {
174 | "lat": {"attributes": {"actual_min": 61.777, "actual_max": 67.068}},
175 | "lon": {"attributes": {"actual_min": -179.966, "actual_max": 179.858}},
176 | },
177 | "attributes": {
178 | "geospatial_lat_min": 61.777,
179 | "geospatial_lat_max": 67.068,
180 | "geospatial_lon_min": -179.966,
181 | "geospatial_lon_max": 179.858,
182 | "geospatial_bbox": "POLYGON ((198.669 61.777, 198.669 67.068, 174.79200000000003 67.068, 174.79200000000003 61.777, 198.669 61.777))",
183 | "geospatial_bounds": "POLYGON ((174.79200000000003 61.777, 174.92599999999993 62.206, 178.812 64.098, 192.86 67.029, 196.86 67.068, 197.094 67.044, 198.669 66.861, 187.784 64.188, 179.10799999999995 62.266, 176.16899999999998 61.862, 174.79200000000003 61.777))",
184 | "geospatial_bounds_crs": "EPSG:4326",
185 | },
186 | }
187 |
188 | def test_wrap_small_coords(self):
189 | geo = pd.DataFrame({"x": [-1, -2], "y": [1, 2]})
190 |
191 | meta = utils.get_geographic_attributes(geo)
192 |
193 | assert meta == {
194 | "variables": {
195 | "y": {
196 | "attributes": {
197 | "actual_min": 1,
198 | "actual_max": 2,
199 | }
200 | },
201 | "x": {
202 | "attributes": {
203 | "actual_min": -2,
204 | "actual_max": -1,
205 | }
206 | },
207 | },
208 | "attributes": {
209 | "geospatial_lat_min": 1,
210 | "geospatial_lat_max": 2,
211 | "geospatial_lon_min": -2,
212 | "geospatial_lon_max": -1,
213 | "geospatial_bbox": "POLYGON ((-1 1, -1 2, -2 2, -2 1, -1 1))",
214 | "geospatial_bounds": "LINESTRING (-1 1, -2 2)",
215 | "geospatial_bounds_crs": "EPSG:4326",
216 | },
217 | }
218 |
219 | def test_wrap_same_coords(self):
220 | geo = pd.DataFrame({"x": [-1, -1, -1], "y": [1, 1, 1]})
221 |
222 | meta = utils.get_geographic_attributes(geo)
223 |
224 | assert meta == {
225 | "variables": {
226 | "y": {
227 | "attributes": {
228 | "actual_min": 1,
229 | "actual_max": 1,
230 | }
231 | },
232 | "x": {
233 | "attributes": {
234 | "actual_min": -1,
235 | "actual_max": -1,
236 | }
237 | },
238 | },
239 | "attributes": {
240 | "geospatial_lat_min": 1,
241 | "geospatial_lat_max": 1,
242 | "geospatial_lon_min": -1,
243 | "geospatial_lon_max": -1,
244 | "geospatial_bbox": "POLYGON ((-1 1, -1 1, -1 1, -1 1))",
245 | "geospatial_bounds": "POINT (-1 1)",
246 | "geospatial_bounds_crs": "EPSG:4326",
247 | },
248 | }
249 |
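A sketch matching test_wrap_same_coords above: when every coordinate pair is identical, get_geographic_attributes collapses geospatial_bounds to a POINT; two distinct points yield a LINESTRING and anything larger a POLYGON, as the other cases show.

    import pandas as pd

    from pocean.dsg import utils

    geo = pd.DataFrame({"x": [-1, -1, -1], "y": [1, 1, 1]})
    meta = utils.get_geographic_attributes(geo)
    print(meta["attributes"]["geospatial_bounds"])  # POINT (-1 1)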
--------------------------------------------------------------------------------
/pocean/tests/dsg/timeseries/test_timeseries_im.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pyoceans/pocean-core/6e4dda9c818c65b31192e8d5fb01d6bcf29a43f3/pocean/tests/dsg/timeseries/test_timeseries_im.py
--------------------------------------------------------------------------------
/pocean/tests/dsg/timeseries/test_timeseries_om.py:
--------------------------------------------------------------------------------
1 | #!python
2 | import logging
3 | import os
4 | import tempfile
5 | import unittest
6 |
7 | import numpy as np
8 | import pytest
9 |
10 | from pocean import logger
11 | from pocean.dsg import OrthogonalMultidimensionalTimeseries
12 | from pocean.tests.dsg.test_new import test_is_mine
13 |
14 | # RuntimeWarning: invalid value encountered in cast is fine here.
15 | ignore_invalid_value_cast = pytest.mark.filterwarnings("ignore::RuntimeWarning")
16 |
17 | logger.level = logging.INFO
18 | logger.handlers = [logging.StreamHandler()]
19 |
20 |
21 | class TestOrthogonalMultidimensionalTimeseries(unittest.TestCase):
22 | def setUp(self):
23 | self.single = os.path.join(os.path.dirname(__file__), "resources", "tt.nc")
24 | self.multi = os.path.join(os.path.dirname(__file__), "resources", "om-multiple.nc")
25 | self.ph = np.ma.array(
26 | [
27 | 8.1080176,
28 | 8.11740265,
29 | 8.11924184,
30 | 8.11615471,
31 | 8.11445695,
32 | 8.11600021,
33 | 8.11903291,
34 | 8.1187229,
35 | 8.105218,
36 | 8.10998784,
37 | 8.10715445,
38 | 8.10530323,
39 | 8.11167052,
40 | 8.11142766,
41 | 8.10897461,
42 | 8.08827717,
43 | 8.11343609,
44 | 8.11746859,
45 | 8.12326458,
46 | 8.11770947,
47 | 8.09127117,
48 | 8.10770576,
49 | 8.10252467,
50 | 8.10252874,
51 | ]
52 | )
53 |
54 | def test_omp_load(self):
55 | OrthogonalMultidimensionalTimeseries(self.single).close()
56 | OrthogonalMultidimensionalTimeseries(self.multi).close()
57 |
58 | @ignore_invalid_value_cast
59 | def test_timeseries_omt_dataframe_single(self):
60 | fid, single_tmp = tempfile.mkstemp(suffix=".nc")
61 | with OrthogonalMultidimensionalTimeseries(self.single) as s:
62 | df = s.to_dataframe()
63 | with OrthogonalMultidimensionalTimeseries.from_dataframe(df, single_tmp) as result_ncd:
64 | assert "station" in result_ncd.dimensions
65 | assert np.ma.allclose(result_ncd.variables["pH"][:].flatten(), self.ph)
66 | test_is_mine(OrthogonalMultidimensionalTimeseries, single_tmp) # Try to load it again
67 | os.close(fid)
68 | os.remove(single_tmp)
69 |
70 | def test_timeseries_omt_dataframe_multi(self):
71 | fid, single_tmp = tempfile.mkstemp(suffix=".nc")
72 | with OrthogonalMultidimensionalTimeseries(self.multi) as s:
73 | df = s.to_dataframe()
74 | with OrthogonalMultidimensionalTimeseries.from_dataframe(df, single_tmp) as result_ncd:
75 | assert "station" in result_ncd.dimensions
76 | assert np.ma.allclose(
77 | result_ncd.variables["temperature"][0, 0:7].flatten(),
78 | [18.61804, 13.2165, 39.30018, 17.00865, 24.95154, 35.99525, 24.33436],
79 | )
80 | test_is_mine(OrthogonalMultidimensionalTimeseries, single_tmp) # Try to load it again
81 | os.close(fid)
82 | os.remove(single_tmp)
83 |
84 | @ignore_invalid_value_cast
85 | def test_timeseries_omt_dataframe_unique_dims(self):
86 | fid, single_tmp = tempfile.mkstemp(suffix=".nc")
87 | with OrthogonalMultidimensionalTimeseries(self.single) as s:
88 | df = s.to_dataframe()
89 | with OrthogonalMultidimensionalTimeseries.from_dataframe(
90 | df, single_tmp, unique_dims=True
91 | ) as result_ncd:
92 | assert "station_dim" in result_ncd.dimensions
93 | assert np.ma.allclose(result_ncd.variables["pH"][:].flatten(), self.ph)
94 | test_is_mine(OrthogonalMultidimensionalTimeseries, single_tmp) # Try to load it again
95 | os.close(fid)
96 | os.remove(single_tmp)
97 |
98 | @ignore_invalid_value_cast
99 | def test_timeseries_omt_reduce_dims(self):
100 | fid, single_tmp = tempfile.mkstemp(suffix=".nc")
101 | with OrthogonalMultidimensionalTimeseries(self.single) as s:
102 | df = s.to_dataframe()
103 | with OrthogonalMultidimensionalTimeseries.from_dataframe(
104 | df, single_tmp, reduce_dims=True
105 | ) as result_ncd:
106 | assert "station" not in result_ncd.dimensions
107 | assert np.ma.allclose(result_ncd.variables["pH"][:].flatten(), self.ph)
108 | test_is_mine(OrthogonalMultidimensionalTimeseries, single_tmp) # Try to load it again
109 | os.close(fid)
110 | os.remove(single_tmp)
111 |
112 | @ignore_invalid_value_cast
113 | def test_timeseries_omt_no_z(self):
114 | fid, single_tmp = tempfile.mkstemp(suffix=".nc")
115 | with OrthogonalMultidimensionalTimeseries(self.single) as s:
116 | df = s.to_dataframe()
117 | axes = {"z": None}
118 | df.drop(columns=["z"], inplace=True)
119 | with OrthogonalMultidimensionalTimeseries.from_dataframe(
120 | df,
121 | single_tmp,
122 | axes=axes,
123 | ) as result_ncd:
124 | assert "station" in result_ncd.dimensions
125 | assert "z" not in result_ncd.variables
126 | assert np.ma.allclose(result_ncd.variables["pH"][:].flatten(), self.ph)
127 | test_is_mine(OrthogonalMultidimensionalTimeseries, single_tmp) # Try to load it again
128 | os.close(fid)
129 | os.remove(single_tmp)
130 |
131 | @ignore_invalid_value_cast
132 | def test_timeseries_omt_no_z_no_station(self):
133 | fid, single_tmp = tempfile.mkstemp(suffix=".nc")
134 | with OrthogonalMultidimensionalTimeseries(self.single) as s:
135 | df = s.to_dataframe()
136 | axes = {"z": None}
137 | df.drop(columns=["z"], inplace=True)
138 | with OrthogonalMultidimensionalTimeseries.from_dataframe(
139 | df, single_tmp, axes=axes, reduce_dims=True
140 | ) as result_ncd:
141 | assert "station" not in result_ncd.dimensions
142 | assert "z" not in result_ncd.variables
143 | assert np.ma.allclose(result_ncd.variables["pH"][:].flatten(), self.ph)
144 | test_is_mine(OrthogonalMultidimensionalTimeseries, single_tmp) # Try to load it again
145 | os.close(fid)
146 | os.remove(single_tmp)
147 |
148 | @ignore_invalid_value_cast
149 | def test_supplying_attributes(self):
150 | fid, single_tmp = tempfile.mkstemp(suffix=".nc")
151 |
152 | attrs = {
153 | "y": {
154 | "_CoordinateAxisType": "Lat",
155 | "_FillValue": -9999.9,
156 | "missing_value": -9999.9,
157 | }
158 | }
159 |
160 | with OrthogonalMultidimensionalTimeseries(self.single) as s:
161 | df = s.to_dataframe()
162 | with OrthogonalMultidimensionalTimeseries.from_dataframe(
163 | df, single_tmp, attributes=attrs
164 | ) as result_ncd:
165 | assert "station" in result_ncd.dimensions
166 | assert result_ncd.variables["y"]._CoordinateAxisType == "Lat"
167 | with self.assertRaises(AttributeError):
168 | result_ncd.variables["y"].missing_value
169 | with self.assertRaises(AttributeError):
170 | result_ncd.variables["y"]._FillValue
171 |
172 | test_is_mine(OrthogonalMultidimensionalTimeseries, single_tmp) # Try to load it again
173 | os.close(fid)
174 | os.remove(single_tmp)
175 |
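A sketch of the attributes= keyword from test_supplying_attributes, on a hypothetical two-row frame using the default column names: supplied attributes are applied to the named variables, but _FillValue and missing_value supplied this way are not copied onto the variable (the assertRaises blocks above verify this); _FillValue in particular can only be set when a variable is created.

    import pandas as pd

    from pocean.dsg import OrthogonalMultidimensionalTimeseries

    df = pd.DataFrame({
        "t": pd.to_datetime(["2018-08-19", "2018-08-20"]),
        "x": [-70.0, -70.0],
        "y": [40.0, 40.0],
        "z": [0.0, 0.0],
        "station": ["s1", "s1"],
        "temperature": [10.1, 10.4],
    })

    attrs = {"y": {"_CoordinateAxisType": "Lat"}}

    with OrthogonalMultidimensionalTimeseries.from_dataframe(df, "ts.nc", attributes=attrs) as ncd:
        assert ncd.variables["y"]._CoordinateAxisType == "Lat"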
--------------------------------------------------------------------------------
/pocean/tests/dsg/timeseriesProfile/test_timeseriesProfile_im.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pyoceans/pocean-core/6e4dda9c818c65b31192e8d5fb01d6bcf29a43f3/pocean/tests/dsg/timeseriesProfile/test_timeseriesProfile_im.py
--------------------------------------------------------------------------------
/pocean/tests/dsg/timeseriesProfile/test_timeseriesProfile_r.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | import tempfile
4 | import unittest
5 | from datetime import datetime
6 |
7 | import netCDF4 as nc4
8 | import pandas as pd
9 | import pytest
10 | from numpy.testing import assert_array_equal as npeq
11 |
12 | from pocean import logger
13 | from pocean.cf import CFDataset
14 | from pocean.dsg import RaggedTimeseriesProfile
15 | from pocean.tests.dsg.test_new import test_is_mine
16 |
17 | logger.level = logging.INFO
18 | logger.handlers = [logging.StreamHandler()]
19 |
20 | # RuntimeWarning: invalid value encountered in cast is fine here.
21 | ignore_invalid_value_cast = pytest.mark.filterwarnings("ignore::RuntimeWarning")
22 |
23 |
24 | class TestRaggedTimeseriesProfile(unittest.TestCase):
25 | def test_csv_to_nc_single(self):
26 | filepath = os.path.join(os.path.dirname(__file__), "resources", "r-single.csv")
27 |
28 | df = pd.read_csv(filepath)
29 | fid, tmpfile = tempfile.mkstemp(suffix=".nc")
30 |
31 | axes = {"t": "time", "x": "lon", "y": "lat", "z": "z"}
32 |
33 | df.time = pd.to_datetime(df.time)
34 |
35 | CFDataset.default_time_unit = "hours since 2003-01-01 00:00:00Z"
36 |
37 | with RaggedTimeseriesProfile.from_dataframe(df, tmpfile, axes=axes) as result_ncd:
38 | assert "station" in result_ncd.dimensions
39 | assert result_ncd.dimensions["station"].size == 1
40 | assert "profile" in result_ncd.dimensions
41 | assert result_ncd.dimensions["profile"].size == 1
42 |
43 | check_vars = ["z", "t090C", "SP", "SA", "SR", "CT", "sigma0_CT"]
44 | for v in check_vars:
45 | npeq(result_ncd.variables[v][:], df[v].values)
46 |
47 | assert result_ncd.variables["station"][0] == df.station.iloc[0] == "CH2"
48 | assert result_ncd.variables["profile"][0] == df.profile.iloc[0] == "030617B"
49 | assert result_ncd.variables["lat"].size == 1
50 | assert result_ncd.variables["lat"].ndim == 1 # Not reduced
51 | assert result_ncd.variables["lat"][0] == df.lat.iloc[0] == 33.558
52 | assert result_ncd.variables["lon"].size == 1
53 | assert result_ncd.variables["lon"].ndim == 1 # Not reduced
54 | assert result_ncd.variables["lon"][0] == df.lon.iloc[0] == -118.405
55 |
56 | assert result_ncd.variables["time"].units == "hours since 2003-01-01 00:00:00Z"
57 | assert result_ncd.variables["time"][0] == nc4.date2num(
58 | datetime(2003, 6, 17, 10, 32, 0), units=result_ncd.variables["time"].units
59 | )
60 |
61 | assert RaggedTimeseriesProfile.is_mine(result_ncd, strict=True)
62 |
63 | os.close(fid)
64 | os.remove(tmpfile)
65 |
66 | def test_csv_to_nc_multi(self):
67 | filepath = os.path.join(os.path.dirname(__file__), "resources", "r-multi.csv")
68 |
69 | df = pd.read_csv(filepath)
70 | fid, tmpfile = tempfile.mkstemp(suffix=".nc")
71 |
72 | axes = {"t": "time", "x": "lon", "y": "lat", "z": "z"}
73 |
74 | df.time = pd.to_datetime(df.time)
75 |
76 | with RaggedTimeseriesProfile.from_dataframe(df, tmpfile, axes=axes) as result_ncd:
77 | assert "station" in result_ncd.dimensions
78 | assert result_ncd.dimensions["station"].size == 2
79 | assert "profile" in result_ncd.dimensions
80 | assert result_ncd.dimensions["profile"].size == 5
81 |
82 | check_vars = ["z", "salinity", "sigma0"]
83 | for v in check_vars:
84 | npeq(result_ncd.variables[v][:], df[v].values)
85 |
86 | npeq(result_ncd.variables["station"][:], ["CN1", "CN2"])
87 | npeq(
88 | result_ncd.variables["profile"][:],
89 | ["030312B", "030617B", "030702B", "030814B", "031216C"],
90 | )
91 | assert result_ncd.variables["profile"][0] == df.profile.iloc[0] == "030312B"
92 | assert result_ncd.variables["lat"].size == 2
93 | assert result_ncd.variables["lat"].ndim == 1 # Not reduced
94 | assert result_ncd.variables["lat"][0] == df.lat.iloc[0] == 33.5
95 | assert result_ncd.variables["lon"].size == 2
96 | assert result_ncd.variables["lon"].ndim == 1 # Not reduced
97 | assert result_ncd.variables["lon"][0] == df.lon.iloc[0] == -118.4
98 |
99 | npeq(result_ncd.variables["stationIndex"][:], [0, 0, 1, 0, 1])
100 |
101 | npeq(result_ncd.variables["rowSize"][:], [844, 892, 893, 893, 891])
102 |
103 | assert result_ncd.variables["time"][0] == nc4.date2num(
104 | datetime(2013, 3, 12, 10, 19, 6), units=result_ncd.variables["time"].units
105 | )
106 | assert RaggedTimeseriesProfile.is_mine(result_ncd, strict=True)
107 |
108 | os.close(fid)
109 | os.remove(tmpfile)
110 |
111 | def test_csv_to_nc_single_timezones(self):
112 | filepath = os.path.join(os.path.dirname(__file__), "resources", "r-single.csv")
113 |
114 | df = pd.read_csv(filepath)
115 | fid, tmpfile = tempfile.mkstemp(suffix=".nc")
116 |
117 | axes = {"t": "time", "x": "lon", "y": "lat", "z": "z"}
118 |
119 | df.time = pd.to_datetime(df.time)
120 | df.time = df.time.dt.tz_localize("UTC")
121 |
122 | with RaggedTimeseriesProfile.from_dataframe(df, tmpfile, axes=axes) as result_ncd:
123 | assert "station" in result_ncd.dimensions
124 | assert result_ncd.dimensions["station"].size == 1
125 | assert "profile" in result_ncd.dimensions
126 | assert result_ncd.dimensions["profile"].size == 1
127 |
128 | check_vars = ["z", "t090C", "SP", "SA", "SR", "CT", "sigma0_CT"]
129 | for v in check_vars:
130 | npeq(result_ncd.variables[v][:], df[v].values)
131 |
132 | assert result_ncd.variables["station"][0] == df.station.iloc[0] == "CH2"
133 | assert result_ncd.variables["profile"][0] == df.profile.iloc[0] == "030617B"
134 | assert result_ncd.variables["lat"].size == 1
135 | assert result_ncd.variables["lat"].ndim == 1 # Not reduced
136 | assert result_ncd.variables["lat"][0] == df.lat.iloc[0] == 33.558
137 | assert result_ncd.variables["lon"].size == 1
138 | assert result_ncd.variables["lon"].ndim == 1 # Not reduced
139 | assert result_ncd.variables["lon"][0] == df.lon.iloc[0] == -118.405
140 |
141 | assert result_ncd.variables["time"][0] == nc4.date2num(
142 | datetime(2003, 6, 17, 10, 32, 0), units=result_ncd.variables["time"].units
143 | )
144 |
145 | assert RaggedTimeseriesProfile.is_mine(result_ncd, strict=True)
146 |
147 | os.close(fid)
148 | os.remove(tmpfile)
149 |
150 | def test_csv_to_nc_single_reduce(self):
151 | filepath = os.path.join(os.path.dirname(__file__), "resources", "r-single.csv")
152 |
153 | df = pd.read_csv(filepath)
154 | fid, tmpfile = tempfile.mkstemp(suffix=".nc")
155 |
156 | axes = {"t": "time", "x": "lon", "y": "lat", "z": "z"}
157 |
158 | df.time = pd.to_datetime(df.time)
159 |
160 | with RaggedTimeseriesProfile.from_dataframe(
161 | df, tmpfile, axes=axes, reduce_dims=True
162 | ) as result_ncd:
163 | assert "station" not in result_ncd.dimensions
164 | assert "profile" in result_ncd.dimensions
165 | assert result_ncd.dimensions["profile"].size == 1
166 |
167 | check_vars = ["z", "t090C", "SP", "SA", "SR", "CT", "sigma0_CT"]
168 | for v in check_vars:
169 | npeq(result_ncd.variables[v][:], df[v].values)
170 |
171 | assert result_ncd.variables["station"][0] == df.station.iloc[0] == "CH2"
172 | assert result_ncd.variables["profile"][0] == df.profile.iloc[0] == "030617B"
173 | assert result_ncd.variables["lat"].size == 1
174 | assert result_ncd.variables["lat"].ndim == 0 # Reduced to 0
175 | assert result_ncd.variables["lat"][0] == df.lat.iloc[0] == 33.558
176 | assert result_ncd.variables["lon"].size == 1
177 | assert result_ncd.variables["lon"].ndim == 0 # Reduced to 0
178 | assert result_ncd.variables["lon"][0] == df.lon.iloc[0] == -118.405
179 |
180 | assert RaggedTimeseriesProfile.is_mine(result_ncd, strict=True)
181 |
182 | os.close(fid)
183 | os.remove(tmpfile)
184 |
185 | @ignore_invalid_value_cast
186 | def test_rtp_single(self):
187 | filepath = os.path.join(os.path.dirname(__file__), "resources", "r-ctd-single.nc")
188 |
189 | with RaggedTimeseriesProfile(filepath) as ncd:
190 | fid, tmpfile = tempfile.mkstemp(suffix=".nc")
191 | df = ncd.to_dataframe(clean_rows=False)
192 |
193 | with RaggedTimeseriesProfile.from_dataframe(df, tmpfile) as result_ncd:
194 | assert "station" in result_ncd.dimensions
195 | test_is_mine(RaggedTimeseriesProfile, tmpfile) # Try to load it again
196 |
197 | with RaggedTimeseriesProfile.from_dataframe(
198 | df, tmpfile, unique_dims=True
199 | ) as result_ncd:
200 | assert "station_dim" in result_ncd.dimensions
201 | test_is_mine(RaggedTimeseriesProfile, tmpfile) # Try to load it again
202 |
203 | with RaggedTimeseriesProfile.from_dataframe(
204 | df, tmpfile, reduce_dims=True
205 | ) as result_ncd:
206 |                 # This file has a single station, so reduce_dims removes the station dimension
207 | assert "station" not in result_ncd.dimensions
208 | assert "profile" in result_ncd.dimensions
209 | test_is_mine(RaggedTimeseriesProfile, tmpfile) # Try to load it again
210 |
211 | with RaggedTimeseriesProfile.from_dataframe(df, tmpfile, unlimited=True) as result_ncd:
212 | assert "station" in result_ncd.dimensions
213 | assert "profile" in result_ncd.dimensions
214 | assert result_ncd.dimensions["obs"].isunlimited() is True
215 | test_is_mine(RaggedTimeseriesProfile, tmpfile) # Try to load it again
216 |
217 | with RaggedTimeseriesProfile.from_dataframe(
218 | df, tmpfile, reduce_dims=True, unlimited=True
219 | ) as result_ncd:
220 | assert "station" not in result_ncd.dimensions
221 | assert "profile" in result_ncd.dimensions
222 | assert result_ncd.dimensions["obs"].isunlimited() is True
223 | test_is_mine(RaggedTimeseriesProfile, tmpfile) # Try to load it again
224 |
225 | with RaggedTimeseriesProfile.from_dataframe(
226 | df, tmpfile, unique_dims=True, reduce_dims=False, unlimited=True
227 | ) as result_ncd:
228 | assert "station_dim" in result_ncd.dimensions
229 | assert "profile_dim" in result_ncd.dimensions
230 | assert result_ncd.dimensions["obs_dim"].isunlimited() is True
231 | test_is_mine(RaggedTimeseriesProfile, tmpfile) # Try to load it again
232 |
233 | os.close(fid)
234 | os.remove(tmpfile)
235 |
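A sketch of the time-unit override used in test_csv_to_nc_single: default_time_unit is a class attribute on CFDataset, so setting it affects every dataset written afterwards in the process. Here df is assumed to be a profile DataFrame from an earlier to_dataframe or read_csv call.

    from pocean.cf import CFDataset
    from pocean.dsg import RaggedTimeseriesProfile

    CFDataset.default_time_unit = "hours since 2003-01-01 00:00:00Z"

    axes = {"t": "time", "x": "lon", "y": "lat", "z": "z"}
    with RaggedTimeseriesProfile.from_dataframe(df, "rtp.nc", axes=axes) as ncd:  # df assumed
        assert ncd.variables["time"].units == "hours since 2003-01-01 00:00:00Z"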
--------------------------------------------------------------------------------
/pocean/tests/dsg/trajectory/test_trajectory_cr.py:
--------------------------------------------------------------------------------
1 | #!python
2 | import logging
3 | import os
4 | import tempfile
5 | import unittest
6 | from os.path import dirname as dn
7 | from os.path import join as jn
8 |
9 | import pytest
10 |
11 | from pocean import logger
12 | from pocean.dsg import ContiguousRaggedTrajectory, get_calculated_attributes
13 | from pocean.tests.dsg.test_new import test_is_mine
14 |
15 | logger.level = logging.INFO
16 | logger.handlers = [logging.StreamHandler()]
17 |
18 | # RuntimeWarning: invalid value encountered in cast is fine here.
19 | ignore_invalid_value_cast = pytest.mark.filterwarnings("ignore::RuntimeWarning")
20 |
21 |
22 | @pytest.mark.parametrize(
23 | "fp",
24 | [
25 | # jn(dn(__file__), 'resources', 'cr-single.nc'),
26 | jn(dn(__file__), "resources", "cr-multiple.nc"),
27 | jn(dn(__file__), "resources", "cr-oot-A.nc"),
28 | jn(dn(__file__), "resources", "cr-oot-B.nc"),
29 | ],
30 | )
31 | def test_crt_load(fp):
32 | test_is_mine(ContiguousRaggedTrajectory, fp)
33 |
34 |
35 | class TestContiguousRaggedTrajectory(unittest.TestCase):
36 | def setUp(self):
37 | self.multi = jn(dn(__file__), "resources", "cr-multiple.nc")
38 | self.oot_A = jn(dn(__file__), "resources", "cr-oot-A.nc")
39 | self.oot_B = jn(dn(__file__), "resources", "cr-oot-B.nc")
40 |
41 | def test_crt_dataframe_multiple(self):
42 | axes = {
43 | "t": "time",
44 | "x": "lon",
45 | "y": "lat",
46 | "z": "z",
47 | }
48 | fid, tmpnc = tempfile.mkstemp(suffix=".nc")
49 | with ContiguousRaggedTrajectory(self.multi) as ncd:
50 | df = ncd.to_dataframe(axes=axes)
51 | with ContiguousRaggedTrajectory.from_dataframe(df, tmpnc, axes=axes) as result_ncd:
52 | assert "trajectory" in result_ncd.dimensions
53 | test_is_mine(ContiguousRaggedTrajectory, tmpnc) # Try to load it again
54 | os.close(fid)
55 | os.remove(tmpnc)
56 |
57 | def test_crt_dataframe_multiple_unique_dims(self):
58 | axes = {
59 | "t": "time",
60 | "x": "lon",
61 | "y": "lat",
62 | "z": "z",
63 | }
64 | fid, tmpnc = tempfile.mkstemp(suffix=".nc")
65 | with ContiguousRaggedTrajectory(self.multi) as ncd:
66 | df = ncd.to_dataframe(axes=axes)
67 | with ContiguousRaggedTrajectory.from_dataframe(
68 | df, tmpnc, axes=axes, unique_dims=True
69 | ) as result_ncd:
70 | assert "trajectory_dim" in result_ncd.dimensions
71 | test_is_mine(ContiguousRaggedTrajectory, tmpnc) # Try to load it again
72 | os.close(fid)
73 | os.remove(tmpnc)
74 |
75 | def test_crt_dataframe_unlimited_dim(self):
76 | axes = {
77 | "t": "time",
78 | "x": "lon",
79 | "y": "lat",
80 | "z": "z",
81 | }
82 | fid, tmpnc = tempfile.mkstemp(suffix=".nc")
83 | with ContiguousRaggedTrajectory(self.multi) as ncd:
84 | df = ncd.to_dataframe(axes=axes)
85 | with ContiguousRaggedTrajectory.from_dataframe(
86 | df, tmpnc, axes=axes, unlimited=True, unique_dims=True
87 | ) as result_ncd:
88 | assert "trajectory_dim" in result_ncd.dimensions
89 | assert "obs_dim" in result_ncd.dimensions
90 | assert result_ncd.dimensions["obs_dim"].isunlimited() is True
91 | test_is_mine(ContiguousRaggedTrajectory, tmpnc) # Try to load it again
92 | os.close(fid)
93 | os.remove(tmpnc)
94 |
95 | @ignore_invalid_value_cast
96 | def test_crt_dataframe_oot_A(self):
97 | axes = {"t": "time", "x": "lon", "y": "lat", "z": "depth", "sample": "sample"}
98 | fid, tmpnc = tempfile.mkstemp(suffix=".nc")
99 | with ContiguousRaggedTrajectory(self.oot_A) as ncd:
100 | df = ncd.to_dataframe(axes=axes)
101 | df = df.sort_values(["trajectory", "time"])
102 | attrs = get_calculated_attributes(df, axes=axes)
103 |
104 | with ContiguousRaggedTrajectory.from_dataframe(
105 | df, tmpnc, axes=axes, mode="a"
106 | ) as result_ncd:
107 | assert "sample" in result_ncd.dimensions
108 | assert result_ncd.dimensions["sample"].size == 6610
109 | assert "trajectory" in result_ncd.dimensions
110 |                 # Null trajectories containing no data are dropped here; there is no way
111 |                 # to represent an empty trajectory in a dataframe.
112 | assert result_ncd.dimensions["trajectory"].size == 507
113 | result_ncd.apply_meta(attrs)
114 |
115 | test_is_mine(ContiguousRaggedTrajectory, tmpnc) # Try to load it again
116 |
117 | os.close(fid)
118 | os.remove(tmpnc)
119 |
120 | @ignore_invalid_value_cast
121 | def test_crt_dataframe_oot_B(self):
122 | axes = {
123 | "t": "time",
124 | "x": "lon",
125 | "y": "lat",
126 | "z": "depth",
127 | }
128 | fid, tmpnc = tempfile.mkstemp(suffix=".nc")
129 | with ContiguousRaggedTrajectory(self.oot_B) as ncd:
130 | df = ncd.to_dataframe(axes=axes)
131 | df = df.sort_values(["trajectory", "time"])
132 | attrs = get_calculated_attributes(df, axes=axes)
133 |
134 | with ContiguousRaggedTrajectory.from_dataframe(
135 | df, tmpnc, axes=axes, mode="a"
136 | ) as result_ncd:
137 | assert "obs" in result_ncd.dimensions
138 | assert result_ncd.dimensions["obs"].size == 64116
139 | assert "trajectory" in result_ncd.dimensions
140 |                 # Null trajectories containing no data are dropped here; there is no way
141 |                 # to represent an empty trajectory in a dataframe.
142 | assert result_ncd.dimensions["trajectory"].size == 1000
143 | result_ncd.apply_meta(attrs)
144 |
145 | test_is_mine(ContiguousRaggedTrajectory, tmpnc) # Try to load it again
146 |
147 | os.close(fid)
148 | os.remove(tmpnc)
149 |
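A sketch of the compute-then-apply metadata pattern from the two oot tests: calculate the attributes from the sorted frame first, write with mode="a" so the dataset stays writable, then attach them with apply_meta. The frame df is assumed to come from an earlier to_dataframe call.

    from pocean.dsg import ContiguousRaggedTrajectory, get_calculated_attributes

    axes = {"t": "time", "x": "lon", "y": "lat", "z": "depth"}

    df = df.sort_values(["trajectory", "time"])
    attrs = get_calculated_attributes(df, axes=axes)

    with ContiguousRaggedTrajectory.from_dataframe(df, "traj.nc", axes=axes, mode="a") as ncd:
        ncd.apply_meta(attrs)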
--------------------------------------------------------------------------------
/pocean/tests/dsg/trajectory/test_trajectory_im.py:
--------------------------------------------------------------------------------
1 | #!python
2 | import logging
3 | import os
4 | import tempfile
5 | import unittest
6 |
7 | import numpy as np
8 | import pytest
9 | from dateutil.parser import parse as dtparse
10 |
11 | from pocean import logger
12 | from pocean.cf import CFDataset
13 | from pocean.dsg import IncompleteMultidimensionalTrajectory
14 | from pocean.tests.dsg.test_new import test_is_mine
15 |
16 | logger.level = logging.INFO
17 | logger.handlers = [logging.StreamHandler()]
18 |
19 | # RuntimeWarning: invalid value encountered in cast is fine here.
20 | ignore_invalid_value_cast = pytest.mark.filterwarnings("ignore::RuntimeWarning")
21 |
22 |
23 | class TestIncompleteMultidimensionalTrajectory(unittest.TestCase):
24 | @ignore_invalid_value_cast
25 | def test_im_single_row(self):
26 | filepath = os.path.join(os.path.dirname(__file__), "resources", "im-singlerow.nc")
27 |
28 | with IncompleteMultidimensionalTrajectory(filepath) as s:
29 | df = s.to_dataframe(clean_rows=True)
30 | assert len(df) == 1
31 |
32 | def test_imt_multi(self):
33 | filepath = os.path.join(os.path.dirname(__file__), "resources", "im-multiple.nc")
34 |
35 | CFDataset.load(filepath).close()
36 |
37 | with IncompleteMultidimensionalTrajectory(filepath) as ncd:
38 | fid, tmpfile = tempfile.mkstemp(suffix=".nc")
39 | df = ncd.to_dataframe(clean_rows=False)
40 |
41 | with IncompleteMultidimensionalTrajectory.from_dataframe(df, tmpfile) as result_ncd:
42 | assert "trajectory" in result_ncd.dimensions
43 | test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again
44 |
45 | with IncompleteMultidimensionalTrajectory.from_dataframe(
46 | df, tmpfile, unique_dims=True
47 | ) as result_ncd:
48 | assert "trajectory_dim" in result_ncd.dimensions
49 | test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again
50 |
51 | with IncompleteMultidimensionalTrajectory.from_dataframe(
52 | df, tmpfile, reduce_dims=True
53 | ) as result_ncd:
54 | # Could not reduce dims since there was more than one trajectory
55 | assert "trajectory" in result_ncd.dimensions
56 | test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again
57 |
58 | with IncompleteMultidimensionalTrajectory.from_dataframe(
59 | df, tmpfile, unlimited=True
60 | ) as result_ncd:
61 | assert result_ncd.dimensions["obs"].isunlimited() is True
62 | test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again
63 |
64 | with IncompleteMultidimensionalTrajectory.from_dataframe(
65 | df, tmpfile, reduce_dims=True, unlimited=True
66 | ) as result_ncd:
67 | # Could not reduce dims since there was more than one trajectory
68 | assert "trajectory" in result_ncd.dimensions
69 | assert result_ncd.dimensions["obs"].isunlimited() is True
70 | test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again
71 |
72 | with IncompleteMultidimensionalTrajectory.from_dataframe(
73 | df, tmpfile, unique_dims=True, reduce_dims=True, unlimited=True
74 | ) as result_ncd:
75 | # Could not reduce dims since there was more than one trajectory
76 | assert "trajectory_dim" in result_ncd.dimensions
77 | assert result_ncd.dimensions["obs_dim"].isunlimited() is True
78 | test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again
79 |
80 | os.close(fid)
81 | os.remove(tmpfile)
82 |
83 | @ignore_invalid_value_cast
84 | def test_imt_multi_not_string(self):
85 | filepath = os.path.join(os.path.dirname(__file__), "resources", "im-multiple-nonstring.nc")
86 |
87 | CFDataset.load(filepath).close()
88 |
89 | with IncompleteMultidimensionalTrajectory(filepath) as ncd:
90 | fid, tmpfile = tempfile.mkstemp(suffix=".nc")
91 | df = ncd.to_dataframe(clean_rows=False)
92 |
93 | with IncompleteMultidimensionalTrajectory.from_dataframe(df, tmpfile) as result_ncd:
94 | assert "trajectory" in result_ncd.dimensions
95 | test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again
96 |
97 | with IncompleteMultidimensionalTrajectory.from_dataframe(
98 | df, tmpfile, reduce_dims=True
99 | ) as result_ncd:
100 |                 # The trajectory dimension is reduced away for this file
101 | assert "trajectory" not in result_ncd.dimensions
102 | test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again
103 |
104 | with IncompleteMultidimensionalTrajectory.from_dataframe(
105 | df, tmpfile, unlimited=True
106 | ) as result_ncd:
107 | assert result_ncd.dimensions["obs"].isunlimited() is True
108 | test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again
109 |
110 | with IncompleteMultidimensionalTrajectory.from_dataframe(
111 | df, tmpfile, reduce_dims=True, unlimited=True
112 | ) as result_ncd:
113 |                 # The trajectory dimension is reduced away for this file
114 | assert "trajectory" not in result_ncd.dimensions
115 | assert result_ncd.dimensions["obs"].isunlimited() is True
116 | test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again
117 |
118 | os.close(fid)
119 | os.remove(tmpfile)
120 |
121 | def test_imt_single(self):
122 | filepath = os.path.join(os.path.dirname(__file__), "resources", "im-single.nc")
123 |
124 | CFDataset.load(filepath).close()
125 |
126 | with IncompleteMultidimensionalTrajectory(filepath) as ncd:
127 | fid, tmpfile = tempfile.mkstemp(suffix=".nc")
128 | df = ncd.to_dataframe(clean_rows=False)
129 |
130 | with IncompleteMultidimensionalTrajectory.from_dataframe(df, tmpfile) as result_ncd:
131 | assert "trajectory" in result_ncd.dimensions
132 | test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again
133 |
134 | with IncompleteMultidimensionalTrajectory.from_dataframe(
135 | df, tmpfile, reduce_dims=True
136 | ) as result_ncd:
137 | # Reduced trajectory dimension
138 | assert "trajectory" not in result_ncd.dimensions
139 | test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again
140 |
141 | with IncompleteMultidimensionalTrajectory.from_dataframe(
142 | df, tmpfile, unlimited=True
143 | ) as result_ncd:
144 |             # Only unlimited=True is passed here; the obs dimension becomes unlimited
145 | assert result_ncd.dimensions["obs"].isunlimited() is True
146 | test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again
147 |
148 | with IncompleteMultidimensionalTrajectory.from_dataframe(
149 | df, tmpfile, reduce_dims=True, unlimited=True
150 | ) as result_ncd:
151 | # Reduced trajectory dimension
152 | assert "trajectory" not in result_ncd.dimensions
153 | assert result_ncd.dimensions["obs"].isunlimited() is True
154 | test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again
155 |
156 | os.close(fid)
157 | os.remove(tmpfile)
158 |
159 | def test_imt_change_axis_names(self):
160 | new_axis = {"t": "time", "x": "lon", "y": "lat", "z": "depth"}
161 |
162 | filepath = os.path.join(os.path.dirname(__file__), "resources", "im-multiple.nc")
163 | with IncompleteMultidimensionalTrajectory(filepath) as ncd:
164 | fid, tmpfile = tempfile.mkstemp(suffix=".nc")
165 | df = ncd.to_dataframe(clean_rows=False, axes=new_axis)
166 |
167 | with IncompleteMultidimensionalTrajectory.from_dataframe(
168 | df, tmpfile, axes=new_axis
169 | ) as result_ncd:
170 | assert "trajectory" in result_ncd.dimensions
171 | assert "time" in result_ncd.variables
172 | assert "lon" in result_ncd.variables
173 | assert "lat" in result_ncd.variables
174 | assert "depth" in result_ncd.variables
175 | test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile) # Try to load it again
176 |
177 | os.close(fid)
178 | os.remove(tmpfile)
179 |
180 | def test_imt_calculated_metadata_single(self):
181 | filepath = os.path.join(os.path.dirname(__file__), "resources", "im-single.nc")
182 |
183 | with IncompleteMultidimensionalTrajectory(filepath) as ncd:
184 | s = ncd.calculated_metadata()
185 | assert s.min_t.round("s") == dtparse("1990-01-01 00:00:00")
186 | assert s.max_t.round("s") == dtparse("1990-01-05 03:00:00")
187 | traj1 = s.trajectories["Trajectory1"]
188 | assert traj1.min_z == 0
189 | assert traj1.max_z == 99
190 | assert traj1.min_t.round("s") == dtparse("1990-01-01 00:00:00")
191 | assert traj1.max_t.round("s") == dtparse("1990-01-05 03:00:00")
192 | first_loc = traj1.geometry.coords[0]
193 | assert np.isclose(first_loc[0], -7.9336)
194 | assert np.isclose(first_loc[1], 42.00339)
195 |
196 | def test_imt_calculated_metadata_multi(self):
197 | filepath = os.path.join(os.path.dirname(__file__), "resources", "im-multiple.nc")
198 |
199 | with IncompleteMultidimensionalTrajectory(filepath) as ncd:
200 | m = ncd.calculated_metadata()
201 | assert m.min_t == dtparse("1990-01-01 00:00:00")
202 | assert m.max_t == dtparse("1990-01-02 12:00:00")
203 | assert len(m.trajectories) == 4
204 | traj0 = m.trajectories["Trajectory0"]
205 | assert traj0.min_z == 0
206 | assert traj0.max_z == 35
207 | assert traj0.min_t.round("s") == dtparse("1990-01-01 00:00:00")
208 | assert traj0.max_t.round("s") == dtparse("1990-01-02 11:00:00")
209 | first_loc = traj0.geometry.coords[0]
210 | assert np.isclose(first_loc[0], -35.07884)
211 | assert np.isclose(first_loc[1], 2.15286)
212 |
213 | traj3 = m.trajectories["Trajectory3"]
214 | assert traj3.min_z == 0
215 | assert traj3.max_z == 36
216 | assert traj3.min_t.round("s") == dtparse("1990-01-01 00:00:00")
217 | assert traj3.max_t.round("s") == dtparse("1990-01-02 12:00:00")
218 | first_loc = traj3.geometry.coords[0]
219 | assert np.isclose(first_loc[0], -73.3026)
220 | assert np.isclose(first_loc[1], 1.95761)
221 |
222 | def test_json_attributes_single(self):
223 | filepath = os.path.join(os.path.dirname(__file__), "resources", "im-single.nc")
224 |
225 | with IncompleteMultidimensionalTrajectory(filepath) as s:
226 | s.json_attributes()
227 |
228 | def test_json_attributes_multi(self):
229 | filepath = os.path.join(os.path.dirname(__file__), "resources", "im-multiple.nc")
230 |
231 | with IncompleteMultidimensionalTrajectory(filepath) as s:
232 | s.json_attributes()
233 |
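A sketch of renaming the coordinate variables on a round trip, as in test_imt_change_axis_names; the paths are hypothetical. The same axes mapping is passed to both to_dataframe and from_dataframe so the renamed columns line up on the way back out.

    from pocean.dsg import IncompleteMultidimensionalTrajectory

    axes = {"t": "time", "x": "lon", "y": "lat", "z": "depth"}

    with IncompleteMultidimensionalTrajectory("in.nc") as ncd:  # hypothetical path
        df = ncd.to_dataframe(axes=axes)

    with IncompleteMultidimensionalTrajectory.from_dataframe(df, "out.nc", axes=axes) as out:
        assert {"time", "lon", "lat", "depth"} <= set(out.variables)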
--------------------------------------------------------------------------------
/pocean/tests/dsg/trajectoryProfile/test_trajectoryProfile_cr.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import math
3 | import os
4 | import tempfile
5 | import unittest
6 |
7 | import numpy as np
8 | import pytest
9 | from dateutil.parser import parse as dtparse
10 | from shapely.wkt import loads as wktloads
11 |
12 | from pocean import logger as L
13 | from pocean.dsg import ContiguousRaggedTrajectoryProfile
14 | from pocean.tests.dsg.test_new import test_is_mine
15 |
16 | L.level = logging.INFO
17 | L.handlers = [logging.StreamHandler()]
18 |
19 | # RuntimeWarning: invalid value encountered in cast is fine here.
20 | ignore_invalid_value_cast = pytest.mark.filterwarnings("ignore::RuntimeWarning")
21 |
22 |
23 | class TestContiguousRaggedTrajectoryProfile(unittest.TestCase):
24 | def setUp(self):
25 | self.single = os.path.join(os.path.dirname(__file__), "resources", "cr-single.nc")
26 | self.multi = os.path.join(os.path.dirname(__file__), "resources", "cr-multiple.nc")
27 | self.missing_time = os.path.join(
28 | os.path.dirname(__file__), "resources", "cr-missing-time.nc"
29 | )
30 | self.nan_locations = os.path.join(
31 | os.path.dirname(__file__), "resources", "cr-nan-locations.nc"
32 | )
33 |
34 | def test_crtp_load(self):
35 | ContiguousRaggedTrajectoryProfile(self.single).close()
36 | ContiguousRaggedTrajectoryProfile(self.multi).close()
37 | ContiguousRaggedTrajectoryProfile(self.missing_time).close()
38 |
39 | @ignore_invalid_value_cast
40 | def test_crtp_dataframe_single(self):
41 | axes = {
42 | "t": "time",
43 | "x": "longitude",
44 | "y": "latitude",
45 | "z": "depth",
46 | }
47 | fid, tmpnc = tempfile.mkstemp(suffix=".nc")
48 | with ContiguousRaggedTrajectoryProfile(self.single) as ncd:
49 | df = ncd.to_dataframe(axes=axes)
50 | with ContiguousRaggedTrajectoryProfile.from_dataframe(
51 | df, tmpnc, axes=axes
52 | ) as result_ncd:
53 | assert "profile" in result_ncd.dimensions
54 | assert "trajectory" in result_ncd.dimensions
55 | test_is_mine(ContiguousRaggedTrajectoryProfile, tmpnc) # Try to load it again
56 | os.close(fid)
57 | os.remove(tmpnc)
58 |
59 | @ignore_invalid_value_cast
60 | def test_crtp_dataframe_single_unique_dims(self):
61 | axes = {
62 | "t": "time",
63 | "x": "longitude",
64 | "y": "latitude",
65 | "z": "depth",
66 | }
67 | fid, tmpnc = tempfile.mkstemp(suffix=".nc")
68 | with ContiguousRaggedTrajectoryProfile(self.single) as ncd:
69 | df = ncd.to_dataframe(axes=axes)
70 | with ContiguousRaggedTrajectoryProfile.from_dataframe(
71 | df, tmpnc, axes=axes, unique_dims=True
72 | ) as result_ncd:
73 | assert "profile_dim" in result_ncd.dimensions
74 | assert "trajectory_dim" in result_ncd.dimensions
75 | test_is_mine(ContiguousRaggedTrajectoryProfile, tmpnc) # Try to load it again
76 | os.close(fid)
77 | os.remove(tmpnc)
78 |
79 | def test_crtp_dataframe_multi(self):
80 | axes = {
81 | "t": "time",
82 | "x": "lon",
83 | "y": "lat",
84 | "z": "z",
85 | }
86 | fid, tmpnc = tempfile.mkstemp(suffix=".nc")
87 | with ContiguousRaggedTrajectoryProfile(self.multi) as ncd:
88 | df = ncd.to_dataframe(axes=axes)
89 | with ContiguousRaggedTrajectoryProfile.from_dataframe(
90 | df, tmpnc, axes=axes
91 | ) as result_ncd:
92 | assert "profile" in result_ncd.dimensions
93 | assert "trajectory" in result_ncd.dimensions
94 | test_is_mine(ContiguousRaggedTrajectoryProfile, tmpnc) # Try to load it again
95 | os.close(fid)
96 | os.remove(tmpnc)
97 |
98 | @ignore_invalid_value_cast
99 | def test_crtp_dataframe_missing_time(self):
100 | axes = {
101 | "t": "precise_time",
102 | "x": "precise_lon",
103 | "y": "precise_lat",
104 | "z": "depth",
105 | }
106 | fid, tmpnc = tempfile.mkstemp(suffix=".nc")
107 | with ContiguousRaggedTrajectoryProfile(self.missing_time) as ncd:
108 | df = ncd.to_dataframe(axes=axes)
109 | with ContiguousRaggedTrajectoryProfile.from_dataframe(
110 | df, tmpnc, axes=axes
111 | ) as result_ncd:
112 | assert "profile" in result_ncd.dimensions
113 | assert "trajectory" in result_ncd.dimensions
114 | test_is_mine(ContiguousRaggedTrajectoryProfile, tmpnc) # Try to load it again
115 | os.close(fid)
116 | os.remove(tmpnc)
117 |
118 | @ignore_invalid_value_cast
119 | def test_crtp_calculated_metadata_single(self):
120 | axes = {
121 | "t": "time",
122 | "x": "longitude",
123 | "y": "latitude",
124 | "z": "depth",
125 | }
126 |
127 | with ContiguousRaggedTrajectoryProfile(self.single) as st:
128 | s = st.calculated_metadata(axes=axes)
129 | assert s.min_t.round("s") == dtparse("2014-11-25 18:57:30")
130 | assert s.max_t.round("s") == dtparse("2014-11-27 07:10:30")
131 | assert len(s.trajectories) == 1
132 | traj = s.trajectories["sp025-20141125T1730"]
133 | assert traj.min_z == 0
134 | assert np.isclose(traj.max_z, 504.37827)
135 | assert traj.min_t.round("s") == dtparse("2014-11-25 18:57:30")
136 | assert traj.max_t.round("s") == dtparse("2014-11-27 07:10:30")
137 |
138 | first_loc = traj.geometry.coords[0]
139 | assert np.isclose(first_loc[0], -119.79025)
140 | assert np.isclose(first_loc[1], 34.30818)
141 | assert len(traj.profiles) == 17
142 |
143 | def test_crtp_calculated_metadata_multi(self):
144 | axes = {
145 | "t": "time",
146 | "x": "longitude",
147 | "y": "latitude",
148 | "z": "depth",
149 | }
150 |
151 | with ContiguousRaggedTrajectoryProfile(self.multi) as mt:
152 | m = mt.calculated_metadata(axes=axes)
153 | assert m.min_t.round("s") == dtparse("1990-01-01 00:00:00")
154 | assert m.max_t.round("s") == dtparse("1990-01-03 02:00:00")
155 | assert len(m.trajectories) == 5
156 | # First trajectory
157 | traj0 = m.trajectories[0]
158 | assert traj0.min_z == 0
159 | assert traj0.max_z == 43
160 | assert traj0.min_t.round("s") == dtparse("1990-01-02 05:00:00")
161 | assert traj0.max_t.round("s") == dtparse("1990-01-03 01:00:00")
162 | first_loc = traj0.geometry.coords[0]
163 | assert first_loc[0] == -60
164 | assert first_loc[1] == 53
165 | assert len(traj0.profiles) == 4
166 | assert traj0.profiles[0].t.round("s") == dtparse("1990-01-03 01:00:00")
167 | assert traj0.profiles[0].x == -60
168 | assert traj0.profiles[0].y == 49
169 |
170 | # Last trajectory
171 | traj4 = m.trajectories[4]
172 | assert traj4.min_z == 0
173 | assert traj4.max_z == 38
174 | assert traj4.min_t.round("s") == dtparse("1990-01-02 14:00:00")
175 | assert traj4.max_t.round("s") == dtparse("1990-01-02 15:00:00")
176 | first_loc = traj4.geometry.coords[0]
177 | assert first_loc[0] == -67
178 | assert first_loc[1] == 47
179 | assert len(traj4.profiles) == 4
180 | assert traj4.profiles[19].t.round("s") == dtparse("1990-01-02 14:00:00")
181 | assert traj4.profiles[19].x == -44
182 | assert traj4.profiles[19].y == 47
183 |
184 | @ignore_invalid_value_cast
185 | def test_crtp_calculated_metadata_missing_time(self):
186 | axes = {
187 | "t": "time",
188 | "x": "longitude",
189 | "y": "latitude",
190 | "z": "depth",
191 | }
192 |
193 | with ContiguousRaggedTrajectoryProfile(self.missing_time) as mmt:
194 | t = mmt.calculated_metadata(axes=axes)
195 | assert t.min_t == dtparse("2014-11-16 21:32:29.952500")
196 | assert t.max_t == dtparse("2014-11-17 07:59:08.398500")
197 | assert len(t.trajectories) == 1
198 |
199 | traj = t.trajectories["UW157-20141116T211809"]
200 | assert np.isclose(traj.min_z, 0.47928014)
201 | assert np.isclose(traj.max_z, 529.68005)
202 | assert traj.min_t == dtparse("2014-11-16 21:32:29.952500")
203 | assert traj.max_t == dtparse("2014-11-17 07:59:08.398500")
204 |
205 | first_loc = traj.geometry.coords[0]
206 |
207 | assert np.isclose(first_loc[0], -124.681526638573)
208 | assert np.isclose(first_loc[1], 43.5022166666667)
209 | assert len(traj.profiles) == 13
210 |
211 | @ignore_invalid_value_cast
212 | def test_crtp_just_missing_time(self):
213 | axes = {
214 | "t": "time",
215 | "x": "longitude",
216 | "y": "latitude",
217 | "z": "depth",
218 | }
219 |
220 | with ContiguousRaggedTrajectoryProfile(self.missing_time) as mmt:
221 | t = mmt.calculated_metadata(axes=axes)
222 | assert t.min_t == dtparse("2014-11-16 21:32:29.952500")
223 | assert t.max_t == dtparse("2014-11-17 07:59:08.398500")
224 | assert len(t.trajectories) == 1
225 |
226 | traj = t.trajectories["UW157-20141116T211809"]
227 | assert np.isclose(traj.min_z, 0.47928014)
228 | assert np.isclose(traj.max_z, 529.68005)
229 | assert traj.min_t == dtparse("2014-11-16 21:32:29.952500")
230 | assert traj.max_t == dtparse("2014-11-17 07:59:08.398500")
231 |
232 | first_loc = traj.geometry.coords[0]
233 | assert np.isclose(first_loc[0], -124.681526638573)
234 | assert np.isclose(first_loc[1], 43.5022166666667)
235 | assert len(traj.profiles) == 13
236 |
237 | @ignore_invalid_value_cast
238 | def test_crtp_just_missing_locations(self):
239 | axes = {
240 | "t": "time",
241 | "x": "longitude",
242 | "y": "latitude",
243 | "z": "depth",
244 | }
245 |
246 | with ContiguousRaggedTrajectoryProfile(self.nan_locations) as ml:
247 | t = ml.calculated_metadata(axes=axes)
248 | assert len(t.trajectories) == 1
249 |
250 | traj = t.trajectories["clark-20150709T1803"]
251 | coords = list(wktloads(traj.geometry.wkt).coords)
252 | assert True not in [math.isnan(x) for x, y in coords]
253 | assert True not in [math.isnan(y) for x, y in coords]
254 |
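255 | # Minimal round-trip sketch of the to_dataframe()/from_dataframe() pattern
256 | # tested above ("cr-single.nc" and "out.nc" stand in for real paths):
257 | #
258 | #     axes = {"t": "time", "x": "longitude", "y": "latitude", "z": "depth"}
259 | #     with ContiguousRaggedTrajectoryProfile("cr-single.nc") as ncd:
260 | #         df = ncd.to_dataframe(axes=axes)
261 | #     with ContiguousRaggedTrajectoryProfile.from_dataframe(df, "out.nc", axes=axes) as out:
262 | #         assert "trajectory" in out.dimensions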
--------------------------------------------------------------------------------
/pocean/tests/test_cf.py:
--------------------------------------------------------------------------------
1 | #!python
2 | import logging
3 | import os
4 | import unittest
5 |
6 | from pocean import logger as L
7 | from pocean.cf import CFDataset
8 | from pocean.dsg import OrthogonalMultidimensionalTimeseries as omt
9 |
10 | L.level = logging.INFO
11 | L.handlers = [logging.StreamHandler()]
12 |
13 |
14 | class TestCFDatasetLoad(unittest.TestCase):
15 | def test_load_url(self):
16 | # File downloaded from https://geoport.usgs.esipfed.org/thredds/dodsC/silt/usgs/Projects/stellwagen/CF-1.6/ARGO_MERCHANT/1211-AA.cdf.html
17 | fname = os.path.join(os.path.dirname(__file__), "resources", "1211-AA.cdf")
18 | ncd = CFDataset.load(fname)
19 | assert omt.is_mine(ncd) is True
20 | ncd.close()
21 |
22 | def test_load_strict(self):
23 | ncfile = os.path.join(
24 | os.path.dirname(__file__), "dsg", "profile", "resources", "om-single.nc"
25 | )
26 |
27 | ncd = CFDataset.load(ncfile)
28 | assert omt.is_mine(ncd) is False
29 | with self.assertRaises(BaseException):
30 | omt.is_mine(ncd, strict=True)
31 | ncd.close()
32 |
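33 | # Usage sketch: CFDataset.load() returns an instance of the matching DSG
34 | # class, while is_mine() probes a single type; with strict=True a mismatch
35 | # raises instead of returning False (see test_load_strict above).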
--------------------------------------------------------------------------------
/pocean/tests/test_nc.py:
--------------------------------------------------------------------------------
1 | #!python
2 | import logging
3 | import os
4 | import tempfile
5 | import unittest
6 |
7 | from numpy import testing as npt
8 |
9 | from pocean import logger as L
10 | from pocean.cf import CFDataset
11 | from pocean.dataset import EnhancedDataset
12 | from pocean.meta import MetaInterface, ncpyattributes
13 |
14 | L.level = logging.INFO
15 | L.handlers = [logging.StreamHandler()]
16 |
17 |
18 | class TestJsonDataset(unittest.TestCase):
19 | def setUp(self):
20 | self.maxDiff = 9999
21 | self.hdl, self.ncdf = tempfile.mkstemp(prefix="pocean_test_")
22 |
23 | def tearDown(self):
24 | os.close(self.hdl)
25 | os.remove(self.ncdf)
26 |
27 | def test_lvl0_apply(self):
28 | jsf = os.path.join(os.path.dirname(__file__), "resources/coamps_lvl0.json")
29 | mi = MetaInterface.from_jsonfile(jsf)
30 |
31 | with EnhancedDataset(self.ncdf, "w") as ncd:
32 | ncd.apply_meta(mi)
33 |
34 | assert {k: v.size for k, v in ncd.dimensions.items()} == mi["dimensions"]
35 |
36 | fileglobatts = mi["attributes"]
37 | newglobatts = {}
38 | for nk in ncd.ncattrs():
39 | newglobatts[nk] = ncd.getncattr(nk)
40 |
41 | self.assertDictEqual(fileglobatts, newglobatts)
42 |
43 | for k, v in ncd.variables.items():
44 | filevaratts = mi["variables"][k]["attributes"]
45 | newvaratts = ncpyattributes(dict(v.__dict__), verbose=False)
46 |
47 | # _FillValue gets added even if it wasn't in the original attributes
48 | if "_FillValue" in newvaratts:
49 | del newvaratts["_FillValue"]
50 |
51 | if "missing_value" in filevaratts:
52 | del filevaratts["missing_value"]
53 |
54 | self.assertDictEqual(filevaratts, newvaratts)
55 |
56 | def test_lvl2_apply(self):
57 | jsf = os.path.join(os.path.dirname(__file__), "resources/coamps_lvl2.json")
58 | mi = MetaInterface.from_jsonfile(jsf)
59 |
60 | with EnhancedDataset(self.ncdf, "w") as ncd:
61 | ncd.apply_meta(mi)
62 |
63 | assert {k: v.size for k, v in ncd.dimensions.items()} == mi["dimensions"]
64 |
65 | fileglobatts = {k: v["data"] for k, v in mi["attributes"].items()}
66 | newglobatts = {}
67 | for nk in ncd.ncattrs():
68 | newglobatts[nk] = ncd.getncattr(nk)
69 |
70 | self.assertDictEqual(fileglobatts, newglobatts)
71 |
72 | for k, v in ncd.variables.items():
73 | filevaratts = {k: v["data"] for k, v in mi["variables"][k]["attributes"].items()}
74 | newvaratts = ncpyattributes(dict(v.__dict__), verbose=False)
75 |
76 | # _FillValue gets added even if it wasn't in the original attributes
77 | if "_FillValue" in newvaratts:
78 | del newvaratts["_FillValue"]
79 |
80 | if "missing_value" in filevaratts:
81 | del filevaratts["missing_value"]
82 |
83 | self.assertDictEqual(filevaratts, newvaratts)
84 |
85 | def test_input_output(self):
86 | ncfile = os.path.join(os.path.dirname(__file__), "resources/coamps.nc")
87 |
88 | with EnhancedDataset(ncfile, "r") as original_ncd:
89 | mi = original_ncd.meta()
90 |
91 | with EnhancedDataset(self.ncdf, "w") as ncd:
92 | ncd.apply_meta(mi)
93 |
94 | self.assertDictEqual(
95 | ncpyattributes(dict(original_ncd.__dict__)), ncpyattributes(dict(ncd.__dict__))
96 | )
97 |
98 | for k, v in original_ncd.variables.items():
99 | oldatts = ncpyattributes(dict(v.__dict__))
100 | newatts = ncpyattributes(dict(ncd.variables[k].__dict__))
101 |
102 | # _FillValue gets added even if it wasn't in the original attributes
103 | if "_FillValue" in newatts:
104 | del newatts["_FillValue"]
105 |
106 | if "missing_value" in oldatts:
107 | del oldatts["missing_value"]
108 |
109 | self.assertDictEqual(oldatts, newatts)
110 |
111 | def test_serialize_and_reload_data(self):
112 | ncfile = os.path.join(os.path.dirname(__file__), "resources/qc-month.nc")
113 |
114 | with CFDataset(ncfile) as cfncd:
115 | # Data from netCDF variable
116 | ncdata = cfncd.variables["data1"][:]
117 |
118 | # Not filled
119 | meta = cfncd.json(return_data=True, fill_data=False)
120 | jsdata = meta["variables"]["data1"]["data"]
121 | npt.assert_array_equal(ncdata, jsdata)
122 | fhandle1, fname1 = tempfile.mkstemp()
123 | with CFDataset(fname1, "w") as newcf:
124 | newcf.apply_json(meta)
125 | with CFDataset(fname1, "r") as rcf:
126 | newncdata = rcf.variables["data1"][:]
127 | npt.assert_array_equal(ncdata, newncdata)
128 | os.close(fhandle1)
129 | os.remove(fname1)
130 |
131 | # Filled
132 | meta = cfncd.json(return_data=True, fill_data=True)
133 | jsdata = meta["variables"]["data1"]["data"]
134 | npt.assert_array_equal(ncdata, jsdata)
135 | fhandle2, fname2 = tempfile.mkstemp()
136 | with CFDataset(fname2, "w") as newcf:
137 | newcf.apply_json(meta)
138 |
139 | with CFDataset(fname2, "r") as rcf:
140 | newncdata = rcf.variables["data1"][:]
141 | npt.assert_array_equal(ncdata, newncdata)
142 |
143 | os.close(fhandle2)
144 | os.remove(fname2)
145 |
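146 | # Round-trip sketch of the JSON serialization tested above ("in.nc" and
147 | # "out.nc" are placeholder paths):
148 | #
149 | #     with CFDataset("in.nc") as ncd:
150 | #         meta = ncd.json(return_data=True)  # dimensions, attributes and data
151 | #     with CFDataset("out.nc", "w") as out:
152 | #         out.apply_json(meta)  # recreates the dataset from the JSON metadata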
--------------------------------------------------------------------------------
/pocean/tests/test_utils.py:
--------------------------------------------------------------------------------
1 | #!python
2 | import logging
3 | import os
4 | import shutil
5 | import tempfile
6 | import unittest
7 |
8 | import netCDF4 as nc4
9 | import numpy as np
10 | import pytest
11 |
12 | from pocean import logger
13 | from pocean.dataset import EnhancedDataset
14 | from pocean.utils import generic_masked, get_default_axes, normalize_array
15 |
16 | logger.level = logging.INFO
17 | logger.handlers = [logging.StreamHandler()]
18 |
19 |
20 | class TestUtils(unittest.TestCase):
21 | def setUp(self):
22 | self.input_file = os.path.join(os.path.dirname(__file__), "resources/coamps.nc")
23 |
24 | def test_get_default_axes(self):
25 | assert get_default_axes() == (
26 | "trajectory",
27 | "station",
28 | "profile",
29 | "obs",
30 | "t",
31 | "x",
32 | "y",
33 | "z",
34 | )
35 |
36 | new_defaults = {
37 | "trajectory": "a",
38 | "station": "b",
39 | "profile": "c",
40 | "sample": "h",
41 | "t": "d",
42 | "x": "e",
43 | "y": "f",
44 | "z": "g",
45 | }
46 | assert get_default_axes(new_defaults) == (
47 | "a",
48 | "b",
49 | "c",
50 | "h",
51 | "d",
52 | "e",
53 | "f",
54 | "g",
55 | )
56 |
57 | new_defaults = {"trajectory": "a", "station": "b", "profile": "c"}
58 | assert get_default_axes(new_defaults) == (
59 | "a",
60 | "b",
61 | "c",
62 | "obs",
63 | "t",
64 | "x",
65 | "y",
66 | "z",
67 | )
68 |
69 | # Time is not a valid axis key
70 | bad_defaults = {"time": "a"}
71 | with self.assertRaises(TypeError):
72 | get_default_axes(bad_defaults)
73 |
74 | # Can't have duplicate values
75 | bad_defaults = {"x": "a", "y": "a"}
76 | with self.assertRaises(ValueError):
77 | get_default_axes(bad_defaults)
78 |
79 | # but you can with the sample dimension
80 | bad_defaults = {"t": "time", "sample": "time"}
81 | assert get_default_axes(bad_defaults) == (
82 | "trajectory",
83 | "station",
84 | "profile",
85 | "time",
86 | "time",
87 | "x",
88 | "y",
89 | "z",
90 | )
91 |
92 | def test_single_attr_filter(self):
93 | nc = EnhancedDataset(self.input_file)
94 | grid_spacing_vars = nc.filter_by_attrs(grid_spacing="4.0 km")
95 |
96 | x = nc.variables.get("x")
97 | y = nc.variables.get("y")
98 |
99 | self.assertEqual(len(grid_spacing_vars), 2)
100 | assert x in grid_spacing_vars
101 | assert y in grid_spacing_vars
102 |
103 | def test_multiple_attr_filter(self):
104 | nc = EnhancedDataset(self.input_file)
105 | grid_spacing_vars = nc.filter_by_attrs(
106 | grid_spacing="4.0 km", standard_name="projection_y_coordinate"
107 | )
108 |
109 | y = nc.variables.get("y")
110 |
111 | self.assertEqual(len(grid_spacing_vars), 1)
112 | assert y in grid_spacing_vars
113 |
114 | @pytest.mark.filterwarnings("ignore::UserWarning")
115 | def test_generic_masked_bad_min_max_value(self):
116 | fid, tpath = tempfile.mkstemp(suffix=".nc", prefix="pocean-test")
117 | shutil.copy2(self.input_file, tpath)
118 |
119 | with EnhancedDataset(tpath, "a") as ncd:
120 | v = ncd.variables["v_component_wind_true_direction_all_geometries"]
121 | v.valid_min = np.float32(0.1)
122 | v.valid_max = np.float32(0.1)
123 | r = generic_masked(v[:], attrs=ncd.vatts(v.name))
124 | rflat = r.flatten()
125 | assert rflat[~rflat.mask].size == 0
126 |
127 | # Create a byte variable with a valid_max that does not fit in a byte
128 | # to make sure it doesn't error
129 | b = ncd.createVariable("imabyte", "b")
130 | b.valid_min = 0
131 | b.valid_max = np.int16(600) # this is over a byte and thus invalid
132 | b[:] = 3
133 | r = generic_masked(b[:], attrs=ncd.vatts(b.name))
134 | assert np.all(r.mask == False) # noqa
135 |
136 | b.valid_min = 0
137 | b.valid_max = 2
138 | r = generic_masked(b[:], attrs=ncd.vatts(b.name))
139 | assert np.all(r.mask == True) # noqa
140 |
141 | c = ncd.createVariable("imanotherbyte", "f4")
142 | c.setncattr("valid_min", b"0")
143 | c.setncattr("valid_max", b"9")
144 | c[:] = 3
145 | r = generic_masked(c[:], attrs=ncd.vatts(c.name))
146 | assert np.all(r.mask == False) # noqa
147 |
148 | c = ncd.createVariable("imarange", "f4")
149 | c.valid_range = [0.0, 2.0]
150 | c[:] = 3.0
151 | r = generic_masked(c[:], attrs=ncd.vatts(c.name))
152 | assert np.all(r.mask == True) # noqa
153 |
154 | c.valid_range = [0.0, 2.0]
155 | c[:] = 1.0
156 | r = generic_masked(c[:], attrs=ncd.vatts(c.name))
157 | assert np.all(r.mask == False) # noqa
158 |
159 | os.close(fid)
160 | if os.path.exists(tpath):
161 | os.remove(tpath)
162 |
163 |
164 | class TestNetcdfUtils(unittest.TestCase):
165 | def test_cf_safe_name(self):
166 | from pocean.cf import cf_safe_name
167 |
168 | self.assertEqual("foo", cf_safe_name("foo"))
169 | self.assertEqual("v_1foo", cf_safe_name("1foo"))
170 | self.assertEqual("v_1foo_99", cf_safe_name("1foo-99"))
171 | self.assertEqual("foo_99", cf_safe_name("foo-99"))
172 | self.assertEqual("foo_99_", cf_safe_name("foo(99)"))
173 | self.assertEqual("v__foo_99_", cf_safe_name("_foo(99)"))
174 |
175 |
176 | class TestNormalizeArray(unittest.TestCase):
177 | def setUp(self):
178 | self.fh, self.fp = tempfile.mkstemp(suffix=".nc", prefix="pocean_testing_")
179 |
180 | def tearDown(self):
181 | os.close(self.fh)
182 | if os.path.exists(self.fp):
183 | os.remove(self.fp)
184 |
185 | def test_normalization_of_string_arrays_netcdf4(self):
186 | thestr = "bosadfsdfkljskfusdiofu987987987om"
187 |
188 | with nc4.Dataset(self.fp, "w", format="NETCDF4") as ncd:
189 | dimsize = len(thestr)
190 | ncd.createDimension("n", dimsize)
191 |
192 | # Single str (no dimension)
193 | ncd.createVariable("single_str", str)
194 | ncd.createVariable("single_unicode_", np.str_)
195 | ncd.createVariable("single_U", " 1:
221 | v[:, :] = np.tile(nc4.stringtoarr(thestr, dimsize), dimsize).reshape(
222 | v.shape
223 | )
224 | else:
225 | v[:] = np.tile(thestr, dimsize).reshape(v.shape)
226 |
227 | with nc4.Dataset(self.fp) as ncd:
228 | assert normalize_array(ncd.variables["single_str"]) == thestr
229 | assert normalize_array(ncd.variables["single_unicode_"]) == thestr
230 | assert normalize_array(ncd.variables["single_U"]) == thestr
231 | assert normalize_array(ncd.variables["single_S"]) == thestr
232 |
233 | assert np.all(normalize_array(ncd.variables["many_str"]) == [thestr] * len(thestr))
234 | assert np.all(normalize_array(ncd.variables["many_unicode_"]) == [thestr] * len(thestr))
235 | assert np.all(normalize_array(ncd.variables["many_U"]) == [thestr] * len(thestr))
236 | assert np.all(normalize_array(ncd.variables["many_S"]) == [thestr] * len(thestr))
237 |
238 | def test_normalization_of_string_arrays_netcdf3(self):
239 | thestr = "boodsfasfasdfm"
240 |
241 | with nc4.Dataset(self.fp, "w", format="NETCDF3_CLASSIC") as ncd:
242 | dimsize = len(thestr)
243 | ncd.createDimension("n", dimsize)
244 |
245 | # Single str (stored as a char array; NETCDF3 has no string type)
246 | ncd.createVariable("single_S", "S1", ("n",))
247 |
248 | for k, v in ncd.variables.items():
249 | if k.startswith("single_"):
250 | v[:] = nc4.stringtoarr(thestr, dimsize)
251 |
252 | # Array of str
253 | ncd.createVariable(
254 | "many_S",
255 | "S1",
256 | (
257 | "n",
258 | "n",
259 | ),
260 | )
261 |
262 | for k, v in ncd.variables.items():
263 | if k.startswith("many_"):
264 | v[:, :] = np.tile(nc4.stringtoarr(thestr, dimsize), dimsize).reshape(v.shape)
265 |
266 | with nc4.Dataset(self.fp) as ncd:
267 | assert normalize_array(ncd.variables["single_S"]) == thestr
268 | assert np.all(normalize_array(ncd.variables["many_S"]) == [thestr] * dimsize)
269 |
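270 | # Usage sketch: generic_masked() masks values outside valid_min/valid_max or
271 | # valid_range (see TestUtils above), and normalize_array() turns char arrays
272 | # and vlen strings back into native Python strings:
273 | #
274 | #     r = generic_masked(v[:], attrs=ncd.vatts(v.name))
275 | #     s = normalize_array(ncd.variables["single_S"])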
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | build-backend = "setuptools.build_meta"
3 | requires = [
4 | "setuptools>=42",
5 | "setuptools-scm[toml]>=3.4",
6 | "wheel",
7 | ]
8 |
9 | [project]
10 | name = "pocean-core"
11 | description = "A Python framework for working with met-ocean data"
12 | readme = "README.md"
13 | license = { file = "LICENSE.txt" }
14 | authors = [
15 | { name = "Kyle Wilcox", email = "kyle@axds.co" },
16 | ]
17 | requires-python = ">=3.9"
18 | classifiers = [
19 | "Programming Language :: Python :: 3 :: Only",
20 | "Programming Language :: Python :: 3.9",
21 | "Programming Language :: Python :: 3.10",
22 | "Programming Language :: Python :: 3.11",
23 | "Programming Language :: Python :: 3.12",
24 | "Programming Language :: Python :: 3.13",
25 | ]
26 | dynamic = [
27 | "version",
28 | ]
29 | dependencies = [
30 | "cftime>=1.2.1",
31 | "netcdf4",
32 | "numpy>=1.20",
33 | "pandas>=1.0.5",
34 | "python-dateutil",
35 | "pytz",
36 | "shapely>=1.8",
37 | "simplejson",
38 | ]
39 | urls.documentation = "https://pyoceans.github.io/pocean-core"
40 | urls.homepage = "https://pypi.org/project/pocean-core/"
41 | urls.repository = "https://github.com/pyoceans/pocean-core"
42 |
43 | [tool.setuptools]
44 | packages = [
45 | "pocean",
46 | ]
47 |
48 | [tool.setuptools_scm]
49 | write_to = "pocean/_version.py"
50 | write_to_template = "__version__ = '{version}'"
51 | tag_regex = "^(?P<prefix>v)?(?P<version>[^\\+]+)(?P<suffix>.*)?$"
52 |
53 | [tool.ruff]
54 |
55 | line-length = 100
56 |
57 | exclude = [
58 | ".git",
59 | ".git/",
60 | "__pycache__",
61 | "dist",
62 | "docs/",
63 | ]
64 |
65 | lint.select = [
66 | "E", # pycodecstyle
67 | "F", # flakes
68 | "I", # import sorting
69 | "W", # pydocstyle
70 | ]
71 |
72 | lint.ignore = [
73 | #"E265",
74 | #"E221",
75 | #"E203",
76 | #"E201",
77 | #"E124",
78 | #"E202",
79 | #"E241",
80 | #"E251",
81 | #"W504",
82 | "E501",
83 | "W291",
84 | "W293",
85 | ]
86 |
87 | lint.per-file-ignores."pocean/tests/*.py" = [
88 | "F403",
89 | "F405",
90 | ]
91 | lint.isort.order-by-type = false
92 |
93 | [tool.pytest.ini_options]
94 | addopts = "-s -rxs -v"
95 |
96 | filterwarnings = [
97 | "error",
98 | ]
99 |
--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | flake8
2 | pooch
3 | pre-commit
4 | pytest
5 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | cftime>=1.2.1
2 | netcdf4
3 | numpy>=1.20
4 | pandas>=1.0.5
5 | python-dateutil
6 | pytz
7 | shapely>=1.8
8 | simplejson
9 |
--------------------------------------------------------------------------------