├── .binder
    └── environment.yml
├── .github
    ├── ISSUE_TEMPLATE
    │   ├── bug_report.md
    │   └── feature_request.md
    ├── pull_request_template.md
    └── workflows
    │   ├── ci.yml
    │   ├── linting.yml
    │   └── pythonpackage.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .readthedocs.yml
├── LICENSE
├── README.rst
├── ci
    └── environment.yml
├── codecov.yml
├── dependabot.yml
├── docs
    ├── api.rst
    ├── authors.md
    ├── calibration.md
    ├── cheatsheet.md
    ├── conf.py
    ├── contributing.rst
    ├── img
    │   ├── code_bgu.png
    │   ├── interactive_plot.png
    │   ├── logo_with_name.png
    │   ├── logo_with_name_vert.png
    │   ├── logo_wo_name.png
    │   ├── output_101_2.png
    │   ├── output_106_1.png
    │   ├── output_108_0.png
    │   ├── output_111_0.png
    │   ├── output_116_0.png
    │   ├── output_14_0.png
    │   ├── output_16_0.png
    │   ├── output_16_1.png
    │   ├── output_19_0.png
    │   ├── output_19_1.png
    │   ├── output_22_0.png
    │   ├── output_24_0.png
    │   ├── output_26_0.png
    │   ├── output_28_1.png
    │   ├── output_29_1.png
    │   ├── output_33_0.png
    │   ├── output_36_0.png
    │   ├── output_38_1.png
    │   ├── output_41_0.png
    │   ├── output_43_0.png
    │   ├── output_45_0.png
    │   ├── output_47_0.png
    │   ├── output_49_0.png
    │   ├── output_51_0.png
    │   ├── output_55_1.png
    │   ├── output_56_1.png
    │   ├── output_59_0.png
    │   ├── output_61_0.png
    │   ├── output_64_0.png
    │   ├── output_65_0.png
    │   ├── output_67_1.png
    │   ├── output_71_0.png
    │   ├── output_74_0.png
    │   ├── output_76_0.png
    │   ├── output_78_0.png
    │   ├── output_80_0.png
    │   ├── output_82_0.png
    │   ├── output_84_0.png
    │   ├── output_86_1.png
    │   ├── output_86_2.png
    │   ├── output_92_1.png
    │   ├── output_94_1.png
    │   ├── output_97_0.png
    │   ├── output_99_2.png
    │   ├── package_overview.png
    │   └── package_structure.png
    ├── index.rst
    ├── installation.md
    ├── loading.md
    ├── mapping.md
    ├── optics.md
    ├── other.md
    ├── package_structure.md
    ├── physics.md
    ├── quality_control.md
    ├── requirements.txt
    ├── saving.md
    ├── static
    │   └── css
    │   │   └── custom.css
    ├── whats-new.rst
    └── wishlist.md
├── glidertools
    ├── .DS_Store
    ├── __init__.py
    ├── calibration.py
    ├── cleaning.py
    ├── flo_functions.py
    ├── helpers.py
    ├── load
    │   ├── __init__.py
    │   ├── ego.py
    │   ├── seaglider.py
    │   ├── slocum.py
    │   └── voto_seaexplorer.py
    ├── mapping.py
    ├── optics.py
    ├── physics.py
    ├── plot.py
    ├── processing.py
    └── utils.py
├── notebooks
    └── Demo_GT.ipynb
├── pyproject.toml
├── setup.cfg
├── setup.py
└── tests
    ├── __init__.py
    ├── data
        ├── p5420304.nc
        ├── p5420305.nc
        ├── p5420306.nc
        ├── p5420307.nc
        ├── p5420308.nc
        ├── p5420309.nc
        ├── p5420310.nc
        ├── p5420311.nc
        ├── p5420312.nc
        ├── p5420313.nc
        ├── p5420314.nc
        ├── p5420315.nc
        ├── p5420316.nc
        ├── p5420317.nc
        └── voto_nrt.nc
    ├── test_calibration.py
    ├── test_cleaning.py
    ├── test_dive_numbers.py
    ├── test_flo_functions.py
    ├── test_imports.py
    ├── test_load.py
    ├── test_mapping.py
    ├── test_optics.py
    ├── test_physics.py
    ├── test_plot.py
    └── test_processing.py


/.binder/environment.yml:
--------------------------------------------------------------------------------
 1 | name: glidertools_notebooks
 2 | channels:
 3 |   - conda-forge
 4 | dependencies:
 5 |   - numexpr
 6 |   - netCDF4
 7 |   - pandas
 8 |   - xarray >=2022.10.0
 9 |   - numpy
10 |   - scikit-learn
11 |   - scipy
12 |   - tqdm
13 |   - matplotlib
14 |   - gsw
15 |   - skyfield
16 |   - jupyterlab
17 |   - cmocean
18 |   - pip
19 |   - pip:
20 |     - glidertools @ git+https://github.com/GliderToolsCommunity/GliderTools.git@master
21 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug report
 3 | about: Create a report to help us improve
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 | 
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 | 
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 | 
23 | **Screenshots**
24 | If applicable, add screenshots to help explain your problem.
25 | 
26 | **Desktop (please complete the following information):**
27 |  - OS: [e.g. iOS]
28 |  - Browser [e.g. chrome, safari]
29 |  - Version [e.g. 22]
30 | 
31 | **Smartphone (please complete the following information):**
32 |  - Device: [e.g. iPhone6]
33 |  - OS: [e.g. iOS8.1]
34 |  - Browser [e.g. stock browser, safari]
35 |  - Version [e.g. 22]
36 | 
37 | **Additional context**
38 | Add any other context about the problem here.
39 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature request
 3 | about: Suggest an idea for this project
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 | 
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 | 
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 | 
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 | 


--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 | <!-- Feel free to remove check-list items aren't relevant to your change -->
2 | 
3 |  - [ ] Closes #xxxx
4 |  - [ ] Tests added
5 |  - [ ] Passes `pre-commit run --all-files`
6 |  - [ ] User visible changes (including notable bug fixes) are documented in `whats-new.rst`
7 |  - [ ] New functions/methods are listed in `api.rst`
8 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | on:
 3 |   push:
 4 |     branches:
 5 |       - "*"
 6 |   pull_request:
 7 |     branches:
 8 |       - "*"
 9 |   schedule:
10 |     - cron: "0 13 * * 1"
11 | 
12 | jobs:
13 |   build:
14 |     name: Build (${{ matrix.python-version }} | ${{ matrix.os }})
15 |     runs-on: ${{ matrix.os }}
16 |     strategy:
17 |       fail-fast: false
18 |       matrix:
19 |         os: ["ubuntu-latest"]
20 |         python-version: ["3.8", "3.9", "3.10", "3.11"]
21 |     steps:
22 |       - uses: actions/checkout@v2
23 |       - name: Cache conda
24 |         uses: actions/cache@v1
25 |         env:
26 |           # Increase this value to reset cache if ci/environment.yml has not changed
27 |           CACHE_NUMBER: 0
28 |         with:
29 |           path: ~/conda_pkgs_dir
30 |           key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/environment.yml') }}
31 |       - uses: conda-incubator/setup-miniconda@v2
32 |         with:
33 |           activate-environment: test_env_glidertools # Defined in ci/environment*.yml
34 |           auto-update-conda: false
35 |           python-version: ${{ matrix.python-version }}
36 |           environment-file: ci/environment.yml
37 |           use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
38 |       - name: Set up conda environment
39 |         shell: bash -l {0}
40 |         run: |
41 |           python -m pip install -e . --no-deps
42 |           conda list
43 |       - name: Run Tests
44 |         shell: bash -l {0}
45 |         run: |
46 |           pytest -n auto --cov=./ --cov-report=xml
47 |       - name: Upload code coverage to Codecov
48 |         uses: codecov/codecov-action@v1
49 |         with:
50 |           file: ./coverage.xml
51 |           flags: unittests
52 |           env_vars: OS,PYTHON
53 |           name: codecov-umbrella
54 |           fail_ci_if_error: false
55 | 


--------------------------------------------------------------------------------
/.github/workflows/linting.yml:
--------------------------------------------------------------------------------
 1 | name: code-style
 2 | on:
 3 |   pull_request:
 4 |   push:
 5 |     branches: [master]
 6 | jobs:
 7 |     pre-commit:
 8 |         name: pre-commit
 9 |         runs-on: ubuntu-latest
10 |         steps:
11 |             - name: checkout
12 |               uses: actions/checkout@v2
13 |             - name: set up python
14 |               uses: actions/setup-python@v2
15 |               with:
16 |                   python-version: 3.8
17 |             - name: Run pre-commit
18 |               uses: pre-commit/action@v2.0.0
19 | 


--------------------------------------------------------------------------------
/.github/workflows/pythonpackage.yml:
--------------------------------------------------------------------------------
 1 | name: Upload Python Package
 2 | 
 3 | on:
 4 |   release:
 5 |     types: [created]
 6 | 
 7 | jobs:
 8 |   deploy:
 9 |     if: github.repository == 'GliderToolsCommunity/GliderTools'
10 |     runs-on: ubuntu-latest
11 |     steps:
12 |       - uses: actions/checkout@v1
13 |       - name: Set up Python
14 |         uses: actions/setup-python@v1
15 |         with:
16 |           python-version: '3.x'
17 |       - name: Check git status
18 |         run: git status
19 |       - name: Check version
20 |         run:  python setup.py --version
21 |       - name: Install dependencies
22 |         run: |
23 |           python -m pip install --upgrade pip
24 |           pip install setuptools setuptools-scm wheel twine check-manifest toml
25 |       - name: Build tarball and wheels
26 |         run: |
27 |           git clean -xdf
28 |           git restore -SW .
29 |           python -m build --sdist --wheel .
30 |       - name: Test the artifacts
31 |         run: |
32 |           python -m twine check dist/*
33 |       - name: Publish package to PyPI
34 |         uses: pypa/gh-action-pypi-publish@v1.4.2
35 |         with:
36 |           user: __token__
37 |           password: ${{ secrets.PYPI_TOKEN }}
38 |           verbose: true
39 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Created by https://www.gitignore.io/api/python
 2 | # Edit at https://www.gitignore.io/?templates=python
 3 | 
 4 | ### Python ###
 5 | # Byte-compiled / optimized / DLL files
 6 | __pycache__/
 7 | *.py[cod]
 8 | *$py.class
 9 | 
10 | # C extensions
11 | *.so
12 | 
13 | # Distribution / packaging
14 | .Python
15 | build/
16 | develop-eggs/
17 | dist/
18 | downloads/
19 | eggs/
20 | .eggs/
21 | lib/
22 | lib64/
23 | parts/
24 | sdist/
25 | var/
26 | wheels/
27 | pip-wheel-metadata/
28 | share/python-wheels/
29 | *.egg-info/
30 | .installed.cfg
31 | *.egg
32 | MANIFEST
33 | 
34 | # PyInstaller
35 | #  Usually these files are written by a python script from a template
36 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
37 | *.manifest
38 | *.spec
39 | 
40 | # Installer logs
41 | pip-log.txt
42 | pip-delete-this-directory.txt
43 | 
44 | # Unit test / coverage reports
45 | htmlcov/
46 | .tox/
47 | .nox/
48 | .coverage
49 | .coverage.*
50 | .cache
51 | nosetests.xml
52 | coverage.xml
53 | *.cover
54 | .hypothesis/
55 | .pytest_cache/
56 | 
57 | # Translations
58 | *.mo
59 | *.pot
60 | 
61 | # Django stuff:
62 | *.log
63 | local_settings.py
64 | db.sqlite3
65 | db.sqlite3-journal
66 | 
67 | # Sphinx documentation
68 | docs/_build/
69 | docs/_generated/
70 | 
71 | # PyBuilder
72 | target/
73 | 
74 | # pyenv python configuration file
75 | .python-version
76 | 
77 | # vscode
78 | .vscode/*
79 | **/.DS_Store
80 | .DS_Store
81 | junit.xml
82 | **/.ipynb_checkpoints/**
83 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 | - repo: https://github.com/pre-commit/pre-commit-hooks
 3 |   rev: v4.6.0
 4 |   hooks:
 5 |     - id: trailing-whitespace
 6 |     - id: check-ast
 7 |     - id: debug-statements
 8 |     - id: end-of-file-fixer
 9 |     - id: check-docstring-first
10 |     - id: check-added-large-files
11 | 
12 | - repo: https://github.com/PyCQA/flake8
13 |   rev: 7.0.0
14 |   hooks:
15 |     - id: flake8
16 |       args: ["--max-line-length=105", "--select=C,E,F,W,B,B950", "--ignore=E203,E501,W503"]
17 |       exclude: docs/source/conf.py
18 | 
19 | - repo: https://github.com/PyCQA/isort
20 |   rev: 5.13.2
21 |   hooks:
22 |   - id: isort
23 |     additional_dependencies: [toml]
24 |     args: [--project=glidertools, --multi-line=3, --lines-after-imports=2, --lines-between-types=1, --trailing-comma, --force-grid-wrap=0, --use-parentheses, --line-width=88]
25 | 
26 | - repo: https://github.com/asottile/seed-isort-config
27 |   rev: v2.2.0
28 |   hooks:
29 |     - id: seed-isort-config
30 | 
31 | - repo: https://github.com/psf/black
32 |   rev: 24.3.0
33 |   hooks:
34 |   - id: black
35 |     language_version: python3
36 | 


--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
 1 | # .readthedocs.yml
 2 | # Read the Docs configuration file
 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 4 | 
 5 | # Required
 6 | version: 2
 7 | 
 8 | # Set the OS, Python version and other tools you might need
 9 | build:
10 |   os: ubuntu-22.04
11 |   tools:
12 |     python: "3.11"
13 | 
14 | # Build documentation in the docs/ directory with Sphinx
15 | sphinx:
16 |   configuration: docs/conf.py
17 | 
18 | # Build documentation with MkDocs
19 | #mkdocs:
20 | #  configuration: mkdocs.yml
21 | 
22 | # Optionally build your docs in additional formats such as PDF and ePub
23 | formats: all
24 | 
25 | # Optionally set the version of Python and requirements required to build your docs
26 | python:
27 |   install:
28 |     - requirements: docs/requirements.txt
29 | 
30 | submodules:
31 |   include: all
32 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | ===============================
 2 | glidertools
 3 | ===============================
 4 | 
 5 | .. image:: https://github.com/GliderToolsCommunity/GliderTools/actions/workflows/ci.yml/badge.svg
 6 |         :target: https://github.com/GliderToolsCommunity/GliderTools/actions/workflows/ci.yml
 7 | .. image:: https://img.shields.io/conda/vn/conda-forge/glidertools.svg
 8 |         :target: https://anaconda.org/conda-forge/glidertools
 9 | .. image:: https://badgen.net/pypi/v/glidertools
10 |         :target: https://pypi.org/project/glidertools
11 | .. image:: https://pepy.tech/badge/glidertools
12 |         :target: https://pepy.tech/project/glidertools
13 | .. image:: https://readthedocs.org/projects/glidertools/badge/?version=latest
14 |         :target: https://glidertools.readthedocs.io
15 | .. image:: https://img.shields.io/badge/License-GPLv3-blue.svg
16 |         :target: https://www.gnu.org/licenses/gpl-3.0
17 | .. image:: https://img.shields.io/badge/Journal-10.3389%2Ffmars.2019.00738-blue
18 |         :target: https://doi.org/10.3389/fmars.2019.00738
19 | .. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.4075238.svg
20 |         :target: https://doi.org/10.5281/zenodo.4075238
21 | .. image:: https://codecov.io/gh/GliderToolsCommunity/GliderTools/branch/master/graph/badge.svg?token=FPUJ29TMSH
22 |         :target: https://codecov.io/gh/GliderToolsCommunity/GliderTools
23 | 
24 | Glider tools is a Python 3.8+ package designed to process data from the first level of processing to a science ready dataset (delayed mode quality control). The package is designed to easily import data to a standard column format (numpy.ndarray or pandas.DataFrame). Cleaning and smoothing functions are flexible and can be applied as required by the user. We provide examples and demonstrate best practices as developed by the `SOCCO Group <http://socco.org.za/>`_, with early contributions from `Polar Gliders <https://sebswart.com/>`_ at the University of Gothenburg. GliderTools includes contributions from `VOTO <https://voiceoftheocean.org//>`_. We aim to implement Best Practices developed by `OceanGliders <https://www.oceangliders.org/>`_ in the ongoing `discussions <https://github.com/OceanGlidersCommunity>`_.
25 | 
26 | Please cite the `original publication <https://doi.org/10.3389/fmars.2019.00738>`_ of this package and `the package itself <https://doi.org/10.5281/zenodo.4075238>`_.
27 | 
28 | Installation
29 | ------------
30 | Conda
31 | .....
32 | To install the core package from conda-forge run: ``conda install -c conda-forge glidertools``
33 | 
34 | PyPI
35 | ....
36 | To install the core package run: ``pip install glidertools``.
37 | 
38 | GitHub
39 | ......
40 | 1. Clone glidertools to your local machine: ``git clone https://github.com/GliderToolsCommunity/GliderTools``
41 | 2. Change to the parent directory of GliderTools
42 | 3. Install glidertools with ``pip install -e ./GliderTools``. This will allow
43 |    changes you make locally, to be reflected when you import the package in Python
44 | 
45 | Recommended, but optional packages
46 | ..................................
47 | There are some packages that are not installed by default, as these are large packages or can
48 | result in installation errors, resulting in failure to install GliderTools.
49 | These should install automatically with ``pip install package_name``:
50 | 
51 | * ``gsw``: accurate density calculation (may fail in some cases)
52 | * ``pykrige``: variogram plotting (installation generally works, except when bundled)
53 | * ``plotly``: interactive 3D plots (large package)
54 | 
55 | 
56 | How you can contribute
57 | ----------------------
58 | - Join the community `by introducing yourself <https://github.com/GliderToolsCommunity/GliderTools/discussions/47>`_ (no need to be a Python or Git guru! Just say what you are working with and join the discussion)
59 | - If you find an error, please report it on `as a Github issue <https://github.com/GliderToolsCommunity/GliderTools/issues/new>`_. Please copy the entire error message (even if it's long).
60 | - Oxygen processing is rudimentary so far but we are on it and happy to get your support `in this discussion <https://github.com/GliderToolsCommunity/GliderTools/discussions/74>`_
61 | 
62 | For contributing follow the `instructions <https://glidertools.readthedocs.io/en/latest/contributing.html>`_
63 | 
64 | Acknowledgements
65 | ----------------
66 | - We rely heavily on ``ion_functions.data.flo_functions`` which was
67 |   written by Christopher Wingard, Craig Risien, Russell Desiderio
68 | - This work was initially funded by Pedro M Scheel Monteiro at the
69 |   Council for Scientific and Industrial Research (where Luke was working
70 |   at the time of writing the code).
71 | - Testers for their feedback: SOCCO team at the CSIR and ...
72 | 


--------------------------------------------------------------------------------
/ci/environment.yml:
--------------------------------------------------------------------------------
 1 | name: test_env_glidertools
 2 | channels:
 3 |   - conda-forge
 4 | dependencies:
 5 |   - numexpr
 6 |   - netCDF4
 7 |   - pandas
 8 |   - xarray >=2022.10.0
 9 |   - numpy
10 |   - scikit-learn
11 |   - scipy
12 |   - tqdm
13 |   - matplotlib
14 |   - gsw
15 |   - skyfield
16 |   - pip
17 |   - pip:
18 |     - codecov
19 |     - pytest-cov
20 |     - pytest-xdist
21 | 


--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
 1 | codecov:
 2 |   require_ci_to_pass: no
 3 |   max_report_age: off
 4 | 
 5 | comment: false
 6 | 
 7 | coverage:
 8 |   precision: 2
 9 |   round: down
10 |   status:
11 |     project:
12 |       default:
13 |         target: 95
14 |         informational: true
15 |     patch: off
16 |     changes: off
17 | 
18 | ignore:
19 |   - "setup.py"
20 |   - "versioneer.py"
21 |   - "glidertools/__init__.py"
22 |   - "glidertools/_version.py"
23 |   - "glidertools/load/*"
24 |   - "tests/*"
25 | 


--------------------------------------------------------------------------------
/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 |   - package-ecosystem: 'github-actions'
4 |     directory: '/'
5 |     schedule:
6 |       # Check for updates once a week
7 |       interval: 'weekly'
8 | 


--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
  1 | API Reference
  2 | =============
  3 | 
  4 | The API reference is automatically generated from the function docstrings in
  5 | the GliderTools package. Refer to the examples in the sidebar for reference on
  6 | how to use the functions.
  7 | 
  8 | Loading Data
  9 | ------------
 10 | .. currentmodule:: glidertools
 11 | .. autosummary::
 12 |    :toctree: ./_generated/
 13 | 
 14 |    load.seaglider_basestation_netCDFs
 15 |    load.seaglider_show_variables
 16 |    load.ego_mission_netCDF
 17 |    load.slocum_geomar_matfile
 18 |    load.voto_seaexplorer_nc
 19 |    load.voto_seaexplorer_dataset
 20 |    load.voto_concat_datasets
 21 | 
 22 | 
 23 | High level processing
 24 | ---------------------
 25 | .. currentmodule:: glidertools
 26 | .. autosummary::
 27 |    :toctree: ./_generated/
 28 | 
 29 |    processing.calc_physics
 30 |    processing.calc_oxygen
 31 |    processing.calc_backscatter
 32 |    processing.calc_fluorescence
 33 |    processing.calc_par
 34 | 
 35 | 
 36 | Cleaning
 37 | --------
 38 | .. currentmodule:: glidertools
 39 | .. autosummary::
 40 |    :toctree: ./_generated/
 41 | 
 42 |    cleaning.outlier_bounds_std
 43 |    cleaning.outlier_bounds_iqr
 44 |    cleaning.horizontal_diff_outliers
 45 |    cleaning.mask_bad_dive_fraction
 46 |    cleaning.data_density_filter
 47 |    cleaning.despike
 48 |    cleaning.despiking_report
 49 |    cleaning.rolling_window
 50 |    cleaning.savitzky_golay
 51 | 
 52 | 
 53 | Physics
 54 | -------
 55 | .. currentmodule:: glidertools
 56 | .. autosummary::
 57 |    :toctree: ./_generated/
 58 | 
 59 |    physics.mixed_layer_depth
 60 |    physics.potential_density
 61 |    physics.brunt_vaisala
 62 | 
 63 | 
 64 | 
 65 | Optics
 66 | ------
 67 | .. currentmodule:: glidertools
 68 | .. autosummary::
 69 |    :toctree: ./_generated/
 70 | 
 71 |    optics.find_bad_profiles
 72 |    optics.par_dark_count
 73 |    optics.backscatter_dark_count
 74 |    optics.fluorescence_dark_count
 75 |    optics.par_scaling
 76 |    optics.par_fill_surface
 77 |    optics.photic_depth
 78 |    optics.sunset_sunrise
 79 |    optics.quenching_correction
 80 |    optics.quenching_report
 81 | 
 82 | 
 83 | Calibration
 84 | -----------
 85 | .. currentmodule:: glidertools
 86 | .. autosummary::
 87 |    :toctree: ./_generated/
 88 | 
 89 |    calibration.bottle_matchup
 90 |    calibration.model_figs
 91 |    calibration.robust_linear_fit
 92 | 
 93 | Gridding and Interpolation
 94 | --------------------------
 95 | .. currentmodule:: glidertools
 96 | .. autosummary::
 97 |    :toctree: ./_generated/
 98 | 
 99 |    mapping.interp_obj
100 |    mapping.grid_data
101 |    mapping.variogram
102 | 
103 | 
104 | 
105 | Plotting
106 | --------
107 | .. currentmodule:: glidertools
108 | .. autosummary::
109 |    :toctree: ./_generated/
110 | 
111 |    plot.plot_functions
112 | 
113 | 
114 | General Utilities
115 | -----------------
116 | .. currentmodule:: glidertools
117 | .. autosummary::
118 |    :toctree: ./_generated/
119 | 
120 |    utils.time_average_per_dive
121 |    utils.mask_above_depth
122 |    utils.mask_below_depth
123 |    utils.mask_profile_depth
124 |    utils.merge_dimensions
125 |    utils.calc_glider_vert_velocity
126 |    utils.calc_dive_phase
127 |    utils.calc_dive_number
128 |    utils.dive_phase_to_number
129 |    utils.distance
130 |    utils.group_by_profiles
131 | 


--------------------------------------------------------------------------------
/docs/authors.md:
--------------------------------------------------------------------------------
 1 | Citing GliderTools
 2 | ==================
 3 | 
 4 | [![](https://zenodo.org/badge/141922866.svg)](https://zenodo.org/badge/latestdoi/141922866)
 5 | 
 6 | If you would like to cite or reference Glider Tools, please use:
 7 | 
 8 | Gregor, L., Ryan-Keogh, T. J., Nicholson, S.-A., du Plessis, M., Giddy, I., & Swart, S. (2019). GliderTools: A Python Toolbox for Processing Underwater Glider Data. Frontiers in Marine Science, 6(December), 1–13. https://doi.org/10.3389/fmars.2019.00738
 9 | 
10 | 
11 | Project Contributors
12 | --------------------
13 | 
14 | The following people have made contributions to the project (in alphabetical order by last name) and are considered "The GliderTools Developers". These contributors will be added as authors upon the next major release of GliderTools (i.e. new DOI release).
15 | - [Dhruv Balwada](https://dhruvbalwada.github.io/) - University of Washington, USA. (ORCID: [0000-0001-6632-0187](https://orcid.org/0000-0001-6632-0187))
16 | - [Julius Busecke](http://jbusecke.github.io/) -  Columbia University, USA. (ORCID: [0000-0001-8571-865X](https://orcid.org/0000-0001-8571-865X))
17 | - [Isabelle Giddy](https://github.com/isgiddy/) - University of Cape Town: Cape Town, Western Cape, South Africa. (ORCID: [0000-0002-8926-3311](https://orcid.org/0000-0002-8926-3311))
18 | - [Luke Gregor](https://github.com/lukegre) - Environmental Physics, ETH Zuerich: Zurich, Switzerland. (ORCID: [0000-0001-6071-1857](https://orcid.org/0000-0001-6071-1857))
19 | - [Tom Hull](https://github.com/tomhull) - Centre for Environment Fisheries and Aquaculture Science: Lowestoft, UK. (ORCID: [0000-0002-1714-9317](https://orcid.org/0000-0002-1714-9317))
20 | - [Martin Mohrmann](https://github.com/MartinMohrmann) - Voice of the Ocean Foundation, Gothenburg, Sweden. (ORCID: [0000-0001-8056-4866](https://orcid.org/0000-0001-8056-4866))
21 | - [Sarah-Anne Nicholson](https://github.com/sarahnicholson) - Council for Scientific and Industrial Research: Cape Town, South Africa. (ORCID: [0000-0002-1226-1828](https://orcid.org/0000-0002-1226-1828))
22 | - [Marcel du Plessis](https://mduplessis.com) - University of Cape Town: Cape Town, Western Cape, South Africa. (ORCID: [0000-0003-2759-2467](https://orcid.org/0000-0003-2759-2467))
23 | - [Callum Rollo](https://callumrollo.github.io/) - Voice of the Ocean Foundation, Gothenburg, Sweden. (ORCID: [0000-0002-5134-7886](https://orcid.org/0000-0002-5134-7886))
24 | - [Tommy Ryan-Keogh](https://github.com/tjryankeogh) - Council for Scientific and Industrial Research: Cape Town, South Africa. (ORCID: [0000-0001-5144-171X](https://orcid.org/0000-0001-5144-171X))
25 | - [Sebastiaan Swart](https://sebswart.com) - University of Gothenburg: Gothenburg, Sweden. (ORCID: [0000-0002-2251-8826](https://orcid.org/0000-0002-2251-8826))
26 | - [Soeren Thomsen](https://github.com/soerenthomsen) - LOCEAN/IPSL/CNRS/Sorbonne University: Paris, France. (ORCID: [0000-0002-0598-8340](https://orcid.org/0000-0002-0598-8340))
27 | 


--------------------------------------------------------------------------------
/docs/calibration.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # Calibration with bottle samples
 3 | 
 4 | Bottle calibration can also be done using the `calibration` module.
 5 | 
 6 | The bottle file needs to be in a specific format with dates (`datetime64` format), depth and the variable values. This can be imported with any method available. I recommend `pandas.read_csv` as shown in the example below. Note that latitude and longitude are not taken into account, thus the user needs to make sure that the CTD cast was in the correct location (and time, but this will be used to match the glider).
 7 | 
 8 | 
 9 | ```python
10 | import pandas as pd
11 | 
12 | fname = '/Users/luke/Work/Publications/2019_Gregor_Front_glider/figures/SOSCEX 3 PS1.csv'
13 | cal = pd.read_csv(fname, parse_dates=['datetime'], dayfirst=True)
14 | ```
15 | 
16 | The `calibration.bottle_matchup` function returns an array that matches the size of the ungridded glider data.
17 | The matching is done based on depth and time from both the glider and the CTD. The function will show how many samples have been matched and the smallest time difference between a CTD rosette cast and a dive (any time on the dive).
18 | 
19 | ### Using depth
20 | 
21 | 
22 | ```python
23 | %autoreload 2
24 | 
25 | dat['bottle_sal'] = gt.calibration.bottle_matchup(
26 |     dat.dives, dat.depth, dat.time,
27 |     cal.depth, cal.datetime, cal.sal)
28 | 
29 | model = gt.calibration.robust_linear_fit(dat.salt_qc, dat.bottle_sal, fit_intercept=True, epsilon=1.5)
30 | dat['salinity_qc'] = model.predict(dat.salt_qc)
31 | ```
32 | 
33 |     [stn 0/5]  FAILED: 2015-07-28 10:25 Couldn't find samples within constraints
34 |     [stn 1/5]  FAILED: 2015-07-28 16:15 Couldn't find samples within constraints
35 |     [stn 2/5]  FAILED: 2015-12-08 03:23 Couldn't find samples within constraints
36 |     [stn 3/5] SUCCESS: 2016-01-05 17:46 (15 of 15 samples) match-up within 0.0 minutes
37 |     [stn 4/5] SUCCESS: 2016-02-08 03:14 (12 of 17 samples) match-up within 0.0 minutes
38 |     (13, 1) (100, 1)
39 | 
40 | 
41 | 
42 | ![png](img/output_92_1.png)
43 | 
44 | 
45 | ### Using Density
46 | 
47 | 
48 | ```python
49 | %autoreload 2
50 | 
51 | dat['bottle_sal'] = gt.calibration.bottle_matchup(
52 |     dat.dives, dat.density, dat.time,
53 |     cal.density, cal.datetime, cal.sal)
54 | 
55 | model = gt.calibration.robust_linear_fit(dat.salt_qc, dat.bottle_sal, fit_intercept=True, epsilon=1.5)
56 | dat['salinity_qc'] = model.predict(dat.salt_qc)
57 | ```
58 | 
59 |     [stn 0/5]  FAILED: 2015-07-28 10:25 Couldn't find samples within constraints
60 |     [stn 1/5]  FAILED: 2015-07-28 16:15 Couldn't find samples within constraints
61 |     [stn 2/5]  FAILED: 2015-12-08 03:23 Couldn't find samples within constraints
62 |     [stn 3/5] SUCCESS: 2016-01-05 17:46 (15 of 15 samples) match-up within 0.0 minutes
63 |     [stn 4/5] SUCCESS: 2016-02-08 03:14 (16 of 17 samples) match-up within 0.0 minutes
64 |     (6, 1) (100, 1)
65 | 
66 | 
67 | 
68 | ![png](img/output_94_1.png)
69 | 


--------------------------------------------------------------------------------
/docs/cheatsheet.md:
--------------------------------------------------------------------------------
1 | Cheat Sheet
2 | ===========
3 | ![cheat sheet image](img/package_overview.png)
4 | 


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #
  3 | # Read the Docs Template documentation build configuration file, created by
  4 | # sphinx-quickstart on Tue Aug 26 14:19:49 2014.
  5 | #
  6 | # This file is execfile()d with the current directory set to its
  7 | # containing dir.
  8 | #
  9 | # Note that not all possible configuration values are present in this
 10 | # autogenerated file.
 11 | #
 12 | # All configuration values have a default; values that are commented out
 13 | # serve to show the default.
 14 | 
 15 | from pkg_resources import get_distribution
 16 | 
 17 | 
 18 | release = get_distribution("glidertools").version
 19 | version = ".".join(release.split(".")[:2])
 20 | 
 21 | # If extensions (or modules to document with autodoc) are in another directory,
 22 | # add these directories to sys.path here. If the directory is relative to the
 23 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 24 | # sys.path.insert(0, os.path.abspath('.'))
 25 | 
 26 | # -- General configuration ------------------------------------------------
 27 | 
 28 | # If your documentation needs a minimal Sphinx version, state it here.
 29 | # needs_sphinx = "1.4"
 30 | 
 31 | # Add any Sphinx extension module names here, as strings. They can be
 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 33 | # ones.
 34 | 
 35 | extensions = [
 36 |     "sphinx.ext.autodoc",
 37 |     "sphinx.ext.autosummary",
 38 |     "sphinx.ext.mathjax",
 39 |     "sphinx.ext.napoleon",
 40 |     "sphinx.ext.extlinks",
 41 |     "recommonmark",
 42 | ]
 43 | 
 44 | autosummary_generate = True
 45 | 
 46 | autodoc_default_flags = ["members", "inherited-members"]
 47 | 
 48 | # Add any paths that contain templates here, relative to this directory.
 49 | templates_path = ["_templates"]
 50 | 
 51 | # The suffix of source filenames.
 52 | source_suffix = [".rst", ".md"]
 53 | 
 54 | napoleon_numpy_docstring = True
 55 | 
 56 | # The encoding of source files.
 57 | # source_encoding = 'utf-8-sig'
 58 | 
 59 | # The master toctree document.
 60 | master_doc = "index"
 61 | 
 62 | # General information about the project.
 63 | project = "GliderTools"
 64 | copyright = "GliderTools, 2023"
 65 | 
 66 | # The version info for the project you're documenting, acts as replacement for
 67 | # |version| and |release|, also used in various other places throughout the
 68 | # built documents.
 69 | #
 70 | # The short X.Y version.
 71 | version = ".".join(version.split(".")[:2])
 72 | # The full version, including alpha/beta/rc tags.
 73 | release = version
 74 | 
 75 | # link to github issues
 76 | extlinks = {
 77 |     "issue": ("https://github.com/GliderToolsCommunity/GliderTools/issues/%s", "GH#%s"),
 78 |     "pull": ("https://github.com/GliderToolsCommunity/GliderTools/issues/%s", "GH#%s"),
 79 | }
 80 | 
 81 | # The language for content autogenerated by Sphinx. Refer to documentation
 82 | # for a list of supported languages.
 83 | # language = None
 84 | 
 85 | # There are two options for replacing |today|: either, you set today to some
 86 | # non-false value, then it is used:
 87 | # today = ''
 88 | # Else, today_fmt is used as the format for a strftime call.
 89 | # today_fmt = '%B %d, %Y'
 90 | 
 91 | # List of patterns, relative to source directory, that match files and
 92 | # directories to ignore when looking for source files.
 93 | exclude_patterns = ["_build"]
 94 | 
 95 | # The reST default role (used for this markup: `text`) to use for all
 96 | # documents.
 97 | # default_role = None
 98 | 
 99 | # If true, '()' will be appended to :func: etc. cross-reference text.
100 | # add_function_parentheses = True
101 | 
102 | # If true, the current module name will be prepended to all description
103 | # unit titles (such as .. function::).
104 | # add_module_names = True
105 | 
106 | 
107 | # If true, sectionauthor and moduleauthor directives will be shown in the
108 | # output. They are ignored by default.
109 | # show_authors = False
110 | 
111 | # The name of the Pygments (syntax highlighting) style to use.
112 | pygments_style = "sphinx"
113 | 
114 | # A list of ignored prefixes for module index sorting.
115 | # modindex_common_prefix = []
116 | 
117 | # If true, keep warnings as "system message" paragraphs in the built documents.
118 | # keep_warnings = False
119 | 
120 | 
121 | # -- Options for HTML output ----------------------------------------------
122 | 
123 | # The theme to use for HTML and HTML Help pages.  See the documentation for
124 | # a list of builtin themes.
125 | html_theme = "sphinx_rtd_theme"
126 | 
127 | # Theme options are theme-specific and customize the look and feel of a theme
128 | # further.  For a list of options available for each theme, see the
129 | # documentation.
130 | # html_theme_options = {
131 | #     "logo_only": True,
132 | #     "display_version": False,
133 | #     "style_nav_header_background": "#343131",
134 | # }
135 | 
136 | # Add any paths that contain custom themes here, relative to this directory.
137 | # html_theme_path = []
138 | 
139 | # The name for this set of Sphinx documents.  If None, it defaults to
140 | # "<project> v<release> documentation".
141 | # html_title = None
142 | 
143 | # A shorter title for the navigation bar.  Default is the same as html_title.
144 | # html_short_title = None
145 | 
146 | # The name of an image file (relative to this directory) to place at the top
147 | # of the sidebar.
148 | html_logo = "img/logo_with_name.png"
149 | 
150 | # The name of an image file (within the static path) to use as favicon of the
151 | # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
152 | # pixels large.
153 | # html_favicon = None
154 | 
155 | # Add any paths that contain custom static files (such as style sheets) here,
156 | # relative to this directory. They are copied after the builtin static files,
157 | # so a file named "default.css" will overwrite the builtin "default.css".
158 | # html_static_path = ["static"]
159 | 
160 | # custom CSS files
161 | # html_context = {
162 | #    "css_files": ["_static/css/custom.css"],
163 | # }
164 | 
165 | # Add any extra paths that contain custom files (such as robots.txt or
166 | # .htaccess) here, relative to this directory. These files are copied
167 | # directly to the root of the documentation.
168 | # html_extra_path = []
169 | 
170 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
171 | # using the given strftime format.
172 | # html_last_updated_fmt = '%b %d, %Y'
173 | 
174 | # If true, SmartyPants will be used to convert quotes and dashes to
175 | # typographically correct entities.
176 | # html_use_smartypants = True
177 | 
178 | # Custom sidebar templates, maps document names to template names.
179 | # html_sidebars = {}
180 | 
181 | # Additional templates that should be rendered to pages, maps page names to
182 | # template names.
183 | # html_additional_pages = {}
184 | 
185 | # If false, no module index is generated.
186 | # html_domain_indices = True
187 | 
188 | # If false, no index is generated.
189 | # html_use_index = True
190 | 
191 | # If true, the index is split into individual pages for each letter.
192 | # html_split_index = False
193 | 
194 | # If true, links to the reST sources are added to the pages.
195 | # html_show_sourcelink = True
196 | 
197 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
198 | # html_show_sphinx = True
199 | 
200 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
201 | # html_show_copyright = True
202 | 
203 | # If true, an OpenSearch description file will be output, and all pages will
204 | # contain a <link> tag referring to it.  The value of this option must be the
205 | # base URL from which the finished HTML is served.
206 | # html_use_opensearch = ''
207 | 
208 | # This is the file name suffix for HTML files (e.g. ".xhtml").
209 | # html_file_suffix = None
210 | 
211 | # Output file base name for HTML help builder.
212 | htmlhelp_basename = "ReadtheDocsTemplatedoc"
213 | 
214 | 
215 | # -- Options for LaTeX output ---------------------------------------------
216 | 
217 | latex_elements = {
218 |     # The paper size ('letterpaper' or 'a4paper').
219 |     # 'papersize': 'letterpaper',
220 |     # The font size ('10pt', '11pt' or '12pt').
221 |     # 'pointsize': '10pt',
222 |     # Additional stuff for the LaTeX preamble.
223 |     # 'preamble': '',
224 | }
225 | 
226 | # Grouping the document tree into LaTeX files. List of tuples
227 | # (source start file, target name, title,
228 | #  author, documentclass [howto, manual, or own class]).
229 | latex_documents = [
230 |     (
231 |         "index",
232 |         "ReadtheDocsTemplate.tex",
233 |         "Read the Docs Template Documentation",
234 |         "Read the Docs",
235 |         "manual",
236 |     ),
237 | ]
238 | 
239 | # The name of an image file (relative to this directory) to place at the top of
240 | # the title page.
241 | # latex_logo = None
242 | 
243 | # For "manual" documents, if this is true, then toplevel headings are parts,
244 | # not chapters.
245 | # latex_use_parts = False
246 | 
247 | # If true, show page references after internal links.
248 | # latex_show_pagerefs = False
249 | 
250 | # If true, show URL addresses after external links.
251 | # latex_show_urls = False
252 | 
253 | # Documents to append as an appendix to all manuals.
254 | # latex_appendices = []
255 | 
256 | # If false, no module index is generated.
257 | # latex_domain_indices = True
258 | 
259 | 
260 | # -- Options for manual page output ---------------------------------------
261 | 
262 | # One entry per manual page. List of tuples
263 | # (source start file, name, description, authors, manual section).
264 | man_pages = [
265 |     (
266 |         "index",
267 |         "readthedocstemplate",
268 |         "Read the Docs Template Documentation",
269 |         ["Read the Docs"],
270 |         1,
271 |     )
272 | ]
273 | 
274 | # If true, show URL addresses after external links.
275 | # m an_show_urls = False
276 | 
277 | 
278 | # -- Options for Texinfo output -------------------------------------------
279 | 
280 | # Grouping the document tree into Texinfo files. List of tuples
281 | # (source start file, target name, title, author,
282 | #  dir menu entry, description, category)
283 | texinfo_documents = [
284 |     (
285 |         "index",
286 |         "ReadtheDocsTemplate",
287 |         "Read the Docs Template Documentation",
288 |         "Read the Docs",
289 |         "ReadtheDocsTemplate",
290 |         "Miscellaneous",
291 |     ),
292 | ]
293 | 
294 | 
295 | # Documents to append as an appendix to all manuals.
296 | # texinfo_appendices = []
297 | 
298 | # If false, no module index is generated.
299 | # texinfo_domain_indices = True
300 | 
301 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
302 | # texinfo_show_urls = 'footnote'
303 | 
304 | # If true, do not generate a @detailmenu in the "Top" node's menu.
305 | # texinfo_no_detailmenu = False
306 | 


--------------------------------------------------------------------------------
/docs/contributing.rst:
--------------------------------------------------------------------------------
  1 | =====================
  2 | Contribution Guide
  3 | =====================
  4 | 
  5 | Contributions are highly welcomed and appreciated.  Every little help counts,
  6 | so do not hesitate! You can make a high impact on ``glidertools`` just by using it, being
  7 | involved in `discussions <https://github.com/GliderToolsCommunity/GliderTools/discussions>`_
  8 |  and reporting `issues <https://github.com/GliderToolsCommunity/GliderTools/issues>`__.
  9 | 
 10 | The following sections cover some general guidelines
 11 | regarding development in ``glidertools`` for maintainers and contributors.
 12 | 
 13 | Nothing here is set in stone and can't be changed.
 14 | Feel free to suggest improvements or changes in the workflow.
 15 | 
 16 | 
 17 | .. contents:: Contribution links
 18 |    :depth: 2
 19 | 
 20 | 
 21 | 
 22 | .. _submitfeedback:
 23 | 
 24 | Feature requests and feedback
 25 | -----------------------------
 26 | 
 27 | We are eager to hear about your requests for new features and any suggestions about the
 28 | API, infrastructure, and so on. Feel free to start a discussion about these on the
 29 | `discussions tab <https://github.com/GliderToolsCommunity/GliderTools/discussions>`_ on github
 30 | under the "ideas" section.
 31 | 
 32 | After discussion with a few community members, and agreement that the feature should be added and who will work on it,
 33 | a new issue should be opened. In the issue, please make sure to explain in detail how the feature should work and keep
 34 | the scope as narrow as possible. This will make it easier to implement in small PRs.
 35 | 
 36 | 
 37 | .. _reportbugs:
 38 | 
 39 | Report bugs
 40 | -----------
 41 | 
 42 | Report bugs for ``glidertools`` in the `issue tracker <https://github.com/GliderToolsCommunity/GliderTools/issues>`_
 43 | with the label "bug".
 44 | 
 45 | If you can write a demonstration test that currently fails but should pass
 46 | that is a very useful commit to make as well, even if you cannot fix the bug itself.
 47 | 
 48 | 
 49 | .. _fixbugs:
 50 | 
 51 | Fix bugs
 52 | --------
 53 | 
 54 | Look through the `GitHub issues for bugs <https://github.com/GliderToolsCommunity/GliderTools/labels/bug>`_.
 55 | 
 56 | Talk to developers to find out how you can fix specific bugs.
 57 | 
 58 | 
 59 | 
 60 | Preparing Pull Requests
 61 | -----------------------
 62 | 
 63 | #. Fork the
 64 |    `glidertools GitHub repository <https://github.com/GliderToolsCommunity/GliderTools>`__.  It's
 65 |    fine to use ``glidertools`` as your fork repository name because it will live
 66 |    under your username.
 67 | 
 68 | #. Clone your fork locally using `git <https://git-scm.com/>`_, connect your repository
 69 |    to the upstream (main project), and create a branch::
 70 | 
 71 |     $ git clone git@github.com:YOUR_GITHUB_USERNAME/glidertools.git # clone to local machine
 72 |     $ cd glidertools
 73 |     $ git remote add upstream git@github.com:GliderToolsCommunity/GliderTools.git # connect to upstream remote
 74 | 
 75 |     # now, to fix a bug or add feature create your own branch off "master":
 76 | 
 77 |     $ git checkout -b your-bugfix-feature-branch-name master # Create a new branch where you will make changes
 78 | 
 79 |    If you need some help with Git, follow this quick start
 80 |    guide: https://git.wiki.kernel.org/index.php/QuickStart
 81 | 
 82 | #. Set up a [conda](environment) with all necessary dependencies::
 83 | 
 84 |     $ conda env create -f ci/environment-py3.8.yml
 85 | 
 86 | #. Activate your environment::
 87 | 
 88 |    $ conda activate test_env_glidertools
 89 |    *Make sure you are in this environment when working on changes in the future too.*
 90 | 
 91 | #. Install the GliderTools package::
 92 | 
 93 |    $ pip install -e . --no-deps
 94 | 
 95 | #. Before you modify anything, ensure that the setup works by executing all tests::
 96 | 
 97 |    $ pytest
 98 | 
 99 |    You want to see an output indicating no failures, like this::
100 | 
101 |    $ ========================== n passed, j warnings in 17.07s ===========================
102 | 
103 | 
104 | #. Install `pre-commit <https://pre-commit.com>`_ and its hook on the ``glidertools`` repo::
105 | 
106 |      $ pip install --user pre-commit
107 |      $ pre-commit install
108 | 
109 |    Afterwards ``pre-commit`` will run whenever you commit. If some errors are reported by pre-commit
110 |    you should format the code by running::
111 | 
112 |      $ pre-commit run --all-files
113 | 
114 |    and then try to commit again.
115 | 
116 |    https://pre-commit.com/ is a framework for managing and maintaining multi-language pre-commit
117 |    hooks to ensure code-style and code formatting is consistent.
118 | 
119 |     You can now edit your local working copy and run/add tests as necessary. Please follow
120 |     PEP-8 for naming. When committing, ``pre-commit`` will modify the files as needed, or
121 |     will generally be quite clear about what you need to do to pass the commit test.
122 | 
123 | 
124 | 
125 | 
126 | 
127 | #. Break your edits up into reasonably sized commits::
128 | 
129 |     $ git commit -a -m "<commit message>"
130 |     $ git push -u
131 | 
132 |    Committing will run the pre-commit hooks (isort, black and flake8).
133 |    Pushing will run the pre-push hooks (pytest and coverage)
134 | 
135 |    We highly recommend using test driven development, but our coverage requirement is
136 |    low at the moment due to lack of tests. If you are able to write tests, please
137 |    stick to `xarray <http://xarray.pydata.org/en/stable/contributing.html>`_'s
138 |    testing recommendations.
139 | 
140 | 
141 | #. Add yourself to the
142 |     `Project Contributors <https://glidertools.readthedocs.io/en/latest/authors.html>`_
143 |     list via ``./docs/authors.md``.
144 | 
145 | #. Finally, submit a pull request (PR) through the GitHub website using this data::
146 | 
147 |     head-fork: YOUR_GITHUB_USERNAME/glidertools
148 |     compare: your-branch-name
149 | 
150 |     base-fork: GliderToolsCommunity/GliderTools
151 |     base: master
152 | 
153 |    The merged pull request will undergo the same testing that your local branch
154 |    had to pass when pushing.
155 | 
156 | #. After your pull request is merged into the `GliderTools/master`, you will need
157 |    to fetch those changes and rebase your master so that your master reflects the latest
158 |    version of GliderTools. The changes should be fetched and incorporated (rebase) also right
159 |    before you are planning to introduce changes.::
160 | 
161 |      $ git checkout master # switch back to master branch
162 |      $ git fetch upstream  # Download all changes from central upstream repo
163 |      $ git rebase upstream/master  # Apply the changes that have been made to central repo,
164 |      $ # since your last fetch, onto you master.
165 |      $ git branch -d your-bugfix-feature-branch-name  # to delete the branch after PR is approved
166 | 
167 | Release Instructions
168 | --------------------
169 | 
170 | This is a documentation repo for people in the group on how to do the integrated deployment.
171 | 
172 | NB RULE! Never commit to master.
173 | 
174 | 1. Change the version in the setup.py file. Must be format YYYY.<release number>
175 | 2. Create a release with a tag that has the same format as the version above.
176 | 3. The distribution will be built automatically and pushed to PyPi
177 | 4. The DOI will also be updated on Zenodo. (untested, see #165)
178 | 


--------------------------------------------------------------------------------
/docs/img/code_bgu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/code_bgu.png


--------------------------------------------------------------------------------
/docs/img/interactive_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/interactive_plot.png


--------------------------------------------------------------------------------
/docs/img/logo_with_name.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/logo_with_name.png


--------------------------------------------------------------------------------
/docs/img/logo_with_name_vert.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/logo_with_name_vert.png


--------------------------------------------------------------------------------
/docs/img/logo_wo_name.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/logo_wo_name.png


--------------------------------------------------------------------------------
/docs/img/output_101_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_101_2.png


--------------------------------------------------------------------------------
/docs/img/output_106_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_106_1.png


--------------------------------------------------------------------------------
/docs/img/output_108_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_108_0.png


--------------------------------------------------------------------------------
/docs/img/output_111_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_111_0.png


--------------------------------------------------------------------------------
/docs/img/output_116_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_116_0.png


--------------------------------------------------------------------------------
/docs/img/output_14_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_14_0.png


--------------------------------------------------------------------------------
/docs/img/output_16_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_16_0.png


--------------------------------------------------------------------------------
/docs/img/output_16_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_16_1.png


--------------------------------------------------------------------------------
/docs/img/output_19_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_19_0.png


--------------------------------------------------------------------------------
/docs/img/output_19_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_19_1.png


--------------------------------------------------------------------------------
/docs/img/output_22_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_22_0.png


--------------------------------------------------------------------------------
/docs/img/output_24_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_24_0.png


--------------------------------------------------------------------------------
/docs/img/output_26_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_26_0.png


--------------------------------------------------------------------------------
/docs/img/output_28_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_28_1.png


--------------------------------------------------------------------------------
/docs/img/output_29_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_29_1.png


--------------------------------------------------------------------------------
/docs/img/output_33_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_33_0.png


--------------------------------------------------------------------------------
/docs/img/output_36_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_36_0.png


--------------------------------------------------------------------------------
/docs/img/output_38_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_38_1.png


--------------------------------------------------------------------------------
/docs/img/output_41_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_41_0.png


--------------------------------------------------------------------------------
/docs/img/output_43_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_43_0.png


--------------------------------------------------------------------------------
/docs/img/output_45_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_45_0.png


--------------------------------------------------------------------------------
/docs/img/output_47_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_47_0.png


--------------------------------------------------------------------------------
/docs/img/output_49_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_49_0.png


--------------------------------------------------------------------------------
/docs/img/output_51_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_51_0.png


--------------------------------------------------------------------------------
/docs/img/output_55_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_55_1.png


--------------------------------------------------------------------------------
/docs/img/output_56_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_56_1.png


--------------------------------------------------------------------------------
/docs/img/output_59_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_59_0.png


--------------------------------------------------------------------------------
/docs/img/output_61_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_61_0.png


--------------------------------------------------------------------------------
/docs/img/output_64_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_64_0.png


--------------------------------------------------------------------------------
/docs/img/output_65_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_65_0.png


--------------------------------------------------------------------------------
/docs/img/output_67_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_67_1.png


--------------------------------------------------------------------------------
/docs/img/output_71_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_71_0.png


--------------------------------------------------------------------------------
/docs/img/output_74_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_74_0.png


--------------------------------------------------------------------------------
/docs/img/output_76_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_76_0.png


--------------------------------------------------------------------------------
/docs/img/output_78_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_78_0.png


--------------------------------------------------------------------------------
/docs/img/output_80_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_80_0.png


--------------------------------------------------------------------------------
/docs/img/output_82_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_82_0.png


--------------------------------------------------------------------------------
/docs/img/output_84_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_84_0.png


--------------------------------------------------------------------------------
/docs/img/output_86_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_86_1.png


--------------------------------------------------------------------------------
/docs/img/output_86_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_86_2.png


--------------------------------------------------------------------------------
/docs/img/output_92_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_92_1.png


--------------------------------------------------------------------------------
/docs/img/output_94_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_94_1.png


--------------------------------------------------------------------------------
/docs/img/output_97_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_97_0.png


--------------------------------------------------------------------------------
/docs/img/output_99_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/output_99_2.png


--------------------------------------------------------------------------------
/docs/img/package_overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/package_overview.png


--------------------------------------------------------------------------------
/docs/img/package_structure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/package_structure.png


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | =====================================
 2 | Glider Tools: profile data processing
 3 | =====================================
 4 | 
 5 | Glider tools is a Python 3.8+ package designed to process data from the first level of processing to a science ready dataset.
 6 | The package is designed to easily import data to a standard column format:
 7 | ``numpy.ndarray``, ``pandas.DataFrame`` or ``xarray.DataArray`` (we recommend
 8 | the latter which has full support for metadata).
 9 | Cleaning and smoothing functions are flexible and can be applied as required by the user.
10 | We provide examples and demonstrate best practices as developed by the `SOCCO Group <http://www.socco.org.za/>`_.
11 | 
12 | For the original publication of this package see: https://doi.org/10.3389/fmars.2019.00738.
13 | 
14 | For recommendations or bug reports, please visit https://github.com/GliderToolsCommunity/GliderTools/issues/new
15 | 
16 | .. toctree::
17 |    :maxdepth: 2
18 |    :caption: Getting started
19 | 
20 |    installation
21 |    cheatsheet
22 | 
23 | .. toctree::
24 |    :maxdepth: 2
25 |    :caption: Users Guide
26 | 
27 |    loading
28 |    quality_control
29 |    physics
30 |    optics
31 |    calibration
32 |    mapping
33 |    saving
34 |    other
35 | 
36 | .. toctree::
37 |    :maxdepth: 2
38 |    :caption: Help and Reference
39 | 
40 |    GitHub Repo <https://github.com/GliderToolsCommunity/GliderTools>
41 |    api
42 |    package_structure
43 |    whats-new
44 |    authors
45 |    contributing
46 |    wishlist
47 | 


--------------------------------------------------------------------------------
/docs/installation.md:
--------------------------------------------------------------------------------
 1 | Installation
 2 | ============
 3 | 
 4 | Notes on how to install
 5 | 
 6 | ##### Conda
 7 | The easiest way to install the packge is with [conda](https://docs.conda.io/en/latest/): `conda install -c conda-forge glidertools`.
 8 | 
 9 | ##### PyPI
10 | You can also install with pip: `pip install glidertools`.
11 | 
12 | ##### GitHub
13 | For the most up to date version of GliderTools, you can install directly from the online repository hosted on GitLab.
14 | 
15 | 1. Clone glidertools to your local machine: `git clone https://github.com/GliderToolsCommunity/GliderTools`
16 | 2. Change to the parent directory of GliderTools
17 | 3. Install glidertools with `pip install -e ./GliderTools`. This will allow changes you make locally, to be reflected when you import the package in Python
18 | 


--------------------------------------------------------------------------------
/docs/loading.md:
--------------------------------------------------------------------------------
  1 | Loading data
  2 | ============
  3 | 
  4 | To start using Glider Tools you first need to import the package to the
  5 | interactive workspace.
  6 | 
  7 | 
  8 | ## Import `GliderTools`
  9 | 
 10 | 
 11 | ```python
 12 | # pylab for more MATLAB like environment and inline displays plots below cells
 13 | %pylab inline
 14 | 
 15 | # if gsw Warning shows, manually install gsw if possible - will still work without
 16 | import glidertools as gt
 17 | from cmocean import cm as cmo  # we use this for colormaps
 18 | ```
 19 | 
 20 |     Populating the interactive namespace from numpy and matplotlib
 21 | 
 22 | 
 23 | ## Working with Seaglider base station files
 24 | 
 25 | GliderTools supports loading Seaglider files, including `scicon` data (different sampling frequencies).
 26 | There is a function that makes it easier to find variable names that you'd like to load: `gt.load.seaglider_show_variables`
 27 | 
 28 | This function is demonstrated in the cell below.
 29 | The function accepts a **list of file names** and can also receive a string with a wildcard placeholder (`*`) and basic regular expressions are also supported. In the example below we use a simple asterisk placeholder for all the files.
 30 | 
 31 | Note that the function chooses only one file from the passed list or glob string - this file name will be shown. The returned table shows the variable name, dimensions, units and brief comment if it is available.
 32 | 
 33 | 
 34 | ```python
 35 | filenames = '/Users/luke/Work/Data/sg542/p5420*.nc'
 36 | 
 37 | gt.load.seaglider_show_variables(filenames)
 38 | ```
 39 | 
 40 |     information is based on file: /Users/luke/Work/Data/sg542/p5420177.nc
 41 | 
 42 |     <table will be displayed here>
 43 | 
 44 | ## Working with VOTO Seaexplorer files or xarray-datasets
 45 | 
 46 | Glidertools supports loading Seaexplorer files. This is implemented and tested with `VOTO https://observations.voiceoftheocean.org`_ datasets in mind currently, but we are happy about feedback/pullrequests how it works for other SeaExplorer datasets. VOTO data can either be downloaded from the website using a browser or, more comfortable, from an `ERDAP server https://erddap.observations.voiceoftheocean.org/erddap/index.html`_ . See the `demo notebook <https://github.com/voto-ocean-knowledge/download_glider_data>`_ to get started with downloads over the API.
 47 | 
 48 | After download of a .nc file or xarray-Dataset, it can be read into Glidertools by calling `gt.load.voto_seaexplorer_nc` or `gt.load.voto_seaexplorer_dataset` respectively. Resulting datasets can be merged by calling `gt.load.voto_concat_datasets`. The import of the data into GliderTools is hereby finished, remaining steps on this wiki-page are optional.
 49 | 
 50 | ## Load variables
 51 | 
 52 | From the variable listing, one can choose multiple variables to load. Note that one only needs the variable name to load the data. Below, we've created a list of variables that we'll be using for this demo.
 53 | 
 54 | The `gt.load.seaglider_basestation_netCDFs` function is used to load a list of variables. It requires the filename string or list (as described above) and keys. It may be that these variables are not sampled at the same frequency. In this case, the loading function will load the sampling frequency dimensions separately. The function will try to find a time variable for each sampling frequency/dimension.
 55 | 
 56 | ### Coordinates and automatic *time* fetching
 57 | All associated coordinate variables will also be loaded with the data if coordinates are documented. These may included *latitude, longitude, depth* and *time* (naming may vary). If time cannot be found for a dimension, a *time* variable from a different dimension with the same number of observations is used instead. This insures that data can be merged based on the time of sampling.
 58 | 
 59 | ### Merging data based on time
 60 | If the `return_merged` is set to *True*, the function will merge the dimensions if the dimension has an associated *time* variable.
 61 | 
 62 | The function returns a dictionary of `xarray.Datasets` - a Python package that deals with coordinate indexed multi-dimensional arrays. We recommend that you read the documentation (http://xarray.pydata.org/en/stable/) as this package is used throughout *GliderTools*. This allows the original metadata to be copied with the data. The dictionary keys are the names of the dimensions. If `return_merged` is set to *True* an additional entry under the key `merged` will be included.
 63 | 
 64 | The structure of a dimension output is shown below. Note that the merged data will use the largest dimension as the primary dataset and the other data will be merged onto that time index. Data is linearly interpolated to the nearest time measurement of the primary index, but only by one measurement to ensure transparancy.
 65 | 
 66 | 
 67 | ```python
 68 | names = [
 69 |     'ctd_depth',
 70 |     'ctd_time',
 71 |     'ctd_pressure',
 72 |     'salinity',
 73 |     'temperature',
 74 |     'eng_wlbb2flvmt_Chlsig',
 75 |     'eng_wlbb2flvmt_wl470sig',
 76 |     'eng_wlbb2flvmt_wl700sig',
 77 |     'aanderaa4330_dissolved_oxygen',
 78 |     'eng_qsp_PARuV',
 79 | ]
 80 | 
 81 | ds_dict = gt.load.seaglider_basestation_netCDFs(
 82 |     filenames, names,
 83 |     return_merged=True,
 84 |     keep_global_attrs=False
 85 | )
 86 | ```
 87 | 
 88 |     DIMENSION: sg_data_point
 89 |     {
 90 |         ctd_pressure, eng_wlbb2flvmt_wl470sig, eng_wlbb2flvmt_wl700sig, temperature,
 91 |         ctd_time, ctd_depth, latitude, aanderaa4330_dissolved_oxygen, salinity,
 92 |         eng_wlbb2flvmt_Chlsig, longitude
 93 |     }
 94 | 
 95 | 
 96 |     100%|██████████| 336/336 [00:04<00:00, 73.66it/s]
 97 | 
 98 | 
 99 | 
100 |     DIMENSION: qsp2150_data_point
101 |     {eng_qsp_PARuV, time}
102 | 
103 | 
104 |     100%|██████████| 336/336 [00:01<00:00, 181.67it/s]
105 | 
106 | 
107 | 
108 |     Merging dimensions on time indicies: sg_data_point, qsp2150_data_point,
109 | 
110 | 
111 | The returned data contains the dimensions of the requested variables a `merged` object is also returned if return_merged=True
112 | ```python
113 | 
114 | print(ds_dict.keys())
115 | ```
116 | 
117 |     dict_keys(['sg_data_point', 'qsp2150_data_point', 'merged'])
118 | 
119 | 
120 | ### Metadata handling
121 | If the keyword arguement `keep_global_attrs=True`, the attributes from the original files (for all that are the same) are passed on to the output *Datasets* from the original netCDF attributes. The variable attributes (units, comments, axis...) are passed on by default, but can also be set to False if not wanted. GliderTools functions will automatically pass on these attributes to function outputs if a `xarray.DataArray` with attributes is given.
122 | All functions applied to data will also be recorded under the variable attribute `processing`.
123 | 
124 | 
125 | The merged dataset contains all the data interpolated to the nearest observation of the longest dimension the metadata is also shown for the example below
126 | ```python
127 | ds_dict['merged']
128 | ```
129 | 
130 | 
131 | 
132 | 
133 |     xarray.Dataset>
134 |     Dimensions:                        (merged: 382151)
135 |     Coordinates:
136 |         ctd_depth                      (merged) float64 -0.08821 0.018 ... -0.1422
137 |         latitude                       (merged) float64 -42.7 -42.7 ... -43.0 -43.0
138 |         longitude                      (merged) float64 8.744 8.744 ... 8.5 8.5
139 |         ctd_time_dt64                  (merged) datetime64[ns] 2015-12-08T07:36:16 ...
140 | 
141 |     Dimensions without coordinates: merged
142 |     Data variables:
143 |         ctd_pressure                   (merged) float64 -0.08815 0.01889 ... -0.1432
144 |         eng_wlbb2flvmt_wl470sig        (merged) float64 375.0 367.0 ... 98.0 91.0
145 |         eng_wlbb2flvmt_wl700sig        (merged) float64 2.647e+03 ... 137.0
146 |         temperature                    (merged) float64 11.55 11.54 ... 11.06 10.97
147 |         ctd_time                       (merged) float64 1.45e+09 ... 1.455e+09
148 |         aanderaa4330_dissolved_oxygen  (merged) float64 nan nan nan ... 269.1 269.1
149 |         salinity                       (merged) float64 nan nan nan ... 34.11 34.11
150 |         eng_wlbb2flvmt_Chlsig          (merged) float64 145.0 126.0 ... 215.0 215.0
151 |         dives                          (merged) float64 1.0 1.0 1.0 ... 344.5 344.5
152 |         eng_qsp_PARuV                  (merged) float64 0.551 0.203 ... 0.021 0.023
153 |         time                           (merged) float64 1.45e+09 ... 1.455e+09
154 |         time_dt64                      (merged) datetime64[ns] 2015-12-08T07:36:16 ...
155 | 
156 |     Attributes:
157 |         date_created:             2019-07-11 14:08:40
158 |         number_of_dives:          344.0
159 |         files:                    ['p5420001.nc', 'p5420002.nc', 'p5420004.nc', '...
160 |         time_coverage_start:      2015-12-08 07:36:16
161 |         time_coverage_end:        2016-02-08 04:39:04
162 |         geospatial_vertical_min:  -0.6323553853732649
163 |         geospatial_vertical_max:  1011.1000623417478
164 |         geospatial_lat_min:       -43.085757609206
165 |         geospatial_lat_max:       -42.70088638031523
166 |         geospatial_lon_min:       8.29983279020758
167 |         geospatial_lon_max:       8.7753734452125
168 |         processing:               [2019-07-11 14:08:40] imported data with Glider...
169 | 
170 | 
171 | 
172 | ### Renaming for ease of access
173 | When renaming, just be sure that there are no variables with names that you are trying to replace. In the example below we remove `time` in case it exists in the files.
174 | ```python
175 | # Here we drop the time variables imported for the PAR variable
176 | # we don't need these anymore. You might have to change this
177 | # depening on the dataset
178 | merged = ds_dict['merged']
179 | if 'time' in merged:
180 |     merged = merged.drop(["time", "time_dt64"])
181 | 
182 | 
183 | # To make it easier and clearer to work with, we rename the
184 | # original variables to something that makes more sense. This
185 | # is done with the xarray.Dataset.rename({}) function.
186 | # We only use the merged dataset as this contains all the
187 | # imported dimensions.
188 | # NOTE: The renaming has to be specific to the dataset otherwise an error will occur
189 | dat = merged.rename({
190 |     'salinity': 'salt_raw',
191 |     'temperature': 'temp_raw',
192 |     'ctd_pressure': 'pressure',
193 |     'ctd_depth': 'depth',
194 |     'ctd_time_dt64': 'time',
195 |     'ctd_time': 'time_raw',
196 |     'eng_wlbb2flvmt_wl700sig': 'bb700_raw',
197 |     'eng_wlbb2flvmt_wl470sig': 'bb470_raw',
198 |     'eng_wlbb2flvmt_Chlsig': 'flr_raw',
199 |     'eng_qsp_PARuV': 'par_raw',
200 |     'aanderaa4330_dissolved_oxygen': 'oxy_raw',
201 | })
202 | 
203 | print(dat)
204 | 
205 | # variable assignment for conveniant access
206 | depth = dat.depth
207 | dives = dat.dives
208 | lats = dat.latitude
209 | lons = dat.longitude
210 | time = dat.time
211 | pres = dat.pressure
212 | temp = dat.temp_raw
213 | salt = dat.salt_raw
214 | par = dat.par_raw
215 | bb700 = dat.bb700_raw
216 | bb470 = dat.bb470_raw
217 | fluor = dat.flr_raw
218 | 
219 | # name coordinates for quicker plotting
220 | x = dat.dives
221 | y = dat.depth
222 | ```
223 | 


--------------------------------------------------------------------------------
/docs/mapping.md:
--------------------------------------------------------------------------------
  1 | # Gridding and interpolation
  2 | 
  3 | ## Vertical gridding
  4 | It is often more convenient and computationally efficient to work with data that has been gridded to a standard vertical grid (i.e. depths have been binned).
  5 | GliderTools offers very easy to use and efficient tools to grid data once all the processing has been completed.
  6 | 
  7 | The first task is to select the bin size of the data that will be gridded.
  8 | GliderTools automatically selects bin sizes according to the sampling frequency of the dataset for every 50m.
  9 | This is shown in the figure below, where the 2D histogram shows the sampling frequency (by depth) and the line shows the automatically selected bin size rounded up to the nearest 0.5m.
 10 | 
 11 | 
 12 | ```python
 13 | ax = gt.plot.bin_size(dat.depth, cmap=mpl.cm.Blues)
 14 | ax.set_xlim(0, 6)
 15 | line = ax.get_children()[1]
 16 | line.set_linewidth(6)
 17 | line.set_color('orange')
 18 | 
 19 | legend = ax.get_children()[-2]
 20 | legend.set_visible(False)
 21 | ```
 22 | 
 23 | 
 24 | ![png](img/output_97_0.png)
 25 | 
 26 | 
 27 | ### Gridding with automatic bin sizes
 28 | 
 29 | Gridding the data then becomes easy with automatic binning. But note that the x-coordinate has the be semi-discrete, e.g. dives number or dive time stamp average. You'll see that the gridding function also returns the mean bin size and then the average sampling frequency.
 30 | 
 31 | The function can return either an xr.DataArray or a pd.DataFrame. The DataArray is the default as metadata can be stored in these files (including coordinate information).
 32 | 
 33 | Gridded data can be passed to the plot function without x- and y-coordinates, as these are contained in the gridded data.
 34 | 
 35 | In fact, data is silently passed through the gridding function when x- and y-coordinates are included in the `gt.plot` function
 36 | 
 37 | 
 38 | ```python
 39 | flr_gridded = gt.grid_data(dives, depth, flr)
 40 | 
 41 | ax = gt.plot(flr_gridded, cmap=cmo.delta)
 42 | ax.set_ylim(200, 0)
 43 | ```
 44 | 
 45 |     Mean bin size = 1.99
 46 |     Mean depth binned (50 m) vertical sampling frequency = 2.53
 47 | 
 48 | 
 49 | ![png](img/output_99_2.png)
 50 | 
 51 | 
 52 | ### Gridding with manually defined bins
 53 | 
 54 | There is also the option to manuualy define your bins if you'd prefer.
 55 | A custom bin array needs to be created.
 56 | Use `np.arange` to create sections of the bins and combine them with `np.r_` as shown below:
 57 | 
 58 | 
 59 | ```python
 60 | custom_bin = np.r_[
 61 |     np.arange(0, 100, 0.5),
 62 |     np.arange(100, 400, 1.0),
 63 |     np.arange(400, 1000, 2.0)]
 64 | 
 65 | flr_gridded = gt.grid_data(x, y, flr, bins=custom_bin)
 66 | 
 67 | # The plot below is the standard plotting procedure for an xarray.DataArray
 68 | gt.plot(flr_gridded, cmap=cmo.delta)
 69 | ylim(200, 0)
 70 | ```
 71 | 
 72 |     Mean bin size = 1.25
 73 |     Mean depth binned (50 m) vertical sampling frequency = 2.53
 74 | 
 75 | 
 76 | 
 77 | 
 78 | 
 79 |     (200, 0)
 80 | 
 81 | 
 82 | 
 83 | 
 84 | ![png](img/output_101_2.png)
 85 | 
 86 | 
 87 | ## 2D interpolation with objective mapping (Kriging)
 88 | 
 89 | Users may want to interpolate data horizontally when working with finescale gradients.
 90 | Several studies have used the `objmap` MATLAB function that uses objective mapping (a.k.a. Kriging).
 91 | Kriging is an advanced form of inverse distance weighted interpolation, where points influence the interpolation based on the distance from an interpolation point, where the influence falls off with a Gauassian function.
 92 | This is an expensive function when the dataset is large (due to a matrix inverse operation).
 93 | The computational cost is reduced by breaking the problem into smaller pieces using a quadtree that iteratively breaks data into smaller problems.
 94 | 
 95 | GliderTools provides a Python implementation of the MATLAB function. We have added parallel capability to speed the processing up, but this operation is still costly and could take several hours if an entire section is interpolated. We thus recommend that smaller sections are interpolated.
 96 | 
 97 | 
 98 | ```python
 99 | # first we select a subset of data (50k points)
100 | subs = dat.isel(merged=slice(0, 50000))
101 | 
102 | # we then get time values - this makes creating the interpolation grid easier
103 | var = subs.flr_qc
104 | time = subs.time.values
105 | depth = subs.depth
106 | dives = subs.dives
107 | dist = np.r_[0, gt.utils.distance(subs.longitude, subs.latitude).cumsum()]
108 | ```
109 | 
110 | ### Part 1: Semivariance
111 | Interpolating any variable requires some knowlege about the spatial autocorrelation of that variable. A semivariogram allows one to get this information from the data. The basic idea of a semivariogram is to assess the similarity between data at different lengthscales (lags), where a low semivariance shows coherence and a large semivariance shows a mismatch. This information is required to interpolate data with sensible estimates and error estimates.
112 | 
113 | GliderTools offers a derivation of a variogram tool (`gt.mapping.variogram`) that makes the process of finding these parameters a little easier, though there is a fair deal of subjectivity, depending on the scale of the question at hand, and tinkering are required to make a sensible interpolation.
114 | 
115 | #### 1.1. Choosing a subset of the data for semivariance estimation
116 | The variogram function selects a number of dives (number depends on max_points) and performs the analysis on the subset of dives rathern than selecting random points. We thus recommend that a subset of the data is used to perform the analysis. In the example below, we take a subset of the data that as particularly high variability that we are interested in preserving. This subset is < 250m depth and limited to the first 20 dives. This should be tailored to the variable that you're interested in.
117 | 
118 | 
119 | ```python
120 | m = (depth<150) & (dives > 30) & (dives < 46)
121 | ax = gt.plot(dives, depth, var)
122 | ax.plot(dives[m], depth[m], '-m', ms=3, alpha=0.7)
123 | ```
124 | 
125 | 
126 | 
127 | 
128 |     [<matplotlib.lines.Line2D at 0x1c728526d8>]
129 | 
130 | 
131 | 
132 | 
133 | ![png](img/output_106_1.png)
134 | 
135 | 
136 | #### 1.2. Initial estimate of semivariance
137 | We can now find an initial estimate of the semivariance. This initial estimate will not scale the x/y coordinates for anisotropy (different scales of variability). The variogram function also accepts a boolean mask as an keyword argument. This will reduce the input data to the subset of data that you've chosen.
138 | 
139 | The example below shows this initial estimate. We're looking for an estimate where the Gaussian model fits the semi-variance as well as possible, given that the variance paramters are acceptable. These variance parameters are: *sill, nugget, x and y length-scales*. The function automatically adjusts the range to be one and scales the x and y parameters accordingly.
140 | 
141 | The variogram function can take time (datetime64), but we use distance (in metres) to demonstrate the the anisotropic scaling.
142 | 
143 | 
144 | ```python
145 | vargram = gt.mapping.variogram(var, dist, depth, dives, mask=m)
146 | ```
147 | 
148 | 
149 | ![png](img/output_108_0.png)
150 | 
151 | 
152 | The example above shows that x and y are scaled, but the Gaussian model does not fit the semivariance very well. The range is 1, because it is scaled accordingly. The sill and nugget are very similar - this is not a good result.
153 | 
154 | #### 1.3. Finding the correct x and y length scales (anisotropy)
155 | 
156 | We can now scale the data with the xy_ratio. The ratio represents the scaling of x/y. For example, if x and y are both in metres (as in this case), we need to set a small xy_ratio as x has a much longer lengthscale. With some trial and error we choose a ratio of 0.0005, which fits the semivariogram relatively well and has a reasonably low *y* scaling estimate.
157 | 
158 | You'll see that the Gaussian model does not fit the semivariance exactly - this is OK. The important thing is that the first plateau matches the sill.
159 | 
160 | We can now use these values for interpolating.
161 | 
162 | 
163 | 
164 | ```python
165 | vargram = gt.mapping.variogram(var, dist, depth, dives, mask=m, xy_ratio=0.0005)
166 | ```
167 | 
168 | 
169 | ![png](img/output_111_0.png)
170 | 
171 | 
172 | ### 2. Interpolation
173 | #### 2.1 Preparing the interpolation grid
174 | To perform the interpolation we first need to create the grid onto which data will be interpolated.
175 | In the example below we use distance from the origin as the x-coordinate.
176 | Time can also be used and has to be in a `np.datetime64` format - we show a commented example of this.
177 | The y-coordinate is depth.
178 | 
179 | 
180 | ```python
181 | # creating the x- and y-interpolation coordinates
182 | # and a 1m vertical grid and a horizontal grid with 500 points
183 | xi = np.linspace(dist.min(), dist.max(), 500)
184 | yi = np.arange(0, depth[var.notnull()].max(), 1, dtype=float)
185 | 
186 | # time can also be used. This is a commented example of how to create
187 | # a time grid for interpolation.
188 | # xi = np.arange(time.min(), time.max(), 30, dtype='datetime64[m]')
189 | ```
190 | 
191 | #### 2.2 Interpolation with the semivariance parameters
192 | The interpolation has a number of parameters that can be changed or adapted to the dataset at hand.
193 | The commented inputs below describe these inputs.
194 | 
195 | 
196 | ```python
197 | %autoreload 2
198 | 
199 | interpolated = gt.mapping.interp_obj(
200 |     dist, depth, var, xi, yi,
201 | 
202 |     # Kriging interoplation arguments
203 |     partial_sill=1.1e4,  # taken from the semivariogram (sill - nugget)
204 |     nugget=3e3,  # taken from the semivariogram
205 |     lenscale_x=98942,  # in hours if x and xi are in datetime64
206 |     lenscale_y=50,  # the vertical gridding influence
207 |     detrend=True,  # if True use linear regression (z - z_hat), if False use average (z - z_mean)
208 | 
209 |     # Quadtree arguments
210 |     max_points_per_quad=65,  # an optimsation setting ~100 is good
211 |     min_points_per_quad=8,  # if neighbours have < N points, look at their neighbours
212 | 
213 |     # Parallel calculation inputs.
214 |     n_cpus=3,  # the number of CPU's to use for the calculation - default is n-1
215 |     parallel_chunk_size=512,  # when the dataset is very large, memory can become an issue
216 |                               # this prevents large buildup of parallel results
217 | )
218 | ```
219 | 
220 |     Starting Interpolation with quadtree optimal interpolation
221 |     ----------------------------------------------------------
222 | 
223 |     Preparing for interpolations:
224 |     	Finding and removing nans
225 |     	Removing data trend with linear regression
226 |     	Building QuadTree
227 | 
228 |     Interpolation information:
229 |     	basis points:        25226
230 |     	interp grid:         500, 404
231 |     	max_points_per_quad: 65
232 |     	min_points_per_quad: 8
233 |     	number of quads:     952
234 |     	detrend_method:      linear_regression
235 |     	partial_sill:        11000.0
236 |     	nugget:              3000.0
237 |     	lengthscales:        X = 98942
238 |     	                     Y = 50 m
239 | 
240 |     Processing interpolation chunks in 2 parts over 3 CPUs:
241 |     	chunk 1/2 completed in 12s
242 |     	chunk 2/2 completed in 10s
243 | 
244 |     Finishing off interoplation
245 |     	Adding back the trend
246 |     	Creating xarray dataset for output
247 | 
248 | 
249 | 
250 | ```python
251 | fig, ax = plt.subplots(3, 1, figsize=[9, 9], sharex=True, dpi=90)
252 | 
253 | error_mask = (interpolated.variance / interpolated.nugget) < 1.05
254 | interp_robust = interpolated.z.where(error_mask)
255 | 
256 | props = dict(vmin=0, vmax=300, cmap=cmo.delta)
257 | gt.plot.scatter(dist, depth, var, ax=ax[0], **props)
258 | gt.plot.pcolormesh(interp_robust, ax=ax[1], **props)
259 | gt.plot.pcolormesh(interpolated.variance, ax=ax[2], vmin=interpolated.nugget, vmax=interpolated.nugget*1.08)
260 | 
261 | ax[2].plot(dist, depth, 'w-', zorder=40, alpha=0.8, lw=0.4)
262 | 
263 | [a.set_ylim(400, 0) for a in ax]
264 | [a.set_xlabel('  ') for a in ax]
265 | 
266 | ax[0].get_children()[0].set_sizes([20])
267 | ax[0].set_title('Uninterpolated data')
268 | ax[1].set_title('Interpolated data')
269 | ax[2].set_title('Interpolation variance with dives shown in white')
270 | ax[2].set_xlabel('Distance (m)')
271 | 
272 | tks = xticks(rotation=0)
273 | ```
274 | 
275 | 
276 | ![png](img/output_116_0.png)
277 | 


--------------------------------------------------------------------------------
/docs/optics.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # Optics (BB, PAR, Chl)
  3 | The optics module contains functions that process backscatter, PAR and fluorescence.
  4 | 
  5 | There is a wrapper function for each of these variables that applies several functions related to cleaning and processing. We show each step of the wrapper function seperately and then summarise with the wrapper function.
  6 | 
  7 | ## Backscatter
  8 | 
  9 | 
 10 | ```python
 11 | theta = 124
 12 | xfactor = 1.076
 13 | 
 14 | gt.plot(x, y, bb700, cmap=cmo.delta, vmin=60, vmax=200)
 15 | xlim(200,340)
 16 | title('Original Data')
 17 | show()
 18 | ```
 19 | 
 20 | 
 21 | ![png](img/output_41_0.png)
 22 | 
 23 | 
 24 | ### Outlier bounds method
 25 | See the cleaning section for more information on `gt.cleaning.outlider_bounds_[]`
 26 | 
 27 | ```python
 28 | bb700_iqr = gt.cleaning.outlier_bounds_iqr(bb700, multiplier=3)
 29 | bb700_std = gt.cleaning.outlier_bounds_std(bb700, multiplier=3)
 30 | 
 31 | fig, ax = plt.subplots(2, 1, figsize=[9, 6], sharex=True, dpi=90)
 32 | 
 33 | gt.plot(x, y, bb700_iqr, cmap=cmo.delta, ax=ax[0], vmin=60, vmax=200)
 34 | gt.plot(x, y, bb700_std, cmap=cmo.delta, ax=ax[1], vmin=60, vmax=200)
 35 | 
 36 | [a.set_xlabel('') for a in ax]
 37 | [a.set_xlim(200, 340) for a in ax]
 38 | 
 39 | ax[0].set_title('Outlier IQR')
 40 | ax[1].set_title('Outlier STD')
 41 | 
 42 | plt.show()
 43 | ```
 44 | 
 45 | 
 46 | ![png](img/output_43_0.png)
 47 | 
 48 | 
 49 | ### Removing bad profiles
 50 | This function masks bad dives based on mean + std x [1] or median + std x [1] at a reference depth.
 51 | 
 52 | 
 53 | ```python
 54 | # find_bad_profiles returns boolean mask and dive numbers
 55 | # we index only the mask
 56 | bad_profiles = gt.optics.find_bad_profiles(dives, depth, bb700,
 57 |                                            ref_depth=300,
 58 |                                            stdev_multiplier=1,
 59 |                                            method='median')[0]
 60 | 
 61 | fig, ax = plt.subplots(2, 1, figsize=[9, 6], sharex=True, dpi=90)
 62 | # ~ reverses True to False and vice versa - i.e. we mask bad bad profiles
 63 | gt.plot(x, y, bb700, cmap=cmo.delta, ax=ax[0], vmin=60, vmax=200)
 64 | gt.plot(x, y, bb700.where(~bad_profiles), cmap=cmo.delta, ax=ax[1], vmin=60, vmax=200)
 65 | 
 66 | [a.set_xlabel('') for a in ax]
 67 | [a.set_xlim(40, 120) for a in ax]
 68 | 
 69 | ax[0].set_title('All backscatter data')
 70 | ax[1].set_title('Bad profiles masked')
 71 | 
 72 | plt.show()
 73 | ```
 74 | 
 75 | 
 76 | ![png](img/output_45_0.png)
 77 | 
 78 | 
 79 | ### Conversion from counts to total backscatter
 80 | 
 81 | The scale and offset function uses the factory calibration dark count and scale factor.
 82 | 
 83 | The bback total function uses the coefficients from Zhang et al. (2009) to convert the raw counts into total backscatter (m-1), correcting for temperature and salinity. The $\chi$ factor and $\theta$ in this example were taken from Sullivan et al. (2013) and Slade & Boss (2015).
 84 | 
 85 | 
 86 | ```python
 87 | beta = gt.flo_functions.flo_scale_and_offset(bb700.where(~bad_profiles), 49, 3.217e-5)
 88 | bbp = gt.flo_functions.flo_bback_total(beta, temp_qc, salt_qc, theta, 700, xfactor)
 89 | 
 90 | fig, ax = plt.subplots(2, 1, figsize=[9, 6], sharex=True, dpi=90)
 91 | 
 92 | gt.plot(x, y, beta, cmap=cmo.delta, ax=ax[0], robust=True)
 93 | gt.plot(x, y, bbp, cmap=cmo.delta, ax=ax[1], robust=True)
 94 | 
 95 | [a.set_xlabel('') for a in ax]
 96 | [a.set_xlim(200, 340) for a in ax]
 97 | [a.set_ylim(400, 0) for a in ax]
 98 | 
 99 | ax[0].set_title('$\u03B2$')
100 | ax[1].set_title('b$_{bp}$ (m$^{-1}$)')
101 | 
102 | plt.show()
103 | ```
104 | 
105 | 
106 | ![png](img/output_47_0.png)
107 | 
108 | 
109 | ### Correcting for an in situ dark count
110 | Sensor drift from factory calibration requires an additional correction, the calculation of a dark count in situ. This is calculated from the 95th percentile of backscatter measurements between 200 and 400m.
111 | 
112 | 
113 | ```python
114 | bbp = gt.optics.backscatter_dark_count(bbp, depth)
115 | 
116 | gt.plot(x, y, bbp, cmap=cmo.delta, robust=True)
117 | xlim(200,340)
118 | title('b$_{bp}$ (m$^{-1}$)')
119 | show()
120 | ```
121 | 
122 | 
123 | ![png](img/output_49_0.png)
124 | 
125 | 
126 | ### Despiking
127 | Following the methods outlined in Briggs et al. (2011) to both identify spikes in backscatter and remove them from the baseline backscatter signal. The spikes are retained as the data can be used to address specific science questions, but their presence can decrease the accuracy of the fluorescence quenching function.
128 | 
129 | 
130 | ```python
131 | bbp_horz = gt.cleaning.horizontal_diff_outliers(x, y, bbp, depth_threshold=10, mask_frac=0.05)
132 | bbp_baseline, bbp_spikes = gt.cleaning.despike(bbp_horz, 7, spike_method='minmax')
133 | 
134 | 
135 | fig, ax = plt.subplots(2, 1, figsize=[9, 6], sharex=True, dpi=90)
136 | 
137 | gt.plot(x, y, bbp_baseline, cmap=cmo.delta, ax=ax[0], robust=True)
138 | gt.plot(x, y, bbp_spikes, ax=ax[1], cmap=cm.Spectral_r, vmin=0, vmax=0.004)
139 | 
140 | [a.set_xlabel('') for a in ax]
141 | [a.set_xlim(200, 340) for a in ax]
142 | 
143 | ax[0].set_title('Despiked b$_{bp}$ (m$^{-1}$)')
144 | ax[1].set_title('b$_{bp}$ (m$^{-1}$) spikes')
145 | 
146 | plt.show()
147 | ```
148 | 
149 | 
150 | ![png](img/output_51_0.png)
151 | 
152 | 
153 | ### Adding the corrected variables to the original dataframe
154 | 
155 | 
156 | ```python
157 | dat['bbp700'] = bbp_baseline
158 | dat['bbp700_spikes'] = bbp_spikes
159 | ```
160 | 
161 | ### Wrapper function demonstration
162 | A wrapper function was also designed, which is demonstrated below with the second wavelength (700 nm). The default option is for verbose to be True, which will provide an output of the different processing steps.
163 | 
164 | 
165 | ```python
166 | bbp_baseline, bbp_spikes = gt.calc_backscatter(
167 |     bb700, temp_qc, salt_qc, dives, depth, 700, 49, 3.217e-5,
168 |     spike_window=11, spike_method='minmax', iqr=2., profiles_ref_depth=300,
169 |     deep_multiplier=1, deep_method='median', verbose=True)
170 | 
171 | dat['bbp700'] = bbp_baseline
172 | dat['bbp700_spikes'] = bbp_spikes
173 | 
174 | ax = gt.plot(x, y, dat.bbp700, cmap=cmo.delta),
175 | 
176 | [a.set_xlim(200, 340) for a in ax]
177 | 
178 | plt.show()
179 | ```
180 | 
181 | 
182 |     ==================================================
183 |     bb700:
184 |     	Removing outliers with IQR * 2.0: 8606 obs
185 |     	Mask bad profiles based on deep values (depth=300m)
186 |     	Number of bad profiles = 27/672
187 |     	Zhang et al. (2009) correction
188 |     	Dark count correction
189 |     	Spike identification (spike window=11)
190 | 
191 | 
192 | 
193 | ![png](img/output_55_1.png)
194 | 
195 | 
196 | 
197 | ```python
198 | bbp_baseline, bbp_spikes = gt.calc_backscatter(
199 |     bb470, temp_qc, salt_qc, dives, depth, 470, 47, 1.569e-5,
200 |     spike_window=7, spike_method='minmax', iqr=3, profiles_ref_depth=300,
201 |     deep_multiplier=1, deep_method='median', verbose=True)
202 | 
203 | dat['bbp470'] = bbp_baseline
204 | dat['bbp470_spikes'] = bbp_spikes
205 | 
206 | gt.plot(x, y, dat.bbp470, cmap=cmo.delta)
207 | plt.show()
208 | ```
209 | 
210 | 
211 |     ==================================================
212 |     bb470:
213 |     	Removing outliers with IQR * 3: 2474 obs
214 |     	Mask bad profiles based on deep values (depth=300m)
215 |     	Number of bad profiles = 16/672
216 |     	Zhang et al. (2009) correction
217 |     	Dark count correction
218 |     	Spike identification (spike window=7)
219 | 
220 | 
221 | 
222 | ![png](img/output_56_1.png)
223 | 
224 | 
225 | ## PAR
226 | 
227 | ### PAR Scaling
228 | 
229 | This function uses the factory calibration to convert from $\mu$V to $\mu$E m$^{-2}$ s$^{-1}$.
230 | 
231 | 
232 | ```python
233 | par_scaled = gt.optics.par_scaling(par, 6.202e-4, 10.8)
234 | 
235 | fig, ax = plt.subplots(2, 1, figsize=[9, 6], sharex=True, dpi=90)
236 | 
237 | gt.plot(x, y, par, cmap=cmo.solar, ax=ax[0], robust=True)
238 | gt.plot(x, y, par_scaled, cmap=cmo.solar, ax=ax[1], robust=True)
239 | 
240 | [a.set_xlabel('') for a in ax]
241 | [a.set_xlim(200, 340) for a in ax]
242 | [a.set_ylim(70, 0) for a in ax]
243 | 
244 | ax[0].set_title('PAR ($\mu$V)')
245 | ax[1].set_title('PAR ($\mu$E m$^{-2}$ m$^{-1}$)')
246 | 
247 | plt.show()
248 | ```
249 | 
250 | 
251 | ![png](img/output_59_0.png)
252 | 
253 | 
254 | ### Correcting for an in situ dark count
255 | 
256 | Sensor drift from factory calibration requires an additional correction, the calculation of a dark count in situ. This is calculated from the median of PAR measurements, with additional masking applied for values before 23:01 and outside the 90th percentile.
257 | 
258 | 
259 | ```python
260 | par_dark = gt.optics.par_dark_count(par_scaled, dives, depth, time)
261 | 
262 | gt.plot(x, y, par_dark, robust=True, cmap=cmo.solar)
263 | xlim(200,340)
264 | ylim(70,0)
265 | title('PAR ($\mu$E m$^{-2}$ m$^{-1}$)')
266 | show()
267 | ```
268 | 
269 | 
270 | ![png](img/output_61_0.png)
271 | 
272 | 
273 | ### PAR replacement
274 | 
275 | This function removes the top 5 metres from each dive profile, and then algebraically recalculates the surface PAR using an exponential equation.
276 | 
277 | 
278 | ```python
279 | par_filled = gt.optics.par_fill_surface(par_dark, dives, depth, max_curve_depth=80)
280 | par_filled[par_filled < 0] = 0
281 | par_filled = par_filled.fillna(0)
282 | ```
283 | 
284 | 
285 | ```python
286 | i = dives == 232
287 | 
288 | fig, ax = subplots(1, 2, figsize=[6,6], dpi=100)
289 | 
290 | ax[0].plot(par_dark[i], depth[i], lw=0.5, marker='o', ms=5)
291 | ax[0].plot(par_filled[i], depth[i], lw=0.5, marker='o', ms=3)
292 | ax[1].plot(par_filled[i] - par_dark[i], depth[i], lw=0, marker='o')
293 | 
294 | ax[0].set_ylim(80,0)
295 | ax[0].set_ylabel('Depth (m)')
296 | ax[0].set_xlabel('PAR ($\mu$E m$^{-2}$ m$^{-1}$)')
297 | 
298 | ax[1].set_ylim(80,0)
299 | ax[1].set_xlim(-350,350)
300 | ax[1].set_yticklabels('')
301 | ax[1].set_xlabel('Difference between profiles')
302 | 
303 | fig.tight_layout()
304 | plt.show()
305 | ```
306 | 
307 | 
308 | ![png](img/output_64_0.png)
309 | 
310 | 
311 | 
312 | ```python
313 | gt.plot(x, y, par_filled, robust=True, cmap=cmo.solar)
314 | xlim(200,340)
315 | ylim(100,0)
316 | title('PAR ($\mu$E m$^{-2}$ m$^{-1}$)')
317 | show()
318 | ```
319 | 
320 | 
321 | ![png](img/output_65_0.png)
322 | 
323 | 
324 | ### Wrapper function demonstration
325 | 
326 | 
327 | ```python
328 | par_qc = gt.calc_par(par, dives, depth, time,
329 |                      6.202e-4, 10.8,
330 |                      curve_max_depth=80,
331 |                      verbose=True).fillna(0)
332 | 
333 | gt.plot(x, y, par_qc, robust=True, cmap=cmo.solar)
334 | ylim(80, 0)
335 | show()
336 | ```
337 | 
338 | 
339 |     ==================================================
340 |     PAR
341 |     	Dark correction
342 |     	Fitting exponential curve to data
343 | 
344 | 
345 | 
346 | ![png](img/output_67_1.png)
347 | 
348 | 
349 | ### Deriving additional variables
350 | 
351 | #### Euphotic Depth and Light attenuation coefficient
352 | 
353 | 
354 | ```python
355 | euphotic_depth, kd = gt.optics.photic_depth(
356 |     par_filled, dives, depth,
357 |     return_mask=False,
358 |     ref_percentage=1
359 | )
360 | ```
361 | 
362 | 
363 | ```python
364 | fig, ax = subplots(1, 1, figsize=[6,4], dpi=100)
365 | p1 = plot(euphotic_depth.index, euphotic_depth, label='Euphotic Depth')
366 | ylim(120,0)
367 | ylabel('Euphotic Depth (m)')
368 | xlabel('Dives')
369 | ax2 = ax.twinx()
370 | p2 = plot(kd.index, kd, color='orange', lw=0, marker='o', ms=2, label='K$_d$')
371 | ylabel('K$_d$', rotation=270, labelpad=20)
372 | 
373 | lns = p1+p2
374 | labs = [l.get_label() for l in lns]
375 | ax2.legend(lns, labs, loc=3, numpoints=1)
376 | 
377 | show()
378 | ```
379 | 
380 | 
381 | ![png](img/output_71_0.png)
382 | 
383 | 
384 | ## Fluorescence
385 | 
386 | Quenching Correcting Method as outlined in Thomalla et al. (2017)
387 | 
388 | 
389 | ```python
390 | gt.plot(x, y, fluor, cmap=cmo.delta, robust=True)
391 | xlim(150,300)
392 | title('Original Data')
393 | show()
394 | ```
395 | 
396 | 
397 | ![png](img/output_74_0.png)
398 | 
399 | 
400 | ### Outlier bounds method
401 | 
402 | 
403 | ```python
404 | flr_iqr = gt.cleaning.outlier_bounds_iqr(fluor, multiplier=3)
405 | 
406 | gt.plot(x, y, flr_iqr, cmap=cmo.delta, robust=True)
407 | title('Outlier Bounds IQR Method')
408 | xlim(150,300)
409 | show()
410 | ```
411 | 
412 | 
413 | ![png](img/output_76_0.png)
414 | 
415 | 
416 | ### Removing bad profiles
417 | 
418 | This function masks bad dives based on mean + std x [3] or median + std x [3] at a reference depth.
419 | 
420 | 
421 | ```python
422 | bad_profiles = gt.optics.find_bad_profiles(dives, depth, flr_iqr,
423 |                                            ref_depth=300,
424 |                                            stdev_multiplier=4,
425 |                                            method='mean')
426 | flr_goodprof = flr_iqr.where(~bad_profiles[0])
427 | 
428 | fig, ax = plt.subplots(2, 1, figsize=[9, 6], sharex=True, dpi=90)
429 | 
430 | gt.plot(x, y, flr_iqr, cmap=cmo.delta, ax=ax[0], robust=True)
431 | gt.plot(x, y, flr_goodprof, cmap=cmo.delta, ax=ax[1], robust=True)
432 | 
433 | [a.set_xlabel('') for a in ax]
434 | [a.set_xlim(90, 150) for a in ax]
435 | [a.set_ylim(300, 0) for a in ax]
436 | 
437 | ax[0].set_title('Bad Profiles Included')
438 | ax[1].set_title('Bad Profiles Discarded')
439 | 
440 | plt.show()
441 | ```
442 | 
443 | 
444 | ![png](img/output_78_0.png)
445 | 
446 | 
447 | ### Correcting for an in situ dark count
448 | 
449 | Sensor drift from factory calibration requires an additional correction, the calculation of a dark count in situ. This is calculated from the 95th percentile of fluorescence measurements between 300 and 400m.
450 | 
451 | 
452 | ```python
453 | flr_dark = gt.optics.fluorescence_dark_count(flr_iqr, dat.depth)
454 | 
455 | gt.plot(x, y, flr_dark, cmap=cmo.delta, robust=True)
456 | xlim(150,300)
457 | show()
458 | ```
459 | 
460 | 
461 | ![png](img/output_80_0.png)
462 | 
463 | 
464 | ### Despiking
465 | 
466 | 
467 | ```python
468 | flr_base, flr_spikes = gt.cleaning.despike(flr_dark, 11, spike_method='median')
469 | 
470 | fig, ax = plt.subplots(2, 1, figsize=[9, 6], sharex=True, dpi=90)
471 | 
472 | gt.plot(x, y, flr_base, cmap=cmo.delta, ax=ax[0], robust=True)
473 | gt.plot(x, y, flr_spikes, cmap=cm.RdBu_r, ax=ax[1], vmin=-5, vmax=5)
474 | 
475 | [a.set_xlabel('') for a in ax]
476 | [a.set_xlim(150, 300) for a in ax]
477 | [a.set_ylim(300, 0) for a in ax]
478 | 
479 | ax[0].set_title('Despiked Fluorescence')
480 | ax[1].set_title('Fluorescence spikes')
481 | 
482 | plt.show()
483 | ```
484 | 
485 | 
486 | ![png](img/output_82_0.png)
487 | 
488 | 
489 | ### Quenching Correction
490 | 
491 | This function uses the method outlined in Thomalla et al. (2017), briefly it calculates the quenching depth and performs the quenching correction based on the fluorescence to backscatter ratio. The quenching depth is calculated based upon the different between night and daytime fluorescence.
492 | 
493 | The default setting is for the preceding night to be used to correct the following day's quenching (`night_day_group=True`). This can be changed so that the following night is used to correct the preceding day. The quenching depth is then found from the difference between the night and daytime fluorescence, using the steepest gradient of the {5 minimum differences and the points the difference changes sign (+ve/-ve)}.
494 | 
495 | The function gets the backscatter/fluorescence ratio between from the quenching depth to the surface, and then calculates a mean nighttime ratio for each night. The quenching ratio is calculated from the nighttime ratio and the daytime ratio, which is then applied to fluorescence to correct for quenching. If the corrected value is less than raw, then the function will return the original raw data.
496 | 
497 | 
498 | ```python
499 | flr_qc, quench_layer = gt.optics.quenching_correction(
500 |     flr_base, dat.bbp470, dives, depth, time, lats, lons,
501 |     sunrise_sunset_offset=1, night_day_group=True)
502 | 
503 | fig, ax = plt.subplots(2, 1, figsize=[9, 6], sharex=True, dpi=90)
504 | 
505 | gt.plot(x, y, flr_qc, cmap=cmo.delta, ax=ax[0], robust=True)
506 | gt.plot(x, y, quench_layer, cmap=cm.RdBu_r, ax=ax[1], vmin=-.5, vmax=2)
507 | 
508 | [a.set_xlabel('') for a in ax]
509 | [a.set_xlim(150, 300) for a in ax]
510 | [a.set_ylim(100, 0) for a in ax]
511 | 
512 | ax[0].set_title('Quenching Corrected Fluorescence')
513 | ax[1].set_title('Quenching Layer')
514 | 
515 | plt.show()
516 | ```
517 | 
518 | 
519 | ![png](img/output_84_0.png)
520 | 
521 | 
522 | ### Wrapper function
523 | 
524 | 
525 | ```python
526 | flr_qnch, flr, qnch_layer, [fig1, fig2] = gt.calc_fluorescence(
527 |     fluor, dat.bbp700, dives, depth, time, lats, lons, 53, 0.0121,
528 |     profiles_ref_depth=300, deep_method='mean', deep_multiplier=1,
529 |     spike_window=11, spike_method='median', return_figure=True,
530 |     night_day_group=False, sunrise_sunset_offset=2, verbose=True)
531 | 
532 | dat['flr_qc'] = flr
533 | ```
534 | 
535 | 
536 |     ==================================================
537 |     Fluorescence
538 |     	Mask bad profiles based on deep values (ref depth=300m)
539 |     	Number of bad profiles = 19/672
540 |     	Dark count correction
541 |     	Quenching correction
542 |     	Spike identification (spike window=11)
543 |     	Generating figures for despiking and quenching report
544 | 
545 | 
546 | 
547 | ![png](img/output_86_1.png)
548 | 
549 | 
550 | 
551 | ![png](img/output_86_2.png)
552 | 


--------------------------------------------------------------------------------
/docs/other.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # Other tools and utilities
 3 | 
 4 | ## 3D interactive plot
 5 | 
 6 | This is purely for investigative purposes, but provides a good way to interact with the data.
 7 | 
 8 | 
 9 | ```python
10 | plotly_figure = gt.plot.section3D(
11 |     dat.dives, dat.depth, dat.longitude, dat.latitude, dat.salt_qc,
12 |     zmin=-500, vmax=.999, vmin=.005
13 | )
14 | ```
15 | ![png](img/interactive_plot.png)
16 | 


--------------------------------------------------------------------------------
/docs/package_structure.md:
--------------------------------------------------------------------------------
1 | Package Structure
2 | =================
3 | ![Structure image](img/package_structure.png)
4 | 


--------------------------------------------------------------------------------
/docs/physics.md:
--------------------------------------------------------------------------------
 1 | # Secondary physical variables
 2 | 
 3 | ## Density
 4 | GliderTools provides a wrapper to calculate potential density.
 5 | This is done by first calculating potential temperature and then calculating absolute salinity.
 6 | A reference depth of `0` is used by default
 7 | 
 8 | 
 9 | ```python
10 | dens0 = gt.physics.potential_density(salt_qc, temp_qc, pres, lats, lons)
11 | dat['density'] = dens0
12 | gt.plot(dat.dives, dat.depth, dens0, cmap=cmo.dense)
13 | plt.xlim(50,150)
14 | plt.show()
15 | ```
16 | 
17 | 
18 | ![png](img/output_36_0.png)
19 | 
20 | 
21 | ## Mixed Layer Depth
22 | 
23 | 
24 | ```python
25 | import matplotlib.pyplot as plt
26 | mld = gt.physics.mixed_layer_depth(ds, 'density', verbose=False)
27 | mld_smoothed = mld.rolling(10, min_periods=3).mean()
28 | 
29 | mld_mask = gt.utils.mask_below_depth(ds, mld)
30 | mld_grid = gt.grid_data(ds.dives, ds.depth, mld_mask, verbose=False)
31 | 
32 | fig, ax = plt.subplots(1, 2, figsize=[9, 3], dpi=100, sharey=True)
33 | 
34 | mld_smoothed.plot(ax=ax[0])
35 | gt.plot(mld_grid, ax=ax[1])
36 | 
37 | [a.set_ylim(100, 0) for a in ax]
38 | 
39 | ax[0].set_ylabel('Depth (m)')
40 | [a.set_xlabel('Dives') for a in ax]
41 | plt.xticks(rotation=0)
42 | 
43 | fig.tight_layout()
44 | ```
45 | 
46 |     /Users/luke/Git/GliderTools/glidertools/helpers.py:61: GliderToolsWarning:
47 | 
48 |     Primary input variable is not xr.DataArray data type - no metadata to pass on.
49 | 
50 | 
51 | 
52 | 
53 | ![png](img/output_38_1.png)
54 | 


--------------------------------------------------------------------------------
/docs/quality_control.md:
--------------------------------------------------------------------------------
  1 | # Quality Control
  2 | Note that this summary carries on from the _Loading data_ page.
  3 | 
  4 | The `cleaning` module contains several tools that help to remove erroneous data - profiles or points.
  5 | These filters can be applied *globally* (IQR and standard devation limits), *vertically* (running average filters) or *horizontally* (horizontal filters on gridded data only).
  6 | 
  7 | There are also two approaches one can use to clean data: 1) filtering out bad points/dives; 2) smoothing data.
  8 | 
  9 | 
 10 | ## Original Data
 11 | 
 12 | Below we use **salinity** to demonstrate the different functions available to users.
 13 | 
 14 | ```python
 15 | dives = dat.dives
 16 | depth = dat.depth
 17 | salt = dat.salinity_raw
 18 | 
 19 | x = np.array(dives)  # ensures these are arrays
 20 | y = np.array(depth)
 21 | 
 22 | gt.plot(dives, depth, salt, cmap=cmo.haline, robust=True)
 23 | plt.xlim(50, 150)
 24 | plt.title('Original Data')
 25 | plt.show()
 26 | ```
 27 | 
 28 | ![png](img/output_14_0.png)
 29 | 
 30 | ## Global filtering: outlier limits (IQR & STD)
 31 | These functions find upper and lower limits for data outliers using interquartile range and standard deviations of the entire dataset. Multipliers can be set to make the filters more or less strict
 32 | 
 33 | 
 34 | ```python
 35 | salt_iqr = gt.cleaning.outlier_bounds_iqr(salt, multiplier=1.5)
 36 | salt_std = gt.cleaning.outlier_bounds_std(salt, multiplier=1.5)
 37 | 
 38 | # Plotting
 39 | gt.plot(x, y, salt_iqr, cmap=cmo.haline, robust=True)
 40 | plt.title('Outlier Bounds IQR Method')
 41 | plt.xlim(50,150)
 42 | 
 43 | gt.plot(x, y, salt_std, cmap=cmo.haline, robust=True)
 44 | plt.title('Outlier Bounds Stdev Method')
 45 | plt.xlim(50,150)
 46 | 
 47 | plt.show()
 48 | ```
 49 | 
 50 | ![png](img/output_16_0.png)
 51 | ![png](img/output_16_1.png)
 52 | 
 53 | 
 54 | ## Horizontal filtering: differential outliers
 55 | Erroneous measurements often occur sequentially - i.e. in the vertical. The vertical filtering approaches would thus miss any outliers as rolling windows are often used. It is thus useful to have an approach that compares dives in the horizontal. The `horizontal_diff_outliers` first grids data and then calculates where gradients (rolling mean - measurement) are outliers (same as `outlier_bounds_std`). If a certain fraction of measurements in a dive exceed the threshold, then that dive is deemed a bad dive. The example below shows three dives that have anomalous measurements.  These fall well within the global bounds of acceptable data, but horizontally that are masked out.
 56 | 
 57 | 
 58 | ```python
 59 | salt_horz = gt.cleaning.horizontal_diff_outliers(
 60 |     x, y, salt,
 61 |     multiplier=3,
 62 |     depth_threshold=400,
 63 |     mask_frac=0.1
 64 | )
 65 | 
 66 | gt.plot(x, y, salt, cmap=cmo.haline)
 67 | plt.title('Original dataset')
 68 | plt.xlim(150,250)
 69 | plt.show()
 70 | 
 71 | gt.plot(x, y, salt_horz, cmap=cmo.haline)
 72 | plt.title('Horizontal Differential Outliers removed')
 73 | plt.xlim(150,250)
 74 | plt.show()
 75 | ```
 76 | 
 77 | ![png](img/output_19_0.png)
 78 | ![png](img/output_19_1.png)
 79 | 
 80 | ## Vertical smoothing approaches
 81 | 
 82 | ### Despiking
 83 | This approach was used by Briggs et al. (2010). The idea is to apply a rolling filter to the data (along the time dimension). This forms the baseline. The difference from the original data are spikes.
 84 | 
 85 | There are two rolling filters that can be applied to the data. The *median* approach is the equivalent of a rolling median. The *minmax* approach first applies a rolling minimum and then rolling maximum to data. This is useful particularly for optics data where spikes are particles in the water column and are not normally distributed.
 86 | 
 87 | In the case of salinity, the *median* approach is likely best, as "spikes" would be positive and negative (Gaussian distribution).
 88 | 
 89 | 
 90 | ```python
 91 | salt_base, salt_spike = gt.cleaning.despike(salt, window_size=5, spike_method='median')
 92 | 
 93 | fig, ax = plt.subplots(2, 1, figsize=[9, 6], sharex=True, dpi=90)
 94 | 
 95 | gt.plot(x, y, salt_base, cmap=cmo.haline, ax=ax[0])
 96 | ax[0].set_title('Despiked using median filter')
 97 | ax[0].cb.set_label('Salinity baseline')
 98 | ax[0].set_xlim(50,150)
 99 | ax[0].set_xlabel('')
100 | 
101 | gt.plot(x, y, salt_spike, cmap=cm.RdBu_r, vmin=-6e-3, vmax=6e-3, ax=ax[1])
102 | ax[1].cb.set_label('Salinity spikes')
103 | ax[1].set_xlim(50,150)
104 | 
105 | plt.xticks(rotation=0)
106 | plt.show()
107 | ```
108 | ![png](img/output_22_0.png)
109 | 
110 | 
111 | ### Rolling window
112 | 
113 | The rolling window method simply applies an aggregating function (`mean, median, std, min, max`) to the dataset.
114 | Because the above example is equivalent to a rolling median, we show what a rolling `75th percentile` looks like instead.
115 | 
116 | This could be used to create additional filters by users. Note that in this more complex example we create a wrapper function for the percentile so that we can tell the percentile function that we want the 75th percentile and we want to calculate this along the nth axis.
117 | 
118 | 
119 | ```python
120 | def seventyfith(x, axis=0):
121 |     # wrapper function so we can pass axis and percentile to
122 |     # the input function
123 |     return np.percentile(x, 75, axis=axis)
124 | 
125 | # other numpy functions also work: np.mean, np.median, np.std
126 | salt_roll75 = gt.cleaning.rolling_window(salt, seventyfith, window=5)
127 | salt_rollavg = gt.cleaning.rolling_window(salt, mean, window=5)
128 | 
129 | # PLOTTING
130 | fig, ax = plt.subplots(2, 1, figsize=[9, 6], sharex=True, dpi=90)
131 | 
132 | gt.plot(x, y, salt_roll75, cmap=cmo.haline, ax=ax[0])
133 | ax[0].set_title('75$^{th}$ for a rolling window with size 5')
134 | ax[0].cb.set_label('Salinity baseline')
135 | ax[0].set_xlim(50,150)
136 | ax[0].set_xlabel('')
137 | 
138 | gt.plot(x, y, salt_roll75 - salt, cmap=cm.RdBu_r, vmin=-6e-3, vmax=6e-3, ax=ax[1])
139 | ax[1].cb.set_label('Difference from original data')
140 | ax[1].set_xlim(50,150)
141 | 
142 | plt.xticks(rotation=0)
143 | plt.show()
144 | ```
145 | ![png](img/output_24_0.png)
146 | 
147 | 
148 | ### Savitzky-Golay
149 | The Savitzky-Golay function fits a low order polynomial to a rolling window of the time series. This has the result of smoothing the data. A larger window with a lower order polynomial with have a smoother fit.
150 | 
151 | We recommend a 2nd order kernel. Here we use first order to show that the difference can be quite big.
152 | 
153 | 
154 | ```python
155 | salt_savgol = gt.cleaning.savitzky_golay(salt, window_size=11, order=1)
156 | 
157 | # PLOTTING
158 | fig, ax = plt.subplots(2, 1, figsize=[9, 6], sharex=True, dpi=90)
159 | 
160 | gt.plot(x, y, salt_savgol, cmap=cmo.haline, ax=ax[0])
161 | ax[0].set_title('Smoothing the data with Savitzky-Golay')
162 | ax[0].cb.set_label('Smoothed salinity')
163 | ax[0].set_xlim(50,150)
164 | ax[0].set_xlabel('')
165 | 
166 | gt.plot(x, y, salt_savgol - salt, cmap=cm.RdBu, vmin=-6e-3, vmax=6e-3, ax=ax[1])
167 | ax[1].cb.set_label('Difference from original')
168 | ax[1].set_xlim(50,150)
169 | 
170 | plt.show()
171 | ```
172 | 
173 | 
174 | ![png](img/output_26_0.png)
175 | 
176 | ## Wrapper functions
177 | 
178 | Wrapper functions have been designed to make this process more efficient, which is demonstrated below with **temperature** and **salinity**.
179 | 
180 | 
181 | ```python
182 | temp_qc = gt.calc_physics(temp, x, y,
183 |                           iqr=1.5, depth_threshold=0,
184 |                           spike_window=5, spike_method='median',
185 |                           savitzky_golay_window=11, savitzky_golay_order=2)
186 | 
187 | # PLOTTING
188 | fig, ax = plt.subplots(3, 1, figsize=[9, 8.5], sharex=True, dpi=90)
189 | 
190 | gt.plot(x, y, temp, cmap=cmo.thermal, ax=ax[0])
191 | gt.plot(x, y, temp_qc, cmap=cmo.thermal, ax=ax[1])
192 | gt.plot(x, y, temp_qc - temp, cmap=cm.RdBu_r, vmin=-0.05, vmax=0.05, ax=ax[2])
193 | 
194 | [a.set_xlabel('') for a in ax]
195 | 
196 | ax[0].cb.set_label('Original Data')
197 | ax[1].cb.set_label('Cleaned Data')
198 | ax[2].cb.set_label('Difference from Original')
199 | 
200 | plt.show()
201 | ```
202 | 
203 | 
204 |     ==================================================
205 |     Physics Variable:
206 |         Removing outliers with IQR * 1.5: 0 obs
207 |         Removing spikes with rolling median (spike window=5)
208 |         Smoothing with Savitzky-Golay filter (window=11, order=2)
209 | 
210 | 
211 | 
212 | ![png](img/output_28_1.png)
213 | 
214 | 
215 | 
216 | ```python
217 | salt_qc = gt.calc_physics(salt, x, y,
218 |                           mask_frac=0.2, iqr=2.5,
219 |                           spike_window=5, spike_method='median',
220 |                           savitzky_golay_window=11, savitzky_golay_order=2)
221 | 
222 | # PLOTTING
223 | fig, ax = plt.subplots(3, 1, figsize=[9, 8.5], sharex=True, dpi=90)
224 | 
225 | gt.plot(x, y, salt, cmap=cmo.haline, ax=ax[0])
226 | gt.plot(x, y, salt_qc, cmap=cmo.haline, ax=ax[1])
227 | gt.plot(x, y, salt_qc - salt, cmap=cm.RdBu_r, vmin=-0.02, vmax=0.02, ax=ax[2])
228 | 
229 | [a.set_xlabel('') for a in ax]
230 | 
231 | ax[0].cb.set_label('Original Data')
232 | ax[1].cb.set_label('Cleaned Data')
233 | ax[2].cb.set_label('Difference from Original')
234 | 
235 | plt.show()
236 | ```
237 | 
238 | 
239 |     ==================================================
240 |     Physics Variable:
241 |         Removing outliers with IQR * 2.5: 1551 obs
242 |         Removing spikes with rolling median (spike window=5)
243 |         Removing horizontal outliers (fraction=0.2, multiplier=2.5)
244 |         Smoothing with Savitzky-Golay filter (window=11, order=2)
245 | 
246 | 
247 | 
248 | ![png](img/output_29_1.png)
249 | 
250 | 
251 | ```python
252 | dat['temp_qc'] = temp
253 | dat['salt_qc'] = salt
254 | ```
255 | 


--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | glidertools
2 | docutils
3 | recommonmark
4 | sphinx-rtd-theme
5 | 


--------------------------------------------------------------------------------
/docs/saving.md:
--------------------------------------------------------------------------------
 1 | Saving data
 2 | ===========
 3 | 
 4 | We have not created an explicit way to save data in GliderTools. This is primarily due to the fact that the package is built on top of two packages that already do this very well: [*pandas*](https://pandas.pydata.org/pandas-docs/stable/getting_started/overview.html) and [*xarray*](http://xarray.pydata.org/en/stable/).
 5 | *pandas* is widely used and deals with tabular formatted data (2D). *xarray* widely used in earth sciences as it supports multi-dimensional indexing (3D+). We highly recommend that you read through the documentation for these packages as they are incredibly powerful and you would benefit from knowing these tools regardless of using GliderTools or not!
 6 | 
 7 | We have written GliderTools primarily with *xarray* as the backend, due to the ability to store attributes (or metadata) alongside the data - something that *pandas* does not yet do. Moreover, we have also created the tool so that metadata is passed to the output of each function, while appending the function call to the *history* attribute. This ensures that the user of the data knows when and what functions (and arguements) were called and for which version of GliderTools this was done.
 8 | 
 9 | Examples
10 | --------
11 | 
12 | First we give an example of how to save and read files to netCDF (which we recommend).
13 | 
14 | ```python
15 | import xarray as xr
16 | 
17 | # xds is an xarray.DataFrame with record of dimensions, coordinates and variables
18 | xds.to_netcdf('data_with_meta.nc')
19 | 
20 | # this file can simply be loaded in the same way, without using GliderTools
21 | # all the information that was attached to the data is still in the netCDF
22 | xds = xr.open_dataset('data_with_meta.nc')
23 | ```
24 | 
25 | In this second example we show how to save the data to a CSV. While this is a common and widely used format, we do not recommend this as the go to format, as all metadata is lost when the file is saved.
26 | ```python
27 | import pandas as pd
28 | 
29 | # If you prefer to save your data as a text file, you can easily do this with Pandas
30 | # note that converting the file to a dataframe discards all the metadata
31 | df = xds.to_dataframe()
32 | df.to_csv('data_without_meta.csv')
33 | 
34 | # this file can simply be loaded in the same way, without using GliderTools
35 | # there will be no more metadata attached to each variable
36 | df = pd.read_csv('data_without_meta.csv')
37 | 
38 | # finally, you can also convert the file back to an xarray.Dataset
39 | # however, the data will still be lost
40 | xds = df.to_xarray()
41 | ```
42 | 


--------------------------------------------------------------------------------
/docs/static/css/custom.css:
--------------------------------------------------------------------------------
 1 | div#cheat-sheet.section h1{
 2 | 	font-size: 0px;
 3 | }
 4 | 
 5 | .highlight{
 6 |     background-color: #efefef;
 7 |     border-color: #c9c9c9;
 8 |     border-width: 1px;
 9 |     border-style: solid;
10 | }
11 | 


--------------------------------------------------------------------------------
/docs/whats-new.rst:
--------------------------------------------------------------------------------
  1 | .. currentmodule:: glidertools
  2 | 
  3 | What's New
  4 | ===========
  5 | 
  6 | .. Template (do not remove)
  7 |     ------------------------
  8 | 
  9 |     Breaking changes
 10 |     ~~~~~~~~~~~~~~~~
 11 |     Description. (:pull:`ii`, :issue:`ii`). By `Name <https://github.com/github_username>`_.
 12 | 
 13 |     New Features
 14 |     ~~~~~~~~~~~~
 15 | 
 16 |     Documentation
 17 |     ~~~~~~~~~~~~~
 18 | 
 19 |     Internal Changes
 20 |     ~~~~~~~~~~~~~~~~
 21 | 
 22 |     Bug fixes
 23 |     ~~~~~~~~~
 24 |     - Dark count corrections for optical sensors(:pull:'110'). By 'Isabelle Giddy <https://github.com/isgiddy>'_.
 25 | 
 26 | 
 27 | v2023.07.25 (2023/07/25)
 28 | ------------------------
 29 | 
 30 | .. _whats-new.2023.07.25:
 31 | 
 32 | New Features
 33 | ~~~~~~~~~~~~
 34 | - added import for VOTO seaexplorer data (:pull:`170`) By `Martin Mohrmann <https://github.com/MartinMohrmann>`_.
 35 | - added versatile, depth dependent masking (:pull:`172`) and per profile grouping (:pull:`175`). By `Martin Mohrmann <https://github.com/MartinMohrmann>`_.
 36 | - add concatenation of two or more datasets (:pull:`173`), even with different set of variables (:pull:`183`). By `Martin Mohrmann <https://github.com/MartinMohrmann>`_.
 37 | 
 38 | Breaking changes
 39 | ~~~~~~~~~~~~~~~~
 40 | - Changed the behavior of `find_dive_phase` and `calc_dive_number` to use a smaller depth threshold when determining a valid dive (15 dbar down from 200 dbar).  this is also now adjusteable. (:pull:`134`) By `Tom Hull <https://github.com/tomhull>`_.
 41 | - GliderTools defaults for Figure creation were changed. Automatic application of plt.tight_layout was dropped in favour of more flexible embedding of GliderTools plots into existing layouts/subplots. (:pull:`185`). By `Martin Mohrmann <https://github.com/MartinMohrmann>`_.
 42 | - The mixed layer depth algorithm was corrected. (:pull:`169`, :issue:`168`). By `Martin Mohrmann <https://github.com/MartinMohrmann>`_. API change! Existing mixed layer computation code must be adapted.
 43 | 
 44 | Internal changes
 45 | ~~~~~~~~~~~~~~~~
 46 | - Removed outdated python-seawater dependency (:pull:`186`). By `Callum Rollo <https://github.com/callumrollo>`_.
 47 | - Update documentation of required dependencies (:pull:`174`). By `Sören Thomsen <https://github.com/soerenthomsen>`_.
 48 | - Some cleanup of old python2 dependencies (:pull:`166`). By `Martin Mohrmann <https://github.com/MartinMohrmann>`_.
 49 | - Replace deprecated pkg_resources with importlib.metadata (:pull:`187`). By `Martin Mohrmann <https://github.com/MartinMohrmann>`_.
 50 | - Add release guide to documentation (:pull:`186`). By `Martin Mohrmann <https://github.com/MartinMohrmann>`_.
 51 | - Cleanup of unused imports (:pull:`174`). By `Martin Mohrmann <https://github.com/MartinMohrmann>`_.
 52 | 
 53 | Bug fixes
 54 | ~~~~~~~~~
 55 | - Adapt demo notebook to updated Glider Tools (:pull:`179`). By `Callum Rollo <https://github.com/callumrollo>`_.
 56 | - Fix netCDF attribute handling for non-string attributes (:pull:`194`). By `Martin Mohrmann <https://github.com/MartinMohrmann>`_.
 57 | - Adapt quenching_report to modern numpy versions (:pull:`191`) By `Martin Mohrmann <https://github.com/MartinMohrmann>`_.
 58 | - Improve error handling for MLD computation (:pull:`190`). By `Martin Mohrmann <https://github.com/MartinMohrmann>`_.
 59 | 
 60 | Thanks also to `Julius Busecke <https://github.com/jbusecke>`_ for help with the github CI, `Sam Woodman <https://github.com/smwoodman>`_ for detailed bug reports and everyone else who has contributed.
 61 | 
 62 | 
 63 | v2022.12.13 (2022/12/13)
 64 | ------------------------
 65 | 
 66 | .. _whats-new.2022.12.13:
 67 | 
 68 | Internal changes
 69 | ~~~~~~~~~~~~~~~~
 70 | - Refactoring and update of testing and development framework, update of flake, black and almost all python dependencies
 71 | 
 72 | 
 73 | Breaking changes
 74 | ~~~~~~~~~~~~~~~~
 75 | - Fixed processing/calc_oxygen (:pull: `116`, :issue: `112`) By `Callum Rollo <https://github.com/callumrollo>`_.
 76 | 
 77 | 
 78 | Internal Changes
 79 | ~~~~~~~~~~~~~~~~
 80 | - Implemented code linting as part of the CI (:pull:`100`) By `Julius Busecke <https://github.com/jbusecke>`_.
 81 | 
 82 | Documentation
 83 | ~~~~~~~~~~~~~
 84 | - Added conda installation instructions + badge. (:pull:`94`) By `Julius Busecke <https://github.com/jbusecke>`_.
 85 | 
 86 | Bug fixes
 87 | ~~~~~~~~~
 88 | - Replaced `skyfield` dependency with `astral`, fixing sunrise/sunset problems at high latitudes.  By `Isabelle Sindiswa Giddy <https://github.com/isgiddy>`_.
 89 | 
 90 | v2021.03 (2021/3/30)
 91 | -------------------------
 92 | 
 93 | .. _whats-new.2021.03:
 94 | 
 95 | Documentation
 96 | ~~~~~~~~~~~~~
 97 | - Updated contributor guide for conda based workflow. (:pull:`81`) By `Julius Busecke <https://github.com/jbusecke>`_.
 98 | 
 99 | Internal Changes
100 | ~~~~~~~~~~~~~~~~
101 | - Migration of CI to conda based workflow with multiple python versions. (:pull:`54`)  By `Julius Busecke <https://github.com/jbusecke>`_.
102 | - Revamp distribution actions. (:pull:`82`) By `Julius Busecke <https://github.com/jbusecke>`_.
103 | - Migrate from astral to skyfield (:pull:'121') By 'Isabelle Giddy <https://github.com/isgiddy>'_.
104 | 


--------------------------------------------------------------------------------
/docs/wishlist.md:
--------------------------------------------------------------------------------
 1 | Wishlist
 2 | ========
 3 | 
 4 | A list of things we'd love to add to GliderTools and the work involved.
 5 | 
 6 | 1. Support for raw files from Slocum gliders and Seagliders with the following additional functionality
 7 |     - Thermal lag correction for each of the gliders supported in the suggestion above.
 8 |     - Support for hadware modules by model and manufacturer
 9 | 2. Make final data output compatible with www.OceanGliders.org data format, https://www.oceangliders.org/taskteams/data-management/
10 | 


--------------------------------------------------------------------------------
/glidertools/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/glidertools/.DS_Store


--------------------------------------------------------------------------------
/glidertools/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import warnings as _warnings
 4 | 
 5 | from . import (  # NOQA
 6 |     calibration,
 7 |     cleaning,
 8 |     flo_functions,
 9 |     load,
10 |     mapping,
11 |     optics,
12 |     physics,
13 |     utils,
14 | )
15 | from .helpers import package_version
16 | from .mapping import grid_data, interp_obj
17 | from .plot import logo as make_logo
18 | from .plot import plot_functions as plot
19 | from .processing import *
20 | 
21 | 
22 | __version__ = package_version()
23 | _warnings.filterwarnings("ignore", category=RuntimeWarning)
24 | 


--------------------------------------------------------------------------------
/glidertools/calibration.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | from inspect import currentframe as getframe
  4 | 
  5 | import numpy as _np
  6 | 
  7 | from .helpers import transfer_nc_attrs
  8 | 
  9 | 
 10 | def bottle_matchup(
 11 |     gld_dives,
 12 |     gld_depth,
 13 |     gld_time,
 14 |     btl_depth,
 15 |     btl_time,
 16 |     btl_values,
 17 |     min_depth_diff_metres=5,
 18 |     min_time_diff_minutes=120,
 19 | ):
 20 |     """
 21 |     Performs a matchup between glider and bottle samples based on time and
 22 |     depth (or density).
 23 | 
 24 |     Parameters
 25 |     ----------
 26 |     gld_depth : np.array, dtype=float
 27 |         glider depth at time of measurement
 28 |     gld_dives : np.array, dtype=float
 29 |         dive index of the glider (given by glider toolbox)
 30 |     gld_time : np.array, dtype=datetime64
 31 |         glider time that will be used as primary indexing variable
 32 |     btl_time: np.array, dtype=datetime64
 33 |         in-situ bottle sample's time
 34 |     btl_depth : np.array, dtype=float
 35 |         depth of in-situ sample
 36 |     btl_values : np.array, dtype=float
 37 |         the value that will be interpolated onto the glider time and
 38 |         depth coordinates (time, depth/dens)
 39 |     min_depth_diff_metres : float, default=5
 40 |         the minimum allowable depth difference
 41 |     min_time_diff_minutes : float, default=120
 42 |         the minimum allowable time difference between bottles and glider
 43 | 
 44 |     Returns
 45 |     -------
 46 |     array : float
 47 |         Returns the bottle values in the format of the glider
 48 |         i.e. the length of the output will be the same as gld_*
 49 | 
 50 |     """
 51 |     from pandas import Series
 52 | 
 53 |     # metadata preservation
 54 |     var = gld_depth.copy()
 55 |     if isinstance(btl_values, Series):
 56 |         var_name = btl_values.name + "_bottle_matchups"
 57 |     else:
 58 |         var_name = "bottle_matchups"
 59 | 
 60 |     # make all input variables np.arrays
 61 |     args = gld_time, gld_depth, gld_dives, btl_time, btl_depth, btl_values
 62 |     gld_time, gld_depth, gld_dives, btl_time, btl_depth, btl_values = map(
 63 |         _np.array, args
 64 |     )
 65 | 
 66 |     # create a blank array that matches glider data
 67 |     # (placeholder for calibration bottle values)
 68 |     gld_cal = _np.ones_like(gld_depth) * _np.nan
 69 | 
 70 |     # loop through each ship based CTD station
 71 |     stations = _np.unique(btl_time)
 72 |     for c, t in enumerate(stations):
 73 |         # index of station from ship CTD
 74 |         btl_idx = t == btl_time
 75 |         # number of samples per station
 76 |         btl_num = btl_idx.sum()
 77 | 
 78 |         # string representation of station time
 79 |         t_str = str(t.astype("datetime64[m]")).replace("T", " ")
 80 |         t_dif = abs(gld_time - t).astype("timedelta64[m]").astype(float)
 81 | 
 82 |         # loop through depths for the station
 83 |         if t_dif.min() < min_time_diff_minutes:
 84 |             # index of dive where minimum difference occurs
 85 |             i = _np.where(gld_dives[_np.nanargmin(t_dif)] == gld_dives)[0]
 86 |             n_depths = 0
 87 |             for depth in btl_depth[btl_idx]:
 88 |                 # an index for bottle where depth and station match
 89 |                 j = btl_idx & (depth == btl_depth)
 90 |                 # depth difference for glider profile
 91 |                 d_dif = abs(gld_depth - depth)[i]
 92 |                 # only match depth if diff is less than given threshold
 93 |                 if _np.nanmin(d_dif) < min_depth_diff_metres:
 94 |                     # index of min diff for this dive
 95 |                     k = i[_np.nanargmin(d_dif)]
 96 |                     # assign the bottle values to the calibration output
 97 |                     gld_cal[k] = btl_values[j]
 98 |                     n_depths += 1
 99 |             print(
100 |                 (
101 |                     "[stn {}/{}] SUCCESS: {} ({} of {} samples) match-up "
102 |                     "within {} minutes"
103 |                 ).format(c, stations.size, t_str, n_depths, btl_num, t_dif.min())
104 |             )
105 |         else:
106 |             print(
107 |                 (
108 |                     "[stn {}/{}]  FAILED: {} Couldn't find samples within "
109 |                     "constraints"
110 |                 ).format(c, stations.size, t_str)
111 |             )
112 | 
113 |     attrs = dict(units="", positive="", comment="", standard_name="", axis="")
114 |     gld_cal = transfer_nc_attrs(getframe(), var, gld_cal, var_name, **attrs)
115 | 
116 |     return gld_cal
117 | 
118 | 
119 | def model_metrics(x, y, model):
120 |     from numpy import array
121 |     from sklearn import metrics
122 | 
123 |     x = array(x).reshape(-1, 1)
124 |     y = array(y)
125 | 
126 |     y_hat = model.predict(x).squeeze()
127 |     ol = (
128 |         model.outliers_
129 |         if hasattr(model, "outliers_")
130 |         else _np.zeros_like(y).astype(bool)
131 |     )
132 | 
133 |     # formula = '$f(x) = {:.2g}x + {:.2g}$'.format(
134 |     #     model.coef_[0], model.intercept_
135 |     # )
136 | 
137 |     # metrics calculation
138 |     out = dict(
139 |         model_type=model.__class__.__name__,
140 |         model_slope=model.coef_[0],
141 |         model_intercept=model.intercept_,
142 |     )
143 | 
144 |     params = {
145 |         "param_" + key: value for key, value in model.__class__().get_params().items()
146 |     }
147 | 
148 |     results = dict(
149 |         r2_all=metrics.r2_score(y, y_hat),
150 |         r2_robust=metrics.r2_score(y[~ol], y_hat[~ol]),
151 |         rmse_all=metrics.mean_squared_error(y, y_hat) ** 0.5,
152 |         rmse_robust=metrics.mean_squared_error(y[~ol], y_hat[~ol]) ** 0.5,
153 |     )
154 | 
155 |     out.update(params)
156 |     out.update(results)
157 | 
158 |     return out
159 | 
160 | 
161 | def model_figs(bottle_data, glider_data, model, ax=None):
162 |     """
163 |     Creates the figure for a linear model fit.
164 | 
165 |     Parameters
166 |     ----------
167 |     bottle_data : np.array, shape=[m, ]
168 |         bottle data with the number of matched bottle/glider samples
169 |     glider_data : np.array, shape[m, ]
170 |         glider data with the number of matched bottle/glider samples
171 |     model : sklearn.linear_model object
172 |         a fitted model that you want to test.
173 | 
174 |     Returns
175 |     -------
176 |     figure axes : matplotlib.Axes
177 |         A figure showing the fit of the
178 |     """
179 | 
180 |     from matplotlib.offsetbox import AnchoredText
181 |     from matplotlib.pyplot import subplots
182 |     from numpy import array, isnan, linspace, nanmax, nanmin
183 |     from sklearn import metrics
184 | 
185 |     y = array(bottle_data)
186 |     x = array(glider_data).reshape(-1, 1)
187 | 
188 |     assert not any(isnan(x)), "There are nans in glider_data"
189 |     assert not any(isnan(y)), "There are nans in bottle_data"
190 |     assert x.size == y.size, "glider_data and bottle_data are not the same size"
191 |     assert (
192 |         x.size == model.outliers_.size
193 |     ), "model.outliers_ is a different size to bottle_data"
194 | 
195 |     xf = linspace(nanmin(x), nanmax(x), 100).reshape(-1, 1)
196 |     y_hat = model.predict(x).squeeze()
197 |     ol = (
198 |         model.outliers_
199 |         if hasattr(model, "outliers_")
200 |         else _np.zeros_like(y).astype(bool)
201 |     )
202 |     formula = "$f(x) = {:.2g}x + {:.2g}$".format(model.coef_[0], model.intercept_)
203 |     formula = formula if not formula.endswith("+ 0$") else formula[:-5] + "$"
204 | 
205 |     print(x.shape, xf.shape)
206 |     # PLOTTING FROM HERE ON #############
207 |     if ax is None:
208 |         _, ax = subplots(1, 1, figsize=[6, 5], dpi=120)
209 |     ax.plot(x, y, "o", c="k", zorder=99, label="Samples ({})".format(x.size))[0]
210 |     ax.plot(xf, model.predict(xf), c="#AAAAAA", label="{}".format(formula))
211 |     ax.plot(
212 |         x[ol],
213 |         y[ol],
214 |         "ow",
215 |         visible=ol.any(),
216 |         mew=1,
217 |         mec="k",
218 |         zorder=100,
219 |         label="Outliers ({})".format(ol.sum()),
220 |     )
221 |     ax.legend(fontsize=10, loc="upper left")
222 | 
223 |     # Additional info about the model displayed from here on
224 |     params = model.get_params()
225 |     rcModel = model.__class__().get_params()
226 |     for key in rcModel:
227 |         if rcModel[key] == params[key]:
228 |             params.pop(key)
229 | 
230 |     # metrics calculation
231 |     r2_all = metrics.r2_score(y, y_hat)
232 |     r2_robust = metrics.r2_score(y[~ol], y_hat[~ol])
233 |     rmse_all = metrics.mean_squared_error(y, y_hat) ** 0.5
234 |     rmse_robust = metrics.mean_squared_error(y[~ol], y_hat[~ol]) ** 0.5
235 | 
236 |     # string formatting
237 |     m_name = "Huber Regresion"
238 |     r2_str = "$r^2$ score: {:.2g} ({:.2g})\n"
239 |     rmse_str = "RMSE: {:.2g} ({:.2g})"
240 |     placeholder = "{}: {}\n"
241 | 
242 |     # formatting the strings to be displayed
243 |     params_str = "{} Params\n".format(m_name)
244 |     params_str += "".join([placeholder.format(key, params[key]) for key in params])
245 |     params_str += "\nResults (robust)\n"
246 |     params_str += r2_str.format(r2_all, r2_robust)
247 |     params_str += rmse_str.format(rmse_all, rmse_robust)
248 | 
249 |     # placing the text box
250 |     anchored_text = AnchoredText(
251 |         params_str, loc=4, prop=dict(size=10, family="monospace"), frameon=True
252 |     )
253 |     anchored_text.patch.set_boxstyle("round, pad=0.3, rounding_size=0.2")
254 |     anchored_text.patch.set_linewidth(0.2)
255 |     ax.add_artist(anchored_text)
256 | 
257 |     # axes labelling
258 |     ax.set_ylabel("Bottle sample")
259 |     ax.set_xlabel("Glider sample")
260 |     ax.set_title("Calibration curve using {}".format(m_name))
261 | 
262 |     return ax
263 | 
264 | 
265 | def robust_linear_fit(
266 |     gld_var, gld_var_cal, interpolate_limit=3, return_figures=True, **kwargs
267 | ):
268 |     """
269 |     Perform a robust linear regression using a Huber Loss Function to remove
270 |     outliers. Returns a model object that behaves like a scikit-learn model
271 |     object with a model.predict method.
272 | 
273 |     Parameters
274 |     ----------
275 |     gld_var : np.array, shape=[n, ]
276 |         glider variable
277 |     gld_var_cal : np.array, shape=[n, ]
278 |         bottle variable on glider indicies
279 |     fit_intercept : bool, default=False
280 |         forces 0 intercept if False
281 |     return_figures : bool, default=True
282 |         create figure with metrics
283 |     interpolate_limit : int, default=3
284 |         glider data may have missing points. The glider data is thus
285 |         interpolated to ensure that as many bottle samples as possible have a
286 |         match-up with the glider.
287 |     **kwargs : keyword=value pairs
288 |         will be passed to the Huber Loss regression to adjust regression
289 | 
290 |     Returns
291 |     -------
292 |     model : sklearn.linear_model
293 |         A fitted model. Use model.predict(glider_var) to create the calibrated
294 |         output.
295 |     """
296 | 
297 |     from pandas import Series
298 |     from sklearn import linear_model
299 | 
300 |     from .helpers import GliderToolsError
301 | 
302 |     # make all input arguments numpy arrays
303 |     args = gld_var, gld_var_cal
304 |     gld_var, gld_var_cal = map(_np.array, args)
305 | 
306 |     if _np.isnan(gld_var_cal).all():
307 |         raise GliderToolsError("There are no matches in your bottle data")
308 | 
309 |     gld_var = Series(gld_var).interpolate(limit=interpolate_limit).values
310 | 
311 |     # get bottle and glider values for the variables
312 |     i = ~_np.isnan(gld_var_cal) & ~_np.isnan(gld_var)
313 |     y = gld_var_cal[i]
314 |     x = gld_var[i][:, None]
315 | 
316 |     if "fit_intercept" not in kwargs:
317 |         kwargs["fit_intercept"] = False
318 |     model = linear_model.HuberRegressor(**kwargs)
319 |     model.fit(x, y)
320 | 
321 |     if return_figures:
322 |         model_figs(x, y, model)
323 | 
324 |     model._predict = model.predict
325 | 
326 |     def predict(self, x):
327 |         """
328 |         A wrapper around the normal predict function that takes
329 |         nans into account. An extra dimension is also added if needed.
330 |         """
331 |         from xarray import DataArray
332 | 
333 |         var = x.copy()
334 |         x = _np.array(x)
335 |         out = _np.ndarray(x.size) * _np.NaN
336 |         i = ~_np.isnan(x)
337 |         x = x[i].reshape(-1, 1)
338 |         out[i.squeeze()] = self._predict(x).squeeze()
339 | 
340 |         out = transfer_nc_attrs(getframe(), var, out, "_calibrated")
341 |         if hasattr(self, "info") & isinstance(out, DataArray):
342 |             out.attrs["model_info"] = str(self.info)
343 | 
344 |         return out
345 | 
346 |     model.predict = predict.__get__(model, linear_model.HuberRegressor)
347 |     model.info = model_metrics(x, y, model)
348 | 
349 |     return model
350 | 


--------------------------------------------------------------------------------
/glidertools/helpers.py:
--------------------------------------------------------------------------------
  1 | import inspect
  2 | 
  3 | 
  4 | def package_version():
  5 |     # package version will only be returned if package is installed through e.g. pip or conda,
  6 |     # development code is unaware of its own version (and there is not such a thing in dev anyway).
  7 |     # Advantage: We don't have to keep track of versioning manually
  8 |     from importlib.metadata import PackageNotFoundError, version
  9 | 
 10 |     try:
 11 |         version = version("glidertools")
 12 |     except PackageNotFoundError:
 13 |         version = "version_undefined"
 14 |     return version
 15 | 
 16 | 
 17 | class GliderToolsWarning(UserWarning):
 18 |     pass
 19 | 
 20 | 
 21 | class GliderToolsError(UserWarning):
 22 |     pass
 23 | 
 24 | 
 25 | def time_now():
 26 |     from pandas import Timestamp
 27 | 
 28 |     return str(Timestamp("today"))[:19]
 29 | 
 30 | 
 31 | def rebuild_func_call(frame):
 32 | 
 33 |     arginf = inspect.getargvalues(frame)
 34 |     name = frame.f_code.co_name
 35 |     args = arginf.args
 36 |     locl = arginf.locals
 37 | 
 38 |     module = inspect.getmodule(frame).__name__
 39 |     func = "{}.{}(".format(module, name)
 40 |     n_args = len(args)
 41 |     for c, arg_name in enumerate(args):
 42 |         arg_valu = str(locl[arg_name])
 43 |         if len(arg_valu) < 25:
 44 |             try:
 45 |                 float(arg_valu)
 46 |             except ValueError:
 47 |                 if (arg_valu == "True") | (arg_valu == "False"):
 48 |                     pass
 49 |                 else:
 50 |                     arg_valu = "'{}'".format(arg_valu)
 51 |         else:
 52 |             arg_valu = "<{}>".format(arg_name)
 53 |         func += "{}={}".format(arg_name, arg_valu)
 54 | 
 55 |         if c < (n_args - 1):
 56 |             func += ", "
 57 |         else:
 58 |             func += ")"
 59 | 
 60 |     return func
 61 | 
 62 | 
 63 | def transfer_nc_attrs(frame, input_xds, output_arr, output_name, **attrs):
 64 |     import warnings
 65 | 
 66 |     import xarray as xr
 67 | 
 68 |     not_dataarray = not isinstance(input_xds, xr.DataArray)
 69 |     no_parent_frame = inspect.getmodule(frame.f_back) is None
 70 |     if not_dataarray:
 71 |         if no_parent_frame:
 72 |             msg = (
 73 |                 "Primary input variable is not xr.DataArray data type - "
 74 |                 "no metadata to pass on."
 75 |             )
 76 |             warnings.warn(msg, category=GliderToolsWarning)
 77 |         return output_arr
 78 |     else:
 79 |         if output_name is None:
 80 |             output_name = input_xds.name
 81 |         elif output_name.startswith("_"):
 82 |             output_name = input_xds.name + output_name
 83 | 
 84 |         attributes = input_xds.attrs.copy()
 85 |         history = "" if "history" not in attributes else attributes["history"]
 86 |         history += "[{}] (v{}) {};\n".format(
 87 |             time_now(), package_version(), rebuild_func_call(frame)
 88 |         )
 89 |         attributes.update({"history": history})
 90 |         attributes.update(attrs)
 91 | 
 92 |         keys = list(attributes.keys())
 93 |         for key in keys:
 94 |             if str(attributes[key]) == "":
 95 |                 attributes.pop(key)
 96 | 
 97 |         xds = xr.DataArray(
 98 |             data=output_arr,
 99 |             coords=input_xds.coords,
100 |             dims=input_xds.dims,
101 |             name=output_name,
102 |             attrs=attributes,
103 |         )
104 | 
105 |         return xds
106 | 
107 | 
108 | def printv(verbose, message):
109 |     """
110 |     A helper function that prints message if verbose=True (for cleaner code)
111 | 
112 |     Parameters
113 |     ----------
114 |     verbose : bool
115 |     message : str
116 |     """
117 | 
118 |     if verbose:
119 |         print(message)
120 |     else:
121 |         pass
122 | 


--------------------------------------------------------------------------------
/glidertools/load/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | from .ego import load_mission_nc as ego_mission_netCDF
 4 | from .seaglider import load_multiple_vars as seaglider_basestation_netCDFs
 5 | from .seaglider import show_variables as seaglider_show_variables
 6 | from .slocum import slocum_geomar_matfile
 7 | from .voto_seaexplorer import (
 8 |     voto_concat_datasets,
 9 |     voto_seaexplorer_dataset,
10 |     voto_seaexplorer_nc,
11 | )
12 | 


--------------------------------------------------------------------------------
/glidertools/load/ego.py:
--------------------------------------------------------------------------------
 1 | # base module to load ego files
 2 | 
 3 | from ..utils import calc_dive_phase, dive_phase_to_number
 4 | 
 5 | 
 6 | def load_mission_nc(filename):
 7 |     """
 8 |     Loads an EGO formatted glider mission file.
 9 | 
10 |     Parameters
11 |     ----------
12 |     filename : str
13 |         path and filename of the EGO netCDF file.
14 | 
15 |     Returns
16 |     -------
17 |         an xarray.Dataset object with all netCDF info attached
18 |     ego_data : xr.Dataset
19 |     """
20 | 
21 |     import xarray as xr
22 | 
23 |     xds = xr.open_dataset(filename)
24 | 
25 |     if "PHASE" in xds:
26 |         phase = xds.PHASE.load()
27 |         null_frac = phase.isnull().sum() / phase.size
28 | 
29 |     if (null_frac > 0.2) | ("PHASE" not in xds):
30 |         time = xds.TIME.load()
31 |         press = xds.PRES.load()
32 |         phase = calc_dive_phase(time, press)
33 | 
34 |     xds["DIVES"] = dive_phase_to_number(phase)
35 | 
36 |     return xds
37 | 


--------------------------------------------------------------------------------
/glidertools/load/seaglider.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import numpy as np
  4 | 
  5 | from netCDF4 import Dataset
  6 | 
  7 | from ..helpers import GliderToolsWarning
  8 | 
  9 | 
 10 | # TODO: fix dives indexing (merge dim if same size as other more populated dim)
 11 | # TODO: when dims merge dives are sometimes taken from the wrong dataframe
 12 | 
 13 | 
 14 | def process_files(file_str):
 15 |     from glob import glob
 16 | 
 17 |     if isinstance(file_str, str):
 18 |         files = np.sort(glob(file_str))
 19 | 
 20 |     if len(files) < 1:
 21 |         raise FileNotFoundError("The provided string is not a file path")
 22 |     return files
 23 | 
 24 | 
 25 | def show_variables(files):
 26 |     from pandas import DataFrame
 27 | 
 28 |     files = process_files(files)
 29 | 
 30 |     i = len(files) // 2
 31 | 
 32 |     file = files[i]
 33 |     print("information is based on file: {}".format(file))
 34 | 
 35 |     variables = Dataset(file).variables
 36 |     info = {}
 37 |     for i, key in enumerate(variables):
 38 |         var = variables[key]
 39 |         info[i] = {
 40 |             "name": key,
 41 |             "dims": var.dimensions[0] if len(var.dimensions) == 1 else "string",
 42 |             "units": "" if not hasattr(var, "units") else var.units,
 43 |             "comment": "" if not hasattr(var, "comment") else var.comment,
 44 |         }
 45 | 
 46 |     vars = DataFrame(info).T
 47 | 
 48 |     dim = vars.dims
 49 |     dim[dim.str.startswith("str")] = "string"
 50 |     vars["dims"] = dim
 51 | 
 52 |     vars = (
 53 |         vars.sort_values(["dims", "name"])
 54 |         .reset_index(drop=True)
 55 |         .loc[:, ["dims", "name", "units", "comment"]]
 56 |         .set_index("name")
 57 |         .style
 58 |     )
 59 | 
 60 |     return vars
 61 | 
 62 | 
 63 | def check_var_in_ncfiles(files, key):
 64 | 
 65 |     is_in_files = []
 66 |     for file in files:
 67 |         vardict = Dataset(file).variables
 68 |         if key in vardict:
 69 |             is_in_files += (True,)
 70 |         else:
 71 |             is_in_files += (False,)
 72 | 
 73 |     return any(is_in_files)
 74 | 
 75 | 
 76 | def get_var_dim(files, key):
 77 |     dims = []
 78 |     for file in files:
 79 |         variables = Dataset(file).variables
 80 |         if key in variables:
 81 |             var = variables[key]
 82 |             dims += var.dimensions
 83 |     unique_dims = list(set(dims))
 84 |     if len(unique_dims) > 1:
 85 |         return False
 86 |     elif len(unique_dims) == 1:
 87 |         return unique_dims[0]
 88 |     else:
 89 |         return "string"
 90 | 
 91 | 
 92 | def get_var_units(files, key):
 93 |     from numpy import nanargmax, unique
 94 | 
 95 |     units = [get_var_attrs(file, key, "units") for file in files]
 96 |     units, counts = unique(units, return_counts=True)
 97 |     imax = nanargmax(counts)
 98 | 
 99 |     return units[imax]
100 | 
101 | 
102 | def get_var_attrs(file, key, attr=None):
103 |     vars = Dataset(file).variables
104 |     if key not in vars:
105 |         return
106 |     var = Dataset(file).variables[key]
107 | 
108 |     if attr is None:
109 |         return {k: var.getncattr(k) for k in var.ncattrs()}
110 |     else:
111 |         if hasattr(var, attr):
112 |             return var.getncattr(attr)
113 | 
114 | 
115 | def get_var_coords(files, key):
116 |     """
117 |     Finds the coordinates of the variable for the given netCDF files.
118 | 
119 |     Parameters
120 |     ----------
121 |     files : list
122 |         a list of netCDF glider files
123 |     key : str
124 |         must be a variable in the netCDF
125 | 
126 |     Returns
127 |     -------
128 |     coords : list
129 |         a list of coordinates from a subset of files
130 |     """
131 |     from numpy import concatenate
132 | 
133 |     coords = set([get_var_attrs(f, key, "coordinates") for f in files])
134 |     if None in coords:
135 |         coords.remove(None)
136 |     coords = [c.split() for c in coords]
137 |     if coords != []:
138 |         coords = concatenate(coords).tolist()
139 | 
140 |     return coords
141 | 
142 | 
143 | def get_dim_nobs(files, dim):
144 |     for file in files:
145 |         dimensions = Dataset(file).dimensions
146 |         if dim in dimensions:
147 |             return dimensions[dim].size
148 | 
149 | 
150 | def get_dim_vars(files, dim):
151 |     """
152 |     Returns all the variable names that belong to a dimension
153 |     """
154 | 
155 |     dim_vars = set()  # avoid duplication with a set
156 |     for file in files:  # go through files to ensure all vars are included
157 |         variables = Dataset(file).variables
158 |         for key, var in variables.items():
159 |             # test if the variable belongs to the dimension
160 |             belongs_to_dim = any([dim in d for d in var.dimensions])
161 |             if belongs_to_dim:
162 |                 dim_vars.update([key])
163 |     # return a numpy array of the dimension variables (useful for indexing)
164 |     return np.array(list(dim_vars))
165 | 
166 | 
167 | def get_dim_same_size(files, dim):
168 |     """
169 |     Get dimension with the same size as the given dimension.
170 |     If more than one is found, return the with most variables.
171 |     """
172 | 
173 |     def sub_dim_same_size(file, dim):
174 |         dimensions = Dataset(file).dimensions
175 |         # make sure that the given dimension is in the file
176 |         same_size = set()
177 |         if dim in dimensions:
178 |             n = dimensions[dim].size
179 |             dimensions.pop(dim)
180 | 
181 |             for d in dimensions:
182 |                 if n == dimensions[d].size:
183 |                     same_size.update([d])
184 |         return list(same_size)
185 | 
186 |     # PART 1 get all dimensions with the same size
187 |     same_size = set(sub_dim_same_size(files[0], dim))
188 |     for file in files[1:]:
189 |         same_size = same_size.intersection(sub_dim_same_size(file, dim))
190 | 
191 |     # if there is only one dimension of the same length return it
192 |     return list(same_size)
193 | 
194 | 
195 | def get_dim_coord(files, dim_name, coord_name, niter=0):
196 |     # ensure time dim for each dimension for merging data
197 |     # 1. search for 'coord' in for the same dimension
198 |     # 2. search for coord in other dimension of same length
199 | 
200 |     dim_vars = get_dim_vars(files, dim_name)
201 |     is_coord = [coord_name in key for key in dim_vars]
202 |     same_size_dims = get_dim_same_size(files, dim_name)
203 | 
204 |     if any(is_coord) and (niter < 2):
205 |         return dim_vars[is_coord][0]
206 |     elif (same_size_dims != []) and (niter < 2):
207 |         for d in same_size_dims:
208 |             return get_dim_coord(files, d, coord_name, niter=niter + 1)
209 |     else:
210 |         return
211 | 
212 | 
213 | def make_variable_dimension_dict(files, variable_names, n_check_files=3):
214 |     import warnings
215 | 
216 |     step_size = len(files) // n_check_files
217 |     step_size = 1 if step_size == 0 else step_size
218 |     files_checklist = files[::step_size]
219 | 
220 |     dims = {}
221 |     for key in variable_names:
222 |         if not check_var_in_ncfiles(files_checklist, key):
223 |             msg = key + " was not found in the files"
224 |             warnings.warn(msg, GliderToolsWarning)
225 |             continue
226 |         single_dim = get_var_dim(files_checklist, key)
227 |         if not single_dim:
228 |             continue
229 |         else:
230 |             dim = single_dim
231 | 
232 |         if dim not in dims:
233 |             dims[dim] = set()
234 | 
235 |         dims[dim].update([key])
236 |         dims[dim].update(get_var_coords(files_checklist, key))
237 | 
238 |     # get compulsory time and depth variables (if present)
239 |     for d in dims:
240 |         dim = dims[d]
241 |         if d == "string":
242 |             continue
243 |         has_coord = any(["time" in v for v in dim])
244 | 
245 |         if not has_coord:
246 |             coord = get_dim_coord(files_checklist, d, "time")
247 |             if coord:
248 |                 dims[d].update([coord])
249 |             else:
250 |                 msg = "Could not find a time coordinate for dim: {}".format(d)
251 |                 warnings.warn(msg, GliderToolsWarning)
252 |     return dims
253 | 
254 | 
255 | def read_nc_files_divevars(files, keys, verbose=True, return_skipped=False):
256 |     from os import path
257 | 
258 |     from numpy.ma import row_stack
259 |     from pandas import DataFrame, concat
260 | 
261 |     if not verbose:
262 |         from numpy import arange as trange
263 |     else:
264 |         from tqdm import trange
265 | 
266 |     data = []
267 |     error = ""
268 |     skipped_files = []
269 |     progress_bar = trange(len(files))
270 |     d = 0
271 |     for i in progress_bar:
272 |         fname = files[i]
273 |         nc = Dataset(fname)
274 | 
275 |         d = nc.dive_number if hasattr(nc, "dive_number") else d + 1
276 | 
277 |         nc_keys = [k for k in filter(lambda k: k in nc.variables, keys)]
278 |         if nc_keys:
279 |             skipped = set(keys) - set(nc_keys)
280 |             if skipped:
281 |                 error += "{} not in {}\n".format(str(skipped), path.split(fname)[1])
282 |             arr = row_stack([nc.variables[k][:] for k in nc_keys])
283 |             nc.close()
284 | 
285 |             df = DataFrame(arr.T, columns=nc_keys)
286 |             df["dives"] = d
287 |             data += (df,)
288 |         else:
289 |             skipped_files += (fname,)
290 |             error += "{} was skipped\n".format(fname)
291 | 
292 |     if len(error) > 0:
293 |         print(error)
294 |     data = concat(data, ignore_index=True)
295 | 
296 |     if return_skipped:
297 |         return data, skipped_files
298 |     else:
299 |         return data
300 | 
301 | 
302 | def read_nc_files_strings(files, keys, verbose=True):
303 |     from numpy import array, r_
304 |     from pandas import DataFrame
305 | 
306 |     if not verbose:
307 |         from numpy import arange as trange
308 |     else:
309 |         from tqdm import trange
310 | 
311 |     data = []
312 |     idx = []
313 |     d = 0
314 |     for i in trange(files.size):
315 |         fname = files[i]
316 |         nc = Dataset(fname)
317 |         d = nc.dive_number if hasattr(nc, "dive_number") else d + 1
318 |         arr = r_[[nc.variables[k][:].squeeze() for k in keys]]
319 |         nc.close()
320 |         data += (arr,)
321 |         idx += (d,)
322 |     df = DataFrame(array(data, dtype=str), columns=keys)
323 |     for col in df:
324 |         df[col] = df[col].str.encode("ascii", "ignore").str.decode("ascii")
325 |         try:
326 |             df[col] = df[col].values.astype(float)
327 |         except ValueError:
328 |             pass
329 |     df["dives"] = idx
330 | 
331 |     return df
332 | 
333 | 
334 | def process_time(files, df):
335 |     def decode_times_1970(series):
336 |         # DECODING TIMES IF PRESENT
337 |         t0 = np.datetime64("1970-01-01 00:00:00", "s")
338 | 
339 |         # realistic upper and lower limits since 1970
340 |         tmin = np.datetime64("2000-01-01 00:00:00", "s")
341 |         tmax = np.datetime64("2030-01-01 00:00:00", "s")
342 |         lo_lim = (tmin - t0).astype(int)
343 |         up_lim = (tmax - t0).astype(int)
344 | 
345 |         series_masked = series[series.notnull()]
346 |         since1970 = ((series_masked > lo_lim) & (series_masked < up_lim)).all()
347 | 
348 |         if since1970:
349 |             dt = series.values.astype("timedelta64[s]")
350 |             return (t0 + dt).astype("datetime64[ns]")
351 | 
352 |     time_cols = df.columns[["time" in col for col in df]].values.tolist()
353 |     if isinstance(files, str):
354 |         file = [files]
355 |     else:
356 |         file = [files[len(files) // 2]]
357 | 
358 |     if len(time_cols) > 0:
359 |         for col in time_cols:
360 |             units = get_var_units(file, col)
361 |             if units.startswith("seconds since 1970"):
362 |                 df[col + "_dt64"] = decode_times_1970(df[col])
363 |                 df = df.set_index(col + "_dt64", drop=False)
364 |     return df
365 | 
366 | 
367 | def process_dives(df):
368 |     def get_dives(time, depth, dives=None):
369 |         from ..utils import calc_dive_number
370 | 
371 |         if dives is None:
372 |             return calc_dive_number(time, depth)
373 |         else:
374 |             # INDEX UP AND DOWN DIVES
375 |             depth = np.array(depth)
376 |             dives = np.array(dives)
377 | 
378 |             updive = np.ndarray(dives.size, dtype=bool) * False
379 |             for d in np.unique(dives):
380 |                 i = d == dives
381 |                 j = np.argmax(depth[i])
382 |                 # bool slice of the dive
383 |                 k = i[i]
384 |                 # make False until the maximum depth
385 |                 k[:j] = False
386 |                 # assign the bool slice to the updive
387 |                 updive[i] = k
388 | 
389 |             dives = dives + (updive / 2)
390 |             return dives
391 | 
392 |     depth_cols = df.columns[["depth" in col for col in df]].values.tolist()
393 |     time_cols = df.columns[["time" in col for col in df]].values.tolist()
394 |     if (len(depth_cols) > 0) & ("dives" in df):
395 |         depth = df[depth_cols[0]]
396 |         time = df[time_cols[0]]
397 |         df["dives"] = get_dives(time, depth, df.dives)
398 | 
399 |     return df
400 | 
401 | 
402 | def load_multiple_vars(
403 |     files,
404 |     variable_names,
405 |     return_merged=False,
406 |     verbose=True,
407 |     keep_global_attrs=False,
408 |     netcdf_attrs={},
409 |     keep_variable_attrs=True,
410 | ):
411 |     """
412 |     Load a list of variables from the SeaGlider object as a
413 |     ``pandas.DataFrame``.
414 | 
415 |     Parameters
416 |     ----------
417 |     variable_names : list
418 |         a list of strings representing the keys you would like to load.
419 | 
420 |     Returns
421 |     -------
422 |     pandas.DataFrame
423 |         Will always have coordinate dimensions loaded (even if not
424 |         specified). These can then be accessed either by the variable
425 |         objects or by .data[<dimension_name>].
426 | 
427 |     Note
428 |     ----
429 |         Using this method resets all previously loaded and stored data (data
430 |         is stored under ``SeaGlider.data={dim: pandas.DataFrame}``).
431 |         This is done to avoid erroneous coordinate matchup with sometimes
432 |         missing data).
433 |     """
434 |     import time
435 | 
436 |     from pandas import DataFrame, to_numeric
437 | 
438 |     from ..utils import merge_dimensions
439 | 
440 |     # create a dictionary with dims as keys and variables as keys
441 |     files = process_files(files)
442 | 
443 |     dims_dict = make_variable_dimension_dict(files, variable_names)
444 |     data = {dim_name: DataFrame() for dim_name in dims_dict}
445 |     merge_list = []  # list of mergable dataframes with longest at the front
446 |     max_len = 0
447 | 
448 |     # LOADING EACH DIMENSION
449 |     for dim_name, var_names in dims_dict.items():
450 | 
451 |         print("\nDIMENSION: {}\n{}".format(dim_name, str(var_names)).replace("'", ""))
452 |         time.sleep(0.2)  # to prevent progress bar interruption
453 |         skipped_files = []
454 |         if dim_name == "string":
455 |             df = read_nc_files_strings(files, var_names, verbose)
456 |         else:
457 |             df, skipped_files = read_nc_files_divevars(
458 |                 files, var_names, verbose, return_skipped=True
459 |             )
460 |         for col in df:
461 |             df[col] = to_numeric(df[col], errors="coerce")
462 | 
463 |         # converting times that have 'seconds since 1970' units
464 |         dim_files = list(set(files.tolist()) - set(skipped_files))
465 |         df = process_time(dim_files, df)
466 |         # splitting up and down if dives present otherwise calc from depth
467 |         df = process_dives(df)
468 | 
469 |         # to make the merge list (with time idx) and longest index at the front
470 |         if np.issubdtype(df.index.dtype, np.datetime64):
471 |             if len(df) > max_len:
472 |                 merge_list.insert(0, dim_name)
473 |                 max_len = len(df)
474 |             else:
475 |                 merge_list.append(dim_name)
476 | 
477 |         # adding columns to dimension based dataframes one by one
478 |         for col in df:
479 |             col = str(col)
480 |             data[dim_name][col] = df[col]
481 | 
482 |     # MERGING DATA IF POSSIBLE
483 |     can_merge = len(merge_list) > 1
484 |     if return_merged and can_merge:
485 |         print(
486 |             "\nMerging dimensions on time indicies: {}, ".format(merge_list[0]),
487 |             end="",
488 |         )
489 |         df_merged = data[merge_list.pop(0)]
490 |         for other in merge_list:
491 |             if "dives" in data[other]:
492 |                 df_other = data[other].drop(columns="dives")
493 |             else:
494 |                 df_other = data[other]
495 |             print(other, end=", ")
496 |             df_merged = merge_dimensions(df_merged, df_other, interp_lim=1)
497 |         data["merged"] = df_merged
498 |         drop_names = list(data["merged"].filter(regex="_drop").columns)
499 |         data["merged"] = data["merged"].drop(columns=drop_names)
500 | 
501 |     elif return_merged and (not can_merge):
502 |         print(
503 |             "\nCannot merge data - not enough time indexed DataFrames"
504 |             "\nReturning unmerged dataframes"
505 |         )
506 | 
507 |     # MAKING NETCDFS
508 |     for key in data:
509 |         data[key] = make_xr_dataset(
510 |             data[key],
511 |             files,
512 |             keep_global_attrs=keep_global_attrs,
513 |             keep_variable_attrs=keep_variable_attrs,
514 |             index_name=key,
515 |             attrs=netcdf_attrs,
516 |         )
517 |         if "dives" in data:
518 |             data = data.set_coords("dives")
519 | 
520 |     return data
521 | 
522 | 
523 | def make_xr_dataset(
524 |     df,
525 |     files,
526 |     index_name="index",
527 |     attrs={},
528 |     keep_variable_attrs=True,
529 |     keep_global_attrs=False,
530 | ):
531 |     import re
532 | 
533 |     from pandas import Timestamp
534 |     from xarray import open_dataset
535 | 
536 |     first = list(open_dataset(files[0]).attrs.items())
537 |     final = list(open_dataset(files[-1]).attrs.items())
538 | 
539 |     if keep_global_attrs:
540 |         global_attrs = dict(list(set(first).intersection(final)))
541 |     else:
542 |         global_attrs = {}
543 | 
544 |     lons = df.filter(regex=re.compile("lon", re.IGNORECASE))
545 |     lats = df.filter(regex=re.compile("lat", re.IGNORECASE))
546 |     depths = df.filter(regex=re.compile("depth", re.IGNORECASE))
547 |     times = df.filter(regex=re.compile("time_dt64", re.IGNORECASE))
548 |     dives = df.filter(regex=re.compile("dive", re.IGNORECASE))
549 | 
550 |     now = str(Timestamp("today"))[:19]
551 |     history = (
552 |         "[{}] imported data with GliderTools.load.seaglider_" "basestation_netCDFs;\n"
553 |     ).format(now)
554 | 
555 |     global_attrs.update(attrs)
556 |     global_attrs.update(
557 |         {
558 |             "date_created": now,
559 |             "number_of_dives": dives.max().max() // 1,
560 |             "files": str([f.split("/")[-1] for f in files]),
561 |             "time_coverage_start": str(times.min().min()),
562 |             "time_coverage_end": str(times.max().max()),
563 |             "geospatial_vertical_min": depths.min().min(),
564 |             "geospatial_vertical_max": depths.max().max(),
565 |             "geospatial_lat_min": lats.min().min(),
566 |             "geospatial_lat_max": lats.max().max(),
567 |             "geospatial_lon_min": lons.min().min(),
568 |             "geospatial_lon_max": lons.max().max(),
569 |             "processing": history,
570 |         }
571 |     )
572 | 
573 |     coords = set()
574 |     for key in df:
575 |         check_files = files[[0, files.size // 2, -1]]
576 |         coords.update(get_var_coords(check_files, key))
577 |     coords = list(coords)
578 | 
579 |     for i, coord in enumerate(coords):
580 |         if "time" in coord:
581 |             coords[i] = coord + "_dt64"
582 | 
583 |     xds = (
584 |         df.to_xarray()
585 |         .drop_indexes(df.index.name)
586 |         .reset_coords()
587 |         .set_coords(coords)
588 |         .rename_dims({df.index.name: index_name})
589 |         .assign_attrs(global_attrs)
590 |     )
591 | 
592 |     if keep_variable_attrs:
593 |         mid = len(files) // 2
594 |         for key in xds.variables:
595 |             attrs = get_var_attrs(files[mid], key)
596 |             if attrs is not None:
597 |                 attrs.pop("coordinates", None)
598 |                 xds[key].attrs = attrs
599 | 
600 |     return xds
601 | 


--------------------------------------------------------------------------------
/glidertools/load/slocum.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | 
 4 | def slocum_geomar_matfile(filename, verbose=True):
 5 |     """
 6 |     Load .mat file generated with the geomar MATLAB script for Slocum data.
 7 | 
 8 |     A dive column is generated on importing the data. When single values per
 9 |     dive (e.g. u/v), the value is set for the entire dive.
10 | 
11 |     Parameters
12 |     ----------
13 |     filename : str
14 |         path of .mat file.
15 |     verbose : bool, optional
16 |         defaults to True
17 | 
18 |     Returns
19 |     -------
20 |     pandas.DataFrame
21 |         DataFrame containing the all columns in the `.mat` file
22 |     """
23 | 
24 |     import numpy as np
25 |     import pandas as pd
26 | 
27 |     from scipy.io import loadmat
28 | 
29 |     mat = loadmat(filename)
30 | 
31 |     df = pd.DataFrame()
32 | 
33 |     if verbose:
34 |         print("Loading variables: \n\t[", end="")
35 |     for key in mat.keys():
36 |         if key.startswith("_"):
37 |             continue
38 | 
39 |         if verbose:
40 |             print(" " + key, end=",")
41 |         var = mat[key]
42 |         col, dives = [], []
43 |         for i, dat in enumerate(var.squeeze()):
44 |             col += (dat.squeeze(),)
45 |             dives += (np.ones(dat.squeeze().size) * i,)
46 | 
47 |         try:
48 |             df[key] = np.concatenate(col)
49 |             df["dives"] = np.concatenate(dives)
50 |         except ValueError:
51 |             ser = pd.Series(col, index=np.array(dives).squeeze())
52 |             df[key] = ser.reindex(df.dives).values
53 | 
54 |     df["dives"] /= 2.0
55 |     if "time_datenum" in df.columns:
56 |         df["time"] = convert_matlab_datenum_to_datetime64(df.time_datenum)
57 | 
58 |     print("]")
59 |     return df
60 | 
61 | 
62 | def convert_matlab_datenum_to_datetime64(datenum):
63 |     from numpy import datetime64, timedelta64
64 | 
65 |     time_epoch = datetime64("1970-01-01 00:00:00.000")
66 |     time_matlab = timedelta64(-367, "D")
67 |     time_ordinal = datetime64("0001-01-01 00:00:00", "D").astype("timedelta64")
68 |     time_measurements = (datenum * 86400).astype("timedelta64[s]")
69 | 
70 |     datetime = (time_epoch + time_matlab) + (time_ordinal + time_measurements)
71 | 
72 |     return datetime
73 | 


--------------------------------------------------------------------------------
/glidertools/load/voto_seaexplorer.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import numpy as np
 3 | import xarray as xr
 4 | 
 5 | 
 6 | def voto_seaexplorer_nc(filename):
 7 |     """
 8 |     Load .nc file downloaded from https://observations.voiceoftheocean.org/.
 9 |     A dives column is generated on importing the data.
10 | 
11 |     Parameters
12 |     ----------
13 |     filename : str
14 |         path of .nc file.
15 | 
16 |     Returns
17 |     -------
18 |     xarray.Dataset
19 |         Dataset containing the all columns in the source file and dives column
20 |     """
21 |     ds = xr.open_dataset(filename)
22 |     ds = voto_seaexplorer_dataset(ds)
23 |     return ds
24 | 
25 | 
26 | def voto_seaexplorer_dataset(ds):
27 |     """
28 |     Adapts a VOTO xarray dataset, for example downloaded from the VOTO ERDAP
29 |     server (https://erddap.observations.voiceoftheocean.org/erddap/index.html)
30 |     to be used in GliderTools
31 | 
32 |     Parameters
33 |     ----------
34 |     ds : xarray.Dataset
35 | 
36 |     Returns
37 |     -------
38 |     xarray.Dataset
39 |         Dataset containing the all columns in the source file and dives column
40 |     """
41 |     ds = add_dive_column(ds)
42 |     return ds
43 | 
44 | 
45 | def add_dive_column(ds):
46 |     """add dive column to dataset
47 | 
48 |     Parameters:
49 |     -----------
50 |     ds: xarray.Dataset
51 | 
52 |     Returns:
53 |     --------
54 |     xarray.Dataset
55 |         Dataset containing a dives column
56 |     """
57 |     ds["dives"] = (
58 |         ["time"],
59 |         np.where(ds.profile_direction == 1, ds.profile_num, ds.profile_num + 0.5),
60 |     )
61 |     return ds
62 | 
63 | 
64 | def voto_concat_datasets(datasets):
65 |     """
66 |     Concatenates multiple datasets along the time dimensions, profile_num
67 |     and dives variable(s) are adapted so that they start counting from one
68 |     for the first dataset and monotonically increase.
69 | 
70 |     Parameters
71 |     ----------
72 |     datasets : list of xarray.Datasets
73 | 
74 |     Returns
75 |     -------
76 |     xarray.Dataset
77 |         concatenated Dataset containing all the data from the list of datasets
78 |     """
79 |     # in case the datasets have a different set of variables, emtpy variables are created
80 |     # to allow for concatenation (concat with different set of variables leads to error)
81 |     mlist = [set(dataset.variables.keys()) for dataset in datasets]
82 |     allvariables = set.union(*mlist)
83 |     for dataset in datasets:
84 |         missing_vars = allvariables - set(dataset.variables.keys())
85 |         for missing_var in missing_vars:
86 |             dataset[missing_var] = np.nan
87 | 
88 |     # renumber profiles, so that profile_num still is unique in concat-dataset
89 |     for index in range(1, len(datasets)):
90 |         datasets[index]["profile_num"] += (
91 |             datasets[index - 1].copy()["profile_num"].max()
92 |         )
93 |     ds = xr.concat(datasets, dim="time")
94 |     ds = add_dive_column(ds)
95 | 
96 |     return ds
97 | 


--------------------------------------------------------------------------------
/glidertools/physics.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import warnings
  4 | 
  5 | from inspect import currentframe as getframe
  6 | 
  7 | import numpy as np
  8 | 
  9 | from .helpers import GliderToolsWarning, transfer_nc_attrs
 10 | from .utils import group_by_profiles
 11 | 
 12 | 
 13 | def mixed_layer_depth(ds, variable, thresh=0.01, ref_depth=10, verbose=True):
 14 |     """
 15 |     Calculates the MLD for ungridded glider array.
 16 | 
 17 |     You can provide density or temperature.
 18 |     The default threshold is set for density (0.01).
 19 | 
 20 |     Parameters
 21 |     ----------
 22 |     ds : xarray.Dataset Glider dataset
 23 |     variable : str
 24 |          variable that will be used for the threshold criteria
 25 |     thresh : float=0.01 threshold for difference of variable
 26 |     ref_depth : float=10 reference depth for difference
 27 |     return_as_mask : bool, optional
 28 |     verbose : bool, optional
 29 | 
 30 |     Return
 31 |     ------
 32 |     mld : array
 33 |         will be an array of depths the length of the
 34 |         number of unique dives.
 35 |     """
 36 |     groups = group_by_profiles(ds, [variable, "depth"])
 37 |     mld = groups.apply(mld_profile, variable, thresh, ref_depth, verbose)
 38 |     return mld
 39 | 
 40 | 
 41 | def mld_profile(df, variable, thresh, ref_depth, verbose=True):
 42 |     exception = False
 43 |     divenum = df.index[0]
 44 |     df = df.dropna(subset=[variable, "depth"])
 45 |     if len(df) == 0:
 46 |         mld = np.nan
 47 |         exception = True
 48 |         message = """no observations found for specified variable in dive {}
 49 |                 """.format(
 50 |             divenum
 51 |         )
 52 |     elif np.nanmin(np.abs(df.depth.values - ref_depth)) > 5:
 53 |         exception = True
 54 |         message = """no observations within 5 m of ref_depth for dive {}
 55 |                 """.format(
 56 |             divenum
 57 |         )
 58 |         mld = np.nan
 59 |     else:
 60 |         direction = 1 if np.unique(df.index % 1 == 0) else -1
 61 |         # create arrays in order of increasing depth
 62 |         var_arr = df[variable].values[:: int(direction)]
 63 |         depth = df.depth.values[:: int(direction)]
 64 |         # get index closest to ref_depth
 65 |         i = np.nanargmin(np.abs(depth - ref_depth))
 66 |         # create difference array for threshold variable
 67 |         dd = var_arr - var_arr[i]
 68 |         # mask out all values that are shallower then ref_depth
 69 |         dd[depth < ref_depth] = np.nan
 70 |         # get all values in difference array within treshold range
 71 |         mixed = dd[abs(dd) > thresh]
 72 |         if len(mixed) > 0:
 73 |             idx_mld = np.argmax(abs(dd) > thresh)
 74 |             mld = depth[idx_mld]
 75 |         else:
 76 |             exception = True
 77 |             mld = np.nan
 78 |             message = """threshold criterion never true (all mixed or \
 79 |                 shallow profile) for profile {}""".format(
 80 |                 divenum
 81 |             )
 82 |     if verbose and exception:
 83 |         warnings.warn(message, category=GliderToolsWarning)
 84 |     return mld
 85 | 
 86 | 
 87 | def potential_density(salt_PSU, temp_C, pres_db, lat, lon, pres_ref=0):
 88 |     """
 89 |     Calculate density from glider measurements of salinity and temperature.
 90 | 
 91 |     The Basestation calculates density from absolute salinity and potential
 92 |     temperature. This function is a wrapper for this functionality, where
 93 |     potential temperature and absolute salinity are calculated first.
 94 |     Note that a reference pressure of 0 is used by default.
 95 | 
 96 |     Parameters
 97 |     ----------
 98 |     salt_PSU : array, dtype=float, shape=[n, ]
 99 |         practical salinty
100 |     temp_C : array, dtype=float, shape=[n, ]
101 |     temperature in deg C
102 |     pres_db : array, dtype=float, shape=[n, ]
103 |         pressure in decibar
104 |     lat : array, dtype=float, shape=[n, ]
105 |         latitude in degrees north
106 |     lon : array, dtype=float, shape=[n, ]
107 |         longitude in degrees east
108 | 
109 |     Returns
110 |     -------
111 |     potential_density : array, dtype=float, shape=[n, ]
112 |     """
113 | 
114 |     import gsw
115 | 
116 |     salt_abs = gsw.SA_from_SP(salt_PSU, pres_db, lon, lat)
117 |     pot_dens = gsw.pot_rho_t_exact(salt_abs, temp_C, pres_db, pres_ref)
118 |     pot_dens = transfer_nc_attrs(
119 |         getframe(),
120 |         temp_C,
121 |         pot_dens,
122 |         "potential_density",
123 |         units="kg/m3",
124 |         comment="",
125 |         standard_name="potential_density",
126 |     )
127 |     return pot_dens
128 | 
129 | 
130 | def brunt_vaisala(salt, temp, pres, lat=None):
131 |     r"""
132 |     Calculate the square of the buoyancy frequency.
133 | 
134 |     This is a copy from GSW package, with the exception that
135 |     the array maintains the same shape as the input. Note that
136 |     it only works on ungridded data at the moment.
137 | 
138 |     .. math::
139 | 
140 |     N^{2} = \frac{-g}{\sigma_{\theta}} \frac{d\sigma_{\theta}}{dz}
141 | 
142 |     Parameters
143 |     ----------
144 |     SA : array-like
145 |         Absolute Salinity, g/kg
146 |     CT : array-like
147 |         Conservative Temperature (ITS-90), degrees C
148 |     p : array-like
149 |         Sea pressure (absolute pressure minus 10.1325 dbar), dbar
150 |     lat : array-like, 1-D, optional
151 |         Latitude, degrees.
152 |     axis : int, optional
153 |         The dimension along which pressure increases.
154 | 
155 |     Returns
156 |     -------
157 |     N2 : array
158 |         Buoyancy frequency-squared at pressure midpoints, 1/s.
159 |         The shape along the pressure axis dimension is one
160 |         less than that of the inputs.
161 |     """
162 | 
163 |     from gsw import Nsquared
164 |     from numpy import nan, r_
165 | 
166 |     def pad_nan(a):
167 |         return r_[a, nan]
168 | 
169 |     n2 = pad_nan(Nsquared(salt, temp, pres)[0])
170 | 
171 |     n2 = transfer_nc_attrs(
172 |         getframe(),
173 |         temp,
174 |         n2,
175 |         "N_squared",
176 |         units="1/s2",
177 |         comment="",
178 |         standard_name="brunt_vaisala_freq",
179 |     )
180 | 
181 |     return n2
182 | 
183 | 
184 | # compute spice
185 | def spice0(salt_PSU, temp_C, pres_db, lat, lon):
186 |     """
187 |     Calculate spiciness from glider measurements of salinity and temperature.
188 | 
189 |     Parameters
190 |     ----------
191 |     salt_PSU : array, dtype=float, shape=[n, ]
192 |         practical salinty
193 |     temp_C : array, dtype=float, shape=[n, ]
194 |     temperature in deg C
195 |     pres_db : array, dtype=float, shape=[n, ]
196 |         pressure in decibar
197 |     lat : array, dtype=float, shape=[n, ]
198 |         latitude in degrees north
199 |     lon : array, dtype=float, shape=[n, ]
200 |         longitude in degrees east
201 | 
202 |     Returns
203 |     -------
204 |     potential_density : array, dtype=float, shape=[n, ]
205 |     """
206 |     import gsw
207 | 
208 |     salt_abs = gsw.SA_from_SP(salt_PSU, pres_db, lon, lat)
209 |     cons_temp = gsw.CT_from_t(salt_abs, temp_C, pres_db)
210 | 
211 |     spice0 = gsw.spiciness0(salt_abs, cons_temp)
212 | 
213 |     spice0 = transfer_nc_attrs(
214 |         getframe(),
215 |         temp_C,
216 |         spice0,
217 |         "spiciness0",
218 |         units=" ",
219 |         comment="",
220 |         standard_name="spiciness0",
221 |     )
222 |     return spice0
223 | 


--------------------------------------------------------------------------------
/glidertools/utils.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | from inspect import currentframe as getframe
  4 | 
  5 | from .helpers import transfer_nc_attrs
  6 | 
  7 | 
  8 | def time_average_per_dive(dives, time):
  9 |     """
 10 |     Gets the average time stamp per dive. This is used to create psuedo
 11 |     discrete time steps per dive for plotting data (using time as x-axis
 12 |     variable).
 13 | 
 14 |     Parameters
 15 |     ----------
 16 |     dives : np.array, dtype=float, shape=[n, ]
 17 |         discrete dive numbers (down = d.0; up = d.5) that matches time length
 18 |     time : np.array, dtype=datetime64, shape=[n, ]
 19 |         time stamp for each observed measurement
 20 | 
 21 |     Returns
 22 |     -------
 23 |     time_average_per_dive : np.array, dtype=datetime64, shape=[n, ]
 24 |         each dive will have the average time stamp of that dive. Can be used
 25 |         for plotting where time_average_per_dive is set as the x-axis.
 26 |     """
 27 |     from numpy import array, datetime64, nanmean
 28 |     from pandas import Series
 29 | 
 30 |     atime = array(time)
 31 |     dives = array(dives)
 32 |     if isinstance(atime[0], datetime64):
 33 |         t = atime.astype("datetime64[s]").astype(float)
 34 |     else:
 35 |         t = atime
 36 | 
 37 |     t_grp = Series(t).groupby(dives)
 38 |     t_mid = nanmean([t_grp.max(), t_grp.min()], axis=0)
 39 |     t_ser = Series(t_mid, index=t_grp.mean().index.values)
 40 |     diveavg = t_ser.reindex(index=dives).values
 41 |     diveavg = diveavg.astype("datetime64[s]")
 42 |     diveavg = transfer_nc_attrs(getframe(), time, diveavg, "_diveavg")
 43 | 
 44 |     return diveavg
 45 | 
 46 | 
 47 | def group_by_profiles(ds, variables=None):
 48 |     """
 49 |     Group profiles by dives column. Each group member is one dive. The
 50 |     returned profiles can be evaluated statistically, e.g. by
 51 |     pandas.DataFrame.mean or other aggregating methods. To filter out one
 52 |     specific profile, use xarray.Dataset.where instead.
 53 | 
 54 |     Parameters
 55 |     ----------
 56 |     ds : xarray.Dataset
 57 |         1-dimensional Glider dataset
 58 |     variables : list of strings, optional
 59 |         specify variables if only a subset of the dataset should be grouped
 60 |         into profiles. Grouping only a subset is considerably faster and more
 61 |         memory-effective.
 62 |     Return
 63 |     ------
 64 |     profiles:
 65 |     dataset grouped by profiles (dives variable), as created by the
 66 |     pandas.groupby methods.
 67 |     """
 68 |     ds = ds.reset_coords().to_pandas().reset_index().set_index("dives")
 69 |     if variables:
 70 |         return ds[variables].groupby("dives")
 71 |     else:
 72 |         return ds.groupby("dives")
 73 | 
 74 | 
 75 | def mask_above_depth(ds, depths):
 76 |     """
 77 |     Masks all data above depths.
 78 | 
 79 |     Parameters
 80 |     ----------
 81 |     df : xarray.Dataframe or pandas.Dataframe
 82 |     mask_depths : float (for constant depth masking) or pandas.Series as
 83 |         returned e.g. by the mixed_layer_depth function
 84 |     """
 85 |     return _mask_depth(ds, depths, above=True)
 86 | 
 87 | 
 88 | def mask_below_depth(ds, depths):
 89 |     """
 90 |     Masks all data below depths.
 91 | 
 92 |     Parameters
 93 |     ----------
 94 |     df : xarray.Dataframe or pandas.Dataframe
 95 |     mask_depths : float (for constant depth masking) or pandas.Series as
 96 |         returned e.g. by the mixed_layer_depth function
 97 |     """
 98 |     return _mask_depth(ds, depths, above=False)
 99 | 
100 | 
101 | def mask_profile_depth(df, mask_depth, above):
102 |     """
103 |     Masks either above or below mask_depth. If type(mask_depth)=np.nan,
104 |     the whole profile will be masked. Warning: This function is for a SINGLE
105 |     profile only, for masking a complete Glider Dataset please look for
106 |     utils.mask_above_depth and/or utils.mask_below_depth.
107 | 
108 |     Parameters
109 |     ----------
110 |     df : xarray.Dataframe or pandas.Dataframe
111 |     mask_depths : float (for constant depth masking) or pandas.Series as
112 |         returned e.g. by the mixed_layer_depth function
113 |     above : boolean
114 |         Mask either above mask_depth (True) or below (False)
115 |     """
116 |     if type(mask_depth) not in [int, float]:
117 |         # this case for calling from _mask_depth
118 |         mask_depth = mask_depth.loc[df.index[0]]
119 |     if above:
120 |         mask = df.depth > mask_depth
121 |     else:
122 |         mask = df.depth < mask_depth
123 |     return mask
124 | 
125 | 
126 | def _mask_depth(ds, depths, above=True):
127 |     ds = ds.reset_coords().to_pandas().set_index("dives")
128 |     mask = ds.groupby("dives").apply(mask_profile_depth, depths, above)
129 |     # mask = mask if above else ~mask
130 |     return mask.values
131 | 
132 | 
133 | def merge_dimensions(df1, df2, interp_lim=3):
134 |     """
135 |     Merges variables measured at different time intervals. Glider data may be
136 |     sampled at different time intervals, as is the case for primary CTD and
137 |     SciCon data.
138 | 
139 |     Parameters
140 |     ----------
141 |     df1 : pandas.DataFrame
142 |         A dataframe indexed by datetime64 sampling times. Can have multiple
143 |         columns. The index of this first dataframe will be preserved.
144 |     df2 : pandas.DataFrame
145 |         A dataframe indexed by datetime64 sampling times. Can have multiple
146 |         columns. This second dataframe will be interpolated linearly onto the
147 |         first dataframe.
148 | 
149 |     Returns
150 |     -------
151 |     merged_df : pandas.DataFrame
152 |         The combined arrays interpolated onto the index of the first axis
153 | 
154 |     Raises
155 |     ------
156 |     Userwarning
157 |         If either one of the indicies are not datetime64 dtypes
158 | 
159 |     Example
160 |     -------
161 |     You can use the following code and alter it if you want more control
162 | 
163 |     >>> df = pd.concat([df1, df2], sort=True, join='outer')  # doctest: +SKIP
164 |     >>> df = (df
165 |               .sort_index()
166 |               .interpolate(limit=interp_lim)
167 |               .bfill(limit=interp_lim)
168 |               .loc[df1.index]
169 |         )
170 |     """
171 | 
172 |     import numpy as np
173 |     import xarray as xr
174 | 
175 |     from .helpers import GliderToolsError
176 | 
177 |     is_xds = isinstance(df1, xr.Dataset) | isinstance(df2, xr.Dataset)
178 | 
179 |     if is_xds:
180 |         msg = "One of your input objects is xr.Dataset, please define "
181 |         raise GliderToolsError(msg)
182 | 
183 |     same_type = type(df1.index) == type(df2.index)  # noqa: E721
184 |     # turning datetime64[ns] to int64 first,
185 |     # because interpolate doesn't work on datetime-objects
186 | 
187 |     if same_type:
188 |         df = df1.join(df2, sort=True, how="outer", rsuffix="_drop")
189 |         df.index = df1.index.astype(np.int64)
190 |         keys = df.select_dtypes(include=["datetime64[ns]"]).keys()
191 |         for key in keys:
192 |             df[key] = df[key].astype(np.int64)
193 |         df = df.interpolate(limit=interp_lim).bfill(limit=interp_lim)
194 |         df.index = df1.index.astype("datetime64[ns]")
195 |         for key in keys:
196 |             df[key] = df[key].astype("datetime64[ns]")
197 |         return df.loc[df1.index.astype("datetime64[ns]")]
198 |     else:
199 |         raise UserWarning("Both dataframe indicies need to be same dtype")
200 | 
201 | 
202 | def calc_glider_vert_velocity(time, depth):
203 |     """
204 |     Calculate glider vertical velocity in cm/s
205 | 
206 |     Parameters
207 |     ----------
208 |     time : np.array [datetime64]
209 |         glider time dimension
210 |     depth : np.array [float]
211 |         depth (m) or pressure (dbar) if depth not avail
212 | 
213 |     Returns
214 |     -------
215 |     velocity : np.array
216 |         vertical velocity in cm/s
217 |     """
218 |     from numpy import array
219 |     from pandas import Series
220 | 
221 |     # Converting time from datetime 64 to seconds since deployment
222 |     t_ns = array(time).astype("datetime64[ns]").astype(float)
223 |     t_s = Series((t_ns - t_ns.min()) / 1e9)
224 | 
225 |     # converting pressure from dbar/m to cm
226 |     p_m = array(depth).astype(float)
227 |     p_cm = Series(p_m * 100)
228 | 
229 |     # velocity in cm/s
230 |     velocity = p_cm.diff() / t_s.diff()
231 | 
232 |     return velocity
233 | 
234 | 
235 | def calc_dive_phase(time, depth, dive_depth_threshold=15):
236 |     """
237 |     Determine the glider dive phase
238 | 
239 |     Parameters
240 |     ----------
241 |     time : np.array [datetime64]
242 |         glider time dimension
243 |     depth : np.array [float]
244 |         depth (m) or pressure (dbar) if depth not avail
245 |     dive_depth_threshold : [float]
246 |         minimum dive depth (m or dbar), should be less than your most shallow dive
247 | 
248 |     Returns
249 |     -------
250 |     phase : np.array [int]
251 |         phase according to the EGO dive phases
252 |     """
253 |     from numpy import array, isnan, ndarray
254 | 
255 |     time = array(time)
256 |     depth = array(depth)
257 | 
258 |     velocity = calc_glider_vert_velocity(time, depth)  # cm/s
259 | 
260 |     phase = ndarray(time.size)
261 | 
262 |     phase[velocity > 0.5] = 1  # down dive
263 |     phase[velocity < -0.5] = 4  # up dive
264 |     phase[(depth > dive_depth_threshold) & (velocity >= -0.5) & (velocity <= 0.5)] = (
265 |         3  # inflexion
266 |     )
267 |     phase[depth <= dive_depth_threshold] = 0  # surface drift
268 |     phase[isnan(phase)] = 6
269 |     phase = phase.astype(int)
270 | 
271 |     return phase
272 | 
273 | 
274 | def calc_dive_number(time, depth, dive_depth_threshold=15):
275 |     """
276 |     Determine the glider dive number (based on dive phase)
277 | 
278 |     Parameters
279 |     ----------
280 |     time : np.array [datetime64]
281 |         glider time dimension
282 |     depth : np.array [float]
283 |         depth (m) or pressure (dbar) if depth not avail
284 |     dive_depth_threshold : [float]
285 |         minimum dive depth (m or dbar), should be less than your most shallow dive
286 | 
287 |     Returns
288 |     -------
289 |     dive_number : np.ndarray [float]
290 |         the dive number where down dives are x.0 and up dives are x.5
291 |     """
292 | 
293 |     phase = calc_dive_phase(time, depth, dive_depth_threshold)
294 | 
295 |     dive = dive_phase_to_number(phase)
296 | 
297 |     return dive
298 | 
299 | 
300 | def dive_phase_to_number(phase):
301 |     from pandas import Series
302 | 
303 |     phase = Series(phase)
304 | 
305 |     u_dive = ((phase == 4).astype(int).diff() == 1).astype(int).cumsum()
306 |     d_dive = ((phase == 1).astype(int).diff() == 1).astype(int).cumsum()
307 | 
308 |     dive = (u_dive + d_dive) / 2
309 | 
310 |     return dive
311 | 
312 | 
313 | def distance(lon, lat, ref_idx=None):
314 |     """
315 |     Great-circle distance in m between lon, lat points.
316 | 
317 |     Parameters
318 |     ----------
319 |     lon, lat : array-like, 1-D (size must match)
320 |         Longitude, latitude, in degrees.
321 |     ref_idx : None, int
322 |         Defaults to None, which gives adjacent distances.
323 |         If set to positive or negative integer, distances
324 |         will be calculated from that point
325 | 
326 |     Returns
327 |     -------
328 |     distance : array-like
329 |         distance in meters between adjacent points
330 |         or distance from reference point
331 | 
332 |     """
333 |     import numpy as np
334 | 
335 |     lon = np.array(lon)
336 |     lat = np.array(lat)
337 | 
338 |     earth_radius = 6371e3
339 | 
340 |     if not lon.size == lat.size:
341 |         raise ValueError(
342 |             "lon, lat size must match; found %s, %s" % (lon.size, lat.size)
343 |         )
344 |     if not len(lon.shape) == 1:
345 |         raise ValueError("lon, lat must be flat arrays")
346 | 
347 |     lon = np.radians(lon)
348 |     lat = np.radians(lat)
349 | 
350 |     if ref_idx is None:
351 |         i1 = slice(0, -1)
352 |         i2 = slice(1, None)
353 |         dlon = np.diff(lon)
354 |         dlat = np.diff(lat)
355 |     else:
356 |         ref_idx = int(ref_idx)
357 |         i1 = ref_idx
358 |         i2 = slice(0, None)
359 |         dlon = lon[ref_idx] - lon
360 |         dlat = lat[ref_idx] - lat
361 | 
362 |     a = np.sin(dlat / 2) ** 2 + np.sin(dlon / 2) ** 2 * np.cos(lat[i1]) * np.cos(
363 |         lat[i2]
364 |     )
365 | 
366 |     angles = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
367 | 
368 |     distance = earth_radius * angles
369 |     d = np.r_[0, distance]
370 | 
371 |     return d
372 | 
373 | 
374 | if __name__ == "__main__":
375 | 
376 |     pass
377 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.isort]
2 | known_third_party = ["gsw", "matplotlib", "netCDF4", "numexpr", "numpy", "pkg_resources", "pytest", "setuptools", "xarray"]
3 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [flake8]
 2 | ignore = E122, E123, E126, E127, E128, E731, E722, E203, E741, W503
 3 | max-line-length = 88
 4 | max-complexity = 22
 5 | exclude = build,tests,.git,benchmarks,.asv, glidertools/_version.py
 6 | per-file-ignores =
 7 |     glidertools/flo_functions.py: F841, E501
 8 |     __init__.py: F401, F403
 9 | 
10 | [isort]
11 | multi_line_output=3
12 | include_trailing_comma=True
13 | force_grid_wrap=0
14 | use_parentheses=True
15 | line_length=88
16 | 
17 | [tool:pytest]
18 | testpaths=tests/
19 | addopts= --cov --cov-fail-under=20
20 | 
21 | [doc8]
22 | # https://pypi.org/project/doc8/
23 | ignore-path = docs/_build
24 | max-line-length = 100
25 | sphinx = True
26 | 
27 | [sdist]
28 | formats = gztar
29 | 
30 | [check-manifest]
31 | ignore =
32 |     *.yml
33 |     *.yaml
34 |     .coveragerc
35 |     docs
36 |     docs/*
37 |     *.enc
38 |     notebooks
39 |     notebooks/*
40 |     tests
41 |     tests/*
42 | 
43 | [metadata]
44 | name = glidertools
45 | description = ("A toolkit for processing Seaglider base station NetCDF files: "
46 |         "despiking, smoothing, outlier detection, backscatter, fluorescence "
47 |         "quenching, calibration, gridding, interpolation. Documentation "
48 |         "at https://glidertools.readthedocs.io")
49 | author = Luke Gregor
50 | url = https://github.com/GliderToolsCommunity/GliderTools
51 | long_description = file: README.rst
52 | long_description_content_type = text/x-rst
53 | license = GNUv3
54 | license_file = LICENSE
55 | 
56 | ## These need to be filled in by the author!
57 | # For details see: https://pypi.org/classifiers/
58 | 
59 | classifiers =
60 |     Development Status :: 3 - Alpha
61 |     License :: OSI Approved :: MIT License
62 |     Operating System :: OS Independent
63 |     Intended Audience :: Science/Research
64 |     Programming Language :: Python
65 |     Programming Language :: Python :: 3
66 |     Programming Language :: Python :: 3.8
67 |     Programming Language :: Python :: 3.9
68 |     Programming Language :: Python :: 3.10
69 |     Programming Language :: Python :: 3.11
70 |     Programming Language :: Python :: 3.12
71 | 
72 | ## Add your email here
73 | author_email = "lukegre@gmail.com"
74 | 
75 | 
76 | ### make sure to fill in your dependencies!
77 | [options]
78 | install_requires =
79 |     numexpr
80 |     netcdf4
81 |     pandas
82 |     xarray
83 |     numpy
84 |     scikit-learn
85 |     scipy
86 |     tqdm
87 |     matplotlib
88 |     gsw
89 | 
90 | setup_requires=
91 |     setuptools_scm
92 | python_requires = >=3.8
93 | ################ Up until here
94 | 
95 | zip_safe = False
96 | packages = find:
97 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | 
 3 | 
 4 | setup(
 5 |     use_scm_version={
 6 |         "write_to": "glidertools/_version.py",
 7 |         "write_to_template": '__version__ = "{version}"',
 8 |         "tag_regex": r"^(?P<prefix>v)?(?P<version>[^\+]+)(?P<suffix>.*)?$",
 9 |     }
10 | )
11 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/tests/__init__.py


--------------------------------------------------------------------------------
/tests/data/p5420304.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/tests/data/p5420304.nc


--------------------------------------------------------------------------------
/tests/data/p5420305.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/tests/data/p5420305.nc


--------------------------------------------------------------------------------
/tests/data/p5420306.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/tests/data/p5420306.nc


--------------------------------------------------------------------------------
/tests/data/p5420307.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/tests/data/p5420307.nc


--------------------------------------------------------------------------------
/tests/data/p5420308.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/tests/data/p5420308.nc


--------------------------------------------------------------------------------
/tests/data/p5420309.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/tests/data/p5420309.nc


--------------------------------------------------------------------------------
/tests/data/p5420310.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/tests/data/p5420310.nc


--------------------------------------------------------------------------------
/tests/data/p5420311.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/tests/data/p5420311.nc


--------------------------------------------------------------------------------
/tests/data/p5420312.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/tests/data/p5420312.nc


--------------------------------------------------------------------------------
/tests/data/p5420313.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/tests/data/p5420313.nc


--------------------------------------------------------------------------------
/tests/data/p5420314.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/tests/data/p5420314.nc


--------------------------------------------------------------------------------
/tests/data/p5420315.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/tests/data/p5420315.nc


--------------------------------------------------------------------------------
/tests/data/p5420316.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/tests/data/p5420316.nc


--------------------------------------------------------------------------------
/tests/data/p5420317.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/tests/data/p5420317.nc


--------------------------------------------------------------------------------
/tests/data/voto_nrt.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/tests/data/voto_nrt.nc


--------------------------------------------------------------------------------
/tests/test_calibration.py:
--------------------------------------------------------------------------------
 1 | from glidertools.calibration import (  # noqa
 2 |     bottle_matchup,
 3 |     model_figs,
 4 |     model_metrics,
 5 |     robust_linear_fit,
 6 | )
 7 | 
 8 | 
 9 | def test_dummy():
10 |     """WE REALLY NEED TO ADD TESTS!!! THESE JUST TEST THE BASIC IMPORT!!!"""
11 |     assert 1 == 1
12 | 


--------------------------------------------------------------------------------
/tests/test_cleaning.py:
--------------------------------------------------------------------------------
 1 | from glidertools.cleaning import horizontal_diff_outliers, outlier_bounds_iqr
 2 | from glidertools.load import seaglider_basestation_netCDFs
 3 | 
 4 | 
 5 | filenames = "./tests/data/p542*.nc"
 6 | names = [
 7 |     "ctd_depth",
 8 |     "ctd_time",
 9 |     "ctd_pressure",
10 |     "salinity",
11 |     "temperature",
12 |     "eng_wlbb2flvmt_Chlsig",
13 |     "eng_wlbb2flvmt_wl470sig",
14 |     "eng_wlbb2flvmt_wl700sig",
15 |     "aanderaa4330_dissolved_oxygen",
16 |     "eng_qsp_PARuV",
17 | ]
18 | 
19 | sg542 = seaglider_basestation_netCDFs(
20 |     filenames, names, return_merged=True, keep_global_attrs=False
21 | )
22 | 
23 | sg542_dat = sg542["sg_data_point"]
24 | 
25 | 
26 | def test_outlier_bounds():
27 |     # does not test for soft bugs
28 |     salt = sg542_dat["salinity"]
29 |     outlier_bounds_iqr(salt, multiplier=1.5)
30 | 
31 | 
32 | def test_horizontal_outliers():
33 |     # does not test for soft bugs
34 |     horizontal_diff_outliers(
35 |         sg542_dat["dives"],
36 |         sg542_dat["ctd_depth"],
37 |         sg542_dat["salinity"],
38 |         multiplier=3,
39 |         depth_threshold=400,
40 |         mask_frac=0.1,
41 |     )
42 | 


--------------------------------------------------------------------------------
/tests/test_dive_numbers.py:
--------------------------------------------------------------------------------
 1 | import glidertools.utils as gt_util
 2 | 
 3 | from glidertools.load import seaglider_basestation_netCDFs
 4 | 
 5 | 
 6 | # load some data
 7 | filenames = "./tests/data/p542*.nc"
 8 | 
 9 | names = ["ctd_depth", "ctd_time"]
10 | ds_dict = seaglider_basestation_netCDFs(filenames, names, keep_global_attrs=False)
11 | 
12 | dat = ds_dict["sg_data_point"]
13 | depth = dat["ctd_depth"]
14 | time = dat["ctd_time"]
15 | 
16 | 
17 | def test_find_correct_number_dives():
18 |     # using default values
19 |     dives = gt_util.calc_dive_number(depth, time)
20 |     assert dives.max() == 599.5
21 | 


--------------------------------------------------------------------------------
/tests/test_flo_functions.py:
--------------------------------------------------------------------------------
 1 | from glidertools.flo_functions import (  # noqa
 2 |     flo_bback_total,
 3 |     flo_beta,
 4 |     flo_cdom,
 5 |     flo_chla,
 6 |     flo_density_seawater,
 7 |     flo_isotherm_compress,
 8 |     flo_refractive_index,
 9 |     flo_scale_and_offset,
10 |     flo_scat_seawater,
11 |     flo_zhang_scatter_coeffs,
12 | )
13 | 
14 | 
15 | def test_dummy():
16 |     """WE REALLY NEED TO ADD TESTS!!! THESE JUST TEST THE BASIC IMPORT!!!"""
17 |     assert 1 == 1
18 | 


--------------------------------------------------------------------------------
/tests/test_imports.py:
--------------------------------------------------------------------------------
 1 | def test_import():
 2 |     import glidertools
 3 | 
 4 |     print(glidertools)
 5 | 
 6 | 
 7 | def test_import_data_seaglider():
 8 |     import glidertools as gt
 9 | 
10 |     filenames = "./tests/data/p542*.nc"
11 | 
12 |     names = [
13 |         "ctd_depth",
14 |         "ctd_time",
15 |         "ctd_pressure",
16 |         "salinity",
17 |         "temperature",
18 |         "eng_wlbb2flvmt_Chlsig",
19 |         "eng_wlbb2flvmt_wl470sig",
20 |         "eng_wlbb2flvmt_wl700sig",
21 |         "aanderaa4330_dissolved_oxygen",
22 |         "eng_qsp_PARuV",
23 |     ]
24 | 
25 |     ds_dict = gt.load.seaglider_basestation_netCDFs(
26 |         filenames, names, return_merged=True, keep_global_attrs=False
27 |     )
28 | 
29 |     assert isinstance(ds_dict, dict)
30 | 


--------------------------------------------------------------------------------
/tests/test_load.py:
--------------------------------------------------------------------------------
 1 | from glidertools.load import voto_concat_datasets, voto_seaexplorer_nc
 2 | 
 3 | 
 4 | filename = "./tests/data/voto_nrt.nc"
 5 | 
 6 | # import two times to test concat
 7 | ds1 = voto_seaexplorer_nc(filename)
 8 | ds2 = voto_seaexplorer_nc(filename)
 9 | 
10 | 
11 | def test_dives_column_addition():
12 |     assert len(ds1.dives) > 1
13 | 
14 | 
15 | def test_voto_concat_datasets():
16 |     ds_concat = voto_concat_datasets([ds1, ds2])
17 |     assert 2 * len(ds1.time) == len(ds_concat.time)
18 | 


--------------------------------------------------------------------------------
/tests/test_mapping.py:
--------------------------------------------------------------------------------
 1 | from glidertools.mapping import (  # noqa
 2 |     get_optimal_bins,
 3 |     grid_data,
 4 |     grid_flat_dataarray,
 5 |     interp_leaf,
 6 |     interp_obj,
 7 |     variogram,
 8 | )
 9 | 
10 | 
11 | def test_dummy():
12 |     """WE REALLY NEED TO ADD TESTS!!! THESE JUST TEST THE BASIC IMPORT!!!"""
13 |     assert 1 == 1
14 | 


--------------------------------------------------------------------------------
/tests/test_optics.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pytest
  3 | 
  4 | 
  5 | def test_sunrise_sunset():
  6 |     """
  7 |     Tests if sunrise/sunset:
  8 |         1. can run
  9 |         2. output is the right shape
 10 |         3. if the output is correct-ish
 11 |     """
 12 |     import numpy as np
 13 |     import pandas as pd
 14 | 
 15 |     from glidertools.optics import sunset_sunrise
 16 | 
 17 |     time = [
 18 |         np.datetime64("2000-01-01"),
 19 |         np.datetime64("2000-01-02"),
 20 |         np.datetime64("2000-01-03"),
 21 |     ]
 22 |     lat = -35, 45, 80
 23 |     lon = 0, 0, 0
 24 |     sunrise, sunset = sunset_sunrise(time, lat, lon)
 25 | 
 26 |     # Three entries, there should be three outputs
 27 |     assert len(sunrise) == len(lat)
 28 | 
 29 |     # sunrise will be earlier in the SH in January
 30 |     assert sunrise[0] < sunrise[2]
 31 | 
 32 |     # expect sunrise at the 4am, 7am and 11am for these times and latitudes
 33 |     # high latitude should output polar night default 11:59 for sunrise and 12:01 for sunset
 34 |     assert pd.to_datetime(sunrise[0]).hour == 4
 35 |     assert pd.to_datetime(sunrise[1]).hour == 7
 36 |     assert pd.to_datetime(sunrise[2]).hour == 11
 37 | 
 38 |     # high latitude should output polar night default 11:59 for sunrise and 12:01 for sunset
 39 |     assert pd.to_datetime(sunrise[2]).hour == 11
 40 |     assert pd.to_datetime(sunrise[2]).minute == 59
 41 | 
 42 |     assert pd.to_datetime(sunset[2]).hour == 12
 43 |     assert pd.to_datetime(sunset[2]).minute == 1
 44 | 
 45 | 
 46 | @pytest.mark.parametrize("percentile", [5, 50, 95])
 47 | def test_backscatter_dark_count(percentile):
 48 |     from glidertools.optics import backscatter_dark_count
 49 | 
 50 |     # create some synthetic data
 51 |     bbp = np.array([0.002, 0.0006, 0.0005, 0.0005, 0.0005])
 52 |     depth = np.array([50, 150, 210, 310, 350])
 53 |     # select only depths between 200 and 400
 54 |     mask = (depth > 200) & (depth < 400)
 55 |     # expected output
 56 |     expected_bbp_dark = bbp - np.nanpercentile(bbp[mask], percentile)
 57 |     bbp_dark = backscatter_dark_count(bbp, depth, percentile)
 58 |     np.testing.assert_allclose(expected_bbp_dark, bbp_dark)
 59 | 
 60 | 
 61 | @pytest.mark.parametrize("percentile", [5, 50, 95])
 62 | def test_backscatter_dark_count_negative(percentile):
 63 |     from glidertools.optics import backscatter_dark_count
 64 | 
 65 |     # create some synthetic data
 66 |     bbp = np.array(
 67 |         [0.002, 0.0006, 0.005, 0.005, 0.0004]
 68 |     )  # this will result in negative values that should be zeroed out
 69 |     depth = np.array([50, 150, 210, 310, 350])
 70 |     bbp_dark = backscatter_dark_count(bbp, depth, percentile)
 71 |     # in this case we just want to check if none of the values is negative!
 72 |     assert np.all(bbp_dark >= 0)
 73 | 
 74 | 
 75 | def test_backscatter_dark_count_warning():
 76 |     from glidertools.optics import backscatter_dark_count
 77 | 
 78 |     # create some synthetic data
 79 |     percentile = 50
 80 |     bbp = np.array([0.002, 0.0006, 0.005, 0.005])
 81 |     depth = np.array(
 82 |         [50, 60, 70, 110]
 83 |     )  # this will trigger the warning  (no values between 200 and 400m)
 84 |     with pytest.warns(
 85 |         UserWarning
 86 |     ):  # this line will fail if the command below does not actually raise a warning!
 87 |         backscatter_dark_count(bbp, depth, percentile)
 88 | 
 89 | 
 90 | @pytest.mark.parametrize("percentile", [5, 50, 95])
 91 | def test_flr_dark_count(percentile):
 92 |     from glidertools.optics import fluorescence_dark_count
 93 | 
 94 |     # create some synthetic data
 95 |     flr = np.array([200.0, 100.0, 52.0, 52.0])
 96 |     depth = np.array([20, 50, 310, 350])
 97 |     # select only depths between 200 and 400
 98 |     mask = (depth > 300) & (depth < 400)
 99 |     # expected output
100 |     expected_flr_dark = flr - np.nanpercentile(flr[mask], percentile)
101 |     flr_dark = fluorescence_dark_count(flr, depth, percentile)
102 |     np.testing.assert_allclose(expected_flr_dark, flr_dark)
103 | 
104 | 
105 | @pytest.mark.parametrize("percentile", [5, 50, 95])
106 | def test_flr_dark_count_negative(percentile):
107 |     from glidertools.optics import fluorescence_dark_count
108 | 
109 |     # create some synthetic data
110 |     flr = np.array([200.0, 100.0, 152.0, 151.0])
111 |     # this will result in negative values that should be zeroed out
112 |     depth = np.array([20, 50, 310, 350])
113 |     flr_dark = fluorescence_dark_count(flr, depth, percentile)
114 |     # in this case we just want to check if none of the values is negative!
115 |     assert np.all(flr_dark >= 0)
116 | 
117 | 
118 | def test_flr_dark_count_warning():
119 |     from glidertools.optics import fluorescence_dark_count
120 | 
121 |     # create some synthetic data
122 |     percentile = 50
123 |     flr = np.array([200.0, 100.0, 52.0, 52.0])
124 |     depth = np.array([20, 50, 210, 250])
125 | 
126 |     with pytest.warns(
127 |         UserWarning
128 |     ):  # this line will fail if the command below does not actually raise a warning!
129 |         fluorescence_dark_count(flr, depth, percentile)
130 | 
131 | 
132 | @pytest.mark.parametrize("percentile", [90])
133 | def test_par_dark_count(percentile):
134 |     from pandas import date_range
135 | 
136 |     from glidertools.optics import par_dark_count
137 | 
138 |     # create some synthetic data
139 |     par = np.array([34, 23.0, 0.89, 0.89])
140 |     depth = np.array([10, 20, 310, 350])
141 |     time = date_range("2018-12-01 10:00", "2018-12-03 00:00", 4)
142 |     # expected output
143 |     expected_par_dark = par - np.nanmedian(
144 |         np.nanpercentile(par[-1], percentile)
145 |     )  # only use values in the 90% percentile of depths and between 23:00 and 01:00
146 |     par_dark = par_dark_count(par, depth, time, percentile)
147 |     np.testing.assert_allclose(expected_par_dark, par_dark)
148 | 
149 | 
150 | def test_par_dark_count_warning():
151 |     from pandas import date_range
152 | 
153 |     from glidertools.optics import par_dark_count
154 | 
155 |     # create some synthetic data
156 |     percentile = 90
157 |     par = np.array([34, 23.0, 0.89, 0.89])
158 |     depth = np.array([10, 20, 310, 350])
159 |     time = date_range("2018-12-01 10:00", "2018-12-03 20:00", 4)
160 |     # this will trigger the warning  (no values between 200 and 400m)
161 |     with pytest.warns(
162 |         UserWarning
163 |     ):  # this line will fail if the command below does not actually raise a warning!
164 |         par_dark_count(par, depth, time, percentile)
165 | 


--------------------------------------------------------------------------------
/tests/test_physics.py:
--------------------------------------------------------------------------------
 1 | import xarray as xr
 2 | 
 3 | from glidertools.load import seaglider_basestation_netCDFs
 4 | from glidertools.physics import (
 5 |     brunt_vaisala,
 6 |     mixed_layer_depth,
 7 |     potential_density,
 8 |     spice0,
 9 | )
10 | from glidertools.utils import mask_above_depth, mask_below_depth
11 | 
12 | 
13 | filenames = "./tests/data/p542*.nc"
14 | 
15 | names = [
16 |     "ctd_depth",
17 |     "ctd_time",
18 |     "ctd_pressure",
19 |     "salinity",
20 |     "temperature",
21 |     "eng_wlbb2flvmt_Chlsig",
22 |     "eng_wlbb2flvmt_wl470sig",
23 |     "eng_wlbb2flvmt_wl700sig",
24 |     "aanderaa4330_dissolved_oxygen",
25 |     "eng_qsp_PARuV",
26 | ]
27 | 
28 | ds_dict = seaglider_basestation_netCDFs(
29 |     filenames, names, return_merged=True, keep_global_attrs=False
30 | )
31 | 
32 | merged = ds_dict["merged"]
33 | if "time" in merged:
34 |     merged = merged.drop_vars(["time", "time_dt64"])
35 | dat = merged.rename(
36 |     {
37 |         "salinity": "salt_raw",
38 |         "temperature": "temp_raw",
39 |         "ctd_pressure": "pressure",
40 |         "ctd_depth": "depth",
41 |         "ctd_time_dt64": "time",
42 |         "ctd_time": "time_raw",
43 |         "eng_wlbb2flvmt_wl700sig": "bb700_raw",
44 |         "eng_wlbb2flvmt_wl470sig": "bb470_raw",
45 |         "eng_wlbb2flvmt_Chlsig": "flr_raw",
46 |         "eng_qsp_PARuV": "par_raw",
47 |         "aanderaa4330_dissolved_oxygen": "oxy_raw",
48 |     }
49 | )
50 | 
51 | 
52 | def test_is_dataset():
53 |     assert isinstance(dat, xr.core.dataset.Dataset)
54 | 
55 | 
56 | def test_mixed_layer_depth():
57 |     mld = mixed_layer_depth(dat, "temp_raw")
58 |     assert mld.min() > 10
59 |     assert mld.max() < 40
60 | 
61 | 
62 | def test_masking():
63 |     # We "know" that the mld for this dataset is >10m and <40m
64 |     mld = mixed_layer_depth(dat, "temp_raw")
65 |     mask = mask_above_depth(dat, mld)
66 |     assert dat.depth[mask].max() > 10
67 |     mask = mask_below_depth(dat, mld)
68 |     assert dat.depth[mask].max() < 40
69 | 
70 | 
71 | def test_potential_density():
72 |     pot_den = potential_density(
73 |         dat.salt_raw, dat.temp_raw, dat.pressure, dat.latitude, dat.longitude
74 |     )
75 |     assert pot_den.min() > 1020
76 |     assert pot_den.max() < 1040
77 | 
78 | 
79 | def test_brunt_vaisala():
80 |     brunt_val = brunt_vaisala(dat.salt_raw, dat.temp_raw, dat.pressure)
81 |     assert brunt_val.min() > -0.002
82 |     assert brunt_val.max() < 0.002
83 | 
84 | 
85 | def test_spice0():
86 |     spice = spice0(
87 |         dat.salt_raw, dat.temp_raw, dat.pressure, dat.latitude, dat.longitude
88 |     )
89 |     assert spice.min() > -1
90 |     assert spice.max() < 1
91 | 


--------------------------------------------------------------------------------
/tests/test_plot.py:
--------------------------------------------------------------------------------
 1 | import warnings
 2 | 
 3 | import glidertools.plot as gt_plt
 4 | 
 5 | from glidertools.load import seaglider_basestation_netCDFs
 6 | 
 7 | 
 8 | # load some data
 9 | filenames = "./tests/data/p542*.nc"
10 | 
11 | names = ["ctd_depth", "ctd_time", "ctd_pressure", "salinity", "temperature"]
12 | ds_dict = seaglider_basestation_netCDFs(filenames, names, keep_global_attrs=False)
13 | 
14 | dat = ds_dict["sg_data_point"]
15 | 
16 | 
17 | def test_no_warns():
18 |     """Check gt_plt() raises no warnings in pcolormesh."""
19 |     with warnings.catch_warnings() as record:
20 |         gt_plt(dat.dives, dat.ctd_pressure, dat.salinity)
21 | 
22 |     # print warnings that were captured
23 |     if record:
24 |         print("Warnings were raised: " + ", ".join([str(w) for w in record]))
25 | 
26 |         # Check the warning messages for statements we do not want to see
27 |         fail_message = (
28 |             "shading='flat' when X and Y have the same dimensions as C is deprecated"
29 |         )
30 |         assert not any([fail_message in str(r) for r in record])
31 | 


--------------------------------------------------------------------------------
/tests/test_processing.py:
--------------------------------------------------------------------------------
 1 | import gsw
 2 | import numpy as np
 3 | import pytest
 4 | 
 5 | from glidertools.processing import (  # noqa
 6 |     calc_backscatter,
 7 |     calc_fluorescence,
 8 |     calc_oxygen,
 9 |     calc_par,
10 |     calc_physics,
11 |     oxygen_ml_per_l_to_umol_per_kg,
12 | )
13 | from tests.test_physics import dat
14 | 
15 | 
16 | dat.oxy_raw.values[dat.oxy_raw.values < 0] = np.nan
17 | dat.oxy_raw.values[dat.oxy_raw.values > 500] = np.nan
18 | o2ml, o2pc, o2aou = calc_oxygen(
19 |     dat.oxy_raw,
20 |     dat.pressure,
21 |     dat.salt_raw,
22 |     dat.temp_raw,
23 |     dat.latitude,
24 |     dat.longitude,
25 | )
26 | 
27 | 
28 | def test_calc_oxygen():
29 |     assert np.nanmean(o2ml) == pytest.approx(5.22, 0.001)
30 |     assert np.nanmean(o2pc) == pytest.approx(75.857, 0.001)
31 |     assert np.nanmean(o2aou) == pytest.approx(75.351, 0.001)
32 | 
33 | 
34 | def test_oxygen_conversion():
35 |     absolute_salinity = gsw.SA_from_SP(
36 |         dat.salt_raw, dat.pressure, dat.longitude, dat.latitude
37 |     )
38 |     conservative_temperature = gsw.CT_from_t(
39 |         absolute_salinity, dat.temp_raw, dat.pressure
40 |     )
41 |     density = gsw.density.rho(absolute_salinity, conservative_temperature, dat.pressure)
42 |     o2_umol_kg = oxygen_ml_per_l_to_umol_per_kg(o2ml, density)
43 |     assert np.allclose(o2_umol_kg.values, dat.oxy_raw.values, equal_nan=True)
44 | 


--------------------------------------------------------------------------------