├── .binder
│   └── environment.yml
├── .github
│   ├── ISSUE_TEMPLATE
│   │   ├── bug_report.md
│   │   └── feature_request.md
│   ├── pull_request_template.md
│   └── workflows
│       ├── ci.yml
│       ├── linting.yml
│       └── pythonpackage.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .readthedocs.yml
├── LICENSE
├── README.rst
├── ci
│   └── environment.yml
├── codecov.yml
├── dependabot.yml
├── docs
│   ├── api.rst
│   ├── authors.md
│   ├── calibration.md
│   ├── cheatsheet.md
│   ├── conf.py
│   ├── contributing.rst
│   ├── img
│   │   ├── code_bgu.png
│   │   ├── interactive_plot.png
│   │   ├── logo_with_name.png
│   │   ├── logo_with_name_vert.png
│   │   ├── logo_wo_name.png
│   │   ├── output_101_2.png
│   │   ├── output_106_1.png
│   │   ├── output_108_0.png
│   │   ├── output_111_0.png
│   │   ├── output_116_0.png
│   │   ├── output_14_0.png
│   │   ├── output_16_0.png
│   │   ├── output_16_1.png
│   │   ├── output_19_0.png
│   │   ├── output_19_1.png
│   │   ├── output_22_0.png
│   │   ├── output_24_0.png
│   │   ├── output_26_0.png
│   │   ├── output_28_1.png
│   │   ├── output_29_1.png
│   │   ├── output_33_0.png
│   │   ├── output_36_0.png
│   │   ├── output_38_1.png
│   │   ├── output_41_0.png
│   │   ├── output_43_0.png
│   │   ├── output_45_0.png
│   │   ├── output_47_0.png
│   │   ├── output_49_0.png
│   │   ├── output_51_0.png
│   │   ├── output_55_1.png
│   │   ├── output_56_1.png
│   │   ├── output_59_0.png
│   │   ├── output_61_0.png
│   │   ├── output_64_0.png
│   │   ├── output_65_0.png
│   │   ├── output_67_1.png
│   │   ├── output_71_0.png
│   │   ├── output_74_0.png
│   │   ├── output_76_0.png
│   │   ├── output_78_0.png
│   │   ├── output_80_0.png
│   │   ├── output_82_0.png
│   │   ├── output_84_0.png
│   │   ├── output_86_1.png
│   │   ├── output_86_2.png
│   │   ├── output_92_1.png
│   │   ├── output_94_1.png
│   │   ├── output_97_0.png
│   │   ├── output_99_2.png
│   │   ├── package_overview.png
│   │   └── package_structure.png
│   ├── index.rst
│   ├── installation.md
│   ├── loading.md
│   ├── mapping.md
│   ├── optics.md
│   ├── other.md
│   ├── package_structure.md
│   ├── physics.md
│   ├── quality_control.md
│   ├── requirements.txt
│   ├── saving.md
│   ├── static
│   │   └── css
│   │       └── custom.css
│   ├── whats-new.rst
│   └── wishlist.md
├── glidertools
│   ├── .DS_Store
│   ├── __init__.py
│   ├── calibration.py
│   ├── cleaning.py
│   ├── flo_functions.py
│   ├── helpers.py
│   ├── load
│   │   ├── __init__.py
│   │   ├── ego.py
│   │   ├── seaglider.py
│   │   ├── slocum.py
│   │   └── voto_seaexplorer.py
│   ├── mapping.py
│   ├── optics.py
│   ├── physics.py
│   ├── plot.py
│   ├── processing.py
│   └── utils.py
├── notebooks
│   └── Demo_GT.ipynb
├── pyproject.toml
├── setup.cfg
├── setup.py
└── tests
    ├── __init__.py
    ├── data
    │   ├── p5420304.nc
    │   ├── p5420305.nc
    │   ├── p5420306.nc
    │   ├── p5420307.nc
    │   ├── p5420308.nc
    │   ├── p5420309.nc
    │   ├── p5420310.nc
    │   ├── p5420311.nc
    │   ├── p5420312.nc
    │   ├── p5420313.nc
    │   ├── p5420314.nc
    │   ├── p5420315.nc
    │   ├── p5420316.nc
    │   ├── p5420317.nc
    │   └── voto_nrt.nc
    ├── test_calibration.py
    ├── test_cleaning.py
    ├── test_dive_numbers.py
    ├── test_flo_functions.py
    ├── test_imports.py
    ├── test_load.py
    ├── test_mapping.py
    ├── test_optics.py
    ├── test_physics.py
    ├── test_plot.py
    └── test_processing.py

/.binder/environment.yml:
--------------------------------------------------------------------------------
name: glidertools_notebooks
channels:
  - conda-forge
dependencies:
  - numexpr
  - netCDF4
  - pandas
  - xarray >=2022.10.0
  - numpy
  - scikit-learn
  - scipy
  - tqdm
  - matplotlib
  - gsw
  - skyfield
  - jupyterlab
  - cmocean
  - pip
  - pip:
      - glidertools @ git+https://github.com/GliderToolsCommunity/GliderTools.git@master

--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: ''
assignees: ''

---

**Describe the bug**
A clear and concise description of what the bug is.

**To Reproduce**
Steps to reproduce the behavior:
1. Go to '...'
2. Click on '....'
3. Scroll down to '....'
4. See error

**Expected behavior**
A clear and concise description of what you expected to happen.

**Screenshots**
If applicable, add screenshots to help explain your problem.

**Desktop (please complete the following information):**
- OS: [e.g. iOS]
- Browser [e.g. chrome, safari]
- Version [e.g. 22]

**Smartphone (please complete the following information):**
- Device: [e.g. iPhone6]
- OS: [e.g. iOS8.1]
- Browser [e.g. stock browser, safari]
- Version [e.g. 22]

**Additional context**
Add any other context about the problem here.

--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: ''
assignees: ''

---

**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]

**Describe the solution you'd like**
A clear and concise description of what you want to happen.

**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.

**Additional context**
Add any other context or screenshots about the feature request here.
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------

- [ ] Closes #xxxx
- [ ] Tests added
- [ ] Passes `pre-commit run --all-files`
- [ ] User visible changes (including notable bug fixes) are documented in `whats-new.rst`
- [ ] New functions/methods are listed in `api.rst`

--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
name: CI
on:
  push:
    branches:
      - "*"
  pull_request:
    branches:
      - "*"
  schedule:
    - cron: "0 13 * * 1"

jobs:
  build:
    name: Build (${{ matrix.python-version }} | ${{ matrix.os }})
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: ["ubuntu-latest"]
        python-version: ["3.8", "3.9", "3.10", "3.11"]
    steps:
      - uses: actions/checkout@v2
      - name: Cache conda
        uses: actions/cache@v1
        env:
          # Increase this value to reset cache if ci/environment.yml has not changed
          CACHE_NUMBER: 0
        with:
          path: ~/conda_pkgs_dir
          key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/environment.yml') }}
      - uses: conda-incubator/setup-miniconda@v2
        with:
          activate-environment: test_env_glidertools  # Defined in ci/environment*.yml
          auto-update-conda: false
          python-version: ${{ matrix.python-version }}
          environment-file: ci/environment.yml
          use-only-tar-bz2: true  # IMPORTANT: This needs to be set for caching to work properly!
      - name: Set up conda environment
        shell: bash -l {0}
        run: |
          python -m pip install -e . --no-deps
          conda list
      - name: Run Tests
        shell: bash -l {0}
        run: |
          pytest -n auto --cov=./ --cov-report=xml
      - name: Upload code coverage to Codecov
        uses: codecov/codecov-action@v1
        with:
          file: ./coverage.xml
          flags: unittests
          env_vars: OS,PYTHON
          name: codecov-umbrella
          fail_ci_if_error: false

--------------------------------------------------------------------------------
/.github/workflows/linting.yml:
--------------------------------------------------------------------------------
name: code-style
on:
  pull_request:
  push:
    branches: [master]
jobs:
  pre-commit:
    name: pre-commit
    runs-on: ubuntu-latest
    steps:
      - name: checkout
        uses: actions/checkout@v2
      - name: set up python
        uses: actions/setup-python@v2
        with:
          python-version: 3.8
      - name: Run pre-commit
        uses: pre-commit/action@v2.0.0

--------------------------------------------------------------------------------
/.github/workflows/pythonpackage.yml:
--------------------------------------------------------------------------------
name: Upload Python Package

on:
  release:
    types: [created]

jobs:
  deploy:
    if: github.repository == 'GliderToolsCommunity/GliderTools'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v1
      - name: Set up Python
        uses: actions/setup-python@v1
        with:
          python-version: '3.x'
      - name: Check git status
        run: git status
      - name: Check version
        run: python setup.py --version
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          # "build" added to this list: the step below calls `python -m build`,
          # which requires the build package to be installed
          pip install setuptools setuptools-scm wheel twine check-manifest toml build
      - name: Build tarball and wheels
        run: |
          git clean -xdf
          git restore -SW .
          python -m build --sdist --wheel .
      - name: Test the artifacts
        run: |
          python -m twine check dist/*
      - name: Publish package to PyPI
        uses: pypa/gh-action-pypi-publish@v1.4.2
        with:
          user: __token__
          password: ${{ secrets.PYPI_TOKEN }}
          verbose: true

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Created by https://www.gitignore.io/api/python
# Edit at https://www.gitignore.io/?templates=python

### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Sphinx documentation
docs/_build/
docs/_generated/

# PyBuilder
target/

# pyenv python configuration file
.python-version

# vscode
.vscode/*
**/.DS_Store
.DS_Store
junit.xml
**/.ipynb_checkpoints/**

--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.6.0
    hooks:
      - id: trailing-whitespace
      - id: check-ast
      - id: debug-statements
      - id: end-of-file-fixer
      - id: check-docstring-first
      - id: check-added-large-files

  - repo: https://github.com/PyCQA/flake8
    rev: 7.0.0
    hooks:
      - id: flake8
        args: ["--max-line-length=105", "--select=C,E,F,W,B,B950", "--ignore=E203,E501,W503"]
        exclude: docs/source/conf.py

  - repo: https://github.com/PyCQA/isort
    rev: 5.13.2
    hooks:
      - id: isort
        additional_dependencies: [toml]
        args: [--project=glidertools, --multi-line=3, --lines-after-imports=2, --lines-between-types=1, --trailing-comma, --force-grid-wrap=0, --use-parentheses, --line-width=88]

  - repo: https://github.com/asottile/seed-isort-config
    rev: v2.2.0
    hooks:
      - id: seed-isort-config

  - repo: https://github.com/psf/black
    rev: 24.3.0
    hooks:
      - id: black
        language_version: python3

--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
# .readthedocs.yml
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

# Required
version: 2

# Set the OS, Python version and other tools you might need
build:
  os: ubuntu-22.04
  tools:
    python: "3.11"

# Build documentation in the docs/ directory with Sphinx
sphinx:
  configuration: docs/conf.py

# Build documentation with MkDocs
#mkdocs:
#  configuration: mkdocs.yml

# Optionally build your docs in additional formats such as PDF and ePub
formats: all

# Optionally set the version of Python and requirements required to build your docs
python:
  install:
    - requirements: docs/requirements.txt

submodules:
  include: all

--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
===============================
glidertools
===============================

.. image:: https://github.com/GliderToolsCommunity/GliderTools/actions/workflows/ci.yml/badge.svg
   :target: https://github.com/GliderToolsCommunity/GliderTools/actions/workflows/ci.yml
.. image:: https://img.shields.io/conda/vn/conda-forge/glidertools.svg
   :target: https://anaconda.org/conda-forge/glidertools
.. image:: https://badgen.net/pypi/v/glidertools
   :target: https://pypi.org/project/glidertools
.. image:: https://pepy.tech/badge/glidertools
   :target: https://pepy.tech/project/glidertools
.. image:: https://readthedocs.org/projects/glidertools/badge/?version=latest
   :target: https://glidertools.readthedocs.io
.. image:: https://img.shields.io/badge/License-GPLv3-blue.svg
   :target: https://www.gnu.org/licenses/gpl-3.0
.. image:: https://img.shields.io/badge/Journal-10.3389%2Ffmars.2019.00738-blue
   :target: https://doi.org/10.3389/fmars.2019.00738
.. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.4075238.svg
   :target: https://doi.org/10.5281/zenodo.4075238
.. image:: https://codecov.io/gh/GliderToolsCommunity/GliderTools/branch/master/graph/badge.svg?token=FPUJ29TMSH
   :target: https://codecov.io/gh/GliderToolsCommunity/GliderTools

GliderTools is a Python 3.8+ package designed to process glider data from the first level of processing to a science-ready dataset (delayed-mode quality control). The package makes it easy to import data to a standard column format (numpy.ndarray or pandas.DataFrame). Cleaning and smoothing functions are flexible and can be applied as required by the user. We provide examples and demonstrate best practices as developed by the `SOCCO Group `_, with early contributions from `Polar Gliders `_ at the University of Gothenburg. GliderTools includes contributions from `VOTO `_. We aim to implement the Best Practices developed by `OceanGliders `_ in the ongoing `discussions `_.

Please cite the `original publication `_ of this package and `the package itself `_.

Installation
------------
Conda
.....
To install the core package from conda-forge run: ``conda install -c conda-forge glidertools``

PyPI
....
To install the core package run: ``pip install glidertools``.

GitHub
......
1. Clone glidertools to your local machine: ``git clone https://github.com/GliderToolsCommunity/GliderTools``
2. Change to the parent directory of GliderTools
3. Install glidertools with ``pip install -e ./GliderTools``. This allows
   changes you make locally to be reflected when you import the package in Python

Recommended, but optional packages
..................................
Some packages are not installed by default, as they are large or can produce
installation errors that would prevent GliderTools from installing.
They should install automatically with ``pip install package_name``:

* ``gsw``: accurate density calculation (may fail in some cases)
* ``pykrige``: variogram plotting (installation generally works, except when bundled)
* ``plotly``: interactive 3D plots (large package)
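
Quick start
-----------
The snippet below sketches a typical workflow. It is illustrative only: the
file pattern, variable names and dictionary key are placeholders, and the
exact call signatures may differ between versions, so check the `API
reference <https://glidertools.readthedocs.io>`_ before copying it verbatim.

.. code-block:: python

    import glidertools as gt

    # load Seaglider basestation netCDF files (the file pattern and the
    # variable names below are hypothetical examples, not fixed names)
    names = ["ctd_depth", "ctd_time", "temperature", "salinity"]
    ds_dict = gt.load.seaglider_basestation_netCDFs("dives/p542*.nc", names)

    # the loader groups variables by dimension; the key below is assumed
    ds = ds_dict["sg_data_point"]

    # despike a variable, then grid it to a dive-by-depth section
    # (argument order assumed: dives, depth, variable)
    baseline, spikes = gt.cleaning.despike(ds.salinity, window_size=5)
    gridded = gt.mapping.grid_data(ds.dives, ds.ctd_depth, baseline)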

How you can contribute
----------------------
- Join the community `by introducing yourself `_ (no need to be a Python or Git guru! Just say what you are working with and join the discussion)
- If you find an error, please report it `as a GitHub issue `_. Please copy the entire error message (even if it's long).
- Oxygen processing is rudimentary so far, but we are on it and happy to get your support `in this discussion `_

For contributing, follow the `instructions `_

Acknowledgements
----------------
- We rely heavily on ``ion_functions.data.flo_functions``, which was
  written by Christopher Wingard, Craig Risien and Russell Desiderio
- This work was initially funded by Pedro M Scheel Monteiro at the
  Council for Scientific and Industrial Research (where Luke was working
  at the time of writing the code).
- Testers for their feedback: SOCCO team at the CSIR and ...

--------------------------------------------------------------------------------
/ci/environment.yml:
--------------------------------------------------------------------------------
name: test_env_glidertools
channels:
  - conda-forge
dependencies:
  - numexpr
  - netCDF4
  - pandas
  - xarray >=2022.10.0
  - numpy
  - scikit-learn
  - scipy
  - tqdm
  - matplotlib
  - gsw
  - skyfield
  - pip
  - pip:
      - codecov
      - pytest-cov
      - pytest-xdist

--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
codecov:
  require_ci_to_pass: no
  max_report_age: off

comment: false

coverage:
  precision: 2
  round: down
  status:
    project:
      default:
        target: 95
        informational: true
    patch: off
    changes: off

ignore:
  - "setup.py"
  - "versioneer.py"
  - "glidertools/__init__.py"
  - "glidertools/_version.py"
  - "glidertools/load/*"
  - "tests/*"

--------------------------------------------------------------------------------
/dependabot.yml:
--------------------------------------------------------------------------------
version: 2
updates:
  - package-ecosystem: 'github-actions'
    directory: '/'
    schedule:
      # Check for updates once a week
      interval: 'weekly'

--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
API Reference
=============

The API reference is automatically generated from the function docstrings in
the GliderTools package. Refer to the examples in the sidebar for reference on
how to use the functions.

Loading Data
------------
.. currentmodule:: glidertools
.. autosummary::
    :toctree: ./_generated/

    load.seaglider_basestation_netCDFs
    load.seaglider_show_variables
    load.ego_mission_netCDF
    load.slocum_geomar_matfile
    load.voto_seaexplorer_nc
    load.voto_seaexplorer_dataset
    load.voto_concat_datasets


High level processing
---------------------
.. currentmodule:: glidertools
.. autosummary::
    :toctree: ./_generated/

    processing.calc_physics
    processing.calc_oxygen
    processing.calc_backscatter
    processing.calc_fluorescence
    processing.calc_par


Cleaning
--------
.. currentmodule:: glidertools
.. autosummary::
    :toctree: ./_generated/

    cleaning.outlier_bounds_std
    cleaning.outlier_bounds_iqr
    cleaning.horizontal_diff_outliers
    cleaning.mask_bad_dive_fraction
    cleaning.data_density_filter
    cleaning.despike
    cleaning.despiking_report
    cleaning.rolling_window
    cleaning.savitzky_golay


Physics
-------
.. currentmodule:: glidertools
.. autosummary::
    :toctree: ./_generated/

    physics.mixed_layer_depth
    physics.potential_density
    physics.brunt_vaisala



Optics
------
.. currentmodule:: glidertools
.. autosummary::
    :toctree: ./_generated/

    optics.find_bad_profiles
    optics.par_dark_count
    optics.backscatter_dark_count
    optics.fluorescence_dark_count
    optics.par_scaling
    optics.par_fill_surface
    optics.photic_depth
    optics.sunset_sunrise
    optics.quenching_correction
    optics.quenching_report


Calibration
-----------
.. currentmodule:: glidertools
.. autosummary::
    :toctree: ./_generated/

    calibration.bottle_matchup
    calibration.model_figs
    calibration.robust_linear_fit

Gridding and Interpolation
--------------------------
.. currentmodule:: glidertools
.. autosummary::
    :toctree: ./_generated/

    mapping.interp_obj
    mapping.grid_data
    mapping.variogram



Plotting
--------
.. currentmodule:: glidertools
.. autosummary::
    :toctree: ./_generated/

    plot.plot_functions


General Utilities
-----------------
.. currentmodule:: glidertools
.. autosummary::
    :toctree: ./_generated/

    utils.time_average_per_dive
    utils.mask_above_depth
    utils.mask_below_depth
    utils.mask_profile_depth
    utils.merge_dimensions
    utils.calc_glider_vert_velocity
    utils.calc_dive_phase
    utils.calc_dive_number
    utils.dive_phase_to_number
    utils.distance
    utils.group_by_profiles

--------------------------------------------------------------------------------
/docs/authors.md:
--------------------------------------------------------------------------------
Citing GliderTools
==================

[![](https://zenodo.org/badge/141922866.svg)](https://zenodo.org/badge/latestdoi/141922866)

If you would like to cite or reference GliderTools, please use:

Gregor, L., Ryan-Keogh, T. J., Nicholson, S.-A., du Plessis, M., Giddy, I., & Swart, S. (2019). GliderTools: A Python Toolbox for Processing Underwater Glider Data. Frontiers in Marine Science, 6(December), 1–13. https://doi.org/10.3389/fmars.2019.00738


Project Contributors
--------------------

The following people have made contributions to the project (in alphabetical order by last name) and are considered "The GliderTools Developers". These contributors will be added as authors upon the next major release of GliderTools (i.e. new DOI release).
- [Dhruv Balwada](https://dhruvbalwada.github.io/) - University of Washington, USA. (ORCID: [0000-0001-6632-0187](https://orcid.org/0000-0001-6632-0187))
- [Julius Busecke](http://jbusecke.github.io/) - Columbia University, USA. (ORCID: [0000-0001-8571-865X](https://orcid.org/0000-0001-8571-865X))
- [Isabelle Giddy](https://github.com/isgiddy/) - University of Cape Town: Cape Town, Western Cape, South Africa. (ORCID: [0000-0002-8926-3311](https://orcid.org/0000-0002-8926-3311))
- [Luke Gregor](https://github.com/lukegre) - Environmental Physics, ETH Zuerich: Zurich, Switzerland. (ORCID: [0000-0001-6071-1857](https://orcid.org/0000-0001-6071-1857))
- [Tom Hull](https://github.com/tomhull) - Centre for Environment Fisheries and Aquaculture Science: Lowestoft, UK. (ORCID: [0000-0002-1714-9317](https://orcid.org/0000-0002-1714-9317))
- [Martin Mohrmann](https://github.com/MartinMohrmann) - Voice of the Ocean Foundation, Gothenburg, Sweden. (ORCID: [0000-0001-8056-4866](https://orcid.org/0000-0001-8056-4866))
- [Sarah-Anne Nicholson](https://github.com/sarahnicholson) - Council for Scientific and Industrial Research: Cape Town, South Africa. (ORCID: [0000-0002-1226-1828](https://orcid.org/0000-0002-1226-1828))
- [Marcel du Plessis](https://mduplessis.com) - University of Cape Town: Cape Town, Western Cape, South Africa. (ORCID: [0000-0003-2759-2467](https://orcid.org/0000-0003-2759-2467))
- [Callum Rollo](https://callumrollo.github.io/) - Voice of the Ocean Foundation, Gothenburg, Sweden. (ORCID: [0000-0002-5134-7886](https://orcid.org/0000-0002-5134-7886))
- [Tommy Ryan-Keogh](https://github.com/tjryankeogh) - Council for Scientific and Industrial Research: Cape Town, South Africa. (ORCID: [0000-0001-5144-171X](https://orcid.org/0000-0001-5144-171X))
- [Sebastiaan Swart](https://sebswart.com) - University of Gothenburg: Gothenburg, Sweden. (ORCID: [0000-0002-2251-8826](https://orcid.org/0000-0002-2251-8826))
- [Soeren Thomsen](https://github.com/soerenthomsen) - LOCEAN/IPSL/CNRS/Sorbonne University: Paris, France. (ORCID: [0000-0002-0598-8340](https://orcid.org/0000-0002-0598-8340))

--------------------------------------------------------------------------------
/docs/calibration.md:
--------------------------------------------------------------------------------

# Calibration with bottle samples

Bottle calibration can also be done using the `calibration` module.

The bottle file needs to be in a specific format with dates (`datetime64` format), depth and the variable values. It can be imported with any method available; we recommend `pandas.read_csv`, as shown in the example below. Note that latitude and longitude are not taken into account, so the user needs to make sure that the CTD cast was in the correct location (time, however, is used to match the glider).

```python
import pandas as pd

fname = '/Users/luke/Work/Publications/2019_Gregor_Front_glider/figures/SOSCEX 3 PS1.csv'
cal = pd.read_csv(fname, parse_dates=['datetime'], dayfirst=True)
```

The `calibration.bottle_matchup` function returns an array that matches the size of the ungridded glider data.
The match-up is based on depth and time from both the glider and the CTD. The function reports how many samples were matched and the smallest time difference between a CTD rosette cast and a dive (any time on the dive).
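
For reference, a minimal bottle table might be built as below. The values and
column names are made up for illustration; `bottle_matchup` receives the
columns themselves as arguments (as in the examples that follow), so the names
do not matter.

```python
import pandas as pd

# hypothetical bottle data: one row per bottle sample, with two depths
# sampled during the same rosette cast
cal = pd.DataFrame({
    "datetime": pd.to_datetime(["2016-01-05 17:46", "2016-01-05 17:46"]),
    "depth": [10.0, 50.0],   # sample depth in metres
    "sal": [33.98, 34.21],   # the bottle variable to calibrate against
})
```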

### Using depth


```python
%autoreload 2

dat['bottle_sal'] = gt.calibration.bottle_matchup(
    dat.dives, dat.depth, dat.time,
    cal.depth, cal.datetime, cal.sal)

model = gt.calibration.robust_linear_fit(dat.salt_qc, dat.bottle_sal, fit_intercept=True, epsilon=1.5)
dat['salinity_qc'] = model.predict(dat.salt_qc)
```

    [stn 0/5] FAILED: 2015-07-28 10:25 Couldn't find samples within constraints
    [stn 1/5] FAILED: 2015-07-28 16:15 Couldn't find samples within constraints
    [stn 2/5] FAILED: 2015-12-08 03:23 Couldn't find samples within constraints
    [stn 3/5] SUCCESS: 2016-01-05 17:46 (15 of 15 samples) match-up within 0.0 minutes
    [stn 4/5] SUCCESS: 2016-02-08 03:14 (12 of 17 samples) match-up within 0.0 minutes
    (13, 1) (100, 1)


![png](img/output_92_1.png)


### Using Density


```python
%autoreload 2

dat['bottle_sal'] = gt.calibration.bottle_matchup(
    dat.dives, dat.density, dat.time,
    cal.density, cal.datetime, cal.sal)

model = gt.calibration.robust_linear_fit(dat.salt_qc, dat.bottle_sal, fit_intercept=True, epsilon=1.5)
dat['salinity_qc'] = model.predict(dat.salt_qc)
```

    [stn 0/5] FAILED: 2015-07-28 10:25 Couldn't find samples within constraints
    [stn 1/5] FAILED: 2015-07-28 16:15 Couldn't find samples within constraints
    [stn 2/5] FAILED: 2015-12-08 03:23 Couldn't find samples within constraints
    [stn 3/5] SUCCESS: 2016-01-05 17:46 (15 of 15 samples) match-up within 0.0 minutes
    [stn 4/5] SUCCESS: 2016-02-08 03:14 (16 of 17 samples) match-up within 0.0 minutes
    (6, 1) (100, 1)


![png](img/output_94_1.png)

--------------------------------------------------------------------------------
/docs/cheatsheet.md:
--------------------------------------------------------------------------------
Cheat Sheet
===========
![cheat sheet image](img/package_overview.png)

--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
#
# Read the Docs Template documentation build configuration file, created by
# sphinx-quickstart on Tue Aug 26 14:19:49 2014.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.

from pkg_resources import get_distribution


release = get_distribution("glidertools").version
version = ".".join(release.split(".")[:2])

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
# sys.path.insert(0, os.path.abspath('.'))

# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
# needs_sphinx = "1.4"

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.

extensions = [
    "sphinx.ext.autodoc",
    "sphinx.ext.autosummary",
    "sphinx.ext.mathjax",
    "sphinx.ext.napoleon",
    "sphinx.ext.extlinks",
    "recommonmark",
]

autosummary_generate = True

autodoc_default_flags = ["members", "inherited-members"]

# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]

# The suffix of source filenames.
source_suffix = [".rst", ".md"]

napoleon_numpy_docstring = True

# The encoding of source files.
# source_encoding = 'utf-8-sig'

# The master toctree document.
master_doc = "index"

# General information about the project.
project = "GliderTools"
copyright = "GliderTools, 2023"

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = ".".join(version.split(".")[:2])
# The full version, including alpha/beta/rc tags.
release = version

# link to github issues
extlinks = {
    "issue": ("https://github.com/GliderToolsCommunity/GliderTools/issues/%s", "GH#%s"),
    "pull": ("https://github.com/GliderToolsCommunity/GliderTools/issues/%s", "GH#%s"),
}

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
# language = None

# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
# today = ''
# Else, today_fmt is used as the format for a strftime call.
# today_fmt = '%B %d, %Y'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ["_build"]

# The reST default role (used for this markup: `text`) to use for all
# documents.
# default_role = None

# If true, '()' will be appended to :func: etc. cross-reference text.
# add_function_parentheses = True

# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
# add_module_names = True


# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
# show_authors = False

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx"

# A list of ignored prefixes for module index sorting.
# modindex_common_prefix = []

# If true, keep warnings as "system message" paragraphs in the built documents.
# keep_warnings = False


# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme = "sphinx_rtd_theme"

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
# html_theme_options = {
#     "logo_only": True,
#     "display_version": False,
#     "style_nav_header_background": "#343131",
# }

# Add any paths that contain custom themes here, relative to this directory.
# html_theme_path = []

# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
# html_title = None

# A shorter title for the navigation bar. Default is the same as html_title.
# html_short_title = None

# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
html_logo = "img/logo_with_name.png"

# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
# html_favicon = None

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
# html_static_path = ["static"]

# custom CSS files
# html_context = {
#     "css_files": ["_static/css/custom.css"],
# }

# Add any extra paths that contain custom files (such as robots.txt or
# .htaccess) here, relative to this directory. These files are copied
# directly to the root of the documentation.
# html_extra_path = []

# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
# html_last_updated_fmt = '%b %d, %Y'

# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
# html_use_smartypants = True

# Custom sidebar templates, maps document names to template names.
# html_sidebars = {}

# Additional templates that should be rendered to pages, maps page names to
# template names.
# html_additional_pages = {}

# If false, no module index is generated.
# html_domain_indices = True

# If false, no index is generated.
# html_use_index = True

# If true, the index is split into individual pages for each letter.
# html_split_index = False

# If true, links to the reST sources are added to the pages.
# html_show_sourcelink = True

# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
# html_show_sphinx = True

# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
# html_show_copyright = True

# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
# html_use_opensearch = ''

# This is the file name suffix for HTML files (e.g. ".xhtml").
# html_file_suffix = None

# Output file base name for HTML help builder.
htmlhelp_basename = "ReadtheDocsTemplatedoc"


# -- Options for LaTeX output ---------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    # 'papersize': 'letterpaper',
    # The font size ('10pt', '11pt' or '12pt').
    # 'pointsize': '10pt',
    # Additional stuff for the LaTeX preamble.
    # 'preamble': '',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
    (
        "index",
        "ReadtheDocsTemplate.tex",
        "Read the Docs Template Documentation",
        "Read the Docs",
        "manual",
    ),
]

# The name of an image file (relative to this directory) to place at the top of
# the title page.
# latex_logo = None

# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
# latex_use_parts = False

# If true, show page references after internal links.
# latex_show_pagerefs = False

# If true, show URL addresses after external links.
# latex_show_urls = False

# Documents to append as an appendix to all manuals.
# latex_appendices = []

# If false, no module index is generated.
# latex_domain_indices = True


# -- Options for manual page output ---------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (
        "index",
        "readthedocstemplate",
        "Read the Docs Template Documentation",
        ["Read the Docs"],
        1,
    )
]

# If true, show URL addresses after external links.
# man_show_urls = False


# -- Options for Texinfo output -------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (
        "index",
        "ReadtheDocsTemplate",
        "Read the Docs Template Documentation",
        "Read the Docs",
        "ReadtheDocsTemplate",
        "Miscellaneous",
    ),
]


# Documents to append as an appendix to all manuals.
# texinfo_appendices = []

# If false, no module index is generated.
# texinfo_domain_indices = True

# How to display URL addresses: 'footnote', 'no', or 'inline'.
# texinfo_show_urls = 'footnote'

# If true, do not generate a @detailmenu in the "Top" node's menu.
# texinfo_no_detailmenu = False

--------------------------------------------------------------------------------
/docs/contributing.rst:
--------------------------------------------------------------------------------
=====================
Contribution Guide
=====================

Contributions are highly welcomed and appreciated. Every little help counts,
so do not hesitate! You can make a high impact on ``glidertools`` just by using it, being
involved in `discussions `_
and reporting `issues `__.

The following sections cover some general guidelines
regarding development in ``glidertools`` for maintainers and contributors.

Nothing here is set in stone; everything can be changed.
Feel free to suggest improvements or changes in the workflow.


.. contents:: Contribution links
   :depth: 2



.. _submitfeedback:

Feature requests and feedback
-----------------------------

We are eager to hear about your requests for new features and any suggestions about the
API, infrastructure, and so on.
Feel free to start a discussion about these on the
`discussions tab `_ on GitHub
under the "ideas" section.

After discussion with a few community members, and agreement that the feature should be added and who will work on it,
a new issue should be opened. In the issue, please make sure to explain in detail how the feature should work and keep
the scope as narrow as possible. This will make it easier to implement in small PRs.


.. _reportbugs:

Report bugs
-----------

Report bugs for ``glidertools`` in the `issue tracker `_
with the label "bug".

If you can write a demonstration test that currently fails but should pass,
that is a very useful commit to make as well, even if you cannot fix the bug itself.


.. _fixbugs:

Fix bugs
--------

Look through the `GitHub issues for bugs `_.

Talk to developers to find out how you can fix specific bugs.



Preparing Pull Requests
-----------------------

#. Fork the
   `glidertools GitHub repository `__. It's
   fine to use ``glidertools`` as your fork repository name because it will live
   under your username.

#. Clone your fork locally using `git `_, connect your repository
   to the upstream (main project), and create a branch::

    $ git clone git@github.com:YOUR_GITHUB_USERNAME/glidertools.git  # clone to local machine
    $ cd glidertools
    $ git remote add upstream git@github.com:GliderToolsCommunity/GliderTools.git  # connect to upstream remote

    # now, to fix a bug or add a feature, create your own branch off "master":

    $ git checkout -b your-bugfix-feature-branch-name master  # create a new branch where you will make changes

   If you need some help with Git, follow this quick start
   guide: https://git.wiki.kernel.org/index.php/QuickStart

#. Set up a conda environment with all necessary dependencies::

    $ conda env create -f ci/environment.yml

#. Activate your environment::

    $ conda activate test_env_glidertools

   *Make sure you are in this environment when working on changes in the future too.*

#. Install the GliderTools package::

    $ pip install -e . --no-deps

#. Before you modify anything, ensure that the setup works by executing all tests::

    $ pytest

   You want to see an output indicating no failures, like this::

    $ ========================== n passed, j warnings in 17.07s ===========================


#. Install `pre-commit `_ and its hook on the ``glidertools`` repo::

    $ pip install --user pre-commit
    $ pre-commit install

   Afterwards ``pre-commit`` will run whenever you commit. If some errors are reported by pre-commit,
   you should format the code by running::

    $ pre-commit run --all-files

   and then try to commit again.

   https://pre-commit.com/ is a framework for managing and maintaining multi-language pre-commit
   hooks to ensure code style and code formatting are consistent.

   You can now edit your local working copy and run/add tests as necessary. Please follow
   PEP-8 for naming. When committing, ``pre-commit`` will modify the files as needed, or
   will generally be quite clear about what you need to do to pass the commit test.

#. Break your edits up into reasonably sized commits::

    $ git commit -a -m "<commit message>"
    $ git push -u

   Committing will run the pre-commit hooks (isort, black and flake8).
   Pushing will run the pre-push hooks (pytest and coverage).

   We highly recommend using test driven development, but our coverage requirement is
   low at the moment due to lack of tests. If you are able to write tests, please
   stick to `xarray `_'s
   testing recommendations.


#. Add yourself to the
   `Project Contributors `_
   list via ``./docs/authors.md``.

#. Finally, submit a pull request (PR) through the GitHub website using this data::

    head-fork: YOUR_GITHUB_USERNAME/glidertools
    compare: your-branch-name

    base-fork: GliderToolsCommunity/GliderTools
    base: master

   The merged pull request will undergo the same testing that your local branch
   had to pass when pushing.

#. After your pull request is merged into `GliderTools/master`, you will need
   to fetch those changes and rebase your master so that it reflects the latest
   version of GliderTools. The changes should also be fetched and incorporated (rebased)
   right before you plan to introduce new changes::

    $ git checkout master      # switch back to the master branch
    $ git fetch upstream       # download all changes from the central upstream repo
    $ git rebase upstream/master  # apply the changes made to the central repo
    $                             # since your last fetch onto your master
    $ git branch -d your-bugfix-feature-branch-name  # delete the branch after the PR is approved

Release Instructions
--------------------

This is a documentation repo for people in the group on how to do the integrated deployment.

NB RULE! Never commit to master.

1. Change the version in the setup.py file. It must have the format YYYY.
2. Create a release with a tag that has the same format as the version above.
3. The distribution will be built automatically and pushed to PyPI.
4. The DOI will also be updated on Zenodo. (untested, see #165)
--------------------------------------------------------------------------------
/docs/img/ (binary files):
--------------------------------------------------------------------------------
The 52 PNG images listed in the tree above (code_bgu.png, interactive_plot.png,
the logo_* files, the output_*.png notebook figures, package_overview.png and
package_structure.png) are binary files. Each one resolves to
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/docs/img/<filename>

--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
=====================================
Glider Tools: profile data processing
=====================================

GliderTools is a Python 3.8+ package designed to process data from the first level of processing to a science-ready dataset.
The package is designed to easily import data to a standard column format:
``numpy.ndarray``, ``pandas.DataFrame`` or ``xarray.DataArray`` (we recommend
the latter, which has full support for metadata).
Cleaning and smoothing functions are flexible and can be applied as required by the user.
We provide examples and demonstrate best practices as developed by the `SOCCO Group `_.

For the original publication of this package see: https://doi.org/10.3389/fmars.2019.00738.

For recommendations or bug reports, please visit https://github.com/GliderToolsCommunity/GliderTools/issues/new

..
toctree:: 17 |    :maxdepth: 2 18 |    :caption: Getting started 19 | 20 |    installation 21 |    cheatsheet 22 | 23 | .. toctree:: 24 |    :maxdepth: 2 25 |    :caption: Users Guide 26 | 27 |    loading 28 |    quality_control 29 |    physics 30 |    optics 31 |    calibration 32 |    mapping 33 |    saving 34 |    other 35 | 36 | .. toctree:: 37 |    :maxdepth: 2 38 |    :caption: Help and Reference 39 | 40 |    GitHub Repo 41 |    api 42 |    package_structure 43 |    whats-new 44 |    authors 45 |    contributing 46 |    wishlist 47 | -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | GliderTools can be installed with conda, with pip, or directly from the GitHub repository. 5 | 6 | ##### Conda 7 | The easiest way to install the package is with [conda](https://docs.conda.io/en/latest/): `conda install -c conda-forge glidertools`. 8 | 9 | ##### PyPI 10 | You can also install with pip: `pip install glidertools`. 11 | 12 | ##### GitHub 13 | For the most up to date version of GliderTools, you can install directly from the online repository hosted on GitHub. 14 | 15 | 1. Clone glidertools to your local machine: `git clone https://github.com/GliderToolsCommunity/GliderTools` 16 | 2. Change to the parent directory of GliderTools 17 | 3. Install glidertools with `pip install -e ./GliderTools`. This will allow changes you make locally to be reflected when you import the package in Python. -------------------------------------------------------------------------------- /docs/loading.md: -------------------------------------------------------------------------------- 1 | Loading data 2 | ============ 3 | 4 | To start using Glider Tools you first need to import the package into the 5 | interactive workspace. 6 | 7 | 8 | ## Import `GliderTools` 9 | 10 | 11 | ```python 12 | # pylab for a more MATLAB-like environment; inline displays plots below cells 13 | %pylab inline 14 | 15 | # if a gsw warning shows, manually install gsw if possible - GliderTools will still work without it 16 | import glidertools as gt 17 | from cmocean import cm as cmo  # we use this for colormaps 18 | ``` 19 | 20 |     Populating the interactive namespace from numpy and matplotlib 21 | 22 | 23 | ## Working with Seaglider base station files 24 | 25 | GliderTools supports loading Seaglider files, including `scicon` data (different sampling frequencies). 26 | There is a function that makes it easier to find variable names that you'd like to load: `gt.load.seaglider_show_variables` 27 | 28 | This function is demonstrated in the cell below. 29 | The function accepts a **list of file names** and can also receive a string with a wildcard placeholder (`*`); basic regular expressions are also supported. In the example below we use a simple asterisk placeholder for all the files. 30 | 31 | Note that the function chooses only one file from the passed list or glob string - this file name will be shown. The returned table shows the variable name, dimensions, units, and a brief comment if one is available. 32 | 33 | 34 | ```python 35 | filenames = '/Users/luke/Work/Data/sg542/p5420*.nc' 36 | 37 | gt.load.seaglider_show_variables(filenames) 38 | ``` 39 | 40 |     information is based on file: /Users/luke/Work/Data/sg542/p5420177.nc 41 | 42 | 43 | 44 | ## Working with VOTO Seaexplorer files or xarray-datasets 45 | 46 | Glidertools supports loading Seaexplorer files.
This is currently implemented and tested with [VOTO](https://observations.voiceoftheocean.org) datasets in mind, but we welcome feedback/pull requests on how it works for other SeaExplorer datasets. VOTO data can either be downloaded from the website using a browser or, more conveniently, from an [ERDDAP server](https://erddap.observations.voiceoftheocean.org/erddap/index.html). See the demo notebook to get started with downloads over the API. 47 | 48 | After downloading a .nc file (or creating an xarray Dataset), it can be read into GliderTools by calling `gt.load.voto_seaexplorer_nc` or `gt.load.voto_seaexplorer_dataset` respectively. Resulting datasets can be merged by calling `gt.load.voto_concat_datasets`. This completes the import of the data into GliderTools; the remaining steps on this page are optional. 49 | 50 | ## Load variables 51 | 52 | From the variable listing, one can choose multiple variables to load. Note that one only needs the variable name to load the data. Below, we've created a list of variables that we'll be using for this demo. 53 | 54 | The `gt.load.seaglider_basestation_netCDFs` function is used to load a list of variables. It requires the filename string or list (as described above) and the variable names (keys). It may be that these variables are not sampled at the same frequency. In this case, the loading function will load the sampling frequency dimensions separately. The function will try to find a time variable for each sampling frequency/dimension. 55 | 56 | ### Coordinates and automatic *time* fetching 57 | All associated coordinate variables will also be loaded with the data if coordinates are documented. These may include *latitude, longitude, depth* and *time* (naming may vary). If time cannot be found for a dimension, a *time* variable from a different dimension with the same number of observations is used instead. This ensures that data can be merged based on the time of sampling. 58 | 59 | ### Merging data based on time 60 | If `return_merged` is set to *True*, the function will merge the dimensions if the dimension has an associated *time* variable. 61 | 62 | The function returns a dictionary of `xarray.Dataset` objects - *xarray* is a Python package that deals with coordinate-indexed multi-dimensional arrays. We recommend that you read the documentation (http://xarray.pydata.org/en/stable/) as this package is used throughout *GliderTools*. This allows the original metadata to be copied with the data. The dictionary keys are the names of the dimensions. If `return_merged` is set to *True* an additional entry under the key `merged` will be included. 63 | 64 | The structure of a dimension output is shown below. Note that the merged data will use the largest dimension as the primary dataset and the other data will be merged onto that time index. Data is linearly interpolated to the nearest time measurement of the primary index, but only by one measurement to ensure transparency.
65 | 66 | 67 | ```python 68 | names = [ 69 |     'ctd_depth', 70 |     'ctd_time', 71 |     'ctd_pressure', 72 |     'salinity', 73 |     'temperature', 74 |     'eng_wlbb2flvmt_Chlsig', 75 |     'eng_wlbb2flvmt_wl470sig', 76 |     'eng_wlbb2flvmt_wl700sig', 77 |     'aanderaa4330_dissolved_oxygen', 78 |     'eng_qsp_PARuV', 79 | ] 80 | 81 | ds_dict = gt.load.seaglider_basestation_netCDFs( 82 |     filenames, names, 83 |     return_merged=True, 84 |     keep_global_attrs=False 85 | ) 86 | ``` 87 | 88 |     DIMENSION: sg_data_point 89 |     { 90 |     ctd_pressure, eng_wlbb2flvmt_wl470sig, eng_wlbb2flvmt_wl700sig, temperature, 91 |     ctd_time, ctd_depth, latitude, aanderaa4330_dissolved_oxygen, salinity, 92 |     eng_wlbb2flvmt_Chlsig, longitude 93 |     } 94 | 95 | 96 | 100%|██████████| 336/336 [00:04<00:00, 73.66it/s] 97 | 98 | 99 | 100 |     DIMENSION: qsp2150_data_point 101 |     {eng_qsp_PARuV, time} 102 | 103 | 104 | 100%|██████████| 336/336 [00:01<00:00, 181.67it/s] 105 | 106 | 107 | 108 |     Merging dimensions on time indicies: sg_data_point, qsp2150_data_point, 109 | 110 | 111 | The returned dictionary contains an entry for each dimension of the requested variables; a `merged` object is also returned if `return_merged=True`. 112 | ```python 113 | 114 | print(ds_dict.keys()) 115 | ``` 116 | 117 |     dict_keys(['sg_data_point', 'qsp2150_data_point', 'merged']) 118 | 119 | 120 | ### Metadata handling 121 | If the keyword argument `keep_global_attrs=True` is set, the global attributes from the original netCDF files (those that are identical across files) are passed on to the output *Datasets*. The variable attributes (units, comments, axis...) are passed on by default, but can also be set to False if not wanted. GliderTools functions will automatically pass on these attributes to function outputs if an `xarray.DataArray` with attributes is given. 122 | All functions applied to the data will also be recorded under the variable attribute `processing`. 123 | 124 | 125 | The merged dataset contains all the data interpolated to the nearest observation of the longest dimension. The metadata is also shown in the example below. 126 | ```python 127 | ds_dict['merged'] 128 | ``` 129 | 130 | 131 | 132 | 133 | <xarray.Dataset> 134 | Dimensions:                        (merged: 382151) 135 | Coordinates: 136 |     ctd_depth                      (merged) float64 -0.08821 0.018 ... -0.1422 137 |     latitude                       (merged) float64 -42.7 -42.7 ... -43.0 -43.0 138 |     longitude                      (merged) float64 8.744 8.744 ... 8.5 8.5 139 |     ctd_time_dt64                  (merged) datetime64[ns] 2015-12-08T07:36:16 ... 140 | 141 | Dimensions without coordinates: merged 142 | Data variables: 143 |     ctd_pressure                   (merged) float64 -0.08815 0.01889 ... -0.1432 144 |     eng_wlbb2flvmt_wl470sig        (merged) float64 375.0 367.0 ... 98.0 91.0 145 |     eng_wlbb2flvmt_wl700sig        (merged) float64 2.647e+03 ... 137.0 146 |     temperature                    (merged) float64 11.55 11.54 ... 11.06 10.97 147 |     ctd_time                       (merged) float64 1.45e+09 ... 1.455e+09 148 |     aanderaa4330_dissolved_oxygen  (merged) float64 nan nan nan ... 269.1 269.1 149 |     salinity                       (merged) float64 nan nan nan ... 34.11 34.11 150 |     eng_wlbb2flvmt_Chlsig          (merged) float64 145.0 126.0 ... 215.0 215.0 151 |     dives                          (merged) float64 1.0 1.0 1.0 ... 344.5 344.5 152 |     eng_qsp_PARuV                  (merged) float64 0.551 0.203 ... 0.021 0.023 153 |     time                           (merged) float64 1.45e+09 ... 1.455e+09 154 |     time_dt64                      (merged) datetime64[ns] 2015-12-08T07:36:16 ... 155 | 156 | Attributes: 157 |     date_created:             2019-07-11 14:08:40 158 |     number_of_dives:          344.0 159 |     files:                    ['p5420001.nc', 'p5420002.nc', 'p5420004.nc', '...
160 |     time_coverage_start:      2015-12-08 07:36:16 161 |     time_coverage_end:        2016-02-08 04:39:04 162 |     geospatial_vertical_min:  -0.6323553853732649 163 |     geospatial_vertical_max:  1011.1000623417478 164 |     geospatial_lat_min:       -43.085757609206 165 |     geospatial_lat_max:       -42.70088638031523 166 |     geospatial_lon_min:       8.29983279020758 167 |     geospatial_lon_max:       8.7753734452125 168 |     processing:               [2019-07-11 14:08:40] imported data with Glider... 169 | 170 | 171 | 172 | ### Renaming for ease of access 173 | When renaming, just be sure that the dataset does not already contain variables with the new names you are assigning. In the example below we remove `time` in case it exists in the files. 174 | ```python 175 | # Here we drop the time variables imported for the PAR variable - 176 | # we don't need these anymore. You might have to change this 177 | # depending on the dataset 178 | merged = ds_dict['merged'] 179 | if 'time' in merged: 180 |     merged = merged.drop(["time", "time_dt64"]) 181 | 182 | 183 | # To make it easier and clearer to work with, we rename the 184 | # original variables to something that makes more sense. This 185 | # is done with the xarray.Dataset.rename({}) function. 186 | # We only use the merged dataset as this contains all the 187 | # imported dimensions. 188 | # NOTE: The renaming has to be specific to the dataset otherwise an error will occur 189 | dat = merged.rename({ 190 |     'salinity': 'salt_raw', 191 |     'temperature': 'temp_raw', 192 |     'ctd_pressure': 'pressure', 193 |     'ctd_depth': 'depth', 194 |     'ctd_time_dt64': 'time', 195 |     'ctd_time': 'time_raw', 196 |     'eng_wlbb2flvmt_wl700sig': 'bb700_raw', 197 |     'eng_wlbb2flvmt_wl470sig': 'bb470_raw', 198 |     'eng_wlbb2flvmt_Chlsig': 'flr_raw', 199 |     'eng_qsp_PARuV': 'par_raw', 200 |     'aanderaa4330_dissolved_oxygen': 'oxy_raw', 201 | }) 202 | 203 | print(dat) 204 | 205 | # variable assignment for convenient access 206 | depth = dat.depth 207 | dives = dat.dives 208 | lats = dat.latitude 209 | lons = dat.longitude 210 | time = dat.time 211 | pres = dat.pressure 212 | temp = dat.temp_raw 213 | salt = dat.salt_raw 214 | par = dat.par_raw 215 | bb700 = dat.bb700_raw 216 | bb470 = dat.bb470_raw 217 | fluor = dat.flr_raw 218 | 219 | # name coordinates for quicker plotting 220 | x = dat.dives 221 | y = dat.depth 222 | ``` 223 | -------------------------------------------------------------------------------- /docs/mapping.md: -------------------------------------------------------------------------------- 1 | # Gridding and interpolation 2 | 3 | ## Vertical gridding 4 | It is often more convenient and computationally efficient to work with data that has been gridded to a standard vertical grid (i.e. depths have been binned). 5 | GliderTools offers easy-to-use and efficient tools to grid data once all the processing has been completed. 6 | 7 | The first task is to select the bin size of the data that will be gridded. 8 | GliderTools automatically selects a bin size for every 50 m depth interval, according to the sampling frequency of the dataset in that interval. 9 | This is shown in the figure below, where the 2D histogram shows the sampling frequency (by depth) and the line shows the automatically selected bin size rounded up to the nearest 0.5 m.
10 | 11 | 12 | ```python 13 | ax = gt.plot.bin_size(dat.depth, cmap=mpl.cm.Blues) 14 | ax.set_xlim(0, 6) 15 | line = ax.get_children()[1] 16 | line.set_linewidth(6) 17 | line.set_color('orange') 18 | 19 | legend = ax.get_children()[-2] 20 | legend.set_visible(False) 21 | ``` 22 | 23 | 24 | ![png](img/output_97_0.png) 25 | 26 | 27 | ### Gridding with automatic bin sizes 28 | 29 | Gridding the data then becomes easy with automatic binning. Note that the x-coordinate has to be semi-discrete, e.g. dive number or the average time stamp of each dive. You'll see that the gridding function also reports the mean bin size and the average sampling frequency. 30 | 31 | The function can return either an xr.DataArray or a pd.DataFrame. The DataArray is the default, as metadata can be stored in these objects (including coordinate information). 32 | 33 | Gridded data can be passed to the plot function without x- and y-coordinates, as these are contained in the gridded data. 34 | 35 | In fact, data is silently passed through the gridding function when x- and y-coordinates are included in the `gt.plot` function. 36 | 37 | 38 | ```python 39 | flr_gridded = gt.grid_data(dives, depth, flr) 40 | 41 | ax = gt.plot(flr_gridded, cmap=cmo.delta) 42 | ax.set_ylim(200, 0) 43 | ``` 44 | 45 |     Mean bin size = 1.99 46 |     Mean depth binned (50 m) vertical sampling frequency = 2.53 47 | 48 | 49 | ![png](img/output_99_2.png) 50 | 51 | 52 | ### Gridding with manually defined bins 53 | 54 | There is also the option to manually define your bins if you'd prefer. 55 | A custom bin array needs to be created. 56 | Use `np.arange` to create sections of the bins and combine them with `np.r_` as shown below: 57 | 58 | 59 | ```python 60 | custom_bin = np.r_[ 61 |     np.arange(0, 100, 0.5), 62 |     np.arange(100, 400, 1.0), 63 |     np.arange(400, 1000, 2.0)] 64 | 65 | flr_gridded = gt.grid_data(x, y, flr, bins=custom_bin) 66 | 67 | # The plot below is the standard plotting procedure for an xarray.DataArray 68 | gt.plot(flr_gridded, cmap=cmo.delta) 69 | ylim(200, 0) 70 | ``` 71 | 72 |     Mean bin size = 1.25 73 |     Mean depth binned (50 m) vertical sampling frequency = 2.53 74 | 75 | 76 | 77 | 78 | 79 |     (200, 0) 80 | 81 | 82 | 83 | 84 | ![png](img/output_101_2.png) 85 | 86 | 87 | ## 2D interpolation with objective mapping (Kriging) 88 | 89 | Users may want to interpolate data horizontally when working with finescale gradients. 90 | Several studies have used the `objmap` MATLAB function that uses objective mapping (a.k.a. Kriging). 91 | Kriging is an advanced form of inverse-distance-weighted interpolation, where points influence the interpolation according to their distance from the interpolation point, with the influence falling off as a Gaussian function. 92 | This is an expensive function when the dataset is large (due to a matrix inverse operation). 93 | The computational cost is reduced by using a quadtree that iteratively breaks the problem into smaller pieces. 94 | 95 | GliderTools provides a Python implementation of the MATLAB function. We have added parallel capability to speed the processing up, but this operation is still costly and could take several hours if an entire section is interpolated. We thus recommend that smaller sections are interpolated.
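To make the idea concrete, below is a stripped-down, one-dimensional sketch of Gaussian-covariance kriging for a single interpolation point. This is an illustration of the principle only - not the GliderTools implementation, which works in two dimensions, supports detrending, and uses the quadtree and parallel optimisations described above. All names and values here are hypothetical:

```python
import numpy as np

def gaussian_cov(d, partial_sill, lenscale):
    # the influence of a point falls off with distance as a Gaussian
    return partial_sill * np.exp(-((d / lenscale) ** 2))

def krige_point(x, z, xi, partial_sill, nugget, lenscale):
    # covariance between every pair of observations (nugget on the diagonal)
    C = gaussian_cov(abs(x[:, None] - x[None, :]), partial_sill, lenscale)
    C += nugget * np.eye(x.size)
    # covariance between the observations and the interpolation point
    c0 = gaussian_cov(abs(x - xi), partial_sill, lenscale)
    # solving this linear system is the expensive matrix-inversion step
    w = np.linalg.solve(C, c0)
    zi = z.mean() + w @ (z - z.mean())   # estimate (mean removed as a simple trend)
    vi = partial_sill + nugget - w @ c0  # interpolation variance
    return zi, vi

# tiny usage example with made-up values
x = np.array([0.0, 1.0, 2.0, 4.0])
z = np.array([1.0, 2.0, 1.5, 0.5])
zi, vi = krige_point(x, z, xi=3.0, partial_sill=1.0, nugget=0.1, lenscale=2.0)
```

The *partial sill*, *nugget* and *length scale* used here are exactly the parameters estimated from the semivariogram in the next section.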
96 | 97 | 98 | ```python 99 | # first we select a subset of data (50k points) 100 | subs = dat.isel(merged=slice(0, 50000)) 101 | 102 | # we then get time values - this makes creating the interpolation grid easier 103 | var = subs.flr_qc 104 | time = subs.time.values 105 | depth = subs.depth 106 | dives = subs.dives 107 | dist = np.r_[0, gt.utils.distance(subs.longitude, subs.latitude).cumsum()] 108 | ``` 109 | 110 | ### Part 1: Semivariance 111 | Interpolating any variable requires some knowledge about the spatial autocorrelation of that variable. A semivariogram allows one to get this information from the data. The basic idea of a semivariogram is to assess the similarity between data at different lengthscales (lags), where a low semivariance shows coherence and a large semivariance shows a mismatch. This information is required to interpolate data with sensible value and error estimates. 112 | 113 | GliderTools offers a derivation of a variogram tool (`gt.mapping.variogram`) that makes the process of finding these parameters a little easier, though a fair deal of subjectivity and tinkering, depending on the scale of the question at hand, is required to make a sensible interpolation. 114 | 115 | #### 1.1. Choosing a subset of the data for semivariance estimation 116 | The variogram function selects a number of dives (the number depends on max_points) and performs the analysis on that subset of dives rather than selecting random points. We thus recommend that a subset of the data is used to perform the analysis. In the example below, we take a subset of the data that has particularly high variability that we are interested in preserving. This subset is limited to the upper 150 m of dives 31-45 (see the mask in the code below). This should be tailored to the variable that you're interested in. 117 | 118 | 119 | ```python 120 | m = (depth<150) & (dives > 30) & (dives < 46) 121 | ax = gt.plot(dives, depth, var) 122 | ax.plot(dives[m], depth[m], '-m', ms=3, alpha=0.7) 123 | ``` 124 | 125 | 126 | 127 | 128 |     [] 129 | 130 | 131 | 132 | 133 | ![png](img/output_106_1.png) 134 | 135 | 136 | #### 1.2. Initial estimate of semivariance 137 | We can now find an initial estimate of the semivariance. This initial estimate will not scale the x/y coordinates for anisotropy (different scales of variability). The variogram function also accepts a boolean mask as a keyword argument. This will reduce the input data to the subset of data that you've chosen. 138 | 139 | The example below shows this initial estimate. We're looking for an estimate where the Gaussian model fits the semivariance as well as possible, given that the variance parameters are acceptable. These variance parameters are: *sill, nugget, x and y length-scales*. The function automatically adjusts the range to be one and scales the x and y parameters accordingly. 140 | 141 | The variogram function can take time (datetime64), but we use distance (in metres) to demonstrate the anisotropic scaling. 142 | 143 | 144 | ```python 145 | vargram = gt.mapping.variogram(var, dist, depth, dives, mask=m) 146 | ``` 147 | 148 | 149 | ![png](img/output_108_0.png) 150 | 151 | 152 | The example above shows that x and y are scaled, but the Gaussian model does not fit the semivariance very well. The range is 1, because it is scaled accordingly. The sill and nugget are very similar - this is not a good result. 153 | 154 | #### 1.3. Finding the correct x and y length scales (anisotropy) 155 | 156 | We can now scale the data with the xy_ratio.
The ratio represents the scaling of x/y. For example, if x and y are both in metres (as in this case), we need to set a small xy_ratio as x has a much longer lengthscale. With some trial and error we choose a ratio of 0.0005, which fits the semivariogram relatively well and has a reasonably low *y* scaling estimate. 157 | 158 | You'll see that the Gaussian model does not fit the semivariance exactly - this is OK. The important thing is that the first plateau matches the sill. 159 | 160 | We can now use these values for interpolating. 161 | 162 | 163 | 164 | ```python 165 | vargram = gt.mapping.variogram(var, dist, depth, dives, mask=m, xy_ratio=0.0005) 166 | ``` 167 | 168 | 169 | ![png](img/output_111_0.png) 170 | 171 | 172 | ### Part 2: Interpolation 173 | #### 2.1 Preparing the interpolation grid 174 | To perform the interpolation we first need to create the grid onto which data will be interpolated. 175 | In the example below we use distance from the origin as the x-coordinate. 176 | Time can also be used and has to be in a `np.datetime64` format - we show a commented example of this. 177 | The y-coordinate is depth. 178 | 179 | 180 | ```python 181 | # creating the x- and y-interpolation coordinates: 182 | # a 1 m vertical grid and a horizontal grid with 500 points 183 | xi = np.linspace(dist.min(), dist.max(), 500) 184 | yi = np.arange(0, depth[var.notnull()].max(), 1, dtype=float) 185 | 186 | # time can also be used. This is a commented example of how to create 187 | # a time grid for interpolation. 188 | # xi = np.arange(time.min(), time.max(), 30, dtype='datetime64[m]') 189 | ``` 190 | 191 | #### 2.2 Interpolation with the semivariance parameters 192 | The interpolation has a number of parameters that can be changed or adapted to the dataset at hand. 193 | The comments in the code below describe these inputs. 194 | 195 | 196 | ```python 197 | interpolated = gt.mapping.interp_obj( 198 |     dist, depth, var, xi, yi, 199 | 200 |     # Kriging interpolation arguments 201 |     partial_sill=1.1e4,  # taken from the semivariogram (sill - nugget) 202 |     nugget=3e3,  # taken from the semivariogram 203 |     lenscale_x=98942,  # in metres here; in hours if x and xi are in datetime64 204 |     lenscale_y=50,  # the vertical influence length scale (m) 205 |     detrend=True,  # if True use linear regression (z - z_hat), if False use average (z - z_mean) 206 | 207 |     # Quadtree arguments 208 |     max_points_per_quad=65,  # an optimisation setting; ~100 is good 209 |     min_points_per_quad=8,  # if neighbours have < N points, look at their neighbours 210 | 211 |     # Parallel calculation inputs.
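    # (assumption for clarity: these settings control speed and memory
    #  use only; they should not change the interpolated values themselves)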
214 |     n_cpus=3,  # the number of CPUs to use for the calculation - default is n-1 215 |     parallel_chunk_size=512,  # when the dataset is very large, memory can become an issue 216 |                               # this prevents large buildup of parallel results 217 | ) 218 | ``` 219 | 220 |     Starting Interpolation with quadtree optimal interpolation 221 |     ---------------------------------------------------------- 222 | 223 |     Preparing for interpolations: 224 |         Finding and removing nans 225 |         Removing data trend with linear regression 226 |         Building QuadTree 227 | 228 |     Interpolation information: 229 |         basis points:        25226 230 |         interp grid:         500, 404 231 |         max_points_per_quad: 65 232 |         min_points_per_quad: 8 233 |         number of quads:     952 234 |         detrend_method:      linear_regression 235 |         partial_sill:        11000.0 236 |         nugget:              3000.0 237 |         lengthscales:        X = 98942 238 |                              Y = 50 m 239 | 240 |     Processing interpolation chunks in 2 parts over 3 CPUs: 241 |         chunk 1/2 completed in 12s 242 |         chunk 2/2 completed in 10s 243 | 244 |     Finishing off interoplation 245 |         Adding back the trend 246 |         Creating xarray dataset for output 247 | 248 | 249 | 250 | ```python 251 | fig, ax = plt.subplots(3, 1, figsize=[9, 9], sharex=True, dpi=90) 252 | 253 | error_mask = (interpolated.variance / interpolated.nugget) < 1.05 254 | interp_robust = interpolated.z.where(error_mask) 255 | 256 | props = dict(vmin=0, vmax=300, cmap=cmo.delta) 257 | gt.plot.scatter(dist, depth, var, ax=ax[0], **props) 258 | gt.plot.pcolormesh(interp_robust, ax=ax[1], **props) 259 | gt.plot.pcolormesh(interpolated.variance, ax=ax[2], vmin=interpolated.nugget, vmax=interpolated.nugget*1.08) 260 | 261 | ax[2].plot(dist, depth, 'w-', zorder=40, alpha=0.8, lw=0.4) 262 | 263 | [a.set_ylim(400, 0) for a in ax] 264 | [a.set_xlabel(' ') for a in ax] 265 | 266 | ax[0].get_children()[0].set_sizes([20]) 267 | ax[0].set_title('Uninterpolated data') 268 | ax[1].set_title('Interpolated data') 269 | ax[2].set_title('Interpolation variance with dives shown in white') 270 | ax[2].set_xlabel('Distance (m)') 271 | 272 | tks = xticks(rotation=0) 273 | ``` 274 | 275 | 276 | ![png](img/output_116_0.png) 277 | -------------------------------------------------------------------------------- /docs/optics.md: -------------------------------------------------------------------------------- 1 | 2 | # Optics (BB, PAR, Chl) 3 | The optics module contains functions that process backscatter, PAR and fluorescence. 4 | 5 | There is a wrapper function for each of these variables that applies several functions related to cleaning and processing. We show each step of the wrapper function separately and then summarise with the wrapper function.
6 | 7 | ## Backscatter 8 | 9 | 10 | ```python 11 | theta = 124 12 | xfactor = 1.076 13 | 14 | gt.plot(x, y, bb700, cmap=cmo.delta, vmin=60, vmax=200) 15 | xlim(200,340) 16 | title('Original Data') 17 | show() 18 | ``` 19 | 20 | 21 | ![png](img/output_41_0.png) 22 | 23 | 24 | ### Outlier bounds method 25 | See the cleaning section for more information on `gt.cleaning.outlier_bounds_iqr` and `gt.cleaning.outlier_bounds_std`. 26 | 27 | ```python 28 | bb700_iqr = gt.cleaning.outlier_bounds_iqr(bb700, multiplier=3) 29 | bb700_std = gt.cleaning.outlier_bounds_std(bb700, multiplier=3) 30 | 31 | fig, ax = plt.subplots(2, 1, figsize=[9, 6], sharex=True, dpi=90) 32 | 33 | gt.plot(x, y, bb700_iqr, cmap=cmo.delta, ax=ax[0], vmin=60, vmax=200) 34 | gt.plot(x, y, bb700_std, cmap=cmo.delta, ax=ax[1], vmin=60, vmax=200) 35 | 36 | [a.set_xlabel('') for a in ax] 37 | [a.set_xlim(200, 340) for a in ax] 38 | 39 | ax[0].set_title('Outlier IQR') 40 | ax[1].set_title('Outlier STD') 41 | 42 | plt.show() 43 | ``` 44 | 45 | 46 | ![png](img/output_43_0.png) 47 | 48 | 49 | ### Removing bad profiles 50 | This function masks bad profiles based on the mean + 1 × std or the median + 1 × std at a reference depth. 51 | 52 | 53 | ```python 54 | # find_bad_profiles returns a boolean mask and dive numbers; 55 | # we index only the mask 56 | bad_profiles = gt.optics.find_bad_profiles(dives, depth, bb700, 57 |                                            ref_depth=300, 58 |                                            stdev_multiplier=1, 59 |                                            method='median')[0] 60 | 61 | fig, ax = plt.subplots(2, 1, figsize=[9, 6], sharex=True, dpi=90) 62 | # ~ reverses True to False and vice versa - i.e. we mask out bad profiles 63 | gt.plot(x, y, bb700, cmap=cmo.delta, ax=ax[0], vmin=60, vmax=200) 64 | gt.plot(x, y, bb700.where(~bad_profiles), cmap=cmo.delta, ax=ax[1], vmin=60, vmax=200) 65 | 66 | [a.set_xlabel('') for a in ax] 67 | [a.set_xlim(40, 120) for a in ax] 68 | 69 | ax[0].set_title('All backscatter data') 70 | ax[1].set_title('Bad profiles masked') 71 | 72 | plt.show() 73 | ``` 74 | 75 | 76 | ![png](img/output_45_0.png) 77 | 78 | 79 | ### Conversion from counts to total backscatter 80 | 81 | The scale and offset function uses the factory calibration dark count and scale factor. 82 | 83 | The `flo_bback_total` function uses the coefficients from Zhang et al. (2009) to convert the raw counts into total backscatter (m$^{-1}$), correcting for temperature and salinity. The $\chi$ factor and $\theta$ in this example were taken from Sullivan et al. (2013) and Slade & Boss (2015). 84 | 85 | 86 | ```python 87 | beta = gt.flo_functions.flo_scale_and_offset(bb700.where(~bad_profiles), 49, 3.217e-5) 88 | bbp = gt.flo_functions.flo_bback_total(beta, temp_qc, salt_qc, theta, 700, xfactor) 89 | 90 | fig, ax = plt.subplots(2, 1, figsize=[9, 6], sharex=True, dpi=90) 91 | 92 | gt.plot(x, y, beta, cmap=cmo.delta, ax=ax[0], robust=True) 93 | gt.plot(x, y, bbp, cmap=cmo.delta, ax=ax[1], robust=True) 94 | 95 | [a.set_xlabel('') for a in ax] 96 | [a.set_xlim(200, 340) for a in ax] 97 | [a.set_ylim(400, 0) for a in ax] 98 | 99 | ax[0].set_title('$\u03B2$') 100 | ax[1].set_title('b$_{bp}$ (m$^{-1}$)') 101 | 102 | plt.show() 103 | ``` 104 | 105 | 106 | ![png](img/output_47_0.png) 107 | 108 | 109 | ### Correcting for an in situ dark count 110 | Sensor drift from factory calibration requires an additional correction, the calculation of a dark count in situ. This is calculated from the 95th percentile of backscatter measurements between 200 and 400 m.
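Conceptually, the correction estimates the in situ dark signal from these deep measurements and subtracts it. A rough `numpy` sketch of the idea is shown below - the percentile and depth range are taken from the description above, but the actual `gt.optics.backscatter_dark_count` implementation may differ in its details:

```python
import numpy as np

def dark_count_sketch(bbp, depth):
    # estimate the in situ dark count from deep (200-400 m) measurements
    deep = (depth > 200) & (depth < 400)
    dark = np.nanpercentile(bbp[deep], 95)
    # subtract the dark count, keeping backscatter non-negative
    return np.clip(bbp - dark, 0, None)
```

The same logic is applied to the fluorescence and PAR dark counts later on this page, with variable-specific depth ranges and masking.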
111 | 112 | 113 | ```python 114 | bbp = gt.optics.backscatter_dark_count(bbp, depth) 115 | 116 | gt.plot(x, y, bbp, cmap=cmo.delta, robust=True) 117 | xlim(200,340) 118 | title('b$_{bp}$ (m$^{-1}$)') 119 | show() 120 | ``` 121 | 122 | 123 | ![png](img/output_49_0.png) 124 | 125 | 126 | ### Despiking 127 | The methods outlined in Briggs et al. (2011) are followed to identify spikes in backscatter and remove them from the baseline backscatter signal. The spikes are retained, as this data can be used to address specific science questions, but their presence can decrease the accuracy of the fluorescence quenching function. 128 | 129 | 130 | ```python 131 | bbp_horz = gt.cleaning.horizontal_diff_outliers(x, y, bbp, depth_threshold=10, mask_frac=0.05) 132 | bbp_baseline, bbp_spikes = gt.cleaning.despike(bbp_horz, 7, spike_method='minmax') 133 | 134 | 135 | fig, ax = plt.subplots(2, 1, figsize=[9, 6], sharex=True, dpi=90) 136 | 137 | gt.plot(x, y, bbp_baseline, cmap=cmo.delta, ax=ax[0], robust=True) 138 | gt.plot(x, y, bbp_spikes, ax=ax[1], cmap=cm.Spectral_r, vmin=0, vmax=0.004) 139 | 140 | [a.set_xlabel('') for a in ax] 141 | [a.set_xlim(200, 340) for a in ax] 142 | 143 | ax[0].set_title('Despiked b$_{bp}$ (m$^{-1}$)') 144 | ax[1].set_title('b$_{bp}$ (m$^{-1}$) spikes') 145 | 146 | plt.show() 147 | ``` 148 | 149 | 150 | ![png](img/output_51_0.png) 151 | 152 | 153 | ### Adding the corrected variables to the original dataframe 154 | 155 | 156 | ```python 157 | dat['bbp700'] = bbp_baseline 158 | dat['bbp700_spikes'] = bbp_spikes 159 | ``` 160 | 161 | ### Wrapper function demonstration 162 | A wrapper function was also designed, which is demonstrated below for both wavelengths (700 nm and 470 nm). The default option is for verbose to be True, which will provide an output of the different processing steps. 163 | 164 | 165 | ```python 166 | bbp_baseline, bbp_spikes = gt.calc_backscatter( 167 |     bb700, temp_qc, salt_qc, dives, depth, 700, 49, 3.217e-5, 168 |     spike_window=11, spike_method='minmax', iqr=2., profiles_ref_depth=300, 169 |     deep_multiplier=1, deep_method='median', verbose=True) 170 | 171 | dat['bbp700'] = bbp_baseline 172 | dat['bbp700_spikes'] = bbp_spikes 173 | 174 | ax = gt.plot(x, y, dat.bbp700, cmap=cmo.delta) 175 | 176 | ax.set_xlim(200, 340) 177 | 178 | plt.show() 179 | ``` 180 | 181 | 182 |     ================================================== 183 |     bb700: 184 |     Removing outliers with IQR * 2.0: 8606 obs 185 |     Mask bad profiles based on deep values (depth=300m) 186 |     Number of bad profiles = 27/672 187 |     Zhang et al. (2009) correction 188 |     Dark count correction 189 |     Spike identification (spike window=11) 190 | 191 | 192 | 193 | ![png](img/output_55_1.png) 194 | 195 | 196 | 197 | ```python 198 | bbp_baseline, bbp_spikes = gt.calc_backscatter( 199 |     bb470, temp_qc, salt_qc, dives, depth, 470, 47, 1.569e-5, 200 |     spike_window=7, spike_method='minmax', iqr=3, profiles_ref_depth=300, 201 |     deep_multiplier=1, deep_method='median', verbose=True) 202 | 203 | dat['bbp470'] = bbp_baseline 204 | dat['bbp470_spikes'] = bbp_spikes 205 | 206 | gt.plot(x, y, dat.bbp470, cmap=cmo.delta) 207 | plt.show() 208 | ``` 209 | 210 | 211 |     ================================================== 212 |     bb470: 213 |     Removing outliers with IQR * 3: 2474 obs 214 |     Mask bad profiles based on deep values (depth=300m) 215 |     Number of bad profiles = 16/672 216 |     Zhang et al.
(2009) correction 217 |     Dark count correction 218 |     Spike identification (spike window=7) 219 | 220 | 221 | 222 | ![png](img/output_56_1.png) 223 | 224 | 225 | ## PAR 226 | 227 | ### PAR Scaling 228 | 229 | This function uses the factory calibration to convert from $\mu$V to $\mu$E m$^{-2}$ s$^{-1}$. 230 | 231 | 232 | ```python 233 | par_scaled = gt.optics.par_scaling(par, 6.202e-4, 10.8) 234 | 235 | fig, ax = plt.subplots(2, 1, figsize=[9, 6], sharex=True, dpi=90) 236 | 237 | gt.plot(x, y, par, cmap=cmo.solar, ax=ax[0], robust=True) 238 | gt.plot(x, y, par_scaled, cmap=cmo.solar, ax=ax[1], robust=True) 239 | 240 | [a.set_xlabel('') for a in ax] 241 | [a.set_xlim(200, 340) for a in ax] 242 | [a.set_ylim(70, 0) for a in ax] 243 | 244 | ax[0].set_title('PAR ($\mu$V)') 245 | ax[1].set_title('PAR ($\mu$E m$^{-2}$ s$^{-1}$)') 246 | 247 | plt.show() 248 | ``` 249 | 250 | 251 | ![png](img/output_59_0.png) 252 | 253 | 254 | ### Correcting for an in situ dark count 255 | 256 | Sensor drift from factory calibration requires an additional correction, the calculation of a dark count in situ. This is calculated from the median of PAR measurements, after masking out values sampled before 23:01 and values outside the 90th depth percentile. 257 | 258 | 259 | ```python 260 | par_dark = gt.optics.par_dark_count(par_scaled, dives, depth, time) 261 | 262 | gt.plot(x, y, par_dark, robust=True, cmap=cmo.solar) 263 | xlim(200,340) 264 | ylim(70,0) 265 | title('PAR ($\mu$E m$^{-2}$ s$^{-1}$)') 266 | show() 267 | ``` 268 | 269 | 270 | ![png](img/output_61_0.png) 271 | 272 | 273 | ### PAR replacement 274 | 275 | This function removes the top 5 metres from each dive profile, and then algebraically recalculates the surface PAR using an exponential equation. 276 | 277 | 278 | ```python 279 | par_filled = gt.optics.par_fill_surface(par_dark, dives, depth, max_curve_depth=80) 280 | par_filled[par_filled < 0] = 0 281 | par_filled = par_filled.fillna(0) 282 | ``` 283 | 284 | 285 | ```python 286 | i = dives == 232 287 | 288 | fig, ax = subplots(1, 2, figsize=[6,6], dpi=100) 289 | 290 | ax[0].plot(par_dark[i], depth[i], lw=0.5, marker='o', ms=5) 291 | ax[0].plot(par_filled[i], depth[i], lw=0.5, marker='o', ms=3) 292 | ax[1].plot(par_filled[i] - par_dark[i], depth[i], lw=0, marker='o') 293 | 294 | ax[0].set_ylim(80,0) 295 | ax[0].set_ylabel('Depth (m)') 296 | ax[0].set_xlabel('PAR ($\mu$E m$^{-2}$ s$^{-1}$)') 297 | 298 | ax[1].set_ylim(80,0) 299 | ax[1].set_xlim(-350,350) 300 | ax[1].set_yticklabels('') 301 | ax[1].set_xlabel('Difference between profiles') 302 | 303 | fig.tight_layout() 304 | plt.show() 305 | ``` 306 | 307 | 308 | ![png](img/output_64_0.png) 309 | 310 | 311 | 312 | ```python 313 | gt.plot(x, y, par_filled, robust=True, cmap=cmo.solar) 314 | xlim(200,340) 315 | ylim(100,0) 316 | title('PAR ($\mu$E m$^{-2}$ s$^{-1}$)') 317 | show() 318 | ``` 319 | 320 | 321 | ![png](img/output_65_0.png) 322 | 323 | 324 | ### Wrapper function demonstration 325 | 326 | 327 | ```python 328 | par_qc = gt.calc_par(par, dives, depth, time, 329 |                      6.202e-4, 10.8, 330 |                      curve_max_depth=80, 331 |                      verbose=True).fillna(0) 332 | 333 | gt.plot(x, y, par_qc, robust=True, cmap=cmo.solar) 334 | ylim(80, 0) 335 | show() 336 | ``` 337 | 338 | 339 |     ================================================== 340 |     PAR 341 |     Dark correction 342 |     Fitting exponential curve to data 343 | 344 | 345 | 346 | ![png](img/output_67_1.png) 347 | 348 | 349 | ### Deriving additional variables 350 | 351 | #### Euphotic Depth and Light attenuation coefficient 352 | 353 |
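The `photic_depth` function fits an exponential light-attenuation model to each PAR profile. Assuming the standard model $PAR(z) = PAR(0)\,e^{-K_d z}$, the diffuse attenuation coefficient $K_d$ follows from the slope of a linear regression of $\ln(PAR)$ against depth, and the euphotic depth is the depth at which PAR falls to the given `ref_percentage` of its surface value (1% in the example below). This is a sketch of the standard approach; see the API documentation of `gt.optics.photic_depth` for the exact implementation.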
354 | ```python 355 | euphotic_depth, kd = gt.optics.photic_depth( 356 |     par_filled, dives, depth, 357 |     return_mask=False, 358 |     ref_percentage=1 359 | ) 360 | ``` 361 | 362 | 363 | ```python 364 | fig, ax = subplots(1, 1, figsize=[6,4], dpi=100) 365 | p1 = plot(euphotic_depth.index, euphotic_depth, label='Euphotic Depth') 366 | ylim(120,0) 367 | ylabel('Euphotic Depth (m)') 368 | xlabel('Dives') 369 | ax2 = ax.twinx() 370 | p2 = plot(kd.index, kd, color='orange', lw=0, marker='o', ms=2, label='K$_d$') 371 | ylabel('K$_d$', rotation=270, labelpad=20) 372 | 373 | lns = p1+p2 374 | labs = [l.get_label() for l in lns] 375 | ax2.legend(lns, labs, loc=3, numpoints=1) 376 | 377 | show() 378 | ``` 379 | 380 | 381 | ![png](img/output_71_0.png) 382 | 383 | 384 | ## Fluorescence 385 | 386 | Quenching correction method as outlined in Thomalla et al. (2017). 387 | 388 | 389 | ```python 390 | gt.plot(x, y, fluor, cmap=cmo.delta, robust=True) 391 | xlim(150,300) 392 | title('Original Data') 393 | show() 394 | ``` 395 | 396 | 397 | ![png](img/output_74_0.png) 398 | 399 | 400 | ### Outlier bounds method 401 | 402 | 403 | ```python 404 | flr_iqr = gt.cleaning.outlier_bounds_iqr(fluor, multiplier=3) 405 | 406 | gt.plot(x, y, flr_iqr, cmap=cmo.delta, robust=True) 407 | title('Outlier Bounds IQR Method') 408 | xlim(150,300) 409 | show() 410 | ``` 411 | 412 | 413 | ![png](img/output_76_0.png) 414 | 415 | 416 | ### Removing bad profiles 417 | 418 | This function masks bad profiles based on the mean or median plus a multiple of the standard deviation at a reference depth (here the mean + 4 × std is used). 419 | 420 | 421 | ```python 422 | bad_profiles = gt.optics.find_bad_profiles(dives, depth, flr_iqr, 423 |                                            ref_depth=300, 424 |                                            stdev_multiplier=4, 425 |                                            method='mean') 426 | flr_goodprof = flr_iqr.where(~bad_profiles[0]) 427 | 428 | fig, ax = plt.subplots(2, 1, figsize=[9, 6], sharex=True, dpi=90) 429 | 430 | gt.plot(x, y, flr_iqr, cmap=cmo.delta, ax=ax[0], robust=True) 431 | gt.plot(x, y, flr_goodprof, cmap=cmo.delta, ax=ax[1], robust=True) 432 | 433 | [a.set_xlabel('') for a in ax] 434 | [a.set_xlim(90, 150) for a in ax] 435 | [a.set_ylim(300, 0) for a in ax] 436 | 437 | ax[0].set_title('Bad Profiles Included') 438 | ax[1].set_title('Bad Profiles Discarded') 439 | 440 | plt.show() 441 | ``` 442 | 443 | 444 | ![png](img/output_78_0.png) 445 | 446 | 447 | ### Correcting for an in situ dark count 448 | 449 | Sensor drift from factory calibration requires an additional correction, the calculation of a dark count in situ. This is calculated from the 95th percentile of fluorescence measurements between 300 and 400 m.
450 | 451 | 452 | ```python 453 | flr_dark = gt.optics.fluorescence_dark_count(flr_iqr, dat.depth) 454 | 455 | gt.plot(x, y, flr_dark, cmap=cmo.delta, robust=True) 456 | xlim(150,300) 457 | show() 458 | ``` 459 | 460 | 461 | ![png](img/output_80_0.png) 462 | 463 | 464 | ### Despiking 465 | 466 | 467 | ```python 468 | flr_base, flr_spikes = gt.cleaning.despike(flr_dark, 11, spike_method='median') 469 | 470 | fig, ax = plt.subplots(2, 1, figsize=[9, 6], sharex=True, dpi=90) 471 | 472 | gt.plot(x, y, flr_base, cmap=cmo.delta, ax=ax[0], robust=True) 473 | gt.plot(x, y, flr_spikes, cmap=cm.RdBu_r, ax=ax[1], vmin=-5, vmax=5) 474 | 475 | [a.set_xlabel('') for a in ax] 476 | [a.set_xlim(150, 300) for a in ax] 477 | [a.set_ylim(300, 0) for a in ax] 478 | 479 | ax[0].set_title('Despiked Fluorescence') 480 | ax[1].set_title('Fluorescence spikes') 481 | 482 | plt.show() 483 | ``` 484 | 485 | 486 | ![png](img/output_82_0.png) 487 | 488 | 489 | ### Quenching Correction 490 | 491 | This function uses the method outlined in Thomalla et al. (2017); briefly, it calculates the quenching depth and performs the quenching correction based on the fluorescence-to-backscatter ratio. The quenching depth is calculated based upon the difference between night and daytime fluorescence. 492 | 493 | The default setting is for the preceding night to be used to correct the following day's quenching (`night_day_group=True`). This can be changed so that the following night is used to correct the preceding day. The quenching depth is then found from the difference between the night and daytime fluorescence, using the steepest gradient among the five smallest differences and the points where the difference changes sign (positive/negative). 494 | 495 | The function gets the backscatter/fluorescence ratio from the quenching depth to the surface, and then calculates a mean nighttime ratio for each night. The quenching ratio is calculated from the nighttime ratio and the daytime ratio, and is then applied to fluorescence to correct for quenching. If the corrected value is less than the raw value, then the function will return the original raw data.
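In compact form, and under the assumptions described above, the correction applied within the quenched layer amounts to $F_{corrected}(z) = b_{bp}(z) \times \overline{(F/b_{bp})}_{night}$ - that is, backscatter rescaled by the mean night-time fluorescence-to-backscatter ratio, with the raw fluorescence retained wherever this estimate falls below it. This equation is a summary of the principle rather than the exact implementation.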
496 | 497 | 498 | ```python 499 | flr_qc, quench_layer = gt.optics.quenching_correction( 500 | flr_base, dat.bbp470, dives, depth, time, lats, lons, 501 | sunrise_sunset_offset=1, night_day_group=True) 502 | 503 | fig, ax = plt.subplots(2, 1, figsize=[9, 6], sharex=True, dpi=90) 504 | 505 | gt.plot(x, y, flr_qc, cmap=cmo.delta, ax=ax[0], robust=True) 506 | gt.plot(x, y, quench_layer, cmap=cm.RdBu_r, ax=ax[1], vmin=-.5, vmax=2) 507 | 508 | [a.set_xlabel('') for a in ax] 509 | [a.set_xlim(150, 300) for a in ax] 510 | [a.set_ylim(100, 0) for a in ax] 511 | 512 | ax[0].set_title('Quenching Corrected Fluorescence') 513 | ax[1].set_title('Quenching Layer') 514 | 515 | plt.show() 516 | ``` 517 | 518 | 519 | ![png](img/output_84_0.png) 520 | 521 | 522 | ### Wrapper function 523 | 524 | 525 | ```python 526 | flr_qnch, flr, qnch_layer, [fig1, fig2] = gt.calc_fluorescence( 527 | fluor, dat.bbp700, dives, depth, time, lats, lons, 53, 0.0121, 528 | profiles_ref_depth=300, deep_method='mean', deep_multiplier=1, 529 | spike_window=11, spike_method='median', return_figure=True, 530 | night_day_group=False, sunrise_sunset_offset=2, verbose=True) 531 | 532 | dat['flr_qc'] = flr 533 | ``` 534 | 535 | 536 | ================================================== 537 | Fluorescence 538 | Mask bad profiles based on deep values (ref depth=300m) 539 | Number of bad profiles = 19/672 540 | Dark count correction 541 | Quenching correction 542 | Spike identification (spike window=11) 543 | Generating figures for despiking and quenching report 544 | 545 | 546 | 547 | ![png](img/output_86_1.png) 548 | 549 | 550 | 551 | ![png](img/output_86_2.png) 552 | -------------------------------------------------------------------------------- /docs/other.md: -------------------------------------------------------------------------------- 1 | 2 | # Other tools and utilities 3 | 4 | ## 3D interactive plot 5 | 6 | This is purely for investigative purposes, but provides a good way to interact with the data. 7 | 8 | 9 | ```python 10 | plotly_figure = gt.plot.section3D( 11 | dat.dives, dat.depth, dat.longitude, dat.latitude, dat.salt_qc, 12 | zmin=-500, vmax=.999, vmin=.005 13 | ) 14 | ``` 15 | ![png](img/interactive_plot.png) 16 | -------------------------------------------------------------------------------- /docs/package_structure.md: -------------------------------------------------------------------------------- 1 | Package Structure 2 | ================= 3 | ![Structure image](img/package_structure.png) 4 | -------------------------------------------------------------------------------- /docs/physics.md: -------------------------------------------------------------------------------- 1 | # Secondary physical variables 2 | 3 | ## Density 4 | GliderTools provides a wrapper to calculate potential density. 5 | This is done by first calculating potential temperature and then calculating absolute salinity. 
6 | A reference depth of `0` is used by default. 7 | 8 | 9 | ```python 10 | dens0 = gt.physics.potential_density(salt_qc, temp_qc, pres, lats, lons) 11 | dat['density'] = dens0 12 | gt.plot(dat.dives, dat.depth, dens0, cmap=cmo.dense) 13 | plt.xlim(50,150) 14 | plt.show() 15 | ``` 16 | 17 | 18 | ![png](img/output_36_0.png) 19 | 20 | 21 | ## Mixed Layer Depth 22 | 23 | 24 | ```python 25 | import matplotlib.pyplot as plt 26 | mld = gt.physics.mixed_layer_depth(ds, 'density', verbose=False) 27 | mld_smoothed = mld.rolling(10, min_periods=3).mean() 28 | 29 | mld_mask = gt.utils.mask_below_depth(ds, mld) 30 | mld_grid = gt.grid_data(ds.dives, ds.depth, mld_mask, verbose=False) 31 | 32 | fig, ax = plt.subplots(1, 2, figsize=[9, 3], dpi=100, sharey=True) 33 | 34 | mld_smoothed.plot(ax=ax[0]) 35 | gt.plot(mld_grid, ax=ax[1]) 36 | 37 | [a.set_ylim(100, 0) for a in ax] 38 | 39 | ax[0].set_ylabel('Depth (m)') 40 | [a.set_xlabel('Dives') for a in ax] 41 | plt.xticks(rotation=0) 42 | 43 | fig.tight_layout() 44 | ``` 45 | 46 |     /Users/luke/Git/GliderTools/glidertools/helpers.py:61: GliderToolsWarning: 47 | 48 |     Primary input variable is not xr.DataArray data type - no metadata to pass on. 49 | 50 | 51 | 52 | 53 | ![png](img/output_38_1.png) 54 | -------------------------------------------------------------------------------- /docs/quality_control.md: -------------------------------------------------------------------------------- 1 | # Quality Control 2 | Note that this summary carries on from the _Loading data_ page. 3 | 4 | The `cleaning` module contains several tools that help to remove erroneous data - profiles or points. 5 | These filters can be applied *globally* (IQR and standard deviation limits), *vertically* (running average filters) or *horizontally* (horizontal filters on gridded data only). 6 | 7 | There are also two approaches one can use to clean data: 1) filtering out bad points/dives; 2) smoothing data. 8 | 9 | 10 | ## Original Data 11 | 12 | Below we use **salinity** to demonstrate the different functions available to users. 13 | 14 | ```python 15 | dives = dat.dives 16 | depth = dat.depth 17 | salt = dat.salt_raw 18 | 19 | x = np.array(dives)  # ensures these are arrays 20 | y = np.array(depth) 21 | 22 | gt.plot(dives, depth, salt, cmap=cmo.haline, robust=True) 23 | plt.xlim(50, 150) 24 | plt.title('Original Data') 25 | plt.show() 26 | ``` 27 | 28 | ![png](img/output_14_0.png) 29 | 30 | ## Global filtering: outlier limits (IQR & STD) 31 | These functions find upper and lower limits for data outliers using the interquartile range and standard deviations of the entire dataset. Multipliers can be set to make the filters more or less strict. 32 | 33 | 34 | ```python 35 | salt_iqr = gt.cleaning.outlier_bounds_iqr(salt, multiplier=1.5) 36 | salt_std = gt.cleaning.outlier_bounds_std(salt, multiplier=1.5) 37 | 38 | # Plotting 39 | gt.plot(x, y, salt_iqr, cmap=cmo.haline, robust=True) 40 | plt.title('Outlier Bounds IQR Method') 41 | plt.xlim(50,150) 42 | 43 | gt.plot(x, y, salt_std, cmap=cmo.haline, robust=True) 44 | plt.title('Outlier Bounds Stdev Method') 45 | plt.xlim(50,150) 46 | 47 | plt.show() 48 | ``` 49 | 50 | ![png](img/output_16_0.png) 51 | ![png](img/output_16_1.png) 52 | 53 | 54 | ## Horizontal filtering: differential outliers 55 | Erroneous measurements often occur sequentially - i.e. in the vertical. The vertical filtering approaches would thus miss such outliers, as rolling windows are often used. It is thus useful to have an approach that compares dives in the horizontal.
The `horizontal_diff_outliers` function first grids data and then calculates where gradients (rolling mean - measurement) are outliers (same as `outlier_bounds_std`). If a certain fraction of measurements in a dive exceed the threshold, then that dive is deemed a bad dive. The example below shows three dives that have anomalous measurements. These fall well within the global bounds of acceptable data, but horizontally they are outliers and are masked out. 56 | 57 | 58 | ```python 59 | salt_horz = gt.cleaning.horizontal_diff_outliers( 60 |     x, y, salt, 61 |     multiplier=3, 62 |     depth_threshold=400, 63 |     mask_frac=0.1 64 | ) 65 | 66 | gt.plot(x, y, salt, cmap=cmo.haline) 67 | plt.title('Original dataset') 68 | plt.xlim(150,250) 69 | plt.show() 70 | 71 | gt.plot(x, y, salt_horz, cmap=cmo.haline) 72 | plt.title('Horizontal Differential Outliers removed') 73 | plt.xlim(150,250) 74 | plt.show() 75 | ``` 76 | 77 | ![png](img/output_19_0.png) 78 | ![png](img/output_19_1.png) 79 | 80 | ## Vertical smoothing approaches 81 | 82 | ### Despiking 83 | This approach was used by Briggs et al. (2010). The idea is to apply a rolling filter to the data (along the time dimension). This forms the baseline. The differences from the original data are the spikes. 84 | 85 | There are two rolling filters that can be applied to the data. The *median* approach is the equivalent of a rolling median. The *minmax* approach first applies a rolling minimum and then a rolling maximum to the data. This is particularly useful for optics data, where spikes are particles in the water column and are not normally distributed. 86 | 87 | In the case of salinity, the *median* approach is likely best, as "spikes" would be positive and negative (Gaussian distribution). 88 | 89 | 90 | ```python 91 | salt_base, salt_spike = gt.cleaning.despike(salt, window_size=5, spike_method='median') 92 | 93 | fig, ax = plt.subplots(2, 1, figsize=[9, 6], sharex=True, dpi=90) 94 | 95 | gt.plot(x, y, salt_base, cmap=cmo.haline, ax=ax[0]) 96 | ax[0].set_title('Despiked using median filter') 97 | ax[0].cb.set_label('Salinity baseline') 98 | ax[0].set_xlim(50,150) 99 | ax[0].set_xlabel('') 100 | 101 | gt.plot(x, y, salt_spike, cmap=cm.RdBu_r, vmin=-6e-3, vmax=6e-3, ax=ax[1]) 102 | ax[1].cb.set_label('Salinity spikes') 103 | ax[1].set_xlim(50,150) 104 | 105 | plt.xticks(rotation=0) 106 | plt.show() 107 | ``` 108 | ![png](img/output_22_0.png) 109 | 110 | 111 | ### Rolling window 112 | 113 | The rolling window method simply applies an aggregating function (`mean, median, std, min, max`) to the dataset. 114 | Because the above example is equivalent to a rolling median, we show what a rolling `75th percentile` looks like instead. 115 | 116 | Users could use this to create additional filters. Note that in this more complex example we create a wrapper function so that we can tell `np.percentile` which percentile we want (the 75th) and along which axis to calculate it.
117 | 118 | 119 | ```python 120 | def seventyfifth(x, axis=0): 121 |     # wrapper function so we can pass axis and percentile to 122 |     # the input function 123 |     return np.percentile(x, 75, axis=axis) 124 | 125 | # other numpy functions also work: np.mean, np.median, np.std 126 | salt_roll75 = gt.cleaning.rolling_window(salt, seventyfifth, window=5) 127 | salt_rollavg = gt.cleaning.rolling_window(salt, np.mean, window=5) 128 | 129 | # PLOTTING 130 | fig, ax = plt.subplots(2, 1, figsize=[9, 6], sharex=True, dpi=90) 131 | 132 | gt.plot(x, y, salt_roll75, cmap=cmo.haline, ax=ax[0]) 133 | ax[0].set_title('75$^{th}$ for a rolling window with size 5') 134 | ax[0].cb.set_label('Salinity baseline') 135 | ax[0].set_xlim(50,150) 136 | ax[0].set_xlabel('') 137 | 138 | gt.plot(x, y, salt_roll75 - salt, cmap=cm.RdBu_r, vmin=-6e-3, vmax=6e-3, ax=ax[1]) 139 | ax[1].cb.set_label('Difference from original data') 140 | ax[1].set_xlim(50,150) 141 | 142 | plt.xticks(rotation=0) 143 | plt.show() 144 | ``` 145 | ![png](img/output_24_0.png) 146 | 147 | 148 | ### Savitzky-Golay 149 | The Savitzky-Golay function fits a low-order polynomial to a rolling window of the time series. This has the result of smoothing the data. A larger window with a lower-order polynomial will have a smoother fit. 150 | 151 | We recommend a 2nd order kernel. Here we use first order to show that the difference can be quite big. 152 | 153 | 154 | ```python 155 | salt_savgol = gt.cleaning.savitzky_golay(salt, window_size=11, order=1) 156 | 157 | # PLOTTING 158 | fig, ax = plt.subplots(2, 1, figsize=[9, 6], sharex=True, dpi=90) 159 | 160 | gt.plot(x, y, salt_savgol, cmap=cmo.haline, ax=ax[0]) 161 | ax[0].set_title('Smoothing the data with Savitzky-Golay') 162 | ax[0].cb.set_label('Smoothed salinity') 163 | ax[0].set_xlim(50,150) 164 | ax[0].set_xlabel('') 165 | 166 | gt.plot(x, y, salt_savgol - salt, cmap=cm.RdBu, vmin=-6e-3, vmax=6e-3, ax=ax[1]) 167 | ax[1].cb.set_label('Difference from original') 168 | ax[1].set_xlim(50,150) 169 | 170 | plt.show() 171 | ``` 172 | 173 | 174 | ![png](img/output_26_0.png) 175 | 176 | ## Wrapper functions 177 | 178 | Wrapper functions have been designed to make this process more efficient, which is demonstrated below with **temperature** and **salinity**.
179 | 180 | 181 | ```python 182 | temp_qc = gt.calc_physics(temp, x, y, 183 | iqr=1.5, depth_threshold=0, 184 | spike_window=5, spike_method='median', 185 | savitzky_golay_window=11, savitzky_golay_order=2) 186 | 187 | # PLOTTING 188 | fig, ax = plt.subplots(3, 1, figsize=[9, 8.5], sharex=True, dpi=90) 189 | 190 | gt.plot(x, y, temp, cmap=cmo.thermal, ax=ax[0]) 191 | gt.plot(x, y, temp_qc, cmap=cmo.thermal, ax=ax[1]) 192 | gt.plot(x, y, temp_qc - temp, cmap=cm.RdBu_r, vmin=-0.05, vmax=0.05, ax=ax[2]) 193 | 194 | [a.set_xlabel('') for a in ax] 195 | 196 | ax[0].cb.set_label('Original Data') 197 | ax[1].cb.set_label('Cleaned Data') 198 | ax[2].cb.set_label('Difference from Original') 199 | 200 | plt.show() 201 | ``` 202 | 203 | 204 | ================================================== 205 | Physics Variable: 206 | Removing outliers with IQR * 1.5: 0 obs 207 | Removing spikes with rolling median (spike window=5) 208 | Smoothing with Savitzky-Golay filter (window=11, order=2) 209 | 210 | 211 | 212 | ![png](img/output_28_1.png) 213 | 214 | 215 | 216 | ```python 217 | salt_qc = gt.calc_physics(salt, x, y, 218 | mask_frac=0.2, iqr=2.5, 219 | spike_window=5, spike_method='median', 220 | savitzky_golay_window=11, savitzky_golay_order=2) 221 | 222 | # PLOTTING 223 | fig, ax = plt.subplots(3, 1, figsize=[9, 8.5], sharex=True, dpi=90) 224 | 225 | gt.plot(x, y, salt, cmap=cmo.haline, ax=ax[0]) 226 | gt.plot(x, y, salt_qc, cmap=cmo.haline, ax=ax[1]) 227 | gt.plot(x, y, salt_qc - salt, cmap=cm.RdBu_r, vmin=-0.02, vmax=0.02, ax=ax[2]) 228 | 229 | [a.set_xlabel('') for a in ax] 230 | 231 | ax[0].cb.set_label('Original Data') 232 | ax[1].cb.set_label('Cleaned Data') 233 | ax[2].cb.set_label('Difference from Original') 234 | 235 | plt.show() 236 | ``` 237 | 238 | 239 | ================================================== 240 | Physics Variable: 241 | Removing outliers with IQR * 2.5: 1551 obs 242 | Removing spikes with rolling median (spike window=5) 243 | Removing horizontal outliers (fraction=0.2, multiplier=2.5) 244 | Smoothing with Savitzky-Golay filter (window=11, order=2) 245 | 246 | 247 | 248 | ![png](img/output_29_1.png) 249 | 250 | 251 | ```python 252 | dat['temp_qc'] = temp_qc 253 | dat['salt_qc'] = salt_qc 254 | ``` 255 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | glidertools 2 | docutils 3 | recommonmark 4 | sphinx-rtd-theme 5 | -------------------------------------------------------------------------------- /docs/saving.md: -------------------------------------------------------------------------------- 1 | Saving data 2 | =========== 3 | 4 | We have not created an explicit way to save data in GliderTools. This is primarily because the package is built on top of two packages that already do this very well: [*pandas*](https://pandas.pydata.org/pandas-docs/stable/getting_started/overview.html) and [*xarray*](http://xarray.pydata.org/en/stable/). 5 | *pandas* is widely used and deals with tabular (2D) data. *xarray* is widely used in the earth sciences, as it supports multi-dimensional indexing (3D+). We highly recommend that you read through the documentation for these packages, as they are incredibly powerful and you will benefit from knowing them whether or not you use GliderTools! 
6 | 7 | We have written GliderTools primarily with *xarray* as the backend, due to its ability to store attributes (or metadata) alongside the data - something that *pandas* does not yet do. Moreover, we have also built the tools so that metadata is passed to the output of each function, while the function call is appended to the *history* attribute. This ensures that the user of the data knows which functions (and arguments) were called, when they were called, and with which version of GliderTools. 8 | 9 | Examples 10 | -------- 11 | 12 | First we give an example of how to save and read files to netCDF (which we recommend). 13 | 14 | ```python 15 | import xarray as xr 16 | 17 | # xds is an xarray.Dataset with a record of dimensions, coordinates and variables 18 | xds.to_netcdf('data_with_meta.nc') 19 | 20 | # this file can simply be loaded in the same way, without using GliderTools 21 | # all the information that was attached to the data is still in the netCDF 22 | xds = xr.open_dataset('data_with_meta.nc') 23 | ``` 24 | 25 | In this second example we show how to save the data to a CSV. While this is a common and widely used format, we do not recommend it as the go-to format, as all metadata is lost when the file is saved. 26 | ```python 27 | import pandas as pd 28 | 29 | # If you prefer to save your data as a text file, you can easily do this with Pandas 30 | # note that converting the file to a dataframe discards all the metadata 31 | df = xds.to_dataframe() 32 | df.to_csv('data_without_meta.csv') 33 | 34 | # this file can simply be loaded in the same way, without using GliderTools 35 | # there will be no more metadata attached to each variable 36 | df = pd.read_csv('data_without_meta.csv') 37 | 38 | # finally, you can also convert the file back to an xarray.Dataset 39 | # however, the metadata will still be lost 40 | xds = df.to_xarray() 41 | ``` 42 | -------------------------------------------------------------------------------- /docs/static/css/custom.css: -------------------------------------------------------------------------------- 1 | div#cheat-sheet.section h1{ 2 | font-size: 0px; 3 | } 4 | 5 | .highlight{ 6 | background-color: #efefef; 7 | border-color: #c9c9c9; 8 | border-width: 1px; 9 | border-style: solid; 10 | } 11 | -------------------------------------------------------------------------------- /docs/whats-new.rst: -------------------------------------------------------------------------------- 1 | .. currentmodule:: glidertools 2 | 3 | What's New 4 | =========== 5 | 6 | .. Template (do not remove) 7 | ------------------------ 8 | 9 | Breaking changes 10 | ~~~~~~~~~~~~~~~~ 11 | Description. (:pull:`ii`, :issue:`ii`). By `Name `_. 12 | 13 | New Features 14 | ~~~~~~~~~~~~ 15 | 16 | Documentation 17 | ~~~~~~~~~~~~~ 18 | 19 | Internal Changes 20 | ~~~~~~~~~~~~~~~~ 21 | 22 | Bug fixes 23 | ~~~~~~~~~ 24 | - Dark count corrections for optical sensors (:pull:`110`). By `Isabelle Giddy `_. 25 | 26 | 27 | v2023.07.25 (2023/07/25) 28 | ------------------------ 29 | 30 | .. _whats-new.2023.07.25: 31 | 32 | New Features 33 | ~~~~~~~~~~~~ 34 | - added import for VOTO seaexplorer data (:pull:`170`). By `Martin Mohrmann `_. 35 | - added versatile, depth dependent masking (:pull:`172`) and per profile grouping (:pull:`175`). By `Martin Mohrmann `_. 36 | - added concatenation of two or more datasets (:pull:`173`), even with different sets of variables (:pull:`183`). By `Martin Mohrmann `_. 
37 | 38 | Breaking changes 39 | ~~~~~~~~~~~~~~~~ 40 | - Changed the behavior of `find_dive_phase` and `calc_dive_number` to use a smaller depth threshold when determining a valid dive (15 dbar, down from 200 dbar). This threshold is now also adjustable. (:pull:`134`) By `Tom Hull `_. 41 | - GliderTools defaults for figure creation were changed. Automatic application of plt.tight_layout was dropped in favour of more flexible embedding of GliderTools plots into existing layouts/subplots. (:pull:`185`). By `Martin Mohrmann `_. 42 | - The mixed layer depth algorithm was corrected. (:pull:`169`, :issue:`168`). By `Martin Mohrmann `_. API change! Existing mixed layer computation code must be adapted. 43 | 44 | Internal Changes 45 | ~~~~~~~~~~~~~~~~ 46 | - Removed outdated python-seawater dependency (:pull:`186`). By `Callum Rollo `_. 47 | - Updated documentation of required dependencies (:pull:`174`). By `Sören Thomsen `_. 48 | - Some cleanup of old python2 dependencies (:pull:`166`). By `Martin Mohrmann `_. 49 | - Replaced deprecated pkg_resources with importlib.metadata (:pull:`187`). By `Martin Mohrmann `_. 50 | - Added release guide to documentation (:pull:`186`). By `Martin Mohrmann `_. 51 | - Cleanup of unused imports (:pull:`174`). By `Martin Mohrmann `_. 52 | 53 | Bug fixes 54 | ~~~~~~~~~ 55 | - Adapted demo notebook to updated GliderTools (:pull:`179`). By `Callum Rollo `_. 56 | - Fixed netCDF attribute handling for non-string attributes (:pull:`194`). By `Martin Mohrmann `_. 57 | - Adapted quenching_report to modern numpy versions (:pull:`191`). By `Martin Mohrmann `_. 58 | - Improved error handling for MLD computation (:pull:`190`). By `Martin Mohrmann `_. 59 | 60 | Thanks also to `Julius Busecke `_ for help with the github CI, `Sam Woodman `_ for detailed bug reports and everyone else who has contributed. 61 | 62 | 63 | v2022.12.13 (2022/12/13) 64 | ------------------------ 65 | 66 | .. _whats-new.2022.12.13: 67 | 68 | Internal Changes 69 | ~~~~~~~~~~~~~~~~ 70 | - Refactored and updated the testing and development framework; updated flake8, black and almost all python dependencies 71 | 72 | 73 | Breaking changes 74 | ~~~~~~~~~~~~~~~~ 75 | - Fixed processing/calc_oxygen (:pull:`116`, :issue:`112`). By `Callum Rollo `_. 76 | 77 | 78 | Internal Changes 79 | ~~~~~~~~~~~~~~~~ 80 | - Implemented code linting as part of the CI (:pull:`100`). By `Julius Busecke `_. 81 | 82 | Documentation 83 | ~~~~~~~~~~~~~ 84 | - Added conda installation instructions + badge. (:pull:`94`) By `Julius Busecke `_. 85 | 86 | Bug fixes 87 | ~~~~~~~~~ 88 | - Replaced `skyfield` dependency with `astral`, fixing sunrise/sunset problems at high latitudes. By `Isabelle Sindiswa Giddy `_. 89 | 90 | v2021.03 (2021/3/30) 91 | ------------------------- 92 | 93 | .. _whats-new.2021.03: 94 | 95 | Documentation 96 | ~~~~~~~~~~~~~ 97 | - Updated contributor guide for conda based workflow. (:pull:`81`) By `Julius Busecke `_. 98 | 99 | Internal Changes 100 | ~~~~~~~~~~~~~~~~ 101 | - Migration of CI to conda based workflow with multiple python versions. (:pull:`54`) By `Julius Busecke `_. 102 | - Revamped distribution actions. (:pull:`82`) By `Julius Busecke `_. 103 | - Migrated from astral to skyfield (:pull:`121`). By `Isabelle Giddy `_. 104 | -------------------------------------------------------------------------------- /docs/wishlist.md: -------------------------------------------------------------------------------- 1 | Wishlist 2 | ======== 3 | 4 | A list of things we'd love to add to GliderTools and the work involved. 5 | 6 | 1. 
Support for raw files from Slocum gliders and Seagliders, with the following additional functionality: 7 | - Thermal lag correction for each of the gliders supported in the suggestion above. 8 | - Support for hardware modules by model and manufacturer 9 | 2. Make the final data output compatible with the www.OceanGliders.org data format, https://www.oceangliders.org/taskteams/data-management/ 10 | -------------------------------------------------------------------------------- /glidertools/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/glidertools/.DS_Store -------------------------------------------------------------------------------- /glidertools/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import warnings as _warnings 4 | 5 | from . import ( # NOQA 6 | calibration, 7 | cleaning, 8 | flo_functions, 9 | load, 10 | mapping, 11 | optics, 12 | physics, 13 | utils, 14 | ) 15 | from .helpers import package_version 16 | from .mapping import grid_data, interp_obj 17 | from .plot import logo as make_logo 18 | from .plot import plot_functions as plot 19 | from .processing import * 20 | 21 | 22 | __version__ = package_version() 23 | _warnings.filterwarnings("ignore", category=RuntimeWarning) 24 | -------------------------------------------------------------------------------- /glidertools/calibration.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from inspect import currentframe as getframe 4 | 5 | import numpy as _np 6 | 7 | from .helpers import transfer_nc_attrs 8 | 9 | 10 | def bottle_matchup( 11 | gld_dives, 12 | gld_depth, 13 | gld_time, 14 | btl_depth, 15 | btl_time, 16 | btl_values, 17 | min_depth_diff_metres=5, 18 | min_time_diff_minutes=120, 19 | ): 20 | """ 21 | Performs a matchup between glider and bottle samples based on time and 22 | depth (or density). 23 | 24 | Parameters 25 | ---------- 26 | gld_depth : np.array, dtype=float 27 | glider depth at time of measurement 28 | gld_dives : np.array, dtype=float 29 | dive index of the glider (given by glider toolbox) 30 | gld_time : np.array, dtype=datetime64 31 | glider time that will be used as primary indexing variable 32 | btl_time: np.array, dtype=datetime64 33 | in-situ bottle sample's time 34 | btl_depth : np.array, dtype=float 35 | depth of in-situ sample 36 | btl_values : np.array, dtype=float 37 | the value that will be interpolated onto the glider time and 38 | depth coordinates (time, depth/dens) 39 | min_depth_diff_metres : float, default=5 40 | the maximum allowable depth difference between a bottle sample and a glider measurement for a match-up 41 | min_time_diff_minutes : float, default=120 42 | the maximum allowable time difference between bottle and glider samples 43 | 44 | Returns 45 | ------- 46 | array : float 47 | Returns the bottle values in the format of the glider 48 | i.e. 
the length of the output will be the same as gld_* 49 | 50 | """ 51 | from pandas import Series 52 | 53 | # metadata preservation 54 | var = gld_depth.copy() 55 | if isinstance(btl_values, Series): 56 | var_name = btl_values.name + "_bottle_matchups" 57 | else: 58 | var_name = "bottle_matchups" 59 | 60 | # make all input variables np.arrays 61 | args = gld_time, gld_depth, gld_dives, btl_time, btl_depth, btl_values 62 | gld_time, gld_depth, gld_dives, btl_time, btl_depth, btl_values = map( 63 | _np.array, args 64 | ) 65 | 66 | # create a blank array that matches glider data 67 | # (placeholder for calibration bottle values) 68 | gld_cal = _np.ones_like(gld_depth) * _np.nan 69 | 70 | # loop through each ship based CTD station 71 | stations = _np.unique(btl_time) 72 | for c, t in enumerate(stations): 73 | # index of station from ship CTD 74 | btl_idx = t == btl_time 75 | # number of samples per station 76 | btl_num = btl_idx.sum() 77 | 78 | # string representation of station time 79 | t_str = str(t.astype("datetime64[m]")).replace("T", " ") 80 | t_dif = abs(gld_time - t).astype("timedelta64[m]").astype(float) 81 | 82 | # loop through depths for the station 83 | if t_dif.min() < min_time_diff_minutes: 84 | # index of dive where minimum difference occurs 85 | i = _np.where(gld_dives[_np.nanargmin(t_dif)] == gld_dives)[0] 86 | n_depths = 0 87 | for depth in btl_depth[btl_idx]: 88 | # an index for bottle where depth and station match 89 | j = btl_idx & (depth == btl_depth) 90 | # depth difference for glider profile 91 | d_dif = abs(gld_depth - depth)[i] 92 | # only match depth if diff is less than given threshold 93 | if _np.nanmin(d_dif) < min_depth_diff_metres: 94 | # index of min diff for this dive 95 | k = i[_np.nanargmin(d_dif)] 96 | # assign the bottle values to the calibration output 97 | gld_cal[k] = btl_values[j] 98 | n_depths += 1 99 | print( 100 | ( 101 | "[stn {}/{}] SUCCESS: {} ({} of {} samples) match-up " 102 | "within {} minutes" 103 | ).format(c, stations.size, t_str, n_depths, btl_num, t_dif.min()) 104 | ) 105 | else: 106 | print( 107 | ( 108 | "[stn {}/{}] FAILED: {} Couldn't find samples within " 109 | "constraints" 110 | ).format(c, stations.size, t_str) 111 | ) 112 | 113 | attrs = dict(units="", positive="", comment="", standard_name="", axis="") 114 | gld_cal = transfer_nc_attrs(getframe(), var, gld_cal, var_name, **attrs) 115 | 116 | return gld_cal 117 | 118 | 119 | def model_metrics(x, y, model): 120 | from numpy import array 121 | from sklearn import metrics 122 | 123 | x = array(x).reshape(-1, 1) 124 | y = array(y) 125 | 126 | y_hat = model.predict(x).squeeze() 127 | ol = ( 128 | model.outliers_ 129 | if hasattr(model, "outliers_") 130 | else _np.zeros_like(y).astype(bool) 131 | ) 132 | 133 | # formula = '$f(x) = {:.2g}x + {:.2g}$'.format( 134 | # model.coef_[0], model.intercept_ 135 | # ) 136 | 137 | # metrics calculation 138 | out = dict( 139 | model_type=model.__class__.__name__, 140 | model_slope=model.coef_[0], 141 | model_intercept=model.intercept_, 142 | ) 143 | 144 | params = { 145 | "param_" + key: value for key, value in model.__class__().get_params().items() 146 | } 147 | 148 | results = dict( 149 | r2_all=metrics.r2_score(y, y_hat), 150 | r2_robust=metrics.r2_score(y[~ol], y_hat[~ol]), 151 | rmse_all=metrics.mean_squared_error(y, y_hat) ** 0.5, 152 | rmse_robust=metrics.mean_squared_error(y[~ol], y_hat[~ol]) ** 0.5, 153 | ) 154 | 155 | out.update(params) 156 | out.update(results) 157 | 158 | return out 159 | 160 | 161 | def model_figs(bottle_data, 
glider_data, model, ax=None): 162 | """ 163 | Creates the figure for a linear model fit. 164 | 165 | Parameters 166 | ---------- 167 | bottle_data : np.array, shape=[m, ] 168 | bottle data with the number of matched bottle/glider samples 169 | glider_data : np.array, shape[m, ] 170 | glider data with the number of matched bottle/glider samples 171 | model : sklearn.linear_model object 172 | a fitted model that you want to test. 173 | 174 | Returns 175 | ------- 176 | figure axes : matplotlib.Axes 177 | A figure showing the fit of the model to the data. 178 | """ 179 | 180 | from matplotlib.offsetbox import AnchoredText 181 | from matplotlib.pyplot import subplots 182 | from numpy import array, isnan, linspace, nanmax, nanmin 183 | from sklearn import metrics 184 | 185 | y = array(bottle_data) 186 | x = array(glider_data).reshape(-1, 1) 187 | 188 | assert not any(isnan(x)), "There are nans in glider_data" 189 | assert not any(isnan(y)), "There are nans in bottle_data" 190 | assert x.size == y.size, "glider_data and bottle_data are not the same size" 191 | assert ( 192 | x.size == model.outliers_.size 193 | ), "model.outliers_ is a different size to bottle_data" 194 | 195 | xf = linspace(nanmin(x), nanmax(x), 100).reshape(-1, 1) 196 | y_hat = model.predict(x).squeeze() 197 | ol = ( 198 | model.outliers_ 199 | if hasattr(model, "outliers_") 200 | else _np.zeros_like(y).astype(bool) 201 | ) 202 | formula = "$f(x) = {:.2g}x + {:.2g}$".format(model.coef_[0], model.intercept_) 203 | formula = formula if not formula.endswith("+ 0$") else formula[:-5] + "$" 204 | 205 | 206 | # PLOTTING FROM HERE ON ############# 207 | if ax is None: 208 | _, ax = subplots(1, 1, figsize=[6, 5], dpi=120) 209 | ax.plot(x, y, "o", c="k", zorder=99, label="Samples ({})".format(x.size))[0] 210 | ax.plot(xf, model.predict(xf), c="#AAAAAA", label="{}".format(formula)) 211 | ax.plot( 212 | x[ol], 213 | y[ol], 214 | "ow", 215 | visible=ol.any(), 216 | mew=1, 217 | mec="k", 218 | zorder=100, 219 | label="Outliers ({})".format(ol.sum()), 220 | ) 221 | ax.legend(fontsize=10, loc="upper left") 222 | 223 | # Additional info about the model displayed from here on 224 | params = model.get_params() 225 | rcModel = model.__class__().get_params() 226 | for key in rcModel: 227 | if rcModel[key] == params[key]: 228 | params.pop(key) 229 | 230 | # metrics calculation 231 | r2_all = metrics.r2_score(y, y_hat) 232 | r2_robust = metrics.r2_score(y[~ol], y_hat[~ol]) 233 | rmse_all = metrics.mean_squared_error(y, y_hat) ** 0.5 234 | rmse_robust = metrics.mean_squared_error(y[~ol], y_hat[~ol]) ** 0.5 235 | 236 | # string formatting 237 | m_name = "Huber Regression" 238 | r2_str = "$r^2$ score: {:.2g} ({:.2g})\n" 239 | rmse_str = "RMSE: {:.2g} ({:.2g})" 240 | placeholder = "{}: {}\n" 241 | 242 | # formatting the strings to be displayed 243 | params_str = "{} Params\n".format(m_name) 244 | params_str += "".join([placeholder.format(key, params[key]) for key in params]) 245 | params_str += "\nResults (robust)\n" 246 | params_str += r2_str.format(r2_all, r2_robust) 247 | params_str += rmse_str.format(rmse_all, rmse_robust) 248 | 249 | # placing the text box 250 | anchored_text = AnchoredText( 251 | params_str, loc=4, prop=dict(size=10, family="monospace"), frameon=True 252 | ) 253 | anchored_text.patch.set_boxstyle("round, pad=0.3, rounding_size=0.2") 254 | anchored_text.patch.set_linewidth(0.2) 255 | ax.add_artist(anchored_text) 256 | 257 | # axes labelling 258 | ax.set_ylabel("Bottle sample") 259 | ax.set_xlabel("Glider sample") 260 | 
ax.set_title("Calibration curve using {}".format(m_name)) 261 | 262 | return ax 263 | 264 | 265 | def robust_linear_fit( 266 | gld_var, gld_var_cal, interpolate_limit=3, return_figures=True, **kwargs 267 | ): 268 | """ 269 | Perform a robust linear regression using a Huber Loss Function to remove 270 | outliers. Returns a model object that behaves like a scikit-learn model 271 | object with a model.predict method. 272 | 273 | Parameters 274 | ---------- 275 | gld_var : np.array, shape=[n, ] 276 | glider variable 277 | gld_var_cal : np.array, shape=[n, ] 278 | bottle variable on glider indices 279 | fit_intercept : bool, default=False 280 | forces 0 intercept if False 281 | return_figures : bool, default=True 282 | create figure with metrics 283 | interpolate_limit : int, default=3 284 | glider data may have missing points. The glider data is thus 285 | interpolated to ensure that as many bottle samples as possible have a 286 | match-up with the glider. 287 | **kwargs : keyword=value pairs 288 | will be passed to the Huber Loss regression to adjust the regression 289 | 290 | Returns 291 | ------- 292 | model : sklearn.linear_model 293 | A fitted model. Use model.predict(glider_var) to create the calibrated 294 | output. 295 | """ 296 | 297 | from pandas import Series 298 | from sklearn import linear_model 299 | 300 | from .helpers import GliderToolsError 301 | 302 | # make all input arguments numpy arrays 303 | args = gld_var, gld_var_cal 304 | gld_var, gld_var_cal = map(_np.array, args) 305 | 306 | if _np.isnan(gld_var_cal).all(): 307 | raise GliderToolsError("There are no matches in your bottle data") 308 | 309 | gld_var = Series(gld_var).interpolate(limit=interpolate_limit).values 310 | 311 | # get bottle and glider values for the variables 312 | i = ~_np.isnan(gld_var_cal) & ~_np.isnan(gld_var) 313 | y = gld_var_cal[i] 314 | x = gld_var[i][:, None] 315 | 316 | if "fit_intercept" not in kwargs: 317 | kwargs["fit_intercept"] = False 318 | model = linear_model.HuberRegressor(**kwargs) 319 | model.fit(x, y) 320 | 321 | if return_figures: 322 | model_figs(x, y, model) 323 | 324 | model._predict = model.predict 325 | 326 | def predict(self, x): 327 | """ 328 | A wrapper around the normal predict function that takes 329 | nans into account. An extra dimension is also added if needed. 330 | """ 331 | from xarray import DataArray 332 | 333 | var = x.copy() 334 | x = _np.array(x) 335 | out = _np.ndarray(x.size) * _np.NaN 336 | i = ~_np.isnan(x) 337 | x = x[i].reshape(-1, 1) 338 | out[i.squeeze()] = self._predict(x).squeeze() 339 | 340 | out = transfer_nc_attrs(getframe(), var, out, "_calibrated") 341 | if hasattr(self, "info") & isinstance(out, DataArray): 342 | out.attrs["model_info"] = str(self.info) 343 | 344 | return out 345 | 346 | model.predict = predict.__get__(model, linear_model.HuberRegressor) 347 | model.info = model_metrics(x, y, model) 348 | 349 | return model 350 | -------------------------------------------------------------------------------- /glidertools/helpers.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | 3 | 4 | def package_version(): 5 | # package version will only be returned if package is installed through e.g. pip or conda, 6 | # development code is unaware of its own version (and there is no such thing in dev anyway). 
7 | # Advantage: We don't have to keep track of versioning manually 8 | from importlib.metadata import PackageNotFoundError, version 9 | 10 | try: 11 | version = version("glidertools") 12 | except PackageNotFoundError: 13 | version = "version_undefined" 14 | return version 15 | 16 | 17 | class GliderToolsWarning(UserWarning): 18 | pass 19 | 20 | 21 | class GliderToolsError(UserWarning): 22 | pass 23 | 24 | 25 | def time_now(): 26 | from pandas import Timestamp 27 | 28 | return str(Timestamp("today"))[:19] 29 | 30 | 31 | def rebuild_func_call(frame): 32 | 33 | arginf = inspect.getargvalues(frame) 34 | name = frame.f_code.co_name 35 | args = arginf.args 36 | locl = arginf.locals 37 | 38 | module = inspect.getmodule(frame).__name__ 39 | func = "{}.{}(".format(module, name) 40 | n_args = len(args) 41 | for c, arg_name in enumerate(args): 42 | arg_valu = str(locl[arg_name]) 43 | if len(arg_valu) < 25: 44 | try: 45 | float(arg_valu) 46 | except ValueError: 47 | if (arg_valu == "True") | (arg_valu == "False"): 48 | pass 49 | else: 50 | arg_valu = "'{}'".format(arg_valu) 51 | else: 52 | arg_valu = "<{}>".format(arg_name) 53 | func += "{}={}".format(arg_name, arg_valu) 54 | 55 | if c < (n_args - 1): 56 | func += ", " 57 | else: 58 | func += ")" 59 | 60 | return func 61 | 62 | 63 | def transfer_nc_attrs(frame, input_xds, output_arr, output_name, **attrs): 64 | import warnings 65 | 66 | import xarray as xr 67 | 68 | not_dataarray = not isinstance(input_xds, xr.DataArray) 69 | no_parent_frame = inspect.getmodule(frame.f_back) is None 70 | if not_dataarray: 71 | if no_parent_frame: 72 | msg = ( 73 | "Primary input variable is not xr.DataArray data type - " 74 | "no metadata to pass on." 75 | ) 76 | warnings.warn(msg, category=GliderToolsWarning) 77 | return output_arr 78 | else: 79 | if output_name is None: 80 | output_name = input_xds.name 81 | elif output_name.startswith("_"): 82 | output_name = input_xds.name + output_name 83 | 84 | attributes = input_xds.attrs.copy() 85 | history = "" if "history" not in attributes else attributes["history"] 86 | history += "[{}] (v{}) {};\n".format( 87 | time_now(), package_version(), rebuild_func_call(frame) 88 | ) 89 | attributes.update({"history": history}) 90 | attributes.update(attrs) 91 | 92 | keys = list(attributes.keys()) 93 | for key in keys: 94 | if str(attributes[key]) == "": 95 | attributes.pop(key) 96 | 97 | xds = xr.DataArray( 98 | data=output_arr, 99 | coords=input_xds.coords, 100 | dims=input_xds.dims, 101 | name=output_name, 102 | attrs=attributes, 103 | ) 104 | 105 | return xds 106 | 107 | 108 | def printv(verbose, message): 109 | """ 110 | A helper function that prints message if verbose=True (for cleaner code) 111 | 112 | Parameters 113 | ---------- 114 | verbose : bool 115 | message : str 116 | """ 117 | 118 | if verbose: 119 | print(message) 120 | else: 121 | pass 122 | -------------------------------------------------------------------------------- /glidertools/load/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from .ego import load_mission_nc as ego_mission_netCDF 4 | from .seaglider import load_multiple_vars as seaglider_basestation_netCDFs 5 | from .seaglider import show_variables as seaglider_show_variables 6 | from .slocum import slocum_geomar_matfile 7 | from .voto_seaexplorer import ( 8 | voto_concat_datasets, 9 | voto_seaexplorer_dataset, 10 | voto_seaexplorer_nc, 11 | ) 12 | 
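13 |
# A minimal usage sketch of the loaders collected in this module. The file
# patterns and variable names below are hypothetical and shown for
# orientation only; adapt them to your own mission files:
#
#   import glidertools as gt
#   names = ['ctd_depth', 'ctd_time', 'temperature', 'salinity']
#   data = gt.load.seaglider_basestation_netCDFs('p542*.nc', names)
#   ds = gt.load.voto_seaexplorer_nc('voto_nrt.nc')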
-------------------------------------------------------------------------------- /glidertools/load/ego.py: -------------------------------------------------------------------------------- 1 | # base module to load ego files 2 | 3 | from ..utils import calc_dive_phase, dive_phase_to_number 4 | 5 | 6 | def load_mission_nc(filename): 7 | """ 8 | Loads an EGO formatted glider mission file. 9 | 10 | Parameters 11 | ---------- 12 | filename : str 13 | path and filename of the EGO netCDF file. 14 | 15 | Returns 16 | ------- 17 | ego_data : xr.Dataset 18 | an xarray.Dataset object with all netCDF info attached 19 | 20 | """ 21 | 22 | import xarray as xr 23 | 24 | xds = xr.open_dataset(filename) 25 | 26 | # recalculate the dive phase unless the file contains a mostly complete 27 | # PHASE variable (also avoids a NameError when PHASE is absent) 28 | null_frac = 1.0 29 | if "PHASE" in xds: 30 | phase = xds.PHASE.load() 31 | null_frac = float(phase.isnull().sum()) / phase.size 32 | 33 | if null_frac > 0.2: 34 | time = xds.TIME.load() 35 | press = xds.PRES.load() 36 | phase = calc_dive_phase(time, press) 37 | 38 | xds["DIVES"] = dive_phase_to_number(phase) 39 | 40 | return xds 41 | -------------------------------------------------------------------------------- /glidertools/load/seaglider.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import numpy as np 4 | 5 | from netCDF4 import Dataset 6 | 7 | from ..helpers import GliderToolsWarning 8 | 9 | 10 | # TODO: fix dives indexing (merge dim if same size as other more populated dim) 11 | # TODO: when dims merge, dives are sometimes taken from the wrong dataframe 12 | 13 | 14 | def process_files(file_str): 15 | from glob import glob 16 | 17 | if not isinstance(file_str, str): 18 | raise TypeError("file_str must be a string (glob pattern or file path)") 19 | files = np.sort(glob(file_str)) 20 | if len(files) < 1: 21 | raise FileNotFoundError("The provided string is not a file path") 22 | return files 23 | 24 | 25 | def show_variables(files): 26 | from pandas import DataFrame 27 | 28 | files = process_files(files) 29 | 30 | i = len(files) // 2 31 | 32 | file = files[i] 33 | print("information is based on file: {}".format(file)) 34 | 35 | variables = Dataset(file).variables 36 | info = {} 37 | for i, key in enumerate(variables): 38 | var = variables[key] 39 | info[i] = { 40 | "name": key, 41 | "dims": var.dimensions[0] if len(var.dimensions) == 1 else "string", 42 | "units": "" if not hasattr(var, "units") else var.units, 43 | "comment": "" if not hasattr(var, "comment") else var.comment, 44 | } 45 | 46 | vars = DataFrame(info).T 47 | 48 | dim = vars.dims 49 | dim[dim.str.startswith("str")] = "string" 50 | vars["dims"] = dim 51 | 52 | vars = ( 53 | vars.sort_values(["dims", "name"]) 54 | .reset_index(drop=True) 55 | .loc[:, ["dims", "name", "units", "comment"]] 56 | .set_index("name") 57 | .style 58 | ) 59 | 60 | return vars 61 | 62 | 63 | def check_var_in_ncfiles(files, key): 64 | 65 | is_in_files = [] 66 | for file in files: 67 | vardict = Dataset(file).variables 68 | if key in vardict: 69 | is_in_files += (True,) 70 | else: 71 | is_in_files += (False,) 72 | 73 | return any(is_in_files) 74 | 75 | 76 | def get_var_dim(files, key): 77 | dims = [] 78 | for file in files: 79 | variables = Dataset(file).variables 80 | if key in variables: 81 | var = variables[key] 82 | dims += var.dimensions 83 | unique_dims = list(set(dims)) 84 | if len(unique_dims) > 1: 85 | return False 86 | elif len(unique_dims) == 1: 87 | return unique_dims[0] 88 | else: 89 | return "string" 90 | 91 | 92 | def get_var_units(files, key): 93 | from numpy import nanargmax, unique 94 | 95 | units = [get_var_attrs(file, key, "units") for file in files] 
96 | units, counts = unique(units, return_counts=True) 97 | imax = nanargmax(counts) 98 | 99 | return units[imax] 100 | 101 | 102 | def get_var_attrs(file, key, attr=None): 103 | vars = Dataset(file).variables 104 | if key not in vars: 105 | return 106 | var = Dataset(file).variables[key] 107 | 108 | if attr is None: 109 | return {k: var.getncattr(k) for k in var.ncattrs()} 110 | else: 111 | if hasattr(var, attr): 112 | return var.getncattr(attr) 113 | 114 | 115 | def get_var_coords(files, key): 116 | """ 117 | Finds the coordinates of the variable for the given netCDF files. 118 | 119 | Parameters 120 | ---------- 121 | files : list 122 | a list of netCDF glider files 123 | key : str 124 | must be a variable in the netCDF 125 | 126 | Returns 127 | ------- 128 | coords : list 129 | a list of coordinates from a subset of files 130 | """ 131 | from numpy import concatenate 132 | 133 | coords = set([get_var_attrs(f, key, "coordinates") for f in files]) 134 | if None in coords: 135 | coords.remove(None) 136 | coords = [c.split() for c in coords] 137 | if coords != []: 138 | coords = concatenate(coords).tolist() 139 | 140 | return coords 141 | 142 | 143 | def get_dim_nobs(files, dim): 144 | for file in files: 145 | dimensions = Dataset(file).dimensions 146 | if dim in dimensions: 147 | return dimensions[dim].size 148 | 149 | 150 | def get_dim_vars(files, dim): 151 | """ 152 | Returns all the variable names that belong to a dimension 153 | """ 154 | 155 | dim_vars = set() # avoid duplication with a set 156 | for file in files: # go through files to ensure all vars are included 157 | variables = Dataset(file).variables 158 | for key, var in variables.items(): 159 | # test if the variable belongs to the dimension 160 | belongs_to_dim = any([dim in d for d in var.dimensions]) 161 | if belongs_to_dim: 162 | dim_vars.update([key]) 163 | # return a numpy array of the dimension variables (useful for indexing) 164 | return np.array(list(dim_vars)) 165 | 166 | 167 | def get_dim_same_size(files, dim): 168 | """ 169 | Get the dimensions that have the same size as the given dimension. 170 | Returns a list of all matching dimension names. 171 | """ 172 | 173 | def sub_dim_same_size(file, dim): 174 | dimensions = Dataset(file).dimensions 175 | # make sure that the given dimension is in the file 176 | same_size = set() 177 | if dim in dimensions: 178 | n = dimensions[dim].size 179 | dimensions.pop(dim) 180 | 181 | for d in dimensions: 182 | if n == dimensions[d].size: 183 | same_size.update([d]) 184 | return list(same_size) 185 | 186 | # PART 1 get all dimensions with the same size 187 | same_size = set(sub_dim_same_size(files[0], dim)) 188 | for file in files[1:]: 189 | same_size = same_size.intersection(sub_dim_same_size(file, dim)) 190 | 191 | # if there is only one dimension of the same length return it 192 | return list(same_size) 193 | 194 | 195 | def get_dim_coord(files, dim_name, coord_name, niter=0): 196 | # ensure a time coordinate for each dimension when merging data 197 | # 1. search for coord_name among the variables of the same dimension 198 | # 2. 
search for coord in other dimension of same length 199 | 200 | dim_vars = get_dim_vars(files, dim_name) 201 | is_coord = [coord_name in key for key in dim_vars] 202 | same_size_dims = get_dim_same_size(files, dim_name) 203 | 204 | if any(is_coord) and (niter < 2): 205 | return dim_vars[is_coord][0] 206 | elif (same_size_dims != []) and (niter < 2): 207 | for d in same_size_dims: 208 | return get_dim_coord(files, d, coord_name, niter=niter + 1) 209 | else: 210 | return 211 | 212 | 213 | def make_variable_dimension_dict(files, variable_names, n_check_files=3): 214 | import warnings 215 | 216 | step_size = len(files) // n_check_files 217 | step_size = 1 if step_size == 0 else step_size 218 | files_checklist = files[::step_size] 219 | 220 | dims = {} 221 | for key in variable_names: 222 | if not check_var_in_ncfiles(files_checklist, key): 223 | msg = key + " was not found in the files" 224 | warnings.warn(msg, GliderToolsWarning) 225 | continue 226 | single_dim = get_var_dim(files_checklist, key) 227 | if not single_dim: 228 | continue 229 | else: 230 | dim = single_dim 231 | 232 | if dim not in dims: 233 | dims[dim] = set() 234 | 235 | dims[dim].update([key]) 236 | dims[dim].update(get_var_coords(files_checklist, key)) 237 | 238 | # get compulsory time and depth variables (if present) 239 | for d in dims: 240 | dim = dims[d] 241 | if d == "string": 242 | continue 243 | has_coord = any(["time" in v for v in dim]) 244 | 245 | if not has_coord: 246 | coord = get_dim_coord(files_checklist, d, "time") 247 | if coord: 248 | dims[d].update([coord]) 249 | else: 250 | msg = "Could not find a time coordinate for dim: {}".format(d) 251 | warnings.warn(msg, GliderToolsWarning) 252 | return dims 253 | 254 | 255 | def read_nc_files_divevars(files, keys, verbose=True, return_skipped=False): 256 | from os import path 257 | 258 | from numpy.ma import row_stack 259 | from pandas import DataFrame, concat 260 | 261 | if not verbose: 262 | from numpy import arange as trange 263 | else: 264 | from tqdm import trange 265 | 266 | data = [] 267 | error = "" 268 | skipped_files = [] 269 | progress_bar = trange(len(files)) 270 | d = 0 271 | for i in progress_bar: 272 | fname = files[i] 273 | nc = Dataset(fname) 274 | 275 | d = nc.dive_number if hasattr(nc, "dive_number") else d + 1 276 | 277 | nc_keys = [k for k in filter(lambda k: k in nc.variables, keys)] 278 | if nc_keys: 279 | skipped = set(keys) - set(nc_keys) 280 | if skipped: 281 | error += "{} not in {}\n".format(str(skipped), path.split(fname)[1]) 282 | arr = row_stack([nc.variables[k][:] for k in nc_keys]) 283 | nc.close() 284 | 285 | df = DataFrame(arr.T, columns=nc_keys) 286 | df["dives"] = d 287 | data += (df,) 288 | else: 289 | skipped_files += (fname,) 290 | error += "{} was skipped\n".format(fname) 291 | 292 | if len(error) > 0: 293 | print(error) 294 | data = concat(data, ignore_index=True) 295 | 296 | if return_skipped: 297 | return data, skipped_files 298 | else: 299 | return data 300 | 301 | 302 | def read_nc_files_strings(files, keys, verbose=True): 303 | from numpy import array, r_ 304 | from pandas import DataFrame 305 | 306 | if not verbose: 307 | from numpy import arange as trange 308 | else: 309 | from tqdm import trange 310 | 311 | data = [] 312 | idx = [] 313 | d = 0 314 | for i in trange(files.size): 315 | fname = files[i] 316 | nc = Dataset(fname) 317 | d = nc.dive_number if hasattr(nc, "dive_number") else d + 1 318 | arr = r_[[nc.variables[k][:].squeeze() for k in keys]] 319 | nc.close() 320 | data += (arr,) 321 | idx += (d,) 322 | df = 
DataFrame(array(data, dtype=str), columns=keys) 323 | for col in df: 324 | df[col] = df[col].str.encode("ascii", "ignore").str.decode("ascii") 325 | try: 326 | df[col] = df[col].values.astype(float) 327 | except ValueError: 328 | pass 329 | df["dives"] = idx 330 | 331 | return df 332 | 333 | 334 | def process_time(files, df): 335 | def decode_times_1970(series): 336 | # DECODING TIMES IF PRESENT 337 | t0 = np.datetime64("1970-01-01 00:00:00", "s") 338 | 339 | # realistic upper and lower limits since 1970 340 | tmin = np.datetime64("2000-01-01 00:00:00", "s") 341 | tmax = np.datetime64("2030-01-01 00:00:00", "s") 342 | lo_lim = (tmin - t0).astype(int) 343 | up_lim = (tmax - t0).astype(int) 344 | 345 | series_masked = series[series.notnull()] 346 | since1970 = ((series_masked > lo_lim) & (series_masked < up_lim)).all() 347 | 348 | if since1970: 349 | dt = series.values.astype("timedelta64[s]") 350 | return (t0 + dt).astype("datetime64[ns]") 351 | 352 | time_cols = df.columns[["time" in col for col in df]].values.tolist() 353 | if isinstance(files, str): 354 | file = [files] 355 | else: 356 | file = [files[len(files) // 2]] 357 | 358 | if len(time_cols) > 0: 359 | for col in time_cols: 360 | units = get_var_units(file, col) 361 | if units.startswith("seconds since 1970"): 362 | df[col + "_dt64"] = decode_times_1970(df[col]) 363 | df = df.set_index(col + "_dt64", drop=False) 364 | return df 365 | 366 | 367 | def process_dives(df): 368 | def get_dives(time, depth, dives=None): 369 | from ..utils import calc_dive_number 370 | 371 | if dives is None: 372 | return calc_dive_number(time, depth) 373 | else: 374 | # INDEX UP AND DOWN DIVES 375 | depth = np.array(depth) 376 | dives = np.array(dives) 377 | 378 | updive = np.ndarray(dives.size, dtype=bool) * False 379 | for d in np.unique(dives): 380 | i = d == dives 381 | j = np.argmax(depth[i]) 382 | # bool slice of the dive 383 | k = i[i] 384 | # make False until the maximum depth 385 | k[:j] = False 386 | # assign the bool slice to the updive 387 | updive[i] = k 388 | 389 | dives = dives + (updive / 2) 390 | return dives 391 | 392 | depth_cols = df.columns[["depth" in col for col in df]].values.tolist() 393 | time_cols = df.columns[["time" in col for col in df]].values.tolist() 394 | if (len(depth_cols) > 0) & ("dives" in df): 395 | depth = df[depth_cols[0]] 396 | time = df[time_cols[0]] 397 | df["dives"] = get_dives(time, depth, df.dives) 398 | 399 | return df 400 | 401 | 402 | def load_multiple_vars( 403 | files, 404 | variable_names, 405 | return_merged=False, 406 | verbose=True, 407 | keep_global_attrs=False, 408 | netcdf_attrs={}, 409 | keep_variable_attrs=True, 410 | ): 411 | """ 412 | Load a list of variables from SeaGlider basestation netCDF files. 413 | 414 | Parameters 415 | ---------- 416 | variable_names : list 417 | a list of strings representing the keys you would like to load. 418 | 419 | Returns 420 | ------- 421 | dict of xarray.Dataset 422 | A dataset for each dimension (and a ``merged`` entry if requested). 423 | Coordinate dimensions are always loaded (even if not 424 | specified). These can then be accessed by the dimension name. 425 | 426 | Note 427 | ---- 428 | Using this method resets all previously loaded and stored data (data 429 | is stored under ``SeaGlider.data={dim: pandas.DataFrame}``). 430 | This is done to avoid erroneous coordinate matchup with sometimes 431 | missing data. 
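432 |
    Example
    -------
    A hypothetical call; the glob pattern and variable names below are
    illustrative only and depend on your basestation files:

    >>> names = ['ctd_depth', 'ctd_time', 'temperature', 'salinity']
    >>> data = load_multiple_vars('p542*.nc', names)  # doctest: +SKIP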
433 | """ 434 | import time 435 | 436 | from pandas import DataFrame, to_numeric 437 | 438 | from ..utils import merge_dimensions 439 | 440 | # create a dictionary with dims as keys and variables as values 441 | files = process_files(files) 442 | 443 | dims_dict = make_variable_dimension_dict(files, variable_names) 444 | data = {dim_name: DataFrame() for dim_name in dims_dict} 445 | merge_list = [] # list of mergable dataframes with longest at the front 446 | max_len = 0 447 | 448 | # LOADING EACH DIMENSION 449 | for dim_name, var_names in dims_dict.items(): 450 | 451 | print("\nDIMENSION: {}\n{}".format(dim_name, str(var_names)).replace("'", "")) 452 | time.sleep(0.2) # to prevent progress bar interruption 453 | skipped_files = [] 454 | if dim_name == "string": 455 | df = read_nc_files_strings(files, var_names, verbose) 456 | else: 457 | df, skipped_files = read_nc_files_divevars( 458 | files, var_names, verbose, return_skipped=True 459 | ) 460 | for col in df: 461 | df[col] = to_numeric(df[col], errors="coerce") 462 | 463 | # converting times that have 'seconds since 1970' units 464 | dim_files = list(set(files.tolist()) - set(skipped_files)) 465 | df = process_time(dim_files, df) 466 | # splitting up and down if dives present otherwise calc from depth 467 | df = process_dives(df) 468 | 469 | # to make the merge list (with time idx) and longest index at the front 470 | if np.issubdtype(df.index.dtype, np.datetime64): 471 | if len(df) > max_len: 472 | merge_list.insert(0, dim_name) 473 | max_len = len(df) 474 | else: 475 | merge_list.append(dim_name) 476 | 477 | # adding columns to dimension based dataframes one by one 478 | for col in df: 479 | col = str(col) 480 | data[dim_name][col] = df[col] 481 | 482 | # MERGING DATA IF POSSIBLE 483 | can_merge = len(merge_list) > 1 484 | if return_merged and can_merge: 485 | print( 486 | "\nMerging dimensions on time indices: {}, ".format(merge_list[0]), 487 | end="", 488 | ) 489 | df_merged = data[merge_list.pop(0)] 490 | for other in merge_list: 491 | if "dives" in data[other]: 492 | df_other = data[other].drop(columns="dives") 493 | else: 494 | df_other = data[other] 495 | print(other, end=", ") 496 | df_merged = merge_dimensions(df_merged, df_other, interp_lim=1) 497 | data["merged"] = df_merged 498 | drop_names = list(data["merged"].filter(regex="_drop").columns) 499 | data["merged"] = data["merged"].drop(columns=drop_names) 500 | 501 | elif return_merged and (not can_merge): 502 | print( 503 | "\nCannot merge data - not enough time-indexed DataFrames" 504 | "\nReturning unmerged dataframes" 505 | ) 506 | 507 | # MAKING NETCDFS 508 | for key in data: 509 | data[key] = make_xr_dataset( 510 | data[key], 511 | files, 512 | keep_global_attrs=keep_global_attrs, 513 | keep_variable_attrs=keep_variable_attrs, 514 | index_name=key, 515 | attrs=netcdf_attrs, 516 | ) 517 | if "dives" in data[key]: 518 | data[key] = data[key].set_coords("dives") 519 | 520 | return data 521 | 522 | 523 | def make_xr_dataset( 524 | df, 525 | files, 526 | index_name="index", 527 | attrs={}, 528 | keep_variable_attrs=True, 529 | keep_global_attrs=False, 530 | ): 531 | import re 532 | 533 | from pandas import Timestamp 534 | from xarray import open_dataset 535 | 536 | first = list(open_dataset(files[0]).attrs.items()) 537 | final = list(open_dataset(files[-1]).attrs.items()) 538 | 539 | if keep_global_attrs: 540 | global_attrs = dict(list(set(first).intersection(final))) 541 | else: 542 | global_attrs = {} 543 | 544 | lons = df.filter(regex=re.compile("lon", re.IGNORECASE)) 545 | lats = 
df.filter(regex=re.compile("lat", re.IGNORECASE)) 546 | depths = df.filter(regex=re.compile("depth", re.IGNORECASE)) 547 | times = df.filter(regex=re.compile("time_dt64", re.IGNORECASE)) 548 | dives = df.filter(regex=re.compile("dive", re.IGNORECASE)) 549 | 550 | now = str(Timestamp("today"))[:19] 551 | history = ( 552 | "[{}] imported data with GliderTools.load.seaglider_" "basestation_netCDFs;\n" 553 | ).format(now) 554 | 555 | global_attrs.update(attrs) 556 | global_attrs.update( 557 | { 558 | "date_created": now, 559 | "number_of_dives": dives.max().max() // 1, 560 | "files": str([f.split("/")[-1] for f in files]), 561 | "time_coverage_start": str(times.min().min()), 562 | "time_coverage_end": str(times.max().max()), 563 | "geospatial_vertical_min": depths.min().min(), 564 | "geospatial_vertical_max": depths.max().max(), 565 | "geospatial_lat_min": lats.min().min(), 566 | "geospatial_lat_max": lats.max().max(), 567 | "geospatial_lon_min": lons.min().min(), 568 | "geospatial_lon_max": lons.max().max(), 569 | "processing": history, 570 | } 571 | ) 572 | 573 | coords = set() 574 | for key in df: 575 | check_files = files[[0, files.size // 2, -1]] 576 | coords.update(get_var_coords(check_files, key)) 577 | coords = list(coords) 578 | 579 | for i, coord in enumerate(coords): 580 | if "time" in coord: 581 | coords[i] = coord + "_dt64" 582 | 583 | xds = ( 584 | df.to_xarray() 585 | .drop_indexes(df.index.name) 586 | .reset_coords() 587 | .set_coords(coords) 588 | .rename_dims({df.index.name: index_name}) 589 | .assign_attrs(global_attrs) 590 | ) 591 | 592 | if keep_variable_attrs: 593 | mid = len(files) // 2 594 | for key in xds.variables: 595 | attrs = get_var_attrs(files[mid], key) 596 | if attrs is not None: 597 | attrs.pop("coordinates", None) 598 | xds[key].attrs = attrs 599 | 600 | return xds 601 | -------------------------------------------------------------------------------- /glidertools/load/slocum.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | def slocum_geomar_matfile(filename, verbose=True): 5 | """ 6 | Load .mat file generated with the geomar MATLAB script for Slocum data. 7 | 8 | A dive column is generated on importing the data. When there is a single 9 | value per dive (e.g. u/v), the value is repeated for the entire dive. 10 | 11 | Parameters 12 | ---------- 13 | filename : str 14 | path of .mat file. 
15 | verbose : bool, optional 16 | defaults to True 17 | 18 | Returns 19 | ------- 20 | pandas.DataFrame 21 | DataFrame containing all the columns in the `.mat` file 22 | """ 23 | 24 | import numpy as np 25 | import pandas as pd 26 | 27 | from scipy.io import loadmat 28 | 29 | mat = loadmat(filename) 30 | 31 | df = pd.DataFrame() 32 | 33 | if verbose: 34 | print("Loading variables: \n\t[", end="") 35 | for key in mat.keys(): 36 | if key.startswith("_"): 37 | continue 38 | 39 | if verbose: 40 | print(" " + key, end=",") 41 | var = mat[key] 42 | col, dives = [], [] 43 | for i, dat in enumerate(var.squeeze()): 44 | col += (dat.squeeze(),) 45 | dives += (np.ones(dat.squeeze().size) * i,) 46 | 47 | try: 48 | df[key] = np.concatenate(col) 49 | df["dives"] = np.concatenate(dives) 50 | except ValueError: 51 | ser = pd.Series(col, index=np.array(dives).squeeze()) 52 | df[key] = ser.reindex(df.dives).values 53 | 54 | df["dives"] /= 2.0 55 | if "time_datenum" in df.columns: 56 | df["time"] = convert_matlab_datenum_to_datetime64(df.time_datenum) 57 | 58 | if verbose: 59 | print("]") 60 | return df 61 | 62 | 63 | def convert_matlab_datenum_to_datetime64(datenum): 64 | from numpy import datetime64, timedelta64 65 | 66 | time_epoch = datetime64("1970-01-01 00:00:00.000") 67 | time_matlab = timedelta64(-367, "D") 68 | time_ordinal = datetime64("0001-01-01 00:00:00", "D").astype("timedelta64") 69 | time_measurements = (datenum * 86400).astype("timedelta64[s]") 70 | 71 | datetime = (time_epoch + time_matlab) + (time_ordinal + time_measurements) 72 | 73 | return datetime 74 | -------------------------------------------------------------------------------- /glidertools/load/voto_seaexplorer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import numpy as np 3 | import xarray as xr 4 | 5 | 6 | def voto_seaexplorer_nc(filename): 7 | """ 8 | Load .nc file downloaded from https://observations.voiceoftheocean.org/. 9 | A dives column is generated on importing the data. 10 | 11 | Parameters 12 | ---------- 13 | filename : str 14 | path of .nc file. 15 | 16 | Returns 17 | ------- 18 | xarray.Dataset 19 | Dataset containing all the columns in the source file and a dives column 20 | """ 21 | ds = xr.open_dataset(filename) 22 | ds = voto_seaexplorer_dataset(ds) 23 | return ds 24 | 25 | 26 | def voto_seaexplorer_dataset(ds): 27 | """ 28 | Adapts a VOTO xarray dataset, for example downloaded from the VOTO ERDDAP 29 | server (https://erddap.observations.voiceoftheocean.org/erddap/index.html), 30 | to be used in GliderTools 31 | 32 | Parameters 33 | ---------- 34 | ds : xarray.Dataset 35 | 36 | Returns 37 | ------- 38 | xarray.Dataset 39 | Dataset containing all the columns in the source file and a dives column 40 | """ 41 | ds = add_dive_column(ds) 42 | return ds 43 | 44 | 45 | def add_dive_column(ds): 46 | """add dive column to dataset 47 | 48 | Parameters: 49 | ----------- 50 | ds: xarray.Dataset 51 | 52 | Returns: 53 | -------- 54 | xarray.Dataset 55 | Dataset containing a dives column 56 | """ 57 | ds["dives"] = ( 58 | ["time"], 59 | np.where(ds.profile_direction == 1, ds.profile_num, ds.profile_num + 0.5), 60 | ) 61 | return ds 62 | 63 | 64 | def voto_concat_datasets(datasets): 65 | """ 66 | Concatenates multiple datasets along the time dimension; the profile_num 67 | and dives variable(s) are adapted so that they start counting from one 68 | for the first dataset and increase monotonically. 
69 | 70 | Parameters 71 | ---------- 72 | datasets : list of xarray.Datasets 73 | 74 | Returns 75 | ------- 76 | xarray.Dataset 77 | concatenated Dataset containing all the data from the list of datasets 78 | """ 79 | # in case the datasets have different sets of variables, empty variables are created 80 | # to allow for concatenation (concat with different sets of variables leads to an error) 81 | mlist = [set(dataset.variables.keys()) for dataset in datasets] 82 | allvariables = set.union(*mlist) 83 | for dataset in datasets: 84 | missing_vars = allvariables - set(dataset.variables.keys()) 85 | for missing_var in missing_vars: 86 | dataset[missing_var] = np.nan 87 | 88 | # renumber profiles so that profile_num is still unique in the concatenated dataset 89 | for index in range(1, len(datasets)): 90 | datasets[index]["profile_num"] += ( 91 | datasets[index - 1].copy()["profile_num"].max() 92 | ) 93 | ds = xr.concat(datasets, dim="time") 94 | ds = add_dive_column(ds) 95 | 96 | return ds 97 | -------------------------------------------------------------------------------- /glidertools/physics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import warnings 4 | 5 | from inspect import currentframe as getframe 6 | 7 | import numpy as np 8 | 9 | from .helpers import GliderToolsWarning, transfer_nc_attrs 10 | from .utils import group_by_profiles 11 | 12 | 13 | def mixed_layer_depth(ds, variable, thresh=0.01, ref_depth=10, verbose=True): 14 | """ 15 | Calculates the MLD for an ungridded glider array. 16 | 17 | You can provide density or temperature. 18 | The default threshold is set for density (0.01). 19 | 20 | Parameters 21 | ---------- 22 | ds : xarray.Dataset 23 | glider dataset 24 | variable : str 25 | variable that will be used for the threshold criteria 26 | thresh : float, default=0.01 27 | threshold for the difference of the variable 28 | ref_depth : float, default=10 29 | reference depth for the difference 30 | verbose : bool, optional 31 | 32 | Returns 33 | ------- 34 | mld : array 35 | will be an array of depths, the length of the 36 | number of unique dives. 
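    Example
    -------
    A hypothetical call, assuming ``ds`` holds an ungridded glider dataset
    with ``dives``, ``depth`` and a ``density`` variable (names are
    illustrative):

    >>> mld = mixed_layer_depth(ds, 'density', thresh=0.01, ref_depth=10)  # doctest: +SKIP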
35 | """ 36 | groups = group_by_profiles(ds, [variable, "depth"]) 37 | mld = groups.apply(mld_profile, variable, thresh, ref_depth, verbose) 38 | return mld 39 | 40 | 41 | def mld_profile(df, variable, thresh, ref_depth, verbose=True): 42 | exception = False 43 | divenum = df.index[0] 44 | df = df.dropna(subset=[variable, "depth"]) 45 | if len(df) == 0: 46 | mld = np.nan 47 | exception = True 48 | message = """no observations found for specified variable in dive {} 49 | """.format( 50 | divenum 51 | ) 52 | elif np.nanmin(np.abs(df.depth.values - ref_depth)) > 5: 53 | exception = True 54 | message = """no observations within 5 m of ref_depth for dive {} 55 | """.format( 56 | divenum 57 | ) 58 | mld = np.nan 59 | else: 60 | direction = 1 if np.unique(df.index % 1 == 0) else -1 61 | # create arrays in order of increasing depth 62 | var_arr = df[variable].values[:: int(direction)] 63 | depth = df.depth.values[:: int(direction)] 64 | # get index closest to ref_depth 65 | i = np.nanargmin(np.abs(depth - ref_depth)) 66 | # create difference array for threshold variable 67 | dd = var_arr - var_arr[i] 68 | # mask out all values that are shallower than ref_depth 69 | dd[depth < ref_depth] = np.nan 70 | # get all values in the difference array that exceed the threshold 71 | mixed = dd[abs(dd) > thresh] 72 | if len(mixed) > 0: 73 | idx_mld = np.argmax(abs(dd) > thresh) 74 | mld = depth[idx_mld] 75 | else: 76 | exception = True 77 | mld = np.nan 78 | message = """threshold criterion never true (all mixed or \ 79 | shallow profile) for profile {}""".format( 80 | divenum 81 | ) 82 | if verbose and exception: 83 | warnings.warn(message, category=GliderToolsWarning) 84 | return mld 85 | 86 | 87 | def potential_density(salt_PSU, temp_C, pres_db, lat, lon, pres_ref=0): 88 | """ 89 | Calculate density from glider measurements of salinity and temperature. 90 | 91 | The Basestation calculates density from absolute salinity and potential 92 | temperature. This function is a wrapper for this functionality, where 93 | potential temperature and absolute salinity are calculated first. 94 | Note that a reference pressure of 0 is used by default. 95 | 96 | Parameters 97 | ---------- 98 | salt_PSU : array, dtype=float, shape=[n, ] 99 | practical salinity 100 | temp_C : array, dtype=float, shape=[n, ] 101 | temperature in deg C 102 | pres_db : array, dtype=float, shape=[n, ] 103 | pressure in decibar 104 | lat : array, dtype=float, shape=[n, ] 105 | latitude in degrees north 106 | lon : array, dtype=float, shape=[n, ] 107 | longitude in degrees east 108 | 109 | Returns 110 | ------- 111 | potential_density : array, dtype=float, shape=[n, ] 112 | """ 113 | 114 | import gsw 115 | 116 | salt_abs = gsw.SA_from_SP(salt_PSU, pres_db, lon, lat) 117 | pot_dens = gsw.pot_rho_t_exact(salt_abs, temp_C, pres_db, pres_ref) 118 | pot_dens = transfer_nc_attrs( 119 | getframe(), 120 | temp_C, 121 | pot_dens, 122 | "potential_density", 123 | units="kg/m3", 124 | comment="", 125 | standard_name="potential_density", 126 | ) 127 | return pot_dens 128 | 129 | 130 | def brunt_vaisala(salt, temp, pres, lat=None): 131 | r""" 132 | Calculate the square of the buoyancy frequency. 133 | 134 | This is a copy from the GSW package, with the exception that 135 | the array maintains the same shape as the input. Note that 136 | it only works on ungridded data at the moment. 137 | 138 | .. 
math:: 139 | 140 | N^{2} = \frac{-g}{\sigma_{\theta}} \frac{d\sigma_{\theta}}{dz} 141 | 142 | Parameters 143 | ---------- 144 | salt : array-like 145 | Absolute Salinity, g/kg 146 | temp : array-like 147 | Conservative Temperature (ITS-90), degrees C 148 | pres : array-like 149 | Sea pressure (absolute pressure minus 10.1325 dbar), dbar 150 | lat : array-like, 1-D, optional 151 | Latitude, degrees. 152 | 153 | 154 | 155 | Returns 156 | ------- 157 | N2 : array 158 | Buoyancy frequency-squared at pressure midpoints, 1/s^2. 159 | Unlike the GSW version, the output is padded with NaN so 160 | that its shape matches that of the inputs. 161 | """ 162 | 163 | from gsw import Nsquared 164 | from numpy import nan, r_ 165 | 166 | def pad_nan(a): 167 | return r_[a, nan] 168 | 169 | n2 = pad_nan(Nsquared(salt, temp, pres)[0]) 170 | 171 | n2 = transfer_nc_attrs( 172 | getframe(), 173 | temp, 174 | n2, 175 | "N_squared", 176 | units="1/s2", 177 | comment="", 178 | standard_name="brunt_vaisala_freq", 179 | ) 180 | 181 | return n2 182 | 183 | 184 | # compute spice 185 | def spice0(salt_PSU, temp_C, pres_db, lat, lon): 186 | """ 187 | Calculate spiciness from glider measurements of salinity and temperature. 188 | 189 | Parameters 190 | ---------- 191 | salt_PSU : array, dtype=float, shape=[n, ] 192 | practical salinity 193 | temp_C : array, dtype=float, shape=[n, ] 194 | temperature in deg C 195 | pres_db : array, dtype=float, shape=[n, ] 196 | pressure in decibar 197 | lat : array, dtype=float, shape=[n, ] 198 | latitude in degrees north 199 | lon : array, dtype=float, shape=[n, ] 200 | longitude in degrees east 201 | 202 | Returns 203 | ------- 204 | spice0 : array, dtype=float, shape=[n, ] 205 | """ 206 | import gsw 207 | 208 | salt_abs = gsw.SA_from_SP(salt_PSU, pres_db, lon, lat) 209 | cons_temp = gsw.CT_from_t(salt_abs, temp_C, pres_db) 210 | 211 | spice0 = gsw.spiciness0(salt_abs, cons_temp) 212 | 213 | spice0 = transfer_nc_attrs( 214 | getframe(), 215 | temp_C, 216 | spice0, 217 | "spiciness0", 218 | units=" ", 219 | comment="", 220 | standard_name="spiciness0", 221 | ) 222 | return spice0 223 | -------------------------------------------------------------------------------- /glidertools/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from inspect import currentframe as getframe 4 | 5 | from .helpers import transfer_nc_attrs 6 | 7 | 8 | def time_average_per_dive(dives, time): 9 | """ 10 | Gets the average time stamp per dive. This is used to create pseudo- 11 | discrete time steps per dive for plotting data (using time as the x-axis 12 | variable). 13 | 14 | Parameters 15 | ---------- 16 | dives : np.array, dtype=float, shape=[n, ] 17 | discrete dive numbers (down = d.0; up = d.5) that matches time length 18 | time : np.array, dtype=datetime64, shape=[n, ] 19 | time stamp for each observed measurement 20 | 21 | Returns 22 | ------- 23 | time_average_per_dive : np.array, dtype=datetime64, shape=[n, ] 24 | each dive will have the average time stamp of that dive. Can be used 25 | for plotting where time_average_per_dive is set as the x-axis. 
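    Example
    -------
    A hypothetical call, where ``dives`` and ``time`` are the ungridded
    glider variables described above (names are illustrative):

    >>> diveavg = time_average_per_dive(dives, time)  # doctest: +SKIP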
26 | """ 27 | from numpy import array, datetime64, nanmean 28 | from pandas import Series 29 | 30 | atime = array(time) 31 | dives = array(dives) 32 | if isinstance(atime[0], datetime64): 33 | t = atime.astype("datetime64[s]").astype(float) 34 | else: 35 | t = atime 36 | 37 | t_grp = Series(t).groupby(dives) 38 | t_mid = nanmean([t_grp.max(), t_grp.min()], axis=0) 39 | t_ser = Series(t_mid, index=t_grp.mean().index.values) 40 | diveavg = t_ser.reindex(index=dives).values 41 | diveavg = diveavg.astype("datetime64[s]") 42 | diveavg = transfer_nc_attrs(getframe(), time, diveavg, "_diveavg") 43 | 44 | return diveavg 45 | 46 | 47 | def group_by_profiles(ds, variables=None): 48 | """ 49 | Group profiles by dives column. Each group member is one dive. The 50 | returned profiles can be evaluated statistically, e.g. by 51 | pandas.DataFrame.mean or other aggregating methods. To filter out one 52 | specific profile, use xarray.Dataset.where instead. 53 | 54 | Parameters 55 | ---------- 56 | ds : xarray.Dataset 57 | 1-dimensional Glider dataset 58 | variables : list of strings, optional 59 | specify variables if only a subset of the dataset should be grouped 60 | into profiles. Grouping only a subset is considerably faster and more 61 | memory-effective. 62 | Return 63 | ------ 64 | profiles: 65 | dataset grouped by profiles (dives variable), as created by the 66 | pandas.groupby methods. 67 | """ 68 | ds = ds.reset_coords().to_pandas().reset_index().set_index("dives") 69 | if variables: 70 | return ds[variables].groupby("dives") 71 | else: 72 | return ds.groupby("dives") 73 | 74 | 75 | def mask_above_depth(ds, depths): 76 | """ 77 | Masks all data above depths. 78 | 79 | Parameters 80 | ---------- 81 | df : xarray.Dataframe or pandas.Dataframe 82 | mask_depths : float (for constant depth masking) or pandas.Series as 83 | returned e.g. by the mixed_layer_depth function 84 | """ 85 | return _mask_depth(ds, depths, above=True) 86 | 87 | 88 | def mask_below_depth(ds, depths): 89 | """ 90 | Masks all data below depths. 91 | 92 | Parameters 93 | ---------- 94 | df : xarray.Dataframe or pandas.Dataframe 95 | mask_depths : float (for constant depth masking) or pandas.Series as 96 | returned e.g. by the mixed_layer_depth function 97 | """ 98 | return _mask_depth(ds, depths, above=False) 99 | 100 | 101 | def mask_profile_depth(df, mask_depth, above): 102 | """ 103 | Masks either above or below mask_depth. If type(mask_depth)=np.nan, 104 | the whole profile will be masked. Warning: This function is for a SINGLE 105 | profile only, for masking a complete Glider Dataset please look for 106 | utils.mask_above_depth and/or utils.mask_below_depth. 107 | 108 | Parameters 109 | ---------- 110 | df : xarray.Dataframe or pandas.Dataframe 111 | mask_depths : float (for constant depth masking) or pandas.Series as 112 | returned e.g. 
75 | def mask_above_depth(ds, depths):
76 |     """
77 |     Masks out all data above the given depths (the mask is True below them).
78 | 
79 |     Parameters
80 |     ----------
81 |     ds : xarray.Dataset or pandas.DataFrame
82 |     depths : float (for constant depth masking) or pandas.Series as
83 |         returned e.g. by the mixed_layer_depth function
84 |     """
85 |     return _mask_depth(ds, depths, above=True)
86 | 
87 | 
88 | def mask_below_depth(ds, depths):
89 |     """
90 |     Masks out all data below the given depths (the mask is True above them).
91 | 
92 |     Parameters
93 |     ----------
94 |     ds : xarray.Dataset or pandas.DataFrame
95 |     depths : float (for constant depth masking) or pandas.Series as
96 |         returned e.g. by the mixed_layer_depth function
97 |     """
98 |     return _mask_depth(ds, depths, above=False)
99 | 
100 | 
101 | def mask_profile_depth(df, mask_depth, above):
102 |     """
103 |     Masks either above or below mask_depth. If mask_depth is np.nan,
104 |     the whole profile will be masked. Warning: this function is for a SINGLE
105 |     profile only; for masking a complete glider dataset, use
106 |     utils.mask_above_depth and/or utils.mask_below_depth instead.
107 | 
108 |     Parameters
109 |     ----------
110 |     df : pandas.DataFrame holding a single profile
111 |     mask_depth : float (for constant depth masking) or pandas.Series as
112 |         returned e.g. by the mixed_layer_depth function
113 |     above : boolean
114 |         Mask either above mask_depth (True) or below (False)
115 |     """
116 |     if type(mask_depth) not in [int, float]:
117 |         # this case handles calls from _mask_depth, where a per-profile Series is passed
118 |         mask_depth = mask_depth.loc[df.index[0]]
119 |     if above:
120 |         mask = df.depth > mask_depth
121 |     else:
122 |         mask = df.depth < mask_depth
123 |     return mask
124 | 
125 | 
126 | def _mask_depth(ds, depths, above=True):
127 |     ds = ds.reset_coords().to_pandas().set_index("dives")
128 |     mask = ds.groupby("dives").apply(mask_profile_depth, depths, above)
129 |     # mask = mask if above else ~mask
130 |     return mask.values
131 | 
132 | 
133 | def merge_dimensions(df1, df2, interp_lim=3):
134 |     """
135 |     Merges variables measured at different time intervals. Glider data may be
136 |     sampled at different time intervals, as is the case for primary CTD and
137 |     SciCon data.
138 | 
139 |     Parameters
140 |     ----------
141 |     df1 : pandas.DataFrame
142 |         A dataframe indexed by datetime64 sampling times. Can have multiple
143 |         columns. The index of this first dataframe will be preserved.
144 |     df2 : pandas.DataFrame
145 |         A dataframe indexed by datetime64 sampling times. Can have multiple
146 |         columns. This second dataframe will be interpolated linearly onto the
147 |         first dataframe.
148 | 
149 |     Returns
150 |     -------
151 |     merged_df : pandas.DataFrame
152 |         The combined arrays interpolated onto the index of the first dataframe
153 | 
154 |     Raises
155 |     ------
156 |     UserWarning
157 |         If either one of the indices is not of datetime64 dtype
158 | 
159 |     Example
160 |     -------
161 |     You can use the following code and alter it if you want more control
162 | 
163 |     >>> df = pd.concat([df1, df2], sort=True, join='outer')  # doctest: +SKIP
164 |     >>> df = (df  # doctest: +SKIP
165 |         .sort_index()
166 |         .interpolate(limit=interp_lim)
167 |         .bfill(limit=interp_lim)
168 |         .loc[df1.index]
169 |     )
170 |     """
171 | 
172 |     import numpy as np
173 |     import xarray as xr
174 | 
175 |     from .helpers import GliderToolsError
176 | 
177 |     is_xds = isinstance(df1, xr.Dataset) | isinstance(df2, xr.Dataset)
178 | 
179 |     if is_xds:
180 |         msg = "One of your input objects is xr.Dataset, please define "
181 |         raise GliderToolsError(msg)
182 | 
183 |     same_type = type(df1.index) == type(df2.index)  # noqa: E721
184 |     # turning datetime64[ns] to int64 first,
185 |     # because interpolate doesn't work on datetime objects
186 | 
187 |     if same_type:
188 |         df = df1.join(df2, sort=True, how="outer", rsuffix="_drop")
189 |         df.index = df.index.astype(np.int64)
190 |         keys = df.select_dtypes(include=["datetime64[ns]"]).keys()
191 |         for key in keys:
192 |             df[key] = df[key].astype(np.int64)
193 |         df = df.interpolate(limit=interp_lim).bfill(limit=interp_lim)
194 |         df.index = df.index.astype("datetime64[ns]")
195 |         for key in keys:
196 |             df[key] = df[key].astype("datetime64[ns]")
197 |         return df.loc[df1.index.astype("datetime64[ns]")]
198 |     else:
199 |         raise UserWarning("Both dataframe indices need to be the same dtype")
200 | 
201 | 
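The masking convention above is easy to get backwards: per `mask_profile_depth`, `above=True` returns True for samples *deeper* than the given depth, i.e. the data above it is what gets masked out. A sketch mirroring the usage in `tests/test_physics.py` (assumes `ds` carries `dives`, `depth` and `temperature`):

```python
from glidertools.physics import mixed_layer_depth
from glidertools.utils import mask_above_depth

mld = mixed_layer_depth(ds, "temperature")  # pandas.Series, one value per dive
mask = mask_above_depth(ds, mld)            # True below the per-profile MLD
below_mld_temps = ds["temperature"].values[mask]
```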
202 | def calc_glider_vert_velocity(time, depth):
203 |     """
204 |     Calculate glider vertical velocity in cm/s
205 | 
206 |     Parameters
207 |     ----------
208 |     time : np.array [datetime64]
209 |         glider time dimension
210 |     depth : np.array [float]
211 |         depth (m) or pressure (dbar) if depth is not available
212 | 
213 |     Returns
214 |     -------
215 |     velocity : np.array
216 |         vertical velocity in cm/s
217 |     """
218 |     from numpy import array
219 |     from pandas import Series
220 | 
221 |     # Converting time from datetime64 to seconds since deployment
222 |     t_ns = array(time).astype("datetime64[ns]").astype(float)
223 |     t_s = Series((t_ns - t_ns.min()) / 1e9)
224 | 
225 |     # converting pressure from dbar/m to cm
226 |     p_m = array(depth).astype(float)
227 |     p_cm = Series(p_m * 100)
228 | 
229 |     # velocity in cm/s
230 |     velocity = p_cm.diff() / t_s.diff()
231 | 
232 |     return velocity
233 | 
234 | 
235 | def calc_dive_phase(time, depth, dive_depth_threshold=15):
236 |     """
237 |     Determine the glider dive phase
238 | 
239 |     Parameters
240 |     ----------
241 |     time : np.array [datetime64]
242 |         glider time dimension
243 |     depth : np.array [float]
244 |         depth (m) or pressure (dbar) if depth is not available
245 |     dive_depth_threshold : [float]
246 |         minimum dive depth (m or dbar), should be less than your most shallow dive
247 | 
248 |     Returns
249 |     -------
250 |     phase : np.array [int]
251 |         phase according to the EGO dive phases
252 |     """
253 |     from numpy import array, full, isnan, nan
254 | 
255 |     time = array(time)
256 |     depth = array(depth)
257 | 
258 |     velocity = calc_glider_vert_velocity(time, depth)  # cm/s
259 | 
260 |     phase = full(time.size, nan)  # start from NaN so unmatched samples are caught below
261 | 
262 |     phase[velocity > 0.5] = 1  # down dive
263 |     phase[velocity < -0.5] = 4  # up dive
264 |     phase[(depth > dive_depth_threshold) & (velocity >= -0.5) & (velocity <= 0.5)] = (
265 |         3  # inflexion
266 |     )
267 |     phase[depth <= dive_depth_threshold] = 0  # surface drift
268 |     phase[isnan(phase)] = 6  # anything left unclassified
269 |     phase = phase.astype(int)
270 | 
271 |     return phase
272 | 
273 | 
274 | def calc_dive_number(time, depth, dive_depth_threshold=15):
275 |     """
276 |     Determine the glider dive number (based on dive phase)
277 | 
278 |     Parameters
279 |     ----------
280 |     time : np.array [datetime64]
281 |         glider time dimension
282 |     depth : np.array [float]
283 |         depth (m) or pressure (dbar) if depth is not available
284 |     dive_depth_threshold : [float]
285 |         minimum dive depth (m or dbar), should be less than your most shallow dive
286 | 
287 |     Returns
288 |     -------
289 |     dive_number : np.ndarray [float]
290 |         the dive number where down dives are x.0 and up dives are x.5
291 |     """
292 | 
293 |     phase = calc_dive_phase(time, depth, dive_depth_threshold)
294 | 
295 |     dive = dive_phase_to_number(phase)
296 | 
297 |     return dive
298 | 
299 | 
300 | def dive_phase_to_number(phase):
301 |     from pandas import Series
302 | 
303 |     phase = Series(phase)
304 | 
305 |     u_dive = ((phase == 4).astype(int).diff() == 1).astype(int).cumsum()
306 |     d_dive = ((phase == 1).astype(int).diff() == 1).astype(int).cumsum()
307 | 
308 |     dive = (u_dive + d_dive) / 2
309 | 
310 |     return dive
311 | 
312 | 
313 | def distance(lon, lat, ref_idx=None):
314 |     """
315 |     Great-circle distance in m between lon, lat points.
316 | 
317 |     Parameters
318 |     ----------
319 |     lon, lat : array-like, 1-D (size must match)
320 |         Longitude, latitude, in degrees.
321 |     ref_idx : None, int
322 |         Defaults to None, which gives adjacent distances.
323 |         If set to a positive or negative integer, distances
324 |         will be calculated from that point.
325 | 
326 |     Returns
327 |     -------
328 |     distance : array-like
329 |         distance in meters between adjacent points
330 |         or distance from the reference point
331 | 
332 |     """
333 |     import numpy as np
334 | 
335 |     lon = np.array(lon)
336 |     lat = np.array(lat)
337 | 
338 |     earth_radius = 6371e3
339 | 
340 |     if not lon.size == lat.size:
341 |         raise ValueError(
342 |             "lon, lat size must match; found %s, %s" % (lon.size, lat.size)
343 |         )
344 |     if not len(lon.shape) == 1:
345 |         raise ValueError("lon, lat must be flat arrays")
346 | 
347 |     lon = np.radians(lon)
348 |     lat = np.radians(lat)
349 | 
350 |     if ref_idx is None:
351 |         i1 = slice(0, -1)
352 |         i2 = slice(1, None)
353 |         dlon = np.diff(lon)
354 |         dlat = np.diff(lat)
355 |     else:
356 |         ref_idx = int(ref_idx)
357 |         i1 = ref_idx
358 |         i2 = slice(0, None)
359 |         dlon = lon[ref_idx] - lon
360 |         dlat = lat[ref_idx] - lat
361 | 
362 |     a = np.sin(dlat / 2) ** 2 + np.sin(dlon / 2) ** 2 * np.cos(lat[i1]) * np.cos(
363 |         lat[i2]
364 |     )
365 | 
366 |     angles = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
367 | 
368 |     distance = earth_radius * angles
369 |     d = np.r_[0, distance] if ref_idx is None else distance  # pad only adjacent distances so output length matches input
370 | 
371 |     return d
372 | 
373 | 
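A quick sketch of `distance` on a short synthetic track (illustrative coordinates only):

```python
import numpy as np

from glidertools.utils import distance

lon = np.array([30.00, 30.10, 30.20])
lat = np.array([-35.00, -35.05, -35.10])

d = distance(lon, lat)  # adjacent great-circle distances in metres
# d[0] is 0 by construction, so d has the same length as lon/lat
print(d[1:].sum() / 1e3, "km along track")
```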
374 | if __name__ == "__main__":
375 | 
376 |     pass
377 | 
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.isort]
2 | known_third_party = ["gsw", "matplotlib", "netCDF4", "numexpr", "numpy", "pkg_resources", "pytest", "setuptools", "xarray"]
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [flake8]
2 | ignore = E122, E123, E126, E127, E128, E731, E722, E203, E741, W503
3 | max-line-length = 88
4 | max-complexity = 22
5 | exclude = build,tests,.git,benchmarks,.asv, glidertools/_version.py
6 | per-file-ignores =
7 |     glidertools/flo_functions.py: F841, E501
8 |     __init__.py: F401, F403
9 | 
10 | [isort]
11 | multi_line_output=3
12 | include_trailing_comma=True
13 | force_grid_wrap=0
14 | use_parentheses=True
15 | line_length=88
16 | 
17 | [tool:pytest]
18 | testpaths=tests/
19 | addopts= --cov --cov-fail-under=20
20 | 
21 | [doc8]
22 | # https://pypi.org/project/doc8/
23 | ignore-path = docs/_build
24 | max-line-length = 100
25 | sphinx = True
26 | 
27 | [sdist]
28 | formats = gztar
29 | 
30 | [check-manifest]
31 | ignore =
32 |     *.yml
33 |     *.yaml
34 |     .coveragerc
35 |     docs
36 |     docs/*
37 |     *.enc
38 |     notebooks
39 |     notebooks/*
40 |     tests
41 |     tests/*
42 | 
43 | [metadata]
44 | name = glidertools
45 | description = ("A toolkit for processing Seaglider base station NetCDF files: "
46 |     "despiking, smoothing, outlier detection, backscatter, fluorescence "
47 |     "quenching, calibration, gridding, interpolation. Documentation "
48 |     "at https://glidertools.readthedocs.io")
49 | author = Luke Gregor
50 | url = https://github.com/GliderToolsCommunity/GliderTools
51 | long_description = file: README.rst
52 | long_description_content_type = text/x-rst
53 | license = GNUv3
54 | license_file = LICENSE
55 | 
56 | ## These need to be filled in by the author!
57 | # For details see: https://pypi.org/classifiers/ 58 | 59 | classifiers = 60 | Development Status :: 3 - Alpha 61 | License :: OSI Approved :: MIT License 62 | Operating System :: OS Independent 63 | Intended Audience :: Science/Research 64 | Programming Language :: Python 65 | Programming Language :: Python :: 3 66 | Programming Language :: Python :: 3.8 67 | Programming Language :: Python :: 3.9 68 | Programming Language :: Python :: 3.10 69 | Programming Language :: Python :: 3.11 70 | Programming Language :: Python :: 3.12 71 | 72 | ## Add your email here 73 | author_email = "lukegre@gmail.com" 74 | 75 | 76 | ### make sure to fill in your dependencies! 77 | [options] 78 | install_requires = 79 | numexpr 80 | netcdf4 81 | pandas 82 | xarray 83 | numpy 84 | scikit-learn 85 | scipy 86 | tqdm 87 | matplotlib 88 | gsw 89 | 90 | setup_requires= 91 | setuptools_scm 92 | python_requires = >=3.8 93 | ################ Up until here 94 | 95 | zip_safe = False 96 | packages = find: 97 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | 4 | setup( 5 | use_scm_version={ 6 | "write_to": "glidertools/_version.py", 7 | "write_to_template": '__version__ = "{version}"', 8 | "tag_regex": r"^(?Pv)?(?P[^\+]+)(?P.*)?$", 9 | } 10 | ) 11 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/tests/__init__.py -------------------------------------------------------------------------------- /tests/data/p5420304.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/tests/data/p5420304.nc -------------------------------------------------------------------------------- /tests/data/p5420305.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/tests/data/p5420305.nc -------------------------------------------------------------------------------- /tests/data/p5420306.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/tests/data/p5420306.nc -------------------------------------------------------------------------------- /tests/data/p5420307.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/tests/data/p5420307.nc -------------------------------------------------------------------------------- /tests/data/p5420308.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/tests/data/p5420308.nc -------------------------------------------------------------------------------- /tests/data/p5420309.nc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/tests/data/p5420309.nc -------------------------------------------------------------------------------- /tests/data/p5420310.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/tests/data/p5420310.nc -------------------------------------------------------------------------------- /tests/data/p5420311.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/tests/data/p5420311.nc -------------------------------------------------------------------------------- /tests/data/p5420312.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/tests/data/p5420312.nc -------------------------------------------------------------------------------- /tests/data/p5420313.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/tests/data/p5420313.nc -------------------------------------------------------------------------------- /tests/data/p5420314.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/tests/data/p5420314.nc -------------------------------------------------------------------------------- /tests/data/p5420315.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/tests/data/p5420315.nc -------------------------------------------------------------------------------- /tests/data/p5420316.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/tests/data/p5420316.nc -------------------------------------------------------------------------------- /tests/data/p5420317.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/tests/data/p5420317.nc -------------------------------------------------------------------------------- /tests/data/voto_nrt.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GliderToolsCommunity/GliderTools/9e4c287ba080385e3a7774bf4f375365f5b05d62/tests/data/voto_nrt.nc -------------------------------------------------------------------------------- /tests/test_calibration.py: -------------------------------------------------------------------------------- 1 | from glidertools.calibration import ( # noqa 2 | bottle_matchup, 3 | model_figs, 4 | model_metrics, 5 | robust_linear_fit, 6 | ) 7 | 8 | 9 | def test_dummy(): 10 | """WE REALLY NEED TO ADD TESTS!!! 
THESE JUST TEST THE BASIC IMPORT!!!""" 11 | assert 1 == 1 12 | -------------------------------------------------------------------------------- /tests/test_cleaning.py: -------------------------------------------------------------------------------- 1 | from glidertools.cleaning import horizontal_diff_outliers, outlier_bounds_iqr 2 | from glidertools.load import seaglider_basestation_netCDFs 3 | 4 | 5 | filenames = "./tests/data/p542*.nc" 6 | names = [ 7 | "ctd_depth", 8 | "ctd_time", 9 | "ctd_pressure", 10 | "salinity", 11 | "temperature", 12 | "eng_wlbb2flvmt_Chlsig", 13 | "eng_wlbb2flvmt_wl470sig", 14 | "eng_wlbb2flvmt_wl700sig", 15 | "aanderaa4330_dissolved_oxygen", 16 | "eng_qsp_PARuV", 17 | ] 18 | 19 | sg542 = seaglider_basestation_netCDFs( 20 | filenames, names, return_merged=True, keep_global_attrs=False 21 | ) 22 | 23 | sg542_dat = sg542["sg_data_point"] 24 | 25 | 26 | def test_outlier_bounds(): 27 | # does not test for soft bugs 28 | salt = sg542_dat["salinity"] 29 | outlier_bounds_iqr(salt, multiplier=1.5) 30 | 31 | 32 | def test_horizontal_outliers(): 33 | # does not test for soft bugs 34 | horizontal_diff_outliers( 35 | sg542_dat["dives"], 36 | sg542_dat["ctd_depth"], 37 | sg542_dat["salinity"], 38 | multiplier=3, 39 | depth_threshold=400, 40 | mask_frac=0.1, 41 | ) 42 | -------------------------------------------------------------------------------- /tests/test_dive_numbers.py: -------------------------------------------------------------------------------- 1 | import glidertools.utils as gt_util 2 | 3 | from glidertools.load import seaglider_basestation_netCDFs 4 | 5 | 6 | # load some data 7 | filenames = "./tests/data/p542*.nc" 8 | 9 | names = ["ctd_depth", "ctd_time"] 10 | ds_dict = seaglider_basestation_netCDFs(filenames, names, keep_global_attrs=False) 11 | 12 | dat = ds_dict["sg_data_point"] 13 | depth = dat["ctd_depth"] 14 | time = dat["ctd_time"] 15 | 16 | 17 | def test_find_correct_number_dives(): 18 | # using default values 19 | dives = gt_util.calc_dive_number(depth, time) 20 | assert dives.max() == 599.5 21 | -------------------------------------------------------------------------------- /tests/test_flo_functions.py: -------------------------------------------------------------------------------- 1 | from glidertools.flo_functions import ( # noqa 2 | flo_bback_total, 3 | flo_beta, 4 | flo_cdom, 5 | flo_chla, 6 | flo_density_seawater, 7 | flo_isotherm_compress, 8 | flo_refractive_index, 9 | flo_scale_and_offset, 10 | flo_scat_seawater, 11 | flo_zhang_scatter_coeffs, 12 | ) 13 | 14 | 15 | def test_dummy(): 16 | """WE REALLY NEED TO ADD TESTS!!! 
THESE JUST TEST THE BASIC IMPORT!!!""" 17 | assert 1 == 1 18 | -------------------------------------------------------------------------------- /tests/test_imports.py: -------------------------------------------------------------------------------- 1 | def test_import(): 2 | import glidertools 3 | 4 | print(glidertools) 5 | 6 | 7 | def test_import_data_seaglider(): 8 | import glidertools as gt 9 | 10 | filenames = "./tests/data/p542*.nc" 11 | 12 | names = [ 13 | "ctd_depth", 14 | "ctd_time", 15 | "ctd_pressure", 16 | "salinity", 17 | "temperature", 18 | "eng_wlbb2flvmt_Chlsig", 19 | "eng_wlbb2flvmt_wl470sig", 20 | "eng_wlbb2flvmt_wl700sig", 21 | "aanderaa4330_dissolved_oxygen", 22 | "eng_qsp_PARuV", 23 | ] 24 | 25 | ds_dict = gt.load.seaglider_basestation_netCDFs( 26 | filenames, names, return_merged=True, keep_global_attrs=False 27 | ) 28 | 29 | assert isinstance(ds_dict, dict) 30 | -------------------------------------------------------------------------------- /tests/test_load.py: -------------------------------------------------------------------------------- 1 | from glidertools.load import voto_concat_datasets, voto_seaexplorer_nc 2 | 3 | 4 | filename = "./tests/data/voto_nrt.nc" 5 | 6 | # import two times to test concat 7 | ds1 = voto_seaexplorer_nc(filename) 8 | ds2 = voto_seaexplorer_nc(filename) 9 | 10 | 11 | def test_dives_column_addition(): 12 | assert len(ds1.dives) > 1 13 | 14 | 15 | def test_voto_concat_datasets(): 16 | ds_concat = voto_concat_datasets([ds1, ds2]) 17 | assert 2 * len(ds1.time) == len(ds_concat.time) 18 | -------------------------------------------------------------------------------- /tests/test_mapping.py: -------------------------------------------------------------------------------- 1 | from glidertools.mapping import ( # noqa 2 | get_optimal_bins, 3 | grid_data, 4 | grid_flat_dataarray, 5 | interp_leaf, 6 | interp_obj, 7 | variogram, 8 | ) 9 | 10 | 11 | def test_dummy(): 12 | """WE REALLY NEED TO ADD TESTS!!! THESE JUST TEST THE BASIC IMPORT!!!""" 13 | assert 1 == 1 14 | -------------------------------------------------------------------------------- /tests/test_optics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | 5 | def test_sunrise_sunset(): 6 | """ 7 | Tests if sunrise/sunset: 8 | 1. can run 9 | 2. output is the right shape 10 | 3. 
if the output is correct-ish
11 |     """
12 |     import numpy as np
13 |     import pandas as pd
14 | 
15 |     from glidertools.optics import sunset_sunrise
16 | 
17 |     time = [
18 |         np.datetime64("2000-01-01"),
19 |         np.datetime64("2000-01-02"),
20 |         np.datetime64("2000-01-03"),
21 |     ]
22 |     lat = -35, 45, 80
23 |     lon = 0, 0, 0
24 |     sunrise, sunset = sunset_sunrise(time, lat, lon)
25 | 
26 |     # Three entries, there should be three outputs
27 |     assert len(sunrise) == len(lat)
28 | 
29 |     # sunrise will be earlier in the SH in January
30 |     assert sunrise[0] < sunrise[2]
31 | 
32 |     # expect sunrise at 4 am, 7 am and 11 am for these times and latitudes;
33 |     # the high-latitude polar-night defaults are checked in detail below
34 |     assert pd.to_datetime(sunrise[0]).hour == 4
35 |     assert pd.to_datetime(sunrise[1]).hour == 7
36 |     assert pd.to_datetime(sunrise[2]).hour == 11
37 | 
38 |     # high latitude should output polar night default 11:59 for sunrise and 12:01 for sunset
39 |     assert pd.to_datetime(sunrise[2]).hour == 11
40 |     assert pd.to_datetime(sunrise[2]).minute == 59
41 | 
42 |     assert pd.to_datetime(sunset[2]).hour == 12
43 |     assert pd.to_datetime(sunset[2]).minute == 1
44 | 
45 | 
46 | @pytest.mark.parametrize("percentile", [5, 50, 95])
47 | def test_backscatter_dark_count(percentile):
48 |     from glidertools.optics import backscatter_dark_count
49 | 
50 |     # create some synthetic data
51 |     bbp = np.array([0.002, 0.0006, 0.0005, 0.0005, 0.0005])
52 |     depth = np.array([50, 150, 210, 310, 350])
53 |     # select only depths between 200 and 400
54 |     mask = (depth > 200) & (depth < 400)
55 |     # expected output
56 |     expected_bbp_dark = bbp - np.nanpercentile(bbp[mask], percentile)
57 |     bbp_dark = backscatter_dark_count(bbp, depth, percentile)
58 |     np.testing.assert_allclose(expected_bbp_dark, bbp_dark)
59 | 
60 | 
61 | @pytest.mark.parametrize("percentile", [5, 50, 95])
62 | def test_backscatter_dark_count_negative(percentile):
63 |     from glidertools.optics import backscatter_dark_count
64 | 
65 |     # create some synthetic data
66 |     bbp = np.array(
67 |         [0.002, 0.0006, 0.005, 0.005, 0.0004]
68 |     )  # this will result in negative values that should be zeroed out
69 |     depth = np.array([50, 150, 210, 310, 350])
70 |     bbp_dark = backscatter_dark_count(bbp, depth, percentile)
71 |     # in this case we just want to check that none of the values is negative!
72 |     assert np.all(bbp_dark >= 0)
73 | 
74 | 
75 | def test_backscatter_dark_count_warning():
76 |     from glidertools.optics import backscatter_dark_count
77 | 
78 |     # create some synthetic data
79 |     percentile = 50
80 |     bbp = np.array([0.002, 0.0006, 0.005, 0.005])
81 |     depth = np.array(
82 |         [50, 60, 70, 110]
83 |     )  # this will trigger the warning (no values between 200 and 400 m)
84 |     with pytest.warns(
85 |         UserWarning
86 |     ):  # this line will fail if the command below does not actually raise a warning!
87 |         backscatter_dark_count(bbp, depth, percentile)
88 | 
89 | 
90 | @pytest.mark.parametrize("percentile", [5, 50, 95])
91 | def test_flr_dark_count(percentile):
92 |     from glidertools.optics import fluorescence_dark_count
93 | 
94 |     # create some synthetic data
95 |     flr = np.array([200.0, 100.0, 52.0, 52.0])
96 |     depth = np.array([20, 50, 310, 350])
97 |     # select only depths between 300 and 400
98 |     mask = (depth > 300) & (depth < 400)
99 |     # expected output
100 |     expected_flr_dark = flr - np.nanpercentile(flr[mask], percentile)
101 |     flr_dark = fluorescence_dark_count(flr, depth, percentile)
102 |     np.testing.assert_allclose(expected_flr_dark, flr_dark)
103 | 
104 | 
105 | @pytest.mark.parametrize("percentile", [5, 50, 95])
106 | def test_flr_dark_count_negative(percentile):
107 |     from glidertools.optics import fluorescence_dark_count
108 | 
109 |     # create some synthetic data
110 |     flr = np.array([200.0, 100.0, 152.0, 151.0])
111 |     # this will result in negative values that should be zeroed out
112 |     depth = np.array([20, 50, 310, 350])
113 |     flr_dark = fluorescence_dark_count(flr, depth, percentile)
114 |     # in this case we just want to check that none of the values is negative!
115 |     assert np.all(flr_dark >= 0)
116 | 
117 | 
118 | def test_flr_dark_count_warning():
119 |     from glidertools.optics import fluorescence_dark_count
120 | 
121 |     # create some synthetic data
122 |     percentile = 50
123 |     flr = np.array([200.0, 100.0, 52.0, 52.0])
124 |     depth = np.array([20, 50, 210, 250])
125 | 
126 |     with pytest.warns(
127 |         UserWarning
128 |     ):  # this line will fail if the command below does not actually raise a warning!
129 |         fluorescence_dark_count(flr, depth, percentile)
130 | 
131 | 
132 | @pytest.mark.parametrize("percentile", [90])
133 | def test_par_dark_count(percentile):
134 |     from pandas import date_range
135 | 
136 |     from glidertools.optics import par_dark_count
137 | 
138 |     # create some synthetic data
139 |     par = np.array([34, 23.0, 0.89, 0.89])
140 |     depth = np.array([10, 20, 310, 350])
141 |     time = date_range("2018-12-01 10:00", "2018-12-03 00:00", 4)
142 |     # expected output
143 |     expected_par_dark = par - np.nanmedian(
144 |         np.nanpercentile(par[-1], percentile)
145 |     )  # only use values in the deepest 90th percentile of depths and between 23:00 and 01:00
146 |     par_dark = par_dark_count(par, depth, time, percentile)
147 |     np.testing.assert_allclose(expected_par_dark, par_dark)
148 | 
149 | 
150 | def test_par_dark_count_warning():
151 |     from pandas import date_range
152 | 
153 |     from glidertools.optics import par_dark_count
154 | 
155 |     # create some synthetic data
156 |     percentile = 90
157 |     par = np.array([34, 23.0, 0.89, 0.89])
158 |     depth = np.array([10, 20, 310, 350])
159 |     time = date_range("2018-12-01 10:00", "2018-12-03 20:00", 4)
160 |     # this will trigger the warning (no samples that are both deep enough and inside the night-time window)
161 |     with pytest.warns(
162 |         UserWarning
163 |     ):  # this line will fail if the command below does not actually raise a warning!
164 |         par_dark_count(par, depth, time, percentile)
--------------------------------------------------------------------------------
/tests/test_physics.py:
--------------------------------------------------------------------------------
1 | import xarray as xr
2 | 
3 | from glidertools.load import seaglider_basestation_netCDFs
4 | from glidertools.physics import (
5 |     brunt_vaisala,
6 |     mixed_layer_depth,
7 |     potential_density,
8 |     spice0,
9 | )
10 | from glidertools.utils import mask_above_depth, mask_below_depth
11 | 
12 | 
13 | filenames = "./tests/data/p542*.nc"
14 | 
15 | names = [
16 |     "ctd_depth",
17 |     "ctd_time",
18 |     "ctd_pressure",
19 |     "salinity",
20 |     "temperature",
21 |     "eng_wlbb2flvmt_Chlsig",
22 |     "eng_wlbb2flvmt_wl470sig",
23 |     "eng_wlbb2flvmt_wl700sig",
24 |     "aanderaa4330_dissolved_oxygen",
25 |     "eng_qsp_PARuV",
26 | ]
27 | 
28 | ds_dict = seaglider_basestation_netCDFs(
29 |     filenames, names, return_merged=True, keep_global_attrs=False
30 | )
31 | 
32 | merged = ds_dict["merged"]
33 | if "time" in merged:
34 |     merged = merged.drop_vars(["time", "time_dt64"])
35 | dat = merged.rename(
36 |     {
37 |         "salinity": "salt_raw",
38 |         "temperature": "temp_raw",
39 |         "ctd_pressure": "pressure",
40 |         "ctd_depth": "depth",
41 |         "ctd_time_dt64": "time",
42 |         "ctd_time": "time_raw",
43 |         "eng_wlbb2flvmt_wl700sig": "bb700_raw",
44 |         "eng_wlbb2flvmt_wl470sig": "bb470_raw",
45 |         "eng_wlbb2flvmt_Chlsig": "flr_raw",
46 |         "eng_qsp_PARuV": "par_raw",
47 |         "aanderaa4330_dissolved_oxygen": "oxy_raw",
48 |     }
49 | )
50 | 
51 | 
52 | def test_is_dataset():
53 |     assert isinstance(dat, xr.core.dataset.Dataset)
54 | 
55 | 
56 | def test_mixed_layer_depth():
57 |     mld = mixed_layer_depth(dat, "temp_raw")
58 |     assert mld.min() > 10
59 |     assert mld.max() < 40
60 | 
61 | 
62 | def test_masking():
63 |     # We "know" that the mld for this dataset is >10m and <40m
64 |     mld = mixed_layer_depth(dat, "temp_raw")
65 |     mask = mask_above_depth(dat, mld)
66 |     assert dat.depth[mask].max() > 10
67 |     mask = mask_below_depth(dat, mld)
68 |     assert dat.depth[mask].max() < 40
69 | 
70 | 
71 | def test_potential_density():
72 |     pot_den = potential_density(
73 |         dat.salt_raw, dat.temp_raw, dat.pressure, dat.latitude, dat.longitude
74 |     )
75 |     assert pot_den.min() > 1020
76 |     assert pot_den.max() < 1040
77 | 
78 | 
79 | def test_brunt_vaisala():
80 |     brunt_val = brunt_vaisala(dat.salt_raw, dat.temp_raw, dat.pressure)
81 |     assert brunt_val.min() > -0.002
82 |     assert brunt_val.max() < 0.002
83 | 
84 | 
85 | def test_spice0():
86 |     spice = spice0(
87 |         dat.salt_raw, dat.temp_raw, dat.pressure, dat.latitude, dat.longitude
88 |     )
89 |     assert spice.min() > -1
90 |     assert spice.max() < 1
--------------------------------------------------------------------------------
/tests/test_plot.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | 
3 | import glidertools.plot as gt_plt
4 | 
5 | from glidertools.load import seaglider_basestation_netCDFs
6 | 
7 | 
8 | # load some data
9 | filenames = "./tests/data/p542*.nc"
10 | 
11 | names = ["ctd_depth", "ctd_time", "ctd_pressure", "salinity", "temperature"]
12 | ds_dict = seaglider_basestation_netCDFs(filenames, names, keep_global_attrs=False)
13 | 
14 | dat = ds_dict["sg_data_point"]
15 | 
16 | 
17 | def test_no_warns():
18 |     """Check gt_plt() raises no warnings in pcolormesh."""
19 |     with warnings.catch_warnings(record=True) as record:  # record=True so `record` is a list rather than None
20 |         gt_plt(dat.dives, dat.ctd_pressure, dat.salinity)
21 | 
22 |     # print warnings that were captured
23 |     if record:
24 | 
print("Warnings were raised: " + ", ".join([str(w) for w in record])) 25 | 26 | # Check the warning messages for statements we do not want to see 27 | fail_message = ( 28 | "shading='flat' when X and Y have the same dimensions as C is deprecated" 29 | ) 30 | assert not any([fail_message in str(r) for r in record]) 31 | -------------------------------------------------------------------------------- /tests/test_processing.py: -------------------------------------------------------------------------------- 1 | import gsw 2 | import numpy as np 3 | import pytest 4 | 5 | from glidertools.processing import ( # noqa 6 | calc_backscatter, 7 | calc_fluorescence, 8 | calc_oxygen, 9 | calc_par, 10 | calc_physics, 11 | oxygen_ml_per_l_to_umol_per_kg, 12 | ) 13 | from tests.test_physics import dat 14 | 15 | 16 | dat.oxy_raw.values[dat.oxy_raw.values < 0] = np.nan 17 | dat.oxy_raw.values[dat.oxy_raw.values > 500] = np.nan 18 | o2ml, o2pc, o2aou = calc_oxygen( 19 | dat.oxy_raw, 20 | dat.pressure, 21 | dat.salt_raw, 22 | dat.temp_raw, 23 | dat.latitude, 24 | dat.longitude, 25 | ) 26 | 27 | 28 | def test_calc_oxygen(): 29 | assert np.nanmean(o2ml) == pytest.approx(5.22, 0.001) 30 | assert np.nanmean(o2pc) == pytest.approx(75.857, 0.001) 31 | assert np.nanmean(o2aou) == pytest.approx(75.351, 0.001) 32 | 33 | 34 | def test_oxygen_conversion(): 35 | absolute_salinity = gsw.SA_from_SP( 36 | dat.salt_raw, dat.pressure, dat.longitude, dat.latitude 37 | ) 38 | conservative_temperature = gsw.CT_from_t( 39 | absolute_salinity, dat.temp_raw, dat.pressure 40 | ) 41 | density = gsw.density.rho(absolute_salinity, conservative_temperature, dat.pressure) 42 | o2_umol_kg = oxygen_ml_per_l_to_umol_per_kg(o2ml, density) 43 | assert np.allclose(o2_umol_kg.values, dat.oxy_raw.values, equal_nan=True) 44 | --------------------------------------------------------------------------------