├── .coveragerc ├── .github ├── ISSUE_TEMPLATE │ ├── bug-report.md │ ├── question.md │ └── release.md ├── pull_request_template.md └── workflows │ ├── check_formatting.yml │ ├── check_json.yml │ ├── check_notebooks.yml │ ├── codecov-CI.yml │ ├── docker-image.yml │ ├── matrix_ci.yml │ └── pylint.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yml ├── .zenodo.json ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── codecov.yml ├── dev_requirements.txt ├── doc ├── _static │ ├── theme_overrides.css │ └── thumbnails │ │ ├── Basics_Idealized-Case-1_Tracking-of-a-Test-Blob-in-2D_Thumbnail.png │ │ ├── Basics_Idealized-Case-2_Two_crossing_Blobs_Thumbnail.png │ │ ├── Basics_Methods-and-Parameters-for-Feature-Detection_Part_1_Thumbnail.png │ │ ├── Basics_Methods-and-Parameters-for-Feature-Detection_Part_2_Thumbnail.png │ │ ├── Basics_Methods-and-Parameters-for-Linking_Thumbnail.png │ │ ├── Basics_Methods-and-Parameters-for-Segmentation_Thumbnail.png │ │ ├── Example_OLR_Tracking_model_Thumbnail.png │ │ ├── Example_OLR_Tracking_satellite_Thumbnail.png │ │ ├── Example_Precip_Tracking_Thumbnail.png │ │ ├── Example_Track_on_Radar_Segment_on_Satellite_Thumbnail.png │ │ ├── Example_Updraft_Tracking_Thumbnail.png │ │ └── Example_vorticity_tracking_model_Thumbnail.png ├── analysis.rst ├── big_datasets.rst ├── big_datasets_examples │ └── notebooks │ │ └── parallel_processing_tobac.ipynb ├── bulk_statistics │ ├── index.rst │ └── notebooks │ │ ├── compute_statistics_during_feature_detection.ipynb │ │ ├── compute_statistics_during_segmentation.ipynb │ │ └── compute_statistics_postprocessing_example.ipynb ├── code_reviews.rst ├── code_structure.rst ├── conf.py ├── contributing.rst ├── data_input.rst ├── examples ├── examples.rst ├── feature_detection │ ├── index.rst │ └── notebooks │ │ ├── feature_detection_filtering.ipynb │ │ ├── multiple_thresholds_example.ipynb │ │ ├── n_min_threshold_example.ipynb │ │ └── position_threshold_example.ipynb ├── feature_detection_3D_out_vars.csv ├── feature_detection_base_out_vars.csv ├── feature_detection_output.rst ├── feature_detection_overview.rst ├── features_without_segmented_area.rst ├── images │ ├── Figure_linking.png │ ├── box_vs_column_seeding.png │ ├── cross.png │ ├── decision.png │ ├── detection_multiplethresholds.png │ ├── erosion_example.png │ ├── feature_outside_of_threshold_area.png │ ├── feature_outside_of_threshold_area_extreme.png │ ├── features_without_segment.png │ ├── linking_prediction.png │ ├── position_thresholds.png │ ├── sat_radar_combined.png │ ├── search.png │ └── sigma_threshold_example.png ├── index.rst ├── installation.rst ├── linking.rst ├── mentoring.rst ├── merge_split.rst ├── merge_split_out_vars.csv ├── plotting.rst ├── publications.rst ├── requirements.txt ├── segmentation.rst ├── segmentation_out_vars.csv ├── segmentation_out_vars_statistics.csv ├── segmentation_output.rst ├── segmentation_parameters.rst ├── testing_sphinx-based_rendering.rst ├── threshold_detection_parameters.rst ├── tobac.rst ├── tracking_base_out_vars.csv ├── tracking_output.rst └── transform_segmentation.rst ├── environment-ci.yml ├── environment-examples.yml ├── example_requirements.txt ├── examples ├── Basics │ ├── Idealized-Case-1_Tracking-of-a-Test-Blob-in-2D.ipynb │ ├── Idealized-Case-2_Two_crossing_Blobs.ipynb │ ├── Methods-and-Parameters-for-Feature-Detection_Part_1.ipynb │ ├── Methods-and-Parameters-for-Feature-Detection_Part_2.ipynb │ ├── Methods-and-Parameters-for-Linking.ipynb │ └── Methods-and-Parameters-for-Segmentation.ipynb 
├── Example_OLR_Tracking_model │ └── Example_OLR_Tracking_model.ipynb ├── Example_OLR_Tracking_model_iris │ └── Example_OLR_Tracking_model_iris.ipynb ├── Example_OLR_Tracking_satellite │ └── Example_OLR_Tracking_satellite.ipynb ├── Example_Precip_Tracking │ └── Example_Precip_Tracking.ipynb ├── Example_Track_on_Radar_Segment_on_Satellite │ └── Example_Track_on_Radar_Segment_on_Satellite.ipynb ├── Example_Updraft_Tracking │ └── Example_Updraft_Tracking.ipynb └── Example_vorticity_tracking_model │ └── Example_vorticity_tracking_model.ipynb ├── requirements.txt ├── run_notebooks.sh ├── setup.py └── tobac ├── __init__.py ├── analysis ├── __init__.py ├── cell_analysis.py ├── feature_analysis.py └── spatial.py ├── centerofgravity.py ├── feature_detection.py ├── merge_split.py ├── plotting.py ├── segmentation ├── __init__.py └── watershed_segmentation.py ├── testing.py ├── tests ├── Dockerfile ├── Dockerfile-coverage ├── segmentation_tests │ ├── test_iris_xarray_segmentation.py │ ├── test_segmentation.py │ └── test_segmentation_time_pad.py ├── test_analysis_spatial.py ├── test_convert.py ├── test_datetime.py ├── test_decorators.py ├── test_feature_detection.py ├── test_generators.py ├── test_import.py ├── test_iris_xarray_match_utils.py ├── test_merge_split.py ├── test_pbc_utils.py ├── test_sample_data.py ├── test_testing.py ├── test_tracking.py ├── test_utils.py ├── test_utils_bulk_statistics.py ├── test_utils_coordinates.py ├── test_utils_internal.py └── test_xarray_utils.py ├── tracking.py ├── utils ├── __init__.py ├── bulk_statistics.py ├── datetime.py ├── decorators.py ├── general.py ├── generators.py ├── internal │ ├── __init__.py │ ├── coordinates.py │ ├── iris_utils.py │ ├── label_props.py │ └── xarray_utils.py ├── mask.py └── periodic_boundaries.py └── wrapper.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source=tobac 3 | omit = 4 | tobac/tests/* 5 | setup.py 6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug Report 3 | about: Report a bug in the tobac library 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | * [ ] Have you searched the issue tracker for the same problem? 11 | * [ ] Have you checked if you're using the latest version? If not, which version are you using? 12 | * [ ] Have you mentioned the steps to reproduce the issue? 13 | * [ ] Have you, if applicable, included error messages? 14 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Question 3 | about: 'Ask a general question about tobac ' 4 | title: '' 5 | labels: question 6 | assignees: '' 7 | 8 | --- 9 | 10 | * [ ] Have you searched the issue tracker for similar questions? 11 | * [ ] Have you read the documentation to ensure your question isn't already answered? 12 | * [ ] Have you searched Stack Overflow or other relevant forums to see if your question has been answered elsewhere? 
13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/release.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Release 3 | about: Prepare a new release for tobac 4 | title: Release v.X.Y.Z 5 | labels: release 6 | assignees: '' 7 | 8 | --- 9 | 10 | Checklist for releasing vX.Y.Z: 11 | 12 | * [ ] Re-run notebooks and commit updates to repository 13 | * [ ] Bump version in `__init__.py` in `RC_vX.Y.Z` 14 | * [ ] Add changelog in `RC_vX.Y.Z` 15 | * [ ] Add new contributors to vX.Y.Z 16 | * [ ] Merge `RC_vX.Y.Z` into `main` 17 | * [ ] Delete `RC_vX.Y.Z` branch 18 | * [ ] Create release 19 | * [ ] Push release to conda-forge 20 | * [ ] E-mail tobac mailing list 21 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | * [ ] Have you followed our guidelines in CONTRIBUTING.md? 2 | * [ ] Have you self-reviewed your code and corrected any misspellings? 3 | * [ ] Have you written documentation that is easy to understand? 4 | * [ ] Have you written descriptive commit messages? 5 | * [ ] Have you added NumPy docstrings for newly added functions? 6 | * [ ] Have you formatted your code using black? 7 | * [ ] If you have introduced a new functionality, have you added adequate unit tests? 8 | * [ ] Have all tests passed in your local clone? 9 | * [ ] If you have introduced a new functionality, have you added an example notebook? 10 | * [ ] Have you kept your pull request small and limited so that it is easy to review? 11 | * [ ] Have the newest changes from this branch been merged? 12 | 13 | -------------------------------------------------------------------------------- /.github/workflows/check_formatting.yml: -------------------------------------------------------------------------------- 1 | name: Check Python File Formatting with Black 2 | on: [push, pull_request] 3 | jobs: 4 | formatting_job: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - uses: actions/checkout@v4 8 | - name: Set up conda 9 | uses: mamba-org/setup-micromamba@v1 10 | with: 11 | environment-file: environment-ci.yml 12 | generate-run-shell: true 13 | cache-environment: true 14 | cache-downloads: true 15 | - name: Check formatting 16 | shell: micromamba-shell {0} 17 | run: 18 | black --version && 19 | black tobac --check --diff 20 | -------------------------------------------------------------------------------- /.github/workflows/check_json.yml: -------------------------------------------------------------------------------- 1 | name: Check Zenodo JSON Formatting 2 | on: [push, pull_request] 3 | jobs: 4 | check-json-formatting: 5 | runs-on: ubuntu-latest 6 | defaults: 7 | run: 8 | shell: bash -el {0} 9 | steps: 10 | - name: check out repository code 11 | uses: actions/checkout@v4 12 | - name: set up conda environment 13 | uses: actions/setup-python@v5 14 | with: 15 | python-version: '3.12' 16 | cache: 'pip' # caching pip dependencies 17 | - name: Install check-jsonschema 18 | run: | 19 | pip install check-jsonschema 20 | - name: Check zenodo JSON formatting 21 | run: | 22 | check-jsonschema --schemafile https://zenodraft.github.io/metadata-schema-zenodo/latest/schema.json .zenodo.json -------------------------------------------------------------------------------- /.github/workflows/check_notebooks.yml: -------------------------------------------------------------------------------- 1 | 
name: Jupyter Notebooks CI 2 | on: [push, pull_request] 3 | jobs: 4 | Check-Notebooks: 5 | runs-on: ubuntu-latest 6 | defaults: 7 | run: 8 | shell: bash -el {0} 9 | steps: 10 | - name: check out repository code 11 | uses: actions/checkout@v3 12 | - name: set up mamba environment 13 | uses: mamba-org/setup-micromamba@v1 14 | with: 15 | environment-file: environment-examples.yml 16 | generate-run-shell: true 17 | cache-environment: true 18 | cache-downloads: true 19 | - name: Install tobac 20 | run: | 21 | pip install . 22 | - name: Find all notebook files 23 | run: | 24 | find . -type f -name '*.ipynb' > nbfiles.txt 25 | cat nbfiles.txt 26 | - name: Execute all notebook files 27 | run: | 28 | while IFS= read -r nbpath; do 29 | jupyter nbconvert --inplace --ClearMetadataPreprocessor.enabled=True --clear-output $nbpath 30 | jupyter nbconvert --to notebook --inplace --execute $nbpath 31 | done < nbfiles.txt -------------------------------------------------------------------------------- /.github/workflows/codecov-CI.yml: -------------------------------------------------------------------------------- 1 | name: Codecov CI 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | run: 7 | runs-on: ubuntu-latest 8 | env: 9 | OS: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v4 12 | # Similar to MetPy install-conda action 13 | - name: Set up conda 14 | uses: mamba-org/setup-micromamba@v1 15 | with: 16 | environment-file: environment-ci.yml 17 | generate-run-shell: true 18 | cache-environment: true 19 | cache-downloads: true 20 | 21 | - name: Generate report 22 | shell: micromamba-shell {0} 23 | run: 24 | python -m coverage run -m pytest --cov=./ --cov-report=xml 25 | - name: Upload Coverage to Codecov 26 | uses: codecov/codecov-action@v4 27 | with: 28 | token: ${{ secrets.CODECOV_TOKEN }} 29 | fail_ci_if_error: true 30 | flags: unittests 31 | -------------------------------------------------------------------------------- /.github/workflows/docker-image.yml: -------------------------------------------------------------------------------- 1 | name: Docker Image CI 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | 7 | build: 8 | 9 | runs-on: ubuntu-latest 10 | 11 | steps: 12 | - uses: actions/checkout@v2 13 | - name: Build the Docker image 14 | run: docker build . --file tobac/tests/Dockerfile --tag my-image-name:$(date +%s) 15 | -------------------------------------------------------------------------------- /.github/workflows/matrix_ci.yml: -------------------------------------------------------------------------------- 1 | name: Matrix Testing CI 2 | # this is similar to the pyart CI action 3 | on: [push, pull_request] 4 | 5 | concurrency: 6 | group: ${{ github.workflow }}-${{ github.ref }} 7 | cancel-in-progress: true 8 | 9 | # This job installs dependencies and runs tests across a matrix of python and OS versions. 
10 | #Add if: github.repository == 'tobac-project/tobac' to limit runs to tobac repo 11 | 12 | jobs: 13 | build: 14 | name: ${{ matrix.os }}-${{ matrix.python-version }} 15 | runs-on: ${{ matrix.os }}-latest 16 | if: github.repository == 'tobac-project/tobac' 17 | defaults: 18 | run: 19 | shell: bash -l {0} 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] 24 | os: [macos, ubuntu, windows] 25 | 26 | steps: 27 | - uses: actions/checkout@v2 28 | 29 | # Install micromamba and dependencies 30 | - name: Setup Conda Environment 31 | uses: mamba-org/setup-micromamba@v1 32 | with: 33 | environment-file: environment-ci.yml 34 | activate-environment: pyart-dev 35 | cache-downloads: true 36 | channels: conda-forge 37 | channel-priority: strict 38 | create-args: python=${{ matrix.python-version }} 39 | 40 | - name: Fetch all history for all tags and branches 41 | run: | 42 | git fetch --prune --unshallow 43 | 44 | - name: Install tobac 45 | shell: bash -l {0} 46 | run: | 47 | python -m pip install -e . --no-deps --force-reinstall 48 | 49 | - name: Run Tests 50 | id: run_tests 51 | shell: bash -l {0} 52 | run: | 53 | python -m pytest -v 54 | -------------------------------------------------------------------------------- /.github/workflows/pylint.yml: -------------------------------------------------------------------------------- 1 | name: Linting 2 | on: 3 | pull_request_target: 4 | branches: 5 | - '*' 6 | permissions: 7 | pull-requests: write 8 | 9 | jobs: 10 | lint-workflow: 11 | runs-on: ubuntu-latest 12 | defaults: 13 | run: 14 | shell: bash -l {0} 15 | steps: 16 | - name: Check out Git repository 17 | uses: actions/checkout@v4 18 | - name: Set up mamba environment 19 | uses: mamba-org/setup-micromamba@v1 20 | with: 21 | environment-file: environment-ci.yml 22 | generate-run-shell: true 23 | cache-environment: true 24 | cache-downloads: true 25 | - name: Install tobac 26 | run: 27 | pip install . 
28 | 29 | - name: Store the PR branch 30 | run: 31 | echo "SHA=$(git rev-parse "$GITHUB_SHA")" >> $GITHUB_OUTPUT 32 | id: git 33 | 34 | - name: Checkout RC branch 35 | uses: actions/checkout@v4 36 | with: 37 | ref: ${{ github.base_ref }} 38 | 39 | - name: Get pylint score of RC branch 40 | run: 41 | pylint tobac --disable=C --exit-zero 42 | id: main_score 43 | 44 | - name: Checkout PR branch 45 | uses: actions/checkout@v4 46 | with: 47 | ref: ${{ steps.git.outputs.SHA }} 48 | 49 | - name: Get pylint score of PR branch 50 | run: | 51 | # use shell script to save only tail of output 52 | OUTPUT_PART=$(pylint tobac --disable=C --exit-zero | tail -n 2) 53 | # but post entire output in the action details 54 | pylint tobac --disable=C --exit-zero 55 | # define random delimiter for multiline string 56 | EOF=$(dd if=/dev/urandom bs=15 count=1 status=none | base64) 57 | echo "MESSAGE<<$EOF" >> "$GITHUB_OUTPUT" 58 | echo "$OUTPUT_PART" >> "$GITHUB_OUTPUT" 59 | echo "$EOF" >> "$GITHUB_OUTPUT" 60 | id: pr_score 61 | 62 | - name: Find Comment 63 | uses: peter-evans/find-comment@v2 64 | id: comment 65 | with: 66 | issue-number: ${{ github.event.pull_request.number }} 67 | comment-author: 'github-actions[bot]' 68 | body-includes: Linting results by Pylint 69 | 70 | - name: Post result to PR 71 | uses: peter-evans/create-or-update-comment@v3 72 | with: 73 | issue-number: ${{ github.event.pull_request.number }} 74 | comment-id: ${{ steps.comment.outputs.comment-id }} 75 | edit-mode: replace 76 | body: | 77 | Linting results by Pylint: 78 | -------------------------- 79 | ${{ steps.pr_score.outputs.MESSAGE }} 80 | The linting score is an indicator that reflects how well your code version follows Pylint’s coding standards and quality metrics with respect to the ${{ github.base_ref }} branch. 81 | A decrease usually indicates your new code does not fully meet style guidelines or has potential errors. 82 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | __pycache__ 3 | .vscode 4 | htmlcov 5 | .coverage 6 | build 7 | .idea 8 | examples/*/Save 9 | examples/*/Plot 10 | examples/climate-processes-tobac_example_data-b3e69ee 11 | .ipynb_checkpoints 12 | .DS_Store 13 | *.egg-info 14 | 15 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/psf/black 3 | rev: 21.12b0 4 | hooks: 5 | - id: black 6 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | formats: all 3 | build: 4 | os: ubuntu-22.04 5 | tools: 6 | python: "3.9" 7 | python: 8 | install: 9 | - requirements: doc/requirements.txt 10 | sphinx: 11 | # Path to your Sphinx configuration file. 
12 | configuration: doc/conf.py 13 | -------------------------------------------------------------------------------- /.zenodo.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "tobac - Tracking and Object-based Analysis of Clouds", 3 | "description": "tobac is a Python package to identify, track and analyze clouds and other atmospheric phenomena in different types of gridded datasets, such as 3D model output from cloud-resolving model simulations or 2D data from satellite retrievals.", 4 | "creators": [ 5 | { 6 | "name": "tobac Community" 7 | }, 8 | { 9 | "name": "Brunner, Kelcy", 10 | "affiliation": "Texas Tech University", 11 | "orcid": "0000-0003-3938-0963" 12 | }, 13 | { 14 | "name": "Freeman, Sean W.", 15 | "affiliation": "The University of Alabama in Huntsville", 16 | "orcid": "0000-0002-7398-1597" 17 | }, 18 | { 19 | "name": "Jones, William K.", 20 | "affiliation": "University of Oxford", 21 | "orcid": "0000-0001-9786-3723" 22 | }, 23 | { 24 | "name": "Kukulies, Julia", 25 | "affiliation": "NSF National Center for Atmospheric Research", 26 | "orcid": "0000-0001-6084-0069" 27 | }, 28 | { 29 | "name": "Senf, Fabian", 30 | "affiliation": "Leibniz Institute for Tropospheric Research, Leipzig (Germany)", 31 | "orcid": "0000-0003-1685-2657" 32 | }, 33 | { 34 | "name": "Bruning, Eric", 35 | "affiliation": "Texas Tech University", 36 | "orcid": "0000-0003-1959-442X" 37 | }, 38 | { 39 | "name": "Stier, Philip", 40 | "affiliation": "University of Oxford", 41 | "orcid": "0000-0002-1191-0128" 42 | }, 43 | { 44 | "name": "van den Heever, Sue C.", 45 | "affiliation": "Colorado State University", 46 | "orcid": "0000-0001-9843-3864" 47 | }, 48 | { 49 | "name": "Heikenfeld, Max", 50 | "affiliation": "University of Oxford", 51 | "orcid": "0000-0001-8124-8048" 52 | }, 53 | { 54 | "name": "Marinescu, Peter J.", 55 | "affiliation": "Colorado State University", 56 | "orcid": "0000-0002-5842-969X" 57 | }, 58 | { 59 | "name": "Collis, Scott M.", 60 | "affiliation": "Argonne National Laboratory", 61 | "orcid": "0000-0002-2303-687X" 62 | }, 63 | { 64 | "name": "Lettl, Kolya", 65 | "affiliation": "Leibniz Institute for Tropospheric Research, Leipzig (Germany)", 66 | "orcid": "0000-0002-4524-8152" 67 | }, 68 | { 69 | "name": "Pfeifer, Nils", 70 | "affiliation": "Leibniz Institute for Tropospheric Research, Leipzig (Germany)", 71 | "orcid": "0000-0002-5350-1445" 72 | }, 73 | { 74 | "name": "Raut, Bhupendra A.", 75 | "affiliation": "Northwestern-Argonne Institute of Science and Engineering, Argonne National Laboratory", 76 | "orcid": "0000-0001-5598-1393" 77 | }, 78 | { 79 | "name": "Zhang, Xin", 80 | "affiliation": "Nanjing University of Information Science & Technology (China)", 81 | "orcid": "0000-0002-1756-6620" 82 | }, 83 | { 84 | "name": "Ziegner, Fabian", 85 | "affiliation":"Leibniz Institute for Tropospheric Research, Leipzig (Germany)" 86 | } 87 | ], 88 | "license": "BSD-3-Clause", 89 | "keywords": [ 90 | "cloud tracking" 91 | ], 92 | "access_right": "open", 93 | "upload_type": "software" 94 | } 95 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to tobac 2 | 3 | __Welcome! We are very happy that you are interested in our project and thanks for taking time to contribute! 
:)__ 4 | 5 | 6 | ## Getting Started 7 | ### Installation & Environment details 8 | You will find them in the [README.md](https://github.com/tobac-project/tobac/blob/master/README.md). 9 | 10 | ### Tutorials 11 | Tutorials have been prepared to provide you further insight into `tobac`'s functionality. Please have a look in the 12 | [examples folder](https://github.com/tobac-project/tobac/tree/main/examples). 13 | 14 | ### Documentation 15 | You will find our documentation at [https://tobac.readthedocs.io](https://tobac.readthedocs.io). 16 | 17 | ### Testing 18 | The tests are located in the [tests folder](https://github.com/climate-processes/tobac/tree/master/tobac/tests). 19 | 20 | ## Reporting Bugs 21 | Please create a new issue on [GitHub](https://github.com/tobac-project/tobac/issues) if it is not listed there yet. 22 | 23 | ### How to write a good Bug Report? 24 | * Give it a clear, descriptive title. 25 | * Copy and paste the error message. 26 | * Describe the steps for reproducing the problem and give a specific example. 27 | * Optional: Make a suggestion to fix it. 28 | 29 | ## How to Submit Changes 30 | * Have a look at [our roadmap](https://github.com/tobac-project/tobac-roadmap/blob/master/tobac-roadmap-main.md) first, 31 | to learn about our project goals and check the 32 | [changelog.md](https://github.com/tobac-project/tobac/blob/main/CHANGELOG.md). 33 | * More details on the code structure and further help for code contributions can be found in our [developer 34 | guide](https://tobac.readthedocs.io/code_structure.html). 35 | * Before you start a pull request, please make sure that you added [numpydoc 36 | docstrings](https://numpydoc.readthedocs.io/en/latest/format.html) to your 37 | functions. See [docstring example in the developer guide](https://tobac.readthedocs.io/contributing.html). This way the 38 | API documentation will be parsed properly. 39 | * If it is a larger change or a newly added feature or workflow, please add an example in the [example 40 | folder](https://github.com/tobac-project/tobac/tree/main/examples) or adapt the existing examples there. 41 | * The code should be PEP 8 compliant, as this facilitates our collaboration. Please use the first stable version (22.6.0) of [black](https://black.readthedocs.io/en/stable/) to format your code. When you submit a pull request, all files are checked for formatting. 42 | * The tobac repository is set up with pre-commit hooks to automatically format your code when committing changes. Please run the command "pre-commit install" in the root directory of tobac to set up pre-commit formatting. 43 | 44 | We aim to respond to all new issues/pull requests as soon as possible; however, sometimes this is not possible due to work commitments. 45 | 46 | 47 | 48 | ## Slack 49 | In addition to the workflow here on GitHub, there's a tobac workspace on Slack [tobac-dev.slack.com](tobac-dev.slack.com) that we use for some additional communication around the project. Please join us there to stay updated about all things tobac that go beyond the detailed work on the code. 50 | 51 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019, climate-processes 4 | All rights reserved.
5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | tobac - Tracking and Object-based Analysis of Clouds 2 | ====== 3 | [![Release Version](https://img.shields.io/conda/vn/conda-forge/tobac.svg)](https://anaconda.org/conda-forge/tobac)[![Download Counter](https://img.shields.io/conda/dn/conda-forge/tobac.svg)](https://anaconda.org/conda-forge/tobac)[![Documentation Status](https://readthedocs.org/projects/tobac/badge/?version=latest)](https://tobac.readthedocs.io/en/latest/?badge=latest) 4 | 5 | What is it? 6 | ----------- 7 | 8 | *tobac* is a Python package for identifying, tracking and analysing clouds and other meteorological phenomena in different types of gridded datasets. *tobac* is unique in its ability to track phenomena using **any** variable on **any** grid, including radar data, satellite observations, and numerical model output. *tobac* has been used in a variety of peer-reviewed [publications](https://tobac.readthedocs.io/en/latest/publications.html) and is an international, multi-institutional collaboration. 9 | 10 | Documentation 11 | ------------- 12 | Individual features are identified as either maxima or minima in a two-dimensional, time-varying field. 13 | The volume/area associated with the identified objects can be determined based on a time-varying 2D or 3D field and a threshold value. In the tracking step, the identified objects are linked into consistent trajectories representing the cloud over its lifecycle. 14 | 15 | Detailed documentation of the package can be found at https://tobac.readthedocs.io. 16 | 17 | Release announcements, workshop and conference announcements, and other information of interest to the broader *tobac* users group are sent to the [tobac core group](https://groups.google.com/g/tobac/about) mailing list.
If you are interested in contributing to the development of *tobac*, we invite you to join the [tobac developers](https://groups.google.com/u/1/g/tobac-developers) mailing list. Information on monthly developers' meetings and other developer discussion and announcements are sent to that list. 18 | 19 | We also have a Slack server for both users and developers. For information on joining that, please contact the *tobac* developers mailing list, or see the information in the *tobac* release notes sent to the *tobac* mailing list. 20 | 21 | Installation 22 | ------------ 23 | tobac requires Python 3, and support for Python versions before 3.7 (i.e., 3.6 and lower) is deprecated and will be removed in tobac version 1.5. 24 | 25 | The easiest way is to install the most recent version of tobac via conda and the conda-forge channel: 26 | ``` 27 | conda install -c conda-forge tobac 28 | ``` 29 | This will take care of all necessary dependencies and should do the job for most users. It also allows for an easy update of the installation via 30 | ``` 31 | conda update -c conda-forge tobac 32 | ``` 33 | 34 | 35 | You can also install tobac via git, either for development purposes or to use specific development branches from the GitHub repository. 36 | 37 | If you are using anaconda, the following command from within the cloned repository should make sure all dependencies are met and up to date: 38 | ``` 39 | conda install -c conda-forge --yes --file requirements.txt 40 | ``` 41 | You can install the package directly from GitHub with pip using either of the two following commands: 42 | ``` 43 | pip install --upgrade git+ssh://git@github.com/tobac-project/tobac.git 44 | pip install --upgrade git+https://github.com/tobac-project/tobac.git 45 | ``` 46 | You can also clone the package with either of the two following commands: 47 | ``` 48 | git clone git@github.com:tobac-project/tobac.git 49 | git clone https://github.com/tobac-project/tobac.git 50 | ``` 51 | and install the package from the locally cloned version: 52 | ``` 53 | pip install tobac/ 54 | ``` 55 | 56 | Contributing 57 | ------------ 58 | We encourage bug reports, questions, and code contributions. For more details on contributing, please see https://github.com/tobac-project/tobac/blob/v2.0-dev/CONTRIBUTING.md 59 | 60 | We are currently in a transition phase between versions 1.x and 2.x. v2.x will enable the use of multiple tracking methods (including TINT) and will use xarray for gridded data instead of Iris. Preliminary development on v2.x has taken place on the `v2.0-dev` branch, while work on the `main` and `RC_v1.x.x` branches (containing v1.x development) is ongoing to unify these development efforts.
61 | 62 | Roadmap 63 | ------------ 64 | A roadmap for the future development of tobac is available here: https://github.com/tobac-project/tobac-roadmap/blob/master/tobac-roadmap-main.md 65 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | # Codecov.io settings 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | # basic settings 8 | # auto target means look at deltas from previous 9 | target: auto 10 | # Threshold we can drop coverage before failing CI 11 | threshold: 1% 12 | base: auto 13 | flags: 14 | - unit 15 | paths: 16 | - "tobac" 17 | 18 | # what folders and files to ignore 19 | ignore: 20 | - "tobac/tests/.*" 21 | - "setup.py" 22 | 23 | -------------------------------------------------------------------------------- /dev_requirements.txt: -------------------------------------------------------------------------------- 1 | # Python dependencies 2 | numpy 3 | scipy 4 | scikit-image 5 | scikit-learn 6 | pandas 7 | matplotlib 8 | iris 9 | xarray 10 | cartopy 11 | trackpy 12 | pre-commit 13 | black 14 | pytest 15 | typing_extensions 16 | nbconvert 17 | -------------------------------------------------------------------------------- /doc/_static/theme_overrides.css: -------------------------------------------------------------------------------- 1 | /* from https://github.com/readthedocs/sphinx_rtd_theme/issues/117#issuecomment-41506687 */ 2 | /* with augmentations from https://github.com/readthedocs/sphinx_rtd_theme/issues/117#issuecomment-153083280 */ 3 | /* override table width restrictions */ 4 | @media screen and (min-width: 767px) { 5 | 6 | .wy-table-responsive table td { 7 | /* !important prevents the common CSS stylesheets from 8 | overriding this as on RTD they are loaded after this stylesheet */ 9 | white-space: normal !important; 10 | } 11 | 12 | .wy-table-responsive { 13 | max-width: 100%; 14 | overflow: visible !important; 15 | 16 | } 17 | 18 | } 19 | -------------------------------------------------------------------------------- /doc/_static/thumbnails/Basics_Idealized-Case-1_Tracking-of-a-Test-Blob-in-2D_Thumbnail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobac-project/tobac/82ac8f4cf19523ad86171a9f8d7c72e976ca3b09/doc/_static/thumbnails/Basics_Idealized-Case-1_Tracking-of-a-Test-Blob-in-2D_Thumbnail.png -------------------------------------------------------------------------------- /doc/_static/thumbnails/Basics_Idealized-Case-2_Two_crossing_Blobs_Thumbnail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobac-project/tobac/82ac8f4cf19523ad86171a9f8d7c72e976ca3b09/doc/_static/thumbnails/Basics_Idealized-Case-2_Two_crossing_Blobs_Thumbnail.png -------------------------------------------------------------------------------- /doc/_static/thumbnails/Basics_Methods-and-Parameters-for-Feature-Detection_Part_1_Thumbnail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobac-project/tobac/82ac8f4cf19523ad86171a9f8d7c72e976ca3b09/doc/_static/thumbnails/Basics_Methods-and-Parameters-for-Feature-Detection_Part_1_Thumbnail.png -------------------------------------------------------------------------------- /doc/_static/thumbnails/Basics_Methods-and-Parameters-for-Feature-Detection_Part_2_Thumbnail.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobac-project/tobac/82ac8f4cf19523ad86171a9f8d7c72e976ca3b09/doc/_static/thumbnails/Basics_Methods-and-Parameters-for-Feature-Detection_Part_2_Thumbnail.png -------------------------------------------------------------------------------- /doc/_static/thumbnails/Basics_Methods-and-Parameters-for-Linking_Thumbnail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobac-project/tobac/82ac8f4cf19523ad86171a9f8d7c72e976ca3b09/doc/_static/thumbnails/Basics_Methods-and-Parameters-for-Linking_Thumbnail.png -------------------------------------------------------------------------------- /doc/_static/thumbnails/Basics_Methods-and-Parameters-for-Segmentation_Thumbnail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobac-project/tobac/82ac8f4cf19523ad86171a9f8d7c72e976ca3b09/doc/_static/thumbnails/Basics_Methods-and-Parameters-for-Segmentation_Thumbnail.png -------------------------------------------------------------------------------- /doc/_static/thumbnails/Example_OLR_Tracking_model_Thumbnail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobac-project/tobac/82ac8f4cf19523ad86171a9f8d7c72e976ca3b09/doc/_static/thumbnails/Example_OLR_Tracking_model_Thumbnail.png -------------------------------------------------------------------------------- /doc/_static/thumbnails/Example_OLR_Tracking_satellite_Thumbnail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobac-project/tobac/82ac8f4cf19523ad86171a9f8d7c72e976ca3b09/doc/_static/thumbnails/Example_OLR_Tracking_satellite_Thumbnail.png -------------------------------------------------------------------------------- /doc/_static/thumbnails/Example_Precip_Tracking_Thumbnail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobac-project/tobac/82ac8f4cf19523ad86171a9f8d7c72e976ca3b09/doc/_static/thumbnails/Example_Precip_Tracking_Thumbnail.png -------------------------------------------------------------------------------- /doc/_static/thumbnails/Example_Track_on_Radar_Segment_on_Satellite_Thumbnail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobac-project/tobac/82ac8f4cf19523ad86171a9f8d7c72e976ca3b09/doc/_static/thumbnails/Example_Track_on_Radar_Segment_on_Satellite_Thumbnail.png -------------------------------------------------------------------------------- /doc/_static/thumbnails/Example_Updraft_Tracking_Thumbnail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobac-project/tobac/82ac8f4cf19523ad86171a9f8d7c72e976ca3b09/doc/_static/thumbnails/Example_Updraft_Tracking_Thumbnail.png -------------------------------------------------------------------------------- /doc/_static/thumbnails/Example_vorticity_tracking_model_Thumbnail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobac-project/tobac/82ac8f4cf19523ad86171a9f8d7c72e976ca3b09/doc/_static/thumbnails/Example_vorticity_tracking_model_Thumbnail.png 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobac-project/tobac/82ac8f4cf19523ad86171a9f8d7c72e976ca3b09/doc/_static/thumbnails/Example_vorticity_tracking_model_Thumbnail.png -------------------------------------------------------------------------------- /doc/analysis.rst: -------------------------------------------------------------------------------- 1 | .. 2 | Documentation of analysis functions 3 | TODO: include descriptions of the analysis functions and examples 4 | 5 | Analysis 6 | ========= 7 | tobac provides several analysis functions that allow for the calculation of important quantities based on the tracking results. This includes properties such as feature lifetimes and feature areas/volumes, and also allows for a convenient calculation of statistics for arbitrary fields of the same shape as the input data used for the tracking analysis. 8 | -------------------------------------------------------------------------------- /doc/big_datasets.rst: -------------------------------------------------------------------------------- 1 | Handling Large Datasets 2 | ------------------------------------- 3 | 4 | Often, one desires to use *tobac* to identify and track features in large datasets ("big data"). This documentation strives to suggest various methods for doing so efficiently. Current versions of *tobac* do not support out-of-core (e.g., :code:`dask`) computation, meaning that these strategies may need to be employed for both computational and memory reasons. 5 | 6 | .. _Split Feature Detection: 7 | 8 | =========================================== 9 | Split Feature Detection and Run in Parallel 10 | =========================================== 11 | Current versions of threshold feature detection (see :doc:`feature_detection_overview`) are time independent, meaning that one can easily parallelize feature detection across all times (although not across space). *tobac* provides the :py:meth:`tobac.utils.combine_feature_dataframes` function to combine a list of dataframes produced by a parallelization method (such as :code:`jug`, :code:`multiprocessing.pool`, or :code:`dask.bag`) into a single combined dataframe suitable to perform tracking with. 12 | 13 | Below is a snippet from a larger notebook demonstrating how to run feature detection in parallel ( :doc:`big_datasets_examples/notebooks/parallel_processing_tobac`): 14 | 15 | :: 16 | 17 | # build list of tracked variables using Dask.Bag 18 | 19 | b = db.from_sequence( 20 | [ 21 | combined_ds["data"][x : x + 1] 22 | for x in range(len(combined_ds["time"])) 23 | ], 24 | npartitions=1, 25 | ) 26 | out_feature_dfs = db.map( 27 | lambda x: tobac.feature_detection_multithreshold( 28 | x, 4000, **parameters_features 29 | ), 30 | b, 31 | ).compute() 32 | 33 | combined_dataframes = tobac.utils.general.combine_feature_dataframes(out_feature_dfs) 34 | 35 | 36 | .. _Split Segmentation: 37 | 38 | ====================================== 39 | Split Segmentation and Run in Parallel 40 | ====================================== 41 | Recall that the segmentation mask (see :doc:`segmentation_output`) is the same size as the input grid, which results in large files when handling large input datasets. The following strategies can help reduce the output size and make segmentation masks more useful for the analysis. 42 | 43 | The first strategy is to only segment on features *after tracking and quality control*. While this will not directly impact performance, waiting to run segmentation on the final set of features (after discarding, e.g., non-tracked cells) can make analysis of the output segmentation dataset easier.
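A minimal sketch of this filtering step (assuming untracked features are marked with :code:`cell == -1`, the default behaviour of tobac's trackpy-based linking; the variable names are placeholders)::

    import tobac

    # `tracks` is the dataframe returned by tobac.linking_trackpy and
    # `field` is the field to segment on (e.g. an xarray DataArray)
    tracked_only = tracks[tracks["cell"] != -1]

    # Run segmentation only on the quality-controlled, tracked features
    mask, tracked_features = tobac.segmentation_2D(
        tracked_only, field, dxy=1000, threshold=1
    )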
44 | 45 | To enhance the speed at which segmentation runs, one can process multiple segmentation times in parallel independently, similar to feature detection. Unlike feature detection, however, there is currently no built-in *tobac* method to combine multiple segmentation times into a single file. While one can do this using typical NetCDF tools such as :code:`ncrcat` or with xarray utilities such as :code:`xr.concat`, you can also leave the segmentation mask output as separate files, opening them later with multiple-file retrievals such as :code:`xr.open_mfdataset`. 46 | 47 | 48 | .. _Tracking Hanging: 49 | 50 | ===================================== 51 | Tracking Hangs with too many Features 52 | ===================================== 53 | 54 | When tracking on a large dataset, :code:`tobac.tracking.linking_trackpy` can hang using the default parameters. This is because the tracking library :code:`trackpy` searches for the next timestep's feature in too large an area. This can be solved *without impact on the scientific output* by lowering the :code:`subnetwork_size` parameter in :code:`tobac.tracking.linking_trackpy`. 55 | 56 | -------------------------------------------------------------------------------- /doc/bulk_statistics/index.rst: -------------------------------------------------------------------------------- 1 | ########################## 2 | Compute bulk statistics 3 | ########################## 4 | 5 | Bulk statistics allow for a wide range of properties of detected objects to be calculated during feature detection and segmentation or as a postprocessing step. 6 | The :py:meth:`tobac.utils.bulk_statistics.get_statistics_from_mask` function applies one or more functions over one or more data fields for each detected object. 7 | For example, one could calculate the convective mass flux for each detected feature by providing fields of vertical velocity, cloud water content and area. 8 | Numpy-like broadcasting is supported, allowing 2D and 3D data to be combined. 9 | 10 | .. toctree:: 11 | :maxdepth: 1 12 | 13 | notebooks/compute_statistics_during_feature_detection 14 | notebooks/compute_statistics_during_segmentation 15 | notebooks/compute_statistics_postprocessing_example 16 | -------------------------------------------------------------------------------- /doc/code_reviews.rst: -------------------------------------------------------------------------------- 1 | Code reviews 2 | ------------------ 3 | 4 | Before anything is merged into the release branch (:code:`RC_*`), we require that two reviewers accept the code changes of a pull request. 5 | 6 | ============================ 7 | How to do a code review 8 | ============================ 9 | 10 | * Check out the pull request locally (`how to checkout a pull request locally `_) 11 | 12 | * Run tests locally 13 | 14 | * Go through the code and see if it is readable and easy to understand 15 | 16 | * Not required, but often useful: test new features with your own data 17 | 18 | 19 | ============================ 20 | Tips and expectations 21 | ============================ 22 | 23 | 24 | Doing a code review can be very challenging if you are unfamiliar with the process. Here is a set of documents which might provide a good resource on how to get started: 25 | 26 | https://github.com/google/eng-practices 27 | 28 | 29 | ========================= 30 | Conventional comments 31 | ========================= 32 | 33 | The comments in a code review should be clear and constructive.
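For example, a short, constructive comment on a hypothetical piece of code might look like::

    suggestion (non-blocking): could we rename the loop variable `tmp_mask`
    to `feature_mask`? That would make the intent of the loop clearer.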
34 | 35 | A useful way of highlighting the intention of specific comments, such as the ``suggestion (non-blocking)`` label used in the example above, is to label them according to `conventional comments `_. 36 | 37 | -------------------------------------------------------------------------------- /doc/code_structure.rst: -------------------------------------------------------------------------------- 1 | Code structure and key design concepts 2 | -------------------------------------- 3 | 4 | ================================== 5 | Modules 6 | ================================== 7 | 8 | **tobac** aims to provide a flexible and modular framework which can be seen as a toolbox to create tracking algorithms according to the user's specific research needs. 9 | 10 | The **tobac** package currently consists of three **main modules**: 11 | 12 | 1. The :py:mod:`tobac.feature_detection` module contains methods to identify objects (*features*) in 2D or 3D (3D or 4D when including the time dimension) gridded data. This is done by identifying contiguous regions above or below one or multiple user-defined thresholds. The module makes use of :py:func:`scipy.ndimage.label`, a generic image processing method that labels features in an array. The methods in :py:mod:`tobac.feature_detection` are high-level functions that enable fast and effective feature detection and create easy-to-use output in the form of a :py:mod:`pandas.DataFrame` that contains the coordinates and some basic information on each detected feature. The highest-level method, and the one most commonly used, is :py:func:`tobac.feature_detection_multithreshold`. 13 | 14 | 2. The :py:mod:`tobac.segmentation` module contains methods to define the extent of the identified feature areas or volumes. This step is needed to create a mask of the identified features because the feature detection currently only saves the center points of the features. The segmentation procedure is performed using the watershedding method, but more methods are to be implemented in the future. Just like the feature detection, this module can handle both 2D and 3D data. 15 | 16 | 3. The :py:mod:`tobac.tracking` module is responsible for linking identified features over time. This module primarily makes use of the Python package :py:mod:`trackpy`. Note that the linking using :py:mod:`trackpy` is based on particle tracking principles, which means that only the feature center positions (not the entire area or volume associated with each feature) are needed to link features over time. Other methods, such as tracking based on overlapping areas of the segmented features, are to be implemented. 17 | 18 | In addition to the main modules, there are three **postprocessing modules**: 19 | 20 | 4. The :py:mod:`tobac.merge_split` module provides functionality to identify mergers and splitters in the tracking output and to add labels such that one can reconstruct the parent and child tracks of each cell. 21 | 22 | 5. The :py:mod:`tobac.analysis` module contains methods to analyze the tracking output and derive statistics about individual tracks as well as summary statistics of the entire population of tracks or subsets of the latter. 23 | 24 | 6. The :py:mod:`tobac.plotting` module provides methods to visualize the tracking output, for example for creating maps and animations of identified features, segmented areas and tracks. 25 | 26 | 27 | Finally, there are two modules that are primarily **important for developers**: 28 | 29 | 7.
The :py:mod:`tobac.utils` module is a collection of smaller, not necessarily tracking-specific methods that facilitate and support the methods of the main modules. This module has multiple submodules. We separate methods that are rather generic and could also be practical for tobac users who build their own tracking algorithms (:py:mod:`tobac.utils.general`) from methods that mainly facilitate the development of **tobac** (:py:mod:`tobac.utils.internal`). Sometimes, new features come with the need for a whole set of new methods, so it could make sense to save these in their own submodule (see e.g. :py:mod:`tobac.periodic_boundaries`). 30 | 31 | 8. The :py:mod:`tobac.testing` module provides support for writing unit tests. This module contains several methods to create simplified test data sets on which the various methods and parameters for feature detection, segmentation, and tracking can be tested. 32 | 33 | For more information on each submodule, refer to the respective source code documentation. 34 | 35 | One thing to note is that **tobac** as of now is purely functional. The plan is, however, to move towards a more object-oriented design with base classes for the main operations such as feature detection and tracking. 36 | 37 | 38 | ======== 39 | Examples 40 | ======== 41 | 42 | To help users get started with **tobac** and to demonstrate the various functionalities, **tobac** hosts several detailed and **illustrated examples** in the form of Jupyter notebooks. They are hosted under the directory `examples/` and can be executed by the user. Our readthedocs page also hosts a rendered version of our examples as a `gallery `_. 43 | 44 | 45 | ============================ 46 | Migrating to xarray and dask 47 | ============================ 48 | 49 | Currently, **tobac** uses `iris cubes `_ as the 50 | primary data container. However, we are working on migrating the source code to 51 | `xarray `_ such that all internal functions are based on `xr.DataArray 52 | objects `_. 53 | 54 | To ensure a robust transition from **iris** to **xarray**, we make use of various decorators that convert input and 55 | output data for the main functions without changing their actual code. These decorators are located in the `decorator 56 | submodule `_. 57 | 58 | In addition, one of our main goals for the future is to fully support `dask `_, in order to scale 59 | to large datasets and enable parallelization. 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | """This file is used to configure the Sphinx build of our documentation. 2 | The documentation on setting this up is here: https://www.sphinx-doc.org/en/master/usage/configuration.html 3 | """ 4 | 5 | # This is the standard readthedocs theme.
6 | import sphinx_rtd_theme 7 | import sys, os 8 | 9 | sys.path.insert(0, os.path.abspath("extensions")) 10 | 11 | # What Sphinx extensions do we need 12 | extensions = [ 13 | "sphinx.ext.autodoc", 14 | "sphinx.ext.doctest", 15 | "sphinx.ext.todo", 16 | "sphinx.ext.coverage", 17 | "sphinx.ext.imgmath", 18 | "sphinx.ext.ifconfig", 19 | "sphinx_rtd_theme", 20 | "sphinx.ext.napoleon", 21 | "nbsphinx", 22 | "sphinx_gallery.load_style", 23 | "sphinx_toolbox.code", 24 | ] 25 | 26 | 27 | html_theme = "sphinx_rtd_theme" 28 | 29 | html_static_path = ["_static"] 30 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 31 | 32 | 33 | project = "tobac" 34 | 35 | master_doc = "index" 36 | 37 | # allow dropdowns 38 | collapse_navigation = False 39 | 40 | 41 | # Include our custom CSS (currently for special table config) 42 | def setup(app): 43 | app.add_css_file("theme_overrides.css") 44 | 45 | 46 | # This should include all modules used in tobac. These are dummy imports, 47 | # but should include both required and optional dependencies. 48 | autodoc_mock_imports = [ 49 | # "numpy", 50 | "scipy", 51 | "scikit-image", 52 | "pandas", 53 | "pytables", 54 | "matplotlib", 55 | "iris", 56 | "cf-units", 57 | "xarray", 58 | "cartopy", 59 | "trackpy", 60 | "numba", 61 | "skimage", 62 | "sklearn", 63 | "cftime", 64 | ] 65 | 66 | sys.path.insert(0, os.path.abspath("../")) 67 | 68 | pygments_style = "sphinx" 69 | 70 | # Napoleon settings for configuring the Napoleon extension 71 | # See documentation here: 72 | # https://www.sphinx-doc.org/en/master/usage/extensions/napoleon.html 73 | napoleon_google_docstring = True 74 | napoleon_numpy_docstring = True 75 | napoleon_include_init_with_doc = False 76 | napoleon_include_private_with_doc = False 77 | napoleon_include_special_with_doc = True 78 | napoleon_use_admonition_for_examples = False 79 | napoleon_use_admonition_for_notes = False 80 | napoleon_use_admonition_for_references = False 81 | napoleon_use_ivar = False 82 | napoleon_use_param = True 83 | napoleon_use_rtype = True 84 | napoleon_preprocess_types = False 85 | napoleon_type_aliases = None 86 | napoleon_attr_annotations = True 87 | 88 | nbsphinx_thumbnails = { 89 | "examples/Basics/Idealized-Case-1_Tracking-of-a-Test-Blob-in-2D": "_static/thumbnails/Basics_Idealized-Case-1_Tracking-of-a-Test-Blob-in-2D_Thumbnail.png", 90 | "examples/Basics/Idealized-Case-2_Two_crossing_Blobs": "_static/thumbnails/Basics_Idealized-Case-2_Two_crossing_Blobs_Thumbnail.png", 91 | "examples/Basics/Methods-and-Parameters-for-Feature-Detection_Part_1": "_static/thumbnails/Basics_Methods-and-Parameters-for-Feature-Detection_Part_1_Thumbnail.png", 92 | "examples/Basics/Methods-and-Parameters-for-Feature-Detection_Part_2": "_static/thumbnails/Basics_Methods-and-Parameters-for-Feature-Detection_Part_2_Thumbnail.png", 93 | "examples/Basics/Methods-and-Parameters-for-Linking": "_static/thumbnails/Basics_Methods-and-Parameters-for-Linking_Thumbnail.png", 94 | "examples/Basics/Methods-and-Parameters-for-Segmentation": "_static/thumbnails/Basics_Methods-and-Parameters-for-Segmentation_Thumbnail.png", 95 | "examples/Example_OLR_Tracking_model/Example_OLR_Tracking_model": "_static/thumbnails/Example_OLR_Tracking_model_Thumbnail.png", 96 | "examples/Example_OLR_Tracking_satellite/Example_OLR_Tracking_satellite": "_static/thumbnails/Example_OLR_Tracking_satellite_Thumbnail.png", 97 | "examples/Example_Precip_Tracking/Example_Precip_Tracking": "_static/thumbnails/Example_Precip_Tracking_Thumbnail.png", 98 | 
"examples/Example_Track_on_Radar_Segment_on_Satellite/Example_Track_on_Radar_Segment_on_Satellite": "_static/thumbnails/Example_Track_on_Radar_Segment_on_Satellite_Thumbnail.png", 99 | "examples/Example_Updraft_Tracking/Example_Updraft_Tracking": "_static/thumbnails/Example_Updraft_Tracking_Thumbnail.png", 100 | "examples/Example_vorticity_tracking_model/Example_vorticity_tracking_model": "_static/thumbnails/Example_vorticity_tracking_model_Thumbnail.png", 101 | } 102 | -------------------------------------------------------------------------------- /doc/data_input.rst: -------------------------------------------------------------------------------- 1 | .. 2 | Description of the input data required. 3 | 4 | 5 | Data Input 6 | ========== 7 | 8 | Input data for tobac should consist of one or more fields on a common, regular grid with a time dimension and two or three spatial dimensions. The input data can also include latitude and longitude coordinates, either as 1-d or 2-d variables depending on the grid used. 9 | 10 | As of version 1.6 of tobac, xarray DataArrays are the default format for input fields, with all internal operations performed using DataArrays. Backward compatibility with Iris Cube input is maintained using a conversion wrapper. Workflows using Iris should produce identical results to previous versions, but moving forward xarray is the recommended data format. 11 | 12 | ======= 13 | 3D Data 14 | ======= 15 | 16 | As of *tobac* version 1.5.0, 3D data are now fully supported for feature detection, tracking, and segmentation. Similar to how *tobac* requires some information on the horizontal grid spacing of the data (e.g., through the :code:`dxy` parameter), some information on the vertical grid spacing is also required. This is documented in detail in the API docs, but briefly, users must specify either :code:`dz`, where the grid has uniform grid spacing, or users must specify :code:`vertical_coord`, where :code:`vertical_coord` is the name of the coordinate representing the vertical, with the same units as :code:`dxy`. 17 | 18 | =========== 19 | Data Output 20 | =========== 21 | 22 | The output of the different analysis steps in tobac are output as either pandas DataFrames in the case of one-dimensional data, such a lists of identified features or feature tracks or as xarray DataArrays/Iris Cubes in the case of 2D/3D/4D fields such as feature masks. Note that the dataframe output from tracking is a superset of the features dataframe. 23 | 24 | For information on feature detection *output*, see :doc:`feature_detection_output`. 25 | For information on tracking *output*, see :doc:`tracking_output`. 26 | 27 | Note that in future versions of tobac, it is planned to combine both output data types into a single hierarchical data structure containing both spatial and object information. Additional information about the planned changes can be found in the v2.0-dev branch of the main tobac repository (`https://github.com/tobac-project/tobac `_), as well as the tobac roadmap (`https://github.com/tobac-project/tobac-roadmap `_. 
28 | -------------------------------------------------------------------------------- /doc/examples: -------------------------------------------------------------------------------- 1 | ../examples -------------------------------------------------------------------------------- /doc/examples.rst: -------------------------------------------------------------------------------- 1 | Example Gallery 2 | =============== 3 | tobac is provided with a set of Jupyter notebooks that show examples of the application of tobac for different types of datasets. 4 | 5 | 6 | .. nbgallery:: 7 | :caption: Fundamentals of Detection and Tracking 8 | 9 | Test Blob in 2D 10 | Two crossing Blobs 11 | 12 | On Feature Detection: Part 1 13 | On Feature Detection: Part 2 14 | On Segmentation 15 | On Linking 16 | 17 | 18 | 19 | 20 | .. nbgallery:: 21 | :caption: Examples of Using *tobac* with Observations 22 | 23 | OLR from GOES-13 Satellite 24 | Combine Radar & Satellite 25 | 26 | 27 | .. nbgallery:: 28 | :caption: Examples of Using *tobac* with Model Data 29 | 30 | WRF OLR 31 | WRF Precip 32 | WRF Updrafts 33 | WRF Mesoscale Vorticity 34 | 35 | 36 | .. nbgallery:: 37 | :caption: Calculating Bulk Statistics 38 | 39 | Calculating Bulk Statistics during Feature Detection 40 | Calculating Bulk Statistics during Segmentation 41 | Calculating Bulk Statistics as a Postprocessing Step 42 | 43 | 44 | .. nbgallery:: 45 | :caption: Examples of Using *tobac* with Large Datasets and in Parallel 46 | 47 | Simple Dask Parallel Tutorial 48 | 49 | 50 | The notebooks can be found in the **examples** folder as part of the Python package. The necessary input data for these examples is available on Zenodo and can be downloaded automatically by the Jupyter notebooks. -------------------------------------------------------------------------------- /doc/feature_detection/index.rst: -------------------------------------------------------------------------------- 1 | ###################################### 2 | Feature Detection Parameter Examples 3 | ###################################### 4 | 5 | .. toctree:: 6 | :maxdepth: 2 7 | 8 | notebooks/multiple_thresholds_example 9 | notebooks/n_min_threshold_example 10 | notebooks/position_threshold_example 11 | notebooks/feature_detection_filtering 12 | -------------------------------------------------------------------------------- /doc/feature_detection_3D_out_vars.csv: -------------------------------------------------------------------------------- 1 | Variable Name,Description,Units,Type 2 | vdim,vertical dimension in grid point space,Number of grid points,float64 3 | z,grid point z location of the feature (see vdim). Note that this is not necessarily an integer value depending on your selection of position_threshold,Number of grid points,float64 4 | altitude,z location of the feature above ground level,meters,float64 -------------------------------------------------------------------------------- /doc/feature_detection_base_out_vars.csv: -------------------------------------------------------------------------------- 1 | Variable Name,Description,Units,Type 2 | frame,Frame/time/file number; starts from 0 and increments by 1 for each subsequent time.
,n/a,int64 3 | idx,"Feature number within that frame; starts at 1, increments by 1 to the number of features for each frame, and resets to 1 when the frame increments",n/a,int 4 | hdim_1,"First horizontal dimension in grid point space (typically, although not always, N/S or y space)",Number of grid points,float 5 | hdim_2,"Second horizontal dimension in grid point space (typically, although not always, E/W or x space)",Number of grid points,float 6 | num,Number of grid points that are within the threshold of this feature,Number of grid points,int 7 | threshold_value,Maximum threshold value reached by the feature,Units of the input feature,int 8 | feature,Unique number of the feature; starts from 1 and increments by 1 to the number of features identified in all frames,n/a,int 9 | time,Time of the feature,Date and time,object/python datetime 10 | timestr,String representation of the feature time,YYYY-MM-DD HH:MM:SS,object/string 11 | y,Grid point y location of the feature (see hdim_1 and hdim_2). Note that this is not necessarily an integer value depending on your selection of position_threshold,Number of grid points,float 12 | x,Grid point x location of the feature (see also y),Number of grid points,float 13 | projection_y_coordinate,Y location of the feature in projection coordinates,Projection coordinates (usually m),float 14 | projection_x_coordinate,X location of the feature in projection coordinates,Projection coordinates (usually m),float 15 | lat,Latitude of the feature,Decimal degrees,float 16 | lon,Longitude of the feature,Decimal degrees,float -------------------------------------------------------------------------------- /doc/feature_detection_output.rst: -------------------------------------------------------------------------------- 1 | Feature Detection Output 2 | ------------------------- 3 | 4 | Feature detection outputs a `pandas` dataframe with several variables. The variables (with column names listed in the `Variable Name` column) are described below with units. Note that while these variables come initially from the feature detection step, segmentation and tracking also share some of these variables as keys (e.g., the :code:`feature` column acts as a universal key between each of these). See :doc:`tracking_output` for the additional columns added by tracking. 5 | 6 | Variables that are common to all feature detection files: 7 | 8 | .. csv-table:: tobac Feature Detection Output Variables 9 | :file: ./feature_detection_base_out_vars.csv 10 | :widths: 3, 35, 3, 3 11 | :header-rows: 1 12 | 13 | Variables that are included when using 3D feature detection in addition to those above: 14 | 15 | .. csv-table:: tobac 3D Feature Detection Output Variables 16 | :file: ./feature_detection_3D_out_vars.csv 17 | :widths: 3, 35, 3, 3 18 | :header-rows: 1 19 | 20 | 21 | One can optionally get the bulk statistics of the data points belonging to each feature region or volume. This is done using the `statistics` parameter when calling :func:`tobac.feature_detection_multithreshold`. The user-defined metrics are then added as columns to the output dataframe. 22 | 23 | 24 | -------------------------------------------------------------------------------- /doc/feature_detection_overview.rst: -------------------------------------------------------------------------------- 1 | .. _feature-detection-overview: 2 | 3 | Feature Detection Basics 4 | ------------------------ 5 | 6 | Feature detection is the first step in using **tobac**.
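As a minimal, hedged sketch of this step (parameter values are illustrative assumptions, not recommendations; ``field`` and ``dxy`` follow from :doc:`data_input`):

.. code-block:: python

    import tobac

    # ``field`` is the input data (xarray DataArray or Iris Cube) and
    # ``dxy`` the horizontal grid spacing in meters; the thresholds are
    # placeholders in the units of ``field``.
    features = tobac.feature_detection_multithreshold(
        field,
        dxy=1000.0,
        threshold=[5, 10, 15],
        target="maximum",
    )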
7 | 8 | **Currently implemented methods:** 9 | 10 | **Multiple thresholds:** 11 | 12 | Features are identified as regions above or below a sequence of subsequent thresholds (if searching for either maxima or minima in the data). Subsequently, more restrictive threshold values are used to further refine the resulting features and allow for separation of features that are connected through a continuous region of less restrictive threshold values. 13 | 14 | .. image:: images/detection_multiplethresholds.png 15 | :width: 500 px 16 | 17 | **Current development:** 18 | We are currently working on additional methods for the identification of cloud features in different types of datasets. Some of these methods are specific to the input data, such as a combination of different channels from specific satellite imagers. Some of these methods will combine the feature detection and segmentation steps in a single algorithm. 19 | -------------------------------------------------------------------------------- /doc/features_without_segmented_area.rst: -------------------------------------------------------------------------------- 1 | Features without segmented areas 2 | -------------------------------------- 3 | 4 | Not all detected features have a segmented area associated with them. Here, we show two cases in which a detected feature might 5 | not have a segmented area associated with it (meaning that the mask file does not contain the ID of the feature of interest and `ncells` in the segmentation 6 | output dataframe is 0 grid cells). 7 | 8 | 9 | .. _Case 1: 10 | 11 | ============================== 12 | Case 1: Segmentation threshold 13 | ============================== 14 | 15 | If the segmentation threshold is lower than or equal to (assuming `target='minimum'`) the highest threshold specified in the Feature Detection (see :doc:`threshold_detection_parameters`), this could leave some features without a segmented area, simply because there are no values to be segmented. 16 | 17 | Consider for example the following data with 5 being the highest threshold specified for the Feature Detection (see :doc:`feature_detection_overview`): 18 | 19 | .. image:: images/features_without_segment.png 20 | :width: 500 px 21 | 22 | If the segmentation threshold is larger than 5 (e.g. `threshold = 6`), the segmented area contains all values <= 5 (still assuming `target='minimum'`), regardless of whether the detected feature has a threshold lower than 5 (upper panels) or is exactly equal to 5 and does not contain any features with lower thresholds inside (lower panels). 23 | 24 | 25 | If the segmentation threshold is lower than or equal to the highest feature detection threshold (e.g. `threshold = 5`), features with threshold values lower than 5 still get a segmented area associated with them (upper panels). However, features that are exactly equal to 5 and do not contain any features with lower thresholds inside will not get any segmented area associated with them (lower panels), which results in no values in the mask for this feature and `ncells=0`. 26 | 27 | .. _Case 2: 28 | 29 | ======================== 30 | Case 2: Feature position 31 | ======================== 32 | 33 | Another reason for features that do not have a segmented area associated with them is the rare but possible case when the feature position is located outside of the threshold area: 34 | 35 | ..
image:: images/feature_outside_of_threshold_area.png 36 | :width: 500 px 37 | 38 | 39 | In this case, it may help to change the `position_threshold` (see :doc:`threshold_detection_parameters`) to `extreme` instead of `center`: 40 | 41 | .. image:: images/feature_outside_of_threshold_area_extreme.png 42 | :width: 500 px 43 | 44 | 45 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /doc/images/Figure_linking.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobac-project/tobac/82ac8f4cf19523ad86171a9f8d7c72e976ca3b09/doc/images/Figure_linking.png -------------------------------------------------------------------------------- /doc/images/box_vs_column_seeding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobac-project/tobac/82ac8f4cf19523ad86171a9f8d7c72e976ca3b09/doc/images/box_vs_column_seeding.png -------------------------------------------------------------------------------- /doc/images/cross.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobac-project/tobac/82ac8f4cf19523ad86171a9f8d7c72e976ca3b09/doc/images/cross.png -------------------------------------------------------------------------------- /doc/images/decision.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobac-project/tobac/82ac8f4cf19523ad86171a9f8d7c72e976ca3b09/doc/images/decision.png -------------------------------------------------------------------------------- /doc/images/detection_multiplethresholds.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobac-project/tobac/82ac8f4cf19523ad86171a9f8d7c72e976ca3b09/doc/images/detection_multiplethresholds.png -------------------------------------------------------------------------------- /doc/images/erosion_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobac-project/tobac/82ac8f4cf19523ad86171a9f8d7c72e976ca3b09/doc/images/erosion_example.png -------------------------------------------------------------------------------- /doc/images/feature_outside_of_threshold_area.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobac-project/tobac/82ac8f4cf19523ad86171a9f8d7c72e976ca3b09/doc/images/feature_outside_of_threshold_area.png -------------------------------------------------------------------------------- /doc/images/feature_outside_of_threshold_area_extreme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobac-project/tobac/82ac8f4cf19523ad86171a9f8d7c72e976ca3b09/doc/images/feature_outside_of_threshold_area_extreme.png -------------------------------------------------------------------------------- /doc/images/features_without_segment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobac-project/tobac/82ac8f4cf19523ad86171a9f8d7c72e976ca3b09/doc/images/features_without_segment.png -------------------------------------------------------------------------------- /doc/images/linking_prediction.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tobac-project/tobac/82ac8f4cf19523ad86171a9f8d7c72e976ca3b09/doc/images/linking_prediction.png -------------------------------------------------------------------------------- /doc/images/position_thresholds.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobac-project/tobac/82ac8f4cf19523ad86171a9f8d7c72e976ca3b09/doc/images/position_thresholds.png -------------------------------------------------------------------------------- /doc/images/sat_radar_combined.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobac-project/tobac/82ac8f4cf19523ad86171a9f8d7c72e976ca3b09/doc/images/sat_radar_combined.png -------------------------------------------------------------------------------- /doc/images/search.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobac-project/tobac/82ac8f4cf19523ad86171a9f8d7c72e976ca3b09/doc/images/search.png -------------------------------------------------------------------------------- /doc/images/sigma_threshold_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobac-project/tobac/82ac8f4cf19523ad86171a9f8d7c72e976ca3b09/doc/images/sigma_threshold_example.png -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | .. 2 | Tobac homepage 3 | 4 | tobac - Tracking and Object-Based Analysis of Clouds 5 | ------------------------------------------------------- 6 | 7 | **tobac** is a Python package to rapidly identify, track and analyze clouds in different types of gridded datasets, such as 3D model output from cloud-resolving model simulations or 2D data from satellite retrievals. 8 | 9 | The software is set up in a modular way to include different algorithms for feature identification, tracking, and analyses. **tobac** is also input-variable agnostic: it does not rely on specific input variables or on a specific grid to work. 10 | 11 | Individual features are identified as either maxima or minima in a 2D or 3D time-varying field (see :doc:`feature_detection_overview`). An associated volume can then be determined using these features with a separate (or identical) time-varying 2D or 3D field and a threshold value (see :doc:`segmentation`). The identified objects are linked into consistent trajectories representing the cloud over its lifecycle in the tracking step. Analysis and visualization methods provide a convenient way to use and display the tracking results. 12 | 13 | **Version 1.2 of tobac** and some example applications are described in a peer-reviewed article in Geoscientific Model Development as: 14 | 15 | Heikenfeld, M., Marinescu, P. J., Christensen, M., Watson-Parris, D., Senf, F., van den Heever, S. C., and Stier, P.: tobac 1.2: towards a flexible framework for tracking and analysis of clouds in diverse datasets, Geosci. Model Dev., 12, 4551–4570, https://doi.org/10.5194/gmd-12-4551-2019, 2019. 16 | 17 | **Version 1.5 of tobac** and the major enhancements that came with that version are described in the following peer-reviewed article in Geoscientific Model Development: 18 | 19 | Sokolowsky, G. A., Freeman, S. W., Jones, W. K., Kukulies, J., Senf, F., Marinescu, P. J., Heikenfeld, M., Brunner, K. N., Bruning, E.
C., Collis, S. M., Jackson, R. C., Leung, G. R., Pfeifer, N., Raut, B. A., Saleeby, S. M., Stier, P., and van den Heever, S. C.: tobac v1.5: Introducing Fast 3D Tracking, Splits and Mergers, and Other Enhancements for Identifying and Analysing Meteorological Phenomena. Geoscientific Model Development, 17(13), 5309-5330. https://doi.org/10.5194/gmd-17-5309-2024, 2024. 20 | 21 | 22 | The project is currently being extended by several contributors to include additional workflows and algorithms using the same structure, syntax, and data formats. 23 | 24 | .. toctree:: 25 | :caption: Basic Information 26 | :maxdepth: 2 27 | 28 | installation 29 | data_input 30 | analysis 31 | plotting 32 | big_datasets 33 | examples 34 | publications 35 | 36 | .. toctree:: 37 | :caption: Feature Detection 38 | :maxdepth: 2 39 | 40 | feature_detection_overview 41 | threshold_detection_parameters 42 | feature_detection/index 43 | feature_detection_output 44 | 45 | .. toctree:: 46 | :caption: Segmentation 47 | :maxdepth: 2 48 | 49 | segmentation 50 | segmentation_parameters 51 | segmentation_output 52 | features_without_segmented_area 53 | transform_segmentation 54 | 55 | .. toctree:: 56 | :caption: Tracking 57 | :maxdepth: 2 58 | 59 | linking 60 | tracking_output 61 | 62 | .. toctree:: 63 | :caption: Merge/Split 64 | :maxdepth: 2 65 | 66 | merge_split 67 | 68 | 69 | .. toctree:: 70 | :caption: Developer guide 71 | :maxdepth: 3 72 | 73 | code_structure 74 | contributing 75 | code_reviews 76 | mentoring 77 | 78 | .. toctree:: 79 | :caption: Compute bulk statistics 80 | :maxdepth: 2 81 | 82 | bulk_statistics/index 83 | 84 | .. toctree:: 85 | :caption: API Reference 86 | :maxdepth: 3 87 | 88 | tobac 89 | -------------------------------------------------------------------------------- /doc/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ------------ 3 | tobac works with Python 3 installations. 4 | 5 | The easiest way is to install the most recent version of tobac via conda or mamba and the conda-forge channel: 6 | 7 | :code:`conda install -c conda-forge tobac` or :code:`mamba install -c conda-forge tobac` 8 | 9 | This will take care of all necessary dependencies and should do the job for most users. It also allows for an easy update of the installation by 10 | 11 | :code:`conda update -c conda-forge tobac` or :code:`mamba update -c conda-forge tobac` 12 | 13 | 14 | You can also install tobac via pip, which is mainly interesting for development purposes or for using specific development branches of the GitHub repository. 15 | 16 | The following Python packages are required (including dependencies of these packages): 17 | 18 | *numpy*, *scipy*, *scikit-image*, *pandas*, *pytables*, *matplotlib*, *iris*, *xarray*, *cartopy*, *trackpy* 19 | 20 | If you are using Anaconda, the following command should make sure all dependencies are met and up to date: 21 | 22 | .. code-block:: console 23 | 24 | conda install -c conda-forge -y numpy scipy scikit-image pandas pytables matplotlib iris xarray cartopy trackpy 25 |
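Whichever installation route you choose, a quick check that the package is importable:

.. code-block:: python

    import tobac

    print(tobac.__version__)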
26 | You can also install the package directly from GitHub with pip using either of the following commands: 27 | 28 | ``pip install --upgrade git+ssh://git@github.com/tobac-project/tobac.git`` 29 | 30 | ``pip install --upgrade git+https://github.com/tobac-project/tobac.git`` 31 | 32 | You can also clone the package with either of the following commands: 33 | 34 | ``git clone git@github.com:tobac-project/tobac.git`` 35 | 36 | ``git clone https://github.com/tobac-project/tobac.git`` 37 | 38 | and install the package from the locally cloned version (the trailing slash is necessary): 39 | 40 | ``pip install --upgrade tobac/`` -------------------------------------------------------------------------------- /doc/linking.rst: -------------------------------------------------------------------------------- 1 | Linking 2 | ------- 3 | Currently implemented options for linking detected features into tracks: 4 | 5 | **Trackpy:** 6 | 7 | This method uses the trackpy library (http://soft-matter.github.io/trackpy). 8 | This approach only takes the point-like position of the feature, e.g. determined as the weighted mean, into account. Candidate features for linking are searched for within a search radius defined by the parameters v_max or d_max. The position of the center of this search radius is determined by the method keyword. method="random" uses the position of the current feature (:math:`t_i`), while method="predict" makes use of the information from the linked feature in the previous timestep (:math:`t_{i-1}`) to predict the next position. For a simple case the search radii of the two methods look like this: 9 | 10 | .. image:: images/linking_prediction.png 11 | :width: 500 px 12 | 13 | If there is only one feature in the search radius, the linking can happen immediately. If there are none, the track ends at this timestep. If there are more options, trackpy performs a decision process. Assume there are :math:`N` features in the current and also :math:`N` in the next timeframe and they are all within each search radius. This means there are :math:`N!` options for linking. Each of these options means that :math:`N` distances between the center of the search radius of a current feature and a feature from the next time frame :math:`\delta_n, n=1, 2, ..., N` are traveled by the features. Trackpy will calculate the sum of the squared distances 14 | 15 | .. math:: 16 | 17 | \sum_{n=1}^{N} \delta_n^2. 18 | 19 | This sum is calculated for every option, and the option with the lowest value is used for linking. As an example, consider these two crossing features: 20 | 21 | .. image:: images/cross.png 22 | :width: 500 px 23 | 24 | If the search radius is chosen large enough, each will contain two options, a horizontal and a diagonal feature: 25 | 26 | .. image:: images/search.png 27 | :width: 500 px 28 | 29 | In the end, the linking differs between the two methods. The horizontal features are closer to the current position than the diagonal ones. This is why the "random" method links these. On the other hand, the diagonal features lie exactly on the predicted positions for the "predict" method. This means that the sum of squared distances is 0, making them the optimal choice for linking in this case: 30 | 31 | .. image:: images/decision.png 32 | :width: 500 px 33 |
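As a hedged sketch of calling the trackpy-based linking (parameter values are illustrative; ``features``, ``field``, ``dt``, and ``dxy`` are assumed from the previous steps):

.. code-block:: python

    import tobac

    # Link detected features into cells; ``v_max`` (in m/s) defines the
    # search radius, and ``method_linking`` selects "random" or "predict".
    tracks = tobac.linking_trackpy(
        features,
        field,
        dt=dt,
        dxy=dxy,
        v_max=10,
        method_linking="predict",
    )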
-------------------------------------------------------------------------------- /doc/mentoring.rst: -------------------------------------------------------------------------------- 1 | Mentoring and Collaboration 2 | ---------------------------- 3 | 4 | ============================ 5 | Writing code collaboratively 6 | ============================ 7 | 8 | We firmly believe that code can only get better if more than two eyes and one brain work on it. Therefore, we aim to write code collaboratively, in particular, when comprehensive refactoring or enhancements of the code are done. In practice, this can be done by creating a **draft pull request**. This makes it really easy to iteratively improve a pull request with the feedback from others until the pull request is ready for review. 9 | 10 | 11 | When you work on a comprehensive feature with multiple developers, it is recommended to create a draft pull request on the :code:`dev_*` branch. As explained in :doc:`our branching strategy `, this branch does not undergo any protection rules. It is meant to experiment with new code and all collaborators of the `tobac-project organization `_ can directly push to this branch. Creating a draft pull request has the advantage of facilitating the communication with other developers who contribute to the same new feature. You can directly see which changes they make, comment on them, and discuss ways forward. 12 | 13 | ============== 14 | Get a mentor 15 | ============== 16 | 17 | **Is this your first time contributing to an open-source project?** 18 | 19 | Reach out to the **tobac** developer group and get a mentor! One of our developers will help you get started and explain how our workflow works. You are, of course, always free to post any questions to GitHub Discussions or our Slack channel, or to write an email. But sometimes it can also be nice to have a specific person to refer to when things seem overwhelming in the beginning. 20 | 21 | =============== 22 | Pair reviews 23 | =============== 24 | 25 | Another great way to collaborate is pair reviews, in which you review code together with another developer. You can, for example, reach out to us when you have submitted a pull request and would like to talk through the review points with one of the reviewers in order to collaboratively come up with creative solutions to remaining issues. If you are a reviewer, you can offer a pair review to the person who created the pull request and help them address certain review points. 26 | -------------------------------------------------------------------------------- /doc/merge_split.rst: -------------------------------------------------------------------------------- 1 | Merge and Split 2 | ====================== 3 | 4 | This submodule is a post-processing step to address tracked cells which merge/split. 5 | The first iteration of this module is to combine the cells which are merging but have received a new cell id (and are considered a new cell) once merged. 6 | This module uses a minimum Euclidean spanning tree to combine merging cells, thus the suffix MEST in the function name. 7 | This submodule will label merged/split cells with a TRACK number in addition to their CELL numbers. 8 | 9 | Features, cells, and tracks are combined using parent/child nomenclature. 10 | (A quick note on terms: "feature" is a detected object at a single time step (see :doc:`feature_detection_overview`). "cell" is a series of features linked together over multiple timesteps (see :doc:`linking`). "track" may be an individual cell or a series of cells which have merged and/or split.)
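A hedged usage sketch, expanding on the module's ``merge_split_MEST`` example shown below (``Track`` is the dataframe returned by tracking; the ``dxy`` value is an illustrative assumption):

.. code-block:: python

    from tobac.merge_split import merge_split_MEST

    # Combine merging/splitting cells into tracks; returns an xarray
    # Dataset with the parent/child mappings described below.
    d = merge_split_MEST(Track, dxy=1000.0)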
“cell” is a series of features linked together over multiple timesteps (see :doc:`linking`). "track" may be an individual cell or series of cells which have merged and/or split.) 11 | 12 | Overview of the output dataframe from merge_split 13 | 14 | d : `xarray.core.dataset.Dataset` 15 | 16 | xarray dataset of tobac merge/split cells with parent and child designations. 17 | 18 | Parent/child variables include: 19 | 20 | * cell_parent_track_id: The associated track id for each cell. All cells that have merged or split will have the same parent track id. If a cell never merges/splits, only one cell will have a particular track id. 21 | 22 | * feature_parent_cell_id: The associated parent cell id for each feature. All feature in a given cell will have the same cell id. 23 | 24 | * feature_parent_track_id: The associated parent track id for each feature. This is not the same as the cell id number. 25 | 26 | * track_child_cell_count: The total number of features belonging to all child cells of a given track id. 27 | 28 | * cell_child_feature_count: The total number of features for each cell. 29 | 30 | 31 | Example usage: 32 | 33 | ``d = merge_split_MEST(Track)`` 34 | 35 | merge_split outputs an `xarray` dataset with several variables. The variables, (with column names listed in the `Variable Name` column), are described below with units. Coordinates and dataset dimensions are Feature, Cell, and Track. 36 | 37 | Variables that are common to all feature detection files: 38 | 39 | .. csv-table:: tobac Merge_Split Track Output Variables 40 | :file: ./merge_split_out_vars.csv 41 | :widths: 3, 35, 3, 3 42 | :header-rows: 1 43 | 44 | -------------------------------------------------------------------------------- /doc/merge_split_out_vars.csv: -------------------------------------------------------------------------------- 1 | Variable Name,Description,Units,Type 2 | feature,Unique number of the feature; starts from 1 and increments by 1 to the number of features identified in all frames,n/a,int64 3 | cell,Tracked cell number; generally starts from 1. Untracked cell value is -1.,n/a,int64 4 | track,Unique number of the track; starts from 0 and increments by 1 to the number of tracks identified. Untracked cells and features have a track id of -1.,n/a,int64 5 | cell_parent_track_id,"The associated track id for each cell. All cells that have merged or split will have the same parent track id. If a cell never merges/splits, only one cell will have a particular track id.",n/a,int64 6 | feature_parent_cell_id,The associated parent cell id for each feature. All feature in a given cell will have the same cell id.,n/a,int64 7 | feature_parent_track_id,The associated parent track id for each feature. This is not the same as the cell id number.,n/a,int64 8 | track_child_cell_count,The number of features belonging to all child cells of a given track id.,n/a,int64 9 | cell_child_feature_count,The number of features for each cell.,n/a,int64 -------------------------------------------------------------------------------- /doc/plotting.rst: -------------------------------------------------------------------------------- 1 | Plotting 2 | ======== 3 | tobac provides functions to conveniently visualise the tracking results and analyses. -------------------------------------------------------------------------------- /doc/publications.rst: -------------------------------------------------------------------------------- 1 | .. 
_Refereed Publications: 2 | 3 | Refereed Publications 4 | ===================== 5 | 6 | **List of peer-reviewed publications in which tobac has been used:** 7 | 8 | ------------ 9 | 10 | .. list-table:: 11 | :widths: 30 12 | :class: wy-table-responsive 13 | 14 | * - Sokolowsky, G. A., Freeman, S. W., Jones, W. K., Kukulies, J., Senf, F., Marinescu, P. J., Heikenfeld, M., Brunner, K. N., Bruning, E. C., Collis, S. M., Jackson, R. C., Leung, G. R., Pfeifer, N., Raut, B. A., Saleeby, S. M., Stier, P., and van den Heever, S. C.: tobac v1.5 (2024). Introducing Fast 3D Tracking, Splits and Mergers, and Other Enhancements for Identifying and Analysing Meteorological Phenomena. Geoscientific Model Development, 17(13), 5309-5330. https://doi.org/10.5194/gmd-17-5309-2024. 15 | 16 | * - Heikenfeld, M., Marinescu, P. J., Christensen, M., Watson-Parris, D., Senf, F., van den Heever, S. C., and Stier, P. (2019). tobac 1.2: towards a flexible framework for tracking and analysis of clouds in diverse datasets, Geosci. Model Dev., 12, 4551–4570, https://doi.org/10.5194/gmd-12-4551-2019 17 | 18 | * - Bukowski, J., & van den Heever, S. C. (2021). Direct radiative effects in haboobs. *Journal of Geophysical Research: Atmospheres*, 126(21), e2021JD034814, doi:10.1029/2021JD034814. 19 | 20 | * - Bukowski, J. (2021). Mineral Dust Lofting and Interactions with Cold Pools (Doctoral dissertation, Colorado State University). 21 | 22 | * - Heikenfeld, M. (2019). Aerosol effects on microphysical processes and deep convective clouds (Doctoral dissertation, University of Oxford). 23 | * - Kukulies, J., Chen, D., & Curio, J. (2021). The role of mesoscale convective systems in precipitation in the Tibetan Plateau region. *Journal of Geophysical Research: Atmospheres*, 126(23), e2021JD035279. doi:10.1029/2021JD035279. 24 | 25 | * - Kukulies, J., Lai, H. W., Curio, J., Feng, Z., Lin, C., Li, P., Ou, T., Sugimoto, S. & Chen, D. (2023). Mesoscale convective systems in the Third pole region: Characteristics, mechanisms and impact on precipitation. *Frontiers in Earth Science*, 11, 1143380. 26 | 27 | * - Li, Y., Liu, Y., Chen, Y., Chen, B., Zhang, X., Wang, W. & Huo, Z. (2021). Characteristics of Deep Convective Systems and Initiation during Warm Seasons over China and Its Vicinity. *Remote Sensing*, 13(21), 4289. doi:10.3390/rs13214289. 28 | 29 | * - Leung, G. R., Saleeby, S. M., Sokolowsky, G. A., Freeman, S. W., & van den Heever, S. C. (2023). Aerosol–cloud impacts on aerosol detrainment and rainout in shallow maritime tropical clouds. *Atmospheric Chemistry and Physics*, 23(9), 5263-5278. 30 | 31 | * - Marinescu, P. J., Van Den Heever, S. C., Heikenfeld, M., Barrett, A. I., Barthlott, C., Hoose, C., Fan, J., Fridlind, A. M., Matsui, T., Miltenberger, A. K., Stier, P., Vie, B., White, B. A., & Zhang, Y. (2021). Impacts of varying concentrations of cloud condensation nuclei on deep convective cloud updrafts—a multimodel assessment. *Journal of the Atmospheric Sciences*, 78(4), 1147-1172, doi: 10.1175/JAS-D-20-0200.1. 32 | 33 | * - Marinescu, P. J. (2020). Observations of Aerosol Particles and Deep Convective Updrafts and the Modeling of Their Interactions (Doctoral dissertation, Colorado State University). 34 | 35 | * - Oue, M., Saleeby, S. M., Marinescu, P. J., Kollias, P., & van den Heever, S. C. (2022). Optimizing radar scan strategies for tracking isolated deep convection using observing system simulation experiments. *Atmospheric Measurement Techniques*, 15(16), 4931-4950. 36 | 37 | * - Raut, B. 
A., Jackson, R., Picel, M., Collis, S. M., Bergemann, M., & Jakob, C. (2021). An Adaptive Tracking Algorithm for Convection in Simulated and Remote Sensing Data. *Journal of Applied Meteorology and Climatology*, 60(4), 513-526, doi:10.1175/JAMC-D-20-0119.1. 38 | 39 | * - Whitaker, J. W. (2021). An Investigation of an East Pacific Easterly Wave Genesis Pathway and the Impact of the Papagayo and Tehuantepec Wind Jets on the East Pacific Mean State and Easterly Waves (Doctoral dissertation, Colorado State University). 40 | 41 | * - Zhang, X., Yin, Y., Kukulies, J., Li, Y., Kuang, X., He, C., ... & Chen, J. (2021). Revisiting Lightning Activity and Parameterization Using Geostationary Satellite Observations. *Remote Sensing*, 13(19), 3866, doi: 10.3390/rs13193866. 42 | 43 | 44 | **Have you used tobac in your research?** 45 | 46 | Please contact us (e.g. by joining our `tobac google group `_) or submit a pull request containing your reference in our `main repo on GitHub `_! 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /doc/requirements.txt: -------------------------------------------------------------------------------- 1 | ipykernel 2 | nbsphinx 3 | numpy 4 | sphinx_rtd_theme 5 | sphinx-gallery 6 | sphinx-toolbox 7 | -------------------------------------------------------------------------------- /doc/segmentation.rst: -------------------------------------------------------------------------------- 1 | Segmentation 2 | ---------------- 3 | The segmentation step aims at associating cloud areas (2D data) or cloud volumes (3D data) with the identified and tracked features. 4 | 5 | **Currently implemented methods:** 6 | 7 | **Watershedding in 2D:** 8 | Markers are set at the individual feature positions identified in the detection step. Then watershedding with a fixed threshold is used to determine the area around each feature above/below that threshold value. This results in a mask with the feature id at all pixels identified as part of the clouds and zeros in all cloud-free areas. 9 | 10 | **Watershedding in 3D:** 11 | Markers are set in the entire column above the individual feature positions identified in the detection step. Then watershedding with a fixed threshold is used to determine the volume around each feature above/below that threshold value. This results in a mask with the feature id at all voxels identified as part of the clouds and zeros in all cloud-free areas. 12 | 13 | -------------------------------------------------------------------------------- /doc/segmentation_out_vars.csv: -------------------------------------------------------------------------------- 1 | Variable Name,Description,Units,Type 2 | ncells,Total number of grid points that belong to the segmented area associated with the feature.
,n/a,int64 3 | -------------------------------------------------------------------------------- /doc/segmentation_out_vars_statistics.csv: -------------------------------------------------------------------------------- 1 | Variable Name,Description,Units,Type 2 | feature_mean,Mean of feature data points,same as input field,float 3 | feature_max,Maximum value of feature data points,same as input field,float 4 | feature_min,Minimum value of feature data points,same as input field,float 5 | feature_sum,Sum of feature data points,same as input field,float 6 | major_axis_length,The length of the major axis of the ellipse that has the same normalized second central moments as the feature area,"number of grid cells, multiply by dx to get distance unit",float 7 | feature_percentiles,Percentiles from 0 to 100 (with increment 1) of feature data distribution,same as input field,ndarray 8 | -------------------------------------------------------------------------------- /doc/segmentation_output.rst: -------------------------------------------------------------------------------- 1 | Segmentation Output 2 | ------------------------- 3 | 4 | Segmentation outputs a mask (`iris.cube.Cube` and in the future `xarray.DataArray`) with the same dimensions as the input field, where each segmented area has the same ID as its corresponding feature (see `feature` column in :doc:`feature_detection_output`). Note that there are some cases in which a feature is not attributed to a segmented area associated with it (see :doc:`features_without_segmented_area`). 5 | 6 | Segmentation also outputs the same `pandas` dataframe as obtained by Feature Detection (see :doc:`feature_detection_overview`) but with one additional column: 7 | 8 | .. csv-table:: tobac Segmentation Output Variables 9 | :file: ./segmentation_out_vars.csv 10 | :widths: 3, 35, 3, 3 11 | :header-rows: 1 12 | 13 | One can optionally get the bulk statistics of the data points belonging to each segmented feature (i.e. either the 2D area or the 3D volume assigned to the feature). This is done using the `statistics` parameter when calling :func:`tobac.segmentation.segmentation`. The user-defined metrics are then added as columns to the output dataframe, for example: 14 | 15 | .. csv-table:: tobac Segmentation Output Statistics Variables 16 | :file: ./segmentation_out_vars_statistics.csv 17 | :widths: 3, 35, 3, 3 18 | :header-rows: 1 19 | 20 | Note that these statistics refer to the data fields that are used as input for the segmentation. It is possible to run the segmentation with different input data (see :doc:`transform_segmentation`) to get statistics of a feature based on different variables (e.g. get statistics of cloud top temperatures as well as rain rates for a certain storm object). 21 | -------------------------------------------------------------------------------- /doc/segmentation_parameters.rst: -------------------------------------------------------------------------------- 1 | Watershedding Segmentation Parameters 2 | ------------------------------------- 3 | 4 | Appropriate parameters must be chosen to properly use the watershedding segmentation module in *tobac*. This page gives a brief overview of parameters available in watershedding segmentation. 5 | 6 | A full list of parameters and descriptions can be found in the API Reference: :py:meth:`tobac.segmentation.segmentation`.
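As a brief, hedged sketch of a typical call (parameter values are placeholders; ``features`` is the feature detection output and ``field`` the data to segment):

.. code-block:: python

    import tobac

    # Returns the segmentation mask and a copy of the feature dataframe
    # with the ``ncells`` column added.
    mask, features_out = tobac.segmentation.segmentation(
        features,
        field,
        dxy=1000.0,    # horizontal grid spacing in meters
        threshold=10,  # single segmentation threshold
        target="maximum",
    )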
7 | 8 | ========================= 9 | Basic Operating Procedure 10 | ========================= 11 | The *tobac* watershedding segmentation algorithm selects regions of the data :code:`field` with values greater than :code:`threshold` and associates those regions with the features :code:`features` detected by feature detection (see :doc:`feature_detection_overview`). This algorithm uses a *watershedding* approach, which sets the individual features as initial seed points, and then has identified regions grow from those original seed points. For further information on watershedding segmentation, see `the scikit-image documentation `_. 12 | 13 | Note that you can run the watershedding segmentation algorithm on any variable that shares a grid with the variable detected in the feature detection step. It is not required that the variable used in feature detection be the same as the one in segmentation (e.g., you can detect updraft features and then run segmentation on total condensate). 14 | 15 | Segmentation can be run on 2D or 3D input data and with 2D or 3D feature detection output, but segmentation on 3D data using a 2D feature detection field requires careful consideration of where the vertical seeding will occur (see `Level`_). 16 | 17 | .. _Target: 18 | 19 | ====== 20 | Target 21 | ====== 22 | The :code:`target` parameter works similarly to how it works in feature detection (see :doc:`threshold_detection_parameters`). To segment areas that are greater than :code:`threshold`, use :code:`target='maximum'`. To segment areas that are less than :code:`threshold`, use :code:`target='minimum'`. 23 | 24 | .. _Threshold: 25 | 26 | ========= 27 | Threshold 28 | ========= 29 | Unlike in multiple threshold detection in Feature Detection, Watershedding Segmentation only accepts one threshold. This value will set either the minimum (for :code:`target='maximum'`) or maximum (for :code:`target='minimum'`) value to be segmented. Note that the segmentation is not inclusive of the threshold value, meaning that only values greater than (for :code:`target='maximum'`) or smaller than (for :code:`target='minimum'`) the threshold are included in the segmented region. 30 | 31 | 32 | .. _Level: 33 | 34 | =================================================== 35 | Projecting 2D Spatial Features into 3D Segmentation 36 | =================================================== 37 | When running feature detection on a 2D dataset and then using these detected features to segment data in 3D, there is clearly no information on where to put the seeds in the vertical. This is currently controlled by the :code:`level` parameter. By default, this parameter is :code:`None`, which seeds the full column at every 2D detected feature point. As *tobac* does not run a continuity check, this can result in undesired behavior, such as clouds in multiple layers being detected as one large object. 38 | 39 | :code:`level` can also be set to a `slice `_, which determines where in the vertical dimension (see `Vertical Coordinate`_) the features are seeded from. Note that :code:`level` operates in *array* coordinates rather than physical coordinates. 40 | 41 | 42 | .. _seg_2d_feature_3d: 43 | 44 | =================================================== 45 | Projecting 3D Spatial Features into 2D Segmentation 46 | =================================================== 47 | When running feature detection on a 3D dataset and then using these detected features to segment data in 2D, the vertical coordinate is ignored. 
In case of vertically overlapping features, the larger :code:`Feature` value is currently seeded. 48 | 49 | 50 | .. _seg_3d_feature_3d: 51 | 52 | =================================================== 53 | Projecting 3D Spatial Features into 3D Segmentation 54 | =================================================== 55 | When running feature detection on a 3D dataset and then using these detected features to segment data in 3D, there are currently two options for determining how to seed the watershedding algorithm: *column* seeding (set by :code:`seed_3D_flag='column'`) and *box* seeding (set by :code:`seed_3D_flag='box'`). We generally recommend *box* seeding when running feature detection and segmentation in 3D. 56 | 57 | **Column** seeding (:code:`seed_3D_flag='column'`) works by setting seed markers throughout some or all of the vertical column at all detected feature centroids (i.e., one column per feature detected). While the default behavior is to seed throughout the full vertical column, the vertical extent of the seeds can be set by passing a `slice `_ into the :code:`level` parameter. Note that :code:`level` operates in *array* coordinates rather than physical coordinates. 58 | 59 | **Box** seeding (:code:`seed_3D_flag='box'`) sets a cubic or rectangular box of seed markers around the detected feature in 3D space. The marker size is user defined (in array coordinates) by :code:`seed_3D_size` as either an integer (for a cube) or a tuple of :code:`(int, int, int)`, ordered :code:`(vertical, hdim_1, hdim_2)`. Note that :code:`seed_3D_size` must be an odd number to avoid the box becoming biased to one side. If two seed boxes overlap, the seeded area is marked with the closest feature centroid. 60 | 61 | .. figure:: images/box_vs_column_seeding.png 62 | :scale: 50 % 63 | :alt: an example 3D plot showing column seeding linking features that should not be linked 64 | 65 | An example plot from a numerical model simulation showing the real-world difference between column and box seeding with a tilted feature. As this image demonstrates, box seeding is typically recommended for 3D data. 66 | 67 | 68 | .. _Max Distance: 69 | 70 | ================ 71 | Maximum Distance 72 | ================ 73 | *tobac*'s watershedding segmentation allows you to set a maximum distance away from the feature within which grid points can be classified as part of that feature's segmented region. :code:`max_distance` sets this distance in meters from the detected feature. To turn this option off, set :code:`max_distance=None`. 74 | -------------------------------------------------------------------------------- /doc/testing_sphinx-based_rendering.rst: -------------------------------------------------------------------------------- 1 | How to check the Sphinx-based rendering 2 | --------------------------------------- 3 | 4 | 5 | The workflow has been tested on a Linux system. We aim to build a static 6 | website out of the documentation material present in ``tobac``. 7 | 8 | ================================== 9 | 1. Preparing the Local Environment 10 | ================================== 11 | 12 | - **choose a separate place for your testing** 13 | 14 | I will use the temporary directory ``/tmp/website-testing`` which I 15 | need to create. You can use a dedicated place of your choice … 16 | 17 | .. code:: bash 18 | 19 | > mkdir /tmp/website-testing 20 | > cd /tmp/website-testing 21 | 22 | I will indicate my position now with the ``/tmp/website-testing>`` 23 | prompt.
24 | 25 | - **get the official repository** 26 | 27 | .. code:: bash 28 | 29 | /tmp/website-testing> git clone https://github.com/tobac-project/tobac 30 | 31 | You might like to test a certain remote branch ``<branch>``, then do: 32 | 33 | .. code:: bash 34 | 35 | /tmp/website-testing/tobac> git fetch --all 36 | /tmp/website-testing/tobac> git checkout -t origin/<branch> 37 | 38 | - **Python environment** 39 | 40 | - create a python virtual env 41 | 42 | .. code:: bash 43 | 44 | /tmp/website-testing> python -m venv .python3-venv 45 | 46 | 47 | - and install requirements 48 | 49 | .. code:: bash 50 | 51 | # deactivating conda is only necessary if you loaded conda before … 52 | /tmp/website-testing> conda deactivate 53 | 54 | # activate the new env and upgrade ``pip`` 55 | /tmp/website-testing> source .python3-venv/bin/activate 56 | /tmp/website-testing> pip install --upgrade pip 57 | 58 | # now everything is installed into the local python env! 59 | /tmp/website-testing> pip install -r tobac/doc/requirements.txt 60 | 61 | # and also install the RTD theme 62 | /tmp/website-testing> pip install sphinx_rtd_theme 63 | 64 | `pip`-based installation takes a bit of time, but is much faster than `conda`. 65 | 66 | 67 | If the installation runs without problems, you are ready to build the website. 68 | 69 | 70 | ================================== 71 | 2. Building the Website 72 | ================================== 73 | 74 | Only a few steps are needed to build the website: 75 | 76 | - **running sphinx for rendering** 77 | 78 | .. code:: bash 79 | 80 | /tmp/website-testing> cd tobac 81 | 82 | /tmp/website-testing/tobac> sphinx-build -b html doc doc/_build/html 83 | 84 | If no severe errors appeared: 85 | 86 | - **view the HTML content** 87 | 88 | .. code:: bash 89 | 90 | /tmp/website-testing/tobac> firefox doc/_build/html/index.html 91 | 92 | ================================== 93 | 3. Parsing Your Local Changes 94 | ================================== 95 | 96 | Now, we connect to your locally hosted ``tobac`` repository and your 97 | development branch. 98 | 99 | - **connect to your local repo**: Assume your repo is located at 100 | ``/tmp/tobac-testing/tobac``, then add a new remote alias and fetch 101 | all content with 102 | 103 | .. code:: bash 104 | 105 | /tmp/website-testing/tobac> git remote add local-repo /tmp/tobac-testing/tobac 106 | /tmp/website-testing/tobac> git fetch --all 107 | 108 | - **check your development branch out**: Now, assume your 109 | development branch is called ``my-devel``, then do 110 | 111 | .. code:: bash 112 | 113 | # to get a first overview on available branches 114 | /tmp/website-testing/tobac> git branch --all 115 | 116 | # and then actually get your development branch 117 | /tmp/website-testing/tobac> git checkout -b my-devel local-repo/my-devel 118 | 119 | You should see your developments now … 120 | 121 | - **build and view website again** 122 | 123 | .. code:: bash 124 | 125 | /tmp/website-testing/tobac> sphinx-build -M clean doc doc/_build 126 | /tmp/website-testing/tobac> sphinx-build -b html doc doc/_build/html 127 | /tmp/website-testing/tobac> firefox doc/_build/html/index.html 128 | 129 | 130 | ========================================== 131 | Option: Check Rendering of a Pull Request 132 | ========================================== 133 | 134 | - **check the pull request out**: Now, assume the PR has the ID ``<ID>`` and you define the branch name ``BRANCH_NAME`` as you like 135 | 136 | ..
code:: bash 137 | 138 | # to get the PR shown as a dedicated branch 139 | /tmp/website-testing/tobac> git fetch upstream pull/<ID>/head:BRANCH_NAME 140 | 141 | # and then actually get this PR as a branch 142 | /tmp/website-testing/tobac> git checkout BRANCH_NAME 143 | 144 | You should see the PR now ... 145 | 146 | - **build and view website again** 147 | 148 | .. code:: bash 149 | 150 | /tmp/website-testing/tobac> sphinx-build -M clean doc doc/_build 151 | /tmp/website-testing/tobac> sphinx-build -b html doc doc/_build/html 152 | /tmp/website-testing/tobac> firefox doc/_build/html/index.html 153 | 154 | 155 | -------------------------------------------------------------------------------- /doc/threshold_detection_parameters.rst: -------------------------------------------------------------------------------- 1 | Threshold Feature Detection Parameters 2 | -------------------------------------- 3 | 4 | The proper selection of parameters used to detect features with the *tobac* multiple threshold feature detection is a critical first step in using *tobac*. This page describes the various parameters available and provides broad comments on the usage of each parameter. 5 | 6 | A full list of parameters and descriptions can be found in the API Reference: :py:meth:`tobac.feature_detection.feature_detection_multithreshold` 7 | 8 | ========================= 9 | Basic Operating Procedure 10 | ========================= 11 | The *tobac* multiple threshold algorithm searches the input data (`field_in`) for contiguous regions of data greater than (with `target='maximum'`, see `Target`_) or less than (with `target='minimum'`) the selected thresholds (see `Thresholds`_). Contiguous regions (see `Minimum Threshold Number`_) are then identified as individual features, with a single point representing their location in the output (see `Position Threshold`_). Using this output (see :doc:`feature_detection_output`), segmentation (:doc:`segmentation`) and tracking (:doc:`linking`) can be run. 12 | 13 | .. _Target: 14 | 15 | ====== 16 | Target 17 | ====== 18 | First, you must determine whether you want to detect features on maxima or minima in your dataset. For example, if you are trying to detect clouds in IR satellite data, where clouds have relatively lower brightness temperatures than the background, you would set :code:`target='minimum'`. If, instead, you are trying to detect clouds by cloud water in model data, where an increase in mixing ratio indicates the presence of a cloud, you would set :code:`target='maximum'`. The :code:`target` parameter will determine the selection of many of the following parameters. 19 | 20 | .. _Thresholds: 21 | 22 | ========== 23 | Thresholds 24 | ========== 25 | You can choose to detect features on either one or multiple thresholds. The first threshold (or the single threshold) sets the minimum magnitude (either lowest value for :code:`target='maximum'` or highest value for :code:`target='minimum'`) that a feature can be detected on. For example, if you have a field made up of values lower than :code:`10`, and you set :code:`target='maximum', threshold=[10,]`, *tobac* will detect no features. The feature detection uses the threshold value in an inclusive way, which means that if you set :code:`target='maximum', threshold=[10,]` and your field also contains values of :code:`10` or greater, the detected features will include all values **greater than or equal to** :code:`10`.
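As a hedged sketch of passing one versus several thresholds (``field`` and all parameter values are illustrative assumptions):

.. code-block:: python

    import tobac

    # A single threshold: one detection pass at values >= 10.
    features_single = tobac.feature_detection_multithreshold(
        field, dxy=1000.0, threshold=[10], target="maximum"
    )

    # Multiple thresholds: increasingly restrictive values can separate
    # features that are connected at the least restrictive value (see below).
    features_multi = tobac.feature_detection_multithreshold(
        field, dxy=1000.0, threshold=[10, 20, 30], target="maximum"
    )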
26 | 27 | Including *multiple* thresholds will allow *tobac* to refine the detection of features and detect multiple features that are connected through a contiguous region of less restrictive threshold values. You can see a conceptual diagram of that here: :doc:`feature_detection_overview`. To examine how setting different thresholds can change the number of features detected, see the example in this notebook: :doc:`feature_detection/notebooks/multiple_thresholds_example`. 28 | 29 | 30 | .. _Minimum Threshold Number: 31 | 32 | ======================== 33 | Minimum Threshold Number 34 | ======================== 35 | The minimum number of points per threshold, set by :code:`n_min_threshold`, determines how many contiguous pixels are required to be above the threshold for the feature to be detected. Setting this value too low can allow extraneous points to be detected as erroneous features, while setting this value too high will cause some real features to be missed. The default value for this parameter is :code:`0`, which will cause any values greater than the threshold after filtering to be identified as a feature. You can see a demonstration of the effect of increasing :code:`n_min_threshold` at: :doc:`feature_detection/notebooks/n_min_threshold_example`. 36 | 37 | .. _Position Threshold: 38 | 39 | ================ 40 | Feature Position 41 | ================ 42 | There are four ways of calculating the single point used to represent the feature center: arithmetic center, extreme point, difference weighting, and absolute weighting. Generally, difference weighting (:code:`position_threshold='weighted_diff'`) or absolute weighting (:code:`position_threshold='weighted_abs'`) is suggested for most atmospheric applications. An example of these four methods is shown below, and can be further explored in the example notebook: :doc:`feature_detection/notebooks/position_threshold_example`. 43 | 44 | .. image:: images/position_thresholds.png 45 | :width: 500 px 46 | 47 | .. _Filtering Options: 48 | 49 | ================= 50 | Filtering Options 51 | ================= 52 | Before *tobac* detects features, two filtering options can optionally be employed. First is a multidimensional Gaussian Filter (`scipy.ndimage.gaussian_filter `_), with its standard deviation controlled by the :code:`sigma_threshold` parameter. Users are not required to use this filter (to turn it off, set :code:`sigma_threshold=0`), but the use of the filter is recommended for most atmospheric datasets that are not otherwise smoothed. An example of varying the :code:`sigma_threshold` parameter can be seen in the below figure, and can be explored in the example notebook: :doc:`feature_detection/notebooks/feature_detection_filtering`. 53 | 54 | .. image:: images/sigma_threshold_example.png 55 | :width: 500 px 56 | 57 | The second filtering option is a binary erosion (`skimage.morphology.binary_erosion `_), which reduces the size of features in all directions. The amount of the erosion is controlled by the :code:`n_erosion_threshold` parameter, with larger values resulting in smaller potential features. It is not required to use this feature (to turn it off, set :code:`n_erosion_threshold=0`), and its use should be considered alongside careful selection of :code:`n_min_threshold`. The default value is :code:`n_erosion_threshold=0`. 58 | 59 | .. _Minimum Distance: 60 | 61 | ================ 62 | Minimum Distance 63 | ================ 64 | The parameter :code:`min_distance` sets the minimum distance between two detected features. If two detected features are within :code:`min_distance` of each other, the feature with the more extreme value is kept, and the feature with the less extreme value is discarded.
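As a hedged sketch combining the parameters discussed on this page (all values are illustrative, not recommendations):

.. code-block:: python

    import tobac

    features = tobac.feature_detection_multithreshold(
        field,
        dxy=1000.0,
        threshold=[10, 20, 30],
        target="maximum",
        position_threshold="weighted_diff",  # feature position method
        sigma_threshold=0.5,                 # Gaussian filter width
        n_erosion_threshold=0,               # binary erosion disabled
        n_min_threshold=4,                   # minimum pixels per threshold
        min_distance=2000.0,                 # minimum feature separation (m)
    )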
If two detected features are within :code:`min_distance` of each other, the feature with the more extreme value is kept, and the feature with the less extreme value is discarded. 65 | -------------------------------------------------------------------------------- /doc/tobac.rst: -------------------------------------------------------------------------------- 1 | tobac package 2 | ============= 3 | 4 | Submodules 5 | ---------- 6 | 7 | tobac.analysis module 8 | --------------------- 9 | 10 | tobac.analysis.cell_analysis module 11 | ----------------------------------- 12 | 13 | .. automodule:: tobac.analysis.cell_analysis 14 | :members: 15 | :undoc-members: 16 | :show-inheritance: 17 | 18 | tobac.analysis.feature_analysis module 19 | -------------------------------------- 20 | 21 | .. automodule:: tobac.analysis.feature_analysis 22 | :members: 23 | :undoc-members: 24 | :show-inheritance: 25 | 26 | tobac.analysis.spatial module 27 | ----------------------------- 28 | 29 | .. automodule:: tobac.analysis.spatial 30 | :members: 31 | :undoc-members: 32 | :show-inheritance: 33 | 34 | tobac.centerofgravity module 35 | ---------------------------- 36 | 37 | .. automodule:: tobac.centerofgravity 38 | :members: 39 | :undoc-members: 40 | :show-inheritance: 41 | 42 | tobac.feature\_detection module 43 | ------------------------------- 44 | 45 | .. automodule:: tobac.feature_detection 46 | :members: 47 | :undoc-members: 48 | :show-inheritance: 49 | 50 | tobac.merge_split module 51 | ------------------------ 52 | 53 | .. automodule:: tobac.merge_split 54 | :members: 55 | :undoc-members: 56 | :show-inheritance: 57 | 58 | tobac.plotting module 59 | --------------------- 60 | 61 | .. automodule:: tobac.plotting 62 | :members: 63 | :undoc-members: 64 | :show-inheritance: 65 | 66 | tobac.segmentation module 67 | ------------------------- 68 | 69 | .. automodule:: tobac.segmentation.watershed_segmentation 70 | :members: 71 | :undoc-members: 72 | :show-inheritance: 73 | 74 | tobac.testing module 75 | -------------------- 76 | 77 | .. automodule:: tobac.testing 78 | :members: 79 | :undoc-members: 80 | :show-inheritance: 81 | 82 | tobac.tracking module 83 | --------------------- 84 | 85 | .. automodule:: tobac.tracking 86 | :members: 87 | :undoc-members: 88 | :show-inheritance: 89 | 90 | tobac.utils modules 91 | ------------------- 92 | 93 | tobac.utils.bulk_statistics module 94 | ---------------------------------- 95 | 96 | .. automodule:: tobac.utils.bulk_statistics 97 | :members: 98 | :undoc-members: 99 | :show-inheritance: 100 | 101 | tobac.utils.decorators module 102 | ----------------------------- 103 | 104 | .. automodule:: tobac.utils.decorators 105 | :members: 106 | :undoc-members: 107 | :show-inheritance: 108 | 109 | tobac.utils.general module 110 | -------------------------- 111 | 112 | .. automodule:: tobac.utils.general 113 | :members: 114 | :undoc-members: 115 | :show-inheritance: 116 | 117 | tobac.utils.mask module 118 | ----------------------- 119 | 120 | .. automodule:: tobac.utils.mask 121 | :members: 122 | :undoc-members: 123 | :show-inheritance: 124 | 125 | tobac.utils.periodic_boundaries module 126 | -------------------------------------- 127 | 128 | .. automodule:: tobac.utils.periodic_boundaries 129 | :members: 130 | :undoc-members: 131 | :show-inheritance: 132 | 133 | 134 | tobac.wrapper module 135 | -------------------- 136 | 137 | .. automodule:: tobac.wrapper 138 | :members: 139 | :undoc-members: 140 | :show-inheritance: 141 | 142 | Module contents 143 | --------------- 144 | 145 | ..
automodule:: tobac 146 | :members: 147 | :undoc-members: 148 | :show-inheritance: 149 | -------------------------------------------------------------------------------- /doc/tracking_base_out_vars.csv: -------------------------------------------------------------------------------- 1 | Variable Name,Description,Units,Type 2 | cell,Tracked cell number; generally starts from 1. Untracked cell value can be set; but by default is -1.,n/a,int 3 | time_cell,Time since cell was first detected.,minutes,object/python timedelta 4 | -------------------------------------------------------------------------------- /doc/tracking_output.rst: -------------------------------------------------------------------------------- 1 | Tracking Output 2 | ------------------------- 3 | 4 | Tracking outputs a `pandas` dataframe with variables in addition to the variables output by Feature Detection (see :doc:`feature_detection_output`). While this is a separate dataframe from the one output by Feature Detection, it is identical except for the addition of the columns listed below. The additional variables added by tracking, with column names listed in the `Variable Name` column, are described below with units. 5 | 6 | Variables that are common to all tracking files: 7 | 8 | .. csv-table:: tobac Tracking Output Variables 9 | :file: ./tracking_base_out_vars.csv 10 | :widths: 3, 35, 3, 3 11 | :header-rows: 1 12 | -------------------------------------------------------------------------------- /doc/transform_segmentation.rst: -------------------------------------------------------------------------------- 1 | Track on one dataset, segment on another 2 | ---------------------------------------- 3 | 4 | *tobac* also has the capability to combine datasets through :doc:`segmentation`, which includes the ability to track on one dataset (e.g., gridded radar data) and run segmentation on a different dataset *on a different grid* (e.g., satellite data). 5 | 6 | .. image:: images/sat_radar_combined.png 7 | :width: 500 px 8 | 9 | To do this, users should first run :doc:`feature_detection_overview` with a dataset that contains latitude and longitude coordinates, such that they appear in the output dataframe from Feature Detection. Next, use :func:`tobac.utils.transform_feature_points` to transform the feature dataframe into the new coordinate system. Finally, use the output from :func:`tobac.utils.transform_feature_points` to run segmentation with the new data. This can be done with both 2D and 3D feature detection and segmentation.
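A minimal sketch of this workflow (assuming ``features_radar`` is a feature dataframe detected on the radar grid, ``satellite_field`` is the satellite data on its own grid, ``dxy`` is the satellite grid spacing, and the threshold value is purely illustrative; see the API Reference for the keyword options of both functions):

.. code:: python

    import tobac

    # transform the radar-derived feature positions onto the satellite grid
    features_sat = tobac.utils.transform_feature_points(features_radar, satellite_field)

    # run segmentation on the satellite data using the transformed features,
    # e.g. for brightness temperatures below an illustrative 250 K threshold
    mask, features_out = tobac.segmentation.segmentation(
        features_sat, satellite_field, dxy, threshold=250, target="minimum"
    )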
-------------------------------------------------------------------------------- /environment-ci.yml: -------------------------------------------------------------------------------- 1 | name: tobac-dev 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - numpy 6 | - scipy 7 | - scikit-image 8 | - scikit-learn 9 | - pandas 10 | - matplotlib 11 | - iris 12 | - xarray 13 | - cartopy 14 | - trackpy 15 | - pytest 16 | - typing_extensions 17 | - black 18 | - coverage 19 | - pytest-cov 20 | - pylint 21 | -------------------------------------------------------------------------------- /environment-examples.yml: -------------------------------------------------------------------------------- 1 | name: tobac-dev 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - numpy 6 | - scipy 7 | - scikit-image 8 | - scikit-learn 9 | - pandas 10 | - matplotlib 11 | - iris 12 | - xarray<2024.10.0 13 | - cartopy 14 | - trackpy>=0.6.1 15 | - pytest 16 | - typing_extensions 17 | - black 18 | - jupyter 19 | - notebook 20 | - pytables 21 | - s3fs 22 | - arm_pyart 23 | - seaborn 24 | - h5netcdf 25 | - typing_extensions 26 | - rioxarray 27 | - numba 28 | - dask 29 | - ffmpeg -------------------------------------------------------------------------------- /example_requirements.txt: -------------------------------------------------------------------------------- 1 | # Python dependencies 2 | numpy 3 | scipy 4 | scikit-image 5 | scikit-learn 6 | pandas 7 | matplotlib 8 | iris 9 | xarray 10 | cartopy 11 | trackpy 12 | jupyter 13 | notebook 14 | pytables 15 | s3fs 16 | arm_pyart 17 | seaborn 18 | h5netcdf 19 | typing_extensions 20 | rioxarray 21 | numba 22 | dask 23 | 24 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Python dependencies 2 | numpy 3 | scipy 4 | scikit-image 5 | scikit-learn 6 | pandas 7 | matplotlib 8 | iris 9 | xarray 10 | cartopy 11 | trackpy 12 | typing_extensions -------------------------------------------------------------------------------- /run_notebooks.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | find . -type f -name '*.ipynb' -not -path "*/.ipynb_checkpoints/*" > nbfiles.txt 3 | cat nbfiles.txt 4 | 5 | while IFS= read -r nbpath; do 6 | jupyter nbconvert --inplace --ClearMetadataPreprocessor.enabled=True --clear-output "$nbpath" 7 | jupyter nbconvert --to notebook --inplace --execute "$nbpath" 8 | done < nbfiles.txt -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code is from the Python documentation and is 3 | designed to read in the version number.
4 | See: https://packaging.python.org/en/latest/guides/single-sourcing-package-version/ 5 | """ 6 | 7 | from setuptools import setup 8 | from pathlib import Path 9 | 10 | 11 | def read(pkg_name): 12 | init_fname = Path(__file__).parent / pkg_name / "__init__.py" 13 | with open(init_fname, "r") as fp: 14 | return fp.read() 15 | 16 | 17 | def get_version(pkg_name): 18 | for line in read(pkg_name).splitlines(): 19 | if line.startswith("__version__"): 20 | delim = '"' if '"' in line else "'" 21 | return line.split(delim)[1] 22 | else: 23 | raise RuntimeError("Unable to find version string.") 24 | 25 | 26 | def get_requirements(requirements_filename): 27 | requirements_file = Path(__file__).parent / requirements_filename 28 | assert requirements_file.exists() 29 | with open(requirements_file) as f: 30 | requirements = [ 31 | line.strip() for line in f.readlines() if not line.startswith("#") 32 | ] 33 | # Iris has a different name on PyPI... 34 | if "iris" in requirements: 35 | requirements.remove("iris") 36 | requirements.append("scitools-iris") 37 | return requirements 38 | 39 | 40 | def get_packages(package_name): 41 | package = Path(package_name) 42 | packages = [ 43 | str(path.parent).replace("/", ".") for path in package.rglob("__init__.py") 44 | ] 45 | return packages 46 | 47 | 48 | PACKAGE_NAME = "tobac" 49 | 50 | # See classifiers list at: https://pypi.org/classifiers/ 51 | CLASSIFIERS = [ 52 | "Development Status :: 5 - Production/Stable", 53 | "Environment :: Console", 54 | "Intended Audience :: Education", 55 | "Intended Audience :: Science/Research", 56 | "Intended Audience :: Developers", 57 | "License :: OSI Approved :: BSD License", 58 | "Operating System :: POSIX :: Linux", 59 | "Operating System :: MacOS :: MacOS X", 60 | "Operating System :: Microsoft :: Windows", 61 | "Programming Language :: Python", 62 | "Programming Language :: Python :: 3", 63 | "Programming Language :: Python :: 3 :: Only", 64 | "Programming Language :: Python :: 3.7", 65 | "Programming Language :: Python :: 3.8", 66 | "Programming Language :: Python :: 3.9", 67 | "Programming Language :: Python :: 3.10", 68 | "Programming Language :: Python :: 3.11", 69 | "Topic :: Scientific/Engineering", 70 | "Topic :: Scientific/Engineering :: Atmospheric Science", 71 | ] 72 | 73 | 74 | setup( 75 | name=PACKAGE_NAME, 76 | version=get_version(PACKAGE_NAME), 77 | description="Tracking and object-based analysis of clouds", 78 | url="http://github.com/tobac-project/tobac", 79 | classifiers=CLASSIFIERS, 80 | author=[ 81 | "Max Heikenfeld", 82 | "William Jones", 83 | "Fabian Senf", 84 | "Sean Freeman", 85 | "Julia Kukulies", 86 | "Peter Marinescu", 87 | ], 88 | author_email=[ 89 | "max.heikenfeld@physics.ox.ac.uk", 90 | "william.jones@physics.ox.ac.uk", 91 | "senf@tropos.de", 92 | "sean.freeman@uah.edu", 93 | "julia.kukulies@gu.se", 94 | "peter.marinescu@colostate.edu", 95 | ], 96 | license="BSD-3-Clause License", 97 | packages=get_packages(PACKAGE_NAME), 98 | install_requires=get_requirements("requirements.txt"), 99 | tests_require=["pytest"], 100 | zip_safe=False, 101 | ) 102 | -------------------------------------------------------------------------------- /tobac/__init__.py: -------------------------------------------------------------------------------- 1 | # from .tracking import maketrack 2 | import sys 3 | 4 | if sys.version_info < (3, 7): 5 | warning = """ \n\n 6 | Support for Python versions less than 3.7 is deprecated. 7 | Version 1.5 of tobac will require Python 3.7 or later. 8 | Python {py} detected.
\n\n 9 | """.format( 10 | py=".".join(str(v) for v in sys.version_info[:3]) 11 | ) 12 | 13 | print(warning) 14 | 15 | from tobac.segmentation.watershed_segmentation import ( 16 | segmentation_3D, 17 | segmentation_2D, 18 | watershedding_3D, 19 | watershedding_2D, 20 | ) 21 | from .centerofgravity import ( 22 | calculate_cog, 23 | calculate_cog_untracked, 24 | calculate_cog_domain, 25 | ) 26 | from .plotting import ( 27 | plot_tracks_mask_field, 28 | plot_tracks_mask_field_loop, 29 | plot_mask_cell_track_follow, 30 | plot_mask_cell_track_static, 31 | plot_mask_cell_track_static_timeseries, 32 | plot_lifetime_histogram, 33 | plot_lifetime_histogram_bar, 34 | plot_histogram_cellwise, 35 | plot_histogram_featurewise, 36 | plot_mask_cell_track_3Dstatic, 37 | plot_mask_cell_track_2D3Dstatic, 38 | plot_mask_cell_individual_static, 39 | plot_mask_cell_individual_3Dstatic, 40 | animation_mask_field, 41 | make_map, 42 | map_tracks, 43 | ) 44 | from tobac.analysis.cell_analysis import ( 45 | cell_statistics, 46 | cog_cell, 47 | lifetime_histogram, 48 | histogram_cellwise, 49 | velocity_histogram, 50 | calculate_overlap, 51 | ) 52 | from tobac.analysis.feature_analysis import ( 53 | histogram_featurewise, 54 | calculate_nearestneighbordistance, 55 | nearestneighbordistance_histogram, 56 | area_histogram, 57 | ) 58 | from tobac.analysis.spatial import ( 59 | calculate_velocity, 60 | calculate_distance, 61 | calculate_area, 62 | ) 63 | from .utils.mask import ( 64 | mask_cell, 65 | mask_cell_surface, 66 | mask_cube_cell, 67 | mask_cube_untracked, 68 | mask_cube, 69 | column_mask_from2D, 70 | mask_features, 71 | mask_features_surface, 72 | mask_cube_features, 73 | ) 74 | from .utils.general import ( 75 | get_bounding_box, 76 | add_coordinates, 77 | get_spacings, 78 | ) 79 | from .feature_detection import feature_detection_multithreshold 80 | from .tracking import linking_trackpy 81 | from .wrapper import maketrack 82 | from .wrapper import tracking_wrapper 83 | from . import merge_split 84 | 85 | # Set version number 86 | __version__ = "1.6.0" 87 | -------------------------------------------------------------------------------- /tobac/analysis/__init__.py: -------------------------------------------------------------------------------- 1 | """Provide tools to analyse and visualize the tracked objects. 2 | This module provides a set of routines that enables performing analyses 3 | and deriving statistics for individual tracks, such as the time series 4 | of integrated properties and vertical profiles. It also provides 5 | routines to calculate summary statistics of the entire population of 6 | tracked features in the field, like histograms of areas/volumes 7 | or mass, and a detailed cell lifetime analysis. These analysis 8 | routines are all built in a modular manner. Thus, users can reuse the 9 | most basic methods for interacting with the data structure of the 10 | package in their own analysis procedures in Python. This includes 11 | functions performing simple tasks like looping over all identified 12 | objects or trajectories and masking arrays for the analysis of 13 | individual features. Plotting routines include visualizations 14 | for both individual convective cells and their properties. [1]_ 15 | 16 | References 17 | ---------- 18 | .. [1] Heikenfeld, M., Marinescu, P. J., Christensen, M., 19 | Watson-Parris, D., Senf, F., van den Heever, S. C. 20 | & Stier, P. (2019). tobac 1.2: towards a flexible 21 | framework for tracking and analysis of clouds in 22 | diverse datasets.
Geoscientific Model Development, 23 | 12(11), 4551-4570. 24 | 25 | Notes 26 | ----- 27 | """ 28 | 29 | from tobac.analysis.cell_analysis import * 30 | from tobac.analysis.feature_analysis import * 31 | from tobac.analysis.spatial import * 32 | -------------------------------------------------------------------------------- /tobac/analysis/feature_analysis.py: -------------------------------------------------------------------------------- 1 | """ 2 | Perform analysis on the properties of detected features 3 | """ 4 | 5 | import logging 6 | import numpy as np 7 | 8 | from tobac.analysis.spatial import ( 9 | calculate_nearestneighbordistance, 10 | calculate_area, 11 | ) 12 | 13 | __all__ = ( 14 | "nearestneighbordistance_histogram", 15 | "area_histogram", 16 | "histogram_featurewise", 17 | ) 18 | 19 | 20 | def nearestneighbordistance_histogram( 21 | features, 22 | bin_edges=np.arange(0, 30000, 500), 23 | density=False, 24 | method_distance=None, 25 | return_values=False, 26 | ): 27 | """Create a nearest neighbor distance histogram of the features. 28 | If the DataFrame does not contain a 'min_distance' column, the 29 | distances are calculated. 30 | Parameters 31 | ---------- 32 | features : pandas.DataFrame 33 | 34 | bin_edges : int or ndarray, optional 35 | If bin_edges is an int, it defines the number of equal-width 36 | bins in the given range. If bin_edges is a ndarray, it defines a 37 | monotonically increasing array of bin edges, including the 38 | rightmost edge. Default is np.arange(0, 30000, 500). 39 | 40 | density : bool, optional 41 | If False, the result will contain the number of samples in 42 | each bin. If True, the result is the value of the probability 43 | density function at the bin, normalized such that the integral 44 | over the range is 1. Default is False. 45 | 46 | method_distance : {None, 'xy', 'latlon'}, optional 47 | Method of distance calculation. 'xy' uses the length of the 48 | vector between the two features, 'latlon' uses the haversine 49 | distance. None checks whether the required coordinates are 50 | present and starts with 'xy'. Default is None. 51 | 52 | return_values : bool, optional 53 | Bool determining whether the nearest neighbor distances of the 54 | features are returned from this function. Default is False. 55 | 56 | Returns 57 | ------- 58 | hist : ndarray 59 | The values of the histogram. 60 | 61 | bin_edges : ndarray 62 | The edges of the histogram. 63 | 64 | distances : ndarray, optional 65 | A numpy array with the nearest neighbor distances of each 66 | feature. 67 | 68 | """ 69 | 70 | if "min_distance" not in features.columns: 71 | logging.debug("calculate nearest neighbor distances") 72 | features = calculate_nearestneighbordistance( 73 | features, method_distance=method_distance 74 | ) 75 | distances = features["min_distance"].values 76 | hist, bin_edges = np.histogram( 77 | distances[~np.isnan(distances)], bin_edges, density=density 78 | ) 79 | if return_values: 80 | return hist, bin_edges, distances 81 | else: 82 | return hist, bin_edges 83 | 84 | 85 | def area_histogram( 86 | features, 87 | mask, 88 | bin_edges=np.arange(0, 30000, 500), 89 | density=False, 90 | method_area=None, 91 | return_values=False, 92 | representative_area=False, 93 | ): 94 | """Create an area histogram of the features. If the DataFrame 95 | does not contain an area column, the areas are calculated. 96 | 97 | Parameters 98 | ---------- 99 | features : pandas.DataFrame 100 | DataFrame of the features.
101 | 102 | mask : iris.cube.Cube 103 | Cube containing mask (int for tracked volumes, 0 104 | everywhere else). Needs to contain either 105 | projection_x_coordinate and projection_y_coordinate or 106 | latitude and longitude coordinates. The output of a 107 | segmentation should be used here. 108 | 109 | bin_edges : int or ndarray, optional 110 | If bin_edges is an int, it defines the number of 111 | equal-width bins in the given range. If bin_edges is a ndarray, 112 | it defines a monotonically increasing array of bin edges, 113 | including the rightmost edge. 114 | Default is np.arange(0, 30000, 500). 115 | 116 | density : bool, optional 117 | If False, the result will contain the number of samples 118 | in each bin. If True, the result is the value of the 119 | probability density function at the bin, normalized such 120 | that the integral over the range is 1. Default is False. 121 | 122 | return_values : bool, optional 123 | Bool determining whether the areas of the features are 124 | returned from this function. Default is False. 125 | 126 | representative_area : bool, optional 127 | If False, no weights will be associated with the values. 128 | If True, the weight for each area will be the area 129 | itself, i.e. each bin count will have the value of 130 | the sum of all areas within the edges of the bin. 131 | Default is False. 132 | 133 | Returns 134 | ------- 135 | hist : ndarray 136 | The values of the histogram. 137 | 138 | bin_edges : ndarray 139 | The edges of the histogram. 140 | 141 | bin_centers : ndarray 142 | The centers of the histogram intervals. 143 | 144 | areas : ndarray, optional 145 | A numpy array approximating the area of each feature. 146 | 147 | """ 148 | 149 | if "area" not in features.columns: 150 | logging.info("calculate area") 151 | features = calculate_area(features, mask, method_area) 152 | areas = features["area"].values 153 | # restrict to non NaN values: 154 | areas = areas[~np.isnan(areas)] 155 | if representative_area: 156 | weights = areas 157 | else: 158 | weights = None 159 | hist, bin_edges = np.histogram(areas, bin_edges, density=density, weights=weights) 160 | bin_centers = bin_edges[:-1] + 0.5 * np.diff(bin_edges) 161 | 162 | if return_values: 163 | return hist, bin_edges, bin_centers, areas 164 | else: 165 | return hist, bin_edges, bin_centers 166 | 167 | 168 | def histogram_featurewise(Track, variable=None, bin_edges=None, density=False): 169 | """Create a histogram of a variable from the features 170 | (detected objects at a single time step) of a track. 171 | Essentially a wrapper of the numpy.histogram() method. 172 | 173 | Parameters 174 | ---------- 175 | Track : pandas.DataFrame 176 | The track containing the variable to create the 177 | histogram from. 178 | 179 | variable : string, optional 180 | Column of the DataFrame with the variable on which the 181 | histogram is based. Default is None. 182 | 183 | bin_edges : int or ndarray, optional 184 | If bin_edges is an int, it defines the number of 185 | equal-width bins in the given range. If bin_edges is 186 | a sequence, it defines a monotonically increasing 187 | array of bin edges, including the rightmost edge. 188 | 189 | density : bool, optional 190 | If False, the result will contain the number of 191 | samples in each bin. If True, the result is the 192 | value of the probability density function at the 193 | bin, normalized such that the integral over the 194 | range is 1. Default is False.
195 | 196 | Returns 197 | ------- 198 | hist : ndarray 199 | The values of the histogram 200 | 201 | bin_edges : ndarray 202 | The edges of the histogram 203 | 204 | bin_centers : ndarray 205 | The centers of the histogram intervals 206 | 207 | """ 208 | 209 | hist, bin_edges = np.histogram(Track[variable].values, bin_edges, density=density) 210 | bin_centers = bin_edges[:-1] + 0.5 * np.diff(bin_edges) 211 | 212 | return hist, bin_edges, bin_centers 213 | -------------------------------------------------------------------------------- /tobac/centerofgravity.py: -------------------------------------------------------------------------------- 1 | """Identify center of gravity and mass for analysis.""" 2 | 3 | import logging 4 | import warnings 5 | 6 | 7 | def calculate_cog(tracks, mass, mask): 8 | """Calculate center of gravity and mass for each tracked cell. 9 | 10 | Parameters 11 | ---------- 12 | tracks : pandas.DataFrame 13 | DataFrame containing trajectories of cell centers. 14 | 15 | mass : iris.cube.Cube 16 | Cube of quantity (needs coordinates 'time', 17 | 'geopotential_height','projection_x_coordinate' and 18 | 'projection_y_coordinate'). 19 | 20 | mask : iris.cube.Cube 21 | Cube containing mask (int > 0 where belonging to area/volume 22 | of feature, 0 else). 23 | 24 | Returns 25 | ------- 26 | tracks_out : pandas.DataFrame 27 | Dataframe containing t, x, y, z positions of center of gravity 28 | and total mass of each tracked cell at each timestep. 29 | """ 30 | 31 | from .utils import mask_cube_cell 32 | from iris import Constraint 33 | 34 | warnings.warn( 35 | "calculate_cog is deprecated and will be removed or significantly changed in v2.0.", 36 | DeprecationWarning, 37 | ) 38 | 39 | logging.info("start calculating centre of gravity for tracked cells") 40 | 41 | tracks_out = tracks[["time", "frame", "cell", "time_cell"]] 42 | 43 | for i_row, row in tracks_out.iterrows(): 44 | cell = row["cell"] 45 | constraint_time = Constraint(time=row["time"]) 46 | mass_i = mass.extract(constraint_time) 47 | mask_i = mask.extract(constraint_time) 48 | mass_masked_i = mask_cube_cell(mass_i, mask_i, cell) 49 | x_M, y_M, z_M, mass_M = center_of_gravity(mass_masked_i) 50 | tracks_out.loc[i_row, "x_M"] = float(x_M) 51 | tracks_out.loc[i_row, "y_M"] = float(y_M) 52 | tracks_out.loc[i_row, "z_M"] = float(z_M) 53 | tracks_out.loc[i_row, "mass"] = float(mass_M) 54 | 55 | logging.info("Finished calculating centre of gravity for tracked cells") 56 | 57 | return tracks_out 58 | 59 | 60 | def calculate_cog_untracked(mass, mask): 61 | """Calculate center of gravity and mass for untracked domain parts. 62 | 63 | Parameters 64 | ---------- 65 | mass : iris.cube.Cube 66 | Cube of quantity (needs coordinates 'time', 67 | 'geopotential_height','projection_x_coordinate' and 68 | 'projection_y_coordinate'). 69 | 70 | mask : iris.cube.Cube 71 | Cube containing mask (int > 0 where belonging to area/volume 72 | of feature, 0 else). 73 | 74 | Returns 75 | ------- 76 | tracks_out : pandas.DataFrame 77 | Dataframe containing t, x, y, z positions of center of gravity 78 | and total mass for untracked part of the domain.
79 | """ 80 | 81 | from pandas import DataFrame 82 | from .utils import mask_cube_untracked 83 | from iris import Constraint 84 | 85 | warnings.warn( 86 | "calculate_cog_untracked is deprecated and will be removed or significantly changed in v2.0.", 87 | DeprecationWarning, 88 | ) 89 | 90 | logging.info( 91 | "start calculating centre of gravity for untracked parts of the domain" 92 | ) 93 | tracks_out = DataFrame() 94 | time_coord = mass.coord("time") 95 | tracks_out["frame"] = range(len(time_coord.points)) 96 | for i_row, row in tracks_out.iterrows(): 97 | time_i = time_coord.units.num2date(time_coord[int(row["frame"])].points[0]) 98 | constraint_time = Constraint(time=time_i) 99 | mass_i = mass.extract(constraint_time) 100 | mask_i = mask.extract(constraint_time) 101 | mass_untracked_i = mask_cube_untracked(mass_i, mask_i) 102 | x_M, y_M, z_M, mass_M = center_of_gravity(mass_untracked_i) 103 | tracks_out.loc[i_row, "time"] = time_i 104 | tracks_out.loc[i_row, "x_M"] = float(x_M) 105 | tracks_out.loc[i_row, "y_M"] = float(y_M) 106 | tracks_out.loc[i_row, "z_M"] = float(z_M) 107 | tracks_out.loc[i_row, "mass"] = float(mass_M) 108 | 109 | logging.info( 110 | "Finished calculating centre of gravity for untracked parts of the domain" 111 | ) 112 | 113 | return tracks_out 114 | 115 | 116 | def calculate_cog_domain(mass): 117 | """Calculate center of gravity and mass for entire domain. 118 | 119 | Parameters 120 | ---------- 121 | mass : iris.cube.Cube 122 | Cube of quantity (needs coordinates 'time', 123 | 'geopotential_height','projection_x_coordinate' and 124 | 'projection_y_coordinate'). 125 | 126 | Returns 127 | ------- 128 | tracks_out : pandas.DataFrame 129 | Dataframe containing t, x, y, z positions of center of gravity 130 | and total mass of the entire domain. 131 | """ 132 | 133 | from pandas import DataFrame 134 | from iris import Constraint 135 | 136 | warnings.warn( 137 | "calculate_cog_domain is deprecated and will be removed or significantly changed in v2.0.", 138 | DeprecationWarning, 139 | ) 140 | 141 | logging.info("start calculating centre of gravity for entire domain") 142 | 143 | time_coord = mass.coord("time") 144 | 145 | tracks_out = DataFrame() 146 | tracks_out["frame"] = range(len(time_coord.points)) 147 | for i_row, row in tracks_out.iterrows(): 148 | time_i = time_coord.units.num2date(time_coord[int(row["frame"])].points[0]) 149 | constraint_time = Constraint(time=time_i) 150 | mass_i = mass.extract(constraint_time) 151 | x_M, y_M, z_M, mass_M = center_of_gravity(mass_i) 152 | tracks_out.loc[i_row, "time"] = time_i 153 | tracks_out.loc[i_row, "x_M"] = float(x_M) 154 | tracks_out.loc[i_row, "y_M"] = float(y_M) 155 | tracks_out.loc[i_row, "z_M"] = float(z_M) 156 | tracks_out.loc[i_row, "mass"] = float(mass_M) 157 | 158 | logging.info("Finished calculating centre of gravity for entire domain") 159 | 160 | return tracks_out 161 | 162 | 163 | def center_of_gravity(cube_in): 164 | """Calculate center of gravity and sum of quantity. 165 | 166 | Parameters 167 | ---------- 168 | cube_in : iris.cube.Cube 169 | Cube (potentially masked) of quantity (needs coordinates 170 | 'geopotential_height','projection_x_coordinate' and 171 | 'projection_y_coordinate'). 172 | 173 | Returns 174 | ------- 175 | x : float 176 | X position of center of gravity. 177 | 178 | y : float 179 | Y position of center of gravity. 180 | 181 | z : float 182 | Z position of center of gravity. 183 | 184 | variable_sum : float 185 | Sum of the quantity over the unmasked part of the cube.
186 | """ 187 | 188 | from iris.analysis import SUM 189 | import numpy as np 190 | 191 | warnings.warn( 192 | "center_of_gravity is deprecated and will be removed or significantly changed in v2.0.", 193 | DeprecationWarning, 194 | ) 195 | 196 | cube_sum = cube_in.collapsed(["bottom_top", "south_north", "west_east"], SUM) 197 | z = cube_in.coord("geopotential_height") 198 | x = cube_in.coord("projection_x_coordinate") 199 | y = cube_in.coord("projection_y_coordinate") 200 | dimensions_collapse = ["model_level_number", "x", "y"] 201 | for coord in cube_in.coords(): 202 | if coord.ndim > 1 and ( 203 | cube_in.coord_dims(dimensions_collapse[0])[0] in cube_in.coord_dims(coord) 204 | or cube_in.coord_dims(dimensions_collapse[1])[0] 205 | in cube_in.coord_dims(coord) 206 | or cube_in.coord_dims(dimensions_collapse[2])[0] 207 | in cube_in.coord_dims(coord) 208 | ): 209 | cube_in.remove_coord(coord.name()) 210 | if cube_sum.data > 0: 211 | x = ( 212 | (cube_in * x).collapsed(["model_level_number", "x", "y"], SUM) / cube_sum 213 | ).data 214 | y = ( 215 | (cube_in * y).collapsed(["model_level_number", "x", "y"], SUM) / cube_sum 216 | ).data 217 | z = ( 218 | (cube_in * z.points).collapsed(["model_level_number", "x", "y"], SUM) 219 | / cube_sum 220 | ).data 221 | else: 222 | x = np.nan 223 | y = np.nan 224 | z = np.nan 225 | variable_sum = cube_sum.data 226 | return (x, y, z, variable_sum) 227 | -------------------------------------------------------------------------------- /tobac/segmentation/__init__.py: -------------------------------------------------------------------------------- 1 | from .watershed_segmentation import ( 2 | add_markers, 3 | segmentation_3D, 4 | segmentation_2D, 5 | segmentation_timestep, 6 | check_add_unseeded_across_bdrys, 7 | segmentation, 8 | watershedding_2D, 9 | watershedding_3D, 10 | ) 11 | -------------------------------------------------------------------------------- /tobac/tests/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM mambaorg/micromamba 2 | ARG MAMBA_DOCKERFILE_ACTIVATE=1 3 | 4 | RUN micromamba install -y -n base -c conda-forge numpy \ 5 | scipy scikit-image pandas pytables matplotlib iris \ 6 | cf-units xarray cartopy trackpy numba pytest pip scikit-learn 7 | 8 | COPY . ./ 9 | 10 | RUN pip install . 11 | 12 | RUN pytest 13 | -------------------------------------------------------------------------------- /tobac/tests/Dockerfile-coverage: -------------------------------------------------------------------------------- 1 | FROM mambaorg/micromamba 2 | ARG MAMBA_DOCKERFILE_ACTIVATE=1 3 | 4 | RUN micromamba install -y -n base -c conda-forge numpy \ 5 | scipy scikit-image pandas pytables matplotlib iris \ 6 | cf-units xarray cartopy trackpy numba pytest pip \ 7 | pytest-cov coverage 8 | 9 | COPY . ./ 10 | 11 | RUN pip install .
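# Run the test suite under coverage and move the XML report into the shared
# volume (see the RUN instruction below) so it can be collected from outside
# the container.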
12 | 13 | RUN coverage run -m pytest --cov-report=xml &&\ 14 | mv coverage.xml shared 15 | 16 | -------------------------------------------------------------------------------- /tobac/tests/segmentation_tests/test_iris_xarray_segmentation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobac-project/tobac/82ac8f4cf19523ad86171a9f8d7c72e976ca3b09/tobac/tests/segmentation_tests/test_iris_xarray_segmentation.py -------------------------------------------------------------------------------- /tobac/tests/segmentation_tests/test_segmentation_time_pad.py: -------------------------------------------------------------------------------- 1 | """Tests for time padding of segmentation""" 2 | 3 | import datetime 4 | import pytest 5 | from typing import Optional 6 | import numpy as np 7 | import tobac.testing as tb_test 8 | import tobac.segmentation.watershed_segmentation as watershed_segmentation 9 | import tobac.feature_detection as feature_detection 10 | 11 | 12 | @pytest.mark.parametrize( 13 | "time_pad_setting, time_offset, expect_output,", 14 | [ 15 | (datetime.timedelta(seconds=1), datetime.timedelta(seconds=0), True), 16 | (datetime.timedelta(seconds=1), datetime.timedelta(seconds=2), False), 17 | (datetime.timedelta(seconds=0), datetime.timedelta(seconds=2), False), 18 | (datetime.timedelta(seconds=3), datetime.timedelta(seconds=2), True), 19 | (datetime.timedelta(seconds=2), datetime.timedelta(seconds=1), True), 20 | (datetime.timedelta(seconds=0), datetime.timedelta(seconds=0), True), 21 | (None, datetime.timedelta(seconds=0), True), 22 | (None, datetime.timedelta(seconds=1), False), 23 | ], 24 | ) 25 | def test_watershed_segmentation_time_pad( 26 | time_pad_setting: Optional[datetime.timedelta], 27 | time_offset: datetime.timedelta, 28 | expect_output: bool, 29 | ): 30 | """Tests tobac.watershed_segmentation for time padding working correctly.""" 31 | test_dset_size = (50, 50) 32 | test_hdim_1_pt = 20.0 33 | test_hdim_2_pt = 20.0 34 | test_hdim_1_sz = 5 35 | test_hdim_2_sz = 5 36 | size_feature1 = test_hdim_1_sz * test_hdim_2_sz 37 | test_amp = 2 38 | test_min_num = 2 39 | 40 | test_data = np.zeros(test_dset_size) 41 | test_data = tb_test.make_feature_blob( 42 | test_data, 43 | test_hdim_1_pt, 44 | test_hdim_2_pt, 45 | h1_size=test_hdim_1_sz, 46 | h2_size=test_hdim_2_sz, 47 | amplitude=test_amp, 48 | ) 49 | 50 | # add feature of different size 51 | test_hdim_1_pt = 40.0 52 | test_hdim_2_pt = 40.0 53 | test_hdim_1_sz = 10 54 | test_hdim_2_sz = 10 55 | size_feature2 = test_hdim_1_sz * test_hdim_2_sz 56 | test_amp = 10 57 | test_dxy = 1 58 | 59 | test_data = tb_test.make_feature_blob( 60 | test_data, 61 | test_hdim_1_pt, 62 | test_hdim_2_pt, 63 | h1_size=test_hdim_1_sz, 64 | h2_size=test_hdim_2_sz, 65 | amplitude=test_amp, 66 | ) 67 | 68 | test_data_xarray = tb_test.make_dataset_from_arr(test_data, data_type="xarray") 69 | test_data_xarray = test_data_xarray.assign_coords( 70 | time=datetime.datetime(2020, 1, 1) 71 | ) 72 | 73 | test_data_xarray = test_data_xarray.expand_dims("time") 74 | # detect both features 75 | fd_output = feature_detection.feature_detection_multithreshold( 76 | test_data_xarray, 77 | i_time=0, 78 | dxy=1, 79 | threshold=[1, 2, 3], 80 | n_min_threshold=test_min_num, 81 | target="maximum", 82 | statistic={"features_mean": np.mean}, 83 | ) 84 | 85 | # add feature IDs to data frame for one time step 86 | fd_output["feature"] = [1, 2] 87 | fd_output.loc[:, "time"] += time_offset 88 | 89 | # perform 
segmentation 90 | out_seg_mask, out_df = watershed_segmentation.segmentation( 91 | field=test_data_xarray, 92 | features=fd_output, 93 | dxy=test_dxy, 94 | threshold=1.5, 95 | time_padding=time_pad_setting, 96 | ) 97 | out_seg_mask_arr = out_seg_mask 98 | if expect_output: 99 | # assure that the number of grid cells belonging to each feature (ncells) are consistent with segmentation mask 100 | assert np.sum(out_seg_mask_arr == 1) == size_feature1 101 | assert np.sum(out_seg_mask_arr == 2) == size_feature2 102 | else: 103 | assert np.sum(out_seg_mask_arr == 1) == 0 104 | assert np.sum(out_seg_mask_arr == 2) == 0 105 | -------------------------------------------------------------------------------- /tobac/tests/test_convert.py: -------------------------------------------------------------------------------- 1 | """Tests for the iris/xarray conversion decorators""" 2 | 3 | import pytest 4 | import tobac 5 | import tobac.testing 6 | import xarray 7 | from iris.cube import Cube 8 | import pandas as pd 9 | from pandas.testing import assert_frame_equal 10 | from copy import deepcopy 11 | from tobac.utils.decorators import ( 12 | xarray_to_iris, 13 | iris_to_xarray, 14 | xarray_to_irispandas, 15 | irispandas_to_xarray, 16 | ) 17 | 18 | 19 | @pytest.mark.parametrize( 20 | "decorator, input_types, expected_internal_types, expected_output_type", 21 | [ 22 | ( 23 | xarray_to_iris, 24 | [xarray.DataArray, xarray.DataArray], 25 | [Cube, Cube], 26 | xarray.DataArray, 27 | ), 28 | (xarray_to_iris, [Cube, Cube], [Cube, Cube], Cube), 29 | (xarray_to_iris, [Cube, xarray.DataArray], [Cube, Cube], xarray.DataArray), 30 | (xarray_to_iris, [xarray.DataArray, Cube], [Cube, Cube], xarray.DataArray), 31 | (iris_to_xarray, [Cube, Cube], [xarray.DataArray, xarray.DataArray], Cube), 32 | ( 33 | iris_to_xarray, 34 | [xarray.DataArray, xarray.DataArray], 35 | [xarray.DataArray, xarray.DataArray], 36 | xarray.DataArray, 37 | ), 38 | ( 39 | iris_to_xarray, 40 | [xarray.DataArray, Cube], 41 | [xarray.DataArray, xarray.DataArray], 42 | Cube, 43 | ), 44 | ( 45 | iris_to_xarray, 46 | [Cube, xarray.DataArray], 47 | [xarray.DataArray, xarray.DataArray], 48 | Cube, 49 | ), 50 | ( 51 | xarray_to_irispandas, 52 | [xarray.DataArray, xarray.DataArray], 53 | [Cube, Cube], 54 | xarray.DataArray, 55 | ), 56 | (xarray_to_irispandas, [Cube, Cube], [Cube, Cube], Cube), 57 | ( 58 | xarray_to_irispandas, 59 | [Cube, xarray.DataArray], 60 | [Cube, Cube], 61 | xarray.DataArray, 62 | ), 63 | ( 64 | xarray_to_irispandas, 65 | [xarray.DataArray, Cube], 66 | [Cube, Cube], 67 | xarray.DataArray, 68 | ), 69 | ( 70 | xarray_to_irispandas, 71 | [xarray.Dataset, xarray.Dataset], 72 | [pd.DataFrame, pd.DataFrame], 73 | xarray.Dataset, 74 | ), 75 | ( 76 | xarray_to_irispandas, 77 | [pd.DataFrame, pd.DataFrame], 78 | [pd.DataFrame, pd.DataFrame], 79 | pd.DataFrame, 80 | ), 81 | ( 82 | xarray_to_irispandas, 83 | [xarray.Dataset, pd.DataFrame], 84 | [pd.DataFrame, pd.DataFrame], 85 | xarray.Dataset, 86 | ), 87 | ( 88 | xarray_to_irispandas, 89 | [pd.DataFrame, xarray.Dataset], 90 | [pd.DataFrame, pd.DataFrame], 91 | xarray.Dataset, 92 | ), 93 | ( 94 | xarray_to_irispandas, 95 | [xarray.Dataset, xarray.DataArray], 96 | [pd.DataFrame, Cube], 97 | xarray.Dataset, 98 | ), 99 | ( 100 | irispandas_to_xarray, 101 | [Cube, Cube], 102 | [xarray.DataArray, xarray.DataArray], 103 | Cube, 104 | ), 105 | ( 106 | irispandas_to_xarray, 107 | [xarray.DataArray, xarray.DataArray], 108 | [xarray.DataArray, xarray.DataArray], 109 | xarray.DataArray, 110 | ), 111 | ( 112 | 
irispandas_to_xarray, 113 | [xarray.DataArray, Cube], 114 | [xarray.DataArray, xarray.DataArray], 115 | Cube, 116 | ), 117 | ( 118 | irispandas_to_xarray, 119 | [Cube, xarray.DataArray], 120 | [xarray.DataArray, xarray.DataArray], 121 | Cube, 122 | ), 123 | ( 124 | irispandas_to_xarray, 125 | [pd.DataFrame, pd.DataFrame], 126 | [xarray.Dataset, xarray.Dataset], 127 | pd.DataFrame, 128 | ), 129 | ( 130 | irispandas_to_xarray, 131 | [xarray.Dataset, xarray.Dataset], 132 | [xarray.Dataset, xarray.Dataset], 133 | xarray.Dataset, 134 | ), 135 | ( 136 | irispandas_to_xarray, 137 | [pd.DataFrame, xarray.Dataset], 138 | [xarray.Dataset, xarray.Dataset], 139 | pd.DataFrame, 140 | ), 141 | ( 142 | irispandas_to_xarray, 143 | [xarray.Dataset, pd.DataFrame], 144 | [xarray.Dataset, xarray.Dataset], 145 | pd.DataFrame, 146 | ), 147 | ( 148 | irispandas_to_xarray, 149 | [pd.DataFrame, Cube], 150 | [xarray.Dataset, xarray.DataArray], 151 | pd.DataFrame, 152 | ), 153 | ], 154 | ) 155 | def test_converting( 156 | decorator, input_types, expected_internal_types, expected_output_type 157 | ): 158 | """Testing the conversions of the decorators internally and for the output""" 159 | 160 | def test_function_kwarg(test_input, kwarg=None): 161 | assert ( 162 | type(test_input) == expected_internal_types[0] 163 | ), "Expected internal type {}, got {} for {}".format( 164 | expected_internal_types[0], type(test_input), decorator.__name__ 165 | ) 166 | assert ( 167 | type(kwarg) == expected_internal_types[1] 168 | ), "Expected internal type {}, got {} for {} as keyword argument".format( 169 | expected_internal_types[1], type(kwarg), decorator.__name__ 170 | ) 171 | return test_input 172 | 173 | def test_function_tuple_output(test_input, kwarg=None): 174 | return (test_input, test_input) 175 | 176 | decorator_i = decorator() 177 | decorated_function_kwarg = decorator_i(test_function_kwarg) 178 | decorated_function_tuple = decorator_i(test_function_tuple_output) 179 | 180 | if input_types[0] == xarray.DataArray: 181 | data = xarray.DataArray.from_iris(tobac.testing.make_simple_sample_data_2D()) 182 | elif input_types[0] == Cube: 183 | data = tobac.testing.make_simple_sample_data_2D() 184 | elif input_types[0] == xarray.Dataset: 185 | data = tobac.testing.generate_single_feature( 186 | 1, 1, max_h1=100, max_h2=100 187 | ).to_xarray() 188 | elif input_types[0] == pd.DataFrame: 189 | data = tobac.testing.generate_single_feature(1, 1, max_h1=100, max_h2=100) 190 | 191 | if input_types[1] == xarray.DataArray: 192 | kwarg = xarray.DataArray.from_iris(tobac.testing.make_simple_sample_data_2D()) 193 | elif input_types[1] == Cube: 194 | kwarg = tobac.testing.make_simple_sample_data_2D() 195 | elif input_types[1] == xarray.Dataset: 196 | kwarg = tobac.testing.generate_single_feature( 197 | 1, 1, max_h1=100, max_h2=100 198 | ).to_xarray() 199 | elif input_types[1] == pd.DataFrame: 200 | kwarg = tobac.testing.generate_single_feature(1, 1, max_h1=100, max_h2=100) 201 | 202 | output = decorated_function_kwarg(data, kwarg=kwarg) 203 | tuple_output = decorated_function_tuple(data, kwarg=kwarg) 204 | 205 | assert ( 206 | type(output) == expected_output_type 207 | ), "Expected output type {}, got {} for {}".format( 208 | expected_output_type, type(output), decorator.__name__ 209 | ) 210 | assert ( 211 | type(tuple_output[0]) == expected_output_type 212 | ), "Expected output type {}, but got {} for {} (1st tuple output)".format( 213 | expected_output_type, type(tuple_output[0]), decorator.__name__ 214 | ) 215 | assert ( 216 |
type(tuple_output[1]) == expected_output_type 217 | ), "Expected output type {}, but got {} for {} (2nd tuple output)".format( 218 | expected_output_type, type(tuple_output[1]), decorator.__name__ 219 | ) 220 | 221 | 222 | def test_xarray_workflow(): 223 | """Test comparing the outputs of the standard functions of tobac for a test dataset 224 | with the output of the same functions decorated with tobac.utils.xarray_to_iris""" 225 | 226 | data = tobac.testing.make_sample_data_2D_3blobs() 227 | data_xarray = xarray.DataArray.from_iris(deepcopy(data)) 228 | 229 | # Testing the get_spacings utility 230 | xarray_to_iris_i = xarray_to_iris() 231 | get_spacings_xarray = xarray_to_iris_i(tobac.utils.get_spacings) 232 | dxy, dt = tobac.utils.get_spacings(data) 233 | dxy_xarray, dt_xarray = get_spacings_xarray(data_xarray) 234 | 235 | assert dxy == dxy_xarray 236 | assert dt == dt_xarray 237 | 238 | # Testing feature detection 239 | feature_detection_xarray = xarray_to_iris_i( 240 | tobac.feature_detection.feature_detection_multithreshold 241 | ) 242 | features = tobac.feature_detection.feature_detection_multithreshold( 243 | data, dxy, threshold=1.0 244 | ) 245 | features_xarray = feature_detection_xarray(data_xarray, dxy_xarray, threshold=1.0) 246 | 247 | assert_frame_equal(features, features_xarray) 248 | 249 | # Testing the segmentation 250 | segmentation_xarray = xarray_to_iris_i(tobac.segmentation.segmentation) 251 | mask, features = tobac.segmentation.segmentation(features, data, dxy, threshold=1.0) 252 | mask_xarray, features_xarray = segmentation_xarray( 253 | features_xarray, data_xarray, dxy_xarray, threshold=1.0 254 | ) 255 | 256 | assert (mask.data == mask_xarray.to_iris().data).all() 257 | 258 | # Testing tracking 259 | tracking_xarray = xarray_to_iris_i(tobac.tracking.linking_trackpy) 260 | track = tobac.tracking.linking_trackpy(features, data, dt, dxy, v_max=100.0) 261 | track_xarray = tracking_xarray( 262 | features_xarray, data_xarray, dt_xarray, dxy_xarray, v_max=100.0 263 | ) 264 | 265 | assert_frame_equal(track, track_xarray) 266 | -------------------------------------------------------------------------------- /tobac/tests/test_datetime.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | import numpy as np 4 | import pandas as pd 5 | import cftime 6 | import pytest 7 | 8 | import tobac.utils.datetime as datetime_utils 9 | 10 | 11 | def test_to_cftime(): 12 | """Test conversion of datetime types to cftime calendars""" 13 | test_dates = [ 14 | "2000-01-01", 15 | "2000-01-01 00:00:00", 16 | datetime(2000, 1, 1), 17 | np.datetime64("2000-01-01 00:00:00.000000000"), 18 | np.datetime64("2000-01-01 00:00:00"), 19 | pd.to_datetime("2000-01-01"), 20 | cftime.datetime(2000, 1, 1), 21 | cftime.DatetimeGregorian(2000, 1, 1), 22 | cftime.Datetime360Day(2000, 1, 1), 23 | cftime.DatetimeNoLeap(2000, 1, 1), 24 | ] 25 | 26 | for date in test_dates: 27 | assert datetime_utils.to_cftime(date, "standard") == cftime.datetime(2000, 1, 1) 28 | assert datetime_utils.to_cftime(date, "gregorian") == cftime.DatetimeGregorian( 29 | 2000, 1, 1 30 | ) 31 | assert datetime_utils.to_cftime(date, "360_day") == cftime.Datetime360Day( 32 | 2000, 1, 1 33 | ) 34 | assert datetime_utils.to_cftime(date, "365_day") == cftime.DatetimeNoLeap( 35 | 2000, 1, 1 36 | ) 37 | 38 | # Test array-like input 39 | for date in test_dates: 40 | assert datetime_utils.to_cftime([date], "standard")[0] == cftime.datetime( 41 | 2000, 1, 1 42 | ) 43 | assert
datetime_utils.to_cftime([date], "gregorian")[ 44 | 0 45 | ] == cftime.DatetimeGregorian(2000, 1, 1) 46 | assert datetime_utils.to_cftime([date], "360_day")[0] == cftime.Datetime360Day( 47 | 2000, 1, 1 48 | ) 49 | assert datetime_utils.to_cftime([date], "365_day")[0] == cftime.DatetimeNoLeap( 50 | 2000, 1, 1 51 | ) 52 | 53 | 54 | def test_to_timestamp(): 55 | """Test conversion of various datetime types to pandas timestamps""" 56 | test_dates = [ 57 | "2000-01-01", 58 | "2000-01-01 00:00:00", 59 | datetime(2000, 1, 1), 60 | np.datetime64("2000-01-01 00:00:00.000000000"), 61 | np.datetime64("2000-01-01 00:00:00"), 62 | pd.to_datetime("2000-01-01"), 63 | cftime.datetime(2000, 1, 1), 64 | cftime.DatetimeGregorian(2000, 1, 1), 65 | cftime.Datetime360Day(2000, 1, 1), 66 | cftime.DatetimeNoLeap(2000, 1, 1), 67 | ] 68 | 69 | for date in test_dates: 70 | assert datetime_utils.to_timestamp(date) == pd.to_datetime("2000-01-01") 71 | 72 | # Test array input 73 | for date in test_dates: 74 | assert datetime_utils.to_timestamp([date])[0] == pd.to_datetime("2000-01-01") 75 | 76 | 77 | def test_to_datetime(): 78 | """Test conversion of various datetime types to python datetime""" 79 | test_dates = [ 80 | "2000-01-01", 81 | "2000-01-01 00:00:00", 82 | datetime(2000, 1, 1), 83 | np.datetime64("2000-01-01 00:00:00.000000000"), 84 | np.datetime64("2000-01-01 00:00:00"), 85 | pd.to_datetime("2000-01-01"), 86 | cftime.datetime(2000, 1, 1), 87 | cftime.DatetimeGregorian(2000, 1, 1), 88 | cftime.Datetime360Day(2000, 1, 1), 89 | cftime.DatetimeNoLeap(2000, 1, 1), 90 | ] 91 | 92 | for date in test_dates: 93 | assert datetime_utils.to_datetime(date) == datetime(2000, 1, 1) 94 | 95 | # Test array input 96 | for date in test_dates: 97 | assert datetime_utils.to_datetime([date])[0] == datetime(2000, 1, 1) 98 | 99 | 100 | def test_to_datetime64(): 101 | """Test conversion of various datetime types to numpy datetime64""" 102 | test_dates = [ 103 | "2000-01-01", 104 | "2000-01-01 00:00:00", 105 | datetime(2000, 1, 1), 106 | np.datetime64("2000-01-01 00:00:00.000000000"), 107 | np.datetime64("2000-01-01 00:00:00"), 108 | pd.to_datetime("2000-01-01"), 109 | cftime.datetime(2000, 1, 1), 110 | cftime.DatetimeGregorian(2000, 1, 1), 111 | cftime.Datetime360Day(2000, 1, 1), 112 | cftime.DatetimeNoLeap(2000, 1, 1), 113 | ] 114 | 115 | for date in test_dates: 116 | assert datetime_utils.to_datetime64(date) == np.datetime64( 117 | "2000-01-01 00:00:00.000000000" 118 | ) 119 | 120 | # Test array input 121 | for date in test_dates: 122 | assert datetime_utils.to_datetime64([date])[0] == np.datetime64( 123 | "2000-01-01 00:00:00.000000000" 124 | ) 125 | 126 | 127 | def test_to_datestr(): 128 | """Test conversion of various datetime types to ISO format datestring""" 129 | test_dates = [ 130 | "2000-01-01", 131 | "2000-01-01 00:00:00", 132 | datetime(2000, 1, 1), 133 | np.datetime64("2000-01-01 00:00:00.000000000"), 134 | np.datetime64("2000-01-01 00:00:00"), 135 | pd.to_datetime("2000-01-01"), 136 | cftime.datetime(2000, 1, 1), 137 | cftime.DatetimeGregorian(2000, 1, 1), 138 | cftime.Datetime360Day(2000, 1, 1), 139 | cftime.DatetimeNoLeap(2000, 1, 1), 140 | ] 141 | 142 | for date in test_dates: 143 | assert ( 144 | datetime_utils.to_datestr(date) == "2000-01-01T00:00:00.000000000" 145 | or datetime_utils.to_datestr(date) == "2000-01-01T00:00:00" 146 | ) 147 | 148 | 149 | def test_to_datestr_array(): 150 | """Test conversion of arrays of various datetime types to ISO format 151 | datestring 152 | """ 153 | test_dates = [ 154 | 
"2000-01-01", 155 | "2000-01-01 00:00:00", 156 | datetime(2000, 1, 1), 157 | np.datetime64("2000-01-01 00:00:00.000000000"), 158 | np.datetime64("2000-01-01 00:00:00"), 159 | pd.to_datetime("2000-01-01"), 160 | cftime.datetime(2000, 1, 1), 161 | cftime.DatetimeGregorian(2000, 1, 1), 162 | cftime.Datetime360Day(2000, 1, 1), 163 | cftime.DatetimeNoLeap(2000, 1, 1), 164 | ] 165 | for date in test_dates: 166 | assert datetime_utils.to_datestr([date]) == [ 167 | "2000-01-01T00:00:00.000000000" 168 | ] or datetime_utils.to_datestr([date]) == ["2000-01-01T00:00:00"] 169 | 170 | 171 | def test_match_datetime_format(): 172 | """Test match_datetime_format for various datetime-like combinations""" 173 | test_dates = [ 174 | "2000-01-01T00:00:00.000000000", 175 | datetime(2000, 1, 1), 176 | np.datetime64("2000-01-01 00:00:00.000000000"), 177 | pd.to_datetime("2000-01-01"), 178 | cftime.datetime(2000, 1, 1), 179 | cftime.DatetimeGregorian(2000, 1, 1), 180 | cftime.Datetime360Day(2000, 1, 1), 181 | cftime.DatetimeNoLeap(2000, 1, 1), 182 | ] 183 | 184 | for target in test_dates: 185 | for date in test_dates: 186 | assert datetime_utils.match_datetime_format(date, target) == target 187 | 188 | 189 | def test_match_datetime_format_array(): 190 | """Test match_datetime_format for various datetime-like combinations with 191 | array input 192 | """ 193 | test_dates = [ 194 | "2000-01-01T00:00:00.000000000", 195 | datetime(2000, 1, 1), 196 | np.datetime64("2000-01-01 00:00:00.000000000"), 197 | pd.to_datetime("2000-01-01"), 198 | cftime.datetime(2000, 1, 1), 199 | cftime.DatetimeGregorian(2000, 1, 1), 200 | cftime.Datetime360Day(2000, 1, 1), 201 | cftime.DatetimeNoLeap(2000, 1, 1), 202 | ] 203 | 204 | for target in test_dates: 205 | for date in test_dates: 206 | assert datetime_utils.match_datetime_format([date], [target]) == np.array( 207 | [target] 208 | ) 209 | 210 | 211 | def test_match_datetime_format_error(): 212 | """Test that if a non datetime-like object is provided as target to 213 | match_datetime_format that a ValueError is raised: 214 | """ 215 | with pytest.raises(ValueError, match="Target is not a valid datetime*"): 216 | datetime_utils.match_datetime_format(datetime(2000, 1, 1), 1.5) 217 | -------------------------------------------------------------------------------- /tobac/tests/test_decorators.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for tobac.utils.decorators 3 | """ 4 | 5 | import numpy as np 6 | import pandas as pd 7 | import xarray as xr 8 | import iris 9 | 10 | from tobac.utils import decorators 11 | 12 | 13 | def test_convert_cube_to_dataarray(): 14 | test_da_float = xr.DataArray(np.arange(15, dtype=float).reshape(3, 5) + 0.5) 15 | test_da_int = xr.DataArray(np.arange(15, dtype=int).reshape(3, 5)) 16 | 17 | assert np.all( 18 | decorators.convert_cube_to_dataarray(test_da_float.to_iris()) 19 | == test_da_float.values 20 | ) 21 | assert np.all( 22 | decorators.convert_cube_to_dataarray(test_da_int.to_iris()) 23 | == test_da_int.values 24 | ) 25 | 26 | 27 | def test_conv_kwargs_iris_to_xarray(): 28 | assert decorators._conv_kwargs_iris_to_xarray({}) == {} 29 | assert decorators._conv_kwargs_iris_to_xarray(dict(test_int=1)) == dict(test_int=1) 30 | 31 | test_da = xr.DataArray(np.arange(5)) 32 | 33 | test_xr_kwarg = decorators._conv_kwargs_iris_to_xarray(dict(test_xr=test_da)) 34 | assert isinstance(test_xr_kwarg["test_xr"], xr.DataArray) 35 | 36 | test_iris_kwarg = decorators._conv_kwargs_iris_to_xarray( 37 | 
dict(test_iris=test_da.to_iris()) 38 | ) 39 | assert isinstance(test_iris_kwarg["test_iris"], xr.DataArray) 40 | 41 | 42 | def test_conv_kwargs_irispandas_to_xarray(): 43 | assert decorators._conv_kwargs_irispandas_to_xarray({}) == {} 44 | assert decorators._conv_kwargs_irispandas_to_xarray(dict(test_int=1)) == dict( 45 | test_int=1 46 | ) 47 | 48 | test_da = xr.DataArray(np.arange(5)) 49 | 50 | test_xr_kwarg = decorators._conv_kwargs_irispandas_to_xarray(dict(test_xr=test_da)) 51 | assert isinstance(test_xr_kwarg["test_xr"], xr.DataArray) 52 | 53 | test_iris_kwarg = decorators._conv_kwargs_irispandas_to_xarray( 54 | dict(test_iris=test_da.to_iris()) 55 | ) 56 | assert isinstance(test_iris_kwarg["test_iris"], xr.DataArray) 57 | 58 | test_ds = xr.Dataset({"test": test_da}) 59 | test_ds_kwarg = decorators._conv_kwargs_irispandas_to_xarray(dict(test_xr=test_ds)) 60 | assert isinstance(test_ds_kwarg["test_xr"], xr.Dataset) 61 | 62 | test_pd_kwarg = decorators._conv_kwargs_irispandas_to_xarray( 63 | dict(test_pd=test_ds.to_pandas()) 64 | ) 65 | assert isinstance(test_pd_kwarg["test_pd"], xr.Dataset) 66 | 67 | 68 | def test_conv_kwargs_xarray_to_iris(): 69 | assert decorators._conv_kwargs_xarray_to_iris({}) == {} 70 | assert decorators._conv_kwargs_xarray_to_iris(dict(test_int=1)) == dict(test_int=1) 71 | 72 | test_da = xr.DataArray(np.arange(5)) 73 | 74 | test_xr_kwarg = decorators._conv_kwargs_xarray_to_iris(dict(test_xr=test_da)) 75 | assert isinstance(test_xr_kwarg["test_xr"], iris.cube.Cube) 76 | 77 | test_iris_kwarg = decorators._conv_kwargs_xarray_to_iris( 78 | dict(test_iris=test_da.to_iris()) 79 | ) 80 | assert isinstance(test_iris_kwarg["test_iris"], iris.cube.Cube) 81 | 82 | 83 | def test_conv_kwargs_xarray_to_irispandas(): 84 | assert decorators._conv_kwargs_xarray_to_irispandas({}) == {} 85 | assert decorators._conv_kwargs_xarray_to_irispandas(dict(test_int=1)) == dict( 86 | test_int=1 87 | ) 88 | 89 | test_da = xr.DataArray(np.arange(5)) 90 | 91 | test_xr_kwarg = decorators._conv_kwargs_xarray_to_irispandas(dict(test_xr=test_da)) 92 | assert isinstance(test_xr_kwarg["test_xr"], iris.cube.Cube) 93 | 94 | test_iris_kwarg = decorators._conv_kwargs_xarray_to_irispandas( 95 | dict(test_iris=test_da.to_iris()) 96 | ) 97 | assert isinstance(test_iris_kwarg["test_iris"], iris.cube.Cube) 98 | 99 | test_ds = xr.Dataset({"test": test_da}) 100 | test_ds_kwarg = decorators._conv_kwargs_xarray_to_irispandas(dict(test_xr=test_ds)) 101 | assert isinstance(test_ds_kwarg["test_xr"], pd.DataFrame) 102 | 103 | test_pd_kwarg = decorators._conv_kwargs_xarray_to_irispandas( 104 | dict(test_pd=test_ds.to_pandas()) 105 | ) 106 | assert isinstance(test_pd_kwarg["test_pd"], pd.DataFrame) 107 | 108 | 109 | @decorators.iris_to_xarray(save_iris_info=True) 110 | def _test_iris_to_xarray(*args, **kwargs): 111 | return kwargs["converted_from_iris"] 112 | 113 | 114 | def test_iris_to_xarray(): 115 | test_da = xr.DataArray(np.arange(5)) 116 | 117 | assert _test_iris_to_xarray(test_da) == False 118 | assert _test_iris_to_xarray(kwarg_xr=test_da) == False 119 | 120 | assert _test_iris_to_xarray(test_da.to_iris()) == True 121 | assert _test_iris_to_xarray(kwarg_ir=test_da.to_iris()) == True 122 | 123 | 124 | @decorators.irispandas_to_xarray(save_iris_info=True) 125 | def _test_irispandas_to_xarray(*args, **kwargs): 126 | return kwargs["converted_from_iris"] 127 | 128 | 129 | def test_irispandas_to_xarray(): 130 | test_da = xr.DataArray(np.arange(5)) 131 | 132 | assert _test_irispandas_to_xarray(test_da) == False 133 | 
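# passing the DataArray as a keyword argument should likewise report no iris conversion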
assert _test_irispandas_to_xarray(kwarg_xr=test_da) == False 134 | 135 | assert _test_irispandas_to_xarray(test_da.to_iris()) == True 136 | assert _test_irispandas_to_xarray(kwarg_ir=test_da.to_iris()) == True 137 | 138 | 139 | @decorators.xarray_to_irispandas() 140 | def _test_xarray_to_irispandas(*args, **kwargs): 141 | return args, kwargs 142 | 143 | 144 | def test_xarray_to_irispandas(): 145 | test_da = xr.DataArray(np.arange(5, dtype=float)) 146 | 147 | assert isinstance(_test_xarray_to_irispandas(test_da)[0][0], iris.cube.Cube) 148 | assert _test_xarray_to_irispandas(test_da)[1] == {} 149 | -------------------------------------------------------------------------------- /tobac/tests/test_generators.py: -------------------------------------------------------------------------------- 1 | """Unit tests for tobac.utils.generators module""" 2 | 3 | from datetime import datetime, timedelta 4 | 5 | import cftime 6 | import numpy as np 7 | import pandas as pd 8 | import pytest 9 | import xarray as xr 10 | from pandas.testing import assert_frame_equal 11 | 12 | from tobac.utils import generators 13 | 14 | 15 | def test_field_and_features_over_time(): 16 | """Test iterating over field_and_features_over_time generator""" 17 | test_data = xr.DataArray( 18 | np.zeros([2, 10, 10]), 19 | dims=("time", "y", "x"), 20 | coords={"time": [datetime(2000, 1, 1), datetime(2000, 1, 1, 1)]}, 21 | ) 22 | 23 | test_features = pd.DataFrame( 24 | { 25 | "feature": [1, 2, 3], 26 | "frame": [0, 0, 1], 27 | "time": [ 28 | datetime(2000, 1, 1), 29 | datetime(2000, 1, 1), 30 | datetime(2000, 1, 1, 1), 31 | ], 32 | } 33 | ) 34 | 35 | iterator = generators.field_and_features_over_time(test_data, test_features) 36 | 37 | iter_0 = next(iterator) 38 | 39 | assert iter_0[0] == 0 40 | assert iter_0[1] == np.datetime64("2000-01-01") 41 | assert np.all(iter_0[2] == test_data.isel(time=0)) 42 | assert_frame_equal( 43 | iter_0[3], test_features[test_features.time == datetime(2000, 1, 1)] 44 | ) 45 | 46 | iter_1 = next(iterator) 47 | 48 | assert iter_1[0] == 1 49 | assert iter_1[1] == np.datetime64("2000-01-01 01:00:00") 50 | assert np.all(iter_1[2] == test_data.isel(time=1)) 51 | assert_frame_equal( 52 | iter_1[3], test_features[test_features.time == datetime(2000, 1, 1, 1)] 53 | ) 54 | 55 | with pytest.raises(StopIteration): 56 | next(iterator) 57 | 58 | 59 | def test_field_and_features_over_time_time_padding(): 60 | """Test the time_padding functionality of field_and_features_over_time 61 | generator 62 | """ 63 | test_data = xr.DataArray( 64 | np.zeros([1, 10, 10]), 65 | dims=("time", "y", "x"), 66 | coords={"time": [datetime(2000, 1, 1)]}, 67 | ) 68 | 69 | test_features = pd.DataFrame( 70 | { 71 | "feature": [1, 2, 3], 72 | "frame": [0, 0, 0], 73 | "time": [ 74 | datetime(2000, 1, 1), 75 | datetime(2000, 1, 1, 0, 0, 1), 76 | datetime(2000, 1, 1, 0, 0, 2), 77 | ], 78 | } 79 | ) 80 | 81 | # Test no time padding 82 | _, _, _, df_slice = next( 83 | generators.field_and_features_over_time(test_data, test_features) 84 | ) 85 | 86 | assert len(df_slice) == 1 87 | assert_frame_equal(df_slice, test_features.loc[0:0]) 88 | 89 | # Test time padding of 1 second 90 | _, _, _, df_slice = next( 91 | generators.field_and_features_over_time( 92 | test_data, test_features, time_padding=timedelta(seconds=1) 93 | ) 94 | ) 95 | 96 | assert len(df_slice) == 2 97 | assert_frame_equal(df_slice, test_features.loc[0:1]) 98 | 99 | # Test time padding of 2 seconds 100 | _, _, _, df_slice = next( 101 | generators.field_and_features_over_time( 102 | 
test_data, test_features, time_padding=timedelta(seconds=2) 103 | ) 104 | ) 105 | 106 | assert len(df_slice) == 3 107 | assert_frame_equal(df_slice, test_features.loc[0:2]) 108 | 109 | 110 | def test_field_and_features_over_time_cftime(): 111 | """Test field_and_features_over_time when given cftime datetime formats""" 112 | test_data = xr.DataArray( 113 | np.zeros([2, 10, 10]), 114 | dims=("time", "y", "x"), 115 | coords={ 116 | "time": [ 117 | cftime.Datetime360Day(2000, 1, 1), 118 | cftime.Datetime360Day(2000, 1, 1, 1), 119 | ] 120 | }, 121 | ) 122 | 123 | test_features = pd.DataFrame( 124 | { 125 | "feature": [1, 2, 3], 126 | "frame": [0, 0, 1], 127 | "time": [ 128 | cftime.Datetime360Day(2000, 1, 1), 129 | cftime.Datetime360Day(2000, 1, 1, 0, 0, 1), 130 | cftime.Datetime360Day(2000, 1, 1, 1), 131 | ], 132 | } 133 | ) 134 | 135 | iterator = generators.field_and_features_over_time( 136 | test_data, test_features, time_padding=timedelta(seconds=1) 137 | ) 138 | 139 | iter_0 = next(iterator) 140 | 141 | assert iter_0[0] == 0 142 | assert iter_0[1] == cftime.Datetime360Day(2000, 1, 1) 143 | assert np.all(iter_0[2] == test_data.isel(time=0)) 144 | assert_frame_equal(iter_0[3], test_features.loc[0:1]) 145 | 146 | iter_1 = next(iterator) 147 | 148 | assert iter_1[0] == 1 149 | assert iter_1[1] == cftime.Datetime360Day(2000, 1, 1, 1) 150 | assert np.all(iter_1[2] == test_data.isel(time=1)) 151 | assert_frame_equal( 152 | iter_1[3], 153 | test_features[test_features.time == cftime.Datetime360Day(2000, 1, 1, 1)], 154 | ) 155 | 156 | with pytest.raises(StopIteration): 157 | next(iterator) 158 | 159 | 160 | def test_field_and_features_over_time_time_var_name(): 161 | """Test field_and_features_over_time generator works correctly with a time 162 | coordinate name other than "time" 163 | """ 164 | # Test non-standard time coord name: 165 | test_data = xr.DataArray( 166 | np.zeros([2, 10, 10]), 167 | dims=("time_testing", "y", "x"), 168 | coords={"time_testing": [datetime(2000, 1, 1), datetime(2000, 1, 1, 1)]}, 169 | ) 170 | 171 | test_features = pd.DataFrame( 172 | { 173 | "feature": [1, 2, 3], 174 | "frame": [0, 0, 1], 175 | "time_testing": [ 176 | datetime(2000, 1, 1), 177 | datetime(2000, 1, 1), 178 | datetime(2000, 1, 1, 1), 179 | ], 180 | } 181 | ) 182 | 183 | _ = next( 184 | generators.field_and_features_over_time( 185 | test_data, test_features, time_var_name="time_testing" 186 | ) 187 | ) 188 | 189 | 190 | def test_field_and_features_over_time_time_var_name_error(): 191 | """Test that field_and_features_over_time generator raises the correct 192 | error when the name of the time coordinates do not match between the given 193 | data and dataframe 194 | """ 195 | # Test if time_var_name not in dataarray: 196 | test_data = xr.DataArray( 197 | np.zeros([2, 10, 10]), 198 | dims=("time_testing", "y", "x"), 199 | coords={"time_testing": [datetime(2000, 1, 1), datetime(2000, 1, 1, 1)]}, 200 | ) 201 | 202 | test_features = pd.DataFrame( 203 | { 204 | "feature": [1, 2, 3], 205 | "frame": [0, 0, 1], 206 | "time": [ 207 | datetime(2000, 1, 1), 208 | datetime(2000, 1, 1), 209 | datetime(2000, 1, 1, 1), 210 | ], 211 | } 212 | ) 213 | 214 | with pytest.raises(ValueError, match="time not present in input field*"): 215 | next(generators.field_and_features_over_time(test_data, test_features)) 216 | 217 | # Test if time var name not in dataframe: 218 | test_data = xr.DataArray( 219 | np.zeros([2, 10, 10]), 220 | dims=("time", "y", "x"), 221 | coords={"time": [datetime(2000, 1, 1), datetime(2000, 1, 1, 1)]}, 222 
| ) 223 | 224 | test_features = pd.DataFrame( 225 | { 226 | "feature": [1, 2, 3], 227 | "frame": [0, 0, 1], 228 | "time_testing": [ 229 | datetime(2000, 1, 1), 230 | datetime(2000, 1, 1), 231 | datetime(2000, 1, 1, 1), 232 | ], 233 | } 234 | ) 235 | 236 | with pytest.raises(ValueError, match="time not present in input feature*"): 237 | next(generators.field_and_features_over_time(test_data, test_features)) 238 | -------------------------------------------------------------------------------- /tobac/tests/test_import.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import tobac 3 | 4 | 5 | def test_dummy_function(): 6 | assert 1 == 1 7 | 8 | 9 | def test_version(): 10 | """Test to make sure that we have a version number included. 11 | Also test to make sure that the version number complies with 12 | semantic versioning guidelines. 13 | If it's not, this should result in an error. 14 | """ 15 | import re 16 | 17 | assert type(tobac.__version__) is str 18 | # Make sure that we are following semantic versioning 19 | # i.e., our version is of form x.x.x, where x are all 20 | # integer numbers. 21 | assert re.match(r"[0-9]+\.[0-9]+\.[0-9]+", tobac.__version__) is not None 22 | -------------------------------------------------------------------------------- /tobac/tests/test_iris_xarray_match_utils.py: -------------------------------------------------------------------------------- 1 | """Tests to confirm that xarray and iris pathways work the same and produce the same data 2 | for the same input datasets. 3 | """ 4 | 5 | from __future__ import annotations 6 | 7 | import copy 8 | import datetime 9 | 10 | import iris.cube 11 | import numpy as np 12 | import pandas as pd 13 | import xarray as xr 14 | import pytest 15 | 16 | 17 | import tobac.testing as tbtest 18 | import tobac.utils.internal.iris_utils as iris_utils 19 | import tobac.utils.internal.xarray_utils as xr_utils 20 | import tobac.utils.datetime as datetime_utils 21 | from tobac.utils.decorators import convert_cube_to_dataarray 22 | 23 | 24 | @pytest.mark.parametrize( 25 | "feature_positions, coordinates, expected_val", 26 | [ 27 | ( 28 | ((0, 0, 0), (9, 9, 9)), 29 | {"x": ("x", np.linspace(0, 10, 10)), "z": ("z", np.linspace(0, 10, 10))}, 30 | {"x": (0, 10)}, 31 | ), 32 | ( 33 | ((0, 0), (9, 9)), 34 | {"x": ("x", np.linspace(0, 10, 10))}, 35 | {"x": (0, 10)}, 36 | ), 37 | ( 38 | ((0, 0), (9, 9), (5, 7)), 39 | { 40 | "longitude": ("x", np.linspace(-30, 60, 10)), 41 | "latitude": ("y", np.linspace(-70, 20, 10)), 42 | }, 43 | {"latitude": (-70, 20, 0), "longitude": (-30, 60, 20)}, 44 | ), 45 | ( 46 | ((0, 0), (9, 9), (5, 7), (3.6, 7.9)), 47 | { 48 | "longitude": ( 49 | ("x", "y"), 50 | np.arange(-180, -80).reshape(10, -1), 51 | ), 52 | "latitude": (("x", "y"), np.arange(-50, 50).reshape(10, -1)), 53 | }, 54 | { 55 | "latitude": (-50, 49, 7, -6.1), 56 | "longitude": (-180, -81, -123, -136.1), 57 | }, 58 | ), 59 | ], 60 | ) 61 | def test_add_coordinates_xarray_base( 62 | feature_positions: tuple[tuple[float]], 63 | coordinates: dict[str, tuple[str, np.ndarray]], 64 | expected_val: dict[str, tuple[float]], 65 | ): 66 | """ 67 | Test that adding coordinates via xarray and iris gives equal results, using an 68 | xarray generated dataset as the base.
69 | 70 | Parameters 71 | ---------- 72 | feature_positions: tuple of tuple of floats 73 | Locations of the features to test in (hdim_1, hdim_2, zdim [optional]) coordinates 74 | coordinates: dict, key: str; value: tuple of str, numpy array 75 | Coordinates to use, in xarray coordinate style. Dims will be ('x', 'y', 'z') for 3D 76 | data (determined by feature_positions) and ('x', 'y') for 2D data. All axes will have 77 | size 10. 78 | expected_val: dict, key: str; value: tuple of floats 79 | Expected interpolated coordinates 80 | 81 | """ 82 | 83 | all_indiv_feats = [] 84 | if len(feature_positions[0]) == 2: 85 | is_3D = False 86 | elif len(feature_positions[0]) == 3: 87 | is_3D = True 88 | else: 89 | raise ValueError("Feature positions should be 2 or 3D") 90 | for i, single_feat_position in enumerate(feature_positions): 91 | if not is_3D and len(single_feat_position) == 2: 92 | all_indiv_feats.append( 93 | tbtest.generate_single_feature( 94 | single_feat_position[0], 95 | single_feat_position[1], 96 | feature_num=i, 97 | max_h1=10, 98 | max_h2=10, 99 | ) 100 | ) 101 | elif is_3D and len(single_feat_position) == 3: 102 | all_indiv_feats.append( 103 | tbtest.generate_single_feature( 104 | single_feat_position[0], 105 | single_feat_position[1], 106 | start_v=single_feat_position[2], 107 | feature_num=i, 108 | max_h1=10, 109 | max_h2=10, 110 | ) 111 | ) 112 | 113 | else: 114 | raise ValueError("Feature positions should be 2 or 3D") 115 | 116 | all_feats = pd.concat(all_indiv_feats) 117 | 118 | da_size = (1, 10, 10, 10) if is_3D else (1, 10, 10) 119 | dims = ("time", "x", "y", "z") if is_3D else ("time", "x", "y") 120 | coordinates["time"] = np.array((datetime.datetime(2000, 1, 1, 0),)) 121 | da_with_coords = xr.DataArray(data=np.empty(da_size), dims=dims, coords=coordinates) 122 | if is_3D: 123 | iris_coord_interp = iris_utils.add_coordinates_3D( 124 | all_feats, da_with_coords.to_iris() 125 | ) 126 | xr_coord_interp = xr_utils.add_coordinates_to_features( 127 | all_feats, da_with_coords 128 | ) 129 | 130 | else: 131 | iris_coord_interp = iris_utils.add_coordinates( 132 | all_feats, da_with_coords.to_iris() 133 | ) 134 | xr_coord_interp = xr_utils.add_coordinates_to_features( 135 | all_feats, da_with_coords 136 | ) 137 | for val_name in expected_val: 138 | np.testing.assert_almost_equal( 139 | iris_coord_interp[val_name], expected_val[val_name] 140 | ) 141 | np.testing.assert_almost_equal( 142 | xr_coord_interp[val_name], expected_val[val_name] 143 | ) 144 | 145 | # assert (iris_coord_interp[val_name] == expected_val[val_name]).all() 146 | # assert (xr_coord_interp[val_name] == expected_val[val_name]).all() 147 | 148 | # Convert datetimes to ensure that they are the same type: 149 | xr_coord_interp["time"] = datetime_utils.match_datetime_format( 150 | xr_coord_interp.time, iris_coord_interp.time 151 | ) 152 | 153 | pd.testing.assert_frame_equal(iris_coord_interp, xr_coord_interp) 154 | 155 | 156 | @pytest.mark.parametrize( 157 | "coordinate_names, coordinate_standard_names", 158 | [(("lat",), ("latitude",))], 159 | ) 160 | def test_add_coordinates_xarray_std_names( 161 | coordinate_names: tuple[str], 162 | coordinate_standard_names: tuple[str], 163 | ): 164 | """ 165 | Test that adding coordinates for xarray and iris result in the same coordinate names 166 | when standard_names are added to the xarray coordinates 167 | 168 | Parameters 169 | ---------- 170 | coordinate_names: tuple of str 171 | names of coordinates to give 172 | coordinate_standard_name: tuple of str 173 | standard_names of 
coordinates to give 174 | 175 | """ 176 | 177 | all_feats = tbtest.generate_single_feature( 178 | 0, 179 | 0, 180 | feature_num=1, 181 | max_h1=10, 182 | max_h2=10, 183 | ) 184 | 185 | da_size = (1, 10, 10) 186 | dims = ("time", "x", "y") 187 | coordinates = dict() 188 | coordinates["time"] = np.array((datetime.datetime(2000, 1, 1, 0),)) 189 | 190 | for coord_name, coord_standard_name in zip( 191 | coordinate_names, coordinate_standard_names 192 | ): 193 | coordinates[coord_name] = xr.DataArray(data=np.arange(10), dims="x") 194 | coordinates[coord_name].attrs["standard_name"] = coord_standard_name 195 | 196 | da_with_coords = xr.DataArray(data=np.empty(da_size), dims=dims, coords=coordinates) 197 | 198 | iris_coord_interp = iris_utils.add_coordinates( 199 | copy.deepcopy(all_feats), da_with_coords.to_iris() 200 | ) 201 | xr_coord_interp = xr_utils.add_coordinates_to_features( 202 | copy.deepcopy(all_feats), da_with_coords 203 | ) 204 | xr_coord_interp["time"] = datetime_utils.match_datetime_format( 205 | xr_coord_interp.time, iris_coord_interp.time 206 | ) 207 | pd.testing.assert_frame_equal(iris_coord_interp, xr_coord_interp) 208 | 209 | 210 | def test_preserve_iris_datetime_types(): 211 | """ 212 | Test that xarray.add_coordinates_to_features correctly returns the same time types as 213 | iris when preserve_iris_datetime_types = True. 214 | """ 215 | 216 | all_feats = tbtest.generate_single_feature( 217 | 0, 218 | 0, 219 | feature_num=1, 220 | max_h1=10, 221 | max_h2=10, 222 | ) 223 | var_array: iris.cube.Cube = tbtest.make_simple_sample_data_2D(data_type="iris") 224 | 225 | xarray_output = xr_utils.add_coordinates_to_features( 226 | all_feats, 227 | convert_cube_to_dataarray(var_array, preserve_iris_datetime_types=True), 228 | ) 229 | iris_output = iris_utils.add_coordinates(all_feats, var_array) 230 | 231 | pd.testing.assert_frame_equal(xarray_output, iris_output) 232 | assert xarray_output["time"].values[0] == iris_output["time"].values[0] 233 | assert isinstance( 234 | xarray_output["time"].values[0], type(iris_output["time"].values[0]) 235 | ) 236 | 237 | xarray_output_datetime_preserve_off = xr_utils.add_coordinates_to_features( 238 | all_feats, 239 | convert_cube_to_dataarray(var_array, preserve_iris_datetime_types=False), 240 | ) 241 | 242 | assert isinstance( 243 | xarray_output_datetime_preserve_off["time"].values[0], np.datetime64 244 | ) 245 | -------------------------------------------------------------------------------- /tobac/tests/test_sample_data.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for tobac based on simple sample datasets with moving blobs. These tests should be adapted to be more modular in the future. 
3 | """ 4 | 5 | from tobac.testing import ( 6 | make_sample_data_2D_3blobs, 7 | make_sample_data_2D_3blobs_inv, 8 | make_sample_data_3D_3blobs, 9 | ) 10 | from tobac import ( 11 | feature_detection_multithreshold, 12 | linking_trackpy, 13 | get_spacings, 14 | segmentation_2D, 15 | segmentation_3D, 16 | ) 17 | from iris.analysis import MEAN, MAX, MIN 18 | from pandas.testing import assert_frame_equal 19 | from numpy.testing import assert_allclose 20 | import pandas as pd 21 | 22 | 23 | def test_sample_data(): 24 | """ 25 | Test to make sure that sample datasets in the following tests are set up the right way 26 | """ 27 | sample_data = make_sample_data_2D_3blobs() 28 | sample_data_inv = make_sample_data_2D_3blobs_inv() 29 | 30 | assert sample_data.coord("projection_x_coordinate") == sample_data_inv.coord( 31 | "projection_x_coordinate" 32 | ) 33 | assert sample_data.coord("projection_y_coordinate") == sample_data_inv.coord( 34 | "projection_y_coordinate" 35 | ) 36 | assert sample_data.coord("time") == sample_data_inv.coord("time") 37 | minimum = sample_data.collapsed( 38 | ("time", "projection_x_coordinate", "projection_y_coordinate"), MIN 39 | ).data 40 | minimum_inv = sample_data_inv.collapsed( 41 | ("time", "projection_x_coordinate", "projection_y_coordinate"), MIN 42 | ).data 43 | assert_allclose(minimum, minimum_inv) 44 | mean = sample_data.collapsed( 45 | ("time", "projection_x_coordinate", "projection_y_coordinate"), MEAN 46 | ).data 47 | mean_inv = sample_data_inv.collapsed( 48 | ("time", "projection_x_coordinate", "projection_y_coordinate"), MEAN 49 | ).data 50 | assert_allclose(mean, mean_inv) 51 | 52 | 53 | def test_tracking_coord_order(): 54 | """ 55 | Test a tracking applications to make sure that coordinate order does not lead to different results 56 | """ 57 | sample_data = make_sample_data_2D_3blobs() 58 | sample_data_inv = make_sample_data_2D_3blobs_inv() 59 | # Keyword arguments for feature detection step: 60 | parameters_features = {} 61 | parameters_features["position_threshold"] = "weighted_diff" 62 | parameters_features["sigma_threshold"] = 0.5 63 | parameters_features["min_distance"] = 0 64 | parameters_features["sigma_threshold"] = 1 65 | parameters_features["threshold"] = [3, 5, 10] # m/s 66 | parameters_features["n_erosion_threshold"] = 0 67 | parameters_features["n_min_threshold"] = 3 68 | 69 | # calculate dxy,dt 70 | dxy, dt = get_spacings(sample_data) 71 | dxy_inv, dt_inv = get_spacings(sample_data_inv) 72 | 73 | # Test that dt and dxy are the same for different order of coordinates 74 | assert_allclose(dxy, dxy_inv) 75 | assert_allclose(dt, dt_inv) 76 | 77 | # Test that dt and dxy are as expected 78 | assert_allclose(dt, 60) 79 | assert_allclose(dxy, 1000) 80 | 81 | # Find features 82 | Features = feature_detection_multithreshold(sample_data, dxy, **parameters_features) 83 | Features_inv = feature_detection_multithreshold( 84 | sample_data_inv, dxy_inv, **parameters_features 85 | ) 86 | 87 | # Assert that output of feature detection not empty: 88 | assert type(Features) == pd.core.frame.DataFrame 89 | assert type(Features_inv) == pd.core.frame.DataFrame 90 | assert not Features.empty 91 | assert not Features_inv.empty 92 | 93 | # perform watershedding segmentation 94 | parameters_segmentation = {} 95 | parameters_segmentation["target"] = "maximum" 96 | parameters_segmentation["method"] = "watershed" 97 | 98 | segmentation_mask, features_segmentation = segmentation_2D( 99 | Features, sample_data, dxy=dxy, **parameters_segmentation 100 | ) 101 | 
segmentation_mask_inv, features_segmentation = segmentation_2D( 102 | Features_inv, sample_data_inv, dxy=dxy_inv, **parameters_segmentation 103 | ) 104 | 105 | # perform trajectory linking 106 | 107 | parameters_linking = {} 108 | parameters_linking["method_linking"] = "predict" 109 | parameters_linking["adaptive_stop"] = 0.2 110 | parameters_linking["adaptive_step"] = 0.95 111 | parameters_linking["extrapolate"] = 0 112 | parameters_linking["order"] = 1 113 | parameters_linking["subnetwork_size"] = 100 114 | parameters_linking["memory"] = 0 115 | parameters_linking["time_cell_min"] = 5 * 60 116 | parameters_linking["method_linking"] = "predict" 117 | parameters_linking["v_max"] = 100 118 | 119 | Track = linking_trackpy(Features, sample_data, dt=dt, dxy=dxy, **parameters_linking) 120 | Track_inv = linking_trackpy( 121 | Features_inv, sample_data_inv, dt=dt_inv, dxy=dxy_inv, **parameters_linking 122 | ) 123 | 124 | 125 | def test_tracking_3D(): 126 | """ 127 | Test a 3D tracking application to make sure that coordinate order does not lead to different results 128 | """ 129 | sample_data = make_sample_data_3D_3blobs() 130 | sample_data_inv = make_sample_data_3D_3blobs(invert_xy=True) 131 | # Keyword arguments for feature detection step: 132 | parameters_features = {} 133 | parameters_features["position_threshold"] = "weighted_diff" 134 | parameters_features["sigma_threshold"] = 0.5 135 | parameters_features["min_distance"] = 0 136 | parameters_features["sigma_threshold"] = 1 137 | parameters_features["threshold"] = [3, 5, 10] # m/s 138 | parameters_features["n_erosion_threshold"] = 0 139 | parameters_features["n_min_threshold"] = 3 140 | 141 | sample_data_max = sample_data.collapsed("geopotential_height", MAX) 142 | sample_data_max_inv = sample_data_inv.collapsed("geopotential_height", MAX) 143 | 144 | # calculate dxy,dt 145 | dxy, dt = get_spacings(sample_data_max) 146 | dxy_inv, dt_inv = get_spacings(sample_data_max_inv) 147 | 148 | # Test that dt and dxy are the same for different order of coordinates 149 | assert_allclose(dxy, dxy_inv) 150 | assert_allclose(dt, dt_inv) 151 | 152 | # Test that dt and dxy are as expected 153 | assert_allclose(dt, 120) 154 | assert_allclose(dxy, 1000) 155 | 156 | # Find features 157 | Features = feature_detection_multithreshold( 158 | sample_data_max, dxy, **parameters_features 159 | ) 160 | Features_inv = feature_detection_multithreshold( 161 | sample_data_max_inv, dxy_inv, **parameters_features 162 | ) 163 | 164 | # perform watershedding segmentation 165 | parameters_segmentation = {} 166 | parameters_segmentation["target"] = "maximum" 167 | parameters_segmentation["method"] = "watershed" 168 | 169 | segmentation_mask, features_segmentation = segmentation_3D( 170 | Features, sample_data_max, dxy=dxy, **parameters_segmentation 171 | ) 172 | segmentation_mask_inv, features_segmentation = segmentation_3D( 173 | Features_inv, sample_data_max_inv, dxy=dxy_inv, **parameters_segmentation 174 | ) 175 | 176 | # perform trajectory linking 177 | 178 | parameters_linking = {} 179 | parameters_linking["method_linking"] = "predict" 180 | parameters_linking["adaptive_stop"] = 0.2 181 | parameters_linking["adaptive_step"] = 0.95 182 | parameters_linking["extrapolate"] = 0 183 | parameters_linking["order"] = 1 184 | parameters_linking["subnetwork_size"] = 100 185 | parameters_linking["memory"] = 0 186 | parameters_linking["time_cell_min"] = 5 * 60 187 | parameters_linking["method_linking"] = "predict" 188 | parameters_linking["v_max"] = 100 189 | 190 | Track = 
linking_trackpy(Features, sample_data, dt=dt, dxy=dxy, **parameters_linking) 191 | Track_inv = linking_trackpy( 192 | Features_inv, sample_data_inv, dt=dt_inv, dxy=dxy_inv, **parameters_linking 193 | ) 194 | 195 | # Assert that tracking output is not empty: 196 | assert not Track.empty 197 | assert not Track_inv.empty 198 | -------------------------------------------------------------------------------- /tobac/tests/test_utils_internal.py: -------------------------------------------------------------------------------- 1 | import tobac.utils.internal as internal_utils 2 | import tobac.testing as tbtest 3 | 4 | import pytest 5 | import numpy as np 6 | import xarray as xr 7 | 8 | 9 | @pytest.mark.parametrize( 10 | "dset_type, time_axis, vertical_axis, expected_out", 11 | [ 12 | ("iris", 0, 1, (2, 3)), 13 | ("iris", -1, 0, (1, 2)), 14 | ("iris", 0, -1, (1, 2)), 15 | ("iris", 0, 2, (1, 3)), 16 | ("iris", 3, 0, (1, 2)), 17 | ("iris", 0, 3, (1, 2)), 18 | ("iris", 1, 2, (0, 3)), 19 | ("xarray", 0, 1, (2, 3)), 20 | ("xarray", 0, 2, (1, 3)), 21 | ("xarray", 3, 0, (1, 2)), 22 | ("xarray", 0, 3, (1, 2)), 23 | ("xarray", 1, 2, (0, 3)), 24 | ], 25 | ) 26 | def test_find_hdim_axes_3D(dset_type, time_axis, vertical_axis, expected_out): 27 | """Tests tobac.utils.internal.find_hdim_axes_3D 28 | 29 | Parameters 30 | ---------- 31 | dset_type: str{"xarray" or "iris"} 32 | type of the dataset to generate 33 | time_axis: int 34 | axis number of the time coordinate (or -1 to not have one) 35 | vertical_axis: int 36 | axis number of the vertical coordinate (or -1 to not have one) 37 | expected_out: tuple (int, int) 38 | expected output 39 | """ 40 | ndims = 2 + (1 if time_axis >= 0 else 0) + (1 if vertical_axis >= 0 else 0) 41 | test_dset_size = [2] * ndims 42 | 43 | test_data = np.zeros(test_dset_size) 44 | 45 | dset_opts = { 46 | "in_arr": test_data, 47 | "data_type": dset_type, 48 | } 49 | if time_axis >= 0: 50 | dset_opts["time_dim_num"] = time_axis 51 | if vertical_axis >= 0: 52 | dset_opts["z_dim_num"] = vertical_axis 53 | dset_opts["z_dim_name"] = "altitude" 54 | 55 | y_set = False 56 | for dim_number in range(ndims): 57 | if time_axis != dim_number and vertical_axis != dim_number: 58 | if not y_set: 59 | dset_opts["y_dim_num"] = dim_number 60 | y_set = True 61 | else: 62 | dset_opts["x_dim_num"] = dim_number 63 | 64 | cube_test = tbtest.make_dataset_from_arr(**dset_opts) 65 | 66 | out_coords = internal_utils.find_hdim_axes_3D(cube_test) 67 | 68 | assert out_coords == expected_out 69 | 70 | 71 | @pytest.mark.parametrize( 72 | "lat_name, lon_name, lat_name_test, lon_name_test, expected_result", 73 | [ 74 | ("lat", "lon", None, None, ("lat", "lon")), 75 | ("lat", "long", None, None, ("lat", "long")), 76 | ("lat", "altitude", None, None, ("lat", None)), 77 | ("lat", "longitude", "lat", "longitude", ("lat", "longitude")), 78 | ], 79 | ) 80 | def test_detect_latlon_coord_name( 81 | lat_name, lon_name, lat_name_test, lon_name_test, expected_result 82 | ): 83 | """Tests tobac.utils.internal.detect_latlon_coord_name""" 84 | 85 | in_arr = np.empty((50, 50)) 86 | lat_vals = np.empty(50) 87 | lon_vals = np.empty(50) 88 | 89 | in_xr = xr.Dataset( 90 | {"data": ((lat_name, lon_name), in_arr)}, 91 | coords={lat_name: lat_vals, lon_name: lon_vals}, 92 | ) 93 | out_lat_name, out_lon_name = internal_utils.detect_latlon_coord_name( 94 | in_xr["data"].to_iris(), lat_name_test, lon_name_test 95 | ) 96 | assert out_lat_name == expected_result[0] 97 | assert out_lon_name == expected_result[1] 98 | 
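# An illustrative, non-parametrized sketch of the axis detection tested above, mirroring
# the ("xarray", 0, 1, (2, 3)) case. The function name `example_find_hdim_axes` is a
# hypothetical addition, and it relies on the module-level imports above (np, tbtest,
# internal_utils); it is not part of the original test suite.
def example_find_hdim_axes():
    # build a (time, altitude, y, x) DataArray with the same helper used in the tests
    test_arr = tbtest.make_dataset_from_arr(
        in_arr=np.zeros((2, 2, 2, 2)),
        data_type="xarray",
        time_dim_num=0,
        z_dim_num=1,
        z_dim_name="altitude",
        y_dim_num=2,
        x_dim_num=3,
    )
    # time is axis 0 and the vertical axis is 1, so the horizontal axes are (2, 3)
    assert internal_utils.find_hdim_axes_3D(test_arr) == (2, 3)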
-------------------------------------------------------------------------------- /tobac/tests/test_xarray_utils.py: -------------------------------------------------------------------------------- 1 | """Tests for tobac.utils.internal.xarray_utils""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import Union 6 | 7 | import pytest 8 | import numpy as np 9 | import xarray as xr 10 | 11 | import tobac.utils.internal.xarray_utils as xr_utils 12 | import tobac.testing as tbtest 13 | import datetime 14 | 15 | 16 | @pytest.mark.parametrize( 17 | "dim_names, coord_dim_map, coord_looking_for, expected_out, expected_raise", 18 | [ 19 | ( 20 | ("time", "altitude", "x", "y"), # dim_names 21 | { # coord_dim_map 22 | "time": ("time",), 23 | "latitude": ("x", "y"), 24 | "longitude": ("x", "y"), 25 | "altmsl": ("altitude", "x", "y"), 26 | }, 27 | "time", # coord_looking_for 28 | 0, 29 | False, 30 | ), 31 | ( 32 | ("time", "time", "time", "time", "time"), # dim_names 33 | { # coord_dim_map 34 | "time": ("time",), 35 | }, 36 | "time", # coord_looking_for 37 | 0, 38 | True, 39 | ), 40 | ( 41 | ("time", "altitude", "x", "y"), # dim_names 42 | { # coord_dim_map 43 | "time": ("time",), 44 | "latitude": ("x", "y"), 45 | "longitude": ("x", "y"), 46 | "altmsl": ("altitude", "x", "y"), 47 | }, 48 | "altitude", # coord_looking_for 49 | 1, 50 | False, 51 | ), 52 | ( 53 | ("time", "altitude", "x", "y"), # dim_names 54 | { # coord_dim_map 55 | "time": ("time",), 56 | "latitude": ("x", "y"), 57 | "longitude": ("x", "y"), 58 | "altmsl": ("altitude", "x", "y"), 59 | }, 60 | "latitude", # coord_looking_for 61 | None, 62 | True, 63 | ), 64 | ( 65 | ("time", "altitude", "x", "y"), # dim_names 66 | { # coord_dim_map 67 | "time": ("time",), 68 | "latitude": ("x", "y"), 69 | "longitude": ("x", "y"), 70 | "altmsl": ("altitude", "x", "y"), 71 | }, 72 | "x", # coord_looking_for 73 | 2, 74 | False, 75 | ), 76 | ( 77 | ("time", "altitude", "x", "y"), # dim_names 78 | { # coord_dim_map 79 | "time": ("time",), 80 | "latitude": ("x", "y"), 81 | "longitude": ("x", "y"), 82 | "altmsl": ("altitude", "x", "y"), 83 | }, 84 | "z", # coord_looking_for 85 | 2, 86 | True, 87 | ), 88 | ( 89 | ("time", "altitude", "x", "y"), # dim_names 90 | { # coord_dim_map 91 | "t": ("time",), 92 | "latitude": ("x", "y"), 93 | "longitude": ("x", "y"), 94 | "altmsl": ("altitude", "x", "y"), 95 | }, 96 | "t", # coord_looking_for 97 | 0, 98 | False, 99 | ), 100 | ], 101 | ) 102 | def test_find_axis_from_dim_coord( 103 | dim_names: tuple[str], 104 | coord_dim_map: dict, 105 | coord_looking_for: str, 106 | expected_out: Union[int, None], 107 | expected_raise: bool, 108 | ): 109 | """Tests tobac.utils.internal.xarray_utils.find_axis_from_dim_coord 110 | 111 | Parameters 112 | ---------- 113 | dim_names: tuple[str] 114 | Names of the dimensions to have 115 | coord_dim_map: dict[str, tuple[str]] 116 | Mapping of coordinates (keys) to dimensions (values) 117 | coord_looking_for: str 118 | what coordinate/dimension to look for 119 | expected_out: Union[int, None] 120 | What the expected output is 121 | expected_raise: bool 122 | Whether or not we expect a raise 123 | """ 124 | 125 | # size of the array per dimension 126 | arr_sz = 4 127 | arr_da = np.empty((arr_sz,) * len(dim_names)) 128 | coord_vals = {} 129 | for coord_nm in coord_dim_map: 130 | coord_vals[coord_nm] = ( 131 | coord_dim_map[coord_nm], 132 | np.empty((arr_sz,) * len(coord_dim_map[coord_nm])), 133 | ) 134 | 135 | xr_da = xr.DataArray(arr_da, dims=dim_names, coords=coord_vals) 136 | if 
expected_raise: 137 | with pytest.raises(ValueError): 138 | _ = xr_utils.find_axis_from_dim_coord(xr_da, coord_looking_for) 139 | else: 140 | out_val = xr_utils.find_axis_from_dim_coord(xr_da, coord_looking_for) 141 | if expected_out is not None: 142 | assert out_val == expected_out 143 | else: 144 | assert out_val is None 145 | 146 | 147 | @pytest.mark.parametrize( 148 | "dim_names, coord_dim_map, feature_pos, expected_vals", 149 | [ 150 | ( 151 | ["time", "x", "y"], 152 | { 153 | "test_coord1": (tuple(), 1), 154 | "test_coord_time": ("time", [5, 6, 7, 8, 9, 10]), 155 | }, 156 | (1, 1), 157 | {"test_coord1": (1, 1, 1), "test_coord_time": (5, 6, 7)}, 158 | ), 159 | ], 160 | ) 161 | def test_add_coordinates_to_features_interpolate_along_other_dims( 162 | dim_names: tuple[str], 163 | coord_dim_map: dict, 164 | feature_pos: tuple[int], 165 | expected_vals: dict[str, tuple], 166 | ): 167 | time_len: int = 6 168 | if len(feature_pos) == 2: 169 | all_feats = tbtest.generate_single_feature( 170 | feature_pos[0], 171 | feature_pos[1], 172 | feature_num=1, 173 | num_frames=3, 174 | max_h1=100, 175 | max_h2=100, 176 | ) 177 | arr_size = (time_len, 5, 5) 178 | 179 | elif len(feature_pos) == 3: 180 | all_feats = tbtest.generate_single_feature( 181 | feature_pos[1], 182 | feature_pos[2], 183 | start_v=feature_pos[0], 184 | feature_num=1, 185 | num_frames=3, 186 | max_h1=100, 187 | max_h2=100, 188 | ) 189 | arr_size = (time_len, 1, 5, 5) 190 | else: 191 | raise ValueError("too many dimensions") 192 | coord_dim_map["time"] = ( 193 | ("time",), 194 | [ 195 | datetime.datetime(2000, 1, 1, 0) + datetime.timedelta(hours=x) 196 | for x in range(time_len) 197 | ], 198 | ) 199 | 200 | test_xr_arr = xr.DataArray(np.empty(arr_size), dims=dim_names, coords=coord_dim_map) 201 | 202 | resulting_df = xr_utils.add_coordinates_to_features(all_feats, test_xr_arr) 203 | for coord in coord_dim_map: 204 | assert coord in resulting_df 205 | if coord != "time": 206 | assert np.all(resulting_df[coord].values == expected_vals[coord]) 207 | -------------------------------------------------------------------------------- /tobac/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .general import ( 2 | add_coordinates, 3 | add_coordinates_3D, 4 | get_spacings, 5 | get_bounding_box, 6 | combine_tobac_feats, 7 | combine_feature_dataframes, 8 | transform_feature_points, 9 | standardize_track_dataset, 10 | spectral_filtering, 11 | ) 12 | 13 | from .mask import ( 14 | mask_cell, 15 | mask_cell_surface, 16 | mask_cube_cell, 17 | mask_cube_untracked, 18 | mask_cube, 19 | column_mask_from2D, 20 | mask_features, 21 | mask_features_surface, 22 | mask_cube_features, 23 | ) 24 | 25 | from .internal import get_label_props_in_dict, get_indices_of_labels_from_reg_prop_dict 26 | 27 | from .bulk_statistics import get_statistics, get_statistics_from_mask 28 | -------------------------------------------------------------------------------- /tobac/utils/datetime.py: -------------------------------------------------------------------------------- 1 | """Functions for converting between and working with different datetime formats""" 2 | 3 | from typing import Union 4 | import datetime 5 | import numpy as np 6 | import pandas as pd 7 | import xarray as xr 8 | import cftime 9 | 10 | 11 | def to_cftime( 12 | dates: Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime], 13 | calendar: str, 14 | align_on: str = "date", 15 | ) -> cftime.datetime: 16 | """Converts a provided 
datetime-like object to a cftime datetime with the 17 | given calendar 18 | 19 | Parameters 20 | ---------- 21 | dates : Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime] 22 | A datetime-like object or array of datetime-like objects to be converted 23 | calendar : str 24 | The requested cftime calendar 25 | align_on : str, optional 26 | The 'align_on' parameter required for 360-day, 365-day and 366-day 27 | cftime dates, by default "date" 28 | 29 | Returns 30 | ------- 31 | cftime.datetime 32 | A cftime object or array of cftime objects in the requested calendar 33 | """ 34 | dates_arr = np.atleast_1d(dates) 35 | if isinstance(dates_arr[0], cftime.datetime): 36 | cftime_dates = ( 37 | xr.DataArray(dates_arr, {"time": dates_arr}) 38 | .convert_calendar(calendar, use_cftime=True, align_on=align_on) 39 | .time.values 40 | ) 41 | else: 42 | cftime_dates = ( 43 | xr.DataArray(dates_arr, {"time": pd.to_datetime(dates_arr)}) 44 | .convert_calendar(calendar, use_cftime=True, align_on=align_on) 45 | .time.values 46 | ) 47 | if not hasattr(dates, "__iter__") or isinstance(dates, str) and len(cftime_dates): 48 | return cftime_dates[0] 49 | return cftime_dates 50 | 51 | 52 | def to_timestamp( 53 | dates: Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime], 54 | ) -> pd.Timestamp: 55 | """Converts a provided datetime-like object to a pandas timestamp 56 | 57 | Parameters 58 | ---------- 59 | dates : Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime] 60 | A datetime-like object or array of datetime-like objects to be converted 61 | 62 | Returns 63 | ------- 64 | pd.Timestamp 65 | A pandas timestamp or array of pandas timestamps 66 | """ 67 | squeeze_output = False 68 | if not hasattr(dates, "__iter__") or isinstance(dates, str): 69 | dates = np.atleast_1d(dates) 70 | squeeze_output = True 71 | 72 | if isinstance(dates[0], cftime.datetime): 73 | pd_dates = xr.CFTimeIndex(dates).to_datetimeindex() 74 | else: 75 | pd_dates = pd.to_datetime(dates) 76 | 77 | if squeeze_output: 78 | return pd_dates[0] 79 | return pd_dates 80 | 81 | 82 | def to_datetime( 83 | dates: Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime], 84 | ) -> datetime.datetime: 85 | """Converts a provided datetime-like object to python datetime objects 86 | 87 | Parameters 88 | ---------- 89 | dates : Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime] 90 | A datetime-like object or array of datetime-like objects to be converted 91 | 92 | Returns 93 | ------- 94 | datetime.datetime 95 | A python datetime or array of python datetimes 96 | """ 97 | return to_timestamp(dates).to_pydatetime() 98 | 99 | 100 | def to_datetime64( 101 | dates: Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime], 102 | ) -> np.datetime64: 103 | """Converts a provided datetime-like object to numpy datetime64 objects 104 | 105 | Parameters 106 | ---------- 107 | dates : Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime] 108 | A datetime-like object or array of datetime-like objects to be converted 109 | 110 | Returns 111 | ------- 112 | np.datetime64 113 | A numpy datetime64 or array of numpy datetime64s 114 | """ 115 | return to_timestamp(dates).to_numpy() 116 | 117 | 118 | def to_datestr( 119 | dates: Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime], 120 | ) -> str: 121 | """Converts a provided datetime-like object to ISO format date strings 122 | 123 | Parameters 124 | 
---------- 125 | dates : Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime] 126 | A datetime-like object or array of datetime-like objects to be converted 127 | 128 | Returns 129 | ------- 130 | str 131 | A string or array of strings in ISO date format 132 | """ 133 | dates = to_datetime64(dates) 134 | if hasattr(dates, "__iter__"): 135 | return dates.astype(str) 136 | return str(dates) 137 | 138 | 139 | def match_datetime_format( 140 | dates: Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime], 141 | target: Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime], 142 | ) -> Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime]: 143 | """Converts the provided datetime-like objects to the same datetime format 144 | as the provided target 145 | 146 | Parameters 147 | ---------- 148 | dates : Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime] 149 | A datetime-like object or array of datetime-like objects to be converted 150 | target : Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime] 151 | A datetime-like object or array of datetime-like objects which the dates 152 | input will be converted to match 153 | 154 | Returns 155 | ------- 156 | Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime] 157 | The datetime-like values of the dates parameter, converted to a format 158 | which matches that of the target input 159 | 160 | Raises 161 | ------ 162 | ValueError 163 | If the target parameter provided is not a datetime-like object or array 164 | of datetime-like objects 165 | """ 166 | if isinstance(target, str): 167 | return to_datestr(dates) 168 | if isinstance(target, xr.DataArray): 169 | target = target.values 170 | if isinstance(target, pd.Series): 171 | target = target.to_numpy() 172 | if hasattr(target, "__iter__"): 173 | target = target[0] 174 | if isinstance(target, str): 175 | return to_datestr(dates) 176 | if isinstance(target, cftime.datetime): 177 | return to_cftime(dates, target.calendar) 178 | if isinstance(target, pd.Timestamp): 179 | return to_timestamp(dates) 180 | if isinstance(target, np.datetime64): 181 | return to_datetime64(dates) 182 | if isinstance(target, datetime.datetime): 183 | return to_datetime(dates) 184 | raise ValueError("Target is not a valid datetime format") 185 | -------------------------------------------------------------------------------- /tobac/utils/generators.py: -------------------------------------------------------------------------------- 1 | """Custom generators used for iterators required by tobac""" 2 | 3 | import datetime 4 | from typing import Generator, Optional, Tuple, Union 5 | 6 | import cftime 7 | import numpy as np 8 | import pandas as pd 9 | import xarray as xr 10 | 11 | import tobac.utils.datetime as datetime_utils 12 | 13 | 14 | def field_and_features_over_time( 15 | field: xr.DataArray, 16 | features: pd.DataFrame, 17 | time_var_name: str = "time", 18 | time_padding: Optional[datetime.timedelta] = None, 19 | ) -> Generator[ 20 | Tuple[ 21 | int, 22 | Union[datetime.datetime, np.datetime64, cftime.datetime], 23 | xr.DataArray, 24 | pd.DataFrame, 25 | ], 26 | None, 27 | None, 28 | ]: 29 | """Generator that iterates over time through a paired field dataarray and a 30 | features dataframe. 
The time_padding parameter allows a tolerance to be set for 31 | matching time stamps in the dataarray and dataframe 32 | 33 | Parameters 34 | ---------- 35 | field : xr.DataArray 36 | The field to iterate over 37 | features : pd.DataFrame 38 | The features dataframe to iterate through 39 | time_var_name : str, optional (default: "time") 40 | The name of the time dimension in field and the time column in features, 41 | by default "time" 42 | time_padding : datetime.timedelta, optional (default: None) 43 | The tolerance for matching features at the same time as each time step 44 | in the field dataarray, by default None 45 | 46 | Yields 47 | ------ 48 | Generator[tuple[int, Union[datetime.datetime, np.datetime64, cftime.datetime], xr.DataArray, pd.DataFrame], None, None] 49 | A generator that returns the iteration index, the time, the slice of 50 | field at that time, and the slice of features with times within the time 51 | padding tolerance of the time step 52 | """ 53 | if time_var_name not in field.coords: 54 | raise ValueError(f"{time_var_name} not present in input field coordinates") 55 | 56 | if time_var_name not in features.columns: 57 | raise ValueError(f"{time_var_name} not present in input feature columns") 58 | 59 | all_times = pd.Series( 60 | datetime_utils.match_datetime_format( 61 | features[time_var_name], field.coords[time_var_name] 62 | ) 63 | ) 64 | for time_iteration_number, time_iteration_value in enumerate( 65 | field.coords[time_var_name] 66 | ): 67 | field_at_time = field.isel({time_var_name: time_iteration_number}) 68 | if time_padding is not None: 69 | # np.datetime64 time values yield integer nanoseconds from .item(), so convert the padding to timedelta64 before comparing: 70 | if isinstance(time_iteration_value.values.item(), int): 71 | min_time = ( 72 | time_iteration_value.values 73 | - pd.Timedelta(time_padding).to_timedelta64() 74 | ) 75 | max_time = ( 76 | time_iteration_value.values 77 | + pd.Timedelta(time_padding).to_timedelta64() 78 | ) 79 | else: 80 | min_time = time_iteration_value.values - time_padding 81 | max_time = time_iteration_value.values + time_padding 82 | features_i = features.loc[all_times.between(min_time, max_time)] 83 | else: 84 | features_i = features.loc[all_times == time_iteration_value.values] 85 | 86 | yield time_iteration_number, time_iteration_value, field_at_time, features_i 87 | -------------------------------------------------------------------------------- /tobac/utils/internal/__init__.py: -------------------------------------------------------------------------------- 1 | from .label_props import * 2 | from .coordinates import * 3 | -------------------------------------------------------------------------------- /tobac/utils/internal/label_props.py: -------------------------------------------------------------------------------- 1 | """Internal tobac utilities""" 2 | 3 | from __future__ import annotations 4 | import numpy as np 5 | import skimage.measure 6 | 7 | 8 | def get_label_props_in_dict(labels: np.array) -> dict: 9 | """Function to get the label properties into a dictionary format. 10 | 11 | Parameters 12 | ---------- 13 | labels : 2D array-like 14 | Output of the `skimage.measure.label` function. 15 | 16 | Returns 17 | ------- 18 | region_properties_dict: dict 19 | Output from skimage.measure.regionprops in dictionary 20 | format, where the key is the label number. 
21 | """ 22 | 23 | region_properties_raw = skimage.measure.regionprops(labels) 24 | region_properties_dict = { 25 | region_prop.label: region_prop for region_prop in region_properties_raw 26 | } 27 | 28 | return region_properties_dict 29 | 30 | 31 | def get_indices_of_labels_from_reg_prop_dict(region_property_dict: dict) -> tuple[dict]: 32 | """Function to get the x, y, and z indices (as well as point count) of all labeled regions. 33 | Parameters 34 | ---------- 35 | region_property_dict : dict of region_property objects 36 | This dict should come from the get_label_props_in_dict function. 37 | Returns 38 | ------- 39 | curr_loc_indices : dict 40 | The number of points in the label number (key: label number). 41 | z_indices : dict 42 | The z indices in the label number. If a 2D property dict is passed, this value is not returned. 43 | y_indices : dict 44 | The y indices in the label number (key: label number). 45 | x_indices : dict 46 | The x indices in the label number (key: label number). 47 | Raises 48 | ------ 49 | ValueError 50 | A ValueError is raised if there are no regions in the region 51 | property dict. 52 | """ 53 | 54 | if len(region_property_dict) == 0: 55 | raise ValueError("No regions!") 56 | 57 | z_indices = dict() 58 | y_indices = dict() 59 | x_indices = dict() 60 | curr_loc_indices = dict() 61 | is_3D = False 62 | 63 | # loop through all skimage identified regions 64 | for region_prop_key in region_property_dict: 65 | region_prop = region_property_dict[region_prop_key] 66 | index = region_prop.label 67 | if len(region_prop.coords[0]) >= 3: 68 | is_3D = True 69 | curr_z_ixs, curr_y_ixs, curr_x_ixs = np.transpose(region_prop.coords) 70 | z_indices[index] = curr_z_ixs 71 | else: 72 | curr_y_ixs, curr_x_ixs = np.transpose(region_prop.coords) 73 | z_indices[index] = -1 74 | 75 | y_indices[index] = curr_y_ixs 76 | x_indices[index] = curr_x_ixs 77 | curr_loc_indices[index] = len(curr_y_ixs) 78 | # print("indices found") 79 | if is_3D: 80 | return [curr_loc_indices, z_indices, y_indices, x_indices] 81 | else: 82 | return [curr_loc_indices, y_indices, x_indices] 83 | -------------------------------------------------------------------------------- /tobac/wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import logging 3 | import warnings 4 | 5 | 6 | def tracking_wrapper( 7 | field_in_features, 8 | field_in_segmentation, 9 | time_spacing=None, 10 | grid_spacing=None, 11 | parameters_features=None, 12 | parameters_tracking=None, 13 | parameters_segmentation=None, 14 | ): 15 | from .feature_detection import feature_detection_multithreshold 16 | from .tracking import linking_trackpy 17 | from tobac.segmentation.watershed_segmentation import ( 18 | segmentation_3D, 19 | segmentation_2D, 20 | ) 21 | from .utils import get_spacings 22 | 23 | warnings.warn( 24 | "tracking_wrapper is depreciated and will be removed in v2.0.", 25 | DeprecationWarning, 26 | ) 27 | 28 | logger = logging.getLogger("trackpy") 29 | logger.propagate = False 30 | logger.setLevel(logging.WARNING) 31 | 32 | ### Prepare Tracking 33 | 34 | dxy, dt = get_spacings( 35 | field_in_features, grid_spacing=grid_spacing, time_spacing=time_spacing 36 | ) 37 | 38 | ### Start Tracking 39 | # Feature detection: 40 | 41 | method_detection = parameters_features.pop("method_detection", None) 42 | if method_detection in ["threshold", "threshold_multi"]: 43 | features = feature_detection_multithreshold( 44 | field_in_features, **parameters_features 45 | ) 46 | 
else: 47 | raise ValueError( 48 | "method_detection unknown, has to be either threshold_multi or threshold" 49 | ) 50 | 51 | method_segmentation = parameters_features.pop("method_segmentation", None) 52 | 53 | if method_segmentation == "watershedding": 54 | if field_in_segmentation.ndim == 4: 55 | segmentation_mask, features_segmentation = segmentation_3D( 56 | features, field_in_segmentation, **parameters_segmentation 57 | ) 58 | if field_in_segmentation.ndim == 3: 59 | segmentation_mask, features_segmentation = segmentation_2D( 60 | features, field_in_segmentation, **parameters_segmentation 61 | ) 62 | 63 | # Link the features in the individual frames to trajectories: 64 | method_linking = parameters_features.pop("method_linking", None) 65 | 66 | if method_linking == "trackpy": 67 | trajectories = linking_trackpy(features, **parameters_tracking) 68 | logging.debug("Finished tracking") 69 | else: 70 | raise ValueError("method_linking unknown, has to be trackpy") 71 | 72 | return features, segmentation_mask, trajectories 73 | 74 | 75 | def maketrack( 76 | field_in, 77 | grid_spacing=None, 78 | time_spacing=None, 79 | target="maximum", 80 | v_max=None, 81 | d_max=None, 82 | memory=0, 83 | stubs=5, 84 | order=1, 85 | extrapolate=0, 86 | method_detection="threshold", 87 | position_threshold="center", 88 | sigma_threshold=0.5, 89 | n_erosion_threshold=0, 90 | threshold=1, 91 | min_num=0, 92 | min_distance=0, 93 | method_linking="random", 94 | cell_number_start=1, 95 | subnetwork_size=None, 96 | adaptive_stop=None, 97 | adaptive_step=None, 98 | return_intermediate=False, 99 | ): 100 | from .feature_detection import feature_detection_multithreshold 101 | from .tracking import linking_trackpy 102 | 103 | """ 104 | Function identifying features and linking them into trajectories 105 | 106 | Parameters: 107 | field_in: iris.cube.Cube 108 | 2D input field tracking is performed on 109 | grid_spacing: float 110 | grid spacing in input data (m) 111 | time_spacing: float 112 | time resolution of input data (s) 113 | target: string 114 | Switch to determine if algorithm looks for maxima or minima in input field (maximum: look for maxima (default), minimum: look for minima) 115 | v_max: float 116 | Assumed maximum speed of tracked objects (m/s) 117 | memory: int 118 | Number of timesteps for which objects can be missed by the algorithm to still give a consistent track 119 | stubs: int 120 | Minimum number of timesteps for which objects have to be detected to not be filtered out as spurious 121 | min_num: int 122 | Minimum number of cells above threshold in the feature to be tracked 123 | order: int 124 | order of interpolation spline to fill gaps in tracking (from allowing memory to be larger than 0) 125 | extrapolate: int 126 | number of points to extrapolate individual tracks by 127 | method_detection: str('threshold' or 'threshold_multi') 128 | flag choosing method used for feature detection 129 | position_threshold: str('extreme', 'weighted_diff', 'weighted_abs' or 'center') 130 | flag choosing method used for the position of the tracked feature 131 | sigma_threshold: float 132 | standard deviation for initial filtering step 133 | 134 | n_erosion_threshold: int 135 | number of pixels by which to erode the identified features 136 | 137 | method_linking: str('predict' or 'random') 138 | flag choosing method used for trajectory linking 139 | 140 | return_intermediate: boolean 141 | flag to determine if only final trajectories are output (False, default) or if detected features, filtered features and 
unfilled tracks are returned additionally (True) 142 | 143 | Output: 144 | trajectories_final: pandas.DataFrame 145 | Tracked updrafts, one row per timestep and updraft, includes dimensions 'time','latitude','longitude','projection_x_variable', 'projection_y_variable' based on w cube. 146 | 'hdim_1' and 'hdim_2' are used for segmentation step. 147 | 148 | Optional output: 149 | features_filtered: pandas.DataFrame 150 | 151 | features_unfiltered: pandas.DataFrame 152 | 153 | trajectories_filtered_unfilled: pandas.DataFrame 154 | 155 | """ 156 | from copy import deepcopy 157 | 158 | warnings.warn( 159 | "maketrack is deprecated and will be removed in v2.0.", 160 | DeprecationWarning, 161 | ) 162 | 163 | logger = logging.getLogger("trackpy") 164 | logger.propagate = False 165 | logger.setLevel(logging.WARNING) 166 | 167 | ### Prepare Tracking 168 | 169 | # set horizontal grid spacing of input data 170 | # If cartesian x and y coordinates are present, use these to determine dxy (horizontal grid spacing used to transfer pixel distances to real distances): 171 | coord_names = [coord.name() for coord in field_in.coords()] 172 | 173 | if ( 174 | "projection_x_coordinate" in coord_names 175 | and "projection_y_coordinate" in coord_names 176 | ) and (grid_spacing is None): 177 | x_coord = deepcopy(field_in.coord("projection_x_coordinate")) 178 | x_coord.convert_units("metre") 179 | dx = np.diff(field_in.coord("projection_x_coordinate")[0:2].points)[0] 180 | y_coord = deepcopy(field_in.coord("projection_y_coordinate")) 181 | y_coord.convert_units("metre") 182 | dy = np.diff(field_in.coord("projection_y_coordinate")[0:2].points)[0] 183 | dxy = 0.5 * (dx + dy) 184 | elif grid_spacing is not None: 185 | dxy = grid_spacing 186 | else: 187 | raise ValueError( 188 | "no information about grid spacing, need either input cube with projection_x_coord and projection_y_coord or keyword argument grid_spacing" 189 | ) 190 | 191 | # set time resolution of input data 192 | if time_spacing is None: 193 | # get time resolution of input data from first two steps of input cube: 194 | time_coord = field_in.coord("time") 195 | dt = ( 196 | time_coord.units.num2date(time_coord.points[1]) 197 | - time_coord.units.num2date(time_coord.points[0]) 198 | ).seconds 199 | elif time_spacing is not None: 200 | # use value of time_spacing for dt: 201 | dt = time_spacing 202 | 203 | ### Start Tracking 204 | # Feature detection: 205 | if method_detection in ["threshold", "threshold_multi"]: 206 | features = feature_detection_multithreshold( 207 | field_in=field_in, 208 | threshold=threshold, 209 | dxy=dxy, 210 | target=target, 211 | position_threshold=position_threshold, 212 | sigma_threshold=sigma_threshold, 213 | n_erosion_threshold=n_erosion_threshold, 214 | ) 215 | features_filtered = features.drop(features[features["num"] < min_num].index) 216 | 217 | else: 218 | raise ValueError( 219 | "method_detection unknown, has to be either threshold_multi or threshold" 220 | ) 221 | 222 | # Link the features in the individual frames to trajectories: 223 | 224 | trajectories = linking_trackpy( 225 | features=features_filtered, 226 | field_in=field_in, 227 | dxy=dxy, 228 | dt=dt, 229 | memory=memory, 230 | subnetwork_size=subnetwork_size, 231 | adaptive_stop=adaptive_stop, 232 | adaptive_step=adaptive_step, 233 | v_max=v_max, 234 | d_max=d_max, 235 | stubs=stubs, 236 | order=order, 237 | extrapolate=extrapolate, 238 | method_linking=method_linking, 239 | cell_number_start=1, 240 | ) 241 | 242 | logging.debug("Finished tracking") 243 | 244 | 
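# Return the linked trajectories together with the unfiltered detected features: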
return trajectories, features 245 | --------------------------------------------------------------------------------
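# A minimal end-to-end sketch of the workflow exercised in tobac/tests/test_sample_data.py
# above (feature detection -> segmentation -> trajectory linking). The parameter values
# below are illustrative assumptions taken from those tests, not tuned recommendations.
import tobac
from tobac.testing import make_sample_data_2D_3blobs

sample_data = make_sample_data_2D_3blobs()  # idealized iris cube with three moving blobs
dxy, dt = tobac.get_spacings(sample_data)  # horizontal grid spacing (m) and time step (s)

# detect features exceeding multiple thresholds
features = tobac.feature_detection_multithreshold(
    sample_data, dxy, threshold=[3, 5, 10], n_min_threshold=3
)

# segment the area around each detected feature with watershedding
segmentation_mask, features_seg = tobac.segmentation_2D(
    features, sample_data, dxy=dxy, target="maximum", method="watershed"
)

# link the detected features into trajectories
tracks = tobac.linking_trackpy(
    features, sample_data, dt=dt, dxy=dxy, v_max=100, method_linking="predict"
)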