├── .environment.yml ├── .gitignore ├── .readthedocs.yml ├── .travis.yml ├── CHANGELOG.md ├── CONTRIBUTING.rst ├── Makefile ├── README.md ├── build.sh ├── codecov.yml ├── docs ├── Makefile ├── make.bat └── source │ ├── CHANGELOG.rst │ ├── CONTRIBUTING.rst │ ├── Makefile.txt │ ├── README.rst │ ├── _static │ └── style.css │ ├── _templates │ ├── class.rst │ ├── function.rst │ └── module.rst │ ├── api.rst │ ├── conf.py │ └── index.rst ├── examples ├── README.txt ├── ai │ ├── README.txt │ ├── SFFS.py │ ├── learnWithCustomRaster.py │ ├── learnWithRFandCompareCV.py │ └── learnWithRFandRS50.py ├── charts │ ├── README.txt │ ├── plotConfusion.py │ ├── plotConfusionAcc.py │ └── plotConfusionF1.py ├── cross_validation │ ├── LeaveOneOutPerClass.py │ ├── LeaveOneSubGroupOut.py │ ├── LeaveOneSubGroupOutAndSaveVector.py │ ├── LeavePSubGroupOut.py │ ├── README.txt │ ├── RandomSampling50.py │ ├── SpatialLeaveAsideOut.py │ ├── SpatialLeaveOnePixelOut.py │ ├── SpatialLeaveOneSubGroupOut.py │ ├── __drawCVmethods.py │ └── train_test_split.py ├── processing │ ├── README.txt │ ├── copyRasterInVectorFields.py │ ├── extractRasterValues.py │ ├── modalClass.py │ ├── rasterMask.py │ ├── rasterMath.py │ ├── rasterMathCustomBlock.py │ ├── rasterMathCustomBlockAndMask.py │ ├── rasterMath_testBlockSize_3d_andNBands.py │ ├── rasterMath_twoRasters.py │ └── readVectorFields.py └── stats │ ├── MoransI.py │ ├── README.txt │ ├── qualityIndexFromConfusionMatrix.py │ └── zonal_stats.py ├── getVersion.py ├── license.txt ├── metadata ├── RasterMath_schema.png ├── __docs.py ├── __init__.py ├── museoToolBox_logo.svg ├── museoToolBox_logo_1024.png ├── museoToolBox_logo_128.png ├── schema.drawio └── schema.png ├── museotoolbox ├── __init__.py ├── ai │ └── __init__.py ├── charts │ └── __init__.py ├── cross_validation │ ├── __init__.py │ └── _sample_selection.py ├── datasets │ ├── __init__.py │ └── _historicalmap │ │ ├── map_compress.tif │ │ ├── map_lowres.tif │ │ ├── train.gpkg │ │ └── train_centroid.gpkg ├── 
internal_tools │ └── __init__.py ├── processing │ └── __init__.py └── stats │ └── __init__.py ├── paper.bib ├── paper.md ├── requirements-dev.txt ├── requirements.txt ├── setup.py └── test ├── __init__.py ├── test_ai.py ├── test_charts.py ├── test_cross_validation.py ├── test_processing.py └── test_stats.py /.environment.yml: -------------------------------------------------------------------------------- 1 | # Conda environment for developpers 2 | 3 | name: museotoolbox-dev 4 | 5 | dependencies: 6 | - python=3.7 7 | - pytest 8 | - gdal 9 | - pip 10 | - pip: 11 | - -r file:requirements-dev.txt 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # ignore datasets to avoid modification 2 | museotoolbox/datasets/_historicalmap/* 3 | 4 | # Files to ignore 5 | .coverage.* 6 | .coverage 7 | *.pyc 8 | *.py~ 9 | *_filtered* 10 | *.swp 11 | __pycache__ 12 | *.spyproject/* 13 | 14 | # nb 15 | *.pickle 16 | *.py.md5 17 | 18 | # MTB 19 | MuseoToolBox.egg-info/ 20 | cv_*.sqlite 21 | 22 | # sphinx-gallery temp files 23 | 24 | # Sphinx documentation 25 | docs/source/_build/ 26 | docs/build/ 27 | docs/source/modules 28 | docs/source/gen_modules 29 | docs/source/_autosummary 30 | docs/source/_static 31 | docs/source/auto_examples/**.pickle 32 | docs/source/auto_examples/ 33 | docs/source/auto_examples/**.md5 34 | docs/modules/ 35 | 36 | # Distribution / packaging 37 | .Python 38 | env/ 39 | build/ 40 | develop-eggs/ 41 | dist/ 42 | downloads/ 43 | eggs/ 44 | .eggs/ 45 | lib/ 46 | lib64/ 47 | parts/ 48 | sdist/ 49 | var/ 50 | *.egg-info/ 51 | .installed.cfg 52 | *.egg 53 | 54 | # PyInstaller 55 | # Usually these files are written by a python script from a template 56 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
57 | *.manifest 58 | *.spec 59 | 60 | # Installer logs 61 | pip-log.txt 62 | pip-delete-this-directory.txt 63 | 64 | # IPython Notebook 65 | .ipynb_checkpoints 66 | 67 | # pyenv 68 | .python-version 69 | 70 | # celery beat schedule file 71 | celerybeat-schedule 72 | 73 | # dotenv 74 | .env 75 | 76 | # virtualenv 77 | venv/ 78 | ENV/ 79 | 80 | # Spyder project settings 81 | .spyderproject 82 | 83 | # Rope project settings 84 | .ropeproject 85 | 86 | # Mac stuff 87 | .DS_Store 88 | 89 | # coverage output folder 90 | cov_html/ 91 | 92 | *.gpkg-shm 93 | 94 | *.gpkg-wal 95 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | version: 2 6 | 7 | build: 8 | image: latest 9 | 10 | python: 11 | version: 3.7 12 | system_packages: true 13 | install: 14 | - requirements: requirements-dev.txt 15 | - method: setuptools 16 | path: . 17 | 18 | conda: 19 | environment: .environment.yml 20 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: bionic 2 | language: python 3 | os : linux 4 | python: 5 | - "3.6" 6 | - "3.7" 7 | - "3.8" 8 | 9 | addons: 10 | apt: 11 | packages: 12 | - libgdal-dev 13 | 14 | before_install: 15 | - sudo add-apt-repository --yes ppa:ubuntugis/ppa 16 | - sudo apt-get --quiet update 17 | - sudo apt-get install --yes libgdal-dev gdal-bin 18 | install: 19 | - pip install gdal==`gdal-config --version` 20 | - pip install -r requirements.txt 21 | - pip install . 
22 | - pip install codecov 23 | - pip install pytest-cov pytest 24 | 25 | script: 26 | - python setup.py develop 27 | - pytest -v --cov=museotoolbox 28 | 29 | deploy: 30 | provider: pypi 31 | user: "karasiak" 32 | password: 33 | secure: 7NeKw+T+tO7LWvwjgj1agHIo8zQlA33bkg9x6pQ4zXd24ZJ4XDhqnGSxG9NKr9pP9l9PwDrPZIIW50JeBWula+JXQxtRj80LNJB8ySJVn3BEdLojxUMthVvOtdUqzqFME5iFpP6SEdUrODMHXJa18tCP5QN2XHUWEixmmqrbFg8JYgg6nswVmmCHpXor5h9LW1V0bJ1uOThmpjhXUhKx7xM6lcXJssC91Ghmkh1SApNuK3Ah8ir92HK53VSdQScK8QAHIpvWGbTLhbt7C4IuwDFERgDIbOec6L9UxOePMXmIF/5wu+mlQeVzFV5DiFBJ0U+fDEqg1PTKifM9KQmOtBX8iFVyiZmwqM6wRJjt83C5j4VKDj8vs8Y9dGkmOobRNlE3HdEekB69mj5177oCpOSZtgntkbR/9wfEK7j4qnj/mALaKx9mYKdI9nRepEnnocp6R2aGvZD0BxSwRWxjgubfolsG3mBHBRhjog5pVOcC4de3aPwZKxWTls2AIMP47lxocKnpHWTzKAtuPEo4w/Y2IaFXbk0EQleC6BLHAWi7DbAYct7/6Ju7VOdm80ko0To4PQ+9fzpSZndcAxf1cEys0OaVcQPJ9XjxfvzHUz1OXqQKzZe5tfAxFS6q6rbMV4J9zNRP+xX8rOhmB4HajBmBfIEcz/Avt4R+pl5lIeo= 34 | on: 35 | tags: true 36 | branch: master 37 | skip_existing: true 38 | 39 | after_success: 40 | - codecov 41 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [0.13.6] - 2020-07-19 9 | 10 | ### Added 11 | 12 | - RasterMath get_image_as_array() now supports mask. 13 | 14 | ### Fixed 15 | 16 | - Update groups management for sklearn>=0.25 17 | - Fixed bug with raster/vector datasets (mtb.datasets.load_historical_data()) 18 | - Fixed bug with get_image_as_array() from RasterMath (completely rewrite this part) 19 | 20 | ## [0.13.5] - 2020-06-24 21 | 22 | ### Fixed 23 | 24 | - Requirements is not directly written inside setup.py due to bugs. 
25 | 26 | ## [0.13.4] - 2020-06-24 27 | 28 | ### Fixed 29 | 30 | - Fix bug in setup.py using requirements.txt instead of ./requirements.txt 31 | 32 | ## [0.13.3] - 2020-06-23 33 | 34 | ### Fixed 35 | 36 | - Adding psutil to depency 37 | 38 | ## [0.13.2] - 2020-06-18 39 | 40 | ### Added 41 | 42 | - get_image_as_array function for RasterMath 43 | 44 | ### Fixed 45 | 46 | - train_test_split supports now groups=None 47 | 48 | ## [0.13.1] - 2020-06-11 49 | 50 | ### Added 51 | 52 | - Support list for cross-validation in order to give an unready unfolded cv. 53 | 54 | New features provided by @marclang for the charts module : 55 | 56 | - Allows to display both F1 and accuracy or mean metrics 57 | - Allows to display accuracy after have been displaying mean (and vice versa) 58 | - Allows to display float matrix 59 | 60 | ### Changed 61 | 62 | - Fix path separator to access tutorial dataset 63 | 64 | ## [0.13.0] - 2020-04-21 65 | 66 | ### Changed 67 | 68 | - Final version for JOSS (paper.md and paper.bib updated thanks to @kbarnhart) 69 | 70 | ## [0.12.1-rc.1] - 2020-04-18 71 | 72 | ### Added 73 | 74 | - RasterMath use available memory to speed up process and manage now several cores (n_jobs) 75 | - train_test_split in cross_validation module 76 | 77 | ### Changed 78 | 79 | - Enhance mask management for RasterMath 80 | - Move FlushCache to optimize RasterMath 81 | - RasterMath get_random_block returns only block which are not totally unmasked 82 | - charts.PlotConfusionMatrix has a default argument (zero_is_min=True) 83 | 84 | ## [0.12.1-beta.2] - 2020-02-10 85 | 86 | ### Fixed 87 | 88 | - Fix bug when in RasterMath when input is only one band 89 | - Fix bug in RasterMath with mask and list 90 | 91 | ### Added 92 | 93 | - n_jobs for RasterMath (thanks to Helene @HTDBD and Arthur @ArthurDfs, two great students) 94 | - function write_block and generally a most intuitive way to use RasterMath (with the help of @HTDBD and @ArthurDfs) 95 | 96 | ## [0.12.1-beta.1] - 2020-01-16 97 
| 98 | ### Added 99 | - new branch spatial added 100 | 101 | ### Added 102 | 103 | - Added this line 104 | 105 | ### Changed 106 | 107 | - SequentialFeatureSelection parameters order Changed. *scoring* is now before *standardize*. 108 | - Update doc for load_historical_data() 109 | 110 | ### Fixed 111 | 112 | - Fix bug in get_block() and get_random_block() which returned the same block each time due to new method. 113 | - Fix bug with nodata in RasterMath when output is of float type 114 | 115 | ## [0.12] - 2019-12-13 116 | 117 | ### Changed 118 | 119 | - RasterMath made a lot of improvements using block reading and writing. For example, the default block size is now 256x256 (you can keep the default block size by choosing block_size=False), and Museo ToolBox automatic detect if the geotiff will be tiled or not (it depends on the block size). 120 | - Some folders have Changed name : 121 | - raster_tools and vector_tools to processing 122 | - learn_tools to ai 123 | - some functions have Changed name : 124 | - getSamplesFromROI to extract_values 125 | - historicalMap to load_historical_data 126 | - getDistanceMatrix to get_distance_matrix 127 | - classes now always begin with a capital case : 128 | - learnAndPredict to SuperLearner 129 | - rasterMath to RasterMath 130 | - sequentialFeatureSelection to SequentialFeatureSelection 131 | 132 | ### Fixed 133 | 134 | - bug #7 : getSamplesFromROI (nowd extract_ROI) now extracts ROI values using by default memory. If it fails, it will create a temporary raster on disk then delete it when finished. 135 | 136 | ### Removed 137 | 138 | - Remove command lines (cli) 139 | 140 | ## [0.12rc5] - 2019-11-11 141 | 142 | ### Changed 143 | 144 | - getSamplesFromROI return list of available fields if wrong field given. 
145 | - rasterMath convert np.nan value to nodata value (if numpy >= 1.17) 146 | 147 | ## [0.12rc4] - 2019-11-01 148 | 149 | ### Changed 150 | 151 | - Minor fix when using learnAndPredict with an outside customized function 152 | - Better management fo cross-validation in learnAndPredict 153 | - Fix minor bug using False or None value with cv in learnAndPredict 154 | 155 | ### Added 156 | 157 | - Add an option to use SFS without writing each best model on the disk. 158 | 159 | ## [0.12rc3] - 2019-10-29 160 | 161 | ### Added 162 | 163 | - Move some functions from vector_tools to raster_tools, functions are anyway still available from vector_tools 164 | 165 | ### Changed 166 | 167 | - learnAndPredict manages int value for cross-validation by using RandomStratifiedKFold 168 | - Enhance blocksize management for rasterMath 169 | - Move command line code in _cli folder 170 | 171 | ## [0.12rc2] - 2019-10-14 172 | 173 | ### Changed 174 | 175 | - Improvements of rasterMath 176 | - customBlockSize defines now the same block size for window reading and for the output 177 | - add seed parameter (to set a random generator) in getRandomBlock() 178 | - add getRasterParameters() and customRasterParameters() function. 179 | 180 | ## [0.12rc1] - 2019-10-12 181 | 182 | ### Changed 183 | 184 | - update rasterMath to generate by default a 256*256 raster block size. 185 | - update rasterMath to prevent bug if user has osgeo/gdal version is lower than 2.1. 186 | - prevent bug when in rasterMath if processor has only 1 core. 
187 | 188 | ### Fixed 189 | - minor fixes 190 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | ===================================== 2 | How to contribute to this package 3 | ===================================== 4 | 5 | This document describes how to edit the package, run the tests, build the docs, put tagged versions on PyPI_, etc. 6 | 7 | Editing the project 8 | --------------------- 9 | 10 | Developpment environnement 11 | +++++++++++++++++++++++++++ 12 | 13 | We're using conda environment to install all dependencies needed for Museo ToolBox developpement. 14 | 15 | - ``conda env create -f .environment.yml`` 16 | 17 | This will create an environnement called ``museotoolbox-dev``, to activate it, just type : 18 | 19 | - ``conda active museotoolbox-dev`` 20 | 21 | 22 | Package structure 23 | ++++++++++++++++++ 24 | - Package code is in `museotoolbox `_ folder. 25 | - Docs can be generated by typing ``Make doc`` at the root of folder. 26 | - Unit tests are in `test `_ folder. 27 | 28 | Modify the code via pull requests 29 | +++++++++++++++++++++++++++++++++++ 30 | To make changes to the code, you should make a branch or fork, make your changes, and then submit a pull request. 31 | If you aren't sure about pull requests: 32 | 33 | - A general description of pull requests: https://help.github.com/en/articles/about-pull-requests 34 | 35 | - How to create a pull request: https://help.github.com/en/articles/creating-a-pull-request 36 | 37 | - How to structure your pull requests (one conceptually distinct change per pull request): https://medium.com/@fagnerbrack/one-pull-request-one-concern-e84a27dfe9f1 38 | 39 | Tests and documentation 40 | +++++++++++++++++++++++ 41 | You should document your code clearly with `numpy style documentation`_. 42 | You should add tests. 43 | For simple things, these can be `doctests `_ in the code. 
44 | For more elaborate functionality, put unit tests in test. 45 | 46 | Versions and CHANGELOG 47 | ++++++++++++++++++++++ 48 | The version is `single sourced `_ in `__init__.py`_. 49 | When modifying a tagged version (e.g., ``0.1.0``), indicate you are working on a development version by adding a ``dev`` (e.g., ``0.1.dev1``). 50 | See `here `_ for more information on version numbers. 51 | 52 | Conceptual descriptions of changes should also be tracked in the CHANGELOG_. 53 | 54 | Adding dependencies 55 | +++++++++++++++++++++ 56 | When you add code that uses a new package that is not in the standard python library, you should add it to the dependencies specified under the ``install_requires`` option in `setup.py `_. 57 | `See here `_ for information on how to do this, and how to specify minimal required versions. 58 | As described in the above link, you should **not** pin exact versions in ``install_requires`` in `setup.py `_ unless absolutely necessary. 59 | 60 | 61 | Testing 62 | --------- 63 | 64 | Adding tests 65 | ++++++++++++++ 66 | As you add new codes, you should create tests to make sure it is working correctly. 67 | These can include: 68 | 69 | - doctests in the code 70 | 71 | - unit tests in the `./test/ `_ subdirectory 72 | 73 | Running the tests locally 74 | ++++++++++++++++++++++++++ 75 | After you make changes, you should run two sets of tests. 76 | To run the tests, go to the top-level package directory. 77 | 78 | Then run the tests with pytest_ under the ``museotoolbox-dev`` conda environnement by running : 79 | 80 | - ``make pytest`` 81 | 82 | 83 | Building the documentation 84 | +++++++++++++++++++++++++++ 85 | The documentation will be built for each new commit in the ``master`` branch. 
86 | However, you can build your own documentation in order by typing under the ``museotoolbox-dev`` conda environnement : 87 | 88 | - ``make ddoc`` 89 | 90 | 91 | Automated testing on Travis 92 | +++++++++++++++++++++++++++ 93 | The aforementioned pytest_ tests will be run automatically by the Travis_ continuous integration system as specified in the `.travis.yml <.travis.yml>`_ file. 94 | Note that running the Travis_ tests requires you to register the project with Travis_. 95 | 96 | If the tests are passing, you will see this on the Travis_ badge on GitHub repo main page. 97 | 98 | 99 | Tagging versions and putting on PyPI 100 | ------------------------------------- 101 | When you have a new stable release, you will want to tag it and put it on PyPI_ where it can be installed with pip_. 102 | First, make sure the version number is up-to-date in `__init__.py`_ and the CHANGELOG_. 103 | Then commit the code to GitHub if you haven't already done so. 104 | Next tag the version, as in:: 105 | 106 | git tag -a 0.1.0 -m 'version 0.1.0' 107 | 108 | and then push the tag to GitHub with:: 109 | 110 | git push --tags 111 | 112 | Finally, with Travis_, specify your pypi account in `.travis.yml <.travis.yml>`_ file to publish a new version on pypi when you publish a stable release on github. 113 | 114 | Note that this requires you to have registered the package on PyPI_ if this is the first version of the package there. 115 | 116 | .. _pytest: https://docs.pytest.org 117 | .. _Travis: https://docs.travis-ci.com 118 | .. _PyPI: https://pypi.org/ 119 | .. _pip: https://pip.pypa.io 120 | .. _sphinx: https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html 121 | .. _test: test 122 | .. _docs: docs 123 | .. _notebooks: notebooks 124 | .. _`Jupyter notebooks`: https://jupyter.org/ 125 | .. _`__init__.py`: museotoolbox/__init__.py 126 | .. _CHANGELOG: CHANGELOG.md 127 | .. 
_`numpy style documentation`: https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_numpy.html 128 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | 3 | PYTHON=python3 4 | branch := $(shell git symbolic-ref --short -q HEAD) 5 | 6 | help : 7 | @echo "The following make targets are available:" 8 | @echo " help - print this message" 9 | @echo " build - build python package" 10 | @echo " install - install python package (local user)" 11 | @echo " sinstall - install python package (system with sudo)" 12 | @echo " remove - remove the package (local user)" 13 | @echo " sremove - remove the package (system with sudo)" 14 | @echo " clean - remove any temporary files" 15 | @echo " notebook - launch ipython3 notebook" 16 | build : 17 | $(PYTHON) setup.py sdist bdist_wheel 18 | buildext : 19 | $(PYTHON) setup.py build_ext --inplace 20 | 21 | install : 22 | $(PYTHON) setup.py install --user 23 | 24 | sinstall : 25 | sudo $(PYTHON) setup.py install 26 | 27 | remove : 28 | $(PYTHON) setup.py install --user --record files.txt 29 | tr '\n' '\0' < files.txt | xargs -0 rm -f -- 30 | rm files.txt 31 | 32 | sremove : 33 | $(PYTHON) setup.py install --record files.txt 34 | tr '\n' '\0' < files.txt | sudo xargs -0 rm -f -- 35 | rm files.txt 36 | 37 | clean : FORCE 38 | $(PYTHON) setup.py clean 39 | 40 | uploadpypi : 41 | #python setup.py register 42 | $(PYTHON) setup.py sdist 43 | twine upload dist/* 44 | 45 | doc : 46 | m2r README.md CHANGELOG.md 47 | mv README.rst CHANGELOG.rst docs/source/ 48 | cd docs/ && make html 49 | 50 | doc_full : 51 | m2r README.md CHANGELOG.md 52 | mv README.rst CHANGELOG.rst docs/source/ 53 | rm -rf docs/source/auto_examples/ 54 | rm -rf docs/source/modules/ 55 | rm -rf docs/build/ 56 | cd docs/ && make html 57 | 58 | autopep8 : 59 | autopep8 -ir museotoolbox --jobs -1 60 | 61 | aautopep8 : 62 | autopep8 -air -a -a -a 
-a museotoolbox --jobs -1 63 | 64 | pytest : 65 | pytest-3 -v --cov=museotoolbox 66 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Museo ToolBox logo](https://github.com/nkarasiak/MuseoToolBox/raw/master/metadata/museoToolBox_logo_128.png) 2 | 3 | [![Build status](https://api.travis-ci.org/nkarasiak/MuseoToolBox.svg?branch=master)](https://travis-ci.org/nkarasiak/MuseoToolBox) 4 | [![Documentation status](https://readthedocs.org/projects/museotoolbox/badge/?version=latest)](https://museotoolbox.readthedocs.io/en/latest/?badge=latest) 5 | [![codecov](https://codecov.io/gh/nkarasiak/MuseoToolBox/branch/master/graph/badge.svg)](https://codecov.io/gh/nkarasiak/MuseoToolBox) 6 | [![PyPI version](https://badge.fury.io/py/museotoolbox.svg)](https://badge.fury.io/py/museotoolbox) 7 | [![Conda version](https://camo.githubusercontent.com/074cca1cb04798ef7b05419795c800130e47273b/68747470733a2f2f696d672e736869656c64732e696f2f636f6e64612f766e2f636f6e64612d666f7267652f6d7573656f746f6f6c626f782e737667)](https://anaconda.org/conda-forge/museotoolbox) 8 | [![Downloads](https://pepy.tech/badge/museotoolbox)](https://pepy.tech/project/museotoolbox) 9 | [![status](https://joss.theoj.org/papers/1f4762d9910093a08034e8f4de441930/status.svg)](https://joss.theoj.org/papers/1f4762d9910093a08034e8f4de441930) 10 | 11 | **Museo ToolBox** is a python library to simplify the use of raster/vector, especially for machine learning and remote sensing. It is now easy to extract raster values from vector polygons and to do some spatial/unspatial cross-validation for scikit-learn from raster. 
12 | 13 | One of the most meaningful contribution is, in my humble opinion, the [RasterMath](https://museotoolbox.readthedocs.io/en/latest/modules/processing/museotoolbox.processing.RasterMath.html) class and the [spatial cross-validation](https://museotoolbox.readthedocs.io/en/latest/modules/museotoolbox.cross_validation.html#module-museotoolbox.cross_validation). 14 | 15 | ## What's the point ? 16 | 17 | Today, the main usages of Museo ToolBox are : 18 | - [museotoolbox.cross_validation](https://museotoolbox.readthedocs.io/en/latest/modules/museotoolbox.cross_validation.html#module-museotoolbox.cross_validation) 19 | - Create validation/training sets from vector, and cross-validation compatible with Scikit-Learn GridSearchCV. The aim is here to **promote the spatial cross-validation** in order to better estimate a model (with a lower spatial auto-correlation overestimation). 20 | - [museotoolbox.processing](https://museotoolbox.readthedocs.io/en/latest/modules/museotoolbox.processing.html) 21 | - [RasterMath](https://museotoolbox.readthedocs.io/en/latest/modules/processing/museotoolbox.processing.RasterMath.html), allows you to apply any of your array-compatible function on your raster and save it. Just load RasterMath, then it will return you the value for each pixel (in all bands) and now you can do whatever you want : predicting a model, smooth signal (whittaker, double logistic...), compute modal value... RasterMath reads and writes a raster block per block to avoid loading the full image in memory. It is compatible with every python function (including numpy) as the first and only argument RasterMath needs on your function is an array. 22 | - Extract bands values from vector ROI (polygons/points) (function : [extract_ROI](https://museotoolbox.readthedocs.io/en/latest/modules/processing/museotoolbox.processing.extract_ROI.html#museotoolbox.processing.extract_ROI)) 23 | - AI based on Scikit-Learn. 
[SuperLearner](https://museotoolbox.readthedocs.io/en/latest/modules/ai/museotoolbox.ai.SuperLearner.html#museotoolbox.ai.SuperLearner) simplifies the use of cross-validation by extracting each accuracy (kappa,F1,OA, and above all confusion matrix) from each fold. It also eases the way to predict a raster (just give the raster name and the model). 24 | 25 | ## That seems cool, but is there some help to use this ? 26 | 27 | I imagined Museo ToolBox as a tool to simplify raster processing and to promote spatial cross-validation, so of course there is some help : [a complete documentation with a lot of examples is available on readthedocs](https://museotoolbox.readthedocs.org/). 28 | 29 | ## How do I install Museo ToolBox ? 30 | 31 | We recommend you to install Museo ToolBox via conda as it includes gdal dependency : 32 | 33 | ```shell 34 | conda install -c conda-forge museotoolbox 35 | ``` 36 | 37 | However, if you prefer to install this library via pip, you need to install first gdal, then : 38 | 39 | ```shell 40 | python3 -m pip install museotoolbox --user 41 | ``` 42 | 43 | For early-adopters, you can install the latest development version directly from git : 44 | 45 | ```shell 46 | python3 -m pip install https://github.com/nkarasiak/museotoolbox/archive/develop.zip --user -U 47 | ``` 48 | 49 | Feel free to remove the `--user` if you like to install the library for every user on the machine or if some dependencies need root access. `-U` is for update if a newer version exists. 
50 | 51 | ### Using and citing the toolbox 52 | 53 | If you use Museo ToolBox in your research and find it useful, please cite this library using the following bibtex reference: 54 | 55 | ```bib 56 | @article{Karasiak2020, 57 | doi = {10.21105/joss.01978}, 58 | url = {https://doi.org/10.21105/joss.01978}, 59 | year = {2020}, 60 | publisher = {The Open Journal}, 61 | volume = {5}, 62 | number = {48}, 63 | pages = {1978}, 64 | author = {Nicolas Karasiak}, 65 | title = {Museo ToolBox: A Python library for remote sensing including a new way to handle rasters.}, 66 | journal = {Journal of Open Source Software} 67 | } 68 | ``` 69 | Or copy this citation : 70 | 71 | > Karasiak, N., (2020). Museo ToolBox: A Python library for remote sensing including a new way to handle rasters.. Journal of Open Source Software, 5(48), 1978, https://doi.org/10.21105/joss.01978 72 | 73 | ## I want to improve Museo ToolBox, how can I contribute ? 74 | 75 | To contribute to this package, please read the instructions in [CONTRIBUTING.rst](CONTRIBUTING.rst). 76 | 77 | ## Who built Museo ToolBox ? 78 | 79 | I am [Nicolas Karasiak](http://www.karasiak.net), a Phd student at Dynafor Lab. I work tree species mapping from space throught dense satellite image time series, especially with Sentinel-2. A special thanks goes to [Mathieu Fauvel](http://fauvel.mathieu.free.fr/) who initiated me to the beautiful world of the open-source. 80 | 81 | ## Why this name ? 82 | As Orfeo ToolBox is one my favorite and most useful library to work with raster data, I choose to name my work as Museo because in ancient Greek religion and myth, Museo is the son and disciple of Orfeo. If you want an acronym, let's say MUSEO means 'Multiple Useful Services for Earth Observation'. 
83 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pkg='museotoolbox' 4 | array=( 3.5 3.6 3.7 ) 5 | 6 | echo "Building conda package ..." 7 | cd ~ 8 | conda skeleton pypi $pkg 9 | cd $pkg 10 | wget https://conda.io/docs/_downloads/build1.sh 11 | wget https://conda.io/docs/_downloads/bld.bat 12 | cd ~ 13 | 14 | # building conda packages 15 | for i in "${array[@]}" 16 | do 17 | conda-build --python $i $pkg 18 | done 19 | 20 | # convert package for other platforms 21 | cd ~ 22 | platforms=( osx-64 linux-32 linux-64 win-32 win-64 ) 23 | find $HOME/conda-bld/linux-64/ -name *.tar.bz2 | while read file 24 | do 25 | echo $file 26 | #conda convert --platform all $file -o $HOME/conda-bld/ 27 | for platform in "${platforms[@]}" 28 | do 29 | conda convert --platform $platform $file -o $HOME/conda-bld/ 30 | done 31 | done 32 | 33 | # upload packages to conda 34 | find $HOME/conda-bld/ -name *.tar.bz2 | while read file 35 | do 36 | echo $file 37 | anaconda upload $file 38 | done 39 | 40 | echo "Building conda package done!" 41 | 42 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | codecov: 2 | token: cfea4c84-c081-448b-b6bc-1b84891d5f07 3 | 4 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. 
Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 21 | 22 | .PHONY: help 23 | help: 24 | @echo "Please use \`make ' where is one of" 25 | @echo " html to make standalone HTML files" 26 | @echo " dirhtml to make HTML files named index.html in directories" 27 | @echo " singlehtml to make a single large HTML file" 28 | @echo " pickle to make pickle files" 29 | @echo " json to make JSON files" 30 | @echo " htmlhelp to make HTML files and a HTML help project" 31 | @echo " qthelp to make HTML files and a qthelp project" 32 | @echo " applehelp to make an Apple Help Book" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all 
external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | @echo " coverage to run coverage check of the documentation (if enabled)" 49 | 50 | .PHONY: clean 51 | clean: 52 | rm -rf $(BUILDDIR)/* 53 | 54 | .PHONY: html 55 | html: 56 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 57 | @echo 58 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 59 | 60 | .PHONY: dirhtml 61 | dirhtml: 62 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 63 | @echo 64 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 65 | 66 | .PHONY: singlehtml 67 | singlehtml: 68 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 69 | @echo 70 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 71 | 72 | .PHONY: pickle 73 | pickle: 74 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 75 | @echo 76 | @echo "Build finished; now you can process the pickle files." 77 | 78 | .PHONY: json 79 | json: 80 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 81 | @echo 82 | @echo "Build finished; now you can process the JSON files." 83 | 84 | .PHONY: htmlhelp 85 | htmlhelp: 86 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 87 | @echo 88 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 89 | ".hhp project file in $(BUILDDIR)/htmlhelp." 
90 | 91 | .PHONY: qthelp 92 | qthelp: 93 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 94 | @echo 95 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 96 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 97 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/MTB.qhcp" 98 | @echo "To view the help file:" 99 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/MTB.qhc" 100 | 101 | .PHONY: applehelp 102 | applehelp: 103 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 104 | @echo 105 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 106 | @echo "N.B. You won't be able to view it unless you put it in" \ 107 | "~/Library/Documentation/Help or install it in your application" \ 108 | "bundle." 109 | 110 | .PHONY: devhelp 111 | devhelp: 112 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 113 | @echo 114 | @echo "Build finished." 115 | @echo "To view the help file:" 116 | @echo "# mkdir -p $$HOME/.local/share/devhelp/MTB" 117 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/MTB" 118 | @echo "# devhelp" 119 | 120 | .PHONY: epub 121 | epub: 122 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 123 | @echo 124 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 125 | 126 | .PHONY: latex 127 | latex: 128 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 129 | @echo 130 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 131 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 132 | "(use \`make latexpdf' here to do that automatically)." 133 | 134 | .PHONY: latexpdf 135 | latexpdf: 136 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 137 | @echo "Running LaTeX files through pdflatex..." 138 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 139 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 
140 | 141 | .PHONY: latexpdfja 142 | latexpdfja: 143 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 144 | @echo "Running LaTeX files through platex and dvipdfmx..." 145 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 146 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 147 | 148 | .PHONY: text 149 | text: 150 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 151 | @echo 152 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 153 | 154 | .PHONY: man 155 | man: 156 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 157 | @echo 158 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 159 | 160 | .PHONY: texinfo 161 | texinfo: 162 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 163 | @echo 164 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 165 | @echo "Run \`make' in that directory to run these through makeinfo" \ 166 | "(use \`make info' here to do that automatically)." 167 | 168 | .PHONY: info 169 | info: 170 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 171 | @echo "Running Texinfo files through makeinfo..." 172 | make -C $(BUILDDIR)/texinfo info 173 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 174 | 175 | .PHONY: gettext 176 | gettext: 177 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 178 | @echo 179 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 180 | 181 | .PHONY: changes 182 | changes: 183 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 184 | @echo 185 | @echo "The overview file is in $(BUILDDIR)/changes." 186 | 187 | .PHONY: linkcheck 188 | linkcheck: 189 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 190 | @echo 191 | @echo "Link check complete; look for any errors in the above output " \ 192 | "or in $(BUILDDIR)/linkcheck/output.txt." 
193 | 194 | .PHONY: doctest 195 | doctest: 196 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 197 | @echo "Testing of doctests in the sources finished, look at the " \ 198 | "results in $(BUILDDIR)/doctest/output.txt." 199 | 200 | .PHONY: coverage 201 | coverage: 202 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 203 | @echo "Testing of coverage in the sources finished, look at the " \ 204 | "results in $(BUILDDIR)/coverage/python.txt." 205 | 206 | .PHONY: xml 207 | xml: 208 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 209 | @echo 210 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 211 | 212 | .PHONY: pseudoxml 213 | pseudoxml: 214 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 215 | @echo 216 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 217 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source 10 | set I18NSPHINXOPTS=%SPHINXOPTS% source 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. 
devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | echo. coverage to run coverage check of the documentation if enabled 41 | goto end 42 | ) 43 | 44 | if "%1" == "clean" ( 45 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 46 | del /q /s %BUILDDIR%\* 47 | goto end 48 | ) 49 | 50 | 51 | REM Check if sphinx-build is available and fallback to Python version if any 52 | %SPHINXBUILD% 1>NUL 2>NUL 53 | if errorlevel 9009 goto sphinx_python 54 | goto sphinx_ok 55 | 56 | :sphinx_python 57 | 58 | set SPHINXBUILD=python -m sphinx.__init__ 59 | %SPHINXBUILD% 2> nul 60 | if errorlevel 9009 ( 61 | echo. 62 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 63 | echo.installed, then set the SPHINXBUILD environment variable to point 64 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 65 | echo.may add the Sphinx directory to PATH. 66 | echo. 67 | echo.If you don't have Sphinx installed, grab it from 68 | echo.http://sphinx-doc.org/ 69 | exit /b 1 70 | ) 71 | 72 | :sphinx_ok 73 | 74 | 75 | if "%1" == "html" ( 76 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 77 | if errorlevel 1 exit /b 1 78 | echo. 79 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 80 | goto end 81 | ) 82 | 83 | if "%1" == "dirhtml" ( 84 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 85 | if errorlevel 1 exit /b 1 86 | echo. 
87 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 88 | goto end 89 | ) 90 | 91 | if "%1" == "singlehtml" ( 92 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 93 | if errorlevel 1 exit /b 1 94 | echo. 95 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 96 | goto end 97 | ) 98 | 99 | if "%1" == "pickle" ( 100 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 101 | if errorlevel 1 exit /b 1 102 | echo. 103 | echo.Build finished; now you can process the pickle files. 104 | goto end 105 | ) 106 | 107 | if "%1" == "json" ( 108 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 109 | if errorlevel 1 exit /b 1 110 | echo. 111 | echo.Build finished; now you can process the JSON files. 112 | goto end 113 | ) 114 | 115 | if "%1" == "htmlhelp" ( 116 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 117 | if errorlevel 1 exit /b 1 118 | echo. 119 | echo.Build finished; now you can run HTML Help Workshop with the ^ 120 | .hhp project file in %BUILDDIR%/htmlhelp. 121 | goto end 122 | ) 123 | 124 | if "%1" == "qthelp" ( 125 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 129 | .qhcp project file in %BUILDDIR%/qthelp, like this: 130 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\POT.qhcp 131 | echo.To view the help file: 132 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\POT.ghc 133 | goto end 134 | ) 135 | 136 | if "%1" == "devhelp" ( 137 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 138 | if errorlevel 1 exit /b 1 139 | echo. 140 | echo.Build finished. 141 | goto end 142 | ) 143 | 144 | if "%1" == "epub" ( 145 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 146 | if errorlevel 1 exit /b 1 147 | echo. 148 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 
149 | goto end 150 | ) 151 | 152 | if "%1" == "latex" ( 153 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 154 | if errorlevel 1 exit /b 1 155 | echo. 156 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 157 | goto end 158 | ) 159 | 160 | if "%1" == "latexpdf" ( 161 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 162 | cd %BUILDDIR%/latex 163 | make all-pdf 164 | cd %~dp0 165 | echo. 166 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 167 | goto end 168 | ) 169 | 170 | if "%1" == "latexpdfja" ( 171 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 172 | cd %BUILDDIR%/latex 173 | make all-pdf-ja 174 | cd %~dp0 175 | echo. 176 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 177 | goto end 178 | ) 179 | 180 | if "%1" == "text" ( 181 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 182 | if errorlevel 1 exit /b 1 183 | echo. 184 | echo.Build finished. The text files are in %BUILDDIR%/text. 185 | goto end 186 | ) 187 | 188 | if "%1" == "man" ( 189 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 190 | if errorlevel 1 exit /b 1 191 | echo. 192 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 193 | goto end 194 | ) 195 | 196 | if "%1" == "texinfo" ( 197 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 198 | if errorlevel 1 exit /b 1 199 | echo. 200 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 201 | goto end 202 | ) 203 | 204 | if "%1" == "gettext" ( 205 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 206 | if errorlevel 1 exit /b 1 207 | echo. 208 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 209 | goto end 210 | ) 211 | 212 | if "%1" == "changes" ( 213 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 214 | if errorlevel 1 exit /b 1 215 | echo. 216 | echo.The overview file is in %BUILDDIR%/changes. 
217 | goto end 218 | ) 219 | 220 | if "%1" == "linkcheck" ( 221 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 222 | if errorlevel 1 exit /b 1 223 | echo. 224 | echo.Link check complete; look for any errors in the above output ^ 225 | or in %BUILDDIR%/linkcheck/output.txt. 226 | goto end 227 | ) 228 | 229 | if "%1" == "doctest" ( 230 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 231 | if errorlevel 1 exit /b 1 232 | echo. 233 | echo.Testing of doctests in the sources finished, look at the ^ 234 | results in %BUILDDIR%/doctest/output.txt. 235 | goto end 236 | ) 237 | 238 | if "%1" == "coverage" ( 239 | %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage 240 | if errorlevel 1 exit /b 1 241 | echo. 242 | echo.Testing of coverage in the sources finished, look at the ^ 243 | results in %BUILDDIR%/coverage/python.txt. 244 | goto end 245 | ) 246 | 247 | if "%1" == "xml" ( 248 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 249 | if errorlevel 1 exit /b 1 250 | echo. 251 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 252 | goto end 253 | ) 254 | 255 | if "%1" == "pseudoxml" ( 256 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 257 | if errorlevel 1 exit /b 1 258 | echo. 259 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 260 | goto end 261 | ) 262 | 263 | :end 264 | -------------------------------------------------------------------------------- /docs/source/CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | 2 | Changelog 3 | ========= 4 | 5 | All notable changes to this project will be documented in this file. 6 | 7 | The format is based on `Keep a Changelog `_\ , 8 | and this project adheres to `Semantic Versioning `_. 9 | 10 | [0.13.6] - 2020-07-19 11 | --------------------- 12 | 13 | Added 14 | ^^^^^ 15 | 16 | 17 | * RasterMath get_image_as_array() now supports mask. 
18 | 19 | Fixed 20 | ^^^^^ 21 | 22 | 23 | * Update groups management for sklearn>=0.25 24 | * Fixed bug with raster/vector datasets (mtb.datasets.load_historical_data()) 25 | * Fixed bug with get_image_as_array() from RasterMath (completely rewrite this part) 26 | 27 | [0.13.5] - 2020-06-24 28 | --------------------- 29 | 30 | Fixed 31 | ^^^^^ 32 | 33 | 34 | * Requirements is not directly written inside setup.py due to bugs. 35 | 36 | [0.13.4] - 2020-06-24 37 | --------------------- 38 | 39 | Fixed 40 | ^^^^^ 41 | 42 | 43 | * Fix bug in setup.py using requirements.txt instead of ./requirements.txt 44 | 45 | [0.13.3] - 2020-06-23 46 | --------------------- 47 | 48 | Fixed 49 | ^^^^^ 50 | 51 | 52 | * Adding psutil to depency 53 | 54 | [0.13.2] - 2020-06-18 55 | --------------------- 56 | 57 | Added 58 | ^^^^^ 59 | 60 | 61 | * get_image_as_array function for RasterMath 62 | 63 | Fixed 64 | ^^^^^ 65 | 66 | 67 | * train_test_split supports now groups=None 68 | 69 | [0.13.1] - 2020-06-11 70 | --------------------- 71 | 72 | Added 73 | ^^^^^ 74 | 75 | 76 | * Support list for cross-validation in order to give an unready unfolded cv. 
77 | 78 | New features provided by @marclang for the charts module : 79 | 80 | 81 | * Allows to display both F1 and accuracy or mean metrics 82 | * Allows to display accuracy after have been displaying mean (and vice versa) 83 | * Allows to display float matrix 84 | 85 | Changed 86 | ^^^^^^^ 87 | 88 | 89 | * Fix path separator to access tutorial dataset 90 | 91 | [0.13.0] - 2020-04-21 92 | --------------------- 93 | 94 | Changed 95 | ^^^^^^^ 96 | 97 | 98 | * Final version for JOSS (paper.md and paper.bib updated thanks to @kbarnhart) 99 | 100 | [0.12.1-rc.1] - 2020-04-18 101 | -------------------------- 102 | 103 | Added 104 | ^^^^^ 105 | 106 | 107 | * RasterMath use available memory to speed up process and manage now several cores (n_jobs) 108 | * train_test_split in cross_validation module 109 | 110 | Changed 111 | ^^^^^^^ 112 | 113 | 114 | * Enhance mask management for RasterMath 115 | * Move FlushCache to optimize RasterMath 116 | * RasterMath get_random_block returns only block which are not totally unmasked 117 | * charts.PlotConfusionMatrix has a default argument (zero_is_min=True) 118 | 119 | [0.12.1-beta.2] - 2020-02-10 120 | ---------------------------- 121 | 122 | Fixed 123 | ^^^^^ 124 | 125 | 126 | * Fix bug when in RasterMath when input is only one band 127 | * Fix bug in RasterMath with mask and list 128 | 129 | Added 130 | ^^^^^ 131 | 132 | 133 | * n_jobs for RasterMath (thanks to Helene @HTDBD and Arthur @ArthurDfs, two great students) 134 | * function write_block and generally a most intuitive way to use RasterMath (with the help of @HTDBD and @ArthurDfs) 135 | 136 | [0.12.1-beta.1] - 2020-01-16 137 | ---------------------------- 138 | 139 | Added 140 | ^^^^^ 141 | 142 | 143 | * new branch spatial added 144 | 145 | Added 146 | ^^^^^ 147 | 148 | 149 | * Added this line 150 | 151 | Changed 152 | ^^^^^^^ 153 | 154 | 155 | * SequentialFeatureSelection parameters order Changed. *scoring* is now before *standardize*. 
156 | * Update doc for load_historical_data() 157 | 158 | Fixed 159 | ^^^^^ 160 | 161 | 162 | * Fix bug in get_block() and get_random_block() which returned the same block each time due to new method. 163 | * Fix bug with nodata in RasterMath when output is of float type 164 | 165 | [0.12] - 2019-12-13 166 | ------------------- 167 | 168 | Changed 169 | ^^^^^^^ 170 | 171 | 172 | * RasterMath made a lot of improvements using block reading and writing. For example, the default block size is now 256x256 (you can keep the default block size by choosing block_size=False), and Museo ToolBox automatic detect if the geotiff will be tiled or not (it depends on the block size). 173 | * Some folders have Changed name : 174 | 175 | * raster_tools and vector_tools to processing 176 | * learn_tools to ai 177 | 178 | * some functions have Changed name : 179 | 180 | * getSamplesFromROI to extract_values 181 | * historicalMap to load_historical_data 182 | * getDistanceMatrix to get_distance_matrix 183 | 184 | * classes now always begin with a capital case : 185 | 186 | * learnAndPredict to SuperLearner 187 | * rasterMath to RasterMath 188 | * sequentialFeatureSelection to SequentialFeatureSelection 189 | 190 | Fixed 191 | ^^^^^ 192 | 193 | 194 | * bug #7 : getSamplesFromROI (nowd extract_ROI) now extracts ROI values using by default memory. If it fails, it will create a temporary raster on disk then delete it when finished. 195 | 196 | Removed 197 | ^^^^^^^ 198 | 199 | 200 | * Remove command lines (cli) 201 | 202 | [0.12rc5] - 2019-11-11 203 | ---------------------- 204 | 205 | Changed 206 | ^^^^^^^ 207 | 208 | 209 | * getSamplesFromROI return list of available fields if wrong field given. 
210 | * rasterMath convert np.nan value to nodata value (if numpy >= 1.17) 211 | 212 | [0.12rc4] - 2019-11-01 213 | ---------------------- 214 | 215 | Changed 216 | ^^^^^^^ 217 | 218 | 219 | * Minor fix when using learnAndPredict with an outside customized function 220 | * Better management fo cross-validation in learnAndPredict 221 | * Fix minor bug using False or None value with cv in learnAndPredict 222 | 223 | Added 224 | ^^^^^ 225 | 226 | 227 | * Add an option to use SFS without writing each best model on the disk. 228 | 229 | [0.12rc3] - 2019-10-29 230 | ---------------------- 231 | 232 | Added 233 | ^^^^^ 234 | 235 | 236 | * Move some functions from vector_tools to raster_tools, functions are anyway still available from vector_tools 237 | 238 | Changed 239 | ^^^^^^^ 240 | 241 | 242 | * learnAndPredict manages int value for cross-validation by using RandomStratifiedKFold 243 | * Enhance blocksize management for rasterMath 244 | * Move command line code in _cli folder 245 | 246 | [0.12rc2] - 2019-10-14 247 | ---------------------- 248 | 249 | Changed 250 | ^^^^^^^ 251 | 252 | 253 | * Improvements of rasterMath 254 | 255 | * customBlockSize defines now the same block size for window reading and for the output 256 | * add seed parameter (to set a random generator) in getRandomBlock() 257 | * add getRasterParameters() and customRasterParameters() function. 258 | 259 | [0.12rc1] - 2019-10-12 260 | ---------------------- 261 | 262 | Changed 263 | ^^^^^^^ 264 | 265 | 266 | * update rasterMath to generate by default a 256*256 raster block size. 267 | * update rasterMath to prevent bug if user has osgeo/gdal version is lower than 2.1. 268 | * prevent bug when in rasterMath if processor has only 1 core. 
269 | 270 | Fixed 271 | ^^^^^ 272 | 273 | 274 | * minor fixes 275 | -------------------------------------------------------------------------------- /docs/source/CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | ===================================== 2 | How to contribute to this package 3 | ===================================== 4 | 5 | This document describes how to edit the package, run the tests, build the docs, put tagged versions on PyPI_, etc. 6 | 7 | Editing the project 8 | --------------------- 9 | 10 | Developpment environnement 11 | +++++++++++++++++++++++++++ 12 | 13 | We're using conda environment to install all dependencies needed for Museo ToolBox developpement. 14 | 15 | - ``conda env create -f .environment.yml`` 16 | 17 | This will create an environnement called ``museotoolbox-dev``, to activate it, just type : 18 | 19 | - ``conda active museotoolbox-dev`` 20 | 21 | 22 | Package structure 23 | ++++++++++++++++++ 24 | - Package code is in `museotoolbox `_ folder. 25 | - Docs can be generated by typing ``Make doc`` at the root of folder. 26 | - Unit tests are in `test `_ folder. 27 | 28 | Modify the code via pull requests 29 | +++++++++++++++++++++++++++++++++++ 30 | To make changes to the code, you should make a branch or fork, make your changes, and then submit a pull request. 31 | If you aren't sure about pull requests: 32 | 33 | - A general description of pull requests: https://help.github.com/en/articles/about-pull-requests 34 | 35 | - How to create a pull request: https://help.github.com/en/articles/creating-a-pull-request 36 | 37 | - How to structure your pull requests (one conceptually distinct change per pull request): https://medium.com/@fagnerbrack/one-pull-request-one-concern-e84a27dfe9f1 38 | 39 | Tests and documentation 40 | +++++++++++++++++++++++ 41 | You should document your code clearly with `numpy style documentation`_. 42 | You should add tests. 
43 | For simple things, these can be `doctests `_ in the code. 44 | For more elaborate functionality, put unit tests in test. 45 | 46 | Versions and CHANGELOG 47 | ++++++++++++++++++++++ 48 | The version is `single sourced `_ in `__init__.py`_. 49 | When modifying a tagged version (e.g., ``0.1.0``), indicate you are working on a development version by adding a ``dev`` (e.g., ``0.1.dev1``). 50 | See `here `_ for more information on version numbers. 51 | 52 | Conceptual descriptions of changes should also be tracked in the CHANGELOG_. 53 | 54 | Adding dependencies 55 | +++++++++++++++++++++ 56 | When you add code that uses a new package that is not in the standard python library, you should add it to the dependencies specified under the ``install_requires`` option in `setup.py `_. 57 | `See here `_ for information on how to do this, and how to specify minimal required versions. 58 | As described in the above link, you should **not** pin exact versions in ``install_requires`` in `setup.py `_ unless absolutely necessary. 59 | 60 | 61 | Testing 62 | --------- 63 | 64 | Adding tests 65 | ++++++++++++++ 66 | As you add new codes, you should create tests to make sure it is working correctly. 67 | These can include: 68 | 69 | - doctests in the code 70 | 71 | - unit tests in the `./test/ `_ subdirectory 72 | 73 | Running the tests locally 74 | ++++++++++++++++++++++++++ 75 | After you make changes, you should run two sets of tests. 76 | To run the tests, go to the top-level package directory. 77 | 78 | Then run the tests with pytest_ under the ``museotoolbox-dev`` conda environnement by running : 79 | 80 | - ``make pytest`` 81 | 82 | 83 | Building the documentation 84 | +++++++++++++++++++++++++++ 85 | The documentation will be built for each new commit in the ``master`` branch. 
86 | However, you can build your own documentation in order by typing under the ``museotoolbox-dev`` conda environnement : 87 | 88 | - ``make ddoc`` 89 | 90 | 91 | Automated testing on Travis 92 | +++++++++++++++++++++++++++ 93 | The aforementioned pytest_ tests will be run automatically by the Travis_ continuous integration system as specified in the `.travis.yml <.travis.yml>`_ file. 94 | Note that running the Travis_ tests requires you to register the project with Travis_. 95 | 96 | If the tests are passing, you will see this on the Travis_ badge on GitHub repo main page. 97 | 98 | 99 | Tagging versions and putting on PyPI 100 | ------------------------------------- 101 | When you have a new stable release, you will want to tag it and put it on PyPI_ where it can be installed with pip_. 102 | First, make sure the version number is up-to-date in `__init__.py`_ and the CHANGELOG_. 103 | Then commit the code to GitHub if you haven't already done so. 104 | Next tag the version, as in:: 105 | 106 | git tag -a 0.1.0 -m 'version 0.1.0' 107 | 108 | and then push the tag to GitHub with:: 109 | 110 | git push --tags 111 | 112 | Finally, with Travis_, specify your pypi account in `.travis.yml <.travis.yml>`_ file to publish a new version on pypi when you publish a stable release on github. 113 | 114 | Note that this requires you to have registered the package on PyPI_ if this is the first version of the package there. 115 | 116 | .. _pytest: https://docs.pytest.org 117 | .. _Travis: https://docs.travis-ci.com 118 | .. _PyPI: https://pypi.org/ 119 | .. _pip: https://pip.pypa.io 120 | .. _sphinx: https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html 121 | .. _test: test 122 | .. _docs: docs 123 | .. _notebooks: notebooks 124 | .. _`Jupyter notebooks`: https://jupyter.org/ 125 | .. _`__init__.py`: museotoolbox/__init__.py 126 | .. _CHANGELOG: CHANGELOG.md 127 | .. 
_`numpy style documentation`: https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_numpy.html 128 | -------------------------------------------------------------------------------- /docs/source/Makefile.txt: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = MuseoToolBox 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/source/README.rst: -------------------------------------------------------------------------------- 1 | .. role:: raw-html-m2r(raw) 2 | :format: html 3 | 4 | 5 | 6 | .. image:: https://github.com/nkarasiak/MuseoToolBox/raw/master/metadata/museoToolBox_logo_128.png 7 | :target: https://github.com/nkarasiak/MuseoToolBox/raw/master/metadata/museoToolBox_logo_128.png 8 | :alt: Museo ToolBox logo 9 | 10 | 11 | 12 | .. image:: https://api.travis-ci.org/nkarasiak/MuseoToolBox.svg?branch=master 13 | :target: https://travis-ci.org/nkarasiak/MuseoToolBox 14 | :alt: Build status 15 | 16 | 17 | .. image:: https://readthedocs.org/projects/museotoolbox/badge/?version=latest 18 | :target: https://museotoolbox.readthedocs.io/en/latest/?badge=latest 19 | :alt: Documentation status 20 | 21 | 22 | .. 
image:: https://codecov.io/gh/nkarasiak/MuseoToolBox/branch/master/graph/badge.svg 23 | :target: https://codecov.io/gh/nkarasiak/MuseoToolBox 24 | :alt: codecov 25 | 26 | 27 | .. image:: https://badge.fury.io/py/museotoolbox.svg 28 | :target: https://badge.fury.io/py/museotoolbox 29 | :alt: PyPI version 30 | 31 | 32 | .. image:: https://camo.githubusercontent.com/074cca1cb04798ef7b05419795c800130e47273b/68747470733a2f2f696d672e736869656c64732e696f2f636f6e64612f766e2f636f6e64612d666f7267652f6d7573656f746f6f6c626f782e737667 33 | :target: https://anaconda.org/conda-forge/museotoolbox 34 | :alt: Conda version 35 | 36 | 37 | .. image:: https://pepy.tech/badge/museotoolbox 38 | :target: https://pepy.tech/project/museotoolbox 39 | :alt: Downloads 40 | 41 | 42 | .. image:: https://joss.theoj.org/papers/1f4762d9910093a08034e8f4de441930/status.svg 43 | :target: https://joss.theoj.org/papers/1f4762d9910093a08034e8f4de441930 44 | :alt: status 45 | 46 | 47 | **Museo ToolBox** is a python library to simplify the use of raster/vector, especially for machine learning and remote sensing. It is now easy to extract raster values from vector polygons and to do some spatial/unspatial cross-validation for scikit-learn from raster. 48 | 49 | One of the most meaningful contribution is, in my humble opinion, the `RasterMath `_ class and the `spatial cross-validation `_. 50 | 51 | What's the point ? 52 | ------------------ 53 | 54 | Today, the main usages of Museo ToolBox are : 55 | 56 | 57 | * `museotoolbox.cross_validation `_ 58 | 59 | * Create validation/training sets from vector, and cross-validation compatible with Scikit-Learn GridSearchCV. The aim is here to **promote the spatial cross-validation** in order to better estimate a model (with a lower spatial auto-correlation overestimation). 60 | 61 | * `museotoolbox.processing `_ 62 | 63 | * `RasterMath `_\ , allows you to apply any of your array-compatible function on your raster and save it. 
Just load RasterMath, then it will return you the value for each pixel (in all bands) and now you can do whatever you want : predicting a model, smooth signal (whittaker, double logistic...), compute modal value... RasterMath reads and writes a raster block per block to avoid loading the full image in memory. It is compatible with every python function (including numpy) as the first and only argument RasterMath needs on your function is an array. 64 | * Extract bands values from vector ROI (polygons/points) (function : `extract_ROI `_\ ) 65 | 66 | * AI based on Scikit-Learn. `SuperLearner `_ simplifies the use of cross-validation by extracting each accuracy (kappa,F1,OA, and above all confusion matrix) from each fold. It also eases the way to predict a raster (just give the raster name and the model). 67 | 68 | That seems cool, but is there some help to use this ? 69 | ----------------------------------------------------- 70 | 71 | I imagined Museo ToolBox as a tool to simplify raster processing and to promote spatial cross-validation, so of course there is some help : `a complete documentation with a lot of examples is available on readthedocs `_. 72 | 73 | How do I install Museo ToolBox ? 74 | -------------------------------- 75 | 76 | We recommend you to install Museo ToolBox via conda as it includes gdal dependency : 77 | 78 | .. code-block:: shell 79 | 80 | conda install -c conda-forge museotoolbox 81 | 82 | However, if you prefer to install this library via pip, you need to install first gdal, then : 83 | 84 | .. code-block:: shell 85 | 86 | python3 -m pip install museotoolbox --user 87 | 88 | For early-adopters, you can install the latest development version directly from git : 89 | 90 | .. 
code-block:: shell 91 | 92 | python3 -m pip install https://github.com/nkarasiak/museotoolbox/archive/develop.zip --user -U 93 | 94 | Feel free to remove the ``--user`` if you like to install the library for every user on the machine or if some dependencies need root access. ``-U`` is for update if a newer version exists. 95 | 96 | Using and citing the toolbox 97 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 98 | 99 | If you use Museo ToolBox in your research and find it useful, please cite this library using the following bibtex reference: 100 | 101 | .. code-block:: bib 102 | 103 | @article{Karasiak2020, 104 | doi = {10.21105/joss.01978}, 105 | url = {https://doi.org/10.21105/joss.01978}, 106 | year = {2020}, 107 | publisher = {The Open Journal}, 108 | volume = {5}, 109 | number = {48}, 110 | pages = {1978}, 111 | author = {Nicolas Karasiak}, 112 | title = {Museo ToolBox: A Python library for remote sensing including a new way to handle rasters.}, 113 | journal = {Journal of Open Source Software} 114 | } 115 | 116 | Or copy this citation : 117 | 118 | .. 119 | 120 | Karasiak, N., (2020). Museo ToolBox: A Python library for remote sensing including a new way to handle rasters.. Journal of Open Source Software, 5(48), 1978, https://doi.org/10.21105/joss.01978 121 | 122 | 123 | I want to improve Museo ToolBox, how can I contribute ? 124 | ------------------------------------------------------- 125 | 126 | To contribute to this package, please read the instructions in `CONTRIBUTING.rst `_. 127 | 128 | Who built Museo ToolBox ? 129 | ------------------------- 130 | 131 | I am `Nicolas Karasiak `_\ , a Phd student at Dynafor Lab. I work tree species mapping from space throught dense satellite image time series, especially with Sentinel-2. A special thanks goes to `Mathieu Fauvel `_ who initiated me to the beautiful world of the open-source. 132 | 133 | Why this name ? 
As Orfeo ToolBox is one of my favorite and most useful libraries to work with raster data, I chose to name my work as Museo
39 | -------------------------------------------------------------------------------- /docs/source/_templates/function.rst: -------------------------------------------------------------------------------- 1 | {{ objname }} 2 | {{ underline }} 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autofunction:: {{ objname }} 7 | 8 | .. include:: ../backreferences/{{fullname}}.examples 9 | 10 | .. raw:: html 11 | 12 |
13 | -------------------------------------------------------------------------------- /docs/source/_templates/module.rst: -------------------------------------------------------------------------------- 1 | {{ objname }} 2 | {{ underline }} 3 | 4 | .. automodule:: {{ fullname }} 5 | 6 | {% block functions %} 7 | {% if functions %} 8 | .. rubric:: Functions 9 | 10 | .. autosummary:: 11 | :toctree: {{ objname }} 12 | :template: function.rst 13 | {% for item in functions %} 14 | {{ item }} 15 | {%- endfor %} 16 | {% endif %} 17 | {% endblock %} 18 | 19 | {% block classes %} 20 | {% if classes %} 21 | .. rubric:: Classes 22 | 23 | .. autosummary:: 24 | :toctree: {{ objname }} 25 | :template: class.rst 26 | {% for item in classes %} 27 | {{ item }} 28 | {%- endfor %} 29 | {% endif %} 30 | {% endblock %} 31 | 32 | {% block exceptions %} 33 | {% if exceptions %} 34 | .. rubric:: Exceptions 35 | 36 | .. autosummary:: 37 | {% for item in exceptions %} 38 | {{ item }} 39 | {%- endfor %} 40 | {% endif %} 41 | {% endblock %} 42 | -------------------------------------------------------------------------------- /docs/source/api.rst: -------------------------------------------------------------------------------- 1 | .. _museotoolbox_api_reference: 2 | 3 | API Reference 4 | ============= 5 | 6 | The complete Museo ToolBox project is automatically documented for every module. 7 | 8 | .. currentmodule:: museotoolbox 9 | 10 | .. autosummary:: 11 | :toctree: modules/ 12 | :template: module.rst 13 | 14 | processing 15 | ai 16 | cross_validation 17 | charts 18 | stats 19 | datasets 20 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Museo ToolBox documentation build configuration file, created by 5 | # sphinx-quickstart on Sun Nov 11 11:34:29 2018. 
6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | # 20 | import re 21 | import os 22 | import sys 23 | # sys.path.insert(0, os.path.abspath('.')) 24 | 25 | 26 | # -- General configuration ------------------------------------------------ 27 | 28 | # If your documentation needs a minimal Sphinx version, state it here. 29 | # 30 | # needs_sphinx = '1.0' 31 | 32 | # Add any Sphinx extension module names here, as strings. They can be 33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 34 | # ones. 35 | 36 | #from recommonmark.parser import CommonMarkParser 37 | 38 | autoclass_content = 'both' 39 | 40 | #source_parsers = { 41 | # '.md': 'recommonmark.parser.CommonMarkParser', 42 | #} 43 | 44 | extensions = [ 45 | 'sphinx.ext.doctest', 46 | 'sphinx.ext.autodoc', 47 | 'sphinx.ext.autosummary', 48 | 'sphinx.ext.viewcode', 49 | 'sphinx.ext.napoleon', 50 | 'sphinx_gallery.gen_gallery', 51 | 'nbsphinx', 52 | 'sphinx_copybutton' 53 | ] 54 | 55 | 56 | autosummary_generate = True 57 | # numpydoc_show_class_members=False 58 | imported_members=True 59 | autoclass_content = 'both' 60 | # autodoc_default_flags = ['members', 'inherited-members','undoc-members'] 61 | # Add any paths that contain templates here, relative to this directory. 62 | templates_path = ['_templates'] 63 | 64 | # The suffix(es) of source filenames. 
65 | # You can specify multiple suffix as a list of string: 66 | # 67 | # source_suffix = ['.rst', '.md'] 68 | 69 | 70 | 71 | # General information about the project. 72 | project = 'MuseoToolBox' 73 | copyright = '2019, Nicolas Karasiak' 74 | author = 'Nicolas Karasiak' 75 | sphinx_gallery_conf = { 76 | 'backreferences_dir' : os.path.join('modules','backreferences'), 77 | 'doc_module':'museotoolbox', 78 | # path to your examples scripts 79 | 'examples_dirs': '../../examples', 80 | 'filename_pattern' : '/', 81 | # path where to save gallery generated examples 82 | 'ignore_pattern' : '__', 83 | 'gallery_dirs': 'auto_examples', 84 | 'plot_gallery': True, 85 | #'doc_module' : ('sphinx_gallery', 'numpy') # enable 'Examples using..' 86 | } 87 | 88 | # The version info for the project you're documenting, acts as replacement for 89 | # |version| and |release|, also used in various other places throughout the 90 | # built documents. 91 | # 92 | sys.path.insert(0, os.path.abspath('.')) 93 | sys.path.insert(0, os.path.abspath('..')) 94 | sys.path.insert(0, os.path.abspath("../..")) 95 | 96 | # The short X.Y version. 97 | 98 | __version__ = re.search( 99 | r'__version__\s*=\s*[\'"]([^\'"]*)[\'"]', # It excludes inline comment too 100 | open('../../museotoolbox/__init__.py').read()).group(1) 101 | 102 | source_suffix = ['.rst'] 103 | 104 | # The master toctree document. 105 | 106 | version = __version__ 107 | # The full version, including alpha/beta/rc tags. 108 | release = __version__ 109 | 110 | # The language for content autogenerated by Sphinx. Refer to documentation 111 | # for a list of supported languages. 112 | # 113 | # This is also used if you do content translation via gettext catalogs. 114 | # Usually you set "language" from the command line for these cases. 115 | language = None 116 | 117 | # List of patterns, relative to source directory, that match files and 118 | # directories to ignore when looking for source files. 
# These patterns also affect html_static_path and html_extra_path
164 | htmlhelp_basename = 'MuseoToolBoxdoc' 165 | 166 | 167 | # -- Options for LaTeX output --------------------------------------------- 168 | 169 | latex_elements = { 170 | # The paper size ('letterpaper' or 'a4paper'). 171 | # 172 | # 'papersize': 'letterpaper', 173 | 174 | # The font size ('10pt', '11pt' or '12pt'). 175 | # 176 | # 'pointsize': '10pt', 177 | 178 | # Additional stuff for the LaTeX preamble. 179 | # 180 | # 'preamble': '', 181 | 182 | # Latex figure (float) alignment 183 | # 184 | # 'figure_align': 'htbp', 185 | } 186 | 187 | 188 | 189 | 190 | # -- Options for Texinfo output ------------------------------------------- 191 | 192 | # Grouping the document tree into Texinfo files. List of tuples 193 | # (source start file, target name, title, author, 194 | # dir menu entry, description, category) 195 | 196 | master_doc = 'index' 197 | 198 | # -- Options for manual page output --------------------------------------- 199 | 200 | # One entry per manual page. List of tuples 201 | # (source start file, name, description, authors, manual section). 202 | man_pages = [ 203 | (master_doc, 'museotoolbox', 'MuseoToolBox Documentation', 204 | [author], 1) 205 | ] 206 | def setup(app): 207 | app.add_stylesheet('style.css') # may also be an URL 208 | 209 | 210 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. MuseoToolBox documentation 2 | 3 | MuseoToolBox documentation 4 | ============================= 5 | 6 | .. sidebar:: About MuseoToolBox 7 | 8 | .. image:: https://github.com/nkarasiak.png?size=100 9 | :alt: Nicolas Karasiak avatar 10 | :class: avatar 11 | 12 | :Author: `Nicolas Karasiak `_ 13 | :Source code: `github.com project `_ 14 | :Bug tracker: `github.com issues `_ 15 | :Generated: |today| 16 | :License: GPL v3 17 | :Version: |release| 18 | 19 | .. include:: README.rst 20 | :start-line: 8 21 | 22 | .. 
toctree:: 23 | :maxdepth: 2 24 | 25 | self 26 | 27 | .. toctree:: 28 | :maxdepth: 3 29 | :caption: Examples gallery 30 | 31 | auto_examples/index 32 | 33 | .. toctree:: 34 | :maxdepth: 3 35 | :caption: Documentation API 36 | 37 | api 38 | CHANGELOG 39 | CONTRIBUTING 40 | 41 | Indices and tables 42 | ================== 43 | 44 | * :ref:`genindex` 45 | * :ref:`modindex` 46 | * :ref:`search` 47 | -------------------------------------------------------------------------------- /examples/README.txt: -------------------------------------------------------------------------------- 1 | .. _museotoolbox: 2 | 3 | Gallery 4 | ----------------------- 5 | 6 | Here you will find all the examples related to :mod:`museotoolbox` library. 7 | 8 | -------------------------------------------------------------------------------- /examples/ai/README.txt: -------------------------------------------------------------------------------- 1 | .. _ai: 2 | 3 | Artificial Intelligence 4 | ------------------------ 5 | 6 | Examples related to the :mod:`museotoolbox.ai` module. 7 | 8 | -------------------------------------------------------------------------------- /examples/ai/SFFS.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Sequential Forward Feature Selection (SFFS) 4 | ======================================================== 5 | 6 | This example shows how to make a Random Sampling with 7 | 50% for each class. 
8 | 9 | """ 10 | 11 | ############################################################################## 12 | # Import librairies 13 | # ------------------------------------------- 14 | 15 | from museotoolbox.ai import SequentialFeatureSelection 16 | from museotoolbox.cross_validation import LeavePSubGroupOut 17 | from museotoolbox import datasets 18 | from sklearn.ensemble import RandomForestClassifier 19 | from sklearn import metrics 20 | import numpy as np 21 | ############################################################################## 22 | # Load HistoricalMap dataset 23 | # ------------------------------------------- 24 | 25 | X,y,g = datasets.load_historical_data(return_X_y_g=True,low_res=True) 26 | 27 | ############################################################################## 28 | # Create CV 29 | # ------------------------------------------- 30 | 31 | LSGO = LeavePSubGroupOut(valid_size=0.8,n_repeats=2, 32 | random_state=12,verbose=False) 33 | ############################################################################## 34 | # Initialize Random-Forest and metrics 35 | # -------------------------------------- 36 | 37 | classifier = RandomForestClassifier(random_state=12,n_jobs=1) 38 | 39 | f1 = metrics.make_scorer(metrics.f1_score) 40 | 41 | ############################################################################## 42 | # Set and fit the Sequentia Feature Selection 43 | # --------------------------------------------------------------- 44 | # 45 | SFFS = SequentialFeatureSelection(classifier=classifier,param_grid=dict(n_estimators=[10,20]),verbose=False) 46 | 47 | SFFS.fit(X.astype(np.float),y,g,cv=LSGO,max_features=3) 48 | 49 | ############################################## 50 | # Show best features and score 51 | 52 | print('Best features are : '+str(SFFS.best_features_)) 53 | print('F1 are : '+str(SFFS.best_scores_)) 54 | 55 | ########################################################################## 56 | # In order to predict every classification 
from the best feature 57 | 58 | SFFS.predict_best_combination(datasets.load_historical_data()[0],'/tmp/SFFS/best_classification.tif') 59 | 60 | ############################################################################## 61 | # Plot example 62 | 63 | from matplotlib import pyplot as plt 64 | plt.plot(np.arange(1,len(SFFS.best_scores_)+1),SFFS.best_scores_) 65 | plt.xlabel('Number of features') 66 | plt.xticks(np.arange(1,len(SFFS.best_scores_)+1)) 67 | plt.ylabel('F1') 68 | plt.show() 69 | -------------------------------------------------------------------------------- /examples/ai/learnWithCustomRaster.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Learn algorithm and customize your input raster without writing it on disk 4 | ============================================================================= 5 | 6 | This example shows how to customize your raster (ndvi, smooth signal...) in the 7 | learning process to avoi generate a new raster. 
8 | 9 | """ 10 | 11 | ############################################################################## 12 | # Import librairies 13 | # ------------------------------------------- 14 | 15 | from museotoolbox.ai import SuperLearner 16 | from museotoolbox.processing import extract_ROI 17 | from museotoolbox import datasets 18 | from sklearn.ensemble import RandomForestClassifier 19 | from sklearn import metrics 20 | 21 | ############################################################################## 22 | # Load HistoricalMap dataset 23 | # ------------------------------------------- 24 | 25 | raster,vector = datasets.load_historical_data(low_res=True) 26 | field = 'Class' 27 | 28 | ############################################################################## 29 | # Initialize Random-Forest and metrics 30 | # -------------------------------------- 31 | 32 | classifier = RandomForestClassifier(random_state=12,n_jobs=1) 33 | 34 | kappa = metrics.make_scorer(metrics.cohen_kappa_score) 35 | f1_mean = metrics.make_scorer(metrics.f1_score,average='micro') 36 | scoring = dict(kappa=kappa,f1_mean=f1_mean,accuracy='accuracy') 37 | 38 | 39 | ############################################################################## 40 | # Start learning 41 | # --------------------------- 42 | # sklearn will compute different metrics, but will keep best results from kappa (refit='kappa') 43 | SL = SuperLearner(classifier=classifier,param_grid=dict(n_estimators=[10]),n_jobs=1,verbose=1) 44 | 45 | ############################################################################## 46 | # Create or use custom function 47 | 48 | def reduceBands(X,bandToKeep=[0,2]): 49 | # this function get the first and the last band 50 | X=X[:,bandToKeep].reshape(-1,len(bandToKeep)) 51 | return X 52 | 53 | # add this function to learnAndPredict class 54 | SL.customize_array(reduceBands) 55 | 56 | # if you learn from vector, refit according to the f1_mean 57 | X,y = extract_ROI(raster,vector,field) 58 | 
SL.fit(X,y,cv=2,scoring=scoring,refit='f1_mean') 59 | 60 | ############################################################################## 61 | # Read the model 62 | # ------------------- 63 | print(SL.model) 64 | print(SL.model.cv_results_) 65 | print(SL.model.best_score_) 66 | 67 | ############################################################################## 68 | # Get F1 for every class from best params 69 | # ----------------------------------------------- 70 | 71 | for stats in SL.get_stats_from_cv(confusion_matrix=False,F1=True): 72 | print(stats['F1']) 73 | 74 | ############################################################################## 75 | # Get each confusion matrix from folds 76 | # ----------------------------------------------- 77 | 78 | for stats in SL.get_stats_from_cv(confusion_matrix=True): 79 | print(stats['confusion_matrix']) 80 | 81 | ############################################################################## 82 | # Save each confusion matrix from folds 83 | # ----------------------------------------------- 84 | 85 | SL.save_cm_from_cv('/tmp/testMTB/',prefix='RS50_') 86 | 87 | ############################################################################## 88 | # Predict map 89 | # --------------------------- 90 | 91 | SL.predict_image(raster,'/tmp/classification.tif', 92 | higher_confidence='/tmp/confidence.tif', 93 | confidence_per_class='/tmp/confidencePerClass.tif') 94 | ########################## 95 | # Plot example 96 | 97 | from matplotlib import pyplot as plt 98 | from osgeo import gdal 99 | src=gdal.Open('/tmp/classification.tif') 100 | plt.imshow(src.GetRasterBand(1).ReadAsArray(),cmap=plt.get_cmap('tab20')) 101 | plt.axis('off') 102 | plt.show() 103 | -------------------------------------------------------------------------------- /examples/ai/learnWithRFandCompareCV.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Learn with Random-Forest and compare 
Cross-Validation methods 4 | =============================================================== 5 | 6 | This example shows how to make a classification with different cross-validation methods. 7 | 8 | """ 9 | 10 | ############################################################################## 11 | # Import librairies 12 | # ------------------------------------------- 13 | 14 | from museotoolbox.ai import SuperLearner 15 | from museotoolbox import cross_validation 16 | from museotoolbox.processing import extract_ROI 17 | from museotoolbox import datasets 18 | from sklearn.ensemble import RandomForestClassifier 19 | from sklearn.model_selection import StratifiedKFold 20 | 21 | ############################################################################## 22 | # Load HistoricalMap dataset 23 | # ------------------------------------------- 24 | 25 | raster,vector = datasets.load_historical_data(low_res=True) 26 | field = 'Class' 27 | group = 'uniquefid' 28 | X,y,g = extract_ROI(raster,vector,field,group) 29 | ############################################################################## 30 | # Initialize Random-Forest 31 | # --------------------------- 32 | 33 | classifier = RandomForestClassifier(random_state=12,n_jobs=1) 34 | 35 | ############################################################################## 36 | # Create list of different CV 37 | # --------------------------- 38 | 39 | CVs = [cross_validation.RandomStratifiedKFold(n_splits=2), 40 | cross_validation.LeavePSubGroupOut(valid_size=0.5), 41 | cross_validation.LeaveOneSubGroupOut(), 42 | StratifiedKFold(n_splits=2,shuffle=True) #from sklearn 43 | ] 44 | 45 | kappas=[] 46 | 47 | 48 | 49 | for cv in CVs : 50 | SL = SuperLearner( classifier=classifier,param_grid=dict(n_estimators=[50,100]),n_jobs=1) 51 | SL.fit(X,y,group=g,cv=cv) 52 | print('Kappa for '+str(type(cv).__name__)) 53 | cvKappa = [] 54 | 55 | for stats in SL.get_stats_from_cv(confusion_matrix=False,kappa=True): 56 | print(stats['kappa']) 57 | 
cvKappa.append(stats['kappa']) 58 | 59 | kappas.append(cvKappa) 60 | 61 | print(20*'=') 62 | 63 | ########################## 64 | # Plot example 65 | 66 | 67 | from matplotlib import pyplot as plt 68 | plt.title('Kappa according to Cross-validation methods') 69 | plt.boxplot(kappas,labels=[str(type(i).__name__) for i in CVs], patch_artist=True) 70 | plt.grid() 71 | plt.ylabel('Kappa') 72 | plt.xticks(rotation=15) 73 | plt.show() 74 | -------------------------------------------------------------------------------- /examples/ai/learnWithRFandRS50.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Learn with Random-Forest and Random Sampling 50% (RS50) 4 | ======================================================== 5 | 6 | This example shows how to make a Random Sampling with 7 | 50% for each class. 8 | 9 | """ 10 | 11 | ############################################################################## 12 | # Import librairies 13 | # ------------------------------------------- 14 | 15 | from museotoolbox.ai import SuperLearner 16 | from museotoolbox.cross_validation import RandomStratifiedKFold 17 | from museotoolbox.processing import extract_ROI 18 | from museotoolbox import datasets 19 | from sklearn.ensemble import RandomForestClassifier 20 | from sklearn import metrics 21 | 22 | ############################################################################## 23 | # Load HistoricalMap dataset 24 | # ------------------------------------------- 25 | 26 | raster,vector = datasets.load_historical_data(low_res=True) 27 | field = 'Class' 28 | X,y = extract_ROI(raster,vector,field) 29 | ############################################################################## 30 | # Create CV 31 | # ------------------------------------------- 32 | 33 | SKF = RandomStratifiedKFold(n_splits=2, 34 | random_state=12,verbose=False) 35 | 36 | ############################################################################## 37 | 
# Initialize Random-Forest and metrics 38 | # -------------------------------------- 39 | 40 | classifier = RandomForestClassifier(random_state=12,n_jobs=1) 41 | 42 | # 43 | kappa = metrics.make_scorer(metrics.cohen_kappa_score) 44 | f1_mean = metrics.make_scorer(metrics.f1_score,average='micro') 45 | scoring = dict(kappa=kappa,f1_mean=f1_mean,accuracy='accuracy') 46 | 47 | 48 | ############################################################################## 49 | # Start learning 50 | # --------------------------- 51 | # sklearn will compute different metrics, but will keep best results from kappa (refit='kappa') 52 | SL = SuperLearner(classifier=classifier,param_grid = dict(n_estimators=[10]),n_jobs=1,verbose=1) 53 | 54 | SL.fit(X,y,cv=SKF,scoring=kappa) 55 | 56 | 57 | # ============================================================================= 58 | # ############################################################################## 59 | # # Read the model 60 | # # ------------------- 61 | # print(SL.model) 62 | # print(SL.model.cv_results_) 63 | # print(SL.model.best_score_) 64 | # 65 | # ############################################################################## 66 | # # Get F1 for every class from best params 67 | # # ----------------------------------------------- 68 | # 69 | # for stats in SL.get_stats_from_cv(confusion_matrix=False,F1=True): 70 | # print(stats['F1']) 71 | # 72 | # ############################################################################## 73 | # # Get each confusion matrix from folds 74 | # # ----------------------------------------------- 75 | # 76 | # for stats in SL.get_stats_from_cv(confusion_matrix=True): 77 | # print(stats['confusion_matrix']) 78 | # 79 | # ############################################################################## 80 | # # Save each confusion matrix from folds 81 | # # ----------------------------------------------- 82 | # 83 | # SL.save_cm_from_cv('/tmp/testMTB/',prefix='RS50_') 84 | # 85 | # 
============================================================================= 86 | ############################################################################## 87 | # Predict map 88 | # --------------------------- 89 | 90 | SL.predict_image(raster,'/tmp/classification.tif', 91 | higher_confidence='/tmp/confidence.tif', 92 | confidence_per_class='/tmp/confidencePerClass.tif') 93 | 94 | ########################## 95 | # Plot example 96 | 97 | from matplotlib import pyplot as plt 98 | from osgeo import gdal 99 | src=gdal.Open('/tmp/classification.tif') 100 | plt.imshow(src.GetRasterBand(1).ReadAsArray(),cmap=plt.get_cmap('tab20')) 101 | plt.axis('off') 102 | plt.show() 103 | -------------------------------------------------------------------------------- /examples/charts/README.txt: -------------------------------------------------------------------------------- 1 | .. _charts: 2 | 3 | Charts 4 | ------------------------ 5 | 6 | Examples related to the :mod:`museotoolbox.charts` module. 7 | 8 | -------------------------------------------------------------------------------- /examples/charts/plotConfusion.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Plot confusion matrix 4 | ======================================================== 5 | 6 | Plot confusion matrix from Cross-Validation, with F1 as subplot. 
7 | 8 | """ 9 | 10 | ############################################################################## 11 | # Import librairies 12 | # ------------------------------------------- 13 | from museotoolbox.ai import SuperLearner 14 | from museotoolbox.cross_validation import RandomStratifiedKFold 15 | from museotoolbox.charts import PlotConfusionMatrix 16 | from museotoolbox import datasets 17 | from sklearn.ensemble import RandomForestClassifier 18 | 19 | ############################################################################## 20 | # Load HistoricalMap dataset 21 | # ------------------------------------------- 22 | 23 | X,y = datasets.load_historical_data(low_res=True,return_X_y=True) 24 | #########################################m##################################### 25 | # Create CV 26 | # ------------------------------------------- 27 | RSKF = RandomStratifiedKFold(n_splits=2, 28 | random_state=12,verbose=False) 29 | 30 | ############################################################################## 31 | # Initialize Random-Forest 32 | # --------------------------- 33 | 34 | classifier = RandomForestClassifier() 35 | 36 | ############################################################################## 37 | # Start learning 38 | # --------------------------- 39 | 40 | SL = SuperLearner(classifier=classifier,param_grid=dict(n_estimators=[10,50])) 41 | SL.fit(X,y,cv=RSKF) 42 | ############################################################################## 43 | # Get kappa from each fold 44 | # --------------------------- 45 | 46 | for stats in SL.get_stats_from_cv(confusion_matrix=False,kappa=True): 47 | print(stats['kappa']) 48 | 49 | ############################################################################## 50 | # Get each confusion matrix from folds 51 | # ----------------------------------------------- 52 | cms = [] 53 | for stats in SL.get_stats_from_cv(confusion_matrix=True): 54 | cms.append(stats['confusion_matrix']) 55 | print(stats['confusion_matrix']) 56 
| 57 | ############################################################################## 58 | # Plot confusion matrix 59 | # ----------------------------------------------- 60 | 61 | import numpy as np 62 | meanCM = np.mean(cms,axis=0).astype(np.int16) 63 | pltCM = PlotConfusionMatrix(meanCM.T) # Translate for Y = prediction and X = truth 64 | pltCM.add_text() 65 | pltCM.color_diagonal() -------------------------------------------------------------------------------- /examples/charts/plotConfusionAcc.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Plot confusion matrix with User/Producer accuracy 4 | ======================================================== 5 | 6 | Plot confusion matrix from Cross-Validation, with accuracy (user/prod) as subplot. 7 | 8 | """ 9 | 10 | ############################################################################## 11 | # Import librairies 12 | # ------------------------------------------- 13 | from museotoolbox.ai import SuperLearner 14 | from museotoolbox.cross_validation import RandomStratifiedKFold 15 | from museotoolbox.charts import PlotConfusionMatrix 16 | from museotoolbox import datasets 17 | from sklearn.ensemble import RandomForestClassifier 18 | 19 | ############################################################################## 20 | # Load HistoricalMap dataset 21 | # ------------------------------------------- 22 | 23 | X,y = datasets.load_historical_data(low_res=True,return_X_y=True) 24 | 25 | ############################################################################## 26 | # Create CV 27 | # ------------------------------------------- 28 | RSKF = RandomStratifiedKFold(n_splits=2, 29 | random_state=12,verbose=False) 30 | 31 | ############################################################################## 32 | # Initialize Random-Forest 33 | # --------------------------- 34 | 35 | classifier = RandomForestClassifier() 36 | 37 | 
############################################################################## 38 | # Start learning 39 | # --------------------------- 40 | 41 | 42 | SL = SuperLearner(classifier=classifier,param_grid=dict(n_estimators=[10,100])) 43 | SL.fit(X,y,cv=RSKF) 44 | 45 | ############################################################################## 46 | # Get kappa from each fold 47 | # --------------------------- 48 | 49 | for stats in SL.get_stats_from_cv(confusion_matrix=False,kappa=True): 50 | print(stats['kappa']) 51 | 52 | ############################################################################## 53 | # Get each confusion matrix from folds 54 | # ----------------------------------------------- 55 | cms = [] 56 | for stats in SL.get_stats_from_cv(confusion_matrix=True): 57 | cms.append(stats['confusion_matrix']) 58 | print(stats['confusion_matrix']) 59 | 60 | ############################################################################## 61 | # Plot confusion matrix 62 | # ----------------------------------------------- 63 | 64 | import numpy as np 65 | 66 | # a bug in Sphinx doesn't show the whole plot, sorry. 
67 | 68 | labels = ['Forest','Agriculture','Bare soil','Water','Building'] 69 | from matplotlib.pyplot import cm as colorMap 70 | meanCM = np.mean(cms,axis=0).astype(np.int16) 71 | pltCM = PlotConfusionMatrix(meanCM.T) # Translate for Y = prediction and X = truth 72 | pltCM.add_text() 73 | pltCM.add_x_labels(labels,rotation=90) 74 | pltCM.add_y_labels(labels) 75 | pltCM.color_diagonal(diag_color=colorMap.Purples,matrix_color=colorMap.Reds) 76 | pltCM.add_accuracy() 77 | pltCM.add_f1() -------------------------------------------------------------------------------- /examples/charts/plotConfusionF1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Plot confusion matrix from Cross-Validation with F1 4 | ======================================================== 5 | 6 | Plot confusion matrix from Cross-Validation, with F1 as subplot. 7 | 8 | """ 9 | 10 | ############################################################################## 11 | # Import librairies 12 | # ------------------------------------------- 13 | from museotoolbox.ai import SuperLearner 14 | from museotoolbox.cross_validation import RandomStratifiedKFold 15 | from museotoolbox.charts import PlotConfusionMatrix 16 | from museotoolbox import datasets 17 | from sklearn.ensemble import RandomForestClassifier 18 | 19 | ############################################################################## 20 | # Load HistoricalMap dataset 21 | # ------------------------------------------- 22 | 23 | X,y = datasets.load_historical_data(low_res=True,return_X_y=True) 24 | field = 'Class' 25 | ############################################################################## 26 | # Create CV 27 | # ------------------------------------------- 28 | RSKF = RandomStratifiedKFold(n_splits=2, 29 | random_state=12,verbose=False) 30 | 31 | ############################################################################## 32 | # Initialize Random-Forest 33 | # 
--------------------------- 34 | 35 | classifier = RandomForestClassifier() 36 | 37 | ############################################################################## 38 | # Start learning 39 | # --------------------------- 40 | 41 | 42 | SL = SuperLearner(classifier=classifier,param_grid=dict(n_estimators=[10,50])) 43 | SL.fit(X,y,cv=RSKF) 44 | 45 | ############################################################################## 46 | # Get kappa from each fold 47 | # --------------------------- 48 | 49 | for stats in SL.get_stats_from_cv(confusion_matrix=False,kappa=True): 50 | print(stats['kappa']) 51 | 52 | ############################################################################## 53 | # Get each confusion matrix from folds 54 | # ----------------------------------------------- 55 | cms = [] 56 | for stats in SL.get_stats_from_cv(confusion_matrix=True): 57 | cms.append(stats['confusion_matrix']) 58 | print(stats['confusion_matrix']) 59 | 60 | ############################################################################## 61 | # Plot confusion matrix 62 | # ----------------------------------------------- 63 | 64 | import numpy as np 65 | meanCM = np.mean(cms,axis=0).astype(np.int16) 66 | pltCM = PlotConfusionMatrix(meanCM.T) # Translate for Y = prediction and X = truth 67 | pltCM.add_text() 68 | pltCM.add_f1() 69 | pltCM.color_diagonal() -------------------------------------------------------------------------------- /examples/cross_validation/LeaveOneOutPerClass.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Leave One Out Per Class (LOOPC) 4 | ====================================================== 5 | 6 | This example shows how to make a Leave One Out for each class. 
7 | 8 | """ 9 | 10 | ############################################################################## 11 | # Import librairies 12 | # ------------------------------------------- 13 | 14 | from museotoolbox.cross_validation import LeaveOneOut 15 | from museotoolbox import datasets 16 | 17 | ############################################################################## 18 | # Load HistoricalMap dataset 19 | # ------------------------------------------- 20 | 21 | X,y = datasets.load_historical_data(return_X_y=True,low_res=True) 22 | 23 | ############################################################################## 24 | # Create CV 25 | # ------------------------------------------- 26 | LOOPC = LeaveOneOut(random_state=8,verbose=False) 27 | for tr,vl in LOOPC.split(X=None,y=y): 28 | print(tr,vl) 29 | 30 | ############################################################################### 31 | # .. note:: 32 | # Split is made to generate each fold 33 | 34 | # Show label 35 | 36 | for tr,vl in LOOPC.split(X=None,y=y): 37 | print(y[vl]) 38 | 39 | ############################################################################### 40 | # Save each train/valid fold in a file 41 | # ------------------------------------------- 42 | # In order to translate polygons into points (each points is a pixel in the raster) 43 | # we use sampleExtraction from vector_tools to generate a temporary vector. 
44 | 45 | trvl = LOOPC.save_to_vector(datasets.load_historical_data()[1],'Class',out_vector='/tmp/LOO.gpkg') 46 | for tr,vl in trvl: 47 | print(tr,vl) 48 | 49 | 50 | ############################################################################### 51 | # Plot example on how a polygon was splitted 52 | 53 | import ogr 54 | import numpy as np 55 | from matplotlib import pyplot as plt 56 | import matplotlib.path as mpath 57 | import matplotlib.patches as mpatches 58 | 59 | # Prepare figure 60 | plt.ioff() 61 | ax=plt.subplot(1,1,1) 62 | ax = plt.gca() 63 | 64 | 65 | xBounds,yBounds=[[],[]] 66 | 67 | for idx,vector in enumerate([tr,vl]): 68 | # Read all features in layer and store as paths 69 | ds = ogr.Open(vector) 70 | lyr = ds.GetLayer(0) 71 | 72 | for feat in lyr: 73 | paths = [] 74 | codes = [] 75 | all_x = [] 76 | all_y = [] 77 | 78 | for geom in feat.GetGeometryRef(): 79 | x = [geom.GetX(j) for j in range(geom.GetPointCount())] 80 | y = [geom.GetY(j) for j in range(geom.GetPointCount())] 81 | print(y) 82 | codes += [mpath.Path.MOVETO] + \ 83 | (len(x)-1)*[mpath.Path.LINETO] 84 | all_x += x 85 | all_y += y 86 | path = mpath.Path(np.column_stack((all_x,all_y)), codes) 87 | paths.append(path) 88 | 89 | # Add paths as patches to axes 90 | for path in paths: 91 | if idx==0: 92 | ax.add_patch(mpatches.PathPatch(path,color='C0')) 93 | else: 94 | ax.add_patch(mpatches.PathPatch(path,color='C1')) 95 | 96 | xBounds.append([np.min(all_x),np.max(all_x)]) 97 | yBounds.append([np.min(all_y),np.max(all_y)]) 98 | 99 | 100 | ax.set_xlim(np.min(np.array(xBounds)[:,0]),np.max(np.array(xBounds)[:,1])) 101 | ax.set_ylim(np.min(np.array(yBounds)[:,0]),np.max(np.array(yBounds)[:,1])) 102 | 103 | 104 | legend = [mpatches.Patch(color='C0', label='Train'),mpatches.Patch(color='C1', label='Valid')] 105 | plt.legend(handles=legend) 106 | 107 | plt.show() 108 | -------------------------------------------------------------------------------- /examples/cross_validation/LeaveOneSubGroupOut.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Leave-One-SubGroup-Out (LOSGO) 4 | ====================================================== 5 | 6 | This example shows how to make a Leave-One-SubGroup-Out. 7 | 8 | """ 9 | 10 | ############################################################################## 11 | # Import librairies 12 | # ------------------------------------------- 13 | 14 | from museotoolbox.cross_validation import LeaveOneSubGroupOut 15 | from museotoolbox.processing import extract_ROI 16 | from museotoolbox import datasets 17 | 18 | ############################################################################## 19 | # Load HistoricalMap dataset 20 | # ------------------------------------------- 21 | 22 | raster,vector = datasets.load_historical_data() 23 | field = 'Class' 24 | group = 'uniquefid' 25 | X,y,s = extract_ROI(raster,vector,field,group) 26 | 27 | ############################################################################## 28 | # Create CV 29 | # ------------------------------------------- 30 | # if n_splits is False (default), the number of splits will be the smallest 31 | # number of subgroup of all labels. 32 | 33 | valid_size = 0.5 # Means 50% 34 | LOSGO = LeaveOneSubGroupOut(verbose=False,random_state=12) # 35 | 36 | ############################################################################### 37 | # .. note:: 38 | # Split is made to generate each fold 39 | 40 | LOSGO.get_n_splits(X,y,s) 41 | for tr,vl in LOSGO.split(X,y,s): 42 | print(tr.shape,vl.shape) 43 | 44 | ############################################################################### 45 | # Differences with sklearn 46 | # ------------------------------------------- 47 | # Sklearn do not use subgroups (only groups), so no hierarchical dependances. 
48 | 49 | from sklearn.model_selection import LeaveOneGroupOut 50 | LOGO = LeaveOneGroupOut() 51 | for tr,vl in LOGO.split(X=X,y=y,groups=s): 52 | print(tr.shape,vl.shape) 53 | 54 | ############################################################################### 55 | # Plot example 56 | from __drawCVmethods import plotMethod 57 | plotMethod('LOO-group') -------------------------------------------------------------------------------- /examples/cross_validation/LeaveOneSubGroupOutAndSaveVector.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Generate a cross-validation and/or save each fold to a vector file 4 | =================================================================== 5 | 6 | This example shows how to make a Leave-One-SubGroup-Out and save 7 | each fold as a vector file. 8 | 9 | """ 10 | 11 | ############################################################################## 12 | # Import librairies 13 | # ------------------------------------------- 14 | 15 | from museotoolbox.cross_validation import LeaveOneSubGroupOut 16 | from museotoolbox.processing import extract_ROI 17 | from museotoolbox import datasets 18 | 19 | ############################################################################## 20 | # Load HistoricalMap dataset 21 | # ------------------------------------------- 22 | 23 | raster,vector = datasets.load_historical_data(low_res=True) 24 | field = 'Class' 25 | group = 'uniquefid' 26 | X,y,s = extract_ROI(raster,vector,field,group) 27 | ############################################################################## 28 | # Create CV 29 | # ------------------------------------------- 30 | 31 | valid_size = 0.5 # Means 50% 32 | LOSGO = LeaveOneSubGroupOut(verbose=False,random_state=12) 33 | 34 | ############################################################################### 35 | # .. 
note:: 36 | # Split is made to generate each fold 37 | 38 | LOSGO.get_n_splits(X,y,s) 39 | for tr,vl in LOSGO.split(X,y,s): 40 | print(tr.shape,vl.shape) 41 | 42 | ############################################################################### 43 | # Save each train/valid fold to a vector file (here in polygon type) 44 | # 45 | 46 | vectorFiles = LOSGO.save_to_vector(vector,field,group=group,out_vector='/tmp/LOSGO.gpkg') 47 | 48 | for tr,vl in vectorFiles: 49 | print(tr,vl) 50 | 51 | ############################################################################### 52 | # The sampling can be different in vector point or polygon. 53 | # So you can generate each centroid of a pixel that contains the polygon. 54 | # 55 | 56 | from museotoolbox.processing import sample_extraction 57 | vectorPointPerPixel = '/tmp/vectorCentroid.gpkg' 58 | sample_extraction(raster,vector,vectorPointPerPixel) 59 | 60 | vectorFiles = LOSGO.save_to_vector(vectorPointPerPixel,field,group=group,out_vector='/tmp/LOSGO.gpkg') 61 | 62 | for tr,vl in LOSGO.split(X,y,s): 63 | print(tr.shape,vl.shape) -------------------------------------------------------------------------------- /examples/cross_validation/LeavePSubGroupOut.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Leave-P-SubGroup-Out (LPSGO) 4 | ====================================================== 5 | 6 | This example shows how to make a Leave-Percent-SubGroup-Out. 
7 | 8 | """ 9 | 10 | ############################################################################## 11 | # Import librairies 12 | # ------------------------------------------- 13 | 14 | from museotoolbox.cross_validation import LeavePSubGroupOut 15 | from museotoolbox import datasets,processing 16 | import numpy as np 17 | 18 | ############################################################################## 19 | # Load HistoricalMap dataset 20 | # ------------------------------------------- 21 | 22 | raster,vector = datasets.load_historical_data(low_res=True) 23 | field = 'Class' 24 | group = 'uniquefid' 25 | 26 | ############################################################################## 27 | # Create CV 28 | # ------------------------------------------- 29 | valid_size = 0.5 # Means 50% 30 | LPSGO = LeavePSubGroupOut(valid_size = 0.5, 31 | random_state=12,verbose=False) 32 | 33 | ############################################################################### 34 | # Extract X,y and group. 35 | # ------------------------------------------- 36 | 37 | X,y,g= processing.extract_ROI(raster,vector,field,group) 38 | 39 | ############################################################################### 40 | # .. 
note:: 41 | # Split is made to generate each fold 42 | 43 | for tr,vl in LPSGO.split(X,y,g): 44 | print(tr.shape,vl.shape) 45 | 46 | print('y label with number of samples') 47 | print(np.unique(y[tr],return_counts=True)) 48 | ############################################################################## 49 | # Differences with scikit-learn 50 | # ------------------------------------------- 51 | from sklearn.model_selection import LeavePGroupsOut 52 | # You need to specify the number of groups 53 | 54 | LPGO = LeavePGroupsOut(n_groups=2) 55 | for tr,vl in LPGO.split(X,y,g): 56 | print(tr.shape,vl.shape) 57 | 58 | ############################################################################## 59 | # With GroupShuffleSplit, won't keep the percentage per subgroup 60 | # This generate unbalanced classes 61 | 62 | from sklearn.model_selection import GroupShuffleSplit 63 | GSS = GroupShuffleSplit(test_size=0.5,n_splits=2) 64 | for tr,vl in GSS.split(X,y,g): 65 | print(tr.shape,vl.shape) 66 | 67 | print('y label with number of samples') 68 | print(np.unique(y[tr],return_counts=True)) 69 | 70 | ############################################################################### 71 | # Plot example in image 72 | from __drawCVmethods import plotMethod 73 | plotMethod('SKF-group') -------------------------------------------------------------------------------- /examples/cross_validation/README.txt: -------------------------------------------------------------------------------- 1 | .. _crossvalidation: 2 | 3 | Cross-Validation 4 | ------------------------ 5 | 6 | Examples related to the :mod:`museotoolbox.cross_validation` module. 
7 | 8 | -------------------------------------------------------------------------------- /examples/cross_validation/RandomSampling50.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Stratified-K-Fold 4 | ====================================================== 5 | 6 | This example shows how to make a Random Sampling with 7 | 50% for each class. 8 | 9 | """ 10 | 11 | ############################################################################## 12 | # Import librairies 13 | # ------------------------------------------- 14 | 15 | from museotoolbox.cross_validation import RandomStratifiedKFold 16 | from museotoolbox import datasets,processing 17 | 18 | ############################################################################## 19 | # Load HistoricalMap dataset 20 | # ------------------------------------------- 21 | 22 | raster,vector = datasets.load_historical_data(low_res=True) 23 | field = 'Class' 24 | y = processing.read_vector_values(vector,field) 25 | 26 | ############################################################################## 27 | # Create CV 28 | # ------------------------------------------- 29 | 30 | SKF = RandomStratifiedKFold(n_splits=2,n_repeats=2, 31 | random_state=12,verbose=False) 32 | for tr,vl in SKF.split(X=None,y=y): 33 | print(tr,vl) 34 | 35 | ############################################################################### 36 | # .. note:: 37 | # Split is made to generate each fold 38 | 39 | # Show label 40 | 41 | for tr,vl in SKF.split(X=None,y=y): 42 | print(y[tr],y[vl]) 43 | 44 | ############################################################################## 45 | # .. note:: 46 | # The first one is made with polygon only. 
47 | # When learning/predicting, all pixels will be taken in account 48 | # TO generate a full X and y labels, extract samples from ROI 49 | 50 | X,y=processing.extract_ROI(raster,vector,field) 51 | 52 | for tr,vl in SKF.split(X,y): 53 | print(tr,vl) 54 | print(tr.shape,vl.shape) 55 | 56 | ########################## 57 | # Plot example 58 | from __drawCVmethods import plotMethod 59 | plotMethod('SKF-pixel') -------------------------------------------------------------------------------- /examples/cross_validation/SpatialLeaveAsideOut.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Spatial Leave-Aside-Out (SLAO) 4 | ====================================================== 5 | 6 | This example shows how to make a Spatial Leave-Aside-Out. 7 | 8 | See https://doi.org/10.1016/j.foreco.2013.07.059 9 | 10 | """ 11 | 12 | ############################################################################## 13 | # Import librairies 14 | # ------------------------------------------- 15 | 16 | from museotoolbox.cross_validation import SpatialLeaveAsideOut 17 | from museotoolbox import datasets,processing 18 | 19 | ############################################################################## 20 | # Load HistoricalMap dataset 21 | # ------------------------------------------- 22 | 23 | raster,vector = datasets.load_historical_data() 24 | field = 'Class' 25 | X,y = processing.extract_ROI(raster,vector,field) 26 | distance_matrix = processing.get_distance_matrix(raster,vector) 27 | 28 | ############################################################################## 29 | # Create CV 30 | # ------------------------------------------- 31 | # n_splits will be the number of the least populated class 32 | 33 | SLOPO = SpatialLeaveAsideOut(valid_size=1/3, 34 | distance_matrix=distance_matrix,random_state=4) 35 | 36 | print(SLOPO.get_n_splits(X,y)) 37 | 38 | 
############################################################################### 39 | # .. note:: 40 | # Split is made to generate each fold 41 | 42 | for tr,vl in SLOPO.split(X,y): 43 | print(tr.shape,vl.shape) 44 | 45 | ############################################################################### 46 | # Save each train/valid fold in a file 47 | # ------------------------------------------- 48 | # In order to translate polygons into points (each points is a pixel in the raster) 49 | # we use sampleExtraction from vector_tools to generate a temporary vector. 50 | 51 | processing.sample_extraction(raster,vector,out_vector='/tmp/pixels.gpkg',verbose=False) 52 | trvl = SLOPO.save_to_vector('/tmp/pixels.gpkg',field,out_vector='/tmp/SLOPO.gpkg') 53 | for tr,vl in trvl: 54 | print(tr,vl) 55 | 56 | 57 | ############################################################################### 58 | # Plot example on how a polygon was splitted 59 | 60 | import ogr 61 | import numpy as np 62 | from matplotlib import pyplot as plt 63 | # Read all features in layer and store as paths 64 | xyl= np.array([],dtype=float).reshape((-1,3)) 65 | for idx,vector in enumerate([tr,vl]): 66 | ds = ogr.Open(vector) 67 | lyr = ds.GetLayer(0) 68 | lyr.SetAttributeFilter ( "uniquefid=17" ) # select a specific group 69 | for feat in lyr: 70 | geom = feat.GetGeometryRef() 71 | xyl = np.vstack((xyl,np.asarray((geom.GetX(),geom.GetY(),idx)))) 72 | 73 | trPoints = xyl[xyl[:,2]==0][:,:2] 74 | vlPoints = xyl[xyl[:,2]==1][:,:2] 75 | plt.scatter(trPoints[:,0],trPoints[:,1],label='train',color='C0') 76 | plt.scatter(vlPoints[:,0],vlPoints[:,1],label='valid',color='C1') 77 | plt.legend() 78 | plt.show() -------------------------------------------------------------------------------- /examples/cross_validation/SpatialLeaveOnePixelOut.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Spatial Leave-One-Out (SLOO) 4 | 
====================================================== 5 | 6 | This example shows how to make a Spatial Leave-One-Out called here 7 | a Spatial Leave-One-Pixel-Out. 8 | 9 | For more information see : https://onlinelibrary.wiley.com/doi/full/10.1111/geb.12161. 10 | 11 | """ 12 | 13 | ############################################################################## 14 | # Import librairies 15 | # ------------------------------------------- 16 | 17 | from museotoolbox.cross_validation import SpatialLeaveOneOut 18 | from museotoolbox import datasets,processing 19 | ############################################################################## 20 | # Load HistoricalMap dataset 21 | # ------------------------------------------- 22 | 23 | raster,vector = datasets.load_historical_data(low_res=True) 24 | field = 'Class' 25 | X,y = processing.extract_ROI(raster,vector,field) 26 | distance_matrix = processing.get_distance_matrix(raster,vector) 27 | 28 | ############################################################################## 29 | # Create CV 30 | # ------------------------------------------- 31 | # n_splits will be the number of the least populated class 32 | 33 | SLOO = SpatialLeaveOneOut(distance_thresold=100,distance_matrix=distance_matrix, 34 | random_state=12) 35 | ############################################################################### 36 | # .. 
note:: 37 | # Split is made to generate each fold 38 | SLOO.get_n_splits(X,y) 39 | for tr,vl in SLOO.split(X,y): 40 | print(tr.shape,vl.shape) 41 | 42 | #################################################### 43 | # Save each train/valid in a spatial vector file 44 | from museotoolbox.processing import sample_extraction 45 | sample_extraction(raster,vector,'/tmp/one_point_per_pixel.gpkg') 46 | files = SLOO.save_to_vector('/tmp/one_point_per_pixel.gpkg','Class',out_vector='/tmp/trvl.gpkg') 47 | print(files) 48 | ############################################# 49 | # Draw image 50 | from __drawCVmethods import plotMethod 51 | plotMethod('SLOO-pixel') 52 | -------------------------------------------------------------------------------- /examples/cross_validation/SpatialLeaveOneSubGroupOut.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Spatial Leave-One-SubGroup-Out (SLOSGO) 4 | ====================================================== 5 | 6 | This example shows how to make a Spatial Leave-One-SubGroup-Out. 
7 | 8 | """ 9 | 10 | ############################################################################## 11 | # Import librairies 12 | # ------------------------------------------- 13 | import numpy as np 14 | from museotoolbox.cross_validation import SpatialLeaveOneSubGroupOut 15 | from museotoolbox import datasets,processing 16 | ############################################################################## 17 | # Load HistoricalMap dataset 18 | # ------------------------------------------- 19 | 20 | _,centroid = datasets.load_historical_data(low_res=True,centroid=True) 21 | raster,vector = datasets.load_historical_data(low_res=True) 22 | 23 | field = 'Class' 24 | 25 | ############################################################################## 26 | # Extract label ('Class' field) and groups ('uniquefid' field) 27 | # Compute distanceMatrix with centroid (one point per group) 28 | 29 | X,y,groups = processing.extract_ROI(raster,vector,field,'uniquefid') 30 | distance_matrix,distance_label = processing.get_distance_matrix(raster,centroid,'uniquefid') 31 | 32 | ############################################################################## 33 | # Create CV 34 | # ------------------------------------------- 35 | # n_splits will be the number of the least populated class 36 | 37 | SLOSGO = SpatialLeaveOneSubGroupOut(distance_thresold=100,distance_matrix=distance_matrix, 38 | distance_label=distance_label,random_state=12) 39 | 40 | 41 | ############################################################################### 42 | # .. 
note:: 43 | # Split is made to generate each fold 44 | SLOSGO.get_n_splits(X,y,groups) 45 | for tr,vl in SLOSGO.split(X,y,groups): 46 | print(np.unique(groups[vl])) 47 | print(np.unique(groups[tr])) 48 | 49 | SLOSGO.save_to_vector(vector,'Class','uniquefid','/tmp/slosgo.gpkg') 50 | ############################################# 51 | # Draw image 52 | from __drawCVmethods import plotMethod 53 | plotMethod('SLOO-group') -------------------------------------------------------------------------------- /examples/cross_validation/__drawCVmethods.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Mar 2 10:41:59 2019 5 | 6 | @author: nicolas 7 | """ 8 | import numpy as np 9 | from matplotlib import pyplot as plt 10 | 11 | def plotMethod(cvType='SKF-pixel'): 12 | nSamples = 30 13 | alpha_unused = 0.1 14 | trainColor='C0' 15 | validColor='C1' 16 | points = np.array([]).reshape(-1,3) 17 | distanceBuffer=200 18 | bufferSize=48000 19 | #random 20 | np.random.seed(10) 21 | def multiplyBy100AndToInt(x): 22 | x*=100 23 | x=x.astype(int) 24 | return x 25 | def initFrame(lim=200,size=4): 26 | f=plt.figure(figsize=(size,size)) 27 | plt.xlim(-lim,lim) 28 | plt.ylim(-lim,lim) 29 | plt.xticks([]) 30 | plt.yticks([]) 31 | return f 32 | 33 | def drawFrame(title,train,valid,unused=False,buffer=False,show=True): 34 | f=initFrame() 35 | ax = f.add_subplot(111) 36 | 37 | """ 38 | if title: 39 | plt.title(title) 40 | """ 41 | ax.scatter(train[0],train[1],c=trainColor,marker='.',s=100,label='Train') 42 | ax.scatter(valid[0],valid[1],c=validColor,marker='x',label='Valid') 43 | ax.legend() 44 | if unused is not False: 45 | ax.scatter(unused[0],unused[1],c='grey',marker='.',s=100,alpha=alpha_unused,label='Unused references') 46 | if buffer is not False: 47 | ax.scatter(valid[0],valid[1],facecolor='none',edgecolor='red',marker='o',s=bufferSize) 48 | 49 | if show: 50 | plt.show() 51 | 
return f 52 | 53 | #### Generate 4 stands 54 | 55 | X = np.random.vonmises(5,50,nSamples) 56 | Y = np.random.vonmises(5,10,nSamples) 57 | X = multiplyBy100AndToInt(X) 58 | Y = multiplyBy100AndToInt(Y) 59 | 60 | label = np.ones(Y.shape) 61 | points = np.concatenate((points,np.vstack((X,Y,label)).T)) 62 | # plt.scatter(X,Y) 63 | 64 | 65 | X = np.random.vonmises(0.3,7,nSamples)-1 66 | Y = np.random.vonmises(0,10,nSamples)+1 67 | X = multiplyBy100AndToInt(X) 68 | Y = multiplyBy100AndToInt(Y) 69 | 70 | label += 1 71 | points = np.concatenate((points,np.vstack((X,Y,label)).T)) 72 | 73 | # plt.scatter(X,Y) 74 | X = np.random.vonmises(1,8,nSamples) 75 | Y = np.random.vonmises(1,20,nSamples) 76 | X = multiplyBy100AndToInt(X) 77 | Y = multiplyBy100AndToInt(Y) 78 | label += 1 79 | points = np.concatenate((points,np.vstack((X,Y,label)).T)) 80 | 81 | X = np.random.vonmises(100,15,nSamples)+1 82 | Y = np.random.vonmises(100,10,nSamples)-0.3 83 | X = multiplyBy100AndToInt(X) 84 | Y = multiplyBy100AndToInt(Y) 85 | label += 1 86 | points = np.concatenate((points,np.vstack((X,Y,label)).T)) 87 | 88 | paths = [] 89 | codes = [] 90 | 91 | from scipy.spatial import ConvexHull 92 | import matplotlib.path as mpath 93 | 94 | codes += [mpath.Path.MOVETO] + \ 95 | (len(X)-1)*[mpath.Path.LINETO] 96 | 97 | for group in range(1,5): 98 | coords = points[np.where(points[:,2]==group)][:,:2] 99 | h=ConvexHull(coords).vertices 100 | 101 | path = mpath.Path(coords[h]) 102 | paths.append(path) 103 | 104 | randomPoints = np.random.permutation(points) 105 | 106 | if cvType == 'SKF-pixel': 107 | f=drawFrame('Stratified-K-Fold pixel-based',[randomPoints[:,0][:75],randomPoints[:,1][:75]],[randomPoints[:,0][75:],randomPoints[:,1][75:]]) 108 | plt.show() 109 | else: 110 | train = np.where(np.in1d(points[:,2],np.array([2,4]))) 111 | valid = np.where(np.in1d(points[:,2],np.array([2,4]),invert=True)) 112 | 113 | if cvType == 'SKF-group': 114 | f=drawFrame('Stratified-K-Fold 
group-based',[points[valid][:,0],points[valid][:,1]],[points[train][:,0],points[train][:,1]]) 115 | plt.show() 116 | 117 | else: 118 | valid = randomPoints[0] 119 | train = randomPoints[1:] 120 | if cvType == 'LOO-pixel': 121 | f=drawFrame('Leave-One-Out pixel-based',[train[:,0],train[:,1]],valid) 122 | 123 | if cvType == 'LOO-group': 124 | valid = np.where(np.in1d(points[:,2],np.array([4]))) 125 | train = np.where(np.in1d(points[:,2],np.array([4]),invert=True)) 126 | f=drawFrame('Leave-One-Out group-based',[points[train][:,0],points[train][:,1]],[points[valid][:,0],points[valid][:,1]]) 127 | # pp.savefig(f,bbox_inches='tight') 128 | # SLOO-pixel 129 | if cvType == 'SLOO-pixel': 130 | from scipy.spatial import distance 131 | distance=distance.cdist(randomPoints[:,:2],randomPoints[:,:2]) 132 | 133 | valid = randomPoints[0] 134 | train_nospatial = randomPoints[1:] 135 | train = randomPoints[np.where(distance[0,:]>distanceBuffer)[0]] 136 | f=drawFrame('Spatial Leave-One-Out pixel-based',[train[:,0],train[:,1]],valid,[train_nospatial[:,0],train_nospatial[:,1]],buffer=True,show=False) 137 | plt.text(-70,0,'Distance buffer\nfrom validation pixel',fontsize=12) 138 | #lt.scatter(train_nospatial[:,0],train_nospatial[:,1],c='grey',marker='.',s=100,alpha=alpha_unused) 139 | plt.show() 140 | 141 | # 142 | if cvType == 'SLOO-group': 143 | valid = np.where(np.in1d(points[:,2],np.array([4]))) 144 | train_nospatial = np.where(np.in1d(points[:,2],np.array([4]),invert=True)) 145 | 146 | train = np.where(np.in1d(points[:,2],np.array([2]))) 147 | 148 | train=[points[train][:,0],points[train][:,1]] 149 | valid=[points[valid][:,0],points[valid][:,1]] 150 | unused=points[train_nospatial][:,0],points[train_nospatial][:,1] 151 | f=drawFrame('Spatial Leave-One-Out group-based',train,valid,unused,show=False) 152 | 153 | centroid = np.asarray([[np.mean(points[:,0][np.where(points[:,2]==stand)]),np.mean(points[:,1][np.where(points[:,2]==stand)])] for stand in range(1,5)]) 154 | 
plt.scatter(centroid[:,0],centroid[:,1],color='black',s=60,alpha=0.8,label='Centroid') 155 | plt.scatter(centroid[:,0][3],centroid[:,1][3],facecolor='none',edgecolor='red',marker='o',s=bufferSize) 156 | plt.text(-90,-10,'Distance buffer\nfrom centroid',fontsize=12) 157 | plt.show() 158 | -------------------------------------------------------------------------------- /examples/cross_validation/train_test_split.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Train test split with every kind of cross-validation 5 | ====================================================== 6 | 7 | This example shows how to split between test and train according to 8 | every cross-validation method. 9 | """ 10 | 11 | ############################################################################## 12 | # Import librairies 13 | # ------------------------------------------- 14 | 15 | import numpy as np 16 | import museotoolbox as mtb 17 | 18 | ############################################################################## 19 | # Generate random dataset 20 | # ------------------------------------------- 21 | 22 | np.random.seed(42) 23 | y = np.random.randint(1,3,10) 24 | X = np.random.randint(1,255,[10,3],dtype=np.uint8) 25 | 26 | ############################################################################## 27 | # Split train/test 28 | # ----------------------------------------------------------------------- 29 | # Using :mod:`museotoolbox.cross_validation.LeaveOneOut` 30 | 31 | cv = mtb.cross_validation.LeaveOneOut(random_state=42) 32 | 33 | X_train, X_test, y_train, y_test = mtb.cross_validation.train_test_split(cv,X,y) 34 | 35 | ############################################################################## 36 | # Split train/test with groups 37 | # ------------------------------------------- 38 | # Generate group 39 | 40 | groups = np.array([1, 1, 2, 3, 4, 2, 1, 1, 2, 3],dtype=int) 41 | 42 
| ################################################################## 43 | # Using :mod:`museotoolbox.cross_validation.LeaveOneSubGroupOut` 44 | 45 | cv = mtb.cross_validation.LeaveOneSubGroupOut(random_state=42) 46 | 47 | X_train, X_test, y_train, y_test, g_train, g_test = mtb.cross_validation.train_test_split(cv,X,y,groups=groups) -------------------------------------------------------------------------------- /examples/processing/README.txt: -------------------------------------------------------------------------------- 1 | .. _processing: 2 | 3 | Processing 4 | ------------------------ 5 | 6 | Examples related to the dedicated raster and vector :mod:`museotoolbox.processing` module. 7 | -------------------------------------------------------------------------------- /examples/processing/copyRasterInVectorFields.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Copy raster values in vector fields then read vector 4 | ====================================================== 5 | 6 | This example shows how to extract from polygons or points 7 | each pixel centroid located in the vector (polygons/points) 8 | and how to extract and save band values in vector fields. 
If bandPrefix is not specified, the script will only generate the centroid
# Let's suppose you want to know how to extract the label of each polygon/point
-------------------------------------------------------------------------------- /examples/processing/modalClass.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Modal class and number of agreements 4 | =============================================================== 5 | 6 | Create a raster with the modal class and the number of agreements. 7 | 8 | """ 9 | 10 | ############################################################################## 11 | # Import librairies 12 | # ------------------------------------------- 13 | 14 | import museotoolbox as mtb 15 | from scipy.stats import mode 16 | import numpy as np 17 | ############################################################################## 18 | # Load HistoricalMap dataset 19 | # ------------------------------------------- 20 | 21 | raster,vector = mtb.datasets.load_historical_data(low_res=True) 22 | 23 | ############################################################################## 24 | # Initialize rasterMath with raster 25 | # ----------------------------------------- 26 | 27 | ######## 28 | # In case you want to add a mask 29 | mask = '/tmp/maskFromPolygons.tif' 30 | 31 | mtb.processing.image_mask_from_vector(vector,raster,out_image = mask) 32 | 33 | rM = mtb.processing.RasterMath(raster,in_image_mask=mask) 34 | 35 | print(rM.get_random_block()) 36 | ########################## 37 | # Let's suppose you want compute the modal classification between several predictions 38 | # The first band will be the most predicted class, and the second the number of times it has been predicted. 
def modal_class(x):
    """Return, for each pixel row, the modal class and its vote count.

    Column 0 holds the most frequent value across the row's bands,
    column 1 the number of times it occurs, both cast to int16.
    """
    modal_value, n_agreements = mode(x, axis=1)
    stacked = np.column_stack((modal_value, n_agreements))
    return stacked.astype(np.int16)
Compute the subtraction and the addition between two raster bands.
def sub(X, band1=0, band2=1):
    """Subtract band ``band2`` from band ``band1`` and cast to int16.

    X is a [n_pixels, n_bands] array; returns one value per pixel.
    """
    raw_difference = X[:, band1] - X[:, band2]
    return np.array(raw_difference).astype(np.int16)
a full block (not an array with one pixel per row).
8 | 9 | """ 10 | 11 | ############################################################################## 12 | # Import librairies 13 | # ------------------------------------------- 14 | 15 | from museotoolbox.processing import RasterMath 16 | from museotoolbox import datasets 17 | from matplotlib import pyplot as plt 18 | 19 | ############################################################################## 20 | # Load HistoricalMap dataset 21 | # ------------------------------------------- 22 | 23 | raster,vector = datasets.load_historical_data() 24 | 25 | ############################################################################## 26 | # Initialize rasterMath with raster 27 | # ------------------------------------ 28 | 29 | # Set return3d to True to have full block size (not one pixel per row) 30 | 31 | rM = RasterMath(raster,return_3d=True) 32 | 33 | print(rM.get_random_block().shape) 34 | 35 | ############################################################################## 36 | # Comparing different block size (%, fixed, full block) 37 | # ------------------------------------------------------- 38 | 39 | ####################### 40 | # You can define block by percentage of the whole width/height 41 | 42 | rM.custom_block_size(1/2,1/2) 43 | print(rM.get_random_block().shape) 44 | 45 | ####################### 46 | # Or by fixed window 47 | 48 | rM.custom_block_size(50,100) # width divided every 50 pixel and height every 100 49 | print(rM.get_random_block().shape) 50 | 51 | ######################## 52 | # To have the full image (one block) 53 | 54 | rM.custom_block_size(-1,-1) # to have the full image 55 | 56 | ######################## 57 | # To have block width divided by 4 and height by 2 58 | 59 | rM.custom_block_size(1/4,1/2) 60 | 61 | ########################################## 62 | # Define block size for output raster 63 | # ------------------------------------- 64 | 65 | raster_parameters = rM.get_raster_parameters() 66 | 67 | print('Default parameters are 
a full block (not an array with one pixel per row).
11 | 12 | """ 13 | 14 | ############################################################################## 15 | # Import librairies 16 | # ------------------------------------------- 17 | 18 | from museotoolbox.processing import RasterMath,image_mask_from_vector 19 | from museotoolbox import datasets 20 | from matplotlib import pyplot as plt 21 | import numpy as np 22 | ############################################################################## 23 | # Load HistoricalMap dataset 24 | # ------------------------------------------- 25 | 26 | raster,vector = datasets.load_historical_data() 27 | 28 | ############################################################################## 29 | # Initialize rasterMath with raster 30 | # ------------------------------------ 31 | 32 | # Set return_3d to True to have full block size (not one pixel per row) 33 | # Create raster mask to only keep pixel inside polygons. 34 | 35 | image_mask_from_vector(vector,raster,'/tmp/mask.tif',invert=False) 36 | 37 | rM = RasterMath(raster,in_image_mask='/tmp/mask.tif',return_3d=True) 38 | #rM.addInputRaster('/tmp/mask.tif') 39 | print(rM.get_random_block().shape) 40 | 41 | ####################### 42 | # Plot blocks 43 | x = rM.get_random_block() 44 | 45 | rM.add_function(np.mean,'/tmp/mean.tif',axis=2,out_np_dt=np.int16) 46 | 47 | rM.run() 48 | 49 | from osgeo import gdal 50 | dst = gdal.Open('/tmp/mean.tif') 51 | arr = dst.GetRasterBand(1).ReadAsArray() 52 | plt.imshow(np.ma.masked_where(arr == rM._outputs[0]['nodata'], arr)) 53 | -------------------------------------------------------------------------------- /examples/processing/rasterMath_testBlockSize_3d_andNBands.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Using rasterMath with 3d block or 2d block 4 | ================================================================== 5 | 6 | Test notebook to validate code. 
7 | """ 8 | 9 | ############################################################################## 10 | # Import librairies 11 | # ------------------------------------------- 12 | 13 | from museotoolbox.processing import RasterMath,image_mask_from_vector 14 | from museotoolbox import datasets 15 | from matplotlib import pyplot as plt 16 | import numpy as np 17 | 18 | 19 | ############################################################################## 20 | # Load HistoricalMap dataset 21 | # ------------------------------------------- 22 | 23 | raster,vector = datasets.load_historical_data() 24 | 25 | ############################################################################## 26 | # Initialize rasterMath with raster 27 | # ------------------------------------ 28 | 29 | # Set return_3d to True to have full block size (not one pixel per row) 30 | # Create raster mask to only keep pixel inside polygons. 31 | 32 | image_mask_from_vector(vector,raster,'/tmp/mask.tif',invert=True) 33 | 34 | import time 35 | t0=time.time() 36 | for return_3d in [True,False]: 37 | 38 | rM = RasterMath(raster,in_image_mask='/tmp/mask.tif',return_3d=return_3d) 39 | 40 | rM.custom_block_size(128,128) # block of 128x128 41 | 42 | x = rM.get_block() 43 | 44 | # Returns with only 1 dimension 45 | returnFlatten = lambda x : x[...,0] 46 | 47 | # Returns 3x the original last dimension 48 | addOneBand = lambda x : np.repeat(x,3,axis=x.ndim-1) 49 | # Add functions to rasterMath 50 | rM.add_function(addOneBand,'/tmp/x_repeat_{}.tif'.format(str(return_3d))) 51 | rM.add_function(returnFlatten,'/tmp/x_flatten_{}.tif'.format(str(return_3d))) 52 | 53 | rM.run() 54 | print(time.time()-t0) 55 | 56 | from osgeo import gdal 57 | dst = gdal.Open('/tmp/x_flatten_True.tif') 58 | arr = dst.GetRasterBand(1).ReadAsArray() 59 | plt.imshow(np.ma.masked_where(arr == np.min(arr), arr)) 60 | -------------------------------------------------------------------------------- /examples/processing/rasterMath_twoRasters.py: 
# Let's suppose you want to compute the subtraction between one band of each input
def sub(x):
    """Subtract band 3 of the second input raster from band 1 of the first.

    ``x`` is a list with one block per input raster; each block's last
    axis indexes the bands.
    """
    first_input_band1 = x[0][..., 0]
    second_input_band3 = x[1][..., 2]
    return first_input_band1 - second_input_band3
note:: 24 | # If you have no memories on what the fields name are, simply put the vector path 25 | 26 | try : 27 | mtb.processing.read_vector_values(vector) 28 | except Exception as e: 29 | print(e) 30 | 31 | ############################################# 32 | # Read values from field 'Class' 33 | # -------------------------------- 34 | 35 | Y,Name = mtb.processing.read_vector_values(vector,'Class','Type') 36 | print(Y,Name) 37 | print(Y.shape) 38 | 39 | ############################################# 40 | # Read values from field beginning with 'C' 41 | # ------------------------------------------- 42 | # As multiple fields can begin with C, function returns a column per field 43 | 44 | C = mtb.processing.read_vector_values(vector,band_prefix='C') 45 | print(C) 46 | print(C.shape) 47 | 48 | 49 | ############################################# 50 | 51 | from matplotlib import pyplot as plt 52 | import numpy as np 53 | plt.title('Number of polygons per label') 54 | plt.bar(np.arange(np.unique(Y).size)+1,np.unique(Y,return_counts=True)[1]) 55 | -------------------------------------------------------------------------------- /examples/stats/MoransI.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Compute Moran's I with different lags from raster 4 | =============================================================== 5 | 6 | Compute Moran's I with different lags, support mask. 
def create_false_image(array,path):
    """Write *array* to *path* as a single-band Byte GTiff in EPSG:4326.

    Used here only to fabricate small synthetic rasters for the
    Moran's I example; pixel values are expected to fit in [0, 255].
    """
    # from https://pcjericks.github.io/py-gdalogr-cookbook/raster_layers.html
    driver = gdal.GetDriverByName('GTiff')
    # width = number of columns, height = number of rows, 1 band, byte type
    outRaster = driver.Create(path, array.shape[1], array.shape[0], 1, gdal.GDT_Byte)
    # geotransform: origin (0, 0), pixel size of 10 map units
    # NOTE(review): the y pixel size is positive (most GDAL rasters use a
    # negative value for north-up images) — presumably fine for a synthetic test
    outRaster.SetGeoTransform((0, 10, 0, 0, 0, 10))
    outband = outRaster.GetRasterBand(1)
    outband.WriteArray(array)
    outRasterSRS = osr.SpatialReference()
    outRasterSRS.ImportFromEPSG(4326)
    outRaster.SetProjection(outRasterSRS.ExportToWkt())
    # flush so the file is fully written before the dataset handle goes away
    outband.FlushCache()
62 | # Compute Moran's I for lag 1 on totally random image 63 | lags = [1,3,5] 64 | 65 | MoransI = Moran(mask,lag=lags) 66 | print(MoransI.scores) 67 | 68 | ####################### 69 | # Plot result 70 | # ----------------------------------- 71 | from matplotlib import pyplot as plt 72 | plt.title('Evolution of Moran\'s I') 73 | plt.plot(MoransI.scores['lag'],MoransI.scores['I'],'-o') 74 | plt.xlabel('Spatial lag') 75 | plt.xticks(lags) 76 | plt.ylabel('Moran\'s I') -------------------------------------------------------------------------------- /examples/stats/README.txt: -------------------------------------------------------------------------------- 1 | .. _stats: 2 | 3 | Stats 4 | ------------------------ 5 | 6 | Examples related to the :mod:`museotoolbox.stats` module. 7 | -------------------------------------------------------------------------------- /examples/stats/qualityIndexFromConfusionMatrix.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Compute quality index from confusion matrix 4 | =============================================================== 5 | 6 | Compute different quality index (OA, Kappa and F1) directly 7 | from confusion matrix. 
Compute zonal statistics from a raster and a vector file 4 | =============================================================== 5 | 6 | Compute per-polygon statistics (mean, variance, median, min, max...) 7 | directly from a raster and a vector file.
8 | 9 | """ 10 | 11 | ############################################################################## 12 | # Import librairies 13 | # ------------------------------------------- 14 | import numpy as np 15 | from museotoolbox.stats import zonal_stats 16 | from museotoolbox.datasets import load_historical_data 17 | 18 | ############################################################################## 19 | # Load dataset 20 | # ------------------------------------------- 21 | 22 | raster,vector = load_historical_data() 23 | 24 | ############################################################################## 25 | # Compute mean and variance per polygon 26 | # ---------------------------------------------------- 27 | mean,var = zonal_stats(raster,vector,'uniquefid',stats=['mean','var']) 28 | print(mean.shape) 29 | 30 | ##################################### 31 | # Show mean value 32 | print('For polygon 1 : ') 33 | for band_idx,band in enumerate(['blue','green','red']): 34 | print('Mean value in {} band is : {}'.format(band,mean[0,band_idx])) 35 | 36 | ##################################### 37 | # Show variance value 38 | print('For polygon 1 : ') 39 | for band_idx,band in enumerate(['blue','green','red']): 40 | print('Variance value in {} band is : {}'.format(band,var[0,band_idx])) 41 | 42 | ###############################################" 43 | # You can put in stats, every numpy function 44 | # 45 | # For example here : mean, median, max, min 46 | 47 | mean,median,amax,amin = zonal_stats(raster,vector,'uniquefid',stats=['mean','median','max','min']) 48 | 49 | print('For polygon 1 : ') 50 | for band_idx,band in enumerate(['blue','green','red']): 51 | print('Min value in {} band is : {}'.format(band,amin[0,band_idx])) 52 | print('Max value in {} band is : {}'.format(band,amax[0,band_idx])) 53 | 54 | -------------------------------------------------------------------------------- /getVersion.py: -------------------------------------------------------------------------------- 1 
| from museotoolbox import __version__ 2 | print(__version__) 3 | -------------------------------------------------------------------------------- /metadata/RasterMath_schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nkarasiak/MuseoToolBox/1b1705925ff5165b1310c4de22a0c0fa80959b8a/metadata/RasterMath_schema.png -------------------------------------------------------------------------------- /metadata/__docs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Nov 24 17:38:05 2019 5 | 6 | @author: nicolas 7 | 8 | in_image : string. 9 | A filename or path of a raster file. 10 | It could be any file that GDAL can open. 11 | 12 | in_vector : string. 13 | A filename or path corresponding to a vector file. 14 | It could be any file that GDAL/OGR can open. 15 | 16 | out_image : string. 17 | A geotiff extension filename corresponding to a raster image to create. 18 | 19 | X : array-like, shape = [n_samples, n_features] 20 | The training input samples. 21 | 22 | y : array-like, shape = [n_samples] 23 | The target values. 24 | 25 | in_image_mask : str 26 | A filename or path corresponding to a raster image. 27 | 0 values are considered as masked data. 28 | 29 | out_image : str 30 | A filename or path corresponding to a geotiff (.tif) raster image to save. 31 | 0 values are considered as masked data. 32 | 33 | 34 | """ 35 | 36 | 37 | -------------------------------------------------------------------------------- /metadata/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # ============================================================================= 4 | # ___ ___ _____ _______ 5 | # | \/ | |_ _| | | ___ \ 6 | # | . . 
|_ _ ___ ___ ___ | | ___ ___ | | |_/ / _____ __ 7 | # | |\/| | | | / __|/ _ \/ _ \ | |/ _ \ / _ \| | ___ \/ _ \ \/ / 8 | # | | | | |_| \__ \ __/ (_) | | | (_) | (_) | | |_/ / (_) > < 9 | # \_| |_/\__,_|___/\___|\___/ \_/\___/ \___/|_\____/ \___/_/\_\ 10 | # 11 | # @author: Nicolas Karasiak 12 | # @site: www.karasiak.net 13 | # @git: www.github.com/nkarasiak/MuseoToolBox 14 | # ============================================================================= -------------------------------------------------------------------------------- /metadata/museoToolBox_logo_1024.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nkarasiak/MuseoToolBox/1b1705925ff5165b1310c4de22a0c0fa80959b8a/metadata/museoToolBox_logo_1024.png -------------------------------------------------------------------------------- /metadata/museoToolBox_logo_128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nkarasiak/MuseoToolBox/1b1705925ff5165b1310c4de22a0c0fa80959b8a/metadata/museoToolBox_logo_128.png -------------------------------------------------------------------------------- /metadata/schema.drawio: -------------------------------------------------------------------------------- 1 | 
7V1bd+I2EP41PGYPvmIeE3LpbjcN3Wzb7BNHsRVQ17aoLFiSX1/Jl2Bb4hZA2IlO8mAPso3n+zSSZkZDxxpEixsCppNbHMCwY3aDRce67JimYZtmh/93g+dM0neNTDAmKMgbLQX36AXmwm4unaEAJpWGFOOQomlV6OM4hj6tyAAh+Fe12RMOq0+dgjEUBPc+CEXpPyigk0zqdLtL+W8QjSe09kEEira5IJmAAP8qiayrjjUgGNPsKFoMYMh1V6glu+56xaev34vAmG5zwWf//Orhi+nMgvPgCf0ZDYYv52dedpc5CGf5+34DCYXkln/77GvT50IVExqF7MjoWBcgROOYHYfwiT38guBZHED+oC47e0JhOMAhJuw8xjG7+iKhBP+EhbBjWobrdX3eeExAgNgr1C4YhyBJ8vsVTS4RYQgjzB8M2dd8vW8BjMkfjmN6DSIUcr59BRQzmaiqXHtzSChclES56m4gjiAlz6zJosA1uyJnsZef/lpSopeLJiU2WLkM5CQcv953CRQ7yLGS4/bHbH7+Zf731DF/3F4OXh57zsPLmWEI6MCA8TY/xYRO8BjHILxaSksocQiXbb5iPM2F/0JKn/NOCGap8kqowwWiDzko/PhH6fhyUT7hanKKsyEkiL02JEWLmKngoXyS3uiTU5wu75WePb+eBee8Ry9ZwiTXiGsu/VwgmdUHpuVspsld8gtws5BK87c3eq/E4Zpd2ekKIPCM+HBNTyvsGQVkDOmmLinykMAQUDSvfhEZr9JLmZ7Ac6nBFKOYJqU7D7lgSW+7Sm+3ZkrWt+47NT5nT1+y+/U19iC8qQnffsIPXYaJ+50+D7zRmdcHw79uBmdO6whvWCoYb2nGt5/x0oaO1T7K91RQ3taUf6+Ub9+0xjRUUN7RlG8/5aXzGrt9jHdUMN7VjH+njO+3j/FK1q49zfj2M14+rTFbR3lLyeLV05RvP+WlRr7XPsYrWbv2NePbz3h5w/bNa2wVa9fXyKamfIspLzXy7fPW2Adeu67VXylePiXYh0mC4rHQGZIJmPJDnnPAeZchm3+ecmUKfHbh94z5XELBY0EQOz9/zS4w7EwwxAnKo+B5+L3UO8qxd0a+K5f/HZGWewXVzZqPWQyqG4VsUsmxWE28baPqUnhtHXJZZdDK/FjNpZ2SLzZbK3PLAdr0pDZStf0y+keNi0tf3dXetHZSttdIyh46sr1WSaVRFCA9eu46ehq9zcOnrXL4dLWfs1G2yN7SFllWI2yR5Z1g+NR+ynZS1m4kZZUMn672NLaTs04zOXtg77j03XvaVdhOzrqN5OyhU7PknNV7LNrJ2WYsrQXOqnBQ90zN2VZythkeTIGzKtKjTO11X8XZpoYRt6b1SSLndRZvCCTWmx86QWqtCks+UJ/gJBkxAQpA6qHUHtEdPaKWa1aXNhKPaGFpD+0RlZs2vaGlbabN2dK02V05FY9r2rzdTFu9uQqXaqHBkmVj9gwkMHsvbdF2sWiea2+yaJ7KEE+R5VgCl0AQjObQp5iMUrEI8+rCEduWihhwqDeUijAE26mmFoTd3TJv5RDVINYGk0uo3M+mkHyFgMSsX22Px/sv5GF2a+5Pc7sudTzwxBos9/C/GXtDBMJrCOiMwHsY5grTUK6GUlKUxXBMhVAWgdQSlKwLzuFdDO9m9M3YlfXu4wj5h8S0AThaXn8jjl1DJY7i4qzA8X72eMPQmWo8t8dTZmINSymg4pw0BXSo4XwDnO7J4XQFOL+BOMDRPSVs8fWEYPD7NS/0piHdDlJTNoVVC6lkCjsFfAqUdtTzBAV6DN0FUVuCqKkUUcm8toSonhPthqdsUqQWT3HpX8NTD6a742rJ5kY9lbgWhqKEa4hBMJqghGKCfBCOuP9uB0Rb6dTxPK8CjKy/FYnrSnApVq4lXFAExnAUgeTn6IngKPe6vXdkau42aY+R5bkfDxlxmd8x3TC1I1MQs+MxP4YLNhv16ejb3efiY/a4cosdkAvB
IwwvgP9znOq8hsl7wFW2rDhWTd01JcoqsI4hHQXMDoLY5/2OErT4YL1NujI4lnN7dRktAZVxQDMbGKE4AotVkQf20mvCP60FRTa5d1WCIq7WSFo3nEe/PgYGlsRcOSohEK1VAqJpCEf5uPPWaMExAFIUiJN1i65KTMQlErdVeExGAUHsvTKTxbQLYxB9lJ4inUortVam3pgIOyfJipXCsXUVkuJXDk6cFeu4VUJvSLqpNz/0pjA5xfVGxnZSvBmbwvak+IHzyuQM19se28nwZuwu35Phh95xJlVVMQHSFG8ZxZuxG33feYqKMpmmZviRGL5r+vvWlJeXO20hwQ+9A3OtBst+GQp07vsb3Aj1n6BRl/u+djVaQvYWE6BzciugFdVJ1uT+2T0RtaP5fgwxWEfYjRGcw9Fz5o/zcfw0S5i+VoV43otfrlYwTBrb8VyV4Eh2QeIoQkmKBs4PPgoe0sRKU2LijoeHmCc7wNF0Rpke8z5yu20X+Xi2TubnVmrrRPSYaePvMwrEjKz30ok2b+c5Vqxh9cS4gsELX6uM5JPA5oAgaFyCy/YjiyQSp/T3kYvc5SavM612b7w237zylCKW28mNC8+TVKbv14vN139rfVP7vLL4UV0rVgt8K5rzb+H8Saqo7M35nnd8zot5SBEOZqGYWrGftyUlTNnb4ilwtgjj9QZK7jWC9037k1PBz+qLY3ixB0DJGC4mOA3SeU3rlx4cKJ5Xf547hyIUBKkxLuNqHAhXu7pGsS0RVVnamukcCVUxReppFsuz1ZowOVaLVXULgu06p8VKxyMbXL5IHl6TlC+SN3RPMafoWdU5woYATr25kmqEEifKBJAPGsDZy571ip96Vh/AWcP5CrTDENN36trcD7raZF66+/QwDh12SjCm5V5MwHRyiwPIW/wP -------------------------------------------------------------------------------- /metadata/schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nkarasiak/MuseoToolBox/1b1705925ff5165b1310c4de22a0c0fa80959b8a/metadata/schema.png -------------------------------------------------------------------------------- /museotoolbox/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # ============================================================================= 4 | # ___ ___ _____ _______ 5 | # | \/ | |_ _| | | ___ \ 6 | # | . . 
|_ _ ___ ___ ___ | | ___ ___ | | |_/ / _____ __ 7 | # | |\/| | | | / __|/ _ \/ _ \ | |/ _ \ / _ \| | ___ \/ _ \ \/ / 8 | # | | | | |_| \__ \ __/ (_) | | | (_) | (_) | | |_/ / (_) > < 9 | # \_| |_/\__,_|___/\___|\___/ \_/\___/ \___/|_\____/ \___/_/\_\ 10 | # 11 | # @author: Nicolas Karasiak 12 | # @site: www.karasiak.net 13 | # @git: www.github.com/nkarasiak/MuseoToolBox 14 | # ============================================================================= 15 | from . import ai, processing, cross_validation, datasets, stats 16 | 17 | __version__ = "0.13.6" 18 | -------------------------------------------------------------------------------- /museotoolbox/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # ============================================================================= 4 | # ___ ___ _____ _______ 5 | # | \/ | |_ _| | | ___ \ 6 | # | . . |_ _ ___ ___ ___ | | ___ ___ | | |_/ / _____ __ 7 | # | |\/| | | | / __|/ _ \/ _ \ | |/ _ \ / _ \| | ___ \/ _ \ \/ / 8 | # | | | | |_| \__ \ __/ (_) | | | (_) | (_) | | |_/ / (_) > < 9 | # \_| |_/\__,_|___/\___|\___/ \_/\___/ \___/|_\____/ \___/_/\_\ 10 | # 11 | # @author: Nicolas Karasiak 12 | # @site: www.karasiak.net 13 | # @git: www.github.com/nkarasiak/MuseoToolBox 14 | # ============================================================================= 15 | """ 16 | The :mod:`museotoolbox.datasets` module gathers available datasets for testing 17 | `MuseoToolBox`. 18 | """ 19 | import os 20 | 21 | __pathFile = os.path.dirname(os.path.realpath(__file__)).replace("\\","/") 22 | 23 | def load_historical_data(return_X_y=False, return_X_y_g=False, 24 | centroid=False, low_res=False): 25 | """ 26 | Get a sample of a french Historical map made by the army (carte d'état-major). 27 | These maps are used to identify forest in the 1800's. 28 | 29 | Field of the vector containning the label class is `Class`. 
30 | 31 | =================== ============== 32 | Classes 5 33 | Samples total 12647 34 | Number of polygons 17 35 | Dimensionality 3 36 | Features integer 37 | =================== ============== 38 | 39 | 40 | Parameters 41 | ----------- 42 | return_X_y : boolean, optional (default=False). 43 | If True, returns ``(data, target)`` instead of a path of files. 44 | return_X_y_g : boolean, optional (default=False). 45 | If True, returns the polygon id for each feature. 46 | centroid : boolean, optional (default=False). 47 | If True, return the path of the centroid for each feature. 48 | low_res : boolean, optinal (default=False). 49 | If True returns a low resolution of the raster, so you will have also less features. 50 | 51 | Returns 52 | ------- 53 | raster,vector : list of str. 54 | Return path of raster and vector files if 55 | (data, target) : tuple if ``return_X_y`` is True 56 | (data, target, group) : tuple if ``return_X_y_g`` is True 57 | 58 | References 59 | ----------- 60 | https://github.com/nkarasiak/HistoricalMap 61 | 62 | Examples 63 | -------- 64 | >>> X, y = load_historical_data(return_X_y=True) 65 | >>> X.shape, y.shape 66 | (12647, 3) (12647,) 67 | >>> raster,vector = load_historical_data() 68 | >>> raster 69 | /mnt/bigone/lib/MuseoToolBox/museotoolbox/datasets/_historicalmap/map_compress.tif 70 | >>> vector 71 | /mnt/bigone/lib/MuseoToolBox/museotoolbox/datasets/_historicalmap/train.gpkg 72 | """ 73 | to_return = [] 74 | separator = '/' 75 | if low_res: 76 | raster = __pathFile+'{0}_historicalmap{0}map_lowres.tif'.format(separator) 77 | else: 78 | raster = __pathFile+'{0}_historicalmap{0}map_compress.tif'.format(separator) 79 | 80 | vector = __pathFile+'{0}_historicalmap{0}train.gpkg'.format(separator) 81 | 82 | if return_X_y or return_X_y_g: 83 | from ..processing import extract_ROI 84 | if centroid: 85 | vector = __pathFile+'{0}_historicalmap{0}train_centroid.gpkg'.format(separator) 86 | 87 | if return_X_y_g: 88 | X, y, g = extract_ROI(raster, 
vector, 'Class', 'uniquefid') 89 | to_return = (X, y, g) 90 | else: 91 | X, y = extract_ROI(raster, vector, 'Class') 92 | to_return = (X, y) 93 | else: 94 | to_return.append(raster) 95 | if centroid: 96 | vectorCentroid = __pathFile+'{0}_historicalmap{0}train_centroid.gpkg'.format(separator) 97 | to_return.append(vectorCentroid) 98 | else: 99 | to_return.append(vector) 100 | 101 | return to_return 102 | -------------------------------------------------------------------------------- /museotoolbox/datasets/_historicalmap/map_compress.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nkarasiak/MuseoToolBox/1b1705925ff5165b1310c4de22a0c0fa80959b8a/museotoolbox/datasets/_historicalmap/map_compress.tif -------------------------------------------------------------------------------- /museotoolbox/datasets/_historicalmap/map_lowres.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nkarasiak/MuseoToolBox/1b1705925ff5165b1310c4de22a0c0fa80959b8a/museotoolbox/datasets/_historicalmap/map_lowres.tif -------------------------------------------------------------------------------- /museotoolbox/datasets/_historicalmap/train.gpkg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nkarasiak/MuseoToolBox/1b1705925ff5165b1310c4de22a0c0fa80959b8a/museotoolbox/datasets/_historicalmap/train.gpkg -------------------------------------------------------------------------------- /museotoolbox/datasets/_historicalmap/train_centroid.gpkg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nkarasiak/MuseoToolBox/1b1705925ff5165b1310c4de22a0c0fa80959b8a/museotoolbox/datasets/_historicalmap/train_centroid.gpkg -------------------------------------------------------------------------------- /museotoolbox/internal_tools/__init__.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # ============================================================================= 4 | # ___ ___ _____ _______ 5 | # | \/ | |_ _| | | ___ \ 6 | # | . . |_ _ ___ ___ ___ | | ___ ___ | | |_/ / _____ __ 7 | # | |\/| | | | / __|/ _ \/ _ \ | |/ _ \ / _ \| | ___ \/ _ \ \/ / 8 | # | | | | |_| \__ \ __/ (_) | | | (_) | (_) | | |_/ / (_) > < 9 | # \_| |_/\__,_|___/\___|\___/ \_/\___/ \___/|_\____/ \___/_/\_\ 10 | # 11 | # @author: Nicolas Karasiak 12 | # @site: www.karasiak.net 13 | # @git: www.github.com/nkarasiak/MuseoToolBox 14 | # ============================================================================= 15 | 16 | 17 | def push_feedback(msg, feedback=None): 18 | # in order to convert in Qgis Processing 19 | # ============================================================================= 20 | # if feedback and feedback is not True: 21 | # if feedback == 'gui': 22 | # QgsMessageLog.logMessage(str(msg)) 23 | # else: 24 | # feedback.setProgressText(msg) 25 | # else: 26 | # ============================================================================= 27 | print(msg) 28 | 29 | 30 | class ProgressBar: 31 | 32 | def __init__(self, total, message='', length=40): 33 | """ 34 | total : int 35 | Total number of samples. 36 | message : str 37 | Custom message to show before the progress bar. 38 | length : int. 39 | Length of the bar. 40 | """ 41 | self.start = 0 42 | self.total = total 43 | self.length = length 44 | self.message = message 45 | self.lastPosition = None 46 | 47 | def add_position(self, value=False): 48 | """ 49 | Add progress to the bar. 50 | 51 | Parameters 52 | ---------- 53 | 54 | value : int or False. 55 | If false, will add one. 
56 | """ 57 | 58 | if value: 59 | inPercent = int(value / self.total * 100) 60 | else: 61 | 62 | self.start += 1 63 | value = self.start 64 | inPercent = int(self.start / self.total * 100) 65 | 66 | if inPercent != self.lastPosition: 67 | self.lastPosition = inPercent 68 | self.nHash = int(self.length * (value / self.total)) 69 | self.nPoints = int(self.length - int(self.nHash)) 70 | 71 | self.printBar(inPercent) 72 | 73 | def printBar(self, value): 74 | if value == 100: 75 | end = "\n" 76 | else: 77 | end = "\r" 78 | 79 | # print(self.nHash) 80 | # print(self.nPoints) 81 | print( 82 | '\r' + 83 | self.message + 84 | ' [{}{}]{}%'.format( 85 | self.nHash * 86 | "#", 87 | self.nPoints * 88 | ".", 89 | self.lastPosition), 90 | end=end, 91 | flush=True) 92 | -------------------------------------------------------------------------------- /paper.bib: -------------------------------------------------------------------------------- 1 | @article{moran_notes_1950, 2 | title = {Notes on {Continuous} {Stochastic} {Phenomena}}, 3 | volume = {37}, 4 | issn = {0006-3444}, 5 | url = {http://www.jstor.org/stable/2332142}, 6 | doi = {10.2307/2332142}, 7 | number = {1/2}, 8 | urldate = {2017-12-08}, 9 | journal = {Biometrika}, 10 | author = {Moran, P. A. 
P.}, 11 | year = {1950}, 12 | pages = {17--23} 13 | } 14 | 15 | @article{scikitlearn_2011, 16 | author = {Pedregosa, Fabian and Varoquaux, Ga\"{e}l and Gramfort, Alexandre and Michel, Vincent and Thirion, Bertrand and Grisel, Olivier and Blondel, Mathieu and Prettenhofer, Peter and Weiss, Ron and Dubourg, Vincent and Vanderplas, Jake and Passos, Alexandre and Cournapeau, David and Brucher, Matthieu and Perrot, Matthieu and Duchesnay, \'{E}douard}, 17 | title = {Scikit-Learn: Machine Learning in Python}, 18 | year = {2011}, 19 | issue_date = {February 2011}, 20 | publisher = {JMLR.org}, 21 | volume = {12}, 22 | issn = {1532-4435}, 23 | journal = {Journal of Machine Learning Research}, 24 | month = nov, 25 | pages = {2825–2830}, 26 | numpages = {6} 27 | } 28 | @article{karasiak_2019, 29 | title = {Statistical stability and spatial unstability in prediction of forest tree species using satellite image time series}, 30 | journal = {Remote Sensing}, 31 | author = {Karasiak, Nicolas and Dejoux, J.-F. and Fauvel, M. and Willm, J. and Monteil, C. and Sheeren, D.}, 32 | year = {2019}, 33 | doi = {10.3390/rs11212512} 34 | } 35 | 36 | @article{roberts_2017, 37 | title = {Cross-validation strategies for data with temporal, spatial, hierarchical, or phylogenetic structure}, 38 | volume = {40}, 39 | issn = {09067590}, 40 | doi = {10.1111/ecog.02881}, 41 | language = {en}, 42 | number = {8}, 43 | urldate = {2019-12-12}, 44 | journal = {Ecography}, 45 | author = {Roberts, David R. and Bahn, Volker and Ciuti, Simone and Boyce, Mark S. and Elith, Jane and Guillera-Arroita, Gurutzeta and Hauenstein, Severin and Lahoz-Monfort, José J. and Schröder, Boris and Thuiller, Wilfried and Warton, David I. and Wintle, Brendan A. 
and Hartig, Florian and Dormann, Carsten F.}, 46 | month = aug, 47 | year = {2017}, 48 | pages = {913--929} 49 | } 50 | 51 | @article{olofsson_good_2014, 52 | title = {Good practices for estimating area and assessing accuracy of land change}, 53 | volume = {148}, 54 | issn = {0034-4257}, 55 | doi = {10.1016/j.rse.2014.02.015}, 56 | urldate = {2019-02-18}, 57 | journal = {Remote Sensing of Environment}, 58 | author = {Olofsson, Pontus and Foody, Giles M. and Herold, Martin and Stehman, Stephen V. and Woodcock, Curtis E. and Wulder, Michael A.}, 59 | month = may, 60 | year = {2014}, 61 | keywords = {Remote sensing, Accuracy assessment, Area estimation, Land change, Response design, Sampling design}, 62 | pages = {42--57} 63 | } 64 | -------------------------------------------------------------------------------- /paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'Museo ToolBox: A Python library for remote sensing including a new way to handle rasters.' 3 | 4 | tags: 5 | - Python 6 | - remote sensing 7 | - spatial cross-validation 8 | - raster 9 | - vector 10 | - autocorrelation 11 | 12 | authors: 13 | - name: Nicolas Karasiak 14 | orcid: 0000-0002-1558-0816 15 | affiliation: "1" # (Multiple affiliations must be quoted) 16 | affiliations: 17 | - name: Université de Toulouse, INRAE, UMR DYNAFOR, Castanet-Tolosan, France 18 | index: 1 19 | 20 | date: 13 December 2019 21 | 22 | bibliography: paper.bib 23 | 24 | --- 25 | 26 | # Summary 27 | 28 | `Museo ToolBox` is a Python library dedicated to the processing of georeferenced arrays, also known as rasters or images in remote sensing. 29 | 30 | In this domain, classifying land cover type is a common and sometimes complex task, regardless of your level of expertise. 
Recurring procedures such as extracting Regions Of Interest (ROIs, or raster values from a polygon), computing spectral indices or validating a model with a cross-validation can be difficult to implement. 31 | 32 | `Museo ToolBox` aims at simplifying the whole process by making the main treatments more accessible (extracting of ROIs, fitting a model with cross-validation, computing Normalized Difference Vegetation Index (NDVI) or various spectral indices, performing any kind of array function to the raster, etc). 33 | 34 | The main objective of this library is to facilitate the transposition of array-like functions into an image and to promote good practices in machine learning. 35 | 36 | To make `Museo ToolBox` easier to get started with, a [full documentation with lot of examples is available online on read the docs](http://museotoolbox.readthedocs.io/). 37 | 38 | # Museo ToolBox in details 39 | 40 | `Museo ToolBox` is organized into several modules (Figure 1): 41 | 42 | - [processing](https://museotoolbox.readthedocs.io/en/latest/modules/museotoolbox.processing.html): raster and vector processing. 43 | - [cross-validation](https://museotoolbox.readthedocs.io/en/latest/modules/museotoolbox.cross_validation.html): stratified cross-validation compatible with scikit-learn. 44 | - [ai](https://museotoolbox.readthedocs.io/en/latest/modules/museotoolbox.ai.html): artificial intelligence module built upon scikit-learn [@scikitlearn_2011]. 45 | - [charts](https://museotoolbox.readthedocs.io/en/latest/modules/museotoolbox.charts.html): plot confusion matrix with F1 score or producer/user's accuracy. 46 | - [stats](https://museotoolbox.readthedocs.io/en/latest/modules/museotoolbox.stats.html): compute statistics (such as Moran's Index [@moran_notes_1950], confusion matrix, commision/omission) or extracting truth and predicted label from a confusion matrix. 47 | 48 | ![Museo ToolBox schema.](metadata/schema.png) 49 | 50 | The main usages of `Museo ToolBox` are: 51 | 52 | 1. 
[Reading and writing a raster block per block using your own function](https://museotoolbox.readthedocs.io/en/latest/modules/processing/museotoolbox.processing.RasterMath.html). 53 | 2. [Generating cross-validation, including spatial cross-validation](https://museotoolbox.readthedocs.io/en/latest/auto_examples/index.html#cross-validation). 54 | 3. [Fitting models with scikit-learn, extracting accuracy from each cross-validation fold, and predicting raster](https://museotoolbox.readthedocs.io/en/latest/modules/ai/museotoolbox.ai.SuperLearner.html). 55 | 4. [Plotting confusion matrix and adding f1 score or producer/user accuracy](https://museotoolbox.readthedocs.io/en/latest/modules/charts/museotoolbox.charts.PlotConfusionMatrix.html#museotoolbox.charts.PlotConfusionMatrix). 56 | 5. [Getting the y_true and and y_predicted labels from a confusion matrix](https://museotoolbox.readthedocs.io/en/latest/modules/stats/museotoolbox.stats.retrieve_y_from_confusion_matrix.html). 57 | 58 | ## RasterMath 59 | 60 | Available in `museotoolbox.processing`, the `RasterMath` class is the keystone of ``Museo ToolBox``. 61 | 62 | The question I asked myself is: How can we make it as easy as possible to implement array-like functions on images? The idea behind ``RasterMath`` is that if the function is intended to operate with an array, it should be easy to use it with your raster using as few lines as possible. 63 | 64 | So, what does ``RasterMath`` really do? The user only works with an array and confirms with a sample that the process is doing well, and lets `RasterMath` generalize it to the whole image. The user doesn't need to manage the raster reading and writing process, the no-data management, the compression, the number of bands, or the projection. Figure 2 describes how `RasterMath` reads a raster, performs the function, and writes it to a new raster. 
65 | 66 | The objective is to **allow the user to focus solely on the array-compatible function** while ``RasterMath`` manages the raster part. 67 | 68 | [See ``RasterMath`` documentation and examples](https://museotoolbox.readthedocs.io/en/latest/modules/processing/museotoolbox.processing.RasterMath.html). 69 | 70 | ![``RasterMath`` under the hood](metadata/RasterMath_schema.png) 71 | 72 | ## Artificial Intelligence 73 | 74 | The artificial intelligence (`ai`) module is natively built to implement ``scikit-learn`` algorithms and uses state of the art methods (such as standardizing the input data). ``SuperLearner`` class optimizes the fit process using a grid search to fix the parameters of the classifier. There is also a Sequential Feature Selection protocol which supports a number of components (e.g. a single-date image is composed of four bands, i.e. four features, so a user may select four features at once). 75 | 76 | [See the ``SuperLearner`` documentation and examples](https://museotoolbox.readthedocs.io/en/latest/modules/ai/museotoolbox.ai.SuperLearner.html). 77 | 78 | ## Cross-validation 79 | 80 | ``Museo ToolBox`` implements stratified cross-validation, which means the separation between the training and the validation samples is made by respecting the size per class. 81 | For example the Leave-One-Out method will keep one sample of validation per class. As stated by @olofsson_good_2014 *"stratified random sampling is a practical design that satisfies the 82 | basic accuracy assessment objectives and most of the desirable design 83 | criteria"*. For spatial cross-validation, see @karasiak_2019 inspired by @roberts_2017. 84 | 85 | ``Museo ToolBox`` offers two different kinds of cross-validation: 86 | 87 | ### Non-spatial cross-validation 88 | 89 | - Leave-One-Out. 90 | - Leave-One-SubGroup-Out. 91 | - Leave-P-SubGroup-Out (Percentage of subgroup per class). 92 | - Random Stratified K-Fold. 
93 | 94 | ### Spatial cross-validation 95 | 96 | - Spatial Leave-One-Out [@karasiak_2019]. 97 | - Spatial Leave-Aside-Out. 98 | - Spatial Leave-One-SubGroup-Out (using centroids to select one subgroup and remove other subgroups for the same class inside a specified distance buffer). 99 | 100 | [See the cross-validation documentation and examples](https://museotoolbox.readthedocs.io/en/latest/auto_examples/index.html#cross-validation). 101 | 102 | # Acknowledgements 103 | 104 | I acknowledge contributions from [Mathieu Fauvel](http://fauvel.mathieu.free.fr/), beta-testers (hey [Yousra Hamrouni](https://github.com/yousraH)), and my thesis advisors: Jean-François Dejoux, Claude Monteil and [David Sheeren](https://dsheeren.github.io/). Many thanks to Marie for proofreading. 105 | Many thanks to Sigma students: [Hélène Ternisien de Boiville](https://github.com/HTDBD), [Arthur Duflos](https://github.com/ArthurDfs), [Sam Antonetti](https://github.com/santonetti) and [Anne-Sophie Tronc](https://github.com/AnneSophieTronc) for their involvement in ``RasterMath`` improvements in early 2020. 
106 | 107 | # References 108 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | # dependencies to install for Museo ToolBox developpers 2 | 3 | # Museo ToolBox end-user depencies 4 | # same as requirements.txt 5 | numpy>=1.10 6 | scipy>=1.0 7 | scikit-learn>=0.16 8 | matplotlib 9 | joblib 10 | psutil 11 | 12 | # Development specific requirements 13 | m2r 14 | autopep8 15 | recommonmark 16 | sphinx 17 | sphinx-rtd-theme 18 | numpydoc 19 | ipykernel 20 | nbsphinx 21 | sphinx-gallery 22 | sphinx-copybutton 23 | pytest 24 | pytest-cov 25 | 26 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # dependencies for end-user 2 | # Does not include GDAL, so you need to install it before 3 | numpy>=1.10 4 | scipy>=1.0 5 | scikit-learn>=0.16 6 | matplotlib 7 | joblib 8 | psutil 9 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # ============================================================================= 4 | # ___ ___ _____ _______ 5 | # | \/ | |_ _| | | ___ \ 6 | # | . . 
|_ _ ___ ___ ___ | | ___ ___ | | |_/ / _____ __ 7 | # | |\/| | | | / __|/ _ \/ _ \ | |/ _ \ / _ \| | ___ \/ _ \ \/ / 8 | # | | | | |_| \__ \ __/ (_) | | | (_) | (_) | | |_/ / (_) > < 9 | # \_| |_/\__,_|___/\___|\___/ \_/\___/ \___/|_\____/ \___/_/\_\ 10 | # 11 | # @author: Nicolas Karasiak 12 | # @site: www.karasiak.net 13 | # @git: www.github.com/nkarasiak/MuseoToolBox 14 | # ============================================================================= 15 | import re 16 | 17 | __version__ = re.search( 18 | r'__version__\s*=\s*[\'"]([^\'"]*)[\'"]', # It excludes inline comment too 19 | open('museotoolbox/__init__.py').read()).group(1) 20 | 21 | import setuptools 22 | 23 | with open('README.md', 'r') as fh: 24 | long_description = fh.read() 25 | 26 | 27 | 28 | setuptools.setup( 29 | name='museotoolbox', 30 | version=__version__, 31 | description='Raster and vector tools for Remote Sensing and Classification, built upon gdal library', 32 | long_description=long_description, 33 | long_description_content_type="text/markdown", 34 | url='https://github.com/nkarasiak/MuseoToolBox', 35 | author='Nicolas Karasiak', 36 | author_email='karasiak.nicolas@gmail.com', 37 | license='GPLv3', 38 | install_requires=['numpy>=1.10', 39 | 'scipy>=1.0', 40 | 'scikit-learn>=0.16', 41 | 'matplotlib', 42 | 'joblib', 43 | 'psutil'], 44 | packages=setuptools.find_packages(), 45 | classifiers=[ 46 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 47 | "Topic :: Scientific/Engineering :: GIS", 48 | "Programming Language :: Python :: 3", 49 | "Intended Audience :: Science/Research"], 50 | zip_safe=False, 51 | package_data={ 52 | 'museotoolbox': ['datasets/_historicalmap/map_compress.tif','datasets/_historicalmap/map_lowres.tif','datasets/_historicalmap/train.gpkg','datasets/_historicalmap/train_centroid.gpkg'] 53 | } 54 | ) 55 | -------------------------------------------------------------------------------- /test/__init__.py: 
-------------------------------------------------------------------------------- 1 | # init 2 | -------------------------------------------------------------------------------- /test/test_ai.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import unittest 3 | 4 | import numpy as np 5 | from museotoolbox import ai 6 | from museotoolbox.datasets import load_historical_data 7 | from museotoolbox.processing import image_mask_from_vector 8 | from osgeo import gdal 9 | 10 | import os 11 | import tempfile 12 | tempdir = tempfile.mkdtemp() 13 | import shutil 14 | 15 | from sklearn.ensemble import RandomForestClassifier 16 | 17 | raster,vector = load_historical_data(low_res=True) 18 | X,y,g = load_historical_data(return_X_y_g=True,low_res=True) 19 | param_grid = dict(n_estimators=[1,10]) 20 | classifier = RandomForestClassifier() 21 | image_mask_from_vector(vector,raster,os.path.join(tempdir,'mask.tif')) 22 | 23 | class TestStats(unittest.TestCase): 24 | def test_superLearner(self): 25 | 26 | n_cv = 2 27 | for tf in [True,False]: 28 | verbose = tf+1 29 | model = ai.SuperLearner(classifier,param_grid=param_grid,n_jobs=1,verbose=verbose) 30 | model.fit(X,y,group=g,standardize=tf,cv=n_cv) 31 | assert(model.predict_array(X).shape == y.shape) 32 | len(model.CV) == n_cv 33 | assert(np.all(model.group == g)) 34 | 35 | model.predict_image(raster,os.path.join(tempdir,'class.tif'),confidence_per_class=os.path.join(tempdir,'confclass.tif'),higher_confidence=os.path.join(tempdir,'higherconf.tif')) 36 | assert(model._is_standardized == tf) 37 | 38 | # test masked return if X is totally masked 39 | X_masked = np.ma.copy(X) 40 | X_masked.mask=True 41 | X_masked_return = model._convert_array(X_masked) 42 | assert(np.ma.is_masked(X_masked_return)) 43 | 44 | 45 | 46 | def test_superLearn_pred(self): 47 | model = ai.SuperLearner(classifier,param_grid=param_grid,n_jobs=1,verbose=0) 48 | model.customize_array(np.mean,axis=1) 49 
| model.fit(X,y,group=g,standardize=True,cv=2) 50 | # # 51 | assert(model._array_is_customized == True) 52 | assert(model._array_is_customized) 53 | assert(model.xFunction) 54 | model.predict_image(raster,out_image=os.path.join(tempdir,'class.tif'),in_image_mask=os.path.join(tempdir,'mask.tif'),confidence_per_class=os.path.join(tempdir,'confclass.tif'),higher_confidence=os.path.join(tempdir,'higherconf.tif')) 55 | assert(gdal.Open(os.path.join(tempdir,'class.tif')).RasterCount == 1) 56 | assert(gdal.Open(os.path.join(tempdir,'higherconf.tif')).RasterCount == 1) 57 | assert(gdal.Open(os.path.join(tempdir,'confclass.tif')).RasterCount == len(np.unique(y))) 58 | cms = model.get_stats_from_cv() 59 | 60 | assert(len(cms) == 2) 61 | model.save_cm_from_cv(tempdir,prefix='coco',header=False) 62 | 63 | model.save_model(os.path.join(tempdir,'model')) 64 | assert(os.path.exists(os.path.join(tempdir,'model.npz'))) 65 | model.load_model(os.path.join(tempdir,'model')) 66 | 67 | 68 | with self.assertRaises(ValueError): 69 | model.fit(X,y,cv=False) 70 | 71 | def test_sequential(self): 72 | 73 | sfs = ai.SequentialFeatureSelection(classifier,param_grid) 74 | sfs.fit(X,y,cv=2) 75 | sfs.predict(X,idx=0) 76 | assert(not np.all(sfs.predict(X,idx=0) == sfs.predict(X,idx=1))) 77 | sfs.predict_best_combination(raster,os.path.join(tempdir,'class')) 78 | sfs.predict_images(raster,os.path.join(tempdir,'class')) 79 | assert(sfs.get_best_model()) 80 | assert(sfs.transform(X,idx='best').shape[1] == sfs.best_idx_+1) 81 | 82 | n_comp = 2 83 | max_features = 2 84 | sfs = ai.SequentialFeatureSelection(classifier,param_grid,n_comp=n_comp,path_to_save_models='/tmp/sfs_models/',verbose=1) 85 | def double_columns(x): 86 | return np.hstack((x,x)) 87 | sfs.customize_array(double_columns) 88 | sfs.fit(X,y,max_features=max_features,standardize=True,cv=2) 89 | sfs.fit(X,y,max_features=max_features,standardize=True,cv=2) # to reload from path 90 | assert(sfs.transform(X,idx=1).shape[1] == 2*n_comp) 91 | 
assert(sfs.transform(X,idx=0).shape[1] == n_comp) 92 | assert(sfs.X.shape[1] == X.shape[1]*2) 93 | assert(len(sfs.best_features_) == 2) 94 | sfs.predict_images(raster,tempdir) 95 | sfs.predict_best_combination(raster,os.path.join(tempdir,'best.tif')) 96 | assert(sfs.get_best_model().X.shape[1] == n_comp*(sfs.best_idx_+1) ) 97 | sfs.predict(X,0) 98 | 99 | if __name__ == "__main__": 100 | unittest.main() 101 | shutil.rmtree(tempdir) -------------------------------------------------------------------------------- /test/test_charts.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import unittest 3 | 4 | import os 5 | import tempfile 6 | import numpy as np 7 | from museotoolbox import charts 8 | confusion_matrix = np.random.randint(5,20,[5,5]) 9 | confusion_matrix[-1,-1] = 0 10 | confusion_matrix[-1,:] = 0 11 | 12 | tmp_dir = tempfile.mkdtemp() 13 | 14 | class TestCharts(unittest.TestCase): 15 | def test_Plot(self): 16 | for hide_ticks in [True,False]: 17 | pcm = charts.PlotConfusionMatrix(confusion_matrix) 18 | pcm.color_diagonal('RdYlBu') 19 | pcm.add_text() 20 | pcm.add_x_labels([1,2,3,4,5],rotation=59+hide_ticks,position='bottom') 21 | pcm.add_y_labels(['one','two','three','four','five']) 22 | pcm.add_mean('mean','mean',hide_ticks=True) 23 | 24 | def test_f1(self): 25 | pcm = charts.PlotConfusionMatrix(confusion_matrix,left=0.12,right=.9) 26 | pcm.add_text() 27 | pcm.add_x_labels([1,2,3,4,5],position='top',rotation=90) 28 | pcm.add_f1() 29 | 30 | pcm.add_y_labels(['one','two','three','four','five']) 31 | pcm.save_to(os.path.join(tmp_dir,'test.pdf')) 32 | os.remove(os.path.join(tmp_dir,'test.pdf')) 33 | 34 | def test_f1_nonsquarematrix(self): 35 | pcm = charts.PlotConfusionMatrix(confusion_matrix[:,:-2]) 36 | 37 | self.assertRaises(Warning,pcm.add_f1) 38 | self.assertRaises(Warning,pcm.color_diagonal) 39 | self.assertRaises(Warning,pcm.add_accuracy) 40 | 41 | 42 | def test_accuracy(self): 43 | for 
rotation in [45,90]: 44 | pcm = charts.PlotConfusionMatrix(confusion_matrix,left=0.12,right=.9,cmap='PuRd_r') 45 | pcm.add_text(thresold=35) 46 | pcm.add_x_labels([1,2,3,4,5],position='top',rotation=90) 47 | pcm.add_y_labels(['one','two','three','four','five']) 48 | pcm.add_accuracy() 49 | 50 | if __name__ == "__main__": 51 | unittest.main() 52 | -------------------------------------------------------------------------------- /test/test_cross_validation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Nov 24 12:03:31 2019 5 | 6 | @author: nicolas 7 | """ 8 | # -*- coding: utf-8 -*- 9 | import unittest 10 | import os 11 | import numpy as np 12 | 13 | from museotoolbox.datasets import load_historical_data 14 | from museotoolbox import cross_validation 15 | from museotoolbox import processing 16 | 17 | raster,vector = load_historical_data() 18 | X,y,g = load_historical_data(return_X_y_g=True) 19 | distance_matrix = processing.get_distance_matrix(raster,vector) 20 | n_class = len(np.unique(y,return_counts=True)[1]) 21 | smallest_class = np.min(np.unique(y,return_counts=True)[1]) 22 | 23 | 24 | 25 | class TestCV(unittest.TestCase): 26 | def test_train_split(self): 27 | np.random.seed(42) 28 | y = np.random.randint(1,3,10).reshape(-1,1) 29 | X = np.random.randint(1,255,[10,3],dtype=np.uint8) 30 | g = np.random.randint(1,3,10).reshape(-1,1) 31 | 32 | cv = cross_validation.LeaveOneOut(random_state=42) 33 | 34 | X_train, X_test, y_train, y_test = cross_validation.train_test_split(cv,X,y) 35 | assert ( X_train.shape[0]+X_test.shape[0] == X.shape[0] ) 36 | assert ( y_train.shape[0]+y_test.shape[0] == y.shape[0] ) 37 | assert ( np.all( np.equal(y_test,np.array([1,2]) ) ) ) 38 | 39 | X_train, X_test, y_train, y_test, g_train, g_test = cross_validation.train_test_split(cv,X,y,groups=g) 40 | assert (X_train.shape[0] == y_train.shape[0] == g_train.shape[0]) 41 | 
assert (X_test.shape[0] == y_test.shape[0] == g_test.shape[0]) 42 | 43 | def test_loo(self): 44 | for split in [False,1,2,5]: 45 | 46 | cv = cross_validation.LeaveOneOut(n_repeats=split,random_state=split,verbose=split) 47 | if split == False: 48 | assert(cv.get_n_splits(X,y)==np.min(np.unique(y,return_counts=True)[-1])) 49 | else: 50 | assert(cv.get_n_splits(X,y)==split) 51 | assert(cv.verbose == split) 52 | 53 | for tr,vl in cv.split(X,y): 54 | assert(tr.size == y.size-5) 55 | assert(vl.size == 5) 56 | assert(len(vl) == 5) 57 | 58 | 59 | def test_kfold(self): 60 | cv = cross_validation.RandomStratifiedKFold(valid_size=1/50) 61 | 62 | self.assertRaises(ValueError,cv.get_n_splits,X,y) 63 | 64 | for split in [1,2,5]: 65 | cv = cross_validation.RandomStratifiedKFold(n_splits=1+split,n_repeats=split,verbose=split) 66 | assert(cv.get_n_splits(X,y)==split*split+split) 67 | assert(cv.verbose == split) 68 | 69 | for idx,[tr,vl] in enumerate(cv.split(X,y)): 70 | assert(int(tr.size/vl.size) == split) 71 | assert(np.unique(y[vl],return_counts=True)[0].size == 5) 72 | 73 | assert(idx+1 == split*split+split) 74 | 75 | def test_LeavePSubGroupOut(self): 76 | 77 | cv = cross_validation.LeavePSubGroupOut(verbose=2) 78 | for tr,vl in cv.split(X,y,g): 79 | assert(not np.unique(np.in1d([1,2],[3,4]))[0]) 80 | 81 | self.assertRaises(ValueError,cross_validation.LeavePSubGroupOut,valid_size='ko') 82 | self.assertRaises(ValueError,cross_validation.LeavePSubGroupOut,valid_size=5.1) 83 | 84 | def test_LeaveOneSubGroupOut(self): 85 | cv = cross_validation.LeaveOneSubGroupOut(verbose=2) 86 | # if only one subgroup 87 | tempG = np.copy(g) 88 | tempG[np.where(y==5)] = 1 89 | self.assertRaises(Exception,cv.get_n_splits,X,y,tempG) 90 | 91 | # if all is ok 92 | cv = cross_validation.LeaveOneSubGroupOut(verbose=2) 93 | y_vl = np.array([]) 94 | for tr,vl in cv.split(X,y,g): 95 | y_vl = np.concatenate((y_vl,vl)) 96 | assert(not np.unique(np.in1d([1,2],[3,4]))[0]) 97 | 
assert(np.all(np.unique(np.asarray(y_vl),return_counts=True)[1]==1)) 98 | 99 | list_files =cv.save_to_vector(vector,'Class',group='uniquefid',out_vector='/tmp/cv_g.gpkg') 100 | 101 | assert(len(list_files)==cv.get_n_splits(X,y,g)) 102 | 103 | def test_SLOO(self): 104 | 105 | assert(distance_matrix.shape[0] == y.size) 106 | 107 | cv = cross_validation.SpatialLeaveOneOut(distance_thresold=100, 108 | distance_matrix=distance_matrix, 109 | random_state=12,verbose=1) 110 | 111 | 112 | processing.sample_extraction(raster,vector,out_vector='/tmp/pixels.gpkg',verbose=False) 113 | y_ = processing.read_vector_values('/tmp/pixels.gpkg','Class') 114 | y_polygons = processing.read_vector_values(vector,'Class') 115 | assert(y_.size == y.size) 116 | assert(y_polygons.size != y_.size) 117 | 118 | list_files=cv.save_to_vector('/tmp/pixels.gpkg','Class',out_vector='/tmp/cv.gpkg') 119 | assert(len(list_files[0]) == 2) 120 | for l in list_files: 121 | for f in l: 122 | os.remove(f) 123 | os.remove('/tmp/pixels.gpkg') 124 | # to keep same size of training by a random selection 125 | 126 | 127 | as_loo = cross_validation._sample_selection._cv_manager(cross_validation._sample_selection.distanceCV, 128 | distance_thresold=100, 129 | distance_matrix=distance_matrix, 130 | random_state=12, 131 | LOO_same_size=True, 132 | valid_size=1) 133 | y_vl = [] 134 | y_asloo_vl = [] 135 | for sloo_cv,as_loo_cv in zip(cv.split(X,y),as_loo.split(X,y)): 136 | y_vl.append(sloo_cv[1]) 137 | y_asloo_vl.append(as_loo_cv[1]) 138 | assert(n_class == len(y[sloo_cv[1]])) 139 | assert(sloo_cv[0].size == as_loo_cv[0].size) # same size between loo and sloo 140 | assert(np.all(sloo_cv[1] == as_loo_cv[1])) # using same valid pixel 141 | 142 | assert(np.all(np.unique(np.asarray(y_vl),return_counts=True)[1]==1)) 143 | assert(np.all(np.unique(np.asarray(y_asloo_vl),return_counts=True)[1]==1)) 144 | 145 | as_loo = cross_validation._sample_selection._cv_manager(cross_validation._sample_selection.distanceCV, 146 | 
distance_thresold=300, 147 | distance_matrix=distance_matrix, 148 | random_state=12, 149 | LOO_same_size=True,valid_size=2,n_repeats=1,n_splits=5,verbose=1) 150 | for tr,vl in as_loo.split(X,y): 151 | assert(vl.size == n_class) 152 | 153 | 154 | as_loo = cross_validation._sample_selection._cv_manager(cross_validation._sample_selection.distanceCV, 155 | distance_thresold=100, 156 | distance_matrix=distance_matrix, 157 | random_state=12, 158 | LOO_same_size=True,valid_size=False,n_repeats=1,n_splits=5,verbose=1) 159 | as_loo.get_n_splits(X,y) 160 | # distance too high 161 | cv = cross_validation.SpatialLeaveOneOut(distance_thresold=10000,distance_matrix=distance_matrix,verbose=2) 162 | 163 | self.assertRaises(ValueError,cv.get_n_splits,X,y) 164 | 165 | 166 | def test_aside(self): 167 | 168 | SLOPO = cross_validation.SpatialLeaveAsideOut(valid_size=1/3, 169 | distance_matrix=distance_matrix,random_state=2,verbose=2) 170 | 171 | assert(SLOPO.get_n_splits(X,y) == int(1/(1/3))) 172 | 173 | for tr,vl in SLOPO.split(X,y): 174 | assert(np.unique(y[vl]).size == 5) 175 | assert(np.unique(y[tr]).size == 5) 176 | 177 | 178 | def test_slosgo(self) : 179 | 180 | cv = cross_validation.SpatialLeaveOneSubGroupOut(distance_thresold=100,distance_matrix=distance_matrix,distance_label=g,verbose=2) 181 | 182 | y_vl = np.array([]) 183 | for tr,vl in cv.split(X,y,g) : 184 | print(np.unique(g[vl])) 185 | assert(n_class==np.unique(g[vl]).size) 186 | assert(np.all(np.unique(np.asarray(y_vl),return_counts=True)[1]==1)) 187 | 188 | processing.sample_extraction(raster,vector,out_vector='/tmp/pixels.gpkg',verbose=False) 189 | test_extensions = ['wrong','shp','gpkg'] 190 | for extension in test_extensions: 191 | if extension == 'wrong': 192 | 193 | self.assertRaises(Exception,cv.save_to_vector,'/tmp/pixels.gpkg','Class',out_vector='/tmp/SLOSGO.'+extension) 194 | else: 195 | list_files = cv.save_to_vector('/tmp/pixels.gpkg','Class',out_vector='/tmp/SLOSGO.'+extension) 196 | # test overwriting of 
previous files 197 | list_files = cv.save_to_vector('/tmp/pixels.gpkg','Class',out_vector='/tmp/SLOSGO.'+extension) 198 | for tr,vl in list_files: 199 | assert(len(list_files[0]) == 2) 200 | for l in list_files: 201 | for f in l: 202 | print(f) 203 | if os.path.exists(f): 204 | os.remove(f) 205 | 206 | 207 | 208 | def test_compare_loo_kf(self): 209 | cv_loo = cross_validation.LeaveOneOut(random_state=12,verbose=2) 210 | cv_kf_as_loo = cross_validation.RandomStratifiedKFold(n_splits=False,valid_size=1,random_state=12,verbose=2) 211 | for trvl_loo,trvl_kf in zip(cv_loo.split(X,y),cv_kf_as_loo.split(X,y)): 212 | assert(np.all(trvl_loo[0]==trvl_kf[0])) 213 | assert(np.all(trvl_loo[1]==trvl_kf[1])) 214 | assert(len(trvl_kf[1]) == n_class) 215 | assert(np.unique(y[trvl_kf[1]]).size == n_class) 216 | 217 | #to print extensions 218 | cv_loo.get_supported_extensions() 219 | 220 | 221 | 222 | if __name__ == "__main__": 223 | unittest.main() -------------------------------------------------------------------------------- /test/test_processing.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import unittest 3 | from shutil import copyfile 4 | import numpy as np 5 | from museotoolbox import processing 6 | from museotoolbox.datasets import load_historical_data 7 | from osgeo import gdal, osr 8 | import os 9 | 10 | raster,vector = load_historical_data() 11 | rM = processing.RasterMath(raster) 12 | mask = processing.image_mask_from_vector(vector,raster,'/tmp/mask.tif') 13 | 14 | 15 | def create_false_image(array,path): 16 | # from https://pcjericks.github.io/py-gdalogr-cookbook/raster_layers.html 17 | driver = gdal.GetDriverByName('GTiff') 18 | outRaster = driver.Create(path, array.shape[1], array.shape[0], 1, gdal.GDT_Byte) 19 | outRaster.SetGeoTransform((0, 10, 0, 0, 0, 10)) 20 | outband = outRaster.GetRasterBand(1) 21 | outband.WriteArray(array) 22 | outRasterSRS = osr.SpatialReference() 23 | 
outRasterSRS.ImportFromEPSG(4326) 24 | outRaster.SetProjection(outRasterSRS.ExportToWkt()) 25 | outband.FlushCache() 26 | 27 | # create autocorrelated tif 28 | x = np.zeros((100,100),dtype=int) 29 | # max autocorr 30 | x[:50,:] = 1 31 | x[50:,:] = 2 32 | 33 | x_mask = np.random.randint(0,2,[100,100]) # random mask 34 | create_false_image(x,'/tmp/100x100size.tif') 35 | 36 | 37 | class TestRaster(unittest.TestCase): 38 | def test_convert_datatype(self): 39 | 40 | self._assert_np_gdt(np.dtype('uint8').name,gdal.GDT_Byte) 41 | self._assert_np_gdt(np.dtype('int16').name,gdal.GDT_Int16) 42 | self._assert_np_gdt(np.dtype('uint16').name,gdal.GDT_UInt16) 43 | self._assert_np_gdt(np.dtype('int32').name,gdal.GDT_Int32) 44 | self._assert_np_gdt(np.dtype('uint32').name,gdal.GDT_UInt32) 45 | 46 | self._assert_np_gdt(np.dtype('int64').name,gdal.GDT_Int32) 47 | self._assert_np_gdt(np.dtype('uint64').name,gdal.GDT_Int32) 48 | 49 | self._assert_np_gdt(np.dtype('uint16').name,gdal.GDT_UInt16) 50 | self._assert_np_gdt(np.dtype('float32').name,gdal.GDT_Float32) 51 | self._assert_np_gdt(np.dtype('float64').name,gdal.GDT_Float64) 52 | 53 | self._assert_np_gdt(gdal.GDT_Byte,np.uint8) 54 | self._assert_np_gdt(gdal.GDT_Int16,np.int16) 55 | self._assert_np_gdt(gdal.GDT_UInt16,np.uint16) 56 | self._assert_np_gdt(gdal.GDT_Float64,np.float64) 57 | self._assert_np_gdt(gdal.GDT_Float32,np.float32) 58 | 59 | self._assert_np_gdt(np.dtype('float128').name,gdal.GDT_Float64) 60 | assert(processing.convert_dt(gdal.GDT_Int16,to_otb_dt=True) == 'int16') 61 | assert(processing.convert_dt(np.dtype('float64').name,to_otb_dt=True) == 'double') 62 | 63 | assert(processing._convert_gdal_to_otb_dt(18) == 'cdouble') # if unknow, put cdouble (highest type) 64 | 65 | def _assert_np_gdt(self,in_conv,out_dt): 66 | assert(processing.convert_dt(in_conv)==out_dt) 67 | 68 | 69 | def test_gdt_minmax_values(self): 70 | assert(gdal.GDT_UInt16 == processing.get_gdt_from_minmax_values(500)) 71 | assert(gdal.GDT_UInt32 == 
processing.get_gdt_from_minmax_values(max_value=155500)) 72 | assert(gdal.GDT_Int32 == processing.get_gdt_from_minmax_values(max_value=0,min_value=-75500)) 73 | assert(gdal.GDT_Int16 == processing.get_gdt_from_minmax_values(max_value=1,min_value=-5)) 74 | assert(gdal.GDT_Float32 == processing.get_gdt_from_minmax_values(max_value=2,min_value=-55.55)) 75 | assert(gdal.GDT_Byte == processing.get_gdt_from_minmax_values(max_value=222)) 76 | assert(gdal.GDT_Float64 == processing.get_gdt_from_minmax_values(max_value =888E+40)) 77 | assert(gdal.GDT_Float64 == processing.get_gdt_from_minmax_values(max_value=5,min_value = -888E+40)) 78 | 79 | def test_rasterize(self): 80 | for invert in [True,False]: 81 | for field in ['class',False]: 82 | mem = processing.rasterize(raster,vector,field,out_image='MEM',invert=invert) 83 | assert(mem.RasterCount == 1) 84 | assert(mem.RasterXSize == rM.n_columns) 85 | assert(mem.RasterYSize == rM.n_lines) 86 | 87 | def test_noImg(self) : 88 | 89 | self.assertRaises(ReferenceError,processing.RasterMath,'None',verbose=0) 90 | 91 | def test_dimension(self) : 92 | assert(rM.n_bands == gdal.Open(raster).RasterCount) 93 | assert(rM.n_lines == gdal.Open(raster).RasterYSize) 94 | assert(rM.n_columns == gdal.Open(raster).RasterXSize) 95 | 96 | 97 | def test_readPerBand(self): 98 | for is_3d in [True,False]: 99 | rM_band = processing.RasterMath(raster,return_3d=is_3d,in_image_mask=mask) 100 | for idx,band in enumerate(rM_band.read_band_per_band()): 101 | print(band.ndim) 102 | if is_3d is True: 103 | assert(band.ndim == 2) 104 | else: 105 | assert(band.ndim == 2) 106 | del rM_band 107 | 108 | def test_3d(self) : 109 | rM_3d = processing.RasterMath(raster,return_3d=True) 110 | self.assertRaises(ValueError,rM_3d.get_block,100) 111 | assert(rM_3d.get_random_block().ndim == 3) 112 | for block in rM.read_block_per_block(): 113 | pass 114 | for band in rM.read_band_per_band(): 115 | pass 116 | rM.custom_block_size(128,256) 117 | assert(rM.y_block_size==256) 
118 | assert(rM.x_block_size==128) 119 | 120 | rM.custom_block_size(-1,-1) 121 | assert(rM.x_block_size == rM.n_columns) 122 | assert(rM.y_block_size == rM.n_lines) 123 | rM.custom_block_size(1/2,1/3) 124 | assert(rM.x_block_size == np.ceil(1/2*rM.n_columns)) 125 | assert(rM.y_block_size == np.ceil(1/3*rM.n_lines)) 126 | 127 | rM.add_image(raster) 128 | self.assertRaises(ValueError,rM.add_image,'/tmp/100x100size.tif') 129 | return_x = lambda x : x[0].astype(np.int16) 130 | rM.add_function(return_x,'/tmp/test_double.tif') 131 | rM.run() 132 | os.remove('/tmp/test_double.tif') 133 | assert(np.all(rM.get_random_block(random_state=12))== np.all(rM.get_random_block(random_state=12))) 134 | 135 | 136 | def test_mask(self) : 137 | for is_3d in [True, False]: 138 | mask = '/tmp/mask.tif' 139 | processing.image_mask_from_vector(vector,raster,mask) 140 | mask_src = gdal.Open(mask) 141 | raster_src = gdal.Open(raster) 142 | mask_proj =osr.SpatialReference(wkt=mask_src.GetProjection()) 143 | raster_proj = osr.SpatialReference(wkt=raster_src.GetProjection()) 144 | assert(raster_proj.GetAttrValue('projcs') == mask_proj.GetAttrValue('projcs')) 145 | assert(mask_src.RasterCount == 1) 146 | assert(mask_src.RasterXSize == raster_src.RasterXSize) 147 | assert(mask_src.RasterYSize == raster_src.RasterYSize) 148 | rM_band = processing.RasterMath(raster,return_3d=is_3d) 149 | for idx,band in enumerate(rM_band.read_band_per_band()): 150 | pass 151 | rM_band.add_function(np.mean,axis=is_3d+1,out_image='/tmp/mean.tif') 152 | rM_band.run() 153 | 154 | self.assertRaises(MemoryError,rM_band.run,'1K') 155 | 156 | assert(idx+1 == rM_band.n_bands) 157 | x = rM_band.get_random_block() 158 | assert(x.ndim == is_3d+2) 159 | os.remove('/tmp/mean.tif') 160 | 161 | 162 | def test_XYextraction(self): 163 | X = processing.extract_ROI(raster,vector,prefer_memory=False) 164 | 165 | 166 | self.assertRaises(ValueError,processing.extract_ROI,raster,vector,'Type') 167 | 
self.assertRaises(Exception,processing.extract_ROI,raster,vector,'no_field') 168 | 169 | assert(X.ndim == 2) 170 | 171 | X,y = processing.extract_ROI(raster,vector,'Class') 172 | assert(X.shape[0] == y.shape[0]) 173 | 174 | X,y,g = processing.extract_ROI(raster,vector,'Class','uniquefid') 175 | assert(X.shape[0] == y.shape[0] == g.shape[0]) 176 | 177 | self.assertRaises(ValueError,processing.extract_ROI,'wrong/path','wrong/path/too') 178 | assert(processing.extract_ROI(raster,vector).shape[1] == gdal.Open(raster).RasterCount) 179 | self.assertRaises(ValueError,processing.extract_ROI,raster,vector,'kodidk') 180 | 181 | 182 | 183 | def test_raster_math_mean(self): 184 | for is_3d in [True,False]: 185 | rM = processing.RasterMath(raster,return_3d = is_3d,verbose=is_3d,in_image_mask=mask,n_jobs=is_3d+1) 186 | if is_3d is False: 187 | # test without compression with reading/writing pixel per pixel, very slow... 188 | rM.custom_block_size(10,10) # to have full masked block 189 | rM.add_function(np.mean,'/tmp/mean.tif',axis=is_3d+1,dtype=np.int16) 190 | rM.run() 191 | else: 192 | # test using default block size and high compressio of raster 193 | rM.add_function(np.mean,'/tmp/mean.tif',axis=is_3d+1,dtype=np.int16,compress='high') 194 | rM.run() 195 | assert(gdal.Open('/tmp/mean.tif').RasterCount == 1) 196 | assert(gdal.Open('/tmp/mean.tif').RasterXSize == rM.n_columns) 197 | assert(gdal.Open('/tmp/mean.tif').RasterYSize == rM.n_lines) 198 | 199 | os.remove('/tmp/mean.tif') 200 | 201 | def test_unknow_fields(self): 202 | self.assertRaises(ValueError,processing.extract_ROI,raster,vector,'wrong_field') 203 | self.assertRaises(ValueError,processing.read_vector_values,vector) 204 | self.assertRaises(Exception,processing.read_vector_values,'wrong_path') 205 | self.assertRaises(ValueError,processing.read_vector_values,vector,'wrong_field') 206 | self.assertRaises(ValueError,processing.read_vector_values,vector,band_prefix='wrong_field') 207 | 
self.assertRaises(ReferenceError,processing.RasterMath,raster,in_image_mask='kiki') 208 | 209 | def test_addfid(self): 210 | copyfile(vector,'/tmp/test.gpkg') 211 | for tf in [True,False]: 212 | processing._add_vector_unique_fid('/tmp/test.gpkg',unique_field='to_create',verbose=tf) 213 | processing.sample_extraction(raster,'/tmp/test.gpkg','/tmp/test_roi.gpkg',band_prefix='band',verbose=1) 214 | self.assertRaises(Warning,processing.sample_extraction,raster,'/tmp/test.gpkg','/test/vector.ppkg') 215 | os.remove('/tmp/test.gpkg') 216 | 217 | y_ = processing.read_vector_values('/tmp/test_roi.gpkg',band_prefix='band',verbose=1) 218 | assert(y_.shape[1] == gdal.Open(raster).RasterCount) 219 | os.remove('/tmp/test_roi.gpkg') 220 | 221 | def test_centroid(self): 222 | 223 | Xc,yc = load_historical_data(centroid=True,return_X_y=True) 224 | Xc_file, yc_file= load_historical_data(centroid=True) 225 | assert(os.path.exists(Xc_file)) 226 | assert(os.path.exists(yc_file)) 227 | assert(Xc.shape[0] == processing.read_vector_values(vector,'Type').shape[0]) 228 | 229 | def test_extract_position(self): 230 | X,pixel_position=processing.extract_ROI(raster,vector,get_pixel_position=True,prefer_memory=False) 231 | assert(pixel_position.shape[0] == X.shape[0]) 232 | 233 | def test_get_parameter(self): 234 | rM = processing.RasterMath(raster) 235 | assert(isinstance(rM.get_raster_parameters(),dict)) 236 | rM.custom_raster_parameters(['TILED=NO']) 237 | assert(rM.get_raster_parameters() == ['TILED=NO']) 238 | 239 | def test_get_distance_matrix(self): 240 | distance_matrix,label = processing.get_distance_matrix(raster,vector,'Class') 241 | assert(label.size== distance_matrix.shape[0]) 242 | 243 | if __name__ == "__main__": 244 | unittest.main() 245 | -------------------------------------------------------------------------------- /test/test_stats.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import unittest 3 | 4 | 
import numpy as np 5 | from museotoolbox import datasets 6 | from museotoolbox import stats 7 | from osgeo import gdal,osr 8 | from sklearn.metrics import accuracy_score,cohen_kappa_score 9 | from shutil import copyfile 10 | ### 11 | confusion_matrix = np.array([[5,1],[2,2]]) 12 | # real 13 | yt_init= [1,1,1,1,1,1,2,2,2,2] 14 | yp_init = [1,1,1,1,1,2,1,1,2,2] 15 | 16 | def create_false_image(array,path): 17 | # from https://pcjericks.github.io/py-gdalogr-cookbook/raster_layers.html 18 | driver = gdal.GetDriverByName('GTiff') 19 | outRaster = driver.Create(path, array.shape[1], array.shape[0], 1, gdal.GDT_Byte) 20 | outRaster.SetGeoTransform((0, 10, 0, 0, 0, 10)) 21 | outband = outRaster.GetRasterBand(1) 22 | outband.WriteArray(array) 23 | outRasterSRS = osr.SpatialReference() 24 | outRasterSRS.ImportFromEPSG(4326) 25 | outRaster.SetProjection(outRasterSRS.ExportToWkt()) 26 | outband.FlushCache() 27 | 28 | # create autocorrelated tif 29 | x = np.zeros((100,100),dtype=int) 30 | # max autocorr 31 | x[:50,:] = 1 32 | x[50:,:] = 2 33 | 34 | x_mask = np.random.randint(0,2,[100,100]) # random mask 35 | create_false_image(x,'/tmp/autocorrelated_moran.tif') 36 | create_false_image(x_mask,'/tmp/mask_moran.tif') 37 | 38 | ### 39 | class TestStats(unittest.TestCase): 40 | 41 | def test_Moran_param(self): 42 | m = stats.Moran('/tmp/autocorrelated_moran.tif',lag=[1,2]) 43 | assert(m.get_n_neighbors(x[:3,:3],x[:3,:3],weights=x[:3,:3]) == 8) 44 | m.lags == [1,2] 45 | assert(len(m.scores['I']) == len(m.lags)) 46 | 47 | def test_Moran(self): 48 | self.assertRaises(ReferenceError,stats.Moran,in_image='N/A') 49 | 50 | # full autocorrelation 51 | moran = stats.Moran('/tmp/autocorrelated_moran.tif',lag=1) 52 | assert(np.round(moran.I,2) >= 0.95) 53 | 54 | #perfect random 55 | moran = stats.Moran('/tmp/mask_moran.tif',lag=1) 56 | assert(0 >= np.abs(np.round(moran.I,1))) 57 | 58 | # with mask 59 | moran_intermediate = 
stats.Moran('/tmp/autocorrelated_moran.tif',in_image_mask='/tmp/mask_moran.tif',lag=[1,2]) 60 | assert(moran_intermediate.scores['lag'] == [1,2]) 61 | assert(moran_intermediate.scores['I'][0] != moran.I) 62 | 63 | def test_Moran_fullMask(self): # full autocorrelation 64 | moran = stats.Moran('/tmp/mask_moran.tif',in_image_mask='/tmp/mask_moran.tif',lag=1) 65 | assert(np.isnan(moran.I)) 66 | 67 | def test_comm_om(self): 68 | comm_om = stats.commission_omission(confusion_matrix) 69 | 70 | assert(comm_om[0] == [confusion_matrix[0,1]/np.sum(confusion_matrix[0,:])*100,confusion_matrix[1,0]/np.sum(confusion_matrix[1,:])*100]) 71 | assert(comm_om[1] == [confusion_matrix[1,0]/np.sum(confusion_matrix[:,0])*100,confusion_matrix[0,1]/np.sum(confusion_matrix[:,1])*100]) 72 | 73 | sts = stats.ComputeConfusionMatrix(yt_init,yp_init,OA=True, kappa= True,F1=True) 74 | assert(np.all(confusion_matrix == sts.confusion_matrix)) 75 | assert(len(sts.F1) == 2) 76 | 77 | def test_stats_from_cm(self): 78 | 79 | yt,yp = stats.retrieve_y_from_confusion_matrix(confusion_matrix) 80 | assert(accuracy_score(yp,yt) == (np.sum(np.diag(confusion_matrix))/np.sum(confusion_matrix))) 81 | assert(np.all(yp==yp_init)) 82 | assert(np.all(yt==yt_init)) 83 | sts_from_matrix = stats.ComputeConfusionMatrix(yp,yt,OA=True,kappa=True) 84 | assert(sts_from_matrix.Kappa == cohen_kappa_score(yp,yt)) 85 | 86 | def test_zonal_stats(self): 87 | raster,vector = datasets.load_historical_data() 88 | copyfile(vector,'/tmp/train.gpkg') 89 | 90 | median,amax,std = stats.zonal_stats(raster,'/tmp/train.gpkg',False) 91 | assert(median.shape == amax.shape == std.shape) 92 | assert(np.sum(std)