├── .environment.yml ├── .gitignore ├── .readthedocs.yml ├── .travis.yml ├── CHANGELOG.md ├── CONTRIBUTING.rst ├── Makefile ├── README.md ├── build.sh ├── codecov.yml ├── docs ├── Makefile ├── make.bat └── source │ ├── CHANGELOG.rst │ ├── CONTRIBUTING.rst │ ├── Makefile.txt │ ├── README.rst │ ├── _static │ └── style.css │ ├── _templates │ ├── class.rst │ ├── function.rst │ └── module.rst │ ├── api.rst │ ├── conf.py │ └── index.rst ├── examples ├── README.txt ├── ai │ ├── README.txt │ ├── SFFS.py │ ├── learnWithCustomRaster.py │ ├── learnWithRFandCompareCV.py │ └── learnWithRFandRS50.py ├── charts │ ├── README.txt │ ├── plotConfusion.py │ ├── plotConfusionAcc.py │ └── plotConfusionF1.py ├── cross_validation │ ├── LeaveOneOutPerClass.py │ ├── LeaveOneSubGroupOut.py │ ├── LeaveOneSubGroupOutAndSaveVector.py │ ├── LeavePSubGroupOut.py │ ├── README.txt │ ├── RandomSampling50.py │ ├── SpatialLeaveAsideOut.py │ ├── SpatialLeaveOnePixelOut.py │ ├── SpatialLeaveOneSubGroupOut.py │ ├── __drawCVmethods.py │ └── train_test_split.py ├── processing │ ├── README.txt │ ├── copyRasterInVectorFields.py │ ├── extractRasterValues.py │ ├── modalClass.py │ ├── rasterMask.py │ ├── rasterMath.py │ ├── rasterMathCustomBlock.py │ ├── rasterMathCustomBlockAndMask.py │ ├── rasterMath_testBlockSize_3d_andNBands.py │ ├── rasterMath_twoRasters.py │ └── readVectorFields.py └── stats │ ├── MoransI.py │ ├── README.txt │ ├── qualityIndexFromConfusionMatrix.py │ └── zonal_stats.py ├── getVersion.py ├── license.txt ├── metadata ├── RasterMath_schema.png ├── __docs.py ├── __init__.py ├── museoToolBox_logo.svg ├── museoToolBox_logo_1024.png ├── museoToolBox_logo_128.png ├── schema.drawio └── schema.png ├── museotoolbox ├── __init__.py ├── ai │ └── __init__.py ├── charts │ └── __init__.py ├── cross_validation │ ├── __init__.py │ └── _sample_selection.py ├── datasets │ ├── __init__.py │ └── _historicalmap │ │ ├── map_compress.tif │ │ ├── map_lowres.tif │ │ ├── train.gpkg │ │ └── train_centroid.gpkg ├── 
internal_tools │ └── __init__.py ├── processing │ └── __init__.py └── stats │ └── __init__.py ├── paper.bib ├── paper.md ├── requirements-dev.txt ├── requirements.txt ├── setup.py └── test ├── __init__.py ├── test_ai.py ├── test_charts.py ├── test_cross_validation.py ├── test_processing.py └── test_stats.py /.environment.yml: -------------------------------------------------------------------------------- 1 | # Conda environment for developpers 2 | 3 | name: museotoolbox-dev 4 | 5 | dependencies: 6 | - python=3.7 7 | - pytest 8 | - gdal 9 | - pip 10 | - pip: 11 | - -r file:requirements-dev.txt 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # ignore datasets to avoid modification 2 | museotoolbox/datasets/_historicalmap/* 3 | 4 | # Files to ignore 5 | .coverage.* 6 | .coverage 7 | *.pyc 8 | *.py~ 9 | *_filtered* 10 | *.swp 11 | __pycache__ 12 | *.spyproject/* 13 | 14 | # nb 15 | *.pickle 16 | *.py.md5 17 | 18 | # MTB 19 | MuseoToolBox.egg-info/ 20 | cv_*.sqlite 21 | 22 | # sphinx-gallery temp files 23 | 24 | # Sphinx documentation 25 | docs/source/_build/ 26 | docs/build/ 27 | docs/source/modules 28 | docs/source/gen_modules 29 | docs/source/_autosummary 30 | docs/source/_static 31 | docs/source/auto_examples/**.pickle 32 | docs/source/auto_examples/ 33 | docs/source/auto_examples/**.md5 34 | docs/modules/ 35 | 36 | # Distribution / packaging 37 | .Python 38 | env/ 39 | build/ 40 | develop-eggs/ 41 | dist/ 42 | downloads/ 43 | eggs/ 44 | .eggs/ 45 | lib/ 46 | lib64/ 47 | parts/ 48 | sdist/ 49 | var/ 50 | *.egg-info/ 51 | .installed.cfg 52 | *.egg 53 | 54 | # PyInstaller 55 | # Usually these files are written by a python script from a template 56 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
57 | *.manifest 58 | *.spec 59 | 60 | # Installer logs 61 | pip-log.txt 62 | pip-delete-this-directory.txt 63 | 64 | # IPython Notebook 65 | .ipynb_checkpoints 66 | 67 | # pyenv 68 | .python-version 69 | 70 | # celery beat schedule file 71 | celerybeat-schedule 72 | 73 | # dotenv 74 | .env 75 | 76 | # virtualenv 77 | venv/ 78 | ENV/ 79 | 80 | # Spyder project settings 81 | .spyderproject 82 | 83 | # Rope project settings 84 | .ropeproject 85 | 86 | # Mac stuff 87 | .DS_Store 88 | 89 | # coverage output folder 90 | cov_html/ 91 | 92 | *.gpkg-shm 93 | 94 | *.gpkg-wal 95 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | version: 2 6 | 7 | build: 8 | image: latest 9 | 10 | python: 11 | version: 3.7 12 | system_packages: true 13 | install: 14 | - requirements: requirements-dev.txt 15 | - method: setuptools 16 | path: . 17 | 18 | conda: 19 | environment: .environment.yml 20 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: bionic 2 | language: python 3 | os : linux 4 | python: 5 | - "3.6" 6 | - "3.7" 7 | - "3.8" 8 | 9 | addons: 10 | apt: 11 | packages: 12 | - libgdal-dev 13 | 14 | before_install: 15 | - sudo add-apt-repository --yes ppa:ubuntugis/ppa 16 | - sudo apt-get --quiet update 17 | - sudo apt-get install --yes libgdal-dev gdal-bin 18 | install: 19 | - pip install gdal==`gdal-config --version` 20 | - pip install -r requirements.txt 21 | - pip install . 
22 | - pip install codecov 23 | - pip install pytest-cov pytest 24 | 25 | script: 26 | - python setup.py develop 27 | - pytest -v --cov=museotoolbox 28 | 29 | deploy: 30 | provider: pypi 31 | user: "karasiak" 32 | password: 33 | secure: 7NeKw+T+tO7LWvwjgj1agHIo8zQlA33bkg9x6pQ4zXd24ZJ4XDhqnGSxG9NKr9pP9l9PwDrPZIIW50JeBWula+JXQxtRj80LNJB8ySJVn3BEdLojxUMthVvOtdUqzqFME5iFpP6SEdUrODMHXJa18tCP5QN2XHUWEixmmqrbFg8JYgg6nswVmmCHpXor5h9LW1V0bJ1uOThmpjhXUhKx7xM6lcXJssC91Ghmkh1SApNuK3Ah8ir92HK53VSdQScK8QAHIpvWGbTLhbt7C4IuwDFERgDIbOec6L9UxOePMXmIF/5wu+mlQeVzFV5DiFBJ0U+fDEqg1PTKifM9KQmOtBX8iFVyiZmwqM6wRJjt83C5j4VKDj8vs8Y9dGkmOobRNlE3HdEekB69mj5177oCpOSZtgntkbR/9wfEK7j4qnj/mALaKx9mYKdI9nRepEnnocp6R2aGvZD0BxSwRWxjgubfolsG3mBHBRhjog5pVOcC4de3aPwZKxWTls2AIMP47lxocKnpHWTzKAtuPEo4w/Y2IaFXbk0EQleC6BLHAWi7DbAYct7/6Ju7VOdm80ko0To4PQ+9fzpSZndcAxf1cEys0OaVcQPJ9XjxfvzHUz1OXqQKzZe5tfAxFS6q6rbMV4J9zNRP+xX8rOhmB4HajBmBfIEcz/Avt4R+pl5lIeo= 34 | on: 35 | tags: true 36 | branch: master 37 | skip_existing: true 38 | 39 | after_success: 40 | - codecov 41 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [0.13.6] - 2020-07-19 9 | 10 | ### Added 11 | 12 | - RasterMath get_image_as_array() now supports mask. 13 | 14 | ### Fixed 15 | 16 | - Update groups management for sklearn>=0.25 17 | - Fixed bug with raster/vector datasets (mtb.datasets.load_historical_data()) 18 | - Fixed bug with get_image_as_array() from RasterMath (completely rewrite this part) 19 | 20 | ## [0.13.5] - 2020-06-24 21 | 22 | ### Fixed 23 | 24 | - Requirements is not directly written inside setup.py due to bugs. 
25 | 26 | ## [0.13.4] - 2020-06-24 27 | 28 | ### Fixed 29 | 30 | - Fix bug in setup.py using requirements.txt instead of ./requirements.txt 31 | 32 | ## [0.13.3] - 2020-06-23 33 | 34 | ### Fixed 35 | 36 | - Adding psutil to depency 37 | 38 | ## [0.13.2] - 2020-06-18 39 | 40 | ### Added 41 | 42 | - get_image_as_array function for RasterMath 43 | 44 | ### Fixed 45 | 46 | - train_test_split supports now groups=None 47 | 48 | ## [0.13.1] - 2020-06-11 49 | 50 | ### Added 51 | 52 | - Support list for cross-validation in order to give an unready unfolded cv. 53 | 54 | New features provided by @marclang for the charts module : 55 | 56 | - Allows to display both F1 and accuracy or mean metrics 57 | - Allows to display accuracy after have been displaying mean (and vice versa) 58 | - Allows to display float matrix 59 | 60 | ### Changed 61 | 62 | - Fix path separator to access tutorial dataset 63 | 64 | ## [0.13.0] - 2020-04-21 65 | 66 | ### Changed 67 | 68 | - Final version for JOSS (paper.md and paper.bib updated thanks to @kbarnhart) 69 | 70 | ## [0.12.1-rc.1] - 2020-04-18 71 | 72 | ### Added 73 | 74 | - RasterMath use available memory to speed up process and manage now several cores (n_jobs) 75 | - train_test_split in cross_validation module 76 | 77 | ### Changed 78 | 79 | - Enhance mask management for RasterMath 80 | - Move FlushCache to optimize RasterMath 81 | - RasterMath get_random_block returns only block which are not totally unmasked 82 | - charts.PlotConfusionMatrix has a default argument (zero_is_min=True) 83 | 84 | ## [0.12.1-beta.2] - 2020-02-10 85 | 86 | ### Fixed 87 | 88 | - Fix bug when in RasterMath when input is only one band 89 | - Fix bug in RasterMath with mask and list 90 | 91 | ### Added 92 | 93 | - n_jobs for RasterMath (thanks to Helene @HTDBD and Arthur @ArthurDfs, two great students) 94 | - function write_block and generally a most intuitive way to use RasterMath (with the help of @HTDBD and @ArthurDfs) 95 | 96 | ## [0.12.1-beta.1] - 2020-01-16 97 
| 98 | ### Added 99 | - new branch spatial added 100 | 101 | ### Added 102 | 103 | - Added this line 104 | 105 | ### Changed 106 | 107 | - SequentialFeatureSelection parameters order Changed. *scoring* is now before *standardize*. 108 | - Update doc for load_historical_data() 109 | 110 | ### Fixed 111 | 112 | - Fix bug in get_block() and get_random_block() which returned the same block each time due to new method. 113 | - Fix bug with nodata in RasterMath when output is of float type 114 | 115 | ## [0.12] - 2019-12-13 116 | 117 | ### Changed 118 | 119 | - RasterMath made a lot of improvements using block reading and writing. For example, the default block size is now 256x256 (you can keep the default block size by choosing block_size=False), and Museo ToolBox automatic detect if the geotiff will be tiled or not (it depends on the block size). 120 | - Some folders have Changed name : 121 | - raster_tools and vector_tools to processing 122 | - learn_tools to ai 123 | - some functions have Changed name : 124 | - getSamplesFromROI to extract_values 125 | - historicalMap to load_historical_data 126 | - getDistanceMatrix to get_distance_matrix 127 | - classes now always begin with a capital case : 128 | - learnAndPredict to SuperLearner 129 | - rasterMath to RasterMath 130 | - sequentialFeatureSelection to SequentialFeatureSelection 131 | 132 | ### Fixed 133 | 134 | - bug #7 : getSamplesFromROI (nowd extract_ROI) now extracts ROI values using by default memory. If it fails, it will create a temporary raster on disk then delete it when finished. 135 | 136 | ### Removed 137 | 138 | - Remove command lines (cli) 139 | 140 | ## [0.12rc5] - 2019-11-11 141 | 142 | ### Changed 143 | 144 | - getSamplesFromROI return list of available fields if wrong field given. 
145 | - rasterMath convert np.nan value to nodata value (if numpy >= 1.17) 146 | 147 | ## [0.12rc4] - 2019-11-01 148 | 149 | ### Changed 150 | 151 | - Minor fix when using learnAndPredict with an outside customized function 152 | - Better management fo cross-validation in learnAndPredict 153 | - Fix minor bug using False or None value with cv in learnAndPredict 154 | 155 | ### Added 156 | 157 | - Add an option to use SFS without writing each best model on the disk. 158 | 159 | ## [0.12rc3] - 2019-10-29 160 | 161 | ### Added 162 | 163 | - Move some functions from vector_tools to raster_tools, functions are anyway still available from vector_tools 164 | 165 | ### Changed 166 | 167 | - learnAndPredict manages int value for cross-validation by using RandomStratifiedKFold 168 | - Enhance blocksize management for rasterMath 169 | - Move command line code in _cli folder 170 | 171 | ## [0.12rc2] - 2019-10-14 172 | 173 | ### Changed 174 | 175 | - Improvements of rasterMath 176 | - customBlockSize defines now the same block size for window reading and for the output 177 | - add seed parameter (to set a random generator) in getRandomBlock() 178 | - add getRasterParameters() and customRasterParameters() function. 179 | 180 | ## [0.12rc1] - 2019-10-12 181 | 182 | ### Changed 183 | 184 | - update rasterMath to generate by default a 256*256 raster block size. 185 | - update rasterMath to prevent bug if user has osgeo/gdal version is lower than 2.1. 186 | - prevent bug when in rasterMath if processor has only 1 core. 
187 | 188 | ### Fixed 189 | - minor fixes 190 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | ===================================== 2 | How to contribute to this package 3 | ===================================== 4 | 5 | This document describes how to edit the package, run the tests, build the docs, put tagged versions on PyPI_, etc. 6 | 7 | Editing the project 8 | --------------------- 9 | 10 | Developpment environnement 11 | +++++++++++++++++++++++++++ 12 | 13 | We're using conda environment to install all dependencies needed for Museo ToolBox developpement. 14 | 15 | - ``conda env create -f .environment.yml`` 16 | 17 | This will create an environnement called ``museotoolbox-dev``, to activate it, just type : 18 | 19 | - ``conda active museotoolbox-dev`` 20 | 21 | 22 | Package structure 23 | ++++++++++++++++++ 24 | - Package code is in `museotoolbox `_ folder. 25 | - Docs can be generated by typing ``Make doc`` at the root of folder. 26 | - Unit tests are in `test `_ folder. 27 | 28 | Modify the code via pull requests 29 | +++++++++++++++++++++++++++++++++++ 30 | To make changes to the code, you should make a branch or fork, make your changes, and then submit a pull request. 31 | If you aren't sure about pull requests: 32 | 33 | - A general description of pull requests: https://help.github.com/en/articles/about-pull-requests 34 | 35 | - How to create a pull request: https://help.github.com/en/articles/creating-a-pull-request 36 | 37 | - How to structure your pull requests (one conceptually distinct change per pull request): https://medium.com/@fagnerbrack/one-pull-request-one-concern-e84a27dfe9f1 38 | 39 | Tests and documentation 40 | +++++++++++++++++++++++ 41 | You should document your code clearly with `numpy style documentation`_. 42 | You should add tests. 43 | For simple things, these can be `doctests `_ in the code. 
44 | For more elaborate functionality, put unit tests in test. 45 | 46 | Versions and CHANGELOG 47 | ++++++++++++++++++++++ 48 | The version is `single sourced `_ in `__init__.py`_. 49 | When modifying a tagged version (e.g., ``0.1.0``), indicate you are working on a development version by adding a ``dev`` (e.g., ``0.1.dev1``). 50 | See `here `_ for more information on version numbers. 51 | 52 | Conceptual descriptions of changes should also be tracked in the CHANGELOG_. 53 | 54 | Adding dependencies 55 | +++++++++++++++++++++ 56 | When you add code that uses a new package that is not in the standard python library, you should add it to the dependencies specified under the ``install_requires`` option in `setup.py `_. 57 | `See here `_ for information on how to do this, and how to specify minimal required versions. 58 | As described in the above link, you should **not** pin exact versions in ``install_requires`` in `setup.py `_ unless absolutely necessary. 59 | 60 | 61 | Testing 62 | --------- 63 | 64 | Adding tests 65 | ++++++++++++++ 66 | As you add new codes, you should create tests to make sure it is working correctly. 67 | These can include: 68 | 69 | - doctests in the code 70 | 71 | - unit tests in the `./test/ `_ subdirectory 72 | 73 | Running the tests locally 74 | ++++++++++++++++++++++++++ 75 | After you make changes, you should run two sets of tests. 76 | To run the tests, go to the top-level package directory. 77 | 78 | Then run the tests with pytest_ under the ``museotoolbox-dev`` conda environnement by running : 79 | 80 | - ``make pytest`` 81 | 82 | 83 | Building the documentation 84 | +++++++++++++++++++++++++++ 85 | The documentation will be built for each new commit in the ``master`` branch. 
86 | However, you can build your own documentation in order by typing under the ``museotoolbox-dev`` conda environnement : 87 | 88 | - ``make ddoc`` 89 | 90 | 91 | Automated testing on Travis 92 | +++++++++++++++++++++++++++ 93 | The aforementioned pytest_ tests will be run automatically by the Travis_ continuous integration system as specified in the `.travis.yml <.travis.yml>`_ file. 94 | Note that running the Travis_ tests requires you to register the project with Travis_. 95 | 96 | If the tests are passing, you will see this on the Travis_ badge on GitHub repo main page. 97 | 98 | 99 | Tagging versions and putting on PyPI 100 | ------------------------------------- 101 | When you have a new stable release, you will want to tag it and put it on PyPI_ where it can be installed with pip_. 102 | First, make sure the version number is up-to-date in `__init__.py`_ and the CHANGELOG_. 103 | Then commit the code to GitHub if you haven't already done so. 104 | Next tag the version, as in:: 105 | 106 | git tag -a 0.1.0 -m 'version 0.1.0' 107 | 108 | and then push the tag to GitHub with:: 109 | 110 | git push --tags 111 | 112 | Finally, with Travis_, specify your pypi account in `.travis.yml <.travis.yml>`_ file to publish a new version on pypi when you publish a stable release on github. 113 | 114 | Note that this requires you to have registered the package on PyPI_ if this is the first version of the package there. 115 | 116 | .. _pytest: https://docs.pytest.org 117 | .. _Travis: https://docs.travis-ci.com 118 | .. _PyPI: https://pypi.org/ 119 | .. _pip: https://pip.pypa.io 120 | .. _sphinx: https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html 121 | .. _test: test 122 | .. _docs: docs 123 | .. _notebooks: notebooks 124 | .. _`Jupyter notebooks`: https://jupyter.org/ 125 | .. _`__init__.py`: museotoolbox/__init__.py 126 | .. _CHANGELOG: CHANGELOG.md 127 | .. 
_`numpy style documentation`: https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_numpy.html 128 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | 3 | PYTHON=python3 4 | branch := $(shell git symbolic-ref --short -q HEAD) 5 | 6 | help : 7 | @echo "The following make targets are available:" 8 | @echo " help - print this message" 9 | @echo " build - build python package" 10 | @echo " install - install python package (local user)" 11 | @echo " sinstall - install python package (system with sudo)" 12 | @echo " remove - remove the package (local user)" 13 | @echo " sremove - remove the package (system with sudo)" 14 | @echo " clean - remove any temporary files" 15 | @echo " notebook - launch ipython3 notebook" 16 | build : 17 | $(PYTHON) setup.py sdist bdist_wheel 18 | buildext : 19 | $(PYTHON) setup.py build_ext --inplace 20 | 21 | install : 22 | $(PYTHON) setup.py install --user 23 | 24 | sinstall : 25 | sudo $(PYTHON) setup.py install 26 | 27 | remove : 28 | $(PYTHON) setup.py install --user --record files.txt 29 | tr '\n' '\0' < files.txt | xargs -0 rm -f -- 30 | rm files.txt 31 | 32 | sremove : 33 | $(PYTHON) setup.py install --record files.txt 34 | tr '\n' '\0' < files.txt | sudo xargs -0 rm -f -- 35 | rm files.txt 36 | 37 | clean : FORCE 38 | $(PYTHON) setup.py clean 39 | 40 | uploadpypi : 41 | #python setup.py register 42 | $(PYTHON) setup.py sdist 43 | twine upload dist/* 44 | 45 | doc : 46 | m2r README.md CHANGELOG.md 47 | mv README.rst CHANGELOG.rst docs/source/ 48 | cd docs/ && make html 49 | 50 | doc_full : 51 | m2r README.md CHANGELOG.md 52 | mv README.rst CHANGELOG.rst docs/source/ 53 | rm -rf docs/source/auto_examples/ 54 | rm -rf docs/source/modules/ 55 | rm -rf docs/build/ 56 | cd docs/ && make html 57 | 58 | autopep8 : 59 | autopep8 -ir museotoolbox --jobs -1 60 | 61 | aautopep8 : 62 | autopep8 -air -a -a -a 
-a museotoolbox --jobs -1 63 | 64 | pytest : 65 | pytest-3 -v --cov=museotoolbox 66 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Museo ToolBox logo](https://github.com/nkarasiak/MuseoToolBox/raw/master/metadata/museoToolBox_logo_128.png) 2 | 3 | [![Build status](https://api.travis-ci.org/nkarasiak/MuseoToolBox.svg?branch=master)](https://travis-ci.org/nkarasiak/MuseoToolBox) 4 | [![Documentation status](https://readthedocs.org/projects/museotoolbox/badge/?version=latest)](https://museotoolbox.readthedocs.io/en/latest/?badge=latest) 5 | [![codecov](https://codecov.io/gh/nkarasiak/MuseoToolBox/branch/master/graph/badge.svg)](https://codecov.io/gh/nkarasiak/MuseoToolBox) 6 | [![PyPI version](https://badge.fury.io/py/museotoolbox.svg)](https://badge.fury.io/py/museotoolbox) 7 | [![Conda version](https://camo.githubusercontent.com/074cca1cb04798ef7b05419795c800130e47273b/68747470733a2f2f696d672e736869656c64732e696f2f636f6e64612f766e2f636f6e64612d666f7267652f6d7573656f746f6f6c626f782e737667)](https://anaconda.org/conda-forge/museotoolbox) 8 | [![Downloads](https://pepy.tech/badge/museotoolbox)](https://pepy.tech/project/museotoolbox) 9 | [![status](https://joss.theoj.org/papers/1f4762d9910093a08034e8f4de441930/status.svg)](https://joss.theoj.org/papers/1f4762d9910093a08034e8f4de441930) 10 | 11 | **Museo ToolBox** is a python library to simplify the use of raster/vector, especially for machine learning and remote sensing. It is now easy to extract raster values from vector polygons and to do some spatial/unspatial cross-validation for scikit-learn from raster. 
12 | 13 | One of the most meaningful contribution is, in my humble opinion, the [RasterMath](https://museotoolbox.readthedocs.io/en/latest/modules/processing/museotoolbox.processing.RasterMath.html) class and the [spatial cross-validation](https://museotoolbox.readthedocs.io/en/latest/modules/museotoolbox.cross_validation.html#module-museotoolbox.cross_validation). 14 | 15 | ## What's the point ? 16 | 17 | Today, the main usages of Museo ToolBox are : 18 | - [museotoolbox.cross_validation](https://museotoolbox.readthedocs.io/en/latest/modules/museotoolbox.cross_validation.html#module-museotoolbox.cross_validation) 19 | - Create validation/training sets from vector, and cross-validation compatible with Scikit-Learn GridSearchCV. The aim is here to **promote the spatial cross-validation** in order to better estimate a model (with a lower spatial auto-correlation overestimation). 20 | - [museotoolbox.processing](https://museotoolbox.readthedocs.io/en/latest/modules/museotoolbox.processing.html) 21 | - [RasterMath](https://museotoolbox.readthedocs.io/en/latest/modules/processing/museotoolbox.processing.RasterMath.html), allows you to apply any of your array-compatible function on your raster and save it. Just load RasterMath, then it will return you the value for each pixel (in all bands) and now you can do whatever you want : predicting a model, smooth signal (whittaker, double logistic...), compute modal value... RasterMath reads and writes a raster block per block to avoid loading the full image in memory. It is compatible with every python function (including numpy) as the first and only argument RasterMath needs on your function is an array. 22 | - Extract bands values from vector ROI (polygons/points) (function : [extract_ROI](https://museotoolbox.readthedocs.io/en/latest/modules/processing/museotoolbox.processing.extract_ROI.html#museotoolbox.processing.extract_ROI)) 23 | - AI based on Scikit-Learn. 
[SuperLearner](https://museotoolbox.readthedocs.io/en/latest/modules/ai/museotoolbox.ai.SuperLearner.html#museotoolbox.ai.SuperLearner) simplifies the use of cross-validation by extracting each accuracy (kappa,F1,OA, and above all confusion matrix) from each fold. It also eases the way to predict a raster (just give the raster name and the model). 24 | 25 | ## That seems cool, but is there some help to use this ? 26 | 27 | I imagined Museo ToolBox as a tool to simplify raster processing and to promote spatial cross-validation, so of course there is some help : [a complete documentation with a lot of examples is available on readthedocs](https://museotoolbox.readthedocs.org/). 28 | 29 | ## How do I install Museo ToolBox ? 30 | 31 | We recommend you to install Museo ToolBox via conda as it includes gdal dependency : 32 | 33 | ```shell 34 | conda install -c conda-forge museotoolbox 35 | ``` 36 | 37 | However, if you prefer to install this library via pip, you need to install first gdal, then : 38 | 39 | ```shell 40 | python3 -m pip install museotoolbox --user 41 | ``` 42 | 43 | For early-adopters, you can install the latest development version directly from git : 44 | 45 | ```shell 46 | python3 -m pip install https://github.com/nkarasiak/museotoolbox/archive/develop.zip --user -U 47 | ``` 48 | 49 | Feel free to remove the `--user` if you like to install the library for every user on the machine or if some dependencies need root access. `-U` is for update if a newer version exists. 
50 | 51 | ### Using and citing the toolbox 52 | 53 | If you use Museo ToolBox in your research and find it useful, please cite this library using the following bibtex reference: 54 | 55 | ```bib 56 | @article{Karasiak2020, 57 | doi = {10.21105/joss.01978}, 58 | url = {https://doi.org/10.21105/joss.01978}, 59 | year = {2020}, 60 | publisher = {The Open Journal}, 61 | volume = {5}, 62 | number = {48}, 63 | pages = {1978}, 64 | author = {Nicolas Karasiak}, 65 | title = {Museo ToolBox: A Python library for remote sensing including a new way to handle rasters.}, 66 | journal = {Journal of Open Source Software} 67 | } 68 | ``` 69 | Or copy this citation : 70 | 71 | > Karasiak, N., (2020). Museo ToolBox: A Python library for remote sensing including a new way to handle rasters.. Journal of Open Source Software, 5(48), 1978, https://doi.org/10.21105/joss.01978 72 | 73 | ## I want to improve Museo ToolBox, how can I contribute ? 74 | 75 | To contribute to this package, please read the instructions in [CONTRIBUTING.rst](CONTRIBUTING.rst). 76 | 77 | ## Who built Museo ToolBox ? 78 | 79 | I am [Nicolas Karasiak](http://www.karasiak.net), a Phd student at Dynafor Lab. I work tree species mapping from space throught dense satellite image time series, especially with Sentinel-2. A special thanks goes to [Mathieu Fauvel](http://fauvel.mathieu.free.fr/) who initiated me to the beautiful world of the open-source. 80 | 81 | ## Why this name ? 82 | As Orfeo ToolBox is one my favorite and most useful library to work with raster data, I choose to name my work as Museo because in ancient Greek religion and myth, Museo is the son and disciple of Orfeo. If you want an acronym, let's say MUSEO means 'Multiple Useful Services for Earth Observation'. 
83 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pkg='museotoolbox' 4 | array=( 3.5 3.6 3.7 ) 5 | 6 | echo "Building conda package ..." 7 | cd ~ 8 | conda skeleton pypi $pkg 9 | cd $pkg 10 | wget https://conda.io/docs/_downloads/build1.sh 11 | wget https://conda.io/docs/_downloads/bld.bat 12 | cd ~ 13 | 14 | # building conda packages 15 | for i in "${array[@]}" 16 | do 17 | conda-build --python $i $pkg 18 | done 19 | 20 | # convert package for other platforms 21 | cd ~ 22 | platforms=( osx-64 linux-32 linux-64 win-32 win-64 ) 23 | find $HOME/conda-bld/linux-64/ -name *.tar.bz2 | while read file 24 | do 25 | echo $file 26 | #conda convert --platform all $file -o $HOME/conda-bld/ 27 | for platform in "${platforms[@]}" 28 | do 29 | conda convert --platform $platform $file -o $HOME/conda-bld/ 30 | done 31 | done 32 | 33 | # upload packages to conda 34 | find $HOME/conda-bld/ -name *.tar.bz2 | while read file 35 | do 36 | echo $file 37 | anaconda upload $file 38 | done 39 | 40 | echo "Building conda package done!" 41 | 42 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | codecov: 2 | token: cfea4c84-c081-448b-b6bc-1b84891d5f07 3 | 4 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. 
Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 21 | 22 | .PHONY: help 23 | help: 24 | @echo "Please use \`make ' where is one of" 25 | @echo " html to make standalone HTML files" 26 | @echo " dirhtml to make HTML files named index.html in directories" 27 | @echo " singlehtml to make a single large HTML file" 28 | @echo " pickle to make pickle files" 29 | @echo " json to make JSON files" 30 | @echo " htmlhelp to make HTML files and a HTML help project" 31 | @echo " qthelp to make HTML files and a qthelp project" 32 | @echo " applehelp to make an Apple Help Book" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all 
external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | @echo " coverage to run coverage check of the documentation (if enabled)" 49 | 50 | .PHONY: clean 51 | clean: 52 | rm -rf $(BUILDDIR)/* 53 | 54 | .PHONY: html 55 | html: 56 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 57 | @echo 58 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 59 | 60 | .PHONY: dirhtml 61 | dirhtml: 62 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 63 | @echo 64 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 65 | 66 | .PHONY: singlehtml 67 | singlehtml: 68 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 69 | @echo 70 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 71 | 72 | .PHONY: pickle 73 | pickle: 74 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 75 | @echo 76 | @echo "Build finished; now you can process the pickle files." 77 | 78 | .PHONY: json 79 | json: 80 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 81 | @echo 82 | @echo "Build finished; now you can process the JSON files." 83 | 84 | .PHONY: htmlhelp 85 | htmlhelp: 86 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 87 | @echo 88 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 89 | ".hhp project file in $(BUILDDIR)/htmlhelp." 
90 | 91 | .PHONY: qthelp 92 | qthelp: 93 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 94 | @echo 95 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 96 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 97 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/MTB.qhcp" 98 | @echo "To view the help file:" 99 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/MTB.qhc" 100 | 101 | .PHONY: applehelp 102 | applehelp: 103 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 104 | @echo 105 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 106 | @echo "N.B. You won't be able to view it unless you put it in" \ 107 | "~/Library/Documentation/Help or install it in your application" \ 108 | "bundle." 109 | 110 | .PHONY: devhelp 111 | devhelp: 112 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 113 | @echo 114 | @echo "Build finished." 115 | @echo "To view the help file:" 116 | @echo "# mkdir -p $$HOME/.local/share/devhelp/MTB" 117 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/MTB" 118 | @echo "# devhelp" 119 | 120 | .PHONY: epub 121 | epub: 122 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 123 | @echo 124 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 125 | 126 | .PHONY: latex 127 | latex: 128 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 129 | @echo 130 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 131 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 132 | "(use \`make latexpdf' here to do that automatically)." 133 | 134 | .PHONY: latexpdf 135 | latexpdf: 136 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 137 | @echo "Running LaTeX files through pdflatex..." 138 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 139 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 
140 | 141 | .PHONY: latexpdfja 142 | latexpdfja: 143 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 144 | @echo "Running LaTeX files through platex and dvipdfmx..." 145 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 146 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 147 | 148 | .PHONY: text 149 | text: 150 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 151 | @echo 152 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 153 | 154 | .PHONY: man 155 | man: 156 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 157 | @echo 158 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 159 | 160 | .PHONY: texinfo 161 | texinfo: 162 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 163 | @echo 164 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 165 | @echo "Run \`make' in that directory to run these through makeinfo" \ 166 | "(use \`make info' here to do that automatically)." 167 | 168 | .PHONY: info 169 | info: 170 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 171 | @echo "Running Texinfo files through makeinfo..." 172 | make -C $(BUILDDIR)/texinfo info 173 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 174 | 175 | .PHONY: gettext 176 | gettext: 177 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 178 | @echo 179 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 180 | 181 | .PHONY: changes 182 | changes: 183 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 184 | @echo 185 | @echo "The overview file is in $(BUILDDIR)/changes." 186 | 187 | .PHONY: linkcheck 188 | linkcheck: 189 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 190 | @echo 191 | @echo "Link check complete; look for any errors in the above output " \ 192 | "or in $(BUILDDIR)/linkcheck/output.txt." 
193 | 194 | .PHONY: doctest 195 | doctest: 196 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 197 | @echo "Testing of doctests in the sources finished, look at the " \ 198 | "results in $(BUILDDIR)/doctest/output.txt." 199 | 200 | .PHONY: coverage 201 | coverage: 202 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 203 | @echo "Testing of coverage in the sources finished, look at the " \ 204 | "results in $(BUILDDIR)/coverage/python.txt." 205 | 206 | .PHONY: xml 207 | xml: 208 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 209 | @echo 210 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 211 | 212 | .PHONY: pseudoxml 213 | pseudoxml: 214 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 215 | @echo 216 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 217 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source 10 | set I18NSPHINXOPTS=%SPHINXOPTS% source 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. 
devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | echo. coverage to run coverage check of the documentation if enabled 41 | goto end 42 | ) 43 | 44 | if "%1" == "clean" ( 45 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 46 | del /q /s %BUILDDIR%\* 47 | goto end 48 | ) 49 | 50 | 51 | REM Check if sphinx-build is available and fallback to Python version if any 52 | %SPHINXBUILD% 1>NUL 2>NUL 53 | if errorlevel 9009 goto sphinx_python 54 | goto sphinx_ok 55 | 56 | :sphinx_python 57 | 58 | set SPHINXBUILD=python -m sphinx.__init__ 59 | %SPHINXBUILD% 2> nul 60 | if errorlevel 9009 ( 61 | echo. 62 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 63 | echo.installed, then set the SPHINXBUILD environment variable to point 64 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 65 | echo.may add the Sphinx directory to PATH. 66 | echo. 67 | echo.If you don't have Sphinx installed, grab it from 68 | echo.http://sphinx-doc.org/ 69 | exit /b 1 70 | ) 71 | 72 | :sphinx_ok 73 | 74 | 75 | if "%1" == "html" ( 76 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 77 | if errorlevel 1 exit /b 1 78 | echo. 79 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 80 | goto end 81 | ) 82 | 83 | if "%1" == "dirhtml" ( 84 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 85 | if errorlevel 1 exit /b 1 86 | echo. 
87 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 88 | goto end 89 | ) 90 | 91 | if "%1" == "singlehtml" ( 92 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 93 | if errorlevel 1 exit /b 1 94 | echo. 95 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 96 | goto end 97 | ) 98 | 99 | if "%1" == "pickle" ( 100 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 101 | if errorlevel 1 exit /b 1 102 | echo. 103 | echo.Build finished; now you can process the pickle files. 104 | goto end 105 | ) 106 | 107 | if "%1" == "json" ( 108 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 109 | if errorlevel 1 exit /b 1 110 | echo. 111 | echo.Build finished; now you can process the JSON files. 112 | goto end 113 | ) 114 | 115 | if "%1" == "htmlhelp" ( 116 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 117 | if errorlevel 1 exit /b 1 118 | echo. 119 | echo.Build finished; now you can run HTML Help Workshop with the ^ 120 | .hhp project file in %BUILDDIR%/htmlhelp. 121 | goto end 122 | ) 123 | 124 | if "%1" == "qthelp" ( 125 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 129 | .qhcp project file in %BUILDDIR%/qthelp, like this: 130 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\POT.qhcp 131 | echo.To view the help file: 132 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\POT.ghc 133 | goto end 134 | ) 135 | 136 | if "%1" == "devhelp" ( 137 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 138 | if errorlevel 1 exit /b 1 139 | echo. 140 | echo.Build finished. 141 | goto end 142 | ) 143 | 144 | if "%1" == "epub" ( 145 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 146 | if errorlevel 1 exit /b 1 147 | echo. 148 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 
149 | goto end 150 | ) 151 | 152 | if "%1" == "latex" ( 153 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 154 | if errorlevel 1 exit /b 1 155 | echo. 156 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 157 | goto end 158 | ) 159 | 160 | if "%1" == "latexpdf" ( 161 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 162 | cd %BUILDDIR%/latex 163 | make all-pdf 164 | cd %~dp0 165 | echo. 166 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 167 | goto end 168 | ) 169 | 170 | if "%1" == "latexpdfja" ( 171 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 172 | cd %BUILDDIR%/latex 173 | make all-pdf-ja 174 | cd %~dp0 175 | echo. 176 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 177 | goto end 178 | ) 179 | 180 | if "%1" == "text" ( 181 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 182 | if errorlevel 1 exit /b 1 183 | echo. 184 | echo.Build finished. The text files are in %BUILDDIR%/text. 185 | goto end 186 | ) 187 | 188 | if "%1" == "man" ( 189 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 190 | if errorlevel 1 exit /b 1 191 | echo. 192 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 193 | goto end 194 | ) 195 | 196 | if "%1" == "texinfo" ( 197 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 198 | if errorlevel 1 exit /b 1 199 | echo. 200 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 201 | goto end 202 | ) 203 | 204 | if "%1" == "gettext" ( 205 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 206 | if errorlevel 1 exit /b 1 207 | echo. 208 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 209 | goto end 210 | ) 211 | 212 | if "%1" == "changes" ( 213 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 214 | if errorlevel 1 exit /b 1 215 | echo. 216 | echo.The overview file is in %BUILDDIR%/changes. 
217 | goto end 218 | ) 219 | 220 | if "%1" == "linkcheck" ( 221 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 222 | if errorlevel 1 exit /b 1 223 | echo. 224 | echo.Link check complete; look for any errors in the above output ^ 225 | or in %BUILDDIR%/linkcheck/output.txt. 226 | goto end 227 | ) 228 | 229 | if "%1" == "doctest" ( 230 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 231 | if errorlevel 1 exit /b 1 232 | echo. 233 | echo.Testing of doctests in the sources finished, look at the ^ 234 | results in %BUILDDIR%/doctest/output.txt. 235 | goto end 236 | ) 237 | 238 | if "%1" == "coverage" ( 239 | %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage 240 | if errorlevel 1 exit /b 1 241 | echo. 242 | echo.Testing of coverage in the sources finished, look at the ^ 243 | results in %BUILDDIR%/coverage/python.txt. 244 | goto end 245 | ) 246 | 247 | if "%1" == "xml" ( 248 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 249 | if errorlevel 1 exit /b 1 250 | echo. 251 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 252 | goto end 253 | ) 254 | 255 | if "%1" == "pseudoxml" ( 256 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 257 | if errorlevel 1 exit /b 1 258 | echo. 259 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 260 | goto end 261 | ) 262 | 263 | :end 264 | -------------------------------------------------------------------------------- /docs/source/CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | 2 | Changelog 3 | ========= 4 | 5 | All notable changes to this project will be documented in this file. 6 | 7 | The format is based on `Keep a Changelog `_\ , 8 | and this project adheres to `Semantic Versioning `_. 9 | 10 | [0.13.6] - 2020-07-19 11 | --------------------- 12 | 13 | Added 14 | ^^^^^ 15 | 16 | 17 | * RasterMath get_image_as_array() now supports mask. 
18 | 19 | Fixed 20 | ^^^^^ 21 | 22 | 23 | * Update groups management for sklearn>=0.25 24 | * Fixed bug with raster/vector datasets (mtb.datasets.load_historical_data()) 25 | * Fixed bug with get_image_as_array() from RasterMath (completely rewrite this part) 26 | 27 | [0.13.5] - 2020-06-24 28 | --------------------- 29 | 30 | Fixed 31 | ^^^^^ 32 | 33 | 34 | * Requirements is not directly written inside setup.py due to bugs. 35 | 36 | [0.13.4] - 2020-06-24 37 | --------------------- 38 | 39 | Fixed 40 | ^^^^^ 41 | 42 | 43 | * Fix bug in setup.py using requirements.txt instead of ./requirements.txt 44 | 45 | [0.13.3] - 2020-06-23 46 | --------------------- 47 | 48 | Fixed 49 | ^^^^^ 50 | 51 | 52 | * Adding psutil to depency 53 | 54 | [0.13.2] - 2020-06-18 55 | --------------------- 56 | 57 | Added 58 | ^^^^^ 59 | 60 | 61 | * get_image_as_array function for RasterMath 62 | 63 | Fixed 64 | ^^^^^ 65 | 66 | 67 | * train_test_split supports now groups=None 68 | 69 | [0.13.1] - 2020-06-11 70 | --------------------- 71 | 72 | Added 73 | ^^^^^ 74 | 75 | 76 | * Support list for cross-validation in order to give an unready unfolded cv. 
77 | 78 | New features provided by @marclang for the charts module : 79 | 80 | 81 | * Allows to display both F1 and accuracy or mean metrics 82 | * Allows to display accuracy after have been displaying mean (and vice versa) 83 | * Allows to display float matrix 84 | 85 | Changed 86 | ^^^^^^^ 87 | 88 | 89 | * Fix path separator to access tutorial dataset 90 | 91 | [0.13.0] - 2020-04-21 92 | --------------------- 93 | 94 | Changed 95 | ^^^^^^^ 96 | 97 | 98 | * Final version for JOSS (paper.md and paper.bib updated thanks to @kbarnhart) 99 | 100 | [0.12.1-rc.1] - 2020-04-18 101 | -------------------------- 102 | 103 | Added 104 | ^^^^^ 105 | 106 | 107 | * RasterMath use available memory to speed up process and manage now several cores (n_jobs) 108 | * train_test_split in cross_validation module 109 | 110 | Changed 111 | ^^^^^^^ 112 | 113 | 114 | * Enhance mask management for RasterMath 115 | * Move FlushCache to optimize RasterMath 116 | * RasterMath get_random_block returns only block which are not totally unmasked 117 | * charts.PlotConfusionMatrix has a default argument (zero_is_min=True) 118 | 119 | [0.12.1-beta.2] - 2020-02-10 120 | ---------------------------- 121 | 122 | Fixed 123 | ^^^^^ 124 | 125 | 126 | * Fix bug when in RasterMath when input is only one band 127 | * Fix bug in RasterMath with mask and list 128 | 129 | Added 130 | ^^^^^ 131 | 132 | 133 | * n_jobs for RasterMath (thanks to Helene @HTDBD and Arthur @ArthurDfs, two great students) 134 | * function write_block and generally a most intuitive way to use RasterMath (with the help of @HTDBD and @ArthurDfs) 135 | 136 | [0.12.1-beta.1] - 2020-01-16 137 | ---------------------------- 138 | 139 | Added 140 | ^^^^^ 141 | 142 | 143 | * new branch spatial added 144 | 145 | Added 146 | ^^^^^ 147 | 148 | 149 | * Added this line 150 | 151 | Changed 152 | ^^^^^^^ 153 | 154 | 155 | * SequentialFeatureSelection parameters order Changed. *scoring* is now before *standardize*. 
156 | * Update doc for load_historical_data() 157 | 158 | Fixed 159 | ^^^^^ 160 | 161 | 162 | * Fix bug in get_block() and get_random_block() which returned the same block each time due to new method. 163 | * Fix bug with nodata in RasterMath when output is of float type 164 | 165 | [0.12] - 2019-12-13 166 | ------------------- 167 | 168 | Changed 169 | ^^^^^^^ 170 | 171 | 172 | * RasterMath made a lot of improvements using block reading and writing. For example, the default block size is now 256x256 (you can keep the default block size by choosing block_size=False), and Museo ToolBox automatic detect if the geotiff will be tiled or not (it depends on the block size). 173 | * Some folders have Changed name : 174 | 175 | * raster_tools and vector_tools to processing 176 | * learn_tools to ai 177 | 178 | * some functions have Changed name : 179 | 180 | * getSamplesFromROI to extract_values 181 | * historicalMap to load_historical_data 182 | * getDistanceMatrix to get_distance_matrix 183 | 184 | * classes now always begin with a capital case : 185 | 186 | * learnAndPredict to SuperLearner 187 | * rasterMath to RasterMath 188 | * sequentialFeatureSelection to SequentialFeatureSelection 189 | 190 | Fixed 191 | ^^^^^ 192 | 193 | 194 | * bug #7 : getSamplesFromROI (nowd extract_ROI) now extracts ROI values using by default memory. If it fails, it will create a temporary raster on disk then delete it when finished. 195 | 196 | Removed 197 | ^^^^^^^ 198 | 199 | 200 | * Remove command lines (cli) 201 | 202 | [0.12rc5] - 2019-11-11 203 | ---------------------- 204 | 205 | Changed 206 | ^^^^^^^ 207 | 208 | 209 | * getSamplesFromROI return list of available fields if wrong field given. 
210 | * rasterMath convert np.nan value to nodata value (if numpy >= 1.17) 211 | 212 | [0.12rc4] - 2019-11-01 213 | ---------------------- 214 | 215 | Changed 216 | ^^^^^^^ 217 | 218 | 219 | * Minor fix when using learnAndPredict with an outside customized function 220 | * Better management fo cross-validation in learnAndPredict 221 | * Fix minor bug using False or None value with cv in learnAndPredict 222 | 223 | Added 224 | ^^^^^ 225 | 226 | 227 | * Add an option to use SFS without writing each best model on the disk. 228 | 229 | [0.12rc3] - 2019-10-29 230 | ---------------------- 231 | 232 | Added 233 | ^^^^^ 234 | 235 | 236 | * Move some functions from vector_tools to raster_tools, functions are anyway still available from vector_tools 237 | 238 | Changed 239 | ^^^^^^^ 240 | 241 | 242 | * learnAndPredict manages int value for cross-validation by using RandomStratifiedKFold 243 | * Enhance blocksize management for rasterMath 244 | * Move command line code in _cli folder 245 | 246 | [0.12rc2] - 2019-10-14 247 | ---------------------- 248 | 249 | Changed 250 | ^^^^^^^ 251 | 252 | 253 | * Improvements of rasterMath 254 | 255 | * customBlockSize defines now the same block size for window reading and for the output 256 | * add seed parameter (to set a random generator) in getRandomBlock() 257 | * add getRasterParameters() and customRasterParameters() function. 258 | 259 | [0.12rc1] - 2019-10-12 260 | ---------------------- 261 | 262 | Changed 263 | ^^^^^^^ 264 | 265 | 266 | * update rasterMath to generate by default a 256*256 raster block size. 267 | * update rasterMath to prevent bug if user has osgeo/gdal version is lower than 2.1. 268 | * prevent bug when in rasterMath if processor has only 1 core. 
269 | 270 | Fixed 271 | ^^^^^ 272 | 273 | 274 | * minor fixes 275 | -------------------------------------------------------------------------------- /docs/source/CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | ===================================== 2 | How to contribute to this package 3 | ===================================== 4 | 5 | This document describes how to edit the package, run the tests, build the docs, put tagged versions on PyPI_, etc. 6 | 7 | Editing the project 8 | --------------------- 9 | 10 | Developpment environnement 11 | +++++++++++++++++++++++++++ 12 | 13 | We're using conda environment to install all dependencies needed for Museo ToolBox developpement. 14 | 15 | - ``conda env create -f .environment.yml`` 16 | 17 | This will create an environnement called ``museotoolbox-dev``, to activate it, just type : 18 | 19 | - ``conda active museotoolbox-dev`` 20 | 21 | 22 | Package structure 23 | ++++++++++++++++++ 24 | - Package code is in `museotoolbox `_ folder. 25 | - Docs can be generated by typing ``Make doc`` at the root of folder. 26 | - Unit tests are in `test `_ folder. 27 | 28 | Modify the code via pull requests 29 | +++++++++++++++++++++++++++++++++++ 30 | To make changes to the code, you should make a branch or fork, make your changes, and then submit a pull request. 31 | If you aren't sure about pull requests: 32 | 33 | - A general description of pull requests: https://help.github.com/en/articles/about-pull-requests 34 | 35 | - How to create a pull request: https://help.github.com/en/articles/creating-a-pull-request 36 | 37 | - How to structure your pull requests (one conceptually distinct change per pull request): https://medium.com/@fagnerbrack/one-pull-request-one-concern-e84a27dfe9f1 38 | 39 | Tests and documentation 40 | +++++++++++++++++++++++ 41 | You should document your code clearly with `numpy style documentation`_. 42 | You should add tests. 
43 | For simple things, these can be `doctests `_ in the code. 44 | For more elaborate functionality, put unit tests in test. 45 | 46 | Versions and CHANGELOG 47 | ++++++++++++++++++++++ 48 | The version is `single sourced `_ in `__init__.py`_. 49 | When modifying a tagged version (e.g., ``0.1.0``), indicate you are working on a development version by adding a ``dev`` (e.g., ``0.1.dev1``). 50 | See `here `_ for more information on version numbers. 51 | 52 | Conceptual descriptions of changes should also be tracked in the CHANGELOG_. 53 | 54 | Adding dependencies 55 | +++++++++++++++++++++ 56 | When you add code that uses a new package that is not in the standard python library, you should add it to the dependencies specified under the ``install_requires`` option in `setup.py `_. 57 | `See here `_ for information on how to do this, and how to specify minimal required versions. 58 | As described in the above link, you should **not** pin exact versions in ``install_requires`` in `setup.py `_ unless absolutely necessary. 59 | 60 | 61 | Testing 62 | --------- 63 | 64 | Adding tests 65 | ++++++++++++++ 66 | As you add new codes, you should create tests to make sure it is working correctly. 67 | These can include: 68 | 69 | - doctests in the code 70 | 71 | - unit tests in the `./test/ `_ subdirectory 72 | 73 | Running the tests locally 74 | ++++++++++++++++++++++++++ 75 | After you make changes, you should run two sets of tests. 76 | To run the tests, go to the top-level package directory. 77 | 78 | Then run the tests with pytest_ under the ``museotoolbox-dev`` conda environnement by running : 79 | 80 | - ``make pytest`` 81 | 82 | 83 | Building the documentation 84 | +++++++++++++++++++++++++++ 85 | The documentation will be built for each new commit in the ``master`` branch. 
86 | However, you can build your own documentation in order by typing under the ``museotoolbox-dev`` conda environnement : 87 | 88 | - ``make ddoc`` 89 | 90 | 91 | Automated testing on Travis 92 | +++++++++++++++++++++++++++ 93 | The aforementioned pytest_ tests will be run automatically by the Travis_ continuous integration system as specified in the `.travis.yml <.travis.yml>`_ file. 94 | Note that running the Travis_ tests requires you to register the project with Travis_. 95 | 96 | If the tests are passing, you will see this on the Travis_ badge on GitHub repo main page. 97 | 98 | 99 | Tagging versions and putting on PyPI 100 | ------------------------------------- 101 | When you have a new stable release, you will want to tag it and put it on PyPI_ where it can be installed with pip_. 102 | First, make sure the version number is up-to-date in `__init__.py`_ and the CHANGELOG_. 103 | Then commit the code to GitHub if you haven't already done so. 104 | Next tag the version, as in:: 105 | 106 | git tag -a 0.1.0 -m 'version 0.1.0' 107 | 108 | and then push the tag to GitHub with:: 109 | 110 | git push --tags 111 | 112 | Finally, with Travis_, specify your pypi account in `.travis.yml <.travis.yml>`_ file to publish a new version on pypi when you publish a stable release on github. 113 | 114 | Note that this requires you to have registered the package on PyPI_ if this is the first version of the package there. 115 | 116 | .. _pytest: https://docs.pytest.org 117 | .. _Travis: https://docs.travis-ci.com 118 | .. _PyPI: https://pypi.org/ 119 | .. _pip: https://pip.pypa.io 120 | .. _sphinx: https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html 121 | .. _test: test 122 | .. _docs: docs 123 | .. _notebooks: notebooks 124 | .. _`Jupyter notebooks`: https://jupyter.org/ 125 | .. _`__init__.py`: museotoolbox/__init__.py 126 | .. _CHANGELOG: CHANGELOG.md 127 | .. 
_`numpy style documentation`: https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_numpy.html 128 | -------------------------------------------------------------------------------- /docs/source/Makefile.txt: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = MuseoToolBox 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/source/README.rst: -------------------------------------------------------------------------------- 1 | .. role:: raw-html-m2r(raw) 2 | :format: html 3 | 4 | 5 | 6 | .. image:: https://github.com/nkarasiak/MuseoToolBox/raw/master/metadata/museoToolBox_logo_128.png 7 | :target: https://github.com/nkarasiak/MuseoToolBox/raw/master/metadata/museoToolBox_logo_128.png 8 | :alt: Museo ToolBox logo 9 | 10 | 11 | 12 | .. image:: https://api.travis-ci.org/nkarasiak/MuseoToolBox.svg?branch=master 13 | :target: https://travis-ci.org/nkarasiak/MuseoToolBox 14 | :alt: Build status 15 | 16 | 17 | .. image:: https://readthedocs.org/projects/museotoolbox/badge/?version=latest 18 | :target: https://museotoolbox.readthedocs.io/en/latest/?badge=latest 19 | :alt: Documentation status 20 | 21 | 22 | .. 
image:: https://codecov.io/gh/nkarasiak/MuseoToolBox/branch/master/graph/badge.svg 23 | :target: https://codecov.io/gh/nkarasiak/MuseoToolBox 24 | :alt: codecov 25 | 26 | 27 | .. image:: https://badge.fury.io/py/museotoolbox.svg 28 | :target: https://badge.fury.io/py/museotoolbox 29 | :alt: PyPI version 30 | 31 | 32 | .. image:: https://camo.githubusercontent.com/074cca1cb04798ef7b05419795c800130e47273b/68747470733a2f2f696d672e736869656c64732e696f2f636f6e64612f766e2f636f6e64612d666f7267652f6d7573656f746f6f6c626f782e737667 33 | :target: https://anaconda.org/conda-forge/museotoolbox 34 | :alt: Conda version 35 | 36 | 37 | .. image:: https://pepy.tech/badge/museotoolbox 38 | :target: https://pepy.tech/project/museotoolbox 39 | :alt: Downloads 40 | 41 | 42 | .. image:: https://joss.theoj.org/papers/1f4762d9910093a08034e8f4de441930/status.svg 43 | :target: https://joss.theoj.org/papers/1f4762d9910093a08034e8f4de441930 44 | :alt: status 45 | 46 | 47 | **Museo ToolBox** is a python library to simplify the use of raster/vector, especially for machine learning and remote sensing. It is now easy to extract raster values from vector polygons and to do some spatial/unspatial cross-validation for scikit-learn from raster. 48 | 49 | One of the most meaningful contribution is, in my humble opinion, the `RasterMath `_ class and the `spatial cross-validation `_. 50 | 51 | What's the point ? 52 | ------------------ 53 | 54 | Today, the main usages of Museo ToolBox are : 55 | 56 | 57 | * `museotoolbox.cross_validation `_ 58 | 59 | * Create validation/training sets from vector, and cross-validation compatible with Scikit-Learn GridSearchCV. The aim is here to **promote the spatial cross-validation** in order to better estimate a model (with a lower spatial auto-correlation overestimation). 60 | 61 | * `museotoolbox.processing `_ 62 | 63 | * `RasterMath `_\ , allows you to apply any of your array-compatible function on your raster and save it. 
Just load RasterMath, then it will return you the value for each pixel (in all bands) and now you can do whatever you want : predicting a model, smooth signal (whittaker, double logistic...), compute modal value... RasterMath reads and writes a raster block per block to avoid loading the full image in memory. It is compatible with every python function (including numpy) as the first and only argument RasterMath needs on your function is an array. 64 | * Extract bands values from vector ROI (polygons/points) (function : `extract_ROI `_\ ) 65 | 66 | * AI based on Scikit-Learn. `SuperLearner `_ simplifies the use of cross-validation by extracting each accuracy (kappa,F1,OA, and above all confusion matrix) from each fold. It also eases the way to predict a raster (just give the raster name and the model). 67 | 68 | That seems cool, but is there some help to use this ? 69 | ----------------------------------------------------- 70 | 71 | I imagined Museo ToolBox as a tool to simplify raster processing and to promote spatial cross-validation, so of course there is some help : `a complete documentation with a lot of examples is available on readthedocs `_. 72 | 73 | How do I install Museo ToolBox ? 74 | -------------------------------- 75 | 76 | We recommend you to install Museo ToolBox via conda as it includes gdal dependency : 77 | 78 | .. code-block:: shell 79 | 80 | conda install -c conda-forge museotoolbox 81 | 82 | However, if you prefer to install this library via pip, you need to install first gdal, then : 83 | 84 | .. code-block:: shell 85 | 86 | python3 -m pip install museotoolbox --user 87 | 88 | For early-adopters, you can install the latest development version directly from git : 89 | 90 | .. 
code-block:: shell 91 | 92 | python3 -m pip install https://github.com/nkarasiak/museotoolbox/archive/develop.zip --user -U 93 | 94 | Feel free to remove the ``--user`` if you like to install the library for every user on the machine or if some dependencies need root access. ``-U`` is for update if a newer version exists. 95 | 96 | Using and citing the toolbox 97 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 98 | 99 | If you use Museo ToolBox in your research and find it useful, please cite this library using the following bibtex reference: 100 | 101 | .. code-block:: bib 102 | 103 | @article{Karasiak2020, 104 | doi = {10.21105/joss.01978}, 105 | url = {https://doi.org/10.21105/joss.01978}, 106 | year = {2020}, 107 | publisher = {The Open Journal}, 108 | volume = {5}, 109 | number = {48}, 110 | pages = {1978}, 111 | author = {Nicolas Karasiak}, 112 | title = {Museo ToolBox: A Python library for remote sensing including a new way to handle rasters.}, 113 | journal = {Journal of Open Source Software} 114 | } 115 | 116 | Or copy this citation : 117 | 118 | .. 119 | 120 | Karasiak, N., (2020). Museo ToolBox: A Python library for remote sensing including a new way to handle rasters.. Journal of Open Source Software, 5(48), 1978, https://doi.org/10.21105/joss.01978 121 | 122 | 123 | I want to improve Museo ToolBox, how can I contribute ? 124 | ------------------------------------------------------- 125 | 126 | To contribute to this package, please read the instructions in `CONTRIBUTING.rst `_. 127 | 128 | Who built Museo ToolBox ? 129 | ------------------------- 130 | 131 | I am `Nicolas Karasiak `_\ , a Phd student at Dynafor Lab. I work tree species mapping from space throught dense satellite image time series, especially with Sentinel-2. A special thanks goes to `Mathieu Fauvel `_ who initiated me to the beautiful world of the open-source. 132 | 133 | Why this name ? 
As Orfeo ToolBox is one of my favorite and most useful libraries to work with raster data, I chose to name my work as Museo
39 | -------------------------------------------------------------------------------- /docs/source/_templates/function.rst: -------------------------------------------------------------------------------- 1 | {{ objname }} 2 | {{ underline }} 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autofunction:: {{ objname }} 7 | 8 | .. include:: ../backreferences/{{fullname}}.examples 9 | 10 | .. raw:: html 11 | 12 |
13 | -------------------------------------------------------------------------------- /docs/source/_templates/module.rst: -------------------------------------------------------------------------------- 1 | {{ objname }} 2 | {{ underline }} 3 | 4 | .. automodule:: {{ fullname }} 5 | 6 | {% block functions %} 7 | {% if functions %} 8 | .. rubric:: Functions 9 | 10 | .. autosummary:: 11 | :toctree: {{ objname }} 12 | :template: function.rst 13 | {% for item in functions %} 14 | {{ item }} 15 | {%- endfor %} 16 | {% endif %} 17 | {% endblock %} 18 | 19 | {% block classes %} 20 | {% if classes %} 21 | .. rubric:: Classes 22 | 23 | .. autosummary:: 24 | :toctree: {{ objname }} 25 | :template: class.rst 26 | {% for item in classes %} 27 | {{ item }} 28 | {%- endfor %} 29 | {% endif %} 30 | {% endblock %} 31 | 32 | {% block exceptions %} 33 | {% if exceptions %} 34 | .. rubric:: Exceptions 35 | 36 | .. autosummary:: 37 | {% for item in exceptions %} 38 | {{ item }} 39 | {%- endfor %} 40 | {% endif %} 41 | {% endblock %} 42 | -------------------------------------------------------------------------------- /docs/source/api.rst: -------------------------------------------------------------------------------- 1 | .. _museotoolbox_api_reference: 2 | 3 | API Reference 4 | ============= 5 | 6 | The complete Museo ToolBox project is automatically documented for every module. 7 | 8 | .. currentmodule:: museotoolbox 9 | 10 | .. autosummary:: 11 | :toctree: modules/ 12 | :template: module.rst 13 | 14 | processing 15 | ai 16 | cross_validation 17 | charts 18 | stats 19 | datasets 20 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Museo ToolBox documentation build configuration file, created by 5 | # sphinx-quickstart on Sun Nov 11 11:34:29 2018. 
6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | # 20 | import re 21 | import os 22 | import sys 23 | # sys.path.insert(0, os.path.abspath('.')) 24 | 25 | 26 | # -- General configuration ------------------------------------------------ 27 | 28 | # If your documentation needs a minimal Sphinx version, state it here. 29 | # 30 | # needs_sphinx = '1.0' 31 | 32 | # Add any Sphinx extension module names here, as strings. They can be 33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 34 | # ones. 35 | 36 | #from recommonmark.parser import CommonMarkParser 37 | 38 | autoclass_content = 'both' 39 | 40 | #source_parsers = { 41 | # '.md': 'recommonmark.parser.CommonMarkParser', 42 | #} 43 | 44 | extensions = [ 45 | 'sphinx.ext.doctest', 46 | 'sphinx.ext.autodoc', 47 | 'sphinx.ext.autosummary', 48 | 'sphinx.ext.viewcode', 49 | 'sphinx.ext.napoleon', 50 | 'sphinx_gallery.gen_gallery', 51 | 'nbsphinx', 52 | 'sphinx_copybutton' 53 | ] 54 | 55 | 56 | autosummary_generate = True 57 | # numpydoc_show_class_members=False 58 | imported_members=True 59 | autoclass_content = 'both' 60 | # autodoc_default_flags = ['members', 'inherited-members','undoc-members'] 61 | # Add any paths that contain templates here, relative to this directory. 62 | templates_path = ['_templates'] 63 | 64 | # The suffix(es) of source filenames. 
65 | # You can specify multiple suffix as a list of string: 66 | # 67 | # source_suffix = ['.rst', '.md'] 68 | 69 | 70 | 71 | # General information about the project. 72 | project = 'MuseoToolBox' 73 | copyright = '2019, Nicolas Karasiak' 74 | author = 'Nicolas Karasiak' 75 | sphinx_gallery_conf = { 76 | 'backreferences_dir' : os.path.join('modules','backreferences'), 77 | 'doc_module':'museotoolbox', 78 | # path to your examples scripts 79 | 'examples_dirs': '../../examples', 80 | 'filename_pattern' : '/', 81 | # path where to save gallery generated examples 82 | 'ignore_pattern' : '__', 83 | 'gallery_dirs': 'auto_examples', 84 | 'plot_gallery': True, 85 | #'doc_module' : ('sphinx_gallery', 'numpy') # enable 'Examples using..' 86 | } 87 | 88 | # The version info for the project you're documenting, acts as replacement for 89 | # |version| and |release|, also used in various other places throughout the 90 | # built documents. 91 | # 92 | sys.path.insert(0, os.path.abspath('.')) 93 | sys.path.insert(0, os.path.abspath('..')) 94 | sys.path.insert(0, os.path.abspath("../..")) 95 | 96 | # The short X.Y version. 97 | 98 | __version__ = re.search( 99 | r'__version__\s*=\s*[\'"]([^\'"]*)[\'"]', # It excludes inline comment too 100 | open('../../museotoolbox/__init__.py').read()).group(1) 101 | 102 | source_suffix = ['.rst'] 103 | 104 | # The master toctree document. 105 | 106 | version = __version__ 107 | # The full version, including alpha/beta/rc tags. 108 | release = __version__ 109 | 110 | # The language for content autogenerated by Sphinx. Refer to documentation 111 | # for a list of supported languages. 112 | # 113 | # This is also used if you do content translation via gettext catalogs. 114 | # Usually you set "language" from the command line for these cases. 115 | language = None 116 | 117 | # List of patterns, relative to source directory, that match files and 118 | # directories to ignore when looking for source files. 
# These patterns also affect html_static_path and html_extra_path
164 | htmlhelp_basename = 'MuseoToolBoxdoc' 165 | 166 | 167 | # -- Options for LaTeX output --------------------------------------------- 168 | 169 | latex_elements = { 170 | # The paper size ('letterpaper' or 'a4paper'). 171 | # 172 | # 'papersize': 'letterpaper', 173 | 174 | # The font size ('10pt', '11pt' or '12pt'). 175 | # 176 | # 'pointsize': '10pt', 177 | 178 | # Additional stuff for the LaTeX preamble. 179 | # 180 | # 'preamble': '', 181 | 182 | # Latex figure (float) alignment 183 | # 184 | # 'figure_align': 'htbp', 185 | } 186 | 187 | 188 | 189 | 190 | # -- Options for Texinfo output ------------------------------------------- 191 | 192 | # Grouping the document tree into Texinfo files. List of tuples 193 | # (source start file, target name, title, author, 194 | # dir menu entry, description, category) 195 | 196 | master_doc = 'index' 197 | 198 | # -- Options for manual page output --------------------------------------- 199 | 200 | # One entry per manual page. List of tuples 201 | # (source start file, name, description, authors, manual section). 202 | man_pages = [ 203 | (master_doc, 'museotoolbox', 'MuseoToolBox Documentation', 204 | [author], 1) 205 | ] 206 | def setup(app): 207 | app.add_stylesheet('style.css') # may also be an URL 208 | 209 | 210 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. MuseoToolBox documentation 2 | 3 | MuseoToolBox documentation 4 | ============================= 5 | 6 | .. sidebar:: About MuseoToolBox 7 | 8 | .. image:: https://github.com/nkarasiak.png?size=100 9 | :alt: Nicolas Karasiak avatar 10 | :class: avatar 11 | 12 | :Author: `Nicolas Karasiak `_ 13 | :Source code: `github.com project `_ 14 | :Bug tracker: `github.com issues `_ 15 | :Generated: |today| 16 | :License: GPL v3 17 | :Version: |release| 18 | 19 | .. include:: README.rst 20 | :start-line: 8 21 | 22 | .. 
toctree:: 23 | :maxdepth: 2 24 | 25 | self 26 | 27 | .. toctree:: 28 | :maxdepth: 3 29 | :caption: Examples gallery 30 | 31 | auto_examples/index 32 | 33 | .. toctree:: 34 | :maxdepth: 3 35 | :caption: Documentation API 36 | 37 | api 38 | CHANGELOG 39 | CONTRIBUTING 40 | 41 | Indices and tables 42 | ================== 43 | 44 | * :ref:`genindex` 45 | * :ref:`modindex` 46 | * :ref:`search` 47 | -------------------------------------------------------------------------------- /examples/README.txt: -------------------------------------------------------------------------------- 1 | .. _museotoolbox: 2 | 3 | Gallery 4 | ----------------------- 5 | 6 | Here you will find all the examples related to :mod:`museotoolbox` library. 7 | 8 | -------------------------------------------------------------------------------- /examples/ai/README.txt: -------------------------------------------------------------------------------- 1 | .. _ai: 2 | 3 | Artificial Intelligence 4 | ------------------------ 5 | 6 | Examples related to the :mod:`museotoolbox.ai` module. 7 | 8 | -------------------------------------------------------------------------------- /examples/ai/SFFS.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Sequential Forward Feature Selection (SFFS) 4 | ======================================================== 5 | 6 | This example shows how to make a Random Sampling with 7 | 50% for each class. 
8 | 9 | """ 10 | 11 | ############################################################################## 12 | # Import librairies 13 | # ------------------------------------------- 14 | 15 | from museotoolbox.ai import SequentialFeatureSelection 16 | from museotoolbox.cross_validation import LeavePSubGroupOut 17 | from museotoolbox import datasets 18 | from sklearn.ensemble import RandomForestClassifier 19 | from sklearn import metrics 20 | import numpy as np 21 | ############################################################################## 22 | # Load HistoricalMap dataset 23 | # ------------------------------------------- 24 | 25 | X,y,g = datasets.load_historical_data(return_X_y_g=True,low_res=True) 26 | 27 | ############################################################################## 28 | # Create CV 29 | # ------------------------------------------- 30 | 31 | LSGO = LeavePSubGroupOut(valid_size=0.8,n_repeats=2, 32 | random_state=12,verbose=False) 33 | ############################################################################## 34 | # Initialize Random-Forest and metrics 35 | # -------------------------------------- 36 | 37 | classifier = RandomForestClassifier(random_state=12,n_jobs=1) 38 | 39 | f1 = metrics.make_scorer(metrics.f1_score) 40 | 41 | ############################################################################## 42 | # Set and fit the Sequentia Feature Selection 43 | # --------------------------------------------------------------- 44 | # 45 | SFFS = SequentialFeatureSelection(classifier=classifier,param_grid=dict(n_estimators=[10,20]),verbose=False) 46 | 47 | SFFS.fit(X.astype(np.float),y,g,cv=LSGO,max_features=3) 48 | 49 | ############################################## 50 | # Show best features and score 51 | 52 | print('Best features are : '+str(SFFS.best_features_)) 53 | print('F1 are : '+str(SFFS.best_scores_)) 54 | 55 | ########################################################################## 56 | # In order to predict every classification 
from the best feature 57 | 58 | SFFS.predict_best_combination(datasets.load_historical_data()[0],'/tmp/SFFS/best_classification.tif') 59 | 60 | ############################################################################## 61 | # Plot example 62 | 63 | from matplotlib import pyplot as plt 64 | plt.plot(np.arange(1,len(SFFS.best_scores_)+1),SFFS.best_scores_) 65 | plt.xlabel('Number of features') 66 | plt.xticks(np.arange(1,len(SFFS.best_scores_)+1)) 67 | plt.ylabel('F1') 68 | plt.show() 69 | -------------------------------------------------------------------------------- /examples/ai/learnWithCustomRaster.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Learn algorithm and customize your input raster without writing it on disk 4 | ============================================================================= 5 | 6 | This example shows how to customize your raster (ndvi, smooth signal...) in the 7 | learning process to avoi generate a new raster. 
8 | 9 | """ 10 | 11 | ############################################################################## 12 | # Import librairies 13 | # ------------------------------------------- 14 | 15 | from museotoolbox.ai import SuperLearner 16 | from museotoolbox.processing import extract_ROI 17 | from museotoolbox import datasets 18 | from sklearn.ensemble import RandomForestClassifier 19 | from sklearn import metrics 20 | 21 | ############################################################################## 22 | # Load HistoricalMap dataset 23 | # ------------------------------------------- 24 | 25 | raster,vector = datasets.load_historical_data(low_res=True) 26 | field = 'Class' 27 | 28 | ############################################################################## 29 | # Initialize Random-Forest and metrics 30 | # -------------------------------------- 31 | 32 | classifier = RandomForestClassifier(random_state=12,n_jobs=1) 33 | 34 | kappa = metrics.make_scorer(metrics.cohen_kappa_score) 35 | f1_mean = metrics.make_scorer(metrics.f1_score,average='micro') 36 | scoring = dict(kappa=kappa,f1_mean=f1_mean,accuracy='accuracy') 37 | 38 | 39 | ############################################################################## 40 | # Start learning 41 | # --------------------------- 42 | # sklearn will compute different metrics, but will keep best results from kappa (refit='kappa') 43 | SL = SuperLearner(classifier=classifier,param_grid=dict(n_estimators=[10]),n_jobs=1,verbose=1) 44 | 45 | ############################################################################## 46 | # Create or use custom function 47 | 48 | def reduceBands(X,bandToKeep=[0,2]): 49 | # this function get the first and the last band 50 | X=X[:,bandToKeep].reshape(-1,len(bandToKeep)) 51 | return X 52 | 53 | # add this function to learnAndPredict class 54 | SL.customize_array(reduceBands) 55 | 56 | # if you learn from vector, refit according to the f1_mean 57 | X,y = extract_ROI(raster,vector,field) 58 | 
SL.fit(X,y,cv=2,scoring=scoring,refit='f1_mean') 59 | 60 | ############################################################################## 61 | # Read the model 62 | # ------------------- 63 | print(SL.model) 64 | print(SL.model.cv_results_) 65 | print(SL.model.best_score_) 66 | 67 | ############################################################################## 68 | # Get F1 for every class from best params 69 | # ----------------------------------------------- 70 | 71 | for stats in SL.get_stats_from_cv(confusion_matrix=False,F1=True): 72 | print(stats['F1']) 73 | 74 | ############################################################################## 75 | # Get each confusion matrix from folds 76 | # ----------------------------------------------- 77 | 78 | for stats in SL.get_stats_from_cv(confusion_matrix=True): 79 | print(stats['confusion_matrix']) 80 | 81 | ############################################################################## 82 | # Save each confusion matrix from folds 83 | # ----------------------------------------------- 84 | 85 | SL.save_cm_from_cv('/tmp/testMTB/',prefix='RS50_') 86 | 87 | ############################################################################## 88 | # Predict map 89 | # --------------------------- 90 | 91 | SL.predict_image(raster,'/tmp/classification.tif', 92 | higher_confidence='/tmp/confidence.tif', 93 | confidence_per_class='/tmp/confidencePerClass.tif') 94 | ########################## 95 | # Plot example 96 | 97 | from matplotlib import pyplot as plt 98 | from osgeo import gdal 99 | src=gdal.Open('/tmp/classification.tif') 100 | plt.imshow(src.GetRasterBand(1).ReadAsArray(),cmap=plt.get_cmap('tab20')) 101 | plt.axis('off') 102 | plt.show() 103 | -------------------------------------------------------------------------------- /examples/ai/learnWithRFandCompareCV.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Learn with Random-Forest and compare 
Cross-Validation methods 4 | =============================================================== 5 | 6 | This example shows how to make a classification with different cross-validation methods. 7 | 8 | """ 9 | 10 | ############################################################################## 11 | # Import librairies 12 | # ------------------------------------------- 13 | 14 | from museotoolbox.ai import SuperLearner 15 | from museotoolbox import cross_validation 16 | from museotoolbox.processing import extract_ROI 17 | from museotoolbox import datasets 18 | from sklearn.ensemble import RandomForestClassifier 19 | from sklearn.model_selection import StratifiedKFold 20 | 21 | ############################################################################## 22 | # Load HistoricalMap dataset 23 | # ------------------------------------------- 24 | 25 | raster,vector = datasets.load_historical_data(low_res=True) 26 | field = 'Class' 27 | group = 'uniquefid' 28 | X,y,g = extract_ROI(raster,vector,field,group) 29 | ############################################################################## 30 | # Initialize Random-Forest 31 | # --------------------------- 32 | 33 | classifier = RandomForestClassifier(random_state=12,n_jobs=1) 34 | 35 | ############################################################################## 36 | # Create list of different CV 37 | # --------------------------- 38 | 39 | CVs = [cross_validation.RandomStratifiedKFold(n_splits=2), 40 | cross_validation.LeavePSubGroupOut(valid_size=0.5), 41 | cross_validation.LeaveOneSubGroupOut(), 42 | StratifiedKFold(n_splits=2,shuffle=True) #from sklearn 43 | ] 44 | 45 | kappas=[] 46 | 47 | 48 | 49 | for cv in CVs : 50 | SL = SuperLearner( classifier=classifier,param_grid=dict(n_estimators=[50,100]),n_jobs=1) 51 | SL.fit(X,y,group=g,cv=cv) 52 | print('Kappa for '+str(type(cv).__name__)) 53 | cvKappa = [] 54 | 55 | for stats in SL.get_stats_from_cv(confusion_matrix=False,kappa=True): 56 | print(stats['kappa']) 57 | 
cvKappa.append(stats['kappa']) 58 | 59 | kappas.append(cvKappa) 60 | 61 | print(20*'=') 62 | 63 | ########################## 64 | # Plot example 65 | 66 | 67 | from matplotlib import pyplot as plt 68 | plt.title('Kappa according to Cross-validation methods') 69 | plt.boxplot(kappas,labels=[str(type(i).__name__) for i in CVs], patch_artist=True) 70 | plt.grid() 71 | plt.ylabel('Kappa') 72 | plt.xticks(rotation=15) 73 | plt.show() 74 | -------------------------------------------------------------------------------- /examples/ai/learnWithRFandRS50.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Learn with Random-Forest and Random Sampling 50% (RS50) 4 | ======================================================== 5 | 6 | This example shows how to make a Random Sampling with 7 | 50% for each class. 8 | 9 | """ 10 | 11 | ############################################################################## 12 | # Import librairies 13 | # ------------------------------------------- 14 | 15 | from museotoolbox.ai import SuperLearner 16 | from museotoolbox.cross_validation import RandomStratifiedKFold 17 | from museotoolbox.processing import extract_ROI 18 | from museotoolbox import datasets 19 | from sklearn.ensemble import RandomForestClassifier 20 | from sklearn import metrics 21 | 22 | ############################################################################## 23 | # Load HistoricalMap dataset 24 | # ------------------------------------------- 25 | 26 | raster,vector = datasets.load_historical_data(low_res=True) 27 | field = 'Class' 28 | X,y = extract_ROI(raster,vector,field) 29 | ############################################################################## 30 | # Create CV 31 | # ------------------------------------------- 32 | 33 | SKF = RandomStratifiedKFold(n_splits=2, 34 | random_state=12,verbose=False) 35 | 36 | ############################################################################## 37 | 
# Initialize Random-Forest and metrics 38 | # -------------------------------------- 39 | 40 | classifier = RandomForestClassifier(random_state=12,n_jobs=1) 41 | 42 | # 43 | kappa = metrics.make_scorer(metrics.cohen_kappa_score) 44 | f1_mean = metrics.make_scorer(metrics.f1_score,average='micro') 45 | scoring = dict(kappa=kappa,f1_mean=f1_mean,accuracy='accuracy') 46 | 47 | 48 | ############################################################################## 49 | # Start learning 50 | # --------------------------- 51 | # sklearn will compute different metrics, but will keep best results from kappa (refit='kappa') 52 | SL = SuperLearner(classifier=classifier,param_grid = dict(n_estimators=[10]),n_jobs=1,verbose=1) 53 | 54 | SL.fit(X,y,cv=SKF,scoring=kappa) 55 | 56 | 57 | # ============================================================================= 58 | # ############################################################################## 59 | # # Read the model 60 | # # ------------------- 61 | # print(SL.model) 62 | # print(SL.model.cv_results_) 63 | # print(SL.model.best_score_) 64 | # 65 | # ############################################################################## 66 | # # Get F1 for every class from best params 67 | # # ----------------------------------------------- 68 | # 69 | # for stats in SL.get_stats_from_cv(confusion_matrix=False,F1=True): 70 | # print(stats['F1']) 71 | # 72 | # ############################################################################## 73 | # # Get each confusion matrix from folds 74 | # # ----------------------------------------------- 75 | # 76 | # for stats in SL.get_stats_from_cv(confusion_matrix=True): 77 | # print(stats['confusion_matrix']) 78 | # 79 | # ############################################################################## 80 | # # Save each confusion matrix from folds 81 | # # ----------------------------------------------- 82 | # 83 | # SL.save_cm_from_cv('/tmp/testMTB/',prefix='RS50_') 84 | # 85 | # 
============================================================================= 86 | ############################################################################## 87 | # Predict map 88 | # --------------------------- 89 | 90 | SL.predict_image(raster,'/tmp/classification.tif', 91 | higher_confidence='/tmp/confidence.tif', 92 | confidence_per_class='/tmp/confidencePerClass.tif') 93 | 94 | ########################## 95 | # Plot example 96 | 97 | from matplotlib import pyplot as plt 98 | from osgeo import gdal 99 | src=gdal.Open('/tmp/classification.tif') 100 | plt.imshow(src.GetRasterBand(1).ReadAsArray(),cmap=plt.get_cmap('tab20')) 101 | plt.axis('off') 102 | plt.show() 103 | -------------------------------------------------------------------------------- /examples/charts/README.txt: -------------------------------------------------------------------------------- 1 | .. _charts: 2 | 3 | Charts 4 | ------------------------ 5 | 6 | Examples related to the :mod:`museotoolbox.charts` module. 7 | 8 | -------------------------------------------------------------------------------- /examples/charts/plotConfusion.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Plot confusion matrix 4 | ======================================================== 5 | 6 | Plot confusion matrix from Cross-Validation, with F1 as subplot. 
7 | 8 | """ 9 | 10 | ############################################################################## 11 | # Import librairies 12 | # ------------------------------------------- 13 | from museotoolbox.ai import SuperLearner 14 | from museotoolbox.cross_validation import RandomStratifiedKFold 15 | from museotoolbox.charts import PlotConfusionMatrix 16 | from museotoolbox import datasets 17 | from sklearn.ensemble import RandomForestClassifier 18 | 19 | ############################################################################## 20 | # Load HistoricalMap dataset 21 | # ------------------------------------------- 22 | 23 | X,y = datasets.load_historical_data(low_res=True,return_X_y=True) 24 | #########################################m##################################### 25 | # Create CV 26 | # ------------------------------------------- 27 | RSKF = RandomStratifiedKFold(n_splits=2, 28 | random_state=12,verbose=False) 29 | 30 | ############################################################################## 31 | # Initialize Random-Forest 32 | # --------------------------- 33 | 34 | classifier = RandomForestClassifier() 35 | 36 | ############################################################################## 37 | # Start learning 38 | # --------------------------- 39 | 40 | SL = SuperLearner(classifier=classifier,param_grid=dict(n_estimators=[10,50])) 41 | SL.fit(X,y,cv=RSKF) 42 | ############################################################################## 43 | # Get kappa from each fold 44 | # --------------------------- 45 | 46 | for stats in SL.get_stats_from_cv(confusion_matrix=False,kappa=True): 47 | print(stats['kappa']) 48 | 49 | ############################################################################## 50 | # Get each confusion matrix from folds 51 | # ----------------------------------------------- 52 | cms = [] 53 | for stats in SL.get_stats_from_cv(confusion_matrix=True): 54 | cms.append(stats['confusion_matrix']) 55 | print(stats['confusion_matrix']) 56 
| 57 | ############################################################################## 58 | # Plot confusion matrix 59 | # ----------------------------------------------- 60 | 61 | import numpy as np 62 | meanCM = np.mean(cms,axis=0).astype(np.int16) 63 | pltCM = PlotConfusionMatrix(meanCM.T) # Translate for Y = prediction and X = truth 64 | pltCM.add_text() 65 | pltCM.color_diagonal() -------------------------------------------------------------------------------- /examples/charts/plotConfusionAcc.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Plot confusion matrix with User/Producer accuracy 4 | ======================================================== 5 | 6 | Plot confusion matrix from Cross-Validation, with accuracy (user/prod) as subplot. 7 | 8 | """ 9 | 10 | ############################################################################## 11 | # Import librairies 12 | # ------------------------------------------- 13 | from museotoolbox.ai import SuperLearner 14 | from museotoolbox.cross_validation import RandomStratifiedKFold 15 | from museotoolbox.charts import PlotConfusionMatrix 16 | from museotoolbox import datasets 17 | from sklearn.ensemble import RandomForestClassifier 18 | 19 | ############################################################################## 20 | # Load HistoricalMap dataset 21 | # ------------------------------------------- 22 | 23 | X,y = datasets.load_historical_data(low_res=True,return_X_y=True) 24 | 25 | ############################################################################## 26 | # Create CV 27 | # ------------------------------------------- 28 | RSKF = RandomStratifiedKFold(n_splits=2, 29 | random_state=12,verbose=False) 30 | 31 | ############################################################################## 32 | # Initialize Random-Forest 33 | # --------------------------- 34 | 35 | classifier = RandomForestClassifier() 36 | 37 | 
############################################################################## 38 | # Start learning 39 | # --------------------------- 40 | 41 | 42 | SL = SuperLearner(classifier=classifier,param_grid=dict(n_estimators=[10,100])) 43 | SL.fit(X,y,cv=RSKF) 44 | 45 | ############################################################################## 46 | # Get kappa from each fold 47 | # --------------------------- 48 | 49 | for stats in SL.get_stats_from_cv(confusion_matrix=False,kappa=True): 50 | print(stats['kappa']) 51 | 52 | ############################################################################## 53 | # Get each confusion matrix from folds 54 | # ----------------------------------------------- 55 | cms = [] 56 | for stats in SL.get_stats_from_cv(confusion_matrix=True): 57 | cms.append(stats['confusion_matrix']) 58 | print(stats['confusion_matrix']) 59 | 60 | ############################################################################## 61 | # Plot confusion matrix 62 | # ----------------------------------------------- 63 | 64 | import numpy as np 65 | 66 | # a bug in Sphinx doesn't show the whole plot, sorry. 
67 | 68 | labels = ['Forest','Agriculture','Bare soil','Water','Building'] 69 | from matplotlib.pyplot import cm as colorMap 70 | meanCM = np.mean(cms,axis=0).astype(np.int16) 71 | pltCM = PlotConfusionMatrix(meanCM.T) # Translate for Y = prediction and X = truth 72 | pltCM.add_text() 73 | pltCM.add_x_labels(labels,rotation=90) 74 | pltCM.add_y_labels(labels) 75 | pltCM.color_diagonal(diag_color=colorMap.Purples,matrix_color=colorMap.Reds) 76 | pltCM.add_accuracy() 77 | pltCM.add_f1() -------------------------------------------------------------------------------- /examples/charts/plotConfusionF1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Plot confusion matrix from Cross-Validation with F1 4 | ======================================================== 5 | 6 | Plot confusion matrix from Cross-Validation, with F1 as subplot. 7 | 8 | """ 9 | 10 | ############################################################################## 11 | # Import librairies 12 | # ------------------------------------------- 13 | from museotoolbox.ai import SuperLearner 14 | from museotoolbox.cross_validation import RandomStratifiedKFold 15 | from museotoolbox.charts import PlotConfusionMatrix 16 | from museotoolbox import datasets 17 | from sklearn.ensemble import RandomForestClassifier 18 | 19 | ############################################################################## 20 | # Load HistoricalMap dataset 21 | # ------------------------------------------- 22 | 23 | X,y = datasets.load_historical_data(low_res=True,return_X_y=True) 24 | field = 'Class' 25 | ############################################################################## 26 | # Create CV 27 | # ------------------------------------------- 28 | RSKF = RandomStratifiedKFold(n_splits=2, 29 | random_state=12,verbose=False) 30 | 31 | ############################################################################## 32 | # Initialize Random-Forest 33 | # 
--------------------------- 34 | 35 | classifier = RandomForestClassifier() 36 | 37 | ############################################################################## 38 | # Start learning 39 | # --------------------------- 40 | 41 | 42 | SL = SuperLearner(classifier=classifier,param_grid=dict(n_estimators=[10,50])) 43 | SL.fit(X,y,cv=RSKF) 44 | 45 | ############################################################################## 46 | # Get kappa from each fold 47 | # --------------------------- 48 | 49 | for stats in SL.get_stats_from_cv(confusion_matrix=False,kappa=True): 50 | print(stats['kappa']) 51 | 52 | ############################################################################## 53 | # Get each confusion matrix from folds 54 | # ----------------------------------------------- 55 | cms = [] 56 | for stats in SL.get_stats_from_cv(confusion_matrix=True): 57 | cms.append(stats['confusion_matrix']) 58 | print(stats['confusion_matrix']) 59 | 60 | ############################################################################## 61 | # Plot confusion matrix 62 | # ----------------------------------------------- 63 | 64 | import numpy as np 65 | meanCM = np.mean(cms,axis=0).astype(np.int16) 66 | pltCM = PlotConfusionMatrix(meanCM.T) # Translate for Y = prediction and X = truth 67 | pltCM.add_text() 68 | pltCM.add_f1() 69 | pltCM.color_diagonal() -------------------------------------------------------------------------------- /examples/cross_validation/LeaveOneOutPerClass.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Leave One Out Per Class (LOOPC) 4 | ====================================================== 5 | 6 | This example shows how to make a Leave One Out for each class. 
7 | 8 | """ 9 | 10 | ############################################################################## 11 | # Import librairies 12 | # ------------------------------------------- 13 | 14 | from museotoolbox.cross_validation import LeaveOneOut 15 | from museotoolbox import datasets 16 | 17 | ############################################################################## 18 | # Load HistoricalMap dataset 19 | # ------------------------------------------- 20 | 21 | X,y = datasets.load_historical_data(return_X_y=True,low_res=True) 22 | 23 | ############################################################################## 24 | # Create CV 25 | # ------------------------------------------- 26 | LOOPC = LeaveOneOut(random_state=8,verbose=False) 27 | for tr,vl in LOOPC.split(X=None,y=y): 28 | print(tr,vl) 29 | 30 | ############################################################################### 31 | # .. note:: 32 | # Split is made to generate each fold 33 | 34 | # Show label 35 | 36 | for tr,vl in LOOPC.split(X=None,y=y): 37 | print(y[vl]) 38 | 39 | ############################################################################### 40 | # Save each train/valid fold in a file 41 | # ------------------------------------------- 42 | # In order to translate polygons into points (each points is a pixel in the raster) 43 | # we use sampleExtraction from vector_tools to generate a temporary vector. 
44 | 45 | trvl = LOOPC.save_to_vector(datasets.load_historical_data()[1],'Class',out_vector='/tmp/LOO.gpkg') 46 | for tr,vl in trvl: 47 | print(tr,vl) 48 | 49 | 50 | ############################################################################### 51 | # Plot example on how a polygon was splitted 52 | 53 | import ogr 54 | import numpy as np 55 | from matplotlib import pyplot as plt 56 | import matplotlib.path as mpath 57 | import matplotlib.patches as mpatches 58 | 59 | # Prepare figure 60 | plt.ioff() 61 | ax=plt.subplot(1,1,1) 62 | ax = plt.gca() 63 | 64 | 65 | xBounds,yBounds=[[],[]] 66 | 67 | for idx,vector in enumerate([tr,vl]): 68 | # Read all features in layer and store as paths 69 | ds = ogr.Open(vector) 70 | lyr = ds.GetLayer(0) 71 | 72 | for feat in lyr: 73 | paths = [] 74 | codes = [] 75 | all_x = [] 76 | all_y = [] 77 | 78 | for geom in feat.GetGeometryRef(): 79 | x = [geom.GetX(j) for j in range(geom.GetPointCount())] 80 | y = [geom.GetY(j) for j in range(geom.GetPointCount())] 81 | print(y) 82 | codes += [mpath.Path.MOVETO] + \ 83 | (len(x)-1)*[mpath.Path.LINETO] 84 | all_x += x 85 | all_y += y 86 | path = mpath.Path(np.column_stack((all_x,all_y)), codes) 87 | paths.append(path) 88 | 89 | # Add paths as patches to axes 90 | for path in paths: 91 | if idx==0: 92 | ax.add_patch(mpatches.PathPatch(path,color='C0')) 93 | else: 94 | ax.add_patch(mpatches.PathPatch(path,color='C1')) 95 | 96 | xBounds.append([np.min(all_x),np.max(all_x)]) 97 | yBounds.append([np.min(all_y),np.max(all_y)]) 98 | 99 | 100 | ax.set_xlim(np.min(np.array(xBounds)[:,0]),np.max(np.array(xBounds)[:,1])) 101 | ax.set_ylim(np.min(np.array(yBounds)[:,0]),np.max(np.array(yBounds)[:,1])) 102 | 103 | 104 | legend = [mpatches.Patch(color='C0', label='Train'),mpatches.Patch(color='C1', label='Valid')] 105 | plt.legend(handles=legend) 106 | 107 | plt.show() 108 | -------------------------------------------------------------------------------- /examples/cross_validation/LeaveOneSubGroupOut.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Leave-One-SubGroup-Out (LOSGO) 4 | ====================================================== 5 | 6 | This example shows how to make a Leave-One-SubGroup-Out. 7 | 8 | """ 9 | 10 | ############################################################################## 11 | # Import librairies 12 | # ------------------------------------------- 13 | 14 | from museotoolbox.cross_validation import LeaveOneSubGroupOut 15 | from museotoolbox.processing import extract_ROI 16 | from museotoolbox import datasets 17 | 18 | ############################################################################## 19 | # Load HistoricalMap dataset 20 | # ------------------------------------------- 21 | 22 | raster,vector = datasets.load_historical_data() 23 | field = 'Class' 24 | group = 'uniquefid' 25 | X,y,s = extract_ROI(raster,vector,field,group) 26 | 27 | ############################################################################## 28 | # Create CV 29 | # ------------------------------------------- 30 | # if n_splits is False (default), the number of splits will be the smallest 31 | # number of subgroup of all labels. 32 | 33 | valid_size = 0.5 # Means 50% 34 | LOSGO = LeaveOneSubGroupOut(verbose=False,random_state=12) # 35 | 36 | ############################################################################### 37 | # .. note:: 38 | # Split is made to generate each fold 39 | 40 | LOSGO.get_n_splits(X,y,s) 41 | for tr,vl in LOSGO.split(X,y,s): 42 | print(tr.shape,vl.shape) 43 | 44 | ############################################################################### 45 | # Differences with sklearn 46 | # ------------------------------------------- 47 | # Sklearn do not use subgroups (only groups), so no hierarchical dependances. 
48 | 49 | from sklearn.model_selection import LeaveOneGroupOut 50 | LOGO = LeaveOneGroupOut() 51 | for tr,vl in LOGO.split(X=X,y=y,groups=s): 52 | print(tr.shape,vl.shape) 53 | 54 | ############################################################################### 55 | # Plot example 56 | from __drawCVmethods import plotMethod 57 | plotMethod('LOO-group') -------------------------------------------------------------------------------- /examples/cross_validation/LeaveOneSubGroupOutAndSaveVector.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Generate a cross-validation and/or save each fold to a vector file 4 | =================================================================== 5 | 6 | This example shows how to make a Leave-One-SubGroup-Out and save 7 | each fold as a vector file. 8 | 9 | """ 10 | 11 | ############################################################################## 12 | # Import librairies 13 | # ------------------------------------------- 14 | 15 | from museotoolbox.cross_validation import LeaveOneSubGroupOut 16 | from museotoolbox.processing import extract_ROI 17 | from museotoolbox import datasets 18 | 19 | ############################################################################## 20 | # Load HistoricalMap dataset 21 | # ------------------------------------------- 22 | 23 | raster,vector = datasets.load_historical_data(low_res=True) 24 | field = 'Class' 25 | group = 'uniquefid' 26 | X,y,s = extract_ROI(raster,vector,field,group) 27 | ############################################################################## 28 | # Create CV 29 | # ------------------------------------------- 30 | 31 | valid_size = 0.5 # Means 50% 32 | LOSGO = LeaveOneSubGroupOut(verbose=False,random_state=12) 33 | 34 | ############################################################################### 35 | # .. 
note:: 36 | # Split is made to generate each fold 37 | 38 | LOSGO.get_n_splits(X,y,s) 39 | for tr,vl in LOSGO.split(X,y,s): 40 | print(tr.shape,vl.shape) 41 | 42 | ############################################################################### 43 | # Save each train/valid fold to a vector file (here in polygon type) 44 | # 45 | 46 | vectorFiles = LOSGO.save_to_vector(vector,field,group=group,out_vector='/tmp/LOSGO.gpkg') 47 | 48 | for tr,vl in vectorFiles: 49 | print(tr,vl) 50 | 51 | ############################################################################### 52 | # The sampling can be different in vector point or polygon. 53 | # So you can generate each centroid of a pixel that contains the polygon. 54 | # 55 | 56 | from museotoolbox.processing import sample_extraction 57 | vectorPointPerPixel = '/tmp/vectorCentroid.gpkg' 58 | sample_extraction(raster,vector,vectorPointPerPixel) 59 | 60 | vectorFiles = LOSGO.save_to_vector(vectorPointPerPixel,field,group=group,out_vector='/tmp/LOSGO.gpkg') 61 | 62 | for tr,vl in LOSGO.split(X,y,s): 63 | print(tr.shape,vl.shape) -------------------------------------------------------------------------------- /examples/cross_validation/LeavePSubGroupOut.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Leave-P-SubGroup-Out (LPSGO) 4 | ====================================================== 5 | 6 | This example shows how to make a Leave-Percent-SubGroup-Out. 
7 | 8 | """ 9 | 10 | ############################################################################## 11 | # Import librairies 12 | # ------------------------------------------- 13 | 14 | from museotoolbox.cross_validation import LeavePSubGroupOut 15 | from museotoolbox import datasets,processing 16 | import numpy as np 17 | 18 | ############################################################################## 19 | # Load HistoricalMap dataset 20 | # ------------------------------------------- 21 | 22 | raster,vector = datasets.load_historical_data(low_res=True) 23 | field = 'Class' 24 | group = 'uniquefid' 25 | 26 | ############################################################################## 27 | # Create CV 28 | # ------------------------------------------- 29 | valid_size = 0.5 # Means 50% 30 | LPSGO = LeavePSubGroupOut(valid_size = 0.5, 31 | random_state=12,verbose=False) 32 | 33 | ############################################################################### 34 | # Extract X,y and group. 35 | # ------------------------------------------- 36 | 37 | X,y,g= processing.extract_ROI(raster,vector,field,group) 38 | 39 | ############################################################################### 40 | # .. 
note:: 41 | # Split is made to generate each fold 42 | 43 | for tr,vl in LPSGO.split(X,y,g): 44 | print(tr.shape,vl.shape) 45 | 46 | print('y label with number of samples') 47 | print(np.unique(y[tr],return_counts=True)) 48 | ############################################################################## 49 | # Differences with scikit-learn 50 | # ------------------------------------------- 51 | from sklearn.model_selection import LeavePGroupsOut 52 | # You need to specify the number of groups 53 | 54 | LPGO = LeavePGroupsOut(n_groups=2) 55 | for tr,vl in LPGO.split(X,y,g): 56 | print(tr.shape,vl.shape) 57 | 58 | ############################################################################## 59 | # With GroupShuffleSplit, won't keep the percentage per subgroup 60 | # This generate unbalanced classes 61 | 62 | from sklearn.model_selection import GroupShuffleSplit 63 | GSS = GroupShuffleSplit(test_size=0.5,n_splits=2) 64 | for tr,vl in GSS.split(X,y,g): 65 | print(tr.shape,vl.shape) 66 | 67 | print('y label with number of samples') 68 | print(np.unique(y[tr],return_counts=True)) 69 | 70 | ############################################################################### 71 | # Plot example in image 72 | from __drawCVmethods import plotMethod 73 | plotMethod('SKF-group') -------------------------------------------------------------------------------- /examples/cross_validation/README.txt: -------------------------------------------------------------------------------- 1 | .. _crossvalidation: 2 | 3 | Cross-Validation 4 | ------------------------ 5 | 6 | Examples related to the :mod:`museotoolbox.cross_validation` module. 
7 | 8 | -------------------------------------------------------------------------------- /examples/cross_validation/RandomSampling50.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Stratified-K-Fold 4 | ====================================================== 5 | 6 | This example shows how to make a Random Sampling with 7 | 50% for each class. 8 | 9 | """ 10 | 11 | ############################################################################## 12 | # Import librairies 13 | # ------------------------------------------- 14 | 15 | from museotoolbox.cross_validation import RandomStratifiedKFold 16 | from museotoolbox import datasets,processing 17 | 18 | ############################################################################## 19 | # Load HistoricalMap dataset 20 | # ------------------------------------------- 21 | 22 | raster,vector = datasets.load_historical_data(low_res=True) 23 | field = 'Class' 24 | y = processing.read_vector_values(vector,field) 25 | 26 | ############################################################################## 27 | # Create CV 28 | # ------------------------------------------- 29 | 30 | SKF = RandomStratifiedKFold(n_splits=2,n_repeats=2, 31 | random_state=12,verbose=False) 32 | for tr,vl in SKF.split(X=None,y=y): 33 | print(tr,vl) 34 | 35 | ############################################################################### 36 | # .. note:: 37 | # Split is made to generate each fold 38 | 39 | # Show label 40 | 41 | for tr,vl in SKF.split(X=None,y=y): 42 | print(y[tr],y[vl]) 43 | 44 | ############################################################################## 45 | # .. note:: 46 | # The first one is made with polygon only. 
47 | # When learning/predicting, all pixels will be taken in account 48 | # TO generate a full X and y labels, extract samples from ROI 49 | 50 | X,y=processing.extract_ROI(raster,vector,field) 51 | 52 | for tr,vl in SKF.split(X,y): 53 | print(tr,vl) 54 | print(tr.shape,vl.shape) 55 | 56 | ########################## 57 | # Plot example 58 | from __drawCVmethods import plotMethod 59 | plotMethod('SKF-pixel') -------------------------------------------------------------------------------- /examples/cross_validation/SpatialLeaveAsideOut.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Spatial Leave-Aside-Out (SLAO) 4 | ====================================================== 5 | 6 | This example shows how to make a Spatial Leave-Aside-Out. 7 | 8 | See https://doi.org/10.1016/j.foreco.2013.07.059 9 | 10 | """ 11 | 12 | ############################################################################## 13 | # Import librairies 14 | # ------------------------------------------- 15 | 16 | from museotoolbox.cross_validation import SpatialLeaveAsideOut 17 | from museotoolbox import datasets,processing 18 | 19 | ############################################################################## 20 | # Load HistoricalMap dataset 21 | # ------------------------------------------- 22 | 23 | raster,vector = datasets.load_historical_data() 24 | field = 'Class' 25 | X,y = processing.extract_ROI(raster,vector,field) 26 | distance_matrix = processing.get_distance_matrix(raster,vector) 27 | 28 | ############################################################################## 29 | # Create CV 30 | # ------------------------------------------- 31 | # n_splits will be the number of the least populated class 32 | 33 | SLOPO = SpatialLeaveAsideOut(valid_size=1/3, 34 | distance_matrix=distance_matrix,random_state=4) 35 | 36 | print(SLOPO.get_n_splits(X,y)) 37 | 38 | 
############################################################################### 39 | # .. note:: 40 | # Split is made to generate each fold 41 | 42 | for tr,vl in SLOPO.split(X,y): 43 | print(tr.shape,vl.shape) 44 | 45 | ############################################################################### 46 | # Save each train/valid fold in a file 47 | # ------------------------------------------- 48 | # In order to translate polygons into points (each points is a pixel in the raster) 49 | # we use sampleExtraction from vector_tools to generate a temporary vector. 50 | 51 | processing.sample_extraction(raster,vector,out_vector='/tmp/pixels.gpkg',verbose=False) 52 | trvl = SLOPO.save_to_vector('/tmp/pixels.gpkg',field,out_vector='/tmp/SLOPO.gpkg') 53 | for tr,vl in trvl: 54 | print(tr,vl) 55 | 56 | 57 | ############################################################################### 58 | # Plot example on how a polygon was splitted 59 | 60 | import ogr 61 | import numpy as np 62 | from matplotlib import pyplot as plt 63 | # Read all features in layer and store as paths 64 | xyl= np.array([],dtype=float).reshape((-1,3)) 65 | for idx,vector in enumerate([tr,vl]): 66 | ds = ogr.Open(vector) 67 | lyr = ds.GetLayer(0) 68 | lyr.SetAttributeFilter ( "uniquefid=17" ) # select a specific group 69 | for feat in lyr: 70 | geom = feat.GetGeometryRef() 71 | xyl = np.vstack((xyl,np.asarray((geom.GetX(),geom.GetY(),idx)))) 72 | 73 | trPoints = xyl[xyl[:,2]==0][:,:2] 74 | vlPoints = xyl[xyl[:,2]==1][:,:2] 75 | plt.scatter(trPoints[:,0],trPoints[:,1],label='train',color='C0') 76 | plt.scatter(vlPoints[:,0],vlPoints[:,1],label='valid',color='C1') 77 | plt.legend() 78 | plt.show() -------------------------------------------------------------------------------- /examples/cross_validation/SpatialLeaveOnePixelOut.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Spatial Leave-One-Out (SLOO) 4 | 
====================================================== 5 | 6 | This example shows how to make a Spatial Leave-One-Out called here 7 | a Spatial Leave-One-Pixel-Out. 8 | 9 | For more information see : https://onlinelibrary.wiley.com/doi/full/10.1111/geb.12161. 10 | 11 | """ 12 | 13 | ############################################################################## 14 | # Import librairies 15 | # ------------------------------------------- 16 | 17 | from museotoolbox.cross_validation import SpatialLeaveOneOut 18 | from museotoolbox import datasets,processing 19 | ############################################################################## 20 | # Load HistoricalMap dataset 21 | # ------------------------------------------- 22 | 23 | raster,vector = datasets.load_historical_data(low_res=True) 24 | field = 'Class' 25 | X,y = processing.extract_ROI(raster,vector,field) 26 | distance_matrix = processing.get_distance_matrix(raster,vector) 27 | 28 | ############################################################################## 29 | # Create CV 30 | # ------------------------------------------- 31 | # n_splits will be the number of the least populated class 32 | 33 | SLOO = SpatialLeaveOneOut(distance_thresold=100,distance_matrix=distance_matrix, 34 | random_state=12) 35 | ############################################################################### 36 | # .. 
note:: 37 | # Split is made to generate each fold 38 | SLOO.get_n_splits(X,y) 39 | for tr,vl in SLOO.split(X,y): 40 | print(tr.shape,vl.shape) 41 | 42 | #################################################### 43 | # Save each train/valid in a spatial vector file 44 | from museotoolbox.processing import sample_extraction 45 | sample_extraction(raster,vector,'/tmp/one_point_per_pixel.gpkg') 46 | files = SLOO.save_to_vector('/tmp/one_point_per_pixel.gpkg','Class',out_vector='/tmp/trvl.gpkg') 47 | print(files) 48 | ############################################# 49 | # Draw image 50 | from __drawCVmethods import plotMethod 51 | plotMethod('SLOO-pixel') 52 | -------------------------------------------------------------------------------- /examples/cross_validation/SpatialLeaveOneSubGroupOut.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Spatial Leave-One-SubGroup-Out (SLOSGO) 4 | ====================================================== 5 | 6 | This example shows how to make a Spatial Leave-One-SubGroup-Out. 
7 | 8 | """ 9 | 10 | ############################################################################## 11 | # Import librairies 12 | # ------------------------------------------- 13 | import numpy as np 14 | from museotoolbox.cross_validation import SpatialLeaveOneSubGroupOut 15 | from museotoolbox import datasets,processing 16 | ############################################################################## 17 | # Load HistoricalMap dataset 18 | # ------------------------------------------- 19 | 20 | _,centroid = datasets.load_historical_data(low_res=True,centroid=True) 21 | raster,vector = datasets.load_historical_data(low_res=True) 22 | 23 | field = 'Class' 24 | 25 | ############################################################################## 26 | # Extract label ('Class' field) and groups ('uniquefid' field) 27 | # Compute distanceMatrix with centroid (one point per group) 28 | 29 | X,y,groups = processing.extract_ROI(raster,vector,field,'uniquefid') 30 | distance_matrix,distance_label = processing.get_distance_matrix(raster,centroid,'uniquefid') 31 | 32 | ############################################################################## 33 | # Create CV 34 | # ------------------------------------------- 35 | # n_splits will be the number of the least populated class 36 | 37 | SLOSGO = SpatialLeaveOneSubGroupOut(distance_thresold=100,distance_matrix=distance_matrix, 38 | distance_label=distance_label,random_state=12) 39 | 40 | 41 | ############################################################################### 42 | # .. 
note:: 43 | # Split is made to generate each fold 44 | SLOSGO.get_n_splits(X,y,groups) 45 | for tr,vl in SLOSGO.split(X,y,groups): 46 | print(np.unique(groups[vl])) 47 | print(np.unique(groups[tr])) 48 | 49 | SLOSGO.save_to_vector(vector,'Class','uniquefid','/tmp/slosgo.gpkg') 50 | ############################################# 51 | # Draw image 52 | from __drawCVmethods import plotMethod 53 | plotMethod('SLOO-group') -------------------------------------------------------------------------------- /examples/cross_validation/__drawCVmethods.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Mar 2 10:41:59 2019 5 | 6 | @author: nicolas 7 | """ 8 | import numpy as np 9 | from matplotlib import pyplot as plt 10 | 11 | def plotMethod(cvType='SKF-pixel'): 12 | nSamples = 30 13 | alpha_unused = 0.1 14 | trainColor='C0' 15 | validColor='C1' 16 | points = np.array([]).reshape(-1,3) 17 | distanceBuffer=200 18 | bufferSize=48000 19 | #random 20 | np.random.seed(10) 21 | def multiplyBy100AndToInt(x): 22 | x*=100 23 | x=x.astype(int) 24 | return x 25 | def initFrame(lim=200,size=4): 26 | f=plt.figure(figsize=(size,size)) 27 | plt.xlim(-lim,lim) 28 | plt.ylim(-lim,lim) 29 | plt.xticks([]) 30 | plt.yticks([]) 31 | return f 32 | 33 | def drawFrame(title,train,valid,unused=False,buffer=False,show=True): 34 | f=initFrame() 35 | ax = f.add_subplot(111) 36 | 37 | """ 38 | if title: 39 | plt.title(title) 40 | """ 41 | ax.scatter(train[0],train[1],c=trainColor,marker='.',s=100,label='Train') 42 | ax.scatter(valid[0],valid[1],c=validColor,marker='x',label='Valid') 43 | ax.legend() 44 | if unused is not False: 45 | ax.scatter(unused[0],unused[1],c='grey',marker='.',s=100,alpha=alpha_unused,label='Unused references') 46 | if buffer is not False: 47 | ax.scatter(valid[0],valid[1],facecolor='none',edgecolor='red',marker='o',s=bufferSize) 48 | 49 | if show: 50 | plt.show() 51 | 
return f 52 | 53 | #### Generate 4 stands 54 | 55 | X = np.random.vonmises(5,50,nSamples) 56 | Y = np.random.vonmises(5,10,nSamples) 57 | X = multiplyBy100AndToInt(X) 58 | Y = multiplyBy100AndToInt(Y) 59 | 60 | label = np.ones(Y.shape) 61 | points = np.concatenate((points,np.vstack((X,Y,label)).T)) 62 | # plt.scatter(X,Y) 63 | 64 | 65 | X = np.random.vonmises(0.3,7,nSamples)-1 66 | Y = np.random.vonmises(0,10,nSamples)+1 67 | X = multiplyBy100AndToInt(X) 68 | Y = multiplyBy100AndToInt(Y) 69 | 70 | label += 1 71 | points = np.concatenate((points,np.vstack((X,Y,label)).T)) 72 | 73 | # plt.scatter(X,Y) 74 | X = np.random.vonmises(1,8,nSamples) 75 | Y = np.random.vonmises(1,20,nSamples) 76 | X = multiplyBy100AndToInt(X) 77 | Y = multiplyBy100AndToInt(Y) 78 | label += 1 79 | points = np.concatenate((points,np.vstack((X,Y,label)).T)) 80 | 81 | X = np.random.vonmises(100,15,nSamples)+1 82 | Y = np.random.vonmises(100,10,nSamples)-0.3 83 | X = multiplyBy100AndToInt(X) 84 | Y = multiplyBy100AndToInt(Y) 85 | label += 1 86 | points = np.concatenate((points,np.vstack((X,Y,label)).T)) 87 | 88 | paths = [] 89 | codes = [] 90 | 91 | from scipy.spatial import ConvexHull 92 | import matplotlib.path as mpath 93 | 94 | codes += [mpath.Path.MOVETO] + \ 95 | (len(X)-1)*[mpath.Path.LINETO] 96 | 97 | for group in range(1,5): 98 | coords = points[np.where(points[:,2]==group)][:,:2] 99 | h=ConvexHull(coords).vertices 100 | 101 | path = mpath.Path(coords[h]) 102 | paths.append(path) 103 | 104 | randomPoints = np.random.permutation(points) 105 | 106 | if cvType == 'SKF-pixel': 107 | f=drawFrame('Stratified-K-Fold pixel-based',[randomPoints[:,0][:75],randomPoints[:,1][:75]],[randomPoints[:,0][75:],randomPoints[:,1][75:]]) 108 | plt.show() 109 | else: 110 | train = np.where(np.in1d(points[:,2],np.array([2,4]))) 111 | valid = np.where(np.in1d(points[:,2],np.array([2,4]),invert=True)) 112 | 113 | if cvType == 'SKF-group': 114 | f=drawFrame('Stratified-K-Fold 
group-based',[points[valid][:,0],points[valid][:,1]],[points[train][:,0],points[train][:,1]]) 115 | plt.show() 116 | 117 | else: 118 | valid = randomPoints[0] 119 | train = randomPoints[1:] 120 | if cvType == 'LOO-pixel': 121 | f=drawFrame('Leave-One-Out pixel-based',[train[:,0],train[:,1]],valid) 122 | 123 | if cvType == 'LOO-group': 124 | valid = np.where(np.in1d(points[:,2],np.array([4]))) 125 | train = np.where(np.in1d(points[:,2],np.array([4]),invert=True)) 126 | f=drawFrame('Leave-One-Out group-based',[points[train][:,0],points[train][:,1]],[points[valid][:,0],points[valid][:,1]]) 127 | # pp.savefig(f,bbox_inches='tight') 128 | # SLOO-pixel 129 | if cvType == 'SLOO-pixel': 130 | from scipy.spatial import distance 131 | distance=distance.cdist(randomPoints[:,:2],randomPoints[:,:2]) 132 | 133 | valid = randomPoints[0] 134 | train_nospatial = randomPoints[1:] 135 | train = randomPoints[np.where(distance[0,:]>distanceBuffer)[0]] 136 | f=drawFrame('Spatial Leave-One-Out pixel-based',[train[:,0],train[:,1]],valid,[train_nospatial[:,0],train_nospatial[:,1]],buffer=True,show=False) 137 | plt.text(-70,0,'Distance buffer\nfrom validation pixel',fontsize=12) 138 | #lt.scatter(train_nospatial[:,0],train_nospatial[:,1],c='grey',marker='.',s=100,alpha=alpha_unused) 139 | plt.show() 140 | 141 | # 142 | if cvType == 'SLOO-group': 143 | valid = np.where(np.in1d(points[:,2],np.array([4]))) 144 | train_nospatial = np.where(np.in1d(points[:,2],np.array([4]),invert=True)) 145 | 146 | train = np.where(np.in1d(points[:,2],np.array([2]))) 147 | 148 | train=[points[train][:,0],points[train][:,1]] 149 | valid=[points[valid][:,0],points[valid][:,1]] 150 | unused=points[train_nospatial][:,0],points[train_nospatial][:,1] 151 | f=drawFrame('Spatial Leave-One-Out group-based',train,valid,unused,show=False) 152 | 153 | centroid = np.asarray([[np.mean(points[:,0][np.where(points[:,2]==stand)]),np.mean(points[:,1][np.where(points[:,2]==stand)])] for stand in range(1,5)]) 154 | 
plt.scatter(centroid[:,0],centroid[:,1],color='black',s=60,alpha=0.8,label='Centroid') 155 | plt.scatter(centroid[:,0][3],centroid[:,1][3],facecolor='none',edgecolor='red',marker='o',s=bufferSize) 156 | plt.text(-90,-10,'Distance buffer\nfrom centroid',fontsize=12) 157 | plt.show() 158 | -------------------------------------------------------------------------------- /examples/cross_validation/train_test_split.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Train test split with every kind of cross-validation 5 | ====================================================== 6 | 7 | This example shows how to split between test and train according to 8 | every cross-validation method. 9 | """ 10 | 11 | ############################################################################## 12 | # Import librairies 13 | # ------------------------------------------- 14 | 15 | import numpy as np 16 | import museotoolbox as mtb 17 | 18 | ############################################################################## 19 | # Generate random dataset 20 | # ------------------------------------------- 21 | 22 | np.random.seed(42) 23 | y = np.random.randint(1,3,10) 24 | X = np.random.randint(1,255,[10,3],dtype=np.uint8) 25 | 26 | ############################################################################## 27 | # Split train/test 28 | # ----------------------------------------------------------------------- 29 | # Using :mod:`museotoolbox.cross_validation.LeaveOneOut` 30 | 31 | cv = mtb.cross_validation.LeaveOneOut(random_state=42) 32 | 33 | X_train, X_test, y_train, y_test = mtb.cross_validation.train_test_split(cv,X,y) 34 | 35 | ############################################################################## 36 | # Split train/test with groups 37 | # ------------------------------------------- 38 | # Generate group 39 | 40 | groups = np.array([1, 1, 2, 3, 4, 2, 1, 1, 2, 3],dtype=int) 41 | 42 
| ################################################################## 43 | # Using :mod:`museotoolbox.cross_validation.LeaveOneSubGroupOut` 44 | 45 | cv = mtb.cross_validation.LeaveOneSubGroupOut(random_state=42) 46 | 47 | X_train, X_test, y_train, y_test, g_train, g_test = mtb.cross_validation.train_test_split(cv,X,y,groups=groups) -------------------------------------------------------------------------------- /examples/processing/README.txt: -------------------------------------------------------------------------------- 1 | .. _processing: 2 | 3 | Processing 4 | ------------------------ 5 | 6 | Examples related to the dedicated raster and vector :mod:`museotoolbox.processing` module. 7 | -------------------------------------------------------------------------------- /examples/processing/copyRasterInVectorFields.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Copy raster values in vector fields then read vector 4 | ====================================================== 5 | 6 | This example shows how to extract from polygons or points 7 | each pixel centroid located in the vector (polygons/points) 8 | and how to extract and save band values in vector fields. 
If bandPrefix is not specified, the script will only generate the centroid
# Let's suppose you want to know how to extract the label of each polygon/point
-------------------------------------------------------------------------------- /examples/processing/modalClass.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Modal class and number of agreements 4 | =============================================================== 5 | 6 | Create a raster with the modal class and the number of agreements. 7 | 8 | """ 9 | 10 | ############################################################################## 11 | # Import librairies 12 | # ------------------------------------------- 13 | 14 | import museotoolbox as mtb 15 | from scipy.stats import mode 16 | import numpy as np 17 | ############################################################################## 18 | # Load HistoricalMap dataset 19 | # ------------------------------------------- 20 | 21 | raster,vector = mtb.datasets.load_historical_data(low_res=True) 22 | 23 | ############################################################################## 24 | # Initialize rasterMath with raster 25 | # ----------------------------------------- 26 | 27 | ######## 28 | # In case you want to add a mask 29 | mask = '/tmp/maskFromPolygons.tif' 30 | 31 | mtb.processing.image_mask_from_vector(vector,raster,out_image = mask) 32 | 33 | rM = mtb.processing.RasterMath(raster,in_image_mask=mask) 34 | 35 | print(rM.get_random_block()) 36 | ########################## 37 | # Let's suppose you want compute the modal classification between several predictions 38 | # The first band will be the most predicted class, and the second the number of times it has been predicted. 
def modal_class(x):
    """Return, for each pixel row, the modal class and its vote count.

    Column 0 holds the most frequent value across the row's bands,
    column 1 the number of times it occurs, both cast to int16.
    """
    modal_value, n_agreements = mode(x, axis=1)
    stacked = np.column_stack((modal_value, n_agreements))
    return stacked.astype(np.int16)
Compute the subtraction and the addition between two raster bands.
def sub(X, band1=0, band2=1):
    """Subtract band ``band2`` from band ``band1`` and cast to int16.

    X is a [n_pixels, n_bands] array; returns one value per pixel.
    """
    raw_difference = X[:, band1] - X[:, band2]
    return np.array(raw_difference).astype(np.int16)
a full block (not an array with one pixel per row).
8 | 9 | """ 10 | 11 | ############################################################################## 12 | # Import librairies 13 | # ------------------------------------------- 14 | 15 | from museotoolbox.processing import RasterMath 16 | from museotoolbox import datasets 17 | from matplotlib import pyplot as plt 18 | 19 | ############################################################################## 20 | # Load HistoricalMap dataset 21 | # ------------------------------------------- 22 | 23 | raster,vector = datasets.load_historical_data() 24 | 25 | ############################################################################## 26 | # Initialize rasterMath with raster 27 | # ------------------------------------ 28 | 29 | # Set return3d to True to have full block size (not one pixel per row) 30 | 31 | rM = RasterMath(raster,return_3d=True) 32 | 33 | print(rM.get_random_block().shape) 34 | 35 | ############################################################################## 36 | # Comparing different block size (%, fixed, full block) 37 | # ------------------------------------------------------- 38 | 39 | ####################### 40 | # You can define block by percentage of the whole width/height 41 | 42 | rM.custom_block_size(1/2,1/2) 43 | print(rM.get_random_block().shape) 44 | 45 | ####################### 46 | # Or by fixed window 47 | 48 | rM.custom_block_size(50,100) # width divided every 50 pixel and height every 100 49 | print(rM.get_random_block().shape) 50 | 51 | ######################## 52 | # To have the full image (one block) 53 | 54 | rM.custom_block_size(-1,-1) # to have the full image 55 | 56 | ######################## 57 | # To have block width divided by 4 and height by 2 58 | 59 | rM.custom_block_size(1/4,1/2) 60 | 61 | ########################################## 62 | # Define block size for output raster 63 | # ------------------------------------- 64 | 65 | raster_parameters = rM.get_raster_parameters() 66 | 67 | print('Default parameters are 
a full block (not an array with one pixel per row).
11 | 12 | """ 13 | 14 | ############################################################################## 15 | # Import librairies 16 | # ------------------------------------------- 17 | 18 | from museotoolbox.processing import RasterMath,image_mask_from_vector 19 | from museotoolbox import datasets 20 | from matplotlib import pyplot as plt 21 | import numpy as np 22 | ############################################################################## 23 | # Load HistoricalMap dataset 24 | # ------------------------------------------- 25 | 26 | raster,vector = datasets.load_historical_data() 27 | 28 | ############################################################################## 29 | # Initialize rasterMath with raster 30 | # ------------------------------------ 31 | 32 | # Set return_3d to True to have full block size (not one pixel per row) 33 | # Create raster mask to only keep pixel inside polygons. 34 | 35 | image_mask_from_vector(vector,raster,'/tmp/mask.tif',invert=False) 36 | 37 | rM = RasterMath(raster,in_image_mask='/tmp/mask.tif',return_3d=True) 38 | #rM.addInputRaster('/tmp/mask.tif') 39 | print(rM.get_random_block().shape) 40 | 41 | ####################### 42 | # Plot blocks 43 | x = rM.get_random_block() 44 | 45 | rM.add_function(np.mean,'/tmp/mean.tif',axis=2,out_np_dt=np.int16) 46 | 47 | rM.run() 48 | 49 | from osgeo import gdal 50 | dst = gdal.Open('/tmp/mean.tif') 51 | arr = dst.GetRasterBand(1).ReadAsArray() 52 | plt.imshow(np.ma.masked_where(arr == rM._outputs[0]['nodata'], arr)) 53 | -------------------------------------------------------------------------------- /examples/processing/rasterMath_testBlockSize_3d_andNBands.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Using rasterMath with 3d block or 2d block 4 | ================================================================== 5 | 6 | Test notebook to validate code. 
7 | """ 8 | 9 | ############################################################################## 10 | # Import librairies 11 | # ------------------------------------------- 12 | 13 | from museotoolbox.processing import RasterMath,image_mask_from_vector 14 | from museotoolbox import datasets 15 | from matplotlib import pyplot as plt 16 | import numpy as np 17 | 18 | 19 | ############################################################################## 20 | # Load HistoricalMap dataset 21 | # ------------------------------------------- 22 | 23 | raster,vector = datasets.load_historical_data() 24 | 25 | ############################################################################## 26 | # Initialize rasterMath with raster 27 | # ------------------------------------ 28 | 29 | # Set return_3d to True to have full block size (not one pixel per row) 30 | # Create raster mask to only keep pixel inside polygons. 31 | 32 | image_mask_from_vector(vector,raster,'/tmp/mask.tif',invert=True) 33 | 34 | import time 35 | t0=time.time() 36 | for return_3d in [True,False]: 37 | 38 | rM = RasterMath(raster,in_image_mask='/tmp/mask.tif',return_3d=return_3d) 39 | 40 | rM.custom_block_size(128,128) # block of 128x128 41 | 42 | x = rM.get_block() 43 | 44 | # Returns with only 1 dimension 45 | returnFlatten = lambda x : x[...,0] 46 | 47 | # Returns 3x the original last dimension 48 | addOneBand = lambda x : np.repeat(x,3,axis=x.ndim-1) 49 | # Add functions to rasterMath 50 | rM.add_function(addOneBand,'/tmp/x_repeat_{}.tif'.format(str(return_3d))) 51 | rM.add_function(returnFlatten,'/tmp/x_flatten_{}.tif'.format(str(return_3d))) 52 | 53 | rM.run() 54 | print(time.time()-t0) 55 | 56 | from osgeo import gdal 57 | dst = gdal.Open('/tmp/x_flatten_True.tif') 58 | arr = dst.GetRasterBand(1).ReadAsArray() 59 | plt.imshow(np.ma.masked_where(arr == np.min(arr), arr)) 60 | -------------------------------------------------------------------------------- /examples/processing/rasterMath_twoRasters.py: 
# Let's suppose you want to compute the subtraction between one band of each input
def sub(x):
    """Subtract band 3 of the second input raster from band 1 of the first.

    ``x`` is a list with one block per input raster; each block's last
    axis indexes the bands.
    """
    first_input_band1 = x[0][..., 0]
    second_input_band3 = x[1][..., 2]
    return first_input_band1 - second_input_band3
note:: 24 | # If you have no memories on what the fields name are, simply put the vector path 25 | 26 | try : 27 | mtb.processing.read_vector_values(vector) 28 | except Exception as e: 29 | print(e) 30 | 31 | ############################################# 32 | # Read values from field 'Class' 33 | # -------------------------------- 34 | 35 | Y,Name = mtb.processing.read_vector_values(vector,'Class','Type') 36 | print(Y,Name) 37 | print(Y.shape) 38 | 39 | ############################################# 40 | # Read values from field beginning with 'C' 41 | # ------------------------------------------- 42 | # As multiple fields can begin with C, function returns a column per field 43 | 44 | C = mtb.processing.read_vector_values(vector,band_prefix='C') 45 | print(C) 46 | print(C.shape) 47 | 48 | 49 | ############################################# 50 | 51 | from matplotlib import pyplot as plt 52 | import numpy as np 53 | plt.title('Number of polygons per label') 54 | plt.bar(np.arange(np.unique(Y).size)+1,np.unique(Y,return_counts=True)[1]) 55 | -------------------------------------------------------------------------------- /examples/stats/MoransI.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Compute Moran's I with different lags from raster 4 | =============================================================== 5 | 6 | Compute Moran's I with different lags, support mask. 
def create_false_image(array,path):
    """Write *array* to *path* as a single-band Byte GTiff in EPSG:4326.

    Used here only to fabricate small synthetic rasters for the
    Moran's I example; pixel values are expected to fit in [0, 255].
    """
    # from https://pcjericks.github.io/py-gdalogr-cookbook/raster_layers.html
    driver = gdal.GetDriverByName('GTiff')
    # width = number of columns, height = number of rows, 1 band, byte type
    outRaster = driver.Create(path, array.shape[1], array.shape[0], 1, gdal.GDT_Byte)
    # geotransform: origin (0, 0), pixel size of 10 map units
    # NOTE(review): the y pixel size is positive (most GDAL rasters use a
    # negative value for north-up images) — presumably fine for a synthetic test
    outRaster.SetGeoTransform((0, 10, 0, 0, 0, 10))
    outband = outRaster.GetRasterBand(1)
    outband.WriteArray(array)
    outRasterSRS = osr.SpatialReference()
    outRasterSRS.ImportFromEPSG(4326)
    outRaster.SetProjection(outRasterSRS.ExportToWkt())
    # flush so the file is fully written before the dataset handle goes away
    outband.FlushCache()
62 | # Compute Moran's I for lag 1 on totally random image 63 | lags = [1,3,5] 64 | 65 | MoransI = Moran(mask,lag=lags) 66 | print(MoransI.scores) 67 | 68 | ####################### 69 | # Plot result 70 | # ----------------------------------- 71 | from matplotlib import pyplot as plt 72 | plt.title('Evolution of Moran\'s I') 73 | plt.plot(MoransI.scores['lag'],MoransI.scores['I'],'-o') 74 | plt.xlabel('Spatial lag') 75 | plt.xticks(lags) 76 | plt.ylabel('Moran\'s I') -------------------------------------------------------------------------------- /examples/stats/README.txt: -------------------------------------------------------------------------------- 1 | .. _stats: 2 | 3 | Stats 4 | ------------------------ 5 | 6 | Examples related to the :mod:`museotoolbox.stats` module. 7 | -------------------------------------------------------------------------------- /examples/stats/qualityIndexFromConfusionMatrix.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Compute quality index from confusion matrix 4 | =============================================================== 5 | 6 | Compute different quality index (OA, Kappa and F1) directly 7 | from confusion matrix. 
Compute zonal statistics from a raster and a vector file 4 | =============================================================== 5 | 6 | Compute per-polygon statistics (mean, variance, median, min, max...) 7 | directly from a raster and a vector file.
8 | 9 | """ 10 | 11 | ############################################################################## 12 | # Import librairies 13 | # ------------------------------------------- 14 | import numpy as np 15 | from museotoolbox.stats import zonal_stats 16 | from museotoolbox.datasets import load_historical_data 17 | 18 | ############################################################################## 19 | # Load dataset 20 | # ------------------------------------------- 21 | 22 | raster,vector = load_historical_data() 23 | 24 | ############################################################################## 25 | # Compute mean and variance per polygon 26 | # ---------------------------------------------------- 27 | mean,var = zonal_stats(raster,vector,'uniquefid',stats=['mean','var']) 28 | print(mean.shape) 29 | 30 | ##################################### 31 | # Show mean value 32 | print('For polygon 1 : ') 33 | for band_idx,band in enumerate(['blue','green','red']): 34 | print('Mean value in {} band is : {}'.format(band,mean[0,band_idx])) 35 | 36 | ##################################### 37 | # Show variance value 38 | print('For polygon 1 : ') 39 | for band_idx,band in enumerate(['blue','green','red']): 40 | print('Variance value in {} band is : {}'.format(band,var[0,band_idx])) 41 | 42 | ###############################################" 43 | # You can put in stats, every numpy function 44 | # 45 | # For example here : mean, median, max, min 46 | 47 | mean,median,amax,amin = zonal_stats(raster,vector,'uniquefid',stats=['mean','median','max','min']) 48 | 49 | print('For polygon 1 : ') 50 | for band_idx,band in enumerate(['blue','green','red']): 51 | print('Min value in {} band is : {}'.format(band,amin[0,band_idx])) 52 | print('Max value in {} band is : {}'.format(band,amax[0,band_idx])) 53 | 54 | -------------------------------------------------------------------------------- /getVersion.py: -------------------------------------------------------------------------------- 1 
| from museotoolbox import __version__ 2 | print(__version__) 3 | -------------------------------------------------------------------------------- /metadata/RasterMath_schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nkarasiak/MuseoToolBox/1b1705925ff5165b1310c4de22a0c0fa80959b8a/metadata/RasterMath_schema.png -------------------------------------------------------------------------------- /metadata/__docs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Nov 24 17:38:05 2019 5 | 6 | @author: nicolas 7 | 8 | in_image : string. 9 | A filename or path of a raster file. 10 | It could be any file that GDAL can open. 11 | 12 | in_vector : string. 13 | A filename or path corresponding to a vector file. 14 | It could be any file that GDAL/OGR can open. 15 | 16 | out_image : string. 17 | A geotiff extension filename corresponding to a raster image to create. 18 | 19 | X : array-like, shape = [n_samples, n_features] 20 | The training input samples. 21 | 22 | y : array-like, shape = [n_samples] 23 | The target values. 24 | 25 | in_image_mask : str 26 | A filename or path corresponding to a raster image. 27 | 0 values are considered as masked data. 28 | 29 | out_image : str 30 | A filename or path corresponding to a geotiff (.tif) raster image to save. 31 | 0 values are considered as masked data. 32 | 33 | 34 | """ 35 | 36 | 37 | -------------------------------------------------------------------------------- /metadata/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # ============================================================================= 4 | # ___ ___ _____ _______ 5 | # | \/ | |_ _| | | ___ \ 6 | # | . . 
|_ _ ___ ___ ___ | | ___ ___ | | |_/ / _____ __ 7 | # | |\/| | | | / __|/ _ \/ _ \ | |/ _ \ / _ \| | ___ \/ _ \ \/ / 8 | # | | | | |_| \__ \ __/ (_) | | | (_) | (_) | | |_/ / (_) > < 9 | # \_| |_/\__,_|___/\___|\___/ \_/\___/ \___/|_\____/ \___/_/\_\ 10 | # 11 | # @author: Nicolas Karasiak 12 | # @site: www.karasiak.net 13 | # @git: www.github.com/nkarasiak/MuseoToolBox 14 | # ============================================================================= -------------------------------------------------------------------------------- /metadata/museoToolBox_logo_1024.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nkarasiak/MuseoToolBox/1b1705925ff5165b1310c4de22a0c0fa80959b8a/metadata/museoToolBox_logo_1024.png -------------------------------------------------------------------------------- /metadata/museoToolBox_logo_128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nkarasiak/MuseoToolBox/1b1705925ff5165b1310c4de22a0c0fa80959b8a/metadata/museoToolBox_logo_128.png -------------------------------------------------------------------------------- /metadata/schema.drawio: -------------------------------------------------------------------------------- 1 | 
7V1bd+I2EP41PGYPvmIeE3LpbjcN3Wzb7BNHsRVQ17aoLFiSX1/Jl2Bb4hZA2IlO8mAPso3n+zSSZkZDxxpEixsCppNbHMCwY3aDRce67JimYZtmh/93g+dM0neNTDAmKMgbLQX36AXmwm4unaEAJpWGFOOQomlV6OM4hj6tyAAh+Fe12RMOq0+dgjEUBPc+CEXpPyigk0zqdLtL+W8QjSe09kEEira5IJmAAP8qiayrjjUgGNPsKFoMYMh1V6glu+56xaev34vAmG5zwWf//Orhi+nMgvPgCf0ZDYYv52dedpc5CGf5+34DCYXkln/77GvT50IVExqF7MjoWBcgROOYHYfwiT38guBZHED+oC47e0JhOMAhJuw8xjG7+iKhBP+EhbBjWobrdX3eeExAgNgr1C4YhyBJ8vsVTS4RYQgjzB8M2dd8vW8BjMkfjmN6DSIUcr59BRQzmaiqXHtzSChclES56m4gjiAlz6zJosA1uyJnsZef/lpSopeLJiU2WLkM5CQcv953CRQ7yLGS4/bHbH7+Zf731DF/3F4OXh57zsPLmWEI6MCA8TY/xYRO8BjHILxaSksocQiXbb5iPM2F/0JKn/NOCGap8kqowwWiDzko/PhH6fhyUT7hanKKsyEkiL02JEWLmKngoXyS3uiTU5wu75WePb+eBee8Ry9ZwiTXiGsu/VwgmdUHpuVspsld8gtws5BK87c3eq/E4Zpd2ekKIPCM+HBNTyvsGQVkDOmmLinykMAQUDSvfhEZr9JLmZ7Ac6nBFKOYJqU7D7lgSW+7Sm+3ZkrWt+47NT5nT1+y+/U19iC8qQnffsIPXYaJ+50+D7zRmdcHw79uBmdO6whvWCoYb2nGt5/x0oaO1T7K91RQ3taUf6+Ub9+0xjRUUN7RlG8/5aXzGrt9jHdUMN7VjH+njO+3j/FK1q49zfj2M14+rTFbR3lLyeLV05RvP+WlRr7XPsYrWbv2NePbz3h5w/bNa2wVa9fXyKamfIspLzXy7fPW2Adeu67VXylePiXYh0mC4rHQGZIJmPJDnnPAeZchm3+ecmUKfHbh94z5XELBY0EQOz9/zS4w7EwwxAnKo+B5+L3UO8qxd0a+K5f/HZGWewXVzZqPWQyqG4VsUsmxWE28baPqUnhtHXJZZdDK/FjNpZ2SLzZbK3PLAdr0pDZStf0y+keNi0tf3dXetHZSttdIyh46sr1WSaVRFCA9eu46ehq9zcOnrXL4dLWfs1G2yN7SFllWI2yR5Z1g+NR+ynZS1m4kZZUMn672NLaTs04zOXtg77j03XvaVdhOzrqN5OyhU7PknNV7LNrJ2WYsrQXOqnBQ90zN2VZythkeTIGzKtKjTO11X8XZpoYRt6b1SSLndRZvCCTWmx86QWqtCks+UJ/gJBkxAQpA6qHUHtEdPaKWa1aXNhKPaGFpD+0RlZs2vaGlbabN2dK02V05FY9r2rzdTFu9uQqXaqHBkmVj9gwkMHsvbdF2sWiea2+yaJ7KEE+R5VgCl0AQjObQp5iMUrEI8+rCEduWihhwqDeUijAE26mmFoTd3TJv5RDVINYGk0uo3M+mkHyFgMSsX22Px/sv5GF2a+5Pc7sudTzwxBos9/C/GXtDBMJrCOiMwHsY5grTUK6GUlKUxXBMhVAWgdQSlKwLzuFdDO9m9M3YlfXu4wj5h8S0AThaXn8jjl1DJY7i4qzA8X72eMPQmWo8t8dTZmINSymg4pw0BXSo4XwDnO7J4XQFOL+BOMDRPSVs8fWEYPD7NS/0piHdDlJTNoVVC6lkCjsFfAqUdtTzBAV6DN0FUVuCqKkUUcm8toSonhPthqdsUqQWT3HpX8NTD6a742rJ5kY9lbgWhqKEa4hBMJqghGKCfBCOuP9uB0Rb6dTxPK8CjKy/FYnrSnApVq4lXFAExnAUgeTn6IngKPe6vXdkau42aY+R5bkfDxlxmd8x3TC1I1MQs+MxP4YLNhv16ejb3efiY/a4cosdkAvB
IwwvgP9znOq8hsl7wFW2rDhWTd01JcoqsI4hHQXMDoLY5/2OErT4YL1NujI4lnN7dRktAZVxQDMbGKE4AotVkQf20mvCP60FRTa5d1WCIq7WSFo3nEe/PgYGlsRcOSohEK1VAqJpCEf5uPPWaMExAFIUiJN1i65KTMQlErdVeExGAUHsvTKTxbQLYxB9lJ4inUortVam3pgIOyfJipXCsXUVkuJXDk6cFeu4VUJvSLqpNz/0pjA5xfVGxnZSvBmbwvak+IHzyuQM19se28nwZuwu35Phh95xJlVVMQHSFG8ZxZuxG33feYqKMpmmZviRGL5r+vvWlJeXO20hwQ+9A3OtBst+GQp07vsb3Aj1n6BRl/u+djVaQvYWE6BzciugFdVJ1uT+2T0RtaP5fgwxWEfYjRGcw9Fz5o/zcfw0S5i+VoV43otfrlYwTBrb8VyV4Eh2QeIoQkmKBs4PPgoe0sRKU2LijoeHmCc7wNF0Rpke8z5yu20X+Xi2TubnVmrrRPSYaePvMwrEjKz30ok2b+c5Vqxh9cS4gsELX6uM5JPA5oAgaFyCy/YjiyQSp/T3kYvc5SavM612b7w237zylCKW28mNC8+TVKbv14vN139rfVP7vLL4UV0rVgt8K5rzb+H8Saqo7M35nnd8zot5SBEOZqGYWrGftyUlTNnb4ilwtgjj9QZK7jWC9037k1PBz+qLY3ixB0DJGC4mOA3SeU3rlx4cKJ5Xf547hyIUBKkxLuNqHAhXu7pGsS0RVVnamukcCVUxReppFsuz1ZowOVaLVXULgu06p8VKxyMbXL5IHl6TlC+SN3RPMafoWdU5woYATr25kmqEEifKBJAPGsDZy571ip96Vh/AWcP5CrTDENN36trcD7raZF66+/QwDh12SjCm5V5MwHRyiwPIW/wP -------------------------------------------------------------------------------- /metadata/schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nkarasiak/MuseoToolBox/1b1705925ff5165b1310c4de22a0c0fa80959b8a/metadata/schema.png -------------------------------------------------------------------------------- /museotoolbox/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # ============================================================================= 4 | # ___ ___ _____ _______ 5 | # | \/ | |_ _| | | ___ \ 6 | # | . . 
|_ _ ___ ___ ___ | | ___ ___ | | |_/ / _____ __ 7 | # | |\/| | | | / __|/ _ \/ _ \ | |/ _ \ / _ \| | ___ \/ _ \ \/ / 8 | # | | | | |_| \__ \ __/ (_) | | | (_) | (_) | | |_/ / (_) > < 9 | # \_| |_/\__,_|___/\___|\___/ \_/\___/ \___/|_\____/ \___/_/\_\ 10 | # 11 | # @author: Nicolas Karasiak 12 | # @site: www.karasiak.net 13 | # @git: www.github.com/nkarasiak/MuseoToolBox 14 | # ============================================================================= 15 | from . import ai, processing, cross_validation, datasets, stats 16 | 17 | __version__ = "0.13.6" 18 | -------------------------------------------------------------------------------- /museotoolbox/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # ============================================================================= 4 | # ___ ___ _____ _______ 5 | # | \/ | |_ _| | | ___ \ 6 | # | . . |_ _ ___ ___ ___ | | ___ ___ | | |_/ / _____ __ 7 | # | |\/| | | | / __|/ _ \/ _ \ | |/ _ \ / _ \| | ___ \/ _ \ \/ / 8 | # | | | | |_| \__ \ __/ (_) | | | (_) | (_) | | |_/ / (_) > < 9 | # \_| |_/\__,_|___/\___|\___/ \_/\___/ \___/|_\____/ \___/_/\_\ 10 | # 11 | # @author: Nicolas Karasiak 12 | # @site: www.karasiak.net 13 | # @git: www.github.com/nkarasiak/MuseoToolBox 14 | # ============================================================================= 15 | """ 16 | The :mod:`museotoolbox.datasets` module gathers available datasets for testing 17 | `MuseoToolBox`. 18 | """ 19 | import os 20 | 21 | __pathFile = os.path.dirname(os.path.realpath(__file__)).replace("\\","/") 22 | 23 | def load_historical_data(return_X_y=False, return_X_y_g=False, 24 | centroid=False, low_res=False): 25 | """ 26 | Get a sample of a french Historical map made by the army (carte d'état-major). 27 | These maps are used to identify forest in the 1800's. 28 | 29 | Field of the vector containning the label class is `Class`. 
30 | 31 | =================== ============== 32 | Classes 5 33 | Samples total 12647 34 | Number of polygons 17 35 | Dimensionality 3 36 | Features integer 37 | =================== ============== 38 | 39 | 40 | Parameters 41 | ----------- 42 | return_X_y : boolean, optional (default=False). 43 | If True, returns ``(data, target)`` instead of a path of files. 44 | return_X_y_g : boolean, optional (default=False). 45 | If True, returns the polygon id for each feature. 46 | centroid : boolean, optional (default=False). 47 | If True, return the path of the centroid for each feature. 48 | low_res : boolean, optinal (default=False). 49 | If True returns a low resolution of the raster, so you will have also less features. 50 | 51 | Returns 52 | ------- 53 | raster,vector : list of str. 54 | Return path of raster and vector files if 55 | (data, target) : tuple if ``return_X_y`` is True 56 | (data, target, group) : tuple if ``return_X_y_g`` is True 57 | 58 | References 59 | ----------- 60 | https://github.com/nkarasiak/HistoricalMap 61 | 62 | Examples 63 | -------- 64 | >>> X, y = load_historical_data(return_X_y=True) 65 | >>> X.shape, y.shape 66 | (12647, 3) (12647,) 67 | >>> raster,vector = load_historical_data() 68 | >>> raster 69 | /mnt/bigone/lib/MuseoToolBox/museotoolbox/datasets/_historicalmap/map_compress.tif 70 | >>> vector 71 | /mnt/bigone/lib/MuseoToolBox/museotoolbox/datasets/_historicalmap/train.gpkg 72 | """ 73 | to_return = [] 74 | separator = '/' 75 | if low_res: 76 | raster = __pathFile+'{0}_historicalmap{0}map_lowres.tif'.format(separator) 77 | else: 78 | raster = __pathFile+'{0}_historicalmap{0}map_compress.tif'.format(separator) 79 | 80 | vector = __pathFile+'{0}_historicalmap{0}train.gpkg'.format(separator) 81 | 82 | if return_X_y or return_X_y_g: 83 | from ..processing import extract_ROI 84 | if centroid: 85 | vector = __pathFile+'{0}_historicalmap{0}train_centroid.gpkg'.format(separator) 86 | 87 | if return_X_y_g: 88 | X, y, g = extract_ROI(raster, 
vector, 'Class', 'uniquefid') 89 | to_return = (X, y, g) 90 | else: 91 | X, y = extract_ROI(raster, vector, 'Class') 92 | to_return = (X, y) 93 | else: 94 | to_return.append(raster) 95 | if centroid: 96 | vectorCentroid = __pathFile+'{0}_historicalmap{0}train_centroid.gpkg'.format(separator) 97 | to_return.append(vectorCentroid) 98 | else: 99 | to_return.append(vector) 100 | 101 | return to_return 102 | -------------------------------------------------------------------------------- /museotoolbox/datasets/_historicalmap/map_compress.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nkarasiak/MuseoToolBox/1b1705925ff5165b1310c4de22a0c0fa80959b8a/museotoolbox/datasets/_historicalmap/map_compress.tif -------------------------------------------------------------------------------- /museotoolbox/datasets/_historicalmap/map_lowres.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nkarasiak/MuseoToolBox/1b1705925ff5165b1310c4de22a0c0fa80959b8a/museotoolbox/datasets/_historicalmap/map_lowres.tif -------------------------------------------------------------------------------- /museotoolbox/datasets/_historicalmap/train.gpkg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nkarasiak/MuseoToolBox/1b1705925ff5165b1310c4de22a0c0fa80959b8a/museotoolbox/datasets/_historicalmap/train.gpkg -------------------------------------------------------------------------------- /museotoolbox/datasets/_historicalmap/train_centroid.gpkg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nkarasiak/MuseoToolBox/1b1705925ff5165b1310c4de22a0c0fa80959b8a/museotoolbox/datasets/_historicalmap/train_centroid.gpkg -------------------------------------------------------------------------------- /museotoolbox/internal_tools/__init__.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # ============================================================================= 4 | # ___ ___ _____ _______ 5 | # | \/ | |_ _| | | ___ \ 6 | # | . . |_ _ ___ ___ ___ | | ___ ___ | | |_/ / _____ __ 7 | # | |\/| | | | / __|/ _ \/ _ \ | |/ _ \ / _ \| | ___ \/ _ \ \/ / 8 | # | | | | |_| \__ \ __/ (_) | | | (_) | (_) | | |_/ / (_) > < 9 | # \_| |_/\__,_|___/\___|\___/ \_/\___/ \___/|_\____/ \___/_/\_\ 10 | # 11 | # @author: Nicolas Karasiak 12 | # @site: www.karasiak.net 13 | # @git: www.github.com/nkarasiak/MuseoToolBox 14 | # ============================================================================= 15 | 16 | 17 | def push_feedback(msg, feedback=None): 18 | # in order to convert in Qgis Processing 19 | # ============================================================================= 20 | # if feedback and feedback is not True: 21 | # if feedback == 'gui': 22 | # QgsMessageLog.logMessage(str(msg)) 23 | # else: 24 | # feedback.setProgressText(msg) 25 | # else: 26 | # ============================================================================= 27 | print(msg) 28 | 29 | 30 | class ProgressBar: 31 | 32 | def __init__(self, total, message='', length=40): 33 | """ 34 | total : int 35 | Total number of samples. 36 | message : str 37 | Custom message to show before the progress bar. 38 | length : int. 39 | Length of the bar. 40 | """ 41 | self.start = 0 42 | self.total = total 43 | self.length = length 44 | self.message = message 45 | self.lastPosition = None 46 | 47 | def add_position(self, value=False): 48 | """ 49 | Add progress to the bar. 50 | 51 | Parameters 52 | ---------- 53 | 54 | value : int or False. 55 | If false, will add one. 
56 | """ 57 | 58 | if value: 59 | inPercent = int(value / self.total * 100) 60 | else: 61 | 62 | self.start += 1 63 | value = self.start 64 | inPercent = int(self.start / self.total * 100) 65 | 66 | if inPercent != self.lastPosition: 67 | self.lastPosition = inPercent 68 | self.nHash = int(self.length * (value / self.total)) 69 | self.nPoints = int(self.length - int(self.nHash)) 70 | 71 | self.printBar(inPercent) 72 | 73 | def printBar(self, value): 74 | if value == 100: 75 | end = "\n" 76 | else: 77 | end = "\r" 78 | 79 | # print(self.nHash) 80 | # print(self.nPoints) 81 | print( 82 | '\r' + 83 | self.message + 84 | ' [{}{}]{}%'.format( 85 | self.nHash * 86 | "#", 87 | self.nPoints * 88 | ".", 89 | self.lastPosition), 90 | end=end, 91 | flush=True) 92 | -------------------------------------------------------------------------------- /paper.bib: -------------------------------------------------------------------------------- 1 | @article{moran_notes_1950, 2 | title = {Notes on {Continuous} {Stochastic} {Phenomena}}, 3 | volume = {37}, 4 | issn = {0006-3444}, 5 | url = {http://www.jstor.org/stable/2332142}, 6 | doi = {10.2307/2332142}, 7 | number = {1/2}, 8 | urldate = {2017-12-08}, 9 | journal = {Biometrika}, 10 | author = {Moran, P. A. 
P.}, 11 | year = {1950}, 12 | pages = {17--23} 13 | } 14 | 15 | @article{scikitlearn_2011, 16 | author = {Pedregosa, Fabian and Varoquaux, Ga\"{e}l and Gramfort, Alexandre and Michel, Vincent and Thirion, Bertrand and Grisel, Olivier and Blondel, Mathieu and Prettenhofer, Peter and Weiss, Ron and Dubourg, Vincent and Vanderplas, Jake and Passos, Alexandre and Cournapeau, David and Brucher, Matthieu and Perrot, Matthieu and Duchesnay, \'{E}douard}, 17 | title = {Scikit-Learn: Machine Learning in Python}, 18 | year = {2011}, 19 | issue_date = {February 2011}, 20 | publisher = {JMLR.org}, 21 | volume = {12}, 22 | issn = {1532-4435}, 23 | journal = {Journal of Machine Learning Research}, 24 | month = nov, 25 | pages = {2825–2830}, 26 | numpages = {6} 27 | } 28 | @article{karasiak_2019, 29 | title = {Statistical stability and spatial unstability in prediction of forest tree species using satellite image time series}, 30 | journal = {Remote Sensing}, 31 | author = {Karasiak, Nicolas and Dejoux, J.-F. and Fauvel, M. and Willm, J. and Monteil, C. and Sheeren, D.}, 32 | year = {2019}, 33 | doi = {10.3390/rs11212512} 34 | } 35 | 36 | @article{roberts_2017, 37 | title = {Cross-validation strategies for data with temporal, spatial, hierarchical, or phylogenetic structure}, 38 | volume = {40}, 39 | issn = {09067590}, 40 | doi = {10.1111/ecog.02881}, 41 | language = {en}, 42 | number = {8}, 43 | urldate = {2019-12-12}, 44 | journal = {Ecography}, 45 | author = {Roberts, David R. and Bahn, Volker and Ciuti, Simone and Boyce, Mark S. and Elith, Jane and Guillera-Arroita, Gurutzeta and Hauenstein, Severin and Lahoz-Monfort, José J. and Schröder, Boris and Thuiller, Wilfried and Warton, David I. and Wintle, Brendan A. 
and Hartig, Florian and Dormann, Carsten F.}, 46 | month = aug, 47 | year = {2017}, 48 | pages = {913--929} 49 | } 50 | 51 | @article{olofsson_good_2014, 52 | title = {Good practices for estimating area and assessing accuracy of land change}, 53 | volume = {148}, 54 | issn = {0034-4257}, 55 | doi = {10.1016/j.rse.2014.02.015}, 56 | urldate = {2019-02-18}, 57 | journal = {Remote Sensing of Environment}, 58 | author = {Olofsson, Pontus and Foody, Giles M. and Herold, Martin and Stehman, Stephen V. and Woodcock, Curtis E. and Wulder, Michael A.}, 59 | month = may, 60 | year = {2014}, 61 | keywords = {Remote sensing, Accuracy assessment, Area estimation, Land change, Response design, Sampling design}, 62 | pages = {42--57} 63 | } 64 | -------------------------------------------------------------------------------- /paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'Museo ToolBox: A Python library for remote sensing including a new way to handle rasters.' 3 | 4 | tags: 5 | - Python 6 | - remote sensing 7 | - spatial cross-validation 8 | - raster 9 | - vector 10 | - autocorrelation 11 | 12 | authors: 13 | - name: Nicolas Karasiak 14 | orcid: 0000-0002-1558-0816 15 | affiliation: "1" # (Multiple affiliations must be quoted) 16 | affiliations: 17 | - name: Université de Toulouse, INRAE, UMR DYNAFOR, Castanet-Tolosan, France 18 | index: 1 19 | 20 | date: 13 December 2019 21 | 22 | bibliography: paper.bib 23 | 24 | --- 25 | 26 | # Summary 27 | 28 | `Museo ToolBox` is a Python library dedicated to the processing of georeferenced arrays, also known as rasters or images in remote sensing. 29 | 30 | In this domain, classifying land cover type is a common and sometimes complex task, regardless of your level of expertise. 
Recurring procedures such as extracting Regions Of Interest (ROIs, or raster values from a polygon), computing spectral indices or validating a model with a cross-validation can be difficult to implement. 31 | 32 | `Museo ToolBox` aims at simplifying the whole process by making the main treatments more accessible (extracting of ROIs, fitting a model with cross-validation, computing Normalized Difference Vegetation Index (NDVI) or various spectral indices, performing any kind of array function to the raster, etc). 33 | 34 | The main objective of this library is to facilitate the transposition of array-like functions into an image and to promote good practices in machine learning. 35 | 36 | To make `Museo ToolBox` easier to get started with, a [full documentation with lot of examples is available online on read the docs](http://museotoolbox.readthedocs.io/). 37 | 38 | # Museo ToolBox in details 39 | 40 | `Museo ToolBox` is organized into several modules (Figure 1): 41 | 42 | - [processing](https://museotoolbox.readthedocs.io/en/latest/modules/museotoolbox.processing.html): raster and vector processing. 43 | - [cross-validation](https://museotoolbox.readthedocs.io/en/latest/modules/museotoolbox.cross_validation.html): stratified cross-validation compatible with scikit-learn. 44 | - [ai](https://museotoolbox.readthedocs.io/en/latest/modules/museotoolbox.ai.html): artificial intelligence module built upon scikit-learn [@scikitlearn_2011]. 45 | - [charts](https://museotoolbox.readthedocs.io/en/latest/modules/museotoolbox.charts.html): plot confusion matrix with F1 score or producer/user's accuracy. 46 | - [stats](https://museotoolbox.readthedocs.io/en/latest/modules/museotoolbox.stats.html): compute statistics (such as Moran's Index [@moran_notes_1950], confusion matrix, commision/omission) or extracting truth and predicted label from a confusion matrix. 47 | 48 | ![Museo ToolBox schema.](metadata/schema.png) 49 | 50 | The main usages of `Museo ToolBox` are: 51 | 52 | 1. 
[Reading and writing a raster block per block using your own function](https://museotoolbox.readthedocs.io/en/latest/modules/processing/museotoolbox.processing.RasterMath.html). 53 | 2. [Generating cross-validation, including spatial cross-validation](https://museotoolbox.readthedocs.io/en/latest/auto_examples/index.html#cross-validation). 54 | 3. [Fitting models with scikit-learn, extracting accuracy from each cross-validation fold, and predicting raster](https://museotoolbox.readthedocs.io/en/latest/modules/ai/museotoolbox.ai.SuperLearner.html). 55 | 4. [Plotting confusion matrix and adding f1 score or producer/user accuracy](https://museotoolbox.readthedocs.io/en/latest/modules/charts/museotoolbox.charts.PlotConfusionMatrix.html#museotoolbox.charts.PlotConfusionMatrix). 56 | 5. [Getting the y_true and and y_predicted labels from a confusion matrix](https://museotoolbox.readthedocs.io/en/latest/modules/stats/museotoolbox.stats.retrieve_y_from_confusion_matrix.html). 57 | 58 | ## RasterMath 59 | 60 | Available in `museotoolbox.processing`, the `RasterMath` class is the keystone of ``Museo ToolBox``. 61 | 62 | The question I asked myself is: How can we make it as easy as possible to implement array-like functions on images? The idea behind ``RasterMath`` is that if the function is intended to operate with an array, it should be easy to use it with your raster using as few lines as possible. 63 | 64 | So, what does ``RasterMath`` really do? The user only works with an array and confirms with a sample that the process is doing well, and lets `RasterMath` generalize it to the whole image. The user doesn't need to manage the raster reading and writing process, the no-data management, the compression, the number of bands, or the projection. Figure 2 describes how `RasterMath` reads a raster, performs the function, and writes it to a new raster. 
65 | 66 | The objective is to **allow the user to focus solely on the array-compatible function** while ``RasterMath`` manages the raster part. 67 | 68 | [See ``RasterMath`` documentation and examples](https://museotoolbox.readthedocs.io/en/latest/modules/processing/museotoolbox.processing.RasterMath.html). 69 | 70 | ![``RasterMath`` under the hood](metadata/RasterMath_schema.png) 71 | 72 | ## Artificial Intelligence 73 | 74 | The artificial intelligence (`ai`) module is natively built to implement ``scikit-learn`` algorithms and uses state of the art methods (such as standardizing the input data). ``SuperLearner`` class optimizes the fit process using a grid search to fix the parameters of the classifier. There is also a Sequential Feature Selection protocol which supports a number of components (e.g. a single-date image is composed of four bands, i.e. four features, so a user may select four features at once). 75 | 76 | [See the ``SuperLearner`` documentation and examples](https://museotoolbox.readthedocs.io/en/latest/modules/ai/museotoolbox.ai.SuperLearner.html). 77 | 78 | ## Cross-validation 79 | 80 | ``Museo ToolBox`` implements stratified cross-validation, which means the separation between the training and the validation samples is made by respecting the size per class. 81 | For example the Leave-One-Out method will keep one sample of validation per class. As stated by @olofsson_good_2014 *"stratified random sampling is a practical design that satisfies the 82 | basic accuracy assessment objectives and most of the desirable design 83 | criteria"*. For spatial cross-validation, see @karasiak_2019 inspired by @roberts_2017. 84 | 85 | ``Museo ToolBox`` offers two different kinds of cross-validation: 86 | 87 | ### Non-spatial cross-validation 88 | 89 | - Leave-One-Out. 90 | - Leave-One-SubGroup-Out. 91 | - Leave-P-SubGroup-Out (Percentage of subgroup per class). 92 | - Random Stratified K-Fold. 
93 | 94 | ### Spatial cross-validation 95 | 96 | - Spatial Leave-One-Out [@karasiak_2019]. 97 | - Spatial Leave-Aside-Out. 98 | - Spatial Leave-One-SubGroup-Out (using centroids to select one subgroup and remove other subgroups for the same class inside a specified distance buffer). 99 | 100 | [See the cross-validation documentation and examples](https://museotoolbox.readthedocs.io/en/latest/auto_examples/index.html#cross-validation). 101 | 102 | # Acknowledgements 103 | 104 | I acknowledge contributions from [Mathieu Fauvel](http://fauvel.mathieu.free.fr/), beta-testers (hey [Yousra Hamrouni](https://github.com/yousraH)), and my thesis advisors: Jean-François Dejoux, Claude Monteil and [David Sheeren](https://dsheeren.github.io/). Many thanks to Marie for proofreading. 105 | Many thanks to Sigma students: [Hélène Ternisien de Boiville](https://github.com/HTDBD), [Arthur Duflos](https://github.com/ArthurDfs), [Sam Antonetti](https://github.com/santonetti) and [Anne-Sophie Tronc](https://github.com/AnneSophieTronc) for their involvement in ``RasterMath`` improvements in early 2020. 
106 | 107 | # References 108 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | # dependencies to install for Museo ToolBox developpers 2 | 3 | # Museo ToolBox end-user depencies 4 | # same as requirements.txt 5 | numpy>=1.10 6 | scipy>=1.0 7 | scikit-learn>=0.16 8 | matplotlib 9 | joblib 10 | psutil 11 | 12 | # Development specific requirements 13 | m2r 14 | autopep8 15 | recommonmark 16 | sphinx 17 | sphinx-rtd-theme 18 | numpydoc 19 | ipykernel 20 | nbsphinx 21 | sphinx-gallery 22 | sphinx-copybutton 23 | pytest 24 | pytest-cov 25 | 26 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # dependencies for end-user 2 | # Does not include GDAL, so you need to install it before 3 | numpy>=1.10 4 | scipy>=1.0 5 | scikit-learn>=0.16 6 | matplotlib 7 | joblib 8 | psutil 9 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # ============================================================================= 4 | # ___ ___ _____ _______ 5 | # | \/ | |_ _| | | ___ \ 6 | # | . . 
|_ _ ___ ___ ___ | | ___ ___ | | |_/ / _____ __ 7 | # | |\/| | | | / __|/ _ \/ _ \ | |/ _ \ / _ \| | ___ \/ _ \ \/ / 8 | # | | | | |_| \__ \ __/ (_) | | | (_) | (_) | | |_/ / (_) > < 9 | # \_| |_/\__,_|___/\___|\___/ \_/\___/ \___/|_\____/ \___/_/\_\ 10 | # 11 | # @author: Nicolas Karasiak 12 | # @site: www.karasiak.net 13 | # @git: www.github.com/nkarasiak/MuseoToolBox 14 | # ============================================================================= 15 | import re 16 | 17 | __version__ = re.search( 18 | r'__version__\s*=\s*[\'"]([^\'"]*)[\'"]', # It excludes inline comment too 19 | open('museotoolbox/__init__.py').read()).group(1) 20 | 21 | import setuptools 22 | 23 | with open('README.md', 'r') as fh: 24 | long_description = fh.read() 25 | 26 | 27 | 28 | setuptools.setup( 29 | name='museotoolbox', 30 | version=__version__, 31 | description='Raster and vector tools for Remote Sensing and Classification, built upon gdal library', 32 | long_description=long_description, 33 | long_description_content_type="text/markdown", 34 | url='https://github.com/nkarasiak/MuseoToolBox', 35 | author='Nicolas Karasiak', 36 | author_email='karasiak.nicolas@gmail.com', 37 | license='GPLv3', 38 | install_requires=['numpy>=1.10', 39 | 'scipy>=1.0', 40 | 'scikit-learn>=0.16', 41 | 'matplotlib', 42 | 'joblib', 43 | 'psutil'], 44 | packages=setuptools.find_packages(), 45 | classifiers=[ 46 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 47 | "Topic :: Scientific/Engineering :: GIS", 48 | "Programming Language :: Python :: 3", 49 | "Intended Audience :: Science/Research"], 50 | zip_safe=False, 51 | package_data={ 52 | 'museotoolbox': ['datasets/_historicalmap/map_compress.tif','datasets/_historicalmap/map_lowres.tif','datasets/_historicalmap/train.gpkg','datasets/_historicalmap/train_centroid.gpkg'] 53 | } 54 | ) 55 | -------------------------------------------------------------------------------- /test/__init__.py: 
-------------------------------------------------------------------------------- 1 | # init 2 | -------------------------------------------------------------------------------- /test/test_ai.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import unittest 3 | 4 | import numpy as np 5 | from museotoolbox import ai 6 | from museotoolbox.datasets import load_historical_data 7 | from museotoolbox.processing import image_mask_from_vector 8 | from osgeo import gdal 9 | 10 | import os 11 | import tempfile 12 | tempdir = tempfile.mkdtemp() 13 | import shutil 14 | 15 | from sklearn.ensemble import RandomForestClassifier 16 | 17 | raster,vector = load_historical_data(low_res=True) 18 | X,y,g = load_historical_data(return_X_y_g=True,low_res=True) 19 | param_grid = dict(n_estimators=[1,10]) 20 | classifier = RandomForestClassifier() 21 | image_mask_from_vector(vector,raster,os.path.join(tempdir,'mask.tif')) 22 | 23 | class TestStats(unittest.TestCase): 24 | def test_superLearner(self): 25 | 26 | n_cv = 2 27 | for tf in [True,False]: 28 | verbose = tf+1 29 | model = ai.SuperLearner(classifier,param_grid=param_grid,n_jobs=1,verbose=verbose) 30 | model.fit(X,y,group=g,standardize=tf,cv=n_cv) 31 | assert(model.predict_array(X).shape == y.shape) 32 | len(model.CV) == n_cv 33 | assert(np.all(model.group == g)) 34 | 35 | model.predict_image(raster,os.path.join(tempdir,'class.tif'),confidence_per_class=os.path.join(tempdir,'confclass.tif'),higher_confidence=os.path.join(tempdir,'higherconf.tif')) 36 | assert(model._is_standardized == tf) 37 | 38 | # test masked return if X is totally masked 39 | X_masked = np.ma.copy(X) 40 | X_masked.mask=True 41 | X_masked_return = model._convert_array(X_masked) 42 | assert(np.ma.is_masked(X_masked_return)) 43 | 44 | 45 | 46 | def test_superLearn_pred(self): 47 | model = ai.SuperLearner(classifier,param_grid=param_grid,n_jobs=1,verbose=0) 48 | model.customize_array(np.mean,axis=1) 49 
| model.fit(X,y,group=g,standardize=True,cv=2) 50 | # # 51 | assert(model._array_is_customized == True) 52 | assert(model._array_is_customized) 53 | assert(model.xFunction) 54 | model.predict_image(raster,out_image=os.path.join(tempdir,'class.tif'),in_image_mask=os.path.join(tempdir,'mask.tif'),confidence_per_class=os.path.join(tempdir,'confclass.tif'),higher_confidence=os.path.join(tempdir,'higherconf.tif')) 55 | assert(gdal.Open(os.path.join(tempdir,'class.tif')).RasterCount == 1) 56 | assert(gdal.Open(os.path.join(tempdir,'higherconf.tif')).RasterCount == 1) 57 | assert(gdal.Open(os.path.join(tempdir,'confclass.tif')).RasterCount == len(np.unique(y))) 58 | cms = model.get_stats_from_cv() 59 | 60 | assert(len(cms) == 2) 61 | model.save_cm_from_cv(tempdir,prefix='coco',header=False) 62 | 63 | model.save_model(os.path.join(tempdir,'model')) 64 | assert(os.path.exists(os.path.join(tempdir,'model.npz'))) 65 | model.load_model(os.path.join(tempdir,'model')) 66 | 67 | 68 | with self.assertRaises(ValueError): 69 | model.fit(X,y,cv=False) 70 | 71 | def test_sequential(self): 72 | 73 | sfs = ai.SequentialFeatureSelection(classifier,param_grid) 74 | sfs.fit(X,y,cv=2) 75 | sfs.predict(X,idx=0) 76 | assert(not np.all(sfs.predict(X,idx=0) == sfs.predict(X,idx=1))) 77 | sfs.predict_best_combination(raster,os.path.join(tempdir,'class')) 78 | sfs.predict_images(raster,os.path.join(tempdir,'class')) 79 | assert(sfs.get_best_model()) 80 | assert(sfs.transform(X,idx='best').shape[1] == sfs.best_idx_+1) 81 | 82 | n_comp = 2 83 | max_features = 2 84 | sfs = ai.SequentialFeatureSelection(classifier,param_grid,n_comp=n_comp,path_to_save_models='/tmp/sfs_models/',verbose=1) 85 | def double_columns(x): 86 | return np.hstack((x,x)) 87 | sfs.customize_array(double_columns) 88 | sfs.fit(X,y,max_features=max_features,standardize=True,cv=2) 89 | sfs.fit(X,y,max_features=max_features,standardize=True,cv=2) # to reload from path 90 | assert(sfs.transform(X,idx=1).shape[1] == 2*n_comp) 91 | 
assert(sfs.transform(X,idx=0).shape[1] == n_comp) 92 | assert(sfs.X.shape[1] == X.shape[1]*2) 93 | assert(len(sfs.best_features_) == 2) 94 | sfs.predict_images(raster,tempdir) 95 | sfs.predict_best_combination(raster,os.path.join(tempdir,'best.tif')) 96 | assert(sfs.get_best_model().X.shape[1] == n_comp*(sfs.best_idx_+1) ) 97 | sfs.predict(X,0) 98 | 99 | if __name__ == "__main__": 100 | unittest.main() 101 | shutil.rmtree(tempdir) -------------------------------------------------------------------------------- /test/test_charts.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import unittest 3 | 4 | import os 5 | import tempfile 6 | import numpy as np 7 | from museotoolbox import charts 8 | confusion_matrix = np.random.randint(5,20,[5,5]) 9 | confusion_matrix[-1,-1] = 0 10 | confusion_matrix[-1,:] = 0 11 | 12 | tmp_dir = tempfile.mkdtemp() 13 | 14 | class TestCharts(unittest.TestCase): 15 | def test_Plot(self): 16 | for hide_ticks in [True,False]: 17 | pcm = charts.PlotConfusionMatrix(confusion_matrix) 18 | pcm.color_diagonal('RdYlBu') 19 | pcm.add_text() 20 | pcm.add_x_labels([1,2,3,4,5],rotation=59+hide_ticks,position='bottom') 21 | pcm.add_y_labels(['one','two','three','four','five']) 22 | pcm.add_mean('mean','mean',hide_ticks=True) 23 | 24 | def test_f1(self): 25 | pcm = charts.PlotConfusionMatrix(confusion_matrix,left=0.12,right=.9) 26 | pcm.add_text() 27 | pcm.add_x_labels([1,2,3,4,5],position='top',rotation=90) 28 | pcm.add_f1() 29 | 30 | pcm.add_y_labels(['one','two','three','four','five']) 31 | pcm.save_to(os.path.join(tmp_dir,'test.pdf')) 32 | os.remove(os.path.join(tmp_dir,'test.pdf')) 33 | 34 | def test_f1_nonsquarematrix(self): 35 | pcm = charts.PlotConfusionMatrix(confusion_matrix[:,:-2]) 36 | 37 | self.assertRaises(Warning,pcm.add_f1) 38 | self.assertRaises(Warning,pcm.color_diagonal) 39 | self.assertRaises(Warning,pcm.add_accuracy) 40 | 41 | 42 | def test_accuracy(self): 43 | for 
rotation in [45,90]: 44 | pcm = charts.PlotConfusionMatrix(confusion_matrix,left=0.12,right=.9,cmap='PuRd_r') 45 | pcm.add_text(thresold=35) 46 | pcm.add_x_labels([1,2,3,4,5],position='top',rotation=90) 47 | pcm.add_y_labels(['one','two','three','four','five']) 48 | pcm.add_accuracy() 49 | 50 | if __name__ == "__main__": 51 | unittest.main() 52 | -------------------------------------------------------------------------------- /test/test_cross_validation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Nov 24 12:03:31 2019 5 | 6 | @author: nicolas 7 | """ 8 | # -*- coding: utf-8 -*- 9 | import unittest 10 | import os 11 | import numpy as np 12 | 13 | from museotoolbox.datasets import load_historical_data 14 | from museotoolbox import cross_validation 15 | from museotoolbox import processing 16 | 17 | raster,vector = load_historical_data() 18 | X,y,g = load_historical_data(return_X_y_g=True) 19 | distance_matrix = processing.get_distance_matrix(raster,vector) 20 | n_class = len(np.unique(y,return_counts=True)[1]) 21 | smallest_class = np.min(np.unique(y,return_counts=True)[1]) 22 | 23 | 24 | 25 | class TestCV(unittest.TestCase): 26 | def test_train_split(self): 27 | np.random.seed(42) 28 | y = np.random.randint(1,3,10).reshape(-1,1) 29 | X = np.random.randint(1,255,[10,3],dtype=np.uint8) 30 | g = np.random.randint(1,3,10).reshape(-1,1) 31 | 32 | cv = cross_validation.LeaveOneOut(random_state=42) 33 | 34 | X_train, X_test, y_train, y_test = cross_validation.train_test_split(cv,X,y) 35 | assert ( X_train.shape[0]+X_test.shape[0] == X.shape[0] ) 36 | assert ( y_train.shape[0]+y_test.shape[0] == y.shape[0] ) 37 | assert ( np.all( np.equal(y_test,np.array([1,2]) ) ) ) 38 | 39 | X_train, X_test, y_train, y_test, g_train, g_test = cross_validation.train_test_split(cv,X,y,groups=g) 40 | assert (X_train.shape[0] == y_train.shape[0] == g_train.shape[0]) 41 | 
assert (X_test.shape[0] == y_test.shape[0] == g_test.shape[0]) 42 | 43 | def test_loo(self): 44 | for split in [False,1,2,5]: 45 | 46 | cv = cross_validation.LeaveOneOut(n_repeats=split,random_state=split,verbose=split) 47 | if split == False: 48 | assert(cv.get_n_splits(X,y)==np.min(np.unique(y,return_counts=True)[-1])) 49 | else: 50 | assert(cv.get_n_splits(X,y)==split) 51 | assert(cv.verbose == split) 52 | 53 | for tr,vl in cv.split(X,y): 54 | assert(tr.size == y.size-5) 55 | assert(vl.size == 5) 56 | assert(len(vl) == 5) 57 | 58 | 59 | def test_kfold(self): 60 | cv = cross_validation.RandomStratifiedKFold(valid_size=1/50) 61 | 62 | self.assertRaises(ValueError,cv.get_n_splits,X,y) 63 | 64 | for split in [1,2,5]: 65 | cv = cross_validation.RandomStratifiedKFold(n_splits=1+split,n_repeats=split,verbose=split) 66 | assert(cv.get_n_splits(X,y)==split*split+split) 67 | assert(cv.verbose == split) 68 | 69 | for idx,[tr,vl] in enumerate(cv.split(X,y)): 70 | assert(int(tr.size/vl.size) == split) 71 | assert(np.unique(y[vl],return_counts=True)[0].size == 5) 72 | 73 | assert(idx+1 == split*split+split) 74 | 75 | def test_LeavePSubGroupOut(self): 76 | 77 | cv = cross_validation.LeavePSubGroupOut(verbose=2) 78 | for tr,vl in cv.split(X,y,g): 79 | assert(not np.unique(np.in1d([1,2],[3,4]))[0]) 80 | 81 | self.assertRaises(ValueError,cross_validation.LeavePSubGroupOut,valid_size='ko') 82 | self.assertRaises(ValueError,cross_validation.LeavePSubGroupOut,valid_size=5.1) 83 | 84 | def test_LeaveOneSubGroupOut(self): 85 | cv = cross_validation.LeaveOneSubGroupOut(verbose=2) 86 | # if only one subgroup 87 | tempG = np.copy(g) 88 | tempG[np.where(y==5)] = 1 89 | self.assertRaises(Exception,cv.get_n_splits,X,y,tempG) 90 | 91 | # if all is ok 92 | cv = cross_validation.LeaveOneSubGroupOut(verbose=2) 93 | y_vl = np.array([]) 94 | for tr,vl in cv.split(X,y,g): 95 | y_vl = np.concatenate((y_vl,vl)) 96 | assert(not np.unique(np.in1d([1,2],[3,4]))[0]) 97 | 
assert(np.all(np.unique(np.asarray(y_vl),return_counts=True)[1]==1)) 98 | 99 | list_files =cv.save_to_vector(vector,'Class',group='uniquefid',out_vector='/tmp/cv_g.gpkg') 100 | 101 | assert(len(list_files)==cv.get_n_splits(X,y,g)) 102 | 103 | def test_SLOO(self): 104 | 105 | assert(distance_matrix.shape[0] == y.size) 106 | 107 | cv = cross_validation.SpatialLeaveOneOut(distance_thresold=100, 108 | distance_matrix=distance_matrix, 109 | random_state=12,verbose=1) 110 | 111 | 112 | processing.sample_extraction(raster,vector,out_vector='/tmp/pixels.gpkg',verbose=False) 113 | y_ = processing.read_vector_values('/tmp/pixels.gpkg','Class') 114 | y_polygons = processing.read_vector_values(vector,'Class') 115 | assert(y_.size == y.size) 116 | assert(y_polygons.size != y_.size) 117 | 118 | list_files=cv.save_to_vector('/tmp/pixels.gpkg','Class',out_vector='/tmp/cv.gpkg') 119 | assert(len(list_files[0]) == 2) 120 | for l in list_files: 121 | for f in l: 122 | os.remove(f) 123 | os.remove('/tmp/pixels.gpkg') 124 | # to keep same size of training by a random selection 125 | 126 | 127 | as_loo = cross_validation._sample_selection._cv_manager(cross_validation._sample_selection.distanceCV, 128 | distance_thresold=100, 129 | distance_matrix=distance_matrix, 130 | random_state=12, 131 | LOO_same_size=True, 132 | valid_size=1) 133 | y_vl = [] 134 | y_asloo_vl = [] 135 | for sloo_cv,as_loo_cv in zip(cv.split(X,y),as_loo.split(X,y)): 136 | y_vl.append(sloo_cv[1]) 137 | y_asloo_vl.append(as_loo_cv[1]) 138 | assert(n_class == len(y[sloo_cv[1]])) 139 | assert(sloo_cv[0].size == as_loo_cv[0].size) # same size between loo and sloo 140 | assert(np.all(sloo_cv[1] == as_loo_cv[1])) # using same valid pixel 141 | 142 | assert(np.all(np.unique(np.asarray(y_vl),return_counts=True)[1]==1)) 143 | assert(np.all(np.unique(np.asarray(y_asloo_vl),return_counts=True)[1]==1)) 144 | 145 | as_loo = cross_validation._sample_selection._cv_manager(cross_validation._sample_selection.distanceCV, 146 | 
distance_thresold=300, 147 | distance_matrix=distance_matrix, 148 | random_state=12, 149 | LOO_same_size=True,valid_size=2,n_repeats=1,n_splits=5,verbose=1) 150 | for tr,vl in as_loo.split(X,y): 151 | assert(vl.size == n_class) 152 | 153 | 154 | as_loo = cross_validation._sample_selection._cv_manager(cross_validation._sample_selection.distanceCV, 155 | distance_thresold=100, 156 | distance_matrix=distance_matrix, 157 | random_state=12, 158 | LOO_same_size=True,valid_size=False,n_repeats=1,n_splits=5,verbose=1) 159 | as_loo.get_n_splits(X,y) 160 | # distance too high 161 | cv = cross_validation.SpatialLeaveOneOut(distance_thresold=10000,distance_matrix=distance_matrix,verbose=2) 162 | 163 | self.assertRaises(ValueError,cv.get_n_splits,X,y) 164 | 165 | 166 | def test_aside(self): 167 | 168 | SLOPO = cross_validation.SpatialLeaveAsideOut(valid_size=1/3, 169 | distance_matrix=distance_matrix,random_state=2,verbose=2) 170 | 171 | assert(SLOPO.get_n_splits(X,y) == int(1/(1/3))) 172 | 173 | for tr,vl in SLOPO.split(X,y): 174 | assert(np.unique(y[vl]).size == 5) 175 | assert(np.unique(y[tr]).size == 5) 176 | 177 | 178 | def test_slosgo(self) : 179 | 180 | cv = cross_validation.SpatialLeaveOneSubGroupOut(distance_thresold=100,distance_matrix=distance_matrix,distance_label=g,verbose=2) 181 | 182 | y_vl = np.array([]) 183 | for tr,vl in cv.split(X,y,g) : 184 | print(np.unique(g[vl])) 185 | assert(n_class==np.unique(g[vl]).size) 186 | assert(np.all(np.unique(np.asarray(y_vl),return_counts=True)[1]==1)) 187 | 188 | processing.sample_extraction(raster,vector,out_vector='/tmp/pixels.gpkg',verbose=False) 189 | test_extensions = ['wrong','shp','gpkg'] 190 | for extension in test_extensions: 191 | if extension == 'wrong': 192 | 193 | self.assertRaises(Exception,cv.save_to_vector,'/tmp/pixels.gpkg','Class',out_vector='/tmp/SLOSGO.'+extension) 194 | else: 195 | list_files = cv.save_to_vector('/tmp/pixels.gpkg','Class',out_vector='/tmp/SLOSGO.'+extension) 196 | # test overwriting of 
previous files 197 | list_files = cv.save_to_vector('/tmp/pixels.gpkg','Class',out_vector='/tmp/SLOSGO.'+extension) 198 | for tr,vl in list_files: 199 | assert(len(list_files[0]) == 2) 200 | for l in list_files: 201 | for f in l: 202 | print(f) 203 | if os.path.exists(f): 204 | os.remove(f) 205 | 206 | 207 | 208 | def test_compare_loo_kf(self): 209 | cv_loo = cross_validation.LeaveOneOut(random_state=12,verbose=2) 210 | cv_kf_as_loo = cross_validation.RandomStratifiedKFold(n_splits=False,valid_size=1,random_state=12,verbose=2) 211 | for trvl_loo,trvl_kf in zip(cv_loo.split(X,y),cv_kf_as_loo.split(X,y)): 212 | assert(np.all(trvl_loo[0]==trvl_kf[0])) 213 | assert(np.all(trvl_loo[1]==trvl_kf[1])) 214 | assert(len(trvl_kf[1]) == n_class) 215 | assert(np.unique(y[trvl_kf[1]]).size == n_class) 216 | 217 | #to print extensions 218 | cv_loo.get_supported_extensions() 219 | 220 | 221 | 222 | if __name__ == "__main__": 223 | unittest.main() -------------------------------------------------------------------------------- /test/test_processing.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import unittest 3 | from shutil import copyfile 4 | import numpy as np 5 | from museotoolbox import processing 6 | from museotoolbox.datasets import load_historical_data 7 | from osgeo import gdal, osr 8 | import os 9 | 10 | raster,vector = load_historical_data() 11 | rM = processing.RasterMath(raster) 12 | mask = processing.image_mask_from_vector(vector,raster,'/tmp/mask.tif') 13 | 14 | 15 | def create_false_image(array,path): 16 | # from https://pcjericks.github.io/py-gdalogr-cookbook/raster_layers.html 17 | driver = gdal.GetDriverByName('GTiff') 18 | outRaster = driver.Create(path, array.shape[1], array.shape[0], 1, gdal.GDT_Byte) 19 | outRaster.SetGeoTransform((0, 10, 0, 0, 0, 10)) 20 | outband = outRaster.GetRasterBand(1) 21 | outband.WriteArray(array) 22 | outRasterSRS = osr.SpatialReference() 23 | 
outRasterSRS.ImportFromEPSG(4326) 24 | outRaster.SetProjection(outRasterSRS.ExportToWkt()) 25 | outband.FlushCache() 26 | 27 | # create autocorrelated tif 28 | x = np.zeros((100,100),dtype=int) 29 | # max autocorr 30 | x[:50,:] = 1 31 | x[50:,:] = 2 32 | 33 | x_mask = np.random.randint(0,2,[100,100]) # random mask 34 | create_false_image(x,'/tmp/100x100size.tif') 35 | 36 | 37 | class TestRaster(unittest.TestCase): 38 | def test_convert_datatype(self): 39 | 40 | self._assert_np_gdt(np.dtype('uint8').name,gdal.GDT_Byte) 41 | self._assert_np_gdt(np.dtype('int16').name,gdal.GDT_Int16) 42 | self._assert_np_gdt(np.dtype('uint16').name,gdal.GDT_UInt16) 43 | self._assert_np_gdt(np.dtype('int32').name,gdal.GDT_Int32) 44 | self._assert_np_gdt(np.dtype('uint32').name,gdal.GDT_UInt32) 45 | 46 | self._assert_np_gdt(np.dtype('int64').name,gdal.GDT_Int32) 47 | self._assert_np_gdt(np.dtype('uint64').name,gdal.GDT_Int32) 48 | 49 | self._assert_np_gdt(np.dtype('uint16').name,gdal.GDT_UInt16) 50 | self._assert_np_gdt(np.dtype('float32').name,gdal.GDT_Float32) 51 | self._assert_np_gdt(np.dtype('float64').name,gdal.GDT_Float64) 52 | 53 | self._assert_np_gdt(gdal.GDT_Byte,np.uint8) 54 | self._assert_np_gdt(gdal.GDT_Int16,np.int16) 55 | self._assert_np_gdt(gdal.GDT_UInt16,np.uint16) 56 | self._assert_np_gdt(gdal.GDT_Float64,np.float64) 57 | self._assert_np_gdt(gdal.GDT_Float32,np.float32) 58 | 59 | self._assert_np_gdt(np.dtype('float128').name,gdal.GDT_Float64) 60 | assert(processing.convert_dt(gdal.GDT_Int16,to_otb_dt=True) == 'int16') 61 | assert(processing.convert_dt(np.dtype('float64').name,to_otb_dt=True) == 'double') 62 | 63 | assert(processing._convert_gdal_to_otb_dt(18) == 'cdouble') # if unknow, put cdouble (highest type) 64 | 65 | def _assert_np_gdt(self,in_conv,out_dt): 66 | assert(processing.convert_dt(in_conv)==out_dt) 67 | 68 | 69 | def test_gdt_minmax_values(self): 70 | assert(gdal.GDT_UInt16 == processing.get_gdt_from_minmax_values(500)) 71 | assert(gdal.GDT_UInt32 == 
processing.get_gdt_from_minmax_values(max_value=155500)) 72 | assert(gdal.GDT_Int32 == processing.get_gdt_from_minmax_values(max_value=0,min_value=-75500)) 73 | assert(gdal.GDT_Int16 == processing.get_gdt_from_minmax_values(max_value=1,min_value=-5)) 74 | assert(gdal.GDT_Float32 == processing.get_gdt_from_minmax_values(max_value=2,min_value=-55.55)) 75 | assert(gdal.GDT_Byte == processing.get_gdt_from_minmax_values(max_value=222)) 76 | assert(gdal.GDT_Float64 == processing.get_gdt_from_minmax_values(max_value =888E+40)) 77 | assert(gdal.GDT_Float64 == processing.get_gdt_from_minmax_values(max_value=5,min_value = -888E+40)) 78 | 79 | def test_rasterize(self): 80 | for invert in [True,False]: 81 | for field in ['class',False]: 82 | mem = processing.rasterize(raster,vector,field,out_image='MEM',invert=invert) 83 | assert(mem.RasterCount == 1) 84 | assert(mem.RasterXSize == rM.n_columns) 85 | assert(mem.RasterYSize == rM.n_lines) 86 | 87 | def test_noImg(self) : 88 | 89 | self.assertRaises(ReferenceError,processing.RasterMath,'None',verbose=0) 90 | 91 | def test_dimension(self) : 92 | assert(rM.n_bands == gdal.Open(raster).RasterCount) 93 | assert(rM.n_lines == gdal.Open(raster).RasterYSize) 94 | assert(rM.n_columns == gdal.Open(raster).RasterXSize) 95 | 96 | 97 | def test_readPerBand(self): 98 | for is_3d in [True,False]: 99 | rM_band = processing.RasterMath(raster,return_3d=is_3d,in_image_mask=mask) 100 | for idx,band in enumerate(rM_band.read_band_per_band()): 101 | print(band.ndim) 102 | if is_3d is True: 103 | assert(band.ndim == 2) 104 | else: 105 | assert(band.ndim == 2) 106 | del rM_band 107 | 108 | def test_3d(self) : 109 | rM_3d = processing.RasterMath(raster,return_3d=True) 110 | self.assertRaises(ValueError,rM_3d.get_block,100) 111 | assert(rM_3d.get_random_block().ndim == 3) 112 | for block in rM.read_block_per_block(): 113 | pass 114 | for band in rM.read_band_per_band(): 115 | pass 116 | rM.custom_block_size(128,256) 117 | assert(rM.y_block_size==256) 
118 | assert(rM.x_block_size==128) 119 | 120 | rM.custom_block_size(-1,-1) 121 | assert(rM.x_block_size == rM.n_columns) 122 | assert(rM.y_block_size == rM.n_lines) 123 | rM.custom_block_size(1/2,1/3) 124 | assert(rM.x_block_size == np.ceil(1/2*rM.n_columns)) 125 | assert(rM.y_block_size == np.ceil(1/3*rM.n_lines)) 126 | 127 | rM.add_image(raster) 128 | self.assertRaises(ValueError,rM.add_image,'/tmp/100x100size.tif') 129 | return_x = lambda x : x[0].astype(np.int16) 130 | rM.add_function(return_x,'/tmp/test_double.tif') 131 | rM.run() 132 | os.remove('/tmp/test_double.tif') 133 | assert(np.all(rM.get_random_block(random_state=12))== np.all(rM.get_random_block(random_state=12))) 134 | 135 | 136 | def test_mask(self) : 137 | for is_3d in [True, False]: 138 | mask = '/tmp/mask.tif' 139 | processing.image_mask_from_vector(vector,raster,mask) 140 | mask_src = gdal.Open(mask) 141 | raster_src = gdal.Open(raster) 142 | mask_proj =osr.SpatialReference(wkt=mask_src.GetProjection()) 143 | raster_proj = osr.SpatialReference(wkt=raster_src.GetProjection()) 144 | assert(raster_proj.GetAttrValue('projcs') == mask_proj.GetAttrValue('projcs')) 145 | assert(mask_src.RasterCount == 1) 146 | assert(mask_src.RasterXSize == raster_src.RasterXSize) 147 | assert(mask_src.RasterYSize == raster_src.RasterYSize) 148 | rM_band = processing.RasterMath(raster,return_3d=is_3d) 149 | for idx,band in enumerate(rM_band.read_band_per_band()): 150 | pass 151 | rM_band.add_function(np.mean,axis=is_3d+1,out_image='/tmp/mean.tif') 152 | rM_band.run() 153 | 154 | self.assertRaises(MemoryError,rM_band.run,'1K') 155 | 156 | assert(idx+1 == rM_band.n_bands) 157 | x = rM_band.get_random_block() 158 | assert(x.ndim == is_3d+2) 159 | os.remove('/tmp/mean.tif') 160 | 161 | 162 | def test_XYextraction(self): 163 | X = processing.extract_ROI(raster,vector,prefer_memory=False) 164 | 165 | 166 | self.assertRaises(ValueError,processing.extract_ROI,raster,vector,'Type') 167 | 
self.assertRaises(Exception,processing.extract_ROI,raster,vector,'no_field') 168 | 169 | assert(X.ndim == 2) 170 | 171 | X,y = processing.extract_ROI(raster,vector,'Class') 172 | assert(X.shape[0] == y.shape[0]) 173 | 174 | X,y,g = processing.extract_ROI(raster,vector,'Class','uniquefid') 175 | assert(X.shape[0] == y.shape[0] == g.shape[0]) 176 | 177 | self.assertRaises(ValueError,processing.extract_ROI,'wrong/path','wrong/path/too') 178 | assert(processing.extract_ROI(raster,vector).shape[1] == gdal.Open(raster).RasterCount) 179 | self.assertRaises(ValueError,processing.extract_ROI,raster,vector,'kodidk') 180 | 181 | 182 | 183 | def test_raster_math_mean(self): 184 | for is_3d in [True,False]: 185 | rM = processing.RasterMath(raster,return_3d = is_3d,verbose=is_3d,in_image_mask=mask,n_jobs=is_3d+1) 186 | if is_3d is False: 187 | # test without compression with reading/writing pixel per pixel, very slow... 188 | rM.custom_block_size(10,10) # to have full masked block 189 | rM.add_function(np.mean,'/tmp/mean.tif',axis=is_3d+1,dtype=np.int16) 190 | rM.run() 191 | else: 192 | # test using default block size and high compressio of raster 193 | rM.add_function(np.mean,'/tmp/mean.tif',axis=is_3d+1,dtype=np.int16,compress='high') 194 | rM.run() 195 | assert(gdal.Open('/tmp/mean.tif').RasterCount == 1) 196 | assert(gdal.Open('/tmp/mean.tif').RasterXSize == rM.n_columns) 197 | assert(gdal.Open('/tmp/mean.tif').RasterYSize == rM.n_lines) 198 | 199 | os.remove('/tmp/mean.tif') 200 | 201 | def test_unknow_fields(self): 202 | self.assertRaises(ValueError,processing.extract_ROI,raster,vector,'wrong_field') 203 | self.assertRaises(ValueError,processing.read_vector_values,vector) 204 | self.assertRaises(Exception,processing.read_vector_values,'wrong_path') 205 | self.assertRaises(ValueError,processing.read_vector_values,vector,'wrong_field') 206 | self.assertRaises(ValueError,processing.read_vector_values,vector,band_prefix='wrong_field') 207 | 
self.assertRaises(ReferenceError,processing.RasterMath,raster,in_image_mask='kiki') 208 | 209 | def test_addfid(self): 210 | copyfile(vector,'/tmp/test.gpkg') 211 | for tf in [True,False]: 212 | processing._add_vector_unique_fid('/tmp/test.gpkg',unique_field='to_create',verbose=tf) 213 | processing.sample_extraction(raster,'/tmp/test.gpkg','/tmp/test_roi.gpkg',band_prefix='band',verbose=1) 214 | self.assertRaises(Warning,processing.sample_extraction,raster,'/tmp/test.gpkg','/test/vector.ppkg') 215 | os.remove('/tmp/test.gpkg') 216 | 217 | y_ = processing.read_vector_values('/tmp/test_roi.gpkg',band_prefix='band',verbose=1) 218 | assert(y_.shape[1] == gdal.Open(raster).RasterCount) 219 | os.remove('/tmp/test_roi.gpkg') 220 | 221 | def test_centroid(self): 222 | 223 | Xc,yc = load_historical_data(centroid=True,return_X_y=True) 224 | Xc_file, yc_file= load_historical_data(centroid=True) 225 | assert(os.path.exists(Xc_file)) 226 | assert(os.path.exists(yc_file)) 227 | assert(Xc.shape[0] == processing.read_vector_values(vector,'Type').shape[0]) 228 | 229 | def test_extract_position(self): 230 | X,pixel_position=processing.extract_ROI(raster,vector,get_pixel_position=True,prefer_memory=False) 231 | assert(pixel_position.shape[0] == X.shape[0]) 232 | 233 | def test_get_parameter(self): 234 | rM = processing.RasterMath(raster) 235 | assert(isinstance(rM.get_raster_parameters(),dict)) 236 | rM.custom_raster_parameters(['TILED=NO']) 237 | assert(rM.get_raster_parameters() == ['TILED=NO']) 238 | 239 | def test_get_distance_matrix(self): 240 | distance_matrix,label = processing.get_distance_matrix(raster,vector,'Class') 241 | assert(label.size== distance_matrix.shape[0]) 242 | 243 | if __name__ == "__main__": 244 | unittest.main() 245 | -------------------------------------------------------------------------------- /test/test_stats.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import unittest 3 | 4 | 
import numpy as np 5 | from museotoolbox import datasets 6 | from museotoolbox import stats 7 | from osgeo import gdal,osr 8 | from sklearn.metrics import accuracy_score,cohen_kappa_score 9 | from shutil import copyfile 10 | ### 11 | confusion_matrix = np.array([[5,1],[2,2]]) 12 | # real 13 | yt_init= [1,1,1,1,1,1,2,2,2,2] 14 | yp_init = [1,1,1,1,1,2,1,1,2,2] 15 | 16 | def create_false_image(array,path): 17 | # from https://pcjericks.github.io/py-gdalogr-cookbook/raster_layers.html 18 | driver = gdal.GetDriverByName('GTiff') 19 | outRaster = driver.Create(path, array.shape[1], array.shape[0], 1, gdal.GDT_Byte) 20 | outRaster.SetGeoTransform((0, 10, 0, 0, 0, 10)) 21 | outband = outRaster.GetRasterBand(1) 22 | outband.WriteArray(array) 23 | outRasterSRS = osr.SpatialReference() 24 | outRasterSRS.ImportFromEPSG(4326) 25 | outRaster.SetProjection(outRasterSRS.ExportToWkt()) 26 | outband.FlushCache() 27 | 28 | # create autocorrelated tif 29 | x = np.zeros((100,100),dtype=int) 30 | # max autocorr 31 | x[:50,:] = 1 32 | x[50:,:] = 2 33 | 34 | x_mask = np.random.randint(0,2,[100,100]) # random mask 35 | create_false_image(x,'/tmp/autocorrelated_moran.tif') 36 | create_false_image(x_mask,'/tmp/mask_moran.tif') 37 | 38 | ### 39 | class TestStats(unittest.TestCase): 40 | 41 | def test_Moran_param(self): 42 | m = stats.Moran('/tmp/autocorrelated_moran.tif',lag=[1,2]) 43 | assert(m.get_n_neighbors(x[:3,:3],x[:3,:3],weights=x[:3,:3]) == 8) 44 | m.lags == [1,2] 45 | assert(len(m.scores['I']) == len(m.lags)) 46 | 47 | def test_Moran(self): 48 | self.assertRaises(ReferenceError,stats.Moran,in_image='N/A') 49 | 50 | # full autocorrelation 51 | moran = stats.Moran('/tmp/autocorrelated_moran.tif',lag=1) 52 | assert(np.round(moran.I,2) >= 0.95) 53 | 54 | #perfect random 55 | moran = stats.Moran('/tmp/mask_moran.tif',lag=1) 56 | assert(0 >= np.abs(np.round(moran.I,1))) 57 | 58 | # with mask 59 | moran_intermediate = 
stats.Moran('/tmp/autocorrelated_moran.tif',in_image_mask='/tmp/mask_moran.tif',lag=[1,2]) 60 | assert(moran_intermediate.scores['lag'] == [1,2]) 61 | assert(moran_intermediate.scores['I'][0] != moran.I) 62 | 63 | def test_Moran_fullMask(self): # full autocorrelation 64 | moran = stats.Moran('/tmp/mask_moran.tif',in_image_mask='/tmp/mask_moran.tif',lag=1) 65 | assert(np.isnan(moran.I)) 66 | 67 | def test_comm_om(self): 68 | comm_om = stats.commission_omission(confusion_matrix) 69 | 70 | assert(comm_om[0] == [confusion_matrix[0,1]/np.sum(confusion_matrix[0,:])*100,confusion_matrix[1,0]/np.sum(confusion_matrix[1,:])*100]) 71 | assert(comm_om[1] == [confusion_matrix[1,0]/np.sum(confusion_matrix[:,0])*100,confusion_matrix[0,1]/np.sum(confusion_matrix[:,1])*100]) 72 | 73 | sts = stats.ComputeConfusionMatrix(yt_init,yp_init,OA=True, kappa= True,F1=True) 74 | assert(np.all(confusion_matrix == sts.confusion_matrix)) 75 | assert(len(sts.F1) == 2) 76 | 77 | def test_stats_from_cm(self): 78 | 79 | yt,yp = stats.retrieve_y_from_confusion_matrix(confusion_matrix) 80 | assert(accuracy_score(yp,yt) == (np.sum(np.diag(confusion_matrix))/np.sum(confusion_matrix))) 81 | assert(np.all(yp==yp_init)) 82 | assert(np.all(yt==yt_init)) 83 | sts_from_matrix = stats.ComputeConfusionMatrix(yp,yt,OA=True,kappa=True) 84 | assert(sts_from_matrix.Kappa == cohen_kappa_score(yp,yt)) 85 | 86 | def test_zonal_stats(self): 87 | raster,vector = datasets.load_historical_data() 88 | copyfile(vector,'/tmp/train.gpkg') 89 | 90 | median,amax,std = stats.zonal_stats(raster,'/tmp/train.gpkg',False) 91 | assert(median.shape == amax.shape == std.shape) 92 | assert(np.sum(std)