├── .git-blame-ignore-revs ├── .gitattributes ├── .github ├── dependabot.yml ├── release.yml └── workflows │ ├── build_docs.yml │ ├── release.yml │ └── testing.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CHANGELOG.md ├── LICENSE.txt ├── README.md ├── ci ├── 311-latest.yaml ├── 311-numba-latest.yaml ├── 311-oldest.yaml ├── 312-latest.yaml ├── 312-numba-latest.yaml ├── 313-dev.yaml ├── 313-latest.yaml └── 313-numba-latest.yaml ├── codecov.yml ├── docs ├── Makefile ├── _static │ ├── auto │ │ └── references.el │ ├── images │ │ ├── equalinterval.png │ │ ├── fisherjenks.png │ │ ├── hr60fj10.png │ │ ├── hr60mb10.png │ │ ├── hr60q10.png │ │ ├── pysal_favicon.ico │ │ └── quantiles.png │ ├── pysal-styles.css │ └── references.bib ├── api.rst ├── conf.py ├── index.rst ├── installation.rst ├── references.rst └── tutorial.rst ├── environment.yml ├── mapclassify ├── __init__.py ├── _classify_API.py ├── classifiers.py ├── datasets │ ├── __init__.py │ └── calemp │ │ ├── README.md │ │ ├── __init__.py │ │ ├── calempdensity.csv │ │ └── data.py ├── greedy.py ├── legendgram.py ├── pooling.py ├── tests │ ├── __init__.py │ ├── baseline │ │ ├── test_histogram_plot.png │ │ ├── test_histogram_plot_despine.png │ │ └── test_histogram_plot_linewidth.png │ ├── baseline_images │ │ └── test_legendgram │ │ │ ├── legendgram_cmap.png │ │ │ ├── legendgram_default.png │ │ │ ├── legendgram_kwargs.png │ │ │ ├── legendgram_map.png │ │ │ ├── legendgram_position.png │ │ │ ├── legendgram_quantiles.png │ │ │ └── legendgram_vlines.png │ ├── test_classify.py │ ├── test_greedy.py │ ├── test_legendgram.py │ ├── test_mapclassify.py │ └── test_rgba.py └── util.py ├── notebooks ├── 01_maximum_breaks.ipynb ├── 02_legends.ipynb ├── 03_choropleth.ipynb ├── 04_pooled.ipynb ├── 05_Greedy_coloring.ipynb ├── 06_api.ipynb ├── 07_std_anchor.ipynb ├── 08_manual_coloring.ipynb └── 09_legendgram.ipynb └── pyproject.toml /.git-blame-ignore-revs: 
-------------------------------------------------------------------------------- 1 | # black-ification of code 2 | 71bfea486c64d3e87d0677f824ee6b17d576d028 3 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | mapclassify/_version.py export-subst 2 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "github-actions" 9 | directory: "/" 10 | schedule: 11 | interval: "daily" 12 | reviewers: 13 | - "jGaboardi" 14 | 15 | - package-ecosystem: "pip" 16 | directory: "/" 17 | schedule: 18 | interval: "daily" 19 | reviewers: 20 | - "jGaboardi" 21 | -------------------------------------------------------------------------------- /.github/release.yml: -------------------------------------------------------------------------------- 1 | changelog: 2 | exclude: 3 | labels: 4 | - ignore-for-release 5 | authors: 6 | - dependabot 7 | - pre-commit-ci 8 | categories: 9 | - title: Bug Fixes 10 | labels: 11 | - bug 12 | - title: Enhancements 13 | labels: 14 | - enhancement 15 | - title: Other Changes 16 | labels: 17 | - "*" 18 | -------------------------------------------------------------------------------- /.github/workflows/build_docs.yml: -------------------------------------------------------------------------------- 1 | 2 | name: Build Docs 3 | 4 | on: 5 | push: 6 | # Sequence of patterns matched against refs/tags 7 | tags: 8 | 
- 'v*' # Push events to matching v*, i.e. v1.0, v20.15.10 9 | workflow_dispatch: 10 | inputs: 11 | version: 12 | description: Manual Doc Build Reason 13 | default: test 14 | required: false 15 | 16 | jobs: 17 | docs: 18 | name: Build & Push Docs 19 | runs-on: ${{ matrix.os }} 20 | timeout-minutes: 90 21 | strategy: 22 | matrix: 23 | os: ['ubuntu-latest'] 24 | environment-file: [ci/313-latest.yaml] 25 | experimental: [false] 26 | defaults: 27 | run: 28 | shell: bash -l {0} 29 | 30 | steps: 31 | - name: Checkout repo 32 | uses: actions/checkout@v4 33 | with: 34 | fetch-depth: 0 # Fetch all history for all branches and tags. 35 | 36 | - name: Setup micromamba 37 | uses: mamba-org/setup-micromamba@v2 38 | with: 39 | environment-file: ${{ matrix.environment-file }} 40 | micromamba-version: 'latest' 41 | 42 | - name: Install package 43 | run: pip install . 44 | 45 | - name: Make Docs 46 | run: cd docs; make html 47 | 48 | - name: Commit Docs 49 | run: | 50 | git clone https://github.com/ammaraskar/sphinx-action-test.git --branch gh-pages --single-branch gh-pages 51 | cp -r docs/_build/html/* gh-pages/ 52 | cd gh-pages 53 | git config --local user.email "action@github.com" 54 | git config --local user.name "GitHub Action" 55 | git add . 56 | git commit -m "Update documentation" -a || true 57 | # The above command will fail if no changes were present, 58 | # so we ignore the return code. 59 | 60 | - name: push to gh-pages 61 | uses: ad-m/github-push-action@master 62 | with: 63 | branch: gh-pages 64 | directory: gh-pages 65 | github_token: ${{ secrets.GITHUB_TOKEN }} 66 | force: true 67 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release Package 2 | 3 | on: 4 | push: 5 | # Sequence of patterns matched against refs/tags 6 | tags: 7 | - 'v*' # Push events to matching v*, i.e. 
v1.0, v20.15.10 8 | workflow_dispatch: 9 | inputs: 10 | version: 11 | description: Manual Release 12 | default: test 13 | required: false 14 | 15 | jobs: 16 | build: 17 | runs-on: ubuntu-latest 18 | steps: 19 | - name: Checkout repo 20 | uses: actions/checkout@v4 21 | 22 | - name: Set up Python 23 | uses: actions/setup-python@v5 24 | with: 25 | python-version: '3.x' 26 | 27 | - name: Install Dependencies 28 | run: | 29 | python -m pip install --upgrade pip build twine 30 | python -m build 31 | twine check --strict dist/* 32 | 33 | - name: Create Release Notes 34 | uses: actions/github-script@v7 35 | with: 36 | github-token: ${{secrets.GITHUB_TOKEN}} 37 | script: | 38 | await github.request(`POST /repos/${{ github.repository }}/releases`, { 39 | tag_name: "${{ github.ref }}", 40 | generate_release_notes: true 41 | }); 42 | 43 | - name: Publish distribution 📦 to PyPI 44 | uses: pypa/gh-action-pypi-publish@release/v1 45 | with: 46 | user: __token__ 47 | password: ${{ secrets.PYPI_PASSWORD }} 48 | -------------------------------------------------------------------------------- /.github/workflows/testing.yml: -------------------------------------------------------------------------------- 1 | name: Continuous Integration 2 | 3 | on: 4 | push: 5 | branches: 6 | - "*" 7 | pull_request: 8 | branches: 9 | - "*" 10 | schedule: 11 | - cron: "59 21 * * *" 12 | 13 | jobs: 14 | testing: 15 | name: (${{ matrix.os }}, ${{ matrix.environment-file }}) 16 | runs-on: ${{ matrix.os }} 17 | defaults: 18 | run: 19 | shell: bash -l {0} 20 | strategy: 21 | matrix: 22 | os: ["ubuntu-latest"] 23 | environment-file: 24 | [ 25 | ci/311-oldest.yaml, 26 | ci/311-latest.yaml, 27 | ci/311-numba-latest.yaml, 28 | ci/312-latest.yaml, 29 | ci/312-numba-latest.yaml, 30 | ci/313-latest.yaml, 31 | ci/313-numba-latest.yaml, 32 | ci/313-dev.yaml, 33 | ] 34 | include: 35 | - environment-file: ci/313-latest.yaml 36 | os: macos-13 # Intel 37 | - environment-file: ci/313-numba-latest.yaml 38 | os: macos-13 # 
Intel 39 | - environment-file: ci/313-latest.yaml 40 | os: macos-14 # Apple Silicon 41 | - environment-file: ci/313-numba-latest.yaml 42 | os: macos-14 # Apple Silicon 43 | - environment-file: ci/313-latest.yaml 44 | os: windows-latest 45 | - environment-file: ci/313-numba-latest.yaml 46 | os: windows-latest 47 | fail-fast: false 48 | 49 | steps: 50 | - name: checkout repo 51 | uses: actions/checkout@v4 52 | with: 53 | fetch-depth: 0 # Fetch all history for all branches and tags. 54 | 55 | - name: setup micromamba 56 | uses: mamba-org/setup-micromamba@v2 57 | with: 58 | environment-file: ${{ matrix.environment-file }} 59 | micromamba-version: 'latest' 60 | 61 | - name: environment info 62 | run: | 63 | micromamba info 64 | micromamba list 65 | 66 | - name: spatial versions 67 | run: | 68 | python -c "import geopandas; geopandas.show_versions();" 69 | 70 | - name: Download test files 71 | run: | 72 | python -c ' 73 | import libpysal 74 | 75 | libpysal.examples.fetch_all() 76 | ' 77 | 78 | - name: Run pytest 79 | run: | 80 | pytest \ 81 | mapclassify \ 82 | -r a \ 83 | -v \ 84 | -n auto \ 85 | --color yes \ 86 | --cov-append \ 87 | --cov mapclassify \ 88 | --cov-report xml \ 89 | --cov-report term-missing 90 | 91 | - name: run docstring tests 92 | if: contains(matrix.environment-file, '312-numba-latest') && contains(matrix.os, 'ubuntu') 93 | run: | 94 | pytest \ 95 | -v \ 96 | -r a \ 97 | -n auto \ 98 | --color yes \ 99 | --cov-append \ 100 | --cov mapclassify \ 101 | --cov-report xml \ 102 | --doctest-only \ 103 | --mpl mapclassify 104 | 105 | - name: codecov (${{ matrix.os }}, ${{ matrix.environment-file }}) 106 | uses: codecov/codecov-action@v5 107 | with: 108 | token: ${{ secrets.CODECOV_TOKEN }} 109 | file: ./coverage.xml 110 | name: mapclassify-codecov 111 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .CHANGELOG.md.swp 
3 | .ropeproject/ 4 | dist/ 5 | examples/notebooks/.ipynb_checkpoints/ 6 | examples/python/ 7 | mapclassify.egg-info/ 8 | mapclassify/.ropeproject/ 9 | mapclassify/datasets/calemp/.ropeproject/ 10 | mapclassify/tests/.ropeproject/ 11 | .DS_Store 12 | .vscode/settings.json 13 | __pycache__ 14 | /notebooks/.ipynb_checkpoints/ 15 | result_images/ -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | files: 'mapclassify\/' 2 | repos: 3 | - repo: https://github.com/astral-sh/ruff-pre-commit 4 | rev: "v0.11.4" 5 | hooks: 6 | - id: ruff 7 | - id: ruff-format 8 | 9 | ci: 10 | autofix_prs: false 11 | autoupdate_schedule: quarterly 12 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Version 2.4.1 (2020-12-20) 2 | 3 | This is a bug-fix release. 4 | 5 | We closed a total of 9 issues (enhancements and bug fixes) through 3 pull requests, since our last release on 2020-12-13. 
6 | 7 | ## Issues Closed 8 | - BUG: support series in sampled classifiers (#99) 9 | - BUG: FisherJenksSampled returns ValueError if Series is passed as y (#98) 10 | - REGR: fix invariant array regression (#101) 11 | - REGR: UserDefined classifier returns ValueError("Minimum and maximum of input data are equal, cannot create bins.") (#100) 12 | - [DOC] add example nb for new classify API (#91) 13 | - 2.4.0 Release (#97) 14 | 15 | ## Pull Requests 16 | - BUG: support series in sampled classifiers (#99) 17 | - REGR: fix invariant array regression (#101) 18 | - 2.4.0 Release (#97) 19 | 20 | The following individuals contributed to this release: 21 | 22 | - Serge Rey 23 | - Martin Fleischmann 24 | - Stefanie Lumnitz 25 | 26 | # Version 2.4.0 (2020-12-13) 27 | 28 | We closed a total of 39 issues (enhancements and bug fixes) through 15 pull requests, since our last release on 2020-06-13. 29 | Issues Closed 30 | 31 | - Remove timeout on tests. (#96) 32 | - BUG: HeadTailBreaks RecursionError due to floating point issue (#92) 33 | - Handle recursion error for head tails. (#95) 34 | - Add streamlined API (#72) 35 | - [API] add high-level API mapclassify.classify() (#90) 36 | - BUG: Fix mapclassify #88 (#89) 37 | - exclude Python 3.6 for Windows (#94) 38 | - CI: update conda action (#93) 39 | - EqualInterval unclear error when max_y - min_y = 0 (#88) 40 | - BUG: fix unordered series in greedy (#87) 41 | - BUG: greedy(strategy='balanced') does not return correct labels (#86) 42 | - Extra files in PyPI sdist (#56) 43 | - MAINT: fix repos name (#85) 44 | - DOC: content type for long description (#84) 45 | - MAINT: update gitcount notebook (#83) 46 | - Update documentations to include tutorial (#63) 47 | - build binder for notebooks (#71) 48 | - current version of mapclassify in docs? 
(#70) 49 | - 404 for notebook/tutorials links in docs (#79) 50 | - DOC: figs (#82) 51 | - DOCS: new images for tutorial (#81) 52 | - DOC: missing figs (#80) 53 | - DOCS: update documentation pages (#78) 54 | - Make networkx optional, remove xfail from greedy (#77) 55 | 56 | ## Pull Requests 57 | 58 | - Remove timeout on tests. (#96) 59 | - Handle recursion error for head tails. (#95) 60 | - [API] add high-level API mapclassify.classify() (#90) 61 | - BUG: Fix mapclassify #88 (#89) 62 | - exclude Python 3.6 for Windows (#94) 63 | - CI: update conda action (#93) 64 | - BUG: fix unordered series in greedy (#87) 65 | - MAINT: fix repos name (#85) 66 | - DOC: content type for long description (#84) 67 | - MAINT: update gitcount notebook (#83) 68 | - DOC: figs (#82) 69 | - DOCS: new images for tutorial (#81) 70 | - DOC: missing figs (#80) 71 | - DOCS: update documentation pages (#78) 72 | - Make networkx optional, remove xfail from greedy (#77) 73 | 74 | The following individuals contributed to this release: 75 | 76 | Serge Rey 77 | Stefanie Lumnitz 78 | James Gaboardi 79 | Martin Fleischmann 80 | 81 | 82 | # Version 2.3.0 (2020-06-13) 83 | ## Key Enhancements 84 | 85 | - Topological coloring to ensure no two adjacent polygons share the same color. 86 | - Pooled classification allows for the use of the same class intervals across maps. 87 | 88 | ## Details 89 | 90 | We closed a total of 30 issues (enhancements and bug fixes) through 10 pull requests, since our last release on 2020-01-04. 91 | ## Issues Closed 92 | 93 | - Make networkx optional, remove xfail from greedy (#77) 94 | - BINDER: point to upstream (#76) 95 | - add binder badge (#75) 96 | - Binder (#74) 97 | - sys import missing from setup.py (#73) 98 | - [WIP] DOC: Updating tutorial (#66) 99 | - chorobrewer branch has begun (#27) 100 | - Is mapclassify code black? 
(#68) 101 | - Code format and README (#69) 102 | - Move testing over to github actions (#64) 103 | - Add pinning in pooled example documentation (#67) 104 | - Migrate to GHA (#65) 105 | - Add a Pooled classifier (#51) 106 | - Backwards compatability (#48) 107 | - Difference between Natural Breaks and Fisher Jenks schemes (#62) 108 | - ENH: add greedy (topological) coloring (#61) 109 | - Error while running mapclassify (#60) 110 | - Pooled (#59) 111 | - Invalid escape sequences in strings (#57) 112 | - 3.8, appveyor, deprecation fixes (#58) 113 | 114 | ## Pull Requests 115 | 116 | - Make networkx optional, remove xfail from greedy (#77) 117 | - BINDER: point to upstream (#76) 118 | - add binder badge (#75) 119 | - Binder (#74) 120 | - [WIP] DOC: Updating tutorial (#66) 121 | - Code format and README (#69) 122 | - Migrate to GHA (#65) 123 | - ENH: add greedy (topological) coloring (#61) 124 | - Pooled (#59) 125 | - 3.8, appveyor, deprecation fixes (#58) 126 | 127 | ## Acknowledgements 128 | 129 | The following individuals contributed to this release: 130 | 131 | - Serge Rey 132 | - James Gaboardi 133 | - Eli Knaap 134 | - Martin Fleischmann 135 | 136 | 137 | 138 | # Version 2.2.0 (2019-12-21) 139 | 140 | This releases brings new functionality for [formatting of legend classes](https://github.com/sjsrey/geopandas/blob/legendkwds/examples/choro_legends.ipynb). 141 | 142 | We closed a total of 21 issues (enhancements and bug fixes) through 9 pull requests, since our last release on 2019-06-28. 
143 | 144 | ## Issues Closed 145 | - 2.2 (#54) 146 | - 2.2 (#53) 147 | - conda-forge UnsatisfiableError on windows and python 3.7 (#52) 148 | - [MAINT] updating supported Python versions in setup.py (#49) 149 | - BUG: RecursiveError in HeadTailBreaks (#46) 150 | - BUG: HeadTailBreaks raise RecursionError (#45) 151 | - BUG: UserDefined accepts only list if max not in bins (#47) 152 | - BUG: avoid deprecation warning in HeadTailBreaks (#44) 153 | - remove docs badge (#42) 154 | - Remove doc badge (#43) 155 | - Docs: moving to project pages on github and off rtd (#41) 156 | - BUG: Fix for downstream breakage in geopandas (#40) 157 | 158 | ## Pull Requests 159 | - 2.2 (#54) 160 | - 2.2 (#53) 161 | - [MAINT] updating supported Python versions in setup.py (#49) 162 | - BUG: RecursiveError in HeadTailBreaks (#46) 163 | - BUG: UserDefined accepts only list if max not in bins (#47) 164 | - BUG: avoid deprecation warning in HeadTailBreaks (#44) 165 | - Remove doc badge (#43) 166 | - Docs: moving to project pages on github and off rtd (#41) 167 | - BUG: Fix for downstream breakage in geopandas (#40) 168 | 169 | The following individuals contributed to this release: 170 | 171 | - Serge Rey 172 | - James Gaboardi 173 | - Wei Kang 174 | - Martin Fleischmann 175 | 176 | 177 | # Version 2.1.0 (2019-06-26) 178 | 179 | We closed a total of 36 issues (enhancements and bug fixes) through 16 pull requests, since our last release on 2018-10-28. 180 | 181 | ## Issues Closed 182 | - ENH: dropping 3.5 support and adding 3.7 (#38) 183 | - ENH: plot method added to Mapclassify (#36) 184 | - ENH: keeping init keyword argument to avoid API breakage. 
(#35) 185 | - mapclassify.Natural_Break() does not return the specified k classes (#16) 186 | - Fix for #16 (#32) 187 | - Mixed usage of brewer2mpl and palettable.colorbrewer in color.py (#33) 188 | - Chorobrewer (#34) 189 | - conda-forge recipe needs some love (#14) 190 | - generating images for color selector (#31) 191 | - doc: bump version and dev setup docs (#30) 192 | - environment.yml (#29) 193 | - add color import and chorobrewer notebook (#28) 194 | - Chorobrewer (#26) 195 | - chorobrewer init (#25) 196 | - add badges for pypi, zenodo and docs (#24) 197 | - add geopandas and libpysal to test requirement (#23) 198 | - adjust changelog and delete tools/github_stats.py (#22) 199 | - add requirements_docs.txt to MANIFEST.in (#21) 200 | - gadf and K_classifiers not in __ini__.py (#18) 201 | - rel: 2.0.1 (#20) 202 | 203 | ## Pull Requests 204 | - ENH: dropping 3.5 support and adding 3.7 (#38) 205 | - ENH: plot method added to Mapclassify (#36) 206 | - ENH: keeping init keyword argument to avoid API breakage. (#35) 207 | - Fix for #16 (#32) 208 | - Chorobrewer (#34) 209 | - generating images for color selector (#31) 210 | - doc: bump version and dev setup docs (#30) 211 | - environment.yml (#29) 212 | - add color import and chorobrewer notebook (#28) 213 | - Chorobrewer (#26) 214 | - chorobrewer init (#25) 215 | - add badges for pypi, zenodo and docs (#24) 216 | - add geopandas and libpysal to test requirement (#23) 217 | - adjust changelog and delete tools/github_stats.py (#22) 218 | - add requirements_docs.txt to MANIFEST.in (#21) 219 | - rel: 2.0.1 (#20) 220 | 221 | The following individuals contributed to this release: 222 | 223 | - Serge Rey 224 | - Wei Kang 225 | 226 | # Version 2.0.1 (2018-10-28) 227 | 228 | We closed a total of 12 issues (enhancements and bug fixes) through 5 pull requests, since our last release on 2018-08-10. 
229 | 230 | ## Issues Closed 231 | - gadf and K_classifiers not in __ini__.py (#18) 232 | - rel: 2.0.1 (#20) 233 | - fix doctests (interactive examples in inline docstrings) (#19) 234 | - complete readthedocs configuration & add Slocum 2009 reference (#17) 235 | - prepping for a doc based release (#15) 236 | - new release on pypi (#10) 237 | - prepare for release 2.0.0 (#13) 238 | 239 | ## Pull Requests 240 | - rel: 2.0.1 (#20) 241 | - fix doctests (interactive examples in inline docstrings) (#19) 242 | - complete readthedocs configuration & add Slocum 2009 reference (#17) 243 | - prepping for a doc based release (#15) 244 | - prepare for release 2.0.0 (#13) 245 | 246 | The following individuals contributed to this release: 247 | 248 | - Serge Rey 249 | - Wei Kang 250 | 251 | # Version 2.0.0 (2018-08-10) 252 | 253 | Starting from this release, mapclassify supports python 3+ only (currently 3.5 254 | and 3.6). 255 | 256 | This release also features a first stable version of mapclassify in 257 | the process of pysal refactoring. There is a big change in the api in that we no 258 | longer provide an api module (`from mapclassify.api import Quantiles`). Instead, 259 | users will directly `from mapclassify import Quantiles`. 260 | 261 | GitHub stats for 2017/08/18 - 2018/08/10 262 | 263 | These lists are automatically generated, and may be incomplete or contain duplicates. 
264 | 265 | We closed a total of 8 issues, 4 pull requests and 4 regular issues; 266 | this is the full list (generated with the script 267 | :file:`tools/github_stats.py`): 268 | 269 | Pull Requests (4): 270 | 271 | * :ghpull:`12`: b'Clean up for next pypi release' 272 | * :ghpull:`11`: b'move notebooks outside of the package' 273 | * :ghpull:`9`: b'ENH: move classifiers up into init' 274 | * :ghpull:`8`: b'Moving to python 3+' 275 | 276 | Issues (4): 277 | 278 | * :ghissue:`12`: b'Clean up for next pypi release' 279 | * :ghissue:`11`: b'move notebooks outside of the package' 280 | * :ghissue:`9`: b'ENH: move classifiers up into init' 281 | * :ghissue:`8`: b'Moving to python 3+' 282 | 283 | 284 | # Version 1.0.1 (2017-08-17) 285 | 286 | - Warnings added when duplicate values make quantiles ill-defined 287 | - Faster digitize in place of list comprehension 288 | - Bug fix for consistent treatment of intervals (closed on the right, open on the left) 289 | 290 | v<1.0.0dev> 2017-04-21 291 | 292 | - alpha release 293 | 294 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2018 PySAL-mapclassify Developers 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 4 | 5 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 6 | 7 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 8 | 9 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
10 | 11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mapclassify: Classification Schemes for Choropleth Maps 2 | 3 | [![Continuous Integration](https://github.com/pysal/mapclassify/actions/workflows/testing.yml/badge.svg)](https://github.com/pysal/mapclassify/actions/workflows/testing.yml) 4 | [![codecov](https://codecov.io/gh/pysal/mapclassify/branch/main/graph/badge.svg)](https://codecov.io/gh/pysal/mapclassify) 5 | [![PyPI version](https://badge.fury.io/py/mapclassify.svg)](https://badge.fury.io/py/mapclassify) 6 | [![DOI](https://zenodo.org/badge/88918063.svg)](https://zenodo.org/badge/latestdoi/88918063) 7 | [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) 8 | [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff) 9 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/pysal/mapclassify/main) 10 | 11 | `mapclassify` implements a family of classification schemes for choropleth maps. 
12 | Its focus is on the determination of the number of classes, and the assignment 13 | of observations to those classes. It is intended for use with upstream mapping 14 | and geovisualization packages (see 15 | [geopandas](https://geopandas.org/mapping.html)) 16 | that handle the rendering of the maps. 17 | 18 | For further theoretical background see [Rey, S.J., D. Arribas-Bel, and L.J. Wolf (2020) "Geographic Data Science with PySAL and the PyData Stack”](https://geographicdata.science/book/notebooks/05_choropleth.html). 19 | 20 | ## Using `mapclassify` 21 | Load built-in example data reporting employment density in 58 California counties: 22 | 23 | ```python 24 | >>> import mapclassify 25 | >>> y = mapclassify.load_example() 26 | >>> y.mean() 27 | 125.92810344827588 28 | >>> y.min(), y.max() 29 | (0.13, 4111.4499999999998) 30 | 31 | ``` 32 | 33 | ## Map Classifiers Supported 34 | 35 | ### BoxPlot 36 | 37 | ```python 38 | >>> mapclassify.BoxPlot(y) 39 | BoxPlot 40 | 41 | Interval Count 42 | -------------------------- 43 | ( -inf, -52.88] | 0 44 | ( -52.88, 2.57] | 15 45 | ( 2.57, 9.36] | 14 46 | ( 9.36, 39.53] | 14 47 | ( 39.53, 94.97] | 6 48 | ( 94.97, 4111.45] | 9 49 | ``` 50 | 51 | 52 | 53 | ### EqualInterval 54 | 55 | ```python 56 | >>> mapclassify.EqualInterval(y) 57 | EqualInterval 58 | 59 | Interval Count 60 | -------------------------- 61 | [ 0.13, 822.39] | 57 62 | ( 822.39, 1644.66] | 0 63 | (1644.66, 2466.92] | 0 64 | (2466.92, 3289.19] | 0 65 | (3289.19, 4111.45] | 1 66 | ``` 67 | 68 | ### FisherJenks 69 | 70 | ```python 71 | >>> import numpy as np 72 | >>> np.random.seed(123456) 73 | >>> mapclassify.FisherJenks(y, k=5) 74 | FisherJenks 75 | 76 | Interval Count 77 | -------------------------- 78 | [ 0.13, 75.29] | 49 79 | ( 75.29, 192.05] | 3 80 | ( 192.05, 370.50] | 4 81 | ( 370.50, 722.85] | 1 82 | ( 722.85, 4111.45] | 1 83 | ``` 84 | 85 | ### FisherJenksSampled 86 | 87 | ```python 88 | >>> np.random.seed(123456) 89 | >>> x = 
np.random.exponential(size=(10000,)) 90 | >>> mapclassify.FisherJenks(x, k=5) 91 | FisherJenks 92 | 93 | Interval Count 94 | ---------------------- 95 | [ 0.00, 0.64] | 4694 96 | ( 0.64, 1.45] | 2922 97 | ( 1.45, 2.53] | 1584 98 | ( 2.53, 4.14] | 636 99 | ( 4.14, 10.61] | 164 100 | 101 | >>> mapclassify.FisherJenksSampled(x, k=5) 102 | FisherJenksSampled 103 | 104 | Interval Count 105 | ---------------------- 106 | [ 0.00, 0.70] | 5020 107 | ( 0.70, 1.63] | 2952 108 | ( 1.63, 2.88] | 1454 109 | ( 2.88, 5.32] | 522 110 | ( 5.32, 10.61] | 52 111 | ``` 112 | 113 | ### HeadTailBreaks 114 | 115 | ```python 116 | >>> mapclassify.HeadTailBreaks(y) 117 | HeadTailBreaks 118 | 119 | Interval Count 120 | -------------------------- 121 | [ 0.13, 125.93] | 50 122 | ( 125.93, 811.26] | 7 123 | ( 811.26, 4111.45] | 1 124 | ``` 125 | 126 | ### JenksCaspall 127 | 128 | ```python 129 | >>> mapclassify.JenksCaspall(y, k=5) 130 | JenksCaspall 131 | 132 | Interval Count 133 | -------------------------- 134 | [ 0.13, 1.81] | 14 135 | ( 1.81, 7.60] | 13 136 | ( 7.60, 29.82] | 14 137 | ( 29.82, 181.27] | 10 138 | ( 181.27, 4111.45] | 7 139 | ``` 140 | 141 | ### JenksCaspallForced 142 | 143 | ```python 144 | >>> mapclassify.JenksCaspallForced(y, k=5) 145 | JenksCaspallForced 146 | 147 | Interval Count 148 | -------------------------- 149 | [ 0.13, 1.34] | 12 150 | ( 1.34, 5.90] | 12 151 | ( 5.90, 16.70] | 13 152 | ( 16.70, 50.65] | 9 153 | ( 50.65, 4111.45] | 12 154 | ``` 155 | 156 | ### JenksCaspallSampled 157 | 158 | ```python 159 | >>> mapclassify.JenksCaspallSampled(y, k=5) 160 | JenksCaspallSampled 161 | 162 | Interval Count 163 | -------------------------- 164 | [ 0.13, 12.02] | 33 165 | ( 12.02, 29.82] | 8 166 | ( 29.82, 75.29] | 8 167 | ( 75.29, 192.05] | 3 168 | ( 192.05, 4111.45] | 6 169 | ``` 170 | 171 | ### MaxP 172 | 173 | ```python 174 | >>> mapclassify.MaxP(y) 175 | MaxP 176 | 177 | Interval Count 178 | -------------------------- 179 | [ 0.13, 8.70] | 29 180 | ( 8.70, 16.70] 
| 8 181 | ( 16.70, 20.47] | 1 182 | ( 20.47, 66.26] | 10 183 | ( 66.26, 4111.45] | 10 184 | ``` 185 | 186 | ### [MaximumBreaks](notebooks/maximum_breaks.ipynb) 187 | 188 | ```python 189 | >>> mapclassify.MaximumBreaks(y, k=5) 190 | MaximumBreaks 191 | 192 | Interval Count 193 | -------------------------- 194 | [ 0.13, 146.00] | 50 195 | ( 146.00, 228.49] | 2 196 | ( 228.49, 546.67] | 4 197 | ( 546.67, 2417.15] | 1 198 | (2417.15, 4111.45] | 1 199 | ``` 200 | 201 | ### NaturalBreaks 202 | 203 | ```python 204 | >>> mapclassify.NaturalBreaks(y, k=5) 205 | NaturalBreaks 206 | 207 | Interval Count 208 | -------------------------- 209 | [ 0.13, 75.29] | 49 210 | ( 75.29, 192.05] | 3 211 | ( 192.05, 370.50] | 4 212 | ( 370.50, 722.85] | 1 213 | ( 722.85, 4111.45] | 1 214 | ``` 215 | 216 | ### Quantiles 217 | 218 | ```python 219 | >>> mapclassify.Quantiles(y, k=5) 220 | Quantiles 221 | 222 | Interval Count 223 | -------------------------- 224 | [ 0.13, 1.46] | 12 225 | ( 1.46, 5.80] | 11 226 | ( 5.80, 13.28] | 12 227 | ( 13.28, 54.62] | 11 228 | ( 54.62, 4111.45] | 12 229 | ``` 230 | 231 | ### Percentiles 232 | 233 | ```python 234 | >>> mapclassify.Percentiles(y, pct=[33, 66, 100]) 235 | Percentiles 236 | 237 | Interval Count 238 | -------------------------- 239 | [ 0.13, 3.36] | 19 240 | ( 3.36, 22.86] | 19 241 | ( 22.86, 4111.45] | 20 242 | ``` 243 | 244 | ### PrettyBreaks 245 | ```python 246 | >>> np.random.seed(123456) 247 | >>> x = np.random.randint(0, 10000, (100,1)) 248 | >>> mapclassify.PrettyBreaks(x) 249 | Pretty 250 | 251 | Interval Count 252 | ---------------------------- 253 | [ 300.00, 2000.00] | 23 254 | ( 2000.00, 4000.00] | 15 255 | ( 4000.00, 6000.00] | 18 256 | ( 6000.00, 8000.00] | 24 257 | ( 8000.00, 10000.00] | 20 258 | ``` 259 | 260 | ### StdMean 261 | 262 | ```python 263 | >>> mapclassify.StdMean(y) 264 | StdMean 265 | 266 | Interval Count 267 | -------------------------- 268 | ( -inf, -967.36] | 0 269 | (-967.36, -420.72] | 0 270 | (-420.72, 
672.57] | 56 271 | ( 672.57, 1219.22] | 1 272 | (1219.22, 4111.45] | 1 273 | ``` 274 | ### UserDefined 275 | 276 | ```python 277 | >>> mapclassify.UserDefined(y, bins=[22, 674, 4112]) 278 | UserDefined 279 | 280 | Interval Count 281 | -------------------------- 282 | [ 0.13, 22.00] | 38 283 | ( 22.00, 674.00] | 18 284 | ( 674.00, 4112.00] | 2 285 | ``` 286 | 287 | ## Alternative API 288 | 289 | As of version 2.4.0 the API has been extended. A `classify` function is now 290 | available for a streamlined interface: 291 | 292 | ```python 293 | >>> classify(y, 'boxplot') 294 | BoxPlot 295 | 296 | Interval Count 297 | -------------------------- 298 | ( -inf, -52.88] | 0 299 | ( -52.88, 2.57] | 15 300 | ( 2.57, 9.36] | 14 301 | ( 9.36, 39.53] | 14 302 | ( 39.53, 94.97] | 6 303 | ( 94.97, 4111.45] | 9 304 | 305 | ``` 306 | 307 | 308 | 309 | 310 | ## Use Cases 311 | 312 | ### Creating and using a classification instance 313 | 314 | ```python 315 | >>> bp = mapclassify.BoxPlot(y) 316 | >>> bp 317 | BoxPlot 318 | 319 | Interval Count 320 | -------------------------- 321 | ( -inf, -52.88] | 0 322 | ( -52.88, 2.57] | 15 323 | ( 2.57, 9.36] | 14 324 | ( 9.36, 39.53] | 14 325 | ( 39.53, 94.97] | 6 326 | ( 94.97, 4111.45] | 9 327 | 328 | >>> bp.bins 329 | array([ -5.28762500e+01, 2.56750000e+00, 9.36500000e+00, 330 | 3.95300000e+01, 9.49737500e+01, 4.11145000e+03]) 331 | >>> bp.counts 332 | array([ 0, 15, 14, 14, 6, 9]) 333 | >>> bp.yb 334 | array([5, 1, 2, 3, 2, 1, 5, 1, 3, 3, 1, 2, 2, 1, 2, 2, 2, 1, 5, 2, 4, 1, 2, 335 | 2, 1, 1, 3, 3, 3, 5, 3, 1, 3, 5, 2, 3, 5, 5, 4, 3, 5, 3, 5, 4, 2, 1, 336 | 1, 4, 4, 3, 3, 1, 1, 2, 1, 4, 3, 2]) 337 | 338 | ``` 339 | 340 | ### Binning new data 341 | 342 | ```python 343 | >>> bp = mapclassify.BoxPlot(y) 344 | >>> bp 345 | BoxPlot 346 | 347 | Interval Count 348 | -------------------------- 349 | ( -inf, -52.88] | 0 350 | ( -52.88, 2.57] | 15 351 | ( 2.57, 9.36] | 14 352 | ( 9.36, 39.53] | 14 353 | ( 39.53, 94.97] | 6 354 | ( 94.97, 4111.45] | 9 
conda env create -f environment.yml
419 | ``` 420 | -------------------------------------------------------------------------------- /ci/311-latest.yaml: -------------------------------------------------------------------------------- 1 | name: test 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.11 6 | # required 7 | - networkx 8 | - numpy 9 | - pandas 10 | - scikit-learn 11 | - scipy 12 | # testing 13 | - geopandas 14 | - libpysal 15 | - pytest 16 | - pytest-cov 17 | - pytest-xdist 18 | - pytest-mpl 19 | - codecov 20 | - matplotlib 21 | -------------------------------------------------------------------------------- /ci/311-numba-latest.yaml: -------------------------------------------------------------------------------- 1 | name: test 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.11 6 | # required 7 | - networkx 8 | - numpy 9 | - pandas 10 | - scikit-learn 11 | - scipy 12 | # testing 13 | - geopandas 14 | - libpysal 15 | - pytest 16 | - pytest-cov 17 | - pytest-xdist 18 | - pytest-mpl 19 | - codecov 20 | - matplotlib 21 | # optional 22 | - numba 23 | -------------------------------------------------------------------------------- /ci/311-oldest.yaml: -------------------------------------------------------------------------------- 1 | name: test 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.11 6 | # required 7 | - networkx=3.2 8 | - numpy=1.26 9 | - pandas=2.1 10 | - scikit-learn=1.4 11 | - scipy=1.12 12 | # testing 13 | - fiona 14 | - geopandas 15 | - libpysal 16 | - pytest 17 | - pytest-cov 18 | - pytest-xdist 19 | - codecov 20 | - matplotlib 21 | -------------------------------------------------------------------------------- /ci/312-latest.yaml: -------------------------------------------------------------------------------- 1 | name: test 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.12 6 | # required 7 | - networkx 8 | - numpy 9 | - pandas 10 | - scikit-learn 11 | - scipy 12 | # testing 13 | - geopandas 14 | - libpysal 
15 | - pytest 16 | - pytest-cov 17 | - pytest-xdist 18 | - pytest-mpl 19 | - codecov 20 | - matplotlib 21 | -------------------------------------------------------------------------------- /ci/312-numba-latest.yaml: -------------------------------------------------------------------------------- 1 | name: test 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.12 6 | # required 7 | - networkx 8 | - numpy 9 | - pandas 10 | - scikit-learn 11 | - scipy 12 | # testing 13 | - geopandas 14 | - libpysal 15 | - pytest 16 | - pytest-cov 17 | - pytest-xdist 18 | - pytest-doctestplus 19 | - pytest-mpl 20 | - codecov 21 | - matplotlib 22 | # optional 23 | - numba 24 | -------------------------------------------------------------------------------- /ci/313-dev.yaml: -------------------------------------------------------------------------------- 1 | name: test 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.13 6 | # testing 7 | - pytest 8 | - pytest-cov 9 | - pytest-xdist 10 | - codecov 11 | # optional 12 | - pyproj 13 | - pip 14 | - pip: 15 | - --pre --index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple --extra-index-url https://pypi.org/simple 16 | - scipy 17 | - scikit-learn 18 | - pandas 19 | - networkx 20 | - matplotlib 21 | - shapely 22 | - fiona 23 | - pytest-mpl 24 | - git+https://github.com/pysal/libpysal.git@main 25 | - git+https://github.com/geopandas/geopandas.git@main 26 | -------------------------------------------------------------------------------- /ci/313-latest.yaml: -------------------------------------------------------------------------------- 1 | name: test 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.13 6 | # required 7 | - networkx 8 | - numpy 9 | - pandas 10 | - scikit-learn 11 | - scipy 12 | # testing 13 | - geopandas 14 | - libpysal 15 | - pytest 16 | - pytest-cov 17 | - pytest-xdist 18 | - pytest-mpl 19 | - codecov 20 | - matplotlib 21 | # docs 22 | - nbsphinx 23 | - numpydoc 
24 | - sphinx 25 | - sphinx-gallery 26 | - sphinxcontrib-bibtex 27 | - sphinx_bootstrap_theme 28 | -------------------------------------------------------------------------------- /ci/313-numba-latest.yaml: -------------------------------------------------------------------------------- 1 | name: test 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.13 6 | # required 7 | - networkx 8 | - numpy 9 | - pandas 10 | - scikit-learn 11 | - scipy 12 | # testing 13 | - geopandas 14 | - libpysal 15 | - pytest 16 | - pytest-cov 17 | - pytest-xdist 18 | - pytest-doctestplus 19 | - pytest-mpl 20 | - codecov 21 | - matplotlib 22 | # optional 23 | - numba 24 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | codecov: 2 | notify: 3 | after_n_builds: 10 4 | coverage: 5 | range: 50..95 6 | round: nearest 7 | precision: 1 8 | status: 9 | project: 10 | default: 11 | threshold: 2% 12 | patch: 13 | default: 14 | threshold: 2% 15 | target: 80% 16 | ignore: 17 | - "tests/*" 18 | comment: 19 | layout: "reach, diff, files" 20 | behavior: once 21 | after_n_builds: 10 22 | require_changes: true 23 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = mapclassify 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @rsync -r --exclude '.ipynb_checkpoints/' ../notebooks/ ./notebooks/ 21 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 22 | 23 | github: 24 | @make html 25 | 26 | sync: 27 | @rsync -avh _build/html/ ../docs/ --delete 28 | @make clean 29 | touch .nojekyll 30 | 31 | clean: 32 | rm -rf $(BUILDDIR)/* 33 | rm -rf auto_examples/ 34 | rm -rf generated/ 35 | -------------------------------------------------------------------------------- /docs/_static/auto/references.el: -------------------------------------------------------------------------------- 1 | (TeX-add-style-hook 2 | "references" 3 | (lambda () 4 | (LaTeX-add-bibitems 5 | "Jiang_2013" 6 | "Rey_2016")) 7 | :bibtex) 8 | 9 | -------------------------------------------------------------------------------- /docs/_static/images/equalinterval.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/docs/_static/images/equalinterval.png -------------------------------------------------------------------------------- /docs/_static/images/fisherjenks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/docs/_static/images/fisherjenks.png -------------------------------------------------------------------------------- /docs/_static/images/hr60fj10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/docs/_static/images/hr60fj10.png -------------------------------------------------------------------------------- /docs/_static/images/hr60mb10.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/docs/_static/images/hr60mb10.png -------------------------------------------------------------------------------- /docs/_static/images/hr60q10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/docs/_static/images/hr60q10.png -------------------------------------------------------------------------------- /docs/_static/images/pysal_favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/docs/_static/images/pysal_favicon.ico -------------------------------------------------------------------------------- /docs/_static/images/quantiles.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/docs/_static/images/quantiles.png -------------------------------------------------------------------------------- /docs/_static/pysal-styles.css: -------------------------------------------------------------------------------- 1 | /* Make thumbnails with equal heights */ 2 | @media only screen and (min-width : 500px) { 3 | .row.equal-height { 4 | display: flex; 5 | flex-wrap: wrap; 6 | } 7 | .row.equal-height > [class*='col-'] { 8 | display: flex; 9 | flex-direction: row, column; 10 | } 11 | .row.equal-height.row:after, 12 | .row.equal-height.row:before { 13 | display: flex; 14 | } 15 | 16 | .row.equal-height > [class*='col-'] > .thumbnail, 17 | .row.equal-height > [class*='col-'] > .thumbnail > .caption { 18 | display: flex; 19 | flex: .9 .1 auto; 20 | flex-direction: column; 21 | } 22 | .row.equal-height > [class*='col-'] > .thumbnail > .caption > .flex-text { 23 | flex-grow: 1; 24 | } 25 | .row.equal-height > 
[class*='col-'] > .thumbnail > img { 26 | width: 350px; 27 | height: 200%; /* force image's height */ 28 | 29 | /* force image fit inside it's "box" */ 30 | -webkit-object-fit: cover; 31 | -moz-object-fit: cover; 32 | -ms-object-fit: cover; 33 | -o-object-fit: cover; 34 | object-fit: cover; 35 | } 36 | } 37 | 38 | .row.extra-bottom-padding{ 39 | margin-bottom: 20px; 40 | } 41 | 42 | 43 | .topnavicons { 44 | margin-left: 10% !important; 45 | } 46 | 47 | .topnavicons li { 48 | margin-left: 0px !important; 49 | min-width: 100px; 50 | text-align: center; 51 | } 52 | 53 | .topnavicons .thumbnail { 54 | margin-right: 10px; 55 | border: none; 56 | box-shadow: none; 57 | text-align: center; 58 | font-size: 85%; 59 | font-weight: bold; 60 | line-height: 10px; 61 | height: 100px; 62 | } 63 | 64 | .topnavicons .thumbnail img { 65 | display: block; 66 | margin-left: auto; 67 | margin-right: auto; 68 | } 69 | 70 | 71 | /* Table with a scrollbar */ 72 | .bodycontainer { max-height: 800px; width: 100%; margin: 0; padding: 0; overflow-y: auto; } 73 | .table-scrollable { margin: 0; padding: 0; } 74 | 75 | .label { 76 | color: #E74C3C; 77 | font-size: 100%; 78 | font-weight: bold; 79 | width: 100px; 80 | text-align: left; 81 | vertical-align: middle; 82 | } 83 | 84 | div.body { 85 | max-width: 1080px; 86 | } 87 | 88 | table.longtable.align-default{ 89 | text-align: left; 90 | } -------------------------------------------------------------------------------- /docs/_static/references.bib: -------------------------------------------------------------------------------- 1 | %% This BibTeX bibliography file was created using BibDesk. 
2 | %% http://bibdesk.sourceforge.net/ 3 | 4 | %% Created for Wei Kang at 2018-10-25 22:16:36 -0700 5 | 6 | 7 | %% Saved with string encoding Unicode (UTF-8) 8 | 9 | 10 | 11 | @article{Jiang_2013, 12 | Author = {Jiang, Bin}, 13 | Doi = {10.1080/00330124.2012.700499}, 14 | Issn = {1467-9272}, 15 | Journal = {The Professional Geographer}, 16 | Month = {Aug}, 17 | Number = 3, 18 | Pages = {482--494}, 19 | Publisher = {Informa UK Limited}, 20 | Title = {Head/Tail Breaks: A New Classification Scheme for Data with a Heavy-Tailed Distribution}, 21 | Url = {http://dx.doi.org/10.1080/00330124.2012.700499}, 22 | Volume = 65, 23 | Year = 2013, 24 | Bdsk-Url-1 = {http://dx.doi.org/10.1080/00330124.2012.700499}} 25 | 26 | @article{Rey_2016, 27 | Author = {Rey, Sergio J. and Stephens, Philip and Laura, Jason}, 28 | Doi = {10.1111/tgis.12236}, 29 | Issn = {1361-1682}, 30 | Journal = {Transactions in GIS}, 31 | Month = {Oct}, 32 | Number = 4, 33 | Pages = {796--810}, 34 | Publisher = {Wiley}, 35 | Title = {An evaluation of sampling and full enumeration strategies for {Fisher Jenks} classification in big data settings}, 36 | Url = {http://dx.doi.org/10.1111/tgis.12236}, 37 | Volume = 21, 38 | Year = 2016, 39 | Bdsk-Url-1 = {http://dx.doi.org/10.1111/tgis.12236}} 40 | 41 | @book{Slocum_2009, 42 | Author = {Slocum, Terry A. and McMaster, Robert B. and Kessler, Fritz C. and Howard, Hugh H.}, 43 | Publisher = {Pearson Prentice Hall, Upper Saddle River}, 44 | Title = {Thematic cartography and geovisualization}, 45 | Year = {2009}} 46 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | .. _api_ref: 2 | 3 | .. currentmodule:: mapclassify 4 | 5 | API reference 6 | ============= 7 | 8 | .. _classifiers_api: 9 | 10 | Classifiers 11 | ----------- 12 | 13 | .. 
autosummary:: 14 | :toctree: generated/ 15 | 16 | mapclassify.BoxPlot 17 | mapclassify.EqualInterval 18 | mapclassify.FisherJenks 19 | mapclassify.FisherJenksSampled 20 | mapclassify.greedy 21 | mapclassify.HeadTailBreaks 22 | mapclassify.JenksCaspall 23 | mapclassify.JenksCaspallForced 24 | mapclassify.JenksCaspallSampled 25 | mapclassify.MaxP 26 | mapclassify.MaximumBreaks 27 | mapclassify.NaturalBreaks 28 | mapclassify.Percentiles 29 | mapclassify.PrettyBreaks 30 | mapclassify.Quantiles 31 | mapclassify.StdMean 32 | mapclassify.UserDefined 33 | 34 | Utilities 35 | --------- 36 | 37 | .. autosummary:: 38 | :toctree: generated/ 39 | 40 | mapclassify.KClassifiers 41 | mapclassify.Pooled 42 | mapclassify.classify 43 | mapclassify.gadf 44 | mapclassify.util.get_color_array 45 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # giddy documentation build configuration file, created by 2 | # sphinx-quickstart on Wed Jun 6 15:54:22 2018. 3 | # 4 | # This file is execfile()d with the current directory set to its 5 | # containing dir. 6 | # 7 | # Note that not all possible configuration values are present in this 8 | # autogenerated file. 9 | # 10 | # All configuration values have a default; values that are commented out 11 | # serve to show the default. 12 | 13 | import os 14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
18 | # 19 | import sys 20 | 21 | import sphinx_bootstrap_theme 22 | 23 | sys.path.insert(0, os.path.abspath("../")) 24 | 25 | # import your package to obtain the version info to display on the docs website 26 | import mapclassify 27 | 28 | # -- General configuration ------------------------------------------------ 29 | 30 | # If your documentation needs a minimal Sphinx version, state it here. 31 | # 32 | # needs_sphinx = '1.0' 33 | # Add any Sphinx extension module names here, as strings. They can be 34 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 35 | # ones. 36 | extensions = [ #'sphinx_gallery.gen_gallery', 37 | "sphinx.ext.autodoc", 38 | "sphinx.ext.autosummary", 39 | "sphinx.ext.viewcode", 40 | "sphinxcontrib.bibtex", 41 | "sphinx.ext.mathjax", 42 | "sphinx.ext.doctest", 43 | "sphinx.ext.intersphinx", 44 | "numpydoc", 45 | "matplotlib.sphinxext.plot_directive", 46 | "nbsphinx", 47 | ] 48 | 49 | bibtex_bibfiles = ["_static/references.bib"] 50 | 51 | # Add any paths that contain templates here, relative to this directory. 52 | templates_path = ["_templates"] 53 | 54 | # The suffix(es) of source filenames. 55 | # You can specify multiple suffix as a list of string: 56 | # 57 | # source_suffix = ['.rst', '.md'] 58 | source_suffix = ".rst" 59 | 60 | # The master toctree document. 61 | master_doc = "index" 62 | 63 | # General information about the project. 64 | project = "mapclassify" # string of your project name, for example, 'giddy' 65 | copyright = "2018-, pysal developers" 66 | author = "pysal developers" 67 | 68 | # The version info for the project you're documenting, acts as replacement for 69 | # |version| and |release|, also used in various other places throughout the 70 | # built documents. 71 | # 72 | # The full version. 
73 | version = mapclassify.__version__ # should replace it with your PACKAGE_NAME 74 | release = mapclassify.__version__ # should replace it with your PACKAGE_NAME 75 | 76 | # The language for content autogenerated by Sphinx. Refer to documentation 77 | # for a list of supported languages. 78 | # 79 | # This is also used if you do content translation via gettext catalogs. 80 | # Usually you set "language" from the command line for these cases. 81 | language = "en" 82 | 83 | # List of patterns, relative to source directory, that match files and 84 | # directories to ignore when looking for source files. 85 | # This patterns also effect to html_static_path and html_extra_path 86 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "tests/*"] 87 | 88 | # The name of the Pygments (syntax highlighting) style to use. 89 | pygments_style = "sphinx" 90 | 91 | # If true, `todo` and `todoList` produce output, else they produce nothing. 92 | todo_include_todos = False 93 | 94 | # -- Options for HTML output ---------------------------------------------- 95 | 96 | # The theme to use for HTML and HTML Help pages. See the documentation for 97 | # a list of builtin themes. 98 | # 99 | # html_theme = 'alabaster' 100 | html_theme = "bootstrap" 101 | html_theme_path = sphinx_bootstrap_theme.get_html_theme_path() 102 | html_title = f"{project} v{version} Manual" 103 | 104 | # (Optional) Logo of your package. 105 | # Should be small enough to fit the navbar (ideally 24x24). 106 | # Path should be relative to the ``_static`` files directory. 107 | # html_logo = "_static/images/package_logo.jpg" 108 | 109 | # (Optional) PySAL favicon 110 | html_favicon = "_static/images/pysal_favicon.ico" 111 | 112 | 113 | # Theme options are theme-specific and customize the look and feel of a theme 114 | # further. For a list of options available for each theme, see the 115 | # documentation. 116 | # 117 | html_theme_options = { 118 | # Navigation bar title. 
(Default: ``project`` value) 119 | "navbar_title": project, # string of your project name, for example, 'giddy' 120 | # Render the next and previous page links in navbar. (Default: true) 121 | "navbar_sidebarrel": False, 122 | # Render the current pages TOC in the navbar. (Default: true) 123 | #'navbar_pagenav': True, 124 | #'navbar_pagenav': False, 125 | # No sidebar 126 | "nosidebar": True, 127 | # Tab name for the current pages TOC. (Default: "Page") 128 | #'navbar_pagenav_name': "Page", 129 | # Global TOC depth for "site" navbar tab. (Default: 1) 130 | # Switching to -1 shows all levels. 131 | "globaltoc_depth": 2, 132 | # Include hidden TOCs in Site navbar? 133 | # 134 | # Note: If this is "false", you cannot have mixed ``:hidden:`` and 135 | # non-hidden ``toctree`` directives in the same page, or else the build 136 | # will break. 137 | # 138 | # Values: "true" (default) or "false" 139 | "globaltoc_includehidden": "true", 140 | # HTML navbar class (Default: "navbar") to attach to
element. 141 | # For black navbar, do "navbar navbar-inverse" 142 | #'navbar_class': "navbar navbar-inverse", 143 | # Fix navigation bar to top of page? 144 | # Values: "true" (default) or "false" 145 | "navbar_fixed_top": "true", 146 | # Location of link to source. 147 | # Options are "nav" (default), "footer" or anything else to exclude. 148 | "source_link_position": "footer", 149 | # Bootswatch (http://bootswatch.com/) theme. 150 | # 151 | # Options are nothing (default) or the name of a valid theme 152 | # such as "amelia" or "cosmo", "yeti", "flatly". 153 | "bootswatch_theme": "yeti", 154 | # Choose Bootstrap version. 155 | # Values: "3" (default) or "2" (in quotes) 156 | "bootstrap_version": "3", 157 | # Navigation bar menu 158 | "navbar_links": [ 159 | ("Installation", "installation"), 160 | ("Tutorial", "tutorial"), 161 | ("API", "api"), 162 | ("References", "references"), 163 | ], 164 | } 165 | 166 | # Add any paths that contain custom static files (such as style sheets) here, 167 | # relative to this directory. They are copied after the builtin static files, 168 | # so a file named "default.css" will overwrite the builtin "default.css". 169 | html_static_path = ["_static"] 170 | 171 | # Custom sidebar templates, maps document names to template names. 172 | # html_sidebars = {} 173 | # html_sidebars = {'sidebar': ['localtoc.html', 'sourcelink.html', 'searchbox.html']} 174 | 175 | # -- Options for HTMLHelp output ------------------------------------------ 176 | 177 | # Output file base name for HTML help builder. 178 | htmlhelp_basename = project + "doc" 179 | 180 | 181 | # -- Options for LaTeX output --------------------------------------------- 182 | 183 | latex_elements = { 184 | # The paper size ('letterpaper' or 'a4paper'). 185 | # 186 | # 'papersize': 'letterpaper', 187 | # The font size ('10pt', '11pt' or '12pt'). 188 | # 189 | # 'pointsize': '10pt', 190 | # Additional stuff for the LaTeX preamble. 
191 | # 192 | # 'preamble': '', 193 | # Latex figure (float) alignment 194 | # 195 | # 'figure_align': 'htbp', 196 | } 197 | 198 | # Grouping the document tree into LaTeX files. List of tuples 199 | # (source start file, target name, title, 200 | # author, documentclass [howto, manual, or own class]). 201 | latex_documents = [ 202 | ( 203 | master_doc, 204 | f"{project}.tex", 205 | f"{project} Documentation", 206 | "pysal developers", 207 | "manual", 208 | ), 209 | ] 210 | 211 | 212 | # -- Options for manual page output --------------------------------------- 213 | 214 | # One entry per manual page. List of tuples 215 | # (source start file, name, description, authors, manual section). 216 | man_pages = [(master_doc, project, f"{project} Documentation", [author], 1)] 217 | 218 | 219 | # -- Options for Texinfo output ------------------------------------------- 220 | 221 | # Grouping the document tree into Texinfo files. List of tuples 222 | # (source start file, target name, title, author, 223 | # dir menu entry, description, category) 224 | texinfo_documents = [ 225 | ( 226 | master_doc, 227 | project, 228 | f"{project} Documentation", 229 | author, 230 | "PySAL Developers", 231 | "map classification schemes.", 232 | "Miscellaneous", 233 | ), 234 | ] 235 | 236 | 237 | # ----------------------------------------------------------------------------- 238 | # Autosummary 239 | # ----------------------------------------------------------------------------- 240 | 241 | # Generate the API documentation when building 242 | autosummary_generate = True 243 | 244 | # avoid showing members twice 245 | numpydoc_show_class_members = False 246 | numpydoc_use_plots = True 247 | class_members_toctree = True 248 | numpydoc_show_inherited_class_members = True 249 | numpydoc_xref_param_type = True 250 | 251 | # automatically document class members 252 | autodoc_default_options = {"members": True, "undoc-members": True} 253 | 254 | # display the source code for Plot directive 255 | 
plot_include_source = True 256 | 257 | 258 | def setup(app): 259 | app.add_css_file("pysal-styles.css") 260 | 261 | 262 | # Example configuration for intersphinx: refer to the Python standard library. 263 | intersphinx_mapping = { 264 | "geopandas": ("https://geopandas.org/en/latest/", None), 265 | "libpysal": ("https://pysal.org/libpysal/", None), 266 | "matplotlib": ("https://matplotlib.org/stable/", None), 267 | "networkx": ("https://networkx.org/documentation/stable/", None), 268 | "numpy": ("https://numpy.org/doc/stable/", None), 269 | "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), 270 | "python": ("https://docs.python.org/3.13/", None), 271 | "scipy": ("https://docs.scipy.org/doc/scipy/", None), 272 | } 273 | 274 | 275 | # This is processed by Jinja2 and inserted before each notebook 276 | nbsphinx_prolog = r""" 277 | {% set docname = env.doc2path(env.docname, base=None) %} 278 | .. only:: html 279 | 280 | .. role:: raw-html(raw) 281 | :format: html 282 | 283 | .. nbinfo:: 284 | 285 | This page was generated from `{{ docname }}`__. 286 | Interactive online version: 287 | :raw-html:`Binder badge` 288 | 289 | __ https://github.com/pysal/mapclassify/blob/main/{{ docname }} 290 | .. raw:: latex 291 | 292 | \nbsphinxstartnotebook{\scriptsize\noindent\strut 293 | \textcolor{gray}{The following section was generated from 294 | \sphinxcode{\sphinxupquote{\strut {{ docname | escape_latex }}}} \dotfill}} 295 | """ # noqa: E501 296 | 297 | # This is processed by Jinja2 and inserted after each notebook 298 | nbsphinx_epilog = r""" 299 | .. 
raw:: latex 300 | 301 | \nbsphinxstopnotebook{\scriptsize\noindent\strut 302 | \textcolor{gray}{\dotfill\ \sphinxcode{\sphinxupquote{\strut 303 | {{ env.doc2path(env.docname, base='doc') | escape_latex }}}} ends here.}} 304 | """ 305 | 306 | # List of arguments to be passed to the kernel that executes the notebooks: 307 | nbsphinx_execute_arguments = [ 308 | "--InlineBackend.figure_formats={'svg', 'pdf'}", 309 | "--InlineBackend.rc={'figure.dpi': 96}", 310 | ] 311 | 312 | 313 | mathjax3_config = { 314 | "TeX": {"equationNumbers": {"autoNumber": "AMS", "useLabelIds": True}}, 315 | } 316 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. documentation master file 2 | 3 | mapclassify 4 | =========== 5 | 6 | mapclassify is an open-source python library for Choropleth map classification. It is part of `PySAL`_ the Python Spatial Analysis Library. 7 | 8 | .. raw:: html 9 | 10 |
11 |
12 |
13 |
14 | 22 |
23 |
24 | 32 |
33 |
34 | 43 |
44 |
45 |
You may want to use the newest features in the development
version of mapclassify on GitHub - `pysal/mapclassify`_ which have not yet been incorporated
in the PyPI released version. You can achieve that by installing `pysal/mapclassify`_
mapclassify is available on `conda`_ via the `conda-forge`_ channel::
# Run `conda env create -f environment.yml`
importlib.metadata import PackageNotFoundError, version 3 | 4 | from . import legendgram, util 5 | from ._classify_API import classify 6 | from .classifiers import ( 7 | CLASSIFIERS, 8 | BoxPlot, 9 | EqualInterval, 10 | FisherJenks, 11 | FisherJenksSampled, 12 | HeadTailBreaks, 13 | JenksCaspall, 14 | JenksCaspallForced, 15 | JenksCaspallSampled, 16 | KClassifiers, 17 | MaximumBreaks, 18 | MaxP, 19 | NaturalBreaks, 20 | Percentiles, 21 | PrettyBreaks, 22 | Quantiles, 23 | StdMean, 24 | UserDefined, 25 | gadf, 26 | load_example, 27 | ) 28 | from .greedy import greedy 29 | from .pooling import Pooled 30 | 31 | with contextlib.suppress(PackageNotFoundError): 32 | __version__ = version("mapclassify") 33 | -------------------------------------------------------------------------------- /mapclassify/_classify_API.py: -------------------------------------------------------------------------------- 1 | from .classifiers import ( 2 | BoxPlot, 3 | EqualInterval, 4 | FisherJenks, 5 | FisherJenksSampled, 6 | HeadTailBreaks, 7 | JenksCaspall, 8 | JenksCaspallForced, 9 | JenksCaspallSampled, 10 | MaximumBreaks, 11 | MaxP, 12 | NaturalBreaks, 13 | Percentiles, 14 | PrettyBreaks, 15 | Quantiles, 16 | StdMean, 17 | UserDefined, 18 | ) 19 | 20 | __author__ = "Stefanie Lumnitz " 21 | 22 | 23 | _classifiers = { 24 | "boxplot": BoxPlot, 25 | "equalinterval": EqualInterval, 26 | "fisherjenks": FisherJenks, 27 | "fisherjenkssampled": FisherJenksSampled, 28 | "headtailbreaks": HeadTailBreaks, 29 | "jenkscaspall": JenksCaspall, 30 | "jenkscaspallforced": JenksCaspallForced, 31 | "jenkscaspallsampled": JenksCaspallSampled, 32 | "maxp": MaxP, 33 | "maximumbreaks": MaximumBreaks, 34 | "naturalbreaks": NaturalBreaks, 35 | "quantiles": Quantiles, 36 | "percentiles": Percentiles, 37 | "prettybreaks": PrettyBreaks, 38 | "stdmean": StdMean, 39 | "userdefined": UserDefined, 40 | } 41 | 42 | 43 | def classify( 44 | y, 45 | scheme, 46 | k=5, 47 | pct=[1, 10, 50, 90, 99, 100], 48 | pct_sampled=0.10, 
49 | truncate=True, 50 | hinge=1.5, 51 | multiples=[-2, -1, 1, 2], 52 | mindiff=0, 53 | initial=100, 54 | bins=None, 55 | lowest=None, 56 | anchor=False, 57 | ): 58 | """ 59 | 60 | Classify your data with ``mapclassify.classify``. 61 | Input parameters are dependent on classifier used. 62 | 63 | Parameters 64 | ---------- 65 | 66 | y : numpy.array 67 | :math:`(n,1)`, values to classify. 68 | scheme : str 69 | ``pysal.mapclassify`` classification scheme. 70 | k : int (default 5) 71 | The number of classes. 72 | pct : numpy.array (default [1, 10, 50, 90, 99, 100]) 73 | Percentiles used for classification with ``percentiles``. 74 | pct_sampled : float default (0.10) 75 | The percentage of n that should form the sample 76 | (``JenksCaspallSampled``, ``FisherJenksSampled``) 77 | If ``pct`` is specified such that ``n*pct > 1000``, then ``pct=1000``. 78 | truncate : bool (default True) 79 | Truncate ``pct_sampled`` in cases where ``pct * n > 1000``. 80 | hinge : float (default 1.5) 81 | Multiplier for *IQR* when ``BoxPlot`` classifier used. 82 | multiples : numpy.array (default [-2,-1,1,2]) 83 | The multiples of the standard deviation to add/subtract from 84 | the sample mean to define the bins using ``std_mean``. 85 | mindiff : float (default is 0) 86 | The minimum difference between class breaks 87 | if using ``maximum_breaks`` classifier. 88 | initial : int (default 100) 89 | Number of initial solutions to generate or number of runs when using 90 | ``natural_breaks`` or ``max_p_classifier``. Setting initial to ``0`` 91 | will result in the quickest calculation of bins. 92 | bins : numpy.array (default None) 93 | :math:`(k,1)`, upper bounds of classes (have to be monotically 94 | increasing) if using ``user_defined`` classifier. 95 | Default is ``None``. For example: ``[20, max(y)]``. 96 | lowest : float (default None) 97 | Scalar minimum value of lowest class. 
Default is to set the minimum 98 | to ``-inf`` if ``y.min()`` > first upper bound (which will override 99 | the default), otherwise minimum is set to ``y.min()``. 100 | anchor : bool (default False) 101 | Anchor upper bound of one class to the sample mean. 102 | 103 | 104 | 105 | Returns 106 | ------- 107 | classifier : mapclassify.classifiers.MapClassifier 108 | Object containing bin ids for each observation (``.yb``), 109 | upper bounds of each class (``.bins``), number of classes (``.k``) 110 | and number of observations falling in each class (``.counts``). 111 | 112 | Notes 113 | ----- 114 | 115 | Supported classifiers include: 116 | 117 | * ``quantiles`` 118 | * ``boxplot`` 119 | * ``equalinterval`` 120 | * ``fisherjenks`` 121 | * ``fisherjenkssampled`` 122 | * ``headtailbreaks`` 123 | * ``jenkscaspall`` 124 | * ``jenkscaspallsampled`` 125 | * ``jenks_caspallforced`` 126 | * ``maxp`` 127 | * ``maximumbreaks`` 128 | * ``naturalbreaks`` 129 | * ``percentiles`` 130 | * ``prettybreaks`` 131 | * ``stdmean`` 132 | * ``userdefined`` 133 | 134 | Examples 135 | -------- 136 | 137 | >>> import libpysal 138 | >>> import geopandas 139 | >>> from mapclassify import classify 140 | 141 | Load example data. 142 | 143 | >>> link_to_data = libpysal.examples.get_path("columbus.shp") 144 | >>> gdf = geopandas.read_file(link_to_data) 145 | >>> x = gdf['HOVAL'].values 146 | 147 | Classify values by quantiles. 148 | 149 | >>> quantiles = classify(x, "quantiles") 150 | 151 | Classify values by box_plot and set hinge to ``2``. 
152 | 153 | >>> box_plot = classify(x, 'box_plot', hinge=2) 154 | >>> box_plot 155 | BoxPlot 156 | 157 | Interval Count 158 | ---------------------- 159 | ( -inf, -9.50] | 0 160 | (-9.50, 25.70] | 13 161 | (25.70, 33.50] | 12 162 | (33.50, 43.30] | 12 163 | (43.30, 78.50] | 9 164 | (78.50, 96.40] | 3 165 | 166 | """ 167 | 168 | # reformat 169 | scheme_lower = scheme.lower() 170 | scheme = scheme_lower.replace("_", "") 171 | 172 | # check if scheme is a valid scheme 173 | if scheme not in _classifiers: 174 | raise ValueError( 175 | f"Invalid scheme: '{scheme}'\n" 176 | f"Scheme must be in the set: {_classifiers.keys()}" 177 | ) 178 | 179 | elif scheme == "boxplot": 180 | classifier = _classifiers[scheme](y, hinge) 181 | 182 | elif scheme == "fisherjenkssampled": 183 | classifier = _classifiers[scheme](y, k, pct_sampled, truncate) 184 | 185 | elif scheme == "headtailbreaks": 186 | classifier = _classifiers[scheme](y) 187 | 188 | elif scheme == "percentiles": 189 | classifier = _classifiers[scheme](y, pct) 190 | 191 | elif scheme == "stdmean": 192 | classifier = _classifiers[scheme](y, multiples, anchor) 193 | 194 | elif scheme == "jenkscaspallsampled": 195 | classifier = _classifiers[scheme](y, k, pct_sampled) 196 | 197 | elif scheme == "maximumbreaks": 198 | classifier = _classifiers[scheme](y, k, mindiff) 199 | 200 | elif scheme in ["naturalbreaks", "maxp"]: 201 | classifier = _classifiers[scheme](y, k, initial) 202 | 203 | elif scheme == "userdefined": 204 | classifier = _classifiers[scheme](y, bins, lowest) 205 | 206 | elif scheme in [ 207 | "equalinterval", 208 | "fisherjenks", 209 | "jenkscaspall", 210 | "jenkscaspallforced", 211 | "quantiles", 212 | "prettybreaks", 213 | ]: 214 | classifier = _classifiers[scheme](y, k) 215 | 216 | return classifier 217 | -------------------------------------------------------------------------------- /mapclassify/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 
Datasets module 3 | """ 4 | 5 | from . import calemp 6 | -------------------------------------------------------------------------------- /mapclassify/datasets/calemp/README.md: -------------------------------------------------------------------------------- 1 | calemp 2 | ====== 3 | 4 | Employment density for California counties 5 | ------------------------------------------ 6 | 7 | * calempdensity.csv: data on employment and employment density in California 8 | counties. 9 | 10 | Polygon data, n=58, k=11. 11 | 12 | Source: Anselin, L. and S.J. Rey (in progress) Spatial Econometrics: Foundations. 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /mapclassify/datasets/calemp/__init__.py: -------------------------------------------------------------------------------- 1 | from .data import * 2 | -------------------------------------------------------------------------------- /mapclassify/datasets/calemp/calempdensity.csv: -------------------------------------------------------------------------------- 1 | "Geographic Area","Geographic Area","Geographic Name","GEONAME","GEOCOMP","STATE","Number of Employees for All Sectors","Number of employees","Class Number","sq. 
km","emp/sq km" 2 | "05000US06001","06001","Alameda County, California","Alameda County, California","00","06",630171,630171,5,1910.1,329.92 3 | "05000US06003","06003","Alpine County, California","Alpine County, California","00","06",813,813,1,1913.1,0.42 4 | "05000US06005","06005","Amador County, California","Amador County, California","00","06",9061,9061,2,1534.7,5.9 5 | "05000US06007","06007","Butte County, California","Butte County, California","00","06",59578,59578,3,4246.6,14.03 6 | "05000US06009","06009","Calaveras County, California","Calaveras County, California","00","06",7344,7344,2,2642.3,2.78 7 | "05000US06011","06011","Colusa County, California","Colusa County, California","00","06",4000,4000,1,2980.5,1.34 8 | "05000US06013","06013","Contra Costa County, California","Contra Costa County, California","00","06",338156,338156,5,1865.5,181.27 9 | "05000US06015","06015","Del Norte County, California","Del Norte County, California","00","06",4303,4303,1,2610.4,1.65 10 | "05000US06017","06017","El Dorado County, California","El Dorado County, California","00","06",44477,44477,3,4432.8,10.03 11 | "05000US06019","06019","Fresno County, California","Fresno County, California","00","06",257975,257975,4,15444.7,16.7 12 | "05000US06021","06021","Glenn County, California","Glenn County, California","00","06",4487,4487,1,3405.5,1.32 13 | "05000US06023","06023","Humboldt County, California","Humboldt County, California","00","06",36962,36962,3,9253.5,3.99 14 | "05000US06025","06025","Imperial County, California","Imperial County, California","00","06",34156,34156,3,10813.4,3.16 15 | "05000US06027","06027","Inyo County, California","Inyo County, California","00","06",5820,5820,1,26397.5,0.22 16 | "05000US06029","06029","Kern County, California","Kern County, California","00","06",183412,183412,4,21086.8,8.7 17 | "05000US06031","06031","Kings County, California","Kings County, California","00","06",23610,23610,2,3598.8,6.56 18 | "05000US06033","06033","Lake County, 
California","Lake County, California","00","06",10648,10648,2,3259.4,3.27 19 | "05000US06035","06035","Lassen County, California","Lassen County, California","00","06",3860,3860,1,11803.9,0.33 20 | "05000US06037","06037","Los Angeles County, California","Los Angeles County, California","00","06",3895886,3895886,5,10515.3,370.5 21 | "05000US06039","06039","Madera County, California","Madera County, California","00","06",24957,24957,2,5538.5,4.51 22 | "05000US06041","06041","Marin County, California","Marin County, California","00","06",101358,101358,4,1346.2,75.29 23 | "05000US06043","06043","Mariposa County, California","Mariposa County, California","00","06",3739,3739,1,3758.6,0.99 24 | "05000US06045","06045","Mendocino County, California","Mendocino County, California","00","06",24898,24898,2,9089,2.74 25 | "05000US06047","06047","Merced County, California","Merced County, California","00","06",43369,43369,3,4995.8,8.68 26 | "05000US06049","06049","Modoc County, California","Modoc County, California","00","06",1467,1467,1,10215.9,0.14 27 | "05000US06051","06051","Mono County, California","Mono County, California","00","06",7289,7289,1,7885.2,0.92 28 | "05000US06053","06053","Monterey County, California","Monterey County, California","00","06",108660,108660,4,8603.8,12.63 29 | "05000US06055","06055","Napa County, California","Napa County, California","00","06",56029,56029,3,1952.5,28.7 30 | "05000US06057","06057","Nevada County, California","Nevada County, California","00","06",29805,29805,3,2480.3,12.02 31 | "05000US06059","06059","Orange County, California","Orange County, California","00","06",1478452,1478452,5,2045.3,722.85 32 | "05000US06061","06061","Placer County, California","Placer County, California","00","06",133427,133427,4,3637.4,36.68 33 | "05000US06063","06063","Plumas County, California","Plumas County, California","00","06",4863,4863,1,6614.8,0.74 34 | "05000US06065","06065","Riverside County, California","Riverside County, 
California","00","06",556789,556789,5,18669.1,29.82 35 | "05000US06067","06067","Sacramento County, California","Sacramento County, California","00","06",480346,480346,5,2501.1,192.05 36 | "05000US06069","06069","San Benito County, California","San Benito County, California","00","06",12163,12163,2,3597.9,3.38 37 | "05000US06071","06071","San Bernardino County, California","San Bernardino County, California","00","06",579135,579135,5,51961.2,11.15 38 | "05000US06073","06073","San Diego County, California","San Diego County, California","00","06",1205862,1205862,5,10889.6,110.74 39 | "05000US06075","06075","San Francisco County, California","San Francisco County, California","00","06",497485,497485,5,121,4111.45 40 | "05000US06077","06077","San Joaquin County, California","San Joaquin County, California","00","06",179276,179276,4,3624.1,49.47 41 | "05000US06079","06079","San Luis Obispo County, California","San Luis Obispo County, California","00","06",88413,88413,3,8558.7,10.33 42 | "05000US06081","06081","San Mateo County, California","San Mateo County, California","00","06",368859,368859,5,1163.2,317.11 43 | "05000US06083","06083","Santa Barbara County, California","Santa Barbara County, California","00","06",145202,145202,4,7092.6,20.47 44 | "05000US06085","06085","Santa Clara County, California","Santa Clara County, California","00","06",886011,886011,5,3344.3,264.93 45 | "05000US06087","06087","Santa Cruz County, California","Santa Cruz County, California","00","06",76488,76488,3,1154.3,66.26 46 | "05000US06089","06089","Shasta County, California","Shasta County, California","00","06",52804,52804,3,9804.8,5.39 47 | "05000US06091","06091","Sierra County, California","Sierra County, California","00","06",324,324,1,2469.4,0.13 48 | "05000US06093","06093","Siskiyou County, California","Siskiyou County, California","00","06",9992,9992,2,16284,0.61 49 | "05000US06095","06095","Solano County, California","Solano County, 
California","00","06",108653,108653,4,2145,50.65 50 | "05000US06097","06097","Sonoma County, California","Sonoma County, California","00","06",165261,165261,4,4082.4,40.48 51 | "05000US06099","06099","Stanislaus County, California","Stanislaus County, California","00","06",141928,141928,4,3870.9,36.67 52 | "05000US06101","06101","Sutter County, California","Sutter County, California","00","06",20430,20430,2,1561,13.09 53 | "05000US06103","06103","Tehama County, California","Tehama County, California","00","06",13809,13809,2,7643.2,1.81 54 | "05000US06105","06105","Trinity County, California","Trinity County, California","00","06",1668,1668,1,8233.3,0.2 55 | "05000US06107","06107","Tulare County, California","Tulare County, California","00","06",94949,94949,4,12495,7.6 56 | "05000US06109","06109","Tuolumne County, California","Tuolumne County, California","00","06",14519,14519,2,5790.3,2.51 57 | "05000US06111","06111","Ventura County, California","Ventura County, California","00","06",273745,273745,5,4781,57.26 58 | "05000US06113","06113","Yolo County, California","Yolo County, California","00","06",63769,63769,3,2622.2,24.32 59 | "05000US06115","06115","Yuba County, California","Yuba County, California","00","06",11374,11374,2,1632.9,6.97 60 | -------------------------------------------------------------------------------- /mapclassify/datasets/calemp/data.py: -------------------------------------------------------------------------------- 1 | from os.path import abspath, dirname 2 | 3 | import pandas as pd 4 | 5 | 6 | def load(): 7 | """ 8 | Load the data and return a DataSeries instance. 
9 | 10 | """ 11 | 12 | df = _get_data() 13 | 14 | return df["emp/sq km"] 15 | 16 | 17 | def _get_data(): 18 | filepath = dirname(abspath(__file__)) 19 | filepath += "/calempdensity.csv" 20 | df = pd.read_csv(filepath) 21 | return df 22 | -------------------------------------------------------------------------------- /mapclassify/greedy.py: -------------------------------------------------------------------------------- 1 | """ 2 | greedy - Greedy (topological) coloring for GeoPandas 3 | 4 | Copyright (C) 2019 Martin Fleischmann, 2017 Nyall Dawson 5 | 6 | """ 7 | 8 | import operator 9 | 10 | __all__ = ["greedy"] 11 | 12 | 13 | def _balanced(features, sw, balance="count", min_colors=4): 14 | """ 15 | Strategy to color features in a way which is visually balanced. 16 | 17 | Algorithm ported from QGIS to be used with GeoDataFrames 18 | and libpysal weights objects. 19 | 20 | Original algorithm: 21 | Date : February 2017 22 | Copyright : (C) 2017 by Nyall Dawson 23 | Email : nyall dot dawson at gmail dot com 24 | 25 | Parameters 26 | ---------- 27 | 28 | features : geopandas.GeoDataFrame 29 | GeoDataFrame. 30 | sw : libpysal.weights.W 31 | Spatial weights object denoting adjacency of features. 32 | balance : str (default 'count') 33 | The method of color balancing. 34 | min_colors : int (default 4) 35 | The minimal number of colors to be used. 36 | 37 | Returns 38 | ------- 39 | 40 | feature_colors : dict 41 | Dictionary with assigned color codes. 
42 | 43 | """ 44 | feature_colors = {} 45 | # start with minimum number of colors in pool 46 | color_pool = set(range(min_colors)) 47 | 48 | # calculate count of neighbours 49 | neighbour_count = sw.cardinalities 50 | 51 | # sort features by neighbour count - handle those with more neighbours first 52 | sorted_by_count = sorted( 53 | neighbour_count.items(), key=operator.itemgetter(1), reverse=True 54 | ) 55 | 56 | # counts for each color already assigned 57 | color_counts = {} 58 | color_areas = {} 59 | for c in color_pool: 60 | color_counts[c] = 0 61 | color_areas[c] = 0 62 | 63 | if balance == "centroid": 64 | features = features.copy() 65 | features.geometry = features.geometry.centroid 66 | balance = "distance" 67 | 68 | for feature_id, _ in sorted_by_count: 69 | # first work out which already assigned colors are adjacent to this feature 70 | adjacent_colors = set() 71 | for neighbour in sw.neighbors[feature_id]: 72 | if neighbour in feature_colors: 73 | adjacent_colors.add(feature_colors[neighbour]) 74 | 75 | # from the existing colors, work out which are available (ie non-adjacent) 76 | available_colors = color_pool.difference(adjacent_colors) 77 | 78 | feature_color = -1 79 | if len(available_colors) == 0: 80 | # no existing colors available for this feature; add new color and repeat 81 | min_colors += 1 82 | return _balanced(features, sw, balance, min_colors) 83 | else: 84 | if balance == "count": 85 | # choose least used available color 86 | counts = [ 87 | (c, v) for c, v in color_counts.items() if c in available_colors 88 | ] 89 | feature_color = sorted(counts, key=operator.itemgetter(1))[0][0] 90 | color_counts[feature_color] += 1 91 | elif balance == "area": 92 | areas = [ 93 | (c, v) for c, v in color_areas.items() if c in available_colors 94 | ] 95 | feature_color = sorted(areas, key=operator.itemgetter(1))[0][0] 96 | color_areas[feature_color] += features.loc[feature_id].geometry.area 97 | 98 | elif balance == "distance": 99 | min_distances = {c: 
float("inf") for c in available_colors} 100 | this_feature = features.loc[feature_id].geometry 101 | 102 | # find features for all available colors 103 | other_features = { 104 | f_id: c 105 | for (f_id, c) in feature_colors.items() 106 | if c in available_colors 107 | } 108 | 109 | distances = features.loc[other_features.keys()].distance(this_feature) 110 | # calculate the min distance from this feature to the nearest 111 | # feature with each assigned color 112 | for other_feature_id, c in other_features.items(): 113 | distance = distances.loc[other_feature_id] 114 | if distance < min_distances[c]: 115 | min_distances[c] = distance 116 | 117 | # choose color such that min distance is maximised! 118 | # - ie we want MAXIMAL separation between features with the same color 119 | feature_color = sorted( 120 | min_distances, key=min_distances.__getitem__, reverse=True 121 | )[0] 122 | 123 | feature_colors[feature_id] = feature_color 124 | 125 | return feature_colors 126 | 127 | 128 | def greedy( 129 | gdf, 130 | strategy="balanced", 131 | balance="count", 132 | min_colors=4, 133 | sw="queen", 134 | min_distance=None, 135 | silence_warnings=True, 136 | interchange=False, 137 | ): 138 | """ 139 | Color GeoDataFrame using various strategies of greedy (topological) colouring. 140 | 141 | Attempts to color a GeoDataFrame using as few colors as possible, where no 142 | neighbours can have same color as the feature itself. Offers various strategies 143 | ported from QGIS or implemented within NetworkX for greedy graph coloring. 144 | 145 | ``greedy`` will return ``pandas.Series`` representing assigned color codes. 146 | 147 | Parameters 148 | ---------- 149 | 150 | gdf : GeoDataFrame 151 | GeoDataFrame 152 | strategy : str (default 'balanced') 153 | Determine coloring strategy. Options are ``'balanced'`` for 154 | algorithm based on QGIS Topological coloring. It is aiming 155 | for a visual balance, defined by the balance parameter. 
Other 156 | options are those supported by ``networkx.greedy_color``: 157 | 158 | * ``'largest_first'`` 159 | * ``'random_sequential'`` 160 | * ``'smallest_last'`` 161 | * ``'independent_set'`` 162 | * ``'connected_sequential_bfs'`` 163 | * ``'connected_sequential_dfs'`` 164 | * ``'connected_sequential'`` (alias for the previous strategy) 165 | * ``'saturation_largest_first'`` 166 | * ``'DSATUR'`` (alias for the previous strategy) 167 | 168 | For details see https://networkx.github.io/documentation/stable/reference/algorithms/generated/networkx.algorithms.coloring.greedy_color.html 169 | 170 | balance : str (default 'count') 171 | If strategy is ``'balanced'``, determine the method of color balancing. 172 | 173 | * ``'count'`` attempts to balance the number of features per each color. 174 | * ``'area'`` attempts to balance the area covered by each color. 175 | * ``'centroid'`` attempts to balance the distance between colors based 176 | on the distance between centroids. 177 | * ``'distance'`` attempts to balance the distance between colors based 178 | on the distance between geometries. Slower than ``'centroid'``, 179 | but more precise. 180 | 181 | Both ``'centroid'`` and ``'distance'`` are significantly slower than others, 182 | especially for larger GeoDataFrames. Apart from ``'count'``, all require 183 | CRS to be projected (not in degrees) to ensure metric values are correct. 184 | min_colors: int (default 4) 185 | If strategy is ``'balanced'``, define the minimal number of colors to be used. 186 | sw : 'queen', 'rook' or libpysal.weights.W (default 'queen') 187 | If min_distance is None, one can pass ``'libpysal.weights.W'`` 188 | object denoting neighbors or let greedy generate one based on 189 | ``'queen'`` or ``'rook'`` contiguity. 190 | min_distance : float (default None) 191 | Set minimal distance between colors. If ``min_distance`` is not ``None``, 192 | slower algorithm for generating spatial weights is used based on 193 | intersection between geometries. 
``'min_distance'`` is then used as a 194 | tolerance of intersection. 195 | silence_warnings : bool (default True) 196 | Silence libpysal warnings when creating spatial weights. 197 | interchange : bool (default False) 198 | Use the color interchange algorithm (applicable for NetworkX strategies). 199 | For details see https://networkx.github.io/documentation/stable/reference/algorithms/generated/networkx.algorithms.coloring.greedy_color.html 200 | 201 | Returns 202 | ------- 203 | 204 | color : pandas.Series 205 | ``pandas.Series`` representing assigned color codes. 206 | 207 | Examples 208 | -------- 209 | 210 | >>> from mapclassify import greedy 211 | >>> import geopandas 212 | >>> world = geopandas.read_file( 213 | ... "https://naciscdn.org/naturalearth/110m/cultural/ne_110m_admin_0_countries.zip" 214 | ... ) 215 | >>> africa = world.loc[world.CONTINENT == "Africa"].copy() 216 | >>> africa = africa.to_crs("ESRI:102022").reset_index(drop=True) 217 | 218 | Default: 219 | 220 | >>> africa["greedy_colors"] = greedy(africa) 221 | >>> africa["greedy_colors"].head() 222 | 0 1 223 | 1 0 224 | 2 0 225 | 3 1 226 | 4 4 227 | Name: greedy_colors, dtype: int64 228 | 229 | Balanced by area: 230 | 231 | >>> africa["balanced_area"] = greedy(africa, strategy="balanced", balance="area") 232 | >>> africa["balanced_area"].head() 233 | 0 1 234 | 1 2 235 | 2 0 236 | 3 1 237 | 4 3 238 | Name: balanced_area, dtype: int64 239 | 240 | Using rook adjacency: 241 | 242 | >>> africa["rook_adjacency"] = greedy(africa, sw="rook") 243 | >>> africa["rook_adjacency"].tail() 244 | 46 3 245 | 47 0 246 | 48 2 247 | 49 3 248 | 50 1 249 | Name: rook_adjacency, dtype: int64 250 | 251 | Adding minimal distance between colors: 252 | 253 | >>> africa["min_distance"] = greedy(africa, min_distance=1000000) 254 | >>> africa["min_distance"].head() 255 | 0 1 256 | 1 9 257 | 2 0 258 | 3 7 259 | 4 4 260 | Name: min_distance, dtype: int64 261 | 262 | Using different coloring strategy: 263 | 264 | >>> 
africa["smallest_last"] = greedy(africa, strategy="smallest_last") 265 | >>> africa["smallest_last"].head() 266 | 0 3 267 | 1 1 268 | 2 1 269 | 3 3 270 | 4 1 271 | Name: smallest_last, dtype: int64 272 | 273 | """ # noqa 274 | 275 | if strategy != "balanced": 276 | try: 277 | import networkx as nx 278 | 279 | STRATEGIES = nx.algorithms.coloring.greedy_coloring.STRATEGIES.keys() 280 | 281 | except ImportError: 282 | raise ImportError("The 'networkx' package is required.") from None 283 | 284 | try: 285 | import pandas as pd 286 | except ImportError: 287 | raise ImportError("The 'pandas' package is required.") from None 288 | try: 289 | from libpysal.weights import Queen, Rook, W, fuzzy_contiguity 290 | except ImportError: 291 | raise ImportError("The 'libpysal' package is required.") from None 292 | 293 | if min_distance is not None: 294 | sw = fuzzy_contiguity( 295 | gdf, 296 | tolerance=0.0, 297 | buffering=True, 298 | buffer=min_distance / 2.0, 299 | silence_warnings=silence_warnings, 300 | ) 301 | 302 | if not isinstance(sw, W): 303 | if sw == "queen": 304 | sw = Queen.from_dataframe( 305 | gdf, silence_warnings=silence_warnings, use_index=False 306 | ) 307 | elif sw == "rook": 308 | sw = Rook.from_dataframe( 309 | gdf, silence_warnings=silence_warnings, use_index=False 310 | ) 311 | 312 | if strategy == "balanced": 313 | color = pd.Series(_balanced(gdf, sw, balance=balance, min_colors=min_colors)) 314 | 315 | elif strategy in STRATEGIES: 316 | color = nx.greedy_color( 317 | sw.to_networkx(), strategy=strategy, interchange=interchange 318 | ) 319 | 320 | else: 321 | raise ValueError(f"'{strategy}' is not a valid strategy.") 322 | 323 | color = pd.Series(color).sort_index() 324 | color.index = gdf.index 325 | return color 326 | -------------------------------------------------------------------------------- /mapclassify/legendgram.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def 
_legendgram( 5 | classifier, 6 | *, 7 | ax=None, 8 | cmap="viridis", 9 | bins=50, 10 | inset=True, 11 | clip=None, 12 | vlines=False, 13 | vlinecolor="black", 14 | vlinewidth=1, 15 | loc="lower left", 16 | legend_size=("27%", "20%"), 17 | frameon=False, 18 | tick_params=None, 19 | bbox_to_anchor=None, 20 | **kwargs, 21 | ): 22 | """ 23 | Add a histogram in a choropleth with colors aligned with map ... 24 | 25 | Arguments 26 | --------- 27 | ax : Axes 28 | ... 29 | loc : string or int 30 | valid legend location like that used in matplotlib.pyplot.legend. Valid 31 | locations are 'upper left', 'upper center', 'upper right', 'center left', 32 | 'center', 'center right', 'lower left', 'lower center', 'lower right'. 33 | legend_size : tuple 34 | tuple of floats or strings describing the (width, height) of the 35 | legend. If a float is provided, it is 36 | the size in inches, e.g. ``(1.3, 1)``. If a string is provided, it is 37 | the size in relative units, e.g. ``('40%', '20%')``. By default, 38 | i.e. if ``bbox_to_anchor`` is not specified, those are relative to 39 | the `ax`. Otherwise, they are to be understood relative to the 40 | bounding box provided via ``bbox_to_anchor``. 41 | frameon : bool (default: False) 42 | whether to add a frame to the legendgram 43 | tick_params : keyword dictionary 44 | options to control how the histogram axis gets ticked/labelled. 45 | bbox_to_anchor : tuple or ``matplotlib.trasforms.BboxBase`` 46 | Bbox that the inset axes will be anchored to. If None, a tuple of 47 | ``(0, 0, 1, 1)`` is used. If a tuple, can be either 48 | ``[left, bottom, width, height]``, or ``[left, bottom]``. If the ``legend_size`` 49 | is in relative units (%), the 2-tuple ``[left, bottom]`` cannot be used. 50 | 51 | Returns 52 | ------- 53 | axis containing the legendgram. 
54 | """ 55 | 56 | try: 57 | import matplotlib.pyplot as plt 58 | from matplotlib import colormaps 59 | from mpl_toolkits.axes_grid1.inset_locator import inset_axes 60 | except ImportError as e: 61 | raise ImportError from e("you must have matplotlib ") 62 | if ax is None: 63 | f, ax = plt.subplots() 64 | else: 65 | f = ax.get_figure() 66 | k = len(classifier.bins) 67 | breaks = classifier.bins 68 | if inset: 69 | if not bbox_to_anchor: 70 | bbox_to_anchor = (0, 0, 1, 1) 71 | histpos = inset_axes( 72 | ax, 73 | loc=loc, 74 | width=legend_size[0], 75 | height=legend_size[1], 76 | bbox_to_anchor=bbox_to_anchor, 77 | bbox_transform=ax.transAxes, 78 | ) 79 | histax = f.add_axes(histpos) 80 | else: 81 | histax = ax 82 | N, bins, patches = histax.hist(classifier.y, bins=bins, color="0.1", **kwargs) 83 | if isinstance(cmap, str): 84 | cmap = colormaps[cmap] 85 | 86 | colors = [cmap(i) for i in np.linspace(0, 1, k)] 87 | 88 | bucket_breaks = [0] + [np.searchsorted(bins, i) for i in breaks] 89 | for c in range(k): 90 | for b in range(bucket_breaks[c], bucket_breaks[c + 1]): 91 | patches[b].set_facecolor(colors[c]) 92 | if clip is not None: 93 | histax.set_xlim(*clip) 94 | histax.set_frame_on(frameon) 95 | histax.get_yaxis().set_visible(False) 96 | if tick_params is None: 97 | tick_params = dict() 98 | if vlines: 99 | lim = histax.get_ylim()[1] 100 | # plot upper limit of each bin 101 | for i in classifier.bins: 102 | histax.vlines(i, 0, lim, color=vlinecolor, linewidth=vlinewidth) 103 | tick_params["labelsize"] = tick_params.get("labelsize", 12) 104 | histax.tick_params(**tick_params) 105 | return histax 106 | -------------------------------------------------------------------------------- /mapclassify/pooling.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | 3 | from .classifiers import ( 4 | BoxPlot, 5 | EqualInterval, 6 | FisherJenks, 7 | FisherJenksSampled, 8 | MaximumBreaks, 9 | Quantiles, 10 | StdMean, 11 | 
class Pooled:
    """Applying global binning across columns.

    The classifier is fit once to all columns pooled into a single
    vector; the resulting global bins are then imposed on each
    individual column via ``UserDefined``.

    Parameters
    ----------

    Y : numpy.array
        :math:`(n, m)`, values to classify, with :math:`m>1`.
    classifier : str (default 'Quantiles')
        Name of ``mapclassify.classifier`` to apply.
    **kwargs : dict
        Additional keyword arguments for classifier.

    Attributes
    ----------

    global_classifier : mapclassify.classifiers.MapClassifier
        Instance of the pooled classifier defined as the classifier
        applied to the union of the columns.
    col_classifiers : list
        Elements are ``MapClassifier`` instances with the pooled classifier
        applied to the associated column of ``Y``.

    Examples
    --------

    >>> import mapclassify
    >>> import numpy
    >>> n = 20
    >>> data = numpy.array([numpy.arange(n)+i*n for i in range(1,4)]).T
    >>> res = mapclassify.Pooled(data)

    >>> res.col_classifiers[0].counts.tolist()
    [12, 8, 0, 0, 0]

    >>> res.col_classifiers[1].counts.tolist()
    [0, 4, 12, 4, 0]

    >>> res.col_classifiers[2].counts.tolist()
    [0, 0, 0, 8, 12]

    >>> res.global_classifier.counts.tolist()
    [12, 12, 12, 12, 12]

    >>> res.global_classifier.bins == res.col_classifiers[0].bins
    array([ True,  True,  True,  True,  True])

    >>> res.global_classifier.bins
    array([31.8, 43.6, 55.4, 67.2, 79. ])

    """

    def __init__(self, Y, classifier="Quantiles", **kwargs):
        method = classifier.lower()
        valid_methods = list(dispatcher.keys())
        if method not in valid_methods:
            raise ValueError(
                f"'{classifier}' not a valid classifier. "
                f"Currently supported classifiers: {valid_methods}"
            )

        self.__dict__.update(kwargs)
        Y = numpy.asarray(Y)
        # Stack all columns into one vector (column-major order) so the
        # classifier sees the pooled distribution.
        _, cols = Y.shape
        y = numpy.reshape(Y, (-1, 1), order="f")
        ymin = y.min()
        global_classifier = dispatcher[method](y, **kwargs)
        col_classifiers = []
        name = f"Pooled {classifier}"
        for c in range(cols):
            # Anchor each column at the pooled minimum so all columns
            # share identical class intervals.
            res = UserDefined(Y[:, c], bins=global_classifier.bins, lowest=ymin)
            res.name = name
            col_classifiers.append(res)
        self.col_classifiers = col_classifiers
        self.global_classifier = global_classifier
        self._summary()

    def _summary(self):
        # Mirror the fit statistics of the pooled (global) classifier.
        self.classes = self.global_classifier.classes
        self.tss = self.global_classifier.tss
        self.adcm = self.global_classifier.adcm
        self.gadf = self.global_classifier.gadf

    def __str__(self):
        """One table per column classifier, headed by a title line."""
        rows = ["Pooled Classifier"]
        for c in self.col_classifiers:
            rows.append(c.table())
        return "\n\n".join(rows)

    def __repr__(self):
        return self.__str__()
https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/mapclassify/tests/baseline/test_histogram_plot.png -------------------------------------------------------------------------------- /mapclassify/tests/baseline/test_histogram_plot_despine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/mapclassify/tests/baseline/test_histogram_plot_despine.png -------------------------------------------------------------------------------- /mapclassify/tests/baseline/test_histogram_plot_linewidth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/mapclassify/tests/baseline/test_histogram_plot_linewidth.png -------------------------------------------------------------------------------- /mapclassify/tests/baseline_images/test_legendgram/legendgram_cmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/mapclassify/tests/baseline_images/test_legendgram/legendgram_cmap.png -------------------------------------------------------------------------------- /mapclassify/tests/baseline_images/test_legendgram/legendgram_default.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/mapclassify/tests/baseline_images/test_legendgram/legendgram_default.png -------------------------------------------------------------------------------- /mapclassify/tests/baseline_images/test_legendgram/legendgram_kwargs.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/mapclassify/tests/baseline_images/test_legendgram/legendgram_kwargs.png -------------------------------------------------------------------------------- /mapclassify/tests/baseline_images/test_legendgram/legendgram_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/mapclassify/tests/baseline_images/test_legendgram/legendgram_map.png -------------------------------------------------------------------------------- /mapclassify/tests/baseline_images/test_legendgram/legendgram_position.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/mapclassify/tests/baseline_images/test_legendgram/legendgram_position.png -------------------------------------------------------------------------------- /mapclassify/tests/baseline_images/test_legendgram/legendgram_quantiles.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/mapclassify/tests/baseline_images/test_legendgram/legendgram_quantiles.png -------------------------------------------------------------------------------- /mapclassify/tests/baseline_images/test_legendgram/legendgram_vlines.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/mapclassify/tests/baseline_images/test_legendgram/legendgram_vlines.png -------------------------------------------------------------------------------- /mapclassify/tests/test_classify.py: -------------------------------------------------------------------------------- 1 | import geopandas 2 | 
import libpysal 3 | import pytest 4 | 5 | import mapclassify 6 | 7 | 8 | def _assertions(a, b): 9 | assert a.k == b.k 10 | assert a.yb.all() == b.yb.all() 11 | assert a.bins.all() == b.bins.all() 12 | assert a.counts.all() == b.counts.all() 13 | 14 | 15 | class TestClassify: 16 | def setup_method(self): 17 | link_to_data = libpysal.examples.get_path("columbus.shp") 18 | gdf = geopandas.read_file(link_to_data) 19 | self.x = gdf["HOVAL"].values 20 | 21 | def test_box_plot(self): 22 | a = mapclassify.classify(self.x, "box_plot") 23 | b = mapclassify.BoxPlot(self.x) 24 | _assertions(a, b) 25 | 26 | def test_equal_interval(self): 27 | a = mapclassify.classify(self.x, "EqualInterval", k=3) 28 | b = mapclassify.EqualInterval(self.x, k=3) 29 | _assertions(a, b) 30 | 31 | def test_fisher_jenks(self): 32 | a = mapclassify.classify(self.x, "FisherJenks", k=3) 33 | b = mapclassify.FisherJenks(self.x, k=3) 34 | _assertions(a, b) 35 | 36 | def test_fisher_jenks_sampled(self): 37 | a = mapclassify.classify( 38 | self.x, "FisherJenksSampled", k=3, pct_sampled=0.5, truncate=False 39 | ) 40 | b = mapclassify.FisherJenksSampled(self.x, k=3, pct=0.5, truncate=False) 41 | _assertions(a, b) 42 | 43 | def test_headtail_breaks(self): 44 | a = mapclassify.classify(self.x, "headtail_breaks") 45 | b = mapclassify.HeadTailBreaks(self.x) 46 | _assertions(a, b) 47 | 48 | def test_quantiles(self): 49 | a = mapclassify.classify(self.x, "quantiles", k=3) 50 | b = mapclassify.Quantiles(self.x, k=3) 51 | _assertions(a, b) 52 | 53 | def test_percentiles(self): 54 | a = mapclassify.classify(self.x, "percentiles", pct=[25, 50, 75, 100]) 55 | b = mapclassify.Percentiles(self.x, pct=[25, 50, 75, 100]) 56 | _assertions(a, b) 57 | 58 | a = mapclassify.classify(self.x, "prettybreaks") 59 | b = mapclassify.PrettyBreaks(self.x) 60 | _assertions(a, b) 61 | 62 | def test_jenks_caspall(self): 63 | a = mapclassify.classify(self.x, "JenksCaspall", k=3) 64 | b = mapclassify.JenksCaspall(self.x, k=3) 65 | 
_assertions(a, b) 66 | 67 | def test_jenks_caspall_forced(self): 68 | a = mapclassify.classify(self.x, "JenksCaspallForced", k=3) 69 | b = mapclassify.JenksCaspallForced(self.x, k=3) 70 | _assertions(a, b) 71 | 72 | def test_jenks_caspall_sampled(self): 73 | a = mapclassify.classify(self.x, "JenksCaspallSampled", pct_sampled=0.5) 74 | b = mapclassify.JenksCaspallSampled(self.x, pct=0.5) 75 | _assertions(a, b) 76 | 77 | def test_natural_breaks(self): 78 | a = mapclassify.classify(self.x, "natural_breaks") 79 | b = mapclassify.NaturalBreaks(self.x) 80 | _assertions(a, b) 81 | 82 | def test_max_p_classifier(self): 83 | a = mapclassify.classify(self.x, "max_p", k=3, initial=50) 84 | b = mapclassify.MaxP(self.x, k=3, initial=50) 85 | _assertions(a, b) 86 | 87 | def test_std_mean(self): 88 | a = mapclassify.classify(self.x, "std_mean", multiples=[-1, -0.5, 0.5, 1]) 89 | b = mapclassify.StdMean(self.x, multiples=[-1, -0.5, 0.5, 1]) 90 | _assertions(a, b) 91 | 92 | def test_user_defined(self): 93 | a = mapclassify.classify(self.x, "user_defined", bins=[20, max(self.x)]) 94 | b = mapclassify.UserDefined(self.x, bins=[20, max(self.x)]) 95 | _assertions(a, b) 96 | 97 | def test_bad_classifier(self): 98 | classifier = "George_Costanza" 99 | with pytest.raises(ValueError, match="Invalid scheme: 'georgecostanza'"): 100 | mapclassify.classify(self.x, classifier) 101 | -------------------------------------------------------------------------------- /mapclassify/tests/test_greedy.py: -------------------------------------------------------------------------------- 1 | import geopandas 2 | import libpysal 3 | import pytest 4 | 5 | from ..greedy import greedy 6 | 7 | world = geopandas.read_file( 8 | "https://naciscdn.org/naturalearth/110m/cultural/ne_110m_admin_0_countries.zip" 9 | ) 10 | sw = libpysal.weights.Queen.from_dataframe( 11 | world, ids=world.index.to_list(), silence_warnings=True 12 | ) 13 | 14 | 15 | def _check_correctess(colors): 16 | assert len(colors) == len(world) 17 
def _check_correctess(colors):
    """No geometry may share a color with any of its neighbors."""
    assert len(colors) == len(world)
    for idx, adjacent in sw.neighbors.items():
        if len(adjacent) > 1:
            assert (colors[adjacent] != colors[idx]).all()


@pytest.mark.filterwarnings("ignore:Geometry is in a geographic CRS.")
class TestGreedy:
    """Behaviour of ``greedy`` under the supported contiguity inputs."""

    def test_default(self):
        assigned = greedy(world)
        expected_counts = [36, 36, 35, 35, 35]
        assert set(assigned) == {0, 1, 2, 3, 4}
        assert assigned.value_counts().to_list() == expected_counts
        # The result is indexed like the input frame.
        assert (assigned.index == world.index).all()
        _check_correctess(assigned)

    def test_rook(self):
        assigned = greedy(world, sw="rook")
        expected_counts = [36, 36, 35, 35, 35]
        assert set(assigned) == {0, 1, 2, 3, 4}
        assert assigned.value_counts().to_list() == expected_counts
        _check_correctess(assigned)

    def test_sw(self):
        # A pre-built spatial-weights object is accepted directly.
        assigned = greedy(world, sw=sw)
        expected_counts = [36, 36, 35, 35, 35]
        assert set(assigned) == {0, 1, 2, 3, 4}
        assert assigned.value_counts().to_list() == expected_counts
        _check_correctess(assigned)

    def test_min_distance(self):
        europe = world.loc[world.CONTINENT == "Europe"].to_crs(epsg=3035)
        assigned = greedy(europe, min_distance=500000)
        # Enforcing a metric buffer requires many more colors.
        assert set(assigned) == set(range(13))
        assert assigned.value_counts().to_list() == [3] * 13

    def test_invalid_strategy(self):
        strategy = "spice melange"
        with pytest.raises(ValueError, match=f"'{strategy}' is not a valid strategy."):
            greedy(world, strategy=strategy)
assert colors.value_counts().to_list() == [55, 49, 39, 32, 2] 72 | _check_correctess(colors) 73 | 74 | def test_centroid(self, pysal_geos): 75 | colors = greedy( 76 | world, strategy="balanced", balance="centroid", min_distance=pysal_geos 77 | ) 78 | assert set(colors) == {0, 1, 2, 3, 4} 79 | assert colors.value_counts().to_list() == [39, 36, 36, 34, 32] 80 | _check_correctess(colors) 81 | 82 | def test_distance(self, pysal_geos): 83 | colors = greedy( 84 | world, strategy="balanced", balance="distance", min_distance=pysal_geos 85 | ) 86 | assert set(colors) == {0, 1, 2, 3, 4} 87 | assert colors.value_counts().to_list() == [38, 36, 35, 34, 34] 88 | _check_correctess(colors) 89 | 90 | def test_largest_first(self, pysal_geos): 91 | colors = greedy(world, strategy="largest_first", min_distance=pysal_geos) 92 | assert set(colors) == {0, 1, 2, 3, 4} 93 | assert colors.value_counts().to_list() == [64, 49, 42, 21, 1] 94 | _check_correctess(colors) 95 | 96 | def test_random_sequential(self, pysal_geos): 97 | """based on random, no consistent results to be tested""" 98 | colors = greedy(world, strategy="random_sequential", min_distance=pysal_geos) 99 | _check_correctess(colors) 100 | 101 | def test_smallest_last(self, pysal_geos): 102 | colors = greedy(world, strategy="smallest_last", min_distance=pysal_geos) 103 | assert set(colors) == {0, 1, 2, 3} 104 | assert colors.value_counts().to_list() == [71, 52, 39, 15] 105 | _check_correctess(colors) 106 | 107 | def test_independent_set(self, pysal_geos): 108 | colors = greedy(world, strategy="independent_set", min_distance=pysal_geos) 109 | assert set(colors) == {0, 1, 2, 3, 4} 110 | assert colors.value_counts().to_list() == [91, 42, 26, 13, 5] 111 | _check_correctess(colors) 112 | 113 | def test_connected_sequential_bfs(self, pysal_geos): 114 | colors = greedy( 115 | world, strategy="connected_sequential_bfs", min_distance=pysal_geos 116 | ) 117 | assert set(colors) == {0, 1, 2, 3, 4} 118 | _check_correctess(colors) 119 | 120 | 
def test_connected_sequential_dfs(self, pysal_geos): 121 | colors = greedy( 122 | world, strategy="connected_sequential_dfs", min_distance=pysal_geos 123 | ) 124 | assert set(colors) == {0, 1, 2, 3, 4} 125 | _check_correctess(colors) 126 | 127 | def test_connected_sequential(self, pysal_geos): 128 | colors = greedy(world, strategy="connected_sequential", min_distance=pysal_geos) 129 | assert set(colors) == {0, 1, 2, 3, 4} 130 | _check_correctess(colors) 131 | 132 | def test_saturation_largest_first(self, pysal_geos): 133 | colors = greedy( 134 | world, strategy="saturation_largest_first", min_distance=pysal_geos 135 | ) 136 | assert set(colors) == {0, 1, 2, 3} 137 | assert colors.value_counts().to_list() == [71, 47, 42, 17] 138 | _check_correctess(colors) 139 | 140 | def test_DSATUR(self, pysal_geos): 141 | colors = greedy(world, strategy="DSATUR", min_distance=pysal_geos) 142 | assert set(colors) == {0, 1, 2, 3} 143 | assert colors.value_counts().to_list() == [71, 47, 42, 17] 144 | _check_correctess(colors) 145 | 146 | def test_index(self, pysal_geos): 147 | world["ten"] = world.index * 10 148 | reindexed = world.set_index("ten") 149 | colors = greedy(reindexed, min_distance=pysal_geos) 150 | assert len(colors) == len(world) 151 | assert set(colors) == {0, 1, 2, 3, 4} 152 | assert colors.value_counts().to_list() == [36, 36, 35, 35, 35] 153 | -------------------------------------------------------------------------------- /mapclassify/tests/test_legendgram.py: -------------------------------------------------------------------------------- 1 | import geopandas as gpd 2 | import matplotlib 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import pytest 6 | from libpysal import examples 7 | from matplotlib.testing.decorators import image_comparison 8 | from packaging.version import Version 9 | 10 | from mapclassify import EqualInterval, Quantiles 11 | from mapclassify.legendgram import _legendgram 12 | 13 | 14 | class TestLegendgram: 15 | def 
setup_method(self): 16 | np.random.seed(42) 17 | self.data = np.random.normal(0, 1, 100) 18 | self.classifier = EqualInterval(self.data, k=5) 19 | 20 | def test_legendgram_returns_axis(self): 21 | """Test that _legendgram returns a matplotlib axis""" 22 | _, ax = plt.subplots(figsize=(8, 6)) 23 | histax = _legendgram(self.classifier, ax=ax) 24 | plt.close() 25 | 26 | assert isinstance(histax, matplotlib.axes.Axes) 27 | 28 | def test_legendgram_standalone(self): 29 | """Test that _legendgram works without providing an axis""" 30 | histax = _legendgram(self.classifier) 31 | plt.close() 32 | 33 | assert isinstance(histax, matplotlib.axes.Axes) 34 | 35 | def test_legendgram_inset_false(self): 36 | """Test that _legendgram works with inset=False""" 37 | _, ax = plt.subplots(figsize=(8, 6)) 38 | histax = _legendgram(self.classifier, ax=ax, inset=False) 39 | plt.close() 40 | 41 | # When inset=False, histax should be the same as ax 42 | assert histax is ax 43 | 44 | def test_legendgram_clip(self): 45 | """Test that _legendgram applies clip parameter correctly""" 46 | _, ax = plt.subplots(figsize=(8, 6)) 47 | clip_range = (-2, 2) 48 | histax = _legendgram(self.classifier, ax=ax, clip=clip_range) 49 | xlim = histax.get_xlim() 50 | plt.close() 51 | 52 | assert xlim[0] == clip_range[0] 53 | assert xlim[1] == clip_range[1] 54 | 55 | def test_legendgram_tick_params(self): 56 | """Test that _legendgram applies tick_params correctly""" 57 | _, ax = plt.subplots(figsize=(8, 6)) 58 | custom_tick_params = {"labelsize": 20, "rotation": 45} 59 | _ = _legendgram(self.classifier, ax=ax, tick_params=custom_tick_params) 60 | plt.close() 61 | 62 | def test_legendgram_frameon(self): 63 | """Test that _legendgram applies frameon parameter correctly""" 64 | _, ax = plt.subplots(figsize=(8, 6)) 65 | histax = _legendgram(self.classifier, ax=ax, frameon=True) 66 | is_frame_on = histax.get_frame_on() 67 | plt.close() 68 | 69 | assert is_frame_on 70 | 71 | @pytest.mark.skipif( 72 | 
Version(matplotlib.__version__) >= Version("3.11.0.dev"), 73 | reason="change of font rendering breaks image comparison", 74 | # once 3.11 lands, we should update expected and test against that 75 | ) 76 | @image_comparison( 77 | baseline_images=["legendgram_default"], 78 | extensions=["png"], 79 | remove_text=False, 80 | tol=0.05, 81 | ) 82 | def test_legendgram_default(self): 83 | """Test default legendgram appearance""" 84 | _, ax = plt.subplots(figsize=(8, 6)) 85 | _legendgram(self.classifier, ax=ax) 86 | 87 | @pytest.mark.skipif( 88 | Version(matplotlib.__version__) >= Version("3.11.0.dev"), 89 | reason="change of font rendering breaks image comparison", 90 | # once 3.11 lands, we should update expected and test against that 91 | ) 92 | @image_comparison( 93 | baseline_images=["legendgram_vlines"], 94 | extensions=["png"], 95 | remove_text=False, 96 | tol=0.05, 97 | ) 98 | def test_legendgram_vlines(self): 99 | """Test legendgram with vertical lines""" 100 | _, ax = plt.subplots(figsize=(8, 6)) 101 | _legendgram(self.classifier, ax=ax, vlines=True, vlinecolor="red", vlinewidth=2) 102 | 103 | @pytest.mark.skipif( 104 | Version(matplotlib.__version__) >= Version("3.11.0.dev"), 105 | reason="change of font rendering breaks image comparison", 106 | # once 3.11 lands, we should update expected and test against that 107 | ) 108 | @image_comparison( 109 | baseline_images=["legendgram_cmap"], 110 | extensions=["png"], 111 | remove_text=False, 112 | tol=0.05, 113 | ) 114 | def test_legendgram_cmap(self): 115 | """Test legendgram with custom colormap""" 116 | _, ax = plt.subplots(figsize=(8, 6)) 117 | _legendgram(self.classifier, ax=ax, cmap="plasma") 118 | 119 | @pytest.mark.skipif( 120 | Version(matplotlib.__version__) >= Version("3.11.0.dev"), 121 | reason="change of font rendering breaks image comparison", 122 | # once 3.11 lands, we should update expected and test against that 123 | ) 124 | @image_comparison( 125 | baseline_images=["legendgram_cmap"], 126 | 
extensions=["png"], 127 | remove_text=False, 128 | tol=0.05, 129 | ) 130 | def test_legendgram_cmap_class(self): 131 | """Test legendgram with custom colormap""" 132 | _, ax = plt.subplots(figsize=(8, 6)) 133 | _legendgram(self.classifier, ax=ax, cmap=matplotlib.cm.plasma) 134 | 135 | @pytest.mark.skipif( 136 | Version(matplotlib.__version__) >= Version("3.11.0.dev"), 137 | reason="change of font rendering breaks image comparison", 138 | # once 3.11 lands, we should update expected and test against that 139 | ) 140 | @image_comparison( 141 | baseline_images=["legendgram_position"], 142 | extensions=["png"], 143 | remove_text=False, 144 | tol=0.05, 145 | ) 146 | def test_legendgram_position(self): 147 | """Test legendgram with custom position""" 148 | _, ax = plt.subplots(figsize=(8, 6)) 149 | _legendgram( 150 | self.classifier, ax=ax, loc="upper right", legend_size=("40%", "30%") 151 | ) 152 | 153 | @pytest.mark.skipif( 154 | Version(matplotlib.__version__) >= Version("3.11.0.dev"), 155 | reason="change of font rendering breaks image comparison", 156 | # once 3.11 lands, we should update expected and test against that 157 | ) 158 | @image_comparison( 159 | baseline_images=["legendgram_map"], 160 | extensions=["png"], 161 | remove_text=False, 162 | tol=0.05, 163 | ) 164 | def test_legendgram_map(self): 165 | """Test with geopandas map""" 166 | data = gpd.read_file(examples.get_path("south.shp")).to_crs(epsg=5070) 167 | ax = data.plot("DV80", k=10, scheme="Quantiles") 168 | classifier = Quantiles(data["DV80"].values, k=10) 169 | classifier.plot_legendgram( 170 | ax=ax, legend_size=("50%", "20%"), loc="upper left", clip=(2, 10) 171 | ) 172 | ax.set_axis_off() 173 | 174 | @pytest.mark.skipif( 175 | Version(matplotlib.__version__) >= Version("3.11.0.dev"), 176 | reason="change of font rendering breaks image comparison", 177 | # once 3.11 lands, we should update expected and test against that 178 | ) 179 | @image_comparison( 180 | baseline_images=["legendgram_kwargs"], 
class TestQuantile:
    """The module-level ``quantile`` helper."""

    def test_quantile(self):
        data = numpy.arange(1000)
        expected = numpy.array([333.0, 666.0, 999.0])
        numpy.testing.assert_almost_equal(expected, quantile(data, k=3))

    def test_quantile_k4(self):
        data = numpy.arange(1000)
        expected = numpy.array([249.75, 499.5, 749.25, 999.0])
        numpy.testing.assert_array_almost_equal(expected, quantile(data, k=4))

    def test_quantile_k(self):
        data = numpy.random.random(1000)
        for k in range(5, 10):
            # quantile must hand back exactly k break points
            numpy.testing.assert_almost_equal(k, len(quantile(data, k)))
            assert k == len(quantile(data, k))
class TestFindBin:
    """``find_bin`` maps raw values onto an already-fit set of class intervals."""

    def setup_method(self):
        self.V = load_example()

    def test_find_bin(self):
        values = [0, 1, 3, 5, 50, 70, 101, 202, 390, 505, 800, 5000, 5001]
        # expected bin ids for the same inputs at two different k
        expected_by_k = {
            5: [0, 0, 0, 0, 0, 0, 1, 2, 3, 3, 4, 4, 4],
            9: [0, 0, 0, 0, 2, 2, 3, 5, 7, 7, 8, 8, 8],
        }
        for k, expected in expected_by_k.items():
            classifier = FisherJenks(self.V, k=k)
            numpy.testing.assert_array_equal(expected, classifier.find_bin(values))
class TestBinC:
    def test_bin_c(self):
        """``binC`` maps each categorical value to its index within ``bins``."""
        bins = list(range(2, 8))
        y = numpy.array(
            [
                [7, 5, 6],
                [2, 3, 5],
                [7, 2, 2],
                [3, 6, 7],
                [6, 3, 4],
                [6, 7, 4],
                [6, 5, 6],
                [4, 6, 7],
                [4, 6, 3],
                [3, 2, 7],
            ]
        )
        # each entry is the position of the matching category in ``bins``
        expected = numpy.array(
            [
                [5, 3, 4],
                [0, 1, 3],
                [5, 0, 0],
                [1, 4, 5],
                [4, 1, 2],
                [4, 5, 2],
                [4, 3, 4],
                [2, 4, 5],
                [2, 4, 1],
                [1, 0, 5],
            ]
        )
        numpy.testing.assert_array_equal(expected, binC(y, bins))


class TestBin:
    def test_bin(self):
        """``bin`` assigns 2-D values to right-closed interval bins."""
        y = numpy.array(
            [
                [7, 13, 14],
                [10, 11, 13],
                [7, 17, 2],
                [18, 3, 14],
                [9, 15, 8],
                [7, 13, 12],
                [16, 6, 11],
                [19, 2, 15],
                [11, 11, 9],
                [3, 2, 19],
            ]
        )
        bins = [10, 15, 20]
        expected = numpy.array(
            [
                [0, 1, 1],
                [0, 1, 1],
                [0, 2, 0],
                [2, 0, 1],
                [0, 1, 0],
                [0, 1, 1],
                [2, 0, 1],
                [2, 0, 1],
                [1, 1, 0],
                [0, 0, 2],
            ]
        )
        numpy.testing.assert_array_equal(expected, bin(y, bins))


class TestBin1d:
    def test_bin1d(self):
        """``bin1d`` returns (class ids, class counts) for 1-D data."""
        y = numpy.arange(100, dtype="float")
        bins = [25, 74, 100]
        # values 0-25 -> class 0 (26), 26-74 -> class 1 (49), 75-99 -> class 2 (25);
        # build the expected id vector from the counts instead of 100 literals
        counts = numpy.array([26, 49, 25])
        binIds = numpy.repeat([0, 1, 2], counts)
        # call once and check both outputs (was previously computed twice)
        ids, result_counts = bin1d(y, bins)
        numpy.testing.assert_array_equal(binIds, ids)
        numpy.testing.assert_array_equal(counts, result_counts)


class TestNaturalBreaks:
    def setup_method(self):
        self.V = load_example()

    def test_natural_breaks(self):
        # assert expected, natural_breaks(values, k, itmax))
        assert True  # TODO: implement your test here

    def test_NaturalBreaks(self):
        nb = NaturalBreaks(self.V, 5)
        assert nb.k == 5
        assert len(nb.counts) == 5
        numpy.testing.assert_array_almost_equal(
            nb.counts, numpy.array([49, 3, 4, 1, 1])
        )

    def test_NaturalBreaks_stability(self):
        # repeated fits must keep producing k classes
        for _ in range(10):
            nb = NaturalBreaks(self.V, 5)
            assert nb.k == 5
            assert len(nb.counts) == 5

    def test_NaturalBreaks_randomData(self):
        for i in range(10):
            V = numpy.random.random(50) * (i + 1)
            nb = NaturalBreaks(V, 5)
            assert nb.k == 5
            assert len(nb.counts) == 5


class TestHeadTailBreaks:
    def setup_method(self):
        # heavy-tailed data: 1/i**2 for i = 1 .. 999
        self.V = numpy.array([i ** (-2) for i in range(1, 1000)])

    def test_HeadTailBreaks(self):
        htb = HeadTailBreaks(self.V)
        assert htb.k == 4
        assert len(htb.counts) == 4
        numpy.testing.assert_array_almost_equal(
            htb.counts, numpy.array([975, 21, 2, 1])
        )

    def test_HeadTailBreaks_doublemax(self):
        # duplicating the maximum shifts the tail classes
        V = numpy.append(self.V, self.V.max())
        htb = HeadTailBreaks(V)
        assert htb.k == 4
        assert len(htb.counts) == 4
        numpy.testing.assert_array_almost_equal(
            htb.counts, numpy.array([980, 17, 1, 2])
        )

    def test_HeadTailBreaks_float(self):
        # values differing by one ULP must still split into two classes
        V = numpy.array([1 + 2**-52, 1, 1])
        htb = HeadTailBreaks(V)
        assert htb.k == 2
        assert len(htb.counts) == 2
        numpy.testing.assert_array_almost_equal(htb.counts, numpy.array([2, 1]))


class TestPrettyBreaks:
    def setup_method(self):
        self.V = load_example()

    def test_pretty(self):
        res = PrettyBreaks(self.V)
        assert res.k == 5
        numpy.testing.assert_array_equal(res.counts, [57, 0, 0, 0, 1])
        # edges land on even thousands: 1000, 2000, ..., 5000
        numpy.testing.assert_array_equal(res.bins, list(range(1000, 6000, 1000)))
class TestEqualInterval:
    """EqualInterval on the classic example data."""

    def setup_method(self):
        self.V = load_example()

    def test_EqualInterval(self):
        classified = EqualInterval(self.V)
        expected_counts = numpy.array([57, 0, 0, 0, 1])
        expected_bins = numpy.array(
            [822.394, 1644.658, 2466.922, 3289.186, 4111.45]
        )
        numpy.testing.assert_array_almost_equal(classified.counts, expected_counts)
        numpy.testing.assert_array_almost_equal(classified.bins, expected_bins)

        # degenerate input: a constant array cannot yield five classes
        with pytest.raises(
            ValueError, match="Not enough unique values in array to form 5 classes."
        ):
            EqualInterval(numpy.array([1, 1, 1, 1]))


class TestPercentiles:
    def setup_method(self):
        self.V = load_example()

    def test_Percentiles(self):
        classified = Percentiles(self.V)
        expected_bins = numpy.array(
            [0.1357, 0.553, 9.365, 213.914, 2179.948, 4111.45]
        )
        expected_counts = numpy.array([1, 5, 23, 23, 5, 1])
        numpy.testing.assert_array_almost_equal(classified.bins, expected_bins)
        numpy.testing.assert_array_almost_equal(classified.counts, expected_counts)


class TestBoxPlot:
    def setup_method(self):
        self.V = load_example()

    def test_BoxPlot(self):
        classified = BoxPlot(self.V)
        expected_bins = numpy.array(
            [-52.87625, 2.5675, 9.365, 39.53, 94.97375, 4111.45]
        )
        numpy.testing.assert_array_almost_equal(classified.bins, expected_bins)


class TestQuantiles:
    def setup_method(self):
        self.V = load_example()

    def test_Quantiles(self):
        classified = Quantiles(self.V, k=5)
        expected_bins = numpy.array([1.464, 5.798, 13.278, 54.616, 4111.45])
        expected_counts = numpy.array([12, 11, 12, 11, 12])
        numpy.testing.assert_array_almost_equal(classified.bins, expected_bins)
        numpy.testing.assert_array_almost_equal(classified.counts, expected_counts)


class TestStdMean:
    def setup_method(self):
        self.V = load_example()

    def test_StdMean(self):
        classified = StdMean(self.V)
        expected_bins = numpy.array(
            [-967.36235382, -420.71712519, 672.57333208, 1219.21856072, 4111.45]
        )
        expected_counts = numpy.array([0, 0, 56, 1, 1])
        numpy.testing.assert_array_almost_equal(classified.bins, expected_bins)
        numpy.testing.assert_array_almost_equal(classified.counts, expected_counts)


class TestMaximumBreaks:
    def setup_method(self):
        self.V = load_example()

    def test_MaximumBreaks(self):
        classified = MaximumBreaks(self.V, k=5)
        assert classified.k == 5
        expected_bins = numpy.array([146.005, 228.49, 546.675, 2417.15, 4111.45])
        expected_counts = numpy.array([50, 2, 4, 1, 1])
        numpy.testing.assert_array_almost_equal(classified.bins, expected_bins)
        numpy.testing.assert_array_almost_equal(classified.counts, expected_counts)

        # constant input cannot support five classes
        with pytest.raises(
            ValueError, match="Not enough unique values in array to form 5 classes."
        ):
            MaximumBreaks(numpy.array([1, 1, 1, 1]))


class TestFisherJenks:
    def setup_method(self):
        self.V = load_example()

    def test_FisherJenks(self):
        classified = FisherJenks(self.V)
        assert classified.adcm == 799.24000000000001
        expected_bins = numpy.array([75.29, 192.05, 370.5, 722.85, 4111.45])
        expected_counts = numpy.array([49, 3, 4, 1, 1])
        numpy.testing.assert_array_almost_equal(classified.bins, expected_bins)
        numpy.testing.assert_array_almost_equal(classified.counts, expected_counts)


class TestJenksCaspall:
    def setup_method(self):
        self.V = load_example()

    def test_JenksCaspall(self):
        numpy.random.seed(10)  # pin RNG state for reproducible breaks
        classified = JenksCaspall(self.V, k=5)
        expected_counts = numpy.array([14, 13, 14, 10, 7])
        expected_bins = numpy.array([1.81, 7.6, 29.82, 181.27, 4111.45])
        numpy.testing.assert_array_almost_equal(classified.counts, expected_counts)
        numpy.testing.assert_array_almost_equal(classified.bins, expected_bins)


class TestJenksCaspallSampled:
    def setup_method(self):
        self.V = load_example()

    def test_JenksCaspallSampled(self):
        numpy.random.seed(100)  # pin RNG state for reproducible breaks
        data = numpy.random.random(100000)
        full = JenksCaspall(data)
        sampled = JenksCaspallSampled(data)
        # sampled breaks approximate, but do not equal, the full fit
        numpy.testing.assert_array_almost_equal(
            full.bins,
            numpy.array([0.19718393, 0.39655886, 0.59648522, 0.79780763, 0.99997979]),
        )
        numpy.testing.assert_array_almost_equal(
            sampled.bins,
            numpy.array([0.20856569, 0.41513931, 0.62457691, 0.82561423, 0.99997979]),
        )
597 | 5.06500000e01, 598 | 4.11145000e03, 599 | ] 600 | ), 601 | ) 602 | numpy.testing.assert_array_almost_equal( 603 | jcf.counts, numpy.array([12, 12, 13, 9, 12]) 604 | ) 605 | 606 | with pytest.raises( 607 | ValueError, match="Not enough unique values in array to form 5 classes." 608 | ): 609 | JenksCaspallForced(numpy.array([1, 1, 1, 1])) 610 | 611 | 612 | class TestUserDefined: 613 | def setup_method(self): 614 | self.V = load_example() 615 | 616 | def test_UserDefined(self): 617 | bins = [20, max(self.V)] 618 | ud = UserDefined(self.V, bins) 619 | numpy.testing.assert_array_almost_equal(ud.bins, numpy.array([20.0, 4111.45])) 620 | numpy.testing.assert_array_almost_equal(ud.counts, numpy.array([37, 21])) 621 | 622 | def test_UserDefined_max(self): 623 | bins = numpy.array([20, 30]) 624 | ud = UserDefined(self.V, bins) 625 | numpy.testing.assert_array_almost_equal( 626 | ud.bins, numpy.array([20.0, 30.0, 4111.45]) 627 | ) 628 | numpy.testing.assert_array_almost_equal(ud.counts, numpy.array([37, 4, 17])) 629 | 630 | def test_UserDefined_invariant(self): 631 | bins = [10, 20, 30, 40] 632 | ud = UserDefined(numpy.array([12, 12, 12]), bins) 633 | numpy.testing.assert_array_almost_equal(ud.bins, numpy.array([10, 20, 30, 40])) 634 | numpy.testing.assert_array_almost_equal(ud.counts, numpy.array([0, 3, 0, 0])) 635 | 636 | def test_UserDefined_lowest(self): 637 | bins = [20, max(self.V)] 638 | ud = UserDefined(self.V, bins, lowest=-1.0) 639 | numpy.testing.assert_array_almost_equal(ud.bins, numpy.array([20.0, 4111.45])) 640 | numpy.testing.assert_array_almost_equal(ud.counts, numpy.array([37, 21])) 641 | classes = ["[ -1.00, 20.00]", "( 20.00, 4111.45]"] 642 | assert ud.get_legend_classes() == classes 643 | 644 | 645 | class TestStdMeanAnchor: 646 | def setup_method(self): 647 | self.V = load_example() 648 | 649 | def test_StdMeanAnchor(self): 650 | sm = StdMean(self.V, anchor=True) 651 | bins = numpy.array( 652 | [ 653 | 125.92810345, 654 | 672.57333208, 655 | 
1219.21856072, 656 | 1765.86378936, 657 | 2312.50901799, 658 | 2859.15424663, 659 | 3405.79947527, 660 | 3952.4447039, 661 | 4111.45, 662 | ] 663 | ) 664 | counts = numpy.array([50, 6, 1, 0, 0, 0, 0, 0, 1]) 665 | numpy.testing.assert_array_almost_equal(sm.bins, bins) 666 | numpy.testing.assert_array_almost_equal(sm.counts, counts) 667 | 668 | 669 | class TestMaxP: 670 | def setup_method(self): 671 | self.V = load_example() 672 | 673 | def test_MaxP(self): 674 | numpy.random.seed(100) 675 | mp = MaxP(self.V) 676 | numpy.testing.assert_array_almost_equal( 677 | mp.bins, 678 | numpy.array([3.16000e00, 1.26300e01, 1.67000e01, 2.04700e01, 4.11145e03]), 679 | ) 680 | numpy.testing.assert_array_almost_equal( 681 | mp.counts, numpy.array([18, 16, 3, 1, 20]) 682 | ) 683 | 684 | with pytest.raises( 685 | ValueError, match="Not enough unique values in array to form 5 classes." 686 | ): 687 | MaxP(numpy.array([1, 1, 1, 1])) 688 | 689 | 690 | class TestGadf: 691 | def setup_method(self): 692 | self.V = load_example() 693 | 694 | def test_gadf(self): 695 | qgadf = gadf(self.V) 696 | assert qgadf[0] == 15 697 | assert qgadf[-1] == 0.37402575909092828 698 | 699 | 700 | class TestKClassifiers: 701 | def setup_method(self): 702 | self.V = load_example() 703 | 704 | def test_K_classifiers(self): 705 | numpy.random.seed(100) 706 | ks = KClassifiers(self.V) 707 | assert ks.best.name == "FisherJenks" 708 | assert ks.best.gadf == 0.84810327199081048 709 | assert ks.best.k == 4 710 | 711 | 712 | class TestPooled: 713 | def setup_method(self): 714 | n = 20 715 | self.data = numpy.array([numpy.arange(n) + i * n for i in range(1, 4)]).T 716 | 717 | def test_pooled(self): 718 | res = Pooled(self.data, k=4) 719 | assert res.k == 4 720 | numpy.testing.assert_array_almost_equal( 721 | res.col_classifiers[0].counts, numpy.array([15, 5, 0, 0]) 722 | ) 723 | numpy.testing.assert_array_almost_equal( 724 | res.col_classifiers[-1].counts, numpy.array([0, 0, 5, 15]) 725 | ) 726 | 
numpy.testing.assert_array_almost_equal( 727 | res.global_classifier.counts, numpy.array([15, 15, 15, 15]) 728 | ) 729 | res = Pooled(self.data, classifier="BoxPlot", hinge=1.5) 730 | numpy.testing.assert_array_almost_equal( 731 | res.col_classifiers[0].bins, numpy.array([-9.5, 34.75, 49.5, 64.25, 108.5]) 732 | ) 733 | 734 | def test_pooled_bad_classifier(self): 735 | classifier = "Larry David" 736 | message = f"'{classifier}' not a valid classifier." 737 | with pytest.raises(ValueError, match=message): 738 | Pooled(self.data, classifier=classifier, k=4) 739 | 740 | 741 | class TestPlots: 742 | def setup_method(self): 743 | n = 20 744 | self.data = numpy.array([numpy.arange(n) + i * n for i in range(1, 4)]).T 745 | 746 | @pytest.mark.mpl_image_compare 747 | def test_histogram_plot(self): 748 | ax = Quantiles(self.data).plot_histogram() 749 | return ax.get_figure() 750 | 751 | @pytest.mark.mpl_image_compare 752 | def test_histogram_plot_despine(self): 753 | ax = Quantiles(self.data).plot_histogram(despine=False) 754 | return ax.get_figure() 755 | 756 | @pytest.mark.mpl_image_compare 757 | def test_histogram_plot_linewidth(self): 758 | ax = Quantiles(self.data).plot_histogram( 759 | linewidth=3, linecolor="red", color="yellow" 760 | ) 761 | return ax.get_figure() 762 | -------------------------------------------------------------------------------- /mapclassify/tests/test_rgba.py: -------------------------------------------------------------------------------- 1 | import geopandas 2 | import numpy as np 3 | from numpy.testing import assert_array_equal 4 | 5 | from mapclassify.util import get_color_array 6 | 7 | world = geopandas.read_file( 8 | "https://naciscdn.org/naturalearth/110m/cultural/ne_110m_admin_0_countries.zip" 9 | ) 10 | world["area"] = world.area 11 | 12 | # columns are equivalent except for nan in the first position 13 | world["nanarea"] = world.area 14 | world.loc[0, "nanarea"] = np.nan 15 | 16 | 17 | def test_rgba(): 18 | colors = 
import numpy as np

from ._classify_API import classify as _classify


def get_color_array(
    values,
    scheme="quantiles",
    cmap="viridis",
    alpha=1,
    nan_color=[255, 255, 255, 255],
    as_hex=False,
    **kwargs,
):
    """Convert array of values into RGBA or hex colors using a colormap and classifier.

    This function is useful for visualization libraries that require users to provide
    an array of colors for each object (like pydeck or lonboard) but can also be used
    to create a manual column of colors passed to matplotlib.

    Parameters
    ----------
    values : list-like
        array of input values
    scheme : str, optional
        string description of a mapclassify classifier, by default `"quantiles"`
    cmap : str, optional
        name of matplotlib colormap to use, by default "viridis"
    alpha : float
        alpha parameter that defines transparency. Should be in the range [0,1]
    nan_color : list, optional
        RGBA color to fill NaN values, by default [255, 255, 255, 255]
    as_hex : bool, optional
        if True, return a (n,1)-dimensional array of hexcolors instead of a (n,4)
        dimensional array of RGBA values.
    kwargs : dict
        additional keyword arguments are passed to `mapclassify.classify`

    Returns
    -------
    numpy.array
        numpy array (aligned with the input array) defining a color for each row. If
        `as_hex` is False, the array is :math:`(n,4)` holding an array of RGBA values
        in each row. If `as_hex` is True, the array is :math:`(n,1)` holding a
        hexcolor in each row.

    Raises
    ------
    ImportError
        if pandas or matplotlib are not installed.
    ValueError
        if `alpha` is outside the range [0,1], or if `nan_color` is not a
        list-like of exactly 4 values.
    """
    try:
        import pandas as pd
        from matplotlib import colormaps
        from matplotlib.colors import Normalize, to_hex
    except ImportError as e:
        raise ImportError("This function requires pandas and matplotlib") from e

    # BUG FIX: the original `if not (alpha <= 1) and (alpha >= 0):` parses as
    # `(not (alpha <= 1)) and (alpha >= 0)`, so it only raised for alpha > 1
    # and silently accepted negative alphas. A chained comparison checks both
    # ends of the range.
    if not 0 <= alpha <= 1:
        raise ValueError("alpha must be in the range [0,1]")
    # BUG FIX: the original joined the two checks with `and`, so the error was
    # raised only when nan_color was BOTH not list-like AND not of length 4;
    # either violation alone must raise. `or` with short-circuiting also avoids
    # calling len() on an object that has no length.
    if not pd.api.types.is_list_like(nan_color) or len(nan_color) != 4:
        raise ValueError("`nan_color` must be list-like of 4 values: (R,G,B,A)")

    # only operate on non-NaN values
    v = pd.Series(values, dtype=object)
    legit_indices = v[~v.isna()].index.values
    legit_vals = v.dropna().values
    bogus_indices = v[v.isna()].index.values  # stash these for use later
    # transform (non-NaN) values into class bins
    bins = _classify(legit_vals, scheme=scheme, **kwargs).yb

    # normalize using the data's range (not strictly 1-k if classifier is degenerate)
    norm = Normalize(min(bins), max(bins))
    normalized_vals = norm(bins)

    # generate RBGA array and convert to series
    rgbas = colormaps[cmap](normalized_vals, bytes=True, alpha=alpha)
    colors = pd.Series(list(rgbas), index=legit_indices).apply(np.array)
    nan_colors = pd.Series(
        [nan_color for i in range(len(bogus_indices))], index=bogus_indices
    ).apply(lambda x: np.array(x).astype(np.uint8))

    # put colors in their correct places and fill empty with specified color
    v.update(colors)
    v.update(nan_colors)

    # convert to hexcolors if preferred
    if as_hex:
        colors = v.apply(lambda x: to_hex(x / 255.0))
        return colors.values
    return np.stack(v.values)
14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 1, 19 | "metadata": { 20 | "ExecuteTime": { 21 | "end_time": "2022-11-04T16:51:55.127728Z", 22 | "start_time": "2022-11-04T16:51:54.017906Z" 23 | } 24 | }, 25 | "outputs": [ 26 | { 27 | "data": { 28 | "text/plain": [ 29 | "'2.4.2+78.gc62d2d7.dirty'" 30 | ] 31 | }, 32 | "execution_count": 1, 33 | "metadata": {}, 34 | "output_type": "execute_result" 35 | } 36 | ], 37 | "source": [ 38 | "import mapclassify as mc\n", 39 | "\n", 40 | "mc.__version__" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "## Example data\n", 48 | "`mapclassify` contains a built-in dataset for employment density for the 58 California counties." 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 2, 54 | "metadata": { 55 | "ExecuteTime": { 56 | "end_time": "2022-11-04T16:51:55.397263Z", 57 | "start_time": "2022-11-04T16:51:55.130764Z" 58 | } 59 | }, 60 | "outputs": [], 61 | "source": [ 62 | "y = mc.load_example()" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "## Basic Functionality\n", 70 | "All classifiers in `mapclassify` have a common interface and afford similar functionality. We illustrate these using the `MaximumBreaks` classifier.\n", 71 | "\n", 72 | "`MaximumBreaks` requires that the user specify the number of classes `k`. Given this, the logic of the classifier is to sort the observations in ascending order and find the difference between rank adjacent values. The class boundaries are defined as the $k-1$ largest rank-adjacent breaks in the sorted values." 
73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 3, 78 | "metadata": { 79 | "ExecuteTime": { 80 | "end_time": "2022-11-04T16:51:55.407290Z", 81 | "start_time": "2022-11-04T16:51:55.401874Z" 82 | } 83 | }, 84 | "outputs": [ 85 | { 86 | "data": { 87 | "text/plain": [ 88 | "MaximumBreaks\n", 89 | "\n", 90 | " Interval Count\n", 91 | "--------------------------\n", 92 | "[ 0.13, 228.49] | 52\n", 93 | "( 228.49, 546.67] | 4\n", 94 | "( 546.67, 2417.15] | 1\n", 95 | "(2417.15, 4111.45] | 1" 96 | ] 97 | }, 98 | "execution_count": 3, 99 | "metadata": {}, 100 | "output_type": "execute_result" 101 | } 102 | ], 103 | "source": [ 104 | "mc.MaximumBreaks(y, k=4)" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "The classifier returns an instance of `MaximumBreaks` that reports the resulting intervals and counts. The first class has closed lower and upper bounds:\n", 112 | "\n", 113 | "```\n", 114 | "[ 0.13, 228.49]\n", 115 | "```\n", 116 | "\n", 117 | "with `0.13` being the minimum value in the dataset:" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 4, 123 | "metadata": { 124 | "ExecuteTime": { 125 | "end_time": "2022-11-04T16:51:55.413265Z", 126 | "start_time": "2022-11-04T16:51:55.408990Z" 127 | } 128 | }, 129 | "outputs": [ 130 | { 131 | "data": { 132 | "text/plain": [ 133 | "0.13" 134 | ] 135 | }, 136 | "execution_count": 4, 137 | "metadata": {}, 138 | "output_type": "execute_result" 139 | } 140 | ], 141 | "source": [ 142 | "y.min()" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "Subsequent intervals are open on the lower bound and closed on the upper bound. 
The fourth class has the maximum value as its closed upper bound:" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 5, 155 | "metadata": { 156 | "ExecuteTime": { 157 | "end_time": "2022-11-04T16:51:55.419714Z", 158 | "start_time": "2022-11-04T16:51:55.415775Z" 159 | } 160 | }, 161 | "outputs": [ 162 | { 163 | "data": { 164 | "text/plain": [ 165 | "4111.45" 166 | ] 167 | }, 168 | "execution_count": 5, 169 | "metadata": {}, 170 | "output_type": "execute_result" 171 | } 172 | ], 173 | "source": [ 174 | "y.max()" 175 | ] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "metadata": {}, 180 | "source": [ 181 | "Assigning the classifier to an object let's us inspect other aspects of the classifier:" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 6, 187 | "metadata": { 188 | "ExecuteTime": { 189 | "end_time": "2022-11-04T16:51:55.426490Z", 190 | "start_time": "2022-11-04T16:51:55.421539Z" 191 | } 192 | }, 193 | "outputs": [ 194 | { 195 | "data": { 196 | "text/plain": [ 197 | "MaximumBreaks\n", 198 | "\n", 199 | " Interval Count\n", 200 | "--------------------------\n", 201 | "[ 0.13, 228.49] | 52\n", 202 | "( 228.49, 546.67] | 4\n", 203 | "( 546.67, 2417.15] | 1\n", 204 | "(2417.15, 4111.45] | 1" 205 | ] 206 | }, 207 | "execution_count": 6, 208 | "metadata": {}, 209 | "output_type": "execute_result" 210 | } 211 | ], 212 | "source": [ 213 | "mb4 = mc.MaximumBreaks(y, k=4)\n", 214 | "mb4" 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": {}, 220 | "source": [ 221 | "The `bins` attribute has the upper bounds of the intervals:" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 7, 227 | "metadata": { 228 | "ExecuteTime": { 229 | "end_time": "2022-11-04T16:51:55.433994Z", 230 | "start_time": "2022-11-04T16:51:55.429143Z" 231 | } 232 | }, 233 | "outputs": [ 234 | { 235 | "data": { 236 | "text/plain": [ 237 | "array([ 228.49 , 546.675, 2417.15 , 4111.45 ])" 238 | ] 239 
| }, 240 | "execution_count": 7, 241 | "metadata": {}, 242 | "output_type": "execute_result" 243 | } 244 | ], 245 | "source": [ 246 | "mb4.bins" 247 | ] 248 | }, 249 | { 250 | "cell_type": "markdown", 251 | "metadata": {}, 252 | "source": [ 253 | "and `counts` reports the number of values falling in each bin:" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": 8, 259 | "metadata": { 260 | "ExecuteTime": { 261 | "end_time": "2022-11-04T16:51:55.441325Z", 262 | "start_time": "2022-11-04T16:51:55.437014Z" 263 | } 264 | }, 265 | "outputs": [ 266 | { 267 | "data": { 268 | "text/plain": [ 269 | "array([52, 4, 1, 1])" 270 | ] 271 | }, 272 | "execution_count": 8, 273 | "metadata": {}, 274 | "output_type": "execute_result" 275 | } 276 | ], 277 | "source": [ 278 | "mb4.counts" 279 | ] 280 | }, 281 | { 282 | "cell_type": "markdown", 283 | "metadata": {}, 284 | "source": [ 285 | "The specific bin (i.e. label) for each observation can be found in the `yb` attribute:" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": 9, 291 | "metadata": { 292 | "ExecuteTime": { 293 | "end_time": "2022-11-04T16:51:55.447878Z", 294 | "start_time": "2022-11-04T16:51:55.443824Z" 295 | } 296 | }, 297 | "outputs": [ 298 | { 299 | "data": { 300 | "text/plain": [ 301 | "array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,\n", 302 | " 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 1, 0, 1, 0,\n", 303 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])" 304 | ] 305 | }, 306 | "execution_count": 9, 307 | "metadata": {}, 308 | "output_type": "execute_result" 309 | } 310 | ], 311 | "source": [ 312 | "mb4.yb" 313 | ] 314 | }, 315 | { 316 | "cell_type": "markdown", 317 | "metadata": {}, 318 | "source": [ 319 | "## Changing the number of classes\n", 320 | "\n", 321 | "Staying with the the same classifier, the user can apply the same classification rule, but for a different number of classes:" 322 | ] 323 | }, 324 | { 325 | "cell_type": 
"code", 326 | "execution_count": 10, 327 | "metadata": { 328 | "ExecuteTime": { 329 | "end_time": "2022-11-04T16:51:55.454514Z", 330 | "start_time": "2022-11-04T16:51:55.449706Z" 331 | } 332 | }, 333 | "outputs": [ 334 | { 335 | "data": { 336 | "text/plain": [ 337 | "MaximumBreaks\n", 338 | "\n", 339 | " Interval Count\n", 340 | "--------------------------\n", 341 | "[ 0.13, 146.00] | 50\n", 342 | "( 146.00, 228.49] | 2\n", 343 | "( 228.49, 291.02] | 1\n", 344 | "( 291.02, 350.21] | 2\n", 345 | "( 350.21, 546.67] | 1\n", 346 | "( 546.67, 2417.15] | 1\n", 347 | "(2417.15, 4111.45] | 1" 348 | ] 349 | }, 350 | "execution_count": 10, 351 | "metadata": {}, 352 | "output_type": "execute_result" 353 | } 354 | ], 355 | "source": [ 356 | "mb7 = mc.MaximumBreaks(y, k=7)\n", 357 | "mb7" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": 11, 363 | "metadata": { 364 | "ExecuteTime": { 365 | "end_time": "2022-11-04T16:51:55.461787Z", 366 | "start_time": "2022-11-04T16:51:55.456906Z" 367 | } 368 | }, 369 | "outputs": [ 370 | { 371 | "data": { 372 | "text/plain": [ 373 | "array([ 146.005, 228.49 , 291.02 , 350.21 , 546.675, 2417.15 ,\n", 374 | " 4111.45 ])" 375 | ] 376 | }, 377 | "execution_count": 11, 378 | "metadata": {}, 379 | "output_type": "execute_result" 380 | } 381 | ], 382 | "source": [ 383 | "mb7.bins" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": 12, 389 | "metadata": { 390 | "ExecuteTime": { 391 | "end_time": "2022-11-04T16:51:55.471152Z", 392 | "start_time": "2022-11-04T16:51:55.466248Z" 393 | } 394 | }, 395 | "outputs": [ 396 | { 397 | "data": { 398 | "text/plain": [ 399 | "array([50, 2, 1, 2, 1, 1, 1])" 400 | ] 401 | }, 402 | "execution_count": 12, 403 | "metadata": {}, 404 | "output_type": "execute_result" 405 | } 406 | ], 407 | "source": [ 408 | "mb7.counts" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": 13, 414 | "metadata": { 415 | "ExecuteTime": { 416 | "end_time": 
"2022-11-04T16:51:55.477524Z", 417 | "start_time": "2022-11-04T16:51:55.473430Z" 418 | } 419 | }, 420 | "outputs": [ 421 | { 422 | "data": { 423 | "text/plain": [ 424 | "array([3, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0,\n", 425 | " 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 1, 0, 0, 0, 6, 0, 0, 3, 0, 2, 0,\n", 426 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])" 427 | ] 428 | }, 429 | "execution_count": 13, 430 | "metadata": {}, 431 | "output_type": "execute_result" 432 | } 433 | ], 434 | "source": [ 435 | "mb7.yb" 436 | ] 437 | }, 438 | { 439 | "cell_type": "markdown", 440 | "metadata": {}, 441 | "source": [ 442 | "One additional attribute to mention here is the `adcm` attribute:" 443 | ] 444 | }, 445 | { 446 | "cell_type": "code", 447 | "execution_count": 14, 448 | "metadata": { 449 | "ExecuteTime": { 450 | "end_time": "2022-11-04T16:51:55.483597Z", 451 | "start_time": "2022-11-04T16:51:55.479867Z" 452 | } 453 | }, 454 | "outputs": [ 455 | { 456 | "data": { 457 | "text/plain": [ 458 | "727.3200000000002" 459 | ] 460 | }, 461 | "execution_count": 14, 462 | "metadata": {}, 463 | "output_type": "execute_result" 464 | } 465 | ], 466 | "source": [ 467 | "mb7.adcm" 468 | ] 469 | }, 470 | { 471 | "cell_type": "markdown", 472 | "metadata": {}, 473 | "source": [ 474 | "`adcm` is a measure of fit, defined as the mean absolute deviation around the class median. 
" 475 | ] 476 | }, 477 | { 478 | "cell_type": "code", 479 | "execution_count": 15, 480 | "metadata": { 481 | "ExecuteTime": { 482 | "end_time": "2022-11-04T16:51:55.489640Z", 483 | "start_time": "2022-11-04T16:51:55.485845Z" 484 | } 485 | }, 486 | "outputs": [ 487 | { 488 | "data": { 489 | "text/plain": [ 490 | "1181.4900000000002" 491 | ] 492 | }, 493 | "execution_count": 15, 494 | "metadata": {}, 495 | "output_type": "execute_result" 496 | } 497 | ], 498 | "source": [ 499 | "mb4.adcm" 500 | ] 501 | }, 502 | { 503 | "cell_type": "markdown", 504 | "metadata": {}, 505 | "source": [ 506 | "The `adcm` can be expected to decrease as $k$ increases for a given classifier. Thus, if using as a measure of fit, the `adcm` should only be used to compare classifiers defined on the same number of classes." 507 | ] 508 | }, 509 | { 510 | "cell_type": "markdown", 511 | "metadata": {}, 512 | "source": [ 513 | "## Next Steps\n", 514 | "`MaximumBreaks` is but one of many classifiers in `mapclassify`:" 515 | ] 516 | }, 517 | { 518 | "cell_type": "code", 519 | "execution_count": 16, 520 | "metadata": { 521 | "ExecuteTime": { 522 | "end_time": "2022-11-04T16:51:55.496548Z", 523 | "start_time": "2022-11-04T16:51:55.492318Z" 524 | } 525 | }, 526 | "outputs": [ 527 | { 528 | "data": { 529 | "text/plain": [ 530 | "('BoxPlot',\n", 531 | " 'EqualInterval',\n", 532 | " 'FisherJenks',\n", 533 | " 'FisherJenksSampled',\n", 534 | " 'HeadTailBreaks',\n", 535 | " 'JenksCaspall',\n", 536 | " 'JenksCaspallForced',\n", 537 | " 'JenksCaspallSampled',\n", 538 | " 'MaxP',\n", 539 | " 'MaximumBreaks',\n", 540 | " 'NaturalBreaks',\n", 541 | " 'Quantiles',\n", 542 | " 'Percentiles',\n", 543 | " 'StdMean',\n", 544 | " 'UserDefined')" 545 | ] 546 | }, 547 | "execution_count": 16, 548 | "metadata": {}, 549 | "output_type": "execute_result" 550 | } 551 | ], 552 | "source": [ 553 | "mc.classifiers.CLASSIFIERS" 554 | ] 555 | }, 556 | { 557 | "cell_type": "markdown", 558 | "metadata": {}, 559 | "source": [ 560 | 
"To learn more about an individual classifier, introspection is available:" 561 | ] 562 | }, 563 | { 564 | "cell_type": "code", 565 | "execution_count": 17, 566 | "metadata": { 567 | "ExecuteTime": { 568 | "end_time": "2022-11-04T16:51:55.537870Z", 569 | "start_time": "2022-11-04T16:51:55.499084Z" 570 | } 571 | }, 572 | "outputs": [], 573 | "source": [ 574 | "mc.MaximumBreaks?" 575 | ] 576 | }, 577 | { 578 | "cell_type": "markdown", 579 | "metadata": {}, 580 | "source": [ 581 | "-------------------------\n", 582 | "\n", 583 | "For more comprehensive appliciations of `mapclassify` the interested reader is directed to the chapter on [choropleth mapping](https://geographicdata.science/book/notebooks/05_choropleth.html) in [Rey, Arribas-Bel, and Wolf (2020) \"Geographic Data Science with PySAL and the PyData Stack”](https://geographicdata.science/book).\n", 584 | "\n", 585 | "-------------------------" 586 | ] 587 | } 588 | ], 589 | "metadata": { 590 | "anaconda-cloud": {}, 591 | "kernelspec": { 592 | "display_name": "Python [conda env:py310_mapclassify]", 593 | "language": "python", 594 | "name": "conda-env-py310_mapclassify-py" 595 | }, 596 | "language_info": { 597 | "codemirror_mode": { 598 | "name": "ipython", 599 | "version": 3 600 | }, 601 | "file_extension": ".py", 602 | "mimetype": "text/x-python", 603 | "name": "python", 604 | "nbconvert_exporter": "python", 605 | "pygments_lexer": "ipython3", 606 | "version": "3.10.6" 607 | } 608 | }, 609 | "nbformat": 4, 610 | "nbformat_minor": 4 611 | } 612 | -------------------------------------------------------------------------------- /notebooks/02_legends.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Legends in mapclassify\n", 8 | "\n", 9 | "`mapclassify` allows for user defined formatting of legends" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | 
"metadata": { 16 | "ExecuteTime": { 17 | "end_time": "2022-11-04T18:03:55.559087Z", 18 | "start_time": "2022-11-04T18:03:53.594867Z" 19 | } 20 | }, 21 | "outputs": [ 22 | { 23 | "data": { 24 | "text/plain": [ 25 | "'2.4.2+78.gc62d2d7.dirty'" 26 | ] 27 | }, 28 | "execution_count": 1, 29 | "metadata": {}, 30 | "output_type": "execute_result" 31 | } 32 | ], 33 | "source": [ 34 | "import mapclassify\n", 35 | "\n", 36 | "mapclassify.__version__" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 2, 42 | "metadata": { 43 | "ExecuteTime": { 44 | "end_time": "2022-11-04T18:03:56.030661Z", 45 | "start_time": "2022-11-04T18:03:55.564369Z" 46 | } 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "cal = mapclassify.load_example()" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 3, 56 | "metadata": { 57 | "ExecuteTime": { 58 | "end_time": "2022-11-04T18:03:56.041172Z", 59 | "start_time": "2022-11-04T18:03:56.034966Z" 60 | } 61 | }, 62 | "outputs": [ 63 | { 64 | "data": { 65 | "text/plain": [ 66 | "Quantiles\n", 67 | "\n", 68 | " Interval Count\n", 69 | "--------------------------\n", 70 | "[ 0.13, 1.16] | 10\n", 71 | "( 1.16, 3.38] | 10\n", 72 | "( 3.38, 9.36] | 9\n", 73 | "( 9.36, 24.32] | 10\n", 74 | "( 24.32, 70.78] | 9\n", 75 | "( 70.78, 4111.45] | 10" 76 | ] 77 | }, 78 | "execution_count": 3, 79 | "metadata": {}, 80 | "output_type": "execute_result" 81 | } 82 | ], 83 | "source": [ 84 | "q6 = mapclassify.Quantiles(cal, k=6)\n", 85 | "q6" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "The default is to use two decimal places for this dataset.\n", 93 | "\n", 94 | "If the user desires a list of strings with these values, the `get_legend_classes` method can be called\n", 95 | "which will return the strings with the default format:" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 4, 101 | "metadata": { 102 | "ExecuteTime": { 103 | "end_time": 
"2022-11-04T18:03:56.047765Z", 104 | "start_time": "2022-11-04T18:03:56.042764Z" 105 | } 106 | }, 107 | "outputs": [ 108 | { 109 | "data": { 110 | "text/plain": [ 111 | "['[ 0.13, 1.16]',\n", 112 | " '( 1.16, 3.38]',\n", 113 | " '( 3.38, 9.36]',\n", 114 | " '( 9.36, 24.32]',\n", 115 | " '( 24.32, 70.78]',\n", 116 | " '( 70.78, 4111.45]']" 117 | ] 118 | }, 119 | "execution_count": 4, 120 | "metadata": {}, 121 | "output_type": "execute_result" 122 | } 123 | ], 124 | "source": [ 125 | "q6.get_legend_classes()" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "To set the legends to integers, an option can be passed into the method:" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 5, 138 | "metadata": { 139 | "ExecuteTime": { 140 | "end_time": "2022-11-04T18:03:56.055615Z", 141 | "start_time": "2022-11-04T18:03:56.050635Z" 142 | } 143 | }, 144 | "outputs": [ 145 | { 146 | "data": { 147 | "text/plain": [ 148 | "['[ 0, 1]',\n", 149 | " '( 1, 3]',\n", 150 | " '( 3, 9]',\n", 151 | " '( 9, 24]',\n", 152 | " '( 24, 71]',\n", 153 | " '( 71, 4111]']" 154 | ] 155 | }, 156 | "execution_count": 5, 157 | "metadata": {}, 158 | "output_type": "execute_result" 159 | } 160 | ], 161 | "source": [ 162 | "q6.get_legend_classes(fmt=\"{:.0f}\")" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "Note that this does not change the original object:" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 6, 175 | "metadata": { 176 | "ExecuteTime": { 177 | "end_time": "2022-11-04T18:03:56.064112Z", 178 | "start_time": "2022-11-04T18:03:56.058884Z" 179 | } 180 | }, 181 | "outputs": [ 182 | { 183 | "data": { 184 | "text/plain": [ 185 | "Quantiles\n", 186 | "\n", 187 | " Interval Count\n", 188 | "--------------------------\n", 189 | "[ 0.13, 1.16] | 10\n", 190 | "( 1.16, 3.38] | 10\n", 191 | "( 3.38, 9.36] | 9\n", 192 | "( 9.36, 24.32] | 10\n", 
193 | "( 24.32, 70.78] | 9\n", 194 | "( 70.78, 4111.45] | 10" 195 | ] 196 | }, 197 | "execution_count": 6, 198 | "metadata": {}, 199 | "output_type": "execute_result" 200 | } 201 | ], 202 | "source": [ 203 | "q6" 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "The format can be changed on the object by calling the `set_fmt` method:" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 7, 216 | "metadata": { 217 | "ExecuteTime": { 218 | "end_time": "2022-11-04T18:03:56.073242Z", 219 | "start_time": "2022-11-04T18:03:56.067329Z" 220 | } 221 | }, 222 | "outputs": [ 223 | { 224 | "data": { 225 | "text/plain": [ 226 | "Quantiles\n", 227 | "\n", 228 | " Interval Count\n", 229 | "--------------------\n", 230 | "[ 0, 1] | 10\n", 231 | "( 1, 3] | 10\n", 232 | "( 3, 9] | 9\n", 233 | "( 9, 24] | 10\n", 234 | "( 24, 71] | 9\n", 235 | "( 71, 4111] | 10" 236 | ] 237 | }, 238 | "execution_count": 7, 239 | "metadata": {}, 240 | "output_type": "execute_result" 241 | } 242 | ], 243 | "source": [ 244 | "q6.set_fmt(fmt=\"{:.0f}\")\n", 245 | "q6" 246 | ] 247 | } 248 | ], 249 | "metadata": { 250 | "kernelspec": { 251 | "display_name": "Python [conda env:py310_mapclassify]", 252 | "language": "python", 253 | "name": "conda-env-py310_mapclassify-py" 254 | }, 255 | "language_info": { 256 | "codemirror_mode": { 257 | "name": "ipython", 258 | "version": 3 259 | }, 260 | "file_extension": ".py", 261 | "mimetype": "text/x-python", 262 | "name": "python", 263 | "nbconvert_exporter": "python", 264 | "pygments_lexer": "ipython3", 265 | "version": "3.10.6" 266 | } 267 | }, 268 | "nbformat": 4, 269 | "nbformat_minor": 4 270 | } 271 | -------------------------------------------------------------------------------- /notebooks/06_api.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## 
Overview of the mapclassify API\n", 8 | "\n", 9 | "There are a number of ways to access the functionality in `mapclassify`" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "We first load the example dataset that we have seen earlier." 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": { 23 | "ExecuteTime": { 24 | "end_time": "2022-11-05T15:10:19.167785Z", 25 | "start_time": "2022-11-05T15:10:14.404320Z" 26 | }, 27 | "tags": [] 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "import geopandas\n", 32 | "import libpysal\n", 33 | "import mapclassify" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "Current `mapclassify` version." 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 2, 46 | "metadata": { 47 | "ExecuteTime": { 48 | "end_time": "2022-11-05T15:10:19.182165Z", 49 | "start_time": "2022-11-05T15:10:19.171353Z" 50 | }, 51 | "tags": [] 52 | }, 53 | "outputs": [ 54 | { 55 | "data": { 56 | "text/plain": [ 57 | "'2.4.2+107.gb97c316a.dirty'" 58 | ] 59 | }, 60 | "execution_count": 2, 61 | "metadata": {}, 62 | "output_type": "execute_result" 63 | } 64 | ], 65 | "source": [ 66 | "mapclassify.__version__" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 3, 72 | "metadata": { 73 | "ExecuteTime": { 74 | "end_time": "2022-11-05T15:10:19.586837Z", 75 | "start_time": "2022-11-05T15:10:19.187232Z" 76 | }, 77 | "tags": [] 78 | }, 79 | "outputs": [ 80 | { 81 | "data": { 82 | "text/html": [ 83 | "
\n", 84 | "\n", 97 | "\n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | "
AREAPERIMETERCOLUMBUS_COLUMBUS_IPOLYIDNEIGHOVALINCCRIMEOPEN...DISCBDXYNSANSBEWCPTHOUSNEIGNOgeometry
00.3094412.440629251580.46700319.53115.7259802.850747...5.0338.79999944.0700001.01.01.00.01000.01005.0POLYGON ((8.62413 14.23698, 8.55970 14.74245, ...
10.2593292.236939312144.56700121.23218.8017545.296720...4.2735.61999942.3800011.01.00.00.01000.01001.0POLYGON ((8.25279 14.23694, 8.28276 14.22994, ...
20.1924682.187547463626.35000015.95630.6267814.534649...3.8939.82000041.1800001.01.01.00.01000.01006.0POLYGON ((8.65331 14.00809, 8.81814 14.00205, ...
30.0838411.427635524233.2000014.47732.3877600.394427...3.7036.50000040.5200001.01.00.00.01000.01002.0POLYGON ((8.45950 13.82035, 8.47341 13.83227, ...
40.4888882.997133675723.22500011.25250.7315100.405664...2.8340.00999838.0000001.01.01.00.01000.01007.0POLYGON ((8.68527 13.63952, 8.67758 13.72221, ...
\n", 247 | "

5 rows × 21 columns

\n", 248 | "
" 249 | ], 250 | "text/plain": [ 251 | " AREA PERIMETER COLUMBUS_ COLUMBUS_I POLYID NEIG HOVAL \\\n", 252 | "0 0.309441 2.440629 2 5 1 5 80.467003 \n", 253 | "1 0.259329 2.236939 3 1 2 1 44.567001 \n", 254 | "2 0.192468 2.187547 4 6 3 6 26.350000 \n", 255 | "3 0.083841 1.427635 5 2 4 2 33.200001 \n", 256 | "4 0.488888 2.997133 6 7 5 7 23.225000 \n", 257 | "\n", 258 | " INC CRIME OPEN ... DISCBD X Y NSA NSB \\\n", 259 | "0 19.531 15.725980 2.850747 ... 5.03 38.799999 44.070000 1.0 1.0 \n", 260 | "1 21.232 18.801754 5.296720 ... 4.27 35.619999 42.380001 1.0 1.0 \n", 261 | "2 15.956 30.626781 4.534649 ... 3.89 39.820000 41.180000 1.0 1.0 \n", 262 | "3 4.477 32.387760 0.394427 ... 3.70 36.500000 40.520000 1.0 1.0 \n", 263 | "4 11.252 50.731510 0.405664 ... 2.83 40.009998 38.000000 1.0 1.0 \n", 264 | "\n", 265 | " EW CP THOUS NEIGNO geometry \n", 266 | "0 1.0 0.0 1000.0 1005.0 POLYGON ((8.62413 14.23698, 8.55970 14.74245, ... \n", 267 | "1 0.0 0.0 1000.0 1001.0 POLYGON ((8.25279 14.23694, 8.28276 14.22994, ... \n", 268 | "2 1.0 0.0 1000.0 1006.0 POLYGON ((8.65331 14.00809, 8.81814 14.00205, ... \n", 269 | "3 0.0 0.0 1000.0 1002.0 POLYGON ((8.45950 13.82035, 8.47341 13.83227, ... \n", 270 | "4 1.0 0.0 1000.0 1007.0 POLYGON ((8.68527 13.63952, 8.67758 13.72221, ... 
\n", 271 | "\n", 272 | "[5 rows x 21 columns]" 273 | ] 274 | }, 275 | "execution_count": 3, 276 | "metadata": {}, 277 | "output_type": "execute_result" 278 | } 279 | ], 280 | "source": [ 281 | "pth = libpysal.examples.get_path(\"columbus.shp\")\n", 282 | "gdf = geopandas.read_file(pth)\n", 283 | "y = gdf.HOVAL\n", 284 | "gdf.head()" 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": {}, 290 | "source": [ 291 | "## Original API (< 2.4.0)\n" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": 4, 297 | "metadata": { 298 | "ExecuteTime": { 299 | "end_time": "2022-11-05T15:10:19.595711Z", 300 | "start_time": "2022-11-05T15:10:19.589037Z" 301 | }, 302 | "tags": [] 303 | }, 304 | "outputs": [ 305 | { 306 | "data": { 307 | "text/plain": [ 308 | "BoxPlot\n", 309 | "\n", 310 | " Interval Count\n", 311 | "----------------------\n", 312 | "( -inf, -0.70] | 0\n", 313 | "(-0.70, 25.70] | 13\n", 314 | "(25.70, 33.50] | 12\n", 315 | "(33.50, 43.30] | 12\n", 316 | "(43.30, 69.70] | 7\n", 317 | "(69.70, 96.40] | 5" 318 | ] 319 | }, 320 | "execution_count": 4, 321 | "metadata": {}, 322 | "output_type": "execute_result" 323 | } 324 | ], 325 | "source": [ 326 | "bp = mapclassify.BoxPlot(y)\n", 327 | "bp" 328 | ] 329 | }, 330 | { 331 | "cell_type": "markdown", 332 | "metadata": {}, 333 | "source": [ 334 | "## Extended API (>= 2.4.0)\n", 335 | "\n", 336 | "Note the original API is still available so this extension keeps backwards compatibility."
337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": 5, 342 | "metadata": { 343 | "ExecuteTime": { 344 | "end_time": "2022-11-05T15:10:19.603460Z", 345 | "start_time": "2022-11-05T15:10:19.598526Z" 346 | }, 347 | "tags": [] 348 | }, 349 | "outputs": [ 350 | { 351 | "data": { 352 | "text/plain": [ 353 | "BoxPlot\n", 354 | "\n", 355 | " Interval Count\n", 356 | "----------------------\n", 357 | "( -inf, -0.70] | 0\n", 358 | "(-0.70, 25.70] | 13\n", 359 | "(25.70, 33.50] | 12\n", 360 | "(33.50, 43.30] | 12\n", 361 | "(43.30, 69.70] | 7\n", 362 | "(69.70, 96.40] | 5" 363 | ] 364 | }, 365 | "execution_count": 5, 366 | "metadata": {}, 367 | "output_type": "execute_result" 368 | } 369 | ], 370 | "source": [ 371 | "bp = mapclassify.classify(y, \"box_plot\")\n", 372 | "bp" 373 | ] 374 | }, 375 | { 376 | "cell_type": "code", 377 | "execution_count": 6, 378 | "metadata": { 379 | "ExecuteTime": { 380 | "end_time": "2022-11-05T15:10:19.611996Z", 381 | "start_time": "2022-11-05T15:10:19.608075Z" 382 | }, 383 | "tags": [] 384 | }, 385 | "outputs": [ 386 | { 387 | "data": { 388 | "text/plain": [ 389 | "mapclassify.classifiers.BoxPlot" 390 | ] 391 | }, 392 | "execution_count": 6, 393 | "metadata": {}, 394 | "output_type": "execute_result" 395 | } 396 | ], 397 | "source": [ 398 | "type(bp)" 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": 7, 404 | "metadata": { 405 | "ExecuteTime": { 406 | "end_time": "2022-11-05T15:10:19.619168Z", 407 | "start_time": "2022-11-05T15:10:19.614412Z" 408 | }, 409 | "tags": [] 410 | }, 411 | "outputs": [ 412 | { 413 | "data": { 414 | "text/plain": [ 415 | "Quantiles\n", 416 | "\n", 417 | " Interval Count\n", 418 | "----------------------\n", 419 | "[17.90, 23.08] | 10\n", 420 | "(23.08, 30.48] | 10\n", 421 | "(30.48, 39.10] | 9\n", 422 | "(39.10, 45.83] | 10\n", 423 | "(45.83, 96.40] | 10" 424 | ] 425 | }, 426 | "execution_count": 7, 427 | "metadata": {}, 428 | "output_type": "execute_result" 429 | } 
430 | ], 431 | "source": [ 432 | "q5 = mapclassify.classify(y, \"quantiles\", k=5)\n", 433 | "q5" 434 | ] 435 | }, 436 | { 437 | "cell_type": "markdown", 438 | "metadata": {}, 439 | "source": [ 440 | "### Robustness of the `scheme` argument" 441 | ] 442 | }, 443 | { 444 | "cell_type": "code", 445 | "execution_count": 8, 446 | "metadata": { 447 | "ExecuteTime": { 448 | "end_time": "2022-11-05T15:10:19.627988Z", 449 | "start_time": "2022-11-05T15:10:19.621853Z" 450 | }, 451 | "tags": [] 452 | }, 453 | "outputs": [ 454 | { 455 | "data": { 456 | "text/plain": [ 457 | "BoxPlot\n", 458 | "\n", 459 | " Interval Count\n", 460 | "----------------------\n", 461 | "( -inf, -0.70] | 0\n", 462 | "(-0.70, 25.70] | 13\n", 463 | "(25.70, 33.50] | 12\n", 464 | "(33.50, 43.30] | 12\n", 465 | "(43.30, 69.70] | 7\n", 466 | "(69.70, 96.40] | 5" 467 | ] 468 | }, 469 | "execution_count": 8, 470 | "metadata": {}, 471 | "output_type": "execute_result" 472 | } 473 | ], 474 | "source": [ 475 | "mapclassify.classify(y, \"boxPlot\")" 476 | ] 477 | }, 478 | { 479 | "cell_type": "code", 480 | "execution_count": 9, 481 | "metadata": { 482 | "ExecuteTime": { 483 | "end_time": "2022-11-05T15:10:19.634396Z", 484 | "start_time": "2022-11-05T15:10:19.629847Z" 485 | }, 486 | "tags": [] 487 | }, 488 | "outputs": [ 489 | { 490 | "data": { 491 | "text/plain": [ 492 | "BoxPlot\n", 493 | "\n", 494 | " Interval Count\n", 495 | "----------------------\n", 496 | "( -inf, -0.70] | 0\n", 497 | "(-0.70, 25.70] | 13\n", 498 | "(25.70, 33.50] | 12\n", 499 | "(33.50, 43.30] | 12\n", 500 | "(43.30, 69.70] | 7\n", 501 | "(69.70, 96.40] | 5" 502 | ] 503 | }, 504 | "execution_count": 9, 505 | "metadata": {}, 506 | "output_type": "execute_result" 507 | } 508 | ], 509 | "source": [ 510 | "mapclassify.classify(y, \"Boxplot\")" 511 | ] 512 | }, 513 | { 514 | "cell_type": "code", 515 | "execution_count": 10, 516 | "metadata": { 517 | "ExecuteTime": { 518 | "end_time": "2022-11-05T15:10:19.641115Z", 519 | "start_time": 
"2022-11-05T15:10:19.636017Z" 520 | }, 521 | "tags": [] 522 | }, 523 | "outputs": [ 524 | { 525 | "data": { 526 | "text/plain": [ 527 | "BoxPlot\n", 528 | "\n", 529 | " Interval Count\n", 530 | "----------------------\n", 531 | "( -inf, -0.70] | 0\n", 532 | "(-0.70, 25.70] | 13\n", 533 | "(25.70, 33.50] | 12\n", 534 | "(33.50, 43.30] | 12\n", 535 | "(43.30, 69.70] | 7\n", 536 | "(69.70, 96.40] | 5" 537 | ] 538 | }, 539 | "execution_count": 10, 540 | "metadata": {}, 541 | "output_type": "execute_result" 542 | } 543 | ], 544 | "source": [ 545 | "mapclassify.classify(y, \"Box_plot\")" 546 | ] 547 | }, 548 | { 549 | "cell_type": "code", 550 | "execution_count": 13, 551 | "metadata": { 552 | "ExecuteTime": { 553 | "end_time": "2022-11-05T15:10:19.691302Z", 554 | "start_time": "2022-11-05T15:10:19.645124Z" 555 | }, 556 | "tags": [] 557 | }, 558 | "outputs": [ 559 | { 560 | "data": { 561 | "text/plain": [ 562 | "StdMean\n", 563 | "\n", 564 | " Interval Count\n", 565 | "----------------------\n", 566 | "( -inf, 1.50] | 0\n", 567 | "( 1.50, 19.97] | 5\n", 568 | "(19.97, 56.90] | 37\n", 569 | "(56.90, 75.37] | 3\n", 570 | "(75.37, 96.40] | 4" 571 | ] 572 | }, 573 | "execution_count": 13, 574 | "metadata": {}, 575 | "output_type": "execute_result" 576 | } 577 | ], 578 | "source": [ 579 | "mapclassify.classify(y, 'Std_Mean')" 580 | ] 581 | }, 582 | { 583 | "cell_type": "code", 584 | "execution_count": 15, 585 | "metadata": { 586 | "ExecuteTime": { 587 | "end_time": "2022-10-26T03:01:45.977181Z", 588 | "start_time": "2022-10-26T03:01:45.931234Z" 589 | }, 590 | "tags": [] 591 | }, 592 | "outputs": [ 593 | { 594 | "data": { 595 | "text/plain": [ 596 | "StdMean\n", 597 | "\n", 598 | " Interval Count\n", 599 | "----------------------\n", 600 | "[17.90, 19.97] | 5\n", 601 | "(19.97, 38.44] | 24\n", 602 | "(38.44, 56.90] | 13\n", 603 | "(56.90, 75.37] | 3\n", 604 | "(75.37, 93.83] | 3\n", 605 | "(93.83, 96.40] | 1" 606 | ] 607 | }, 608 | "execution_count": 15, 609 | "metadata": {}, 
610 | "output_type": "execute_result" 611 | } 612 | ], 613 | "source": [ 614 | "mapclassify.classify(y, 'Std_Mean', anchor=True)" 615 | ] 616 | }, 617 | { 618 | "cell_type": "code", 619 | "execution_count": 16, 620 | "metadata": { 621 | "tags": [] 622 | }, 623 | "outputs": [ 624 | { 625 | "data": { 626 | "text/plain": [ 627 | "(38.43622446938775, 18.466069465206047, 17.9, 96.400002)" 628 | ] 629 | }, 630 | "execution_count": 16, 631 | "metadata": {}, 632 | "output_type": "execute_result" 633 | } 634 | ], 635 | "source": [ 636 | "y.mean(), y.std(), y.min(), y.max()" 637 | ] 638 | }, 639 | { 640 | "cell_type": "code", 641 | "execution_count": null, 642 | "metadata": {}, 643 | "outputs": [], 644 | "source": [] 645 | } 646 | ], 647 | "metadata": { 648 | "kernelspec": { 649 | "display_name": "Python 3 (ipykernel)", 650 | "language": "python", 651 | "name": "python3" 652 | }, 653 | "language_info": { 654 | "codemirror_mode": { 655 | "name": "ipython", 656 | "version": 3 657 | }, 658 | "file_extension": ".py", 659 | "mimetype": "text/x-python", 660 | "name": "python", 661 | "nbconvert_exporter": "python", 662 | "pygments_lexer": "ipython3", 663 | "version": "3.10.10" 664 | } 665 | }, 666 | "nbformat": 4, 667 | "nbformat_minor": 4 668 | } 669 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.0", "setuptools_scm[toml]>=6.2"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [tool.setuptools_scm] 6 | 7 | [project] 8 | name = "mapclassify" 9 | dynamic = ["version"] 10 | maintainers = [ 11 | { name = "Serge Rey", email = "sjsrey@gmail.com" }, 12 | { name = "Wei Kang", email = "weikang9009@gmail.com" }, 13 | ] 14 | license = { text = "BSD 3-Clause" } 15 | description = "Classification Schemes for Choropleth Maps." 
16 | keywords = ["spatial statistics", "geovisualization"] 17 | readme = { text = """\ 18 | `mapclassify` implements a family of classification schemes for choropleth maps. 19 | Its focus is on the determination of the number of classes, and the assignment 20 | of observations to those classes. It is intended for use with upstream mapping 21 | and geovisualization packages (see `geopandas`_ and `geoplot`_) 22 | that handle the rendering of the maps. 23 | 24 | For further theoretical background see "`Choropleth Mapping`_" in Rey, S.J., D. Arribas-Bel, and L.J. Wolf (2020) "Geographic Data Science with PySAL and the PyData Stack". 25 | 26 | .. _geopandas: https://geopandas.org/mapping.html 27 | .. _geoplot: https://residentmario.github.io/geoplot/user_guide/Customizing_Plots.html 28 | .. _Choropleth Mapping: https://geographicdata.science/book/notebooks/05_choropleth.html 29 | """, content-type = "text/x-rst" } 30 | classifiers = [ 31 | "Programming Language :: Python :: 3", 32 | "License :: OSI Approved :: BSD License", 33 | "Operating System :: OS Independent", 34 | "Intended Audience :: Science/Research", 35 | "Topic :: Scientific/Engineering :: GIS", 36 | ] 37 | requires-python = ">=3.11" 38 | dependencies = [ 39 | "networkx>=3.2", 40 | "numpy>=1.26", 41 | "pandas>=2.1", 42 | "scikit-learn>=1.4", 43 | "scipy>=1.12", 44 | ] 45 | 46 | [project.urls] 47 | Home = "https://pysal.org/mapclassify/" 48 | Repository = "https://github.com/pysal/mapclassify" 49 | 50 | [project.optional-dependencies] 51 | speedups = [ 52 | "numba>=0.58" 53 | ] 54 | dev = [ 55 | "ruff", 56 | "pre-commit", 57 | "watermark", 58 | ] 59 | docs = [ 60 | "nbsphinx", 61 | "numpydoc", 62 | "sphinx>=1.4.3", 63 | "sphinx-gallery", 64 | "sphinxcontrib-bibtex", 65 | "sphinx_bootstrap_theme", 66 | ] 67 | spatial = [ 68 | "geopandas", 69 | "libpysal", 70 | "matplotlib", 71 | "shapely", 72 | ] 73 | notebooks = [ 74 | "mapclassify[spatial]", 75 | "geodatasets", 76 | "ipywidgets", 77 | "jupyterlab", 78 | 
"lonboard", 79 | "pyarrow", 80 | "pydeck", 81 | "seaborn", 82 | ] 83 | tests = [ 84 | "mapclassify[spatial]", 85 | "pytest", 86 | "pytest-cov", 87 | "pytest-xdist", 88 | "pytest-doctestplus", 89 | "pytest-mpl" 90 | ] 91 | all = ["mapclassify[speedups,dev,docs,notebooks,tests]"] 92 | 93 | [tool.setuptools.packages.find] 94 | include = ["mapclassify", "mapclassify.*"] 95 | 96 | 97 | [tool.ruff] 98 | line-length = 88 99 | lint.select = ["E", "F", "W", "I", "UP", "N", "B", "A", "C4", "SIM", "ARG"] 100 | lint.ignore = [ 101 | "B006", 102 | "B008", 103 | "B009", 104 | "B010", 105 | "C408", 106 | "E731", 107 | "F401", 108 | "F403", 109 | "F405", 110 | "N803", 111 | "N806", 112 | "N999", 113 | "UP007" 114 | ] 115 | extend-include = [ 116 | "docs/conf.py" 117 | ] 118 | 119 | [tool.ruff.lint.per-file-ignores] 120 | "*tests/test_*.py" = [ 121 | "A004", # Import is shadowing a Python builtin 122 | "N802", # Function name should be lowercase 123 | ] 124 | "docs/conf.py" = [ 125 | "A001", # Variable is shadowing a Python builtin 126 | ] 127 | 128 | [tool.coverage.run] 129 | source = ["./mapclassify"] 130 | 131 | [tool.coverage.report] 132 | exclude_lines = [ 133 | "if self.debug:", 134 | "pragma: no cover", 135 | "raise NotImplementedError", 136 | "except ModuleNotFoundError:", 137 | "except ImportError", 138 | ] 139 | ignore_errors = true 140 | omit = ["mapclassify/tests/*", "docs/conf.py"] 141 | --------------------------------------------------------------------------------