├── .git-blame-ignore-revs
├── .gitattributes
├── .github
├── dependabot.yml
├── release.yml
└── workflows
│ ├── build_docs.yml
│ ├── release.yml
│ └── testing.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CHANGELOG.md
├── LICENSE.txt
├── README.md
├── ci
├── 311-latest.yaml
├── 311-numba-latest.yaml
├── 311-oldest.yaml
├── 312-latest.yaml
├── 312-numba-latest.yaml
├── 313-dev.yaml
├── 313-latest.yaml
└── 313-numba-latest.yaml
├── codecov.yml
├── docs
├── Makefile
├── _static
│ ├── auto
│ │ └── references.el
│ ├── images
│ │ ├── equalinterval.png
│ │ ├── fisherjenks.png
│ │ ├── hr60fj10.png
│ │ ├── hr60mb10.png
│ │ ├── hr60q10.png
│ │ ├── pysal_favicon.ico
│ │ └── quantiles.png
│ ├── pysal-styles.css
│ └── references.bib
├── api.rst
├── conf.py
├── index.rst
├── installation.rst
├── references.rst
└── tutorial.rst
├── environment.yml
├── mapclassify
├── __init__.py
├── _classify_API.py
├── classifiers.py
├── datasets
│ ├── __init__.py
│ └── calemp
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── calempdensity.csv
│ │ └── data.py
├── greedy.py
├── legendgram.py
├── pooling.py
├── tests
│ ├── __init__.py
│ ├── baseline
│ │ ├── test_histogram_plot.png
│ │ ├── test_histogram_plot_despine.png
│ │ └── test_histogram_plot_linewidth.png
│ ├── baseline_images
│ │ └── test_legendgram
│ │ │ ├── legendgram_cmap.png
│ │ │ ├── legendgram_default.png
│ │ │ ├── legendgram_kwargs.png
│ │ │ ├── legendgram_map.png
│ │ │ ├── legendgram_position.png
│ │ │ ├── legendgram_quantiles.png
│ │ │ └── legendgram_vlines.png
│ ├── test_classify.py
│ ├── test_greedy.py
│ ├── test_legendgram.py
│ ├── test_mapclassify.py
│ └── test_rgba.py
└── util.py
├── notebooks
├── 01_maximum_breaks.ipynb
├── 02_legends.ipynb
├── 03_choropleth.ipynb
├── 04_pooled.ipynb
├── 05_Greedy_coloring.ipynb
├── 06_api.ipynb
├── 07_std_anchor.ipynb
├── 08_manual_coloring.ipynb
└── 09_legendgram.ipynb
└── pyproject.toml
/.git-blame-ignore-revs:
--------------------------------------------------------------------------------
1 | # black-ification of code
2 | 71bfea486c64d3e87d0677f824ee6b17d576d028
3 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | mapclassify/_version.py export-subst
2 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | # To get started with Dependabot version updates, you'll need to specify which
2 | # package ecosystems to update and where the package manifests are located.
3 | # Please see the documentation for all configuration options:
4 | # https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
5 |
6 | version: 2
7 | updates:
8 | - package-ecosystem: "github-actions"
9 | directory: "/"
10 | schedule:
11 | interval: "daily"
12 | reviewers:
13 | - "jGaboardi"
14 |
15 | - package-ecosystem: "pip"
16 | directory: "/"
17 | schedule:
18 | interval: "daily"
19 | reviewers:
20 | - "jGaboardi"
21 |
--------------------------------------------------------------------------------
/.github/release.yml:
--------------------------------------------------------------------------------
1 | changelog:
2 | exclude:
3 | labels:
4 | - ignore-for-release
5 | authors:
6 | - dependabot
7 | - pre-commit-ci
8 | categories:
9 | - title: Bug Fixes
10 | labels:
11 | - bug
12 | - title: Enhancements
13 | labels:
14 | - enhancement
15 | - title: Other Changes
16 | labels:
17 | - "*"
18 |
--------------------------------------------------------------------------------
/.github/workflows/build_docs.yml:
--------------------------------------------------------------------------------
1 |
2 | name: Build Docs
3 |
4 | on:
5 | push:
6 | # Sequence of patterns matched against refs/tags
7 | tags:
8 | - 'v*' # Push events to matching v*, i.e. v1.0, v20.15.10
9 | workflow_dispatch:
10 | inputs:
11 | version:
12 | description: Manual Doc Build Reason
13 | default: test
14 | required: false
15 |
16 | jobs:
17 | docs:
18 | name: Build & Push Docs
19 | runs-on: ${{ matrix.os }}
20 | timeout-minutes: 90
21 | strategy:
22 | matrix:
23 | os: ['ubuntu-latest']
24 | environment-file: [ci/313-latest.yaml]
25 | experimental: [false]
26 | defaults:
27 | run:
28 | shell: bash -l {0}
29 |
30 | steps:
31 | - name: Checkout repo
32 | uses: actions/checkout@v4
33 | with:
34 | fetch-depth: 0 # Fetch all history for all branches and tags.
35 |
36 | - name: Setup micromamba
37 | uses: mamba-org/setup-micromamba@v2
38 | with:
39 | environment-file: ${{ matrix.environment-file }}
40 | micromamba-version: 'latest'
41 |
42 | - name: Install package
43 | run: pip install .
44 |
45 | - name: Make Docs
46 | run: cd docs; make html
47 |
48 | - name: Commit Docs
49 | run: |
50 | git clone https://github.com/ammaraskar/sphinx-action-test.git --branch gh-pages --single-branch gh-pages
51 | cp -r docs/_build/html/* gh-pages/
52 | cd gh-pages
53 | git config --local user.email "action@github.com"
54 | git config --local user.name "GitHub Action"
55 | git add .
56 | git commit -m "Update documentation" -a || true
57 | # The above command will fail if no changes were present,
58 | # so we ignore the return code.
59 |
60 | - name: push to gh-pages
61 | uses: ad-m/github-push-action@master
62 | with:
63 | branch: gh-pages
64 | directory: gh-pages
65 | github_token: ${{ secrets.GITHUB_TOKEN }}
66 | force: true
67 |
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: Release Package
2 |
3 | on:
4 | push:
5 | # Sequence of patterns matched against refs/tags
6 | tags:
7 | - 'v*' # Push events to matching v*, i.e. v1.0, v20.15.10
8 | workflow_dispatch:
9 | inputs:
10 | version:
11 | description: Manual Release
12 | default: test
13 | required: false
14 |
15 | jobs:
16 | build:
17 | runs-on: ubuntu-latest
18 | steps:
19 | - name: Checkout repo
20 | uses: actions/checkout@v4
21 |
22 | - name: Set up Python
23 | uses: actions/setup-python@v5
24 | with:
25 | python-version: '3.x'
26 |
27 | - name: Install Dependencies
28 | run: |
29 | python -m pip install --upgrade pip build twine
30 | python -m build
31 | twine check --strict dist/*
32 |
33 | - name: Create Release Notes
34 | uses: actions/github-script@v7
35 | with:
36 | github-token: ${{secrets.GITHUB_TOKEN}}
37 | script: |
38 | await github.request(`POST /repos/${{ github.repository }}/releases`, {
39 | tag_name: "${{ github.ref }}",
40 | generate_release_notes: true
41 | });
42 |
43 | - name: Publish distribution 📦 to PyPI
44 | uses: pypa/gh-action-pypi-publish@release/v1
45 | with:
46 | user: __token__
47 | password: ${{ secrets.PYPI_PASSWORD }}
48 |
--------------------------------------------------------------------------------
/.github/workflows/testing.yml:
--------------------------------------------------------------------------------
1 | name: Continuous Integration
2 |
3 | on:
4 | push:
5 | branches:
6 | - "*"
7 | pull_request:
8 | branches:
9 | - "*"
10 | schedule:
11 | - cron: "59 21 * * *"
12 |
13 | jobs:
14 | testing:
15 | name: (${{ matrix.os }}, ${{ matrix.environment-file }})
16 | runs-on: ${{ matrix.os }}
17 | defaults:
18 | run:
19 | shell: bash -l {0}
20 | strategy:
21 | matrix:
22 | os: ["ubuntu-latest"]
23 | environment-file:
24 | [
25 | ci/311-oldest.yaml,
26 | ci/311-latest.yaml,
27 | ci/311-numba-latest.yaml,
28 | ci/312-latest.yaml,
29 | ci/312-numba-latest.yaml,
30 | ci/313-latest.yaml,
31 | ci/313-numba-latest.yaml,
32 | ci/313-dev.yaml,
33 | ]
34 | include:
35 | - environment-file: ci/313-latest.yaml
36 | os: macos-13 # Intel
37 | - environment-file: ci/313-numba-latest.yaml
38 | os: macos-13 # Intel
39 | - environment-file: ci/313-latest.yaml
40 | os: macos-14 # Apple Silicon
41 | - environment-file: ci/313-numba-latest.yaml
42 | os: macos-14 # Apple Silicon
43 | - environment-file: ci/313-latest.yaml
44 | os: windows-latest
45 | - environment-file: ci/313-numba-latest.yaml
46 | os: windows-latest
47 | fail-fast: false
48 |
49 | steps:
50 | - name: checkout repo
51 | uses: actions/checkout@v4
52 | with:
53 | fetch-depth: 0 # Fetch all history for all branches and tags.
54 |
55 | - name: setup micromamba
56 | uses: mamba-org/setup-micromamba@v2
57 | with:
58 | environment-file: ${{ matrix.environment-file }}
59 | micromamba-version: 'latest'
60 |
61 | - name: environment info
62 | run: |
63 | micromamba info
64 | micromamba list
65 |
66 | - name: spatial versions
67 | run: |
68 | python -c "import geopandas; geopandas.show_versions();"
69 |
70 | - name: Download test files
71 | run: |
72 | python -c '
73 | import libpysal
74 |
75 | libpysal.examples.fetch_all()
76 | '
77 |
78 | - name: Run pytest
79 | run: |
80 | pytest \
81 | mapclassify \
82 | -r a \
83 | -v \
84 | -n auto \
85 | --color yes \
86 | --cov-append \
87 | --cov mapclassify \
88 | --cov-report xml \
89 | --cov-report term-missing
90 |
91 | - name: run docstring tests
92 | if: contains(matrix.environment-file, '312-numba-latest') && contains(matrix.os, 'ubuntu')
93 | run: |
94 | pytest \
95 | -v \
96 | -r a \
97 | -n auto \
98 | --color yes \
99 | --cov-append \
100 | --cov mapclassify \
101 | --cov-report xml \
102 | --doctest-only \
103 | --mpl mapclassify
104 |
105 | - name: codecov (${{ matrix.os }}, ${{ matrix.environment-file }})
106 | uses: codecov/codecov-action@v5
107 | with:
108 | token: ${{ secrets.CODECOV_TOKEN }}
109 | file: ./coverage.xml
110 | name: mapclassify-codecov
111 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | .CHANGELOG.md.swp
3 | .ropeproject/
4 | dist/
5 | examples/notebooks/.ipynb_checkpoints/
6 | examples/python/
7 | mapclassify.egg-info/
8 | mapclassify/.ropeproject/
9 | mapclassify/datasets/calemp/.ropeproject/
10 | mapclassify/tests/.ropeproject/
11 | .DS_Store
12 | .vscode/settings.json
13 | __pycache__
14 | /notebooks/.ipynb_checkpoints/
15 | result_images/
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | files: 'mapclassify\/'
2 | repos:
3 | - repo: https://github.com/astral-sh/ruff-pre-commit
4 | rev: "v0.11.4"
5 | hooks:
6 | - id: ruff
7 | - id: ruff-format
8 |
9 | ci:
10 | autofix_prs: false
11 | autoupdate_schedule: quarterly
12 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Version 2.4.1 (2020-12-20)
2 |
3 | This is a bug-fix release.
4 |
5 | We closed a total of 9 issues (enhancements and bug fixes) through 3 pull requests, since our last release on 2020-12-13.
6 |
7 | ## Issues Closed
8 | - BUG: support series in sampled classifiers (#99)
9 | - BUG: FisherJenksSampled returns ValueError if Series is passed as y (#98)
10 | - REGR: fix invariant array regression (#101)
11 | - REGR: UserDefined classifier returns ValueError("Minimum and maximum of input data are equal, cannot create bins.") (#100)
12 | - [DOC] add example nb for new classify API (#91)
13 | - 2.4.0 Release (#97)
14 |
15 | ## Pull Requests
16 | - BUG: support series in sampled classifiers (#99)
17 | - REGR: fix invariant array regression (#101)
18 | - 2.4.0 Release (#97)
19 |
20 | The following individuals contributed to this release:
21 |
22 | - Serge Rey
23 | - Martin Fleischmann
24 | - Stefanie Lumnitz
25 |
26 | # Version 2.4.0 (2020-12-13)
27 |
28 | We closed a total of 39 issues (enhancements and bug fixes) through 15 pull requests, since our last release on 2020-06-13.
 29 | ## Issues Closed
30 |
31 | - Remove timeout on tests. (#96)
32 | - BUG: HeadTailBreaks RecursionError due to floating point issue (#92)
33 | - Handle recursion error for head tails. (#95)
34 | - Add streamlined API (#72)
35 | - [API] add high-level API mapclassify.classify() (#90)
36 | - BUG: Fix mapclassify #88 (#89)
37 | - exclude Python 3.6 for Windows (#94)
38 | - CI: update conda action (#93)
39 | - EqualInterval unclear error when max_y - min_y = 0 (#88)
40 | - BUG: fix unordered series in greedy (#87)
41 | - BUG: greedy(strategy='balanced') does not return correct labels (#86)
42 | - Extra files in PyPI sdist (#56)
43 | - MAINT: fix repos name (#85)
44 | - DOC: content type for long description (#84)
45 | - MAINT: update gitcount notebook (#83)
46 | - Update documentations to include tutorial (#63)
47 | - build binder for notebooks (#71)
48 | - current version of mapclassify in docs? (#70)
49 | - 404 for notebook/tutorials links in docs (#79)
50 | - DOC: figs (#82)
51 | - DOCS: new images for tutorial (#81)
52 | - DOC: missing figs (#80)
53 | - DOCS: update documentation pages (#78)
54 | - Make networkx optional, remove xfail from greedy (#77)
55 |
56 | ## Pull Requests
57 |
58 | - Remove timeout on tests. (#96)
59 | - Handle recursion error for head tails. (#95)
60 | - [API] add high-level API mapclassify.classify() (#90)
61 | - BUG: Fix mapclassify #88 (#89)
62 | - exclude Python 3.6 for Windows (#94)
63 | - CI: update conda action (#93)
64 | - BUG: fix unordered series in greedy (#87)
65 | - MAINT: fix repos name (#85)
66 | - DOC: content type for long description (#84)
67 | - MAINT: update gitcount notebook (#83)
68 | - DOC: figs (#82)
69 | - DOCS: new images for tutorial (#81)
70 | - DOC: missing figs (#80)
71 | - DOCS: update documentation pages (#78)
72 | - Make networkx optional, remove xfail from greedy (#77)
73 |
74 | The following individuals contributed to this release:
75 |
 76 | - Serge Rey
 77 | - Stefanie Lumnitz
 78 | - James Gaboardi
 79 | - Martin Fleischmann
80 |
81 |
82 | # Version 2.3.0 (2020-06-13)
83 | ## Key Enhancements
84 |
85 | - Topological coloring to ensure no two adjacent polygons share the same color.
86 | - Pooled classification allows for the use of the same class intervals across maps.
87 |
88 | ## Details
89 |
90 | We closed a total of 30 issues (enhancements and bug fixes) through 10 pull requests, since our last release on 2020-01-04.
91 | ## Issues Closed
92 |
93 | - Make networkx optional, remove xfail from greedy (#77)
94 | - BINDER: point to upstream (#76)
95 | - add binder badge (#75)
96 | - Binder (#74)
97 | - sys import missing from setup.py (#73)
98 | - [WIP] DOC: Updating tutorial (#66)
99 | - chorobrewer branch has begun (#27)
100 | - Is mapclassify code black? (#68)
101 | - Code format and README (#69)
102 | - Move testing over to github actions (#64)
103 | - Add pinning in pooled example documentation (#67)
104 | - Migrate to GHA (#65)
105 | - Add a Pooled classifier (#51)
106 | - Backwards compatibility (#48)
107 | - Difference between Natural Breaks and Fisher Jenks schemes (#62)
108 | - ENH: add greedy (topological) coloring (#61)
109 | - Error while running mapclassify (#60)
110 | - Pooled (#59)
111 | - Invalid escape sequences in strings (#57)
112 | - 3.8, appveyor, deprecation fixes (#58)
113 |
114 | ## Pull Requests
115 |
116 | - Make networkx optional, remove xfail from greedy (#77)
117 | - BINDER: point to upstream (#76)
118 | - add binder badge (#75)
119 | - Binder (#74)
120 | - [WIP] DOC: Updating tutorial (#66)
121 | - Code format and README (#69)
122 | - Migrate to GHA (#65)
123 | - ENH: add greedy (topological) coloring (#61)
124 | - Pooled (#59)
125 | - 3.8, appveyor, deprecation fixes (#58)
126 |
127 | ## Acknowledgements
128 |
129 | The following individuals contributed to this release:
130 |
131 | - Serge Rey
132 | - James Gaboardi
133 | - Eli Knaap
134 | - Martin Fleischmann
135 |
136 |
137 |
138 | # Version 2.2.0 (2019-12-21)
139 |
140 | This release brings new functionality for [formatting of legend classes](https://github.com/sjsrey/geopandas/blob/legendkwds/examples/choro_legends.ipynb).
141 |
142 | We closed a total of 21 issues (enhancements and bug fixes) through 9 pull requests, since our last release on 2019-06-28.
143 |
144 | ## Issues Closed
145 | - 2.2 (#54)
146 | - 2.2 (#53)
147 | - conda-forge UnsatisfiableError on windows and python 3.7 (#52)
148 | - [MAINT] updating supported Python versions in setup.py (#49)
149 | - BUG: RecursiveError in HeadTailBreaks (#46)
150 | - BUG: HeadTailBreaks raise RecursionError (#45)
151 | - BUG: UserDefined accepts only list if max not in bins (#47)
152 | - BUG: avoid deprecation warning in HeadTailBreaks (#44)
153 | - remove docs badge (#42)
154 | - Remove doc badge (#43)
155 | - Docs: moving to project pages on github and off rtd (#41)
156 | - BUG: Fix for downstream breakage in geopandas (#40)
157 |
158 | ## Pull Requests
159 | - 2.2 (#54)
160 | - 2.2 (#53)
161 | - [MAINT] updating supported Python versions in setup.py (#49)
162 | - BUG: RecursiveError in HeadTailBreaks (#46)
163 | - BUG: UserDefined accepts only list if max not in bins (#47)
164 | - BUG: avoid deprecation warning in HeadTailBreaks (#44)
165 | - Remove doc badge (#43)
166 | - Docs: moving to project pages on github and off rtd (#41)
167 | - BUG: Fix for downstream breakage in geopandas (#40)
168 |
169 | The following individuals contributed to this release:
170 |
171 | - Serge Rey
172 | - James Gaboardi
173 | - Wei Kang
174 | - Martin Fleischmann
175 |
176 |
177 | # Version 2.1.0 (2019-06-26)
178 |
179 | We closed a total of 36 issues (enhancements and bug fixes) through 16 pull requests, since our last release on 2018-10-28.
180 |
181 | ## Issues Closed
182 | - ENH: dropping 3.5 support and adding 3.7 (#38)
183 | - ENH: plot method added to Mapclassify (#36)
184 | - ENH: keeping init keyword argument to avoid API breakage. (#35)
185 | - mapclassify.Natural_Break() does not return the specified k classes (#16)
186 | - Fix for #16 (#32)
187 | - Mixed usage of brewer2mpl and palettable.colorbrewer in color.py (#33)
188 | - Chorobrewer (#34)
189 | - conda-forge recipe needs some love (#14)
190 | - generating images for color selector (#31)
191 | - doc: bump version and dev setup docs (#30)
192 | - environment.yml (#29)
193 | - add color import and chorobrewer notebook (#28)
194 | - Chorobrewer (#26)
195 | - chorobrewer init (#25)
196 | - add badges for pypi, zenodo and docs (#24)
197 | - add geopandas and libpysal to test requirement (#23)
198 | - adjust changelog and delete tools/github_stats.py (#22)
199 | - add requirements_docs.txt to MANIFEST.in (#21)
200 | - gadf and K_classifiers not in __init__.py (#18)
201 | - rel: 2.0.1 (#20)
202 |
203 | ## Pull Requests
204 | - ENH: dropping 3.5 support and adding 3.7 (#38)
205 | - ENH: plot method added to Mapclassify (#36)
206 | - ENH: keeping init keyword argument to avoid API breakage. (#35)
207 | - Fix for #16 (#32)
208 | - Chorobrewer (#34)
209 | - generating images for color selector (#31)
210 | - doc: bump version and dev setup docs (#30)
211 | - environment.yml (#29)
212 | - add color import and chorobrewer notebook (#28)
213 | - Chorobrewer (#26)
214 | - chorobrewer init (#25)
215 | - add badges for pypi, zenodo and docs (#24)
216 | - add geopandas and libpysal to test requirement (#23)
217 | - adjust changelog and delete tools/github_stats.py (#22)
218 | - add requirements_docs.txt to MANIFEST.in (#21)
219 | - rel: 2.0.1 (#20)
220 |
221 | The following individuals contributed to this release:
222 |
223 | - Serge Rey
224 | - Wei Kang
225 |
226 | # Version 2.0.1 (2018-10-28)
227 |
228 | We closed a total of 12 issues (enhancements and bug fixes) through 5 pull requests, since our last release on 2018-08-10.
229 |
230 | ## Issues Closed
231 | - gadf and K_classifiers not in __init__.py (#18)
232 | - rel: 2.0.1 (#20)
233 | - fix doctests (interactive examples in inline docstrings) (#19)
234 | - complete readthedocs configuration & add Slocum 2009 reference (#17)
235 | - prepping for a doc based release (#15)
236 | - new release on pypi (#10)
237 | - prepare for release 2.0.0 (#13)
238 |
239 | ## Pull Requests
240 | - rel: 2.0.1 (#20)
241 | - fix doctests (interactive examples in inline docstrings) (#19)
242 | - complete readthedocs configuration & add Slocum 2009 reference (#17)
243 | - prepping for a doc based release (#15)
244 | - prepare for release 2.0.0 (#13)
245 |
246 | The following individuals contributed to this release:
247 |
248 | - Serge Rey
249 | - Wei Kang
250 |
251 | # Version 2.0.0 (2018-08-10)
252 |
253 | Starting from this release, mapclassify supports python 3+ only (currently 3.5
254 | and 3.6).
255 |
256 | This release also features a first stable version of mapclassify in
257 | the process of pysal refactoring. There is a big change in the api in that we no
258 | longer provide an api module (`from mapclassify.api import Quantiles`). Instead,
259 | users will directly `from mapclassify import Quantiles`.
260 |
261 | GitHub stats for 2017/08/18 - 2018/08/10
262 |
263 | These lists are automatically generated, and may be incomplete or contain duplicates.
264 |
265 | We closed a total of 8 issues, 4 pull requests and 4 regular issues;
266 | this is the full list (generated with the script
267 | :file:`tools/github_stats.py`):
268 |
269 | Pull Requests (4):
270 |
271 | * :ghpull:`12`: b'Clean up for next pypi release'
272 | * :ghpull:`11`: b'move notebooks outside of the package'
273 | * :ghpull:`9`: b'ENH: move classifiers up into init'
274 | * :ghpull:`8`: b'Moving to python 3+'
275 |
276 | Issues (4):
277 |
278 | * :ghissue:`12`: b'Clean up for next pypi release'
279 | * :ghissue:`11`: b'move notebooks outside of the package'
280 | * :ghissue:`9`: b'ENH: move classifiers up into init'
281 | * :ghissue:`8`: b'Moving to python 3+'
282 |
283 |
284 | # Version 1.0.1 (2017-08-17)
285 |
286 | - Warnings added when duplicate values make quantiles ill-defined
287 | - Faster digitize in place of list comprehension
288 | - Bug fix for consistent treatment of intervals (closed on the right, open on the left)
289 |
290 | v<1.0.0dev> 2017-04-21
291 |
292 | - alpha release
293 |
294 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Copyright 2018 PySAL-mapclassify Developers
2 |
3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
4 |
5 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
6 |
7 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
8 |
9 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
10 |
11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
12 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # mapclassify: Classification Schemes for Choropleth Maps
2 |
3 | [](https://github.com/pysal/mapclassify/actions/workflows/testing.yml)
4 | [](https://codecov.io/gh/pysal/mapclassify)
5 | [](https://badge.fury.io/py/mapclassify)
6 | [](https://zenodo.org/badge/latestdoi/88918063)
7 | [](https://opensource.org/licenses/BSD-3-Clause)
8 | [](https://github.com/astral-sh/ruff)
9 | [](https://mybinder.org/v2/gh/pysal/mapclassify/main)
10 |
11 | `mapclassify` implements a family of classification schemes for choropleth maps.
12 | Its focus is on the determination of the number of classes, and the assignment
13 | of observations to those classes. It is intended for use with upstream mapping
14 | and geovisualization packages (see
15 | [geopandas](https://geopandas.org/mapping.html))
16 | that handle the rendering of the maps.
17 |
 18 | For further theoretical background see [Rey, S.J., D. Arribas-Bel, and L.J. Wolf (2020) "Geographic Data Science with PySAL and the PyData Stack"](https://geographicdata.science/book/notebooks/05_choropleth.html).
19 |
20 | ## Using `mapclassify`
21 | Load built-in example data reporting employment density in 58 California counties:
22 |
23 | ```python
24 | >>> import mapclassify
25 | >>> y = mapclassify.load_example()
26 | >>> y.mean()
27 | 125.92810344827588
28 | >>> y.min(), y.max()
29 | (0.13, 4111.4499999999998)
30 |
31 | ```
32 |
33 | ## Map Classifiers Supported
34 |
35 | ### BoxPlot
36 |
37 | ```python
38 | >>> mapclassify.BoxPlot(y)
39 | BoxPlot
40 |
41 | Interval Count
42 | --------------------------
43 | ( -inf, -52.88] | 0
44 | ( -52.88, 2.57] | 15
45 | ( 2.57, 9.36] | 14
46 | ( 9.36, 39.53] | 14
47 | ( 39.53, 94.97] | 6
48 | ( 94.97, 4111.45] | 9
49 | ```
50 |
51 |
52 |
53 | ### EqualInterval
54 |
55 | ```python
56 | >>> mapclassify.EqualInterval(y)
57 | EqualInterval
58 |
59 | Interval Count
60 | --------------------------
61 | [ 0.13, 822.39] | 57
62 | ( 822.39, 1644.66] | 0
63 | (1644.66, 2466.92] | 0
64 | (2466.92, 3289.19] | 0
65 | (3289.19, 4111.45] | 1
66 | ```
67 |
68 | ### FisherJenks
69 |
70 | ```python
71 | >>> import numpy as np
72 | >>> np.random.seed(123456)
73 | >>> mapclassify.FisherJenks(y, k=5)
74 | FisherJenks
75 |
76 | Interval Count
77 | --------------------------
78 | [ 0.13, 75.29] | 49
79 | ( 75.29, 192.05] | 3
80 | ( 192.05, 370.50] | 4
81 | ( 370.50, 722.85] | 1
82 | ( 722.85, 4111.45] | 1
83 | ```
84 |
85 | ### FisherJenksSampled
86 |
87 | ```python
88 | >>> np.random.seed(123456)
89 | >>> x = np.random.exponential(size=(10000,))
90 | >>> mapclassify.FisherJenks(x, k=5)
91 | FisherJenks
92 |
93 | Interval Count
94 | ----------------------
95 | [ 0.00, 0.64] | 4694
96 | ( 0.64, 1.45] | 2922
97 | ( 1.45, 2.53] | 1584
98 | ( 2.53, 4.14] | 636
99 | ( 4.14, 10.61] | 164
100 |
101 | >>> mapclassify.FisherJenksSampled(x, k=5)
102 | FisherJenksSampled
103 |
104 | Interval Count
105 | ----------------------
106 | [ 0.00, 0.70] | 5020
107 | ( 0.70, 1.63] | 2952
108 | ( 1.63, 2.88] | 1454
109 | ( 2.88, 5.32] | 522
110 | ( 5.32, 10.61] | 52
111 | ```
112 |
113 | ### HeadTailBreaks
114 |
115 | ```python
116 | >>> mapclassify.HeadTailBreaks(y)
117 | HeadTailBreaks
118 |
119 | Interval Count
120 | --------------------------
121 | [ 0.13, 125.93] | 50
122 | ( 125.93, 811.26] | 7
123 | ( 811.26, 4111.45] | 1
124 | ```
125 |
126 | ### JenksCaspall
127 |
128 | ```python
129 | >>> mapclassify.JenksCaspall(y, k=5)
130 | JenksCaspall
131 |
132 | Interval Count
133 | --------------------------
134 | [ 0.13, 1.81] | 14
135 | ( 1.81, 7.60] | 13
136 | ( 7.60, 29.82] | 14
137 | ( 29.82, 181.27] | 10
138 | ( 181.27, 4111.45] | 7
139 | ```
140 |
141 | ### JenksCaspallForced
142 |
143 | ```python
144 | >>> mapclassify.JenksCaspallForced(y, k=5)
145 | JenksCaspallForced
146 |
147 | Interval Count
148 | --------------------------
149 | [ 0.13, 1.34] | 12
150 | ( 1.34, 5.90] | 12
151 | ( 5.90, 16.70] | 13
152 | ( 16.70, 50.65] | 9
153 | ( 50.65, 4111.45] | 12
154 | ```
155 |
156 | ### JenksCaspallSampled
157 |
158 | ```python
159 | >>> mapclassify.JenksCaspallSampled(y, k=5)
160 | JenksCaspallSampled
161 |
162 | Interval Count
163 | --------------------------
164 | [ 0.13, 12.02] | 33
165 | ( 12.02, 29.82] | 8
166 | ( 29.82, 75.29] | 8
167 | ( 75.29, 192.05] | 3
168 | ( 192.05, 4111.45] | 6
169 | ```
170 |
171 | ### MaxP
172 |
173 | ```python
174 | >>> mapclassify.MaxP(y)
175 | MaxP
176 |
177 | Interval Count
178 | --------------------------
179 | [ 0.13, 8.70] | 29
180 | ( 8.70, 16.70] | 8
181 | ( 16.70, 20.47] | 1
182 | ( 20.47, 66.26] | 10
183 | ( 66.26, 4111.45] | 10
184 | ```
185 |
186 | ### [MaximumBreaks](notebooks/maximum_breaks.ipynb)
187 |
188 | ```python
189 | >>> mapclassify.MaximumBreaks(y, k=5)
190 | MaximumBreaks
191 |
192 | Interval Count
193 | --------------------------
194 | [ 0.13, 146.00] | 50
195 | ( 146.00, 228.49] | 2
196 | ( 228.49, 546.67] | 4
197 | ( 546.67, 2417.15] | 1
198 | (2417.15, 4111.45] | 1
199 | ```
200 |
201 | ### NaturalBreaks
202 |
203 | ```python
204 | >>> mapclassify.NaturalBreaks(y, k=5)
205 | NaturalBreaks
206 |
207 | Interval Count
208 | --------------------------
209 | [ 0.13, 75.29] | 49
210 | ( 75.29, 192.05] | 3
211 | ( 192.05, 370.50] | 4
212 | ( 370.50, 722.85] | 1
213 | ( 722.85, 4111.45] | 1
214 | ```
215 |
216 | ### Quantiles
217 |
218 | ```python
219 | >>> mapclassify.Quantiles(y, k=5)
220 | Quantiles
221 |
222 | Interval Count
223 | --------------------------
224 | [ 0.13, 1.46] | 12
225 | ( 1.46, 5.80] | 11
226 | ( 5.80, 13.28] | 12
227 | ( 13.28, 54.62] | 11
228 | ( 54.62, 4111.45] | 12
229 | ```
230 |
231 | ### Percentiles
232 |
233 | ```python
234 | >>> mapclassify.Percentiles(y, pct=[33, 66, 100])
235 | Percentiles
236 |
237 | Interval Count
238 | --------------------------
239 | [ 0.13, 3.36] | 19
240 | ( 3.36, 22.86] | 19
241 | ( 22.86, 4111.45] | 20
242 | ```
243 |
244 | ### PrettyBreaks
245 | ```python
246 | >>> np.random.seed(123456)
247 | >>> x = np.random.randint(0, 10000, (100,1))
248 | >>> mapclassify.PrettyBreaks(x)
249 | Pretty
250 |
251 | Interval Count
252 | ----------------------------
253 | [ 300.00, 2000.00] | 23
254 | ( 2000.00, 4000.00] | 15
255 | ( 4000.00, 6000.00] | 18
256 | ( 6000.00, 8000.00] | 24
257 | ( 8000.00, 10000.00] | 20
258 | ```
259 |
260 | ### StdMean
261 |
262 | ```python
263 | >>> mapclassify.StdMean(y)
264 | StdMean
265 |
266 | Interval Count
267 | --------------------------
268 | ( -inf, -967.36] | 0
269 | (-967.36, -420.72] | 0
270 | (-420.72, 672.57] | 56
271 | ( 672.57, 1219.22] | 1
272 | (1219.22, 4111.45] | 1
273 | ```
274 | ### UserDefined
275 |
276 | ```python
277 | >>> mapclassify.UserDefined(y, bins=[22, 674, 4112])
278 | UserDefined
279 |
280 | Interval Count
281 | --------------------------
282 | [ 0.13, 22.00] | 38
283 | ( 22.00, 674.00] | 18
284 | ( 674.00, 4112.00] | 2
285 | ```
286 |
287 | ## Alternative API
288 |
289 | As of version 2.4.0 the API has been extended. A `classify` function is now
290 | available for a streamlined interface:
291 |
292 | ```python
293 | >>> mapclassify.classify(y, 'boxplot')
294 | BoxPlot
295 |
296 | Interval Count
297 | --------------------------
298 | ( -inf, -52.88] | 0
299 | ( -52.88, 2.57] | 15
300 | ( 2.57, 9.36] | 14
301 | ( 9.36, 39.53] | 14
302 | ( 39.53, 94.97] | 6
303 | ( 94.97, 4111.45] | 9
304 |
305 | ```
306 |
307 |
308 |
309 |
310 | ## Use Cases
311 |
312 | ### Creating and using a classification instance
313 |
314 | ```python
315 | >>> bp = mapclassify.BoxPlot(y)
316 | >>> bp
317 | BoxPlot
318 |
319 | Interval Count
320 | --------------------------
321 | ( -inf, -52.88] | 0
322 | ( -52.88, 2.57] | 15
323 | ( 2.57, 9.36] | 14
324 | ( 9.36, 39.53] | 14
325 | ( 39.53, 94.97] | 6
326 | ( 94.97, 4111.45] | 9
327 |
328 | >>> bp.bins
329 | array([ -5.28762500e+01, 2.56750000e+00, 9.36500000e+00,
330 | 3.95300000e+01, 9.49737500e+01, 4.11145000e+03])
331 | >>> bp.counts
332 | array([ 0, 15, 14, 14, 6, 9])
333 | >>> bp.yb
334 | array([5, 1, 2, 3, 2, 1, 5, 1, 3, 3, 1, 2, 2, 1, 2, 2, 2, 1, 5, 2, 4, 1, 2,
335 | 2, 1, 1, 3, 3, 3, 5, 3, 1, 3, 5, 2, 3, 5, 5, 4, 3, 5, 3, 5, 4, 2, 1,
336 | 1, 4, 4, 3, 3, 1, 1, 2, 1, 4, 3, 2])
337 |
338 | ```
339 |
340 | ### Binning new data
341 |
342 | ```python
343 | >>> bp = mapclassify.BoxPlot(y)
344 | >>> bp
345 | BoxPlot
346 |
347 | Interval Count
348 | --------------------------
349 | ( -inf, -52.88] | 0
350 | ( -52.88, 2.57] | 15
351 | ( 2.57, 9.36] | 14
352 | ( 9.36, 39.53] | 14
353 | ( 39.53, 94.97] | 6
354 | ( 94.97, 4111.45] | 9
355 | >>> bp.find_bin([0, 7, 3000, 48])
356 | array([1, 2, 5, 4])
357 |
358 | ```
359 | Note that `find_bin` does not recalibrate the classifier:
360 | ```python
361 | >>> bp
362 | BoxPlot
363 |
364 | Interval Count
365 | --------------------------
366 | ( -inf, -52.88] | 0
367 | ( -52.88, 2.57] | 15
368 | ( 2.57, 9.36] | 14
369 | ( 9.36, 39.53] | 14
370 | ( 39.53, 94.97] | 6
371 | ( 94.97, 4111.45] | 9
372 | ```
373 | ### Apply
374 |
375 | ```python
376 | >>> import mapclassify
377 | >>> import pandas
378 | >>> from numpy import linspace as lsp
379 | >>> data = [lsp(3,8,num=10), lsp(10, 0, num=10), lsp(-5, 15, num=10)]
380 | >>> data = pandas.DataFrame(data).T
381 | >>> data
382 | 0 1 2
383 | 0 3.000000 10.000000 -5.000000
384 | 1 3.555556 8.888889 -2.777778
385 | 2 4.111111 7.777778 -0.555556
386 | 3 4.666667 6.666667 1.666667
387 | 4 5.222222 5.555556 3.888889
388 | 5 5.777778 4.444444 6.111111
389 | 6 6.333333 3.333333 8.333333
390 | 7 6.888889 2.222222 10.555556
391 | 8 7.444444 1.111111 12.777778
392 | 9 8.000000 0.000000 15.000000
393 | >>> data.apply(mapclassify.Quantiles.make(rolling=True))
394 | 0 1 2
395 | 0 0 4 0
396 | 1 0 4 0
397 | 2 1 4 0
398 | 3 1 3 0
399 | 4 2 2 1
400 | 5 2 1 2
401 | 6 3 0 4
402 | 7 3 0 4
403 | 8 4 0 4
404 | 9 4 0 4
405 |
406 | ```
407 |
408 |
409 | ## Development Notes
410 |
411 | Because we use `geopandas` in development, and geopandas has stable `mapclassify` as a dependency, setting up a local development installation involves creating a conda environment, then replacing the stable `mapclassify` with the development version of `mapclassify` in the development environment. This can be accomplished with the following steps:
412 |
413 |
414 | ```
415 | conda env create -f environment.yml
416 | conda activate mapclassify
417 | conda remove -n mapclassify mapclassify
418 | pip install -e .
419 | ```
420 |
--------------------------------------------------------------------------------
/ci/311-latest.yaml:
--------------------------------------------------------------------------------
1 | name: test
2 | channels:
3 | - conda-forge
4 | dependencies:
5 | - python=3.11
6 | # required
7 | - networkx
8 | - numpy
9 | - pandas
10 | - scikit-learn
11 | - scipy
12 | # testing
13 | - geopandas
14 | - libpysal
15 | - pytest
16 | - pytest-cov
17 | - pytest-xdist
18 | - pytest-mpl
19 | - codecov
20 | - matplotlib
21 |
--------------------------------------------------------------------------------
/ci/311-numba-latest.yaml:
--------------------------------------------------------------------------------
1 | name: test
2 | channels:
3 | - conda-forge
4 | dependencies:
5 | - python=3.11
6 | # required
7 | - networkx
8 | - numpy
9 | - pandas
10 | - scikit-learn
11 | - scipy
12 | # testing
13 | - geopandas
14 | - libpysal
15 | - pytest
16 | - pytest-cov
17 | - pytest-xdist
18 | - pytest-mpl
19 | - codecov
20 | - matplotlib
21 | # optional
22 | - numba
23 |
--------------------------------------------------------------------------------
/ci/311-oldest.yaml:
--------------------------------------------------------------------------------
1 | name: test
2 | channels:
3 | - conda-forge
4 | dependencies:
5 | - python=3.11
6 | # required
7 | - networkx=3.2
8 | - numpy=1.26
9 | - pandas=2.1
10 | - scikit-learn=1.4
11 | - scipy=1.12
12 | # testing
13 | - fiona
14 | - geopandas
15 | - libpysal
16 | - pytest
17 | - pytest-cov
18 | - pytest-xdist
19 | - codecov
20 | - matplotlib
21 |
--------------------------------------------------------------------------------
/ci/312-latest.yaml:
--------------------------------------------------------------------------------
1 | name: test
2 | channels:
3 | - conda-forge
4 | dependencies:
5 | - python=3.12
6 | # required
7 | - networkx
8 | - numpy
9 | - pandas
10 | - scikit-learn
11 | - scipy
12 | # testing
13 | - geopandas
14 | - libpysal
15 | - pytest
16 | - pytest-cov
17 | - pytest-xdist
18 | - pytest-mpl
19 | - codecov
20 | - matplotlib
21 |
--------------------------------------------------------------------------------
/ci/312-numba-latest.yaml:
--------------------------------------------------------------------------------
1 | name: test
2 | channels:
3 | - conda-forge
4 | dependencies:
5 | - python=3.12
6 | # required
7 | - networkx
8 | - numpy
9 | - pandas
10 | - scikit-learn
11 | - scipy
12 | # testing
13 | - geopandas
14 | - libpysal
15 | - pytest
16 | - pytest-cov
17 | - pytest-xdist
18 | - pytest-doctestplus
19 | - pytest-mpl
20 | - codecov
21 | - matplotlib
22 | # optional
23 | - numba
24 |
--------------------------------------------------------------------------------
/ci/313-dev.yaml:
--------------------------------------------------------------------------------
1 | name: test
2 | channels:
3 | - conda-forge
4 | dependencies:
5 | - python=3.13
6 | # testing
7 | - pytest
8 | - pytest-cov
9 | - pytest-xdist
10 | - codecov
11 | # optional
12 | - pyproj
13 | - pip
14 | - pip:
15 | - --pre --index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple --extra-index-url https://pypi.org/simple
16 | - scipy
17 | - scikit-learn
18 | - pandas
19 | - networkx
20 | - matplotlib
21 | - shapely
22 | - fiona
23 | - pytest-mpl
24 | - git+https://github.com/pysal/libpysal.git@main
25 | - git+https://github.com/geopandas/geopandas.git@main
26 |
--------------------------------------------------------------------------------
/ci/313-latest.yaml:
--------------------------------------------------------------------------------
1 | name: test
2 | channels:
3 | - conda-forge
4 | dependencies:
5 | - python=3.13
6 | # required
7 | - networkx
8 | - numpy
9 | - pandas
10 | - scikit-learn
11 | - scipy
12 | # testing
13 | - geopandas
14 | - libpysal
15 | - pytest
16 | - pytest-cov
17 | - pytest-xdist
18 | - pytest-mpl
19 | - codecov
20 | - matplotlib
21 | # docs
22 | - nbsphinx
23 | - numpydoc
24 | - sphinx
25 | - sphinx-gallery
26 | - sphinxcontrib-bibtex
27 | - sphinx_bootstrap_theme
28 |
--------------------------------------------------------------------------------
/ci/313-numba-latest.yaml:
--------------------------------------------------------------------------------
1 | name: test
2 | channels:
3 | - conda-forge
4 | dependencies:
5 | - python=3.13
6 | # required
7 | - networkx
8 | - numpy
9 | - pandas
10 | - scikit-learn
11 | - scipy
12 | # testing
13 | - geopandas
14 | - libpysal
15 | - pytest
16 | - pytest-cov
17 | - pytest-xdist
18 | - pytest-doctestplus
19 | - pytest-mpl
20 | - codecov
21 | - matplotlib
22 | # optional
23 | - numba
24 |
--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
1 | codecov:
2 | notify:
3 | after_n_builds: 10
4 | coverage:
5 | range: 50..95
6 | round: nearest
7 | precision: 1
8 | status:
9 | project:
10 | default:
11 | threshold: 2%
12 | patch:
13 | default:
14 | threshold: 2%
15 | target: 80%
16 | ignore:
17 | - "mapclassify/tests/*"
18 | comment:
19 | layout: "reach, diff, files"
20 | behavior: once
21 | after_n_builds: 10
22 | require_changes: true
23 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | SPHINXPROJ = mapclassify
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @rsync -r --exclude '.ipynb_checkpoints/' ../notebooks/ ./notebooks/
21 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
22 |
23 | github:
24 | @make html
25 |
26 | sync:
27 | @rsync -avh _build/html/ ../docs/ --delete
28 | @make clean
29 | touch .nojekyll
30 |
31 | clean:
32 | rm -rf $(BUILDDIR)/*
33 | rm -rf auto_examples/
34 | rm -rf generated/
35 |
--------------------------------------------------------------------------------
/docs/_static/auto/references.el:
--------------------------------------------------------------------------------
1 | (TeX-add-style-hook
2 | "references"
3 | (lambda ()
4 | (LaTeX-add-bibitems
5 | "Jiang_2013"
6 | "Rey_2016"))
7 | :bibtex)
8 |
9 |
--------------------------------------------------------------------------------
/docs/_static/images/equalinterval.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/docs/_static/images/equalinterval.png
--------------------------------------------------------------------------------
/docs/_static/images/fisherjenks.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/docs/_static/images/fisherjenks.png
--------------------------------------------------------------------------------
/docs/_static/images/hr60fj10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/docs/_static/images/hr60fj10.png
--------------------------------------------------------------------------------
/docs/_static/images/hr60mb10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/docs/_static/images/hr60mb10.png
--------------------------------------------------------------------------------
/docs/_static/images/hr60q10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/docs/_static/images/hr60q10.png
--------------------------------------------------------------------------------
/docs/_static/images/pysal_favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/docs/_static/images/pysal_favicon.ico
--------------------------------------------------------------------------------
/docs/_static/images/quantiles.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/docs/_static/images/quantiles.png
--------------------------------------------------------------------------------
/docs/_static/pysal-styles.css:
--------------------------------------------------------------------------------
1 | /* Make thumbnails with equal heights */
2 | @media only screen and (min-width : 500px) {
3 | .row.equal-height {
4 | display: flex;
5 | flex-wrap: wrap;
6 | }
7 | .row.equal-height > [class*='col-'] {
8 | display: flex;
9 | flex-direction: column;
10 | }
11 | .row.equal-height.row:after,
12 | .row.equal-height.row:before {
13 | display: flex;
14 | }
15 |
16 | .row.equal-height > [class*='col-'] > .thumbnail,
17 | .row.equal-height > [class*='col-'] > .thumbnail > .caption {
18 | display: flex;
19 | flex: .9 .1 auto;
20 | flex-direction: column;
21 | }
22 | .row.equal-height > [class*='col-'] > .thumbnail > .caption > .flex-text {
23 | flex-grow: 1;
24 | }
25 | .row.equal-height > [class*='col-'] > .thumbnail > img {
26 | width: 350px;
27 | height: 200%; /* force image's height */
28 |
29 | /* force image fit inside it's "box" */
30 | -webkit-object-fit: cover;
31 | -moz-object-fit: cover;
32 | -ms-object-fit: cover;
33 | -o-object-fit: cover;
34 | object-fit: cover;
35 | }
36 | }
37 |
38 | .row.extra-bottom-padding{
39 | margin-bottom: 20px;
40 | }
41 |
42 |
43 | .topnavicons {
44 | margin-left: 10% !important;
45 | }
46 |
47 | .topnavicons li {
48 | margin-left: 0px !important;
49 | min-width: 100px;
50 | text-align: center;
51 | }
52 |
53 | .topnavicons .thumbnail {
54 | margin-right: 10px;
55 | border: none;
56 | box-shadow: none;
57 | text-align: center;
58 | font-size: 85%;
59 | font-weight: bold;
60 | line-height: 10px;
61 | height: 100px;
62 | }
63 |
64 | .topnavicons .thumbnail img {
65 | display: block;
66 | margin-left: auto;
67 | margin-right: auto;
68 | }
69 |
70 |
71 | /* Table with a scrollbar */
72 | .bodycontainer { max-height: 800px; width: 100%; margin: 0; padding: 0; overflow-y: auto; }
73 | .table-scrollable { margin: 0; padding: 0; }
74 |
75 | .label {
76 | color: #E74C3C;
77 | font-size: 100%;
78 | font-weight: bold;
79 | width: 100px;
80 | text-align: left;
81 | vertical-align: middle;
82 | }
83 |
84 | div.body {
85 | max-width: 1080px;
86 | }
87 |
88 | table.longtable.align-default{
89 | text-align: left;
90 | }
--------------------------------------------------------------------------------
/docs/_static/references.bib:
--------------------------------------------------------------------------------
1 | %% This BibTeX bibliography file was created using BibDesk.
2 | %% http://bibdesk.sourceforge.net/
3 |
4 | %% Created for Wei Kang at 2018-10-25 22:16:36 -0700
5 |
6 |
7 | %% Saved with string encoding Unicode (UTF-8)
8 |
9 |
10 |
11 | @article{Jiang_2013,
12 | Author = {Jiang, Bin},
13 | Doi = {10.1080/00330124.2012.700499},
14 | Issn = {1467-9272},
15 | Journal = {The Professional Geographer},
16 | Month = {Aug},
17 | Number = 3,
18 | Pages = {482--494},
19 | Publisher = {Informa UK Limited},
20 | Title = {Head/Tail Breaks: A New Classification Scheme for Data with a Heavy-Tailed Distribution},
21 | Url = {http://dx.doi.org/10.1080/00330124.2012.700499},
22 | Volume = 65,
23 | Year = 2013,
24 | Bdsk-Url-1 = {http://dx.doi.org/10.1080/00330124.2012.700499}}
25 |
26 | @article{Rey_2016,
27 | Author = {Rey, Sergio J. and Stephens, Philip and Laura, Jason},
28 | Doi = {10.1111/tgis.12236},
29 | Issn = {1361-1682},
30 | Journal = {Transactions in GIS},
31 | Month = {Oct},
32 | Number = 4,
33 | Pages = {796--810},
34 | Publisher = {Wiley},
35 | Title = {An evaluation of sampling and full enumeration strategies for {Fisher Jenks} classification in big data settings},
36 | Url = {http://dx.doi.org/10.1111/tgis.12236},
37 | Volume = 21,
38 | Year = 2016,
39 | Bdsk-Url-1 = {http://dx.doi.org/10.1111/tgis.12236}}
40 |
41 | @book{Slocum_2009,
42 | Author = {Slocum, Terry A. and McMaster, Robert B. and Kessler, Fritz C. and Howard, Hugh H.},
43 | Publisher = {Pearson Prentice Hall, Upper Saddle River},
44 | Title = {Thematic cartography and geovisualization},
45 | Year = {2009}}
46 |
--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
1 | .. _api_ref:
2 |
3 | .. currentmodule:: mapclassify
4 |
5 | API reference
6 | =============
7 |
8 | .. _classifiers_api:
9 |
10 | Classifiers
11 | -----------
12 |
13 | .. autosummary::
14 | :toctree: generated/
15 |
16 | mapclassify.BoxPlot
17 | mapclassify.EqualInterval
18 | mapclassify.FisherJenks
19 | mapclassify.FisherJenksSampled
20 | mapclassify.greedy
21 | mapclassify.HeadTailBreaks
22 | mapclassify.JenksCaspall
23 | mapclassify.JenksCaspallForced
24 | mapclassify.JenksCaspallSampled
25 | mapclassify.MaxP
26 | mapclassify.MaximumBreaks
27 | mapclassify.NaturalBreaks
28 | mapclassify.Percentiles
29 | mapclassify.PrettyBreaks
30 | mapclassify.Quantiles
31 | mapclassify.StdMean
32 | mapclassify.UserDefined
33 |
34 | Utilities
35 | ---------
36 |
37 | .. autosummary::
38 | :toctree: generated/
39 |
40 | mapclassify.KClassifiers
41 | mapclassify.Pooled
42 | mapclassify.classify
43 | mapclassify.gadf
44 | mapclassify.util.get_color_array
45 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # mapclassify documentation build configuration file, created by
2 | # sphinx-quickstart on Wed Jun 6 15:54:22 2018.
3 | #
4 | # This file is execfile()d with the current directory set to its
5 | # containing dir.
6 | #
7 | # Note that not all possible configuration values are present in this
8 | # autogenerated file.
9 | #
10 | # All configuration values have a default; values that are commented out
11 | # serve to show the default.
12 |
13 | import os
14 |
15 | # If extensions (or modules to document with autodoc) are in another directory,
16 | # add these directories to sys.path here. If the directory is relative to the
17 | # documentation root, use os.path.abspath to make it absolute, like shown here.
18 | #
19 | import sys
20 |
21 | import sphinx_bootstrap_theme
22 |
23 | sys.path.insert(0, os.path.abspath("../"))
24 |
25 | # import your package to obtain the version info to display on the docs website
26 | import mapclassify
27 |
28 | # -- General configuration ------------------------------------------------
29 |
30 | # If your documentation needs a minimal Sphinx version, state it here.
31 | #
32 | # needs_sphinx = '1.0'
33 | # Add any Sphinx extension module names here, as strings. They can be
34 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
35 | # ones.
36 | extensions = [ #'sphinx_gallery.gen_gallery',
37 | "sphinx.ext.autodoc",
38 | "sphinx.ext.autosummary",
39 | "sphinx.ext.viewcode",
40 | "sphinxcontrib.bibtex",
41 | "sphinx.ext.mathjax",
42 | "sphinx.ext.doctest",
43 | "sphinx.ext.intersphinx",
44 | "numpydoc",
45 | "matplotlib.sphinxext.plot_directive",
46 | "nbsphinx",
47 | ]
48 |
49 | bibtex_bibfiles = ["_static/references.bib"]
50 |
51 | # Add any paths that contain templates here, relative to this directory.
52 | templates_path = ["_templates"]
53 |
54 | # The suffix(es) of source filenames.
55 | # You can specify multiple suffix as a list of string:
56 | #
57 | # source_suffix = ['.rst', '.md']
58 | source_suffix = ".rst"
59 |
60 | # The master toctree document.
61 | master_doc = "index"
62 |
63 | # General information about the project.
64 | project = "mapclassify" # string of your project name, for example, 'giddy'
65 | copyright = "2018-, pysal developers"
66 | author = "pysal developers"
67 |
68 | # The version info for the project you're documenting, acts as replacement for
69 | # |version| and |release|, also used in various other places throughout the
70 | # built documents.
71 | #
72 | # The full version.
73 | version = mapclassify.__version__ # should replace it with your PACKAGE_NAME
74 | release = mapclassify.__version__ # should replace it with your PACKAGE_NAME
75 |
76 | # The language for content autogenerated by Sphinx. Refer to documentation
77 | # for a list of supported languages.
78 | #
79 | # This is also used if you do content translation via gettext catalogs.
80 | # Usually you set "language" from the command line for these cases.
81 | language = "en"
82 |
83 | # List of patterns, relative to source directory, that match files and
84 | # directories to ignore when looking for source files.
85 | # This patterns also effect to html_static_path and html_extra_path
86 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "tests/*"]
87 |
88 | # The name of the Pygments (syntax highlighting) style to use.
89 | pygments_style = "sphinx"
90 |
91 | # If true, `todo` and `todoList` produce output, else they produce nothing.
92 | todo_include_todos = False
93 |
94 | # -- Options for HTML output ----------------------------------------------
95 |
96 | # The theme to use for HTML and HTML Help pages. See the documentation for
97 | # a list of builtin themes.
98 | #
99 | # html_theme = 'alabaster'
100 | html_theme = "bootstrap"
101 | html_theme_path = sphinx_bootstrap_theme.get_html_theme_path()
102 | html_title = f"{project} v{version} Manual"
103 |
104 | # (Optional) Logo of your package.
105 | # Should be small enough to fit the navbar (ideally 24x24).
106 | # Path should be relative to the ``_static`` files directory.
107 | # html_logo = "_static/images/package_logo.jpg"
108 |
109 | # (Optional) PySAL favicon
110 | html_favicon = "_static/images/pysal_favicon.ico"
111 |
112 |
113 | # Theme options are theme-specific and customize the look and feel of a theme
114 | # further. For a list of options available for each theme, see the
115 | # documentation.
116 | #
117 | html_theme_options = {
118 | # Navigation bar title. (Default: ``project`` value)
119 | "navbar_title": project, # string of your project name, for example, 'giddy'
120 | # Render the next and previous page links in navbar. (Default: true)
121 | "navbar_sidebarrel": False,
122 | # Render the current pages TOC in the navbar. (Default: true)
123 | #'navbar_pagenav': True,
124 | #'navbar_pagenav': False,
125 | # No sidebar
126 | "nosidebar": True,
127 | # Tab name for the current pages TOC. (Default: "Page")
128 | #'navbar_pagenav_name': "Page",
129 | # Global TOC depth for "site" navbar tab. (Default: 1)
130 | # Switching to -1 shows all levels.
131 | "globaltoc_depth": 2,
132 | # Include hidden TOCs in Site navbar?
133 | #
134 | # Note: If this is "false", you cannot have mixed ``:hidden:`` and
135 | # non-hidden ``toctree`` directives in the same page, or else the build
136 | # will break.
137 | #
138 | # Values: "true" (default) or "false"
139 | "globaltoc_includehidden": "true",
140 | # HTML navbar class (Default: "navbar") to attach to <div> element.
141 | # For black navbar, do "navbar navbar-inverse"
142 | #'navbar_class': "navbar navbar-inverse",
143 | # Fix navigation bar to top of page?
144 | # Values: "true" (default) or "false"
145 | "navbar_fixed_top": "true",
146 | # Location of link to source.
147 | # Options are "nav" (default), "footer" or anything else to exclude.
148 | "source_link_position": "footer",
149 | # Bootswatch (http://bootswatch.com/) theme.
150 | #
151 | # Options are nothing (default) or the name of a valid theme
152 | # such as "amelia" or "cosmo", "yeti", "flatly".
153 | "bootswatch_theme": "yeti",
154 | # Choose Bootstrap version.
155 | # Values: "3" (default) or "2" (in quotes)
156 | "bootstrap_version": "3",
157 | # Navigation bar menu
158 | "navbar_links": [
159 | ("Installation", "installation"),
160 | ("Tutorial", "tutorial"),
161 | ("API", "api"),
162 | ("References", "references"),
163 | ],
164 | }
165 |
166 | # Add any paths that contain custom static files (such as style sheets) here,
167 | # relative to this directory. They are copied after the builtin static files,
168 | # so a file named "default.css" will overwrite the builtin "default.css".
169 | html_static_path = ["_static"]
170 |
171 | # Custom sidebar templates, maps document names to template names.
172 | # html_sidebars = {}
173 | # html_sidebars = {'sidebar': ['localtoc.html', 'sourcelink.html', 'searchbox.html']}
174 |
175 | # -- Options for HTMLHelp output ------------------------------------------
176 |
177 | # Output file base name for HTML help builder.
178 | htmlhelp_basename = project + "doc"
179 |
180 |
181 | # -- Options for LaTeX output ---------------------------------------------
182 |
183 | latex_elements = {
184 | # The paper size ('letterpaper' or 'a4paper').
185 | #
186 | # 'papersize': 'letterpaper',
187 | # The font size ('10pt', '11pt' or '12pt').
188 | #
189 | # 'pointsize': '10pt',
190 | # Additional stuff for the LaTeX preamble.
191 | #
192 | # 'preamble': '',
193 | # Latex figure (float) alignment
194 | #
195 | # 'figure_align': 'htbp',
196 | }
197 |
198 | # Grouping the document tree into LaTeX files. List of tuples
199 | # (source start file, target name, title,
200 | # author, documentclass [howto, manual, or own class]).
201 | latex_documents = [
202 | (
203 | master_doc,
204 | f"{project}.tex",
205 | f"{project} Documentation",
206 | "pysal developers",
207 | "manual",
208 | ),
209 | ]
210 |
211 |
212 | # -- Options for manual page output ---------------------------------------
213 |
214 | # One entry per manual page. List of tuples
215 | # (source start file, name, description, authors, manual section).
216 | man_pages = [(master_doc, project, f"{project} Documentation", [author], 1)]
217 |
218 |
219 | # -- Options for Texinfo output -------------------------------------------
220 |
221 | # Grouping the document tree into Texinfo files. List of tuples
222 | # (source start file, target name, title, author,
223 | # dir menu entry, description, category)
224 | texinfo_documents = [
225 | (
226 | master_doc,
227 | project,
228 | f"{project} Documentation",
229 | author,
230 | "PySAL Developers",
231 | "map classification schemes.",
232 | "Miscellaneous",
233 | ),
234 | ]
235 |
236 |
237 | # -----------------------------------------------------------------------------
238 | # Autosummary
239 | # -----------------------------------------------------------------------------
240 |
241 | # Generate the API documentation when building
242 | autosummary_generate = True
243 |
244 | # avoid showing members twice
245 | numpydoc_show_class_members = False
246 | numpydoc_use_plots = True
247 | class_members_toctree = True
248 | numpydoc_show_inherited_class_members = True
249 | numpydoc_xref_param_type = True
250 |
251 | # automatically document class members
252 | autodoc_default_options = {"members": True, "undoc-members": True}
253 |
254 | # display the source code for Plot directive
255 | plot_include_source = True
256 |
257 |
258 | def setup(app):
259 | app.add_css_file("pysal-styles.css")
260 |
261 |
262 | # Example configuration for intersphinx: refer to the Python standard library.
263 | intersphinx_mapping = {
264 | "geopandas": ("https://geopandas.org/en/latest/", None),
265 | "libpysal": ("https://pysal.org/libpysal/", None),
266 | "matplotlib": ("https://matplotlib.org/stable/", None),
267 | "networkx": ("https://networkx.org/documentation/stable/", None),
268 | "numpy": ("https://numpy.org/doc/stable/", None),
269 | "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
270 | "python": ("https://docs.python.org/3.13/", None),
271 | "scipy": ("https://docs.scipy.org/doc/scipy/", None),
272 | }
273 |
274 |
275 | # This is processed by Jinja2 and inserted before each notebook
276 | nbsphinx_prolog = r"""
277 | {% set docname = env.doc2path(env.docname, base=None) %}
278 | .. only:: html
279 |
280 | .. role:: raw-html(raw)
281 | :format: html
282 |
283 | .. nbinfo::
284 |
285 | This page was generated from `{{ docname }}`__.
286 | Interactive online version:
287 | :raw-html:`

`
288 |
289 | __ https://github.com/pysal/mapclassify/blob/main/{{ docname }}
290 | .. raw:: latex
291 |
292 | \nbsphinxstartnotebook{\scriptsize\noindent\strut
293 | \textcolor{gray}{The following section was generated from
294 | \sphinxcode{\sphinxupquote{\strut {{ docname | escape_latex }}}} \dotfill}}
295 | """ # noqa: E501
296 |
297 | # This is processed by Jinja2 and inserted after each notebook
298 | nbsphinx_epilog = r"""
299 | .. raw:: latex
300 |
301 | \nbsphinxstopnotebook{\scriptsize\noindent\strut
302 | \textcolor{gray}{\dotfill\ \sphinxcode{\sphinxupquote{\strut
303 | {{ env.doc2path(env.docname, base='doc') | escape_latex }}}} ends here.}}
304 | """
305 |
306 | # List of arguments to be passed to the kernel that executes the notebooks:
307 | nbsphinx_execute_arguments = [
308 | "--InlineBackend.figure_formats={'svg', 'pdf'}",
309 | "--InlineBackend.rc={'figure.dpi': 96}",
310 | ]
311 |
312 |
313 | mathjax3_config = {
314 | "TeX": {"equationNumbers": {"autoNumber": "AMS", "useLabelIds": True}},
315 | }
316 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. documentation master file
2 |
3 | mapclassify
4 | ===========
5 |
6 | mapclassify is an open-source python library for Choropleth map classification. It is part of `PySAL`_ the Python Spatial Analysis Library.
7 |
8 | .. raw:: html
9 |
10 |
11 |
12 |
13 |
14 |
22 |
23 |
24 |
32 |
33 |
34 |
43 |
44 |
45 |
46 |
47 |
48 |
49 | .. toctree::
50 | :hidden:
51 | :maxdepth: 3
52 | :caption: Contents:
53 |
54 | Installation
55 | Tutorial
56 | API
57 | References
58 |
59 |
60 | .. _PySAL: https://github.com/pysal/pysal
61 |
--------------------------------------------------------------------------------
/docs/installation.rst:
--------------------------------------------------------------------------------
1 | .. Installation
2 |
3 | Installation
4 | ============
5 |
6 | mapclassify supports python `3.11 <https://docs.python.org/3.11/>`_+. Please make sure that you are
7 | operating in a python 3 environment.
8 |
9 | Installing released version
10 | ---------------------------
11 |
12 | mapclassify is available on `conda`_ via the `conda-forge`_ channel::
13 |
14 | conda install -c conda-forge mapclassify
15 |
16 |
17 | mapclassify is also available on the `Python Package Index`_. Therefore, you can either
18 | install directly with `pip` from the command line::
19 |
20 | pip install -U mapclassify
21 |
22 |
23 | or download the source distribution (.tar.gz) and decompress it to your selected
24 | destination. Open a command shell and navigate to the decompressed folder.
25 | Type::
26 |
27 | pip install .
28 |
29 | Installing development version
30 | ------------------------------
31 |
32 | You might want to use the newest features in the development
33 | version of mapclassify on github - `pysal/mapclassify`_ - which have not yet been incorporated
34 | in the PyPI released version. You can achieve that by installing `pysal/mapclassify`_
35 | by running the following from a command shell::
36 |
37 | pip install git+https://github.com/pysal/mapclassify.git
38 |
39 | You can also `fork`_ the `pysal/mapclassify`_ repo and create a local clone of
40 | your fork. By making changes
41 | to your local clone and submitting a pull request to `pysal/mapclassify`_, you can
42 | contribute to mapclassify development.
43 |
44 | .. _3.9: https://docs.python.org/3.9/
45 | .. _conda: https://docs.conda.io/en/latest/
46 | .. _conda-forge: https://anaconda.org/conda-forge/mapclassify
47 | .. _Python Package Index: https://pypi.org/project/mapclassify/
48 | .. _pysal/mapclassify: https://github.com/pysal/mapclassify
49 | .. _fork: https://help.github.com/articles/fork-a-repo/
50 |
--------------------------------------------------------------------------------
/docs/references.rst:
--------------------------------------------------------------------------------
1 | .. reference for the docs
2 |
3 | References
4 | ==========
5 |
6 | .. bibliography:: _static/references.bib
7 | :all:
8 |
--------------------------------------------------------------------------------
/docs/tutorial.rst:
--------------------------------------------------------------------------------
1 | Tutorial
2 | ========
3 |
4 | .. toctree::
5 | :maxdepth: 1
6 | :caption: Contents:
7 |
8 | notebooks/01_maximum_breaks.ipynb
9 | notebooks/02_legends.ipynb
10 | notebooks/03_choropleth.ipynb
11 | notebooks/04_pooled.ipynb
12 | notebooks/05_Greedy_coloring.ipynb
13 | notebooks/06_api.ipynb
14 | notebooks/07_std_anchor.ipynb
15 | notebooks/08_manual_coloring.ipynb
16 | notebooks/09_legendgram.ipynb
17 |
18 |
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
# Run `conda env create -f environment.yml`
2 | name: mapclassify
3 | channels:
4 | - conda-forge
5 | dependencies:
6 | - python
7 | - geodatasets
8 | - geopandas
9 | - git
10 | - ipywidgets
11 | - jupyterlab
12 | - libpysal
13 | - lonboard
14 | - matplotlib
15 | - nbconvert
16 | - networkx
17 | - numba
18 | - palettable
19 | - pip
20 | - pyarrow
21 | - pydeck
22 | - scikit-learn
23 | - seaborn
24 | - shapely
25 | - pip:
26 | - git+https://github.com/pysal/mapclassify.git@main
27 |
--------------------------------------------------------------------------------
/mapclassify/__init__.py:
--------------------------------------------------------------------------------
1 | import contextlib
2 | from importlib.metadata import PackageNotFoundError, version
3 |
4 | from . import legendgram, util
5 | from ._classify_API import classify
6 | from .classifiers import (
7 | CLASSIFIERS,
8 | BoxPlot,
9 | EqualInterval,
10 | FisherJenks,
11 | FisherJenksSampled,
12 | HeadTailBreaks,
13 | JenksCaspall,
14 | JenksCaspallForced,
15 | JenksCaspallSampled,
16 | KClassifiers,
17 | MaximumBreaks,
18 | MaxP,
19 | NaturalBreaks,
20 | Percentiles,
21 | PrettyBreaks,
22 | Quantiles,
23 | StdMean,
24 | UserDefined,
25 | gadf,
26 | load_example,
27 | )
28 | from .greedy import greedy
29 | from .pooling import Pooled
30 |
31 | with contextlib.suppress(PackageNotFoundError):
32 | __version__ = version("mapclassify")
33 |
--------------------------------------------------------------------------------
/mapclassify/_classify_API.py:
--------------------------------------------------------------------------------
1 | from .classifiers import (
2 | BoxPlot,
3 | EqualInterval,
4 | FisherJenks,
5 | FisherJenksSampled,
6 | HeadTailBreaks,
7 | JenksCaspall,
8 | JenksCaspallForced,
9 | JenksCaspallSampled,
10 | MaximumBreaks,
11 | MaxP,
12 | NaturalBreaks,
13 | Percentiles,
14 | PrettyBreaks,
15 | Quantiles,
16 | StdMean,
17 | UserDefined,
18 | )
19 |
20 | __author__ = "Stefanie Lumnitz "
21 |
22 |
# Dispatch table used by ``classify``: maps normalized scheme names
# (lowercased, with underscores removed) to their classifier classes.
_classifiers = {
    "boxplot": BoxPlot,
    "equalinterval": EqualInterval,
    "fisherjenks": FisherJenks,
    "fisherjenkssampled": FisherJenksSampled,
    "headtailbreaks": HeadTailBreaks,
    "jenkscaspall": JenksCaspall,
    "jenkscaspallforced": JenksCaspallForced,
    "jenkscaspallsampled": JenksCaspallSampled,
    "maxp": MaxP,
    "maximumbreaks": MaximumBreaks,
    "naturalbreaks": NaturalBreaks,
    "quantiles": Quantiles,
    "percentiles": Percentiles,
    "prettybreaks": PrettyBreaks,
    "stdmean": StdMean,
    "userdefined": UserDefined,
}
41 |
42 |
def classify(
    y,
    scheme,
    k=5,
    pct=(1, 10, 50, 90, 99, 100),
    pct_sampled=0.10,
    truncate=True,
    hinge=1.5,
    multiples=(-2, -1, 1, 2),
    mindiff=0,
    initial=100,
    bins=None,
    lowest=None,
    anchor=False,
):
    """

    Classify your data with ``mapclassify.classify``.
    Input parameters are dependent on classifier used.

    Parameters
    ----------

    y : numpy.array
        :math:`(n,1)`, values to classify.
    scheme : str
        ``pysal.mapclassify`` classification scheme.
    k : int (default 5)
        The number of classes.
    pct : array-like (default (1, 10, 50, 90, 99, 100))
        Percentiles used for classification with ``percentiles``.
    pct_sampled : float default (0.10)
        The percentage of n that should form the sample
        (``JenksCaspallSampled``, ``FisherJenksSampled``)
        If ``pct`` is specified such that ``n*pct > 1000``, then ``pct=1000``.
    truncate : bool (default True)
        Truncate ``pct_sampled`` in cases where ``pct * n > 1000``.
    hinge : float (default 1.5)
        Multiplier for *IQR* when ``BoxPlot`` classifier used.
    multiples : array-like (default (-2, -1, 1, 2))
        The multiples of the standard deviation to add/subtract from
        the sample mean to define the bins using ``std_mean``.
    mindiff : float (default is 0)
        The minimum difference between class breaks
        if using ``maximum_breaks`` classifier.
    initial : int (default 100)
        Number of initial solutions to generate or number of runs when using
        ``natural_breaks`` or ``max_p_classifier``. Setting initial to ``0``
        will result in the quickest calculation of bins.
    bins : numpy.array (default None)
        :math:`(k,1)`, upper bounds of classes (have to be monotonically
        increasing) if using ``user_defined`` classifier.
        Default is ``None``. For example: ``[20, max(y)]``.
    lowest : float (default None)
        Scalar minimum value of lowest class. Default is to set the minimum
        to ``-inf`` if ``y.min()`` > first upper bound (which will override
        the default), otherwise minimum is set to ``y.min()``.
    anchor : bool (default False)
        Anchor upper bound of one class to the sample mean.

    Returns
    -------
    classifier : mapclassify.classifiers.MapClassifier
        Object containing bin ids for each observation (``.yb``),
        upper bounds of each class (``.bins``), number of classes (``.k``)
        and number of observations falling in each class (``.counts``).

    Notes
    -----

    Supported classifiers include:

    * ``quantiles``
    * ``boxplot``
    * ``equalinterval``
    * ``fisherjenks``
    * ``fisherjenkssampled``
    * ``headtailbreaks``
    * ``jenkscaspall``
    * ``jenkscaspallsampled``
    * ``jenkscaspallforced``
    * ``maxp``
    * ``maximumbreaks``
    * ``naturalbreaks``
    * ``percentiles``
    * ``prettybreaks``
    * ``stdmean``
    * ``userdefined``

    Examples
    --------

    >>> import libpysal
    >>> import geopandas
    >>> from mapclassify import classify

    Load example data.

    >>> link_to_data = libpysal.examples.get_path("columbus.shp")
    >>> gdf = geopandas.read_file(link_to_data)
    >>> x = gdf['HOVAL'].values

    Classify values by quantiles.

    >>> quantiles = classify(x, "quantiles")

    Classify values by box_plot and set hinge to ``2``.

    >>> box_plot = classify(x, 'box_plot', hinge=2)
    >>> box_plot
    BoxPlot

        Interval       Count
    ----------------------
    ( -inf, -9.50] |     0
    (-9.50, 25.70] |    13
    (25.70, 33.50] |    12
    (33.50, 43.30] |    12
    (43.30, 78.50] |     9
    (78.50, 96.40] |     3

    """

    # normalize: scheme matching is case-insensitive and ignores underscores,
    # so 'box_plot', 'BoxPlot' and 'boxplot' are all equivalent
    scheme_lower = scheme.lower()
    scheme = scheme_lower.replace("_", "")

    # check if scheme is a valid scheme
    if scheme not in _classifiers:
        raise ValueError(
            f"Invalid scheme: '{scheme}'\n"
            f"Scheme must be in the set: {_classifiers.keys()}"
        )

    # dispatch on scheme, forwarding only the keyword arguments
    # each classifier actually accepts
    if scheme == "boxplot":
        classifier = _classifiers[scheme](y, hinge)

    elif scheme == "fisherjenkssampled":
        classifier = _classifiers[scheme](y, k, pct_sampled, truncate)

    elif scheme == "headtailbreaks":
        classifier = _classifiers[scheme](y)

    elif scheme == "percentiles":
        classifier = _classifiers[scheme](y, pct)

    elif scheme == "stdmean":
        classifier = _classifiers[scheme](y, multiples, anchor)

    elif scheme == "jenkscaspallsampled":
        classifier = _classifiers[scheme](y, k, pct_sampled)

    elif scheme == "maximumbreaks":
        classifier = _classifiers[scheme](y, k, mindiff)

    elif scheme in ["naturalbreaks", "maxp"]:
        classifier = _classifiers[scheme](y, k, initial)

    elif scheme == "userdefined":
        classifier = _classifiers[scheme](y, bins, lowest)

    else:
        # remaining schemes (equalinterval, fisherjenks, jenkscaspall,
        # jenkscaspallforced, quantiles, prettybreaks) all take (y, k);
        # an ``else`` here guarantees ``classifier`` is always bound,
        # since membership in ``_classifiers`` was validated above
        classifier = _classifiers[scheme](y, k)

    return classifier
217 |
--------------------------------------------------------------------------------
/mapclassify/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Datasets module
3 | """
4 |
5 | from . import calemp
6 |
--------------------------------------------------------------------------------
/mapclassify/datasets/calemp/README.md:
--------------------------------------------------------------------------------
1 | calemp
2 | ======
3 |
4 | Employment density for California counties
5 | ------------------------------------------
6 |
7 | * calempdensity.csv: data on employment and employment density in California
8 | counties.
9 |
10 | Polygon data, n=58, k=11.
11 |
12 | Source: Anselin, L. and S.J. Rey (in progress) Spatial Econometrics: Foundations.
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/mapclassify/datasets/calemp/__init__.py:
--------------------------------------------------------------------------------
1 | from .data import *
2 |
--------------------------------------------------------------------------------
/mapclassify/datasets/calemp/calempdensity.csv:
--------------------------------------------------------------------------------
1 | "Geographic Area","Geographic Area","Geographic Name","GEONAME","GEOCOMP","STATE","Number of Employees for All Sectors","Number of employees","Class Number","sq. km","emp/sq km"
2 | "05000US06001","06001","Alameda County, California","Alameda County, California","00","06",630171,630171,5,1910.1,329.92
3 | "05000US06003","06003","Alpine County, California","Alpine County, California","00","06",813,813,1,1913.1,0.42
4 | "05000US06005","06005","Amador County, California","Amador County, California","00","06",9061,9061,2,1534.7,5.9
5 | "05000US06007","06007","Butte County, California","Butte County, California","00","06",59578,59578,3,4246.6,14.03
6 | "05000US06009","06009","Calaveras County, California","Calaveras County, California","00","06",7344,7344,2,2642.3,2.78
7 | "05000US06011","06011","Colusa County, California","Colusa County, California","00","06",4000,4000,1,2980.5,1.34
8 | "05000US06013","06013","Contra Costa County, California","Contra Costa County, California","00","06",338156,338156,5,1865.5,181.27
9 | "05000US06015","06015","Del Norte County, California","Del Norte County, California","00","06",4303,4303,1,2610.4,1.65
10 | "05000US06017","06017","El Dorado County, California","El Dorado County, California","00","06",44477,44477,3,4432.8,10.03
11 | "05000US06019","06019","Fresno County, California","Fresno County, California","00","06",257975,257975,4,15444.7,16.7
12 | "05000US06021","06021","Glenn County, California","Glenn County, California","00","06",4487,4487,1,3405.5,1.32
13 | "05000US06023","06023","Humboldt County, California","Humboldt County, California","00","06",36962,36962,3,9253.5,3.99
14 | "05000US06025","06025","Imperial County, California","Imperial County, California","00","06",34156,34156,3,10813.4,3.16
15 | "05000US06027","06027","Inyo County, California","Inyo County, California","00","06",5820,5820,1,26397.5,0.22
16 | "05000US06029","06029","Kern County, California","Kern County, California","00","06",183412,183412,4,21086.8,8.7
17 | "05000US06031","06031","Kings County, California","Kings County, California","00","06",23610,23610,2,3598.8,6.56
18 | "05000US06033","06033","Lake County, California","Lake County, California","00","06",10648,10648,2,3259.4,3.27
19 | "05000US06035","06035","Lassen County, California","Lassen County, California","00","06",3860,3860,1,11803.9,0.33
20 | "05000US06037","06037","Los Angeles County, California","Los Angeles County, California","00","06",3895886,3895886,5,10515.3,370.5
21 | "05000US06039","06039","Madera County, California","Madera County, California","00","06",24957,24957,2,5538.5,4.51
22 | "05000US06041","06041","Marin County, California","Marin County, California","00","06",101358,101358,4,1346.2,75.29
23 | "05000US06043","06043","Mariposa County, California","Mariposa County, California","00","06",3739,3739,1,3758.6,0.99
24 | "05000US06045","06045","Mendocino County, California","Mendocino County, California","00","06",24898,24898,2,9089,2.74
25 | "05000US06047","06047","Merced County, California","Merced County, California","00","06",43369,43369,3,4995.8,8.68
26 | "05000US06049","06049","Modoc County, California","Modoc County, California","00","06",1467,1467,1,10215.9,0.14
27 | "05000US06051","06051","Mono County, California","Mono County, California","00","06",7289,7289,1,7885.2,0.92
28 | "05000US06053","06053","Monterey County, California","Monterey County, California","00","06",108660,108660,4,8603.8,12.63
29 | "05000US06055","06055","Napa County, California","Napa County, California","00","06",56029,56029,3,1952.5,28.7
30 | "05000US06057","06057","Nevada County, California","Nevada County, California","00","06",29805,29805,3,2480.3,12.02
31 | "05000US06059","06059","Orange County, California","Orange County, California","00","06",1478452,1478452,5,2045.3,722.85
32 | "05000US06061","06061","Placer County, California","Placer County, California","00","06",133427,133427,4,3637.4,36.68
33 | "05000US06063","06063","Plumas County, California","Plumas County, California","00","06",4863,4863,1,6614.8,0.74
34 | "05000US06065","06065","Riverside County, California","Riverside County, California","00","06",556789,556789,5,18669.1,29.82
35 | "05000US06067","06067","Sacramento County, California","Sacramento County, California","00","06",480346,480346,5,2501.1,192.05
36 | "05000US06069","06069","San Benito County, California","San Benito County, California","00","06",12163,12163,2,3597.9,3.38
37 | "05000US06071","06071","San Bernardino County, California","San Bernardino County, California","00","06",579135,579135,5,51961.2,11.15
38 | "05000US06073","06073","San Diego County, California","San Diego County, California","00","06",1205862,1205862,5,10889.6,110.74
39 | "05000US06075","06075","San Francisco County, California","San Francisco County, California","00","06",497485,497485,5,121,4111.45
40 | "05000US06077","06077","San Joaquin County, California","San Joaquin County, California","00","06",179276,179276,4,3624.1,49.47
41 | "05000US06079","06079","San Luis Obispo County, California","San Luis Obispo County, California","00","06",88413,88413,3,8558.7,10.33
42 | "05000US06081","06081","San Mateo County, California","San Mateo County, California","00","06",368859,368859,5,1163.2,317.11
43 | "05000US06083","06083","Santa Barbara County, California","Santa Barbara County, California","00","06",145202,145202,4,7092.6,20.47
44 | "05000US06085","06085","Santa Clara County, California","Santa Clara County, California","00","06",886011,886011,5,3344.3,264.93
45 | "05000US06087","06087","Santa Cruz County, California","Santa Cruz County, California","00","06",76488,76488,3,1154.3,66.26
46 | "05000US06089","06089","Shasta County, California","Shasta County, California","00","06",52804,52804,3,9804.8,5.39
47 | "05000US06091","06091","Sierra County, California","Sierra County, California","00","06",324,324,1,2469.4,0.13
48 | "05000US06093","06093","Siskiyou County, California","Siskiyou County, California","00","06",9992,9992,2,16284,0.61
49 | "05000US06095","06095","Solano County, California","Solano County, California","00","06",108653,108653,4,2145,50.65
50 | "05000US06097","06097","Sonoma County, California","Sonoma County, California","00","06",165261,165261,4,4082.4,40.48
51 | "05000US06099","06099","Stanislaus County, California","Stanislaus County, California","00","06",141928,141928,4,3870.9,36.67
52 | "05000US06101","06101","Sutter County, California","Sutter County, California","00","06",20430,20430,2,1561,13.09
53 | "05000US06103","06103","Tehama County, California","Tehama County, California","00","06",13809,13809,2,7643.2,1.81
54 | "05000US06105","06105","Trinity County, California","Trinity County, California","00","06",1668,1668,1,8233.3,0.2
55 | "05000US06107","06107","Tulare County, California","Tulare County, California","00","06",94949,94949,4,12495,7.6
56 | "05000US06109","06109","Tuolumne County, California","Tuolumne County, California","00","06",14519,14519,2,5790.3,2.51
57 | "05000US06111","06111","Ventura County, California","Ventura County, California","00","06",273745,273745,5,4781,57.26
58 | "05000US06113","06113","Yolo County, California","Yolo County, California","00","06",63769,63769,3,2622.2,24.32
59 | "05000US06115","06115","Yuba County, California","Yuba County, California","00","06",11374,11374,2,1632.9,6.97
60 |
--------------------------------------------------------------------------------
/mapclassify/datasets/calemp/data.py:
--------------------------------------------------------------------------------
1 | from os.path import abspath, dirname
2 |
3 | import pandas as pd
4 |
5 |
def load():
    """
    Load the California employment-density example data.

    Returns
    -------
    pandas.Series
        Employment density (the ``"emp/sq km"`` column) for the
        58 California counties in ``calempdensity.csv``.
    """

    df = _get_data()

    return df["emp/sq km"]
15 |
16 |
def _get_data():
    """Read the bundled ``calempdensity.csv`` into a ``pandas.DataFrame``."""
    csv_path = dirname(abspath(__file__)) + "/calempdensity.csv"
    return pd.read_csv(csv_path)
22 |
--------------------------------------------------------------------------------
/mapclassify/greedy.py:
--------------------------------------------------------------------------------
1 | """
2 | greedy - Greedy (topological) coloring for GeoPandas
3 |
4 | Copyright (C) 2019 Martin Fleischmann, 2017 Nyall Dawson
5 |
6 | """
7 |
8 | import operator
9 |
10 | __all__ = ["greedy"]
11 |
12 |
13 | def _balanced(features, sw, balance="count", min_colors=4):
14 | """
15 | Strategy to color features in a way which is visually balanced.
16 |
17 | Algorithm ported from QGIS to be used with GeoDataFrames
18 | and libpysal weights objects.
19 |
20 | Original algorithm:
21 | Date : February 2017
22 | Copyright : (C) 2017 by Nyall Dawson
23 | Email : nyall dot dawson at gmail dot com
24 |
25 | Parameters
26 | ----------
27 |
28 | features : geopandas.GeoDataFrame
29 | GeoDataFrame.
30 | sw : libpysal.weights.W
31 | Spatial weights object denoting adjacency of features.
32 | balance : str (default 'count')
33 | The method of color balancing.
34 | min_colors : int (default 4)
35 | The minimal number of colors to be used.
36 |
37 | Returns
38 | -------
39 |
40 | feature_colors : dict
41 | Dictionary with assigned color codes.
42 |
43 | """
44 | feature_colors = {}
45 | # start with minimum number of colors in pool
46 | color_pool = set(range(min_colors))
47 |
48 | # calculate count of neighbours
49 | neighbour_count = sw.cardinalities
50 |
51 | # sort features by neighbour count - handle those with more neighbours first
52 | sorted_by_count = sorted(
53 | neighbour_count.items(), key=operator.itemgetter(1), reverse=True
54 | )
55 |
56 | # counts for each color already assigned
57 | color_counts = {}
58 | color_areas = {}
59 | for c in color_pool:
60 | color_counts[c] = 0
61 | color_areas[c] = 0
62 |
63 | if balance == "centroid":
64 | features = features.copy()
65 | features.geometry = features.geometry.centroid
66 | balance = "distance"
67 |
68 | for feature_id, _ in sorted_by_count:
69 | # first work out which already assigned colors are adjacent to this feature
70 | adjacent_colors = set()
71 | for neighbour in sw.neighbors[feature_id]:
72 | if neighbour in feature_colors:
73 | adjacent_colors.add(feature_colors[neighbour])
74 |
75 | # from the existing colors, work out which are available (ie non-adjacent)
76 | available_colors = color_pool.difference(adjacent_colors)
77 |
78 | feature_color = -1
79 | if len(available_colors) == 0:
80 | # no existing colors available for this feature; add new color and repeat
81 | min_colors += 1
82 | return _balanced(features, sw, balance, min_colors)
83 | else:
84 | if balance == "count":
85 | # choose least used available color
86 | counts = [
87 | (c, v) for c, v in color_counts.items() if c in available_colors
88 | ]
89 | feature_color = sorted(counts, key=operator.itemgetter(1))[0][0]
90 | color_counts[feature_color] += 1
91 | elif balance == "area":
92 | areas = [
93 | (c, v) for c, v in color_areas.items() if c in available_colors
94 | ]
95 | feature_color = sorted(areas, key=operator.itemgetter(1))[0][0]
96 | color_areas[feature_color] += features.loc[feature_id].geometry.area
97 |
98 | elif balance == "distance":
99 | min_distances = {c: float("inf") for c in available_colors}
100 | this_feature = features.loc[feature_id].geometry
101 |
102 | # find features for all available colors
103 | other_features = {
104 | f_id: c
105 | for (f_id, c) in feature_colors.items()
106 | if c in available_colors
107 | }
108 |
109 | distances = features.loc[other_features.keys()].distance(this_feature)
110 | # calculate the min distance from this feature to the nearest
111 | # feature with each assigned color
112 | for other_feature_id, c in other_features.items():
113 | distance = distances.loc[other_feature_id]
114 | if distance < min_distances[c]:
115 | min_distances[c] = distance
116 |
117 | # choose color such that min distance is maximised!
118 | # - ie we want MAXIMAL separation between features with the same color
119 | feature_color = sorted(
120 | min_distances, key=min_distances.__getitem__, reverse=True
121 | )[0]
122 |
123 | feature_colors[feature_id] = feature_color
124 |
125 | return feature_colors
126 |
127 |
def greedy(
    gdf,
    strategy="balanced",
    balance="count",
    min_colors=4,
    sw="queen",
    min_distance=None,
    silence_warnings=True,
    interchange=False,
):
    """
    Color GeoDataFrame using various strategies of greedy (topological) colouring.

    Attempts to color a GeoDataFrame using as few colors as possible, where no
    neighbours can have same color as the feature itself. Offers various strategies
    ported from QGIS or implemented within NetworkX for greedy graph coloring.

    ``greedy`` will return ``pandas.Series`` representing assigned color codes.

    Parameters
    ----------

    gdf : GeoDataFrame
        GeoDataFrame
    strategy : str (default 'balanced')
        Determine coloring strategy. Options are ``'balanced'`` for
        algorithm based on QGIS Topological coloring. It is aiming
        for a visual balance, defined by the balance parameter. Other
        options are those supported by ``networkx.greedy_color``:

        * ``'largest_first'``
        * ``'random_sequential'``
        * ``'smallest_last'``
        * ``'independent_set'``
        * ``'connected_sequential_bfs'``
        * ``'connected_sequential_dfs'``
        * ``'connected_sequential'`` (alias for the previous strategy)
        * ``'saturation_largest_first'``
        * ``'DSATUR'`` (alias for the previous strategy)

        For details see https://networkx.github.io/documentation/stable/reference/algorithms/generated/networkx.algorithms.coloring.greedy_color.html

    balance : str (default 'count')
        If strategy is ``'balanced'``, determine the method of color balancing.

        * ``'count'`` attempts to balance the number of features per each color.
        * ``'area'`` attempts to balance the area covered by each color.
        * ``'centroid'`` attempts to balance the distance between colors based
          on the distance between centroids.
        * ``'distance'`` attempts to balance the distance between colors based
          on the distance between geometries. Slower than ``'centroid'``,
          but more precise.

        Both ``'centroid'`` and ``'distance'`` are significantly slower than other
        especially for larger GeoDataFrames. Apart from ``'count'``, all require
        CRS to be projected (not in degrees) to ensure metric values are correct.
    min_colors: int (default 4)
        If strategy is ``'balanced'``, define the minimal number of colors to be used.
    sw : 'queen', 'rook' or libpysal.weights.W (default 'queen')
        If min_distance is None, one can pass ``'libpysal.weights.W'``
        object denoting neighbors or let greedy generate one based on
        ``'queen'`` or ``'rook'`` contiguity.
    min_distance : float (default None)
        Set minimal distance between colors. If ``min_distance`` is not ``None``,
        slower algorithm for generating spatial weights is used based on
        intersection between geometries. ``'min_distance'`` is then used as a
        tolerance of intersection.
    silence_warnings : bool (default True)
        Silence libpysal warnings when creating spatial weights.
    interchange : bool (default False)
        Use the color interchange algorithm (applicable for NetworkX strategies).
        For details see https://networkx.github.io/documentation/stable/reference/algorithms/generated/networkx.algorithms.coloring.greedy_color.html

    Returns
    -------

    color : pandas.Series
        ``pandas.Series`` representing assigned color codes.

    Examples
    --------

    >>> from mapclassify import greedy
    >>> import geopandas
    >>> world = geopandas.read_file(
    ...     "https://naciscdn.org/naturalearth/110m/cultural/ne_110m_admin_0_countries.zip"
    ... )
    >>> africa = world.loc[world.CONTINENT == "Africa"].copy()
    >>> africa = africa.to_crs("ESRI:102022").reset_index(drop=True)

    Default:

    >>> africa["greedy_colors"] = greedy(africa)
    >>> africa["greedy_colors"].head()
    0    1
    1    0
    2    0
    3    1
    4    4
    Name: greedy_colors, dtype: int64

    Balanced by area:

    >>> africa["balanced_area"] = greedy(africa, strategy="balanced", balance="area")
    >>> africa["balanced_area"].head()
    0    1
    1    2
    2    0
    3    1
    4    3
    Name: balanced_area, dtype: int64

    Using rook adjacency:

    >>> africa["rook_adjacency"] = greedy(africa, sw="rook")
    >>> africa["rook_adjacency"].tail()
    46    3
    47    0
    48    2
    49    3
    50    1
    Name: rook_adjacency, dtype: int64

    Adding minimal distance between colors:

    >>> africa["min_distance"] = greedy(africa, min_distance=1000000)
    >>> africa["min_distance"].head()
    0    1
    1    9
    2    0
    3    7
    4    4
    Name: min_distance, dtype: int64

    Using different coloring strategy:

    >>> africa["smallest_last"] = greedy(africa, strategy="smallest_last")
    >>> africa["smallest_last"].head()
    0    3
    1    1
    2    1
    3    3
    4    1
    Name: smallest_last, dtype: int64

    """  # noqa

    # NetworkX is only required for the non-'balanced' strategies,
    # so import it lazily and collect its valid strategy names
    if strategy != "balanced":
        try:
            import networkx as nx

            STRATEGIES = nx.algorithms.coloring.greedy_coloring.STRATEGIES.keys()

        except ImportError:
            raise ImportError("The 'networkx' package is required.") from None

    try:
        import pandas as pd
    except ImportError:
        raise ImportError("The 'pandas' package is required.") from None
    try:
        from libpysal.weights import Queen, Rook, W, fuzzy_contiguity
    except ImportError:
        raise ImportError("The 'libpysal' package is required.") from None

    # a minimum-distance request overrides any passed/contiguity weights:
    # adjacency is derived from buffered-geometry intersection instead
    if min_distance is not None:
        sw = fuzzy_contiguity(
            gdf,
            tolerance=0.0,
            buffering=True,
            buffer=min_distance / 2.0,
            silence_warnings=silence_warnings,
        )

    # build contiguity weights when a keyword ('queen'/'rook') was given
    # rather than a ready-made libpysal W object
    if not isinstance(sw, W):
        if sw == "queen":
            sw = Queen.from_dataframe(
                gdf, silence_warnings=silence_warnings, use_index=False
            )
        elif sw == "rook":
            sw = Rook.from_dataframe(
                gdf, silence_warnings=silence_warnings, use_index=False
            )

    if strategy == "balanced":
        color = pd.Series(_balanced(gdf, sw, balance=balance, min_colors=min_colors))

    elif strategy in STRATEGIES:
        # delegate to NetworkX on the adjacency graph of the weights object
        color = nx.greedy_color(
            sw.to_networkx(), strategy=strategy, interchange=interchange
        )

    else:
        raise ValueError(f"'{strategy}' is not a valid strategy.")

    # re-align assigned colors with the caller's original index
    color = pd.Series(color).sort_index()
    color.index = gdf.index
    return color
326 |
--------------------------------------------------------------------------------
/mapclassify/legendgram.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
def _legendgram(
    classifier,
    *,
    ax=None,
    cmap="viridis",
    bins=50,
    inset=True,
    clip=None,
    vlines=False,
    vlinecolor="black",
    vlinewidth=1,
    loc="lower left",
    legend_size=("27%", "20%"),
    frameon=False,
    tick_params=None,
    bbox_to_anchor=None,
    **kwargs,
):
    """
    Add a histogram ("legendgram") to a choropleth, with bars colored to
    match the map's classification bins.

    Arguments
    ---------
    classifier : mapclassify.classifiers.MapClassifier
        classifier providing the values (``.y``) and class upper
        bounds (``.bins``) to plot
    ax : Axes
        axes holding the map; a new figure/axes pair is created when ``None``
    cmap : str or matplotlib colormap (default 'viridis')
        colormap used to color the histogram bars per class
    bins : int (default 50)
        number of histogram bins
    inset : bool (default True)
        draw the histogram as an inset of ``ax``; otherwise draw into ``ax``
    clip : tuple (default None)
        x-axis limits applied to the histogram
    vlines : bool (default False)
        draw a vertical line at each class upper bound
    vlinecolor : color (default 'black')
        color of the class-break lines
    vlinewidth : float (default 1)
        width of the class-break lines
    loc : string or int
        valid legend location like that used in matplotlib.pyplot.legend. Valid
        locations are 'upper left', 'upper center', 'upper right', 'center left',
        'center', 'center right', 'lower left', 'lower center', 'lower right'.
    legend_size : tuple
        tuple of floats or strings describing the (width, height) of the
        legend. If a float is provided, it is
        the size in inches, e.g. ``(1.3, 1)``. If a string is provided, it is
        the size in relative units, e.g. ``('40%', '20%')``. By default,
        i.e. if ``bbox_to_anchor`` is not specified, those are relative to
        the `ax`. Otherwise, they are to be understood relative to the
        bounding box provided via ``bbox_to_anchor``.
    frameon : bool (default: False)
        whether to add a frame to the legendgram
    tick_params : keyword dictionary
        options to control how the histogram axis gets ticked/labelled.
    bbox_to_anchor : tuple or ``matplotlib.transforms.BboxBase``
        Bbox that the inset axes will be anchored to. If None, a tuple of
        ``(0, 0, 1, 1)`` is used. If a tuple, can be either
        ``[left, bottom, width, height]``, or ``[left, bottom]``. If the ``legend_size``
        is in relative units (%), the 2-tuple ``[left, bottom]`` cannot be used.
    **kwargs : dict
        additional keyword arguments passed through to ``Axes.hist``

    Returns
    -------
    axis containing the legendgram.
    """

    try:
        import matplotlib.pyplot as plt
        from matplotlib import colormaps
        from mpl_toolkits.axes_grid1.inset_locator import inset_axes
    except ImportError as e:
        # BUG FIX: the original ``raise ImportError from e("...")`` *called*
        # the caught exception instance, raising a TypeError instead of the
        # intended ImportError. The correct form is ``raise X(...) from e``.
        raise ImportError(
            "you must have matplotlib installed to plot a legendgram"
        ) from e

    if ax is None:
        f, ax = plt.subplots()
    else:
        f = ax.get_figure()

    k = len(classifier.bins)
    breaks = classifier.bins

    if inset:
        if not bbox_to_anchor:
            bbox_to_anchor = (0, 0, 1, 1)
        histpos = inset_axes(
            ax,
            loc=loc,
            width=legend_size[0],
            height=legend_size[1],
            bbox_to_anchor=bbox_to_anchor,
            bbox_transform=ax.transAxes,
        )
        histax = f.add_axes(histpos)
    else:
        histax = ax

    # draw the histogram once in a neutral color, then recolor patches per class
    _, bins, patches = histax.hist(classifier.y, bins=bins, color="0.1", **kwargs)
    if isinstance(cmap, str):
        cmap = colormaps[cmap]

    # one evenly-spaced colormap sample per class
    colors = [cmap(i) for i in np.linspace(0, 1, k)]

    # map each class's upper bound to a histogram-bin index, then paint every
    # patch between consecutive break positions with that class's color
    bucket_breaks = [0] + [np.searchsorted(bins, i) for i in breaks]
    for c in range(k):
        for b in range(bucket_breaks[c], bucket_breaks[c + 1]):
            patches[b].set_facecolor(colors[c])

    if clip is not None:
        histax.set_xlim(*clip)
    histax.set_frame_on(frameon)
    histax.get_yaxis().set_visible(False)
    if tick_params is None:
        tick_params = dict()
    if vlines:
        lim = histax.get_ylim()[1]
        # plot upper limit of each bin
        for i in classifier.bins:
            histax.vlines(i, 0, lim, color=vlinecolor, linewidth=vlinewidth)
    tick_params["labelsize"] = tick_params.get("labelsize", 12)
    histax.tick_params(**tick_params)
    return histax
106 |
--------------------------------------------------------------------------------
/mapclassify/pooling.py:
--------------------------------------------------------------------------------
1 | import numpy
2 |
3 | from .classifiers import (
4 | BoxPlot,
5 | EqualInterval,
6 | FisherJenks,
7 | FisherJenksSampled,
8 | MaximumBreaks,
9 | Quantiles,
10 | StdMean,
11 | UserDefined,
12 | )
13 |
__all__ = ["Pooled"]

# Lookup table mapping a lowercased classifier name to the classifier class;
# used by ``Pooled`` to resolve its ``classifier`` string argument.
dispatcher = {
    "boxplot": BoxPlot,
    "equalinterval": EqualInterval,
    "fisherjenks": FisherJenks,
    "fisherjenkssampled": FisherJenksSampled,
    "quantiles": Quantiles,
    "maximumbreaks": MaximumBreaks,
    "stdmean": StdMean,
    "userdefined": UserDefined,
}
26 |
27 |
class Pooled:
    """Applying global binning across columns.

    Parameters
    ----------

    Y : numpy.array
        :math:`(n, m)`, values to classify, with :math:`m>1`.
    classifier : str (default 'Quantiles')
        Name of ``mapclassify.classifier`` to apply.
    **kwargs : dict
        Additional keyword arguments for classifier.

    Attributes
    ----------

    global_classifier : mapclassify.classifiers.MapClassifier
        Instance of the pooled classifier defined as the classifier
        applied to the union of the columns.
    col_classifiers : list
        Elements are ``MapClassifier`` instances with the pooled classifier
        applied to the associated column of ``Y``.

    Examples
    --------

    >>> import mapclassify
    >>> import numpy
    >>> n = 20
    >>> data = numpy.array([numpy.arange(n)+i*n for i in range(1,4)]).T
    >>> res = mapclassify.Pooled(data)

    >>> res.col_classifiers[0].counts.tolist()
    [12, 8, 0, 0, 0]

    >>> res.col_classifiers[1].counts.tolist()
    [0, 4, 12, 4, 0]

    >>> res.col_classifiers[2].counts.tolist()
    [0, 0, 0, 8, 12]

    >>> res.global_classifier.counts.tolist()
    [12, 12, 12, 12, 12]

    >>> res.global_classifier.bins == res.col_classifiers[0].bins
    array([ True, True, True, True, True])

    >>> res.global_classifier.bins
    array([31.8, 43.6, 55.4, 67.2, 79. ])

    """

    def __init__(self, Y, classifier="Quantiles", **kwargs):
        method = classifier.lower()
        valid_methods = list(dispatcher.keys())
        if method not in valid_methods:
            raise ValueError(
                f"'{classifier}' not a valid classifier. "
                f"Currently supported classifiers: {valid_methods}"
            )

        # Expose the classifier keyword arguments (e.g. ``k``) as attributes.
        self.__dict__.update(kwargs)
        Y = numpy.asarray(Y)
        _, cols = Y.shape
        # Stack all columns into one vector (column-major) so the global
        # classifier is fit on the union of the column values.
        y = numpy.reshape(Y, (-1, 1), order="f")
        ymin = y.min()
        global_classifier = dispatcher[method](y, **kwargs)
        # Re-apply the pooled breaks to each column individually, anchoring
        # the lowest bound at the pooled minimum.
        col_classifiers = []
        name = f"Pooled {classifier}"
        for c in range(cols):
            res = UserDefined(Y[:, c], bins=global_classifier.bins, lowest=ymin)
            res.name = name
            col_classifiers.append(res)
        self.col_classifiers = col_classifiers
        self.global_classifier = global_classifier
        self._summary()

    def _summary(self):
        # Mirror the goodness-of-fit statistics of the global classifier.
        self.classes = self.global_classifier.classes
        self.tss = self.global_classifier.tss
        self.adcm = self.global_classifier.adcm
        self.gadf = self.global_classifier.gadf

    def __str__(self):
        s = "Pooled Classifier"
        rows = [s]
        for c in self.col_classifiers:
            rows.append(c.table())
        return "\n\n".join(rows)

    def __repr__(self):
        return self.__str__()
121 |
--------------------------------------------------------------------------------
/mapclassify/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/mapclassify/tests/__init__.py
--------------------------------------------------------------------------------
/mapclassify/tests/baseline/test_histogram_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/mapclassify/tests/baseline/test_histogram_plot.png
--------------------------------------------------------------------------------
/mapclassify/tests/baseline/test_histogram_plot_despine.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/mapclassify/tests/baseline/test_histogram_plot_despine.png
--------------------------------------------------------------------------------
/mapclassify/tests/baseline/test_histogram_plot_linewidth.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/mapclassify/tests/baseline/test_histogram_plot_linewidth.png
--------------------------------------------------------------------------------
/mapclassify/tests/baseline_images/test_legendgram/legendgram_cmap.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/mapclassify/tests/baseline_images/test_legendgram/legendgram_cmap.png
--------------------------------------------------------------------------------
/mapclassify/tests/baseline_images/test_legendgram/legendgram_default.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/mapclassify/tests/baseline_images/test_legendgram/legendgram_default.png
--------------------------------------------------------------------------------
/mapclassify/tests/baseline_images/test_legendgram/legendgram_kwargs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/mapclassify/tests/baseline_images/test_legendgram/legendgram_kwargs.png
--------------------------------------------------------------------------------
/mapclassify/tests/baseline_images/test_legendgram/legendgram_map.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/mapclassify/tests/baseline_images/test_legendgram/legendgram_map.png
--------------------------------------------------------------------------------
/mapclassify/tests/baseline_images/test_legendgram/legendgram_position.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/mapclassify/tests/baseline_images/test_legendgram/legendgram_position.png
--------------------------------------------------------------------------------
/mapclassify/tests/baseline_images/test_legendgram/legendgram_quantiles.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/mapclassify/tests/baseline_images/test_legendgram/legendgram_quantiles.png
--------------------------------------------------------------------------------
/mapclassify/tests/baseline_images/test_legendgram/legendgram_vlines.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pysal/mapclassify/8ac07ba0ececafd744b9f5f35ae670209baf340c/mapclassify/tests/baseline_images/test_legendgram/legendgram_vlines.png
--------------------------------------------------------------------------------
/mapclassify/tests/test_classify.py:
--------------------------------------------------------------------------------
1 | import geopandas
2 | import libpysal
3 | import pytest
4 |
5 | import mapclassify
6 |
7 |
8 | def _assertions(a, b):
9 | assert a.k == b.k
10 | assert a.yb.all() == b.yb.all()
11 | assert a.bins.all() == b.bins.all()
12 | assert a.counts.all() == b.counts.all()
13 |
14 |
class TestClassify:
    """Ensure ``mapclassify.classify`` mirrors direct classifier construction."""

    def setup_method(self):
        shp_path = libpysal.examples.get_path("columbus.shp")
        self.x = geopandas.read_file(shp_path)["HOVAL"].values

    def test_box_plot(self):
        _assertions(
            mapclassify.classify(self.x, "box_plot"),
            mapclassify.BoxPlot(self.x),
        )

    def test_equal_interval(self):
        _assertions(
            mapclassify.classify(self.x, "EqualInterval", k=3),
            mapclassify.EqualInterval(self.x, k=3),
        )

    def test_fisher_jenks(self):
        _assertions(
            mapclassify.classify(self.x, "FisherJenks", k=3),
            mapclassify.FisherJenks(self.x, k=3),
        )

    def test_fisher_jenks_sampled(self):
        _assertions(
            mapclassify.classify(
                self.x, "FisherJenksSampled", k=3, pct_sampled=0.5, truncate=False
            ),
            mapclassify.FisherJenksSampled(self.x, k=3, pct=0.5, truncate=False),
        )

    def test_headtail_breaks(self):
        _assertions(
            mapclassify.classify(self.x, "headtail_breaks"),
            mapclassify.HeadTailBreaks(self.x),
        )

    def test_quantiles(self):
        _assertions(
            mapclassify.classify(self.x, "quantiles", k=3),
            mapclassify.Quantiles(self.x, k=3),
        )

    def test_percentiles(self):
        pct = [25, 50, 75, 100]
        _assertions(
            mapclassify.classify(self.x, "percentiles", pct=pct),
            mapclassify.Percentiles(self.x, pct=pct),
        )
        _assertions(
            mapclassify.classify(self.x, "prettybreaks"),
            mapclassify.PrettyBreaks(self.x),
        )

    def test_jenks_caspall(self):
        _assertions(
            mapclassify.classify(self.x, "JenksCaspall", k=3),
            mapclassify.JenksCaspall(self.x, k=3),
        )

    def test_jenks_caspall_forced(self):
        _assertions(
            mapclassify.classify(self.x, "JenksCaspallForced", k=3),
            mapclassify.JenksCaspallForced(self.x, k=3),
        )

    def test_jenks_caspall_sampled(self):
        _assertions(
            mapclassify.classify(self.x, "JenksCaspallSampled", pct_sampled=0.5),
            mapclassify.JenksCaspallSampled(self.x, pct=0.5),
        )

    def test_natural_breaks(self):
        _assertions(
            mapclassify.classify(self.x, "natural_breaks"),
            mapclassify.NaturalBreaks(self.x),
        )

    def test_max_p_classifier(self):
        _assertions(
            mapclassify.classify(self.x, "max_p", k=3, initial=50),
            mapclassify.MaxP(self.x, k=3, initial=50),
        )

    def test_std_mean(self):
        multiples = [-1, -0.5, 0.5, 1]
        _assertions(
            mapclassify.classify(self.x, "std_mean", multiples=multiples),
            mapclassify.StdMean(self.x, multiples=multiples),
        )

    def test_user_defined(self):
        bins = [20, max(self.x)]
        _assertions(
            mapclassify.classify(self.x, "user_defined", bins=bins),
            mapclassify.UserDefined(self.x, bins=bins),
        )

    def test_bad_classifier(self):
        with pytest.raises(ValueError, match="Invalid scheme: 'georgecostanza'"):
            mapclassify.classify(self.x, "George_Costanza")
101 |
--------------------------------------------------------------------------------
/mapclassify/tests/test_greedy.py:
--------------------------------------------------------------------------------
1 | import geopandas
2 | import libpysal
3 | import pytest
4 |
5 | from ..greedy import greedy
6 |
# Shared module-level fixture: Natural Earth admin-0 countries, fetched over
# the network once at import time.
world = geopandas.read_file(
    "https://naciscdn.org/naturalearth/110m/cultural/ne_110m_admin_0_countries.zip"
)
# Queen-contiguity spatial weights keyed by the dataframe's index labels.
sw = libpysal.weights.Queen.from_dataframe(
    world, ids=world.index.to_list(), silence_warnings=True
)
13 |
14 |
def _check_correctess(colors):
    """Validate a greedy coloring: full coverage and no adjacent duplicates.

    Parameters
    ----------
    colors : pandas.Series
        Color labels aligned with ``world``'s index.
    """
    assert len(colors) == len(world)
    for i, neighbors in sw.neighbors.items():
        # Every neighbor must receive a different color. The original guard
        # was ``len(neighbors) > 1``, which skipped geometries with exactly
        # one neighbor even though those must also differ from it.
        if len(neighbors) > 0:
            assert (colors[neighbors] != colors[i]).all()
20 |
21 |
@pytest.mark.filterwarnings("ignore:Geometry is in a geographic CRS.")
class TestGreedy:
    """Behavioral checks for ``greedy`` with default and explicit weights."""

    def test_default(self):
        result = greedy(world)
        assert {0, 1, 2, 3, 4} == set(result)
        assert [36, 36, 35, 35, 35] == result.value_counts().to_list()
        assert (result.index == world.index).all()
        _check_correctess(result)

    def test_rook(self):
        result = greedy(world, sw="rook")
        assert {0, 1, 2, 3, 4} == set(result)
        assert [36, 36, 35, 35, 35] == result.value_counts().to_list()
        _check_correctess(result)

    def test_sw(self):
        result = greedy(world, sw=sw)
        assert {0, 1, 2, 3, 4} == set(result)
        assert [36, 36, 35, 35, 35] == result.value_counts().to_list()
        _check_correctess(result)

    def test_min_distance(self):
        europe = world.loc[world.CONTINENT == "Europe"].to_crs(epsg=3035)
        result = greedy(europe, min_distance=500000)
        assert set(range(13)) == set(result)
        assert [3] * 13 == result.value_counts().to_list()

    def test_invalid_strategy(self):
        strategy = "spice melange"
        with pytest.raises(ValueError, match=f"'{strategy}' is not a valid strategy."):
            greedy(world, strategy=strategy)
53 |
54 |
@pytest.mark.filterwarnings("ignore:Geometry is in a geographic CRS.")
@pytest.mark.parametrize("pysal_geos", [None, 0])
class TestGreedyParams:
    """Golden-value checks for each ``greedy`` strategy and balance option.

    Parametrized over ``min_distance`` being absent (``None``) or ``0`` so
    both code paths for adjacency construction are exercised.
    """

    def test_count(self, pysal_geos):
        colors = greedy(
            world, strategy="balanced", balance="count", min_distance=pysal_geos
        )
        assert set(colors) == {0, 1, 2, 3, 4}
        assert colors.value_counts().to_list() == [36, 36, 35, 35, 35]
        _check_correctess(colors)

    def test_area(self, pysal_geos):
        colors = greedy(
            world, strategy="balanced", balance="area", min_distance=pysal_geos
        )
        assert set(colors) == {0, 1, 2, 3, 4}
        assert colors.value_counts().to_list() == [55, 49, 39, 32, 2]
        _check_correctess(colors)

    def test_centroid(self, pysal_geos):
        colors = greedy(
            world, strategy="balanced", balance="centroid", min_distance=pysal_geos
        )
        assert set(colors) == {0, 1, 2, 3, 4}
        assert colors.value_counts().to_list() == [39, 36, 36, 34, 32]
        _check_correctess(colors)

    def test_distance(self, pysal_geos):
        colors = greedy(
            world, strategy="balanced", balance="distance", min_distance=pysal_geos
        )
        assert set(colors) == {0, 1, 2, 3, 4}
        assert colors.value_counts().to_list() == [38, 36, 35, 34, 34]
        _check_correctess(colors)

    def test_largest_first(self, pysal_geos):
        colors = greedy(world, strategy="largest_first", min_distance=pysal_geos)
        assert set(colors) == {0, 1, 2, 3, 4}
        assert colors.value_counts().to_list() == [64, 49, 42, 21, 1]
        _check_correctess(colors)

    def test_random_sequential(self, pysal_geos):
        """based on random, no consistent results to be tested"""
        colors = greedy(world, strategy="random_sequential", min_distance=pysal_geos)
        _check_correctess(colors)

    def test_smallest_last(self, pysal_geos):
        colors = greedy(world, strategy="smallest_last", min_distance=pysal_geos)
        assert set(colors) == {0, 1, 2, 3}
        assert colors.value_counts().to_list() == [71, 52, 39, 15]
        _check_correctess(colors)

    def test_independent_set(self, pysal_geos):
        colors = greedy(world, strategy="independent_set", min_distance=pysal_geos)
        assert set(colors) == {0, 1, 2, 3, 4}
        assert colors.value_counts().to_list() == [91, 42, 26, 13, 5]
        _check_correctess(colors)

    def test_connected_sequential_bfs(self, pysal_geos):
        colors = greedy(
            world, strategy="connected_sequential_bfs", min_distance=pysal_geos
        )
        assert set(colors) == {0, 1, 2, 3, 4}
        _check_correctess(colors)

    def test_connected_sequential_dfs(self, pysal_geos):
        colors = greedy(
            world, strategy="connected_sequential_dfs", min_distance=pysal_geos
        )
        assert set(colors) == {0, 1, 2, 3, 4}
        _check_correctess(colors)

    def test_connected_sequential(self, pysal_geos):
        colors = greedy(world, strategy="connected_sequential", min_distance=pysal_geos)
        assert set(colors) == {0, 1, 2, 3, 4}
        _check_correctess(colors)

    def test_saturation_largest_first(self, pysal_geos):
        colors = greedy(
            world, strategy="saturation_largest_first", min_distance=pysal_geos
        )
        assert set(colors) == {0, 1, 2, 3}
        assert colors.value_counts().to_list() == [71, 47, 42, 17]
        _check_correctess(colors)

    def test_DSATUR(self, pysal_geos):
        colors = greedy(world, strategy="DSATUR", min_distance=pysal_geos)
        assert set(colors) == {0, 1, 2, 3}
        assert colors.value_counts().to_list() == [71, 47, 42, 17]
        _check_correctess(colors)

    def test_index(self, pysal_geos):
        # Coloring must be robust to a non-default (relabelled) index.
        world["ten"] = world.index * 10
        reindexed = world.set_index("ten")
        colors = greedy(reindexed, min_distance=pysal_geos)
        assert len(colors) == len(world)
        assert set(colors) == {0, 1, 2, 3, 4}
        assert colors.value_counts().to_list() == [36, 36, 35, 35, 35]
153 |
--------------------------------------------------------------------------------
/mapclassify/tests/test_legendgram.py:
--------------------------------------------------------------------------------
1 | import geopandas as gpd
2 | import matplotlib
3 | import matplotlib.pyplot as plt
4 | import numpy as np
5 | import pytest
6 | from libpysal import examples
7 | from matplotlib.testing.decorators import image_comparison
8 | from packaging.version import Version
9 |
10 | from mapclassify import EqualInterval, Quantiles
11 | from mapclassify.legendgram import _legendgram
12 |
13 |
class TestLegendgram:
    """Unit and image-comparison tests for ``mapclassify.legendgram._legendgram``.

    The ``image_comparison`` tests render against pre-generated baselines in
    ``tests/baseline_images/test_legendgram`` and are skipped on matplotlib
    3.11+ where font rendering changed.
    """

    def setup_method(self):
        # Fixed seed so the classifier (and any rendered image) is stable.
        np.random.seed(42)
        self.data = np.random.normal(0, 1, 100)
        self.classifier = EqualInterval(self.data, k=5)

    def test_legendgram_returns_axis(self):
        """Test that _legendgram returns a matplotlib axis"""
        _, ax = plt.subplots(figsize=(8, 6))
        histax = _legendgram(self.classifier, ax=ax)
        plt.close()

        assert isinstance(histax, matplotlib.axes.Axes)

    def test_legendgram_standalone(self):
        """Test that _legendgram works without providing an axis"""
        histax = _legendgram(self.classifier)
        plt.close()

        assert isinstance(histax, matplotlib.axes.Axes)

    def test_legendgram_inset_false(self):
        """Test that _legendgram works with inset=False"""
        _, ax = plt.subplots(figsize=(8, 6))
        histax = _legendgram(self.classifier, ax=ax, inset=False)
        plt.close()

        # When inset=False, histax should be the same as ax
        assert histax is ax

    def test_legendgram_clip(self):
        """Test that _legendgram applies clip parameter correctly"""
        _, ax = plt.subplots(figsize=(8, 6))
        clip_range = (-2, 2)
        histax = _legendgram(self.classifier, ax=ax, clip=clip_range)
        xlim = histax.get_xlim()
        plt.close()

        assert xlim[0] == clip_range[0]
        assert xlim[1] == clip_range[1]

    def test_legendgram_tick_params(self):
        """Test that _legendgram applies tick_params correctly"""
        _, ax = plt.subplots(figsize=(8, 6))
        custom_tick_params = {"labelsize": 20, "rotation": 45}
        _ = _legendgram(self.classifier, ax=ax, tick_params=custom_tick_params)
        plt.close()

    def test_legendgram_frameon(self):
        """Test that _legendgram applies frameon parameter correctly"""
        _, ax = plt.subplots(figsize=(8, 6))
        histax = _legendgram(self.classifier, ax=ax, frameon=True)
        is_frame_on = histax.get_frame_on()
        plt.close()

        assert is_frame_on

    @pytest.mark.skipif(
        Version(matplotlib.__version__) >= Version("3.11.0.dev"),
        reason="change of font rendering breaks image comparison",
        # once 3.11 lands, we should update expected and test against that
    )
    @image_comparison(
        baseline_images=["legendgram_default"],
        extensions=["png"],
        remove_text=False,
        tol=0.05,
    )
    def test_legendgram_default(self):
        """Test default legendgram appearance"""
        _, ax = plt.subplots(figsize=(8, 6))
        _legendgram(self.classifier, ax=ax)

    @pytest.mark.skipif(
        Version(matplotlib.__version__) >= Version("3.11.0.dev"),
        reason="change of font rendering breaks image comparison",
        # once 3.11 lands, we should update expected and test against that
    )
    @image_comparison(
        baseline_images=["legendgram_vlines"],
        extensions=["png"],
        remove_text=False,
        tol=0.05,
    )
    def test_legendgram_vlines(self):
        """Test legendgram with vertical lines"""
        _, ax = plt.subplots(figsize=(8, 6))
        _legendgram(self.classifier, ax=ax, vlines=True, vlinecolor="red", vlinewidth=2)

    @pytest.mark.skipif(
        Version(matplotlib.__version__) >= Version("3.11.0.dev"),
        reason="change of font rendering breaks image comparison",
        # once 3.11 lands, we should update expected and test against that
    )
    @image_comparison(
        baseline_images=["legendgram_cmap"],
        extensions=["png"],
        remove_text=False,
        tol=0.05,
    )
    def test_legendgram_cmap(self):
        """Test legendgram with custom colormap"""
        _, ax = plt.subplots(figsize=(8, 6))
        _legendgram(self.classifier, ax=ax, cmap="plasma")

    @pytest.mark.skipif(
        Version(matplotlib.__version__) >= Version("3.11.0.dev"),
        reason="change of font rendering breaks image comparison",
        # once 3.11 lands, we should update expected and test against that
    )
    @image_comparison(
        baseline_images=["legendgram_cmap"],
        extensions=["png"],
        remove_text=False,
        tol=0.05,
    )
    def test_legendgram_cmap_class(self):
        """Test legendgram with custom colormap"""
        # Same baseline as test_legendgram_cmap: a Colormap object should be
        # treated identically to its registered name.
        _, ax = plt.subplots(figsize=(8, 6))
        _legendgram(self.classifier, ax=ax, cmap=matplotlib.cm.plasma)

    @pytest.mark.skipif(
        Version(matplotlib.__version__) >= Version("3.11.0.dev"),
        reason="change of font rendering breaks image comparison",
        # once 3.11 lands, we should update expected and test against that
    )
    @image_comparison(
        baseline_images=["legendgram_position"],
        extensions=["png"],
        remove_text=False,
        tol=0.05,
    )
    def test_legendgram_position(self):
        """Test legendgram with custom position"""
        _, ax = plt.subplots(figsize=(8, 6))
        _legendgram(
            self.classifier, ax=ax, loc="upper right", legend_size=("40%", "30%")
        )

    @pytest.mark.skipif(
        Version(matplotlib.__version__) >= Version("3.11.0.dev"),
        reason="change of font rendering breaks image comparison",
        # once 3.11 lands, we should update expected and test against that
    )
    @image_comparison(
        baseline_images=["legendgram_map"],
        extensions=["png"],
        remove_text=False,
        tol=0.05,
    )
    def test_legendgram_map(self):
        """Test with geopandas map"""
        data = gpd.read_file(examples.get_path("south.shp")).to_crs(epsg=5070)
        ax = data.plot("DV80", k=10, scheme="Quantiles")
        classifier = Quantiles(data["DV80"].values, k=10)
        classifier.plot_legendgram(
            ax=ax, legend_size=("50%", "20%"), loc="upper left", clip=(2, 10)
        )
        ax.set_axis_off()

    @pytest.mark.skipif(
        Version(matplotlib.__version__) >= Version("3.11.0.dev"),
        reason="change of font rendering breaks image comparison",
        # once 3.11 lands, we should update expected and test against that
    )
    @image_comparison(
        baseline_images=["legendgram_kwargs"],
        extensions=["png"],
        remove_text=False,
        tol=0.05,
    )
    def test_legendgram_kwargs(self):
        """Test default legendgram appearance"""
        _, ax = plt.subplots(figsize=(8, 6))
        _legendgram(
            self.classifier, ax=ax, legend_size=("20%", "30%"), orientation="horizontal"
        )
191 |
--------------------------------------------------------------------------------
/mapclassify/tests/test_mapclassify.py:
--------------------------------------------------------------------------------
1 | import types
2 |
3 | import numpy
4 | import pytest
5 |
6 | from ..classifiers import *
7 | from ..classifiers import bin, bin1d, binC, load_example
8 | from ..pooling import Pooled
9 |
RTOL = 0.0001  # relative tolerance shared by the assert_allclose checks below
11 |
12 |
class TestQuantile:
    """Tests for the ``quantile`` break-computation helper."""

    def test_quantile(self):
        values = numpy.arange(1000)
        numpy.testing.assert_almost_equal(
            numpy.array([333.0, 666.0, 999.0]), quantile(values, k=3)
        )

    def test_quantile_k4(self):
        values = numpy.arange(1000)
        numpy.testing.assert_array_almost_equal(
            numpy.array([249.75, 499.5, 749.25, 999.0]), quantile(values, k=4)
        )

    def test_quantile_k(self):
        values = numpy.random.random(1000)
        for k in range(5, 10):
            breaks = quantile(values, k)
            numpy.testing.assert_almost_equal(k, len(breaks))
            assert k == len(breaks)
30 |
31 |
class TestUpdate:
    """Tests for ``MapClassifier.update`` with additional observations.

    Golden ``yb`` vectors below are tied to the fixed seed in
    ``setup_method``; do not change the seed without regenerating them.
    """

    def setup_method(self):
        numpy.random.seed(4414)
        self.data = numpy.random.normal(0, 10, size=10)
        self.new_data = numpy.random.normal(0, 10, size=4)

    def test_update(self):
        # Quantiles
        quants = Quantiles(self.data, k=3)
        known_yb = numpy.array([0, 1, 0, 1, 0, 2, 0, 2, 1, 2])
        numpy.testing.assert_allclose(quants.yb, known_yb, rtol=RTOL)

        # Updating appends the new observations and re-fits with k=4.
        new_yb = quants.update(self.new_data, k=4).yb
        known_new_yb = numpy.array([0, 3, 1, 0, 1, 2, 0, 2, 1, 3, 0, 3, 2, 3])
        numpy.testing.assert_allclose(new_yb, known_new_yb, rtol=RTOL)

        # User-Defined
        ud = UserDefined(self.data, [-20, 0, 5, 20])
        known_yb = numpy.array([1, 2, 1, 1, 1, 2, 0, 2, 1, 3])
        numpy.testing.assert_allclose(ud.yb, known_yb, rtol=RTOL)

        new_yb = ud.update(self.new_data).yb
        known_new_yb = numpy.array([1, 3, 1, 1, 1, 2, 1, 1, 1, 2, 0, 2, 1, 3])
        numpy.testing.assert_allclose(new_yb, known_new_yb, rtol=RTOL)

        # Fisher-Jenks Sampled
        fjs = FisherJenksSampled(self.data, k=3, pct=70)
        known_yb = numpy.array([1, 2, 0, 1, 1, 2, 0, 2, 1, 2])
        numpy.testing.assert_allclose(known_yb, fjs.yb, rtol=RTOL)

        new_yb = fjs.update(self.new_data, k=2).yb
        known_new_yb = numpy.array([0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1])
        numpy.testing.assert_allclose(known_new_yb, new_yb, rtol=RTOL)
65 |
66 |
class TestFindBin:
    """Tests for ``MapClassifier.find_bin`` on the example dataset."""

    def setup_method(self):
        self.V = load_example()

    def test_find_bin(self):
        candidates = [0, 1, 3, 5, 50, 70, 101, 202, 390, 505, 800, 5000, 5001]
        expected_by_k = {
            5: [0, 0, 0, 0, 0, 0, 1, 2, 3, 3, 4, 4, 4],
            9: [0, 0, 0, 0, 2, 2, 3, 5, 7, 7, 8, 8, 8],
        }
        for k, expected in expected_by_k.items():
            mc = FisherJenks(self.V, k=k)
            numpy.testing.assert_array_equal(expected, mc.find_bin(candidates))
80 |
81 |
class TestMake:
    """Tests for the ``make`` classmethod, which builds a classifying function."""

    def setup_method(self):
        self.data = [
            numpy.linspace(-5, 5, num=5),
            numpy.linspace(-10, 10, num=5),
            numpy.linspace(-20, 20, num=5),
        ]
        self.ei = EqualInterval.make()
        self.q5r = Quantiles.make(k=5, rolling=True)

    def test_make(self):
        # ``make`` returns plain functions carrying the bound options in
        # their ``_options`` attribute.
        assert isinstance(self.ei, types.FunctionType)
        assert isinstance(self.q5r, types.FunctionType)

        assert hasattr(self.ei, "_options")
        assert self.ei._options == dict()
        assert hasattr(self.q5r, "_options")
        assert self.q5r._options == {"k": 5, "rolling": True}

    def test_apply(self):
        ei_classes = [self.ei(d) for d in self.data]
        known = [numpy.arange(0, 5, 1)] * 3
        numpy.testing.assert_allclose(known, ei_classes)

        q5r_classes = [self.q5r(d) for d in self.data]
        known = [[0, 1, 2, 3, 4], [0, 0, 2, 3, 4], [0, 0, 2, 4, 4]]
        # With rolling=True the classifier accretes all data it has seen
        # (stored on the function's default argument).
        accreted_data = set(self.q5r.__defaults__[0].y)
        all_data = set(numpy.asarray(self.data).flatten())
        assert accreted_data == all_data
        numpy.testing.assert_allclose(known, q5r_classes)
112 |
113 |
class TestBinC:
    """Tests for ``binC``: categorical bin membership for 2-D data."""

    def test_bin_c(self):
        # Bins are the discrete values 2..7; each entry of ``y`` maps to the
        # index of its own value (i.e. expected == y - 2).
        bins = list(range(2, 8))
        y = numpy.array(
            [
                [7, 5, 6],
                [2, 3, 5],
                [7, 2, 2],
                [3, 6, 7],
                [6, 3, 4],
                [6, 7, 4],
                [6, 5, 6],
                [4, 6, 7],
                [4, 6, 3],
                [3, 2, 7],
            ]
        )

        expected = numpy.array(
            [
                [5, 3, 4],
                [0, 1, 3],
                [5, 0, 0],
                [1, 4, 5],
                [4, 1, 2],
                [4, 5, 2],
                [4, 3, 4],
                [2, 4, 5],
                [2, 4, 1],
                [1, 0, 5],
            ]
        )
        numpy.testing.assert_array_equal(expected, binC(y, bins))
147 |
148 |
class TestBin:
    """Tests for ``bin``: interval bin membership for 2-D data."""

    def test_bin(self):
        y = numpy.array(
            [
                [7, 13, 14],
                [10, 11, 13],
                [7, 17, 2],
                [18, 3, 14],
                [9, 15, 8],
                [7, 13, 12],
                [16, 6, 11],
                [19, 2, 15],
                [11, 11, 9],
                [3, 2, 19],
            ]
        )
        # Upper bounds of the three intervals: (-inf,10], (10,15], (15,20].
        bins = [10, 15, 20]
        expected = numpy.array(
            [
                [0, 1, 1],
                [0, 1, 1],
                [0, 2, 0],
                [2, 0, 1],
                [0, 1, 0],
                [0, 1, 1],
                [2, 0, 1],
                [2, 0, 1],
                [1, 1, 0],
                [0, 0, 2],
            ]
        )

        numpy.testing.assert_array_equal(expected, bin(y, bins))
182 |
183 |
class TestBin1d:
    """Tests for ``bin1d``: 1-D bin ids and per-bin counts."""

    def test_bin1d(self):
        values = numpy.arange(100, dtype="float")
        edges = [25, 74, 100]
        # Values 0..25 -> bin 0 (26 of them), 26..74 -> bin 1 (49),
        # 75..99 -> bin 2 (25).
        expected_ids = numpy.repeat([0, 1, 2], [26, 49, 25])
        expected_counts = numpy.array([26, 49, 25])

        ids, counts = bin1d(values, edges)
        numpy.testing.assert_array_equal(expected_ids, ids)
        numpy.testing.assert_array_equal(expected_counts, counts)
296 |
297 |
class TestNaturalBreaks:
    """Tests for the ``NaturalBreaks`` classifier."""

    def setup_method(self):
        self.V = load_example()

    def test_natural_breaks(self):
        # assert expected, natural_breaks(values, k, itmax))
        assert True  # TODO: implement your test here

    def test_NaturalBreaks(self):
        nb = NaturalBreaks(self.V, 5)
        assert nb.k == 5
        assert len(nb.counts) == 5
        numpy.testing.assert_array_almost_equal(
            nb.counts, numpy.array([49, 3, 4, 1, 1])
        )

    def test_NaturalBreaks_stability(self):
        # Repeated runs on the same data should consistently yield k classes.
        for _ in range(10):
            nb = NaturalBreaks(self.V, 5)
            assert nb.k == 5
            assert len(nb.counts) == 5

    def test_NaturalBreaks_randomData(self):
        # Random inputs at several scales should also yield k classes.
        for i in range(10):
            V = numpy.random.random(50) * (i + 1)
            nb = NaturalBreaks(V, 5)
            assert nb.k == 5
            assert len(nb.counts) == 5
326 |
327 |
class TestHeadTailBreaks:
    """Tests for ``HeadTailBreaks`` on a heavy-tailed series."""

    def setup_method(self):
        # Power-law-like values: i ** -2 for i in 1..999.
        self.V = numpy.array([i ** (-2) for i in range(1, 1000)])

    def test_HeadTailBreaks(self):
        htb = HeadTailBreaks(self.V)
        assert htb.k == 4
        assert len(htb.counts) == 4
        numpy.testing.assert_array_almost_equal(
            htb.counts, numpy.array([975, 21, 2, 1])
        )

    def test_HeadTailBreaks_doublemax(self):
        doubled_max = numpy.append(self.V, self.V.max())
        htb = HeadTailBreaks(doubled_max)
        assert htb.k == 4
        assert len(htb.counts) == 4
        numpy.testing.assert_array_almost_equal(
            htb.counts, numpy.array([980, 17, 1, 2])
        )

    def test_HeadTailBreaks_float(self):
        near_identical = numpy.array([1 + 2**-52, 1, 1])
        htb = HeadTailBreaks(near_identical)
        assert htb.k == 2
        assert len(htb.counts) == 2
        numpy.testing.assert_array_almost_equal(htb.counts, numpy.array([2, 1]))
359 |
360 |
361 | class TestPrettyBreaks:
362 | def setup_method(self):
363 | self.V = load_example()
364 |
365 | def test_pretty(self):
366 | res = PrettyBreaks(self.V)
367 | assert res.k == 5
368 | numpy.testing.assert_array_equal(res.counts, [57, 0, 0, 0, 1])
369 | numpy.testing.assert_array_equal(res.bins, list(range(1000, 6000, 1000)))
370 |
371 |
372 | class TestMapClassifier:
373 | def test_Map_Classifier(self):
374 | # map__classifier = Map_Classifier(y)
375 | assert True # TODO: implement your test here
376 |
377 | def test___repr__(self):
378 | # map__classifier = Map_Classifier(y)
379 | # assert expected, map__classifier.__repr__())
380 | assert True # TODO: implement your test here
381 |
382 | def test___str__(self):
383 | # map__classifier = Map_Classifier(y)
384 | # assert expected, map__classifier.__str__())
385 | assert True # TODO: implement your test here
386 |
387 | def test_get_adcm(self):
388 | # map__classifier = Map_Classifier(y)
389 | # assert expected, map__classifier.get_adcm())
390 | assert True # TODO: implement your test here
391 |
392 | def test_get_gadf(self):
393 | # map__classifier = Map_Classifier(y)
394 | # assert expected, map__classifier.get_gadf())
395 | assert True # TODO: implement your test here
396 |
397 | def test_get_tss(self):
398 | # map__classifier = Map_Classifier(y)
399 | # assert expected, map__classifier.get_tss())
400 | assert True # TODO: implement your test here
401 |
402 |
403 | class TestEqualInterval:
404 | def setup_method(self):
405 | self.V = load_example()
406 |
407 | def test_EqualInterval(self):
408 | ei = EqualInterval(self.V)
409 | numpy.testing.assert_array_almost_equal(
410 | ei.counts, numpy.array([57, 0, 0, 0, 1])
411 | )
412 | numpy.testing.assert_array_almost_equal(
413 | ei.bins, numpy.array([822.394, 1644.658, 2466.922, 3289.186, 4111.45])
414 | )
415 |
416 | with pytest.raises(
417 | ValueError, match="Not enough unique values in array to form 5 classes."
418 | ):
419 | EqualInterval(numpy.array([1, 1, 1, 1]))
420 |
421 |
422 | class TestPercentiles:
423 | def setup_method(self):
424 | self.V = load_example()
425 |
426 | def test_Percentiles(self):
427 | pc = Percentiles(self.V)
428 | numpy.testing.assert_array_almost_equal(
429 | pc.bins,
430 | numpy.array(
431 | [
432 | 1.35700000e-01,
433 | 5.53000000e-01,
434 | 9.36500000e00,
435 | 2.13914000e02,
436 | 2.17994800e03,
437 | 4.11145000e03,
438 | ]
439 | ),
440 | )
441 | numpy.testing.assert_array_almost_equal(
442 | pc.counts, numpy.array([1, 5, 23, 23, 5, 1])
443 | )
444 |
445 |
446 | class TestBoxPlot:
447 | def setup_method(self):
448 | self.V = load_example()
449 |
450 | def test_BoxPlot(self):
451 | bp = BoxPlot(self.V)
452 | bins = numpy.array(
453 | [
454 | -5.28762500e01,
455 | 2.56750000e00,
456 | 9.36500000e00,
457 | 3.95300000e01,
458 | 9.49737500e01,
459 | 4.11145000e03,
460 | ]
461 | )
462 | numpy.testing.assert_array_almost_equal(bp.bins, bins)
463 |
464 |
465 | class TestQuantiles:
466 | def setup_method(self):
467 | self.V = load_example()
468 |
469 | def test_Quantiles(self):
470 | q = Quantiles(self.V, k=5)
471 | numpy.testing.assert_array_almost_equal(
472 | q.bins,
473 | numpy.array(
474 | [
475 | 1.46400000e00,
476 | 5.79800000e00,
477 | 1.32780000e01,
478 | 5.46160000e01,
479 | 4.11145000e03,
480 | ]
481 | ),
482 | )
483 | numpy.testing.assert_array_almost_equal(
484 | q.counts, numpy.array([12, 11, 12, 11, 12])
485 | )
486 |
487 |
488 | class TestStdMean:
489 | def setup_method(self):
490 | self.V = load_example()
491 |
492 | def test_StdMean(self):
493 | std_mean = StdMean(self.V)
494 | numpy.testing.assert_array_almost_equal(
495 | std_mean.bins,
496 | numpy.array(
497 | [-967.36235382, -420.71712519, 672.57333208, 1219.21856072, 4111.45]
498 | ),
499 | )
500 | numpy.testing.assert_array_almost_equal(
501 | std_mean.counts, numpy.array([0, 0, 56, 1, 1])
502 | )
503 |
504 |
505 | class TestMaximumBreaks:
506 | def setup_method(self):
507 | self.V = load_example()
508 |
509 | def test_MaximumBreaks(self):
510 | mb = MaximumBreaks(self.V, k=5)
511 | assert mb.k == 5
512 | numpy.testing.assert_array_almost_equal(
513 | mb.bins, numpy.array([146.005, 228.49, 546.675, 2417.15, 4111.45])
514 | )
515 | numpy.testing.assert_array_almost_equal(
516 | mb.counts, numpy.array([50, 2, 4, 1, 1])
517 | )
518 |
519 | with pytest.raises(
520 | ValueError, match="Not enough unique values in array to form 5 classes."
521 | ):
522 | MaximumBreaks(numpy.array([1, 1, 1, 1]))
523 |
524 |
525 | class TestFisherJenks:
526 | def setup_method(self):
527 | self.V = load_example()
528 |
529 | def test_FisherJenks(self):
530 | fj = FisherJenks(self.V)
531 | assert fj.adcm == 799.24000000000001
532 | numpy.testing.assert_array_almost_equal(
533 | fj.bins, numpy.array([75.29, 192.05, 370.5, 722.85, 4111.45])
534 | )
535 | numpy.testing.assert_array_almost_equal(
536 | fj.counts, numpy.array([49, 3, 4, 1, 1])
537 | )
538 |
539 |
540 | class TestJenksCaspall:
541 | def setup_method(self):
542 | self.V = load_example()
543 |
544 | def test_JenksCaspall(self):
545 | numpy.random.seed(10)
546 | jc = JenksCaspall(self.V, k=5)
547 | numpy.testing.assert_array_almost_equal(
548 | jc.counts, numpy.array([14, 13, 14, 10, 7])
549 | )
550 | numpy.testing.assert_array_almost_equal(
551 | jc.bins,
552 | numpy.array(
553 | [
554 | 1.81000000e00,
555 | 7.60000000e00,
556 | 2.98200000e01,
557 | 1.81270000e02,
558 | 4.11145000e03,
559 | ]
560 | ),
561 | )
562 |
563 |
564 | class TestJenksCaspallSampled:
565 | def setup_method(self):
566 | self.V = load_example()
567 |
568 | def test_JenksCaspallSampled(self):
569 | numpy.random.seed(100)
570 | x = numpy.random.random(100000)
571 | jc = JenksCaspall(x)
572 | jcs = JenksCaspallSampled(x)
573 | numpy.testing.assert_array_almost_equal(
574 | jc.bins,
575 | numpy.array([0.19718393, 0.39655886, 0.59648522, 0.79780763, 0.99997979]),
576 | )
577 | numpy.testing.assert_array_almost_equal(
578 | jcs.bins,
579 | numpy.array([0.20856569, 0.41513931, 0.62457691, 0.82561423, 0.99997979]),
580 | )
581 |
582 |
583 | class TestJenksCaspallForced:
584 | def setup_method(self):
585 | self.V = load_example()
586 |
587 | def test_JenksCaspallForced(self):
588 | numpy.random.seed(100)
589 | jcf = JenksCaspallForced(self.V, k=5)
590 | numpy.testing.assert_array_almost_equal(
591 | jcf.bins,
592 | numpy.array(
593 | [
594 | 1.34000000e00,
595 | 5.90000000e00,
596 | 1.67000000e01,
597 | 5.06500000e01,
598 | 4.11145000e03,
599 | ]
600 | ),
601 | )
602 | numpy.testing.assert_array_almost_equal(
603 | jcf.counts, numpy.array([12, 12, 13, 9, 12])
604 | )
605 |
606 | with pytest.raises(
607 | ValueError, match="Not enough unique values in array to form 5 classes."
608 | ):
609 | JenksCaspallForced(numpy.array([1, 1, 1, 1]))
610 |
611 |
612 | class TestUserDefined:
613 | def setup_method(self):
614 | self.V = load_example()
615 |
616 | def test_UserDefined(self):
617 | bins = [20, max(self.V)]
618 | ud = UserDefined(self.V, bins)
619 | numpy.testing.assert_array_almost_equal(ud.bins, numpy.array([20.0, 4111.45]))
620 | numpy.testing.assert_array_almost_equal(ud.counts, numpy.array([37, 21]))
621 |
622 | def test_UserDefined_max(self):
623 | bins = numpy.array([20, 30])
624 | ud = UserDefined(self.V, bins)
625 | numpy.testing.assert_array_almost_equal(
626 | ud.bins, numpy.array([20.0, 30.0, 4111.45])
627 | )
628 | numpy.testing.assert_array_almost_equal(ud.counts, numpy.array([37, 4, 17]))
629 |
630 | def test_UserDefined_invariant(self):
631 | bins = [10, 20, 30, 40]
632 | ud = UserDefined(numpy.array([12, 12, 12]), bins)
633 | numpy.testing.assert_array_almost_equal(ud.bins, numpy.array([10, 20, 30, 40]))
634 | numpy.testing.assert_array_almost_equal(ud.counts, numpy.array([0, 3, 0, 0]))
635 |
636 | def test_UserDefined_lowest(self):
637 | bins = [20, max(self.V)]
638 | ud = UserDefined(self.V, bins, lowest=-1.0)
639 | numpy.testing.assert_array_almost_equal(ud.bins, numpy.array([20.0, 4111.45]))
640 | numpy.testing.assert_array_almost_equal(ud.counts, numpy.array([37, 21]))
641 | classes = ["[ -1.00, 20.00]", "( 20.00, 4111.45]"]
642 | assert ud.get_legend_classes() == classes
643 |
644 |
645 | class TestStdMeanAnchor:
646 | def setup_method(self):
647 | self.V = load_example()
648 |
649 | def test_StdMeanAnchor(self):
650 | sm = StdMean(self.V, anchor=True)
651 | bins = numpy.array(
652 | [
653 | 125.92810345,
654 | 672.57333208,
655 | 1219.21856072,
656 | 1765.86378936,
657 | 2312.50901799,
658 | 2859.15424663,
659 | 3405.79947527,
660 | 3952.4447039,
661 | 4111.45,
662 | ]
663 | )
664 | counts = numpy.array([50, 6, 1, 0, 0, 0, 0, 0, 1])
665 | numpy.testing.assert_array_almost_equal(sm.bins, bins)
666 | numpy.testing.assert_array_almost_equal(sm.counts, counts)
667 |
668 |
669 | class TestMaxP:
670 | def setup_method(self):
671 | self.V = load_example()
672 |
673 | def test_MaxP(self):
674 | numpy.random.seed(100)
675 | mp = MaxP(self.V)
676 | numpy.testing.assert_array_almost_equal(
677 | mp.bins,
678 | numpy.array([3.16000e00, 1.26300e01, 1.67000e01, 2.04700e01, 4.11145e03]),
679 | )
680 | numpy.testing.assert_array_almost_equal(
681 | mp.counts, numpy.array([18, 16, 3, 1, 20])
682 | )
683 |
684 | with pytest.raises(
685 | ValueError, match="Not enough unique values in array to form 5 classes."
686 | ):
687 | MaxP(numpy.array([1, 1, 1, 1]))
688 |
689 |
690 | class TestGadf:
691 | def setup_method(self):
692 | self.V = load_example()
693 |
694 | def test_gadf(self):
695 | qgadf = gadf(self.V)
696 | assert qgadf[0] == 15
697 | assert qgadf[-1] == 0.37402575909092828
698 |
699 |
700 | class TestKClassifiers:
701 | def setup_method(self):
702 | self.V = load_example()
703 |
704 | def test_K_classifiers(self):
705 | numpy.random.seed(100)
706 | ks = KClassifiers(self.V)
707 | assert ks.best.name == "FisherJenks"
708 | assert ks.best.gadf == 0.84810327199081048
709 | assert ks.best.k == 4
710 |
711 |
712 | class TestPooled:
713 | def setup_method(self):
714 | n = 20
715 | self.data = numpy.array([numpy.arange(n) + i * n for i in range(1, 4)]).T
716 |
717 | def test_pooled(self):
718 | res = Pooled(self.data, k=4)
719 | assert res.k == 4
720 | numpy.testing.assert_array_almost_equal(
721 | res.col_classifiers[0].counts, numpy.array([15, 5, 0, 0])
722 | )
723 | numpy.testing.assert_array_almost_equal(
724 | res.col_classifiers[-1].counts, numpy.array([0, 0, 5, 15])
725 | )
726 | numpy.testing.assert_array_almost_equal(
727 | res.global_classifier.counts, numpy.array([15, 15, 15, 15])
728 | )
729 | res = Pooled(self.data, classifier="BoxPlot", hinge=1.5)
730 | numpy.testing.assert_array_almost_equal(
731 | res.col_classifiers[0].bins, numpy.array([-9.5, 34.75, 49.5, 64.25, 108.5])
732 | )
733 |
734 | def test_pooled_bad_classifier(self):
735 | classifier = "Larry David"
736 | message = f"'{classifier}' not a valid classifier."
737 | with pytest.raises(ValueError, match=message):
738 | Pooled(self.data, classifier=classifier, k=4)
739 |
740 |
741 | class TestPlots:
742 | def setup_method(self):
743 | n = 20
744 | self.data = numpy.array([numpy.arange(n) + i * n for i in range(1, 4)]).T
745 |
746 | @pytest.mark.mpl_image_compare
747 | def test_histogram_plot(self):
748 | ax = Quantiles(self.data).plot_histogram()
749 | return ax.get_figure()
750 |
751 | @pytest.mark.mpl_image_compare
752 | def test_histogram_plot_despine(self):
753 | ax = Quantiles(self.data).plot_histogram(despine=False)
754 | return ax.get_figure()
755 |
756 | @pytest.mark.mpl_image_compare
757 | def test_histogram_plot_linewidth(self):
758 | ax = Quantiles(self.data).plot_histogram(
759 | linewidth=3, linecolor="red", color="yellow"
760 | )
761 | return ax.get_figure()
762 |
--------------------------------------------------------------------------------
/mapclassify/tests/test_rgba.py:
--------------------------------------------------------------------------------
1 | import geopandas
2 | import numpy as np
3 | from numpy.testing import assert_array_equal
4 |
5 | from mapclassify.util import get_color_array
6 |
7 | world = geopandas.read_file(
8 | "https://naciscdn.org/naturalearth/110m/cultural/ne_110m_admin_0_countries.zip"
9 | )
10 | world["area"] = world.area
11 |
12 | # columns are equivalent except for nan in the first position
13 | world["nanarea"] = world.area
14 | world.loc[0, "nanarea"] = np.nan
15 |
16 |
17 | def test_rgba():
18 | colors = get_color_array(world["area"], cmap="viridis")[-1]
19 | assert_array_equal(colors, np.array([94, 201, 97, 255]))
20 |
21 |
22 | def test_rgba_hex():
23 | colors = get_color_array(world["area"], cmap="viridis", as_hex=True)[-1]
24 | assert_array_equal(colors, "#5ec961")
25 |
26 |
27 | def test_rgba_nan():
28 | colors = get_color_array(world["nanarea"], cmap="viridis", nan_color=[0, 0, 0, 0])
29 | # should be nan_color
30 | assert_array_equal(colors[0], np.array([0, 0, 0, 0]))
31 | # still a cmap color
32 | assert_array_equal(colors[-1], np.array([94, 201, 97, 255]))
33 |
34 |
35 | def test_rgba_nan_hex():
36 | colors = get_color_array(
37 | world["nanarea"], cmap="viridis", nan_color=[0, 0, 0, 0], as_hex=True
38 | )
39 | assert_array_equal(colors[0], np.array(["#000000"]))
40 | assert_array_equal(colors[-1], np.array(["#5ec961"]))
41 |
--------------------------------------------------------------------------------
/mapclassify/util.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from ._classify_API import classify as _classify
4 |
5 |
def get_color_array(
    values,
    scheme="quantiles",
    cmap="viridis",
    alpha=1,
    nan_color=[255, 255, 255, 255],
    as_hex=False,
    **kwargs,
):
    """Convert array of values into RGBA or hex colors using a colormap and classifier.

    This function is useful for visualization libraries that require users to provide
    an array of colors for each object (like pydeck or lonboard) but can also be used
    to create a manual column of colors passed to matplotlib.

    Parameters
    ----------
    values : list-like
        array of input values
    scheme : str, optional
        string description of a mapclassify classifier, by default `"quantiles"`
    cmap : str, optional
        name of matplotlib colormap to use, by default "viridis"
    alpha : float
        alpha parameter that defines transparency. Should be in the range [0,1]
    nan_color : list, optional
        RGBA color to fill NaN values, by default [255, 255, 255, 255]
    as_hex : bool, optional
        if True, return a (n,1)-dimensional array of hexcolors instead of a (n,4)
        dimensional array of RGBA values.
    kwargs : dict
        additional keyword arguments are passed to `mapclassify.classify`

    Returns
    -------
    numpy.array
        numpy array (aligned with the input array) defining a color for each row. If
        `as_hex` is False, the array is :math:`(n,4)` holding an array of RGBA values in
        each row. If `as_hex` is True, the array is :math:`(n,1)` holding a hexcolor in
        each row.

    Raises
    ------
    ImportError
        if pandas or matplotlib is not installed.
    ValueError
        if `alpha` is outside the range [0,1], or `nan_color` is not a
        4-element list-like.
    """
    try:
        import pandas as pd
        from matplotlib import colormaps
        from matplotlib.colors import Normalize, to_hex
    except ImportError as e:
        raise ImportError("This function requires pandas and matplotlib") from e
    # FIX(review): the original `not (alpha <= 1) and (alpha >= 0)` parsed as
    # `(not (alpha <= 1)) and (alpha >= 0)` and so only rejected alpha > 1;
    # a chained comparison rejects values on both sides of the range.
    if not 0 <= alpha <= 1:
        raise ValueError("alpha must be in the range [0,1]")
    # FIX(review): the original joined the two conditions with `and`, letting a
    # wrong-length list-like pass validation (and a non-list-like would crash on
    # len() instead of raising the intended message); `or` short-circuits so
    # len() is only reached for list-likes.
    if not pd.api.types.is_list_like(nan_color) or len(nan_color) != 4:
        raise ValueError("`nan_color` must be list-like of 4 values: (R,G,B,A)")

    # only operate on non-NaN values
    v = pd.Series(values, dtype=object)
    legit_indices = v[~v.isna()].index.values
    legit_vals = v.dropna().values
    bogus_indices = v[v.isna()].index.values  # stash these for use later
    # transform (non-NaN) values into class bins
    bins = _classify(legit_vals, scheme=scheme, **kwargs).yb

    # normalize using the data's range (not strictly 1-k if classifier is degenerate)
    norm = Normalize(min(bins), max(bins))
    normalized_vals = norm(bins)

    # generate RGBA array and convert to series
    rgbas = colormaps[cmap](normalized_vals, bytes=True, alpha=alpha)
    colors = pd.Series(list(rgbas), index=legit_indices).apply(np.array)
    nan_colors = pd.Series(
        [nan_color for i in range(len(bogus_indices))], index=bogus_indices
    ).apply(lambda x: np.array(x).astype(np.uint8))

    # put colors in their correct places and fill empty with specified color
    v.update(colors)
    v.update(nan_colors)

    # convert to hexcolors if preferred
    if as_hex:
        colors = v.apply(lambda x: to_hex(x / 255.0))
        return colors.values
    return np.stack(v.values)
86 |
--------------------------------------------------------------------------------
/notebooks/01_maximum_breaks.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Introduction to mapclassify\n",
8 | "\n",
9 | "`mapclassify` implements a family of classification schemes for choropleth maps. \n",
10 | "Its focus is on the determination of the number of classes, and the assignment of observations to those classes.\n",
11 | "It is intended for use with upstream mapping and geovisualization packages (see [geopandas](https://geopandas.org/mapping.html) and [geoplot](https://residentmario.github.io/geoplot/user_guide/Customizing_Plots.html) for examples) that handle the rendering of the maps.\n",
12 | "\n",
13 | "In this notebook, the basic functionality of mapclassify is presented."
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 1,
19 | "metadata": {
20 | "ExecuteTime": {
21 | "end_time": "2022-11-04T16:51:55.127728Z",
22 | "start_time": "2022-11-04T16:51:54.017906Z"
23 | }
24 | },
25 | "outputs": [
26 | {
27 | "data": {
28 | "text/plain": [
29 | "'2.4.2+78.gc62d2d7.dirty'"
30 | ]
31 | },
32 | "execution_count": 1,
33 | "metadata": {},
34 | "output_type": "execute_result"
35 | }
36 | ],
37 | "source": [
38 | "import mapclassify as mc\n",
39 | "\n",
40 | "mc.__version__"
41 | ]
42 | },
43 | {
44 | "cell_type": "markdown",
45 | "metadata": {},
46 | "source": [
47 | "## Example data\n",
48 | "`mapclassify` contains a built-in dataset for employment density for the 58 California counties."
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": 2,
54 | "metadata": {
55 | "ExecuteTime": {
56 | "end_time": "2022-11-04T16:51:55.397263Z",
57 | "start_time": "2022-11-04T16:51:55.130764Z"
58 | }
59 | },
60 | "outputs": [],
61 | "source": [
62 | "y = mc.load_example()"
63 | ]
64 | },
65 | {
66 | "cell_type": "markdown",
67 | "metadata": {},
68 | "source": [
69 | "## Basic Functionality\n",
70 | "All classifiers in `mapclassify` have a common interface and afford similar functionality. We illustrate these using the `MaximumBreaks` classifier.\n",
71 | "\n",
72 | "`MaximumBreaks` requires that the user specify the number of classes `k`. Given this, the logic of the classifier is to sort the observations in ascending order and find the difference between rank adjacent values. The class boundaries are defined as the $k-1$ largest rank-adjacent breaks in the sorted values."
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": 3,
78 | "metadata": {
79 | "ExecuteTime": {
80 | "end_time": "2022-11-04T16:51:55.407290Z",
81 | "start_time": "2022-11-04T16:51:55.401874Z"
82 | }
83 | },
84 | "outputs": [
85 | {
86 | "data": {
87 | "text/plain": [
88 | "MaximumBreaks\n",
89 | "\n",
90 | " Interval Count\n",
91 | "--------------------------\n",
92 | "[ 0.13, 228.49] | 52\n",
93 | "( 228.49, 546.67] | 4\n",
94 | "( 546.67, 2417.15] | 1\n",
95 | "(2417.15, 4111.45] | 1"
96 | ]
97 | },
98 | "execution_count": 3,
99 | "metadata": {},
100 | "output_type": "execute_result"
101 | }
102 | ],
103 | "source": [
104 | "mc.MaximumBreaks(y, k=4)"
105 | ]
106 | },
107 | {
108 | "cell_type": "markdown",
109 | "metadata": {},
110 | "source": [
111 | "The classifier returns an instance of `MaximumBreaks` that reports the resulting intervals and counts. The first class has closed lower and upper bounds:\n",
112 | "\n",
113 | "```\n",
114 | "[ 0.13, 228.49]\n",
115 | "```\n",
116 | "\n",
117 | "with `0.13` being the minimum value in the dataset:"
118 | ]
119 | },
120 | {
121 | "cell_type": "code",
122 | "execution_count": 4,
123 | "metadata": {
124 | "ExecuteTime": {
125 | "end_time": "2022-11-04T16:51:55.413265Z",
126 | "start_time": "2022-11-04T16:51:55.408990Z"
127 | }
128 | },
129 | "outputs": [
130 | {
131 | "data": {
132 | "text/plain": [
133 | "0.13"
134 | ]
135 | },
136 | "execution_count": 4,
137 | "metadata": {},
138 | "output_type": "execute_result"
139 | }
140 | ],
141 | "source": [
142 | "y.min()"
143 | ]
144 | },
145 | {
146 | "cell_type": "markdown",
147 | "metadata": {},
148 | "source": [
149 | "Subsequent intervals are open on the lower bound and closed on the upper bound. The fourth class has the maximum value as its closed upper bound:"
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": 5,
155 | "metadata": {
156 | "ExecuteTime": {
157 | "end_time": "2022-11-04T16:51:55.419714Z",
158 | "start_time": "2022-11-04T16:51:55.415775Z"
159 | }
160 | },
161 | "outputs": [
162 | {
163 | "data": {
164 | "text/plain": [
165 | "4111.45"
166 | ]
167 | },
168 | "execution_count": 5,
169 | "metadata": {},
170 | "output_type": "execute_result"
171 | }
172 | ],
173 | "source": [
174 | "y.max()"
175 | ]
176 | },
177 | {
178 | "cell_type": "markdown",
179 | "metadata": {},
180 | "source": [
181 |         "Assigning the classifier to an object lets us inspect other aspects of the classifier:"
182 | ]
183 | },
184 | {
185 | "cell_type": "code",
186 | "execution_count": 6,
187 | "metadata": {
188 | "ExecuteTime": {
189 | "end_time": "2022-11-04T16:51:55.426490Z",
190 | "start_time": "2022-11-04T16:51:55.421539Z"
191 | }
192 | },
193 | "outputs": [
194 | {
195 | "data": {
196 | "text/plain": [
197 | "MaximumBreaks\n",
198 | "\n",
199 | " Interval Count\n",
200 | "--------------------------\n",
201 | "[ 0.13, 228.49] | 52\n",
202 | "( 228.49, 546.67] | 4\n",
203 | "( 546.67, 2417.15] | 1\n",
204 | "(2417.15, 4111.45] | 1"
205 | ]
206 | },
207 | "execution_count": 6,
208 | "metadata": {},
209 | "output_type": "execute_result"
210 | }
211 | ],
212 | "source": [
213 | "mb4 = mc.MaximumBreaks(y, k=4)\n",
214 | "mb4"
215 | ]
216 | },
217 | {
218 | "cell_type": "markdown",
219 | "metadata": {},
220 | "source": [
221 | "The `bins` attribute has the upper bounds of the intervals:"
222 | ]
223 | },
224 | {
225 | "cell_type": "code",
226 | "execution_count": 7,
227 | "metadata": {
228 | "ExecuteTime": {
229 | "end_time": "2022-11-04T16:51:55.433994Z",
230 | "start_time": "2022-11-04T16:51:55.429143Z"
231 | }
232 | },
233 | "outputs": [
234 | {
235 | "data": {
236 | "text/plain": [
237 | "array([ 228.49 , 546.675, 2417.15 , 4111.45 ])"
238 | ]
239 | },
240 | "execution_count": 7,
241 | "metadata": {},
242 | "output_type": "execute_result"
243 | }
244 | ],
245 | "source": [
246 | "mb4.bins"
247 | ]
248 | },
249 | {
250 | "cell_type": "markdown",
251 | "metadata": {},
252 | "source": [
253 | "and `counts` reports the number of values falling in each bin:"
254 | ]
255 | },
256 | {
257 | "cell_type": "code",
258 | "execution_count": 8,
259 | "metadata": {
260 | "ExecuteTime": {
261 | "end_time": "2022-11-04T16:51:55.441325Z",
262 | "start_time": "2022-11-04T16:51:55.437014Z"
263 | }
264 | },
265 | "outputs": [
266 | {
267 | "data": {
268 | "text/plain": [
269 | "array([52, 4, 1, 1])"
270 | ]
271 | },
272 | "execution_count": 8,
273 | "metadata": {},
274 | "output_type": "execute_result"
275 | }
276 | ],
277 | "source": [
278 | "mb4.counts"
279 | ]
280 | },
281 | {
282 | "cell_type": "markdown",
283 | "metadata": {},
284 | "source": [
285 | "The specific bin (i.e. label) for each observation can be found in the `yb` attribute:"
286 | ]
287 | },
288 | {
289 | "cell_type": "code",
290 | "execution_count": 9,
291 | "metadata": {
292 | "ExecuteTime": {
293 | "end_time": "2022-11-04T16:51:55.447878Z",
294 | "start_time": "2022-11-04T16:51:55.443824Z"
295 | }
296 | },
297 | "outputs": [
298 | {
299 | "data": {
300 | "text/plain": [
301 | "array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,\n",
302 | " 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 1, 0, 1, 0,\n",
303 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])"
304 | ]
305 | },
306 | "execution_count": 9,
307 | "metadata": {},
308 | "output_type": "execute_result"
309 | }
310 | ],
311 | "source": [
312 | "mb4.yb"
313 | ]
314 | },
315 | {
316 | "cell_type": "markdown",
317 | "metadata": {},
318 | "source": [
319 | "## Changing the number of classes\n",
320 | "\n",
321 |         "Staying with the same classifier, the user can apply the same classification rule, but for a different number of classes:"
322 | ]
323 | },
324 | {
325 | "cell_type": "code",
326 | "execution_count": 10,
327 | "metadata": {
328 | "ExecuteTime": {
329 | "end_time": "2022-11-04T16:51:55.454514Z",
330 | "start_time": "2022-11-04T16:51:55.449706Z"
331 | }
332 | },
333 | "outputs": [
334 | {
335 | "data": {
336 | "text/plain": [
337 | "MaximumBreaks\n",
338 | "\n",
339 | " Interval Count\n",
340 | "--------------------------\n",
341 | "[ 0.13, 146.00] | 50\n",
342 | "( 146.00, 228.49] | 2\n",
343 | "( 228.49, 291.02] | 1\n",
344 | "( 291.02, 350.21] | 2\n",
345 | "( 350.21, 546.67] | 1\n",
346 | "( 546.67, 2417.15] | 1\n",
347 | "(2417.15, 4111.45] | 1"
348 | ]
349 | },
350 | "execution_count": 10,
351 | "metadata": {},
352 | "output_type": "execute_result"
353 | }
354 | ],
355 | "source": [
356 | "mb7 = mc.MaximumBreaks(y, k=7)\n",
357 | "mb7"
358 | ]
359 | },
360 | {
361 | "cell_type": "code",
362 | "execution_count": 11,
363 | "metadata": {
364 | "ExecuteTime": {
365 | "end_time": "2022-11-04T16:51:55.461787Z",
366 | "start_time": "2022-11-04T16:51:55.456906Z"
367 | }
368 | },
369 | "outputs": [
370 | {
371 | "data": {
372 | "text/plain": [
373 | "array([ 146.005, 228.49 , 291.02 , 350.21 , 546.675, 2417.15 ,\n",
374 | " 4111.45 ])"
375 | ]
376 | },
377 | "execution_count": 11,
378 | "metadata": {},
379 | "output_type": "execute_result"
380 | }
381 | ],
382 | "source": [
383 | "mb7.bins"
384 | ]
385 | },
386 | {
387 | "cell_type": "code",
388 | "execution_count": 12,
389 | "metadata": {
390 | "ExecuteTime": {
391 | "end_time": "2022-11-04T16:51:55.471152Z",
392 | "start_time": "2022-11-04T16:51:55.466248Z"
393 | }
394 | },
395 | "outputs": [
396 | {
397 | "data": {
398 | "text/plain": [
399 | "array([50, 2, 1, 2, 1, 1, 1])"
400 | ]
401 | },
402 | "execution_count": 12,
403 | "metadata": {},
404 | "output_type": "execute_result"
405 | }
406 | ],
407 | "source": [
408 | "mb7.counts"
409 | ]
410 | },
411 | {
412 | "cell_type": "code",
413 | "execution_count": 13,
414 | "metadata": {
415 | "ExecuteTime": {
416 | "end_time": "2022-11-04T16:51:55.477524Z",
417 | "start_time": "2022-11-04T16:51:55.473430Z"
418 | }
419 | },
420 | "outputs": [
421 | {
422 | "data": {
423 | "text/plain": [
424 | "array([3, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0,\n",
425 | " 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 1, 0, 0, 0, 6, 0, 0, 3, 0, 2, 0,\n",
426 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])"
427 | ]
428 | },
429 | "execution_count": 13,
430 | "metadata": {},
431 | "output_type": "execute_result"
432 | }
433 | ],
434 | "source": [
435 | "mb7.yb"
436 | ]
437 | },
438 | {
439 | "cell_type": "markdown",
440 | "metadata": {},
441 | "source": [
442 | "One additional attribute to mention here is the `adcm` attribute:"
443 | ]
444 | },
445 | {
446 | "cell_type": "code",
447 | "execution_count": 14,
448 | "metadata": {
449 | "ExecuteTime": {
450 | "end_time": "2022-11-04T16:51:55.483597Z",
451 | "start_time": "2022-11-04T16:51:55.479867Z"
452 | }
453 | },
454 | "outputs": [
455 | {
456 | "data": {
457 | "text/plain": [
458 | "727.3200000000002"
459 | ]
460 | },
461 | "execution_count": 14,
462 | "metadata": {},
463 | "output_type": "execute_result"
464 | }
465 | ],
466 | "source": [
467 | "mb7.adcm"
468 | ]
469 | },
470 | {
471 | "cell_type": "markdown",
472 | "metadata": {},
473 | "source": [
474 | "`adcm` is a measure of fit, defined as the mean absolute deviation around the class median. "
475 | ]
476 | },
477 | {
478 | "cell_type": "code",
479 | "execution_count": 15,
480 | "metadata": {
481 | "ExecuteTime": {
482 | "end_time": "2022-11-04T16:51:55.489640Z",
483 | "start_time": "2022-11-04T16:51:55.485845Z"
484 | }
485 | },
486 | "outputs": [
487 | {
488 | "data": {
489 | "text/plain": [
490 | "1181.4900000000002"
491 | ]
492 | },
493 | "execution_count": 15,
494 | "metadata": {},
495 | "output_type": "execute_result"
496 | }
497 | ],
498 | "source": [
499 | "mb4.adcm"
500 | ]
501 | },
502 | {
503 | "cell_type": "markdown",
504 | "metadata": {},
505 | "source": [
506 | "The `adcm` can be expected to decrease as $k$ increases for a given classifier. Thus, if using as a measure of fit, the `adcm` should only be used to compare classifiers defined on the same number of classes."
507 | ]
508 | },
509 | {
510 | "cell_type": "markdown",
511 | "metadata": {},
512 | "source": [
513 | "## Next Steps\n",
514 | "`MaximumBreaks` is but one of many classifiers in `mapclassify`:"
515 | ]
516 | },
517 | {
518 | "cell_type": "code",
519 | "execution_count": 16,
520 | "metadata": {
521 | "ExecuteTime": {
522 | "end_time": "2022-11-04T16:51:55.496548Z",
523 | "start_time": "2022-11-04T16:51:55.492318Z"
524 | }
525 | },
526 | "outputs": [
527 | {
528 | "data": {
529 | "text/plain": [
530 | "('BoxPlot',\n",
531 | " 'EqualInterval',\n",
532 | " 'FisherJenks',\n",
533 | " 'FisherJenksSampled',\n",
534 | " 'HeadTailBreaks',\n",
535 | " 'JenksCaspall',\n",
536 | " 'JenksCaspallForced',\n",
537 | " 'JenksCaspallSampled',\n",
538 | " 'MaxP',\n",
539 | " 'MaximumBreaks',\n",
540 | " 'NaturalBreaks',\n",
541 | " 'Quantiles',\n",
542 | " 'Percentiles',\n",
543 | " 'StdMean',\n",
544 | " 'UserDefined')"
545 | ]
546 | },
547 | "execution_count": 16,
548 | "metadata": {},
549 | "output_type": "execute_result"
550 | }
551 | ],
552 | "source": [
553 | "mc.classifiers.CLASSIFIERS"
554 | ]
555 | },
556 | {
557 | "cell_type": "markdown",
558 | "metadata": {},
559 | "source": [
560 | "To learn more about an individual classifier, introspection is available:"
561 | ]
562 | },
563 | {
564 | "cell_type": "code",
565 | "execution_count": 17,
566 | "metadata": {
567 | "ExecuteTime": {
568 | "end_time": "2022-11-04T16:51:55.537870Z",
569 | "start_time": "2022-11-04T16:51:55.499084Z"
570 | }
571 | },
572 | "outputs": [],
573 | "source": [
574 | "mc.MaximumBreaks?"
575 | ]
576 | },
577 | {
578 | "cell_type": "markdown",
579 | "metadata": {},
580 | "source": [
581 | "-------------------------\n",
582 | "\n",
583 |         "For more comprehensive applications of `mapclassify` the interested reader is directed to the chapter on [choropleth mapping](https://geographicdata.science/book/notebooks/05_choropleth.html) in [Rey, Arribas-Bel, and Wolf (2020) \"Geographic Data Science with PySAL and the PyData Stack”](https://geographicdata.science/book).\n",
584 | "\n",
585 | "-------------------------"
586 | ]
587 | }
588 | ],
589 | "metadata": {
590 | "anaconda-cloud": {},
591 | "kernelspec": {
592 | "display_name": "Python [conda env:py310_mapclassify]",
593 | "language": "python",
594 | "name": "conda-env-py310_mapclassify-py"
595 | },
596 | "language_info": {
597 | "codemirror_mode": {
598 | "name": "ipython",
599 | "version": 3
600 | },
601 | "file_extension": ".py",
602 | "mimetype": "text/x-python",
603 | "name": "python",
604 | "nbconvert_exporter": "python",
605 | "pygments_lexer": "ipython3",
606 | "version": "3.10.6"
607 | }
608 | },
609 | "nbformat": 4,
610 | "nbformat_minor": 4
611 | }
612 |
--------------------------------------------------------------------------------
/notebooks/02_legends.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Legends in mapclassify\n",
8 | "\n",
9 | "`mapclassify` allows for user defined formatting of legends"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 1,
15 | "metadata": {
16 | "ExecuteTime": {
17 | "end_time": "2022-11-04T18:03:55.559087Z",
18 | "start_time": "2022-11-04T18:03:53.594867Z"
19 | }
20 | },
21 | "outputs": [
22 | {
23 | "data": {
24 | "text/plain": [
25 | "'2.4.2+78.gc62d2d7.dirty'"
26 | ]
27 | },
28 | "execution_count": 1,
29 | "metadata": {},
30 | "output_type": "execute_result"
31 | }
32 | ],
33 | "source": [
34 | "import mapclassify\n",
35 | "\n",
36 | "mapclassify.__version__"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "execution_count": 2,
42 | "metadata": {
43 | "ExecuteTime": {
44 | "end_time": "2022-11-04T18:03:56.030661Z",
45 | "start_time": "2022-11-04T18:03:55.564369Z"
46 | }
47 | },
48 | "outputs": [],
49 | "source": [
50 | "cal = mapclassify.load_example()"
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": 3,
56 | "metadata": {
57 | "ExecuteTime": {
58 | "end_time": "2022-11-04T18:03:56.041172Z",
59 | "start_time": "2022-11-04T18:03:56.034966Z"
60 | }
61 | },
62 | "outputs": [
63 | {
64 | "data": {
65 | "text/plain": [
66 | "Quantiles\n",
67 | "\n",
68 | " Interval Count\n",
69 | "--------------------------\n",
70 | "[ 0.13, 1.16] | 10\n",
71 | "( 1.16, 3.38] | 10\n",
72 | "( 3.38, 9.36] | 9\n",
73 | "( 9.36, 24.32] | 10\n",
74 | "( 24.32, 70.78] | 9\n",
75 | "( 70.78, 4111.45] | 10"
76 | ]
77 | },
78 | "execution_count": 3,
79 | "metadata": {},
80 | "output_type": "execute_result"
81 | }
82 | ],
83 | "source": [
84 | "q6 = mapclassify.Quantiles(cal, k=6)\n",
85 | "q6"
86 | ]
87 | },
88 | {
89 | "cell_type": "markdown",
90 | "metadata": {},
91 | "source": [
92 | "The default is to use two decimal places for this dataset.\n",
93 | "\n",
94 | "If the user desires a list of strings with these values, the `get_legend_classes` method can be called\n",
95 | "which will return the strings with the default format:"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": 4,
101 | "metadata": {
102 | "ExecuteTime": {
103 | "end_time": "2022-11-04T18:03:56.047765Z",
104 | "start_time": "2022-11-04T18:03:56.042764Z"
105 | }
106 | },
107 | "outputs": [
108 | {
109 | "data": {
110 | "text/plain": [
111 | "['[ 0.13, 1.16]',\n",
112 | " '( 1.16, 3.38]',\n",
113 | " '( 3.38, 9.36]',\n",
114 | " '( 9.36, 24.32]',\n",
115 | " '( 24.32, 70.78]',\n",
116 | " '( 70.78, 4111.45]']"
117 | ]
118 | },
119 | "execution_count": 4,
120 | "metadata": {},
121 | "output_type": "execute_result"
122 | }
123 | ],
124 | "source": [
125 | "q6.get_legend_classes()"
126 | ]
127 | },
128 | {
129 | "cell_type": "markdown",
130 | "metadata": {},
131 | "source": [
132 | "To set the legends to integers, an option can be passed into the method:"
133 | ]
134 | },
135 | {
136 | "cell_type": "code",
137 | "execution_count": 5,
138 | "metadata": {
139 | "ExecuteTime": {
140 | "end_time": "2022-11-04T18:03:56.055615Z",
141 | "start_time": "2022-11-04T18:03:56.050635Z"
142 | }
143 | },
144 | "outputs": [
145 | {
146 | "data": {
147 | "text/plain": [
148 | "['[ 0, 1]',\n",
149 | " '( 1, 3]',\n",
150 | " '( 3, 9]',\n",
151 | " '( 9, 24]',\n",
152 | " '( 24, 71]',\n",
153 | " '( 71, 4111]']"
154 | ]
155 | },
156 | "execution_count": 5,
157 | "metadata": {},
158 | "output_type": "execute_result"
159 | }
160 | ],
161 | "source": [
162 | "q6.get_legend_classes(fmt=\"{:.0f}\")"
163 | ]
164 | },
165 | {
166 | "cell_type": "markdown",
167 | "metadata": {},
168 | "source": [
169 | "Note that this does not change the original object:"
170 | ]
171 | },
172 | {
173 | "cell_type": "code",
174 | "execution_count": 6,
175 | "metadata": {
176 | "ExecuteTime": {
177 | "end_time": "2022-11-04T18:03:56.064112Z",
178 | "start_time": "2022-11-04T18:03:56.058884Z"
179 | }
180 | },
181 | "outputs": [
182 | {
183 | "data": {
184 | "text/plain": [
185 | "Quantiles\n",
186 | "\n",
187 | " Interval Count\n",
188 | "--------------------------\n",
189 | "[ 0.13, 1.16] | 10\n",
190 | "( 1.16, 3.38] | 10\n",
191 | "( 3.38, 9.36] | 9\n",
192 | "( 9.36, 24.32] | 10\n",
193 | "( 24.32, 70.78] | 9\n",
194 | "( 70.78, 4111.45] | 10"
195 | ]
196 | },
197 | "execution_count": 6,
198 | "metadata": {},
199 | "output_type": "execute_result"
200 | }
201 | ],
202 | "source": [
203 | "q6"
204 | ]
205 | },
206 | {
207 | "cell_type": "markdown",
208 | "metadata": {},
209 | "source": [
210 | "The format can be changed on the object by calling the `set_fmt` method:"
211 | ]
212 | },
213 | {
214 | "cell_type": "code",
215 | "execution_count": 7,
216 | "metadata": {
217 | "ExecuteTime": {
218 | "end_time": "2022-11-04T18:03:56.073242Z",
219 | "start_time": "2022-11-04T18:03:56.067329Z"
220 | }
221 | },
222 | "outputs": [
223 | {
224 | "data": {
225 | "text/plain": [
226 | "Quantiles\n",
227 | "\n",
228 | " Interval Count\n",
229 | "--------------------\n",
230 | "[ 0, 1] | 10\n",
231 | "( 1, 3] | 10\n",
232 | "( 3, 9] | 9\n",
233 | "( 9, 24] | 10\n",
234 | "( 24, 71] | 9\n",
235 | "( 71, 4111] | 10"
236 | ]
237 | },
238 | "execution_count": 7,
239 | "metadata": {},
240 | "output_type": "execute_result"
241 | }
242 | ],
243 | "source": [
244 | "q6.set_fmt(fmt=\"{:.0f}\")\n",
245 | "q6"
246 | ]
247 | }
248 | ],
249 | "metadata": {
250 | "kernelspec": {
251 | "display_name": "Python [conda env:py310_mapclassify]",
252 | "language": "python",
253 | "name": "conda-env-py310_mapclassify-py"
254 | },
255 | "language_info": {
256 | "codemirror_mode": {
257 | "name": "ipython",
258 | "version": 3
259 | },
260 | "file_extension": ".py",
261 | "mimetype": "text/x-python",
262 | "name": "python",
263 | "nbconvert_exporter": "python",
264 | "pygments_lexer": "ipython3",
265 | "version": "3.10.6"
266 | }
267 | },
268 | "nbformat": 4,
269 | "nbformat_minor": 4
270 | }
271 |
--------------------------------------------------------------------------------
/notebooks/06_api.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Overview of the mapclassify API\n",
8 | "\n",
9 | "There are a number of ways to access the functionality in `mapclassify`"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "We first load the example dataset that we have seen earlier."
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 1,
22 | "metadata": {
23 | "ExecuteTime": {
24 | "end_time": "2022-11-05T15:10:19.167785Z",
25 | "start_time": "2022-11-05T15:10:14.404320Z"
26 | },
27 | "tags": []
28 | },
29 | "outputs": [],
30 | "source": [
31 | "import geopandas\n",
32 | "import libpysal\n",
33 | "import mapclassify"
34 | ]
35 | },
36 | {
37 | "cell_type": "markdown",
38 | "metadata": {},
39 | "source": [
40 | "Current `mapclassify` version."
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "execution_count": 2,
46 | "metadata": {
47 | "ExecuteTime": {
48 | "end_time": "2022-11-05T15:10:19.182165Z",
49 | "start_time": "2022-11-05T15:10:19.171353Z"
50 | },
51 | "tags": []
52 | },
53 | "outputs": [
54 | {
55 | "data": {
56 | "text/plain": [
57 | "'2.4.2+107.gb97c316a.dirty'"
58 | ]
59 | },
60 | "execution_count": 2,
61 | "metadata": {},
62 | "output_type": "execute_result"
63 | }
64 | ],
65 | "source": [
66 | "mapclassify.__version__"
67 | ]
68 | },
69 | {
70 | "cell_type": "code",
71 | "execution_count": 3,
72 | "metadata": {
73 | "ExecuteTime": {
74 | "end_time": "2022-11-05T15:10:19.586837Z",
75 | "start_time": "2022-11-05T15:10:19.187232Z"
76 | },
77 | "tags": []
78 | },
79 | "outputs": [
80 | {
81 | "data": {
82 | "text/html": [
83 | "\n",
84 | "\n",
97 | "
\n",
98 | " \n",
99 | " \n",
100 | " | \n",
101 | " AREA | \n",
102 | " PERIMETER | \n",
103 | " COLUMBUS_ | \n",
104 | " COLUMBUS_I | \n",
105 | " POLYID | \n",
106 | " NEIG | \n",
107 | " HOVAL | \n",
108 | " INC | \n",
109 | " CRIME | \n",
110 | " OPEN | \n",
111 | " ... | \n",
112 | " DISCBD | \n",
113 | " X | \n",
114 | " Y | \n",
115 | " NSA | \n",
116 | " NSB | \n",
117 | " EW | \n",
118 | " CP | \n",
119 | " THOUS | \n",
120 | " NEIGNO | \n",
121 | " geometry | \n",
122 | "
\n",
123 | " \n",
124 | " \n",
125 | " \n",
126 | " 0 | \n",
127 | " 0.309441 | \n",
128 | " 2.440629 | \n",
129 | " 2 | \n",
130 | " 5 | \n",
131 | " 1 | \n",
132 | " 5 | \n",
133 | " 80.467003 | \n",
134 | " 19.531 | \n",
135 | " 15.725980 | \n",
136 | " 2.850747 | \n",
137 | " ... | \n",
138 | " 5.03 | \n",
139 | " 38.799999 | \n",
140 | " 44.070000 | \n",
141 | " 1.0 | \n",
142 | " 1.0 | \n",
143 | " 1.0 | \n",
144 | " 0.0 | \n",
145 | " 1000.0 | \n",
146 | " 1005.0 | \n",
147 | " POLYGON ((8.62413 14.23698, 8.55970 14.74245, ... | \n",
148 | "
\n",
149 | " \n",
150 | " 1 | \n",
151 | " 0.259329 | \n",
152 | " 2.236939 | \n",
153 | " 3 | \n",
154 | " 1 | \n",
155 | " 2 | \n",
156 | " 1 | \n",
157 | " 44.567001 | \n",
158 | " 21.232 | \n",
159 | " 18.801754 | \n",
160 | " 5.296720 | \n",
161 | " ... | \n",
162 | " 4.27 | \n",
163 | " 35.619999 | \n",
164 | " 42.380001 | \n",
165 | " 1.0 | \n",
166 | " 1.0 | \n",
167 | " 0.0 | \n",
168 | " 0.0 | \n",
169 | " 1000.0 | \n",
170 | " 1001.0 | \n",
171 | " POLYGON ((8.25279 14.23694, 8.28276 14.22994, ... | \n",
172 | "
\n",
173 | " \n",
174 | " 2 | \n",
175 | " 0.192468 | \n",
176 | " 2.187547 | \n",
177 | " 4 | \n",
178 | " 6 | \n",
179 | " 3 | \n",
180 | " 6 | \n",
181 | " 26.350000 | \n",
182 | " 15.956 | \n",
183 | " 30.626781 | \n",
184 | " 4.534649 | \n",
185 | " ... | \n",
186 | " 3.89 | \n",
187 | " 39.820000 | \n",
188 | " 41.180000 | \n",
189 | " 1.0 | \n",
190 | " 1.0 | \n",
191 | " 1.0 | \n",
192 | " 0.0 | \n",
193 | " 1000.0 | \n",
194 | " 1006.0 | \n",
195 | " POLYGON ((8.65331 14.00809, 8.81814 14.00205, ... | \n",
196 | "
\n",
197 | " \n",
198 | " 3 | \n",
199 | " 0.083841 | \n",
200 | " 1.427635 | \n",
201 | " 5 | \n",
202 | " 2 | \n",
203 | " 4 | \n",
204 | " 2 | \n",
205 | " 33.200001 | \n",
206 | " 4.477 | \n",
207 | " 32.387760 | \n",
208 | " 0.394427 | \n",
209 | " ... | \n",
210 | " 3.70 | \n",
211 | " 36.500000 | \n",
212 | " 40.520000 | \n",
213 | " 1.0 | \n",
214 | " 1.0 | \n",
215 | " 0.0 | \n",
216 | " 0.0 | \n",
217 | " 1000.0 | \n",
218 | " 1002.0 | \n",
219 | " POLYGON ((8.45950 13.82035, 8.47341 13.83227, ... | \n",
220 | "
\n",
221 | " \n",
222 | " 4 | \n",
223 | " 0.488888 | \n",
224 | " 2.997133 | \n",
225 | " 6 | \n",
226 | " 7 | \n",
227 | " 5 | \n",
228 | " 7 | \n",
229 | " 23.225000 | \n",
230 | " 11.252 | \n",
231 | " 50.731510 | \n",
232 | " 0.405664 | \n",
233 | " ... | \n",
234 | " 2.83 | \n",
235 | " 40.009998 | \n",
236 | " 38.000000 | \n",
237 | " 1.0 | \n",
238 | " 1.0 | \n",
239 | " 1.0 | \n",
240 | " 0.0 | \n",
241 | " 1000.0 | \n",
242 | " 1007.0 | \n",
243 | " POLYGON ((8.68527 13.63952, 8.67758 13.72221, ... | \n",
244 | "
\n",
245 | " \n",
246 | "
\n",
247 | "
5 rows × 21 columns
\n",
248 | "
"
249 | ],
250 | "text/plain": [
251 | " AREA PERIMETER COLUMBUS_ COLUMBUS_I POLYID NEIG HOVAL \\\n",
252 | "0 0.309441 2.440629 2 5 1 5 80.467003 \n",
253 | "1 0.259329 2.236939 3 1 2 1 44.567001 \n",
254 | "2 0.192468 2.187547 4 6 3 6 26.350000 \n",
255 | "3 0.083841 1.427635 5 2 4 2 33.200001 \n",
256 | "4 0.488888 2.997133 6 7 5 7 23.225000 \n",
257 | "\n",
258 | " INC CRIME OPEN ... DISCBD X Y NSA NSB \\\n",
259 | "0 19.531 15.725980 2.850747 ... 5.03 38.799999 44.070000 1.0 1.0 \n",
260 | "1 21.232 18.801754 5.296720 ... 4.27 35.619999 42.380001 1.0 1.0 \n",
261 | "2 15.956 30.626781 4.534649 ... 3.89 39.820000 41.180000 1.0 1.0 \n",
262 | "3 4.477 32.387760 0.394427 ... 3.70 36.500000 40.520000 1.0 1.0 \n",
263 | "4 11.252 50.731510 0.405664 ... 2.83 40.009998 38.000000 1.0 1.0 \n",
264 | "\n",
265 | " EW CP THOUS NEIGNO geometry \n",
266 | "0 1.0 0.0 1000.0 1005.0 POLYGON ((8.62413 14.23698, 8.55970 14.74245, ... \n",
267 | "1 0.0 0.0 1000.0 1001.0 POLYGON ((8.25279 14.23694, 8.28276 14.22994, ... \n",
268 | "2 1.0 0.0 1000.0 1006.0 POLYGON ((8.65331 14.00809, 8.81814 14.00205, ... \n",
269 | "3 0.0 0.0 1000.0 1002.0 POLYGON ((8.45950 13.82035, 8.47341 13.83227, ... \n",
270 | "4 1.0 0.0 1000.0 1007.0 POLYGON ((8.68527 13.63952, 8.67758 13.72221, ... \n",
271 | "\n",
272 | "[5 rows x 21 columns]"
273 | ]
274 | },
275 | "execution_count": 3,
276 | "metadata": {},
277 | "output_type": "execute_result"
278 | }
279 | ],
280 | "source": [
281 | "pth = libpysal.examples.get_path(\"columbus.shp\")\n",
282 | "gdf = geopandas.read_file(pth)\n",
283 | "y = gdf.HOVAL\n",
284 | "gdf.head()"
285 | ]
286 | },
287 | {
288 | "cell_type": "markdown",
289 | "metadata": {},
290 | "source": [
291 | "## Original API (< 2.4.0)\n"
292 | ]
293 | },
294 | {
295 | "cell_type": "code",
296 | "execution_count": 4,
297 | "metadata": {
298 | "ExecuteTime": {
299 | "end_time": "2022-11-05T15:10:19.595711Z",
300 | "start_time": "2022-11-05T15:10:19.589037Z"
301 | },
302 | "tags": []
303 | },
304 | "outputs": [
305 | {
306 | "data": {
307 | "text/plain": [
308 | "BoxPlot\n",
309 | "\n",
310 | " Interval Count\n",
311 | "----------------------\n",
312 | "( -inf, -0.70] | 0\n",
313 | "(-0.70, 25.70] | 13\n",
314 | "(25.70, 33.50] | 12\n",
315 | "(33.50, 43.30] | 12\n",
316 | "(43.30, 69.70] | 7\n",
317 | "(69.70, 96.40] | 5"
318 | ]
319 | },
320 | "execution_count": 4,
321 | "metadata": {},
322 | "output_type": "execute_result"
323 | }
324 | ],
325 | "source": [
326 | "bp = mapclassify.BoxPlot(y)\n",
327 | "bp"
328 | ]
329 | },
330 | {
331 | "cell_type": "markdown",
332 | "metadata": {},
333 | "source": [
334 | "## Extended API (>= 2.4.0)\n",
335 | "\n",
336 | "Note the original API is still available so this extension keeps backwards compatibility."
337 | ]
338 | },
339 | {
340 | "cell_type": "code",
341 | "execution_count": 5,
342 | "metadata": {
343 | "ExecuteTime": {
344 | "end_time": "2022-11-05T15:10:19.603460Z",
345 | "start_time": "2022-11-05T15:10:19.598526Z"
346 | },
347 | "tags": []
348 | },
349 | "outputs": [
350 | {
351 | "data": {
352 | "text/plain": [
353 | "BoxPlot\n",
354 | "\n",
355 | " Interval Count\n",
356 | "----------------------\n",
357 | "( -inf, -0.70] | 0\n",
358 | "(-0.70, 25.70] | 13\n",
359 | "(25.70, 33.50] | 12\n",
360 | "(33.50, 43.30] | 12\n",
361 | "(43.30, 69.70] | 7\n",
362 | "(69.70, 96.40] | 5"
363 | ]
364 | },
365 | "execution_count": 5,
366 | "metadata": {},
367 | "output_type": "execute_result"
368 | }
369 | ],
370 | "source": [
371 | "bp = mapclassify.classify(y, \"box_plot\")\n",
372 | "bp"
373 | ]
374 | },
375 | {
376 | "cell_type": "code",
377 | "execution_count": 6,
378 | "metadata": {
379 | "ExecuteTime": {
380 | "end_time": "2022-11-05T15:10:19.611996Z",
381 | "start_time": "2022-11-05T15:10:19.608075Z"
382 | },
383 | "tags": []
384 | },
385 | "outputs": [
386 | {
387 | "data": {
388 | "text/plain": [
389 | "mapclassify.classifiers.BoxPlot"
390 | ]
391 | },
392 | "execution_count": 6,
393 | "metadata": {},
394 | "output_type": "execute_result"
395 | }
396 | ],
397 | "source": [
398 | "type(bp)"
399 | ]
400 | },
401 | {
402 | "cell_type": "code",
403 | "execution_count": 7,
404 | "metadata": {
405 | "ExecuteTime": {
406 | "end_time": "2022-11-05T15:10:19.619168Z",
407 | "start_time": "2022-11-05T15:10:19.614412Z"
408 | },
409 | "tags": []
410 | },
411 | "outputs": [
412 | {
413 | "data": {
414 | "text/plain": [
415 | "Quantiles\n",
416 | "\n",
417 | " Interval Count\n",
418 | "----------------------\n",
419 | "[17.90, 23.08] | 10\n",
420 | "(23.08, 30.48] | 10\n",
421 | "(30.48, 39.10] | 9\n",
422 | "(39.10, 45.83] | 10\n",
423 | "(45.83, 96.40] | 10"
424 | ]
425 | },
426 | "execution_count": 7,
427 | "metadata": {},
428 | "output_type": "execute_result"
429 | }
430 | ],
431 | "source": [
432 | "q5 = mapclassify.classify(y, \"quantiles\", k=5)\n",
433 | "q5"
434 | ]
435 | },
436 | {
437 | "cell_type": "markdown",
438 | "metadata": {},
439 | "source": [
440 | "### Robustness of the `scheme` argument"
441 | ]
442 | },
443 | {
444 | "cell_type": "code",
445 | "execution_count": 8,
446 | "metadata": {
447 | "ExecuteTime": {
448 | "end_time": "2022-11-05T15:10:19.627988Z",
449 | "start_time": "2022-11-05T15:10:19.621853Z"
450 | },
451 | "tags": []
452 | },
453 | "outputs": [
454 | {
455 | "data": {
456 | "text/plain": [
457 | "BoxPlot\n",
458 | "\n",
459 | " Interval Count\n",
460 | "----------------------\n",
461 | "( -inf, -0.70] | 0\n",
462 | "(-0.70, 25.70] | 13\n",
463 | "(25.70, 33.50] | 12\n",
464 | "(33.50, 43.30] | 12\n",
465 | "(43.30, 69.70] | 7\n",
466 | "(69.70, 96.40] | 5"
467 | ]
468 | },
469 | "execution_count": 8,
470 | "metadata": {},
471 | "output_type": "execute_result"
472 | }
473 | ],
474 | "source": [
475 | "mapclassify.classify(y, \"boxPlot\")"
476 | ]
477 | },
478 | {
479 | "cell_type": "code",
480 | "execution_count": 9,
481 | "metadata": {
482 | "ExecuteTime": {
483 | "end_time": "2022-11-05T15:10:19.634396Z",
484 | "start_time": "2022-11-05T15:10:19.629847Z"
485 | },
486 | "tags": []
487 | },
488 | "outputs": [
489 | {
490 | "data": {
491 | "text/plain": [
492 | "BoxPlot\n",
493 | "\n",
494 | " Interval Count\n",
495 | "----------------------\n",
496 | "( -inf, -0.70] | 0\n",
497 | "(-0.70, 25.70] | 13\n",
498 | "(25.70, 33.50] | 12\n",
499 | "(33.50, 43.30] | 12\n",
500 | "(43.30, 69.70] | 7\n",
501 | "(69.70, 96.40] | 5"
502 | ]
503 | },
504 | "execution_count": 9,
505 | "metadata": {},
506 | "output_type": "execute_result"
507 | }
508 | ],
509 | "source": [
510 | "mapclassify.classify(y, \"Boxplot\")"
511 | ]
512 | },
513 | {
514 | "cell_type": "code",
515 | "execution_count": 10,
516 | "metadata": {
517 | "ExecuteTime": {
518 | "end_time": "2022-11-05T15:10:19.641115Z",
519 | "start_time": "2022-11-05T15:10:19.636017Z"
520 | },
521 | "tags": []
522 | },
523 | "outputs": [
524 | {
525 | "data": {
526 | "text/plain": [
527 | "BoxPlot\n",
528 | "\n",
529 | " Interval Count\n",
530 | "----------------------\n",
531 | "( -inf, -0.70] | 0\n",
532 | "(-0.70, 25.70] | 13\n",
533 | "(25.70, 33.50] | 12\n",
534 | "(33.50, 43.30] | 12\n",
535 | "(43.30, 69.70] | 7\n",
536 | "(69.70, 96.40] | 5"
537 | ]
538 | },
539 | "execution_count": 10,
540 | "metadata": {},
541 | "output_type": "execute_result"
542 | }
543 | ],
544 | "source": [
545 | "mapclassify.classify(y, \"Box_plot\")"
546 | ]
547 | },
548 | {
549 | "cell_type": "code",
550 | "execution_count": 13,
551 | "metadata": {
552 | "ExecuteTime": {
553 | "end_time": "2022-11-05T15:10:19.691302Z",
554 | "start_time": "2022-11-05T15:10:19.645124Z"
555 | },
556 | "tags": []
557 | },
558 | "outputs": [
559 | {
560 | "data": {
561 | "text/plain": [
562 | "StdMean\n",
563 | "\n",
564 | " Interval Count\n",
565 | "----------------------\n",
566 | "( -inf, 1.50] | 0\n",
567 | "( 1.50, 19.97] | 5\n",
568 | "(19.97, 56.90] | 37\n",
569 | "(56.90, 75.37] | 3\n",
570 | "(75.37, 96.40] | 4"
571 | ]
572 | },
573 | "execution_count": 13,
574 | "metadata": {},
575 | "output_type": "execute_result"
576 | }
577 | ],
578 | "source": [
579 | "mapclassify.classify(y, 'Std_Mean')"
580 | ]
581 | },
582 | {
583 | "cell_type": "code",
584 | "execution_count": 15,
585 | "metadata": {
586 | "ExecuteTime": {
587 | "end_time": "2022-10-26T03:01:45.977181Z",
588 | "start_time": "2022-10-26T03:01:45.931234Z"
589 | },
590 | "tags": []
591 | },
592 | "outputs": [
593 | {
594 | "data": {
595 | "text/plain": [
596 | "StdMean\n",
597 | "\n",
598 | " Interval Count\n",
599 | "----------------------\n",
600 | "[17.90, 19.97] | 5\n",
601 | "(19.97, 38.44] | 24\n",
602 | "(38.44, 56.90] | 13\n",
603 | "(56.90, 75.37] | 3\n",
604 | "(75.37, 93.83] | 3\n",
605 | "(93.83, 96.40] | 1"
606 | ]
607 | },
608 | "execution_count": 15,
609 | "metadata": {},
610 | "output_type": "execute_result"
611 | }
612 | ],
613 | "source": [
614 | "mapclassify.classify(y, 'Std_Mean', anchor=True)"
615 | ]
616 | },
617 | {
618 | "cell_type": "code",
619 | "execution_count": 16,
620 | "metadata": {
621 | "tags": []
622 | },
623 | "outputs": [
624 | {
625 | "data": {
626 | "text/plain": [
627 | "(38.43622446938775, 18.466069465206047, 17.9, 96.400002)"
628 | ]
629 | },
630 | "execution_count": 16,
631 | "metadata": {},
632 | "output_type": "execute_result"
633 | }
634 | ],
635 | "source": [
636 | "y.mean(), y.std(), y.min(), y.max()"
637 | ]
638 | },
639 | {
640 | "cell_type": "code",
641 | "execution_count": null,
642 | "metadata": {},
643 | "outputs": [],
644 | "source": []
645 | }
646 | ],
647 | "metadata": {
648 | "kernelspec": {
649 | "display_name": "Python 3 (ipykernel)",
650 | "language": "python",
651 | "name": "python3"
652 | },
653 | "language_info": {
654 | "codemirror_mode": {
655 | "name": "ipython",
656 | "version": 3
657 | },
658 | "file_extension": ".py",
659 | "mimetype": "text/x-python",
660 | "name": "python",
661 | "nbconvert_exporter": "python",
662 | "pygments_lexer": "ipython3",
663 | "version": "3.10.10"
664 | }
665 | },
666 | "nbformat": 4,
667 | "nbformat_minor": 4
668 | }
669 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=61.0", "setuptools_scm[toml]>=6.2"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [tool.setuptools_scm]
6 |
7 | [project]
8 | name = "mapclassify"
9 | dynamic = ["version"]
10 | maintainers = [
11 | { name = "Serge Rey", email = "sjsrey@gmail.com" },
12 | { name = "Wei Kang", email = "weikang9009@gmail.com" },
13 | ]
14 | license = { text = "BSD 3-Clause" }
15 | description = "Classification Schemes for Choropleth Maps."
16 | keywords = ["spatial statistics", "geovisualization"]
17 | readme = { text = """\
18 | `mapclassify` implements a family of classification schemes for choropleth maps.
19 | Its focus is on the determination of the number of classes, and the assignment
20 | of observations to those classes. It is intended for use with upstream mapping
21 | and geovisualization packages (see `geopandas`_ and `geoplot`_)
22 | that handle the rendering of the maps.
23 |
24 | For further theoretical background see "`Choropleth Mapping`_" in Rey, S.J., D. Arribas-Bel, and L.J. Wolf (2020) "Geographic Data Science with PySAL and the PyData Stack".
25 |
26 | .. _geopandas: https://geopandas.org/mapping.html
27 | .. _geoplot: https://residentmario.github.io/geoplot/user_guide/Customizing_Plots.html
28 | .. _Choropleth Mapping: https://geographicdata.science/book/notebooks/05_choropleth.html
29 | """, content-type = "text/x-rst" }
30 | classifiers = [
31 | "Programming Language :: Python :: 3",
32 | "License :: OSI Approved :: BSD License",
33 | "Operating System :: OS Independent",
34 | "Intended Audience :: Science/Research",
35 | "Topic :: Scientific/Engineering :: GIS",
36 | ]
37 | requires-python = ">=3.11"
38 | dependencies = [
39 | "networkx>=3.2",
40 | "numpy>=1.26",
41 | "pandas>=2.1",
42 | "scikit-learn>=1.4",
43 | "scipy>=1.12",
44 | ]
45 |
46 | [project.urls]
47 | Home = "https://pysal.org/mapclassify/"
48 | Repository = "https://github.com/pysal/mapclassify"
49 |
50 | [project.optional-dependencies]
51 | speedups = [
52 | "numba>=0.58"
53 | ]
54 | dev = [
55 | "ruff",
56 | "pre-commit",
57 | "watermark",
58 | ]
59 | docs = [
60 | "nbsphinx",
61 | "numpydoc",
62 | "sphinx>=1.4.3",
63 | "sphinx-gallery",
64 | "sphinxcontrib-bibtex",
65 | "sphinx_bootstrap_theme",
66 | ]
67 | spatial = [
68 | "geopandas",
69 | "libpysal",
70 | "matplotlib",
71 | "shapely",
72 | ]
73 | notebooks = [
74 | "mapclassify[spatial]",
75 | "geodatasets",
76 | "ipywidgets",
77 | "jupyterlab",
78 | "lonboard",
79 | "pyarrow",
80 | "pydeck",
81 | "seaborn",
82 | ]
83 | tests = [
84 | "mapclassify[spatial]",
85 | "pytest",
86 | "pytest-cov",
87 | "pytest-xdist",
88 | "pytest-doctestplus",
89 | "pytest-mpl"
90 | ]
91 | all = ["mapclassify[speedups,dev,docs,notebooks,tests]"]
92 |
93 | [tool.setuptools.packages.find]
94 | include = ["mapclassify", "mapclassify.*"]
95 |
96 |
97 | [tool.ruff]
98 | line-length = 88
99 | lint.select = ["E", "F", "W", "I", "UP", "N", "B", "A", "C4", "SIM", "ARG"]
100 | lint.ignore = [
101 | "B006",
102 | "B008",
103 | "B009",
104 | "B010",
105 | "C408",
106 | "E731",
107 | "F401",
108 | "F403",
109 | "F405",
110 | "N803",
111 | "N806",
112 | "N999",
113 | "UP007"
114 | ]
115 | extend-include = [
116 | "docs/conf.py"
117 | ]
118 |
119 | [tool.ruff.lint.per-file-ignores]
120 | "*tests/test_*.py" = [
121 | "A004", # Import is shadowing a Python builtin
122 | "N802", # Function name should be lowercase
123 | ]
124 | "docs/conf.py" = [
125 | "A001", # Variable is shadowing a Python builtin
126 | ]
127 |
128 | [tool.coverage.run]
129 | source = ["./mapclassify"]
130 |
131 | [tool.coverage.report]
132 | exclude_lines = [
133 | "if self.debug:",
134 | "pragma: no cover",
135 | "raise NotImplementedError",
136 | "except ModuleNotFoundError:",
137 | "except ImportError",
138 | ]
139 | ignore_errors = true
140 | omit = ["mapclassify/tests/*", "docs/conf.py"]
141 |
--------------------------------------------------------------------------------