element.
140 | # For black navbar, do "navbar navbar-inverse"
141 | # 'navbar_class': "navbar navbar-inverse",
142 | # Fix navigation bar to top of page?
143 | # Values: "true" (default) or "false"
144 | "navbar_fixed_top": "true",
145 | # Location of link to source.
146 | # Options are "nav" (default), "footer" or anything else to exclude.
147 | "source_link_position": "footer",
148 | # Bootswatch (http://bootswatch.com/) theme.
149 | #
150 | # Options are nothing (default) or the name of a valid theme
151 | # such as "amelia" or "cosmo", "yeti", "flatly".
152 | "bootswatch_theme": "yeti",
153 | # Choose Bootstrap version.
154 | # Values: "3" (default) or "2" (in quotes)
155 | "bootstrap_version": "3",
156 | # Navigation bar menu
157 | "navbar_links": [
158 | ("Installation", "installation"),
159 | ("User Guide", "user-guide/intro"),
160 | ("API", "api"),
161 | ("References", "references"),
162 | ],
163 | }
164 |
165 | # Add any paths that contain custom static files (such as style sheets) here,
166 | # relative to this directory. They are copied after the builtin static files,
167 | # so a file named "default.css" will overwrite the builtin "default.css".
168 | html_static_path = ["_static"]
169 |
170 | # Custom sidebar templates, maps document names to template names.
171 | # html_sidebars = {}
172 | # html_sidebars = {'sidebar': ['localtoc.html', 'sourcelink.html', 'searchbox.html']}
173 |
174 | # -- Options for HTMLHelp output ------------------------------------------
175 |
176 | # Output file base name for HTML help builder.
177 | htmlhelp_basename = project + "doc"
178 |
179 |
180 | # -- Options for LaTeX output ---------------------------------------------
181 |
182 | latex_elements = {
183 | # The paper size ('letterpaper' or 'a4paper').
184 | #
185 | # 'papersize': 'letterpaper',
186 | # The font size ('10pt', '11pt' or '12pt').
187 | #
188 | # 'pointsize': '10pt',
189 | # Additional stuff for the LaTeX preamble.
190 | #
191 | # 'preamble': '',
192 | # Latex figure (float) alignment
193 | #
194 | # 'figure_align': 'htbp',
195 | }
196 |
197 | # Grouping the document tree into LaTeX files. List of tuples
198 | # (source start file, target name, title,
199 | # author, documentclass [howto, manual, or own class]).
200 | latex_documents = [
201 | (
202 | master_doc,
203 | f"{project}.tex",
204 | f"{project} Documentation",
205 | "pysal developers",
206 | "manual",
207 | ),
208 | ]
209 |
210 |
211 | # -- Options for manual page output ---------------------------------------
212 |
213 | # One entry per manual page. List of tuples
214 | # (source start file, name, description, authors, manual section).
215 | man_pages = [(master_doc, project, f"{project} Documentation", [author], 1)]
216 |
217 |
218 | # -- Options for Texinfo output -------------------------------------------
219 |
220 | # Grouping the document tree into Texinfo files. List of tuples
221 | # (source start file, target name, title, author,
222 | # dir menu entry, description, category)
223 | texinfo_documents = [
224 | (
225 | master_doc,
226 | project,
227 | f"{project} Documentation",
228 | author,
229 | project,
230 | "Measures of spatial (and non-spatial) inequality",
231 | "Miscellaneous",
232 | ),
233 | ]
234 |
235 |
236 | # Generate the API documentation when building
237 | autosummary_generate = True
238 |
239 | # avoid showing members twice
240 | numpydoc_show_class_members = False
241 | numpydoc_use_plots = True
242 | class_members_toctree = True
243 | numpydoc_show_inherited_class_members = True
244 | numpydoc_xref_param_type = True
245 |
246 | # automatically document class members
247 | autodoc_default_options = {"members": True, "undoc-members": True}
248 |
249 | # display the source code for Plot directive
250 | plot_include_source = True
251 |
252 |
253 | def setup(app):
254 | app.add_css_file("pysal-styles.css")
255 |
256 |
257 | # Example configuration for intersphinx: refer to the Python standard library.
258 | intersphinx_mapping = {
259 | "libpysal": ("https://pysal.org/libpysal/", None),
260 | "numpy": ("https://numpy.org/doc/stable/", None),
261 | "python": ("https://docs.python.org/3.12/", None),
262 | "scipy": ("https://docs.scipy.org/doc/scipy/", None),
263 | }
264 |
265 |
266 | # This is processed by Jinja2 and inserted before each notebook
267 | nbsphinx_prolog = r"""
268 | {% set docname = env.doc2path(env.docname, base=None) %}
269 |
270 | .. only:: html
271 |
272 | .. role:: raw-html(raw)
273 | :format: html
274 |
275 | .. nbinfo::
276 |
277 | This page was generated from `{{ docname }}`__.
278 | Interactive online version:
279 | :raw-html:`

`
280 |
281 | __ https://github.com/pysal/inequality/blob/main/{{ docname }}
282 |
283 | .. raw:: latex
284 |
285 | \nbsphinxstartnotebook{\scriptsize\noindent\strut
286 | \textcolor{gray}{The following section was generated from
287 | \sphinxcode{\sphinxupquote{\strut {{ docname | escape_latex }}}} \dotfill}}
288 | """
289 |
290 | # This is processed by Jinja2 and inserted after each notebook
291 | nbsphinx_epilog = r"""
292 | .. raw:: latex
293 |
294 | \nbsphinxstopnotebook{\scriptsize\noindent\strut
295 | \textcolor{gray}{\dotfill\ \sphinxcode{\sphinxupquote{\strut
296 | {{ env.doc2path(env.docname, base='doc') | escape_latex }}}} ends here.}}
297 | """
298 |
299 | # List of arguments to be passed to the kernel that executes the notebooks:
300 | nbsphinx_execute_arguments = [
301 | "--InlineBackend.figure_formats={'svg', 'pdf'}",
302 | "--InlineBackend.rc={'figure.dpi': 96}",
303 | ]
304 |
305 |
306 | mathjax3_config = {
307 | "TeX": {"equationNumbers": {"autoNumber": "AMS", "useLabelIds": True}},
308 | }
309 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. documentation master file
2 |
3 | ************
4 | Inequality
5 | ************
6 |
7 | **inequality** implements measures for the analysis of inequality over space and time and is part of the `PySAL family
`_
8 |
9 | Details are available in the `inequality api `_.
10 |
11 | An Example: Spatial Inequality in Mexico: 1940-2000
12 | ============================================================
13 |
14 | .. raw:: html
15 |
16 |
17 |
18 |
19 |
20 |
28 |
36 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 | Development
57 | ===========
58 |
59 | The package is currently maintained by `@sjsrey`_
60 |
61 |
62 |
63 | https://github.com/pysal/inequality
64 |
65 | Getting Involved
66 | ======================
67 |
68 | If you are interested in contributing to PySAL please see our
69 | `development guidelines `_.
70 |
71 |
72 | Bug reports
73 | ===============
74 |
75 | To search for or report bugs, please see inequality's issues_.
76 |
77 | .. _issues : http://github.com/pysal/inequality/issues
78 |
79 |
80 | Citing inequality
81 | ==================
82 |
83 | If you use PySAL-inequality in a scientific publication, we would appreciate citations to:
84 |
85 |
86 | Sergio Rey, James Gaboardi, Wei Kang, Philip Stephens, Renan Xavier Cortes, Dani Arribas-Bel, Levi John Wolf, & Martin Fleischmann. (2023). pysal/inequality: v1.0.1 (v1.0.1). Zenodo. https://doi.org/10.5281/zenodo.10050549
87 |
88 | Bibtex entry::
89 |
90 | @software{inequality-devs2023,
91 | author = {Sergio Rey and
92 | James Gaboardi and
93 | Wei Kang and
94 | Philip Stephens and
95 | Renan Xavier Cortes and
96 | Dani Arribas-Bel and
97 | Levi John Wolf and
98 | Martin Fleischmann},
99 | title = {pysal/inequality: v1.0.1},
100 | month = oct,
101 | year = 2023,
102 | publisher = {Zenodo},
103 | version = {v1.0.1},
104 | doi = {10.5281/zenodo.10050549},
105 | url = {https://doi.org/10.5281/zenodo.10050549},
106 | }
107 |
108 |
109 |
110 |
111 |
112 | License information
113 | ========================
114 |
115 | See the file "LICENSE.txt" for information on the history of this
116 | software, terms & conditions for usage, and a DISCLAIMER OF ALL
117 | WARRANTIES.
118 |
119 |
120 | inequality
121 | ==========
122 |
123 | Documentation contents
124 | ----------------------
125 |
126 | .. toctree::
127 | :maxdepth: 1
128 |
129 | Home
130 | Installation
131 | API reference
132 | user-guide/intro
133 | References
134 |
135 | .. _PySAL: https://github.com/pysal/pysal
136 | .. _PySAL (Python Spatial Analysis Library): http://pysal.org
137 | .. _GeoPandas: http://geopandas.org
138 | .. _PySAL: http://pysal.org
139 | .. _@sjsrey: http://github.org/sjsrey
140 | .. _issue: https://github.com/pysal/inequality/issues/new/choose
141 | .. _discussion: https://github.com/pysal/inequality/discussions
142 | .. _Discord: https://discord.com/channels/1192517762103398401/1192517763986632766
143 |
--------------------------------------------------------------------------------
/docs/installation.rst:
--------------------------------------------------------------------------------
1 | .. Installation
2 |
3 | Installation
4 | ============
5 |
6 | inequality supports Python `3.10`_, `3.11`_, and `3.12`_. Please make sure that you are
7 | operating in a Python >= 3.10 environment.
8 |
9 | conda
10 | +++++
11 |
12 | inequality is available through conda::
13 |
14 | conda install -c conda-forge inequality
15 |
16 | pypi
17 | ++++
18 |
19 | inequality is available on the `Python Package Index`_. Therefore, you can either
20 | install directly with `pip` from the command line::
21 |
22 | pip install -U inequality
23 |
24 | or download the source distribution (.tar.gz) and decompress it to your selected
25 | destination. Open a command shell and navigate to the decompressed folder.
26 | Type::
27 |
28 | pip install .
29 |
30 | Installing development version
31 | ------------------------------
32 |
33 | Potentially, you might want to use the newest features in the development
34 | version of inequality on github - `pysal/inequality`_ while have not been incorporated
35 | in the Pypi released version. You can achieve that by installing `pysal/inequality`_
36 | by running the following from a command shell::
37 |
38 | pip install git+https://github.com/pysal/inequality.git
39 |
40 | You can also `fork`_ the `pysal/inequality`_ repo and create a local clone of
41 | your fork. By making changes
42 | to your local clone and submitting a pull request to `pysal/inequality`_, you can
43 | contribute to inequality development.
44 |
45 |
46 | .. _3.10: https://docs.python.org/3.10/
47 | .. _3.11: https://docs.python.org/3.11/
48 | .. _3.12: https://docs.python.org/3.12/
49 | .. _Python Package Index: https://pypi.org/project/inequality/
50 | .. _pysal/inequality: https://github.com/pysal/inequality
51 | .. _fork: https://help.github.com/articles/fork-a-repo/
52 |
--------------------------------------------------------------------------------
/docs/references.rst:
--------------------------------------------------------------------------------
1 | .. reference for the docs
2 |
3 | References
4 | ==========
5 |
6 | .. bibliography:: _static/references.bib
7 | :cited:
8 |
--------------------------------------------------------------------------------
/docs/user-guide/intro.rst:
--------------------------------------------------------------------------------
1 | ==========
2 | User Guide
3 | ==========
4 |
5 | This user guide covers essential features of pysal-inequality, mostly in the form of interactive Jupyter notebooks. Reading this guide, you will learn:
6 |
7 | - how to :ref:`visualize ` spatial inequality
8 | - how to :ref:`measure ` spatial inequality
9 |
10 | Notebooks cover just a small selection of functions as an illustration of
11 | principles. For a full overview of pysal-inequality capabilities, head to the `API <../api.rst>`_.
12 |
13 |
--------------------------------------------------------------------------------
/docs/user-guide/measure/intro.rst:
--------------------------------------------------------------------------------
1 | .. _measure:
2 |
3 | ============================
4 | Measuring Spatial Inequality
5 | ============================
6 |
7 | .. toctree::
8 | :maxdepth: 1
9 |
10 |
11 | Gini Index
12 | Theil Index
13 | Wolfson Index
14 |
15 |
--------------------------------------------------------------------------------
/docs/user-guide/viz/intro.rst:
--------------------------------------------------------------------------------
1 | .. _viz:
2 |
3 | ==============================
4 | Visualizing Spatial Inequality
5 | ==============================
6 |
7 | .. toctree::
8 | :maxdepth: 1
9 |
10 | Lorenz Curves and Schutz Line
11 | Pen's Parade and Pengrams
12 |
--------------------------------------------------------------------------------
/docs/user-guide/viz/weighted.cpg:
--------------------------------------------------------------------------------
1 | UTF-8
--------------------------------------------------------------------------------
/docs/user-guide/viz/weighted.dbf:
--------------------------------------------------------------------------------
1 | | a, POLY_ID N AREA N CODE C P NAME C P PERIMETER N ACRES N HECTARES N PCGDP1940 N PCGDP1950 N PCGDP1960 N PCGDP1970 N PCGDP1980 N PCGDP1990 N PCGDP2000 N HANSON03 N HANSON98 N ESQUIVEL99 N INEGI N INEGI2 N MAXP N GR4000 N GR5000 N GR6000 N GR7000 N GR8000 N GR9000 N LPCGDP40 N LPCGDP50 N LPCGDP60 N LPCGDP70 N LPCGDP80 N LPCGDP90 N LPCGDP00 N TEST N Name_1 C P Population C P NAMEp C P populati_1 N Y2000 N y2000_1 N p N State C P
55467029851.1000003814697MX01 Aguascalientes 313895.530000000027940 1350927.093000000109896 546702.984999999986030 10384.000000000000000 6234.000000000000000 8714.000000000000000 16078.000000000000000 21022.000000000000000 20787.000000000000000 27782.000000000000000 2.000000000000000 2.000000000000000 3.000000000000000 4.000000000000000 4.000000000000000 4.000000000000000 0.430000000000000 0.650000000000000 0.500000000000000 0.240000000000000 0.120000000000000 0.130000000000000 4.020000000000000 3.790000000000000 3.940000000000000 4.210000000000000 4.320000000000000 4.320000000000000 4.440000000000000 5.000000000000000Aguascalientes 944285 Aguascalientes 94428526234125870.000000000000 27782.000000000000000 0.009647155924627Aguascalientes 172527513755.000000000000MX02 Baja California Norte 2040312.38500000000931317921867.261999998241663 7252751.376000000163913 22361.000000000000000 20977.000000000000000 17865.000000000000000 25321.000000000000000 29283.000000000000000 26839.000000000000000 29855.000000000000000 1.000000000000000 1.000000000000000 5.000000000000000 1.000000000000000 1.000000000000000 2.000000000000000 0.130000000000000 0.150000000000000 0.220000000000000 0.070000000000000 0.010000000000000 0.050000000000000 4.350000000000000 4.320000000000000 4.250000000000000 4.400000000000000 4.470000000000000 4.430000000000000 4.480000000000000 1.000000000000000Querétaro de Arteaga 1404306 Baja California Norte 140430641925555630.000000000000 29855.000000000000000 0.014346896273783Baja California Norte 272259877689.000000000000MX03 Baja California Sur 2912880.77199999988079117855733.214000001549721 7225987.769000000320375 9573.000000000000000 16013.000000000000000 16707.000000000000000 24384.000000000000000 29038.000000000000000 25842.000000000000000 26103.000000000000000 2.000000000000000 2.000000000000000 6.000000000000000 1.000000000000000 1.000000000000000 2.000000000000000 0.440000000000000 0.210000000000000 0.190000000000000 0.030000000000000 -0.050000000000000 0.000000000000000 3.980000000000000 4.200000000000000 4.220000000000000 4.390000000000000 4.460000000000000 4.410000000000000 4.420000000000000 2.000000000000000Baja California Sur 424041 Baja California Sur 42404111068742223.000000000000 26103.000000000000000 0.004332155700276Baja California Sur 1550165837229.000000000000MX04 Campeche 1575361.14599999994970912396198.757999999448657 5016583.723000000230968 3758.000000000000000 4929.000000000000000 5925.000000000000000 10274.000000000000000 12166.000000000000000 51123.000000000000000 36163.000000000000000 6.000000000000000 5.000000000000000 4.000000000000000 5.000000000000000 5.000000000000000 1.000000000000000 0.980000000000000 0.870000000000000 0.790000000000000 0.550000000000000 0.470000000000000 -0.150000000000000 3.570000000000000 3.690000000000000 3.770000000000000 4.010000000000000 4.090000000000000 4.710000000000000 4.560000000000000 15.000000000000000Campeche 690689 Campeche 69068924977386307.000000000000 36163.000000000000000 0.007056327780728Campeche 2273391573763.000000000000MX05 Chiapas 1477195.19900000002235218135380.285999998450279 7339157.376000000163913 2934.000000000000000 4138.000000000000000 5280.000000000000000 7015.000000000000000 16200.000000000000000 8637.000000000000000 8684.000000000000000 5.000000000000000 5.000000000000000 7.000000000000000 5.000000000000000 5.000000000000000 3.000000000000000 0.470000000000000 0.320000000000000 0.220000000000000 0.090000000000000 -0.270000000000000 0.000000000000000 3.470000000000000 3.620000000000000 3.720000000000000 3.850000000000000 4.210000000000000 3.940000000000000 3.940000000000000 22.000000000000000Chiapas 3920892 Chiapas 392089234049026128.000000000000 8684.000000000000000 0.040057245945474Chiapas 24248054569980.00000000000MX06 Chihuahua 2393736.22800000011920961295373.94500000029802324805456.997999999672174 8578.000000000000000 13997.000000000000000 16265.000000000000000 19178.000000000000000 23399.000000000000000 25332.000000000000000 30735.000000000000000 1.000000000000000 1.000000000000000 5.000000000000000 1.000000000000000 2.000000000000000 2.000000000000000 0.550000000000000 0.340000000000000 0.280000000000000 0.200000000000000 0.120000000000000 0.080000000000000 3.930000000000000 4.150000000000000 4.210000000000000 4.280000000000000 4.370000000000000 4.400000000000000 4.490000000000000 24.000000000000000Chihuahua 3052907 Chihuahua 305290793831096645.000000000000 30735.000000000000000 0.031189598322948Chihuahua 25150192356644.00000000000MX07 Coahuila De Zaragoza 2107437.83499999996274737113191.12099999934434915019235.664000000804663 8537.000000000000000 9673.000000000000000 12318.000000000000000 20562.000000000000000 25688.000000000000000 26084.000000000000000 28460.000000000000000 1.000000000000000 1.000000000000000 5.000000000000000 2.000000000000000 2.000000000000000 4.000000000000000 0.520000000000000 0.470000000000000 0.360000000000000 0.140000000000000 0.040000000000000 0.040000000000000 3.930000000000000 3.990000000000000 4.090000000000000 4.310000000000000 4.410000000000000 4.420000000000000 4.450000000000000 25.000000000000000Coahuila de Zaragoza 2298070 Coahuila De Zaragoza 229807065403072200.000000000000 28460.000000000000000 0.023477911452270Coahuila De Zaragoza 125726784868.3000001907348MX08 Colima 354755.534999999974389 1415113.699000000022352 572678.486999999964610 6909.000000000000000 6049.000000000000000 6036.000000000000000 12551.000000000000000 17427.000000000000000 18313.000000000000000 21358.000000000000000 3.000000000000000 3.000000000000000 6.000000000000000 4.000000000000000 4.000000000000000 4.000000000000000 0.490000000000000 0.550000000000000 0.550000000000000 0.230000000000000 0.090000000000000 0.070000000000000 3.840000000000000 3.780000000000000 3.780000000000000 4.100000000000000 4.240000000000000 4.260000000000000 4.330000000000000 12.000000000000000Colima 542627 Colima 54262711589427466.000000000000 21358.000000000000000 0.005543673020235Colima 111326256296.9000000953674MX09 Distrito Federal 149985.706999999994878 327723.756999999983236 132625.630000000004657 17816.000000000000000 17119.000000000000000 23174.000000000000000 32386.000000000000000 42028.000000000000000 43810.000000000000000 54349.000000000000000 4.000000000000000 4.000000000000000 1.000000000000000 3.000000000000000 3.000000000000000 5.000000000000000 0.480000000000000 0.500000000000000 0.370000000000000 0.220000000000000 0.110000000000000 0.090000000000000 4.250000000000000 4.230000000000000 4.370000000000000 4.510000000000000 4.620000000000000 4.640000000000000 4.740000000000000 11.000000000000000Michoacán 4384471 Distrito Federal 4384471238291614379.00000000000 54349.000000000000000 0.044793336105100Distrito Federal 27120343805563.00000000000MX10 Durango 1866079.59499999997206029737483.02499999850988412034380.555999999865890 12132.000000000000000 8859.000000000000000 9323.000000000000000 12700.000000000000000 16726.000000000000000 17353.000000000000000 17379.000000000000000 2.000000000000000 2.000000000000000 3.000000000000000 1.000000000000000 2.000000000000000 4.000000000000000 0.160000000000000 0.290000000000000 0.270000000000000 0.140000000000000 0.020000000000000 0.000000000000000 4.080000000000000 3.950000000000000 3.970000000000000 4.100000000000000 4.220000000000000 4.240000000000000 4.240000000000000 27.000000000000000Durango 1448661 Durango 144866125176279519.000000000000 17379.000000000000000 0.014800042941406Durango 630344905851.000000000000MX11 Guanajuato 918758.241000000038184 7498359.541000000201166 3034490.584999999962747 4359.000000000000000 5686.000000000000000 8209.000000000000000 11635.000000000000000 13864.000000000000000 13607.000000000000000 15585.000000000000000 3.000000000000000 3.000000000000000 3.000000000000000 4.000000000000000 4.000000000000000 5.000000000000000 0.550000000000000 0.440000000000000 0.280000000000000 0.130000000000000 0.050000000000000 0.060000000000000 3.640000000000000 3.750000000000000 3.910000000000000 4.070000000000000 4.140000000000000 4.130000000000000 4.190000000000000 6.000000000000000Guanajuato 4663032 Guanajuato 466303272673353720.000000000000 15585.000000000000000 0.047639215687557Guanajuato 1964755232407.000000000000MX12 Guerrero 1387049.88800000003539016001302.396999999880791 6475523.241000000387430 2181.000000000000000 3629.000000000000000 4991.000000000000000 6497.000000000000000 8727.000000000000000 9084.000000000000000 11820.000000000000000 5.000000000000000 5.000000000000000 7.000000000000000 5.000000000000000 5.000000000000000 3.000000000000000 0.730000000000000 0.510000000000000 0.370000000000000 0.260000000000000 0.130000000000000 0.110000000000000 3.340000000000000 3.560000000000000 3.700000000000000 3.810000000000000 3.940000000000000 3.960000000000000 4.070000000000000 19.000000000000000Guerrero 3079649 Guerrero 307964936401451180.000000000000 11820.000000000000000 0.031462804234019Guerrero 821235327145.000000000000MX13 Hidalgo 953861.243999999947846 5247342.623999999836087 2123532.714999999850988 4414.000000000000000 5194.000000000000000 6399.000000000000000 7767.000000000000000 12391.000000000000000 13091.000000000000000 12348.000000000000000 3.000000000000000 3.000000000000000 2.000000000000000 3.000000000000000 3.000000000000000 3.000000000000000 0.450000000000000 0.380000000000000 0.290000000000000 0.200000000000000 -0.000000000000000 -0.030000000000000 3.640000000000000 3.720000000000000 3.810000000000000 3.890000000000000 4.090000000000000 4.120000000000000 4.090000000000000 8.000000000000000Hidalgo 2235591 Hidalgo 223559127605077668.000000000000 12348.000000000000000 0.022839603467907Hidalgo 479610082850.000000000000MX14 Jalisco 2324727.43600000021979219672001.199000000953674 7961008.285000000149012 5309.000000000000000 8232.000000000000000 9953.000000000000000 16288.000000000000000 20659.000000000000000 20133.000000000000000 21610.000000000000000 3.000000000000000 3.000000000000000 6.000000000000000 4.000000000000000 4.000000000000000 4.000000000000000 0.610000000000000 0.420000000000000 0.340000000000000 0.120000000000000 0.020000000000000 0.030000000000000 3.730000000000000 3.920000000000000 4.000000000000000 4.210000000000000 4.320000000000000 4.300000000000000 4.330000000000000 4.000000000000000Jalisco 6322002 Jalisco 6322002136618463220.00000000000 21610.000000000000000 0.064587851178196Jalisco 1021476282978.000000000000MX15 Mexico 888381.807000000029802 5306883.868999999947846 2147628.297999999951571 3408.000000000000000 4972.000000000000000 9053.000000000000000 17164.000000000000000 20165.000000000000000 18547.000000000000000 16322.000000000000000 4.000000000000000 4.000000000000000 1.000000000000000 3.000000000000000 3.000000000000000 5.000000000000000 0.680000000000000 0.520000000000000 0.260000000000000 -0.020000000000000 -0.090000000000000 -0.060000000000000 3.530000000000000 3.700000000000000 3.960000000000000 4.230000000000000 4.300000000000000 4.270000000000000 4.210000000000000 10.000000000000000San Luis Potosí 2299360 Mexico 229936037530153920.000000000000 16322.000000000000000 0.023491090557247Mexico 959473519192.000000000000MX16 Michoacan de Ocampo 1431015.87700000009499514696167.858999999240041 5947351.918999999761581 3327.000000000000000 5272.000000000000000 5244.000000000000000 8109.000000000000000 11206.000000000000000 10980.000000000000000 11838.000000000000000 3.000000000000000 3.000000000000000 7.000000000000000 4.000000000000000 4.000000000000000 3.000000000000000 0.550000000000000 0.350000000000000 0.350000000000000 0.160000000000000 0.020000000000000 0.030000000000000 3.520000000000000 3.720000000000000 3.720000000000000 3.910000000000000 4.050000000000000 4.040000000000000 4.070000000000000 9.000000000000000Ciudad de México 8605239 Michoacan de Ocampo 8605239101868819282.00000000000 11838.000000000000000 0.087914223355957Michoacan de Ocampo 135055028040.1999998092651MX17 Morelos 335390.325000000011642 1249119.635000000009313 505502.804000000003725 6936.000000000000000 8962.000000000000000 10499.000000000000000 13892.000000000000000 16513.000000000000000 17701.000000000000000 18170.000000000000000 3.000000000000000 3.000000000000000 2.000000000000000 3.000000000000000 3.000000000000000 5.000000000000000 0.420000000000000 0.310000000000000 0.240000000000000 0.120000000000000 0.040000000000000 0.010000000000000 3.840000000000000 3.950000000000000 4.020000000000000 4.140000000000000 4.220000000000000 4.250000000000000 4.260000000000000 13.000000000000000Morelos 1555296 Morelos 155529628259728320.000000000000 18170.000000000000000 0.015889464537664Morelos 327319568586.000000000000MX18 Nayarit 1034770.341000000014901 6750785.411999999545515 2731956.859000000171363 4836.000000000000000 7515.000000000000000 7621.000000000000000 10880.000000000000000 13354.000000000000000 12757.000000000000000 11478.000000000000000 2.000000000000000 2.000000000000000 6.000000000000000 4.000000000000000 4.000000000000000 1.000000000000000 0.380000000000000 0.180000000000000 0.180000000000000 0.020000000000000 -0.070000000000000 -0.050000000000000 3.680000000000000 3.880000000000000 3.880000000000000 4.040000000000000 4.130000000000000 4.110000000000000 4.060000000000000 3.000000000000000Nayarit 920185 Nayarit 92018510561883430.000000000000 11478.000000000000000 0.009400941637856Nayarit 3065113809326.000000000000MX19 Nuevo Leon 1706261.49200000008568216089908.328999999910593 6511380.933000000193715 9073.000000000000000 11490.000000000000000 20117.000000000000000 28206.000000000000000 34856.000000000000000 34726.000000000000000 38672.000000000000000 1.000000000000000 1.000000000000000 5.000000000000000 2.000000000000000 2.000000000000000 4.000000000000000 0.630000000000000 0.530000000000000 0.280000000000000 0.140000000000000 0.050000000000000 0.050000000000000 3.960000000000000 4.060000000000000 4.300000000000000 4.450000000000000 4.540000000000000 4.540000000000000 4.590000000000000 30.000000000000000México 13096686 Nuevo Leon 13096686506475040992.00000000000 38672.000000000000000 0.133800464836227Nuevo Leon 2092691433795.000000000000MX20 Oaxaca 1995816.28399999998509922904460.484000001102686 9269143.380000000819564 1892.000000000000000 4538.000000000000000 4140.000000000000000 5230.000000000000000 7730.000000000000000 8465.000000000000000 9010.000000000000000 5.000000000000000 5.000000000000000 7.000000000000000 5.000000000000000 5.000000000000000 3.000000000000000 0.680000000000000 0.300000000000000 0.340000000000000 0.240000000000000 0.070000000000000 0.030000000000000 3.280000000000000 3.660000000000000 3.620000000000000 3.720000000000000 3.890000000000000 3.930000000000000 3.950000000000000 20.000000000000000Oaxaca 3438765 Oaxaca 343876530983272650.000000000000 9010.000000000000000 0.035131662732278Oaxaca 1634260003971.000000000000MX21 Puebla 1472803.283999999985099 8465797.484999999403954 3426000.396999999880791 3569.000000000000000 6415.000000000000000 6542.000000000000000 9775.000000000000000 13374.000000000000000 11895.000000000000000 15685.000000000000000 3.000000000000000 3.000000000000000 2.000000000000000 3.000000000000000 5.000000000000000 3.000000000000000 0.640000000000000 0.390000000000000 0.380000000000000 0.210000000000000 0.070000000000000 0.120000000000000 3.550000000000000 3.810000000000000 3.820000000000000 3.990000000000000 4.130000000000000 4.080000000000000 4.200000000000000 16.000000000000000Puebla 5076686 Puebla 507668679627819910.000000000000 15685.000000000000000 0.051865254051870Puebla 712032399965.000000000000MX22 Queretaro de Arteaga 619581.709000000031665 2973258.890000000130385 1203239.996999999973923 11016.000000000000000 5560.000000000000000 7110.000000000000000 14073.000000000000000 20088.000000000000000 22441.000000000000000 26149.000000000000000 3.000000000000000 3.000000000000000 3.000000000000000 3.000000000000000 4.000000000000000 5.000000000000000 0.380000000000000 0.670000000000000 0.570000000000000 0.270000000000000 0.110000000000000 0.070000000000000 4.040000000000000 3.750000000000000 3.850000000000000 4.150000000000000 4.300000000000000 4.350000000000000 4.420000000000000 7.000000000000000Veracruz 6908975 Queretaro de Arteaga 6908975180662787275.00000000000 26149.000000000000000 0.070584578918810Queretaro de Arteaga 1751238434535.000000000000MX23 Quintana Roo 1756848.57799999997951112661242.264000000432134 5123843.453999999910593 21965.000000000000000 28747.000000000000000 9677.000000000000000 17046.000000000000000 26695.000000000000000 25049.000000000000000 33442.000000000000000 6.000000000000000 5.000000000000000 4.000000000000000 5.000000000000000 5.000000000000000 1.000000000000000 0.180000000000000 0.070000000000000 0.540000000000000 0.290000000000000 0.100000000000000 0.130000000000000 4.340000000000000 4.460000000000000 3.990000000000000 4.230000000000000 4.430000000000000 4.400000000000000 4.520000000000000 17.000000000000000Quintana Roo 874963 Quintana Roo 87496329260512646.000000000000 33442.000000000000000 0.008938937385725Quintana Roo 2964148547178.000000000000MX24 San Luis Potosi 1529201.48699999996461015851387.812000000849366 6414854.718000000342727 4372.000000000000000 7533.000000000000000 6440.000000000000000 9721.000000000000000 12691.000000000000000 15436.000000000000000 15866.000000000000000 2.000000000000000 2.000000000000000 3.000000000000000 4.000000000000000 4.000000000000000 1.000000000000000 0.560000000000000 0.320000000000000 0.390000000000000 0.210000000000000 0.100000000000000 0.010000000000000 3.640000000000000 3.880000000000000 3.810000000000000 3.990000000000000 4.100000000000000 4.190000000000000 4.200000000000000 29.000000000000000Baja California 2487367 San Luis Potosi 248736739464564822.000000000000 15866.000000000000000 0.025411837835792San Luis Potosi 2657798879721.000000000000MX25 Sinaloa 2090624.51200000010430814282357.088999999687076 5779887.972000000067055 4840.000000000000000 6663.000000000000000 9613.000000000000000 14477.000000000000000 15312.000000000000000 15823.000000000000000 15242.000000000000000 2.000000000000000 2.000000000000000 6.000000000000000 1.000000000000000 1.000000000000000 2.000000000000000 0.500000000000000 0.360000000000000 0.200000000000000 0.020000000000000 -0.000000000000000 -0.020000000000000 3.680000000000000 3.820000000000000 3.980000000000000 4.160000000000000 4.190000000000000 4.200000000000000 4.180000000000000 26.000000000000000Sinaloa 2536844 Sinaloa 253684438666576248.000000000000 15242.000000000000000 0.025917312701625Sinaloa 23180066243824.00000000000MX26 Sonora 2735537.38599999994039544495159.87900000065565118006624.381999999284744 6399.000000000000000 10345.000000000000000 12134.000000000000000 22662.000000000000000 23181.000000000000000 24784.000000000000000 24068.000000000000000 1.000000000000000 1.000000000000000 5.000000000000000 1.000000000000000 1.000000000000000 2.000000000000000 0.580000000000000 0.370000000000000 0.300000000000000 0.030000000000000 0.020000000000000 -0.010000000000000 3.810000000000000 4.010000000000000 4.080000000000000 4.360000000000000 4.370000000000000 4.390000000000000 4.380000000000000 23.000000000000000Sonora 2216969 Sonora 221696953358009892.000000000000 24068.000000000000000 0.022649354403664Sonora 2124255651555.000000000000MX27 Tabasco 1244472.600000000093132 5993678.053999999538064 2425565.155999999959022 2459.000000000000000 3857.000000000000000 6494.000000000000000 9367.000000000000000 42361.000000000000000 16055.000000000000000 13360.000000000000000 6.000000000000000 5.000000000000000 4.000000000000000 5.000000000000000 5.000000000000000 1.000000000000000 0.740000000000000 0.540000000000000 0.310000000000000 0.150000000000000 -0.500000000000000 -0.080000000000000 3.390000000000000 3.590000000000000 3.810000000000000 3.970000000000000 4.630000000000000 4.210000000000000 4.130000000000000 21.000000000000000Tabasco 1891829 Tabasco 189182925274835440.000000000000 13360.000000000000000 0.019327606967950Tabasco 3179005654082.000000000000MX28 Tamaulipas 2077945.64599999994970919522644.195000000298023 7900565.407999999821186 7508.000000000000000 8536.000000000000000 8383.000000000000000 17128.000000000000000 21937.000000000000000 19983.000000000000000 23546.000000000000000 1.000000000000000 1.000000000000000 5.000000000000000 2.000000000000000 2.000000000000000 4.000000000000000 0.500000000000000 0.440000000000000 0.450000000000000 0.140000000000000 0.030000000000000 0.070000000000000 3.880000000000000 3.930000000000000 3.920000000000000 4.230000000000000 4.340000000000000 4.300000000000000 4.370000000000000 31.000000000000000Tamaulipas 2753222 Tamaulipas 275322264827365212.000000000000 23546.000000000000000 0.028127908342410Tamaulipas 183973410003.5999999046325MX29 Tlaxcala 319017.395000000018626 981847.067000000039116 397341.000000000000000 3605.000000000000000 4178.000000000000000 4357.000000000000000 6245.000000000000000 9882.000000000000000 10339.000000000000000 11701.000000000000000 3.000000000000000 3.000000000000000 2.000000000000000 3.000000000000000 3.000000000000000 3.000000000000000 0.510000000000000 0.450000000000000 0.430000000000000 0.270000000000000 0.070000000000000 0.050000000000000 3.560000000000000 3.620000000000000 3.640000000000000 3.800000000000000 3.990000000000000 4.010000000000000 4.070000000000000 18.000000000000000Tlaxcala 962646 Tlaxcala 96264611263920846.000000000000 11701.000000000000000 0.009834738518793Tlaxcala 3271394747808.000000000000MX30 Veracruz-Llave 2796252.49899999983608717641955.820000000298023 7139474.781000000424683 5203.000000000000000 10143.000000000000000 11404.000000000000000 12240.000000000000000 14252.000000000000000 13796.000000000000000 12191.000000000000000 3.000000000000000 3.000000000000000 4.000000000000000 5.000000000000000 5.000000000000000 1.000000000000000 0.370000000000000 0.080000000000000 0.030000000000000 -0.000000000000000 -0.070000000000000 -0.050000000000000 3.720000000000000 4.010000000000000 4.060000000000000 4.090000000000000 4.150000000000000 4.140000000000000 4.090000000000000 32.000000000000000Nuevo León 3834141 Veracruz-Llave 383414146742012931.000000000000 12191.000000000000000 0.039170966460343Veracruz-Llave 1437890016803.000000000000MX31 Yucatan 955594.974999999976717 9362789.603000000119209 3789001.680000000167638 7990.000000000000000 8428.000000000000000 10067.000000000000000 11665.000000000000000 15239.000000000000000 13979.000000000000000 17509.000000000000000 6.000000000000000 5.000000000000000 4.000000000000000 5.000000000000000 5.000000000000000 1.000000000000000 0.340000000000000 0.320000000000000 0.240000000000000 0.180000000000000 0.060000000000000 0.100000000000000 3.900000000000000 3.930000000000000 4.000000000000000 4.070000000000000 4.180000000000000 4.150000000000000 4.240000000000000 14.000000000000000Yucatán 1658210 Yucatan 165821029033598890.000000000000 17509.000000000000000 0.016940871056699Yucatan 2874805700364.000000000000MX32 Zacatecas 2165307.92100000008940718484817.181000001728535 7480570.036000000312924 3734.000000000000000 6435.000000000000000 5821.000000000000000 7426.000000000000000 8876.000000000000000 11656.000000000000000 11130.000000000000000 2.000000000000000 2.000000000000000 3.000000000000000 4.000000000000000 4.000000000000000 1.000000000000000 0.470000000000000 0.240000000000000 0.280000000000000 0.180000000000000 0.100000000000000 -0.020000000000000 3.570000000000000 3.810000000000000 3.760000000000000 3.870000000000000 3.950000000000000 4.070000000000000 4.050000000000000 28.000000000000000Zacatecas 1353610 Zacatecas 135361015065679300.000000000000 11130.000000000000000 0.013828967664565Zacatecas
--------------------------------------------------------------------------------
/docs/user-guide/viz/weighted.shp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pysal/inequality/cd2e2c5eebe0afa3d3e39f57bcbad28c646cdb1e/docs/user-guide/viz/weighted.shp
--------------------------------------------------------------------------------
/docs/user-guide/viz/weighted.shx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pysal/inequality/cd2e2c5eebe0afa3d3e39f57bcbad28c646cdb1e/docs/user-guide/viz/weighted.shx
--------------------------------------------------------------------------------
/inequality/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | :mod:`inequality` --- Spatial Inequality Analysis
3 | =================================================
4 |
5 | """
6 |
7 | import contextlib
8 | from importlib.metadata import PackageNotFoundError, version
9 |
10 | from . import atkinson, gini, schutz, theil, wolfson
11 | from ._indices import (
12 | abundance,
13 | ellison_glaeser_egg,
14 | ellison_glaeser_egg_pop,
15 | fractionalization_gs,
16 | gini_gi,
17 | gini_gi_m,
18 | gini_gig,
19 | herfindahl_hd,
20 | hoover_hi,
21 | isolation_ii,
22 | isolation_isg,
23 | margalev_md,
24 | maurel_sedillot_msg,
25 | maurel_sedillot_msg_pop,
26 | menhinick_mi,
27 | modified_segregation_msg,
28 | polarization,
29 | segregation_gsg,
30 | shannon_se,
31 | similarity_w_wd,
32 | simpson_sd,
33 | simpson_so,
34 | theil_th,
35 | )
36 |
37 | with contextlib.suppress(PackageNotFoundError):
38 | __version__ = version("inequality")
39 |
--------------------------------------------------------------------------------
/inequality/_indices.py:
--------------------------------------------------------------------------------
1 | """
2 | Diversity indices as suggested in Nijkamp & Poot (2015) [1]
3 |
4 | References
5 | ----------
6 |
7 | [1]_ Nijkamp, P. and Poot, J. "Cultural Diversity: A Matter of Measurement".
8 | IZA Discussion Paper Series No. 8782
9 | :cite:`nijkamp2015cultural`
10 | https://www.econstor.eu/bitstream/10419/107568/1/dp8782.pdf
11 | """
12 |
13 | import functools
14 | import itertools
15 | import warnings
16 |
17 | import numpy
18 |
19 | SMALL = numpy.finfo("float").tiny
20 |
21 |
22 | def deprecated_function(func):
23 | """Decorator to mark functions as deprecated."""
24 |
25 | @functools.wraps(func)
26 | def wrapper(*args, **kwargs):
27 | warnings.warn(
28 | f"{func.__name__} is deprecated and will be removed on 2025-01-01.",
29 | FutureWarning,
30 | stacklevel=2,
31 | )
32 | return func(*args, **kwargs)
33 |
34 | return wrapper
35 |
36 |
37 | @deprecated_function
38 | def abundance(x):
39 | """
40 | Abundance index. :cite:`nijkamp2015cultural`
41 |
42 | Parameters
43 | ----------
44 |
45 | x : numpy.array
46 | An :math:`(N, k)` shaped array containing :math:`N` rows (one per
47 | neighborhood) and :math:`k` columns (one per cultural group).
48 |
49 | Returns
50 | -------
51 |
52 | a : float
53 | Abundance index.
54 |
55 | Examples
56 | --------
57 |
58 | >>> import numpy
59 | >>> x = numpy.array([[0, 1, 2], [0, 2, 4], [0, 0, 3]])
60 | >>> int(abundance(x))
61 | 2
62 |
63 | """
64 |
65 | xs = x.sum(axis=0)
66 | a = numpy.sum([1 for i in xs if i > 0])
67 | return a
68 |
69 |
70 | @deprecated_function
71 | def margalev_md(x):
72 | """
73 | Margalev MD index. :cite:`nijkamp2015cultural`
74 |
75 | Parameters
76 | ----------
77 |
78 | x : numpy.array
79 | An :math:`(N, k)` shaped array containing :math:`N` rows (one per
80 | neighborhood) and :math:`k` columns (one per cultural group).
81 |
82 | Returns
83 | -------
84 |
85 | mmd : float
86 | Margalev MD index.
87 |
88 | Examples
89 | --------
90 |
91 | >>> import numpy
92 | >>> x = numpy.array([[0, 1, 2], [0, 2, 4], [0, 0, 3]])
93 | >>> float(margalev_md(x))
94 | 0.40242960438184466
95 |
96 | """
97 |
98 | a = abundance(x)
99 | mmd = (a - 1.0) / numpy.log(x.sum())
100 | return mmd
101 |
102 |
103 | @deprecated_function
104 | def menhinick_mi(x):
105 | """
106 | Menhinick MI index. :cite:`nijkamp2015cultural`
107 |
108 | Parameters
109 | ----------
110 |
111 | x : numpy.array
112 | An :math:`(N, k)` shaped array containing :math:`N` rows (one per
113 | neighborhood) and :math:`k` columns (one per cultural group).
114 |
115 | Returns
116 | -------
117 |
118 | mmi : float
119 | Menhinick MI index.
120 |
121 | Examples
122 | --------
123 |
124 | >>> import numpy
125 | >>> x = numpy.array([[0, 1, 2], [0, 2, 4], [0, 0, 3]])
126 | >>> float(menhinick_mi(x))
127 | 0.2886751345948129
128 |
129 | """
130 |
131 | a = abundance(x)
132 | mmi = (a - 1.0) / numpy.sqrt(x.sum())
133 | return mmi
134 |
135 |
136 | @deprecated_function
137 | def simpson_so(x):
138 | """
139 | Simpson diversity index SO. :cite:`nijkamp2015cultural`
140 |
141 | Parameters
142 | ----------
143 |
144 | x : numpy.array
145 | An :math:`(N, k)` shaped array containing :math:`N` rows (one per
146 | neighborhood) and :math:`k` columns (one per cultural group).
147 |
148 | Returns
149 | -------
150 |
151 | sso : float
152 | Simpson diversity index SO.
153 |
154 | Examples
155 | --------
156 |
157 | >>> import numpy
158 | >>> x = numpy.array([[0, 1, 2], [0, 2, 4], [0, 0, 3]])
159 | >>> float(simpson_so(x))
160 | 0.5909090909090909
161 |
162 | """
163 |
164 | xs0 = x.sum(axis=0)
165 | xs = x.sum()
166 | num = (xs0 * (xs0 - 1.0)).sum()
167 | den = xs * (xs - 1.0)
168 | sso = num / den
169 | return sso
170 |
171 |
172 | @deprecated_function
173 | def simpson_sd(x):
174 | """
175 | Simpson diversity index SD. :cite:`nijkamp2015cultural`
176 |
177 | Parameters
178 | ----------
179 |
180 | x : numpy.array
181 | An :math:`(N, k)` shaped array containing :math:`N` rows (one per
182 | neighborhood) and :math:`k` columns (one per cultural group).
183 |
184 | Returns
185 | -------
186 |
187 | ssd : float
188 | Simpson diversity index SD.
189 |
190 | Examples
191 | --------
192 |
193 | >>> import numpy
194 | >>> x = numpy.array([[0, 1, 2], [0, 2, 4], [0, 0, 3]])
195 | >>> float(simpson_sd(x))
196 | 0.40909090909090906
197 |
198 | """
199 |
200 | ssd = 1.0 - simpson_so(x)
201 | return ssd
202 |
203 |
204 | @deprecated_function
205 | def herfindahl_hd(x):
206 | """
207 | Herfindahl index HD. :cite:`nijkamp2015cultural`
208 |
209 | Parameters
210 | ----------
211 |
212 | x : numpy.array
213 | An :math:`(N, k)` shaped array containing :math:`N` rows (one per
214 | neighborhood) and :math:`k` columns (one per cultural group).
215 |
216 | Returns
217 | -------
218 |
219 | hhd : float
220 | Herfindahl index HD.
221 |
222 | Examples
223 | --------
224 |
225 | >>> import numpy
226 | >>> x = numpy.array([[0, 1, 2], [0, 2, 4], [0, 0, 3]])
227 | >>> float(herfindahl_hd(x))
228 | 0.625
229 |
230 | """
231 |
232 | pgs = x.sum(axis=0)
233 | p = pgs.sum()
234 | hhd = ((pgs * 1.0 / p) ** 2).sum()
235 | return hhd
236 |
237 |
238 | @deprecated_function
239 | def theil_th(x, ridz=True):
240 | """
241 | Theil index TH as expressed in equation (32) of [2]. :cite:`nijkamp2015cultural`
242 |
243 | Parameters
244 | ----------
245 |
246 | x : numpy.array
247 | An :math:`(N, k)` shaped array containing :math:`N` rows (one per
248 | neighborhood) and :math:`k` columns (one per cultural group).
249 | ridz : bool (default True)
250 | Flag to add a small amount to zero values to avoid zero division problems.
251 |
252 | Returns
253 | -------
254 |
255 | tth : float
256 | Theil index TH.
257 |
258 | Examples
259 | --------
260 |
261 | >>> import numpy
262 | >>> x = numpy.array([[0, 1, 2], [0, 2, 4], [0, 0, 3]])
263 | >>> float(theil_th(x))
264 | 0.15106563978903298
265 |
266 | """
267 |
268 | if ridz:
269 | x = x + SMALL * (x == 0) # can't have 0 values
270 | pa = x.sum(axis=1).astype(float) # Area totals
271 | pg = x.sum(axis=0).astype(float) # Group totals
272 | p = pa.sum()
273 | num = (x / pa[:, None]) * (numpy.log(pg / p) - numpy.log(x / pa[:, None]))
274 | den = ((pg / p) * numpy.log(pg / p)).sum()
275 | th = (pa / p)[:, None] * (num / den)
276 | tth = th.sum().sum()
277 | return tth
278 |
279 |
280 | @deprecated_function
281 | def theil_th_brute(x, ridz=True):
282 | """
283 | Theil index TH using inefficient computation.
284 | NOTE: just for result comparison, it matches ``theil_th``.
285 |
286 | Parameters
287 | ----------
288 |
289 | x : numpy.array
290 | An :math:`(N, k)` shaped array containing :math:`N` rows (one per
291 | neighborhood) and :math:`k` columns (one per cultural group).
292 | ridz : bool (default True)
293 | Flag to add a small amount to zero values to avoid zero division problems.
294 |
295 | Returns
296 | -------
297 |
298 | tth : float
299 | Theil index TH.
300 |
301 | """
302 |
303 | if ridz:
304 | x = x + SMALL * (x == 0) # can't have 0 values
305 | pas = x.sum(axis=1).astype(float) # Area totals
306 | pgs = x.sum(axis=0).astype(float) # Group totals
307 | p = pas.sum()
308 | th = numpy.zeros(x.shape)
309 | for g in numpy.arange(x.shape[1]):
310 | pg = pgs[g]
311 | for a in numpy.arange(x.shape[0]):
312 | pa = pas[a]
313 | pga = x[a, g]
314 | num = (pga / pa) * ((numpy.log(pg / p)) - numpy.log(pga / pa))
315 | den = ((pgs / p) * numpy.log(pgs / p)).sum()
316 | th[a, g] = (pa / p) * (num / den)
317 | tth = th.sum().sum()
318 | return tth
319 |
320 |
321 | @deprecated_function
322 | def fractionalization_gs(x):
323 | """
324 | Fractionalization Gini-Simpson index GS. :cite:`nijkamp2015cultural`
325 |
326 | Parameters
327 | ----------
328 |
329 | x : numpy.array
330 | An :math:`(N, k)` shaped array containing :math:`N` rows (one per
331 | neighborhood) and :math:`k` columns (one per cultural group).
332 |
333 | Returns
334 | -------
335 |
336 | fgs : float
337 | Fractionalization Gini-Simpson index GS.
338 |
339 | Examples
340 | --------
341 |
342 | >>> import numpy
343 | >>> x = numpy.array([[0, 1, 2], [0, 2, 4], [0, 0, 3]])
344 | >>> float(fractionalization_gs(x))
345 | 0.375
346 |
347 | """
348 |
349 | fgs = 1.0 - herfindahl_hd(x)
350 | return fgs
351 |
352 |
353 | @deprecated_function
354 | def polarization(x): # noqa ARG001
355 | raise RuntimeError("Not currently implemented.")
356 |
357 |
358 | @deprecated_function
359 | def shannon_se(x):
360 | """
361 | Shannon index SE. :cite:`nijkamp2015cultural`
362 |
363 | Parameters
364 | ----------
365 |
366 | x : numpy.array
367 | An :math:`(N, k)` shaped array containing :math:`N` rows (one per
368 | neighborhood) and :math:`k` columns (one per cultural group).
369 |
370 | Returns
371 | -------
372 |
373 | sse : float
374 | Shannon index SE.
375 |
376 | Examples
377 | --------
378 |
379 | >>> import numpy
380 | >>> numpy.random.seed(0)
381 | >>> y = numpy.random.randint(1, 10, size=(4,3))
382 | >>> float(shannon_se(y))
383 | 1.094070862104929
384 |
385 | """
386 |
387 | pgs = x.sum(axis=0)
388 | p = pgs.sum()
389 | ratios = pgs * 1.0 / p
390 | sse = -(ratios * numpy.log(ratios)).sum()
391 | return sse
392 |
393 |
394 | @deprecated_function
395 | def _gini(ys):
396 | """Gini for a single row to be used both by ``gini_gi`` and ``gini_gig``."""
397 |
398 | n = ys.flatten().shape[0]
399 | ys.sort()
400 | num = 2.0 * ((numpy.arange(n) + 1) * ys).sum()
401 | den = n * ys.sum()
402 | return (num / den) - ((n + 1.0) / n)
403 |
404 |
405 | @deprecated_function
406 | def gini_gi(x):
407 | """
408 | Gini GI index. :cite:`nijkamp2015cultural`
409 |
410 | NOTE: based on 3rd eq. of "Calculation" in:
411 |
412 | http://en.wikipedia.org/wiki/Gini_coefficient
413 |
414 | Returns same value as ``gini`` method in the R package ``reldist`` (see
415 | http://rss.acs.unt.edu/Rdoc/library/reldist/html/gini.html) if every
416 | category has at least one observation.
417 |
418 | Parameters
419 | ----------
420 |
421 | x : numpy.array
422 | An :math:`(N, k)` shaped array containing :math:`N` rows (one per
423 | neighborhood) and :math:`k` columns (one per cultural group).
424 |
425 | Returns
426 | -------
427 |
428 | ggi : float
429 | Gini GI index.
430 |
431 | Examples
432 | --------
433 |
434 | >>> import numpy
435 | >>> numpy.random.seed(0)
436 | >>> y = numpy.random.randint(1, 10, size=(4,3))
437 | >>> float(round(gini_gi(y), 10))
438 | 0.0512820513
439 |
440 | """
441 | ys = x.sum(axis=0)
442 | return _gini(ys)
443 |
444 |
445 | @deprecated_function
446 | def gini_gig(x):
447 | """
448 | Gini GI index. :cite:`nijkamp2015cultural`
449 |
450 | NOTE: based on Wolfram Mathworld formula in:
451 |
452 | http://mathworld.wolfram.com/GiniCoefficient.html
453 |
454 | Parameters
455 | ----------
456 |
457 | x : numpy.array
458 | An :math:`(N, k)` shaped array containing :math:`N` rows (one per
459 | neighborhood) and :math:`k` columns (one per cultural group).
460 |
461 | Returns
462 | -------
463 |
464 | ggig : numpy.array
465 | Gini GI index for every group :math:`k`.
466 |
467 | Examples
468 | --------
469 |
470 | >>> import numpy
471 | >>> numpy.random.seed(0)
472 | >>> y = numpy.random.randint(1, 10, size=(4,3))
473 | >>> gini_gig(y)
474 | array([0.125 , 0.32894737, 0.18181818])
475 |
476 | """
477 |
478 | ggig = numpy.apply_along_axis(_gini, 0, x.copy())
479 | return ggig
480 |
481 |
482 | @deprecated_function
483 | def gini_gi_m(x):
484 | """
485 | Gini GI index (equivalent to ``gini_gi``, not vectorized).
486 | :cite:`nijkamp2015cultural`
487 |
488 | NOTE: based on Wolfram Mathworld formula in:
489 |
490 | http://mathworld.wolfram.com/GiniCoefficient.html
491 |
492 | Returns same value as ``gini_gi``.
493 |
494 | Parameters
495 | ----------
496 |
497 | x : numpy.array
498 | An :math:`(N, k)` shaped array containing :math:`N` rows (one per
499 | neighborhood) and :math:`k` columns (one per cultural group).
500 |
501 | Returns
502 | -------
503 |
504 | ggim : float
505 | Gini GI index.
506 |
507 | Examples
508 | --------
509 |
510 | >>> import numpy
511 | >>> numpy.random.seed(0)
512 | >>> y = numpy.random.randint(1, 10, size=(4,3))
513 | >>> float(round(gini_gi_m(y), 10))
514 | 0.0512820513
515 |
516 | """
517 |
518 | xs = x.sum(axis=0)
519 | num = numpy.sum([numpy.abs(xi - xj) for xi, xj in itertools.permutations(xs, 2)])
520 | den = 2.0 * xs.shape[0] ** 2 * numpy.mean(xs)
521 | ggim = num / den
522 | return ggim
523 |
524 |
525 | @deprecated_function
526 | def hoover_hi(x):
527 | """
528 | Hoover index HI. :cite:`nijkamp2015cultural`
529 |
530 | NOTE: based on
531 |
532 | http://en.wikipedia.org/wiki/Hoover_index
533 |
534 | Parameters
535 | ----------
536 |
537 | x : numpy.array
538 | An :math:`(N, k)` shaped array containing :math:`N` rows (one per
539 | neighborhood) and :math:`k` columns (one per cultural group).
540 |
541 | Returns
542 | -------
543 |
544 | hhi : float
545 | Hoover HI index.
546 |
547 | Examples
548 | --------
549 |
550 | >>> import numpy
551 | >>> numpy.random.seed(0)
552 | >>> y = numpy.random.randint(1, 10, size=(4,3))
553 | >>> f'{hoover_hi(y):.3f}'
554 | '0.041'
555 |
556 | """
557 |
558 | es = x.sum(axis=0)
559 | e_total = es.sum()
560 | a_total = es.shape[0]
561 | s = numpy.abs((es * 1.0 / e_total) - (1.0 / a_total)).sum()
562 | hhi = s / 2.0
563 | return hhi
564 |
565 |
566 | @deprecated_function
567 | def similarity_w_wd(x, tau):
568 | """
569 | Similarity weighted diversity. :cite:`nijkamp2015cultural`
570 |
571 | Parameters
572 | ----------
573 |
574 | x : numpy.array
575 | An :math:`(N, k)` shaped array containing :math:`N` rows (one per
576 | neighborhood) and :math:`k` columns (one per cultural group).
577 | tau : numpy.array
578 | A :math:`(k, k)` array where :math:`tau_{ij}` represents dissimilarity
579 | between group :math:`i` and group :math:`j`. Diagonal elements are
580 | assumed to be one.
581 |
582 | Returns
583 | -------
584 |
585 | swwd : float
586 | Similarity weighted diversity index.
587 |
588 | Examples
589 | --------
590 |
591 | >>> import numpy
592 | >>> numpy.random.seed(0)
593 | >>> y = numpy.random.randint(1, 10, size=(4,3))
594 | >>> numpy.random.seed(0)
595 | >>> tau = numpy.random.uniform(size=(3,3))
596 | >>> numpy.fill_diagonal(tau, 0.)
597 | >>> tau = (tau + tau.T)/2
598 | >>> tau
599 | array([[0. , 0.63003627, 0.52017529],
600 | [0.63003627, 0. , 0.76883356],
601 | [0.52017529, 0.76883356, 0. ]])
602 |
603 | >>> f'{similarity_w_wd(y, tau):.3f}'
604 | '0.582'
605 |
606 | """
607 |
608 | pgs = x.sum(axis=0)
609 | pgs = pgs * 1.0 / pgs.sum()
610 | s = sum(
611 | [
612 | pgs[i] * pgs[j] * tau[i, j]
613 | for i, j in itertools.product(numpy.arange(pgs.shape[0]), repeat=2)
614 | ]
615 | )
616 | swwd = 1.0 - s
617 | return swwd
618 |
619 |
620 | @deprecated_function
621 | def segregation_gsg(x):
622 | """
623 | Segregation index GS.
624 |
625 | This is a Duncan&Duncan index of a group against the rest combined.
626 |
627 | Parameters
628 | ----------
629 |
630 | x : numpy.array
631 | An :math:`(N, k)` shaped array containing :math:`N` rows (one per
632 | neighborhood) and :math:`k` columns (one per cultural group).
633 |
634 | Returns
635 | -------
636 |
637 | sgsg : array
638 | An array with GSg indices for the :math:`k` groups.
639 |
640 | Examples
641 | --------
642 |
643 | >>> import numpy
644 | >>> numpy.random.seed(0)
645 | >>> y = numpy.random.randint(1, 10, size=(4,3))
646 | >>> segregation_gsg(y).round(6)
647 | array([0.182927, 0.24714 , 0.097252])
648 |
649 | """
650 |
651 | pgs = x.sum(axis=0)
652 | pas = x.sum(axis=1)
653 | p = pgs.sum()
654 | first = (x.T * 1.0 / pgs[:, None]).T
655 | pampga = pas[:, None] - x
656 | pmpg = p - pgs
657 | second = pampga * 1.0 / pmpg[None, :]
658 | sgsg = 0.5 * (numpy.abs(first - second)).sum(axis=0)
659 | return sgsg
660 |
661 |
662 | @deprecated_function
663 | def modified_segregation_msg(x):
664 | """
665 | Modified segregation index GS.
666 |
667 | This is a modified version of GSg index as used by Van Mourik et al. (1989)
668 | :cite:`van_Mourik_1989`.
669 |
670 | Parameters
671 | ----------
672 |
673 | x : numpy.array
674 | An :math:`(N, k)` shaped array containing :math:`N` rows (one per
675 | neighborhood) and :math:`k` columns (one per cultural group).
676 |
677 | Returns
678 | -------
679 |
680 | ms_inds : numpy.array
681 | An array with MSg indices for the :math:`k` groups.
682 |
683 | Examples
684 | --------
685 |
686 | >>> import numpy
687 | >>> numpy.random.seed(0)
688 | >>> y = numpy.random.randint(1, 10, size=(4,3))
689 | >>> modified_segregation_msg(y).round(6)
690 | array([0.085207, 0.102249, 0.04355 ])
691 |
692 | """
693 |
694 | pgs = x.sum(axis=0)
695 | p = pgs.sum()
696 | ms_inds = segregation_gsg(x) # To be updated in loop below
697 | for gi in numpy.arange(x.shape[1]):
698 | pg = pgs[gi]
699 | pgp = pg * 1.0 / p
700 | ms_inds[gi] = 2.0 * pgp * (1.0 - pgp) * ms_inds[gi]
701 | return ms_inds
702 |
703 |
704 | @deprecated_function
705 | def isolation_isg(x):
706 | """
707 | Isolation index IS. :cite:`nijkamp2015cultural`
708 |
709 | Parameters
710 | ----------
711 |
712 | x : numpy.array
713 | An :math:`(N, k)` shaped array containing :math:`N` rows (one per
714 | neighborhood) and :math:`k` columns (one per cultural group).
715 |
716 | Returns
717 | -------
718 |
719 | iisg : numpy.array
720 | An array with ISg indices for the :math:`k` groups.
721 |
722 | Examples
723 | --------
724 |
725 | >>> import numpy
726 | >>> numpy.random.seed(0)
727 | >>> y = numpy.random.randint(1, 10, size=(4,3))
728 | >>> isolation_isg(y).round(6)
729 | array([1.07327 , 1.219953, 1.022711])
730 |
731 | """
732 |
733 | ws = x * 1.0 / x.sum(axis=0)
734 | pgapa = (x.T * 1.0 / x.sum(axis=1)).T
735 | pgp = x.sum(axis=0) * 1.0 / x.sum()
736 | iisg = (ws * pgapa / pgp).sum(axis=0)
737 | return iisg
738 |
739 |
740 | @deprecated_function
741 | def isolation_ii(x):
742 | """
743 | Isolation index :math:`II_g` as in equation (23) of [2].
744 | :cite:`nijkamp2015cultural`
745 |
746 | Parameters
747 | ----------
748 |
749 | x : numpy.array
750 | An :math:`(N, k)` shaped array containing :math:`N` rows (one per
751 | neighborhood) and :math:`k` columns (one per cultural group).
752 |
753 | Returns
754 | -------
755 |
756 | iso_ii : numpy.array
757 | An array with IIg indices for the :math:`k` groups.
758 |
759 | Examples
760 | --------
761 |
762 | >>> import numpy
763 | >>> numpy.random.seed(0)
764 | >>> y = numpy.random.randint(1, 10, size=(4,3))
765 | >>> isolation_ii(y).round(6)
766 | array([1.11616 , 1.310804, 1.03433 ])
767 |
768 | """
769 |
770 | pa = x.sum(axis=1).astype(float) # Area totals
771 | pg = x.sum(axis=0).astype(float) # Group totals
772 | p = pa.sum()
773 | ws = x / pg
774 |
775 | block = (ws * (x / pa[:, None])).sum(axis=0)
776 | num = (block / (pg / p)) - (pg / p)
777 | den = 1.0 - (pg / p)
778 | iso_ii = num / den
779 | return iso_ii
780 |
781 |
782 | @deprecated_function
783 | def ellison_glaeser_egg(x, hs=None):
784 | """
785 | Ellison and Glaeser (1997) :cite:`ellison_1997` index of concentration.
786 | Implemented as in equation (5) of original reference.
787 |
788 | Parameters
789 | ----------
790 |
791 | x : numpy.array
792 | An :math:`(N, k)` shaped array containing :math:`N` rows (one per
793 | area) and :math:`k` columns (one per industry). Each cell indicates
794 | employment figures for area :math:`n` and industry :math:`k`.
795 | hs : numpy.array (default None)
796 | An array of dimension :math:`(k,)` containing the Herfindahl
797 | indices of each industry's plant sizes. If not passed, it is
798 | assumed every plant contains one and only one worker and thus
799 | :math:`H_k = 1 / P_k`, where :math:`P_k` is the total
800 | employment in :math:`k`.
801 |
802 | Returns
803 | -------
804 |
805 | eg_inds : numpy.array
806 | EG index for each of the :math:`k` groups.
807 |
808 | Examples
809 | --------
810 |
811 | >>> import numpy
812 | >>> numpy.random.seed(0)
813 | >>> z = numpy.random.randint(10, 50, size=(3,4))
814 |
815 | >>> ellison_glaeser_egg(z).round(6)
816 | array([0.054499, 0.016242, 0.010141, 0.028803])
817 |
818 | >>> numpy.random.seed(0)
819 | >>> v = numpy.random.uniform(0, 1, size=(4,)).round(3)
820 | >>> v
821 | array([0.549, 0.715, 0.603, 0.545])
822 |
823 | >>> ellison_glaeser_egg(z, hs=v).round(6)
824 | array([-1.06264 , -2.39227 , -1.461383, -1.117953])
825 |
826 | References
827 | ----------
828 |
829 | - :cite:`ellison_1997` Ellison, G. and Glaeser, E. L. "Geographic Concentration in U.S. Manufacturing Industries: A Dartboard Approach". Journal of Political Economy. 105: 889-927
830 |
831 | """ # noqa E501
832 |
833 | industry_totals = x.sum(axis=0)
834 | if hs is None:
835 | hs = 1.0 / industry_totals
836 | xs = x.sum(axis=1) * 1.0 / x.sum()
837 | part = 1.0 - (xs**2).sum()
838 | eg_inds = numpy.zeros(x.shape[1])
839 | for gi in numpy.arange(x.shape[1]):
840 | ss = x[:, gi] * 1.0 / industry_totals[gi]
841 | g = ((ss - xs) ** 2).sum()
842 | h = hs[gi]
843 | eg_inds[gi] = (g - part * h) / (part * (1.0 - h))
844 | return eg_inds
845 |
846 |
847 | @deprecated_function
848 | def ellison_glaeser_egg_pop(x):
849 | """
850 | Ellison and Glaeser (1997) :cite:`ellison_1997` index of concentration.
851 | Implemented to be computed with data about people (segregation/diversity)
852 | rather than as industry concentration, following Mare et al (2012)
853 | :cite:`care_2012`.
854 |
855 | Parameters
856 | ----------
857 |
858 | x : numpy.array
859 | An :math:`(N, k)` shaped array containing :math:`N` rows (one per
860 | neighborhood) and :math:`k` columns (one per cultural group).
861 |
862 | Returns
863 | -------
864 |
865 | eg_inds : numpy.array
866 | EG index for each of the :math:`k` groups.
867 |
868 | Examples
869 | --------
870 |
871 | >>> import numpy
872 | >>> numpy.random.seed(0)
873 | >>> y = numpy.random.randint(1, 10, size=(4,3))
874 | >>> ellison_glaeser_egg_pop(y).round(6)
875 | array([-0.021508, 0.013299, -0.038946])
876 |
877 | References
878 | ----------
879 |
880 | - :cite:`ellison_1997` – Ellison, G. and Glaeser, E. L. "Geographic Concentration in U.S. Manufacturing Industries: A Dartboard Approach". Journal of Political Economy. 105: 889-927
881 |
882 | - :cite:`care_2012` – Care, D., Pinkerton, R., Poot, J. and Coleman, A. (2012) "Residential sorting across Auckland neighbourhoods." New Zealand Population Review, 38, 23-54.
883 |
884 | """ # noqa E501
885 |
886 | pas = x.sum(axis=1)
887 | pgs = x.sum(axis=0)
888 | p = pas.sum()
889 | pap = pas * 1.0 / p
890 | opg = 1.0 / pgs
891 | oopg = 1.0 - opg
892 | eg_inds = numpy.zeros(x.shape[1])
893 | for g in numpy.arange(x.shape[1]):
894 | pgas = x[:, g]
895 | pg = pgs[g]
896 | num1n = (((pgas * 1.0 / pg) - pap) ** 2).sum()
897 | num1d = 1.0 - (pap**2).sum()
898 | num2 = opg[g]
899 | den = oopg[g]
900 | eg_inds[g] = ((num1n / num1d) - num2) / den
901 | return eg_inds
902 |
903 |
904 | @deprecated_function
905 | def maurel_sedillot_msg(x, hs=None):
906 | """
907 | Maurel and Sedillot (1999) :cite:`maurel_1999` index of concentration.
908 | Implemented as in equation (7) of original reference.
909 |
910 | Parameters
911 | ----------
912 |
913 | x : numpy.array
914 | An :math:`(N, k)` shaped array containing :math:`N` rows (one per
915 | area) and :math:`k` columns (one per industry). Each cell indicates
916 | employment figures for area :math:`n` and industry :math:`k`.
917 | hs : numpy.array (default None)
918 | An array of dimension :math:`(k,)` containing the Herfindahl
919 | indices of each industry's plant sizes. If not passed, it is
920 | assumed every plant contains one and only one worker and thus
921 | :math:`H_k = 1 / P_k`, where :math:`P_k` is the total
922 | employment in :math:`k`.
923 |
924 | Returns
925 | -------
926 |
927 | ms_inds : numpy.array
928 | MS index for each of the :math:`k` groups.
929 |
930 | Examples
931 | --------
932 |
933 | >>> import numpy
934 | >>> numpy.random.seed(0)
935 | >>> z = numpy.random.randint(10, 50, size=(3,4))
936 |
937 | >>> maurel_sedillot_msg(z).round(6)
938 | array([ 0.078583, 0.035977, 0.039374, -0.009049])
939 |
940 | >>> numpy.random.seed(0)
941 | >>> v = numpy.random.uniform(0, 1, size=(4,)).round(3)
942 | >>> v
943 | array([0.549, 0.715, 0.603, 0.545])
944 |
945 | >>> maurel_sedillot_msg(z, hs=v).round(6)
946 | array([-1.010102, -2.324216, -1.38869 , -1.200499])
947 |
948 | References
949 | ----------
950 |
951 | - :cite:`maurel_1999` – Maurel, F. and Sédillot, B. (1999). "A Measure of the Geographic Concentration in French Manufacturing Industries." Regional Science and Urban Economics 29: 575-604.
952 |
953 | """ # noqa E501
954 |
955 | industry_totals = x.sum(axis=0)
956 | if hs is None:
957 | hs = 1.0 / industry_totals
958 | x2s = numpy.sum((x.sum(axis=1) * 1.0 / x.sum()) ** 2)
959 | ms_inds = numpy.zeros(x.shape[1])
960 | for gi in numpy.arange(x.shape[1]):
961 | s2s = numpy.sum((x[:, gi] * 1.0 / industry_totals[gi]) ** 2)
962 | h = hs[gi]
963 | num = ((s2s - x2s) / (1.0 - x2s)) - h
964 | den = 1.0 - h
965 | ms_inds[gi] = num / den
966 | return ms_inds
967 |
968 |
969 | @deprecated_function
970 | def maurel_sedillot_msg_pop(x):
971 | """
972 | Maurel and Sedillot (1999) :cite:`maurel_1999` index of concentration.
973 | Implemented to be computed with data about people (segregation/diversity)
974 | rather than as industry concentration, following Mare et al (2012)
975 | :cite:`care_2012`.
976 |
977 | Parameters
978 | ----------
979 |
980 | x : numpy.array
981 | An :math:`(N, k)` shaped array containing :math:`N` rows (one per
982 | neighborhood) and :math:`k` columns (one per cultural group).
983 |
984 | Returns
985 | -------
986 |
987 | eg_inds : numpy.array
988 | MS index for each of the :math:`k` groups.
989 |
990 | Examples
991 | --------
992 |
993 | >>> import numpy
994 | >>> numpy.random.seed(0)
995 | >>> y = numpy.random.randint(1, 10, size=(4,3))
996 |
997 | >>> maurel_sedillot_msg_pop(y).round(6)
998 | array([-0.055036, 0.044147, -0.028666])
999 |
1000 | References
1001 | ----------
1002 |
1003 | - :cite:`maurel_1999` – Maurel, F. and Sédillot, B. (1999). "A Measure of the Geographic Concentration in French Manufacturing Industries." Regional Science and Urban Economics 29: 575-604.
1004 |
1005 | - :cite:`care_2012` – Care, D., Pinkerton, R., Poot, J. and Coleman, A. (2012) "Residential sorting across Auckland neighbourhoods." New Zealand Population Review, 38, 23-54.
1006 |
1007 | """ # noqa E501
1008 |
1009 | pas = x.sum(axis=1)
1010 | pgs = x.sum(axis=0)
1011 | p = pas.sum()
1012 | pap = pas * 1.0 / p
1013 | eg_inds = numpy.zeros(x.shape[1])
1014 | for g in numpy.arange(x.shape[1]):
1015 | pgas = x[:, g]
1016 | pg = pgs[g]
1017 | num1n = ((pgas * 1.0 / pg) ** 2 - pap**2).sum()
1018 | num1d = 1.0 - (pap**2).sum()
1019 | num2 = 1.0 / pg
1020 | den = 1.0 - (1.0 / pg)
1021 | eg_inds[g] = ((num1n / num1d) - num2) / den
1022 | return eg_inds
1023 |
--------------------------------------------------------------------------------
/inequality/atkinson.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | __all__ = ["Atkinson", "atkinson"]
4 |
5 |
6 | def atkinson(y, epsilon):
7 | """Compute the Atkinson index for a given distribution of income or wealth.
8 |
9 | The Atkinson index is a measure of economic inequality that takes
10 | into account the social aversion to inequality. It is sensitive to
11 | changes in different parts of the income distribution depending on
12 | the value of the parameter epsilon.
13 |
14 | Parameters
15 | ----------
16 | y : array-like
17 | An array of income or wealth values.
18 | epsilon : float
19 | The inequality aversion parameter. Higher values of epsilon
20 | give more weight to the lower end of the distribution, making
21 | the index more sensitive to changes in the lower tail.
22 |
23 | Returns
24 | -------
25 | float
26 | The Atkinson index, which ranges from 0 (perfect equality) to
27 | 1 (maximum inequality).
28 |
29 | Notes
30 | -----
31 | - If epsilon equals 0, the Atkinson index is 0 regardless of the
32 | distribution, as it implies no aversion to inequality.
33 | - If epsilon equals 1, the Atkinson index is calculated using the
34 | geometric mean.
35 | - The input array y should contain positive values for a
36 | meaningful calculation.
37 |
38 | Example
39 | -------
40 | >>> import numpy as np
41 | >>> incomes = np.array([10, 20, 30, 40, 50])
42 | >>> float(round(atkinson(incomes, 0.5), 5))
43 | 0.06315
44 | >>> float(round(atkinson(incomes, 1), 5))
45 | 0.13161
46 |
47 | """
48 | y = np.asarray(y)
49 | if np.any(y <= 0):
50 | raise ValueError("All values in 'y' must be positive.")
51 | if epsilon < 0:
52 | raise ValueError("'epsilon' must be non-negative.")
53 |
54 | mean_y = y.mean()
55 | if epsilon == 1:
56 | geom_mean = np.exp(np.mean(np.log(y)))
57 | return 1 - geom_mean / mean_y
58 | else:
59 | ye = y ** (1 - epsilon)
60 | ye_bar = ye.mean() ** (1 / (1 - epsilon))
61 | return 1 - ye_bar / mean_y
62 |
63 |
64 | class Atkinson:
65 | """A class to calculate and store the Atkinson index and the equally
66 | distributed equivalent (EDE).
67 |
68 | The Atkinson index is a measure of economic inequality that takes
69 | into account the social aversion to inequality. The equally
70 | distributed equivalent (EDE) represents the level of income that,
71 | if equally distributed, would give the same level of social
72 | welfare as the actual distribution.
73 |
74 | See: :cite:`Atkinson_1970_Measurement`.
75 |
76 | Parameters
77 | ----------
78 | y: array-like
79 | An array of income or wealth values.
80 | epsilon: float
81 | The inequality aversion parameter. Higher values of epsilon
82 | give more weight to the lower end of the distribution, making
83 | the index more sensitive to changes in the lower tail.
84 |
85 | Attributes
86 | ----------
87 | y: array-like
88 | The input array of income or wealth values.
89 | epsilon: float
90 | The inequality aversion parameter.
91 | A: float
92 | The calculated Atkinson index.
93 | EDE: float
94 | The equally distributed equivalent (EDE) of the income or
95 | wealth distribution.
96 |
97 | Example
98 | -------
99 | >>> incomes = np.array([10, 20, 30, 40, 50])
100 | >>> atkinson = Atkinson(incomes, 0.5)
101 | >>> float(round(atkinson.A, 5))
102 | 0.06315
103 | >>> float(round(atkinson.EDE, 5))
104 | 28.1054
105 | >>> atkinson = Atkinson(incomes, 1)
106 | >>> float(round(atkinson.A, 5))
107 | 0.13161
108 | >>> float(round(atkinson.EDE, 5))
109 | 26.05171
110 |
111 | """
112 |
113 | def __init__(self, y, epsilon):
114 | y = np.asarray(y)
115 | if np.any(y <= 0):
116 | raise ValueError("All values in 'y' must be positive.")
117 | if epsilon < 0:
118 | raise ValueError("'epsilon' must be non-negative.")
119 |
120 | self.y = y
121 | self.epsilon = epsilon
122 | self.A = atkinson(self.y, self.epsilon)
123 | self.EDE = self.y.mean() * (1 - self.A)
124 |
--------------------------------------------------------------------------------
/inequality/gini.py:
--------------------------------------------------------------------------------
1 | """
2 | Gini based Inequality Metrics
3 | """
4 |
5 | __author__ = "Sergio J. Rey "
6 |
7 | import numpy
8 | from scipy.stats import norm
9 |
10 | __all__ = ["Gini", "Gini_Spatial"]
11 |
12 |
13 | def _gini(x):
14 | """
15 | Memory efficient calculation of Gini coefficient
16 | in relative mean difference form.
17 |
18 | Parameters
19 | ----------
20 |
21 | x : array-like
22 |
23 | Attributes
24 | ----------
25 |
26 | g : float
27 | Gini coefficient.
28 |
29 | Notes
30 | -----
31 | Based on
32 | http://www.statsdirect.com/help/default.htm#nonparametric_methods/gini.htm.
33 |
34 | """
35 |
36 | n = len(x)
37 | try:
38 | x_sum = x.sum()
39 | except AttributeError:
40 | x = numpy.asarray(x)
41 | x_sum = x.sum()
42 | n_x_sum = n * x_sum
43 | x = x.ravel() # ensure shape is (n,)
44 | r_x = (2.0 * numpy.arange(1, len(x) + 1) * x[numpy.argsort(x)]).sum()
45 | return (r_x - n_x_sum - x_sum) / n_x_sum
46 |
47 |
48 | class Gini:
49 | """
50 | Classic Gini coefficient in absolute deviation form.
51 |
52 | Parameters
53 | ----------
54 |
55 | y : numpy.array
56 | An array in the shape :math:`(n,1)` containing the attribute values.
57 |
58 | Attributes
59 | ----------
60 |
61 | g : float
62 | Gini coefficient.
63 |
64 | """
65 |
66 | def __init__(self, x):
67 | self.g = _gini(x)
68 |
69 |
70 | class Gini_Spatial: # noqa N801
71 | """
72 | Spatial Gini coefficient.
73 |
74 | Provides for computationally based inference regarding the contribution of
75 | spatial neighbor pairs to overall inequality across a set of regions.
76 | See :cite:`Rey_2013_sea`.
77 |
78 | Parameters
79 | ----------
80 |
81 | y : numpy.array
82 | An array in the shape :math:`(n,1)` containing the attribute values.
83 | w : libpysal.weights.W
84 | Binary spatial weights object.
85 | permutations : int (default 99)
86 | The number of permutations for inference.
87 |
88 | Attributes
89 | ----------
90 |
91 | g : float
92 | Gini coefficient.
93 | wg : float
94 | Neighbor inequality component (geographic inequality).
95 | wcg : float
96 | Non-neighbor inequality component (geographic complement inequality).
97 | wcg_share : float
98 | Share of inequality in non-neighbor component.
99 | p_sim : float
100 | (If ``permuations > 0``) pseudo :math:`p`-value for spatial gini.
101 | e_wcg : float
102 | (If ``permuations > 0``) expected value of non-neighbor
103 | inequality component (level) from permutations.
104 | s_wcg : float
105 | (If ``permuations > 0``) standard deviation non-neighbor
106 | inequality component (level) from permutations.
107 | z_wcg : float
108 | (If ``permuations > 0``) z-value non-neighbor inequality
109 | component (level) from permutations.
110 | p_z_sim : float
111 | (If ``permuations > 0``) pseudo :math:`p`-value based on
112 | standard normal approximation of permutation based values.
113 | polarization: float
114 | Spatial polarization index with an expected value of 1.
115 | polarization_p_sim: float
116 | (If ``permutations >0``) pseudo :math:`p`-value for polarization index.
117 | polarization_sim: float
118 | (If ``permutations >0``) polarization values under the null from permutations.
119 |
120 | Examples
121 | --------
122 |
123 | >>> import libpysal
124 | >>> import numpy
125 | >>> from inequality.gini import Gini_Spatial
126 |
127 | Use data from the 32 Mexican States, decade frequency 1940-2010.
128 |
129 | >>> f = libpysal.io.open(libpysal.examples.get_path('mexico.csv'))
130 | >>> vnames = [f'pcgdp{dec}' for dec in range(1940, 2010, 10)]
131 | >>> y = numpy.transpose(numpy.array([f.by_col[v] for v in vnames]))
132 |
133 | Define regime neighbors.
134 |
135 | >>> regimes = numpy.array(f.by_col('hanson98'))
136 | >>> w = libpysal.weights.block_weights(regimes, silence_warnings=True)
137 | >>> numpy.random.seed(12345)
138 | >>> gs = Gini_Spatial(y[:,0], w)
139 |
140 | >>> float(gs.p_sim)
141 | 0.04
142 |
143 | >>> float(gs.wcg)
144 | 4353856.0
145 |
146 | >>> float(gs.e_wcg)
147 | 4170356.7474747472
148 |
149 | Thus, the amount of inequality between pairs of states that are not in the
150 | same regime (neighbors) is significantly higher than what is expected
151 | under the null of random spatial inequality.
152 |
153 | """
154 |
155 | def __init__(self, x, w, permutations=99):
156 | x = numpy.asarray(x)
157 | g = _gini(x)
158 | self.g = g
159 | n = len(x)
160 | den = x.mean() * 2 * n**2
161 | d = g * den # sum of absolute devations SAD
162 | wg = self._calc(x, w) # sum of absolute deviations for neighbor pairs
163 | wcg = d - wg # sum of absolution deviations for distant pairs
164 | n_pairs = n * (n - 1) / 2
165 | n_n_pairs = w.s0 / 2
166 | n_d_pairs = n_pairs - n_n_pairs
167 | polarization = (wcg / wg) * (n_n_pairs / n_d_pairs)
168 | self.polarization = polarization
169 | self.g = g
170 | self.wcg = wcg
171 | self.wg = wg
172 | self.dtotal = d
173 | self.den = den
174 | self.wcg_share = wcg / den
175 |
176 | if permutations:
177 | _scale = n_n_pairs / n_d_pairs
178 | ids = numpy.arange(n)
179 | wcgp = numpy.zeros((permutations,))
180 | polarization_sim = numpy.zeros((permutations,))
181 | for perm in range(permutations):
182 | numpy.random.shuffle(ids)
183 | wcgp[perm] = d - self._calc(x[ids], w)
184 | polar = wcgp[perm] / (d - wcgp[perm])
185 | polarization_sim[perm] = polar * _scale
186 | above = wcgp >= self.wcg
187 | larger = above.sum()
188 | if (permutations - larger) < larger:
189 | larger = permutations - larger
190 | self.wcgp = wcgp
191 | self.p_sim = (larger + 1.0) / (permutations + 1.0)
192 | self.e_wcg = wcgp.mean()
193 | self.s_wcg = wcgp.std()
194 | self.z_wcg = (self.wcg - self.e_wcg) / self.s_wcg
195 | self.p_z_sim = 1.0 - norm.cdf(self.z_wcg)
196 | self.polarization_sim = polarization_sim
197 | # polarization is a directional concept, upper tail only
198 | larger = (polarization_sim >= polarization).sum()
199 | self.polarization_p_sim = (larger + 1) / (permutations + 1)
200 |
201 | def _calc(self, x, w):
202 | sad_sum = 0.0
203 | for i, js in w.neighbors.items():
204 | sad_sum += numpy.abs(x[i] - x[js]).sum()
205 | return sad_sum
206 |
--------------------------------------------------------------------------------
/inequality/pen.py:
--------------------------------------------------------------------------------
1 | """
2 | Pen's Parade and Pengram Visualizations
3 |
4 | This module provides functions to create Pen's Parade visualizations and
5 | extend them with choropleth maps to display the spatial distribution of
6 | values. The `pen` function generates a traditional Pen's Parade, which is
7 | a visual representation of income distribution or similar data, typically
8 | used to show inequality. The `pengram` function enhances this by combining
9 | the Pen's Parade with a choropleth map, allowing for a richer analysis of
10 | spatial data distributions.
11 |
12 | Author
13 | ------
14 | Serge Rey
15 | """
16 |
17 | import math
18 |
19 | import matplotlib.patches as patches
20 | import matplotlib.pyplot as plt
21 | import numpy as np
22 | from mpl_toolkits.axes_grid1.inset_locator import inset_axes
23 |
24 |
25 | def _check_deps(caller="pen"):
26 | """
27 | Check for required dependencies.
28 |
29 | Returns
30 | -------
31 | tuple
32 | A tuple containing the imported modules (Seaborn, mapclassify, pandas).
33 | """
34 | try:
35 | import seaborn as sns
36 | except ImportError as e:
37 | msg = f"{caller} requires Seaborn."
38 | msg = f"{msg} Install it using `conda install -c conda-forge seaborn`"
39 | raise ImportError(msg) from e
40 |
41 | try:
42 | import mapclassify as mc
43 | except ImportError as e:
44 | msg = f"{caller} requires mapclassify."
45 | msg = f"{msg} Install it using `conda install -c conda-forge mapclassify`"
46 | raise ImportError(msg) from e
47 |
48 | try:
49 | import pandas as pd
50 | except ImportError as e:
51 | msg = f"{caller} requires pandas. "
52 | msg = f"{msg} Install it using `conda install -c conda-forge pandas`"
53 | raise ImportError(msg) from e
54 |
55 | return sns, mc, pd
56 |
57 |
58 | def pen(
59 | df,
60 | col,
61 | x,
62 | weight=None,
63 | ascending=True,
64 | xticks=True,
65 | total_bars=100,
66 | figsize=(8, 6),
67 | ax=None,
68 | ):
69 | """
70 | Creates the Pen's Parade visualization.
71 |
72 | This function generates a bar plot sorted by a specified column, with
73 | options to customize the x-axis ticks and figure size. The Pen's Parade
74 | is a visual representation of income distribution (or similar data),
75 | typically used to show inequality.
76 |
77 | Parameters
78 | ----------
79 | df : pd.DataFrame
80 | DataFrame containing the data to plot.
81 | col : str
82 | The column to plot on the y-axis.
83 | x : str
84 | The column to plot on the x-axis.
85 | weight : str, optional
86 | A column used to weight the bars in the Pen’s Parade. Default is None.
87 | ascending : bool, optional
88 | Whether to sort the DataFrame in ascending order by the `col`.
89 | Default is True.
90 | xticks : bool, optional
91 | Whether to show x-axis ticks. Default is True.
92 | total_bars : int, optional
93 | Total number of bars to create for the weighted Pen’s Parade. Default
94 | is 100.
95 | figsize : list, optional
96 | The size of the figure as a list [width, height]. Default is [8, 6].
97 | ax : matplotlib.axes.Axes, optional
98 | Matplotlib Axes instance to plot on. If None, a new figure and axes
99 | will be created. Default is None.
100 |
101 | Returns
102 | -------
103 | matplotlib.axes.Axes
104 | A Matplotlib Axes object with the Pen's Parade plot.
105 |
106 | """
107 |
108 | sns, mc, pd = _check_deps()
109 |
110 | if ax is None:
111 | fig, ax = plt.subplots(1, 1, figsize=figsize)
112 |
113 | if weight is None:
114 | dbfs = df.sort_values(col, ascending=ascending).reset_index(drop=True)
115 | sns.barplot(x=x, y=col, data=dbfs, ax=ax)
116 | ax.set_ylabel(col)
117 | ax.set_xlabel(x)
118 | plt.xticks(rotation=90)
119 | ax.set_xticks(dbfs.index)
120 | ax.set_xticklabels(dbfs[x], rotation=90)
121 |
122 | if not xticks:
123 | ax.set(xticks=[])
124 | ax.set(xlabel="")
125 | else:
126 | df["NumBars"] = (
127 | (df[weight] / df[weight].sum() * total_bars).apply(math.ceil).astype(int)
128 | )
129 |
130 | repeated_rows = []
131 | name = x
132 | for _, row in df.iterrows():
133 | repeated_rows.extend([row] * row["NumBars"])
134 |
135 | df_repeated = pd.DataFrame(repeated_rows)
136 |
137 | df_sorted = df_repeated.sort_values(by=col).reset_index(drop=True)
138 |
139 | unique_obs = df[name].unique()
140 | colors = plt.get_cmap("tab20", len(unique_obs))
141 | color_map = {state: colors(i) for i, state in enumerate(unique_obs)}
142 | bar_colors = df_sorted[name].map(color_map)
143 |
144 | bar_positions = np.arange(len(df_sorted))
145 | bar_heights = df_sorted[col]
146 | bar_widths = 1 # Equal width for all bars
147 |
148 | _ = ax.bar(
149 | bar_positions,
150 | bar_heights,
151 | width=bar_widths,
152 | color=bar_colors,
153 | edgecolor="black",
154 | )
155 | tick_width = plt.rcParams["xtick.major.width"]
156 |
157 | first_positions = []
158 | first_labels = []
159 | current_state = None
160 | state_index = 0
161 | last_name = df_sorted[name].iloc[-1]
162 | for i in range(len(bar_positions)):
163 | label = df_sorted[name].iloc[i]
164 | if label != current_state:
165 | if state_index % 2 == 0 or label == last_name:
166 | first_positions.append(bar_positions[i])
167 | first_labels.append(df_sorted[name].iloc[i])
168 | else:
169 | text_y_position = bar_heights[i] + 0.05 * max(bar_heights)
170 | ax.plot(
171 | [bar_positions[i], bar_positions[i]],
172 | [bar_heights[i], text_y_position - 550],
173 | color="black",
174 | linewidth=tick_width,
175 | )
176 | ax.text(
177 | bar_positions[i],
178 | text_y_position,
179 | df_sorted[name].iloc[i],
180 | ha="center",
181 | rotation=90,
182 | fontsize=8,
183 | )
184 | current_state = df_sorted[name].iloc[i]
185 | state_index += 1
186 |
187 | ax.set_xticks(first_positions)
188 | ax.set_xticklabels(first_labels, rotation=90, fontsize=8)
189 |
190 | ax.set_xlabel(name)
191 | ax.set_ylabel(col)
192 | ax.set_title(f"Weighted Pen Parade of {name} by {col}")
193 |
194 | plt.tight_layout()
195 | return ax
196 |
197 |
198 | def pengram(
199 | gdf,
200 | col,
201 | name,
202 | figsize=(8, 6),
203 | k=5,
204 | scheme="quantiles",
205 | xticks=True,
206 | legend=True,
207 | leg_pos="lower right",
208 | fmt="{:.2f}",
209 | query=None,
210 | ax=None,
211 | inset_size="30%",
212 | ):
213 | """
214 | Pen's Parade combined with a choropleth map.
215 |
216 | This function generates a Pen’s Parade plot combined with a choropleth
217 | map. Both plots are placed within the same subplot, with the choropleth
218 | map as the main plot and the Pen's Parade as an inset.
219 |
220 | Parameters
221 | ----------
222 | gdf : gpd.GeoDataFrame
223 | GeoDataFrame containing the data to plot.
224 | col : str
225 | The column to plot on the y-axis.
226 | name : str
227 | The name of the geographic units (e.g., states, regions).
228 | figsize : tuple, optional
229 | The size of the figure as a tuple (width, height). Default is (8, 6).
230 | k : int, optional
231 | Number of classes for the classification scheme. Default is 5.
232 | scheme : str, optional
233 | Classification scheme to use (e.g., 'Quantiles'). Default is
234 | 'quantiles'.
235 | xticks : bool, optional
236 | Whether to show x-axis ticks. Default is True.
237 | legend : bool, optional
238 | Whether to show the map legend. Default is True.
239 | leg_pos : str, optional
240 | The position of the legend on the choropleth map. Default is
241 | "lower right".
242 | fmt : str, optional
243 | Format string for legend labels. Default is "{:.2f}".
244 | query : list, optional
245 | Specific geographic units to highlight. Default is an empty list.
246 | ax : matplotlib.axes.Axes, optional
247 | Matplotlib Axes instance to plot on. If None, a new figure and axes
248 | will be created. Default is None.
249 | inset_size : str, optional
250 | Size of the inset plot as a percentage of the main plot. Default is "30%".
251 |
252 | Returns
253 | -------
254 | matplotlib.axes.Axes
255 | Matplotlib Axes objects for the combined choropleth and Pen's parade.
256 | """
257 | sns, mc, pd = _check_deps()
258 |
259 | if ax is None:
260 | fig, ax = plt.subplots(figsize=figsize)
261 |
262 | # Main plot: Choropleth map
263 | _ = gdf.plot(
264 | column=col,
265 | scheme=scheme,
266 | k=k,
267 | ax=ax,
268 | legend=legend,
269 | legend_kwds={"loc": leg_pos, "fmt": fmt},
270 | )
271 | ax.axis("off")
272 |
273 | if query:
274 | highlight = gdf[gdf[name].isin(query)]
275 | highlight.boundary.plot(ax=ax, edgecolor="red", linewidth=2)
276 |
277 | # Inset plot: Pen's Parade
278 | inset_ax = inset_axes(ax, width=inset_size, height=inset_size, loc="upper right")
279 |
280 | binned = mc.classify(gdf[col], scheme, k=k)
281 | gdf["_bin"] = binned.yb
282 |
283 | sgdf = gdf.sort_values(by=col, ascending=True).reset_index(drop=True)
284 |
285 | sns.barplot(
286 | x=sgdf.index, y=col, hue="_bin", data=sgdf, palette="viridis", ax=inset_ax
287 | )
288 | inset_ax.set_ylabel(col)
289 | inset_ax.set_xlabel(name)
290 | plt.xticks(rotation=90)
291 | inset_ax.set_title("Pen's Parade", fontsize=10)
292 |
293 | inset_ax.set_xticks(sgdf.index)
294 | inset_ax.set_xticklabels(sgdf[name], rotation=90, fontsize=8)
295 |
296 | if not xticks:
297 | inset_ax.set(xticks=[])
298 | inset_ax.set(xlabel="")
299 |
300 | if query:
301 | for obs in query:
302 | if obs in sgdf[name].values:
303 | obs_idx = sgdf[sgdf[name] == obs].index[0]
304 | rect = patches.Rectangle(
305 | (obs_idx - 0.5, 0),
306 | 1,
307 | sgdf.loc[obs_idx, col],
308 | linewidth=2,
309 | edgecolor="red",
310 | facecolor="none",
311 | )
312 | inset_ax.add_patch(rect)
313 |
314 | inset_ax.get_legend().remove()
315 |
316 | # plt.tight_layout()
317 | return ax, inset_ax
318 |
--------------------------------------------------------------------------------
/inequality/schutz.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 |
3 | __all__ = ["Schutz"]
4 |
5 |
6 | class Schutz:
7 | """The Schutz class calculates measures of inequality in an income
8 | distribution.
9 |
10 | It calculates the Schutz distance, which is the maximum distance
11 | between the line of perfect equality and the Lorenz curve.
12 | Additionally, it computes the intersection point with the line of
13 | perfect equality where the Schutz distance occurs and the original
14 | Schutz coefficient.
15 | See :cite:`schutz1951MeasurementIncome`.
16 |
17 | Parameters
18 | ----------
19 | df : pd.DataFrame
20 | The input DataFrame containing the data.
21 | column_name : str
22 | The name of the column for which the Schutz coefficient is to
23 | be calculated.
24 |
25 | Attributes
26 | ----------
27 | df : pd.DataFrame
28 | The input DataFrame containing the data.
29 | column_name : str
30 | The name of the column for which the Schutz coefficient is to
31 | be calculated.
32 | df_processed : pd.DataFrame
33 | The processed DataFrame with additional columns.
34 | distance : float
35 | The maximum distance between the line of perfect equality and
36 | the Lorenz curve.
37 | intersection_point : float
38 | The x and y coordinate of the intersection point where the
39 | Schutz distance occurs.
40 | coefficient : float
41 | The original Schutz coefficient.
42 |
43 | Examples
44 | --------
45 | >>> import pandas as pd
46 | >>> gdf = pd.DataFrame({
47 | ... 'NAME': ['A', 'B', 'C', 'D', 'E'],
48 | ... 'Y': [1000, 2000, 1500, 3000, 2500]
49 | ... })
50 | >>> schutz_obj = Schutz(gdf, 'Y')
51 | >>> print("Schutz Distance:", round(float(schutz_obj.distance),2))
52 | Schutz Distance: 0.15
53 | >>> print("Intersection Point:", round(schutz_obj.intersection_point, 1))
54 | Intersection Point: 0.6
55 | >>> print("Schutz Coefficient:", round(schutz_obj.coefficient, 1))
56 | Schutz Coefficient: 7.5
57 | """
58 |
59 | def __init__(self, df, column_name):
60 | """
61 | Initialize the Schutz object, calculate the Schutz distance,
62 | the intersection point with the line of perfect equality, and
63 | the original Schutz coefficient.
64 |
65 | Parameters
66 | ----------
67 | df: pd.DataFrame
68 | The input DataFrame containing the data.
69 | column_name: str
70 | The name of the column for which the Schutz coefficient is
71 | to be calculated.
72 | """
73 | self.df = df
74 | self.column_name = column_name
75 | self.df_processed = self._prepare_dataframe()
76 | self.distance = self.calculate_schutz_distance()
77 | self.intersection_point = self.calculate_intersection_point()
78 | self.coefficient = self.calculate_schutz_coefficient()
79 |
80 | def _prepare_dataframe(self):
81 | """
82 | Prepare the DataFrame by sorting and calculating necessary
83 | columns.
84 |
85 | Returns
86 | -------
87 | pd.DataFrame
88 | The processed DataFrame with additional columns.
89 | """
90 | df = (
91 | self.df[[self.column_name]]
92 | .sort_values(by=self.column_name)
93 | .reset_index(drop=True)
94 | )
95 | df["unit"] = 1
96 | df["upct"] = df.unit / df.unit.sum()
97 | df["ypct"] = df[self.column_name] / df[self.column_name].sum()
98 | df["ucpct"] = df.upct.cumsum()
99 | df["ycpct"] = df.ypct.cumsum()
100 | df["distance"] = df["ucpct"] - df["ycpct"]
101 | df["slope"] = df.ypct / df.upct
102 | df["coefficient"] = 10 * (df.slope - 1)
103 | return df
104 |
105 | def calculate_schutz_distance(self):
106 | """
107 | Calculate the Schutz distance, which is the maximum distance
108 | between the line of perfect equality and the Lorenz curve.
109 |
110 | Returns
111 | -------
112 | float
113 | The maximum distance indicating the level of inequality.
114 | """
115 | return self.df_processed["distance"].max()
116 |
117 | def calculate_intersection_point(self):
118 | """
119 | Calculate the intersection point of the line of perfect equality
120 | and the Lorenz curve.
121 |
122 | Returns
123 | -------
124 | float
125 | The x and y coordinate of the intersection point where the
126 | Schutz distance occurs.
127 | """
128 | max_distance_row = self.df_processed[
129 | self.df_processed["distance"] == self.distance
130 | ].iloc[0]
131 | intersection_point = max_distance_row["ucpct"]
132 | return intersection_point
133 |
134 | def calculate_schutz_coefficient(self):
135 | """
136 | Calculate the original Schutz coefficient.
137 |
138 | Returns
139 | -------
140 | float
141 | The Schutz coefficient.
142 | """
143 | coefficient = self.df_processed[
144 | self.df_processed["coefficient"] > 0
145 | ].coefficient.sum()
146 | return coefficient
147 |
148 | def plot(
149 | self,
150 | xlabel="Cumulative Share of the Population",
151 | ylabel="Cumulative Share of Income",
152 | grid=True,
153 | title=None,
154 | ):
155 | """
156 | Plot the Lorenz curve, the line of perfect equality, and the
157 | Schutz line.
158 |
159 | The plot shows the Lorenz curve, a 45-degree line representing
160 | perfect equality, and the Schutz line dropping vertically from
161 | the intersection point on the line of perfect equality to the
162 | Lorenz curve.
163 | """
164 | plt.figure(figsize=(10, 6))
165 |
166 | # Plot Lorenz curve
167 | plt.plot(
168 | [0] + self.df_processed["ucpct"].tolist(),
169 | [0] + self.df_processed["ycpct"].tolist(),
170 | label="Lorenz Curve",
171 | color="blue",
172 | )
173 |
174 | # Plot 45-degree line of perfect equality
175 | plt.plot(
176 | [0, 1],
177 | [0, 1],
178 | label="Line of Perfect Equality",
179 | color="black",
180 | linestyle="--",
181 | )
182 |
183 | # Plot Schutz line
184 | plt.plot(
185 | [self.intersection_point, self.intersection_point],
186 | [self.intersection_point, self.intersection_point - self.distance],
187 | label="Schutz Line",
188 | color="red",
189 | linestyle=":",
190 | )
191 |
192 | # Add labels and title
193 | plt.xlabel(xlabel)
194 | plt.ylabel(ylabel)
195 | if title is None:
196 | title = self.column_name
197 | plt.title(title)
198 | plt.legend()
199 | plt.grid(grid)
200 | plt.show()
201 |
--------------------------------------------------------------------------------
/inequality/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pysal/inequality/cd2e2c5eebe0afa3d3e39f57bcbad28c646cdb1e/inequality/tests/__init__.py
--------------------------------------------------------------------------------
/inequality/tests/conftest.py:
--------------------------------------------------------------------------------
1 | # ----- IMPORTANT -----
2 | # See GL#80 & GL#81
3 | # This file can be deleted once ``_indices.py`` is fully removed.
4 |
5 | import pytest
6 |
7 |
8 | def warning_depr(x):
9 | return pytest.warns(
10 | FutureWarning, match=f"{x} is deprecated and will be removed on 2025-01-01."
11 | )
12 |
13 |
14 | def warning_invalid(x):
15 | return pytest.warns(RuntimeWarning, match=f"invalid value encountered in {x}")
16 |
17 |
18 | warning_div_zero = pytest.warns(RuntimeWarning, match="divide by zero encountered")
19 |
20 |
21 | def pytest_configure():
22 | pytest.warning_depr = warning_depr
23 | pytest.warning_invalid = warning_invalid
24 | pytest.warning_div_zero = warning_div_zero
25 |
26 |
27 | def pytest_ignore_collect(collection_path):
28 | return bool(str(collection_path).endswith("_indices.py"))
29 |
--------------------------------------------------------------------------------
/inequality/tests/test_atkinson.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pytest
3 | from inequality.atkinson import Atkinson, atkinson
4 |
5 |
6 | def testatkinson_function():
7 | # Test case for epsilon = 0.5
8 | incomes = np.array([10, 20, 30, 40, 50])
9 | result = atkinson(incomes, 0.5)
10 | expected = 0.06315
11 | assert np.isclose(result, expected, atol=1e-5), (
12 | f"Expected {expected}, but got {result}"
13 | )
14 |
15 | # Test case for epsilon = 1
16 | result = atkinson(incomes, 1)
17 | expected = 0.1316096
18 | assert np.isclose(result, expected, atol=1e-5), (
19 | f"Expected {expected}, but got {result}"
20 | )
21 |
22 | # Test case for epsilon = 0
23 | result = atkinson(incomes, 0)
24 | expected = 0
25 | assert np.isclose(result, expected, atol=1e-5), (
26 | f"Expected {expected}, but got {result}"
27 | )
28 |
29 |
30 | def testatkinson_class():
31 | # Test case for epsilon = 0.5
32 | incomes = np.array([10, 20, 30, 40, 50])
33 | atkinson = Atkinson(incomes, 0.5)
34 | expected_A = 0.06315
35 | expected_EDE = 28.105398233
36 | assert np.isclose(atkinson.A, expected_A, atol=1e-5), (
37 | f"Expected Atkinson index {expected_A}, but got {atkinson.A}"
38 | )
39 | assert np.isclose(atkinson.EDE, expected_EDE, atol=1e-5), (
40 | f"Expected EDE {expected_EDE}, but got {atkinson.EDE}"
41 | )
42 |
43 | # Test case for epsilon = 1
44 | atkinson = Atkinson(incomes, 1)
45 | expected_A = 0.1316096
46 | expected_EDE = 26.0517108
47 | assert np.isclose(atkinson.A, expected_A, atol=1e-5), (
48 | f"Expected Atkinson index {expected_A}, but got {atkinson.A}"
49 | )
50 | assert np.isclose(atkinson.EDE, expected_EDE, atol=1e-5), (
51 | f"Expected EDE {expected_EDE}, but got {atkinson.EDE}"
52 | )
53 |
54 | # Test case for epsilon = 0
55 | atkinson = Atkinson(incomes, 0)
56 | expected_A = 0
57 | expected_EDE = incomes.mean()
58 | assert np.isclose(atkinson.A, expected_A, atol=1e-5), (
59 | f"Expected Atkinson index {expected_A}, but got {atkinson.A}"
60 | )
61 | assert np.isclose(atkinson.EDE, expected_EDE, atol=1e-5), (
62 | f"Expected EDE {expected_EDE}, but got {atkinson.EDE}"
63 | )
64 |
65 |
66 | if __name__ == "__main__":
67 | pytest.main()
68 |
--------------------------------------------------------------------------------
/inequality/tests/test_gini.py:
--------------------------------------------------------------------------------
1 | import libpysal
2 | import numpy
3 |
4 | from inequality.gini import Gini, Gini_Spatial
5 |
6 |
7 | class TestGini:
8 | def setup_method(self):
9 | f = libpysal.io.open(libpysal.examples.get_path("mexico.csv"))
10 | vnames = [f"pcgdp{dec}" for dec in range(1940, 2010, 10)]
11 | y = numpy.transpose(numpy.array([f.by_col[v] for v in vnames]))
12 | self.y = y[:, 0]
13 | regimes = numpy.array(f.by_col("hanson98"))
14 |
15 | self.w = libpysal.weights.block_weights(regimes, silence_warnings=True)
16 |
17 | def test_Gini(self):
18 | g = Gini(self.y)
19 | numpy.testing.assert_almost_equal(g.g, 0.35372371173452849)
20 |
21 | def test_Gini_Spatial(self):
22 | numpy.random.seed(12345)
23 | g = Gini_Spatial(self.y, self.w)
24 | numpy.testing.assert_almost_equal(g.g, 0.35372371173452849)
25 | numpy.testing.assert_almost_equal(g.wg, 884130.0)
26 | numpy.testing.assert_almost_equal(g.wcg, 4353856.0)
27 | numpy.testing.assert_almost_equal(g.p_sim, 0.040)
28 | numpy.testing.assert_almost_equal(g.e_wcg, 4170356.7474747472)
29 |
--------------------------------------------------------------------------------
/inequality/tests/test_indices.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | import pytest
3 |
4 | from inequality._indices import (
5 | abundance,
6 | ellison_glaeser_egg,
7 | ellison_glaeser_egg_pop,
8 | fractionalization_gs,
9 | gini_gi,
10 | gini_gi_m,
11 | gini_gig,
12 | herfindahl_hd,
13 | hoover_hi,
14 | isolation_ii,
15 | isolation_isg,
16 | margalev_md,
17 | maurel_sedillot_msg,
18 | maurel_sedillot_msg_pop,
19 | menhinick_mi,
20 | modified_segregation_msg,
21 | polarization,
22 | segregation_gsg,
23 | shannon_se,
24 | similarity_w_wd,
25 | simpson_sd,
26 | simpson_so,
27 | theil_th,
28 | theil_th_brute,
29 | )
30 |
31 | x = numpy.array([[0, 1, 2], [0, 2, 4], [0, 0, 3]])
32 |
33 | numpy.random.seed(0)
34 | y = numpy.random.randint(1, 10, size=(4, 3))
35 |
36 | numpy.random.seed(0)
37 | tau = numpy.random.uniform(size=(3, 3))
38 | numpy.fill_diagonal(tau, 0.0)
39 | tau = (tau + tau.T) / 2
40 |
41 | numpy.random.seed(0)
42 | z = numpy.random.randint(10, 50, size=(3, 4))
43 |
44 | numpy.random.seed(0)
45 | v = numpy.random.uniform(0, 1, size=(4,))
46 |
47 |
48 | class TestAbundance:
49 | def test_abundance(self):
50 | known = 2
51 | with pytest.warning_depr("abundance"):
52 | observed = abundance(x)
53 | assert known == observed
54 |
55 |
56 | class TestMargalevMD:
57 | def test_margalev_md(self):
58 | known = 0.40242960438184466
59 | with pytest.warning_depr("abundance"), pytest.warning_depr("margalev_md"):
60 | observed = margalev_md(x)
61 | assert known == pytest.approx(observed)
62 |
63 |
64 | class TestMenhinickMI:
65 | def test_menhinick_mi(self):
66 | known = 0.2886751345948129
67 | with pytest.warning_depr("abundance"), pytest.warning_depr("menhinick_mi"):
68 | observed = menhinick_mi(x)
69 | assert known == pytest.approx(observed)
70 |
71 |
72 | class TestSimpsonSO:
73 | def test_simpson_so(self):
74 | known = 0.5909090909090909
75 | with pytest.warning_depr("simpson_so"):
76 | observed = simpson_so(x)
77 | assert known == pytest.approx(observed)
78 |
79 |
80 | class TestSimpsonSD:
81 | def test_simpson_sd(self):
82 | known = 0.40909090909090906
83 | with pytest.warning_depr("simpson_so"), pytest.warning_depr("simpson_sd"):
84 | observed = simpson_sd(x)
85 | assert known == pytest.approx(observed)
86 |
87 |
88 | class TestHerfindahlHD:
89 | def test_herfindahl_hd(self):
90 | known = 0.625
91 | with pytest.warning_depr("herfindahl_hd"):
92 | observed = herfindahl_hd(x)
93 | assert known == pytest.approx(observed)
94 |
95 |
96 | class TestTheilTH:
97 | def test_theil_th(self):
98 | known = 0.15106563978903298
99 | with pytest.warning_depr("theil_th"):
100 | observed = theil_th(x, ridz=True)
101 | assert known == pytest.approx(observed)
102 |
103 | with (
104 | pytest.warning_depr("theil_th"),
105 | pytest.warning_div_zero,
106 | pytest.warning_invalid("subtract"),
107 | pytest.warning_invalid("multiply"),
108 | ):
109 | observed = theil_th(x, ridz=False)
110 | assert numpy.isnan(observed)
111 |
112 | # test brute comparison
113 | with pytest.warning_depr("theil_th_brute"):
114 | known = theil_th_brute(x, ridz=True)
115 | with pytest.warning_depr("theil_th"):
116 | observed = theil_th(x, ridz=True)
117 | assert known == pytest.approx(observed)
118 |
119 | with (
120 | pytest.warning_depr("theil_th_brute"),
121 | pytest.warning_div_zero,
122 | pytest.warning_invalid("scalar multiply"),
123 | pytest.warning_invalid("multiply"),
124 | pytest.warning_invalid("scalar subtract"),
125 | ):
126 | observed = theil_th_brute(x, ridz=False)
127 | assert numpy.isnan(observed)
128 |
129 |
130 | class TestFractionalizationGS:
131 | def test_fractionalization_gs(self):
132 | known = 0.375
133 | with (
134 | pytest.warning_depr("herfindahl_hd"),
135 | pytest.warning_depr("fractionalization_gs"),
136 | ):
137 | observed = fractionalization_gs(x)
138 | assert known == pytest.approx(observed)
139 |
140 |
141 | class TestPolarization:
142 | def test_polarization(self):
143 | with (
144 | pytest.raises(RuntimeError, match="Not currently implemented."),
145 | pytest.warning_depr("polarization"),
146 | ):
147 | polarization(None)
148 |
149 |
150 | class TestShannonSE:
151 | def test_shannon_se(self):
152 | known = 1.094070862104929
153 | with pytest.warning_depr("shannon_se"):
154 | observed = shannon_se(y)
155 | assert known == pytest.approx(observed)
156 |
157 |
158 | class TestGiniGI:
159 | def test_gini_gi(self):
160 | known = 0.05128205128205132
161 | with pytest.warning_depr("_gini"), pytest.warning_depr("gini_gi"):
162 | observed = gini_gi(y)
163 | assert known == pytest.approx(observed)
164 |
165 |
166 | class TestGiniGIG:
167 | def test_gini_gig(self):
168 | known = numpy.array([0.125, 0.32894737, 0.18181818])
169 | with pytest.warning_depr("_gini"), pytest.warning_depr("gini_gig"):
170 | observed = gini_gig(y)
171 | numpy.testing.assert_array_almost_equal(known, observed)
172 |
173 |
174 | class TestGiniGIM:
175 | def test_gini_gi_m(self):
176 | known = 0.05128205128205132
177 | with pytest.warning_depr("gini_gi_m"):
178 | observed = gini_gi_m(y)
179 | assert known == pytest.approx(observed)
180 |
181 |
182 | class TestHooverHI:
183 | def test_hoover_hi(self):
184 | known = 0.041025641025641046
185 | with pytest.warning_depr("hoover_hi"):
186 | observed = hoover_hi(y)
187 | assert known == pytest.approx(observed)
188 |
189 |
190 | class TestSimilarityWWD:
191 | def test_similarity_w_wd(self):
192 | known = 0.5818596340322582
193 | with pytest.warning_depr("similarity_w_wd"):
194 | observed = similarity_w_wd(y, tau)
195 | assert known == pytest.approx(observed)
196 |
197 |
198 | class TestSegregationGSG:
199 | def test_segregation_gsg(self):
200 | known = numpy.array([0.18292683, 0.24713959, 0.09725159])
201 | with pytest.warning_depr("segregation_gsg"):
202 | observed = segregation_gsg(y)
203 | numpy.testing.assert_array_almost_equal(known, observed)
204 |
205 |
206 | class TestModifiedSegregationMSG:
207 | def test_modified_segregation_msg(self):
208 | known = numpy.array([0.0852071, 0.10224852, 0.0435503])
209 | with (
210 | pytest.warning_depr("segregation_gsg"),
211 | pytest.warning_depr("modified_segregation_msg"),
212 | ):
213 | observed = modified_segregation_msg(y)
214 | numpy.testing.assert_array_almost_equal(known, observed)
215 |
216 |
217 | class TestIsolationISG:
218 | def test_isolation_isg(self):
219 | known = numpy.array([1.0732699, 1.21995329, 1.0227105])
220 | with pytest.warning_depr("isolation_isg"):
221 | observed = isolation_isg(y)
222 | numpy.testing.assert_array_almost_equal(known, observed)
223 |
224 |
225 | class TestIsolationII:
226 | def test_isolation_ii(self):
227 | known = numpy.array([1.1161596, 1.31080357, 1.03432983])
228 | with pytest.warning_depr("isolation_ii"):
229 | observed = isolation_ii(y)
230 | numpy.testing.assert_array_almost_equal(known, observed)
231 |
232 |
233 | class TestEllisonGlaeserEGG:
234 | def test_ellison_glaeser_egg(self):
235 | known = numpy.array([0.0544994, 0.01624183, 0.01014058, 0.02880251])
236 | with pytest.warning_depr("ellison_glaeser_egg"):
237 | observed = ellison_glaeser_egg(z)
238 | numpy.testing.assert_array_almost_equal(known, observed)
239 |
240 | known = numpy.array([-1.0617873, -2.39452501, -1.45991648, -1.11740985])
241 | with pytest.warning_depr("ellison_glaeser_egg"):
242 | observed = ellison_glaeser_egg(z, hs=v)
243 | numpy.testing.assert_array_almost_equal(known, observed)
244 |
245 |
246 | class TestEllisonGlaeserEGGPop:
247 | def test_ellison_glaeser_egg_pop(self):
248 | known = numpy.array([-0.02150826, 0.01329858, -0.03894556])
249 | with pytest.warning_depr("ellison_glaeser_egg_pop"):
250 | observed = ellison_glaeser_egg_pop(y)
251 | numpy.testing.assert_array_almost_equal(known, observed)
252 |
253 |
254 | class TestMaurelSedillotMSG:
255 | def test_maurel_sedillot_msg(self):
256 | known = numpy.array([0.07858256, 0.03597749, 0.03937436, -0.00904911])
257 | with pytest.warning_depr("maurel_sedillot_msg"):
258 | observed = maurel_sedillot_msg(z)
259 | numpy.testing.assert_array_almost_equal(known, observed)
260 |
261 | known = numpy.array([-1.01010171, -2.32421555, -1.38868998, -1.20049894])
262 | with pytest.warning_depr("maurel_sedillot_msg"):
263 | observed = maurel_sedillot_msg(z, hs=v.round(3))
264 | numpy.testing.assert_array_almost_equal(known, observed)
265 |
266 |
267 | class TestMaurelSedillotMSGPop:
268 | def test_maurel_sedillot_msg_pop(self):
269 | known = numpy.array([-0.05503571, 0.04414672, -0.02866628])
270 | with pytest.warning_depr("maurel_sedillot_msg_pop"):
271 | observed = maurel_sedillot_msg_pop(y)
272 | numpy.testing.assert_array_almost_equal(known, observed)
273 |
--------------------------------------------------------------------------------
/inequality/tests/test_interface.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 | import pandas as pd
4 | from inequality.wolfson import lorenz_curve, wolfson
5 |
6 |
7 | def test_lorenz_curve_with_array():
8 | income = np.array(
9 | [20000, 25000, 27000, 30000, 35000, 45000, 60000, 75000, 80000, 120000]
10 | )
11 | population, cumulative_income = lorenz_curve(income)
12 |
13 | # Check that both returned arrays have the correct length (n+1)
14 | assert len(population) == len(income) + 1
15 | assert len(cumulative_income) == len(income) + 1
16 |
17 | # Ensure that the Lorenz curve starts at zero
18 | assert cumulative_income[0] == 0.0
19 | assert population[0] == 0.0
20 |
21 |
22 | def test_lorenz_curve_with_list():
23 | income = [20000, 25000, 27000, 30000, 35000, 45000, 60000, 75000, 80000, 120000]
24 | population, cumulative_income = lorenz_curve(income)
25 |
26 | # Check that both returned arrays have the correct length (n+1)
27 | assert len(population) == len(income) + 1
28 | assert len(cumulative_income) == len(income) + 1
29 |
30 | # Ensure that the Lorenz curve starts at zero
31 | assert cumulative_income[0] == 0.0
32 | assert population[0] == 0.0
33 |
34 |
35 | def test_lorenz_curve_with_dataframe():
36 | df = pd.DataFrame(
37 | {
38 | "income": [
39 | 20000,
40 | 25000,
41 | 27000,
42 | 30000,
43 | 35000,
44 | 45000,
45 | 60000,
46 | 75000,
47 | 80000,
48 | 120000,
49 | ]
50 | }
51 | )
52 | population, cumulative_income = lorenz_curve(df, column="income")
53 |
54 | # Check that both returned arrays have the correct length (n+1)
55 | assert len(population) == len(df["income"]) + 1
56 | assert len(cumulative_income) == len(df["income"]) + 1
57 |
58 | # Ensure that the Lorenz curve starts at zero
59 | assert cumulative_income[0] == 0.0
60 | assert population[0] == 0.0
61 |
62 |
63 | def test_wolfson_with_array():
64 | income = np.array(
65 | [20000, 25000, 27000, 30000, 35000, 45000, 60000, 75000, 80000, 120000]
66 | )
67 | wolfson_index = wolfson(income)
68 |
69 | # Compare the result to an expected value (based on the example)
70 | assert np.isclose(wolfson_index, 0.2013, atol=1e-4)
71 |
72 |
73 | def test_wolfson_with_list():
74 | income = [20000, 25000, 27000, 30000, 35000, 45000, 60000, 75000, 80000, 120000]
75 | wolfson_index = wolfson(income)
76 |
77 | # Compare the result to an expected value (based on the example)
78 | assert np.isclose(wolfson_index, 0.2013, atol=1e-4)
79 |
80 |
81 | def test_wolfson_with_dataframe():
82 | df = pd.DataFrame(
83 | {
84 | "income": [
85 | 20000,
86 | 25000,
87 | 27000,
88 | 30000,
89 | 35000,
90 | 45000,
91 | 60000,
92 | 75000,
93 | 80000,
94 | 120000,
95 | ]
96 | }
97 | )
98 | wolfson_index = wolfson(df, column="income")
99 |
100 | # Compare the result to an expected value (based on the example)
101 | assert np.isclose(wolfson_index, 0.2013, atol=1e-4)
102 |
103 |
104 | def test_wolfson_with_small_dataset():
105 | income = [6, 6, 8, 8, 10, 10, 12, 12]
106 | wolfson_index = wolfson(income)
107 |
108 | # Compare the result to an expected value (based on the example)
109 | assert np.isclose(wolfson_index, 0.0833, atol=1e-4)
110 |
111 |
112 | def test_wolfson_with_even_distribution():
113 | income = [2, 4, 6, 8, 10, 12, 14, 16]
114 | wolfson_index = wolfson(income)
115 |
116 | # Compare the result to an expected value (based on the example)
117 | assert np.isclose(wolfson_index, 0.1528, atol=1e-4)
118 |
--------------------------------------------------------------------------------
/inequality/tests/test_pengram.py:
--------------------------------------------------------------------------------
1 | import geopandas as gpd
2 | import matplotlib
3 | import matplotlib.pyplot as plt
4 | import pandas as pd
5 | import pytest
6 | from inequality.pen import _check_deps, pen, pengram
7 | from shapely.geometry import Polygon
8 |
9 | # Set the backend to 'Agg' to prevent GUI windows from opening
10 | matplotlib.use("Agg")
11 |
12 |
13 | # Test Data Setup
14 |
15 |
16 | @pytest.fixture
17 | def sample_df():
18 | """Sample dataframe for testing the pen function."""
19 | data = {
20 | "region": ["A", "B", "C", "D"],
21 | "income": [50000, 60000, 70000, 80000],
22 | "population": [100, 150, 200, 250],
23 | }
24 | return pd.DataFrame(data)
25 |
26 |
27 | @pytest.fixture
28 | def sample_gdf():
29 | """Sample GeoDataFrame for testing the pengram function."""
30 | data = {"region": ["A", "B", "C", "D"], "income": [50000, 60000, 70000, 80000]}
31 | polygons = [
32 | Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
33 | Polygon([(1, 0), (2, 0), (2, 1), (1, 1)]),
34 | Polygon([(0, 1), (1, 1), (1, 2), (0, 2)]),
35 | Polygon([(1, 1), (2, 1), (2, 2), (1, 2)]),
36 | ]
37 | gdf = gpd.GeoDataFrame(data, geometry=polygons)
38 | return gdf
39 |
40 |
41 | # Test _check_deps function
42 |
43 |
44 | def test_check_deps():
45 | """Test that _check_deps function imports all necessary dependencies."""
46 | sns, mc, pd = _check_deps()
47 | assert sns is not None
48 | assert mc is not None
49 | assert pd is not None
50 |
51 |
52 | # Test pen function
53 |
54 |
55 | def test_pen_basic(sample_df):
56 | """Test basic functionality of the pen function."""
57 | ax = pen(sample_df, col="income", x="region")
58 | assert ax is not None
59 | assert isinstance(ax, plt.Axes)
60 | assert ax.get_ylabel() == "income"
61 | assert ax.get_xlabel() == "region"
62 | assert len(ax.patches) == len(sample_df), "All regions should be plotted."
63 | plt.close(ax.figure) # Close the figure to free up resources
64 |
65 |
66 | def test_pen_weighted(sample_df):
67 | """Test pen function with weighting."""
68 | ax = pen(sample_df, col="income", x="region", weight="population")
69 | assert ax is not None
70 | assert isinstance(ax, plt.Axes)
71 | assert ax.get_ylabel() == "income"
72 | assert ax.get_xlabel() == "region"
73 | plt.close(ax.figure) # Close the figure to free up resources
74 |
75 |
76 | @pytest.mark.parametrize("weight_col", ["population", None])
77 | def test_pen_parametrized(sample_df, weight_col):
78 | """Test pen function with and without weighting using parameterization."""
79 | ax = pen(sample_df, col="income", x="region", weight=weight_col)
80 | assert ax is not None
81 | assert isinstance(ax, plt.Axes)
82 | plt.close(ax.figure) # Close the figure to free up resources
83 |
84 |
85 | # Test pengram function
86 |
87 |
88 | def test_pengram_basic(sample_gdf):
89 | """Test basic functionality of the pengram function."""
90 | ax, inset_ax = pengram(sample_gdf, col="income", name="region")
91 | assert ax is not None
92 | assert inset_ax is not None
93 | assert isinstance(ax, plt.Axes)
94 | assert isinstance(inset_ax, plt.Axes)
95 | plt.close(ax.figure) # Close the main figure to free up resources
96 | plt.close(inset_ax.figure) # Close the inset figure to free up resources
97 |
98 |
99 | def test_pengram_custom_inset_size(sample_gdf):
100 | """Test pengram function with custom inset size."""
101 | ax, inset_ax = pengram(sample_gdf, col="income", name="region", inset_size="50%")
102 | assert ax is not None
103 | assert inset_ax is not None
104 | assert isinstance(ax, plt.Axes)
105 | assert isinstance(inset_ax, plt.Axes)
106 | plt.close(ax.figure) # Close the main figure to free up resources
107 | plt.close(inset_ax.figure) # Close the inset figure to free up resources
108 |
109 |
110 | # Test invalid cases
111 |
112 |
113 | def test_invalid_weight_column(sample_df):
114 | """Test pen function with an invalid weight column."""
115 | with pytest.raises(KeyError, match="invalid_column"):
116 | pen(sample_df, col="income", x="region", weight="invalid_column")
117 |
118 |
119 | def test_invalid_query_column(sample_gdf):
120 | """Test pengram function with an invalid query column."""
121 | with pytest.raises(KeyError, match="invalid_column"):
122 | pengram(sample_gdf, col="income", name="invalid_column", query=["A", "C"])
123 |
--------------------------------------------------------------------------------
/inequality/tests/test_schutz.py:
--------------------------------------------------------------------------------
1 | import os
2 | import platform
3 |
4 | import matplotlib.pyplot as plt
5 | import pandas as pd
6 | import pytest
7 | from inequality.schutz import Schutz
8 |
9 | NOT_LINUX = platform.system() != "Linux"
10 |
11 |
12 | @pytest.fixture
13 | def example_dataframe():
14 | data = {"NAME": ["A", "B", "C", "D", "E"], "Y": [1000, 2000, 1500, 3000, 2500]}
15 | return pd.DataFrame(data)
16 |
17 |
18 | def plot_warning_helper(schutz_obj):
19 | if NOT_LINUX:
20 | with pytest.warns(
21 | UserWarning,
22 | match="FigureCanvasAgg is non-interactive, and thus cannot be shown",
23 | ):
24 | schutz_obj.plot()
25 | else:
26 | schutz_obj.plot()
27 |
28 |
29 | def test_schutz_distance(example_dataframe):
30 | schutz_obj = Schutz(example_dataframe, "Y")
31 | expected_distance = 0.15
32 | assert pytest.approx(schutz_obj.distance, 0.01) == expected_distance
33 |
34 |
35 | def test_schutz_intersection_point(example_dataframe):
36 | schutz_obj = Schutz(example_dataframe, "Y")
37 | expected_intersection_point = 0.6
38 | assert (
39 | pytest.approx(schutz_obj.intersection_point, 0.1) == expected_intersection_point
40 | )
41 |
42 |
43 | def test_schutz_coefficient(example_dataframe):
44 | schutz_obj = Schutz(example_dataframe, "Y")
45 | expected_coefficient = 7.5
46 | assert pytest.approx(schutz_obj.coefficient, 0.1) == expected_coefficient
47 |
48 |
49 | def test_schutz_plot_runs_without_errors(example_dataframe):
50 | schutz_obj = Schutz(example_dataframe, "Y")
51 | try:
52 | plot_warning_helper(schutz_obj)
53 | except Exception as e:
54 | pytest.fail(f"Plotting failed: {e}")
55 |
56 |
57 | def test_schutz_plot_output(example_dataframe, tmpdir):
58 | """Test if the plot output matches the expected result by saving
59 | the plot and comparing it."""
60 | schutz_obj = Schutz(example_dataframe, "Y")
61 |
62 | # Save the plot to a temporary directory
63 | plot_file = os.path.join(tmpdir, "schutz_plot.png")
64 | plt.figure()
65 | plot_warning_helper(schutz_obj)
66 | plt.savefig(plot_file)
67 | plt.close()
68 |
69 | # Ensure that the plot file was created
70 | assert os.path.exists(plot_file), "Plot file was not created."
71 |
--------------------------------------------------------------------------------
/inequality/tests/test_theil.py:
--------------------------------------------------------------------------------
1 | import libpysal
2 | import numpy
3 |
4 | from inequality.theil import Theil, TheilD, TheilDSim
5 |
6 |
7 | class TestTheil:
8 | def test___init__(self):
9 | f = libpysal.io.open(libpysal.examples.get_path("mexico.csv"))
10 | vnames = [f"pcgdp{dec}" for dec in range(1940, 2010, 10)]
11 | y = numpy.transpose(numpy.array([f.by_col[v] for v in vnames]))
12 | theil_y = Theil(y)
13 | numpy.testing.assert_almost_equal(
14 | theil_y.T,
15 | numpy.array(
16 | [
17 | 0.20894344,
18 | 0.15222451,
19 | 0.10472941,
20 | 0.10194725,
21 | 0.09560113,
22 | 0.10511256,
23 | 0.10660832,
24 | ]
25 | ),
26 | )
27 |
28 |
29 | class TestTheilD:
30 | def test___init__(self):
31 | f = libpysal.io.open(libpysal.examples.get_path("mexico.csv"))
32 | vnames = [f"pcgdp{dec}" for dec in range(1940, 2010, 10)]
33 | y = numpy.transpose(numpy.array([f.by_col[v] for v in vnames]))
34 | regimes = numpy.array(f.by_col("hanson98"))
35 | theil_d = TheilD(y, regimes)
36 | numpy.testing.assert_almost_equal(
37 | theil_d.bg,
38 | numpy.array(
39 | [
40 | 0.0345889,
41 | 0.02816853,
42 | 0.05260921,
43 | 0.05931219,
44 | 0.03205257,
45 | 0.02963731,
46 | 0.03635872,
47 | ]
48 | ),
49 | )
50 |
51 | y = numpy.array([0, 0, 0, 10, 10, 10])
52 | regions = numpy.array([0, 0, 0, 1, 1, 1])
53 | theil_d = TheilD(y, regions)
54 | numpy.testing.assert_almost_equal(theil_d.T, 0.6931471805599453)
55 | numpy.testing.assert_almost_equal(theil_d.bg, 0.6931471805599453)
56 | numpy.testing.assert_almost_equal(theil_d.wg, 0)
57 |
58 |
59 | class TestTheilDSim:
60 | def test___init__(self):
61 | f = libpysal.io.open(libpysal.examples.get_path("mexico.csv"))
62 | vnames = [f"pcgdp{dec}" for dec in range(1940, 2010, 10)]
63 | y = numpy.transpose(numpy.array([f.by_col[v] for v in vnames]))
64 | regimes = numpy.array(f.by_col("hanson98"))
65 | numpy.random.seed(10)
66 | theil_ds = TheilDSim(y, regimes, 999)
67 | numpy.testing.assert_almost_equal(
68 | theil_ds.bg_pvalue,
69 | numpy.array([0.4, 0.344, 0.001, 0.001, 0.034, 0.072, 0.032]),
70 | )
71 |
--------------------------------------------------------------------------------
/inequality/tests/test_wolfson.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from inequality.wolfson import lorenz_curve, wolfson
4 |
5 |
6 | def test_lorenz_curve():
7 | income = [1, 2, 3, 4, 5]
8 | population, cumulative_income = lorenz_curve(income)
9 |
10 | # Expected cumulative income values (calculated manually)
11 | expected_cumulative_income = np.array([0, 0.06666667, 0.2, 0.4, 0.66666667, 1])
12 |
13 | np.testing.assert_almost_equal(
14 | cumulative_income, expected_cumulative_income, decimal=6
15 | )
16 | # Should include start and end points (0 and 1)
17 | assert len(population) == 6
18 |
19 |
20 | def test_wolfson():
21 | income = [6, 6, 8, 8, 10, 10, 12, 12]
22 | wolfson_idx = wolfson(income)
23 | expected_wolfson_idx = 1 / 12
24 | assert np.isclose(wolfson_idx, expected_wolfson_idx, atol=0.01)
25 | income = [2, 4, 6, 8, 10, 12, 14, 16]
26 | wolfson_idx = wolfson(income)
27 | expected_wolfson_idx = 11 / 72
28 | assert np.isclose(wolfson_idx, expected_wolfson_idx, atol=0.01)
29 |
--------------------------------------------------------------------------------
/inequality/theil.py:
--------------------------------------------------------------------------------
1 | """Theil Inequality metrics"""
2 |
3 | __author__ = "Sergio J. Rey "
4 |
5 | import numpy
6 |
7 | __all__ = ["Theil", "TheilD", "TheilDSim"]
8 |
9 | SMALL = numpy.finfo("float").tiny
10 |
11 |
12 | class Theil:
13 | """
14 | Classic Theil measure of inequality.
15 |
16 | .. math::
17 |
18 | T = \\sum_{i=1}^n
19 | \\left( \\frac{y_i}{\\sum_{i=1}^n y_i} \\ln
20 | \\left[ N \\frac{y_i}{\\sum_{i=1}^n y_i}\\right]
21 | \\right
22 | )
23 |
24 | Parameters
25 | ----------
26 |
27 | y : numpy.array
28 | An array in the shape :math:`(n,t)` or :math:`(n,)`
29 | with :math:`n` taken as the observations across which inequality is
30 | calculated. If ``y`` is :math:`(n,)` then a scalar inequality value is
31 | determined. If ``y`` is :math:`(n,t)` then an array of inequality values are
32 | determined, one value for each column in ``y``.
33 |
34 | Attributes
35 | ----------
36 |
37 | T : numpy.array
38 | An array in the shape :math:`(t,)` or :math:`(1,)`
39 | containing Theil's *T* for each column of ``y``.
40 |
41 | Notes
42 | -----
43 | This computation involves natural logs. To prevent ``ln[0]`` from occurring, a
44 | small value is added to each element of ``y`` before beginning the computation.
45 |
46 | Examples
47 | --------
48 |
49 | >>> import libpysal
50 | >>> import numpy
51 | >>> from inequality.theil import Theil
52 |
53 | >>> f = libpysal.io.open(libpysal.examples.get_path('mexico.csv'))
54 | >>> vnames = [f'pcgdp{dec}' for dec in range(1940, 2010, 10)]
55 | >>> y = numpy.array([f.by_col[v] for v in vnames]).T
56 | >>> theil_y = Theil(y)
57 |
58 | >>> theil_y.T
59 | array([0.20894344, 0.15222451, 0.10472941, 0.10194725, 0.09560113,
60 | 0.10511256, 0.10660832])
61 |
62 | """
63 |
64 | def __init__(self, y):
65 | n = len(y)
66 | y = y + SMALL * (y == 0) # can't have 0 values
67 | yt = y.sum(axis=0)
68 | s = y / (yt * 1.0)
69 | lns = numpy.log(n * s)
70 | slns = s * lns
71 | t = sum(slns)
72 | self.T = t
73 |
74 |
75 | class TheilD:
76 | """Decomposition of Theil's *T* based on partitioning of
77 | observations into exhaustive and mutually exclusive groups.
78 |
79 | Parameters
80 | ----------
81 |
82 | y : numpy.array
83 | An array in the shape :math:`(n,t)` or :math:`(n,)`
84 | with :math:`n` taken as the observations across which inequality is
85 | calculated. If ``y`` is :math:`(n,)` then a scalar inequality value is
86 | determined. If ``y`` is :math:`(n,t)` then an array of inequality values are
87 | determined, one value for each column in ``y``.
88 | partition : numpy.array
89 | An array in the shape :math:`(n,)` of elements indicating which partition
90 | each observation belongs to. These are assumed to be exhaustive.
91 |
92 | Attributes
93 | ----------
94 |
95 | T : numpy.array
96 | An array in the shape :math:`(t,)` or :math:`(1,)`
97 | containing the global inequality *T*.
98 | bg : numpy.array
99 | An array in the shape :math:`(n,t)` or :math:`(n,)`
100 | representing between group inequality.
101 | wg : numpy.array
102 | An array in the shape :math:`(n,t)` or :math:`(n,)`
103 | representing within group inequality.
104 |
105 | Examples
106 | --------
107 |
108 | >>> import libpysal
109 | >>> import numpy
110 | >>> from inequality.theil import TheilD
111 |
112 | >>> f = libpysal.io.open(libpysal.examples.get_path('mexico.csv'))
113 | >>> vnames = [f'pcgdp{dec}' for dec in range(1940, 2010, 10)]
114 | >>> y = numpy.array([f.by_col[v] for v in vnames]).T
115 | >>> regimes = numpy.array(f.by_col('hanson98'))
116 | >>> theil_d = TheilD(y, regimes)
117 |
118 | >>> theil_d.bg
119 | array([0.0345889 , 0.02816853, 0.05260921, 0.05931219, 0.03205257,
120 | 0.02963731, 0.03635872])
121 |
122 | >>> theil_d.wg
123 | array([0.17435454, 0.12405598, 0.0521202 , 0.04263506, 0.06354856,
124 | 0.07547525, 0.0702496 ])
125 |
126 | """
127 |
128 | def __init__(self, y, partition):
129 | groups = numpy.unique(partition)
130 | T = Theil(y).T # noqa N806
131 | ytot = y.sum(axis=0)
132 |
133 | # group totals
134 | gtot = numpy.array([y[partition == gid].sum(axis=0) for gid in groups])
135 |
136 | if ytot.size == 1: # y is 1-d
137 | sg = gtot / (ytot * 1.0)
138 | sg.shape = (sg.size, 1)
139 | else:
140 | sg = numpy.dot(gtot, numpy.diag(1.0 / ytot))
141 | ng = numpy.array([sum(partition == gid) for gid in groups])
142 | ng.shape = (ng.size,) # ensure ng is 1-d
143 | n = y.shape[0]
144 | # between group inequality
145 | sg = sg + SMALL * (sg == 0) # can't have 0 values
146 |
147 | bg = numpy.multiply(sg, numpy.log(numpy.dot(numpy.diag(n * 1.0 / ng), sg))).sum(
148 | axis=0
149 | )
150 |
151 | self.T = T
152 | self.bg = bg
153 | self.wg = T - bg
154 |
155 |
156 | class TheilDSim:
157 | """Random permutation based inference on Theil's inequality decomposition.
158 | Provides for computationally based inference regarding the inequality
159 | decomposition using random spatial permutations.
160 | See :cite:`rey_interregional_2010`.
161 |
162 | Parameters
163 | ----------
164 |
165 | y : numpy.array
166 | An array in the shape :math:`(n,t)` or :math:`(n,)`
167 | with :math:`n` taken as the observations across which inequality is
168 | calculated. If ``y`` is :math:`(n,)` then a scalar inequality value is
169 | determined. If ``y`` is :math:`(n,t)` then an array of inequality values are
170 | determined, one value for each column in ``y``.
171 | partition : numpy.array
172 | An array in the shape :math:`(n,)` of elements indicating which partition
173 | each observation belongs to. These are assumed to be exhaustive.
174 | permutations : int
175 | The number of random spatial permutations for computationally
176 | based inference on the decomposition.
177 |
178 | Attributes
179 | ----------
180 |
181 | observed : numpy.array
182 | An array in the shape :math:`(n,t)` or :math:`(n,)`
183 | representing a ``TheilD`` instance for the observed data.
184 | bg : numpy.array
185 | An array in the shape ``(permutations+1, t)``
186 | representing between group inequality.
187 | bg_pvalue : numpy.array
188 | An array in the shape :math:`(t,1)` representing the :math:`p`-value
189 | for the between group measure. Measures the percentage of the realized
190 | values that were greater than or equal to the observed ``bg`` value.
191 | Includes the observed value.
192 | wg : numpy.array
193 | An array in the shape ``(permutations+1)``
194 | representing within group inequality. Depending on the
195 | shape of ``y``, the array may be 1- or 2-dimensional.
196 |
197 | Examples
198 | --------
199 |
200 | >>> import libpysal
201 | >>> import numpy
202 | >>> from inequality.theil import TheilDSim
203 |
204 | >>> f = libpysal.io.open(libpysal.examples.get_path('mexico.csv'))
205 | >>> vnames = [f'pcgdp{dec}' for dec in range(1940, 2010, 10)]
206 | >>> y = numpy.array([f.by_col[v] for v in vnames]).T
207 | >>> regimes = numpy.array(f.by_col('hanson98'))
208 | >>> numpy.random.seed(10)
209 | >>> theil_ds = TheilDSim(y, regimes, 999)
210 |
211 | >>> theil_ds.bg_pvalue
212 | array([0.4 , 0.344, 0.001, 0.001, 0.034, 0.072, 0.032])
213 |
214 | """
215 |
216 | def __init__(self, y, partition, permutations=99):
217 | observed = TheilD(y, partition)
218 | bg_ct = observed.bg == observed.bg # already have one extreme value
219 | bg_ct = bg_ct * 1.0
220 | results = [observed]
221 | for _ in range(permutations):
222 | yp = numpy.random.permutation(y)
223 | t = TheilD(yp, partition)
224 | bg_ct += 1.0 * t.bg >= observed.bg
225 | results.append(t)
226 | self.results = results
227 | self.T = observed.T
228 | self.bg_pvalue = bg_ct / (permutations * 1.0 + 1)
229 | self.bg = numpy.array([r.bg for r in results])
230 | self.wg = numpy.array([r.wg for r in results])
231 |
--------------------------------------------------------------------------------
/inequality/utils.py:
--------------------------------------------------------------------------------
1 | from functools import wraps
2 |
3 | import numpy as np
4 | import pandas as pd
5 |
6 |
7 | def consistent_input(func):
8 | @wraps(func)
9 | def wrapper(data, *args, column=None, **kwargs):
10 | # If input is a DataFrame, extract the specified column
11 | if isinstance(data, pd.DataFrame):
12 | if column is None:
13 | raise ValueError(
14 | "For DataFrame input, 'column' argument must be provided."
15 | )
16 | data = data[column].values
17 | # If input is a series, numpy array, or list, no transformation needed
18 | elif isinstance(data, pd.Series | np.ndarray | list):
19 | data = np.asarray(data)
20 | else:
21 | raise TypeError(
22 | "Input should be a sequence, numpy array, or pandas DataFrame."
23 | )
24 |
25 | return func(data, *args, **kwargs)
26 |
27 | return wrapper
28 |
29 |
30 | # Example function using the decorator
31 | @consistent_input
32 | def compute_mean(data):
33 | return np.mean(data)
34 |
35 |
36 | # Usage
37 | # df = pd.DataFrame({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
38 | # print(compute_mean(df, column="a")) # Output: 2.5
39 |
40 | # arr = np.array([1, 2, 3, 4])
41 | # print(compute_mean(arr)) # Output: 2.5
42 |
43 | # lst = [1, 2, 3, 4]
44 | # print(compute_mean(lst)) # Output: 2.5
45 |
--------------------------------------------------------------------------------
/inequality/wolfson.py:
--------------------------------------------------------------------------------
1 | """
2 | Wolfson Bipolarization Index Module
3 |
4 | This module provides functions to calculate the Lorenz curve, Gini coefficient,
5 | and Wolfson Bipolarization Index for a given distribution of income or wealth.
6 |
7 | Author:
8 | Serge Rey
9 | """
10 |
11 | import numpy as np
12 |
13 | from .gini import Gini
14 | from .utils import consistent_input
15 |
16 | __all__ = ["wolfson", "lorenz_curve"]
17 |
18 |
19 | @consistent_input
20 | def lorenz_curve(data):
21 | """
22 | Calculate the Lorenz curve for a given distribution.
23 |
24 | This function takes an income or wealth distribution as input. The input
25 | can be a sequence, a NumPy array, or a Pandas DataFrame. If a DataFrame
26 | is provided, the `column` parameter must be used to specify which column
27 | contains the income or wealth values.
28 |
29 | Parameters
30 | ----------
31 | data : array-like or array
32 | A sequence or NumPy array representing the income or
33 | wealth distribution.
34 |
35 | Returns
36 | -------
37 | tuple
38 | Two numpy arrays: the first represents the cumulative share of the
39 | population, and the second represents the cumulative share of
40 | the income/wealth.
41 |
42 | Example
43 | -------
44 | >>> income = [20000, 25000, 27000, 30000, 35000, 45000, 60000, 75000, 80000, 120000]
45 | >>> population, income_share = lorenz_curve(income)
46 | >>> print(population[:2], income_share[:2])
47 | [0. 0.1] [0. 0.03868472]
48 | """
49 | sorted_y = np.sort(data)
50 | cumulative_y = np.cumsum(sorted_y)
51 | cumulative_y = np.insert(cumulative_y, 0, 0)
52 | cumulative_y = cumulative_y / cumulative_y[-1]
53 | cumulative_population = np.linspace(0, 1, len(data) + 1)
54 | return cumulative_population, cumulative_y
55 |
56 |
57 | @consistent_input
58 | def wolfson(data):
59 | """
60 | Calculate the Wolfson Bipolarization Index for a given income distribution.
61 |
62 | This function takes an income distribution and calculates the Wolfson
63 | Bipolarization Index. The input can be a sequence or a NumPy array.
64 | The Wolfson index is constructed from the polarization curve, which is
65 | a rotation and rescaling of the Lorenz curve by the median income:
66 |
67 | .. math::
68 |
69 | W = (2D_{50} - G)\\frac{\\mu}{m}
70 |
71 | Where :math:`D_{50} =0.5 - L(0.5)`, :math:`L(0.5)` is the value of the
72 | Lorenz curve at the median, :math:`G` is the Gini index, :math:`\\mu`
73 | is the mean, and :math:`m` is the median.
74 |
75 | See: :cite:`wolfson1994WhenInequalities`.
76 |
77 | Parameters
78 | ----------
79 | data : array-like or array
80 | A sequence or NumPy array representing the income or
81 | wealth distribution.
82 |
83 | Returns
84 | -------
85 | float
86 | The Wolfson Bipolarization Index value.
87 |
88 | Example
89 | -------
90 | >>> import pandas as pd
91 | >>> income_distribution = [20000, 25000, 27000, 30000, 35000, 45000, 60000,
92 | ... 75000, 80000, 120000]
93 | >>> wolfson_index = wolfson(income_distribution)
94 | >>> print(f"Wolfson Bipolarization Index: {wolfson_index:.4f}")
95 | Wolfson Bipolarization Index: 0.2013
96 |
97 | >>> df = pd.DataFrame({'income': [6, 6, 8, 8, 10, 10, 12, 12]})
98 | >>> wolfson_index = wolfson(df, column='income')
99 | >>> print(f"Wolfson Bipolarization Index: {wolfson_index:.4f}")
100 | Wolfson Bipolarization Index: 0.0833
101 | """
102 | y = np.array(data)
103 | y_med = np.median(y)
104 | ordinate, lc = lorenz_curve(y)
105 | l50 = np.interp(0.5, ordinate, lc)
106 | d50 = 0.5 - l50
107 | rat = y.mean() / y_med
108 | g = Gini(y).g
109 | w = (2 * d50 - g) * rat
110 |
111 | return w
112 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 |
2 |
3 | [build-system]
4 | requires = ["setuptools>=61.0", "setuptools_scm[toml]>=6.2"]
5 | build-backend = "setuptools.build_meta"
6 |
7 | [tool.setuptools_scm]
8 |
9 | [project]
10 | name = "inequality"
11 | dynamic = ["version"]
12 | authors = [
13 | { name = "PySAL Developers", email = "pysal-dev@googlegroups.com" },
14 | ]
15 | maintainers = [{ name = "PySAL Developers" }]
16 | license = { text = "BSD 3-Clause" }
17 | description = "inequality: Spatial inequality analysis"
18 | keywords = ["spatial statistics", "spatial inequality"]
19 | readme = "README.md"
20 | classifiers = [
21 | "Programming Language :: Python :: 3",
22 | "License :: OSI Approved :: BSD License",
23 | "Operating System :: OS Independent",
24 | "Intended Audience :: Science/Research",
25 | "Topic :: Scientific/Engineering :: GIS",
26 | ]
27 | requires-python = ">=3.10"
28 | dependencies = [
29 | "libpysal>=4.5",
30 | "matplotlib>=3.6",
31 | "numpy>=1.23",
32 | "scipy>=1.8",
33 | ]
34 |
35 | [project.urls]
36 | Home = "https://github.com/pysal/inequality/"
37 | Repository = "https://github.com/pysal/inequality"
38 |
39 | [project.optional-dependencies]
40 | dev = [
41 | "pre-commit",
42 | "ruff",
43 | ]
44 | docs = [
45 | "nbsphinx",
46 | "numpydoc",
47 | "sphinx",
48 | "sphinxcontrib-bibtex",
49 | "sphinx-gallery",
50 | "sphinx_bootstrap_theme",
51 | "pydata-sphinx-theme"
52 | ]
53 | tests = [
54 | "codecov",
55 | "mapclassify",
56 | "jupyterlab",
57 | "folium",
58 | "pytest",
59 | "seaborn",
60 | "pytest-cov",
61 | "pytest-xdist",
62 | ]
63 | pen = [
64 | "matplotlib",
65 | "seaborn",
66 | "pandas",
67 | ]
68 | [tool.setuptools.packages.find]
69 | include = ["inequality", "inequality.*"]
70 |
71 |
72 | [tool.ruff]
73 | line-length = 88
74 |
75 | [tool.ruff.lint]
76 | select = ["E", "F", "W", "I", "UP", "N", "B", "A", "C4", "SIM", "ARG"]
77 | exclude = ["inequality/tests/*", "docs/*"]
78 |
79 | [tool.ruff.lint.per-file-ignores]
80 | "*__init__.py" = [
81 | "F401", # imported but unused
82 | ]
83 |
84 | [tool.coverage.run]
85 | source = ["./inequality"]
86 |
87 | [tool.coverage.report]
88 | exclude_lines = [
89 | "raise NotImplementedError",
90 | "except ModuleNotFoundError:",
91 | "except ImportError",
92 | ]
93 | ignore_errors = true
94 | omit = ["inequality/tests/*", "docs/conf.py"]
95 |
96 |
--------------------------------------------------------------------------------