├── .gitattributes ├── TODO.md ├── docs ├── requirements.txt ├── source │ ├── quickstart.rst │ ├── index.rst │ └── conf.py └── Makefile ├── src └── ineqpy │ ├── grouped │ ├── __init__.py │ ├── inequality.py │ └── stats.py │ ├── __init__.py │ ├── utils.py │ ├── statistics.py │ ├── _statistics.py │ ├── inequality.py │ └── api.py ├── examples ├── download_data.md ├── quick_start.py ├── alternatives_comparision.py └── quick_start.ipynb ├── pyrightconfig.json ├── .github └── workflows │ ├── docs.yml │ ├── cicd.yml │ ├── release.yml │ └── pypi.yml ├── tests ├── test_api.py ├── data │ ├── weightedXW.csv │ └── repXW.csv ├── test_statistics.py ├── test_inequality.py └── test_moments.ipynb ├── license.txt ├── Makefile ├── .gitignore ├── pyproject.toml └── README.md /.gitattributes: -------------------------------------------------------------------------------- 1 | ineqpy/_version.py export-subst 2 | -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | # TODO 2 | 3 | - [x] adopt uv 4 | - [x] update deps and python ver 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | sphinx-autoapi 3 | sphinx-rtd-theme 4 | myst-parser 5 | numpydoc 6 | -------------------------------------------------------------------------------- /docs/source/quickstart.rst: -------------------------------------------------------------------------------- 1 | 2 | .. include:: ../../README.md 3 | :parser: myst_parser.sphinx_ 4 | 5 | -------------------------------------------------------------------------------- /src/ineqpy/grouped/__init__.py: -------------------------------------------------------------------------------- 1 | """Grouped subpackage. 2 | 3 | Contains inequality and stats module. 
4 | """ 5 | from ineqpy.grouped import inequality, stats 6 | 7 | __all__ = ["inequality", "stats"] 8 | -------------------------------------------------------------------------------- /src/ineqpy/__init__.py: -------------------------------------------------------------------------------- 1 | """IneqPy: A python package for inequality analysis.""" 2 | 3 | from ineqpy import api, grouped, inequality, statistics, utils 4 | 5 | 6 | __all__ = ["inequality", "statistics", "grouped", "api", "utils"] 7 | -------------------------------------------------------------------------------- /examples/download_data.md: -------------------------------------------------------------------------------- 1 | # Get Data 2 | 3 | When we start to work with Survey Data, could be hard find data to apply this kind of analysis. 4 | For this reason I let you a nice web from dowload data to start to work with. 5 | 6 | - ![Link to download data.](http://www.icpsr.umich.edu/icpsrweb/ICPSR/studies/4517) 7 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. IneqPy documentation master file, created by 2 | sphinx-quickstart on Mon Jan 17 19:56:36 2022. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to IneqPy's documentation! 7 | ================================== 8 | 9 | .. 
toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | ./quickstart.rst 14 | 15 | Indices and tables 16 | ================== 17 | 18 | * :ref:`genindex` 19 | * :ref:`modindex` 20 | * :ref:`search` 21 | -------------------------------------------------------------------------------- /pyrightconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": [ 3 | "src" 4 | ], 5 | 6 | "exclude": [ 7 | "deps", 8 | ".venv", 9 | "**/node_modules", 10 | "**/__pycache__", 11 | "src/experimental", 12 | "src/typestubs" 13 | ], 14 | 15 | "ignore": [ 16 | "src/oldstuff" 17 | ], 18 | 19 | "stubPath": "src/stubs", 20 | "venv": ".venv", 21 | 22 | "reportMissingImports": true, 23 | "reportMissingTypeStubs": false, 24 | 25 | "pythonPlatform": "Linux", 26 | 27 | "executionEnvironments": [ 28 | { 29 | "root": "src" 30 | } 31 | ] 32 | } 33 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = source 8 | BUILDDIR = build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
"""IneqPy quick-start example.

Loads the EU-SILC sample dataset shipped with the package and computes a
battery of weighted inequality and distributional statistics.
"""
# load packages
from pathlib import Path

import pandas as pd

import ineqpy as ineq

# inputs
# BUG FIX: the original wrote Path("ineq.__file__"), which builds a path from
# the *literal string* "ineq.__file__"; we need the module's real location.
data_path = Path(ineq.__file__).parent / "examples/eusilc.csv"
data = pd.read_csv(data_path, index_col=0).dropna()
svy = ineq.api.Survey(data, weights="rb050")

colname = "eqincome"

# inequality indices
svy.gini(colname)
svy.atkinson(colname)
svy.theil(colname)

# weighted descriptive statistics
svy.mean(colname)
svy.percentile(colname)
svy.kurt(colname)
svy.skew(colname)

# Lorenz curve plot
svy.lorenz(colname).plot(figsize=(5, 5))

# also works passing variables (arrays / Series) instead of column names.
x = data.eqincome
w = data.rb050
# BUG FIX: `var` is not exported at the package root; it lives in the
# `statistics` submodule (see `ineqpy/__init__.py`).
ineq.statistics.var(variable=x, weights=w)
21 | token: ${{ secrets.GH_TOKEN }} 22 | # this is a built-in strategy in release-please, see "Action Inputs" 23 | # for more options 24 | # https://github.com/googleapis/release-please-action?tab=readme-ov-file#how-should-i-write-my-commits 25 | release-type: simple 26 | -------------------------------------------------------------------------------- /tests/test_api.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | import ineqpy 5 | 6 | 7 | def test_api(): 8 | # todo improve this test. 9 | # only checks that all methods works. 10 | svy = ineqpy.api.Survey 11 | data = np.random.randint(0, 100, (10, 3)) 12 | w = np.random.randint(1, 10, 10).reshape(-1, 1) 13 | data = np.hstack([data, w]) 14 | columns = list("abcw") 15 | 16 | df = svy(data=data, columns=columns, weights="w") 17 | df.mean("a") 18 | df.var("a") 19 | df.skew("a") 20 | df.kurt("a") 21 | df.gini("a") 22 | df.atkinson("a") 23 | df.theil("a") 24 | df.percentile("a") 25 | 26 | 27 | def test_df(): 28 | # GH #15 29 | LEN = 10 30 | values = [np.arange(LEN), np.random.randint(1, 10, LEN)] 31 | df = pd.DataFrame(values, index=["x", "n"]).T 32 | 33 | svy = ineqpy.api.Survey(df, df.index, df.columns, weights="n") 34 | svy.lorenz("x") 35 | -------------------------------------------------------------------------------- /.github/workflows/pypi.yml: -------------------------------------------------------------------------------- 1 | name: Upload Python Package 2 | on: 3 | release: 4 | types: [published] 5 | permissions: 6 | contents: read 7 | 8 | jobs: 9 | 10 | release: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | - name: Setup venv 15 | if: github.event_name == 'release' && github.event.action == 'created' 16 | uses: actions/setup-python@v4 17 | with: 18 | python-version: '3.8' 19 | - name: Install dependencies 20 | run: make install 21 | - name: Build package 22 | if: github.event_name == 
'release' && github.event.action == 'created' 23 | run: make build 24 | - name: Publish package 25 | if: github.event_name == 'release' && github.event.action == 'created' 26 | # uses: pypa/gh-action-pypi-publish@release/v1 27 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 28 | with: 29 | user: __token__ 30 | password: ${{ secrets.PYPI_API_TOKEN }} 31 | -------------------------------------------------------------------------------- /tests/data/weightedXW.csv: -------------------------------------------------------------------------------- 1 | ,x,w 2 | 0,400,2 3 | 1,16,6 4 | 2,104,2 5 | 3,966,1 6 | 4,682,8 7 | 5,296,7 8 | 6,505,5 9 | 7,384,7 10 | 8,984,2 11 | 9,954,4 12 | 10,201,4 13 | 11,583,6 14 | 12,391,4 15 | 13,340,3 16 | 14,412,8 17 | 15,835,2 18 | 16,937,7 19 | 17,705,1 20 | 18,969,4 21 | 19,961,6 22 | 20,909,1 23 | 21,933,2 24 | 22,243,7 25 | 23,986,7 26 | 24,467,7 27 | 25,231,3 28 | 26,797,4 29 | 27,723,1 30 | 28,401,2 31 | 29,128,1 32 | 30,347,7 33 | 31,826,4 34 | 32,273,3 35 | 33,19,3 36 | 34,381,6 37 | 35,912,5 38 | 36,616,4 39 | 37,547,4 40 | 38,453,8 41 | 39,442,2 42 | 40,28,5 43 | 41,482,6 44 | 42,690,7 45 | 43,717,4 46 | 44,908,3 47 | 45,28,5 48 | 46,691,2 49 | 47,906,6 50 | 48,631,4 51 | 49,109,7 52 | 50,408,8 53 | 51,340,8 54 | 52,497,6 55 | 53,743,3 56 | 54,242,7 57 | 55,7,5 58 | 56,505,1 59 | 57,133,4 60 | 58,359,6 61 | 59,496,8 62 | 60,166,5 63 | 61,176,5 64 | 62,827,4 65 | 63,904,3 66 | 64,738,7 67 | 65,315,4 68 | 66,255,6 69 | 67,655,3 70 | 68,6,3 71 | 69,512,1 72 | 70,488,2 73 | 71,324,7 74 | 72,191,3 75 | 73,869,5 76 | 74,421,3 77 | -------------------------------------------------------------------------------- /license.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) [year] [fullname] 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the 
"Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | 3 | ##@ Utility 4 | .PHONY: help 5 | help: ## Display this help 6 | @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z_-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) 7 | 8 | 9 | .PHONY: venv 10 | uv: ## Install uv 11 | @command -v uv >/dev/null 2>&1 || curl -LsSf https://astral.sh/uv/install.sh | sh 12 | 13 | .PHONY: dev 14 | dev: uv ## Install dev dependencies 15 | uv sync --dev 16 | 17 | .PHONY: install 18 | install: uv ## Install dependencies 19 | uv sync 20 | 21 | .PHONY: test 22 | test: ## Run tests 23 | uv run pytest 24 | 25 | .PHONY: lint 26 | lint: ## Run linters 27 | uv run ruff check ./src ./tests 28 | 29 | .PHONY: fix 30 | fix: ## Fix lint errors 31 | uv run ruff check ./src ./tests --fix 32 | 33 | .PHONY: cov 34 
| cov: ## Run tests with coverage 35 | uv run pytest --cov=src --cov-report=term-missing 36 | 37 | .PHONY: pages 38 | pages: doc ## Build documentation and push to gh-pages 39 | mkdir gh-pages 40 | touch gh-pages/.nojekyll 41 | cp -r docs/build/html/* gh-pages/ 42 | 43 | .PHONY: doc 44 | doc: ## Build documentation 45 | cd docs && uv run make html 46 | 47 | .PHONY: build 48 | build: ## Build package 49 | uv build 50 | 51 | .PHONY: dbash 52 | dbash: ## Run docker 53 | docker run -v ${PWD}:/git/$(shell basename ${PWD}) -w /git/$(shell basename ${PWD}) -it python:3.12 /bin/bash 54 | -------------------------------------------------------------------------------- /tests/test_statistics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.testing as nptest 3 | import pytest 4 | import scipy.stats as sc 5 | 6 | from ineqpy import statistics, utils 7 | 8 | 9 | def gen_inputs(n_tuples=100): 10 | for _ in range(n_tuples): 11 | (x, w) = utils.generate_data_to_test((3, 7)) 12 | 13 | # NOBUG: _ is `repeated_w` which is a vector of ones. 
14 | repeated_x, _ = utils.repeat_data_from_weighted(x, w) 15 | yield x, w, repeated_x 16 | 17 | 18 | @pytest.mark.parametrize("x,w,r_x", gen_inputs()) 19 | def test_mean(x, w, r_x): 20 | real = np.mean(r_x) 21 | obtained = statistics.mean(x, w) 22 | nptest.assert_almost_equal(obtained, real) 23 | 24 | 25 | @pytest.mark.parametrize("x,w,r_x", gen_inputs()) 26 | def test_variance(x, w, r_x): 27 | real = np.var(r_x) 28 | obtained = statistics.var(x, w) 29 | nptest.assert_almost_equal(obtained, real) 30 | 31 | 32 | @pytest.mark.parametrize("x,w,r_x", gen_inputs()) 33 | def test_kurt(x, w, r_x): 34 | real = sc.kurtosis(r_x) + 3 35 | obtained = statistics.kurt(x, w) 36 | nptest.assert_almost_equal(obtained, real) 37 | 38 | 39 | @pytest.mark.parametrize("x,w,r_x", gen_inputs()) 40 | def test_skew(x, w, r_x): 41 | real = sc.skew(r_x) 42 | obtained = statistics.skew(x, w) 43 | nptest.assert_almost_equal(obtained, real) 44 | 45 | 46 | @pytest.mark.parametrize("x,w,r_x", gen_inputs()) 47 | def test_coef_variation(x, w, r_x): 48 | real = np.var(r_x) ** 0.5 / abs(np.mean(r_x)) 49 | obtained = statistics.coef_variation(x, w) 50 | nptest.assert_almost_equal(obtained, real) 51 | 52 | 53 | @pytest.mark.parametrize("x,w,r_x", gen_inputs()) 54 | def test_percentile(x, w, r_x): 55 | p = 50 56 | real = np.percentile(r_x, p, method="lower") 57 | obtained = statistics.percentile(x, w, p=p) 58 | nptest.assert_almost_equal( 59 | obtained, real, err_msg=msg(real, obtained, r_x, x, w) 60 | ) 61 | 62 | 63 | def msg(real, obtained, r_x, x, w): 64 | if abs(real - obtained) > 1e-6: 65 | return f"\nr_x = {str(r_x)}\nx = {str(x)}\nw = {str(w)}" 66 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | \.idea/ 3 | 4 | *\.egg-info/ 5 | 6 | */__pycache__/ 7 | 8 | */\.ipynb_checkpoints/ 9 | 10 | *.pyc 11 | 12 | \.ropeproject/ 13 | 14 | *.sublime* 15 | 16 | .cache/ 17 | 18 
| # Created by https://www.gitignore.io/api/python 19 | # Edit at https://www.gitignore.io/?templates=python 20 | 21 | ### Python ### 22 | # Byte-compiled / optimized / DLL files 23 | __pycache__/ 24 | *.py[cod] 25 | *$py.class 26 | 27 | # C extensions 28 | *.so 29 | 30 | # Distribution / packaging 31 | .Python 32 | build/ 33 | develop-eggs/ 34 | dist/ 35 | downloads/ 36 | eggs/ 37 | .eggs/ 38 | lib/ 39 | lib64/ 40 | parts/ 41 | sdist/ 42 | var/ 43 | wheels/ 44 | share/python-wheels/ 45 | *.egg-info/ 46 | .installed.cfg 47 | *.egg 48 | MANIFEST 49 | 50 | # PyInstaller 51 | # Usually these files are written by a python script from a template 52 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 53 | *.manifest 54 | *.spec 55 | 56 | # Installer logs 57 | pip-log.txt 58 | pip-delete-this-directory.txt 59 | 60 | # Unit test / coverage reports 61 | htmlcov/ 62 | .tox/ 63 | .nox/ 64 | .coverage 65 | .coverage.* 66 | .cache 67 | nosetests.xml 68 | coverage.xml 69 | *.cover 70 | .hypothesis/ 71 | .pytest_cache/ 72 | 73 | # Translations 74 | *.mo 75 | *.pot 76 | 77 | # Django stuff: 78 | *.log 79 | local_settings.py 80 | db.sqlite3 81 | 82 | # Flask stuff: 83 | instance/ 84 | .webassets-cache 85 | 86 | # Scrapy stuff: 87 | .scrapy 88 | 89 | # Sphinx documentation 90 | docs/_build/ 91 | 92 | # PyBuilder 93 | target/ 94 | 95 | # Jupyter Notebook 96 | .ipynb_checkpoints 97 | 98 | # IPython 99 | profile_default/ 100 | ipython_config.py 101 | 102 | # pyenv 103 | .python-version 104 | 105 | # celery beat schedule file 106 | celerybeat-schedule 107 | 108 | # SageMath parsed files 109 | *.sage.py 110 | 111 | # Environments 112 | .env 113 | .venv 114 | env/ 115 | venv/ 116 | ENV/ 117 | env.bak/ 118 | venv.bak/ 119 | 120 | # Spyder project settings 121 | .spyderproject 122 | .spyproject 123 | 124 | # Rope project settings 125 | .ropeproject 126 | 127 | # mkdocs documentation 128 | /site 129 | 130 | # mypy 131 | .mypy_cache/ 132 | .dmypy.json 133 | 
dmypy.json 134 | 135 | # Pyre type checker 136 | .pyre/ 137 | 138 | ### Python Patch ### 139 | .venv/ 140 | 141 | # End of https://www.gitignore.io/api/python 142 | **/.DS_Store 143 | .autoenv 144 | Session.vim 145 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel", "setuptools_scm"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "IneqPy" 7 | dynamic = ["version"] 8 | description = "A Python Package To Quantitative Analysis Of Inequality" 9 | readme = "README.md" 10 | authors = [ 11 | {name = "Max", email = "ineqpy@mxm.mozmail.com"}, 12 | ] 13 | license = {text = "MIT"} 14 | requires-python = ">=3.10" 15 | classifiers = [ 16 | "Intended Audience :: Science/Research", 17 | "License :: OSI Approved :: MIT License", 18 | "Programming Language :: Python :: 3", 19 | "Programming Language :: Python :: 3.10", 20 | "Programming Language :: Python :: 3.11", 21 | "Programming Language :: Python :: 3.12", 22 | "Programming Language :: Python :: 3.13", 23 | ] 24 | dependencies = [ 25 | "numpy", 26 | "pandas", 27 | "numba", 28 | "scipy>=1.14.1", 29 | ] 30 | 31 | [project.urls] 32 | Homepage = "https://github.com/asdf8601/IneqPy" 33 | 34 | [tool.setuptools] 35 | package-dir = {"" = "src"} 36 | 37 | [tool.setuptools_scm] 38 | 39 | [tool.setuptools.packages.find] 40 | where = ["src"] 41 | 42 | 43 | [tool.pyright] 44 | include = ["src"] 45 | exclude = ["**/node_modules", 46 | "**/__pycache__", 47 | "src/experimental", 48 | "deps", 49 | "src/typestubs" 50 | ] 51 | 52 | [tool.lint.ignore] 53 | ignore = ["src/oldstuff"] 54 | stubPath = "src/stubs" 55 | # venv = "env367" 56 | reportMissingImports = true 57 | reportMissingTypeStubs = false 58 | pythonPlatform = "Linux" 59 | 60 | executionEnvironments = [ 61 | { root = "src" } 62 | ] 63 | 64 | [tool.ruff.format] 65 | quote-style 
= "preserve" 66 | 67 | [tool.ruff.lint] 68 | select = [ 69 | 'F', 70 | 'E', 71 | 'W', 72 | 'UP', 73 | 'NPY201', 74 | ] 75 | ignore = [ 76 | 'E501', 77 | 'E741', 78 | 'E712', 79 | 'E721', 80 | 'UP038', # non-pep604-isinstance -- https://github.com/astral-sh/ruff/issues/7871 81 | ] 82 | 83 | [tool.ruff.lint.per-file-ignores] 84 | "__init__.py" = ["F401"] 85 | "**/__init__.py" = [ 86 | 'E402', 87 | 'F401', 88 | 'F403', 89 | 'F405', 90 | ] 91 | "**/__init__.pyi" = [ 92 | 'E402', 93 | 'F401', 94 | 'F403', 95 | 'F405', 96 | ] 97 | "skimage/_shared/testing.py" = ['F401'] 98 | "doc/examples/**/*.py" = ['E402'] 99 | 100 | [tool.ruff.lint.pydocstyle] 101 | convention = 'numpy' 102 | 103 | [tool.ruff.lint.isort] 104 | known-first-party = ["ineqpy"] 105 | combine-as-imports = true 106 | 107 | [tool.uv] 108 | dev-dependencies = [ 109 | "myst-parser>=4.0.0", 110 | "numpydoc>=1.8.0", 111 | "pytest-cov>=6.0.0", 112 | "pytest>=8.3.3", 113 | "ruff>=0.7.4", 114 | "sphinx-autoapi>=3.3.3", 115 | "sphinx-rtd-theme>=3.0.2", 116 | "sphinx>=8.1.3", 117 | ] 118 | -------------------------------------------------------------------------------- /src/ineqpy/grouped/inequality.py: -------------------------------------------------------------------------------- 1 | """inequality module.""" 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | from ineqpy import _statistics as stats, inequality as ineq, utils 7 | 8 | 9 | def atkinson_group( 10 | data: pd.DataFrame = None, 11 | income: str | pd.DataFrame | np.ndarray = None, 12 | weights: str | pd.DataFrame | np.ndarray = None, 13 | group: str | pd.DataFrame | np.ndarray = None, 14 | e: float = 0.5, 15 | ): 16 | r"""Calculate atkinson index. 17 | 18 | The Atkinson index (also known as the Atkinson measure or Atkinson 19 | grouped measure) is a measure of income grouped developed by British 20 | economist Anthony Barnes Atkinson. 
The measure is useful in determining 21 | which end of the distribution contributed most to the observed grouped.The 22 | index is subgroup decomposable. This means that overall grouped in the 23 | population can be computed as the sum of the corresponding Atkinson indices 24 | within each group, and the Atkinson index of the group mean incomes. 25 | 26 | Parameters 27 | ---------- 28 | income : str or np.array 29 | Income variable, you can pass name of variable in `df` or array-like 30 | weights : str or np.array 31 | probability or weights, you can pass name of variable in `df` or 32 | array-like 33 | groups : str or np.array 34 | stratum, name of stratum in `df` or array-like 35 | e : int, optional 36 | Value of epsilon parameter 37 | data : pd.DataFrame, optional 38 | DataFrame that's contains the previous data. 39 | 40 | Returns 41 | ------- 42 | atkinson_by_group : float 43 | 44 | Reference 45 | --------- 46 | Atkinson index. (2017, March 12). In Wikipedia, The Free Encyclopedia. 47 | Retrieved 14:52, May 15, 2017, from 48 | https://en.wikipedia.org/w/index.php?title=Atkinson_index&oldid=769991852 49 | 50 | TODO 51 | ---- 52 | - Review function, has different results with stata. 
53 | """ 54 | if (weights is None) and (data is None): 55 | weights = utils.not_empty_weights(weights, income) 56 | 57 | if data is None: 58 | data = utils._to_df(income=income, weights=weights, group=group) 59 | income = "income" 60 | weights = "weights" 61 | group = "group" 62 | 63 | N = data.shape[0] 64 | 65 | def a_h(df): 66 | """Funtion alias to calculate atkinson from a DataFrame.""" 67 | if df is None: 68 | raise ValueError 69 | 70 | inc = df[income].values 71 | w = df[weights].values 72 | atk = ineq.atkinson(income=inc, weights=w, e=e) 73 | out = atk * (len(df) / N) 74 | 75 | return out 76 | 77 | # main calc: 78 | data = data.copy() 79 | groupped = data.groupby(group) 80 | atk_by_group = groupped.apply(a_h) 81 | mu_by_group = groupped.apply(lambda d: stats.mean(d[income], d[weights])) 82 | out = atk_by_group.sum() + ineq.atkinson(income=mu_by_group.values) 83 | 84 | return out 85 | -------------------------------------------------------------------------------- /src/ineqpy/utils.py: -------------------------------------------------------------------------------- 1 | """Useful functions that make easier develop other functions.""" 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | 7 | def _to_df(*args, **kwargs) -> pd.DataFrame: 8 | res = pd.DataFrame() 9 | 10 | if args != (): 11 | res = pd.DataFrame([*args]).T 12 | 13 | if kwargs is not None: 14 | df = pd.DataFrame.from_dict(kwargs, orient="columns") 15 | if res.empty: 16 | res = df 17 | else: 18 | res = pd.concat([res, df], axis=1) 19 | 20 | return res 21 | 22 | 23 | def _apply_to_df(func, df, x, weights, *args, **kwargs): 24 | """Generalize main arguments as Series of a pd.Dataframe. 25 | 26 | Parameters 27 | ---------- 28 | func : function 29 | Function to convert his arguments in Series of an Dataframe. 30 | df : pandas.Dataframe 31 | DataFrame whats contains the Series `x_name` and `w_name`. 32 | x_name : str 33 | Name of the column in `df`. 
def not_empty_weights(weights, like):
    """Return usable weights, normalized to sum to one.

    When `weights` is None, a uniform weight vector shaped like `like`
    is returned instead (all ones, deliberately *not* normalized, matching
    the historical behavior of this helper).

    Parameters
    ----------
    weights : array-like or None
        Raw weights; copied before normalization so the input is untouched.
    like : array-like
        Template used to size the fallback uniform weights.

    Returns
    -------
    weights : array-like
    """
    if weights is None:
        # No weights supplied: every observation counts the same.
        return np.ones_like(like)

    # Work on a copy, then scale so the weights sum to one.
    scaled = weights.copy()
    return scaled / np.sum(scaled)
def repeat_data_from_weighted(x, w):
    """Expand weighted observations into repeated (unweighted) data.

    Each value ``x[i]`` is repeated ``w[i]`` times; the returned weights are
    all ones. Only integer weights are supported (a fractional repeat count
    has no meaning here).

    Parameters
    ----------
    x, w : array-like
        Values and their integer frequencies.

    Returns
    -------
    repeated_x, repeated_w : np.ndarray
        Float arrays (matching the historical output dtype).

    Raises
    ------
    NotImplementedError
        If `w` is not an integer array.
    """
    w = np.asarray(w)
    # BUG FIX: the original only inspected w[0], so a float hiding later in
    # the array slipped through; check the whole dtype instead.
    if not np.issubdtype(w.dtype, np.integer):
        raise NotImplementedError

    # PERF: np.repeat replaces the original O(n^2) loop of np.append calls.
    # Cast x to float so the output dtype matches the old implementation
    # (np.append onto an empty float array always promoted to float64).
    repeated_x = np.repeat(np.asarray(x, dtype=float), w)
    repeated_w = np.ones(int(w.sum()))

    return repeated_x, repeated_w
The 10 | objective of this function is to make it easy to calculate the moments of 11 | the distribution that follows an estimator, eg. Can be used to calculate 12 | the variance that follows the mean. 13 | 14 | Parameters 15 | ---------- 16 | data : pandas.DataFrame 17 | Dataframe containing the series needed for the calculation 18 | x : str 19 | weights : str 20 | Name of the weights `w` in the DataFrame 21 | group : str 22 | Name of the stratum variable `h` in the DataFrame 23 | 24 | Returns 25 | ------- 26 | vhat_h : pandas.Series 27 | A series with the values of the variance of each `h` stratum. 28 | 29 | Todo 30 | ---- 31 | Review improvements. 32 | 33 | Examples 34 | -------- 35 | >>> # Computes the variance of the mean 36 | >>> data = pd.DataFrame(data=[renta, peso, estrato], 37 | columns=["renta", "peso", "estrato"]) 38 | >>> v = variance_hat_group(data) 39 | >>> v 40 | stratum 41 | 1 700.917.728,64 42 | 2 9.431.897.980,96 43 | 3 317.865.839.789,10 44 | 4 741.304.873.092,88 45 | 5 535.275.436.859,10 46 | 6 225.573.783.240,68 47 | 7 142.048.272.010,63 48 | 8 40.136.989.131,06 49 | 9 18.501.808.022,56 50 | dtype: float64 51 | 52 | >>> # the value of de variance of the mean: 53 | >>> v_total = v.sum() / peso.sum() ** 2 54 | 24662655225.947945 55 | """ 56 | if data is None: 57 | data = utils._to_df(x=variable, weights=weights, group=group) 58 | variable = "x" 59 | weights = "weights" 60 | group = "group" 61 | 62 | def v(df): 63 | r"""Calculate the variance of each stratum `h`. 64 | 65 | Parameters 66 | ---------- 67 | df : pandas.DataFrame 68 | Dataframe containing the data. 69 | 70 | Returns 71 | ------- 72 | vhat : float 73 | Value of the population variance for the stratum `h`. 74 | 75 | Notes 76 | ----- 77 | Source: 78 | .. 
math:: r`N_h ^2 \cdot fpc \cdot \frac{ \hatS ^2 _h }{n_h}` 79 | """ 80 | xi = df[variable].copy().values 81 | Nh = df[weights].sum() 82 | fpc = 1 - (len(df) / Nh) 83 | ddof = 1 if len(df) > 1 else 0 84 | shat2h = c_moment(variable=xi, order=2, ddof=ddof) 85 | return (Nh ** 2) * fpc * shat2h / len(df) 86 | 87 | return data.groupby(group).apply(v) 88 | 89 | 90 | def moment_group(data=None, variable="x", weights="w", group="h", order=2): 91 | """Calculate the asymmetry of each `h` stratum. 92 | 93 | Parameters 94 | ---------- 95 | variable : array or str 96 | weights : array or str 97 | group : array or str 98 | data : pd.DataFrame, optional 99 | order : int, optional 100 | 101 | Returns 102 | ------- 103 | moment_of_order : float 104 | 105 | TODO 106 | ---- 107 | Review calculations, it does not appear to be correct. 108 | Attempt to make a generalization of vhat_group, for any estimator. 109 | 110 | .. warning:: Actually Does Not Work! 111 | """ 112 | if data is None: 113 | data = utils._to_df(x=variable, weights=weights, group=group) 114 | variable = "x" 115 | weights = "weights" 116 | group = "group" 117 | 118 | def mh(df, weights=weights): 119 | x = df[variable].copy().values 120 | weights = utils.not_empty_weights(weights, x) 121 | Nh = df.loc[:, weights].sum() 122 | fpc = 1 - (len(df) / Nh) 123 | ddof = 1 if len(df) > 1 else 0 124 | stdm = std_moment(variable=x, weights=weights, order=order, ddof=ddof) 125 | return (Nh ** order) * fpc * stdm / len(df) 126 | 127 | return data.groupby(group).apply(mh) 128 | 129 | 130 | def quasivariance_hat_group( 131 | data=None, variable=None, weights=None, group=None 132 | ): 133 | """Calculate quasivariance. 134 | 135 | Sample variance of `variable`, calculated as the second-order central 136 | moment. 137 | 138 | Parameters 139 | ---------- 140 | data : pd.DataFrame, optional 141 | pd.DataFrame that contains all variables needed. 142 | variable : array or str 143 | variable `x` apply the statistic. 
If `data` is None then must pass this 144 | argument as array, else as string name in `data` 145 | weights : array or str 146 | weights can be interpreted as frequency, probability, 147 | density function of `x`, each element in `x`. If `data` is None then 148 | must pass this argument as array, else as string name in `data` 149 | group : array or str 150 | group is a categorical variable to calculate the statistical by each 151 | group. If `data` is None then must pass this argument as array, else as 152 | string name in `data` 153 | 154 | Returns 155 | ------- 156 | shat2_group : array or pd.Series 157 | 158 | References 159 | ---------- 160 | Moment (mathematics). (2017, May 6). In Wikipedia, The Free Encyclopedia. 161 | Retrieved 14:40, May 15, 2017, from 162 | https://en.wikipedia.org/w/index.php?title=Moment_(mathematics)&oldid=778996402 163 | 164 | Notes 165 | ----- 166 | This function is useful to calculate the variance of the mean. 167 | 168 | TODO 169 | ---- 170 | Review function 171 | """ 172 | if data is None: 173 | data = utils._to_df(x=variable, weights=weights) 174 | variable = "x" 175 | weights = "weights" 176 | 177 | def sd(df): 178 | x = variable 179 | return c_moment(variable=x, weights=weights, param=mean(x)) 180 | 181 | return data.groupby(group).apply(sd) 182 | -------------------------------------------------------------------------------- /tests/test_inequality.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from ineqpy import inequality 5 | 6 | 7 | def test_concentration_0d(): 8 | x = np.array([100]) 9 | 10 | obtained = inequality.concentration(income=x) 11 | 12 | assert np.isnan(obtained) 13 | 14 | def test_gini_2d(): 15 | x = np.array([[57], [63], [81], [79], [88], [57], [42], [3], [77], [89]]) 16 | w = np.array([[2], [5], [2], [9], [5], [7], [4], [5], [9], [9]]) 17 | obtained = inequality.gini(income=x, weights=w) 18 | expected = 0.2134389018024818 19 | 
assert obtained==expected 20 | 21 | 22 | def test_gini_1d(): 23 | x = np.array([57, 63, 81, 79, 88, 57, 42, 3, 77, 89]) 24 | w = np.array([2, 5, 2, 9, 5, 7, 4, 5, 9, 9]) 25 | obtained = inequality.gini(income=x, weights=w) 26 | expected = 0.2134389018024818 27 | assert obtained==expected 28 | 29 | 30 | def test_gini_1d_0_w(): 31 | x = np.array([2, 2]) 32 | w = np.array([1000000, 1]) 33 | obtained = inequality.gini(income=x, weights=w) 34 | expected = 0 35 | assert obtained==expected 36 | 37 | 38 | def test_gini_1d_0_series(): 39 | x = np.array([2, 2]) 40 | # w = np.array([1000000, 1]) 41 | obtained = inequality.gini(income=x) 42 | expected = 0 43 | assert obtained==expected 44 | 45 | 46 | def test_gini_1d_1_series(): 47 | x = np.array([0, 1]) 48 | # w = np.array([1000000, 1]) 49 | obtained = inequality.gini(income=x) 50 | expected = 1 51 | assert obtained==expected 52 | 53 | 54 | def test_gini_1d_1_w(): 55 | x = np.array([0, 1]) 56 | w = np.array([1, 1]) 57 | obtained = inequality.gini(income=x, weights=w) 58 | expected = 1 59 | assert obtained==expected 60 | 61 | 62 | def test_atkinson_2d(): 63 | x = np.array([[57], [63], [81], [79], [88], [57], [42], [3], [77], [89]]) 64 | w = np.array([[2], [5], [2], [9], [5], [7], [4], [5], [9], [9]]) 65 | obtained = inequality.atkinson(income=x, weights=w) 66 | expected = 0.06537929778911322 67 | assert obtained==expected 68 | 69 | 70 | def test_atkinson_1d(): 71 | x = np.array([57, 63, 81, 79, 88, 57, 42, 3, 77, 89]) 72 | w = np.array([2, 5, 2, 9, 5, 7, 4, 5, 9, 9]) 73 | obtained = inequality.atkinson(income=x, weights=w) 74 | expected = 0.06537929778911322 75 | assert obtained==expected 76 | 77 | 78 | def test_atkinson_1d_1_w(): 79 | x = np.array([1, 1]) 80 | w = np.array([1, 1]) 81 | obtained = inequality.atkinson(income=x, weights=w) 82 | expected = 0 83 | assert obtained==expected 84 | 85 | def test_theil_1d_series(): 86 | """ Testing theil with no weights. 
Every value is the same """ 87 | x = np.repeat(5, 10) 88 | 89 | obtained = inequality.theil(income=x) 90 | expected = 0 91 | 92 | np.testing.assert_almost_equal(obtained, expected) 93 | 94 | def test_theil_1d_series_2(): 95 | x = np.arange(1, 10) 96 | 97 | obtained = inequality.theil(income=x) 98 | expected = 0.1473838569435545 99 | 100 | np.testing.assert_almost_equal(obtained, expected) 101 | 102 | def test_theil_1d_1_w(): 103 | # TODO check this 104 | x = np.array([1, 1]) 105 | w = np.array([1, 1]) 106 | obtained = inequality.theil(income=x, weights=w) 107 | expected = 0 108 | assert obtained==expected 109 | 110 | def test_ratio_equality(): 111 | x = np.array([1, 9]) 112 | w = np.array([9, 1]) 113 | obtained = inequality.top_rest(income=x, weights=w) 114 | assert obtained == 1.0 115 | 116 | def test_ratio_equality_fracc(): 117 | x = np.array([1, 9]) 118 | w = np.array([.9, .1]) 119 | obtained = inequality.top_rest(income=x, weights=w) 120 | assert obtained == 1.0 121 | 122 | def test_ratio_0d(): 123 | x = np.array([100]) 124 | obtained = inequality.top_rest(income=x) 125 | 126 | assert np.isnan(obtained) 127 | 128 | def test_ratio_1d(): 129 | x = np.array([57, 63, 81, 79, 88, 42, 3, 77, 89]) 130 | w = np.array([9, 5, 2, 9, 5, 4, 5, 9, 9]) 131 | obtained = inequality.top_rest(income=x, weights=w) 132 | expected = pytest.approx(0.15323043465128208) 133 | assert obtained == expected 134 | 135 | def test_ratio_2d(): 136 | x = np.array([[57], [63], [81], [79], [88], [42], [3], [77], [89]]) 137 | w = np.array([[9], [5], [2], [9], [5], [4], [5], [9], [9]]) 138 | obtained = inequality.top_rest(income=x, weights=w) 139 | expected = pytest.approx(0.15323043465128208) 140 | assert obtained == expected 141 | 142 | 143 | @pytest.mark.parametrize('n', range(15, 20)) 144 | def test_ratio_weighted_eq_unweighted(n): 145 | # Generating a random list of between 10 and 100 items 146 | x = np.random.randint(1, 100, n) 147 | w = np.random.randint(1, 5, n) 148 | 149 | # Weight should 
be the same as repeating the number multiple times 150 | xw = [] 151 | for xi, wi in zip(x,w, strict=False): 152 | xw += [xi]*wi # Create a list that contains 153 | 154 | xw = np.array(xw) 155 | 156 | assert len(xw) == np.sum(w) 157 | 158 | weighted = inequality.top_rest(income=x, weights=w) 159 | unweighted = inequality.top_rest(income=xw) 160 | assert pytest.approx(weighted) == unweighted 161 | 162 | def test_ratio_unweighted(): 163 | x = np.array([ 164 | 11, 67, 93, 68, 80, 71, 0, 65, 45, 73, 56, 38, 18, 24, 94, 72, 56, 165 | 37, 26, 34, 49, 30, 30, 31, 10, 0, 77, 6, 64, 75, 56, 79, 46, 87, 166 | 39, 73, 63, 3, 49, 52, 94, 0, 68, 86, 42, 84, 58, 5, 45, 62, 49, 167 | 97, 77, 94, 66, 84, 42, 39, 7, 24, 65, 52, 59, 52, 38, 27, 85, 43, 168 | 26, 6, 93, 24, 48, 42, 50, 58, 89, 79, 94, 50, 2, 46, 82, 98, 69, 169 | 9, 50, 33, 86, 77, 25, 39, 61, 78, 47, 29, 43, 20, 56, 35]) 170 | obtained = inequality.top_rest(x) 171 | expected = 0.22203712517848642 172 | assert pytest.approx(obtained) == expected 173 | 174 | 175 | def test_hoover_index_series(): 176 | """ Testing hoover with no weights (default all ones) """ 177 | x = np.arange(10) 178 | obtained = inequality.hoover(x) 179 | expected = 4.0 180 | 181 | np.testing.assert_almost_equal(obtained, expected) 182 | 183 | def test_hoover_index(): 184 | x = np.arange(10) 185 | w = np.ones(10) 186 | obtained = inequality.hoover(x, w) 187 | expected = 4 188 | np.testing.assert_almost_equal(obtained, expected) 189 | -------------------------------------------------------------------------------- /tests/data/repXW.csv: -------------------------------------------------------------------------------- 1 | ,x,w 2 | 0,400.0,1.0 3 | 1,400.0,1.0 4 | 2,16.0,1.0 5 | 3,16.0,1.0 6 | 4,16.0,1.0 7 | 5,16.0,1.0 8 | 6,16.0,1.0 9 | 7,16.0,1.0 10 | 8,104.0,1.0 11 | 9,104.0,1.0 12 | 10,966.0,1.0 13 | 11,682.0,1.0 14 | 12,682.0,1.0 15 | 13,682.0,1.0 16 | 14,682.0,1.0 17 | 15,682.0,1.0 18 | 16,682.0,1.0 19 | 17,682.0,1.0 20 | 18,682.0,1.0 21 | 
19,296.0,1.0 22 | 20,296.0,1.0 23 | 21,296.0,1.0 24 | 22,296.0,1.0 25 | 23,296.0,1.0 26 | 24,296.0,1.0 27 | 25,296.0,1.0 28 | 26,505.0,1.0 29 | 27,505.0,1.0 30 | 28,505.0,1.0 31 | 29,505.0,1.0 32 | 30,505.0,1.0 33 | 31,384.0,1.0 34 | 32,384.0,1.0 35 | 33,384.0,1.0 36 | 34,384.0,1.0 37 | 35,384.0,1.0 38 | 36,384.0,1.0 39 | 37,384.0,1.0 40 | 38,984.0,1.0 41 | 39,984.0,1.0 42 | 40,954.0,1.0 43 | 41,954.0,1.0 44 | 42,954.0,1.0 45 | 43,954.0,1.0 46 | 44,201.0,1.0 47 | 45,201.0,1.0 48 | 46,201.0,1.0 49 | 47,201.0,1.0 50 | 48,583.0,1.0 51 | 49,583.0,1.0 52 | 50,583.0,1.0 53 | 51,583.0,1.0 54 | 52,583.0,1.0 55 | 53,583.0,1.0 56 | 54,391.0,1.0 57 | 55,391.0,1.0 58 | 56,391.0,1.0 59 | 57,391.0,1.0 60 | 58,340.0,1.0 61 | 59,340.0,1.0 62 | 60,340.0,1.0 63 | 61,412.0,1.0 64 | 62,412.0,1.0 65 | 63,412.0,1.0 66 | 64,412.0,1.0 67 | 65,412.0,1.0 68 | 66,412.0,1.0 69 | 67,412.0,1.0 70 | 68,412.0,1.0 71 | 69,835.0,1.0 72 | 70,835.0,1.0 73 | 71,937.0,1.0 74 | 72,937.0,1.0 75 | 73,937.0,1.0 76 | 74,937.0,1.0 77 | 75,937.0,1.0 78 | 76,937.0,1.0 79 | 77,937.0,1.0 80 | 78,705.0,1.0 81 | 79,969.0,1.0 82 | 80,969.0,1.0 83 | 81,969.0,1.0 84 | 82,969.0,1.0 85 | 83,961.0,1.0 86 | 84,961.0,1.0 87 | 85,961.0,1.0 88 | 86,961.0,1.0 89 | 87,961.0,1.0 90 | 88,961.0,1.0 91 | 89,909.0,1.0 92 | 90,933.0,1.0 93 | 91,933.0,1.0 94 | 92,243.0,1.0 95 | 93,243.0,1.0 96 | 94,243.0,1.0 97 | 95,243.0,1.0 98 | 96,243.0,1.0 99 | 97,243.0,1.0 100 | 98,243.0,1.0 101 | 99,986.0,1.0 102 | 100,986.0,1.0 103 | 101,986.0,1.0 104 | 102,986.0,1.0 105 | 103,986.0,1.0 106 | 104,986.0,1.0 107 | 105,986.0,1.0 108 | 106,467.0,1.0 109 | 107,467.0,1.0 110 | 108,467.0,1.0 111 | 109,467.0,1.0 112 | 110,467.0,1.0 113 | 111,467.0,1.0 114 | 112,467.0,1.0 115 | 113,231.0,1.0 116 | 114,231.0,1.0 117 | 115,231.0,1.0 118 | 116,797.0,1.0 119 | 117,797.0,1.0 120 | 118,797.0,1.0 121 | 119,797.0,1.0 122 | 120,723.0,1.0 123 | 121,401.0,1.0 124 | 122,401.0,1.0 125 | 123,128.0,1.0 126 | 124,347.0,1.0 127 | 125,347.0,1.0 128 | 126,347.0,1.0 129 
| 127,347.0,1.0 130 | 128,347.0,1.0 131 | 129,347.0,1.0 132 | 130,347.0,1.0 133 | 131,826.0,1.0 134 | 132,826.0,1.0 135 | 133,826.0,1.0 136 | 134,826.0,1.0 137 | 135,273.0,1.0 138 | 136,273.0,1.0 139 | 137,273.0,1.0 140 | 138,19.0,1.0 141 | 139,19.0,1.0 142 | 140,19.0,1.0 143 | 141,381.0,1.0 144 | 142,381.0,1.0 145 | 143,381.0,1.0 146 | 144,381.0,1.0 147 | 145,381.0,1.0 148 | 146,381.0,1.0 149 | 147,912.0,1.0 150 | 148,912.0,1.0 151 | 149,912.0,1.0 152 | 150,912.0,1.0 153 | 151,912.0,1.0 154 | 152,616.0,1.0 155 | 153,616.0,1.0 156 | 154,616.0,1.0 157 | 155,616.0,1.0 158 | 156,547.0,1.0 159 | 157,547.0,1.0 160 | 158,547.0,1.0 161 | 159,547.0,1.0 162 | 160,453.0,1.0 163 | 161,453.0,1.0 164 | 162,453.0,1.0 165 | 163,453.0,1.0 166 | 164,453.0,1.0 167 | 165,453.0,1.0 168 | 166,453.0,1.0 169 | 167,453.0,1.0 170 | 168,442.0,1.0 171 | 169,442.0,1.0 172 | 170,28.0,1.0 173 | 171,28.0,1.0 174 | 172,28.0,1.0 175 | 173,28.0,1.0 176 | 174,28.0,1.0 177 | 175,482.0,1.0 178 | 176,482.0,1.0 179 | 177,482.0,1.0 180 | 178,482.0,1.0 181 | 179,482.0,1.0 182 | 180,482.0,1.0 183 | 181,690.0,1.0 184 | 182,690.0,1.0 185 | 183,690.0,1.0 186 | 184,690.0,1.0 187 | 185,690.0,1.0 188 | 186,690.0,1.0 189 | 187,690.0,1.0 190 | 188,717.0,1.0 191 | 189,717.0,1.0 192 | 190,717.0,1.0 193 | 191,717.0,1.0 194 | 192,908.0,1.0 195 | 193,908.0,1.0 196 | 194,908.0,1.0 197 | 195,28.0,1.0 198 | 196,28.0,1.0 199 | 197,28.0,1.0 200 | 198,28.0,1.0 201 | 199,28.0,1.0 202 | 200,691.0,1.0 203 | 201,691.0,1.0 204 | 202,906.0,1.0 205 | 203,906.0,1.0 206 | 204,906.0,1.0 207 | 205,906.0,1.0 208 | 206,906.0,1.0 209 | 207,906.0,1.0 210 | 208,631.0,1.0 211 | 209,631.0,1.0 212 | 210,631.0,1.0 213 | 211,631.0,1.0 214 | 212,109.0,1.0 215 | 213,109.0,1.0 216 | 214,109.0,1.0 217 | 215,109.0,1.0 218 | 216,109.0,1.0 219 | 217,109.0,1.0 220 | 218,109.0,1.0 221 | 219,408.0,1.0 222 | 220,408.0,1.0 223 | 221,408.0,1.0 224 | 222,408.0,1.0 225 | 223,408.0,1.0 226 | 224,408.0,1.0 227 | 225,408.0,1.0 228 | 226,408.0,1.0 229 | 
227,340.0,1.0 230 | 228,340.0,1.0 231 | 229,340.0,1.0 232 | 230,340.0,1.0 233 | 231,340.0,1.0 234 | 232,340.0,1.0 235 | 233,340.0,1.0 236 | 234,340.0,1.0 237 | 235,497.0,1.0 238 | 236,497.0,1.0 239 | 237,497.0,1.0 240 | 238,497.0,1.0 241 | 239,497.0,1.0 242 | 240,497.0,1.0 243 | 241,743.0,1.0 244 | 242,743.0,1.0 245 | 243,743.0,1.0 246 | 244,242.0,1.0 247 | 245,242.0,1.0 248 | 246,242.0,1.0 249 | 247,242.0,1.0 250 | 248,242.0,1.0 251 | 249,242.0,1.0 252 | 250,242.0,1.0 253 | 251,7.0,1.0 254 | 252,7.0,1.0 255 | 253,7.0,1.0 256 | 254,7.0,1.0 257 | 255,7.0,1.0 258 | 256,505.0,1.0 259 | 257,133.0,1.0 260 | 258,133.0,1.0 261 | 259,133.0,1.0 262 | 260,133.0,1.0 263 | 261,359.0,1.0 264 | 262,359.0,1.0 265 | 263,359.0,1.0 266 | 264,359.0,1.0 267 | 265,359.0,1.0 268 | 266,359.0,1.0 269 | 267,496.0,1.0 270 | 268,496.0,1.0 271 | 269,496.0,1.0 272 | 270,496.0,1.0 273 | 271,496.0,1.0 274 | 272,496.0,1.0 275 | 273,496.0,1.0 276 | 274,496.0,1.0 277 | 275,166.0,1.0 278 | 276,166.0,1.0 279 | 277,166.0,1.0 280 | 278,166.0,1.0 281 | 279,166.0,1.0 282 | 280,176.0,1.0 283 | 281,176.0,1.0 284 | 282,176.0,1.0 285 | 283,176.0,1.0 286 | 284,176.0,1.0 287 | 285,827.0,1.0 288 | 286,827.0,1.0 289 | 287,827.0,1.0 290 | 288,827.0,1.0 291 | 289,904.0,1.0 292 | 290,904.0,1.0 293 | 291,904.0,1.0 294 | 292,738.0,1.0 295 | 293,738.0,1.0 296 | 294,738.0,1.0 297 | 295,738.0,1.0 298 | 296,738.0,1.0 299 | 297,738.0,1.0 300 | 298,738.0,1.0 301 | 299,315.0,1.0 302 | 300,315.0,1.0 303 | 301,315.0,1.0 304 | 302,315.0,1.0 305 | 303,255.0,1.0 306 | 304,255.0,1.0 307 | 305,255.0,1.0 308 | 306,255.0,1.0 309 | 307,255.0,1.0 310 | 308,255.0,1.0 311 | 309,655.0,1.0 312 | 310,655.0,1.0 313 | 311,655.0,1.0 314 | 312,6.0,1.0 315 | 313,6.0,1.0 316 | 314,6.0,1.0 317 | 315,512.0,1.0 318 | 316,488.0,1.0 319 | 317,488.0,1.0 320 | 318,324.0,1.0 321 | 319,324.0,1.0 322 | 320,324.0,1.0 323 | 321,324.0,1.0 324 | 322,324.0,1.0 325 | 323,324.0,1.0 326 | 324,324.0,1.0 327 | 325,191.0,1.0 328 | 326,191.0,1.0 329 | 327,191.0,1.0 
330 | 328,869.0,1.0 331 | 329,869.0,1.0 332 | 330,869.0,1.0 333 | 331,869.0,1.0 334 | 332,869.0,1.0 335 | 333,421.0,1.0 336 | 334,421.0,1.0 337 | 335,421.0,1.0 338 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Build Status](https://github.com/mmngreco/ineqpy/actions/workflows/python-package.yml/badge.svg) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1419582.svg)](https://doi.org/10.5281/zenodo.1419582) 2 | 3 | 4 | # IneqPy's Package 5 | 6 | This package provides statistics to carry on inequality's analysis. Among the 7 | estimators provided by this package you can find: 8 | 9 | 10 | | Main Statistics | Inequality Indicators | 11 | | :-------------- | :-------------------- | 12 | | Weighted Mean | Weighted Gini | 13 | | Weighted Variance | Weighted Atkinson | 14 | | Weighted Coefficient of variation | Weighted Theil | 15 | | Weighted Kurtosis | Weighted Kakwani | 16 | | Weighted Skewness | Weighted Lorenz curve | 17 | 18 | 19 | ## Installation 20 | 21 | ```bash 22 | pip install ineqpy 23 | # or from github's repo 24 | pip install git+https://github.com/mmngreco/IneqPy.git 25 | ``` 26 | 27 | ## What you can find 28 | 29 | Some examples of how use this package: 30 | 31 | ```python 32 | >>> import pandas as pd 33 | >>> import numpy as np 34 | >>> import ineqpy 35 | >>> d = load_data() # dataframe 36 | >>> d 37 | renta factor 38 | 0 -13004.12 1.0031 39 | 89900 141656.97 1.4145 40 | 179800 1400.38 4.4122 41 | 269700 415080.96 1.3295 42 | 359600 69165.22 1.3282 43 | 449500 9673.83 19.4605 44 | 539400 55057.72 1.2923 45 | 629300 -466.73 1.0050 46 | 719200 3431.86 2.2861 47 | 809100 423.24 1.1552 48 | 899000 0.00 1.0048 49 | 988900 -344.41 1.0028 50 | 1078800 56254.09 1.2752 51 | 1168700 60543.33 2.0159 52 | 1258600 2041.70 2.7381 53 | 1348500 581.38 7.9426 54 | 1438400 55646.05 1.2818 55 | 1528300 0.00 1.0281 56 | 1618200 69650.24 
1.2315 57 | 1708100 -2770.88 1.0035 58 | 1798000 4088.63 1.1256 59 | 1887900 0.00 1.0251 60 | 1977800 10662.63 28.0409 61 | 2067700 3281.95 1.1670 62 | ``` 63 | 64 | ### Descriptive statistics 65 | 66 | ```python 67 | >>> ineqpy.mean(variable=d.renta, weights=d.factor) 68 | 20444.700666031338 69 | >>> ineqpy.var(variable=d.renta, weights=d.factor) 70 | 2982220948.7413292 71 | >>> x, w = d.renta.values, d.factor.values 72 | ``` 73 | 74 | > Note that the standardized moment for order `n`, retrieve the value in that 75 | > column: 76 | > 77 | > | `n` | value | 78 | > |:---:|:---------:| 79 | > | 1 | 0 | 80 | > | 2 | 1 | 81 | > | 3 | Skew | 82 | > | 4 | Kurtosis | 83 | 84 | A helpful table of interpretation of the moments 85 | 86 | ```python 87 | >>> ineqpy.std_moment(variable=x, weights=w, order=1) # ~= 0 88 | 2.4624948200717338e-17 89 | >>> ineqpy.std_moment(variable=x, weights=w, order=2) # = 1 90 | 1.0 91 | >>> ineqpy.std_moment(variable=x, weights=w, order=3) # = skew 92 | 5.9965055750379426 93 | >>> ineqpy.skew(variable=x, weights=w) 94 | 5.9965055750379426 95 | >>> ineqpy.std_moment(variable=x, weights=w, order=4) # = kurtosis 96 | 42.319928851703004 97 | >>> ineqpy.kurt(variable=x, weights=w) 98 | 42.319928851703004 99 | ``` 100 | 101 | ### Inequality's estimators 102 | 103 | ```python 104 | # pass a pandas.DataFrame and inputs as strings 105 | >>> ineqpy.gini(data=d, income='renta', weights='factor') 106 | 0.76739136365917116 107 | # you can pass arrays too 108 | >>> ineqpy.gini(income=d.renta.values, weights=d.factor.values) 109 | 0.76739136365917116 110 | ``` 111 | 112 | ### More examples and comparison with other packages: 113 | 114 | We generate random weighted data to show how ineqpy works. The variables 115 | simulate being: 116 | 117 | x : Income 118 | w : Weights 119 | 120 | To test with classical statistics we generate: 121 | 122 | x_rep : Income values replicated w times each one. 123 | w_rep : Ones column. 
124 | 125 | Additional information: 126 | 127 | np : numpy package 128 | sp : scipy package 129 | pd : pandas package 130 | gsl_stat : GNU Scientific Library written in C. 131 | ineq : IneqPy 132 | 133 | 134 | #### Mean 135 | 136 | ```python 137 | >>> np.mean(x_rep) = 488.535714286 138 | >>> ineq.mean(x, w) = 488.535714286 139 | >>> gsl_stat.wmean(w, x) = 488.5357142857143 140 | ``` 141 | 142 | #### Variance 143 | 144 | ```python 145 | >>> np.var(x_rep) = 63086.1364796 146 | >>> ineq.var(x, w) = 63086.1364796 147 | >>> ineq_stat.wvar(x, w, kind=1) = 63086.1364796 148 | >>> ineq_stat.wvar(x, w, kind=2) = 63247.4820972 149 | >>> gsl_stat.wvariance(w, x) = 63993.161585889124 150 | >>> ineq_stat.wvar(x, w, kind=3) = 63993.1615859 151 | ``` 152 | 153 | #### Covariance 154 | 155 | ```python 156 | >>> np.cov(x_rep, x_rep) = [[ 63247.48209719 63247.48209719] 157 | [ 63247.48209719 63247.48209719]] 158 | >>> ineq_stat.wcov(x, x, w, kind=1) = 63086.1364796 159 | >>> ineq_stat.wcov(x, x, w, kind=2) = 4.94065645841e-324 160 | >>> ineq_stat.wcov(x, x, w, kind=3) = 9.88131291682e-324 161 | ``` 162 | 163 | #### Skewness 164 | 165 | ```python 166 | >>> gsl_stat.wskew(w, x) = -0.05742668111416989 167 | >>> sp_stat.skew(x_rep) = -0.058669605967865954 168 | >>> ineq.skew(x, w) = -0.0586696059679 169 | ``` 170 | 171 | #### Kurtosis 172 | 173 | ```python 174 | >>> sp_stat.kurtosis(x_rep) = -0.7919389201857214 175 | >>> gsl_stat.wkurtosis(w, x) = -0.8540884810553052 176 | >>> ineq.kurt(x, w) - 3 = -0.791938920186 177 | ``` 178 | 179 | #### Percentiles 180 | 181 | ```python 182 | >>> ineq_stat.percentile(x, w, 25) = 293 183 | >>> np.percentile(x_rep, 25) = 293.0 184 | 185 | >>> ineq_stat.percentile(x, w, 50) = 526 186 | >>> np.percentile(x_rep, 50) = 526.0 187 | 188 | >>> ineq_stat.percentile(x, w, 90) = 839 189 | >>> np.percentile(x_rep, 90) = 839.0 190 | ``` 191 | 192 | Another way to use this is through the API module as shown below: 193 | 194 | ## API's module 195 | 196 | Using API's 
module: 197 | 198 | ```python 199 | >>> data = Survey(data=data, columns=columns, weights='w') 200 | >>> data.df.head() 201 | x w 202 | 0 111 3 203 | 1 711 4 204 | 2 346 4 205 | 3 667 1 206 | 4 944 1 207 | ``` 208 | 209 | ### Statistics 210 | 211 | ```python 212 | >>> data.weights = w 213 | >>> df.mean(main_var) = 488.535714286 214 | >>> df.percentile(main_var) = 526 215 | >>> df.var(main_var) = 63086.1364796 216 | >>> df.skew(main_var) = -0.0586696059679 217 | >>> df.kurt(main_var) = 2.20806107981 218 | >>> df.gini(main_var) = 0.298494329293 219 | >>> df.atkinson(main_var) = 0.0925853855635 220 | >>> df.theil(main_var) = 0.156137490566 221 | ``` 222 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/master/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
14 | # 15 | import sys 16 | 17 | from pathlib import Path 18 | 19 | import sphinx_rtd_theme 20 | 21 | src = Path(__file__).parents[2] / "src" 22 | sys.path.insert(0, str(src)) 23 | 24 | 25 | # -- Project information ----------------------------------------------------- 26 | 27 | project = 'IneqPy' 28 | copyright = '2022, Maximiliano Greco' 29 | author = 'Maximiliano Greco' 30 | 31 | # The short X.Y version 32 | version = '' 33 | # The full version, including alpha/beta/rc tags 34 | release = '' 35 | 36 | 37 | # -- General configuration --------------------------------------------------- 38 | 39 | # If your documentation needs a minimal Sphinx version, state it here. 40 | # 41 | # needs_sphinx = '1.0' 42 | 43 | # Add any Sphinx extension module names here, as strings. They can be 44 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 45 | # ones. 46 | extensions = [ 47 | 'sphinx.ext.autodoc', 48 | 'sphinx.ext.doctest', 49 | 'sphinx.ext.todo', 50 | 'sphinx.ext.coverage', 51 | 'sphinx.ext.mathjax', 52 | 'sphinx.ext.viewcode', 53 | 'sphinx.ext.githubpages', 54 | 'sphinx_rtd_theme', 55 | 'autoapi.extension', 56 | 'numpydoc', 57 | 'myst_parser', 58 | ] 59 | 60 | autoapi_type = 'python' 61 | autoapi_dirs = [src] 62 | # Add any paths that contain templates here, relative to this directory. 63 | templates_path = ['_templates'] 64 | 65 | # The suffix(es) of source filenames. 66 | # You can specify multiple suffix as a list of string: 67 | # 68 | # source_suffix = ['.rst', '.md'] 69 | source_suffix = '.rst' 70 | 71 | # The master toctree document. 72 | master_doc = 'index' 73 | 74 | # The language for content autogenerated by Sphinx. Refer to documentation 75 | # for a list of supported languages. 76 | # 77 | # This is also used if you do content translation via gettext catalogs. 78 | # Usually you set "language" from the command line for these cases. 
79 | language = None 80 | 81 | # List of patterns, relative to source directory, that match files and 82 | # directories to ignore when looking for source files. 83 | # This pattern also affects html_static_path and html_extra_path. 84 | exclude_patterns = [] 85 | 86 | # The name of the Pygments (syntax highlighting) style to use. 87 | pygments_style = None 88 | 89 | 90 | # -- Options for HTML output ------------------------------------------------- 91 | 92 | # The theme to use for HTML and HTML Help pages. See the documentation for 93 | # a list of builtin themes. 94 | # 95 | # html_theme = 'alabaster' 96 | html_theme = "sphinx_rtd_theme" 97 | 98 | # Theme options are theme-specific and customize the look and feel of a theme 99 | # further. For a list of options available for each theme, see the 100 | # documentation. 101 | # 102 | # html_theme_options = {} 103 | 104 | # Add any paths that contain custom static files (such as style sheets) here, 105 | # relative to this directory. They are copied after the builtin static files, 106 | # so a file named "default.css" will overwrite the builtin "default.css". 107 | html_static_path = ['_static'] 108 | 109 | # Custom sidebar templates, must be a dictionary that maps document names 110 | # to template names. 111 | # 112 | # The default sidebars (for documents that don't match any pattern) are 113 | # defined by theme itself. Builtin themes are using these templates by 114 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 115 | # 'searchbox.html']``. 116 | # 117 | # html_sidebars = {} 118 | 119 | 120 | # -- Options for HTMLHelp output --------------------------------------------- 121 | 122 | # Output file base name for HTML help builder. 123 | htmlhelp_basename = 'IneqPydoc' 124 | 125 | 126 | # -- Options for LaTeX output ------------------------------------------------ 127 | 128 | latex_elements = { 129 | # The paper size ('letterpaper' or 'a4paper'). 
130 | # 131 | # 'papersize': 'letterpaper', 132 | 133 | # The font size ('10pt', '11pt' or '12pt'). 134 | # 135 | # 'pointsize': '10pt', 136 | 137 | # Additional stuff for the LaTeX preamble. 138 | # 139 | # 'preamble': '', 140 | 141 | # Latex figure (float) alignment 142 | # 143 | # 'figure_align': 'htbp', 144 | } 145 | 146 | # Grouping the document tree into LaTeX files. List of tuples 147 | # (source start file, target name, title, 148 | # author, documentclass [howto, manual, or own class]). 149 | latex_documents = [ 150 | (master_doc, 'IneqPy.tex', 'IneqPy Documentation', 151 | 'Maximiliano Greco', 'manual'), 152 | ] 153 | 154 | 155 | # -- Options for manual page output ------------------------------------------ 156 | 157 | # One entry per manual page. List of tuples 158 | # (source start file, name, description, authors, manual section). 159 | man_pages = [ 160 | (master_doc, 'ineqpy', 'IneqPy Documentation', 161 | [author], 1) 162 | ] 163 | 164 | 165 | # -- Options for Texinfo output ---------------------------------------------- 166 | 167 | # Grouping the document tree into Texinfo files. List of tuples 168 | # (source start file, target name, title, author, 169 | # dir menu entry, description, category) 170 | texinfo_documents = [ 171 | (master_doc, 'IneqPy', 'IneqPy Documentation', 172 | author, 'IneqPy', 'One line description of project.', 173 | 'Miscellaneous'), 174 | ] 175 | 176 | 177 | # -- Options for Epub output ------------------------------------------------- 178 | 179 | # Bibliographic Dublin Core info. 180 | epub_title = project 181 | 182 | # The unique identifier of the text. This can be a ISBN number 183 | # or the project homepage. 184 | # 185 | # epub_identifier = '' 186 | 187 | # A unique identification for the text. 188 | # 189 | # epub_uid = '' 190 | 191 | # A list of files that should not be packed into the epub file. 
192 | epub_exclude_files = ['search.html'] 193 | 194 | 195 | # -- Extension configuration ------------------------------------------------- 196 | 197 | # -- Options for todo extension ---------------------------------------------- 198 | 199 | # If true, `todo` and `todoList` produce output, else they produce nothing. 200 | todo_include_todos = True 201 | -------------------------------------------------------------------------------- /examples/alternatives_comparision.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | # from pygsl import statistics as gsl_stat 3 | from scipy import stats as sp_stat 4 | 5 | import ineqpy as ineq 6 | from ineqpy import _statistics as ineq_stat 7 | 8 | # Generate random data 9 | x, w = ineq.utils.generate_data_to_test((60, 90)) 10 | # Replicating weights 11 | x_rep, w_rep = ineq.utils.repeat_data_from_weighted(x, w) 12 | svy = ineq.api.Survey 13 | 14 | print(""" 15 | ========== 16 | Quickstart 17 | ========== 18 | 19 | We generate random weighted data to show how ineqpy works. The variables 20 | simulate being: 21 | 22 | x : Income 23 | w : Weights 24 | 25 | ```python 26 | >>> x, w = ineq.utils.generate_data_to_test((60,90)) 27 | ``` 28 | 29 | To test with classical statistics we generate: 30 | 31 | x_rep : Income values replicated w times each one. 32 | w_rep : Ones column. 33 | 34 | ```python 35 | >>> x_rep, w_rep = ineq.utils.repeat_data_from_weighted(x, w) 36 | ``` 37 | 38 | Additional information: 39 | 40 | np : numpy package 41 | sp : scipy package 42 | pd : pandas package 43 | gsl_stat : GNU Scientific Library written in C. 
44 | ineq : IneqPy 45 | """) 46 | 47 | 48 | print(""" 49 | Examples and comparision with other packages 50 | ============================================ 51 | 52 | STATISTICS 53 | ========== 54 | 55 | MEAN 56 | ---- 57 | 58 | """) 59 | 60 | print("```python") 61 | print(">>> np.mean(x_rep)".ljust(24), "=", np.mean(x_rep)) 62 | print(">>> ineq.mean(x, w)".ljust(24), "=", ineq.mean(x, w)) 63 | # print(">>> gsl_stat.wmean(w, x)".ljust(24), "=", gsl_stat.wmean(w, x)) 64 | print("```") 65 | 66 | # %timeit ineq.mean(None, x, w) 67 | # %timeit gsl_stat.wmean(w, x) 68 | # %timeit ineq_stat.mean(x, w) 69 | 70 | print( 71 | """ 72 | 73 | VARIANCE 74 | -------- 75 | 76 | """ 77 | ) 78 | 79 | np_var = np.var(x_rep) 80 | inq_var = ineq.var(x, w) 81 | wvar_1 = ineq_stat.wvar(x, w, 1) # population variance 82 | wvar_2 = ineq_stat.wvar(x, w, 2) # sample frequency variance 83 | # gsl_wvar = gsl_stat.wvariance(w, x) 84 | wvar_3 = ineq_stat.wvar(x, w, 3) # sample reliability variance 85 | 86 | print("```python") 87 | print(">>> np.var(x_rep)".ljust(32), "=", np_var) 88 | print(">>> ineq.var(x, w)".ljust(32), "=", inq_var) 89 | print(">>> ineq_stat.wvar(x, w, kind=1)".ljust(32), "=", wvar_1) 90 | print(">>> ineq_stat.wvar(x, w, kind=2)".ljust(32), "=", wvar_2) 91 | # print(">>> gsl_stat.wvariance(w, x)".ljust(32), "=", gsl_wvar) 92 | print(">>> ineq_stat.wvar(x, w, kind=3)".ljust(32), "=", wvar_3) 93 | print("```") 94 | 95 | print( 96 | """ 97 | 98 | COVARIANCE 99 | ---------- 100 | 101 | """ 102 | ) 103 | 104 | np_cov = np.cov(x_rep, x_rep) 105 | ineq_wcov1 = ineq_stat.wcov(x, x, w, 1) 106 | ineq_wcov2 = ineq_stat.wcov(x, x, w, 2) 107 | ineq_wcov3 = ineq_stat.wcov(x, x, w, 3) 108 | 109 | print("```python") 110 | print(">>> np.cov(x_rep, x_rep)".ljust(35), "= ", np_cov) 111 | print(">>> ineq_stat.wcov(x, x, w, kind=1)".ljust(35), "= ", ineq_wcov1) 112 | print(">>> ineq_stat.wcov(x, x, w, kind=2)".ljust(35), "= ", ineq_wcov2) 113 | print(">>> ineq_stat.wcov(x, x, w, kind=3)".ljust(35), 
"= ", ineq_wcov3) 114 | print("```") 115 | print( 116 | """ 117 | 118 | SKEWNESS 119 | -------- 120 | 121 | """ 122 | ) 123 | 124 | # gsl_wskew = gsl_stat.wskew(w, x) 125 | sp_skew = sp_stat.skew(x_rep) 126 | ineq_skew = ineq.skew(x, w) 127 | 128 | print("```python") 129 | # print(">>> gsl_stat.wskew(w, x)".ljust(24), "= ", gsl_wskew) 130 | print(">>> sp_stat.skew(x_rep)".ljust(24), "= ", sp_skew) 131 | print(">>> ineq.skew(x, w)".ljust(24), "= ", ineq_skew) 132 | print("```") 133 | 134 | # %timeit gsl_stat.wskew(w, x) 135 | # %timeit sp_stat.skew(x_rep) 136 | # %timeit ineq.skew(None, x, w) 137 | 138 | print( 139 | """ 140 | 141 | KURTOSIS 142 | -------- 143 | 144 | """ 145 | ) 146 | 147 | sp_kurt = sp_stat.kurtosis(x_rep) 148 | # gsl_wkurt = gsl_stat.wkurtosis(w, x) 149 | ineq_kurt = ineq.kurt(x, w) - 3 150 | print("```python") 151 | print(">>> sp_stat.kurtosis(x_rep)".ljust(28), "= ", sp_kurt) 152 | # print(">>> gsl_stat.wkurtosis(w, x)".ljust(28), "= ", gsl_wkurt) 153 | print(">>> ineq.kurt(x, w) - 3".ljust(28), "= ", ineq_kurt) 154 | print("```") 155 | # %timeit sp_stat.kurtosis(x_rep) 156 | # %timeit gsl_stat.wkurtosis(w, x) 157 | # %timeit ineq.kurt(None, x, w) - 3 158 | 159 | print( 160 | """ 161 | PERCENTILES 162 | ----------- 163 | 164 | """ 165 | ) 166 | q = 50 167 | ineq_perc_50 = ineq_stat.percentile(x, w, q) 168 | np_perc_50 = np.percentile(x_rep, q) 169 | print("```python") 170 | print(">>> ineq_stat.percentile(x, w, %s)".ljust(34) % q, "= ", ineq_perc_50) 171 | print(">>> np.percentile(x_rep, %s)".ljust(34) % q, "= ", np_perc_50) 172 | 173 | q = 25 174 | ineq_perc_25 = ineq_stat.percentile(x, w, q) 175 | np_perc_25 = np.percentile(x_rep, q) 176 | print(">>> ineq_stat.percentile(x, w, %s)".ljust(34) % q, "= ", ineq_perc_25) 177 | print(">>> np.percentile(x_rep, %s)".ljust(34) % q, "= ", np_perc_25) 178 | 179 | q = 75 180 | ineq_perc_75 = ineq_stat.percentile(x, w, q) 181 | np_perc_75 = np.percentile(x_rep, q) 182 | print(">>> ineq_stat.percentile(x, 
w, %s)".ljust(34) % q, "= ", ineq_perc_75) 183 | print(">>> np.percentile(x_rep, %s)".ljust(34) % q, "= ", np_perc_75) 184 | 185 | q = 10 186 | ineq_perc_10 = ineq_stat.percentile(x, w, q) 187 | np_perc_10 = np.percentile(x_rep, q) 188 | print(">>> ineq_stat.percentile(x, w, %s)".ljust(34) % q, "= ", ineq_perc_10) 189 | print(">>> np.percentile(x_rep, %s)".ljust(34) % q, "= ", np_perc_10) 190 | 191 | q = 90 192 | ineq_perc_90 = ineq_stat.percentile(x, w, q) 193 | np_perc_90 = np.percentile(x_rep, q) 194 | print(">>> ineq_stat.percentile(x, w, %s)".ljust(34) % q, "= ", ineq_perc_90) 195 | print(">>> np.percentile(x_rep, %s)".ljust(34) % q, "= ", np_perc_90) 196 | print("```") 197 | 198 | print( 199 | """ 200 | Another way to use this is through the API module as shown below: 201 | 202 | API MODULE 203 | ========== 204 | 205 | """ 206 | ) 207 | 208 | data = np.c_[x, w] 209 | columns = list("xw") 210 | 211 | df = svy(data=data, columns=columns, weights="w") 212 | print("```python") 213 | print(">>> data = svy(data=data, columns=columns, weights='w')") 214 | print(">>> data.head()") 215 | print(df.head()) 216 | print("") 217 | print(">>> data.weights =", df.weights) 218 | print("```") 219 | print("") 220 | main_var = "x" 221 | # df.mean(main_var) 222 | # df.var(main_var) 223 | # df.skew(main_var) 224 | # df.kurt(main_var) 225 | # df.gini(main_var) 226 | # df.atkinson(main_var) 227 | # df.theil(main_var) 228 | # df.percentile(main_var) 229 | 230 | print("```python") 231 | print(">>> df.mean(main_var)".ljust(27), "=", df.mean(main_var)) 232 | print(">>> df.percentile(main_var)".ljust(27), "=", df.percentile(main_var)) 233 | print(">>> df.var(main_var)".ljust(27), "=", df.var(main_var)) 234 | print(">>> df.skew(main_var)".ljust(27), "=", df.skew(main_var)) 235 | print(">>> df.kurt(main_var)".ljust(27), "=", df.kurt(main_var)) 236 | print(">>> df.gini(main_var)".ljust(27), "=", df.gini(main_var)) 237 | print(">>> df.atkinson(main_var)".ljust(27), "=", 
df.atkinson(main_var)) 238 | print(">>> df.theil(main_var)".ljust(27), "=", df.theil(main_var)) 239 | print("```") 240 | -------------------------------------------------------------------------------- /src/ineqpy/statistics.py: -------------------------------------------------------------------------------- 1 | """Descriptive statistics. 2 | 3 | This module contains main descriptive statistics like: mean, variance, etc. 4 | 5 | """ 6 | 7 | from ineqpy import _statistics as stat, utils 8 | 9 | 10 | def c_moment( 11 | variable=None, weights=None, data=None, order=2, param=None, ddof=0 12 | ): 13 | """Calculate central momment. 14 | 15 | Calculate the central moment of `x` with respect to `param` of order `n`, 16 | given the weights `w`. 17 | 18 | Parameters 19 | ---------- 20 | variable : 1d-array 21 | Variable 22 | weights : 1d-array 23 | Weights 24 | data : pandas.DataFrame 25 | Contains all variables needed. 26 | order : int, optional 27 | Moment order, 2 by default (variance) 28 | param : int or array, optional 29 | Parameter for which the moment is calculated, the default is None, 30 | implies use the mean. 31 | ddof : int, optional 32 | Degree of freedom, zero by default. 33 | 34 | Returns 35 | ------- 36 | central_moment : float 37 | 38 | Notes 39 | ----- 40 | - The cmoment of order 1 is 0 41 | - The cmoment of order 2 is the variance. 42 | 43 | Source : https://en.wikipedia.org/wiki/Moment_(mathematics) 44 | 45 | Todo 46 | ---- 47 | Implement: https://en.wikipedia.org/wiki/L-moment#cite_note-wang:96-6 48 | 49 | """ 50 | variable, weights = utils.extract_values(data, variable, weights) 51 | return stat.c_moment(variable, weights, order, param, ddof) 52 | 53 | 54 | def percentile( 55 | variable=None, weights=None, data=None, p=50, interpolate="lower" 56 | ): 57 | """Calculate the value of a quantile given a variable and his weights. 
58 | 59 | Parameters 60 | ---------- 61 | variable : str or array 62 | weights : str or array 63 | data : pd.DataFrame, optional 64 | pd.DataFrame that contains all variables needed. 65 | q : float 66 | Quantile level, if pass 0.5 means median. 67 | interpolate : bool 68 | 69 | Returns 70 | ------- 71 | percentile : float or pd.Series 72 | 73 | """ 74 | variable, weights = utils.extract_values(data, variable, weights) 75 | return stat.percentile(variable, weights, p, interpolate) 76 | 77 | 78 | def std_moment( 79 | variable=None, weights=None, data=None, param=None, order=3, ddof=0 80 | ): 81 | """Calculate standarized momment. 82 | 83 | Calculate the standardized moment of order `c` for the variable` x` with 84 | respect to `c`. 85 | 86 | Parameters 87 | ---------- 88 | variable : 1d-array 89 | Random Variable 90 | weights : 1d-array, optional 91 | Weights or probability 92 | data : pd.DataFrame, optional 93 | pd.DataFrame that contains all variables needed. 94 | order : int, optional 95 | Order of Moment, three by default 96 | param : int or float or array, optional 97 | Central trend, default is the mean. 98 | ddof : int, optional 99 | Degree of freedom. 100 | 101 | Returns 102 | ------- 103 | std_moment : float 104 | Returns the standardized `n` order moment. 105 | 106 | References 107 | ---------- 108 | - https://en.wikipedia.org/wiki/Moment_(mathematics) 109 | - https://en.wikipedia.org/wiki/Standardized_moment 110 | 111 | Todo 112 | ---- 113 | It is the general case of the raw and central moments. Review 114 | implementation. 115 | 116 | """ 117 | variable, weights = utils.extract_values(data, variable, weights) 118 | return stat.std_moment(variable, weights, param, order, ddof) 119 | 120 | 121 | def mean(variable=None, weights=None, data=None): 122 | """Calculate the mean of `variable` given `weights`. 123 | 124 | Parameters 125 | ---------- 126 | variable : array-like or str 127 | Variable on which the mean is estimated. 
128 | weights : array-like or str 129 | Weights of the `x` variable. 130 | data : pandas.DataFrame 131 | Is possible pass a DataFrame with variable and weights, then you must 132 | pass as `variable` and `weights` the column name stored in `data`. 133 | 134 | Returns 135 | ------- 136 | mean : array-like or float 137 | """ 138 | # if pass a DataFrame separate variables. 139 | if data is not None: 140 | variable, weights = utils.extract_values(data, variable, weights) 141 | return stat.mean(variable, utils.not_empty_weights(weights, variable)) 142 | 143 | 144 | def density(variable=None, weights=None, groups=None, data=None): 145 | """Density in percentage. 146 | 147 | Calculates density in percentage. This make division of variable inferring 148 | width in groups as max - min. 149 | 150 | Parameters 151 | ---------- 152 | variable : numpy.array or pandas.DataFrame 153 | Main variable. 154 | weights : numpy.array or pandas.DataFrame 155 | Weights of main variable. 156 | groups : numpy.array or pandas.DataFrame 157 | Label that show which group each element belongs to. 158 | data : pd.DataFrame, optional 159 | Object that contains all variables needed. 160 | 161 | 162 | Returns 163 | ------- 164 | density : array-like 165 | 166 | References 167 | ---------- 168 | Histogram. (2017, May 9). In Wikipedia, The Free Encyclopedia. Retrieved 169 | 14:47, May 15, 2017, from 170 | https://en.wikipedia.org/w/index.php?title=Histogram&oldid=779516918 171 | """ 172 | variable, weights = utils.extract_values(data, variable, weights) 173 | if groups is not None: 174 | groups = data[groups].values 175 | return stat.density(variable, weights, groups) 176 | 177 | 178 | def var(variable=None, weights=None, data=None, ddof=0): 179 | """Calculate the variance. 180 | 181 | Calculate the population variance of `variable` given `weights`. 182 | 183 | Parameters 184 | ---------- 185 | data : pd.DataFrame, optional 186 | pd.DataFrame that contains all variables needed. 
187 | variable : 1d-array or pd.Series or pd.DataFrame 188 | Variable on which the quasivariation is estimated 189 | weights : 1d-array or pd.Series or pd.DataFrame 190 | Weights of the `variable`. 191 | data : pd.DataFrame 192 | Object that contains all variables needed. 193 | ddof : int 194 | Degree of freedom. 195 | 196 | Returns 197 | ------- 198 | variance : 1d-array or pd.Series or float 199 | Estimation of quasivariance of `variable` 200 | 201 | References 202 | ---------- 203 | Moment (mathematics). (2017, May 6). In Wikipedia, The Free Encyclopedia. 204 | Retrieved 14:40, May 15, 2017, from 205 | https://en.wikipedia.org/w/index.php?title=Moment_(mathematics) 206 | 207 | Notes 208 | ----- 209 | If stratificated sample must pass with groupby each strata. 210 | """ 211 | variable, weights = utils.extract_values(data, variable, weights) 212 | return stat.var(variable, weights, ddof) 213 | 214 | 215 | def coef_variation(variable=None, weights=None, data=None): 216 | """Calculate the coefficient of variation. 217 | 218 | Calculate the coefficient of variation of a `variable` given weights. 219 | The coefficient of variation is the square root of the variance of the 220 | incomes divided by the mean income. It has the advantages of being 221 | mathematically tractable and is subgroup decomposable, but is not bounded 222 | from above. 223 | 224 | Parameters 225 | ---------- 226 | variable : array-like or str 227 | weights : array-like or str 228 | data : pandas.DataFrame 229 | 230 | Returns 231 | ------- 232 | coefficient_variation : float 233 | 234 | References 235 | ---------- 236 | Coefficient of variation. (2017, May 5). In Wikipedia, The Free 237 | Encyclopedia. 
Retrieved 15:03, May 15, 2017, from 238 | https://en.wikipedia.org/w/index.php?title=Coefficient_of_variation 239 | """ 240 | # TODO complete docstring 241 | variable, weights = utils.extract_values(data, variable, weights) 242 | return stat.coef_variation(variable, weights) 243 | 244 | 245 | def kurt(variable=None, weights=None, data=None): 246 | """Calculate the Kurtosis coefficient. 247 | 248 | Parameters 249 | ---------- 250 | variable : 1d-array 251 | weights : 1d-array 252 | data : pandas.DataFrame 253 | Object which stores ``variable`` and ``weights``. 254 | 255 | Returns 256 | ------- 257 | kurt : float 258 | Kurtosis coefficient. 259 | 260 | References 261 | ---------- 262 | Moment (mathematics). (2017, May 6). In Wikipedia, The Free Encyclopedia. 263 | Retrieved 14:40, May 15, 2017, from 264 | https://en.wikipedia.org/w/index.php?title=Moment_(mathematics) 265 | 266 | Notes 267 | ----- 268 | It is an alias of the standardized fourth-order moment. 269 | """ 270 | variable, weights = utils.extract_values(data, variable, weights) 271 | return stat.kurt(variable, weights) 272 | 273 | 274 | def skew(variable=None, weights=None, data=None): 275 | """Return the asymmetry coefficient of a sample. 276 | 277 | Parameters 278 | ---------- 279 | data : pandas.DataFrame 280 | variable : array-like, str 281 | weights : array-like, str 282 | data : pandas.DataFrame 283 | Object which stores ``variable`` and ``weights``. 284 | 285 | Returns 286 | ------- 287 | skew : float 288 | 289 | References 290 | ---------- 291 | Moment (mathematics). (2017, May 6). In Wikipedia, The Free Encyclopedia. 292 | Retrieved 14:40, May 15, 2017, from 293 | https://en.wikipedia.org/w/index.php?title=Moment_(mathematics) 294 | 295 | Notes 296 | ----- 297 | It is an alias of the standardized third-order moment. 
298 | """ 299 | variable, weights = utils.extract_values(data, variable, weights) 300 | return stat.skew(variable, weights) 301 | -------------------------------------------------------------------------------- /src/ineqpy/_statistics.py: -------------------------------------------------------------------------------- 1 | """Low level desciptive statistics. 2 | 3 | References 4 | ---------- 5 | 1. http://people.ds.cam.ac.uk/fanf2/hermes/doc/antiforgery/stats.pdf 6 | 2. https://en.wikipedia.org/wiki/Weighted_arithmetic_mean 7 | #Weighted_sample_variance 8 | 3. https://en.wikipedia.org/wiki/Algorithms%5Ffor%5Fcalculating%5Fvariance 9 | #Weighted_incremental_algorithm 10 | """ 11 | 12 | import numpy as np 13 | from numba import guvectorize 14 | 15 | from ineqpy import utils 16 | 17 | 18 | def c_moment(variable=None, weights=None, order=2, param=None, ddof=0): 19 | """Calculate central momment. 20 | 21 | Calculate the central moment of `x` with respect to `param` of order `n`, 22 | given the weights `w`. 23 | 24 | Parameters 25 | ---------- 26 | variable : 1d-array 27 | Variable 28 | weights : 1d-array 29 | Weights 30 | order : int, optional 31 | Moment order, 2 by default (variance) 32 | param : int or array, optional 33 | Parameter for which the moment is calculated, the default is None, 34 | implies use the mean. 35 | ddof : int, optional 36 | Degree of freedom, zero by default. 37 | 38 | Returns 39 | ------- 40 | central_moment : float 41 | 42 | Notes 43 | ----- 44 | - The cmoment of order 1 is 0 45 | - The cmoment of order 2 is the variance. 
46 | 47 | Source : https://en.wikipedia.org/wiki/Moment_(mathematics) 48 | 49 | Todo 50 | ---- 51 | Implement : https://en.wikipedia.org/wiki/L-moment#cite_note-wang:96-6 52 | """ 53 | # return np.sum((x-c)^n*counts) / np.sum(counts) 54 | variable = variable.copy() 55 | weights = utils.not_empty_weights(weights, like=variable) 56 | 57 | if param is None: 58 | param = mean(variable=variable, weights=weights) 59 | elif not isinstance(param, (np.ndarray, int, float)): 60 | raise NotImplementedError 61 | 62 | return np.sum((variable - param) ** order * weights) / ( 63 | np.sum(weights) - ddof 64 | ) 65 | 66 | 67 | def percentile( 68 | variable, weights, percentile=50, interpolation="lower" 69 | ) -> float: 70 | """Calculate the percentile. 71 | 72 | Parameters 73 | ---------- 74 | variable : str or array 75 | weights : str or array 76 | percentile : int or list 77 | Percentile level, if pass 50 we get the median. 78 | interpolation : {'lower', 'higher', 'midpoint'}, optional 79 | Select interpolation method. 80 | 81 | Returns 82 | ------- 83 | percentile : float 84 | """ 85 | sorted_idx = np.argsort(variable) 86 | cum_weights = np.cumsum(weights[sorted_idx]) 87 | lower_percentile_idx = np.searchsorted( 88 | cum_weights, (percentile / 100.0) * cum_weights[-1] 89 | ) 90 | 91 | if interpolation == "midpoint": 92 | res = np.interp( 93 | lower_percentile_idx + 0.5, 94 | np.arange(len(variable)), 95 | variable[sorted_idx], 96 | ) 97 | elif interpolation == "lower": 98 | res = variable[sorted_idx[lower_percentile_idx]] 99 | elif interpolation == "higher": 100 | res = variable[sorted_idx[lower_percentile_idx + 1]] 101 | else: 102 | raise NotImplementedError 103 | 104 | return float(res) 105 | 106 | 107 | def std_moment(variable=None, weights=None, param=None, order=3, ddof=0): 108 | """Calculate the standarized moment. 109 | 110 | Calculate the standarized moment of order `c` for the variable` x` with 111 | respect to `c`. 
112 | 113 | Parameters 114 | ---------- 115 | variable : 1d-array 116 | Random Variable 117 | weights : 1d-array, optional 118 | Weights or probability 119 | order : int, optional 120 | Order of Moment, three by default 121 | param : int or float or array, optional 122 | Central trend, default is the mean. 123 | ddof : int, optional 124 | Degree of freedom. 125 | 126 | Returns 127 | ------- 128 | std_moment : float 129 | Returns the standardized `n` order moment. 130 | 131 | References 132 | ---------- 133 | - https://en.wikipedia.org/wiki/Moment_(mathematics) 134 | #Significance_of_the_moments 135 | - https://en.wikipedia.org/wiki/Standardized_moment 136 | 137 | Todo 138 | ---- 139 | It is the general case of the raw and central moments. Review 140 | implementation. 141 | 142 | """ 143 | if param is None: 144 | param = mean(variable=variable, weights=weights) 145 | 146 | res = c_moment( 147 | variable=variable, weights=weights, order=order, param=param, ddof=ddof 148 | ) 149 | res /= var(variable=variable, weights=weights, ddof=ddof) ** (order / 2) 150 | return res 151 | 152 | 153 | def mean(variable=None, weights=None): 154 | """Calculate the mean of `variable` given `weights`. 155 | 156 | Parameters 157 | ---------- 158 | variable : array-like or str 159 | Variable on which the mean is estimated. 160 | weights : array-like or str 161 | Weights of the `x` variable. 162 | 163 | Returns 164 | ------- 165 | mean : array-like or float 166 | """ 167 | # if pass a DataFrame separate variables. 168 | variable = variable.copy() 169 | weights = utils.not_empty_weights(weights, like=variable) 170 | variable, weights = utils._clean_nans_values(variable, weights) 171 | return np.average(a=variable, weights=weights, axis=0) 172 | 173 | 174 | def var(variable=None, weights=None, ddof=0): 175 | """Calculate the population variance of ``variable`` given `weights`. 
176 | 177 | Parameters 178 | ---------- 179 | variable : 1d-array or pd.Series or pd.DataFrame 180 | Variable on which the quasivariation is estimated 181 | weights : 1d-array or pd.Series or pd.DataFrame 182 | Weights of the `variable`. 183 | 184 | Returns 185 | ------- 186 | variance : 1d-array or pd.Series or float 187 | Estimation of quasivariance of `variable` 188 | 189 | References 190 | ---------- 191 | Moment (mathematics). (2017, May 6). In Wikipedia, The Free Encyclopedia. 192 | Retrieved 14:40, May 15, 2017, from 193 | https://en.wikipedia.org/w/index.php?title=Moment_(mathematics) 194 | 195 | Notes 196 | ----- 197 | If stratificated sample must pass with groupby each strata. 198 | """ 199 | return c_moment(variable=variable, weights=weights, order=2, ddof=ddof) 200 | 201 | 202 | def coef_variation(variable=None, weights=None): 203 | """Calculate the coefficient of variation. 204 | 205 | Calculate the coefficient of variation of a `variable` given weights. The 206 | coefficient of variation is the square root of the variance of the incomes 207 | divided by the mean income. It has the advantages of being mathematically 208 | tractable and is subgroup decomposable, but is not bounded from above. 209 | 210 | Parameters 211 | ---------- 212 | variable : array-like or str 213 | weights : array-like or str 214 | 215 | Returns 216 | ------- 217 | coefficient_variation : float 218 | 219 | References 220 | ---------- 221 | Coefficient of variation. (2017, May 5). In Wikipedia, The Free 222 | Encyclopedia. Retrieved 15:03, May 15, 2017, from 223 | https://en.wikipedia.org/w/index.php?title=Coefficient_of_variation 224 | """ 225 | # todo complete docstring 226 | return var(variable=variable, weights=weights) ** 0.5 / abs( 227 | mean(variable=variable, weights=weights) 228 | ) 229 | 230 | 231 | def kurt(variable=None, weights=None): 232 | """Calculate the asymmetry coefficient. 
233 | 234 | Parameters 235 | ---------- 236 | variable : 1d-array 237 | weights : 1d-array 238 | 239 | Returns 240 | ------- 241 | kurt : float 242 | Kurtosis coefficient. 243 | 244 | References 245 | ---------- 246 | Moment (mathematics). (2017, May 6). In Wikipedia, The Free Encyclopedia. 247 | Retrieved 14:40, May 15, 2017, from 248 | https://en.wikipedia.org/w/index.php?title=Moment_(mathematics) 249 | 250 | Notes 251 | ----- 252 | It is an alias of the standardized fourth-order moment. 253 | """ 254 | return std_moment(variable=variable, weights=weights, order=4) 255 | 256 | 257 | def skew(variable=None, weights=None): 258 | """Return the asymmetry coefficient of a sample. 259 | 260 | Parameters 261 | ---------- 262 | variable : array-like, str 263 | weights : array-like, str 264 | 265 | Returns 266 | ------- 267 | skew : float 268 | 269 | References 270 | ---------- 271 | Moment (mathematics). (2017, May 6). In Wikipedia, The Free Encyclopedia. 272 | Retrieved 14:40, May 15, 2017, from 273 | https://en.wikipedia.org/w/index.php?title=Moment_(mathematics) 274 | 275 | Notes 276 | ----- 277 | It is an alias of the standardized third-order moment. 278 | 279 | """ 280 | return std_moment(variable=variable, weights=weights, order=3) 281 | 282 | 283 | @guvectorize( 284 | "float64[:], float64[:], int64, float64[:]", 285 | "(n),(n),()->()", 286 | nopython=True, 287 | cache=True, 288 | ) 289 | def wvar(x, w, kind, out): 290 | """Calculate weighted variance of X. 291 | 292 | Calculates the weighted variance of x according to a kind of weights. 293 | 294 | Parameters 295 | ---------- 296 | x : np.ndarray 297 | Main variable. 298 | w : np.ndarray 299 | Weigths. 300 | kind : int 301 | Has three modes to calculate de variance, you can control that with 302 | this argument, the values and the output are the next: 303 | * 1. population variance 304 | * 2. sample frequency variance 305 | * 3. sample reliability variance. 
306 | out : np.ndarray 307 | 308 | Returns 309 | ------- 310 | weighted_variance : float 311 | 312 | References 313 | ---------- 314 | https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance 315 | #Weighted_incremental_algorithm 316 | """ 317 | wSum = wSum2 = mean = S = 0 318 | 319 | for i in range(len(x)): # Alternatively "for x, w in zip(data, weights):" 320 | wSum = wSum + w[i] 321 | wSum2 = wSum2 + w[i] * w[i] 322 | meanOld = mean 323 | mean = meanOld + (w[i] / wSum) * (x[i] - meanOld) 324 | S = S + w[i] * (x[i] - meanOld) * (x[i] - mean) 325 | 326 | if kind == 1: 327 | # population_variance 328 | out[0] = S / wSum 329 | elif kind == 2: 330 | # Bessel's correction for weighted samples 331 | # Frequency weights 332 | # sample_frequency_variance 333 | out[0] = S / (wSum - 1) 334 | elif kind == 3: 335 | # Reliability weights 336 | # sample_reliability_variance 337 | out[0] = S / (wSum - wSum2 / wSum) 338 | 339 | 340 | @guvectorize( 341 | "float64[:], float64[:], float64[:], int64, float64[:]", 342 | "(n),(n),(n),()->()", 343 | nopython=True, 344 | cache=True, 345 | ) 346 | def wcov(x, y, w, kind, out): 347 | """Compute weighted covariance between x and y. 348 | 349 | Compute the weighted covariance between two variables, we can chose which 350 | kind of covariance returns. 351 | 352 | Parameters 353 | ---------- 354 | x : np.array 355 | Main variable. 356 | y : np.array 357 | Second variable. 358 | w : np.array 359 | Weights. 360 | kind : int 361 | Kind of weighted covariance is returned: 362 | 1 : population variance 363 | 2 : sample frequency variance 364 | 3 : sample reliability variance. 
365 | out : np.array 366 | 367 | Returns 368 | ------- 369 | weighted_covariance = float 370 | 371 | References 372 | ---------- 373 | https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online 374 | """ 375 | meanx = meany = 0 376 | wsum = wsum2 = 0 377 | C = 0 378 | for i in range(len(x)): 379 | wsum += w[i] 380 | wsum2 += w[i] * w[i] 381 | dx = x[i] - meanx 382 | meanx += (w[i] / wsum) * dx 383 | meany += (w[i] / wsum) * (y[i] - meany) 384 | C += w[i] * dx * (y[i] - meany) 385 | 386 | if kind == 1: 387 | # population_covar 388 | out[0] = C / wsum 389 | 390 | 391 | @guvectorize( 392 | "float64[:], float64[:], float64[:]", 393 | "(n),(n)->()", 394 | nopython=True, 395 | cache=True, 396 | ) 397 | def online_kurtosis(x, w, out): 398 | """Online kurtosis.""" 399 | n = 0 400 | mean = 0 401 | M2 = 0 402 | M3 = 0 403 | M4 = 0 404 | 405 | for i in range(len(x)): 406 | n1 = w[i] 407 | n = n + w[i] 408 | delta = x[i] - mean 409 | delta_n = delta / n 410 | delta_n2 = delta_n * delta_n 411 | term1 = delta * delta_n * n1 412 | mean = mean + w[i] * delta_n / n 413 | M4 = ( 414 | M4 415 | + term1 * delta_n2 * (n * n - 3 * n + 3) 416 | + 6 * delta_n2 * M2 417 | - 4 * delta_n * M3 418 | ) 419 | M3 = M3 + term1 * delta_n * (n - 2) - 3 * delta_n * M2 420 | M2 = M2 + term1 421 | 422 | out[0] = (n * M4) / (M2 * M2) - 3 423 | 424 | 425 | @guvectorize( 426 | "float64[:], float64[:], int64, float64[:]", 427 | "(n),(n),()->()", 428 | nopython=True, 429 | cache=True, 430 | ) 431 | def Mk(x, w, k, out): 432 | """Calculate Mk.""" 433 | w_sum = wx_sum = 0 434 | 435 | for i in range(len(x)): 436 | wx_sum += w[i] * (x[i] ** k) 437 | w_sum += w[i] 438 | 439 | out[0] = wx_sum / w_sum 440 | -------------------------------------------------------------------------------- /src/ineqpy/inequality.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Analysis of inequality. 
4 | 5 | This package provide an easy way to realize a quantitative analysis of 6 | grouped, also make easy work with stratified data, in this module you can 7 | find statistics and grouped indicators to this task. 8 | 9 | Todo 10 | ---- 11 | - Rethinking this module as Class. 12 | - https://en.wikipedia.org/wiki/Income_inequality_metrics 13 | 14 | """ 15 | import numpy as np 16 | import pandas as pd 17 | 18 | from ineqpy import utils 19 | from ineqpy.statistics import mean 20 | 21 | __all__ = [ 22 | "atkinson", 23 | "avg_tax_rate", 24 | "concentration", 25 | "gini", 26 | "kakwani", 27 | "lorenz", 28 | "reynolds_smolensky", 29 | "theil", 30 | "top_rest", 31 | "hoover", 32 | ] 33 | 34 | 35 | def concentration(income, weights=None, data=None, sort=True): 36 | """Calculate concentration's index. 37 | 38 | This function calculate the concentration index, according to the notation 39 | used in [Jenkins1988]_ you can calculate the: 40 | 41 | C_x = 2 / x · cov(x, F_x) 42 | if x = g(x) then C_x becomes C_y 43 | 44 | when there are taxes: 45 | 46 | y = g(x) = x - t(x) 47 | 48 | Parameters 49 | ---------- 50 | income : array-like 51 | weights : array-like 52 | data : pandas.DataFrame 53 | sort : bool 54 | If true, will sort the values. 55 | 56 | Returns 57 | ------- 58 | concentration : array-like 59 | 60 | References 61 | ---------- 62 | Jenkins, S. (1988). Calculating income distribution indices 63 | from micro-data. National Tax Journal. 
http://doi.org/10.2307/41788716 64 | """ 65 | # TODO complete docstring 66 | 67 | # check if DataFrame is passed, if yes then extract variables else make a 68 | # copy 69 | income, weights = utils.extract_values(data, income, weights) 70 | if weights is None: 71 | weights = utils.not_empty_weights(weights, like=income) 72 | 73 | if income.ndim == 0: 74 | income = np.array([income]) 75 | elif income.ndim == 2: 76 | income = np.squeeze(income, axis=1) 77 | 78 | if weights.ndim == 0: 79 | weights = np.array([weights]) 80 | elif weights.ndim == 2: 81 | weights = np.squeeze(weights, axis=1) 82 | 83 | # Small shortcut to avoid warnings below 84 | if income.size <= 1: 85 | return np.nan 86 | 87 | # if sort is true then sort the variables. 88 | if sort: 89 | income, weights = utils._sort_values(income, weights) 90 | 91 | # main calc 92 | f_x = np.atleast_1d(utils.normalize(weights)) 93 | F_x = f_x.cumsum(axis=0) 94 | mu = np.sum(income * f_x) 95 | cov = np.cov(income, F_x, rowvar=False, aweights=f_x)[0, 1] 96 | return 2 * cov / mu 97 | 98 | 99 | def lorenz(income, weights=None, data=None): 100 | """Calculate Lorent's curve. 101 | 102 | In economics, the Lorenz curve is a graphical representation of the 103 | distribution of income or of wealth. It was developed by Max O. Lorenz in 104 | 1905 for representing grouped of the wealth distribution. This function 105 | compute the lorenz curve and returns a DF with two columns of axis x and y. 106 | 107 | Parameters 108 | ---------- 109 | data : pandas.DataFrame 110 | A pandas.DataFrame that contains data. 111 | income : str or 1d-array, optional 112 | Population or wights, if a DataFrame is passed then `income` should be 113 | a name of the column of DataFrame, else can pass a pandas.Series or 114 | array. 115 | weights : str or 1d-array 116 | Income, monetary variable, if a DataFrame is passed then `y`is a name 117 | of the series on this DataFrame, however, you can pass a pd.Series or 118 | np.array. 
119 | 120 | Returns 121 | ------- 122 | lorenz : pandas.Dataframe 123 | Lorenz distribution in a Dataframe with two columns, labeled x and y, 124 | that corresponds to plots axis. 125 | 126 | References 127 | ---------- 128 | Lorenz curve. (2017, February 11). In Wikipedia, The Free Encyclopedia. 129 | Retrieved 14:34, May 15, 2017, from 130 | https://en.wikipedia.org/w/index.php?title=Lorenz_curve&oldid=764853675 131 | """ 132 | if data is not None: 133 | income, weights = utils.extract_values(data, income, weights) 134 | if weights is None: 135 | weights = utils.not_empty_weights(weights, like=income) 136 | 137 | total_income = income * weights 138 | idx_sort = np.argsort(income) 139 | 140 | weights = weights[idx_sort].cumsum() / weights.sum() 141 | weights = weights.reshape(len(weights), 1) 142 | 143 | total_income = total_income[idx_sort].cumsum() / total_income.sum() 144 | total_income = total_income.reshape(len(total_income), 1) 145 | 146 | # to pandas 147 | data = np.hstack([weights, total_income]) 148 | columns = ["Equality", "Income"] 149 | index = pd.Index(weights.round(3).squeeze()) 150 | res = pd.DataFrame(data=data, columns=columns, index=index) 151 | res.index.name = "x" 152 | 153 | return res 154 | 155 | 156 | def gini(income, weights=None, data=None, sort=True): 157 | """Calculate Gini's index. 158 | 159 | The Gini coefficient (sometimes expressed as a Gini ratio or a 160 | normalized Gini index) is a measure of statistical dispersion intended to 161 | represent the income or wealth distribution of a nation's residents, and is 162 | the most commonly used measure of grouped. It was developed by Corrado 163 | Gini. 164 | The Gini coefficient measures the grouped among values of a frequency 165 | distribution (for example, levels of income). A Gini coefficient of zero 166 | expresses perfect equality, where all values are the same (for example, 167 | where everyone has the same income). 
A Gini coefficient of 1 (or 100%) 168 | expresses maximal grouped among values (e.g., for a large number of 169 | people, where only one person has all the income or consumption, and all 170 | others have none, the Gini coefficient will be very nearly one). 171 | 172 | Parameters 173 | ---------- 174 | data : pandas.DataFrame 175 | DataFrame that contains the data. 176 | income : str or np.array, optional 177 | Name of the monetary variable `x` in` df` 178 | weights : str or np.array, optional 179 | Name of the series containing the weights `x` in` df` 180 | sorted : bool, optional 181 | If the DataFrame is previously ordered by the variable `x`, it's must 182 | pass True, but False by default. 183 | 184 | Returns 185 | ------- 186 | gini : float 187 | Gini Index Value. 188 | 189 | Notes 190 | ----- 191 | The calculation is done following (discrete probability distribution): 192 | G = 1 - [∑_i^n f(y_i)·(S_{i-1} + S_i)] 193 | where: 194 | - y_i = Income 195 | - S_i = ∑_{j=1}^i y_i · f(y_i) 196 | 197 | Reference 198 | --------- 199 | - Gini coefficient. (2017, May 8). In Wikipedia, The Free Encyclopedia. 200 | Retrieved 14:30, May 15, 2017, from 201 | https://en.wikipedia.org/w/index.php?title=Gini_coefficient&oldid=779424616 202 | 203 | - Jenkins, S. (1988). Calculating income distribution indices 204 | from micro-data. National Tax Journal. http://doi.org/10.2307/41788716 205 | 206 | TODO 207 | ---- 208 | - Implement statistical deviation calculation, VAR (GINI) 209 | 210 | """ 211 | return concentration(data=data, income=income, weights=weights, sort=sort) 212 | 213 | 214 | def atkinson(income, weights=None, data=None, e=0.5) -> float: 215 | """Calculate atkinson index. 216 | 217 | More precisely labelled a family of income grouped measures, the 218 | theoretical range of Atkinson values is 0 to 1, with 0 being a state of 219 | equal distribution. 
def atkinson(income, weights=None, data=None, e=0.5) -> float:
    """Calculate Atkinson's index.

    More precisely labelled a family of income inequality measures, the
    theoretical range of Atkinson values is 0 to 1, with 0 being a state of
    equal distribution.

    An intuitive interpretation of this index is possible: Atkinson values
    can be used to calculate the proportion of total income that would be
    required to achieve an equal level of social welfare as at present if
    incomes were perfectly distributed.

    For example, an Atkinson index value of 0.20 suggests that we could
    achieve the same level of social welfare with only 1 - 0.20 = 80% of
    income.

    Parameters
    ----------
    income : array or str
        If `data` is None `income` must be an 1D-array, when `data` is a
        pd.DataFrame, you must pass the name of income variable as string.
    weights : array or str, optional
        If `data` is None `weights` must be an 1D-array, when `data` is a
        pd.DataFrame, you must pass the name of weights variable as string.
    data : pd.DataFrame, optional
        data is a pd.DataFrame that contains the variables.
    e : float, optional
        Epsilon parameter interpreted by the Atkinson index as inequality
        aversion; must be non-negative. ``e == 1`` is handled as the
        logarithmic limit case of the general formula.

    Returns
    -------
    atkinson : float

    Raises
    ------
    ValueError
        If both `income` and `data` are None, or if `e` is negative.

    References
    ----------
    Atkinson index. (2017, March 12). In Wikipedia, The Free Encyclopedia.
    Retrieved 14:35, May 15, 2017, from
    https://en.wikipedia.org/w/index.php?title=Atkinson_index

    TODO
    ----
    - Implement: CALCULATING INCOME DISTRIBUTION INDICES FROM MICRO-DATA
      http://www.jstor.org/stable/41788716
    - The results have differences with Stata; verify against it.
    """
    if (income is None) and (data is None):
        raise ValueError("Must pass at least one of both `income` or `df`")
    if e < 0:
        # BUG FIX: the old guard `elif e >= 0 or e < 1` was always true, so
        # a negative epsilon silently produced a meaningless value and the
        # error branch was unreachable. Fail loudly instead.
        raise ValueError("Not valid e value, it must be non-negative (e >= 0)")

    income, weights = utils.extract_values(data, income, weights)
    weights = utils.not_empty_weights(weights, income)

    # not-null condition
    income, weights = utils.not_null_condition(income, weights)

    # not-empty condition
    if len(income) == 0:
        return 0

    # auxiliar variables: mean and distribution
    mu = mean(variable=income, weights=weights)
    f_i = np.atleast_1d(weights / sum(weights))  # density function

    # main calc
    if e == 1:
        # Limit case: A(1) = 1 - exp(sum_i f_i * ln(y_i)) / mu.
        # BUG FIX: log(mu) is a scalar and must be subtracted once, outside
        # the sum; the old code subtracted it once per observation.
        return 1 - np.power(np.e, np.sum(f_i * np.log(income)) - np.log(mu))
    # General case, valid for any non-negative e != 1.
    return 1 - np.power(
        np.sum(f_i * np.power(income / mu, 1 - e)), 1 / (1 - e)
    )
312 | weights : array-like or str 313 | This variable represent weights of each person, if pass array-like 314 | then data must be None, else you pass str-name column in `data`. 315 | 316 | Returns 317 | ------- 318 | kakwani : float 319 | 320 | References 321 | ---------- 322 | Jenkins, S. (1988). Calculating income distribution indices from 323 | micro-data. National Tax Journal. http://doi.org/10.2307/41788716 324 | """ 325 | # main calc 326 | c_t = concentration(data=data, income=tax, weights=weights, sort=True) 327 | g_y = concentration( 328 | data=data, income=income_pre_tax, weights=weights, sort=True 329 | ) 330 | return c_t - g_y 331 | 332 | 333 | def reynolds_smolensky( 334 | income_pre_tax, income_post_tax, weights=None, data=None 335 | ): 336 | """Calculate Reynolds-Smolensky's index. 337 | 338 | The Reynolds-Smolensky (1977) index of the redistributive effect of 339 | taxes, which can also be interpreted as an index of progressivity 340 | (Lambert 1985), is defined as: 341 | 342 | L = Gx - Gy 343 | = [2/x]cov[x,F(x)] - [2/ybar] cov [y, F(y)]. 344 | 345 | Parameters 346 | ---------- 347 | data : pandas.DataFrame 348 | This variable is a DataFrame that contains all data required in it's 349 | columns. 350 | income_pre_tax : array-like or str 351 | This variable represent tax payment of person, if pass array-like 352 | then data must be None, else you pass str-name column in `data`. 353 | income_post_tax : array-like or str 354 | This variable represent income of person, if pass array-like 355 | then data must be None, else you pass str-name column in `data`. 356 | weights : array-like or str 357 | This variable represent weights of each person, if pass array-like 358 | then data must be None, else you pass str-name column in `data`. 359 | 360 | Returns 361 | ------- 362 | reynolds_smolensky : float 363 | 364 | References 365 | ---------- 366 | Jenkins, S. (1988). Calculating income distribution indices from 367 | micro-data. National Tax Journal. 
http://doi.org/10.2307/41788716 368 | """ 369 | g_y = concentration(data=data, income=income_post_tax, weights=weights) 370 | g_x = concentration(data=data, income=income_pre_tax, weights=weights) 371 | return g_x - g_y 372 | 373 | 374 | def theil(income, weights=None, data=None): 375 | """Calculate Theil's index. 376 | 377 | The Theil index is a statistic primarily used to measure economic 378 | grouped and other economic phenomena. It is a special case of the 379 | generalized entropy index. It can be viewed as a measure of redundancy, 380 | lack of diversity, isolation, segregation, grouped, non-randomness, and 381 | compressibility. It was proposed by econometrician Henri Theil. 382 | 383 | Parameters 384 | ---------- 385 | data : pandas.DataFrame 386 | This variable is a DataFrame that contains all data required in it's 387 | columns. 388 | income : array-like or str 389 | This variable represent tax payment of person, if pass array-like 390 | then data must be None, else you pass str-name column in `data`. 391 | weights : array-like or str 392 | This variable represent weights of each person, if pass array-like 393 | then data must be None, else you pass str-name column in `data`. 394 | 395 | Returns 396 | ------- 397 | theil : float 398 | 399 | References 400 | ---------- 401 | Theil index. (2016, December 17). In Wikipedia, The Free Encyclopedia. 
402 | Retrieved 14:17, May 15, 2017, from 403 | https://en.wikipedia.org/w/index.php?title=Theil_index&oldid=755407818 404 | 405 | """ 406 | if data is not None: 407 | income, weights = utils.extract_values(data, income, weights) 408 | else: 409 | income = income.copy() 410 | 411 | if weights is None: 412 | weights = utils.not_empty_weights(weights, like=income) 413 | else: 414 | weights = weights.copy() 415 | income, weights = utils.not_null_condition(income, weights) 416 | 417 | # variables needed 418 | mu = mean(variable=income, weights=weights) 419 | f_i = utils.normalize(weights) 420 | return np.sum((f_i * income / mu) * np.log(income / mu)) 421 | 422 | 423 | def avg_tax_rate(total_tax, total_base, weights=None, data=None): 424 | """Calculate average tax rate. 425 | 426 | This function compute the average tax rate given a base income and a total 427 | tax. 428 | 429 | Parameters 430 | ---------- 431 | total_base : str or numpy.array 432 | total_tax : str or numpy.array 433 | data : pd.DataFrame 434 | 435 | Returns 436 | ------- 437 | avg_tax_rate : float or pd.Series 438 | Is the ratio between mean the tax income and base of income. 439 | 440 | Reference 441 | --------- 442 | Panel de declarantes de IRPF 1999-2007: Metodología, estructura y 443 | variables. (2011). 444 | Panel de declarantes de IRPF 1999-2007: Metodología, estructura y 445 | variables. Documentos. 
446 | """ 447 | if ( 448 | isinstance(total_base, (np.ndarray)) 449 | or not isinstance(total_base, (list)) 450 | and not isinstance(total_base, (str)) 451 | ): 452 | n_cols = total_base.shape[1] 453 | elif isinstance(total_base, list): 454 | n_cols = len(total_base) 455 | else: 456 | n_cols = 1 457 | numerator = mean(data=data, variable=total_tax, weights=weights) 458 | denominator = mean(data=data, variable=total_base, weights=weights) 459 | # main calc 460 | res = numerator / denominator 461 | 462 | if data is not None: 463 | base_name = total_base 464 | tax_name = total_tax 465 | else: 466 | base_name = ["base"] * n_cols 467 | tax_name = [f"tax_{i}" for i in range(n_cols)] 468 | 469 | names = ["_".join([t, b]) for t, b in zip(tax_name, base_name, strict=False)] 470 | res = pd.Series(res, index=names) 471 | return res 472 | 473 | 474 | def top_rest(income, weights=None, data=None, top_percentage=10.0): 475 | """Calculate the 10:90 Ratio. 476 | 477 | Calculates the quotient between the number of contributions from the top 478 | 10% of contributors divided by the number contributions made by the other 479 | 90%. The ratio is 1 if the total contributions by the top contributors are 480 | equal to the cotnributions made by the rest; less than zero if the top 10% 481 | contributes less than the rest; and greater that 1 if the top 10% 482 | contributes more than the other ninety percent. 483 | 484 | Parameters 485 | ---------- 486 | income : array-like or str 487 | This variable represent tax payment of person, if pass array-like 488 | then data must be None, else you pass str-name column in `data`. 489 | weights : array-like or str 490 | This variable represent weights of each person, if pass array-like 491 | then data must be None, else you pass str-name column in `data`. 492 | All-ones by default 493 | data : pandas.DataFrame 494 | This variable is a DataFrame that contains all data required in it's 495 | columns. 
496 | top_percentage : float 497 | The richest x percent to consider. (10 percent by default) 498 | It must be a number between 0 and 100 499 | 500 | Returns 501 | ------- 502 | ratio : float 503 | 504 | References 505 | ---------- 506 | Participation Inequality in Wikis: A Temporal Analysis Using WikiChron. 507 | Serrano, Abel & Arroyo, Javier & Hassan, Samer. (2018). 508 | DOI: 10.1145/3233391.3233536. 509 | """ 510 | if data is not None: 511 | income, weights = utils.extract_values(data, income, weights) 512 | else: 513 | income = income.copy() 514 | weights = np.ones_like(income) if weights is None else weights.copy() 515 | 516 | # Small shortcut to avoid divide by zero below 517 | if income.size <= 1: 518 | return np.nan 519 | 520 | income, weights = utils._sort_values(income, weights) 521 | 522 | # variables needed 523 | weights = utils.normalize(weights) 524 | cumw = np.cumsum(weights) 525 | ftosearch = 1 - top_percentage / 100 526 | k = np.searchsorted(cumw, ftosearch, side='right') 527 | f_i = np.atleast_1d(income*weights) 528 | 529 | t = np.sum(f_i[k:]) 530 | r = np.sum(f_i[:k]) 531 | 532 | # Correction 533 | if k > 0: 534 | error = (ftosearch - cumw[k-1]) * income[k] 535 | t -= error 536 | r += error 537 | 538 | return t / r 539 | 540 | 541 | def hoover(income, weights=None, data=None): 542 | """Calculate Hoover index. 543 | 544 | The Hoover index, also known as the Robin Hood index or the Schutz index, 545 | is a measure of income metrics. It is equal to the portion of the total 546 | community income that would have to be redistributed (taken from the richer 547 | half of the population and given to the poorer half) for there to be income 548 | uniformity. 549 | 550 | Formula: 551 | 552 | H = 1/2 sum_i( |xi - mu| ) / sum_i(xi) 553 | 554 | Parameters 555 | ---------- 556 | income : array-like or str 557 | This variable represent tax payment of person, if pass array-like 558 | then data must be None, else you pass str-name column in `data`. 
559 | weights : array-like or str 560 | This variable represent weights of each person, if pass array-like 561 | then data must be None, else you pass str-name column in `data`. 562 | data : pandas.DataFrame 563 | This variable is a DataFrame that contains all data required in it's 564 | columns. 565 | 566 | Returns 567 | ------- 568 | hoover : float 569 | 570 | References 571 | ---------- 572 | Hoover index : https://en.wikipedia.org/wiki/Hoover_index 573 | """ 574 | if data is not None: 575 | income, weights = utils.extract_values(data, income, weights) 576 | else: 577 | income = income.copy() 578 | if weights is None: 579 | weights = utils.not_empty_weights(weights, like=income) 580 | else: 581 | weights = weights.copy() 582 | 583 | income, weights = utils.not_null_condition(income, weights) 584 | 585 | # variables needed 586 | mu = mean(variable=income, weights=weights) 587 | f_i = utils.normalize(weights) 588 | xi = f_i * income 589 | 590 | # main calc 591 | h = np.sum(abs(xi - mu)) * 0.5 / sum(xi) 592 | 593 | return h 594 | -------------------------------------------------------------------------------- /tests/test_moments.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 54, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "import scipy.stats as stats\n", 12 | "\n", 13 | "import ineqpy" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "# Random Variable" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 42, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "x = np.random.randn(10)" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 43, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "w = abs(np.random.randn(10))\n", 39 | "w = w / w.sum()" 40 | ] 41 | }, 42 | { 43 | 
"cell_type": "code", 44 | "execution_count": 44, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "data": { 49 | "text/plain": [ 50 | "0.079282307081652598" 51 | ] 52 | }, 53 | "execution_count": 44, 54 | "metadata": {}, 55 | "output_type": "execute_result" 56 | } 57 | ], 58 | "source": [ 59 | "np.mean(x)" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 45, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "data": { 69 | "text/plain": [ 70 | "0.079282307081652598" 71 | ] 72 | }, 73 | "execution_count": 45, 74 | "metadata": {}, 75 | "output_type": "execute_result" 76 | } 77 | ], 78 | "source": [ 79 | "ineqpy.mean(variable=x)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 46, 85 | "metadata": {}, 86 | "outputs": [ 87 | { 88 | "data": { 89 | "text/plain": [ 90 | "0.26771028632127503" 91 | ] 92 | }, 93 | "execution_count": 46, 94 | "metadata": {}, 95 | "output_type": "execute_result" 96 | } 97 | ], 98 | "source": [ 99 | "np.var(x)" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 47, 105 | "metadata": {}, 106 | "outputs": [ 107 | { 108 | "data": { 109 | "text/plain": [ 110 | "0.26771028632127503" 111 | ] 112 | }, 113 | "execution_count": 47, 114 | "metadata": {}, 115 | "output_type": "execute_result" 116 | } 117 | ], 118 | "source": [ 119 | "ineqpy.var(variable=x)" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 48, 125 | "metadata": {}, 126 | "outputs": [ 127 | { 128 | "data": { 129 | "text/plain": [ 130 | "-0.08683494039388037" 131 | ] 132 | }, 133 | "execution_count": 48, 134 | "metadata": {}, 135 | "output_type": "execute_result" 136 | } 137 | ], 138 | "source": [ 139 | "stats.skew(x)" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 49, 145 | "metadata": {}, 146 | "outputs": [ 147 | { 148 | "data": { 149 | "text/plain": [ 150 | "-0.086834940393880372" 151 | ] 152 | }, 153 | "execution_count": 49, 154 | "metadata": {}, 155 | 
"output_type": "execute_result" 156 | } 157 | ], 158 | "source": [ 159 | "ineqpy.skew(variable=x)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 52, 165 | "metadata": {}, 166 | "outputs": [ 167 | { 168 | "data": { 169 | "text/plain": [ 170 | "2.1915828922522693" 171 | ] 172 | }, 173 | "execution_count": 52, 174 | "metadata": {}, 175 | "output_type": "execute_result" 176 | } 177 | ], 178 | "source": [ 179 | "stats.kurtosis(x) + 3" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 51, 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "data": { 189 | "text/plain": [ 190 | "2.1915828922522693" 191 | ] 192 | }, 193 | "execution_count": 51, 194 | "metadata": {}, 195 | "output_type": "execute_result" 196 | } 197 | ], 198 | "source": [ 199 | "ineqpy.kurt(variable=x)" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "# Repeated values" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 4, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "x = np.array([1,2,2,3,3,3,4,4,4,4,5,5,5,5,5,6,6,6,6,7,7,7,8,8,9])" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 5, 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [ 224 | "xi, fi = np.unique(x, return_counts=True)" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 6, 230 | "metadata": {}, 231 | "outputs": [ 232 | { 233 | "data": { 234 | "text/plain": [ 235 | "array([1, 2, 3, 4, 5, 6, 7, 8, 9])" 236 | ] 237 | }, 238 | "execution_count": 6, 239 | "metadata": {}, 240 | "output_type": "execute_result" 241 | } 242 | ], 243 | "source": [ 244 | "xi # values" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": 7, 250 | "metadata": {}, 251 | "outputs": [ 252 | { 253 | "data": { 254 | "text/plain": [ 255 | "array([1, 2, 3, 4, 5, 4, 3, 2, 1])" 256 | ] 257 | }, 258 | "execution_count": 7, 259 | 
"metadata": {}, 260 | "output_type": "execute_result" 261 | } 262 | ], 263 | "source": [ 264 | "fi # absolute frequency" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": 18, 270 | "metadata": {}, 271 | "outputs": [], 272 | "source": [ 273 | "data = pd.DataFrame(np.c_[x, np.ones(len(x))], columns=list('xf'))" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 19, 279 | "metadata": {}, 280 | "outputs": [ 281 | { 282 | "data": { 283 | "text/html": [ 284 | "
\n", 285 | "\n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | "
xf
01.01.0
12.01.0
22.01.0
33.01.0
43.01.0
53.01.0
64.01.0
74.01.0
84.01.0
94.01.0
105.01.0
115.01.0
125.01.0
135.01.0
145.01.0
156.01.0
166.01.0
176.01.0
186.01.0
197.01.0
207.01.0
217.01.0
228.01.0
238.01.0
249.01.0
\n", 421 | "
" 422 | ], 423 | "text/plain": [ 424 | " x f\n", 425 | "0 1.0 1.0\n", 426 | "1 2.0 1.0\n", 427 | "2 2.0 1.0\n", 428 | "3 3.0 1.0\n", 429 | "4 3.0 1.0\n", 430 | "5 3.0 1.0\n", 431 | "6 4.0 1.0\n", 432 | "7 4.0 1.0\n", 433 | "8 4.0 1.0\n", 434 | "9 4.0 1.0\n", 435 | "10 5.0 1.0\n", 436 | "11 5.0 1.0\n", 437 | "12 5.0 1.0\n", 438 | "13 5.0 1.0\n", 439 | "14 5.0 1.0\n", 440 | "15 6.0 1.0\n", 441 | "16 6.0 1.0\n", 442 | "17 6.0 1.0\n", 443 | "18 6.0 1.0\n", 444 | "19 7.0 1.0\n", 445 | "20 7.0 1.0\n", 446 | "21 7.0 1.0\n", 447 | "22 8.0 1.0\n", 448 | "23 8.0 1.0\n", 449 | "24 9.0 1.0" 450 | ] 451 | }, 452 | "execution_count": 19, 453 | "metadata": {}, 454 | "output_type": "execute_result" 455 | } 456 | ], 457 | "source": [ 458 | "data" 459 | ] 460 | }, 461 | { 462 | "cell_type": "code", 463 | "execution_count": 20, 464 | "metadata": {}, 465 | "outputs": [], 466 | "source": [ 467 | "data_weighted = pd.DataFrame(np.c_[xi,fi], columns=list('xf'))" 468 | ] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": 21, 473 | "metadata": {}, 474 | "outputs": [ 475 | { 476 | "data": { 477 | "text/html": [ 478 | "
\n", 479 | "\n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | "
xf
011
122
233
344
455
564
673
782
891
\n", 535 | "
" 536 | ], 537 | "text/plain": [ 538 | " x f\n", 539 | "0 1 1\n", 540 | "1 2 2\n", 541 | "2 3 3\n", 542 | "3 4 4\n", 543 | "4 5 5\n", 544 | "5 6 4\n", 545 | "6 7 3\n", 546 | "7 8 2\n", 547 | "8 9 1" 548 | ] 549 | }, 550 | "execution_count": 21, 551 | "metadata": {}, 552 | "output_type": "execute_result" 553 | } 554 | ], 555 | "source": [ 556 | "data_weighted" 557 | ] 558 | }, 559 | { 560 | "cell_type": "markdown", 561 | "metadata": {}, 562 | "source": [ 563 | "### Mean" 564 | ] 565 | }, 566 | { 567 | "cell_type": "code", 568 | "execution_count": 22, 569 | "metadata": {}, 570 | "outputs": [ 571 | { 572 | "data": { 573 | "text/plain": [ 574 | "5.0" 575 | ] 576 | }, 577 | "execution_count": 22, 578 | "metadata": {}, 579 | "output_type": "execute_result" 580 | } 581 | ], 582 | "source": [ 583 | "np.mean(x)" 584 | ] 585 | }, 586 | { 587 | "cell_type": "code", 588 | "execution_count": 23, 589 | "metadata": {}, 590 | "outputs": [ 591 | { 592 | "data": { 593 | "text/plain": [ 594 | "x 5.0\n", 595 | "f 1.0\n", 596 | "dtype: float64" 597 | ] 598 | }, 599 | "execution_count": 23, 600 | "metadata": {}, 601 | "output_type": "execute_result" 602 | } 603 | ], 604 | "source": [ 605 | "data.mean()" 606 | ] 607 | }, 608 | { 609 | "cell_type": "code", 610 | "execution_count": 24, 611 | "metadata": {}, 612 | "outputs": [ 613 | { 614 | "data": { 615 | "text/plain": [ 616 | "5.0" 617 | ] 618 | }, 619 | "execution_count": 24, 620 | "metadata": {}, 621 | "output_type": "execute_result" 622 | } 623 | ], 624 | "source": [ 625 | "ineqpy.mean(variable=x)" 626 | ] 627 | }, 628 | { 629 | "cell_type": "code", 630 | "execution_count": 25, 631 | "metadata": {}, 632 | "outputs": [ 633 | { 634 | "data": { 635 | "text/plain": [ 636 | "5.0" 637 | ] 638 | }, 639 | "execution_count": 25, 640 | "metadata": {}, 641 | "output_type": "execute_result" 642 | } 643 | ], 644 | "source": [ 645 | "ineqpy.mean(data, 'x')" 646 | ] 647 | }, 648 | { 649 | "cell_type": "code", 650 | "execution_count": 26, 651 | 
"metadata": {}, 652 | "outputs": [ 653 | { 654 | "data": { 655 | "text/plain": [ 656 | "5.0" 657 | ] 658 | }, 659 | "execution_count": 26, 660 | "metadata": {}, 661 | "output_type": "execute_result" 662 | } 663 | ], 664 | "source": [ 665 | "ineqpy.mean(variable=xi, weights=fi)" 666 | ] 667 | }, 668 | { 669 | "cell_type": "code", 670 | "execution_count": 27, 671 | "metadata": {}, 672 | "outputs": [ 673 | { 674 | "data": { 675 | "text/plain": [ 676 | "5.0" 677 | ] 678 | }, 679 | "execution_count": 27, 680 | "metadata": {}, 681 | "output_type": "execute_result" 682 | } 683 | ], 684 | "source": [ 685 | "ineqpy.mean(data, 'x', 'f')" 686 | ] 687 | }, 688 | { 689 | "cell_type": "markdown", 690 | "metadata": {}, 691 | "source": [ 692 | "### Variance" 693 | ] 694 | }, 695 | { 696 | "cell_type": "code", 697 | "execution_count": 28, 698 | "metadata": {}, 699 | "outputs": [ 700 | { 701 | "data": { 702 | "text/plain": [ 703 | "4.166666666666667" 704 | ] 705 | }, 706 | "execution_count": 28, 707 | "metadata": {}, 708 | "output_type": "execute_result" 709 | } 710 | ], 711 | "source": [ 712 | "np.var(x, ddof=1) # numpy (ddof=1)" 713 | ] 714 | }, 715 | { 716 | "cell_type": "code", 717 | "execution_count": 29, 718 | "metadata": {}, 719 | "outputs": [ 720 | { 721 | "data": { 722 | "text/plain": [ 723 | "0.40000000000000002" 724 | ] 725 | }, 726 | "execution_count": 29, 727 | "metadata": {}, 728 | "output_type": "execute_result" 729 | } 730 | ], 731 | "source": [ 732 | "stats.variation(x) # scipy (ddof=0)" 733 | ] 734 | }, 735 | { 736 | "cell_type": "code", 737 | "execution_count": 30, 738 | "metadata": {}, 739 | "outputs": [ 740 | { 741 | "data": { 742 | "text/plain": [ 743 | "x 4.166667\n", 744 | "f 0.000000\n", 745 | "dtype: float64" 746 | ] 747 | }, 748 | "execution_count": 30, 749 | "metadata": {}, 750 | "output_type": "execute_result" 751 | } 752 | ], 753 | "source": [ 754 | "data.var() # pandas (ddof=1)" 755 | ] 756 | }, 757 | { 758 | "cell_type": "code", 759 | 
"execution_count": 31, 760 | "metadata": {}, 761 | "outputs": [ 762 | { 763 | "data": { 764 | "text/plain": [ 765 | "4.0" 766 | ] 767 | }, 768 | "execution_count": 31, 769 | "metadata": {}, 770 | "output_type": "execute_result" 771 | } 772 | ], 773 | "source": [ 774 | "ineqpy.var(variable=x)" 775 | ] 776 | }, 777 | { 778 | "cell_type": "code", 779 | "execution_count": 32, 780 | "metadata": {}, 781 | "outputs": [ 782 | { 783 | "data": { 784 | "text/plain": [ 785 | "4.0" 786 | ] 787 | }, 788 | "execution_count": 32, 789 | "metadata": {}, 790 | "output_type": "execute_result" 791 | } 792 | ], 793 | "source": [ 794 | "ineqpy.var(variable=xi, weights=fi)" 795 | ] 796 | }, 797 | { 798 | "cell_type": "markdown", 799 | "metadata": {}, 800 | "source": [ 801 | "### Skewness" 802 | ] 803 | }, 804 | { 805 | "cell_type": "code", 806 | "execution_count": 33, 807 | "metadata": {}, 808 | "outputs": [ 809 | { 810 | "data": { 811 | "text/plain": [ 812 | "0.0" 813 | ] 814 | }, 815 | "execution_count": 33, 816 | "metadata": {}, 817 | "output_type": "execute_result" 818 | } 819 | ], 820 | "source": [ 821 | "stats.skew(x)" 822 | ] 823 | }, 824 | { 825 | "cell_type": "code", 826 | "execution_count": 41, 827 | "metadata": {}, 828 | "outputs": [ 829 | { 830 | "data": { 831 | "text/plain": [ 832 | "x 0.0\n", 833 | "f 0.0\n", 834 | "dtype: float64" 835 | ] 836 | }, 837 | "execution_count": 41, 838 | "metadata": {}, 839 | "output_type": "execute_result" 840 | } 841 | ], 842 | "source": [ 843 | "data.skew()" 844 | ] 845 | }, 846 | { 847 | "cell_type": "code", 848 | "execution_count": 34, 849 | "metadata": {}, 850 | "outputs": [ 851 | { 852 | "data": { 853 | "text/plain": [ 854 | "0.0" 855 | ] 856 | }, 857 | "execution_count": 34, 858 | "metadata": {}, 859 | "output_type": "execute_result" 860 | } 861 | ], 862 | "source": [ 863 | "ineqpy.skew(variable=x)" 864 | ] 865 | }, 866 | { 867 | "cell_type": "code", 868 | "execution_count": 35, 869 | "metadata": {}, 870 | "outputs": [ 871 | { 872 | 
"data": { 873 | "text/plain": [ 874 | "0.0" 875 | ] 876 | }, 877 | "execution_count": 35, 878 | "metadata": {}, 879 | "output_type": "execute_result" 880 | } 881 | ], 882 | "source": [ 883 | "ineqpy.skew(variable=xi, weights=fi)" 884 | ] 885 | }, 886 | { 887 | "cell_type": "markdown", 888 | "metadata": {}, 889 | "source": [ 890 | "### Kurtosis" 891 | ] 892 | }, 893 | { 894 | "cell_type": "code", 895 | "execution_count": 36, 896 | "metadata": {}, 897 | "outputs": [ 898 | { 899 | "data": { 900 | "text/plain": [ 901 | "2.35" 902 | ] 903 | }, 904 | "execution_count": 36, 905 | "metadata": {}, 906 | "output_type": "execute_result" 907 | } 908 | ], 909 | "source": [ 910 | "stats.kurtosis(x) + 3" 911 | ] 912 | }, 913 | { 914 | "cell_type": "code", 915 | "execution_count": 40, 916 | "metadata": {}, 917 | "outputs": [ 918 | { 919 | "data": { 920 | "text/plain": [ 921 | "x 2.483004\n", 922 | "f 3.000000\n", 923 | "dtype: float64" 924 | ] 925 | }, 926 | "execution_count": 40, 927 | "metadata": {}, 928 | "output_type": "execute_result" 929 | } 930 | ], 931 | "source": [ 932 | "data.kurt()+3" 933 | ] 934 | }, 935 | { 936 | "cell_type": "code", 937 | "execution_count": 37, 938 | "metadata": {}, 939 | "outputs": [ 940 | { 941 | "data": { 942 | "text/plain": [ 943 | "2.3500000000000001" 944 | ] 945 | }, 946 | "execution_count": 37, 947 | "metadata": {}, 948 | "output_type": "execute_result" 949 | } 950 | ], 951 | "source": [ 952 | "ineqpy.kurt(variable=x)" 953 | ] 954 | }, 955 | { 956 | "cell_type": "code", 957 | "execution_count": 38, 958 | "metadata": {}, 959 | "outputs": [ 960 | { 961 | "data": { 962 | "text/plain": [ 963 | "2.3500000000000001" 964 | ] 965 | }, 966 | "execution_count": 38, 967 | "metadata": {}, 968 | "output_type": "execute_result" 969 | } 970 | ], 971 | "source": [ 972 | "ineqpy.kurt(variable=xi, weights=fi)" 973 | ] 974 | }, 975 | { 976 | "cell_type": "code", 977 | "execution_count": null, 978 | "metadata": {}, 979 | "outputs": [], 980 | "source": [] 981 | } 
982 | ], 983 | "metadata": { 984 | "kernelspec": { 985 | "display_name": "Py3 (dev)", 986 | "language": "python", 987 | "name": "dev" 988 | }, 989 | "language_info": { 990 | "codemirror_mode": { 991 | "name": "ipython", 992 | "version": 3 993 | }, 994 | "file_extension": ".py", 995 | "mimetype": "text/x-python", 996 | "name": "python", 997 | "nbconvert_exporter": "python", 998 | "pygments_lexer": "ipython3", 999 | "version": "3.5.2" 1000 | } 1001 | }, 1002 | "nbformat": 4, 1003 | "nbformat_minor": 2 1004 | } 1005 | -------------------------------------------------------------------------------- /examples/quick_start.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# load packages\n", 12 | "import pandas as pd\n", 13 | "import numpy as np\n", 14 | "import ineqpy as inq\n", 15 | "%matplotlib inline" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "# First-steps" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": { 29 | "collapsed": true 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "# load data\n", 34 | "data = pd.read_csv('eusilc.csv', index_col=0).dropna()\n", 35 | "svy = inq.api.Survey(data, weights='rb050')" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "metadata": {}, 42 | "outputs": [ 43 | { 44 | "data": { 45 | "text/plain": [ 46 | "0.26516133165507139" 47 | ] 48 | }, 49 | "execution_count": 3, 50 | "metadata": {}, 51 | "output_type": "execute_result" 52 | } 53 | ], 54 | "source": [ 55 | "svy.gini('eqincome')" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 4, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "data": { 65 | "text/plain": [ 66 | "0.060002757905598392" 67 | ] 68 | }, 69 | "execution_count": 4, 70 
| "metadata": {}, 71 | "output_type": "execute_result" 72 | } 73 | ], 74 | "source": [ 75 | "svy.atkinson('eqincome')" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 5, 81 | "metadata": {}, 82 | "outputs": [ 83 | { 84 | "data": { 85 | "text/plain": [ 86 | "0.12064816023130914" 87 | ] 88 | }, 89 | "execution_count": 5, 90 | "metadata": {}, 91 | "output_type": "execute_result" 92 | } 93 | ], 94 | "source": [ 95 | "svy.theil('eqincome')" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 6, 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "data": { 105 | "text/plain": [ 106 | "20431.292738646902" 107 | ] 108 | }, 109 | "execution_count": 6, 110 | "metadata": {}, 111 | "output_type": "execute_result" 112 | } 113 | ], 114 | "source": [ 115 | "svy.mean('eqincome')" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 7, 121 | "metadata": {}, 122 | "outputs": [ 123 | { 124 | "data": { 125 | "text/plain": [ 126 | "18658.461904761898" 127 | ] 128 | }, 129 | "execution_count": 7, 130 | "metadata": {}, 131 | "output_type": "execute_result" 132 | } 133 | ], 134 | "source": [ 135 | "svy.percentile('eqincome')" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 8, 141 | "metadata": {}, 142 | "outputs": [ 143 | { 144 | "data": { 145 | "text/plain": [ 146 | "13.28551976978007" 147 | ] 148 | }, 149 | "execution_count": 8, 150 | "metadata": {}, 151 | "output_type": "execute_result" 152 | } 153 | ], 154 | "source": [ 155 | "svy.kurt('eqincome')" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 9, 161 | "metadata": {}, 162 | "outputs": [ 163 | { 164 | "data": { 165 | "text/plain": [ 166 | "2.1150515104443115" 167 | ] 168 | }, 169 | "execution_count": 9, 170 | "metadata": {}, 171 | "output_type": "execute_result" 172 | } 173 | ], 174 | "source": [ 175 | "svy.skew('eqincome')" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 10, 
181 | "metadata": {}, 182 | "outputs": [ 183 | { 184 | "data": { 185 | "text/plain": [ 186 | "" 187 | ] 188 | }, 189 | "execution_count": 10, 190 | "metadata": {}, 191 | "output_type": "execute_result" 192 | }, 193 | { 194 | "data": { 195 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUQAAAFACAYAAADEewXQAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XdclXX/x/HXV4aIIg7cOHDvxXClWWnZMM2RMwU1NVO7\nW3c27rL1a97trCwV9y41c+WqXHEQ3BMVFXHgQhSZ5/v746JucnGAc7jOgc/z8eARcK5zXZ9L4N01\nvtf3o7TWCCGEgGJmFyCEEM5CAlEIIbJIIAohRBYJRCGEyCKBKIQQWSQQhRAiiwSiEEJkkUAUQogs\nEohCCJHF3awN+/n56Vq1apm1eSFEIbV9+/bzWusKeXmvaYFYq1YtIiMjzdq8EKKQUkodz+t75ZRZ\nCCGySCAKIUQWCUQhhMhi2jXEW0lPTycuLo6UlBSzS3EJXl5e+Pv74+HhYXYpQhQKThWIcXFx+Pj4\nUKtWLZRSZpfj1LTWXLhwgbi4OAICAswuR4hCwalOmVNSUihfvryEoQ2UUpQvX16OpoWwI6cKREDC\nMBfk30oI+8oxEJVSU5VS55RSe27zulJKfaGUilFK7VJKtbZ/mUII4Xi2HCGGA93u8PqDQL2sj5HA\nN/kvyzxubm60bNny74/333/f7tuoVasW58+fB6B9+/YAxMbGMmfOHLtvSwhhuxxvqmitf1dK1brD\nIj2AGdroVrVNKVVGKVVFa33aTjUWqBIlSrBjx44C296WLVuA/wXiwIEDC2zbQhQ2sduW5ev99riG\nWA04me3ruKzv3UQpNVIpFamUikxISLDDpgvOqlWraNiwIXfddRfjx4/nkUceAWDixIl8/PHHfy/X\ntGlTYmNjAejZsyeBgYE0adKEyZMn33K9pUqVAmDChAn88ccftGzZkk8//ZSOHTv+I5g7dOjArl27\nHLR3Qri+3w4lcGrlR/lahz2G3dzqyv4te5tqrScDkwGCgoLu2P/0zZ/3si/+Sv6ry6Zx1dK80b3J\nHZe5fv06LVu2/Pvrl19+mR49evDkk0+yfv166tatS79+/Wza3tSpUylXrhzXr18nODiY3r17U758\n+Vsu+/777/Pxxx+zfPlyAMqVK0d4eDifffYZhw4dIjU1lebNm9u4p0IULQsjT/Laj9FEex7O13rs\ncYQYB1TP9rU/EG+H9Zrir1Pmvz769evHgQMHCAgIoF69eiilGDx4sE3r+uKLL2jRogVt27bl5MmT\nHD5s+w+rb9++LF++nPT0dKZOnUpoaGge90iIwktrzRfrDvPiol0M8j+PN9fztT57HCEuA8YqpeYB\nbYBEe1w/zOlIrqDdboiLu7s7Vqv176//Ghe4ceNG1q5dy9atW/H29qZz5865GjPo7e1N165dWbp0\nKQsWLJCZgYS4QUamlf8s3cPciJP0al2NV/x2w7n8DUWzZdjNXGAr0EApFaeUGq6UGq2UGp21yArg\nKBADfA+MyVdFTqhhw4YcO3aMI0eOADB37ty/X6tVqxZRUVEAREVFcezYMQASExMpW7Ys3t7eHDhw\ngG3btt1xGz4+PiQlJf3jeyNGjGD8+PEEBwdTrlw5e+6SEC4tOS2DkTO3MzfiJE/fU4f/9m2Be8wq\nqNoqX+u15S7zgBxe18DT+arCidx4DbFbt268//77TJ48mYcffhg/Pz/uuusu9uwxhmX27t2bGTNm\n0LJlS4KDg6lfv/7f7/v2229p3rw5D
Ro0oG3btnfcbvPmzXF3d6dFixaEhoby7LPPEhgYSOnSpQkL\nC3PcDgvhYs5fTWV4uIXdpxJ5p2dTBretCZdiIT4aur4FbMzzup3qWWZnkJmZecvvd+vWjQMHDgDG\n6fBfgViiRAnWrFlzy/esXLnylt//6y40wNWrVwHw8PBg3bp1/1guPj4eq9XK/fffn6t9EKKwij1/\njaHTIjh7JYXvngiia+NKxgt7lxj/bdwD+Fee1+90j+4Jw4wZM2jTpg3vvvsuxYrJj0mI6BOX6PXN\nFpJSMpjzZNv/hSHA3p+gamsoWytf25AjxDzo3LkznTt3dug2hgwZwpAhQxy6DSFcxdp9Zxk7N4qK\nPl5MHxZCgF/J/70YvwNO74Bu+X+qTAJRCOHUZv95nP8s2UPTar5MGRpMBZ/i/1wgcgq4l4AWd7zd\nYRMJRCGEU9Ja8/Gag3y94Qj3NKjA14Na4+15Q2Rdvwy7FkLzvlCiTL63KYEohHA6aRlWJvy4ix+j\nTtE/uDrv9GyKu9strqXvnAcZ1yFouF22K4EohHAqSSnpjJkdxR+Hz/Nc1/qMu7furR+MyMyAP78B\n/xCo2vLm1/NAAvEGpUqV+nsojBCiYJ29kkLYNAsHzybxYZ/mPB5U/fYL71tijD+8/127bV8CUQjh\nFGLOJTF0qoVLyWlMDQ3m7voVbr+w1rDpM/CrDw0eslsNMsDtNjZu3Ejnzp3p06cPDRs2ZNCgQRgP\n5YDFYqF9+/a0aNGCkJAQkpKSSElJISwsjGbNmtGqVSs2bNgAQHh4OD179qR79+4EBATw1Vdf8ckn\nn9CqVSvatm3LxYsXAThy5AjdunUjMDCQjh07/j0IXIiiwBJ7kd7fbCU1w8qCUe3uHIYAMWvh7G7o\n8C+w4zhd5z1CXDkBzuy27zorN4MHbR+rFB0dzd69e6latSodOnRg8+bNhISE0K9fP+bPn09wcDBX\nrlyhRIkSfP755wDs3r2bAwcOcP/993Po0CEA9uzZQ3R0NCkpKdStW5cPPviA6Ohonn32WWbMmMG/\n/vUvRo4cybfffku9evX4888/GTNmDOvXr7fv/gvhhFbuPs0z83fgX7YE08NCqF7OO+c3bfoUSleD\nZn3tWovzBqITCAkJwd/fH4CWLVsSGxuLr68vVapUITg4GIDSpUsDsGnTJsaNGwcYk0HUrFnz70C8\n55578PHxwcfHB19fX7p37w5As2bN2LVrF1evXmXLli307fu/H25qamqB7acQZpm66Rhv/7KP1jXK\n8sOQIMqW9Mz5TbGb4fhmeOA9cLdh+Vxw3kDMxZGcoxQv/r8BoG5ubmRkZKC1vuUdr79Op3NaT7Fi\nxf7+ulixYmRkZGC1WilTpkyBti4QwkxWq+a9lfv5/o9jPNCkEp/3b4WXh1vOb9Qa1r8DpSpDYKjd\n65JriLnUsGFD4uPjsVgsACQlJZGRkUGnTp2YPXs2AIcOHeLEiRM0aNDApnWWLl2agIAAFi5cCBjh\nunPnTsfsgBAmS83I5Jn5O/j+j2MMbVeTSYMCbQtDgKMb4MQW6PQCeNpwap1LEoi55Onpyfz58xk3\nbhwtWrSga9eupKSkMGbMGDIzM2nWrBn9+vUjPDz8H0eGOZk9ezZTpkyhRYsWNGnShKVLlzpwL4Qw\nR+L1dIZMieDnnfFMeLAhEx9tglsxGyd11RrWvwul/aG1Y57zV3c61XOkoKAgfeMs0Pv376dRo0am\n1OOq5N9MuIr4y9cJnRbBsfPX+LhvC3q0vGUvuts7uArm9oPuX0Dg0NsuppTarrUOykuNznsNUQhR\naOw/fYWwaRaupWYwPSyE9nX9crcCa6Zx7bBsALR0XKteCUQhhENtiTnPqJnbKVncnQWj29GoSunc\nr2TnPGPcYe8p4OZh/yKzOF0g3u4urriZWZc7hLDV0h2neGHhTgL8ShIeFkLVMiVyv5K0a7D+bagW\nC
E1727/IbJzqpoqXlxcXLlyQP3QbaK25cOECXl5eZpcixE201nyz8QjPzNtBYM2yLBzdPm9hCLDl\nK0g6DQ/8Hzj4YMmpjhD9/f2Ji4sjISHB7FJcgpeX198Dx4VwFplWzZs/72XG1uN0b1GVj/s2p7i7\njcNqbpR0BjZ/bvRKqXHnRm324FSB6OHhQUBAgNllCCHyKCU9k2fmRbN671lGdqrNhG4NKWbrsJpb\nWf8OZKZBl4n2KvGOnCoQhRCu69K1NIZPtxB98jJvdG9MWId8HtzER0P0LGj3NJSrbZ8icyCBKITI\nt5MXkxk6NYK4y9eZNLA1Dzarkr8VWq3wywtQsgLc/W/7FGkDCUQhRL7sjkskLNxCeqaV2SPaEFyr\nXP5XumMWnIqEx74DL9/8r89GEohCiDzbePAcY2ZHUdbbk3kj21C3ok/+V5p8EX59A2q0g+b98r++\nXJBAFELkyYLIk7z8424aVPIhPCyYiqXtNARs/TuQkggPfezwYTY3kkAUQuSK1pov1sXw6dpDdKzn\nx6RBrfHxstPTI/HREDkV2j4FlZvaZ525IIEohLBZRqaV15bsYZ7lJL1aV+OD3s3xuFV70Lz460ZK\nqYrQeYJ91plLEohCCJtcS81g7JwoNhxMYOw9dXn+/vr2fcx2+7SsGymTC/RGSnYSiEKIHCUkpTJ8\nuoU9pxJ597GmDGpT074buBIPaydC7c7Q/HH7rjsXJBCFEHd0NOEqodMsnEtKYfITQXRpXMn+G1n5\nb+OJlEc+LfAbKdlJIAohbivqxCWGh1tQSjFvZDtaVi9j/43sXw77fzYezyugJ1JuRwJRCHFLv+47\ny7i5UVQq7cX0sBBq+ZW0/0ZSrsCKF6BSM2g31v7rzyUJRCHETWZuO84bS/fQrJovU0KD8Stle3+g\nXFn3Jlw9C/1nO3TiV1tJIAoh/qa15qPVB5m08Qj3NazIlwNb4e3poJg48SdYphhjDqsFOmYbuSSB\nKIQAIC3DyoTFu/gx+hQDQmrwdo8muNtrjOGN0lPg5/Hg6w/3vOqYbeSBBKIQgqSUdJ6aFcWmmPM8\n37U+Y++t69hWHhvfg4QDMGgxFC/luO3kkgSiEEXc2SsphE6zcPhsEh/1aU7foOqO3eBJC2z5AloP\nhXpdHLutXJJAFKIIO3w2idBpFi4npzElNJi761dw7AbTr8OS0VC6Gtz/jmO3lQc2XSBQSnVTSh1U\nSsUopW56yFApVUMptUEpFa2U2qWUesj+pQoh7Cni2EV6f7OFtEwr80e1c3wYAqx7Gy7EQI+vwCsP\n7UgdLMdAVEq5AV8DDwKNgQFKqcY3LPYasEBr3QroD0yyd6FCCPv5ZddpBk/5Ez+f4vz4VHuaViuA\nZ4ePb4FtkyB4hPGInhOy5QgxBIjRWh/VWqcB84AeNyyjgb/i3heIt1+JQgh7mrLpGGPnRtGsmi+L\nR7enejlvx2807RosGQNla0KXNx2/vTyy5RpiNeBktq/jgDY3LDMRWKOUGgeUBJzrSqkQAqtV8+6K\n/UzZdIwHmlTi8/6t8PLIY3vQ3Fo7ES4dg9AVTnVX+Ua2HCHe6t77jZ3kBwDhWmt/4CFgplLqpnUr\npUYqpSKVUpHSe1mIgpOSnsm4edFM2XSM0Pa1mDQosODC8PCvEDEZ2o6BWh0KZpt5ZEsgxgHZ78P7\nc/Mp8XBgAYDWeivgBfjduCKt9WStdZDWOqhChQK4gCuEIDE5nSFTI/hl12leeaghb3RvjFt+eiXn\nxtUE41S5YmO4742C2WY+2BKIFqCeUipAKeWJcdNk2Q3LnADuA1BKNcIIRDkEFMJkpy5fp8+3W4g+\ncYnP+7dkZKc6jh1wnZ3WsGys0R+l9w/gYaeeKw6U4zVErXWGUmossBpwA6Zqrfcqpd4CIrXWy4Dn\nge+VUs9inE6Haq1vPK0WQhSgffFXCAuPIDktk+nDQmhf56aTNse
KnAKHVkG3D6BSk4Lddh7ZNDBb\na70CWHHD917P9vk+wLkvDghRhGyOOc+omdspVdydhaPb0bByAY/5SzgIq1+Dul2gzaiC3XY+yJMq\nQhQyS6JP8eKindT2K0X4sGCq+JYo2AIyUmHxcPD0hh6TTJ0BO7ckEIUoJLTWfPPbET5cdZC2tcvx\n3RNB+JYwYY7B9e/Amd3Qfy74OKDdgANJIApRCGRaNROX7WXmtuN0b1GVj/s2p7h7AQ2ryS5mHWz5\nEgLDoKHrPcErgSiEi0tJz2T83GjW7DvLqE61ealbQ4oV1LCa7K6chh9HQsVG8MD/Ffz27UACUQgX\ndvFaGiOmW4g+eZmJ3RsT2iHAnEIyM2DxCEhPhr7hxvVDFySBKISLOnEhmaHTIjh1+TqTBrbmwWZV\nzCvmtw/g+Cbo+S1UaGBeHfkkgSiEC9oVd5lh4RbSMzVzRrQhqFY584o5sgF+/whaDoaWA8yrww4k\nEIVwMRsOnuPp2VGU9fZk3sgQ6lY0cbKEpLPGdcMKDeChD82rw04kEIVwIfMtJ3jlpz00rOzDtLBg\nKvqY+DicNdMYb5iaBEOXgacD+jYXMAlEIVyA1prP1h7m83WH6VS/ApMGtaZUcZP/fH//CGL/MAZf\nV2xkbi12IoEohJNLz7Ty2k97mB95kj6B/rzXqxkejmoPaqvDa2Hj+9BiALQaZG4tdiSBKIQTu5aa\nwdNzoth4MIHx99bl2a71C262mtu5FGucKldqAg9/Ym4tdiaBKISTSkhKZVi4hb3xifzfY80Y2KaG\n2SVBWjLMHwxo6DfTZccb3o4EohBO6GjCVYZOi+B8UhrfDwnivkZO8Eyw1vDLc3BmDwxcAOVqm12R\n3UkgCuFkth+/xIjpFoopxdyRbWlZvYzZJRksP8DOudD5Zah/v9nVOIQEohBOZM3eM4ybG00VXy/C\nw0Ko5eckQ1lORsCql6HeA9Dp32ZX4zASiEI4iZnbjvPG0j008y/D1KFBlC9V3OySDElnYcEQ8K0G\nvb6DYibf4XYgCUQhTGa1aj5ac5BvNh7hvoYV+XJgK7w9neRPMzMdFoXB9cswYi2UKGt2RQ7lJP/q\nQhRNaRlW/r1oJ0t2xDOwTQ3eerQJ7maPMcxu1ctwfDP0+h4qNzW7GoeTQBTCJFdS0nlq1nY2x1zg\nxQcaMKZzAXbEs0XkVLB8D+3HQfPHza6mQEggCmGCM4kphE6LIObcVf7btwW9A/3NLumfYjfBiheh\nblfo8qbZ1RQYCUQhCtihs0mETo0g8Xo6U0OD6VS/gtkl/dOlWJj/BJQNgD5ToJgJrQhMIoEoRAHa\ndvQCI2dEUtzDjQWj29Gkqq/ZJf1TahLMHQg6EwbOBy8nq8/BJBCFKCDLd8Xz3PydVC9XgunDQvAv\n62SPvVmt8NNoSNgPgxdD+TpmV1TgJBCFKAA//HGUd37ZT3Ctsnw/JIgy3p5ml3Szjf8HB5ZDt/eh\nzr1mV2MKCUQhHMhq1bzzy36mbj7Gg00r82m/lnh5OOE1uT2LjfkNWw2GNqPNrsY0EohCOEhKeibP\nL9jJL7tPE9q+Fv95pDFuZrQHzUn8DljyNFRva0zn5UxDfwqYBKIQDpCYnM6TMyOJOHaRVx9qxIiO\nAc41xvAviadgbn/wLm9M5+XuJI8LmkQCUQg7O3X5OkOnRnDiQjJfDGjFoy2qml3SraUmwZx+kHoV\nhq+GUhXNrsh0EohC2NG++CuETovgenom04eF0K5OebNLurXMDFg0DM7tg0ELjNmvhQSiEPay6fB5\nRs/ajo+XO4tGt6dBZR+zS7o1rWHVS3B4DTzyGdTtYnZFTkMCUQg7+DEqjn8v2kXdiqWYFhZMFd8S\nZpd0e9u+MSZ7bT8egsLMrsapSCAKkQ9aayZtPMJHqw/SrnZ5vhsSSGkvD7PLur0Dv8DqV6BR9yL1\njLKtJBCFyKNMq+aNZXuYte0
EPVpW5aM+LfB0d6Kpu250KgoWj4CqreCxyYV6ote8kkAUIg+up2Uy\nbm40a/efZfTddfj3Aw0o5oxjDP9y+WTW8Bo/GDCv0HXLsxcJRCFy6eK1NIZPt7Dj5GXe6tGEIe1q\nmV3SnaUkGsNr0q/DkKXg4wQd/JyUBKIQuXD8wjVCp1mIv3ydbwYF0q1pZbNLurOMVJg3CM4fhEGL\noGIjsytyahKIQtho58nLDJ9uIcOqmfNkGwJrljO7pDuzWmHJUxD7Bzz2HdS5x+yKnJ4EohA22HDg\nHGNmR1G+lCfTh4VQp0Ips0vK2a//MSZt6DIRWvQ3uxqXIIEoRA7mW07wyk97aFTFh6mhwVT08TK7\npJxt/Rq2fgUho6DDv8yuxmVIIApxG1prPlt7mM/XHaZT/QpMGtSaUsVd4E9m96KssYaPQrf3ivTs\nNbll00AkpVQ3pdRBpVSMUmrCbZZ5XCm1Tym1Vyk1x75lClGw0jOtvLR4F5+vO0zfQH+mDA1yjTA8\n9rtx3bBGe6N1aBHqh2IPOf6ElVJuwNdAVyAOsCillmmt92Vbph7wMtBBa31JKSXTZgiXdS01gzGz\no/jtUALj76vHs13qOefUXTc6s8e4o1yuNgyYAx4ucGrvZGz5X14IEKO1PgqglJoH9AD2ZVvmSeBr\nrfUlAK31OXsXKkRBOJeUwrBwC/tPJ/Fer2YMCKlhdkm2uXwCZvcBz1JGP5QSZc2uyCXZcspcDTiZ\n7eu4rO9lVx+or5TarJTappTqdqsVKaVGKqUilVKRCQkJeatYCAc5knCVXpO2cOTcNb4fEug6YXg1\nAWb0hLRkGLwIfJ2sx7MLseUI8VbnCvoW66kHdAb8gT+UUk211pf/8SatJwOTAYKCgm5chxCm2X78\nIsOnR+KmFPNGtqVF9TJml2SblESY9RhciYchS2Rew3yy5QgxDqie7Wt/IP4WyyzVWqdrrY8BBzEC\nUgint2rPGQZ+/ydlvT35cUx71wnDtGSY0x/OHYB+s6BGW7Mrcnm2BKIFqKeUClBKeQL9gWU3LLME\nuAdAKeWHcQp91J6FCuEIM7bG8tTs7TSqUppFo9tRs3xJs0uyTWY6LBwKJ7ZCr++gnkzyag85njJr\nrTOUUmOB1YAbMFVrvVcp9RYQqbVelvXa/UqpfUAm8KLW+oIjCxciP6xWzYerD/Ltb0fo0qgSXw5o\nRQlPFxmiYs00GsofXgOPfApNe5tdUaGhtDbnUl5QUJCOjIw0ZduiaEvLsPLvRTtZsiOeQW1q8Oaj\nTXB3c5G5AbWGX56HyClw3xvQ8TmzK3I6SqntWuugvLzXBUaaCmE/V1LSGT1zO1uOXODFBxowpnMd\n1xhj+Jf17xhh2H483PWs2dUUOhKIosg4k5hC6LQIYs5d5ZPHW9CrtYsNT9nyJfzxMbQeAl3fkkfy\nHEACURQJh84mMXRqBEkpGUwLC6ZjvQpml5Q7UTNhzWvQuKfRKU/C0CEkEEWht+3oBZ6cEUkJDzfm\nj2pLk6q+ZpeUO7sWwrJxUOde6DVZnk92IAlEUaj9vDOe5xfspEZ5b8LDgvEv62K9RPYthZ9GQc0O\n0G82uBc3u6JCTQJRFEpaa6ZsOsY7v+wnpFY5Jg8JpIy3p9ll5c6h1bBoOFQLhIHSGKogSCCKQifT\nqnnnl31M2xzLQ80q88njLfHycLHTzCMbYP4TUKkxDFoIxX3MrqhIkEAUhUpKeibPzt/Byj1nGNYh\ngNcebuTc7UFv5fgWmDsAyteFJ5ZACRd5lLAQkEAUhcbl5DSenBGJJfYSrz3ciBEda5tdUu7FRcLs\nvlCmujFZg7eTN7IqZCQQRaEQdymZ0GkWTlxI5quBrXikeVWzS8q90zthVi8o6Wf0Ty4l8ywXNAlE\n4fL2xicSOs1CanomM4aH0LZ2ebNLyr2z+4w5DYuXhqE/Q2kXDPRCQAJRuLQ/DicweuZ2fEt4M
Pup\n9tSv5II3H87HwIwe4OZpHBmWcZGJaQshCUThshZvj+OlxbuoW7EU4WEhVPZ1wR4iF4/BjEdBWyF0\nOZSvY3ZFRZoEonA5WmsmbTzCR6sP0qFueb4ZHEhpLw+zy8q9i8cg/BFIT4ahy6FCA7MrKvIkEIVL\nyci08sayvcz+8wQ9W1blwz4t8HR3kam7svs7DK/BkGVQuanZFQkkEIULuZ6Wybi5Uazdf46nOtfh\nxfsbuN4YQ7g5DKs0N7sikUUCUbiEC1dTGT49kp1xl3mrRxOGtKtldkl5I2Ho1CQQhdM7fuEaQ6dG\ncDoxhW8HB/JAk8pml5Q3F4/B9O4Shk5MAlE4tZ0nLzMs3EKm1sx5sg2BNV30yY2/wjDtqjG0RsLQ\nKUkgCqe1bv9Zxs6Jxs/Hk/CwEOpUKGV2SXlzUxi2MLsicRsSiMIpzY04was/7aZJVV+mhgZTwcdF\n5wGUMHQpEojCqWit+fTXQ3yxPobODSrw9cDWlCzuor+m52OMMMy4LmHoIlz0N00URumZVl7+cTeL\ntsfxeJA/7z7WDA9XaQ96o7P7jMfxtNUYdC3jDF2CBKJwCldTMxgzO4rfDyXwry71eOa+eq7VHjS7\n+B0w8zFjuv8hv0CF+mZXJGwkgShMdy4phWHhFvafTuKD3s3oF+zCkxuctMCs3uBVGoYug3IuOCdj\nESaBKEx1JOEqQ6dGcOFqGj8MCeKehi48B2DsZpjzOJSsYEzhVaa62RWJXJJAFKaJjL3IiBmRuBdT\nzB/Vlub+LjxV/pH1MHdg1kzXy6B0FbMrEnkggShMsWrPGZ6ZF03VMiWYHhZCjfIu3FHu4EpYMAT8\n6hs9UEpVMLsikUcSiKLATd8Sy8Sf99KyehmmDA2mXEkXaw+a3d4lsHg4VG4Gg3+UHiguTgJRFBir\nVfPB6gN899tRujauxBf9W1HC08Xag2YXNRN+Hg/+wUarUC9fsysS+SSBKApEakYm/160i6U74hnc\ntgZvPtoUN1ecuusvW76CNa9CnXuh3yzwLGl2RcIOJBCFwyVeT2f0zO1sPXqBf3drwFN313HdMYZa\nw/p34I+PoXFP6DXZGG8oCgUJROFQpxOvEzrVwtHzV/m0Xwsea+Vvdkl5Z7XCyhfB8gO0HgKPfAbF\nXPiUX9xEAlE4zMEzSYROiyApJYPwsBA61PUzu6S8y0yHJU/B7oXQfjx0fQtc9ShX3JYEonCILUfO\nM2rmdrw93Vgwqh2Nq5Y2u6S8S0uGhaFweDV0mQh3PWtyQcJRJBCF3S3bGc8LC3ZSs7w34cNCqFam\nhNkl5V1KIszpDye2GqfIQWFmVyQcSAJR2I3Wmu//OMr/rThASEA5vn8iCF9vF2wP+perCTCrF5zb\nD32mQNPeZlckHEwCUdhFplXz9vJ9hG+J5eHmVfhv3xZ4ebjwDYdLscYkDYmnYMA8qNfF7IpEAZBA\nFPmWkp7Js/N3sHLPGYbfFcCrDzVyzfagfzm9E2b3hYxUGLIEarQ1uyJRQCQQRb5cTk5jxPRItp+4\nxGsPN2ILnC36AAAZ7UlEQVRERxef7urIBpj/BJQoY8xYU6GB2RWJAmTTdMRKqW5KqYNKqRil1IQ7\nLNdHKaWVUkH2K1E4q5MXk+n9zRZ2xSXy1YDWrh+GuxYaR4ZlasDwNRKGRVCOR4hKKTfga6ArEAdY\nlFLLtNb7bljOBxgP/OmIQoVz2XMqkbBwC6npmcwcHkKb2uXNLil/tnwJa16DmndB/9nGEaIocmw5\nQgwBYrTWR7XWacA8oMctlnsb+BBIsWN9wgn9fiiBft9txaOYYtFT7V07DK1WWP2qEYaNe8LgxRKG\nRZgtgVgNOJnt67is7/1NKdUKqK61Xm7H2oQTWrQ9jmHhFqqX8+anpztQv5KP2SXlXUYq/PgkbP0K\n2oyGPtPAw8vsqoSJbLmpcqvbhfrvF5UqBnwKhOa4IqVGA
iMBatRw4b4ZRZDWmq/Wx/DfXw9xV10/\nvhncGh8vFx5jmHIF5g+GY79BlzehwzPyKJ6wKRDjgOzNIfyB+Gxf+wBNgY1ZM5hUBpYppR7VWkdm\nX5HWejIwGSAoKEgjXEJGppX/LN3L3IgT9GpVjfd7N8fT3UXbgwJciYfZj0PCfnjsO2jR3+yKhJOw\nJRAtQD2lVABwCugPDPzrRa11IvD3U/tKqY3ACzeGoXBNyWkZjJsTzboD5xjTuQ4vPtDAdafuAjiz\n2wjD1CQYOB/qyoBr8T85BqLWOkMpNRZYDbgBU7XWe5VSbwGRWutlji5SmOP81VSGT49kd9xl3u7Z\nlCfa1jS7pPw5tAYWhRkzWw9bJc3jxU1sGpittV4BrLjhe6/fZtnO+S9LmC32/DWGTovg7JUUvh0c\nyP1NKptdUv5YfoAVL0KlpjBwgXTFE7ckT6qIm+w4eZnh4RasWjPnyba0rlHW7JLyzpoJv75u3Emu\n/yD0/gGKlzK7KuGkJBDFP6zbf5an50RRwac408NCqF3BhcMjLdkYVnNgOYSMgm7vyQzX4o4kEMXf\n5vx5gteW7KZpNV+mDA2mgo8L9wpJOgtz+0N8NHT7ANqONrsi4QIkEAVaaz759RBfro/hngYV+Gpg\na0oWd+FfjXP7jTvJyeeh/xxo+JDZFQkX4cK/9cIe0jOtTFi8m8VRcfQLqs67jzXF3c2FxxjGrIWF\nYeBRAsJWQNVWZlckXIgEYhF2NTWDp2Zt54/D5/lXl3o8c1891x1jqDX8+S2sfgUqNjYmdS1TPef3\nCZGNBGIRde5KCmHhFg6cSeLD3s15PNiFwyMjDVa8AFHTocHDRq9kuZMs8kACsQiKOZfE0KkWLiWn\n8cPQIO5pUNHskvLu2gVYMASOb4K7noN7/wPFXPiUX5hKArGIscReZMT0SDzcijF/ZDua+fuaXVLe\nndtv3Em+chp6fQ/NHze7IuHiJBCLkJW7T/PM/B34lynB9GEhVC/nbXZJeXdoNSwaDp7exs0Tf5mk\nXeSfBGIRMW3zMd5avo9W1cvww9BgypX0NLukvNHamN3619ehcjMYMBd8/c2uShQSEoiFnNWqeX/V\nASb/fpT7G1fiiwGtXLc9aEYqLH8OdsyCxj2g5zfgWdLsqkQhIoFYiKVmZPLCwl38vDOeIe1q8kb3\nJri5anvQq+eMbngnt8HdE+Dul+TmibA7CcRCKvF6OqNmRrLt6EVe6taQ0XfXdt0xhnHbjdmtr18y\npvlv2svsikQhJYFYCMVfvk7YNAtHz1/ls34t6dmqWs5vclbRs4zTZJ9KMOJX47qhEA4igVjIHDhz\nhdCpFq6lZhAeFkKHun45v8kZZabDqpfB8j0E3A19w8G7nNlViUJOArEQ2XLkPKNmbMe7uBsLRrej\nUZXSZpeUN1fPwYKhcGILtB8H900EN/lVFY4nv2WFxNIdp3hh4U5qlS9J+LAQqpUpYXZJeXNqu3Hz\nJPki9PoBmvc1uyJRhEggujitNZN/P8p7Kw/QJqAck58IwtfbRduDRs+G5c9CqUowfA1UaW52RaKI\nkUB0YZlWzdvL9xG+JZZHmlfhv4+3oLi7C44xzEiDNa9BxHcQ0An6hEPJ8mZXJYogCUQXlZKeyTPz\nolm99yxPdgzg5QcbUcwVxxgmnoKFoRAXAW2fhq5vyfVCYRr5zXNBl66lMWJGJFEnLvH6I40ZdleA\n2SXlzZENsHgEZKTI+ELhFCQQXczJi8kMnRZB3KXrfD2wNQ81c8F2mlYrbPovrH8XKjSAx2dChfpm\nVyWEBKIr2XMqkdBpFtIzrcwa3oaQABccl5d8EX4aBYfXQLO+8MhnMpmrcBoSiC7it0MJjJm1nTLe\nnswb2Ya6FX3MLin3TkUZ4wuTTsPD/4Wg4eCqjxOKQkkC0QUsjDzJhB93U7+SD+FhwVQq7WV2Sbmj\nNWyfBitfMobUDFsN/
oFmVyXETSQQnZjWmi/Xx/DJr4foWM+PSYNa4+PlYmMM05KNsYW75kHdLsbM\n1vIInnBSEohOKiPTyn+W7mFuxEl6ta7G+72a4+nuYtNdJRyChUONqf7veRU6viBTdgmnJoHohJLT\nMhg7J5r1B87x9D11eOH+Bq43ddeOOfDL8+DhDYMXQ937zK5IiBxJIDqZ81dTGR5uYfepRN7p2ZTB\nbWuaXVLupF41gnDXPKjV0ThFLu2CQ4NEkSSB6ERiz19j6LQIzl5J4bsngujauJLZJeXOmd2wMAwu\nHoHOr0CnF6CYCz5KKIosCUQnEX3iEsOnRwIw58m2tK5R1uSKckFriJxqzF9YoiwMWQYBHc2uSohc\nk0B0Ar/uO8u4uVFUKu1FeFgIAX4u1DgpJRGWjYd9S4y7yD2/hVIVzK5KiDyRQDTZrG3HeX3pHppV\n82VKaDB+pYqbXZLtTm03TpET46DLm9B+vNxFFi5NAtEkWms+XnOQrzcc4d6GFflqYCu8PV3kx2G1\nwtYvYd3b4FMZhq2C6iFmVyVEvrnIX2DhkpZhZcKPu/gx6hQDQqrzdo+muLu5yJFV4ilYMhqO/Q6N\nukP3L2SgtSg0JBALWFJKOmNmR/HH4fM817U+4+6t6zpjDPctg2XjjAZQj34FrQbLs8iiUJFALEBn\nr6QQNs3CwbNJfNinOY8HVTe7JNukXoVVEyB6JlRtBb2nQPk6ZlclhN1JIBaQmHNJDJ1q4VJyGlND\ng7m7vovciT21HRY/CRePwl3PwT2vgJuLPU8thI0kEAuAJfYiI6ZH4uFWjAWj2tG0mq/ZJeXMmgmb\nP4MN/2fMUBO6HGrdZXZVQjiUBKKDrdx9mmfm78C/bAmmh4VQvZy32SXl7PIJ+OkpOL4JGveE7p8Z\nA66FKORsurWplOqmlDqolIpRSk24xevPKaX2KaV2KaXWKaVc7AFcx5i66Rhj5kTRrJovi0e3d/4w\n1BqiZsKk9nB6B/T4GvqGSxiKIiPHI0SllBvwNdAViAMsSqllWut92RaLBoK01slKqaeAD4F+jijY\nFVitmvdW7uf7P47xQJNKfN6/FV4eTv5Mb9JZ+Hk8HFoFNe+CnpOgrPx/TRQttpwyhwAxWuujAEqp\neUAP4O9A1FpvyLb8NmCwPYt0JakZmbywcBc/74xnaLuavN69CW7O3h5070+w/DlIT4YH3oM2o+WJ\nE1Ek2RKI1YCT2b6OA9rcYfnhwMpbvaCUGgmMBKhRo4aNJbqOxOvpjJwRyZ/HLvLygw0Z2am2c48x\nTL4IK16EPYugamt47DvpfieKNFsC8VZ/0fqWCyo1GAgC7r7V61rrycBkgKCgoFuuw1XFX75O6LQI\njp2/xuf9W9KjZTWzS7qzw2th2Vi4lmDMZn3Xc9IgXhR5tvwFxAHZRxD7A/E3LqSU6gK8CtyttU61\nT3muYf/pK4ROiyA5NZPpw0JoX8fP7JJuL+UK/Pof2B4OFRrBgHlQtaXZVQnhFGwJRAtQTykVAJwC\n+gMDsy+glGoFfAd001qfs3uVTmxLzHlGzdxOyeLuLHyqHQ0rlza7pNs7/Cv8/IzRBrT9eOPI0MPF\nOvgJ4UA5BqLWOkMpNRZYDbgBU7XWe5VSbwGRWutlwEdAKWBh1jWzE1rrRx1Yt1NYuuMULyzcSYBf\nScLDQqhapoTZJd1a8kVY/QrsnAsVGsLjM8A/yOyqhHA6Nl000lqvAFbc8L3Xs33exc51OTWtNd/+\ndpQPVh2gbe1yfPdEEL4lnPRxtn3LjB4n1y9CpxeND3cXmnNRiAIkV9FzKdOqefPnvczYepzuLary\ncd/mFHd3wjGGV8/Bihdg31Ko3NzofFeludlVCeHUJBBzISU9k2fmRbN671lGdqrNhG4NKeZsYwy1\nhl0LYNVLkHYN7nvduF4oEzIIkSMJRBtdupbG8OkWok9e5o3ujQnrEGB2STdLPAXLn4X
Dq8E/2Hj0\nrkIDs6sSwmVIINrg5MVkhk6NIO7ydSYNbM2DzZysz7A1Eyw/GFP6WzOynjYZJS1AhcglCcQc7I5L\nJCzcQnqmldkj2hBcy8mmyz+9yxhKEx8Fde6Fhz+Bck549CqEC5BAvIONB88xZnYUZb09mTeyDXUr\n+phd0v+kXYON78HWSUZPk95ToGlvmdJfiHyQQLyNBZEnefnH3TSo5EN4WDAVSzvRAOZDa4yhNIkn\noPVQ6DJRGj0JYQcSiDfQWvPFuhg+XXuIjvX8+GZwIKWKO8k/U9IZo7fJ3p/ArwGErYSa7c2uSohC\nw0n+0p1DRqaV15bsYZ7lJL1b+/N+72Z4OEN7UGum8ezx2jchIwXueQ06PAPunmZXJkShIoGY5Vpq\nBmPnRLHhYALj7q3Lc13rO8fUXXHbYcXzEB8NAZ3g4U/Br67ZVQlRKEkgAglJqQyfbmHPqUTefawp\ng9o4wUzR1y7AujchaobR5ElumgjhcEU+EI8mXCV0moVzSSlMfiKILo0rmVvQX6fH69+G1CRo9zTc\n/RJ4OfEsOkIUEkU6EKNOXGJ4uAWlFPNGtqNl9TLmFhQXadw9Pr0DanWEhz6Gig3NrUmIIqTIBuKv\n+84ybm4UlUp7MT0shFp+Jc0r5toFWDfROD32qSKnx0KYpEgG4sxtx3lj6R6aVfNlSmgwfqVMmg4r\nMx0ip8GGdyHtKrQfZ5weF3eiAeBCFCFFKhC11ny0+iCTNh7hvoYV+XJgK7w9TfoniFkLq16B8wch\n4G548EM5PRbCZEUmENMyrExYvIsfo08xIKQGb/dogrsZYwwTDsGaV+HwGihXG/rPhQYPyumxEE6g\nSARiUko6T82KYlPMeZ7vWp+x99Yt+DGGyRfhtw/B8j14eMP970DIKBlcLYQTKfSBePZKCqHTLBw+\nm8RHfZrTN6h6zm+yp8wM2J51nTAlEQJDjeZOJZ24M58QRVShDsTDZ5MInWbhcnIaU0KDubt+hYLb\nuNaw/2djcPWFGOM6Ybf3oFKTgqtBCJErhTYQI45dZMR0C8U93Jg/qh1Nq/kW3MaPb4FfX4c4i9Hl\nbsA8qN9NrhMK4eQKZSD+sus0z87fQfVyJQgPC6F6Oe+C2fC5/bB2IhxaBT5V4dGvoMUAcCuU/8xC\nFDqF7i91yqZjvPPLPgJrlOWHoUGU8S6AmxaJcbDhPdg5Bzx9jPkJQ0aBZwEFsRDCLgpNIFqtmndX\n7GfKpmN0a1KZz/q3xMvDwT1Frp6DTZ9B5BTQVmg7Bjo+L5O1CuGiCkUgpqRn8vzCnfyy6zSh7Wvx\nn0ca4+bI9qDXLsCWzyHie2N+wub94Z6XoUwNx21TCOFwLh+IicnpPDkzkohjF3nloYY82bG248YY\nXr8EW76CP781epo06wN3T5D5CYUoJFw6EE9dvk7o1AhiL1zj8/4t6dGymmM2lJII276BrV9D6hVo\n3BM6T4CKjRyzPSGEKVw2EPfFXyEsPILktEymDwuhfR0HDHROvgh/fmccEaZchgYPG6fGlZvZf1tC\nCNO5ZCBujjnPqJnbKVXcnYWj29Gwsp0nT006A1u/AstUSL8GDR6CTi9Ctdb23Y4Qwqm4XCAuiT7F\ni4t2UtuvFOHDgqniW8J+K78UC5u/gOhZYE035iS861l5ukSIIsJlAlFrzTe/HeHDVQdpW7sc3z0R\nhG8JD/us/NwB2PwZ7FoAxdyg5UCjq1252vZZvxDCJbhEIGZaNROX7WXmtuM82qIqH/VtTnH3fI4x\n1BqOrDdulBxZZ8xA02Y0tB8Lpavap3AhhEtx+kC8npbJ+HnR/LrvLKPurs1LDzSkWH7GGKanwO6F\nRhAm7Dc62t37GgQOg5Ll7Ve4EMLlOHUgXryWxvDpFnacvMybjzZhaPtaeV/Z1QTjiRLLD3AtASo1\ng57fQtNe4G5SCwEhhFNx2kA8cSGZodMiiL98nW8
GtaZb0yq5X4nWRic7yw+w9yfITDVmnWk7xmj6\nLrPPCCGyccpA3BV3mWHhFjKsmtkj2hBUK5fPBqddM06LLT/Amd3GhAutnzCuEfrVc0zRQgiX53SB\nuOHgOZ6eHUW5kp6Eh4VQt2Ip29+ccBAsU2DnXOOJkopN4OFPoPnj0slOCJEjpwrE+ZYTvPLTHhpW\n9mFaWDAVfbxyflNKIuz5EXbMNiZkLeYBTXpC8Aio3kZOi4UQNnOKQNRa89naw3y+7jCd6ldg0qDW\nlCp+h9KsVoj9wxhAvf9nyLgOfg2g61vQYiCUKsBWAUKIQsP0QEzPtPLaT3uYH3mSPoH+vNerGR63\nag9qtUJchHFzZN9SSDoNxX2h5QBoOQiqBcrRoBAiX2wKRKVUN+BzwA34QWv9/g2vFwdmAIHABaCf\n1jo2p/VeS83g6TlRbDyYwPh76/Js1/r/nLor/TrEbjZ6GO//GZLiwa041OsKTR6Dhg+Dhx0f3RNC\nFGk5BqJSyg34GugKxAEWpdQyrfW+bIsNBy5presqpfoDHwD97rTeDKum/+Rt7I1P5P8ea8bANjUg\nIxXid8DJbXDsd4jdZEzA6u4Fde6FJm8aw2a87DyZgxBCYNsRYggQo7U+CqCUmgf0ALIHYg9gYtbn\ni4CvlFJKa61vt9JzZ8/wcMLPfBrkTd3Tv8D3B+HMLshMMxYoXw8Cw6BeF6jZQY4EhRAOZ0sgVgNO\nZvs6Dmhzu2W01hlKqUSgPHA++0JKqZHASIDAKsV4t9i3sBvwqQLl6xrjBKu3MT7kxogQooDZEoi3\nulNx45GfLcugtZ4MTAZo0bSx5l+roEQ5KJ6LsYZCCOEgt7ide5M4oHq2r/2B+Nsto5RyB3yBi3da\nqYeXt9GUScJQCOEkbAlEC1BPKRWglPIE+gPLblhmGTA06/M+wPo7XT8UQghnlOMpc9Y1wbHAaoxh\nN1O11nuVUm8BkVrrZcAUYKZSKgbjyLC/I4sWQghHsGkcotZ6BbDihu+9nu3zFKCvfUsTQoiCZcsp\nsxBCFAkSiEIIkUUCUQghskggCiFEFglEIYTIIoEohBBZJBCFECKLMuuBEqVUEnDQlI0XDD9umNyi\nkCnM+1eY9w0K//410FrnqYmSmTNmH9RaB5m4fYdSSkXK/rmmwrxvUDT2L6/vlVNmIYTIIoEohBBZ\nzAzEySZuuyDI/rmuwrxvIPt3W6bdVBFCCGcjp8xCCJFFAlEIIbI4PBCVUt2UUgeVUjFKqQm3eL24\nUmp+1ut/KqVqObome7Jh/55TSu1TSu1SSq1TStU0o868yGnfsi3XRymllVIuNZTDlv1TSj2e9fPb\nq5SaU9A15ocNv5s1lFIblFLRWb+fD5lRZ14opaYqpc4ppfbc5nWllPoia993KaVa27RirbXDPjBm\n2D4C1AY8gZ1A4xuWGQN8m/V5f2C+I2syYf/uAbyzPn/KVfbPln3LWs4H+B3YBgSZXbedf3b1gGig\nbNbXFc2u2877Nxl4KuvzxkCs2XXnYv86Aa2BPbd5/SFgJUYDvLbAn7as19FHiH/3dNZapwF/9XTO\nrgcwPevzRcB9SqlbdfFzRjnun9Z6g9Y6OevLbRhNulyBLT87gLeBD4GUgizODmzZvyeBr7XWlwC0\n1ucKuMb8sGX/NFA663Nfbm4e57S01r9z50Z2PYAZ2rANKKOUqpLTeh0diLfq6VztdstorTOAv3o6\nuwJb9i+74Rj/13IFOe6bUqoVUF1rvbwgC7MTW3529YH6SqnNSqltSqluBVZd/tmyfxOBwUqpOIwW\nIeMKprQCkdu/TcDxj+7Zraezk7K5dqXUYCAIuNuhFdnPHfdNKVUM+BQILaiC7MyWn507xmlzZ4wj\n+z+UUk211pcdXJs92LJ/A4BwrfV/lVLtMBrFNdVaWx1fnsPlKVccfYTokJ7OTsSW/UMp1QV4FXhU\na51aQLXlV07
75gM0BTYqpWIxrtMsc6EbK7b+bi7VWqdrrY9hTEZSr4Dqyy9b9m84sABAa70V8MKY\n+KEwsOlv8yYOvvDpDhwFAvjfhd0mNyzzNP+8qbLA7Au2dt6/VhgXt+uZXa+99+2G5TfiWjdVbPnZ\ndQOmZ33uh3EKVt7s2u24fyuB0KzPG2UFhjK79lzsYy1uf1PlYf55UyXCpnUWQNEPAYeyQuHVrO+9\nhXG0BMb/lRYCMUAEUNvsf2g7799a4CywI+tjmdk122vfbljWpQLRxp+dAj4B9gG7gf5m12zn/WsM\nbM4Kyx3A/WbXnIt9mwucBtIxjgaHA6OB0dl+dl9n7ftuW3835dE9IYTIIk+qCCFEFglEIYTIIoEo\nhBBZJBCFECKLBKIQQmSRQBRCiCwSiEIIkUUCUTgtpVRw1lx2XkqpkllzEjY1uy5ReMnAbOHUlFLv\nYDzNVAKI01q/Z3JJohCTQBROTSnlCVgw5ltsr7XONLkkUYjJKbNwduWAUhiz63iZXIso5OQIUTg1\npdQyjNmeA4AqWuuxJpckCjFHTxArRJ4ppYYAGVrrOUopN2CLUuperfV6s2sThZMcIQohRBa5hiiE\nEFkkEIUQIosEohBCZJFAFEKILBKIQgiRRQJRCCGySCAKIUSW/weUNLiOxWYaVgAAAABJRU5ErkJg\ngg==\n", 196 | "text/plain": [ 197 | "" 198 | ] 199 | }, 200 | "metadata": {}, 201 | "output_type": "display_data" 202 | } 203 | ], 204 | "source": [ 205 | "svy.lorenz('eqincome').plot(figsize=(5,5))" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": null, 211 | "metadata": { 212 | "collapsed": true 213 | }, 214 | "outputs": [], 215 | "source": [] 216 | } 217 | ], 218 | "metadata": { 219 | "kernelspec": { 220 | "display_name": "Python 3", 221 | "language": "python", 222 | "name": "python3" 223 | }, 224 | "language_info": { 225 | "codemirror_mode": { 226 | "name": "ipython", 227 | "version": 3 228 | }, 229 | "file_extension": ".py", 230 | "mimetype": "text/x-python", 231 | "name": "python", 232 | "nbconvert_exporter": "python", 233 | "pygments_lexer": "ipython3", 234 | "version": "3.6.1" 235 | } 236 | }, 237 | "nbformat": 4, 238 | "nbformat_minor": 2 239 | } 240 | -------------------------------------------------------------------------------- /src/ineqpy/api.py: -------------------------------------------------------------------------------- 1 | """API's module. 2 | 3 | Extend pandas.DataFrames with the main functions from statistics and 4 | inequality modules. 
5 | """ 6 | import inspect 7 | from functools import partial 8 | from types import MethodType 9 | 10 | import pandas as pd 11 | 12 | from ineqpy import inequality, statistics 13 | 14 | 15 | class Convey: 16 | """Convey.""" 17 | 18 | def __init__( 19 | self, 20 | data=None, 21 | index=None, 22 | columns=None, 23 | weights=None, 24 | group=None, 25 | **kw 26 | ): 27 | self.df = pd.DataFrame(data=data, index=index, columns=columns, **kw) 28 | self.weights = weights 29 | self.group = group 30 | self._attach_method(statistics, self) 31 | self._attach_method(inequality, self) 32 | 33 | @property 34 | def _constructor(self): 35 | return Survey 36 | 37 | @classmethod 38 | def _attach_method(module, instance): 39 | # get methods names contained in module 40 | res_names = [] 41 | res_methods = [] 42 | method_name_list = inspect.getmembers(module, inspect.isfunction) 43 | 44 | for method_name, func in method_name_list: 45 | # if method_name.startswith('_'): continue # avoid private methods 46 | func = getattr(module, method_name) # get function 47 | if ( 48 | "weights" in inspect.signature(func).parameters 49 | ): # replace weights variable 50 | func = partial(func, weights=instance.weights) 51 | # func = partial(func, data=instance.data) 52 | func = MethodType(func, instance) 53 | res_methods.append(func) 54 | res_names.append(method_name) 55 | setattr(instance, method_name, func) 56 | 57 | 58 | class Survey: 59 | """Survey it's a data structure that handles survey data. 60 | 61 | Attributes 62 | ---------- 63 | df : pandas.DataFrame 64 | weights : str 65 | group : str 66 | 67 | Methods 68 | ------- 69 | atkinson(income=None, weights=None, e=0.5) 70 | Calculate Atkinson's index. 71 | avg_tax_rate(total_tax=None, total_base=None, weights=None) 72 | Calculate average tax rate. 73 | c_moment(variable=None, weights=None, order=2, param=None, ddof=0) 74 | Calculate central momment. 75 | coef_variation(variable=None, weights=None) 76 | Calculate coefficient of variation. 
77 | concentration(income=None, weights=None, sort=True) 78 | Calculate concentration's index. 79 | density(variable=None, weights=None, groups=None) 80 | Calculate density. 81 | gini(income=None, weights=None, sort=True) 82 | Calculate Gini's index. 83 | kakwani(tax=None, income_pre_tax=None, weights=None) 84 | Calculate Kakwani's index. 85 | kurt(variable=None, weights=None) 86 | Calculate Kurtosis. 87 | lorenz(income=None, weights=None) 88 | Calculate Lorenz curve. 89 | mean(variable=None, weights=None) 90 | Calculate mean. 91 | percentile(variable=None, weights=None, p=50, interpolate="lower") 92 | Calculate percentile. 93 | reynolds_smolensky(income_pre_tax=None, income_post_tax=None, weights=None) 94 | Calculate Reynolds-Smolensky's index. 95 | skew(variable=None, weights=None) 96 | Calculate Skew. 97 | std_moment(variable=None, weights=None, param=None, order=3, ddof=0) 98 | Calculate standard deviation. 99 | theil(income=None, weights=None) 100 | Calculate Theil's index. 101 | var(variable=None, weights=None, ddof=0) 102 | Calculate variance. 103 | """ 104 | 105 | def __init__( 106 | self, 107 | data=None, 108 | index=None, 109 | columns=None, 110 | weights=None, 111 | group=None, 112 | **kw 113 | ): 114 | self.df = pd.DataFrame(data=data, index=index, columns=columns, **kw) 115 | self.weights = weights 116 | self.group = group 117 | 118 | def c_moment(self, variable, weights=None, order=2, param=None, ddof=0): 119 | """Calculate central momment. 120 | 121 | Calculate the central moment of `x` with respect to `param` of order 122 | `n`, given the weights `w`. 123 | 124 | Parameters 125 | ---------- 126 | variable : 1d-array 127 | Variable 128 | weights : 1d-array 129 | Weights 130 | order : int, optional 131 | Moment order, 2 by default (variance) 132 | param : int or array, optional 133 | Parameter for which the moment is calculated, the default is None, 134 | implies use the mean. 135 | ddof : int, optional 136 | Degree of freedom, zero by default. 
137 | 138 | Returns 139 | ------- 140 | central_moment : float 141 | 142 | Notes 143 | ----- 144 | - The cmoment of order 1 is 0 145 | - The cmoment of order 2 is the variance. 146 | Source : https://en.wikipedia.org/wiki/Moment_(mathematics) 147 | 148 | Todo 149 | ---- 150 | Implement: https://en.wikipedia.org/wiki/L-moment#cite_note-wang:96-6 151 | 152 | """ 153 | data = self.df 154 | if weights is None: 155 | weights = self.weights 156 | 157 | return statistics.c_moment(variable, weights, data, order, param, ddof) 158 | 159 | def percentile(self, variable, weights=None, p=50, interpolate="lower"): 160 | """Calculate the value of a quantile given a variable and his weights. 161 | 162 | Parameters 163 | ---------- 164 | data : pd.DataFrame, optional 165 | pd.DataFrame that contains all variables needed. 166 | variable : str or array 167 | weights : str or array 168 | q : float 169 | Quantile level, if pass 0.5 means median. 170 | interpolate : bool 171 | 172 | Returns 173 | ------- 174 | percentile : float or pd.Series 175 | 176 | """ 177 | data = self.df 178 | if weights is None: 179 | weights = self.weights 180 | 181 | return statistics.percentile(variable, weights, data, p, interpolate) 182 | 183 | def std_moment(self, variable, weights=None, param=None, order=3, ddof=0): 184 | """Calculate the standardized moment. 185 | 186 | Calculate the standardized moment of order `c` for the variable` x` 187 | with respect to `c`. 188 | 189 | Parameters 190 | ---------- 191 | data : pd.DataFrame, optional 192 | pd.DataFrame that contains all variables needed. 193 | variable : 1d-array 194 | Random Variable 195 | weights : 1d-array, optional 196 | Weights or probability 197 | order : int, optional 198 | Order of Moment, three by default 199 | param : int or float or array, optional 200 | Central trend, default is the mean. 201 | ddof : int, optional 202 | Degree of freedom. 
203 | 204 | Returns 205 | ------- 206 | std_moment : float 207 | Returns the standardized `n` order moment. 208 | 209 | References 210 | ---------- 211 | - https://en.wikipedia.org/wiki/Moment_(mathematics)#Significance_ 212 | of_the_moments 213 | - https://en.wikipedia.org/wiki/Standardized_moment 214 | 215 | Todo 216 | ---- 217 | It is the general case of the raw and central moments. Review 218 | implementation. 219 | 220 | """ 221 | data = self.df 222 | if weights is None: 223 | weights = self.weights 224 | 225 | return statistics.std_moment( 226 | variable, weights, data, param, order, ddof 227 | ) 228 | 229 | def mean(self, variable, weights=None): 230 | """Calculate the mean of `variable` given `weights`. 231 | 232 | Parameters 233 | ---------- 234 | variable : array-like or str 235 | Variable on which the mean is estimated. 236 | weights : array-like or str 237 | Weights of the `x` variable. 238 | data : pandas.DataFrame 239 | Is possible pass a DataFrame with variable and weights, then you 240 | must pass as `variable` and `weights` the column name stored in 241 | `data`. 242 | 243 | Returns 244 | ------- 245 | mean : array-like or float 246 | """ 247 | # if pass a DataFrame separate variables. 248 | data = self.df 249 | if weights is None: 250 | weights = self.weights 251 | 252 | return statistics.mean(variable, weights, data) 253 | 254 | def density(self, variable, weights=None, groups=None): 255 | """Calculate density in percentage. 256 | 257 | This make division of variable inferring width in groups as max - min. 258 | 259 | Parameters 260 | ---------- 261 | data : pd.DataFrame, optional 262 | pandas.DataFrame that contains all variables needed. 263 | variable : array-like, optional 264 | weights : array-like, optional 265 | groups : array-like, optional 266 | 267 | Returns 268 | ------- 269 | density : array-like 270 | 271 | References 272 | ---------- 273 | Histogram. (2017, May 9). In Wikipedia, The Free Encyclopedia. 
274 | Retrieved: https://en.wikipedia.org/w/index.php?title=Histogram 275 | """ 276 | data = self.df 277 | if weights is None: 278 | weights = self.weights 279 | 280 | return statistics.density(variable, weights, groups, data) 281 | 282 | def var(self, variable, weights=None, ddof=0): 283 | """Calculate the population variance of `variable` given `weights`. 284 | 285 | Parameters 286 | ---------- 287 | data : pd.DataFrame, optional 288 | pd.DataFrame that contains all variables needed. 289 | variable : 1d-array or pd.Series or pd.DataFrame 290 | Variable on which the quasivariation is estimated 291 | weights : 1d-array or pd.Series or pd.DataFrame 292 | Weights of the `variable`. 293 | 294 | Returns 295 | ------- 296 | variance : 1d-array or pd.Series or float 297 | Estimation of quasivariance of `variable` 298 | 299 | References 300 | ---------- 301 | Moment (mathematics). (2017, May 6). In Wikipedia, The Free 302 | Encyclopedia. 303 | Retrieved 14:40, May 15, 2017, from 304 | https://en.wikipedia.org/w/index.php?title=Moment_(mathematics) 305 | 306 | Notes 307 | ----- 308 | If stratificated sample must pass with groupby each strata. 309 | """ 310 | data = self.df 311 | if weights is None: 312 | weights = self.weights 313 | 314 | return statistics.var(variable, weights, data, ddof) 315 | 316 | def coef_variation(self, variable, weights=None): 317 | """Calculate the coefficient of variation. 318 | 319 | The coefficient of variation is the square root of the variance of the 320 | incomes divided by the mean income. It has the advantages of being 321 | mathematically tractable and is subgroup decomposable, but is not 322 | bounded from above. 323 | 324 | Parameters 325 | ---------- 326 | data : pandas.DataFrame 327 | variable : array-like or str 328 | weights : array-like or str 329 | 330 | Returns 331 | ------- 332 | coefficient_variation : float 333 | 334 | References 335 | ---------- 336 | Coefficient of variation. (2017, May 5). 
In Wikipedia, The Free 337 | Encyclopedia. 338 | Retrieved 15:03, May 15, 2017, from 339 | https://en.wikipedia.org/w/index.php?title=Coefficient_of_variation 340 | """ 341 | # TODO complete docstring 342 | data = self.df 343 | if weights is None: 344 | weights = self.weights 345 | 346 | return statistics.coef_variation(variable, weights, data) 347 | 348 | def kurt(self, variable, weights=None): 349 | """Calculate the asymmetry coefficient. 350 | 351 | Parameters 352 | ---------- 353 | variable : 1d-array 354 | w : 1d-array 355 | 356 | Returns 357 | ------- 358 | kurt : float 359 | Kurtosis coefficient. 360 | 361 | References 362 | ---------- 363 | Moment (mathematics). (2017, May 6). In Wikipedia, The Free 364 | Encyclopedia. 365 | Retrieved 14:40, May 15, 2017, from 366 | https://en.wikipedia.org/w/index.php?title=Moment_(mathematics) 367 | 368 | Notes 369 | ----- 370 | It is an alias of the standardized fourth-order moment. 371 | """ 372 | data = self.df 373 | if weights is None: 374 | weights = self.weights 375 | 376 | return statistics.kurt(variable, weights, data) 377 | 378 | def skew(self, variable, weights=None): 379 | """Return the asymmetry coefficient of a sample. 380 | 381 | Parameters 382 | ---------- 383 | data : pandas.DataFrame 384 | variable : array-like, str 385 | weights : array-like, str 386 | 387 | Returns 388 | ------- 389 | skew : float 390 | 391 | References 392 | ---------- 393 | Moment (mathematics). (2017, May 6). In Wikipedia, The Free 394 | Encyclopedia. 395 | Retrieved 14:40, May 15, 2017, from 396 | https://en.wikipedia.org/w/index.php?title=Moment_(mathematics)& 397 | oldid=778996402 398 | 399 | Notes 400 | ----- 401 | It is an alias of the standardized third-order moment. 
402 | 403 | """ 404 | data = self.df 405 | if weights is None: 406 | weights = self.weights 407 | 408 | return statistics.skew(variable, weights, data) 409 | 410 | # INEQUALITY 411 | #  ---------- 412 | 413 | def concentration(self, income, weights=None, sort=True): 414 | """Calculate concentration index. 415 | 416 | This function calculate the concentration index, according to the 417 | notation used in [Jenkins1988]_ you can calculate the: 418 | C_x = 2 / x · cov(x, F_x) 419 | if x = g(x) then C_x becomes C_y 420 | when there are taxes: 421 | 422 | y = g(x) = x - t(x) 423 | 424 | Parameters 425 | ---------- 426 | income : array-like 427 | weights : array-like 428 | data : pandas.DataFrame 429 | sort : bool 430 | 431 | Returns 432 | ------- 433 | concentration : array-like 434 | 435 | References 436 | ---------- 437 | Jenkins, S. (1988). Calculating income distribution indices 438 | from micro-data. National Tax Journal. http://doi.org/10.2307/41788716 439 | """ 440 | # TODO complete docstring 441 | data = self.df 442 | if weights is None: 443 | weights = self.weights 444 | 445 | return inequality.concentration(income, weights, data, sort) 446 | 447 | def lorenz(self, income, weights=None): 448 | """Calculate lorenz curve. 449 | 450 | In economics, the Lorenz curve is a graphical representation of the 451 | distribution of income or of wealth. It was developed by Max O. Lorenz 452 | in 1905 for representing grouped of the wealth distribution. This 453 | function compute the lorenz curve and returns a DF with two columns of 454 | axis x and y. 455 | 456 | Parameters 457 | ---------- 458 | data : pandas.DataFrame 459 | A pandas.DataFrame that contains data. 460 | income : str or 1d-array, optional 461 | Population or wights, if a DataFrame is passed then `income` should 462 | be a name of the column of DataFrame, else can pass a pandas.Series 463 | or array. 
464 | weights : str or 1d-array 465 | Income, monetary variable, if a DataFrame is passed then `y`is a 466 | name of the series on this DataFrame, however, you can pass a 467 | pd.Series or np.array. 468 | 469 | Returns 470 | ------- 471 | lorenz : pandas.Dataframe 472 | Lorenz distribution in a Dataframe with two columns, labeled x and 473 | y, that corresponds to plots axis. 474 | 475 | References 476 | ---------- 477 | Lorenz curve. (2017, February 11). In Wikipedia, The Free Encyclopedia. 478 | Retrieved 14:34, May 15, 2017, from 479 | https://en.wikipedia.org/w/index.php?title=Lorenz_curve&oldid=764853675 480 | """ 481 | data = self.df 482 | if weights is None: 483 | weights = self.weights 484 | 485 | return inequality.lorenz(income, weights, data) 486 | 487 | def gini(self, income, weights=None, sort=True): 488 | """Calculate Gini's index. 489 | 490 | The Gini coefficient (sometimes expressed as a Gini ratio or a 491 | normalized Gini index) is a measure of statistical dispersion intended 492 | to represent the income or wealth distribution of a nation's residents, 493 | and is the most commonly used measure of grouped. It was developed by 494 | Corrado Gini. 495 | 496 | The Gini coefficient measures the grouped among values of a frequency 497 | distribution (for example, levels of income). A Gini coefficient of 498 | zero expresses perfect equality, where all values are the same (for 499 | example, where everyone has the same income). A Gini coefficient of 1 500 | (or 100%) expresses maximal grouped among values (e.g., for a large 501 | number of people, where only one person has all the income or 502 | consumption, and all others have none, the Gini coefficient will be 503 | very nearly one). 504 | 505 | Parameters 506 | ---------- 507 | data : pandas.DataFrame 508 | DataFrame that contains the data. 
509 | income : str or np.array, optional 510 | Name of the monetary variable `x` in` df` 511 | weights : str or np.array, optional 512 | Name of the series containing the weights `x` in` df` 513 | sorted : bool, optional 514 | If the DataFrame is previously ordered by the variable `x`, it's 515 | must pass True, but False by default. 516 | 517 | Returns 518 | ------- 519 | gini : float 520 | Gini Index Value. 521 | 522 | Notes 523 | ----- 524 | The calculation is done following (discrete probability distribution): 525 | G = 1 - [∑_i^n f(y_i)·(S_{i-1} + S_i)] 526 | where: 527 | - y_i = Income 528 | - S_i = ∑_{j=1}^i y_i · f(y_i) 529 | 530 | Reference 531 | --------- 532 | - Gini coefficient. (2017, May 8). In Wikipedia, The Free Encyclopedia. 533 | Retrieved 14:30, May 15, 2017, from 534 | https://en.wikipedia.org/w/index.php?title=Gini_coefficient&oldid=779424616 535 | 536 | - Jenkins, S. (1988). Calculating income distribution indices 537 | from micro-data. National Tax Journal. http://doi.org/10.2307/41788716 538 | 539 | Todo 540 | ---- 541 | - Implement statistical deviation calculation, VAR (GINI) 542 | 543 | """ 544 | data = self.df 545 | if weights is None: 546 | weights = self.weights 547 | 548 | return inequality.gini(income, weights, data, sort) 549 | 550 | def atkinson(self, income, weights=None, e=0.5): 551 | """Calculate Atkinson index. 552 | 553 | More precisely labelled a family of income grouped measures, the 554 | theoretical range of Atkinson values is 0 to 1, with 0 being a state of 555 | equal distribution. 556 | An intuitive interpretation of this index is possible: Atkinson values 557 | can be used to calculate the proportion of total income that would be 558 | required to achieve an equal level of social welfare as at present if 559 | incomes were perfectly distributed. 560 | 561 | For example, an Atkinson index value of 0.20 suggests 562 | that we could achieve the same level of social welfare with only 563 | 1 – 0.20 = 80% of income. 
The theoretical range of Atkinson values is 0 564 | to 1, with 0 being a state of equal distribution. 565 | 566 | Parameters 567 | ---------- 568 | income : array or str 569 | If `data` is none `income` must be an 1D-array, when `data` is a 570 | pd.DataFrame, you must pass the name of income variable as string. 571 | weights : array or str, optional 572 | If `data` is none `weights` must be an 1D-array, when `data` is a 573 | pd.DataFrame, you must pass the name of weights variable as string. 574 | e : int, optional 575 | Epsilon parameter interpreted by atkinson index as grouped 576 | adversion, must be a number between 0 to 1. 577 | data : pd.DataFrame, optional 578 | data is a pd.DataFrame that contains the variables. 579 | 580 | Returns 581 | ------- 582 | atkinson : float 583 | 584 | Reference 585 | --------- 586 | Atkinson index. (2017, March 12). In Wikipedia, The Free Encyclopedia. 587 | Retrieved 14:35, May 15, 2017, from 588 | https://en.wikipedia.org/w/index.php?title=Atkinson_index&oldid=769991852 589 | 590 | Todo 591 | ---- 592 | - Implement: CALCULATING INCOME DISTRIBUTION INDICES FROM MICRO-DATA 593 | http://www.jstor.org/stable/41788716 594 | - The results has difference with stata, maybe have a bug. 595 | """ 596 | data = self.df 597 | if weights is None: 598 | weights = self.weights 599 | 600 | return inequality.atkinson(income, weights, data, e) 601 | 602 | def kakwani(self, tax, income_pre_tax, weights=None): 603 | """Calculate kakwani's index. 604 | 605 | The Kakwani (1977) index of tax progressivity is defined as twice the 606 | area between the concentration curves for taxes and pre-tax income, 607 | or equivalently, the concentration index for t(x) minus the Gini index 608 | for x, i.e. 609 | 610 | K = C(t) - G(x) 611 | = (2/t) cov [t(x), F(x)] - (2/x) cov [x, F(x)]. 612 | 613 | Parameters 614 | ---------- 615 | data : pandas.DataFrame 616 | This variable is a DataFrame that contains all data required in 617 | columns. 
618 | tax_variable : array-like or str 619 | This variable represent tax payment of person, if pass array-like 620 | then data must be None, else you pass str-name column in `data`. 621 | income_pre_tax : array-like or str 622 | This variable represent income of person, if pass array-like 623 | then data must be None, else you pass str-name column in `data`. 624 | weights : array-like or str 625 | This variable represent weights of each person, if pass array-like 626 | then data must be None, else you pass str-name column in `data`. 627 | 628 | Returns 629 | ------- 630 | kakwani : float 631 | 632 | References 633 | ---------- 634 | Jenkins, S. (1988). Calculating income distribution indices from 635 | micro-data. National Tax Journal. http://doi.org/10.2307/41788716 636 | """ 637 | # main calc 638 | data = self.df 639 | if weights is None: 640 | weights = self.weights 641 | 642 | return inequality.kakwani(tax, income_pre_tax, weights, data) 643 | 644 | def reynolds_smolensky( 645 | self, income_pre_tax, income_post_tax, weights=None 646 | ): 647 | """Calculate Reynolds-Smolensky's index. 648 | 649 | The Reynolds-Smolensky (1977) index of the redistributive effect of 650 | taxes, which can also be interpreted as an index of progressivity 651 | (Lambert 1985), is defined as: 652 | 653 | L = Gx - Gy 654 | = [2/x]cov[x,F(x)] - [2/ybar] cov [y, F(y)]. 655 | 656 | Parameters 657 | ---------- 658 | data : pandas.DataFrame 659 | This variable is a DataFrame that contains all data required in 660 | it's columns. 661 | income_pre_tax : array-like or str 662 | This variable represent tax payment of person, if pass array-like 663 | then data must be None, else you pass str-name column in `data`. 664 | income_post_tax : array-like or str 665 | This variable represent income of person, if pass array-like 666 | then data must be None, else you pass str-name column in `data`. 
667 | weights : array-like or str 668 | This variable represent weights of each person, if pass array-like 669 | then data must be None, else you pass str-name column in `data`. 670 | 671 | Returns 672 | ------- 673 | reynolds_smolensky : float 674 | 675 | References 676 | ---------- 677 | Jenkins, S. (1988). Calculating income distribution indices from 678 | micro-data. National Tax Journal. http://doi.org/10.2307/41788716 679 | """ 680 | data = self.df 681 | if weights is None: 682 | weights = self.weights 683 | 684 | return inequality.reynolds_smolensky( 685 | income_pre_tax, income_post_tax, weights, data 686 | ) 687 | 688 | def theil(self, income, weights=None): 689 | """Calculate theil index. 690 | 691 | The Theil index is a statistic primarily used to measure economic 692 | grouped and other economic phenomena. It is a special case of the 693 | generalized entropy index. It can be viewed as a measure of redundancy, 694 | lack of diversity, isolation, segregation, grouped, non-randomness, and 695 | compressibility. It was proposed by econometrician Henri Theil. 696 | 697 | Parameters 698 | ---------- 699 | data : pandas.DataFrame 700 | This variable is a DataFrame that contains all data required in 701 | it's columns. 702 | income : array-like or str 703 | This variable represent tax payment of person, if pass array-like 704 | then data must be None, else you pass str-name column in `data`. 705 | weights : array-like or str 706 | This variable represent weights of each person, if pass array-like 707 | then data must be None, else you pass str-name column in `data`. 708 | 709 | Returns 710 | ------- 711 | theil : float 712 | 713 | References 714 | ---------- 715 | Theil index. (2016, December 17). In Wikipedia, The Free Encyclopedia. 
716 | Retrieved 14:17, May 15, 2017, from 717 | https://en.wikipedia.org/w/index.php?title=Theil_index&oldid=755407818 718 | 719 | """ 720 | data = self.df 721 | if weights is None: 722 | weights = self.weights 723 | 724 | return inequality.theil(income, weights, data) 725 | 726 | def avg_tax_rate(self, total_tax, total_base, weights=None): 727 | """Compute the average tax rate given a base income and a total tax. 728 | 729 | Parameters 730 | ---------- 731 | total_base : str or numpy.array 732 | total_tax : str or numpy.array 733 | data : pd.DataFrame 734 | 735 | Returns 736 | ------- 737 | avg_tax_rate : float or pd.Series 738 | Is the ratio between mean the tax income and base of income. 739 | 740 | Reference 741 | --------- 742 | Panel de declarantes de IRPF 1999-2007: Metodología, estructura y 743 | variables. (2011). 744 | Panel de declarantes de IRPF 1999-2007: Metodología, estructura y 745 | variables. Documentos. 746 | """ 747 | data = self.df 748 | if weights is None: 749 | weights = self.weights 750 | 751 | return inequality.avg_tax_rate(total_tax, total_base, weights, data) 752 | 753 | def top_rest( 754 | self, income, weights=None, data=None, top_percentage=10 755 | ): 756 | """Calculate the 10:90 Ratio. 757 | 758 | Calculates the quotient between the number of contributions from the 759 | top 10% of contributors divided by the number contributions made by the 760 | other 90%. The ratio is 1 if the total contributions by the top 761 | contributors are equal to the cotnributions made by the rest; less than 762 | zero if the top 10% contributes less than the rest; and greater that 1 763 | if the top 10% contributes more than the other ninety percent. 764 | 765 | Parameters 766 | ---------- 767 | income : array-like or str 768 | This variable represent tax payment of person, if pass array-like 769 | then data must be None, else you pass str-name column in `data`. 
770 | weights : array-like or str 771 | This variable represent weights of each person, if pass array-like 772 | then data must be None, else you pass str-name column in `data`. 773 | All-ones by default 774 | data : pandas.DataFrame 775 | This variable is a DataFrame that contains all data required in 776 | it's columns. 777 | top_percentage : float 778 | The richest x percent to consider. (10 percent by default) 779 | It must be a number between 0 and 100 780 | 781 | Returns 782 | ------- 783 | ratio : float 784 | 785 | References 786 | ---------- 787 | Participation Inequality in Wikis: A Temporal Analysis Using WikiChron. 788 | Serrano, Abel & Arroyo, Javier & Hassan, Samer. (2018). 789 | DOI: 10.1145/3233391.3233536. 790 | """ 791 | data = self.df 792 | if weights is None: 793 | weights = self.weights 794 | 795 | return inequality.top_rest(income, weights, data, top_percentage) 796 | --------------------------------------------------------------------------------