├── .gitattributes ├── TODO.md ├── docs ├── requirements.txt ├── source │ ├── quickstart.rst │ ├── index.rst │ └── conf.py └── Makefile ├── src └── ineqpy │ ├── grouped │ ├── __init__.py │ ├── inequality.py │ └── stats.py │ ├── __init__.py │ ├── utils.py │ ├── statistics.py │ ├── _statistics.py │ ├── inequality.py │ └── api.py ├── examples ├── download_data.md ├── quick_start.py ├── alternatives_comparision.py └── quick_start.ipynb ├── pyrightconfig.json ├── .github └── workflows │ ├── docs.yml │ ├── cicd.yml │ ├── release.yml │ └── pypi.yml ├── tests ├── test_api.py ├── data │ ├── weightedXW.csv │ └── repXW.csv ├── test_statistics.py ├── test_inequality.py └── test_moments.ipynb ├── license.txt ├── Makefile ├── .gitignore ├── pyproject.toml └── README.md /.gitattributes: -------------------------------------------------------------------------------- 1 | ineqpy/_version.py export-subst 2 | -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | # TODO 2 | 3 | - [x] adopt uv 4 | - [x] update deps and python ver 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | sphinx-autoapi 3 | sphinx-rtd-theme 4 | myst-parser 5 | numpydoc 6 | -------------------------------------------------------------------------------- /docs/source/quickstart.rst: -------------------------------------------------------------------------------- 1 | 2 | .. include:: ../../README.md 3 | :parser: myst_parser.sphinx_ 4 | 5 | -------------------------------------------------------------------------------- /src/ineqpy/grouped/__init__.py: -------------------------------------------------------------------------------- 1 | """Grouped subpackage. 2 | 3 | Contains inequality and stats module. 
4 | """ 5 | from ineqpy.grouped import inequality, stats 6 | 7 | __all__ = ["inequality", "stats"] 8 | -------------------------------------------------------------------------------- /src/ineqpy/__init__.py: -------------------------------------------------------------------------------- 1 | """IneqPy: A python package for inequality analysis.""" 2 | 3 | from ineqpy import api, grouped, inequality, statistics, utils 4 | 5 | 6 | __all__ = ["inequality", "statistics", "grouped", "api", "utils"] 7 | -------------------------------------------------------------------------------- /examples/download_data.md: -------------------------------------------------------------------------------- 1 | # Get Data 2 | 3 | When we start to work with Survey Data, could be hard find data to apply this kind of analysis. 4 | For this reason I let you a nice web from dowload data to start to work with. 5 | 6 | - ![Link to download data.](http://www.icpsr.umich.edu/icpsrweb/ICPSR/studies/4517) 7 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. IneqPy documentation master file, created by 2 | sphinx-quickstart on Mon Jan 17 19:56:36 2022. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to IneqPy's documentation! 7 | ================================== 8 | 9 | .. 
toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | ./quickstart.rst 14 | 15 | Indices and tables 16 | ================== 17 | 18 | * :ref:`genindex` 19 | * :ref:`modindex` 20 | * :ref:`search` 21 | -------------------------------------------------------------------------------- /pyrightconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": [ 3 | "src" 4 | ], 5 | 6 | "exclude": [ 7 | "deps", 8 | ".venv", 9 | "**/node_modules", 10 | "**/__pycache__", 11 | "src/experimental", 12 | "src/typestubs" 13 | ], 14 | 15 | "ignore": [ 16 | "src/oldstuff" 17 | ], 18 | 19 | "stubPath": "src/stubs", 20 | "venv": ".venv", 21 | 22 | "reportMissingImports": true, 23 | "reportMissingTypeStubs": false, 24 | 25 | "pythonPlatform": "Linux", 26 | 27 | "executionEnvironments": [ 28 | { 29 | "root": "src" 30 | } 31 | ] 32 | } 33 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = source 8 | BUILDDIR = build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
"""IneqPy quick-start example.

Loads the EU-SILC sample dataset shipped with the package and computes a
battery of weighted inequality and distributional statistics.
"""
# load packages
from pathlib import Path

import pandas as pd

import ineqpy as ineq

# inputs
# BUG FIX: the original wrote Path("ineq.__file__"), which builds a path from
# the *literal string* "ineq.__file__"; we need the module's real location.
data_path = Path(ineq.__file__).parent / "examples/eusilc.csv"
data = pd.read_csv(data_path, index_col=0).dropna()
svy = ineq.api.Survey(data, weights="rb050")

colname = "eqincome"

# inequality indices
svy.gini(colname)
svy.atkinson(colname)
svy.theil(colname)

# weighted descriptive statistics
svy.mean(colname)
svy.percentile(colname)
svy.kurt(colname)
svy.skew(colname)

# Lorenz curve plot
svy.lorenz(colname).plot(figsize=(5, 5))

# also works passing variables (arrays / Series) instead of column names.
x = data.eqincome
w = data.rb050
# BUG FIX: `var` is not exported at the package root; it lives in the
# `statistics` submodule (see `ineqpy/__init__.py`).
ineq.statistics.var(variable=x, weights=w)
21 | token: ${{ secrets.GH_TOKEN }} 22 | # this is a built-in strategy in release-please, see "Action Inputs" 23 | # for more options 24 | # https://github.com/googleapis/release-please-action?tab=readme-ov-file#how-should-i-write-my-commits 25 | release-type: simple 26 | -------------------------------------------------------------------------------- /tests/test_api.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | import ineqpy 5 | 6 | 7 | def test_api(): 8 | # todo improve this test. 9 | # only checks that all methods works. 10 | svy = ineqpy.api.Survey 11 | data = np.random.randint(0, 100, (10, 3)) 12 | w = np.random.randint(1, 10, 10).reshape(-1, 1) 13 | data = np.hstack([data, w]) 14 | columns = list("abcw") 15 | 16 | df = svy(data=data, columns=columns, weights="w") 17 | df.mean("a") 18 | df.var("a") 19 | df.skew("a") 20 | df.kurt("a") 21 | df.gini("a") 22 | df.atkinson("a") 23 | df.theil("a") 24 | df.percentile("a") 25 | 26 | 27 | def test_df(): 28 | # GH #15 29 | LEN = 10 30 | values = [np.arange(LEN), np.random.randint(1, 10, LEN)] 31 | df = pd.DataFrame(values, index=["x", "n"]).T 32 | 33 | svy = ineqpy.api.Survey(df, df.index, df.columns, weights="n") 34 | svy.lorenz("x") 35 | -------------------------------------------------------------------------------- /.github/workflows/pypi.yml: -------------------------------------------------------------------------------- 1 | name: Upload Python Package 2 | on: 3 | release: 4 | types: [published] 5 | permissions: 6 | contents: read 7 | 8 | jobs: 9 | 10 | release: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | - name: Setup venv 15 | if: github.event_name == 'release' && github.event.action == 'created' 16 | uses: actions/setup-python@v4 17 | with: 18 | python-version: '3.8' 19 | - name: Install dependencies 20 | run: make install 21 | - name: Build package 22 | if: github.event_name == 
'release' && github.event.action == 'created' 23 | run: make build 24 | - name: Publish package 25 | if: github.event_name == 'release' && github.event.action == 'created' 26 | # uses: pypa/gh-action-pypi-publish@release/v1 27 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 28 | with: 29 | user: __token__ 30 | password: ${{ secrets.PYPI_API_TOKEN }} 31 | -------------------------------------------------------------------------------- /tests/data/weightedXW.csv: -------------------------------------------------------------------------------- 1 | ,x,w 2 | 0,400,2 3 | 1,16,6 4 | 2,104,2 5 | 3,966,1 6 | 4,682,8 7 | 5,296,7 8 | 6,505,5 9 | 7,384,7 10 | 8,984,2 11 | 9,954,4 12 | 10,201,4 13 | 11,583,6 14 | 12,391,4 15 | 13,340,3 16 | 14,412,8 17 | 15,835,2 18 | 16,937,7 19 | 17,705,1 20 | 18,969,4 21 | 19,961,6 22 | 20,909,1 23 | 21,933,2 24 | 22,243,7 25 | 23,986,7 26 | 24,467,7 27 | 25,231,3 28 | 26,797,4 29 | 27,723,1 30 | 28,401,2 31 | 29,128,1 32 | 30,347,7 33 | 31,826,4 34 | 32,273,3 35 | 33,19,3 36 | 34,381,6 37 | 35,912,5 38 | 36,616,4 39 | 37,547,4 40 | 38,453,8 41 | 39,442,2 42 | 40,28,5 43 | 41,482,6 44 | 42,690,7 45 | 43,717,4 46 | 44,908,3 47 | 45,28,5 48 | 46,691,2 49 | 47,906,6 50 | 48,631,4 51 | 49,109,7 52 | 50,408,8 53 | 51,340,8 54 | 52,497,6 55 | 53,743,3 56 | 54,242,7 57 | 55,7,5 58 | 56,505,1 59 | 57,133,4 60 | 58,359,6 61 | 59,496,8 62 | 60,166,5 63 | 61,176,5 64 | 62,827,4 65 | 63,904,3 66 | 64,738,7 67 | 65,315,4 68 | 66,255,6 69 | 67,655,3 70 | 68,6,3 71 | 69,512,1 72 | 70,488,2 73 | 71,324,7 74 | 72,191,3 75 | 73,869,5 76 | 74,421,3 77 | -------------------------------------------------------------------------------- /license.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) [year] [fullname] 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the 
"Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | 3 | ##@ Utility 4 | .PHONY: help 5 | help: ## Display this help 6 | @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z_-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) 7 | 8 | 9 | .PHONY: venv 10 | uv: ## Install uv 11 | @command -v uv >/dev/null 2>&1 || curl -LsSf https://astral.sh/uv/install.sh | sh 12 | 13 | .PHONY: dev 14 | dev: uv ## Install dev dependencies 15 | uv sync --dev 16 | 17 | .PHONY: install 18 | install: uv ## Install dependencies 19 | uv sync 20 | 21 | .PHONY: test 22 | test: ## Run tests 23 | uv run pytest 24 | 25 | .PHONY: lint 26 | lint: ## Run linters 27 | uv run ruff check ./src ./tests 28 | 29 | .PHONY: fix 30 | fix: ## Fix lint errors 31 | uv run ruff check ./src ./tests --fix 32 | 33 | .PHONY: cov 34 
| cov: ## Run tests with coverage 35 | uv run pytest --cov=src --cov-report=term-missing 36 | 37 | .PHONY: pages 38 | pages: doc ## Build documentation and push to gh-pages 39 | mkdir gh-pages 40 | touch gh-pages/.nojekyll 41 | cp -r docs/build/html/* gh-pages/ 42 | 43 | .PHONY: doc 44 | doc: ## Build documentation 45 | cd docs && uv run make html 46 | 47 | .PHONY: build 48 | build: ## Build package 49 | uv build 50 | 51 | .PHONY: dbash 52 | dbash: ## Run docker 53 | docker run -v ${PWD}:/git/$(shell basename ${PWD}) -w /git/$(shell basename ${PWD}) -it python:3.12 /bin/bash 54 | -------------------------------------------------------------------------------- /tests/test_statistics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.testing as nptest 3 | import pytest 4 | import scipy.stats as sc 5 | 6 | from ineqpy import statistics, utils 7 | 8 | 9 | def gen_inputs(n_tuples=100): 10 | for _ in range(n_tuples): 11 | (x, w) = utils.generate_data_to_test((3, 7)) 12 | 13 | # NOBUG: _ is `repeated_w` which is a vector of ones. 
14 | repeated_x, _ = utils.repeat_data_from_weighted(x, w) 15 | yield x, w, repeated_x 16 | 17 | 18 | @pytest.mark.parametrize("x,w,r_x", gen_inputs()) 19 | def test_mean(x, w, r_x): 20 | real = np.mean(r_x) 21 | obtained = statistics.mean(x, w) 22 | nptest.assert_almost_equal(obtained, real) 23 | 24 | 25 | @pytest.mark.parametrize("x,w,r_x", gen_inputs()) 26 | def test_variance(x, w, r_x): 27 | real = np.var(r_x) 28 | obtained = statistics.var(x, w) 29 | nptest.assert_almost_equal(obtained, real) 30 | 31 | 32 | @pytest.mark.parametrize("x,w,r_x", gen_inputs()) 33 | def test_kurt(x, w, r_x): 34 | real = sc.kurtosis(r_x) + 3 35 | obtained = statistics.kurt(x, w) 36 | nptest.assert_almost_equal(obtained, real) 37 | 38 | 39 | @pytest.mark.parametrize("x,w,r_x", gen_inputs()) 40 | def test_skew(x, w, r_x): 41 | real = sc.skew(r_x) 42 | obtained = statistics.skew(x, w) 43 | nptest.assert_almost_equal(obtained, real) 44 | 45 | 46 | @pytest.mark.parametrize("x,w,r_x", gen_inputs()) 47 | def test_coef_variation(x, w, r_x): 48 | real = np.var(r_x) ** 0.5 / abs(np.mean(r_x)) 49 | obtained = statistics.coef_variation(x, w) 50 | nptest.assert_almost_equal(obtained, real) 51 | 52 | 53 | @pytest.mark.parametrize("x,w,r_x", gen_inputs()) 54 | def test_percentile(x, w, r_x): 55 | p = 50 56 | real = np.percentile(r_x, p, method="lower") 57 | obtained = statistics.percentile(x, w, p=p) 58 | nptest.assert_almost_equal( 59 | obtained, real, err_msg=msg(real, obtained, r_x, x, w) 60 | ) 61 | 62 | 63 | def msg(real, obtained, r_x, x, w): 64 | if abs(real - obtained) > 1e-6: 65 | return f"\nr_x = {str(r_x)}\nx = {str(x)}\nw = {str(w)}" 66 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | \.idea/ 3 | 4 | *\.egg-info/ 5 | 6 | */__pycache__/ 7 | 8 | */\.ipynb_checkpoints/ 9 | 10 | *.pyc 11 | 12 | \.ropeproject/ 13 | 14 | *.sublime* 15 | 16 | .cache/ 17 | 18 
| # Created by https://www.gitignore.io/api/python 19 | # Edit at https://www.gitignore.io/?templates=python 20 | 21 | ### Python ### 22 | # Byte-compiled / optimized / DLL files 23 | __pycache__/ 24 | *.py[cod] 25 | *$py.class 26 | 27 | # C extensions 28 | *.so 29 | 30 | # Distribution / packaging 31 | .Python 32 | build/ 33 | develop-eggs/ 34 | dist/ 35 | downloads/ 36 | eggs/ 37 | .eggs/ 38 | lib/ 39 | lib64/ 40 | parts/ 41 | sdist/ 42 | var/ 43 | wheels/ 44 | share/python-wheels/ 45 | *.egg-info/ 46 | .installed.cfg 47 | *.egg 48 | MANIFEST 49 | 50 | # PyInstaller 51 | # Usually these files are written by a python script from a template 52 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 53 | *.manifest 54 | *.spec 55 | 56 | # Installer logs 57 | pip-log.txt 58 | pip-delete-this-directory.txt 59 | 60 | # Unit test / coverage reports 61 | htmlcov/ 62 | .tox/ 63 | .nox/ 64 | .coverage 65 | .coverage.* 66 | .cache 67 | nosetests.xml 68 | coverage.xml 69 | *.cover 70 | .hypothesis/ 71 | .pytest_cache/ 72 | 73 | # Translations 74 | *.mo 75 | *.pot 76 | 77 | # Django stuff: 78 | *.log 79 | local_settings.py 80 | db.sqlite3 81 | 82 | # Flask stuff: 83 | instance/ 84 | .webassets-cache 85 | 86 | # Scrapy stuff: 87 | .scrapy 88 | 89 | # Sphinx documentation 90 | docs/_build/ 91 | 92 | # PyBuilder 93 | target/ 94 | 95 | # Jupyter Notebook 96 | .ipynb_checkpoints 97 | 98 | # IPython 99 | profile_default/ 100 | ipython_config.py 101 | 102 | # pyenv 103 | .python-version 104 | 105 | # celery beat schedule file 106 | celerybeat-schedule 107 | 108 | # SageMath parsed files 109 | *.sage.py 110 | 111 | # Environments 112 | .env 113 | .venv 114 | env/ 115 | venv/ 116 | ENV/ 117 | env.bak/ 118 | venv.bak/ 119 | 120 | # Spyder project settings 121 | .spyderproject 122 | .spyproject 123 | 124 | # Rope project settings 125 | .ropeproject 126 | 127 | # mkdocs documentation 128 | /site 129 | 130 | # mypy 131 | .mypy_cache/ 132 | .dmypy.json 133 | 
dmypy.json 134 | 135 | # Pyre type checker 136 | .pyre/ 137 | 138 | ### Python Patch ### 139 | .venv/ 140 | 141 | # End of https://www.gitignore.io/api/python 142 | **/.DS_Store 143 | .autoenv 144 | Session.vim 145 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel", "setuptools_scm"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "IneqPy" 7 | dynamic = ["version"] 8 | description = "A Python Package To Quantitative Analysis Of Inequality" 9 | readme = "README.md" 10 | authors = [ 11 | {name = "Max", email = "ineqpy@mxm.mozmail.com"}, 12 | ] 13 | license = {text = "MIT"} 14 | requires-python = ">=3.10" 15 | classifiers = [ 16 | "Intended Audience :: Science/Research", 17 | "License :: OSI Approved :: MIT License", 18 | "Programming Language :: Python :: 3", 19 | "Programming Language :: Python :: 3.10", 20 | "Programming Language :: Python :: 3.11", 21 | "Programming Language :: Python :: 3.12", 22 | "Programming Language :: Python :: 3.13", 23 | ] 24 | dependencies = [ 25 | "numpy", 26 | "pandas", 27 | "numba", 28 | "scipy>=1.14.1", 29 | ] 30 | 31 | [project.urls] 32 | Homepage = "https://github.com/asdf8601/IneqPy" 33 | 34 | [tool.setuptools] 35 | package-dir = {"" = "src"} 36 | 37 | [tool.setuptools_scm] 38 | 39 | [tool.setuptools.packages.find] 40 | where = ["src"] 41 | 42 | 43 | [tool.pyright] 44 | include = ["src"] 45 | exclude = ["**/node_modules", 46 | "**/__pycache__", 47 | "src/experimental", 48 | "deps", 49 | "src/typestubs" 50 | ] 51 | 52 | [tool.lint.ignore] 53 | ignore = ["src/oldstuff"] 54 | stubPath = "src/stubs" 55 | # venv = "env367" 56 | reportMissingImports = true 57 | reportMissingTypeStubs = false 58 | pythonPlatform = "Linux" 59 | 60 | executionEnvironments = [ 61 | { root = "src" } 62 | ] 63 | 64 | [tool.ruff.format] 65 | quote-style 
= "preserve" 66 | 67 | [tool.ruff.lint] 68 | select = [ 69 | 'F', 70 | 'E', 71 | 'W', 72 | 'UP', 73 | 'NPY201', 74 | ] 75 | ignore = [ 76 | 'E501', 77 | 'E741', 78 | 'E712', 79 | 'E721', 80 | 'UP038', # non-pep604-isinstance -- https://github.com/astral-sh/ruff/issues/7871 81 | ] 82 | 83 | [tool.ruff.lint.per-file-ignores] 84 | "__init__.py" = ["F401"] 85 | "**/__init__.py" = [ 86 | 'E402', 87 | 'F401', 88 | 'F403', 89 | 'F405', 90 | ] 91 | "**/__init__.pyi" = [ 92 | 'E402', 93 | 'F401', 94 | 'F403', 95 | 'F405', 96 | ] 97 | "skimage/_shared/testing.py" = ['F401'] 98 | "doc/examples/**/*.py" = ['E402'] 99 | 100 | [tool.ruff.lint.pydocstyle] 101 | convention = 'numpy' 102 | 103 | [tool.ruff.lint.isort] 104 | known-first-party = ["ineqpy"] 105 | combine-as-imports = true 106 | 107 | [tool.uv] 108 | dev-dependencies = [ 109 | "myst-parser>=4.0.0", 110 | "numpydoc>=1.8.0", 111 | "pytest-cov>=6.0.0", 112 | "pytest>=8.3.3", 113 | "ruff>=0.7.4", 114 | "sphinx-autoapi>=3.3.3", 115 | "sphinx-rtd-theme>=3.0.2", 116 | "sphinx>=8.1.3", 117 | ] 118 | -------------------------------------------------------------------------------- /src/ineqpy/grouped/inequality.py: -------------------------------------------------------------------------------- 1 | """inequality module.""" 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | from ineqpy import _statistics as stats, inequality as ineq, utils 7 | 8 | 9 | def atkinson_group( 10 | data: pd.DataFrame = None, 11 | income: str | pd.DataFrame | np.ndarray = None, 12 | weights: str | pd.DataFrame | np.ndarray = None, 13 | group: str | pd.DataFrame | np.ndarray = None, 14 | e: float = 0.5, 15 | ): 16 | r"""Calculate atkinson index. 17 | 18 | The Atkinson index (also known as the Atkinson measure or Atkinson 19 | grouped measure) is a measure of income grouped developed by British 20 | economist Anthony Barnes Atkinson. 
The measure is useful in determining 21 | which end of the distribution contributed most to the observed grouped.The 22 | index is subgroup decomposable. This means that overall grouped in the 23 | population can be computed as the sum of the corresponding Atkinson indices 24 | within each group, and the Atkinson index of the group mean incomes. 25 | 26 | Parameters 27 | ---------- 28 | income : str or np.array 29 | Income variable, you can pass name of variable in `df` or array-like 30 | weights : str or np.array 31 | probability or weights, you can pass name of variable in `df` or 32 | array-like 33 | groups : str or np.array 34 | stratum, name of stratum in `df` or array-like 35 | e : int, optional 36 | Value of epsilon parameter 37 | data : pd.DataFrame, optional 38 | DataFrame that's contains the previous data. 39 | 40 | Returns 41 | ------- 42 | atkinson_by_group : float 43 | 44 | Reference 45 | --------- 46 | Atkinson index. (2017, March 12). In Wikipedia, The Free Encyclopedia. 47 | Retrieved 14:52, May 15, 2017, from 48 | https://en.wikipedia.org/w/index.php?title=Atkinson_index&oldid=769991852 49 | 50 | TODO 51 | ---- 52 | - Review function, has different results with stata. 
53 | """ 54 | if (weights is None) and (data is None): 55 | weights = utils.not_empty_weights(weights, income) 56 | 57 | if data is None: 58 | data = utils._to_df(income=income, weights=weights, group=group) 59 | income = "income" 60 | weights = "weights" 61 | group = "group" 62 | 63 | N = data.shape[0] 64 | 65 | def a_h(df): 66 | """Funtion alias to calculate atkinson from a DataFrame.""" 67 | if df is None: 68 | raise ValueError 69 | 70 | inc = df[income].values 71 | w = df[weights].values 72 | atk = ineq.atkinson(income=inc, weights=w, e=e) 73 | out = atk * (len(df) / N) 74 | 75 | return out 76 | 77 | # main calc: 78 | data = data.copy() 79 | groupped = data.groupby(group) 80 | atk_by_group = groupped.apply(a_h) 81 | mu_by_group = groupped.apply(lambda d: stats.mean(d[income], d[weights])) 82 | out = atk_by_group.sum() + ineq.atkinson(income=mu_by_group.values) 83 | 84 | return out 85 | -------------------------------------------------------------------------------- /src/ineqpy/utils.py: -------------------------------------------------------------------------------- 1 | """Useful functions that make easier develop other functions.""" 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | 7 | def _to_df(*args, **kwargs) -> pd.DataFrame: 8 | res = pd.DataFrame() 9 | 10 | if args != (): 11 | res = pd.DataFrame([*args]).T 12 | 13 | if kwargs is not None: 14 | df = pd.DataFrame.from_dict(kwargs, orient="columns") 15 | if res.empty: 16 | res = df 17 | else: 18 | res = pd.concat([res, df], axis=1) 19 | 20 | return res 21 | 22 | 23 | def _apply_to_df(func, df, x, weights, *args, **kwargs): 24 | """Generalize main arguments as Series of a pd.Dataframe. 25 | 26 | Parameters 27 | ---------- 28 | func : function 29 | Function to convert his arguments in Series of an Dataframe. 30 | df : pandas.Dataframe 31 | DataFrame whats contains the Series `x_name` and `w_name`. 32 | x_name : str 33 | Name of the column in `df`. 
def not_empty_weights(weights, like):
    """Return usable weights, normalized to sum to one.

    When `weights` is None, a uniform weight vector shaped like `like`
    is returned instead (all ones, deliberately *not* normalized, matching
    the historical behavior of this helper).

    Parameters
    ----------
    weights : array-like or None
        Raw weights; copied before normalization so the input is untouched.
    like : array-like
        Template used to size the fallback uniform weights.

    Returns
    -------
    weights : array-like
    """
    if weights is None:
        # No weights supplied: every observation counts the same.
        return np.ones_like(like)

    # Work on a copy, then scale so the weights sum to one.
    scaled = weights.copy()
    return scaled / np.sum(scaled)
def repeat_data_from_weighted(x, w):
    """Expand weighted observations into repeated (unweighted) data.

    Each value ``x[i]`` is repeated ``w[i]`` times; the returned weights are
    all ones. Only integer weights are supported (a fractional repeat count
    has no meaning here).

    Parameters
    ----------
    x, w : array-like
        Values and their integer frequencies.

    Returns
    -------
    repeated_x, repeated_w : np.ndarray
        Float arrays (matching the historical output dtype).

    Raises
    ------
    NotImplementedError
        If `w` is not an integer array.
    """
    w = np.asarray(w)
    # BUG FIX: the original only inspected w[0], so a float hiding later in
    # the array slipped through; check the whole dtype instead.
    if not np.issubdtype(w.dtype, np.integer):
        raise NotImplementedError

    # PERF: np.repeat replaces the original O(n^2) loop of np.append calls.
    # Cast x to float so the output dtype matches the old implementation
    # (np.append onto an empty float array always promoted to float64).
    repeated_x = np.repeat(np.asarray(x, dtype=float), w)
    repeated_w = np.ones(int(w.sum()))

    return repeated_x, repeated_w
The 10 | objective of this function is to make it easy to calculate the moments of 11 | the distribution that follows an estimator, eg. Can be used to calculate 12 | the variance that follows the mean. 13 | 14 | Parameters 15 | ---------- 16 | data : pandas.DataFrame 17 | Dataframe containing the series needed for the calculation 18 | x : str 19 | weights : str 20 | Name of the weights `w` in the DataFrame 21 | group : str 22 | Name of the stratum variable `h` in the DataFrame 23 | 24 | Returns 25 | ------- 26 | vhat_h : pandas.Series 27 | A series with the values of the variance of each `h` stratum. 28 | 29 | Todo 30 | ---- 31 | Review improvements. 32 | 33 | Examples 34 | -------- 35 | >>> # Computes the variance of the mean 36 | >>> data = pd.DataFrame(data=[renta, peso, estrato], 37 | columns=["renta", "peso", "estrato"]) 38 | >>> v = variance_hat_group(data) 39 | >>> v 40 | stratum 41 | 1 700.917.728,64 42 | 2 9.431.897.980,96 43 | 3 317.865.839.789,10 44 | 4 741.304.873.092,88 45 | 5 535.275.436.859,10 46 | 6 225.573.783.240,68 47 | 7 142.048.272.010,63 48 | 8 40.136.989.131,06 49 | 9 18.501.808.022,56 50 | dtype: float64 51 | 52 | >>> # the value of de variance of the mean: 53 | >>> v_total = v.sum() / peso.sum() ** 2 54 | 24662655225.947945 55 | """ 56 | if data is None: 57 | data = utils._to_df(x=variable, weights=weights, group=group) 58 | variable = "x" 59 | weights = "weights" 60 | group = "group" 61 | 62 | def v(df): 63 | r"""Calculate the variance of each stratum `h`. 64 | 65 | Parameters 66 | ---------- 67 | df : pandas.DataFrame 68 | Dataframe containing the data. 69 | 70 | Returns 71 | ------- 72 | vhat : float 73 | Value of the population variance for the stratum `h`. 74 | 75 | Notes 76 | ----- 77 | Source: 78 | .. 
math:: r`N_h ^2 \cdot fpc \cdot \frac{ \hatS ^2 _h }{n_h}` 79 | """ 80 | xi = df[variable].copy().values 81 | Nh = df[weights].sum() 82 | fpc = 1 - (len(df) / Nh) 83 | ddof = 1 if len(df) > 1 else 0 84 | shat2h = c_moment(variable=xi, order=2, ddof=ddof) 85 | return (Nh ** 2) * fpc * shat2h / len(df) 86 | 87 | return data.groupby(group).apply(v) 88 | 89 | 90 | def moment_group(data=None, variable="x", weights="w", group="h", order=2): 91 | """Calculate the asymmetry of each `h` stratum. 92 | 93 | Parameters 94 | ---------- 95 | variable : array or str 96 | weights : array or str 97 | group : array or str 98 | data : pd.DataFrame, optional 99 | order : int, optional 100 | 101 | Returns 102 | ------- 103 | moment_of_order : float 104 | 105 | TODO 106 | ---- 107 | Review calculations, it does not appear to be correct. 108 | Attempt to make a generalization of vhat_group, for any estimator. 109 | 110 | .. warning:: Actually Does Not Work! 111 | """ 112 | if data is None: 113 | data = utils._to_df(x=variable, weights=weights, group=group) 114 | variable = "x" 115 | weights = "weights" 116 | group = "group" 117 | 118 | def mh(df, weights=weights): 119 | x = df[variable].copy().values 120 | weights = utils.not_empty_weights(weights, x) 121 | Nh = df.loc[:, weights].sum() 122 | fpc = 1 - (len(df) / Nh) 123 | ddof = 1 if len(df) > 1 else 0 124 | stdm = std_moment(variable=x, weights=weights, order=order, ddof=ddof) 125 | return (Nh ** order) * fpc * stdm / len(df) 126 | 127 | return data.groupby(group).apply(mh) 128 | 129 | 130 | def quasivariance_hat_group( 131 | data=None, variable=None, weights=None, group=None 132 | ): 133 | """Calculate quasivariance. 134 | 135 | Sample variance of `variable`, calculated as the second-order central 136 | moment. 137 | 138 | Parameters 139 | ---------- 140 | data : pd.DataFrame, optional 141 | pd.DataFrame that contains all variables needed. 142 | variable : array or str 143 | variable `x` apply the statistic. 
If `data` is None then must pass this 144 | argument as array, else as string name in `data` 145 | weights : array or str 146 | weights can be interpreted as frequency, probability, 147 | density function of `x`, each element in `x`. If `data` is None then 148 | must pass this argument as array, else as string name in `data` 149 | group : array or str 150 | group is a categorical variable to calculate the statistical by each 151 | group. If `data` is None then must pass this argument as array, else as 152 | string name in `data` 153 | 154 | Returns 155 | ------- 156 | shat2_group : array or pd.Series 157 | 158 | References 159 | ---------- 160 | Moment (mathematics). (2017, May 6). In Wikipedia, The Free Encyclopedia. 161 | Retrieved 14:40, May 15, 2017, from 162 | https://en.wikipedia.org/w/index.php?title=Moment_(mathematics)&oldid=778996402 163 | 164 | Notes 165 | ----- 166 | This function is useful to calculate the variance of the mean. 167 | 168 | TODO 169 | ---- 170 | Review function 171 | """ 172 | if data is None: 173 | data = utils._to_df(x=variable, weights=weights) 174 | variable = "x" 175 | weights = "weights" 176 | 177 | def sd(df): 178 | x = variable 179 | return c_moment(variable=x, weights=weights, param=mean(x)) 180 | 181 | return data.groupby(group).apply(sd) 182 | -------------------------------------------------------------------------------- /tests/test_inequality.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from ineqpy import inequality 5 | 6 | 7 | def test_concentration_0d(): 8 | x = np.array([100]) 9 | 10 | obtained = inequality.concentration(income=x) 11 | 12 | assert np.isnan(obtained) 13 | 14 | def test_gini_2d(): 15 | x = np.array([[57], [63], [81], [79], [88], [57], [42], [3], [77], [89]]) 16 | w = np.array([[2], [5], [2], [9], [5], [7], [4], [5], [9], [9]]) 17 | obtained = inequality.gini(income=x, weights=w) 18 | expected = 0.2134389018024818 19 | 
assert obtained==expected 20 | 21 | 22 | def test_gini_1d(): 23 | x = np.array([57, 63, 81, 79, 88, 57, 42, 3, 77, 89]) 24 | w = np.array([2, 5, 2, 9, 5, 7, 4, 5, 9, 9]) 25 | obtained = inequality.gini(income=x, weights=w) 26 | expected = 0.2134389018024818 27 | assert obtained==expected 28 | 29 | 30 | def test_gini_1d_0_w(): 31 | x = np.array([2, 2]) 32 | w = np.array([1000000, 1]) 33 | obtained = inequality.gini(income=x, weights=w) 34 | expected = 0 35 | assert obtained==expected 36 | 37 | 38 | def test_gini_1d_0_series(): 39 | x = np.array([2, 2]) 40 | # w = np.array([1000000, 1]) 41 | obtained = inequality.gini(income=x) 42 | expected = 0 43 | assert obtained==expected 44 | 45 | 46 | def test_gini_1d_1_series(): 47 | x = np.array([0, 1]) 48 | # w = np.array([1000000, 1]) 49 | obtained = inequality.gini(income=x) 50 | expected = 1 51 | assert obtained==expected 52 | 53 | 54 | def test_gini_1d_1_w(): 55 | x = np.array([0, 1]) 56 | w = np.array([1, 1]) 57 | obtained = inequality.gini(income=x, weights=w) 58 | expected = 1 59 | assert obtained==expected 60 | 61 | 62 | def test_atkinson_2d(): 63 | x = np.array([[57], [63], [81], [79], [88], [57], [42], [3], [77], [89]]) 64 | w = np.array([[2], [5], [2], [9], [5], [7], [4], [5], [9], [9]]) 65 | obtained = inequality.atkinson(income=x, weights=w) 66 | expected = 0.06537929778911322 67 | assert obtained==expected 68 | 69 | 70 | def test_atkinson_1d(): 71 | x = np.array([57, 63, 81, 79, 88, 57, 42, 3, 77, 89]) 72 | w = np.array([2, 5, 2, 9, 5, 7, 4, 5, 9, 9]) 73 | obtained = inequality.atkinson(income=x, weights=w) 74 | expected = 0.06537929778911322 75 | assert obtained==expected 76 | 77 | 78 | def test_atkinson_1d_1_w(): 79 | x = np.array([1, 1]) 80 | w = np.array([1, 1]) 81 | obtained = inequality.atkinson(income=x, weights=w) 82 | expected = 0 83 | assert obtained==expected 84 | 85 | def test_theil_1d_series(): 86 | """ Testing theil with no weights. 
Every value is the same """ 87 | x = np.repeat(5, 10) 88 | 89 | obtained = inequality.theil(income=x) 90 | expected = 0 91 | 92 | np.testing.assert_almost_equal(obtained, expected) 93 | 94 | def test_theil_1d_series_2(): 95 | x = np.arange(1, 10) 96 | 97 | obtained = inequality.theil(income=x) 98 | expected = 0.1473838569435545 99 | 100 | np.testing.assert_almost_equal(obtained, expected) 101 | 102 | def test_theil_1d_1_w(): 103 | # TODO check this 104 | x = np.array([1, 1]) 105 | w = np.array([1, 1]) 106 | obtained = inequality.theil(income=x, weights=w) 107 | expected = 0 108 | assert obtained==expected 109 | 110 | def test_ratio_equality(): 111 | x = np.array([1, 9]) 112 | w = np.array([9, 1]) 113 | obtained = inequality.top_rest(income=x, weights=w) 114 | assert obtained == 1.0 115 | 116 | def test_ratio_equality_fracc(): 117 | x = np.array([1, 9]) 118 | w = np.array([.9, .1]) 119 | obtained = inequality.top_rest(income=x, weights=w) 120 | assert obtained == 1.0 121 | 122 | def test_ratio_0d(): 123 | x = np.array([100]) 124 | obtained = inequality.top_rest(income=x) 125 | 126 | assert np.isnan(obtained) 127 | 128 | def test_ratio_1d(): 129 | x = np.array([57, 63, 81, 79, 88, 42, 3, 77, 89]) 130 | w = np.array([9, 5, 2, 9, 5, 4, 5, 9, 9]) 131 | obtained = inequality.top_rest(income=x, weights=w) 132 | expected = pytest.approx(0.15323043465128208) 133 | assert obtained == expected 134 | 135 | def test_ratio_2d(): 136 | x = np.array([[57], [63], [81], [79], [88], [42], [3], [77], [89]]) 137 | w = np.array([[9], [5], [2], [9], [5], [4], [5], [9], [9]]) 138 | obtained = inequality.top_rest(income=x, weights=w) 139 | expected = pytest.approx(0.15323043465128208) 140 | assert obtained == expected 141 | 142 | 143 | @pytest.mark.parametrize('n', range(15, 20)) 144 | def test_ratio_weighted_eq_unweighted(n): 145 | # Generating a random list of between 10 and 100 items 146 | x = np.random.randint(1, 100, n) 147 | w = np.random.randint(1, 5, n) 148 | 149 | # Weight should 
be the same as repeating the number multiple times 150 | xw = [] 151 | for xi, wi in zip(x,w, strict=False): 152 | xw += [xi]*wi # Create a list that contains 153 | 154 | xw = np.array(xw) 155 | 156 | assert len(xw) == np.sum(w) 157 | 158 | weighted = inequality.top_rest(income=x, weights=w) 159 | unweighted = inequality.top_rest(income=xw) 160 | assert pytest.approx(weighted) == unweighted 161 | 162 | def test_ratio_unweighted(): 163 | x = np.array([ 164 | 11, 67, 93, 68, 80, 71, 0, 65, 45, 73, 56, 38, 18, 24, 94, 72, 56, 165 | 37, 26, 34, 49, 30, 30, 31, 10, 0, 77, 6, 64, 75, 56, 79, 46, 87, 166 | 39, 73, 63, 3, 49, 52, 94, 0, 68, 86, 42, 84, 58, 5, 45, 62, 49, 167 | 97, 77, 94, 66, 84, 42, 39, 7, 24, 65, 52, 59, 52, 38, 27, 85, 43, 168 | 26, 6, 93, 24, 48, 42, 50, 58, 89, 79, 94, 50, 2, 46, 82, 98, 69, 169 | 9, 50, 33, 86, 77, 25, 39, 61, 78, 47, 29, 43, 20, 56, 35]) 170 | obtained = inequality.top_rest(x) 171 | expected = 0.22203712517848642 172 | assert pytest.approx(obtained) == expected 173 | 174 | 175 | def test_hoover_index_series(): 176 | """ Testing hoover with no weights (default all ones) """ 177 | x = np.arange(10) 178 | obtained = inequality.hoover(x) 179 | expected = 4.0 180 | 181 | np.testing.assert_almost_equal(obtained, expected) 182 | 183 | def test_hoover_index(): 184 | x = np.arange(10) 185 | w = np.ones(10) 186 | obtained = inequality.hoover(x, w) 187 | expected = 4 188 | np.testing.assert_almost_equal(obtained, expected) 189 | -------------------------------------------------------------------------------- /tests/data/repXW.csv: -------------------------------------------------------------------------------- 1 | ,x,w 2 | 0,400.0,1.0 3 | 1,400.0,1.0 4 | 2,16.0,1.0 5 | 3,16.0,1.0 6 | 4,16.0,1.0 7 | 5,16.0,1.0 8 | 6,16.0,1.0 9 | 7,16.0,1.0 10 | 8,104.0,1.0 11 | 9,104.0,1.0 12 | 10,966.0,1.0 13 | 11,682.0,1.0 14 | 12,682.0,1.0 15 | 13,682.0,1.0 16 | 14,682.0,1.0 17 | 15,682.0,1.0 18 | 16,682.0,1.0 19 | 17,682.0,1.0 20 | 18,682.0,1.0 21 | 
19,296.0,1.0 22 | 20,296.0,1.0 23 | 21,296.0,1.0 24 | 22,296.0,1.0 25 | 23,296.0,1.0 26 | 24,296.0,1.0 27 | 25,296.0,1.0 28 | 26,505.0,1.0 29 | 27,505.0,1.0 30 | 28,505.0,1.0 31 | 29,505.0,1.0 32 | 30,505.0,1.0 33 | 31,384.0,1.0 34 | 32,384.0,1.0 35 | 33,384.0,1.0 36 | 34,384.0,1.0 37 | 35,384.0,1.0 38 | 36,384.0,1.0 39 | 37,384.0,1.0 40 | 38,984.0,1.0 41 | 39,984.0,1.0 42 | 40,954.0,1.0 43 | 41,954.0,1.0 44 | 42,954.0,1.0 45 | 43,954.0,1.0 46 | 44,201.0,1.0 47 | 45,201.0,1.0 48 | 46,201.0,1.0 49 | 47,201.0,1.0 50 | 48,583.0,1.0 51 | 49,583.0,1.0 52 | 50,583.0,1.0 53 | 51,583.0,1.0 54 | 52,583.0,1.0 55 | 53,583.0,1.0 56 | 54,391.0,1.0 57 | 55,391.0,1.0 58 | 56,391.0,1.0 59 | 57,391.0,1.0 60 | 58,340.0,1.0 61 | 59,340.0,1.0 62 | 60,340.0,1.0 63 | 61,412.0,1.0 64 | 62,412.0,1.0 65 | 63,412.0,1.0 66 | 64,412.0,1.0 67 | 65,412.0,1.0 68 | 66,412.0,1.0 69 | 67,412.0,1.0 70 | 68,412.0,1.0 71 | 69,835.0,1.0 72 | 70,835.0,1.0 73 | 71,937.0,1.0 74 | 72,937.0,1.0 75 | 73,937.0,1.0 76 | 74,937.0,1.0 77 | 75,937.0,1.0 78 | 76,937.0,1.0 79 | 77,937.0,1.0 80 | 78,705.0,1.0 81 | 79,969.0,1.0 82 | 80,969.0,1.0 83 | 81,969.0,1.0 84 | 82,969.0,1.0 85 | 83,961.0,1.0 86 | 84,961.0,1.0 87 | 85,961.0,1.0 88 | 86,961.0,1.0 89 | 87,961.0,1.0 90 | 88,961.0,1.0 91 | 89,909.0,1.0 92 | 90,933.0,1.0 93 | 91,933.0,1.0 94 | 92,243.0,1.0 95 | 93,243.0,1.0 96 | 94,243.0,1.0 97 | 95,243.0,1.0 98 | 96,243.0,1.0 99 | 97,243.0,1.0 100 | 98,243.0,1.0 101 | 99,986.0,1.0 102 | 100,986.0,1.0 103 | 101,986.0,1.0 104 | 102,986.0,1.0 105 | 103,986.0,1.0 106 | 104,986.0,1.0 107 | 105,986.0,1.0 108 | 106,467.0,1.0 109 | 107,467.0,1.0 110 | 108,467.0,1.0 111 | 109,467.0,1.0 112 | 110,467.0,1.0 113 | 111,467.0,1.0 114 | 112,467.0,1.0 115 | 113,231.0,1.0 116 | 114,231.0,1.0 117 | 115,231.0,1.0 118 | 116,797.0,1.0 119 | 117,797.0,1.0 120 | 118,797.0,1.0 121 | 119,797.0,1.0 122 | 120,723.0,1.0 123 | 121,401.0,1.0 124 | 122,401.0,1.0 125 | 123,128.0,1.0 126 | 124,347.0,1.0 127 | 125,347.0,1.0 128 | 126,347.0,1.0 129 
| 127,347.0,1.0 130 | 128,347.0,1.0 131 | 129,347.0,1.0 132 | 130,347.0,1.0 133 | 131,826.0,1.0 134 | 132,826.0,1.0 135 | 133,826.0,1.0 136 | 134,826.0,1.0 137 | 135,273.0,1.0 138 | 136,273.0,1.0 139 | 137,273.0,1.0 140 | 138,19.0,1.0 141 | 139,19.0,1.0 142 | 140,19.0,1.0 143 | 141,381.0,1.0 144 | 142,381.0,1.0 145 | 143,381.0,1.0 146 | 144,381.0,1.0 147 | 145,381.0,1.0 148 | 146,381.0,1.0 149 | 147,912.0,1.0 150 | 148,912.0,1.0 151 | 149,912.0,1.0 152 | 150,912.0,1.0 153 | 151,912.0,1.0 154 | 152,616.0,1.0 155 | 153,616.0,1.0 156 | 154,616.0,1.0 157 | 155,616.0,1.0 158 | 156,547.0,1.0 159 | 157,547.0,1.0 160 | 158,547.0,1.0 161 | 159,547.0,1.0 162 | 160,453.0,1.0 163 | 161,453.0,1.0 164 | 162,453.0,1.0 165 | 163,453.0,1.0 166 | 164,453.0,1.0 167 | 165,453.0,1.0 168 | 166,453.0,1.0 169 | 167,453.0,1.0 170 | 168,442.0,1.0 171 | 169,442.0,1.0 172 | 170,28.0,1.0 173 | 171,28.0,1.0 174 | 172,28.0,1.0 175 | 173,28.0,1.0 176 | 174,28.0,1.0 177 | 175,482.0,1.0 178 | 176,482.0,1.0 179 | 177,482.0,1.0 180 | 178,482.0,1.0 181 | 179,482.0,1.0 182 | 180,482.0,1.0 183 | 181,690.0,1.0 184 | 182,690.0,1.0 185 | 183,690.0,1.0 186 | 184,690.0,1.0 187 | 185,690.0,1.0 188 | 186,690.0,1.0 189 | 187,690.0,1.0 190 | 188,717.0,1.0 191 | 189,717.0,1.0 192 | 190,717.0,1.0 193 | 191,717.0,1.0 194 | 192,908.0,1.0 195 | 193,908.0,1.0 196 | 194,908.0,1.0 197 | 195,28.0,1.0 198 | 196,28.0,1.0 199 | 197,28.0,1.0 200 | 198,28.0,1.0 201 | 199,28.0,1.0 202 | 200,691.0,1.0 203 | 201,691.0,1.0 204 | 202,906.0,1.0 205 | 203,906.0,1.0 206 | 204,906.0,1.0 207 | 205,906.0,1.0 208 | 206,906.0,1.0 209 | 207,906.0,1.0 210 | 208,631.0,1.0 211 | 209,631.0,1.0 212 | 210,631.0,1.0 213 | 211,631.0,1.0 214 | 212,109.0,1.0 215 | 213,109.0,1.0 216 | 214,109.0,1.0 217 | 215,109.0,1.0 218 | 216,109.0,1.0 219 | 217,109.0,1.0 220 | 218,109.0,1.0 221 | 219,408.0,1.0 222 | 220,408.0,1.0 223 | 221,408.0,1.0 224 | 222,408.0,1.0 225 | 223,408.0,1.0 226 | 224,408.0,1.0 227 | 225,408.0,1.0 228 | 226,408.0,1.0 229 | 
227,340.0,1.0 230 | 228,340.0,1.0 231 | 229,340.0,1.0 232 | 230,340.0,1.0 233 | 231,340.0,1.0 234 | 232,340.0,1.0 235 | 233,340.0,1.0 236 | 234,340.0,1.0 237 | 235,497.0,1.0 238 | 236,497.0,1.0 239 | 237,497.0,1.0 240 | 238,497.0,1.0 241 | 239,497.0,1.0 242 | 240,497.0,1.0 243 | 241,743.0,1.0 244 | 242,743.0,1.0 245 | 243,743.0,1.0 246 | 244,242.0,1.0 247 | 245,242.0,1.0 248 | 246,242.0,1.0 249 | 247,242.0,1.0 250 | 248,242.0,1.0 251 | 249,242.0,1.0 252 | 250,242.0,1.0 253 | 251,7.0,1.0 254 | 252,7.0,1.0 255 | 253,7.0,1.0 256 | 254,7.0,1.0 257 | 255,7.0,1.0 258 | 256,505.0,1.0 259 | 257,133.0,1.0 260 | 258,133.0,1.0 261 | 259,133.0,1.0 262 | 260,133.0,1.0 263 | 261,359.0,1.0 264 | 262,359.0,1.0 265 | 263,359.0,1.0 266 | 264,359.0,1.0 267 | 265,359.0,1.0 268 | 266,359.0,1.0 269 | 267,496.0,1.0 270 | 268,496.0,1.0 271 | 269,496.0,1.0 272 | 270,496.0,1.0 273 | 271,496.0,1.0 274 | 272,496.0,1.0 275 | 273,496.0,1.0 276 | 274,496.0,1.0 277 | 275,166.0,1.0 278 | 276,166.0,1.0 279 | 277,166.0,1.0 280 | 278,166.0,1.0 281 | 279,166.0,1.0 282 | 280,176.0,1.0 283 | 281,176.0,1.0 284 | 282,176.0,1.0 285 | 283,176.0,1.0 286 | 284,176.0,1.0 287 | 285,827.0,1.0 288 | 286,827.0,1.0 289 | 287,827.0,1.0 290 | 288,827.0,1.0 291 | 289,904.0,1.0 292 | 290,904.0,1.0 293 | 291,904.0,1.0 294 | 292,738.0,1.0 295 | 293,738.0,1.0 296 | 294,738.0,1.0 297 | 295,738.0,1.0 298 | 296,738.0,1.0 299 | 297,738.0,1.0 300 | 298,738.0,1.0 301 | 299,315.0,1.0 302 | 300,315.0,1.0 303 | 301,315.0,1.0 304 | 302,315.0,1.0 305 | 303,255.0,1.0 306 | 304,255.0,1.0 307 | 305,255.0,1.0 308 | 306,255.0,1.0 309 | 307,255.0,1.0 310 | 308,255.0,1.0 311 | 309,655.0,1.0 312 | 310,655.0,1.0 313 | 311,655.0,1.0 314 | 312,6.0,1.0 315 | 313,6.0,1.0 316 | 314,6.0,1.0 317 | 315,512.0,1.0 318 | 316,488.0,1.0 319 | 317,488.0,1.0 320 | 318,324.0,1.0 321 | 319,324.0,1.0 322 | 320,324.0,1.0 323 | 321,324.0,1.0 324 | 322,324.0,1.0 325 | 323,324.0,1.0 326 | 324,324.0,1.0 327 | 325,191.0,1.0 328 | 326,191.0,1.0 329 | 327,191.0,1.0 
330 | 328,869.0,1.0 331 | 329,869.0,1.0 332 | 330,869.0,1.0 333 | 331,869.0,1.0 334 | 332,869.0,1.0 335 | 333,421.0,1.0 336 | 334,421.0,1.0 337 | 335,421.0,1.0 338 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Build Status](https://github.com/mmngreco/ineqpy/actions/workflows/python-package.yml/badge.svg) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1419582.svg)](https://doi.org/10.5281/zenodo.1419582) 2 | 3 | 4 | # IneqPy's Package 5 | 6 | This package provides statistics to carry on inequality's analysis. Among the 7 | estimators provided by this package you can find: 8 | 9 | 10 | | Main Statistics | Inequality Indicators | 11 | | :-------------- | :-------------------- | 12 | | Weighted Mean | Weighted Gini | 13 | | Weighted Variance | Weighted Atkinson | 14 | | Weighted Coefficient of variation | Weighted Theil | 15 | | Weighted Kurtosis | Weighted Kakwani | 16 | | Weighted Skewness | Weighted Lorenz curve | 17 | 18 | 19 | ## Installation 20 | 21 | ```bash 22 | pip install ineqpy 23 | # or from github's repo 24 | pip install git+https://github.com/mmngreco/IneqPy.git 25 | ``` 26 | 27 | ## What you can find 28 | 29 | Some examples of how use this package: 30 | 31 | ```python 32 | >>> import pandas as pd 33 | >>> import numpy as np 34 | >>> import ineqpy 35 | >>> d = load_data() # dataframe 36 | >>> d 37 | renta factor 38 | 0 -13004.12 1.0031 39 | 89900 141656.97 1.4145 40 | 179800 1400.38 4.4122 41 | 269700 415080.96 1.3295 42 | 359600 69165.22 1.3282 43 | 449500 9673.83 19.4605 44 | 539400 55057.72 1.2923 45 | 629300 -466.73 1.0050 46 | 719200 3431.86 2.2861 47 | 809100 423.24 1.1552 48 | 899000 0.00 1.0048 49 | 988900 -344.41 1.0028 50 | 1078800 56254.09 1.2752 51 | 1168700 60543.33 2.0159 52 | 1258600 2041.70 2.7381 53 | 1348500 581.38 7.9426 54 | 1438400 55646.05 1.2818 55 | 1528300 0.00 1.0281 56 | 1618200 69650.24 
1.2315 57 | 1708100 -2770.88 1.0035 58 | 1798000 4088.63 1.1256 59 | 1887900 0.00 1.0251 60 | 1977800 10662.63 28.0409 61 | 2067700 3281.95 1.1670 62 | ``` 63 | 64 | ### Descriptive statistics 65 | 66 | ```python 67 | >>> ineqpy.mean(variable=d.renta, weights=d.factor) 68 | 20444.700666031338 69 | >>> ineqpy.var(variable=d.renta, weights=d.factor) 70 | 2982220948.7413292 71 | >>> x, w = d.renta.values, d.factor.values 72 | ``` 73 | 74 | > Note that the standardized moment for order `n`, retrieve the value in that 75 | > column: 76 | > 77 | > | `n` | value | 78 | > |:---:|:---------:| 79 | > | 1 | 0 | 80 | > | 2 | 1 | 81 | > | 3 | Skew | 82 | > | 4 | Kurtosis | 83 | 84 | A helpful table of interpretation of the moments 85 | 86 | ```python 87 | >>> ineqpy.std_moment(variable=x, weights=w, order=1) # ~= 0 88 | 2.4624948200717338e-17 89 | >>> ineqpy.std_moment(variable=x, weights=w, order=2) # = 1 90 | 1.0 91 | >>> ineqpy.std_moment(variable=x, weights=w, order=3) # = skew 92 | 5.9965055750379426 93 | >>> ineqpy.skew(variable=x, weights=w) 94 | 5.9965055750379426 95 | >>> ineqpy.std_moment(variable=x, weights=w, order=4) # = kurtosis 96 | 42.319928851703004 97 | >>> ineqpy.kurt(variable=x, weights=w) 98 | 42.319928851703004 99 | ``` 100 | 101 | ### Inequality's estimators 102 | 103 | ```python 104 | # pass a pandas.DataFrame and inputs as strings 105 | >>> ineqpy.gini(data=d, income='renta', weights='factor') 106 | 0.76739136365917116 107 | # you can pass arrays too 108 | >>> ineqpy.gini(income=d.renta.values, weights=d.factor.values) 109 | 0.76739136365917116 110 | ``` 111 | 112 | ### More examples and comparison with other packages: 113 | 114 | We generate random weighted data to show how ineqpy works. The variables 115 | simulate being: 116 | 117 | x : Income 118 | w : Weights 119 | 120 | To test with classical statistics we generate: 121 | 122 | x_rep : Income values replicated w times each one. 123 | w_rep : Ones column. 
124 | 125 | Additional information: 126 | 127 | np : numpy package 128 | sp : scipy package 129 | pd : pandas package 130 | gsl_stat : GNU Scientific Library written in C. 131 | ineq : IneqPy 132 | 133 | 134 | #### Mean 135 | 136 | ```python 137 | >>> np.mean(x_rep) = 488.535714286 138 | >>> ineq.mean(x, w) = 488.535714286 139 | >>> gsl_stat.wmean(w, x) = 488.5357142857143 140 | ``` 141 | 142 | #### Variance 143 | 144 | ```python 145 | >>> np.var(x_rep) = 63086.1364796 146 | >>> ineq.var(x, w) = 63086.1364796 147 | >>> ineq_stat.wvar(x, w, kind=1) = 63086.1364796 148 | >>> ineq_stat.wvar(x, w, kind=2) = 63247.4820972 149 | >>> gsl_stat.wvariance(w, x) = 63993.161585889124 150 | >>> ineq_stat.wvar(x, w, kind=3) = 63993.1615859 151 | ``` 152 | 153 | #### Covariance 154 | 155 | ```python 156 | >>> np.cov(x_rep, x_rep) = [[ 63247.48209719 63247.48209719] 157 | [ 63247.48209719 63247.48209719]] 158 | >>> ineq_stat.wcov(x, x, w, kind=1) = 63086.1364796 159 | >>> ineq_stat.wcov(x, x, w, kind=2) = 4.94065645841e-324 160 | >>> ineq_stat.wcov(x, x, w, kind=3) = 9.88131291682e-324 161 | ``` 162 | 163 | #### Skewness 164 | 165 | ```python 166 | >>> gsl_stat.wskew(w, x) = -0.05742668111416989 167 | >>> sp_stat.skew(x_rep) = -0.058669605967865954 168 | >>> ineq.skew(x, w) = -0.0586696059679 169 | ``` 170 | 171 | #### Kurtosis 172 | 173 | ```python 174 | >>> sp_stat.kurtosis(x_rep) = -0.7919389201857214 175 | >>> gsl_stat.wkurtosis(w, x) = -0.8540884810553052 176 | >>> ineq.kurt(x, w) - 3 = -0.791938920186 177 | ``` 178 | 179 | #### Percentiles 180 | 181 | ```python 182 | >>> ineq_stat.percentile(x, w, 25) = 293 183 | >>> np.percentile(x_rep, 25) = 293.0 184 | 185 | >>> ineq_stat.percentile(x, w, 50) = 526 186 | >>> np.percentile(x_rep, 50) = 526.0 187 | 188 | >>> ineq_stat.percentile(x, w, 90) = 839 189 | >>> np.percentile(x_rep, 90) = 839.0 190 | ``` 191 | 192 | Another way to use this is through the API module as shown below: 193 | 194 | ## API's module 195 | 196 | Using API's 
module: 197 | 198 | ```python 199 | >>> data = Survey(data=data, columns=columns, weights='w') 200 | >>> data.df.head() 201 | x w 202 | 0 111 3 203 | 1 711 4 204 | 2 346 4 205 | 3 667 1 206 | 4 944 1 207 | ``` 208 | 209 | ### Statistics 210 | 211 | ```python 212 | >>> data.weights = w 213 | >>> df.mean(main_var) = 488.535714286 214 | >>> df.percentile(main_var) = 526 215 | >>> df.var(main_var) = 63086.1364796 216 | >>> df.skew(main_var) = -0.0586696059679 217 | >>> df.kurt(main_var) = 2.20806107981 218 | >>> df.gini(main_var) = 0.298494329293 219 | >>> df.atkinson(main_var) = 0.0925853855635 220 | >>> df.theil(main_var) = 0.156137490566 221 | ``` 222 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/master/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
14 | # 15 | import sys 16 | 17 | from pathlib import Path 18 | 19 | import sphinx_rtd_theme 20 | 21 | src = Path(__file__).parents[2] / "src" 22 | sys.path.insert(0, str(src)) 23 | 24 | 25 | # -- Project information ----------------------------------------------------- 26 | 27 | project = 'IneqPy' 28 | copyright = '2022, Maximiliano Greco' 29 | author = 'Maximiliano Greco' 30 | 31 | # The short X.Y version 32 | version = '' 33 | # The full version, including alpha/beta/rc tags 34 | release = '' 35 | 36 | 37 | # -- General configuration --------------------------------------------------- 38 | 39 | # If your documentation needs a minimal Sphinx version, state it here. 40 | # 41 | # needs_sphinx = '1.0' 42 | 43 | # Add any Sphinx extension module names here, as strings. They can be 44 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 45 | # ones. 46 | extensions = [ 47 | 'sphinx.ext.autodoc', 48 | 'sphinx.ext.doctest', 49 | 'sphinx.ext.todo', 50 | 'sphinx.ext.coverage', 51 | 'sphinx.ext.mathjax', 52 | 'sphinx.ext.viewcode', 53 | 'sphinx.ext.githubpages', 54 | 'sphinx_rtd_theme', 55 | 'autoapi.extension', 56 | 'numpydoc', 57 | 'myst_parser', 58 | ] 59 | 60 | autoapi_type = 'python' 61 | autoapi_dirs = [src] 62 | # Add any paths that contain templates here, relative to this directory. 63 | templates_path = ['_templates'] 64 | 65 | # The suffix(es) of source filenames. 66 | # You can specify multiple suffix as a list of string: 67 | # 68 | # source_suffix = ['.rst', '.md'] 69 | source_suffix = '.rst' 70 | 71 | # The master toctree document. 72 | master_doc = 'index' 73 | 74 | # The language for content autogenerated by Sphinx. Refer to documentation 75 | # for a list of supported languages. 76 | # 77 | # This is also used if you do content translation via gettext catalogs. 78 | # Usually you set "language" from the command line for these cases. 
79 | language = None 80 | 81 | # List of patterns, relative to source directory, that match files and 82 | # directories to ignore when looking for source files. 83 | # This pattern also affects html_static_path and html_extra_path. 84 | exclude_patterns = [] 85 | 86 | # The name of the Pygments (syntax highlighting) style to use. 87 | pygments_style = None 88 | 89 | 90 | # -- Options for HTML output ------------------------------------------------- 91 | 92 | # The theme to use for HTML and HTML Help pages. See the documentation for 93 | # a list of builtin themes. 94 | # 95 | # html_theme = 'alabaster' 96 | html_theme = "sphinx_rtd_theme" 97 | 98 | # Theme options are theme-specific and customize the look and feel of a theme 99 | # further. For a list of options available for each theme, see the 100 | # documentation. 101 | # 102 | # html_theme_options = {} 103 | 104 | # Add any paths that contain custom static files (such as style sheets) here, 105 | # relative to this directory. They are copied after the builtin static files, 106 | # so a file named "default.css" will overwrite the builtin "default.css". 107 | html_static_path = ['_static'] 108 | 109 | # Custom sidebar templates, must be a dictionary that maps document names 110 | # to template names. 111 | # 112 | # The default sidebars (for documents that don't match any pattern) are 113 | # defined by theme itself. Builtin themes are using these templates by 114 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 115 | # 'searchbox.html']``. 116 | # 117 | # html_sidebars = {} 118 | 119 | 120 | # -- Options for HTMLHelp output --------------------------------------------- 121 | 122 | # Output file base name for HTML help builder. 123 | htmlhelp_basename = 'IneqPydoc' 124 | 125 | 126 | # -- Options for LaTeX output ------------------------------------------------ 127 | 128 | latex_elements = { 129 | # The paper size ('letterpaper' or 'a4paper'). 
130 | # 131 | # 'papersize': 'letterpaper', 132 | 133 | # The font size ('10pt', '11pt' or '12pt'). 134 | # 135 | # 'pointsize': '10pt', 136 | 137 | # Additional stuff for the LaTeX preamble. 138 | # 139 | # 'preamble': '', 140 | 141 | # Latex figure (float) alignment 142 | # 143 | # 'figure_align': 'htbp', 144 | } 145 | 146 | # Grouping the document tree into LaTeX files. List of tuples 147 | # (source start file, target name, title, 148 | # author, documentclass [howto, manual, or own class]). 149 | latex_documents = [ 150 | (master_doc, 'IneqPy.tex', 'IneqPy Documentation', 151 | 'Maximiliano Greco', 'manual'), 152 | ] 153 | 154 | 155 | # -- Options for manual page output ------------------------------------------ 156 | 157 | # One entry per manual page. List of tuples 158 | # (source start file, name, description, authors, manual section). 159 | man_pages = [ 160 | (master_doc, 'ineqpy', 'IneqPy Documentation', 161 | [author], 1) 162 | ] 163 | 164 | 165 | # -- Options for Texinfo output ---------------------------------------------- 166 | 167 | # Grouping the document tree into Texinfo files. List of tuples 168 | # (source start file, target name, title, author, 169 | # dir menu entry, description, category) 170 | texinfo_documents = [ 171 | (master_doc, 'IneqPy', 'IneqPy Documentation', 172 | author, 'IneqPy', 'One line description of project.', 173 | 'Miscellaneous'), 174 | ] 175 | 176 | 177 | # -- Options for Epub output ------------------------------------------------- 178 | 179 | # Bibliographic Dublin Core info. 180 | epub_title = project 181 | 182 | # The unique identifier of the text. This can be a ISBN number 183 | # or the project homepage. 184 | # 185 | # epub_identifier = '' 186 | 187 | # A unique identification for the text. 188 | # 189 | # epub_uid = '' 190 | 191 | # A list of files that should not be packed into the epub file. 
192 | epub_exclude_files = ['search.html'] 193 | 194 | 195 | # -- Extension configuration ------------------------------------------------- 196 | 197 | # -- Options for todo extension ---------------------------------------------- 198 | 199 | # If true, `todo` and `todoList` produce output, else they produce nothing. 200 | todo_include_todos = True 201 | -------------------------------------------------------------------------------- /examples/alternatives_comparision.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | # from pygsl import statistics as gsl_stat 3 | from scipy import stats as sp_stat 4 | 5 | import ineqpy as ineq 6 | from ineqpy import _statistics as ineq_stat 7 | 8 | # Generate random data 9 | x, w = ineq.utils.generate_data_to_test((60, 90)) 10 | # Replicating weights 11 | x_rep, w_rep = ineq.utils.repeat_data_from_weighted(x, w) 12 | svy = ineq.api.Survey 13 | 14 | print(""" 15 | ========== 16 | Quickstart 17 | ========== 18 | 19 | We generate random weighted data to show how ineqpy works. The variables 20 | simulate being: 21 | 22 | x : Income 23 | w : Weights 24 | 25 | ```python 26 | >>> x, w = ineq.utils.generate_data_to_test((60,90)) 27 | ``` 28 | 29 | To test with classical statistics we generate: 30 | 31 | x_rep : Income values replicated w times each one. 32 | w_rep : Ones column. 33 | 34 | ```python 35 | >>> x_rep, w_rep = ineq.utils.repeat_data_from_weighted(x, w) 36 | ``` 37 | 38 | Additional information: 39 | 40 | np : numpy package 41 | sp : scipy package 42 | pd : pandas package 43 | gsl_stat : GNU Scientific Library written in C. 
44 | ineq : IneqPy 45 | """) 46 | 47 | 48 | print(""" 49 | Examples and comparision with other packages 50 | ============================================ 51 | 52 | STATISTICS 53 | ========== 54 | 55 | MEAN 56 | ---- 57 | 58 | """) 59 | 60 | print("```python") 61 | print(">>> np.mean(x_rep)".ljust(24), "=", np.mean(x_rep)) 62 | print(">>> ineq.mean(x, w)".ljust(24), "=", ineq.mean(x, w)) 63 | # print(">>> gsl_stat.wmean(w, x)".ljust(24), "=", gsl_stat.wmean(w, x)) 64 | print("```") 65 | 66 | # %timeit ineq.mean(None, x, w) 67 | # %timeit gsl_stat.wmean(w, x) 68 | # %timeit ineq_stat.mean(x, w) 69 | 70 | print( 71 | """ 72 | 73 | VARIANCE 74 | -------- 75 | 76 | """ 77 | ) 78 | 79 | np_var = np.var(x_rep) 80 | inq_var = ineq.var(x, w) 81 | wvar_1 = ineq_stat.wvar(x, w, 1) # population variance 82 | wvar_2 = ineq_stat.wvar(x, w, 2) # sample frequency variance 83 | # gsl_wvar = gsl_stat.wvariance(w, x) 84 | wvar_3 = ineq_stat.wvar(x, w, 3) # sample reliability variance 85 | 86 | print("```python") 87 | print(">>> np.var(x_rep)".ljust(32), "=", np_var) 88 | print(">>> ineq.var(x, w)".ljust(32), "=", inq_var) 89 | print(">>> ineq_stat.wvar(x, w, kind=1)".ljust(32), "=", wvar_1) 90 | print(">>> ineq_stat.wvar(x, w, kind=2)".ljust(32), "=", wvar_2) 91 | # print(">>> gsl_stat.wvariance(w, x)".ljust(32), "=", gsl_wvar) 92 | print(">>> ineq_stat.wvar(x, w, kind=3)".ljust(32), "=", wvar_3) 93 | print("```") 94 | 95 | print( 96 | """ 97 | 98 | COVARIANCE 99 | ---------- 100 | 101 | """ 102 | ) 103 | 104 | np_cov = np.cov(x_rep, x_rep) 105 | ineq_wcov1 = ineq_stat.wcov(x, x, w, 1) 106 | ineq_wcov2 = ineq_stat.wcov(x, x, w, 2) 107 | ineq_wcov3 = ineq_stat.wcov(x, x, w, 3) 108 | 109 | print("```python") 110 | print(">>> np.cov(x_rep, x_rep)".ljust(35), "= ", np_cov) 111 | print(">>> ineq_stat.wcov(x, x, w, kind=1)".ljust(35), "= ", ineq_wcov1) 112 | print(">>> ineq_stat.wcov(x, x, w, kind=2)".ljust(35), "= ", ineq_wcov2) 113 | print(">>> ineq_stat.wcov(x, x, w, kind=3)".ljust(35), 
"= ", ineq_wcov3) 114 | print("```") 115 | print( 116 | """ 117 | 118 | SKEWNESS 119 | -------- 120 | 121 | """ 122 | ) 123 | 124 | # gsl_wskew = gsl_stat.wskew(w, x) 125 | sp_skew = sp_stat.skew(x_rep) 126 | ineq_skew = ineq.skew(x, w) 127 | 128 | print("```python") 129 | # print(">>> gsl_stat.wskew(w, x)".ljust(24), "= ", gsl_wskew) 130 | print(">>> sp_stat.skew(x_rep)".ljust(24), "= ", sp_skew) 131 | print(">>> ineq.skew(x, w)".ljust(24), "= ", ineq_skew) 132 | print("```") 133 | 134 | # %timeit gsl_stat.wskew(w, x) 135 | # %timeit sp_stat.skew(x_rep) 136 | # %timeit ineq.skew(None, x, w) 137 | 138 | print( 139 | """ 140 | 141 | KURTOSIS 142 | -------- 143 | 144 | """ 145 | ) 146 | 147 | sp_kurt = sp_stat.kurtosis(x_rep) 148 | # gsl_wkurt = gsl_stat.wkurtosis(w, x) 149 | ineq_kurt = ineq.kurt(x, w) - 3 150 | print("```python") 151 | print(">>> sp_stat.kurtosis(x_rep)".ljust(28), "= ", sp_kurt) 152 | # print(">>> gsl_stat.wkurtosis(w, x)".ljust(28), "= ", gsl_wkurt) 153 | print(">>> ineq.kurt(x, w) - 3".ljust(28), "= ", ineq_kurt) 154 | print("```") 155 | # %timeit sp_stat.kurtosis(x_rep) 156 | # %timeit gsl_stat.wkurtosis(w, x) 157 | # %timeit ineq.kurt(None, x, w) - 3 158 | 159 | print( 160 | """ 161 | PERCENTILES 162 | ----------- 163 | 164 | """ 165 | ) 166 | q = 50 167 | ineq_perc_50 = ineq_stat.percentile(x, w, q) 168 | np_perc_50 = np.percentile(x_rep, q) 169 | print("```python") 170 | print(">>> ineq_stat.percentile(x, w, %s)".ljust(34) % q, "= ", ineq_perc_50) 171 | print(">>> np.percentile(x_rep, %s)".ljust(34) % q, "= ", np_perc_50) 172 | 173 | q = 25 174 | ineq_perc_25 = ineq_stat.percentile(x, w, q) 175 | np_perc_25 = np.percentile(x_rep, q) 176 | print(">>> ineq_stat.percentile(x, w, %s)".ljust(34) % q, "= ", ineq_perc_25) 177 | print(">>> np.percentile(x_rep, %s)".ljust(34) % q, "= ", np_perc_25) 178 | 179 | q = 75 180 | ineq_perc_75 = ineq_stat.percentile(x, w, q) 181 | np_perc_75 = np.percentile(x_rep, q) 182 | print(">>> ineq_stat.percentile(x, 
w, %s)".ljust(34) % q, "= ", ineq_perc_75) 183 | print(">>> np.percentile(x_rep, %s)".ljust(34) % q, "= ", np_perc_75) 184 | 185 | q = 10 186 | ineq_perc_10 = ineq_stat.percentile(x, w, q) 187 | np_perc_10 = np.percentile(x_rep, q) 188 | print(">>> ineq_stat.percentile(x, w, %s)".ljust(34) % q, "= ", ineq_perc_10) 189 | print(">>> np.percentile(x_rep, %s)".ljust(34) % q, "= ", np_perc_10) 190 | 191 | q = 90 192 | ineq_perc_90 = ineq_stat.percentile(x, w, q) 193 | np_perc_90 = np.percentile(x_rep, q) 194 | print(">>> ineq_stat.percentile(x, w, %s)".ljust(34) % q, "= ", ineq_perc_90) 195 | print(">>> np.percentile(x_rep, %s)".ljust(34) % q, "= ", np_perc_90) 196 | print("```") 197 | 198 | print( 199 | """ 200 | Another way to use this is through the API module as shown below: 201 | 202 | API MODULE 203 | ========== 204 | 205 | """ 206 | ) 207 | 208 | data = np.c_[x, w] 209 | columns = list("xw") 210 | 211 | df = svy(data=data, columns=columns, weights="w") 212 | print("```python") 213 | print(">>> data = svy(data=data, columns=columns, weights='w')") 214 | print(">>> data.head()") 215 | print(df.head()) 216 | print("") 217 | print(">>> data.weights =", df.weights) 218 | print("```") 219 | print("") 220 | main_var = "x" 221 | # df.mean(main_var) 222 | # df.var(main_var) 223 | # df.skew(main_var) 224 | # df.kurt(main_var) 225 | # df.gini(main_var) 226 | # df.atkinson(main_var) 227 | # df.theil(main_var) 228 | # df.percentile(main_var) 229 | 230 | print("```python") 231 | print(">>> df.mean(main_var)".ljust(27), "=", df.mean(main_var)) 232 | print(">>> df.percentile(main_var)".ljust(27), "=", df.percentile(main_var)) 233 | print(">>> df.var(main_var)".ljust(27), "=", df.var(main_var)) 234 | print(">>> df.skew(main_var)".ljust(27), "=", df.skew(main_var)) 235 | print(">>> df.kurt(main_var)".ljust(27), "=", df.kurt(main_var)) 236 | print(">>> df.gini(main_var)".ljust(27), "=", df.gini(main_var)) 237 | print(">>> df.atkinson(main_var)".ljust(27), "=", 
df.atkinson(main_var)) 238 | print(">>> df.theil(main_var)".ljust(27), "=", df.theil(main_var)) 239 | print("```") 240 | -------------------------------------------------------------------------------- /src/ineqpy/statistics.py: -------------------------------------------------------------------------------- 1 | """Descriptive statistics. 2 | 3 | This module contains main descriptive statistics like: mean, variance, etc. 4 | 5 | """ 6 | 7 | from ineqpy import _statistics as stat, utils 8 | 9 | 10 | def c_moment( 11 | variable=None, weights=None, data=None, order=2, param=None, ddof=0 12 | ): 13 | """Calculate central momment. 14 | 15 | Calculate the central moment of `x` with respect to `param` of order `n`, 16 | given the weights `w`. 17 | 18 | Parameters 19 | ---------- 20 | variable : 1d-array 21 | Variable 22 | weights : 1d-array 23 | Weights 24 | data : pandas.DataFrame 25 | Contains all variables needed. 26 | order : int, optional 27 | Moment order, 2 by default (variance) 28 | param : int or array, optional 29 | Parameter for which the moment is calculated, the default is None, 30 | implies use the mean. 31 | ddof : int, optional 32 | Degree of freedom, zero by default. 33 | 34 | Returns 35 | ------- 36 | central_moment : float 37 | 38 | Notes 39 | ----- 40 | - The cmoment of order 1 is 0 41 | - The cmoment of order 2 is the variance. 42 | 43 | Source : https://en.wikipedia.org/wiki/Moment_(mathematics) 44 | 45 | Todo 46 | ---- 47 | Implement: https://en.wikipedia.org/wiki/L-moment#cite_note-wang:96-6 48 | 49 | """ 50 | variable, weights = utils.extract_values(data, variable, weights) 51 | return stat.c_moment(variable, weights, order, param, ddof) 52 | 53 | 54 | def percentile( 55 | variable=None, weights=None, data=None, p=50, interpolate="lower" 56 | ): 57 | """Calculate the value of a quantile given a variable and his weights. 
58 | 59 | Parameters 60 | ---------- 61 | variable : str or array 62 | weights : str or array 63 | data : pd.DataFrame, optional 64 | pd.DataFrame that contains all variables needed. 65 | q : float 66 | Quantile level, if pass 0.5 means median. 67 | interpolate : bool 68 | 69 | Returns 70 | ------- 71 | percentile : float or pd.Series 72 | 73 | """ 74 | variable, weights = utils.extract_values(data, variable, weights) 75 | return stat.percentile(variable, weights, p, interpolate) 76 | 77 | 78 | def std_moment( 79 | variable=None, weights=None, data=None, param=None, order=3, ddof=0 80 | ): 81 | """Calculate standarized momment. 82 | 83 | Calculate the standardized moment of order `c` for the variable` x` with 84 | respect to `c`. 85 | 86 | Parameters 87 | ---------- 88 | variable : 1d-array 89 | Random Variable 90 | weights : 1d-array, optional 91 | Weights or probability 92 | data : pd.DataFrame, optional 93 | pd.DataFrame that contains all variables needed. 94 | order : int, optional 95 | Order of Moment, three by default 96 | param : int or float or array, optional 97 | Central trend, default is the mean. 98 | ddof : int, optional 99 | Degree of freedom. 100 | 101 | Returns 102 | ------- 103 | std_moment : float 104 | Returns the standardized `n` order moment. 105 | 106 | References 107 | ---------- 108 | - https://en.wikipedia.org/wiki/Moment_(mathematics) 109 | - https://en.wikipedia.org/wiki/Standardized_moment 110 | 111 | Todo 112 | ---- 113 | It is the general case of the raw and central moments. Review 114 | implementation. 115 | 116 | """ 117 | variable, weights = utils.extract_values(data, variable, weights) 118 | return stat.std_moment(variable, weights, param, order, ddof) 119 | 120 | 121 | def mean(variable=None, weights=None, data=None): 122 | """Calculate the mean of `variable` given `weights`. 123 | 124 | Parameters 125 | ---------- 126 | variable : array-like or str 127 | Variable on which the mean is estimated. 
128 | weights : array-like or str 129 | Weights of the `x` variable. 130 | data : pandas.DataFrame 131 | Is possible pass a DataFrame with variable and weights, then you must 132 | pass as `variable` and `weights` the column name stored in `data`. 133 | 134 | Returns 135 | ------- 136 | mean : array-like or float 137 | """ 138 | # if pass a DataFrame separate variables. 139 | if data is not None: 140 | variable, weights = utils.extract_values(data, variable, weights) 141 | return stat.mean(variable, utils.not_empty_weights(weights, variable)) 142 | 143 | 144 | def density(variable=None, weights=None, groups=None, data=None): 145 | """Density in percentage. 146 | 147 | Calculates density in percentage. This make division of variable inferring 148 | width in groups as max - min. 149 | 150 | Parameters 151 | ---------- 152 | variable : numpy.array or pandas.DataFrame 153 | Main variable. 154 | weights : numpy.array or pandas.DataFrame 155 | Weights of main variable. 156 | groups : numpy.array or pandas.DataFrame 157 | Label that show which group each element belongs to. 158 | data : pd.DataFrame, optional 159 | Object that contains all variables needed. 160 | 161 | 162 | Returns 163 | ------- 164 | density : array-like 165 | 166 | References 167 | ---------- 168 | Histogram. (2017, May 9). In Wikipedia, The Free Encyclopedia. Retrieved 169 | 14:47, May 15, 2017, from 170 | https://en.wikipedia.org/w/index.php?title=Histogram&oldid=779516918 171 | """ 172 | variable, weights = utils.extract_values(data, variable, weights) 173 | if groups is not None: 174 | groups = data[groups].values 175 | return stat.density(variable, weights, groups) 176 | 177 | 178 | def var(variable=None, weights=None, data=None, ddof=0): 179 | """Calculate the variance. 180 | 181 | Calculate the population variance of `variable` given `weights`. 182 | 183 | Parameters 184 | ---------- 185 | data : pd.DataFrame, optional 186 | pd.DataFrame that contains all variables needed. 
187 | variable : 1d-array or pd.Series or pd.DataFrame 188 | Variable on which the quasivariation is estimated 189 | weights : 1d-array or pd.Series or pd.DataFrame 190 | Weights of the `variable`. 191 | data : pd.DataFrame 192 | Object that contains all variables needed. 193 | ddof : int 194 | Degree of freedom. 195 | 196 | Returns 197 | ------- 198 | variance : 1d-array or pd.Series or float 199 | Estimation of quasivariance of `variable` 200 | 201 | References 202 | ---------- 203 | Moment (mathematics). (2017, May 6). In Wikipedia, The Free Encyclopedia. 204 | Retrieved 14:40, May 15, 2017, from 205 | https://en.wikipedia.org/w/index.php?title=Moment_(mathematics) 206 | 207 | Notes 208 | ----- 209 | If stratificated sample must pass with groupby each strata. 210 | """ 211 | variable, weights = utils.extract_values(data, variable, weights) 212 | return stat.var(variable, weights, ddof) 213 | 214 | 215 | def coef_variation(variable=None, weights=None, data=None): 216 | """Calculate the coefficient of variation. 217 | 218 | Calculate the coefficient of variation of a `variable` given weights. 219 | The coefficient of variation is the square root of the variance of the 220 | incomes divided by the mean income. It has the advantages of being 221 | mathematically tractable and is subgroup decomposable, but is not bounded 222 | from above. 223 | 224 | Parameters 225 | ---------- 226 | variable : array-like or str 227 | weights : array-like or str 228 | data : pandas.DataFrame 229 | 230 | Returns 231 | ------- 232 | coefficient_variation : float 233 | 234 | References 235 | ---------- 236 | Coefficient of variation. (2017, May 5). In Wikipedia, The Free 237 | Encyclopedia. 
Retrieved 15:03, May 15, 2017, from 238 | https://en.wikipedia.org/w/index.php?title=Coefficient_of_variation 239 | """ 240 | # TODO complete docstring 241 | variable, weights = utils.extract_values(data, variable, weights) 242 | return stat.coef_variation(variable, weights) 243 | 244 | 245 | def kurt(variable=None, weights=None, data=None): 246 | """Calculate the Kurtosis coefficient. 247 | 248 | Parameters 249 | ---------- 250 | variable : 1d-array 251 | weights : 1d-array 252 | data : pandas.DataFrame 253 | Object which stores ``variable`` and ``weights``. 254 | 255 | Returns 256 | ------- 257 | kurt : float 258 | Kurtosis coefficient. 259 | 260 | References 261 | ---------- 262 | Moment (mathematics). (2017, May 6). In Wikipedia, The Free Encyclopedia. 263 | Retrieved 14:40, May 15, 2017, from 264 | https://en.wikipedia.org/w/index.php?title=Moment_(mathematics) 265 | 266 | Notes 267 | ----- 268 | It is an alias of the standardized fourth-order moment. 269 | """ 270 | variable, weights = utils.extract_values(data, variable, weights) 271 | return stat.kurt(variable, weights) 272 | 273 | 274 | def skew(variable=None, weights=None, data=None): 275 | """Return the asymmetry coefficient of a sample. 276 | 277 | Parameters 278 | ---------- 279 | data : pandas.DataFrame 280 | variable : array-like, str 281 | weights : array-like, str 282 | data : pandas.DataFrame 283 | Object which stores ``variable`` and ``weights``. 284 | 285 | Returns 286 | ------- 287 | skew : float 288 | 289 | References 290 | ---------- 291 | Moment (mathematics). (2017, May 6). In Wikipedia, The Free Encyclopedia. 292 | Retrieved 14:40, May 15, 2017, from 293 | https://en.wikipedia.org/w/index.php?title=Moment_(mathematics) 294 | 295 | Notes 296 | ----- 297 | It is an alias of the standardized third-order moment. 
298 | """ 299 | variable, weights = utils.extract_values(data, variable, weights) 300 | return stat.skew(variable, weights) 301 | -------------------------------------------------------------------------------- /src/ineqpy/_statistics.py: -------------------------------------------------------------------------------- 1 | """Low level desciptive statistics. 2 | 3 | References 4 | ---------- 5 | 1. http://people.ds.cam.ac.uk/fanf2/hermes/doc/antiforgery/stats.pdf 6 | 2. https://en.wikipedia.org/wiki/Weighted_arithmetic_mean 7 | #Weighted_sample_variance 8 | 3. https://en.wikipedia.org/wiki/Algorithms%5Ffor%5Fcalculating%5Fvariance 9 | #Weighted_incremental_algorithm 10 | """ 11 | 12 | import numpy as np 13 | from numba import guvectorize 14 | 15 | from ineqpy import utils 16 | 17 | 18 | def c_moment(variable=None, weights=None, order=2, param=None, ddof=0): 19 | """Calculate central momment. 20 | 21 | Calculate the central moment of `x` with respect to `param` of order `n`, 22 | given the weights `w`. 23 | 24 | Parameters 25 | ---------- 26 | variable : 1d-array 27 | Variable 28 | weights : 1d-array 29 | Weights 30 | order : int, optional 31 | Moment order, 2 by default (variance) 32 | param : int or array, optional 33 | Parameter for which the moment is calculated, the default is None, 34 | implies use the mean. 35 | ddof : int, optional 36 | Degree of freedom, zero by default. 37 | 38 | Returns 39 | ------- 40 | central_moment : float 41 | 42 | Notes 43 | ----- 44 | - The cmoment of order 1 is 0 45 | - The cmoment of order 2 is the variance. 
46 | 47 | Source : https://en.wikipedia.org/wiki/Moment_(mathematics) 48 | 49 | Todo 50 | ---- 51 | Implement : https://en.wikipedia.org/wiki/L-moment#cite_note-wang:96-6 52 | """ 53 | # return np.sum((x-c)^n*counts) / np.sum(counts) 54 | variable = variable.copy() 55 | weights = utils.not_empty_weights(weights, like=variable) 56 | 57 | if param is None: 58 | param = mean(variable=variable, weights=weights) 59 | elif not isinstance(param, (np.ndarray, int, float)): 60 | raise NotImplementedError 61 | 62 | return np.sum((variable - param) ** order * weights) / ( 63 | np.sum(weights) - ddof 64 | ) 65 | 66 | 67 | def percentile( 68 | variable, weights, percentile=50, interpolation="lower" 69 | ) -> float: 70 | """Calculate the percentile. 71 | 72 | Parameters 73 | ---------- 74 | variable : str or array 75 | weights : str or array 76 | percentile : int or list 77 | Percentile level, if pass 50 we get the median. 78 | interpolation : {'lower', 'higher', 'midpoint'}, optional 79 | Select interpolation method. 80 | 81 | Returns 82 | ------- 83 | percentile : float 84 | """ 85 | sorted_idx = np.argsort(variable) 86 | cum_weights = np.cumsum(weights[sorted_idx]) 87 | lower_percentile_idx = np.searchsorted( 88 | cum_weights, (percentile / 100.0) * cum_weights[-1] 89 | ) 90 | 91 | if interpolation == "midpoint": 92 | res = np.interp( 93 | lower_percentile_idx + 0.5, 94 | np.arange(len(variable)), 95 | variable[sorted_idx], 96 | ) 97 | elif interpolation == "lower": 98 | res = variable[sorted_idx[lower_percentile_idx]] 99 | elif interpolation == "higher": 100 | res = variable[sorted_idx[lower_percentile_idx + 1]] 101 | else: 102 | raise NotImplementedError 103 | 104 | return float(res) 105 | 106 | 107 | def std_moment(variable=None, weights=None, param=None, order=3, ddof=0): 108 | """Calculate the standarized moment. 109 | 110 | Calculate the standarized moment of order `c` for the variable` x` with 111 | respect to `c`. 
112 | 113 | Parameters 114 | ---------- 115 | variable : 1d-array 116 | Random Variable 117 | weights : 1d-array, optional 118 | Weights or probability 119 | order : int, optional 120 | Order of Moment, three by default 121 | param : int or float or array, optional 122 | Central trend, default is the mean. 123 | ddof : int, optional 124 | Degree of freedom. 125 | 126 | Returns 127 | ------- 128 | std_moment : float 129 | Returns the standardized `n` order moment. 130 | 131 | References 132 | ---------- 133 | - https://en.wikipedia.org/wiki/Moment_(mathematics) 134 | #Significance_of_the_moments 135 | - https://en.wikipedia.org/wiki/Standardized_moment 136 | 137 | Todo 138 | ---- 139 | It is the general case of the raw and central moments. Review 140 | implementation. 141 | 142 | """ 143 | if param is None: 144 | param = mean(variable=variable, weights=weights) 145 | 146 | res = c_moment( 147 | variable=variable, weights=weights, order=order, param=param, ddof=ddof 148 | ) 149 | res /= var(variable=variable, weights=weights, ddof=ddof) ** (order / 2) 150 | return res 151 | 152 | 153 | def mean(variable=None, weights=None): 154 | """Calculate the mean of `variable` given `weights`. 155 | 156 | Parameters 157 | ---------- 158 | variable : array-like or str 159 | Variable on which the mean is estimated. 160 | weights : array-like or str 161 | Weights of the `x` variable. 162 | 163 | Returns 164 | ------- 165 | mean : array-like or float 166 | """ 167 | # if pass a DataFrame separate variables. 168 | variable = variable.copy() 169 | weights = utils.not_empty_weights(weights, like=variable) 170 | variable, weights = utils._clean_nans_values(variable, weights) 171 | return np.average(a=variable, weights=weights, axis=0) 172 | 173 | 174 | def var(variable=None, weights=None, ddof=0): 175 | """Calculate the population variance of ``variable`` given `weights`. 
176 | 177 | Parameters 178 | ---------- 179 | variable : 1d-array or pd.Series or pd.DataFrame 180 | Variable on which the quasivariation is estimated 181 | weights : 1d-array or pd.Series or pd.DataFrame 182 | Weights of the `variable`. 183 | 184 | Returns 185 | ------- 186 | variance : 1d-array or pd.Series or float 187 | Estimation of quasivariance of `variable` 188 | 189 | References 190 | ---------- 191 | Moment (mathematics). (2017, May 6). In Wikipedia, The Free Encyclopedia. 192 | Retrieved 14:40, May 15, 2017, from 193 | https://en.wikipedia.org/w/index.php?title=Moment_(mathematics) 194 | 195 | Notes 196 | ----- 197 | If stratificated sample must pass with groupby each strata. 198 | """ 199 | return c_moment(variable=variable, weights=weights, order=2, ddof=ddof) 200 | 201 | 202 | def coef_variation(variable=None, weights=None): 203 | """Calculate the coefficient of variation. 204 | 205 | Calculate the coefficient of variation of a `variable` given weights. The 206 | coefficient of variation is the square root of the variance of the incomes 207 | divided by the mean income. It has the advantages of being mathematically 208 | tractable and is subgroup decomposable, but is not bounded from above. 209 | 210 | Parameters 211 | ---------- 212 | variable : array-like or str 213 | weights : array-like or str 214 | 215 | Returns 216 | ------- 217 | coefficient_variation : float 218 | 219 | References 220 | ---------- 221 | Coefficient of variation. (2017, May 5). In Wikipedia, The Free 222 | Encyclopedia. Retrieved 15:03, May 15, 2017, from 223 | https://en.wikipedia.org/w/index.php?title=Coefficient_of_variation 224 | """ 225 | # todo complete docstring 226 | return var(variable=variable, weights=weights) ** 0.5 / abs( 227 | mean(variable=variable, weights=weights) 228 | ) 229 | 230 | 231 | def kurt(variable=None, weights=None): 232 | """Calculate the asymmetry coefficient. 
233 | 234 | Parameters 235 | ---------- 236 | variable : 1d-array 237 | weights : 1d-array 238 | 239 | Returns 240 | ------- 241 | kurt : float 242 | Kurtosis coefficient. 243 | 244 | References 245 | ---------- 246 | Moment (mathematics). (2017, May 6). In Wikipedia, The Free Encyclopedia. 247 | Retrieved 14:40, May 15, 2017, from 248 | https://en.wikipedia.org/w/index.php?title=Moment_(mathematics) 249 | 250 | Notes 251 | ----- 252 | It is an alias of the standardized fourth-order moment. 253 | """ 254 | return std_moment(variable=variable, weights=weights, order=4) 255 | 256 | 257 | def skew(variable=None, weights=None): 258 | """Return the asymmetry coefficient of a sample. 259 | 260 | Parameters 261 | ---------- 262 | variable : array-like, str 263 | weights : array-like, str 264 | 265 | Returns 266 | ------- 267 | skew : float 268 | 269 | References 270 | ---------- 271 | Moment (mathematics). (2017, May 6). In Wikipedia, The Free Encyclopedia. 272 | Retrieved 14:40, May 15, 2017, from 273 | https://en.wikipedia.org/w/index.php?title=Moment_(mathematics) 274 | 275 | Notes 276 | ----- 277 | It is an alias of the standardized third-order moment. 278 | 279 | """ 280 | return std_moment(variable=variable, weights=weights, order=3) 281 | 282 | 283 | @guvectorize( 284 | "float64[:], float64[:], int64, float64[:]", 285 | "(n),(n),()->()", 286 | nopython=True, 287 | cache=True, 288 | ) 289 | def wvar(x, w, kind, out): 290 | """Calculate weighted variance of X. 291 | 292 | Calculates the weighted variance of x according to a kind of weights. 293 | 294 | Parameters 295 | ---------- 296 | x : np.ndarray 297 | Main variable. 298 | w : np.ndarray 299 | Weigths. 300 | kind : int 301 | Has three modes to calculate de variance, you can control that with 302 | this argument, the values and the output are the next: 303 | * 1. population variance 304 | * 2. sample frequency variance 305 | * 3. sample reliability variance. 
306 | out : np.ndarray 307 | 308 | Returns 309 | ------- 310 | weighted_variance : float 311 | 312 | References 313 | ---------- 314 | https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance 315 | #Weighted_incremental_algorithm 316 | """ 317 | wSum = wSum2 = mean = S = 0 318 | 319 | for i in range(len(x)): # Alternatively "for x, w in zip(data, weights):" 320 | wSum = wSum + w[i] 321 | wSum2 = wSum2 + w[i] * w[i] 322 | meanOld = mean 323 | mean = meanOld + (w[i] / wSum) * (x[i] - meanOld) 324 | S = S + w[i] * (x[i] - meanOld) * (x[i] - mean) 325 | 326 | if kind == 1: 327 | # population_variance 328 | out[0] = S / wSum 329 | elif kind == 2: 330 | # Bessel's correction for weighted samples 331 | # Frequency weights 332 | # sample_frequency_variance 333 | out[0] = S / (wSum - 1) 334 | elif kind == 3: 335 | # Reliability weights 336 | # sample_reliability_variance 337 | out[0] = S / (wSum - wSum2 / wSum) 338 | 339 | 340 | @guvectorize( 341 | "float64[:], float64[:], float64[:], int64, float64[:]", 342 | "(n),(n),(n),()->()", 343 | nopython=True, 344 | cache=True, 345 | ) 346 | def wcov(x, y, w, kind, out): 347 | """Compute weighted covariance between x and y. 348 | 349 | Compute the weighted covariance between two variables, we can chose which 350 | kind of covariance returns. 351 | 352 | Parameters 353 | ---------- 354 | x : np.array 355 | Main variable. 356 | y : np.array 357 | Second variable. 358 | w : np.array 359 | Weights. 360 | kind : int 361 | Kind of weighted covariance is returned: 362 | 1 : population variance 363 | 2 : sample frequency variance 364 | 3 : sample reliability variance. 
365 | out : np.array 366 | 367 | Returns 368 | ------- 369 | weighted_covariance = float 370 | 371 | References 372 | ---------- 373 | https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online 374 | """ 375 | meanx = meany = 0 376 | wsum = wsum2 = 0 377 | C = 0 378 | for i in range(len(x)): 379 | wsum += w[i] 380 | wsum2 += w[i] * w[i] 381 | dx = x[i] - meanx 382 | meanx += (w[i] / wsum) * dx 383 | meany += (w[i] / wsum) * (y[i] - meany) 384 | C += w[i] * dx * (y[i] - meany) 385 | 386 | if kind == 1: 387 | # population_covar 388 | out[0] = C / wsum 389 | 390 | 391 | @guvectorize( 392 | "float64[:], float64[:], float64[:]", 393 | "(n),(n)->()", 394 | nopython=True, 395 | cache=True, 396 | ) 397 | def online_kurtosis(x, w, out): 398 | """Online kurtosis.""" 399 | n = 0 400 | mean = 0 401 | M2 = 0 402 | M3 = 0 403 | M4 = 0 404 | 405 | for i in range(len(x)): 406 | n1 = w[i] 407 | n = n + w[i] 408 | delta = x[i] - mean 409 | delta_n = delta / n 410 | delta_n2 = delta_n * delta_n 411 | term1 = delta * delta_n * n1 412 | mean = mean + w[i] * delta_n / n 413 | M4 = ( 414 | M4 415 | + term1 * delta_n2 * (n * n - 3 * n + 3) 416 | + 6 * delta_n2 * M2 417 | - 4 * delta_n * M3 418 | ) 419 | M3 = M3 + term1 * delta_n * (n - 2) - 3 * delta_n * M2 420 | M2 = M2 + term1 421 | 422 | out[0] = (n * M4) / (M2 * M2) - 3 423 | 424 | 425 | @guvectorize( 426 | "float64[:], float64[:], int64, float64[:]", 427 | "(n),(n),()->()", 428 | nopython=True, 429 | cache=True, 430 | ) 431 | def Mk(x, w, k, out): 432 | """Calculate Mk.""" 433 | w_sum = wx_sum = 0 434 | 435 | for i in range(len(x)): 436 | wx_sum += w[i] * (x[i] ** k) 437 | w_sum += w[i] 438 | 439 | out[0] = wx_sum / w_sum 440 | -------------------------------------------------------------------------------- /src/ineqpy/inequality.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Analysis of inequality. 
4 | 5 | This package provide an easy way to realize a quantitative analysis of 6 | grouped, also make easy work with stratified data, in this module you can 7 | find statistics and grouped indicators to this task. 8 | 9 | Todo 10 | ---- 11 | - Rethinking this module as Class. 12 | - https://en.wikipedia.org/wiki/Income_inequality_metrics 13 | 14 | """ 15 | import numpy as np 16 | import pandas as pd 17 | 18 | from ineqpy import utils 19 | from ineqpy.statistics import mean 20 | 21 | __all__ = [ 22 | "atkinson", 23 | "avg_tax_rate", 24 | "concentration", 25 | "gini", 26 | "kakwani", 27 | "lorenz", 28 | "reynolds_smolensky", 29 | "theil", 30 | "top_rest", 31 | "hoover", 32 | ] 33 | 34 | 35 | def concentration(income, weights=None, data=None, sort=True): 36 | """Calculate concentration's index. 37 | 38 | This function calculate the concentration index, according to the notation 39 | used in [Jenkins1988]_ you can calculate the: 40 | 41 | C_x = 2 / x · cov(x, F_x) 42 | if x = g(x) then C_x becomes C_y 43 | 44 | when there are taxes: 45 | 46 | y = g(x) = x - t(x) 47 | 48 | Parameters 49 | ---------- 50 | income : array-like 51 | weights : array-like 52 | data : pandas.DataFrame 53 | sort : bool 54 | If true, will sort the values. 55 | 56 | Returns 57 | ------- 58 | concentration : array-like 59 | 60 | References 61 | ---------- 62 | Jenkins, S. (1988). Calculating income distribution indices 63 | from micro-data. National Tax Journal. 
http://doi.org/10.2307/41788716 64 | """ 65 | # TODO complete docstring 66 | 67 | # check if DataFrame is passed, if yes then extract variables else make a 68 | # copy 69 | income, weights = utils.extract_values(data, income, weights) 70 | if weights is None: 71 | weights = utils.not_empty_weights(weights, like=income) 72 | 73 | if income.ndim == 0: 74 | income = np.array([income]) 75 | elif income.ndim == 2: 76 | income = np.squeeze(income, axis=1) 77 | 78 | if weights.ndim == 0: 79 | weights = np.array([weights]) 80 | elif weights.ndim == 2: 81 | weights = np.squeeze(weights, axis=1) 82 | 83 | # Small shortcut to avoid warnings below 84 | if income.size <= 1: 85 | return np.nan 86 | 87 | # if sort is true then sort the variables. 88 | if sort: 89 | income, weights = utils._sort_values(income, weights) 90 | 91 | # main calc 92 | f_x = np.atleast_1d(utils.normalize(weights)) 93 | F_x = f_x.cumsum(axis=0) 94 | mu = np.sum(income * f_x) 95 | cov = np.cov(income, F_x, rowvar=False, aweights=f_x)[0, 1] 96 | return 2 * cov / mu 97 | 98 | 99 | def lorenz(income, weights=None, data=None): 100 | """Calculate Lorent's curve. 101 | 102 | In economics, the Lorenz curve is a graphical representation of the 103 | distribution of income or of wealth. It was developed by Max O. Lorenz in 104 | 1905 for representing grouped of the wealth distribution. This function 105 | compute the lorenz curve and returns a DF with two columns of axis x and y. 106 | 107 | Parameters 108 | ---------- 109 | data : pandas.DataFrame 110 | A pandas.DataFrame that contains data. 111 | income : str or 1d-array, optional 112 | Population or wights, if a DataFrame is passed then `income` should be 113 | a name of the column of DataFrame, else can pass a pandas.Series or 114 | array. 115 | weights : str or 1d-array 116 | Income, monetary variable, if a DataFrame is passed then `y`is a name 117 | of the series on this DataFrame, however, you can pass a pd.Series or 118 | np.array. 
119 | 120 | Returns 121 | ------- 122 | lorenz : pandas.Dataframe 123 | Lorenz distribution in a Dataframe with two columns, labeled x and y, 124 | that corresponds to plots axis. 125 | 126 | References 127 | ---------- 128 | Lorenz curve. (2017, February 11). In Wikipedia, The Free Encyclopedia. 129 | Retrieved 14:34, May 15, 2017, from 130 | https://en.wikipedia.org/w/index.php?title=Lorenz_curve&oldid=764853675 131 | """ 132 | if data is not None: 133 | income, weights = utils.extract_values(data, income, weights) 134 | if weights is None: 135 | weights = utils.not_empty_weights(weights, like=income) 136 | 137 | total_income = income * weights 138 | idx_sort = np.argsort(income) 139 | 140 | weights = weights[idx_sort].cumsum() / weights.sum() 141 | weights = weights.reshape(len(weights), 1) 142 | 143 | total_income = total_income[idx_sort].cumsum() / total_income.sum() 144 | total_income = total_income.reshape(len(total_income), 1) 145 | 146 | # to pandas 147 | data = np.hstack([weights, total_income]) 148 | columns = ["Equality", "Income"] 149 | index = pd.Index(weights.round(3).squeeze()) 150 | res = pd.DataFrame(data=data, columns=columns, index=index) 151 | res.index.name = "x" 152 | 153 | return res 154 | 155 | 156 | def gini(income, weights=None, data=None, sort=True): 157 | """Calculate Gini's index. 158 | 159 | The Gini coefficient (sometimes expressed as a Gini ratio or a 160 | normalized Gini index) is a measure of statistical dispersion intended to 161 | represent the income or wealth distribution of a nation's residents, and is 162 | the most commonly used measure of grouped. It was developed by Corrado 163 | Gini. 164 | The Gini coefficient measures the grouped among values of a frequency 165 | distribution (for example, levels of income). A Gini coefficient of zero 166 | expresses perfect equality, where all values are the same (for example, 167 | where everyone has the same income). 
A Gini coefficient of 1 (or 100%) 168 | expresses maximal grouped among values (e.g., for a large number of 169 | people, where only one person has all the income or consumption, and all 170 | others have none, the Gini coefficient will be very nearly one). 171 | 172 | Parameters 173 | ---------- 174 | data : pandas.DataFrame 175 | DataFrame that contains the data. 176 | income : str or np.array, optional 177 | Name of the monetary variable `x` in` df` 178 | weights : str or np.array, optional 179 | Name of the series containing the weights `x` in` df` 180 | sorted : bool, optional 181 | If the DataFrame is previously ordered by the variable `x`, it's must 182 | pass True, but False by default. 183 | 184 | Returns 185 | ------- 186 | gini : float 187 | Gini Index Value. 188 | 189 | Notes 190 | ----- 191 | The calculation is done following (discrete probability distribution): 192 | G = 1 - [∑_i^n f(y_i)·(S_{i-1} + S_i)] 193 | where: 194 | - y_i = Income 195 | - S_i = ∑_{j=1}^i y_i · f(y_i) 196 | 197 | Reference 198 | --------- 199 | - Gini coefficient. (2017, May 8). In Wikipedia, The Free Encyclopedia. 200 | Retrieved 14:30, May 15, 2017, from 201 | https://en.wikipedia.org/w/index.php?title=Gini_coefficient&oldid=779424616 202 | 203 | - Jenkins, S. (1988). Calculating income distribution indices 204 | from micro-data. National Tax Journal. http://doi.org/10.2307/41788716 205 | 206 | TODO 207 | ---- 208 | - Implement statistical deviation calculation, VAR (GINI) 209 | 210 | """ 211 | return concentration(data=data, income=income, weights=weights, sort=sort) 212 | 213 | 214 | def atkinson(income, weights=None, data=None, e=0.5) -> float: 215 | """Calculate atkinson index. 216 | 217 | More precisely labelled a family of income grouped measures, the 218 | theoretical range of Atkinson values is 0 to 1, with 0 being a state of 219 | equal distribution. 
def atkinson(income, weights=None, data=None, e=0.5) -> float:
    """Calculate Atkinson's index.

    More precisely labelled a family of income inequality measures, the
    theoretical range of Atkinson values is 0 to 1, with 0 being a state of
    equal distribution.

    An intuitive interpretation of this index is possible: Atkinson values
    can be used to calculate the proportion of total income that would be
    required to achieve an equal level of social welfare as at present if
    incomes were perfectly distributed.

    For example, an Atkinson index value of 0.20 suggests that we could
    achieve the same level of social welfare with only 1 - 0.20 = 80% of
    income.

    Parameters
    ----------
    income : array or str
        If `data` is None `income` must be an 1D-array, when `data` is a
        pd.DataFrame, you must pass the name of income variable as string.
    weights : array or str, optional
        If `data` is None `weights` must be an 1D-array, when `data` is a
        pd.DataFrame, you must pass the name of weights variable as string.
    data : pd.DataFrame, optional
        data is a pd.DataFrame that contains the variables.
    e : float, optional
        Epsilon parameter interpreted by the Atkinson index as inequality
        aversion; must be non-negative. ``e == 1`` is handled as the
        logarithmic limit case of the general formula.

    Returns
    -------
    atkinson : float

    Raises
    ------
    ValueError
        If both `income` and `data` are None, or if `e` is negative.

    References
    ----------
    Atkinson index. (2017, March 12). In Wikipedia, The Free Encyclopedia.
    Retrieved 14:35, May 15, 2017, from
    https://en.wikipedia.org/w/index.php?title=Atkinson_index

    TODO
    ----
    - Implement: CALCULATING INCOME DISTRIBUTION INDICES FROM MICRO-DATA
      http://www.jstor.org/stable/41788716
    - The results have differences with Stata; verify against it.
    """
    if (income is None) and (data is None):
        raise ValueError("Must pass at least one of both `income` or `df`")
    if e < 0:
        # BUG FIX: the old guard `elif e >= 0 or e < 1` was always true, so
        # a negative epsilon silently produced a meaningless value and the
        # error branch was unreachable. Fail loudly instead.
        raise ValueError("Not valid e value, it must be non-negative (e >= 0)")

    income, weights = utils.extract_values(data, income, weights)
    weights = utils.not_empty_weights(weights, income)

    # not-null condition
    income, weights = utils.not_null_condition(income, weights)

    # not-empty condition
    if len(income) == 0:
        return 0

    # auxiliar variables: mean and distribution
    mu = mean(variable=income, weights=weights)
    f_i = np.atleast_1d(weights / sum(weights))  # density function

    # main calc
    if e == 1:
        # Limit case: A(1) = 1 - exp(sum_i f_i * ln(y_i)) / mu.
        # BUG FIX: log(mu) is a scalar and must be subtracted once, outside
        # the sum; the old code subtracted it once per observation.
        return 1 - np.power(np.e, np.sum(f_i * np.log(income)) - np.log(mu))
    # General case, valid for any non-negative e != 1.
    return 1 - np.power(
        np.sum(f_i * np.power(income / mu, 1 - e)), 1 / (1 - e)
    )
312 | weights : array-like or str 313 | This variable represent weights of each person, if pass array-like 314 | then data must be None, else you pass str-name column in `data`. 315 | 316 | Returns 317 | ------- 318 | kakwani : float 319 | 320 | References 321 | ---------- 322 | Jenkins, S. (1988). Calculating income distribution indices from 323 | micro-data. National Tax Journal. http://doi.org/10.2307/41788716 324 | """ 325 | # main calc 326 | c_t = concentration(data=data, income=tax, weights=weights, sort=True) 327 | g_y = concentration( 328 | data=data, income=income_pre_tax, weights=weights, sort=True 329 | ) 330 | return c_t - g_y 331 | 332 | 333 | def reynolds_smolensky( 334 | income_pre_tax, income_post_tax, weights=None, data=None 335 | ): 336 | """Calculate Reynolds-Smolensky's index. 337 | 338 | The Reynolds-Smolensky (1977) index of the redistributive effect of 339 | taxes, which can also be interpreted as an index of progressivity 340 | (Lambert 1985), is defined as: 341 | 342 | L = Gx - Gy 343 | = [2/x]cov[x,F(x)] - [2/ybar] cov [y, F(y)]. 344 | 345 | Parameters 346 | ---------- 347 | data : pandas.DataFrame 348 | This variable is a DataFrame that contains all data required in it's 349 | columns. 350 | income_pre_tax : array-like or str 351 | This variable represent tax payment of person, if pass array-like 352 | then data must be None, else you pass str-name column in `data`. 353 | income_post_tax : array-like or str 354 | This variable represent income of person, if pass array-like 355 | then data must be None, else you pass str-name column in `data`. 356 | weights : array-like or str 357 | This variable represent weights of each person, if pass array-like 358 | then data must be None, else you pass str-name column in `data`. 359 | 360 | Returns 361 | ------- 362 | reynolds_smolensky : float 363 | 364 | References 365 | ---------- 366 | Jenkins, S. (1988). Calculating income distribution indices from 367 | micro-data. National Tax Journal. 
http://doi.org/10.2307/41788716 368 | """ 369 | g_y = concentration(data=data, income=income_post_tax, weights=weights) 370 | g_x = concentration(data=data, income=income_pre_tax, weights=weights) 371 | return g_x - g_y 372 | 373 | 374 | def theil(income, weights=None, data=None): 375 | """Calculate Theil's index. 376 | 377 | The Theil index is a statistic primarily used to measure economic 378 | grouped and other economic phenomena. It is a special case of the 379 | generalized entropy index. It can be viewed as a measure of redundancy, 380 | lack of diversity, isolation, segregation, grouped, non-randomness, and 381 | compressibility. It was proposed by econometrician Henri Theil. 382 | 383 | Parameters 384 | ---------- 385 | data : pandas.DataFrame 386 | This variable is a DataFrame that contains all data required in it's 387 | columns. 388 | income : array-like or str 389 | This variable represent tax payment of person, if pass array-like 390 | then data must be None, else you pass str-name column in `data`. 391 | weights : array-like or str 392 | This variable represent weights of each person, if pass array-like 393 | then data must be None, else you pass str-name column in `data`. 394 | 395 | Returns 396 | ------- 397 | theil : float 398 | 399 | References 400 | ---------- 401 | Theil index. (2016, December 17). In Wikipedia, The Free Encyclopedia. 
402 | Retrieved 14:17, May 15, 2017, from 403 | https://en.wikipedia.org/w/index.php?title=Theil_index&oldid=755407818 404 | 405 | """ 406 | if data is not None: 407 | income, weights = utils.extract_values(data, income, weights) 408 | else: 409 | income = income.copy() 410 | 411 | if weights is None: 412 | weights = utils.not_empty_weights(weights, like=income) 413 | else: 414 | weights = weights.copy() 415 | income, weights = utils.not_null_condition(income, weights) 416 | 417 | # variables needed 418 | mu = mean(variable=income, weights=weights) 419 | f_i = utils.normalize(weights) 420 | return np.sum((f_i * income / mu) * np.log(income / mu)) 421 | 422 | 423 | def avg_tax_rate(total_tax, total_base, weights=None, data=None): 424 | """Calculate average tax rate. 425 | 426 | This function compute the average tax rate given a base income and a total 427 | tax. 428 | 429 | Parameters 430 | ---------- 431 | total_base : str or numpy.array 432 | total_tax : str or numpy.array 433 | data : pd.DataFrame 434 | 435 | Returns 436 | ------- 437 | avg_tax_rate : float or pd.Series 438 | Is the ratio between mean the tax income and base of income. 439 | 440 | Reference 441 | --------- 442 | Panel de declarantes de IRPF 1999-2007: Metodología, estructura y 443 | variables. (2011). 444 | Panel de declarantes de IRPF 1999-2007: Metodología, estructura y 445 | variables. Documentos. 
446 | """ 447 | if ( 448 | isinstance(total_base, (np.ndarray)) 449 | or not isinstance(total_base, (list)) 450 | and not isinstance(total_base, (str)) 451 | ): 452 | n_cols = total_base.shape[1] 453 | elif isinstance(total_base, list): 454 | n_cols = len(total_base) 455 | else: 456 | n_cols = 1 457 | numerator = mean(data=data, variable=total_tax, weights=weights) 458 | denominator = mean(data=data, variable=total_base, weights=weights) 459 | # main calc 460 | res = numerator / denominator 461 | 462 | if data is not None: 463 | base_name = total_base 464 | tax_name = total_tax 465 | else: 466 | base_name = ["base"] * n_cols 467 | tax_name = [f"tax_{i}" for i in range(n_cols)] 468 | 469 | names = ["_".join([t, b]) for t, b in zip(tax_name, base_name, strict=False)] 470 | res = pd.Series(res, index=names) 471 | return res 472 | 473 | 474 | def top_rest(income, weights=None, data=None, top_percentage=10.0): 475 | """Calculate the 10:90 Ratio. 476 | 477 | Calculates the quotient between the number of contributions from the top 478 | 10% of contributors divided by the number contributions made by the other 479 | 90%. The ratio is 1 if the total contributions by the top contributors are 480 | equal to the cotnributions made by the rest; less than zero if the top 10% 481 | contributes less than the rest; and greater that 1 if the top 10% 482 | contributes more than the other ninety percent. 483 | 484 | Parameters 485 | ---------- 486 | income : array-like or str 487 | This variable represent tax payment of person, if pass array-like 488 | then data must be None, else you pass str-name column in `data`. 489 | weights : array-like or str 490 | This variable represent weights of each person, if pass array-like 491 | then data must be None, else you pass str-name column in `data`. 492 | All-ones by default 493 | data : pandas.DataFrame 494 | This variable is a DataFrame that contains all data required in it's 495 | columns. 
496 | top_percentage : float 497 | The richest x percent to consider. (10 percent by default) 498 | It must be a number between 0 and 100 499 | 500 | Returns 501 | ------- 502 | ratio : float 503 | 504 | References 505 | ---------- 506 | Participation Inequality in Wikis: A Temporal Analysis Using WikiChron. 507 | Serrano, Abel & Arroyo, Javier & Hassan, Samer. (2018). 508 | DOI: 10.1145/3233391.3233536. 509 | """ 510 | if data is not None: 511 | income, weights = utils.extract_values(data, income, weights) 512 | else: 513 | income = income.copy() 514 | weights = np.ones_like(income) if weights is None else weights.copy() 515 | 516 | # Small shortcut to avoid divide by zero below 517 | if income.size <= 1: 518 | return np.nan 519 | 520 | income, weights = utils._sort_values(income, weights) 521 | 522 | # variables needed 523 | weights = utils.normalize(weights) 524 | cumw = np.cumsum(weights) 525 | ftosearch = 1 - top_percentage / 100 526 | k = np.searchsorted(cumw, ftosearch, side='right') 527 | f_i = np.atleast_1d(income*weights) 528 | 529 | t = np.sum(f_i[k:]) 530 | r = np.sum(f_i[:k]) 531 | 532 | # Correction 533 | if k > 0: 534 | error = (ftosearch - cumw[k-1]) * income[k] 535 | t -= error 536 | r += error 537 | 538 | return t / r 539 | 540 | 541 | def hoover(income, weights=None, data=None): 542 | """Calculate Hoover index. 543 | 544 | The Hoover index, also known as the Robin Hood index or the Schutz index, 545 | is a measure of income metrics. It is equal to the portion of the total 546 | community income that would have to be redistributed (taken from the richer 547 | half of the population and given to the poorer half) for there to be income 548 | uniformity. 549 | 550 | Formula: 551 | 552 | H = 1/2 sum_i( |xi - mu| ) / sum_i(xi) 553 | 554 | Parameters 555 | ---------- 556 | income : array-like or str 557 | This variable represent tax payment of person, if pass array-like 558 | then data must be None, else you pass str-name column in `data`. 
559 | weights : array-like or str 560 | This variable represent weights of each person, if pass array-like 561 | then data must be None, else you pass str-name column in `data`. 562 | data : pandas.DataFrame 563 | This variable is a DataFrame that contains all data required in it's 564 | columns. 565 | 566 | Returns 567 | ------- 568 | hoover : float 569 | 570 | References 571 | ---------- 572 | Hoover index : https://en.wikipedia.org/wiki/Hoover_index 573 | """ 574 | if data is not None: 575 | income, weights = utils.extract_values(data, income, weights) 576 | else: 577 | income = income.copy() 578 | if weights is None: 579 | weights = utils.not_empty_weights(weights, like=income) 580 | else: 581 | weights = weights.copy() 582 | 583 | income, weights = utils.not_null_condition(income, weights) 584 | 585 | # variables needed 586 | mu = mean(variable=income, weights=weights) 587 | f_i = utils.normalize(weights) 588 | xi = f_i * income 589 | 590 | # main calc 591 | h = np.sum(abs(xi - mu)) * 0.5 / sum(xi) 592 | 593 | return h 594 | -------------------------------------------------------------------------------- /tests/test_moments.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 54, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "import scipy.stats as stats\n", 12 | "\n", 13 | "import ineqpy" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "# Random Variable" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 42, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "x = np.random.randn(10)" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 43, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "w = abs(np.random.randn(10))\n", 39 | "w = w / w.sum()" 40 | ] 41 | }, 42 | { 43 | 
"cell_type": "code", 44 | "execution_count": 44, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "data": { 49 | "text/plain": [ 50 | "0.079282307081652598" 51 | ] 52 | }, 53 | "execution_count": 44, 54 | "metadata": {}, 55 | "output_type": "execute_result" 56 | } 57 | ], 58 | "source": [ 59 | "np.mean(x)" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 45, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "data": { 69 | "text/plain": [ 70 | "0.079282307081652598" 71 | ] 72 | }, 73 | "execution_count": 45, 74 | "metadata": {}, 75 | "output_type": "execute_result" 76 | } 77 | ], 78 | "source": [ 79 | "ineqpy.mean(variable=x)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 46, 85 | "metadata": {}, 86 | "outputs": [ 87 | { 88 | "data": { 89 | "text/plain": [ 90 | "0.26771028632127503" 91 | ] 92 | }, 93 | "execution_count": 46, 94 | "metadata": {}, 95 | "output_type": "execute_result" 96 | } 97 | ], 98 | "source": [ 99 | "np.var(x)" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 47, 105 | "metadata": {}, 106 | "outputs": [ 107 | { 108 | "data": { 109 | "text/plain": [ 110 | "0.26771028632127503" 111 | ] 112 | }, 113 | "execution_count": 47, 114 | "metadata": {}, 115 | "output_type": "execute_result" 116 | } 117 | ], 118 | "source": [ 119 | "ineqpy.var(variable=x)" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 48, 125 | "metadata": {}, 126 | "outputs": [ 127 | { 128 | "data": { 129 | "text/plain": [ 130 | "-0.08683494039388037" 131 | ] 132 | }, 133 | "execution_count": 48, 134 | "metadata": {}, 135 | "output_type": "execute_result" 136 | } 137 | ], 138 | "source": [ 139 | "stats.skew(x)" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 49, 145 | "metadata": {}, 146 | "outputs": [ 147 | { 148 | "data": { 149 | "text/plain": [ 150 | "-0.086834940393880372" 151 | ] 152 | }, 153 | "execution_count": 49, 154 | "metadata": {}, 155 | 
"output_type": "execute_result" 156 | } 157 | ], 158 | "source": [ 159 | "ineqpy.skew(variable=x)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 52, 165 | "metadata": {}, 166 | "outputs": [ 167 | { 168 | "data": { 169 | "text/plain": [ 170 | "2.1915828922522693" 171 | ] 172 | }, 173 | "execution_count": 52, 174 | "metadata": {}, 175 | "output_type": "execute_result" 176 | } 177 | ], 178 | "source": [ 179 | "stats.kurtosis(x) + 3" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 51, 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "data": { 189 | "text/plain": [ 190 | "2.1915828922522693" 191 | ] 192 | }, 193 | "execution_count": 51, 194 | "metadata": {}, 195 | "output_type": "execute_result" 196 | } 197 | ], 198 | "source": [ 199 | "ineqpy.kurt(variable=x)" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "# Repeated values" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 4, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "x = np.array([1,2,2,3,3,3,4,4,4,4,5,5,5,5,5,6,6,6,6,7,7,7,8,8,9])" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 5, 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [ 224 | "xi, fi = np.unique(x, return_counts=True)" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 6, 230 | "metadata": {}, 231 | "outputs": [ 232 | { 233 | "data": { 234 | "text/plain": [ 235 | "array([1, 2, 3, 4, 5, 6, 7, 8, 9])" 236 | ] 237 | }, 238 | "execution_count": 6, 239 | "metadata": {}, 240 | "output_type": "execute_result" 241 | } 242 | ], 243 | "source": [ 244 | "xi # values" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": 7, 250 | "metadata": {}, 251 | "outputs": [ 252 | { 253 | "data": { 254 | "text/plain": [ 255 | "array([1, 2, 3, 4, 5, 4, 3, 2, 1])" 256 | ] 257 | }, 258 | "execution_count": 7, 259 | 
"metadata": {}, 260 | "output_type": "execute_result" 261 | } 262 | ], 263 | "source": [ 264 | "fi # absolute frequency" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": 18, 270 | "metadata": {}, 271 | "outputs": [], 272 | "source": [ 273 | "data = pd.DataFrame(np.c_[x, np.ones(len(x))], columns=list('xf'))" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 19, 279 | "metadata": {}, 280 | "outputs": [ 281 | { 282 | "data": { 283 | "text/html": [ 284 | "
\n", 285 | "\n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | "
xf
01.01.0
12.01.0
22.01.0
33.01.0
43.01.0
53.01.0
64.01.0
74.01.0
84.01.0
94.01.0
105.01.0
115.01.0
125.01.0
135.01.0
145.01.0
156.01.0
166.01.0
176.01.0
186.01.0
197.01.0
207.01.0
217.01.0
228.01.0
238.01.0
249.01.0
\n", 421 | "
" 422 | ], 423 | "text/plain": [ 424 | " x f\n", 425 | "0 1.0 1.0\n", 426 | "1 2.0 1.0\n", 427 | "2 2.0 1.0\n", 428 | "3 3.0 1.0\n", 429 | "4 3.0 1.0\n", 430 | "5 3.0 1.0\n", 431 | "6 4.0 1.0\n", 432 | "7 4.0 1.0\n", 433 | "8 4.0 1.0\n", 434 | "9 4.0 1.0\n", 435 | "10 5.0 1.0\n", 436 | "11 5.0 1.0\n", 437 | "12 5.0 1.0\n", 438 | "13 5.0 1.0\n", 439 | "14 5.0 1.0\n", 440 | "15 6.0 1.0\n", 441 | "16 6.0 1.0\n", 442 | "17 6.0 1.0\n", 443 | "18 6.0 1.0\n", 444 | "19 7.0 1.0\n", 445 | "20 7.0 1.0\n", 446 | "21 7.0 1.0\n", 447 | "22 8.0 1.0\n", 448 | "23 8.0 1.0\n", 449 | "24 9.0 1.0" 450 | ] 451 | }, 452 | "execution_count": 19, 453 | "metadata": {}, 454 | "output_type": "execute_result" 455 | } 456 | ], 457 | "source": [ 458 | "data" 459 | ] 460 | }, 461 | { 462 | "cell_type": "code", 463 | "execution_count": 20, 464 | "metadata": {}, 465 | "outputs": [], 466 | "source": [ 467 | "data_weighted = pd.DataFrame(np.c_[xi,fi], columns=list('xf'))" 468 | ] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": 21, 473 | "metadata": {}, 474 | "outputs": [ 475 | { 476 | "data": { 477 | "text/html": [ 478 | "
\n", 479 | "\n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | "
xf
011
122
233
344
455
564
673
782
891
\n", 535 | "
" 536 | ], 537 | "text/plain": [ 538 | " x f\n", 539 | "0 1 1\n", 540 | "1 2 2\n", 541 | "2 3 3\n", 542 | "3 4 4\n", 543 | "4 5 5\n", 544 | "5 6 4\n", 545 | "6 7 3\n", 546 | "7 8 2\n", 547 | "8 9 1" 548 | ] 549 | }, 550 | "execution_count": 21, 551 | "metadata": {}, 552 | "output_type": "execute_result" 553 | } 554 | ], 555 | "source": [ 556 | "data_weighted" 557 | ] 558 | }, 559 | { 560 | "cell_type": "markdown", 561 | "metadata": {}, 562 | "source": [ 563 | "### Mean" 564 | ] 565 | }, 566 | { 567 | "cell_type": "code", 568 | "execution_count": 22, 569 | "metadata": {}, 570 | "outputs": [ 571 | { 572 | "data": { 573 | "text/plain": [ 574 | "5.0" 575 | ] 576 | }, 577 | "execution_count": 22, 578 | "metadata": {}, 579 | "output_type": "execute_result" 580 | } 581 | ], 582 | "source": [ 583 | "np.mean(x)" 584 | ] 585 | }, 586 | { 587 | "cell_type": "code", 588 | "execution_count": 23, 589 | "metadata": {}, 590 | "outputs": [ 591 | { 592 | "data": { 593 | "text/plain": [ 594 | "x 5.0\n", 595 | "f 1.0\n", 596 | "dtype: float64" 597 | ] 598 | }, 599 | "execution_count": 23, 600 | "metadata": {}, 601 | "output_type": "execute_result" 602 | } 603 | ], 604 | "source": [ 605 | "data.mean()" 606 | ] 607 | }, 608 | { 609 | "cell_type": "code", 610 | "execution_count": 24, 611 | "metadata": {}, 612 | "outputs": [ 613 | { 614 | "data": { 615 | "text/plain": [ 616 | "5.0" 617 | ] 618 | }, 619 | "execution_count": 24, 620 | "metadata": {}, 621 | "output_type": "execute_result" 622 | } 623 | ], 624 | "source": [ 625 | "ineqpy.mean(variable=x)" 626 | ] 627 | }, 628 | { 629 | "cell_type": "code", 630 | "execution_count": 25, 631 | "metadata": {}, 632 | "outputs": [ 633 | { 634 | "data": { 635 | "text/plain": [ 636 | "5.0" 637 | ] 638 | }, 639 | "execution_count": 25, 640 | "metadata": {}, 641 | "output_type": "execute_result" 642 | } 643 | ], 644 | "source": [ 645 | "ineqpy.mean(data, 'x')" 646 | ] 647 | }, 648 | { 649 | "cell_type": "code", 650 | "execution_count": 26, 651 | 
"metadata": {}, 652 | "outputs": [ 653 | { 654 | "data": { 655 | "text/plain": [ 656 | "5.0" 657 | ] 658 | }, 659 | "execution_count": 26, 660 | "metadata": {}, 661 | "output_type": "execute_result" 662 | } 663 | ], 664 | "source": [ 665 | "ineqpy.mean(variable=xi, weights=fi)" 666 | ] 667 | }, 668 | { 669 | "cell_type": "code", 670 | "execution_count": 27, 671 | "metadata": {}, 672 | "outputs": [ 673 | { 674 | "data": { 675 | "text/plain": [ 676 | "5.0" 677 | ] 678 | }, 679 | "execution_count": 27, 680 | "metadata": {}, 681 | "output_type": "execute_result" 682 | } 683 | ], 684 | "source": [ 685 | "ineqpy.mean(data, 'x', 'f')" 686 | ] 687 | }, 688 | { 689 | "cell_type": "markdown", 690 | "metadata": {}, 691 | "source": [ 692 | "### Variance" 693 | ] 694 | }, 695 | { 696 | "cell_type": "code", 697 | "execution_count": 28, 698 | "metadata": {}, 699 | "outputs": [ 700 | { 701 | "data": { 702 | "text/plain": [ 703 | "4.166666666666667" 704 | ] 705 | }, 706 | "execution_count": 28, 707 | "metadata": {}, 708 | "output_type": "execute_result" 709 | } 710 | ], 711 | "source": [ 712 | "np.var(x, ddof=1) # numpy (ddof=1)" 713 | ] 714 | }, 715 | { 716 | "cell_type": "code", 717 | "execution_count": 29, 718 | "metadata": {}, 719 | "outputs": [ 720 | { 721 | "data": { 722 | "text/plain": [ 723 | "0.40000000000000002" 724 | ] 725 | }, 726 | "execution_count": 29, 727 | "metadata": {}, 728 | "output_type": "execute_result" 729 | } 730 | ], 731 | "source": [ 732 | "stats.variation(x) # scipy (ddof=0)" 733 | ] 734 | }, 735 | { 736 | "cell_type": "code", 737 | "execution_count": 30, 738 | "metadata": {}, 739 | "outputs": [ 740 | { 741 | "data": { 742 | "text/plain": [ 743 | "x 4.166667\n", 744 | "f 0.000000\n", 745 | "dtype: float64" 746 | ] 747 | }, 748 | "execution_count": 30, 749 | "metadata": {}, 750 | "output_type": "execute_result" 751 | } 752 | ], 753 | "source": [ 754 | "data.var() # pandas (ddof=1)" 755 | ] 756 | }, 757 | { 758 | "cell_type": "code", 759 | 
"execution_count": 31, 760 | "metadata": {}, 761 | "outputs": [ 762 | { 763 | "data": { 764 | "text/plain": [ 765 | "4.0" 766 | ] 767 | }, 768 | "execution_count": 31, 769 | "metadata": {}, 770 | "output_type": "execute_result" 771 | } 772 | ], 773 | "source": [ 774 | "ineqpy.var(variable=x)" 775 | ] 776 | }, 777 | { 778 | "cell_type": "code", 779 | "execution_count": 32, 780 | "metadata": {}, 781 | "outputs": [ 782 | { 783 | "data": { 784 | "text/plain": [ 785 | "4.0" 786 | ] 787 | }, 788 | "execution_count": 32, 789 | "metadata": {}, 790 | "output_type": "execute_result" 791 | } 792 | ], 793 | "source": [ 794 | "ineqpy.var(variable=xi, weights=fi)" 795 | ] 796 | }, 797 | { 798 | "cell_type": "markdown", 799 | "metadata": {}, 800 | "source": [ 801 | "### Skewness" 802 | ] 803 | }, 804 | { 805 | "cell_type": "code", 806 | "execution_count": 33, 807 | "metadata": {}, 808 | "outputs": [ 809 | { 810 | "data": { 811 | "text/plain": [ 812 | "0.0" 813 | ] 814 | }, 815 | "execution_count": 33, 816 | "metadata": {}, 817 | "output_type": "execute_result" 818 | } 819 | ], 820 | "source": [ 821 | "stats.skew(x)" 822 | ] 823 | }, 824 | { 825 | "cell_type": "code", 826 | "execution_count": 41, 827 | "metadata": {}, 828 | "outputs": [ 829 | { 830 | "data": { 831 | "text/plain": [ 832 | "x 0.0\n", 833 | "f 0.0\n", 834 | "dtype: float64" 835 | ] 836 | }, 837 | "execution_count": 41, 838 | "metadata": {}, 839 | "output_type": "execute_result" 840 | } 841 | ], 842 | "source": [ 843 | "data.skew()" 844 | ] 845 | }, 846 | { 847 | "cell_type": "code", 848 | "execution_count": 34, 849 | "metadata": {}, 850 | "outputs": [ 851 | { 852 | "data": { 853 | "text/plain": [ 854 | "0.0" 855 | ] 856 | }, 857 | "execution_count": 34, 858 | "metadata": {}, 859 | "output_type": "execute_result" 860 | } 861 | ], 862 | "source": [ 863 | "ineqpy.skew(variable=x)" 864 | ] 865 | }, 866 | { 867 | "cell_type": "code", 868 | "execution_count": 35, 869 | "metadata": {}, 870 | "outputs": [ 871 | { 872 | 
"data": { 873 | "text/plain": [ 874 | "0.0" 875 | ] 876 | }, 877 | "execution_count": 35, 878 | "metadata": {}, 879 | "output_type": "execute_result" 880 | } 881 | ], 882 | "source": [ 883 | "ineqpy.skew(variable=xi, weights=fi)" 884 | ] 885 | }, 886 | { 887 | "cell_type": "markdown", 888 | "metadata": {}, 889 | "source": [ 890 | "### Kurtosis" 891 | ] 892 | }, 893 | { 894 | "cell_type": "code", 895 | "execution_count": 36, 896 | "metadata": {}, 897 | "outputs": [ 898 | { 899 | "data": { 900 | "text/plain": [ 901 | "2.35" 902 | ] 903 | }, 904 | "execution_count": 36, 905 | "metadata": {}, 906 | "output_type": "execute_result" 907 | } 908 | ], 909 | "source": [ 910 | "stats.kurtosis(x) + 3" 911 | ] 912 | }, 913 | { 914 | "cell_type": "code", 915 | "execution_count": 40, 916 | "metadata": {}, 917 | "outputs": [ 918 | { 919 | "data": { 920 | "text/plain": [ 921 | "x 2.483004\n", 922 | "f 3.000000\n", 923 | "dtype: float64" 924 | ] 925 | }, 926 | "execution_count": 40, 927 | "metadata": {}, 928 | "output_type": "execute_result" 929 | } 930 | ], 931 | "source": [ 932 | "data.kurt()+3" 933 | ] 934 | }, 935 | { 936 | "cell_type": "code", 937 | "execution_count": 37, 938 | "metadata": {}, 939 | "outputs": [ 940 | { 941 | "data": { 942 | "text/plain": [ 943 | "2.3500000000000001" 944 | ] 945 | }, 946 | "execution_count": 37, 947 | "metadata": {}, 948 | "output_type": "execute_result" 949 | } 950 | ], 951 | "source": [ 952 | "ineqpy.kurt(variable=x)" 953 | ] 954 | }, 955 | { 956 | "cell_type": "code", 957 | "execution_count": 38, 958 | "metadata": {}, 959 | "outputs": [ 960 | { 961 | "data": { 962 | "text/plain": [ 963 | "2.3500000000000001" 964 | ] 965 | }, 966 | "execution_count": 38, 967 | "metadata": {}, 968 | "output_type": "execute_result" 969 | } 970 | ], 971 | "source": [ 972 | "ineqpy.kurt(variable=xi, weights=fi)" 973 | ] 974 | }, 975 | { 976 | "cell_type": "code", 977 | "execution_count": null, 978 | "metadata": {}, 979 | "outputs": [], 980 | "source": [] 981 | } 
982 | ], 983 | "metadata": { 984 | "kernelspec": { 985 | "display_name": "Py3 (dev)", 986 | "language": "python", 987 | "name": "dev" 988 | }, 989 | "language_info": { 990 | "codemirror_mode": { 991 | "name": "ipython", 992 | "version": 3 993 | }, 994 | "file_extension": ".py", 995 | "mimetype": "text/x-python", 996 | "name": "python", 997 | "nbconvert_exporter": "python", 998 | "pygments_lexer": "ipython3", 999 | "version": "3.5.2" 1000 | } 1001 | }, 1002 | "nbformat": 4, 1003 | "nbformat_minor": 2 1004 | } 1005 | -------------------------------------------------------------------------------- /examples/quick_start.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# load packages\n", 12 | "import pandas as pd\n", 13 | "import numpy as np\n", 14 | "import ineqpy as inq\n", 15 | "%matplotlib inline" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "# First-steps" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": { 29 | "collapsed": true 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "# load data\n", 34 | "data = pd.read_csv('eusilc.csv', index_col=0).dropna()\n", 35 | "svy = inq.api.Survey(data, weights='rb050')" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "metadata": {}, 42 | "outputs": [ 43 | { 44 | "data": { 45 | "text/plain": [ 46 | "0.26516133165507139" 47 | ] 48 | }, 49 | "execution_count": 3, 50 | "metadata": {}, 51 | "output_type": "execute_result" 52 | } 53 | ], 54 | "source": [ 55 | "svy.gini('eqincome')" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 4, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "data": { 65 | "text/plain": [ 66 | "0.060002757905598392" 67 | ] 68 | }, 69 | "execution_count": 4, 70 
| "metadata": {}, 71 | "output_type": "execute_result" 72 | } 73 | ], 74 | "source": [ 75 | "svy.atkinson('eqincome')" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 5, 81 | "metadata": {}, 82 | "outputs": [ 83 | { 84 | "data": { 85 | "text/plain": [ 86 | "0.12064816023130914" 87 | ] 88 | }, 89 | "execution_count": 5, 90 | "metadata": {}, 91 | "output_type": "execute_result" 92 | } 93 | ], 94 | "source": [ 95 | "svy.theil('eqincome')" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 6, 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "data": { 105 | "text/plain": [ 106 | "20431.292738646902" 107 | ] 108 | }, 109 | "execution_count": 6, 110 | "metadata": {}, 111 | "output_type": "execute_result" 112 | } 113 | ], 114 | "source": [ 115 | "svy.mean('eqincome')" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 7, 121 | "metadata": {}, 122 | "outputs": [ 123 | { 124 | "data": { 125 | "text/plain": [ 126 | "18658.461904761898" 127 | ] 128 | }, 129 | "execution_count": 7, 130 | "metadata": {}, 131 | "output_type": "execute_result" 132 | } 133 | ], 134 | "source": [ 135 | "svy.percentile('eqincome')" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 8, 141 | "metadata": {}, 142 | "outputs": [ 143 | { 144 | "data": { 145 | "text/plain": [ 146 | "13.28551976978007" 147 | ] 148 | }, 149 | "execution_count": 8, 150 | "metadata": {}, 151 | "output_type": "execute_result" 152 | } 153 | ], 154 | "source": [ 155 | "svy.kurt('eqincome')" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 9, 161 | "metadata": {}, 162 | "outputs": [ 163 | { 164 | "data": { 165 | "text/plain": [ 166 | "2.1150515104443115" 167 | ] 168 | }, 169 | "execution_count": 9, 170 | "metadata": {}, 171 | "output_type": "execute_result" 172 | } 173 | ], 174 | "source": [ 175 | "svy.skew('eqincome')" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 10, 
181 | "metadata": {}, 182 | "outputs": [ 183 | { 184 | "data": { 185 | "text/plain": [ 186 | "" 187 | ] 188 | }, 189 | "execution_count": 10, 190 | "metadata": {}, 191 | "output_type": "execute_result" 192 | }, 193 | { 194 | "data": { 195 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUQAAAFACAYAAADEewXQAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XdclXX/x/HXV4aIIg7cOHDvxXClWWnZMM2RMwU1NVO7\nW3c27rL1a97trCwV9y41c+WqXHEQ3BMVFXHgQhSZ5/v746JucnGAc7jOgc/z8eARcK5zXZ9L4N01\nvtf3o7TWCCGEgGJmFyCEEM5CAlEIIbJIIAohRBYJRCGEyCKBKIQQWSQQhRAiiwSiEEJkkUAUQogs\nEohCCJHF3awN+/n56Vq1apm1eSFEIbV9+/bzWusKeXmvaYFYq1YtIiMjzdq8EKKQUkodz+t75ZRZ\nCCGySCAKIUQWCUQhhMhi2jXEW0lPTycuLo6UlBSzS3EJXl5e+Pv74+HhYXYpQhQKThWIcXFx+Pj4\nUKtWLZRSZpfj1LTWXLhwgbi4OAICAswuR4hCwalOmVNSUihfvryEoQ2UUpQvX16OpoWwI6cKREDC\nMBfk30oI+8oxEJVSU5VS55RSe27zulJKfaGUilFK7VJKtbZ/mUII4Xi2HCGGA93u8PqDQL2sj5HA\nN/kvyzxubm60bNny74/333/f7tuoVasW58+fB6B9+/YAxMbGMmfOHLtvSwhhuxxvqmitf1dK1brD\nIj2AGdroVrVNKVVGKVVFa33aTjUWqBIlSrBjx44C296WLVuA/wXiwIEDC2zbQhQ2sduW5ev99riG\nWA04me3ruKzv3UQpNVIpFamUikxISLDDpgvOqlWraNiwIXfddRfjx4/nkUceAWDixIl8/PHHfy/X\ntGlTYmNjAejZsyeBgYE0adKEyZMn33K9pUqVAmDChAn88ccftGzZkk8//ZSOHTv+I5g7dOjArl27\nHLR3Qri+3w4lcGrlR/lahz2G3dzqyv4te5tqrScDkwGCgoLu2P/0zZ/3si/+Sv6ry6Zx1dK80b3J\nHZe5fv06LVu2/Pvrl19+mR49evDkk0+yfv166tatS79+/Wza3tSpUylXrhzXr18nODiY3r17U758\n+Vsu+/777/Pxxx+zfPlyAMqVK0d4eDifffYZhw4dIjU1lebNm9u4p0IULQsjT/Laj9FEex7O13rs\ncYQYB1TP9rU/EG+H9Zrir1Pmvz769evHgQMHCAgIoF69eiilGDx4sE3r+uKLL2jRogVt27bl5MmT\nHD5s+w+rb9++LF++nPT0dKZOnUpoaGge90iIwktrzRfrDvPiol0M8j+PN9fztT57HCEuA8YqpeYB\nbYBEe1w/zOlIrqDdboiLu7s7Vqv176//Ghe4ceNG1q5dy9atW/H29qZz5865GjPo7e1N165dWbp0\nKQsWLJCZgYS4QUamlf8s3cPciJP0al2NV/x2w7n8DUWzZdjNXGAr0EApFaeUGq6UGq2UGp21yArg\nKBADfA+MyVdFTqhhw4YcO3aMI0eOADB37ty/X6tVqxZRUVEAREVFcezYMQASExMpW7Ys3t7eHDhw\ngG3btt1xGz4+PiQlJf3jeyNGjGD8+PEEBwdTrlw5e+6SEC4tOS2DkTO3MzfiJE/fU4f/9m2Be8wq\nqNoqX+u15S7zgBxe18DT+arCidx4DbFbt268//77TJ48mYcffhg/Pz/uuusu9uwxhmX27t2bGTNm\n0LJlS4KDg6lfv/7f7/v2229p3rw5D
Ro0oG3btnfcbvPmzXF3d6dFixaEhoby7LPPEhgYSOnSpQkL\nC3PcDgvhYs5fTWV4uIXdpxJ5p2dTBretCZdiIT4aur4FbMzzup3qWWZnkJmZecvvd+vWjQMHDgDG\n6fBfgViiRAnWrFlzy/esXLnylt//6y40wNWrVwHw8PBg3bp1/1guPj4eq9XK/fffn6t9EKKwij1/\njaHTIjh7JYXvngiia+NKxgt7lxj/bdwD+Fee1+90j+4Jw4wZM2jTpg3vvvsuxYrJj0mI6BOX6PXN\nFpJSMpjzZNv/hSHA3p+gamsoWytf25AjxDzo3LkznTt3dug2hgwZwpAhQxy6DSFcxdp9Zxk7N4qK\nPl5MHxZCgF/J/70YvwNO74Bu+X+qTAJRCOHUZv95nP8s2UPTar5MGRpMBZ/i/1wgcgq4l4AWd7zd\nYRMJRCGEU9Ja8/Gag3y94Qj3NKjA14Na4+15Q2Rdvwy7FkLzvlCiTL63KYEohHA6aRlWJvy4ix+j\nTtE/uDrv9GyKu9strqXvnAcZ1yFouF22K4EohHAqSSnpjJkdxR+Hz/Nc1/qMu7furR+MyMyAP78B\n/xCo2vLm1/NAAvEGpUqV+nsojBCiYJ29kkLYNAsHzybxYZ/mPB5U/fYL71tijD+8/127bV8CUQjh\nFGLOJTF0qoVLyWlMDQ3m7voVbr+w1rDpM/CrDw0eslsNMsDtNjZu3Ejnzp3p06cPDRs2ZNCgQRgP\n5YDFYqF9+/a0aNGCkJAQkpKSSElJISwsjGbNmtGqVSs2bNgAQHh4OD179qR79+4EBATw1Vdf8ckn\nn9CqVSvatm3LxYsXAThy5AjdunUjMDCQjh07/j0IXIiiwBJ7kd7fbCU1w8qCUe3uHIYAMWvh7G7o\n8C+w4zhd5z1CXDkBzuy27zorN4MHbR+rFB0dzd69e6latSodOnRg8+bNhISE0K9fP+bPn09wcDBX\nrlyhRIkSfP755wDs3r2bAwcOcP/993Po0CEA9uzZQ3R0NCkpKdStW5cPPviA6Ohonn32WWbMmMG/\n/vUvRo4cybfffku9evX4888/GTNmDOvXr7fv/gvhhFbuPs0z83fgX7YE08NCqF7OO+c3bfoUSleD\nZn3tWovzBqITCAkJwd/fH4CWLVsSGxuLr68vVapUITg4GIDSpUsDsGnTJsaNGwcYk0HUrFnz70C8\n55578PHxwcfHB19fX7p37w5As2bN2LVrF1evXmXLli307fu/H25qamqB7acQZpm66Rhv/7KP1jXK\n8sOQIMqW9Mz5TbGb4fhmeOA9cLdh+Vxw3kDMxZGcoxQv/r8BoG5ubmRkZKC1vuUdr79Op3NaT7Fi\nxf7+ulixYmRkZGC1WilTpkyBti4QwkxWq+a9lfv5/o9jPNCkEp/3b4WXh1vOb9Qa1r8DpSpDYKjd\n65JriLnUsGFD4uPjsVgsACQlJZGRkUGnTp2YPXs2AIcOHeLEiRM0aNDApnWWLl2agIAAFi5cCBjh\nunPnTsfsgBAmS83I5Jn5O/j+j2MMbVeTSYMCbQtDgKMb4MQW6PQCeNpwap1LEoi55Onpyfz58xk3\nbhwtWrSga9eupKSkMGbMGDIzM2nWrBn9+vUjPDz8H0eGOZk9ezZTpkyhRYsWNGnShKVLlzpwL4Qw\nR+L1dIZMieDnnfFMeLAhEx9tglsxGyd11RrWvwul/aG1Y57zV3c61XOkoKAgfeMs0Pv376dRo0am\n1OOq5N9MuIr4y9cJnRbBsfPX+LhvC3q0vGUvuts7uArm9oPuX0Dg0NsuppTarrUOykuNznsNUQhR\naOw/fYWwaRaupWYwPSyE9nX9crcCa6Zx7bBsALR0XKteCUQhhENtiTnPqJnbKVncnQWj29GoSunc\nr2TnPGPcYe8p4OZh/yKzOF0g3u4urriZWZc7hLDV0h2neGHhTgL8ShIeFkLVMiVyv5K0a7D+bagW\nC
E1727/IbJzqpoqXlxcXLlyQP3QbaK25cOECXl5eZpcixE201nyz8QjPzNtBYM2yLBzdPm9hCLDl\nK0g6DQ/8Hzj4YMmpjhD9/f2Ji4sjISHB7FJcgpeX198Dx4VwFplWzZs/72XG1uN0b1GVj/s2p7i7\njcNqbpR0BjZ/bvRKqXHnRm324FSB6OHhQUBAgNllCCHyKCU9k2fmRbN671lGdqrNhG4NKWbrsJpb\nWf8OZKZBl4n2KvGOnCoQhRCu69K1NIZPtxB98jJvdG9MWId8HtzER0P0LGj3NJSrbZ8icyCBKITI\nt5MXkxk6NYK4y9eZNLA1Dzarkr8VWq3wywtQsgLc/W/7FGkDCUQhRL7sjkskLNxCeqaV2SPaEFyr\nXP5XumMWnIqEx74DL9/8r89GEohCiDzbePAcY2ZHUdbbk3kj21C3ok/+V5p8EX59A2q0g+b98r++\nXJBAFELkyYLIk7z8424aVPIhPCyYiqXtNARs/TuQkggPfezwYTY3kkAUQuSK1pov1sXw6dpDdKzn\nx6RBrfHxstPTI/HREDkV2j4FlZvaZ525IIEohLBZRqaV15bsYZ7lJL1aV+OD3s3xuFV70Lz460ZK\nqYrQeYJ91plLEohCCJtcS81g7JwoNhxMYOw9dXn+/vr2fcx2+7SsGymTC/RGSnYSiEKIHCUkpTJ8\nuoU9pxJ597GmDGpT074buBIPaydC7c7Q/HH7rjsXJBCFEHd0NOEqodMsnEtKYfITQXRpXMn+G1n5\nb+OJlEc+LfAbKdlJIAohbivqxCWGh1tQSjFvZDtaVi9j/43sXw77fzYezyugJ1JuRwJRCHFLv+47\ny7i5UVQq7cX0sBBq+ZW0/0ZSrsCKF6BSM2g31v7rzyUJRCHETWZuO84bS/fQrJovU0KD8Stle3+g\nXFn3Jlw9C/1nO3TiV1tJIAoh/qa15qPVB5m08Qj3NazIlwNb4e3poJg48SdYphhjDqsFOmYbuSSB\nKIQAIC3DyoTFu/gx+hQDQmrwdo8muNtrjOGN0lPg5/Hg6w/3vOqYbeSBBKIQgqSUdJ6aFcWmmPM8\n37U+Y++t69hWHhvfg4QDMGgxFC/luO3kkgSiEEXc2SsphE6zcPhsEh/1aU7foOqO3eBJC2z5AloP\nhXpdHLutXJJAFKIIO3w2idBpFi4npzElNJi761dw7AbTr8OS0VC6Gtz/jmO3lQc2XSBQSnVTSh1U\nSsUopW56yFApVUMptUEpFa2U2qWUesj+pQoh7Cni2EV6f7OFtEwr80e1c3wYAqx7Gy7EQI+vwCsP\n7UgdLMdAVEq5AV8DDwKNgQFKqcY3LPYasEBr3QroD0yyd6FCCPv5ZddpBk/5Ez+f4vz4VHuaViuA\nZ4ePb4FtkyB4hPGInhOy5QgxBIjRWh/VWqcB84AeNyyjgb/i3heIt1+JQgh7mrLpGGPnRtGsmi+L\nR7enejlvx2807RosGQNla0KXNx2/vTyy5RpiNeBktq/jgDY3LDMRWKOUGgeUBJzrSqkQAqtV8+6K\n/UzZdIwHmlTi8/6t8PLIY3vQ3Fo7ES4dg9AVTnVX+Ua2HCHe6t77jZ3kBwDhWmt/4CFgplLqpnUr\npUYqpSKVUpHSe1mIgpOSnsm4edFM2XSM0Pa1mDQosODC8PCvEDEZ2o6BWh0KZpt5ZEsgxgHZ78P7\nc/Mp8XBgAYDWeivgBfjduCKt9WStdZDWOqhChQK4gCuEIDE5nSFTI/hl12leeaghb3RvjFt+eiXn\nxtUE41S5YmO4742C2WY+2BKIFqCeUipAKeWJcdNk2Q3LnADuA1BKNcIIRDkEFMJkpy5fp8+3W4g+\ncYnP+7dkZKc6jh1wnZ3WsGys0R+l9w/gYaeeKw6U4zVErXWGUmossBpwA6Zqrfcqpd4CIrXWy4Dn\nge+VUs9inE6Haq1vPK0WQhSgffFXCAuPIDktk+nDQmhf56aTNse
KnAKHVkG3D6BSk4Lddh7ZNDBb\na70CWHHD917P9vk+wLkvDghRhGyOOc+omdspVdydhaPb0bByAY/5SzgIq1+Dul2gzaiC3XY+yJMq\nQhQyS6JP8eKindT2K0X4sGCq+JYo2AIyUmHxcPD0hh6TTJ0BO7ckEIUoJLTWfPPbET5cdZC2tcvx\n3RNB+JYwYY7B9e/Amd3Qfy74OKDdgANJIApRCGRaNROX7WXmtuN0b1GVj/s2p7h7AQ2ryS5mHWz5\nEgLDoKHrPcErgSiEi0tJz2T83GjW7DvLqE61ealbQ4oV1LCa7K6chh9HQsVG8MD/Ffz27UACUQgX\ndvFaGiOmW4g+eZmJ3RsT2iHAnEIyM2DxCEhPhr7hxvVDFySBKISLOnEhmaHTIjh1+TqTBrbmwWZV\nzCvmtw/g+Cbo+S1UaGBeHfkkgSiEC9oVd5lh4RbSMzVzRrQhqFY584o5sgF+/whaDoaWA8yrww4k\nEIVwMRsOnuPp2VGU9fZk3sgQ6lY0cbKEpLPGdcMKDeChD82rw04kEIVwIfMtJ3jlpz00rOzDtLBg\nKvqY+DicNdMYb5iaBEOXgacD+jYXMAlEIVyA1prP1h7m83WH6VS/ApMGtaZUcZP/fH//CGL/MAZf\nV2xkbi12IoEohJNLz7Ty2k97mB95kj6B/rzXqxkejmoPaqvDa2Hj+9BiALQaZG4tdiSBKIQTu5aa\nwdNzoth4MIHx99bl2a71C262mtu5FGucKldqAg9/Ym4tdiaBKISTSkhKZVi4hb3xifzfY80Y2KaG\n2SVBWjLMHwxo6DfTZccb3o4EohBO6GjCVYZOi+B8UhrfDwnivkZO8Eyw1vDLc3BmDwxcAOVqm12R\n3UkgCuFkth+/xIjpFoopxdyRbWlZvYzZJRksP8DOudD5Zah/v9nVOIQEohBOZM3eM4ybG00VXy/C\nw0Ko5eckQ1lORsCql6HeA9Dp32ZX4zASiEI4iZnbjvPG0j008y/D1KFBlC9V3OySDElnYcEQ8K0G\nvb6DYibf4XYgCUQhTGa1aj5ac5BvNh7hvoYV+XJgK7w9neRPMzMdFoXB9cswYi2UKGt2RQ7lJP/q\nQhRNaRlW/r1oJ0t2xDOwTQ3eerQJ7maPMcxu1ctwfDP0+h4qNzW7GoeTQBTCJFdS0nlq1nY2x1zg\nxQcaMKZzAXbEs0XkVLB8D+3HQfPHza6mQEggCmGCM4kphE6LIObcVf7btwW9A/3NLumfYjfBiheh\nblfo8qbZ1RQYCUQhCtihs0mETo0g8Xo6U0OD6VS/gtkl/dOlWJj/BJQNgD5ToJgJrQhMIoEoRAHa\ndvQCI2dEUtzDjQWj29Gkqq/ZJf1TahLMHQg6EwbOBy8nq8/BJBCFKCDLd8Xz3PydVC9XgunDQvAv\n62SPvVmt8NNoSNgPgxdD+TpmV1TgJBCFKAA//HGUd37ZT3Ctsnw/JIgy3p5ml3Szjf8HB5ZDt/eh\nzr1mV2MKCUQhHMhq1bzzy36mbj7Gg00r82m/lnh5OOE1uT2LjfkNWw2GNqPNrsY0EohCOEhKeibP\nL9jJL7tPE9q+Fv95pDFuZrQHzUn8DljyNFRva0zn5UxDfwqYBKIQDpCYnM6TMyOJOHaRVx9qxIiO\nAc41xvAviadgbn/wLm9M5+XuJI8LmkQCUQg7O3X5OkOnRnDiQjJfDGjFoy2qml3SraUmwZx+kHoV\nhq+GUhXNrsh0EohC2NG++CuETovgenom04eF0K5OebNLurXMDFg0DM7tg0ELjNmvhQSiEPay6fB5\nRs/ajo+XO4tGt6dBZR+zS7o1rWHVS3B4DTzyGdTtYnZFTkMCUQg7+DEqjn8v2kXdiqWYFhZMFd8S\nZpd0e9u+MSZ7bT8egsLMrsapSCAKkQ9aayZtPMJHqw/SrnZ5vhsSSGkvD7PLur0Dv8DqV6BR9yL1\njLKtJBCFyKNMq+aNZXuYte0
EPVpW5aM+LfB0d6Kpu250KgoWj4CqreCxyYV6ote8kkAUIg+up2Uy\nbm40a/efZfTddfj3Aw0o5oxjDP9y+WTW8Bo/GDCv0HXLsxcJRCFy6eK1NIZPt7Dj5GXe6tGEIe1q\nmV3SnaUkGsNr0q/DkKXg4wQd/JyUBKIQuXD8wjVCp1mIv3ydbwYF0q1pZbNLurOMVJg3CM4fhEGL\noGIjsytyahKIQtho58nLDJ9uIcOqmfNkGwJrljO7pDuzWmHJUxD7Bzz2HdS5x+yKnJ4EohA22HDg\nHGNmR1G+lCfTh4VQp0Ips0vK2a//MSZt6DIRWvQ3uxqXIIEoRA7mW07wyk97aFTFh6mhwVT08TK7\npJxt/Rq2fgUho6DDv8yuxmVIIApxG1prPlt7mM/XHaZT/QpMGtSaUsVd4E9m96KssYaPQrf3ivTs\nNbll00AkpVQ3pdRBpVSMUmrCbZZ5XCm1Tym1Vyk1x75lClGw0jOtvLR4F5+vO0zfQH+mDA1yjTA8\n9rtx3bBGe6N1aBHqh2IPOf6ElVJuwNdAVyAOsCillmmt92Vbph7wMtBBa31JKSXTZgiXdS01gzGz\no/jtUALj76vHs13qOefUXTc6s8e4o1yuNgyYAx4ucGrvZGz5X14IEKO1PgqglJoH9AD2ZVvmSeBr\nrfUlAK31OXsXKkRBOJeUwrBwC/tPJ/Fer2YMCKlhdkm2uXwCZvcBz1JGP5QSZc2uyCXZcspcDTiZ\n7eu4rO9lVx+or5TarJTappTqdqsVKaVGKqUilVKRCQkJeatYCAc5knCVXpO2cOTcNb4fEug6YXg1\nAWb0hLRkGLwIfJ2sx7MLseUI8VbnCvoW66kHdAb8gT+UUk211pf/8SatJwOTAYKCgm5chxCm2X78\nIsOnR+KmFPNGtqVF9TJml2SblESY9RhciYchS2Rew3yy5QgxDqie7Wt/IP4WyyzVWqdrrY8BBzEC\nUgint2rPGQZ+/ydlvT35cUx71wnDtGSY0x/OHYB+s6BGW7Mrcnm2BKIFqKeUClBKeQL9gWU3LLME\nuAdAKeWHcQp91J6FCuEIM7bG8tTs7TSqUppFo9tRs3xJs0uyTWY6LBwKJ7ZCr++gnkzyag85njJr\nrTOUUmOB1YAbMFVrvVcp9RYQqbVelvXa/UqpfUAm8KLW+oIjCxciP6xWzYerD/Ltb0fo0qgSXw5o\nRQlPFxmiYs00GsofXgOPfApNe5tdUaGhtDbnUl5QUJCOjIw0ZduiaEvLsPLvRTtZsiOeQW1q8Oaj\nTXB3c5G5AbWGX56HyClw3xvQ8TmzK3I6SqntWuugvLzXBUaaCmE/V1LSGT1zO1uOXODFBxowpnMd\n1xhj+Jf17xhh2H483PWs2dUUOhKIosg4k5hC6LQIYs5d5ZPHW9CrtYsNT9nyJfzxMbQeAl3fkkfy\nHEACURQJh84mMXRqBEkpGUwLC6ZjvQpml5Q7UTNhzWvQuKfRKU/C0CEkEEWht+3oBZ6cEUkJDzfm\nj2pLk6q+ZpeUO7sWwrJxUOde6DVZnk92IAlEUaj9vDOe5xfspEZ5b8LDgvEv62K9RPYthZ9GQc0O\n0G82uBc3u6JCTQJRFEpaa6ZsOsY7v+wnpFY5Jg8JpIy3p9ll5c6h1bBoOFQLhIHSGKogSCCKQifT\nqnnnl31M2xzLQ80q88njLfHycLHTzCMbYP4TUKkxDFoIxX3MrqhIkEAUhUpKeibPzt/Byj1nGNYh\ngNcebuTc7UFv5fgWmDsAyteFJ5ZACRd5lLAQkEAUhcbl5DSenBGJJfYSrz3ciBEda5tdUu7FRcLs\nvlCmujFZg7eTN7IqZCQQRaEQdymZ0GkWTlxI5quBrXikeVWzS8q90zthVi8o6Wf0Ty4l8ywXNAlE\n4fL2xicSOs1CanomM4aH0LZ2ebNLyr2z+4w5DYuXhqE/Q2kXDPRCQAJRuLQ/DicweuZ2fEt4M
Pup\n9tSv5II3H87HwIwe4OZpHBmWcZGJaQshCUThshZvj+OlxbuoW7EU4WEhVPZ1wR4iF4/BjEdBWyF0\nOZSvY3ZFRZoEonA5WmsmbTzCR6sP0qFueb4ZHEhpLw+zy8q9i8cg/BFIT4ahy6FCA7MrKvIkEIVL\nyci08sayvcz+8wQ9W1blwz4t8HR3kam7svs7DK/BkGVQuanZFQkkEIULuZ6Wybi5Uazdf46nOtfh\nxfsbuN4YQ7g5DKs0N7sikUUCUbiEC1dTGT49kp1xl3mrRxOGtKtldkl5I2Ho1CQQhdM7fuEaQ6dG\ncDoxhW8HB/JAk8pml5Q3F4/B9O4Shk5MAlE4tZ0nLzMs3EKm1sx5sg2BNV30yY2/wjDtqjG0RsLQ\nKUkgCqe1bv9Zxs6Jxs/Hk/CwEOpUKGV2SXlzUxi2MLsicRsSiMIpzY04was/7aZJVV+mhgZTwcdF\n5wGUMHQpEojCqWit+fTXQ3yxPobODSrw9cDWlCzuor+m52OMMMy4LmHoIlz0N00URumZVl7+cTeL\ntsfxeJA/7z7WDA9XaQ96o7P7jMfxtNUYdC3jDF2CBKJwCldTMxgzO4rfDyXwry71eOa+eq7VHjS7\n+B0w8zFjuv8hv0CF+mZXJGwkgShMdy4phWHhFvafTuKD3s3oF+zCkxuctMCs3uBVGoYug3IuOCdj\nESaBKEx1JOEqQ6dGcOFqGj8MCeKehi48B2DsZpjzOJSsYEzhVaa62RWJXJJAFKaJjL3IiBmRuBdT\nzB/Vlub+LjxV/pH1MHdg1kzXy6B0FbMrEnkggShMsWrPGZ6ZF03VMiWYHhZCjfIu3FHu4EpYMAT8\n6hs9UEpVMLsikUcSiKLATd8Sy8Sf99KyehmmDA2mXEkXaw+a3d4lsHg4VG4Gg3+UHiguTgJRFBir\nVfPB6gN899tRujauxBf9W1HC08Xag2YXNRN+Hg/+wUarUC9fsysS+SSBKApEakYm/160i6U74hnc\ntgZvPtoUN1ecuusvW76CNa9CnXuh3yzwLGl2RcIOJBCFwyVeT2f0zO1sPXqBf3drwFN313HdMYZa\nw/p34I+PoXFP6DXZGG8oCgUJROFQpxOvEzrVwtHzV/m0Xwsea+Vvdkl5Z7XCyhfB8gO0HgKPfAbF\nXPiUX9xEAlE4zMEzSYROiyApJYPwsBA61PUzu6S8y0yHJU/B7oXQfjx0fQtc9ShX3JYEonCILUfO\nM2rmdrw93Vgwqh2Nq5Y2u6S8S0uGhaFweDV0mQh3PWtyQcJRJBCF3S3bGc8LC3ZSs7w34cNCqFam\nhNkl5V1KIszpDye2GqfIQWFmVyQcSAJR2I3Wmu//OMr/rThASEA5vn8iCF9vF2wP+perCTCrF5zb\nD32mQNPeZlckHEwCUdhFplXz9vJ9hG+J5eHmVfhv3xZ4ebjwDYdLscYkDYmnYMA8qNfF7IpEAZBA\nFPmWkp7Js/N3sHLPGYbfFcCrDzVyzfagfzm9E2b3hYxUGLIEarQ1uyJRQCQQRb5cTk5jxPRItp+4\nxGsPN2ILnC36AAAZ7UlEQVRERxef7urIBpj/BJQoY8xYU6GB2RWJAmTTdMRKqW5KqYNKqRil1IQ7\nLNdHKaWVUkH2K1E4q5MXk+n9zRZ2xSXy1YDWrh+GuxYaR4ZlasDwNRKGRVCOR4hKKTfga6ArEAdY\nlFLLtNb7bljOBxgP/OmIQoVz2XMqkbBwC6npmcwcHkKb2uXNLil/tnwJa16DmndB/9nGEaIocmw5\nQgwBYrTWR7XWacA8oMctlnsb+BBIsWN9wgn9fiiBft9txaOYYtFT7V07DK1WWP2qEYaNe8LgxRKG\nRZgtgVgNOJnt67is7/1NKdUKqK61Xm7H2oQTWrQ9jmHhFqqX8+anpztQv5KP2SXlXUYq/PgkbP0K\n2oyGPtPAw8vsqoSJbLmpcqvbhfrvF5UqBnwKhOa4IqVGA
iMBatRw4b4ZRZDWmq/Wx/DfXw9xV10/\nvhncGh8vFx5jmHIF5g+GY79BlzehwzPyKJ6wKRDjgOzNIfyB+Gxf+wBNgY1ZM5hUBpYppR7VWkdm\nX5HWejIwGSAoKEgjXEJGppX/LN3L3IgT9GpVjfd7N8fT3UXbgwJciYfZj0PCfnjsO2jR3+yKhJOw\nJRAtQD2lVABwCugPDPzrRa11IvD3U/tKqY3ACzeGoXBNyWkZjJsTzboD5xjTuQ4vPtDAdafuAjiz\n2wjD1CQYOB/qyoBr8T85BqLWOkMpNRZYDbgBU7XWe5VSbwGRWutlji5SmOP81VSGT49kd9xl3u7Z\nlCfa1jS7pPw5tAYWhRkzWw9bJc3jxU1sGpittV4BrLjhe6/fZtnO+S9LmC32/DWGTovg7JUUvh0c\nyP1NKptdUv5YfoAVL0KlpjBwgXTFE7ckT6qIm+w4eZnh4RasWjPnyba0rlHW7JLyzpoJv75u3Emu\n/yD0/gGKlzK7KuGkJBDFP6zbf5an50RRwac408NCqF3BhcMjLdkYVnNgOYSMgm7vyQzX4o4kEMXf\n5vx5gteW7KZpNV+mDA2mgo8L9wpJOgtz+0N8NHT7ANqONrsi4QIkEAVaaz759RBfro/hngYV+Gpg\na0oWd+FfjXP7jTvJyeeh/xxo+JDZFQkX4cK/9cIe0jOtTFi8m8VRcfQLqs67jzXF3c2FxxjGrIWF\nYeBRAsJWQNVWZlckXIgEYhF2NTWDp2Zt54/D5/lXl3o8c1891x1jqDX8+S2sfgUqNjYmdS1TPef3\nCZGNBGIRde5KCmHhFg6cSeLD3s15PNiFwyMjDVa8AFHTocHDRq9kuZMs8kACsQiKOZfE0KkWLiWn\n8cPQIO5pUNHskvLu2gVYMASOb4K7noN7/wPFXPiUX5hKArGIscReZMT0SDzcijF/ZDua+fuaXVLe\nndtv3Em+chp6fQ/NHze7IuHiJBCLkJW7T/PM/B34lynB9GEhVC/nbXZJeXdoNSwaDp7exs0Tf5mk\nXeSfBGIRMW3zMd5avo9W1cvww9BgypX0NLukvNHamN3619ehcjMYMBd8/c2uShQSEoiFnNWqeX/V\nASb/fpT7G1fiiwGtXLc9aEYqLH8OdsyCxj2g5zfgWdLsqkQhIoFYiKVmZPLCwl38vDOeIe1q8kb3\nJri5anvQq+eMbngnt8HdE+Dul+TmibA7CcRCKvF6OqNmRrLt6EVe6taQ0XfXdt0xhnHbjdmtr18y\npvlv2svsikQhJYFYCMVfvk7YNAtHz1/ls34t6dmqWs5vclbRs4zTZJ9KMOJX47qhEA4igVjIHDhz\nhdCpFq6lZhAeFkKHun45v8kZZabDqpfB8j0E3A19w8G7nNlViUJOArEQ2XLkPKNmbMe7uBsLRrej\nUZXSZpeUN1fPwYKhcGILtB8H900EN/lVFY4nv2WFxNIdp3hh4U5qlS9J+LAQqpUpYXZJeXNqu3Hz\nJPki9PoBmvc1uyJRhEggujitNZN/P8p7Kw/QJqAck58IwtfbRduDRs+G5c9CqUowfA1UaW52RaKI\nkUB0YZlWzdvL9xG+JZZHmlfhv4+3oLi7C44xzEiDNa9BxHcQ0An6hEPJ8mZXJYogCUQXlZKeyTPz\nolm99yxPdgzg5QcbUcwVxxgmnoKFoRAXAW2fhq5vyfVCYRr5zXNBl66lMWJGJFEnLvH6I40ZdleA\n2SXlzZENsHgEZKTI+ELhFCQQXczJi8kMnRZB3KXrfD2wNQ81c8F2mlYrbPovrH8XKjSAx2dChfpm\nVyWEBKIr2XMqkdBpFtIzrcwa3oaQABccl5d8EX4aBYfXQLO+8MhnMpmrcBoSiC7it0MJjJm1nTLe\nnswb2Ya6FX3MLin3TkUZ4wuTTsPD/4Wg4eCqjxOKQkkC0QUsjDzJhB93U7+SD+FhwVQq7WV2Sbmj\nNWyfBitfMobUDFsN/
oFmVyXETSQQnZjWmi/Xx/DJr4foWM+PSYNa4+PlYmMM05KNsYW75kHdLsbM\n1vIInnBSEohOKiPTyn+W7mFuxEl6ta7G+72a4+nuYtNdJRyChUONqf7veRU6viBTdgmnJoHohJLT\nMhg7J5r1B87x9D11eOH+Bq43ddeOOfDL8+DhDYMXQ937zK5IiBxJIDqZ81dTGR5uYfepRN7p2ZTB\nbWuaXVLupF41gnDXPKjV0ThFLu2CQ4NEkSSB6ERiz19j6LQIzl5J4bsngujauJLZJeXOmd2wMAwu\nHoHOr0CnF6CYCz5KKIosCUQnEX3iEsOnRwIw58m2tK5R1uSKckFriJxqzF9YoiwMWQYBHc2uSohc\nk0B0Ar/uO8u4uVFUKu1FeFgIAX4u1DgpJRGWjYd9S4y7yD2/hVIVzK5KiDyRQDTZrG3HeX3pHppV\n82VKaDB+pYqbXZLtTm03TpET46DLm9B+vNxFFi5NAtEkWms+XnOQrzcc4d6GFflqYCu8PV3kx2G1\nwtYvYd3b4FMZhq2C6iFmVyVEvrnIX2DhkpZhZcKPu/gx6hQDQqrzdo+muLu5yJFV4ilYMhqO/Q6N\nukP3L2SgtSg0JBALWFJKOmNmR/HH4fM817U+4+6t6zpjDPctg2XjjAZQj34FrQbLs8iiUJFALEBn\nr6QQNs3CwbNJfNinOY8HVTe7JNukXoVVEyB6JlRtBb2nQPk6ZlclhN1JIBaQmHNJDJ1q4VJyGlND\ng7m7vovciT21HRY/CRePwl3PwT2vgJuLPU8thI0kEAuAJfYiI6ZH4uFWjAWj2tG0mq/ZJeXMmgmb\nP4MN/2fMUBO6HGrdZXZVQjiUBKKDrdx9mmfm78C/bAmmh4VQvZy32SXl7PIJ+OkpOL4JGveE7p8Z\nA66FKORsurWplOqmlDqolIpRSk24xevPKaX2KaV2KaXWKaVc7AFcx5i66Rhj5kTRrJovi0e3d/4w\n1BqiZsKk9nB6B/T4GvqGSxiKIiPHI0SllBvwNdAViAMsSqllWut92RaLBoK01slKqaeAD4F+jijY\nFVitmvdW7uf7P47xQJNKfN6/FV4eTv5Mb9JZ+Hk8HFoFNe+CnpOgrPx/TRQttpwyhwAxWuujAEqp\neUAP4O9A1FpvyLb8NmCwPYt0JakZmbywcBc/74xnaLuavN69CW7O3h5070+w/DlIT4YH3oM2o+WJ\nE1Ek2RKI1YCT2b6OA9rcYfnhwMpbvaCUGgmMBKhRo4aNJbqOxOvpjJwRyZ/HLvLygw0Z2am2c48x\nTL4IK16EPYugamt47DvpfieKNFsC8VZ/0fqWCyo1GAgC7r7V61rrycBkgKCgoFuuw1XFX75O6LQI\njp2/xuf9W9KjZTWzS7qzw2th2Vi4lmDMZn3Xc9IgXhR5tvwFxAHZRxD7A/E3LqSU6gK8CtyttU61\nT3muYf/pK4ROiyA5NZPpw0JoX8fP7JJuL+UK/Pof2B4OFRrBgHlQtaXZVQnhFGwJRAtQTykVAJwC\n+gMDsy+glGoFfAd001qfs3uVTmxLzHlGzdxOyeLuLHyqHQ0rlza7pNs7/Cv8/IzRBrT9eOPI0MPF\nOvgJ4UA5BqLWOkMpNRZYDbgBU7XWe5VSbwGRWutlwEdAKWBh1jWzE1rrRx1Yt1NYuuMULyzcSYBf\nScLDQqhapoTZJd1a8kVY/QrsnAsVGsLjM8A/yOyqhHA6Nl000lqvAFbc8L3Xs33exc51OTWtNd/+\ndpQPVh2gbe1yfPdEEL4lnPRxtn3LjB4n1y9CpxeND3cXmnNRiAIkV9FzKdOqefPnvczYepzuLary\ncd/mFHd3wjGGV8/Bihdg31Ko3NzofFeludlVCeHUJBBzISU9k2fmRbN671lGdqrNhG4NKeZsYwy1\nhl0LYNVLkHYN7nvduF4oEzIIkSMJRBtdupbG8OkWok9e5o3ujQnrEGB2STdLPAXLn4X
Dq8E/2Hj0\nrkIDs6sSwmVIINrg5MVkhk6NIO7ydSYNbM2DzZysz7A1Eyw/GFP6WzOynjYZJS1AhcglCcQc7I5L\nJCzcQnqmldkj2hBcy8mmyz+9yxhKEx8Fde6Fhz+Bck549CqEC5BAvIONB88xZnYUZb09mTeyDXUr\n+phd0v+kXYON78HWSUZPk95ToGlvmdJfiHyQQLyNBZEnefnH3TSo5EN4WDAVSzvRAOZDa4yhNIkn\noPVQ6DJRGj0JYQcSiDfQWvPFuhg+XXuIjvX8+GZwIKWKO8k/U9IZo7fJ3p/ArwGErYSa7c2uSohC\nw0n+0p1DRqaV15bsYZ7lJL1b+/N+72Z4OEN7UGum8ezx2jchIwXueQ06PAPunmZXJkShIoGY5Vpq\nBmPnRLHhYALj7q3Lc13rO8fUXXHbYcXzEB8NAZ3g4U/Br67ZVQlRKEkgAglJqQyfbmHPqUTefawp\ng9o4wUzR1y7AujchaobR5ElumgjhcEU+EI8mXCV0moVzSSlMfiKILo0rmVvQX6fH69+G1CRo9zTc\n/RJ4OfEsOkIUEkU6EKNOXGJ4uAWlFPNGtqNl9TLmFhQXadw9Pr0DanWEhz6Gig3NrUmIIqTIBuKv\n+84ybm4UlUp7MT0shFp+Jc0r5toFWDfROD32qSKnx0KYpEgG4sxtx3lj6R6aVfNlSmgwfqVMmg4r\nMx0ip8GGdyHtKrQfZ5weF3eiAeBCFCFFKhC11ny0+iCTNh7hvoYV+XJgK7w9TfoniFkLq16B8wch\n4G548EM5PRbCZEUmENMyrExYvIsfo08xIKQGb/dogrsZYwwTDsGaV+HwGihXG/rPhQYPyumxEE6g\nSARiUko6T82KYlPMeZ7vWp+x99Yt+DGGyRfhtw/B8j14eMP970DIKBlcLYQTKfSBePZKCqHTLBw+\nm8RHfZrTN6h6zm+yp8wM2J51nTAlEQJDjeZOJZ24M58QRVShDsTDZ5MInWbhcnIaU0KDubt+hYLb\nuNaw/2djcPWFGOM6Ybf3oFKTgqtBCJErhTYQI45dZMR0C8U93Jg/qh1Nq/kW3MaPb4FfX4c4i9Hl\nbsA8qN9NrhMK4eQKZSD+sus0z87fQfVyJQgPC6F6Oe+C2fC5/bB2IhxaBT5V4dGvoMUAcCuU/8xC\nFDqF7i91yqZjvPPLPgJrlOWHoUGU8S6AmxaJcbDhPdg5Bzx9jPkJQ0aBZwEFsRDCLgpNIFqtmndX\n7GfKpmN0a1KZz/q3xMvDwT1Frp6DTZ9B5BTQVmg7Bjo+L5O1CuGiCkUgpqRn8vzCnfyy6zSh7Wvx\nn0ca4+bI9qDXLsCWzyHie2N+wub94Z6XoUwNx21TCOFwLh+IicnpPDkzkohjF3nloYY82bG248YY\nXr8EW76CP781epo06wN3T5D5CYUoJFw6EE9dvk7o1AhiL1zj8/4t6dGymmM2lJII276BrV9D6hVo\n3BM6T4CKjRyzPSGEKVw2EPfFXyEsPILktEymDwuhfR0HDHROvgh/fmccEaZchgYPG6fGlZvZf1tC\nCNO5ZCBujjnPqJnbKVXcnYWj29Gwsp0nT006A1u/AstUSL8GDR6CTi9Ctdb23Y4Qwqm4XCAuiT7F\ni4t2UtuvFOHDgqniW8J+K78UC5u/gOhZYE035iS861l5ukSIIsJlAlFrzTe/HeHDVQdpW7sc3z0R\nhG8JD/us/NwB2PwZ7FoAxdyg5UCjq1252vZZvxDCJbhEIGZaNROX7WXmtuM82qIqH/VtTnH3fI4x\n1BqOrDdulBxZZ8xA02Y0tB8Lpavap3AhhEtx+kC8npbJ+HnR/LrvLKPurs1LDzSkWH7GGKanwO6F\nRhAm7Dc62t37GgQOg5Ll7Ve4EMLlOHUgXryWxvDpFnacvMybjzZhaPtaeV/Z1QTjiRLLD3AtASo1\ng57fQtNe4G5SCwEhhFNx2kA8cSGZodMiiL98nW8
GtaZb0yq5X4nWRic7yw+w9yfITDVmnWk7xmj6\nLrPPCCGyccpA3BV3mWHhFjKsmtkj2hBUK5fPBqddM06LLT/Amd3GhAutnzCuEfrVc0zRQgiX53SB\nuOHgOZ6eHUW5kp6Eh4VQt2Ip29+ccBAsU2DnXOOJkopN4OFPoPnj0slOCJEjpwrE+ZYTvPLTHhpW\n9mFaWDAVfbxyflNKIuz5EXbMNiZkLeYBTXpC8Aio3kZOi4UQNnOKQNRa89naw3y+7jCd6ldg0qDW\nlCp+h9KsVoj9wxhAvf9nyLgOfg2g61vQYiCUKsBWAUKIQsP0QEzPtPLaT3uYH3mSPoH+vNerGR63\nag9qtUJchHFzZN9SSDoNxX2h5QBoOQiqBcrRoBAiX2wKRKVUN+BzwA34QWv9/g2vFwdmAIHABaCf\n1jo2p/VeS83g6TlRbDyYwPh76/Js1/r/nLor/TrEbjZ6GO//GZLiwa041OsKTR6Dhg+Dhx0f3RNC\nFGk5BqJSyg34GugKxAEWpdQyrfW+bIsNBy5presqpfoDHwD97rTeDKum/+Rt7I1P5P8ea8bANjUg\nIxXid8DJbXDsd4jdZEzA6u4Fde6FJm8aw2a87DyZgxBCYNsRYggQo7U+CqCUmgf0ALIHYg9gYtbn\ni4CvlFJKa61vt9JzZ8/wcMLPfBrkTd3Tv8D3B+HMLshMMxYoXw8Cw6BeF6jZQY4EhRAOZ0sgVgNO\nZvs6Dmhzu2W01hlKqUSgPHA++0JKqZHASIDAKsV4t9i3sBvwqQLl6xrjBKu3MT7kxogQooDZEoi3\nulNx45GfLcugtZ4MTAZo0bSx5l+roEQ5KJ6LsYZCCOEgt7ide5M4oHq2r/2B+Nsto5RyB3yBi3da\nqYeXt9GUScJQCOEkbAlEC1BPKRWglPIE+gPLblhmGTA06/M+wPo7XT8UQghnlOMpc9Y1wbHAaoxh\nN1O11nuVUm8BkVrrZcAUYKZSKgbjyLC/I4sWQghHsGkcotZ6BbDihu+9nu3zFKCvfUsTQoiCZcsp\nsxBCFAkSiEIIkUUCUQghskggCiFEFglEIYTIIoEohBBZJBCFECKLMuuBEqVUEnDQlI0XDD9umNyi\nkCnM+1eY9w0K//410FrnqYmSmTNmH9RaB5m4fYdSSkXK/rmmwrxvUDT2L6/vlVNmIYTIIoEohBBZ\nzAzEySZuuyDI/rmuwrxvIPt3W6bdVBFCCGcjp8xCCJFFAlEIIbI4PBCVUt2UUgeVUjFKqQm3eL24\nUmp+1ut/KqVqObome7Jh/55TSu1TSu1SSq1TStU0o868yGnfsi3XRymllVIuNZTDlv1TSj2e9fPb\nq5SaU9A15ocNv5s1lFIblFLRWb+fD5lRZ14opaYqpc4ppfbc5nWllPoia993KaVa27RirbXDPjBm\n2D4C1AY8gZ1A4xuWGQN8m/V5f2C+I2syYf/uAbyzPn/KVfbPln3LWs4H+B3YBgSZXbedf3b1gGig\nbNbXFc2u2877Nxl4KuvzxkCs2XXnYv86Aa2BPbd5/SFgJUYDvLbAn7as19FHiH/3dNZapwF/9XTO\nrgcwPevzRcB9SqlbdfFzRjnun9Z6g9Y6OevLbRhNulyBLT87gLeBD4GUgizODmzZvyeBr7XWlwC0\n1ucKuMb8sGX/NFA663Nfbm4e57S01r9z50Z2PYAZ2rANKKOUqpLTeh0diLfq6VztdstorTOAv3o6\nuwJb9i+74Rj/13IFOe6bUqoVUF1rvbwgC7MTW3529YH6SqnNSqltSqluBVZd/tmyfxOBwUqpOIwW\nIeMKprQCkdu/TcDxj+7Zraezk7K5dqXUYCAIuNuhFdnPHfdNKVUM+BQILaiC7MyWn507xmlzZ4wj\n+z+UUk211pcdXJs92LJ/A4BwrfV/lVLtMBrFNdVaWx1fnsPlKVccfYTokJ7OTsSW/UMp1QV4FXhU\na51aQLXlV07
75gM0BTYqpWIxrtMsc6EbK7b+bi7VWqdrrY9hTEZSr4Dqyy9b9m84sABAa70V8MKY\n+KEwsOlv8yYOvvDpDhwFAvjfhd0mNyzzNP+8qbLA7Au2dt6/VhgXt+uZXa+99+2G5TfiWjdVbPnZ\ndQOmZ33uh3EKVt7s2u24fyuB0KzPG2UFhjK79lzsYy1uf1PlYf55UyXCpnUWQNEPAYeyQuHVrO+9\nhXG0BMb/lRYCMUAEUNvsf2g7799a4CywI+tjmdk122vfbljWpQLRxp+dAj4B9gG7gf5m12zn/WsM\nbM4Kyx3A/WbXnIt9mwucBtIxjgaHA6OB0dl+dl9n7ftuW3835dE9IYTIIk+qCCFEFglEIYTIIoEo\nhBBZJBCFECKLBKIQQmSRQBRCiCwSiEIIkUUCUTgtpVRw1lx2XkqpkllzEjY1uy5ReMnAbOHUlFLv\nYDzNVAKI01q/Z3JJohCTQBROTSnlCVgw5ltsr7XONLkkUYjJKbNwduWAUhiz63iZXIso5OQIUTg1\npdQyjNmeA4AqWuuxJpckCjFHTxArRJ4ppYYAGVrrOUopN2CLUuperfV6s2sThZMcIQohRBa5hiiE\nEFkkEIUQIosEohBCZJFAFEKILBKIQgiRRQJRCCGySCAKIUSW/weUNLiOxWYaVgAAAABJRU5ErkJg\ngg==\n", 196 | "text/plain": [ 197 | "" 198 | ] 199 | }, 200 | "metadata": {}, 201 | "output_type": "display_data" 202 | } 203 | ], 204 | "source": [ 205 | "svy.lorenz('eqincome').plot(figsize=(5,5))" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": null, 211 | "metadata": { 212 | "collapsed": true 213 | }, 214 | "outputs": [], 215 | "source": [] 216 | } 217 | ], 218 | "metadata": { 219 | "kernelspec": { 220 | "display_name": "Python 3", 221 | "language": "python", 222 | "name": "python3" 223 | }, 224 | "language_info": { 225 | "codemirror_mode": { 226 | "name": "ipython", 227 | "version": 3 228 | }, 229 | "file_extension": ".py", 230 | "mimetype": "text/x-python", 231 | "name": "python", 232 | "nbconvert_exporter": "python", 233 | "pygments_lexer": "ipython3", 234 | "version": "3.6.1" 235 | } 236 | }, 237 | "nbformat": 4, 238 | "nbformat_minor": 2 239 | } 240 | -------------------------------------------------------------------------------- /src/ineqpy/api.py: -------------------------------------------------------------------------------- 1 | """API's module. 2 | 3 | Extend pandas.DataFrames with the main functions from statistics and 4 | inequality modules. 
5 | """ 6 | import inspect 7 | from functools import partial 8 | from types import MethodType 9 | 10 | import pandas as pd 11 | 12 | from ineqpy import inequality, statistics 13 | 14 | 15 | class Convey: 16 | """Convey.""" 17 | 18 | def __init__( 19 | self, 20 | data=None, 21 | index=None, 22 | columns=None, 23 | weights=None, 24 | group=None, 25 | **kw 26 | ): 27 | self.df = pd.DataFrame(data=data, index=index, columns=columns, **kw) 28 | self.weights = weights 29 | self.group = group 30 | self._attach_method(statistics, self) 31 | self._attach_method(inequality, self) 32 | 33 | @property 34 | def _constructor(self): 35 | return Survey 36 | 37 | @classmethod 38 | def _attach_method(module, instance): 39 | # get methods names contained in module 40 | res_names = [] 41 | res_methods = [] 42 | method_name_list = inspect.getmembers(module, inspect.isfunction) 43 | 44 | for method_name, func in method_name_list: 45 | # if method_name.startswith('_'): continue # avoid private methods 46 | func = getattr(module, method_name) # get function 47 | if ( 48 | "weights" in inspect.signature(func).parameters 49 | ): # replace weights variable 50 | func = partial(func, weights=instance.weights) 51 | # func = partial(func, data=instance.data) 52 | func = MethodType(func, instance) 53 | res_methods.append(func) 54 | res_names.append(method_name) 55 | setattr(instance, method_name, func) 56 | 57 | 58 | class Survey: 59 | """Survey it's a data structure that handles survey data. 60 | 61 | Attributes 62 | ---------- 63 | df : pandas.DataFrame 64 | weights : str 65 | group : str 66 | 67 | Methods 68 | ------- 69 | atkinson(income=None, weights=None, e=0.5) 70 | Calculate Atkinson's index. 71 | avg_tax_rate(total_tax=None, total_base=None, weights=None) 72 | Calculate average tax rate. 73 | c_moment(variable=None, weights=None, order=2, param=None, ddof=0) 74 | Calculate central momment. 75 | coef_variation(variable=None, weights=None) 76 | Calculate coefficient of variation. 
77 | concentration(income=None, weights=None, sort=True) 78 | Calculate concentration's index. 79 | density(variable=None, weights=None, groups=None) 80 | Calculate density. 81 | gini(income=None, weights=None, sort=True) 82 | Calculate Gini's index. 83 | kakwani(tax=None, income_pre_tax=None, weights=None) 84 | Calculate Kakwani's index. 85 | kurt(variable=None, weights=None) 86 | Calculate Kurtosis. 87 | lorenz(income=None, weights=None) 88 | Calculate Lorenz curve. 89 | mean(variable=None, weights=None) 90 | Calculate mean. 91 | percentile(variable=None, weights=None, p=50, interpolate="lower") 92 | Calculate percentile. 93 | reynolds_smolensky(income_pre_tax=None, income_post_tax=None, weights=None) 94 | Calculate Reynolds-Smolensky's index. 95 | skew(variable=None, weights=None) 96 | Calculate Skew. 97 | std_moment(variable=None, weights=None, param=None, order=3, ddof=0) 98 | Calculate standard deviation. 99 | theil(income=None, weights=None) 100 | Calculate Theil's index. 101 | var(variable=None, weights=None, ddof=0) 102 | Calculate variance. 103 | """ 104 | 105 | def __init__( 106 | self, 107 | data=None, 108 | index=None, 109 | columns=None, 110 | weights=None, 111 | group=None, 112 | **kw 113 | ): 114 | self.df = pd.DataFrame(data=data, index=index, columns=columns, **kw) 115 | self.weights = weights 116 | self.group = group 117 | 118 | def c_moment(self, variable, weights=None, order=2, param=None, ddof=0): 119 | """Calculate central momment. 120 | 121 | Calculate the central moment of `x` with respect to `param` of order 122 | `n`, given the weights `w`. 123 | 124 | Parameters 125 | ---------- 126 | variable : 1d-array 127 | Variable 128 | weights : 1d-array 129 | Weights 130 | order : int, optional 131 | Moment order, 2 by default (variance) 132 | param : int or array, optional 133 | Parameter for which the moment is calculated, the default is None, 134 | implies use the mean. 135 | ddof : int, optional 136 | Degree of freedom, zero by default. 
137 | 138 | Returns 139 | ------- 140 | central_moment : float 141 | 142 | Notes 143 | ----- 144 | - The cmoment of order 1 is 0 145 | - The cmoment of order 2 is the variance. 146 | Source : https://en.wikipedia.org/wiki/Moment_(mathematics) 147 | 148 | Todo 149 | ---- 150 | Implement: https://en.wikipedia.org/wiki/L-moment#cite_note-wang:96-6 151 | 152 | """ 153 | data = self.df 154 | if weights is None: 155 | weights = self.weights 156 | 157 | return statistics.c_moment(variable, weights, data, order, param, ddof) 158 | 159 | def percentile(self, variable, weights=None, p=50, interpolate="lower"): 160 | """Calculate the value of a quantile given a variable and his weights. 161 | 162 | Parameters 163 | ---------- 164 | data : pd.DataFrame, optional 165 | pd.DataFrame that contains all variables needed. 166 | variable : str or array 167 | weights : str or array 168 | q : float 169 | Quantile level, if pass 0.5 means median. 170 | interpolate : bool 171 | 172 | Returns 173 | ------- 174 | percentile : float or pd.Series 175 | 176 | """ 177 | data = self.df 178 | if weights is None: 179 | weights = self.weights 180 | 181 | return statistics.percentile(variable, weights, data, p, interpolate) 182 | 183 | def std_moment(self, variable, weights=None, param=None, order=3, ddof=0): 184 | """Calculate the standardized moment. 185 | 186 | Calculate the standardized moment of order `c` for the variable` x` 187 | with respect to `c`. 188 | 189 | Parameters 190 | ---------- 191 | data : pd.DataFrame, optional 192 | pd.DataFrame that contains all variables needed. 193 | variable : 1d-array 194 | Random Variable 195 | weights : 1d-array, optional 196 | Weights or probability 197 | order : int, optional 198 | Order of Moment, three by default 199 | param : int or float or array, optional 200 | Central trend, default is the mean. 201 | ddof : int, optional 202 | Degree of freedom. 
203 | 204 | Returns 205 | ------- 206 | std_moment : float 207 | Returns the standardized `n` order moment. 208 | 209 | References 210 | ---------- 211 | - https://en.wikipedia.org/wiki/Moment_(mathematics)#Significance_ 212 | of_the_moments 213 | - https://en.wikipedia.org/wiki/Standardized_moment 214 | 215 | Todo 216 | ---- 217 | It is the general case of the raw and central moments. Review 218 | implementation. 219 | 220 | """ 221 | data = self.df 222 | if weights is None: 223 | weights = self.weights 224 | 225 | return statistics.std_moment( 226 | variable, weights, data, param, order, ddof 227 | ) 228 | 229 | def mean(self, variable, weights=None): 230 | """Calculate the mean of `variable` given `weights`. 231 | 232 | Parameters 233 | ---------- 234 | variable : array-like or str 235 | Variable on which the mean is estimated. 236 | weights : array-like or str 237 | Weights of the `x` variable. 238 | data : pandas.DataFrame 239 | Is possible pass a DataFrame with variable and weights, then you 240 | must pass as `variable` and `weights` the column name stored in 241 | `data`. 242 | 243 | Returns 244 | ------- 245 | mean : array-like or float 246 | """ 247 | # if pass a DataFrame separate variables. 248 | data = self.df 249 | if weights is None: 250 | weights = self.weights 251 | 252 | return statistics.mean(variable, weights, data) 253 | 254 | def density(self, variable, weights=None, groups=None): 255 | """Calculate density in percentage. 256 | 257 | This make division of variable inferring width in groups as max - min. 258 | 259 | Parameters 260 | ---------- 261 | data : pd.DataFrame, optional 262 | pandas.DataFrame that contains all variables needed. 263 | variable : array-like, optional 264 | weights : array-like, optional 265 | groups : array-like, optional 266 | 267 | Returns 268 | ------- 269 | density : array-like 270 | 271 | References 272 | ---------- 273 | Histogram. (2017, May 9). In Wikipedia, The Free Encyclopedia. 
274 | Retrieved: https://en.wikipedia.org/w/index.php?title=Histogram 275 | """ 276 | data = self.df 277 | if weights is None: 278 | weights = self.weights 279 | 280 | return statistics.density(variable, weights, groups, data) 281 | 282 | def var(self, variable, weights=None, ddof=0): 283 | """Calculate the population variance of `variable` given `weights`. 284 | 285 | Parameters 286 | ---------- 287 | data : pd.DataFrame, optional 288 | pd.DataFrame that contains all variables needed. 289 | variable : 1d-array or pd.Series or pd.DataFrame 290 | Variable on which the quasivariation is estimated 291 | weights : 1d-array or pd.Series or pd.DataFrame 292 | Weights of the `variable`. 293 | 294 | Returns 295 | ------- 296 | variance : 1d-array or pd.Series or float 297 | Estimation of quasivariance of `variable` 298 | 299 | References 300 | ---------- 301 | Moment (mathematics). (2017, May 6). In Wikipedia, The Free 302 | Encyclopedia. 303 | Retrieved 14:40, May 15, 2017, from 304 | https://en.wikipedia.org/w/index.php?title=Moment_(mathematics) 305 | 306 | Notes 307 | ----- 308 | If stratificated sample must pass with groupby each strata. 309 | """ 310 | data = self.df 311 | if weights is None: 312 | weights = self.weights 313 | 314 | return statistics.var(variable, weights, data, ddof) 315 | 316 | def coef_variation(self, variable, weights=None): 317 | """Calculate the coefficient of variation. 318 | 319 | The coefficient of variation is the square root of the variance of the 320 | incomes divided by the mean income. It has the advantages of being 321 | mathematically tractable and is subgroup decomposable, but is not 322 | bounded from above. 323 | 324 | Parameters 325 | ---------- 326 | data : pandas.DataFrame 327 | variable : array-like or str 328 | weights : array-like or str 329 | 330 | Returns 331 | ------- 332 | coefficient_variation : float 333 | 334 | References 335 | ---------- 336 | Coefficient of variation. (2017, May 5). 
In Wikipedia, The Free 337 | Encyclopedia. 338 | Retrieved 15:03, May 15, 2017, from 339 | https://en.wikipedia.org/w/index.php?title=Coefficient_of_variation 340 | """ 341 | # TODO complete docstring 342 | data = self.df 343 | if weights is None: 344 | weights = self.weights 345 | 346 | return statistics.coef_variation(variable, weights, data) 347 | 348 | def kurt(self, variable, weights=None): 349 | """Calculate the asymmetry coefficient. 350 | 351 | Parameters 352 | ---------- 353 | variable : 1d-array 354 | w : 1d-array 355 | 356 | Returns 357 | ------- 358 | kurt : float 359 | Kurtosis coefficient. 360 | 361 | References 362 | ---------- 363 | Moment (mathematics). (2017, May 6). In Wikipedia, The Free 364 | Encyclopedia. 365 | Retrieved 14:40, May 15, 2017, from 366 | https://en.wikipedia.org/w/index.php?title=Moment_(mathematics) 367 | 368 | Notes 369 | ----- 370 | It is an alias of the standardized fourth-order moment. 371 | """ 372 | data = self.df 373 | if weights is None: 374 | weights = self.weights 375 | 376 | return statistics.kurt(variable, weights, data) 377 | 378 | def skew(self, variable, weights=None): 379 | """Return the asymmetry coefficient of a sample. 380 | 381 | Parameters 382 | ---------- 383 | data : pandas.DataFrame 384 | variable : array-like, str 385 | weights : array-like, str 386 | 387 | Returns 388 | ------- 389 | skew : float 390 | 391 | References 392 | ---------- 393 | Moment (mathematics). (2017, May 6). In Wikipedia, The Free 394 | Encyclopedia. 395 | Retrieved 14:40, May 15, 2017, from 396 | https://en.wikipedia.org/w/index.php?title=Moment_(mathematics)& 397 | oldid=778996402 398 | 399 | Notes 400 | ----- 401 | It is an alias of the standardized third-order moment. 
402 | 403 | """ 404 | data = self.df 405 | if weights is None: 406 | weights = self.weights 407 | 408 | return statistics.skew(variable, weights, data) 409 | 410 | # INEQUALITY 411 | #  ---------- 412 | 413 | def concentration(self, income, weights=None, sort=True): 414 | """Calculate concentration index. 415 | 416 | This function calculate the concentration index, according to the 417 | notation used in [Jenkins1988]_ you can calculate the: 418 | C_x = 2 / x · cov(x, F_x) 419 | if x = g(x) then C_x becomes C_y 420 | when there are taxes: 421 | 422 | y = g(x) = x - t(x) 423 | 424 | Parameters 425 | ---------- 426 | income : array-like 427 | weights : array-like 428 | data : pandas.DataFrame 429 | sort : bool 430 | 431 | Returns 432 | ------- 433 | concentration : array-like 434 | 435 | References 436 | ---------- 437 | Jenkins, S. (1988). Calculating income distribution indices 438 | from micro-data. National Tax Journal. http://doi.org/10.2307/41788716 439 | """ 440 | # TODO complete docstring 441 | data = self.df 442 | if weights is None: 443 | weights = self.weights 444 | 445 | return inequality.concentration(income, weights, data, sort) 446 | 447 | def lorenz(self, income, weights=None): 448 | """Calculate lorenz curve. 449 | 450 | In economics, the Lorenz curve is a graphical representation of the 451 | distribution of income or of wealth. It was developed by Max O. Lorenz 452 | in 1905 for representing grouped of the wealth distribution. This 453 | function compute the lorenz curve and returns a DF with two columns of 454 | axis x and y. 455 | 456 | Parameters 457 | ---------- 458 | data : pandas.DataFrame 459 | A pandas.DataFrame that contains data. 460 | income : str or 1d-array, optional 461 | Population or wights, if a DataFrame is passed then `income` should 462 | be a name of the column of DataFrame, else can pass a pandas.Series 463 | or array. 
464 | weights : str or 1d-array 465 | Income, monetary variable, if a DataFrame is passed then `y`is a 466 | name of the series on this DataFrame, however, you can pass a 467 | pd.Series or np.array. 468 | 469 | Returns 470 | ------- 471 | lorenz : pandas.Dataframe 472 | Lorenz distribution in a Dataframe with two columns, labeled x and 473 | y, that corresponds to plots axis. 474 | 475 | References 476 | ---------- 477 | Lorenz curve. (2017, February 11). In Wikipedia, The Free Encyclopedia. 478 | Retrieved 14:34, May 15, 2017, from 479 | https://en.wikipedia.org/w/index.php?title=Lorenz_curve&oldid=764853675 480 | """ 481 | data = self.df 482 | if weights is None: 483 | weights = self.weights 484 | 485 | return inequality.lorenz(income, weights, data) 486 | 487 | def gini(self, income, weights=None, sort=True): 488 | """Calculate Gini's index. 489 | 490 | The Gini coefficient (sometimes expressed as a Gini ratio or a 491 | normalized Gini index) is a measure of statistical dispersion intended 492 | to represent the income or wealth distribution of a nation's residents, 493 | and is the most commonly used measure of grouped. It was developed by 494 | Corrado Gini. 495 | 496 | The Gini coefficient measures the grouped among values of a frequency 497 | distribution (for example, levels of income). A Gini coefficient of 498 | zero expresses perfect equality, where all values are the same (for 499 | example, where everyone has the same income). A Gini coefficient of 1 500 | (or 100%) expresses maximal grouped among values (e.g., for a large 501 | number of people, where only one person has all the income or 502 | consumption, and all others have none, the Gini coefficient will be 503 | very nearly one). 504 | 505 | Parameters 506 | ---------- 507 | data : pandas.DataFrame 508 | DataFrame that contains the data. 
509 | income : str or np.array, optional 510 | Name of the monetary variable `x` in` df` 511 | weights : str or np.array, optional 512 | Name of the series containing the weights `x` in` df` 513 | sorted : bool, optional 514 | If the DataFrame is previously ordered by the variable `x`, it's 515 | must pass True, but False by default. 516 | 517 | Returns 518 | ------- 519 | gini : float 520 | Gini Index Value. 521 | 522 | Notes 523 | ----- 524 | The calculation is done following (discrete probability distribution): 525 | G = 1 - [∑_i^n f(y_i)·(S_{i-1} + S_i)] 526 | where: 527 | - y_i = Income 528 | - S_i = ∑_{j=1}^i y_i · f(y_i) 529 | 530 | Reference 531 | --------- 532 | - Gini coefficient. (2017, May 8). In Wikipedia, The Free Encyclopedia. 533 | Retrieved 14:30, May 15, 2017, from 534 | https://en.wikipedia.org/w/index.php?title=Gini_coefficient&oldid=779424616 535 | 536 | - Jenkins, S. (1988). Calculating income distribution indices 537 | from micro-data. National Tax Journal. http://doi.org/10.2307/41788716 538 | 539 | Todo 540 | ---- 541 | - Implement statistical deviation calculation, VAR (GINI) 542 | 543 | """ 544 | data = self.df 545 | if weights is None: 546 | weights = self.weights 547 | 548 | return inequality.gini(income, weights, data, sort) 549 | 550 | def atkinson(self, income, weights=None, e=0.5): 551 | """Calculate Atkinson index. 552 | 553 | More precisely labelled a family of income grouped measures, the 554 | theoretical range of Atkinson values is 0 to 1, with 0 being a state of 555 | equal distribution. 556 | An intuitive interpretation of this index is possible: Atkinson values 557 | can be used to calculate the proportion of total income that would be 558 | required to achieve an equal level of social welfare as at present if 559 | incomes were perfectly distributed. 560 | 561 | For example, an Atkinson index value of 0.20 suggests 562 | that we could achieve the same level of social welfare with only 563 | 1 – 0.20 = 80% of income. 
The theoretical range of Atkinson values is 0 564 | to 1, with 0 being a state of equal distribution. 565 | 566 | Parameters 567 | ---------- 568 | income : array or str 569 | If `data` is none `income` must be an 1D-array, when `data` is a 570 | pd.DataFrame, you must pass the name of income variable as string. 571 | weights : array or str, optional 572 | If `data` is none `weights` must be an 1D-array, when `data` is a 573 | pd.DataFrame, you must pass the name of weights variable as string. 574 | e : int, optional 575 | Epsilon parameter interpreted by atkinson index as grouped 576 | adversion, must be a number between 0 to 1. 577 | data : pd.DataFrame, optional 578 | data is a pd.DataFrame that contains the variables. 579 | 580 | Returns 581 | ------- 582 | atkinson : float 583 | 584 | Reference 585 | --------- 586 | Atkinson index. (2017, March 12). In Wikipedia, The Free Encyclopedia. 587 | Retrieved 14:35, May 15, 2017, from 588 | https://en.wikipedia.org/w/index.php?title=Atkinson_index&oldid=769991852 589 | 590 | Todo 591 | ---- 592 | - Implement: CALCULATING INCOME DISTRIBUTION INDICES FROM MICRO-DATA 593 | http://www.jstor.org/stable/41788716 594 | - The results has difference with stata, maybe have a bug. 595 | """ 596 | data = self.df 597 | if weights is None: 598 | weights = self.weights 599 | 600 | return inequality.atkinson(income, weights, data, e) 601 | 602 | def kakwani(self, tax, income_pre_tax, weights=None): 603 | """Calculate kakwani's index. 604 | 605 | The Kakwani (1977) index of tax progressivity is defined as twice the 606 | area between the concentration curves for taxes and pre-tax income, 607 | or equivalently, the concentration index for t(x) minus the Gini index 608 | for x, i.e. 609 | 610 | K = C(t) - G(x) 611 | = (2/t) cov [t(x), F(x)] - (2/x) cov [x, F(x)]. 612 | 613 | Parameters 614 | ---------- 615 | data : pandas.DataFrame 616 | This variable is a DataFrame that contains all data required in 617 | columns. 
618 | tax_variable : array-like or str 619 | This variable represent tax payment of person, if pass array-like 620 | then data must be None, else you pass str-name column in `data`. 621 | income_pre_tax : array-like or str 622 | This variable represent income of person, if pass array-like 623 | then data must be None, else you pass str-name column in `data`. 624 | weights : array-like or str 625 | This variable represent weights of each person, if pass array-like 626 | then data must be None, else you pass str-name column in `data`. 627 | 628 | Returns 629 | ------- 630 | kakwani : float 631 | 632 | References 633 | ---------- 634 | Jenkins, S. (1988). Calculating income distribution indices from 635 | micro-data. National Tax Journal. http://doi.org/10.2307/41788716 636 | """ 637 | # main calc 638 | data = self.df 639 | if weights is None: 640 | weights = self.weights 641 | 642 | return inequality.kakwani(tax, income_pre_tax, weights, data) 643 | 644 | def reynolds_smolensky( 645 | self, income_pre_tax, income_post_tax, weights=None 646 | ): 647 | """Calculate Reynolds-Smolensky's index. 648 | 649 | The Reynolds-Smolensky (1977) index of the redistributive effect of 650 | taxes, which can also be interpreted as an index of progressivity 651 | (Lambert 1985), is defined as: 652 | 653 | L = Gx - Gy 654 | = [2/x]cov[x,F(x)] - [2/ybar] cov [y, F(y)]. 655 | 656 | Parameters 657 | ---------- 658 | data : pandas.DataFrame 659 | This variable is a DataFrame that contains all data required in 660 | it's columns. 661 | income_pre_tax : array-like or str 662 | This variable represent tax payment of person, if pass array-like 663 | then data must be None, else you pass str-name column in `data`. 664 | income_post_tax : array-like or str 665 | This variable represent income of person, if pass array-like 666 | then data must be None, else you pass str-name column in `data`. 
667 | weights : array-like or str 668 | This variable represent weights of each person, if pass array-like 669 | then data must be None, else you pass str-name column in `data`. 670 | 671 | Returns 672 | ------- 673 | reynolds_smolensky : float 674 | 675 | References 676 | ---------- 677 | Jenkins, S. (1988). Calculating income distribution indices from 678 | micro-data. National Tax Journal. http://doi.org/10.2307/41788716 679 | """ 680 | data = self.df 681 | if weights is None: 682 | weights = self.weights 683 | 684 | return inequality.reynolds_smolensky( 685 | income_pre_tax, income_post_tax, weights, data 686 | ) 687 | 688 | def theil(self, income, weights=None): 689 | """Calculate theil index. 690 | 691 | The Theil index is a statistic primarily used to measure economic 692 | grouped and other economic phenomena. It is a special case of the 693 | generalized entropy index. It can be viewed as a measure of redundancy, 694 | lack of diversity, isolation, segregation, grouped, non-randomness, and 695 | compressibility. It was proposed by econometrician Henri Theil. 696 | 697 | Parameters 698 | ---------- 699 | data : pandas.DataFrame 700 | This variable is a DataFrame that contains all data required in 701 | it's columns. 702 | income : array-like or str 703 | This variable represent tax payment of person, if pass array-like 704 | then data must be None, else you pass str-name column in `data`. 705 | weights : array-like or str 706 | This variable represent weights of each person, if pass array-like 707 | then data must be None, else you pass str-name column in `data`. 708 | 709 | Returns 710 | ------- 711 | theil : float 712 | 713 | References 714 | ---------- 715 | Theil index. (2016, December 17). In Wikipedia, The Free Encyclopedia. 
716 | Retrieved 14:17, May 15, 2017, from 717 | https://en.wikipedia.org/w/index.php?title=Theil_index&oldid=755407818 718 | 719 | """ 720 | data = self.df 721 | if weights is None: 722 | weights = self.weights 723 | 724 | return inequality.theil(income, weights, data) 725 | 726 | def avg_tax_rate(self, total_tax, total_base, weights=None): 727 | """Compute the average tax rate given a base income and a total tax. 728 | 729 | Parameters 730 | ---------- 731 | total_base : str or numpy.array 732 | total_tax : str or numpy.array 733 | data : pd.DataFrame 734 | 735 | Returns 736 | ------- 737 | avg_tax_rate : float or pd.Series 738 | Is the ratio between mean the tax income and base of income. 739 | 740 | Reference 741 | --------- 742 | Panel de declarantes de IRPF 1999-2007: Metodología, estructura y 743 | variables. (2011). 744 | Panel de declarantes de IRPF 1999-2007: Metodología, estructura y 745 | variables. Documentos. 746 | """ 747 | data = self.df 748 | if weights is None: 749 | weights = self.weights 750 | 751 | return inequality.avg_tax_rate(total_tax, total_base, weights, data) 752 | 753 | def top_rest( 754 | self, income, weights=None, data=None, top_percentage=10 755 | ): 756 | """Calculate the 10:90 Ratio. 757 | 758 | Calculates the quotient between the number of contributions from the 759 | top 10% of contributors divided by the number contributions made by the 760 | other 90%. The ratio is 1 if the total contributions by the top 761 | contributors are equal to the cotnributions made by the rest; less than 762 | zero if the top 10% contributes less than the rest; and greater that 1 763 | if the top 10% contributes more than the other ninety percent. 764 | 765 | Parameters 766 | ---------- 767 | income : array-like or str 768 | This variable represent tax payment of person, if pass array-like 769 | then data must be None, else you pass str-name column in `data`. 
770 | weights : array-like or str 771 | This variable represent weights of each person, if pass array-like 772 | then data must be None, else you pass str-name column in `data`. 773 | All-ones by default 774 | data : pandas.DataFrame 775 | This variable is a DataFrame that contains all data required in 776 | it's columns. 777 | top_percentage : float 778 | The richest x percent to consider. (10 percent by default) 779 | It must be a number between 0 and 100 780 | 781 | Returns 782 | ------- 783 | ratio : float 784 | 785 | References 786 | ---------- 787 | Participation Inequality in Wikis: A Temporal Analysis Using WikiChron. 788 | Serrano, Abel & Arroyo, Javier & Hassan, Samer. (2018). 789 | DOI: 10.1145/3233391.3233536. 790 | """ 791 | data = self.df 792 | if weights is None: 793 | weights = self.weights 794 | 795 | return inequality.top_rest(income, weights, data, top_percentage) 796 | --------------------------------------------------------------------------------