├── .gitattributes
├── .github
└── workflows
│ ├── ci-tests.yml
│ ├── gen_readme.yml
│ └── pypi.yml
├── .gitignore
├── .vscode
└── settings.json
├── LICENSE
├── Pipfile
├── Pipfile.lock
├── README.md
├── README.qmd
├── README_files
└── figure-commonmark
│ ├── cell-5-output-1.png
│ ├── cell-5-output-2.png
│ ├── cell-7-output-1.png
│ ├── cell-7-output-2.png
│ └── cell-8-output-2.png
├── configs
└── data.yml
├── csdid
├── __init__.py
├── _version.py
├── aggte_fnc
│ ├── __init__.py
│ ├── aggte.py
│ ├── compute_aggte.py
│ └── utils.py
├── att_gt.py
├── attgt_fnc
│ ├── __init__.py
│ ├── compute_att_gt.py
│ └── preprocess_did.py
├── plots
│ ├── __init__.py
│ └── gplot.py
└── utils
│ ├── __init__.py
│ ├── bmisc.py
│ └── mboot.py
├── data
├── dta.csv
├── mpdta.csv
└── sim_data.csv
├── figs
├── did_py.drawio
├── did_py.png
├── did_r.drawio
└── did_r.png
├── readme.md
├── requirements.txt
├── setup.py
└── test
├── basic.py
└── test_vs_r.py
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.ipynb export-ignore
2 |
--------------------------------------------------------------------------------
/.github/workflows/ci-tests.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | pull_request:
8 | branches:
9 | - main
10 |
11 | jobs:
12 | test:
13 | name: "Test vs R::did"
14 | runs-on: ${{ matrix.os }}
15 | strategy:
16 | fail-fast: false
17 | matrix:
18 | os: ["ubuntu-latest"]
19 | python-version: ["3.8"]
20 | pytest_opts: ["--workers 4 --tests-per-worker 1"]
21 |
22 | steps:
23 | - name: Checkout source
24 | uses: actions/checkout@v4
25 | - name: Setup python
26 | uses: actions/setup-python@v5
27 | with:
28 | python-version: ${{ matrix.python-version }}
29 | architecture: x64
30 |
31 | - name: Install Dependencies
32 | run: pip install -r requirements.txt; pip install git+https://github.com/d2cml-ai/DRDID
33 |
34 | - name: Install package
35 | run: pip install .
36 |
37 | - name: Setup r2u
38 | uses: eddelbuettel/github-actions/r2u-setup@master
39 |
40 | - name: install R packages
41 | run: Rscript -e 'install.packages(c("did"))'
42 | shell: bash
43 |
44 | - name: Run tests
45 | run: |
46 | pytest test/test_vs_r.py
47 |
48 |
49 |
50 |
--------------------------------------------------------------------------------
/.github/workflows/gen_readme.yml:
--------------------------------------------------------------------------------
1 | name: Actualizar Readme
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | # - cron: "40 15 * * 1,3,5"
8 |
9 | jobs:
10 | CreateQuarto:
11 | # if: ${{ github.event.workflow_run.conclusion == 'success' }}
12 | runs-on: ubuntu-latest
13 | permissions:
14 | contents: write
15 | steps:
16 | - name: Chequear el código
17 | uses: actions/checkout@v4
18 |
19 | - name: Set up Quarto
20 | uses: quarto-dev/quarto-actions/setup@v2
21 |
22 | - name: Configurar Python
23 | uses: actions/setup-python@v5
24 | with:
25 | python-version: "3.10"
26 | cache: "pip"
27 | - run: pip uninstall csdid; pip install git+https://github.com/d2cml-ai/csdid/; pip install nbclient nbformat PyYAML; pip install git+https://github.com/d2cml-ai/DRDID
28 | - name: Install Dependencies
29 | run: pip install -r requirements.txt
30 |
31 | - name: Generar quarto doc
32 | run: quarto render README.qmd
33 |
34 | - id: commit
35 | name: Commit files
36 | run: |
37 | git config --local user.name "action-user"
38 | git pull
39 | git config --local user.email "actions@github.com"
40 | git add -A
41 | git commit -m "Update Readme"
42 | git push origin main
43 | env:
44 | REPO_KEY: ${{ secrets.GITHUB_TOKEN }}
45 | username: github-actions
46 |
--------------------------------------------------------------------------------
/.github/workflows/pypi.yml:
--------------------------------------------------------------------------------
1 | name: Publish to PyPI
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 |
8 | jobs:
9 | build:
10 | runs-on: ubuntu-latest
11 | env:
12 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
13 |
14 | steps:
15 | - uses: actions/checkout@v2
16 |
17 | - id: Python_pip
18 | name: Python dependencies
19 | uses: actions/setup-python@v2
20 | with:
21 | python-version: "3.8"
22 | cache: "pip"
23 | - run: pip install -r requirements.txt
24 |
25 | - name: Build Package
26 | run: |
27 | python setup.py sdist
28 |
29 | - name: Upload to PyPi
30 | env:
31 | TWINE_USERNAME: ${{ secrets.PYPI_USER}}
32 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
33 | run: |
34 | sudo apt-get install tree
35 | tree /home/runner/work/csdid
36 | twine upload dist/*
37 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *__pycache__
2 | .Rproj.user
3 | methods_r/
4 | *Rproj
5 |
6 | dist/
7 | *info
8 | *html
9 | test/plot_files
10 | .DS_Store
11 |
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "editor.tabSize": 2,
3 | "editor.insertSpaces": true,
4 | "python.linting.pylintEnabled": true,
5 | "python.linting.enabled": false,
6 | "[python]": {
7 | "editor.defaultFormatter": "ms-python.python"
8 | },
9 | "python.formatting.provider": "none"
10 | // "python.formatting.provider": "autopep8",
11 | // "[python]": {
12 | // "editor.defaultFormatter": "ms-python.python"
13 | // },
14 | // "python.formatting.autopep8Args": ["--indent-size", "2"]
15 | }
16 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Pedro Sant’Anna, Brantly Callaway, Alexander Quispe, Carlos Guevara, Jhon Flores
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Pipfile:
--------------------------------------------------------------------------------
1 | [[source]]
2 | url = "https://pypi.org/simple"
3 | verify_ssl = true
4 | name = "pypi"
5 |
6 | [packages]
7 | pandas = "*"
8 | numpy = "<=1.24.3"
9 | scipy = "*"
10 | patsy = "*"
11 | plotnine = "*"
12 |
13 | [dev-packages]
14 |
15 | [requires]
16 | python_version = "3.8"
17 |
--------------------------------------------------------------------------------
/Pipfile.lock:
--------------------------------------------------------------------------------
1 | {
2 | "_meta": {
3 | "hash": {
4 | "sha256": "b71b63262874e0de6cb1ba93dd3fc02b136b8229fe98b1c1a4ed640eb68c6a7e"
5 | },
6 | "pipfile-spec": 6,
7 | "requires": {
8 | "python_version": "3.8"
9 | },
10 | "sources": [
11 | {
12 | "name": "pypi",
13 | "url": "https://pypi.org/simple",
14 | "verify_ssl": true
15 | }
16 | ]
17 | },
18 | "default": {
19 | "backports.zoneinfo": {
20 | "hashes": [
21 | "sha256:17746bd546106fa389c51dbea67c8b7c8f0d14b5526a579ca6ccf5ed72c526cf",
22 | "sha256:1b13e654a55cd45672cb54ed12148cd33628f672548f373963b0bff67b217328",
23 | "sha256:1c5742112073a563c81f786e77514969acb58649bcdf6cdf0b4ed31a348d4546",
24 | "sha256:4a0f800587060bf8880f954dbef70de6c11bbe59c673c3d818921f042f9954a6",
25 | "sha256:5c144945a7752ca544b4b78c8c41544cdfaf9786f25fe5ffb10e838e19a27570",
26 | "sha256:7b0a64cda4145548fed9efc10322770f929b944ce5cee6c0dfe0c87bf4c0c8c9",
27 | "sha256:8439c030a11780786a2002261569bdf362264f605dfa4d65090b64b05c9f79a7",
28 | "sha256:8961c0f32cd0336fb8e8ead11a1f8cd99ec07145ec2931122faaac1c8f7fd987",
29 | "sha256:89a48c0d158a3cc3f654da4c2de1ceba85263fafb861b98b59040a5086259722",
30 | "sha256:a76b38c52400b762e48131494ba26be363491ac4f9a04c1b7e92483d169f6582",
31 | "sha256:da6013fd84a690242c310d77ddb8441a559e9cb3d3d59ebac9aca1a57b2e18bc",
32 | "sha256:e55b384612d93be96506932a786bbcde5a2db7a9e6a4bb4bffe8b733f5b9036b",
33 | "sha256:e81b76cace8eda1fca50e345242ba977f9be6ae3945af8d46326d776b4cf78d1",
34 | "sha256:e8236383a20872c0cdf5a62b554b27538db7fa1bbec52429d8d106effbaeca08",
35 | "sha256:f04e857b59d9d1ccc39ce2da1021d196e47234873820cbeaad210724b1ee28ac",
36 | "sha256:fadbfe37f74051d024037f223b8e001611eac868b5c5b06144ef4d8b799862f2"
37 | ],
38 | "markers": "python_version < '3.9'",
39 | "version": "==0.2.1"
40 | },
41 | "contourpy": {
42 | "hashes": [
43 | "sha256:052cc634bf903c604ef1a00a5aa093c54f81a2612faedaa43295809ffdde885e",
44 | "sha256:084eaa568400cfaf7179b847ac871582199b1b44d5699198e9602ecbbb5f6104",
45 | "sha256:0b6616375d7de55797d7a66ee7d087efe27f03d336c27cf1f32c02b8c1a5ac70",
46 | "sha256:0b7b04ed0961647691cfe5d82115dd072af7ce8846d31a5fac6c142dcce8b882",
47 | "sha256:143dde50520a9f90e4a2703f367cf8ec96a73042b72e68fcd184e1279962eb6f",
48 | "sha256:17cfaf5ec9862bc93af1ec1f302457371c34e688fbd381f4035a06cd47324f48",
49 | "sha256:181cbace49874f4358e2929aaf7ba84006acb76694102e88dd15af861996c16e",
50 | "sha256:189ceb1525eb0655ab8487a9a9c41f42a73ba52d6789754788d1883fb06b2d8a",
51 | "sha256:18a64814ae7bce73925131381603fff0116e2df25230dfc80d6d690aa6e20b37",
52 | "sha256:1f0cbd657e9bde94cd0e33aa7df94fb73c1ab7799378d3b3f902eb8eb2e04a3a",
53 | "sha256:1f795597073b09d631782e7245016a4323cf1cf0b4e06eef7ea6627e06a37ff2",
54 | "sha256:25ae46595e22f93592d39a7eac3d638cda552c3e1160255258b695f7b58e5655",
55 | "sha256:27bc79200c742f9746d7dd51a734ee326a292d77e7d94c8af6e08d1e6c15d545",
56 | "sha256:2b836d22bd2c7bb2700348e4521b25e077255ebb6ab68e351ab5aa91ca27e027",
57 | "sha256:30f511c05fab7f12e0b1b7730ebdc2ec8deedcfb505bc27eb570ff47c51a8f15",
58 | "sha256:317267d915490d1e84577924bd61ba71bf8681a30e0d6c545f577363157e5e94",
59 | "sha256:397b0ac8a12880412da3551a8cb5a187d3298a72802b45a3bd1805e204ad8439",
60 | "sha256:438ba416d02f82b692e371858143970ed2eb6337d9cdbbede0d8ad9f3d7dd17d",
61 | "sha256:53cc3a40635abedbec7f1bde60f8c189c49e84ac180c665f2cd7c162cc454baa",
62 | "sha256:5d123a5bc63cd34c27ff9c7ac1cd978909e9c71da12e05be0231c608048bb2ae",
63 | "sha256:62013a2cf68abc80dadfd2307299bfa8f5aa0dcaec5b2954caeb5fa094171103",
64 | "sha256:89f06eff3ce2f4b3eb24c1055a26981bffe4e7264acd86f15b97e40530b794bc",
65 | "sha256:90c81f22b4f572f8a2110b0b741bb64e5a6427e0a198b2cdc1fbaf85f352a3aa",
66 | "sha256:911ff4fd53e26b019f898f32db0d4956c9d227d51338fb3b03ec72ff0084ee5f",
67 | "sha256:9382a1c0bc46230fb881c36229bfa23d8c303b889b788b939365578d762b5c18",
68 | "sha256:9f2931ed4741f98f74b410b16e5213f71dcccee67518970c42f64153ea9313b9",
69 | "sha256:a67259c2b493b00e5a4d0f7bfae51fb4b3371395e47d079a4446e9b0f4d70e76",
70 | "sha256:a698c6a7a432789e587168573a864a7ea374c6be8d4f31f9d87c001d5a843493",
71 | "sha256:bc00bb4225d57bff7ebb634646c0ee2a1298402ec10a5fe7af79df9a51c1bfd9",
72 | "sha256:bcb41692aa09aeb19c7c213411854402f29f6613845ad2453d30bf421fe68fed",
73 | "sha256:d4f26b25b4f86087e7d75e63212756c38546e70f2a92d2be44f80114826e1cd4",
74 | "sha256:d551f3a442655f3dcc1285723f9acd646ca5858834efeab4598d706206b09c9f",
75 | "sha256:dffcc2ddec1782dd2f2ce1ef16f070861af4fb78c69862ce0aab801495dda6a3",
76 | "sha256:e53046c3863828d21d531cc3b53786e6580eb1ba02477e8681009b6aa0870b21",
77 | "sha256:e5cec36c5090e75a9ac9dbd0ff4a8cf7cecd60f1b6dc23a374c7d980a1cd710e",
78 | "sha256:e7a117ce7df5a938fe035cad481b0189049e8d92433b4b33aa7fc609344aafa1",
79 | "sha256:e94bef2580e25b5fdb183bf98a2faa2adc5b638736b2c0a4da98691da641316a",
80 | "sha256:ed614aea8462735e7d70141374bd7650afd1c3f3cb0c2dbbcbe44e14331bf002",
81 | "sha256:fb3b7d9e6243bfa1efb93ccfe64ec610d85cfe5aec2c25f97fbbd2e58b531256"
82 | ],
83 | "markers": "python_version >= '3.8'",
84 | "version": "==1.1.0"
85 | },
86 | "cycler": {
87 | "hashes": [
88 | "sha256:3a27e95f763a428a739d2add979fa7494c912a32c17c4c38c4d5f082cad165a3",
89 | "sha256:9c87405839a19696e837b3b818fed3f5f69f16f1eec1a1ad77e043dcea9c772f"
90 | ],
91 | "markers": "python_version >= '3.6'",
92 | "version": "==0.11.0"
93 | },
94 | "fonttools": {
95 | "hashes": [
96 | "sha256:00ab569b2a3e591e00425023ade87e8fef90380c1dde61be7691cb524ca5f743",
97 | "sha256:022c4a16b412293e7f1ce21b8bab7a6f9d12c4ffdf171fdc67122baddb973069",
98 | "sha256:05171f3c546f64d78569f10adc0de72561882352cac39ec7439af12304d8d8c0",
99 | "sha256:14037c31138fbd21847ad5e5441dfdde003e0a8f3feb5812a1a21fd1c255ffbd",
100 | "sha256:15abb3d055c1b2dff9ce376b6c3db10777cb74b37b52b78f61657634fd348a0d",
101 | "sha256:18ea64ac43e94c9e0c23d7a9475f1026be0e25b10dda8f236fc956188761df97",
102 | "sha256:1a003608400dd1cca3e089e8c94973c6b51a4fb1ef00ff6d7641617b9242e637",
103 | "sha256:1bc4c5b147be8dbc5df9cc8ac5e93ee914ad030fe2a201cc8f02f499db71011d",
104 | "sha256:200729d12461e2038700d31f0d49ad5a7b55855dec7525074979a06b46f88505",
105 | "sha256:337b6e83d7ee73c40ea62407f2ce03b07c3459e213b6f332b94a69923b9e1cb9",
106 | "sha256:37467cee0f32cada2ec08bc16c9c31f9b53ea54b2f5604bf25a1246b5f50593a",
107 | "sha256:425b74a608427499b0e45e433c34ddc350820b6f25b7c8761963a08145157a66",
108 | "sha256:530c5d35109f3e0cea2535742d6a3bc99c0786cf0cbd7bb2dc9212387f0d908c",
109 | "sha256:56d4d85f5374b45b08d2f928517d1e313ea71b4847240398decd0ab3ebbca885",
110 | "sha256:5e00334c66f4e83535384cb5339526d01d02d77f142c23b2f97bd6a4f585497a",
111 | "sha256:5fdf60f8a5c6bcce7d024a33f7e4bc7921f5b74e8ea13bccd204f2c8b86f3470",
112 | "sha256:6a8d71b9a5c884c72741868e845c0e563c5d83dcaf10bb0ceeec3b4b2eb14c67",
113 | "sha256:6d5adf4ba114f028fc3f5317a221fd8b0f4ef7a2e5524a2b1e0fd891b093791a",
114 | "sha256:7449e5e306f3a930a8944c85d0cbc8429cba13503372a1a40f23124d6fb09b58",
115 | "sha256:7961575221e3da0841c75da53833272c520000d76f7f71274dbf43370f8a1065",
116 | "sha256:7f6e3fa3da923063c286320e728ba2270e49c73386e3a711aa680f4b0747d692",
117 | "sha256:882983279bf39afe4e945109772c2ffad2be2c90983d6559af8b75c19845a80a",
118 | "sha256:8a917828dbfdb1cbe50cf40eeae6fbf9c41aef9e535649ed8f4982b2ef65c091",
119 | "sha256:8c4305b171b61040b1ee75d18f9baafe58bd3b798d1670078efe2c92436bfb63",
120 | "sha256:91784e21a1a085fac07c6a407564f4a77feb471b5954c9ee55a4f9165151f6c1",
121 | "sha256:94c915f6716589f78bc00fbc14c5b8de65cfd11ee335d32504f1ef234524cb24",
122 | "sha256:97d95b8301b62bdece1af943b88bcb3680fd385f88346a4a899ee145913b414a",
123 | "sha256:a954b90d1473c85a22ecf305761d9fd89da93bbd31dae86e7dea436ad2cb5dc9",
124 | "sha256:aa83b3f151bc63970f39b2b42a06097c5a22fd7ed9f7ba008e618de4503d3895",
125 | "sha256:b802dcbf9bcff74672f292b2466f6589ab8736ce4dcf36f48eb994c2847c4b30",
126 | "sha256:bae8c13abbc2511e9a855d2142c0ab01178dd66b1a665798f357da0d06253e0d",
127 | "sha256:c55f1b4109dbc3aeb496677b3e636d55ef46dc078c2a5e3f3db4e90f1c6d2907",
128 | "sha256:eb52c10fda31159c22c7ed85074e05f8b97da8773ea461706c273e31bcbea836",
129 | "sha256:ec468c022d09f1817c691cf884feb1030ef6f1e93e3ea6831b0d8144c06480d1"
130 | ],
131 | "markers": "python_version >= '3.8'",
132 | "version": "==4.40.0"
133 | },
134 | "importlib-resources": {
135 | "hashes": [
136 | "sha256:4be82589bf5c1d7999aedf2a45159d10cb3ca4f19b2271f8792bc8e6da7b22f6",
137 | "sha256:7b1deeebbf351c7578e09bf2f63fa2ce8b5ffec296e0d349139d43cca061a81a"
138 | ],
139 | "markers": "python_version < '3.10'",
140 | "version": "==5.12.0"
141 | },
142 | "kiwisolver": {
143 | "hashes": [
144 | "sha256:02f79693ec433cb4b5f51694e8477ae83b3205768a6fb48ffba60549080e295b",
145 | "sha256:03baab2d6b4a54ddbb43bba1a3a2d1627e82d205c5cf8f4c924dc49284b87166",
146 | "sha256:1041feb4cda8708ce73bb4dcb9ce1ccf49d553bf87c3954bdfa46f0c3f77252c",
147 | "sha256:10ee06759482c78bdb864f4109886dff7b8a56529bc1609d4f1112b93fe6423c",
148 | "sha256:1d1573129aa0fd901076e2bfb4275a35f5b7aa60fbfb984499d661ec950320b0",
149 | "sha256:283dffbf061a4ec60391d51e6155e372a1f7a4f5b15d59c8505339454f8989e4",
150 | "sha256:28bc5b299f48150b5f822ce68624e445040595a4ac3d59251703779836eceff9",
151 | "sha256:2a66fdfb34e05b705620dd567f5a03f239a088d5a3f321e7b6ac3239d22aa286",
152 | "sha256:2e307eb9bd99801f82789b44bb45e9f541961831c7311521b13a6c85afc09767",
153 | "sha256:2e407cb4bd5a13984a6c2c0fe1845e4e41e96f183e5e5cd4d77a857d9693494c",
154 | "sha256:2f5e60fabb7343a836360c4f0919b8cd0d6dbf08ad2ca6b9cf90bf0c76a3c4f6",
155 | "sha256:36dafec3d6d6088d34e2de6b85f9d8e2324eb734162fba59d2ba9ed7a2043d5b",
156 | "sha256:3fe20f63c9ecee44560d0e7f116b3a747a5d7203376abeea292ab3152334d004",
157 | "sha256:41dae968a94b1ef1897cb322b39360a0812661dba7c682aa45098eb8e193dbdf",
158 | "sha256:4bd472dbe5e136f96a4b18f295d159d7f26fd399136f5b17b08c4e5f498cd494",
159 | "sha256:4ea39b0ccc4f5d803e3337dd46bcce60b702be4d86fd0b3d7531ef10fd99a1ac",
160 | "sha256:5853eb494c71e267912275e5586fe281444eb5e722de4e131cddf9d442615626",
161 | "sha256:5bce61af018b0cb2055e0e72e7d65290d822d3feee430b7b8203d8a855e78766",
162 | "sha256:6295ecd49304dcf3bfbfa45d9a081c96509e95f4b9d0eb7ee4ec0530c4a96514",
163 | "sha256:62ac9cc684da4cf1778d07a89bf5f81b35834cb96ca523d3a7fb32509380cbf6",
164 | "sha256:70e7c2e7b750585569564e2e5ca9845acfaa5da56ac46df68414f29fea97be9f",
165 | "sha256:7577c1987baa3adc4b3c62c33bd1118c3ef5c8ddef36f0f2c950ae0b199e100d",
166 | "sha256:75facbe9606748f43428fc91a43edb46c7ff68889b91fa31f53b58894503a191",
167 | "sha256:787518a6789009c159453da4d6b683f468ef7a65bbde796bcea803ccf191058d",
168 | "sha256:78d6601aed50c74e0ef02f4204da1816147a6d3fbdc8b3872d263338a9052c51",
169 | "sha256:7c43e1e1206cd421cd92e6b3280d4385d41d7166b3ed577ac20444b6995a445f",
170 | "sha256:81e38381b782cc7e1e46c4e14cd997ee6040768101aefc8fa3c24a4cc58e98f8",
171 | "sha256:841293b17ad704d70c578f1f0013c890e219952169ce8a24ebc063eecf775454",
172 | "sha256:872b8ca05c40d309ed13eb2e582cab0c5a05e81e987ab9c521bf05ad1d5cf5cb",
173 | "sha256:877272cf6b4b7e94c9614f9b10140e198d2186363728ed0f701c6eee1baec1da",
174 | "sha256:8c808594c88a025d4e322d5bb549282c93c8e1ba71b790f539567932722d7bd8",
175 | "sha256:8ed58b8acf29798b036d347791141767ccf65eee7f26bde03a71c944449e53de",
176 | "sha256:91672bacaa030f92fc2f43b620d7b337fd9a5af28b0d6ed3f77afc43c4a64b5a",
177 | "sha256:968f44fdbf6dd757d12920d63b566eeb4d5b395fd2d00d29d7ef00a00582aac9",
178 | "sha256:9f85003f5dfa867e86d53fac6f7e6f30c045673fa27b603c397753bebadc3008",
179 | "sha256:a553dadda40fef6bfa1456dc4be49b113aa92c2a9a9e8711e955618cd69622e3",
180 | "sha256:a68b62a02953b9841730db7797422f983935aeefceb1679f0fc85cbfbd311c32",
181 | "sha256:abbe9fa13da955feb8202e215c4018f4bb57469b1b78c7a4c5c7b93001699938",
182 | "sha256:ad881edc7ccb9d65b0224f4e4d05a1e85cf62d73aab798943df6d48ab0cd79a1",
183 | "sha256:b1792d939ec70abe76f5054d3f36ed5656021dcad1322d1cc996d4e54165cef9",
184 | "sha256:b428ef021242344340460fa4c9185d0b1f66fbdbfecc6c63eff4b7c29fad429d",
185 | "sha256:b533558eae785e33e8c148a8d9921692a9fe5aa516efbdff8606e7d87b9d5824",
186 | "sha256:ba59c92039ec0a66103b1d5fe588fa546373587a7d68f5c96f743c3396afc04b",
187 | "sha256:bc8d3bd6c72b2dd9decf16ce70e20abcb3274ba01b4e1c96031e0c4067d1e7cd",
188 | "sha256:bc9db8a3efb3e403e4ecc6cd9489ea2bac94244f80c78e27c31dcc00d2790ac2",
189 | "sha256:bf7d9fce9bcc4752ca4a1b80aabd38f6d19009ea5cbda0e0856983cf6d0023f5",
190 | "sha256:c2dbb44c3f7e6c4d3487b31037b1bdbf424d97687c1747ce4ff2895795c9bf69",
191 | "sha256:c79ebe8f3676a4c6630fd3f777f3cfecf9289666c84e775a67d1d358578dc2e3",
192 | "sha256:c97528e64cb9ebeff9701e7938653a9951922f2a38bd847787d4a8e498cc83ae",
193 | "sha256:d0611a0a2a518464c05ddd5a3a1a0e856ccc10e67079bb17f265ad19ab3c7597",
194 | "sha256:d06adcfa62a4431d404c31216f0f8ac97397d799cd53800e9d3efc2fbb3cf14e",
195 | "sha256:d41997519fcba4a1e46eb4a2fe31bc12f0ff957b2b81bac28db24744f333e955",
196 | "sha256:d5b61785a9ce44e5a4b880272baa7cf6c8f48a5180c3e81c59553ba0cb0821ca",
197 | "sha256:da152d8cdcab0e56e4f45eb08b9aea6455845ec83172092f09b0e077ece2cf7a",
198 | "sha256:da7e547706e69e45d95e116e6939488d62174e033b763ab1496b4c29b76fabea",
199 | "sha256:db5283d90da4174865d520e7366801a93777201e91e79bacbac6e6927cbceede",
200 | "sha256:db608a6757adabb32f1cfe6066e39b3706d8c3aa69bbc353a5b61edad36a5cb4",
201 | "sha256:e0ea21f66820452a3f5d1655f8704a60d66ba1191359b96541eaf457710a5fc6",
202 | "sha256:e7da3fec7408813a7cebc9e4ec55afed2d0fd65c4754bc376bf03498d4e92686",
203 | "sha256:e92a513161077b53447160b9bd8f522edfbed4bd9759e4c18ab05d7ef7e49408",
204 | "sha256:ecb1fa0db7bf4cff9dac752abb19505a233c7f16684c5826d1f11ebd9472b871",
205 | "sha256:efda5fc8cc1c61e4f639b8067d118e742b812c930f708e6667a5ce0d13499e29",
206 | "sha256:f0a1dbdb5ecbef0d34eb77e56fcb3e95bbd7e50835d9782a45df81cc46949750",
207 | "sha256:f0a71d85ecdd570ded8ac3d1c0f480842f49a40beb423bb8014539a9f32a5897",
208 | "sha256:f4f270de01dd3e129a72efad823da90cc4d6aafb64c410c9033aba70db9f1ff0",
209 | "sha256:f6cb459eea32a4e2cf18ba5fcece2dbdf496384413bc1bae15583f19e567f3b2",
210 | "sha256:f8ad8285b01b0d4695102546b342b493b3ccc6781fc28c8c6a1bb63e95d22f09",
211 | "sha256:f9f39e2f049db33a908319cf46624a569b36983c7c78318e9726a4cb8923b26c"
212 | ],
213 | "markers": "python_version >= '3.7'",
214 | "version": "==1.4.4"
215 | },
216 | "matplotlib": {
217 | "hashes": [
218 | "sha256:08308bae9e91aca1ec6fd6dda66237eef9f6294ddb17f0d0b3c863169bf82353",
219 | "sha256:14645aad967684e92fc349493fa10c08a6da514b3d03a5931a1bac26e6792bd1",
220 | "sha256:21e9cff1a58d42e74d01153360de92b326708fb205250150018a52c70f43c290",
221 | "sha256:28506a03bd7f3fe59cd3cd4ceb2a8d8a2b1db41afede01f66c42561b9be7b4b7",
222 | "sha256:2bf092f9210e105f414a043b92af583c98f50050559616930d884387d0772aba",
223 | "sha256:3032884084f541163f295db8a6536e0abb0db464008fadca6c98aaf84ccf4717",
224 | "sha256:3a2cb34336110e0ed8bb4f650e817eed61fa064acbefeb3591f1b33e3a84fd96",
225 | "sha256:3ba2af245e36990facf67fde840a760128ddd71210b2ab6406e640188d69d136",
226 | "sha256:3d7bc90727351fb841e4d8ae620d2d86d8ed92b50473cd2b42ce9186104ecbba",
227 | "sha256:438196cdf5dc8d39b50a45cb6e3f6274edbcf2254f85fa9b895bf85851c3a613",
228 | "sha256:46a561d23b91f30bccfd25429c3c706afe7d73a5cc64ef2dfaf2b2ac47c1a5dc",
229 | "sha256:4cf327e98ecf08fcbb82685acaf1939d3338548620ab8dfa02828706402c34de",
230 | "sha256:4f99e1b234c30c1e9714610eb0c6d2f11809c9c78c984a613ae539ea2ad2eb4b",
231 | "sha256:544764ba51900da4639c0f983b323d288f94f65f4024dc40ecb1542d74dc0500",
232 | "sha256:56d94989191de3fcc4e002f93f7f1be5da476385dde410ddafbb70686acf00ea",
233 | "sha256:57bfb8c8ea253be947ccb2bc2d1bb3862c2bccc662ad1b4626e1f5e004557042",
234 | "sha256:617f14ae9d53292ece33f45cba8503494ee199a75b44de7717964f70637a36aa",
235 | "sha256:6eb88d87cb2c49af00d3bbc33a003f89fd9f78d318848da029383bfc08ecfbfb",
236 | "sha256:75d4725d70b7c03e082bbb8a34639ede17f333d7247f56caceb3801cb6ff703d",
237 | "sha256:770a205966d641627fd5cf9d3cb4b6280a716522cd36b8b284a8eb1581310f61",
238 | "sha256:7b73305f25eab4541bd7ee0b96d87e53ae9c9f1823be5659b806cd85786fe882",
239 | "sha256:7c9a4b2da6fac77bcc41b1ea95fadb314e92508bf5493ceff058e727e7ecf5b0",
240 | "sha256:81a6b377ea444336538638d31fdb39af6be1a043ca5e343fe18d0f17e098770b",
241 | "sha256:83111e6388dec67822e2534e13b243cc644c7494a4bb60584edbff91585a83c6",
242 | "sha256:8704726d33e9aa8a6d5215044b8d00804561971163563e6e6591f9dcf64340cc",
243 | "sha256:89768d84187f31717349c6bfadc0e0d8c321e8eb34522acec8a67b1236a66332",
244 | "sha256:8bf26ade3ff0f27668989d98c8435ce9327d24cffb7f07d24ef609e33d582439",
245 | "sha256:8c587963b85ce41e0a8af53b9b2de8dddbf5ece4c34553f7bd9d066148dc719c",
246 | "sha256:95cbc13c1fc6844ab8812a525bbc237fa1470863ff3dace7352e910519e194b1",
247 | "sha256:97cc368a7268141afb5690760921765ed34867ffb9655dd325ed207af85c7529",
248 | "sha256:a867bf73a7eb808ef2afbca03bcdb785dae09595fbe550e1bab0cd023eba3de0",
249 | "sha256:b867e2f952ed592237a1828f027d332d8ee219ad722345b79a001f49df0936eb",
250 | "sha256:c0bd19c72ae53e6ab979f0ac6a3fafceb02d2ecafa023c5cca47acd934d10be7",
251 | "sha256:ce463ce590f3825b52e9fe5c19a3c6a69fd7675a39d589e8b5fbe772272b3a24",
252 | "sha256:cf0e4f727534b7b1457898c4f4ae838af1ef87c359b76dcd5330fa31893a3ac7",
253 | "sha256:def58098f96a05f90af7e92fd127d21a287068202aa43b2a93476170ebd99e87",
254 | "sha256:e99bc9e65901bb9a7ce5e7bb24af03675cbd7c70b30ac670aa263240635999a4",
255 | "sha256:eb7d248c34a341cd4c31a06fd34d64306624c8cd8d0def7abb08792a5abfd556",
256 | "sha256:f67bfdb83a8232cb7a92b869f9355d677bce24485c460b19d01970b64b2ed476",
257 | "sha256:f883a22a56a84dba3b588696a2b8a1ab0d2c3d41be53264115c71b0a942d8fdb",
258 | "sha256:fbdeeb58c0cf0595efe89c05c224e0a502d1aa6a8696e68a73c3efc6bc354304"
259 | ],
260 | "markers": "python_version >= '3.8'",
261 | "version": "==3.7.1"
262 | },
263 | "mizani": {
264 | "hashes": [
265 | "sha256:1d481a4dc673caa9b7cfdc6505b9401f0e9a9f43434d748df0678a1a4017b0e2",
266 | "sha256:e8b7c791041dbb5ba832649922070a6224ccfc7b45e5685f22c9b19a92c048c7"
267 | ],
268 | "markers": "python_version >= '3.8'",
269 | "version": "==0.9.2"
270 | },
271 | "numpy": {
272 | "hashes": [
273 | "sha256:0ec87a7084caa559c36e0a2309e4ecb1baa03b687201d0a847c8b0ed476a7187",
274 | "sha256:1a7d6acc2e7524c9955e5c903160aa4ea083736fde7e91276b0e5d98e6332812",
275 | "sha256:202de8f38fc4a45a3eea4b63e2f376e5f2dc64ef0fa692838e31a808520efaf7",
276 | "sha256:210461d87fb02a84ef243cac5e814aad2b7f4be953b32cb53327bb49fd77fbb4",
277 | "sha256:2d926b52ba1367f9acb76b0df6ed21f0b16a1ad87c6720a1121674e5cf63e2b6",
278 | "sha256:352ee00c7f8387b44d19f4cada524586f07379c0d49270f87233983bc5087ca0",
279 | "sha256:35400e6a8d102fd07c71ed7dcadd9eb62ee9a6e84ec159bd48c28235bbb0f8e4",
280 | "sha256:3c1104d3c036fb81ab923f507536daedc718d0ad5a8707c6061cdfd6d184e570",
281 | "sha256:4719d5aefb5189f50887773699eaf94e7d1e02bf36c1a9d353d9f46703758ca4",
282 | "sha256:4749e053a29364d3452c034827102ee100986903263e89884922ef01a0a6fd2f",
283 | "sha256:5342cf6aad47943286afa6f1609cad9b4266a05e7f2ec408e2cf7aea7ff69d80",
284 | "sha256:56e48aec79ae238f6e4395886b5eaed058abb7231fb3361ddd7bfdf4eed54289",
285 | "sha256:76e3f4e85fc5d4fd311f6e9b794d0c00e7002ec122be271f2019d63376f1d385",
286 | "sha256:7776ea65423ca6a15255ba1872d82d207bd1e09f6d0894ee4a64678dd2204078",
287 | "sha256:784c6da1a07818491b0ffd63c6bbe5a33deaa0e25a20e1b3ea20cf0e43f8046c",
288 | "sha256:8535303847b89aa6b0f00aa1dc62867b5a32923e4d1681a35b5eef2d9591a463",
289 | "sha256:9a7721ec204d3a237225db3e194c25268faf92e19338a35f3a224469cb6039a3",
290 | "sha256:a1d3c026f57ceaad42f8231305d4653d5f05dc6332a730ae5c0bea3513de0950",
291 | "sha256:ab344f1bf21f140adab8e47fdbc7c35a477dc01408791f8ba00d018dd0bc5155",
292 | "sha256:ab5f23af8c16022663a652d3b25dcdc272ac3f83c3af4c02eb8b824e6b3ab9d7",
293 | "sha256:ae8d0be48d1b6ed82588934aaaa179875e7dc4f3d84da18d7eae6eb3f06c242c",
294 | "sha256:c91c4afd8abc3908e00a44b2672718905b8611503f7ff87390cc0ac3423fb096",
295 | "sha256:d5036197ecae68d7f491fcdb4df90082b0d4960ca6599ba2659957aafced7c17",
296 | "sha256:d6cc757de514c00b24ae8cf5c876af2a7c3df189028d68c0cb4eaa9cd5afc2bf",
297 | "sha256:d933fabd8f6a319e8530d0de4fcc2e6a61917e0b0c271fded460032db42a0fe4",
298 | "sha256:ea8282b9bcfe2b5e7d491d0bf7f3e2da29700cec05b49e64d6246923329f2b02",
299 | "sha256:ecde0f8adef7dfdec993fd54b0f78183051b6580f606111a6d789cd14c61ea0c",
300 | "sha256:f21c442fdd2805e91799fbe044a7b999b8571bb0ab0f7850d0cb9641a687092b"
301 | ],
302 | "index": "pypi",
303 | "version": "==1.24.3"
304 | },
305 | "packaging": {
306 | "hashes": [
307 | "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61",
308 | "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"
309 | ],
310 | "markers": "python_version >= '3.7'",
311 | "version": "==23.1"
312 | },
313 | "pandas": {
314 | "hashes": [
315 | "sha256:02755de164da6827764ceb3bbc5f64b35cb12394b1024fdf88704d0fa06e0e2f",
316 | "sha256:0a1e0576611641acde15c2322228d138258f236d14b749ad9af498ab69089e2d",
317 | "sha256:1eb09a242184092f424b2edd06eb2b99d06dc07eeddff9929e8667d4ed44e181",
318 | "sha256:30a89d0fec4263ccbf96f68592fd668939481854d2ff9da709d32a047689393b",
319 | "sha256:50e451932b3011b61d2961b4185382c92cc8c6ee4658dcd4f320687bb2d000ee",
320 | "sha256:51a93d422fbb1bd04b67639ba4b5368dffc26923f3ea32a275d2cc450f1d1c86",
321 | "sha256:598e9020d85a8cdbaa1815eb325a91cfff2bb2b23c1442549b8a3668e36f0f77",
322 | "sha256:66d00300f188fa5de73f92d5725ced162488f6dc6ad4cecfe4144ca29debe3b8",
323 | "sha256:69167693cb8f9b3fc060956a5d0a0a8dbfed5f980d9fd2c306fb5b9c855c814c",
324 | "sha256:6d6d10c2142d11d40d6e6c0a190b1f89f525bcf85564707e31b0a39e3b398e08",
325 | "sha256:713f2f70abcdade1ddd68fc91577cb090b3544b07ceba78a12f799355a13ee44",
326 | "sha256:7376e13d28eb16752c398ca1d36ccfe52bf7e887067af9a0474de6331dd948d2",
327 | "sha256:77550c8909ebc23e56a89f91b40ad01b50c42cfbfab49b3393694a50549295ea",
328 | "sha256:7b21cb72958fc49ad757685db1919021d99650d7aaba676576c9e88d3889d456",
329 | "sha256:9ebb9f1c22ddb828e7fd017ea265a59d80461d5a79154b49a4207bd17514d122",
330 | "sha256:a18e5c72b989ff0f7197707ceddc99828320d0ca22ab50dd1b9e37db45b010c0",
331 | "sha256:a6b5f14cd24a2ed06e14255ff40fe2ea0cfaef79a8dd68069b7ace74bd6acbba",
332 | "sha256:b42b120458636a981077cfcfa8568c031b3e8709701315e2bfa866324a83efa8",
333 | "sha256:c4af689352c4fe3d75b2834933ee9d0ccdbf5d7a8a7264f0ce9524e877820c08",
334 | "sha256:c7319b6e68de14e6209460f72a8d1ef13c09fb3d3ef6c37c1e65b35d50b5c145",
335 | "sha256:cf3f0c361a4270185baa89ec7ab92ecaa355fe783791457077473f974f654df5",
336 | "sha256:dd46bde7309088481b1cf9c58e3f0e204b9ff9e3244f441accd220dd3365ce7c",
337 | "sha256:dd5476b6c3fe410ee95926873f377b856dbc4e81a9c605a0dc05aaccc6a7c6c6",
338 | "sha256:e69140bc2d29a8556f55445c15f5794490852af3de0f609a24003ef174528b79",
339 | "sha256:f908a77cbeef9bbd646bd4b81214cbef9ac3dda4181d5092a4aa9797d1bc7774"
340 | ],
341 | "index": "pypi",
342 | "version": "==2.0.2"
343 | },
344 | "patsy": {
345 | "hashes": [
346 | "sha256:7eb5349754ed6aa982af81f636479b1b8db9d5b1a6e957a6016ec0534b5c86b7",
347 | "sha256:bdc18001875e319bc91c812c1eb6a10be4bb13cb81eb763f466179dca3b67277"
348 | ],
349 | "index": "pypi",
350 | "version": "==0.5.3"
351 | },
352 | "pillow": {
353 | "hashes": [
354 | "sha256:07999f5834bdc404c442146942a2ecadd1cb6292f5229f4ed3b31e0a108746b1",
355 | "sha256:0852ddb76d85f127c135b6dd1f0bb88dbb9ee990d2cd9aa9e28526c93e794fba",
356 | "sha256:1781a624c229cb35a2ac31cc4a77e28cafc8900733a864870c49bfeedacd106a",
357 | "sha256:1e7723bd90ef94eda669a3c2c19d549874dd5badaeefabefd26053304abe5799",
358 | "sha256:229e2c79c00e85989a34b5981a2b67aa079fd08c903f0aaead522a1d68d79e51",
359 | "sha256:22baf0c3cf0c7f26e82d6e1adf118027afb325e703922c8dfc1d5d0156bb2eeb",
360 | "sha256:252a03f1bdddce077eff2354c3861bf437c892fb1832f75ce813ee94347aa9b5",
361 | "sha256:2dfaaf10b6172697b9bceb9a3bd7b951819d1ca339a5ef294d1f1ac6d7f63270",
362 | "sha256:322724c0032af6692456cd6ed554bb85f8149214d97398bb80613b04e33769f6",
363 | "sha256:35f6e77122a0c0762268216315bf239cf52b88865bba522999dc38f1c52b9b47",
364 | "sha256:375f6e5ee9620a271acb6820b3d1e94ffa8e741c0601db4c0c4d3cb0a9c224bf",
365 | "sha256:3ded42b9ad70e5f1754fb7c2e2d6465a9c842e41d178f262e08b8c85ed8a1d8e",
366 | "sha256:432b975c009cf649420615388561c0ce7cc31ce9b2e374db659ee4f7d57a1f8b",
367 | "sha256:482877592e927fd263028c105b36272398e3e1be3269efda09f6ba21fd83ec66",
368 | "sha256:489f8389261e5ed43ac8ff7b453162af39c3e8abd730af8363587ba64bb2e865",
369 | "sha256:54f7102ad31a3de5666827526e248c3530b3a33539dbda27c6843d19d72644ec",
370 | "sha256:560737e70cb9c6255d6dcba3de6578a9e2ec4b573659943a5e7e4af13f298f5c",
371 | "sha256:5671583eab84af046a397d6d0ba25343c00cd50bce03787948e0fff01d4fd9b1",
372 | "sha256:5ba1b81ee69573fe7124881762bb4cd2e4b6ed9dd28c9c60a632902fe8db8b38",
373 | "sha256:5d4ebf8e1db4441a55c509c4baa7a0587a0210f7cd25fcfe74dbbce7a4bd1906",
374 | "sha256:60037a8db8750e474af7ffc9faa9b5859e6c6d0a50e55c45576bf28be7419705",
375 | "sha256:608488bdcbdb4ba7837461442b90ea6f3079397ddc968c31265c1e056964f1ef",
376 | "sha256:6608ff3bf781eee0cd14d0901a2b9cc3d3834516532e3bd673a0a204dc8615fc",
377 | "sha256:662da1f3f89a302cc22faa9f14a262c2e3951f9dbc9617609a47521c69dd9f8f",
378 | "sha256:7002d0797a3e4193c7cdee3198d7c14f92c0836d6b4a3f3046a64bd1ce8df2bf",
379 | "sha256:763782b2e03e45e2c77d7779875f4432e25121ef002a41829d8868700d119392",
380 | "sha256:77165c4a5e7d5a284f10a6efaa39a0ae8ba839da344f20b111d62cc932fa4e5d",
381 | "sha256:7c9af5a3b406a50e313467e3565fc99929717f780164fe6fbb7704edba0cebbe",
382 | "sha256:7ec6f6ce99dab90b52da21cf0dc519e21095e332ff3b399a357c187b1a5eee32",
383 | "sha256:833b86a98e0ede388fa29363159c9b1a294b0905b5128baf01db683672f230f5",
384 | "sha256:84a6f19ce086c1bf894644b43cd129702f781ba5751ca8572f08aa40ef0ab7b7",
385 | "sha256:8507eda3cd0608a1f94f58c64817e83ec12fa93a9436938b191b80d9e4c0fc44",
386 | "sha256:85ec677246533e27770b0de5cf0f9d6e4ec0c212a1f89dfc941b64b21226009d",
387 | "sha256:8aca1152d93dcc27dc55395604dcfc55bed5f25ef4c98716a928bacba90d33a3",
388 | "sha256:8d935f924bbab8f0a9a28404422da8af4904e36d5c33fc6f677e4c4485515625",
389 | "sha256:8f36397bf3f7d7c6a3abdea815ecf6fd14e7fcd4418ab24bae01008d8d8ca15e",
390 | "sha256:91ec6fe47b5eb5a9968c79ad9ed78c342b1f97a091677ba0e012701add857829",
391 | "sha256:965e4a05ef364e7b973dd17fc765f42233415974d773e82144c9bbaaaea5d089",
392 | "sha256:96e88745a55b88a7c64fa49bceff363a1a27d9a64e04019c2281049444a571e3",
393 | "sha256:99eb6cafb6ba90e436684e08dad8be1637efb71c4f2180ee6b8f940739406e78",
394 | "sha256:9adf58f5d64e474bed00d69bcd86ec4bcaa4123bfa70a65ce72e424bfb88ed96",
395 | "sha256:9b1af95c3a967bf1da94f253e56b6286b50af23392a886720f563c547e48e964",
396 | "sha256:a0aa9417994d91301056f3d0038af1199eb7adc86e646a36b9e050b06f526597",
397 | "sha256:a0f9bb6c80e6efcde93ffc51256d5cfb2155ff8f78292f074f60f9e70b942d99",
398 | "sha256:a127ae76092974abfbfa38ca2d12cbeddcdeac0fb71f9627cc1135bedaf9d51a",
399 | "sha256:aaf305d6d40bd9632198c766fb64f0c1a83ca5b667f16c1e79e1661ab5060140",
400 | "sha256:aca1c196f407ec7cf04dcbb15d19a43c507a81f7ffc45b690899d6a76ac9fda7",
401 | "sha256:ace6ca218308447b9077c14ea4ef381ba0b67ee78d64046b3f19cf4e1139ad16",
402 | "sha256:b416f03d37d27290cb93597335a2f85ed446731200705b22bb927405320de903",
403 | "sha256:bf548479d336726d7a0eceb6e767e179fbde37833ae42794602631a070d630f1",
404 | "sha256:c1170d6b195555644f0616fd6ed929dfcf6333b8675fcca044ae5ab110ded296",
405 | "sha256:c380b27d041209b849ed246b111b7c166ba36d7933ec6e41175fd15ab9eb1572",
406 | "sha256:c446d2245ba29820d405315083d55299a796695d747efceb5717a8b450324115",
407 | "sha256:c830a02caeb789633863b466b9de10c015bded434deb3ec87c768e53752ad22a",
408 | "sha256:cb841572862f629b99725ebaec3287fc6d275be9b14443ea746c1dd325053cbd",
409 | "sha256:cfa4561277f677ecf651e2b22dc43e8f5368b74a25a8f7d1d4a3a243e573f2d4",
410 | "sha256:cfcc2c53c06f2ccb8976fb5c71d448bdd0a07d26d8e07e321c103416444c7ad1",
411 | "sha256:d3c6b54e304c60c4181da1c9dadf83e4a54fd266a99c70ba646a9baa626819eb",
412 | "sha256:d3d403753c9d5adc04d4694d35cf0391f0f3d57c8e0030aac09d7678fa8030aa",
413 | "sha256:d9c206c29b46cfd343ea7cdfe1232443072bbb270d6a46f59c259460db76779a",
414 | "sha256:e49eb4e95ff6fd7c0c402508894b1ef0e01b99a44320ba7d8ecbabefddcc5569",
415 | "sha256:f8286396b351785801a976b1e85ea88e937712ee2c3ac653710a4a57a8da5d9c",
416 | "sha256:f8fc330c3370a81bbf3f88557097d1ea26cd8b019d6433aa59f71195f5ddebbf",
417 | "sha256:fbd359831c1657d69bb81f0db962905ee05e5e9451913b18b831febfe0519082",
418 | "sha256:fe7e1c262d3392afcf5071df9afa574544f28eac825284596ac6db56e6d11062",
419 | "sha256:fed1e1cf6a42577953abbe8e6cf2fe2f566daebde7c34724ec8803c4c0cda579"
420 | ],
421 | "markers": "python_version >= '3.7'",
422 | "version": "==9.5.0"
423 | },
424 | "plotnine": {
425 | "hashes": [
426 | "sha256:be852c6e50e331ad250151dc4120f269ee9ae5e795f67030f7794718b502592a",
427 | "sha256:f0989d1f368925082d543efcfc1cfb7236de8341cc6138ae65b856d70f77106a"
428 | ],
429 | "index": "pypi",
430 | "version": "==0.12.1"
431 | },
432 | "pyparsing": {
433 | "hashes": [
434 | "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb",
435 | "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"
436 | ],
437 | "markers": "python_full_version >= '3.6.8'",
438 | "version": "==3.0.9"
439 | },
440 | "python-dateutil": {
441 | "hashes": [
442 | "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86",
443 | "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"
444 | ],
445 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
446 | "version": "==2.8.2"
447 | },
448 | "pytz": {
449 | "hashes": [
450 | "sha256:1d8ce29db189191fb55338ee6d0387d82ab59f3d00eac103412d64e0ebd0c588",
451 | "sha256:a151b3abb88eda1d4e34a9814df37de2a80e301e68ba0fd856fb9b46bfbbbffb"
452 | ],
453 | "version": "==2023.3"
454 | },
455 | "scipy": {
456 | "hashes": [
457 | "sha256:049a8bbf0ad95277ffba9b3b7d23e5369cc39e66406d60422c8cfef40ccc8415",
458 | "sha256:07c3457ce0b3ad5124f98a86533106b643dd811dd61b548e78cf4c8786652f6f",
459 | "sha256:0f1564ea217e82c1bbe75ddf7285ba0709ecd503f048cb1236ae9995f64217bd",
460 | "sha256:1553b5dcddd64ba9a0d95355e63fe6c3fc303a8fd77c7bc91e77d61363f7433f",
461 | "sha256:15a35c4242ec5f292c3dd364a7c71a61be87a3d4ddcc693372813c0b73c9af1d",
462 | "sha256:1b4735d6c28aad3cdcf52117e0e91d6b39acd4272f3f5cd9907c24ee931ad601",
463 | "sha256:2cf9dfb80a7b4589ba4c40ce7588986d6d5cebc5457cad2c2880f6bc2d42f3a5",
464 | "sha256:39becb03541f9e58243f4197584286e339029e8908c46f7221abeea4b749fa88",
465 | "sha256:43b8e0bcb877faf0abfb613d51026cd5cc78918e9530e375727bf0625c82788f",
466 | "sha256:4b3f429188c66603a1a5c549fb414e4d3bdc2a24792e061ffbd607d3d75fd84e",
467 | "sha256:4c0ff64b06b10e35215abce517252b375e580a6125fd5fdf6421b98efbefb2d2",
468 | "sha256:51af417a000d2dbe1ec6c372dfe688e041a7084da4fdd350aeb139bd3fb55353",
469 | "sha256:5678f88c68ea866ed9ebe3a989091088553ba12c6090244fdae3e467b1139c35",
470 | "sha256:79c8e5a6c6ffaf3a2262ef1be1e108a035cf4f05c14df56057b64acc5bebffb6",
471 | "sha256:7ff7f37b1bf4417baca958d254e8e2875d0cc23aaadbe65b3d5b3077b0eb23ea",
472 | "sha256:aaea0a6be54462ec027de54fca511540980d1e9eea68b2d5c1dbfe084797be35",
473 | "sha256:bce5869c8d68cf383ce240e44c1d9ae7c06078a9396df68ce88a1230f93a30c1",
474 | "sha256:cd9f1027ff30d90618914a64ca9b1a77a431159df0e2a195d8a9e8a04c78abf9",
475 | "sha256:d925fa1c81b772882aa55bcc10bf88324dadb66ff85d548c71515f6689c6dac5",
476 | "sha256:e7354fd7527a4b0377ce55f286805b34e8c54b91be865bac273f527e1b839019",
477 | "sha256:fae8a7b898c42dffe3f7361c40d5952b6bf32d10c4569098d276b4c547905ee1"
478 | ],
479 | "index": "pypi",
480 | "version": "==1.10.1"
481 | },
482 | "six": {
483 | "hashes": [
484 | "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926",
485 | "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"
486 | ],
487 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
488 | "version": "==1.16.0"
489 | },
490 | "statsmodels": {
491 | "hashes": [
492 | "sha256:0ef7fa4813c7a73b0d8a0c830250f021c102c71c95e9fe0d6877bcfb56d38b8c",
493 | "sha256:16bfe0c96a53b20fa19067e3b6bd2f1d39e30d4891ea0d7bc20734a0ae95942d",
494 | "sha256:1c7724ad573af26139a98393ae64bc318d1b19762b13442d96c7a3e793f495c3",
495 | "sha256:3757542c95247e4ab025291a740efa5da91dc11a05990c033d40fce31c450dc9",
496 | "sha256:3b0a135f3bfdeec987e36e3b3b4c53e0bb87a8d91464d2fcc4d169d176f46fdb",
497 | "sha256:582f9e41092e342aaa04920d17cc3f97240e3ee198672f194719b5a3d08657d6",
498 | "sha256:5a6a0a1a06ff79be8aa89c8494b33903442859add133f0dda1daf37c3c71682e",
499 | "sha256:6875c7d689e966d948f15eb816ab5616f4928706b180cf470fd5907ab6f647a4",
500 | "sha256:68b1c768dd94cc5ba8398121a632b673c625491aa7ed627b82cb4c880a25563f",
501 | "sha256:71054f9dbcead56def14e3c9db6f66f943110fdfb19713caf0eb0f08c1ec03fd",
502 | "sha256:76e290f4718177bffa8823a780f3b882d56dd64ad1c18cfb4bc8b5558f3f5757",
503 | "sha256:77b3cd3a5268ef966a0a08582c591bd29c09c88b4566c892a7c087935234f285",
504 | "sha256:7ebe885ccaa64b4bc5ad49ac781c246e7a594b491f08ab4cfd5aa456c363a6f6",
505 | "sha256:8d1e3e10dfbfcd58119ba5a4d3c7d519182b970a2aebaf0b6f539f55ae16058d",
506 | "sha256:9c64ebe9cf376cba0c31aed138e15ed179a1d128612dd241cdf299d159e5e882",
507 | "sha256:a6ad7b8aadccd4e4dd7f315a07bef1bca41d194eeaf4ec600d20dea02d242fce",
508 | "sha256:b587ee5d23369a0e881da6e37f78371dce4238cf7638a455db4b633a1a1c62d6",
509 | "sha256:ce28eb1c397dba437ec39b9ab18f2101806f388c7a0cf9cdfd8f09294ad1c799",
510 | "sha256:d7fda067837df94e0a614d93d3a38fb6868958d37f7f50afe2a534524f2660cb",
511 | "sha256:de489e3ed315bdba55c9d1554a2e89faa65d212e365ab81bc323fa52681fc60e",
512 | "sha256:fb471f757fc45102a87e5d86e87dc2c8c78b34ad4f203679a46520f1d863b9da"
513 | ],
514 | "markers": "python_version >= '3.8'",
515 | "version": "==0.14.0"
516 | },
517 | "tzdata": {
518 | "hashes": [
519 | "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a",
520 | "sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda"
521 | ],
522 | "markers": "python_version >= '2'",
523 | "version": "==2023.3"
524 | },
525 | "zipp": {
526 | "hashes": [
527 | "sha256:112929ad649da941c23de50f356a2b5570c954b65150642bccdd66bf194d224b",
528 | "sha256:48904fc76a60e542af151aded95726c1a5c34ed43ab4134b597665c86d7ad556"
529 | ],
530 | "markers": "python_version < '3.10'",
531 | "version": "==3.15.0"
532 | }
533 | },
534 | "develop": {}
535 | }
536 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Difference in Difference in Python
2 |
3 | [](https://pypi.org/project/csdid/)
4 | [](https://pepy.tech/project/csdid)
5 | [](https://github.com/d2cml-ai/csdid/commits/main)
6 | [](https://github.com/d2cml-ai/csdid/stargazers)
7 | [](https://github.com/d2cml-ai/csdid/issues)
8 | [](https://github.com/d2cml-ai/csdid/blob/main/LICENSE)
9 |
10 |
11 | The **csdid** package contains tools for computing average treatment
12 | effect parameters in a Difference-in-Differences setup allowing for
13 |
14 | - More than two time periods
15 |
16 | - Variation in treatment timing (i.e., units can become treated at
17 | different points in time)
18 |
19 | - Treatment effect heterogeneity (i.e, the effect of participating in
20 | the treatment can vary across units and exhibit potentially complex
21 | dynamics, selection into treatment, or time effects)
22 |
23 | - The parallel trends assumption holds only after conditioning on
24 | covariates
25 |
26 | The main parameters are **group-time average treatment effects**. These
27 | are the average treatment effect for a particular group (group is
28 | defined by treatment timing) in a particular time period. These
29 | parameters are a natural generalization of the average treatment effect
30 | on the treated (ATT) which is identified in the textbook case with two
31 | periods and two groups to the case with multiple periods.
32 |
33 | Group-time average treatment effects are also natural building blocks
34 | for more aggregated treatment effect parameters such as overall
35 | treatment effects or event-study-type estimands.
36 |
37 | ## Getting Started
38 |
39 | There has been some recent work on DiD with multiple time periods. The
40 | **csdid** package implements the framework put forward in
41 |
42 | - [Callaway, Brantly and Pedro H.C. Sant’Anna.
43 | “Difference-in-Differences with Multiple Time Periods.” Journal of
44 | Econometrics, Vol. 225, No. 2, pp. 200-230,
45 | 2021.](https://doi.org/10.1016/j.jeconom.2020.12.001) or
46 | \[arXiv\](https://arxiv.org/abs/1803.09015
47 |
48 | This project is based on the original [did R
49 | package](https://github.com/bcallaway11/did).
50 |
51 | ## Instalation
52 |
53 | You can install **csdid** from `pypi` with:
54 |
55 | pip install csdid
56 |
57 | or via github:
58 |
59 | pip install git+https://github.com/d2cml-ai/csdid/
60 |
61 | ### Dependencies
62 |
63 | Additionally, I have created an additional library called `drdid`, which
64 | can be installed via GitHub.
65 |
66 | pip install git+https://github.com/d2cml-ai/DRDID
67 |
68 | ## Basic Example
69 |
70 | The following is a simplified example of the effect of states increasing
71 | their minimum wages on county-level teen employment rates which comes
72 | from [Callaway and Sant’Anna
73 | (2021)](https://authors.elsevier.com/a/1cFzc15Dji4pnC).
74 |
75 | - [More detailed examples are also
76 | available](https://bcallaway11.github.io/did/articles/did-basics.html)
77 |
78 | A subset of the data is available in the package and can be loaded by
79 |
80 | ``` python
81 | from csdid.att_gt import ATTgt
82 | import pandas as pd
83 | data = pd.read_csv("https://raw.githubusercontent.com/d2cml-ai/csdid/function-aggte/data/mpdta.csv")
84 | ```
85 |
86 | The dataset contains 500 observations of county-level teen employment
87 | rates from 2003-2007. Some states are first treated in 2004, some in
88 | 2006, and some in 2007 (see the paper for more details). The important
89 | variables in the dataset are
90 |
91 | - **lemp** This is the log of county-level teen employment. It is the
92 | outcome variable
93 |
94 | - **first.treat** This is the period when a state first increases its
95 | minimum wage. It can be 2004, 2006, or 2007. It is the variable that
96 | defines *group* in this application
97 |
98 | - **year** This is the year and is the *time* variable
99 |
100 | - **countyreal** This is an id number for each county and provides the
101 | individual identifier in this panel data context
102 |
103 | To estimate group-time average treatment effects, use the
104 | **ATTgt().fit()** method
105 |
106 | ``` python
107 | out = ATTgt(yname = "lemp",
108 | gname = "first.treat",
109 | idname = "countyreal",
110 | tname = "year",
111 | xformla = f"lemp~1",
112 | data = data,
113 | ).fit(est_method = 'dr')
114 | ```
115 |
116 | Summary table
117 |
118 | ``` python
119 | out.summ_attgt().summary2
120 | ```
121 |
122 |
123 |
134 |
135 | | | Group | Time | ATT(g, t) | Post | Std. Error | \[95% Pointwise | Conf. Band\] | |
136 | |-----|-------|------|-----------|------|------------|-----------------|--------------|-----|
137 | | 0 | 2004 | 2004 | -0.0105 | 0 | 0.0241 | -0.0781 | 0.0571 | |
138 | | 1 | 2004 | 2005 | -0.0704 | 0 | 0.0324 | -0.1612 | 0.0204 | |
139 | | 2 | 2004 | 2006 | -0.1373 | 0 | 0.0393 | -0.2476 | -0.0269 | \* |
140 | | 3 | 2004 | 2007 | -0.1008 | 0 | 0.0360 | -0.2017 | 0.0001 | |
141 | | 4 | 2006 | 2004 | 0.0065 | 0 | 0.0238 | -0.0601 | 0.0732 | |
142 | | 5 | 2006 | 2005 | -0.0028 | 0 | 0.0188 | -0.0554 | 0.0499 | |
143 | | 6 | 2006 | 2006 | -0.0046 | 0 | 0.0172 | -0.0528 | 0.0437 | |
144 | | 7 | 2006 | 2007 | -0.0412 | 0 | 0.0201 | -0.0976 | 0.0152 | |
145 | | 8 | 2007 | 2004 | 0.0305 | 0 | 0.0147 | -0.0108 | 0.0719 | |
146 | | 9 | 2007 | 2005 | -0.0027 | 0 | 0.0160 | -0.0476 | 0.0421 | |
147 | | 10 | 2007 | 2006 | -0.0311 | 0 | 0.0173 | -0.0796 | 0.0174 | |
148 | | 11 | 2007 | 2007 | -0.0261 | 0 | 0.0171 | -0.0740 | 0.0219 | |
149 |
150 |
151 |
152 | In the graphs, a semicolon `;` should be added to prevent printing the
153 | class and the graph information.
154 |
155 | ``` python
156 | out.plot_attgt();
157 | ```
158 |
159 | /home/runner/work/csdid/csdid/csdid/plots/gplot.py:19: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '['2004' '2005' '2006' '2007']' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
160 | ssresults.loc[:, 'year'] = ssresults['year'].astype(int).astype(str)
161 | /home/runner/work/csdid/csdid/csdid/plots/gplot.py:19: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '['2004' '2005' '2006' '2007']' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
162 | ssresults.loc[:, 'year'] = ssresults['year'].astype(int).astype(str)
163 | /home/runner/work/csdid/csdid/csdid/plots/gplot.py:19: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '['2004' '2005' '2006' '2007']' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
164 | ssresults.loc[:, 'year'] = ssresults['year'].astype(int).astype(str)
165 |
166 | 
167 |
168 | ``` python
169 | out.aggte(typec='calendar');
170 | ```
171 |
172 |
173 |
174 | Overall summary of ATT's based on calendar time aggregation:
175 | ATT Std. Error [95.0% Conf. Int.]
176 | -0.0417 0.0169 -0.0748 -0.0086 *
177 |
178 |
179 | Time Effects (calendar):
180 | Time Estimate Std. Error [95.0% Simult. Conf. Band
181 | 0 2004 -0.0105 0.0244 -0.0584 0.0374
182 | 1 2005 -0.0704 0.0307 -0.1305 -0.0103 *
183 | 2 2006 -0.0488 0.0210 -0.0900 -0.0076 *
184 | 3 2007 -0.0371 0.0136 -0.0637 -0.0105 *
185 | ---
186 | Signif. codes: `*' confidence band does not cover 0
187 | Control Group: Never Treated ,
188 | Anticipation Periods: 0
189 | Estimation Method: Doubly Robust
190 |
191 | ``` python
192 | out.plot_aggte();
193 | ```
194 |
195 | /home/runner/work/csdid/csdid/csdid/plots/gplot.py:19: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '['2004' '2005' '2006' '2007']' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
196 | ssresults.loc[:, 'year'] = ssresults['year'].astype(int).astype(str)
197 |
198 | 
199 |
200 |
201 | **Event Studies**
202 |
203 | Although in the current example it is pretty easy to directly interpret
204 | the group-time average treatment effects, there are many cases where it
205 | is convenient to aggregate the group-time average treatment effects into
206 | a small number of parameters. A main type of aggregation is into an
207 | *event study* plot.
208 |
209 | To make an event study plot in the **csdid** package, one can use the
210 | **aggte** function with **dynamic** option
211 |
212 | ``` python
213 | out.aggte(typec='dynamic');
214 | ```
215 |
216 |
217 |
218 | Overall summary of ATT's based on event-study/dynamic aggregation:
219 | ATT Std. Error [95.0% Conf. Int.]
220 | -0.0772 0.0207 -0.1179 -0.0366 *
221 |
222 |
223 | Dynamic Effects:
224 | Event time Estimate Std. Error [95.0% Simult. Conf. Band
225 | 0 -3 0.0305 0.0146 0.0019 0.0591 *
226 | 1 -2 -0.0006 0.0129 -0.0259 0.0248
227 | 2 -1 -0.0245 0.0141 -0.0521 0.0032
228 | 3 0 -0.0199 0.0117 -0.0428 0.0030
229 | 4 1 -0.0510 0.0154 -0.0811 -0.0208 *
230 | 5 2 -0.1373 0.0366 -0.2091 -0.0655 *
231 | 6 3 -0.1008 0.0337 -0.1669 -0.0347 *
232 | ---
233 | Signif. codes: `*' confidence band does not cover 0
234 | Control Group: Never Treated ,
235 | Anticipation Periods: 0
236 | Estimation Method: Doubly Robust
237 |
238 | The column `event time` is for each group relative to when they first
239 | participate in the treatment. To give some examples, `event time=0`
240 | corresponds to the *on impact* effect, and `event time=-1` is the
241 | *effect* in the period before a unit becomes treated (checking that this
242 | is equal to 0 is potentially useful as a pre-test).
243 |
244 | To plot the event study, use **plot_aggte** method
245 | ``` python
246 | out.plot_aggte();
247 | ```
248 |
249 | /home/runner/work/csdid/csdid/csdid/plots/gplot.py:19: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '['2004' '2005' '2006' '2007']' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
250 | ssresults.loc[:, 'year'] = ssresults['year'].astype(int).astype(str)
251 |
252 | 
253 |
254 | The figure here is very similar to the group-time average treatment
255 | effects. Red dots are pre-treatment periods, blue dots are
256 | post-treatment periods. The difference is that the x-axis is in event
257 | time.
258 |
259 | **Overall Effect of Participating in the Treatment**
260 |
261 | The event study above reported an overall effect of participating in the
262 | treatment. This was computed by averaging the average effects computed
263 | at each length of exposure.
264 |
265 | In many cases, a more general purpose overall treatment effect parameter
266 | is give by computing the average treatment effect for each group, and
267 | then averaging across groups. This sort of procedure provides an average
268 | treatment effect parameter with a very similar interpretation to the
269 | Average Treatment Effect on the Treated (ATT) in the two period and two
270 | group case.
271 |
272 | To compute this overall average treatment effect parameter, use
273 |
274 |
275 | ``` python
276 | out.aggte(typec='group');
277 | ```
278 |
279 |
280 |
281 | Overall summary of ATT's based on group/cohort aggregation:
282 | ATT Std. Error [95.0% Conf. Int.]
283 | -0.031 0.0124 -0.0553 -0.0067 *
284 |
285 |
286 | Group Effects:
287 | Group Estimate Std. Error [95.0% Simult. Conf. Band
288 | 0 2004 -0.0797 0.0301 -0.1387 -0.0208 *
289 | 1 2006 -0.0229 0.0172 -0.0567 0.0109
290 | 2 2007 -0.0261 0.0174 -0.0601 0.0080
291 | ---
292 | Signif. codes: `*' confidence band does not cover 0
293 | Control Group: Never Treated ,
294 | Anticipation Periods: 0
295 | Estimation Method: Doubly Robust
296 |
297 | Of particular interest is the `Overall ATT` in the results. Here, we
298 | estimate that increasing the minimum wage decreased teen employment by
299 | 3.1% and the effect is marginally statistically significant.
--------------------------------------------------------------------------------
/README.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: Difference in Difference in Python
3 | format: gfm
4 | ---
5 |
6 | The **csdid** package contains tools for computing average treatment
7 | effect parameters in a Difference-in-Differences setup allowing for
8 |
9 | - More than two time periods
10 |
11 | - Variation in treatment timing (i.e., units can become treated at
12 | different points in time)
13 |
14 | - Treatment effect heterogeneity (i.e, the effect of participating in
15 | the treatment can vary across units and exhibit potentially complex
16 | dynamics, selection into treatment, or time effects)
17 |
18 | - The parallel trends assumption holds only after conditioning on
19 | covariates
20 |
21 | The main parameters are **group-time average treatment effects**. These
22 | are the average treatment effect for a particular group (group is
23 | defined by treatment timing) in a particular time period. These
24 | parameters are a natural generalization of the average treatment effect
25 | on the treated (ATT) which is identified in the textbook case with two
26 | periods and two groups to the case with multiple periods.
27 |
28 | Group-time average treatment effects are also natural building blocks
29 | for more aggregated treatment effect parameters such as overall
30 | treatment effects or event-study-type estimands.
31 |
32 | ## Getting Started
33 |
34 | There has been some recent work on DiD with multiple time periods. The
35 | **did** package implements the framework put forward in
36 |
37 | - [Callaway, Brantly and Pedro H.C. Sant’Anna.
38 | "Difference-in-Differences with Multiple Time Periods." Journal of
39 | Econometrics, Vol. 225, No. 2,
40 | pp. 200-230, 2021.](https://doi.org/10.1016/j.jeconom.2020.12.001)
41 | or [arXiv](https://arxiv.org/abs/1803.09015
42 |
43 | This project is based on the original [did R package](https://github.com/bcallaway11/did).
44 |
45 | ## Instalation
46 |
47 | You can install **csdid** from `pypi` with:
48 |
49 | ```
50 | pip install csdid
51 | ```
52 |
53 | or via github:
54 |
55 | ```
56 | pip install git+https://github.com/d2cml-ai/csdid/
57 | ```
58 |
59 | ### Dependencies
60 |
61 | Additionally, I have created an additional library called `drdid`, which can be installed via GitHub.
62 |
63 | ```
64 | pip install git+https://github.com/d2cml-ai/DRDID
65 | ```
66 |
67 | ## Basic Example
68 |
69 |
70 | The following is a simplified example of the effect of states increasing
71 | their minimum wages on county-level teen employment rates which comes
72 | from [Callaway and Sant’Anna
73 | (2021)](https://authors.elsevier.com/a/1cFzc15Dji4pnC).
74 |
75 | - [More detailed examples are also
76 | available](https://bcallaway11.github.io/did/articles/did-basics.html)
77 |
78 | A subset of the data is available in the package and can be loaded by
79 |
80 | ```{python}
81 | from csdid.att_gt import ATTgt
82 | import pandas as pd
83 | data = pd.read_csv("https://raw.githubusercontent.com/d2cml-ai/csdid/function-aggte/data/mpdta.csv")
84 | ```
85 |
86 | The dataset contains 500 observations of county-level teen employment
87 | rates from 2003-2007. Some states are first treated in 2004, some in
88 | 2006, and some in 2007 (see the paper for more details). The important
89 | variables in the dataset are
90 |
91 | - **lemp** This is the log of county-level teen employment. It is the
92 | outcome variable
93 |
94 | - **first.treat** This is the period when a state first increases its
95 | minimum wage. It can be 2004, 2006, or 2007. It is the variable that
96 | defines *group* in this application
97 |
98 | - **year** This is the year and is the *time* variable
99 |
100 | - **countyreal** This is an id number for each county and provides the
101 | individual identifier in this panel data context
102 |
103 | To estimate group-time average treatment effects, use the **ATTgt().fit()**
104 | method
105 |
106 | ```{python}
107 | out = ATTgt(yname = "lemp",
108 | gname = "first.treat",
109 | idname = "countyreal",
110 | tname = "year",
111 | xformla = f"lemp~1",
112 | data = data,
113 | ).fit(est_method = 'dr')
114 | ```
115 |
116 |
117 | Summary table
118 |
119 | ```{python, eval = False}
120 | out.summ_attgt().summary2
121 | ```
122 |
123 | In the graphs, a semicolon `;` should be added to prevent printing the class and the graph information.
124 |
125 | ```{python}
126 | out.plot_attgt();
127 | ```
128 |
129 |
130 | ```{python}
131 | out.aggte(typec='calendar');
132 | ```
133 |
134 |
135 | ```{python}
136 | out.plot_aggte();
137 | ```
--------------------------------------------------------------------------------
/README_files/figure-commonmark/cell-5-output-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2cml-ai/csdid/7ad721d4c384bca147ae16de84b7c5df8fe26959/README_files/figure-commonmark/cell-5-output-1.png
--------------------------------------------------------------------------------
/README_files/figure-commonmark/cell-5-output-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2cml-ai/csdid/7ad721d4c384bca147ae16de84b7c5df8fe26959/README_files/figure-commonmark/cell-5-output-2.png
--------------------------------------------------------------------------------
/README_files/figure-commonmark/cell-7-output-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2cml-ai/csdid/7ad721d4c384bca147ae16de84b7c5df8fe26959/README_files/figure-commonmark/cell-7-output-1.png
--------------------------------------------------------------------------------
/README_files/figure-commonmark/cell-7-output-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2cml-ai/csdid/7ad721d4c384bca147ae16de84b7c5df8fe26959/README_files/figure-commonmark/cell-7-output-2.png
--------------------------------------------------------------------------------
/README_files/figure-commonmark/cell-8-output-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2cml-ai/csdid/7ad721d4c384bca147ae16de84b7c5df8fe26959/README_files/figure-commonmark/cell-8-output-2.png
--------------------------------------------------------------------------------
/configs/data.yml:
--------------------------------------------------------------------------------
1 | simdata: data/sim_data.csv
2 | mpdata: data/mpdta.csv
3 |
--------------------------------------------------------------------------------
/csdid/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2cml-ai/csdid/7ad721d4c384bca147ae16de84b7c5df8fe26959/csdid/__init__.py
--------------------------------------------------------------------------------
/csdid/_version.py:
--------------------------------------------------------------------------------
1 | __version = '0.2.7'
2 |
--------------------------------------------------------------------------------
/csdid/aggte_fnc/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2cml-ai/csdid/7ad721d4c384bca147ae16de84b7c5df8fe26959/csdid/aggte_fnc/__init__.py
--------------------------------------------------------------------------------
/csdid/aggte_fnc/aggte.py:
--------------------------------------------------------------------------------
1 | #' @title Aggregate Group-Time Average Treatment Effects
2 | #'
3 | #' @description A function to take group-time average treatment effects
4 | #' and aggregate them into a smaller number of parameters. There are
5 | #' several possible aggregations including "simple", "dynamic", "group",
6 | #' and "calendar."
7 | #'
8 | #' @param MP an MP object (i.e., the results of the [att_gt()] method)
9 | #' @param type Which type of aggregated treatment effect parameter to compute.
10 | #' One option is "simple" (this just computes a weighted average of all
11 | #' group-time average treatment effects with weights proportional to group
12 | #' size). Other options are "dynamic" (this computes average effects across
13 | #' different lengths of exposure to the treatment and is similar to an
14 | #' "event study"; here the overall effect averages the effect of the
15 | #' treatment across all positive lengths of exposure); "group" (this
16 | #' is the default option and
17 | #' computes average treatment effects across different groups; here
18 | #' the overall effect averages the effect across different groups); and
19 | #' "calendar" (this computes average treatment effects across different
20 | #' time periods; here the overall effect averages the effect across each
21 | #' time period).
22 | #' @param balance_e If set (and if one computes dynamic effects), it balances
23 | #' the sample with respect to event time. For example, if `balance.e=2`,
24 | #' `aggte` will drop groups that are not exposed to treatment for
25 | #' at least three periods. (the initial period when `e=0` as well as the
26 | #' next two periods when `e=1` and the `e=2`). This ensures that
27 | #' the composition of groups does not change when event time changes.
28 | #' @param min_e For event studies, this is the smallest event time to compute
29 | #' dynamic effects for. By default, `min_e = -Inf` so that effects at
30 | #' all lengths of exposure are computed.
31 | #' @param max_e For event studies, this is the largest event time to compute
32 | #' dynamic effects for. By default, `max_e = Inf` so that effects at
33 | #' all lengths of exposure are computed.
34 | #' @param na.rm Logical value if we are to remove missing Values from analyses. Defaults is FALSE.
35 | #' @param bstrap Boolean for whether or not to compute standard errors using
36 | #' the multiplier bootstrap. If standard errors are clustered, then one
37 | #' must set `bstrap=TRUE`. Default is value set in the MP object. If bstrap is `FALSE`, then analytical
38 | #' standard errors are reported.
39 | #' @param biters The number of bootstrap iterations to use. The default is the value set in the MP object,
40 | #' and this is only applicable if `bstrap=TRUE`.
41 | #'
42 | #' @param cband Boolean for whether or not to compute a uniform confidence
43 | #' band that covers all of the group-time average treatment effects
44 | #' with fixed probability `1-alp`. In order to compute uniform confidence
45 | #' bands, `bstrap` must also be set to `TRUE`. The default is
46 | #' the value set in the MP object
47 | #' @param alp the significance level, default is value set in the MP object.
48 | #' @param clustervars A vector of variables to cluster on. At most, there
49 | #' can be two variables (otherwise will throw an error) and one of these
50 | #' must be the same as idname which allows for clustering at the individual
51 | #' level. Default is the variables set in the MP object
52 |
53 | #'
54 | #' @return An [`AGGTEobj`] object that holds the results from the
55 | #' aggregation
56 | #'
57 | #' @section Examples:
58 | #'
59 | #
60 | #' Initial ATT(g,t) estimates from [att_gt()]
61 | #'
62 | #'
63 | #' You can aggregate the ATT(g,t) in many ways.
64 | #'
65 | #' **Overall ATT:**
66 | #' aggte(out, type = "simple")
67 | #' ```
68 | #'
69 | #' **Dynamic ATT (Event-Study):**
70 | #' aggte(out, type = "dynamic")
71 | #' ```
72 | #'
73 | #' **ATT for each group:**
74 | #' aggte(out, type = "group")
75 | #' ```
76 | #'
77 | #' **ATT for each calendar year:**
78 | #' aggte(out, type = "calendar")
79 | #' ```
80 | #'
81 | #'
82 | #'
83 | from csdid.aggte_fnc.compute_aggte import compute_aggte
84 | import inspect
85 |
86 | def aggte(MP,
87 | typec="group",
88 | balance_e=None,
89 | min_e=float('-inf'),
90 | max_e=float('inf'),
91 | na_rm=False,
92 | bstrap=None,
93 | biters=None,
94 | cband=None,
95 | alp=None,
96 | clustervars=None):
97 | call = inspect.currentframe().f_back.f_locals.copy()
98 |
99 | return compute_aggte(MP=MP,
100 | typec=typec,
101 | balance_e=balance_e,
102 | min_e=min_e,
103 | max_e=max_e,
104 | na_rm=na_rm,
105 | bstrap=bstrap,
106 | biters=biters,
107 | cband=cband,
108 | alp=alp,
109 | clustervars=clustervars,
110 | call=call)
111 |
112 |
113 |
114 |
--------------------------------------------------------------------------------
/csdid/aggte_fnc/compute_aggte.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import pandas as pd
3 | import numpy as np
4 | from scipy.stats import norm
5 |
6 |
7 | from csdid.aggte_fnc.utils import get_agg_inf_func, get_se, wif, AGGTEobj
8 | from csdid.utils.mboot import mboot
9 | import warnings
10 |
11 | def compute_aggte(MP,
12 | typec = "group",
13 | balance_e = None,
14 | min_e = float('-inf'),
15 | max_e = float('inf'),
16 | na_rm = False,
17 | bstrap = None,
18 | biters = None,
19 | cband = None,
20 | alp = None,
21 | clustervars = None,
22 | call = None):
23 |
24 | # =============================================================================
25 | # unpack MP object
26 | # =============================================================================
27 | group = np.array( MP['group'] )
28 | t = np.array( MP['t'] )
29 | att = np.array( MP['att'] )
30 | dp = MP['DIDparams']
31 | tlist = np.array( dp['tlist'] )
32 | glist = np.array( dp['glist'] )
33 | data = dp['data']
34 | inffunc = MP['inffunc']['inffunc']
35 | n = MP['n']
36 | gname = dp['gname']
37 | tname = dp['tname']
38 | idname = dp['idname']
39 | # typec = MP['type']
40 | panel = dp['panel']
41 |
42 |
43 |
44 | if clustervars is None:
45 | clustervars = dp['clustervars']
46 | if bstrap is None:
47 | bstrap = dp['bstrap']
48 | if biters is None:
49 | biters = dp['biters']
50 | if alp is None:
51 | alp = dp['alp']
52 | if cband is None:
53 | cband = dp['cband']
54 |
55 |
56 |
57 | # Overwrite MP objects (to compute bootstrap)
58 | MP['DIDparams']['clustervars'] = clustervars
59 | MP['DIDparams']['bstrap'] = bstrap
60 | MP['DIDparams']['biters'] = biters
61 | MP['DIDparams']['alp'] = alp
62 | MP['DIDparams']['cband'] = cband
63 |
64 | # =============================================================================
65 | # Treat data
66 | # =============================================================================
67 |
68 | if typec not in ["simple", "dynamic", "group", "calendar"]:
69 | raise ValueError("`typec` must be one of ['simple', 'dynamic', 'group', 'calendar']")
70 | # Removing missing values
71 | if na_rm:
72 | notna = ~np.isnan(att)
73 | group = group[notna]
74 | t = t[notna]
75 | att = att[notna]
76 | inffunc = inffunc[:, notna]
77 | glist = np.sort(np.unique(group))
78 |
79 | if typec == "group":
80 | gnotna = []
81 | for g in glist:
82 | indices = np.where((group == g) & (g <= t))
83 | is_not_na = np.any(~np.isnan(att[indices]))
84 | gnotna.append(is_not_na)
85 |
86 | gnotna = np.array(gnotna)
87 | glist = glist[gnotna]
88 | not_all_na = np.isin(group, glist)
89 | group = group[not_all_na]
90 | t = t[not_all_na]
91 | att = att[not_all_na]
92 | inffunc = inffunc[:, not_all_na]
93 | glist = np.sort(np.unique(group))
94 |
95 |
96 | if (not na_rm) and np.any(np.isnan(att)):
97 | raise ValueError("Missing values at att_gt found. If you want to remove these, set `na_rm = True`.")
98 |
99 | if panel:
100 | dta = data[data[tname] == tlist[0]]
101 | else:
102 | dta = data.groupby(idname).mean().reset_index()
103 | dta = dta.iloc[:, 1:]
104 |
105 | # =============================================================================
106 | # Treat data 2
107 | # =============================================================================
108 |
109 |
110 | originalt = t
111 | originalgroup = group
112 | originalglist = glist
113 | originaltlist = tlist
114 | # In case g's are not part of tlist
115 | originalgtlist = np.sort(np.unique(np.concatenate((originaltlist, originalglist))))
116 | uniquet = list(range(1, len(originalgtlist) + 1))
117 |
118 | # Function to switch from "new" t values to original t values
119 | def t2orig(t):
120 | return originalgtlist[uniquet.index(t) if t in uniquet else -1]
121 |
122 | # Function to switch between "original" t values and new t values
123 | def orig2t(orig):
124 | new_t = [uniquet[i] for i in range(len(originalgtlist)) if originalgtlist[i] == orig]
125 | out = new_t[0] if new_t else None
126 | return out
127 |
128 | t = [orig2t(orig) for orig in originalt]
129 | group = [orig2t(orig) for orig in originalgroup]
130 | glist = [orig2t(orig) for orig in originalglist]
131 | tlist = np.asarray(list(set(t)))
132 | maxT = max(t)
133 |
134 | # Set the weights
135 | # return data.columns
136 | weights_ind = dta['w1'].to_numpy()
137 |
138 | # We can work in overall probabilities because conditioning will cancel out
139 | # since it shows up in numerator and denominator
140 | pg = np.array([np.mean(weights_ind * (dta[gname].to_numpy() == g)) for g in originalglist])
141 |
142 | # Length of this is equal to the number of groups
143 | pgg = pg
144 |
145 | # Same but length is equal to the number of ATT(g,t)
146 | pg = [pg[glist.index(g)] for g in group]
147 |
148 | # Which group time average treatment effects are post-treatment
149 | keepers = [i for i in range(len(group)) if group[i] <= t[i] <= (group[i] + max_e)] ### added second condition to allow for limit on longest period included in att
150 |
151 | # n x 1 vector of group variable
152 | G = [orig2t(g) for g in dta[gname].to_numpy()]
153 |
154 | # =============================================================================
155 | # simple
156 | # =============================================================================
157 |
158 |
159 | if typec == "simple":
160 | # Simple ATT
161 | # Averages all post-treatment ATT(g,t) with weights given by group size
162 | pg = np.array(pg)
163 | simple_att = np.sum(att[keepers] * pg[keepers]) / np.sum(pg[keepers])
164 | if np.isnan(simple_att):
165 | simple_att = None
166 |
167 | # Get the part of the influence function coming from estimated weights
168 | simple_wif = wif(keepers, pg, weights_ind, G, group)
169 |
170 | # Get the overall influence function
171 | simple_if = get_agg_inf_func(att = att ,
172 | inffunc = inffunc ,
173 | whichones = keepers ,
174 | weights_agg = np.array(pg)[keepers]/np.sum(np.array(pg)[keepers]) ,
175 | wif = simple_wif )[:, None]
176 |
177 | # Get standard errors from the overall influence function
178 | simple_se = get_se(simple_if, dp)
179 |
180 | if simple_se is not None:
181 | if simple_se <= np.sqrt(np.finfo(float).eps) * 10:
182 | simple_se = None
183 |
184 | AGGTEobj_print = AGGTEobj(overall_att=simple_att,
185 | overall_se=simple_se,
186 | typec=typec,
187 | inf_function={'simple_att': simple_if},
188 | call=call, DIDparams=dp)
189 |
190 | return AGGTEobj_print
191 |
192 |
193 | # =============================================================================
194 | # GRoup
195 | # =============================================================================
196 |
197 | if typec == "group":
198 | group = np.array(group)
199 | t = np.array(t)
200 | pg = np.array(pg)
201 | selective_att_g = [np.mean(att[( group== g) & (t >= g) & (t <= (group + max_e))]) for g in glist]
202 | selective_att_g = np.asarray(selective_att_g)
203 | selective_att_g[np.isnan(selective_att_g)] = None
204 |
205 | selective_se_inner = [None] * len(glist)
206 | for i, g in enumerate(glist):
207 | whichg = np.where(np.logical_and.reduce((group == g, g <= t, t <= (group + max_e))))[0]
208 | weightsg = pg[whichg] / np.sum(pg[whichg])
209 | inf_func_g = get_agg_inf_func(att = att ,
210 | inffunc = inffunc ,
211 | whichones = whichg ,
212 | weights_agg = weightsg ,
213 | wif = None)[:, None]
214 | se_g = get_se(inf_func_g, dp)
215 | selective_se_inner[i] = {'inf_func': inf_func_g, 'se': se_g}
216 |
217 | # recover standard errors separately by group
218 | selective_se_g = np.asarray([item['se'] for item in selective_se_inner]).T
219 |
220 | selective_se_g[selective_se_g <= np.sqrt(np.finfo(float).eps) * 10] = None
221 |
222 | selective_inf_func_g = np.column_stack([elem["inf_func"] for elem in selective_se_inner])
223 |
224 | # use multiplier bootstrap (across groups) to get critical value
225 | # for constructing uniform confidence bands
226 | selective_crit_val = norm.ppf(1 - alp/2)
227 |
228 | if dp['cband']:
229 | if not dp['bstrap']:
230 | print("Used bootstrap procedure to compute simultaneous confidence band")
231 |
232 | selective_crit_val = mboot(selective_inf_func_g, dp)['crit_val']
233 |
234 | if np.isnan(selective_crit_val) or np.isinf(selective_crit_val):
235 | print("Simultaneous critical value is NA. This probably happened because we cannot compute t-statistic (std errors are NA). We then report pointwise conf. intervals.")
236 | selective_crit_val = norm.ppf(1 - alp/2)
237 | dp['cband'] = False
238 |
239 | if selective_crit_val < norm.ppf(1 - alp/2):
240 | print("Simultaneous conf. band is somehow smaller than pointwise one using normal approximation. Since this is unusual, we are reporting pointwise confidence intervals")
241 | selective_crit_val = norm.ppf(1 - alp/2)
242 | dp['cband'] = False
243 |
244 | if selective_crit_val >= 7:
245 | print("Simultaneous critical value is arguably 'too large' to be reliable. This usually happens when the number of observations per group is small and/or there is not much variation in outcomes.")
246 |
247 | # get overall att under selective treatment timing
248 | # (here use pgg instead of pg because we can just look at each group)
249 | selective_att = np.sum(selective_att_g * pgg) / np.sum(pgg)
250 |
251 | # account for having to estimate pgg in the influence function
252 | selective_wif = wif(keepers = np.arange(1, len(glist)+1)-1,
253 | pg = pgg,
254 | weights_ind = weights_ind,
255 | G = G,
256 | group = group)
257 |
258 | # get overall influence function
259 | selective_inf_func = get_agg_inf_func(att = selective_att_g,
260 | inffunc = selective_inf_func_g,
261 | whichones = np.arange(1, len(glist)+1)-1,
262 | weights_agg = pgg/np.sum(pgg),
263 | wif = selective_wif)[:, None]
264 |
265 | # get overall standard error
266 | selective_se = get_se(selective_inf_func, dp)
267 | if not np.isnan(selective_se):
268 | if selective_se <= np.sqrt(np.finfo(float).eps) * 10:
269 | selective_se = None
270 |
271 | AGGTEobj_print = AGGTEobj(overall_att = selective_att,
272 | overall_se = selective_se,
273 | typec = typec,
274 | egt = originalglist,
275 | att_egt = selective_att_g,
276 | se_egt = selective_se_g,
277 | crit_val_egt = selective_crit_val,
278 | inf_function = {'selective_inf_func_g': selective_inf_func_g,
279 | 'selective_inf_func': selective_inf_func},
280 | call = call,
281 | DIDparams = dp)
282 |
283 | return AGGTEobj_print
284 |
285 |
286 | # =============================================================================
287 | # Dynamic
288 | # =============================================================================
289 |
290 | if typec == "dynamic":
291 | # event times
292 | # this looks at all available event times
293 | # note: event times can be negative here.
294 | # note: event time = 0 corresponds to "on impact"
295 | eseq = np.unique(np.array(originalt) - np.array(originalgroup) ) # Subtract corresponding elements and convert to NumPy array
296 | eseq = np.sort(eseq) # Sort the unique values in ascending order
297 |
298 | # if the user specifies balance_e, then we are going to
299 | # drop some event times and some groups; if not, we just
300 | # keep everything (that is what this variable is for)
301 | originalt = np.array(originalt)
302 | originalgroup = np.array(originalgroup)
303 | pg = np.array(pg)
304 | include_balanced_gt = np.repeat(True, len(originalgroup))
305 |
306 | if balance_e is not None:
307 | include_balanced_gt = (t2orig(maxT) - originalgroup >= balance_e)
308 | eseq = np.unique(originalt[include_balanced_gt] - originalgroup[include_balanced_gt])
309 | eseq = np.sort(eseq)
310 | eseq = eseq[(eseq <= balance_e) & (eseq >= balance_e - t2orig(maxT) + t2orig(1))]
311 | eseq = eseq[(eseq >= min_e) & (eseq <= max_e)]
312 |
313 | dynamic_att_e = []
314 | for e in eseq:
315 | whiche = np.where((originalt - originalgroup == e) & include_balanced_gt)
316 | atte = att[whiche]
317 | pge = pg[whiche] / np.sum(pg[whiche])
318 | dynamic_att_e.append(np.sum(atte * pge))
319 |
320 | dynamic_se_inner = []
321 | for e in eseq:
322 | whiche = np.where((originalt - originalgroup == e) & (include_balanced_gt) )[0]
323 | pge = pg[whiche] / sum(pg[whiche])
324 | wif_e = wif(whiche,
325 | pg,
326 | weights_ind,
327 | G,
328 | group)
329 | inf_func_e = get_agg_inf_func(att = att,
330 | inffunc = inffunc,
331 | whichones = whiche,
332 | weights_agg = pge,
333 | wif = wif_e)[:, None]
334 | se_e = get_se(inf_func_e, dp)
335 | dynamic_se_inner.append({'inf_func': inf_func_e, 'se': se_e})
336 |
337 | dynamic_se_e = np.array([item['se'] for item in dynamic_se_inner]).T
338 |
339 | dynamic_se_e[dynamic_se_e <= np.sqrt(np.finfo(float).eps) * 10] = np.nan
340 |
341 | dynamic_inf_func_e = np.column_stack([item['inf_func'] for item in dynamic_se_inner])
342 |
343 | dynamic_crit_val = norm.ppf(1 - alp/2)
344 | if dp['cband']:
345 | if not dp['bstrap']:
346 | print('Used bootstrap procedure to compute simultaneous confidence band')
347 | dynamic_crit_val = mboot(dynamic_inf_func_e, dp)['crit_val']
348 |
349 | if np.isnan(dynamic_crit_val) or np.isinf(dynamic_crit_val):
350 | print('Simultaneous critical value is NA. This probably happened because we cannot compute t-statistic (std errors are NA). We then report pointwise conf. intervals.')
351 | dynamic_crit_val = norm.ppf(1 - alp/2)
352 | dp['cband'] = False
353 |
354 | if dynamic_crit_val < norm.ppf(1 - alp/2):
355 | print('Simultaneous conf. band is somehow smaller than pointwise one using normal approximation. Since this is unusual, we are reporting pointwise confidence intervals')
356 | dynamic_crit_val = norm.ppf(1 - alp/2)
357 | dp['cband'] = False
358 |
359 | if dynamic_crit_val >= 7:
360 | print("Simultaneous critical value is arguably 'too large' to be reliable. This usually happens when the number of observations per group is small and/or there is not much variation in outcomes.")
361 |
362 | epos = eseq >= 0
363 | dynamic_att = np.mean(np.array(dynamic_att_e)[epos])
364 | dynamic_inf_func = get_agg_inf_func(att = np.array(dynamic_att_e)[epos],
365 | inffunc = np.array(dynamic_inf_func_e[:, epos]),
366 | whichones = np.arange(1, np.sum(epos)+1)-1,
367 | weights_agg = np.repeat(1 / np.sum(epos), np.sum(epos)),
368 | wif=None)[:, None]
369 |
370 | dynamic_se = get_se(dynamic_inf_func, dp)
371 | if not np.isnan(dynamic_se):
372 | if dynamic_se <= np.sqrt(np.finfo(float).eps) * 10:
373 | dynamic_se = np.nan
374 |
375 | AGGTEobj_print = AGGTEobj(overall_att=dynamic_att,
376 | overall_se=dynamic_se,
377 | typec=typec,
378 | egt=eseq,
379 | att_egt=dynamic_att_e,
380 | se_egt=dynamic_se_e,
381 | crit_val_egt=dynamic_crit_val,
382 | inf_function={'dynamic_inf_func_e': dynamic_inf_func_e,
383 | 'dynamic_inf_func': dynamic_inf_func},
384 | call=call,
385 | min_e=min_e,
386 | max_e=max_e,
387 | balance_e=balance_e,
388 | DIDparams=dp)
389 |
390 | return AGGTEobj_print
391 |
392 |
393 |
394 | # =============================================================================
395 | # Calendar
396 | # =============================================================================
397 |
398 | # np.array(group)
399 | if typec == "calendar":
400 | minG = min(group)
401 | calendar_tlist = tlist[tlist >= minG]
402 | pg = np.array(pg)
403 | calendar_att_t = []
404 | group = np.array(group)
405 | t = np.array(t)
406 | for t1 in calendar_tlist:
407 | whicht = np.where((t == t1) & (group <= t))[0]
408 | attt = att[whicht]
409 | pgt = pg[whicht] / np.sum(pg[whicht])
410 | calendar_att_t.append(np.sum(pgt * attt))
411 |
412 | # get standard errors and influence functions
413 | # for each time specific att
414 | calendar_se_inner = []
415 | for t1 in calendar_tlist:
416 | which_t = np.where((t == t1) & (group <= t))[0]
417 | pgt = pg[which_t] / np.sum(pg[which_t])
418 | wif_t = wif(keepers=which_t,
419 | pg=pg,
420 | weights_ind=weights_ind,
421 | G=G,
422 | group=group)
423 | inf_func_t = get_agg_inf_func(att=att,
424 | inffunc=inffunc,
425 | whichones=which_t,
426 | weights_agg=pgt,
427 | wif=wif_t)[:, None]
428 | se_t = get_se(inf_func_t, dp)
429 | calendar_se_inner.append({"inf_func": inf_func_t, "se": se_t})
430 |
431 |
432 |
433 | # recover standard errors separately by time
434 | calendar_se_t = np.array([se["se"] for se in calendar_se_inner]).T
435 | calendar_se_t[calendar_se_t <= np.sqrt(np.finfo(float).eps) * 10] = np.nan
436 |
437 | # recover influence function separately by time
438 | calendar_inf_func_t = np.column_stack([se["inf_func"] for se in calendar_se_inner])
439 |
440 | # use multiplier boostrap (across groups) to get critical value
441 | # for constructing uniform confidence bands
442 | calendar_crit_val = norm.ppf(1 - alp/2)
443 |
444 | if dp['cband']:
445 | if not dp['bstrap']:
446 | warnings.warn('Used bootstrap procedure to compute simultaneous confidence band')
447 |
448 | # mboot function is not provided, please define it separately
449 | calendar_crit_val = mboot(calendar_inf_func_t, dp)['crit_val']
450 |
451 | if np.isnan(calendar_crit_val) or np.isinf(calendar_crit_val):
452 | warnings.warn('Simultaneous critical value is NA. This probably happened because we cannot compute t-statistic (std errors are NA). We then report pointwise conf. intervals.')
453 | calendar_crit_val = norm.ppf(1 - alp/2)
454 | dp['cband'] = False
455 |
456 | if calendar_crit_val < norm.ppf(1 - alp/2):
457 | warnings.warn('Simultaneous conf. band is somehow smaller than pointwise one using normal approximation. Since this is unusual, we are reporting pointwise confidence intervals.')
458 | calendar_crit_val = norm.ppf(1 - alp/2)
459 | dp['cband'] = False
460 |
461 | if calendar_crit_val >= 7:
462 | warnings.warn("Simultaneous critical value is arguably 'too large' to be reliable. This usually happens when the number of observations per group is small and/or there is not much variation in outcomes.")
463 |
464 |
465 | # get overall att under calendar time effects
466 | # this is just average over all time periods
467 | calendar_att = np.mean(calendar_att_t)
468 |
469 | # get overall influence function
470 | calendar_inf_func = get_agg_inf_func(att=calendar_att_t,
471 | inffunc=calendar_inf_func_t,
472 | whichones=range(len(calendar_tlist)),
473 | weights_agg=np.repeat(1/len(calendar_tlist), len(calendar_tlist)),
474 | wif=None)[:, None]
475 | calendar_inf_func = np.array(calendar_inf_func)
476 |
477 | # get overall standard error
478 | calendar_se = get_se(calendar_inf_func, dp)
479 | if not np.isnan(calendar_se):
480 | if calendar_se <= np.sqrt(np.finfo(float).eps) * 10:
481 | calendar_se = np.nan
482 |
483 | AGGTEobj_print = AGGTEobj(overall_att=calendar_att,
484 | overall_se=calendar_se,
485 | typec=typec,
486 | egt=list(map(t2orig, calendar_tlist)),
487 | att_egt=calendar_att_t,
488 | se_egt=calendar_se_t,
489 | crit_val_egt=calendar_crit_val,
490 | inf_function={"calendar_inf_func_t": calendar_inf_func_t,
491 | "calendar_inf_func": calendar_inf_func},
492 | call=call,
493 | DIDparams=dp)
494 |
495 | return AGGTEobj_print
496 |
--------------------------------------------------------------------------------
/csdid/aggte_fnc/utils.py:
--------------------------------------------------------------------------------
1 | from csdid.utils.bmisc import TorF
2 | from csdid.utils.mboot import mboot
3 |
4 | import numpy as np
5 | import scipy.stats as stats
6 | import pandas as pd
7 |
8 | def wif(keepers, pg, weights_ind, G, group):
9 | # note: weights are all of the form P(G=g|cond)/sum_cond(P(G=g|cond))
10 | # this is equal to P(G=g)/sum_cond(P(G=g)) which simplifies things here
11 | pg = np.array(pg)
12 | group = np.array(group)
13 |
14 | # effect of estimating weights in the numerator
15 | if1 = np.empty((len(weights_ind), len(keepers)))
16 | for i, k in enumerate(keepers):
17 | numerator = (weights_ind * 1 * TorF(G == group[k])) - pg[k]
18 | # denominator = sum(np.array(pg)[keepers]) )[:, None]
19 | denominator = np.sum(pg[keepers])
20 |
21 | result = numerator[:, None] / denominator
22 | if1[:, i] = result.squeeze()
23 |
24 | # effect of estimating weights in the denominator
25 | if2 = np.empty((len(weights_ind), len(keepers)))
26 | for i, k in enumerate(keepers):
27 | numerator = ( weights_ind * 1 * TorF(G == group[k]) ) - pg[k]
28 | # result = numerator.to_numpy()[:, None] @ multipler[:, None].T
29 | if2[:, i] = numerator.squeeze()
30 | if2 = np.sum(if2, axis=1)
31 | multiplier = ( pg[keepers] / sum( pg[keepers] ) ** 2 )
32 | if2 = np.outer( if2 , multiplier)
33 |
34 | # if1 = [((weights_ind * 1*TorF(G==group[k])) - pg[k]) / sum(pg[keepers]) for k in keepers]
35 | # if2 = np.dot(np.array([weights_ind*1*TorF(G==group[k]) - pg[k] for k in keepers]).T, pg[keepers]/(sum(pg[keepers])**2))
36 | wif_factor = if1 - if2
37 | return wif_factor
38 |
39 | def get_agg_inf_func(att, inffunc, whichones, weights_agg, wif=None):
40 | # enforce weights are in matrix form
41 | weights_agg = np.asarray(weights_agg)
42 |
43 | # multiplies influence function times weights and sums to get vector of weighted IF (of length n)
44 | thisinffunc = np.dot(inffunc[:, whichones], weights_agg)
45 |
46 | # Incorporate influence function of the weights
47 | if wif is not None:
48 | thisinffunc = thisinffunc + np.dot(wif, np.array(att[whichones]))
49 |
50 | # return influence function
51 | return thisinffunc
52 |
53 |
54 | def get_se(thisinffunc, DIDparams=None):
55 | alpha = 0.05
56 | bstrap = False
57 | if DIDparams is not None:
58 | bstrap = DIDparams['bstrap']
59 | alpha = DIDparams['alp']
60 | cband = DIDparams['cband']
61 | n = len(thisinffunc)
62 |
63 | if bstrap:
64 | bout = mboot(thisinffunc, DIDparams)
65 | return bout['se']
66 | else:
67 | return np.sqrt(np.mean((thisinffunc)**2) / n)
68 |
69 | def AGGTEobj(overall_att=None,
70 | overall_se=None,
71 | typec="simple",
72 | egt=None,
73 | att_egt=None,
74 | se_egt=None,
75 | crit_val_egt=None,
76 | inf_function=None,
77 | min_e=None,
78 | max_e=None,
79 | balance_e=None,
80 | call=None,
81 | DIDparams=None):
82 |
83 | out = {
84 | "overall_att": overall_att,
85 | "overall_se": overall_se,
86 | "type": typec,
87 | "egt": egt,
88 | "att_egt": att_egt,
89 | "se_egt": se_egt,
90 | "crit_val_egt": crit_val_egt,
91 | "inf_function": inf_function,
92 | "min_e": min_e,
93 | "max_e": max_e,
94 | "balance_e": balance_e,
95 | "call": call,
96 | "DIDparams": DIDparams
97 | }
98 |
99 |
100 | # Overall estimates
101 | alp = out["DIDparams"]["alp"]
102 | pointwise_cval = stats.norm.ppf(1 - alp / 2)
103 | overall_cband_upper = out["overall_att"] + pointwise_cval * out["overall_se"]
104 | overall_cband_lower = out["overall_att"] - pointwise_cval * out["overall_se"]
105 | out1 = np.column_stack((out["overall_att"], out["overall_se"], overall_cband_lower, overall_cband_upper))
106 | out1 = np.round(out1, 4)
107 | overall_sig = (overall_cband_upper < 0) | (overall_cband_lower > 0)
108 | overall_sig[np.isnan(overall_sig)] = False
109 | overall_sig_text = np.where(overall_sig, "*", "")
110 | out1 = np.column_stack((out1, overall_sig_text))
111 |
112 | print("\n")
113 | if out["type"] == "dynamic":
114 | print("Overall summary of ATT's based on event-study/dynamic aggregation:")
115 | elif out["type"] == "group":
116 | print("Overall summary of ATT's based on group/cohort aggregation:")
117 | elif out["type"] == "calendar":
118 | print("Overall summary of ATT's based on calendar time aggregation:")
119 | colnames = ["ATT", "Std. Error", f"[{100 * (1 - out['DIDparams']['alp'])}%"," Conf. Int.]", ""]
120 | print(pd.DataFrame(out1, columns=colnames).to_string(index=False))
121 | print("\n")
122 |
123 | # Handle cases depending on type
124 | if out["type"] in ["group", "dynamic", "calendar"]:
125 | if out["type"] == "dynamic":
126 | c1name = "Event time"
127 | print("Dynamic Effects:")
128 | elif out["type"] == "group":
129 | c1name = "Group"
130 | print("Group Effects:")
131 | elif out["type"] == "calendar":
132 | c1name = "Time"
133 | print("Time Effects (calendar):")
134 |
135 | cband_text1a = f"{100 * (1 - out['DIDparams']['alp'])}% "
136 | cband_text1b = "Simult. " if out["DIDparams"]["bstrap"] else "Pointwise "
137 | cband_text1 = f"[{cband_text1a}{cband_text1b}"
138 |
139 | cband_lower = out["att_egt"] - out["crit_val_egt"] * out["se_egt"]
140 | cband_upper = out["att_egt"] + out["crit_val_egt"] * out["se_egt"]
141 |
142 | sig = (cband_upper < 0) | (cband_lower > 0)
143 | sig[np.isnan(sig)] = False
144 | sig_text = np.where(sig, "*", "").T
145 |
146 | out2 = pd.DataFrame([out["egt"],
147 | out["att_egt"],
148 | out["se_egt"].flatten(),
149 | np.hstack(cband_lower),
150 | np.hstack(cband_upper)]).T
151 |
152 | out2 = out2.round(4)
153 | out2[0] = out2[0].astype(int)
154 | out2 = pd.concat([out2, pd.DataFrame(sig_text, columns=['sig_text']) ], axis=1)
155 |
156 | out2.columns = [c1name, "Estimate", "Std. Error", cband_text1, "Conf. Band", ""]
157 | print(out2)
158 |
159 |
160 |
161 |
162 |
163 | print("---")
164 | print("Signif. codes: `*' confidence band does not cover 0")
165 |
166 | # Set control group text
167 | control_group = out["DIDparams"]["control_group"]
168 | control_group_text = None
169 | if control_group == "nevertreated":
170 | control_group_text = "Never Treated"
171 | elif control_group == "notyettreated":
172 | control_group_text = "Not Yet Treated"
173 |
174 | if control_group:
175 | print("Control Group: ", control_group_text, ", ")
176 |
177 | # Anticipation periods
178 | print("Anticipation Periods: ", out["DIDparams"]["anticipation"])
179 |
180 | # Estimation method text
181 | est_method = out["DIDparams"]["est_method"]
182 | if isinstance(est_method, str):
183 | est_method_text = est_method
184 | if est_method == "dr":
185 | est_method_text = "Doubly Robust"
186 | elif est_method == "ipw":
187 | est_method_text = "Inverse Probability Weighting"
188 | elif est_method == "reg":
189 | est_method_text = "Outcome Regression"
190 |
191 | print("Estimation Method: ", est_method_text)
192 | print("\n")
193 |
194 | return out
195 |
196 |
197 |
--------------------------------------------------------------------------------
/csdid/att_gt.py:
--------------------------------------------------------------------------------
1 | # from aggte import AGGte
2 | from csdid.aggte_fnc.aggte import aggte as agg_te
3 |
4 | from csdid.attgt_fnc.preprocess_did import pre_process_did
5 | from csdid.attgt_fnc.compute_att_gt import compute_att_gt
6 |
7 | from csdid.utils.mboot import mboot
8 |
9 | from csdid.plots.gplot import gplot, splot
10 |
11 |
12 | import matplotlib.pyplot as plt
13 |
14 | import warnings
15 |
16 | import numpy as np, pandas as pd
17 |
18 | # class ATTgt(AGGte):
19 | class ATTgt:
20 | def __init__(self, yname, tname, idname, gname, data, control_group = ['nevertreated', 'notyettreated'],
21 | xformla: str = None, panel = True, allow_unbalanced_panel = True,
22 | clustervar = None, weights_name = None, anticipation = 0,
23 | cband = False, biters = 1000, alp = 0.05
24 | ):
25 | dp = pre_process_did(
26 | yname=yname, tname = tname, idname=idname, gname = gname,
27 | data = data, control_group=control_group, anticipation=anticipation,
28 | xformla=xformla, panel=panel, allow_unbalanced_panel=allow_unbalanced_panel, cband=cband, clustervar=None, weights_name=None
29 | )
30 |
31 | dp['biters'] = biters
32 | dp['alp'] = alp
33 | dp['true_repeated_cross_sections'] = dp['true_rep_cross_section']
34 | dp['cband'] = cband
35 | dp['panel'] = panel
36 | self.dp = dp
37 |
38 | def fit(self, est_method = 'dr', base_period = 'varying', bstrap = True):
39 | # print(self.dp)
40 | dp = self.dp
41 | result, inffunc = compute_att_gt(dp, est_method = est_method, base_period = base_period)
42 | att = result['att']
43 | n_len = list(map(len, inffunc))
44 | crit_val, se, V = (
45 | 1.96,
46 | np.std(inffunc, axis=1, ddof = 1) / np.sqrt(n_len),
47 | np.zeros(len(att)),
48 | )
49 | if bstrap:
50 | ref_se = mboot(inffunc.T, dp)
51 | crit_val, se = ref_se['crit_val'], ref_se['se']
52 | V = ref_se['V']
53 |
54 | ############# aggte input
55 | group = result['group']
56 | att = result['att']
57 | tt = result['year']
58 | inf_fnc = {'inffunc': inffunc.T}
59 |
60 | dp['bstrap'] = bstrap
61 | dp['est_method'] = est_method
62 | dp['base_period'] = base_period
63 | self.dp = dp
64 | n = dp['n']
65 |
66 | mp = {
67 | 'group': group, 'att': att, 't': tt,
68 | 'DIDparams': dp, 'inffunc': inf_fnc,
69 | 'n': n
70 | }
71 | self.MP = mp
72 |
73 |
74 | cband_lower = att - crit_val * se
75 | cband_upper = att + crit_val * se
76 | sig = (cband_upper < 0) | (cband_lower > 0)
77 | sig[np.isnan(sig)] = False
78 | sig_text = np.where(sig, "*", "")
79 |
80 | result.update(
81 | {
82 | 'se': se, 'l_se': cband_lower,
83 | 'c': crit_val,
84 | 'u_se': cband_upper, 'sig': sig_text
85 | })
86 |
87 | self.results = result
88 |
89 | rst = result
90 | did_object = {
91 | 'group': mp['group'],
92 | 't': mp['t'],
93 | 'att': rst['att'],
94 | 'se': rst['se'],
95 | 'c': rst['c'],
96 | }
97 | self.did_object = did_object
98 | return self
99 | def summ_attgt(self, n = 4):
100 | result = self.results
101 | att_gt = pd.DataFrame(result)
102 | att_gt = att_gt.drop('c', axis=1)
103 | name_attgt_df = ['Group', 'Time', 'ATT(g, t)', 'Post', "Std. Error", "[95% Pointwise", 'Conf. Band]', '']
104 | att_gt.columns = name_attgt_df
105 | att_gt = att_gt.round(n)
106 | self.summary2 = att_gt
107 | return self
108 |
109 | def aggte(
110 | self,
111 | typec = "group",
112 | balance_e = None,
113 | min_e = float('-inf'),
114 | max_e = float('inf'),
115 | na_rm = False,
116 | bstrap = None,
117 | biters = None,
118 | cband = None,
119 | alp = None,
120 | clustervars = None,
121 | ):
122 | mp = self.MP
123 | did_object = self.did_object
124 |
125 | did_object.update({
126 | 'type': typec
127 | }
128 | )
129 |
130 |
131 | atte = agg_te(
132 | mp, typec=typec, balance_e=balance_e,
133 | min_e=min_e, max_e=max_e, na_rm=na_rm, bstrap=bstrap,
134 | biters=biters, cband=cband, alp=alp, clustervars=clustervars
135 | )
136 |
137 | self.atte = atte
138 | return self
139 | def plot_attgt(self, ylim=None,
140 | xlab=None,
141 | ylab=None,
142 | title="Group",
143 | xgap=1,
144 | ncol=1,
145 | legend=True,
146 | group=None,
147 | ref_line=0,
148 | theming=True,
149 | grtitle="Group"
150 | ):
151 |
152 | did_object = self.did_object
153 |
154 | grp = did_object['group']
155 | t_i = did_object['t']
156 |
157 | G = len(np.unique(grp))
158 | Y = len(np.unique(t_i))
159 | g = np.unique(grp)[np.argsort(np.unique(grp))].astype(int)
160 | y = np.unique(t_i)
161 |
162 | results = pd.DataFrame({'year': np.tile(y, G)})
163 | results['group'] = np.repeat(g, Y)
164 | results['grtitle'] = grtitle + ' ' + results['group'].astype(str)
165 | results['att'] = did_object['att']
166 | results['att_se'] = did_object['se']
167 | results['post'] = np.where(results['year'] >= grp, 1, 0)
168 | results['year'] = results['year']
169 | results['c'] = did_object['c']
170 |
171 | self.results_plot_df_attgt = results
172 |
173 | if group is None:
174 | group = g
175 | if any(group not in g for group in group):
176 | raise ValueError("Some of the specified groups do not exist in the data. Reporting all available groups.")
177 |
178 |
179 | legend_1 = False # for multiple subplots, legend outside
180 | fig, axes = plt.subplots(nrows=len(group), ncols=1, figsize=(10, 5)) # Adjust the figsize as needed
181 | handles = []
182 | labels = []
183 | for i, group_cat in enumerate(group):
184 | group_data = results.loc[results['group'] == group_cat]
185 | title = group_data['grtitle'].unique()[0]
186 | ax = axes[i]
187 | ax = gplot(group_data, ax, ylim, xlab, ylab, title, xgap, legend_1, ref_line, theming)
188 | plt.tight_layout()
189 | if legend is True:
190 | handles_ax, labels_ax = ax.get_legend_handles_labels()
191 | handles.extend(handles_ax)
192 | labels.extend(labels_ax)
193 | fig.legend(handles, labels, loc='lower center', fontsize='small', bbox_to_anchor=(0.545, -0.075), ncol=2)
194 |
195 | plt.show()
196 | return fig
197 |
198 | def plot_aggte(self, ylim=None,
199 | xlab=None,
200 | ylab=None,
201 | title="",
202 | xgap=1,
203 | legend=True,
204 | ref_line=0,
205 | theming=True,
206 | **kwargs):
207 |
208 | did_object = self.atte
209 |
210 | post_treat = 1 * (np.asarray(did_object["egt"]).astype(int) >= 0)
211 |
212 | results = {
213 | "year": list(map(int, did_object["egt"])),
214 | "att": did_object["att_egt"],
215 | "att_se": did_object["se_egt"][0],
216 | "post": post_treat
217 | }
218 |
219 | results = pd.DataFrame(results)
220 | self.results_plot_df_aggte = results
221 |
222 | if did_object['crit_val_egt'] is None:
223 | results['c'] = abs(norm.ppf(0.025))
224 | else:
225 | results['c'] = did_object['crit_val_egt']
226 |
227 | if title == "":
228 | title = "Average Effect by Group" if\
229 | did_object["type"] == "group" else\
230 | "Average Effect by Length of Exposure"
231 |
232 |
233 | if did_object["type"] == "group":
234 | fig, ax = plt.subplots(figsize=(10, 5))
235 | p = splot(results, ax, ylim, xlab, ylab, title, legend, ref_line, theming)
236 | plt.tight_layout()
237 | plt.show()
238 |
239 | else:
240 | fig, ax = plt.subplots(figsize=(10, 5))
241 | p = gplot(results, ax, ylim, xlab, ylab, title, xgap, legend, ref_line, theming)
242 | plt.tight_layout()
243 | plt.show()
244 |
245 | return p
246 |
--------------------------------------------------------------------------------
/csdid/attgt_fnc/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2cml-ai/csdid/7ad721d4c384bca147ae16de84b7c5df8fe26959/csdid/attgt_fnc/__init__.py
--------------------------------------------------------------------------------
/csdid/attgt_fnc/compute_att_gt.py:
--------------------------------------------------------------------------------
1 | import numpy as np, pandas as pd
2 | import patsy
3 | from drdid import drdid, reg_did, ipwd_did
4 |
5 | from csdid.utils.bmisc import panel2cs2
6 | import warnings
7 |
8 |
9 | fml = patsy.dmatrices
10 | # Initialize a list to store data for each iteration
11 | results_list = []
12 |
13 | def compute_att_gt(dp, est_method = "dr", base_period = 'varying'):
14 | yname = dp['yname']
15 | tname = dp['tname']
16 | idname = dp['idname']
17 | xformla = dp['xformla']
18 | data = dp['data']
19 | weights_name = dp['weights_name']
20 | # base_period = dp['base_period']
21 | panel = dp['panel']
22 | true_rep_cross_section = dp['true_rep_cross_section']
23 | control_group = dp['control_group']
24 | anticipation = dp['anticipation']
25 | gname = dp['gname']
26 | n = dp['n']
27 | nT = dp['nT']
28 | nG = dp['nG']
29 | tlist = dp['tlist']
30 | glist = dp['glist']
31 |
32 | # Calculate time periods and adjustment factor
33 | tlist_len = len(tlist) - 1 if base_period != "universal" else len(tlist)
34 | tfac = 1 if base_period != "universal" else 0
35 |
36 | inf_func = []
37 |
38 | att_est, group, year, post_array = [], [], [], []
39 |
40 | def add_att_data(att = 0, pst = 0, inf_f = []):
41 | inf_func.append(inf_f)
42 | att_est.append(att)
43 | group.append(g)
44 | year.append(tn)
45 | post_array.append(pst)
46 |
47 | # Handle never treated case
48 | never_treated = control_group == 'nevertreated'
49 | if never_treated:
50 | data['C'] = (data[gname] == 0).astype(int)
51 | data['y_main'] = data[yname]
52 |
53 | # Loop over groups
54 | for g_index, g in enumerate(glist):
55 | # Set up .G once
56 | # Create a binary column 'G_m' to indicate if a row belongs to the current group 'g'
57 | G_main = (data[gname] == glist[g_index])
58 | data = data.assign(G_m=1 * G_main)
59 |
60 | # Loop over time periods
61 | for t_i in range(tlist_len):
62 |
63 | # Set pretreatment period
64 | pret = t_i # Initialize pretreatment period as current time period index
65 | tn = tlist[t_i + tfac] # Current time period (adjusted for tfac)
66 |
67 | # Universal base period
68 | if base_period == 'universal': # Check if using a universal base period
69 | try:
70 | # Set pretreatment period as the last period before treatment
71 | pret = np.where((tlist + anticipation) < g)[0][-1]
72 | except IndexError:
73 | # Handle cases where no pretreatment periods exist
74 | raise ValueError(
75 | f"There are no pre-treatment periods for the group first treated at {g}. Units from this group are dropped."
76 | )
77 |
78 | # If we are in the universal base period
79 | if base_period == 'universal' and pret == tn:
80 | # Normalize results to zero and skip computation
81 | add_att_data(att=0, pst=0, inf_f=np.zeros(len(data)))
82 | continue
83 |
84 | # For non-never treated groups, set up the control group indicator 'C'
85 | if not never_treated:
86 | # Units that are either never treated (gname == 0) or treated in the future
87 | n1 = (data[gname] == 0)
88 | n2 = (data[gname] > (tlist[max(t_i, pret) + tfac] + anticipation))
89 | n3 = (data[gname] != glist[g_index]) # Not in the current group
90 | row_eval = n1 | (n2 & n3) # Combine conditions
91 | data = data.assign(C=1 * row_eval) # Assign the control indicator
92 |
93 | # -----------------------------------------------------------------------------
94 |
95 |
96 | # Check if in post-treatment period
97 | if glist[g_index] <= tlist[t_i + tfac]:
98 | # Use same base period as for post-treatment periods
99 | # This matches R's: tail(which((tlist+anticipation) < glist[g]), 1)
100 | pret_mask = (np.array(tlist) + anticipation) < glist[g_index]
101 | if not any(pret_mask):
102 | warnings.warn(f"There are no pre-treatment periods for the group first treated at {glist[g_index]}\nUnits from this group are dropped")
103 | break
104 |
105 | pret = np.where(pret_mask)[0][-1] # Gets last element like R's tail(..., 1)
106 | # print("this is alex", pret)
107 |
108 |
109 | # Print details for debugging
110 | # print(f"Current period: {tlist[t_i + tfac]}")
111 | # print(f"Current group: {glist[g_index]}")
112 | # print(f"Set pretreatment period to be: {tlist[pret]}")
113 |
114 | # -----------------------------------------------------------------------------
115 | # Debugging and validation
116 | if base_period == 'universal' and pret == tn:
117 | # Normalize results to zero and break the loop
118 | add_att_data(att=0, pst=post_treat, inf_f=np.zeros(len(data)))
119 | continue
120 |
121 | # Post-treatment dummy variable
122 | post_treat = 1 * (g <= tn)
123 |
124 | # Subset the data for the current and pretreatment periods
125 | disdat = data[(data[tname] == tn) | (data[tname] == tlist[pret])]
126 | # print("Shape of disdat:", disdat.shape)
127 |
128 | # results for the case with panel data
129 | #-----------------------------------------------------------------------------
130 |
131 | if panel:
132 | disdat = panel2cs2(disdat, yname, idname, tname)
133 | disdat = disdat.dropna()
134 | n = len(disdat)
135 | dis_idx = np.array(disdat.G_m == 1) | np.array(disdat.C == 1)
136 | disdat = disdat.loc[dis_idx, :]
137 | n1 = len(disdat)
138 | G = disdat.G_m
139 | C = disdat.C
140 | w = disdat.w
141 |
142 | ypre = disdat.y0 if tn > pret else disdat.y1
143 | ypost = disdat.y0 if tn < pret else disdat.y1
144 | _, covariates = fml(xformla, data = disdat, return_type = 'dataframe')
145 |
146 | G, C, w, ypre = map(np.array, [G, C, w, ypre])
147 | ypost, covariates = map(np.array, [ypost, covariates])
148 |
149 | if callable(est_method):
150 | est_att_f = est_method
151 | elif est_method == "reg":
152 | est_att_f = reg_did.reg_did_panel
153 | elif est_method == "ipw":
154 | est_att_f = ipwd_did.std_ipw_did_panel
155 | elif est_method == "dr":
156 | est_att_f = drdid.drdid_panel
157 |
158 | att_gt, att_inf_func = est_att_f(ypost, ypre, G, i_weights=w, covariates=covariates)
159 |
160 | inf_zeros = np.zeros(n)
161 | att_inf = n / n1 * att_inf_func
162 | inf_zeros[dis_idx] = att_inf
163 |
164 | add_att_data(att_gt, inf_f=inf_zeros)
165 |
166 | #-----------------------------------------------------------------------------
167 | # results for the case with no panel data
168 | #-----------------------------------------------------------------------------
169 |
170 | if not panel:
171 | # Fixed right_ids selection
172 | right_ids = disdat.loc[disdat.G_m.eq(1) | disdat.C.eq(1), 'rowid'].to_numpy()
173 | # print("Shape of ids:",right_ids.shape) # Show dimensions
174 | # Consistent time period selection
175 | dis_idx = (data['rowid'].isin(right_ids)) & \
176 | (data[tname].isin([tlist[t_i + tfac], tlist[pret]]))
177 |
178 | disdat = data.loc[dis_idx]
179 |
180 | G = disdat.G_m.to_numpy()
181 | C = disdat.C.to_numpy()
182 | Y = disdat[yname].to_numpy()
183 | post = 1 * (disdat[tname] == tlist[t_i + tfac]).to_numpy()
184 | w = disdat.w.to_numpy()
185 | n1 = sum(G + C)
186 |
187 | # Store the current iteration's data
188 | current_data = {
189 | 'Y': Y,
190 | 'post': post,
191 | 'G': G,
192 | 'group': g,
193 | 'time_period': tn
194 | }
195 |
196 | # print(f"Lengths: Y={len(Y)}, post={len(post)}, G={len(G)}, group={type(g)}, time_period={type(tn)}")
197 | # results_list.append(pd.DataFrame(current_data))
198 |
199 | #-----------------------------------------------------------------------------
200 | # checks to make sure that we have enough observations
201 |
202 | skip_this_att_gt = False
203 |
204 | if np.sum(G * post) == 0:
205 | print(f"No units in group {g} in time period {t_i + tfac + 1}, e1")
206 | skip_this_att_gt = True
207 |
208 | if np.sum(G * (1 - post)) == 0:
209 | print(f"No units in group {g} in time period {t_i + 1}, e2")
210 | skip_this_att_gt = True
211 |
212 | if np.sum(C * post) == 0:
213 | print(f"No available control units for group {g} in time period {t_i + tfac + 1}, e3")
214 | skip_this_att_gt = True
215 |
216 | if np.sum(C * (1 - post)) == 0:
217 | print(f"No available control units for group {g} in time period {t_i+1}, e4")
218 | skip_this_att_gt = True
219 |
220 | if skip_this_att_gt:
221 | # Append results with missing ATT and NA influence function
222 | add_att_data(att=np.nan, pst=post_treat, inf_f=np.full(n, np.nan))
223 | #add_att_data()
224 | continue
225 |
226 | # return (inf_func)
227 | try:
228 | _, covariates = fml(xformla, data = disdat, return_type = 'dataframe')
229 | covariates = np.array(covariates)
230 | except Exception as e:
231 | print(f"Warning: Formula processing failed: {e}")
232 | y_str, x_str = xformla.split("~")
233 | xs1 = x_str.split('+')
234 | xs1_col_names = [x.strip() for x in xs1 if x.strip() != '1']
235 | n_dis = len(disdat)
236 | ones = np.ones((n_dis, 1))
237 | try:
238 | covariates = disdat[xs1_col_names].to_numpy()
239 | covariates = np.append(covariates, ones, axis=1)
240 | except:
241 | covariates = ones
242 |
243 | #-----------------------------------------------------------------------------
244 | # code for actually computing att(g,t)
245 | #-----------------------------------------------------------------------------
246 | # print(Y, post, G, w, covariates)
247 |
248 | if callable(est_method):
249 | est_att_f = est_method
250 | elif est_method == "reg":
251 | est_att_f = reg_did.reg_did_rc
252 | elif est_method == "ipw":
253 | est_att_f = ipwd_did.std_ipw_did_rc
254 | elif est_method == "dr":
255 | est_att_f = drdid.drdid_rc
256 |
257 | att_gt, att_inf_func = est_att_f(y=Y, post=post, D = G, i_weights=w, covariates=covariates)
258 | # print(att_inf_func)
259 | att_inf_func = (n/n1)*att_inf_func
260 |
261 | inf_func_df = pd.DataFrame(
262 | {
263 | "inf_func": att_inf_func,
264 | "right_ids": right_ids
265 | }
266 | ).fillna(0)
267 |
268 | inf_zeros = np.zeros(n)
269 | aggte_infffuc = inf_func_df.groupby('right_ids').inf_func.sum()
270 | try:
271 | dis_idx1 = np.isin(data['rowid'].unique(), aggte_infffuc.index.to_numpy())
272 | except:
273 | dis_idx1 = np.isin(data['rowid'].unique().to_numpy(), aggte_infffuc.index.to_numpy())
274 |
275 | inf_zeros[dis_idx1] = np.array(aggte_infffuc)
276 |
277 | add_att_data(att_gt, pst = post_treat, inf_f=inf_zeros)
278 | # print(att_est)
279 |
280 | output = {
281 | 'group': group ,
282 | 'year': year,
283 | "att" : att_est,
284 | 'post ': post_array
285 | }
286 |
287 | return (output, np.vstack(inf_func))
288 |
289 |
290 | # import numpy as np, pandas as pd
291 | # import patsy
292 | # from drdid import drdid, reg_did, ipwd_did
293 |
294 | # from csdid.utils.bmisc import panel2cs2
295 |
296 | # fml = patsy.dmatrices
297 |
298 |
299 | # # data Struct
300 | # # output = {
301 | # # "att" : []
302 | # # 'group': []
303 | # # 'year': []
304 | # # 'post ': []
305 | # # }
306 |
307 |
308 | # def compute_att_gt(dp, est_method = "dr", base_period = 'varying'):
309 | # yname = dp['yname']
310 | # tname = dp['tname']
311 | # idname = dp['idname']
312 | # xformla = dp['xformla']
313 | # data = dp['data']
314 | # weights_name = dp['weights_name']
315 | # # base_period = dp['base_period']
316 | # panel = dp['panel']
317 | # true_rep_cross_section = dp['true_rep_cross_section']
318 | # control_group = dp['control_group']
319 | # anticipation = dp['anticipation']
320 | # gname = dp['gname']
321 | # n = dp['n']
322 | # nT = dp['nT']
323 | # nG = dp['nG']
324 | # tlist = dp['tlist']
325 | # glist = dp['glist']
326 | # tlist_len, tfac = len(tlist), 0
327 | # if base_period != 'universal':
328 | # tlist_len = tlist_len - 1
329 | # tfac = 1
330 |
331 | # inf_func = []
332 |
333 | # att_est, group, year, post_array = [], [], [], []
334 |
335 | # def add_att_data(att = 0, pst = 0, inf_f = []):
336 | # inf_func.append(inf_f)
337 | # att_est.append(att)
338 | # group.append(g)
339 | # year.append(tn)
340 | # post_array.append(pst)
341 |
342 | # never_treated = control_group == 'nevertreated'
343 | # if never_treated:
344 | # data = data.assign(C = 1 * (data[gname] == 0))
345 | # data = data.assign(y_main = data[yname])
346 |
347 | # # g, t = glist[0], tlist[0]
348 |
349 | # # for _, g, in enumerate(glist):
350 | # for g_index, g in enumerate(glist):
351 |
352 | # # g = glist[1]
353 | # G_main = (data[gname] == g)
354 | # data = data.assign(G_m = 1 * G_main)
355 |
356 | # for t_i in range(tlist_len):
357 | # pret = t_i
358 | # tn = tlist[t_i + tfac]
359 | # if base_period == 'universal' or g < tn:
360 | # try:
361 | # pret = np.where(tlist + anticipation < g)[0][-1]
362 | # except:
363 | # raise f"There are no pre-treatment periods for the group first treated at {g}\nUnits from this group are dropped"
364 | # # break
365 |
366 |
367 | # if base_period == 'universal':
368 | # if pret == tn:
369 | # add_att_data()
370 |
371 | # if not never_treated:
372 | # n1 = data[gname] == 0
373 | # n2 = (data[gname] > (tlist[np.max([t_i, pret]) + tfac]) + anticipation)
374 | # #n3 = np.where(data[gname] != glist[g], True, False)
375 | # n3 = np.where(data[gname] != glist[g_index], True, False)
376 |
377 | # row_eval = n1 | n2 & n3
378 | # data = data.assign(C = 1 * row_eval)
379 |
380 | # post_treat = 1 * (g <= tn)
381 | # disdat = data[(data[tname] == tn) | (data[tname] == tlist[pret])]
382 |
383 | # if panel:
384 | # disdat = panel2cs2(disdat, yname, idname, tname)
385 | # disdat = disdat.dropna()
386 | # n = len(disdat)
387 | # dis_idx = np.array(disdat.G_m == 1) | np.array(disdat.C == 1)
388 | # disdat = disdat.loc[dis_idx, :]
389 | # n1 = len(disdat)
390 | # G = disdat.G_m
391 | # C = disdat.C
392 | # w = disdat.w
393 |
394 | # ypre = disdat.y0 if tn > pret else disdat.y1
395 | # ypost = disdat.y0 if tn < pret else disdat.y1
396 | # _, covariates = fml(xformla, data = disdat, return_type = 'dataframe')
397 |
398 | # G, C, w, ypre = map(np.array, [G, C, w, ypre])
399 | # ypost, covariates = map(np.array, [ypost, covariates])
400 |
401 | # if callable(est_method):
402 | # est_att_f = est_method
403 | # elif est_method == "reg":
404 | # est_att_f = reg_did.reg_did_panel
405 | # elif est_method == "ipw":
406 | # est_att_f = ipwd_did.std_ipw_did_panel
407 | # elif est_method == "dr":
408 | # est_att_f = drdid.drdid_panel
409 |
410 | # att_gt, att_inf_func = est_att_f(ypost, ypre, G, i_weights=w, covariates=covariates)
411 |
412 | # inf_zeros = np.zeros(n)
413 | # att_inf = n / n1 * att_inf_func
414 | # inf_zeros[dis_idx] = att_inf
415 |
416 | # add_att_data(att_gt, inf_f=inf_zeros)
417 |
418 | # if not panel:
419 | # right_ids = np.array(disdat.query('(G_m == 1) or (C == 1)').rowid.to_numpy())
420 | # dis_idx = (data['rowid'].isin(right_ids)) &\
421 | # ((data[tname] == tlist[t_i + tfac]) |\
422 | # (data[tname] == tlist[pret]))
423 |
424 | # disdat = data.loc[dis_idx]
425 |
426 | # G = disdat.G_m.to_numpy()
427 | # C = disdat.C.to_numpy()
428 | # Y = disdat[yname].to_numpy()
429 | # post = 1 * (disdat[tname] == tlist[t_i + tfac]).to_numpy()
430 | # w = disdat.w.to_numpy()
431 |
432 | # # G, C, Y, post, w = map(np.array, [G, C, Y, post, w])
433 |
434 |
435 | # n1 = sum(G + C)
436 |
437 | # skip_this_att_gt = False
438 | # if np.sum(G * post) == 0:
439 | # print(f"No units in group {g} in time period {tn}")
440 | # skip_this_att_gt = True
441 |
442 | # if np.sum(G * (1 - post)) == 0:
443 | # print(f"No units in group {g} in time period {tn}")
444 | # skip_this_att_gt = True
445 |
446 | # if np.sum(C * post) == 0:
447 | # print(f"No available control units for group {g} in time period {tn}")
448 | # skip_this_att_gt = True
449 |
450 | # if np.sum(C * (1 - post)) == 0:
451 | # print(f"No available control units for group {g} in time period {tn}")
452 | # skip_this_att_gt = True
453 |
454 | # if skip_this_att_gt:
455 | # add_att_data()
456 |
457 | # try:
458 | # _, covariates = fml(xformla, data = disdat, return_type = 'dataframe')
459 | # covariates = np.array(covariates)
460 | # except:
461 | # y_str, x_str = xformla.split("~")
462 | # xs1 = x_str.split('+')
463 | # xs1_col_names = [x.strip() for x in xs1 if x.strip() != '1']
464 | # n_dis = len(disdat)
465 | # ones = np.ones((n_dis, 1))
466 | # try:
467 | # covariates = disdat[xs1_col_names].to_numpy()
468 | # covariates = np.append(covariates, ones, axis=1)
469 | # except:
470 | # covariates = ones
471 |
472 |
473 |
474 | # if callable(est_method):
475 | # est_att_f = est_method
476 | # elif est_method == "reg":
477 | # est_att_f = reg_did.reg_did_rc
478 | # elif est_method == "ipw":
479 | # est_att_f = ipwd_did.std_ipw_did_rc
480 | # elif est_method == "dr":
481 | # est_att_f = drdid.drdid_rc
482 | # att_gt, att_inf_func = est_att_f(y=Y, post=post, D = G, i_weights=w, covariates=covariates)
483 |
484 | # inf_func_df = pd.DataFrame(
485 | # {
486 | # "inf_func": att_inf_func,
487 | # "right_ids": right_ids
488 | # }
489 | # )
490 | # inf_zeros = np.zeros(n)
491 | # aggte_infffuc = inf_func_df.groupby('right_ids').inf_func.sum()
492 | # try:
493 | # dis_idx1 = np.isin(data['rowid'].unique(), aggte_infffuc.index.to_numpy())
494 | # except:
495 | # dis_idx1 = np.isin(data['rowid'].unique().to_numpy(), aggte_infffuc.index.to_numpy())
496 | # inf_zeros[dis_idx1] = np.array(aggte_infffuc)
497 |
498 | # add_att_data(att_gt, pst = post_treat, inf_f=inf_zeros)
499 | # # print(att_est)
500 |
501 | # output = {
502 | # 'group': group ,
503 | # 'year': year,
504 | # "att" : att_est,
505 | # 'post ': post_array
506 | # }
507 | # return (output, np.array(inf_func))
--------------------------------------------------------------------------------
/csdid/attgt_fnc/preprocess_did.py:
--------------------------------------------------------------------------------
1 | # from typing import Union, List, Optional, Dict
2 | # import pandas as pd, numpy as np
3 | # import patsy
4 | # from csdid.utils.bmisc import makeBalancedPanel
5 |
6 | # fml = patsy.dmatrices
7 |
8 | # def pre_process_did(yname, tname, idname, gname, data: pd.DataFrame,
9 | # control_group: Union[str, List[str]] = "nevertreated",
10 | # anticipation = 0, xformla : str = None,
11 | # panel = True, allow_unbalanced_panel = True, cband = False,
12 | # clustervar = None, weights_name = None
13 | # ) -> dict:
14 |
15 | # n, t = data.shape
16 | # # control_group = "nevertreated"
17 | # columns = [idname, tname, yname, gname]
18 | # # print(columns)
19 | # # Columns
20 | # if clustervar is not None:
21 | # columns += [clustervar]
22 | # if weights_name is not None:
23 | # columns += [weights_name]
24 | # w = data[weights_name]
25 | # else:
26 | # w = np.ones(n)
27 |
28 |
29 | # if xformla is None:
30 | # xformla = f'{yname} ~ 1'
31 |
32 | # # if xformla is None:
33 | # try:
34 | # _, x_cov = fml(xformla, data = data, return_type='dataframe')
35 | # _, n_cov = x_cov.shape
36 | # data = pd.concat([data[columns], x_cov], axis=1)
37 | # data = data.assign(w = w)
38 | # except:
39 | # data = data.assign(intercept = 1)
40 | # clms = columns + ['intercept']
41 | # n_cov = len(data.columns)
42 | # # patsy dont work with pyspark
43 | # data = data[clms]
44 | # if weights_name is None:
45 | # data = data.assign(w = 1)
46 | # else:
47 | # data = data.assign(w = lambda x: x[weights_name] * 1)
48 |
49 |
50 | # data = data.dropna()
51 | # ndiff = n - len(data)
52 | # if ndiff != 0:
53 | # print(f'dropped, {ndiff}, rows from original data due to missing data')
54 | # try:
55 |
56 | # tlist = np.sort(data[tname].unique())
57 | # glist = np.sort(data[gname].unique())
58 | # except:
59 | # tlist = np.sort(data[tname].unique().to_numpy())
60 | # glist = np.sort(data[gname].unique().to_numpy())
61 |
62 | # asif_nev_treated = data[gname] > np.max(tlist)
63 | # asif_nev_treated.fillna(False, inplace=True)
64 | # data.loc[asif_nev_treated, gname] = 0
65 |
66 | # if len(glist[glist == 0]) == 0:
67 | # if control_group == "nevertreated":
68 | # raise ValueError("There is no available never-treated group")
69 | # else:
70 | # value = np.max(glist) - anticipation
71 | # data = data.query(f'{tname} < @value')
72 | # tlist = np.sort(data[tname].unique())
73 | # glist = np.sort(data[gname].unique())
74 | # glist = glist[glist < np.max(glist)]
75 |
76 | # glist = glist[glist > 0]
77 | # # first prerios
78 | # fp = tlist[0]
79 | # glist = glist[glist > fp + anticipation]
80 |
81 | # treated_fp = (data[gname] <= fp) & ~(data[gname] == 0)
82 | # treated_fp.fillna(False, inplace=True)
83 |
84 | # try:
85 |
86 | # nfirst_period = np.sum(treated_fp) if panel \
87 | # else len(data.loc[treated_fp, idname].unique())
88 | # except:
89 | # nfirst_period = treated_fp.sum() if panel \
90 | # else len(data.loc[treated_fp, idname].unique())
91 |
92 | # if nfirst_period > 0:
93 | # warning_message = f"Dropped {nfirst_period} units that were already treated in the first period."
94 | # print(warning_message)
95 | # glist_in = np.append(glist, [0])
96 | # data = data.query(f'{gname} in @glist_in')
97 | # tlist = np.sort(data[tname].unique())
98 | # glist = np.sort(data[gname].unique())
99 | # glist = glist[glist > 0]
100 | # fp = tlist[0]
101 | # glist = glist[glist > fp + anticipation]
102 |
103 | # #todo: idname must be numeric
104 | # true_rep_cross_section = False
105 | # if not panel:
106 | # true_rep_cross_section = True
107 |
108 |
109 |
110 | # #-----------------------------------------------------------------------------
111 | # # setup data in panel case
112 | # #-----------------------------------------------------------------------------
113 |
114 | # # # Check if data is a balanced panel if panel = True and allow_unbalanced_panel = True
115 | # # if panel and allow_unbalanced_panel:
116 | # # # First, focus on complete cases
117 | # # keepers = data.dropna()
118 | # # data_comp = keepers.copy()
119 |
120 | # # # Make it a balanced dataset
121 | # # n_all = len(data_comp[idname].unique())
122 | # # data_bal = makeBalancedPanel(data_comp, idname, tname)
123 | # # n_bal = len(data_bal[idname].unique())
124 |
125 | # # if n_bal < n_all:
126 | # # # If fewer unique IDs in the balanced panel, it means the panel is unbalanced
127 | # # print("You have an unbalanced panel. Proceeding as such.")
128 | # # allow_unbalanced_panel = True
129 | # # else:
130 | # # # If the number of unique IDs remains the same, it means the panel is balanced
131 | # # print("You have a balanced panel. Setting allow_unbalanced_panel = False.")
132 | # # allow_unbalanced_panel = False
133 |
134 |
135 | # if panel:
136 | # if allow_unbalanced_panel:
137 | # panel = False
138 | # true_rep_cross_section = False
139 | # else:
140 | # keepers = data.dropna().index
141 | # n = len(data[idname].unique)
142 | # print(n)
143 | # n_keep = len(data.iloc[keepers, idname].unique())
144 |
145 | # if len(data.loc[keepers] < len(data)):
146 | # print(f"Dropped {n-n_keep} observations that had missing data.")
147 | # data = data.loc[keepers]
148 | # # make balanced data set
149 | # n_old = len(data[idname].unique())
150 | # data = makeBalancedPanel(data, idname=idname, tname=tname)
151 | # n = len(data[idname].unique())
152 | # if len(data) == 0:
153 | # raise ValueError("All observations dropped to convert data to balanced panel. Consider setting `panel=False` and/or revisit 'idname'.")
154 | # if n < n_old:
155 | # warnings.warn(f"Dropped {n_old-n} observations while converting to balanced panel.")
156 | # tn = tlist[0]
157 | # n = len(data.query(f'{tname} == @tn'))
158 |
159 | # # # Add rowid
160 | # # if not panel:
161 | # # # Handle missing data
162 | # # keepers = data.dropna().index
163 | # # dropped_count = len(data) - len(keepers)
164 | # # if dropped_count > 0:
165 | # # print(f"Dropped {dropped_count} observations that had missing data.")
166 |
167 | # # # Drop incomplete rows
168 | # # data = data.loc[keepers]
169 |
170 | # # # Check if all rows are dropped
171 | # # if len(data) == 0:
172 | # # raise ValueError("All observations dropped due to missing data problems.")
173 |
174 | # # # Add rowid based on true_repeated_cross_section
175 | # # if true_rep_cross_section:
176 | # # data = data.assign(rowid=range(len(data)))
177 | # # idname = 'rowid'
178 | # # else:
179 | # # if idname not in data.columns:
180 | # # raise ValueError(f"Column {idname} is not in the dataset.")
181 | # # data = data.assign(rowid=lambda x: x[idname] * 1)
182 |
183 | # # # Number of unique observations
184 | # # n = data[idname].nunique()
185 |
186 | # if not panel:
187 | # # Check for complete cases (equivalent to complete.cases in R)
188 | # keepers = data.notna().all(axis=1)
189 | # dropped_count = len(data) - keepers.sum()
190 |
191 | # if dropped_count > 0:
192 | # import warnings
193 | # warnings.warn(f"Dropped {dropped_count} observations that had missing data.")
194 | # data = data[keepers].copy()
195 |
196 | # # If drop all data, raise error
197 | # if len(data) == 0:
198 | # raise ValueError("All observations dropped due to missing data problems.")
199 |
200 | # # Add rowid column
201 | # if true_rep_cross_section:
202 | # data.loc[:, 'rowid'] = range(1, len(data) + 1) # 1-based indexing like R
203 | # idname = 'rowid'
204 | # else:
205 | # # Set rowid to idname for repeated cross section/unbalanced
206 | # if idname not in data.columns:
207 | # raise ValueError(f"Column {idname} not found in dataset")
208 | # data.loc[:, 'rowid'] = data[idname]
209 |
210 | # # Calculate n as unique number of cross section observations
211 | # n = data[idname].nunique()
212 |
213 | # data = data.sort_values([idname, tname])
214 | # data = data.assign(w1 = lambda x: x['w'] * 1)
215 | # # data.loc[:, ".w"] = data['w']
216 | # if len(glist) == 0:
217 | # raise f"No valid groups. The variable in '{gname}' should be expressed as the time a unit is first treated (0 if never-treated)."
218 | # if len(tlist) == 2:
219 | # cband = False
220 | # gsize = data.groupby(data[gname]).size().reset_index(name="count")
221 | # gsize["count"] /= len(tlist)
222 |
223 | # reqsize = n_cov + 5
224 | # gsize = gsize[gsize["count"] < reqsize]
225 |
226 | # if len(gsize) > 0:
227 | # gpaste = ",".join(map(str, gsize[gname]))
228 | # warnings.warn(f"Be aware that there are some small groups in your dataset.\n Check groups: {gpaste}.")
229 |
230 | # if 0 in gsize[gname].to_numpy() and control_group == "nevertreated":
231 | # raise "Never-treated group is too small, try setting control_group='notyettreated'."
232 | # nT, nG = map(len, [tlist, glist])
233 | # did_params = {
234 | # 'yname' : yname, 'tname': tname,
235 | # 'idname' : idname, 'gname': gname,
236 | # 'xformla': xformla, 'data': data,
237 | # 'tlist': tlist, 'glist': glist,
238 | # 'n': n, 'nG': nG, 'nT': nT,
239 | # 'control_group': control_group, 'anticipation': anticipation,
240 | # 'weights_name': weights_name, 'panel': panel,
241 | # 'true_rep_cross_section': true_rep_cross_section,
242 | # 'clustervars': clustervar
243 | # }
244 | # return did_params
245 |
246 |
247 | import pandas as pd, numpy as np
248 | import patsy
249 | from csdid.utils.bmisc import makeBalancedPanel
250 | import warnings
251 |
252 | fml = patsy.dmatrices
253 |
254 | def pre_process_did(yname, tname, idname, gname, data: pd.DataFrame,
255 | control_group = ['nevertreated', 'notyettreated'],
256 | anticipation = 0, xformla : str = None,
257 | panel = True, allow_unbalanced_panel = True, cband = False,
258 | clustervar = None, weights_name = None
259 | ) -> dict:
260 |
261 | n, t = data.shape
262 | control_group = control_group[0]
263 | columns = [idname, tname, yname, gname]
264 | control_group = "nevertreated"
265 | # print(columns)
266 | # Columns
267 | if clustervar is not None:
268 | columns += [clustervar]
269 | if weights_name is not None:
270 | columns += [weights_name]
271 | w = data[weights_name]
272 | else:
273 | w = np.ones(n)
274 |
275 |
276 | if xformla is None:
277 | xformla = f'{yname} ~ 1'
278 |
279 | # if xformla is None:
280 | try:
281 | _, x_cov = fml(xformla, data = data, return_type='dataframe')
282 | _, n_cov = x_cov.shape
283 | data = pd.concat([data[columns], x_cov], axis=1)
284 | data = data.assign(w = w)
285 | except:
286 | data = data.assign(intercept = 1)
287 | clms = columns + ['intercept']
288 | n_cov = len(data.columns)
289 | # patsy dont work with pyspark
290 | data = data[clms]
291 | if weights_name is None:
292 | data = data.assign(w = 1)
293 | else:
294 | data = data.assign(w = lambda x: x[weights_name] * 1)
295 |
296 |
297 | data = data.dropna()
298 | ndiff = n - len(data)
299 | if ndiff != 0:
300 | print(f'dropped, {ndiff}, rows from original data due to missing data')
301 | try:
302 |
303 | tlist = np.sort(data[tname].unique())
304 | glist = np.sort(data[gname].unique())
305 | except:
306 | tlist = np.sort(data[tname].unique().to_numpy())
307 | glist = np.sort(data[gname].unique().to_numpy())
308 |
309 | asif_nev_treated = data[gname] > np.max(tlist)
310 | asif_nev_treated.fillna(False, inplace=True)
311 | data.loc[asif_nev_treated, gname] = 0
312 |
313 | if len(glist[glist == 0]) == 0:
314 | if control_group == "nevertreated":
315 | raise ValueError("There is no available never-treated group")
316 | else:
317 | value = np.max(glist) - anticipation
318 | data = data.query(f'{tname} < @value')
319 | tlist = np.sort(data[tname].unique())
320 | glist = np.sort(data[gname].unique())
321 | glist = glist[glist < np.max(glist)]
322 |
323 | glist = glist[glist > 0]
324 | # first prerios
325 | fp = tlist[0]
326 | glist = glist[glist > fp + anticipation]
327 |
328 | treated_fp = (data[gname] <= fp) & ~(data[gname] == 0)
329 | treated_fp.fillna(False, inplace=True)
330 |
331 | try:
332 |
333 | nfirst_period = np.sum(treated_fp) if panel \
334 | else len(data.loc[treated_fp, idname].unique())
335 | except:
336 | nfirst_period = treated_fp.sum() if panel \
337 | else len(data.loc[treated_fp, idname].unique())
338 |
339 | if nfirst_period > 0:
340 | warning_message = f"Dropped {nfirst_period} units that were already treated in the first period."
341 | print(warning_message)
342 | glist_in = np.append(glist, [0])
343 | data = data.query(f'{gname} in @glist_in')
344 | tlist = np.sort(data[tname].unique())
345 | glist = np.sort(data[gname].unique())
346 | glist = glist[glist > 0]
347 | fp = tlist[0]
348 | glist = glist[glist > fp + anticipation]
349 |
350 | #todo: idname must be numeric
351 | true_rep_cross_section = False
352 | if not panel:
353 | true_rep_cross_section = True
354 |
355 | if panel:
356 | if allow_unbalanced_panel:
357 | panel = False
358 | true_rep_cross_section = False
359 | else:
360 | keepers = data.dropna().index
361 | n = len(data[idname].unique)
362 | print(n)
363 | n_keep = len(data.iloc[keepers, idname].unique())
364 |
365 | if len(data.loc[keepers] < len(data)):
366 | print(f"Dropped {n-n_keep} observations that had missing data.")
367 | data = data.loc[keepers]
368 | # make balanced data set
369 | n_old = len(data[idname].unique())
370 | data = makeBalancedPanel(data, idname=idname, tname=tname)
371 | n = len(data[idname].unique())
372 | if len(data) == 0:
373 | raise ValueError("All observations dropped to convert data to balanced panel. Consider setting `panel=False` and/or revisit 'idname'.")
374 | if n < n_old:
375 | warnings.warn(f"Dropped {n_old-n} observations while converting to balanced panel.")
376 | tn = tlist[0]
377 | n = len(data.query(f'{tname} == @tn'))
378 | # add rowid
379 | if not panel:
380 |
381 | keepers = data.dropna().index.to_numpy()
382 | ndiff = len(data.loc[keepers]) - len(data)
383 | if len(keepers) == 0:
384 | raise "All observations dropped due to missing data problems."
385 | if ndiff < 0:
386 | mssg = f"Dropped {ndiff} observations that had missing data."
387 | data = data.loc[keepers]
388 | if true_rep_cross_section:
389 | # fix: posible error
390 | data = data.assign(rowid = range(len(data)))
391 | idname = 'rowid'
392 | else:
393 | # r_id = np.array(data[idname])
394 | data = data.assign(rowid = lambda x: x[idname] * 1)
395 |
396 | n = len(data[idname].unique())
397 |
398 | data = data.sort_values([idname, tname])
399 | data = data.assign(w1 = lambda x: x['w'] * 1)
400 | # data.loc[:, ".w"] = data['w']
401 | if len(glist) == 0:
402 | raise f"No valid groups. The variable in '{gname}' should be expressed as the time a unit is first treated (0 if never-treated)."
403 | if len(tlist) == 2:
404 | cband = False
405 | gsize = data.groupby(data[gname]).size().reset_index(name="count")
406 | gsize["count"] /= len(tlist)
407 |
408 | reqsize = n_cov + 5
409 | gsize = gsize[gsize["count"] < reqsize]
410 |
411 | if len(gsize) > 0:
412 | gpaste = ",".join(map(str, gsize[gname]))
413 | warnings.warn(f"Be aware that there are some small groups in your dataset.\n Check groups: {gpaste}.")
414 |
415 | if 0 in gsize[gname].to_numpy() and control_group == "nevertreated":
416 | raise "Never-treated group is too small, try setting control_group='notyettreated'."
417 | nT, nG = map(len, [tlist, glist])
418 | did_params = {
419 | 'yname' : yname, 'tname': tname,
420 | 'idname' : idname, 'gname': gname,
421 | 'xformla': xformla, 'data': data,
422 | 'tlist': tlist, 'glist': glist,
423 | 'n': n, 'nG': nG, 'nT': nT,
424 | 'control_group': control_group, 'anticipation': anticipation,
425 | 'weights_name': weights_name, 'panel': panel,
426 | 'true_rep_cross_section': true_rep_cross_section,
427 | 'clustervars': clustervar
428 | }
429 | return did_params
430 |
--------------------------------------------------------------------------------
/csdid/plots/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2cml-ai/csdid/7ad721d4c384bca147ae16de84b7c5df8fe26959/csdid/plots/__init__.py
--------------------------------------------------------------------------------
/csdid/plots/gplot.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sun Jun 11 16:17:07 2023
4 |
5 | @author: Carlos Guevara
6 | """
7 |
8 | import matplotlib.pyplot as plt
9 |
10 | # get_ipython().run_line_magic('matplotlib', 'qt') # To aopen separate window
11 | # get_ipython().run_line_magic('matplotlib', 'inline') # In line graph
12 |
13 | def gplot(ssresults, ax, ylim=None, xlab=None, ylab=None, title="Group", xgap=1,
14 | legend=True, ref_line=0, theming=True):
15 | if ylab is None:
16 | ylab = 'ATT'
17 |
18 | ssresults = ssresults[ssresults['year'].notnull()].copy()
19 | ssresults.loc[:, 'year'] = ssresults['year'].astype(int).astype(str)
20 |
21 | pre_points = ssresults.loc[ssresults['post'] == 0]
22 | post_points = ssresults.loc[ssresults['post'] == 1]
23 |
24 | ax.errorbar(pre_points['year'], pre_points['att'], yerr=pre_points['c']*pre_points['att_se'],
25 | fmt='o', markersize=5, color='#e87d72', ecolor='#e87d72', capsize=5, label='Pre')
26 |
27 | ax.errorbar(post_points['year'], post_points['att'], yerr=post_points['c']*post_points['att_se'],
28 | fmt='o', markersize=5, color='#56bcc2', ecolor='#56bcc2', capsize=5, label='Post')
29 |
30 | ax.set_ylim(ylim)
31 | ax.set_title(title)
32 | ax.set_xlabel(xlab)
33 | ax.set_ylabel(ylab)
34 |
35 | handles, labels = ax.get_legend_handles_labels()
36 |
37 | if ref_line is not None:
38 | ax.axhline(ref_line, linestyle='dashed', color='#1F1F1F')
39 | if theming:
40 | ax.set_facecolor('white')
41 | ax.set_title(title, color="#1F1F1F", fontweight="bold", fontsize=10)
42 | ax.spines['bottom'].set_color('#1F1F1F')
43 | ax.spines['left'].set_color('#1F1F1F')
44 | ax.tick_params(axis='x', colors='#1F1F1F')
45 | ax.tick_params(axis='y', colors='#1F1F1F')
46 | if not pre_points.empty and not post_points.empty:
47 | ax.legend(handles[0:2], labels[0:2], loc='lower center',fontsize='small', ncol=2, bbox_to_anchor=(0.5,-0.27))
48 | elif not pre_points.empty:
49 | ax.legend(handles[:1], labels[:1], loc='lower center',fontsize='small', ncol=2, bbox_to_anchor=(0.5,-0.27))
50 | elif not post_points.empty:
51 | ax.legend(handles[1:2], labels[1:2], loc='lower center',fontsize='small', ncol=2, bbox_to_anchor=(0.5,-0.27))
52 | if not legend:
53 | ax.legend().set_visible(False)
54 |
55 | return ax
56 |
57 |
58 | def splot(ssresults, ax, ylim=None, xlab=None, ylab=None, title="Group",
59 | legend=True, ref_line=0, theming=True):
60 |
61 | if xlab is None:
62 | xlab = 'Group'
63 | if ylab is None:
64 | ylab = 'ATT'
65 |
66 | ssresults['year'] = ssresults['year'].copy().astype(str)
67 |
68 | pre_points = ssresults.loc[ssresults['post'] == 0]
69 | post_points = ssresults.loc[ssresults['post'] == 1]
70 |
71 | ax.errorbar(pre_points['year'], pre_points['att'], yerr=pre_points['c']*pre_points['att_se'],
72 | fmt='o', markersize=5, color='#e87d72', ecolor='#e87d72', capsize=5, label='Pre')
73 |
74 | ax.errorbar(post_points['year'], post_points['att'], yerr=post_points['c']*post_points['att_se'],
75 | fmt='o', markersize=5, color='#56bcc2', ecolor='#56bcc2', capsize=5, label='Post')
76 |
77 | ax.set_xlabel(xlab)
78 | ax.set_ylabel(ylab)
79 | ax.set_title(title)
80 |
81 | handles, labels = ax.get_legend_handles_labels()
82 |
83 | if ylim is not None:
84 | ax.set_ylim(ylim)
85 |
86 | if ref_line is not None:
87 | ax.axhline(ref_line, linestyle='dashed', color='#1F1F1F')
88 |
89 | if theming:
90 | ax.set_facecolor('white')
91 | ax.set_title(title, color="#1F1F1F", fontweight="bold", fontsize=12)
92 | ax.spines['bottom'].set_color('#1F1F1F')
93 | ax.spines['left'].set_color('#1F1F1F')
94 | ax.tick_params(axis='x', colors='#1F1F1F')
95 | ax.tick_params(axis='y', colors='#1F1F1F')
96 | if not pre_points.empty and not post_points.empty:
97 | ax.legend(handles[0:2], labels[0:2], loc='lower center',fontsize='small', ncol=2, bbox_to_anchor=(0.5,-0.27))
98 | elif not pre_points.empty:
99 | ax.legend(handles[:1], labels[:1], loc='lower center',fontsize='small', ncol=2, bbox_to_anchor=(0.5,-0.27))
100 | elif not post_points.empty:
101 | ax.legend(handles[1:2], labels[1:2], loc='lower center',fontsize='small', ncol=2, bbox_to_anchor=(0.5,-0.27))
102 |
103 | if not legend:
104 | ax.legend().set_visible(False)
105 |
106 | return ax
--------------------------------------------------------------------------------
/csdid/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2cml-ai/csdid/7ad721d4c384bca147ae16de84b7c5df8fe26959/csdid/utils/__init__.py
--------------------------------------------------------------------------------
/csdid/utils/bmisc.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | def makeBalancedPanel(data, idname, tname):
3 | data = data.sort_values([idname, tname]).reset_index(drop = True)
4 | nt = len(data[tname].unique())
5 | data = data.groupby(idname)\
6 | .filter(lambda x: len(x) == nt)
7 | return data
8 |
9 |
10 | def panel2cs2(data: pd.DataFrame, yname, idname, tname):
11 | if len(data[tname].unique()) != 2:
12 | raise ValueError('panel2cs2 only for 2 perios of apnel data')
13 |
14 | data = data.sort_values([idname, tname])
15 | y1 = data.groupby(idname)[yname].shift(-1)
16 | y0 = data[yname]
17 | dy = y1 - y0
18 | data = data.assign(
19 | y1 = y1, y0 = y0, dy = dy
20 | )
21 | return data.dropna()
22 |
23 | # -*- coding: utf-8 -*-
24 | """
25 | Created on Wed May 31 18:58:35 2023
26 |
27 | @author: Carlos Guevara
28 | """
29 | import numpy as np
30 |
31 | def TorF(cond, use_isTRUE=False):
32 |
33 | if not isinstance(cond, np.ndarray) or cond.dtype != bool:
34 | raise ValueError("cond should be a logical vector")
35 | if use_isTRUE:
36 | cond = np.array([x is True for x in cond])
37 | else:
38 | cond[np.isnan(cond)] = False
39 | return cond
40 |
41 | def multiplier_bootstrap(inf_func, biters): # This function comes from c++
42 | n, K = inf_func.shape
43 | biters = int(biters)
44 | innerMat = np.zeros((n, K))
45 | Ub = np.zeros(n)
46 | outMat = np.zeros((biters,K))
47 |
48 | for b in range(biters):
49 | # draw Rademechar weights
50 | # Ub = ( np.ones(n) - 2 * np.round(np.random.rand(n)) )[:, np.newaxis]
51 | Ub = np.random.choice([1, -1], size=(n, 1))
52 | innerMat = inf_func * Ub
53 | outMat[b] = np.mean(innerMat, axis=0)
54 |
55 | return outMat
--------------------------------------------------------------------------------
/csdid/utils/mboot.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from scipy.stats import mstats, norm
3 | from joblib import Parallel, delayed
4 | import pandas as pd
5 |
6 | from csdid.utils.bmisc import multiplier_bootstrap
7 |
8 | def mboot(inf_func, DIDparams, pl=False, cores=1):
9 | # Setup needed variables
10 | data = DIDparams['data']
11 | idname = DIDparams['idname']
12 | clustervars = DIDparams['clustervars']
13 | biters = DIDparams['biters']
14 | tname = DIDparams['tname']
15 | try:
16 | tlist = np.sort(data[tname].unique())
17 | except:
18 | tlist = np.sort(data[tname].unique().to_numpy())
19 | alp = DIDparams['alp']
20 | panel = DIDparams['panel']
21 | true_repeated_cross_sections = DIDparams['true_repeated_cross_sections']
22 |
23 | # Get n observations (for clustering below)
24 | if panel:
25 | dta = data[ data[tname] == tlist[0] ]
26 | else:
27 | dta = data.copy()
28 |
29 | # Convert inf_func to matrix
30 | inf_func = np.asarray(inf_func)
31 |
32 | # Set correct number of units
33 | n = inf_func.shape[0]
34 |
35 | # Drop idname if it is in clustervars
36 | if clustervars is not None and idname in clustervars:
37 | clustervars.remove(idname)
38 |
39 | if clustervars is not None:
40 | if isinstance(clustervars, list) and isinstance(clustervars[0], str):
41 | raise ValueError("clustervars need to be the name of the clustering variable.")
42 |
43 | # We can only handle up to 2-way clustering
44 | if clustervars is not None and len(clustervars) > 1:
45 | raise ValueError("Can't handle that many cluster variables")
46 |
47 | if clustervars is not None:
48 | # Check that cluster variable does not vary over time within unit
49 | clust_tv = dta.groupby(idname)[clustervars[0]].nunique() == 1
50 | if not clust_tv.all():
51 | raise ValueError("Can't handle time-varying cluster variables")
52 | # clustervars='year'
53 | # Multiplier bootstrap
54 | n_clusters = n
55 | if not clustervars:
56 | bres = np.sqrt(n) * run_multiplier_bootstrap(inf_func, biters, pl, cores)
57 | else:
58 | n_clusters = len(data[clustervars].drop_duplicates())
59 | cluster = dta[[idname, clustervars]].drop_duplicates().values[:, 1]
60 | cluster_n = dta.groupby(cluster).size().values
61 | cluster_mean_if = pd.DataFrame(inf_func).groupby(cluster).sum().values / cluster_n
62 | bres = np.sqrt(n_clusters) * run_multiplier_bootstrap(cluster_mean_if, biters, pl, cores)
63 |
64 | # Handle vector and matrix case differently to get nxk matrix
65 | if isinstance(bres, np.ndarray) and bres.ndim == 1:
66 | bres = np.expand_dims(bres, axis=0)
67 | elif isinstance(bres, np.ndarray) and bres.ndim > 2:
68 | bres = bres.transpose()
69 |
70 | # Non-degenerate dimensions
71 | ndg_dim = np.logical_and(~np.isnan(np.sum(bres, axis=0)), np.sum(bres ** 2, axis=0) > np.sqrt(np.finfo(float).eps) * 10)
72 | bres = bres[:, ndg_dim]
73 |
74 | # Bootstrap variance matrix (this matrix can be defective because of degenerate cases)
75 | V = np.cov(bres, rowvar=False)
76 |
77 | # Bootstrap standard error
78 | quantile_75 = np.quantile(bres, 0.75, axis=0, method="inverted_cdf")
79 | quantile_25 = np.quantile(bres, 0.25, axis=0, method="inverted_cdf")
80 | qnorm_75 = norm.ppf(0.75)
81 | qnorm_25 = norm.ppf(0.25)
82 | bSigma = (quantile_75 - quantile_25) / (qnorm_75 - qnorm_25)
83 |
84 | # Critical value for uniform confidence band
85 | bT = np.max(np.abs(bres / bSigma), axis=1)
86 | bT = bT[np.isfinite(bT)]
87 | crit_val = np.quantile(bT, 1 - alp, method="inverted_cdf")
88 |
89 | # Standard error
90 | se = np.full(ndg_dim.shape, np.nan)
91 | se[ndg_dim] = bSigma / np.sqrt(n_clusters)
92 |
93 | return {'bres': bres, 'V': V, 'se': se, 'crit_val': crit_val}
94 |
95 | def run_multiplier_bootstrap(inf_func, biters, pl=False, cores=1):
96 | ngroups = int(np.ceil(biters / cores))
97 | chunks = [ngroups] * cores
98 | chunks[0] += biters - sum(chunks)
99 |
100 | n = inf_func.shape[0]
101 |
102 | def parallel_function(biters):
103 | return multiplier_bootstrap(inf_func, biters)
104 |
105 | if n > 2500 and pl and cores > 1:
106 | results = Parallel(n_jobs=cores)(
107 | delayed(parallel_function)(biters) for biters in chunks
108 | )
109 | results = np.vstack(results)
110 | else:
111 | results = multiplier_bootstrap(inf_func, biters)
112 |
113 | return results
114 |
115 |
116 |
--------------------------------------------------------------------------------
/figs/did_py.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2cml-ai/csdid/7ad721d4c384bca147ae16de84b7c5df8fe26959/figs/did_py.png
--------------------------------------------------------------------------------
/figs/did_r.drawio:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
--------------------------------------------------------------------------------
/figs/did_r.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2cml-ai/csdid/7ad721d4c384bca147ae16de84b7c5df8fe26959/figs/did_r.png
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # Difference in Difference in Python
2 |
3 | [](https://pypi.org/project/csdid/)
4 | [](https://pepy.tech/project/csdid)
5 | [](https://github.com/d2cml-ai/csdid/commits/main)
6 | [](https://github.com/d2cml-ai/csdid/stargazers)
7 | [](https://github.com/d2cml-ai/csdid/issues)
8 | [](https://github.com/d2cml-ai/csdid/blob/main/LICENSE)
9 |
10 |
11 | The **csdid** package contains tools for computing average treatment
12 | effect parameters in a Difference-in-Differences setup allowing for
13 |
14 | - More than two time periods
15 |
16 | - Variation in treatment timing (i.e., units can become treated at
17 | different points in time)
18 |
19 | - Treatment effect heterogeneity (i.e, the effect of participating in
20 | the treatment can vary across units and exhibit potentially complex
21 | dynamics, selection into treatment, or time effects)
22 |
23 | - The parallel trends assumption holds only after conditioning on
24 | covariates
25 |
26 | The main parameters are **group-time average treatment effects**. These
27 | are the average treatment effect for a particular group (group is
28 | defined by treatment timing) in a particular time period. These
29 | parameters are a natural generalization of the average treatment effect
30 | on the treated (ATT) which is identified in the textbook case with two
31 | periods and two groups to the case with multiple periods.
32 |
33 | Group-time average treatment effects are also natural building blocks
34 | for more aggregated treatment effect parameters such as overall
35 | treatment effects or event-study-type estimands.
36 |
37 | ## Getting Started
38 |
39 | There has been some recent work on DiD with multiple time periods. The
40 | **csdid** package implements the framework put forward in
41 |
42 | - [Callaway, Brantly and Pedro H.C. Sant’Anna.
43 | “Difference-in-Differences with Multiple Time Periods.” Journal of
44 | Econometrics, Vol. 225, No. 2, pp. 200-230,
45 | 2021.](https://doi.org/10.1016/j.jeconom.2020.12.001) or
46 | \[arXiv\](https://arxiv.org/abs/1803.09015
47 |
48 | This project is based on the original [did R
49 | package](https://github.com/bcallaway11/did).
50 |
51 | ## Instalation
52 |
53 | You can install **csdid** from `pypi` with:
54 |
55 | pip install csdid
56 |
57 | or via github:
58 |
59 | pip install git+https://github.com/d2cml-ai/csdid/
60 |
61 | ### Dependencies
62 |
63 | Additionally, I have created an additional library called `drdid`, which
64 | can be installed via GitHub.
65 |
66 | pip install git+https://github.com/d2cml-ai/DRDID
67 |
68 | ## Basic Example
69 |
70 | The following is a simplified example of the effect of states increasing
71 | their minimum wages on county-level teen employment rates which comes
72 | from [Callaway and Sant’Anna
73 | (2021)](https://authors.elsevier.com/a/1cFzc15Dji4pnC).
74 |
75 | - [More detailed examples are also
76 | available](https://bcallaway11.github.io/did/articles/did-basics.html)
77 |
78 | A subset of the data is available in the package and can be loaded by
79 |
80 | ``` python
81 | from csdid.att_gt import ATTgt
82 | import pandas as pd
83 | data = pd.read_csv("https://raw.githubusercontent.com/d2cml-ai/csdid/function-aggte/data/mpdta.csv")
84 | ```
85 |
86 | The dataset contains 500 observations of county-level teen employment
87 | rates from 2003-2007. Some states are first treated in 2004, some in
88 | 2006, and some in 2007 (see the paper for more details). The important
89 | variables in the dataset are
90 |
91 | - **lemp** This is the log of county-level teen employment. It is the
92 | outcome variable
93 |
94 | - **first.treat** This is the period when a state first increases its
95 | minimum wage. It can be 2004, 2006, or 2007. It is the variable that
96 | defines *group* in this application
97 |
98 | - **year** This is the year and is the *time* variable
99 |
100 | - **countyreal** This is an id number for each county and provides the
101 | individual identifier in this panel data context
102 |
103 | To estimate group-time average treatment effects, use the
104 | **ATTgt().fit()** method
105 |
106 | ``` python
107 | out = ATTgt(yname = "lemp",
108 | gname = "first.treat",
109 | idname = "countyreal",
110 | tname = "year",
111 | xformla = f"lemp~1",
112 | data = data,
113 | ).fit(est_method = 'dr')
114 | ```
115 |
116 | Summary table
117 |
118 | ``` python
119 | out.summ_attgt().summary2
120 | ```
121 |
122 |
123 |
134 |
135 | | | Group | Time | ATT(g, t) | Post | Std. Error | \[95% Pointwise | Conf. Band\] | |
136 | |-----|-------|------|-----------|------|------------|-----------------|--------------|-----|
137 | | 0 | 2004 | 2004 | -0.0105 | 0 | 0.0241 | -0.0781 | 0.0571 | |
138 | | 1 | 2004 | 2005 | -0.0704 | 0 | 0.0324 | -0.1612 | 0.0204 | |
139 | | 2 | 2004 | 2006 | -0.1373 | 0 | 0.0393 | -0.2476 | -0.0269 | \* |
140 | | 3 | 2004 | 2007 | -0.1008 | 0 | 0.0360 | -0.2017 | 0.0001 | |
141 | | 4 | 2006 | 2004 | 0.0065 | 0 | 0.0238 | -0.0601 | 0.0732 | |
142 | | 5 | 2006 | 2005 | -0.0028 | 0 | 0.0188 | -0.0554 | 0.0499 | |
143 | | 6 | 2006 | 2006 | -0.0046 | 0 | 0.0172 | -0.0528 | 0.0437 | |
144 | | 7 | 2006 | 2007 | -0.0412 | 0 | 0.0201 | -0.0976 | 0.0152 | |
145 | | 8 | 2007 | 2004 | 0.0305 | 0 | 0.0147 | -0.0108 | 0.0719 | |
146 | | 9 | 2007 | 2005 | -0.0027 | 0 | 0.0160 | -0.0476 | 0.0421 | |
147 | | 10 | 2007 | 2006 | -0.0311 | 0 | 0.0173 | -0.0796 | 0.0174 | |
148 | | 11 | 2007 | 2007 | -0.0261 | 0 | 0.0171 | -0.0740 | 0.0219 | |
149 |
150 |
151 |
152 | In the graphs, a semicolon `;` should be added to prevent printing the
153 | class and the graph information.
154 |
155 | ``` python
156 | out.plot_attgt();
157 | ```
158 |
159 | /home/runner/work/csdid/csdid/csdid/plots/gplot.py:19: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '['2004' '2005' '2006' '2007']' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
160 | ssresults.loc[:, 'year'] = ssresults['year'].astype(int).astype(str)
161 | /home/runner/work/csdid/csdid/csdid/plots/gplot.py:19: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '['2004' '2005' '2006' '2007']' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
162 | ssresults.loc[:, 'year'] = ssresults['year'].astype(int).astype(str)
163 | /home/runner/work/csdid/csdid/csdid/plots/gplot.py:19: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '['2004' '2005' '2006' '2007']' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
164 | ssresults.loc[:, 'year'] = ssresults['year'].astype(int).astype(str)
165 |
166 | 
167 |
168 | ``` python
169 | out.aggte(typec='calendar');
170 | ```
171 |
172 |
173 |
174 | Overall summary of ATT's based on calendar time aggregation:
175 | ATT Std. Error [95.0% Conf. Int.]
176 | -0.0417 0.0169 -0.0748 -0.0086 *
177 |
178 |
179 | Time Effects (calendar):
180 | Time Estimate Std. Error [95.0% Simult. Conf. Band
181 | 0 2004 -0.0105 0.0244 -0.0584 0.0374
182 | 1 2005 -0.0704 0.0307 -0.1305 -0.0103 *
183 | 2 2006 -0.0488 0.0210 -0.0900 -0.0076 *
184 | 3 2007 -0.0371 0.0136 -0.0637 -0.0105 *
185 | ---
186 | Signif. codes: `*' confidence band does not cover 0
187 | Control Group: Never Treated ,
188 | Anticipation Periods: 0
189 | Estimation Method: Doubly Robust
190 |
191 | ``` python
192 | out.plot_aggte();
193 | ```
194 |
195 | /home/runner/work/csdid/csdid/csdid/plots/gplot.py:19: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '['2004' '2005' '2006' '2007']' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
196 | ssresults.loc[:, 'year'] = ssresults['year'].astype(int).astype(str)
197 |
198 | 
199 |
200 |
201 | **Event Studies**
202 |
203 | Although in the current example it is pretty easy to directly interpret
204 | the group-time average treatment effects, there are many cases where it
205 | is convenient to aggregate the group-time average treatment effects into
206 | a small number of parameters. A main type of aggregation is into an
207 | *event study* plot.
208 |
209 | To make an event study plot in the **csdid** package, one can use the
210 | **aggte** function with **dynamic** option
211 |
212 | ``` python
213 | out.aggte(typec='dynamic');
214 | ```
215 |
216 |
217 |
218 | Overall summary of ATT's based on event-study/dynamic aggregation:
219 | ATT Std. Error [95.0% Conf. Int.]
220 | -0.0772 0.0207 -0.1179 -0.0366 *
221 |
222 |
223 | Dynamic Effects:
224 | Event time Estimate Std. Error [95.0% Simult. Conf. Band
225 | 0 -3 0.0305 0.0146 0.0019 0.0591 *
226 | 1 -2 -0.0006 0.0129 -0.0259 0.0248
227 | 2 -1 -0.0245 0.0141 -0.0521 0.0032
228 | 3 0 -0.0199 0.0117 -0.0428 0.0030
229 | 4 1 -0.0510 0.0154 -0.0811 -0.0208 *
230 | 5 2 -0.1373 0.0366 -0.2091 -0.0655 *
231 | 6 3 -0.1008 0.0337 -0.1669 -0.0347 *
232 | ---
233 | Signif. codes: `*' confidence band does not cover 0
234 | Control Group: Never Treated ,
235 | Anticipation Periods: 0
236 | Estimation Method: Doubly Robust
237 |
238 | The column `event time` is for each group relative to when they first
239 | participate in the treatment. To give some examples, `event time=0`
240 | corresponds to the *on impact* effect, and `event time=-1` is the
241 | *effect* in the period before a unit becomes treated (checking that this
242 | is equal to 0 is potentially useful as a pre-test).
243 |
244 | To plot the event study, use **plot_aggte** method
245 | ``` python
246 | out.plot_aggte();
247 | ```
248 |
249 | /home/runner/work/csdid/csdid/csdid/plots/gplot.py:19: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '['2004' '2005' '2006' '2007']' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
250 | ssresults.loc[:, 'year'] = ssresults['year'].astype(int).astype(str)
251 |
252 | 
253 |
254 | The figure here is very similar to the group-time average treatment
255 | effects. Red dots are pre-treatment periods, blue dots are
256 | post-treatment periods. The difference is that the x-axis is in event
257 | time.
258 |
259 | **Overall Effect of Participating in the Treatment**
260 |
261 | The event study above reported an overall effect of participating in the
262 | treatment. This was computed by averaging the average effects computed
263 | at each length of exposure.
264 |
265 | In many cases, a more general purpose overall treatment effect parameter
266 | is give by computing the average treatment effect for each group, and
267 | then averaging across groups. This sort of procedure provides an average
268 | treatment effect parameter with a very similar interpretation to the
269 | Average Treatment Effect on the Treated (ATT) in the two period and two
270 | group case.
271 |
272 | To compute this overall average treatment effect parameter, use
273 |
274 |
275 | ``` python
276 | out.aggte(typec='group');
277 | ```
278 |
279 |
280 |
281 | Overall summary of ATT's based on group/cohort aggregation:
282 | ATT Std. Error [95.0% Conf. Int.]
283 | -0.031 0.0124 -0.0553 -0.0067 *
284 |
285 |
286 | Group Effects:
287 | Group Estimate Std. Error [95.0% Simult. Conf. Band
288 | 0 2004 -0.0797 0.0301 -0.1387 -0.0208 *
289 | 1 2006 -0.0229 0.0172 -0.0567 0.0109
290 | 2 2007 -0.0261 0.0174 -0.0601 0.0080
291 | ---
292 | Signif. codes: `*' confidence band does not cover 0
293 | Control Group: Never Treated ,
294 | Anticipation Periods: 0
295 | Estimation Method: Doubly Robust
296 |
297 | Of particular interest is the `Overall ATT` in the results. Here, we
298 | estimate that increasing the minimum wage decreased teen employment by
299 | 3.1% and the effect is marginally statistically significant.
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | pandas
2 | numpy==1.24.3
3 | scipy
4 | patsy
5 | plotnine
6 | statsmodels
7 | ipykernel
8 | joblib
9 | twine
10 | pytest
11 | rpy2==3.5.15
12 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 |
3 | # with open('requirements.txt') as f:
4 | # required = f.read().splitlines()
5 | # print(required)
6 | from csdid._version import __version
7 | print(__version)
8 |
9 | setup(
10 | name = 'csdid',
11 | version=__version,
12 | url='https://github.com/d2cml-ai/csdid',
13 | author='D2CML Team, Alexander Quispe, Carlos Guevara, Jhon Flroes',
14 | keywords=['Causal inference', 'Research'],
15 | license="MIT",
16 | description='Difference in Difference in Python',
17 | classifiers=[
18 | "Intended Audience :: Developers",
19 | "Intended Audience :: Education",
20 | "Intended Audience :: Science/Research",
21 | "License :: OSI Approved :: Apache Software License",
22 | "Topic :: Scientific/Engineering",
23 | ],
24 | install_requires=[
25 | 'pandas',
26 | 'numpy<=1.24.3',
27 | 'scipy',
28 | 'patsy',
29 | 'plotnine',
30 | 'twine',
31 | 'joblib',
32 | 'drdid @ git+https://github.com/d2cml-ai/DRDID'
33 | ],
34 | packages=find_packages(),
35 | package_data={
36 | 'data': ['data/*'],
37 | 'configs': ['configs/*']
38 | }
39 | )
40 |
--------------------------------------------------------------------------------
/test/basic.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | import yaml, pandas as pd
4 |
5 | with open('configs/data.yml') as f:
6 | dt = yaml.safe_load(f)
7 |
8 | data = pd.read_csv(dt['mpdata'])
9 |
10 |
11 | yname = "lemp"
12 | gname = "first.treat"
13 | idname = "countyreal"
14 | tname = "year"
15 | xformla = f"lemp~1"
16 |
17 | from csdid.attgt_fnc import preprocess_did
18 | from csdid.attgt_fnc import compute_att_gt
19 |
20 | dp = preprocess_did(yname, tname, idname, gname, data = data, xformla=xformla)
21 |
22 |
23 | # data = mpdta
24 | # print(data)
25 | # print(tname)
26 |
27 | # from csdid.att_gt import ATTgt
28 |
29 | # b = ATTgt(yname, tname, idname, gname, data = data, xformla=xformla).fit()
30 | # c = b.summ_attgt(n = 12).summary2
31 |
32 | # # print(dir(b))
33 | # # print(b.MP)
34 | # # print(b.dp)
35 | # # print(b.results)
36 |
37 | # # print(c)
38 |
39 | # c = b.aggte(balance_e=1)
40 | # print(c)
41 | # b.sdplot()
42 | # b.dplto()
43 | # algo()
--------------------------------------------------------------------------------
/test/test_vs_r.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import numpy as np
4 | import pandas as pd
5 | import pytest
6 | import rpy2.robjects as ro
7 | from rpy2.robjects import pandas2ri
8 |
9 | # rpy2 imports
10 | from rpy2.robjects.packages import importr
11 | from csdid.att_gt import ATTgt
12 |
13 |
14 | pandas2ri.activate()
15 |
16 | did = importr("did")
17 |
18 | @pytest.fixture
19 | def data():
20 |
21 | return pd.read_csv("https://raw.githubusercontent.com/d2cml-ai/csdid/function-aggte/data/mpdta.csv")
22 |
23 | def check_absolute_diff(x1, x2, tol, msg=None):
24 | msg = "" if msg is None else msg
25 | assert np.all(np.abs(x1 - x2) < tol), msg
26 |
27 | def check_relative_diff(x1, x2, tol, msg=None):
28 | msg = "" if msg is None else msg
29 | assert np.all(np.abs(x1 - x2) / np.abs(x1) < tol), msg
30 |
31 | def test_ate(data):
32 |
33 | "Test simple ATE via Py vs R."
34 |
35 | py_did = ATTgt(
36 | yname = "lemp",
37 | gname = "first.treat",
38 | idname = "countyreal",
39 | tname = "year",
40 | data = data,
41 | biters = 20_000,
42 | ).fit(est_method = 'dr')
43 |
44 | py_res = py_did.aggte("simple")
45 | py_coef = py_res.atte.get("overall_att")
46 | py_se = py_res.atte.get("overall_se")
47 |
48 | r_did = did.att_gt(
49 | yname = "lemp",
50 | gname = "first.treat",
51 | idname = "countyreal",
52 | tname = "year",
53 | data = data,
54 | biters = 20_000
55 | )
56 |
57 | r_coef = did.aggte(r_did, type = "simple").rx2('overall.att')
58 | r_se = did.aggte(r_did, type = "simple").rx2('overall.se')
59 |
60 | check_absolute_diff(py_coef, r_coef, 1e-8, "ATEs are not equal.")
61 | check_relative_diff(py_se, r_se, 0.01, "SEs are not equal.")
62 |
--------------------------------------------------------------------------------