├── .gitattributes
├── .github
    └── workflows
    │   ├── ci-tests.yml
    │   ├── gen_readme.yml
    │   └── pypi.yml
├── .gitignore
├── .vscode
    └── settings.json
├── LICENSE
├── Pipfile
├── Pipfile.lock
├── README.md
├── README.qmd
├── README_files
    └── figure-commonmark
    │   ├── cell-5-output-1.png
    │   ├── cell-5-output-2.png
    │   ├── cell-7-output-1.png
    │   ├── cell-7-output-2.png
    │   └── cell-8-output-2.png
├── configs
    └── data.yml
├── csdid
    ├── __init__.py
    ├── _version.py
    ├── aggte_fnc
    │   ├── __init__.py
    │   ├── aggte.py
    │   ├── compute_aggte.py
    │   └── utils.py
    ├── att_gt.py
    ├── attgt_fnc
    │   ├── __init__.py
    │   ├── compute_att_gt.py
    │   └── preprocess_did.py
    ├── plots
    │   ├── __init__.py
    │   └── gplot.py
    └── utils
    │   ├── __init__.py
    │   ├── bmisc.py
    │   └── mboot.py
├── data
    ├── dta.csv
    ├── mpdta.csv
    └── sim_data.csv
├── figs
    ├── did_py.drawio
    ├── did_py.png
    ├── did_r.drawio
    └── did_r.png
├── readme.md
├── requirements.txt
├── setup.py
└── test
    ├── basic.py
    └── test_vs_r.py


/.gitattributes:
--------------------------------------------------------------------------------
1 | *.ipynb export-ignore
2 | 


--------------------------------------------------------------------------------
/.github/workflows/ci-tests.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - main
 7 |   pull_request:
 8 |     branches:
 9 |       - main
10 | 
11 | jobs:
12 |   test:
13 |     name: "Test vs R::did"
14 |     runs-on: ${{ matrix.os }}
15 |     strategy:
16 |       fail-fast: false
17 |       matrix:
18 |         os: ["ubuntu-latest"]
19 |         python-version: ["3.8"]
20 |         pytest_opts: ["--workers 4 --tests-per-worker 1"]
21 | 
22 |     steps:
23 |       - name: Checkout source
24 |         uses: actions/checkout@v4
25 |       - name: Setup python
26 |         uses: actions/setup-python@v5
27 |         with:
28 |           python-version: ${{ matrix.python-version }}
29 |           architecture: x64
30 | 
31 |       - name: Install Dependencies
32 |         run: pip install -r requirements.txt; pip install git+https://github.com/d2cml-ai/DRDID
33 | 
34 |       - name: Install package
35 |         run: pip install .
36 | 
37 |       - name: Setup r2u
38 |         uses: eddelbuettel/github-actions/r2u-setup@master
39 | 
40 |       - name: install R packages
41 |         run: Rscript -e 'install.packages(c("did"))'
42 |         shell: bash
43 | 
44 |       - name: Run tests
45 |         run: |
46 |           pytest test/test_vs_r.py
47 | 
48 | 
49 | 
50 | 


--------------------------------------------------------------------------------
/.github/workflows/gen_readme.yml:
--------------------------------------------------------------------------------
 1 | name: Actualizar Readme
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - main
 7 |     # - cron: "40 15 * * 1,3,5"
 8 | 
 9 | jobs:
10 |   CreateQuarto:
11 |     # if: ${{ github.event.workflow_run.conclusion == 'success' }}
12 |     runs-on: ubuntu-latest
13 |     permissions:
14 |       contents: write
15 |     steps:
16 |       - name: Chequear el  código
17 |         uses: actions/checkout@v4
18 | 
19 |       - name: Set up Quarto
20 |         uses: quarto-dev/quarto-actions/setup@v2
21 | 
22 |       - name: Configurar Python
23 |         uses: actions/setup-python@v5
24 |         with:
25 |           python-version: "3.10"
26 |           cache: "pip"
27 |       - run: pip uninstall csdid; pip install git+https://github.com/d2cml-ai/csdid/; pip install nbclient nbformat PyYAML; pip install git+https://github.com/d2cml-ai/DRDID
28 |       - name: Install Dependencies
29 |         run: pip install -r requirements.txt
30 | 
31 |       - name: Generar quarto doc
32 |         run: quarto render README.qmd
33 | 
34 |       - id: commit
35 |         name: Commit  files
36 |         run: |
37 |           git config --local user.name "action-user"
38 |           git pull
39 |           git config --local user.email "actions@github.com"
40 |           git add -A
41 |           git commit -m "Update Readme"
42 |           git push origin main
43 |         env:
44 |           REPO_KEY: ${{ secrets.GITHUB_TOKEN }}
45 |           username: github-actions
46 | 


--------------------------------------------------------------------------------
/.github/workflows/pypi.yml:
--------------------------------------------------------------------------------
 1 | name: Publish to PyPI
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - main
 7 | 
 8 | jobs:
 9 |   build:
10 |     runs-on: ubuntu-latest
11 |     env:
12 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
13 | 
14 |     steps:
15 |       - uses: actions/checkout@v2
16 | 
17 |       - id: Python_pip
18 |         name: Python dependencies
19 |         uses: actions/setup-python@v2
20 |         with:
21 |           python-version: "3.8"
22 |           cache: "pip"
23 |       - run: pip install -r requirements.txt
24 | 
25 |       - name: Build Package
26 |         run: |
27 |           python setup.py sdist
28 | 
29 |       - name: Upload to PyPi
30 |         env:
31 |           TWINE_USERNAME: ${{ secrets.PYPI_USER}}
32 |           TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
33 |         run: |
34 |           sudo apt-get install tree
35 |           tree /home/runner/work/csdid
36 |           twine upload dist/*
37 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *__pycache__
 2 | .Rproj.user
 3 | methods_r/
 4 | *Rproj
 5 | 
 6 | dist/
 7 | *info
 8 | *html
 9 | test/plot_files
10 | .DS_Store
11 | 


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "editor.tabSize": 2,
 3 |   "editor.insertSpaces": true,
 4 |   "python.linting.pylintEnabled": true,
 5 |   "python.linting.enabled": false,
 6 |   "[python]": {
 7 |     "editor.defaultFormatter": "ms-python.python"
 8 |   },
 9 |   "python.formatting.provider": "none"
10 |   // "python.formatting.provider": "autopep8",
11 |   // "[python]": {
12 |   //   "editor.defaultFormatter": "ms-python.python"
13 |   // },
14 |   // "python.formatting.autopep8Args": ["--indent-size", "2"]
15 | }
16 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 Pedro Sant’Anna, Brantly Callaway, Alexander Quispe, Carlos Guevara, Jhon Flores
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | url = "https://pypi.org/simple"
 3 | verify_ssl = true
 4 | name = "pypi"
 5 | 
 6 | [packages]
 7 | pandas = "*"
 8 | numpy = "<=1.24.3"
 9 | scipy = "*"
10 | patsy = "*"
11 | plotnine = "*"
12 | 
13 | [dev-packages]
14 | 
15 | [requires]
16 | python_version = "3.8"
17 | 


--------------------------------------------------------------------------------
/Pipfile.lock:
--------------------------------------------------------------------------------
  1 | {
  2 |     "_meta": {
  3 |         "hash": {
  4 |             "sha256": "b71b63262874e0de6cb1ba93dd3fc02b136b8229fe98b1c1a4ed640eb68c6a7e"
  5 |         },
  6 |         "pipfile-spec": 6,
  7 |         "requires": {
  8 |             "python_version": "3.8"
  9 |         },
 10 |         "sources": [
 11 |             {
 12 |                 "name": "pypi",
 13 |                 "url": "https://pypi.org/simple",
 14 |                 "verify_ssl": true
 15 |             }
 16 |         ]
 17 |     },
 18 |     "default": {
 19 |         "backports.zoneinfo": {
 20 |             "hashes": [
 21 |                 "sha256:17746bd546106fa389c51dbea67c8b7c8f0d14b5526a579ca6ccf5ed72c526cf",
 22 |                 "sha256:1b13e654a55cd45672cb54ed12148cd33628f672548f373963b0bff67b217328",
 23 |                 "sha256:1c5742112073a563c81f786e77514969acb58649bcdf6cdf0b4ed31a348d4546",
 24 |                 "sha256:4a0f800587060bf8880f954dbef70de6c11bbe59c673c3d818921f042f9954a6",
 25 |                 "sha256:5c144945a7752ca544b4b78c8c41544cdfaf9786f25fe5ffb10e838e19a27570",
 26 |                 "sha256:7b0a64cda4145548fed9efc10322770f929b944ce5cee6c0dfe0c87bf4c0c8c9",
 27 |                 "sha256:8439c030a11780786a2002261569bdf362264f605dfa4d65090b64b05c9f79a7",
 28 |                 "sha256:8961c0f32cd0336fb8e8ead11a1f8cd99ec07145ec2931122faaac1c8f7fd987",
 29 |                 "sha256:89a48c0d158a3cc3f654da4c2de1ceba85263fafb861b98b59040a5086259722",
 30 |                 "sha256:a76b38c52400b762e48131494ba26be363491ac4f9a04c1b7e92483d169f6582",
 31 |                 "sha256:da6013fd84a690242c310d77ddb8441a559e9cb3d3d59ebac9aca1a57b2e18bc",
 32 |                 "sha256:e55b384612d93be96506932a786bbcde5a2db7a9e6a4bb4bffe8b733f5b9036b",
 33 |                 "sha256:e81b76cace8eda1fca50e345242ba977f9be6ae3945af8d46326d776b4cf78d1",
 34 |                 "sha256:e8236383a20872c0cdf5a62b554b27538db7fa1bbec52429d8d106effbaeca08",
 35 |                 "sha256:f04e857b59d9d1ccc39ce2da1021d196e47234873820cbeaad210724b1ee28ac",
 36 |                 "sha256:fadbfe37f74051d024037f223b8e001611eac868b5c5b06144ef4d8b799862f2"
 37 |             ],
 38 |             "markers": "python_version < '3.9'",
 39 |             "version": "==0.2.1"
 40 |         },
 41 |         "contourpy": {
 42 |             "hashes": [
 43 |                 "sha256:052cc634bf903c604ef1a00a5aa093c54f81a2612faedaa43295809ffdde885e",
 44 |                 "sha256:084eaa568400cfaf7179b847ac871582199b1b44d5699198e9602ecbbb5f6104",
 45 |                 "sha256:0b6616375d7de55797d7a66ee7d087efe27f03d336c27cf1f32c02b8c1a5ac70",
 46 |                 "sha256:0b7b04ed0961647691cfe5d82115dd072af7ce8846d31a5fac6c142dcce8b882",
 47 |                 "sha256:143dde50520a9f90e4a2703f367cf8ec96a73042b72e68fcd184e1279962eb6f",
 48 |                 "sha256:17cfaf5ec9862bc93af1ec1f302457371c34e688fbd381f4035a06cd47324f48",
 49 |                 "sha256:181cbace49874f4358e2929aaf7ba84006acb76694102e88dd15af861996c16e",
 50 |                 "sha256:189ceb1525eb0655ab8487a9a9c41f42a73ba52d6789754788d1883fb06b2d8a",
 51 |                 "sha256:18a64814ae7bce73925131381603fff0116e2df25230dfc80d6d690aa6e20b37",
 52 |                 "sha256:1f0cbd657e9bde94cd0e33aa7df94fb73c1ab7799378d3b3f902eb8eb2e04a3a",
 53 |                 "sha256:1f795597073b09d631782e7245016a4323cf1cf0b4e06eef7ea6627e06a37ff2",
 54 |                 "sha256:25ae46595e22f93592d39a7eac3d638cda552c3e1160255258b695f7b58e5655",
 55 |                 "sha256:27bc79200c742f9746d7dd51a734ee326a292d77e7d94c8af6e08d1e6c15d545",
 56 |                 "sha256:2b836d22bd2c7bb2700348e4521b25e077255ebb6ab68e351ab5aa91ca27e027",
 57 |                 "sha256:30f511c05fab7f12e0b1b7730ebdc2ec8deedcfb505bc27eb570ff47c51a8f15",
 58 |                 "sha256:317267d915490d1e84577924bd61ba71bf8681a30e0d6c545f577363157e5e94",
 59 |                 "sha256:397b0ac8a12880412da3551a8cb5a187d3298a72802b45a3bd1805e204ad8439",
 60 |                 "sha256:438ba416d02f82b692e371858143970ed2eb6337d9cdbbede0d8ad9f3d7dd17d",
 61 |                 "sha256:53cc3a40635abedbec7f1bde60f8c189c49e84ac180c665f2cd7c162cc454baa",
 62 |                 "sha256:5d123a5bc63cd34c27ff9c7ac1cd978909e9c71da12e05be0231c608048bb2ae",
 63 |                 "sha256:62013a2cf68abc80dadfd2307299bfa8f5aa0dcaec5b2954caeb5fa094171103",
 64 |                 "sha256:89f06eff3ce2f4b3eb24c1055a26981bffe4e7264acd86f15b97e40530b794bc",
 65 |                 "sha256:90c81f22b4f572f8a2110b0b741bb64e5a6427e0a198b2cdc1fbaf85f352a3aa",
 66 |                 "sha256:911ff4fd53e26b019f898f32db0d4956c9d227d51338fb3b03ec72ff0084ee5f",
 67 |                 "sha256:9382a1c0bc46230fb881c36229bfa23d8c303b889b788b939365578d762b5c18",
 68 |                 "sha256:9f2931ed4741f98f74b410b16e5213f71dcccee67518970c42f64153ea9313b9",
 69 |                 "sha256:a67259c2b493b00e5a4d0f7bfae51fb4b3371395e47d079a4446e9b0f4d70e76",
 70 |                 "sha256:a698c6a7a432789e587168573a864a7ea374c6be8d4f31f9d87c001d5a843493",
 71 |                 "sha256:bc00bb4225d57bff7ebb634646c0ee2a1298402ec10a5fe7af79df9a51c1bfd9",
 72 |                 "sha256:bcb41692aa09aeb19c7c213411854402f29f6613845ad2453d30bf421fe68fed",
 73 |                 "sha256:d4f26b25b4f86087e7d75e63212756c38546e70f2a92d2be44f80114826e1cd4",
 74 |                 "sha256:d551f3a442655f3dcc1285723f9acd646ca5858834efeab4598d706206b09c9f",
 75 |                 "sha256:dffcc2ddec1782dd2f2ce1ef16f070861af4fb78c69862ce0aab801495dda6a3",
 76 |                 "sha256:e53046c3863828d21d531cc3b53786e6580eb1ba02477e8681009b6aa0870b21",
 77 |                 "sha256:e5cec36c5090e75a9ac9dbd0ff4a8cf7cecd60f1b6dc23a374c7d980a1cd710e",
 78 |                 "sha256:e7a117ce7df5a938fe035cad481b0189049e8d92433b4b33aa7fc609344aafa1",
 79 |                 "sha256:e94bef2580e25b5fdb183bf98a2faa2adc5b638736b2c0a4da98691da641316a",
 80 |                 "sha256:ed614aea8462735e7d70141374bd7650afd1c3f3cb0c2dbbcbe44e14331bf002",
 81 |                 "sha256:fb3b7d9e6243bfa1efb93ccfe64ec610d85cfe5aec2c25f97fbbd2e58b531256"
 82 |             ],
 83 |             "markers": "python_version >= '3.8'",
 84 |             "version": "==1.1.0"
 85 |         },
 86 |         "cycler": {
 87 |             "hashes": [
 88 |                 "sha256:3a27e95f763a428a739d2add979fa7494c912a32c17c4c38c4d5f082cad165a3",
 89 |                 "sha256:9c87405839a19696e837b3b818fed3f5f69f16f1eec1a1ad77e043dcea9c772f"
 90 |             ],
 91 |             "markers": "python_version >= '3.6'",
 92 |             "version": "==0.11.0"
 93 |         },
 94 |         "fonttools": {
 95 |             "hashes": [
 96 |                 "sha256:00ab569b2a3e591e00425023ade87e8fef90380c1dde61be7691cb524ca5f743",
 97 |                 "sha256:022c4a16b412293e7f1ce21b8bab7a6f9d12c4ffdf171fdc67122baddb973069",
 98 |                 "sha256:05171f3c546f64d78569f10adc0de72561882352cac39ec7439af12304d8d8c0",
 99 |                 "sha256:14037c31138fbd21847ad5e5441dfdde003e0a8f3feb5812a1a21fd1c255ffbd",
100 |                 "sha256:15abb3d055c1b2dff9ce376b6c3db10777cb74b37b52b78f61657634fd348a0d",
101 |                 "sha256:18ea64ac43e94c9e0c23d7a9475f1026be0e25b10dda8f236fc956188761df97",
102 |                 "sha256:1a003608400dd1cca3e089e8c94973c6b51a4fb1ef00ff6d7641617b9242e637",
103 |                 "sha256:1bc4c5b147be8dbc5df9cc8ac5e93ee914ad030fe2a201cc8f02f499db71011d",
104 |                 "sha256:200729d12461e2038700d31f0d49ad5a7b55855dec7525074979a06b46f88505",
105 |                 "sha256:337b6e83d7ee73c40ea62407f2ce03b07c3459e213b6f332b94a69923b9e1cb9",
106 |                 "sha256:37467cee0f32cada2ec08bc16c9c31f9b53ea54b2f5604bf25a1246b5f50593a",
107 |                 "sha256:425b74a608427499b0e45e433c34ddc350820b6f25b7c8761963a08145157a66",
108 |                 "sha256:530c5d35109f3e0cea2535742d6a3bc99c0786cf0cbd7bb2dc9212387f0d908c",
109 |                 "sha256:56d4d85f5374b45b08d2f928517d1e313ea71b4847240398decd0ab3ebbca885",
110 |                 "sha256:5e00334c66f4e83535384cb5339526d01d02d77f142c23b2f97bd6a4f585497a",
111 |                 "sha256:5fdf60f8a5c6bcce7d024a33f7e4bc7921f5b74e8ea13bccd204f2c8b86f3470",
112 |                 "sha256:6a8d71b9a5c884c72741868e845c0e563c5d83dcaf10bb0ceeec3b4b2eb14c67",
113 |                 "sha256:6d5adf4ba114f028fc3f5317a221fd8b0f4ef7a2e5524a2b1e0fd891b093791a",
114 |                 "sha256:7449e5e306f3a930a8944c85d0cbc8429cba13503372a1a40f23124d6fb09b58",
115 |                 "sha256:7961575221e3da0841c75da53833272c520000d76f7f71274dbf43370f8a1065",
116 |                 "sha256:7f6e3fa3da923063c286320e728ba2270e49c73386e3a711aa680f4b0747d692",
117 |                 "sha256:882983279bf39afe4e945109772c2ffad2be2c90983d6559af8b75c19845a80a",
118 |                 "sha256:8a917828dbfdb1cbe50cf40eeae6fbf9c41aef9e535649ed8f4982b2ef65c091",
119 |                 "sha256:8c4305b171b61040b1ee75d18f9baafe58bd3b798d1670078efe2c92436bfb63",
120 |                 "sha256:91784e21a1a085fac07c6a407564f4a77feb471b5954c9ee55a4f9165151f6c1",
121 |                 "sha256:94c915f6716589f78bc00fbc14c5b8de65cfd11ee335d32504f1ef234524cb24",
122 |                 "sha256:97d95b8301b62bdece1af943b88bcb3680fd385f88346a4a899ee145913b414a",
123 |                 "sha256:a954b90d1473c85a22ecf305761d9fd89da93bbd31dae86e7dea436ad2cb5dc9",
124 |                 "sha256:aa83b3f151bc63970f39b2b42a06097c5a22fd7ed9f7ba008e618de4503d3895",
125 |                 "sha256:b802dcbf9bcff74672f292b2466f6589ab8736ce4dcf36f48eb994c2847c4b30",
126 |                 "sha256:bae8c13abbc2511e9a855d2142c0ab01178dd66b1a665798f357da0d06253e0d",
127 |                 "sha256:c55f1b4109dbc3aeb496677b3e636d55ef46dc078c2a5e3f3db4e90f1c6d2907",
128 |                 "sha256:eb52c10fda31159c22c7ed85074e05f8b97da8773ea461706c273e31bcbea836",
129 |                 "sha256:ec468c022d09f1817c691cf884feb1030ef6f1e93e3ea6831b0d8144c06480d1"
130 |             ],
131 |             "markers": "python_version >= '3.8'",
132 |             "version": "==4.40.0"
133 |         },
134 |         "importlib-resources": {
135 |             "hashes": [
136 |                 "sha256:4be82589bf5c1d7999aedf2a45159d10cb3ca4f19b2271f8792bc8e6da7b22f6",
137 |                 "sha256:7b1deeebbf351c7578e09bf2f63fa2ce8b5ffec296e0d349139d43cca061a81a"
138 |             ],
139 |             "markers": "python_version < '3.10'",
140 |             "version": "==5.12.0"
141 |         },
142 |         "kiwisolver": {
143 |             "hashes": [
144 |                 "sha256:02f79693ec433cb4b5f51694e8477ae83b3205768a6fb48ffba60549080e295b",
145 |                 "sha256:03baab2d6b4a54ddbb43bba1a3a2d1627e82d205c5cf8f4c924dc49284b87166",
146 |                 "sha256:1041feb4cda8708ce73bb4dcb9ce1ccf49d553bf87c3954bdfa46f0c3f77252c",
147 |                 "sha256:10ee06759482c78bdb864f4109886dff7b8a56529bc1609d4f1112b93fe6423c",
148 |                 "sha256:1d1573129aa0fd901076e2bfb4275a35f5b7aa60fbfb984499d661ec950320b0",
149 |                 "sha256:283dffbf061a4ec60391d51e6155e372a1f7a4f5b15d59c8505339454f8989e4",
150 |                 "sha256:28bc5b299f48150b5f822ce68624e445040595a4ac3d59251703779836eceff9",
151 |                 "sha256:2a66fdfb34e05b705620dd567f5a03f239a088d5a3f321e7b6ac3239d22aa286",
152 |                 "sha256:2e307eb9bd99801f82789b44bb45e9f541961831c7311521b13a6c85afc09767",
153 |                 "sha256:2e407cb4bd5a13984a6c2c0fe1845e4e41e96f183e5e5cd4d77a857d9693494c",
154 |                 "sha256:2f5e60fabb7343a836360c4f0919b8cd0d6dbf08ad2ca6b9cf90bf0c76a3c4f6",
155 |                 "sha256:36dafec3d6d6088d34e2de6b85f9d8e2324eb734162fba59d2ba9ed7a2043d5b",
156 |                 "sha256:3fe20f63c9ecee44560d0e7f116b3a747a5d7203376abeea292ab3152334d004",
157 |                 "sha256:41dae968a94b1ef1897cb322b39360a0812661dba7c682aa45098eb8e193dbdf",
158 |                 "sha256:4bd472dbe5e136f96a4b18f295d159d7f26fd399136f5b17b08c4e5f498cd494",
159 |                 "sha256:4ea39b0ccc4f5d803e3337dd46bcce60b702be4d86fd0b3d7531ef10fd99a1ac",
160 |                 "sha256:5853eb494c71e267912275e5586fe281444eb5e722de4e131cddf9d442615626",
161 |                 "sha256:5bce61af018b0cb2055e0e72e7d65290d822d3feee430b7b8203d8a855e78766",
162 |                 "sha256:6295ecd49304dcf3bfbfa45d9a081c96509e95f4b9d0eb7ee4ec0530c4a96514",
163 |                 "sha256:62ac9cc684da4cf1778d07a89bf5f81b35834cb96ca523d3a7fb32509380cbf6",
164 |                 "sha256:70e7c2e7b750585569564e2e5ca9845acfaa5da56ac46df68414f29fea97be9f",
165 |                 "sha256:7577c1987baa3adc4b3c62c33bd1118c3ef5c8ddef36f0f2c950ae0b199e100d",
166 |                 "sha256:75facbe9606748f43428fc91a43edb46c7ff68889b91fa31f53b58894503a191",
167 |                 "sha256:787518a6789009c159453da4d6b683f468ef7a65bbde796bcea803ccf191058d",
168 |                 "sha256:78d6601aed50c74e0ef02f4204da1816147a6d3fbdc8b3872d263338a9052c51",
169 |                 "sha256:7c43e1e1206cd421cd92e6b3280d4385d41d7166b3ed577ac20444b6995a445f",
170 |                 "sha256:81e38381b782cc7e1e46c4e14cd997ee6040768101aefc8fa3c24a4cc58e98f8",
171 |                 "sha256:841293b17ad704d70c578f1f0013c890e219952169ce8a24ebc063eecf775454",
172 |                 "sha256:872b8ca05c40d309ed13eb2e582cab0c5a05e81e987ab9c521bf05ad1d5cf5cb",
173 |                 "sha256:877272cf6b4b7e94c9614f9b10140e198d2186363728ed0f701c6eee1baec1da",
174 |                 "sha256:8c808594c88a025d4e322d5bb549282c93c8e1ba71b790f539567932722d7bd8",
175 |                 "sha256:8ed58b8acf29798b036d347791141767ccf65eee7f26bde03a71c944449e53de",
176 |                 "sha256:91672bacaa030f92fc2f43b620d7b337fd9a5af28b0d6ed3f77afc43c4a64b5a",
177 |                 "sha256:968f44fdbf6dd757d12920d63b566eeb4d5b395fd2d00d29d7ef00a00582aac9",
178 |                 "sha256:9f85003f5dfa867e86d53fac6f7e6f30c045673fa27b603c397753bebadc3008",
179 |                 "sha256:a553dadda40fef6bfa1456dc4be49b113aa92c2a9a9e8711e955618cd69622e3",
180 |                 "sha256:a68b62a02953b9841730db7797422f983935aeefceb1679f0fc85cbfbd311c32",
181 |                 "sha256:abbe9fa13da955feb8202e215c4018f4bb57469b1b78c7a4c5c7b93001699938",
182 |                 "sha256:ad881edc7ccb9d65b0224f4e4d05a1e85cf62d73aab798943df6d48ab0cd79a1",
183 |                 "sha256:b1792d939ec70abe76f5054d3f36ed5656021dcad1322d1cc996d4e54165cef9",
184 |                 "sha256:b428ef021242344340460fa4c9185d0b1f66fbdbfecc6c63eff4b7c29fad429d",
185 |                 "sha256:b533558eae785e33e8c148a8d9921692a9fe5aa516efbdff8606e7d87b9d5824",
186 |                 "sha256:ba59c92039ec0a66103b1d5fe588fa546373587a7d68f5c96f743c3396afc04b",
187 |                 "sha256:bc8d3bd6c72b2dd9decf16ce70e20abcb3274ba01b4e1c96031e0c4067d1e7cd",
188 |                 "sha256:bc9db8a3efb3e403e4ecc6cd9489ea2bac94244f80c78e27c31dcc00d2790ac2",
189 |                 "sha256:bf7d9fce9bcc4752ca4a1b80aabd38f6d19009ea5cbda0e0856983cf6d0023f5",
190 |                 "sha256:c2dbb44c3f7e6c4d3487b31037b1bdbf424d97687c1747ce4ff2895795c9bf69",
191 |                 "sha256:c79ebe8f3676a4c6630fd3f777f3cfecf9289666c84e775a67d1d358578dc2e3",
192 |                 "sha256:c97528e64cb9ebeff9701e7938653a9951922f2a38bd847787d4a8e498cc83ae",
193 |                 "sha256:d0611a0a2a518464c05ddd5a3a1a0e856ccc10e67079bb17f265ad19ab3c7597",
194 |                 "sha256:d06adcfa62a4431d404c31216f0f8ac97397d799cd53800e9d3efc2fbb3cf14e",
195 |                 "sha256:d41997519fcba4a1e46eb4a2fe31bc12f0ff957b2b81bac28db24744f333e955",
196 |                 "sha256:d5b61785a9ce44e5a4b880272baa7cf6c8f48a5180c3e81c59553ba0cb0821ca",
197 |                 "sha256:da152d8cdcab0e56e4f45eb08b9aea6455845ec83172092f09b0e077ece2cf7a",
198 |                 "sha256:da7e547706e69e45d95e116e6939488d62174e033b763ab1496b4c29b76fabea",
199 |                 "sha256:db5283d90da4174865d520e7366801a93777201e91e79bacbac6e6927cbceede",
200 |                 "sha256:db608a6757adabb32f1cfe6066e39b3706d8c3aa69bbc353a5b61edad36a5cb4",
201 |                 "sha256:e0ea21f66820452a3f5d1655f8704a60d66ba1191359b96541eaf457710a5fc6",
202 |                 "sha256:e7da3fec7408813a7cebc9e4ec55afed2d0fd65c4754bc376bf03498d4e92686",
203 |                 "sha256:e92a513161077b53447160b9bd8f522edfbed4bd9759e4c18ab05d7ef7e49408",
204 |                 "sha256:ecb1fa0db7bf4cff9dac752abb19505a233c7f16684c5826d1f11ebd9472b871",
205 |                 "sha256:efda5fc8cc1c61e4f639b8067d118e742b812c930f708e6667a5ce0d13499e29",
206 |                 "sha256:f0a1dbdb5ecbef0d34eb77e56fcb3e95bbd7e50835d9782a45df81cc46949750",
207 |                 "sha256:f0a71d85ecdd570ded8ac3d1c0f480842f49a40beb423bb8014539a9f32a5897",
208 |                 "sha256:f4f270de01dd3e129a72efad823da90cc4d6aafb64c410c9033aba70db9f1ff0",
209 |                 "sha256:f6cb459eea32a4e2cf18ba5fcece2dbdf496384413bc1bae15583f19e567f3b2",
210 |                 "sha256:f8ad8285b01b0d4695102546b342b493b3ccc6781fc28c8c6a1bb63e95d22f09",
211 |                 "sha256:f9f39e2f049db33a908319cf46624a569b36983c7c78318e9726a4cb8923b26c"
212 |             ],
213 |             "markers": "python_version >= '3.7'",
214 |             "version": "==1.4.4"
215 |         },
216 |         "matplotlib": {
217 |             "hashes": [
218 |                 "sha256:08308bae9e91aca1ec6fd6dda66237eef9f6294ddb17f0d0b3c863169bf82353",
219 |                 "sha256:14645aad967684e92fc349493fa10c08a6da514b3d03a5931a1bac26e6792bd1",
220 |                 "sha256:21e9cff1a58d42e74d01153360de92b326708fb205250150018a52c70f43c290",
221 |                 "sha256:28506a03bd7f3fe59cd3cd4ceb2a8d8a2b1db41afede01f66c42561b9be7b4b7",
222 |                 "sha256:2bf092f9210e105f414a043b92af583c98f50050559616930d884387d0772aba",
223 |                 "sha256:3032884084f541163f295db8a6536e0abb0db464008fadca6c98aaf84ccf4717",
224 |                 "sha256:3a2cb34336110e0ed8bb4f650e817eed61fa064acbefeb3591f1b33e3a84fd96",
225 |                 "sha256:3ba2af245e36990facf67fde840a760128ddd71210b2ab6406e640188d69d136",
226 |                 "sha256:3d7bc90727351fb841e4d8ae620d2d86d8ed92b50473cd2b42ce9186104ecbba",
227 |                 "sha256:438196cdf5dc8d39b50a45cb6e3f6274edbcf2254f85fa9b895bf85851c3a613",
228 |                 "sha256:46a561d23b91f30bccfd25429c3c706afe7d73a5cc64ef2dfaf2b2ac47c1a5dc",
229 |                 "sha256:4cf327e98ecf08fcbb82685acaf1939d3338548620ab8dfa02828706402c34de",
230 |                 "sha256:4f99e1b234c30c1e9714610eb0c6d2f11809c9c78c984a613ae539ea2ad2eb4b",
231 |                 "sha256:544764ba51900da4639c0f983b323d288f94f65f4024dc40ecb1542d74dc0500",
232 |                 "sha256:56d94989191de3fcc4e002f93f7f1be5da476385dde410ddafbb70686acf00ea",
233 |                 "sha256:57bfb8c8ea253be947ccb2bc2d1bb3862c2bccc662ad1b4626e1f5e004557042",
234 |                 "sha256:617f14ae9d53292ece33f45cba8503494ee199a75b44de7717964f70637a36aa",
235 |                 "sha256:6eb88d87cb2c49af00d3bbc33a003f89fd9f78d318848da029383bfc08ecfbfb",
236 |                 "sha256:75d4725d70b7c03e082bbb8a34639ede17f333d7247f56caceb3801cb6ff703d",
237 |                 "sha256:770a205966d641627fd5cf9d3cb4b6280a716522cd36b8b284a8eb1581310f61",
238 |                 "sha256:7b73305f25eab4541bd7ee0b96d87e53ae9c9f1823be5659b806cd85786fe882",
239 |                 "sha256:7c9a4b2da6fac77bcc41b1ea95fadb314e92508bf5493ceff058e727e7ecf5b0",
240 |                 "sha256:81a6b377ea444336538638d31fdb39af6be1a043ca5e343fe18d0f17e098770b",
241 |                 "sha256:83111e6388dec67822e2534e13b243cc644c7494a4bb60584edbff91585a83c6",
242 |                 "sha256:8704726d33e9aa8a6d5215044b8d00804561971163563e6e6591f9dcf64340cc",
243 |                 "sha256:89768d84187f31717349c6bfadc0e0d8c321e8eb34522acec8a67b1236a66332",
244 |                 "sha256:8bf26ade3ff0f27668989d98c8435ce9327d24cffb7f07d24ef609e33d582439",
245 |                 "sha256:8c587963b85ce41e0a8af53b9b2de8dddbf5ece4c34553f7bd9d066148dc719c",
246 |                 "sha256:95cbc13c1fc6844ab8812a525bbc237fa1470863ff3dace7352e910519e194b1",
247 |                 "sha256:97cc368a7268141afb5690760921765ed34867ffb9655dd325ed207af85c7529",
248 |                 "sha256:a867bf73a7eb808ef2afbca03bcdb785dae09595fbe550e1bab0cd023eba3de0",
249 |                 "sha256:b867e2f952ed592237a1828f027d332d8ee219ad722345b79a001f49df0936eb",
250 |                 "sha256:c0bd19c72ae53e6ab979f0ac6a3fafceb02d2ecafa023c5cca47acd934d10be7",
251 |                 "sha256:ce463ce590f3825b52e9fe5c19a3c6a69fd7675a39d589e8b5fbe772272b3a24",
252 |                 "sha256:cf0e4f727534b7b1457898c4f4ae838af1ef87c359b76dcd5330fa31893a3ac7",
253 |                 "sha256:def58098f96a05f90af7e92fd127d21a287068202aa43b2a93476170ebd99e87",
254 |                 "sha256:e99bc9e65901bb9a7ce5e7bb24af03675cbd7c70b30ac670aa263240635999a4",
255 |                 "sha256:eb7d248c34a341cd4c31a06fd34d64306624c8cd8d0def7abb08792a5abfd556",
256 |                 "sha256:f67bfdb83a8232cb7a92b869f9355d677bce24485c460b19d01970b64b2ed476",
257 |                 "sha256:f883a22a56a84dba3b588696a2b8a1ab0d2c3d41be53264115c71b0a942d8fdb",
258 |                 "sha256:fbdeeb58c0cf0595efe89c05c224e0a502d1aa6a8696e68a73c3efc6bc354304"
259 |             ],
260 |             "markers": "python_version >= '3.8'",
261 |             "version": "==3.7.1"
262 |         },
263 |         "mizani": {
264 |             "hashes": [
265 |                 "sha256:1d481a4dc673caa9b7cfdc6505b9401f0e9a9f43434d748df0678a1a4017b0e2",
266 |                 "sha256:e8b7c791041dbb5ba832649922070a6224ccfc7b45e5685f22c9b19a92c048c7"
267 |             ],
268 |             "markers": "python_version >= '3.8'",
269 |             "version": "==0.9.2"
270 |         },
271 |         "numpy": {
272 |             "hashes": [
273 |                 "sha256:0ec87a7084caa559c36e0a2309e4ecb1baa03b687201d0a847c8b0ed476a7187",
274 |                 "sha256:1a7d6acc2e7524c9955e5c903160aa4ea083736fde7e91276b0e5d98e6332812",
275 |                 "sha256:202de8f38fc4a45a3eea4b63e2f376e5f2dc64ef0fa692838e31a808520efaf7",
276 |                 "sha256:210461d87fb02a84ef243cac5e814aad2b7f4be953b32cb53327bb49fd77fbb4",
277 |                 "sha256:2d926b52ba1367f9acb76b0df6ed21f0b16a1ad87c6720a1121674e5cf63e2b6",
278 |                 "sha256:352ee00c7f8387b44d19f4cada524586f07379c0d49270f87233983bc5087ca0",
279 |                 "sha256:35400e6a8d102fd07c71ed7dcadd9eb62ee9a6e84ec159bd48c28235bbb0f8e4",
280 |                 "sha256:3c1104d3c036fb81ab923f507536daedc718d0ad5a8707c6061cdfd6d184e570",
281 |                 "sha256:4719d5aefb5189f50887773699eaf94e7d1e02bf36c1a9d353d9f46703758ca4",
282 |                 "sha256:4749e053a29364d3452c034827102ee100986903263e89884922ef01a0a6fd2f",
283 |                 "sha256:5342cf6aad47943286afa6f1609cad9b4266a05e7f2ec408e2cf7aea7ff69d80",
284 |                 "sha256:56e48aec79ae238f6e4395886b5eaed058abb7231fb3361ddd7bfdf4eed54289",
285 |                 "sha256:76e3f4e85fc5d4fd311f6e9b794d0c00e7002ec122be271f2019d63376f1d385",
286 |                 "sha256:7776ea65423ca6a15255ba1872d82d207bd1e09f6d0894ee4a64678dd2204078",
287 |                 "sha256:784c6da1a07818491b0ffd63c6bbe5a33deaa0e25a20e1b3ea20cf0e43f8046c",
288 |                 "sha256:8535303847b89aa6b0f00aa1dc62867b5a32923e4d1681a35b5eef2d9591a463",
289 |                 "sha256:9a7721ec204d3a237225db3e194c25268faf92e19338a35f3a224469cb6039a3",
290 |                 "sha256:a1d3c026f57ceaad42f8231305d4653d5f05dc6332a730ae5c0bea3513de0950",
291 |                 "sha256:ab344f1bf21f140adab8e47fdbc7c35a477dc01408791f8ba00d018dd0bc5155",
292 |                 "sha256:ab5f23af8c16022663a652d3b25dcdc272ac3f83c3af4c02eb8b824e6b3ab9d7",
293 |                 "sha256:ae8d0be48d1b6ed82588934aaaa179875e7dc4f3d84da18d7eae6eb3f06c242c",
294 |                 "sha256:c91c4afd8abc3908e00a44b2672718905b8611503f7ff87390cc0ac3423fb096",
295 |                 "sha256:d5036197ecae68d7f491fcdb4df90082b0d4960ca6599ba2659957aafced7c17",
296 |                 "sha256:d6cc757de514c00b24ae8cf5c876af2a7c3df189028d68c0cb4eaa9cd5afc2bf",
297 |                 "sha256:d933fabd8f6a319e8530d0de4fcc2e6a61917e0b0c271fded460032db42a0fe4",
298 |                 "sha256:ea8282b9bcfe2b5e7d491d0bf7f3e2da29700cec05b49e64d6246923329f2b02",
299 |                 "sha256:ecde0f8adef7dfdec993fd54b0f78183051b6580f606111a6d789cd14c61ea0c",
300 |                 "sha256:f21c442fdd2805e91799fbe044a7b999b8571bb0ab0f7850d0cb9641a687092b"
301 |             ],
302 |             "index": "pypi",
303 |             "version": "==1.24.3"
304 |         },
305 |         "packaging": {
306 |             "hashes": [
307 |                 "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61",
308 |                 "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"
309 |             ],
310 |             "markers": "python_version >= '3.7'",
311 |             "version": "==23.1"
312 |         },
313 |         "pandas": {
314 |             "hashes": [
315 |                 "sha256:02755de164da6827764ceb3bbc5f64b35cb12394b1024fdf88704d0fa06e0e2f",
316 |                 "sha256:0a1e0576611641acde15c2322228d138258f236d14b749ad9af498ab69089e2d",
317 |                 "sha256:1eb09a242184092f424b2edd06eb2b99d06dc07eeddff9929e8667d4ed44e181",
318 |                 "sha256:30a89d0fec4263ccbf96f68592fd668939481854d2ff9da709d32a047689393b",
319 |                 "sha256:50e451932b3011b61d2961b4185382c92cc8c6ee4658dcd4f320687bb2d000ee",
320 |                 "sha256:51a93d422fbb1bd04b67639ba4b5368dffc26923f3ea32a275d2cc450f1d1c86",
321 |                 "sha256:598e9020d85a8cdbaa1815eb325a91cfff2bb2b23c1442549b8a3668e36f0f77",
322 |                 "sha256:66d00300f188fa5de73f92d5725ced162488f6dc6ad4cecfe4144ca29debe3b8",
323 |                 "sha256:69167693cb8f9b3fc060956a5d0a0a8dbfed5f980d9fd2c306fb5b9c855c814c",
324 |                 "sha256:6d6d10c2142d11d40d6e6c0a190b1f89f525bcf85564707e31b0a39e3b398e08",
325 |                 "sha256:713f2f70abcdade1ddd68fc91577cb090b3544b07ceba78a12f799355a13ee44",
326 |                 "sha256:7376e13d28eb16752c398ca1d36ccfe52bf7e887067af9a0474de6331dd948d2",
327 |                 "sha256:77550c8909ebc23e56a89f91b40ad01b50c42cfbfab49b3393694a50549295ea",
328 |                 "sha256:7b21cb72958fc49ad757685db1919021d99650d7aaba676576c9e88d3889d456",
329 |                 "sha256:9ebb9f1c22ddb828e7fd017ea265a59d80461d5a79154b49a4207bd17514d122",
330 |                 "sha256:a18e5c72b989ff0f7197707ceddc99828320d0ca22ab50dd1b9e37db45b010c0",
331 |                 "sha256:a6b5f14cd24a2ed06e14255ff40fe2ea0cfaef79a8dd68069b7ace74bd6acbba",
332 |                 "sha256:b42b120458636a981077cfcfa8568c031b3e8709701315e2bfa866324a83efa8",
333 |                 "sha256:c4af689352c4fe3d75b2834933ee9d0ccdbf5d7a8a7264f0ce9524e877820c08",
334 |                 "sha256:c7319b6e68de14e6209460f72a8d1ef13c09fb3d3ef6c37c1e65b35d50b5c145",
335 |                 "sha256:cf3f0c361a4270185baa89ec7ab92ecaa355fe783791457077473f974f654df5",
336 |                 "sha256:dd46bde7309088481b1cf9c58e3f0e204b9ff9e3244f441accd220dd3365ce7c",
337 |                 "sha256:dd5476b6c3fe410ee95926873f377b856dbc4e81a9c605a0dc05aaccc6a7c6c6",
338 |                 "sha256:e69140bc2d29a8556f55445c15f5794490852af3de0f609a24003ef174528b79",
339 |                 "sha256:f908a77cbeef9bbd646bd4b81214cbef9ac3dda4181d5092a4aa9797d1bc7774"
340 |             ],
341 |             "index": "pypi",
342 |             "version": "==2.0.2"
343 |         },
344 |         "patsy": {
345 |             "hashes": [
346 |                 "sha256:7eb5349754ed6aa982af81f636479b1b8db9d5b1a6e957a6016ec0534b5c86b7",
347 |                 "sha256:bdc18001875e319bc91c812c1eb6a10be4bb13cb81eb763f466179dca3b67277"
348 |             ],
349 |             "index": "pypi",
350 |             "version": "==0.5.3"
351 |         },
352 |         "pillow": {
353 |             "hashes": [
354 |                 "sha256:07999f5834bdc404c442146942a2ecadd1cb6292f5229f4ed3b31e0a108746b1",
355 |                 "sha256:0852ddb76d85f127c135b6dd1f0bb88dbb9ee990d2cd9aa9e28526c93e794fba",
356 |                 "sha256:1781a624c229cb35a2ac31cc4a77e28cafc8900733a864870c49bfeedacd106a",
357 |                 "sha256:1e7723bd90ef94eda669a3c2c19d549874dd5badaeefabefd26053304abe5799",
358 |                 "sha256:229e2c79c00e85989a34b5981a2b67aa079fd08c903f0aaead522a1d68d79e51",
359 |                 "sha256:22baf0c3cf0c7f26e82d6e1adf118027afb325e703922c8dfc1d5d0156bb2eeb",
360 |                 "sha256:252a03f1bdddce077eff2354c3861bf437c892fb1832f75ce813ee94347aa9b5",
361 |                 "sha256:2dfaaf10b6172697b9bceb9a3bd7b951819d1ca339a5ef294d1f1ac6d7f63270",
362 |                 "sha256:322724c0032af6692456cd6ed554bb85f8149214d97398bb80613b04e33769f6",
363 |                 "sha256:35f6e77122a0c0762268216315bf239cf52b88865bba522999dc38f1c52b9b47",
364 |                 "sha256:375f6e5ee9620a271acb6820b3d1e94ffa8e741c0601db4c0c4d3cb0a9c224bf",
365 |                 "sha256:3ded42b9ad70e5f1754fb7c2e2d6465a9c842e41d178f262e08b8c85ed8a1d8e",
366 |                 "sha256:432b975c009cf649420615388561c0ce7cc31ce9b2e374db659ee4f7d57a1f8b",
367 |                 "sha256:482877592e927fd263028c105b36272398e3e1be3269efda09f6ba21fd83ec66",
368 |                 "sha256:489f8389261e5ed43ac8ff7b453162af39c3e8abd730af8363587ba64bb2e865",
369 |                 "sha256:54f7102ad31a3de5666827526e248c3530b3a33539dbda27c6843d19d72644ec",
370 |                 "sha256:560737e70cb9c6255d6dcba3de6578a9e2ec4b573659943a5e7e4af13f298f5c",
371 |                 "sha256:5671583eab84af046a397d6d0ba25343c00cd50bce03787948e0fff01d4fd9b1",
372 |                 "sha256:5ba1b81ee69573fe7124881762bb4cd2e4b6ed9dd28c9c60a632902fe8db8b38",
373 |                 "sha256:5d4ebf8e1db4441a55c509c4baa7a0587a0210f7cd25fcfe74dbbce7a4bd1906",
374 |                 "sha256:60037a8db8750e474af7ffc9faa9b5859e6c6d0a50e55c45576bf28be7419705",
375 |                 "sha256:608488bdcbdb4ba7837461442b90ea6f3079397ddc968c31265c1e056964f1ef",
376 |                 "sha256:6608ff3bf781eee0cd14d0901a2b9cc3d3834516532e3bd673a0a204dc8615fc",
377 |                 "sha256:662da1f3f89a302cc22faa9f14a262c2e3951f9dbc9617609a47521c69dd9f8f",
378 |                 "sha256:7002d0797a3e4193c7cdee3198d7c14f92c0836d6b4a3f3046a64bd1ce8df2bf",
379 |                 "sha256:763782b2e03e45e2c77d7779875f4432e25121ef002a41829d8868700d119392",
380 |                 "sha256:77165c4a5e7d5a284f10a6efaa39a0ae8ba839da344f20b111d62cc932fa4e5d",
381 |                 "sha256:7c9af5a3b406a50e313467e3565fc99929717f780164fe6fbb7704edba0cebbe",
382 |                 "sha256:7ec6f6ce99dab90b52da21cf0dc519e21095e332ff3b399a357c187b1a5eee32",
383 |                 "sha256:833b86a98e0ede388fa29363159c9b1a294b0905b5128baf01db683672f230f5",
384 |                 "sha256:84a6f19ce086c1bf894644b43cd129702f781ba5751ca8572f08aa40ef0ab7b7",
385 |                 "sha256:8507eda3cd0608a1f94f58c64817e83ec12fa93a9436938b191b80d9e4c0fc44",
386 |                 "sha256:85ec677246533e27770b0de5cf0f9d6e4ec0c212a1f89dfc941b64b21226009d",
387 |                 "sha256:8aca1152d93dcc27dc55395604dcfc55bed5f25ef4c98716a928bacba90d33a3",
388 |                 "sha256:8d935f924bbab8f0a9a28404422da8af4904e36d5c33fc6f677e4c4485515625",
389 |                 "sha256:8f36397bf3f7d7c6a3abdea815ecf6fd14e7fcd4418ab24bae01008d8d8ca15e",
390 |                 "sha256:91ec6fe47b5eb5a9968c79ad9ed78c342b1f97a091677ba0e012701add857829",
391 |                 "sha256:965e4a05ef364e7b973dd17fc765f42233415974d773e82144c9bbaaaea5d089",
392 |                 "sha256:96e88745a55b88a7c64fa49bceff363a1a27d9a64e04019c2281049444a571e3",
393 |                 "sha256:99eb6cafb6ba90e436684e08dad8be1637efb71c4f2180ee6b8f940739406e78",
394 |                 "sha256:9adf58f5d64e474bed00d69bcd86ec4bcaa4123bfa70a65ce72e424bfb88ed96",
395 |                 "sha256:9b1af95c3a967bf1da94f253e56b6286b50af23392a886720f563c547e48e964",
396 |                 "sha256:a0aa9417994d91301056f3d0038af1199eb7adc86e646a36b9e050b06f526597",
397 |                 "sha256:a0f9bb6c80e6efcde93ffc51256d5cfb2155ff8f78292f074f60f9e70b942d99",
398 |                 "sha256:a127ae76092974abfbfa38ca2d12cbeddcdeac0fb71f9627cc1135bedaf9d51a",
399 |                 "sha256:aaf305d6d40bd9632198c766fb64f0c1a83ca5b667f16c1e79e1661ab5060140",
400 |                 "sha256:aca1c196f407ec7cf04dcbb15d19a43c507a81f7ffc45b690899d6a76ac9fda7",
401 |                 "sha256:ace6ca218308447b9077c14ea4ef381ba0b67ee78d64046b3f19cf4e1139ad16",
402 |                 "sha256:b416f03d37d27290cb93597335a2f85ed446731200705b22bb927405320de903",
403 |                 "sha256:bf548479d336726d7a0eceb6e767e179fbde37833ae42794602631a070d630f1",
404 |                 "sha256:c1170d6b195555644f0616fd6ed929dfcf6333b8675fcca044ae5ab110ded296",
405 |                 "sha256:c380b27d041209b849ed246b111b7c166ba36d7933ec6e41175fd15ab9eb1572",
406 |                 "sha256:c446d2245ba29820d405315083d55299a796695d747efceb5717a8b450324115",
407 |                 "sha256:c830a02caeb789633863b466b9de10c015bded434deb3ec87c768e53752ad22a",
408 |                 "sha256:cb841572862f629b99725ebaec3287fc6d275be9b14443ea746c1dd325053cbd",
409 |                 "sha256:cfa4561277f677ecf651e2b22dc43e8f5368b74a25a8f7d1d4a3a243e573f2d4",
410 |                 "sha256:cfcc2c53c06f2ccb8976fb5c71d448bdd0a07d26d8e07e321c103416444c7ad1",
411 |                 "sha256:d3c6b54e304c60c4181da1c9dadf83e4a54fd266a99c70ba646a9baa626819eb",
412 |                 "sha256:d3d403753c9d5adc04d4694d35cf0391f0f3d57c8e0030aac09d7678fa8030aa",
413 |                 "sha256:d9c206c29b46cfd343ea7cdfe1232443072bbb270d6a46f59c259460db76779a",
414 |                 "sha256:e49eb4e95ff6fd7c0c402508894b1ef0e01b99a44320ba7d8ecbabefddcc5569",
415 |                 "sha256:f8286396b351785801a976b1e85ea88e937712ee2c3ac653710a4a57a8da5d9c",
416 |                 "sha256:f8fc330c3370a81bbf3f88557097d1ea26cd8b019d6433aa59f71195f5ddebbf",
417 |                 "sha256:fbd359831c1657d69bb81f0db962905ee05e5e9451913b18b831febfe0519082",
418 |                 "sha256:fe7e1c262d3392afcf5071df9afa574544f28eac825284596ac6db56e6d11062",
419 |                 "sha256:fed1e1cf6a42577953abbe8e6cf2fe2f566daebde7c34724ec8803c4c0cda579"
420 |             ],
421 |             "markers": "python_version >= '3.7'",
422 |             "version": "==9.5.0"
423 |         },
424 |         "plotnine": {
425 |             "hashes": [
426 |                 "sha256:be852c6e50e331ad250151dc4120f269ee9ae5e795f67030f7794718b502592a",
427 |                 "sha256:f0989d1f368925082d543efcfc1cfb7236de8341cc6138ae65b856d70f77106a"
428 |             ],
429 |             "index": "pypi",
430 |             "version": "==0.12.1"
431 |         },
432 |         "pyparsing": {
433 |             "hashes": [
434 |                 "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb",
435 |                 "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"
436 |             ],
437 |             "markers": "python_full_version >= '3.6.8'",
438 |             "version": "==3.0.9"
439 |         },
440 |         "python-dateutil": {
441 |             "hashes": [
442 |                 "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86",
443 |                 "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"
444 |             ],
445 |             "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
446 |             "version": "==2.8.2"
447 |         },
448 |         "pytz": {
449 |             "hashes": [
450 |                 "sha256:1d8ce29db189191fb55338ee6d0387d82ab59f3d00eac103412d64e0ebd0c588",
451 |                 "sha256:a151b3abb88eda1d4e34a9814df37de2a80e301e68ba0fd856fb9b46bfbbbffb"
452 |             ],
453 |             "version": "==2023.3"
454 |         },
455 |         "scipy": {
456 |             "hashes": [
457 |                 "sha256:049a8bbf0ad95277ffba9b3b7d23e5369cc39e66406d60422c8cfef40ccc8415",
458 |                 "sha256:07c3457ce0b3ad5124f98a86533106b643dd811dd61b548e78cf4c8786652f6f",
459 |                 "sha256:0f1564ea217e82c1bbe75ddf7285ba0709ecd503f048cb1236ae9995f64217bd",
460 |                 "sha256:1553b5dcddd64ba9a0d95355e63fe6c3fc303a8fd77c7bc91e77d61363f7433f",
461 |                 "sha256:15a35c4242ec5f292c3dd364a7c71a61be87a3d4ddcc693372813c0b73c9af1d",
462 |                 "sha256:1b4735d6c28aad3cdcf52117e0e91d6b39acd4272f3f5cd9907c24ee931ad601",
463 |                 "sha256:2cf9dfb80a7b4589ba4c40ce7588986d6d5cebc5457cad2c2880f6bc2d42f3a5",
464 |                 "sha256:39becb03541f9e58243f4197584286e339029e8908c46f7221abeea4b749fa88",
465 |                 "sha256:43b8e0bcb877faf0abfb613d51026cd5cc78918e9530e375727bf0625c82788f",
466 |                 "sha256:4b3f429188c66603a1a5c549fb414e4d3bdc2a24792e061ffbd607d3d75fd84e",
467 |                 "sha256:4c0ff64b06b10e35215abce517252b375e580a6125fd5fdf6421b98efbefb2d2",
468 |                 "sha256:51af417a000d2dbe1ec6c372dfe688e041a7084da4fdd350aeb139bd3fb55353",
469 |                 "sha256:5678f88c68ea866ed9ebe3a989091088553ba12c6090244fdae3e467b1139c35",
470 |                 "sha256:79c8e5a6c6ffaf3a2262ef1be1e108a035cf4f05c14df56057b64acc5bebffb6",
471 |                 "sha256:7ff7f37b1bf4417baca958d254e8e2875d0cc23aaadbe65b3d5b3077b0eb23ea",
472 |                 "sha256:aaea0a6be54462ec027de54fca511540980d1e9eea68b2d5c1dbfe084797be35",
473 |                 "sha256:bce5869c8d68cf383ce240e44c1d9ae7c06078a9396df68ce88a1230f93a30c1",
474 |                 "sha256:cd9f1027ff30d90618914a64ca9b1a77a431159df0e2a195d8a9e8a04c78abf9",
475 |                 "sha256:d925fa1c81b772882aa55bcc10bf88324dadb66ff85d548c71515f6689c6dac5",
476 |                 "sha256:e7354fd7527a4b0377ce55f286805b34e8c54b91be865bac273f527e1b839019",
477 |                 "sha256:fae8a7b898c42dffe3f7361c40d5952b6bf32d10c4569098d276b4c547905ee1"
478 |             ],
479 |             "index": "pypi",
480 |             "version": "==1.10.1"
481 |         },
482 |         "six": {
483 |             "hashes": [
484 |                 "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926",
485 |                 "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"
486 |             ],
487 |             "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
488 |             "version": "==1.16.0"
489 |         },
490 |         "statsmodels": {
491 |             "hashes": [
492 |                 "sha256:0ef7fa4813c7a73b0d8a0c830250f021c102c71c95e9fe0d6877bcfb56d38b8c",
493 |                 "sha256:16bfe0c96a53b20fa19067e3b6bd2f1d39e30d4891ea0d7bc20734a0ae95942d",
494 |                 "sha256:1c7724ad573af26139a98393ae64bc318d1b19762b13442d96c7a3e793f495c3",
495 |                 "sha256:3757542c95247e4ab025291a740efa5da91dc11a05990c033d40fce31c450dc9",
496 |                 "sha256:3b0a135f3bfdeec987e36e3b3b4c53e0bb87a8d91464d2fcc4d169d176f46fdb",
497 |                 "sha256:582f9e41092e342aaa04920d17cc3f97240e3ee198672f194719b5a3d08657d6",
498 |                 "sha256:5a6a0a1a06ff79be8aa89c8494b33903442859add133f0dda1daf37c3c71682e",
499 |                 "sha256:6875c7d689e966d948f15eb816ab5616f4928706b180cf470fd5907ab6f647a4",
500 |                 "sha256:68b1c768dd94cc5ba8398121a632b673c625491aa7ed627b82cb4c880a25563f",
501 |                 "sha256:71054f9dbcead56def14e3c9db6f66f943110fdfb19713caf0eb0f08c1ec03fd",
502 |                 "sha256:76e290f4718177bffa8823a780f3b882d56dd64ad1c18cfb4bc8b5558f3f5757",
503 |                 "sha256:77b3cd3a5268ef966a0a08582c591bd29c09c88b4566c892a7c087935234f285",
504 |                 "sha256:7ebe885ccaa64b4bc5ad49ac781c246e7a594b491f08ab4cfd5aa456c363a6f6",
505 |                 "sha256:8d1e3e10dfbfcd58119ba5a4d3c7d519182b970a2aebaf0b6f539f55ae16058d",
506 |                 "sha256:9c64ebe9cf376cba0c31aed138e15ed179a1d128612dd241cdf299d159e5e882",
507 |                 "sha256:a6ad7b8aadccd4e4dd7f315a07bef1bca41d194eeaf4ec600d20dea02d242fce",
508 |                 "sha256:b587ee5d23369a0e881da6e37f78371dce4238cf7638a455db4b633a1a1c62d6",
509 |                 "sha256:ce28eb1c397dba437ec39b9ab18f2101806f388c7a0cf9cdfd8f09294ad1c799",
510 |                 "sha256:d7fda067837df94e0a614d93d3a38fb6868958d37f7f50afe2a534524f2660cb",
511 |                 "sha256:de489e3ed315bdba55c9d1554a2e89faa65d212e365ab81bc323fa52681fc60e",
512 |                 "sha256:fb471f757fc45102a87e5d86e87dc2c8c78b34ad4f203679a46520f1d863b9da"
513 |             ],
514 |             "markers": "python_version >= '3.8'",
515 |             "version": "==0.14.0"
516 |         },
517 |         "tzdata": {
518 |             "hashes": [
519 |                 "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a",
520 |                 "sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda"
521 |             ],
522 |             "markers": "python_version >= '2'",
523 |             "version": "==2023.3"
524 |         },
525 |         "zipp": {
526 |             "hashes": [
527 |                 "sha256:112929ad649da941c23de50f356a2b5570c954b65150642bccdd66bf194d224b",
528 |                 "sha256:48904fc76a60e542af151aded95726c1a5c34ed43ab4134b597665c86d7ad556"
529 |             ],
530 |             "markers": "python_version < '3.10'",
531 |             "version": "==3.15.0"
532 |         }
533 |     },
534 |     "develop": {}
535 | }
536 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Difference in Difference in Python
  2 | 
  3 | [![PyPI version](https://img.shields.io/pypi/v/csdid.svg?color=blue)](https://pypi.org/project/csdid/)
  4 | [![Downloads](https://static.pepy.tech/personalized-badge/csdid?period=total&units=international_system&left_color=blue&right_color=grey&left_text=Downloads)](https://pepy.tech/project/csdid)
  5 | [![Last commit](https://img.shields.io/github/last-commit/d2cml-ai/csdid.svg)](https://github.com/d2cml-ai/csdid/commits/main)
  6 | [![GitHub stars](https://img.shields.io/github/stars/d2cml-ai/csdid.svg?style=social)](https://github.com/d2cml-ai/csdid/stargazers)
  7 | [![GitHub issues](https://img.shields.io/github/issues/d2cml-ai/csdid.svg)](https://github.com/d2cml-ai/csdid/issues)
  8 | [![License](https://img.shields.io/github/license/d2cml-ai/csdid.svg)](https://github.com/d2cml-ai/csdid/blob/main/LICENSE)
  9 | 
 10 | 
 11 | The **csdid** package contains tools for computing average treatment
 12 | effect parameters in a Difference-in-Differences setup allowing for
 13 | 
 14 | - More than two time periods
 15 | 
 16 | - Variation in treatment timing (i.e., units can become treated at
 17 |   different points in time)
 18 | 
 19 | - Treatment effect heterogeneity (i.e, the effect of participating in
 20 |   the treatment can vary across units and exhibit potentially complex
 21 |   dynamics, selection into treatment, or time effects)
 22 | 
 23 | - The parallel trends assumption holds only after conditioning on
 24 |   covariates
 25 | 
 26 | The main parameters are **group-time average treatment effects**. These
 27 | are the average treatment effect for a particular group (group is
 28 | defined by treatment timing) in a particular time period. These
 29 | parameters are a natural generalization of the average treatment effect
 30 | on the treated (ATT) which is identified in the textbook case with two
 31 | periods and two groups to the case with multiple periods.
 32 | 
 33 | Group-time average treatment effects are also natural building blocks
 34 | for more aggregated treatment effect parameters such as overall
 35 | treatment effects or event-study-type estimands.
 36 | 
 37 | ## Getting Started
 38 | 
 39 | There has been some recent work on DiD with multiple time periods. The
 40 | **csdid** package implements the framework put forward in
 41 | 
 42 | - [Callaway, Brantly and Pedro H.C. Sant’Anna.
 43 |   “Difference-in-Differences with Multiple Time Periods.” Journal of
 44 |   Econometrics, Vol. 225, No. 2, pp. 200-230,
 45 |   2021.](https://doi.org/10.1016/j.jeconom.2020.12.001) or
 46 |   \[arXiv\](https://arxiv.org/abs/1803.09015
 47 | 
 48 | This project is based on the original [did R
 49 | package](https://github.com/bcallaway11/did).
 50 | 
 51 | ## Instalation
 52 | 
 53 | You can install **csdid** from `pypi` with:
 54 | 
 55 |     pip install csdid
 56 | 
 57 | or via github:
 58 | 
 59 |     pip install git+https://github.com/d2cml-ai/csdid/
 60 | 
 61 | ### Dependencies
 62 | 
 63 | Additionally, I have created an additional library called `drdid`, which
 64 | can be installed via GitHub.
 65 | 
 66 |     pip install git+https://github.com/d2cml-ai/DRDID
 67 | 
 68 | ## Basic Example
 69 | 
 70 | The following is a simplified example of the effect of states increasing
 71 | their minimum wages on county-level teen employment rates which comes
 72 | from [Callaway and Sant’Anna
 73 | (2021)](https://authors.elsevier.com/a/1cFzc15Dji4pnC).
 74 | 
 75 | - [More detailed examples are also
 76 |   available](https://bcallaway11.github.io/did/articles/did-basics.html)
 77 | 
 78 | A subset of the data is available in the package and can be loaded by
 79 | 
 80 | ``` python
 81 | from csdid.att_gt import ATTgt
 82 | import pandas as pd
 83 | data = pd.read_csv("https://raw.githubusercontent.com/d2cml-ai/csdid/function-aggte/data/mpdta.csv")
 84 | ```
 85 | 
 86 | The dataset contains 500 observations of county-level teen employment
 87 | rates from 2003-2007. Some states are first treated in 2004, some in
 88 | 2006, and some in 2007 (see the paper for more details). The important
 89 | variables in the dataset are
 90 | 
 91 | - **lemp** This is the log of county-level teen employment. It is the
 92 |   outcome variable
 93 | 
 94 | - **first.treat** This is the period when a state first increases its
 95 |   minimum wage. It can be 2004, 2006, or 2007. It is the variable that
 96 |   defines *group* in this application
 97 | 
 98 | - **year** This is the year and is the *time* variable
 99 | 
100 | - **countyreal** This is an id number for each county and provides the
101 |   individual identifier in this panel data context
102 | 
103 | To estimate group-time average treatment effects, use the
104 | **ATTgt().fit()** method
105 | 
106 | ``` python
107 | out = ATTgt(yname = "lemp",
108 |               gname = "first.treat",
109 |               idname = "countyreal",
110 |               tname = "year",
111 |               xformla = f"lemp~1",
112 |               data = data,
113 |               ).fit(est_method = 'dr')
114 | ```
115 | 
116 | Summary table
117 | 
118 | ``` python
119 | out.summ_attgt().summary2
120 | ```
121 | 
122 | <div>
123 | <style scoped>
124 |     .dataframe tbody tr th:only-of-type {
125 |         vertical-align: middle;
126 |     }
127 | &#10;    .dataframe tbody tr th {
128 |         vertical-align: top;
129 |     }
130 | &#10;    .dataframe thead th {
131 |         text-align: right;
132 |     }
133 | </style>
134 | 
135 | |     | Group | Time | ATT(g, t) | Post | Std. Error | \[95% Pointwise | Conf. Band\] |     |
136 | |-----|-------|------|-----------|------|------------|-----------------|--------------|-----|
137 | | 0   | 2004  | 2004 | -0.0105   | 0    | 0.0241     | -0.0781         | 0.0571       |     |
138 | | 1   | 2004  | 2005 | -0.0704   | 0    | 0.0324     | -0.1612         | 0.0204       |     |
139 | | 2   | 2004  | 2006 | -0.1373   | 0    | 0.0393     | -0.2476         | -0.0269      | \*  |
140 | | 3   | 2004  | 2007 | -0.1008   | 0    | 0.0360     | -0.2017         | 0.0001       |     |
141 | | 4   | 2006  | 2004 | 0.0065    | 0    | 0.0238     | -0.0601         | 0.0732       |     |
142 | | 5   | 2006  | 2005 | -0.0028   | 0    | 0.0188     | -0.0554         | 0.0499       |     |
143 | | 6   | 2006  | 2006 | -0.0046   | 0    | 0.0172     | -0.0528         | 0.0437       |     |
144 | | 7   | 2006  | 2007 | -0.0412   | 0    | 0.0201     | -0.0976         | 0.0152       |     |
145 | | 8   | 2007  | 2004 | 0.0305    | 0    | 0.0147     | -0.0108         | 0.0719       |     |
146 | | 9   | 2007  | 2005 | -0.0027   | 0    | 0.0160     | -0.0476         | 0.0421       |     |
147 | | 10  | 2007  | 2006 | -0.0311   | 0    | 0.0173     | -0.0796         | 0.0174       |     |
148 | | 11  | 2007  | 2007 | -0.0261   | 0    | 0.0171     | -0.0740         | 0.0219       |     |
149 | 
150 | </div>
151 | 
152 | In the graphs, a semicolon `;` should be added to prevent printing the
153 | class and the graph information.
154 | 
155 | ``` python
156 | out.plot_attgt();
157 | ```
158 | 
159 |     /home/runner/work/csdid/csdid/csdid/plots/gplot.py:19: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '['2004' '2005' '2006' '2007']' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
160 |       ssresults.loc[:, 'year'] = ssresults['year'].astype(int).astype(str)
161 |     /home/runner/work/csdid/csdid/csdid/plots/gplot.py:19: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '['2004' '2005' '2006' '2007']' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
162 |       ssresults.loc[:, 'year'] = ssresults['year'].astype(int).astype(str)
163 |     /home/runner/work/csdid/csdid/csdid/plots/gplot.py:19: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '['2004' '2005' '2006' '2007']' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
164 |       ssresults.loc[:, 'year'] = ssresults['year'].astype(int).astype(str)
165 | 
166 | ![](README_files/figure-commonmark/cell-5-output-2.png)
167 | 
168 | ``` python
169 | out.aggte(typec='calendar');
170 | ```
171 | 
172 | 
173 | 
174 |     Overall summary of ATT's based on calendar time aggregation:
175 |         ATT Std. Error  [95.0%  Conf. Int.]  
176 |     -0.0417     0.0169 -0.0748      -0.0086 *
177 | 
178 | 
179 |     Time Effects (calendar):
180 |        Time  Estimate  Std. Error  [95.0% Simult.   Conf. Band   
181 |     0  2004   -0.0105      0.0244          -0.0584      0.0374   
182 |     1  2005   -0.0704      0.0307          -0.1305     -0.0103  *
183 |     2  2006   -0.0488      0.0210          -0.0900     -0.0076  *
184 |     3  2007   -0.0371      0.0136          -0.0637     -0.0105  *
185 |     ---
186 |     Signif. codes: `*' confidence band does not cover 0
187 |     Control Group:  Never Treated , 
188 |     Anticipation Periods:  0
189 |     Estimation Method:  Doubly Robust
190 | 
191 | ``` python
192 | out.plot_aggte();
193 | ```
194 | 
195 |     /home/runner/work/csdid/csdid/csdid/plots/gplot.py:19: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '['2004' '2005' '2006' '2007']' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
196 |       ssresults.loc[:, 'year'] = ssresults['year'].astype(int).astype(str)
197 | 
198 | ![](README_files/figure-commonmark/cell-7-output-2.png)
199 | 
200 | 
201 | **Event Studies**
202 | 
203 | Although in the current example it is pretty easy to directly interpret
204 | the group-time average treatment effects, there are many cases where it
205 | is convenient to aggregate the group-time average treatment effects into
206 | a small number of parameters. A main type of aggregation is into an
207 | *event study* plot.
208 | 
209 | To make an event study plot in the **csdid** package, one can use the
210 | **aggte** function with **dynamic** option
211 | 
212 | ``` python
213 | out.aggte(typec='dynamic');
214 | ```
215 | 
216 | 
217 | 
218 |     Overall summary of ATT's based on event-study/dynamic aggregation:
219 |     ATT Std. Error  [95.0%  Conf. Int.]  
220 |     -0.0772     0.0207 -0.1179      -0.0366 *
221 | 
222 | 
223 |     Dynamic Effects:
224 |       Event time  Estimate  Std. Error  [95.0% Simult.   Conf. Band   
225 |     0          -3    0.0305      0.0146           0.0019      0.0591  *
226 |     1          -2   -0.0006      0.0129          -0.0259      0.0248   
227 |     2          -1   -0.0245      0.0141          -0.0521      0.0032   
228 |     3           0   -0.0199      0.0117          -0.0428      0.0030   
229 |     4           1   -0.0510      0.0154          -0.0811     -0.0208  *
230 |     5           2   -0.1373      0.0366          -0.2091     -0.0655  *
231 |     6           3   -0.1008      0.0337          -0.1669     -0.0347  *
232 |     ---
233 |     Signif. codes: `*' confidence band does not cover 0
234 |     Control Group:  Never Treated , 
235 |     Anticipation Periods:  0
236 |     Estimation Method:  Doubly Robust
237 | 
238 | The column `event time` is for each group relative to when they first
239 | participate in the treatment. To give some examples, `event time=0`
240 | corresponds to the *on impact* effect, and `event time=-1` is the
241 | *effect* in the period before a unit becomes treated (checking that this
242 | is equal to 0 is potentially useful as a pre-test).
243 | 
244 | To plot the event study, use **plot_aggte** method
245 | ``` python
246 | out.plot_aggte();
247 | ```
248 | 
249 |     /home/runner/work/csdid/csdid/csdid/plots/gplot.py:19: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '['2004' '2005' '2006' '2007']' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
250 |       ssresults.loc[:, 'year'] = ssresults['year'].astype(int).astype(str)
251 | 
252 | ![](README_files/figure-commonmark/cell-8-output-2.png)
253 | 
254 | The figure here is very similar to the group-time average treatment
255 | effects. Red dots are pre-treatment periods, blue dots are
256 | post-treatment periods. The difference is that the x-axis is in event
257 | time.
258 | 
259 | **Overall Effect of Participating in the Treatment**
260 | 
261 | The event study above reported an overall effect of participating in the
262 | treatment. This was computed by averaging the average effects computed
263 | at each length of exposure.
264 | 
265 | In many cases, a more general purpose overall treatment effect parameter
266 | is give by computing the average treatment effect for each group, and
267 | then averaging across groups. This sort of procedure provides an average
268 | treatment effect parameter with a very similar interpretation to the
269 | Average Treatment Effect on the Treated (ATT) in the two period and two
270 | group case.
271 | 
272 | To compute this overall average treatment effect parameter, use
273 | 
274 | 
275 | ``` python
276 | out.aggte(typec='group');
277 | ```
278 | 
279 | 
280 | 
281 |     Overall summary of ATT's based on group/cohort aggregation:
282 |     ATT Std. Error  [95.0%  Conf. Int.]  
283 |     -0.031     0.0124 -0.0553      -0.0067 *
284 | 
285 | 
286 |     Group Effects:
287 |       Group  Estimate  Std. Error  [95.0% Simult.   Conf. Band   
288 |     0   2004   -0.0797      0.0301          -0.1387     -0.0208  *
289 |     1   2006   -0.0229      0.0172          -0.0567      0.0109   
290 |     2   2007   -0.0261      0.0174          -0.0601      0.0080   
291 |     ---
292 |     Signif. codes: `*' confidence band does not cover 0
293 |     Control Group:  Never Treated , 
294 |     Anticipation Periods:  0
295 |     Estimation Method:  Doubly Robust
296 | 
297 | Of particular interest is the `Overall ATT` in the results. Here, we
298 | estimate that increasing the minimum wage decreased teen employment by
299 | 3.1% and the effect is marginally statistically significant.


--------------------------------------------------------------------------------
/README.qmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: Difference in Difference in Python
  3 | format: gfm
  4 | ---
  5 | 
  6 | The **csdid** package contains tools for computing average treatment
  7 | effect parameters in a Difference-in-Differences setup allowing for
  8 | 
  9 |   - More than two time periods
 10 | 
 11 |   - Variation in treatment timing (i.e., units can become treated at
 12 |     different points in time)
 13 | 
 14 |   - Treatment effect heterogeneity (i.e, the effect of participating in
 15 |     the treatment can vary across units and exhibit potentially complex
 16 |     dynamics, selection into treatment, or time effects)
 17 | 
 18 |   - The parallel trends assumption holds only after conditioning on
 19 |     covariates
 20 | 
 21 | The main parameters are **group-time average treatment effects**. These
 22 | are the average treatment effect for a particular group (group is
 23 | defined by treatment timing) in a particular time period. These
 24 | parameters are a natural generalization of the average treatment effect
 25 | on the treated (ATT) which is identified in the textbook case with two
 26 | periods and two groups to the case with multiple periods.
 27 | 
 28 | Group-time average treatment effects are also natural building blocks
 29 | for more aggregated treatment effect parameters such as overall
 30 | treatment effects or event-study-type estimands.
 31 | 
 32 | ## Getting Started
 33 | 
 34 | There has been some recent work on DiD with multiple time periods. The
 35 | **did** package implements the framework put forward in
 36 | 
 37 | - [Callaway, Brantly and Pedro H.C. Sant’Anna.
 38 |   "Difference-in-Differences with Multiple Time Periods." Journal of
 39 |   Econometrics, Vol. 225, No. 2,
 40 |   pp. 200-230, 2021.](https://doi.org/10.1016/j.jeconom.2020.12.001)
 41 |   or [arXiv](https://arxiv.org/abs/1803.09015
 42 | 
 43 | This project is based on the original [did R package](https://github.com/bcallaway11/did).
 44 | 
 45 | ## Instalation
 46 | 
 47 | You can install **csdid** from `pypi` with:
 48 | 
 49 | ```
 50 | pip install csdid
 51 | ```
 52 | 
 53 | or via github:
 54 | 
 55 | ```
 56 | pip install git+https://github.com/d2cml-ai/csdid/
 57 | ```
 58 | 
 59 | ### Dependencies
 60 | 
 61 | Additionally, I have created an additional library called `drdid`, which can be installed via GitHub.
 62 | 
 63 | ```
 64 | pip install git+https://github.com/d2cml-ai/DRDID
 65 | ```
 66 | 
 67 | ## Basic Example
 68 | 
 69 | 
 70 | The following is a simplified example of the effect of states increasing
 71 | their minimum wages on county-level teen employment rates which comes
 72 | from [Callaway and Sant’Anna
 73 | (2021)](https://authors.elsevier.com/a/1cFzc15Dji4pnC).
 74 | 
 75 |   - [More detailed examples are also
 76 |     available](https://bcallaway11.github.io/did/articles/did-basics.html)
 77 | 
 78 | A subset of the data is available in the package and can be loaded by
 79 | 
 80 | ```{python}
 81 | from csdid.att_gt import ATTgt
 82 | import pandas as pd
 83 | data = pd.read_csv("https://raw.githubusercontent.com/d2cml-ai/csdid/function-aggte/data/mpdta.csv")
 84 | ```
 85 | 
 86 | The dataset contains 500 observations of county-level teen employment
 87 | rates from 2003-2007. Some states are first treated in 2004, some in
 88 | 2006, and some in 2007 (see the paper for more details). The important
 89 | variables in the dataset are
 90 | 
 91 |   - **lemp** This is the log of county-level teen employment. It is the
 92 |     outcome variable
 93 | 
 94 |   - **first.treat** This is the period when a state first increases its
 95 |     minimum wage. It can be 2004, 2006, or 2007. It is the variable that
 96 |     defines *group* in this application
 97 | 
 98 |   - **year** This is the year and is the *time* variable
 99 | 
100 |   - **countyreal** This is an id number for each county and provides the
101 |     individual identifier in this panel data context
102 | 
103 | To estimate group-time average treatment effects, use the **ATTgt().fit()**
104 | method
105 | 
106 | ```{python}
107 | out = ATTgt(yname = "lemp",
108 |               gname = "first.treat",
109 |               idname = "countyreal",
110 |               tname = "year",
111 |               xformla = f"lemp~1",
112 |               data = data,
113 |               ).fit(est_method = 'dr')
114 | ```
115 | 
116 | 
117 | Summary table
118 | 
119 | ```{python, eval = False}
120 | out.summ_attgt().summary2
121 | ```
122 | 
123 | In the graphs, a semicolon `;` should be added to prevent printing the class and the graph information.
124 | 
125 | ```{python}
126 | out.plot_attgt();
127 | ```
128 | 
129 | 
130 | ```{python}
131 | out.aggte(typec='calendar');
132 | ```
133 | 
134 | 
135 | ```{python}
136 | out.plot_aggte();
137 | ```


--------------------------------------------------------------------------------
/README_files/figure-commonmark/cell-5-output-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2cml-ai/csdid/7ad721d4c384bca147ae16de84b7c5df8fe26959/README_files/figure-commonmark/cell-5-output-1.png


--------------------------------------------------------------------------------
/README_files/figure-commonmark/cell-5-output-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2cml-ai/csdid/7ad721d4c384bca147ae16de84b7c5df8fe26959/README_files/figure-commonmark/cell-5-output-2.png


--------------------------------------------------------------------------------
/README_files/figure-commonmark/cell-7-output-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2cml-ai/csdid/7ad721d4c384bca147ae16de84b7c5df8fe26959/README_files/figure-commonmark/cell-7-output-1.png


--------------------------------------------------------------------------------
/README_files/figure-commonmark/cell-7-output-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2cml-ai/csdid/7ad721d4c384bca147ae16de84b7c5df8fe26959/README_files/figure-commonmark/cell-7-output-2.png


--------------------------------------------------------------------------------
/README_files/figure-commonmark/cell-8-output-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2cml-ai/csdid/7ad721d4c384bca147ae16de84b7c5df8fe26959/README_files/figure-commonmark/cell-8-output-2.png


--------------------------------------------------------------------------------
/configs/data.yml:
--------------------------------------------------------------------------------
1 | simdata: data/sim_data.csv
2 | mpdata: data/mpdta.csv
3 | 


--------------------------------------------------------------------------------
/csdid/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2cml-ai/csdid/7ad721d4c384bca147ae16de84b7c5df8fe26959/csdid/__init__.py


--------------------------------------------------------------------------------
/csdid/_version.py:
--------------------------------------------------------------------------------
1 | __version = '0.2.7'
2 | 


--------------------------------------------------------------------------------
/csdid/aggte_fnc/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2cml-ai/csdid/7ad721d4c384bca147ae16de84b7c5df8fe26959/csdid/aggte_fnc/__init__.py


--------------------------------------------------------------------------------
/csdid/aggte_fnc/aggte.py:
--------------------------------------------------------------------------------
  1 | #' @title Aggregate Group-Time Average Treatment Effects
  2 | #'
  3 | #' @description A function to take group-time average treatment effects
  4 | #'  and aggregate them into a smaller number of parameters.  There are
  5 | #'  several possible aggregations including "simple", "dynamic", "group",
  6 | #'  and "calendar."
  7 | #'
  8 | #' @param MP an MP object (i.e., the results of the [att_gt()] method)
  9 | #' @param type Which type of aggregated treatment effect parameter to compute.
 10 | #'   One option is "simple" (this just computes a weighted average of all
 11 | #'   group-time average treatment effects with weights proportional to group
 12 | #'   size).  Other options are "dynamic" (this computes average effects across
 13 | #'   different lengths of exposure to the treatment and is similar to an
 14 | #'   "event study"; here the overall effect averages the effect of the
 15 | #'   treatment across all positive lengths of exposure); "group" (this
 16 | #'   is the default option and
 17 | #'   computes average treatment effects across different groups; here
 18 | #'   the overall effect averages the effect across different groups); and
 19 | #'   "calendar" (this computes average treatment effects across different
 20 | #'   time periods; here the overall effect averages the effect across each
 21 | #'   time period).
 22 | #' @param balance_e If set (and if one computes dynamic effects), it balances
 23 | #'  the sample with respect to event time.  For example, if `balance.e=2`,
 24 | #'  `aggte` will drop groups that are not exposed to treatment for
 25 | #'  at least three periods. (the initial period when `e=0` as well as the
 26 | #'  next two periods when `e=1` and the `e=2`).  This ensures that
 27 | #'  the composition of groups does not change when event time changes.
 28 | #' @param min_e For event studies, this is the smallest event time to compute
 29 | #'  dynamic effects for.  By default, `min_e = -Inf` so that effects at
 30 | #'  all lengths of exposure are computed.
 31 | #' @param max_e For event studies, this is the largest event time to compute
 32 | #'  dynamic effects for.  By default, `max_e = Inf` so that effects at
 33 | #'  all lengths of exposure are computed.
 34 | #' @param na.rm Logical value if we are to remove missing Values from analyses. Defaults is FALSE.
 35 | #' @param bstrap Boolean for whether or not to compute standard errors using
 36 | #'  the multiplier bootstrap.  If standard errors are clustered, then one
 37 | #'  must set `bstrap=TRUE`. Default is value set in the MP object.  If bstrap is `FALSE`, then analytical
 38 | #'  standard errors are reported.
 39 | #' @param biters The number of bootstrap iterations to use.  The default is the value set in the MP object,
 40 | #'  and this is only applicable if `bstrap=TRUE`.
 41 | #'
 42 | #' @param cband Boolean for whether or not to compute a uniform confidence
 43 | #'  band that covers all of the group-time average treatment effects
 44 | #'  with fixed probability `1-alp`.  In order to compute uniform confidence
 45 | #'  bands, `bstrap` must also be set to `TRUE`.  The default is
 46 | #'  the value set in the MP object
 47 | #' @param alp the significance level, default is value set in the MP object.
 48 | #' @param clustervars A vector of variables to cluster on.  At most, there
 49 | #'  can be two variables (otherwise will throw an error) and one of these
 50 | #'  must be the same as idname which allows for clustering at the individual
 51 | #'  level. Default is the variables set in the MP object
 52 | 
 53 | #'
 54 | #' @return An [`AGGTEobj`] object that holds the results from the
 55 | #'  aggregation
 56 | #'
 57 | #' @section Examples:
 58 | #'
 59 | #
 60 | #' Initial ATT(g,t) estimates from [att_gt()]
 61 | #'
 62 | #'
 63 | #' You can aggregate the ATT(g,t) in many ways.
 64 | #'
 65 | #' **Overall ATT:**
 66 | #' aggte(out, type = "simple")
 67 | #' ```
 68 | #'
 69 | #' **Dynamic ATT (Event-Study):**
 70 | #' aggte(out, type = "dynamic")
 71 | #' ```
 72 | #'
 73 | #' **ATT for each group:**
 74 | #' aggte(out, type = "group")
 75 | #' ```
 76 | #'
 77 | #' **ATT for each calendar year:**
 78 | #' aggte(out, type = "calendar")
 79 | #' ```
 80 | #'
 81 | #'
 82 | #'
 83 | from csdid.aggte_fnc.compute_aggte import compute_aggte
 84 | import inspect
 85 | 
 86 | def aggte(MP, 
 87 |           typec="group", 
 88 |           balance_e=None, 
 89 |           min_e=float('-inf'), 
 90 |           max_e=float('inf'), 
 91 |           na_rm=False,
 92 |           bstrap=None, 
 93 |           biters=None, 
 94 |           cband=None, 
 95 |           alp=None, 
 96 |           clustervars=None):
 97 |     call = inspect.currentframe().f_back.f_locals.copy()
 98 | 
 99 |     return compute_aggte(MP=MP, 
100 |                   typec=typec, 
101 |                   balance_e=balance_e, 
102 |                   min_e=min_e, 
103 |                   max_e=max_e, 
104 |                   na_rm=na_rm,
105 |                   bstrap=bstrap, 
106 |                   biters=biters, 
107 |                   cband=cband, 
108 |                   alp=alp, 
109 |                   clustervars=clustervars, 
110 |                   call=call)
111 | 
112 |  
113 | 
114 | 


--------------------------------------------------------------------------------
/csdid/aggte_fnc/compute_aggte.py:
--------------------------------------------------------------------------------
  1 | import pickle
  2 | import pandas as pd
  3 | import numpy as np
  4 | from scipy.stats import norm
  5 | 
  6 | 
  7 | from csdid.aggte_fnc.utils import get_agg_inf_func, get_se, wif, AGGTEobj
  8 | from csdid.utils.mboot import mboot
  9 | import warnings
 10 | 
 11 | def compute_aggte(MP,
 12 |                       typec         = "group",
 13 |                       balance_e     = None,
 14 |                       min_e         = float('-inf'),
 15 |                       max_e         = float('inf'),
 16 |                       na_rm         = False,
 17 |                       bstrap        = None,
 18 |                       biters        = None,
 19 |                       cband         = None,
 20 |                       alp           = None,
 21 |                       clustervars   = None,
 22 |                       call          = None):
 23 |   
 24 | # =============================================================================
 25 | # unpack MP object
 26 | # =============================================================================
 27 |     group       = np.array( MP['group'] )
 28 |     t           = np.array( MP['t']     )
 29 |     att         = np.array( MP['att'] )
 30 |     dp          = MP['DIDparams']
 31 |     tlist       = np.array( dp['tlist'] )
 32 |     glist       = np.array( dp['glist'] )
 33 |     data        = dp['data']
 34 |     inffunc     = MP['inffunc']['inffunc']
 35 |     n           = MP['n']
 36 |     gname       = dp['gname']
 37 |     tname       = dp['tname']
 38 |     idname      = dp['idname']
 39 |     # typec       = MP['type']  
 40 |     panel       = dp['panel']    
 41 | 
 42 | 
 43 |     
 44 |     if clustervars is None:
 45 |         clustervars = dp['clustervars']
 46 |     if bstrap is None:
 47 |         bstrap = dp['bstrap']
 48 |     if biters is None:
 49 |         biters = dp['biters']
 50 |     if alp is None:
 51 |         alp = dp['alp']
 52 |     if cband is None:
 53 |         cband = dp['cband']
 54 | 
 55 | 
 56 | 
 57 |     # Overwrite MP objects (to compute bootstrap)
 58 |     MP['DIDparams']['clustervars'] = clustervars
 59 |     MP['DIDparams']['bstrap'] = bstrap
 60 |     MP['DIDparams']['biters'] = biters
 61 |     MP['DIDparams']['alp'] = alp
 62 |     MP['DIDparams']['cband'] = cband
 63 |     
 64 | # =============================================================================
 65 | #  Treat data
 66 | # =============================================================================
 67 | 
 68 |     if typec not in ["simple", "dynamic", "group", "calendar"]:
 69 |         raise ValueError("`typec` must be one of ['simple', 'dynamic', 'group', 'calendar']")
 70 |     # Removing missing values    
 71 |     if na_rm:
 72 |         notna = ~np.isnan(att)
 73 |         group = group[notna]
 74 |         t = t[notna]
 75 |         att = att[notna]
 76 |         inffunc = inffunc[:, notna]
 77 |         glist = np.sort(np.unique(group))
 78 |     
 79 |         if typec == "group":
 80 |             gnotna = []
 81 |             for g in glist:
 82 |                 indices = np.where((group == g) & (g <= t))
 83 |                 is_not_na = np.any(~np.isnan(att[indices]))
 84 |                 gnotna.append(is_not_na)
 85 |             
 86 |             gnotna = np.array(gnotna)
 87 |             glist = glist[gnotna]
 88 |             not_all_na = np.isin(group, glist)
 89 |             group = group[not_all_na]
 90 |             t = t[not_all_na]
 91 |             att = att[not_all_na]
 92 |             inffunc = inffunc[:, not_all_na]
 93 |             glist = np.sort(np.unique(group))
 94 | 
 95 |     
 96 |     if (not na_rm) and np.any(np.isnan(att)):
 97 |         raise ValueError("Missing values at att_gt found. If you want to remove these, set `na_rm = True`.")
 98 | 
 99 |     if panel:
100 |         dta = data[data[tname] == tlist[0]]
101 |     else:
102 |         dta = data.groupby(idname).mean().reset_index()
103 |         dta = dta.iloc[:, 1:]
104 | 
105 | # =============================================================================
106 | #  Treat data 2
107 | # =============================================================================
108 | 
109 |     
110 |     originalt = t
111 |     originalgroup = group
112 |     originalglist = glist
113 |     originaltlist = tlist
114 |     # In case g's are not part of tlist
115 |     originalgtlist = np.sort(np.unique(np.concatenate((originaltlist, originalglist))))
116 |     uniquet = list(range(1, len(originalgtlist) + 1))
117 |     
118 |     # Function to switch from "new" t values to original t values
119 |     def t2orig(t):
120 |         return originalgtlist[uniquet.index(t) if t in uniquet else -1]
121 |     
122 |     # Function to switch between "original" t values and new t values
123 |     def orig2t(orig):
124 |         new_t = [uniquet[i] for i in range(len(originalgtlist)) if originalgtlist[i] == orig]
125 |         out = new_t[0] if new_t else None
126 |         return out
127 |     
128 |     t     = [orig2t(orig) for orig in originalt]
129 |     group = [orig2t(orig) for orig in originalgroup]
130 |     glist = [orig2t(orig) for orig in originalglist]
131 |     tlist = np.asarray(list(set(t)))
132 |     maxT  = max(t)
133 |         
134 |     # Set the weights
135 |     # return data.columns
136 |     weights_ind = dta['w1'].to_numpy()
137 |     
138 |     # We can work in overall probabilities because conditioning will cancel out
139 |     # since it shows up in numerator and denominator
140 |     pg = np.array([np.mean(weights_ind * (dta[gname].to_numpy() == g)) for g in originalglist])
141 |     
142 |     # Length of this is equal to the number of groups
143 |     pgg = pg
144 | 
145 |     # Same but length is equal to the number of ATT(g,t)
146 |     pg = [pg[glist.index(g)] for g in group]  
147 |     
148 |     # Which group time average treatment effects are post-treatment
149 |     keepers = [i for i in range(len(group)) if group[i] <= t[i] <= (group[i] + max_e)] ### added second condition to allow for limit on longest period included in att
150 |     
151 |     # n x 1 vector of group variable
152 |     G = [orig2t(g) for g in dta[gname].to_numpy()]
153 | 
154 | # =============================================================================
155 | #  simple
156 | # =============================================================================
157 | 
158 | 
159 |     if typec == "simple":
160 |         # Simple ATT
161 |         # Averages all post-treatment ATT(g,t) with weights given by group size
162 |         pg = np.array(pg)
163 |         simple_att = np.sum(att[keepers] * pg[keepers]) / np.sum(pg[keepers])
164 |         if np.isnan(simple_att):
165 |             simple_att = None
166 |     
167 |         # Get the part of the influence function coming from estimated weights
168 |         simple_wif = wif(keepers, pg, weights_ind, G, group)
169 |     
170 |         # Get the overall influence function
171 |         simple_if = get_agg_inf_func(att = att , 
172 |                                      inffunc = inffunc , 
173 |                                      whichones = keepers ,
174 |                                      weights_agg = np.array(pg)[keepers]/np.sum(np.array(pg)[keepers]) , 
175 |                                      wif = simple_wif )[:, None]
176 |     
177 |         # Get standard errors from the overall influence function
178 |         simple_se = get_se(simple_if, dp)
179 |         
180 |         if simple_se is not None:
181 |             if simple_se <= np.sqrt(np.finfo(float).eps) * 10:
182 |                 simple_se = None
183 |                 
184 |         AGGTEobj_print = AGGTEobj(overall_att=simple_att, 
185 |                                   overall_se=simple_se, 
186 |                                   typec=typec,
187 |                                   inf_function={'simple_att': simple_if}, 
188 |                                   call=call, DIDparams=dp)
189 |         
190 |         return AGGTEobj_print
191 | 
192 | 
193 | # =============================================================================
194 | #  GRoup
195 | # =============================================================================
196 | 
197 |     if typec == "group":
198 |         group = np.array(group)
199 |         t = np.array(t) 
200 |         pg = np.array(pg) 
201 |         selective_att_g = [np.mean(att[( group== g) & (t >= g) & (t <= (group + max_e))]) for g in glist]
202 |         selective_att_g = np.asarray(selective_att_g)
203 |         selective_att_g[np.isnan(selective_att_g)] = None
204 |     
205 |         selective_se_inner = [None] * len(glist)
206 |         for i, g in enumerate(glist):
207 |             whichg = np.where(np.logical_and.reduce((group == g, g <= t, t <= (group + max_e))))[0]
208 |             weightsg =  pg[whichg] / np.sum(pg[whichg])
209 |             inf_func_g = get_agg_inf_func(att = att , 
210 |                                             inffunc = inffunc , 
211 |                                             whichones = whichg ,
212 |                                             weights_agg = weightsg , 
213 |                                             wif = None)[:, None]
214 |             se_g = get_se(inf_func_g, dp)
215 |             selective_se_inner[i] = {'inf_func': inf_func_g, 'se': se_g}
216 |             
217 |         # recover standard errors separately by group   
218 |         selective_se_g = np.asarray([item['se'] for item in selective_se_inner]).T
219 |         
220 |         selective_se_g[selective_se_g <= np.sqrt(np.finfo(float).eps) * 10] = None
221 |         
222 |         selective_inf_func_g = np.column_stack([elem["inf_func"] for elem in selective_se_inner])
223 |   
224 |         # use multiplier bootstrap (across groups) to get critical value
225 |         # for constructing uniform confidence bands   
226 |         selective_crit_val = norm.ppf(1 - alp/2)
227 |         
228 |         if dp['cband']:
229 |             if not dp['bstrap']:
230 |                 print("Used bootstrap procedure to compute simultaneous confidence band")
231 |         
232 |             selective_crit_val = mboot(selective_inf_func_g, dp)['crit_val']
233 |         
234 |             if np.isnan(selective_crit_val) or np.isinf(selective_crit_val):
235 |                 print("Simultaneous critical value is NA. This probably happened because we cannot compute t-statistic (std errors are NA). We then report pointwise conf. intervals.")
236 |                 selective_crit_val = norm.ppf(1 - alp/2)
237 |                 dp['cband'] = False
238 |         
239 |             if selective_crit_val < norm.ppf(1 - alp/2):
240 |                 print("Simultaneous conf. band is somehow smaller than pointwise one using normal approximation. Since this is unusual, we are reporting pointwise confidence intervals")
241 |                 selective_crit_val = norm.ppf(1 - alp/2)
242 |                 dp['cband'] = False
243 |         
244 |             if selective_crit_val >= 7:
245 |                 print("Simultaneous critical value is arguably 'too large' to be reliable. This usually happens when the number of observations per group is small and/or there is not much variation in outcomes.")
246 |   
247 |         # get overall att under selective treatment timing
248 |         # (here use pgg instead of pg because we can just look at each group)            
249 |         selective_att = np.sum(selective_att_g * pgg) / np.sum(pgg)
250 |         
251 |         # account for having to estimate pgg in the influence function    
252 |         selective_wif = wif(keepers = np.arange(1, len(glist)+1)-1, 
253 |                             pg  = pgg, 
254 |                             weights_ind = weights_ind, 
255 |                             G = G, 
256 |                             group = group)
257 |         
258 |         # get overall influence function   
259 |         selective_inf_func = get_agg_inf_func(att = selective_att_g, 
260 |                                               inffunc = selective_inf_func_g,
261 |                                               whichones = np.arange(1, len(glist)+1)-1, 
262 |                                               weights_agg = pgg/np.sum(pgg),
263 |                                               wif = selective_wif)[:, None]    
264 |         
265 |         # get overall standard error        
266 |         selective_se = get_se(selective_inf_func, dp)
267 |         if not np.isnan(selective_se):
268 |             if selective_se <= np.sqrt(np.finfo(float).eps) * 10:
269 |                 selective_se = None
270 |     
271 |         AGGTEobj_print = AGGTEobj(overall_att = selective_att, 
272 |                             overall_se = selective_se, 
273 |                             typec = typec,
274 |                             egt = originalglist,
275 |                             att_egt = selective_att_g,
276 |                             se_egt = selective_se_g,
277 |                             crit_val_egt = selective_crit_val,
278 |                             inf_function = {'selective_inf_func_g': selective_inf_func_g, 
279 |                                             'selective_inf_func': selective_inf_func},
280 |                             call = call, 
281 |                             DIDparams = dp)
282 | 
283 |         return AGGTEobj_print
284 | 
285 | 
286 | # =============================================================================
287 | #  Dynamic
288 | # =============================================================================
289 | 
290 |     if typec == "dynamic":
291 |         # event times
292 |         # this looks at all available event times
293 |         # note: event times can be negative here.
294 |         # note: event time = 0 corresponds to "on impact"
295 |         eseq = np.unique(np.array(originalt) - np.array(originalgroup) ) # Subtract corresponding elements and convert to NumPy array
296 |         eseq = np.sort(eseq)  # Sort the unique values in ascending order
297 |     
298 |         # if the user specifies balance_e, then we are going to
299 |         # drop some event times and some groups; if not, we just
300 |         # keep everything (that is what this variable is for)
301 |         originalt = np.array(originalt)
302 |         originalgroup = np.array(originalgroup)
303 |         pg = np.array(pg)
304 |         include_balanced_gt = np.repeat(True, len(originalgroup))
305 | 
306 |         if balance_e is not None:
307 |             include_balanced_gt = (t2orig(maxT) - originalgroup >= balance_e)        
308 |             eseq = np.unique(originalt[include_balanced_gt] - originalgroup[include_balanced_gt])
309 |             eseq = np.sort(eseq)      
310 |             eseq = eseq[(eseq <= balance_e) & (eseq >= balance_e - t2orig(maxT) + t2orig(1))]
311 |         eseq = eseq[(eseq >= min_e) & (eseq <= max_e)]
312 | 
313 |         dynamic_att_e = []
314 |         for e in eseq:
315 |             whiche = np.where((originalt - originalgroup == e) & include_balanced_gt)
316 |             atte = att[whiche]
317 |             pge = pg[whiche] / np.sum(pg[whiche])
318 |             dynamic_att_e.append(np.sum(atte * pge))
319 |         
320 |         dynamic_se_inner = []
321 |         for e in eseq:
322 |             whiche = np.where((originalt - originalgroup == e) & (include_balanced_gt) )[0]
323 |             pge = pg[whiche] / sum(pg[whiche])
324 |             wif_e = wif(whiche, 
325 |                         pg, 
326 |                         weights_ind, 
327 |                         G, 
328 |                         group)
329 |             inf_func_e = get_agg_inf_func(att         = att, 
330 |                                           inffunc     = inffunc, 
331 |                                           whichones   = whiche, 
332 |                                           weights_agg = pge, 
333 |                                           wif         = wif_e)[:, None]
334 |             se_e = get_se(inf_func_e, dp)
335 |             dynamic_se_inner.append({'inf_func': inf_func_e, 'se': se_e})
336 | 
337 |         dynamic_se_e = np.array([item['se'] for item in dynamic_se_inner]).T
338 |         
339 |         dynamic_se_e[dynamic_se_e <= np.sqrt(np.finfo(float).eps) * 10] = np.nan
340 |         
341 |         dynamic_inf_func_e = np.column_stack([item['inf_func'] for item in dynamic_se_inner])
342 |                 
343 |         dynamic_crit_val = norm.ppf(1 - alp/2)
344 |         if dp['cband']:
345 |             if not dp['bstrap']:
346 |                 print('Used bootstrap procedure to compute simultaneous confidence band')
347 |             dynamic_crit_val = mboot(dynamic_inf_func_e, dp)['crit_val']
348 |         
349 |             if np.isnan(dynamic_crit_val) or np.isinf(dynamic_crit_val):
350 |                 print('Simultaneous critical value is NA. This probably happened because we cannot compute t-statistic (std errors are NA). We then report pointwise conf. intervals.')
351 |                 dynamic_crit_val = norm.ppf(1 - alp/2)
352 |                 dp['cband'] = False
353 |         
354 |             if dynamic_crit_val < norm.ppf(1 - alp/2):
355 |                 print('Simultaneous conf. band is somehow smaller than pointwise one using normal approximation. Since this is unusual, we are reporting pointwise confidence intervals')
356 |                 dynamic_crit_val = norm.ppf(1 - alp/2)
357 |                 dp['cband'] = False
358 |         
359 |             if dynamic_crit_val >= 7:
360 |                 print("Simultaneous critical value is arguably 'too large' to be reliable. This usually happens when the number of observations per group is small and/or there is not much variation in outcomes.")
361 | 
362 |         epos = eseq >= 0
363 |         dynamic_att = np.mean(np.array(dynamic_att_e)[epos])
364 |         dynamic_inf_func = get_agg_inf_func(att         = np.array(dynamic_att_e)[epos],
365 |                                             inffunc     = np.array(dynamic_inf_func_e[:, epos]),
366 |                                             whichones   = np.arange(1, np.sum(epos)+1)-1,
367 |                                             weights_agg = np.repeat(1 / np.sum(epos), np.sum(epos)),
368 |                                             wif=None)[:, None]
369 |         
370 |         dynamic_se = get_se(dynamic_inf_func, dp)
371 |         if not np.isnan(dynamic_se):
372 |             if dynamic_se <= np.sqrt(np.finfo(float).eps) * 10:
373 |                 dynamic_se = np.nan
374 | 
375 |         AGGTEobj_print = AGGTEobj(overall_att=dynamic_att,
376 |                                 overall_se=dynamic_se,
377 |                                 typec=typec,
378 |                                 egt=eseq,
379 |                                 att_egt=dynamic_att_e,
380 |                                 se_egt=dynamic_se_e,
381 |                                 crit_val_egt=dynamic_crit_val,
382 |                                 inf_function={'dynamic_inf_func_e': dynamic_inf_func_e,
383 |                                               'dynamic_inf_func': dynamic_inf_func},
384 |                                 call=call,
385 |                                 min_e=min_e,
386 |                                 max_e=max_e,
387 |                                 balance_e=balance_e,
388 |                                 DIDparams=dp)
389 |             
390 |         return AGGTEobj_print
391 | 
392 | 
393 | 
394 | # =============================================================================
395 | #  Calendar
396 | # =============================================================================
397 | 
398 |  # np.array(group)
399 |     if typec == "calendar":
400 |         minG = min(group)
401 |         calendar_tlist = tlist[tlist >= minG]
402 |         pg = np.array(pg)
403 |         calendar_att_t = []
404 |         group = np.array(group)
405 |         t = np.array(t)
406 |         for t1 in calendar_tlist:
407 |             whicht = np.where((t == t1) & (group <= t))[0]
408 |             attt = att[whicht]
409 |             pgt = pg[whicht] / np.sum(pg[whicht])
410 |             calendar_att_t.append(np.sum(pgt * attt))
411 |             
412 |         # get standard errors and influence functions
413 |         # for each time specific att
414 |         calendar_se_inner = []
415 |         for t1 in calendar_tlist:
416 |             which_t = np.where((t == t1) & (group <= t))[0]
417 |             pgt = pg[which_t] / np.sum(pg[which_t])
418 |             wif_t = wif(keepers=which_t, 
419 |                         pg=pg, 
420 |                         weights_ind=weights_ind, 
421 |                         G=G, 
422 |                         group=group)
423 |             inf_func_t = get_agg_inf_func(att=att, 
424 |                                             inffunc=inffunc, 
425 |                                             whichones=which_t, 
426 |                                             weights_agg=pgt, 
427 |                                             wif=wif_t)[:, None]
428 |             se_t = get_se(inf_func_t, dp)
429 |             calendar_se_inner.append({"inf_func": inf_func_t, "se": se_t})
430 |     
431 |     
432 |     
433 |         # recover standard errors separately by time
434 |         calendar_se_t = np.array([se["se"] for se in calendar_se_inner]).T
435 |         calendar_se_t[calendar_se_t <= np.sqrt(np.finfo(float).eps) * 10] = np.nan
436 |         
437 |         # recover influence function separately by time
438 |         calendar_inf_func_t = np.column_stack([se["inf_func"] for se in calendar_se_inner])
439 |     
440 |         # use multiplier boostrap (across groups) to get critical value
441 |         # for constructing uniform confidence bands
442 |         calendar_crit_val = norm.ppf(1 - alp/2)
443 |         
444 |         if dp['cband']:
445 |             if not dp['bstrap']:
446 |                 warnings.warn('Used bootstrap procedure to compute simultaneous confidence band')
447 |         
448 |             # mboot function is not provided, please define it separately
449 |             calendar_crit_val = mboot(calendar_inf_func_t, dp)['crit_val']
450 |         
451 |             if np.isnan(calendar_crit_val) or np.isinf(calendar_crit_val):
452 |                 warnings.warn('Simultaneous critical value is NA. This probably happened because we cannot compute t-statistic (std errors are NA). We then report pointwise conf. intervals.')
453 |                 calendar_crit_val = norm.ppf(1 - alp/2)
454 |                 dp['cband'] = False
455 |         
456 |             if calendar_crit_val < norm.ppf(1 - alp/2):
457 |                 warnings.warn('Simultaneous conf. band is somehow smaller than pointwise one using normal approximation. Since this is unusual, we are reporting pointwise confidence intervals.')
458 |                 calendar_crit_val = norm.ppf(1 - alp/2)
459 |                 dp['cband'] = False
460 |         
461 |             if calendar_crit_val >= 7:
462 |                 warnings.warn("Simultaneous critical value is arguably 'too large' to be reliable. This usually happens when the number of observations per group is small and/or there is not much variation in outcomes.")
463 |     
464 |     
465 |         # get overall att under calendar time effects
466 |         # this is just average over all time periods
467 |         calendar_att = np.mean(calendar_att_t)
468 |         
469 |         # get overall influence function
470 |         calendar_inf_func = get_agg_inf_func(att=calendar_att_t,
471 |                                              inffunc=calendar_inf_func_t,
472 |                                              whichones=range(len(calendar_tlist)),
473 |                                              weights_agg=np.repeat(1/len(calendar_tlist), len(calendar_tlist)),
474 |                                              wif=None)[:, None]
475 |         calendar_inf_func = np.array(calendar_inf_func)
476 |         
477 |         # get overall standard error
478 |         calendar_se = get_se(calendar_inf_func, dp)
479 |         if not np.isnan(calendar_se):
480 |             if calendar_se <= np.sqrt(np.finfo(float).eps) * 10:
481 |                 calendar_se = np.nan
482 |         
483 |         AGGTEobj_print = AGGTEobj(overall_att=calendar_att,
484 |                                 overall_se=calendar_se,
485 |                                 typec=typec,
486 |                                 egt=list(map(t2orig, calendar_tlist)),
487 |                                 att_egt=calendar_att_t,
488 |                                 se_egt=calendar_se_t,
489 |                                 crit_val_egt=calendar_crit_val,
490 |                                 inf_function={"calendar_inf_func_t": calendar_inf_func_t,
491 |                                               "calendar_inf_func": calendar_inf_func},
492 |                                 call=call,
493 |                                 DIDparams=dp)
494 |     
495 |         return AGGTEobj_print
496 | 


--------------------------------------------------------------------------------
/csdid/aggte_fnc/utils.py:
--------------------------------------------------------------------------------
  1 | from csdid.utils.bmisc import TorF
  2 | from csdid.utils.mboot import mboot
  3 | 
  4 | import numpy as np
  5 | import scipy.stats as stats
  6 | import pandas as pd
  7 | 
  8 | def wif(keepers, pg, weights_ind, G, group):
  9 |     # note: weights are all of the form P(G=g|cond)/sum_cond(P(G=g|cond))
 10 |     # this is equal to P(G=g)/sum_cond(P(G=g)) which simplifies things here
 11 |     pg = np.array(pg)
 12 |     group = np.array(group)
 13 |     
 14 |     # effect of estimating weights in the numerator
 15 |     if1 = np.empty((len(weights_ind), len(keepers)))
 16 |     for i, k  in enumerate(keepers):
 17 |         numerator = (weights_ind * 1 * TorF(G == group[k])) - pg[k]
 18 |         # denominator = sum(np.array(pg)[keepers]) )[:, None]  
 19 |         denominator = np.sum(pg[keepers])
 20 | 
 21 |         result = numerator[:, None]  / denominator
 22 |         if1[:, i] = result.squeeze()
 23 |     
 24 |     # effect of estimating weights in the denominator
 25 |     if2 = np.empty((len(weights_ind), len(keepers)))
 26 |     for i, k  in enumerate(keepers):
 27 |         numerator = ( weights_ind * 1 * TorF(G == group[k]) ) - pg[k]
 28 |         # result = numerator.to_numpy()[:, None]  @ multipler[:, None].T
 29 |         if2[:, i] = numerator.squeeze()
 30 |     if2 = np.sum(if2, axis=1)    
 31 |     multiplier = ( pg[keepers] / sum( pg[keepers] ) ** 2 )   
 32 |     if2 = np.outer( if2 , multiplier)
 33 | 
 34 |     # if1 = [((weights_ind * 1*TorF(G==group[k])) - pg[k]) / sum(pg[keepers]) for k in keepers]
 35 |     # if2 = np.dot(np.array([weights_ind*1*TorF(G==group[k]) - pg[k] for k in keepers]).T, pg[keepers]/(sum(pg[keepers])**2))
 36 |     wif_factor = if1 - if2
 37 |     return wif_factor
 38 | 
 39 | def get_agg_inf_func(att, inffunc, whichones, weights_agg, wif=None):
 40 |     # enforce weights are in matrix form
 41 |     weights_agg = np.asarray(weights_agg)
 42 | 
 43 |     # multiplies influence function times weights and sums to get vector of weighted IF (of length n)
 44 |     thisinffunc = np.dot(inffunc[:, whichones], weights_agg)
 45 | 
 46 |     # Incorporate influence function of the weights
 47 |     if wif is not None:
 48 |         thisinffunc = thisinffunc + np.dot(wif, np.array(att[whichones]))
 49 |         
 50 |     # return influence function
 51 |     return thisinffunc
 52 | 
 53 | 
 54 | def get_se(thisinffunc, DIDparams=None):
 55 |     alpha = 0.05
 56 |     bstrap = False
 57 |     if DIDparams is not None:
 58 |         bstrap = DIDparams['bstrap']
 59 |         alpha = DIDparams['alp']
 60 |         cband = DIDparams['cband']
 61 |         n = len(thisinffunc)
 62 | 
 63 |     if bstrap:
 64 |         bout = mboot(thisinffunc, DIDparams)
 65 |         return bout['se']
 66 |     else:
 67 |         return np.sqrt(np.mean((thisinffunc)**2) / n)
 68 | 
 69 | def AGGTEobj(overall_att=None,
 70 |              overall_se=None,
 71 |              typec="simple",
 72 |              egt=None,
 73 |              att_egt=None,
 74 |              se_egt=None,
 75 |              crit_val_egt=None,
 76 |              inf_function=None,
 77 |              min_e=None,
 78 |              max_e=None,
 79 |              balance_e=None,
 80 |              call=None,
 81 |              DIDparams=None):
 82 | 
 83 |     out = {
 84 |         "overall_att": overall_att,
 85 |         "overall_se": overall_se,
 86 |         "type": typec,
 87 |         "egt": egt,
 88 |         "att_egt": att_egt,
 89 |         "se_egt": se_egt,
 90 |         "crit_val_egt": crit_val_egt,
 91 |         "inf_function": inf_function,
 92 |         "min_e": min_e,
 93 |         "max_e": max_e,
 94 |         "balance_e": balance_e,
 95 |         "call": call,
 96 |         "DIDparams": DIDparams
 97 |     }
 98 |     
 99 |     
100 |     # Overall estimates
101 |     alp = out["DIDparams"]["alp"]
102 |     pointwise_cval = stats.norm.ppf(1 - alp / 2)
103 |     overall_cband_upper = out["overall_att"] + pointwise_cval * out["overall_se"]
104 |     overall_cband_lower = out["overall_att"] - pointwise_cval * out["overall_se"]
105 |     out1 = np.column_stack((out["overall_att"], out["overall_se"], overall_cband_lower, overall_cband_upper))
106 |     out1 = np.round(out1, 4)
107 |     overall_sig = (overall_cband_upper < 0) | (overall_cband_lower > 0)
108 |     overall_sig[np.isnan(overall_sig)] = False
109 |     overall_sig_text = np.where(overall_sig, "*", "")
110 |     out1 = np.column_stack((out1, overall_sig_text))
111 |     
112 |     print("\n")
113 |     if out["type"] == "dynamic":
114 |         print("Overall summary of ATT's based on event-study/dynamic aggregation:")
115 |     elif out["type"] == "group":
116 |         print("Overall summary of ATT's based on group/cohort aggregation:")
117 |     elif out["type"] == "calendar":
118 |         print("Overall summary of ATT's based on calendar time aggregation:")
119 |     colnames = ["ATT", "Std. Error", f"[{100 * (1 - out['DIDparams']['alp'])}%"," Conf. Int.]", ""]
120 |     print(pd.DataFrame(out1, columns=colnames).to_string(index=False))
121 |     print("\n")
122 |     
123 |     # Handle cases depending on type
124 |     if out["type"] in ["group", "dynamic", "calendar"]:
125 |         if out["type"] == "dynamic":
126 |             c1name = "Event time"
127 |             print("Dynamic Effects:")
128 |         elif out["type"] == "group":
129 |             c1name = "Group"
130 |             print("Group Effects:")
131 |         elif out["type"] == "calendar":
132 |             c1name = "Time"
133 |             print("Time Effects (calendar):")
134 |     
135 |         cband_text1a = f"{100 * (1 - out['DIDparams']['alp'])}% "
136 |         cband_text1b = "Simult. " if out["DIDparams"]["bstrap"] else "Pointwise "
137 |         cband_text1 = f"[{cband_text1a}{cband_text1b}"
138 |     
139 |         cband_lower = out["att_egt"] - out["crit_val_egt"] * out["se_egt"]
140 |         cband_upper = out["att_egt"] + out["crit_val_egt"] * out["se_egt"]
141 |     
142 |         sig = (cband_upper < 0) | (cband_lower > 0)
143 |         sig[np.isnan(sig)] = False
144 |         sig_text = np.where(sig, "*", "").T
145 | 
146 |         out2 = pd.DataFrame([out["egt"], 
147 |                              out["att_egt"], 
148 |                              out["se_egt"].flatten(), 
149 |                              np.hstack(cband_lower),
150 |                              np.hstack(cband_upper)]).T
151 |         
152 |         out2 = out2.round(4)
153 |         out2[0] = out2[0].astype(int)
154 |         out2 = pd.concat([out2, pd.DataFrame(sig_text, columns=['sig_text']) ], axis=1)    
155 |         
156 |         out2.columns = [c1name, "Estimate", "Std. Error", cband_text1, "Conf. Band", ""]
157 |         print(out2)
158 |     
159 |     
160 |     
161 | 
162 |     
163 |     print("---")
164 |     print("Signif. codes: `*' confidence band does not cover 0")
165 |     
166 |     # Set control group text
167 |     control_group = out["DIDparams"]["control_group"]
168 |     control_group_text = None
169 |     if control_group == "nevertreated":
170 |         control_group_text = "Never Treated"
171 |     elif control_group == "notyettreated":
172 |         control_group_text = "Not Yet Treated"
173 |     
174 |     if control_group:
175 |         print("Control Group: ", control_group_text, ", ")
176 |     
177 |     # Anticipation periods
178 |     print("Anticipation Periods: ", out["DIDparams"]["anticipation"])
179 |     
180 |     # Estimation method text
181 |     est_method = out["DIDparams"]["est_method"]
182 |     if isinstance(est_method, str):
183 |         est_method_text = est_method
184 |         if est_method == "dr":
185 |             est_method_text = "Doubly Robust"
186 |         elif est_method == "ipw":
187 |             est_method_text = "Inverse Probability Weighting"
188 |         elif est_method == "reg":
189 |             est_method_text = "Outcome Regression"
190 |     
191 |         print("Estimation Method: ", est_method_text)
192 |         print("\n")
193 |         
194 |     return out
195 | 
196 | 
197 | 


--------------------------------------------------------------------------------
/csdid/att_gt.py:
--------------------------------------------------------------------------------
  1 | # from aggte import AGGte
  2 | from csdid.aggte_fnc.aggte import aggte as agg_te
  3 | 
  4 | from csdid.attgt_fnc.preprocess_did import pre_process_did
  5 | from csdid.attgt_fnc.compute_att_gt import compute_att_gt
  6 | 
  7 | from csdid.utils.mboot import mboot
  8 | 
  9 | from csdid.plots.gplot import gplot, splot
 10 | 
 11 | 
 12 | import matplotlib.pyplot as plt
 13 | 
 14 | import warnings
 15 | 
 16 | import numpy as np, pandas as pd
 17 | 
 18 | # class ATTgt(AGGte):
 19 | class ATTgt:
 20 |   def __init__(self, yname, tname, idname, gname, data, control_group = ['nevertreated', 'notyettreated'], 
 21 |   xformla: str = None, panel = True, allow_unbalanced_panel = True, 
 22 |   clustervar = None, weights_name = None, anticipation = 0, 
 23 |   cband = False, biters = 1000, alp = 0.05
 24 |   ):
 25 |     dp = pre_process_did(
 26 |       yname=yname, tname = tname, idname=idname, gname = gname,
 27 |       data = data, control_group=control_group, anticipation=anticipation,
 28 |       xformla=xformla, panel=panel, allow_unbalanced_panel=allow_unbalanced_panel, cband=cband, clustervar=None, weights_name=None
 29 |     )
 30 | 
 31 |     dp['biters'] = biters
 32 |     dp['alp'] = alp
 33 |     dp['true_repeated_cross_sections'] = dp['true_rep_cross_section']
 34 |     dp['cband'] = cband
 35 |     dp['panel'] = panel
 36 |     self.dp = dp
 37 | 
 38 |   def fit(self, est_method = 'dr', base_period = 'varying', bstrap = True):
 39 |     # print(self.dp)
 40 |     dp = self.dp
 41 |     result, inffunc = compute_att_gt(dp, est_method = est_method, base_period = base_period)
 42 |     att = result['att']
 43 |     n_len = list(map(len, inffunc))
 44 |     crit_val, se, V = (
 45 |             1.96,
 46 |             np.std(inffunc, axis=1, ddof = 1) / np.sqrt(n_len),
 47 |             np.zeros(len(att)),
 48 |         )
 49 |     if bstrap:
 50 |       ref_se = mboot(inffunc.T, dp)
 51 |       crit_val, se = ref_se['crit_val'], ref_se['se']
 52 |       V = ref_se['V']
 53 |     
 54 |     ############# aggte input
 55 |     group = result['group']
 56 |     att = result['att']
 57 |     tt = result['year']
 58 |     inf_fnc = {'inffunc': inffunc.T}
 59 | 
 60 |     dp['bstrap'] = bstrap
 61 |     dp['est_method'] = est_method
 62 |     dp['base_period'] = base_period
 63 |     self.dp = dp
 64 |     n = dp['n']
 65 | 
 66 |     mp = {
 67 |       'group': group, 'att': att, 't': tt,
 68 |       'DIDparams': dp, 'inffunc': inf_fnc, 
 69 |       'n': n
 70 |     }
 71 |     self.MP = mp
 72 | 
 73 | 
 74 |     cband_lower = att - crit_val * se
 75 |     cband_upper = att + crit_val * se
 76 |     sig = (cband_upper < 0) | (cband_lower > 0)
 77 |     sig[np.isnan(sig)] = False
 78 |     sig_text = np.where(sig, "*", "")
 79 | 
 80 |     result.update(
 81 |       {
 82 |         'se': se, 'l_se': cband_lower,
 83 |         'c': crit_val,
 84 |         'u_se': cband_upper, 'sig': sig_text
 85 |        })
 86 |     
 87 |     self.results = result
 88 | 
 89 |     rst = result
 90 |     did_object = {
 91 |       'group': mp['group'],
 92 |       't': mp['t'],
 93 |       'att': rst['att'],
 94 |       'se': rst['se'],
 95 |       'c': rst['c'],
 96 |     }
 97 |     self.did_object = did_object
 98 |     return self
 99 |   def summ_attgt(self, n = 4):
100 |     result = self.results
101 |     att_gt = pd.DataFrame(result)
102 |     att_gt = att_gt.drop('c', axis=1)
103 |     name_attgt_df = ['Group', 'Time', 'ATT(g, t)', 'Post', "Std. Error", "[95% Pointwise", 'Conf. Band]', '']
104 |     att_gt.columns = name_attgt_df
105 |     att_gt = att_gt.round(n)
106 |     self.summary2 = att_gt
107 |     return self
108 | 
109 |   def aggte(
110 |     self, 
111 |     typec         = "group",
112 |     balance_e     = None,
113 |     min_e         = float('-inf'),
114 |     max_e         = float('inf'),
115 |     na_rm         = False,
116 |     bstrap        = None,
117 |     biters        = None,
118 |     cband         = None,
119 |     alp           = None,
120 |     clustervars   = None,
121 |     ):
122 |     mp = self.MP
123 |     did_object = self.did_object
124 |     
125 |     did_object.update({
126 |       'type': typec
127 |       }
128 |     )
129 | 
130 | 
131 |     atte = agg_te(
132 |       mp, typec=typec, balance_e=balance_e, 
133 |       min_e=min_e, max_e=max_e, na_rm=na_rm, bstrap=bstrap, 
134 |       biters=biters, cband=cband, alp=alp, clustervars=clustervars
135 |     )
136 | 
137 |     self.atte = atte
138 |     return self
139 |   def plot_attgt(self, ylim=None, 
140 |                 xlab=None, 
141 |                 ylab=None, 
142 |                 title="Group",
143 |                 xgap=1, 
144 |                 ncol=1, 
145 |                 legend=True, 
146 |                 group=None, 
147 |                 ref_line=0,
148 |                 theming=True, 
149 |                 grtitle="Group"
150 |                 ):
151 | 
152 |     did_object = self.did_object
153 | 
154 |     grp = did_object['group']
155 |     t_i = did_object['t']
156 | 
157 |     G = len(np.unique(grp))
158 |     Y = len(np.unique(t_i))
159 |     g = np.unique(grp)[np.argsort(np.unique(grp))].astype(int)
160 |     y = np.unique(t_i)
161 | 
162 |     results = pd.DataFrame({'year': np.tile(y, G)})
163 |     results['group'] = np.repeat(g, Y)
164 |     results['grtitle'] = grtitle + ' ' + results['group'].astype(str)
165 |     results['att'] = did_object['att']
166 |     results['att_se'] = did_object['se']
167 |     results['post'] = np.where(results['year'] >= grp, 1, 0)
168 |     results['year'] = results['year']
169 |     results['c'] = did_object['c']
170 | 
171 |     self.results_plot_df_attgt = results
172 | 
173 |     if group is None:
174 |       group = g
175 |       if any(group not in g for group in group):
176 |         raise ValueError("Some of the specified groups do not exist in the data. Reporting all available groups.")
177 | 
178 | 
179 |     legend_1 = False    # for multiple subplots, legend outside 
180 |     fig, axes = plt.subplots(nrows=len(group), ncols=1, figsize=(10, 5))  # Adjust the figsize as needed
181 |     handles = []
182 |     labels = []
183 |     for i, group_cat in enumerate(group):
184 |         group_data = results.loc[results['group'] == group_cat]
185 |         title = group_data['grtitle'].unique()[0]
186 |         ax = axes[i]
187 |         ax = gplot(group_data, ax, ylim, xlab, ylab, title, xgap, legend_1, ref_line, theming)
188 |     plt.tight_layout()
189 |     if legend is True:
190 |         handles_ax, labels_ax = ax.get_legend_handles_labels()
191 |         handles.extend(handles_ax)
192 |         labels.extend(labels_ax)
193 |         fig.legend(handles, labels, loc='lower center', fontsize='small', bbox_to_anchor=(0.545, -0.075), ncol=2)
194 |     
195 |     plt.show()
196 |     return fig 
197 | 
198 |   def plot_aggte(self, ylim=None, 
199 |                    xlab=None, 
200 |                    ylab=None, 
201 |                    title="", 
202 |                    xgap=1, 
203 |                    legend=True, 
204 |                    ref_line=0, 
205 |                    theming=True,
206 |                    **kwargs):
207 | 
208 |     did_object = self.atte
209 | 
210 |     post_treat = 1 * (np.asarray(did_object["egt"]).astype(int) >= 0)
211 |     
212 |     results = {
213 |         "year": list(map(int, did_object["egt"])),
214 |         "att": did_object["att_egt"],
215 |         "att_se": did_object["se_egt"][0],
216 |         "post": post_treat
217 |     }
218 |     
219 |     results = pd.DataFrame(results)
220 |     self.results_plot_df_aggte = results
221 |     
222 |     if did_object['crit_val_egt'] is None:
223 |         results['c'] = abs(norm.ppf(0.025))
224 |     else:
225 |         results['c'] = did_object['crit_val_egt']
226 | 
227 |     if title == "":
228 |         title = "Average Effect by Group" if\
229 |           did_object["type"] == "group" else\
230 |             "Average Effect by Length of Exposure"
231 | 
232 | 
233 |     if did_object["type"] == "group":
234 |         fig, ax = plt.subplots(figsize=(10, 5))
235 |         p = splot(results, ax, ylim, xlab, ylab, title, legend, ref_line, theming)
236 |         plt.tight_layout()
237 |         plt.show()
238 | 
239 |     else:
240 |         fig, ax = plt.subplots(figsize=(10, 5))
241 |         p = gplot(results, ax, ylim, xlab, ylab, title, xgap, legend, ref_line, theming)
242 |         plt.tight_layout()
243 |         plt.show() 
244 |         
245 |     return p
246 | 


--------------------------------------------------------------------------------
/csdid/attgt_fnc/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2cml-ai/csdid/7ad721d4c384bca147ae16de84b7c5df8fe26959/csdid/attgt_fnc/__init__.py


--------------------------------------------------------------------------------
/csdid/attgt_fnc/compute_att_gt.py:
--------------------------------------------------------------------------------
  1 | import numpy as np, pandas as pd
  2 | import patsy 
  3 | from drdid import drdid, reg_did, ipwd_did
  4 | 
  5 | from csdid.utils.bmisc import panel2cs2
  6 | import warnings
  7 | 
  8 | 
  9 | fml = patsy.dmatrices
 10 | # Initialize a list to store data for each iteration
 11 | results_list = []
 12 | 
 13 | def compute_att_gt(dp, est_method = "dr", base_period = 'varying'):
 14 |     yname = dp['yname']
 15 |     tname = dp['tname']
 16 |     idname = dp['idname']
 17 |     xformla = dp['xformla']
 18 |     data = dp['data']
 19 |     weights_name = dp['weights_name']
 20 |     # base_period = dp['base_period']
 21 |     panel = dp['panel']
 22 |     true_rep_cross_section = dp['true_rep_cross_section']
 23 |     control_group = dp['control_group']
 24 |     anticipation = dp['anticipation']
 25 |     gname = dp['gname']
 26 |     n = dp['n']
 27 |     nT = dp['nT']
 28 |     nG = dp['nG']
 29 |     tlist = dp['tlist']
 30 |     glist = dp['glist']
 31 | 
 32 |     # Calculate time periods and adjustment factor
 33 |     tlist_len = len(tlist) - 1 if base_period != "universal" else len(tlist)
 34 |     tfac = 1 if base_period != "universal" else 0
 35 | 
 36 |     inf_func = []
 37 | 
 38 |     att_est, group, year, post_array = [], [], [], []
 39 | 
 40 |     def add_att_data(att = 0, pst = 0, inf_f = []):
 41 |         inf_func.append(inf_f)
 42 |         att_est.append(att)
 43 |         group.append(g)
 44 |         year.append(tn)
 45 |         post_array.append(pst)
 46 | 
 47 |     # Handle never treated case
 48 |     never_treated = control_group == 'nevertreated'
 49 |     if never_treated:
 50 |         data['C'] = (data[gname] == 0).astype(int)
 51 |     data['y_main'] = data[yname]
 52 | 
 53 |     # Loop over groups
 54 |     for g_index, g in enumerate(glist):  
 55 |         # Set up .G once
 56 |         # Create a binary column 'G_m' to indicate if a row belongs to the current group 'g'
 57 |         G_main = (data[gname] == glist[g_index])
 58 |         data = data.assign(G_m=1 * G_main)
 59 | 
 60 |         # Loop over time periods
 61 |         for t_i in range(tlist_len):
 62 | 
 63 |             # Set pretreatment period
 64 |             pret = t_i  # Initialize pretreatment period as current time period index
 65 |             tn = tlist[t_i + tfac]  # Current time period (adjusted for tfac)
 66 | 
 67 |             # Universal base period
 68 |             if base_period == 'universal':  # Check if using a universal base period
 69 |                 try:
 70 |                     # Set pretreatment period as the last period before treatment
 71 |                     pret = np.where((tlist + anticipation) < g)[0][-1]
 72 |                 except IndexError:
 73 |                     # Handle cases where no pretreatment periods exist
 74 |                     raise ValueError(
 75 |                         f"There are no pre-treatment periods for the group first treated at {g}. Units from this group are dropped."
 76 |                     )
 77 | 
 78 |             # If we are in the universal base period
 79 |             if base_period == 'universal' and pret == tn:
 80 |                 # Normalize results to zero and skip computation
 81 |                 add_att_data(att=0, pst=0, inf_f=np.zeros(len(data)))
 82 |                 continue
 83 | 
 84 |             # For non-never treated groups, set up the control group indicator 'C'
 85 |             if not never_treated:
 86 |                 # Units that are either never treated (gname == 0) or treated in the future
 87 |                 n1 = (data[gname] == 0)
 88 |                 n2 = (data[gname] > (tlist[max(t_i, pret) + tfac] + anticipation))
 89 |                 n3 = (data[gname] != glist[g_index])  # Not in the current group
 90 |                 row_eval = n1 | (n2 & n3)  # Combine conditions
 91 |                 data = data.assign(C=1 * row_eval)  # Assign the control indicator
 92 | 
 93 |         # -----------------------------------------------------------------------------
 94 |   
 95 |             
 96 |             # Check if in post-treatment period
 97 |             if glist[g_index] <= tlist[t_i + tfac]:
 98 |                 # Use same base period as for post-treatment periods
 99 |                 # This matches R's: tail(which((tlist+anticipation) < glist[g]), 1)
100 |                 pret_mask = (np.array(tlist) + anticipation) < glist[g_index]
101 |                 if not any(pret_mask):
102 |                     warnings.warn(f"There are no pre-treatment periods for the group first treated at {glist[g_index]}\nUnits from this group are dropped")
103 |                     break
104 |                 
105 |                 pret = np.where(pret_mask)[0][-1]  # Gets last element like R's tail(..., 1)
106 |                 # print("this is alex", pret)
107 | 
108 | 
109 |             # Print details for debugging
110 |             # print(f"Current period: {tlist[t_i + tfac]}")
111 |             # print(f"Current group: {glist[g_index]}")
112 |             # print(f"Set pretreatment period to be: {tlist[pret]}")
113 | 
114 |             # -----------------------------------------------------------------------------
115 |             # Debugging and validation
116 |             if base_period == 'universal' and pret == tn:
117 |                 # Normalize results to zero and break the loop
118 |                 add_att_data(att=0, pst=post_treat, inf_f=np.zeros(len(data)))
119 |                 continue
120 |             
121 |             # Post-treatment dummy variable
122 |             post_treat = 1 * (g <= tn)
123 | 
124 |             # Subset the data for the current and pretreatment periods
125 |             disdat = data[(data[tname] == tn) | (data[tname] == tlist[pret])]
126 |             # print("Shape of disdat:", disdat.shape)
127 | 
128 |         # results for the case with panel data
129 |         #-----------------------------------------------------------------------------
130 | 
131 |             if panel: 
132 |                 disdat = panel2cs2(disdat, yname, idname, tname)
133 |                 disdat = disdat.dropna()
134 |                 n = len(disdat)
135 |                 dis_idx = np.array(disdat.G_m == 1) | np.array(disdat.C == 1)
136 |                 disdat = disdat.loc[dis_idx, :]
137 |                 n1 = len(disdat)
138 |                 G = disdat.G_m
139 |                 C = disdat.C
140 |                 w = disdat.w
141 | 
142 |                 ypre = disdat.y0 if tn > pret else disdat.y1
143 |                 ypost = disdat.y0 if tn < pret else disdat.y1
144 |                 _, covariates = fml(xformla, data = disdat, return_type = 'dataframe')
145 | 
146 |                 G, C, w, ypre = map(np.array, [G, C, w, ypre])
147 |                 ypost, covariates = map(np.array, [ypost, covariates])
148 | 
149 |                 if callable(est_method):
150 |                     est_att_f = est_method
151 |                 elif est_method == "reg":
152 |                     est_att_f = reg_did.reg_did_panel
153 |                 elif est_method == "ipw":
154 |                     est_att_f = ipwd_did.std_ipw_did_panel
155 |                 elif est_method == "dr":
156 |                     est_att_f = drdid.drdid_panel
157 | 
158 |                 att_gt, att_inf_func = est_att_f(ypost, ypre, G, i_weights=w, covariates=covariates)
159 | 
160 |                 inf_zeros = np.zeros(n)
161 |                 att_inf = n / n1 * att_inf_func
162 |                 inf_zeros[dis_idx] = att_inf
163 | 
164 |                 add_att_data(att_gt, inf_f=inf_zeros)
165 | 
166 |         #-----------------------------------------------------------------------------
167 |         # results for the case with no panel data
168 |         #-----------------------------------------------------------------------------
169 |             
170 |             if not panel:
171 |                 # Fixed right_ids selection
172 |                 right_ids = disdat.loc[disdat.G_m.eq(1) | disdat.C.eq(1), 'rowid'].to_numpy()
173 |                 # print("Shape of ids:",right_ids.shape)  # Show dimensions
174 |                 # Consistent time period selection
175 |                 dis_idx = (data['rowid'].isin(right_ids)) & \
176 |                             (data[tname].isin([tlist[t_i + tfac], tlist[pret]]))
177 | 
178 |                 disdat = data.loc[dis_idx]
179 | 
180 |                 G = disdat.G_m.to_numpy()
181 |                 C = disdat.C.to_numpy()
182 |                 Y = disdat[yname].to_numpy()
183 |                 post = 1 * (disdat[tname] == tlist[t_i + tfac]).to_numpy()
184 |                 w = disdat.w.to_numpy()
185 |                 n1 = sum(G + C)
186 | 
187 |                 # Store the current iteration's data
188 |                 current_data = {
189 |                     'Y': Y,
190 |                     'post': post,
191 |                     'G': G,
192 |                     'group': g,
193 |                     'time_period': tn
194 |                 }
195 |                 
196 |                 # print(f"Lengths: Y={len(Y)}, post={len(post)}, G={len(G)}, group={type(g)}, time_period={type(tn)}")
197 |                 # results_list.append(pd.DataFrame(current_data))
198 | 
199 |                 #-----------------------------------------------------------------------------
200 |                 # checks to make sure that we have enough observations
201 |                 
202 |                 skip_this_att_gt = False
203 | 
204 |                 if np.sum(G * post) == 0:
205 |                     print(f"No units in group {g} in time period {t_i + tfac + 1}, e1")
206 |                     skip_this_att_gt = True 
207 | 
208 |                 if np.sum(G * (1 - post)) == 0:
209 |                     print(f"No units in group {g} in time period {t_i + 1}, e2")
210 |                     skip_this_att_gt = True 
211 | 
212 |                 if np.sum(C * post) == 0:
213 |                     print(f"No available control units for group {g} in time period {t_i + tfac + 1}, e3")
214 |                     skip_this_att_gt = True 
215 | 
216 |                 if np.sum(C * (1 - post)) == 0:
217 |                     print(f"No available control units for group {g} in time period {t_i+1}, e4")
218 |                     skip_this_att_gt = True 
219 | 
220 |                 if skip_this_att_gt:
221 |                     # Append results with missing ATT and NA influence function
222 |                     add_att_data(att=np.nan, pst=post_treat, inf_f=np.full(n, np.nan))
223 |                     #add_att_data()
224 |                     continue
225 | 
226 |                 # return (inf_func)
227 |                 try:
228 |                     _, covariates = fml(xformla, data = disdat, return_type = 'dataframe')
229 |                     covariates = np.array(covariates)
230 |                 except Exception as e:
231 |                     print(f"Warning: Formula processing failed: {e}")
232 |                     y_str, x_str = xformla.split("~")
233 |                     xs1 = x_str.split('+')
234 |                     xs1_col_names = [x.strip() for x in xs1 if x.strip() != '1']
235 |                     n_dis = len(disdat)
236 |                     ones = np.ones((n_dis, 1))
237 |                     try:
238 |                         covariates = disdat[xs1_col_names].to_numpy()
239 |                         covariates = np.append(covariates, ones, axis=1)
240 |                     except:
241 |                         covariates = ones
242 | 
243 |                 #-----------------------------------------------------------------------------
244 |                 # code for actually computing att(g,t)
245 |                 #-----------------------------------------------------------------------------
246 |                 # print(Y, post, G, w, covariates)
247 |                 
248 |                 if callable(est_method):
249 |                     est_att_f = est_method
250 |                 elif est_method == "reg":
251 |                     est_att_f = reg_did.reg_did_rc
252 |                 elif est_method == "ipw":
253 |                     est_att_f = ipwd_did.std_ipw_did_rc
254 |                 elif est_method == "dr":
255 |                     est_att_f = drdid.drdid_rc
256 |                 
257 |                 att_gt, att_inf_func = est_att_f(y=Y, post=post, D = G, i_weights=w, covariates=covariates)
258 |                 # print(att_inf_func)
259 |                 att_inf_func = (n/n1)*att_inf_func
260 |                 
261 |                 inf_func_df = pd.DataFrame(
262 |                 {
263 |                     "inf_func": att_inf_func,
264 |                     "right_ids": right_ids
265 |                 }
266 |                 ).fillna(0)
267 | 
268 |                 inf_zeros = np.zeros(n)
269 |                 aggte_infffuc = inf_func_df.groupby('right_ids').inf_func.sum()
270 |                 try:
271 |                     dis_idx1 = np.isin(data['rowid'].unique(), aggte_infffuc.index.to_numpy())
272 |                 except:
273 |                     dis_idx1 = np.isin(data['rowid'].unique().to_numpy(), aggte_infffuc.index.to_numpy())
274 |                 
275 |                 inf_zeros[dis_idx1] = np.array(aggte_infffuc)
276 | 
277 |                 add_att_data(att_gt, pst = post_treat, inf_f=inf_zeros)
278 |                 # print(att_est)
279 | 
280 |     output = {
281 |     'group': group ,
282 |     'year': year,
283 |     "att" : att_est,
284 |     'post ': post_array
285 |     }
286 | 
287 |     return (output, np.vstack(inf_func))
288 | 
289 | 
290 | # import numpy as np, pandas as pd
291 | # import patsy 
292 | # from drdid import drdid, reg_did, ipwd_did
293 | 
294 | # from csdid.utils.bmisc import panel2cs2
295 | 
296 | # fml = patsy.dmatrices
297 | 
298 | 
299 | # # data Struct
300 | # # output = {
301 | # #   "att" : []
302 | # #   'group': []
303 | # #   'year': []
304 | # #   'post ': []
305 | # # }
306 | 
307 | 
308 | # def compute_att_gt(dp, est_method = "dr", base_period = 'varying'):
309 | #   yname = dp['yname']
310 | #   tname = dp['tname']
311 | #   idname = dp['idname']
312 | #   xformla = dp['xformla']
313 | #   data = dp['data']
314 | #   weights_name = dp['weights_name']
315 | #   # base_period = dp['base_period']
316 | #   panel = dp['panel']
317 | #   true_rep_cross_section = dp['true_rep_cross_section']
318 | #   control_group = dp['control_group']
319 | #   anticipation = dp['anticipation']
320 | #   gname = dp['gname']
321 | #   n = dp['n']
322 | #   nT = dp['nT']
323 | #   nG = dp['nG']
324 | #   tlist = dp['tlist']
325 | #   glist = dp['glist']
326 | #   tlist_len, tfac = len(tlist), 0
327 | #   if base_period != 'universal':
328 | #     tlist_len = tlist_len - 1
329 | #     tfac = 1
330 | 
331 | #   inf_func = []
332 | 
333 | #   att_est, group, year, post_array = [], [], [], []
334 | 
335 | #   def add_att_data(att = 0, pst = 0, inf_f = []):
336 | #     inf_func.append(inf_f)
337 | #     att_est.append(att)
338 | #     group.append(g)
339 | #     year.append(tn)
340 | #     post_array.append(pst)
341 | 
342 | #   never_treated = control_group == 'nevertreated'
343 | #   if never_treated:
344 | #     data = data.assign(C = 1 * (data[gname] == 0))
345 | #   data = data.assign(y_main = data[yname])
346 | 
347 | #   # g, t = glist[0], tlist[0]
348 | 
349 | #   # for _, g, in enumerate(glist):
350 | #   for g_index, g in enumerate(glist):
351 | 
352 | #     # g = glist[1]
353 | #     G_main = (data[gname] == g)
354 | #     data = data.assign(G_m = 1 * G_main)
355 | 
356 | #     for t_i in range(tlist_len):
357 | #       pret = t_i
358 | #       tn = tlist[t_i + tfac]
359 | #       if base_period == 'universal' or g < tn:
360 | #         try:
361 | #           pret = np.where(tlist + anticipation < g)[0][-1]
362 | #         except:
363 | #           raise f"There are no pre-treatment periods for the group first treated at {g}\nUnits from this group are dropped"
364 | #           # break
365 | 
366 | 
367 | #       if base_period == 'universal':
368 | #         if pret == tn:
369 | #           add_att_data()
370 | 
371 | #       if not never_treated:
372 | #         n1 = data[gname] == 0
373 | #         n2 = (data[gname] > (tlist[np.max([t_i, pret]) + tfac]) + anticipation)
374 | #         #n3 = np.where(data[gname] != glist[g], True, False)
375 | #         n3 = np.where(data[gname] != glist[g_index], True, False)
376 | 
377 | #         row_eval = n1 | n2 & n3
378 | #         data = data.assign(C = 1 * row_eval)
379 | 
380 | #       post_treat = 1 * (g <= tn)
381 | #       disdat = data[(data[tname] == tn) | (data[tname] == tlist[pret])]
382 | 
383 | #       if panel: 
384 | #         disdat = panel2cs2(disdat, yname, idname, tname)
385 | #         disdat = disdat.dropna()
386 | #         n = len(disdat)
387 | #         dis_idx = np.array(disdat.G_m == 1) | np.array(disdat.C == 1)
388 | #         disdat = disdat.loc[dis_idx, :]
389 | #         n1 = len(disdat)
390 | #         G = disdat.G_m
391 | #         C = disdat.C
392 | #         w = disdat.w
393 | 
394 | #         ypre = disdat.y0 if tn > pret else disdat.y1
395 | #         ypost = disdat.y0 if tn < pret else disdat.y1
396 | #         _, covariates = fml(xformla, data = disdat, return_type = 'dataframe')
397 | 
398 | #         G, C, w, ypre = map(np.array, [G, C, w, ypre])
399 | #         ypost, covariates = map(np.array, [ypost, covariates])
400 | 
401 | #         if callable(est_method):
402 | #           est_att_f = est_method
403 | #         elif est_method == "reg":
404 | #           est_att_f = reg_did.reg_did_panel
405 | #         elif est_method == "ipw":
406 | #           est_att_f = ipwd_did.std_ipw_did_panel
407 | #         elif est_method == "dr":
408 | #           est_att_f = drdid.drdid_panel
409 | 
410 | #         att_gt, att_inf_func = est_att_f(ypost, ypre, G, i_weights=w, covariates=covariates)
411 | 
412 | #         inf_zeros = np.zeros(n)
413 | #         att_inf = n / n1 * att_inf_func
414 | #         inf_zeros[dis_idx] = att_inf
415 | 
416 | #         add_att_data(att_gt, inf_f=inf_zeros)
417 | 
418 | #       if not panel:
419 | #         right_ids = np.array(disdat.query('(G_m == 1) or (C == 1)').rowid.to_numpy())
420 | #         dis_idx = (data['rowid'].isin(right_ids)) &\
421 | #           ((data[tname] == tlist[t_i + tfac]) |\
422 | #             (data[tname] == tlist[pret]))
423 | 
424 | #         disdat = data.loc[dis_idx]
425 | 
426 | #         G = disdat.G_m.to_numpy()
427 | #         C = disdat.C.to_numpy()
428 | #         Y = disdat[yname].to_numpy()
429 | #         post = 1 * (disdat[tname] == tlist[t_i + tfac]).to_numpy()
430 | #         w = disdat.w.to_numpy()
431 | 
432 | #         # G, C, Y, post, w = map(np.array, [G, C, Y, post, w])
433 | 
434 | 
435 | #         n1 = sum(G + C)
436 | 
437 | #         skip_this_att_gt = False
438 | #         if np.sum(G * post) == 0:
439 | #           print(f"No units in group {g} in time period {tn}")
440 | #           skip_this_att_gt = True 
441 | 
442 | #         if np.sum(G * (1 - post)) == 0:
443 | #           print(f"No units in group {g} in time period {tn}")
444 | #           skip_this_att_gt = True 
445 | 
446 | #         if np.sum(C * post) == 0:
447 | #           print(f"No available control units for group {g} in time period {tn}")
448 | #           skip_this_att_gt = True 
449 | 
450 | #         if np.sum(C * (1 - post)) == 0:
451 | #           print(f"No available control units for group {g} in time period {tn}")
452 | #           skip_this_att_gt = True 
453 | 
454 | #         if skip_this_att_gt:
455 | #           add_att_data()
456 | 
457 | #         try:
458 | #           _, covariates = fml(xformla, data = disdat, return_type = 'dataframe')
459 | #           covariates = np.array(covariates)
460 | #         except:
461 | #           y_str, x_str = xformla.split("~")
462 | #           xs1 = x_str.split('+')
463 | #           xs1_col_names = [x.strip() for x in xs1 if x.strip() != '1']
464 | #           n_dis = len(disdat)
465 | #           ones = np.ones((n_dis, 1))
466 | #           try:
467 | #             covariates = disdat[xs1_col_names].to_numpy()
468 | #             covariates = np.append(covariates, ones, axis=1)
469 | #           except:
470 | #             covariates = ones
471 | 
472 |         
473 | 
474 | #         if callable(est_method):
475 | #           est_att_f = est_method
476 | #         elif est_method == "reg":
477 | #           est_att_f = reg_did.reg_did_rc
478 | #         elif est_method == "ipw":
479 | #           est_att_f = ipwd_did.std_ipw_did_rc
480 | #         elif est_method == "dr":
481 | #           est_att_f = drdid.drdid_rc
482 | #         att_gt, att_inf_func = est_att_f(y=Y, post=post, D = G, i_weights=w, covariates=covariates)
483 | 
484 | #         inf_func_df = pd.DataFrame(
485 | #           {
486 | #             "inf_func": att_inf_func,
487 | #             "right_ids": right_ids
488 | #           }
489 | #         )
490 | #         inf_zeros = np.zeros(n)
491 | #         aggte_infffuc = inf_func_df.groupby('right_ids').inf_func.sum()
492 | #         try:
493 | #           dis_idx1 = np.isin(data['rowid'].unique(), aggte_infffuc.index.to_numpy())
494 | #         except:
495 | #           dis_idx1 = np.isin(data['rowid'].unique().to_numpy(), aggte_infffuc.index.to_numpy())
496 | #         inf_zeros[dis_idx1] = np.array(aggte_infffuc)
497 | 
498 | #         add_att_data(att_gt, pst = post_treat, inf_f=inf_zeros)
499 | #         # print(att_est)
500 | 
501 | #   output = {
502 | #     'group': group ,
503 | #     'year': year,
504 | #     "att" : att_est,
505 | #     'post ': post_array
506 | #   }
507 | #   return (output, np.array(inf_func))


--------------------------------------------------------------------------------
/csdid/attgt_fnc/preprocess_did.py:
--------------------------------------------------------------------------------
  1 | # from typing import Union, List, Optional, Dict
  2 | # import pandas as pd, numpy as np
  3 | # import patsy 
  4 | # from csdid.utils.bmisc import makeBalancedPanel
  5 | 
  6 | # fml = patsy.dmatrices
  7 | 
  8 | # def pre_process_did(yname, tname, idname, gname, data: pd.DataFrame, 
  9 | #   control_group: Union[str, List[str]] = "nevertreated", 
 10 | #   anticipation = 0, xformla : str = None,
 11 | #   panel = True, allow_unbalanced_panel = True, cband = False,
 12 | #   clustervar = None,  weights_name = None
 13 | #   ) -> dict:
 14 | 
 15 | #   n, t = data.shape
 16 | #   # control_group = "nevertreated"
 17 | #   columns = [idname, tname, yname, gname]
 18 | #   # print(columns)
 19 | #   # Columns
 20 | #   if clustervar is not None:
 21 | #     columns += [clustervar]
 22 | #   if weights_name is not None:
 23 | #     columns += [weights_name]
 24 | #     w = data[weights_name]
 25 | #   else:
 26 | #     w = np.ones(n)
 27 | 
 28 | 
 29 | #   if xformla is None:
 30 | #     xformla = f'{yname} ~ 1'
 31 | 
 32 | #   # if xformla is None:
 33 | #   try:
 34 | #     _, x_cov = fml(xformla, data = data, return_type='dataframe')
 35 | #     _, n_cov = x_cov.shape
 36 | #     data = pd.concat([data[columns], x_cov], axis=1)
 37 | #     data = data.assign(w = w)
 38 | #   except:
 39 | #     data = data.assign(intercept = 1)
 40 | #     clms = columns + ['intercept']
 41 | #     n_cov = len(data.columns)
 42 | #     # patsy dont work with pyspark
 43 | #     data = data[clms]
 44 | #     if weights_name is None:
 45 | #       data = data.assign(w = 1)
 46 | #     else:
 47 | #       data = data.assign(w = lambda x: x[weights_name] * 1)
 48 | 
 49 | 
 50 | #   data = data.dropna()
 51 | #   ndiff = n - len(data) 
 52 | #   if ndiff != 0: 
 53 | #     print(f'dropped, {ndiff}, rows from original data due to missing data')
 54 | #   try:
 55 | 
 56 | #     tlist = np.sort(data[tname].unique())
 57 | #     glist = np.sort(data[gname].unique())
 58 | #   except:
 59 | #     tlist = np.sort(data[tname].unique().to_numpy())
 60 | #     glist = np.sort(data[gname].unique().to_numpy())
 61 | 
 62 | #   asif_nev_treated = data[gname] > np.max(tlist)
 63 | #   asif_nev_treated.fillna(False, inplace=True)
 64 | #   data.loc[asif_nev_treated, gname] = 0
 65 | 
 66 | #   if len(glist[glist == 0]) == 0:
 67 | #     if control_group == "nevertreated":
 68 | #       raise ValueError("There is no available never-treated group")
 69 | #     else:
 70 | #       value = np.max(glist) - anticipation
 71 | #       data = data.query(f'{tname} < @value')
 72 | #       tlist = np.sort(data[tname].unique())
 73 | #       glist = np.sort(data[gname].unique())
 74 | #       glist = glist[glist < np.max(glist)]
 75 | 
 76 | #   glist = glist[glist > 0]
 77 | #   # first prerios 
 78 | #   fp = tlist[0]
 79 | #   glist = glist[glist > fp + anticipation]
 80 | 
 81 | #   treated_fp = (data[gname] <= fp) & ~(data[gname] == 0)
 82 | #   treated_fp.fillna(False, inplace=True)
 83 | 
 84 | #   try:
 85 | 
 86 | #     nfirst_period = np.sum(treated_fp) if panel \
 87 | #       else len(data.loc[treated_fp, idname].unique())
 88 | #   except:
 89 | #     nfirst_period = treated_fp.sum() if panel \
 90 | #       else len(data.loc[treated_fp, idname].unique())
 91 | 
 92 | #   if nfirst_period > 0:
 93 | #     warning_message = f"Dropped {nfirst_period} units that were already treated in the first period."
 94 | #     print(warning_message)
 95 | #     glist_in = np.append(glist, [0])
 96 | #     data = data.query(f'{gname} in @glist_in')
 97 | #     tlist = np.sort(data[tname].unique())
 98 | #     glist = np.sort(data[gname].unique())
 99 | #     glist = glist[glist > 0]
100 | #     fp = tlist[0]
101 | #     glist = glist[glist > fp + anticipation]
102 | 
103 | #   #todo: idname must be numeric
104 | #   true_rep_cross_section = False
105 | #   if not panel:
106 | #     true_rep_cross_section = True
107 | 
108 | 
109 | 
110 | #   #-----------------------------------------------------------------------------
111 | #   # setup data in panel case
112 | #   #-----------------------------------------------------------------------------
113 | 
114 | #   # # Check if data is a balanced panel if panel = True and allow_unbalanced_panel = True
115 | #   # if panel and allow_unbalanced_panel:
116 | #   #     # First, focus on complete cases
117 | #   #     keepers = data.dropna()
118 | #   #     data_comp = keepers.copy()
119 | 
120 | #   #     # Make it a balanced dataset
121 | #   #     n_all = len(data_comp[idname].unique())
122 | #   #     data_bal = makeBalancedPanel(data_comp, idname, tname)
123 | #   #     n_bal = len(data_bal[idname].unique())
124 | 
125 | #   #     if n_bal < n_all:
126 | #   #         # If fewer unique IDs in the balanced panel, it means the panel is unbalanced
127 | #   #         print("You have an unbalanced panel. Proceeding as such.")
128 | #   #         allow_unbalanced_panel = True
129 | #   #     else:
130 | #   #         # If the number of unique IDs remains the same, it means the panel is balanced
131 | #   #         print("You have a balanced panel. Setting allow_unbalanced_panel = False.")
132 | #   #         allow_unbalanced_panel = False
133 | 
134 | 
135 | #   if panel:
136 | #     if allow_unbalanced_panel: 
137 | #       panel = False
138 | #       true_rep_cross_section = False
139 | #     else:
140 | #       keepers = data.dropna().index
141 | #       n = len(data[idname].unique)
142 | #       print(n)
143 | #       n_keep = len(data.iloc[keepers, idname].unique())
144 | 
145 | #       if len(data.loc[keepers] < len(data)):
146 | #         print(f"Dropped {n-n_keep} observations that had missing data.")
147 | #         data = data.loc[keepers]
148 | #       # make balanced data set
149 | #       n_old = len(data[idname].unique())
150 | #       data = makeBalancedPanel(data, idname=idname, tname=tname)
151 | #       n = len(data[idname].unique())
152 | #       if len(data) == 0:
153 | #         raise ValueError("All observations dropped to convert data to balanced panel. Consider setting `panel=False` and/or revisit 'idname'.")
154 | #       if n < n_old:
155 | #         warnings.warn(f"Dropped {n_old-n} observations while converting to balanced panel.")
156 | #       tn = tlist[0]
157 | #       n = len(data.query(f'{tname} == @tn'))
158 | 
159 | #   # # Add rowid
160 | #   # if not panel:
161 | #   #     # Handle missing data
162 | #   #     keepers = data.dropna().index
163 | #   #     dropped_count = len(data) - len(keepers)
164 | #   #     if dropped_count > 0:
165 | #   #         print(f"Dropped {dropped_count} observations that had missing data.")
166 |       
167 | #   #     # Drop incomplete rows
168 | #   #     data = data.loc[keepers]
169 | 
170 | #   #     # Check if all rows are dropped
171 | #   #     if len(data) == 0:
172 | #   #         raise ValueError("All observations dropped due to missing data problems.")
173 | 
174 | #   #     # Add rowid based on true_repeated_cross_section
175 | #   #     if true_rep_cross_section:
176 | #   #         data = data.assign(rowid=range(len(data)))
177 | #   #         idname = 'rowid'
178 | #   #     else:
179 | #   #         if idname not in data.columns:
180 | #   #             raise ValueError(f"Column {idname} is not in the dataset.")
181 | #   #         data = data.assign(rowid=lambda x: x[idname] * 1)
182 | 
183 | #   #     # Number of unique observations
184 | #   #     n = data[idname].nunique()
185 | 
186 | #   if not panel:
187 | #     # Check for complete cases (equivalent to complete.cases in R)
188 | #     keepers = data.notna().all(axis=1)
189 | #     dropped_count = len(data) - keepers.sum()
190 |     
191 | #     if dropped_count > 0:
192 | #       import warnings
193 | #       warnings.warn(f"Dropped {dropped_count} observations that had missing data.")
194 | #       data = data[keepers].copy()
195 |     
196 | #     # If drop all data, raise error
197 | #     if len(data) == 0:
198 | #       raise ValueError("All observations dropped due to missing data problems.")
199 |     
200 | #     # Add rowid column
201 | #     if true_rep_cross_section:
202 | #       data.loc[:, 'rowid'] = range(1, len(data) + 1)  # 1-based indexing like R
203 | #       idname = 'rowid'
204 | #     else:
205 | #       # Set rowid to idname for repeated cross section/unbalanced
206 | #       if idname not in data.columns:
207 | #         raise ValueError(f"Column {idname} not found in dataset")
208 | #       data.loc[:, 'rowid'] = data[idname]
209 |     
210 | #     # Calculate n as unique number of cross section observations
211 | #     n = data[idname].nunique()
212 | 
213 | #   data = data.sort_values([idname, tname])
214 | #   data = data.assign(w1 = lambda x: x['w'] * 1)
215 | #   # data.loc[:, ".w"] = data['w']
216 | #   if len(glist) == 0:
217 | #     raise f"No valid groups. The variable in '{gname}' should be expressed as the time a unit is first treated (0 if never-treated)."
218 | #   if len(tlist) == 2:
219 | #     cband = False
220 | #   gsize = data.groupby(data[gname]).size().reset_index(name="count")
221 | #   gsize["count"] /= len(tlist)
222 | 
223 | #   reqsize = n_cov + 5
224 | #   gsize = gsize[gsize["count"] < reqsize]
225 | 
226 | #   if len(gsize) > 0:
227 | #     gpaste = ",".join(map(str, gsize[gname]))
228 | #     warnings.warn(f"Be aware that there are some small groups in your dataset.\n  Check groups: {gpaste}.")
229 | 
230 | #     if 0 in gsize[gname].to_numpy() and control_group == "nevertreated":
231 | #       raise "Never-treated group is too small, try setting control_group='notyettreated'."
232 | #   nT, nG = map(len, [tlist, glist])
233 | #   did_params = {
234 | #     'yname' : yname, 'tname': tname,
235 | #     'idname' : idname, 'gname': gname,
236 | #     'xformla': xformla, 'data': data,
237 | #     'tlist': tlist, 'glist': glist,
238 | #     'n': n, 'nG': nG, 'nT': nT,
239 | #     'control_group': control_group, 'anticipation': anticipation,
240 | #     'weights_name': weights_name, 'panel': panel,
241 | #     'true_rep_cross_section': true_rep_cross_section,
242 | #     'clustervars': clustervar
243 | #   }
244 | #   return did_params
245 | 
246 | 
247 | import pandas as pd, numpy as np
248 | import patsy 
249 | from csdid.utils.bmisc import makeBalancedPanel
250 | import warnings
251 | 
252 | fml = patsy.dmatrices
253 | 
254 | def pre_process_did(yname, tname, idname, gname, data: pd.DataFrame, 
255 |   control_group = ['nevertreated', 'notyettreated'], 
256 |   anticipation = 0, xformla : str = None,
257 |   panel = True, allow_unbalanced_panel = True, cband = False,
258 |   clustervar = None,  weights_name = None
259 |   ) -> dict:
260 | 
261 |   n, t = data.shape
262 |   control_group = control_group[0]
263 |   columns = [idname, tname, yname, gname]
264 |   control_group = "nevertreated"
265 |   # print(columns)
266 |   # Columns
267 |   if clustervar is not None:
268 |     columns += [clustervar]
269 |   if weights_name is not None:
270 |     columns += [weights_name]
271 |     w = data[weights_name]
272 |   else:
273 |     w = np.ones(n)
274 | 
275 | 
276 |   if xformla is None:
277 |     xformla = f'{yname} ~ 1'
278 | 
279 |   # if xformla is None:
280 |   try:
281 |     _, x_cov = fml(xformla, data = data, return_type='dataframe')
282 |     _, n_cov = x_cov.shape
283 |     data = pd.concat([data[columns], x_cov], axis=1)
284 |     data = data.assign(w = w)
285 |   except:
286 |     data = data.assign(intercept = 1)
287 |     clms = columns + ['intercept']
288 |     n_cov = len(data.columns)
289 |     # patsy dont work with pyspark
290 |     data = data[clms]
291 |     if weights_name is None:
292 |       data = data.assign(w = 1)
293 |     else:
294 |       data = data.assign(w = lambda x: x[weights_name] * 1)
295 | 
296 | 
297 |   data = data.dropna()
298 |   ndiff = n - len(data) 
299 |   if ndiff != 0: 
300 |     print(f'dropped, {ndiff}, rows from original data due to missing data')
301 |   try:
302 | 
303 |     tlist = np.sort(data[tname].unique())
304 |     glist = np.sort(data[gname].unique())
305 |   except:
306 |     tlist = np.sort(data[tname].unique().to_numpy())
307 |     glist = np.sort(data[gname].unique().to_numpy())
308 | 
309 |   asif_nev_treated = data[gname] > np.max(tlist)
310 |   asif_nev_treated.fillna(False, inplace=True)
311 |   data.loc[asif_nev_treated, gname] = 0
312 | 
313 |   if len(glist[glist == 0]) == 0:
314 |     if control_group == "nevertreated":
315 |       raise ValueError("There is no available never-treated group")
316 |     else:
317 |       value = np.max(glist) - anticipation
318 |       data = data.query(f'{tname} < @value')
319 |       tlist = np.sort(data[tname].unique())
320 |       glist = np.sort(data[gname].unique())
321 |       glist = glist[glist < np.max(glist)]
322 | 
323 |   glist = glist[glist > 0]
324 |   # first prerios 
325 |   fp = tlist[0]
326 |   glist = glist[glist > fp + anticipation]
327 | 
328 |   treated_fp = (data[gname] <= fp) & ~(data[gname] == 0)
329 |   treated_fp.fillna(False, inplace=True)
330 | 
331 |   try:
332 | 
333 |     nfirst_period = np.sum(treated_fp) if panel \
334 |       else len(data.loc[treated_fp, idname].unique())
335 |   except:
336 |     nfirst_period = treated_fp.sum() if panel \
337 |       else len(data.loc[treated_fp, idname].unique())
338 | 
339 |   if nfirst_period > 0:
340 |     warning_message = f"Dropped {nfirst_period} units that were already treated in the first period."
341 |     print(warning_message)
342 |     glist_in = np.append(glist, [0])
343 |     data = data.query(f'{gname} in @glist_in')
344 |     tlist = np.sort(data[tname].unique())
345 |     glist = np.sort(data[gname].unique())
346 |     glist = glist[glist > 0]
347 |     fp = tlist[0]
348 |     glist = glist[glist > fp + anticipation]
349 | 
350 |   #todo: idname must be numeric
351 |   true_rep_cross_section = False
352 |   if not panel:
353 |     true_rep_cross_section = True
354 | 
355 |   if panel:
356 |     if allow_unbalanced_panel: 
357 |       panel = False
358 |       true_rep_cross_section = False
359 |     else:
360 |       keepers = data.dropna().index
361 |       n = len(data[idname].unique)
362 |       print(n)
363 |       n_keep = len(data.iloc[keepers, idname].unique())
364 | 
365 |       if len(data.loc[keepers] < len(data)):
366 |         print(f"Dropped {n-n_keep} observations that had missing data.")
367 |         data = data.loc[keepers]
368 |       # make balanced data set
369 |       n_old = len(data[idname].unique())
370 |       data = makeBalancedPanel(data, idname=idname, tname=tname)
371 |       n = len(data[idname].unique())
372 |       if len(data) == 0:
373 |         raise ValueError("All observations dropped to convert data to balanced panel. Consider setting `panel=False` and/or revisit 'idname'.")
374 |       if n < n_old:
375 |         warnings.warn(f"Dropped {n_old-n} observations while converting to balanced panel.")
376 |       tn = tlist[0]
377 |       n = len(data.query(f'{tname} == @tn'))
378 |   # add rowid
379 |   if not panel:
380 | 
381 |     keepers = data.dropna().index.to_numpy()
382 |     ndiff = len(data.loc[keepers]) - len(data)
383 |     if len(keepers) == 0:
384 |       raise "All observations dropped due to missing data problems."
385 |     if ndiff < 0:
386 |       mssg = f"Dropped {ndiff} observations that had missing data."
387 |       data = data.loc[keepers]
388 |     if true_rep_cross_section: 
389 |       # fix: posible error
390 |       data = data.assign(rowid = range(len(data)))
391 |       idname = 'rowid'
392 |     else:
393 |       # r_id = np.array(data[idname])
394 |       data = data.assign(rowid = lambda x: x[idname] * 1)
395 |     
396 |     n = len(data[idname].unique())
397 | 
398 |   data = data.sort_values([idname, tname])
399 |   data = data.assign(w1 = lambda x: x['w'] * 1)
400 |   # data.loc[:, ".w"] = data['w']
401 |   if len(glist) == 0:
402 |     raise f"No valid groups. The variable in '{gname}' should be expressed as the time a unit is first treated (0 if never-treated)."
403 |   if len(tlist) == 2:
404 |     cband = False
405 |   gsize = data.groupby(data[gname]).size().reset_index(name="count")
406 |   gsize["count"] /= len(tlist)
407 | 
408 |   reqsize = n_cov + 5
409 |   gsize = gsize[gsize["count"] < reqsize]
410 | 
411 |   if len(gsize) > 0:
412 |     gpaste = ",".join(map(str, gsize[gname]))
413 |     warnings.warn(f"Be aware that there are some small groups in your dataset.\n  Check groups: {gpaste}.")
414 | 
415 |     if 0 in gsize[gname].to_numpy() and control_group == "nevertreated":
416 |       raise "Never-treated group is too small, try setting control_group='notyettreated'."
417 |   nT, nG = map(len, [tlist, glist])
418 |   did_params = {
419 |     'yname' : yname, 'tname': tname,
420 |     'idname' : idname, 'gname': gname,
421 |     'xformla': xformla, 'data': data,
422 |     'tlist': tlist, 'glist': glist,
423 |     'n': n, 'nG': nG, 'nT': nT,
424 |     'control_group': control_group, 'anticipation': anticipation,
425 |     'weights_name': weights_name, 'panel': panel,
426 |     'true_rep_cross_section': true_rep_cross_section,
427 |     'clustervars': clustervar
428 |   }
429 |   return did_params
430 | 


--------------------------------------------------------------------------------
/csdid/plots/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2cml-ai/csdid/7ad721d4c384bca147ae16de84b7c5df8fe26959/csdid/plots/__init__.py


--------------------------------------------------------------------------------
/csdid/plots/gplot.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Sun Jun 11 16:17:07 2023
  4 | 
  5 | @author: Carlos Guevara
  6 | """
  7 | 
  8 | import matplotlib.pyplot as plt
  9 | 
 10 | # get_ipython().run_line_magic('matplotlib', 'qt') # To aopen separate window
 11 | # get_ipython().run_line_magic('matplotlib', 'inline') # In line graph
 12 | 
 13 | def gplot(ssresults, ax, ylim=None, xlab=None, ylab=None, title="Group", xgap=1,
 14 |            legend=True, ref_line=0, theming=True):
 15 |     if ylab is None:
 16 |         ylab = 'ATT'
 17 |     
 18 |     ssresults = ssresults[ssresults['year'].notnull()].copy()
 19 |     ssresults.loc[:, 'year'] = ssresults['year'].astype(int).astype(str)
 20 |     
 21 |     pre_points = ssresults.loc[ssresults['post'] == 0]
 22 |     post_points = ssresults.loc[ssresults['post'] == 1]
 23 |     
 24 |     ax.errorbar(pre_points['year'], pre_points['att'], yerr=pre_points['c']*pre_points['att_se'],
 25 |                  fmt='o', markersize=5, color='#e87d72', ecolor='#e87d72', capsize=5, label='Pre')   
 26 |     
 27 |     ax.errorbar(post_points['year'], post_points['att'], yerr=post_points['c']*post_points['att_se'],
 28 |                  fmt='o', markersize=5, color='#56bcc2', ecolor='#56bcc2', capsize=5, label='Post')  
 29 |     
 30 |     ax.set_ylim(ylim)
 31 |     ax.set_title(title)
 32 |     ax.set_xlabel(xlab)    
 33 |     ax.set_ylabel(ylab)    
 34 | 
 35 |     handles, labels = ax.get_legend_handles_labels()    
 36 |     
 37 |     if ref_line is not None:
 38 |         ax.axhline(ref_line, linestyle='dashed', color='#1F1F1F')
 39 |     if theming:
 40 |         ax.set_facecolor('white')
 41 |         ax.set_title(title, color="#1F1F1F", fontweight="bold", fontsize=10)
 42 |         ax.spines['bottom'].set_color('#1F1F1F')
 43 |         ax.spines['left'].set_color('#1F1F1F')
 44 |         ax.tick_params(axis='x', colors='#1F1F1F')
 45 |         ax.tick_params(axis='y', colors='#1F1F1F')
 46 |         if not pre_points.empty and not post_points.empty:
 47 |             ax.legend(handles[0:2], labels[0:2], loc='lower center',fontsize='small', ncol=2, bbox_to_anchor=(0.5,-0.27))
 48 |         elif not pre_points.empty:
 49 |             ax.legend(handles[:1], labels[:1], loc='lower center',fontsize='small', ncol=2, bbox_to_anchor=(0.5,-0.27))
 50 |         elif not post_points.empty:
 51 |             ax.legend(handles[1:2], labels[1:2], loc='lower center',fontsize='small', ncol=2, bbox_to_anchor=(0.5,-0.27))     
 52 |     if not legend:
 53 |         ax.legend().set_visible(False)
 54 |         
 55 |     return ax
 56 | 
 57 | 
 58 | def splot(ssresults, ax, ylim=None, xlab=None, ylab=None, title="Group",
 59 |           legend=True, ref_line=0, theming=True):
 60 |     
 61 |     if xlab is None:
 62 |         xlab = 'Group'
 63 |     if ylab is None:
 64 |         ylab = 'ATT'
 65 | 
 66 |     ssresults['year'] = ssresults['year'].copy().astype(str)
 67 |     
 68 |     pre_points = ssresults.loc[ssresults['post'] == 0]
 69 |     post_points = ssresults.loc[ssresults['post'] == 1]
 70 |     
 71 |     ax.errorbar(pre_points['year'], pre_points['att'], yerr=pre_points['c']*pre_points['att_se'],
 72 |                  fmt='o', markersize=5, color='#e87d72', ecolor='#e87d72', capsize=5, label='Pre')   
 73 |     
 74 |     ax.errorbar(post_points['year'], post_points['att'], yerr=post_points['c']*post_points['att_se'],
 75 |                  fmt='o', markersize=5, color='#56bcc2', ecolor='#56bcc2', capsize=5, label='Post') 
 76 |     
 77 |     ax.set_xlabel(xlab)
 78 |     ax.set_ylabel(ylab)
 79 |     ax.set_title(title)
 80 | 
 81 |     handles, labels = ax.get_legend_handles_labels()    
 82 |     
 83 |     if ylim is not None:
 84 |         ax.set_ylim(ylim)
 85 |     
 86 |     if ref_line is not None:
 87 |         ax.axhline(ref_line, linestyle='dashed', color='#1F1F1F')
 88 |     
 89 |     if theming:
 90 |         ax.set_facecolor('white')
 91 |         ax.set_title(title, color="#1F1F1F", fontweight="bold", fontsize=12)
 92 |         ax.spines['bottom'].set_color('#1F1F1F')
 93 |         ax.spines['left'].set_color('#1F1F1F')
 94 |         ax.tick_params(axis='x', colors='#1F1F1F')
 95 |         ax.tick_params(axis='y', colors='#1F1F1F')
 96 |         if not pre_points.empty and not post_points.empty:
 97 |             ax.legend(handles[0:2], labels[0:2], loc='lower center',fontsize='small', ncol=2, bbox_to_anchor=(0.5,-0.27))
 98 |         elif not pre_points.empty:
 99 |             ax.legend(handles[:1], labels[:1], loc='lower center',fontsize='small', ncol=2, bbox_to_anchor=(0.5,-0.27))
100 |         elif not post_points.empty:
101 |             ax.legend(handles[1:2], labels[1:2], loc='lower center',fontsize='small', ncol=2, bbox_to_anchor=(0.5,-0.27))   
102 |             
103 |     if not legend:
104 |         ax.legend().set_visible(False)
105 |     
106 |     return ax


--------------------------------------------------------------------------------
/csdid/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2cml-ai/csdid/7ad721d4c384bca147ae16de84b7c5df8fe26959/csdid/utils/__init__.py


--------------------------------------------------------------------------------
/csdid/utils/bmisc.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | def makeBalancedPanel(data, idname, tname):
 3 |   data = data.sort_values([idname, tname]).reset_index(drop = True)
 4 |   nt = len(data[tname].unique())
 5 |   data = data.groupby(idname)\
 6 |     .filter(lambda x: len(x) == nt)
 7 |   return data
 8 | 
 9 | 
10 | def panel2cs2(data: pd.DataFrame, yname, idname, tname):
11 |   if len(data[tname].unique()) != 2:
12 |     raise ValueError('panel2cs2 only for 2 perios of apnel data')
13 | 
14 |   data = data.sort_values([idname, tname])
15 |   y1 = data.groupby(idname)[yname].shift(-1)
16 |   y0 = data[yname]
17 |   dy = y1 - y0
18 |   data = data.assign(
19 |     y1 = y1, y0 = y0, dy = dy
20 |   )
21 |   return data.dropna()
22 | 
23 | # -*- coding: utf-8 -*-
24 | """
25 | Created on Wed May 31 18:58:35 2023
26 | 
27 | @author: Carlos Guevara
28 | """
29 | import numpy as np
30 | 
31 | def TorF(cond, use_isTRUE=False):
32 | 
33 |     if not isinstance(cond, np.ndarray) or cond.dtype != bool:
34 |         raise ValueError("cond should be a logical vector")
35 |     if use_isTRUE:
36 |         cond = np.array([x is True for x in cond])
37 |     else:
38 |         cond[np.isnan(cond)] = False
39 |     return cond
40 | 
41 | def multiplier_bootstrap(inf_func, biters): # This function comes from c++
42 |     n, K = inf_func.shape
43 |     biters = int(biters)
44 |     innerMat = np.zeros((n, K))
45 |     Ub = np.zeros(n)
46 |     outMat = np.zeros((biters,K))
47 | 
48 |     for b in range(biters):
49 |         # draw Rademechar weights
50 |         # Ub = ( np.ones(n) - 2 * np.round(np.random.rand(n)) )[:, np.newaxis]
51 |         Ub = np.random.choice([1, -1], size=(n, 1))
52 |         innerMat = inf_func * Ub
53 |         outMat[b] = np.mean(innerMat, axis=0)
54 | 
55 |     return outMat


--------------------------------------------------------------------------------
/csdid/utils/mboot.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from scipy.stats import mstats, norm
  3 | from joblib import Parallel, delayed
  4 | import pandas as pd
  5 | 
  6 | from csdid.utils.bmisc import multiplier_bootstrap
  7 | 
  8 | def mboot(inf_func, DIDparams, pl=False, cores=1):
  9 |     # Setup needed variables
 10 |     data            = DIDparams['data'] 
 11 |     idname          = DIDparams['idname']
 12 |     clustervars     = DIDparams['clustervars']
 13 |     biters          = DIDparams['biters']
 14 |     tname           = DIDparams['tname']
 15 |     try:
 16 |         tlist           = np.sort(data[tname].unique())
 17 |     except:
 18 |         tlist           = np.sort(data[tname].unique().to_numpy())
 19 |     alp             = DIDparams['alp']
 20 |     panel           = DIDparams['panel']
 21 |     true_repeated_cross_sections = DIDparams['true_repeated_cross_sections']
 22 | 
 23 |     # Get n observations (for clustering below)
 24 |     if panel:
 25 |         dta = data[ data[tname] == tlist[0] ]
 26 |     else:
 27 |         dta = data.copy()
 28 | 
 29 |     # Convert inf_func to matrix
 30 |     inf_func = np.asarray(inf_func)
 31 | 
 32 |     # Set correct number of units
 33 |     n = inf_func.shape[0]
 34 | 
 35 |     # Drop idname if it is in clustervars
 36 |     if clustervars is not None and idname in clustervars:
 37 |         clustervars.remove(idname)
 38 | 
 39 |     if clustervars is not None:
 40 |         if isinstance(clustervars, list) and isinstance(clustervars[0], str):
 41 |             raise ValueError("clustervars need to be the name of the clustering variable.")
 42 | 
 43 |     # We can only handle up to 2-way clustering
 44 |     if clustervars is not None and len(clustervars) > 1:
 45 |         raise ValueError("Can't handle that many cluster variables")
 46 | 
 47 |     if clustervars is not None:
 48 |         # Check that cluster variable does not vary over time within unit
 49 |         clust_tv = dta.groupby(idname)[clustervars[0]].nunique() == 1
 50 |         if not clust_tv.all():
 51 |             raise ValueError("Can't handle time-varying cluster variables")
 52 |     # clustervars='year'    
 53 |     # Multiplier bootstrap
 54 |     n_clusters = n
 55 |     if not clustervars:
 56 |         bres = np.sqrt(n) * run_multiplier_bootstrap(inf_func, biters, pl, cores)
 57 |     else:
 58 |         n_clusters = len(data[clustervars].drop_duplicates())
 59 |         cluster = dta[[idname, clustervars]].drop_duplicates().values[:, 1]
 60 |         cluster_n = dta.groupby(cluster).size().values
 61 |         cluster_mean_if = pd.DataFrame(inf_func).groupby(cluster).sum().values / cluster_n
 62 |         bres = np.sqrt(n_clusters) * run_multiplier_bootstrap(cluster_mean_if, biters, pl, cores)
 63 | 
 64 |     # Handle vector and matrix case differently to get nxk matrix
 65 |     if isinstance(bres, np.ndarray) and bres.ndim == 1:
 66 |         bres = np.expand_dims(bres, axis=0)
 67 |     elif isinstance(bres, np.ndarray) and bres.ndim > 2:
 68 |         bres = bres.transpose()
 69 | 
 70 |     # Non-degenerate dimensions
 71 |     ndg_dim = np.logical_and(~np.isnan(np.sum(bres, axis=0)), np.sum(bres ** 2, axis=0) > np.sqrt(np.finfo(float).eps) * 10)
 72 |     bres = bres[:, ndg_dim]
 73 | 
 74 |     # Bootstrap variance matrix (this matrix can be defective because of degenerate cases)
 75 |     V = np.cov(bres, rowvar=False)
 76 | 
 77 |     # Bootstrap standard error
 78 |     quantile_75 = np.quantile(bres, 0.75, axis=0, method="inverted_cdf")
 79 |     quantile_25 = np.quantile(bres, 0.25, axis=0, method="inverted_cdf")
 80 |     qnorm_75 = norm.ppf(0.75)
 81 |     qnorm_25 = norm.ppf(0.25)   
 82 |     bSigma = (quantile_75 - quantile_25) / (qnorm_75 - qnorm_25)
 83 |         
 84 |     # Critical value for uniform confidence band
 85 |     bT = np.max(np.abs(bres / bSigma), axis=1)
 86 |     bT = bT[np.isfinite(bT)]
 87 |     crit_val = np.quantile(bT, 1 - alp, method="inverted_cdf")
 88 |     
 89 |     # Standard error
 90 |     se = np.full(ndg_dim.shape, np.nan)
 91 |     se[ndg_dim] = bSigma / np.sqrt(n_clusters)
 92 | 
 93 |     return {'bres': bres, 'V': V, 'se': se, 'crit_val': crit_val}
 94 | 
 95 | def run_multiplier_bootstrap(inf_func, biters, pl=False, cores=1):
 96 |     ngroups = int(np.ceil(biters / cores))
 97 |     chunks = [ngroups] * cores
 98 |     chunks[0] += biters - sum(chunks)
 99 | 
100 |     n = inf_func.shape[0]
101 | 
102 |     def parallel_function(biters):
103 |         return multiplier_bootstrap(inf_func, biters)
104 | 
105 |     if n > 2500 and pl and cores > 1:
106 |         results = Parallel(n_jobs=cores)(
107 |             delayed(parallel_function)(biters) for biters in chunks
108 |         )
109 |         results = np.vstack(results)
110 |     else:
111 |         results = multiplier_bootstrap(inf_func, biters)
112 | 
113 |     return results
114 | 
115 | 
116 | 


--------------------------------------------------------------------------------
/figs/did_py.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2cml-ai/csdid/7ad721d4c384bca147ae16de84b7c5df8fe26959/figs/did_py.png


--------------------------------------------------------------------------------
/figs/did_r.drawio:
--------------------------------------------------------------------------------
  1 | <mxfile host="65bd71144e">
  2 |     <diagram id="Rv5UrFJeDnt34fFH7Rxd" name="Page-1">
  3 |         <mxGraphModel dx="1109" dy="1751" grid="0" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="850" pageHeight="1100" background="#ffffff" math="0" shadow="0">
  4 |             <root>
  5 |                 <mxCell id="0"/>
  6 |                 <mxCell id="1" parent="0"/>
  7 |                 <mxCell id="2" value="AGGTobj.R" style="whiteSpace=wrap;html=1;fillColor=#1ba1e2;fontColor=#ffffff;strokeColor=#006EAF;" parent="1" vertex="1">
  8 |                     <mxGeometry x="156" y="-1038" width="120" height="60" as="geometry"/>
  9 |                 </mxCell>
 10 |                 <mxCell id="3" value="DIDparms.r" style="whiteSpace=wrap;html=1;fillColor=#fa6800;fontColor=#000000;strokeColor=#C73500;" parent="1" vertex="1">
 11 |                     <mxGeometry x="276" y="-1038" width="120" height="60" as="geometry"/>
 12 |                 </mxCell>
 13 |                 <mxCell id="5" value="MP.r" style="whiteSpace=wrap;html=1;fillColor=#a20025;fontColor=#ffffff;strokeColor=#6F0000;" parent="1" vertex="1">
 14 |                     <mxGeometry x="386" y="-1038" width="120" height="60" as="geometry"/>
 15 |                 </mxCell>
 16 |                 <mxCell id="6" value="aggte.r" style="whiteSpace=wrap;html=1;fillColor=#e51400;fontColor=#ffffff;strokeColor=#B20000;" parent="1" vertex="1">
 17 |                     <mxGeometry x="156" y="-980" width="120" height="60" as="geometry"/>
 18 |                 </mxCell>
 19 |                 <mxCell id="7" value="att_gt.r" style="whiteSpace=wrap;html=1;fillColor=#0050ef;fontColor=#ffffff;strokeColor=#001DBC;" parent="1" vertex="1">
 20 |                     <mxGeometry x="276" y="-980" width="120" height="60" as="geometry"/>
 21 |                 </mxCell>
 22 |                 <mxCell id="10" value="compute.aggte.r" style="whiteSpace=wrap;html=1;fillColor=#60a917;fontColor=#ffffff;strokeColor=#2D7600;" parent="1" vertex="1">
 23 |                     <mxGeometry x="396" y="-980" width="120" height="60" as="geometry"/>
 24 |                 </mxCell>
 25 |                 <mxCell id="11" value="compute.agg_t.r" style="whiteSpace=wrap;html=1;fillColor=#008a00;fontColor=#ffffff;strokeColor=#005700;" parent="1" vertex="1">
 26 |                     <mxGeometry x="526" y="-980" width="120" height="60" as="geometry"/>
 27 |                 </mxCell>
 28 |                 <mxCell id="15" value="ggdid.r" style="whiteSpace=wrap;html=1;fillColor=#f0a30a;fontColor=#000000;strokeColor=#BD7000;" parent="1" vertex="1">
 29 |                     <mxGeometry x="1710" y="-960" width="120" height="60" as="geometry"/>
 30 |                 </mxCell>
 31 |                 <mxCell id="16" value="gplot.r" style="whiteSpace=wrap;html=1;fillColor=#e3c800;fontColor=#000000;strokeColor=#B09500;" parent="1" vertex="1">
 32 |                     <mxGeometry x="1500" y="-960" width="120" height="60" as="geometry"/>
 33 |                 </mxCell>
 34 |                 <mxCell id="18" value="mboot.r" style="whiteSpace=wrap;html=1;fillColor=#d80073;fontColor=#ffffff;strokeColor=#A50040;" parent="1" vertex="1">
 35 |                     <mxGeometry x="776" y="-980" width="120" height="60" as="geometry"/>
 36 |                 </mxCell>
 37 |                 <mxCell id="19" value="pre_precess_did" style="whiteSpace=wrap;html=1;fillColor=#6d8764;fontColor=#ffffff;strokeColor=#3A5431;" parent="1" vertex="1">
 38 |                     <mxGeometry x="896" y="-980" width="120" height="60" as="geometry"/>
 39 |                 </mxCell>
 40 |                 <mxCell id="20" value="process_attgt.r" style="whiteSpace=wrap;html=1;fillColor=#6a00ff;fontColor=#ffffff;strokeColor=#3700CC;" parent="1" vertex="1">
 41 |                     <mxGeometry x="1016" y="-980" width="120" height="60" as="geometry"/>
 42 |                 </mxCell>
 43 |                 <mxCell id="21" value="simulate_data.r" style="whiteSpace=wrap;html=1;fillColor=#647687;fontColor=#ffffff;strokeColor=#314354;" parent="1" vertex="1">
 44 |                     <mxGeometry x="1136" y="-980" width="120" height="60" as="geometry"/>
 45 |                 </mxCell>
 46 |                 <mxCell id="22" value="tidy.r" style="whiteSpace=wrap;html=1;fillColor=#76608a;fontColor=#ffffff;strokeColor=#432D57;" parent="1" vertex="1">
 47 |                     <mxGeometry x="1980" y="-960" width="120" height="60" as="geometry"/>
 48 |                 </mxCell>
 49 |                 <mxCell id="23" value="utily_functions.r" style="whiteSpace=wrap;html=1;" parent="1" vertex="1">
 50 |                     <mxGeometry x="1850" y="-960" width="120" height="60" as="geometry"/>
 51 |                 </mxCell>
 52 |                 <mxCell id="24" value="zzz.r" style="whiteSpace=wrap;html=1;fillColor=#a0522d;fontColor=#ffffff;strokeColor=#6D1F00;" parent="1" vertex="1">
 53 |                     <mxGeometry x="2110" y="-960" width="120" height="60" as="geometry"/>
 54 |                 </mxCell>
 55 |                 <mxCell id="65" style="edgeStyle=none;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0;entryDx=0;entryDy=0;strokeColor=#000000;" parent="1" source="26" target="34" edge="1">
 56 |                     <mxGeometry relative="1" as="geometry"/>
 57 |                 </mxCell>
 58 |                 <mxCell id="26" value="AGGTEobj" style="whiteSpace=wrap;html=1;fillColor=#1ba1e2;fontColor=#ffffff;strokeColor=#006EAF;" parent="1" vertex="1">
 59 |                     <mxGeometry x="522" y="-498" width="120" height="60" as="geometry"/>
 60 |                 </mxCell>
 61 |                 <mxCell id="27" value="summary" style="whiteSpace=wrap;html=1;fillColor=#1ba1e2;fontColor=#ffffff;strokeColor=#006EAF;" parent="1" vertex="1">
 62 |                     <mxGeometry x="1980" y="-590" width="120" height="60" as="geometry"/>
 63 |                 </mxCell>
 64 |                 <mxCell id="79" style="edgeStyle=none;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;strokeColor=#000000;" parent="1" source="28" target="48" edge="1">
 65 |                     <mxGeometry relative="1" as="geometry"/>
 66 |                 </mxCell>
 67 |                 <mxCell id="28" value="didparams" style="whiteSpace=wrap;html=1;fillColor=#fa6800;fontColor=#000000;strokeColor=#C73500;" parent="1" vertex="1">
 68 |                     <mxGeometry x="316" y="-784" width="120" height="60" as="geometry"/>
 69 |                 </mxCell>
 70 |                 <mxCell id="74" style="edgeStyle=none;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=1;entryDx=0;entryDy=0;strokeColor=#B20000;fillColor=#e51400;" parent="1" source="29" target="33" edge="1">
 71 |                     <mxGeometry relative="1" as="geometry"/>
 72 |                 </mxCell>
 73 |                 <mxCell id="29" value="MP" style="whiteSpace=wrap;html=1;fillColor=#a20025;fontColor=#ffffff;strokeColor=#6F0000;" parent="1" vertex="1">
 74 |                     <mxGeometry x="786" y="-480" width="120" height="60" as="geometry"/>
 75 |                 </mxCell>
 76 |                 <mxCell id="30" value="summary" style="whiteSpace=wrap;html=1;fillColor=#a20025;fontColor=#ffffff;strokeColor=#6F0000;" parent="1" vertex="1">
 77 |                     <mxGeometry x="1980" y="-650" width="120" height="60" as="geometry"/>
 78 |                 </mxCell>
 79 |                 <mxCell id="83" style="edgeStyle=none;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=1;entryDx=0;entryDy=0;strokeColor=#000000;" parent="1" source="31" target="52" edge="1">
 80 |                     <mxGeometry relative="1" as="geometry"/>
 81 |                 </mxCell>
 82 |                 <mxCell id="31" value="aggte" style="whiteSpace=wrap;html=1;fillColor=#e51400;fontColor=#ffffff;strokeColor=#B20000;" parent="1" vertex="1">
 83 |                     <mxGeometry x="1030" y="-381" width="120" height="60" as="geometry"/>
 84 |                 </mxCell>
 85 |                 <mxCell id="82" style="edgeStyle=none;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;strokeColor=#000000;" parent="1" source="33" target="52" edge="1">
 86 |                     <mxGeometry relative="1" as="geometry"/>
 87 |                 </mxCell>
 88 |                 <mxCell id="33" value="att_gt" style="whiteSpace=wrap;html=1;fillColor=#0050ef;fontColor=#ffffff;strokeColor=#001DBC;" parent="1" vertex="1">
 89 |                     <mxGeometry x="1030" y="-500" width="120" height="60" as="geometry"/>
 90 |                 </mxCell>
 91 |                 <mxCell id="75" style="edgeStyle=none;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;strokeColor=#000000;" parent="1" source="34" target="31" edge="1">
 92 |                     <mxGeometry relative="1" as="geometry"/>
 93 |                 </mxCell>
 94 |                 <mxCell id="34" value="compute.aggte" style="whiteSpace=wrap;html=1;fillColor=#60a917;fontColor=#ffffff;strokeColor=#2D7600;" parent="1" vertex="1">
 95 |                     <mxGeometry x="780" y="-379" width="120" height="60" as="geometry"/>
 96 |                 </mxCell>
 97 |                 <mxCell id="68" style="edgeStyle=none;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.25;entryDx=0;entryDy=0;strokeColor=#000000;" parent="1" source="35" target="34" edge="1">
 98 |                     <mxGeometry relative="1" as="geometry"/>
 99 |                 </mxCell>
100 |                 <mxCell id="35" value="wif" style="whiteSpace=wrap;html=1;fillColor=#60a917;fontColor=#ffffff;strokeColor=#2D7600;" parent="1" vertex="1">
101 |                     <mxGeometry x="318" y="-454" width="120" height="60" as="geometry"/>
102 |                 </mxCell>
103 |                 <mxCell id="64" style="edgeStyle=none;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;strokeColor=#000000;" parent="1" source="36" target="34" edge="1">
104 |                     <mxGeometry relative="1" as="geometry"/>
105 |                 </mxCell>
106 |                 <mxCell id="36" value="get_agg_inf_func" style="whiteSpace=wrap;html=1;fillColor=#60a917;fontColor=#ffffff;strokeColor=#2D7600;" parent="1" vertex="1">
107 |                     <mxGeometry x="318" y="-379" width="120" height="60" as="geometry"/>
108 |                 </mxCell>
109 |                 <mxCell id="69" style="edgeStyle=none;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.75;entryDx=0;entryDy=0;strokeColor=#000000;" parent="1" source="37" target="34" edge="1">
110 |                     <mxGeometry relative="1" as="geometry"/>
111 |                 </mxCell>
112 |                 <mxCell id="37" value="getSE" style="whiteSpace=wrap;html=1;fillColor=#60a917;fontColor=#ffffff;strokeColor=#2D7600;" parent="1" vertex="1">
113 |                     <mxGeometry x="318" y="-304" width="120" height="60" as="geometry"/>
114 |                 </mxCell>
115 |                 <mxCell id="71" style="edgeStyle=none;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;strokeColor=#000000;" parent="1" source="38" target="33" edge="1">
116 |                     <mxGeometry relative="1" as="geometry"/>
117 |                 </mxCell>
118 |                 <mxCell id="38" value="compute.att_gt" style="whiteSpace=wrap;html=1;fillColor=#008a00;fontColor=#ffffff;strokeColor=#005700;" parent="1" vertex="1">
119 |                     <mxGeometry x="318" y="-600" width="120" height="60" as="geometry"/>
120 |                 </mxCell>
121 |                 <mxCell id="39" value="conditiona_did_pretest" style="whiteSpace=wrap;html=1;" parent="1" vertex="1">
122 |                     <mxGeometry x="1850" y="-790" width="120" height="60" as="geometry"/>
123 |                 </mxCell>
124 |                 <mxCell id="40" value="indicator" style="whiteSpace=wrap;html=1;" parent="1" vertex="1">
125 |                     <mxGeometry x="1850" y="-730" width="120" height="60" as="geometry"/>
126 |                 </mxCell>
127 |                 <mxCell id="41" value="test.mboot" style="whiteSpace=wrap;html=1;" parent="1" vertex="1">
128 |                     <mxGeometry x="1850" y="-670" width="120" height="60" as="geometry"/>
129 |                 </mxCell>
130 |                 <mxCell id="42" value="gdid.mp" style="whiteSpace=wrap;html=1;fillColor=#f0a30a;fontColor=#000000;strokeColor=#BD7000;" parent="1" vertex="1">
131 |                     <mxGeometry x="1700" y="-832.5" width="120" height="60" as="geometry"/>
132 |                 </mxCell>
133 |                 <mxCell id="43" value="gdid.AGGTEobj" style="whiteSpace=wrap;html=1;fillColor=#f0a30a;fontColor=#000000;strokeColor=#BD7000;" parent="1" vertex="1">
134 |                     <mxGeometry x="1700" y="-747.5" width="120" height="60" as="geometry"/>
135 |                 </mxCell>
136 |                 <mxCell id="76" style="edgeStyle=none;html=1;exitX=1;exitY=0.25;exitDx=0;exitDy=0;entryX=0;entryY=0.25;entryDx=0;entryDy=0;strokeColor=#000000;" parent="1" source="44" target="42" edge="1">
137 |                     <mxGeometry relative="1" as="geometry"/>
138 |                 </mxCell>
139 |                 <mxCell id="44" value="gplot" style="whiteSpace=wrap;html=1;fillColor=#e3c800;fontColor=#000000;strokeColor=#B09500;" parent="1" vertex="1">
140 |                     <mxGeometry x="1500" y="-832.5" width="120" height="60" as="geometry"/>
141 |                 </mxCell>
142 |                 <mxCell id="45" value="splot" style="whiteSpace=wrap;html=1;fillColor=#e3c800;fontColor=#000000;strokeColor=#B09500;" parent="1" vertex="1">
143 |                     <mxGeometry x="1500" y="-747.5" width="120" height="60" as="geometry"/>
144 |                 </mxCell>
145 |                 <mxCell id="73" style="edgeStyle=none;html=1;exitX=1;exitY=0;exitDx=0;exitDy=0;entryX=0.5;entryY=1;entryDx=0;entryDy=0;strokeColor=#000000;" parent="1" source="46" target="31" edge="1">
146 |                     <mxGeometry relative="1" as="geometry"/>
147 |                 </mxCell>
148 |                 <mxCell id="46" value="mboot" style="whiteSpace=wrap;html=1;fillColor=#d80073;fontColor=#ffffff;strokeColor=#A50040;" parent="1" vertex="1">
149 |                     <mxGeometry x="780" y="-250" width="120" height="60" as="geometry"/>
150 |                 </mxCell>
151 |                 <mxCell id="77" style="edgeStyle=none;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;strokeColor=#000000;" parent="1" source="47" target="46" edge="1">
152 |                     <mxGeometry relative="1" as="geometry"/>
153 |                 </mxCell>
154 |                 <mxCell id="47" value="run_multiplier_bottstrap" style="whiteSpace=wrap;html=1;fillColor=#d80073;fontColor=#ffffff;strokeColor=#A50040;" parent="1" vertex="1">
155 |                     <mxGeometry x="610" y="-250" width="120" height="60" as="geometry"/>
156 |                 </mxCell>
157 |                 <mxCell id="70" style="edgeStyle=none;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0;entryDx=0;entryDy=0;strokeColor=#000000;" parent="1" source="48" target="33" edge="1">
158 |                     <mxGeometry relative="1" as="geometry"/>
159 |                 </mxCell>
160 |                 <mxCell id="48" value="pre_process_did" style="whiteSpace=wrap;html=1;fillColor=#6d8764;fontColor=#ffffff;strokeColor=#3A5431;" parent="1" vertex="1">
161 |                     <mxGeometry x="546" y="-654" width="120" height="60" as="geometry"/>
162 |                 </mxCell>
163 |                 <mxCell id="72" style="edgeStyle=none;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0.25;entryY=0;entryDx=0;entryDy=0;strokeColor=#000000;" parent="1" source="49" target="33" edge="1">
164 |                     <mxGeometry relative="1" as="geometry"/>
165 |                 </mxCell>
166 |                 <mxCell id="49" value="process_attgt" style="whiteSpace=wrap;html=1;fillColor=#6a00ff;fontColor=#ffffff;strokeColor=#3700CC;" parent="1" vertex="1">
167 |                     <mxGeometry x="788" y="-690" width="120" height="60" as="geometry"/>
168 |                 </mxCell>
169 |                 <mxCell id="50" value="reset.sim" style="whiteSpace=wrap;html=1;fillColor=#647687;fontColor=#ffffff;strokeColor=#314354;" parent="1" vertex="1">
170 |                     <mxGeometry x="1215" y="-660" width="120" height="60" as="geometry"/>
171 |                 </mxCell>
172 |                 <mxCell id="81" style="edgeStyle=none;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;strokeColor=#000000;" parent="1" source="51" target="52" edge="1">
173 |                     <mxGeometry relative="1" as="geometry"/>
174 |                 </mxCell>
175 |                 <mxCell id="51" value="build_sim_dataset" style="whiteSpace=wrap;html=1;fillColor=#647687;fontColor=#ffffff;strokeColor=#314354;" parent="1" vertex="1">
176 |                     <mxGeometry x="1215" y="-590" width="120" height="60" as="geometry"/>
177 |                 </mxCell>
178 |                 <mxCell id="52" value="sim" style="whiteSpace=wrap;html=1;fillColor=#647687;fontColor=#ffffff;strokeColor=#314354;" parent="1" vertex="1">
179 |                     <mxGeometry x="1215" y="-420" width="120" height="60" as="geometry"/>
180 |                 </mxCell>
181 |                 <mxCell id="53" value="tidy.MP" style="whiteSpace=wrap;html=1;fillColor=#76608a;fontColor=#ffffff;strokeColor=#432D57;" parent="1" vertex="1">
182 |                     <mxGeometry x="1980" y="-890" width="120" height="60" as="geometry"/>
183 |                 </mxCell>
184 |                 <mxCell id="54" value="glance.MP" style="whiteSpace=wrap;html=1;fillColor=#76608a;fontColor=#ffffff;strokeColor=#432D57;" parent="1" vertex="1">
185 |                     <mxGeometry x="1980" y="-830" width="120" height="60" as="geometry"/>
186 |                 </mxCell>
187 |                 <mxCell id="55" value="tidy.AGGTEobj" style="whiteSpace=wrap;html=1;fillColor=#76608a;fontColor=#ffffff;strokeColor=#432D57;" parent="1" vertex="1">
188 |                     <mxGeometry x="1980" y="-770" width="120" height="60" as="geometry"/>
189 |                 </mxCell>
190 |                 <mxCell id="56" value="glance.AGGTEobj" style="whiteSpace=wrap;html=1;fillColor=#76608a;fontColor=#ffffff;strokeColor=#432D57;" parent="1" vertex="1">
191 |                     <mxGeometry x="1980" y="-710" width="120" height="60" as="geometry"/>
192 |                 </mxCell>
193 |                 <mxCell id="57" value="trimmer" style="whiteSpace=wrap;html=1;" parent="1" vertex="1">
194 |                     <mxGeometry x="1850" y="-860" width="120" height="60" as="geometry"/>
195 |                 </mxCell>
196 |                 <mxCell id="58" value="global variables" style="whiteSpace=wrap;html=1;fillColor=#a0522d;fontColor=#ffffff;strokeColor=#6D1F00;" parent="1" vertex="1">
197 |                     <mxGeometry x="2110" y="-890" width="120" height="60" as="geometry"/>
198 |                 </mxCell>
199 |                 <mxCell id="59" value="conditiona_did_pretest" style="whiteSpace=wrap;html=1;" parent="1" vertex="1">
200 |                     <mxGeometry x="666" y="-980" width="120" height="60" as="geometry"/>
201 |                 </mxCell>
202 |                 <mxCell id="62" style="edgeStyle=none;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;strokeColor=#001DBC;fillColor=#0050ef;" parent="1" source="61" target="38" edge="1">
203 |                     <mxGeometry relative="1" as="geometry"/>
204 |                 </mxCell>
205 |                 <mxCell id="61" value="DRDID::{....}" style="ellipse;whiteSpace=wrap;html=1;fillColor=#008a00;fontColor=#ffffff;strokeColor=#005700;" parent="1" vertex="1">
206 |                     <mxGeometry x="170" y="-612" width="100" height="100" as="geometry"/>
207 |                 </mxCell>
208 |                 <mxCell id="67" style="edgeStyle=none;html=1;exitX=1;exitY=1;exitDx=0;exitDy=0;entryX=0.25;entryY=0;entryDx=0;entryDy=0;strokeColor=#000000;strokeWidth=2;" parent="1" source="66" target="34" edge="1">
209 |                     <mxGeometry relative="1" as="geometry"/>
210 |                 </mxCell>
211 |                 <mxCell id="78" style="edgeStyle=none;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;entryX=0.333;entryY=0;entryDx=0;entryDy=0;entryPerimeter=0;strokeColor=#000000;" parent="1" source="66" target="48" edge="1">
212 |                     <mxGeometry relative="1" as="geometry"/>
213 |                 </mxCell>
214 |                 <mxCell id="80" style="edgeStyle=none;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.25;entryDx=0;entryDy=0;strokeColor=#000000;" parent="1" source="66" target="49" edge="1">
215 |                     <mxGeometry relative="1" as="geometry"/>
216 |                 </mxCell>
217 |                 <mxCell id="66" value="BMisc::{..}" style="ellipse;whiteSpace=wrap;html=1;fillColor=#60a917;strokeColor=#2D7600;fontColor=#ffffff;" parent="1" vertex="1">
218 |                     <mxGeometry x="516" y="-884" width="140" height="140" as="geometry"/>
219 |                 </mxCell>
220 |             </root>
221 |         </mxGraphModel>
222 |     </diagram>
223 | </mxfile>


--------------------------------------------------------------------------------
/figs/did_r.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2cml-ai/csdid/7ad721d4c384bca147ae16de84b7c5df8fe26959/figs/did_r.png


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
  1 | # Difference in Difference in Python
  2 | 
  3 | [![PyPI version](https://img.shields.io/pypi/v/csdid.svg?color=blue)](https://pypi.org/project/csdid/)
  4 | [![Downloads](https://static.pepy.tech/personalized-badge/csdid?period=total&units=international_system&left_color=blue&right_color=grey&left_text=Downloads)](https://pepy.tech/project/csdid)
  5 | [![Last commit](https://img.shields.io/github/last-commit/d2cml-ai/csdid.svg)](https://github.com/d2cml-ai/csdid/commits/main)
  6 | [![GitHub stars](https://img.shields.io/github/stars/d2cml-ai/csdid.svg?style=social)](https://github.com/d2cml-ai/csdid/stargazers)
  7 | [![GitHub issues](https://img.shields.io/github/issues/d2cml-ai/csdid.svg)](https://github.com/d2cml-ai/csdid/issues)
  8 | [![License](https://img.shields.io/github/license/d2cml-ai/csdid.svg)](https://github.com/d2cml-ai/csdid/blob/main/LICENSE)
  9 | 
 10 | 
 11 | The **csdid** package contains tools for computing average treatment
 12 | effect parameters in a Difference-in-Differences setup allowing for
 13 | 
 14 | - More than two time periods
 15 | 
 16 | - Variation in treatment timing (i.e., units can become treated at
 17 |   different points in time)
 18 | 
 19 | - Treatment effect heterogeneity (i.e, the effect of participating in
 20 |   the treatment can vary across units and exhibit potentially complex
 21 |   dynamics, selection into treatment, or time effects)
 22 | 
 23 | - The parallel trends assumption holds only after conditioning on
 24 |   covariates
 25 | 
 26 | The main parameters are **group-time average treatment effects**. These
 27 | are the average treatment effect for a particular group (group is
 28 | defined by treatment timing) in a particular time period. These
 29 | parameters are a natural generalization of the average treatment effect
 30 | on the treated (ATT) which is identified in the textbook case with two
 31 | periods and two groups to the case with multiple periods.
 32 | 
 33 | Group-time average treatment effects are also natural building blocks
 34 | for more aggregated treatment effect parameters such as overall
 35 | treatment effects or event-study-type estimands.
 36 | 
 37 | ## Getting Started
 38 | 
 39 | There has been some recent work on DiD with multiple time periods. The
 40 | **csdid** package implements the framework put forward in
 41 | 
 42 | - [Callaway, Brantly and Pedro H.C. Sant’Anna.
 43 |   “Difference-in-Differences with Multiple Time Periods.” Journal of
 44 |   Econometrics, Vol. 225, No. 2, pp. 200-230,
 45 |   2021.](https://doi.org/10.1016/j.jeconom.2020.12.001) or
 46 |   \[arXiv\](https://arxiv.org/abs/1803.09015
 47 | 
 48 | This project is based on the original [did R
 49 | package](https://github.com/bcallaway11/did).
 50 | 
 51 | ## Instalation
 52 | 
 53 | You can install **csdid** from `pypi` with:
 54 | 
 55 |     pip install csdid
 56 | 
 57 | or via github:
 58 | 
 59 |     pip install git+https://github.com/d2cml-ai/csdid/
 60 | 
 61 | ### Dependencies
 62 | 
 63 | Additionally, I have created an additional library called `drdid`, which
 64 | can be installed via GitHub.
 65 | 
 66 |     pip install git+https://github.com/d2cml-ai/DRDID
 67 | 
 68 | ## Basic Example
 69 | 
 70 | The following is a simplified example of the effect of states increasing
 71 | their minimum wages on county-level teen employment rates which comes
 72 | from [Callaway and Sant’Anna
 73 | (2021)](https://authors.elsevier.com/a/1cFzc15Dji4pnC).
 74 | 
 75 | - [More detailed examples are also
 76 |   available](https://bcallaway11.github.io/did/articles/did-basics.html)
 77 | 
 78 | A subset of the data is available in the package and can be loaded by
 79 | 
 80 | ``` python
 81 | from csdid.att_gt import ATTgt
 82 | import pandas as pd
 83 | data = pd.read_csv("https://raw.githubusercontent.com/d2cml-ai/csdid/function-aggte/data/mpdta.csv")
 84 | ```
 85 | 
 86 | The dataset contains 500 observations of county-level teen employment
 87 | rates from 2003-2007. Some states are first treated in 2004, some in
 88 | 2006, and some in 2007 (see the paper for more details). The important
 89 | variables in the dataset are
 90 | 
 91 | - **lemp** This is the log of county-level teen employment. It is the
 92 |   outcome variable
 93 | 
 94 | - **first.treat** This is the period when a state first increases its
 95 |   minimum wage. It can be 2004, 2006, or 2007. It is the variable that
 96 |   defines *group* in this application
 97 | 
 98 | - **year** This is the year and is the *time* variable
 99 | 
100 | - **countyreal** This is an id number for each county and provides the
101 |   individual identifier in this panel data context
102 | 
103 | To estimate group-time average treatment effects, use the
104 | **ATTgt().fit()** method
105 | 
106 | ``` python
107 | out = ATTgt(yname = "lemp",
108 |               gname = "first.treat",
109 |               idname = "countyreal",
110 |               tname = "year",
111 |               xformla = f"lemp~1",
112 |               data = data,
113 |               ).fit(est_method = 'dr')
114 | ```
115 | 
116 | Summary table
117 | 
118 | ``` python
119 | out.summ_attgt().summary2
120 | ```
121 | 
122 | <div>
123 | <style scoped>
124 |     .dataframe tbody tr th:only-of-type {
125 |         vertical-align: middle;
126 |     }
127 | &#10;    .dataframe tbody tr th {
128 |         vertical-align: top;
129 |     }
130 | &#10;    .dataframe thead th {
131 |         text-align: right;
132 |     }
133 | </style>
134 | 
135 | |     | Group | Time | ATT(g, t) | Post | Std. Error | \[95% Pointwise | Conf. Band\] |     |
136 | |-----|-------|------|-----------|------|------------|-----------------|--------------|-----|
137 | | 0   | 2004  | 2004 | -0.0105   | 0    | 0.0241     | -0.0781         | 0.0571       |     |
138 | | 1   | 2004  | 2005 | -0.0704   | 0    | 0.0324     | -0.1612         | 0.0204       |     |
139 | | 2   | 2004  | 2006 | -0.1373   | 0    | 0.0393     | -0.2476         | -0.0269      | \*  |
140 | | 3   | 2004  | 2007 | -0.1008   | 0    | 0.0360     | -0.2017         | 0.0001       |     |
141 | | 4   | 2006  | 2004 | 0.0065    | 0    | 0.0238     | -0.0601         | 0.0732       |     |
142 | | 5   | 2006  | 2005 | -0.0028   | 0    | 0.0188     | -0.0554         | 0.0499       |     |
143 | | 6   | 2006  | 2006 | -0.0046   | 0    | 0.0172     | -0.0528         | 0.0437       |     |
144 | | 7   | 2006  | 2007 | -0.0412   | 0    | 0.0201     | -0.0976         | 0.0152       |     |
145 | | 8   | 2007  | 2004 | 0.0305    | 0    | 0.0147     | -0.0108         | 0.0719       |     |
146 | | 9   | 2007  | 2005 | -0.0027   | 0    | 0.0160     | -0.0476         | 0.0421       |     |
147 | | 10  | 2007  | 2006 | -0.0311   | 0    | 0.0173     | -0.0796         | 0.0174       |     |
148 | | 11  | 2007  | 2007 | -0.0261   | 0    | 0.0171     | -0.0740         | 0.0219       |     |
149 | 
150 | </div>
151 | 
152 | In the graphs, a semicolon `;` should be added to prevent printing the
153 | class and the graph information.
154 | 
155 | ``` python
156 | out.plot_attgt();
157 | ```
158 | 
159 |     /home/runner/work/csdid/csdid/csdid/plots/gplot.py:19: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '['2004' '2005' '2006' '2007']' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
160 |       ssresults.loc[:, 'year'] = ssresults['year'].astype(int).astype(str)
161 |     /home/runner/work/csdid/csdid/csdid/plots/gplot.py:19: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '['2004' '2005' '2006' '2007']' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
162 |       ssresults.loc[:, 'year'] = ssresults['year'].astype(int).astype(str)
163 |     /home/runner/work/csdid/csdid/csdid/plots/gplot.py:19: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '['2004' '2005' '2006' '2007']' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
164 |       ssresults.loc[:, 'year'] = ssresults['year'].astype(int).astype(str)
165 | 
166 | ![](README_files/figure-commonmark/cell-5-output-2.png)
167 | 
168 | ``` python
169 | out.aggte(typec='calendar');
170 | ```
171 | 
172 | 
173 | 
174 |     Overall summary of ATT's based on calendar time aggregation:
175 |         ATT Std. Error  [95.0%  Conf. Int.]  
176 |     -0.0417     0.0169 -0.0748      -0.0086 *
177 | 
178 | 
179 |     Time Effects (calendar):
180 |        Time  Estimate  Std. Error  [95.0% Simult.   Conf. Band   
181 |     0  2004   -0.0105      0.0244          -0.0584      0.0374   
182 |     1  2005   -0.0704      0.0307          -0.1305     -0.0103  *
183 |     2  2006   -0.0488      0.0210          -0.0900     -0.0076  *
184 |     3  2007   -0.0371      0.0136          -0.0637     -0.0105  *
185 |     ---
186 |     Signif. codes: `*' confidence band does not cover 0
187 |     Control Group:  Never Treated , 
188 |     Anticipation Periods:  0
189 |     Estimation Method:  Doubly Robust
190 | 
191 | ``` python
192 | out.plot_aggte();
193 | ```
194 | 
195 |     /home/runner/work/csdid/csdid/csdid/plots/gplot.py:19: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '['2004' '2005' '2006' '2007']' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
196 |       ssresults.loc[:, 'year'] = ssresults['year'].astype(int).astype(str)
197 | 
198 | ![](README_files/figure-commonmark/cell-7-output-2.png)
199 | 
200 | 
201 | **Event Studies**
202 | 
203 | Although in the current example it is pretty easy to directly interpret
204 | the group-time average treatment effects, there are many cases where it
205 | is convenient to aggregate the group-time average treatment effects into
206 | a small number of parameters. A main type of aggregation is into an
207 | *event study* plot.
208 | 
209 | To make an event study plot in the **csdid** package, one can use the
210 | **aggte** function with **dynamic** option
211 | 
212 | ``` python
213 | out.aggte(typec='dynamic');
214 | ```
215 | 
216 | 
217 | 
218 |     Overall summary of ATT's based on event-study/dynamic aggregation:
219 |     ATT Std. Error  [95.0%  Conf. Int.]  
220 |     -0.0772     0.0207 -0.1179      -0.0366 *
221 | 
222 | 
223 |     Dynamic Effects:
224 |       Event time  Estimate  Std. Error  [95.0% Simult.   Conf. Band   
225 |     0          -3    0.0305      0.0146           0.0019      0.0591  *
226 |     1          -2   -0.0006      0.0129          -0.0259      0.0248   
227 |     2          -1   -0.0245      0.0141          -0.0521      0.0032   
228 |     3           0   -0.0199      0.0117          -0.0428      0.0030   
229 |     4           1   -0.0510      0.0154          -0.0811     -0.0208  *
230 |     5           2   -0.1373      0.0366          -0.2091     -0.0655  *
231 |     6           3   -0.1008      0.0337          -0.1669     -0.0347  *
232 |     ---
233 |     Signif. codes: `*' confidence band does not cover 0
234 |     Control Group:  Never Treated , 
235 |     Anticipation Periods:  0
236 |     Estimation Method:  Doubly Robust
237 | 
238 | The column `event time` is for each group relative to when they first
239 | participate in the treatment. To give some examples, `event time=0`
240 | corresponds to the *on impact* effect, and `event time=-1` is the
241 | *effect* in the period before a unit becomes treated (checking that this
242 | is equal to 0 is potentially useful as a pre-test).
243 | 
244 | To plot the event study, use **plot_aggte** method
245 | ``` python
246 | out.plot_aggte();
247 | ```
248 | 
249 |     /home/runner/work/csdid/csdid/csdid/plots/gplot.py:19: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '['2004' '2005' '2006' '2007']' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
250 |       ssresults.loc[:, 'year'] = ssresults['year'].astype(int).astype(str)
251 | 
252 | ![](README_files/figure-commonmark/cell-8-output-2.png)
253 | 
254 | The figure here is very similar to the group-time average treatment
255 | effects. Red dots are pre-treatment periods, blue dots are
256 | post-treatment periods. The difference is that the x-axis is in event
257 | time.
258 | 
259 | **Overall Effect of Participating in the Treatment**
260 | 
261 | The event study above reported an overall effect of participating in the
262 | treatment. This was computed by averaging the average effects computed
263 | at each length of exposure.
264 | 
265 | In many cases, a more general purpose overall treatment effect parameter
266 | is give by computing the average treatment effect for each group, and
267 | then averaging across groups. This sort of procedure provides an average
268 | treatment effect parameter with a very similar interpretation to the
269 | Average Treatment Effect on the Treated (ATT) in the two period and two
270 | group case.
271 | 
272 | To compute this overall average treatment effect parameter, use
273 | 
274 | 
275 | ``` python
276 | out.aggte(typec='group');
277 | ```
278 | 
279 | 
280 | 
281 |     Overall summary of ATT's based on group/cohort aggregation:
282 |     ATT Std. Error  [95.0%  Conf. Int.]  
283 |     -0.031     0.0124 -0.0553      -0.0067 *
284 | 
285 | 
286 |     Group Effects:
287 |       Group  Estimate  Std. Error  [95.0% Simult.   Conf. Band   
288 |     0   2004   -0.0797      0.0301          -0.1387     -0.0208  *
289 |     1   2006   -0.0229      0.0172          -0.0567      0.0109   
290 |     2   2007   -0.0261      0.0174          -0.0601      0.0080   
291 |     ---
292 |     Signif. codes: `*' confidence band does not cover 0
293 |     Control Group:  Never Treated , 
294 |     Anticipation Periods:  0
295 |     Estimation Method:  Doubly Robust
296 | 
297 | Of particular interest is the `Overall ATT` in the results. Here, we
298 | estimate that increasing the minimum wage decreased teen employment by
299 | 3.1% and the effect is marginally statistically significant.


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | ﻿pandas
 2 | numpy==1.24.3
 3 | scipy
 4 | patsy
 5 | plotnine
 6 | statsmodels
 7 | ipykernel
 8 | joblib
 9 | twine
10 | pytest
11 | rpy2==3.5.15
12 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | 
 3 | # with open('requirements.txt') as f:
 4 | #     required = f.read().splitlines()
 5 | # print(required)
 6 | from csdid._version import __version
 7 | print(__version)
 8 | 
 9 | setup(
10 |   name = 'csdid',
11 |   version=__version,
12 |   url='https://github.com/d2cml-ai/csdid',
13 |   author='D2CML Team, Alexander Quispe, Carlos Guevara, Jhon Flroes',
14 |   keywords=['Causal inference', 'Research'],
15 |   license="MIT",
16 |   description='Difference in Difference in Python',
17 |   classifiers=[
18 |         "Intended Audience :: Developers",
19 |         "Intended Audience :: Education",
20 |         "Intended Audience :: Science/Research",
21 |         "License :: OSI Approved :: Apache Software License",
22 |         "Topic :: Scientific/Engineering",
23 |     ],
24 |   install_requires=[
25 |         'pandas',
26 |         'numpy<=1.24.3',
27 |         'scipy',
28 |         'patsy',
29 |         'plotnine',
30 |         'twine',
31 |         'joblib',
32 |         'drdid @ git+https://github.com/d2cml-ai/DRDID'
33 |   ],
34 |   packages=find_packages(),
35 |   package_data={
36 |     'data': ['data/*'],
37 |     'configs': ['configs/*']
38 |   }
39 | )
40 | 


--------------------------------------------------------------------------------
/test/basic.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | import yaml, pandas as pd
 4 | 
 5 | with open('configs/data.yml') as f:
 6 |   dt = yaml.safe_load(f)
 7 | 
 8 | data = pd.read_csv(dt['mpdata'])
 9 | 
10 | 
11 | yname = "lemp"
12 | gname = "first.treat"
13 | idname = "countyreal"
14 | tname = "year"
15 | xformla = f"lemp~1"
16 | 
17 | from csdid.attgt_fnc import preprocess_did
18 | from csdid.attgt_fnc import compute_att_gt
19 | 
20 | dp = preprocess_did(yname, tname, idname, gname, data = data, xformla=xformla)
21 | 
22 | 
23 | # data = mpdta
24 | # print(data)
25 | # print(tname)
26 | 
27 | # from csdid.att_gt import ATTgt
28 | 
29 | # b = ATTgt(yname, tname, idname, gname, data = data, xformla=xformla).fit()
30 | # c = b.summ_attgt(n = 12).summary2
31 | 
32 | # # print(dir(b))
33 | # # print(b.MP)
34 | # # print(b.dp)
35 | # # print(b.results)
36 | 
37 | # # print(c)
38 | 
39 | # c = b.aggte(balance_e=1)
40 | # print(c)
41 | # b.sdplot()
42 | # b.dplto()
43 | # algo()


--------------------------------------------------------------------------------
/test/test_vs_r.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | import numpy as np
 4 | import pandas as pd
 5 | import pytest
 6 | import rpy2.robjects as ro
 7 | from rpy2.robjects import pandas2ri
 8 | 
 9 | # rpy2 imports
10 | from rpy2.robjects.packages import importr
11 | from csdid.att_gt import ATTgt
12 | 
13 | 
14 | pandas2ri.activate()
15 | 
16 | did = importr("did")
17 | 
18 | @pytest.fixture
19 | def data():
20 | 
21 |   return pd.read_csv("https://raw.githubusercontent.com/d2cml-ai/csdid/function-aggte/data/mpdta.csv")
22 | 
23 | def check_absolute_diff(x1, x2, tol, msg=None):
24 |     msg = "" if msg is None else msg
25 |     assert np.all(np.abs(x1 - x2) < tol), msg
26 | 
27 | def check_relative_diff(x1, x2, tol, msg=None):
28 |     msg = "" if msg is None else msg
29 |     assert np.all(np.abs(x1 - x2) / np.abs(x1) < tol), msg
30 | 
31 | def test_ate(data):
32 | 
33 |   "Test simple ATE via Py vs R."
34 | 
35 |   py_did = ATTgt(
36 |     yname = "lemp",
37 |     gname = "first.treat",
38 |     idname = "countyreal",
39 |     tname = "year",
40 |     data = data,
41 |     biters = 20_000,
42 |   ).fit(est_method = 'dr')
43 | 
44 |   py_res = py_did.aggte("simple")
45 |   py_coef = py_res.atte.get("overall_att")
46 |   py_se = py_res.atte.get("overall_se")
47 | 
48 |   r_did = did.att_gt(
49 |     yname = "lemp",
50 |     gname = "first.treat",
51 |     idname = "countyreal",
52 |     tname = "year",
53 |     data = data,
54 |     biters = 20_000
55 |   )
56 | 
57 |   r_coef = did.aggte(r_did, type = "simple").rx2('overall.att')
58 |   r_se = did.aggte(r_did, type = "simple").rx2('overall.se')
59 | 
60 |   check_absolute_diff(py_coef, r_coef, 1e-8, "ATEs are not equal.")
61 |   check_relative_diff(py_se, r_se, 0.01, "SEs are not equal.")
62 | 


--------------------------------------------------------------------------------