├── .gitattributes ├── .gitignore ├── .readthedocs.yaml ├── .vscode └── settings.json ├── LICENSE ├── README.md ├── docs ├── Makefile ├── api.rst ├── changelog.rst ├── conf.py ├── index.rst ├── ref.rst ├── requirements.txt ├── tmp │ └── mydocs │ │ ├── .buildinfo │ │ ├── .doctrees │ │ ├── api.doctree │ │ ├── environment.pickle │ │ ├── index.doctree │ │ ├── ref.doctree │ │ └── usage.doctree │ │ ├── _sources │ │ ├── api.rst.txt │ │ ├── index.rst.txt │ │ ├── ref.rst.txt │ │ └── usage.rst.txt │ │ ├── _static │ │ ├── basic.css │ │ ├── css │ │ │ ├── badge_only.css │ │ │ ├── fonts │ │ │ │ ├── Roboto-Slab-Bold.woff │ │ │ │ ├── Roboto-Slab-Bold.woff2 │ │ │ │ ├── Roboto-Slab-Regular.woff │ │ │ │ ├── Roboto-Slab-Regular.woff2 │ │ │ │ ├── fontawesome-webfont.eot │ │ │ │ ├── fontawesome-webfont.svg │ │ │ │ ├── fontawesome-webfont.ttf │ │ │ │ ├── fontawesome-webfont.woff │ │ │ │ ├── fontawesome-webfont.woff2 │ │ │ │ ├── lato-bold-italic.woff │ │ │ │ ├── lato-bold-italic.woff2 │ │ │ │ ├── lato-bold.woff │ │ │ │ ├── lato-bold.woff2 │ │ │ │ ├── lato-normal-italic.woff │ │ │ │ ├── lato-normal-italic.woff2 │ │ │ │ ├── lato-normal.woff │ │ │ │ └── lato-normal.woff2 │ │ │ └── theme.css │ │ ├── doctools.js │ │ ├── documentation_options.js │ │ ├── file.png │ │ ├── jquery-3.5.1.js │ │ ├── jquery.js │ │ ├── js │ │ │ ├── badge_only.js │ │ │ ├── html5shiv-printshiv.min.js │ │ │ ├── html5shiv.min.js │ │ │ └── theme.js │ │ ├── language_data.js │ │ ├── minus.png │ │ ├── plus.png │ │ ├── pygments.css │ │ ├── searchtools.js │ │ ├── underscore-1.13.1.js │ │ └── underscore.js │ │ ├── api.html │ │ ├── genindex.html │ │ ├── index.html │ │ ├── objects.inv │ │ ├── ref.html │ │ ├── search.html │ │ ├── searchindex.js │ │ └── usage.html └── usage.rst ├── poetry.lock ├── pyproject.toml ├── src └── causaltensor │ ├── .vscode │ └── settings.json │ ├── __init__.py │ ├── cauest │ ├── CovariancePCA.py │ ├── DID.py │ ├── DebiasConvex.py │ ├── DebiasConvexMissing.py │ ├── DebiasConvex_backup.py │ ├── MCNNM.py │ ├── OLSSyntheticControl.py │ ├── OLSSyntheticControl_old.py │ ├── Readme.ipynb │ ├── RobustSyntheticControl.py │ ├── Row_Specific_Treatments.py │ ├── SDID.py │ ├── __init__.py │ ├── panel_solver.py │ └── result.py │ ├── matcomple │ ├── ALS_solver.py │ ├── __init__.py │ └── hard_impute.py │ ├── matlib │ ├── __init__.py │ ├── generation.py │ ├── generation_treatment_pattern.py │ └── util.py │ ├── sample_data │ ├── __init__.py │ ├── fetch.ipynb │ └── fetch.py │ └── tests │ ├── test_real_class.py │ └── test_synthetic_class.py ├── tests ├── .vscode │ └── settings.json ├── MCNNM_test.ipynb ├── MLAB_data.txt ├── Panel Data Example local.ipynb ├── Panel_Regression_with_Multiple_Interventions.ipynb ├── SDID_test.ipynb ├── distribution_test.ipynb ├── ols_synthetic_control.ipynb ├── prop99.csv ├── readme_synth_matlab.txt ├── sales.p └── test_DC_PR.ipynb └── tutorials ├── MLAB_data.txt ├── Panel Data Example.ipynb ├── Panel_Data_Example.ipynb ├── Panel_Regression_with_Multiple_Interventions.ipynb ├── Synth.zip ├── loss_function.m ├── readme_synth_matlab.txt └── synth_code.m /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 
| # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 105 | __pypackages__/ 106 | 107 | # Celery stuff 108 | celerybeat-schedule 109 | celerybeat.pid 110 | 111 | # SageMath parsed files 112 | *.sage.py 113 | 114 | # Environments 115 | .env 116 | .venv 117 | env/ 118 | venv/ 119 | ENV/ 120 | env.bak/ 121 | venv.bak/ 122 | 123 | # Spyder project settings 124 | .spyderproject 125 | .spyproject 126 | 127 | # Rope project settings 128 | .ropeproject 129 | 130 | # mkdocs documentation 131 | /site 132 | 133 | # mypy 134 | .mypy_cache/ 135 | .dmypy.json 136 | dmypy.json 137 | 138 | # Pyre type checker 139 | .pyre/ 140 | 141 | # pytype static type analyzer 142 | .pytype/ 143 | 144 | # Cython debug symbols 145 | cython_debug/ 146 | 147 | # PyCharm 148 | # JetBrains specific template is maintainted in a separate JetBrains.gitignore that can 149 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 150 | # and can be added to the global gitignore or merged into this file. For a more nuclear 151 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
152 | #.idea/ 153 | tests/.DS_Store 154 | docs/.DS_Store 155 | docs/source/.DS_Store 156 | .DS_Store 157 | src/causaltensor/.DS_Store 158 | .DS_Store 159 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the version of Python and other tools you might need 9 | build: 10 | os: ubuntu-22.04 11 | tools: 12 | python: "3.11" 13 | 14 | # Build documentation in the docs/ directory with Sphinx 15 | sphinx: 16 | configuration: docs/conf.py 17 | 18 | # We recommend specifying your dependencies to enable reproducible builds: 19 | # https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 20 | python: 21 | install: 22 | - requirements: docs/requirements.txt 23 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "esbonio.sphinx.confDir": "" 3 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CausalTensor 2 | CausalTensor is a Python package for causal inference and policy evaluation using panel data. The package has reached 13K downloads as of March 2025. 3 | 4 | [![PyPI Version](https://badge.fury.io/py/causaltensor.svg)](https://pypi.org/project/causaltensor/) 5 | [![Documentation Status](https://readthedocs.org/projects/causaltensor/badge/?version=latest)](https://causaltensor.readthedocs.io/en/latest/?badge=latest) 6 | [![Downloads](https://static.pepy.tech/badge/causaltensor)](https://pepy.tech/project/causaltensor) 7 | 8 | ## What is CausalTensor 9 | CausalTensor is a suite of tools for addressing questions like "What is the impact of strategy X on outcome Y" given time-series data collected from multiple units. Answering such questions has a wide range of applications across econometrics, operations research, business analytics, political science, and healthcare. Please visit our [complete documentation](https://causaltensor.readthedocs.io/) for more information. 10 | 11 | ## Installing CausalTensor 12 | CausalTensor is compatible with Python 3 and depends on numpy. The simplest way to install CausalTensor and its dependencies is from PyPI with pip, Python's preferred package installer. 13 | 14 | $ pip install causaltensor 15 | 16 | Note that CausalTensor is an active project and routinely publishes new releases. To upgrade CausalTensor to the latest version, use pip as follows. 17 | 18 | $ pip install -U causaltensor 19 | 20 | ## Using CausalTensor 21 | We have implemented the following estimators, including the traditional Difference-in-Difference method and recently proposed methods such as Synthetic Difference-in-Difference, Matrix Completion with Nuclear Norm Minimization, and De-biased Convex Panel Regression.
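Before the summary table below, here is a minimal quickstart sketch. It builds a toy outcome matrix `O` and intervention matrix `Z` with numpy and runs the DID estimator described in the documentation, which returns the estimated baseline outcomes `M` and the estimated treatment effect `tau`. The import path `causaltensor.cauest` is an assumption based on the source layout; check the documentation or the tutorials for the exact API of your installed version.

    import numpy as np
    from causaltensor.cauest import DID   # assumed import path; see the documentation for the exact API

    # Toy panel: 50 units observed over 40 periods.
    N, T = 50, 40
    O = np.random.randn(N, T) + 10        # outcome matrix, O[i, j] = outcome of unit i at time j
    Z = np.zeros((N, T))
    Z[:10, 20:] = 1                       # intervention matrix: first 10 units treated from period 20 onward

    M, tau = DID(O, Z)                    # M: estimated ideal (baseline) outcomes, tau: estimated ATT
    print(tau)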
22 | 23 | | Estimator | Reference | 24 | | ----------- | ----------- | 25 | | [Difference-in-Difference (DID)](https://en.wikipedia.org/wiki/Difference_in_differences) | [Implemented through two-way fixed effects regression.](http://web.mit.edu/insong/www/pdf/FEmatch-twoway.pdf) | 26 | | [De-biased Convex Panel Regression (DC-PR)](https://arxiv.org/abs/2106.02780) | Vivek Farias, Andrew Li, and Tianyi Peng. "Learning treatment effects in panels with general intervention patterns." Advances in Neural Information Processing Systems 34 (2021): 14001-14013. | 27 | | [Synthetic Control (OLS SC)](http://www.jstor.org/stable/3132164) | Abadie, Alberto, and Javier Gardeazabal. “The Economic Costs of Conflict: A Case Study of the Basque Country.” The American Economic Review 93, no. 1 (2003): 113–32. | 28 | | [Synthetic Difference-in-Difference (SDID)](https://arxiv.org/pdf/1812.09970.pdf) | Dmitry Arkhangelsky, Susan Athey, David A. Hirshberg, Guido W. Imbens, and Stefan Wager. "Synthetic difference-in-differences." American Economic Review 111, no. 12 (2021): 4088-4118. | 29 | | [Matrix Completion with Nuclear Norm Minimization (MC-NNM)](https://arxiv.org/abs/1710.10251)| Susan Athey, Mohsen Bayati, Nikolay Doudchenko, Guido Imbens, and Khashayar Khosravi. "Matrix completion methods for causal panel data models." Journal of the American Statistical Association 116, no. 536 (2021): 1716-1730. | 30 | 31 | Please visit our [documentation](https://causaltensor.readthedocs.io/) for the usage instructions. Or check the following simple demo as a tutorial: 32 | 33 | - [Panel Data Example](https://colab.research.google.com/github/TianyiPeng/causaltensor/blob/main/tutorials/Panel_Data_Example.ipynb) 34 | - [Panel Data Example with old API](https://colab.research.google.com/github/TianyiPeng/causaltensor/blob/main/tutorials/Panel%20Data%20Example.ipynb) 35 | - [Panel Data with Multiple Treatments](https://colab.research.google.com/github/TianyiPeng/causaltensor/blob/main/tutorials/Panel_Regression_with_Multiple_Interventions.ipynb) 36 | - [MC-NNM with covariates and missing data](https://colab.research.google.com/github/TianyiPeng/causaltensor/blob/main/tests/MCNNM_test.ipynb) -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | rm -rf $(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/causalml.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/causalml.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/causalml" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/causalml" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 
105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 141 | 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | make -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | API 2 | ####### 3 | 4 | 5 | Difference-in-Difference 6 | ***************************** 7 | 8 | Difference-in-difference (DID) is a popular method to address panel data problems. 9 | We use a two-way fixed effects regression to estimate the average treatment effect on the treated entries (ATT). 10 | In particular, we solve the following regression by linear regression 11 | 12 | .. 
math:: 13 | \min \sum_{ij} (O_{ij} - a_i - b_j - \tau Z_{ij})^2 14 | 15 | where :math:`a_{i}, b_{j}` are unknown fixed effects and :math:`\tau` is the ATT. 16 | 17 | To use DID, simply call 18 | 19 | .. code-block:: python 20 | 21 | M, tau = DID(O, Z) 22 | 23 | with two return parameters `M` and `tau`. Here :math:`M_{ij}=a_{i}+b_{j}` is the estimated ideal outcome and `tau` is the estimated ATT. 24 | 25 | De-biased Convex Panel Regression 26 | ********************************************************** 27 | 28 | The second method is De-biased Convex Panel Regression (DC-PR) proposed by [FariasLiPeng22]_. 29 | Note that an issue with the DID model is that :math:`a_i+b_j` is often too simple to describe the complex reality of the outcome. As a fix, 30 | a low-rank factor model that generalizes :math:`a_i+b_j` has been advocated. 31 | 32 | The idea in [FariasLiPeng22]_ is to first solve the following low-rank regression problem by replacing :math:`a_i+b_j` in DID with a low-rank matrix :math:`M` 33 | 34 | .. math:: 35 | \hat{M}, \hat{\tau} = \arg\min \sum_{ij} (O_{ij}-M_{ij}-\tau Z_{ij})^2 + \lambda \|M\|_{*} 36 | 37 | where :math:`\|M\|_{*}` is the nuclear norm, which encourages :math:`M` to be low-rank, and :math:`\lambda` is a tuning parameter. The second step of [FariasLiPeng22]_ is to mitigate the bias induced by the regularization parameter (it also reflects the interaction between :math:`\hat{M}` and :math:`Z`): 38 | 39 | .. math:: 40 | \tau^{d} = \hat{\tau} - \lambda \frac{\langle \hat{U}\hat{V}^{\top}, Z \rangle}{\|P_{\hat{T}^{\perp}}(Z)\|_{F}^2} 41 | 42 | where :math:`\hat{U}\hat{V}^{\top}` is formed from the singular vectors of :math:`\hat{M}` and :math:`P_{\hat{T}^{\perp}}` denotes the projection onto the orthogonal complement of the tangent space of :math:`\hat{M}`. To use DC-PR, call 43 | 44 | .. code-block:: python 45 | 46 | M, tau, std = DC_PR_auto_rank(O, Z) 47 | 48 | where `M` and `tau` are the de-biased estimates and `std` is the estimated standard deviation of `tau`. This function tries to select a proper rank for :math:`M` automatically (the selection is not always stable and may be updated later). You can also use 49 | 50 | .. code-block:: python 51 | 52 | M, tau, std = DC_PR_with_suggested_rank(O, Z, suggest_r = r) 53 | 54 | if you have your own estimate of the rank of :math:`M`. 55 | 56 | We also implement panel regression with a hard rank constraint: 57 | 58 | .. math:: 59 | \hat{M}, \hat{\tau} = \arg\min_{\mathrm{rank}(M)\leq r} \sum_{ij} (O_{ij}-M_{ij}-\tau Z_{ij})^2 60 | 61 | This is a non-convex optimization problem, and we use alternating minimization between `M` and `tau` to solve it. The theoretical guarantees for this non-convex method are weaker than for the convex method above (convergence to the global optimum is not always guaranteed), but its practical performance is comparable (sometimes even better). 62 | 63 | .. code-block:: python 64 | 65 | M, tau, std = DC_PR_auto_rank(O, Z, method='non-convex') 66 | M, tau, std = DC_PR_with_suggested_rank(O, Z, suggest_r = 2, method='non-convex') 67 | 68 | We also provide an option to select `convex` or `non-convex` panel regression in a data-driven fashion. This is recommended in practice. 69 | 70 | .. code-block:: python 71 | 72 | M, tau, std = DC_PR_auto_rank(O, Z, method='auto') 73 | M, tau, std = DC_PR_with_suggested_rank(O, Z, suggest_r = 2, method='auto') 74 | 75 | 76 | Synthetic Difference-in-Difference 77 | ********************************************************** 78 | 79 | 80 | The third method is synthetic difference-in-difference (SDID), proposed by [Arkhangelsky21]_; see [Arkhangelsky21]_ for more details. To use SDID, simply call 81 | 82 | .. code-block:: python 83 | 84 | tau = SDID(O, Z) 85 | 86 | where `tau` is the SDID estimate of the ATT.
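Since DID, DC-PR, and SDID all take the same outcome matrix `O` and intervention matrix `Z` as inputs, it is straightforward to compare their estimates on the same panel. The sketch below is illustrative only: it assumes `O` and `Z` are numpy arrays shaped as described in the usage tutorial and that the estimators are imported as in the code blocks above; the exact import paths may differ.

.. code-block:: python

    # Illustrative comparison of the estimators introduced so far (assumed imports).
    M_did, tau_did = DID(O, Z)                        # two-way fixed effects (DID)
    M_dc, tau_dc, std_dc = DC_PR_auto_rank(O, Z)      # de-biased convex panel regression
    tau_sdid = SDID(O, Z)                             # synthetic difference-in-difference

    print('DID estimate:  ', tau_did)
    print('DC-PR estimate:', tau_dc, '+/-', std_dc)
    print('SDID estimate: ', tau_sdid)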
87 | 88 | Matrix Completion with Nuclear Norm Minimization 89 | ********************************************************** 90 | 91 | 92 | 93 | The fourth method is based on the matrix completion approach proposed by [Athey21]_. The idea is to solve the following matrix completion problem, using only the outcome data without intervention (i.e., entries with :math:`Z_{ij}=0`) 94 | 95 | .. math:: 96 | \hat{M}, \hat{a}, \hat{b} = \arg\min \sum_{ij, Z_{ij}=0} (O_{ij}-M_{ij} - a_i - b_j)^2 + \lambda \|M\|_{*} 97 | 98 | where :math:`\|M\|_{*}` is the nuclear norm that encourages :math:`M` to be low-rank (here :math:`a_{i}` and :math:`b_{j}` are used to improve the empirical performance, as suggested by [Athey21]_). 99 | 100 | After :math:`\hat{M}, \hat{a}, \hat{b}` are obtained, the ATT :math:`\hat{\tau}` can be estimated simply by 101 | 102 | .. math:: 103 | 104 | \hat{\tau} = \frac{\sum_{ij, Z_{ij}=1} (O_{ij} - \hat{M}_{ij} - \hat{a}_i - \hat{b}_{j})}{\sum_{ij, Z_{ij}=1} 1}. 105 | 106 | 107 | To use this method (referred to as matrix completion with nuclear norm minimization, or MC-NNM), when you have an estimate of the rank of the matrix :math:`M` (e.g., by checking the spectrum), call 108 | 109 | .. code-block:: python 110 | 111 | M, a, b, tau = MC_NNM_with_suggested_rank(O, 1-Z, suggest_r = r) 112 | 113 | where `M`, `a`, `b` are the minimizers and `tau` is the estimated ATT. 114 | 115 | We also provide a function to help you find the right parameter :math:`\lambda` or rank by cross-validation: 116 | 117 | .. code-block:: python 118 | 119 | M, a, b, tau = MC_NNM_with_cross_validation(O, 1-Z) 120 | -------------------------------------------------------------------------------- /docs/changelog.rst: -------------------------------------------------------------------------------- 1 | .. :changelog: 2 | 3 | Changelog 4 | ========= 5 | 6 | 0.1.12 (2025-03-12) 7 | ------------------- 8 | - Added the CVXPY package for the SDID method 9 | 10 | 0.1.11 (2025-03-12) 11 | ------------------- 12 | - Fixed a bug in the DC method: suggest_r was ignored because auto_rank took priority; suggest_r is now prioritized over auto_rank 13 | 14 | 0.1.10 (2025-02-08) 15 | ------------------- 16 | - Added covariate support for the SDID method 17 | 18 | 0.1.9 (2025-02-07) 19 | ------------------ 20 | - Added Panel Solver Interface 21 | - Added more test cases 22 | - Added covariate support for synthetic control 23 | 24 | 0.1.8 (2023-11-05) 25 | ------------------ 26 | - Enhanced MC-NNM functionality with covariate integration and improved handling of missing data. 27 | 28 | 0.1.7 (2023-08-24) 29 | ------------------ 30 | - Introduced support for synthetic control methodology. 31 | 32 | 0.1.5 (2023-05-16) 33 | ------------------ 34 | - Expanded capabilities to address multiple-treatment problems using panel regression methods with debiasing features. -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder.
2 | 3 | # -- Project information 4 | 5 | project = 'CausalTensor' 6 | copyright = '2022, Tianyi Peng' 7 | author = 'Tianyi Peng' 8 | 9 | release = '0.1' 10 | version = '0.1.4' 11 | 12 | # -- General configuration 13 | 14 | extensions = [ 15 | 'sphinx.ext.duration', 16 | 'sphinx.ext.doctest', 17 | 'sphinx.ext.autodoc', 18 | 'sphinx.ext.autosummary', 19 | 'sphinx.ext.intersphinx', 20 | 'sphinx.ext.mathjax', 21 | ] 22 | 23 | intersphinx_mapping = { 24 | 'python': ('https://docs.python.org/3/', None), 25 | 'sphinx': ('https://www.sphinx-doc.org/en/master/', None), 26 | } 27 | intersphinx_disabled_domains = ['std'] 28 | 29 | templates_path = ['_templates'] 30 | 31 | # -- Options for HTML output 32 | 33 | html_theme = 'sphinx_rtd_theme' 34 | 35 | # -- Options for EPUB output 36 | epub_show_urls = 'footnote' 37 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to CausalTensor's documentation! 2 | ======================================== 3 | 4 | **CausalTensor** is a Python library for doing causal inference and policy evaluation using panel data. 5 | It addresses questions like "What is the impact of strategy X on outcome Y" given time-series data collected from multiple units. 6 | Answering such questions has a wide range of applications across econometrics, operations research, business analytics, political science, and healthcare. 7 | 8 | Check out the :doc:`usage` section for further information. 9 | 10 | .. note:: 11 | 12 | This project is under active development. 13 | 14 | Contents 15 | -------- 16 | 17 | .. toctree:: 18 | 19 | usage 20 | api 21 | ref 22 | changelog 23 | -------------------------------------------------------------------------------- /docs/ref.rst: -------------------------------------------------------------------------------- 1 | References 2 | ========== 3 | 4 | .. [FariasLiPeng22] 5 | Farias, Vivek, Andrew Li, and Tianyi Peng. 6 | "Learning treatment effects in panels with general intervention patterns." Advances in Neural Information Processing Systems 34 (2021): 14001-14013. 7 | 8 | .. [Arkhangelsky21] 9 | Arkhangelsky, Dmitry, Susan Athey, David A. Hirshberg, Guido W. Imbens, and Stefan Wager. 10 | "Synthetic difference-in-differences." American Economic Review 111, no. 12 (2021): 4088-4118. 11 | 12 | .. [Athey21] 13 | Athey, Susan, Mohsen Bayati, Nikolay Doudchenko, Guido Imbens, and Khashayar Khosravi. 14 | "Matrix completion methods for causal panel data models." Journal of the American Statistical Association 116, no. 536 (2021): 1716-1730. -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | Cython>=0.28.0 2 | numpy>=0.16.0 3 | scikit-learn 4 | matplotlib 5 | sphinx 6 | sphinx_rtd_theme 7 | sphinxcontrib-bibtex<2.0.0 8 | -------------------------------------------------------------------------------- /docs/tmp/mydocs/.buildinfo: -------------------------------------------------------------------------------- 1 | # Sphinx build info version 1 2 | # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
3 | config: 437480f28468b528e11c91205393c07b 4 | tags: 645f666f9bcd5a90fca523b33c5a78b7 5 | -------------------------------------------------------------------------------- /docs/tmp/mydocs/.doctrees/api.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TianyiPeng/causaltensor/cafb53fc869c1f8902148fc34809b58472f1f124/docs/tmp/mydocs/.doctrees/api.doctree -------------------------------------------------------------------------------- /docs/tmp/mydocs/.doctrees/environment.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TianyiPeng/causaltensor/cafb53fc869c1f8902148fc34809b58472f1f124/docs/tmp/mydocs/.doctrees/environment.pickle -------------------------------------------------------------------------------- /docs/tmp/mydocs/.doctrees/index.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TianyiPeng/causaltensor/cafb53fc869c1f8902148fc34809b58472f1f124/docs/tmp/mydocs/.doctrees/index.doctree -------------------------------------------------------------------------------- /docs/tmp/mydocs/.doctrees/ref.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TianyiPeng/causaltensor/cafb53fc869c1f8902148fc34809b58472f1f124/docs/tmp/mydocs/.doctrees/ref.doctree -------------------------------------------------------------------------------- /docs/tmp/mydocs/.doctrees/usage.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TianyiPeng/causaltensor/cafb53fc869c1f8902148fc34809b58472f1f124/docs/tmp/mydocs/.doctrees/usage.doctree -------------------------------------------------------------------------------- /docs/tmp/mydocs/_sources/api.rst.txt: -------------------------------------------------------------------------------- 1 | API 2 | ####### 3 | 4 | 5 | Difference-in-Difference 6 | ***************************** 7 | 8 | Difference-in-difference (DID) is a popular method to address panel data problems. 9 | We use a two-way fixed effects regression to estimate the average treatment effect on the treated entries (ATT). 10 | In particular, we solve the following regression by linear regression 11 | 12 | .. math:: 13 | \min \sum_{ij} (O_{ij} - a_i - b_j - \tau Z_{ij})^2 14 | 15 | where :math:`a_{i}, b_{j}` are unknown fixed effects and :math:`\tau` is the ATT. 16 | 17 | To use DID, simply call 18 | 19 | .. code-block:: python 20 | 21 | M, tau = DID(O, Z) 22 | 23 | with two return parameters `M` and `tau`. Here :math:`M_{ij}=a_{i}+b_{j}`` is the estimated ideal outcome; and `tau` is the estimated ATT. 24 | 25 | De-biased Convex Panel Regression 26 | ********************************************************** 27 | 28 | The second method is De-biaeed Convex Panel Regression (DC-PR) proposed by [FariasLiPeng22]_. 29 | Note that an issue of the DID model is that, $a_i+b_j$ are often too simple to describe the complex reality of the outcome. As a fix, 30 | a low-rank factor model to generalize :math:`a_i+b_j` has been advocated. 31 | 32 | The idea in [FariasLiPeng22]_ is to firstly solve the following low-rank regression problem by replacing :math:`a_i+b_j` in DID by a low-rank matrix :math:`M` 33 | 34 | .. 
math:: 35 | \hat{M}, \hat{\tau} = \arg\min \sum_{ij} (O_{ij}-M_{ij}-\tau Z_{ij})^2 + \lambda \|M\|_{*} 36 | 37 | where $\|M\|_{*}$ is the nuclear norm to penalize the low-rankness of the matrix and $\lambda$ is a tunning parameter. The second step of [2] is to mitigate the bias induced by the regularization parameter (it also reflects the interaction between $\hat{M}$ and $Z$): 38 | 39 | .. math:: 40 | \tau^{d} = \hat{\tau} - \lambda \frac{}{\|P_{\hat{T}^{\perp}}(Z)\|_{F}^2}. 41 | 42 | To use DC-PR, call 43 | 44 | .. code-block:: python 45 | 46 | M, tau, M_raw, tau_raw = DC_PR_auto_rank(O, Z) 47 | 48 | where `M`, `tau` are the de-biased versions and `M_raw` and `tau_raw` are the optimizers for the first step. This function helps to find the proper rank for :math:`M` (but not very stable, and may be updated later). You can also use 49 | 50 | .. code-block:: python 51 | 52 | M, tau, M_raw, tau_raw = DC_PR_with_suggested_rank(O, Z, suggest_r = r) 53 | 54 | if you have an estimation of the rank of :math:`M` by yourself. 55 | 56 | In addition, we also provide a formula to estimate the empirical standard deviation of DC-PR when noises are (heterogenoues) independent sub-Gaussian. See [FariasLiPeng22]_ for further details. 57 | 58 | .. code-block:: python 59 | 60 | std = std_debiased_convex(O, Z, M_raw, tau_raw) 61 | 62 | 63 | Synthetic Difference-in-Difference 64 | ********************************************************** 65 | 66 | 67 | The second method is called synthetic difference-in-difference (SDID) proposed by [Arkhangelsky21]_. Readers can read [Arkhangelsky21]_ for more details. To use SDID, simply call 68 | 69 | .. code-block:: python 70 | 71 | tau = SDID(O, Z) 72 | 73 | where `tau` is the estimation of SDID. 74 | 75 | Matrix Completion with Nuclear Norm Minimization 76 | ********************************************************** 77 | 78 | 79 | 80 | The third method is based on matrix completion method proposed by [Athey21]_. The idea is to solve the following matrix completion problem, only using the outcome data without intervention (i.e., :math:`Z_{ij}=0`) 81 | 82 | .. math:: 83 | \hat{M}, \hat{a}, \hat{b} = \arg\min \sum_{ij, Z_{ij}=0} (O_{ij}-M_{ij} - a_i - b_j)^2 + \lambda \|M\|_{*} 84 | 85 | where :math:`\|M\|_{*}` is the nuclear norm that penalizes the low-rankness of the matrix (here :math:`a_{i}` and :math:`b_{j}` are used to improve the empirical performance, as suggested by [Athey21]_). 86 | 87 | After :math:`\hat{M}, \hat{a}, \hat{b}` are obtained, the ATT :math:`\hat{\tau}` can be estimated simply by 88 | 89 | .. math:: 90 | 91 | \hat{\tau} = \frac{\sum_{ij, Z_{ij}=1} (O_{ij} - \hat{M}_{ij} - \hat{a}_i - \hat{b}_{j})}{\sum_{ij, Z_{ij}=1} 1}. 92 | 93 | 94 | To use this method (referred to as matrix completion with nuclear norm minimization, or MC-NNM), when you have an estimation of the rank of the matrix :math:`M` (e.g., by checking the spectrum), call 95 | 96 | .. code-block:: python 97 | 98 | M, a, b, tau = MC_NNM_with_suggested_rank(O, 1-Z, suggest_r = r) 99 | 100 | where `M`, `a`, `b` are the optimizers and `tau` is the estimated ATT. 101 | 102 | We also provide a function to help you find the right parameter $\lambda$ or rank by cross-validation: 103 | 104 | .. 
code-block:: python 105 | 106 | M, a, b, tau = MC_NNM_with_cross_validation(O, 1-Z) 107 | -------------------------------------------------------------------------------- /docs/tmp/mydocs/_sources/index.rst.txt: -------------------------------------------------------------------------------- 1 | Welcome to CausalTensor's documentation! 2 | ======================================== 3 | 4 | **CausalTensor** is a Python library for doing causal inference and policy evaluation using panel data. 5 | It addresses questions like "What is the impact of strategy X to outcome Y" given time-series data colleting from multiple units. 6 | Answering such questions has wide range of applications from econometrics, operations research, business analytics, polictical science, to healthcare. 7 | 8 | Check out the :doc:`usage` section for further information. 9 | 10 | .. note:: 11 | 12 | This project is under active development. 13 | 14 | Contents 15 | -------- 16 | 17 | .. toctree:: 18 | 19 | usage 20 | api 21 | ref 22 | -------------------------------------------------------------------------------- /docs/tmp/mydocs/_sources/ref.rst.txt: -------------------------------------------------------------------------------- 1 | References 2 | ========== 3 | 4 | .. [FariasLiPeng22] 5 | Farias, Vivek, Andrew Li, and Tianyi Peng. 6 | "Learning treatment effects in panels with general intervention patterns." Advances in Neural Information Processing Systems 34 (2021): 14001-14013. 7 | 8 | .. [Arkhangelsky21] 9 | Arkhangelsky, Dmitry, Susan Athey, David A. Hirshberg, Guido W. Imbens, and Stefan Wager. 10 | "Synthetic difference-in-differences." American Economic Review 111, no. 12 (2021): 4088-4118. 11 | 12 | .. [Athey21] 13 | Athey, Susan, Mohsen Bayati, Nikolay Doudchenko, Guido Imbens, and Khashayar Khosravi. 14 | "Matrix completion methods for causal panel data models." Journal of the American Statistical Association 116, no. 536 (2021): 1716-1730. -------------------------------------------------------------------------------- /docs/tmp/mydocs/_sources/usage.rst.txt: -------------------------------------------------------------------------------- 1 | Usage 2 | ===== 3 | 4 | .. _installation: 5 | 6 | Installation 7 | ------------ 8 | 9 | CausalTensor is compatible with Python 3 or later and also depends on numpy. The simplest way to install CausalTensor and its dependencies is from PyPI with pip, Python's preferred package installer. 10 | 11 | .. code-block:: console 12 | 13 | $ pip install causaltensor 14 | 15 | Note that CausalTensor is an active project and routinely publishes new releases. In order to upgrade CausalTensor to the latest version, use pip as follows. 16 | 17 | .. code-block:: console 18 | 19 | $ pip install -U causaltensor 20 | 21 | Tutorial 22 | ---------------- 23 | For a basic panel data problem, we require two matrices as inputs 24 | 25 | 1. :math:`O \in R^{N \times T}`: :math:`O` is the outcome matrix where :math:`O_{ij}` represents the outcome of the i-th unit at time j 26 | 2. :math:`Z \in R^{N \times T}`: :math:`Z` is the intervention matrix where :math:`Z_{ij}` indicates whether the i-th unit used the intervention or not at time j. 27 | 28 | Given such two matrices, the problem is to ask **"what is the impact of the intervention to the outcome**"? 29 | 30 | Please check `Panel Data Example `_ 31 | for a simple demo. 32 | 33 | Check :doc:`api` for various methods for solving such a problem. 
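As a concrete illustration of the two required inputs, the sketch below builds the outcome matrix :math:`O` and the intervention matrix :math:`Z` from a long-format table of (unit, time, outcome, treated) records. It is a minimal sketch, assuming the records are available as a pandas DataFrame; the column names and the use of pandas are illustrative assumptions, not part of the package API.

.. code-block:: python

    import pandas as pd

    # Long-format records: one row per (unit, time) pair (illustrative data).
    df = pd.DataFrame({
        'unit':    ['A', 'A', 'B', 'B'],
        'time':    [1, 2, 1, 2],
        'outcome': [1.0, 1.2, 0.9, 1.5],
        'treated': [0, 0, 0, 1],
    })

    # Pivot into the N x T matrices expected by the estimators.
    O = df.pivot(index='unit', columns='time', values='outcome').to_numpy()
    Z = df.pivot(index='unit', columns='time', values='treated').to_numpy()

Both matrices can then be passed to any of the estimators described in :doc:`api`.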
-------------------------------------------------------------------------------- /docs/tmp/mydocs/_static/css/badge_only.css: -------------------------------------------------------------------------------- 1 | .clearfix{*zoom:1}.clearfix:after,.clearfix:before{display:table;content:""}.clearfix:after{clear:both}@font-face{font-family:FontAwesome;font-style:normal;font-weight:400;src:url(fonts/fontawesome-webfont.eot?674f50d287a8c48dc19ba404d20fe713?#iefix) format("embedded-opentype"),url(fonts/fontawesome-webfont.woff2?af7ae505a9eed503f8b8e6982036873e) format("woff2"),url(fonts/fontawesome-webfont.woff?fee66e712a8a08eef5805a46892932ad) format("woff"),url(fonts/fontawesome-webfont.ttf?b06871f281fee6b241d60582ae9369b9) format("truetype"),url(fonts/fontawesome-webfont.svg?912ec66d7572ff821749319396470bde#FontAwesome) format("svg")}.fa:before{font-family:FontAwesome;font-style:normal;font-weight:400;line-height:1}.fa:before,a .fa{text-decoration:inherit}.fa:before,a .fa,li .fa{display:inline-block}li .fa-large:before{width:1.875em}ul.fas{list-style-type:none;margin-left:2em;text-indent:-.8em}ul.fas li .fa{width:.8em}ul.fas li .fa-large:before{vertical-align:baseline}.fa-book:before,.icon-book:before{content:"\f02d"}.fa-caret-down:before,.icon-caret-down:before{content:"\f0d7"}.fa-caret-up:before,.icon-caret-up:before{content:"\f0d8"}.fa-caret-left:before,.icon-caret-left:before{content:"\f0d9"}.fa-caret-right:before,.icon-caret-right:before{content:"\f0da"}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;z-index:400}.rst-versions a{color:#2980b9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27ae60}.rst-versions .rst-current-version:after{clear:both;content:"";display:block}.rst-versions .rst-current-version .fa{color:#fcfcfc}.rst-versions .rst-current-version .fa-book,.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version.rst-out-of-date{background-color:#e74c3c;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#f1c40f;color:#000}.rst-versions.shift-up{height:auto;max-height:100%;overflow-y:scroll}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:grey;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:1px solid #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px;max-height:90%}.rst-versions.rst-badge .fa-book,.rst-versions.rst-badge .icon-book{float:none;line-height:30px}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book,.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge>.rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width:768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}} -------------------------------------------------------------------------------- 
/docs/tmp/mydocs/_static/css/fonts/Roboto-Slab-Bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TianyiPeng/causaltensor/cafb53fc869c1f8902148fc34809b58472f1f124/docs/tmp/mydocs/_static/css/fonts/Roboto-Slab-Bold.woff -------------------------------------------------------------------------------- /docs/tmp/mydocs/_static/css/fonts/Roboto-Slab-Bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TianyiPeng/causaltensor/cafb53fc869c1f8902148fc34809b58472f1f124/docs/tmp/mydocs/_static/css/fonts/Roboto-Slab-Bold.woff2 -------------------------------------------------------------------------------- /docs/tmp/mydocs/_static/css/fonts/Roboto-Slab-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TianyiPeng/causaltensor/cafb53fc869c1f8902148fc34809b58472f1f124/docs/tmp/mydocs/_static/css/fonts/Roboto-Slab-Regular.woff -------------------------------------------------------------------------------- /docs/tmp/mydocs/_static/css/fonts/Roboto-Slab-Regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TianyiPeng/causaltensor/cafb53fc869c1f8902148fc34809b58472f1f124/docs/tmp/mydocs/_static/css/fonts/Roboto-Slab-Regular.woff2 -------------------------------------------------------------------------------- /docs/tmp/mydocs/_static/css/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TianyiPeng/causaltensor/cafb53fc869c1f8902148fc34809b58472f1f124/docs/tmp/mydocs/_static/css/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /docs/tmp/mydocs/_static/css/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TianyiPeng/causaltensor/cafb53fc869c1f8902148fc34809b58472f1f124/docs/tmp/mydocs/_static/css/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /docs/tmp/mydocs/_static/css/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TianyiPeng/causaltensor/cafb53fc869c1f8902148fc34809b58472f1f124/docs/tmp/mydocs/_static/css/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /docs/tmp/mydocs/_static/css/fonts/fontawesome-webfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TianyiPeng/causaltensor/cafb53fc869c1f8902148fc34809b58472f1f124/docs/tmp/mydocs/_static/css/fonts/fontawesome-webfont.woff2 -------------------------------------------------------------------------------- /docs/tmp/mydocs/_static/css/fonts/lato-bold-italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TianyiPeng/causaltensor/cafb53fc869c1f8902148fc34809b58472f1f124/docs/tmp/mydocs/_static/css/fonts/lato-bold-italic.woff -------------------------------------------------------------------------------- /docs/tmp/mydocs/_static/css/fonts/lato-bold-italic.woff2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TianyiPeng/causaltensor/cafb53fc869c1f8902148fc34809b58472f1f124/docs/tmp/mydocs/_static/css/fonts/lato-bold-italic.woff2 -------------------------------------------------------------------------------- /docs/tmp/mydocs/_static/css/fonts/lato-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TianyiPeng/causaltensor/cafb53fc869c1f8902148fc34809b58472f1f124/docs/tmp/mydocs/_static/css/fonts/lato-bold.woff -------------------------------------------------------------------------------- /docs/tmp/mydocs/_static/css/fonts/lato-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TianyiPeng/causaltensor/cafb53fc869c1f8902148fc34809b58472f1f124/docs/tmp/mydocs/_static/css/fonts/lato-bold.woff2 -------------------------------------------------------------------------------- /docs/tmp/mydocs/_static/css/fonts/lato-normal-italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TianyiPeng/causaltensor/cafb53fc869c1f8902148fc34809b58472f1f124/docs/tmp/mydocs/_static/css/fonts/lato-normal-italic.woff -------------------------------------------------------------------------------- /docs/tmp/mydocs/_static/css/fonts/lato-normal-italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TianyiPeng/causaltensor/cafb53fc869c1f8902148fc34809b58472f1f124/docs/tmp/mydocs/_static/css/fonts/lato-normal-italic.woff2 -------------------------------------------------------------------------------- /docs/tmp/mydocs/_static/css/fonts/lato-normal.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TianyiPeng/causaltensor/cafb53fc869c1f8902148fc34809b58472f1f124/docs/tmp/mydocs/_static/css/fonts/lato-normal.woff -------------------------------------------------------------------------------- /docs/tmp/mydocs/_static/css/fonts/lato-normal.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TianyiPeng/causaltensor/cafb53fc869c1f8902148fc34809b58472f1f124/docs/tmp/mydocs/_static/css/fonts/lato-normal.woff2 -------------------------------------------------------------------------------- /docs/tmp/mydocs/_static/documentation_options.js: -------------------------------------------------------------------------------- 1 | var DOCUMENTATION_OPTIONS = { 2 | URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'), 3 | VERSION: '0.1', 4 | LANGUAGE: 'None', 5 | COLLAPSE_INDEX: false, 6 | BUILDER: 'html', 7 | FILE_SUFFIX: '.html', 8 | LINK_SUFFIX: '.html', 9 | HAS_SOURCE: true, 10 | SOURCELINK_SUFFIX: '.txt', 11 | NAVIGATION_WITH_KEYS: false 12 | }; -------------------------------------------------------------------------------- /docs/tmp/mydocs/_static/file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TianyiPeng/causaltensor/cafb53fc869c1f8902148fc34809b58472f1f124/docs/tmp/mydocs/_static/file.png -------------------------------------------------------------------------------- /docs/tmp/mydocs/_static/js/badge_only.js: -------------------------------------------------------------------------------- 1 | !function(e){var t={};function r(n){if(t[n])return 
t[n].exports;var o=t[n]={i:n,l:!1,exports:{}};return e[n].call(o.exports,o,o.exports,r),o.l=!0,o.exports}r.m=e,r.c=t,r.d=function(e,t,n){r.o(e,t)||Object.defineProperty(e,t,{enumerable:!0,get:n})},r.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},r.t=function(e,t){if(1&t&&(e=r(e)),8&t)return e;if(4&t&&"object"==typeof e&&e&&e.__esModule)return e;var n=Object.create(null);if(r.r(n),Object.defineProperty(n,"default",{enumerable:!0,value:e}),2&t&&"string"!=typeof e)for(var o in e)r.d(n,o,function(t){return e[t]}.bind(null,o));return n},r.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return r.d(t,"a",t),t},r.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r.p="",r(r.s=4)}({4:function(e,t,r){}}); -------------------------------------------------------------------------------- /docs/tmp/mydocs/_static/js/html5shiv-printshiv.min.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @preserve HTML5 Shiv 3.7.3-pre | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed 3 | */ 4 | !function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=y.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=y.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),y.elements=c+" "+a,j(b)}function f(a){var b=x[a[v]];return b||(b={},w++,a[v]=w,x[w]=b),b}function g(a,c,d){if(c||(c=b),q)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():u.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||t.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),q)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return y.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(y,b.frag)}function j(a){a||(a=b);var d=f(a);return!y.shivCSS||p||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),q||i(a,d),a}function k(a){for(var b,c=a.getElementsByTagName("*"),e=c.length,f=RegExp("^(?:"+d().join("|")+")$","i"),g=[];e--;)b=c[e],f.test(b.nodeName)&&g.push(b.applyElement(l(b)));return g}function l(a){for(var b,c=a.attributes,d=c.length,e=a.ownerDocument.createElement(A+":"+a.nodeName);d--;)b=c[d],b.specified&&e.setAttribute(b.nodeName,b.nodeValue);return e.style.cssText=a.style.cssText,e}function m(a){for(var b,c=a.split("{"),e=c.length,f=RegExp("(^|[\\s,>+~])("+d().join("|")+")(?=[[\\s,>+~#.:]|$)","gi"),g="$1"+A+"\\:$2";e--;)b=c[e]=c[e].split("}"),b[b.length-1]=b[b.length-1].replace(f,g),c[e]=b.join("}");return c.join("{")}function n(a){for(var b=a.length;b--;)a[b].removeNode()}function o(a){function b(){clearTimeout(g._removeSheetTimer),d&&d.removeNode(!0),d=null}var 
d,e,g=f(a),h=a.namespaces,i=a.parentWindow;return!B||a.printShived?a:("undefined"==typeof h[A]&&h.add(A),i.attachEvent("onbeforeprint",function(){b();for(var f,g,h,i=a.styleSheets,j=[],l=i.length,n=Array(l);l--;)n[l]=i[l];for(;h=n.pop();)if(!h.disabled&&z.test(h.media)){try{f=h.imports,g=f.length}catch(o){g=0}for(l=0;g>l;l++)n.push(f[l]);try{j.push(h.cssText)}catch(o){}}j=m(j.reverse().join("")),e=k(a),d=c(a,j)}),i.attachEvent("onafterprint",function(){n(e),clearTimeout(g._removeSheetTimer),g._removeSheetTimer=setTimeout(b,500)}),a.printShived=!0,a)}var p,q,r="3.7.3",s=a.html5||{},t=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,u=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,v="_html5shiv",w=0,x={};!function(){try{var a=b.createElement("a");a.innerHTML="",p="hidden"in a,q=1==a.childNodes.length||function(){b.createElement("a");var a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){p=!0,q=!0}}();var y={elements:s.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time video",version:r,shivCSS:s.shivCSS!==!1,supportsUnknownElements:q,shivMethods:s.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=y,j(b);var z=/^$|\b(?:all|print)\b/,A="html5shiv",B=!q&&function(){var c=b.documentElement;return!("undefined"==typeof b.namespaces||"undefined"==typeof b.parentWindow||"undefined"==typeof c.applyElement||"undefined"==typeof c.removeNode||"undefined"==typeof a.attachEvent)}();y.type+=" print",y.shivPrint=o,o(b),"object"==typeof module&&module.exports&&(module.exports=y)}("undefined"!=typeof window?window:this,document); -------------------------------------------------------------------------------- /docs/tmp/mydocs/_static/js/html5shiv.min.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @preserve HTML5 Shiv 3.7.3 | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed 3 | */ 4 | !function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=t.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=t.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),t.elements=c+" "+a,j(b)}function f(a){var b=s[a[q]];return b||(b={},r++,a[q]=r,s[r]=b),b}function g(a,c,d){if(c||(c=b),l)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():p.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||o.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),l)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return t.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(t,b.frag)}function 
j(a){a||(a=b);var d=f(a);return!t.shivCSS||k||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),l||i(a,d),a}var k,l,m="3.7.3-pre",n=a.html5||{},o=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,p=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,q="_html5shiv",r=0,s={};!function(){try{var a=b.createElement("a");a.innerHTML="",k="hidden"in a,l=1==a.childNodes.length||function(){b.createElement("a");var a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){k=!0,l=!0}}();var t={elements:n.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time video",version:m,shivCSS:n.shivCSS!==!1,supportsUnknownElements:l,shivMethods:n.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=t,j(b),"object"==typeof module&&module.exports&&(module.exports=t)}("undefined"!=typeof window?window:this,document); -------------------------------------------------------------------------------- /docs/tmp/mydocs/_static/js/theme.js: -------------------------------------------------------------------------------- 1 | !function(n){var e={};function t(i){if(e[i])return e[i].exports;var o=e[i]={i:i,l:!1,exports:{}};return n[i].call(o.exports,o,o.exports,t),o.l=!0,o.exports}t.m=n,t.c=e,t.d=function(n,e,i){t.o(n,e)||Object.defineProperty(n,e,{enumerable:!0,get:i})},t.r=function(n){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(n,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(n,"__esModule",{value:!0})},t.t=function(n,e){if(1&e&&(n=t(n)),8&e)return n;if(4&e&&"object"==typeof n&&n&&n.__esModule)return n;var i=Object.create(null);if(t.r(i),Object.defineProperty(i,"default",{enumerable:!0,value:n}),2&e&&"string"!=typeof n)for(var o in n)t.d(i,o,function(e){return n[e]}.bind(null,o));return i},t.n=function(n){var e=n&&n.__esModule?function(){return n.default}:function(){return n};return t.d(e,"a",e),e},t.o=function(n,e){return Object.prototype.hasOwnProperty.call(n,e)},t.p="",t(t.s=0)}([function(n,e,t){t(1),n.exports=t(3)},function(n,e,t){(function(){var e="undefined"!=typeof window?window.jQuery:t(2);n.exports.ThemeNav={navBar:null,win:null,winScroll:!1,winResize:!1,linkScroll:!1,winPosition:0,winHeight:null,docHeight:null,isRunning:!1,enable:function(n){var t=this;void 0===n&&(n=!0),t.isRunning||(t.isRunning=!0,e((function(e){t.init(e),t.reset(),t.win.on("hashchange",t.reset),n&&t.win.on("scroll",(function(){t.linkScroll||t.winScroll||(t.winScroll=!0,requestAnimationFrame((function(){t.onScroll()})))})),t.win.on("resize",(function(){t.winResize||(t.winResize=!0,requestAnimationFrame((function(){t.onResize()})))})),t.onResize()})))},enableSticky:function(){this.enable(!0)},init:function(n){n(document);var e=this;this.navBar=n("div.wy-side-scroll:first"),this.win=n(window),n(document).on("click","[data-toggle='wy-nav-top']",(function(){n("[data-toggle='wy-nav-shift']").toggleClass("shift"),n("[data-toggle='rst-versions']").toggleClass("shift")})).on("click",".wy-menu-vertical .current ul li a",(function(){var 
t=n(this);n("[data-toggle='wy-nav-shift']").removeClass("shift"),n("[data-toggle='rst-versions']").toggleClass("shift"),e.toggleCurrent(t),e.hashChange()})).on("click","[data-toggle='rst-current-version']",(function(){n("[data-toggle='rst-versions']").toggleClass("shift-up")})),n("table.docutils:not(.field-list,.footnote,.citation)").wrap("
"),n("table.docutils.footnote").wrap("
"),n("table.docutils.citation").wrap("
"),n(".wy-menu-vertical ul").not(".simple").siblings("a").each((function(){var t=n(this);expand=n(''),expand.on("click",(function(n){return e.toggleCurrent(t),n.stopPropagation(),!1})),t.prepend(expand)}))},reset:function(){var n=encodeURI(window.location.hash)||"#";try{var e=$(".wy-menu-vertical"),t=e.find('[href="'+n+'"]');if(0===t.length){var i=$('.document [id="'+n.substring(1)+'"]').closest("div.section");0===(t=e.find('[href="#'+i.attr("id")+'"]')).length&&(t=e.find('[href="#"]'))}if(t.length>0){$(".wy-menu-vertical .current").removeClass("current").attr("aria-expanded","false"),t.addClass("current").attr("aria-expanded","true"),t.closest("li.toctree-l1").parent().addClass("current").attr("aria-expanded","true");for(let n=1;n<=10;n++)t.closest("li.toctree-l"+n).addClass("current").attr("aria-expanded","true");t[0].scrollIntoView()}}catch(n){console.log("Error expanding nav for anchor",n)}},onScroll:function(){this.winScroll=!1;var n=this.win.scrollTop(),e=n+this.winHeight,t=this.navBar.scrollTop()+(n-this.winPosition);n<0||e>this.docHeight||(this.navBar.scrollTop(t),this.winPosition=n)},onResize:function(){this.winResize=!1,this.winHeight=this.win.height(),this.docHeight=$(document).height()},hashChange:function(){this.linkScroll=!0,this.win.one("hashchange",(function(){this.linkScroll=!1}))},toggleCurrent:function(n){var e=n.closest("li");e.siblings("li.current").removeClass("current").attr("aria-expanded","false"),e.siblings().find("li.current").removeClass("current").attr("aria-expanded","false");var t=e.find("> ul li");t.length&&(t.removeClass("current").attr("aria-expanded","false"),e.toggleClass("current").attr("aria-expanded",(function(n,e){return"true"==e?"false":"true"})))}},"undefined"!=typeof window&&(window.SphinxRtdTheme={Navigation:n.exports.ThemeNav,StickyNav:n.exports.ThemeNav}),function(){for(var n=0,e=["ms","moz","webkit","o"],t=0;t 2 | 3 | 4 | 5 | 6 | Index — CausalTensor 0.1 documentation 7 | 8 | 9 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 |
24 | 49 | 50 |
54 | 55 |
56 |
57 |
58 |
    59 |
  • 60 | 61 |
  • 62 |
  • 63 |
64 |
65 |
66 |
67 |
68 | 69 | 70 |

Index

71 | 72 |
73 | 74 |
75 | 76 | 77 |
78 |
79 |
80 | 81 |
82 | 83 |
84 |

© Copyright 2022, Tianyi Peng.

85 |
86 | 87 | Built with Sphinx using a 88 | theme 89 | provided by Read the Docs. 90 | 91 | 92 |
93 |
94 |
95 |
96 |
97 | 102 | 103 | 104 | -------------------------------------------------------------------------------- /docs/tmp/mydocs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Welcome to CausalTensor’s documentation! — CausalTensor 0.1 documentation 8 | 9 | 10 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 |
26 | 51 | 52 |
56 | 57 |
58 |
59 |
60 |
    61 |
  • 62 | 63 |
  • 64 | View page source 65 |
  • 66 |
67 |
68 |
69 |
70 |
71 | 72 |
73 |

Welcome to CausalTensor’s documentation!

74 |

CausalTensor is a Python library for causal inference and policy evaluation using panel data. 75 | It addresses questions like “What is the impact of strategy X on outcome Y?” given time-series data collected from multiple units. 76 | Answering such questions has a wide range of applications, from econometrics, operations research, business analytics, and political science to healthcare.

77 |

Check out the Usage section for further information.

78 |
79 |

Note

80 |

This project is under active development.

81 |
82 |
83 |

Contents

84 | 101 |
102 |
103 | 104 | 105 |
106 |
107 |
110 | 111 |
112 | 113 |
114 |

© Copyright 2022, Tianyi Peng.

115 |
116 | 117 | Built with Sphinx using a 118 | theme 119 | provided by Read the Docs. 120 | 121 | 122 |
123 |
124 |
125 |
126 |
127 | 132 | 133 | 134 | -------------------------------------------------------------------------------- /docs/tmp/mydocs/objects.inv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TianyiPeng/causaltensor/cafb53fc869c1f8902148fc34809b58472f1f124/docs/tmp/mydocs/objects.inv -------------------------------------------------------------------------------- /docs/tmp/mydocs/ref.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | References — CausalTensor 0.1 documentation 8 | 9 | 10 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 |
26 | 51 | 52 |
56 | 57 |
58 |
59 |
60 | 67 |
68 |
69 |
70 |
71 | 72 |
73 |

References

74 |
75 |
FariasLiPeng22
76 |

Farias, Vivek, Andrew Li, and Tianyi Peng. 77 | “Learning treatment effects in panels with general intervention patterns.” Advances in Neural Information Processing Systems 34 (2021): 14001-14013.

78 |
79 |
Arkhangelsky21
80 |

Arkhangelsky, Dmitry, Susan Athey, David A. Hirshberg, Guido W. Imbens, and Stefan Wager. 81 | “Synthetic difference-in-differences.” American Economic Review 111, no. 12 (2021): 4088-4118.

82 |
83 |
Athey21
84 |

Athey, Susan, Mohsen Bayati, Nikolay Doudchenko, Guido Imbens, and Khashayar Khosravi. 85 | “Matrix completion methods for causal panel data models.” Journal of the American Statistical Association 116, no. 536 (2021): 1716-1730.

86 |
87 |
88 |
89 | 90 | 91 |
92 |
93 |
96 | 97 |
98 | 99 |
100 |

© Copyright 2022, Tianyi Peng.

101 |
102 | 103 | Built with Sphinx using a 104 | theme 105 | provided by Read the Docs. 106 | 107 | 108 |
109 |
110 |
111 |
112 |
113 | 118 | 119 | 120 | -------------------------------------------------------------------------------- /docs/tmp/mydocs/search.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Search — CausalTensor 0.1 documentation 7 | 8 | 9 | 10 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 |
27 | 52 | 53 |
57 | 58 |
59 |
60 |
61 |
    62 |
  • 63 | 64 |
  • 65 |
  • 66 |
67 |
68 |
69 |
70 |
71 | 72 | 79 | 80 | 81 |
82 | 83 |
84 | 85 |
86 |
87 |
88 | 89 |
90 | 91 |
92 |

© Copyright 2022, Tianyi Peng.

93 |
94 | 95 | Built with Sphinx using a 96 | theme 97 | provided by Read the Docs. 98 | 99 | 100 |
101 |
102 |
103 |
104 |
105 | 110 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | -------------------------------------------------------------------------------- /docs/tmp/mydocs/searchindex.js: -------------------------------------------------------------------------------- 1 | Search.setIndex({docnames:["api","index","ref","usage"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":4,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":3,"sphinx.domains.rst":2,"sphinx.domains.std":2,"sphinx.ext.intersphinx":1,sphinx:56},filenames:["api.rst","index.rst","ref.rst","usage.rst"],objects:{},objnames:{},objtypes:{},terms:{"0":0,"1":0,"111":2,"116":2,"12":2,"14001":2,"14013":2,"1716":2,"1730":2,"2":0,"2021":2,"3":3,"34":2,"4088":2,"4118":2,"536":2,"do":1,"function":0,"new":3,"return":0,A:2,As:0,For:3,In:[0,3],It:1,The:[0,3],To:0,_:0,_i:0,a_:0,a_i:0,activ:[1,3],addit:0,address:[0,1],advanc:2,advoc:0,after:0,also:[0,3],american:2,an:[0,3],analyt:1,andrew:2,answer:1,api:[1,3],applic:1,ar:0,arg:0,argmin:[],arkhangelski:2,arkhangelsky21:[0,2],ask:3,associ:2,athei:2,athey21:[0,2],att:0,averag:0,b:0,b_:0,b_j:0,base:0,basic:3,bayati:2,been:0,between:0,bia:0,biaeed:0,bias:1,block:[],busi:1,call:0,can:0,causal:[1,2],causaltensor:3,check:[0,1,3],code:[],collet:1,compat:3,complet:[1,2],complex:0,convex:1,cross:0,d:0,data:[0,1,2,3],david:2,dc:0,dc_pr_auto_rank:0,dc_pr_with_suggested_rank:0,de:1,demo:3,depend:3,describ:0,detail:0,develop:1,deviat:0,did:0,differ:[1,2],dmitri:2,doudchenko:2,e:0,econom:2,econometr:1,effect:[0,2],empir:0,entri:0,estim:0,evalu:1,exampl:3,f:0,factor:0,faria:2,fariaslipeng22:[0,2],find:0,first:0,firstli:0,fix:0,follow:[0,3],formula:0,frac:0,from:[1,3],further:[0,1],g:0,gaussian:0,gener:[0,2],given:[1,3],guido:2,ha:[0,1],hand:[],hat:0,have:0,healthcar:1,help:0,here:0,heterogenou:0,hirshberg:2,i:[0,3],idea:0,ideal:0,ij:[0,3],imben:2,impact:[1,3],improv:0,independ:0,indic:3,induc:0,infer:1,inform:[1,2],input:3,instal:1,interact:0,intervent:[0,2,3],issu:0,its:3,j:[0,3],journal:2,khashayar:2,khosravi:2,lambda:0,later:[0,3],latest:3,learn:2,li:2,librari:1,like:1,linear:0,low:0,m:0,m_:0,m_raw:0,mai:0,math:[],matric:3,matrix:[1,2,3],mc:0,mc_nnm_with_cross_valid:0,mc_nnm_with_suggested_rank:0,method:[0,2,3],min:0,minim:1,mitig:0,model:[0,2],mohsen:2,more:0,multipl:1,n:3,neural:2,nikolai:2,nnm:0,nois:0,norm:1,note:[0,3],nuclear:1,numpi:3,o:[0,3],o_:[0,3],obtain:0,often:0,onli:0,oper:1,optim:0,order:3,out:1,outcom:[0,1,3],p_:0,packag:3,panel:[1,2,3],paramet:0,particular:0,pattern:2,penal:0,peng:2,perform:0,perp:0,pip:3,pleas:3,polici:1,polict:1,popular:0,pr:0,prefer:3,problem:[0,3],process:2,project:[1,3],proper:0,propos:0,provid:0,publish:3,pypi:3,python:[1,3],question:1,r:[0,3],rang:1,rank:0,read:0,reader:0,realiti:0,refer:[0,1],reflect:0,regress:1,regular:0,releas:3,replac:0,repres:3,requir:3,research:1,review:2,right:0,routin:3,s:3,scienc:1,sdid:0,second:0,section:1,see:0,seri:1,simpl:[0,3],simplest:3,simpli:0,solv:[0,3],spectrum:0,stabl:0,standard:0,statist:2,std:0,std_debiased_convex:0,stefan:2,step:0,strategi:1,sub:0,suggest:0,suggest_r:0,sum_:0,susan:2,synthet:[1,2],system:2,t:[0,3],tau:0,tau_raw:0,th:3,thi:[0,1],third:0,tianyi:2,time:[1,3],too:0,top:0,treat:0,treatment:[0,2],tun:0,tutori:1,two:[0,3],u:[0,3],under:1,unit:[1,3],unknown:0,updat:0,upgrad:3,us:[0,1,3],usag:1,v:0,valid:0,varianc:[],variou:3,veri:0,version:[0,3],vivek:2,w:2,wager:2,wai:[0,3],we:[0,3],what:[1,3],when:0,where:[0,3],whether:3,wid
e:1,without:0,x:1,y:1,you:0,yourself:0,z:[0,3],z_:[0,3]},titles:["API","Welcome to CausalTensor\u2019s documentation!","References","Usage"],titleterms:{api:0,bias:0,causaltensor:1,complet:0,content:1,convex:0,de:0,differ:0,document:1,input:[],instal:3,matrix:0,minim:0,norm:0,nuclear:0,panel:0,refer:2,regress:0,s:1,synthet:0,tutori:3,usag:3,welcom:1}}) -------------------------------------------------------------------------------- /docs/tmp/mydocs/usage.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Usage — CausalTensor 0.1 documentation 8 | 9 | 10 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 |
28 | 57 | 58 |
62 | 63 |
64 |
65 |
66 | 73 |
74 |
75 |
76 |
77 | 78 |
79 |

Usage

80 |
81 |

Installation

82 |

CausalTensor is compatible with Python 3 or later and also depends on numpy. The simplest way to install CausalTensor and its dependencies is from PyPI with pip, Python’s preferred package installer.

83 |
$ pip install causaltensor
 84 | 
85 |
86 |

Note that CausalTensor is an active project and routinely publishes new releases. In order to upgrade CausalTensor to the latest version, use pip as follows.

87 |
$ pip install -U causaltensor
 88 | 
89 |
90 |
91 |
92 |

Tutorial

93 |

For a basic panel data problem, we require two matrices as inputs:

94 |
    95 |
  1. \(O \in R^{N \times T}\): \(O\) is the outcome matrix where \(O_{ij}\) represents the outcome of the i-th unit at time j

  2. 96 |
  3. \(Z \in R^{N \times T}\): \(Z\) is the intervention matrix where \(Z_{ij}\) indicates whether the i-th unit used the intervention or not at time j.

  4. 97 |
98 |

Given these two matrices, the problem is to answer the question “what is the impact of the intervention on the outcome?”

99 |

Please check Panel Data Example 100 | for a simple demo.

101 |

Check API for various methods for solving such a problem.
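For concreteness, here is a minimal sketch (synthetic data, illustrative only) that builds O and Z and calls the DID estimator defined in src/causaltensor/cauest/DID.py; the unit count, time horizon, and injected effect size below are arbitrary, and most estimators listed in the API follow a similar O/Z calling pattern.

import numpy as np
from causaltensor.cauest.DID import DID   # two-way fixed-effects (difference-in-differences) estimator

# Synthetic panel: N = 50 units observed over T = 40 periods.
rng = np.random.default_rng(0)
N, T = 50, 40

# O: N x T outcome matrix, O[i, j] = outcome of unit i at time j
# (unit effects + time effects + idiosyncratic noise).
O = rng.normal(size=(N, 1)) + rng.normal(size=(1, T)) + 0.1 * rng.normal(size=(N, T))

# Z: N x T intervention matrix, Z[i, j] = 1 if unit i is treated at time j.
Z = np.zeros((N, T))
Z[:10, 20:] = 1            # the first 10 units adopt the intervention from period 20 onward
O = O + 2.0 * Z            # inject a known treatment effect of 2 into the synthetic outcomes

M, tau = DID(O, Z)         # M: estimated baseline (counterfactual) matrix, tau: estimated effect
print(tau)                 # should be close to the injected effect of 2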

102 |
103 |
104 | 105 | 106 |
107 |
108 |
112 | 113 |
114 | 115 |
116 |

© Copyright 2022, Tianyi Peng.

117 |
118 | 119 | Built with Sphinx using a 120 | theme 121 | provided by Read the Docs. 122 | 123 | 124 |
125 |
126 |
127 |
128 |
129 | 134 | 135 | 136 | -------------------------------------------------------------------------------- /docs/usage.rst: -------------------------------------------------------------------------------- 1 | Usage 2 | ===== 3 | 4 | .. _installation: 5 | 6 | Installation 7 | ------------ 8 | 9 | CausalTensor is compatible with Python 3 or later and also depends on numpy. The simplest way to install CausalTensor and its dependencies is from PyPI with pip, Python's preferred package installer. 10 | 11 | .. code-block:: console 12 | 13 | $ pip install causaltensor 14 | 15 | Note that CausalTensor is an active project and routinely publishes new releases. In order to upgrade CausalTensor to the latest version, use pip as follows. 16 | 17 | .. code-block:: console 18 | 19 | $ pip install -U causaltensor 20 | 21 | Tutorial 22 | ---------------- 23 | For a basic panel data problem, we require two matrices as inputs 24 | 25 | 1. :math:`O \in R^{N \times T}`: :math:`O` is the outcome matrix where :math:`O_{ij}` represents the outcome of the i-th unit at time j 26 | 2. :math:`Z \in R^{N \times T}`: :math:`Z` is the intervention matrix where :math:`Z_{ij}` indicates whether the i-th unit used the intervention or not at time j. 27 | 28 | Given such two matrices, the problem is to ask **"what is the impact of the intervention to the outcome**"? 29 | 30 | Please check `Panel Data Example `_ 31 | for a simple demo. 32 | 33 | Check :doc:`api` for various methods for solving such a problem. -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "causaltensor" 3 | version = "0.1.12" 4 | description = "Package for causal inference in panels" 5 | authors = ["Tianyi Peng ", 6 | "Arushi Jain "] 7 | license = "MIT" 8 | readme = "README.md" 9 | homepage = "https://github.com/TianyiPeng/causaltensor" 10 | repository = "https://github.com/TianyiPeng/causaltensor" 11 | classifiers = [ 12 | "License :: OSI Approved :: MIT License", 13 | "Programming Language :: Python", 14 | "Programming Language :: Python :: 3", 15 | "Topic :: Scientific/Engineering", 16 | "Intended Audience :: Science/Research", 17 | ] 18 | keywords = ["causal", "inference", "panel", "matrix", "tensor"] 19 | 20 | [tool.poetry.dependencies] 21 | python = "^3.10" 22 | numpy = "^1.18" 23 | importlib-metadata = {version = "^3.0", python = "<=3.11"} 24 | toolz = "^0.12.1" 25 | matplotlib = "^3.8.0" 26 | pandas = "^2.1.0" 27 | tqdm = "^4.67.1" 28 | cvxpy = "^1.4.3" 29 | cvxopt = "^1.2" 30 | 31 | 32 | [tool.poetry.group.dev.dependencies] 33 | pytest = "^8.1.1" 34 | ipython = "^8.23.0" 35 | build = "^1.0.3" 36 | twine = "^5.0.0" 37 | ipykernel = "^6.29.0" 38 | 39 | 40 | [build-system] 41 | requires = ["poetry-core>=1.0.0"] 42 | build-backend = "poetry.core.masonry.api" -------------------------------------------------------------------------------- /src/causaltensor/.vscode/settings.json: -------------------------------------------------------------------------------- 1 | {} -------------------------------------------------------------------------------- /src/causaltensor/__init__.py: -------------------------------------------------------------------------------- 1 | from . import matlib 2 | from .matlib import * 3 | 4 | from . import cauest 5 | from .cauest import * 6 | 7 | #from . 
import matcomple 8 | #from .matcomple import * -------------------------------------------------------------------------------- /src/causaltensor/cauest/CovariancePCA.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def random_subset(Ω, K, m): 4 | O_1 = np.reshape(Ω, -1) 5 | pos = np.arange(len(O_1))[O_1 == 1] 6 | O_list = [] 7 | for i in range(K): 8 | select_pos = np.random.choice(list(pos), m, replace=False) 9 | new_O_1 = np.zeros((len(O_1))) 10 | new_O_1[select_pos] = 1 11 | new_O = np.reshape(new_O_1, Ω.shape) 12 | O_list.append(new_O) 13 | return O_list 14 | 15 | def covariance_PCA(O, Ω, suggest_r=-1, return_U = False): #slight issue of cross-validation 16 | O_ob = O * Ω 17 | A = O_ob.dot(O_ob.T) / Ω.dot(Ω.T) 18 | u,s,vh = np.linalg.svd(A, full_matrices=False) 19 | #print(s[0:2*r]) 20 | 21 | def recover(O_train, Ω_train, r): 22 | U = u[:, :r] * np.sqrt(O.shape[0]) 23 | 24 | col_sum = np.sum(Ω_train, axis=0).reshape((Ω.shape[1], 1)) 25 | Y = O_train.T.dot(U) / col_sum ### Eq. (4) in their paper 26 | #Y = O_ob.T.dot(U) / O.shape[0] * Ω.size / np.sum(Ω) 27 | 28 | M = U.dot(Y.T) 29 | 30 | MSE = np.sum(((Ω-Ω_train)*(M-O))**2) 31 | return MSE, M, U 32 | 33 | if suggest_r == -1: 34 | K = 2 35 | p = np.sum(Ω) / np.size(Ω) 36 | Ω_list = random_subset(Ω, K, int(np.sum(Ω)*p)+1) 37 | 38 | energy = np.sum(s) 39 | opt_MSE = 1e9 40 | opt_r = 1e9 41 | 42 | #Cross Validation to choose the optimal r 43 | 44 | for r in range(1,len(s)): 45 | #print(np.sum(s[r-1:]) / energy) 46 | if (np.sum(s[r-1:]) / energy <= 1e-3): 47 | break 48 | train_MSE = [] 49 | for i in range(K): 50 | MSE, Mhat = recover(O*Ω_list[i], Ω_list[i], r) 51 | train_MSE.append(MSE) 52 | MSE = np.mean(train_MSE) 53 | if (MSE < opt_MSE): 54 | opt_MSE = MSE 55 | opt_r = r 56 | #print(MSE, r, np.sum(s[r-1:]) / energy) 57 | else: 58 | opt_r = suggest_r 59 | #print(opt_r) 60 | 61 | MSE, M, U = recover(O_ob, Ω, opt_r) 62 | 63 | tau = np.sum((1-Ω)*(O-M)) / np.sum(1-Ω) 64 | 65 | if (return_U): 66 | return M, tau, U 67 | else: 68 | return M, tau -------------------------------------------------------------------------------- /src/causaltensor/cauest/DID.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from causaltensor.cauest.result import Result 3 | from causaltensor.cauest.panel_solver import PanelSolver 4 | from causaltensor.cauest.panel_solver import FixedEffectPanelSolver 5 | 6 | class DIDResult(Result): 7 | def __init__(self, baseline = None, tau=None, beta=None, row_fixed_effects=None, column_fixed_effects=None, return_tau_scalar=False): 8 | super().__init__(baseline = baseline, tau = tau, return_tau_scalar = return_tau_scalar) 9 | self.beta = beta 10 | self.row_fixed_effects = row_fixed_effects 11 | self.column_fixed_effects = column_fixed_effects 12 | self.M = baseline # for backward compatability 13 | class DIDPanelSolver(PanelSolver): 14 | def __init__(self, Z=None, X=None, Omega=None, fixed_effects='two-way', **kwargs): 15 | super().__init__(Z) 16 | self.X = X 17 | self.Omega = Omega 18 | self.fixed_effects = fixed_effects 19 | if fixed_effects != 'two-way': 20 | raise NotImplementedError('Only two-way fixed effects are implemented.') 21 | if X is None: 22 | new_X = self.Z 23 | else: 24 | new_X = np.concatenate([self.Z, X], axis=2) 25 | self.fixed_effects_solver = FixedEffectPanelSolver(X = new_X, Omega=Omega, fixed_effects=fixed_effects, **kwargs) 26 | 27 | def fit(self, O): 28 | res_fe = 
self.fixed_effects_solver.fit(O) 29 | k = self.Z.shape[0] 30 | tau = res_fe.beta[:k] 31 | beta = res_fe.beta[k:] if self.X is not None else None 32 | res = DIDResult(baseline = res_fe.fitted_value, tau = tau, beta = beta, 33 | row_fixed_effects = res_fe.row_fixed_effects, 34 | column_fixed_effects = res_fe.column_fixed_effects, 35 | return_tau_scalar = self.return_tau_scalar) 36 | return res 37 | 38 | #deprecated 39 | #for backward compatability 40 | def DID(O, Z): 41 | solver = DIDPanelSolver(Z) 42 | res = solver.fit(O) 43 | return res.M, res.tau -------------------------------------------------------------------------------- /src/causaltensor/cauest/DebiasConvexMissing.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import copy 3 | 4 | def convex_algorithm_with_Omega(O, Omega, Z, l, suggest = [], eps = 1e-3, debug = False): 5 | 6 | M = suggest[0] 7 | tau = suggest[1] 8 | num_treat = np.sum(Z * Omega) 9 | for T in range(2000): 10 | ## update M 11 | u,s,vh = np.linalg.svd(Omega*(O - tau*Z) + (1-Omega)*M, full_matrices=False) 12 | #print(s) 13 | #print('before thresholding', np.sum(s), tau) 14 | s = np.maximum(s-l, 0) 15 | M = (u*s).dot(vh) 16 | 17 | #print(np.sum(s)) 18 | #print(s) 19 | 20 | 21 | tau_new = np.sum(Omega * Z * (O - M)) / num_treat # update tau 22 | #print('tau(t) is {}, tau(t+1) is {}'.format(tau, tau_new)) 23 | if (np.abs(tau_new - tau) < eps): 24 | #print('iterations', T) 25 | return M, tau, 'successful' 26 | tau = tau_new 27 | 28 | 29 | if (debug): 30 | print(tau) 31 | return M, tau, 'fail' 32 | 33 | def convex_algorithm_with_Omega_with_fixed_effects(O, Omega, Z, l, suggest = [], eps = 1e-3, debug = False): 34 | 35 | M = suggest[0] 36 | a = suggest[1] 37 | b = suggest[2] 38 | tau = suggest[3] 39 | 40 | n1 = O.shape[0] 41 | n2 = O.shape[1] 42 | 43 | one_row = np.ones((1, n2)) 44 | one_col = np.ones((n1, 1)) 45 | Ω_row_sum = np.sum(Omega, axis = 1).reshape((n1, 1)) 46 | Ω_column_sum = np.sum(Omega, axis = 0).reshape((n2, 1)) 47 | Ω_row_sum[Ω_row_sum==0] = 1 48 | Ω_column_sum[Ω_column_sum==0] = 1 49 | 50 | for T in range(2000): 51 | ## update M 52 | 53 | u,s,vh = np.linalg.svd((O - a.dot(one_row) - one_col.dot(b.T) - tau * Z)*Omega + M*(1-Omega), full_matrices = False) 54 | 55 | #print(s) 56 | #print('before thresholding', np.sum(s), tau) 57 | s = np.maximum(s-l, 0) 58 | M_new = (u*s).dot(vh) 59 | 60 | if (np.sum((M-M_new)**2) < 1e-5 * np.sum(M**2)): 61 | #print('total iterations', T) 62 | break 63 | 64 | M = M_new 65 | for T1 in range(2000): 66 | a = np.sum(Omega*(O-M-one_col.dot(b.T)-tau*Z), axis=1).reshape((n1, 1)) / Ω_row_sum 67 | 68 | b_new = np.sum(Omega*(O-M-a.dot(one_row)-tau*Z), axis=0).reshape((n2, 1)) / Ω_column_sum 69 | 70 | if (np.sum((b_new - b)**2) < 1e-5 * np.sum(b**2)): 71 | break 72 | b = b_new 73 | if (T1 >= 2000): 74 | break 75 | 76 | tau = np.sum(Omega * Z * (O - M - a.dot(one_row) - one_col.dot(b.T))) / np.sum(Omega * Z) 77 | 78 | if (debug): 79 | print(tau) 80 | return M, a, b, tau 81 | 82 | def non_convex_algorithm_with_Omega(O, Omega, Z, r, tau = 0, debug = False): 83 | M = O*Omega 84 | for T in range(2000): 85 | u,s,vh = np.linalg.svd(Omega*(O - tau*Z) + (1-Omega)*M, full_matrices=False) 86 | s[r:] = 0 87 | M = (u*s).dot(vh) 88 | tau_new = np.sum(Omega*Z*(O-M)) / np.sum(Omega*Z) 89 | if (np.abs(tau_new - tau) < 1e-4): 90 | return M, tau, "successful" 91 | tau = tau_new 92 | if (debug): 93 | print(tau) 94 | return M, tau, 'fail' 
-------------------------------------------------------------------------------- /src/causaltensor/cauest/DebiasConvex_backup.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import causaltensor.matlib.util as util 3 | from causaltensor.matlib.util import transform_to_3D 4 | 5 | 6 | def debias(M, tau, Z, l): 7 | u, s, vh = util.svd_fast(M) 8 | r = np.sum(s / np.cumsum(s) >= 1e-6) 9 | u = u[:, :r] 10 | vh = vh[:r, :] 11 | 12 | PTperpZ = np.zeros_like(Z) 13 | for k in np.arange(Z.shape[2]): 14 | PTperpZ[:, :, k] = util.remove_tangent_space_component(u, vh, Z[:, :, k]) 15 | 16 | D = np.zeros((Z.shape[2], Z.shape[2])) 17 | for k in np.arange(Z.shape[2]): 18 | for m in np.arange(k, Z.shape[2]): 19 | D[k, m] = np.sum(PTperpZ[:, :, k] * PTperpZ[:, :, m]) 20 | D[m, k] = D[k, m] 21 | 22 | Delta = np.array([l * np.sum(Z[:, :, k]*(u.dot(vh))) for k in range(Z.shape[2])]) 23 | 24 | tau_delta = np.linalg.pinv(D) @ Delta 25 | tau_debias = tau - tau_delta 26 | 27 | PTZ = Z - PTperpZ 28 | M_debias = M + l * u.dot(vh) + np.sum(PTZ * tau_delta.reshape(1, 1, -1), axis=2) 29 | return M_debias, tau_debias 30 | 31 | 32 | def prepare_OLS(Z): 33 | ### Select non-zero entries for OLS (optmizing sparsity of Zs) 34 | small_index = (np.sum(np.abs(Z) > 1e-9, axis=2) > 0) 35 | X = Z[small_index, :].astype(float) # small X 36 | ## X.shape = (#non_zero entries of Zs, num_treat) 37 | Xinv = np.linalg.inv(X.T @ X) 38 | return small_index, X, Xinv 39 | 40 | 41 | def DC_PR_with_l(O, Z, l, initial_tau = None, eps = 1e-6): 42 | """ 43 | De-biased Convex Panel Regression with the regularizer l. 44 | 45 | Parameters 46 | ------------- 47 | O : 2d float numpy array 48 | Observation matrix. 49 | Z : a list of 2d float numpy array or a single 2d/3d float numpy array 50 | Intervention matrices. If Z is a list, then each element of the list is a 2d numpy array. If Z is a single 2d numpy array, then Z is a single intervention matrix. If Z is a 3d numpy array, then Z is a collection of intervention matrices with the last dimension being the index of interventions. 51 | l : float 52 | Regularizer for the nuclear norm. 53 | intial_tau : (num_treat,) float numpy array 54 | Initial value(s) for tau. 55 | eps : float 56 | Convergence threshold. 57 | 58 | Returns 59 | ------------- 60 | M : 2d float numpy array 61 | Estimated matrix. 62 | tau : (num_treat,) float numpy array 63 | Estimated treatment effects. 64 | """ 65 | Z = transform_to_3D(Z) ## Z is (n1 x n2 x num_treat) numpy array 66 | if initial_tau is None: 67 | tau = np.zeros(Z.shape[2]) 68 | else: 69 | tau = initial_tau 70 | 71 | small_index, X, Xinv = prepare_OLS(Z) 72 | 73 | for T in range(2000): 74 | #### SVD to find low-rank M 75 | M = util.SVD_soft(O - np.tensordot(Z, tau, axes=([2], [0])), l) 76 | #### OLS to get tau 77 | y = (O - M)[small_index] #select non-zero entries 78 | tau_new = Xinv @ (X.T @ y) 79 | #### Check convergence 80 | if (np.linalg.norm(tau_new - tau) < eps * np.linalg.norm(tau)): 81 | return M, tau 82 | tau = tau_new 83 | return M, tau 84 | 85 | 86 | def non_convex_PR(O, Z, r, initial_tau = None, eps = 1e-6): 87 | """ 88 | Non-Convex Panel Regression with the rank r 89 | 90 | Parameters 91 | ------------- 92 | O : 2d float numpy array 93 | Observation matrix. 94 | Z : a list of 2d float numpy array or a single 2d/3d float numpy array 95 | Intervention matrices. If Z is a list, then each element of the list is a 2d numpy array. 
If Z is a single 2d numpy array, then Z is a single intervention matrix. If Z is a 3d numpy array, then Z is a collection of intervention matrices with the last dimension being the index of interventions. 96 | r : int 97 | rank constraint for the baseline matrix. 98 | intial_tau : (num_treat,) float numpy array 99 | Initial value(s) for tau. 100 | eps : float 101 | Convergence threshold. 102 | 103 | Returns 104 | ------------- 105 | M : 2d float numpy array 106 | Estimated baseline matrix. 107 | tau : (num_treat,) float numpy array 108 | Estimated treatment effects. 109 | """ 110 | Z = transform_to_3D(Z) ## Z is (n1 x n2 x num_treat) numpy array 111 | if initial_tau is None: 112 | tau = np.zeros(Z.shape[2]) 113 | else: 114 | tau = initial_tau 115 | 116 | small_index, X, Xinv = prepare_OLS(Z) 117 | 118 | for T in range(2000): 119 | #### SVD to find low-rank M 120 | M = util.SVD(O - np.tensordot(Z, tau, axes=([2], [0])), r) #hard truncation 121 | #### OLS to get tau 122 | y = (O - M)[small_index] #select non-zero entries 123 | tau_new = Xinv @ (X.T @ y) 124 | #### Check convergence 125 | if (np.linalg.norm(tau_new - tau) < eps * np.linalg.norm(tau)): 126 | return M, tau 127 | tau = tau_new 128 | return M, tau 129 | 130 | 131 | def solve_tau(O, Z): 132 | small_index, X, Xinv = prepare_OLS(Z) 133 | y = O[small_index] #select non-zero entries 134 | tau = Xinv @ (X.T @ y) 135 | return tau 136 | 137 | 138 | def DC_PR_with_suggested_rank(O, Z, suggest_r = 1, method = 'convex'): 139 | """ 140 | De-biased Convex Panel Regression with the suggested rank. Gradually decrease the nuclear-norm regularizer l until the rank of the next-iterated estimator exceeds r. 141 | 142 | :param O: observation matrix 143 | :param Z: intervention matrix 144 | 145 | """ 146 | Z = transform_to_3D(Z) ## Z is (n1 x n2 x num_treat) numpy array 147 | ## determine pre_tau 148 | pre_tau = solve_tau(O, Z) 149 | 150 | if method == 'convex' or method == 'auto': 151 | ## determine l 152 | coef = 1.1 153 | _, s, _ = util.svd_fast(O-np.tensordot(Z, pre_tau, axes=([2], [0]))) 154 | l = s[1]*coef 155 | ##inital pre_M and pre_tau for current l 156 | pre_M, pre_tau = DC_PR_with_l(O, Z, l, initial_tau = pre_tau) 157 | l = l / coef 158 | while (True): 159 | M, tau = DC_PR_with_l(O, Z, l, initial_tau = pre_tau) 160 | if (np.linalg.matrix_rank(M) > suggest_r): 161 | M_debias, tau_debias = debias(pre_M, pre_tau, Z, l*coef) 162 | M = util.SVD(M_debias, suggest_r) 163 | tau = tau_debias 164 | break 165 | pre_M = M 166 | pre_tau = tau 167 | l = l / coef 168 | if method == 'non-convex': 169 | M, tau = non_convex_PR(O, Z, suggest_r, initial_tau = pre_tau) 170 | 171 | if method == 'auto': 172 | M1, tau1 = non_convex_PR(O, Z, suggest_r, initial_tau = solve_tau(O, Z)) 173 | if np.linalg.matrix_rank(M) != suggest_r or np.linalg.norm(O-M-np.tensordot(Z, tau, axes=([2], [0]))) > np.linalg.norm(O-M1-np.tensordot(Z, tau1, axes=([2], [0]))): 174 | M = M1 175 | tau = tau1 176 | 177 | CI = panel_regression_CI(M, Z, O-M-np.tensordot(Z, tau, axes=([2], [0]))) 178 | standard_deviation = np.sqrt(np.diag(CI)) 179 | if len(tau) == 1: 180 | return M, tau[0], standard_deviation[0] 181 | else: 182 | return M, tau, standard_deviation 183 | 184 | 185 | def DC_PR_auto_rank(O, Z, spectrum_cut = 0.002, method='convex'): 186 | s = np.linalg.svd(O, full_matrices = False, compute_uv=False) 187 | suggest_r = np.sum(np.cumsum(s**2) / np.sum(s**2) <= 1-spectrum_cut) 188 | return DC_PR_with_suggested_rank(O, Z, suggest_r = suggest_r, method=method) 189 | 190 | 191 | def 
projection_T_orthogonal(Z, M): 192 | u, s, vh = np.linalg.svd(M, full_matrices = False) 193 | r = np.sum(s / np.cumsum(s) >= 1e-6) 194 | u = u[:, :r] 195 | vh = vh[:r, :] 196 | PTperpZ = (np.eye(u.shape[0]) - u.dot(u.T)).dot(Z).dot(np.eye(vh.shape[1]) - vh.T.dot(vh)) 197 | return PTperpZ 198 | 199 | 200 | def panel_regression_CI(M, Z, E): 201 | ''' 202 | Compute the confidence interval of taus using the first-order approximation. 203 | 204 | Parameters: 205 | ------------- 206 | M: the (approximate) baseline matrix 207 | Z: a list of intervention matrices 208 | E: the (approximate) noise matrix 209 | 210 | Returns 211 | ----------- 212 | CI: a kxk matrix that charaterizes the asymptotic covariance matrix of treatment estimation from non-convex panel regression, 213 | where k is the number of treatments 214 | ''' 215 | u, s, vh = util.svd_fast(M) 216 | r = np.sum(s / np.cumsum(s) >= 1e-6) 217 | u = u[:, :r] 218 | vh = vh[:r, :] 219 | 220 | X = np.zeros((Z.shape[0]*Z.shape[1], Z.shape[2])) 221 | for k in np.arange(Z.shape[2]): 222 | X[:, k] = util.remove_tangent_space_component(u, vh, Z[:, :, k]).reshape(-1) 223 | 224 | A = (np.linalg.inv(X.T@X)@X.T) 225 | CI = (A * np.reshape(E**2, -1)) @ A.T 226 | return CI -------------------------------------------------------------------------------- /src/causaltensor/cauest/MCNNM.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from causaltensor.matlib.util import transform_to_3D 3 | import causaltensor.matlib.util as util 4 | from causaltensor.cauest.panel_solver import FixedEffectPanelSolver 5 | from causaltensor.cauest.panel_solver import PanelSolver 6 | from causaltensor.cauest.result import Result 7 | 8 | 9 | def soft_impute(O, Omega, l, eps=1e-7, M_init=None, max_iter=2000): 10 | """Impute the missing entries of O under Ω with nuclear norm regularizer l. 11 | 12 | Parameters 13 | ---------- 14 | O: 2D numpy array 15 | Observed data. 16 | Ω: 2D numpy array 17 | Indicator matrix (1: observed, 0: missing). 18 | l: float 19 | Nuclear norm regularizer. 20 | eps: float 21 | Convergence threshold. 22 | M_init: 2D numpy array or None 23 | Initial guess of the underlying low-rank matrix. 24 | max_iter: int 25 | Maximum number of iterations. 26 | 27 | Returns 28 | ------- 29 | M_new: 2D numpy array 30 | Imputed matrix. 31 | """ 32 | if (M_init is None): 33 | M = np.zeros_like(O) 34 | else: 35 | M = M_init 36 | for T in range(max_iter): 37 | M_new = util.SVD_soft(O * Omega + M * (1-Omega), l) 38 | if (np.linalg.norm(M-M_new) < np.linalg.norm(M)*eps): 39 | break 40 | M = M_new 41 | return M_new 42 | 43 | class MCNNMResult(Result): 44 | def __init__(self, baseline = None, M = None, tau=None, beta=None, row_fixed_effects=None, column_fixed_effects=None, return_tau_scalar=False): 45 | super().__init__(baseline = baseline, tau = tau, return_tau_scalar = return_tau_scalar) 46 | self.beta = beta 47 | self.row_fixed_effects = row_fixed_effects 48 | self.column_fixed_effects = column_fixed_effects 49 | self.M = M # the low-rank component of the baseline model 50 | 51 | class MCNNMPanelSolver(PanelSolver): 52 | """ 53 | Solve the matrix completion problem with nuclear norm regularizer and fixed effects for panel data with covariates and missing data 54 | reference: https://arxiv.org/pdf/1710.10251.pdf 55 | """ 56 | 57 | def __init__(self, Z=None, X=None, Omega=None, fixed_effects = 'two-way'): 58 | """ 59 | Z: 2D bool numpy array 60 | The treatment matrix. 
61 | TODO: support multiple treatments for matrix completion algorithm 62 | TODO: support non-binary treatment matrix 63 | X: 3D float numpy array (n,m,p) or 2D float numpy array (n,m) or a list of 2D float numpy array 64 | The covariates matrix. The last dimension is the index of covariates. 65 | Omega: 2D bool numpy array (n,m) 66 | Indicator matrix (1: observed, 0: missing). 67 | The indicator matrix includes both the treated entries and untreated entries. 68 | fixed_effects: ['two-way'] 69 | two-way fixed effects or one-way fixed effects (to be implemented) 70 | """ 71 | if (Omega is None): 72 | Omega = np.ones_like(Z[:, :], dtype=bool) 73 | Omega = Omega.astype(bool) 74 | if np.sum(Z==1) + np.sum(Z == 0) != Z.shape[0]*Z.shape[1]: 75 | raise ValueError('Z should only consist of 0/1 in matrix completion solver') 76 | Z = Z.astype(bool) 77 | self.raw_Omega = Omega 78 | self.Z = (Z & Omega) # we only care the treatment matrix for observed entries 79 | self.Omega = ((1 - Z) & Omega) # we treat the treatment matrix as missing entries 80 | if (np.sum(np.sum(self.Omega, axis=1)==0)>0 or np.sum(np.sum(self.Omega, axis=0)==0) > 0): 81 | raise ValueError("Since a whole row or a whole column is treated, the matrix completion algorithm won't work!") 82 | 83 | self.X = X 84 | if self.X is not None: 85 | self.X = transform_to_3D(X) 86 | self.fixed_effects = fixed_effects 87 | self.FE_beta_solver = FixedEffectPanelSolver(fixed_effects=self.fixed_effects, X=self.X, Omega=self.Omega) 88 | self.return_tau_scalar = False 89 | 90 | def solve_with_regularizer(self, O=None, l=None, M_init=None, eps=1e-7, max_iter=2000): 91 | """ Solve the matrix completion problem with nuclear norm regularizer and fixed effects 92 | Parameters 93 | ---------- 94 | O: 2D numpy array 95 | the observation matrix 96 | l: float 97 | Nuclear norm regularizer. 98 | M_init: 2D numpy array or None 99 | Initial guess of the underlying low-rank matrix. 100 | eps: float 101 | Convergence threshold. 102 | max_iter: int 103 | Maximum number of iterations. 104 | Returns 105 | ------- 106 | res: Result 107 | res.M: 2D numpy array 108 | The estimated low-rank matrix. 109 | res.row_fixed_effects: 2D numpy array (n, 1) 110 | res.column_fixed_effects: 2D numpy array (m, 1) 111 | res.beta: 1D numpy array (p, ) if X is not None 112 | res.baseline_model: 2D numpy array 113 | The estimated baseline model (M+ai+bj+beta*X). 114 | res.tau: float 115 | The estimated treatment effect. 
116 | """ 117 | M = M_init 118 | if M is None: 119 | M = np.zeros_like(O) 120 | 121 | for T in range(max_iter): 122 | res = self.FE_beta_solver.fit(O - M) 123 | M_new = util.SVD_soft((O-res.fitted_value) * self.Omega + M * (1-self.Omega), l) 124 | if (np.sum((M-M_new)**2) < eps * np.sum(M**2)): 125 | break 126 | M = M_new 127 | 128 | baseline = res.fitted_value + M 129 | tau = np.sum((O - baseline)*self.Z) / np.sum(self.Z) 130 | res_new = MCNNMResult(baseline = baseline, M = M, tau = tau, 131 | beta = res.beta, 132 | row_fixed_effects = res.row_fixed_effects, 133 | column_fixed_effects = res.column_fixed_effects, 134 | return_tau_scalar = self.return_tau_scalar) 135 | return res_new 136 | 137 | def solve_with_suggested_rank(self, O=None, suggest_r=1): 138 | suggest_r = min(suggest_r, O.shape[0]) 139 | suggest_r = min(suggest_r, O.shape[1]) 140 | coef = 1.1 141 | u, s, vh = np.linalg.svd(O*self.Omega, full_matrices = False) 142 | l = s[1]*coef 143 | 144 | res = self.solve_with_regularizer(O=O, l=l) 145 | l = l / coef 146 | T = 2000 147 | for i in range(T): 148 | res_new = self.solve_with_regularizer(O=O, l=l, M_init=res.M) 149 | if (np.linalg.matrix_rank(res_new.M) > suggest_r): # we hope to minimize the l while keeping the rank of M to be suggest_r 150 | return res 151 | res = res_new 152 | l = l / coef 153 | 154 | def solve_with_cross_validation(self, O=None, K=2, list_l = []): 155 | """ 156 | Implement the K-fold cross validation in https://arxiv.org/pdf/1710.10251.pdf 157 | """ 158 | np.random.seed(42) #for reproducibility 159 | raw_Omega = self.raw_Omega 160 | def MSE_validate(res, valid_Ω): 161 | return np.sum((valid_Ω)*((O-res.baseline)**2)) / np.sum(valid_Ω) 162 | 163 | #K-fold cross validation 164 | train_list = [] 165 | valid_list = [] 166 | p = np.sum(self.Omega) / np.size(raw_Omega) # due to the treatment, the ratio of the missing entries 167 | for k in range(K): 168 | select = np.random.rand(O.shape[0], O.shape[1]) <= p 169 | train_list.append(raw_Omega * select) 170 | valid_list.append(raw_Omega * (1 - select)) 171 | 172 | if (len(list_l) == 0):# auto-selection of a list of regularization parameters 173 | _, s, _ = np.linalg.svd(O*self.Omega, full_matrices = False) 174 | l = s[1] #large enough regularization parameter 175 | for i in range(5): 176 | list_l.append(l) 177 | l /= 2 178 | 179 | error = np.ones((K, len(list_l))) * np.inf 180 | for k in range(K): 181 | #print(np.sum(self.Omega * (1-self.Z))) 182 | #print(np.sum((train_list[k]&self.Omega) * (1-self.Z))) 183 | 184 | solver = MCNNMPanelSolver(Z = self.Z, X=self.X, Omega=train_list[k], fixed_effects=self.fixed_effects) 185 | 186 | M = None 187 | for i, l in enumerate(list_l): 188 | res = solver.solve_with_regularizer(O=O, l=l, M_init=M) 189 | #import IPython; IPython.embed() 190 | error[k, i] = MSE_validate(res, valid_list[k]) 191 | M =res.M 192 | index = error.sum(axis=0).argmin() 193 | l_opt = list_l[index] 194 | res = self.solve_with_regularizer(O=O, l=l_opt) 195 | return res 196 | 197 | 198 | #backward compatability 199 | def MC_NNM_with_l(O, Omega, l): 200 | solver = MCNNMPanelSolver(Z = 1-Omega) 201 | res = solver.solve_with_regularizer(O, l) 202 | return res.M, res.row_fixed_effects, res.column_fixed_effects, res.tau 203 | 204 | def MC_NNM_with_suggested_rank(O, Omega, suggest_r=1): 205 | solver = MCNNMPanelSolver(Z = 1-Omega) 206 | res = solver.solve_with_suggested_rank(O, suggest_r) 207 | return res.M, res.row_fixed_effects, res.column_fixed_effects, res.tau 208 | 209 | def MC_NNM_with_cross_validation(O, Omega, 
K=5, list_l = []): 210 | solver = MCNNMPanelSolver(Z = 1-Omega) 211 | res = solver.solve_with_cross_validation(O, K, list_l) 212 | return res.M, res.row_fixed_effects, res.column_fixed_effects, res.tau -------------------------------------------------------------------------------- /src/causaltensor/cauest/OLSSyntheticControl.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.optimize import fmin_slsqp 3 | from sklearn.metrics import mean_squared_error 4 | from causaltensor.cauest.panel_solver import PanelSolver 5 | from causaltensor.cauest.result import Result 6 | 7 | 8 | class OLSSCResult(Result): 9 | def __init__(self, baseline = None, tau=None, beta=None, return_tau_scalar=False, individual_te=None, V=None): 10 | super().__init__(baseline = baseline, tau = tau, return_tau_scalar = return_tau_scalar) 11 | self.beta = beta # control unit weights 12 | self.M = baseline # the counterfactual 13 | self.individual_te = individual_te 14 | self.V = V # predictor importance 15 | 16 | 17 | class OLSSCPanelSolver(PanelSolver): 18 | def __init__(self, Y, Z, X=None, pval=False): 19 | """ 20 | @param Y: T x N matrix to be regressed 21 | @param Z: T x N intervention matrix of 0s and 1s 22 | @param X: K x N covariates (optional) 23 | """ 24 | self.Y = Y 25 | self.X = X 26 | self.T0, self.Y0, self.Y1, self.X0, self.X1, self.control_units, self.treatment_units = self.preprocess(Z) 27 | self.individual_te = np.zeros(len(self.treatment_units)) 28 | self.pval = pval 29 | 30 | 31 | 32 | 33 | def preprocess(self, Z): 34 | """ 35 | Split the observation matrix into Y0, Y1 and T0 36 | 37 | @param Z: T x N intervention matrix of 0s and 1s 38 | 39 | @return T0: number of pre-intervention (baseline) time periods 40 | @return Y0: T x control_units observation matrix 41 | @return Y1: T x treatment_units observation matrix 42 | @return control_units: column indices of the control units in O 43 | """ 44 | N = Z.shape[1] 45 | control_units = np.where(np.all(Z == 0, axis=0))[0] 46 | treatment_units = np.where(np.any(Z == 1, axis=0))[0] 47 | Y0 = self.Y[:, control_units] 48 | Y1 = self.Y[:, treatment_units] 49 | if self.X is not None: 50 | X0 = self.X[:, control_units] 51 | X1 = self.X[:, treatment_units] 52 | else: 53 | X0 = None 54 | X1 = None 55 | T0 = np.where(Z.any(axis=1))[0][0] 56 | return T0, Y0, Y1, X0, X1, control_units, treatment_units 57 | 58 | 59 | 60 | def ols_inference(self, Y1, Y0, X1=None, X0=None): 61 | """ 62 | given some treatment outcome data as well as some control outcome data, 63 | create a synthetic control and estimate the average treatment effect of the intervention 64 | 65 | @param Y1: outcome data for treated unit (T x 1 vector) 66 | 67 | @return counterfactual: counterfactual predicted by synthetic control 68 | @return tau: average treatment effect on test unit redicted by synthetic control 69 | """ 70 | def loss_v(W, y_c, y_t): 71 | return np.mean((y_t - y_c.dot(W))**2) 72 | 73 | def w_constraint(W, y_c, y_t): 74 | return np.sum(W) - 1 75 | 76 | 77 | 78 | y_c = Y0[:self.T0] # control units in pre-intervention 79 | y_t = Y1[:self.T0] # treatment unit in pre-intervention 80 | w_start = np.array([1/y_c.shape[1]]*y_c.shape[1]) # weights for each control units in synthetic control 81 | 82 | if X1 is not None: 83 | v_start = np.array([1/X0.shape[0]]*X0.shape[0]) # weights for each predictor 84 | 85 | def v_constraint(V, W, X0, X1, y_c, y_t): 86 | return np.sum(V) - 1 87 | 88 | def w_constraint(W, V, X0, X1): 89 | return 
np.sum(W) - 1 90 | 91 | def loss_w(W, V, X0, X1): 92 | return mean_squared_error(X1, X0.dot(W), sample_weight=V) 93 | 94 | def optimize_W(W, V, X0, X1): 95 | return fmin_slsqp(loss_w, W, bounds=[(0.0, 1.0)]*len(W), f_eqcons=w_constraint, 96 | args=(V, X0, X1), disp=False, full_output=True)[0] 97 | 98 | def optimize_V(V, W, X0, X1, y_c, y_t): 99 | w_at_v = optimize_W(W, V, X0, X1) 100 | return loss_v(w_at_v, y_c, y_t) 101 | 102 | 103 | V = fmin_slsqp(optimize_V, v_start, args=(w_start, X0, X1, y_c, y_t), bounds=[(0.0, 1.0)]*len(v_start), disp=False, f_eqcons=v_constraint, acc=1e-6) 104 | W = optimize_W(w_start, V, X0, X1) 105 | 106 | else: 107 | V = None 108 | W = fmin_slsqp(loss_v, w_start, args=(y_c, y_t), 109 | f_eqcons=w_constraint, 110 | bounds=[(0.0, 1.0)]*len(w_start), 111 | disp=False) 112 | 113 | M = Y0 @ W 114 | tau = np.mean((Y1-M)[self.T0:]) 115 | return M, tau, W, V 116 | 117 | 118 | def fit(self): 119 | T = len(self.Y1) 120 | V = [] 121 | weights = [] 122 | tau = 0 123 | M = np.copy(self.Y) 124 | self.individual_te = [] 125 | for i, s in enumerate(self.treatment_units): 126 | Y1_s = self.Y1[:,i].reshape((T,)) 127 | if self.X is not None: 128 | K = len(self.X1) 129 | X1_s = self.X1[:,i].reshape((K,)) 130 | counterfactual_s, tau_s, W_s, V_s = self.ols_inference(Y1_s, self.Y0, X1_s, self.X0) 131 | V.append(V_s) 132 | else: 133 | counterfactual_s, tau_s, W_s, V_s = self.ols_inference(Y1_s, self.Y0) 134 | tau += tau_s 135 | M[:, s] = counterfactual_s 136 | weights.append(W_s) 137 | self.individual_te.append([s, tau_s]) 138 | 139 | tau /= len(self.treatment_units) 140 | 141 | if self.pval: 142 | self.individual_te = self.permutation_test() 143 | 144 | res = OLSSCResult(baseline = M, tau = tau, individual_te=self.individual_te, beta=weights, V=V) 145 | 146 | 147 | return res 148 | 149 | 150 | def permutation_test(self): 151 | T = len(self.Y1) 152 | individual_te_control = [] 153 | for i, cu in enumerate(self.control_units): 154 | Y1_s = self.Y0[:,i].reshape((T,)) 155 | # create a synthetic control for eahc control unit using other control units 156 | _, tau_s = self.ols_inference(Y1_s, np.hstack((self.Y0[:, :i], self.Y0[:, i+1:]))) 157 | individual_te_control.append([cu, tau_s]) 158 | # rank treatment effects for both treatment and control units based on magnitude of tretament effect 159 | sorted_te = sorted(self.individual_te + individual_te_control, key=lambda x: abs(x[1]), reverse=True) 160 | n = len(sorted_te) 161 | p_values = [] 162 | # compute the probability of seeing treatment effects as extreme as current 163 | for i, unit_te in enumerate(sorted_te): 164 | if unit_te[0] in set(self.treatment_units): 165 | p_values.append(unit_te+[round((i+1)/n, 4)]) 166 | p_values = sorted(p_values, key=lambda x: x[0]) 167 | return p_values 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | #backward compatability 178 | 179 | def ols_synthetic_control(O, Z, X=None): 180 | solver = OLSSCPanelSolver(O, Z, X) 181 | res = solver.fit() 182 | return res.M, res.tau 183 | 184 | 185 | 186 | 187 | 188 | 189 | -------------------------------------------------------------------------------- /src/causaltensor/cauest/OLSSyntheticControl_old.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from sklearn.linear_model import LinearRegression, Lasso 4 | import statsmodels.api as sm 5 | import statsmodels.formula.api as smf 6 | from causaltensor.cauest.panel_solver import PanelSolver 7 | from 
causaltensor.cauest.result import Result 8 | 9 | 10 | class OLSSCResult(Result): 11 | def __init__(self, baseline = None, tau=None, beta=None, return_tau_scalar=False): 12 | super().__init__(baseline = baseline, tau = tau, return_tau_scalar = return_tau_scalar) 13 | self.beta = beta 14 | self.M = baseline # the counterfactual 15 | 16 | 17 | class OLSSCPanelSolver(PanelSolver): 18 | def __init__(self, X, Z, select_features=False): 19 | """ 20 | @param X: T x N matrix to be regressed 21 | @param Z: T x N intervention matrix of 0s and 1s 22 | 23 | @return M: the baseline matrix 24 | @return tau: the treatment effect 25 | """ 26 | self.X = X 27 | self.T0, self.Y0, self.Y1, self.control_units, self.treatment_units = self.preprocess(Z) 28 | self.select_features = select_features 29 | 30 | 31 | def preprocess(self, Z): 32 | """ 33 | Split the observation matrix into Y0, Y1 and T0 34 | 35 | @param Z: T x N intervention matrix of 0s and 1s 36 | 37 | @return T0: number of pre-intervention (baseline) time periods 38 | @return Y0: T x control_units observation matrix 39 | @return Y1: T x treatment_units observation matrix 40 | @return control_units: column indices of the control units in O 41 | """ 42 | N = Z.shape[1] 43 | control_units = np.where(np.all(Z == 0, axis=0))[0] 44 | treatment_units = np.where(np.any(Z == 1, axis=0))[0] 45 | Y0 = self.X[:, control_units] 46 | Y1 = self.X[:, ~np.isin(np.arange(N), control_units)] 47 | T0 = np.where(Z.any(axis=1))[0][0] 48 | return T0, Y0, Y1, control_units, treatment_units 49 | 50 | 51 | def feature_selection(self, Y1, alphas = np.logspace(-5,2,50, base=2.0)): 52 | """ 53 | Use Linear and Lasso Regressions to select which features (control stores) model the outcome (treatment store) 54 | 55 | @param Y1: outcome data for treated unit (T x 1 vector) 56 | @param alphas: values to try for Lasso regularization strength 57 | 58 | @return ans_select: list of indices of units to select for synthetic control 59 | @return max_score: maximum validation R^2 achieved 60 | """ 61 | 62 | num_control = self.Y0.shape[1] 63 | 64 | # Case for no control units 65 | if num_control == 0: 66 | return np.array([]), -np.inf 67 | 68 | # Create Training and Validation sets 69 | T0_train = int(0.75*self.T0) 70 | X_train, y_train = self.Y0[:T0_train, :], Y1[:T0_train] 71 | X_val, y_val = self.Y0[T0_train:, :], Y1[T0_train:] 72 | 73 | max_score = -np.inf 74 | ans_select = np.array([]) 75 | 76 | 77 | # Fit Linear Regression 78 | 79 | # Check if we have a full-row rank matrix 80 | if num_control > self.T0: # if features (num_control) are more than examples (T0_train), reduce the features to T0_train 81 | perm = np.random.permutation(num_control) 82 | select = np.array([False for i in range(num_control)]) 83 | select[perm[:self.T0]] = True 84 | else: # select everything 85 | select = np.array([True for i in range(num_control)]) 86 | 87 | OLS_estimator = LinearRegression(fit_intercept=True) 88 | OLS_estimator.fit(X_train[:, select], y_train) 89 | score = OLS_estimator.score(X_val[:, select], y_val) 90 | if score > max_score: 91 | max_score = score 92 | ans_select = select 93 | 94 | 95 | # Fit Lasso Regression 96 | for alpha in alphas: 97 | lasso_estimator = Lasso(alpha=alpha, fit_intercept=True, max_iter=5000) 98 | lasso_estimator.fit(X_train, y_train) 99 | 100 | # Get non-zero features (control units) 101 | select = (lasso_estimator.coef_ != 0) 102 | 103 | # ignore this alpha if no control units are selected by Lasso 104 | if np.sum(select) == 0: 105 | continue 106 | 107 | # Fit a Linear 
Model with the selected control units 108 | OLS_estimator = LinearRegression(fit_intercept=True) 109 | OLS_estimator.fit(X_train[:, select], y_train) 110 | score = OLS_estimator.score(X_val[:, select], y_val) 111 | 112 | if score > max_score: 113 | max_score = score 114 | ans_select = select 115 | 116 | 117 | # Handle Underdetermined Linear Regression 118 | num_selected = np.sum(ans_select) 119 | if num_selected > self.T0: # if regression selects too many features, unselect some of them 120 | num_extra = num_selected - self.T0 + 2 121 | true_idx = np.where(ans_select == True)[0] 122 | selected_idx = true_idx[np.random.permutation(len(true_idx))[:num_extra]] 123 | ans_select[selected_idx] = False 124 | 125 | return ans_select, max_score 126 | 127 | 128 | def ols_inference(self, Y1): 129 | """ 130 | given some treatment outcome data as well as some control outcome data, 131 | create a synthetic control and estimate the average treatment effect of the intervention 132 | 133 | @param Y1: outcome data for treated unit (T x 1 vector) 134 | 135 | @return counterfactual: counterfactual predicted by synthetic control 136 | @return tau: average treatment effect on test unit redicted by synthetic control 137 | """ 138 | if self.select_features: 139 | select, max_score = self.feature_selection(Y1) #get control units from lasso 140 | else: 141 | select, max_score = np.array([True]*self.Y0.shape[1]), np.inf #select all control units 142 | 143 | if select.size == 0: #if no controls, skip 144 | return None, None 145 | Y0_control = self.Y0[:,select] # keep only selected control units 146 | X_pre = Y0_control[:self.T0,:] 147 | y_pre = Y1[:self.T0] 148 | X_post = Y0_control[self.T0:,:] 149 | y_post = Y1[self.T0:] 150 | 151 | if max_score > 0: 152 | X_pre = sm.add_constant(X_pre) 153 | model = sm.OLS(y_pre, X_pre) 154 | else: 155 | y_pre -= np.mean(X_pre, axis = 1) 156 | y = pd.DataFrame(y_pre, columns = ['y']) 157 | model = smf.ols(formula='y ~ 1', data = y) 158 | 159 | results = model.fit() 160 | w = np.mean(X_post, axis=0) 161 | t_test_index = np.concatenate(([1], w)) 162 | b = np.mean(y_post) 163 | if max_score > 0: 164 | t_test = results.t_test((t_test_index, b)) 165 | Y0_control = sm.add_constant(Y0_control) 166 | counterfactual = results.predict(Y0_control) 167 | else: 168 | t_test = results.t_test((1, b-np.mean(X_post))) 169 | counterfactual = np.mean(Y0_control, axis=1) + results.params[0] 170 | 171 | 172 | tau = b - t_test.effect[0] 173 | return counterfactual, tau 174 | 175 | 176 | 177 | def fit(self): 178 | T, S = self.Y1.shape 179 | tau = 0 180 | M = np.copy(self.X) 181 | for s in range(S): 182 | Y1_s = self.Y1[:,s].reshape((T,)) 183 | counterfactual_s, tau_s = self.ols_inference(Y1_s) 184 | tau += tau_s 185 | M[:, self.treatment_units[s]] = counterfactual_s 186 | 187 | tau /= S 188 | res = OLSSCResult(baseline = M, tau = tau) 189 | return res 190 | 191 | #backward compatability 192 | def ols_synthetic_control(O, Z, select_features=False): 193 | solver = OLSSCPanelSolver(O, Z, select_features) 194 | res = solver.fit() 195 | return res.M, res.tau 196 | 197 | 198 | -------------------------------------------------------------------------------- /src/causaltensor/cauest/Readme.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Debias Convex Algorithm\n", 8 | "\n", 9 | "### Input:\n", 10 | "\n", 11 | "- O: the observation\n", 12 | "- Z: the treatment pattern\n", 13 
| "- $\\lambda$: the parameter for nuclear norm\n", 14 | "\n", 15 | "### Output:\n", 16 | "The goal is to minimize\n", 17 | "$$\n", 18 | "\\min_{M,\\tau} \\frac{1}{2p}\\|P_{\\Omega}(O-M-\\tau Z)\\|_{F}^2 + \\lambda \\| M\\|_{*} \n", 19 | "$$\n", 20 | "\n", 21 | "We do this by implementing an iterative algorithm:\n", 22 | "\n", 23 | "- $M_0 = 0, \\tau_0 = 0, t = 0$\n", 24 | "- $t = t + 1$\n", 25 | "- $M_t = \\text{soft\\_thresholding}(O-\\tau Z, \\lambda)$ \n", 26 | "- $\\tau_{t} = /\\|Z\\|_{F}^2$\n", 27 | "- Until $|\\tau_{t} - \\tau_{t-1}| < 1e-3$" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "## MC-NNM (Bayati etc. 2016 algorithm)\n", 35 | "\n", 36 | "### View treatment as missing entries, then use convex optimization\n", 37 | "\n", 38 | "### Input:\n", 39 | " \n", 40 | "- O: the observation\n", 41 | "- Ω: the set of no-treatment (Ω = 1 - Z)\n", 42 | "- l: the parameter for nuclear norm minimization\n", 43 | "\n", 44 | "### Output:\n", 45 | "\n", 46 | "#### Without fixed effects\n", 47 | "\n", 48 | "solve the optimization:\n", 49 | "Let p be the observation probability: $ p =|\\Omega| / \\text{np.size}(O)$,\n", 50 | "\\begin{align}\n", 51 | "\\min_{M} \\frac{1}{2p} \\|P_{\\Omega}(O-M)\\|_{F}^2 + \\lambda \\|M\\|_{*}\n", 52 | "\\end{align}\n", 53 | "\n", 54 | "We do this by iteration (soft_impute)\n", 55 | "\n", 56 | "- $M_0 = 0$\n", 57 | "- $M_t = \\text{soft\\_thresholding}(Ω*O + (1-Ω)*M, \\lambda * p)$\n", 58 | "\n", 59 | "After convergence, obtaining $M$, Let\n", 60 | "$$\n", 61 | "\\tau = <1-Ω, O-M> / \\|1-Ω\\|_{F}^2\n", 62 | "$$\n", 63 | "\n", 64 | "#### With fixed effects\n", 65 | "\n", 66 | "solve the optimization\n", 67 | "\\begin{align}\n", 68 | "\\min_{M} \\frac{1}{2p} \\|P_{\\Omega}(O - a1^{T} - 1b^{T} - M)\\|_{F}^2 + \\lambda \\|M\\|_{*}\n", 69 | "\\end{align}\n", 70 | "\n", 71 | "We do this by:\n", 72 | "\n", 73 | "- $M_0 = 0, a^0 = 0, b^0 = 0$\n", 74 | "- fix $a^t, b^t$, solve $M_t = \\text{soft\\_thresholding}(Ω*(O-a^t1^{T}-1b^{tT}) + (1-Ω)*M, \\lambda * p)$\n", 75 | "- fix $M_t$, solve the following convex optimization\n", 76 | "$$\n", 77 | "\\min_{a \\in R^{n}, b \\in R^{n}} \\sum_{(i,j)\\in \\Omega} (O_{ij}-M_{t,ij} - a_{i} - b_{j})^2\n", 78 | "$$\n", 79 | "\n", 80 | "we solve the above by iteration again (one may change to solve a linear system by considering the first-order condition, not sure which one is quicker)\n", 81 | "- fix $b$, \n", 82 | "$$ \n", 83 | "a_i = \\frac{\\sum_{j, (i,j) \\in \\Omega} O_{ij} - M_{t,ij} - b_{j}}{\\sum_{j, (i, j) \\in \\Omega} 1}\n", 84 | "$$\n", 85 | "- fix $a$\n", 86 | "$$\n", 87 | "b_j = \\frac{\\sum_{i, (i,j) \\in \\Omega} O_{ij} - M_{t,ij} - a_{i}}{\\sum_{i, (i, j) \\in \\Omega} 1}\n", 88 | "$$\n" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "## Covariance PCA, Xiong and Pelger, 2019\n", 96 | "\n", 97 | "Input the matrix $O \\in R^{n\\times T}$ and Ω\n", 98 | "\n", 99 | "### Step 1: estimate loadings from covariance matrix\n", 100 | "\n", 101 | "\\begin{align}\n", 102 | "A_{ij} = \\frac{1}{\\sum_{k, Ω_{i,k} = Ω_{j,k}=1} 1}\\sum_{k, Ω_{i,k} = Ω_{j,k}=1} O_{i,k} \\cdot O_{j,k}\n", 103 | "\\end{align}\n", 104 | "\n", 105 | "U = $\\sqrt{n}$ * First r eigenvectors of A, therefore $B \\in R^{n\\times r}$\n", 106 | "\n", 107 | "### Step 2: regress the factors\n", 108 | "Let $Y \\in R^{T\\times r}$.\n", 109 | "\\begin{align}\n", 110 | "Y_{i,\\cdot} = \\frac{1}{\\sum_{k, Ω_{k,i}=1} 1}\\sum_{k, Ω_{k,i}=1} O_{k,i} U_{k,\\cdot} \n", 111 | 
"\\end{align}\n", 112 | "\n", 113 | "$M = U \\cdot Y^{T}$ is the estimator for $M^{*}$\n", 114 | "\n", 115 | "### Step 3: estimate $\\tau$\n", 116 | "\\begin{align}\n", 117 | "\\tau = \\frac{1}{|Z|_0}.\n", 118 | "\\end{align}" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "## Robust Synthetic Control Shah etc 2017" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "## Treatment Pattern Generation\n", 133 | "\n", 134 | "### Block Pattern\n", 135 | "\n", 136 | "\n", 137 | "### i.i.d Pattern\n", 138 | "\n", 139 | "\n", 140 | "### Two Segment Pattern\n", 141 | "\n", 142 | "### Adaptive Treatment Pattern\n", 143 | "\n", 144 | "#### Input: lowest_T, lasting_T, M\n", 145 | "\n", 146 | "For each row i, if M(i,j) is the smallest among M(i, j-lowest_T:j) and no treatments on (i, j-lowest_T:j), then start the treatment on M(i,j+1) to M(i,j+lasting_T+1)" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 40, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "def adpative_treatment_pattern(lowest_T, lasting_T, M):\n", 156 | " Z = np.zeros_like(M)\n", 157 | " for i in range(Z.shape[0]):\n", 158 | " j = 0\n", 159 | " #print(i)\n", 160 | " while j < Z.shape[1]:\n", 161 | " flag = 0\n", 162 | " for k in range(1, lowest_T+1):\n", 163 | " if (j-k < 0 or Z[i, j-k]==1 or M[i,j] > M[i,j-k]):\n", 164 | " flag = 1\n", 165 | " break\n", 166 | "\n", 167 | " if (flag == 0):\n", 168 | " for k in range(1, lasting_T+1):\n", 169 | " if (j+k < Z.shape[1]):\n", 170 | " Z[i, j+k] = 1\n", 171 | " j += lasting_T + lowest_T\n", 172 | " else:\n", 173 | " j = j + 1\n", 174 | " return Z" 175 | ] 176 | } 177 | ], 178 | "metadata": { 179 | "interpreter": { 180 | "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49" 181 | }, 182 | "kernelspec": { 183 | "display_name": "Python 3.9.5 64-bit", 184 | "name": "python3" 185 | }, 186 | "language_info": { 187 | "name": "python", 188 | "version": "" 189 | }, 190 | "orig_nbformat": 2 191 | }, 192 | "nbformat": 4, 193 | "nbformat_minor": 2 194 | } 195 | -------------------------------------------------------------------------------- /src/causaltensor/cauest/RobustSyntheticControl.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | #from sklearn.linear_model import LinearRegression 3 | 4 | 5 | def stagger_pattern_RSC(O, Z, suggest_r = 1): 6 | starting_time = O.shape[1] - np.sum(Z, axis=1).astype(int) 7 | donor_units = np.arange(O.shape[0])[starting_time == O.shape[1]] 8 | #print(donor_units) 9 | 10 | M = O[donor_units, :] 11 | 12 | u, s, vh = np.linalg.svd(M, full_matrices = False) 13 | r = suggest_r 14 | Mnew = (u[:,:r]*s[:r]).dot(vh[:r, :]) 15 | Mhat = np.zeros_like(O) 16 | Mhat[donor_units, :] = Mnew 17 | 18 | for i in range(O.shape[0]): 19 | start = starting_time[i] 20 | if (start == O.shape[1]): 21 | continue 22 | coef = np.linalg.pinv(Mnew[:,:start].T).dot(O[i, :start].T) 23 | Mhat[i, :] = Mnew.T.dot(coef) 24 | 25 | tau = np.sum(Z*(O-Mhat)) / np.sum(Z) 26 | return Mhat, tau 27 | 28 | def robust_synthetic_control(O, suggest_r=-1, treat_units = [0], starting_time = 100): 29 | ##Step 1, denoise 30 | 31 | if (starting_time == 0): 32 | raise Exception('Error: treatment starting at t=0 in synthetic control!') 33 | 34 | 35 | donor_units = [] 36 | for i in range(O.shape[0]): 37 | if (i not in treat_units): 38 | donor_units.append(i) 39 | 40 | M = O[donor_units, :] 41 
| 42 | u, s, vh = np.linalg.svd(M, full_matrices = False) 43 | 44 | def recover(r, start, end): 45 | Mnew = (u[:,:r]*s[:r]).dot(vh[:r, :]) 46 | Mhat = np.zeros_like(O) 47 | Mhat[donor_units, :] = Mnew 48 | 49 | ##Step 2, linear regression 50 | Mminus = Mnew[:, :start] 51 | for i in treat_units: 52 | coef = np.linalg.pinv(Mminus.T).dot(O[i, :start].T) 53 | Mhat[i, :] = Mnew.T.dot(coef) 54 | 55 | MSE = np.sum((Mhat - O)[treat_units, start:end]**2) 56 | return MSE, Mhat 57 | 58 | if (suggest_r == -1): 59 | 60 | energy = np.sum(s) 61 | valid_start = int(starting_time/2+0.5) 62 | 63 | opt_MSE = 1e9 64 | opt_r = 1e9 65 | 66 | #Cross Validation to choose the optimal r 67 | 68 | for r in range(1,len(s)): 69 | if (np.sum(s[r-1:]) / energy <= 0.03): 70 | break 71 | MSE, Mhat = recover(r, valid_start, starting_time) 72 | if (MSE < opt_MSE): 73 | opt_MSE = MSE 74 | opt_r = r 75 | #print(MSE, r, np.sum(s[r-1:]) / energy) 76 | 77 | else: 78 | opt_r = suggest_r 79 | 80 | #print(opt_r) 81 | MSE, Mhat = recover(opt_r, starting_time, O.shape[1]) 82 | 83 | Z = np.zeros_like(O) 84 | Z[treat_units, starting_time:] = 1 85 | tau = np.sum(Z*(O-Mhat)) / np.sum(Z) 86 | return Mhat, tau -------------------------------------------------------------------------------- /src/causaltensor/cauest/Row_Specific_Treatments.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import copy 3 | 4 | def convex_algorithm_row_specific_treatments(O, Omega, Z, l, suggest = [], eps = 1e-3, debug = False): 5 | if (len(suggest) == 0): 6 | M = np.zeros_like(O) 7 | tau = np.zeros((O.shape[0] ,1)) 8 | else: 9 | M = suggest[0] 10 | tau = suggest[1] 11 | 12 | for T in range(2000): 13 | ## update M 14 | u,s,vh = np.linalg.svd(Omega*(O - tau*Z) + (1-Omega)*M, full_matrices=False) 15 | #print(s) 16 | #print('before thresholding', np.sum(s), tau) 17 | s = np.maximum(s-l, 0) 18 | M = (u*s).dot(vh) 19 | 20 | #print(np.sum(s)) 21 | #print(s) 22 | 23 | 24 | tau_new = np.sum(Omega * Z * (O - M), axis=1) / (np.sum(Omega * Z, axis = 1) + 1e-10) # update tau 25 | tau_new = tau_new.reshape((O.shape[0], 1)) 26 | if (np.linalg.norm(tau_new - tau) < eps): 27 | #print('iterations', T) 28 | return M, tau, 'successful' 29 | tau = tau_new 30 | 31 | if (debug): 32 | print(tau) 33 | return M, tau, 'fail' 34 | 35 | def debias_row_specific(M, tau, Z, l): 36 | u, s, vh = np.linalg.svd(M, full_matrices = False) 37 | r = np.sum(s >= 1e-5) 38 | u = u[:, :r] 39 | vh = vh[:r, :] 40 | PTperpZ = (np.eye(u.shape[0]) - u.dot(u.T)).dot(Z).dot(np.eye(vh.shape[1]) - vh.T.dot(vh)) 41 | 42 | D = np.zeros((M.shape[0], M.shape[0])) 43 | for i in range(M.shape[0]): 44 | if (np.sum(Z[i, :]) == 0): 45 | continue 46 | Z_i = np.zeros_like(M) 47 | Z_i[i, :] = Z[i, :] 48 | PTperpZ_i = (np.eye(u.shape[0]) - u.dot(u.T)).dot(Z_i).dot(np.eye(vh.shape[1]) - vh.T.dot(vh)) 49 | 50 | #print(D.shape, np.sum(PTperpZ_i * Z, axis = 1).shape) 51 | D[i, :] = np.sum(PTperpZ_i * Z, axis = 1) 52 | 53 | delta = np.sum(l * Z*(u.dot(vh)), axis = 1).reshape((M.shape[0], 1)) 54 | tau_d = tau - np.linalg.pinv(D) @ delta 55 | return tau_d -------------------------------------------------------------------------------- /src/causaltensor/cauest/SDID.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cvxpy as cp 3 | from causaltensor.cauest.panel_solver import PanelSolver 4 | from causaltensor.cauest.result import Result 5 | 6 | ''' 7 | An implementation of "Synthetic 
Difference-in-Differences" from [1] 8 | 9 | Created by Tianyi Peng, 2021/03/01 10 | Credit to Andy Zheng for the revised version, 2022/01/15 11 | 12 | [1] Arkhangelsky, Dmitry, Susan Athey, David A. Hirshberg, Guido W. Imbens, and Stefan Wager. 2021. 13 | "Synthetic Difference-in-Differences." American Economic Review, 111 (12): 4088–4118 14 | ''' 15 | class SDIDResult(Result): 16 | def __init__(self, baseline = None, tau=None, beta=None, row_fixed_effects=None, column_fixed_effects=None, return_tau_scalar=False): 17 | super().__init__(baseline = baseline, tau = tau, return_tau_scalar = return_tau_scalar) 18 | self.beta = beta 19 | self.row_fixed_effects = row_fixed_effects 20 | self.column_fixed_effects = column_fixed_effects 21 | self.M = baseline 22 | 23 | 24 | 25 | class SDIDPanelSolver(PanelSolver): 26 | def __init__(self, Z=None, O=None, X_cov=None, treat_units = [-1], starting_time = -1): 27 | ''' 28 | Input: 29 | O: nxT observation matrix 30 | Z: nxT binary treatment matrix 31 | X_cov: n x T x p array of exogenous covariates (optional). If provided, 32 | the algorithm will first compute residuals: 33 | Y_res = Y - X_cov * beta_t (for each time t) 34 | where beta_t is obtained by regressing Y[:,t] on X_cov[:,t] (with an intercept). 35 | This is based on footnote number 4 from [1] 36 | treat_units: a list containing elements in [0, 1, 2, ..., n-1] 37 | starting_time: for treat_units, pre-treatment time is 0, 1, .., starting_time-1 38 | Output: 39 | the average treatment effect estimated by [1] 40 | ''' 41 | super().__init__(Z) 42 | if self.Z.shape[2] == 1: 43 | self.Z = self.Z.reshape(self.Z.shape[0], self.Z.shape[1]) 44 | self.X = O 45 | self.treat_units = treat_units 46 | self.starting_time = starting_time 47 | self.X_cov = X_cov 48 | if (starting_time == -1): 49 | self.SDID_preprocess() 50 | 51 | def SDID_preprocess(self): 52 | n1, n2 = self.X.shape 53 | self.treat_units = [] 54 | for i in range(n1): 55 | if self.Z[i, -1] != 0: 56 | self.treat_units.append(i) 57 | if len(self.treat_units) == 0: 58 | print('no treated unit, or the treatment is not a block!!') 59 | return 60 | i = self.treat_units[0] 61 | for j in range(n2-1, -1, -1): 62 | if self.Z[i, j] == 0: 63 | break 64 | self.starting_time = j + 1 65 | 66 | def adjust_for_covariates(self): 67 | """ 68 | For each time period t, regress the outcome Y[:, t] on the covariates X_cov[:, t] 69 | (with intercept) and replace Y[:, t] by the residuals. 70 | 71 | Input: 72 | - self.X is an (n x T) outcome matrix. 73 | - self.X_cov is an (n x T x p) array of covariates. 74 | 75 | Output: 76 | self.X will contain the residuals computed as: 77 | Y_res = Y - X_cov * beta_t, for each time period t. 
78 | """ 79 | n, T = self.X.shape 80 | X_resid = np.zeros_like(self.X) 81 | 82 | for t in range(T): 83 | X_t = self.X_cov[:, t, :] 84 | X_t_aug = np.concatenate([np.ones((n, 1)), X_t], axis=1) 85 | y_t = self.X[:, t] 86 | beta_t, _, _, _ = np.linalg.lstsq(X_t_aug, y_t, rcond=None) 87 | y_pred = X_t_aug @ beta_t 88 | X_resid[:, t] = y_t - y_pred 89 | 90 | self.X = X_resid 91 | 92 | 93 | def fit(self): 94 | 95 | if self.X_cov is not None: 96 | self.adjust_for_covariates() 97 | 98 | self.donor_units = [] 99 | for i in range(self.X.shape[0]): 100 | if (i not in self.treat_units): 101 | self.donor_units.append(i) 102 | 103 | Nco = len(self.donor_units) 104 | Ntr = len(self.treat_units) 105 | Tpre = self.starting_time 106 | Tpost = self.X.shape[1] - self.starting_time 107 | 108 | ##Step 1, Compute regularization parameter 109 | 110 | D = self.X[self.donor_units, 1:self.starting_time] - self.X[self.donor_units, :self.starting_time-1] 111 | D_bar = np.mean(D) 112 | z_square = np.mean((D - D_bar)**2) * (np.sqrt(Ntr * Tpost)) 113 | 114 | ##Step 2, Compute w^{sdid} 115 | 116 | w = cp.Variable(Nco) 117 | w0 = cp.Variable(1) 118 | G = np.eye(Nco) 119 | A = np.ones(Nco) 120 | #G @ w >= 0 121 | #A.T @ w == 1 122 | 123 | mean_treat = np.mean(self.X[self.treat_units, :Tpre], axis = 0) 124 | 125 | ## solving linear regression with constraints 126 | prob = cp.Problem( 127 | cp.Minimize( 128 | cp.sum_squares( 129 | w0+self.X[self.donor_units, :Tpre].T @ w - mean_treat) 130 | + z_square * Tpre * cp.sum_squares(w)), 131 | [G @ w >= 0, A.T @ w == 1]) 132 | prob.solve() 133 | #print("\nThe optimal value is", prob.value) 134 | #print("A solution w is") 135 | #print(w.value) 136 | 137 | w_sdid = np.zeros(self.X.shape[0]) 138 | w_sdid[self.donor_units] = w.value 139 | w_sdid[self.treat_units] = 1.0 / Ntr 140 | 141 | ##Step 3, Compute l^{sdid} 142 | l = cp.Variable(Tpre) 143 | l0 = cp.Variable(1) 144 | G = np.eye(Tpre) 145 | A = np.ones(Tpre) 146 | #G @ w >= 0 147 | #A.T @ w == 1 148 | 149 | mean_treat = np.mean(self.X[self.donor_units, Tpre:], axis = 1) 150 | #print(mean_treat) 151 | #print(mean_treat.shape) 152 | 153 | prob = cp.Problem( 154 | cp.Minimize( 155 | cp.sum_squares( 156 | l0+self.X[self.donor_units, :Tpre] @ l - mean_treat)), 157 | [G @ l >= 0, A.T @ l == 1]) 158 | prob.solve() 159 | #breakpoint() 160 | #print("\nThe optimal value is", prob.value) 161 | #print("A solution w is") 162 | #print(l.value) 163 | 164 | l_sdid = np.zeros(self.X.shape[1]) 165 | l_sdid[:Tpre] = l.value 166 | l_sdid[Tpre:] = 1.0 / Tpost 167 | 168 | ##Step 4, Compute SDID estimator 169 | #tau = w_sdid.T @ O @ l_sdid 170 | 171 | 172 | n1 = self.X.shape[0] 173 | n2 = self.X.shape[1] 174 | 175 | weights = w_sdid.reshape((self.X.shape[0], 1)) @ l_sdid.reshape((1, self.X.shape[1])) 176 | 177 | a = np.zeros((n1, 1)) 178 | b = np.zeros((n2, 1)) 179 | tau = 0 180 | 181 | one_row = np.ones((1, n2)) 182 | one_col = np.ones((n1, 1)) 183 | converged = False 184 | for T1 in range(1000): 185 | a_new = np.sum((self.X-tau*self.Z-one_col.dot(b.T))*weights, axis=1).reshape((n1, 1)) / np.sum(weights, axis=1).reshape((n1, 1)) 186 | b_new = np.sum((self.X-tau*self.Z-a.dot(one_row))*weights, axis=0).reshape((n2, 1)) / np.sum(weights, axis=0).reshape((n2, 1)) 187 | if (np.sum((b_new - b)**2) < 1e-7 * np.sum(b**2) and 188 | np.sum((a_new - a)**2) < 1e-7 * np.sum(a**2)): 189 | converged = True 190 | break 191 | a = a_new 192 | b = b_new 193 | M = a.dot(one_row)+one_col.dot(b.T) 194 | tau = np.sum(self.Z*(self.X-M)*weights)/np.sum(self.Z*weights) 195 | 196 
| res = SDIDResult(baseline = M, tau = tau) 197 | return res 198 | 199 | # backward compatibility 200 | def SDID(O, Z, X_cov=None, treat_units = [-1], starting_time = -1): 201 | solver = SDIDPanelSolver(Z, O, X_cov, treat_units, starting_time) 202 | res = solver.fit() 203 | return res.tau 204 | -------------------------------------------------------------------------------- /src/causaltensor/cauest/__init__.py: -------------------------------------------------------------------------------- 1 | from .DebiasConvex import * 2 | from .DID import * 3 | from .SDID import SDID 4 | from .MCNNM import * 5 | from .panel_solver import * 6 | from .result import * -------------------------------------------------------------------------------- /src/causaltensor/cauest/panel_solver.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from causaltensor.cauest.result import Result 3 | from causaltensor.matlib.util import transform_to_3D 4 | 5 | class PanelSolver(): 6 | def __init__(self, Z): 7 | if isinstance(Z, np.ndarray): 8 | if len(Z.shape) == 2: 9 | self.return_tau_scalar = True 10 | self.Z = transform_to_3D(Z) 11 | 12 | class OLSResult(): 13 | def __init__(self, beta=None): 14 | self.beta = beta 15 | 16 | class OLSPanelSolver(): 17 | """Solve the OLS regression for panel data with covariates and missing data 18 | 19 | Y ~ X * beta 20 | """ 21 | def __init__(self, X, Omega=None, is_sparse_X=True): 22 | """ 23 | X: 3D float numpy array (n,m,p) 24 | The covariates matrix. The last dimension is the index of covariates. 25 | Omega: 2D bool numpy array (n,m) 26 | Indicator matrix (1: observed, 0: missing). 27 | is_sparse_X: bool 28 | if True, then remove the elements if all zero for covariates X 29 | """ 30 | if is_sparse_X: 31 | relevant_index = (np.sum(np.abs(X) > 1e-9, axis=2) > 0) #if all zero for X, then not included 32 | else: 33 | relevant_index = np.ones_like(X[:, :, 0], dtype=bool) 34 | if Omega is not None: #if there are missing entries 35 | relevant_index = (relevant_index & Omega.astype(bool)) #if missing, then not included 36 | X = X[relevant_index, :].astype(float) # compress X to the shape of (l, p) where l is the number of relevant observations and p is the number of covariates 37 | Xinv = np.linalg.inv(X.T @ X) #compute the inverse of the covariance matrix 38 | self.X = X 39 | self.relevant_index = relevant_index 40 | self.Xinv = Xinv 41 | 42 | def fit(self, O): 43 | """Solve the OLS regression for panel data with covariates and missing data 44 | 45 | Parameters 46 | ---------- 47 | O: 2D float numpy array 48 | The observation matrix. 49 | Returns 50 | ------- 51 | res: Result 52 | The result of OLS regression. 53 | res.beta: 1D numpy array (p, ) 54 | The estimated coefficients for Y~X*beta. 
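        Example (illustrative, noise-free data so that beta is recovered exactly):
            X = np.random.rand(10, 8, 2)                # (n, m, p) covariates
            O = 1.5 * X[:, :, 0] - 0.5 * X[:, :, 1]     # outcomes generated with beta = [1.5, -0.5]
            res = OLSPanelSolver(X).fit(O)              # res.beta is approximately [1.5, -0.5]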
55 | """ 56 | O = O[self.relevant_index] #select non-zero entries, the resulting shape of O is (l, ) 57 | beta = self.Xinv @ (self.X.T @ O) 58 | res = Result() 59 | res.beta = beta 60 | return res 61 | 62 | class FixedEffectResult(): 63 | def __init__(self, beta=None, row_fixed_effects=None, column_fixed_effects=None, fitted_value=None): 64 | self.beta = beta 65 | self.row_fixed_effects = row_fixed_effects 66 | self.column_fixed_effects = column_fixed_effects 67 | self.fitted_value = fitted_value 68 | 69 | 70 | class FixedEffectPanelSolver(PanelSolver): 71 | """ Solve the OLS regression for panel data with covariates, missing data, and fixed effects 72 | 73 | Y ~ X * beta + ai + bj 74 | 75 | The implementation is based on the partial regrerssion method (which speeds up the computation significantly comparing to naive OLS): 76 | Let demean_Y be the residule of Y ~ ai + bj 77 | Let demean_X be the residule of X ~ ai + bj 78 | Solve beta by demean_Y ~ demean_X * beta 79 | """ 80 | 81 | def __init__(self, fixed_effects='two-way', X=None, Omega=None, 82 | demean_eps=1e-7, demean_max_iter=2000): 83 | """ 84 | fixed_effects: ['two-way'] 85 | two-way fixed effects 86 | TODO: implement one-way fixed effects 87 | X: 3D float numpy array (n,m,p) 88 | The covariates matrix. The last dimension is the index of covariates. 89 | Omega: 2D bool numpy array (n,m) 90 | Indicator matrix (1: observed, 0: missing). 91 | """ 92 | self.fixed_effects = fixed_effects 93 | if fixed_effects != 'two-way': 94 | raise NotImplementedError('Only two-way fixed effects are implemented.') 95 | self.X = X 96 | self.Omega = Omega 97 | 98 | if (X is not None): 99 | demean_X = np.zeros_like(X) 100 | for i in range(X.shape[2]): 101 | demean_X[:, :, i], _, _ = self.demean(X[:, :, i], eps=demean_eps, max_iter=demean_max_iter) 102 | self.demean_X = demean_X 103 | self.OLS_solver = OLSPanelSolver(demean_X, Omega) 104 | 105 | def demean(self, O, eps=1e-7, max_iter=2000): 106 | """ demean O by row and column (regress O by ai + bj on self.Omega) 107 | """ 108 | if self.Omega is None: 109 | self.Omega = np.ones_like(O) 110 | n1 = O.shape[0] 111 | n2 = O.shape[1] 112 | one_row = np.ones((1, n2)) 113 | one_col = np.ones((n1, 1)) 114 | Ω_row_sum = np.sum(self.Omega, axis = 1).reshape((n1, 1)) 115 | Ω_column_sum = np.sum(self.Omega, axis = 0).reshape((n2, 1)) 116 | Ω_row_sum[Ω_row_sum==0] = 1 117 | Ω_column_sum[Ω_column_sum==0] = 1 118 | b = np.zeros((n2, 1)) 119 | for T in range(max_iter): 120 | a = np.sum(self.Omega*(O-one_col.dot(b.T)), axis=1).reshape((n1, 1)) / Ω_row_sum 121 | 122 | b_new = np.sum(self.Omega*(O-a.dot(one_row)), axis=0).reshape((n2, 1)) / Ω_column_sum 123 | 124 | if (np.sum((b_new - b)**2) < eps * np.sum(b**2)): 125 | break 126 | b = b_new 127 | return O - a.dot(one_row) - one_col.dot(b.T), a, b 128 | 129 | def fit(self, O): 130 | """ Solve the OLS regression for panel data with covariates, missing data, and fixed effects 131 | Parameters 132 | ---------- 133 | O: 2D numpy array 134 | The observation matrix. 135 | Returns 136 | ------- 137 | res: Result 138 | The result of OLS regression. 
139 | res.beta: 1D numpy array (p, ) if X is not None 140 | res.row_fixed_effects: 2D numpy array (n, 1) 141 | res.column_fixed_effects: 2D numpy array (m, 1) 142 | res.fitted_value: 2D numpy array (n, m) 143 | fitted_value of O ~ X * beta + ai + bj 144 | """ 145 | res = FixedEffectResult() 146 | demean_O, a, b = self.demean(O) 147 | if (self.X is not None): 148 | res_OLS = self.OLS_solver.fit(demean_O) 149 | res.beta = res_OLS.beta 150 | 151 | residual, a, b = self.demean(O - np.sum(res.beta * self.X, axis=2)) 152 | res.row_fixed_effects = a 153 | res.column_fixed_effects = b 154 | res.fitted_value = a + b.T + np.sum(res.beta * self.X, axis=2) 155 | else: 156 | res.row_fixed_effects = a 157 | res.column_fixed_effects = b 158 | res.fitted_value = a + b.T 159 | return res 160 | -------------------------------------------------------------------------------- /src/causaltensor/cauest/result.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class Result(): 4 | def __init__(self, baseline = None, tau = None, covariance_tau = None, std_tau = None, return_tau_scalar=False): 5 | self.baseline = baseline # the baseline outcome (i.e, the outcome without treatment and noise) 6 | self.tau = tau #treatment effect estimator 7 | self.covariance_tau = covariance_tau #covariance matrix of tau 8 | self.std_tau = std_tau #standard deviation of tau 9 | if return_tau_scalar: 10 | ## if tau is a scalar, then return tau instead of [tau] 11 | self.tau = tau[0] 12 | if self.covariance_tau is not None: 13 | self.covariance_tau = covariance_tau[0, 0] 14 | if self.std_tau is not None: 15 | self.std_tau = std_tau[0] -------------------------------------------------------------------------------- /src/causaltensor/matcomple/ALS_solver.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | 4 | 5 | def ALS_solve(M, Ω, r, mu, epsilon=1e-3, max_iterations=100, debug = False): 6 | """ 7 | Solve probabilistic matrix factorization using alternating least squares. 8 | 9 | Since loss function is non-convex, each attempt at ALS starts from a 10 | random initialization and returns a local optimum. 
11 | 12 | [ Salakhutdinov and Mnih 2008 ] 13 | [ Hu, Koren, and Volinksy 2009 ] 14 | 15 | Parameters: 16 | ----------- 17 | M : m x n array 18 | matrix to complete 19 | 20 | Ω : m x n array 21 | matrix with entries zero (if missing) or one (if present) 22 | 23 | r : integer 24 | how many factors to use 25 | 26 | mu : float 27 | hyper-parameter penalizing norm of factored U, V 28 | 29 | epsilon : float 30 | convergence condition on the difference between iterative results 31 | 32 | max_iterations: int 33 | hard limit on maximum number of iterations 34 | 35 | Returns: 36 | -------- 37 | X: m x n array 38 | completed matrix 39 | """ 40 | n1, n2 = M.shape 41 | 42 | U = np.random.randn(n1, r) 43 | V = np.random.randn(n2, r) 44 | 45 | prev_X = np.dot(U, V.T) 46 | 47 | def solve(M, U, Ω): 48 | V = np.zeros((M.shape[1], r)) 49 | mu_I = mu * np.eye(U.shape[1]) 50 | for j in range(M.shape[1]): 51 | X1 = Ω[:, j:j+1].copy() * U 52 | X2 = X1.T @ X1 + mu_I 53 | #V[j] = (np.linalg.pinv(X2) @ X1.T @ (M[:, j:j+1].copy())).T 54 | #print(M[:, j:j+1].shape) 55 | V[j] = np.linalg.solve(X2, X1.T @ (M[:, j:j+1].copy())).reshape(-1) 56 | return V 57 | 58 | for _ in range(max_iterations): 59 | 60 | U = solve(M.T, V, Ω.T) 61 | 62 | V = solve(M, U, Ω) 63 | 64 | 65 | X = np.dot(U, V.T) 66 | 67 | mean_diff = np.linalg.norm(X - prev_X) / np.linalg.norm(X) 68 | #if _ % 1 == 0: 69 | # logger.info("Iteration: %i; Mean diff: %.4f" % (_ + 1, mean_diff)) 70 | if (debug): 71 | print("Iteration: %i; Mean diff: %.4f" % (_ + 1, mean_diff)) 72 | 73 | if mean_diff < epsilon: 74 | break 75 | prev_X = X 76 | 77 | return X 78 | 79 | def unit_test(): 80 | 81 | r = 10 82 | n = 500 83 | M = np.random.rand(n, r) - 0.5 84 | M = M @ M.T 85 | mask = np.random.rand(n, n) < 0.9 86 | t1 = time.time() 87 | Mhat = ALS_solve(M, mask, r, 1e-3) 88 | print(time.time() - t1) 89 | print('error', np.linalg.norm(Mhat-M) / np.linalg.norm(M)) -------------------------------------------------------------------------------- /src/causaltensor/matcomple/__init__.py: -------------------------------------------------------------------------------- 1 | from .hard_impute import * 2 | from .ALS_solver import * -------------------------------------------------------------------------------- /src/causaltensor/matcomple/hard_impute.py: -------------------------------------------------------------------------------- 1 | from causaltensor.matlib import SVD 2 | import numpy as np 3 | 4 | def hard_impute(O, Ω, r=1, eps=1e-4): 5 | M = np.zeros_like(O) 6 | for T in range(2000): 7 | M_new = SVD(O * Ω + (1-Ω) * M , r) 8 | if (np.linalg.norm(M-M_new) < np.linalg.norm(M)*eps): 9 | break 10 | M = M_new 11 | return M -------------------------------------------------------------------------------- /src/causaltensor/matlib/__init__.py: -------------------------------------------------------------------------------- 1 | from .generation import * 2 | from .generation_treatment_pattern import * 3 | from .util import * -------------------------------------------------------------------------------- /src/causaltensor/matlib/generation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def low_rank_M0_normal(n1=50, n2=50, r = 10, loc = 0, scale = 1): 4 | """ 5 | Generate a random rank-r matrix M = U.dot(V.T) with shape (n1xn2), where U's shape is (n1xr) and V's shape is (n2xr). 6 | Here, entries of U and V are i.i.d Gaussian R.V.s drawn from N(loc, scale) where loc is the mean and scale is standard deviation. 
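    Example (illustrative): M0 = low_rank_M0_normal(n1=100, n2=80, r=5) returns a 100x80 matrix whose rank is 5 (almost surely).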
7 | """ 8 | 9 | U = np.random.normal(loc = loc, scale = scale, size = (n1, r)) 10 | V = np.random.normal(loc = loc, scale = scale, size = (n2, r)) 11 | M0 = U.dot(V.T) 12 | return M0 13 | 14 | def low_rank_M0_Gamma(n1=50, n2=50, r = 10, mean_M = 1, shape = 1, scale = 2): 15 | """ 16 | Generate a random rank-r non-negative (n1 x n2) matrix with mean(M) = mean_M 17 | 18 | To do so, 19 | (i) Generate U with shape (n1xr) and V with shape (n2xr). The entries of U and V are i.i.d Gamma R.V.s drawn from Gamma(shape, scale). 20 | (ii) M0 = k * U.dot(V.T), where k is the scale to control the mean value of M0 such that np.mean(M0) = mean_M 21 | """ 22 | U = np.random.gamma(shape = shape, scale = scale, size = (n1, r)) 23 | V = np.random.gamma(shape = shape, scale = scale, size = (n2, r)) 24 | M0 = U.dot(V.T) 25 | M0 = M0 / np.mean(M0) * mean_M 26 | return M0 27 | 28 | def add_noise_normal(M0, noise_std=1): 29 | E = np.random.normal(loc=0, scale=noise_std, size=M0.shape) 30 | return M0 + E 31 | 32 | def add_noise_Poisson(M0): 33 | return np.random.poisson(M0) -------------------------------------------------------------------------------- /src/causaltensor/matlib/generation_treatment_pattern.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def generate_Z(pattern_tuple = ['adaptive'], M0 = 0): 4 | ''' 5 | generate the binary matrix Z for different patterns 6 | ''' 7 | while (True): 8 | if (pattern_tuple[0] == 'adaptive'): 9 | a = pattern_tuple[1][0] 10 | b = pattern_tuple[1][1] 11 | Z = adpative_treatment_pattern(a, b, M0) 12 | 13 | if (pattern_tuple[0] == 'iid'): 14 | p_treat = np.random.rand()*0.5 15 | Z = np.random.rand(n1, n2) <= p_treat 16 | 17 | if (pattern_tuple[0] == 'block'): 18 | m2 = pattern_tuple[1][1] 19 | Z, treat_units = simultaneous_adoption(pattern_tuple[1][0], m2, M0) 20 | 21 | if (pattern_tuple[0] == 'stagger'): 22 | m2 = pattern_tuple[1][1] 23 | Z = stagger_adoption(pattern_tuple[1][0], m2, M0) 24 | 25 | ## if some row or some column is all treated; or Z=0; generate Z again 26 | if (np.sum(np.sum(1-Z, axis=0) == 0) > 0 or np.sum(np.sum(1-Z, axis=1) == 0) > 0 or np.sum(Z)==0): 27 | if (pattern_tuple[0] == 'adaptive'): 28 | return Z, 'fail' 29 | continue 30 | break 31 | if (pattern_tuple[0] == 'block'): 32 | return Z, treat_units 33 | if (pattern_tuple[0] == 'adaptive'): 34 | return Z, 'success' 35 | return Z 36 | 37 | def adpative_treatment_pattern(lowest_T, lasting_T, M): 38 | ''' 39 | 40 | Input: lowest_T, lasting_T, M 41 | 42 | For each row i, if M(i,j) is the smallest among M(i, j-lowest_T:j) and no treatments on (i, j-lowest_T:j), then start the treatment on M(i,j+1) to M(i,j+lasting_T+1) 43 | ''' 44 | 45 | Z = np.zeros_like(M) 46 | for i in range(Z.shape[0]): 47 | j = 0 48 | #print(i) 49 | while j < Z.shape[1]: 50 | flag = 0 51 | for k in range(1, lowest_T+1): 52 | if (j-k < 0 or Z[i, j-k]==1 or M[i,j] > M[i,j-k]): 53 | flag = 1 54 | break 55 | 56 | #print(i, j) 57 | if (flag == 0): 58 | for k in range(1, lasting_T+1): 59 | if (j+k < Z.shape[1]): 60 | Z[i, j+k] = 1 61 | j += lasting_T + lowest_T 62 | else: 63 | j = j + 1 64 | return Z 65 | 66 | def iid_treatment(prob=1, shape=(1,1)): 67 | """ 68 | Generate treatment pattern Z by i.i.d Bernulli random variabls Bern(prob) 69 | 70 | Parameters: 71 | 72 | prob: Bern(prob) 73 | """ 74 | return np.random.rand(shape[0], shape[1]) <= prob 75 | 76 | def block_treatment_testone(m1, m2, M): 77 | Z = np.zeros_like(M) 78 | Z[:m1, m2:] = 1 79 | return Z 80 | 81 | def 
block_treatment_testtwo(M): 82 | Z = np.zeros_like(M) 83 | ratio = np.random.rand()*0.8 84 | m1 = int(M.shape[0]*ratio)+1 85 | m2 = int(M.shape[1]*(1-ratio))-1 86 | Z[:m1, m2:] = 1 87 | return Z 88 | 89 | def simultaneous_adoption(m1, m2, M): 90 | ''' 91 | randomly select m1 units, adopt the treatment in [m2:] 92 | ''' 93 | Z = np.zeros_like(M) 94 | treat_units = np.random.choice(range(M.shape[0]), m1, replace=False) 95 | Z[treat_units, m2:] = 1 96 | return Z, treat_units 97 | 98 | def stagger_adoption(m1, m2, M): 99 | ''' 100 | randomly select m1 units; each adopts the treatment at a random time after m2 101 | ''' 102 | Z = np.zeros_like(M) 103 | treat_units = np.random.choice(range(M.shape[0]), m1, replace=False) 104 | for i in treat_units: 105 | j = np.random.randint(m2, high=M.shape[1]) 106 | Z[i, j:] = 1 107 | #Z[treat_units, m2:] = 1 108 | return Z 109 | 110 | if (__name__ == '__main__'): 111 | M = np.zeros((5, 5)) 112 | print(simultaneous_adoption(2, 2, M)) 113 | -------------------------------------------------------------------------------- /src/causaltensor/matlib/util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def noise_to_signal(X, M, Ω): 4 | return np.sqrt(np.sum((Ω*X - Ω*M)**2) / np.sum((Ω*M)**2)) 5 | 6 | def abs_mean(X, M, Ω): 7 | return np.sum(np.abs((X-M)*Ω)) / np.sum(Ω) 8 | 9 | def svd_fast(M): 10 | is_swap = False 11 | if M.shape[0] > M.shape[1]: 12 | is_swap = True 13 | M = M.T 14 | 15 | A = M @ M.T # this speeds up the calculation when M is far from square (one dimension much larger than the other) 16 | u, ss, uh = np.linalg.svd(A, full_matrices=False) 17 | ss[ss < 1e-7] = 0 18 | s = np.sqrt(ss) 19 | sinv = 1.0 / (s + 1e-7*(s<1e-7)) 20 | vh = sinv.reshape(M.shape[0], 1) * (uh @ M) 21 | 22 | if is_swap: 23 | return vh.T, s, u.T 24 | else: 25 | return u, s, vh 26 | 27 | ## least-squares solved via single SVD 28 | def SVD(M, r): 29 | """ 30 | input matrix M, approximating with rank r 31 | """ 32 | u, s, vh = svd_fast(M) 33 | s[r:] = 0 34 | return (u * s).dot(vh) 35 | 36 | def SVD_soft(X, l): 37 | u, s, vh = svd_fast(X) 38 | s_threshold = np.maximum(0,s-l) 39 | return (u * s_threshold).dot(vh) 40 | 41 | def L2_error(s, r): 42 | ''' 43 | s: a vector 44 | compute the L2 norm (root mean square) for the vector s[r:] 45 | ''' 46 | return np.sqrt(np.mean(s[r:]**2)) 47 | 48 | def L1_error(s, r): 49 | ''' 50 | s: a vector 51 | compute the L1 norm (mean absolute value) for the vector s[r:] 52 | ''' 53 | return np.mean(np.abs(s[r:])) 54 | 55 | def error_metric(M, tau, M0, tau_star): 56 | return np.sum((M - M0)**2) / np.sum(M0**2), np.amax(np.abs(M-M0)) / np.amax(np.abs(M0)), np.abs(tau-tau_star) #/ tau_star 57 | 58 | def metric_compute(M, tau, M0, tau_star, Z, metric_name = []): 59 | error_metrics = {} 60 | for metric in metric_name: 61 | if (metric == 'tau'): 62 | error_metrics[metric] = np.abs(tau-tau_star) / np.mean(np.abs(M0)) 63 | if (metric == 'RMSE_treat_elements'): 64 | error_metrics[metric] = np.sqrt(np.sum(Z*((M-M0)**2))/np.sum(Z)) 65 | if (metric == 'tau_diff'): 66 | error_metrics[metric] = tau-tau_star 67 | return error_metrics 68 | 69 | def convex_condition_test(M, Z, r): 70 | u, s, vh = np.linalg.svd(M, full_matrices = False) 71 | u = u[:, :r] 72 | vh = vh[:r, :] 73 | 74 | t1 = np.sum(Z*(u.dot(vh))) 75 | PTperpZ = (np.eye(u.shape[0]) - u.dot(u.T)).dot(Z).dot(np.eye(vh.shape[1]) - vh.T.dot(vh)) 76 | t2 = np.sum(PTperpZ**2) 77 | t3 = np.linalg.norm(PTperpZ, ord=2) 78 | return (t1*t3, t2) 79 | 80 | 81 | def transform_to_3D(Z): 82 | """ 83 | Z is a list of 2D numpy arrays or a single 2D/3D numpy 
array 84 | convert Z to a 3D numpy array with the last dimension being the index of interventions 85 | """ 86 | if isinstance(Z, list): #if Z is a list of numpy arrays 87 | Z = np.stack(Z, axis = 2) 88 | elif Z.ndim == 2: #if a single Z 89 | Z = Z.reshape(Z.shape[0], Z.shape[1], 1) 90 | return Z.astype(float) 91 | 92 | 93 | def remove_tangent_space_component(u, vh, Z): 94 | """ 95 | Remove the projection of Z (a single treatment) onto the tangent space of M in memory-aware manner 96 | """ 97 | 98 | # We conduct some checks for extremely wide or extremely long matrices, which may result in OOM errors with 99 | # naïve operation sequencing. If BOTH dimensions are extremely large, there may still be an OOM error, but this 100 | # case is quite rare. 101 | treatment_matrix_shape = Z.shape 102 | if max(treatment_matrix_shape) > 1e4: 103 | 104 | if treatment_matrix_shape[0] > treatment_matrix_shape[1]: 105 | first_factor = (Z - u.dot(u.T.dot(Z))) 106 | second_factor = np.eye(vh.shape[1]) - vh.T.dot(vh) 107 | else: 108 | first_factor = (np.eye(u.shape[0]) - u.dot(u.T)) 109 | second_factor = Z - (Z.dot(vh.T)).dot(vh) 110 | 111 | PTperpZ = first_factor.dot(second_factor) 112 | 113 | else: 114 | PTperpZ = (np.eye(u.shape[0]) - u.dot(u.T)).dot(Z).dot(np.eye(vh.shape[1]) - vh.T.dot(vh)) 115 | 116 | return PTperpZ 117 | -------------------------------------------------------------------------------- /src/causaltensor/sample_data/__init__.py: -------------------------------------------------------------------------------- 1 | from .fetch import * -------------------------------------------------------------------------------- /src/causaltensor/sample_data/fetch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "\n", 11 | "def fetch():\n", 12 | " O = np.loadtxt('MLAB_data.txt')\n", 13 | " return O" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 10, 19 | "metadata": {}, 20 | "outputs": [ 21 | { 22 | "data": { 23 | "text/plain": [ 24 | "array([ 96.19999695, 99.40000153, 73. , 71.40000153,\n", 25 | " 140.6999969 , 88.40000153, 66.90000153, 70. ,\n", 26 | " 125.5 , 88.90000153, 79.80000305, 156.1999969 ,\n", 27 | " 104.3000031 , 82.90000153, 76. , 97.19999695,\n", 28 | " 113.8000031 , 75.5 , 77.59999847, 93.19999695,\n", 29 | " 147.3000031 , 53.79999924, 109. 
, 72.5 ,\n", 30 | " 99.90000153, 108.9000015 , 87.90000153, 83.09999847,\n", 31 | " 103.9000015 , 75.09999847, 108.6999969 , 69.30000305,\n", 32 | " 40.70000076, 88.90000153, 96.69999695, 107.9000015 ,\n", 33 | " 80.09999847, 90.5 , 41.59999847])" 34 | ] 35 | }, 36 | "execution_count": 10, 37 | "metadata": {}, 38 | "output_type": "execute_result" 39 | } 40 | ], 41 | "source": [] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [] 49 | } 50 | ], 51 | "metadata": { 52 | "kernelspec": { 53 | "display_name": "base", 54 | "language": "python", 55 | "name": "python3" 56 | }, 57 | "language_info": { 58 | "codemirror_mode": { 59 | "name": "ipython", 60 | "version": 3 61 | }, 62 | "file_extension": ".py", 63 | "mimetype": "text/x-python", 64 | "name": "python", 65 | "nbconvert_exporter": "python", 66 | "pygments_lexer": "ipython3", 67 | "version": "3.7.4" 68 | }, 69 | "orig_nbformat": 4, 70 | "vscode": { 71 | "interpreter": { 72 | "hash": "40d3a090f54c6569ab1632332b64b2c03c39dcf918b08424e98f38b5ae0af88f" 73 | } 74 | } 75 | }, 76 | "nbformat": 4, 77 | "nbformat_minor": 2 78 | } 79 | -------------------------------------------------------------------------------- /src/causaltensor/sample_data/fetch.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def fetch(): 4 | O = np.loadtxt('MLAB_data.txt') 5 | return O -------------------------------------------------------------------------------- /src/causaltensor/tests/test_real_class.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from causaltensor.cauest.OLSSyntheticControl import ols_synthetic_control 4 | from causaltensor.cauest.DID import DID 5 | from causaltensor.cauest.SDID import SDID 6 | from causaltensor.cauest.DebiasConvex import DC_PR_auto_rank, DC_PR_with_suggested_rank 7 | from causaltensor.cauest.MCNNM import MC_NNM_with_cross_validation, MC_NNM_with_suggested_rank 8 | from causaltensor.matlib import low_rank_M0_normal 9 | from causaltensor.matlib import iid_treatment 10 | import os 11 | 12 | np.random.seed(0) 13 | 14 | 15 | class TestRealClass: 16 | @pytest.fixture 17 | def create_dataset(self): 18 | file_path = os.path.join('tests', 'MLAB_data.txt') 19 | O_raw = np.loadtxt(file_path) # California Smoke Dataset 20 | X = O_raw[1:8, :] ## predictors 21 | O = O_raw[8:, :] ## remove features that are not relevant in this demo 22 | O = O.T 23 | X = X.T 24 | Z = np.zeros_like(O) # Z has the same shape as O 25 | Z[-1, 19:] = 1 26 | return O, Z, X 27 | 28 | 29 | def test_did(self, create_dataset): 30 | O, Z, _ = create_dataset 31 | M, tau = DID(O, Z) 32 | # TODO: Check for better assertions 33 | assert M.shape == O.shape 34 | assert tau <= -20 and tau >= -30 35 | 36 | 37 | def test_sdid(self, create_dataset): 38 | O, Z, _ = create_dataset 39 | tau = SDID(O, Z) 40 | # TODO: Check for better assertions 41 | assert tau <= -10 and tau >= -20 42 | 43 | 44 | def test_synthetic_control(self, create_dataset): 45 | O, Z, X = create_dataset 46 | # SC on only outcomes 47 | M, tau = ols_synthetic_control(O.T, Z.T) 48 | assert M.shape == O.T.shape 49 | assert tau <= -10 and tau >= -20 50 | # SC on predictors 51 | M, tau = ols_synthetic_control(O.T, Z.T, X.T) 52 | assert M.shape == O.T.shape 53 | assert tau <= -10 and tau >= -20 54 | 55 | 56 | 57 | def test_dcpr(self, create_dataset): 58 | O, Z, _ = create_dataset 59 | M, tau, std = 
DC_PR_auto_rank(O, Z) 60 | # TODO: Check for better assertions 61 | assert M.shape == O.shape 62 | assert tau <= -10 and tau >= -20 63 | 64 | suggest_r = 2 65 | M, tau, std = DC_PR_with_suggested_rank(O, Z, suggest_r) 66 | # TODO: Check for better assertions 67 | assert M.shape == O.shape 68 | assert np.linalg.matrix_rank(M) == suggest_r 69 | assert tau <= -10 and tau >= -20 70 | 71 | 72 | def test_mc(self, create_dataset): 73 | O, Z, _ = create_dataset 74 | M, a, b, tau = MC_NNM_with_cross_validation(O, 1-Z) 75 | # TODO: Check for better assertions 76 | assert M.shape == O.shape 77 | assert tau <= -15 and tau >= -25 78 | 79 | suggest_r = 2 80 | M, a, b, tau = MC_NNM_with_suggested_rank(O, 1-Z, suggest_r) 81 | # TODO: Check for better assertions 82 | assert M.shape == O.shape 83 | assert tau <= -20 and tau >= -30 84 | assert np.linalg.matrix_rank(M) == suggest_r 85 | 86 | 87 | 88 | """ 89 | Run the following to run all test cases: 90 | pytest 91 | Run the following in the terminal to test and get coverage report: 92 | pytest --cov=./src/causaltensor/cauest --cov-report=term-missing 93 | """ 94 | 95 | -------------------------------------------------------------------------------- /src/causaltensor/tests/test_synthetic_class.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from causaltensor.cauest.OLSSyntheticControl import ols_synthetic_control 4 | from causaltensor.cauest.DID import DID 5 | from causaltensor.cauest.SDID import SDID 6 | from causaltensor.cauest.DebiasConvex import DC_PR_with_suggested_rank 7 | from causaltensor.cauest.MCNNM import MC_NNM_with_cross_validation, MC_NNM_with_suggested_rank 8 | from causaltensor.matlib.generation_treatment_pattern import iid_treatment, block_treatment_testone 9 | from causaltensor.matlib.generation import low_rank_M0_normal 10 | 11 | np.random.seed(0) 12 | 13 | 14 | class TestSyntheticClass: 15 | @pytest.fixture 16 | def create_dataset_factory(self): 17 | num_individuals = 100 18 | num_time_periods = 50 19 | treatment_level = 0.1 20 | 21 | 22 | def create_dataset(did=False, add_fixed_effects = False, r = 3, iid=False): 23 | # Generating synthetic data 24 | if did: 25 | a = np.random.rand(num_individuals) 26 | b = np.random.rand(num_time_periods) 27 | M = a[:, None] + b 28 | else: 29 | M = low_rank_M0_normal(num_individuals, num_time_periods, r) 30 | if add_fixed_effects: 31 | a = np.random.rand(num_individuals) 32 | b = np.random.rand(num_time_periods) 33 | M += a[:, None] + b 34 | 35 | self.tau = np.mean(np.abs(M)) * treatment_level 36 | 37 | # Generating treatment pattern 38 | if not iid: 39 | Z = block_treatment_testone(num_individuals//2, num_time_periods//2, M) 40 | else: 41 | Z = iid_treatment(0.3, (num_individuals, num_time_periods)) 42 | 43 | error = np.random.normal(0, np.abs(self.tau)*0.1, (num_individuals, num_time_periods)) 44 | 45 | #TODO: Should error be added only to treatment??? 
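            # As written, the Gaussian noise below is added to every entry of O, while the
            # treatment effect self.tau enters homogeneously on the treated cells (Z == 1).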
46 | O = M + self.tau * Z + error 47 | return O, Z 48 | return create_dataset 49 | 50 | 51 | 52 | def test_did(self, create_dataset_factory): 53 | # Block pattern 54 | O, Z = create_dataset_factory(did=True) 55 | M, tau = DID(O, Z) 56 | assert M.shape == O.shape 57 | error = np.abs(self.tau-tau)/self.tau 58 | assert error <= 0.01 59 | 60 | # IID Pattern 61 | O, Z = create_dataset_factory(did=True, iid=True) 62 | M, tau = DID(O, Z) 63 | assert M.shape == O.shape 64 | error = np.abs(self.tau-tau)/self.tau 65 | assert error <= 0.01 66 | 67 | 68 | def test_sdid(self, create_dataset_factory): 69 | # Only Block pattern 70 | O, Z = create_dataset_factory() 71 | tau = SDID(O, Z) 72 | error = np.abs(self.tau-tau)/self.tau 73 | assert error <= 0.01 74 | 75 | 76 | def test_synthetic_control(self, create_dataset_factory): 77 | # Only Block pattern 78 | O, Z = create_dataset_factory() 79 | M, tau = ols_synthetic_control(O.T, Z.T) 80 | assert M.shape == O.T.shape 81 | error = np.abs(self.tau-tau)/self.tau 82 | assert error <= 0.2 83 | 84 | 85 | 86 | def test_mc(self, create_dataset_factory): 87 | r = 1 88 | # Block Pattern 89 | O, Z = create_dataset_factory(did=False, add_fixed_effects=True, r = r) 90 | M, a, b, tau = MC_NNM_with_cross_validation(O, 1-Z) 91 | assert M.shape == O.shape 92 | error = np.abs(self.tau-tau)/self.tau 93 | assert error <= 0.1 94 | 95 | suggest_r = r 96 | M, a, b, tau = MC_NNM_with_suggested_rank(O, 1-Z, suggest_r) 97 | assert M.shape == O.shape 98 | error = np.abs(self.tau-tau)/self.tau 99 | assert error <= 0.1 100 | assert np.linalg.matrix_rank(M) == suggest_r 101 | 102 | 103 | def test_dcpr(self, create_dataset_factory): 104 | suggest_r = 3 105 | 106 | # Block Pattern 107 | O, Z = create_dataset_factory() 108 | M, tau, std = DC_PR_with_suggested_rank(O, Z, suggest_r) 109 | assert np.linalg.matrix_rank(M) == suggest_r 110 | error = np.abs(self.tau-tau)/self.tau 111 | assert error <= 0.05 112 | 113 | # IID Pattern 114 | O, Z = create_dataset_factory(iid=True) 115 | M, tau, std = DC_PR_with_suggested_rank(O, Z, suggest_r) 116 | assert np.linalg.matrix_rank(M) == suggest_r 117 | error = np.abs(self.tau-tau)/self.tau 118 | assert error <= 0.05 119 | 120 | 121 | 122 | def test_dcpr_multiple(self): 123 | n1 = 100 124 | n2 = 50 125 | r = 3 126 | M0 = low_rank_M0_normal(n1 = n1, n2 = n2, r = r) #low rank baseline matrix 127 | 128 | num_treat = 2 129 | prob = 0.3 130 | Z = [] 131 | tau = [] 132 | for k in range(num_treat): 133 | # IID Pattern 134 | Z.append(iid_treatment(prob=prob, shape=M0.shape)) #treatment patterns 135 | tau.append(np.random.normal(loc=0, scale=1)) #treatment effects 136 | 137 | def adding_noise(M0, Z, tau, Sigma, SigmaZ): 138 | num_treat = len(Z) 139 | # TODO: Why are we multiplying normal & uniform noises? 140 | O = M0 + np.random.normal(loc=0, scale=1, size=M0.shape) * Sigma #add heterogenous noise to the baseline matrix 141 | for k in range(num_treat): 142 | # TODO: Why are we adding noise here? 
143 | O += Z[k] * tau[k] + Z[k] * SigmaZ[k] * np.random.normal(loc=0, scale=1, size=M0.shape) #add heterogeneous noise to the treatment effects 144 | return O 145 | Sigma = np.random.rand(M0.shape[0], M0.shape[1]) 146 | SigmaZ = [] 147 | for k in range(num_treat): 148 | SigmaZ.append(np.random.rand(M0.shape[0], M0.shape[1])) 149 | 150 | O = adding_noise(M0, Z, tau, Sigma, SigmaZ) 151 | M, tau_hat, standard_deviation = DC_PR_with_suggested_rank(O, Z, suggest_r=r, method="non-convex") #solving a non-convex optimization to obtain M and tau 152 | error = np.linalg.norm(tau_hat - tau) / np.linalg.norm(tau) 153 | assert M.shape == O.shape 154 | assert error < 0.08 155 | 156 | 157 | 158 | """ 159 | Run the following to run all test cases: 160 | pytest 161 | Run the following in the terminal to test and get coverage report: 162 | pytest --cov=./src/causaltensor/cauest --cov-report=term-missing 163 | 164 | """ 165 | -------------------------------------------------------------------------------- /tests/.vscode/settings.json: -------------------------------------------------------------------------------- 1 | {} -------------------------------------------------------------------------------- /tests/SDID_test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 22, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "import seaborn as sns\n", 14 | "\n", 15 | "from IPython.display import set_matplotlib_formats\n", 16 | "set_matplotlib_formats('retina')\n", 17 | "\n", 18 | "import cvxpy as cp\n", 19 | "import cvxopt\n", 20 | "from cvxopt import matrix\n", 21 | "\n", 22 | "from sklearn.metrics import mean_squared_error\n", 23 | "\n", 24 | "import warnings\n", 25 | "warnings.simplefilter('ignore')" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 37, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "def SDID(O, treat_units = [0], starting_time = 100):\n", 35 | "\n", 36 | " donor_units = []\n", 37 | " for i in range(O.shape[0]):\n", 38 | " if (i not in treat_units):\n", 39 | " donor_units.append(i) \n", 40 | " \n", 41 | " ##Step 1, Compute regularization parameter\n", 42 | " \n", 43 | " D = O[:, 1:starting_time+1] - O[:, :starting_time]\n", 44 | "\n", 45 | " D_bar = np.mean(O[donor_units, :-1])\n", 46 | "\n", 47 | " z_square = np.mean((D - D_bar)**2)\n", 48 | "\n", 49 | " ##Step 2, Compute w^{sdid}\n", 50 | " Nco = len(donor_units)\n", 51 | " Ntr = len(treat_units)\n", 52 | " Tpre = starting_time\n", 53 | " Tpost = O.shape[1] - starting_time\n", 54 | "\n", 55 | " w = cp.Variable(Nco)\n", 56 | " w0 = cp.Variable(1)\n", 57 | " G = np.eye(Nco)\n", 58 | " A = np.ones(Nco)\n", 59 | " #G @ w >= 0\n", 60 | " #A.T @ w == 1\n", 61 | "\n", 62 | " mean_treat = np.mean(O[treat_units, :Tpre], axis = 0)\n", 63 | "\n", 64 | " prob = cp.Problem(cp.Minimize(cp.sum_squares(w0+O[donor_units, :Tpre].T @ w - mean_treat) + z_square * Tpre * cp.sum_squares(w)), [G @ w >= 0, A.T @ w == 1])\n", 65 | " prob.solve()\n", 66 | " #print(\"\\nThe optimal value is\", prob.value) \n", 67 | " #print(\"A solution w is\")\n", 68 | " #print(w.value)\n", 69 | "\n", 70 | " w_sdid = np.zeros(O.shape[0]) \n", 71 | " w_sdid[donor_units] = w.value\n", 72 | " w_sdid[treat_units] = -1.0 / Ntr\n", 73 | "\n", 74 | " ##Step 3, Compute l^{sdid}\n", 75 | " l = cp.Variable(Tpre)\n", 76 | " l0 = 
cp.Variable(1)\n", 77 | " G = np.eye(Tpre)\n", 78 | " A = np.ones(Tpre)\n", 79 | " #G @ w >= 0\n", 80 | " #A.T @ w == 1\n", 81 | "\n", 82 | " mean_treat = np.mean(O[donor_units, Tpre:], axis = 1)\n", 83 | " #print(mean_treat.shape)\n", 84 | "\n", 85 | " prob = cp.Problem(cp.Minimize(cp.sum_squares(l0+O[donor_units, :Tpre] @ l - mean_treat)), [G @ l >= 0, A.T @ l == 1])\n", 86 | " prob.solve()\n", 87 | " #print(\"\\nThe optimal value is\", prob.value) \n", 88 | " #print(\"A solution w is\")\n", 89 | " #print(l.value)\n", 90 | "\n", 91 | " l_sdid = np.zeros(O.shape[1]) \n", 92 | " l_sdid[:Tpre] = l.value\n", 93 | " l_sdid[Tpre:] = -1.0 / Tpost\n", 94 | "\n", 95 | " ##Step 4, Compute SDID estimator\n", 96 | " tau = w_sdid.T @ O @ l_sdid\n", 97 | "\n", 98 | " return tau" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 41, 104 | "metadata": {}, 105 | "outputs": [ 106 | { 107 | "name": "stdout", 108 | "output_type": "stream", 109 | "text": [ 110 | "1.0026997923361534\n" 111 | ] 112 | } 113 | ], 114 | "source": [ 115 | "np.random.seed(1)\n", 116 | "n = 50\n", 117 | "T = 100\n", 118 | "r = 5\n", 119 | "mu = np.random.rand()\n", 120 | "a = np.random.rand(n,1)\n", 121 | "b = np.random.rand(1,T)\n", 122 | "tau = 1\n", 123 | "\n", 124 | "U = np.random.normal(loc=0, scale = 1, size = (n, r))\n", 125 | "V = np.random.normal(loc = 0, scale = 1, size = (T, r))\n", 126 | "M = 5 * U.dot(V.T) + np.random.normal(size = (n, T))\n", 127 | "\n", 128 | "Ntr = int(n / 5)\n", 129 | "Tpre = int(4*T / 5) \n", 130 | "treat_units = [i for i in range(Ntr)]\n", 131 | "W = np.zeros((n, T))\n", 132 | "W[treat_units, Tpre:] = 1\n", 133 | "\n", 134 | "#print(treat_units)\n", 135 | "tau_hat = SDID(M + W*tau, treat_units, Tpre) \n", 136 | "print(tau_hat)" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 13, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "def compute_zeta_square(O, treat_units, starting_time):\n", 146 | " donor_units = []\n", 147 | " for i in range(O.shape[0]):\n", 148 | " if (i not in treat_units):\n", 149 | " donor_units.append(i) \n", 150 | " \n", 151 | " ##Step 1, Compute regularization parameter\n", 152 | " \n", 153 | " D = O[:, 1:starting_time+1] - O[:, :starting_time]\n", 154 | "\n", 155 | " D_bar = np.mean(O[donor_units, :-1])\n", 156 | "\n", 157 | " z_square = np.mean((D - D_bar)**2)\n", 158 | "\n", 159 | " return z_square\n", 160 | "\n", 161 | "\n", 162 | "def SDID_download(Y, treat_units = [0], starting_time = 100):\n", 163 | " \n", 164 | " s = treat_units\n", 165 | " t = starting_time\n", 166 | "\n", 167 | " Y_c = np.delete(Y, s, axis=0)\n", 168 | " Y_t = Y[s, :]\n", 169 | "\n", 170 | " Y_c_pre = Y_c[:, :t]\n", 171 | " Y_c_post = Y_c[:, t:]\n", 172 | " Y_t_pre = Y_t[:, :t]\n", 173 | " Y_t_post = Y_t[:, t:]\n", 174 | "\n", 175 | " sum_omega_YiT = omega_hat.T @ Y_c_post\n", 176 | " sum_lambda_YNt= lambda_hat.T @ Y_t_pre\n", 177 | " sum_omega_lambda_Yit = omega_hat.T @ Y_c_pre @ lambda_hat\n", 178 | "\n", 179 | " Yhat_sdid = sum_omega_YiT + sum_lambda_YNt - sum_omega_lambda_Yit\n", 180 | " #Yhat_sc = sum_omega_YiT\n", 181 | " #Yhat_did = Y_c_post.mean() + Y_t_pre.mean() - Y_c_pre.mean()\n", 182 | "\n", 183 | " tau_sdid = np.mean(Y_t_post - Yhat_sdid) \n", 184 | " Y[s, t:] = Yhat_sdid\n", 185 | " return Yhat_sdid, tau_sdid" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 22, 191 | "metadata": {}, 192 | "outputs": [ 193 | { 194 | "name": "stdout", 195 | "output_type": "stream", 196 | "text": [ 197 | 
"\n", 198 | "The optimal value is 0.0\n", 199 | "A solution x is\n", 200 | "[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n", 201 | "A dual solution corresponding to the inequality constraints is\n" 202 | ] 203 | }, 204 | { 205 | "ename": "IndexError", 206 | "evalue": "list index out of range", 207 | "output_type": "error", 208 | "traceback": [ 209 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 210 | "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", 211 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 26\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"A dual solution corresponding to the inequality constraints is\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 27\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprob\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconstraints\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdual_value\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 212 | "\u001b[0;31mIndexError\u001b[0m: list index out of range" 213 | ] 214 | } 215 | ], 216 | "source": [ 217 | "# Generate a random non-trivial quadratic program.\n", 218 | "m = 15\n", 219 | "n = 10\n", 220 | "p = 5\n", 221 | "np.random.seed(1)\n", 222 | "P = np.random.randn(n, n)\n", 223 | "P = P.T @ P\n", 224 | "q = np.random.randn(n)\n", 225 | "G = np.random.randn(m, n)\n", 226 | "h = G @ np.random.randn(n)\n", 227 | "A = np.random.randn(p, n)\n", 228 | "b = np.random.randn(p)\n", 229 | "\n", 230 | "# Define and solve the CVXPY problem.\n", 231 | "x = cp.Variable(n)\n", 232 | "#print(q.T @ q)\n", 233 | "#print(x.shape, q.shape, P.shape)\n", 234 | "prob = cp.Problem(cp.Minimize(cp.sum_squares(q.T @ x) + cp.sum_squares(x)))\n", 235 | "#prob = cp.Problem(cp.Minimize((1/2)*cp.quad_form(x, P) + q.T @ x), [G @ x <= h, A @ x == b])\n", 236 | "prob.solve()\n", 237 | "\n", 238 | "# Print result.\n", 239 | "print(\"\\nThe optimal value is\", prob.value)\n", 240 | "print(\"A solution x is\")\n", 241 | "print(x.value)\n", 242 | "print(\"A dual solution corresponding to the inequality constraints is\")\n", 243 | "print(prob.constraints[0].dual_value)" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [] 252 | } 253 | ], 254 | "metadata": { 255 | "kernelspec": { 256 | "display_name": "base", 257 | "language": "python", 258 | "name": "python3" 259 | }, 260 | "language_info": { 261 | "codemirror_mode": { 262 | "name": "ipython", 263 | "version": 3 264 | }, 265 | "file_extension": ".py", 266 | "mimetype": "text/x-python", 267 | "name": "python", 268 | "nbconvert_exporter": "python", 269 | "pygments_lexer": "ipython3", 270 | "version": "3.7.4 (default, Aug 13 2019, 15:17:50) \n[Clang 4.0.1 (tags/RELEASE_401/final)]" 271 | }, 272 | "metadata": { 273 | "interpreter": { 274 | "hash": "dca0ade3e726a953b501b15e8e990130d2b7799f14cfd9f4271676035ebe5511" 275 | } 276 | }, 277 | "orig_nbformat": 2, 278 | "vscode": { 279 | "interpreter": { 280 | "hash": "40d3a090f54c6569ab1632332b64b2c03c39dcf918b08424e98f38b5ae0af88f" 281 | } 282 | } 283 | }, 284 | "nbformat": 4, 285 | 
"nbformat_minor": 2 286 | } 287 | -------------------------------------------------------------------------------- /tests/distribution_test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import time \n", 11 | "import causaltensor as ct\n", 12 | "from causaltensor import low_rank_M0_Gamma\n", 13 | "from causaltensor.matlib import generate_Z\n", 14 | "from causaltensor.cauest import std_debiased_convex\n", 15 | "from causaltensor.cauest import projection_T_orthogonal\n", 16 | "from causaltensor.cauest import DC_PR_with_suggested_rank\n", 17 | "from causaltensor.cauest import non_convex_algorithm\n", 18 | "from causaltensor.cauest import DC_PR_auto_rank" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "## Distribution Test\n", 26 | "\n", 27 | "Evaluate the standard deviation estimator for the debiased convex algorithm" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "def synthetic_experiment_distribution_run_results(n1 = 50, n2 = 50, mean_M = 1, r = 5, num_experiment=1, sigma = 0.1, sigma_d = 0.1, pattern = 'stagger'):\n", 37 | " '''\n", 38 | " generate (M0, Z) pair:\n", 39 | " - M0 has shape (50x50) with mean_M and rank r\n", 40 | " - Z is generated in a stagger way, randomly select m1 rows, each row randomly gets treated after column m2\n", 41 | " - m1 ~ [1, n1), m2 ~ [n2/5, n2) uniformly\n", 42 | "\n", 43 | " for each (M0, Z) pair:\n", 44 | " - compute the score\n", 45 | " - \n", 46 | "\n", 47 | " '''\n", 48 | " samples = np.zeros(num_experiment)\n", 49 | " t1 = time.time()\n", 50 | " for T in range(num_experiment):\n", 51 | " if (T % 100 == 0):\n", 52 | " print(time.time() - t1)\n", 53 | " print('experiment ', T)\n", 54 | " #np.random.seed(1)\n", 55 | " M0 = low_rank_M0_Gamma(n1 = n1, n2 = n2, r = r, mean_M = mean_M)\n", 56 | " ## generating stagger pattern Z\n", 57 | " if (pattern == 'stagger'):\n", 58 | " m1 = np.random.randint(low=1, high=n1)\n", 59 | " m2 = np.random.randint(low=int(n2/2), high=n2)\n", 60 | " Z = generate_Z(pattern_tuple=['stagger', (m1, m2)], M0=M0)\n", 61 | "\n", 62 | " if (pattern == 'block'):\n", 63 | " m1 = np.random.randint(low=1, high=int(n1/3))\n", 64 | " m2 = np.random.randint(low=int(n2/2), high=n2)\n", 65 | " Z, treat_units = generate_Z(pattern_tuple=['block', (m1, m2)], M0=M0)\n", 66 | "\n", 67 | " print('***sparsity****', np.sum(Z) / np.size(Z))\n", 68 | "\n", 69 | " tau_star = 1\n", 70 | "\n", 71 | " PTperpZ = projection_T_orthogonal(Z, M0)\n", 72 | "\n", 73 | " # #predict_sigma = sigma / np.sqrt(np.sum(PTperpZ**2))\n", 74 | "\n", 75 | " predict_sigma = np.sqrt((sigma**2) / np.sum(PTperpZ**2) + (sigma_d**2) * np.sum((PTperpZ**2)*Z) / (np.sum(PTperpZ**2)**2))\n", 76 | "\n", 77 | " # #print(predict_sigma, sigma / np.sqrt(np.sum(PTperpZ**2)))\n", 78 | "\n", 79 | " s = np.linalg.svd(M0, full_matrices=False, compute_uv=False)\n", 80 | "\n", 81 | " def test():\n", 82 | " #np.random.seed(T)\n", 83 | " E = np.random.normal(loc=0, scale=sigma, size=M0.shape)\n", 84 | " delta = np.random.normal(loc = 0, scale = sigma_d, size = M0.shape)\n", 85 | " O = M0 + Z * tau_star + E + delta * Z\n", 86 | " E_op = np.linalg.norm(E + delta * Z, ord=2)\n", 87 | " suggest_l = min(s[r-1]/1.1, E_op*1.1)\n", 88 | "\n", 89 | " #input 
O/predict_sigma, eliminate precision issue\n", 90 | " #results = run_algo(['convex_debias', 'convex'], O, Z, suggest_r = -1, suggest_l = suggest_l, eps = predict_sigma/1000, de_mean_O=False)\n", 91 | " \n", 92 | " M_debias, tau_debias, M, tau = DC_PR_auto_rank(O, Z)\n", 93 | " print(np.linalg.matrix_rank(M), r)\n", 94 | "\n", 95 | " estimated_sigma_level = std_debiased_convex(O, Z, M, tau)\n", 96 | "\n", 97 | " return (tau_debias-tau_star)/estimated_sigma_level\n", 98 | "\n", 99 | " return (tau-tau_star)/predict_sigma\n", 100 | "\n", 101 | " # def KS_test():\n", 102 | " # total = 100\n", 103 | " # tau_samples = np.zeros(total)\n", 104 | " # for i in range(total):\n", 105 | " # tau_samples[i] = test()\n", 106 | " # KS_statistic, p_value = scipy.stats.ks_1samp(tau_samples, scipy.stats.norm.cdf)\n", 107 | " # print(KS_statistic, p_value)\n", 108 | " # return KS_statistic\n", 109 | "\n", 110 | " samples[T] = test()\n", 111 | " print('experiment {}, time elapses {}, tau error {}'.format(T, time.time() - t1, samples[T]))\n", 112 | " #print(samples[T], predict_sigma)\n", 113 | " return samples" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "### Set up\n", 123 | "n1 = 100\n", 124 | "n2 = 100\n", 125 | "mean_M = 10\n", 126 | "r = 2\n", 127 | "sigma = 1\n", 128 | "sigma_d = 1\n", 129 | "\n", 130 | "samples = synthetic_experiment_distribution_run_results(n1 = n1, n2 = n2, mean_M = mean_M, r = r, sigma = sigma, sigma_d = sigma, num_experiment = 100, pattern = 'block')" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "## Distribution Plot\n", 138 | "check whether the distribution is like Gaussian or not" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "import matplotlib.pyplot as plt\n", 148 | "from scipy.stats import norm\n", 149 | "import scipy.stats\n", 150 | "import seaborn as sns\n", 151 | "\n", 152 | "def synthetic_experiment_distribution_plot_distribution_results(samples):\n", 153 | " hist, bined = np.histogram(samples, bins = 30, density=True)\n", 154 | " plt.plot((bined[:-1]/2+bined[1:]/2), hist)\n", 155 | " pos_guassian = np.linspace(min(samples), max(samples), 1000)\n", 156 | " pdf_guassian = norm.pdf(pos_guassian, loc=0, scale=1)\n", 157 | " plt.plot(pos_guassian, pdf_guassian)\n", 158 | " plt.show()\n", 159 | "\n", 160 | " print(np.mean(samples), np.std(samples))\n", 161 | "\n", 162 | " g = sns.displot(data=samples, kind='hist', stat='density')\n", 163 | " g.set(xlim=(-4, 4))\n", 164 | " g.set(ylim=(0.0, 0.45))\n", 165 | " plt.plot(pos_guassian, pdf_guassian, label=r'$N(0, 1)$', color='r')\n", 166 | " plt.legend(fontsize = 17)\n", 167 | " plt.ylabel('Density', fontsize = 18)\n", 168 | " plt.tight_layout()\n", 169 | " plt.show()\n", 170 | " x = scipy.stats.norm.rvs(loc=0, size=100000)\n", 171 | " sns.ecdfplot(data=x)\n", 172 | " plt.show()\n", 173 | "\n", 174 | "synthetic_experiment_distribution_plot_distribution_results(samples)" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [] 183 | } 184 | ], 185 | "metadata": { 186 | "kernelspec": { 187 | "display_name": "Python 3.7.4 ('base')", 188 | "language": "python", 189 | "name": "python3" 190 | }, 191 | "language_info": { 192 | "codemirror_mode": { 193 | "name": "ipython", 194 | "version": 3 195 
| }, 196 | "file_extension": ".py", 197 | "mimetype": "text/x-python", 198 | "name": "python", 199 | "nbconvert_exporter": "python", 200 | "pygments_lexer": "ipython3", 201 | "version": "3.7.4 (default, Aug 13 2019, 15:17:50) \n[Clang 4.0.1 (tags/RELEASE_401/final)]" 202 | }, 203 | "orig_nbformat": 4, 204 | "vscode": { 205 | "interpreter": { 206 | "hash": "40d3a090f54c6569ab1632332b64b2c03c39dcf918b08424e98f38b5ae0af88f" 207 | } 208 | } 209 | }, 210 | "nbformat": 4, 211 | "nbformat_minor": 2 212 | } 213 | -------------------------------------------------------------------------------- /tests/ols_synthetic_control.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import sys\n", 10 | "sys.path.append('c:\\\\Users\\\\Arushi Jain\\\\Dropbox (MIT)\\\\RAship\\\\causaltensor')\n", 11 | "import numpy as np\n", 12 | "from src.causaltensor.cauest.OLSSyntheticControl import ols_synthetic_control" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "name": "stdout", 22 | "output_type": "stream", 23 | "text": [ 24 | "(39, 31)\n" 25 | ] 26 | } 27 | ], 28 | "source": [ 29 | "O_raw = np.loadtxt('MLAB_data.txt')\n", 30 | "O = O_raw[8:, :] ## remove features that are not relevant in this demo\n", 31 | "O = O.T\n", 32 | "print(O.shape)\n", 33 | "## now O consists of the annual tobacco consumption of 39 states from 1970 to 2000\n", 34 | "## California is the last row of O" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 3, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "Z = np.zeros_like(O) # Z has the same shape as O\n", 44 | "Z[-1, 19:] = 1 #Only California (the last row) used the intervention, which started in 1989" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 4, 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 | "name": "stdout", 54 | "output_type": "stream", 55 | "text": [ 56 | "Final: [ True True True True True True True True True True True True\n", 57 | " True True True True True True True True True True True True\n", 58 | " True True True True True True True True True True True True\n", 59 | " True True], inf\n", 60 | "The estimation of Synthetic Control is -15.41945550009492\n" 61 | ] 62 | }, 63 | { 64 | "name": "stderr", 65 | "output_type": "stream", 66 | "text": [ 67 | "c:\\Users\\Arushi Jain\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\statsmodels\\regression\\linear_model.py:1671: RuntimeWarning: divide by zero encountered in double_scalars\n", 68 | " return np.dot(wresid, wresid) / self.df_resid\n" 69 | ] 70 | } 71 | ], 72 | "source": [ 73 | "M, tau = ols_synthetic_control(O.T, Z.T) \n", 74 | "print('The estimation of Synthetic Control is', tau)" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 5, 80 | "metadata": {}, 81 | "outputs": [ 82 | { 83 | "name": "stdout", 84 | "output_type": "stream", 85 | "text": [ 86 | "Final: [False False False True False False False False False True False False\n", 87 | " False False False False False False False True True False False False\n", 88 | " False False False False False False False False False False False True\n", 89 | " False True], 0.019541940252292167\n", 90 | "The estimation of Synthetic Control is -13.159653521660083\n" 91 | ] 92 | } 93 | ], 94 | "source": [ 95 | "M, tau = 
ols_synthetic_control(O.T, Z.T, select_features=True) \n", 96 | "print('The estimation of Synthetic Control is', tau)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [] 105 | } 106 | ], 107 | "metadata": { 108 | "kernelspec": { 109 | "display_name": "Python 3", 110 | "language": "python", 111 | "name": "python3" 112 | }, 113 | "language_info": { 114 | "codemirror_mode": { 115 | "name": "ipython", 116 | "version": 3 117 | }, 118 | "file_extension": ".py", 119 | "mimetype": "text/x-python", 120 | "name": "python", 121 | "nbconvert_exporter": "python", 122 | "pygments_lexer": "ipython3", 123 | "version": "3.10.8" 124 | }, 125 | "orig_nbformat": 4 126 | }, 127 | "nbformat": 4, 128 | "nbformat_minor": 2 129 | } 130 | -------------------------------------------------------------------------------- /tests/readme_synth_matlab.txt: -------------------------------------------------------------------------------- 1 | ### Synth MATLAB Code (11/07/2006) written for MATLAB 7.0 2 | ### by Alberto Abadie, Alexis Diamond, and Jens Hainmueller (all Harvard University) 3 | ### Contact: jhainm@harvard.edu 4 | 5 | The Synth MATLAB code implements synthetic control methods for causal inference 6 | in comparative case studies with aggregate data as developed in Abadie and 7 | Gardeazabal (2003) and Abadie, Diamond, and Hainmueller (2006). 8 | 9 | Files: 10 | synth_code.m --> Main script that runs the example and reproduces the main 11 | results of the paper (the results may differ 12 | slightly due to tolerance settings of optimization). 13 | Researchers are encouraged to adjust this code for their 14 | needs; please give due credit. 15 | 16 | 17 | loss_function.m --> loss function called by synth_code.m 18 | 19 | MLAB_data.txt --> 39 by 39 data matrix to run the example: 20 | First row contains state numbers: 21 | Alabama 1; Arkansas 2; Colorado 4; Connecticut 5; Delaware 6; 22 | Georgia 7; Idaho 8; Illinois 9; Indiana 10; Iowa 11; Kansas 12; 23 | Kentucky 13; Louisiana 14; Maine 15; Minnesota 16; Mississippi 17; 24 | Missouri 18; Montana 19; Nebraska 20; Nevada 21; New Hampshire 22; 25 | New Mexico 23; North Carolina 24; North Dakota 25; Ohio 26; Oklahoma 27; 26 | Pennsylvania 28; Rhode Island 29; South Carolina 30; South Dakota 31; 27 | Tennessee 32; Texas 33; Utah 34; Vermont 35; Virginia 36; West Virginia 37; 28 | Wisconsin 38; Wyoming 39; California 3. 
29 | Predictors are stored in rows 2 to 8: 30 | row 2: income, row 3: retail price, row 4: percent_15-19; row 5: beer 31 | consumption (all averaged 1980 to 1988); 32 | row 6: smoking 1988, row 7: smoking 1980; row 8: smoking 1975; 33 | Outcome Data (smoking consumption in packs per capita) is stored in rows 9 to 39 34 | for the years 1970, 1971,...,2000 35 | 36 | 37 | -------------------------------------------------------------------------------- /tests/sales.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TianyiPeng/causaltensor/cafb53fc869c1f8902148fc34809b58472f1f124/tests/sales.p -------------------------------------------------------------------------------- /tests/test_DC_PR.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import time \n", 11 | "import causaltensor as ct\n", 12 | "from causaltensor import low_rank_M0_Gamma\n", 13 | "from causaltensor.matlib import generate_Z\n", 14 | "import causaltensor.cauest.DebiasConvex as DC\n", 15 | "import pandas as pd\n", 16 | "import pickle" 17 | ] 18 | }, 19 | { 20 | "attachments": {}, 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "## Semi-synthetic experiments on Sales data\n", 25 | "### Single Treatment" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 67, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "M0 = pickle.load(open('sales.p', 'rb'))\n", 35 | "s = np.linalg.svd(M0, full_matrices=False, compute_uv=False)\n", 36 | "\n", 37 | "from importlib import reload\n", 38 | "reload(DC)\n", 39 | "\n", 40 | "def experiments_run(M0, num_experiment=1, sigma = 0.1):\n", 41 | " results = 0\n", 42 | " for T in range(num_experiment):\n", 43 | " while True:\n", 44 | " a = np.random.randint(20)+5\n", 45 | " b = np.random.randint(20)+5\n", 46 | " Z, info = generate_Z(pattern_tuple = ['adaptive', (a, b)], M0=M0)\n", 47 | " if (info == 'fail'):\n", 48 | " continue\n", 49 | " break\n", 50 | " tau_star = np.mean(M0)/5\n", 51 | " E = np.random.normal(loc=0, scale=sigma, size=M0.shape)\n", 52 | " O = M0 + tau_star * Z + E \n", 53 | " M, tau, std = DC.DC_PR_auto_rank(O, Z)\n", 54 | " results += np.linalg.norm(tau-tau_star) / np.linalg.norm(tau_star)\n", 55 | " return results / num_experiment \n", 56 | "\n", 57 | "np.random.seed(0)\n", 58 | "error = experiments_run(M0, num_experiment=10, sigma = 0)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 68, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "assert error < 0.1" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 69, 73 | "metadata": {}, 74 | "outputs": [ 75 | { 76 | "data": { 77 | "text/plain": [ 78 | "0.024746699133216143" 79 | ] 80 | }, 81 | "execution_count": 69, 82 | "metadata": {}, 83 | "output_type": "execute_result" 84 | } 85 | ], 86 | "source": [ 87 | "error" 88 | ] 89 | } 90 | ], 91 | "metadata": { 92 | "kernelspec": { 93 | "display_name": "Python 3.7.4 ('base')", 94 | "language": "python", 95 | "name": "python3" 96 | }, 97 | "language_info": { 98 | "codemirror_mode": { 99 | "name": "ipython", 100 | "version": 3 101 | }, 102 | "file_extension": ".py", 103 | "mimetype": "text/x-python", 104 | "name": "python", 105 | "nbconvert_exporter": "python", 106 | "pygments_lexer": "ipython3", 107 | "version": 
"3.10.9" 108 | }, 109 | "orig_nbformat": 4, 110 | "vscode": { 111 | "interpreter": { 112 | "hash": "40d3a090f54c6569ab1632332b64b2c03c39dcf918b08424e98f38b5ae0af88f" 113 | } 114 | } 115 | }, 116 | "nbformat": 4, 117 | "nbformat_minor": 2 118 | } 119 | -------------------------------------------------------------------------------- /tutorials/Synth.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TianyiPeng/causaltensor/cafb53fc869c1f8902148fc34809b58472f1f124/tutorials/Synth.zip -------------------------------------------------------------------------------- /tutorials/loss_function.m: -------------------------------------------------------------------------------- 1 | function ssr = sumsq(v2,X1,X0,Z1,Z0) 2 | v = [1;v2]; 3 | D = diag(v); 4 | H = X0'*D*X0; 5 | f = - X1'*D*X0; 6 | l = size(Z0,2); 7 | [w,fval,e]=quadprog(H,f,[],[],ones(1,l),1,zeros(l,1),ones(l,1)); 8 | w = abs(w); 9 | e = Z1 - Z0*w; 10 | ssr = sum(e.^2); 11 | -------------------------------------------------------------------------------- /tutorials/readme_synth_matlab.txt: -------------------------------------------------------------------------------- 1 | ### Synth MATLAB Code (11/07/2006) written for MATLAB 7.0 2 | ### by Alberto Abadie, Alexis Diamond, and Jens Hainmueller (all Harvard University) 3 | ### Contact: jhainm@harvard.edu 4 | 5 | The Synth MATLAB implements synthetic control methods for causal inference 6 | in comparative case studies with aggregate data as developed in Abadie and 7 | Gardeazabal (2003) and Abadie, Diamond, and Hainmueller (2006). 8 | 9 | Files: 10 | synth_code.m --> Main Script that runs the example and reproduces the main 11 | results of the paper (the results may differ 12 | slighly due to tolerance settings of optimization). 13 | Researchers are encouraged to adjust this code for their 14 | needs; please give due credit. 15 | 16 | 17 | loss_function.m --> loss function called by synth_code.m 18 | 19 | MLAB_data.txt --> 39 by 39 data matrix to run the example: 20 | First row contains state numbers: 21 | Alabama 1; Arkansas 2; Colorado 4; Connecticut 5; Delaware 6; 22 | Georgia 7; Idaho 8; Illinois 9; Indiana 10; Iowa 11; Kansas 12; 23 | Kentucky 13; Louisiana 14; Maine 15; Minnesota 16; Mississippi 17; 24 | Missouri 18; Montana 19; Nebraska 20; Nevada 21; New Hampshire 22; 25 | New Mexico 23; North Carolina 24; North Dakota 25; Ohio 26; Oklahoma 27; 26 | Pennsylvania 28; Rhode Island 29; South Carolina 30; South Dakota 31; 27 | Tennessee 32; Texas 33; Utah 34; Vermont 35; Virginia 36; West Virginia 37; 28 | Wisconsin 38; Wyoming 39; California 3. 
29 | Predictors are stored in rows 2 to 8: 30 | row 2: income, row 3: retail price, row 4: percent_15-19; row 5: beer 31 | consumption (all averaged 1980 to 1988); 32 | row 6: smoking 1988, row 7: smoking 1980; row 8: smoking 1975; 33 | Outcome Data (smoking consumption in packs per capita) is stored in rows 9 to 39 34 | for the years 1970, 1971,...,2000 35 | 36 | 37 | -------------------------------------------------------------------------------- /tutorials/synth_code.m: -------------------------------------------------------------------------------- 1 | clear all; 2 | diary main; 3 | 4 | %% Get Data 5 | load MLAB_data.txt; 6 | data = MLAB_data; 7 | 8 | %% Build Indices (see data description in readme file) 9 | 10 | % California is state no 3, stored in the last column no 39 11 | index_tr = [39]; 12 | % 38 Control states are 1,2 & 4,5,...,38, stored in columns 1 to 38 13 | index_co = [1:38]; 14 | 15 | % Predictors are stored in rows 2 to 8 16 | index_predict = [2:8]; 17 | % Outcome Data is stored in rows 9 to 39; for 1970, 1971,...,2000 18 | index_Y = [9:39]; 19 | 20 | %% Define Matrices for Predictors 21 | % X0 : 7 X 38 matrix (7 smoking predictors for 38 control states) 22 | X0 = data(index_predict,index_co); 23 | 24 | % X1 : 7 X 1 matrix (7 smoking predictors for 1 treated state) 25 | X1 = data(index_predict,index_tr); 26 | 27 | % Normalization (probably could be done more elegantly) 28 | bigdata = [X0,X1]; 29 | divisor = std(bigdata'); 30 | scamatrix = (bigdata' * diag(( 1./(divisor) * eye(size(bigdata,1))) ))'; 31 | X0sca = scamatrix([1:size(X0,1)],[1:size(X0,2)]); 32 | X1sca = scamatrix(1:size(X1,1),[size(scamatrix,2)]); 33 | X0 = X0sca; 34 | X1 = X1sca; 35 | clear divisor X0sca X1sca scamatrix bigdata; 36 | 37 | %% Define Matrices for Outcome Data 38 | % Y0 : 31 X 38 matrix (31 years of smoking data for 38 control states) 39 | Y0 = data(index_Y,index_co); 40 | % Y1 : 31 X 1 matrix (31 years of smoking data for 1 treated state) 41 | Y1 = data(index_Y,index_tr); 42 | 43 | % Now pick Z matrices, i.e. the pretreatment period 44 | % over which the loss function should be minimized 45 | % Here we pick Z to go from 1970 to 1988 46 | 47 | % Z0 : 19 X 38 matrix (19 years of pre-treatment smoking data for 38 control states) 48 | Z0 = Y0([1:19],1:38); 49 | % Z1 : 19 X 1 matrix (19 years of pre-treatment smoking data for 1 treated state) 50 | Z1 = Y1([1:19],1); 51 | 52 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 53 | % Now we implement Optimization 54 | 55 | % Check and maybe adjust optimization settings if necessary 56 | options = optimset('fmincon') 57 | 58 | % Get Starting Values 59 | s = std([X1 X0]')'; 60 | s2 = s; s2(1)=[]; 61 | s1 = s(1); 62 | v20 =((s1./s2).^2); 63 | 64 | [v2,fminv,exitflag] = fmincon('loss_function',v20,[],[],[],[],... 
65 | zeros(size(X1)),[],[],options,X1,X0,Z1,Z0); 66 | display(sprintf('%15.4f',fminv)); 67 | v = [1;v2]; 68 | % V-weights 69 | v 70 | 71 | % Now recover W-weights 72 | D = diag(v); 73 | H = X0'*D*X0; 74 | f = - X1'*D*X0; 75 | options = optimset('quadprog') 76 | [w,fval,e]=quadprog(H,f,[],[],ones(1,length(X0)),1,zeros(length(X0),1),ones(length(X0),1),[],options); 77 | w = abs(w); 78 | 79 | % W-weights 80 | w 81 | 82 | %%%%%%%%%%%%%%%%%%%%%%%%%%%% 83 | 84 | %% Now Plot Results 85 | 86 | Y0_plot = Y0*w; 87 | years = [1970:2000]'; 88 | plot(years,Y1,'-', years,Y0_plot,'--'); 89 | axis([1970 2000 0 150]); 90 | xlabel('year'); 91 | ylabel('smoking consumption per capita (in packs)'); 92 | legend('Solid Real California','Dashed Synthetic California',4); 93 | 94 | diary off; --------------------------------------------------------------------------------
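
Note on the SDID test notebook (tests/SDID_test.ipynb above): the estimator is assembled from two small constrained least-squares problems (unit weights that make a convex combination of donor pre-treatment trajectories track the treated units' average pre-treatment path, and time weights that make a convex combination of pre-treatment periods track the donors' post-treatment averages), followed by a weighted double difference. The sketch below restates that point estimate in one self-contained function. It is an illustrative sketch only: the name sdid_point_estimate is made up, and the ridge regularization of the unit weights used in the full SDID estimator (the role of compute_zeta_square in the notebook) is omitted for brevity.

import numpy as np
import cvxpy as cp

def sdid_point_estimate(O, treat_units, Tpre):
    # O: (n units) x (T periods) outcome matrix; treat_units: list of treated rows;
    # Tpre: number of pre-treatment periods (treatment starts at column Tpre).
    n, T = O.shape
    Tpost = T - Tpre
    donors = [i for i in range(n) if i not in treat_units]

    # Unit weights: convex combination (plus intercept) of donor pre-treatment
    # trajectories that tracks the treated units' average pre-treatment path.
    target_pre = np.mean(O[treat_units, :Tpre], axis=0)
    w, w0 = cp.Variable(len(donors)), cp.Variable()
    cp.Problem(cp.Minimize(cp.sum_squares(w0 + O[donors, :Tpre].T @ w - target_pre)),
               [w >= 0, cp.sum(w) == 1]).solve()

    # Time weights: convex combination (plus intercept) of pre-treatment columns
    # that tracks each donor's post-treatment average.
    target_post = np.mean(O[donors, Tpre:], axis=1)
    l, l0 = cp.Variable(Tpre), cp.Variable()
    cp.Problem(cp.Minimize(cp.sum_squares(l0 + O[donors, :Tpre] @ l - target_post)),
               [l >= 0, cp.sum(l) == 1]).solve()

    # Weighted double difference: treated rows and post-treatment columns carry
    # the negative uniform weights, as in the notebook.
    w_sdid = np.zeros(n)
    w_sdid[donors] = w.value
    w_sdid[treat_units] = -1.0 / len(treat_units)
    l_sdid = np.zeros(T)
    l_sdid[:Tpre] = l.value
    l_sdid[Tpre:] = -1.0 / Tpost
    return w_sdid.T @ O @ l_sdid

On a panel simulated as in the notebook (low-rank baseline, tau = 1, the first fifth of the units treated over the last fifth of the periods), this should recover an estimate close to 1.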
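
Note on the debiased-convex tests (tests/distribution_test.ipynb and tests/test_DC_PR.ipynb above): the first notebook checks that (tau_hat - tau*) divided by the estimated standard error behaves like a standard normal draw, and the second asserts a small relative error on semi-synthetic sales data. A condensed version of that calibration check is sketched below, with two caveats: the three-value return (M, tau, std) of DC_PR_auto_rank follows test_DC_PR.ipynb and may differ in other versions of causaltensor, and the Kolmogorov-Smirnov test is only commented out in the notebook, so using it as the pass/fail criterion here is an assumption.

import numpy as np
import scipy.stats
from causaltensor import low_rank_M0_Gamma
from causaltensor.matlib import generate_Z
from causaltensor.cauest import DC_PR_auto_rank

np.random.seed(0)
z_scores = []
for _ in range(50):
    M0 = low_rank_M0_Gamma(n1=50, n2=50, r=2, mean_M=10)                  # low-rank baseline
    Z, treat_units = generate_Z(pattern_tuple=['block', (5, 40)], M0=M0)  # block treatment pattern
    tau_star = 1.0
    O = M0 + tau_star * Z + np.random.normal(scale=1.0, size=M0.shape)    # observed panel
    M, tau, std = DC_PR_auto_rank(O, Z)                                   # estimate and standard error
    z_scores.append((tau - tau_star) / std)

# If the standard error is calibrated, the z-scores should look like N(0, 1) draws.
stat, p_value = scipy.stats.ks_1samp(np.array(z_scores), scipy.stats.norm.cdf)
print(stat, p_value)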
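
Note on the MATLAB tutorial (tutorials/synth_code.m with tutorials/loss_function.m above): it nests two optimizations. The outer fmincon call searches over predictor weights v; for each candidate v, the inner quadprog call finds donor weights w >= 0 summing to one that best reproduce the treated unit's predictors under the diagonal weighting diag(v), and v is then scored by the resulting pre-treatment outcome fit. A rough Python analogue of the inner step is sketched below; it uses cvxpy in place of quadprog, assumes X1 and Z1 are 1-D arrays, and is an illustration rather than part of the causaltensor package.

import numpy as np
import cvxpy as cp

def synth_inner_loss(v, X1, X0, Z1, Z0):
    # v: nonnegative predictor weights; X0/X1: donor/treated predictors;
    # Z0/Z1: donor/treated pre-treatment outcomes.
    w = cp.Variable(X0.shape[1])
    # Same objective as (X1 - X0*w)' * diag(v) * (X1 - X0*w) in loss_function.m.
    weighted_resid = cp.multiply(np.sqrt(v), X1 - X0 @ w)
    cp.Problem(cp.Minimize(cp.sum_squares(weighted_resid)),
               [w >= 0, cp.sum(w) == 1]).solve()
    e = Z1 - Z0 @ w.value          # pre-treatment outcome residuals
    return float(np.sum(e ** 2)), w.value

The outer step, mirroring the fmincon call in synth_code.m, would simply minimize the first return value over v (for example with scipy.optimize.minimize under a nonnegativity bound).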