├── .gitignore ├── .idea ├── .gitignore ├── .name ├── Dynamic-Panel-Data-Models-with-Python.iml ├── inspectionProfiles │ └── profiles_settings.xml ├── misc.xml ├── modules.xml ├── other.xml └── vcs.xml ├── .ipynb_checkpoints ├── Untitled-checkpoint.ipynb └── untitled-checkpoint.md ├── Benchmark ├── code │ ├── R.R │ ├── benchmark.py │ ├── data.csv │ ├── data_gen.R │ ├── stata.do │ ├── test3.R │ └── test_data.csv ├── images │ ├── Test_1.svg │ ├── Test_2.svg │ └── benchmark_model_1_Eqn.svg ├── performance_comparison.md ├── test_1.md └── test_2.md ├── LICENSE ├── Main.py ├── Main2.py ├── README.md ├── build_upload.sh ├── contributing.md ├── data.csv ├── output.html ├── pydynpd.egg-info ├── PKG-INFO ├── SOURCES.txt ├── dependency_links.txt ├── requires.txt └── top_level.txt ├── pydynpd ├── .idea │ ├── .name │ ├── inspectionProfiles │ │ ├── Project_Default.xml │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ ├── pydynpd.iml │ ├── vcs.xml │ └── workspace.xml ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-39.pyc │ ├── command.cpython-39.pyc │ ├── common_functions.cpython-39.pyc │ ├── info.cpython-39.pyc │ ├── panel_data.cpython-39.pyc │ ├── regression.cpython-39.pyc │ ├── specification_tests.cpython-39.pyc │ └── variable.cpython-39.pyc ├── __version__.py ├── command.py ├── common_functions.py ├── dynamic_panel_model.py ├── info.py ├── instruments.py ├── model_organizer.py ├── model_summary.py ├── panel_data.py ├── regression.py ├── sandbox │ ├── multicollinearity.py │ └── pydynpd.zip ├── specification_tests.py └── variable.py ├── pyproject.toml ├── setup.py ├── test.py ├── test_data.csv ├── untitled.md └── vignettes ├── .ipynb_checkpoints ├── Guide-checkpoint.ipynb └── Tutorial-checkpoint.ipynb ├── API.md ├── Guide.ipynb ├── Images ├── .$pydynpd.drawio.bkp ├── .$pydynpd.drawio.dtmp ├── .gitkeep ├── list_models.svg ├── new_struct.svg ├── pydynpd.drawio └── traditional.svg ├── Test_1.png ├── Test_1.svg ├── Tutorial.ipynb ├── images ├── new_struct.png 
└── traditional.png ├── new_struct.png ├── paper.md ├── ref.bib └── traditional.png /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | pydynpd/__pycache__/__init__.cpython-39.pyc 3 | pydynpd/__pycache__/__init__.cpython-39.pyc 4 | pydynpd/__pycache__/command.cpython-39.pyc 5 | pydynpd/__pycache__/common_functions.cpython-39.pyc 6 | pydynpd/__pycache__/info.cpython-39.pyc 7 | pydynpd/__pycache__/panel_data.cpython-39.pyc 8 | pydynpd/__pycache__/regression.cpython-39.pyc 9 | pydynpd/__pycache__/specification_tests.cpython-39.pyc 10 | pydynpd/__pycache__/variable.cpython-39.pyc 11 | pydynpd/__pycache__/__init__.cpython-39.pyc 12 | pydynpd/__pycache__/command.cpython-39.pyc 13 | pydynpd/__pycache__/common_functions.cpython-39.pyc 14 | pydynpd/__pycache__/info.cpython-39.pyc 15 | pydynpd/__pycache__/panel_data.cpython-39.pyc 16 | pydynpd/__pycache__/regression.cpython-39.pyc 17 | pydynpd/__pycache__/specification_tests.cpython-39.pyc 18 | pydynpd/__pycache__/variable.cpython-39.pyc 19 | pydynpd/__pycache__/__init__.cpython-39.pyc 20 | pydynpd/__pycache__/__init__.cpython-39.pyc 21 | pydynpd/__pycache__/__init__.cpython-39.pyc 22 | pydynpd/__pycache__/command.cpython-39.pyc 23 | pydynpd/__pycache__/common_functions.cpython-39.pyc 24 | *.pyc 25 | pydynpd/__pycache__/__init__.cpython-39.pyc 26 | *.pyc 27 | *.pyc 28 | *.pyc 29 | *.pyc 30 | *.whl 31 | *.whl 32 | *.gz 33 | *.gz 34 | pydynpd.egg-info/PKG-INFO 35 | *.pyc 36 | *.pyc 37 | *.whl 38 | *.gz 39 | *.pyc 40 | *.pyc 41 | *.gz 42 | *.gz 43 | *.whl 44 | Benchmark/.Rhistory 45 | Benchmark/.RData 46 | pydynpd.egg-info/PKG-INFO 47 | *.pyc 48 | pydynpd.egg-info/PKG-INFO 49 | *.pyc 50 | *.whl 51 | -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | # Editor-based HTTP Client requests 5 | 
/httpRequests/ 6 | # Datasource local storage ignored files 7 | /dataSources/ 8 | /dataSources.local.xml 9 | -------------------------------------------------------------------------------- /.idea/.name: -------------------------------------------------------------------------------- 1 | Main.py -------------------------------------------------------------------------------- /.idea/Dynamic-Panel-Data-Models-with-Python.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/other.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/Untitled-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 5 6 | } 7 | -------------------------------------------------------------------------------- 
/.ipynb_checkpoints/untitled-checkpoint.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/.ipynb_checkpoints/untitled-checkpoint.md -------------------------------------------------------------------------------- /Benchmark/code/R.R: -------------------------------------------------------------------------------- 1 | library(Matrix) 2 | library(pdynmc) 3 | library(plm) 4 | library(panelvar) 5 | 6 | abdata=read.csv("data.csv") 7 | 8 | ### the following code produces an error 9 | start=Sys.time() 10 | for (i in 1:100){ 11 | mc <- pdynmc(dat=abdata,varname.i = "id", varname.t = "year", 12 | use.mc.diff = TRUE, use.mc.lev = TRUE, use.mc.nonlin = FALSE, 13 | include.y = TRUE, varname.y = "n", lagTerms.y = 2, include.x = TRUE, 14 | varname.reg.pre = c("w", "k"), lagTerms.reg.pre = c(0,0), maxLags.reg.pre = c(3,3), 15 | include.dum = FALSE, dum.diff = FALSE, dum.lev = FALSE, 16 | w.mat = "iid.err", std.err = "corrected", estimation = "twostep", 17 | opt.meth = "none" 18 | ) 19 | summary(mc) 20 | } 21 | print(Sys.time()-start) 22 | 23 | ### the following code produces results inconsistent with other packages 24 | start=Sys.time() 25 | for (i in 1:100){ 26 | mc <- pdynmc(dat=abdata,varname.i = "id", varname.t = "year", 27 | use.mc.diff = TRUE, use.mc.lev = TRUE, use.mc.nonlin = FALSE, 28 | include.y = TRUE, varname.y = "n", lagTerms.y = 2, include.x = TRUE, 29 | varname.reg.pre = c("w", "k"), lagTerms.reg.pre = c(0,0), maxLags.reg.pre = c(3,3), 30 | include.dum = FALSE, dum.diff = FALSE, dum.lev = FALSE, 31 | w.mat = "iid.err", std.err = "corrected", estimation = "twostep", 32 | opt.meth = "none" 33 | ) 34 | summary(mc) 35 | } 36 | print(Sys.time()-start) 37 | 38 | 39 | ### panelvar 40 | start=Sys.time() 41 | for (i in 1:100){ 42 | ex3_abdata <-pvargmm( 43 | dependent_vars = c("n"), 44 | lags = 2, 45 | predet_vars = c("w"), 46 | exog_vars=c("k"), 47 | 
transformation = "fd", 48 | data = abdata, 49 | panel_identifier = c("id", "year"), 50 | steps = c("twostep"), 51 | system_instruments = TRUE, 52 | max_instr_dependent_vars = 3, 53 | max_instr_predet_vars = 3, 54 | min_instr_dependent_vars = 1L, 55 | min_instr_predet_vars = 1L, 56 | collapse = FALSE 57 | ) 58 | summary(ex3_abdata) 59 | } 60 | print(Sys.time()-start) 61 | 62 | 63 | ###### plm ################ 64 | 65 | start=Sys.time() 66 | for (i in 1:100){ 67 | 68 | pd <- pdata.frame(abdata, index = c("id", "year"), drop.index = TRUE) 69 | z1<-pgmm(n ~ 1+ lag(n, 1:2) + w + k |lag(n, 2:4) + lag(w, 1:3), data=pd, effect='individual', 70 | model="twosteps" ,transformation='ld' , fsm='FULL') 71 | summary(z1, robust=TRUE) 72 | 73 | } 74 | print(Sys.time()-start) 75 | -------------------------------------------------------------------------------- /Benchmark/code/benchmark.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import pandas as pd 4 | 5 | from pydynpd import regression 6 | 7 | start = time.time() 8 | for i in range(100): 9 | df = pd.read_csv("data.csv") 10 | 11 | mydpd = regression.abond('n L(1:2).n w k | gmm(n, 2:4) gmm(w, 1:3) iv(k)', df, ['id', 'year']) 12 | 13 | print(time.time() - start) 14 | -------------------------------------------------------------------------------- /Benchmark/code/data_gen.R: -------------------------------------------------------------------------------- 1 | #code is adapted from 2 | #Phillips PC, Han C (2019). “Chapter 5 - Dynamic panel GMM using R.” In HD Vinod, C Rao 3 | #(eds.), Conceptual Econometrics Using R, volume 41 of Handbook of Statistics, pp. 119 – 4 | #144. Elsevier. doi:https://doi.org/10.1016/bs.host.2019.01.002. 
5 | 6 | library(Matrix) 7 | library(data.table) 8 | 9 | 10 | nsize = 1000 11 | tsize = 10 12 | burn = 20 13 | t.all = tsize + burn 14 | 15 | beta1 = .25 16 | beta2 = .1 17 | gamma0 = 5 18 | gamma1 = -.1 19 | gamma2 = .1 20 | gamma3 = 3 21 | sigma = list(alpha = 5, u = 1, x = 1, z = 1) 22 | mu0 = 1 23 | mu1 = .005 24 | rho = list(z = 1, x = 1) 25 | 26 | set.seed(1) 27 | x = sigma$x * matrix(rnorm(nsize * t.all), nsize, t.all) 28 | for (j in 2:ncol(x)) x[, j] = rho$x * x[, j - 1] + x[, j] 29 | if (rho$x == 1) x = x - x[, burn - 1] 30 | 31 | trend = mu0 + mu1 * seq(-burn + 1, tsize) 32 | z0 = stats::filter(rnorm(t.all), rho$z, method = "recursive") 33 | if (rho$z == 1) z0 = z0 - z0[burn - 1] 34 | z = trend + z0 35 | 36 | alpha = sigma$alpha * rnorm(nsize) 37 | u = sigma$u * matrix(rnorm(nsize * t.all), nsize, t.all) 38 | 39 | # With these components in hand, we recursively generate yit as follows: 40 | xbar = colMeans(x) 41 | y = matrix(NA, nsize, t.all) 42 | y[, 1] = alpha + u[, 1] 43 | for (j in 2:ncol(y)) { 44 | lambda = gamma0 + gamma1 * mean(y[, j - 1]) + gamma2 * xbar[j - 1] + gamma3 * z[j - 1] 45 | y[, j] = alpha + beta1 * y[, j - 1] + beta2 * x[, j - 1] + lambda + u[, j] 46 | } 47 | 48 | y = y[, burn:t.all] # y is the Nx(T+1) matrix t=0...T 49 | x = x[, burn:t.all] 50 | z = z[burn:t.all] 51 | 52 | w=data.frame(id=as.vector(row(y)), year=as.vector(col(y))-1, y=as.vector(y), x=as.vector(x)) 53 | write.csv(w, "test_data.csv") -------------------------------------------------------------------------------- /Benchmark/code/stata.do: -------------------------------------------------------------------------------- 1 | timer clear 2 | timer on 1 3 | foreach n of numlist 1/100{ 4 | clear 5 | insheet using "C:\Users\Tiger\OneDrive\Dynamic Panel\data.csv" 6 | xtset(id year) 7 | xtabond2 n L(1/2).n w k , gmm(n, lag(2 4)) gmm(w, lag(1 3)) iv(k ) twostep robust 8 | } 9 | timer off 1 10 | 11 | qui timer list 12 | di in r "First time: " r(t1) 13 | 
-------------------------------------------------------------------------------- /Benchmark/code/test3.R: -------------------------------------------------------------------------------- 1 | library(plm) 2 | library(pdynmc) 3 | library(panelvar) 4 | 5 | start=Sys.time() 6 | 7 | 8 | for(i in 1:100){ 9 | dat =read.csv('data.csv') 10 | pd <- pdata.frame(dat, index = c("id", "year"), drop.index = TRUE) 11 | z1<-pgmm(n ~ lag(n, 1:2) + w + k |lag(n, 2:4) , data=pd, effect='individual', 12 | model="twosteps" ,transformation='ld' , robust=TRUE) 13 | summary(z1, robust=TRUE) 14 | 15 | 16 | } 17 | 18 | print(Sys.time()-start) 19 | summary(z1, robust=TRUE) 20 | 21 | for (i in 1:100) { 22 | dat =read.csv('data.csv') 23 | 24 | 25 | 26 | m1 <- pdynmc(dat = dat, varname.i = "id", varname.t = "year", 27 | use.mc.diff = TRUE, use.mc.lev = FALSE, use.mc.nonlin = FALSE, 28 | include.y = TRUE, varname.y = "n", lagTerms.y = 2,maxLags.y=4, 29 | varname.reg.ex=c("w", "k"), 30 | include.x=TRUE, lagTerms.reg.ex=c(0,0), 31 | w.mat = "iid.err", std.err = "corrected", estimation = "twostep", 32 | opt.meth = "none") 33 | summary(m1) 34 | } 35 | 36 | print(Sys.time()-start) 37 | # 38 | # 39 | # 40 | m2 <- pdynmc(dat = dat, varname.i = "id", varname.t = "year", 41 | use.mc.diff = TRUE, use.mc.lev = TRUE, use.mc.nonlin = FALSE, 42 | include.y = TRUE, varname.y = "n", lagTerms.y = 2,maxLags.y=4, 43 | fur.con = TRUE, fur.con.diff = TRUE, fur.con.lev = FALSE, 44 | varname.reg.fur = c("w", "k"), lagTerms.reg.fur = c(0,0), 45 | include.dum = FALSE, dum.diff = TRUE, dum.lev = FALSE, varname.dum = "year", 46 | w.mat = "iid.err", std.err = "corrected", estimation = "twostep", 47 | opt.meth = "none") 48 | summary(m2) 49 | mtest.fct(m2, order = 2) 50 | 51 | start=Sys.time() 52 | for (i in 1:100) { 53 | dat =read.csv('data.csv') 54 | p1 <-pvargmm( 55 | dependent_vars = c("n"), 56 | lags = 2, 57 | exog_vars = c("w","k"), 58 | #exog_vars = c("w","k"), 59 | transformation = "fd", 60 | data = dat, 61 | 
panel_identifier = c("id", "year"), 62 | steps = c("twostep"), 63 | system_instruments = TRUE, 64 | max_instr_dependent_vars = 4, 65 | max_instr_predet_vars = 3, 66 | min_instr_dependent_vars = 2, 67 | min_instr_predet_vars = 1, 68 | collapse = FALSE, 69 | progressbar=FALSE 70 | ) 71 | 72 | summary(p1) 73 | } 74 | print(Sys.time()-start) 75 | summary(p1) 76 | 77 | 78 | -------------------------------------------------------------------------------- /Benchmark/performance_comparison.md: -------------------------------------------------------------------------------- 1 | 2 | The objective of the package is similar to the following open-source packages:
3 | Package | Language | version 4 | --- | --- | --- 5 | plm | R | 2.6-1 6 | panelvar | R| 0.5.3 7 | pdynmc | R| 0.9.7 8 | 9 | To compare pydynpd with similar packages, we performed two performance tests. More specifically, in each test for each package we run 100 times to estimate the same model with the same data. For verification, the tests also include Stata package xtabond2 though Stata is a commercial software. Please note that the main reason we include xtabond2 is for regression result verification (\*). Directly comparing its speed with R or Python packages is a little unfair because the calculation part of xtabond2 was compiled while pydynpd and the three R packages are interpreted; xtabond2 should have a clear advantage on speed. 10 | 11 | *Note: xtabond2 is the most popular package on dynamic panel model as evidenced by the 9667 citations it has received according to google scholar as of April 3, 2022. It also won the annual Editor's prize of Stata journal in 2012. 12 | 13 | ## Test configuration 14 | ### Hardware 15 | Intel CPU 9700K (8 cores)
16 | Memory: 64GB
17 | 18 | ### Software 19 | Debian-based Linux (Deepin 20.05)
20 | R 4.1.3
21 | Python 3.10.3
22 |

To make our comparison fair, we manually compiled R, numpy, and scipy with Intel's Math Kernel Library (MKL), so that these R and Python packages do calculations using the same Linear Algebra library.

23 | 24 | Configuration of R: 25 | ``` 26 | > sessionInfo() 27 | R version 4.1.3 (2022-03-10) 28 | Platform: x86_64-pc-linux-gnu (64-bit) 29 | Running under: Deepin 20.5 30 | 31 | Matrix products: default 32 | BLAS/LAPACK: /opt/intel/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin/libmkl_gf_lp64.so 33 | ``` 34 | Configuration of Python Numpy: 35 | ``` 36 | numpy.show_config() 37 | blas_armpl_info: 38 | NOT AVAILABLE 39 | blas_mkl_info: 40 | libraries = ['mkl_rt', 'pthread', 'mkl_rt'] 41 | library_dirs = ['/opt/intel/compilers_and_libraries/linux/mkl/lib/intel64'] 42 | define_macros = [('SCIPY_MKL_H', None), ('HAVE_CBLAS', None)] 43 | include_dirs = ['/opt/intel/compilers_and_libraries/linux/mkl/include'] 44 | blas_opt_info: 45 | libraries = ['mkl_rt', 'pthread', 'mkl_rt'] 46 | library_dirs = ['/opt/intel/compilers_and_libraries/linux/mkl/lib/intel64'] 47 | define_macros = [('SCIPY_MKL_H', None), ('HAVE_CBLAS', None)] 48 | include_dirs = ['/opt/intel/compilers_and_libraries/linux/mkl/include'] 49 | ``` 50 | 51 |

The tests are based on the data set employed in Arellano and Bond (1991), which is available in the /Benchmark/code subfolder.

52 | 53 |

In the tests, we considered the following model:

54 | 55 | ![Alt text](./images/benchmark_model_1_Eqn.svg)
56 | 57 |

We performed two tests on the model above. Test 1 is a difference GMM and Test 2 is a system GMM. Regression results of each test are stored in https://github.com/dazhwu/pydynpd/blob/main/Benchmark/test_1.md and https://github.com/dazhwu/pydynpd/blob/main/Benchmark/test_2.md, respectively. Scripts can be found in the /Benchmark/code subfolder.

58 | 59 | 60 | ## Test 1: Difference GMM 61 | The following four packages produced the same regression result: plm, panelvar, pydynpd, and xtabond2. The result from R package pdynmc is slightly different. The table below compares their speeds. Column 2 shows their running time in seconds per 100 loops, and column 3 calculates their relative speed (i.e., relative to the fastest package in the test). Please note that as xtabond2 was developed and compiled using the Mata language, there are two different modes available in Stata. In the default mode, storage is favored over speed. But users can manually switch to the speed mode. Therefore, we report two speeds for xtabond2. More specifically, xtabond2 (default) balances between speed and storage, while xtabond2 (speed) represents the fastest speed this package can achieve. 62 | 63 | 64 | | Package | Running Time | Relative to the fastest | 65 | | ------------------ | ------------ | ----------------------- | 66 | | xtabond2 | 4.19 | 1.00 | 67 | | pydynpd | 4.81 | 1.15 | 68 | | panelvar | 661.893 | 157.97 | 69 | | plm | 11.02 | 2.63 | 70 | | pdynmc | 167.4 | 39.95 | 71 | 72 | 73 | 74 | ![Alt text](./images/Test_1.svg) 75 | 76 | The chart above shows that our package (pydynpd) is not far behind xtabond2 even though it is an interpreted package. 77 | 78 | 79 | ## Test 2: System GMM 80 | 81 | In the second test, pydynpd produced the same regression results as those by xtabond2 and panelvar. plm has different results because it doesn't include a constant term. On the other hand, we made several attempts but could not get the R package pdynmc to work; it kept on reporting the same error message (i.e., "Matrices must have same number of rows in cbind2(x, .Call(dense_to_Csparse, y)"). 
82 | 83 | | Package | Running Time | Relative to the fastest | 84 | | ------------------ | ------------ | ----------------------- | 85 | | xtabond2 | 5.893 | 1.00 | 86 | | pydynpd | 6.21 | 1.05 | 87 | | plm | 14.26 | 2.42 | 88 | | panelvar | 718.2 | 121.87 | 89 | | pdynmc | NA | NA | 90 | 91 | 92 | 93 | ![Alt text](./images/Test_2.svg) 94 | 95 | 96 | -------------------------------------------------------------------------------- /Benchmark/test_1.md: -------------------------------------------------------------------------------- 1 | # plm 2 | ``` 3 | library(plm) 4 | abdata=read.csv("data.csv") 5 | pd <- pdata.frame(abdata, index = c("id", "year"), drop.index = TRUE) 6 | z1<-pgmm(n ~ 1+ lag(n, 1:2) + w + k |lag(n, 2:4) + lag(w, 1:3), data=pd, effect='individual', 7 | model="twosteps" ,transformation='d',robust = TRUE 8 | summary(z1) 9 | 10 | ``` 11 | ``` 12 | Oneway (individual) effect Two-steps model Difference GMM 13 | 14 | Call: 15 | pgmm(formula = n ~ 1 + lag(n, 1:2) + w + k | lag(n, 2:4) + lag(w, 16 | 1:3), data = pd, effect = "individual", model = "twosteps", 17 | transformation = "d", robust = TRUE) 18 | 19 | Unbalanced Panel: n = 140, T = 7-9, N = 1031 20 | 21 | Number of Observations Used: 611 22 | Residuals: 23 | Min. 1st Qu. Median Mean 3rd Qu. Max. 24 | -0.63963 -0.03725 0.00000 -0.00309 0.04356 0.48571 25 | 26 | Coefficients: 27 | Estimate Std. Error z-value Pr(>|z|) 28 | lag(n, 1:2)1 0.170062 0.104665 1.6248 0.1042 29 | lag(n, 1:2)2 -0.011338 0.037720 -0.3006 0.7637 30 | w -0.951058 0.127730 -7.4459 9.632e-14 *** 31 | k 0.463722 0.071833 6.4556 1.078e-10 *** 32 | --- 33 | Signif. 
codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 34 | 35 | Sargan test: chisq(32) = 47.85967 (p-value = 0.035436) 36 | Autocorrelation test (1): normal = -1.187819 (p-value = 0.2349) 37 | Autocorrelation test (2): normal = -0.8112457 (p-value = 0.41722) 38 | Wald test for coefficients: chisq(4) = 402.4636 (p-value = < 2.22e-16) 39 | ``` 40 | 41 | # panelvar 42 | ``` 43 | library(panelvar) 44 | abdata=read.csv("data.csv") 45 | 46 | p1 <-pvargmm( 47 | dependent_vars = c("n"), 48 | lags = 2, 49 | predet_vars = c("w"), 50 | exog_vars=c("k"), 51 | transformation = "fd", 52 | data = abdata, 53 | panel_identifier = c("id", "year"), 54 | steps = c("twostep"), 55 | system_instruments = FALSE, 56 | max_instr_dependent_vars = 3, 57 | max_instr_predet_vars = 3, 58 | min_instr_dependent_vars = 1L, 59 | min_instr_predet_vars = 1L, 60 | collapse = FALSE, 61 | progressbar = FALSE 62 | ) 63 | summary(p1) 64 | 65 | ``` 66 | ``` 67 | --------------------------------------------------- 68 | Dynamic Panel VAR estimation, two-step GMM 69 | --------------------------------------------------- 70 | Transformation: First-differences 71 | Group variable: id 72 | Time variable: year 73 | Number of observations = 611 74 | Number of groups = 140 75 | Obs per group: min = 4 76 | avg = 4.364286 77 | max = 6 78 | Number of instruments = 36 79 | 80 | =================== 81 | n 82 | ------------------- 83 | lag1_n 0.1701 84 | (0.1047) 85 | lag2_n -0.0113 86 | (0.0377) 87 | w -0.9511 *** 88 | (0.1277) 89 | k 0.4637 *** 90 | (0.0718) 91 | =================== 92 | *** p < 0.001; ** p < 0.01; * p < 0.05 93 | 94 | --------------------------------------------------- 95 | Instruments for equation 96 | Standard 97 | FD.(k) 98 | GMM-type 99 | Dependent vars: L(1, 3) 100 | Predet vars: L(1, 3) 101 | Collapse = FALSE 102 | --------------------------------------------------- 103 | 104 | Hansen test of overid. 
restrictions: chi2(32) = 47.86 Prob > chi2 = 0.035 105 | (Robust, but weakened by many instruments.) 106 | ``` 107 | 108 | # pdynmc 109 | 110 | ``` 111 | library(pdynmc) 112 | abdata=read.csv("data.csv") 113 | mc_1 <- pdynmc(dat=abdata,varname.i = "id", varname.t = "year", 114 | use.mc.diff = TRUE, use.mc.lev = FALSE, use.mc.nonlin = FALSE, 115 | include.y = TRUE, varname.y = "n", lagTerms.y = 2, maxLags.y=4, 116 | inst.stata = TRUE, include.x = TRUE, 117 | varname.reg.pre = c("w"), lagTerms.reg.pre = c(0), maxLags.reg.pre = c(3), 118 | fur.con = TRUE, fur.con.diff = TRUE, fur.con.lev = FALSE, 119 | varname.reg.fur = c("k"),lagTerms.reg.fur = c(0), 120 | w.mat = "iid.err", std.err = "corrected", estimation = "twostep", 121 | opt.meth = "none") 122 | summary(mc_1) 123 | mtest.fct(mc_1, order = 2) 124 | ``` 125 | ``` 126 | Dynamic linear panel estimation (twostep) 127 | Estimation steps: 2 128 | 129 | Coefficients: 130 | Estimate Std.Err.rob z-value.rob Pr(>|z.rob|) 131 | L1.n 0.17078 0.10597 1.611 0.107 132 | L2.n -0.01186 0.03862 -0.307 0.759 133 | L0.w -0.96426 0.12689 -7.599 <2e-16 *** 134 | L0.k 0.46357 0.07237 6.406 <2e-16 *** 135 | --- 136 | Signif. 
codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 137 | 138 | 36 total instruments are employed to estimate 4 parameters 139 | 35 linear (DIF) 140 | 1 further controls (DIF) 141 | no time dummies 142 | 143 | J-Test (overid restrictions): 47.49 with 32 DF, pvalue: 0.0383 144 | F-Statistic (slope coeff): 408.98 with 4 DF, pvalue: <0.001 145 | F-Statistic (time dummies): no time dummies included in estimation 146 | 147 | Arellano and Bond (1991) serial correlation test of degree 2 148 | 149 | data: 2step GMM Estimation 150 | normal = -0.9218, p-value = 0.3566 151 | alternative hypothesis: serial correlation of order 2 in the error terms 152 | 153 | 154 | 155 | Arellano and Bond (1991) serial correlation test of degree 2 156 | 157 | data: 2step GMM Estimation 158 | normal = -0.9218, p-value = 0.3566 159 | alternative hypothesis: serial correlation of order 2 in the error terms 160 | 161 | ``` 162 | # pydynpd 163 | ``` 164 | import pandas as pd 165 | from pydynpd import regression 166 | df = pd.read_csv("data.csv") 167 | mydpd = regression.abond('n L(1:2).n w k | gmm(n, 2:4) gmm(w, 1:3) iv(k) | nolevel', df, ['id', 'year']) 168 | ``` 169 | ``` 170 | Dynamic panel-data estimation, two-step difference GMM 171 | Group variable: id Number of obs = 611 172 | Time variable: year Min obs per group: 5 173 | Number of instruments = 36 Max obs per group: 7 174 | Number of groups = 140 Avg obs per group: 5.36 175 | +------+------------+---------------------+------------+-----------+-----+ 176 | | n | coef. | Corrected Std. Err. 
| z | P>|z| | | 177 | +------+------------+---------------------+------------+-----------+-----+ 178 | | L1.n | 0.1700616 | 0.1046652 | 1.6248154 | 0.1042019 | | 179 | | L2.n | -0.0113381 | 0.0377205 | -0.3005824 | 0.7637329 | | 180 | | w | -0.9510582 | 0.1277298 | -7.4458585 | 0.0000000 | *** | 181 | | k | 0.4637223 | 0.0718328 | 6.4555747 | 0.0000000 | *** | 182 | +------+------------+---------------------+------------+-----------+-----+ 183 | Hansen test of overid. restrictions: chi(32) = 47.860 Prob > Chi2 = 0.035 184 | Arellano-Bond test for AR(1) in first differences: z = -1.19 Pr > z =0.235 185 | Arellano-Bond test for AR(2) in first differences: z = -0.81 Pr > z =0.417 186 | ``` 187 | 188 | 189 | command_str='y L1.y L1.x | gmm(y, 2:4) iv(L1.x)| timedumm ' 190 | mydpd = regression.abond(command_str, df, ['id', 'year']) 191 | 192 | # xtabond2 193 | 194 | ``` 195 | insheet using "data.csv" 196 | xtset(id year) 197 | xtabond2 n L(1/2).n w k , gmm(n, lag(2 4)) gmm(w, lag(1 3)) iv(k ) nolevel twostep robust 198 | 199 | ``` 200 | ``` 201 | 202 | Favoring speed over space. To switch, type or click on mata: mata set matafavor space, perm. 203 | Warning: Two-step estimated covariance matrix of moments is singular. 204 | Using a generalized inverse to calculate optimal weighting matrix for two-step estimation. 205 | Difference-in-Sargan/Hansen statistics may be negative. 206 | 207 | Dynamic panel-data estimation, two-step difference GMM 208 | ------------------------------------------------------------------------------ 209 | Group variable: id Number of obs = 611 210 | Time variable : year Number of groups = 140 211 | Number of instruments = 36 Obs per group: min = 4 212 | Wald chi2(0) = . avg = 4.36 213 | Prob > chi2 = . max = 6 214 | ------------------------------------------------------------------------------ 215 | | Corrected 216 | n | Coef. Std. Err. z P>|z| [95% Conf. 
Interval] 217 | -------------+---------------------------------------------------------------- 218 | n | 219 | L1. | .1700616 .1046652 1.62 0.104 -.0350784 .3752016 220 | L2. | -.0113381 .0377205 -0.30 0.764 -.0852688 .0625926 221 | | 222 | w | -.9510582 .1277298 -7.45 0.000 -1.201404 -.7007124 223 | k | .4637223 .0718328 6.46 0.000 .3229325 .6045121 224 | ------------------------------------------------------------------------------ 225 | Instruments for first differences equation 226 | Standard 227 | D.k 228 | GMM-type (missing=0, separate instruments for each period unless collapsed) 229 | L(1/3).w 230 | L(2/4).n 231 | ------------------------------------------------------------------------------ 232 | Arellano-Bond test for AR(1) in first differences: z = -1.19 Pr > z = 0.235 233 | Arellano-Bond test for AR(2) in first differences: z = -0.81 Pr > z = 0.417 234 | ------------------------------------------------------------------------------ 235 | Sargan test of overid. restrictions: chi2(32) = 91.61 Prob > chi2 = 0.000 236 | (Not robust, but not weakened by many instruments.) 237 | Hansen test of overid. restrictions: chi2(32) = 47.86 Prob > chi2 = 0.035 238 | (Robust, but weakened by many instruments.) 
239 | 240 | Difference-in-Hansen tests of exogeneity of instrument subsets: 241 | gmm(n, lag(2 4)) 242 | Hansen test excluding group: chi2(15) = 23.75 Prob > chi2 = 0.069 243 | Difference (null H = exogenous): chi2(17) = 24.11 Prob > chi2 = 0.117 244 | gmm(w, lag(1 3)) 245 | Hansen test excluding group: chi2(14) = 17.25 Prob > chi2 = 0.243 246 | Difference (null H = exogenous): chi2(18) = 30.61 Prob > chi2 = 0.032 247 | iv(k) 248 | Hansen test excluding group: chi2(31) = 38.33 Prob > chi2 = 0.171 249 | Difference (null H = exogenous): chi2(1) = 9.53 Prob > chi2 = 0.002 250 | 251 | ``` 252 | -------------------------------------------------------------------------------- /Benchmark/test_2.md: -------------------------------------------------------------------------------- 1 | # plm 2 | ``` 3 | library(plm) 4 | abdata=read.csv("data.csv") 5 | pd <- pdata.frame(abdata, index = c("id", "year"), drop.index = TRUE) 6 | z1<-pgmm(n ~ 1+ lag(n, 1:2) + w + k |lag(n, 2:4) + lag(w, 1:3), data=pd, effect='individual', 7 | model="twosteps" ,transformation='ld', robust = TRUE) 8 | summary(z1) 9 | 10 | ``` 11 | 12 | ``` 13 | pgmm(formula = n ~ lag(n, 1:2) + w + k | lag(n, 2:4) + lag(w, 14 | 1:3), data = pd, effect = "individual", model = "twosteps", 15 | transformation = "ld", robust = TRUE) 16 | 17 | Unbalanced Panel: n = 140, T = 7-9, N = 1031 18 | 19 | Number of Observations Used: 1362 20 | Residuals: 21 | Min. 1st Qu. Median Mean 3rd Qu. Max. 22 | -1.716999 -0.039468 0.000000 0.001151 0.049452 1.057841 23 | 24 | Coefficients: 25 | Estimate Std. Error z-value Pr(>|z|) 26 | lag(n, 1:2)1 0.993296 0.146555 6.7776 1.222e-11 *** 27 | lag(n, 1:2)2 -0.164000 0.107125 -1.5309 0.125791 28 | w 0.059379 0.028402 2.0906 0.036560 * 29 | k 0.140340 0.050027 2.8053 0.005027 ** 30 | --- 31 | Signif. 
codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 32 | 33 | Sargan test: chisq(47) = 105.7369 (p-value = 2.0581e-06) 34 | Autocorrelation test (1): normal = -1.926883 (p-value = 0.053994) 35 | Autocorrelation test (2): normal = -0.1281159 (p-value = 0.89806) 36 | Wald test for coefficients: chisq(4) = 8031.159 (p-value = < 2.22e-16) 37 | 38 | 39 | ``` 40 | 41 | # panelvar 42 | ``` 43 | library(panelvar) 44 | abdata=read.csv("data.csv") 45 | 46 | p1 <-pvargmm( 47 | dependent_vars = c("n"), 48 | lags = 2, 49 | predet_vars = c("w"), 50 | exog_vars=c("k"), 51 | transformation = "fd", 52 | data = abdata, 53 | panel_identifier = c("id", "year"), 54 | steps = c("twostep"), 55 | system_instruments = TRUE, 56 | max_instr_dependent_vars = 3, 57 | max_instr_predet_vars = 3, 58 | min_instr_dependent_vars = 1L, 59 | min_instr_predet_vars = 1L, 60 | collapse = FALSE, 61 | progressbar = FALSE 62 | ) 63 | summary(p1) 64 | 65 | ``` 66 | 67 | ``` 68 | ------------------------------------------------- 69 | Dynamic Panel VAR estimation, two-step GMM 70 | --------------------------------------------------- 71 | Transformation: First-differences 72 | Group variable: id 73 | Time variable: year 74 | Number of observations = 611 75 | Number of groups = 140 76 | Obs per group: min = 4 77 | avg = 4.364286 78 | max = 6 79 | Number of instruments = 51 80 | 81 | =================== 82 | n 83 | ------------------- 84 | lag1_n 0.9454 *** 85 | (0.1430) 86 | lag2_n -0.0860 87 | (0.1082) 88 | w -0.4478 ** 89 | (0.1522) 90 | k 0.1236 * 91 | (0.0509) 92 | const 1.5631 ** 93 | (0.4993) 94 | =================== 95 | *** p < 0.001; ** p < 0.01; * p < 0.05 96 | 97 | --------------------------------------------------- 98 | Instruments for equation 99 | Standard 100 | FD.(k) 101 | GMM-type 102 | Dependent vars: L(2, 4) 103 | Predet vars: L(1, 3) 104 | Collapse = FALSE 105 | --------------------------------------------------- 106 | 107 | Hansen test of overid. 
restrictions: chi2(46) = 96.44 Prob > chi2 = 0 108 | (Robust, but weakened by many instruments.) 109 | 110 | 111 | ``` 112 | 113 | # pdynmc 114 | 115 | ``` 116 | library(pdynmc) 117 | abdata=read.csv("data.csv") 118 | mc_1 <- pdynmc(dat=abdata,varname.i = "id", varname.t = "year", 119 | use.mc.diff = TRUE, use.mc.lev = TRUE, use.mc.nonlin = FALSE, 120 | include.y = TRUE, varname.y = "n", lagTerms.y = 2, maxLags.y=4, 121 | inst.stata = TRUE, include.x = TRUE, 122 | varname.reg.pre = c("w"), lagTerms.reg.pre = c(0), maxLags.reg.pre = c(3), 123 | fur.con = TRUE, fur.con.diff = TRUE, fur.con.lev = TRUE, 124 | varname.reg.fur = c("k"),lagTerms.reg.fur = c(0), 125 | w.mat = "iid.err", std.err = "corrected", estimation = "twostep", 126 | opt.meth = "none") 127 | summary(mc_1) 128 | mtest.fct(mc_1, order = 2) 129 | ``` 130 | ``` 131 | Error in mapply(ti = ti.temp, t.end = tend.temp, lagTerms = lagTerms, : non-numeric argument to binary operator 132 | Traceback: 133 | 134 | 1. pdynmc(dat = abdata, varname.i = "id", varname.t = "year", use.mc.diff = TRUE, 135 | . use.mc.lev = TRUE, use.mc.nonlin = FALSE, include.y = TRUE, 136 | . varname.y = "n", lagTerms.y = 2, maxLags.y = 4, inst.stata = TRUE, 137 | . include.x = TRUE, varname.reg.pre = c("w"), lagTerms.reg.pre = c(0), 138 | . maxLags.reg.pre = c(3), fur.con = TRUE, fur.con.diff = TRUE, 139 | . fur.con.lev = TRUE, varname.reg.fur = c("k"), lagTerms.reg.fur = c(0), 140 | . w.mat = "iid.err", std.err = "corrected", estimation = "twostep", 141 | . opt.meth = "none") 142 | 2. lapply(X = i_cases, FUN = Z_i.fct, Time = Time, varname.i = varname.i, 143 | . use.mc.diff = use.mc.diff, use.mc.lev = use.mc.lev, use.mc.nonlin = use.mc.nonlin, 144 | . use.mc.nonlinAS = use.mc.nonlinAS, include.y = include.y, 145 | . varname.y = varname.y, inst.stata = inst.stata, include.dum = include.dum, 146 | . dum.diff = dum.diff, dum.lev = dum.lev, colnames.dum = colnames.dum, 147 | . 
fur.con = fur.con, fur.con.diff = fur.con.diff, fur.con.lev = fur.con.lev, 148 | . varname.reg.estParam.fur = varname.reg.estParam.fur, include.x = include.x, 149 | . end.reg = end.reg, varname.reg.end = varname.reg.end, pre.reg = pre.reg, 150 | . varname.reg.pre = varname.reg.pre, ex.reg = ex.reg, varname.reg.ex = varname.reg.ex, 151 | . maxLags.y = maxLags.y, lagTerms.y = lagTerms.y, max.lagTerms = max.lagTerms, 152 | . maxLags.reg.end = maxLags.reg.end, maxLags.reg.pre = maxLags.reg.pre, 153 | . maxLags.reg.ex = maxLags.reg.ex, inst.reg.ex.expand = inst.reg.ex.expand, 154 | . dat = dat, dat.na = dat.na) 155 | 3. FUN(X[[i]], ...) 156 | 4. do.call(what = "cbind", args = sapply(FUN = LEV.pre.fct, i = i, 157 | . varname.ex.pre.temp, T.mcLev = T.mcLev.temp, use.mc.diff = use.mc.diff, 158 | . inst.stata = inst.stata, Time = Time, varname.i = varname.i, 159 | . lagTerms = max.lagTerms, dat = dat, dat.na = dat.na)) 160 | 5. sapply(FUN = LEV.pre.fct, i = i, varname.ex.pre.temp, T.mcLev = T.mcLev.temp, 161 | . use.mc.diff = use.mc.diff, inst.stata = inst.stata, Time = Time, 162 | . varname.i = varname.i, lagTerms = max.lagTerms, dat = dat, 163 | . dat.na = dat.na) 164 | 6. lapply(X = X, FUN = FUN, ...) 165 | 7. FUN(X[[i]], ...) 166 | 8. Matrix::bdiag(do.call(what = diag, args = list(mapply(ti = ti.temp, 167 | . t.end = tend.temp, lagTerms = lagTerms, FUN = datLEV.pre.fct, 168 | . varname = varname, MoreArgs = list(i = i, use.mc.diff = use.mc.diff, 169 | . inst.stata = inst.stata, dat = dat, dat.na = dat.na, 170 | . varname.i = varname.i, Time = Time)) * as.vector(!is.na(diff(dat.na[dat.na[, 171 | . varname.i] == i, varname][(lagTerms - 1):Time])))))) 172 | 9. do.call(what = diag, args = list(mapply(ti = ti.temp, t.end = tend.temp, 173 | . lagTerms = lagTerms, FUN = datLEV.pre.fct, varname = varname, 174 | . MoreArgs = list(i = i, use.mc.diff = use.mc.diff, inst.stata = inst.stata, 175 | . dat = dat, dat.na = dat.na, varname.i = varname.i, Time = Time)) * 176 | . 
as.vector(!is.na(diff(dat.na[dat.na[, varname.i] == i, varname][(lagTerms - 177 | . 1):Time]))))) 178 | 179 | ``` 180 | # pydynpd 181 | ``` 182 | import pandas as pd 183 | from pydynpd import regression 184 | df=pd.read_csv("data.csv") 185 | 186 | mydpd = regression.abond('n L(1:2).n w k | gmm(n, 2:4) gmm(w, 1:3) iv(k)', df, ['id', 'year']) 187 | ``` 188 | ``` 189 | Dynamic panel-data estimation, two-step system GMM 190 | Group variable: id Number of obs = 751 191 | Time variable: year Min obs per group: 5 192 | Number of instruments = 51 Max obs per group: 7 193 | Number of groups = 140 Avg obs per group: 5.36 194 | +------+------------+---------------------+------------+-----------+-----+ 195 | | n | coef. | Corrected Std. Err. | z | P>|z| | | 196 | +------+------------+---------------------+------------+-----------+-----+ 197 | | L1.n | 0.9453810 | 0.1429764 | 6.6121470 | 0.0000000 | *** | 198 | | L2.n | -0.0860069 | 0.1082318 | -0.7946553 | 0.4268140 | | 199 | | w | -0.4477795 | 0.1521917 | -2.9422068 | 0.0032588 | ** | 200 | | k | 0.1235808 | 0.0508836 | 2.4286941 | 0.0151533 | * | 201 | | _con | 1.5630849 | 0.4993484 | 3.1302492 | 0.0017466 | ** | 202 | +------+------------+---------------------+------------+-----------+-----+ 203 | Hansen test of overid. restrictions: chi(46) = 96.442 Prob > Chi2 = 0.000 204 | Arellano-Bond test for AR(1) in first differences: z = -2.35 Pr > z =0.019 205 | Arellano-Bond test for AR(2) in first differences: z = -1.15 Pr > z =0.251 206 | ``` 207 | 208 | 209 | 210 | 211 | # xtabond2 212 | 213 | ``` 214 | mata: mata set matafavor speed, perm 215 | insheet using "data.csv" 216 | xtset(id year) 217 | xtabond2 n L(1/2).n w k , gmm(n, lag(2 4)) gmm(w, lag(1 3)) iv(k ) nolevel twostep robust 218 | 219 | ``` 220 | ``` 221 | 222 | Favoring speed over space. To switch, type or click on mata: mata set matafavor space, perm. 223 | Warning: Two-step estimated covariance matrix of moments is singular. 
224 | Using a generalized inverse to calculate optimal weighting matrix for two-step estimation. 225 | Difference-in-Sargan/Hansen statistics may be negative. 226 | 227 | Dynamic panel-data estimation, two-step difference GMM 228 | ------------------------------------------------------------------------------ 229 | Group variable: id Number of obs = 611 230 | Time variable : year Number of groups = 140 231 | Number of instruments = 36 Obs per group: min = 4 232 | Wald chi2(0) = . avg = 4.36 233 | Prob > chi2 = . max = 6 234 | ------------------------------------------------------------------------------ 235 | | Corrected 236 | n | Coef. Std. Err. z P>|z| [95% Conf. Interval] 237 | -------------+---------------------------------------------------------------- 238 | n | 239 | L1. | .1700616 .1046652 1.62 0.104 -.0350784 .3752016 240 | L2. | -.0113381 .0377205 -0.30 0.764 -.0852688 .0625926 241 | | 242 | w | -.9510582 .1277298 -7.45 0.000 -1.201404 -.7007124 243 | k | .4637223 .0718328 6.46 0.000 .3229325 .6045121 244 | ------------------------------------------------------------------------------ 245 | Instruments for first differences equation 246 | Standard 247 | D.k 248 | GMM-type (missing=0, separate instruments for each period unless collapsed) 249 | L(1/3).w 250 | L(2/4).n 251 | ------------------------------------------------------------------------------ 252 | Arellano-Bond test for AR(1) in first differences: z = -1.19 Pr > z = 0.235 253 | Arellano-Bond test for AR(2) in first differences: z = -0.81 Pr > z = 0.417 254 | ------------------------------------------------------------------------------ 255 | Sargan test of overid. restrictions: chi2(32) = 91.61 Prob > chi2 = 0.000 256 | (Not robust, but not weakened by many instruments.) 257 | Hansen test of overid. restrictions: chi2(32) = 47.86 Prob > chi2 = 0.035 258 | (Robust, but weakened by many instruments.) 
259 | 260 | Difference-in-Hansen tests of exogeneity of instrument subsets: 261 | gmm(n, lag(2 4)) 262 | Hansen test excluding group: chi2(15) = 23.75 Prob > chi2 = 0.069 263 | Difference (null H = exogenous): chi2(17) = 24.11 Prob > chi2 = 0.117 264 | gmm(w, lag(1 3)) 265 | Hansen test excluding group: chi2(14) = 17.25 Prob > chi2 = 0.243 266 | Difference (null H = exogenous): chi2(18) = 30.61 Prob > chi2 = 0.032 267 | iv(k) 268 | Hansen test excluding group: chi2(31) = 38.33 Prob > chi2 = 0.171 269 | Difference (null H = exogenous): chi2(1) = 9.53 Prob > chi2 = 0.002 270 | 271 | ``` 272 | 273 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Dazhong Wu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /Main.py: -------------------------------------------------------------------------------- 1 | 2 | import pandas as pd 3 | from pydynpd import regression 4 | 5 | import time 6 | 7 | 8 | df = pd.read_csv("test_data.csv") 9 | 10 | 11 | #command_str='y L(1:?).y L(1:?).x | gmm(y, 2:3) iv(L(1:1).x)| timedumm' 12 | #mydpd = regression.abond(command_str, df, ['id', 'year']) 13 | df = pd.read_csv("data.csv") 14 | #mydpd = regression.abond('n L(1:2).n w k | gmm(n, 2:4) gmm(w, 1:3) iv(k) |nolevel fod ', df, ['id', 'year']) 15 | command_str='n L(1:?).n w k | gmm(n, 2:3) pred(w k)| fod' 16 | mydpd = regression.abond(command_str, df, ['id', 'year']) 17 | 18 | for i in range(0, len(mydpd.models)): 19 | print("model", end=" ") 20 | print(i+1, end=": bic= ") 21 | print(mydpd.models[i].MMSC_LU["bic"], end = "; hqic=") 22 | print(mydpd.models[i].MMSC_LU["hqic"], end="; aic=") 23 | print(mydpd.models[i].MMSC_LU["aic"]) 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /Main2.py: -------------------------------------------------------------------------------- 1 | 2 | import pandas as pd 3 | from pydynpd import regression 4 | 5 | import time 6 | 7 | a=time.time() 8 | 9 | 10 | df = pd.read_csv("data.csv") 11 | for i in range(0,101): 12 | #mydpd = regression.abond('n L(1:2).n w k | gmm(n, 2:4) gmm(w, 1:3) iv(k)', df, ['id', 'year']) 13 | mydpd = regression.abond('n L(1:2).n w k | gmm(n, 2:.) 
pred(w k)', df, ['id', 'year']) 14 | 15 | print(time.time()-a) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pydynpd: A python package for dynamic panel model 2 | [![DOI](https://zenodo.org/badge/466146436.svg)](https://zenodo.org/badge/latestdoi/466146436) 3 | [![pypi package](https://img.shields.io/pypi/v/pydynpd?style=plastic)](https://pypi.org/project/pydynpd/) 4 | 5 | pydynpd is the first python package to implement Difference and System GMM [1][2][3] to estimate dynamic panel data models. 6 | 7 | Below is a typical dynamic panel data model: 8 | 9 | ![y_{it}=\sum^p_{l=1} \alpha_l y_{i,t-l}+\beta x_{i,t}+\gamma s_{i,t}+u_i+\epsilon_{it}](https://latex.codecogs.com/svg.image?y_{it}=\sum^p_{l=1}&space;\alpha_l&space;y_{i,t-l}+\beta&space;x_{i,t}+\gamma&space;s_{i,t}+u_i+\epsilon_{it}) 10 | 11 | In the equation above, x is a predetermined variable that is potentially correlated with past errors, s is a strictly exogenous variable, and u is fixed effect. 12 | 13 | This software has been published in Journal of Open Source Software: 14 | ``` 15 | Wu et al., (2023). pydynpd: A Python package for dynamic panel model. 16 | Journal of Open Source Software, 8(83), 4416, https://doi.org/10.21105/joss.04416 17 | ``` 18 | ## Features supported: 19 | * Differene and System GMM 20 | * One-step, two-step, and iterative estimates 21 | * First-difference and forward orthogonal deviation transformations 22 | * Robust standard errors. For two-step GMM, the calculation suggested by Windmeijer (2005) is used. 
23 | * Hansen over-identification test 24 | * Arellano-Bond test for autocorrelation 25 | * Time dummies 26 | * Collapse GMM instruments to limit instrument proliferation 27 | * Search for models based on users' request, rather than just run the model specified by users as other packages do 28 | 29 | 30 | ## Installation: 31 | ``` 32 | pip install pydynpd 33 | ``` 34 | This package requires: numpy, scipy, pandas, and PrettyTable 35 | 36 | ## Usage: 37 | ``` 38 | import pandas as pd 39 | from pydynpd import regression 40 | 41 | df = pd.read_csv("data.csv") 42 | command_str='n L(1:2).n w k | gmm(n, 2:4) gmm(w, 1:3) iv(k) | timedumm nolevel' 43 | mydpd = regression.abond(command_str, df, ['id', 'year']) 44 | ``` 45 | result: 46 | ``` 47 | Dynamic panel-data estimation, two-step difference GMM 48 | Group variable: id Number of obs = 611 49 | Time variable: year Number of groups = 140 50 | Number of instruments = 42 51 | +-----------+------------+---------------------+------------+-----------+ 52 | | n | coef. | Corrected Std. Err. | z | P>|z| | 53 | +-----------+------------+---------------------+------------+-----------+ 54 | | L1.n | 0.2710675 | 0.1382542 | 1.9606462 | 0.0499203 | 55 | | L2.n | -0.0233928 | 0.0419665 | -0.5574151 | 0.5772439 | 56 | | w | -0.5668527 | 0.2092231 | -2.7093219 | 0.0067421 | 57 | | k | 0.3613939 | 0.0662624 | 5.4539824 | 0.0000000 | 58 | | year_1979 | 0.0011898 | 0.0092322 | 0.1288765 | 0.8974554 | 59 | | year_1980 | -0.0316432 | 0.0116155 | -2.7242254 | 0.0064453 | 60 | | year_1981 | -0.0900163 | 0.0206593 | -4.3571693 | 0.0000132 | 61 | | year_1982 | -0.0996210 | 0.0296036 | -3.3651654 | 0.0007650 | 62 | | year_1983 | -0.0693308 | 0.0404276 | -1.7149347 | 0.0863572 | 63 | | year_1984 | -0.0614505 | 0.0475525 | -1.2922666 | 0.1962648 | 64 | +-----------+------------+---------------------+------------+-----------+ 65 | Hansen test of overid. 
restrictions: chi(32) = 32.666 Prob > Chi2 = 0.434 66 | Arellano-Bond test for AR(1) in first differences: z = -1.29 Pr > z =0.198 67 | Arellano-Bond test for AR(2) in first differences: z = -0.31 Pr > z =0.760 68 | ``` 69 | ## Tutorial 70 | A detailed tutorial is given in the following two documents:
71 | [inputs of the abond command](https://github.com/dazhwu/pydynpd/blob/main/vignettes/Tutorial.ipynb).
72 | [outputs of the abond command](https://github.com/dazhwu/pydynpd/blob/main/vignettes/API.md). 73 | 74 | ## Similar packages 75 | The objective of the package is similar to the following open-source packages:
76 | Package | Language | version 77 | --- | --- | --- 78 | plm | R | 2.6-1 79 | panelvar | R| 0.5.3 80 | pdynmc | R| 0.9.7 81 | 82 | To compare pydynpd with similar packages, we performed performance tests. More specifically, in each test for each package we run 100 times to estimate the same model with the same data. For verification, the tests also include Stata package xtabond2 though Stata is a commercial software. We use xtabond2 for regression result verification because it is the most popular package in estimating dynamic panel models. Figure below is from one of the tests. Note that directly comparing xtabond2's speed with R or Python packages is a little unfair because the calculation part of xtabond2 was compiled while pydynpd and the three R packages are interpreted; xtabond2 should have a clear advantage on speed. 83 | 84 | ![Alt text](https://raw.githubusercontent.com/dazhwu/pydynpd/main/Benchmark/images/Test_1.svg) 85 | 86 | Though developed in pure python, pydynpd is not far behind of xtabond2. Moreover, it is significanly faster than the three R packages which are interpreted scripts just like pydynpd. 87 | 88 | A detailed description of the tests can be found [here](https://github.com/dazhwu/pydynpd/blob/main/Benchmark/performance_comparison.md) 89 | 90 | ## FAQs 91 | ### How to extract coefficients from regression? 92 | For example, if you run: 93 | ``` 94 | df = pd.read_csv("data.csv") 95 | mydpd = regression.abond('n L(1:2).n w k | gmm(n, 2:4) gmm(w, 1:3) iv(k) ', df, ['id', 'year']) 96 | ``` 97 | 98 | The output regression table will be 99 | ``` 100 | +------+------------+---------------------+------------+-----------+-----+ 101 | | n | coef. | Corrected Std. Err. 
| z | P>|z| | | 102 | +------+------------+---------------------+------------+-----------+-----+ 103 | | L1.n | 0.9453810 | 0.1429764 | 6.6121470 | 0.0000000 | *** | 104 | | L2.n | -0.0860069 | 0.1082318 | -0.7946553 | 0.4268140 | | 105 | | w | -0.4477795 | 0.1521917 | -2.9422068 | 0.0032588 | ** | 106 | | k | 0.1235808 | 0.0508836 | 2.4286941 | 0.0151533 | * | 107 | | _con | 1.5630849 | 0.4993484 | 3.1302492 | 0.0017466 | ** | 108 | +------+------------+---------------------+------------+-----------+-----+ 109 | ``` 110 | If you want to programably extract a value, for example, the first z value (6.6121470) then you can add the following: 111 | ``` 112 | >>>mydpd.models[0].regression_table.iloc[0]['z_value'] 113 | 6.6121469997085915 114 | ``` 115 | Basically, the object mydpd returned above contains models because pydynpd allows us to run and compare multiple models at the same time. By default, it only contains one model which is models[0]. A model has a regression table which is a pandas dataframe: 116 | ``` 117 | >>>mydpd.models[0].regression_table 118 | 119 | variable coefficient std_err z_value p_value sig 120 | 0 L1.n 0.945381 0.142976 6.612147 3.787856e-11 *** 121 | 1 L2.n -0.086007 0.108232 -0.794655 4.268140e-01 122 | 2 w -0.447780 0.152192 -2.942207 3.258822e-03 ** 123 | 3 k 0.123581 0.050884 2.428694 1.515331e-02 * 124 | 4 _con 1.563085 0.499348 3.130249 1.746581e-03 ** 125 | 126 | ``` 127 | So you can extract any value from this dataframe. 128 | 129 | ### How to use pydynpd with R? 130 | First, you need to install Python on your computer; then install pydynpd. 131 | ``` 132 | pip install pydynpd 133 | ``` 134 | Second, in R environment install package reticulate: 135 | ``` 136 | install.packages("reticulate") 137 | ``` 138 | Third, you configure Rstudio so that it can communicate with Python installed in step 1. 
You can find instruction at 139 | https://www.rstudio.com/blog/rstudio-v1-4-preview-python-support/ 140 | 141 | Finally, you can use the following template to call pydynpd from R. For comparision, the corresponding Python code is also incuded. 142 | 143 | 144 | 145 | 154 | 155 | 156 | 163 | 164 |
R
146 | library(reticulate) 
147 | dynpd <- import("pydynpd.regression", convert = TRUE)
148 | fd <- import("pandas", convert=TRUE)
149 | df <- fd$read_csv("data.csv")
150 | 
151 | result <- dynpd$abond('n L(1:2).n w k | gmm(n, 2:4) gmm(w, 1:3) iv(k)', df, c('id', 'year'))
152 | 
153 |
Python
157 | import pandas as pd
158 | from  pydynpd import regression
159 | df = pd.read_csv("data.csv")
160 | mydpd = regression.abond('n L(1:2).n w k | gmm(n, 2:4) gmm(w, 1:3) iv(k)', df, ['id', 'year'])
161 | 
162 |
165 | 166 | Code above generates the following result: 167 | ``` 168 | Dynamic panel-data estimation, two-step system GMM 169 | Group variable: id Number of obs = 611 170 | Time variable: year Min obs per group: 4 171 | Number of instruments = 51 Max obs per group: 6 172 | Number of groups = 140 Avg obs per group: 4.36 173 | +------+------------+---------------------+------------+-----------+-----+ 174 | | n | coef. | Corrected Std. Err. | z | P>|z| | | 175 | +------+------------+---------------------+------------+-----------+-----+ 176 | | L1.n | 0.9453810 | 0.1429764 | 6.6121470 | 0.0000000 | *** | 177 | | L2.n | -0.0860069 | 0.1082318 | -0.7946553 | 0.4268140 | | 178 | | w | -0.4477795 | 0.1521917 | -2.9422068 | 0.0032588 | ** | 179 | | k | 0.1235808 | 0.0508836 | 2.4286941 | 0.0151533 | * | 180 | | _con | 1.5630849 | 0.4993484 | 3.1302492 | 0.0017466 | ** | 181 | +------+------------+---------------------+------------+-----------+-----+ 182 | Hansen test of overid. restrictions: chi(46) = 96.442 Prob > Chi2 = 0.000 183 | Arellano-Bond test for AR(1) in first differences: z = -2.35 Pr > z =0.019 184 | Arellano-Bond test for AR(2) in first differences: z = -1.15 Pr > z =0.251 185 | ``` 186 | As you can see, you don't need to change the command string in R. The only parameter you have to change is the identifiers; ['id', 'year'] in Python is changed to c('id', 'year') in R. Also, from R you can access the properties of the result above the same way you work on Python. 
For example, after running code above if you run the following R script: 187 | ``` 188 | reg_table=result$models[[1]]$regression_table 189 | print(reg_table) 190 | ``` 191 | The output is: 192 | ``` 193 | variable coefficient std_err z_value p_value sig 194 | 1 L1.n 0.94538100 0.14297640 6.6121470 3.787856e-11 *** 195 | 2 L2.n -0.08600694 0.10823176 -0.7946553 4.268140e-01 196 | 3 w -0.44777955 0.15219173 -2.9422068 3.258822e-03 ** 197 | 4 k 0.12358078 0.05088363 2.4286941 1.515331e-02 * 198 | 5 _con 1.56308487 0.49934839 3.1302492 1.746581e-03 ** 199 | ``` 200 | In the example above, reg_table is an R data frame. 201 | 202 | ## Contributing 203 | There are several ways to contribute to pydynpd: 204 | 205 | Submit issue/bug reports [here](https://github.com/dazhwu/pydynpd/issues/), or try to fix the problem yourself and then submit a [pull request](https://github.com/dazhwu/pydynpd/pulls). 206 | 207 | Browse the source code and see if anything looks out of place - let us know! 208 | 209 | ## References 210 | [1] 211 | Arellano, M., & Bond, S. (1991). Some tests of specification for panel data: Monte Carlo evidence and an application to employment equations. The review of economic studies, 58(2), 277-297. 212 | 213 | [2] 214 | Arellano, M., & Bover, O. (1995). Another look at the instrumental variable estimation of error-components models. Journal of econometrics, 68(1), 29-51. 215 | 216 | [3] 217 | Blundell, R., & Bond, S. (1998). Initial conditions and moment restrictions in dynamic panel data models. Journal of econometrics, 87(1), 115-143. 218 | 219 | [4] 220 | Roodman, D. (2009). How to do xtabond2: An introduction to difference and system GMM in Stata. The stata journal, 9(1), 86-136. 221 | 222 | [5] 223 | Windmeijer, F. (2005). A finite sample correction for the variance of linear efficient two-step GMM estimators. Journal of econometrics, 126(1), 25-51. 
224 | -------------------------------------------------------------------------------- /build_upload.sh: -------------------------------------------------------------------------------- 1 | rm -rf dist/* 2 | python3 -m build 3 | python3 -m twine upload --repository pypi dist/*.whl 4 | -------------------------------------------------------------------------------- /contributing.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | There are several ways to contribute to pydynpd! 4 | - Submit issue/bug reports [here](https://github.com/wouterboomsma/eigency/issues), 5 | or try to fix the problem yourself and then [submit a pull request](https://github.com/wouterboomsma/eigency/pulls). 6 | - Request features or ask questions [here](https://github.com/wouterboomsma/eigency/issues). 7 | - Browse [the source code](https://github.com/wouterboomsma/eigency) and see if anything looks out of place - let us know! 8 | 9 | -------------------------------------------------------------------------------- /output.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/output.html -------------------------------------------------------------------------------- /pydynpd.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 2.1 2 | Name: pydynpd 3 | Version: 0.2.0 4 | Summary: A package to estimate dynamic panel data model using difference GMM and system GMM. 
5 | Home-page: https://github.com/dazhwu/pydynpd 6 | Author: Dazhong Wu 7 | Author-email: wudz800@gmail.com 8 | License: MIT 9 | Classifier: License :: OSI Approved :: MIT License 10 | Classifier: Programming Language :: Python 11 | Classifier: Programming Language :: Python :: 3 12 | Classifier: Programming Language :: Python :: 3.6 13 | Classifier: Programming Language :: Python :: Implementation :: CPython 14 | Classifier: Programming Language :: Python :: Implementation :: PyPy 15 | Requires-Python: >=3.6.0 16 | Description-Content-Type: text/markdown 17 | License-File: LICENSE 18 | 19 | 20 | # pydynpd: Dynamic panel estimation for Difference and System GMM (generalized method-of-moments) 21 | [![DOI](https://zenodo.org/badge/466146436.svg)](https://zenodo.org/badge/latestdoi/466146436) 22 | [![pypi package](https://img.shields.io/pypi/v/pydynpd?style=plastic)](https://pypi.org/project/pydynpd/) 23 | 24 | pydynpd is the first python package to implement Difference and System GMM [1][2][3] to estimate dynamic panel data models. 25 | 26 | Below is a typical dynamic panel data model: 27 | 28 | ![y_{it}=\sum^p_{l=1} \alpha_l y_{i,t-l}+\beta x_{i,t}+\gamma s_{i,t}+u_i+\epsilon_{it}](https://latex.codecogs.com/svg.image?y_{it}=\sum^p_{l=1}&space;\alpha_l&space;y_{i,t-l}+\beta&space;x_{i,t}+\gamma&space;s_{i,t}+u_i+\epsilon_{it}) 29 | 30 | In the equation above, x is a predetermined variable that is potentially correlated with past errors, s is a strictly exogenous variable, and u is fixed effect. 31 | 32 | ## Features supported: 33 | * Differene and System GMM 34 | * One-step, two-step, and iterative estimates 35 | * First-difference and forward orthogonal deviation transformations 36 | * Robust standard errors. For two-step GMM, the calculation suggested by Windmeijer (2005) is used. 
37 | * Hansen over-identification test 38 | * Arellano-Bond test for autocorrelation 39 | * Time dummies 40 | * Collapse GMM instruments to limit instrument proliferation 41 | * Search for models based on users' request, rather than just run the model specified by users as other packages do 42 | 43 | 44 | ## Installation: 45 | ``` 46 | pip install pydynpd 47 | ``` 48 | This package requires: numpy, scipy, pandas, and PrettyTable 49 | 50 | ## Usage: 51 | ``` 52 | import pandas as pd 53 | from pydynpd import regression 54 | 55 | df = pd.read_csv("data.csv") 56 | command_str='n L(1:2).n w k | gmm(n, 2:4) gmm(w, 1:3) iv(k) | timedumm nolevel' 57 | mydpd = regression.abond(command_str, df, ['id', 'year']) 58 | ``` 59 | result: 60 | ``` 61 | Dynamic panel-data estimation, two-step difference GMM 62 | Group variable: id Number of obs = 611 63 | Time variable: year Number of groups = 140 64 | Number of instruments = 42 65 | +-----------+------------+---------------------+------------+-----------+ 66 | | n | coef. | Corrected Std. Err. | z | P>|z| | 67 | +-----------+------------+---------------------+------------+-----------+ 68 | | L1.n | 0.2710675 | 0.1382542 | 1.9606462 | 0.0499203 | 69 | | L2.n | -0.0233928 | 0.0419665 | -0.5574151 | 0.5772439 | 70 | | w | -0.5668527 | 0.2092231 | -2.7093219 | 0.0067421 | 71 | | k | 0.3613939 | 0.0662624 | 5.4539824 | 0.0000000 | 72 | | year_1979 | 0.0011898 | 0.0092322 | 0.1288765 | 0.8974554 | 73 | | year_1980 | -0.0316432 | 0.0116155 | -2.7242254 | 0.0064453 | 74 | | year_1981 | -0.0900163 | 0.0206593 | -4.3571693 | 0.0000132 | 75 | | year_1982 | -0.0996210 | 0.0296036 | -3.3651654 | 0.0007650 | 76 | | year_1983 | -0.0693308 | 0.0404276 | -1.7149347 | 0.0863572 | 77 | | year_1984 | -0.0614505 | 0.0475525 | -1.2922666 | 0.1962648 | 78 | +-----------+------------+---------------------+------------+-----------+ 79 | Hansen test of overid. 
restrictions: chi(32) = 32.666 Prob > Chi2 = 0.434 80 | Arellano-Bond test for AR(1) in first differences: z = -1.29 Pr > z =0.198 81 | Arellano-Bond test for AR(2) in first differences: z = -0.31 Pr > z =0.760 82 | ``` 83 | ## Tutorial 84 | A detailed tutorial is [here](https://github.com/dazhwu/pydynpd/blob/main/vignettes/Tutorial.ipynb). 85 | 86 | ## Similar packages 87 | The objective of the package is similar to the following open-source packages:
88 | Package | Language | version 89 | --- | --- | --- 90 | plm | R | 2.6-1 91 | panelvar | R| 0.5.3 92 | pdynmc | R| 0.9.7 93 | 94 | To compare pydynpd with similar packages, we performed performance tests. More specifically, in each test for each package we run 100 times to estimate the same model with the same data. For verification, the tests also include Stata package xtabond2 though Stata is a commercial software. We use xtabond2 for regression result verification because it is the most popular package in estimating dynamic panel models. Figure below is from one of the tests. Note that directly comparing xtabond2's speed with R or Python packages is a little unfair because the calculation part of xtabond2 was compiled while pydynpd and the three R packages are interpreted; xtabond2 should have a clear advantage on speed. 95 | 96 | ![Alt text](https://raw.githubusercontent.com/dazhwu/pydynpd/main/Benchmark/images/Test_1.svg) 97 | 98 | Though developed in pure python, pydynpd is not far behind of xtabond2. Moreover, it is significanly faster than the three R packages which are interpreted scripts just like pydynpd. 99 | 100 | A detailed description of the tests can be found [here](https://github.com/dazhwu/pydynpd/blob/main/Benchmark/performance_comparison.md) 101 | 102 | ## FAQs 103 | 1. How to extract coefficients from regression? 104 | For example, if you run: 105 | ``` 106 | df = pd.read_csv("data.csv") 107 | mydpd = regression.abond('n L(1:2).n w k | gmm(n, 2:4) gmm(w, 1:3) iv(k) ', df, ['id', 'year']) 108 | ``` 109 | 110 | The output regression table will be 111 | ``` 112 | +------+------------+---------------------+------------+-----------+-----+ 113 | | n | coef. | Corrected Std. Err. 
| z | P>|z| | | 114 | +------+------------+---------------------+------------+-----------+-----+ 115 | | L1.n | 0.9453810 | 0.1429764 | 6.6121470 | 0.0000000 | *** | 116 | | L2.n | -0.0860069 | 0.1082318 | -0.7946553 | 0.4268140 | | 117 | | w | -0.4477795 | 0.1521917 | -2.9422068 | 0.0032588 | ** | 118 | | k | 0.1235808 | 0.0508836 | 2.4286941 | 0.0151533 | * | 119 | | _con | 1.5630849 | 0.4993484 | 3.1302492 | 0.0017466 | ** | 120 | +------+------------+---------------------+------------+-----------+-----+ 121 | ``` 122 | If you want to programably extract a value, for example, the first z value (6.6121470) then you can add the following: 123 | ``` 124 | >>>mydpd.models[0].regression_table.iloc[0]['z_value'] 125 | 6.6121469997085915 126 | ``` 127 | Basically, the object mydpd returned above contains models because pydynpd allows us to run and compare multiple models at the same time. By default, it only contains one model which is models[0]. A model has a regression table which is a pandas dataframe: 128 | ``` 129 | >>>mydpd.models[0].regression_table 130 | 131 | variable coefficient std_err z_value p_value sig 132 | 0 L1.n 0.945381 0.142976 6.612147 3.787856e-11 *** 133 | 1 L2.n -0.086007 0.108232 -0.794655 4.268140e-01 134 | 2 w -0.447780 0.152192 -2.942207 3.258822e-03 ** 135 | 3 k 0.123581 0.050884 2.428694 1.515331e-02 * 136 | 4 _con 1.563085 0.499348 3.130249 1.746581e-03 ** 137 | 138 | ``` 139 | So you can extract any value from this dataframe. 140 | 141 | 142 | ## Contributing 143 | There are several ways to contribute to pydynpd: 144 | 145 | Submit issue/bug reports [here](https://github.com/dazhwu/pydynpd/issues/), or try to fix the problem yourself and then submit a [pull request](https://github.com/dazhwu/pydynpd/pulls). 146 | 147 | Browse the source code and see if anything looks out of place - let us know! 148 | 149 | ## References 150 | [1] 151 | Arellano, M., & Bond, S. (1991). 
Some tests of specification for panel data: Monte Carlo evidence and an application to employment equations. The review of economic studies, 58(2), 277-297. 152 | 153 | [2] 154 | Arellano, M., & Bover, O. (1995). Another look at the instrumental variable estimation of error-components models. Journal of econometrics, 68(1), 29-51. 155 | 156 | [3] 157 | Blundell, R., & Bond, S. (1998). Initial conditions and moment restrictions in dynamic panel data models. Journal of econometrics, 87(1), 115-143. 158 | 159 | [4] 160 | Roodman, D. (2009). How to do xtabond2: An introduction to difference and system GMM in Stata. The stata journal, 9(1), 86-136. 161 | 162 | [5] 163 | Windmeijer, F. (2005). A finite sample correction for the variance of linear efficient two-step GMM estimators. Journal of econometrics, 126(1), 25-51. 164 | -------------------------------------------------------------------------------- /pydynpd.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | LICENSE 2 | README.md 3 | pyproject.toml 4 | setup.py 5 | pydynpd/__init__.py 6 | pydynpd/__version__.py 7 | pydynpd/command.py 8 | pydynpd/common_functions.py 9 | pydynpd/dynamic_panel_model.py 10 | pydynpd/info.py 11 | pydynpd/instruments.py 12 | pydynpd/model_organizer.py 13 | pydynpd/model_summary.py 14 | pydynpd/panel_data.py 15 | pydynpd/regression.py 16 | pydynpd/specification_tests.py 17 | pydynpd/variable.py 18 | pydynpd.egg-info/PKG-INFO 19 | pydynpd.egg-info/SOURCES.txt 20 | pydynpd.egg-info/dependency_links.txt 21 | pydynpd.egg-info/requires.txt 22 | pydynpd.egg-info/top_level.txt -------------------------------------------------------------------------------- /pydynpd.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /pydynpd.egg-info/requires.txt: 
-------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | prettytable 4 | pandas 5 | -------------------------------------------------------------------------------- /pydynpd.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | pydynpd 2 | -------------------------------------------------------------------------------- /pydynpd/.idea/.name: -------------------------------------------------------------------------------- 1 | dynamic_panel_model.py -------------------------------------------------------------------------------- /pydynpd/.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 12 | -------------------------------------------------------------------------------- /pydynpd/.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /pydynpd/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /pydynpd/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /pydynpd/.idea/pydynpd.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /pydynpd/.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /pydynpd/.idea/workspace.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 14 | 15 | 17 | 18 | 20 | 21 | 22 | 23 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 1647611579337 37 | 43 | 44 | 45 | 46 | 48 | -------------------------------------------------------------------------------- /pydynpd/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/pydynpd/__init__.py -------------------------------------------------------------------------------- /pydynpd/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/pydynpd/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /pydynpd/__pycache__/command.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/pydynpd/__pycache__/command.cpython-39.pyc -------------------------------------------------------------------------------- /pydynpd/__pycache__/common_functions.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/pydynpd/__pycache__/common_functions.cpython-39.pyc -------------------------------------------------------------------------------- /pydynpd/__pycache__/info.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/pydynpd/__pycache__/info.cpython-39.pyc 
-------------------------------------------------------------------------------- /pydynpd/__pycache__/panel_data.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/pydynpd/__pycache__/panel_data.cpython-39.pyc -------------------------------------------------------------------------------- /pydynpd/__pycache__/regression.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/pydynpd/__pycache__/regression.cpython-39.pyc -------------------------------------------------------------------------------- /pydynpd/__pycache__/specification_tests.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/pydynpd/__pycache__/specification_tests.cpython-39.pyc -------------------------------------------------------------------------------- /pydynpd/__pycache__/variable.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/pydynpd/__pycache__/variable.cpython-39.pyc -------------------------------------------------------------------------------- /pydynpd/__version__.py: -------------------------------------------------------------------------------- 1 | # 8b d8 Yb dP 88""Yb db dP""b8 88 dP db dP""b8 888888 2 | # 88b d88 YbdP 88__dP dPYb dP `" 88odP dPYb dP `" 88__ 3 | # 88YbdP88 8P 88""" dP__Yb Yb 88"Yb dP__Yb Yb "88 88"" 4 | # 88 YY 88 dP 88 dP""""Yb YboodP 88 Yb dP""""Yb YboodP 888888 5 | 6 | VERSION = (5, 2, 0) 7 | 8 | __version__ = '.'.join(map(str, VERSION)) 9 | -------------------------------------------------------------------------------- 
/pydynpd/command.py: -------------------------------------------------------------------------------- 1 | import re 2 | import sys 3 | from sys import exit 4 | 5 | from pydynpd.info import options_info 6 | from pydynpd.variable import gmm_var, regular_variable 7 | 8 | 9 | class temp_list: 10 | def __init__(self, cols): 11 | self.names = [] 12 | self.lags = [] 13 | self.cols = cols 14 | self.adjustable_min_lags = [] # True False 15 | self.adjustable_max_lags = [] # True False 16 | 17 | def insert(self, name, lags, min_adj_lag=False, max_adj_lag=False): 18 | if name not in self.cols: 19 | return -1 20 | 21 | if name not in self.names: 22 | self.names.append(name) 23 | self.lags.append(lags) 24 | self.adjustable_max_lags.append(max_adj_lag) 25 | self.adjustable_min_lags.append(min_adj_lag) 26 | else: 27 | the_index = self.names.index(name) 28 | self.lags[the_index] += lags 29 | if min_adj_lag == True: 30 | self.adjustable_min_lags[the_index] = True 31 | 32 | if max_adj_lag == True: 33 | self.adjustable_max_lags[the_index] = True 34 | 35 | return 0 36 | 37 | def purge(self): 38 | self.lags = [sorted(list(set(the_list))) for the_list in self.lags] 39 | 40 | def check_contiguous(self): 41 | for i in range(len(self.names)): 42 | the_list = self.lags[i] 43 | if the_list != list(range(min(the_list), max(the_list) + 1)): 44 | print('variable ' + self.name[i] + ' has gaps') 45 | exit() 46 | 47 | 48 | class command(object): 49 | 50 | def __init__(self, command_str, df_col_names): 51 | self.command_str = command_str 52 | self.cols = df_col_names 53 | self._temp_part1_list = temp_list(df_col_names) 54 | self._temp_iv_list = temp_list(df_col_names) 55 | self.variables = None 56 | self.options = options_info() 57 | self.dep_GMM = None 58 | 59 | self.list_Dgmm = [] 60 | self.list_Lgmm = [] 61 | # self.adjustable={} 62 | # self.adjustable['indep']=[] 63 | 64 | self.parse_command() 65 | 66 | def parse_command(self): 67 | command_str = self.command_str 68 | parts = 
command_str.split('|') 69 | if len(parts) <= 1: 70 | print('There should be at least two parts in command string') 71 | exit() 72 | 73 | if len(parts) > 3: 74 | print('too many parts') 75 | exit() 76 | 77 | if len(parts) == 3: 78 | self.part_3 = parts[2] 79 | self.options = self.parse_options(self.part_3) 80 | else: 81 | self.part_3 = '' 82 | self.options = options_info() 83 | 84 | self.part_1 = parts[0] 85 | self.parse_dep_indep(self.part_1) 86 | 87 | self.part_2 = parts[1] 88 | self.parse_gmm_iv(self.part_2) 89 | 90 | self.check_dep_indep() 91 | self.check_GMM() 92 | self.check_iv() 93 | self.check_three_lists() 94 | # self.check_adjustable() 95 | 96 | self.variables = {} 97 | self.variables['dep_indep'] = self.tbr_list(self._temp_part1_list) 98 | self.variables['Dgmm'] = self.list_Dgmm 99 | self.variables['Lgmm'] = self.list_Lgmm 100 | self.variables['iv'] = self.tbr_list(self._temp_iv_list) 101 | 102 | def parse_spaced_vars(self, list_vars, dest_list): 103 | 104 | prog_1 = re.compile('^L\(([0-9]{1,})[:]([0-9]{1,})\)[.]([a-zA-Z_]{1,}[a-zA-Z_0-9]{0,})$') 105 | prog_2 = re.compile('^L([0-9]{1,})[.]([a-zA-Z_]{1,}[a-zA-Z_0-9]{0,})$') 106 | prog_3 = re.compile('^L\(([0-9]{1,})[:]([?])\)[.]([a-zA-Z_]{1,}[a-zA-Z_0-9]{0,})$') 107 | 108 | for var in list_vars: 109 | match_groups_multiple = prog_1.match(var) 110 | 111 | if match_groups_multiple: 112 | LB = int(match_groups_multiple.group(1)) 113 | UB = int(match_groups_multiple.group(2)) 114 | name = match_groups_multiple.group(3) 115 | ret = dest_list.insert(name, list(range(min(LB, UB), max(LB, UB) + 1))) 116 | else: 117 | match_groups_single = prog_2.match(var) 118 | if match_groups_single: 119 | lag = int(match_groups_single.group(1)) 120 | name = match_groups_single.group(2) 121 | ret = dest_list.insert(name, [lag]) 122 | else: 123 | match_groups_auto = prog_3.match(var) 124 | if match_groups_auto: 125 | LB = int(match_groups_auto.group(1)) 126 | name = match_groups_auto.group(3) 127 | self.options.beginner = True 
128 | ret = dest_list.insert(name, [LB], min_adj_lag=False, max_adj_lag=True) 129 | # new_var=adjustable_lag_indep(name, LB, None) 130 | # self.adjustable['indep'].append(new_var) 131 | else: 132 | name = var 133 | ret = dest_list.insert(name, [0]) 134 | 135 | if ret == -1: 136 | return name 137 | 138 | return '' 139 | 140 | def parse_dep_indep(self, part_1): 141 | 142 | list_vars = part_1.split() 143 | 144 | ret = self.parse_spaced_vars(list_vars, self._temp_part1_list) 145 | 146 | if ret != '': 147 | print(part_1 + ': variable ' + ret + ' does not exist') 148 | exit() 149 | 150 | def parse_gmm_iv(self, part_2): 151 | 152 | matching_parts = [] 153 | 154 | self.parse_gmmStyle(matching_parts, part_2) 155 | self.parse_endo_pred(matching_parts, part_2) 156 | self.parse_IV(matching_parts, part_2) 157 | 158 | part2_cpy = part_2 159 | for part in matching_parts: 160 | part2_cpy = part2_cpy.replace(part, '') 161 | 162 | if len(part2_cpy.strip()) > 0: 163 | print(part2_cpy.strip() + ': invalid GMM or IV statement') 164 | exit() 165 | 166 | def parse_gmmStyle(self, matching_parts, part_2): 167 | 168 | gmm_search_parts = re.findall( 169 | 'gmm[(][a-zA-Z_0-9 ]{1,}[,][ ]{0,}[0-9]{1,}[ ]{0,}[:][ ]{0,}(?:(?:[.])|(?:[0-9]{1,}))[ ]{0,}[)]', part_2) 170 | prog_1 = re.compile( 171 | '^gmm[(]([a-zA-Z_0-9 ]{1,})[,][ ]{0,}([0-9]{1,})[ ]{0,}[:][ ]{0,}((?:[.])|(?:[0-9]{1,}))[ ]{0,}[)]$') 172 | 173 | for part in gmm_search_parts: 174 | 175 | matching_parts.append(part) 176 | match_groups_multiple = prog_1.match(part) 177 | vars = match_groups_multiple.group(1).split() 178 | 179 | min_lag = int(match_groups_multiple.group(2)) 180 | if match_groups_multiple.group(3) == '.': 181 | max_lag = sys.maxsize 182 | else: 183 | max_lag = int(match_groups_multiple.group(3)) 184 | 185 | self.process_GMM(vars, min_lag, max_lag, part) 186 | 187 | def parse_endo_pred(self, matching_parts, part_2): 188 | 189 | gmm_search_parts = re.findall('endo[(][a-zA-Z_0-9 ]{1,}[)]', part_2) 190 | prog_1 = 
re.compile('^endo[(]([a-zA-Z_0-9 ]{1,})[)]$') 191 | 192 | for part in gmm_search_parts: 193 | # prog_2 = re.compile('^L([0-9]{1,})[.]([a-zA-Z_]{1,}[a-zA-Z_0-9]{0,})$') 194 | matching_parts.append(part) 195 | match_groups_multiple = prog_1.match(part) 196 | 197 | vars = match_groups_multiple.group(1).split() 198 | min_lag = 2 199 | max_lag = sys.maxsize 200 | 201 | self.process_GMM(vars, min_lag, max_lag, part) 202 | 203 | gmm_search_parts = re.findall('pred[(][a-zA-Z_0-9 ]{1,}[)]', part_2) 204 | prog_1 = re.compile('^pred[(]([a-zA-Z_0-9 ]{1,})[)]$') 205 | 206 | for part in gmm_search_parts: 207 | matching_parts.append(part) 208 | match_groups_multiple = prog_1.match(part) 209 | 210 | vars = match_groups_multiple.group(1).split() 211 | min_lag = 1 212 | max_lag = sys.maxsize 213 | 214 | self.process_GMM(vars, min_lag, max_lag, part) 215 | 216 | def parse_IV(self, matching_parts, part_2): 217 | 218 | iv_search_parts = re.findall('iv[(].{1,}[)]', part_2) 219 | prog_2 = re.compile('^iv[(](.{1,})[)]$') 220 | for part in iv_search_parts: 221 | matching_parts.append(part) 222 | match_groups_multiple = prog_2.match(part) 223 | vars = match_groups_multiple.group(1).split() 224 | invalid_name = self.parse_spaced_vars(vars, self._temp_iv_list) 225 | if invalid_name != '': 226 | print(part + ': ' + invalid_name + ' does not exist') 227 | exit() 228 | 229 | def parse_options(self, part_3): 230 | list_options = [s.lower() for s in part_3.split()] 231 | 232 | options = self.options 233 | 234 | # possible_options=[{'onestep', 'iterated'},'nolevel', 'timedumm', 'collapse'] 235 | 236 | if "onestep" in list_options and "iterated" in list_options: 237 | print("One-step and iterative estimations are mutually exclusive") 238 | exit() 239 | 240 | for option in list_options: 241 | if option == 'onestep': 242 | options.steps = 1 243 | elif option == 'iterated': 244 | options.steps = 1000 245 | elif option == 'nolevel': 246 | options.level = False 247 | elif option == 'hqic': 248 | 
options.mmsc = 'hqic' 249 | elif option == 'fod': 250 | options.transformation = 'fod' 251 | elif option == 'timedumm': 252 | options.timedumm = True 253 | elif option == 'collapse': 254 | options.collapse = True 255 | else: 256 | print(option + ' is not an option allowed') 257 | exit() 258 | 259 | return (options) 260 | 261 | def process_GMM(self, vars, min_lag, max_lag, part): 262 | if min_lag > max_lag: 263 | print(part + ': minimum lag cannot be greater than maximum lag') 264 | exit() 265 | if min_lag < 0: 266 | print(part + ': lags must be non-negative') 267 | exit() 268 | if len(vars) == 0: 269 | print(part + ': no variable is included') 270 | exit() 271 | 272 | for var in vars: 273 | if (var not in self.cols): 274 | print(part + ': ' + var + ' does not exist') 275 | exit() 276 | existing_names = [v.name for v in self.list_Dgmm] 277 | if var in existing_names: 278 | print(part + ': ' + var + ' cannot be declared in part 2 for twice or more') 279 | exit() 280 | temp_var = gmm_var(var, min_lag, max_lag, 0) 281 | self.list_Dgmm.append(temp_var) 282 | Lmin_lag = max(min_lag - 1, 0) 283 | temp_var = gmm_var(var, Lmin_lag, min_lag, 0) 284 | self.list_Lgmm.append(temp_var) 285 | 286 | def tbr_list(self, temp_list): 287 | tbr = [] 288 | for i in range(len(temp_list.names)): 289 | var_name = temp_list.names[i] 290 | lags = temp_list.lags[i] 291 | num_lags = len(lags) 292 | for j in range(lags[0], lags[0] + num_lags): 293 | new_var = regular_variable(var_name, j) 294 | tbr.append(new_var) 295 | 296 | return (tbr) 297 | 298 | def check_dep_indep(self): 299 | 300 | self._temp_part1_list.purge() 301 | 302 | dep = self._temp_part1_list.names[0] 303 | dep_lags = self._temp_part1_list.lags[0] 304 | 305 | if len(dep_lags) > 0 and dep_lags[0] != 0: 306 | print('dependent variable should not be lagged on the left hand side of the model') 307 | exit() 308 | 309 | if len(dep_lags) == 1: 310 | print('lagged dependent variable should be included') 311 | exit() 312 | 313 | if 
dep_lags[1] != 1: 314 | print('lag 1 of the dependent variable is not included') 315 | exit() 316 | 317 | self._temp_part1_list.check_contiguous() 318 | 319 | def check_GMM(self): 320 | dep_name = self._temp_part1_list.names[0] 321 | 322 | for i in range(len(self.list_Dgmm)): 323 | var = self.list_Dgmm[i] 324 | if var.name == dep_name: 325 | self.dep_GMM = [i] 326 | if var.min_lag < 2: 327 | print('must use lag 2 or earlier of the dependent variable as instruments') 328 | exit() 329 | 330 | def check_iv(self): 331 | self._temp_iv_list.purge() 332 | self._temp_iv_list.check_contiguous() 333 | 334 | def check_three_lists(self): 335 | gmm_names = [var.name for var in self.list_Dgmm] 336 | iv_names = self._temp_iv_list.names 337 | 338 | for iv_name in iv_names: 339 | if iv_name in gmm_names: 340 | print('variable ' + iv_name + ': a variable can be either in GMM style or in IV style, but not both') 341 | exit() 342 | 343 | for i in range(len(self._temp_part1_list.lags)): 344 | var_name = self._temp_part1_list.names[i] 345 | var_lags = self._temp_part1_list.lags[i] 346 | bool_GMM = var_name in gmm_names 347 | bool_IV = var_name in iv_names 348 | 349 | if not (bool_GMM or bool_IV): 350 | self._temp_iv_list.insert(var_name, var_lags) 351 | 352 | # def check_adjustable(self): 353 | # 354 | # if len(self.adjustable['indep'])>0: 355 | # dep = self._temp_part1_list.names[0] 356 | # self._temp_part1_list.lags[0]=[1] 357 | # 358 | # for var in self.adjustable['indep']: 359 | # if var.name != dep: 360 | # print('in the current version, only the lags of the lagged dependent variable can be adjusted') 361 | # exit() 362 | # else: 363 | -------------------------------------------------------------------------------- /pydynpd/common_functions.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import numpy as np 4 | from scipy.sparse import csc_matrix 5 | 6 | 7 | def lag(mat, lagged, N, lag_number, fill=np.nan): 8 | height = 
int(mat.shape[0] / N) 9 | for i in range(N): 10 | start_row = i * height 11 | end_row = start_row + height 12 | mat_i = mat[start_row:end_row, :] 13 | lagged_i = lagged[start_row:end_row, :] 14 | 15 | lagged_i[0:lag_number, :] = fill 16 | lagged_i[lag_number:height, :] = mat_i[0:(height - lag_number), :] 17 | 18 | 19 | def get_first_diff_table(ori_arr: np.ndarray, N: int): 20 | num_cols = ori_arr.shape[1] 21 | num_rows = ori_arr.shape[0] 22 | height = int(num_rows / N) 23 | 24 | lag_arr = np.zeros((num_rows, num_cols), dtype='float64') 25 | tbr_arr = np.zeros((num_rows, num_cols), dtype='float64') 26 | 27 | lag(ori_arr, lag_arr, N, 1) 28 | 29 | tbr_arr = ori_arr - lag_arr 30 | return tbr_arr 31 | 32 | 33 | def get_fod_table(ori_arr: np.ndarray, N: int): 34 | num_rows = ori_arr.shape[0] 35 | height = int(num_rows / N) 36 | 37 | num_cols = ori_arr.shape[1] 38 | 39 | tbr = np.empty((num_rows, num_cols), dtype='float64') 40 | next_sum = np.empty((1, num_cols), dtype='float64') 41 | this_sum = np.empty((1, num_cols), dtype='float64') 42 | this_avg = np.empty((1, num_cols), dtype='float64') 43 | temp = np.empty((height, num_cols), dtype='float64') 44 | 45 | tbr[:] = np.nan 46 | 47 | this_sum[:] = np.nan 48 | 49 | for i in range(N): 50 | ori_i = ori_arr[i * height:(i * height + height), :] 51 | tbr_i = tbr[i * height:(i * height + height), :] 52 | temp.fill(np.nan) 53 | next_sum.fill(np.nan) 54 | next_count = 0 55 | for j in range(height - 2, -1, -1): 56 | 57 | if np.isnan(ori_i[range(j + 1, j + 2), :]).any(axis=1): 58 | this_count = next_count 59 | this_sum = next_sum 60 | temp[j, :] = temp[j + 1, :] 61 | else: 62 | this_count = next_count + 1 63 | 64 | this_sum = np.nansum(np.vstack([next_sum, ori_i[j + 1, :]]), axis=0) 65 | this_avg = this_sum * (1.0 / this_count) 66 | temp[j, :] = (ori_i[j, :] - this_avg) * math.sqrt(this_count / (this_count + 1)) 67 | 68 | next_sum = this_sum 69 | next_count = this_count 70 | 71 | tbr_i[0, :] = np.nan 72 | tbr_i[range(1, height), :] 
= temp[range(0, height - 1), :] 73 | 74 | return tbr 75 | 76 | 77 | def sum_product(listOflist, n_rows): 78 | num_elements = len(listOflist) 79 | 80 | for i in range(n_rows): 81 | list_temp = [] 82 | for j in range(num_elements): 83 | if type(listOflist[j]) == list: 84 | var_list = listOflist[j] 85 | list_temp.append(var_list[i]) 86 | elif type(listOflist[j]) == np.ndarray: 87 | var_mat = listOflist[j] 88 | list_temp.append(var_mat) 89 | else: 90 | pass # throw error 91 | temp = np.linalg.multi_dot(list_temp) 92 | if i == 0: 93 | tbr = temp 94 | else: 95 | tbr += temp 96 | 97 | return (tbr) 98 | 99 | 100 | def Windmeijer(M2, _M2_XZ_W2, W2_inv, zs2, vcov_step1, Cx_list, z_list, residual1, N): 101 | D = np.empty((M2.shape[0], M2.shape[1]), dtype='float64') 102 | 103 | x_height = int(Cx_list.shape[0] / N) 104 | z_height = int(z_list.shape[0] / N) 105 | for j in range(0, Cx_list.shape[1]): 106 | 107 | for i in range(0, N): 108 | x = Cx_list[(i * x_height):(i * x_height + x_height), :] 109 | 110 | u = residual1[(i * x_height):(i * x_height + x_height), 0:1] 111 | z = z_list[(i * z_height):(i * z_height + z_height), :] 112 | 113 | xu = np.matmul(x[:, j:(j + 1)], u.transpose()) 114 | temp = z @ (xu + xu.transpose()) @ z.transpose() 115 | # temp_zxuzt=z@ xu @ z.transpose() 116 | # temp=temp_zxuzt + temp_zxuzt.transpose() 117 | 118 | if i == 0: 119 | zxz = temp 120 | else: 121 | zxz += temp 122 | 123 | partial_dir = (-1.0 / N) * zxz 124 | 125 | Dj = np.linalg.multi_dot([_M2_XZ_W2, partial_dir, W2_inv, zs2]) 126 | Dj = (-1) * Dj 127 | 128 | D[:, j:(j + 1)] = Dj 129 | 130 | # temp = np.multiply(N, M2) + np.multiply(N, np.matmul(D, M2)) + np.multiply(N, np.matmul(M2, D.transpose())) 131 | temp_D_M2 = D @ M2 132 | temp = np.multiply(N, M2) + np.multiply(N, temp_D_M2) + np.multiply(N, temp_D_M2.transpose()) 133 | temp = temp + np.matmul(np.matmul(D, vcov_step1), D.transpose()) 134 | # 135 | return (temp) 136 | 137 | 138 | def make_sparse_list(arr_list): 139 | nrow = 
len(arr_list) 140 | new_list = [] 141 | for i in range(nrow): 142 | arr = arr_list[i] 143 | new_arr = csc_matrix(arr) 144 | new_list.append(new_arr) 145 | 146 | return (new_list) 147 | -------------------------------------------------------------------------------- /pydynpd/info.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | # from pandas import DataFrame 4 | import numpy as np 5 | 6 | 7 | @dataclass 8 | class df_info: 9 | N: int 10 | T: int 11 | ids: list 12 | first_diff_index: int 13 | last_diff_index: int 14 | first_level_index: int 15 | last_level_index: int 16 | max_lag: int 17 | # last_fod_index: int 18 | # first_fod_index: int 19 | 20 | 21 | @dataclass 22 | class z_info: 23 | diff_width: int 24 | diff_height: int 25 | level_width: int 26 | level_height: int 27 | width: int 28 | height: int 29 | num_Dgmm_instr: int 30 | num_Lgmm_instr: int 31 | num_instr: int 32 | # int num_vars 33 | # int num_gmm_instr 34 | 35 | 36 | @dataclass 37 | class hansen_test_info: 38 | test_value: float 39 | df: int 40 | p_value: float 41 | critical_value: float 42 | 43 | 44 | @dataclass 45 | class AR_test_info: 46 | lag: int 47 | AR: float 48 | P_value: float 49 | 50 | 51 | @dataclass 52 | class options_info: 53 | steps: int = 2 54 | level: bool = True 55 | beginner: bool = False 56 | timedumm: bool = False 57 | collapse: bool = False 58 | mmsc: str = 'bic' 59 | transformation: str = 'fd' 60 | 61 | 62 | @dataclass 63 | class sumproduct_task: 64 | array_list: list 65 | division_list: list 66 | 67 | 68 | @dataclass 69 | class beginner_models: 70 | model: str 71 | 72 | 73 | @dataclass 74 | class step_result: 75 | M: np.ndarray 76 | SS: np.ndarray 77 | W: np.ndarray 78 | W_inv: np.ndarray 79 | W_next: np.ndarray 80 | ZuuZ: np.ndarray 81 | beta: np.ndarray 82 | residual: np.ndarray 83 | _residual_t: np.ndarray 84 | 85 | vcov: np.ndarray 86 | zs: np.ndarray 87 | std_err: np.ndarray 88 | _M_XZ_W: 
np.ndarray 89 | _XZ_W: np.ndarray 90 | def __init__(self, W): 91 | self.W = W 92 | self.W_inv = np.linalg.pinv(W) 93 | -------------------------------------------------------------------------------- /pydynpd/instruments.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from pydynpd.info import df_info, z_info, options_info 4 | 5 | 6 | class instruments(object): 7 | 8 | def __init__(self, variables: dict, gmm_tables: dict, df_information: df_info, options: options_info): 9 | level = options.level 10 | collapse = options.collapse 11 | transformation = options.transformation 12 | self.z_information, self.gmm_diff_info, self.iv_diff_info, self.gmm_level_info = self.calculate_z_dimension( 13 | variables, df_information, level, transformation, collapse) 14 | 15 | if level: 16 | self.z_table = self.build_z_level(variables, gmm_tables, df_information, transformation, collapse) 17 | else: 18 | self.z_table = self.build_z_diff(variables, gmm_tables, df_information, False, transformation, collapse) 19 | 20 | def build_z_level(self, variables: dict, gmm_tables: dict, info: df_info, transformation, collapse=False): 21 | last_level_index = info.last_level_index 22 | first_level_index = info.first_level_index 23 | 24 | z_table = self.build_z_diff( 25 | variables, gmm_tables, info, True, transformation, collapse) 26 | 27 | level_width = self.z_information.level_width 28 | level_height = self.z_information.level_height 29 | diff_width = self.z_information.diff_width 30 | diff_height = self.z_information.diff_height 31 | width = self.z_information.width 32 | height = self.z_information.height 33 | 34 | Lgmm_vars = variables['Lgmm'] 35 | iv_vars = variables['iv'] 36 | Lgmm_dat = gmm_tables['Lgmm'] 37 | iv_dat = gmm_tables['iv'] 38 | 39 | Lgmm_iv_height = int(Lgmm_dat.shape[0] / info.N) 40 | 41 | start_row = diff_height # z_table[0].shape[0]-height 42 | start_col = diff_width # z_table[0].shape[1]-width 43 | 44 | for i 
in range(info.N): 45 | z = z_table[(i * height):(i * height + height), :] 46 | # z[start_row-1,start_col:(start_col+self.level_width)]=1 47 | z[height - 1, start_col:width] = 1 48 | array_Lgmm = Lgmm_dat[(i * Lgmm_iv_height):(i * Lgmm_iv_height + Lgmm_iv_height), :] 49 | array_iv = iv_dat[(i * Lgmm_iv_height):(i * Lgmm_iv_height + Lgmm_iv_height), :] 50 | 51 | for var_id in range(len(Lgmm_vars)): 52 | lag = Lgmm_vars[var_id].min_lag 53 | t_info = self.gmm_level_info[3 * var_id: (3 * var_id + 3), :] 54 | for j in range(level_width): 55 | the_index = t_info[0, j] 56 | if the_index >= 0: 57 | the_row = start_row + t_info[2, j] 58 | 59 | the_index = t_info[0, j] 60 | 61 | z[the_row, start_col + j] = array_Lgmm[the_index, var_id] 62 | 63 | start_pos = self.z_information.num_Dgmm_instr 64 | for var_id in range(len(iv_vars)): 65 | var = iv_vars[var_id] 66 | z[start_pos + var_id, 67 | start_col:width] = array_iv[first_level_index:(last_level_index + 1), var_id] 68 | 69 | z[np.isnan(z)] = 0 70 | 71 | return z_table 72 | 73 | def prepare_z_gmm_level(self, variables: dict, level_width, info: df_info, collapse=False): 74 | 75 | gmm_vars = variables['Lgmm'] 76 | num_gmm = len(gmm_vars) 77 | t_info = np.empty((num_gmm * 3, level_width), 78 | dtype='int32') # row 0: gmm_index, 2: which row, 1: empty right now 79 | 80 | start_row = 0 81 | var_id = 0 82 | for var_id in range(num_gmm): 83 | var = gmm_vars[var_id] 84 | for i in range(level_width): 85 | the_index = info.first_level_index + i 86 | gmm_index = the_index - var.min_lag 87 | 88 | t_info[var_id * 3 + 2, i] = start_row 89 | 90 | if gmm_index >= 1: 91 | t_info[var_id * 3 + 0, i] = gmm_index 92 | if collapse: 93 | if i == level_width - 1: 94 | start_row += 1 95 | else: 96 | start_row += 1 97 | 98 | else: 99 | t_info[var_id * 3 + 0, i] = -9999 100 | 101 | num_Lgmm_instr = start_row # number of gmm instruments in diff eq 102 | 103 | return (num_Lgmm_instr, t_info) 104 | 105 | def calculate_z_dimension(self, variables: dict, 
info: df_info, level, transformation, collapse=False): 106 | Lgmm_vars = variables['Lgmm'] 107 | 108 | diff_width = info.last_diff_index - info.first_diff_index + 1 109 | 110 | level_width = info.last_level_index - info.first_level_index + 1 111 | 112 | level_height = 0 113 | num_Lgmm_instr = 0 114 | gmm_level_info = None 115 | if level: 116 | num_Lgmm_instr, gmm_level_info = self.prepare_z_gmm_level(variables, level_width, info, collapse) 117 | level_height = num_Lgmm_instr + 1 118 | 119 | num_Dgmm_instr, gmm_diff_info = self.prepare_Z_gmm_diff( 120 | variables, info, level, transformation, collapse) 121 | iv_diff_info = self.prepare_Z_iv_diff(variables, diff_width, info) 122 | 123 | diff_height = (num_Dgmm_instr + iv_diff_info.shape[0]) 124 | 125 | if level: 126 | height = diff_height + level_height 127 | width = diff_width + level_width 128 | else: 129 | height = diff_height 130 | width = diff_width 131 | 132 | z_information = z_info(diff_height=diff_height, diff_width=diff_width, level_width=level_width, 133 | level_height=level_height, height=height, width=width, 134 | num_Dgmm_instr=num_Dgmm_instr, num_Lgmm_instr=num_Lgmm_instr, num_instr=height) 135 | 136 | return (z_information, gmm_diff_info, iv_diff_info, gmm_level_info) 137 | 138 | def build_z_diff(self, variables: dict, gmm_tables: dict, info: df_info, level, transformation, collapse=False): 139 | 140 | gmm_vars = variables['Dgmm'] 141 | iv_vars = variables['iv'] 142 | Dgmm_dat = gmm_tables['Dgmm'] 143 | Div_dat = gmm_tables['Div'] 144 | 145 | gmm_Div_height = int(Dgmm_dat.shape[0] / info.N) 146 | 147 | diff_width = self.z_information.diff_width 148 | height = self.z_information.height 149 | width = self.z_information.width 150 | num_Dgmm_instr = self.z_information.num_Dgmm_instr 151 | 152 | z_table = np.zeros((height * info.N, width), dtype=np.float64) 153 | 154 | for i in range(info.N): 155 | if level and transformation == 'fod': 156 | z = z_table[i * height:(i + 1) * height, 1:diff_width] 157 | 158 | 
else: 159 | z = z_table[i * height:(i + 1) * height, 0:diff_width] 160 | 161 | z_width = z.shape[1] 162 | array_gmm = Dgmm_dat[i * gmm_Div_height:(i + 1) * gmm_Div_height, :] 163 | array_fd_iv = Div_dat[i * gmm_Div_height:(i + 1) * gmm_Div_height, :] 164 | 165 | var_id = 0 166 | for var in gmm_vars: 167 | for j in range(z_width): 168 | row_pos = self.gmm_diff_info[var_id * 3 + 2, j] 169 | 170 | start = self.gmm_diff_info[var_id * 3 + 0, j] 171 | end = self.gmm_diff_info[var_id * 3 + 1, j] 172 | 173 | for k in range(end - start + 1): 174 | z[row_pos + k, j] = array_gmm[end - k, var_id] 175 | var_id += 1 176 | 177 | row_pos = num_Dgmm_instr 178 | 179 | var_id = 0 180 | for var_id in range(len(iv_vars)): 181 | var = iv_vars[var_id] 182 | for j in range(z_width): 183 | index_to_take = self.iv_diff_info[var_id, j] 184 | 185 | z[row_pos, j] = array_fd_iv[index_to_take, var_id] 186 | 187 | row_pos += 1 188 | 189 | z[np.isnan(z)] = 0 190 | 191 | return z_table 192 | 193 | def prepare_Z_iv_diff(self, variables: dict, width, info: df_info): 194 | iv_vars = variables['iv'] # need to be placed at the beginning 195 | num_iv = len(iv_vars) 196 | 197 | t_info = np.empty((num_iv, width), dtype='int32') 198 | # num_iv_instr = 0 199 | var_id = 0 200 | for var_id in range(num_iv): 201 | var = iv_vars[var_id] 202 | t_info[var_id,] = range(info.first_diff_index, info.last_diff_index + 1) 203 | # num_iv_instr += width 204 | 205 | return (t_info) 206 | 207 | def prepare_Z_gmm_diff(self, variables: dict, info: df_info, level, transformation, collapse=False): 208 | start_row = 0 209 | 210 | gmm_vars = variables['Dgmm'] 211 | num_gmm = len(gmm_vars) 212 | 213 | var_id = 0 214 | if level and transformation == 'fod': 215 | first_index = info.first_diff_index + 1 216 | else: 217 | first_index = info.first_diff_index 218 | last_index = info.last_diff_index 219 | width = last_index - first_index + 1 220 | 221 | t_info = np.empty((num_gmm * 3, width), dtype='int32') 222 | 223 | for var_id in 
class list_models:
    """Container pairing each candidate model's variable dict with the
    command string that describes it; the two lists stay index-aligned."""

    def __init__(self):
        # one entry per candidate model, appended in generation order
        self.list_variables, self.list_command_str = [], []
last_lag) 35 | 36 | self.models = list_models() 37 | for i in range(len(self._temp_list_indep)): 38 | new_variables = {} 39 | new_variables['dep_indep'] = self._temp_list_indep[i] 40 | new_variables['Dgmm'] = user_command.variables['Dgmm'] 41 | new_variables['Lgmm'] = user_command.variables['Lgmm'] 42 | new_variables['iv'] = user_command.variables['iv'] 43 | self.models.list_variables.append(new_variables) 44 | self.models.list_command_str = self._temp_list_command 45 | 46 | def _add_item(self, new_var): 47 | for i in range(len(self._temp_list_indep)): 48 | 49 | self._temp_list_indep[i].append(new_var) 50 | if new_var.lag == 0: 51 | new_str = ' ' + new_var.name + ' ' 52 | else: 53 | new_str = ' L' + str(new_var.lag) + '.' + new_var.name + ' ' 54 | 55 | self._temp_list_command[i] += new_str 56 | 57 | def _explode_model(self, var_name, last_lag): 58 | new_list_variables = [] 59 | new_list_command_str = [] 60 | for i in range(len(self._temp_list_indep)): 61 | model = self._temp_list_indep[i] 62 | command_str = self._temp_list_command[i] 63 | 64 | for j in range(last_lag + 1, self.ending_time + 1): 65 | new_model = model.copy() 66 | new_str = str(command_str) 67 | 68 | for k in range(last_lag + 1, j + 1): 69 | new_var = regular_variable(var_name, k) 70 | new_model.append(new_var) 71 | new_str += ' L' + str(k) + '.' 
class model_list(object):
    """Collects, across a list of fitted models, every independent-variable
    name together with the smallest and largest lag at which it appears.

    names / min_lags / max_lags are parallel lists; position 0 of each
    model's 'dep_indep' list is the dependent variable and is skipped.
    """

    def __init__(self, m_list: list):
        self.names = []
        self.min_lags = []
        self.max_lags = []
        for m in m_list:
            # slice from 1: the dependent variable does not belong here
            for indep in m.variables['dep_indep'][1:]:
                if indep.name in self.names:
                    pos = self.names.index(indep.name)
                    self.min_lags[pos] = min(self.min_lags[pos], indep.lag)
                    self.max_lags[pos] = max(self.max_lags[pos], indep.lag)
                else:
                    self.names.append(indep.name)
                    self.min_lags.append(indep.lag)
                    self.max_lags.append(indep.lag)
53 | 54 | basic_table = PrettyTable() 55 | middle_space = ' ' 56 | basic_table.field_names = [" ", " ", " "] 57 | basic_table.border = False 58 | basic_table.header = False 59 | basic_table.align = 'l' 60 | basic_table.add_row( 61 | ['Group variable: ' + model.pdata._individual, middle_space, 'Number of obs = ' + str(model.num_obs)]) 62 | basic_table.add_row( 63 | ['Time variable: ' + model.pdata._time, middle_space, 'Min obs per group: ' + str(model.min_obs)]) 64 | basic_table.add_row(['Number of instruments = ' + str(model.z_information.num_instr), middle_space, 65 | 'Max obs per group: ' + str(model.max_obs)]) 66 | basic_table.add_row( 67 | ['Number of groups = ' + str(model.N), middle_space, 68 | 'Avg obs per group: ' + '{0:.2f}'.format(model.avg_obs)]) 69 | 70 | return (basic_table.get_string()) 71 | 72 | def test_results(self, model): 73 | 74 | str_toprint = 'Hansen test of overid. restrictions: chi(' + str(model.hansen.df) + ') = ' + '{:.3f}'.format( 75 | model.hansen.test_value) 76 | str_toprint = str_toprint + ' Prob > Chi2 = ' + '{:.3f}'.format(model.hansen.p_value) + '\n' 77 | 78 | for i in range(len(model.AR_list)): 79 | the_AR = model.AR_list[i] 80 | AR = the_AR.AR 81 | P_value = the_AR.P_value 82 | 83 | str_toprint = str_toprint + 'Arellano-Bond test for AR(' + str( 84 | i + 1) + ') in first differences: z = ' + "{:.2f}".format(AR) + ' Pr > z =' + '{:.3f}'.format( 85 | P_value) + '\n' 86 | 87 | return (str_toprint) 88 | 89 | def regression_table(self, model): 90 | 91 | dep_name = model.variables['dep_indep'][0].name 92 | 93 | r_table = PrettyTable() 94 | 95 | r_table.field_names = [dep_name, "coef.", "Corrected Std. Err.", "z", "P>|z|", " "] 96 | 97 | r_table.float_format = '.7' 98 | regression_table = model.regression_table 99 | # , "z", "P>|z|", "[95% Conf. 
Interval]" ] 100 | num_indep = len(regression_table.index) 101 | 102 | for i in range(num_indep): 103 | var_name = regression_table['variable'][i] 104 | coeff = regression_table['coefficient'][i] 105 | stderr = regression_table['std_err'][i] 106 | 107 | z = regression_table['z_value'][i] 108 | p = regression_table['p_value'][i] 109 | sig = regression_table['sig'][i] 110 | r_table.add_row([var_name, coeff, stderr, z, p, sig]) 111 | 112 | return r_table.get_string() 113 | 114 | def print_good_list(self, the_list: list, level: bool, mmsc: str): 115 | the_list.sort(key=lambda x: x.MMSC_LU[mmsc]) 116 | m_list = model_list(the_list) 117 | 118 | r_table = PrettyTable() 119 | # col_names=['variables'] + [m.name for m in the_list] 120 | variable_names = [] 121 | for i in range(len(m_list.names)): 122 | v = m_list.names[i] 123 | for j in range(m_list.min_lags[i], m_list.max_lags[i] + 1): 124 | if j == 0: 125 | variable_names.append(v) 126 | else: 127 | variable_names.append('L' + str(j) + '.' + v) 128 | if level: 129 | variable_names.append('_con') 130 | r_table.add_column('variables', variable_names) 131 | for m in the_list: 132 | new_col = [] 133 | for i in range(len(variable_names)): 134 | new_col.append('') 135 | rt = m.regression_table 136 | ind = [variable_names.index(v) for v in rt['variable']] 137 | j = 0 138 | for i in ind: 139 | new_col[i] = '{:.3f}'.format(rt['coefficient'][j]) + rt['sig'][j] + '\n(' + '{:.3f}'.format( 140 | rt['std_err'][j]) + ')' 141 | j += 1 142 | 143 | r_table.add_column(m.name, new_col) 144 | 145 | print('models are sorted by ' + mmsc) 146 | print(r_table) 147 | 148 | try: 149 | with open('output.html', 'w') as f: 150 | # print(f.__dir__()) 151 | print('HTML output named "output.html" is located in folder ' + os.getcwd()) 152 | f.write(r_table.get_html_string( 153 | attributes={'border': 1, 'style': 'border-width: 1px; border-collapse: collapse;'})) 154 | except Exception as e: 155 | print(e) 156 | 157 | print('\n') 158 | print('MMSC_LU 
def print_bad_list(self, the_list: list):
    """Print the models that failed specification tests, one row per model,
    showing each model's name and the command string that produced it."""
    failed = PrettyTable()
    failed.field_names = ['model', 'command str']
    failed.align['command str'] = "l"
    for bad_model in the_list:
        failed.add_row([bad_model.name, bad_model.command_str])

    print(failed)
def xtset(self, df: DataFrame, _individual, _time):
    """Index the panel: encode individual and time identifiers as integer
    codes and build the composite slot key _NT = _individual * T + _time.

    Mutates df in place (adds the _individual, _time and _NT columns and
    sorts the rows). Returns (N, T, ['_NT']).
    """
    # BUG FIX: the original called df.sort_values(...) without using the
    # returned frame, so the sort was a silent no-op; sort in place as
    # intended. Downstream make_balanced() re-orders by _NT anyway.
    df.sort_values(by=[_individual, _time], inplace=True)

    df['_individual'] = df[_individual].astype('category').cat.codes
    df['_individual'] = df['_individual'].astype('int64')
    N = df['_individual'].unique().size

    df['_time'] = df[_time].astype('category').cat.codes
    df['_time'] = df['_time'].astype('int64')
    T = df['_time'].unique().size

    # slot id: row of individual g at period t lives at g * T + t
    df['_NT'] = df['_individual'] * T + df['_time']

    if N <= T:
        print(
            'Warning: system and difference GMMs do not work well on long (T>=N) panel data')
    return (N, T, ['_NT'])

def make_balanced(self, ori, n_individual, n_time):
    """Expand ori into a fully balanced (n_individual * n_time)-row array,
    filling the slots missing from ori with NaN.

    Column 0 of ori holds the _NT slot id; remaining columns are data.
    """
    arr_full = np.empty((n_individual * n_time, ori.shape[1]), dtype='float64')

    arr_full[:] = np.nan
    arr_full[:, 0] = np.arange(0, n_individual * n_time)  # slot ids 0..N*T-1

    # np.isin replaces the deprecated np.in1d; same result on 1-D input
    mask = np.isin(arr_full[:, 0], ori[:, 0])

    # assumes ori rows are ordered by slot id so they line up with the
    # masked slots — TODO confirm (original behavior preserved)
    arr_full[mask, 1:arr_full.shape[1]] = ori[:, 1:ori.shape[1]]
    arr_full = arr_full[arr_full[:, 0].argsort()]

    return (arr_full)

def add_time_dummy(self, df: DataFrame, variables: dict, _time: str):
    """Append one 0/1 dummy column per time period.

    Returns (df with dummies concatenated, list of new dummy column names).
    `variables` is unused here; kept for signature compatibility.
    """
    timedumms = get_dummies(df[_time], prefix=_time, dtype=int)
    col_timedumm = timedumms.columns.tolist()

    df = concat([df, timedumms], axis=1)

    return df, col_timedumm
temp[i, j] = (-1) * math.sqrt(1 / ((T - i) * (T - i - 1))) 92 | 93 | if level: 94 | last=T-2 95 | 96 | 97 | else: 98 | last=T-3 99 | start = last + 1 - height 100 | 101 | D=temp[start:(last+1),:] 102 | 103 | return (D) 104 | -------------------------------------------------------------------------------- /pydynpd/regression.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from sys import exit 3 | 4 | import numpy as np 5 | from numpy.linalg import pinv 6 | from pandas import DataFrame 7 | 8 | import pydynpd.specification_tests as tests 9 | from pydynpd.command import command 10 | from pydynpd.common_functions import Windmeijer 11 | from pydynpd.dynamic_panel_model import dynamic_panel_model 12 | from pydynpd.info import step_result 13 | from pydynpd.model_organizer import model_oranizer 14 | from pydynpd.model_summary import model_summary 15 | from pydynpd.panel_data import panel_data 16 | 17 | warnings.filterwarnings("ignore", category=RuntimeWarning) 18 | 19 | 20 | class abond: 21 | 22 | def __init__(self, command_str, df: DataFrame, identifiers: list): 23 | 24 | if len(identifiers) != 2: 25 | print('two variables needed') 26 | exit() 27 | 28 | user_command = command(command_str, df.columns) 29 | pdata = panel_data(df, identifiers, user_command.variables, user_command.options) 30 | self.models = [] 31 | self._good_models = [] 32 | self._bad_models = [] 33 | if not user_command.options.beginner: 34 | model = dynamic_panel_model(pdata, user_command.variables, user_command.options, command_str, 35 | user_command.part_2, user_command.part_3) 36 | self.regular_process(model) 37 | self.form_results(model) 38 | 39 | else: 40 | m_manager = model_oranizer(user_command, pdata) 41 | num_models = len(m_manager.models.list_variables) 42 | j = 0 43 | for i in range(num_models): 44 | variables = m_manager.models.list_variables[i] 45 | com_str = m_manager.models.list_command_str[i] 46 | try: 47 | model = 
def regular_process(self, model: 'dynamic_panel_model'):
    """Run the GMM pipeline on one model: always compute step 1, then either
    the two-step estimate (steps 1 or 2) or the iterative estimate, and
    finish with the specification tests."""
    model.step_results = []

    xz, zy = self.calculate_basic(model)
    xz_t = xz.transpose()
    zy_t = zy.transpose()

    # the one-step estimate is always needed as the starting point
    self.GMM(model, xz, xz_t, zy, zy_t, 1)

    if model.options.steps in (1, 2):
        # two-step results are produced even for one-step requests, since
        # the tests below read model.step_results[1]
        self.GMM(model, xz, xz_t, zy, zy_t, 2)
        self.perform_test(model, 2)
    else:
        self.iterative_GMM(model, xz, xz_t, zy, zy_t)
        self.perform_test(model, model.options.steps)
109 | model.options.steps = current_step 110 | 111 | def GMM(self, model: dynamic_panel_model, _XZ, _XZ_t, _Zy, _Zy_t, step: int): 112 | N = model.N 113 | num_obs = model.num_obs 114 | z_list = model.z_list 115 | _z_t_list = model._z_t_list 116 | Cx_list = model.final_xy_tables['Cx'] 117 | 118 | Cy_list = model.final_xy_tables['Cy'] 119 | 120 | if step == 1: 121 | H1 = self.get_H1(model, model.options.transformation) 122 | W = self.calculate_W(H1, model) 123 | current_step = step_result(W) 124 | W_inv = current_step.W_inv 125 | model.step_results.append(current_step) 126 | 127 | if step >= 2: 128 | previous_step = model.step_results[step - 2] 129 | W = previous_step.W_next 130 | current_step = step_result(W) 131 | model.step_results.append(current_step) 132 | W_inv = current_step.W_inv 133 | 134 | _XZ_W = _XZ @ W_inv 135 | _M_inv = _XZ_W @ _XZ_t 136 | M = pinv(_M_inv) 137 | _M_XZ_W = M @ _XZ_W 138 | 139 | beta = _M_XZ_W @ _Zy_t 140 | residual = self.calculate_residual(Cy_list, Cx_list, beta) 141 | _residual_t = residual.transpose() 142 | SS = (_residual_t @ residual) * (1.0 / 2 / num_obs) 143 | 144 | z_height = int(z_list.shape[0] / N) 145 | r_height = int(residual.shape[0] / N) 146 | self._zs_list = np.empty((N * z_height, 1), dtype=np.float64) 147 | for i in range(N): 148 | z = z_list[(i * z_height):(i * z_height + z_height), :] 149 | u = residual[(i * r_height):(i * r_height + r_height), :] 150 | # u_t=_residual_t[:, (i*r_height):(i*r_height+r_height)] 151 | temp_zs = z @ u 152 | self._zs_list[(i * z_height):(i * z_height + z_height), :] = temp_zs 153 | if i == 0: 154 | zs = temp_zs 155 | ZuuZ = temp_zs @ temp_zs.transpose() 156 | else: 157 | 158 | zs += temp_zs 159 | ZuuZ += temp_zs @ temp_zs.transpose() 160 | 161 | W_next = ZuuZ * (1.0 / N) 162 | 163 | current_step._XZ_W = _XZ_W 164 | current_step.M = M 165 | current_step._M_XZ_W = _M_XZ_W 166 | current_step.beta = beta 167 | current_step.residual = residual 168 | current_step._residual_t = _residual_t 169 | 
def calculate_basic(self, model):
    """Accumulate the instrument cross-products over all individuals.

    Returns (XZ, Zy) where
        XZ = sum_i (z_i @ x_i)'   and   Zy = sum_i (z_i @ y_i)'
    with z_i, x_i, y_i the i-th individual's slices of the stacked
    instrument table and transformed x / y tables.

    Fixes vs. original: removed the unused locals (the per-individual
    z_t slice, x_width, and the _z_t_list alias) and dead commented code.
    """
    z_list = model.z_list
    Cx_list = model.final_xy_tables['Cx']
    Cy_list = model.final_xy_tables['Cy']

    z_height = int(z_list.shape[0] / model.N)
    x_height = int(Cx_list.shape[0] / model.N)

    temp_xz = None
    temp_zy = None
    for i in range(model.N):
        z = z_list[(z_height * i):(z_height * i + z_height), :]
        x = Cx_list[(x_height * i):(x_height * i + x_height), :]
        y = Cy_list[(x_height * i):(x_height * i + x_height), :]

        zx_t = (z @ x).transpose()
        zy_t = (z @ y).transpose()
        if temp_xz is None:
            temp_xz = zx_t
            temp_zy = zy_t
        else:
            temp_xz += zx_t
            temp_zy += zy_t
    return (temp_xz, temp_zy)

def calculate_W(self, H, model):
    """Compute the (un-normalized) weighting matrix W = sum_i z_i @ H @ z_i'.

    H is the step-one weighting kernel; z_i is individual i's block of
    model.z_list, z_i' the matching block of model._z_t_list.
    """
    z_height = int(model.z_list.shape[0] / model.N)

    temp_W = None
    for i in range(model.N):
        z = model.z_list[(z_height * i):(z_height * i + z_height), :]
        z_t = model._z_t_list[:, (z_height * i):(z_height * i + z_height)]

        if temp_W is None:
            temp_W = z @ H @ z_t
        else:
            temp_W += z @ H @ z_t

    return temp_W

def calculate_residual(self, y_list, x_list, beta):
    """Residuals of the stacked system: y - X @ beta."""
    return (y_list - x_list @ beta)
def perform_test(self, model, step):
    """Run the post-estimation specification tests and attach the results
    to the model: Hansen overidentification (model.hansen) and the
    Arellano-Bond AR(1)/AR(2) tests (model.AR_list).

    Note: the `step` parameter is kept for signature compatibility but is
    immediately superseded by model.options.steps (original behavior).
    Fixes vs. original: removed the unused step1 local, merged the two
    identical hansen_overid branches, and dropped the `raise Exception(e)`
    re-wrap that discarded the original exception type and traceback.
    """
    num_instru = model.z_information.num_instr
    Cx = model.final_xy_tables['Cx']

    step = model.options.steps
    if step == 1 or step == 2:
        # one- and two-step estimations both read the two-step results,
        # which regular_process always computes
        result_step = model.step_results[1]
    else:
        result_step = model.step_results[step - 1]

    _W2_inv = result_step.W_inv
    zs = result_step.zs
    model.hansen = tests.hansen_overid(_W2_inv, model.N, zs, num_instru,
                                       Cx.shape[1])

    # AR_test raises on failure; let the caller's handler see the original
    # exception instead of a re-wrapped copy
    model.AR_list = tests.AR_test(model, self._zs_list, step, 2)
def form_results(self, model):
    """Print the model's name (when set) and its full summary, then add the
    model to self.models."""
    if model.name != '':
        print(' ' + model.name)

    model.form_regression_table()
    ms = model_summary()
    ms.print_summary(model)

    self.models.append(model)

def check_model(self, model):
    """Specification check used to sort candidate models.

    Passes when the highest-order Arellano-Bond test does NOT reject
    (p > 0.05) and the Hansen test neither rejects (p > 0.05) nor looks
    suspiciously perfect (p < 0.99999).

    Returns bool. BUG FIX: the original fell off the end and returned
    None when the AR test rejected (and carried a dead `tbr` local);
    callers only tested truthiness, so returning False is compatible.
    """
    last_AR = model.AR_list[-1]
    if last_AR.P_value <= 0.05:
        return False
    return 0.05 < model.hansen.p_value < 0.99999
sklearn.metrics.pairwise import cosine_similarity 5 | 6 | my_data = np.array([[5, 1, 1], 7 | [3, 2, 3], 8 | [1, 3, 2], 9 | [3, 1, 1], 10 | [4, 2, 2], 11 | [7, 3, 1], 12 | [7, 1, 1]]) 13 | 14 | df = pd.DataFrame(data=my_data, columns=['y', 'dummy', 'x']) 15 | just_dummies = pd.get_dummies(df['dummy']) 16 | 17 | step_1 = pd.concat([df, just_dummies], axis=1) 18 | step_1.drop(['dummy'], inplace=True, axis=1) 19 | A = step_1.to_numpy() 20 | B = np.cov(A, rowvar=False) 21 | 22 | for i in range(len(A)): 23 | # B = np.matmul( A.transpose(), A) 24 | B = np.cov(A, rowvar=False) 25 | C = np.linalg.eig(B) 26 | D = abs(C[0]) 27 | 28 | if (max(D) / min(D) > 10E+12): 29 | j = np.argmin(D[2:5]) + 2 30 | # A_sparse = sparse.csr_matrix(A) 31 | # similarities = abs(cosine_similarity(A_sparse)) 32 | # col_sum=np.sum(similarities,axis=0) 33 | # j=np.argmax(col_sum[2:5])+2 34 | A = np.delete(A, j, 1) 35 | print(j) 36 | 37 | # also can output sparse matrices 38 | similarities_sparse = cosine_similarity(A_sparse, dense_output=False) 39 | print('pairwise sparse output:\n {}\n'.format(similarities_sparse)) 40 | -------------------------------------------------------------------------------- /pydynpd/sandbox/pydynpd.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/pydynpd/sandbox/pydynpd.zip -------------------------------------------------------------------------------- /pydynpd/specification_tests.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import numpy as np 4 | from scipy import stats 5 | 6 | from pydynpd.common_functions import lag 7 | from pydynpd.info import hansen_test_info, AR_test_info 8 | 9 | 10 | def hansen_overid(W2_inv, N, zs, num_instru, num_indep): 11 | hansen_test = np.linalg.multi_dot([zs.transpose(), W2_inv, zs]) * (1.0 / N) 12 | df = num_instru - num_indep 13 | crit = 
def AR_get_diff_XR(model, beta, ori_residual, r0_height):
    """Return (diff_x, diff_r): the differenced regressors and residuals the
    Arellano-Bond serial-correlation test operates on.

    - 'fod' transformation: residuals are recomputed from the stored
      first-difference tables Diff_y / Diff_x.
    - system GMM (options.level with 'fd'): keep only the first diff_width
      rows of each individual's block (the differenced equations).
    - plain difference GMM: x and residuals are already differenced.

    Fix vs. original: removed the unused local x0_height.
    """
    N = model.N
    ori_x = model.final_xy_tables['Cx']

    if model.options.transformation == 'fod':
        diff_y = model.final_xy_tables['Diff_y']
        diff_x = model.final_xy_tables['Diff_x']

        diff_r = diff_y - diff_x @ beta
    elif model.options.level:
        diff_width = model.z_information.diff_width
        num_col = ori_x.shape[1]
        diff_r = np.empty((diff_width * N, 1), dtype=np.float64)
        diff_x = np.empty((diff_width * N, num_col), dtype=np.float64)
        for i in range(N):
            # individual i's block: first diff_width rows are the
            # differenced equations, the rest are level equations
            r_i = ori_residual[(i * r0_height):(i * r0_height + r0_height), :]
            diff_r[(i * diff_width):(i * diff_width + diff_width), 0:1] = r_i[0:diff_width, 0:1]

            x_i = ori_x[(i * r0_height):(i * r0_height + r0_height), :]
            diff_x[(i * diff_width):(i * diff_width + diff_width), :] = x_i[0:diff_width, :]
    else:
        diff_x = ori_x
        diff_r = ori_residual

    return (diff_x, diff_r)
class regular_variable:
    """A model variable taken at a specific lag.

    name is the data-frame column; lag is the lag order (0 means the
    contemporaneous value).
    """

    def __init__(self, name, lag):
        self.name = name
        self.lag = lag


class gmm_var(regular_variable):
    """A GMM-instrumented variable with an allowed instrument-lag range.

    min_lag / max_lag bound the lags usable as instruments; name and lag
    are inherited from regular_variable.
    """

    def __init__(self, name, min_lag, max_lag, lag):
        super().__init__(name, lag)
        self.min_lag = min_lag
        self.max_lag = max_lag
18 | self.max_lag = max_lag 19 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=42"] 3 | build-backend = "setuptools.build_meta" -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # Note: To use the 'upload' functionality of this file, you must: 5 | # $ pipenv install twine --dev 6 | 7 | import io 8 | import os 9 | import sys 10 | from shutil import rmtree 11 | 12 | from setuptools import find_packages, setup, Command 13 | 14 | # Package meta-data. 15 | NAME = 'pydynpd' 16 | DESCRIPTION = 'A package to estimate dynamic panel data model using difference GMM and system GMM.' 17 | URL = 'https://github.com/dazhwu/pydynpd' 18 | EMAIL = 'wudz800@gmail.com' 19 | AUTHOR = 'Dazhong Wu' 20 | REQUIRES_PYTHON = '>=3.6.0' 21 | VERSION = '0.2.1' 22 | 23 | # What packages are required for this module to be executed? 24 | REQUIRED = [ 25 | 'numpy', 'scipy', 'prettytable', 'pandas' 26 | ] 27 | 28 | # What packages are optional? 29 | EXTRAS = { 30 | # 'fancy feature': ['django'], 31 | } 32 | 33 | # The rest you shouldn't have to touch too much :) 34 | # ------------------------------------------------ 35 | # Except, perhaps the License and Trove Classifiers! 36 | # If you do change the License, remember to change the Trove Classifier for that! 37 | 38 | here = os.path.abspath(os.path.dirname(__file__)) 39 | 40 | # Import the README and use it as the long-description. 41 | # Note: this will only work if 'README.md' is present in your MANIFEST.in file! 
42 | try:
 43 | with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f:
 44 | long_description = '\n' + f.read()
 45 | except FileNotFoundError:
 46 | long_description = DESCRIPTION  # fall back to the short description when README.md is absent
 47 | 
 48 | # Load the package's __version__.py module as a dictionary.
 49 | about = {}
 50 | if not VERSION:  # VERSION is hard-coded above, so this branch is normally skipped
 51 | project_slug = NAME.lower().replace("-", "_").replace(" ", "_")
 52 | with open(os.path.join(here, project_slug, '__version__.py')) as f:
 53 | exec(f.read(), about)
 54 | else:
 55 | about['__version__'] = VERSION
 56 | 
 57 | 
 58 | class UploadCommand(Command):
 59 | """Support setup.py upload."""
 60 | 
 61 | description = 'Build and publish the package.'
 62 | user_options = []
 63 | 
 64 | @staticmethod
 65 | def status(s):
 66 | """Prints things in bold."""
 67 | print('\033[1m{0}\033[0m'.format(s))
 68 | 
 69 | def initialize_options(self):
 70 | pass
 71 | 
 72 | def finalize_options(self):
 73 | pass
 74 | 
 75 | def run(self):
 76 | try:
 77 | self.status('Removing previous builds…')
 78 | rmtree(os.path.join(here, 'dist'))  # ignore failure: dist/ may not exist yet
 79 | except OSError:
 80 | pass
 81 | 
 82 | self.status('Building Source and Wheel (universal) distribution…')
 83 | os.system('{0} setup.py sdist bdist_wheel --universal'.format(sys.executable))
 84 | 
 85 | self.status('Uploading the package to PyPI via Twine…')
 86 | os.system('twine upload dist/*')
 87 | 
 88 | self.status('Pushing git tags…')
 89 | os.system('git tag v{0}'.format(about['__version__']))
 90 | os.system('git push --tags')
 91 | 
 92 | sys.exit()  # stop here so setup() does not continue after an upload
 93 | 
 94 | 
 95 | # Where the magic happens:
 96 | setup(
 97 | name=NAME,
 98 | version=about['__version__'],
 99 | description=DESCRIPTION,
 100 | long_description=long_description,
 101 | long_description_content_type='text/markdown',
 102 | author=AUTHOR,
 103 | author_email=EMAIL,
 104 | python_requires=REQUIRES_PYTHON,
 105 | url=URL,
 106 | packages=find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*"]),
 107 | # If your package is a single module, use this instead of 'packages':
 108 | # py_modules=['mypackage'],
 109 | 
110 | # entry_points={
 111 | #     'console_scripts': ['mycli=mymodule:cli'],
 112 | # },
 113 | install_requires=REQUIRED,
 114 | extras_require=EXTRAS,
 115 | include_package_data=True,
 116 | license='MIT',
 117 | classifiers=[
 118 | # Trove classifiers
 119 | # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
 120 | 'License :: OSI Approved :: MIT License',
 121 | 'Programming Language :: Python',
 122 | 'Programming Language :: Python :: 3',
 123 | 'Programming Language :: Python :: 3.6',
 124 | 'Programming Language :: Python :: Implementation :: CPython',
 125 | 'Programming Language :: Python :: Implementation :: PyPy'
 126 | ],
 127 | # $ setup.py publish support.
 128 | cmdclass={
 129 | 'upload': UploadCommand,
 130 | },
 131 | )
 132 | 
-------------------------------------------------------------------------------- /test.py: --------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | from pydynpd import regression
 3 | 
 4 | df = pd.read_csv("tourism_covid_data-total.csv") #, index_col=False)
 5 | df['monthly_cases']=df['monthly cases']
 6 | #command_str='tourism_demand L1.tourism_demand L1.monthly_cases | gmm(tourism_demand, 2 6) iv(L1.monthly_cases)| collapse '
 7 | command_str='tourism_demand L1.tourism_demand monthly_cases | gmm(tourism_demand, 2 6) iv(monthly_cases)| nolevel collapse '
 8 | mydpd = regression.abond(command_str, df, ['Country', 'month_year'])
-------------------------------------------------------------------------------- /untitled.md: --------------------------------------------------------------------------------
 1 | Package pydynpd is able to estimate dynamic panel models that take a form as follows:
 2 | 
 3 | $$y_{it}=\sum_{j=1}^{p}\alpha_{j}y_{i,t-j}+\sum_{k=1}^{m}\sum_{j=0}^{q_{k}}\beta_{jk}r_{i,t-j}^{(k)}+\boldsymbol{\delta}\boldsymbol{d_{i,t}}+\boldsymbol{\gamma}\boldsymbol{s_{i,t}}+u_{i}+\epsilon_{it}\label{eq:typical_model}$$ In the model above, $y_{i,t-j}$ ($j=1,2,\ldots,p$) denotes a
group of $p$ lagged dependent variables. $r_{i,t-j}^{(k)}$ represents a group of $m$ endogeneous variables other than lagged $y$. $\boldsymbol{d_{it}}$ is a vector of predetermined variables which may potentially correlate with past errors, $\boldsymbol{s_{it}}$ is a vector of exogenous variables, and $u_{i}$ represents fixed effect. For illustration purpose, let's consider a basic form of dynamic panel model: 4 | 5 | $$y_{it}=\alpha_{1}y_{i,t-1}+\delta d_{i,t}+u_{i}+\epsilon_{it}\label{eq:simple_model}$$ -------------------------------------------------------------------------------- /vignettes/.ipynb_checkpoints/Guide-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "60fac165-f308-4815-b379-28da3303aa81", 6 | "metadata": {}, 7 | "source": [ 8 | "# Technique Guide\n", 9 | "\n", 10 | "Package pyndynpd is able to estimate dynamic panel models that take a form as follows:\n", 11 | "\n", 12 | "$$y_{it}=\\sum_{j=1}^{p}\\alpha_{j}y_{i,t-j}+\\sum_{k=1}^{m}\\sum_{j=0}^{q_{k}}\\beta_{jk}r_{i,t-j}^{(k)}+\\boldsymbol{\\delta}\\boldsymbol{d_{i,t}}+\\boldsymbol{\\gamma}\\boldsymbol{s_{i,t}}+u_{i}+\\epsilon_{it} $$ \n", 13 | "\n", 14 | "In the model above, $y_{i,t-j}$ ($j=1,2,\\ldots,p$) denotes a group of $p$ lagged dependent variables. $r_{i,t-j}^{(k)}$ represents a group of $m$ endogeneous variables other than lagged $y$. $\\boldsymbol{d_{it}}$ is a vector of predetermined variables which may potentially correlate with past errors, $\\boldsymbol{s_{it}}$ is a vector of exogenous variables, and $u_{i}$ represents fixed effect. 
For illustration purpose, let's consider a basic form of dynamic panel model:\n", 15 | "\n", 16 | "$$\n", 17 | "\\begin{align}\n", 18 | "y_{it}=\\alpha_{1}y_{i,t-1}+\\delta d_{i,t}+u_{i}+\\epsilon_{it} \\label{basic}\\tag{1}\n", 19 | "\\end{align}\n", 20 | "$$\n", 21 | "\n", 22 | "As lagged dependent variable $y_{i,t-1}$ is included as regressor, the popular techniques in static panel models, such as fixed-effect and first-difference estimators, no longer produce consistent results. Researchers have developed many methods to estimate dynamic panel model. Essentially there are two types of GMM estimates, difference GMM and system GMM.\n", 23 | "\n", 24 | "## Difference GMM \n", 25 | "\n", 26 | "Difference GMM was developed by [@arellano1991some]. The first step in the process is to eliminate the fixed-effect term $u_{i}$. First differencing Eq ($\\ref{basic}$) yields:\n", 27 | "\n", 28 | "$$\n", 29 | "\\begin{align}\n", 30 | "\\Delta y_{it}=\\alpha_{1}\\Delta y_{i,t-1}+\\delta\\Delta d_{i,t}+\\Delta\\epsilon_{it}\\label{fd}\\tag{2} \n", 31 | "\\end{align}$$\n", 32 | "\n", 33 | "In the model above, $\\Delta y_{i,t-1}$ correlates with $\\Delta\\epsilon_{i,t}$ because $\\Delta y_{i,t-1}=y_{i,t-1}-y_{i,t-2}$, $\\Delta\\epsilon_{i,t}=\\epsilon_{i,t}-\\epsilon_{i,t-1}$, and $y_{i,t-1}$ is affected by $\\epsilon_{i,t-1}$. As a result, estimating Eq ($\\ref{fd}$) directly produces inconsistent result. Instrumental variables are used to solve the issue. [@arellano1991some]suggest to use all lagged $y$ dated $t-2$ and earlier (i.e., $y_{i,1}$, $y_{i,2}$,\\..., $y_{i,t-2}$) as instruments for $\\Delta y_{i,t-1}$. Similarly, the instruments for predetermined variable $\\Delta d_{it}$ include $d_{i,1}$, $d_{i,2}$,\\..., $d_{i,t-1}$. 
Let $z_{i}$ be the instrument variable matrix for individual i:\n", 34 | "\n", 35 | "$$\n", 36 | "z_{i}=\\left[\\begin{array}{ccccccccccccccccccc}\n", 37 | "y_{i1} & 0 & 0 & \\ldots & \\ldots & 0 & \\ldots & 0 & d_{i1} & d_{i2} & 0 & 0 & 0 & 0 & 0 & 0 & 0 & \\ldots & 0\\\\\n", 38 | "0 & y_{i1} & y_{i2} & 0 & 0 & 0 & \\ldots & 0 & 0 & 0 & d_{i1} & d_{i2} & d_{i3} & 0 & 0 & 0 & 0 & \\ldots & 0\\\\\n", 39 | "\\vdots & 0 & 0 & \\vdots & & & \\ldots & & & & & & & \\vdots & & & & \\ldots & 0\\\\\n", 40 | "0 & 0 & 0 & 0 & 0 & 0 & \\ldots & y_{i,T-2} & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & \\ldots & d_{i,T-1}\n", 41 | "\\end{array}\\right]$$\n", 42 | "\n", 43 | "Difference GMM is based on the moment condition $E(z_{i}^{\\prime}\\Delta\\epsilon_{i})=0$ where $\\Delta \\epsilon_{i}=(\\Delta \\epsilon_{i2}, \\Delta\\epsilon_{i3}\\textrm{, }...,\\Delta\\epsilon_{iT})^{\\prime}$ and $z_{i}$ is the instrument variable matrix. Applying this moment condition to sample data, we have $(1/N)\\sum_{i=1}^{N}z{}_{i}^{\\prime}(\\Delta y_{i}-\\theta\\Delta x_{i})=0$ where $\\theta=(\\alpha_{1},\\delta)'$ and $\\Delta x_{i}=(\\Delta y_{i,t-1},\\Delta d_{it})$ for t=3, \\... T. When the number of instruments is greater than the number of independent variables, the moment condition is overidentified and in general there is no $\\theta$ available to satisfy the moment condition. Instead, we look for a $\\theta$ to minimize moment condition. That is:\n", 44 | "\n", 45 | "$$\\hat{\\theta}_{gmm}=\\arg\\min_{\\theta}\\left(\\frac{1}{N}\\sum_{i=1}^{N}(\\Delta y_{i}-\\theta\\Delta x_{i})^{\\prime}z_{i}\\right)W\\left(\\frac{1}{N}\\sum_{i=1}^{N}z^{\\prime}{}_{i}(\\Delta y_{i}-\\theta\\Delta x_{i})\\right)$$\n", 46 | "\n", 47 | "where W is the weighting matrix of the moments. There are two popularly used weighting matrixes. 
In a one-step GMM estimate, the weighting matrix is\n", 48 | "\n", 49 | "$$W_{1}=\\left(\\frac{1}{N}Z^{\\prime}H_{1}Z\\right)^{-1}$$\n", 50 | "\n", 51 | "where matrix H has twos in the main diagnols, minus ones in the first subdiagnols, and zeros elsewhere:\n", 52 | "\n", 53 | "$$H_{1}=\\left[\\begin{array}{cccccc}\n", 54 | "2 & -1 & 0 & 0 & \\ldots & 0\\\\\n", 55 | "-1 & 2 & -1 & 0 & \\ldots & 0\\\\\n", 56 | "0 & \\ddots & \\ddots & \\ddots & \\ddots & \\vdots\\\\\n", 57 | "\\vdots & \\ddots & -1 & 2 & -1 & 0\\\\\n", 58 | "0 & \\ddots & 0 & -1 & 2 & -1\\\\\n", 59 | "0 & \\ldots & 0 & 0 & -1 & 2\n", 60 | "\\end{array}\\right]$$\n", 61 | "\n", 62 | "On the other hand, in a two-step GMM estimate, the weighting matrix is\n", 63 | "\n", 64 | "$$W_{2}=\\left(\\frac{1}{N}Z^{\\prime}H_{2}Z\\right)^{-1}$$\n", 65 | "\n", 66 | "where $H_{2}=\\Delta\\hat{\\epsilon}\\Delta\\hat{\\epsilon}^{\\prime}$ and $\\Delta\\hat{\\epsilon}$ is the residual from one-step GMM.\n", 67 | "\n", 68 | "## System GMM\n", 69 | "\n", 70 | "Compared with difference GMM, sytem GMM adds additional moment conditions, resulting in more instruments:\n", 71 | "\n", 72 | "$$\n", 73 | "\\begin{align}\n", 74 | "z_{i}=\\left[\\begin{array}{cccccccccccccccc|cccccccc}\n", 75 | "y_{i1} & 0 & 0 & 0 & 0 & 0 & \\ldots & 0 & d_{i1} & d_{i2} & 0 & 0 & 0 & 0 & 0 & 0 & 0 & \\ldots & 0 & 0 & 0 & 0 & \\ldots & 0\\\\\n", 76 | "0 & y_{i1} & y_{i2} & 0 & 0 & 0 & \\ldots & 0 & 0 & 0 & d_{i1} & d_{i2} & d_{i3} & 0 & 0 & 0 & 0 & \\ldots & 0 & 0 & 0 & 0 & \\ldots & 0\\\\\n", 77 | " & & & \\vdots & & & \\ldots & & \\vdots & & & & & \\vdots & \\ldots & \\vdots & & \\ldots & 0 & & & & \\ddots & 0\\\\\n", 78 | "0 & 0 & 0 & 0 & 0 & 0 & \\ldots & y_{i,T-2} & 0 & 0 & 0 & 0 & 0 & 0 & \\ldots & d_{i,T-1} & 0 & \\ldots & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", 79 | "\\hline 0 & \\ldots & 0 & & & & & & & & & & & & 0 & 0 & \\Delta y_{i2} & \\ldots & 0 & 0 & \\Delta d_{i3} & & 0\\\\\n", 80 | "\\vdots & & & & & & & & & & & & & & & 0 & 0 & \\Delta 
y_{i3} & \\ldots & 0 & 0 & \\Delta d_{i4} & & 0\\\\\n", 81 | "\\vdots & & & & & & & & & & & & & & & \\vdots & \\vdots & \\vdots & \\ddots & \\vdots & \\vdots & & \\ddots\\\\\n", 82 | "0 & \\ldots & & & & & & & & & & & & & \\ldots & 0 & 0 & 0 & \\ldots & \\Delta y_{i,T-1} & 0 & 0 & \\ldots & \\Delta y_{i,T}\n", 83 | "\\end{array}\\right] \\label{z_sys}\\tag{3}\n", 84 | "\\end{align}$$\n", 85 | "\n", 86 | "$$\\hat{\\theta}_{gmm}=\\arg\\min_{\\theta}\\left(\\frac{1}{N}\\sum_{i=1}^{N}(\\widetilde{y}_{i}-\\theta\\widetilde{x_{i}})^{\\prime}z_{i}\\right)W\\left(\\frac{1}{N}\\sum_{i=1}^{N}z^{\\prime}{}_{i}(\\widetilde{y}-\\theta\\widetilde{x_{i}})\\right)$$\n", 87 | "\n", 88 | "where $$\\widetilde{y}=\\left(\\begin{array}{c}\n", 89 | "\\Delta y_{i}\\\\\n", 90 | "\\hline y_{i}\n", 91 | "\\end{array}\\right)\\textrm{ and }\\widetilde{x_{i}}=\\left(\\begin{array}{c|c}\n", 92 | "\\Delta x_{i} & 0\\\\\n", 93 | "\\hline x_{i} & 1\n", 94 | "\\end{array}\\right)$$\n", 95 | "\n", 96 | "## Robust estimation of coefficients' covariance \n", 97 | "\n", 98 | "## Specification Test \n", 99 | "\n", 100 | "### Error serial correlation test \n", 101 | "\n", 102 | "Second-order serial correlation test: if $\\epsilon_{it}$ in Eq ($\\ref{basic}$) is serially correlated, GMM estimates are no longer consistent. 
In a first-differenced model (e.g., Eq ($\\ref{fd}$)), to test whether $\\epsilon_{i,t-1}$ is correlated with $\\epsilon_{i,t-2}$, the second-order autocovariance of the residuals, $\\textrm{AR(2)}$, is calculated as:\n", 103 | "\n", 104 | "$$AR(2)=\\frac{b_{0}}{\\sqrt{b_{1}+b_{2}+b_{3}}}\\textrm{ where}$$\n", 105 | "\n", 106 | "$$b_{0}=\\sum_{i=1}^{N}\\Delta\\hat{\\hat{\\epsilon}}_{i}^{\\prime}L_{\\Delta\\hat{\\hat{\\epsilon}}}^{2}$$\n", 107 | "\n", 108 | "$$b_{1}=\\sum_{i=1}^{N}L_{\\Delta\\hat{\\hat{\\epsilon}}_{i}^{\\prime}}^{2}H_{2}L_{\\Delta\\hat{\\hat{\\epsilon}}_{i}}^{2}$$ \n", 109 | "\n", 110 | "$$b_{2}=\\textrm{-}2\\left(\\sum_{i=1}^{N}L_{\\Delta\\hat{\\hat{\\epsilon}}_{i}^{\\prime}}^{2}x_{i}\\right)\\left[\\left(\\sum_{i=1}^{N}x_{i}^{\\prime}z_{i}\\right)W_{2}\\left(\\sum_{i=1}^{N}z_{i}^{\\prime}x_{i}\\right)\\right]^{-1}\\left(\\sum_{i=1}^{N}x_{i}^{\\prime}z_{i}\\right)W_{2}\\left(\\sum_{i=1}^{N}z_{i}^{\\prime}H_{2}L_{\\Delta\\hat{\\hat{\\epsilon}}_{i}}^{2}\\right)$$\n", 111 | "\n", 112 | "$$b_{3}=\\left(\\sum_{i=1}^{N}L_{\\Delta\\hat{\\hat{\\epsilon}}_{i}^{\\prime}}^{2}x_{i}\\right)\\hat{V}_{\\hat{\\hat{\\theta}}}\\left(\\sum_{i=1}^{N}x_{i}^{\\prime}L_{\\Delta\\hat{\\hat{\\epsilon}}_{i}}^{2}\\right)$$\n", 113 | "\n", 114 | "### Hansen overidentification test \n", 115 | "\n", 116 | "Hansen overidentification test is used to check if instruments are exogeneous. Under the null hypothesis that instruments are valid, test statistic, $S$, should be close to zero:\n", 117 | "\n", 118 | "$$S=\\left(\\sum_{i=1}^{N}\\Delta\\hat{\\hat{\\epsilon}}_{i}^{\\prime}z_{i}\\right)W_{2}\\left(\\sum_{i=1}^{N}z_{i}^{\\prime}\\Delta\\hat{\\hat{\\epsilon}}_{i}\\right)$$\n", 119 | "\n", 120 | "# Handling instrument proliferation issue \n", 121 | "\n", 122 | "Difference GMM and system GMM may generate too many instruments, which causes several problems (citation). Package pydynpd allows users to reduce the number of instruments in two ways. 
First, users can control the number of instruments in command string. For example, $\\textrm{gmm(w, 2:3)}$ states that only $n_{t-2}$ and $n_{t-3}$ are used as instruments, rather than all lagged $n$ dated $t-2$ and earlier. Second, users can choose to collapse the instrumental variable matrix. For example, if collapsed, matrix as in Eq ($\\ref{z_sys}$) is changed to:\n", 123 | "\n", 124 | "$$z_{i}=\\left[\\begin{array}{cccccccccc|cc}\n", 125 | "y_{i1} & 0 & 0 & \\ldots & 0 & d_{i1} & d_{i2} & 0 & \\ldots & 0\\\\\n", 126 | "y_{i1} & y_{i2} & 0 & \\ldots & 0 & d_{i1} & d_{i2} & d_{i3} & \\ldots & 0\\\\\n", 127 | "\\vdots & \\vdots & \\ddots & \\ldots & \\vdots & & & & \\ddots & \\vdots\\\\\n", 128 | "y_{i1} & y_{i2} & y_{i3} & \\ldots & y_{i,T-2} & d_{i1} & d_{i2} & d_{id} & \\ldots & d_{i,T-1}\\\\\n", 129 | "\\hline 0 & & & \\ldots & 0 & 0 & 0 & 0 & \\ldots & 0 & \\Delta y_{i2} & \\Delta d_{i3}\\\\\n", 130 | "0 & 0 & & & 0 & 0 & 0 & 0 & \\ldots & 0 & \\Delta y_{i3} & \\Delta d_{i4}\\\\\n", 131 | "\\vdots & & \\ddots & & \\vdots & & & \\vdots & & \\vdots & \\vdots & \\vdots\\\\\n", 132 | "0 & 0 & 0 & \\ldots & 0 & 0 & & 0 & \\ldots & 0 & \\Delta y_{i,T-1} & \\Delta d_{iT}\n", 133 | "\\end{array}\\right]$$\n", 134 | "\n", 135 | "This change dramatically reduces the number of instruments. 
Intuitively, the number of instruments is positively associated with the width of the matrix above.\n", 136 | "\n" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "id": "6eefc858", 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "id": "7ca931c1-e7d1-484e-aae8-727801b3a60d", 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "id": "28e3e167-e9c3-427d-ba56-521262920be1", 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [] 162 | } 163 | ], 164 | "metadata": { 165 | "kernelspec": { 166 | "display_name": "Python 3 (ipykernel)", 167 | "language": "python", 168 | "name": "python3" 169 | }, 170 | "language_info": { 171 | "codemirror_mode": { 172 | "name": "ipython", 173 | "version": 3 174 | }, 175 | "file_extension": ".py", 176 | "mimetype": "text/x-python", 177 | "name": "python", 178 | "nbconvert_exporter": "python", 179 | "pygments_lexer": "ipython3", 180 | "version": "3.9.7" 181 | } 182 | }, 183 | "nbformat": 4, 184 | "nbformat_minor": 5 185 | } 186 | -------------------------------------------------------------------------------- /vignettes/.ipynb_checkpoints/Tutorial-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "a4357a62", 6 | "metadata": {}, 7 | "source": [ 8 | "# abond command\n", 9 | "In this tutorial, we illustrate the functions of pydynpd with examples. The first data set is one from Arellano and Bond (1991). It is an unblanced panel with 140 firms over 9 years (1976-1984). You can download the data (data.csv) from the /benchmark/code folder. 
We first consider the following basic model:\n", 10 | "\n", 11 | "$$\n", 12 | "\\begin{align}\n", 13 | "n_{i,t}=\\alpha_1n_{i,t-1}+\\alpha_2n_{i,t-2}+\\beta_1w_{i,t}+\\gamma_1k_{i,t}+u_{i}+\\epsilon_{i,t}\n", 14 | "\\end{align}\n", 15 | "$$\n", 16 | "\n", 17 | "In the model above, variables $n$, $w$, and $k$ are the natural logarithm of employment, wage, and capital respectively. $u_{i}$ is unobserved fixed effect and $\\epsilon_{i,t}$ is idiosyncraic error. \n", 18 | "\n", 19 | "To estimate the model, we first load data to Pandas data frame:\n", 20 | "\n" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 1, 26 | "id": "7ca931c1-e7d1-484e-aae8-727801b3a60d", 27 | "metadata": {}, 28 | "outputs": [ 29 | { 30 | "ename": "ModuleNotFoundError", 31 | "evalue": "No module named 'pydynpd'", 32 | "output_type": "error", 33 | "traceback": [ 34 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 35 | "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", 36 | "\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_6876/497402013.py\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mpandas\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0mpydynpd\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mregression\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3\u001b[0m \u001b[0mdf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"test_data.csv\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 37 | "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'pydynpd'" 38 | ] 39 | } 40 | ], 41 | "source": [ 42 | "import pandas as pd\n", 43 | "from pydynpd import 
regression\n", 44 | "df = pd.read_csv(\"test_data.csv\")" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "id": "6a81769e", 50 | "metadata": {}, 51 | "source": [ 52 | "Then we construct command string to describe the model. A command string has two or three parts, which are separated by |.\n", 53 | "Part 1 is a list starting with the dependent variable, and followed by independent variables except time dummies. Given the model, part 1 is:\n", 54 | "\n", 55 | "```\n", 56 | "n L1.n L2.n w k\n", 57 | "```\n", 58 | "In the command above, L is the lag operator as in L1.n (i.e., $n_{i,t-1}$) or L2.n for 2 lags of n (i.e., $n_{i,t-2}$). \n", 59 | "\n", 60 | "Part 2 indicates how instruments are created. First, suppose we want to use the second and third lags of dependent variable n (i.e., L2.n and L3.n) as instruments, then we include the following GMM list:\n", 61 | "\n", 62 | "```\n", 63 | "GMM(n, 2:3)\n", 64 | "```\n", 65 | "Next, suppose we believe that variable w is a predetermined variable and use its first and deeper lags (i.e., L1.w, L2.w, ...) as instruments. Then we include a second GMM list:\n", 66 | "\n", 67 | "```\n", 68 | "GMM(w, 1:.)\n", 69 | "```\n", 70 | "The dot (.) above means there is no restriction regarding the maximum lag of $w$. In other words, we use all available lags.\n", 71 | "Next, suppose variable $k$ is a strictly exogenous variable. So, we use IV() list:\n", 72 | "\n", 73 | "```\n", 74 | "IV(k)\n", 75 | "```\n", 76 | "This tells pydynpd to use variable $k$ itself as instrument.\n", 77 | "Finally, we put all GMM and IV lists together to form part 2:\n", 78 | "```\n", 79 | "GMM(n, 2:3) GMM(w, 1:.) 
IV(k)\n", 80 | "```\n", 81 | "\n", 82 | "Suppose our command just has the two parts above, then we combine the two parts together:" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "id": "8ef76f2b", 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "command_str='n L1.n L2.n w k | GMM(n, 2:3) GMM(w, 1:.) IV(k)'\n" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "id": "4ed2ed7f", 98 | "metadata": {}, 99 | "source": [ 100 | "Finally, we use abond function to estimate our model. Note that three parameters should be provided. The first one is the command string discussed above. The second one is the data, and the third one is a list of two variables that indentify individual firm and year respectively. " 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "id": "8a47a1de", 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "mydpd = regression.abond(command_str, df, ['id', 'year'])" 111 | ] 112 | } 113 | ], 114 | "metadata": { 115 | "kernelspec": { 116 | "display_name": "Python 3 (ipykernel)", 117 | "language": "python", 118 | "name": "python3" 119 | }, 120 | "language_info": { 121 | "codemirror_mode": { 122 | "name": "ipython", 123 | "version": 3 124 | }, 125 | "file_extension": ".py", 126 | "mimetype": "text/x-python", 127 | "name": "python", 128 | "nbconvert_exporter": "python", 129 | "pygments_lexer": "ipython3", 130 | "version": "3.9.7" 131 | } 132 | }, 133 | "nbformat": 4, 134 | "nbformat_minor": 5 135 | } 136 | -------------------------------------------------------------------------------- /vignettes/Guide.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "60fac165-f308-4815-b379-28da3303aa81", 6 | "metadata": {}, 7 | "source": [ 8 | "# Technique Guide\n", 9 | "\n", 10 | "Package pyndynpd is able to estimate dynamic panel models that take a form as 
follows:\n", 11 | "\n", 12 | "$$y_{it}=\\sum_{j=1}^{p}\\alpha_{j}y_{i,t-j}+\\sum_{k=1}^{m}\\sum_{j=0}^{q_{k}}\\beta_{jk}r_{i,t-j}^{(k)}+\\boldsymbol{\\delta}\\boldsymbol{d_{i,t}}+\\boldsymbol{\\gamma}\\boldsymbol{s_{i,t}}+u_{i}+\\epsilon_{it} $$ \n", 13 | "\n", 14 | "In the model above, $y_{i,t-j}$ ($j=1,2,\\ldots,p$) denotes a group of $p$ lagged dependent variables. $r_{i,t-j}^{(k)}$ represents a group of $m$ endogeneous variables other than lagged $y$. $\\boldsymbol{d_{it}}$ is a vector of predetermined variables which may potentially correlate with past errors, $\\boldsymbol{s_{it}}$ is a vector of exogenous variables, and $u_{i}$ represents fixed effect. For illustration purpose, let's consider a basic form of dynamic panel model:\n", 15 | "\n", 16 | "$$\n", 17 | "\\begin{align}\n", 18 | "y_{it}=\\alpha_{1}y_{i,t-1}+\\delta d_{i,t}+u_{i}+\\epsilon_{it} \\label{basic}\\tag{1}\n", 19 | "\\end{align}\n", 20 | "$$\n", 21 | "\n", 22 | "As lagged dependent variable $y_{i,t-1}$ is included as regressor, the popular techniques in static panel models, such as fixed-effect and first-difference estimators, no longer produce consistent results. Researchers have developed many methods to estimate dynamic panel model. Essentially there are two types of GMM estimates, difference GMM and system GMM.\n", 23 | "\n", 24 | "## Difference GMM \n", 25 | "\n", 26 | "The first step in the process is to eliminate the fixed-effect term $u_{i}$. First differencing Eq ($\\ref{basic}$) yields:\n", 27 | "\n", 28 | "$$\n", 29 | "\\begin{align}\n", 30 | "\\Delta y_{it}=\\alpha_{1}\\Delta y_{i,t-1}+\\delta\\Delta d_{i,t}+\\Delta\\epsilon_{it}\\label{fd}\\tag{2} \n", 31 | "\\end{align}$$\n", 32 | "\n", 33 | "In the model above, $\\Delta y_{i,t-1}$ correlates with $\\Delta\\epsilon_{i,t}$ because $\\Delta y_{i,t-1}=y_{i,t-1}-y_{i,t-2}$, $\\Delta\\epsilon_{i,t}=\\epsilon_{i,t}-\\epsilon_{i,t-1}$, and $y_{i,t-1}$ is affected by $\\epsilon_{i,t-1}$. 
As a result, estimating Eq ($\\ref{fd}$) directly produces inconsistent result. Instrumental variables are used to solve the issue. [@arellano1991some]suggest to use all lagged $y$ dated $t-2$ and earlier (i.e., $y_{i,1}$, $y_{i,2}$,\\..., $y_{i,t-2}$) as instruments for $\\Delta y_{i,t-1}$. Similarly, the instruments for predetermined variable $\\Delta d_{it}$ include $d_{i,1}$, $d_{i,2}$,\\..., $d_{i,t-1}$. Let $z_{i}$ be the instrument variable matrix for individual i:\n", 34 | "\n", 35 | "$$\n", 36 | "z_{i}=\\left[\\begin{array}{ccccccccccccccccccc}\n", 37 | "y_{i1} & 0 & 0 & \\ldots & \\ldots & 0 & \\ldots & 0 & d_{i1} & d_{i2} & 0 & 0 & 0 & 0 & 0 & 0 & 0 & \\ldots & 0\\\\\n", 38 | "0 & y_{i1} & y_{i2} & 0 & 0 & 0 & \\ldots & 0 & 0 & 0 & d_{i1} & d_{i2} & d_{i3} & 0 & 0 & 0 & 0 & \\ldots & 0\\\\\n", 39 | "\\vdots & 0 & 0 & \\vdots & & & \\ldots & & & & & & & \\vdots & & & & \\ldots & 0\\\\\n", 40 | "0 & 0 & 0 & 0 & 0 & 0 & \\ldots & y_{i,T-2} & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & \\ldots & d_{i,T-1}\n", 41 | "\\end{array}\\right]$$\n", 42 | "\n", 43 | "Difference GMM is based on the moment condition $E(z_{i}^{\\prime}\\Delta\\epsilon_{i})=0$ where $\\Delta \\epsilon_{i}=(\\Delta \\epsilon_{i2}, \\Delta\\epsilon_{i3}\\textrm{, }...,\\Delta\\epsilon_{iT})^{\\prime}$ and $z_{i}$ is the instrument variable matrix. Applying this moment condition to sample data, we have $(1/N)\\sum_{i=1}^{N}z{}_{i}^{\\prime}(\\Delta y_{i}-\\theta\\Delta x_{i})=0$ where $\\theta=(\\alpha_{1},\\delta)'$ and $\\Delta x_{i}=(\\Delta y_{i,t-1},\\Delta d_{it})$ for t=3, \\... T. When the number of instruments is greater than the number of independent variables, the moment condition is overidentified and in general there is no $\\theta$ available to satisfy the moment condition. Instead, we look for a $\\theta$ to minimize moment condition. 
That is:\n", 44 | "\n", 45 | "$$\\hat{\\theta}_{gmm}=\\arg\\min_{\\theta}\\left(\\frac{1}{N}\\sum_{i=1}^{N}(\\Delta y_{i}-\\theta\\Delta x_{i})^{\\prime}z_{i}\\right)W\\left(\\frac{1}{N}\\sum_{i=1}^{N}z^{\\prime}{}_{i}(\\Delta y_{i}-\\theta\\Delta x_{i})\\right)$$\n", 46 | "\n", 47 | "where W is the weighting matrix of the moments. There are two popularly used weighting matrixes. In a one-step GMM estimate, the weighting matrix is\n", 48 | "\n", 49 | "$$W_{1}=\\left(\\frac{1}{N}Z^{\\prime}H_{1}Z\\right)^{-1}$$\n", 50 | "\n", 51 | "where matrix H has twos in the main diagnols, minus ones in the first subdiagnols, and zeros elsewhere:\n", 52 | "\n", 53 | "$$H_{1}=\\left[\\begin{array}{cccccc}\n", 54 | "2 & -1 & 0 & 0 & \\ldots & 0\\\\\n", 55 | "-1 & 2 & -1 & 0 & \\ldots & 0\\\\\n", 56 | "0 & \\ddots & \\ddots & \\ddots & \\ddots & \\vdots\\\\\n", 57 | "\\vdots & \\ddots & -1 & 2 & -1 & 0\\\\\n", 58 | "0 & \\ddots & 0 & -1 & 2 & -1\\\\\n", 59 | "0 & \\ldots & 0 & 0 & -1 & 2\n", 60 | "\\end{array}\\right]$$\n", 61 | "\n", 62 | "On the other hand, in a two-step GMM estimate, the weighting matrix is\n", 63 | "\n", 64 | "$$W_{2}=\\left(\\frac{1}{N}Z^{\\prime}H_{2}Z\\right)^{-1}$$\n", 65 | "\n", 66 | "where $H_{2}=\\Delta\\hat{\\epsilon}\\Delta\\hat{\\epsilon}^{\\prime}$ and $\\Delta\\hat{\\epsilon}$ is the residual from one-step GMM.\n", 67 | "\n", 68 | "## System GMM\n", 69 | "\n", 70 | "Compared with difference GMM, sytem GMM adds additional moment conditions, resulting in more instruments:\n", 71 | "\n", 72 | "$$\n", 73 | "\\begin{align}\n", 74 | "z_{i}=\\left[\\begin{array}{cccccccccccccccc|cccccccc}\n", 75 | "y_{i1} & 0 & 0 & 0 & 0 & 0 & \\ldots & 0 & d_{i1} & d_{i2} & 0 & 0 & 0 & 0 & 0 & 0 & 0 & \\ldots & 0 & 0 & 0 & 0 & \\ldots & 0\\\\\n", 76 | "0 & y_{i1} & y_{i2} & 0 & 0 & 0 & \\ldots & 0 & 0 & 0 & d_{i1} & d_{i2} & d_{i3} & 0 & 0 & 0 & 0 & \\ldots & 0 & 0 & 0 & 0 & \\ldots & 0\\\\\n", 77 | " & & & \\vdots & & & \\ldots & & \\vdots & & & & & \\vdots & \\ldots & 
\\vdots & & \\ldots & 0 & & & & \\ddots & 0\\\\\n", 78 | "0 & 0 & 0 & 0 & 0 & 0 & \\ldots & y_{i,T-2} & 0 & 0 & 0 & 0 & 0 & 0 & \\ldots & d_{i,T-1} & 0 & \\ldots & 0 & 0 & 0 & 0 & 0 & 0\\\\\n", 79 | "\\hline 0 & \\ldots & 0 & & & & & & & & & & & & 0 & 0 & \\Delta y_{i2} & \\ldots & 0 & 0 & \\Delta d_{i3} & & 0\\\\\n", 80 | "\\vdots & & & & & & & & & & & & & & & 0 & 0 & \\Delta y_{i3} & \\ldots & 0 & 0 & \\Delta d_{i4} & & 0\\\\\n", 81 | "\\vdots & & & & & & & & & & & & & & & \\vdots & \\vdots & \\vdots & \\ddots & \\vdots & \\vdots & & \\ddots\\\\\n", 82 | "0 & \\ldots & & & & & & & & & & & & & \\ldots & 0 & 0 & 0 & \\ldots & \\Delta y_{i,T-1} & 0 & 0 & \\ldots & \\Delta y_{i,T}\n", 83 | "\\end{array}\\right] \\label{z_sys}\\tag{3}\n", 84 | "\\end{align}$$\n", 85 | "\n", 86 | "$$\\hat{\\theta}_{gmm}=\\arg\\min_{\\theta}\\left(\\frac{1}{N}\\sum_{i=1}^{N}(\\widetilde{y}_{i}-\\theta\\widetilde{x_{i}})^{\\prime}z_{i}\\right)W\\left(\\frac{1}{N}\\sum_{i=1}^{N}z^{\\prime}{}_{i}(\\widetilde{y}-\\theta\\widetilde{x_{i}})\\right)$$\n", 87 | "\n", 88 | "where $$\\widetilde{y}=\\left(\\begin{array}{c}\n", 89 | "\\Delta y_{i}\\\\\n", 90 | "\\hline y_{i}\n", 91 | "\\end{array}\\right)\\textrm{ and }\\widetilde{x_{i}}=\\left(\\begin{array}{c|c}\n", 92 | "\\Delta x_{i} & 0\\\\\n", 93 | "\\hline x_{i} & 1\n", 94 | "\\end{array}\\right)$$\n", 95 | "\n", 96 | "## Robust estimation of coefficients' covariance \n", 97 | "\n", 98 | "pydynpd reports robust standard errors for one-step and two-step estimators. For detail, please refer to [Windmeijer 2005](https://doi.org/10.1016/j.jeconom.2004.02.005).\n", 99 | "\n", 100 | "## Specification Test \n", 101 | "\n", 102 | "### Error serial correlation test \n", 103 | "\n", 104 | "Second-order serial correlation test: if $\\epsilon_{it}$ in Eq ($\\ref{basic}$) is serially correlated, GMM estimates are no longer consistent. 
In a first-differenced model (e.g., Eq ($\\ref{fd}$)), to test whether $\\epsilon_{i,t-1}$ is correlated with $\\epsilon_{i,t-2}$, the second-order autocovariance of the residuals, $\\textrm{AR(2)}$, is calculated as:\n", 105 | "\n", 106 | "$$AR(2)=\\frac{b_{0}}{\\sqrt{b_{1}+b_{2}+b_{3}}}\\textrm{ where}$$\n", 107 | "\n", 108 | "$$b_{0}=\\sum_{i=1}^{N}\\Delta\\hat{\\hat{\\epsilon}}_{i}^{\\prime}L_{\\Delta\\hat{\\hat{\\epsilon}}}^{2}$$\n", 109 | "\n", 110 | "$$b_{1}=\\sum_{i=1}^{N}L_{\\Delta\\hat{\\hat{\\epsilon}}_{i}^{\\prime}}^{2}H_{2}L_{\\Delta\\hat{\\hat{\\epsilon}}_{i}}^{2}$$ \n", 111 | "\n", 112 | "$$b_{2}=\\textrm{-}2\\left(\\sum_{i=1}^{N}L_{\\Delta\\hat{\\hat{\\epsilon}}_{i}^{\\prime}}^{2}x_{i}\\right)\\left[\\left(\\sum_{i=1}^{N}x_{i}^{\\prime}z_{i}\\right)W_{2}\\left(\\sum_{i=1}^{N}z_{i}^{\\prime}x_{i}\\right)\\right]^{-1}\\left(\\sum_{i=1}^{N}x_{i}^{\\prime}z_{i}\\right)W_{2}\\left(\\sum_{i=1}^{N}z_{i}^{\\prime}H_{2}L_{\\Delta\\hat{\\hat{\\epsilon}}_{i}}^{2}\\right)$$\n", 113 | "\n", 114 | "$$b_{3}=\\left(\\sum_{i=1}^{N}L_{\\Delta\\hat{\\hat{\\epsilon}}_{i}^{\\prime}}^{2}x_{i}\\right)\\hat{V}_{\\hat{\\hat{\\theta}}}\\left(\\sum_{i=1}^{N}x_{i}^{\\prime}L_{\\Delta\\hat{\\hat{\\epsilon}}_{i}}^{2}\\right)$$\n", 115 | "\n", 116 | "### Hansen overidentification test \n", 117 | "\n", 118 | "Hansen overidentification test is used to check if instruments are exogeneous. Under the null hypothesis that instruments are valid, test statistic, $S$, should be close to zero:\n", 119 | "\n", 120 | "$$S=\\left(\\sum_{i=1}^{N}\\Delta\\hat{\\hat{\\epsilon}}_{i}^{\\prime}z_{i}\\right)W_{2}\\left(\\sum_{i=1}^{N}z_{i}^{\\prime}\\Delta\\hat{\\hat{\\epsilon}}_{i}\\right)$$\n", 121 | "\n", 122 | "# Handling instrument proliferation issue \n", 123 | "\n", 124 | "Difference GMM and system GMM may generate too many instruments, which causes several problems (citation). Package pydynpd allows users to reduce the number of instruments in two ways. 
First, users can control the number of instruments in the command string. For example, $\\textrm{gmm(w, 2:3)}$ states that only $n_{t-2}$ and $n_{t-3}$ are used as instruments, rather than all lagged $n$ dated $t-2$ and earlier. Second, users can choose to collapse the instrumental variable matrix. For example, if collapsed, the matrix as in Eq ($\\ref{z_sys}$) is changed to:\n", 125 | "\n", 126 | "$$z_{i}=\\left[\\begin{array}{cccccccccc|cc}\n", 127 | "y_{i1} & 0 & 0 & \\ldots & 0 & d_{i1} & d_{i2} & 0 & \\ldots & 0\\\\\n", 128 | "y_{i1} & y_{i2} & 0 & \\ldots & 0 & d_{i1} & d_{i2} & d_{i3} & \\ldots & 0\\\\\n", 129 | "\\vdots & \\vdots & \\ddots & \\ldots & \\vdots & & & & \\ddots & \\vdots\\\\\n", 130 | "y_{i1} & y_{i2} & y_{i3} & \\ldots & y_{i,T-2} & d_{i1} & d_{i2} & d_{i3} & \\ldots & d_{i,T-1}\\\\\n", 131 | "\\hline 0 & & & \\ldots & 0 & 0 & 0 & 0 & \\ldots & 0 & \\Delta y_{i2} & \\Delta d_{i3}\\\\\n", 132 | "0 & 0 & & & 0 & 0 & 0 & 0 & \\ldots & 0 & \\Delta y_{i3} & \\Delta d_{i4}\\\\\n", 133 | "\\vdots & & \\ddots & & \\vdots & & & \\vdots & & \\vdots & \\vdots & \\vdots\\\\\n", 134 | "0 & 0 & 0 & \\ldots & 0 & 0 & & 0 & \\ldots & 0 & \\Delta y_{i,T-1} & \\Delta d_{iT}\n", 135 | "\\end{array}\\right]$$\n", 136 | "\n", 137 | "This change dramatically reduces the number of instruments. Intuitively, the number of instruments is positively associated with the width of the matrix above.\n", 138 | "\n" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "id": "6eefc858", 145 | "metadata": {}, 146 | "outputs": [ 147 | { 148 | "ename": "", 149 | "evalue": "", 150 | "output_type": "error", 151 | "traceback": [ 152 | "\u001b[1;31mRunning cells with 'Python 3.10.5 64-bit (windows store)' requires ipykernel package.\n", 153 | "Run the following command to install 'ipykernel' into the Python environment. 
\n", 154 | "Command: 'c:/Users/Tiger/AppData/Local/Microsoft/WindowsApps/python3.10.exe -m pip install ipykernel -U --user --force-reinstall'" 155 | ] 156 | } 157 | ], 158 | "source": [] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "id": "7ca931c1-e7d1-484e-aae8-727801b3a60d", 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "id": "28e3e167-e9c3-427d-ba56-521262920be1", 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [] 175 | } 176 | ], 177 | "metadata": { 178 | "kernelspec": { 179 | "display_name": "Python 3.10.5 64-bit (windows store)", 180 | "language": "python", 181 | "name": "python3" 182 | }, 183 | "language_info": { 184 | "codemirror_mode": { 185 | "name": "ipython", 186 | "version": 3 187 | }, 188 | "file_extension": ".py", 189 | "mimetype": "text/x-python", 190 | "name": "python", 191 | "nbconvert_exporter": "python", 192 | "pygments_lexer": "ipython3", 193 | "version": "3.10.5" 194 | }, 195 | "vscode": { 196 | "interpreter": { 197 | "hash": "af20069095c057cbb0c2f1e67eacfd0be355d6958a577b80d2e85aa262675d05" 198 | } 199 | } 200 | }, 201 | "nbformat": 4, 202 | "nbformat_minor": 5 203 | } 204 | -------------------------------------------------------------------------------- /vignettes/Images/.$pydynpd.drawio.bkp: -------------------------------------------------------------------------------- 1 | 
7Vpbe6M2EP01flx/4mryGDvZtF+bjRtvu9lHBWRQgpEj5MTeX18JxE3CNmnwZdvmIR8ahIA5c86MBg+syWJ9Q+EyuiUBigcmCNYD62pgmiPP5P+FYZMbbMPLDSHFQW4yKsMM/0DSCKR1hQOUNiYyQmKGl02jT5IE+axhg5SSt+a0OYmbd13CEGmGmQ9j3foNByzKrZ4DKvsvCIdRcWcDyDMLWEyWhjSCAXmrmazrgTWhhLD8aLGeoFj4rvBLft3nLWfLB6MoYV0uSL/MEs/+9Q7+5dE4eXq+Y0+bT5YlH45tijdGAXeAHBLKIhKSBMbXlXVMySoJkFgW8FE153dCltxocOMTYmwj0YQrRrgpYotYnkVrzB7k5eL4e+34SkSO4ZZDETeOPXTkeIooXiCGaHFJwujmoT7IFsvni2G2XnkyW06M5iRh8vkMV44nJCZi3YQk4jVTRslziTrHa6y7vAhJSEPEdvlZ0kA4t3alROoGEf5OdMMnUBRDhl+b0QdlEIflvPLSKcH8WUwg+VbypqAbEK5rLJKSFfWRvK4eL8pSlgmGlnNR/SkLj4aGoZ0tbpI7RLsJP6i9bWXKQvU9YSsd8wrjlXTVnymPCTWWazEXwDQqY/YtwgylS+iLeW9cuATcEVyK4WIdCiUbBvNgiNY80kT0JwyzTT4pJ7FYJYxhmspjnyywL49j+IjiMfSfw4wpe8NqjuNYmVQPx4FpgeyvXJrQAFHlChjjMOFDn8emYEce4dIVwgFpApdfSQ5Jbtkazq+IMrTeGafyrNMMN6sYv1VaKbic2aKaTqrxUo/sRrC8OzLckwpaqVPfa2cqCSoETVUg88MKlLO6gwLtlyqnb6n6GKCmRvUppCkaiHTnxvxNxo+c+G4ojlIEqR9lz/ayQinTYqFC2ihkYNaUgRqq73B/d8ZYttOkzKhU6DppzBbSFETq38e25uPZPk/y12Vt7lKFTNc2TamE8zAvuS7liQUOgoyXbfg0uVqqeiHxU8iEZGcWExjdeNYDrCboBKvVAqt1KFhtoKH2E2phgOZwlfG8Vzl0usrh6Lzk0NGoeoMSRCETiriQ268Y/xTaZ3st5cJRlc82NDcdkSJGjSAVXdop0sdG50jUGnWl1sV5UWukUWvO3cSfD2aZsCAXScTLRWgbz4qNRITWkMcF9+WytmmV1nIfa+7n4hyvUdEa2ZfRdkHakot3b1W27zb6yJjKjtIFuhiYhqGLgXMwMThtM2RoHj5hVipSCUd5tx0q8gE1uDhZom3va9hKpebaSjxt6V1cUgo3tWlLMSHdfh+3aFLI+4zU/tye53rvfNdxFAbkT9xr46VI4P/ekrJkiDF4V549AkNs7zgMMTyrGYleN4b01t270BLxVwoDzLCIGn5iSomP0vQMK1wXNEnvXJx6b2/r/ZNL4bUl8vEc+1Vds3d/333fTlGKf8DHbCnBDSmVfF1nPHCuxFqc42lV0XRtHXQjdQ84OsBUcHR1HEELjubBcNR7NPdFawbkdWpekyYCuP9RbUPVHjXb1W4bqm3t6sOhOjqffNr4Ardj/zl0vEZuNIZgX/mYjdRPd8ffmRYJdH8tahwp09pKplVr0S0f63or5zxNVD7jWEBkgttWATl9ilWLk7ZPTofKsFdf7EUyhWA+ud9YUzCPvj1MP+nfIsvm+T1KRRTvV+FT9M7/6d7/eHINlI/ZRS1Tg7pNrftoqLcirZdSt7sarv/tZGsp/R2nhaijA+Xa58eYzB6f6J0bRb89gyX94+Whhaf3KOR+TnHW0evM1TMAsQd4TEtpNzi2rqPGMfGxNHwGpvuyEr+PGt8QElSjorzVd4Dnoaw9wKNWBq6hs6cn7ePD6ldoeWVR/ZTPuv4b1Vpbd6I6FP41Pk4XhIv6WGvPdC6u45rO9EwfU4iYFggTYtX59SeBcAnBSmvR2odKNskm7P3taxhYV9HmM4XJckZ8FA6A4W8G1nQAgGkagP8IyjanDIeSEFDsy0kV4Rb/
RZJoSOoK+yhVJjJCQoYTleiROEYeU2iQUrJWpy1IqD41gQHSCLceDHXqf9hny5w6coyKfoNwsGTlC8s7ESwmS0K6hD5Z10jW9cC6ooSw/CraXKFQCK+Qy83EcBLnDs2T1ezb+Bnf3j/+/pQz++c1S8pXoChmb2ZtmNQNrsB2/fgI1/bXx+m/5M+n4tXYtpAX8rn45JBQtiQBiWF4XVEnlKxiHwmuBh9Vc74TknCiyYmPiLGtxAJcMcJJSxaF8i7aYDbdyPXZYCsHKYOUXQqtc0JMYpTRKHkqdcelPsn3LDbaUPEe+ch5KVlRD70gFHMocQppgNhLE80SBtx+EIkQo1u+kKIQMvysbg9KIAflvHLpnGC+cWBIoxs6dr5EmhwYGyqLfGNyVaVyflHbRkXKgPAKUJinAsVvsfzCkaP7vXDh6qbbfJFrF+P7bGw7VkGo1mYjZfEcUczFhWgJwGODDXTF2vBArCkoeS0k5C6fYbiST5pDmqLs+X9WKGUaZCpACB2ul5ih2wRmoljzSKMqf5fYnxFlaPMGwetyklys0VCxLdN2OOAyyroKD2bh85e10OAaPQnXOqm91azNqFub6Sr25tilYTaNpjJD1QadPRZ4fGOzOxobeA+/zuMY3NYmJMJfp7vdvgXGKjSHbgNaOcd39fW2Zti/Uq7YJiBrwPFhuiyBl9l1qto1z5MSMYw2gUgpL/yFf4E2HC4CwjHDbJtPypMpwSUIYZrKa49E2JPXIXxA4QR6T0EG9ysSEvpSYrDAYdiYtCAxK0gDYBnZX8maUB/RxgoY4iDmQ49DTkA8Y1EYY+asYpj8JLnyckpfzspu+KqCbd1TuS2eajzuyVM5GloGwIWR0Hr8kIqfMhwYZCF2IGCUIA8vuFJ5Tp2XFw1wcakx1TlpOhCyxTypv5Q3Iuz7uR9EKf4LHzJWQq/SyDhfZzJwpoIXd31p7gWrUNNEiQ6c44UkYDTUXKR/+9QM+gpI7kkCUhFFykG3KPKWxPEENUVR++7N86x+agqg1hT2qAGe/A16qymGmuf4jGJEIRM55JeYK2QVoWzDM8go3pxLPgkacrUs48TZ5EiT9KXqhGcdnXBXT9ndWbdpTfUbPqbIY5iIZQimYkeLECd38ra4vjm6jkUtqdTjQNdxm3+2+lLx+KT+ueaSy6LhQ2X5nfs3h5bUO/J4s9G+GY5UFn23b07T1HtjEXge4btzR7Cf6G25jehtdoveOqPRHkZ9Y9PUotOcEg/x8g+4oXD3D7z4dANxNYdxdhYxhQyeSzrgjDukA+Yx0wETnMQbnIVVm1ZHs7YPtOrDNGjpufNsdi4WYRWFRuFxADhxgmzqPa8pShAXXuYpy45G/l/3S1/4zGr6HaRY9B/SDhn1B2hrUMKgzK8zm+MIwXEgMSBHE8IYiTjNOa7vNBwVKVZLH8RuQYrdG1L0fpdSqurY2FG8fkgoFIWV+VpYWMetsMfjvbAYHRUVroaKHyjgakqFAIHxA6WrUD8Q+5AYOJ4aTaB2OR3D6pYZ9dblNPWelKa04ixjDbdJ3vCfpE+IeUupCCHSIpVKSSieN5FqcAs9IXr9jHJ1ma26a3PTDYVn9lkHQO0ILodP7VCu5YTiiM2Shp6Bo2fAx3XiekPsXDJinmSc/GMY0DFFPvj7hPaC1R42mimGUZzXv7b21Vg1W+B9177j/UBUYbYnia+fbUp8xOS7ONjcafUtrqY8zy0Od+eQicPajMLfpM+YMFYzPuDoMcEpwr8SE6zRbiAd9qGLoSnpJ4U+FskRFKvLdsXH7J/vV2YLarITbpk8uD3qu3kIVdZ4799I58Pqy8zcgqvvW63r/wE= -------------------------------------------------------------------------------- /vignettes/Images/.$pydynpd.drawio.dtmp: 
-------------------------------------------------------------------------------- 1 | 7Vpbe6M2EP01flx/4mrnMXayab82GzfedrOPCsigBCMi5MTeX18JxE3CNmnwZdvmJWgQAubMOTMaPLCmy/UNhUl4S3wUDUzgrwfW1cA0DQPY/J+wbHLLaGTmhoBiX06qDHP8A0kjkNYV9lHamMgIiRhOmkaPxDHyWMMGKSVvzWkLEjXvmsAAaYa5ByPd+g37LMytYwdU9l8QDkJWvrA8s4TFZGlIQ+iTt5rJuh5YU0oIy4+W6ymKhPMKv+TXfd5ytnwwimLW5YL0yzwe27/ewb/GNIqfnu/Y0+aTZcmHY5vijZHPHSCHhLKQBCSG0XVlnVCyin0klgV8VM35nZCEGw1ufEKMbSSacMUIN4VsGcmzaI3Zg7xcHH+vHV+J0DHccijixrGHjhzPEMVLxBAtLokZ3TzUB9li+XwxzNYrT2bLidGCxEw+n+HK8ZRERKwbk1i8ZsooeS5R53hNdJcXIQlpgNguP8uoF86tXSmRukGEvxPd8AkURZDh12b0QRnEQTmvvHRGMH8WE0jClbyRdLOBcF1jkZSsqIfkdfV4UZayTDC0nIvqT1l4NDQM7Wxxk9wh2k34Qe1tK1MWqu8JW+mYVxitpKv+THlMqLFcizkfpmEZs28hZihNoCfmvXHlEnCHMBHD5ToQUjb0F/4QrXmkieiPGWabfFJOYrFKEME0lcceWWJPHkfwEUUT6D0HGVP2htUCR5EyqR6OA9MC2V+5NKE+osoVMMJBzIcej03BjjzCpSuEA9IYJl9JDklu2RrOr4gytN4Zp/Ks0ww3qxi/VVopuJzZwppOqvFSj+xGsLw7MtyTClqpU99rZyoJKgRNVSDzwwqUs7qDAu2XKqdvqfoYoKZG9RmkKRqIdOdG/E0mj5z4biCOUgSpF2bP9rJCKdNioULaKGRg3pSBGqrvcH93xli206TMqFToOmnMFtIUROrfx7bm4/k+T/LXZW3uUoVM1zZNqYTzMC+5LuWJJfb9jJdt+DS5Wqp6IfEzyIRkZxYTGN141gOsJugEq9UCq3UoWG2gofYTaqGPFnCV8bxXOXS6yuHovOTQ0ah6g2JEIROKuJT7rwj/FNpnj1vKhaMqn21objoiRYwaQSq6tFOkj43Okag16kqti/Oi1kij1oK7iT8fzDJhQS4Si5cL0TaeFRuJEK0hjwvuy6S2aZXWch9r7ufiAq9R0RrZl9F2QdqSi3dvVbbvNvrImMqO0gW6GJiGoYuBczAxOG0zZGgePmFWKlIJR3m3HSryATW4OFmibe9r2Eql5tpKPG3pXVxSCje1aYmYkG6/j1s0KYrOp9qf2/Nc753vOo7CgPyJe228FAn831tSlgwxBu/Ks0dgiD0+DkOMsdWMxHE3hvTW3bvQEvFXCn3MsIgafmJGiYfS9AwrXBc0Se9cnHpvb+v9k0vhtQR5eIG9qq7Zu7/vvm+nKMU/4GO2lOCGlEq+rjMZOFdiLc7xtKpourYOupG6BxwdYCo4ujqOoAVH82A46j2a+6I1A/I6Na9JYwHc/6i2oWqPmu1qtw3Vtnb14VAdnU8+bXyB27H/HDrjRm40hmBf+ZiN1E93x9+ZFgl0fy1qHCnT2kqmVWvRLR/reivnxpqofMaRgMgEt60CcvoUqxYnbZ+cDpVhr77Yy3gGwWJ6v7FmYBF+e5h90r9Fls3ze5SKKN6vwqfonf/Tvf/x5BooH7OLWqYGdZta99FQb0VaL6VudzVc/9vJ1lL6O04LUUcHyrXPjxGZPz7ROzcMf3sGCf3j5aGFp/co4H5OcdbR68zVMwCxB3hMS2k3OLauo8Yx8bE0fAam+7ISv4+a3BDiV6OivNV3gOehrD3Ao1YGrqGzpyft48PqV2h5ZVH9ls+6/hs=1Vpbd5s4EP41fmwOiIvtxzjOJt2tz+Y0bbZ5VEDGSjC
iQo7t/vqVQAKEnJgkBjt9aNAgyTDzzTcXMXAulpsrCtPFjIQoHgAr3Ayc6QAA27Zc/kdItoVkOASFIKI4lJMqwS3+g6TQktIVDlGmTWSExAynujAgSYICpskgpWStT5uTWP/VFEbIENwGMDal/+GQLQrpyLMq+TXC0YKVLyzvLKGaLAXZAoZkXRM5lwPnghLCiqvl5gLFQnlKL9cTy0u9O3STrmb/jJ/x7f3jry/FZn+9ZUn5ChQl7N1bWzb1owuwXT8+wrX79+P0X/L7i3o1tlX6QiFXnxwSyhYkIgmMLyvphJJVEiKxq8VH1ZxvhKRcaHPhI2JsK7EAV4xw0YItY3kXbTCbbuT6fLCVg4xBys6F1bkgIQnKZZQ8lbbjWp+0VIdUW0ZWNECv6MAeSlhCGiH22kS5o1BRDVxS3VeILBGjWz6Bohgy/KwjEEogR+W8cukNwfxNgCWdbuhJn5MuB8aWvkXxpHJVZXJ+UXuMSpQD4Q2gsI8Fil9i+ZknR/d74cLtT7fFIt9V4/t87HqOElRr85G2+AZRzNWFaAnAjsEG2mJteGisaSh5KyTkYz/DeCV/6QbSDOW//3uFMmZApgKEsOF6gRm6TWGumzWPNLrxP6r2Z0QZ2ryqJ3nXGQ0137JdjwMul6yr8GArzl/UQoNvdaRc56j+VvM2q+5ttq/5m+eWjtl0msoNdR/09nhg587mtnQ20Amv8zgGt7UJqeDr7GXad8BYh+bQb0Cr2PGgXO8ajv0z44ZtArIGnBBmixJ4uV9nul/zPCkVw+UmEinlWTgPz9CGw0VAOGGYbYtJRTIldolimGXyOiBLHMjrGD6geAKDpyiH+wWJCX0tMZjjOG5MmpOEKdEAOFb+r9ya0BDRxgoY4yjhw4BjUEA830I5Y05WCUx/kMJ4haQrsnIbXKUgV2cqfwdTjccdMZVnoGUAfLgUVk8eMvGnDAcWmYsnEDBKUYDn3Kg8py7Kiwa4uDaYTk6GDYTOME/qz+WNJQ7DggdRhv/Ah3wrYVfpZHxfbzLwpmIvTn1ZwYJVqGmixAROfyEJWA0zq/Rvn5lBVwHJP0pAUlGkHLSLIu9JHLuvKVSpuzfPc3qqKYBeU7ijBniKV+qsphgazHGFEkQhEznk14QbZLVE+QPPIKN481nySdDQq+NYR84mR4amz3USnrUk4bZM2Z6sd1lN540QUxQwTMQyBDPxRPMYp3fytri+7t3GopbU6nFg2ngXPztdmXh8VH6uUXJZNBwzy2/dvzl4Sf1CHm832jfDkb5F1+2b4zT13lkEnmT4bt0R7Cl6O34jetvtore50WjPRl1jU5FvrZFESYB4+Qf8WND9Ay8+/Uhc3cAkP4uYQgY/SzrgjVukA3af6YANjsIGp+jVylv3urV7aK/+mAUdM3eezT6LRziq0FCMA8CRE2Tb7HlNUYq48nKmLDsaxf8mL33lM6vpd5Bi0X/IWmTUJ9DWoIRBmV/nPscRgpNIYkCOJoQxsuQyr1/utDwdKc6OPoi7AyluZ0gx+11aqWpi44Xi9SShoAor+62wcPqtsMfjvbAY9YoK30DFdxRxM2VCgcD6jrJVbB6InSQG+jOjDfQup2c57TKjzrqcttmTMoymzjLWcJsWDf9J9oRYsJCGECpVqVRGYvF7E2kGX9kJ0ctnVJjL3mm7XTTdMHjun3UA1I7gCvjUDuV2nFD02Cxp2Bl4ZgbcL4mbDbHPkhHzJKPvj2FAyxT58N8n7C5Y3WGjmWJZ6rz+rbWvsVWzBd517TveD0QdZnuS+PrZpsRHQr6Jg80XvX4H1ZTnuepw9wYycVibS/ibdBkTxnrGBzwzJngq/GsxwRm9DKSPfehiGUb6QWGIRXIExeqyXXGa/fP9xtyBmvyEWyYPfof2bh5ClTXe4RvpfFh9mVl4cPV9q3P5Pw==7VjbcpswEP0aHuPhEsf2Y42dNDNJp9N0entTYQG1skTFYuN+fVcgjAlJpp3m8hA8jNEe7YJ0zh7
swQnCTXWhWZ5dqxiE47tx5QQrx/c9zz2lk0H2DTKb+Q2Qah7bpA644b/Bgq5FSx5D0UtEpQTyvA9GSkqIsIcxrdWun5Yo0b9rzlIYADcRE0P0M48xa/d1tugm3gJPM3vruT9rJjasTbY7KTIWq90RFKydINRKYTPaVCEIQ17LS1N3fs/sYWEaJP5NwfxjIk+q6zD/FnG9vvxUXcpfJ3axWyZKu+ErXiAhKqGvWs7CLh/3LScIFd1xWaBWPyFUQmnCY0hYKQyecCFa1PGD8/pDOBM8lYQJSEzaFjRyIvqNhVHl5qI5i7hMr+qc1WmHfLAcG0hRbSJqMjMex0DlS62QIfter9GlOFdcYi3+dEkHcRO6k6kzpSWFFHtdTIdJ1xgqSXtivOYTWIE7KLC+dCljMDR6FA15t1KYDUF1BFkdLkBtAPWeUuzswraE9YQ3t/Gu67A2JTvqrcBizPZ0erhwJzsNrPL/0AXe6UBjiMkGNiRqMpUqycS6Q5dRqbcHUjqKDPddwZUyqtYpPwBxbw3OSlQEZbgRdhYqjl9suRl/NWPSp4lW1dHUat8GMn5jLE6hVBIa5Jybvdfz9ypVqFJH8BAhlmnDwoOCahAM+bb/sLhLH1v63nRl1wgH5dtOWNySGJlOAW3VLZUPy/gP4d2B/9tH+LvR9o9te3/WV9u/w/ezZ/X9dPR9nxDvlfi+7d3O95PJZHT8kzvee2nHn42O7xMyexnH++4zO374T7/9pfdH3z+1773gpX0/H33fJ2TxSny/uNf33qAlRt8/su/Pnsz2FHavjppm6V7ABes/dZHBEoIgEIafhrtCk3U2y0snD50Z2YQZdBmk0Xr6dMCMsU4s3//vLrsQlrfjxXIjryhAE5qIkbAToTRNk910zOTpSZZRDxqrRDCtoFIvCDAJ9KEE9JHRIWqnTAxr7DqoXcS4tTjEtjvquKvhDWxAVXO9pTclnFzm2h9XoQTVyND6QDMvtHwxh0l6yQUOX4gVhOUW0fmoHXPQ8/KWvfi88x/18zALnfuRMAVr7ekS/RAr3g== -------------------------------------------------------------------------------- /vignettes/Images/.gitkeep: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /vignettes/Images/list_models.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | List of ModelsModel N...Model 2Model 1 -------------------------------------------------------------------------------- /vignettes/Images/pydynpd.drawio: -------------------------------------------------------------------------------- 1 | 
7Vpbe6M2EP01flx/4mrnMXayab82GzfedrOPCsigBCMi5MTeX18JxE3CNmnwZdvmJWgQAubMOTMaPLCmy/UNhUl4S3wUDUzgrwfW1cA0DQPY/J+wbHLLaGTmhoBiX06qDHP8A0kjkNYV9lHamMgIiRhOmkaPxDHyWMMGKSVvzWkLEjXvmsAAaYa5ByPd+g37LMytYwdU9l8QDkJWvrA8s4TFZGlIQ+iTt5rJuh5YU0oIy4+W6ymKhPMKv+TXfd5ytnwwimLW5YL0yzwe27/ewb/GNIqfnu/Y0+aTZcmHY5vijZHPHSCHhLKQBCSG0XVlnVCyin0klgV8VM35nZCEGw1ufEKMbSSacMUIN4VsGcmzaI3Zg7xcHH+vHV+J0DHccijixrGHjhzPEMVLxBAtLokZ3TzUB9li+XwxzNYrT2bLidGCxEw+n+HK8ZRERKwbk1i8ZsooeS5R53hNdJcXIQlpgNguP8uoF86tXSmRukGEvxPd8AkURZDh12b0QRnEQTmvvHRGMH8WE0jClbyRdLOBcF1jkZSsqIfkdfV4UZayTDC0nIvqT1l4NDQM7Wxxk9wh2k34Qe1tK1MWqu8JW+mYVxitpKv+THlMqLFcizkfpmEZs28hZihNoCfmvXHlEnCHMBHD5ToQUjb0F/4QrXmkieiPGWabfFJOYrFKEME0lcceWWJPHkfwEUUT6D0HGVP2htUCR5EyqR6OA9MC2V+5NKE+osoVMMJBzIcej03BjjzCpSuEA9IYJl9JDklu2RrOr4gytN4Zp/Ks0ww3qxi/VVopuJzZwppOqvFSj+xGsLw7MtyTClqpU99rZyoJKgRNVSDzwwqUs7qDAu2XKqdvqfoYoKZG9RmkKRqIdOdG/E0mj5z4biCOUgSpF2bP9rJCKdNioULaKGRg3pSBGqrvcH93xli206TMqFToOmnMFtIUROrfx7bm4/k+T/LXZW3uUoVM1zZNqYTzMC+5LuWJJfb9jJdt+DS5Wqp6IfEzyIRkZxYTGN141gOsJugEq9UCq3UoWG2gofYTaqGPFnCV8bxXOXS6yuHovOTQ0ah6g2JEIROKuJT7rwj/FNpnj1vKhaMqn21objoiRYwaQSq6tFOkj43Okag16kqti/Oi1kij1oK7iT8fzDJhQS4Si5cL0TaeFRuJEK0hjwvuy6S2aZXWch9r7ufiAq9R0RrZl9F2QdqSi3dvVbbvNvrImMqO0gW6GJiGoYuBczAxOG0zZGgePmFWKlIJR3m3HSryATW4OFmibe9r2Eql5tpKPG3pXVxSCje1aYmYkG6/j1s0KYrOp9qf2/Nc753vOo7CgPyJe228FAn831tSlgwxBu/Ks0dgiD0+DkOMsdWMxHE3hvTW3bvQEvFXCn3MsIgafmJGiYfS9AwrXBc0Se9cnHpvb+v9k0vhtQR5eIG9qq7Zu7/vvm+nKMU/4GO2lOCGlEq+rjMZOFdiLc7xtKpourYOupG6BxwdYCo4ujqOoAVH82A46j2a+6I1A/I6Na9JYwHc/6i2oWqPmu1qtw3Vtnb14VAdnU8+bXyB27H/HDrjRm40hmBf+ZiN1E93x9+ZFgl0fy1qHCnT2kqmVWvRLR/reivnxpqofMaRgMgEt60CcvoUqxYnbZ+cDpVhr77Yy3gGwWJ6v7FmYBF+e5h90r9Fls3ze5SKKN6vwqfonf/Tvf/x5BooH7OLWqYGdZta99FQb0VaL6VudzVc/9vJ1lL6O04LUUcHyrXPjxGZPz7ROzcMf3sGCf3j5aGFp/co4H5OcdbR68zVMwCxB3hMS2k3OLauo8Yx8bE0fAam+7ISv4+a3BDiV6OivNV3gOehrD3Ao1YGrqGzpyft48PqV2h5ZVH9ls+6/hs=1Vpbd5s4EP41fmwOiIvtxzjOJt2tz+Y0bbZ5VEDGSjCiQo7t/vqVQAKEnJgkBjt9aNAgyTDzzTcXMXAulpsrCtPFjIQoHgAr3Ayc6QAA27Zc/kdItoVkOASFIKI4lJMq
wS3+g6TQktIVDlGmTWSExAynujAgSYICpskgpWStT5uTWP/VFEbIENwGMDal/+GQLQrpyLMq+TXC0YKVLyzvLKGaLAXZAoZkXRM5lwPnghLCiqvl5gLFQnlKL9cTy0u9O3STrmb/jJ/x7f3jry/FZn+9ZUn5ChQl7N1bWzb1owuwXT8+wrX79+P0X/L7i3o1tlX6QiFXnxwSyhYkIgmMLyvphJJVEiKxq8VH1ZxvhKRcaHPhI2JsK7EAV4xw0YItY3kXbTCbbuT6fLCVg4xBys6F1bkgIQnKZZQ8lbbjWp+0VIdUW0ZWNECv6MAeSlhCGiH22kS5o1BRDVxS3VeILBGjWz6Bohgy/KwjEEogR+W8cukNwfxNgCWdbuhJn5MuB8aWvkXxpHJVZXJ+UXuMSpQD4Q2gsI8Fil9i+ZknR/d74cLtT7fFIt9V4/t87HqOElRr85G2+AZRzNWFaAnAjsEG2mJteGisaSh5KyTkYz/DeCV/6QbSDOW//3uFMmZApgKEsOF6gRm6TWGumzWPNLrxP6r2Z0QZ2ryqJ3nXGQ0137JdjwMul6yr8GArzl/UQoNvdaRc56j+VvM2q+5ttq/5m+eWjtl0msoNdR/09nhg587mtnQ20Amv8zgGt7UJqeDr7GXad8BYh+bQb0Cr2PGgXO8ajv0z44ZtArIGnBBmixJ4uV9nul/zPCkVw+UmEinlWTgPz9CGw0VAOGGYbYtJRTIldolimGXyOiBLHMjrGD6geAKDpyiH+wWJCX0tMZjjOG5MmpOEKdEAOFb+r9ya0BDRxgoY4yjhw4BjUEA830I5Y05WCUx/kMJ4haQrsnIbXKUgV2cqfwdTjccdMZVnoGUAfLgUVk8eMvGnDAcWmYsnEDBKUYDn3Kg8py7Kiwa4uDaYTk6GDYTOME/qz+WNJQ7DggdRhv/Ah3wrYVfpZHxfbzLwpmIvTn1ZwYJVqGmixAROfyEJWA0zq/Rvn5lBVwHJP0pAUlGkHLSLIu9JHLuvKVSpuzfPc3qqKYBeU7ijBniKV+qsphgazHGFEkQhEznk14QbZLVE+QPPIKN481nySdDQq+NYR84mR4amz3USnrUk4bZM2Z6sd1lN540QUxQwTMQyBDPxRPMYp3fytri+7t3GopbU6nFg2ngXPztdmXh8VH6uUXJZNBwzy2/dvzl4Sf1CHm832jfDkb5F1+2b4zT13lkEnmT4bt0R7Cl6O34jetvtore50WjPRl1jU5FvrZFESYB4+Qf8WND9Ay8+/Uhc3cAkP4uYQgY/SzrgjVukA3af6YANjsIGp+jVylv3urV7aK/+mAUdM3eezT6LRziq0FCMA8CRE2Tb7HlNUYq48nKmLDsaxf8mL33lM6vpd5Bi0X/IWmTUJ9DWoIRBmV/nPscRgpNIYkCOJoQxsuQyr1/utDwdKc6OPoi7AyluZ0gx+11aqWpi44Xi9SShoAor+62wcPqtsMfjvbAY9YoK30DFdxRxM2VCgcD6jrJVbB6InSQG+jOjDfQup2c57TKjzrqcttmTMoymzjLWcJsWDf9J9oRYsJCGECpVqVRGYvF7E2kGX9kJ0ctnVJjL3mm7XTTdMHjun3UA1I7gCvjUDuV2nFD02Cxp2Bl4ZgbcL4mbDbHPkhHzJKPvj2FAyxT58N8n7C5Y3WGjmWJZ6rz+rbWvsVWzBd517TveD0QdZnuS+PrZpsRHQr6Jg80XvX4H1ZTnuepw9wYycVibS/ibdBkTxnrGBzwzJngq/GsxwRm9DKSPfehiGUb6QWGIRXIExeqyXXGa/fP9xtyBmvyEWyYPfof2bh5ClTXe4RvpfFh9mVl4cPV9q3P5Pw==7VjbctowEP0aHsP4AgEegyFtZpJOp+n09qbaa1utkFx5DaZf35UtYxyHJJ3mMpnCeEB7tGtJ5+wBxgM/WJVvNMvSKxWBGHhOVA78xcDzXNcZ0YdBtjUymXg1kGge2aQWuOa/wYKORQseQd5JRKUE8qwLhkpKCLGD
Ma3VppsWK9FdNWMJ9IDrkIk++plHmDbnOp21E2+BJ6ldeupN6okVa5LtSfKURWqzB/nLgR9opbAercoAhCGv4aWuOz8wu9uYBokPKZh+jOVJeRVk30Kulxefygv568Ruds1EYQ98yXMkRMX0VsmZ2+3jtuEEoaQV5zlq9RMCJZQmPIKYFcLgMReiQQeef169CGeCJ5IwAbFJW4NGTkSfWRhVZm6asZDL5LLKWYxa5IPl2ECKamNRkZnyKAIqn2uFDNn3ao8OxZniEivxx3O6iJvAGY4HY9pSQLHbxnSZdI2BknQmxis+geW4gRyrWxcyAkOjS5FljHYP5UEp3J3A5AxQK0C9pRRbMLMtYT3hTm28aTusSUn3esu3GLM9nexu3MpOA6v8X3SBO+ppDBHZwIZETaoSJZlYtug8LPR6R0pLkeG+LbhURtUq5Qcgbq3BWYGKoBRXws5CyfGLLTfjr2ZM+tTRotybWmybQEZnxuIUSiWhRs65OXs1Xx/KnORunejgqtAh3MWQc7ugGgRDvu4ucJs+tvS96cq2EXbKN50wuyExMp0A2qobKu+28Q/COz3/N1/h7462f2zbe5Ou2t4tvp88q+/HR9/f4/sDir5237s93w+Hw6Pjn9zx7ks7/vR/cvz99p48WNFHdrznPLPj+//0m1967+j7p/a967+076dH3+/TMXuwoq/d97ODvnd7LXH0/SP7/vTJbE9h++iobpb2AZy//AM= -------------------------------------------------------------------------------- /vignettes/Test_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/vignettes/Test_1.png -------------------------------------------------------------------------------- /vignettes/images/new_struct.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/vignettes/images/new_struct.png -------------------------------------------------------------------------------- /vignettes/images/traditional.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/vignettes/images/traditional.png -------------------------------------------------------------------------------- /vignettes/new_struct.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/vignettes/new_struct.png 
-------------------------------------------------------------------------------- /vignettes/paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'pydynpd: A Python package for dynamic panel model' 3 | tags: 4 | - Python 5 | - dynamic panel model 6 | authors: 7 | - name: Dazhong Wu^[Corresponding author] 8 | affiliation: 1 9 | - name: Jian Hua 10 | affiliation: 1 11 | - name: Feng Xu 12 | affiliation: 1 13 | affiliations: 14 | - name: Department of Business Management, School of Business and Public Administration, University of the District of Columbia, USA 15 | index: 1 16 | date: April 9, 2022 17 | bibliography: ref.bib 18 | --- 19 | 20 | # Summary 21 | 22 | We present pydynpd, a Python package which implements all the 23 | features in dynamic panel model with GMM (generalized method of moments). 24 | These features include: (1) difference and system GMM, (2) one-step, two-step, and iterative estimators, (3) robust standard errors including the one 25 | suggested by [@windmeijer2005finite], (4) Hansen over-identification test, (5) 26 | Arellano-Bond test for autocorrelation, (6) time dummies, (7) allows 27 | users to collapse instruments to reduce the instrument proliferation issue, 28 | and (8) a simple grammar for model specification. As far as we know, pydynpd is the first Python package that allows researchers to estimate dynamic panel models. 29 | 30 | What distinguishes pydynpd from any other dynamic panel model packages is its innovative feature: the capability to search for models based on users' request, rather than just run the model specified by users as other packages do. To the best of our knowledge, there is no other econometric software/package that offers this feature, let alone dynamic panel model packages. 31 | 32 | # Statement of need 33 | Over the past decade, dynamic panel model has become increasingly 34 | popular in empirical studies. 
For example, researchers use dynamic panel 35 | model to study the environmental impacts of climate change [@econometrics8030030] and COVID-19 [@anser2020does;@oehmke2021dynamic]. 36 | This is because many aspects of our social and natural systems are 37 | inherently dynamic, and the GMM methods proposed by Arellano & Bond [-@arellano1991some] and Blundell & Bond [-@blundell1998initial] allow us to model the dynamics that 38 | traditional static panel models are not able to capture. 39 | Correspondingly, the growing popularity of dynamic panel model will 40 | stimulate demand for the related packages in open source programs such 41 | as R, Python, and Julia. 42 | 43 | # Statement of field 44 | So far, there are several related packages in Stata and R. 45 | Stata is commercial software, while existing R packages have some 46 | issues. For example, in our benchmark test, R package panelvar [@sigmund2021panel] is more than 100 times slower than Stata package xtabond2 [@roodman2009xtabond2]. On the other hand, R package plm [@croissant2008panel] 47 | is fast enough, but it 48 | has a calculation issue for system GMM. A third R package, pdynmc, crashed or refused to work several times in our tests. Due to these reasons, the R packages above are far less popular than xtabond2, according to the citations they 49 | have received. 50 | 51 | Moreover, there is no Python or Julia package yet to estimate dynamic 52 | panel model due to the complexity involved in implementation. Our 53 | package contributes to the open source community because (1) it 54 | implements all of the major features in the associated commercial packages in 55 | Stata, (2) its innovative feature (as mentioned above) will stimulate similar or even more revolutionary features in the empirical computing community, and (3) though Python is interpreted, our package is almost as 56 | fast as xtabond2 which was compiled, as shown in the figure below. 
This package will increase the usability of open source software in estimating dynamic panel models, because for a package to be attractive, it must be both accurate and fast. Moreover, unlike existing R 57 | packages which rely heavily on R-specific components (that is a main 58 | reason they are not fast), our code uses components common to any 59 | programming language, making it easy to translate to R or Julia. 60 | 61 | ![Running time (relative to the fastest).\label{fig:runtime}](Test_1.png){width=100%} 62 | 63 | # The pydynpd package 64 | 65 | pydynpd is able to estimate the most complicated linear dynamic panel 66 | models: 67 | 68 | $$y_{it}=\sum_{j=1}^{p}\alpha_{j}y_{i,t-j}+\sum_{k=1}^{m}\sum_{j=0}^{q_{k}}\beta_{jk}r_{i,t-j}^{(k)}+\boldsymbol{\delta}\boldsymbol{d_{i,t}}+\boldsymbol{\gamma}\boldsymbol{s_{i,t}}+u_{i}+\epsilon_{it}$$ 69 | 70 | In the model above, $y_{i,t-j}$ ($j=1,2,\ldots,p$) denotes a group of 71 | $p$ lagged dependent variables. $r_{i,t-j}^{(k)}$ represents a group of 72 | $m$ endogenous variables other than lagged $y$. $\boldsymbol{d_{it}}$ is 73 | a vector of predetermined variables which may potentially correlate with 74 | past errors, $\boldsymbol{s_{it}}$ is a vector of exogenous variables, 75 | and $u_{i}$ represents the fixed effect. As lagged dependent variables such as $y_{i,t-1}$ are included as regressors, the 76 | popular techniques in static panel models no longer produce consistent 77 | results. Researchers have developed many methods to estimate dynamic 78 | panel models. Essentially there are two types of GMM estimates, 79 | difference GMM and system GMM. Just like other R and Stata packages, pydynpd fully implements these two methods. 80 | 81 | Due to the space limit, we focus here on a general discussion of the package. A detailed statistical/technical description of our package is available on [GitHub](https://github.com/dazhwu/pydynpd/blob/main/vignettes/Guide.ipynb). 
82 | 83 | For illustration purposes, consider the following equation: 84 | $$y_{it}=\sum_{j=1}^{\colorbox{yellow}p}\alpha_{j}y_{i,t-j}+\sum_{j=1}^{\colorbox{yellow}q_k}\beta_{j}r_{i,t-j}+{\delta}d_{i,t}+\gamma_{i,t}+u_{i}+\epsilon_{it}$$ 85 | 86 | The equation above is related to a group/family of models with different combinations of $p$ and $q_{k}$ values. Unless existing economic theory indicates exactly what model to choose, researchers need to guess and try the values of $p$ and $q_{k}$ as highlighted in the equation above. For example, if $p=2$ and $q_{k}=1$, then a specific model is formed: 87 | 88 | $$ y_{it}=\alpha_{1}y_{i,t-1}+\alpha_{2}y_{i,t-2}+\beta_{1}r_{i,t-1}+{\delta}d_{i,t}+\gamma_{i,t}+u_{i}+\epsilon_{it}$$ 89 | 90 | 91 | ![How alternative packages work. \label{fig:traditional}](traditional.png){width=100%} 92 | 93 | ![The automatic mode in pydynpd. \label{fig:auto_mode}](new_struct.png){width=100%} 94 | 95 | Figure 2 shows how other packages work: a user needs to choose a specific model, then based on that particular model the system generates the corresponding instrument matrix and panel data with dependent/independent variables so that the GMM process can produce regression results. An innovative feature of pydynpd is that it can also run in its "automatic" mode in which it doesn't require users to choose a particular model. Instead, users may let pydynpd search for the lags (e.g., $p$ and $q_{k}$) so that the corresponding models satisfy certain standards. 
In other words, users may use pydynpd to estimate the following model with question marks indicating values not determined yet: 96 | 97 | $$y_{it}=\sum_{j=1}^{\colorbox{yellow} ?}\alpha_{j}y_{i,t-j}+\sum_{j=1}^{{\colorbox{yellow} ?}}\beta_{j}r_{i,t-j}+{\delta}d_{i,t}+\gamma_{i,t}+u_{i}+\epsilon_{it}$$ 98 | 99 | Figure 3 shows how pydynpd's automatic mode works: a user indicates what values pydynpd needs to search for (e.g., the question marks in the equation above), and then pydynpd tries all possible models, and returns "good" models that pass dynamic models' specification tests (e.g., Hansen overidentification test and AR(2) test). Note that the processes included in the dotted box in Figure 2 are represented as a black-box process named "traditional process" in Figure 3. 100 | 101 | # References 102 | 103 | -------------------------------------------------------------------------------- /vignettes/ref.bib: -------------------------------------------------------------------------------- 1 | @article{anser2020does, 2 | title={Does communicable diseases (including COVID-19) may increase global poverty risk? 
A cloud on the horizon}, 3 | author={Anser, Muhammad Khalid and Yousaf, Zahid and Khan, Muhammad Azhar and Nassani, Abdelmohsen A and Alotaibi, Saad M and Abro, Muhammad Moinuddin Qazi and Vo, Xuan Vinh and Zaman, Khalid}, 4 | journal={Environmental Research}, 5 | volume={187}, 6 | pages={109668}, 7 | year={2020}, 8 | publisher={Elsevier}, 9 | doi={10.1016/j.envres.2020.109668} 10 | } 11 | 12 | 13 | @article{arellano1991some, 14 | title={Some tests of specification for panel data: Monte Carlo evidence and an application to employment equations}, 15 | author={Arellano, Manuel and Bond, Stephen}, 16 | journal={The review of economic studies}, 17 | volume={58}, 18 | number={2}, 19 | pages={277--297}, 20 | year={1991}, 21 | publisher={Wiley-Blackwell}, 22 | doi={10.2307/2297968} 23 | } 24 | 25 | @article{blundell1998initial, 26 | title={Initial conditions and moment restrictions in dynamic panel data models}, 27 | author={Blundell, Richard and Bond, Stephen}, 28 | journal={Journal of econometrics}, 29 | volume={87}, 30 | number={1}, 31 | pages={115--143}, 32 | year={1998}, 33 | publisher={Elsevier}, 34 | doi={10.1016/S0304-4076(98)00009-8} 35 | } 36 | 37 | @article{croissant2008panel, 38 | title={Panel data econometrics in R: The plm package}, 39 | author={Croissant, Yves and Millo, Giovanni}, 40 | journal={Journal of statistical software}, 41 | volume={27}, 42 | number={2}, 43 | year={2008}, 44 | doi={10.18637/jss.v027.i02} 45 | } 46 | 47 | @Article{econometrics8030030, 48 | AUTHOR = {Phillips, Peter C. 
B.}, 49 | TITLE = {Dynamic Panel Modeling of Climate Change}, 50 | JOURNAL = {Econometrics}, 51 | VOLUME = {8}, 52 | YEAR = {2020}, 53 | NUMBER = {3}, 54 | ARTICLE-NUMBER = {30}, 55 | URL = {https://www.mdpi.com/2225-1146/8/3/30}, 56 | ISSN = {2225-1146}, 57 | doi = {10.3390/econometrics8030030} 58 | } 59 | 60 | @article{oehmke2021dynamic, 61 | title={Dynamic panel data modeling and surveillance of COVID-19 in metropolitan areas in the United States: Longitudinal trend analysis}, 62 | author={Oehmke, Theresa B and Post, Lori A and Moss, Charles B and Issa, Tariq Z and Boctor, Michael J and Welch, Sarah B and Oehmke, James F}, 63 | journal={Journal of medical Internet research}, 64 | volume={23}, 65 | number={2}, 66 | pages={e26081}, 67 | year={2021}, 68 | publisher={JMIR Publications Inc., Toronto, Canada}, 69 | doi={10.2196/26081} 70 | } 71 | 72 | @article{roodman2009xtabond2, 73 | title={How to do xtabond2: An introduction to difference and system GMM in Stata}, 74 | author={Roodman, David}, 75 | journal={The stata journal}, 76 | volume={9}, 77 | number={1}, 78 | pages={86--136}, 79 | year={2009}, 80 | publisher={SAGE Publications Sage CA: Los Angeles, CA}, 81 | doi={10.1177/1536867X0900900106} 82 | } 83 | 84 | 85 | @article{sigmund2021panel, 86 | title={Panel vector autoregression in R with the package panelvar}, 87 | author={Sigmund, Michael and Ferstl, Robert}, 88 | journal={The Quarterly Review of Economics and Finance}, 89 | volume={80}, 90 | pages={693--720}, 91 | year={2021}, 92 | publisher={Elsevier}, 93 | doi={10.1016/j.qref.2019.01.001} 94 | } 95 | 96 | @article{windmeijer2005finite, 97 | title={A finite sample correction for the variance of linear efficient two-step GMM estimators}, 98 | author={Windmeijer, Frank}, 99 | journal={Journal of econometrics}, 100 | volume={126}, 101 | number={1}, 102 | pages={25--51}, 103 | year={2005}, 104 | publisher={Elsevier}, 105 | doi={10.1016/j.jeconom.2004.02.005} 106 | } 107 | 108 | 109 | 110 | 
-------------------------------------------------------------------------------- /vignettes/traditional.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/vignettes/traditional.png --------------------------------------------------------------------------------