├── .gitignore
├── .idea
    ├── .gitignore
    ├── .name
    ├── Dynamic-Panel-Data-Models-with-Python.iml
    ├── inspectionProfiles
    │   └── profiles_settings.xml
    ├── misc.xml
    ├── modules.xml
    ├── other.xml
    └── vcs.xml
├── .ipynb_checkpoints
    ├── Untitled-checkpoint.ipynb
    └── untitled-checkpoint.md
├── Benchmark
    ├── code
    │   ├── R.R
    │   ├── benchmark.py
    │   ├── data.csv
    │   ├── data_gen.R
    │   ├── stata.do
    │   ├── test3.R
    │   └── test_data.csv
    ├── images
    │   ├── Test_1.svg
    │   ├── Test_2.svg
    │   └── benchmark_model_1_Eqn.svg
    ├── performance_comparison.md
    ├── test_1.md
    └── test_2.md
├── LICENSE
├── Main.py
├── Main2.py
├── README.md
├── build_upload.sh
├── contributing.md
├── data.csv
├── output.html
├── pydynpd.egg-info
    ├── PKG-INFO
    ├── SOURCES.txt
    ├── dependency_links.txt
    ├── requires.txt
    └── top_level.txt
├── pydynpd
    ├── .idea
    │   ├── .name
    │   ├── inspectionProfiles
    │   │   ├── Project_Default.xml
    │   │   └── profiles_settings.xml
    │   ├── misc.xml
    │   ├── modules.xml
    │   ├── pydynpd.iml
    │   ├── vcs.xml
    │   └── workspace.xml
    ├── __init__.py
    ├── __pycache__
    │   ├── __init__.cpython-39.pyc
    │   ├── command.cpython-39.pyc
    │   ├── common_functions.cpython-39.pyc
    │   ├── info.cpython-39.pyc
    │   ├── panel_data.cpython-39.pyc
    │   ├── regression.cpython-39.pyc
    │   ├── specification_tests.cpython-39.pyc
    │   └── variable.cpython-39.pyc
    ├── __version__.py
    ├── command.py
    ├── common_functions.py
    ├── dynamic_panel_model.py
    ├── info.py
    ├── instruments.py
    ├── model_organizer.py
    ├── model_summary.py
    ├── panel_data.py
    ├── regression.py
    ├── sandbox
    │   ├── multicollinearity.py
    │   └── pydynpd.zip
    ├── specification_tests.py
    └── variable.py
├── pyproject.toml
├── setup.py
├── test.py
├── test_data.csv
├── untitled.md
└── vignettes
    ├── .ipynb_checkpoints
        ├── Guide-checkpoint.ipynb
        └── Tutorial-checkpoint.ipynb
    ├── API.md
    ├── Guide.ipynb
    ├── Images
        ├── .$pydynpd.drawio.bkp
        ├── .$pydynpd.drawio.dtmp
        ├── .gitkeep
        ├── list_models.svg
        ├── new_struct.svg
        ├── pydynpd.drawio
        └── traditional.svg
    ├── Test_1.png
    ├── Test_1.svg
    ├── Tutorial.ipynb
    ├── images
        ├── new_struct.png
        └── traditional.png
    ├── new_struct.png
    ├── paper.md
    ├── ref.bib
    └── traditional.png


/.gitignore:
--------------------------------------------------------------------------------
 1 | 
 2 | pydynpd/__pycache__/__init__.cpython-39.pyc
 3 | pydynpd/__pycache__/__init__.cpython-39.pyc
 4 | pydynpd/__pycache__/command.cpython-39.pyc
 5 | pydynpd/__pycache__/common_functions.cpython-39.pyc
 6 | pydynpd/__pycache__/info.cpython-39.pyc
 7 | pydynpd/__pycache__/panel_data.cpython-39.pyc
 8 | pydynpd/__pycache__/regression.cpython-39.pyc
 9 | pydynpd/__pycache__/specification_tests.cpython-39.pyc
10 | pydynpd/__pycache__/variable.cpython-39.pyc
11 | pydynpd/__pycache__/__init__.cpython-39.pyc
12 | pydynpd/__pycache__/command.cpython-39.pyc
13 | pydynpd/__pycache__/common_functions.cpython-39.pyc
14 | pydynpd/__pycache__/info.cpython-39.pyc
15 | pydynpd/__pycache__/panel_data.cpython-39.pyc
16 | pydynpd/__pycache__/regression.cpython-39.pyc
17 | pydynpd/__pycache__/specification_tests.cpython-39.pyc
18 | pydynpd/__pycache__/variable.cpython-39.pyc
19 | pydynpd/__pycache__/__init__.cpython-39.pyc
20 | pydynpd/__pycache__/__init__.cpython-39.pyc
21 | pydynpd/__pycache__/__init__.cpython-39.pyc
22 | pydynpd/__pycache__/command.cpython-39.pyc
23 | pydynpd/__pycache__/common_functions.cpython-39.pyc
24 | *.pyc
25 | pydynpd/__pycache__/__init__.cpython-39.pyc
26 | *.pyc
27 | *.pyc
28 | *.pyc
29 | *.pyc
30 | *.whl
31 | *.whl
32 | *.gz
33 | *.gz
34 | pydynpd.egg-info/PKG-INFO
35 | *.pyc
36 | *.pyc
37 | *.whl
38 | *.gz
39 | *.pyc
40 | *.pyc
41 | *.gz
42 | *.gz
43 | *.whl
44 | Benchmark/.Rhistory
45 | Benchmark/.RData
46 | pydynpd.egg-info/PKG-INFO
47 | *.pyc
48 | pydynpd.egg-info/PKG-INFO
49 | *.pyc
50 | *.whl
51 | 


--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Editor-based HTTP Client requests
5 | /httpRequests/
6 | # Datasource local storage ignored files
7 | /dataSources/
8 | /dataSources.local.xml
9 | 


--------------------------------------------------------------------------------
/.idea/.name:
--------------------------------------------------------------------------------
1 | Main.py


--------------------------------------------------------------------------------
/.idea/Dynamic-Panel-Data-Models-with-Python.iml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <module type="PYTHON_MODULE" version="4">
 3 |   <component name="NewModuleRootManager">
 4 |     <content url="file://$MODULE_DIR$" />
 5 |     <orderEntry type="jdk" jdkName="3.10 @ Ubuntu" jdkType="Python SDK" />
 6 |     <orderEntry type="sourceFolder" forTests="false" />
 7 |   </component>
 8 |   <component name="PyDocumentationSettings">
 9 |     <option name="renderExternalDocumentation" value="true" />
10 |   </component>
11 | </module>


--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 | <component name="InspectionProjectProfileManager">
2 |   <settings>
3 |     <option name="USE_PROJECT_PROFILE" value="false" />
4 |     <version value="1.0" />
5 |   </settings>
6 | </component>


--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="ProjectRootManager" version="2" project-jdk-name="3.10 @ Ubuntu" project-jdk-type="Python SDK" />
4 | </project>


--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="ProjectModuleManager">
4 |     <modules>
5 |       <module fileurl="file://$PROJECT_DIR$/.idea/Dynamic-Panel-Data-Models-with-Python.iml" filepath="$PROJECT_DIR$/.idea/Dynamic-Panel-Data-Models-with-Python.iml" />
6 |     </modules>
7 |   </component>
8 | </project>


--------------------------------------------------------------------------------
/.idea/other.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="PySciProjectComponent">
4 |     <option name="PY_SCI_VIEW" value="true" />
5 |     <option name="PY_SCI_VIEW_SUGGESTED" value="true" />
6 |   </component>
7 | </project>


--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="VcsDirectoryMappings">
4 |     <mapping directory="$PROJECT_DIR$" vcs="Git" />
5 |   </component>
6 | </project>


--------------------------------------------------------------------------------
/.ipynb_checkpoints/Untitled-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [],
3 |  "metadata": {},
4 |  "nbformat": 4,
5 |  "nbformat_minor": 5
6 | }
7 | 


--------------------------------------------------------------------------------
/.ipynb_checkpoints/untitled-checkpoint.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/.ipynb_checkpoints/untitled-checkpoint.md


--------------------------------------------------------------------------------
/Benchmark/code/R.R:
--------------------------------------------------------------------------------
 1 | library(Matrix)
 2 | library(pdynmc)
 3 | library(plm)
 4 | library(panelvar)
 5 | 
 6 | abdata=read.csv("data.csv")
 7 | 
 8 | ### the following code produces an error
 9 | start=Sys.time()
10 | for (i in 1:100){
11 |  mc <- pdynmc(dat=abdata,varname.i = "id", varname.t = "year",
12 |              use.mc.diff = TRUE, use.mc.lev = TRUE, use.mc.nonlin = FALSE,
13 |              include.y = TRUE, varname.y = "n", lagTerms.y = 2, include.x = TRUE,
14 |              varname.reg.pre = c("w", "k"), lagTerms.reg.pre = c(0,0), maxLags.reg.pre = c(3,3),
15 |              include.dum = FALSE, dum.diff = FALSE, dum.lev = FALSE,
16 |              w.mat = "iid.err", std.err = "corrected", estimation = "twostep",
17 |              opt.meth = "none"
18 | )
19 | summary(mc)
20 | }
21 | print(Sys.time()-start)
22 | 
23 | ### the following code produces results inconsistent with other packages
24 | start=Sys.time()
25 | for (i in 1:100){
26 |  mc <- pdynmc(dat=abdata,varname.i = "id", varname.t = "year",
27 |              use.mc.diff = TRUE, use.mc.lev = TRUE, use.mc.nonlin = FALSE,
28 |              include.y = TRUE, varname.y = "n", lagTerms.y = 2, include.x = TRUE,
29 |              varname.reg.pre = c("w", "k"), lagTerms.reg.pre = c(0,0), maxLags.reg.pre = c(3,3),
30 |              include.dum = FALSE, dum.diff = FALSE, dum.lev = FALSE,
31 |              w.mat = "iid.err", std.err = "corrected", estimation = "twostep",
32 |              opt.meth = "none"
33 | )
34 | summary(mc)
35 | }
36 | print(Sys.time()-start)
37 | 
38 | 
39 | ### panelvar
40 | start=Sys.time()
41 | for (i in 1:100){
42 | ex3_abdata <-pvargmm(
43 |   dependent_vars = c("n"),
44 |   lags = 2,
45 |   predet_vars = c("w"),
46 |   exog_vars=c("k"),
47 |   transformation = "fd",
48 |   data = abdata,
49 |   panel_identifier = c("id", "year"),
50 |   steps = c("twostep"),
51 |   system_instruments = TRUE,
52 |   max_instr_dependent_vars = 3,
53 |   max_instr_predet_vars = 3,
54 |   min_instr_dependent_vars = 1L,
55 |   min_instr_predet_vars = 1L,
56 |   collapse = FALSE
57 | )
58 | summary(ex3_abdata)
59 | }
60 | print(Sys.time()-start)
61 | 
62 | 
63 | ######  plm  ################
64 | 
65 | start=Sys.time()
66 | for (i in 1:100){
67 | 
68 | pd <- pdata.frame(abdata, index = c("id", "year"), drop.index = TRUE)
69 | z1<-pgmm(n ~ 1+ lag(n, 1:2) + w + k |lag(n, 2:4) + lag(w, 1:3), data=pd, effect='individual',
70 |          model="twosteps" ,transformation='ld' , fsm='FULL')
71 | summary(z1, robust=TRUE)
72 | 
73 | }
74 | print(Sys.time()-start)
75 | 


--------------------------------------------------------------------------------
/Benchmark/code/benchmark.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | 
 3 | import pandas as pd
 4 | 
 5 | from pydynpd import regression
 6 | 
 7 | start = time.time()
 8 | for i in range(100):
 9 |     df = pd.read_csv("data.csv")
10 | 
11 |     mydpd = regression.abond('n L(1:2).n w k  | gmm(n, 2:4) gmm(w, 1:3)  iv(k)', df, ['id', 'year'])
12 | 
13 | print(time.time() - start)
14 | 


--------------------------------------------------------------------------------
/Benchmark/code/data_gen.R:
--------------------------------------------------------------------------------
 1 | #code is adapted from 
 2 | #Phillips PC, Han C (2019). “Chapter 5 - Dynamic panel GMM using R.” In HD Vinod, C Rao
 3 | #(eds.), Conceptual Econometrics Using R, volume 41 of Handbook of Statistics, pp. 119 –
 4 | #144. Elsevier. doi:https://doi.org/10.1016/bs.host.2019.01.002.
 5 | 
 6 | library(Matrix)
 7 | library(data.table)
 8 | 
 9 | 
10 | nsize = 1000
11 | tsize = 10
12 | burn = 20
13 | t.all = tsize + burn
14 | 
15 | beta1 = .25
16 | beta2 = .1
17 | gamma0 = 5
18 | gamma1 = -.1
19 | gamma2 = .1
20 | gamma3 = 3
21 | sigma = list(alpha = 5, u = 1, x = 1, z = 1)
22 | mu0 = 1
23 | mu1 = .005
24 | rho = list(z = 1, x = 1)
25 | 
26 | set.seed(1)
27 | x = sigma$x * matrix(rnorm(nsize * t.all), nsize, t.all)
28 | for (j in 2:ncol(x)) x[, j] = rho$x * x[, j - 1] + x[, j]
29 | if (rho$x == 1) x = x - x[, burn - 1]
30 | 
31 | trend = mu0 + mu1 * seq(-burn + 1, tsize)
32 | z0 = stats::filter(rnorm(t.all), rho$z, method = "recursive")
33 | if (rho$z == 1) z0 = z0 - z0[burn - 1]
34 | z = trend + z0
35 | 
36 | alpha = sigma$alpha * rnorm(nsize)
37 | u = sigma$u * matrix(rnorm(nsize * t.all), nsize, t.all)
38 | 
39 | # With these components in hand, we recursively generate yit as follows:
40 | xbar = colMeans(x)
41 | y = matrix(NA, nsize, t.all)
42 | y[, 1] = alpha + u[, 1]
43 | for (j in 2:ncol(y)) {
44 |   lambda = gamma0 +     gamma1 * mean(y[, j - 1]) +     gamma2 * xbar[j - 1] +     gamma3 * z[j - 1]
45 |   y[, j] = alpha +     beta1 * y[, j - 1] +     beta2 * x[, j - 1] +     lambda +     u[, j]
46 | }
47 | 
48 | y = y[, burn:t.all]  # y is the Nx(T+1) matrix    t=0...T
49 | x = x[, burn:t.all]
50 | z = z[burn:t.all]
51 | 
52 | w=data.frame(id=as.vector(row(y)), year=as.vector(col(y))-1, y=as.vector(y), x=as.vector(x))
53 | write.csv(w, "test_data.csv")


--------------------------------------------------------------------------------
/Benchmark/code/stata.do:
--------------------------------------------------------------------------------
 1 | timer clear
 2 | timer on 1
 3 | foreach n of numlist 1/100{
 4 | clear
 5 | insheet using "C:\Users\Tiger\OneDrive\Dynamic Panel\data.csv"
 6 |   xtset(id year)
 7 | xtabond2 n L(1/2).n w k , gmm(n, lag(2 4)) gmm(w, lag(1 3)) iv(k )  twostep robust 
 8 | }
 9 | timer off 1
10 | 
11 | qui timer list
12 | di in r "First time: " r(t1) 
13 | 


--------------------------------------------------------------------------------
/Benchmark/code/test3.R:
--------------------------------------------------------------------------------
 1 | library(plm)
 2 | library(pdynmc)
 3 | library(panelvar)
 4 | 
 5 | start=Sys.time()
 6 | 
 7 | 
 8 | for(i in 1:100){
 9 |   dat =read.csv('data.csv')
10 |   pd <- pdata.frame(dat, index = c("id", "year"), drop.index = TRUE)
11 |   z1<-pgmm(n ~ lag(n, 1:2) + w + k |lag(n, 2:4) , data=pd, effect='individual',
12 |            model="twosteps" ,transformation='ld' , robust=TRUE)
13 |   summary(z1, robust=TRUE)
14 |   
15 |   
16 | }
17 | 
18 | print(Sys.time()-start)
19 | summary(z1, robust=TRUE)
20 | 
21 | for (i in 1:100) {
22 | dat =read.csv('data.csv')
23 | 
24 | 
25 | 
26 | m1 <- pdynmc(dat = dat, varname.i = "id", varname.t = "year",
27 |              use.mc.diff = TRUE, use.mc.lev = FALSE, use.mc.nonlin = FALSE,
28 |              include.y = TRUE, varname.y = "n", lagTerms.y = 2,maxLags.y=4,
29 |              varname.reg.ex=c("w", "k"), 
30 |              include.x=TRUE, lagTerms.reg.ex=c(0,0),
31 |              w.mat = "iid.err", std.err = "corrected", estimation = "twostep",
32 |              opt.meth = "none")
33 | summary(m1)
34 | }
35 | 
36 | print(Sys.time()-start)
37 | # 
38 | # 
39 | # 
40 | m2 <- pdynmc(dat = dat, varname.i = "id", varname.t = "year",
41 |              use.mc.diff = TRUE, use.mc.lev = TRUE, use.mc.nonlin = FALSE,
42 |              include.y = TRUE, varname.y = "n", lagTerms.y = 2,maxLags.y=4,
43 |              fur.con = TRUE, fur.con.diff = TRUE, fur.con.lev = FALSE,
44 |              varname.reg.fur = c("w", "k"), lagTerms.reg.fur = c(0,0),
45 |              include.dum = FALSE, dum.diff = TRUE, dum.lev = FALSE, varname.dum = "year",
46 |              w.mat = "iid.err", std.err = "corrected", estimation = "twostep",
47 |              opt.meth = "none")
48 | summary(m2)
49 | mtest.fct(m2, order = 2)
50 | 
51 | start=Sys.time()
52 | for (i in 1:100) {
53 | dat =read.csv('data.csv')  
54 | p1 <-pvargmm(
55 |   dependent_vars = c("n"),
56 |   lags = 2,
57 |   exog_vars = c("w","k"),
58 |   #exog_vars = c("w","k"),
59 |   transformation = "fd",
60 |   data = dat,
61 |   panel_identifier = c("id", "year"),
62 |   steps = c("twostep"),
63 |   system_instruments = TRUE,
64 |   max_instr_dependent_vars = 4,
65 |   max_instr_predet_vars = 3,
66 |   min_instr_dependent_vars = 2,
67 |   min_instr_predet_vars = 1,
68 |   collapse = FALSE,
69 |   progressbar=FALSE
70 | )
71 | 
72 | summary(p1)
73 | }
74 | print(Sys.time()-start)
75 | summary(p1)
76 | 
77 | 
78 | 


--------------------------------------------------------------------------------
/Benchmark/performance_comparison.md:
--------------------------------------------------------------------------------
 1 | 
 2 | The objective of the package is similar to the following open-source packages: <br>
 3 | Package | Language | version
 4 | --- | --- | ---
 5 | plm | R | 2.6-1
 6 | panelvar | R| 0.5.3
 7 | pdynmc | R| 0.9.7
 8 | 
 9 | To compare pydynpd with similar packages, we performed two performance tests. More specifically, in each test for each package we run 100 times to estimate the same model with the same data. For verification, the tests also include Stata package xtabond2 though Stata is a commercial software. Please note that the main reason we include xtabond2 is for regression result verification (\*). Directly comparing its speed with R or Python packages is a little unfair because the calculation part of xtabond2 was compiled while pydynpd and the three R packages are interpreted; xtabond2 should have a clear advantage on speed. 
10 | 
11 | *Note: xtabond2 is the most popular package on dynamic panel model as evidenced by the 9667 citations it has received according to google scholar as of April 3, 2022. It also won the annual Editor's prize of Stata journal in 2012.
12 | 
13 | ## Test configuration
14 | ### Hardware
15 | Intel CPU 9700K (8 cores) <br>
16 | Memory: 64GB <br>
17 | 
18 | ### Software
19 | Debian-based Linux (Deepin 20.05) <br>
20 | R 4.1.3 <br>
21 | Python 3.10.3 <br>
22 | <p>To make our comparison fair, we manually compiled R, numpy, and scipy with Intel's Math Kernel Libarary (MKL), so that these R and Python packages do calculations using the same Linear Algebra library.</p>
23 | 
24 | Configuration of R:
25 | ```
26 | > sessionInfo()
27 | R version 4.1.3 (2022-03-10)
28 | Platform: x86_64-pc-linux-gnu (64-bit)
29 | Running under: Deepin 20.5
30 | 
31 | Matrix products: default
32 | BLAS/LAPACK: /opt/intel/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin/libmkl_gf_lp64.so
33 | ```
34 | Configuration of Python Numpy:
35 | ```
36 | numpy.show_config()
37 | blas_armpl_info:
38 |   NOT AVAILABLE
39 | blas_mkl_info:
40 |     libraries = ['mkl_rt', 'pthread', 'mkl_rt']
41 |     library_dirs = ['/opt/intel/compilers_and_libraries/linux/mkl/lib/intel64']
42 |     define_macros = [('SCIPY_MKL_H', None), ('HAVE_CBLAS', None)]
43 |     include_dirs = ['/opt/intel/compilers_and_libraries/linux/mkl/include']
44 | blas_opt_info:
45 |     libraries = ['mkl_rt', 'pthread', 'mkl_rt']
46 |     library_dirs = ['/opt/intel/compilers_and_libraries/linux/mkl/lib/intel64']
47 |     define_macros = [('SCIPY_MKL_H', None), ('HAVE_CBLAS', None)]
48 |     include_dirs = ['/opt/intel/compilers_and_libraries/linux/mkl/include']
49 | ```
50 | 
51 | <p>The tests are based on the data set employed in Arellano and Bond (1991) and is avaialble in the /Benchmark/code subfolder. </p>
52 | 
53 | <p>In the tests, we considered the following model:</p>
54 | 
55 | ![Alt text](./images/benchmark_model_1_Eqn.svg) <br>
56 | 
57 | <p>We performed two tests on the model above. Test 1 is a difference GMM and test 2 a system GMM. Regression results of each test are stored in https://github.com/dazhwu/pydynpd/blob/main/Benchmark/test_1.md and https://github.com/dazhwu/pydynpd/blob/main/Benchmark/test_2.md respectively. Scripts can be found in the /Benchmark/code subfolder</p>
58 | 
59 | 
60 | ## Test 1: Difference GMM
61 | The following four packages produced the same regression result: plm, panelvar, pydynpd, and xtabond2. The result from R package pdynmc is slightly different. The table below compaires their speeds. Column 2 shows their running time in seconds per 100 loops, and column 3 calculates their relative speed (i.e, relative to the fastest package in the test). Please note that as xtabond2 was developed and compiled using Mata language, there are two different modes available in Stata. In the default mode, storage is favored over speed. But users can manually switch to the speed mode. Therefore, we report two speeds for xtabond2. More specifically, xtabond2 (default) balances between speed and storage, while xtabond2 (speed) represents the fastest speed this package can achieve.
62 | 
63 | 
64 | | Package            | Running Time | Relative to the fastest |
65 | | ------------------ | ------------ | ----------------------- |
66 | | xtabond2           | 4.19         | 1.00                    |
67 | | pydynpd            | 4.81         | 1.15                    |
68 | | panelvar           | 661.893      | 157.97                  |
69 | | plm                | 11.02        | 2.63                    |
70 | | pdynmc             | 167.4        | 39.95                   |
71 | 
72 | 
73 | 
74 | ![Alt text](./images/Test_1.svg)
75 | 
76 | The chart above shows that our package (pydynpd) is not far behind of xtabond2 even though it is an interpreted package.
77 | 
78 | 
79 | ## Test 2: System GMM
80 | 
81 | In the second test, pydynpd produced the same regression results as those by xtabond2 and panelvar. plm has different results because it doesn't include constant term. On the other hand, we made several attempts but could not let R package pdynmc work; it kept on reporting the same error message (i.e., "Matrices must have same number of rows in cbind2(x, .Call(dense_to_Csparse, y)").
82 | 
83 | | Package            | Running Time | Relative to the fastest |
84 | | ------------------ | ------------ | ----------------------- |
85 | | xtabond2           | 5.893        | 1.00                    |
86 | | pydynpd            | 6.21         | 1.05                    |
87 | | plm                | 14.26        | 2.42                    |
88 | | panelvar           | 718.2        | 121.87                  |
89 | | pdynmc             | NA           | NA                      |
90 | 
91 | 
92 | 
93 | ![Alt text](./images/Test_2.svg)
94 | 
95 | 
96 | 


--------------------------------------------------------------------------------
/Benchmark/test_1.md:
--------------------------------------------------------------------------------
  1 | # plm
  2 | ```
  3 | library(plm)
  4 | abdata=read.csv("data.csv")
  5 | pd <- pdata.frame(abdata, index = c("id", "year"), drop.index = TRUE)
  6 | z1<-pgmm(n ~ 1+ lag(n, 1:2) + w + k |lag(n, 2:4) + lag(w, 1:3), data=pd, effect='individual',
  7 |          model="twosteps" ,transformation='d',robust = TRUE
  8 | summary(z1)
  9 | 
 10 | ```
 11 | ```
 12 | Oneway (individual) effect Two-steps model Difference GMM 
 13 | 
 14 | Call:
 15 | pgmm(formula = n ~ 1 + lag(n, 1:2) + w + k | lag(n, 2:4) + lag(w, 
 16 |     1:3), data = pd, effect = "individual", model = "twosteps", 
 17 |     transformation = "d", robust = TRUE)
 18 | 
 19 | Unbalanced Panel: n = 140, T = 7-9, N = 1031
 20 | 
 21 | Number of Observations Used: 611
 22 | Residuals:
 23 |     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
 24 | -0.63963 -0.03725  0.00000 -0.00309  0.04356  0.48571 
 25 | 
 26 | Coefficients:
 27 |               Estimate Std. Error z-value  Pr(>|z|)    
 28 | lag(n, 1:2)1  0.170062   0.104665  1.6248    0.1042    
 29 | lag(n, 1:2)2 -0.011338   0.037720 -0.3006    0.7637    
 30 | w            -0.951058   0.127730 -7.4459 9.632e-14 ***
 31 | k             0.463722   0.071833  6.4556 1.078e-10 ***
 32 | ---
 33 | Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
 34 | 
 35 | Sargan test: chisq(32) = 47.85967 (p-value = 0.035436)
 36 | Autocorrelation test (1): normal = -1.187819 (p-value = 0.2349)
 37 | Autocorrelation test (2): normal = -0.8112457 (p-value = 0.41722)
 38 | Wald test for coefficients: chisq(4) = 402.4636 (p-value = < 2.22e-16)
 39 | ```
 40 | 
 41 | # panelvar
 42 | ```
 43 | library(panelvar)
 44 | abdata=read.csv("data.csv")
 45 | 
 46 | p1 <-pvargmm(
 47 |   dependent_vars = c("n"),
 48 |   lags = 2,
 49 |   predet_vars = c("w"),
 50 |   exog_vars=c("k"),
 51 |   transformation = "fd",
 52 |   data = abdata,
 53 |   panel_identifier = c("id", "year"),
 54 |   steps = c("twostep"),
 55 |   system_instruments = FALSE,
 56 |   max_instr_dependent_vars = 3,
 57 |   max_instr_predet_vars = 3,
 58 |   min_instr_dependent_vars = 1L,
 59 |   min_instr_predet_vars = 1L,
 60 |   collapse = FALSE,
 61 |   progressbar = FALSE
 62 | )
 63 | summary(p1)
 64 | 
 65 | ```
 66 | ```
 67 | ---------------------------------------------------
 68 | Dynamic Panel VAR estimation, two-step GMM 
 69 | ---------------------------------------------------
 70 | Transformation: First-differences 
 71 | Group variable: id 
 72 | Time variable: year 
 73 | Number of observations = 611 
 74 | Number of groups = 140 
 75 | Obs per group: min = 4 
 76 |                avg = 4.364286 
 77 |                max = 6 
 78 | Number of instruments = 36 
 79 | 
 80 | ===================
 81 |         n          
 82 | -------------------
 83 | lag1_n   0.1701    
 84 |         (0.1047)   
 85 | lag2_n  -0.0113    
 86 |         (0.0377)   
 87 | w       -0.9511 ***
 88 |         (0.1277)   
 89 | k        0.4637 ***
 90 |         (0.0718)   
 91 | ===================
 92 | *** p < 0.001; ** p < 0.01; * p < 0.05
 93 | 
 94 | ---------------------------------------------------
 95 | Instruments for  equation
 96 |  Standard
 97 |   FD.(k)
 98 |  GMM-type
 99 |   Dependent vars: L(1, 3)
100 |   Predet vars: L(1, 3)
101 |   Collapse =  FALSE 
102 | ---------------------------------------------------
103 | 
104 | Hansen test of overid. restrictions: chi2(32) = 47.86 Prob > chi2 = 0.035
105 | (Robust, but weakened by many instruments.)
106 | ```
107 | 
108 | # pdynmc
109 | 
110 | ```
111 | library(pdynmc)
112 | abdata=read.csv("data.csv")
113 | mc_1 <- pdynmc(dat=abdata,varname.i = "id", varname.t = "year",
114 |                use.mc.diff = TRUE, use.mc.lev = FALSE, use.mc.nonlin = FALSE,
115 |                include.y = TRUE, varname.y = "n", lagTerms.y = 2, maxLags.y=4,
116 |                inst.stata = TRUE, include.x = TRUE,               
117 |                varname.reg.pre = c("w"), lagTerms.reg.pre = c(0), maxLags.reg.pre = c(3),
118 |                fur.con = TRUE, fur.con.diff = TRUE, fur.con.lev = FALSE,
119 |                varname.reg.fur = c("k"),lagTerms.reg.fur = c(0),
120 |                w.mat = "iid.err", std.err = "corrected", estimation = "twostep",
121 |                opt.meth = "none")
122 | summary(mc_1)
123 | mtest.fct(mc_1, order = 2)
124 | ```
125 | ```
126 | Dynamic linear panel estimation (twostep)
127 | Estimation steps: 2
128 | 
129 | Coefficients:
130 |      Estimate Std.Err.rob z-value.rob Pr(>|z.rob|)    
131 | L1.n  0.17078     0.10597       1.611        0.107    
132 | L2.n -0.01186     0.03862      -0.307        0.759    
133 | L0.w -0.96426     0.12689      -7.599       <2e-16 ***
134 | L0.k  0.46357     0.07237       6.406       <2e-16 ***
135 | ---
136 | Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
137 | 
138 |  36 total instruments are employed to estimate 4 parameters
139 |  35 linear (DIF) 
140 |  1 further controls (DIF) 
141 |  no time dummies 
142 |  
143 | J-Test (overid restrictions):  47.49 with 32 DF, pvalue: 0.0383
144 | F-Statistic (slope coeff):  408.98 with 4 DF, pvalue: <0.001
145 | F-Statistic (time dummies):  no time dummies included in estimation
146 | 
147 | 	Arellano and Bond (1991) serial correlation test of degree 2
148 | 
149 | data:  2step GMM Estimation
150 | normal = -0.9218, p-value = 0.3566
151 | alternative hypothesis: serial correlation of order 2 in the error terms
152 | 
153 | 
154 | 
155 | 	Arellano and Bond (1991) serial correlation test of degree 2
156 | 
157 | data:  2step GMM Estimation
158 | normal = -0.9218, p-value = 0.3566
159 | alternative hypothesis: serial correlation of order 2 in the error terms
160 | 
161 | ```
162 | # pydynpd
163 | ```
164 | import pandas as pd
165 | from  pydynpd import regression
166 | df = pd.read_csv("data.csv")
167 | mydpd = regression.abond('n L(1:2).n w k  | gmm(n, 2:4) gmm(w, 1:3)  iv(k) | nolevel', df, ['id', 'year'])
168 | ```
169 | ```
170 | Dynamic panel-data estimation, two-step difference GMM
171 |  Group variable: id             Number of obs = 611     
172 |  Time variable: year            Min obs per group: 5    
173 |  Number of instruments = 36     Max obs per group: 7    
174 |  Number of groups = 140         Avg obs per group: 5.36 
175 | +------+------------+---------------------+------------+-----------+-----+
176 | |  n   |   coef.    | Corrected Std. Err. |     z      |   P>|z|   |     |
177 | +------+------------+---------------------+------------+-----------+-----+
178 | | L1.n | 0.1700616  |      0.1046652      | 1.6248154  | 0.1042019 |     |
179 | | L2.n | -0.0113381 |      0.0377205      | -0.3005824 | 0.7637329 |     |
180 | |  w   | -0.9510582 |      0.1277298      | -7.4458585 | 0.0000000 | *** |
181 | |  k   | 0.4637223  |      0.0718328      | 6.4555747  | 0.0000000 | *** |
182 | +------+------------+---------------------+------------+-----------+-----+
183 | Hansen test of overid. restrictions: chi(32) = 47.860 Prob > Chi2 = 0.035
184 | Arellano-Bond test for AR(1) in first differences: z = -1.19 Pr > z =0.235
185 | Arellano-Bond test for AR(2) in first differences: z = -0.81 Pr > z =0.417
186 | ```
187 | 
188 | 
189 |     command_str='y L1.y L1.x  | gmm(y, 2:4) iv(L1.x)| timedumm '
190 |     mydpd = regression.abond(command_str, df, ['id', 'year'])
191 | 
192 | # xtabond2 
193 | 
194 | ```
195 | insheet using "data.csv"
196 | xtset(id year)
197 | xtabond2 n L(1/2).n w k , gmm(n, lag(2 4)) gmm(w, lag(1 3)) iv(k ) nolevel twostep robust 
198 | 
199 | ```
200 | ```
201 | 
202 | Favoring speed over space. To switch, type or click on mata: mata set matafavor space, perm.
203 | Warning: Two-step estimated covariance matrix of moments is singular.
204 |   Using a generalized inverse to calculate optimal weighting matrix for two-step estimation.
205 |   Difference-in-Sargan/Hansen statistics may be negative.
206 | 
207 | Dynamic panel-data estimation, two-step difference GMM
208 | ------------------------------------------------------------------------------
209 | Group variable: id                              Number of obs      =       611
210 | Time variable : year                            Number of groups   =       140
211 | Number of instruments = 36                      Obs per group: min =         4
212 | Wald chi2(0)  =         .                                      avg =      4.36
213 | Prob > chi2   =         .                                      max =         6
214 | ------------------------------------------------------------------------------
215 |              |              Corrected
216 |            n |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
217 | -------------+----------------------------------------------------------------
218 |            n |
219 |          L1. |   .1700616   .1046652     1.62   0.104    -.0350784    .3752016
220 |          L2. |  -.0113381   .0377205    -0.30   0.764    -.0852688    .0625926
221 |              |
222 |            w |  -.9510582   .1277298    -7.45   0.000    -1.201404   -.7007124
223 |            k |   .4637223   .0718328     6.46   0.000     .3229325    .6045121
224 | ------------------------------------------------------------------------------
225 | Instruments for first differences equation
226 |   Standard
227 |     D.k
228 |   GMM-type (missing=0, separate instruments for each period unless collapsed)
229 |     L(1/3).w
230 |     L(2/4).n
231 | ------------------------------------------------------------------------------
232 | Arellano-Bond test for AR(1) in first differences: z =  -1.19  Pr > z =  0.235
233 | Arellano-Bond test for AR(2) in first differences: z =  -0.81  Pr > z =  0.417
234 | ------------------------------------------------------------------------------
235 | Sargan test of overid. restrictions: chi2(32)   =  91.61  Prob > chi2 =  0.000
236 |   (Not robust, but not weakened by many instruments.)
237 | Hansen test of overid. restrictions: chi2(32)   =  47.86  Prob > chi2 =  0.035
238 |   (Robust, but weakened by many instruments.)
239 | 
240 | Difference-in-Hansen tests of exogeneity of instrument subsets:
241 |   gmm(n, lag(2 4))
242 |     Hansen test excluding group:     chi2(15)   =  23.75  Prob > chi2 =  0.069
243 |     Difference (null H = exogenous): chi2(17)   =  24.11  Prob > chi2 =  0.117
244 |   gmm(w, lag(1 3))
245 |     Hansen test excluding group:     chi2(14)   =  17.25  Prob > chi2 =  0.243
246 |     Difference (null H = exogenous): chi2(18)   =  30.61  Prob > chi2 =  0.032
247 |   iv(k)
248 |     Hansen test excluding group:     chi2(31)   =  38.33  Prob > chi2 =  0.171
249 |     Difference (null H = exogenous): chi2(1)    =   9.53  Prob > chi2 =  0.002
250 | 
251 | ```
252 | 


--------------------------------------------------------------------------------
/Benchmark/test_2.md:
--------------------------------------------------------------------------------
  1 | # plm
  2 | ```
  3 | library(plm)
  4 | abdata=read.csv("data.csv")
  5 | pd <- pdata.frame(abdata, index = c("id", "year"), drop.index = TRUE)
  6 | z1<-pgmm(n ~ 1+ lag(n, 1:2) + w + k |lag(n, 2:4) + lag(w, 1:3), data=pd, effect='individual',
  7 |          model="twosteps" ,transformation='ld', robust = TRUE)
  8 | summary(z1)
  9 | 
 10 | ```
 11 | 
 12 | ```
 13 | pgmm(formula = n ~ lag(n, 1:2) + w + k | lag(n, 2:4) + lag(w, 
 14 |     1:3), data = pd, effect = "individual", model = "twosteps", 
 15 |     transformation = "ld", robust = TRUE)
 16 | 
 17 | Unbalanced Panel: n = 140, T = 7-9, N = 1031
 18 | 
 19 | Number of Observations Used: 1362
 20 | Residuals:
 21 |      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
 22 | -1.716999 -0.039468  0.000000  0.001151  0.049452  1.057841 
 23 | 
 24 | Coefficients:
 25 |               Estimate Std. Error z-value  Pr(>|z|)    
 26 | lag(n, 1:2)1  0.993296   0.146555  6.7776 1.222e-11 ***
 27 | lag(n, 1:2)2 -0.164000   0.107125 -1.5309  0.125791    
 28 | w             0.059379   0.028402  2.0906  0.036560 *  
 29 | k             0.140340   0.050027  2.8053  0.005027 ** 
 30 | ---
 31 | Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
 32 | 
 33 | Sargan test: chisq(47) = 105.7369 (p-value = 2.0581e-06)
 34 | Autocorrelation test (1): normal = -1.926883 (p-value = 0.053994)
 35 | Autocorrelation test (2): normal = -0.1281159 (p-value = 0.89806)
 36 | Wald test for coefficients: chisq(4) = 8031.159 (p-value = < 2.22e-16)
 37 | 
 38 | 
 39 | ```
 40 | 
 41 | # panelvar
 42 | ```
 43 | library(panelvar)
 44 | abdata=read.csv("data.csv")
 45 | 
 46 | p1 <-pvargmm(
 47 |   dependent_vars = c("n"),
 48 |   lags = 2,
 49 |   predet_vars = c("w"),
 50 |   exog_vars=c("k"),
 51 |   transformation = "fd",
 52 |   data = abdata,
 53 |   panel_identifier = c("id", "year"),
 54 |   steps = c("twostep"),
 55 |   system_instruments = TRUE,
 56 |   max_instr_dependent_vars = 3,
 57 |   max_instr_predet_vars = 3,
 58 |   min_instr_dependent_vars = 1L,
 59 |   min_instr_predet_vars = 1L,
 60 |   collapse = FALSE,
 61 |   progressbar = FALSE
 62 | )
 63 | summary(p1)
 64 | 
 65 | ```
 66 | 
 67 | ```
 68 | -------------------------------------------------
 69 | Dynamic Panel VAR estimation, two-step GMM 
 70 | ---------------------------------------------------
 71 | Transformation: First-differences 
 72 | Group variable: id 
 73 | Time variable: year 
 74 | Number of observations = 611 
 75 | Number of groups = 140 
 76 | Obs per group: min = 4 
 77 |                avg = 4.364286 
 78 |                max = 6 
 79 | Number of instruments = 51 
 80 | 
 81 | ===================
 82 |         n          
 83 | -------------------
 84 | lag1_n   0.9454 ***
 85 |         (0.1430)   
 86 | lag2_n  -0.0860    
 87 |         (0.1082)   
 88 | w       -0.4478 ** 
 89 |         (0.1522)   
 90 | k        0.1236 *  
 91 |         (0.0509)   
 92 | const    1.5631 ** 
 93 |         (0.4993)   
 94 | ===================
 95 | *** p < 0.001; ** p < 0.01; * p < 0.05
 96 | 
 97 | ---------------------------------------------------
 98 | Instruments for  equation
 99 |  Standard
100 |   FD.(k)
101 |  GMM-type
102 |   Dependent vars: L(2, 4)
103 |   Predet vars: L(1, 3)
104 |   Collapse =  FALSE 
105 | ---------------------------------------------------
106 | 
107 | Hansen test of overid. restrictions: chi2(46) = 96.44 Prob > chi2 = 0
108 | (Robust, but weakened by many instruments.)
109 | 
110 | 
111 | ```
112 | 
113 | # pdynmc
114 | 
115 | ```
116 | library(pdynmc)
117 | abdata=read.csv("data.csv")
118 | mc_1 <- pdynmc(dat=abdata,varname.i = "id", varname.t = "year",
119 |                use.mc.diff = TRUE, use.mc.lev = TRUE, use.mc.nonlin = FALSE,
120 |                include.y = TRUE, varname.y = "n", lagTerms.y = 2, maxLags.y=4,
121 |                inst.stata = TRUE, include.x = TRUE,               
122 |                varname.reg.pre = c("w"), lagTerms.reg.pre = c(0), maxLags.reg.pre = c(3),
123 |                fur.con = TRUE, fur.con.diff = TRUE, fur.con.lev = TRUE,
124 |                varname.reg.fur = c("k"),lagTerms.reg.fur = c(0),
125 |                w.mat = "iid.err", std.err = "corrected", estimation = "twostep",
126 |                opt.meth = "none")
127 | summary(mc_1)
128 | mtest.fct(mc_1, order = 2)
129 | ```
130 | ```
131 | Error in mapply(ti = ti.temp, t.end = tend.temp, lagTerms = lagTerms, : non-numeric argument to binary operator
132 | Traceback:
133 | 
134 | 1. pdynmc(dat = abdata, varname.i = "id", varname.t = "year", use.mc.diff = TRUE, 
135 |  .     use.mc.lev = TRUE, use.mc.nonlin = FALSE, include.y = TRUE, 
136 |  .     varname.y = "n", lagTerms.y = 2, maxLags.y = 4, inst.stata = TRUE, 
137 |  .     include.x = TRUE, varname.reg.pre = c("w"), lagTerms.reg.pre = c(0), 
138 |  .     maxLags.reg.pre = c(3), fur.con = TRUE, fur.con.diff = TRUE, 
139 |  .     fur.con.lev = TRUE, varname.reg.fur = c("k"), lagTerms.reg.fur = c(0), 
140 |  .     w.mat = "iid.err", std.err = "corrected", estimation = "twostep", 
141 |  .     opt.meth = "none")
142 | 2. lapply(X = i_cases, FUN = Z_i.fct, Time = Time, varname.i = varname.i, 
143 |  .     use.mc.diff = use.mc.diff, use.mc.lev = use.mc.lev, use.mc.nonlin = use.mc.nonlin, 
144 |  .     use.mc.nonlinAS = use.mc.nonlinAS, include.y = include.y, 
145 |  .     varname.y = varname.y, inst.stata = inst.stata, include.dum = include.dum, 
146 |  .     dum.diff = dum.diff, dum.lev = dum.lev, colnames.dum = colnames.dum, 
147 |  .     fur.con = fur.con, fur.con.diff = fur.con.diff, fur.con.lev = fur.con.lev, 
148 |  .     varname.reg.estParam.fur = varname.reg.estParam.fur, include.x = include.x, 
149 |  .     end.reg = end.reg, varname.reg.end = varname.reg.end, pre.reg = pre.reg, 
150 |  .     varname.reg.pre = varname.reg.pre, ex.reg = ex.reg, varname.reg.ex = varname.reg.ex, 
151 |  .     maxLags.y = maxLags.y, lagTerms.y = lagTerms.y, max.lagTerms = max.lagTerms, 
152 |  .     maxLags.reg.end = maxLags.reg.end, maxLags.reg.pre = maxLags.reg.pre, 
153 |  .     maxLags.reg.ex = maxLags.reg.ex, inst.reg.ex.expand = inst.reg.ex.expand, 
154 |  .     dat = dat, dat.na = dat.na)
155 | 3. FUN(X[[i]], ...)
156 | 4. do.call(what = "cbind", args = sapply(FUN = LEV.pre.fct, i = i, 
157 |  .     varname.ex.pre.temp, T.mcLev = T.mcLev.temp, use.mc.diff = use.mc.diff, 
158 |  .     inst.stata = inst.stata, Time = Time, varname.i = varname.i, 
159 |  .     lagTerms = max.lagTerms, dat = dat, dat.na = dat.na))
160 | 5. sapply(FUN = LEV.pre.fct, i = i, varname.ex.pre.temp, T.mcLev = T.mcLev.temp, 
161 |  .     use.mc.diff = use.mc.diff, inst.stata = inst.stata, Time = Time, 
162 |  .     varname.i = varname.i, lagTerms = max.lagTerms, dat = dat, 
163 |  .     dat.na = dat.na)
164 | 6. lapply(X = X, FUN = FUN, ...)
165 | 7. FUN(X[[i]], ...)
166 | 8. Matrix::bdiag(do.call(what = diag, args = list(mapply(ti = ti.temp, 
167 |  .     t.end = tend.temp, lagTerms = lagTerms, FUN = datLEV.pre.fct, 
168 |  .     varname = varname, MoreArgs = list(i = i, use.mc.diff = use.mc.diff, 
169 |  .         inst.stata = inst.stata, dat = dat, dat.na = dat.na, 
170 |  .         varname.i = varname.i, Time = Time)) * as.vector(!is.na(diff(dat.na[dat.na[, 
171 |  .     varname.i] == i, varname][(lagTerms - 1):Time]))))))
172 | 9. do.call(what = diag, args = list(mapply(ti = ti.temp, t.end = tend.temp, 
173 |  .     lagTerms = lagTerms, FUN = datLEV.pre.fct, varname = varname, 
174 |  .     MoreArgs = list(i = i, use.mc.diff = use.mc.diff, inst.stata = inst.stata, 
175 |  .         dat = dat, dat.na = dat.na, varname.i = varname.i, Time = Time)) * 
176 |  .     as.vector(!is.na(diff(dat.na[dat.na[, varname.i] == i, varname][(lagTerms - 
177 |  .         1):Time])))))
178 | 
179 | ```
180 | # pydynpd
181 | ```
182 | import pandas as pd
183 | from  pydynpd import regression
184 | df=pd.read_csv("data.csv")
185 | 
186 | mydpd = regression.abond('n L(1:2).n w k | gmm(n, 2:4) gmm(w, 1:3) iv(k)', df, ['id', 'year'])
187 | ```
188 | ```
189 | Dynamic panel-data estimation, two-step system GMM
190 |  Group variable: id             Number of obs = 751     
191 |  Time variable: year            Min obs per group: 5    
192 |  Number of instruments = 51     Max obs per group: 7    
193 |  Number of groups = 140         Avg obs per group: 5.36 
194 | +------+------------+---------------------+------------+-----------+-----+
195 | |  n   |   coef.    | Corrected Std. Err. |     z      |   P>|z|   |     |
196 | +------+------------+---------------------+------------+-----------+-----+
197 | | L1.n | 0.9453810  |      0.1429764      | 6.6121470  | 0.0000000 | *** |
198 | | L2.n | -0.0860069 |      0.1082318      | -0.7946553 | 0.4268140 |     |
199 | |  w   | -0.4477795 |      0.1521917      | -2.9422068 | 0.0032588 |  ** |
200 | |  k   | 0.1235808  |      0.0508836      | 2.4286941  | 0.0151533 |  *  |
201 | | _con | 1.5630849  |      0.4993484      | 3.1302492  | 0.0017466 |  ** |
202 | +------+------------+---------------------+------------+-----------+-----+
203 | Hansen test of overid. restrictions: chi(46) = 96.442 Prob > Chi2 = 0.000
204 | Arellano-Bond test for AR(1) in first differences: z = -2.35 Pr > z =0.019
205 | Arellano-Bond test for AR(2) in first differences: z = -1.15 Pr > z =0.251
206 | ```
207 | 
208 | 
209 | 
210 | 
211 | # xtabond2
212 | 
213 | ```
214 | mata: mata set matafavor speed, perm
215 | insheet using "data.csv"
216 | xtset(id year)
217 | xtabond2 n L(1/2).n w k , gmm(n, lag(2 4)) gmm(w, lag(1 3)) iv(k ) nolevel twostep robust 
218 | 
219 | ```
220 | ```
221 | 
222 | Favoring speed over space. To switch, type or click on mata: mata set matafavor space, perm.
223 | Warning: Two-step estimated covariance matrix of moments is singular.
224 |   Using a generalized inverse to calculate optimal weighting matrix for two-step estimation.
225 |   Difference-in-Sargan/Hansen statistics may be negative.
226 | 
227 | Dynamic panel-data estimation, two-step difference GMM
228 | ------------------------------------------------------------------------------
229 | Group variable: id                              Number of obs      =       611
230 | Time variable : year                            Number of groups   =       140
231 | Number of instruments = 36                      Obs per group: min =         4
232 | Wald chi2(0)  =         .                                      avg =      4.36
233 | Prob > chi2   =         .                                      max =         6
234 | ------------------------------------------------------------------------------
235 |              |              Corrected
236 |            n |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
237 | -------------+----------------------------------------------------------------
238 |            n |
239 |          L1. |   .1700616   .1046652     1.62   0.104    -.0350784    .3752016
240 |          L2. |  -.0113381   .0377205    -0.30   0.764    -.0852688    .0625926
241 |              |
242 |            w |  -.9510582   .1277298    -7.45   0.000    -1.201404   -.7007124
243 |            k |   .4637223   .0718328     6.46   0.000     .3229325    .6045121
244 | ------------------------------------------------------------------------------
245 | Instruments for first differences equation
246 |   Standard
247 |     D.k
248 |   GMM-type (missing=0, separate instruments for each period unless collapsed)
249 |     L(1/3).w
250 |     L(2/4).n
251 | ------------------------------------------------------------------------------
252 | Arellano-Bond test for AR(1) in first differences: z =  -1.19  Pr > z =  0.235
253 | Arellano-Bond test for AR(2) in first differences: z =  -0.81  Pr > z =  0.417
254 | ------------------------------------------------------------------------------
255 | Sargan test of overid. restrictions: chi2(32)   =  91.61  Prob > chi2 =  0.000
256 |   (Not robust, but not weakened by many instruments.)
257 | Hansen test of overid. restrictions: chi2(32)   =  47.86  Prob > chi2 =  0.035
258 |   (Robust, but weakened by many instruments.)
259 | 
260 | Difference-in-Hansen tests of exogeneity of instrument subsets:
261 |   gmm(n, lag(2 4))
262 |     Hansen test excluding group:     chi2(15)   =  23.75  Prob > chi2 =  0.069
263 |     Difference (null H = exogenous): chi2(17)   =  24.11  Prob > chi2 =  0.117
264 |   gmm(w, lag(1 3))
265 |     Hansen test excluding group:     chi2(14)   =  17.25  Prob > chi2 =  0.243
266 |     Difference (null H = exogenous): chi2(18)   =  30.61  Prob > chi2 =  0.032
267 |   iv(k)
268 |     Hansen test excluding group:     chi2(31)   =  38.33  Prob > chi2 =  0.171
269 |     Difference (null H = exogenous): chi2(1)    =   9.53  Prob > chi2 =  0.002
270 | 
271 | ```
272 | 
273 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 Dazhong Wu
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/Main.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import pandas as pd
 3 | from  pydynpd import regression
 4 | 
 5 | import time
 6 | 
 7 | 
 8 | df = pd.read_csv("test_data.csv")
 9 | 
10 | 
11 | #command_str='y L(1:?).y L(1:?).x  | gmm(y, 2:3) iv(L(1:1).x)| timedumm'
12 | #mydpd = regression.abond(command_str, df, ['id', 'year'])
13 | df = pd.read_csv("data.csv")
14 | #mydpd = regression.abond('n L(1:2).n w k  | gmm(n, 2:4) gmm(w, 1:3)  iv(k) |nolevel fod ', df, ['id', 'year'])
15 | command_str='n L(1:?).n w k | gmm(n, 2:3) pred(w k)| fod'
16 | mydpd = regression.abond(command_str, df, ['id', 'year'])
17 | 
18 | for i in range(0, len(mydpd.models)):
19 |     print("model", end=" ")
20 |     print(i+1, end=": bic= ")
21 |     print(mydpd.models[i].MMSC_LU["bic"], end = "; hqic=")
22 |     print(mydpd.models[i].MMSC_LU["hqic"], end="; aic=")
23 |     print(mydpd.models[i].MMSC_LU["aic"])
24 | 
25 | 
26 | 
27 | 
28 | 


--------------------------------------------------------------------------------
/Main2.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import pandas as pd
 3 | from  pydynpd import regression
 4 | 
 5 | import time
 6 | 
 7 | a=time.time()
 8 | 
 9 | 
10 | df = pd.read_csv("data.csv")
11 | for i in range(0,101):
12 | #mydpd = regression.abond('n L(1:2).n w k | gmm(n, 2:4) gmm(w, 1:3) iv(k)', df, ['id', 'year'])
13 |     mydpd = regression.abond('n L(1:2).n w k | gmm(n, 2:.) pred(w k)', df, ['id', 'year'])
14 | 
15 | print(time.time()-a)


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # pydynpd: A python package for dynamic panel model
  2 | [![DOI](https://zenodo.org/badge/466146436.svg)](https://zenodo.org/badge/latestdoi/466146436)
  3 | [![pypi package](https://img.shields.io/pypi/v/pydynpd?style=plastic)](https://pypi.org/project/pydynpd/)
  4 | 
  5 | pydynpd is the first python package to implement Difference and System GMM [1][2][3] to estimate dynamic panel data models.
  6 | 
  7 | Below is a typical dynamic panel data model:
  8 | 
  9 | ![y_{it}=\sum^p_{l=1} \alpha_l y_{i,t-l}+\beta x_{i,t}+\gamma s_{i,t}+u_i+\epsilon_{it}](https://latex.codecogs.com/svg.image?y_{it}=\sum^p_{l=1}&space;\alpha_l&space;y_{i,t-l}&plus;\beta&space;x_{i,t}&plus;\gamma&space;s_{i,t}&plus;u_i&plus;\epsilon_{it})
 10 |  
 11 | In the equation above, x is a predetermined variable that is potentially correlated with past errors, s is a strictly exogenous variable, and u is fixed effect.
 12 | 
 13 | This software has been published in Journal of Open Source Software: 
 14 | ```
 15 | Wu et al., (2023). pydynpd: A Python package for dynamic panel model. 
 16 | Journal of Open Source Software, 8(83), 4416, https://doi.org/10.21105/joss.04416
 17 | ```
 18 | ## Features supported:
 19 | * Differene and System GMM
 20 | * One-step, two-step, and iterative estimates
 21 | * First-difference and forward orthogonal deviation transformations
 22 | * Robust standard errors. For two-step GMM, the calculation suggested by Windmeijer (2005) is used.
 23 | * Hansen over-identification test
 24 | * Arellano-Bond test for autocorrelation
 25 | * Time dummies
 26 | * Collapse GMM instruments to limit instrument proliferation
 27 | * Search for models based on users' request, rather than just run the model specified by users as other packages do
 28 | 
 29 | 
 30 | ## Installation:
 31 | ``` 
 32 | pip install pydynpd
 33 | ``` 
 34 | This package requires: numpy, scipy, pandas, and PrettyTable
 35 | 
 36 | ## Usage:
 37 | ``` 
 38 | import pandas as pd
 39 | from  pydynpd import regression
 40 | 
 41 | df = pd.read_csv("data.csv")
 42 | command_str='n L(1:2).n w k  | gmm(n, 2:4) gmm(w, 1:3)  iv(k) | timedumm  nolevel'
 43 | mydpd = regression.abond(command_str, df, ['id', 'year'])
 44 | ``` 
 45 | result:
 46 | ``` 
 47 | Dynamic panel-data estimation, two-step difference GMM
 48 |  Group variable: id             Number of obs = 611    
 49 |  Time variable: year            Number of groups = 140 
 50 |  Number of instruments = 42                            
 51 | +-----------+------------+---------------------+------------+-----------+
 52 | |     n     |   coef.    | Corrected Std. Err. |     z      |   P>|z|   |
 53 | +-----------+------------+---------------------+------------+-----------+
 54 | |    L1.n   | 0.2710675  |      0.1382542      | 1.9606462  | 0.0499203 |
 55 | |    L2.n   | -0.0233928 |      0.0419665      | -0.5574151 | 0.5772439 |
 56 | |     w     | -0.5668527 |      0.2092231      | -2.7093219 | 0.0067421 |
 57 | |     k     | 0.3613939  |      0.0662624      | 5.4539824  | 0.0000000 |
 58 | | year_1979 | 0.0011898  |      0.0092322      | 0.1288765  | 0.8974554 |
 59 | | year_1980 | -0.0316432 |      0.0116155      | -2.7242254 | 0.0064453 |
 60 | | year_1981 | -0.0900163 |      0.0206593      | -4.3571693 | 0.0000132 |
 61 | | year_1982 | -0.0996210 |      0.0296036      | -3.3651654 | 0.0007650 |
 62 | | year_1983 | -0.0693308 |      0.0404276      | -1.7149347 | 0.0863572 |
 63 | | year_1984 | -0.0614505 |      0.0475525      | -1.2922666 | 0.1962648 |
 64 | +-----------+------------+---------------------+------------+-----------+
 65 | Hansen test of overid. restrictions: chi(32) = 32.666 Prob > Chi2 = 0.434
 66 | Arellano-Bond test for AR(1) in first differences: z = -1.29 Pr > z =0.198
 67 | Arellano-Bond test for AR(2) in first differences: z = -0.31 Pr > z =0.760
 68 | ``` 
 69 | ## Tutorial
 70 | A detailed tutorial is given in the following two documents:<br>
 71 | [inputs of the abond command](https://github.com/dazhwu/pydynpd/blob/main/vignettes/Tutorial.ipynb).<br>
 72 | [outputs of the abond command](https://github.com/dazhwu/pydynpd/blob/main/vignettes/API.md).
 73 | 
 74 | ## Similar packages
 75 | The objective of the package is similar to the following open-source packages: <br>
 76 | Package | Language | version
 77 | --- | --- | ---
 78 | plm | R | 2.6-1
 79 | panelvar | R| 0.5.3
 80 | pdynmc | R| 0.9.7
 81 | 
 82 | To compare pydynpd with similar packages, we performed performance tests. More specifically, in each test for each package we run 100 times to estimate the same model with the same data. For verification, the tests also include Stata package xtabond2 though Stata is a commercial software. We use xtabond2 for regression result verification because it is the most popular package in estimating dynamic panel models. Figure below is from one of the tests. Note that directly comparing xtabond2's speed with R or Python packages is a little unfair because the calculation part of xtabond2 was compiled while pydynpd and the three R packages are interpreted; xtabond2 should have a clear advantage on speed. 
 83 | 
 84 | ![Alt text](https://raw.githubusercontent.com/dazhwu/pydynpd/main/Benchmark/images/Test_1.svg)
 85 | 
 86 | Though developed in pure python, pydynpd is not far behind of xtabond2. Moreover, it is significanly faster than the three R packages which are interpreted scripts just like pydynpd.
 87 | 
 88 | A detailed description of the tests can be found [here](https://github.com/dazhwu/pydynpd/blob/main/Benchmark/performance_comparison.md)
 89 | 
 90 | ## FAQs
 91 | ### How to extract coefficients from regression?
 92 | For example, if you run:
 93 | ```
 94 | df = pd.read_csv("data.csv")
 95 | mydpd = regression.abond('n L(1:2).n w k  | gmm(n, 2:4) gmm(w, 1:3)  iv(k)  ', df, ['id', 'year'])
 96 | ```
 97 | 
 98 | The output regression table will be 
 99 | ```
100 | +------+------------+---------------------+------------+-----------+-----+
101 | |  n   |   coef.    | Corrected Std. Err. |     z      |   P>|z|   |     |
102 | +------+------------+---------------------+------------+-----------+-----+
103 | | L1.n | 0.9453810  |      0.1429764      | 6.6121470  | 0.0000000 | *** |
104 | | L2.n | -0.0860069 |      0.1082318      | -0.7946553 | 0.4268140 |     |
105 | |  w   | -0.4477795 |      0.1521917      | -2.9422068 | 0.0032588 |  ** |
106 | |  k   | 0.1235808  |      0.0508836      | 2.4286941  | 0.0151533 |  *  |
107 | | _con | 1.5630849  |      0.4993484      | 3.1302492  | 0.0017466 |  ** |
108 | +------+------------+---------------------+------------+-----------+-----+
109 | ```
110 | If you want to programably extract a value, for example, the first z value (6.6121470) then you can add the following:
111 | ```
112 | >>>mydpd.models[0].regression_table.iloc[0]['z_value']
113 | 6.6121469997085915
114 | ```
115 | Basically, the object mydpd returned above contains models because pydynpd allows us to run and compare multiple models at the same time. By default, it only contains one model which is models[0]. A model has a regression table which is a pandas dataframe:
116 | ```
117 |  >>>mydpd.models[0].regression_table
118 | 
119 |   variable  coefficient   std_err   z_value       p_value  sig
120 | 0     L1.n     0.945381  0.142976  6.612147  3.787856e-11  ***
121 | 1     L2.n    -0.086007  0.108232 -0.794655  4.268140e-01     
122 | 2        w    -0.447780  0.152192 -2.942207  3.258822e-03   **
123 | 3        k     0.123581  0.050884  2.428694  1.515331e-02    *
124 | 4     _con     1.563085  0.499348  3.130249  1.746581e-03   **
125 | 
126 | ```
127 | So you can extract any value from this dataframe.
128 | 
129 | ### How to use pydynpd with R?
130 | First, you need to install Python on your computer; then install pydynpd.
131 | ```
132 | pip install pydynpd
133 | ```
134 | Second, in R environment install package reticulate:
135 | ```
136 | install.packages("reticulate")
137 | ```
138 | Third, you configure Rstudio so that it can communicate with Python installed in step 1. You can find instruction at
139 | https://www.rstudio.com/blog/rstudio-v1-4-preview-python-support/
140 | 
141 | Finally, you can use the following template to call pydynpd from R. For comparision, the corresponding Python code is also incuded.
142 | <table>
143 |  <tr>
144 |   <td>   R  </td>
145 |   <td>   <pre lang="R">
146 | library(reticulate) 
147 | dynpd <- import("pydynpd.regression", convert = TRUE)
148 | fd <- import("pandas", convert=TRUE)
149 | df <- fd$read_csv("data.csv")
150 | 
151 | result <- dynpd$abond('n L(1:2).n w k | gmm(n, 2:4) gmm(w, 1:3) iv(k)', df, c('id', 'year'))
152 | </pre>
153 | </td>  </tr>
154 |  <tr> 
155 |   <td>    Python   </td>
156 |   <td>    <pre lang="Python">
157 | import pandas as pd
158 | from  pydynpd import regression
159 | df = pd.read_csv("data.csv")
160 | mydpd = regression.abond('n L(1:2).n w k | gmm(n, 2:4) gmm(w, 1:3) iv(k)', df, ['id', 'year'])
161 | </pre>
162 |   </td>
163 | </tr>
164 | </table>
165 | 
166 | Code above generates the following result:
167 | ```
168 |  Dynamic panel-data estimation, two-step system GMM
169 |  Group variable: id                               Number of obs = 611     
170 |  Time variable: year                              Min obs per group: 4    
171 |  Number of instruments = 51                       Max obs per group: 6    
172 |  Number of groups = 140                           Avg obs per group: 4.36 
173 | +------+------------+---------------------+------------+-----------+-----+
174 | |  n   |   coef.    | Corrected Std. Err. |     z      |   P>|z|   |     |
175 | +------+------------+---------------------+------------+-----------+-----+
176 | | L1.n | 0.9453810  |      0.1429764      | 6.6121470  | 0.0000000 | *** |
177 | | L2.n | -0.0860069 |      0.1082318      | -0.7946553 | 0.4268140 |     |
178 | |  w   | -0.4477795 |      0.1521917      | -2.9422068 | 0.0032588 |  ** |
179 | |  k   | 0.1235808  |      0.0508836      | 2.4286941  | 0.0151533 |  *  |
180 | | _con | 1.5630849  |      0.4993484      | 3.1302492  | 0.0017466 |  ** |
181 | +------+------------+---------------------+------------+-----------+-----+
182 | Hansen test of overid. restrictions: chi(46) = 96.442 Prob > Chi2 = 0.000
183 | Arellano-Bond test for AR(1) in first differences: z = -2.35 Pr > z =0.019
184 | Arellano-Bond test for AR(2) in first differences: z = -1.15 Pr > z =0.251
185 | ```
186 | As you can see, you don't need to change the command string in R. The only parameter you have to change is the identifiers; ['id', 'year'] in Python is changed to c('id', 'year') in R. Also, from R you can access the properties of the result above the same way you work on Python. For example, after running code above if you run the following R script:
187 | ```
188 | reg_table=result$models[[1]]$regression_table
189 | print(reg_table)
190 | ```
191 | The output is:
192 | ```
193 |   variable coefficient    std_err    z_value      p_value sig
194 | 1     L1.n  0.94538100 0.14297640  6.6121470 3.787856e-11 ***
195 | 2     L2.n -0.08600694 0.10823176 -0.7946553 4.268140e-01    
196 | 3        w -0.44777955 0.15219173 -2.9422068 3.258822e-03  **
197 | 4        k  0.12358078 0.05088363  2.4286941 1.515331e-02   *
198 | 5     _con  1.56308487 0.49934839  3.1302492 1.746581e-03  **
199 | ```
200 | In the example above, reg_table is an R data frame.
201 | 
202 | ## Contributing
203 | There are several ways to contribute to pydynpd:
204 | 
205 | Submit issue/bug reports [here](https://github.com/dazhwu/pydynpd/issues/), or try to fix the problem yourself and then submit a [pull request](https://github.com/dazhwu/pydynpd/pulls).
206 | 
207 | Browse the source code and see if anything looks out of place - let us know!
208 | 
209 | ## References
210 | <a id="1">[1]</a> 
211 | Arellano, M., & Bond, S. (1991). Some tests of specification for panel data: Monte Carlo evidence and an application to employment equations. The review of economic studies, 58(2), 277-297.
212 | 
213 | <a id="2">[2]</a> 
214 | Arellano, M., & Bover, O. (1995). Another look at the instrumental variable estimation of error-components models. Journal of econometrics, 68(1), 29-51.
215 | 
216 | <a id="3">[3]</a> 
217 | Blundell, R., & Bond, S. (1998). Initial conditions and moment restrictions in dynamic panel data models. Journal of econometrics, 87(1), 115-143.
218 | 
219 | <a id="4">[4]</a>
220 | Roodman, D. (2009). How to do xtabond2: An introduction to difference and system GMM in Stata. The stata journal, 9(1), 86-136.
221 | 
222 | <a id="5">[5]</a> 
223 | Windmeijer, F. (2005). A finite sample correction for the variance of linear efficient two-step GMM estimators. Journal of econometrics, 126(1), 25-51.
224 | 


--------------------------------------------------------------------------------
/build_upload.sh:
--------------------------------------------------------------------------------
1 | rm -rf dist/*
2 | python3 -m build
3 | python3 -m twine upload --repository pypi dist/*.whl
4 | 


--------------------------------------------------------------------------------
/contributing.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 | 
3 | There are several ways to contribute to pydynpd!
4 | - Submit issue/bug reports [here](https://github.com/wouterboomsma/eigency/issues),
5 | or try to fix the problem yourself and then [submit a pull request](https://github.com/wouterboomsma/eigency/pulls).
6 | - Request features or ask questions [here](https://github.com/wouterboomsma/eigency/issues).
7 | - Browse [the source code](https://github.com/wouterboomsma/eigency) and see if anything looks out of place - let us know!
8 | 
9 | 


--------------------------------------------------------------------------------
/output.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/output.html


--------------------------------------------------------------------------------
/pydynpd.egg-info/PKG-INFO:
--------------------------------------------------------------------------------
  1 | Metadata-Version: 2.1
  2 | Name: pydynpd
  3 | Version: 0.2.0
  4 | Summary: A package to estimate dynamic panel data model using difference GMM and system GMM.
  5 | Home-page: https://github.com/dazhwu/pydynpd
  6 | Author: Dazhong Wu
  7 | Author-email: wudz800@gmail.com
  8 | License: MIT
  9 | Classifier: License :: OSI Approved :: MIT License
 10 | Classifier: Programming Language :: Python
 11 | Classifier: Programming Language :: Python :: 3
 12 | Classifier: Programming Language :: Python :: 3.6
 13 | Classifier: Programming Language :: Python :: Implementation :: CPython
 14 | Classifier: Programming Language :: Python :: Implementation :: PyPy
 15 | Requires-Python: >=3.6.0
 16 | Description-Content-Type: text/markdown
 17 | License-File: LICENSE
 18 | 
 19 | 
 20 | # pydynpd: Dynamic panel estimation for Difference and System GMM (generalized method-of-moments)
 21 | [![DOI](https://zenodo.org/badge/466146436.svg)](https://zenodo.org/badge/latestdoi/466146436)
 22 | [![pypi package](https://img.shields.io/pypi/v/pydynpd?style=plastic)](https://pypi.org/project/pydynpd/)
 23 | 
 24 | pydynpd is the first python package to implement Difference and System GMM [1][2][3] to estimate dynamic panel data models.
 25 | 
 26 | Below is a typical dynamic panel data model:
 27 | 
 28 | ![y_{it}=\sum^p_{l=1} \alpha_l y_{i,t-l}+\beta x_{i,t}+\gamma s_{i,t}+u_i+\epsilon_{it}](https://latex.codecogs.com/svg.image?y_{it}=\sum^p_{l=1}&space;\alpha_l&space;y_{i,t-l}&plus;\beta&space;x_{i,t}&plus;\gamma&space;s_{i,t}&plus;u_i&plus;\epsilon_{it})
 29 |  
 30 | In the equation above, x is a predetermined variable that is potentially correlated with past errors, s is a strictly exogenous variable, and u is fixed effect.
 31 | 
 32 | ## Features supported:
 33 | * Differene and System GMM
 34 | * One-step, two-step, and iterative estimates
 35 | * First-difference and forward orthogonal deviation transformations
 36 | * Robust standard errors. For two-step GMM, the calculation suggested by Windmeijer (2005) is used.
 37 | * Hansen over-identification test
 38 | * Arellano-Bond test for autocorrelation
 39 | * Time dummies
 40 | * Collapse GMM instruments to limit instrument proliferation
 41 | * Search for models based on users' request, rather than just run the model specified by users as other packages do
 42 | 
 43 | 
 44 | ## Installation:
 45 | ``` 
 46 | pip install pydynpd
 47 | ``` 
 48 | This package requires: numpy, scipy, pandas, and PrettyTable
 49 | 
 50 | ## Usage:
 51 | ``` 
 52 | import pandas as pd
 53 | from  pydynpd import regression
 54 | 
 55 | df = pd.read_csv("data.csv")
 56 | command_str='n L(1:2).n w k  | gmm(n, 2:4) gmm(w, 1:3)  iv(k) | timedumm  nolevel'
 57 | mydpd = regression.abond(command_str, df, ['id', 'year'])
 58 | ``` 
 59 | result:
 60 | ``` 
 61 | Dynamic panel-data estimation, two-step difference GMM
 62 |  Group variable: id             Number of obs = 611    
 63 |  Time variable: year            Number of groups = 140 
 64 |  Number of instruments = 42                            
 65 | +-----------+------------+---------------------+------------+-----------+
 66 | |     n     |   coef.    | Corrected Std. Err. |     z      |   P>|z|   |
 67 | +-----------+------------+---------------------+------------+-----------+
 68 | |    L1.n   | 0.2710675  |      0.1382542      | 1.9606462  | 0.0499203 |
 69 | |    L2.n   | -0.0233928 |      0.0419665      | -0.5574151 | 0.5772439 |
 70 | |     w     | -0.5668527 |      0.2092231      | -2.7093219 | 0.0067421 |
 71 | |     k     | 0.3613939  |      0.0662624      | 5.4539824  | 0.0000000 |
 72 | | year_1979 | 0.0011898  |      0.0092322      | 0.1288765  | 0.8974554 |
 73 | | year_1980 | -0.0316432 |      0.0116155      | -2.7242254 | 0.0064453 |
 74 | | year_1981 | -0.0900163 |      0.0206593      | -4.3571693 | 0.0000132 |
 75 | | year_1982 | -0.0996210 |      0.0296036      | -3.3651654 | 0.0007650 |
 76 | | year_1983 | -0.0693308 |      0.0404276      | -1.7149347 | 0.0863572 |
 77 | | year_1984 | -0.0614505 |      0.0475525      | -1.2922666 | 0.1962648 |
 78 | +-----------+------------+---------------------+------------+-----------+
 79 | Hansen test of overid. restrictions: chi(32) = 32.666 Prob > Chi2 = 0.434
 80 | Arellano-Bond test for AR(1) in first differences: z = -1.29 Pr > z =0.198
 81 | Arellano-Bond test for AR(2) in first differences: z = -0.31 Pr > z =0.760
 82 | ``` 
 83 | ## Tutorial
 84 | A detailed tutorial is [here](https://github.com/dazhwu/pydynpd/blob/main/vignettes/Tutorial.ipynb).
 85 | 
 86 | ## Similar packages
 87 | The objective of the package is similar to the following open-source packages: <br>
 88 | Package | Language | version
 89 | --- | --- | ---
 90 | plm | R | 2.6-1
 91 | panelvar | R| 0.5.3
 92 | pdynmc | R| 0.9.7
 93 | 
 94 | To compare pydynpd with similar packages, we performed performance tests. More specifically, in each test for each package we run 100 times to estimate the same model with the same data. For verification, the tests also include Stata package xtabond2 though Stata is a commercial software. We use xtabond2 for regression result verification because it is the most popular package in estimating dynamic panel models. Figure below is from one of the tests. Note that directly comparing xtabond2's speed with R or Python packages is a little unfair because the calculation part of xtabond2 was compiled while pydynpd and the three R packages are interpreted; xtabond2 should have a clear advantage on speed. 
 95 | 
 96 | ![Alt text](https://raw.githubusercontent.com/dazhwu/pydynpd/main/Benchmark/images/Test_1.svg)
 97 | 
 98 | Though developed in pure python, pydynpd is not far behind of xtabond2. Moreover, it is significanly faster than the three R packages which are interpreted scripts just like pydynpd.
 99 | 
100 | A detailed description of the tests can be found [here](https://github.com/dazhwu/pydynpd/blob/main/Benchmark/performance_comparison.md)
101 | 
102 | ## FAQs
103 | 1. How to extract coefficients from regression?
104 | For example, if you run:
105 | ```
106 | df = pd.read_csv("data.csv")
107 | mydpd = regression.abond('n L(1:2).n w k  | gmm(n, 2:4) gmm(w, 1:3)  iv(k)  ', df, ['id', 'year'])
108 | ```
109 | 
110 | The output regression table will be 
111 | ```
112 | +------+------------+---------------------+------------+-----------+-----+
113 | |  n   |   coef.    | Corrected Std. Err. |     z      |   P>|z|   |     |
114 | +------+------------+---------------------+------------+-----------+-----+
115 | | L1.n | 0.9453810  |      0.1429764      | 6.6121470  | 0.0000000 | *** |
116 | | L2.n | -0.0860069 |      0.1082318      | -0.7946553 | 0.4268140 |     |
117 | |  w   | -0.4477795 |      0.1521917      | -2.9422068 | 0.0032588 |  ** |
118 | |  k   | 0.1235808  |      0.0508836      | 2.4286941  | 0.0151533 |  *  |
119 | | _con | 1.5630849  |      0.4993484      | 3.1302492  | 0.0017466 |  ** |
120 | +------+------------+---------------------+------------+-----------+-----+
121 | ```
122 | If you want to programably extract a value, for example, the first z value (6.6121470) then you can add the following:
123 | ```
124 | >>>mydpd.models[0].regression_table.iloc[0]['z_value']
125 | 6.6121469997085915
126 | ```
127 | Basically, the object mydpd returned above contains models because pydynpd allows us to run and compare multiple models at the same time. By default, it only contains one model which is models[0]. A model has a regression table which is a pandas dataframe:
128 | ```
129 |  >>>mydpd.models[0].regression_table
130 | 
131 |   variable  coefficient   std_err   z_value       p_value  sig
132 | 0     L1.n     0.945381  0.142976  6.612147  3.787856e-11  ***
133 | 1     L2.n    -0.086007  0.108232 -0.794655  4.268140e-01     
134 | 2        w    -0.447780  0.152192 -2.942207  3.258822e-03   **
135 | 3        k     0.123581  0.050884  2.428694  1.515331e-02    *
136 | 4     _con     1.563085  0.499348  3.130249  1.746581e-03   **
137 | 
138 | ```
139 | So you can extract any value from this dataframe.
140 | 
141 | 
142 | ## Contributing
143 | There are several ways to contribute to pydynpd:
144 | 
145 | Submit issue/bug reports [here](https://github.com/dazhwu/pydynpd/issues/), or try to fix the problem yourself and then submit a [pull request](https://github.com/dazhwu/pydynpd/pulls).
146 | 
147 | Browse the source code and see if anything looks out of place - let us know!
148 | 
149 | ## References
150 | <a id="1">[1]</a> 
151 | Arellano, M., & Bond, S. (1991). Some tests of specification for panel data: Monte Carlo evidence and an application to employment equations. The review of economic studies, 58(2), 277-297.
152 | 
153 | <a id="2">[2]</a> 
154 | Arellano, M., & Bover, O. (1995). Another look at the instrumental variable estimation of error-components models. Journal of econometrics, 68(1), 29-51.
155 | 
156 | <a id="3">[3]</a> 
157 | Blundell, R., & Bond, S. (1998). Initial conditions and moment restrictions in dynamic panel data models. Journal of econometrics, 87(1), 115-143.
158 | 
159 | <a id="4">[4]</a>
160 | Roodman, D. (2009). How to do xtabond2: An introduction to difference and system GMM in Stata. The stata journal, 9(1), 86-136.
161 | 
162 | <a id="5">[5]</a> 
163 | Windmeijer, F. (2005). A finite sample correction for the variance of linear efficient two-step GMM estimators. Journal of econometrics, 126(1), 25-51.
164 | 


--------------------------------------------------------------------------------
/pydynpd.egg-info/SOURCES.txt:
--------------------------------------------------------------------------------
 1 | LICENSE
 2 | README.md
 3 | pyproject.toml
 4 | setup.py
 5 | pydynpd/__init__.py
 6 | pydynpd/__version__.py
 7 | pydynpd/command.py
 8 | pydynpd/common_functions.py
 9 | pydynpd/dynamic_panel_model.py
10 | pydynpd/info.py
11 | pydynpd/instruments.py
12 | pydynpd/model_organizer.py
13 | pydynpd/model_summary.py
14 | pydynpd/panel_data.py
15 | pydynpd/regression.py
16 | pydynpd/specification_tests.py
17 | pydynpd/variable.py
18 | pydynpd.egg-info/PKG-INFO
19 | pydynpd.egg-info/SOURCES.txt
20 | pydynpd.egg-info/dependency_links.txt
21 | pydynpd.egg-info/requires.txt
22 | pydynpd.egg-info/top_level.txt


--------------------------------------------------------------------------------
/pydynpd.egg-info/dependency_links.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/pydynpd.egg-info/requires.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | scipy
3 | prettytable
4 | pandas
5 | 


--------------------------------------------------------------------------------
/pydynpd.egg-info/top_level.txt:
--------------------------------------------------------------------------------
1 | pydynpd
2 | 


--------------------------------------------------------------------------------
/pydynpd/.idea/.name:
--------------------------------------------------------------------------------
1 | dynamic_panel_model.py


--------------------------------------------------------------------------------
/pydynpd/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------
 1 | <component name="InspectionProjectProfileManager">
 2 |   <profile version="1.0">
 3 |     <option name="myName" value="Project Default" />
 4 |     <inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
 5 |       <option name="ignoredErrors">
 6 |         <list>
 7 |           <option value="N801" />
 8 |         </list>
 9 |       </option>
10 |     </inspection_tool>
11 |   </profile>
12 | </component>


--------------------------------------------------------------------------------
/pydynpd/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 | <component name="InspectionProjectProfileManager">
2 |   <settings>
3 |     <option name="USE_PROJECT_PROFILE" value="false" />
4 |     <version value="1.0" />
5 |   </settings>
6 | </component>


--------------------------------------------------------------------------------
/pydynpd/.idea/misc.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="ProjectRootManager" version="2" project-jdk-name="3.10 @ Ubuntu" project-jdk-type="Python SDK" />
4 | </project>


--------------------------------------------------------------------------------
/pydynpd/.idea/modules.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="ProjectModuleManager">
4 |     <modules>
5 |       <module fileurl="file://$PROJECT_DIR$/.idea/pydynpd.iml" filepath="$PROJECT_DIR$/.idea/pydynpd.iml" />
6 |     </modules>
7 |   </component>
8 | </project>


--------------------------------------------------------------------------------
/pydynpd/.idea/pydynpd.iml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <module type="PYTHON_MODULE" version="4">
3 |   <component name="NewModuleRootManager">
4 |     <content url="file://$MODULE_DIR$" />
5 |     <orderEntry type="jdk" jdkName="3.10 @ Ubuntu" jdkType="Python SDK" />
6 |     <orderEntry type="sourceFolder" forTests="false" />
7 |   </component>
8 | </module>


--------------------------------------------------------------------------------
/pydynpd/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="VcsDirectoryMappings">
4 |     <mapping directory="$PROJECT_DIR$/.." vcs="Git" />
5 |   </component>
6 | </project>


--------------------------------------------------------------------------------
/pydynpd/.idea/workspace.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project version="4">
 3 |   <component name="ChangeListManager">
 4 |     <list default="true" id="151ecc00-1259-4166-aff0-f4de56177a04" name="Changes" comment="">
 5 |       <change beforePath="$PROJECT_DIR$/.idea/misc.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/misc.xml" afterDir="false" />
 6 |       <change beforePath="$PROJECT_DIR$/.idea/pydynpd.iml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/pydynpd.iml" afterDir="false" />
 7 |       <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
 8 |     </list>
 9 |     <option name="SHOW_DIALOG" value="false" />
10 |     <option name="HIGHLIGHT_CONFLICTS" value="true" />
11 |     <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
12 |     <option name="LAST_RESOLUTION" value="IGNORE" />
13 |   </component>
14 |   <component name="Git.Settings">
15 |     <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$/.." />
16 |   </component>
17 |   <component name="MarkdownSettingsMigration">
18 |     <option name="stateVersion" value="1" />
19 |   </component>
20 |   <component name="ProjectId" id="26Yrtj5Ov9T3BHlM1aqdNXatHij" />
21 |   <component name="ProjectLevelVcsManager" settingsEditedManually="true" />
22 |   <component name="ProjectViewState">
23 |     <option name="hideEmptyMiddlePackages" value="true" />
24 |     <option name="showLibraryContents" value="true" />
25 |   </component>
26 |   <component name="PropertiesComponent">
27 |     <property name="RunOnceActivity.OpenProjectViewOnStart" value="true" />
28 |     <property name="RunOnceActivity.ShowReadmeOnStart" value="true" />
29 |     <property name="WebServerToolWindowFactoryState" value="false" />
30 |     <property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
31 |   </component>
32 |   <component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="application-level" UseSingleDictionary="true" transferred="true" />
33 |   <component name="TaskManager">
34 |     <task active="true" id="Default" summary="Default task">
35 |       <changelist id="151ecc00-1259-4166-aff0-f4de56177a04" name="Changes" comment="" />
36 |       <created>1647611579337</created>
37 |       <option name="number" value="Default" />
38 |       <option name="presentableId" value="Default" />
39 |       <updated>1647611579337</updated>
40 |       <workItem from="1647611580468" duration="16000" />
41 |       <workItem from="1658329878771" duration="342000" />
42 |     </task>
43 |     <servers />
44 |   </component>
45 |   <component name="TypeScriptGeneratedFilesManager">
46 |     <option name="version" value="3" />
47 |   </component>
48 | </project>


--------------------------------------------------------------------------------
/pydynpd/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/pydynpd/__init__.py


--------------------------------------------------------------------------------
/pydynpd/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/pydynpd/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/pydynpd/__pycache__/command.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/pydynpd/__pycache__/command.cpython-39.pyc


--------------------------------------------------------------------------------
/pydynpd/__pycache__/common_functions.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/pydynpd/__pycache__/common_functions.cpython-39.pyc


--------------------------------------------------------------------------------
/pydynpd/__pycache__/info.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/pydynpd/__pycache__/info.cpython-39.pyc


--------------------------------------------------------------------------------
/pydynpd/__pycache__/panel_data.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/pydynpd/__pycache__/panel_data.cpython-39.pyc


--------------------------------------------------------------------------------
/pydynpd/__pycache__/regression.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/pydynpd/__pycache__/regression.cpython-39.pyc


--------------------------------------------------------------------------------
/pydynpd/__pycache__/specification_tests.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/pydynpd/__pycache__/specification_tests.cpython-39.pyc


--------------------------------------------------------------------------------
/pydynpd/__pycache__/variable.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/pydynpd/__pycache__/variable.cpython-39.pyc


--------------------------------------------------------------------------------
/pydynpd/__version__.py:
--------------------------------------------------------------------------------
1 | # 8b    d8 Yb  dP 88""Yb    db     dP""b8 88  dP    db     dP""b8 888888
2 | # 88b  d88  YbdP  88__dP   dPYb   dP   `" 88odP    dPYb   dP   `" 88__
3 | # 88YbdP88   8P   88"""   dP__Yb  Yb      88"Yb   dP__Yb  Yb  "88 88""
4 | # 88 YY 88  dP    88     dP""""Yb  YboodP 88  Yb dP""""Yb  YboodP 888888
5 | 
6 | VERSION = (5, 2, 0)
7 | 
8 | __version__ = '.'.join(map(str, VERSION))
9 | 


--------------------------------------------------------------------------------
/pydynpd/command.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | import sys
  3 | from sys import exit
  4 | 
  5 | from pydynpd.info import options_info
  6 | from pydynpd.variable import gmm_var, regular_variable
  7 | 
  8 | 
  9 | class temp_list:
 10 |     def __init__(self, cols):
 11 |         self.names = []
 12 |         self.lags = []
 13 |         self.cols = cols
 14 |         self.adjustable_min_lags = []  # True False
 15 |         self.adjustable_max_lags = []  # True False
 16 | 
 17 |     def insert(self, name, lags, min_adj_lag=False, max_adj_lag=False):
 18 |         if name not in self.cols:
 19 |             return -1
 20 | 
 21 |         if name not in self.names:
 22 |             self.names.append(name)
 23 |             self.lags.append(lags)
 24 |             self.adjustable_max_lags.append(max_adj_lag)
 25 |             self.adjustable_min_lags.append(min_adj_lag)
 26 |         else:
 27 |             the_index = self.names.index(name)
 28 |             self.lags[the_index] += lags
 29 |             if min_adj_lag == True:
 30 |                 self.adjustable_min_lags[the_index] = True
 31 | 
 32 |             if max_adj_lag == True:
 33 |                 self.adjustable_max_lags[the_index] = True
 34 | 
 35 |         return 0
 36 | 
 37 |     def purge(self):
 38 |         self.lags = [sorted(list(set(the_list))) for the_list in self.lags]
 39 | 
 40 |     def check_contiguous(self):
 41 |         for i in range(len(self.names)):
 42 |             the_list = self.lags[i]
 43 |             if the_list != list(range(min(the_list), max(the_list) + 1)):
 44 |                 print('variable ' + self.name[i] + ' has gaps')
 45 |                 exit()
 46 | 
 47 | 
 48 | class command(object):
 49 | 
 50 |     def __init__(self, command_str, df_col_names):
 51 |         self.command_str = command_str
 52 |         self.cols = df_col_names
 53 |         self._temp_part1_list = temp_list(df_col_names)
 54 |         self._temp_iv_list = temp_list(df_col_names)
 55 |         self.variables = None
 56 |         self.options = options_info()
 57 |         self.dep_GMM = None
 58 | 
 59 |         self.list_Dgmm = []
 60 |         self.list_Lgmm = []
 61 |         # self.adjustable={}
 62 |         # self.adjustable['indep']=[]
 63 | 
 64 |         self.parse_command()
 65 | 
 66 |     def parse_command(self):
 67 |         command_str = self.command_str
 68 |         parts = command_str.split('|')
 69 |         if len(parts) <= 1:
 70 |             print('There should be at least two parts in command string')
 71 |             exit()
 72 | 
 73 |         if len(parts) > 3:
 74 |             print('too many parts')
 75 |             exit()
 76 | 
 77 |         if len(parts) == 3:
 78 |             self.part_3 = parts[2]
 79 |             self.options = self.parse_options(self.part_3)
 80 |         else:
 81 |             self.part_3 = ''
 82 |             self.options = options_info()
 83 | 
 84 |         self.part_1 = parts[0]
 85 |         self.parse_dep_indep(self.part_1)
 86 | 
 87 |         self.part_2 = parts[1]
 88 |         self.parse_gmm_iv(self.part_2)
 89 | 
 90 |         self.check_dep_indep()
 91 |         self.check_GMM()
 92 |         self.check_iv()
 93 |         self.check_three_lists()
 94 |         # self.check_adjustable()
 95 | 
 96 |         self.variables = {}
 97 |         self.variables['dep_indep'] = self.tbr_list(self._temp_part1_list)
 98 |         self.variables['Dgmm'] = self.list_Dgmm
 99 |         self.variables['Lgmm'] = self.list_Lgmm
100 |         self.variables['iv'] = self.tbr_list(self._temp_iv_list)
101 | 
102 |     def parse_spaced_vars(self, list_vars, dest_list):
103 | 
104 |         prog_1 = re.compile('^L\(([0-9]{1,})[:]([0-9]{1,})\)[.]([a-zA-Z_]{1,}[a-zA-Z_0-9]{0,})$')
105 |         prog_2 = re.compile('^L([0-9]{1,})[.]([a-zA-Z_]{1,}[a-zA-Z_0-9]{0,})$')
106 |         prog_3 = re.compile('^L\(([0-9]{1,})[:]([?])\)[.]([a-zA-Z_]{1,}[a-zA-Z_0-9]{0,})$')
107 | 
108 |         for var in list_vars:
109 |             match_groups_multiple = prog_1.match(var)
110 | 
111 |             if match_groups_multiple:
112 |                 LB = int(match_groups_multiple.group(1))
113 |                 UB = int(match_groups_multiple.group(2))
114 |                 name = match_groups_multiple.group(3)
115 |                 ret = dest_list.insert(name, list(range(min(LB, UB), max(LB, UB) + 1)))
116 |             else:
117 |                 match_groups_single = prog_2.match(var)
118 |                 if match_groups_single:
119 |                     lag = int(match_groups_single.group(1))
120 |                     name = match_groups_single.group(2)
121 |                     ret = dest_list.insert(name, [lag])
122 |                 else:
123 |                     match_groups_auto = prog_3.match(var)
124 |                     if match_groups_auto:
125 |                         LB = int(match_groups_auto.group(1))
126 |                         name = match_groups_auto.group(3)
127 |                         self.options.beginner = True
128 |                         ret = dest_list.insert(name, [LB], min_adj_lag=False, max_adj_lag=True)
129 |                     #                        new_var=adjustable_lag_indep(name, LB, None)
130 |                     #                        self.adjustable['indep'].append(new_var)
131 |                     else:
132 |                         name = var
133 |                         ret = dest_list.insert(name, [0])
134 | 
135 |             if ret == -1:
136 |                 return name
137 | 
138 |         return ''
139 | 
140 |     def parse_dep_indep(self, part_1):
141 | 
142 |         list_vars = part_1.split()
143 | 
144 |         ret = self.parse_spaced_vars(list_vars, self._temp_part1_list)
145 | 
146 |         if ret != '':
147 |             print(part_1 + ':  variable ' + ret + ' does not exist')
148 |             exit()
149 | 
150 |     def parse_gmm_iv(self, part_2):
151 | 
152 |         matching_parts = []
153 | 
154 |         self.parse_gmmStyle(matching_parts, part_2)
155 |         self.parse_endo_pred(matching_parts, part_2)
156 |         self.parse_IV(matching_parts, part_2)
157 | 
158 |         part2_cpy = part_2
159 |         for part in matching_parts:
160 |             part2_cpy = part2_cpy.replace(part, '')
161 | 
162 |         if len(part2_cpy.strip()) > 0:
163 |             print(part2_cpy.strip() + ': invalid GMM or IV statement')
164 |             exit()
165 | 
166 |     def parse_gmmStyle(self, matching_parts, part_2):
167 | 
168 |         gmm_search_parts = re.findall(
169 |             'gmm[(][a-zA-Z_0-9 ]{1,}[,][ ]{0,}[0-9]{1,}[ ]{0,}[:][ ]{0,}(?:(?:[.])|(?:[0-9]{1,}))[ ]{0,}[)]', part_2)
170 |         prog_1 = re.compile(
171 |             '^gmm[(]([a-zA-Z_0-9 ]{1,})[,][ ]{0,}([0-9]{1,})[ ]{0,}[:][ ]{0,}((?:[.])|(?:[0-9]{1,}))[ ]{0,}[)]$')
172 | 
173 |         for part in gmm_search_parts:
174 | 
175 |             matching_parts.append(part)
176 |             match_groups_multiple = prog_1.match(part)
177 |             vars = match_groups_multiple.group(1).split()
178 | 
179 |             min_lag = int(match_groups_multiple.group(2))
180 |             if match_groups_multiple.group(3) == '.':
181 |                 max_lag = sys.maxsize
182 |             else:
183 |                 max_lag = int(match_groups_multiple.group(3))
184 | 
185 |             self.process_GMM(vars, min_lag, max_lag, part)
186 | 
187 |     def parse_endo_pred(self, matching_parts, part_2):
188 | 
189 |         gmm_search_parts = re.findall('endo[(][a-zA-Z_0-9 ]{1,}[)]', part_2)
190 |         prog_1 = re.compile('^endo[(]([a-zA-Z_0-9 ]{1,})[)]$')
191 | 
192 |         for part in gmm_search_parts:
193 |             # prog_2 = re.compile('^L([0-9]{1,})[.]([a-zA-Z_]{1,}[a-zA-Z_0-9]{0,})$')
194 |             matching_parts.append(part)
195 |             match_groups_multiple = prog_1.match(part)
196 | 
197 |             vars = match_groups_multiple.group(1).split()
198 |             min_lag = 2
199 |             max_lag = sys.maxsize
200 | 
201 |             self.process_GMM(vars, min_lag, max_lag, part)
202 | 
203 |         gmm_search_parts = re.findall('pred[(][a-zA-Z_0-9 ]{1,}[)]', part_2)
204 |         prog_1 = re.compile('^pred[(]([a-zA-Z_0-9 ]{1,})[)]$')
205 | 
206 |         for part in gmm_search_parts:
207 |             matching_parts.append(part)
208 |             match_groups_multiple = prog_1.match(part)
209 | 
210 |             vars = match_groups_multiple.group(1).split()
211 |             min_lag = 1
212 |             max_lag = sys.maxsize
213 | 
214 |             self.process_GMM(vars, min_lag, max_lag, part)
215 | 
216 |     def parse_IV(self, matching_parts, part_2):
217 | 
218 |         iv_search_parts = re.findall('iv[(].{1,}[)]', part_2)
219 |         prog_2 = re.compile('^iv[(](.{1,})[)]$')
220 |         for part in iv_search_parts:
221 |             matching_parts.append(part)
222 |             match_groups_multiple = prog_2.match(part)
223 |             vars = match_groups_multiple.group(1).split()
224 |             invalid_name = self.parse_spaced_vars(vars, self._temp_iv_list)
225 |             if invalid_name != '':
226 |                 print(part + ': ' + invalid_name + ' does not exist')
227 |                 exit()
228 | 
229 |     def parse_options(self, part_3):
230 |         list_options = [s.lower() for s in part_3.split()]
231 | 
232 |         options = self.options
233 | 
234 |         # possible_options=[{'onestep', 'iterated'},'nolevel', 'timedumm', 'collapse']
235 | 
236 |         if "onestep" in list_options and "iterated" in list_options:
237 |             print("One-step and iterative estimations are mutually exclusive")
238 |             exit()
239 | 
240 |         for option in list_options:
241 |             if option == 'onestep':
242 |                 options.steps = 1
243 |             elif option == 'iterated':
244 |                 options.steps = 1000
245 |             elif option == 'nolevel':
246 |                 options.level = False
247 |             elif option == 'hqic':
248 |                 options.mmsc = 'hqic'
249 |             elif option == 'fod':
250 |                 options.transformation = 'fod'
251 |             elif option == 'timedumm':
252 |                 options.timedumm = True
253 |             elif option == 'collapse':
254 |                 options.collapse = True
255 |             else:
256 |                 print(option + ' is not an option allowed')
257 |                 exit()
258 | 
259 |         return (options)
260 | 
261 |     def process_GMM(self, vars, min_lag, max_lag, part):
262 |         if min_lag > max_lag:
263 |             print(part + ': minimum lag cannot be greater than maximum lag')
264 |             exit()
265 |         if min_lag < 0:
266 |             print(part + ': lags must be non-negative')
267 |             exit()
268 |         if len(vars) == 0:
269 |             print(part + ': no variable is included')
270 |             exit()
271 | 
272 |         for var in vars:
273 |             if (var not in self.cols):
274 |                 print(part + ': ' + var + ' does not exist')
275 |                 exit()
276 |             existing_names = [v.name for v in self.list_Dgmm]
277 |             if var in existing_names:
278 |                 print(part + ': ' + var + ' cannot be declared in part 2 for twice or more')
279 |                 exit()
280 |             temp_var = gmm_var(var, min_lag, max_lag, 0)
281 |             self.list_Dgmm.append(temp_var)
282 |             Lmin_lag = max(min_lag - 1, 0)
283 |             temp_var = gmm_var(var, Lmin_lag, min_lag, 0)
284 |             self.list_Lgmm.append(temp_var)
285 | 
286 |     def tbr_list(self, temp_list):
287 |         tbr = []
288 |         for i in range(len(temp_list.names)):
289 |             var_name = temp_list.names[i]
290 |             lags = temp_list.lags[i]
291 |             num_lags = len(lags)
292 |             for j in range(lags[0], lags[0] + num_lags):
293 |                 new_var = regular_variable(var_name, j)
294 |                 tbr.append(new_var)
295 | 
296 |         return (tbr)
297 | 
298 |     def check_dep_indep(self):
299 | 
300 |         self._temp_part1_list.purge()
301 | 
302 |         dep = self._temp_part1_list.names[0]
303 |         dep_lags = self._temp_part1_list.lags[0]
304 | 
305 |         if len(dep_lags) > 0 and dep_lags[0] != 0:
306 |             print('dependent variable should not be lagged on the left hand side of the model')
307 |             exit()
308 | 
309 |         if len(dep_lags) == 1:
310 |             print('lagged dependent variable should be included')
311 |             exit()
312 | 
313 |         if dep_lags[1] != 1:
314 |             print('lag 1 of the dependent variable is not included')
315 |             exit()
316 | 
317 |         self._temp_part1_list.check_contiguous()
318 | 
319 |     def check_GMM(self):
320 |         dep_name = self._temp_part1_list.names[0]
321 | 
322 |         for i in range(len(self.list_Dgmm)):
323 |             var = self.list_Dgmm[i]
324 |             if var.name == dep_name:
325 |                 self.dep_GMM = [i]
326 |                 if var.min_lag < 2:
327 |                     print('must use lag 2 or earlier of the dependent variable as instruments')
328 |                     exit()
329 | 
330 |     def check_iv(self):
331 |         self._temp_iv_list.purge()
332 |         self._temp_iv_list.check_contiguous()
333 | 
334 |     def check_three_lists(self):
335 |         gmm_names = [var.name for var in self.list_Dgmm]
336 |         iv_names = self._temp_iv_list.names
337 | 
338 |         for iv_name in iv_names:
339 |             if iv_name in gmm_names:
340 |                 print('variable ' + iv_name + ': a variable can be either in GMM style or in IV style, but not both')
341 |                 exit()
342 | 
343 |         for i in range(len(self._temp_part1_list.lags)):
344 |             var_name = self._temp_part1_list.names[i]
345 |             var_lags = self._temp_part1_list.lags[i]
346 |             bool_GMM = var_name in gmm_names
347 |             bool_IV = var_name in iv_names
348 | 
349 |             if not (bool_GMM or bool_IV):
350 |                 self._temp_iv_list.insert(var_name, var_lags)
351 | 
352 |     # def check_adjustable(self):
353 |     #
354 |     #     if len(self.adjustable['indep'])>0:
355 |     #         dep = self._temp_part1_list.names[0]
356 |     #         self._temp_part1_list.lags[0]=[1]
357 |     #
358 |     #     for var in self.adjustable['indep']:
359 |     #         if var.name != dep:
360 |     #             print('in the current version, only the lags of the lagged dependent variable can be adjusted')
361 |     #             exit()
362 |     #         else:
363 | 


--------------------------------------------------------------------------------
/pydynpd/common_functions.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | 
  3 | import numpy as np
  4 | from scipy.sparse import csc_matrix
  5 | 
  6 | 
  7 | def lag(mat, lagged, N, lag_number, fill=np.nan):
  8 |     height = int(mat.shape[0] / N)
  9 |     for i in range(N):
 10 |         start_row = i * height
 11 |         end_row = start_row + height
 12 |         mat_i = mat[start_row:end_row, :]
 13 |         lagged_i = lagged[start_row:end_row, :]
 14 | 
 15 |         lagged_i[0:lag_number, :] = fill
 16 |         lagged_i[lag_number:height, :] = mat_i[0:(height - lag_number), :]
 17 | 
 18 | 
 19 | def get_first_diff_table(ori_arr: np.ndarray, N: int):
 20 |     num_cols = ori_arr.shape[1]
 21 |     num_rows = ori_arr.shape[0]
 22 |     height = int(num_rows / N)
 23 | 
 24 |     lag_arr = np.zeros((num_rows, num_cols), dtype='float64')
 25 |     tbr_arr = np.zeros((num_rows, num_cols), dtype='float64')
 26 | 
 27 |     lag(ori_arr, lag_arr, N, 1)
 28 | 
 29 |     tbr_arr = ori_arr - lag_arr
 30 |     return tbr_arr
 31 | 
 32 | 
 33 | def get_fod_table(ori_arr: np.ndarray, N: int):
 34 |     num_rows = ori_arr.shape[0]
 35 |     height = int(num_rows / N)
 36 | 
 37 |     num_cols = ori_arr.shape[1]
 38 | 
 39 |     tbr = np.empty((num_rows, num_cols), dtype='float64')
 40 |     next_sum = np.empty((1, num_cols), dtype='float64')
 41 |     this_sum = np.empty((1, num_cols), dtype='float64')
 42 |     this_avg = np.empty((1, num_cols), dtype='float64')
 43 |     temp = np.empty((height, num_cols), dtype='float64')
 44 | 
 45 |     tbr[:] = np.nan
 46 | 
 47 |     this_sum[:] = np.nan
 48 | 
 49 |     for i in range(N):
 50 |         ori_i = ori_arr[i * height:(i * height + height), :]
 51 |         tbr_i = tbr[i * height:(i * height + height), :]
 52 |         temp.fill(np.nan)
 53 |         next_sum.fill(np.nan)
 54 |         next_count = 0
 55 |         for j in range(height - 2, -1, -1):
 56 | 
 57 |             if np.isnan(ori_i[range(j + 1, j + 2), :]).any(axis=1):
 58 |                 this_count = next_count
 59 |                 this_sum = next_sum
 60 |                 temp[j, :] = temp[j + 1, :]
 61 |             else:
 62 |                 this_count = next_count + 1
 63 | 
 64 |                 this_sum = np.nansum(np.vstack([next_sum, ori_i[j + 1, :]]), axis=0)
 65 |                 this_avg = this_sum * (1.0 / this_count)
 66 |                 temp[j, :] = (ori_i[j, :] - this_avg) * math.sqrt(this_count / (this_count + 1))
 67 | 
 68 |             next_sum = this_sum
 69 |             next_count = this_count
 70 | 
 71 |         tbr_i[0, :] = np.nan
 72 |         tbr_i[range(1, height), :] = temp[range(0, height - 1), :]
 73 | 
 74 |     return tbr
 75 | 
 76 | 
 77 | def sum_product(listOflist, n_rows):
 78 |     num_elements = len(listOflist)
 79 | 
 80 |     for i in range(n_rows):
 81 |         list_temp = []
 82 |         for j in range(num_elements):
 83 |             if type(listOflist[j]) == list:
 84 |                 var_list = listOflist[j]
 85 |                 list_temp.append(var_list[i])
 86 |             elif type(listOflist[j]) == np.ndarray:
 87 |                 var_mat = listOflist[j]
 88 |                 list_temp.append(var_mat)
 89 |             else:
 90 |                 pass  # throw error
 91 |         temp = np.linalg.multi_dot(list_temp)
 92 |         if i == 0:
 93 |             tbr = temp
 94 |         else:
 95 |             tbr += temp
 96 | 
 97 |     return (tbr)
 98 | 
 99 | 
100 | def Windmeijer(M2, _M2_XZ_W2, W2_inv, zs2, vcov_step1, Cx_list, z_list, residual1, N):
101 |     D = np.empty((M2.shape[0], M2.shape[1]), dtype='float64')
102 | 
103 |     x_height = int(Cx_list.shape[0] / N)
104 |     z_height = int(z_list.shape[0] / N)
105 |     for j in range(0, Cx_list.shape[1]):
106 | 
107 |         for i in range(0, N):
108 |             x = Cx_list[(i * x_height):(i * x_height + x_height), :]
109 | 
110 |             u = residual1[(i * x_height):(i * x_height + x_height), 0:1]
111 |             z = z_list[(i * z_height):(i * z_height + z_height), :]
112 | 
113 |             xu = np.matmul(x[:, j:(j + 1)], u.transpose())
114 |             temp = z @ (xu + xu.transpose()) @ z.transpose()
115 |             # temp_zxuzt=z@ xu @ z.transpose()
116 |             # temp=temp_zxuzt + temp_zxuzt.transpose()
117 | 
118 |             if i == 0:
119 |                 zxz = temp
120 |             else:
121 |                 zxz += temp
122 | 
123 |         partial_dir = (-1.0 / N) * zxz
124 | 
125 |         Dj = np.linalg.multi_dot([_M2_XZ_W2, partial_dir, W2_inv, zs2])
126 |         Dj = (-1) * Dj
127 | 
128 |         D[:, j:(j + 1)] = Dj
129 | 
130 |     # temp = np.multiply(N, M2) + np.multiply(N, np.matmul(D, M2)) + np.multiply(N, np.matmul(M2, D.transpose()))
131 |     temp_D_M2 = D @ M2
132 |     temp = np.multiply(N, M2) + np.multiply(N, temp_D_M2) + np.multiply(N, temp_D_M2.transpose())
133 |     temp = temp + np.matmul(np.matmul(D, vcov_step1), D.transpose())
134 |     #
135 |     return (temp)
136 | 
137 | 
138 | def make_sparse_list(arr_list):
139 |     nrow = len(arr_list)
140 |     new_list = []
141 |     for i in range(nrow):
142 |         arr = arr_list[i]
143 |         new_arr = csc_matrix(arr)
144 |         new_list.append(new_arr)
145 | 
146 |     return (new_list)
147 | 


--------------------------------------------------------------------------------
/pydynpd/info.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | 
 3 | # from pandas import DataFrame
 4 | import numpy as np
 5 | 
 6 | 
 7 | @dataclass
 8 | class df_info:
 9 |     N: int
10 |     T: int
11 |     ids: list
12 |     first_diff_index: int
13 |     last_diff_index: int
14 |     first_level_index: int
15 |     last_level_index: int
16 |     max_lag: int
17 |     # last_fod_index: int
18 |     # first_fod_index: int
19 | 
20 | 
21 | @dataclass
22 | class z_info:
23 |     diff_width: int
24 |     diff_height: int
25 |     level_width: int
26 |     level_height: int
27 |     width: int
28 |     height: int
29 |     num_Dgmm_instr: int
30 |     num_Lgmm_instr: int
31 |     num_instr: int
32 |     # int num_vars
33 |     # int num_gmm_instr
34 | 
35 | 
36 | @dataclass
37 | class hansen_test_info:
38 |     test_value: float
39 |     df: int
40 |     p_value: float
41 |     critical_value: float
42 | 
43 | 
44 | @dataclass
45 | class AR_test_info:
46 |     lag: int
47 |     AR: float
48 |     P_value: float
49 | 
50 | 
51 | @dataclass
52 | class options_info:
53 |     steps: int = 2
54 |     level: bool = True
55 |     beginner: bool = False
56 |     timedumm: bool = False
57 |     collapse: bool = False
58 |     mmsc: str = 'bic'
59 |     transformation: str = 'fd'
60 | 
61 | 
62 | @dataclass
63 | class sumproduct_task:
64 |     array_list: list
65 |     division_list: list
66 | 
67 | 
68 | @dataclass
69 | class beginner_models:
70 |     model: str
71 | 
72 | 
73 | @dataclass
74 | class step_result:
75 |     M: np.ndarray
76 |     SS: np.ndarray
77 |     W: np.ndarray
78 |     W_inv: np.ndarray
79 |     W_next: np.ndarray
80 |     ZuuZ: np.ndarray
81 |     beta: np.ndarray
82 |     residual: np.ndarray
83 |     _residual_t: np.ndarray
84 | 
85 |     vcov: np.ndarray
86 |     zs: np.ndarray
87 |     std_err: np.ndarray
88 |     _M_XZ_W: np.ndarray
89 |     _XZ_W: np.ndarray
90 |     def __init__(self, W):
91 |         self.W = W
92 |         self.W_inv = np.linalg.pinv(W)
93 | 


--------------------------------------------------------------------------------
/pydynpd/instruments.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | from pydynpd.info import df_info, z_info, options_info
  4 | 
  5 | 
  6 | class instruments(object):
  7 | 
  8 |     def __init__(self, variables: dict, gmm_tables: dict, df_information: df_info, options: options_info):
  9 |         level = options.level
 10 |         collapse = options.collapse
 11 |         transformation = options.transformation
 12 |         self.z_information, self.gmm_diff_info, self.iv_diff_info, self.gmm_level_info = self.calculate_z_dimension(
 13 |             variables, df_information, level, transformation, collapse)
 14 | 
 15 |         if level:
 16 |             self.z_table = self.build_z_level(variables, gmm_tables, df_information, transformation, collapse)
 17 |         else:
 18 |             self.z_table = self.build_z_diff(variables, gmm_tables, df_information, False, transformation, collapse)
 19 | 
 20 |     def build_z_level(self, variables: dict, gmm_tables: dict, info: df_info, transformation, collapse=False):
 21 |         last_level_index = info.last_level_index
 22 |         first_level_index = info.first_level_index
 23 | 
 24 |         z_table = self.build_z_diff(
 25 |             variables, gmm_tables, info, True, transformation, collapse)
 26 | 
 27 |         level_width = self.z_information.level_width
 28 |         level_height = self.z_information.level_height
 29 |         diff_width = self.z_information.diff_width
 30 |         diff_height = self.z_information.diff_height
 31 |         width = self.z_information.width
 32 |         height = self.z_information.height
 33 | 
 34 |         Lgmm_vars = variables['Lgmm']
 35 |         iv_vars = variables['iv']
 36 |         Lgmm_dat = gmm_tables['Lgmm']
 37 |         iv_dat = gmm_tables['iv']
 38 | 
 39 |         Lgmm_iv_height = int(Lgmm_dat.shape[0] / info.N)
 40 | 
 41 |         start_row = diff_height  # z_table[0].shape[0]-height
 42 |         start_col = diff_width  # z_table[0].shape[1]-width
 43 | 
 44 |         for i in range(info.N):
 45 |             z = z_table[(i * height):(i * height + height), :]
 46 |             # z[start_row-1,start_col:(start_col+self.level_width)]=1
 47 |             z[height - 1, start_col:width] = 1
 48 |             array_Lgmm = Lgmm_dat[(i * Lgmm_iv_height):(i * Lgmm_iv_height + Lgmm_iv_height), :]
 49 |             array_iv = iv_dat[(i * Lgmm_iv_height):(i * Lgmm_iv_height + Lgmm_iv_height), :]
 50 | 
 51 |             for var_id in range(len(Lgmm_vars)):
 52 |                 lag = Lgmm_vars[var_id].min_lag
 53 |                 t_info = self.gmm_level_info[3 * var_id: (3 * var_id + 3), :]
 54 |                 for j in range(level_width):
 55 |                     the_index = t_info[0, j]
 56 |                     if the_index >= 0:
 57 |                         the_row = start_row + t_info[2, j]
 58 | 
 59 |                         the_index = t_info[0, j]
 60 | 
 61 |                         z[the_row, start_col + j] = array_Lgmm[the_index, var_id]
 62 | 
 63 |             start_pos = self.z_information.num_Dgmm_instr
 64 |             for var_id in range(len(iv_vars)):
 65 |                 var = iv_vars[var_id]
 66 |                 z[start_pos + var_id,
 67 |                 start_col:width] = array_iv[first_level_index:(last_level_index + 1), var_id]
 68 | 
 69 |             z[np.isnan(z)] = 0
 70 | 
 71 |         return z_table
 72 | 
 73 |     def prepare_z_gmm_level(self, variables: dict, level_width, info: df_info, collapse=False):
 74 | 
 75 |         gmm_vars = variables['Lgmm']
 76 |         num_gmm = len(gmm_vars)
 77 |         t_info = np.empty((num_gmm * 3, level_width),
 78 |                           dtype='int32')  # row 0: gmm_index, 2: which row, 1: empty right now
 79 | 
 80 |         start_row = 0
 81 |         var_id = 0
 82 |         for var_id in range(num_gmm):
 83 |             var = gmm_vars[var_id]
 84 |             for i in range(level_width):
 85 |                 the_index = info.first_level_index + i
 86 |                 gmm_index = the_index - var.min_lag
 87 | 
 88 |                 t_info[var_id * 3 + 2, i] = start_row
 89 | 
 90 |                 if gmm_index >= 1:
 91 |                     t_info[var_id * 3 + 0, i] = gmm_index
 92 |                     if collapse:
 93 |                         if i == level_width - 1:
 94 |                             start_row += 1
 95 |                     else:
 96 |                         start_row += 1
 97 | 
 98 |                 else:
 99 |                     t_info[var_id * 3 + 0, i] = -9999
100 | 
101 |         num_Lgmm_instr = start_row  # number of gmm instruments in diff eq
102 | 
103 |         return (num_Lgmm_instr, t_info)
104 | 
105 |     def calculate_z_dimension(self, variables: dict, info: df_info, level, transformation, collapse=False):
106 |         Lgmm_vars = variables['Lgmm']
107 | 
108 |         diff_width = info.last_diff_index - info.first_diff_index + 1
109 | 
110 |         level_width = info.last_level_index - info.first_level_index + 1
111 | 
112 |         level_height = 0
113 |         num_Lgmm_instr = 0
114 |         gmm_level_info = None
115 |         if level:
116 |             num_Lgmm_instr, gmm_level_info = self.prepare_z_gmm_level(variables, level_width, info, collapse)
117 |             level_height = num_Lgmm_instr + 1
118 | 
119 |         num_Dgmm_instr, gmm_diff_info = self.prepare_Z_gmm_diff(
120 |             variables, info, level, transformation, collapse)
121 |         iv_diff_info = self.prepare_Z_iv_diff(variables, diff_width, info)
122 | 
123 |         diff_height = (num_Dgmm_instr + iv_diff_info.shape[0])
124 | 
125 |         if level:
126 |             height = diff_height + level_height
127 |             width = diff_width + level_width
128 |         else:
129 |             height = diff_height
130 |             width = diff_width
131 | 
132 |         z_information = z_info(diff_height=diff_height, diff_width=diff_width, level_width=level_width,
133 |                                level_height=level_height, height=height, width=width,
134 |                                num_Dgmm_instr=num_Dgmm_instr, num_Lgmm_instr=num_Lgmm_instr, num_instr=height)
135 | 
136 |         return (z_information, gmm_diff_info, iv_diff_info, gmm_level_info)
137 | 
138 |     def build_z_diff(self, variables: dict, gmm_tables: dict, info: df_info, level, transformation, collapse=False):
139 | 
140 |         gmm_vars = variables['Dgmm']
141 |         iv_vars = variables['iv']
142 |         Dgmm_dat = gmm_tables['Dgmm']
143 |         Div_dat = gmm_tables['Div']
144 | 
145 |         gmm_Div_height = int(Dgmm_dat.shape[0] / info.N)
146 | 
147 |         diff_width = self.z_information.diff_width
148 |         height = self.z_information.height
149 |         width = self.z_information.width
150 |         num_Dgmm_instr = self.z_information.num_Dgmm_instr
151 | 
152 |         z_table = np.zeros((height * info.N, width), dtype=np.float64)
153 | 
154 |         for i in range(info.N):
155 |             if level and transformation == 'fod':
156 |                 z = z_table[i * height:(i + 1) * height, 1:diff_width]
157 | 
158 |             else:
159 |                 z = z_table[i * height:(i + 1) * height, 0:diff_width]
160 | 
161 |             z_width = z.shape[1]
162 |             array_gmm = Dgmm_dat[i * gmm_Div_height:(i + 1) * gmm_Div_height, :]
163 |             array_fd_iv = Div_dat[i * gmm_Div_height:(i + 1) * gmm_Div_height, :]
164 | 
165 |             var_id = 0
166 |             for var in gmm_vars:
167 |                 for j in range(z_width):
168 |                     row_pos = self.gmm_diff_info[var_id * 3 + 2, j]
169 | 
170 |                     start = self.gmm_diff_info[var_id * 3 + 0, j]
171 |                     end = self.gmm_diff_info[var_id * 3 + 1, j]
172 | 
173 |                     for k in range(end - start + 1):
174 |                         z[row_pos + k, j] = array_gmm[end - k, var_id]
175 |                 var_id += 1
176 | 
177 |             row_pos = num_Dgmm_instr
178 | 
179 |             var_id = 0
180 |             for var_id in range(len(iv_vars)):
181 |                 var = iv_vars[var_id]
182 |                 for j in range(z_width):
183 |                     index_to_take = self.iv_diff_info[var_id, j]
184 | 
185 |                     z[row_pos, j] = array_fd_iv[index_to_take, var_id]
186 | 
187 |                 row_pos += 1
188 | 
189 |             z[np.isnan(z)] = 0
190 | 
191 |         return z_table
192 | 
193 |     def prepare_Z_iv_diff(self, variables: dict, width, info: df_info):
194 |         iv_vars = variables['iv']  # need to be placed at the beginning
195 |         num_iv = len(iv_vars)
196 | 
197 |         t_info = np.empty((num_iv, width), dtype='int32')
198 |         # num_iv_instr = 0
199 |         var_id = 0
200 |         for var_id in range(num_iv):
201 |             var = iv_vars[var_id]
202 |             t_info[var_id,] = range(info.first_diff_index, info.last_diff_index + 1)
203 |             # num_iv_instr += width
204 | 
205 |         return (t_info)
206 | 
207 |     def prepare_Z_gmm_diff(self, variables: dict, info: df_info, level, transformation, collapse=False):
208 |         start_row = 0
209 | 
210 |         gmm_vars = variables['Dgmm']
211 |         num_gmm = len(gmm_vars)
212 | 
213 |         var_id = 0
214 |         if level and transformation == 'fod':
215 |             first_index = info.first_diff_index + 1
216 |         else:
217 |             first_index = info.first_diff_index
218 |         last_index = info.last_diff_index
219 |         width = last_index - first_index + 1
220 | 
221 |         t_info = np.empty((num_gmm * 3, width), dtype='int32')
222 | 
223 |         for var_id in range(num_gmm):
224 |             var = gmm_vars[var_id]
225 | 
226 |             first_tend = first_index - var.min_lag
227 |             last_tend = last_index - var.min_lag
228 | 
229 |             tend = np.arange(first_tend, last_tend + 1, dtype='int32')
230 |             t_info[var_id * 3 + 1,] = tend
231 | 
232 |             for i in range(width):
233 |                 tstart = max(0, tend[i] + var.min_lag - var.max_lag)
234 |                 t_info[var_id * 3 + 0, i] = tstart
235 |                 # t_info[var_id*3+1, i]=tend[i]
236 |                 # physical position of the row
237 |                 t_info[var_id * 3 + 2, i] = start_row
238 | 
239 |                 num = tend[i] - tstart + 1
240 |                 # num_instru += num
241 |                 if collapse:
242 |                     if i == (width - 1):
243 |                         start_row += num
244 |                 else:
245 |                     start_row += num
246 | 
247 |         num_gmm_instr = start_row  # number of gmm instruments in diff eq
248 | 
249 |         return ((num_gmm_instr, t_info))
250 | 


--------------------------------------------------------------------------------
/pydynpd/model_organizer.py:
--------------------------------------------------------------------------------
 1 | from pydynpd.command import command
 2 | from pydynpd.panel_data import panel_data
 3 | from pydynpd.variable import regular_variable
 4 | 
 5 | 
 6 | class list_models:
 7 |     def __init__(self):
 8 |         self.list_variables = []
 9 |         self.list_command_str = []
10 | 
11 | 
12 | class model_oranizer(object):
13 |     def __init__(self, user_command: command, pdata: panel_data):
14 | 
15 |         self._temp_list_indep = []
16 |         self._temp_list_command = []
17 | 
18 |         self.ending_time = pdata.T
19 |         first_list = []
20 |         self._temp_list_indep.append(first_list)
21 |         self._temp_list_command.append('')
22 | 
23 |         the_list = user_command._temp_part1_list
24 |         for i in range(len(the_list.names)):
25 |             var_name = the_list.names[i]
26 |             lags = the_list.lags[i]
27 |             num_lags = len(lags)
28 |             last_lag = lags[0] + num_lags - 1
29 |             for j in range(lags[0], last_lag + 1):
30 |                 new_var = regular_variable(var_name, j)
31 |                 self._add_item(new_var)
32 | 
33 |             if the_list.adjustable_max_lags[i] == True:
34 |                 self._explode_model(var_name, last_lag)
35 | 
36 |         self.models = list_models()
37 |         for i in range(len(self._temp_list_indep)):
38 |             new_variables = {}
39 |             new_variables['dep_indep'] = self._temp_list_indep[i]
40 |             new_variables['Dgmm'] = user_command.variables['Dgmm']
41 |             new_variables['Lgmm'] = user_command.variables['Lgmm']
42 |             new_variables['iv'] = user_command.variables['iv']
43 |             self.models.list_variables.append(new_variables)
44 |             self.models.list_command_str = self._temp_list_command
45 | 
46 |     def _add_item(self, new_var):
47 |         for i in range(len(self._temp_list_indep)):
48 | 
49 |             self._temp_list_indep[i].append(new_var)
50 |             if new_var.lag == 0:
51 |                 new_str = ' ' + new_var.name + ' '
52 |             else:
53 |                 new_str = ' L' + str(new_var.lag) + '.' + new_var.name + ' '
54 | 
55 |             self._temp_list_command[i] += new_str
56 | 
57 |     def _explode_model(self, var_name, last_lag):
58 |         new_list_variables = []
59 |         new_list_command_str = []
60 |         for i in range(len(self._temp_list_indep)):
61 |             model = self._temp_list_indep[i]
62 |             command_str = self._temp_list_command[i]
63 | 
64 |             for j in range(last_lag + 1, self.ending_time + 1):
65 |                 new_model = model.copy()
66 |                 new_str = str(command_str)
67 | 
68 |                 for k in range(last_lag + 1, j + 1):
69 |                     new_var = regular_variable(var_name, k)
70 |                     new_model.append(new_var)
71 |                     new_str += ' L' + str(k) + '.' + var_name + ' '
72 | 
73 |                 new_list_variables.append(new_model)
74 |                 new_list_command_str.append(new_str)
75 | 
76 |         self._temp_list_indep += new_list_variables
77 |         self._temp_list_command += new_list_command_str
78 | 
79 |     def _translate_to_command_str(self, variables, options):
80 |         pass
81 | 
82 |     def check_model():
83 |         pass
84 | 


--------------------------------------------------------------------------------
/pydynpd/model_summary.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | 
  3 | from prettytable import PrettyTable
  4 | 
  5 | 
  6 | class model_list(object):
  7 |     def __init__(self, m_list: list):
  8 |         self.names = []
  9 |         self.min_lags = []
 10 |         self.max_lags = []
 11 |         for m in m_list:
 12 |             for i in range(1, len(m.variables['dep_indep'])):
 13 |                 indep = m.variables['dep_indep'][i]
 14 |                 if not indep.name in self.names:
 15 |                     self.names.append(indep.name)
 16 |                     self.min_lags.append(indep.lag)
 17 |                     self.max_lags.append(indep.lag)
 18 |                 else:
 19 |                     the_index = self.names.index(indep.name)
 20 |                     if indep.lag > self.max_lags[the_index]:
 21 |                         self.max_lags[the_index] = indep.lag
 22 | 
 23 |                     if indep.lag < self.min_lags[the_index]:
 24 |                         self.min_lags[the_index] = indep.lag
 25 | 
 26 | 
 27 | class model_summary(object):
 28 | 
 29 |     def print_summary(self, model):
 30 | 
 31 |         if model.options.steps == 2:
 32 |             str_steps = 'two-step '
 33 |         elif model.options.steps == 1:
 34 |             str_steps = 'one-step '
 35 |         else:
 36 |             str_steps = str(model.options.steps) + '-step '
 37 | 
 38 |         if model.options.level:
 39 |             str_gmm = 'system GMM'
 40 |         else:
 41 |             str_gmm = 'difference GMM'
 42 | 
 43 |         to_print = []
 44 |         # to_print.append(model.command_str)
 45 |         to_print.append(' Dynamic panel-data estimation, ' + str_steps + str_gmm)
 46 |         to_print.append(self.basic_information(model))
 47 |         to_print.append(self.regression_table(model))
 48 |         to_print.append(self.test_results(model))
 49 |         for line in to_print:
 50 |             print(line)
 51 | 
 52 |     def basic_information(self, model):
 53 | 
 54 |         basic_table = PrettyTable()
 55 |         middle_space = '                   '
 56 |         basic_table.field_names = ["    ", "   ", "  "]
 57 |         basic_table.border = False
 58 |         basic_table.header = False
 59 |         basic_table.align = 'l'
 60 |         basic_table.add_row(
 61 |             ['Group variable: ' + model.pdata._individual, middle_space, 'Number of obs = ' + str(model.num_obs)])
 62 |         basic_table.add_row(
 63 |             ['Time variable: ' + model.pdata._time, middle_space, 'Min obs per group: ' + str(model.min_obs)])
 64 |         basic_table.add_row(['Number of instruments = ' + str(model.z_information.num_instr), middle_space,
 65 |                              'Max obs per group: ' + str(model.max_obs)])
 66 |         basic_table.add_row(
 67 |             ['Number of groups = ' + str(model.N), middle_space,
 68 |              'Avg obs per group: ' + '{0:.2f}'.format(model.avg_obs)])
 69 | 
 70 |         return (basic_table.get_string())
 71 | 
 72 |     def test_results(self, model):
 73 | 
 74 |         str_toprint = 'Hansen test of overid. restrictions: chi(' + str(model.hansen.df) + ') = ' + '{:.3f}'.format(
 75 |             model.hansen.test_value)
 76 |         str_toprint = str_toprint + ' Prob > Chi2 = ' + '{:.3f}'.format(model.hansen.p_value) + '\n'
 77 | 
 78 |         for i in range(len(model.AR_list)):
 79 |             the_AR = model.AR_list[i]
 80 |             AR = the_AR.AR
 81 |             P_value = the_AR.P_value
 82 | 
 83 |             str_toprint = str_toprint + 'Arellano-Bond test for AR(' + str(
 84 |                 i + 1) + ') in first differences: z = ' + "{:.2f}".format(AR) + ' Pr > z =' + '{:.3f}'.format(
 85 |                 P_value) + '\n'
 86 | 
 87 |         return (str_toprint)
 88 | 
 89 |     def regression_table(self, model):
 90 | 
 91 |         dep_name = model.variables['dep_indep'][0].name
 92 | 
 93 |         r_table = PrettyTable()
 94 | 
 95 |         r_table.field_names = [dep_name, "coef.", "Corrected Std. Err.", "z", "P>|z|", " "]
 96 | 
 97 |         r_table.float_format = '.7'
 98 |         regression_table = model.regression_table
 99 |         # , "z", "P>|z|", "[95% Conf. Interval]" ]
100 |         num_indep = len(regression_table.index)
101 | 
102 |         for i in range(num_indep):
103 |             var_name = regression_table['variable'][i]
104 |             coeff = regression_table['coefficient'][i]
105 |             stderr = regression_table['std_err'][i]
106 | 
107 |             z = regression_table['z_value'][i]
108 |             p = regression_table['p_value'][i]
109 |             sig = regression_table['sig'][i]
110 |             r_table.add_row([var_name, coeff, stderr, z, p, sig])
111 | 
112 |         return r_table.get_string()
113 | 
114 |     def print_good_list(self, the_list: list, level: bool, mmsc: str):
115 |         the_list.sort(key=lambda x: x.MMSC_LU[mmsc])
116 |         m_list = model_list(the_list)
117 | 
118 |         r_table = PrettyTable()
119 |         # col_names=['variables'] + [m.name for m in the_list]
120 |         variable_names = []
121 |         for i in range(len(m_list.names)):
122 |             v = m_list.names[i]
123 |             for j in range(m_list.min_lags[i], m_list.max_lags[i] + 1):
124 |                 if j == 0:
125 |                     variable_names.append(v)
126 |                 else:
127 |                     variable_names.append('L' + str(j) + '.' + v)
128 |         if level:
129 |             variable_names.append('_con')
130 |         r_table.add_column('variables', variable_names)
131 |         for m in the_list:
132 |             new_col = []
133 |             for i in range(len(variable_names)):
134 |                 new_col.append('')
135 |             rt = m.regression_table
136 |             ind = [variable_names.index(v) for v in rt['variable']]
137 |             j = 0
138 |             for i in ind:
139 |                 new_col[i] = '{:.3f}'.format(rt['coefficient'][j]) + rt['sig'][j] + '\n(' + '{:.3f}'.format(
140 |                     rt['std_err'][j]) + ')'
141 |                 j += 1
142 | 
143 |             r_table.add_column(m.name, new_col)
144 | 
145 |         print('models are sorted by ' + mmsc)
146 |         print(r_table)
147 | 
148 |         try:
149 |             with open('output.html', 'w') as f:
150 |                 # print(f.__dir__())
151 |                 print('HTML output named "output.html" is located in folder ' + os.getcwd())
152 |                 f.write(r_table.get_html_string(
153 |                     attributes={'border': 1, 'style': 'border-width: 1px; border-collapse: collapse;'}))
154 |         except Exception as e:
155 |             print(e)
156 | 
157 |         print('\n')
158 |         print('MMSC_LU scores:')
159 |         mmsc_table = PrettyTable()
160 |         mmsc_table.field_names = ['model', 'aic', 'bic', 'hqic', 'command str']
161 |         mmsc_table.align['command str'] = "l"
162 | 
163 |         for m in the_list:
164 |             mmsc_table.add_row([m.name, '{:.3f}'.format(m.MMSC_LU['aic']), '{:.3f}'.format(m.MMSC_LU['bic']),
165 |                                 '{:.3f}'.format(m.MMSC_LU['hqic']), m.command_str])
166 | 
167 |         print(mmsc_table)
168 | 
169 |     def print_bad_list(self, the_list: list):
170 | 
171 |         bad_table = PrettyTable()
172 |         bad_table.field_names = ['model', 'command str']
173 |         bad_table.align['command str'] = "l"
174 |         for m in the_list:
175 |             bad_table.add_row([m.name, m.command_str])
176 | 
177 |         print(bad_table)
178 | 


--------------------------------------------------------------------------------
/pydynpd/panel_data.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | 
  3 | import numpy as np
  4 | from pandas import DataFrame, get_dummies, concat
  5 | 
  6 | from pydynpd.common_functions import get_first_diff_table, get_fod_table
  7 | from pydynpd.info import options_info
  8 | 
  9 | 
 10 | class panel_data():
 11 |     def __init__(self, df: DataFrame, identifiers, variables, options: options_info):
 12 | 
 13 |         self._individual = identifiers[0]
 14 |         self._time = identifiers[1]
 15 | 
 16 |         cols = []
 17 | 
 18 |         temp_list = [var.name for var in variables['dep_indep'] + variables['iv'] + variables['Dgmm']]
 19 |         for var_name in temp_list:
 20 |             if var_name not in cols:
 21 |                 cols.append(var_name)
 22 | 
 23 |         # temp_df = df[ [self._individual, self._time] + cols].copy()
 24 |         self.N, self.T, self.ids = self.xtset(df, self._individual, self._time)
 25 | 
 26 |         if options.timedumm:
 27 |             df, self.col_timedumm = self.add_time_dummy(df, variables, self._time)
 28 |         else:
 29 |             self.col_timedumm = []
 30 | 
 31 |         self.cols = self.ids + cols  # make sure ids is the first column
 32 | 
 33 |         self.data = self.make_balanced(df[self.cols + self.col_timedumm].to_numpy(), self.N, self.T)
 34 |         num_cols = self.data.shape[1]
 35 | 
 36 |         self.fd_data = get_first_diff_table(self.data[:, range(0, num_cols)], self.N)
 37 |         if (options.transformation=='fod') & (options.level==False):
 38 |             self.fod_data=get_fod_table(self.data, self.N)
 39 | 
 40 |     def xtset(self, df: DataFrame, _individual, _time):
 41 |         df.sort_values(by=[_individual, _time])
 42 |         df['_individual'] = df[_individual].astype('category').cat.codes
 43 |         df['_individual'] = df['_individual'].astype('int64')
 44 |         N = df['_individual'].unique().size
 45 | 
 46 |         df['_time'] = df[_time].astype('category').cat.codes
 47 |         df['_time'] = df['_time'].astype('int64')
 48 |         T = df['_time'].unique().size
 49 | 
 50 |         df['_NT'] = df['_individual'] * T + df['_time']
 51 | 
 52 |         if N <= T:
 53 |             print(
 54 |                 'Warning: system and difference GMMs do not work well on long (T>=N) panel data')
 55 |         return (N, T, ['_NT'])
 56 | 
 57 |     def make_balanced(self, ori, n_individual, n_time):
 58 |         arr_full = np.empty((n_individual * n_time, ori.shape[1]), dtype='float64')
 59 | 
 60 |         arr_full[:] = np.nan
 61 |         # arr_full[:, 0] = np.repeat(range(0, N), T)
 62 |         arr_full[:, 0] = range(0, n_individual * n_time)
 63 |         # arr_full[:, 1] = arr_full[:, 2] % T
 64 | 
 65 |         mask = np.in1d(arr_full[:, 0], ori[:, 0])
 66 | 
 67 |         arr_full[mask, 1:arr_full.shape[1]] = ori[:, 1:ori.shape[1]]
 68 |         arr_full = arr_full[arr_full[:, 0].argsort()]
 69 | 
 70 |         return (arr_full)
 71 | 
 72 |     def add_time_dummy(self, df: DataFrame, variables: dict, _time: str):
 73 | 
 74 |         timedumms = get_dummies(df[_time], prefix = _time, dtype = int)
 75 |         col_timedumm = timedumms.columns.tolist()
 76 | 
 77 |         df = concat([df, timedumms], axis = 1)
 78 | 
 79 |         return df, col_timedumm
 80 | 
 81 |     def generate_D_matrix(self, height, T, level):
 82 |         # matrix used in Forward Orthogonal Deviation
 83 |         temp=np.zeros((T,T), dtype='float64')
 84 |         D = np.zeros((height, T), dtype='float64')
 85 | 
 86 |         for i in range(T):
 87 |             for j in range(i, T):
 88 |                 if i == j:
 89 |                     temp[i, j] = math.sqrt((T - i - 1) / (T - i))
 90 |                 else:
 91 |                     temp[i, j] = (-1) * math.sqrt(1 / ((T - i) * (T - i - 1)))
 92 | 
 93 |         if level:
 94 |             last=T-2
 95 | 
 96 | 
 97 |         else:
 98 |             last=T-3
 99 |         start = last + 1 - height
100 | 
101 |         D=temp[start:(last+1),:]
102 | 
103 |         return (D)
104 | 


--------------------------------------------------------------------------------
/pydynpd/regression.py:
--------------------------------------------------------------------------------
  1 | import warnings
  2 | from sys import exit
  3 | 
  4 | import numpy as np
  5 | from numpy.linalg import pinv
  6 | from pandas import DataFrame
  7 | 
  8 | import pydynpd.specification_tests as tests
  9 | from pydynpd.command import command
 10 | from pydynpd.common_functions import Windmeijer
 11 | from pydynpd.dynamic_panel_model import dynamic_panel_model
 12 | from pydynpd.info import step_result
 13 | from pydynpd.model_organizer import model_oranizer
 14 | from pydynpd.model_summary import model_summary
 15 | from pydynpd.panel_data import panel_data
 16 | 
 17 | warnings.filterwarnings("ignore", category=RuntimeWarning)
 18 | 
 19 | 
 20 | class abond:
 21 | 
 22 |     def __init__(self, command_str, df: DataFrame, identifiers: list):
 23 | 
 24 |         if len(identifiers) != 2:
 25 |             print('two variables needed')
 26 |             exit()
 27 | 
 28 |         user_command = command(command_str, df.columns)
 29 |         pdata = panel_data(df, identifiers, user_command.variables, user_command.options)
 30 |         self.models = []
 31 |         self._good_models = []
 32 |         self._bad_models = []
 33 |         if not user_command.options.beginner:
 34 |             model = dynamic_panel_model(pdata, user_command.variables, user_command.options, command_str,
 35 |                                         user_command.part_2, user_command.part_3)
 36 |             self.regular_process(model)
 37 |             self.form_results(model)
 38 | 
 39 |         else:
 40 |             m_manager = model_oranizer(user_command, pdata)
 41 |             num_models = len(m_manager.models.list_variables)
 42 |             j = 0
 43 |             for i in range(num_models):
 44 |                 variables = m_manager.models.list_variables[i]
 45 |                 com_str = m_manager.models.list_command_str[i]
 46 |                 try:
 47 |                     model = dynamic_panel_model(pdata, variables, user_command.options, com_str, user_command.part_2,
 48 |                                                 user_command.part_3)
 49 |                     self.regular_process(model)
 50 |                     j += 1
 51 |                     model.name = 'm' + str(j)
 52 |                     if self.check_model(model):
 53 |                         model.calculate_MMSC_LU()
 54 |                         self._good_models.append(model)
 55 |                     else:
 56 |                         self._bad_models.append(model)
 57 |                 except Exception as e:
 58 |                     # print(e)
 59 |                     continue
 60 | 
 61 |             for m in self._good_models:
 62 |                 self.form_results(m)
 63 |             ms = model_summary()
 64 |             if len(self._good_models) >= 2:
 65 |                 ms.print_good_list(self._good_models, user_command.options.level, user_command.options.mmsc)
 66 | 
 67 |             if len(self._bad_models) >= 1:
 68 |                 print('\nThe following model(s) did not pass specification tests:')
 69 |                 ms.print_bad_list(self._bad_models)
 70 | 
 71 |     def regular_process(self, model: dynamic_panel_model):
 72 | 
 73 |         model.step_results = []
 74 | 
 75 |         _XZ, _Zy = self.calculate_basic(model)
 76 |         _XZ_t = _XZ.transpose()
 77 |         _Zy_t = _Zy.transpose()
 78 | 
 79 |         self.GMM(model, _XZ, _XZ_t, _Zy, _Zy_t, 1)
 80 |         if model.options.steps == 1 or model.options.steps == 2:
 81 |             self.GMM(model, _XZ, _XZ_t, _Zy, _Zy_t, 2)
 82 |             self.perform_test(model, 2)
 83 |         else:
 84 |             self.iterative_GMM(model, _XZ, _XZ_t, _Zy, _Zy_t)
 85 |             self.perform_test(model, model.options.steps)
 86 | 
 87 |     def iterative_GMM(self, model, _XZ, _XZ_t, _Zy, _Zy_t):
 88 |         current_step = 1
 89 |         converge = False
 90 |         while not converge:
 91 |             previous_step = current_step
 92 |             current_step += 1
 93 |             self.GMM(model, _XZ, _XZ_t, _Zy, _Zy_t, current_step)
 94 |             beta_current = model.step_results[current_step - 1].beta
 95 |             beta_previous = model.step_results[current_step - 2].beta
 96 |             for j in range(beta_current.shape[0]):
 97 |                 temp = (beta_current[j] - beta_previous[j]) ** 2
 98 |                 temp2 = (beta_previous[j]) ** 2
 99 |                 if j == 0:
100 |                     nom = temp
101 |                     denom = temp2
102 |                 else:
103 |                     nom += temp
104 |                     denom += temp2
105 |             crit = np.sqrt(nom / denom)
106 | 
107 |             if crit < 0.000001:
108 |                 converge = True
109 |                 model.options.steps = current_step
110 | 
111 |     def GMM(self, model: dynamic_panel_model, _XZ, _XZ_t, _Zy, _Zy_t, step: int):
112 |         N = model.N
113 |         num_obs = model.num_obs
114 |         z_list = model.z_list
115 |         _z_t_list = model._z_t_list
116 |         Cx_list = model.final_xy_tables['Cx']
117 | 
118 |         Cy_list = model.final_xy_tables['Cy']
119 | 
120 |         if step == 1:
121 |             H1 = self.get_H1(model, model.options.transformation)
122 |             W = self.calculate_W(H1, model)
123 |             current_step = step_result(W)
124 |             W_inv = current_step.W_inv
125 |             model.step_results.append(current_step)
126 | 
127 |         if step >= 2:
128 |             previous_step = model.step_results[step - 2]
129 |             W = previous_step.W_next
130 |             current_step = step_result(W)
131 |             model.step_results.append(current_step)
132 |             W_inv = current_step.W_inv
133 | 
134 |         _XZ_W = _XZ @ W_inv
135 |         _M_inv = _XZ_W @ _XZ_t
136 |         M = pinv(_M_inv)
137 |         _M_XZ_W = M @ _XZ_W
138 | 
139 |         beta = _M_XZ_W @ _Zy_t
140 |         residual = self.calculate_residual(Cy_list, Cx_list, beta)
141 |         _residual_t = residual.transpose()
142 |         SS = (_residual_t @ residual) * (1.0 / 2 / num_obs)
143 | 
144 |         z_height = int(z_list.shape[0] / N)
145 |         r_height = int(residual.shape[0] / N)
146 |         self._zs_list = np.empty((N * z_height, 1), dtype=np.float64)
147 |         for i in range(N):
148 |             z = z_list[(i * z_height):(i * z_height + z_height), :]
149 |             u = residual[(i * r_height):(i * r_height + r_height), :]
150 |             # u_t=_residual_t[:, (i*r_height):(i*r_height+r_height)]
151 |             temp_zs = z @ u
152 |             self._zs_list[(i * z_height):(i * z_height + z_height), :] = temp_zs
153 |             if i == 0:
154 |                 zs = temp_zs
155 |                 ZuuZ = temp_zs @ temp_zs.transpose()
156 |             else:
157 | 
158 |                 zs += temp_zs
159 |                 ZuuZ += temp_zs @ temp_zs.transpose()
160 | 
161 |         W_next = ZuuZ * (1.0 / N)
162 | 
163 |         current_step._XZ_W = _XZ_W
164 |         current_step.M = M
165 |         current_step._M_XZ_W = _M_XZ_W
166 |         current_step.beta = beta
167 |         current_step.residual = residual
168 |         current_step._residual_t = _residual_t
169 |         current_step.SS = SS
170 |         current_step.zs = zs
171 | 
172 |         current_step.ZuuZ = ZuuZ
173 | 
174 |         current_step.W_next = W_next
175 | 
176 |         current_step.vcov = self.vcov(model, step)
177 |         current_step.std_err = np.sqrt(np.diag(current_step.vcov))
178 | 
179 |     def calculate_basic(self, model):
180 | 
181 |         z_list = model.z_list
182 |         _z_t_list = model._z_t_list
183 |         Cx_list = model.final_xy_tables['Cx']
184 |         Cy_list = model.final_xy_tables['Cy']
185 | 
186 |         z_height = int(z_list.shape[0] / model.N)
187 |         x_height = int(Cx_list.shape[0] / model.N)
188 |         x_width = Cx_list.shape[1]
189 |         # self._zx_list=np.empty((z_list.shape[0] , x_width), np.float64)
190 | 
191 |         for i in range(model.N):
192 |             z = z_list[(z_height * i):(z_height * i + z_height), :]
193 |             z_t = _z_t_list[:, (z_height * i):(z_height * i + z_height)]
194 |             x = Cx_list[(x_height * i):(x_height * i + x_height), :]
195 |             y = Cy_list[(x_height * i):(x_height * i + x_height), :]
196 | 
197 |             zx = z @ x
198 | 
199 |             # self._zx_list[(z_height * i):(z_height * i + z_height), :]=zx
200 | 
201 |             if i == 0:
202 |                 temp_xz = zx.transpose()
203 |                 temp_zy = (z @ y).transpose()
204 |             else:
205 |                 temp_xz += zx.transpose()
206 |                 temp_zy += (z @ y).transpose()
207 |         return (temp_xz, temp_zy)
208 | 
209 |     def calculate_W(self, H, model):
210 |         # W1 = (1.0 / N) * sum_product2([z_list, H1, _z_t_list], [(N, 1), (1, 1), (1, N)])
211 |         z_height = int(model.z_list.shape[0] / model.N)
212 | 
213 |         for i in range(model.N):
214 |             z = model.z_list[(z_height * i):(z_height * i + z_height), :]
215 |             z_t = model._z_t_list[:, (z_height * i):(z_height * i + z_height)]
216 | 
217 |             if i == 0:
218 |                 temp_W = z @ H @ z_t
219 | 
220 |             else:
221 |                 temp_W += z @ H @ z_t
222 | 
223 |         return temp_W
224 | 
225 |     def calculate_residual(self, y_list, x_list, beta):
226 | 
227 |         tbr = y_list - x_list @ beta
228 | 
229 |         return (tbr)
230 | 
231 |     def vcov(self, model: dynamic_panel_model, step: int):
232 |         # report robust vcov only
233 | 
234 |         z_list = model.z_list
235 |         Cx = model.final_xy_tables['Cx']
236 | 
237 |         if step >= 2:
238 |             the_step = model.step_results[step - 1]
239 |             previous_step = model.step_results[step - 2]
240 |             step_one = model.step_results[0]
241 |             M2 = the_step.M
242 |             _M2_XZ_W2 = the_step._M_XZ_W
243 |             _W2_inv = the_step.W_inv
244 |             zs2 = the_step.zs
245 |             vcov_step_previous = previous_step.vcov
246 |             residual1 = previous_step.residual
247 |             # residual1 = step_one.residual
248 |             # vcov_step_previous = step_one.vcov
249 |             return Windmeijer(M2, _M2_XZ_W2, _W2_inv, zs2,
250 |                               vcov_step_previous, Cx, z_list, residual1, model.N)
251 |         elif step == 1:
252 |             step_1 = model.step_results[0]
253 |             _M_XZ_W = step_1._M_XZ_W
254 |             W2 = step_1.W_next
255 |             return model.N * (_M_XZ_W @ W2 @ _M_XZ_W.transpose())
256 | 
257 |     def perform_test(self, model, step):
258 |         step1 = model.step_results[0]
259 |         step2 = model.step_results[1]
260 |         num_instru = model.z_information.num_instr
261 |         Cx = model.final_xy_tables['Cx']
262 | 
263 |         step = model.options.steps
264 |         if step == 1 or step == 2:
265 |             _W2_inv = step2.W_inv
266 |             zs = step2.zs
267 | 
268 |             model.hansen = tests.hansen_overid(_W2_inv, model.N, zs, num_instru, \
269 |                                                Cx.shape[1])
270 |         else:
271 |             current_step = model.step_results[step - 1]
272 |             _W2_inv = current_step.W_inv
273 |             zs = current_step.zs
274 |             model.hansen = tests.hansen_overid(_W2_inv, model.N, zs, num_instru, \
275 |                                                Cx.shape[1])
276 | 
277 |         try:
278 |             model.AR_list = tests.AR_test(model, self._zs_list, step, 2)
279 | 
280 |         except Exception as e:
281 |             raise Exception(e)
282 | 
283 |     def get_H1(self, model: dynamic_panel_model, transformation):
284 |         z_list = model.z_list
285 |         z_inf = model.z_information
286 |         width = z_list.shape[1]
287 | 
288 |         if transformation == 'fd':
289 | 
290 |             tbr = np.zeros((width, width), dtype='float64')
291 |             i, j = np.indices(tbr.shape)
292 |             tbr[np.logical_and(i == j, i < z_inf.diff_width)] = 2
293 |             tbr[np.logical_and(i == j - 1, j < z_inf.diff_width)] = -1
294 |             tbr[np.logical_and(j == i - 1, i < z_inf.diff_width)] = -1
295 | 
296 |             tbr[np.logical_and(i == j, i >= z_inf.diff_width)] = 1
297 | 
298 |             tbr[np.logical_and(i == j + z_inf.diff_width, j < z_inf.diff_width)] = -1
299 |             tbr[np.logical_and(i == 1 + j + z_inf.diff_width, j < z_inf.diff_width)] = 1
300 |             tbr[np.logical_and(j == i + z_inf.diff_width, i < z_inf.diff_width)] = -1
301 |             tbr[np.logical_and(j == 1 + i + z_inf.diff_width, i < z_inf.diff_width)] = 1
302 |         else:  # fod
303 |             if model.options.level:
304 |                 D = np.zeros((width, model.T), np.float64)
305 | 
306 |                 up_height = model.z_information.diff_width
307 |                 D_up = model.pdata.generate_D_matrix(up_height, model.T, True)
308 |                 D[0:up_height, :] = D_up
309 | 
310 |                 lower_start_row = up_height
311 |                 lower_start_col = model.T - (width - up_height)
312 |                 lower = D[lower_start_row: width, lower_start_col: model.T]
313 |                 i, j = np.indices(lower.shape)
314 |                 lower[i == j] = 1
315 | 
316 |             else:
317 |                 D = model.pdata.generate_D_matrix(width, model.T,False)
318 |             tbr = D @ D.transpose()
319 | 
320 |         return (tbr)
321 | 
322 |     def form_results(self, model):
323 |         # step = len(model.step_results)
324 |         # the_list = model.step_results[step - 1]
325 |         if model.name != '':
326 |             print(' ' + model.name)
327 | 
328 |         model.form_regression_table()
329 |         ms = model_summary()
330 |         ms.print_summary(model)
331 | 
332 |         self.models.append(model)  # results = {}
333 | 
334 |     def check_model(self, model):
335 |         tbr = False
336 |         num_ARs = len(model.AR_list)
337 |         last_AR = model.AR_list[num_ARs - 1]
338 | 
339 |         if last_AR.P_value > 0.05:
340 |             if model.hansen.p_value > 0.05 and model.hansen.p_value < 0.99999:
341 |                 return True
342 |         else:
343 |             return False
344 | 


--------------------------------------------------------------------------------
/pydynpd/sandbox/multicollinearity.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | 
 4 | # from sklearn.metrics.pairwise import cosine_similarity
 5 | 
 6 | my_data = np.array([[5, 1, 1],
 7 |                     [3, 2, 3],
 8 |                     [1, 3, 2],
 9 |                     [3, 1, 1],
10 |                     [4, 2, 2],
11 |                     [7, 3, 1],
12 |                     [7, 1, 1]])
13 | 
14 | df = pd.DataFrame(data=my_data, columns=['y', 'dummy', 'x'])
15 | just_dummies = pd.get_dummies(df['dummy'])
16 | 
17 | step_1 = pd.concat([df, just_dummies], axis=1)
18 | step_1.drop(['dummy'], inplace=True, axis=1)
19 | A = step_1.to_numpy()
20 | B = np.cov(A, rowvar=False)
21 | 
22 | for i in range(len(A)):
23 |     #    B = np.matmul( A.transpose(), A)
24 |     B = np.cov(A, rowvar=False)
25 |     C = np.linalg.eig(B)
26 |     D = abs(C[0])
27 | 
28 |     if (max(D) / min(D) > 10E+12):
29 |         j = np.argmin(D[2:5]) + 2
30 |         # A_sparse = sparse.csr_matrix(A)
31 |         # similarities = abs(cosine_similarity(A_sparse))
32 |         # col_sum=np.sum(similarities,axis=0)
33 |         # j=np.argmax(col_sum[2:5])+2
34 |         A = np.delete(A, j, 1)
35 |         print(j)
36 | 
37 | # also can output sparse matrices
38 | similarities_sparse = cosine_similarity(A_sparse, dense_output=False)
39 | print('pairwise sparse output:\n {}\n'.format(similarities_sparse))
40 | 


--------------------------------------------------------------------------------
/pydynpd/sandbox/pydynpd.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/pydynpd/sandbox/pydynpd.zip


--------------------------------------------------------------------------------
/pydynpd/specification_tests.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | 
  3 | import numpy as np
  4 | from scipy import stats
  5 | 
  6 | from pydynpd.common_functions import lag
  7 | from pydynpd.info import hansen_test_info, AR_test_info
  8 | 
  9 | 
 10 | def hansen_overid(W2_inv, N, zs, num_instru, num_indep):
 11 |     hansen_test = np.linalg.multi_dot([zs.transpose(), W2_inv, zs]) * (1.0 / N)
 12 |     df = num_instru - num_indep
 13 |     crit = stats.chi2.ppf(q=0.95, df=df)  # if hansen > crit then H0 is not supported
 14 |     p_value = 1 - stats.chi2.cdf(hansen_test, df)
 15 | 
 16 |     hansen_test = hansen_test_info(float(hansen_test), df, float(p_value), crit)
 17 | 
 18 |     return (hansen_test)
 19 | 
 20 | 
 21 | def AR_get_diff_XR(model, beta, ori_residual, r0_height):
 22 |     N = model.N
 23 | 
 24 |     ori_x = model.final_xy_tables['Cx']
 25 |     x0_height = int(ori_x.shape[0] / N)
 26 | 
 27 |     if model.options.transformation == 'fod':
 28 |         diff_y = model.final_xy_tables['Diff_y']
 29 |         diff_x = model.final_xy_tables['Diff_x']
 30 | 
 31 |         diff_r = diff_y - diff_x @ beta
 32 |     elif model.options.level:
 33 |         diff_width = model.z_information.diff_width
 34 |         num_col = ori_x.shape[1]
 35 |         diff_r = np.empty((diff_width * N, 1), dtype=np.float64)
 36 |         diff_x = np.empty((diff_width * N, num_col), dtype=np.float64)
 37 |         for i in range(N):
 38 |             r_i = ori_residual[(i * r0_height):(i * r0_height + r0_height), :]
 39 |             diff_r[(i * diff_width):(i * diff_width + diff_width), 0:1] = r_i[0:diff_width, 0:1]
 40 | 
 41 |             x_i = ori_x[(i * r0_height):(i * r0_height + r0_height), :]
 42 |             diff_x[(i * diff_width):(i * diff_width + diff_width), :] = x_i[0:diff_width, :]
 43 |     else:
 44 |         diff_x = ori_x
 45 |         diff_r = ori_residual
 46 | 
 47 |     return (diff_x, diff_r)
 48 | 
 49 | 
 50 | def AR_test(model, zs_list, step, m):
 51 |     N = model.N
 52 |     z_list = model.z_list
 53 |     z_height = int(z_list.shape[0] / N)
 54 | 
 55 |     current_step = model.step_results[step - 1]
 56 | 
 57 |     ori_residual = current_step.residual
 58 |     r0_height = int(ori_residual.shape[0] / N)
 59 | 
 60 |     M_XZ_W = current_step._M_XZ_W
 61 |     vcov = current_step.vcov
 62 | 
 63 |     diff_x, diff_r = AR_get_diff_XR(model, current_step.beta, ori_residual, r0_height)
 64 |     r_height = int(diff_r.shape[0] / N)
 65 |     x_height = int(diff_x.shape[0] / N)
 66 |     AR_list = []
 67 |     temp = np.zeros((r_height * N, 1), np.float64)
 68 |     lag(diff_r, temp, N, 1, 0)
 69 |     for j in range(1, m + 1):
 70 |         for i in range(N):
 71 |             r_i = diff_r[(r_height * i):(r_height * i + r_height), 0:1]
 72 |             r_t_i = r_i.transpose()
 73 | 
 74 |             lag_res = np.ndarray((r_height, 1), dtype=np.float64)
 75 |             lag(r_i, lag_res, 1, j, 0)
 76 |             lag_res[np.isnan(lag_res)] = 0
 77 |             lag_res_t = lag_res.transpose()
 78 | 
 79 |             x = diff_x[(x_height * i):(x_height * i + x_height), :]
 80 |             # z = z_list[(z_height * i):(z_height * i + z_height), :]
 81 | 
 82 |             # r_whole_i = ori_residual[(i * r0_height):(i * r0_height + r0_height), 0:1]
 83 | 
 84 |             d0_temp = lag_res_t @ r_i
 85 |             d1_temp = d0_temp @ r_t_i @ lag_res
 86 |             EX_temp = lag_res_t @ x
 87 | 
 88 |             zs = zs_list[(z_height * i):(z_height * i + z_height), :]
 89 | 
 90 |             # temp3_temp = z @ r_whole_i @ r_t_i @ lag_res
 91 |             # temp3_temp = zs @ r_t_i @ lag_res
 92 |             temp3_temp = zs @ d0_temp.transpose()
 93 |             if i == 0:
 94 |                 d0 = d0_temp
 95 |                 d1 = d1_temp
 96 |                 EX = EX_temp
 97 |                 temp3 = temp3_temp
 98 |             else:
 99 |                 d0 += d0_temp
100 |                 d1 += d1_temp
101 |                 EX += EX_temp
102 |                 temp3 += temp3_temp
103 | 
104 |         d2 = (-2) * np.linalg.multi_dot([EX, M_XZ_W, temp3])
105 | 
106 |         d3 = np.linalg.multi_dot([EX, vcov, EX.transpose()])
107 |         try:
108 |             AR_temp = float(d0 / math.sqrt(d1 + d2 + d3))
109 |         except Exception as e:
110 |             raise Exception('AR test failed')
111 | 
112 |         P_value = stats.norm.sf(abs(AR_temp)) * 2
113 |         new_AR = AR_test_info(j, AR_temp, P_value)
114 |         AR_list.append(new_AR)
115 | 
116 |     return (AR_list)
117 | 


--------------------------------------------------------------------------------
/pydynpd/variable.py:
--------------------------------------------------------------------------------
 1 | class regular_variable:
 2 |     def __init__(self, name, lag):
 3 |         self.name = name
 4 |         self.lag = lag
 5 | 
 6 | 
 7 | class gmm_var(regular_variable):
 8 |     def __init__(self, name, min_lag, max_lag, lag):
 9 |         super().__init__(name, lag)
10 |         self.min_lag = min_lag
11 |         self.max_lag = max_lag
12 | 
13 | 
14 | class adjustable_lag_indep:
15 |     def __init__(self, name, min_lag, max_lag):
16 |         self.name = name
17 |         self.min_lag = min_lag
18 |         self.max_lag = max_lag
19 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=42"]
3 | build-backend = "setuptools.build_meta"


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | # Note: To use the 'upload' functionality of this file, you must:
  5 | #   $ pipenv install twine --dev
  6 | 
  7 | import io
  8 | import os
  9 | import sys
 10 | from shutil import rmtree
 11 | 
 12 | from setuptools import find_packages, setup, Command
 13 | 
 14 | # Package meta-data.
 15 | NAME = 'pydynpd'
 16 | DESCRIPTION = 'A package to estimate dynamic panel data model using difference GMM and system GMM.'
 17 | URL = 'https://github.com/dazhwu/pydynpd'
 18 | EMAIL = 'wudz800@gmail.com'
 19 | AUTHOR = 'Dazhong Wu'
 20 | REQUIRES_PYTHON = '>=3.6.0'
 21 | VERSION = '0.2.1'
 22 | 
 23 | # What packages are required for this module to be executed?
 24 | REQUIRED = [
 25 |     'numpy', 'scipy', 'prettytable', 'pandas'
 26 | ]
 27 | 
 28 | # What packages are optional?
 29 | EXTRAS = {
 30 |     # 'fancy feature': ['django'],
 31 | }
 32 | 
 33 | # The rest you shouldn't have to touch too much :)
 34 | # ------------------------------------------------
 35 | # Except, perhaps the License and Trove Classifiers!
 36 | # If you do change the License, remember to change the Trove Classifier for that!
 37 | 
 38 | here = os.path.abspath(os.path.dirname(__file__))
 39 | 
 40 | # Import the README and use it as the long-description.
 41 | # Note: this will only work if 'README.md' is present in your MANIFEST.in file!
 42 | try:
 43 |     with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f:
 44 |         long_description = '\n' + f.read()
 45 | except FileNotFoundError:
 46 |     long_description = DESCRIPTION
 47 | 
 48 | # Load the package's __version__.py module as a dictionary.
 49 | about = {}
 50 | if not VERSION:
 51 |     project_slug = NAME.lower().replace("-", "_").replace(" ", "_")
 52 |     with open(os.path.join(here, project_slug, '__version__.py')) as f:
 53 |         exec(f.read(), about)
 54 | else:
 55 |     about['__version__'] = VERSION
 56 | 
 57 | 
 58 | class UploadCommand(Command):
 59 |     """Support setup.py upload."""
 60 | 
 61 |     description = 'Build and publish the package.'
 62 |     user_options = []
 63 | 
 64 |     @staticmethod
 65 |     def status(s):
 66 |         """Prints things in bold."""
 67 |         print('\033[1m{0}\033[0m'.format(s))
 68 | 
 69 |     def initialize_options(self):
 70 |         pass
 71 | 
 72 |     def finalize_options(self):
 73 |         pass
 74 | 
 75 |     def run(self):
 76 |         try:
 77 |             self.status('Removing previous builds…')
 78 |             rmtree(os.path.join(here, 'dist'))
 79 |         except OSError:
 80 |             pass
 81 | 
 82 |         self.status('Building Source and Wheel (universal) distribution…')
 83 |         os.system('{0} setup.py sdist bdist_wheel --universal'.format(sys.executable))
 84 | 
 85 |         self.status('Uploading the package to PyPI via Twine…')
 86 |         os.system('twine upload dist/*')
 87 | 
 88 |         self.status('Pushing git tags…')
 89 |         os.system('git tag v{0}'.format(about['__version__']))
 90 |         os.system('git push --tags')
 91 | 
 92 |         sys.exit()
 93 | 
 94 | 
 95 | # Where the magic happens:
 96 | setup(
 97 |     name=NAME,
 98 |     version=about['__version__'],
 99 |     description=DESCRIPTION,
100 |     long_description=long_description,
101 |     long_description_content_type='text/markdown',
102 |     author=AUTHOR,
103 |     author_email=EMAIL,
104 |     python_requires=REQUIRES_PYTHON,
105 |     url=URL,
106 |     packages=find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*"]),
107 |     # If your package is a single module, use this instead of 'packages':
108 |     # py_modules=['mypackage'],
109 | 
110 |     # entry_points={
111 |     #     'console_scripts': ['mycli=mymodule:cli'],
112 |     # },
113 |     install_requires=REQUIRED,
114 |     extras_require=EXTRAS,
115 |     include_package_data=True,
116 |     license='MIT',
117 |     classifiers=[
118 |         # Trove classifiers
119 |         # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
120 |         'License :: OSI Approved :: MIT License',
121 |         'Programming Language :: Python',
122 |         'Programming Language :: Python :: 3',
123 |         'Programming Language :: Python :: 3.6',
124 |         'Programming Language :: Python :: Implementation :: CPython',
125 |         'Programming Language :: Python :: Implementation :: PyPy'
126 |     ],
127 |     # $ setup.py publish support.
128 |     cmdclass={
129 |         'upload': UploadCommand,
130 |     },
131 | )
132 | 


--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from  pydynpd import regression
3 | 
4 | df = pd.read_csv("tourism_covid_data-total.csv")  #, index_col=False)
5 | df['monthly_cases']=df['monthly cases']
6 | #command_str='tourism_demand L1.tourism_demand L1.monthly_cases  | gmm(tourism_demand, 2 6) iv(L1.monthly_cases)| collapse '
7 | command_str='tourism_demand L1.tourism_demand monthly_cases  | gmm(tourism_demand, 2 6) iv(monthly_cases)| nolevel collapse '
8 | mydpd = regression.abond(command_str, df, ['Country', 'month_year'])


--------------------------------------------------------------------------------
/untitled.md:
--------------------------------------------------------------------------------
1 | Package pyndynpd is able to estimate dynamic panel models that take a form as follows:
2 | 
3 | $$y_{it}=\sum_{j=1}^{p}\alpha_{j}y_{i,t-j}+\sum_{k=1}^{m}\sum_{j=0}^{q_{k}}\beta_{jk}r_{i,t-j}^{(k)}+\boldsymbol{\delta}\boldsymbol{d_{i,t}}+\boldsymbol{\gamma}\boldsymbol{s_{i,t}}+u_{i}+\epsilon_{it}\label{eq:typical_model}$$ In the model above, $y_{i,t-j}$ ($j=1,2,\ldots,p$) denotes a group of $p$ lagged dependent variables. $r_{i,t-j}^{(k)}$ represents a group of $m$ endogeneous variables other than lagged $y$. $\boldsymbol{d_{it}}$ is a vector of predetermined variables which may potentially correlate with past errors, $\boldsymbol{s_{it}}$ is a vector of exogenous variables, and $u_{i}$ represents fixed effect. For illustration purpose, let's consider a basic form of dynamic panel model:
4 | 
5 | $$y_{it}=\alpha_{1}y_{i,t-1}+\delta d_{i,t}+u_{i}+\epsilon_{it}\label{eq:simple_model}$$


--------------------------------------------------------------------------------
/vignettes/.ipynb_checkpoints/Guide-checkpoint.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "60fac165-f308-4815-b379-28da3303aa81",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "# Technique Guide\n",
  9 |     "\n",
 10 |     "Package pyndynpd is able to estimate dynamic panel models that take a form as follows:\n",
 11 |     "\n",
 12 |     "$$y_{it}=\\sum_{j=1}^{p}\\alpha_{j}y_{i,t-j}+\\sum_{k=1}^{m}\\sum_{j=0}^{q_{k}}\\beta_{jk}r_{i,t-j}^{(k)}+\\boldsymbol{\\delta}\\boldsymbol{d_{i,t}}+\\boldsymbol{\\gamma}\\boldsymbol{s_{i,t}}+u_{i}+\\epsilon_{it} $$ \n",
 13 |     "\n",
 14 |     "In the model above, $y_{i,t-j}$ ($j=1,2,\\ldots,p$) denotes a group of $p$ lagged dependent variables. $r_{i,t-j}^{(k)}$ represents a group of $m$ endogeneous variables other than lagged $y$. $\\boldsymbol{d_{it}}$ is a vector of predetermined variables which may potentially correlate with past errors, $\\boldsymbol{s_{it}}$ is a vector of exogenous variables, and $u_{i}$ represents fixed effect. For illustration purpose, let's consider a basic form of dynamic panel model:\n",
 15 |     "\n",
 16 |     "$$\n",
 17 |     "\\begin{align}\n",
 18 |     "y_{it}=\\alpha_{1}y_{i,t-1}+\\delta d_{i,t}+u_{i}+\\epsilon_{it} \\label{basic}\\tag{1}\n",
 19 |     "\\end{align}\n",
 20 |     "$$\n",
 21 |     "\n",
 22 |     "As lagged dependent variable $y_{i,t-1}$ is included as regressor, the popular techniques in static panel models, such as fixed-effect and first-difference estimators, no longer produce consistent results. Researchers have developed many methods to estimate dynamic panel model. Essentially there are two types of GMM estimates, difference GMM and system GMM.\n",
 23 |     "\n",
 24 |     "## Difference GMM \n",
 25 |     "\n",
 26 |     "Difference GMM was developed by [@arellano1991some]. The first step in the process is to eliminate the fixed-effect term $u_{i}$. First differencing Eq ($\\ref{basic}$) yields:\n",
 27 |     "\n",
 28 |     "$$\n",
 29 |     "\\begin{align}\n",
 30 |     "\\Delta y_{it}=\\alpha_{1}\\Delta y_{i,t-1}+\\delta\\Delta d_{i,t}+\\Delta\\epsilon_{it}\\label{fd}\\tag{2} \n",
 31 |     "\\end{align}$$\n",
 32 |     "\n",
 33 |     "In the model above, $\\Delta y_{i,t-1}$ correlates with $\\Delta\\epsilon_{i,t}$ because $\\Delta y_{i,t-1}=y_{i,t-1}-y_{i,t-2}$, $\\Delta\\epsilon_{i,t}=\\epsilon_{i,t}-\\epsilon_{i,t-1}$, and $y_{i,t-1}$ is affected by $\\epsilon_{i,t-1}$. As a result, estimating Eq ($\\ref{fd}$) directly produces inconsistent result. Instrumental variables are used to solve the issue. [@arellano1991some]suggest to use all lagged $y$ dated $t-2$ and earlier (i.e., $y_{i,1}$, $y_{i,2}$,\\..., $y_{i,t-2}$) as instruments for $\\Delta y_{i,t-1}$. Similarly, the instruments for predetermined variable $\\Delta d_{it}$ include $d_{i,1}$, $d_{i,2}$,\\..., $d_{i,t-1}$. Let $z_{i}$ be the instrument variable matrix for individual i:\n",
 34 |     "\n",
 35 |     "$$\n",
 36 |     "z_{i}=\\left[\\begin{array}{ccccccccccccccccccc}\n",
 37 |     "y_{i1} & 0 & 0 & \\ldots & \\ldots & 0 & \\ldots & 0 & d_{i1} & d_{i2} & 0 & 0 & 0 & 0 & 0 & 0 & 0 & \\ldots & 0\\\\\n",
 38 |     "0 & y_{i1} & y_{i2} & 0 & 0 & 0 & \\ldots & 0 & 0 & 0 & d_{i1} & d_{i2} & d_{i3} & 0 & 0 & 0 & 0 & \\ldots & 0\\\\\n",
 39 |     "\\vdots & 0 & 0 & \\vdots &  &  & \\ldots &  &  &  &  &  &  & \\vdots &  &  &  & \\ldots & 0\\\\\n",
 40 |     "0 & 0 & 0 & 0 & 0 & 0 & \\ldots & y_{i,T-2} & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & \\ldots & d_{i,T-1}\n",
 41 |     "\\end{array}\\right]$$\n",
 42 |     "\n",
 43 |     "Difference GMM is based on the moment condition $E(z_{i}^{\\prime}\\Delta\\epsilon_{i})=0$ where $\\Delta \\epsilon_{i}=(\\Delta \\epsilon_{i2}, \\Delta\\epsilon_{i3}\\textrm{, }...,\\Delta\\epsilon_{iT})^{\\prime}$ and $z_{i}$ is the instrument variable matrix. Applying this moment condition to sample data, we have $(1/N)\\sum_{i=1}^{N}z{}_{i}^{\\prime}(\\Delta y_{i}-\\theta\\Delta x_{i})=0$ where $\\theta=(\\alpha_{1},\\delta)'$ and $\\Delta x_{i}=(\\Delta y_{i,t-1},\\Delta d_{it})$ for t=3, \\... T. When the number of instruments is greater than the number of independent variables, the moment condition is overidentified and in general there is no $\\theta$ available to satisfy the moment condition. Instead, we look for a $\\theta$ to minimize moment condition. That is:\n",
 44 |     "\n",
 45 |     "$$\\hat{\\theta}_{gmm}=\\arg\\min_{\\theta}\\left(\\frac{1}{N}\\sum_{i=1}^{N}(\\Delta y_{i}-\\theta\\Delta x_{i})^{\\prime}z_{i}\\right)W\\left(\\frac{1}{N}\\sum_{i=1}^{N}z^{\\prime}{}_{i}(\\Delta y_{i}-\\theta\\Delta x_{i})\\right)$$\n",
 46 |     "\n",
 47 |     "where W is the weighting matrix of the moments. There are two popularly used weighting matrixes. In a one-step GMM estimate, the weighting matrix is\n",
 48 |     "\n",
 49 |     "$$W_{1}=\\left(\\frac{1}{N}Z^{\\prime}H_{1}Z\\right)^{-1}$$\n",
 50 |     "\n",
 51 |     "where matrix H has twos in the main diagnols, minus ones in the first subdiagnols, and zeros elsewhere:\n",
 52 |     "\n",
 53 |     "$$H_{1}=\\left[\\begin{array}{cccccc}\n",
 54 |     "2 & -1 & 0 & 0 & \\ldots & 0\\\\\n",
 55 |     "-1 & 2 & -1 & 0 & \\ldots & 0\\\\\n",
 56 |     "0 & \\ddots & \\ddots & \\ddots & \\ddots & \\vdots\\\\\n",
 57 |     "\\vdots & \\ddots & -1 & 2 & -1 & 0\\\\\n",
 58 |     "0 & \\ddots & 0 & -1 & 2 & -1\\\\\n",
 59 |     "0 & \\ldots & 0 & 0 & -1 & 2\n",
 60 |     "\\end{array}\\right]$$\n",
 61 |     "\n",
 62 |     "On the other hand, in a two-step GMM estimate, the weighting matrix is\n",
 63 |     "\n",
 64 |     "$$W_{2}=\\left(\\frac{1}{N}Z^{\\prime}H_{2}Z\\right)^{-1}$$\n",
 65 |     "\n",
 66 |     "where $H_{2}=\\Delta\\hat{\\epsilon}\\Delta\\hat{\\epsilon}^{\\prime}$ and $\\Delta\\hat{\\epsilon}$ is the residual from one-step GMM.\n",
 67 |     "\n",
 68 |     "## System GMM\n",
 69 |     "\n",
 70 |     "Compared with difference GMM, sytem GMM adds additional moment conditions, resulting in more instruments:\n",
 71 |     "\n",
 72 |     "$$\n",
 73 |     "\\begin{align}\n",
 74 |     "z_{i}=\\left[\\begin{array}{cccccccccccccccc|cccccccc}\n",
 75 |     "y_{i1} & 0 & 0 & 0 & 0 & 0 & \\ldots & 0 & d_{i1} & d_{i2} & 0 & 0 & 0 & 0 & 0 & 0 & 0 & \\ldots & 0 & 0 & 0 & 0 & \\ldots & 0\\\\\n",
 76 |     "0 & y_{i1} & y_{i2} & 0 & 0 & 0 & \\ldots & 0 & 0 & 0 & d_{i1} & d_{i2} & d_{i3} & 0 & 0 & 0 & 0 & \\ldots & 0 & 0 & 0 & 0 & \\ldots & 0\\\\\n",
 77 |     " &  &  & \\vdots &  &  & \\ldots &  & \\vdots &  &  &  &  & \\vdots & \\ldots & \\vdots &  & \\ldots & 0 &  &  &  & \\ddots & 0\\\\\n",
 78 |     "0 & 0 & 0 & 0 & 0 & 0 & \\ldots & y_{i,T-2} & 0 & 0 & 0 & 0 & 0 & 0 & \\ldots & d_{i,T-1} & 0 & \\ldots & 0 & 0 & 0 & 0 & 0 & 0\\\\\n",
 79 |     "\\hline 0 & \\ldots & 0 &  &  &  &  &  &  &  &  &  &  &  & 0 & 0 & \\Delta y_{i2} & \\ldots & 0 & 0 & \\Delta d_{i3} &  & 0\\\\\n",
 80 |     "\\vdots &  &  &  &  &  &  &  &  &  &  &  &  &  &  & 0 & 0 & \\Delta y_{i3} & \\ldots & 0 & 0 & \\Delta d_{i4} &  & 0\\\\\n",
 81 |     "\\vdots &  &  &  &  &  &  &  &  &  &  &  &  &  &  & \\vdots & \\vdots & \\vdots & \\ddots & \\vdots & \\vdots &  & \\ddots\\\\\n",
 82 |     "0 & \\ldots &  &  &  &  &  &  &  &  &  &  &  &  & \\ldots & 0 & 0 & 0 & \\ldots & \\Delta y_{i,T-1} & 0 & 0 & \\ldots & \\Delta y_{i,T}\n",
 83 |     "\\end{array}\\right] \\label{z_sys}\\tag{3}\n",
 84 |     "\\end{align}$$\n",
 85 |     "\n",
 86 |     "$$\\hat{\\theta}_{gmm}=\\arg\\min_{\\theta}\\left(\\frac{1}{N}\\sum_{i=1}^{N}(\\widetilde{y}_{i}-\\theta\\widetilde{x_{i}})^{\\prime}z_{i}\\right)W\\left(\\frac{1}{N}\\sum_{i=1}^{N}z^{\\prime}{}_{i}(\\widetilde{y}-\\theta\\widetilde{x_{i}})\\right)$$\n",
 87 |     "\n",
 88 |     "where $$\\widetilde{y}=\\left(\\begin{array}{c}\n",
 89 |     "\\Delta y_{i}\\\\\n",
 90 |     "\\hline y_{i}\n",
 91 |     "\\end{array}\\right)\\textrm{ and }\\widetilde{x_{i}}=\\left(\\begin{array}{c|c}\n",
 92 |     "\\Delta x_{i} & 0\\\\\n",
 93 |     "\\hline x_{i} & 1\n",
 94 |     "\\end{array}\\right)$$\n",
 95 |     "\n",
 96 |     "## Robust estimation of coefficients' covariance \n",
 97 |     "\n",
 98 |     "## Specification Test \n",
 99 |     "\n",
100 |     "### Error serial correlation test \n",
101 |     "\n",
102 |     "Second-order serial correlation test: if $\\epsilon_{it}$ in Eq ($\\ref{basic}$) is serially correlated, GMM estimates are no longer consistent. In a first-differenced model （e.g., Eq ($\\ref{fd}$)), to test whether $\\epsilon_{i,t-1}$ is correlated with $\\epsilon_{i,t-2}$, the second-order autocovariance of the residuals, $\\textrm{AR(2)}$, is calculated as:\n",
103 |     "\n",
104 |     "$$AR(2)=\\frac{b_{0}}{\\sqrt{b_{1}+b_{2}+b_{3}}}\\textrm{ where}$$\n",
105 |     "\n",
106 |     "$$b_{0}=\\sum_{i=1}^{N}\\Delta\\hat{\\hat{\\epsilon}}_{i}^{\\prime}L_{\\Delta\\hat{\\hat{\\epsilon}}}^{2}$$\n",
107 |     "\n",
108 |     "$$b_{1}=\\sum_{i=1}^{N}L_{\\Delta\\hat{\\hat{\\epsilon}}_{i}^{\\prime}}^{2}H_{2}L_{\\Delta\\hat{\\hat{\\epsilon}}_{i}}^{2}$$ \n",
109 |     "\n",
110 |     "$$b_{2}=\\textrm{-}2\\left(\\sum_{i=1}^{N}L_{\\Delta\\hat{\\hat{\\epsilon}}_{i}^{\\prime}}^{2}x_{i}\\right)\\left[\\left(\\sum_{i=1}^{N}x_{i}^{\\prime}z_{i}\\right)W_{2}\\left(\\sum_{i=1}^{N}z_{i}^{\\prime}x_{i}\\right)\\right]^{-1}\\left(\\sum_{i=1}^{N}x_{i}^{\\prime}z_{i}\\right)W_{2}\\left(\\sum_{i=1}^{N}z_{i}^{\\prime}H_{2}L_{\\Delta\\hat{\\hat{\\epsilon}}_{i}}^{2}\\right)$$\n",
111 |     "\n",
112 |     "$$b_{3}=\\left(\\sum_{i=1}^{N}L_{\\Delta\\hat{\\hat{\\epsilon}}_{i}^{\\prime}}^{2}x_{i}\\right)\\hat{V}_{\\hat{\\hat{\\theta}}}\\left(\\sum_{i=1}^{N}x_{i}^{\\prime}L_{\\Delta\\hat{\\hat{\\epsilon}}_{i}}^{2}\\right)$$\n",
113 |     "\n",
114 |     "### Hansen overidentification test \n",
115 |     "\n",
116 |     "Hansen overidentification test is used to check if instruments are exogeneous. Under the null hypothesis that instruments are valid, test statistic, $S$, should be close to zero:\n",
117 |     "\n",
118 |     "$$S=\\left(\\sum_{i=1}^{N}\\Delta\\hat{\\hat{\\epsilon}}_{i}^{\\prime}z_{i}\\right)W_{2}\\left(\\sum_{i=1}^{N}z_{i}^{\\prime}\\Delta\\hat{\\hat{\\epsilon}}_{i}\\right)$$\n",
119 |     "\n",
120 |     "# Handling instrument proliferation issue \n",
121 |     "\n",
122 |     "Difference GMM and system GMM may generate too many instruments, which causes several problems (citation). Package pydynpd allows users to reduce the number of instruments in two ways. First, users can control the number of instruments in command string. For example, $\\textrm{gmm(w, 2:3)}$ states that only $n_{t-2}$ and $n_{t-3}$ are used as instruments, rather than all lagged $n$ dated $t-2$ and earlier. Second, users can choose to collapse the instrumental variable matrix. For example, if collapsed, matrix as in Eq ($\\ref{z_sys}$) is changed to:\n",
123 |     "\n",
124 |     "$$z_{i}=\\left[\\begin{array}{cccccccccc|cc}\n",
125 |     "y_{i1} & 0 & 0 & \\ldots & 0 & d_{i1} & d_{i2} & 0 & \\ldots & 0\\\\\n",
126 |     "y_{i1} & y_{i2} & 0 & \\ldots & 0 & d_{i1} & d_{i2} & d_{i3} & \\ldots & 0\\\\\n",
127 |     "\\vdots & \\vdots & \\ddots & \\ldots & \\vdots &  &  &  & \\ddots & \\vdots\\\\\n",
128 |     "y_{i1} & y_{i2} & y_{i3} & \\ldots & y_{i,T-2} & d_{i1} & d_{i2} & d_{id} & \\ldots & d_{i,T-1}\\\\\n",
129 |     "\\hline 0 &  &  & \\ldots & 0 & 0 & 0 & 0 & \\ldots & 0 & \\Delta y_{i2} & \\Delta d_{i3}\\\\\n",
130 |     "0 & 0 &  &  & 0 & 0 & 0 & 0 & \\ldots & 0 & \\Delta y_{i3} & \\Delta d_{i4}\\\\\n",
131 |     "\\vdots &  & \\ddots &  & \\vdots &  &  & \\vdots &  & \\vdots & \\vdots & \\vdots\\\\\n",
132 |     "0 & 0 & 0 & \\ldots & 0 & 0 &  & 0 & \\ldots & 0 & \\Delta y_{i,T-1} & \\Delta d_{iT}\n",
133 |     "\\end{array}\\right]$$\n",
134 |     "\n",
135 |     "This change dramatically reduces the number of instruments. Intuitively, the number of instruments is positively associated with the width of the matrix above.\n",
136 |     "\n"
137 |    ]
138 |   },
139 |   {
140 |    "cell_type": "code",
141 |    "execution_count": null,
142 |    "id": "6eefc858",
143 |    "metadata": {},
144 |    "outputs": [],
145 |    "source": []
146 |   },
147 |   {
148 |    "cell_type": "code",
149 |    "execution_count": null,
150 |    "id": "7ca931c1-e7d1-484e-aae8-727801b3a60d",
151 |    "metadata": {},
152 |    "outputs": [],
153 |    "source": []
154 |   },
155 |   {
156 |    "cell_type": "code",
157 |    "execution_count": null,
158 |    "id": "28e3e167-e9c3-427d-ba56-521262920be1",
159 |    "metadata": {},
160 |    "outputs": [],
161 |    "source": []
162 |   }
163 |  ],
164 |  "metadata": {
165 |   "kernelspec": {
166 |    "display_name": "Python 3 (ipykernel)",
167 |    "language": "python",
168 |    "name": "python3"
169 |   },
170 |   "language_info": {
171 |    "codemirror_mode": {
172 |     "name": "ipython",
173 |     "version": 3
174 |    },
175 |    "file_extension": ".py",
176 |    "mimetype": "text/x-python",
177 |    "name": "python",
178 |    "nbconvert_exporter": "python",
179 |    "pygments_lexer": "ipython3",
180 |    "version": "3.9.7"
181 |   }
182 |  },
183 |  "nbformat": 4,
184 |  "nbformat_minor": 5
185 | }
186 | 


--------------------------------------------------------------------------------
/vignettes/.ipynb_checkpoints/Tutorial-checkpoint.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "a4357a62",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "# abond command\n",
  9 |     "In this tutorial, we illustrate the functions of pydynpd with examples. The first data set is one from Arellano and Bond (1991). It is an unblanced panel with 140 firms over 9 years (1976-1984). You can download the data (data.csv) from the /benchmark/code folder. We first consider the following basic model:\n",
 10 |     "\n",
 11 |     "$$\n",
 12 |     "\\begin{align}\n",
 13 |     "n_{i,t}=\\alpha_1n_{i,t-1}+\\alpha_2n_{i,t-2}+\\beta_1w_{i,t}+\\gamma_1k_{i,t}+u_{i}+\\epsilon_{i,t}\n",
 14 |     "\\end{align}\n",
 15 |     "$$\n",
 16 |     "\n",
 17 |     "In the model above, variables $n$, $w$, and $k$ are the natural logarithm of employment, wage, and capital respectively. $u_{i}$ is unobserved fixed effect and $\\epsilon_{i,t}$ is idiosyncraic error. \n",
 18 |     "\n",
 19 |     "To estimate the model, we first load data to Pandas data frame:\n",
 20 |     "\n"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "code",
 25 |    "execution_count": 1,
 26 |    "id": "7ca931c1-e7d1-484e-aae8-727801b3a60d",
 27 |    "metadata": {},
 28 |    "outputs": [
 29 |     {
 30 |      "ename": "ModuleNotFoundError",
 31 |      "evalue": "No module named 'pydynpd'",
 32 |      "output_type": "error",
 33 |      "traceback": [
 34 |       "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
 35 |       "\u001b[1;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
 36 |       "\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_6876/497402013.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mpandas\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[1;32mfrom\u001b[0m  \u001b[0mpydynpd\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mregression\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      3\u001b[0m \u001b[0mdf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"test_data.csv\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
 37 |       "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'pydynpd'"
 38 |      ]
 39 |     }
 40 |    ],
 41 |    "source": [
 42 |     "import pandas as pd\n",
 43 |     "from  pydynpd import regression\n",
 44 |     "df = pd.read_csv(\"test_data.csv\")"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "markdown",
 49 |    "id": "6a81769e",
 50 |    "metadata": {},
 51 |    "source": [
 52 |     "Then we construct command string to describe the model. A command string has two or three parts, which are separated by |.\n",
 53 |     "Part 1 is a list starting with the dependent variable, and followed by independent variables except time dummies. Given the model, part 1 is:\n",
 54 |     "\n",
 55 |     "```\n",
 56 |     "n L1.n L2.n w k\n",
 57 |     "```\n",
 58 |     "In the command above, L is the lag operator as in L1.n (i.e., $n_{i,t-1}$) or L2.n for 2 lags of n (i.e., $n_{i,t-2}$). \n",
 59 |     "\n",
 60 |     "Part 2 indicates how instruments are created. First, suppose we want to use the second and third lags of dependent variable n (i.e., L2.n and L3.n) as instruments, then we include the following GMM list:\n",
 61 |     "\n",
 62 |     "```\n",
 63 |     "GMM(n, 2:3)\n",
 64 |     "```\n",
 65 |     "Next, suppose we believe that variable w is a predetermined variable and use its first and deeper lags (i.e., L1.w, L2.w, ...) as instruments. Then we include a second GMM list:\n",
 66 |     "\n",
 67 |     "```\n",
 68 |     "GMM(w, 1:.)\n",
 69 |     "```\n",
 70 |     "The dot (.) above means there is no restriction regarding the maximum lag of $w$. In other words, we use all available lags.\n",
 71 |     "Next, suppose variable $k$ is a strictly exogenous variable. So, we use IV() list:\n",
 72 |     "\n",
 73 |     "```\n",
 74 |     "IV(k)\n",
 75 |     "```\n",
 76 |     "This tells pydynpd to use variable $k$ itself as instrument.\n",
 77 |     "Finally, we put all GMM and IV lists together to form part 2:\n",
 78 |     "```\n",
 79 |     "GMM(n, 2:3) GMM(w, 1:.) IV(k)\n",
 80 |     "```\n",
 81 |     "\n",
 82 |     "Suppose our command just has the two parts above, then we combine the two parts together:"
 83 |    ]
 84 |   },
 85 |   {
 86 |    "cell_type": "code",
 87 |    "execution_count": null,
 88 |    "id": "8ef76f2b",
 89 |    "metadata": {},
 90 |    "outputs": [],
 91 |    "source": [
 92 |     "command_str='n L1.n L2.n w k  | GMM(n, 2:3) GMM(w, 1:.) IV(k)'\n"
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "markdown",
 97 |    "id": "4ed2ed7f",
 98 |    "metadata": {},
 99 |    "source": [
100 |     "Finally, we use abond function to estimate our model. Note that three parameters should be provided. The first one is the command string discussed above. The second one is the data, and the third one is a list of two variables that indentify individual firm and year respectively. "
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "code",
105 |    "execution_count": null,
106 |    "id": "8a47a1de",
107 |    "metadata": {},
108 |    "outputs": [],
109 |    "source": [
110 |     "mydpd = regression.abond(command_str, df, ['id', 'year'])"
111 |    ]
112 |   }
113 |  ],
114 |  "metadata": {
115 |   "kernelspec": {
116 |    "display_name": "Python 3 (ipykernel)",
117 |    "language": "python",
118 |    "name": "python3"
119 |   },
120 |   "language_info": {
121 |    "codemirror_mode": {
122 |     "name": "ipython",
123 |     "version": 3
124 |    },
125 |    "file_extension": ".py",
126 |    "mimetype": "text/x-python",
127 |    "name": "python",
128 |    "nbconvert_exporter": "python",
129 |    "pygments_lexer": "ipython3",
130 |    "version": "3.9.7"
131 |   }
132 |  },
133 |  "nbformat": 4,
134 |  "nbformat_minor": 5
135 | }
136 | 


--------------------------------------------------------------------------------
/vignettes/Guide.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "60fac165-f308-4815-b379-28da3303aa81",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "# Technique Guide\n",
  9 |     "\n",
 10 |     "Package pyndynpd is able to estimate dynamic panel models that take a form as follows:\n",
 11 |     "\n",
 12 |     "$$y_{it}=\\sum_{j=1}^{p}\\alpha_{j}y_{i,t-j}+\\sum_{k=1}^{m}\\sum_{j=0}^{q_{k}}\\beta_{jk}r_{i,t-j}^{(k)}+\\boldsymbol{\\delta}\\boldsymbol{d_{i,t}}+\\boldsymbol{\\gamma}\\boldsymbol{s_{i,t}}+u_{i}+\\epsilon_{it} $$ \n",
 13 |     "\n",
 14 |     "In the model above, $y_{i,t-j}$ ($j=1,2,\\ldots,p$) denotes a group of $p$ lagged dependent variables. $r_{i,t-j}^{(k)}$ represents a group of $m$ endogeneous variables other than lagged $y$. $\\boldsymbol{d_{it}}$ is a vector of predetermined variables which may potentially correlate with past errors, $\\boldsymbol{s_{it}}$ is a vector of exogenous variables, and $u_{i}$ represents fixed effect. For illustration purpose, let's consider a basic form of dynamic panel model:\n",
 15 |     "\n",
 16 |     "$$\n",
 17 |     "\\begin{align}\n",
 18 |     "y_{it}=\\alpha_{1}y_{i,t-1}+\\delta d_{i,t}+u_{i}+\\epsilon_{it} \\label{basic}\\tag{1}\n",
 19 |     "\\end{align}\n",
 20 |     "$$\n",
 21 |     "\n",
 22 |     "As lagged dependent variable $y_{i,t-1}$ is included as regressor, the popular techniques in static panel models, such as fixed-effect and first-difference estimators, no longer produce consistent results. Researchers have developed many methods to estimate dynamic panel model. Essentially there are two types of GMM estimates, difference GMM and system GMM.\n",
 23 |     "\n",
 24 |     "## Difference GMM \n",
 25 |     "\n",
 26 |     "The first step in the process is to eliminate the fixed-effect term $u_{i}$. First differencing Eq ($\\ref{basic}$) yields:\n",
 27 |     "\n",
 28 |     "$$\n",
 29 |     "\\begin{align}\n",
 30 |     "\\Delta y_{it}=\\alpha_{1}\\Delta y_{i,t-1}+\\delta\\Delta d_{i,t}+\\Delta\\epsilon_{it}\\label{fd}\\tag{2} \n",
 31 |     "\\end{align}$$\n",
 32 |     "\n",
 33 |     "In the model above, $\\Delta y_{i,t-1}$ correlates with $\\Delta\\epsilon_{i,t}$ because $\\Delta y_{i,t-1}=y_{i,t-1}-y_{i,t-2}$, $\\Delta\\epsilon_{i,t}=\\epsilon_{i,t}-\\epsilon_{i,t-1}$, and $y_{i,t-1}$ is affected by $\\epsilon_{i,t-1}$. As a result, estimating Eq ($\\ref{fd}$) directly produces inconsistent result. Instrumental variables are used to solve the issue. [@arellano1991some]suggest to use all lagged $y$ dated $t-2$ and earlier (i.e., $y_{i,1}$, $y_{i,2}$,\\..., $y_{i,t-2}$) as instruments for $\\Delta y_{i,t-1}$. Similarly, the instruments for predetermined variable $\\Delta d_{it}$ include $d_{i,1}$, $d_{i,2}$,\\..., $d_{i,t-1}$. Let $z_{i}$ be the instrument variable matrix for individual i:\n",
 34 |     "\n",
 35 |     "$$\n",
 36 |     "z_{i}=\\left[\\begin{array}{ccccccccccccccccccc}\n",
 37 |     "y_{i1} & 0 & 0 & \\ldots & \\ldots & 0 & \\ldots & 0 & d_{i1} & d_{i2} & 0 & 0 & 0 & 0 & 0 & 0 & 0 & \\ldots & 0\\\\\n",
 38 |     "0 & y_{i1} & y_{i2} & 0 & 0 & 0 & \\ldots & 0 & 0 & 0 & d_{i1} & d_{i2} & d_{i3} & 0 & 0 & 0 & 0 & \\ldots & 0\\\\\n",
 39 |     "\\vdots & 0 & 0 & \\vdots &  &  & \\ldots &  &  &  &  &  &  & \\vdots &  &  &  & \\ldots & 0\\\\\n",
 40 |     "0 & 0 & 0 & 0 & 0 & 0 & \\ldots & y_{i,T-2} & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & \\ldots & d_{i,T-1}\n",
 41 |     "\\end{array}\\right]$$\n",
 42 |     "\n",
 43 |     "Difference GMM is based on the moment condition $E(z_{i}^{\\prime}\\Delta\\epsilon_{i})=0$ where $\\Delta \\epsilon_{i}=(\\Delta \\epsilon_{i2}, \\Delta\\epsilon_{i3}\\textrm{, }...,\\Delta\\epsilon_{iT})^{\\prime}$ and $z_{i}$ is the instrument variable matrix. Applying this moment condition to sample data, we have $(1/N)\\sum_{i=1}^{N}z{}_{i}^{\\prime}(\\Delta y_{i}-\\theta\\Delta x_{i})=0$ where $\\theta=(\\alpha_{1},\\delta)'$ and $\\Delta x_{i}=(\\Delta y_{i,t-1},\\Delta d_{it})$ for t=3, \\... T. When the number of instruments is greater than the number of independent variables, the moment condition is overidentified and in general there is no $\\theta$ available to satisfy the moment condition. Instead, we look for a $\\theta$ to minimize moment condition. That is:\n",
 44 |     "\n",
 45 |     "$$\\hat{\\theta}_{gmm}=\\arg\\min_{\\theta}\\left(\\frac{1}{N}\\sum_{i=1}^{N}(\\Delta y_{i}-\\theta\\Delta x_{i})^{\\prime}z_{i}\\right)W\\left(\\frac{1}{N}\\sum_{i=1}^{N}z^{\\prime}{}_{i}(\\Delta y_{i}-\\theta\\Delta x_{i})\\right)$$\n",
 46 |     "\n",
 47 |     "where W is the weighting matrix of the moments. There are two popularly used weighting matrixes. In a one-step GMM estimate, the weighting matrix is\n",
 48 |     "\n",
 49 |     "$$W_{1}=\\left(\\frac{1}{N}Z^{\\prime}H_{1}Z\\right)^{-1}$$\n",
 50 |     "\n",
 51 |     "where matrix H has twos in the main diagnols, minus ones in the first subdiagnols, and zeros elsewhere:\n",
 52 |     "\n",
 53 |     "$$H_{1}=\\left[\\begin{array}{cccccc}\n",
 54 |     "2 & -1 & 0 & 0 & \\ldots & 0\\\\\n",
 55 |     "-1 & 2 & -1 & 0 & \\ldots & 0\\\\\n",
 56 |     "0 & \\ddots & \\ddots & \\ddots & \\ddots & \\vdots\\\\\n",
 57 |     "\\vdots & \\ddots & -1 & 2 & -1 & 0\\\\\n",
 58 |     "0 & \\ddots & 0 & -1 & 2 & -1\\\\\n",
 59 |     "0 & \\ldots & 0 & 0 & -1 & 2\n",
 60 |     "\\end{array}\\right]$$\n",
 61 |     "\n",
 62 |     "On the other hand, in a two-step GMM estimate, the weighting matrix is\n",
 63 |     "\n",
 64 |     "$$W_{2}=\\left(\\frac{1}{N}Z^{\\prime}H_{2}Z\\right)^{-1}$$\n",
 65 |     "\n",
 66 |     "where $H_{2}=\\Delta\\hat{\\epsilon}\\Delta\\hat{\\epsilon}^{\\prime}$ and $\\Delta\\hat{\\epsilon}$ is the residual from one-step GMM.\n",
 67 |     "\n",
 68 |     "## System GMM\n",
 69 |     "\n",
 70 |     "Compared with difference GMM, sytem GMM adds additional moment conditions, resulting in more instruments:\n",
 71 |     "\n",
 72 |     "$$\n",
 73 |     "\\begin{align}\n",
 74 |     "z_{i}=\\left[\\begin{array}{cccccccccccccccc|cccccccc}\n",
 75 |     "y_{i1} & 0 & 0 & 0 & 0 & 0 & \\ldots & 0 & d_{i1} & d_{i2} & 0 & 0 & 0 & 0 & 0 & 0 & 0 & \\ldots & 0 & 0 & 0 & 0 & \\ldots & 0\\\\\n",
 76 |     "0 & y_{i1} & y_{i2} & 0 & 0 & 0 & \\ldots & 0 & 0 & 0 & d_{i1} & d_{i2} & d_{i3} & 0 & 0 & 0 & 0 & \\ldots & 0 & 0 & 0 & 0 & \\ldots & 0\\\\\n",
 77 |     " &  &  & \\vdots &  &  & \\ldots &  & \\vdots &  &  &  &  & \\vdots & \\ldots & \\vdots &  & \\ldots & 0 &  &  &  & \\ddots & 0\\\\\n",
 78 |     "0 & 0 & 0 & 0 & 0 & 0 & \\ldots & y_{i,T-2} & 0 & 0 & 0 & 0 & 0 & 0 & \\ldots & d_{i,T-1} & 0 & \\ldots & 0 & 0 & 0 & 0 & 0 & 0\\\\\n",
 79 |     "\\hline 0 & \\ldots & 0 &  &  &  &  &  &  &  &  &  &  &  & 0 & 0 & \\Delta y_{i2} & \\ldots & 0 & 0 & \\Delta d_{i3} &  & 0\\\\\n",
 80 |     "\\vdots &  &  &  &  &  &  &  &  &  &  &  &  &  &  & 0 & 0 & \\Delta y_{i3} & \\ldots & 0 & 0 & \\Delta d_{i4} &  & 0\\\\\n",
 81 |     "\\vdots &  &  &  &  &  &  &  &  &  &  &  &  &  &  & \\vdots & \\vdots & \\vdots & \\ddots & \\vdots & \\vdots &  & \\ddots\\\\\n",
 82 |     "0 & \\ldots &  &  &  &  &  &  &  &  &  &  &  &  & \\ldots & 0 & 0 & 0 & \\ldots & \\Delta y_{i,T-1} & 0 & 0 & \\ldots & \\Delta y_{i,T}\n",
 83 |     "\\end{array}\\right] \\label{z_sys}\\tag{3}\n",
 84 |     "\\end{align}$$\n",
 85 |     "\n",
 86 |     "$$\\hat{\\theta}_{gmm}=\\arg\\min_{\\theta}\\left(\\frac{1}{N}\\sum_{i=1}^{N}(\\widetilde{y}_{i}-\\theta\\widetilde{x_{i}})^{\\prime}z_{i}\\right)W\\left(\\frac{1}{N}\\sum_{i=1}^{N}z^{\\prime}{}_{i}(\\widetilde{y}-\\theta\\widetilde{x_{i}})\\right)$$\n",
 87 |     "\n",
 88 |     "where $$\\widetilde{y}=\\left(\\begin{array}{c}\n",
 89 |     "\\Delta y_{i}\\\\\n",
 90 |     "\\hline y_{i}\n",
 91 |     "\\end{array}\\right)\\textrm{ and }\\widetilde{x_{i}}=\\left(\\begin{array}{c|c}\n",
 92 |     "\\Delta x_{i} & 0\\\\\n",
 93 |     "\\hline x_{i} & 1\n",
 94 |     "\\end{array}\\right)$$\n",
 95 |     "\n",
 96 |     "## Robust estimation of coefficients' covariance \n",
 97 |     "\n",
 98 |     "pydynpd reports robust standard errors for one-step and two-step estimators. For detail, please refer to [Windmeijer 2005](https://doi.org/10.1016/j.jeconom.2004.02.005).\n",
 99 |     "\n",
100 |     "## Specification Test \n",
101 |     "\n",
102 |     "### Error serial correlation test \n",
103 |     "\n",
104 |     "Second-order serial correlation test: if $\\epsilon_{it}$ in Eq ($\\ref{basic}$) is serially correlated, GMM estimates are no longer consistent. In a first-differenced model （e.g., Eq ($\\ref{fd}$)), to test whether $\\epsilon_{i,t-1}$ is correlated with $\\epsilon_{i,t-2}$, the second-order autocovariance of the residuals, $\\textrm{AR(2)}$, is calculated as:\n",
105 |     "\n",
106 |     "$$AR(2)=\\frac{b_{0}}{\\sqrt{b_{1}+b_{2}+b_{3}}}\\textrm{ where}$$\n",
107 |     "\n",
108 |     "$$b_{0}=\\sum_{i=1}^{N}\\Delta\\hat{\\hat{\\epsilon}}_{i}^{\\prime}L_{\\Delta\\hat{\\hat{\\epsilon}}}^{2}$$\n",
109 |     "\n",
110 |     "$$b_{1}=\\sum_{i=1}^{N}L_{\\Delta\\hat{\\hat{\\epsilon}}_{i}^{\\prime}}^{2}H_{2}L_{\\Delta\\hat{\\hat{\\epsilon}}_{i}}^{2}$$ \n",
111 |     "\n",
112 |     "$$b_{2}=\\textrm{-}2\\left(\\sum_{i=1}^{N}L_{\\Delta\\hat{\\hat{\\epsilon}}_{i}^{\\prime}}^{2}x_{i}\\right)\\left[\\left(\\sum_{i=1}^{N}x_{i}^{\\prime}z_{i}\\right)W_{2}\\left(\\sum_{i=1}^{N}z_{i}^{\\prime}x_{i}\\right)\\right]^{-1}\\left(\\sum_{i=1}^{N}x_{i}^{\\prime}z_{i}\\right)W_{2}\\left(\\sum_{i=1}^{N}z_{i}^{\\prime}H_{2}L_{\\Delta\\hat{\\hat{\\epsilon}}_{i}}^{2}\\right)$$\n",
113 |     "\n",
114 |     "$$b_{3}=\\left(\\sum_{i=1}^{N}L_{\\Delta\\hat{\\hat{\\epsilon}}_{i}^{\\prime}}^{2}x_{i}\\right)\\hat{V}_{\\hat{\\hat{\\theta}}}\\left(\\sum_{i=1}^{N}x_{i}^{\\prime}L_{\\Delta\\hat{\\hat{\\epsilon}}_{i}}^{2}\\right)$$\n",
115 |     "\n",
116 |     "### Hansen overidentification test \n",
117 |     "\n",
118 |     "Hansen overidentification test is used to check if instruments are exogeneous. Under the null hypothesis that instruments are valid, test statistic, $S$, should be close to zero:\n",
119 |     "\n",
120 |     "$$S=\\left(\\sum_{i=1}^{N}\\Delta\\hat{\\hat{\\epsilon}}_{i}^{\\prime}z_{i}\\right)W_{2}\\left(\\sum_{i=1}^{N}z_{i}^{\\prime}\\Delta\\hat{\\hat{\\epsilon}}_{i}\\right)$$\n",
121 |     "\n",
122 |     "# Handling instrument proliferation issue \n",
123 |     "\n",
124 |     "Difference GMM and system GMM may generate too many instruments, which causes several problems (citation). Package pydynpd allows users to reduce the number of instruments in two ways. First, users can control the number of instruments in command string. For example, $\\textrm{gmm(w, 2:3)}$ states that only $n_{t-2}$ and $n_{t-3}$ are used as instruments, rather than all lagged $n$ dated $t-2$ and earlier. Second, users can choose to collapse the instrumental variable matrix. For example, if collapsed, matrix as in Eq ($\\ref{z_sys}$) is changed to:\n",
125 |     "\n",
126 |     "$$z_{i}=\\left[\\begin{array}{cccccccccc|cc}\n",
127 |     "y_{i1} & 0 & 0 & \\ldots & 0 & d_{i1} & d_{i2} & 0 & \\ldots & 0\\\\\n",
128 |     "y_{i1} & y_{i2} & 0 & \\ldots & 0 & d_{i1} & d_{i2} & d_{i3} & \\ldots & 0\\\\\n",
129 |     "\\vdots & \\vdots & \\ddots & \\ldots & \\vdots &  &  &  & \\ddots & \\vdots\\\\\n",
130 |     "y_{i1} & y_{i2} & y_{i3} & \\ldots & y_{i,T-2} & d_{i1} & d_{i2} & d_{id} & \\ldots & d_{i,T-1}\\\\\n",
131 |     "\\hline 0 &  &  & \\ldots & 0 & 0 & 0 & 0 & \\ldots & 0 & \\Delta y_{i2} & \\Delta d_{i3}\\\\\n",
132 |     "0 & 0 &  &  & 0 & 0 & 0 & 0 & \\ldots & 0 & \\Delta y_{i3} & \\Delta d_{i4}\\\\\n",
133 |     "\\vdots &  & \\ddots &  & \\vdots &  &  & \\vdots &  & \\vdots & \\vdots & \\vdots\\\\\n",
134 |     "0 & 0 & 0 & \\ldots & 0 & 0 &  & 0 & \\ldots & 0 & \\Delta y_{i,T-1} & \\Delta d_{iT}\n",
135 |     "\\end{array}\\right]$$\n",
136 |     "\n",
137 |     "This change dramatically reduces the number of instruments. Intuitively, the number of instruments is positively associated with the width of the matrix above.\n",
138 |     "\n"
139 |    ]
140 |   },
141 |   {
142 |    "cell_type": "code",
143 |    "execution_count": null,
144 |    "id": "6eefc858",
145 |    "metadata": {},
146 |    "outputs": [
147 |     {
148 |      "ename": "",
149 |      "evalue": "",
150 |      "output_type": "error",
151 |      "traceback": [
152 |       "\u001b[1;31mRunning cells with 'Python 3.10.5 64-bit (windows store)' requires ipykernel package.\n",
153 |       "Run the following command to install 'ipykernel' into the Python environment. \n",
154 |       "Command: 'c:/Users/Tiger/AppData/Local/Microsoft/WindowsApps/python3.10.exe -m pip install ipykernel -U --user --force-reinstall'"
155 |      ]
156 |     }
157 |    ],
158 |    "source": []
159 |   },
160 |   {
161 |    "cell_type": "code",
162 |    "execution_count": null,
163 |    "id": "7ca931c1-e7d1-484e-aae8-727801b3a60d",
164 |    "metadata": {},
165 |    "outputs": [],
166 |    "source": []
167 |   },
168 |   {
169 |    "cell_type": "code",
170 |    "execution_count": null,
171 |    "id": "28e3e167-e9c3-427d-ba56-521262920be1",
172 |    "metadata": {},
173 |    "outputs": [],
174 |    "source": []
175 |   }
176 |  ],
177 |  "metadata": {
178 |   "kernelspec": {
179 |    "display_name": "Python 3.10.5 64-bit (windows store)",
180 |    "language": "python",
181 |    "name": "python3"
182 |   },
183 |   "language_info": {
184 |    "codemirror_mode": {
185 |     "name": "ipython",
186 |     "version": 3
187 |    },
188 |    "file_extension": ".py",
189 |    "mimetype": "text/x-python",
190 |    "name": "python",
191 |    "nbconvert_exporter": "python",
192 |    "pygments_lexer": "ipython3",
193 |    "version": "3.10.5"
194 |   },
195 |   "vscode": {
196 |    "interpreter": {
197 |     "hash": "af20069095c057cbb0c2f1e67eacfd0be355d6958a577b80d2e85aa262675d05"
198 |    }
199 |   }
200 |  },
201 |  "nbformat": 4,
202 |  "nbformat_minor": 5
203 | }
204 | 


--------------------------------------------------------------------------------
/vignettes/Images/.$pydynpd.drawio.bkp:
--------------------------------------------------------------------------------
1 | <mxfile host="app.diagrams.net" modified="2022-04-07T01:08:42.352Z" agent="5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36" etag="LjxmzCeLva5yngfajSYb" version="17.4.0" type="device" pages="2"><diagram id="mTngtMP_0UJyMqEpqlQN" name="Page-1">7Vpbe6M2EP01flx/4mryGDvZtF+bjRtvu9lHBWRQgpEj5MTeX18JxE3CNmnwZdvmIR8ahIA5c86MBg+syWJ9Q+EyuiUBigcmCNYD62pgmiPP5P+FYZMbbMPLDSHFQW4yKsMM/0DSCKR1hQOUNiYyQmKGl02jT5IE+axhg5SSt+a0OYmbd13CEGmGmQ9j3foNByzKrZ4DKvsvCIdRcWcDyDMLWEyWhjSCAXmrmazrgTWhhLD8aLGeoFj4rvBLft3nLWfLB6MoYV0uSL/MEs/+9Q7+5dE4eXq+Y0+bT5YlH45tijdGAXeAHBLKIhKSBMbXlXVMySoJkFgW8FE153dCltxocOMTYmwj0YQrRrgpYotYnkVrzB7k5eL4e+34SkSO4ZZDETeOPXTkeIooXiCGaHFJwujmoT7IFsvni2G2XnkyW06M5iRh8vkMV44nJCZi3YQk4jVTRslziTrHa6y7vAhJSEPEdvlZ0kA4t3alROoGEf5OdMMnUBRDhl+b0QdlEIflvPLSKcH8WUwg+VbypqAbEK5rLJKSFfWRvK4eL8pSlgmGlnNR/SkLj4aGoZ0tbpI7RLsJP6i9bWXKQvU9YSsd8wrjlXTVnymPCTWWazEXwDQqY/YtwgylS+iLeW9cuATcEVyK4WIdCiUbBvNgiNY80kT0JwyzTT4pJ7FYJYxhmspjnyywL49j+IjiMfSfw4wpe8NqjuNYmVQPx4FpgeyvXJrQAFHlChjjMOFDn8emYEce4dIVwgFpApdfSQ5Jbtkazq+IMrTeGafyrNMMN6sYv1VaKbic2aKaTqrxUo/sRrC8OzLckwpaqVPfa2cqCSoETVUg88MKlLO6gwLtlyqnb6n6GKCmRvUppCkaiHTnxvxNxo+c+G4ojlIEqR9lz/ayQinTYqFC2ihkYNaUgRqq73B/d8ZYttOkzKhU6DppzBbSFETq38e25uPZPk/y12Vt7lKFTNc2TamE8zAvuS7liQUOgoyXbfg0uVqqeiHxU8iEZGcWExjdeNYDrCboBKvVAqt1KFhtoKH2E2phgOZwlfG8Vzl0usrh6Lzk0NGoeoMSRCETiriQ268Y/xTaZ3st5cJRlc82NDcdkSJGjSAVXdop0sdG50jUGnWl1sV5UWukUWvO3cSfD2aZsCAXScTLRWgbz4qNRITWkMcF9+WytmmV1nIfa+7n4hyvUdEa2ZfRdkHakot3b1W27zb6yJjKjtIFuhiYhqGLgXMwMThtM2RoHj5hVipSCUd5tx0q8gE1uDhZom3va9hKpebaSjxt6V1cUgo3tWlLMSHdfh+3aFLI+4zU/tye53rvfNdxFAbkT9xr46VI4P/ekrJkiDF4V549AkNs7zgMMTyrGYleN4b01t270BLxVwoDzLCIGn5iSomP0vQMK1wXNEnvXJx6b2/r/ZNL4bUl8vEc+1Vds3d/333fTlGKf8DHbCnBDSmVfF1nPHCuxFqc42lV0XRtHXQjdQ84OsBUcHR1HEELjubBcNR7NPdFawbkdWpekyYCuP9RbUPVHjXb1W4bqm3t6sOhOjqffNr4Ardj/zl0vEZuNIZgX/mYjdRPd8ffmRYJdH8tahwp09pKplVr0S0f63or5zxNVD7jWEBkgttWATl9ilWLk7ZPTofKsFdf7EUyhWA+ud9YUzCPvj1MP+nfIsvm+T1KRRTvV+FT9M7/6d7/eHINlI/ZRS1Tg7pNrftoqLcirZdSt7sarv/tZGsp/R2nhaijA+Xa58eYzB6f6J0bRb89gyX94+Whhaf3KOR+TnHW0evM1TMAsQd4TEtpNzi2rqPGMfGxNHwGpvuyEr+PGt8QElSjorzVd4Dnoaw9wKNWBq6hs6cn7ePD6ldoeWVR/ZTPuv4b</diagram><diagram id="X5Q8cOgmqDiJ-UkGuhwi" name="Page-2">1Vpbd6I6FP41Pk4XhIv6WGvPdC6u45rO9EwfU4iYFggTYtX59SeBcAnBSmvR2odKNskm7P3taxhYV9HmM4XJckZ8FA6A4W8G1nQAgGkagP8IyjanDIeSEFDsy0kV4Rb/RZJoSOoK+yhVJjJCQoYTleiROEYeU2iQUrJWpy1IqD41gQHSCLceDHXqf9hny5w6coyKfoNwsGTlC8s7ESwmS0K6hD5Z10jW9cC6ooSw/CraXKFQCK+Qy83EcBLnDs2T1ezb+Bnf3j/+/pQz++c1S8pXoChmb2ZtmNQNrsB2/fgI1/bXx+m/5M+n4tXYtpAX8rn45JBQtiQBiWF4XVEnlKxiHwmuBh9Vc74TknCiyYmPiLGtxAJcMcJJSxaF8i7aYDbdyPXZYCsHKYOUXQqtc0JMYpTRKHkqdcelPsn3LDbaUPEe+ch5KVlRD70gFHMocQppgNhLE80SBtx+EIkQo1u+kKIQMvysbg9KIAflvHLpnGC+cWBIoxs6dr5EmhwYGyqLfGNyVaVyflHbRkXKgPAKUJinAsVvsfzCkaP7vXDh6qbbfJFrF+P7bGw7VkGo1mYjZfEcUczFhWgJwGODDXTF2vBArCkoeS0k5C6fYbiST5pDmqLs+X9WKGUaZCpACB2ul5ih2wRmoljzSKMqf5fYnxFlaPMGwetyklys0VCxLdN2OOAyyroKD2bh85e10OAaPQnXOqm91azNqFub6Sr25tilYTaNpjJD1QadPRZ4fGOzOxobeA+/zuMY3NYmJMJfp7vdvgXGKjSHbgNaOcd39fW2Zti/Uq7YJiBrwPFhuiyBl9l1qto1z5MSMYw2gUgpL/yFf4E2HC4CwjHDbJtPypMpwSUIYZrKa49E2JPXIXxA4QR6T0EG9ysSEvpSYrDAYdiYtCAxK0gDYBnZX8maUB/RxgoY4iDmQ49DTkA8Y1EYY+asYpj8JLnyckpfzspu+KqCbd1TuS2eajzuyVM5GloGwIWR0Hr8kIqfMhwYZCF2IGCUIA8vuFJ5Tp2XFw1wcakx1TlpOhCyxTypv5Q3Iuz7uR9EKf4LHzJWQq/SyDhfZzJwpoIXd31p7gWrUNNEiQ6c44UkYDTUXKR/+9QM+gpI7kkCUhFFykG3KPKWxPEENUVR++7N86x+agqg1hT2qAGe/A16qymGmuf4jGJEIRM55JeYK2QVoWzDM8go3pxLPgkacrUs48TZ5EiT9KXqhGcdnXBXT9ndWbdpTfUbPqbIY5iIZQimYkeLECd38ra4vjm6jkUtqdTjQNdxm3+2+lLx+KT+ueaSy6LhQ2X5nfs3h5bUO/J4s9G+GY5UFn23b07T1HtjEXge4btzR7Cf6G25jehtdoveOqPRHkZ9Y9PUotOcEg/x8g+4oXD3D7z4dANxNYdxdhYxhQyeSzrgjDukA+Yx0wETnMQbnIVVm1ZHs7YPtOrDNGjpufNsdi4WYRWFRuFxADhxgmzqPa8pShAXXuYpy45G/l/3S1/4zGr6HaRY9B/SDhn1B2hrUMKgzK8zm+MIwXEgMSBHE8IYiTjNOa7vNBwVKVZLH8RuQYrdG1L0fpdSqurY2FG8fkgoFIWV+VpYWMetsMfjvbAYHRUVroaKHyjgakqFAIHxA6WrUD8Q+5AYOJ4aTaB2OR3D6pYZ9dblNPWelKa04ixjDbdJ3vCfpE+IeUupCCHSIpVKSSieN5FqcAs9IXr9jHJ1ma26a3PTDYVn9lkHQO0ILodP7VCu5YTiiM2Shp6Bo2fAx3XiekPsXDJinmSc/GMY0DFFPvj7hPaC1R42mimGUZzXv7b21Vg1W+B9177j/UBUYbYnia+fbUp8xOS7ONjcafUtrqY8zy0Od+eQicPajMLfpM+YMFYzPuDoMcEpwr8SE6zRbiAd9qGLoSnpJ4U+FskRFKvLdsXH7J/vV2YLarITbpk8uD3qu3kIVdZ4799I58Pqy8zcgqvvW63r/wE=</diagram></mxfile>


--------------------------------------------------------------------------------
/vignettes/Images/.$pydynpd.drawio.dtmp:
--------------------------------------------------------------------------------
1 | <mxfile host="Electron" modified="2022-07-30T15:46:17.586Z" agent="5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/19.0.3 Chrome/102.0.5005.63 Electron/19.0.3 Safari/537.36" etag="kuhK11gmxfWyZD9egKy0" version="19.0.3" type="device" pages="4"><diagram id="mTngtMP_0UJyMqEpqlQN" name="Page-1">7Vpbe6M2EP01flx/4mrnMXayab82GzfedrOPCsigBCMi5MTeX18JxE3CNmnwZdvmJWgQAubMOTMaPLCmy/UNhUl4S3wUDUzgrwfW1cA0DQPY/J+wbHLLaGTmhoBiX06qDHP8A0kjkNYV9lHamMgIiRhOmkaPxDHyWMMGKSVvzWkLEjXvmsAAaYa5ByPd+g37LMytYwdU9l8QDkJWvrA8s4TFZGlIQ+iTt5rJuh5YU0oIy4+W6ymKhPMKv+TXfd5ytnwwimLW5YL0yzwe27/ewb/GNIqfnu/Y0+aTZcmHY5vijZHPHSCHhLKQBCSG0XVlnVCyin0klgV8VM35nZCEGw1ufEKMbSSacMUIN4VsGcmzaI3Zg7xcHH+vHV+J0DHccijixrGHjhzPEMVLxBAtLokZ3TzUB9li+XwxzNYrT2bLidGCxEw+n+HK8ZRERKwbk1i8ZsooeS5R53hNdJcXIQlpgNguP8uoF86tXSmRukGEvxPd8AkURZDh12b0QRnEQTmvvHRGMH8WE0jClbyRdLOBcF1jkZSsqIfkdfV4UZayTDC0nIvqT1l4NDQM7Wxxk9wh2k34Qe1tK1MWqu8JW+mYVxitpKv+THlMqLFcizkfpmEZs28hZihNoCfmvXHlEnCHMBHD5ToQUjb0F/4QrXmkieiPGWabfFJOYrFKEME0lcceWWJPHkfwEUUT6D0HGVP2htUCR5EyqR6OA9MC2V+5NKE+osoVMMJBzIcej03BjjzCpSuEA9IYJl9JDklu2RrOr4gytN4Zp/Ks0ww3qxi/VVopuJzZwppOqvFSj+xGsLw7MtyTClqpU99rZyoJKgRNVSDzwwqUs7qDAu2XKqdvqfoYoKZG9RmkKRqIdOdG/E0mj5z4biCOUgSpF2bP9rJCKdNioULaKGRg3pSBGqrvcH93xli206TMqFToOmnMFtIUROrfx7bm4/k+T/LXZW3uUoVM1zZNqYTzMC+5LuWJJfb9jJdt+DS5Wqp6IfEzyIRkZxYTGN141gOsJugEq9UCq3UoWG2gofYTaqGPFnCV8bxXOXS6yuHovOTQ0ah6g2JEIROKuJT7rwj/FNpnj1vKhaMqn21objoiRYwaQSq6tFOkj43Okag16kqti/Oi1kij1oK7iT8fzDJhQS4Si5cL0TaeFRuJEK0hjwvuy6S2aZXWch9r7ufiAq9R0RrZl9F2QdqSi3dvVbbvNvrImMqO0gW6GJiGoYuBczAxOG0zZGgePmFWKlIJR3m3HSryATW4OFmibe9r2Eql5tpKPG3pXVxSCje1aYmYkG6/j1s0KYrOp9qf2/Nc753vOo7CgPyJe228FAn831tSlgwxBu/Ks0dgiD0+DkOMsdWMxHE3hvTW3bvQEvFXCn3MsIgafmJGiYfS9AwrXBc0Se9cnHpvb+v9k0vhtQR5eIG9qq7Zu7/vvm+nKMU/4GO2lOCGlEq+rjMZOFdiLc7xtKpourYOupG6BxwdYCo4ujqOoAVH82A46j2a+6I1A/I6Na9JYwHc/6i2oWqPmu1qtw3Vtnb14VAdnU8+bXyB27H/HDrjRm40hmBf+ZiN1E93x9+ZFgl0fy1qHCnT2kqmVWvRLR/reivnxpqofMaRgMgEt60CcvoUqxYnbZ+cDpVhr77Yy3gGwWJ6v7FmYBF+e5h90r9Fls3ze5SKKN6vwqfonf/Tvf/x5BooH7OLWqYGdZta99FQb0VaL6VudzVc/9vJ1lL6O04LUUcHyrXPjxGZPz7ROzcMf3sGCf3j5aGFp/co4H5OcdbR68zVMwCxB3hMS2k3OLauo8Yx8bE0fAam+7ISv4+a3BDiV6OivNV3gOehrD3Ao1YGrqGzpyft48PqV2h5ZVH9ls+6/hs=</diagram><diagram id="X5Q8cOgmqDiJ-UkGuhwi" name="Page-2">1Vpbd5s4EP41fmwOiIvtxzjOJt2tz+Y0bbZ5VEDGSjCiQo7t/vqVQAKEnJgkBjt9aNAgyTDzzTcXMXAulpsrCtPFjIQoHgAr3Ayc6QAA27Zc/kdItoVkOASFIKI4lJMqwS3+g6TQktIVDlGmTWSExAynujAgSYICpskgpWStT5uTWP/VFEbIENwGMDal/+GQLQrpyLMq+TXC0YKVLyzvLKGaLAXZAoZkXRM5lwPnghLCiqvl5gLFQnlKL9cTy0u9O3STrmb/jJ/x7f3jry/FZn+9ZUn5ChQl7N1bWzb1owuwXT8+wrX79+P0X/L7i3o1tlX6QiFXnxwSyhYkIgmMLyvphJJVEiKxq8VH1ZxvhKRcaHPhI2JsK7EAV4xw0YItY3kXbTCbbuT6fLCVg4xBys6F1bkgIQnKZZQ8lbbjWp+0VIdUW0ZWNECv6MAeSlhCGiH22kS5o1BRDVxS3VeILBGjWz6Bohgy/KwjEEogR+W8cukNwfxNgCWdbuhJn5MuB8aWvkXxpHJVZXJ+UXuMSpQD4Q2gsI8Fil9i+ZknR/d74cLtT7fFIt9V4/t87HqOElRr85G2+AZRzNWFaAnAjsEG2mJteGisaSh5KyTkYz/DeCV/6QbSDOW//3uFMmZApgKEsOF6gRm6TWGumzWPNLrxP6r2Z0QZ2ryqJ3nXGQ0137JdjwMul6yr8GArzl/UQoNvdaRc56j+VvM2q+5ttq/5m+eWjtl0msoNdR/09nhg587mtnQ20Amv8zgGt7UJqeDr7GXad8BYh+bQb0Cr2PGgXO8ajv0z44ZtArIGnBBmixJ4uV9nul/zPCkVw+UmEinlWTgPz9CGw0VAOGGYbYtJRTIldolimGXyOiBLHMjrGD6geAKDpyiH+wWJCX0tMZjjOG5MmpOEKdEAOFb+r9ya0BDRxgoY4yjhw4BjUEA830I5Y05WCUx/kMJ4haQrsnIbXKUgV2cqfwdTjccdMZVnoGUAfLgUVk8eMvGnDAcWmYsnEDBKUYDn3Kg8py7Kiwa4uDaYTk6GDYTOME/qz+WNJQ7DggdRhv/Ah3wrYVfpZHxfbzLwpmIvTn1ZwYJVqGmixAROfyEJWA0zq/Rvn5lBVwHJP0pAUlGkHLSLIu9JHLuvKVSpuzfPc3qqKYBeU7ijBniKV+qsphgazHGFEkQhEznk14QbZLVE+QPPIKN481nySdDQq+NYR84mR4amz3USnrUk4bZM2Z6sd1lN540QUxQwTMQyBDPxRPMYp3fytri+7t3GopbU6nFg2ngXPztdmXh8VH6uUXJZNBwzy2/dvzl4Sf1CHm832jfDkb5F1+2b4zT13lkEnmT4bt0R7Cl6O34jetvtore50WjPRl1jU5FvrZFESYB4+Qf8WND9Ay8+/Uhc3cAkP4uYQgY/SzrgjVukA3af6YANjsIGp+jVylv3urV7aK/+mAUdM3eezT6LRziq0FCMA8CRE2Tb7HlNUYq48nKmLDsaxf8mL33lM6vpd5Bi0X/IWmTUJ9DWoIRBmV/nPscRgpNIYkCOJoQxsuQyr1/utDwdKc6OPoi7AyluZ0gx+11aqWpi44Xi9SShoAor+62wcPqtsMfjvbAY9YoK30DFdxRxM2VCgcD6jrJVbB6InSQG+jOjDfQup2c57TKjzrqcttmTMoymzjLWcJsWDf9J9oRYsJCGECpVqVRGYvF7E2kGX9kJ0ctnVJjL3mm7XTTdMHjun3UA1I7gCvjUDuV2nFD02Cxp2Bl4ZgbcL4mbDbHPkhHzJKPvj2FAyxT58N8n7C5Y3WGjmWJZ6rz+rbWvsVWzBd517TveD0QdZnuS+PrZpsRHQr6Jg80XvX4H1ZTnuepw9wYycVibS/ibdBkTxnrGBzwzJngq/GsxwRm9DKSPfehiGUb6QWGIRXIExeqyXXGa/fP9xtyBmvyEWyYPfof2bh5ClTXe4RvpfFh9mVl4cPV9q3P5Pw==</diagram><diagram id="JBxkRaY2V4fryi9kfNIv" name="Page-3">7VjbcpswEP0aHuPhEsf2Y42dNDNJp9N0entTYQG1skTFYuN+fVcgjAlJpp3m8hA8jNEe7YJ0zh7swQnCTXWhWZ5dqxiE47tx5QQrx/c9zz2lk0H2DTKb+Q2Qah7bpA644b/Bgq5FSx5D0UtEpQTyvA9GSkqIsIcxrdWun5Yo0b9rzlIYADcRE0P0M48xa/d1tugm3gJPM3vruT9rJjasTbY7KTIWq90RFKydINRKYTPaVCEIQ17LS1N3fs/sYWEaJP5NwfxjIk+q6zD/FnG9vvxUXcpfJ3axWyZKu+ErXiAhKqGvWs7CLh/3LScIFd1xWaBWPyFUQmnCY0hYKQyecCFa1PGD8/pDOBM8lYQJSEzaFjRyIvqNhVHl5qI5i7hMr+qc1WmHfLAcG0hRbSJqMjMex0DlS62QIfter9GlOFdcYi3+dEkHcRO6k6kzpSWFFHtdTIdJ1xgqSXtivOYTWIE7KLC+dCljMDR6FA15t1KYDUF1BFkdLkBtAPWeUuzswraE9YQ3t/Gu67A2JTvqrcBizPZ0erhwJzsNrPL/0AXe6UBjiMkGNiRqMpUqycS6Q5dRqbcHUjqKDPddwZUyqtYpPwBxbw3OSlQEZbgRdhYqjl9suRl/NWPSp4lW1dHUat8GMn5jLE6hVBIa5Jybvdfz9ypVqFJH8BAhlmnDwoOCahAM+bb/sLhLH1v63nRl1wgH5dtOWNySGJlOAW3VLZUPy/gP4d2B/9tH+LvR9o9te3/WV9u/w/ezZ/X9dPR9nxDvlfi+7d3O95PJZHT8kzvee2nHn42O7xMyexnH++4zO374T7/9pfdH3z+1773gpX0/H33fJ2TxSny/uNf33qAlRt8/su/Pnsz2FHavjppm6V7ABes/</diagram><diagram id="HtGqnUG2_LdsecNcr0Q7" name="Page-4">dZHBEoIgEIafhrtCk3U2y0snD50Z2YQZdBmk0Xr6dMCMsU4s3//vLrsQlrfjxXIjryhAE5qIkbAToTRNk910zOTpSZZRDxqrRDCtoFIvCDAJ9KEE9JHRIWqnTAxr7DqoXcS4tTjEtjvquKvhDWxAVXO9pTclnFzm2h9XoQTVyND6QDMvtHwxh0l6yQUOX4gVhOUW0fmoHXPQ8/KWvfi88x/18zALnfuRMAVr7ekS/RAr3g==</diagram></mxfile>


--------------------------------------------------------------------------------
/vignettes/Images/.gitkeep:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/vignettes/Images/list_models.svg:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <!-- Do not edit this file with editors other than diagrams.net -->
3 | <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
4 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" width="251px" height="254px" viewBox="-0.5 -0.5 251 254" content="&lt;mxfile host=&quot;Electron&quot; modified=&quot;2022-07-30T14:15:11.694Z&quot; agent=&quot;5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/19.0.3 Chrome/102.0.5005.63 Electron/19.0.3 Safari/537.36&quot; etag=&quot;rqeWmVUcbfZPClqfu3c9&quot; version=&quot;19.0.3&quot; type=&quot;device&quot; pages=&quot;3&quot;&gt;&lt;diagram id=&quot;mTngtMP_0UJyMqEpqlQN&quot; name=&quot;Page-1&quot;&gt;7Vpbe6M2EP01flx/4mrnMXayab82GzfedrOPCsigBCMi5MTeX18JxE3CNmnwZdvmJWgQAubMOTMaPLCmy/UNhUl4S3wUDUzgrwfW1cA0DQPY/J+wbHLLaGTmhoBiX06qDHP8A0kjkNYV9lHamMgIiRhOmkaPxDHyWMMGKSVvzWkLEjXvmsAAaYa5ByPd+g37LMytYwdU9l8QDkJWvrA8s4TFZGlIQ+iTt5rJuh5YU0oIy4+W6ymKhPMKv+TXfd5ytnwwimLW5YL0yzwe27/ewb/GNIqfnu/Y0+aTZcmHY5vijZHPHSCHhLKQBCSG0XVlnVCyin0klgV8VM35nZCEGw1ufEKMbSSacMUIN4VsGcmzaI3Zg7xcHH+vHV+J0DHccijixrGHjhzPEMVLxBAtLokZ3TzUB9li+XwxzNYrT2bLidGCxEw+n+HK8ZRERKwbk1i8ZsooeS5R53hNdJcXIQlpgNguP8uoF86tXSmRukGEvxPd8AkURZDh12b0QRnEQTmvvHRGMH8WE0jClbyRdLOBcF1jkZSsqIfkdfV4UZayTDC0nIvqT1l4NDQM7Wxxk9wh2k34Qe1tK1MWqu8JW+mYVxitpKv+THlMqLFcizkfpmEZs28hZihNoCfmvXHlEnCHMBHD5ToQUjb0F/4QrXmkieiPGWabfFJOYrFKEME0lcceWWJPHkfwEUUT6D0HGVP2htUCR5EyqR6OA9MC2V+5NKE+osoVMMJBzIcej03BjjzCpSuEA9IYJl9JDklu2RrOr4gytN4Zp/Ks0ww3qxi/VVopuJzZwppOqvFSj+xGsLw7MtyTClqpU99rZyoJKgRNVSDzwwqUs7qDAu2XKqdvqfoYoKZG9RmkKRqIdOdG/E0mj5z4biCOUgSpF2bP9rJCKdNioULaKGRg3pSBGqrvcH93xli206TMqFToOmnMFtIUROrfx7bm4/k+T/LXZW3uUoVM1zZNqYTzMC+5LuWJJfb9jJdt+DS5Wqp6IfEzyIRkZxYTGN141gOsJugEq9UCq3UoWG2gofYTaqGPFnCV8bxXOXS6yuHovOTQ0ah6g2JEIROKuJT7rwj/FNpnj1vKhaMqn21objoiRYwaQSq6tFOkj43Okag16kqti/Oi1kij1oK7iT8fzDJhQS4Si5cL0TaeFRuJEK0hjwvuy6S2aZXWch9r7ufiAq9R0RrZl9F2QdqSi3dvVbbvNvrImMqO0gW6GJiGoYuBczAxOG0zZGgePmFWKlIJR3m3HSryATW4OFmibe9r2Eql5tpKPG3pXVxSCje1aYmYkG6/j1s0KYrOp9qf2/Nc753vOo7CgPyJe228FAn831tSlgwxBu/Ks0dgiD0+DkOMsdWMxHE3hvTW3bvQEvFXCn3MsIgafmJGiYfS9AwrXBc0Se9cnHpvb+v9k0vhtQR5eIG9qq7Zu7/vvm+nKMU/4GO2lOCGlEq+rjMZOFdiLc7xtKpourYOupG6BxwdYCo4ujqOoAVH82A46j2a+6I1A/I6Na9JYwHc/6i2oWqPmu1qtw3Vtnb14VAdnU8+bXyB27H/HDrjRm40hmBf+ZiN1E93x9+ZFgl0fy1qHCnT2kqmVWvRLR/reivnxpqofMaRgMgEt60CcvoUqxYnbZ+cDpVhr77Yy3gGwWJ6v7FmYBF+e5h90r9Fls3ze5SKKN6vwqfonf/Tvf/x5BooH7OLWqYGdZta99FQb0VaL6VudzVc/9vJ1lL6O04LUUcHyrXPjxGZPz7ROzcMf3sGCf3j5aGFp/co4H5OcdbR68zVMwCxB3hMS2k3OLauo8Yx8bE0fAam+7ISv4+a3BDiV6OivNV3gOehrD3Ao1YGrqGzpyft48PqV2h5ZVH9ls+6/hs=&lt;/diagram&gt;&lt;diagram id=&quot;X5Q8cOgmqDiJ-UkGuhwi&quot; name=&quot;Page-2&quot;&gt;1Vpbd5s4EP41fmwOiIvtxzjOJt2tz+Y0bbZ5VEDGSjCiQo7t/vqVQAKEnJgkBjt9aNAgyTDzzTcXMXAulpsrCtPFjIQoHgAr3Ayc6QAA27Zc/kdItoVkOASFIKI4lJMqwS3+g6TQktIVDlGmTWSExAynujAgSYICpskgpWStT5uTWP/VFEbIENwGMDal/+GQLQrpyLMq+TXC0YKVLyzvLKGaLAXZAoZkXRM5lwPnghLCiqvl5gLFQnlKL9cTy0u9O3STrmb/jJ/x7f3jry/FZn+9ZUn5ChQl7N1bWzb1owuwXT8+wrX79+P0X/L7i3o1tlX6QiFXnxwSyhYkIgmMLyvphJJVEiKxq8VH1ZxvhKRcaHPhI2JsK7EAV4xw0YItY3kXbTCbbuT6fLCVg4xBys6F1bkgIQnKZZQ8lbbjWp+0VIdUW0ZWNECv6MAeSlhCGiH22kS5o1BRDVxS3VeILBGjWz6Bohgy/KwjEEogR+W8cukNwfxNgCWdbuhJn5MuB8aWvkXxpHJVZXJ+UXuMSpQD4Q2gsI8Fil9i+ZknR/d74cLtT7fFIt9V4/t87HqOElRr85G2+AZRzNWFaAnAjsEG2mJteGisaSh5KyTkYz/DeCV/6QbSDOW//3uFMmZApgKEsOF6gRm6TWGumzWPNLrxP6r2Z0QZ2ryqJ3nXGQ0137JdjwMul6yr8GArzl/UQoNvdaRc56j+VvM2q+5ttq/5m+eWjtl0msoNdR/09nhg587mtnQ20Amv8zgGt7UJqeDr7GXad8BYh+bQb0Cr2PGgXO8ajv0z44ZtArIGnBBmixJ4uV9nul/zPCkVw+UmEinlWTgPz9CGw0VAOGGYbYtJRTIldolimGXyOiBLHMjrGD6geAKDpyiH+wWJCX0tMZjjOG5MmpOEKdEAOFb+r9ya0BDRxgoY4yjhw4BjUEA830I5Y05WCUx/kMJ4haQrsnIbXKUgV2cqfwdTjccdMZVnoGUAfLgUVk8eMvGnDAcWmYsnEDBKUYDn3Kg8py7Kiwa4uDaYTk6GDYTOME/qz+WNJQ7DggdRhv/Ah3wrYVfpZHxfbzLwpmIvTn1ZwYJVqGmixAROfyEJWA0zq/Rvn5lBVwHJP0pAUlGkHLSLIu9JHLuvKVSpuzfPc3qqKYBeU7ijBniKV+qsphgazHGFEkQhEznk14QbZLVE+QPPIKN481nySdDQq+NYR84mR4amz3USnrUk4bZM2Z6sd1lN540QUxQwTMQyBDPxRPMYp3fytri+7t3GopbU6nFg2ngXPztdmXh8VH6uUXJZNBwzy2/dvzl4Sf1CHm832jfDkb5F1+2b4zT13lkEnmT4bt0R7Cl6O34jetvtore50WjPRl1jU5FvrZFESYB4+Qf8WND9Ay8+/Uhc3cAkP4uYQgY/SzrgjVukA3af6YANjsIGp+jVylv3urV7aK/+mAUdM3eezT6LRziq0FCMA8CRE2Tb7HlNUYq48nKmLDsaxf8mL33lM6vpd5Bi0X/IWmTUJ9DWoIRBmV/nPscRgpNIYkCOJoQxsuQyr1/utDwdKc6OPoi7AyluZ0gx+11aqWpi44Xi9SShoAor+62wcPqtsMfjvbAY9YoK30DFdxRxM2VCgcD6jrJVbB6InSQG+jOjDfQup2c57TKjzrqcttmTMoymzjLWcJsWDf9J9oRYsJCGECpVqVRGYvF7E2kGX9kJ0ctnVJjL3mm7XTTdMHjun3UA1I7gCvjUDuV2nFD02Cxp2Bl4ZgbcL4mbDbHPkhHzJKPvj2FAyxT58N8n7C5Y3WGjmWJZ6rz+rbWvsVWzBd517TveD0QdZnuS+PrZpsRHQr6Jg80XvX4H1ZTnuepw9wYycVibS/ibdBkTxnrGBzwzJngq/GsxwRm9DKSPfehiGUb6QWGIRXIExeqyXXGa/fP9xtyBmvyEWyYPfof2bh5ClTXe4RvpfFh9mVl4cPV9q3P5Pw==&lt;/diagram&gt;&lt;diagram id=&quot;JBxkRaY2V4fryi9kfNIv&quot; name=&quot;Page-3&quot;&gt;7VjbctowEP0aHsP4AgEegyFtZpJOp+n09qbaa1utkFx5DaZf35UtYxyHJJ3mMpnCeEB7tGtJ5+wBxgM/WJVvNMvSKxWBGHhOVA78xcDzXNcZ0YdBtjUymXg1kGge2aQWuOa/wYKORQseQd5JRKUE8qwLhkpKCLGDMa3VppsWK9FdNWMJ9IDrkIk++plHmDbnOp21E2+BJ6ldeupN6okVa5LtSfKURWqzB/nLgR9opbAercoAhCGv4aWuOz8wu9uYBokPKZh+jOVJeRVk30Kulxefygv568Ruds1EYQ98yXMkRMX0VsmZ2+3jtuEEoaQV5zlq9RMCJZQmPIKYFcLgMReiQQeef169CGeCJ5IwAbFJW4NGTkSfWRhVZm6asZDL5LLKWYxa5IPl2ECKamNRkZnyKAIqn2uFDNn3ao8OxZniEivxx3O6iJvAGY4HY9pSQLHbxnSZdI2BknQmxis+geW4gRyrWxcyAkOjS5FljHYP5UEp3J3A5AxQK0C9pRRbMLMtYT3hTm28aTusSUn3esu3GLM9nexu3MpOA6v8X3SBO+ppDBHZwIZETaoSJZlYtug8LPR6R0pLkeG+LbhURtUq5Qcgbq3BWYGKoBRXws5CyfGLLTfjr2ZM+tTRotybWmybQEZnxuIUSiWhRs65OXs1Xx/KnORunejgqtAh3MWQc7ugGgRDvu4ucJs+tvS96cq2EXbKN50wuyExMp0A2qobKu+28Q/COz3/N1/h7462f2zbe5Ou2t4tvp88q+/HR9/f4/sDir5237s93w+Hw6Pjn9zx7ks7/vR/cvz99p48WNFHdrznPLPj+//0m1967+j7p/a967+076dH3+/TMXuwoq/d97ODvnd7LXH0/SP7/vTJbE9h++iobpb2AZy//AM=&lt;/diagram&gt;&lt;/mxfile&gt;" style="background-color: rgb(255, 255, 255);"><defs><clipPath id="mx-clip-4-125-82-30-0"><rect x="4" y="125" width="82" height="30"/></clipPath><clipPath id="mx-clip-184-225-62-30-0"><rect x="184" y="225" width="62" height="30"/></clipPath><clipPath id="mx-clip-184-155-62-30-0"><rect x="184" y="155" width="62" height="30"/></clipPath><clipPath id="mx-clip-184-75-62-30-0"><rect x="184" y="75" width="62" height="30"/></clipPath><clipPath id="mx-clip-184-5-62-30-0"><rect x="184" y="5" width="62" height="30"/></clipPath></defs><g><rect x="0" y="120" width="90" height="30" rx="4.5" ry="4.5" fill="#ffffff" stroke="rgb(0, 0, 0)" pointer-events="all"/><g fill="rgb(0, 0, 0)" font-family="Helvetica" clip-path="url(#mx-clip-4-125-82-30-0)" font-size="12px"><text x="5.5" y="137.5">List of Models</text></g><path d="M 180 235 Q 90 235 90 130" fill="none" stroke="rgb(0, 0, 0)" stroke-miterlimit="10" pointer-events="stroke"/><rect x="180" y="220" width="70" height="30" rx="4.5" ry="4.5" fill="#ffffff" stroke="rgb(0, 0, 0)" pointer-events="all"/><g fill="rgb(0, 0, 0)" font-family="Helvetica" clip-path="url(#mx-clip-184-225-62-30-0)" font-size="12px"><text x="185.5" y="237.5">Model N</text></g><path d="M 180 165 Q 90 165 90 130" fill="none" stroke="rgb(0, 0, 0)" stroke-miterlimit="10" pointer-events="stroke"/><rect x="180" y="150" width="70" height="30" rx="4.5" ry="4.5" fill="#ffffff" stroke="rgb(0, 0, 0)" pointer-events="all"/><g fill="rgb(0, 0, 0)" font-family="Helvetica" clip-path="url(#mx-clip-184-155-62-30-0)" font-size="12px"><text x="185.5" y="167.5">...</text></g><path d="M 180 85 Q 90 85 90 140" fill="none" stroke="rgb(0, 0, 0)" stroke-miterlimit="10" pointer-events="stroke"/><rect x="180" y="70" width="70" height="30" rx="4.5" ry="4.5" fill="#ffffff" stroke="rgb(0, 0, 0)" pointer-events="all"/><g fill="rgb(0, 0, 0)" font-family="Helvetica" clip-path="url(#mx-clip-184-75-62-30-0)" font-size="12px"><text x="185.5" y="87.5">Model 2</text></g><path d="M 180 15 Q 90 15 90 140" fill="none" stroke="rgb(0, 0, 0)" stroke-miterlimit="10" pointer-events="stroke"/><rect x="180" y="0" width="70" height="30" rx="4.5" ry="4.5" fill="#ffffff" stroke="rgb(0, 0, 0)" pointer-events="all"/><g fill="rgb(0, 0, 0)" font-family="Helvetica" clip-path="url(#mx-clip-184-5-62-30-0)" font-size="12px"><text x="185.5" y="17.5">Model 1</text></g></g></svg>


--------------------------------------------------------------------------------
/vignettes/Images/pydynpd.drawio:
--------------------------------------------------------------------------------
1 | <mxfile host="Electron" modified="2022-07-30T14:14:47.018Z" agent="5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/19.0.3 Chrome/102.0.5005.63 Electron/19.0.3 Safari/537.36" etag="6uFzF36l7kVhf7fMAjsP" version="19.0.3" type="device" pages="3"><diagram id="mTngtMP_0UJyMqEpqlQN" name="Page-1">7Vpbe6M2EP01flx/4mrnMXayab82GzfedrOPCsigBCMi5MTeX18JxE3CNmnwZdvmJWgQAubMOTMaPLCmy/UNhUl4S3wUDUzgrwfW1cA0DQPY/J+wbHLLaGTmhoBiX06qDHP8A0kjkNYV9lHamMgIiRhOmkaPxDHyWMMGKSVvzWkLEjXvmsAAaYa5ByPd+g37LMytYwdU9l8QDkJWvrA8s4TFZGlIQ+iTt5rJuh5YU0oIy4+W6ymKhPMKv+TXfd5ytnwwimLW5YL0yzwe27/ewb/GNIqfnu/Y0+aTZcmHY5vijZHPHSCHhLKQBCSG0XVlnVCyin0klgV8VM35nZCEGw1ufEKMbSSacMUIN4VsGcmzaI3Zg7xcHH+vHV+J0DHccijixrGHjhzPEMVLxBAtLokZ3TzUB9li+XwxzNYrT2bLidGCxEw+n+HK8ZRERKwbk1i8ZsooeS5R53hNdJcXIQlpgNguP8uoF86tXSmRukGEvxPd8AkURZDh12b0QRnEQTmvvHRGMH8WE0jClbyRdLOBcF1jkZSsqIfkdfV4UZayTDC0nIvqT1l4NDQM7Wxxk9wh2k34Qe1tK1MWqu8JW+mYVxitpKv+THlMqLFcizkfpmEZs28hZihNoCfmvXHlEnCHMBHD5ToQUjb0F/4QrXmkieiPGWabfFJOYrFKEME0lcceWWJPHkfwEUUT6D0HGVP2htUCR5EyqR6OA9MC2V+5NKE+osoVMMJBzIcej03BjjzCpSuEA9IYJl9JDklu2RrOr4gytN4Zp/Ks0ww3qxi/VVopuJzZwppOqvFSj+xGsLw7MtyTClqpU99rZyoJKgRNVSDzwwqUs7qDAu2XKqdvqfoYoKZG9RmkKRqIdOdG/E0mj5z4biCOUgSpF2bP9rJCKdNioULaKGRg3pSBGqrvcH93xli206TMqFToOmnMFtIUROrfx7bm4/k+T/LXZW3uUoVM1zZNqYTzMC+5LuWJJfb9jJdt+DS5Wqp6IfEzyIRkZxYTGN141gOsJugEq9UCq3UoWG2gofYTaqGPFnCV8bxXOXS6yuHovOTQ0ah6g2JEIROKuJT7rwj/FNpnj1vKhaMqn21objoiRYwaQSq6tFOkj43Okag16kqti/Oi1kij1oK7iT8fzDJhQS4Si5cL0TaeFRuJEK0hjwvuy6S2aZXWch9r7ufiAq9R0RrZl9F2QdqSi3dvVbbvNvrImMqO0gW6GJiGoYuBczAxOG0zZGgePmFWKlIJR3m3HSryATW4OFmibe9r2Eql5tpKPG3pXVxSCje1aYmYkG6/j1s0KYrOp9qf2/Nc753vOo7CgPyJe228FAn831tSlgwxBu/Ks0dgiD0+DkOMsdWMxHE3hvTW3bvQEvFXCn3MsIgafmJGiYfS9AwrXBc0Se9cnHpvb+v9k0vhtQR5eIG9qq7Zu7/vvm+nKMU/4GO2lOCGlEq+rjMZOFdiLc7xtKpourYOupG6BxwdYCo4ujqOoAVH82A46j2a+6I1A/I6Na9JYwHc/6i2oWqPmu1qtw3Vtnb14VAdnU8+bXyB27H/HDrjRm40hmBf+ZiN1E93x9+ZFgl0fy1qHCnT2kqmVWvRLR/reivnxpqofMaRgMgEt60CcvoUqxYnbZ+cDpVhr77Yy3gGwWJ6v7FmYBF+e5h90r9Fls3ze5SKKN6vwqfonf/Tvf/x5BooH7OLWqYGdZta99FQb0VaL6VudzVc/9vJ1lL6O04LUUcHyrXPjxGZPz7ROzcMf3sGCf3j5aGFp/co4H5OcdbR68zVMwCxB3hMS2k3OLauo8Yx8bE0fAam+7ISv4+a3BDiV6OivNV3gOehrD3Ao1YGrqGzpyft48PqV2h5ZVH9ls+6/hs=</diagram><diagram id="X5Q8cOgmqDiJ-UkGuhwi" name="Page-2">1Vpbd5s4EP41fmwOiIvtxzjOJt2tz+Y0bbZ5VEDGSjCiQo7t/vqVQAKEnJgkBjt9aNAgyTDzzTcXMXAulpsrCtPFjIQoHgAr3Ayc6QAA27Zc/kdItoVkOASFIKI4lJMqwS3+g6TQktIVDlGmTWSExAynujAgSYICpskgpWStT5uTWP/VFEbIENwGMDal/+GQLQrpyLMq+TXC0YKVLyzvLKGaLAXZAoZkXRM5lwPnghLCiqvl5gLFQnlKL9cTy0u9O3STrmb/jJ/x7f3jry/FZn+9ZUn5ChQl7N1bWzb1owuwXT8+wrX79+P0X/L7i3o1tlX6QiFXnxwSyhYkIgmMLyvphJJVEiKxq8VH1ZxvhKRcaHPhI2JsK7EAV4xw0YItY3kXbTCbbuT6fLCVg4xBys6F1bkgIQnKZZQ8lbbjWp+0VIdUW0ZWNECv6MAeSlhCGiH22kS5o1BRDVxS3VeILBGjWz6Bohgy/KwjEEogR+W8cukNwfxNgCWdbuhJn5MuB8aWvkXxpHJVZXJ+UXuMSpQD4Q2gsI8Fil9i+ZknR/d74cLtT7fFIt9V4/t87HqOElRr85G2+AZRzNWFaAnAjsEG2mJteGisaSh5KyTkYz/DeCV/6QbSDOW//3uFMmZApgKEsOF6gRm6TWGumzWPNLrxP6r2Z0QZ2ryqJ3nXGQ0137JdjwMul6yr8GArzl/UQoNvdaRc56j+VvM2q+5ttq/5m+eWjtl0msoNdR/09nhg587mtnQ20Amv8zgGt7UJqeDr7GXad8BYh+bQb0Cr2PGgXO8ajv0z44ZtArIGnBBmixJ4uV9nul/zPCkVw+UmEinlWTgPz9CGw0VAOGGYbYtJRTIldolimGXyOiBLHMjrGD6geAKDpyiH+wWJCX0tMZjjOG5MmpOEKdEAOFb+r9ya0BDRxgoY4yjhw4BjUEA830I5Y05WCUx/kMJ4haQrsnIbXKUgV2cqfwdTjccdMZVnoGUAfLgUVk8eMvGnDAcWmYsnEDBKUYDn3Kg8py7Kiwa4uDaYTk6GDYTOME/qz+WNJQ7DggdRhv/Ah3wrYVfpZHxfbzLwpmIvTn1ZwYJVqGmixAROfyEJWA0zq/Rvn5lBVwHJP0pAUlGkHLSLIu9JHLuvKVSpuzfPc3qqKYBeU7ijBniKV+qsphgazHGFEkQhEznk14QbZLVE+QPPIKN481nySdDQq+NYR84mR4amz3USnrUk4bZM2Z6sd1lN540QUxQwTMQyBDPxRPMYp3fytri+7t3GopbU6nFg2ngXPztdmXh8VH6uUXJZNBwzy2/dvzl4Sf1CHm832jfDkb5F1+2b4zT13lkEnmT4bt0R7Cl6O34jetvtore50WjPRl1jU5FvrZFESYB4+Qf8WND9Ay8+/Uhc3cAkP4uYQgY/SzrgjVukA3af6YANjsIGp+jVylv3urV7aK/+mAUdM3eezT6LRziq0FCMA8CRE2Tb7HlNUYq48nKmLDsaxf8mL33lM6vpd5Bi0X/IWmTUJ9DWoIRBmV/nPscRgpNIYkCOJoQxsuQyr1/utDwdKc6OPoi7AyluZ0gx+11aqWpi44Xi9SShoAor+62wcPqtsMfjvbAY9YoK30DFdxRxM2VCgcD6jrJVbB6InSQG+jOjDfQup2c57TKjzrqcttmTMoymzjLWcJsWDf9J9oRYsJCGECpVqVRGYvF7E2kGX9kJ0ctnVJjL3mm7XTTdMHjun3UA1I7gCvjUDuV2nFD02Cxp2Bl4ZgbcL4mbDbHPkhHzJKPvj2FAyxT58N8n7C5Y3WGjmWJZ6rz+rbWvsVWzBd517TveD0QdZnuS+PrZpsRHQr6Jg80XvX4H1ZTnuepw9wYycVibS/ibdBkTxnrGBzwzJngq/GsxwRm9DKSPfehiGUb6QWGIRXIExeqyXXGa/fP9xtyBmvyEWyYPfof2bh5ClTXe4RvpfFh9mVl4cPV9q3P5Pw==</diagram><diagram id="JBxkRaY2V4fryi9kfNIv" name="Page-3">7VjbctowEP0aHsP4AgEegyFtZpJOp+n09qbaa1utkFx5DaZf35UtYxyHJJ3mMpnCeEB7tGtJ5+wBxgM/WJVvNMvSKxWBGHhOVA78xcDzXNcZ0YdBtjUymXg1kGge2aQWuOa/wYKORQseQd5JRKUE8qwLhkpKCLGDMa3VppsWK9FdNWMJ9IDrkIk++plHmDbnOp21E2+BJ6ldeupN6okVa5LtSfKURWqzB/nLgR9opbAercoAhCGv4aWuOz8wu9uYBokPKZh+jOVJeRVk30Kulxefygv568Ruds1EYQ98yXMkRMX0VsmZ2+3jtuEEoaQV5zlq9RMCJZQmPIKYFcLgMReiQQeef169CGeCJ5IwAbFJW4NGTkSfWRhVZm6asZDL5LLKWYxa5IPl2ECKamNRkZnyKAIqn2uFDNn3ao8OxZniEivxx3O6iJvAGY4HY9pSQLHbxnSZdI2BknQmxis+geW4gRyrWxcyAkOjS5FljHYP5UEp3J3A5AxQK0C9pRRbMLMtYT3hTm28aTusSUn3esu3GLM9nexu3MpOA6v8X3SBO+ppDBHZwIZETaoSJZlYtug8LPR6R0pLkeG+LbhURtUq5Qcgbq3BWYGKoBRXws5CyfGLLTfjr2ZM+tTRotybWmybQEZnxuIUSiWhRs65OXs1Xx/KnORunejgqtAh3MWQc7ugGgRDvu4ucJs+tvS96cq2EXbKN50wuyExMp0A2qobKu+28Q/COz3/N1/h7462f2zbe5Ou2t4tvp88q+/HR9/f4/sDir5237s93w+Hw6Pjn9zx7ks7/vR/cvz99p48WNFHdrznPLPj+//0m1967+j7p/a967+076dH3+/TMXuwoq/d97ODvnd7LXH0/SP7/vTJbE9h++iobpb2AZy//AM=</diagram></mxfile>


--------------------------------------------------------------------------------
/vignettes/Test_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/vignettes/Test_1.png


--------------------------------------------------------------------------------
/vignettes/images/new_struct.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/vignettes/images/new_struct.png


--------------------------------------------------------------------------------
/vignettes/images/traditional.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/vignettes/images/traditional.png


--------------------------------------------------------------------------------
/vignettes/new_struct.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/vignettes/new_struct.png


--------------------------------------------------------------------------------
/vignettes/paper.md:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: 'pydynpd: A Python package for dynamic panel model'
  3 | tags:
  4 |   - Python
  5 |   - dynamic panel model
  6 | authors:
  7 |   - name: Dazhong Wu^[Corresponding author] 
  8 |     affiliation: 1
  9 |   - name: Jian Hua
 10 |     affiliation: 1
 11 |   - name: Feng Xu
 12 |     affiliation: 1
 13 | affiliations:
 14 |  - name: Department of Business Management, School of Business and Public Administration, University of the District of Columbia, USA
 15 |    index: 1
 16 | date: April 9, 2022
 17 | bibliography: ref.bib
 18 | ---
 19 | 
 20 | # Summary
 21 | 
 22 | We present pydynpd, a Python package which implements all the
 23 | features in dynamic panel model with GMM (general method of moments).
 24 | These features include: (1) difference and system GMM, (2) one-step, two-step, and iterative estimators, (3) robust standard errors including the one
 25 | suggested by [@windmeijer2005finite], (4) Hansen over-identification test, (5)
 26 | Arellano-Bond test for autocorrelation, (6) time dummies, (7) allows
 27 | users to collapse instruments to reduce instrument proliferation issue,
 28 | and (8) a simple grammar for model specification. As far as we know, pydynpd is the first Python package that allows researchers to estimate dynamic panel model.
 29 | 
 30 | What distinguishes pydynpd from any other dynamic panel model packages is its innovative feature: the capability to search for models based on users' request, rather than just run the model specified by users as other packages do. To the best of our knowledge, there is no other econometric software/package that offers this feature, let alone dynamic panel model packages.
 31 | 
 32 | # Statement of need 
 33 | Over the past decade, dynamic panel model has become increasingly
 34 | popular in empirical studies. For example, researchers use dynamic panel
 35 | model to study the environmental impacts of climate change [@econometrics8030030] and COVID-19 [@anser2020does;@oehmke2021dynamic].
 36 | This is because many aspects of our social and natural systems are
 37 | inherently dynamic, and the GMM methods proposed by Arellano & Bond [-@arellano1991some] and Blundell & Bond [-@blundell1998initial] allow us to model the dynamics that
 38 | traditional static panel models are not able to capture.
 39 | Correspondingly, the growing popularity of dynamic panel model will
 40 | stimulate demand for the related packages in open source programs such
 41 | as R, Python, and Julia,
 42 | 
 43 | # Statement of field 
 44 | So far, there are several related packages in Stata and R.
 45 | Stata is a commercial software, while existing R packages have some
 46 | issues. For example, in our benchmark test R package panelvar [@sigmund2021panel] is more than 100 times slower than Stata package xtabond2 [@roodman2009xtabond2]. On the other hand, R package plm [@croissant2008panel]
 47 | is fast enough, but it
 48 | has calculation issue for system GMM. A third R package, pdynmc, crashed or refused to work several times in our tests. Due to these reasons, R packages above are far less popular than xtabond2, according to citations they
 49 | have received.
 50 | 
 51 | Moreover, there is no Python or Julia package yet to estimate dynamic
 52 | panel model due to the complexity involved in implementation. Our
 53 | package contributes to the open source community because (1) it
 54 | implements all of the major features in the associated commercial packages in
 55 | Stata, (2) its innovative feature (as mentioned above) will stimulate similar or even more revolutionary features in the empirical computing community, and (3) though Python is interpreted, our package is almost as
 56 | fast as xtabond2 which was compiled as shown in figure below. This package will increase the usability of open source software in estimating dynamic panel models, because for a package to be attractive, it must be both accurate and fast. Moreover, unlike existing R
 57 | packages which rely heavily on R-specific components (that is a main
 58 | reason they are not fast), our code uses components common to any
 59 | programming language, making it easy to translate to R or Julia.
 60 | 
 61 | ![Running time (relative to the fastest).\label{fig:runtime}](Test_1.png){width=100%}
 62 | 
 63 | # The pydynpd package 
 64 | 
 65 | pydynpd is able to estimate the most complicated linear dynamic panel
 66 | models:
 67 | 
 68 | $$y_{it}=\sum_{j=1}^{p}\alpha_{j}y_{i,t-j}+\sum_{k=1}^{m}\sum_{j=0}^{q_{k}}\beta_{jk}r_{i,t-j}^{(k)}+\boldsymbol{\delta}\boldsymbol{d_{i,t}}+\boldsymbol{\gamma}\boldsymbol{s_{i,t}}+u_{i}+\epsilon_{it}$$
 69 | 
 70 | In the model above, $y_{i,t-j}$ ($j=1,2,\ldots,p$) denotes a group of
 71 | $p$ lagged dependent variables. $r_{i,t-j}^{(k)}$ represents a group of
 72 | $m$ endogenous variables other than lagged $y$. $\boldsymbol{d_{it}}$ is
 73 | a vector of predetermined variables which may potentially correlate with
 74 | past errors, $\boldsymbol{s_{it}}$ is a vector of exogenous variables,
 75 | and $u_{i}$ represents fixed effect. As lagged dependent variables such as $y_{i,t-1}$ are included as regressors, the
 76 | popular techniques in static panel models no longer produce consistent
 77 | results. Researchers have developed many methods to estimate dynamic
 78 | panel models. Essentially there are two types of GMM estimates,
 79 | difference GMM and system GMM. Just like other R and Stata packages, pydynpd fully implements these two methods.
 80 | 
 81 | Due to space limit, we focus here on general discussion of the package. A detailed statistical/technique description of our package is available on [GitHub](https://github.com/dazhwu/pydynpd/blob/main/vignettes/Guide.ipynb). 
 82 | 
 83 | For illustration purpose, consider the following equation:
 84 | $$y_{it}=\sum_{j=1}^{\colorbox{yellow}p}\alpha_{j}y_{i,t-j}+\sum_{j=1}^{\colorbox{yellow}q_k}\beta_{j}r_{i,t-j}+{\delta}d_{i,t}+\gamma_{i,t}+u_{i}+\epsilon_{it}$$
 85 | 
 86 | The equation above is related to a group/family of models with different combinations of $p$ and $q_{k}$ values. Unless existing economic theory indicates exactly what model to choose, researchers need to guess and try the values of $p$ and $q_{k}$ as highlighted in equation above. For example, if $p=2$ and $q_{k}=1$, then a specific model is formed:
 87 | 
 88 | $$ y_{it}=\alpha_{1}y_{i,t-1}+\alpha_{2}y_{i,t-2}+\beta_{j}r_{i,t-j}+{\delta}d_{i,t}+\gamma_{i,t}+u_{i}+\epsilon_{it}$$
 89 | 
 90 | 
 91 | ![How alternative packages work. \label{fig:traditional}](traditional.png){width=100%}
 92 | 
 93 | ![The automatic mode in pydnynpd. \label{fig:auto_mode}](new_struct.png){width=100%}
 94 | 
 95 | Figure 2 shows how other packages work: a user needs to choose a specific model, then based on that particular model the system generates the corresponding instrument matrix and panel data with dependent/independent variables so that the GMM process can produce regression results. An innovative feature of pydynpd is that it can also run in its "automatic" mode in which it doesn't require users to choose a particular model. Instead, users may let pydynpd search for the lags (e.g., $p$ and $q_{k}$) so that the corresponding models satisfy certain standards. In other words, users may use pydynpd to estimate the following model with question markers indicating values not determined yet:
 96 | 
 97 | $$y_{it}=\sum_{j=1}^{\colorbox{yellow} ?}\alpha_{j}y_{i,t-j}+\sum_{j=1}^{{\colorbox{yellow} ?}}\beta_{j}r_{i,t-j}+{\delta}d_{i,t}+\gamma_{i,t}+u_{i}+\epsilon_{it}$$
 98 | 
 99 | Figure 3 shows how pydynpd's automatic mode works: a user indicates what values pydynpd needs to search for (e.g., the question marks in equation above), and then pydynpd tries all possible models, and returns "good" models that pass dynamic models' specification tests (e.g., Hansen overidentification test and AR(2) test). Note that processes included in the dotted box in Figure 2 is represented as a black-box process named "traditional process" in Figure 3.
100 | 
101 | # References
102 | 
103 | 


--------------------------------------------------------------------------------
/vignettes/ref.bib:
--------------------------------------------------------------------------------
  1 | @article{anser2020does,
  2 |   title={Does communicable diseases (including COVID-19) may increase global poverty risk? A cloud on the horizon},
  3 |   author={Anser, Muhammad Khalid and Yousaf, Zahid and Khan, Muhammad Azhar and Nassani, Abdelmohsen A and Alotaibi, Saad M and Abro, Muhammad Moinuddin Qazi and Vo, Xuan Vinh and Zaman, Khalid},
  4 |   journal={Environmental Research},
  5 |   volume={187},
  6 |   pages={109668},
  7 |   year={2020},
  8 |   publisher={Elsevier},
  9 |   doi={10.1016/j.envres.2020.109668}
 10 | }
 11 | 
 12 | 
 13 | @article{arellano1991some,
 14 |   title={Some tests of specification for panel data: Monte Carlo evidence and an application to employment equations},
 15 |   author={Arellano, Manuel and Bond, Stephen},
 16 |   journal={The review of economic studies},
 17 |   volume={58},
 18 |   number={2},
 19 |   pages={277--297},
 20 |   year={1991},
 21 |   publisher={Wiley-Blackwell},
 22 |   doi={10.2307/2297968}
 23 | }
 24 | 
 25 | @article{blundell1998initial,
 26 |   title={Initial conditions and moment restrictions in dynamic panel data models},
 27 |   author={Blundell, Richard and Bond, Stephen},
 28 |   journal={Journal of econometrics},
 29 |   volume={87},
 30 |   number={1},
 31 |   pages={115--143},
 32 |   year={1998},
 33 |   publisher={Elsevier},
 34 |   doi={10.1016/S0304-4076(98)00009-8}
 35 | }
 36 | 
 37 | @article{croissant2008panel,
 38 |   title={Panel data econometrics in R: The plm package},
 39 |   author={Croissant, Yves and Millo, Giovanni},
 40 |   journal={Journal of statistical software},
 41 |   volume={27},
 42 |   number={2},
 43 |   year={2008},
 44 |   doi={10.18637/jss.v027.i02}
 45 | }
 46 | 
 47 | @Article{econometrics8030030,
 48 | AUTHOR = {Phillips, Peter C. B.},
 49 | TITLE = {Dynamic Panel Modeling of Climate Change},
 50 | JOURNAL = {Econometrics},
 51 | VOLUME = {8},
 52 | YEAR = {2020},
 53 | NUMBER = {3},
 54 | ARTICLE-NUMBER = {30},
 55 | URL = {https://www.mdpi.com/2225-1146/8/3/30},
 56 | ISSN = {2225-1146},
 57 | doi = {10.3390/econometrics8030030}
 58 | }
 59 | 
 60 | @article{oehmke2021dynamic,
 61 |   title={Dynamic panel data modeling and surveillance of COVID-19 in metropolitan areas in the United States: Longitudinal trend analysis},
 62 |   author={Oehmke, Theresa B and Post, Lori A and Moss, Charles B and Issa, Tariq Z and Boctor, Michael J and Welch, Sarah B and Oehmke, James F},
 63 |   journal={Journal of medical Internet research},
 64 |   volume={23},
 65 |   number={2},
 66 |   pages={e26081},
 67 |   year={2021},
 68 |   publisher={JMIR Publications Inc., Toronto, Canada},
 69 |   doi={10.2196/26081}
 70 | }
 71 | 
 72 | @article{roodman2009xtabond2,
 73 |   title={How to do xtabond2: An introduction to difference and system GMM in Stata},
 74 |   author={Roodman, David},
 75 |   journal={The stata journal},
 76 |   volume={9},
 77 |   number={1},
 78 |   pages={86--136},
 79 |   year={2009},
 80 |   publisher={SAGE Publications Sage CA: Los Angeles, CA},
 81 |   doi={10.1177/1536867X0900900106}
 82 | }
 83 | 
 84 | 
 85 | @article{sigmund2021panel,
 86 |   title={Panel vector autoregression in R with the package panelvar},
 87 |   author={Sigmund, Michael and Ferstl, Robert},
 88 |   journal={The Quarterly Review of Economics and Finance},
 89 |   volume={80},
 90 |   pages={693--720},
 91 |   year={2021},
 92 |   publisher={Elsevier},
 93 |   doi={10.1016/j.qref.2019.01.001}
 94 | }
 95 | 
 96 | @article{windmeijer2005finite,
 97 |   title={A finite sample correction for the variance of linear efficient two-step GMM estimators},
 98 |   author={Windmeijer, Frank},
 99 |   journal={Journal of econometrics},
100 |   volume={126},
101 |   number={1},
102 |   pages={25--51},
103 |   year={2005},
104 |   publisher={Elsevier},
105 |   doi={10.1016/j.jeconom.2004.02.005}
106 | }
107 | 
108 | 
109 | 
110 | 


--------------------------------------------------------------------------------
/vignettes/traditional.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dazhwu/pydynpd/706f69d75cb38feebf042c2dffd8f203e0a7aeb9/vignettes/traditional.png


--------------------------------------------------------------------------------