├── .gitignore
├── LICENSE
├── README.md
└── code_in_notes
    ├── CivilConflict.do
    ├── DID_disease.do
    ├── DID_divorce.do
    ├── DID_dynamic_trend.do
    ├── HCW.do
    ├── RD_r_and_d.do
    ├── Run_ALL.do
    ├── chow_test.do
    ├── cross_validation_reg.do
    ├── datasets
        ├── CivilConflict.dta
        ├── DDCGdata_final.dta
        ├── Divorce-Wolfers-AER.dta
        ├── Lalonde_nsw
        │   ├── cps_controls.dta
        │   ├── cps_controls2.dta
        │   ├── cps_controls3.dta
        │   ├── nsw.dta
        │   ├── nsw_dw.dta
        │   ├── psid_controls.dta
        │   └── psid_controls3.dta
        ├── NTV_Aggregate_Data_reshaped.dta
        ├── OHIE_QJE.dta
        ├── OHIE_Science.dta
        ├── Questionarie.pdf
        ├── angrist2002aer.dta
        ├── bronzini_r_and_d.dta
        ├── cfps_adult.dta
        ├── cfps_family_econ.dta
        ├── chfs_ind.dta
        ├── citydata.dta
        ├── disease_gender_gap.dta
        ├── hcw.dta
        ├── shannxi_export.dta
        ├── soep_female_labor.dta
        └── voting_cnty_clean.dta
    ├── fixed_effects_ntv.do
    ├── hcw_lasso.do
    ├── heteroscedasticity.do
    ├── ipw_doubly_robust.do
    ├── lasso_and_ridge_shrinkage.do
    ├── linear_panel.do
    ├── linear_panel_fd_np.do
    ├── linear_panel_fe_ntv.do
    ├── logit_and_roc.do
    ├── matching.do
    ├── matching_nonexp.do
    ├── matching_psm.do
    ├── mlogit_employ.do
    ├── model_selection.do
    ├── multi_collinearity.do
    ├── nls_mpc.do
    ├── ohie_qje.do
    ├── ohie_qje_joint_test.do
    ├── ohie_qje_test_one.do
    ├── ohie_science.do
    ├── oneway_anova.do
    ├── panel_binary.do
    ├── panel_dynamic.do
    ├── poisson.do
    ├── propensity_score.do
    ├── qreg_consump.do
    ├── qreg_outlier.do
    ├── qreg_simulate.do
    ├── qreg_with_dummy.do
    ├── reg_one_variate.do
    ├── reg_readings.do
    ├── reg_small_b.do
    ├── reg_with_dummies.do
    ├── reg_with_dummy.do
    ├── simpson_paradox.do
    ├── test_jointly_ntv.do
    ├── wald_ols.do
    ├── weighted_ols.do
    └── white_hetero.do


/.gitignore:
--------------------------------------------------------------------------------
1 | *.tex
2 | *.txt
3 | *.png
4 | *.eps
5 | .DS_*
6 | *.pdf
7 | *.doc
8 | *.gph
9 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 Jichun Si
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 注意：本Repo已经被放弃，新的Repo在：https://github.com/sijichun/EconometricsCode 如有需要请移步。
2 | 


--------------------------------------------------------------------------------
/code_in_notes/CivilConflict.do:
--------------------------------------------------------------------------------
 1 | // CivilConflict.do
 2 | // Instrumental-variables walkthrough on the CivilConflict data: outcome
 3 | // any_prio, endogenous GDP growth, rainfall growth as the instrument.
 4 | clear
 5 | set more off
 6 | 
 7 | use datasets/CivilConflict
 8 | 
 9 | // Country dummies plus country-specific linear year trends; used in every model.
10 | local fe "i.ccode i.ccode#c.year"
11 | 
12 | // ---- Simplest case: one endogenous variable, one instrument ----
13 | // Endogenous: GDP growth (gdp_g); instrument: rainfall growth (GPCP_g).
14 | // First stage
15 | reg gdp_g GPCP_g `fe'
16 | predict gdp_g_pred
17 | // Reduced form
18 | reg any_prio GPCP_g `fe', cl(ccode)
19 | // Wrong way: 2SLS by hand, plugging in the first-stage fitted values
20 | reg any_prio gdp_g_pred `fe', cl(ccode)
21 | // 2SLS done properly, with three different commands
22 | ivregress 2sls any_prio (gdp_g = GPCP_g) `fe', cl(ccode)
23 | ivreghdfe any_prio (gdp_g = GPCP_g), absorb(`fe') cl(ccode)
24 | ivreg2 any_prio (gdp_g = GPCP_g) `fe', cl(ccode)
25 | 
26 | // ---- Two endogenous variables, two instruments ----
27 | // Endogenous: GDP growth and its lag (gdp_g, gdp_g_l);
28 | // instruments: rainfall growth and its lag (GPCP_g, GPCP_g_l).
29 | local endog "gdp_g gdp_g_l"
30 | local iv "GPCP_g GPCP_g_l"
31 | // First stages, one per endogenous variable; fitted values saved as <var>_pred
32 | drop gdp_g_pred
33 | foreach v of local endog {
34 | 	reg `v' `iv' `fe'
35 | 	predict `v'_pred
36 | }
37 | // 2SLS, three commands
38 | ivregress 2sls any_prio (`endog' = `iv') `fe', cl(ccode)
39 | ivreghdfe any_prio (`endog' = `iv'), absorb(`fe') cl(ccode)
40 | ivreg2 any_prio (`endog' = `iv') `fe', cl(ccode)
41 | // 2SLS with more instruments than endogenous variables (four instruments)
42 | ivreghdfe any_prio (`endog' = `iv' NCEP_g NCEP_g_l), ///
43 | 	absorb(`fe') cl(ccode)
44 | // Limited-information maximum likelihood
45 | ivreg2 any_prio (`endog' = `iv') `fe', cl(ccode) liml
46 | 


--------------------------------------------------------------------------------
/code_in_notes/DID_disease.do:
--------------------------------------------------------------------------------
 1 | // DID_disease.do
 2 | // Cohort difference-in-differences: years of education regressed on
 3 | // birth-cohort x meningitis-exposure interactions (and their female
 4 | // interactions), with standard errors clustered by district.
 5 | clear
 6 | set more off
 7 | // The data file lives in datasets/, like the input of every other script here.
 8 | use datasets/disease_gender_gap
 9 | *****
10 | ** Variable notes
11 | * year: survey wave
12 | * district: district
13 | * edu_years: years of education
14 | * year_birth: birth year, 1960-1992  ** note: the meningitis epidemic occurred in 1986
15 | ** Cohorts chosen by the authors: aged 0-5 in 1986 (not in school), 6-12 (primary school), 13-20 (secondary school)
16 | ** Corresponding birth years: 1981-1986, 1974-1980, 1966-1973
17 | gen cohort1=(year_birth>=1981 & year_birth<=1986)
18 | gen cohort2=(year_birth>=1974 & year_birth<=1980)
19 | gen cohort3=(year_birth>=1966 & year_birth<=1973)
20 | ** Interaction terms
21 | gen co1menin=cohort1*menin_avg
22 | gen co2menin=cohort2*menin_avg
23 | gen co3menin=cohort3*menin_avg
24 | 
25 | gen co1menin_female=cohort1*female*menin_avg
26 | gen co2menin_female=cohort2*female*menin_avg
27 | gen co3menin_female=cohort3*female*menin_avg
28 | ** district is a string; it needs a numeric id before i.district-style terms can be used
29 | gen n=_n
30 | egen district_id=min(n), by(district)
31 | ** No extra controls are defined in this script. The macro was referenced
32 | ** without ever being set; it is now an explicit, empty placeholder.
33 | local control ""
34 | ** Regressor lists and fixed effects shared by the specifications below
35 | local did "co1menin co2menin co3menin female `control'"
36 | local did_f "co1menin_female co2menin_female co3menin_female `did'"
37 | local fe "i.year i.year_birth i.district_id"
38 | ** First regression: this is the actual DID. The usual "time" dimension is the cohort and the
39 | ** "group" dimension is menin; all three cohorts are treated, and there are two untreated groups.
40 | reghdfe edu_years `did', absorb(`fe') cl(district_id)
41 | ** Second regression: compares treatment effects across genders
42 | reghdfe edu_years `did_f', absorb(`fe') cl(district_id)
43 | ****** Question:
44 | ** Why not control for individual fixed effects (i.indvidual_id)?
45 | 
46 | ** What if people born after 1986 are excluded? (original note: results remain robust)
47 | ** The drop has to precede the regressions; previously it ran after them, so
48 | ** the "restricted" estimates were computed on the full sample.
49 | preserve
50 | drop if year_birth>1986
51 | reghdfe edu_years `did', absorb(`fe') cl(district_id)
52 | reghdfe edu_years `did_f', absorb(`fe') cl(district_id)
53 | restore
54 | 
55 | ** Allowing for cross-sectional dependence, e.g. peer effects within a birth year: what should be clustered?
56 | reghdfe edu_years `did', absorb(`fe') cl(district_id year_birth)
57 | reghdfe edu_years `did_f', absorb(`fe') cl(district_id year_birth)
58 | 
59 | ** Tightening the fixed effects further: birth-year x district cells
60 | local fe_cell "i.year i.year_birth#i.district_id"
61 | reghdfe edu_years `did', absorb(`fe_cell') cl(district_id year_birth)
62 | reghdfe edu_years `did_f', absorb(`fe_cell') cl(district_id year_birth)
63 | * Question: can the first regression still be estimated?
64 | * Question: what does the second regression identify?
65 | 


--------------------------------------------------------------------------------
/code_in_notes/DID_divorce.do:
--------------------------------------------------------------------------------
 1 | // DID_divorce.do
 2 | // Two-way fixed-effects DID on the Divorce-Wolfers-AER panel: benchmark,
 3 | // state-specific trends, an event-study check of pre-trends, and dynamic
 4 | // effects. All regressions use 1968-1988, stpop weights and state clusters.
 5 | clear
 6 | set more off
 7 | use "datasets/Divorce-Wolfers-AER.dta"
 8 | ** numeric state identifier built from st
 9 | egen state=group(st)
10 | ** declare the panel
11 | xtset state year
12 | gen year2=year^2
13 | ** estimation sample and weights shared by every regression below
14 | local sample "if year>1967 & year<1989 [w=stpop]"
15 | ** benchmark
16 | reghdfe div_rate unilateral divx* `sample', absorb(i.state i.year) cl(state)
17 | ** state-specific trend (linear, then linear + quadratic)
18 | reghdfe div_rate unilateral divx* `sample', absorb(i.state i.year i.state#c.year) cl(state)
19 | reghdfe div_rate unilateral divx* `sample', absorb(i.state i.year i.state#c.year i.state#c.year2) cl(state)
20 | 
21 | ** common trend
22 | 
23 | ** delta_year > 0: lfdivlaw is still ahead of the current year; < 0: it is behind.
24 | gen delta_year=lfdivlaw-year
25 | 
26 | ** Event-time dummies, generated in event-time order because the regressions
27 | ** below select them with varlist ranges. L# = # years before lfdivlaw,
28 | ** F# = # years after. delta_year==1 gets no dummy (omitted reference period).
29 | ** NOTE(review): a missing lfdivlaw makes delta_year missing, and missing>=12
30 | ** is true in Stata, so such rows would be coded L12unilateral==1 -- confirm
31 | ** how never-adopting states are coded in the data.
32 | gen L12unilateral = delta_year>=12
33 | forvalues k=11(-1)2 {
34 | 	gen L`k'unilateral = delta_year==`k'
35 | }
36 | gen unilateral0 = delta_year==0
37 | forvalues k=1/19 {
38 | 	gen F`k'unilateral = delta_year==-`k'
39 | }
40 | gen F20unilateral = delta_year<=-20
41 | ** Event study. Clustered by state like every other regression in this file,
42 | ** so the intervals drawn by coefplot are comparable with the tables.
43 | reghdfe div_rate L12unilateral-F20unilateral divx* `sample', absorb(i.state i.year) cl(state)
44 | coefplot, keep(*unilateral*) vert xline(12)
45 | ** dynamic effects
46 | reghdfe div_rate unilateral0-F20unilateral divx* `sample', absorb(i.state i.year) cl(state)
47 | 


--------------------------------------------------------------------------------
/code_in_notes/DID_dynamic_trend.do:
--------------------------------------------------------------------------------
 1 | // DID_dynamic_trend.do
 2 | // Simulated 15-period series with a policy starting in period 6. For two
 3 | // scenarios (constant effect / time-varying effect) it fits y on the policy
 4 | // dummy with no, linear and quadratic time trends and plots each fitted
 5 | // counterfactual path against the true trend. Exports one PDF and one PNG
 6 | // per scenario.
 7 | clear
 8 | set more off
 9 | // Fixed seed so the simulated data and the exported figures are reproducible.
10 | set seed 12345
11 | 
12 | ** number of observations: 15 periods in total
13 | set obs 15
14 | ** time index and policy indicator
15 | gen t=_n
16 | gen t2=t^2
17 | gen d=t>5 // the policy is in force from period 6 onwards
18 | 
19 | ** Trend specifications fitted in each scenario, and their plot labels
20 | local rhs0 "d"
21 | local rhs1 "d t"
22 | local rhs2 "d t t2"
23 | local lab0 "无时间趋势"
24 | local lab1 "线性时间趋势"
25 | local lab2 "二次时间趋势"
26 | 
27 | ** Scenario "linear": no dynamic effect, constant policy effect of 20.
28 | ** Scenario "dynamic": post-policy outcome rises until period 8, then falls.
29 | foreach scen in linear dynamic {
30 | 	// start each scenario from the design variables only
31 | 	keep t t2 d
32 | 	if "`scen'"=="linear" {
33 | 		local post_mean "t+50"
34 | 		local ttl "无动态效应"
35 | 	}
36 | 	else {
37 | 		local post_mean "-1*(t-8)^2+50"
38 | 		local ttl "动态效应"
39 | 	}
40 | 	// outcome before the policy, and the true underlying trend
41 | 	gen y=t+30+0.3*rnormal() if d==0
42 | 	gen true_trend=t+30
43 | 	label variable true_trend "真实的趋势"
44 | 	// outcome after the policy
45 | 	replace y=`post_mean'+0.3*rnormal() if d==1
46 | 	// y0/y1/y2: fitted values with the estimated policy effect removed in
47 | 	// treated periods, i.e. each specification's implied counterfactual path
48 | 	forvalues k=0/2 {
49 | 		reg y `rhs`k''
50 | 		predict y`k'
51 | 		replace y`k'=y`k'-_b[d] if d==1
52 | 		label variable y`k' "`lab`k''"
53 | 	}
54 | 	twoway (connected y0 t, msymbol(triangle) msize(small)) ///
55 | 		(connected y1 t, msymbol(diamond) msize(small)) ///
56 | 		(connected y2 t, msymbol(square) msize(small)) ///
57 | 		(line true_trend t, lpattern(dash)) ///
58 | 		(scatter y t,  msymbol(circle)) ///
59 | 		, title("`ttl'") legend(rows(2)) scheme(s1mono)
60 | 	graph export DID_dynamic_trend_`scen'.pdf, replace
61 | 	graph export DID_dynamic_trend_`scen'.png, replace
62 | }
63 | 


--------------------------------------------------------------------------------
/code_in_notes/HCW.do:
--------------------------------------------------------------------------------
 1 | // HCW.do
 2 | // Regression-based counterfactual for HongKong: fit HongKong on Japan,
 3 | // Korea, Philippines and Taiwan over the pre-treatment rows, predict for
 4 | // every row, and plot actual vs. predicted and their difference.
 5 | // The file in datasets/ is hcw.dta (lowercase); the former HCW.dta spelling
 6 | // cannot be opened on case-sensitive file systems. The clear option already
 7 | // empties memory, so no separate clear is needed.
 8 | use datasets/hcw.dta, clear
 9 | tsset time
10 | gen n=_n
11 | // NOTE(review): n<18 marks the first 17 rows as pre-treatment, while the
12 | // plots put the treatment line at time==150. If row 18 is the last period
13 | // before time 150 this is off by one -- confirm against the data.
14 | gen pretreat=(n<18)
15 | // Fit on pre-treatment rows only; predict then fills in all rows.
16 | reg HongKong Japan Korea Philippines Taiwan if pretreat
17 | predict counterfactual
18 | // Estimated effect = actual minus predicted
19 | gen treatment = HongKong - counterfactual
20 | tsline HongKong counterfactual,lp(solid dash) xline(150) yline(0)
21 | tsline treatment,xline(150) yline(0)
22 | 


--------------------------------------------------------------------------------
/code_in_notes/RD_r_and_d.do:
--------------------------------------------------------------------------------
 1 | // RD_r_and_d.do
 2 | // Regression discontinuity at score = 75 for the outcome INVSALES:
 3 | // polynomial fits with separate slopes on each side, the same fits within
 4 | // a 10-point window, rdrobust estimates, a plot, and a density test.
 5 | clear
 6 | set more off
 7 | use datasets/bronzini_r_and_d.dta
 8 | 
 9 | // Running variable centred at the cutoff
10 | gen s=score-75
11 | // Treated at or above the cutoff. This matches the s>=0 side used in the
12 | // plotting code below and rdrobust's convention of treating x>=c as
13 | // treated; the former s>0 coded score==75 as untreated. The missing-value
14 | // guard keeps a missing score from being coded treated (missing>=0 is true
15 | // in Stata).
16 | gen treat=(s>=0) if !missing(s)
17 | gen s2=s^2
18 | gen s3=s^3
19 | 
20 | // Left of the cutoff: polynomial terms that are zero on the right
21 | gen ls=s*(1-treat)
22 | gen ls2=ls^2
23 | gen ls3=ls^3
24 | // Right of the cutoff: polynomial terms that are zero on the left
25 | gen rs=s*treat
26 | gen rs2=rs^2
27 | gen rs3=rs^3
28 | 
29 | // Full sample, global polynomials (original note: do not use this approach)
30 | reg INVSALES treat ls rs, cluster(score)
31 | reg INVSALES treat ls ls2 rs rs2, cluster(score)
32 | reg INVSALES treat ls ls2 ls3 rs rs2 rs3, cluster(score)
33 | // Bandwidth of 10 score points
34 | reg INVSALES treat ls rs if s<10 & s>-10, cluster(score)
35 | reg INVSALES treat ls ls2 rs rs2 if s<10 & s>-10, cluster(score)
36 | reg INVSALES treat ls ls2 ls3 rs rs2 rs3 if s<10 & s>-10, cluster(score)
37 | // rdrobust: fixed bandwidth of 10, then its default bandwidth choice
38 | rdrobust INVSALES s, c(0) h(10)
39 | rdrobust INVSALES s, c(0)
40 | 
41 | // Plot: cubic fit on each side within the window. treat is constant within
42 | // each side's sample, so it is left out of these two regressions.
43 | reg INVSALES ls ls2 ls3 if s<0 & s>-10
44 | predict l_pred_INVSALES
45 | reg INVSALES rs rs2 rs3 if s>=0 & s<10
46 | predict r_pred_INVSALES
47 | sort s
48 | twoway (scatter INVSALES s if s<10 & s>-10) ///
49 | 	(line l_pred_INVSALES s if s<=0 & s>-10) ///
50 | 	(line r_pred_INVSALES s if s>=0 & s<10)
51 | // Manipulation (density) test
52 | rddensity s, c(0) plot
53 | 


--------------------------------------------------------------------------------
/code_in_notes/Run_ALL.do:
--------------------------------------------------------------------------------
1 | // Run_ALL.do -- runs a fixed list of the example scripts, in this order.
2 | foreach script in reg_one_variate lasso_and_ridge_shrinkage ///
3 | 	DID_dynamic_trend logit_and_roc nls_mpc weighted_ols ///
4 | 	qreg_outlier hcw_lasso {
5 | 	do `script'.do
6 | }
7 | 


--------------------------------------------------------------------------------
/code_in_notes/chow_test.do:
--------------------------------------------------------------------------------
 1 | // file: chow_test.do -- Chow-type test: regress log income on regressors split by gender in one pooled model, then jointly test that each male coefficient equals its female counterpart.
 2 | use datasets/cfps_adult, clear
 3 | gen log_income=log(p_income+1)
 4 | drop if qq1101<0
 5 | drop if te4<0
 6 | // Build variables: gender dummies, one dummy per te4 category, gender-specific copies of every regressor, and two macros that accumulate the regressor list (explans) and the equality restrictions (nulls). NOTE(review): male=cfps_gender presumes cfps_gender is coded 1 for men -- confirm against the codebook.
 7 | gen male=cfps_gender
 8 | gen female=1-cfps_gender
 9 | tab te4, gen(edu)
10 | local nulls "(male=female)"
11 | local explans "male female"
12 | foreach v of varlist qq1101 edu*{
13 |     gen `v'f=`v'*female
14 |     gen `v'm=`v'*male
15 |     local nulls "`nulls' (`v'f=`v'm)"
16 |     local explans "`explans' `v'f `v'm"
17 | }
18 | // Separate regressions for each gender (the edu1-edu7 range presumes tab created seven dummies -- confirm)
19 | reg log_income qq1101 edu1-edu7 if male==1
20 | reg log_income qq1101 edu1-edu7 if female==1
21 | // Pooled regression without a constant (male and female act as the two intercepts), then the joint test of all equality restrictions
22 | reg log_income `explans', noconstant
23 | di "`nulls'"
24 | test `nulls'
25 | 


--------------------------------------------------------------------------------
/code_in_notes/cross_validation_reg.do:
--------------------------------------------------------------------------------
 1 | // file: cross_validation_reg.do
 2 | // Leave-one-out cross-validation over polynomial degree: for degrees 1 to 10
 3 | // it regresses y on x, x^2, ..., x^degree, leaving each observation out in
 4 | // turn, and prints the degree with the mean squared left-out residual.
 5 | clear
 6 | // Fixed seed so the printed MSE table is reproducible
 7 | set seed 12345
 8 | set obs 50
 9 | gen x=runiform()*3
10 | gen y=exp(x)+rnormal()*2
11 | local control ""
12 | // The sample size does not change inside the loop, so read it once
13 | local N= _N
14 | forvalues i=1/10{
15 |   // add the next power of x to the regressor list
16 |   gen x`i'=x^`i'
17 |   local control "`control' x`i'"
18 |   quietly{
19 |     gen error=.
20 |     forvalues j=1/`N'{
21 |       // fit without observation j, then keep j's out-of-sample residual
22 |       reg y `control' if _n~=`j'
23 |       predict resid, residual
24 |       replace error=resid if _n==`j'
25 |       drop resid
26 |     }
27 |   }
28 |   gen error2=error^2
29 |   quietly: su error2
30 |   scalar mse=r(mean)
31 |   // _skip(#) is the documented form of the column-skip directive
32 |   display `i' _skip(2) mse
33 |   drop error error2
34 | }
35 | 


--------------------------------------------------------------------------------
/code_in_notes/datasets/CivilConflict.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/CivilConflict.dta


--------------------------------------------------------------------------------
/code_in_notes/datasets/DDCGdata_final.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/DDCGdata_final.dta


--------------------------------------------------------------------------------
/code_in_notes/datasets/Divorce-Wolfers-AER.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/Divorce-Wolfers-AER.dta


--------------------------------------------------------------------------------
/code_in_notes/datasets/Lalonde_nsw/cps_controls.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/Lalonde_nsw/cps_controls.dta


--------------------------------------------------------------------------------
/code_in_notes/datasets/Lalonde_nsw/cps_controls2.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/Lalonde_nsw/cps_controls2.dta


--------------------------------------------------------------------------------
/code_in_notes/datasets/Lalonde_nsw/cps_controls3.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/Lalonde_nsw/cps_controls3.dta


--------------------------------------------------------------------------------
/code_in_notes/datasets/Lalonde_nsw/nsw.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/Lalonde_nsw/nsw.dta


--------------------------------------------------------------------------------
/code_in_notes/datasets/Lalonde_nsw/nsw_dw.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/Lalonde_nsw/nsw_dw.dta


--------------------------------------------------------------------------------
/code_in_notes/datasets/Lalonde_nsw/psid_controls.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/Lalonde_nsw/psid_controls.dta


--------------------------------------------------------------------------------
/code_in_notes/datasets/Lalonde_nsw/psid_controls3.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/Lalonde_nsw/psid_controls3.dta


--------------------------------------------------------------------------------
/code_in_notes/datasets/NTV_Aggregate_Data_reshaped.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/NTV_Aggregate_Data_reshaped.dta


--------------------------------------------------------------------------------
/code_in_notes/datasets/OHIE_QJE.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/OHIE_QJE.dta


--------------------------------------------------------------------------------
/code_in_notes/datasets/OHIE_Science.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/OHIE_Science.dta


--------------------------------------------------------------------------------
/code_in_notes/datasets/Questionarie.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/Questionarie.pdf


--------------------------------------------------------------------------------
/code_in_notes/datasets/angrist2002aer.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/angrist2002aer.dta


--------------------------------------------------------------------------------
/code_in_notes/datasets/bronzini_r_and_d.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/bronzini_r_and_d.dta


--------------------------------------------------------------------------------
/code_in_notes/datasets/cfps_adult.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/cfps_adult.dta


--------------------------------------------------------------------------------
/code_in_notes/datasets/cfps_family_econ.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/cfps_family_econ.dta


--------------------------------------------------------------------------------
/code_in_notes/datasets/chfs_ind.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/chfs_ind.dta


--------------------------------------------------------------------------------
/code_in_notes/datasets/citydata.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/citydata.dta


--------------------------------------------------------------------------------
/code_in_notes/datasets/disease_gender_gap.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/disease_gender_gap.dta


--------------------------------------------------------------------------------
/code_in_notes/datasets/hcw.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/hcw.dta


--------------------------------------------------------------------------------
/code_in_notes/datasets/shannxi_export.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/shannxi_export.dta


--------------------------------------------------------------------------------
/code_in_notes/datasets/soep_female_labor.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/soep_female_labor.dta


--------------------------------------------------------------------------------
/code_in_notes/datasets/voting_cnty_clean.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/voting_cnty_clean.dta


--------------------------------------------------------------------------------
/code_in_notes/fixed_effects_ntv.do:
--------------------------------------------------------------------------------
 1 | // fixed_effects_ntv.do
 2 | // Region fixed-effects regressions of Votes_Edinstvo_ on Watch_probit for
 3 | // 1999, adding lagged control sets step by step; clustered by region.
 4 | clear
 5 | set more off
 6 | use datasets/NTV_Aggregate_Data_reshaped.dta
 7 | xtset tik_id year
 8 | // Powers 1 to 5 of logpop and log_wage
 9 | forvalues p=1/5 {
10 |     gen log_pop_`p'=logpop^`p'
11 |     gen log_wage_`p'=log_wage^`p'
12 | }
13 | // Pieces shared by all three specifications
14 | local dep "Votes_Edinstvo_"
15 | local tail "if year==1999, absorb(region) cluster(region)"
16 | // Socio-economic controls, lagged one period
17 | local E_controls "L.log_pop_* L.log_wage_* L.nurses L.doctors_pc"
18 | // Variables lagged four periods (original note: from the 1995 election)
19 | local X_controls "L4.Votes_KPRF_ L4.Votes_Yabloko_ L4.Votes_NDR_ L4.Votes_LDPR_ L4.Turnout"
20 | // Simple regression
21 | reghdfe `dep' Watch_probit `tail'
22 | // Add the socio-economic controls
23 | reghdfe `dep' Watch_probit `E_controls' `tail'
24 | // Add the four-period-lagged election variables as well
25 | reghdfe `dep' Watch_probit `E_controls' `X_controls' `tail'
26 | 


--------------------------------------------------------------------------------
/code_in_notes/hcw_lasso.do:
--------------------------------------------------------------------------------
 1 | // hcw_lasso.do
 2 | // Counterfactual for Shaanxi's log exports: select predictors among the
 3 | // standardized log exports of other provinces with a lasso fitted on years
 4 | // before 2013, then plot actual exports against the lasso prediction and
 5 | // the post-selection OLS prediction. Exports hcw_lasso.pdf.
 6 | clear
 7 | set more off
 8 | use datasets/shannxi_export.dta
 9 | tsset year
10 | // Put Shaanxi first so the varlist range below starts at it
11 | order year export_locationShaanxi
12 | // Log every export series in place and add a standardized copy (std_*)
13 | foreach v of varlist export_locationShaanxi- export_locationZhejiang{
14 | 	replace `v'=log(`v')
15 | 	egen std_`v'=std(`v')
16 | }
17 | // Candidate predictors offered to the lasso
18 | local control "std_export_locationShanghai std_export_locationShanxi std_export_locationFujian std_export_locationGuangdong std_export_locationGuizhou std_export_locationHeilongjiang std_export_locationJiangxi std_export_locationAnhui"
19 | // rseed() fixes the random cross-validation folds, so the selected
20 | // variables and the exported figure are the same on every run.
21 | lasso linear export_locationShaanxi `control' if year<2013, rseed(12345)
22 | // Dependent variable followed by the selected predictors, as stored by lasso
23 | local selected_vars=e(post_sel_vars)
24 | // Lasso prediction for all years
25 | predict p_shaanxi_lasso
26 | // OLS on the selected variables over the same pre-2013 sample
27 | reg `selected_vars' if year<2013
28 | predict p_shaanxi
29 | label variable export_locationShaanxi "陕西省实际出口"
30 | label variable p_shaanxi "陕西省出口的反事实估计"
31 | label variable p_shaanxi_lasso "陕西省出口的反事实估计Lasso"
32 | tsline export_locationShaanxi p_shaanxi p_shaanxi_lasso, lp(solid dash) xline(2013 2016, lstyle(grid)) scheme(s1mono) xtitle("年份") ytitle("出口对数值")
33 | graph export hcw_lasso.pdf, replace
34 | 


--------------------------------------------------------------------------------
/code_in_notes/heteroscedasticity.do:
--------------------------------------------------------------------------------
 1 | // file: heteroscedasticity.do
 2 | // Monte Carlo comparison of conventional and robust OLS standard errors,
 3 | // first with homoscedastic errors and then with heteroscedastic errors.
 4 | set seed 19880505
 5 | cap program drop dgp
 6 | program define dgp, rclass
 7 | 	// One replication: draw obs observations of y = 3 + b*x + e, where the
 8 | 	// standard deviation of e is 1 + hetero*|x|, and return the slope and
 9 | 	// its standard error from plain OLS (b, se) and robust OLS (r_b, r_se).
10 | 	syntax [, obs(integer 100) b(real 1) hetero(real 0)]
11 | 	drop _all
12 | 	set obs `obs'
13 | 	tempvar xvar yvar sd
14 | 	gen `xvar'=rnormal()
15 | 	gen `sd'=1+`hetero'*abs(`xvar')
16 | 	gen `yvar'=3+`b'*`xvar'+rnormal(0,`sd')
17 | 	// conventional standard errors
18 | 	quietly regress `yvar' `xvar'
19 | 	return scalar b=_b[`xvar']
20 | 	return scalar se=_se[`xvar']
21 | 	// heteroscedasticity-robust standard errors
22 | 	quietly regress `yvar' `xvar', vce(robust)
23 | 	return scalar r_b=_b[`xvar']
24 | 	return scalar r_se=_se[`xvar']
25 | end
26 | // Homoscedastic errors (hetero defaults to 0): 2000 replications
27 | simulate b=r(b) se=r(se) rb=r(r_b) rse=r(r_se), ///
28 | 	reps(2000): dgp
29 | su
30 | // Heteroscedastic errors: 2000 replications
31 | simulate b=r(b) se=r(se) rb=r(r_b) rse=r(r_se), ///
32 | 	reps(2000): dgp, hetero(1)
33 | su
34 | 


--------------------------------------------------------------------------------
/code_in_notes/ipw_doubly_robust.do:
--------------------------------------------------------------------------------
 1 | // ipw_doubly_robust.do -- inverse-probability-weighting and doubly robust (AIPW) estimates of the effect of treat on re78, using the NSW sample with the CPS controls appended.
 2 | clear
 3 | set more off
 4 | use datasets/Lalonde_nsw/nsw.dta
 5 | append using datasets/Lalonde_nsw/cps_controls.dta
 6 | gen age2=age^2
 7 | gen age3=age^3
 8 | egen std_age2=std(age2)
 9 | egen std_age3=std(age3)
10 | gen edu2=education^2
11 | gen edu3=education^3
12 | gen unemployed75=re75==0
13 | gen re75s=re75^2
14 | gen re75c=re75^3
15 | egen std_re75s=std(re75s)
16 | egen std_re75c=std(re75c)
17 | gen re75_edu=re75*education
18 | gen hisp_unemp=hispanic*unemployed75
19 | local controls "age-nodegree std_age2-std_age3 edu2-edu3"
20 | local extra_controls "re75 std_re75s std_re75c unemployed75 re75_edu hisp_unemp"
21 | // Inverse probability weighting. The first run is wrapped in capture and requests osample(omitted); rows flagged in omitted are dropped before the model is fitted again. NOTE(review): this relies on omitted existing after the captured run -- confirm it is also created when no row is flagged.
22 | cap teffects ipw (re78) (treat `controls' re75 unemployed75, logit), osample(omitted)
23 | drop if omitted
24 | drop omitted
25 | teffects ipw (re78) (treat `controls'  re75 unemployed75, logit)
26 | // Doubly robust (augmented IPW): outcome model and logit treatment model both use the full control set
27 | teffects aipw (re78 `controls' `extra_controls') (treat `controls' `extra_controls', logit)
28 | 


--------------------------------------------------------------------------------
/code_in_notes/lasso_and_ridge_shrinkage.do:
--------------------------------------------------------------------------------
 1 | // lasso_and_ridge_shrinkage.do
 2 | // Lasso and ridge fits of log income on standardized age terms, education
 3 | // dummies and province dummies; each is followed by its coefficient-path
 4 | // plot (exported to PDF) and an OLS on the stored post-selection variables.
 5 | clear
 6 | set more off
 7 | use "datasets/cfps_adult.dta"
 8 | // data cleaning and generating
 9 | gen log_income=log(p_income)
10 | drop if te4<0
11 | gen age=2014-cfps_birthy
12 | gen age2=age^2
13 | // standardized copies of age and age squared
14 | foreach v in age age2 {
15 | 	egen std_`v'=std(`v')
16 | }
17 | tab te4, gen(edu)
18 | tab provcd14, gen(province)
19 | // Model shared by both estimators; std_age is in parentheses, i.e. always included
20 | local model "log_income (std_age) std_age2 edu* province*"
21 | // lasso regression
22 | lasso linear `model', selection(cv)
23 | lassoinfo
24 | local post_vars=e(post_sel_vars)
25 | coefpath
26 | graph export beta_lasso_plot.pdf, replace
27 | reg `post_vars'
28 | // ridge regression (elastic net with alpha fixed at 0)
29 | elasticnet linear `model', selection(cv) alpha(0)
30 | lassoinfo
31 | local post_vars=e(post_sel_vars)
32 | coefpath
33 | graph export beta_ridge_plot.pdf, replace
34 | reg `post_vars'


--------------------------------------------------------------------------------
/code_in_notes/linear_panel.do:
--------------------------------------------------------------------------------
 1 | clear
 2 | set more off
 3 | use datasets/citydata.dta
 4 | ** Declare the panel structure (unit CityCode, time Year)
 5 | xtset CityCode Year
 6 | ** Generate new variables
 7 | gen log_gdp=log(v84)
 8 | gen growth_gdp=log_gdp-L.log_gdp // how to build a lag? (a) the L. time-series operator
 9 | 	sort CityCode Year
10 | 	by CityCode: gen growth_gdp2=log_gdp-log_gdp[_n-1]
11 | gen num_schools=v188
12 | gen log_edu_exp=log(v166)
13 | gen log_rd_exp=log(v167)
14 | ** Controls used in the second set of regressions; (b) above takes the previous row within city, and growth_gdp2 is not used below
15 | local control "log_edu_exp log_rd_exp i.Year"
16 | ** Regressions of GDP growth on log GDP; each result is appended to linear_panel.doc
17 | ** pooled OLS
18 | reg growth_gdp log_gdp i.Year, vce(cl CityCode)
19 | outreg2 using linear_panel.doc, replace addtext(FE, No, Time FE, Yes)
20 | ** random effects
21 | xtreg growth_gdp log_gdp i.Year, re vce(cl CityCode)
22 | outreg2 using linear_panel.doc, append addtext(FE, No, Time FE, Yes)
23 | ** between group estimator
24 | xtreg growth_gdp log_gdp, be
25 | outreg2 using linear_panel.doc, append addtext(FE, No, Time FE, No)
26 | ** fixed effects
27 | xtreg growth_gdp log_gdp i.Year, fe vce(cl CityCode)
28 | outreg2 using linear_panel.doc, append addtext(FE, Yes, Time FE, Yes)
29 | ** LSDV
30 | reghdfe growth_gdp log_gdp i.Year, absorb(CityCode) cluster(CityCode)
31 | outreg2 using linear_panel.doc, append addtext(FE, Yes, Time FE, Yes)
32 | ** LSDV twoway cluster
33 | reghdfe growth_gdp log_gdp, absorb(i.CityCode i.Year) cluster(CityCode Year)
34 | outreg2 using linear_panel.doc, append addtext(FE, Yes, Time FE, Yes)
35 | 
36 | ** Add the control variables (no clustered VCE here; the stored results feed hausman below)
37 | ** random effects
38 | xtreg growth_gdp log_gdp `control', re 
39 | estimates store re
40 | ** fixed effects
41 | xtreg growth_gdp log_gdp `control', fe 
42 | estimates store fe
43 | ** Hausman test (fixed vs. random effects)
44 | hausman fe re
45 | 
46 | ** interactive fixed effects with 1, 2 and 3 factors; the year dummies are removed from the control list first
47 | local control "log_edu_exp log_rd_exp"
48 | regife  growth_gdp log_gdp `control', factors(CityCode Year, 1)
49 | regife  growth_gdp log_gdp `control', factors(CityCode Year, 2)
50 | regife  growth_gdp log_gdp `control', factors(CityCode Year, 3)
51 | regife  growth_gdp log_gdp `control', factors(CityCode Year, 1) absorb(i.CityCode i.Year)
52 | 


--------------------------------------------------------------------------------
/code_in_notes/linear_panel_fd_np.do:
--------------------------------------------------------------------------------
 1 | // linear_panel_fd_np.do
 2 | clear
 3 | set more off
 4 | use datasets/voting_cnty_clean.dta
 5 | 
 6 | xtset cnty90 year
 7 | 
 8 | generate numdailies_l1 = L4.numdailies
 9 | generate prestout_l1 = L4.prestout
10 | generate changedailies = numdailies - numdailies_l1
11 | generate changeprestout = prestout - prestout_l1
12 | // Differenced specification (change relative to four years earlier), state-by-year effects absorbed
13 | reghdfe changeprestout changedailies if mainsample, absorb(i.st#i.year) vce(cluster cnty90) resid
14 | // Inspect serial correlation of the residuals from the differenced model
15 | predict resid_fd, residual
16 | regress resid_fd L4.resid_fd
17 | // County fixed effects plus state-by-year effects
18 | reghdfe prestout numdailies if mainsample, absorb(i.st#i.year i.cnty90) vce(cluster cnty90)
19 | // County fixed effects plus county-specific linear trends
20 | reghdfe prestout numdailies if mainsample, absorb(i.cnty90#c.year i.cnty90) vce(cluster cnty90)
21 | 


--------------------------------------------------------------------------------
/code_in_notes/linear_panel_fe_ntv.do:
--------------------------------------------------------------------------------
 1 | clear
 2 | set more off
 3 | use datasets/NTV_Aggregate_Data_reshaped.dta
 4 | xtset tik_id year
 5 | // Exposure is set to zero except in 1999 (original note: NTV did not yet exist in 1995)
 6 | gen Watch_probit_p=0
 7 | replace Watch_probit_p=Watch_probit if year==1999
 8 | // Differenced specification: change relative to four years earlier
 9 | gen delta_Votes_SPS_=Votes_SPS_-L4.Votes_SPS_
10 | gen delta_Watch_probit_p=Watch_probit_p-L4.Watch_probit_p
11 | reg delta_Votes_SPS_ delta_Watch_probit_p i.year if year!=2003, cluster(region)
12 | // Unit fixed effects, unweighted
13 | reghdfe Votes_SPS_ Watch_probit_p i.year if year!=2003, absorb(i.tik_id) cluster(region)
14 | // Weight by 1996 population: spread each unit's 1996 value over all of its years
15 | gen pop96t=population if year==1996
16 | egen pop96=mean(pop96t), by(tik_id) // name pop96t explicitly; mean(pop96) only resolved through variable-name abbreviation
17 | reghdfe Votes_SPS_ Watch_probit_p i.year if year!=2003 [aw=pop96], absorb(i.tik_id) cluster(region)
18 | 


--------------------------------------------------------------------------------
/code_in_notes/logit_and_roc.do:
--------------------------------------------------------------------------------
 1 | clear
 2 | set more off
 3 | use datasets/cfps_adult.dta
 4 | // Clean the sample and build the variables
 5 | drop if employ2014<0
 6 | drop if te4<0
 7 | drop if qa301<0 | qa301==5
 8 | gen exit_labor=employ2014==3
 9 | gen age=2014-cfps_birthy
10 | gen age2=age^2
11 | gen urban_hukou=qa301==3
12 | // Two-way table from which the odds ratio can be read
13 | tab exit_labor cfps_gender
14 | // Estimate and predict; marginal effects / odds ratios are collected in logit_roc.doc
15 | local x "age age2 cfps_gender urban_hukou i.provcd14 i.te4"
16 | local reportvar "age age2 cfps_gender urban_hukou"
17 | logit exit_labor cfps_gender
18 | margins, dydx(*) post
19 | outreg2 using logit_roc.doc, replace keep(`reportvar') ctitle(Logit)
20 | logistic exit_labor cfps_gender
21 | outreg2 using logit_roc.doc, append keep(`reportvar') ctitle(Logistic)
22 | logit exit_labor `x'
23 | margins, dydx(*) post
24 | outreg2 using logit_roc.doc, append keep(`reportvar') ctitle(Logit)
25 | probit exit_labor `x'
26 | margins, dydx(*) post
27 | outreg2 using logit_roc.doc, append keep(`reportvar') ctitle(Probit)
28 | logistic exit_labor `x'
29 | outreg2 using logit_roc.doc, append keep(`reportvar') ctitle(Logistic)
30 | // Everything below uses the logistic fit above; for a probit model re-run: probit exit_labor `x'
31 | predict p_exit if e(sample) // predicted probability, estimation sample only
32 | // Precision and recall at cutoff 0.5; missing p_exit must stay missing because missing > 0.5 is true in Stata
33 | gen predict_exit=p_exit>0.5 if !missing(p_exit)
34 | gen TP=(exit_labor==1 & predict_exit==1)
35 | gen FP=(exit_labor==0 & predict_exit==1)
36 | gen FN=(exit_labor==1 & predict_exit==0)
37 | gen TN=(exit_labor==0 & predict_exit==0)
38 | foreach v of varlist TP FP FN TN{
39 | 	quietly: su `v'
40 | 	local `v' = r(mean)
41 | }
42 | local R2=e(r2_p)
43 | local precision=`TP'/(`TP'+`FP')
44 | local recall=`TP'/(`TP'+`FN')
45 | local accuracy=(`TP'+`TN')/(`TP'+`TN'+`FP'+`FN')
46 | local F1=(2*`precision'*`recall')/(`precision'+`recall')
47 | di "R2=`R2'"
48 | di "查准率=`precision'"
49 | di "查全率=`recall'"
50 | di "精度=`accuracy'"
51 | di "F1=`F1'"
52 | // ROC curve
53 | lroc
54 | graph export roc.pdf, replace
55 | // Sensitivity and specificity against the cutoff
56 | lsens
57 | graph export sens_speci.pdf, replace
58 | 


--------------------------------------------------------------------------------
/code_in_notes/matching.do:
--------------------------------------------------------------------------------
 1 | // matching.do
 2 | clear
 3 | set more off
 4 | 
 5 | // Experimental data (NSW)
 6 | use datasets/Lalonde_nsw/nsw.dta
 7 | local controls "age-nodegree"
 8 | // Raw comparison of the outcome across arms
 9 | bysort treat: su re78
10 | reg re78 treat, r
11 | // Control for covariates by regression
12 | reg re78 treat `controls', r
13 | // Inspect common support, then trim on age and education
14 | bysort treat: su `controls'
15 | drop if age>50
16 | drop if education>14
17 | // Nearest-neighbor matching, ATE
18 | teffects nnmatch (re78 `controls') (treat), nn(1)
19 | // (covariate balance is assessed after the ATT fit below)
20 | // Nearest-neighbor matching, ATT
21 | teffects nnmatch (re78 `controls') (treat), nn(1) atet
22 | tebalance summarize
23 | // Assess unconfoundedness: same matching with re75 in the outcome slot
24 | teffects nnmatch (re75 `controls') (treat), nn(1)
25 | 
26 | 
27 | // Exact matching: first discard observations that cannot be matched exactly (flagged by osample)
28 | cap teffects nnmatch (re78 `controls') (treat), nn(1) ematch(black hispanic married nodegree) osample(omitted)
29 | drop if omitted
30 | drop omitted
31 | cap teffects nnmatch (re78 `controls') (treat), nn(1) ematch(black hispanic married nodegree) osample(omitted)
32 | drop if omitted
33 | drop omitted
34 | // Matching estimate on the remaining sample (second pass above presumably catches violations created by the first drop - confirm)
35 | teffects nnmatch (re78 age education) (treat), nn(1) ematch(black hispanic married nodegree) gen(matched)
36 | tebalance summarize
37 | 


--------------------------------------------------------------------------------
/code_in_notes/matching_nonexp.do:
--------------------------------------------------------------------------------
 1 | // matching_nonexp.do
 2 | // Add the non-experimental CPS controls to improve matching
 3 | // Reload the data (the previous script dropped some observations)
 4 | clear
 5 | set more off
 6 | use datasets/Lalonde_nsw/nsw.dta
 7 | append using datasets/Lalonde_nsw/cps_controls.dta
 8 | local controls "age-nodegree"
 9 | gen unemployed75=re75==0
10 | // Raw comparison
11 | bysort treat: su re78
12 | reg re78 treat, r
13 | // Control for covariates by regression
14 | reg re78 treat `controls', r
15 | // Inspect common support, then trim on age and education
16 | bysort treat: su `controls'
17 | drop if age>50
18 | drop if education>17
19 | drop if education<4
20 | // Nearest-neighbor matching, ATT (every call below uses atet)
21 | local nn=5
22 | teffects nnmatch (re78 `controls') (treat), nn(`nn') atet
23 | teffects nnmatch (re78 `controls' re75) (treat), nn(`nn') atet
24 | cap teffects nnmatch (re78 age education re75) (treat), nn(`nn') ematch(black hispanic married nodegree) atet osample(omitted)
25 | drop if omitted
26 | drop omitted
27 | cap teffects nnmatch (re78 age education re75) (treat), nn(`nn') ematch(black hispanic married nodegree) atet osample(omitted)
28 | drop if omitted
29 | drop omitted
30 | teffects nnmatch (re78 age education re75) (treat), nn(`nn') ematch(black hispanic married nodegree) atet
31 | // Covariate balance of the two groups
32 | tebalance summarize
33 | // Assess unconfoundedness: same matching with re75 in the outcome slot
34 | teffects nnmatch (re75 `controls') (treat), nn(20) atet
35 | 


--------------------------------------------------------------------------------
/code_in_notes/matching_psm.do:
--------------------------------------------------------------------------------
 1 | // matching_psm.do
 2 | clear
 3 | set more off
 4 | use datasets/Lalonde_nsw/nsw.dta
 5 | append using datasets/Lalonde_nsw/cps_controls.dta
 6 | gen age2=age^2
 7 | gen age3=age^3
 8 | egen std_age2=std(age2)
 9 | egen std_age3=std(age3)
10 | gen edu2=education^2
11 | gen edu3=education^3
12 | gen unemployed75=re75==0
13 | gen re75s=re75^2
14 | gen re75c=re75^3
15 | egen std_re75s=std(re75s)
16 | egen std_re75c=std(re75c)
17 | gen re75_edu=re75*education
18 | gen hisp_unemp=hispanic*unemployed75
19 | local controls "age-nodegree std_age2-std_age3 edu2-edu3"
20 | local extra_controls "re75 std_re75s std_re75c unemployed75 re75_edu hisp_unemp"
21 | // Estimate the propensity score
22 | logit treat `controls' `extra_controls'
23 | predict ps
24 | // Descriptive statistics and overlaid histograms of the score by arm
25 | bysort treat: su ps, de
26 | twoway (hist ps if treat==0, color(green%30))/*
27 |      */(hist ps if treat==1, color(red%30))/*
28 |      */, leg(lab(1 "Untreated") lab(2 "Treated"))
29 | // Check common support: flag scores below 0.15 and redraw the histograms without them
30 | gen ignore=ps<0.15
31 | twoway (hist ps if treat==0 & ~ignore, color(green%30))/*
32 |      */(hist ps if treat==1 & ~ignore, color(red%30))/*
33 |      */, leg(lab(1 "Untreated") lab(2 "Treated"))
34 | // Propensity-score matching (ATT) on the trimmed sample
35 | local nn=3
36 | teffects psmatch (re78) (treat `controls') if ~ignore, atet nn(`nn')
37 | teffects psmatch (re78) (treat `controls' `extra_controls') if ~ignore, atet nn(`nn')
38 | tebalance summarize
39 | 


--------------------------------------------------------------------------------
/code_in_notes/mlogit_employ.do:
--------------------------------------------------------------------------------
 1 | // mlogit_employ.do
 2 | clear
 3 | set more off
 4 | use datasets/cfps_adult
 5 | // keep the four qg2 categories that are modelled; drop negative education codes
 6 | keep if inlist(qg2, 1, 2, 3, 4)
 7 | tabulate qg2
 8 | generate age = 2014 - cfps_birthy
 9 | drop if te4 < 0
10 | // multinomial logit with outcome 1 as the base category
11 | mlogit qg2 age cfps_gender i.te4, baseoutcome(1)
12 | outreg2 using mlogit_employ.doc, replace
13 | // Wald test that the age coefficient is equal in equations 2 and 3
14 | test [2]age = [3]age
15 | 


--------------------------------------------------------------------------------
/code_in_notes/model_selection.do:
--------------------------------------------------------------------------------
 1 | // file: model_selection.do
 2 | clear
 3 | set obs 50
 4 | gen x=runiform()*3
 5 | gen y=exp(x)+rnormal()*2
 6 | local control ""
 7 | scalar K=0
 8 | forvalues i=1/10{
 9 |     gen x`i'=x^`i'
10 |     local control "`control' x`i'"
11 |     scalar K=K+1
12 |     quietly: reg y `control'
13 |     scalar r2=e(r2)
14 |     scalar r2a=e(r2_a)
15 |     scalar aic=-2*e(ll)+2*K
16 |     scalar bic=-2*e(ll)+log(e(N))*K
17 |     display `i' _skip(2) r2 _skip(2) r2a _skip(2) aic _skip(2) bic // K counts slope terms only (no intercept); _skip needs its column count
18 | }
19 | 


--------------------------------------------------------------------------------
/code_in_notes/multi_collinearity.do:
--------------------------------------------------------------------------------
 1 | // file: multi_collinearity.do -- Monte Carlo on the size and power of the t test for x1 with and without collinear regressors
 2 | set seed 19880505
 3 | cap program drop dgp
 4 | program define dgp, rclass
 5 | 	syntax [, obs(integer 20) b(real 0) mc(real 1)] // obs: sample size; b: true coefficient on x1; mc: 1 = x2 correlated with x1
 6 | 	drop _all
 7 | 	set obs `obs'
 8 | 	tempvar x1 x2 y
 9 | 	gen `x1'=rnormal()
10 | 	if `mc'==1{
11 | 		gen `x2'=3/sqrt(10)*`x1'+1/sqrt(10)*rnormal()
12 | 	}
13 | 	else {
14 | 		gen `x2'=rnormal()
15 | 	}
16 | 	gen `y'=`b'*`x1'+`x2'+rnormal()
17 | 	quietly: reg `y' `x1' `x2'
18 | 	return scalar b=_b[`x1']
19 | 	return scalar se=_se[`x1']
20 | 	if abs(_b[`x1']/_se[`x1'])>=invt(e(df_r),0.975) {
21 | 		return scalar rejected=1
22 | 	}
23 | 	else {
24 | 		return scalar rejected=0
25 | 	}
26 | 	return scalar neg=_b[`x1']<0
27 | end
28 | // multicollinearity, size (b=0)
29 | simulate rejected=r(rejected) b=r(b) se=r(se) neg=r(neg)/*
30 |         */ ,reps(2000):dgp
31 | su
32 | // multicollinearity, power (b=1)
33 | simulate rejected=r(rejected) b=r(b) se=r(se) neg=r(neg)/*
34 |         */ ,reps(2000):dgp, b(1)
35 | su
36 | // no multicollinearity, power
37 | simulate rejected=r(rejected) b=r(b) se=r(se) neg=r(neg)/*
38 |         */ ,reps(2000):dgp, b(1) mc(0)
39 | su
40 | // multicollinearity, power with large N
41 | simulate rejected=r(rejected) b=r(b) se=r(se) neg=r(neg)/*
42 |         */ ,reps(2000):dgp, b(1) obs(100)
43 | su
44 | 


--------------------------------------------------------------------------------
/code_in_notes/nls_mpc.do:
--------------------------------------------------------------------------------
 1 | clear
 2 | set more off
 3 | use "datasets/cfps_family_econ.dta"
 4 | // Linear benchmark, fitted before the sample is trimmed
 5 | regress expense fincome1
 6 | predict expense_linear
 7 | // Trim the sample, then fit expense = alpha + beta*fincome1^gamma by NLS (starting values from the linear fit)
 8 | drop if fincome1==. | fincome1<0
 9 | drop if expense==. | expense<0 | expense>5*fincome1
10 | nl (expense={alpha=_b[_cons]}+{beta=_b[fincome1]}*fincome1^{gamma=1})
11 | predict expense_nls
12 | // Marginal propensity to consume: derivative of the fitted curve with respect to income
13 | generate mpc = _b[/beta]*_b[/gamma]*fincome1^(_b[/gamma]-1)
14 | // Scatter with both fitted lines
15 | sort fincome1
16 | twoway (scatter expense fincome1) ///
17 |        (line expense_linear fincome1) ///
18 |        (line expense_nls fincome1)
19 | graph export nls_mpc.pdf, replace
20 | 


--------------------------------------------------------------------------------
/code_in_notes/ohie_qje.do:
--------------------------------------------------------------------------------
 1 | // replication of OHIE-QJE paper
 2 | clear frames
 3 | set more off
 4 | use datasets/OHIE_QJE.dta
 5 | 
 6 | // LATE without control variables
 7 | // 1st stage (the denominator), restricted to observations with a non-missing outcome
 8 | reg ohp_std_ever_admin treatment if rx_num_mod_12m!=. , cl(household_id)
 9 | local first_stage=_b[treatment]
10 | // Intention-to-Treat
11 | reg rx_num_mod_12m treatment, cluster(household_id)
12 | local ITT=_b[treatment]
13 | // IV estimate, followed by the ratio ITT / first stage for comparison
14 | ivreg rx_num_mod_12m (ohp_std_ever_admin = treatment), cl(household_id)
15 | di `ITT'/`first_stage'
16 | 
17 | // Add control variables
18 | // 1st stage for Medicaid and for OHP Standard; original note: the two coefficients are nearly identical, so only OHP Standard is affected
19 | // Controls: draw_lottery is the lottery draw (round), numhh the number of people in the applying household
20 | reghdfe ohp_all_ever_admin treatment, absorb(i.numhh#i.draw_lottery) cl(household_id)
21 | reghdfe ohp_std_ever_admin treatment, absorb(i.numhh#i.draw_lottery) cl(household_id)
22 | 
23 | // ITT
24 | // Prescription drugs, extensive margin & total utilization
25 | reghdfe rx_any_12m treatment, absorb(i.numhh#i.draw_lottery) cl(household_id)
26 | reghdfe rx_num_mod_12m treatment, absorb(i.numhh#i.draw_lottery) cl(household_id)
27 | // Emergency room, extensive margin & total utilization
28 | reghdfe er_any_12m treatment, absorb(i.numhh#i.draw_lottery) cl(household_id)
29 | reghdfe er_num_mod_12m treatment, absorb(i.numhh#i.draw_lottery) cl(household_id)
30 | 
31 | // IV estimates: prescription drugs
32 | ivreghdfe rx_any_12m (ohp_std_ever_admin = treatment), absorb(i.numhh#i.draw_lottery) cl(household_id)
33 | ivreghdfe rx_num_mod_12m (ohp_std_ever_admin = treatment), absorb(i.numhh#i.draw_lottery) cl(household_id)
34 | // Emergency room, extensive margin & total utilization
35 | ivreghdfe er_any_12m (ohp_std_ever_admin = treatment), absorb(i.numhh#i.draw_lottery) cl(household_id)
36 | ivreghdfe er_num_mod_12m (ohp_std_ever_admin = treatment), absorb(i.numhh#i.draw_lottery) cl(household_id)
37 | 
38 | 
39 | 


--------------------------------------------------------------------------------
/code_in_notes/ohie_qje_joint_test.do:
--------------------------------------------------------------------------------
 1 | // ohie_qje_joint_test.do
 2 | clear frames
 3 | set more off
 4 | use datasets/OHIE_QJE.dta
 5 | 
 6 | // table2 treatment-control balance
 7 | // control mean of respond to survey
 8 | // Note the weights: per the original note they come from the survey sampling design described in the paper's appendix
 9 | su returned_12m if treatment==0 [iw=weight_12m ]
10 | // Alternative: a regression with no regressors, i.e. on the constant only
11 | reg returned_12m if treatment==0 [iw=weight_12m ]
12 | di e(rmse)
13 | 
14 | // Compare pre-randomization characteristics
15 | local lottery_list "birthyear_list female_list english_list self_list first_day_list have_phone_list pobox_list zip_msa"
16 | // One characteristic at a time
17 | foreach v of varlist `lottery_list'{
18 | 	reg `v' treatment [iw=weight_12m ], cl(household_id)
19 | 	test treatment
20 | }
21 | // Joint comparison: copy the variables to a new frame, stack them long, and test all treatment interactions together
22 | frames put `lottery_list' treatment weight_12m household_id numhh_list, into(joint_compare)
23 | frame change joint_compare
24 | local i=0
25 | foreach v of varlist `lottery_list'{
26 | 	local i=`i'+1
27 | 	rename `v' outcome`i'
28 | }
29 | gen id=_n
30 | reshape long outcome, i(id) j(varid)
31 | local test_coefs ""
32 | forvalues j=1/`i'{
33 | 	local test_coefs "`test_coefs' 1.treatment#`j'.varid"
34 | }
35 | di "`test_coefs'"
36 | reg outcome i.varid i.treatment#i.varid i.numhh_list#i.varid , cl(household_id) // NOTE(review): unweighted, although weight_12m was copied into this frame - confirm intent
37 | test `test_coefs'
38 | frame change default
39 | 


--------------------------------------------------------------------------------
/code_in_notes/ohie_qje_test_one.do:
--------------------------------------------------------------------------------
 1 | // ohie_qje_test_one.do
 2 | clear
 3 | set more off
 4 | use datasets/OHIE_QJE.dta
 5 | 
 6 | // Characteristics to compare across lottery arms (list built in two steps to keep lines short)
 7 | local balance_vars "birthyear_list female_list english_list"
 8 | local balance_vars "`balance_vars' self_list first_day_list have_phone_list pobox_list zip_msa"
 9 | // One weighted regression per characteristic, then a test of the treatment coefficient
10 | foreach covar of varlist `balance_vars' {
11 | 	regress `covar' treatment [iw=weight_12m ], vce(robust)
12 | 	test treatment
13 | }
14 | 


--------------------------------------------------------------------------------
/code_in_notes/ohie_science.do:
--------------------------------------------------------------------------------
 1 | // replication of OHIE-Science paper
 2 | clear frames
 3 | set more off
 4 | use datasets/OHIE_Science.dta
 5 | 
 6 | // Main results
 7 | // 1st stage
 8 | reg ohp_all_ever_30sep2009 treatment ed_visit_09mar2008 i.numhh_list, cl(household_id)
 9 | // ITT
10 | reg ed_visit_30sep2009 treatment  ed_visit_09mar2008 i.numhh_list, cl(household_id)
11 | reg num_visit_cens_ed treatment  num_visit_pre_cens_ed i.numhh_list, cl(household_id)
12 | // main results
13 | ivregress 2sls ed_visit_30sep2009 (ohp_all_ever_30sep2009=treatment ) ed_visit_09mar2008 i.numhh_list, cl(household_id)
14 | ivregress 2sls num_visit_cens_ed (ohp_all_ever_30sep2009=treatment ) num_visit_pre_cens_ed i.numhh_list, cl(household_id)
15 | 
16 | // The estimates change with the set of controls
17 | local extra_controls "female_list i.birthyear_list english_list"
18 | ivregress 2sls ed_visit_30sep2009 (ohp_all_ever_30sep2009=treatment ), cl(household_id)
19 | ivregress 2sls ed_visit_30sep2009 (ohp_all_ever_30sep2009=treatment ) i.numhh_list, cl(household_id)
20 | ivregress 2sls ed_visit_30sep2009 (ohp_all_ever_30sep2009=treatment ) ed_visit_09mar2008 i.numhh_list `extra_controls', cl(household_id)
21 | 
22 | // marginal treatment effects
23 | // Original note: i. factor notation cannot be used here, so dummy variables are generated
24 | quietly{
25 |     tab birthyear_list, gen(birthyear_dummies)
26 |     tab numhh_list, gen(numhh_dummies)
27 | }
28 | local controls "ed_visit_09mar2008 female_list numhh_dummies* birthyear_dummies* english_list"
29 | // propensity score
30 | logit ohp_all_ever_30sep2009 treatment `controls'
31 | // Marginal treatment effects with a polynomial in the propensity score
32 | margte ed_visit_30sep2009 `controls', treatment(ohp_all_ever_30sep2009 treatment `controls') common poly(4) noboot savepropensity
33 | mat mte_mat=e(mte)
34 | quietly: su p, de
35 | local max_p=r(p99)
36 | di `max_p'
37 | // Original note: the reported average effect looked inaccurate, so recompute it from the MTE curve stored in a new frame
38 | frame create fmte pclass mte
39 | forvalues i=1/99{
40 |     frame post fmte (`i') (mte_mat[1,`i'])
41 | }
42 | frame fmte: line mte pclass if pclass<`max_p'*100
43 | frame fmte: su mte if pclass<`max_p'*100
44 | // Marginal treatment effects with a semiparametric fit in the propensity score
45 | margte ed_visit_30sep2009 `controls', treatment(ohp_all_ever_30sep2009 treatment `controls') semi
46 | 


--------------------------------------------------------------------------------
/code_in_notes/oneway_anova.do:
--------------------------------------------------------------------------------
 1 | // file: oneway_anova.do
 2 | use datasets/cfps_adult, clear
 3 | drop if te4 < 0
 4 | generate log_income = log(p_income + 1)
 5 | tabulate te4
 6 | // Regression on the te4 category dummies
 7 | regress log_income i.te4
 8 | display "F=" e(F) " RSS=" e(rss)
 9 | // One-way ANOVA on the same factor, for comparison
10 | anova log_income te4
11 | display "F=" e(F) " RSS=" e(rss)
12 | 


--------------------------------------------------------------------------------
/code_in_notes/panel_binary.do:
--------------------------------------------------------------------------------
 1 | clear
 2 | set more off
 3 | *** Load the data and build the regressors ***
 4 | use datasets/soep_female_labor
 5 | replace husworkhour=husworkhour/1000
 6 | gen logIncome=log(income)
 7 | egen AveIncome=mean(income),by(persnr)
 8 | gen logAveIncome=log(AveIncome)
 9 | drop AveIncome
10 | gen age2=age^2/100
11 | egen MaxChld6=max(chld6), by(persnr)
12 | *** Control sets: control2 is control1 without age; person means of control2 are built in the loop ***
13 | xtset persnr year
14 | local control1 "chld6 chld16 age age2 logIncome husworkhour husemployment"
15 | local control2 "chld6 chld16 age2 logIncome husworkhour husemployment"
16 | local fixed "edu region"
17 | quietly: tab year, gen(year)
18 | local yeardummy "year1-year4"
19 | foreach v in `control2'{
20 | 	egen ave`v'=mean(`v'), by(persnr)
21 | }
22 | local controlmean1 "MaxChld6 avechld6 avechld16 logAveIncome avehusworkhour avehusemployment"
23 | *** Descriptive stats by employment history: 1 = employment sum is 0, 2 = sum is 5 (presumably employed in every wave - confirm), 3 = otherwise ***
24 | egen temp=sum(employment),by(persnr)
25 | gen sortvara=1 if temp==0
26 | replace sortvara=2 if temp==5
27 | replace sortvara=3 if sortvara==.
28 | drop temp
29 | bysort sortvara: outreg2 using soep_female_labor_descr.tex, replace sum(log) keep(employment `control1' `fixed') eqkeep(mean sd min max)
30 | *** Regressions: each model is followed by margins and appended to soep_female_labor_res.tex ***
31 | probit employment `control1' `fixed' `yeardummy'
32 | margins, dydx(*) post
33 | outreg2 using soep_female_labor_res.tex, replace ctitle("Probit") keep(`control1' `fixed')
34 | logit employment `control1' `fixed' `yeardummy'
35 | margins, dydx(*) post
36 | outreg2 using soep_female_labor_res.tex, append ctitle("Logit") keep(`control1' `fixed')
37 | xtprobit employment `control1' `fixed' `yeardummy'
38 | margins, dydx(*) post
39 | outreg2 using soep_female_labor_res.tex, append ctitle("Random Effects Probit") keep(`control1' `fixed')
40 | xtprobit employment `control1' `controlmean1' `fixed'  `yeardummy'
41 | margins, dydx(*) post
42 | outreg2 using soep_female_labor_res.tex, append ctitle("Chamberlain Probit") keep(`control1' `fixed' MaxChld6)
43 | xtlogit employment `control2'  `yeardummy', fe
44 | margins, dydx(*) post
45 | outreg2 using soep_female_labor_res.tex, append ctitle("Fixed Effects Logit") keep(`control2')
46 | 


--------------------------------------------------------------------------------
/code_in_notes/panel_dynamic.do:
--------------------------------------------------------------------------------
 1 | clear
 2 | set more off
 3 | set matsize 10000
 4 | use datasets/DDCGdata_final
 5 | // Numeric country identifier built from wbcode
 6 | egen ccode=group(wbcode)
 7 | // Declare the panel structure
 8 | xtset ccode year
 9 | // Fixed-effects regressions with 1, 2 and 4 lags of y, all clustered on the panel identifier
10 | xtreg y l.y dem i.year, fe cluster(ccode)
11 | xtreg y l(1/2).y dem i.year, fe cluster(ccode)
12 | xtreg y l(1/4).y dem i.year, fe cluster(ccode)
13 | // Dynamic panel, difference GMM (noleveleq)
14 | xtabond2 y l.y dem i.year,/*
15 | 	*/gmmstyle(y, laglimits(2 .)) gmmstyle(dem, laglimits(1 .)) ivstyle(i.year, p)/*
16 | 	*/ noleveleq robust nodiffsargan
17 | xtabond2 y l(1/2).y dem i.year,/*
18 | 	*/gmmstyle(y, laglimits(2 .)) gmmstyle(dem, laglimits(1 .)) ivstyle(i.year, p)/*
19 | 	*/ noleveleq robust nodiffsargan
20 | xtabond2 y l(1/4).y dem i.year,/*
21 | 	*/gmmstyle(y, laglimits(2 .)) gmmstyle(dem, laglimits(1 .)) ivstyle(i.year, p)/*
22 | 	*/ noleveleq robust nodiffsargan
23 | // Dynamic panel, system GMM (level equation kept, instrument lags limited)
24 | xtabond2 y l.y dem i.year,/*
25 | 	*/gmmstyle(y, laglimits(2 4)) gmmstyle(dem, laglimits(1 3)) ivstyle(i.year)/*
26 | 	*/ robust nodiffsargan
27 | xtabond2 y l(1/2).y dem i.year,/*
28 | 	*/gmmstyle(y, laglimits(2 4)) gmmstyle(dem, laglimits(1 3)) ivstyle(i.year)/*
29 | 	*/ robust nodiffsargan
30 | xtabond2 y l(1/4).y dem i.year,/*
31 | 	*/gmmstyle(y, laglimits(2 4)) gmmstyle(dem, laglimits(1 3)) ivstyle(i.year)/*
32 | 	*/ robust nodiffsargan
33 | 


--------------------------------------------------------------------------------
/code_in_notes/poisson.do:
--------------------------------------------------------------------------------
 1 | // poisson.do
 2 | clear
 3 | set more off
 4 | use "datasets/CivilConflict.dta"
 5 | collapse (sum) any_prio (mean)lpop gdp_g y_0 gini aid_capita,by(ccode)
 6 | 
 7 | local control "lpop gdp_g y_0 gini aid_capita" // after collapse: one row per ccode, any_prio summed and the covariates averaged
 8 | poisson any_prio `control'
 9 | nbreg any_prio `control'
10 | zip any_prio `control', inflate(_cons)
11 | zip, irr
12 | 


--------------------------------------------------------------------------------
/code_in_notes/propensity_score.do:
--------------------------------------------------------------------------------
 1 | // propensity_score.do
 2 | clear
 3 | set more off
 4 | 
 5 | // Experimental data (NSW)
 6 | use datasets/Lalonde_nsw/nsw.dta
 7 | gen age2=age^2
 8 | local controls "age-nodegree age2"
 9 | // Logit propensity score
10 | logit treat `controls'
11 | predict ps_1
12 | // Inspect common support
13 | bysort treat: su ps_1
14 | // Logit including the pre-treatment outcome
15 | bysort treat: su re75
16 | logit treat `controls' re75
17 | // Raw comparison of the outcome
18 | bysort treat: su re78
19 | reg re78 treat, r
20 | 
21 | 
22 | // Add the non-experimental CPS controls
23 | append using datasets/Lalonde_nsw/cps_controls.dta
24 | replace age2=age^2
25 | // Logit propensity score on the combined sample
26 | logit treat `controls'
27 | predict ps_2
28 | // Logit with all pairwise products of the controls
29 | local polynomial_control "`controls'"
30 | foreach v of varlist `controls'{
31 | 	foreach w of varlist `controls'{
32 | 		gen _`v'_`w'=`v'*`w'
33 | 		local polynomial_control "`polynomial_control' _`v'_`w'"
34 | 	}
35 | }
36 | logit treat `polynomial_control'
37 | predict ps_3
38 | // Lasso variable selection: standardize each term and keep those with non-missing values
39 | local std_polynomial_control ""
40 | foreach v of varlist `polynomial_control'{
41 | 	egen std_`v'=std(`v')
42 | 	su std_`v', mean
43 | 	if r(N)>0 {
44 | 		local std_polynomial_control "`std_polynomial_control' std_`v'"
45 | 	}
46 | }
47 | lasso logit treat `controls', grid(5) // NOTE(review): std_polynomial_control built above is never used; lasso sees `controls' only - confirm intent
48 | local post_vars=e(allvars_sel)
49 | di "`post_vars'"
50 | // Post-selection logit on the selected variables
51 | logit treat `post_vars'
52 | predict ps_4
53 | // Matching on a propensity score built from the selected variables
54 | teffects psmatch (re78) (treat `post_vars')
55 | // Matching on a pre-computed propensity score (log-odds of ps_2)
56 | gen trans_ps2=log(ps_2/(1-ps_2))
57 | bysort treat: su ps_2
58 | teffects psmatch (re78) (treat trans_ps2), atet
59 | // Follow-up: check common support, etc.
60 | 


--------------------------------------------------------------------------------
/code_in_notes/qreg_consump.do:
--------------------------------------------------------------------------------
 1 | // file: qreg_consump.do
 2 | use datasets/cfps_family_econ, clear
 3 | drop if expense <= 0
 4 | drop if fincome1 <= 0
 5 | generate log_consump = log(expense)
 6 | generate log_income = log(fincome1)
 7 | // Univariate quantile regressions at the 25th, 50th and 75th percentiles; fitted values are kept for the plot
 8 | foreach pct in 25 50 75 {
 9 | 	qreg log_consump log_income, q(0.`pct')
10 | 	predict p_log_consump`pct'
11 | 	label variable p_log_consump`pct' "`pct'% quantile"
12 | }
13 | // Scatter with the three fitted quantile lines
14 | sort log_income
15 | twoway (scatter log_consump     log_income) ///
16 | 	(line    p_log_consump25 log_income) ///
17 | 	(line    p_log_consump50 log_income) ///
18 | 	(line    p_log_consump75 log_income)
19 | // Simultaneous quantile regression with province dummies, then a test that the income slope is equal across quantiles
20 | sqreg log_consump log_income i.provcd14, q(0.25 0.5 0.75)
21 | test [q25]log_income=[q50]log_income=[q75]log_income
22 | 


--------------------------------------------------------------------------------
/code_in_notes/qreg_outlier.do:
--------------------------------------------------------------------------------
 1 | // file: qreg_outlier.do
 2 | clear
 3 | set more off
 4 | set obs 50
 5 | // simulated regressor and error
 6 | generate x = rnormal()
 7 | generate u = rnormal()
 8 | // outcome with intercept 1 and slope 1
 9 | generate y = 1 + x + u
10 | // plant one outlier at the smallest x
11 | sort x
12 | replace y = 30 if _n == 1
13 | // OLS versus median regression on the contaminated sample
14 | regress y x
15 | predict p_y_reg
16 | label variable p_y_reg "OLS"
17 | qreg y x , q(0.50)
18 | predict p_y_qreg
19 | label variable p_y_qreg "QREg"
20 | // scatter with both fitted lines
21 | twoway (scatter y        x) ///
22 | 	(line    p_y_reg  x) ///
23 | 	(line    p_y_qreg x)
24 | graph export qreg_outlier.pdf, replace
25 | 


--------------------------------------------------------------------------------
/code_in_notes/qreg_simulate.do:
--------------------------------------------------------------------------------
 1 | // file: qreg_simulate.do
 2 | clear
 3 | set more off
 4 | set obs 1000
 5 | // simulated regressor and error
 6 | generate x = rnormal()
 7 | generate u = rnormal()
 8 | // quantile rank of u
 9 | generate q = normal(u)
10 | // slope rises with the rank: b(0.25)=1.25, b(0.5)=1.5
11 | generate b = 1 + q
12 | // outcome
13 | generate y = 1 + b*x + u
14 | // quantile regressions at the three quartiles
15 | foreach tau in 0.25 0.50 0.75 {
16 | 	qreg y x , q(`tau')
17 | }
18 | 


--------------------------------------------------------------------------------
/code_in_notes/qreg_with_dummy.do:
--------------------------------------------------------------------------------
1 | // file: qreg_with_dummy.do
2 | use datasets/cfps_adult, clear
3 | keep cfps_gender p_income
4 | drop if p_income < 0
5 | bysort cfps_gender: summarize p_income, detail
6 | foreach tau in 0.5 0.75 0.9 {
7 | 	qreg p_income cfps_gender, q(`tau')
8 | }
9 | 


--------------------------------------------------------------------------------
/code_in_notes/reg_one_variate.do:
--------------------------------------------------------------------------------
 1 | // file: reg_one_variate.do
 2 | use datasets/cfps_adult, clear
 3 | drop if qp102 < 0
 4 | drop if qp101 < 130
 5 | regress qp102 qp101
 6 | outreg2 using reg_one_variate.tex, replace
 7 | predict p_weight
 8 | label variable p_weight "预测的体重"
 9 | sort qp101
10 | twoway (scatter qp102 qp101 if mod(_n,30)==20) ///
11 |        (line p_weight qp101 if qp101>130 & qp101<200), ///
12 |        xscale(range(130 200))
13 | graph export reg_one_variate.pdf, replace
14 | 


--------------------------------------------------------------------------------
/code_in_notes/reg_readings.do:
--------------------------------------------------------------------------------
 1 | // file: reg_readings.do
 2 | use datasets/cfps_adult, clear
 3 | drop if te4 < 0
 4 | drop if qq1101 < 0
 5 | generate log_income = log(p_income + 1)
 6 | regress log_income qq1101
 7 | outreg2 using reg_readings.tex, replace ///
 8 |     addt(学历固定效应, "No", 省份固定效应, "No", ///
 9 |     学历×省份固定效应, "No")
10 | regress log_income qq1101 i.te4
11 | outreg2 using reg_readings.tex, append keep(qq1101) ///
12 |     addt(学历固定效应, "Yes", 省份固定效应, "No", ///
13 |     学历×省份固定效应, "No")
14 | reghdfe log_income qq1101, absorb(i.te4 i.provcd14)
15 | outreg2 using reg_readings.tex, append ///
16 |     addt(学历固定效应, "Yes", 省份固定效应, "Yes", ///
17 |     学历×省份固定效应, "No")
18 | reghdfe log_income qq1101, absorb(i.te4#i.provcd14)
19 | outreg2 using reg_readings.tex, append ///
20 |     addt(学历固定效应, "No", 省份固定效应, "No", ///
21 |     学历×省份固定效应, "Yes")
22 | 


--------------------------------------------------------------------------------
/code_in_notes/reg_small_b.do:
--------------------------------------------------------------------------------
 1 | // file: reg_small_b.do
 2 | // Monte Carlo study of the OLS slope estimator in a very small sample.
 3 | // For a random-regressor design and a fixed-regressor design it compares
 4 | //   (a) the empirical CDF of the standardized slope error with the normal CDF
 5 | //   (b) the empirical CDF of the t-ratio with the t(n-2) CDF
 6 | // and combines the four graphs into reg_small.eps.
 7 | 
 8 | ** Sample size, degrees of freedom and number of replications
 9 | local sample_size=4
10 | local d_of_f=`sample_size'-2
11 | local replications=1000
12 | 
13 | // Fix the seed so that every run reproduces the same graphs
14 | set seed 19880505
15 | 
16 | // random_b_reg: draw one sample from y = x + u and return the slope error.
17 | //   obs(#)    number of observations (default 4)
18 | //   random(#) scale of the random component added to x = _n; 0 keeps x fixed
19 | // Returns r(b) = b_hat - 1 and r(std_b) = (b_hat - 1)/se(b_hat).
20 | cap program drop random_b_reg
21 | program define random_b_reg, rclass
22 | 	version 12
23 | 	syntax [,obs(integer 4) random(real 0)]
24 | 	drop _all
25 | 	set obs `obs'
26 | 	tempvar x y u
27 | 	// (rnormal()^2-1)/sqrt(2) is a chi-squared(1) draw rescaled to mean 0, variance 1
28 | 	gen `x'=_n+(rnormal()^2-1)/sqrt(2)*`random'
29 | 	gen `u'=rnormal()
30 | 	gen `y'=`x'+`u'
31 | 	reg `y' `x'
32 | 	return scalar b=(_b[`x']-1)
33 | 	return scalar std_b=(_b[`x']-1)/_se[`x']
34 | end
35 | 
36 | // plot_b_cdfs: from the simulated b and std_b in memory, save both CDF comparison graphs.
37 | //   tag(string) suffix of the saved graphs (reg_small_b_<tag>, reg_small_stdb_<tag>)
38 | //   df(#)       degrees of freedom of the reference t distribution (default 2)
39 | cap program drop plot_b_cdfs
40 | program define plot_b_cdfs
41 | 	syntax , tag(string) [df(integer 2)]
42 | 
43 | 	// dist. of b: standardize, then compare the empirical CDF with the normal CDF
44 | 	quietly: su b
45 | 	replace b=(b-r(mean))/r(sd)
46 | 	sort b
47 | 	gen b_empirical=_n/_N
48 | 	gen dist_normal=normal(b)
49 | 	label variable dist_normal "Normal Distribution"
50 | 	twoway (line b_empirical b) ///
51 | 		(line dist_normal b) ///
52 | 		, graphr(fcolor(white) color(white))  xtitle("") ///
53 | 		saving(reg_small_b_`tag', replace)
54 | 
55 | 	// dist. of standardized b: compare the empirical CDF with the t(df) CDF
56 | 	sort std_b
57 | 	gen std_b_empirical=_n/_N
58 | 	// t(df, x) is the cumulative Student t distribution, hence the name t_cdf
59 | 	gen t_cdf=t(`df',std_b)
60 | 	label variable t_cdf "t(`df') Distribution"
61 | 	twoway (line std_b_empirical std_b) ///
62 | 		(line t_cdf std_b) ///
63 | 		, graphr(fcolor(white) color(white))  xtitle("") ///
64 | 		saving(reg_small_stdb_`tag', replace)
65 | end
66 | 
67 | // Design 1: random regressor
68 | simulate b=r(b) std_b=r(std_b), reps(`replications'): random_b_reg, obs(`sample_size') random(1.5)
69 | plot_b_cdfs, tag(random) df(`d_of_f')
70 | 
71 | // Design 2: fixed regressor
72 | simulate b=r(b) std_b=r(std_b), reps(`replications'): random_b_reg, obs(`sample_size') random(0)
73 | plot_b_cdfs, tag(fixed) df(`d_of_f')
74 | 
75 | // Assemble the four panels (rows: design, columns: b and standardized b)
76 | graph combine reg_small_b_random.gph reg_small_stdb_random.gph ///
77 |         reg_small_b_fixed.gph reg_small_stdb_fixed.gph, ///
78 |         graphr(fcolor(white) color(white)) col(2)
79 | graph export reg_small.eps, replace
80 | 


--------------------------------------------------------------------------------
/code_in_notes/reg_with_dummies.do:
--------------------------------------------------------------------------------
 1 | // file: reg_with_dummies.do
 2 | // Income regressed on a complete set of te4 category indicators, once with
 3 | // and once without a constant term.
 4 | use datasets/cfps_adult, clear
 5 | 
 6 | // Discard observations with negative income or negative te4 codes.
 7 | drop if p_income<0 | te4<0
 8 | 
 9 | // Create one indicator per te4 category: edu1, edu2, ...
10 | tabulate te4, generate(edu)
11 | 
12 | // With a constant, the full indicator set is collinear with the intercept.
13 | regress p_income edu*
14 | outreg2 using reg_with_dummies.tex, replace
15 | 
16 | // Without a constant, all indicators can enter the regression.
17 | regress p_income edu*, noconstant
18 | outreg2 using reg_with_dummies.tex, append
19 | 


--------------------------------------------------------------------------------
/code_in_notes/reg_with_dummy.do:
--------------------------------------------------------------------------------
1 | // file: reg_with_dummy.do
2 | use datasets/cfps_adult, clear
3 | keep cfps_gender p_income
4 | drop if p_income<0
5 | bysort cfps_gender: outreg2 using reg_with_dummy_su.tex,/*
6 | 	*/replace sum(log) eqkeep(N mean) keep(p_income)
7 | reg p_income cfps_gender
8 | outreg2 using reg_with_dummy.tex, replace
9 | 


--------------------------------------------------------------------------------
/code_in_notes/simpson_paradox.do:
--------------------------------------------------------------------------------
 1 | // file: simpson_paradox.do
 2 | // Simulated illustration of Simpson's paradox: exercise raises y by 3 within
 3 | // each gender, but the gender==1 group exercises more often (0.8 vs 0.3) and
 4 | // has y lower by 10, so the pooled regression of y on exer is confounded.
 5 | clear
 6 | // Fix the seed so the exported table is identical on every run
 7 | set seed 19880505
 8 | set obs 1000
 9 | gen gender=runiform()<0.5
10 | // Probability of exercising depends on gender
11 | gen     exer=runiform()<0.8 if gender==1
12 | replace exer=runiform()<0.3 if gender==0
13 | gen y=80-10*gender+3*exer+rnormal()
14 | // Pooled regression, omitting gender
15 | reg y exer
16 | outreg2 using simpson_paradox.tex, replace
17 | // Separate regressions within each gender
18 | reg y exer if gender==1
19 | outreg2 using simpson_paradox.tex, append
20 | reg y exer if gender==0
21 | outreg2 using simpson_paradox.tex, append
22 | // Pooled regression controlling for gender
23 | reg y exer gender
24 | outreg2 using simpson_paradox.tex, append
25 | 


--------------------------------------------------------------------------------
/code_in_notes/test_jointly_ntv.do:
--------------------------------------------------------------------------------
 1 | // test_jointly_ntv.do
 2 | // Joint significance of four-period-lagged vote shares and turnout in
 3 | // explaining NTV in 1999, with region effects absorbed.
 4 | set more off
 5 | use datasets/NTV_Aggregate_Data_reshaped.dta, clear
 6 | xtset tik_id year
 7 | 
 8 | // Regressors whose coefficients are tested jointly in both specifications.
 9 | local lagged_votes "L4.Votes_NDR_ L4.Votes_SPS_ L4.Votes_Yabloko_ L4.Votes_KPRF_ L4.Votes_LDPR_ L4.Turnout"
10 | 
11 | // Test without any additional controls.
12 | reghdfe NTV `lagged_votes' if year==1999, absorb(region)
13 | test `lagged_votes'
14 | 
15 | // Control for socio-economic variables and for polynomials (orders 1 to 5)
16 | // in population and wage.
17 | forvalues p=1/5 {
18 |     gen log_pop_`p'=logpop^`p'
19 |     gen log_wage_`p'=log_wage^`p'
20 | }
21 | local econ_controls "L.log_pop_* L.log_wage_* L.nurses L.doctors_pc"
22 | reghdfe NTV `lagged_votes' `econ_controls' if year==1999, absorb(region)
23 | test `lagged_votes'
24 | 


--------------------------------------------------------------------------------
/code_in_notes/wald_ols.do:
--------------------------------------------------------------------------------
 1 | // file: wald_ols.do
 2 | // Monte Carlo check of a delta-method Wald test of H0: b^2 = 1 based on
 3 | // OLS with robust standard errors.
 4 | set seed 19880505
 5 | 
 6 | // dgp: simulate one sample from y = b*x + u, with u the cube of a standard
 7 | // normal draw, and test H0: b^2 = 1.
 8 | //   obs(#)  sample size (default 1000)
 9 | //   b(#)    true slope (default -1, for which H0 holds)
10 | // Returns r(teststat), the Wald statistic, and r(rejected) = 1 when its
11 | // chi-squared(1) p-value is below 0.05, otherwise 0.
12 | cap program drop dgp
13 | program define dgp, rclass
14 | 	syntax [, obs(integer 1000) b(real -1.0)]
15 | 	drop _all
16 | 	set obs `obs'
17 | 	tempvar x y u
18 | 	tempname vkk V test_stat bk p
19 | 	gen `x'=rnormal()
20 | 	gen `u'=rnormal()^3
21 | 	gen `y'=`b'*`x'+`u'
22 | 	reg `y' `x' , robust
23 | 	// Intermediate results live in temporary scalars, which keep full
24 | 	// double precision (no round trip through macro text).
25 | 	scalar `bk'=_b[`x']
26 | 	mat `V'=e(V)
27 | 	scalar `vkk'=`V'[rownumb(`V',"`x'"),colnumb(`V',"`x'")]
28 | 	// Delta method with g(b) = b^2 - 1 and g'(b) = 2b:
29 | 	// W = g(b)^2 / (g'(b)^2 * Var(b)) = (b^2-1)^2 / (4*b^2*Var(b))
30 | 	scalar `test_stat'=(`bk'^2-1)^2/(4*`bk'^2*`vkk')
31 | 	scalar `p'=1-chi2(1,`test_stat')
32 | 	// A missing p-value compares as not below 0.05, i.e. no rejection
33 | 	return scalar rejected=(`p'<0.05)
34 | 	return scalar teststat=`test_stat'
35 | end
36 | // simulate with the default b = -1: H0 is true, so mean(rejected) estimates size
37 | simulate rejected=r(rejected) teststat=r(teststat)/*
38 |         */ ,reps(2000):dgp
39 | su
40 | hist teststat
41 | // simulate with b = 0: H0 is false, so mean(rejected) estimates power
42 | simulate rejected=r(rejected) teststat=r(teststat)/*
43 |         */ ,reps(2000):dgp, b(0)
44 | su
45 | 


--------------------------------------------------------------------------------
/code_in_notes/weighted_ols.do:
--------------------------------------------------------------------------------
 1 | // weighted_ols.do
 2 | // Quartic polynomial approximation of exp(sin(x^3)) fitted by OLS with and
 3 | // without weights, plotted against the data and the true function.
 4 | clear
 5 | set more off
 6 | 
 7 | // Generate the data: x = 2*runiform(), y = exp(sin(x^3)) + standard normal noise.
 8 | set seed 19880505
 9 | set obs 300
10 | generate x = 2*runiform()
11 | generate y = exp(sin(x^3)) + rnormal()
12 | generate y_true = exp(sin(x^3))
13 | label variable y_true "True function"
14 | 
15 | // Regression: build x^2, x^3, x^4 and fit the unweighted quartic.
16 | forvalues k = 2/4 {
17 |     generate x`k' = x^`k'
18 | }
19 | local quartic x x2 x3 x4
20 | regress y `quartic'
21 | predict yhat_unweighted
22 | label variable yhat_unweighted "Unweighted"
23 | 
24 | // Weighted regression: weights are the normal density evaluated at 2*x,
25 | // so observations with small x count the most.
26 | generate w = normalden(2*x)
27 | regress y `quartic' [iweight=w]
28 | predict yhat_weighted
29 | label variable yhat_weighted "Weighted"
30 | 
31 | // Plot the data, both fits and the true function.
32 | sort x
33 | twoway (scatter y x) ///
34 |     (line yhat_unweighted x) ///
35 |     (line y_true x) ///
36 |     (line yhat_weighted x)
37 | graph export wls.pdf, replace
38 | 


--------------------------------------------------------------------------------
/code_in_notes/white_hetero.do:
--------------------------------------------------------------------------------
 1 | // file: white_hetero.do
 2 | // Monte Carlo comparison of conventional and heteroskedasticity-robust
 3 | // standard errors when the error standard deviation equals |x|.
 4 | 
 5 | // Fix the seed so the reported rejection rates are reproducible
 6 | set seed 19880505
 7 | 
 8 | // dgp: simulate one sample from y = b*x + |x|*e, e standard normal, and
 9 | // test the estimated slope against zero at the 1.96 critical value.
10 | //   obs(#)  sample size (default 100)
11 | //   robust  if given, passed on to regress to request robust standard errors
12 | //   b(#)    true slope (default 0, so the tested null hypothesis holds)
13 | // Returns r(b), r(se) and r(rejected) = 1 when |b/se| >= 1.96, otherwise 0.
14 | cap program drop dgp
15 | program define dgp, rclass
16 | 	syntax [, obs(integer 100) robust b(real 0)]
17 | 	drop _all
18 | 	set obs `obs'
19 | 	tempvar x y sigma
20 | 	gen `x'=rnormal()
21 | 	// Error standard deviation grows with the magnitude of x
22 | 	gen `sigma'=abs(`x')
23 | 	gen `y'=`b'*`x'+`sigma'*rnormal()
24 | 	quietly: reg `y' `x', `robust'
25 | 	return scalar b=_b[`x']
26 | 	return scalar se=_se[`x']
27 | 	return scalar rejected=(abs(_b[`x']/_se[`x'])>=1.96)
28 | end
29 | 
30 | // Conventional standard errors
31 | simulate rejected=r(rejected) b=r(b) se=r(se)/*
32 |         */ ,reps(1000):dgp
33 | su
34 | // Robust standard errors
35 | simulate rejected=r(rejected) b=r(b) se=r(se)/*
36 |         */,reps(1000):dgp, robust
37 | su
38 | 


--------------------------------------------------------------------------------