├── .gitignore ├── LICENSE ├── README.md └── code_in_notes ├── CivilConflict.do ├── DID_disease.do ├── DID_divorce.do ├── DID_dynamic_trend.do ├── HCW.do ├── RD_r_and_d.do ├── Run_ALL.do ├── chow_test.do ├── cross_validation_reg.do ├── datasets ├── CivilConflict.dta ├── DDCGdata_final.dta ├── Divorce-Wolfers-AER.dta ├── Lalonde_nsw │ ├── cps_controls.dta │ ├── cps_controls2.dta │ ├── cps_controls3.dta │ ├── nsw.dta │ ├── nsw_dw.dta │ ├── psid_controls.dta │ └── psid_controls3.dta ├── NTV_Aggregate_Data_reshaped.dta ├── OHIE_QJE.dta ├── OHIE_Science.dta ├── Questionarie.pdf ├── angrist2002aer.dta ├── bronzini_r_and_d.dta ├── cfps_adult.dta ├── cfps_family_econ.dta ├── chfs_ind.dta ├── citydata.dta ├── disease_gender_gap.dta ├── hcw.dta ├── shannxi_export.dta ├── soep_female_labor.dta └── voting_cnty_clean.dta ├── fixed_effects_ntv.do ├── hcw_lasso.do ├── heteroscedasticity.do ├── ipw_doubly_robust.do ├── lasso_and_ridge_shrinkage.do ├── linear_panel.do ├── linear_panel_fd_np.do ├── linear_panel_fe_ntv.do ├── logit_and_roc.do ├── matching.do ├── matching_nonexp.do ├── matching_psm.do ├── mlogit_employ.do ├── model_selection.do ├── multi_collinearity.do ├── nls_mpc.do ├── ohie_qje.do ├── ohie_qje_joint_test.do ├── ohie_qje_test_one.do ├── ohie_science.do ├── oneway_anova.do ├── panel_binary.do ├── panel_dynamic.do ├── poisson.do ├── propensity_score.do ├── qreg_consump.do ├── qreg_outlier.do ├── qreg_simulate.do ├── qreg_with_dummy.do ├── reg_one_variate.do ├── reg_readings.do ├── reg_small_b.do ├── reg_with_dummies.do ├── reg_with_dummy.do ├── simpson_paradox.do ├── test_jointly_ntv.do ├── wald_ols.do ├── weighted_ols.do └── white_hetero.do /.gitignore: -------------------------------------------------------------------------------- 1 | *.tex 2 | *.txt 3 | *.png 4 | *.eps 5 | .DS_* 6 | *.pdf 7 | *.doc 8 | *.gph 9 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Jichun Si 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 注意:本Repo已经被放弃,新的Repo在:https://github.com/sijichun/EconometricsCode 如有需要请移步。 2 | -------------------------------------------------------------------------------- /code_in_notes/CivilConflict.do: -------------------------------------------------------------------------------- 1 | // CivilConflict.do 2 | clear 3 | set more off 4 | 5 | use datasets/CivilConflict 6 | 7 | // Simplest case: one endogenous regressor, one instrument 8 | // Endogenous regressor: GDP growth rate gdp_g 9 | // Instrument: rainfall growth rate GPCP_g 10 | // First stage 11 | reg gdp_g GPCP_g i.ccode i.ccode#c.year 12 | predict gdp_g_pred 13 | // reduced-form 14 | reg any_prio GPCP_g i.ccode i.ccode#c.year, cl(ccode) 15 | // Incorrect approach: manual two-stage least squares that plugs in the first-stage fitted values 16 | reg any_prio gdp_g_pred i.ccode i.ccode#c.year,cl(ccode) 17 | // Two-stage least squares, three alternative commands 18 | ivregress 2sls any_prio (gdp_g = GPCP_g) /* 19 | */i.ccode i.ccode#c.year, cl(ccode) 20 | ivreghdfe any_prio (gdp_g = GPCP_g),/* 21 | */ absorb(i.ccode i.ccode#c.year) cl(ccode) 22 | ivreg2 any_prio (gdp_g = GPCP_g) /* 23 | */i.ccode i.ccode#c.year, cl(ccode) 24 | 25 | // Two endogenous regressors, two instruments 26 | // Endogenous regressors: GDP growth rate gdp_g and lagged GDP growth rate gdp_g_l 27 | // Instruments: rainfall growth rate GPCP_g and lagged rainfall growth rate GPCP_g_l 28 | // First stage 29 | drop gdp_g_pred 30 | reg gdp_g GPCP_g GPCP_g_l i.ccode i.ccode#c.year 31 | predict gdp_g_pred 32 | reg gdp_g_l GPCP_g GPCP_g_l i.ccode i.ccode#c.year 33 | predict gdp_g_l_pred 34 | // Two-stage least squares, three alternative commands 35 | ivregress 2sls any_prio (gdp_g gdp_g_l = GPCP_g GPCP_g_l ) /* 36 | */i.ccode i.ccode#c.year, cl(ccode) 37 | ivreghdfe any_prio (gdp_g gdp_g_l = GPCP_g GPCP_g_l ),/* 38 | */ absorb(i.ccode i.ccode#c.year) cl(ccode) 39 | ivreg2 any_prio (gdp_g gdp_g_l = GPCP_g GPCP_g_l ) /* 40 | */i.ccode i.ccode#c.year, cl(ccode) 41 | // Two-stage least squares, over-identified case (four instruments) 42 | ivreghdfe any_prio /* 43 | */(gdp_g gdp_g_l =GPCP_g GPCP_g_l NCEP_g NCEP_g_l),/* 44 | */ absorb(i.ccode i.ccode#c.year) cl(ccode) 45 | ** 
Limited-information maximum likelihood (LIML) 46 | ivreg2 any_prio (gdp_g gdp_g_l =GPCP_g GPCP_g_l )/* 47 | */ i.ccode i.ccode#c.year, cl(ccode) liml 48 | -------------------------------------------------------------------------------- /code_in_notes/DID_disease.do: -------------------------------------------------------------------------------- 1 | clear 2 | set more off 3 | use datasets/disease_gender_gap 4 | ***** 5 | ** Variable descriptions (the dataset is loaded from datasets/, like every other script in this folder) 6 | * year: survey wave 7 | * district: district 8 | * edu_years: years of schooling 9 | * year_birth: year of birth, 1960-1992 (note: the meningitis epidemic occurred in 1986) 10 | ** Cohorts chosen by the authors: aged 0-5 in 1986 (not yet in school), 6-12 (primary school), 13-20 (secondary school) 11 | ** Corresponding birth years: 1981-1986, 1974-1980, 1966-1973 12 | gen cohort1=(year_birth>=1981 & year_birth<=1986) 13 | gen cohort2=(year_birth>=1974 & year_birth<=1980) 14 | gen cohort3=(year_birth>=1966 & year_birth<=1973) 15 | ** Interaction terms 16 | gen co1menin=cohort1*menin_avg 17 | gen co2menin=cohort2*menin_avg 18 | gen co3menin=cohort3*menin_avg 19 | 20 | gen co1menin_female=cohort1*female*menin_avg 21 | gen co2menin_female=cohort2*female*menin_avg 22 | gen co3menin_female=cohort3*female*menin_avg 23 | ** district is a string, so build a numeric id (smallest row number within each district) before using it as a fixed effect 24 | gen n=_n 25 | egen district_id=min(n), by(district) 26 | ** First regression: this is the actual DID. The role usually played by time is taken by cohort and the group variable is menin; all three cohorts are treated and there are two untreated groups. NOTE(review): the local macro control is never defined in this file, so it expands to nothing 27 | reghdfe edu_years co1menin co2menin co3menin female `control', absorb(i.year i.year_birth i.district_id) cl(district_id) 28 | ** Second regression: compares the treatment effects across genders 29 | reghdfe edu_years co1menin_female co2menin_female co3menin_female co1menin co2menin co3menin female `control', absorb(i.year i.year_birth i.district_id) cl(district_id) 30 | ****** Questions: 31 | ** Why not control for individual fixed effects (i.indvidual_id)? 
32 | 33 | ** Robustness: exclude everyone born after 1986 (the notes report the results stay robust). The sample must be restricted BEFORE estimating; preserve/restore brings the full sample back afterwards 34 | preserve 35 | drop if year_birth>1986 36 | reghdfe edu_years co1menin co2menin co3menin female `control', absorb(i.year i.year_birth i.district_id) cl(district_id) 37 | reghdfe edu_years co1menin_female co2menin_female co3menin_female co1menin co2menin co3menin female `control', absorb(i.year i.year_birth i.district_id) cl(district_id) 38 | restore 39 | 40 | ** Allowing for cross-sectional dependence, e.g. peer effects within a birth cohort: what should we cluster on? 41 | reghdfe edu_years co1menin co2menin co3menin female `control', absorb(i.year i.year_birth i.district_id) cl(district_id year_birth) 42 | reghdfe edu_years co1menin_female co2menin_female co3menin_female co1menin co2menin co3menin female `control', absorb(i.year i.year_birth i.district_id) cl(district_id year_birth) 43 | 44 | ** Controlling for fixed effects even more strictly (birth-year-by-district cells) 45 | reghdfe edu_years co1menin co2menin co3menin female `control', absorb(i.year i.year_birth#i.district_id) cl(district_id year_birth) 46 | reghdfe edu_years co1menin_female co2menin_female co3menin_female co1menin co2menin co3menin female `control', absorb(i.year i.year_birth#i.district_id) cl(district_id year_birth) 47 | * Question: can the first regression equation be estimated? 48 | * Question: what does the second regression equation identify? 
49 | -------------------------------------------------------------------------------- /code_in_notes/DID_divorce.do: -------------------------------------------------------------------------------- 1 | clear 2 | set more off 3 | use "datasets/Divorce-Wolfers-AER.dta" 4 | ** state dummies 5 | egen state=group(st) 6 | ** panel setting 7 | xtset state year 8 | gen year2=year^2 9 | ** benchmark 10 | reghdfe div_rate unilateral divx* if year>1967 & year<1989 [w=stpop], absorb(i.state i.year) cl(state) 11 | ** state-specific trend 12 | reghdfe div_rate unilateral divx* if year>1967 & year<1989 [w=stpop], absorb(i.state i.year i.state#c.year) cl(state) 13 | reghdfe div_rate unilateral divx* if year>1967 & year<1989 [w=stpop], absorb(i.state i.year i.state#c.year i.state#c.year2) cl(state) 14 | 15 | ** common trend 16 | ** delta_year>0 marks years before lfdivlaw and delta_year<0 years after it (presumably lfdivlaw is the year the unilateral divorce law took effect -- confirm against the codebook) 17 | gen delta_year=lfdivlaw-year 18 | ** L# dummies flag positive delta_year (L12 pools >=12), F# dummies flag negative delta_year; no dummy is created for delta_year==1, which leaves it as the omitted period. NOTE(review): a missing lfdivlaw gives a missing delta_year, and Stata evaluates missing>=12 as true 19 | gen L12unilateral = delta_year>=12 20 | gen L11unilateral = delta_year==11 21 | gen L10unilateral = delta_year==10 22 | gen L9unilateral = delta_year==9 23 | gen L8unilateral = delta_year==8 24 | gen L7unilateral = delta_year==7 25 | gen L6unilateral = delta_year==6 26 | gen L5unilateral = delta_year==5 27 | gen L4unilateral = delta_year==4 28 | gen L3unilateral = delta_year==3 29 | gen L2unilateral = delta_year==2 30 | gen unilateral0 = delta_year==0 31 | gen F1unilateral = delta_year==-1 32 | gen F2unilateral = delta_year==-2 33 | gen F3unilateral = delta_year==-3 34 | gen F4unilateral = delta_year==-4 35 | gen F5unilateral = delta_year==-5 36 | gen F6unilateral = delta_year==-6 37 | gen F7unilateral = delta_year==-7 38 | gen F8unilateral = delta_year==-8 39 | gen F9unilateral = delta_year==-9 40 | gen F10unilateral = delta_year==-10 41 | gen F11unilateral = delta_year==-11 42 | gen F12unilateral= delta_year==-12 43 | gen F13unilateral = delta_year==-13 44 | gen F14unilateral= delta_year==-14 45 | gen F15unilateral = delta_year==-15 46 | gen F16unilateral = delta_year==-16 47 | gen F17unilateral = 
delta_year==-17 48 | gen F18unilateral = delta_year==-18 49 | gen F19unilateral= delta_year==-19 50 | gen F20unilateral = delta_year<=-20 51 | reghdfe div_rate L12unilateral - F20unilateral divx* if year>1967 & year<1989 [w=stpop], absorb(i.state i.year) cl(state) 52 | coefplot, keep(*unilateral*) vert xline(12) 53 | ** dynamic effects 54 | reghdfe div_rate unilateral0 - F20unilateral divx* if year>1967 & year<1989 [w=stpop], absorb(i.state i.year) cl(state) 55 | -------------------------------------------------------------------------------- /code_in_notes/DID_dynamic_trend.do: -------------------------------------------------------------------------------- 1 | clear 2 | set more off 3 | 4 | ** Number of observations: 15 periods in total 5 | set obs 15 6 | ** Generate the time index and the policy dummy 7 | gen t=_n // time 8 | gen t2=t^2 9 | gen d=t>5 // the policy is in place from period 6 onwards 10 | ************************ No dynamic effects ******************************* 11 | ** y before the policy 12 | gen y=t+30+0.3*rnormal() if d==0 13 | gen true_trend=t+30 14 | label variable true_trend "真实的趋势" 15 | ** y after the policy; the policy effect is 20 16 | replace y=t+50+0.3*rnormal() if d==1 17 | ** No control for the time trend 18 | reg y d 19 | predict y0 20 | replace y0=y0-_b[d] if d==1 21 | label variable y0 "无时间趋势" 22 | ** Control for the time trend with a linear function 23 | reg y d t 24 | predict y1 25 | replace y1=y1-_b[d] if d==1 26 | label variable y1 "线性时间趋势" 27 | ** Control for the time trend with a quadratic function 28 | reg y d t t2 29 | predict y2 30 | replace y2=y2-_b[d] if d==1 31 | label variable y2 "二次时间趋势" 32 | twoway (connected y0 t, msymbol(triangle) msize(small))/* 33 | */ (connected y1 t, msymbol(diamond) msize(small))/* 34 | */ (connected y2 t, msymbol(square) msize(small))/* 35 | */ (line true_trend t, lpattern(dash))/* 36 | */ (scatter y t, msymbol(circle))/* 37 | */ , title("无动态效应") legend(rows(2)) scheme(s1mono) 38 | graph export DID_dynamic_trend_linear.pdf, replace 39 | graph export DID_dynamic_trend_linear.png, replace 40 | ************************ With dynamic effects ******************************* 41 | keep t t2 d 42 | ** y before the policy 43 | gen y=t+30+0.3*rnormal() if d==0 44 | gen true_trend=t+30 45 | label 
variable true_trend "真实的趋势" 46 | ** y after the policy: rises until period 8 and falls afterwards 47 | replace y=-1*(t-8)^2+50+0.3*rnormal() if d==1 48 | ** No control for the time trend 49 | reg y d 50 | predict y0 51 | replace y0=y0-_b[d] if d==1 52 | label variable y0 "无时间趋势" 53 | ** Control for the time trend with a linear function 54 | reg y d t 55 | predict y1 56 | replace y1=y1-_b[d] if d==1 57 | label variable y1 "线性时间趋势" 58 | ** Control for the time trend with a quadratic function 59 | reg y d t t2 60 | predict y2 61 | replace y2=y2-_b[d] if d==1 62 | label variable y2 "二次时间趋势" 63 | twoway (connected y0 t, msymbol(triangle) msize(small))/* 64 | */ (connected y1 t, msymbol(diamond) msize(small))/* 65 | */ (connected y2 t, msymbol(square) msize(small))/* 66 | */ (line true_trend t, lpattern(dash))/* 67 | */ (scatter y t, msymbol(circle))/* 68 | */ , title("动态效应") legend(rows(2)) scheme(s1mono) 69 | graph export DID_dynamic_trend_dynamic.pdf, replace 70 | graph export DID_dynamic_trend_dynamic.png, replace 71 | -------------------------------------------------------------------------------- /code_in_notes/HCW.do: -------------------------------------------------------------------------------- 1 | clear 2 | use datasets/hcw.dta,clear 3 | tsset time 4 | gen n=_n 5 | gen pretreat=(n<18) // NOTE(review): the plots mark the treatment date at time==150; confirm that n<18 selects exactly the observations with time<150 6 | reg HongKong Japan Korea Philippines Taiwan if pretreat 7 | predict counterfactual 8 | gen treatment = HongKong - counterfactual 9 | tsline HongKong counterfactual,lp(solid dash) xline(150) yline(0) 10 | tsline treatment,xline(150) yline(0) 11 | -------------------------------------------------------------------------------- /code_in_notes/RD_r_and_d.do: -------------------------------------------------------------------------------- 1 | clear 2 | set more off 3 | use datasets/bronzini_r_and_d.dta 4 | 5 | gen s=score-75 6 | gen treat=(s>=0) if !missing(s) 7 | gen s2=s^2 8 | gen s3=s^3 9 | 10 | // Left of the cutoff: running variable and its powers, zero on the treated side 11 | gen ls=s*(1-treat) 12 | gen ls2=ls^2 13 | gen ls3=ls^3 14 | // Right of the cutoff: running variable and its powers, zero on the untreated side 15 | gen rs=s*treat 16 | gen rs2=rs^2 17 | gen rs3=rs^3 18 | 19 | // Full sample with global polynomials -- shown for illustration, do not use 20 | reg INVSALES treat ls rs, cluster(score) 21 | reg INVSALES treat ls ls2 rs rs2, 
cluster(score) 22 | reg INVSALES treat ls ls2 ls3 rs rs2 rs3, cluster(score) 23 | // Bandwidth of 10 score points around the cutoff 24 | reg INVSALES treat ls rs if s<10 & s>-10, cluster(score) 25 | reg INVSALES treat ls ls2 rs rs2 if s<10 & s>-10, cluster(score) 26 | reg INVSALES treat ls ls2 ls3 rs rs2 rs3 if s<10 & s>-10, cluster(score) 27 | // rdrobust 28 | rdrobust INVSALES s, c(0) h(10) 29 | rdrobust INVSALES s, c(0) 30 | 31 | // Plot: separate cubic fits on each side of the cutoff within the 10-point window 32 | reg INVSALES treat ls ls2 ls3 if s<0 & s>-10 33 | predict l_pred_INVSALES 34 | reg INVSALES treat rs rs2 rs3 if s>=0 & s<10 35 | predict r_pred_INVSALES 36 | sort s 37 | twoway (scatter INVSALES s if s<10 & s>-10)/* 38 | */ (line l_pred_INVSALES s if s<=0 & s>-10)/* 39 | */ (line r_pred_INVSALES s if s>=0 & s<10) 40 | // Manipulation test 41 | rddensity s, c(0) plot 42 | -------------------------------------------------------------------------------- /code_in_notes/Run_ALL.do: -------------------------------------------------------------------------------- 1 | do reg_one_variate.do 2 | do lasso_and_ridge_shrinkage.do 3 | do DID_dynamic_trend.do 4 | do logit_and_roc.do 5 | do nls_mpc.do 6 | do weighted_ols.do 7 | do qreg_outlier.do 8 | do hcw_lasso.do 9 | -------------------------------------------------------------------------------- /code_in_notes/chow_test.do: -------------------------------------------------------------------------------- 1 | // file: chow_test.do 2 | use datasets/cfps_adult, clear 3 | gen log_income=log(p_income+1) 4 | drop if qq1101<0 5 | drop if te4<0 6 | // Generate variables: gender dummies, education dummies, and gender-specific copies of every regressor 7 | gen male=cfps_gender 8 | gen female=1-cfps_gender 9 | tab te4, gen(edu) 10 | local nulls "(male=female)" 11 | local explans "male female" 12 | foreach v of varlist qq1101 edu*{ 13 | gen `v'f=`v'*female 14 | gen `v'm=`v'*male 15 | local nulls "`nulls' (`v'f=`v'm)" 16 | local explans "`explans' `v'f `v'm" 17 | } 18 | // Separate regressions by gender 19 | reg log_income qq1101 edu1-edu7 if male==1 20 | reg log_income qq1101 edu1-edu7 if female==1 21 | // Pooled regression followed by the joint test of equal coefficients 22 | reg log_income `explans', noconstant 23 | 
di "`nulls'" 24 | test `nulls' 25 | -------------------------------------------------------------------------------- /code_in_notes/cross_validation_reg.do: -------------------------------------------------------------------------------- 1 | // file: cross_validation_reg.do -- leave-one-out cross-validation of polynomial regressions of degree 1 to 10 2 | clear 3 | set obs 50 4 | gen x=runiform()*3 5 | gen y=exp(x)+rnormal()*2 6 | local control "" 7 | scalar K=0 // NOTE(review): K is not used anywhere below 8 | forvalues i=1/10{ 9 | gen x`i'=x^`i' 10 | local control "`control' x`i'" 11 | local N= _N 12 | quietly{ 13 | gen error=. 14 | forvalues j=1/`N'{ 15 | reg y `control' if _n~=`j' 16 | predict resid, residual 17 | replace error=resid if _n==`j' 18 | drop resid 19 | } 20 | } 21 | gen error2=error^2 22 | quietly: su error2 23 | scalar mse=r(mean) // mean of the squared held-out residuals for the current polynomial degree 24 | display `i' _skip mse // NOTE(review): the display manual documents the directive as _skip(#); confirm the bare form is accepted 25 | drop error error2 26 | } 27 | -------------------------------------------------------------------------------- /code_in_notes/datasets/CivilConflict.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/CivilConflict.dta -------------------------------------------------------------------------------- /code_in_notes/datasets/DDCGdata_final.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/DDCGdata_final.dta -------------------------------------------------------------------------------- /code_in_notes/datasets/Divorce-Wolfers-AER.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/Divorce-Wolfers-AER.dta -------------------------------------------------------------------------------- 
/code_in_notes/datasets/Lalonde_nsw/cps_controls.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/Lalonde_nsw/cps_controls.dta -------------------------------------------------------------------------------- /code_in_notes/datasets/Lalonde_nsw/cps_controls2.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/Lalonde_nsw/cps_controls2.dta -------------------------------------------------------------------------------- /code_in_notes/datasets/Lalonde_nsw/cps_controls3.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/Lalonde_nsw/cps_controls3.dta -------------------------------------------------------------------------------- /code_in_notes/datasets/Lalonde_nsw/nsw.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/Lalonde_nsw/nsw.dta -------------------------------------------------------------------------------- /code_in_notes/datasets/Lalonde_nsw/nsw_dw.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/Lalonde_nsw/nsw_dw.dta -------------------------------------------------------------------------------- /code_in_notes/datasets/Lalonde_nsw/psid_controls.dta: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/Lalonde_nsw/psid_controls.dta -------------------------------------------------------------------------------- /code_in_notes/datasets/Lalonde_nsw/psid_controls3.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/Lalonde_nsw/psid_controls3.dta -------------------------------------------------------------------------------- /code_in_notes/datasets/NTV_Aggregate_Data_reshaped.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/NTV_Aggregate_Data_reshaped.dta -------------------------------------------------------------------------------- /code_in_notes/datasets/OHIE_QJE.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/OHIE_QJE.dta -------------------------------------------------------------------------------- /code_in_notes/datasets/OHIE_Science.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/OHIE_Science.dta -------------------------------------------------------------------------------- /code_in_notes/datasets/Questionarie.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/Questionarie.pdf 
-------------------------------------------------------------------------------- /code_in_notes/datasets/angrist2002aer.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/angrist2002aer.dta -------------------------------------------------------------------------------- /code_in_notes/datasets/bronzini_r_and_d.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/bronzini_r_and_d.dta -------------------------------------------------------------------------------- /code_in_notes/datasets/cfps_adult.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/cfps_adult.dta -------------------------------------------------------------------------------- /code_in_notes/datasets/cfps_family_econ.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/cfps_family_econ.dta -------------------------------------------------------------------------------- /code_in_notes/datasets/chfs_ind.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/chfs_ind.dta -------------------------------------------------------------------------------- /code_in_notes/datasets/citydata.dta: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/citydata.dta -------------------------------------------------------------------------------- /code_in_notes/datasets/disease_gender_gap.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/disease_gender_gap.dta -------------------------------------------------------------------------------- /code_in_notes/datasets/hcw.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/hcw.dta -------------------------------------------------------------------------------- /code_in_notes/datasets/shannxi_export.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/shannxi_export.dta -------------------------------------------------------------------------------- /code_in_notes/datasets/soep_female_labor.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/soep_female_labor.dta -------------------------------------------------------------------------------- /code_in_notes/datasets/voting_cnty_clean.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/EconometricsCode_backup/cc28970b30fdd1224f24414049ad4f5991d6599f/code_in_notes/datasets/voting_cnty_clean.dta 
-------------------------------------------------------------------------------- /code_in_notes/fixed_effects_ntv.do: -------------------------------------------------------------------------------- 1 | // fixed_effects_ntv.do 2 | clear 3 | set more off 4 | use datasets/NTV_Aggregate_Data_reshaped.dta 5 | xtset tik_id year 6 | forvalues i=1/5{ 7 | gen log_pop_`i'=logpop^`i' 8 | gen log_wage_`i'=log_wage^`i' 9 | } 10 | // Simple regression 11 | reghdfe Votes_Edinstvo_ Watch_probit if year==1999, absorb(region) cluster(region) 12 | // Add socio-economic controls 13 | local E_controls "L.log_pop_* L.log_wage_* L.nurses L.doctors_pc" 14 | reghdfe Votes_Edinstvo_ Watch_probit `E_controls' if year==1999, absorb(region) cluster(region) 15 | // Add variables from the 1995 election 16 | local X_controls "L4.Votes_KPRF_ L4.Votes_Yabloko_ L4.Votes_NDR_ L4.Votes_LDPR_ L4.Turnout" 17 | reghdfe Votes_Edinstvo_ Watch_probit `E_controls' `X_controls' if year==1999, absorb(region) cluster(region) 18 | -------------------------------------------------------------------------------- /code_in_notes/hcw_lasso.do: -------------------------------------------------------------------------------- 1 | clear 2 | set more off 3 | use datasets/shannxi_export.dta 4 | tsset year 5 | order year export_locationShaanxi 6 | foreach v of varlist export_locationShaanxi- export_locationZhejiang{ 7 | replace `v'=log(`v') 8 | egen std_`v'=std(`v') 9 | } 10 | local control "std_export_locationShanghai std_export_locationShanxi std_export_locationFujian std_export_locationGuangdong std_export_locationGuizhou std_export_locationHeilongjiang std_export_locationJiangxi std_export_locationAnhui" 11 | lasso linear export_locationShaanxi `control' if year<2013 12 | local selected_vars=e(post_sel_vars) 13 | predict p_shaanxi_lasso 14 | reg `selected_vars' if year<2013 15 | predict p_shaanxi 16 | label variable export_locationShaanxi "陕西省实际出口" 17 | label variable p_shaanxi "陕西省出口的反事实估计" 18 | label variable p_shaanxi_lasso "陕西省出口的反事实估计Lasso" 19 | tsline 
export_locationShaanxi p_shaanxi p_shaanxi_lasso, lp(solid dash) xline(2013 2016, lstyle(grid)) scheme(s1mono) xtitle("年份") ytitle("出口对数值") 20 | graph export hcw_lasso.pdf, replace 21 | -------------------------------------------------------------------------------- /code_in_notes/heteroscedasticity.do: -------------------------------------------------------------------------------- 1 | // file: heteroscedasticity.do 2 | set seed 19880505 3 | cap program drop dgp 4 | program define dgp, rclass 5 | syntax [, obs(integer 100) b(real 1) hetero(real 0)] 6 | drop _all 7 | set obs `obs' 8 | tempvar x y sigma 9 | gen `x'=rnormal() 10 | gen `sigma'=1+`hetero'*abs(`x') 11 | gen `y'=3+`b'*`x'+rnormal(0,`sigma') 12 | quietly: reg `y' `x' 13 | return scalar b=_b[`x'] 14 | return scalar se=_se[`x'] 15 | quietly: reg `y' `x', robust 16 | return scalar r_b=_b[`x'] 17 | return scalar r_se=_se[`x'] 18 | end 19 | // Monte Carlo: first with the default hetero(0) (error s.d. constant at 1), then with hetero(1) (error s.d. = 1+|x|); compare conventional and robust standard errors 20 | simulate b=r(b) se=r(se) rb=r(r_b) rse=r(r_se),/* 21 | */ reps(2000): dgp 22 | su 23 | simulate b=r(b) se=r(se) rb=r(r_b) rse=r(r_se),/* 24 | */ reps(2000): dgp, hetero(1) 25 | su 26 | -------------------------------------------------------------------------------- /code_in_notes/ipw_doubly_robust.do: -------------------------------------------------------------------------------- 1 | // ipw_doubly_robust.do 2 | clear 3 | set more off 4 | use datasets/Lalonde_nsw/nsw.dta 5 | append using datasets/Lalonde_nsw/cps_controls.dta 6 | gen age2=age^2 7 | gen age3=age^3 8 | egen std_age2=std(age2) 9 | egen std_age3=std(age3) 10 | gen edu2=education^2 11 | gen edu3=education^3 12 | gen unemployed75=re75==0 13 | gen re75s=re75^2 14 | gen re75c=re75^3 15 | egen std_re75s=std(re75s) 16 | egen std_re75c=std(re75c) 17 | gen re75_edu=re75*education 18 | gen hisp_unemp=hispanic*unemployed75 19 | local controls "age-nodegree std_age2-std_age3 edu2-edu3" 20 | local extra_controls "re75 std_re75s std_re75c unemployed75 re75_edu hisp_unemp" 21 | // Inverse probability weighting 22 | cap 
teffects ipw (re78) (treat `controls' re75 unemployed75, logit), osample(omitted) 23 | drop if omitted 24 | drop omitted 25 | teffects ipw (re78) (treat `controls' re75 unemployed75, logit) 26 | // Doubly robust estimator (augmented IPW) 27 | teffects aipw (re78 `controls' `extra_controls') (treat `controls' `extra_controls', logit) 28 | -------------------------------------------------------------------------------- /code_in_notes/lasso_and_ridge_shrinkage.do: -------------------------------------------------------------------------------- 1 | clear 2 | set more off 3 | use "datasets/cfps_adult.dta" 4 | // data cleaning and generating 5 | gen log_income=log(p_income) 6 | drop if te4<0 7 | gen age=2014-cfps_birthy 8 | gen age2=age^2 9 | egen std_age=std(age) 10 | egen std_age2=std(age2) 11 | tab te4, gen(edu) 12 | tab provcd14, gen(province) 13 | // lasso regression 14 | lasso linear log_income (std_age) std_age2 edu* province*, /* 15 | */selection(cv) 16 | lassoinfo 17 | local post_vars=e(post_sel_vars) 18 | coefpath 19 | graph export beta_lasso_plot.pdf, replace 20 | reg `post_vars' 21 | // ridge regression 22 | elasticnet linear log_income (std_age) std_age2 edu* province*, /* 23 | */selection(cv) alpha(0) 24 | lassoinfo 25 | local post_vars=e(post_sel_vars) 26 | coefpath 27 | graph export beta_ridge_plot.pdf, replace 28 | reg `post_vars' -------------------------------------------------------------------------------- /code_in_notes/linear_panel.do: -------------------------------------------------------------------------------- 1 | clear 2 | set more off 3 | use datasets/citydata.dta 4 | ** Declare the panel structure 5 | xtset CityCode Year 6 | ** Generate new variables 7 | gen log_gdp=log(v84) 8 | gen growth_gdp=log_gdp-L.log_gdp // how to build a lag: the L. operator (a by-group alternative follows) 
9 | sort CityCode Year 10 | by CityCode: gen growth_gdp2=log_gdp-log_gdp[_n-1] 11 | gen num_schools=v188 12 | gen log_edu_exp=log(v166) 13 | gen log_rd_exp=log(v167) 14 | ** controls 15 | local control "log_edu_exp log_rd_exp i.Year" 16 | ** Regressions 17 | ** pooled OLS 18 | reg growth_gdp log_gdp i.Year, vce(cl CityCode) 19 | outreg2 using linear_panel.doc, replace addtext(FE, No, Time FE, Yes) 20 | ** random effects 21 | xtreg growth_gdp log_gdp i.Year, re vce(cl CityCode) 22 | outreg2 using linear_panel.doc, append addtext(FE, No, Time FE, Yes) 23 | ** between group estimator 24 | xtreg growth_gdp log_gdp, be 25 | outreg2 using linear_panel.doc, append addtext(FE, No, Time FE, No) 26 | ** fixed effects 27 | xtreg growth_gdp log_gdp i.Year, fe vce(cl CityCode) 28 | outreg2 using linear_panel.doc, append addtext(FE, Yes, Time FE, Yes) 29 | ** LSDV 30 | reghdfe growth_gdp log_gdp i.Year, absorb(CityCode) cluster(CityCode) 31 | outreg2 using linear_panel.doc, append addtext(FE, Yes, Time FE, Yes) 32 | ** LSDV twoway cluster 33 | reghdfe growth_gdp log_gdp, absorb(i.CityCode i.Year) cluster(CityCode Year) 34 | outreg2 using linear_panel.doc, append addtext(FE, Yes, Time FE, Yes) 35 | 36 | ** Add control variables 37 | ** random effects 38 | xtreg growth_gdp log_gdp `control', re 39 | estimates store re 40 | ** fixed effects 41 | xtreg growth_gdp log_gdp `control', fe 42 | estimates store fe 43 | ** Hausman test 44 | hausman fe re 45 | 46 | ** interactive fixed effects 47 | local control "log_edu_exp log_rd_exp" 48 | regife growth_gdp log_gdp `control', factors(CityCode Year, 1) 49 | regife growth_gdp log_gdp `control', factors(CityCode Year, 2) 50 | regife growth_gdp log_gdp `control', factors(CityCode Year, 3) 51 | regife growth_gdp log_gdp `control', factors(CityCode Year, 1) absorb(i.CityCode i.Year) 52 | -------------------------------------------------------------------------------- /code_in_notes/linear_panel_fd_np.do: 
-------------------------------------------------------------------------------- 1 | // linear_panel_fd_np.do 2 | clear 3 | set more off 4 | use datasets/voting_cnty_clean.dta 5 | 6 | xtset cnty90 year 7 | 8 | gen numdailies_l1=L4.numdailies 9 | gen prestout_l1=L4.prestout 10 | gen changedailies=numdailies-numdailies_l1 11 | gen changeprestout=prestout-prestout_l1 12 | // first differences (changes relative to the 4-period lag built above) 13 | reghdfe changeprestout changedailies if mainsample, absorb(i.st#i.year) cluster(cnty90) resid 14 | // check the serial correlation of the residuals 15 | predict resid_fd, residual 16 | reg resid_fd L4.resid_fd 17 | // fixed effects, additionally allowing heterogeneous state-level trends (state-by-year effects) 18 | reghdfe prestout numdailies if mainsample, absorb(i.st#i.year i.cnty90) cluster(cnty90) 19 | // fixed effects, additionally allowing county-specific linear trends 20 | reghdfe prestout numdailies if mainsample, absorb(i.cnty90#c.year i.cnty90) cluster(cnty90) 21 | -------------------------------------------------------------------------------- /code_in_notes/linear_panel_fe_ntv.do: -------------------------------------------------------------------------------- 1 | clear 2 | set more off 3 | use datasets/NTV_Aggregate_Data_reshaped.dta 4 | xtset tik_id year 5 | // NTV had not yet been founded in 1995, so exposure is zero except in 1999 6 | gen Watch_probit_p=0 7 | replace Watch_probit_p=Watch_probit if year==1999 8 | // first differences 9 | gen delta_Votes_SPS_=Votes_SPS_-L4.Votes_SPS_ 10 | gen delta_Watch_probit_p=Watch_probit_p-L4.Watch_probit_p 11 | reg delta_Votes_SPS_ delta_Watch_probit_p i.year if year!=2003, cluster(region) 12 | // simple fixed-effects regression 13 | reghdfe Votes_SPS_ Watch_probit_p i.year if year!=2003, absorb(i.tik_id) cluster(region) 14 | // weight by 1996 population: pop96t is non-missing only in 1996, its within-unit mean spreads it to every year 15 | gen pop96t=population if year==1996 16 | egen pop96=mean(pop96t), by(tik_id) 17 | reghdfe Votes_SPS_ Watch_probit_p i.year if year!=2003 [aw=pop96], absorb(i.tik_id) cluster(region) 18 | -------------------------------------------------------------------------------- /code_in_notes/logit_and_roc.do: -------------------------------------------------------------------------------- 1 | clear 2 | set more off 3 | use datasets/cfps_adult.dta 4 | //
clean the data and generate variables 5 | drop if employ2014<0 6 | drop if te4<0 7 | drop if qa301<0 | qa301==5 8 | gen exit_labor=employ2014==3 9 | gen age=2014-cfps_birthy 10 | gen age2=age^2 11 | gen urban_hukou=qa301==3 12 | // odds ratio 13 | tab exit_labor cfps_gender 14 | // estimate and predict 15 | local x "age age2 cfps_gender urban_hukou i.provcd14 i.te4" 16 | local reportvar "age age2 cfps_gender urban_hukou" 17 | logit exit_labor cfps_gender 18 | margins, dydx(*) post 19 | outreg2 using logit_roc.doc, replace keep(`reportvar') ctitle(Logit) 20 | logistic exit_labor cfps_gender 21 | outreg2 using logit_roc.doc, append keep(`reportvar') ctitle(Logistic) 22 | logit exit_labor `x' 23 | margins, dydx(*) post 24 | outreg2 using logit_roc.doc, append keep(`reportvar') ctitle(Logit) 25 | probit exit_labor `x' 26 | margins, dydx(*) post 27 | outreg2 using logit_roc.doc, append keep(`reportvar') ctitle(Probit) 28 | logistic exit_labor `x' 29 | outreg2 using logit_roc.doc, append keep(`reportvar') ctitle(Logistic) 30 | // to use a probit model instead: probit exit_labor `x' 31 | predict p_exit // predicted probability 32 | // precision and recall at cutoff=0.5; missing predictions stay unclassified (missing compares as larger than any number, so an unguarded p_exit>0.5 would label them 1) 33 | gen predict_exit=p_exit>0.5 if !missing(p_exit) 34 | gen TP=(exit_labor==1 & predict_exit==1) 35 | gen FP=(exit_labor==0 & predict_exit==1) 36 | gen FN=(exit_labor==1 & predict_exit==0) 37 | gen TN=(exit_labor==0 & predict_exit==0) 38 | foreach v of varlist TP FP FN TN{ 39 | quietly: su `v' 40 | local `v' = r(mean) 41 | } 42 | local R2=e(r2_p) 43 | local precision=`TP'/(`TP'+`FP') 44 | local recall=`TP'/(`TP'+`FN') 45 | local accuracy=(`TP'+`TN')/(`TP'+`TN'+`FP'+`FN') 46 | local F1=(2*`precision'*`recall')/(`precision'+`recall') 47 | di "R2=`R2'" 48 | di "查准率=`precision'" 49 | di "查全率=`recall'" 50 | di "精度=`accuracy'" 51 | di "F1=`F1'" 52 | // plot the ROC curve 53 | lroc 54 | graph export roc.pdf, replace 55 | // plot sensitivity and specificity 56 | lsens 57 | graph export sens_speci.pdf, replace 58 | -------------------------------------------------------------------------------- /code_in_notes/matching.do:
-------------------------------------------------------------------------------- 1 | // matching.do 2 | clear 3 | set more off 4 | 5 | // experimental data 6 | use datasets/Lalonde_nsw/nsw.dta 7 | local controls "age-nodegree" 8 | // ex-post comparison of the outcome 9 | bysort treat: su re78 10 | reg re78 treat, r 11 | // control for covariates by regression 12 | reg re78 treat `controls', r 13 | // inspect common support 14 | bysort treat: su `controls' 15 | drop if age>50 16 | drop if education>14 17 | // nearest-neighbor matching, ATE 18 | teffects nnmatch (re78 `controls') (treat), nn(1) 19 | // assess balance between the two groups 20 | // nearest-neighbor matching, ATT 21 | teffects nnmatch (re78 `controls') (treat), nn(1) atet 22 | tebalance summarize 23 | // assess unconfoundedness (re75 used as the outcome) 24 | teffects nnmatch (re75 `controls') (treat), nn(1) 25 | 26 | 27 | // exact matching: first drop the observations that cannot be matched exactly 28 | cap teffects nnmatch (re78 `controls') (treat), nn(1) ematch(black hispanic married nodegree) osample(omitted) 29 | drop if omitted 30 | drop omitted 31 | cap teffects nnmatch (re78 `controls') (treat), nn(1) ematch(black hispanic married nodegree) osample(omitted) 32 | drop if omitted 33 | drop omitted 34 | // then run the matching estimator in the same way 35 | teffects nnmatch (re78 age education) (treat), nn(1) ematch(black hispanic married nodegree) gen(matched) 36 | tebalance summarize 37 | -------------------------------------------------------------------------------- /code_in_notes/matching_nonexp.do: -------------------------------------------------------------------------------- 1 | // matching_nonexp.do 2 | // add the non-experimental CPS data to improve matching 3 | // reload the data (some observations were dropped above) 4 | clear 5 | set more off 6 | use datasets/Lalonde_nsw/nsw.dta 7 | append using datasets/Lalonde_nsw/cps_controls.dta 8 | local controls "age-nodegree" 9 | gen unemployed75=re75==0 10 | // simple comparison 11 | bysort treat: su re78 12 | reg re78 treat, r 13 | // control for covariates by regression 14 | reg re78 treat `controls', r 15 | //inspect common support 16 | bysort treat: su `controls' 17 | drop if age>50 18 | drop if education>17 19 | drop if education<4 20 | // nearest-neighbor matching; NOTE(review): the original comment said ATE, but the calls that follow pass atet (ATT) 21 | local nn=5 22 | teffects nnmatch (re78
`controls') (treat), nn(`nn') atet 23 | teffects nnmatch (re78 `controls' re75) (treat), nn(`nn') atet 24 | cap teffects nnmatch (re78 age education re75) (treat), nn(`nn') ematch(black hispanic married nodegree) atet osample(omitted) 25 | drop if omitted 26 | drop omitted 27 | cap teffects nnmatch (re78 age education re75) (treat), nn(`nn') ematch(black hispanic married nodegree) atet osample(omitted) 28 | drop if omitted 29 | drop omitted 30 | teffects nnmatch (re78 age education re75) (treat), nn(`nn') ematch(black hispanic married nodegree) atet 31 | // assess balance between the two groups 32 | tebalance summarize 33 | // assess unconfoundedness (re75 used as the outcome) 34 | teffects nnmatch (re75 `controls') (treat), nn(20) atet 35 | -------------------------------------------------------------------------------- /code_in_notes/matching_psm.do: -------------------------------------------------------------------------------- 1 | // matching_psm.do 2 | clear 3 | set more off 4 | use datasets/Lalonde_nsw/nsw.dta 5 | append using datasets/Lalonde_nsw/cps_controls.dta 6 | gen age2=age^2 7 | gen age3=age^3 8 | egen std_age2=std(age2) 9 | egen std_age3=std(age3) 10 | gen edu2=education^2 11 | gen edu3=education^3 12 | gen unemployed75=re75==0 13 | gen re75s=re75^2 14 | gen re75c=re75^3 15 | egen std_re75s=std(re75s) 16 | egen std_re75c=std(re75c) 17 | gen re75_edu=re75*education 18 | gen hisp_unemp=hispanic*unemployed75 19 | local controls "age-nodegree std_age2-std_age3 edu2-edu3" 20 | local extra_controls "re75 std_re75s std_re75c unemployed75 re75_edu hisp_unemp" 21 | // estimate the propensity score 22 | logit treat `controls' `extra_controls' 23 | predict ps 24 | // descriptive statistics of ps by treatment status 25 | bysort treat: su ps, de 26 | twoway (hist ps if treat==0, color(green%30))/* 27 | */(hist ps if treat==1, color(red%30))/* 28 | */, leg(lab(1 "Untreated") lab(2 "Treated")) 29 | // check common support: flag ps<0.15 and redraw the histograms on the retained (~ignore) observations only 30 | gen ignore=ps<0.15 31 | twoway (hist ps if treat==0 & ~ignore, color(green%30))/* 32 | */(hist ps if treat==1 & ~ignore, color(red%30))/* 33 | */, leg(lab(1 "Untreated") lab(2 "Treated"))
34 | // run propensity-score matching (ATT) on the observations with ~ignore 35 | local nn=3 36 | teffects psmatch (re78) (treat `controls') if ~ignore, atet nn(`nn') 37 | teffects psmatch (re78) (treat `controls' `extra_controls') if ~ignore, atet nn(`nn') 38 | tebalance summarize 39 | -------------------------------------------------------------------------------- /code_in_notes/mlogit_employ.do: -------------------------------------------------------------------------------- 1 | // mlogit_employ.do 2 | clear 3 | set more off 4 | use datasets/cfps_adult 5 | // clean data: keep the four outcome categories of qg2 6 | keep if qg2==1 | qg2==2 | qg2==3 | qg2==4 7 | tab qg2 8 | gen age=2014-cfps_birthy 9 | drop if te4<0 10 | // regress: multinomial logit with outcome 1 as the base category 11 | mlogit qg2 age cfps_gender i.te4, baseoutcome(1) 12 | outreg2 using mlogit_employ.doc, replace 13 | // test equality of the age coefficient across equations 2 and 3 14 | test [2]age=[3]age 15 | -------------------------------------------------------------------------------- /code_in_notes/model_selection.do: -------------------------------------------------------------------------------- 1 | // file: model_selection.do - R2, adjusted R2, AIC and BIC for polynomial fits of degree 1 to 10 2 | clear 3 | set obs 50 4 | gen x=runiform()*3 5 | gen y=exp(x)+rnormal()*2 6 | local control "" 7 | scalar K=0 8 | forvalues i=1/10{ 9 | gen x`i'=x^`i' 10 | local control "`control' x`i'" 11 | scalar K=K+1 12 | quietly: reg y `control' 13 | scalar r2=e(r2) 14 | scalar r2a=e(r2_a) 15 | scalar aic=-2*e(ll)+2*K 16 | scalar bic=-2*e(ll)+log(e(N))*K 17 | display `i' _skip r2 _skip r2a _skip aic _skip bic 18 | } 19 | -------------------------------------------------------------------------------- /code_in_notes/multi_collinearity.do: -------------------------------------------------------------------------------- 1 | // file: multi_collinearity.do 2 | set seed 19880505 3 | cap program drop dgp 4 | program define dgp, rclass 5 | syntax [, obs(integer 20) b(real 0) mc(real 1)] 6 | drop _all 7 | set obs `obs' 8 | tempvar x1 x2 y sigma 9 | gen `x1'=rnormal() 10 | if `mc'==1{ 11 | gen `x2'=3/sqrt(10)*`x1'+1/sqrt(10)*rnormal() 12 | } 13 | else { 14 | gen `x2'=rnormal() 15 | } 16 | gen
`y'=`b'*`x1'+`x2'+rnormal() 17 | quietly: reg `y' `x1' `x2' 18 | return scalar b=_b[`x1'] 19 | return scalar se=_se[`x1'] 20 | if abs(_b[`x1']/_se[`x1'])>=invt(`obs'-3,0.975) { 21 | return scalar rejected=1 22 | } 23 | else { 24 | return scalar rejected=0 25 | } 26 | return scalar neg=_b[`x1']<0 27 | end 28 | // multicollinearity, size (dgp called with its default options) 29 | simulate rejected=r(rejected) b=r(b) se=r(se) neg=r(neg)/* 30 | */ ,reps(2000):dgp 31 | su 32 | // multicollinearity, power (b(1)) 33 | simulate rejected=r(rejected) b=r(b) se=r(se) neg=r(neg)/* 34 | */ ,reps(2000):dgp, b(1) 35 | su 36 | // no multicollinearity, power (b(1), mc(0)) 37 | simulate rejected=r(rejected) b=r(b) se=r(se) neg=r(neg)/* 38 | */ ,reps(2000):dgp, b(1) mc(0) 39 | su 40 | // multicollinearity, power with large N (obs(100)) 41 | simulate rejected=r(rejected) b=r(b) se=r(se) neg=r(neg)/* 42 | */ ,reps(2000):dgp, b(1) obs(100) 43 | su 44 | -------------------------------------------------------------------------------- /code_in_notes/nls_mpc.do: -------------------------------------------------------------------------------- 1 | clear 2 | set more off 3 | use "datasets/cfps_family_econ.dta" 4 | // linear regression 5 | reg expense fincome1 6 | predict expense_linear 7 | // nonlinear regression (on the sample cleaned below) 8 | drop if fincome1==. | fincome1<0 9 | drop if expense==.
|expense<0 | expense>5*fincome1 10 | nl (expense={alpha=_b[_cons]}+{beta=_b[fincome1]}*fincome1^{gamma=1}) 11 | predict expense_nls 12 | // compute the MPC: derivative of the fitted curve with respect to fincome1 13 | gen mpc=_b[/beta]*_b[/gamma]*fincome1^(_b[/gamma]-1) 14 | // plot 15 | sort fincome1 16 | twoway (scatter expense fincome1)/* 17 | */ (line expense_linear fincome1)/* 18 | */ (line expense_nls fincome1) 19 | graph export nls_mpc.pdf, replace 20 | -------------------------------------------------------------------------------- /code_in_notes/ohie_qje.do: -------------------------------------------------------------------------------- 1 | // replication of OHIE-QJE paper 2 | clear frames 3 | set more off 4 | use datasets/OHIE_QJE.dta 5 | 6 | // LATE, results without control variables 7 | // 1st stage (the denominator) 8 | reg ohp_std_ever_admin treatment if rx_num_mod_12m!=. , cl(household_id) 9 | local first_stage=_b[treatment] 10 | // Intention-to-Treat (the numerator) 11 | reg rx_num_mod_12m treatment, cluster(household_id) 12 | local ITT=_b[treatment] 13 | // IV estimate, then the same quantity as ITT / first stage 14 | ivreg rx_num_mod_12m (ohp_std_ever_admin = treatment), cl(household_id) 15 | di `ITT'/`first_stage' 16 | 17 | // add control variables 18 | // 1st stage for Medicaid and for OHP Standard; the two coefficients are nearly identical, indicating the lottery only affected OHP Standard 19 | // controls: draw_lottery is the lottery round, numhh is the number of people in the applying household 20 | reghdfe ohp_all_ever_admin treatment, absorb(i.numhh#i.draw_lottery) cl(household_id) 21 | reghdfe ohp_std_ever_admin treatment, absorb(i.numhh#i.draw_lottery) cl(household_id) 22 | 23 | // ITT 24 | // prescription drugs, extensive margin & total utilization 25 | reghdfe rx_any_12m treatment, absorb(i.numhh#i.draw_lottery) cl(household_id) 26 | reghdfe rx_num_mod_12m treatment, absorb(i.numhh#i.draw_lottery) cl(household_id) 27 | // emergency room, extensive margin & total utilization 28 | reghdfe er_any_12m treatment, absorb(i.numhh#i.draw_lottery) cl(household_id) 29 | reghdfe er_num_mod_12m treatment, absorb(i.numhh#i.draw_lottery) cl(household_id) 30 | 31 | // IV estimates 32 | ivreghdfe rx_any_12m (ohp_std_ever_admin = treatment), absorb(i.numhh#i.draw_lottery) cl(household_id) 33 | ivreghdfe
rx_num_mod_12m (ohp_std_ever_admin = treatment), absorb(i.numhh#i.draw_lottery) cl(household_id) 34 | // emergency room, extensive margin & total utilization 35 | ivreghdfe er_any_12m (ohp_std_ever_admin = treatment), absorb(i.numhh#i.draw_lottery) cl(household_id) 36 | ivreghdfe er_num_mod_12m (ohp_std_ever_admin = treatment), absorb(i.numhh#i.draw_lottery) cl(household_id) 37 | 38 | 39 | -------------------------------------------------------------------------------- /code_in_notes/ohie_qje_joint_test.do: -------------------------------------------------------------------------------- 1 | // ohie_qje_joint_test.do 2 | clear frames 3 | set more off 4 | use datasets/OHIE_QJE.dta 5 | 6 | // table2 treatment-control balance 7 | // control mean of survey response 8 | // note the use of weights: the appendix describes the survey sampling design, and the weights come from that design 9 | su returned_12m if treatment==0 [iw=weight_12m ] 10 | // alternative: a regression with no regressors, i.e. on the constant only 11 | reg returned_12m if treatment==0 [iw=weight_12m ] 12 | di e(rmse) 13 | 14 | // compare pre-randomization characteristics 15 | local lottery_list "birthyear_list female_list english_list self_list first_day_list have_phone_list pobox_list zip_msa" 16 | // compare them one at a time 17 | foreach v of varlist `lottery_list'{ 18 | reg `v' treatment [iw=weight_12m ], cl(household_id) 19 | test treatment 20 | } 21 | // compare them jointly: stack the characteristics in a separate frame and test all treatment interactions at once 22 | frames put `lottery_list' treatment weight_12m household_id numhh_list, into(joint_compare) 23 | frame change joint_compare 24 | local i=0 25 | foreach v of varlist `lottery_list'{ 26 | local i=`i'+1 27 | rename `v' outcome`i' 28 | } 29 | gen id=_n 30 | reshape long outcome, i(id) j(varid) 31 | local test_coefs "" 32 | forvalues j=1/`i'{ 33 | local test_coefs "`test_coefs' 1.treatment#`j'.varid" 34 | } 35 | di "`test_coefs'" 36 | reg outcome i.varid i.treatment#i.varid i.numhh_list#i.varid , cl(household_id) // NOTE(review): weight_12m is copied into the frame but this regression is unweighted - confirm 37 | test `test_coefs' 38 | frame change default 39 | -------------------------------------------------------------------------------- /code_in_notes/ohie_qje_test_one.do:
-------------------------------------------------------------------------------- 1 | // ohie_qje_test_one.do 2 | clear 3 | set more off 4 | use datasets/OHIE_QJE.dta 5 | 6 | // characteristics to compare 7 | local lottery_list "birthyear_list female_list english_list /* 8 | */ self_list first_day_list have_phone_list pobox_list zip_msa" 9 | // compare them one at a time 10 | foreach v of varlist `lottery_list'{ 11 | reg `v' treatment [iw=weight_12m ], robust 12 | test treatment 13 | } 14 | -------------------------------------------------------------------------------- /code_in_notes/ohie_science.do: -------------------------------------------------------------------------------- 1 | // replication of OHIE-Science paper 2 | clear frames 3 | set more off 4 | use datasets/OHIE_Science.dta 5 | 6 | // main results 7 | // 1st stage 8 | reg ohp_all_ever_30sep2009 treatment ed_visit_09mar2008 i.numhh_list, cl(household_id) 9 | // ITT 10 | reg ed_visit_30sep2009 treatment ed_visit_09mar2008 i.numhh_list, cl(household_id) 11 | reg num_visit_cens_ed treatment num_visit_pre_cens_ed i.numhh_list, cl(household_id) 12 | // main results 13 | ivregress 2sls ed_visit_30sep2009 (ohp_all_ever_30sep2009=treatment ) ed_visit_09mar2008 i.numhh_list, cl(household_id) 14 | ivregress 2sls num_visit_cens_ed (ohp_all_ever_30sep2009=treatment ) num_visit_pre_cens_ed i.numhh_list, cl(household_id) 15 | 16 | // results differ with different sets of controls 17 | local extra_controls "female_list i.birthyear_list english_list" 18 | ivregress 2sls ed_visit_30sep2009 (ohp_all_ever_30sep2009=treatment ), cl(household_id) 19 | ivregress 2sls ed_visit_30sep2009 (ohp_all_ever_30sep2009=treatment ) i.numhh_list, cl(household_id) 20 | ivregress 2sls ed_visit_30sep2009 (ohp_all_ever_30sep2009=treatment ) ed_visit_09mar2008 i.numhh_list `extra_controls', cl(household_id) 21 | 22 | // marginal treatment effects 23 | // the i. factor notation cannot be used here, so generate the dummies explicitly 24 | quietly{ 25 | tab birthyear_list, gen(birthyear_dummies) 26 | tab numhh_list, gen(numhh_dummies) 27 | } 28 | local controls
"ed_visit_09mar2008 female_list numhh_dummies* birthyear_dummies* english_list" 29 | // propensity score 30 | logit ohp_all_ever_30sep2009 treatment `controls' 31 | // marginal treatment effects, fitting the function of the propensity score with a polynomial 32 | margte ed_visit_30sep2009 `controls', treatment(ohp_all_ever_30sep2009 treatment `controls') common poly(4) noboot savepropensity 33 | mat mte_mat=e(mte) 34 | quietly: su p, de 35 | local max_p=r(p99) 36 | di `max_p' 37 | // the reported average treatment effect turned out to be inaccurate, so recompute it; frame fmte holds pclass (1 to 99) and the matching entry of mte_mat 38 | frame create fmte pclass mte 39 | forvalues i=1/99{ 40 | frame post fmte (`i') (mte_mat[1,`i']) 41 | } 42 | frame fmte: line mte pclass if pclass<`max_p'*100 43 | frame fmte: su mte if pclass<`max_p'*100 44 | // marginal treatment effects, fitting the function of the propensity score semiparametrically 45 | margte ed_visit_30sep2009 `controls', treatment(ohp_all_ever_30sep2009 treatment `controls') semi 46 | -------------------------------------------------------------------------------- /code_in_notes/oneway_anova.do: -------------------------------------------------------------------------------- 1 | // file: oneway_anova.do 2 | use datasets/cfps_adult, clear 3 | gen log_income=log(p_income+1) 4 | drop if te4<0 5 | tab te4 6 | // regression approach: 7 | reg log_income i.te4 8 | di "F=" e(F) " RSS=" e(rss) 9 | // one-way analysis of variance 10 | anova log_income te4 11 | di "F=" e(F) " RSS=" e(rss) 12 | -------------------------------------------------------------------------------- /code_in_notes/panel_binary.do: -------------------------------------------------------------------------------- 1 | clear 2 | set more off 3 | *** gen data *** 4 | use datasets/soep_female_labor 5 | replace husworkhour=husworkhour/1000 6 | gen logIncome=log(income) 7 | egen AveIncome=mean(income),by(persnr) 8 | gen logAveIncome=log(AveIncome) 9 | drop AveIncome 10 | gen age2=age^2/100 11 | egen MaxChld6=max(chld6), by(persnr) 12 | *** controls *** 13 | xtset persnr year 14 | local control1 "chld6 chld16 age age2 logIncome husworkhour husemployment" 15 | local control2 "chld6 chld16 age2 logIncome husworkhour husemployment" 16 | local
fixed "edu region" 17 | quietly: tab year, gen(year) 18 | local yeardummy "year1-year4" 19 | foreach v in `control2'{ 20 | egen ave`v'=mean(`v'), by(persnr) 21 | } 22 | local controlmean1 "MaxChld6 avechld6 avechld16 logAveIncome avehusworkhour avehusemployment" 23 | *** descriptive stats, grouped by the within-person employment total (0, 5, anything else) *** 24 | egen temp=sum(employment),by(persnr) 25 | gen sortvara=1 if temp==0 26 | replace sortvara=2 if temp==5 27 | replace sortvara=3 if sortvara==. 28 | drop temp 29 | bysort sortvara: outreg2 using soep_female_labor_descr.tex, replace sum(log) keep(employment `control1' `fixed') eqkeep(mean sd min max) 30 | *** regression: marginal effects from each model are appended to one table *** 31 | probit employment `control1' `fixed' `yeardummy' 32 | margins, dydx(*) post 33 | outreg2 using soep_female_labor_res.tex, replace ctitle("Probit") keep(`control1' `fixed') 34 | logit employment `control1' `fixed' `yeardummy' 35 | margins, dydx(*) post 36 | outreg2 using soep_female_labor_res.tex, append ctitle("Logit") keep(`control1' `fixed') 37 | xtprobit employment `control1' `fixed' `yeardummy' 38 | margins, dydx(*) post 39 | outreg2 using soep_female_labor_res.tex, append ctitle("Random Effects Probit") keep(`control1' `fixed') 40 | xtprobit employment `control1' `controlmean1' `fixed' `yeardummy' 41 | margins, dydx(*) post 42 | outreg2 using soep_female_labor_res.tex, append ctitle("Chamberlain Probit") keep(`control1' `fixed' MaxChld6) 43 | xtlogit employment `control2' `yeardummy', fe 44 | margins, dydx(*) post 45 | outreg2 using soep_female_labor_res.tex, append ctitle("Fixed Effects Logit") keep(`control2') 46 | -------------------------------------------------------------------------------- /code_in_notes/panel_dynamic.do: -------------------------------------------------------------------------------- 1 | clear 2 | set more off 3 | set matsize 10000 4 | use datasets/DDCGdata_final 5 | // country code 6 | egen ccode=group(wbcode) 7 | // declare the panel structure 8 | xtset ccode year 9 | // fixed-effects regressions 10 | xtreg y l.y dem i.year, fe cluster(ccode) 11 | xtreg y l(1/2).y
dem i.year, fe cluster(wbcode2) 12 | xtreg y l(1/4).y dem i.year, fe cluster(wbcode2) 13 | // dynamic panel, difference GMM 14 | xtabond2 y l.y dem i.year,/* 15 | */gmmstyle(y, laglimits(2 .)) gmmstyle(dem, laglimits(1 .)) ivstyle(i.year, p)/* 16 | */ noleveleq robust nodiffsargan 17 | xtabond2 y l(1/2).y dem i.year,/* 18 | */gmmstyle(y, laglimits(2 .)) gmmstyle(dem, laglimits(1 .)) ivstyle(i.year, p)/* 19 | */ noleveleq robust nodiffsargan 20 | xtabond2 y l(1/4).y dem i.year,/* 21 | */gmmstyle(y, laglimits(2 .)) gmmstyle(dem, laglimits(1 .)) ivstyle(i.year, p)/* 22 | */ noleveleq robust nodiffsargan 23 | // dynamic panel, system GMM 24 | xtabond2 y l.y dem i.year,/* 25 | */gmmstyle(y, laglimits(2 4)) gmmstyle(dem, laglimits(1 3)) ivstyle(i.year)/* 26 | */ robust nodiffsargan 27 | xtabond2 y l(1/2).y dem i.year,/* 28 | */gmmstyle(y, laglimits(2 4)) gmmstyle(dem, laglimits(1 3)) ivstyle(i.year)/* 29 | */ robust nodiffsargan 30 | xtabond2 y l(1/4).y dem i.year,/* 31 | */gmmstyle(y, laglimits(2 4)) gmmstyle(dem, laglimits(1 3)) ivstyle(i.year)/* 32 | */ robust nodiffsargan 33 | -------------------------------------------------------------------------------- /code_in_notes/poisson.do: -------------------------------------------------------------------------------- 1 | // poisson.do - count models on data collapsed to one row per ccode; the data path is relative, like the other scripts 2 | clear 3 | set more off 4 | use "datasets/CivilConflict.dta" 5 | collapse (sum) any_prio (mean)lpop gdp_g y_0 gini aid_capita,by(ccode) 6 | 7 | local control "lpop gdp_g y_0 gini aid_capita" 8 | poisson any_prio `control' 9 | nbreg any_prio `control' 10 | zip any_prio `control', inflate(_cons) 11 | zip, irr 12 | -------------------------------------------------------------------------------- /code_in_notes/propensity_score.do: -------------------------------------------------------------------------------- 1 | // propensity_score.do 2 | clear 3 | set more off 4 | 5 | // experimental data 6 | use datasets/Lalonde_nsw/nsw.dta 7 | gen age2=age^2 8 | local controls
"age-nodegree age2" 9 | //logit estimation 10 | logit treat `controls' 11 | predict ps_1 12 | //inspect common support 13 | bysort treat: su ps_1 14 | //logit estimation including the pre-treatment outcome 15 | bysort treat: su re75 16 | logit treat `controls' re75 17 | //ex-post comparison 18 | bysort treat: su re78 19 | reg re78 treat, r 20 | 21 | 22 | // include the non-experimental data 23 | append using datasets/Lalonde_nsw/cps_controls.dta 24 | replace age2=age^2 25 | //logit estimation 26 | logit treat `controls' 27 | predict ps_2 28 | //polynomial logit: add every pairwise product of the controls 29 | local polynomial_control "`controls'" 30 | foreach v of varlist `controls'{ 31 | foreach w of varlist `controls'{ 32 | gen _`v'_`w'=`v'*`w' 33 | local polynomial_control "`polynomial_control' _`v'_`w'" 34 | } 35 | } 36 | logit treat `polynomial_control' 37 | predict ps_3 38 | //select variables with lasso; NOTE(review): std_polynomial_control is built below but the lasso call uses `controls' - confirm which list is intended 39 | local std_polynomial_control "" 40 | foreach v of varlist `polynomial_control'{ 41 | egen std_`v'=std(`v') 42 | su std_`v', mean 43 | if r(N)>0 { 44 | local std_polynomial_control "`std_polynomial_control' std_`v'" 45 | } 46 | } 47 | lasso logit treat `controls', grid(5) 48 | local post_vars=e(allvars_sel) 49 | di "`post_vars'" 50 | //post-estimation: refit the logit on the variables stored in post_vars 51 | logit treat `post_vars' 52 | predict ps_4 53 | //matching 54 | teffects psmatch (re78) (treat `post_vars') 55 | //match on a user-specified propensity score (log-odds of ps_2) 56 | gen trans_ps2=log(ps_2/(1-ps_2)) 57 | bysort treat: su ps_2 58 | teffects psmatch (re78) (treat trans_ps2), atet 59 | //next steps: check common support, etc. 60 | -------------------------------------------------------------------------------- /code_in_notes/qreg_consump.do: -------------------------------------------------------------------------------- 1 | // file: qreg_consump.do 2 | use datasets/cfps_family_econ, clear 3 | drop if expense<=0 4 | drop if fincome1<=0 5 | gen log_consump=log(expense) 6 | gen log_income=log(fincome1) 7 | // quantile regression, one variate 8 | qreg log_consump log_income, q(0.25) 9 | predict p_log_consump25 10 | label variable p_log_consump25 "25% quantile" 11 | qreg log_consump log_income, q(0.50) 12 |
predict p_log_consump50 13 | label variable p_log_consump50 "50% quantile" 14 | qreg log_consump log_income, q(0.75) 15 | predict p_log_consump75 16 | label variable p_log_consump75 "75% quantile" 17 | // graph: scatter plus the three fitted quantile lines 18 | sort log_income 19 | twoway (scatter log_consump log_income)/* 20 | */ (line p_log_consump25 log_income)/* 21 | */ (line p_log_consump50 log_income)/* 22 | */ (line p_log_consump75 log_income) 23 | // comparison: test equality of the log_income coefficient across the three quantiles 24 | sqreg log_consump log_income i.provcd14, q(0.25 0.5 0.75) 25 | test [q25]log_income=[q50]log_income=[q75]log_income 26 | -------------------------------------------------------------------------------- /code_in_notes/qreg_outlier.do: -------------------------------------------------------------------------------- 1 | // file: qreg_outlier.do 2 | clear 3 | set more off 4 | set obs 50 5 | // generate vars 6 | gen x=rnormal() 7 | gen u=rnormal() 8 | // y 9 | gen y=1+x+u 10 | // create one outlier: the observation with the smallest x gets y=30 11 | sort x 12 | replace y=30 if _n==1 13 | // OLS versus median regression 14 | reg y x 15 | predict p_y_reg 16 | label variable p_y_reg "OLS" 17 | qreg y x , q(0.50) 18 | predict p_y_qreg 19 | label variable p_y_qreg "QREg" 20 | // graph 21 | twoway (scatter y x)/* 22 | */ (line p_y_reg x)/* 23 | */ (line p_y_qreg x) 24 | graph export qreg_outlier.pdf, replace 25 | -------------------------------------------------------------------------------- /code_in_notes/qreg_simulate.do: -------------------------------------------------------------------------------- 1 | // file: qreg_simulate.do 2 | clear 3 | set more off 4 | set obs 1000 5 | // generate vars 6 | gen x=rnormal() 7 | gen u=rnormal() 8 | // quantile rank of u 9 | gen q=normal(u) 10 | // coefficients: b(0.25)=1.25, b(0.5)=1.5 11 | gen b=1+q 12 | // y 13 | gen y=1+b*x+u 14 | // qreg 15 | qreg y x , q(0.25) 16 | qreg y x , q(0.50) 17 | qreg y x , q(0.75) 18 | -------------------------------------------------------------------------------- /code_in_notes/qreg_with_dummy.do: --------------------------------------------------------------------------------
1 | // file: qreg_with_dummy.do - quantile regressions of p_income on cfps_gender at the 0.5, 0.75 and 0.9 quantiles 2 | use datasets/cfps_adult, clear 3 | keep cfps_gender p_income 4 | drop if p_income<0 5 | bysort cfps_gender: su p_income, de 6 | qreg p_income cfps_gender, q(0.5) 7 | qreg p_income cfps_gender, q(0.75) 8 | qreg p_income cfps_gender, q(0.9) 9 | -------------------------------------------------------------------------------- /code_in_notes/reg_one_variate.do: -------------------------------------------------------------------------------- 1 | // file: reg_one_variate.do - regress qp102 on qp101 and plot the fitted line (presumably weight on height, judging by the p_weight label - confirm) 2 | use datasets/cfps_adult, clear 3 | drop if qp102<0 4 | drop if qp101<130 5 | reg qp102 qp101 6 | outreg2 using reg_one_variate.tex, replace 7 | predict p_weight 8 | label variable p_weight "预测的体重" 9 | sort qp101 // the scatter below keeps every 30th observation after this sort 10 | twoway (scatter qp102 qp101 if mod(_n,30)==20)/* 11 | */(line p_weight qp101 if qp101>130 & qp101<200),/* 12 | */ xscale(range(130 200)) 13 | graph export reg_one_variate.pdf, replace 14 | -------------------------------------------------------------------------------- /code_in_notes/reg_readings.do: -------------------------------------------------------------------------------- 1 | // file: reg_readings.do - log income on qq1101 with progressively richer te4 / provcd14 fixed effects 2 | use datasets/cfps_adult, clear 3 | gen log_income=log(p_income+1) 4 | drop if te4<0 5 | drop if qq1101<0 6 | reg log_income qq1101 7 | outreg2 using reg_readings.tex, replace /* 8 | */ addt(学历固定效应, "No", 省份固定效应, "No",/* 9 | */ 学历×省份固定效应, "No") 10 | reg log_income qq1101 i.te4 11 | outreg2 using reg_readings.tex, append keep(qq1101) /* 12 | */ addt(学历固定效应, "Yes", 省份固定效应, "No",/* 13 | */ 学历×省份固定效应, "No") 14 | reghdfe log_income qq1101, absorb(i.te4 i.provcd14) 15 | outreg2 using reg_readings.tex, append /* 16 | */ addt(学历固定效应, "Yes", 省份固定效应, "Yes",/* 17 | */ 学历×省份固定效应, "No") 18 | reghdfe log_income qq1101, absorb(i.te4#i.provcd14) 19 | outreg2 using reg_readings.tex, append /* 20 | */ addt(学历固定效应, "No", 省份固定效应, "No",/* 21 | */ 学历×省份固定效应, "Yes") 22 | --------------------------------------------------------------------------------
/code_in_notes/reg_small_b.do: -------------------------------------------------------------------------------- 1 | ** set the sample size and the degrees of freedom 2 | local sample_size=4 3 | local d_of_f=`sample_size'-2 4 | local replications=1000 5 | 6 | cap program drop random_b_reg 7 | program define random_b_reg, rclass 8 | version 12 9 | syntax [,obs(integer 4) random(real 0)] 10 | drop _all 11 | set obs `obs' 12 | tempvar x y u 13 | gen `x'=_n+(rnormal()^2-1)/sqrt(2)*`random' 14 | gen `u'=rnormal() 15 | gen `y'=`x'+`u' 16 | reg `y' `x' 17 | return scalar b=(_b[`x']-1) 18 | return scalar std_b=(_b[`x']-1)/_se[`x'] 19 | end 20 | 21 | simulate b=r(b) std_b=r(std_b), reps(`replications'): random_b_reg, obs(`sample_size') random(1.5) 22 | 23 | // dist. of b: standardize, then compare the empirical CDF with the normal CDF 24 | quietly: su b 25 | replace b=(b-r(mean))/r(sd) 26 | sort b 27 | gen b_empirical=_n/_N 28 | gen dist_normal=normal(b) 29 | label variable dist_normal "Normal Distribution" 30 | twoway (line b_empirical b) /// 31 | (line dist_normal b) /// 32 | , graphr(fcolor(white) color(white)) xtitle("") /// 33 | saving(reg_small_b_random, replace) 34 | 35 | 36 | // dist.
of standardized b 37 | sort std_b 38 | gen std_b_empirical=_n/_N 39 | gen t_den=t(`d_of_f',std_b) 40 | label variable t_den "t(`d_of_f') Distribution" 41 | twoway (line std_b_empirical std_b) /// 42 | (line t_den std_b) /// 43 | , graphr(fcolor(white) color(white)) xtitle("") /// 44 | saving(reg_small_stdb_random, replace) 45 | 46 | 47 | simulate b=r(b) std_b=r(std_b), reps(`replications'): random_b_reg, obs(`sample_size') random(0) 48 | // dist. of b (random(0) case) 49 | quietly: su b 50 | replace b=(b-r(mean))/r(sd) 51 | sort b 52 | gen b_empirical=_n/_N 53 | gen dist_normal=normal(b) 54 | label variable dist_normal "Normal Distribution" 55 | twoway (line b_empirical b) /// 56 | (line dist_normal b) /// 57 | , graphr(fcolor(white) color(white)) xtitle("") /// 58 | saving(reg_small_b_fixed, replace) 59 | 60 | 61 | // dist. of standardized b (random(0) case) 62 | sort std_b 63 | gen std_b_empirical=_n/_N 64 | gen t_den=t(`d_of_f',std_b) 65 | label variable t_den "t(`d_of_f') Distribution" 66 | twoway (line std_b_empirical std_b) /// 67 | (line t_den std_b) /// 68 | , graphr(fcolor(white) color(white)) xtitle("") /// 69 | saving(reg_small_stdb_fixed, replace) 70 | 71 | 72 | graph combine reg_small_b_random.gph reg_small_stdb_random.gph /// 73 | reg_small_b_fixed.gph reg_small_stdb_fixed.gph, /// 74 | graphr(fcolor(white) color(white)) col(2) 75 | graph export reg_small.eps, replace 76 | -------------------------------------------------------------------------------- /code_in_notes/reg_with_dummies.do: -------------------------------------------------------------------------------- 1 | // file: reg_with_dummies.do - regress income on the full set of te4 dummies, with and without a constant 2 | use datasets/cfps_adult, clear 3 | drop if p_income<0 4 | drop if te4<0 5 | tab te4, gen(edu) 6 | reg p_income edu* 7 | outreg2 using reg_with_dummies.tex, replace 8 | reg p_income edu*, noconstant 9 | outreg2 using reg_with_dummies.tex, append 10 | -------------------------------------------------------------------------------- /code_in_notes/reg_with_dummy.do: -------------------------------------------------------------------------------- 1 | // file: reg_with_dummy.do - group summaries of p_income by cfps_gender, then the dummy-variable regression 2 | use datasets/cfps_adult, clear 3 | keep cfps_gender p_income 4 | drop if p_income<0 5 | bysort cfps_gender: outreg2 using reg_with_dummy_su.tex,/* 6 | */replace sum(log) eqkeep(N mean) keep(p_income) 7 | reg p_income cfps_gender 8 | outreg2 using reg_with_dummy.tex, replace 9 | -------------------------------------------------------------------------------- /code_in_notes/simpson_paradox.do: -------------------------------------------------------------------------------- 1 | // file: simpson_paradox.do - exer is more likely when gender==1 (0.8 vs 0.3) while gender lowers y by 10 2 | clear 3 | set obs 1000 4 | gen gender=runiform()<0.5 5 | gen exer=runiform()<0.8 if gender==1 6 | replace exer=runiform()<0.3 if gender==0 7 | gen y=80-10*gender+3*exer+rnormal() 8 | reg y exer 9 | outreg2 using
simpson_paradox.tex, replace 10 | reg y exer if gender==1 11 | outreg2 using simpson_paradox.tex, append 12 | reg y exer if gender==0 13 | outreg2 using simpson_paradox.tex, append 14 | reg y exer gender 15 | outreg2 using simpson_paradox.tex, append 16 | -------------------------------------------------------------------------------- /code_in_notes/test_jointly_ntv.do: -------------------------------------------------------------------------------- 1 | // test_jointly_ntv.do 2 | clear 3 | set more off 4 | use datasets/NTV_Aggregate_Data_reshaped.dta 5 | xtset tik_id year 6 | // 不进行控制进行检验 7 | reghdfe NTV L4.Votes_NDR_ L4.Votes_SPS_ L4.Votes_Yabloko_ L4.Votes_KPRF_ L4.Votes_LDPR_ L4.Turnout if year==1999, absorb(region) 8 | test L4.Votes_NDR_ L4.Votes_SPS_ L4.Votes_Yabloko_ L4.Votes_KPRF_ L4.Votes_LDPR_ L4.Turnout 9 | // 控制社会经济变量以及人口、工资的多项式 10 | forvalues i=1/5{ 11 | gen log_pop_`i'=logpop^`i' 12 | gen log_wage_`i'=log_wage^`i' 13 | } 14 | local E_controls "L.log_pop_* L.log_wage_* L.nurses L.doctors_pc" 15 | reghdfe NTV L4.Votes_NDR_ L4.Votes_SPS_ L4.Votes_Yabloko_ L4.Votes_KPRF_ L4.Votes_LDPR_ L4.Turnout `E_controls' if year==1999, absorb(region) 16 | test L4.Votes_NDR_ L4.Votes_SPS_ L4.Votes_Yabloko_ L4.Votes_KPRF_ L4.Votes_LDPR_ L4.Turnout 17 | -------------------------------------------------------------------------------- /code_in_notes/wald_ols.do: -------------------------------------------------------------------------------- 1 | // file: wald_ols.do 2 | set seed 19880505 3 | cap program drop dgp 4 | program define dgp, rclass 5 | syntax [, obs(integer 1000) b(real -1.0)] 6 | drop _all 7 | set obs `obs' 8 | tempvar x y u 9 | tempname vkk V test_stat bk p 10 | gen `x'=rnormal() 11 | gen `u'=rnormal()^3 12 | gen `y'=`b'*`x'+`u' 13 | reg `y' `x' , robust 14 | local `bk'=_b[`x'] 15 | mat `V'=e(V) 16 | local `vkk'=`V'[rownumb(`V',"`x'"),colnumb(`V',"`x'")] 17 | local `test_stat'=((``bk'')^2-1)^2/(4*(``bk'')^2*``vkk'') 18 | local `p'=1-chi2(1,``test_stat'') 19 | 
if ``p''<0.05{ 20 | return scalar rejected=1 21 | } 22 | else{ 23 | return scalar rejected=0 24 | } 25 | return scalar teststat=``test_stat'' 26 | end 27 | // simulate 28 | simulate rejected=r(rejected) teststat=r(teststat)/* 29 | */ ,reps(2000):dgp 30 | su 31 | hist teststat 32 | simulate rejected=r(rejected) teststat=r(teststat)/* 33 | */ ,reps(2000):dgp, b(0) 34 | su 35 | -------------------------------------------------------------------------------- /code_in_notes/weighted_ols.do: -------------------------------------------------------------------------------- 1 | // weighted_ols.do 2 | clear 3 | set more off 4 | // 生成数据 5 | set seed 19880505 6 | set obs 300 7 | gen x=2*runiform() 8 | gen y=exp(sin(x^3))+rnormal() 9 | gen y_true=exp(sin(x^3)) 10 | label variable y_true "True function" 11 | //回归 12 | gen x2=x^2 13 | gen x3=x^3 14 | gen x4=x^4 15 | reg y x* 16 | predict yhat_unweighted 17 | label variable yhat_unweighted "Unweighted" 18 | //加权回归 19 | gen w=normalden(2*x) 20 | reg y x* [iw=w] 21 | predict yhat_weighted 22 | label variable yhat_weighted "Weighted" 23 | //画图 24 | sort x 25 | twoway (scatter y x) /* 26 | */(line yhat_unweighted x) /* 27 | */(line y_true x) /* 28 | */(line yhat_weighted x) 29 | graph export wls.pdf, replace 30 | -------------------------------------------------------------------------------- /code_in_notes/white_hetero.do: -------------------------------------------------------------------------------- 1 | // file: white_hetero.do 2 | cap program drop dgp 3 | program define dgp, rclass 4 | syntax [, obs(integer 100) robust b(real 0)] 5 | drop _all 6 | set obs `obs' 7 | tempvar x y sigma 8 | gen `x'=rnormal() 9 | gen `sigma'=sqrt(`x'^2) 10 | gen `y'=`b'*`x'+`sigma'*rnormal() 11 | quietly: reg `y' `x', `robust' 12 | return scalar b=_b[`x'] 13 | return scalar se=_se[`x'] 14 | if abs(_b[`x']/_se[`x'])>=1.96 { 15 | return scalar rejected=1 16 | } 17 | else { 18 | return scalar rejected=0 19 | } 20 | end 21 | 22 | simulate 
rejected=r(rejected) b=r(b) se=r(se)/* 23 | */ ,reps(1000):dgp 24 | su 25 | simulate rejected=r(rejected) b=r(b) se=r(se)/* 26 | */,reps(1000):dgp, robust 27 | su 28 | --------------------------------------------------------------------------------