├── .gitignore ├── README.md ├── code_in_notes ├── Benford.do ├── CI_large_sample.do ├── CI_small_sample.do ├── GMM_Poisson.do ├── GMM_ettobit.do ├── GMM_mini_chi2.do ├── MCMC_independent_integ.py ├── MCMC_independent_mh.py ├── MCMC_logistic.py ├── MCMC_random_walk.py ├── MLE_beta.do ├── MLE_beta_nod.do ├── MLE_censor.do ├── MLE_liml.do ├── MM_beta.do ├── MonteCarlo_simple.py ├── RUN_ALL.do ├── Titanic.do ├── ab_test.do ├── bootstrap_lognormal.do ├── bootstrap_lognormal_np.do ├── bootstrap_student.do ├── box_plot.do ├── brownian.py ├── bubble_chart.do ├── charts_bar.do ├── check_random.do ├── check_random.py ├── clustering_wholesale.do ├── datasets │ ├── OHIE_ED_visit.csv │ ├── OHIE_QJE.dta │ ├── Questionarie.pdf │ ├── ba_sales_data.csv │ ├── bond_interest.xlsx │ ├── ccapm.dta │ ├── cfps_adult.dta │ ├── cfps_family_econ.dta │ ├── chfs_ind.dta │ ├── citydata.dta │ ├── export.dta │ ├── hs300index.dta │ ├── rr000005.dta │ ├── soep_female_labor.dta │ ├── titanic.dta │ └── wholesale.csv ├── descriptive_mode.do ├── dgp_mle_censor.ado ├── dgp_mle_censor_simulate.do ├── empirical_distribution.do ├── exponential.do ├── exponential.py ├── gaussian_mixture.do ├── gencatutility.ado ├── geometric_mean.do ├── ginindex.do ├── histogram_kdensity.do ├── importance_sampling.py ├── qqplot_hs300.do ├── rejection.py ├── rejection_beta.do ├── rejection_beta.py ├── rejection_trunc.do ├── rejection_trunc_optimal.do ├── scatter_weight_height.do ├── simulate_CLT.do ├── simulate_CLT.py ├── simulate_LLN.do ├── simulate_LLN.py ├── simulation_lm_ols_test.do ├── simulation_lm_test.do ├── simulation_lr_test.do ├── simulation_wald_test.do ├── st_call_py_tree.do ├── standard_normal_mean.do ├── standard_normal_var.do ├── statistic_mean.do ├── test_power_function.do ├── test_under_null.do ├── ttest_mean_comparison.do ├── twosls_by_hand.do ├── uniform.c ├── uniform.do ├── uniform.py └── zipf_law.do ├── notebook_julia └── Julia.ipynb └── notebook_python ├── Bayes.ipynb ├── Brownian.ipynb ├── 
DiscreteMarkov.ipynb ├── LLN_CLT.ipynb ├── Martingale.ipynb ├── MonteCarlo.ipynb ├── Normal.ipynb ├── Numpy+Matplotlib+random.ipynb ├── Poisson_Process.ipynb ├── Random_number.ipynb ├── SDE.ipynb ├── Stochastic_integral.ipynb ├── Testing.ipynb ├── estimation.ipynb └── rejection_beta.pdf /.gitignore: -------------------------------------------------------------------------------- 1 | *.out 2 | .DS_Store 3 | *.eps 4 | *.pdf 5 | *.tex 6 | *.txt 7 | *.gph 8 | *.ipynb_* 9 | *.stswp 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 《数理统计》、《随机过程》课程代码 2 | 该项目中主要包含上海对外经贸大学司继春《数理统计》课程以及《随机过程》的相关代码,主要包括讲义中的示意性代码以及Python、Julia语言的Jupyter Notebook。 3 | ## 讲义中代码 4 | 讲义中的代码主要包含在以下地址中:[Code in Notes](https://github.com/sijichun/MathStatsCode/tree/master/code_in_notes),其中Stata数据文件包含在以下地址中:[Datasets](https://github.com/sijichun/MathStatsCode/tree/master/code_in_notes/datasets)。 5 | ## Python Notebook 6 | 除讲义中的代码以外,本课程还提供一些Jupyter Notebook。这些Notebooks可以作为编程作业的参考: 7 | ### 《数理统计》代码 8 | * [Numpy+Matplotlib+random 介绍](https://github.com/sijichun/MathStatsCode/blob/master/notebook_python/Numpy%2BMatplotlib%2Brandom.ipynb) 9 | * [生成多元正态分布](https://github.com/sijichun/MathStatsCode/blob/master/notebook_python/Normal.ipynb) 10 | * [大数定律与中心极限定理](https://github.com/sijichun/MathStatsCode/blob/master/notebook_python/LLN_CLT.ipynb) 11 | * [矩估计、极大似然估计与区间估计](https://github.com/sijichun/MathStatsCode/blob/master/notebook_python/estimation.ipynb) 12 | * [假设检验](https://github.com/sijichun/MathStatsCode/blob/master/notebook_python/Testing.ipynb) 13 | ### 《随机过程》代码 14 | 15 | * [马尔可夫链](https://github.com/sijichun/MathStatsCode/blob/master/notebook_python/DiscreteMarkov.ipynb) 16 | * [泊松过程](https://github.com/sijichun/MathStatsCode/blob/master/notebook_python/Poisson_Process.ipynb) 17 | * 
[布朗运动](https://github.com/sijichun/MathStatsCode/blob/master/notebook_python/Brownian.ipynb) 18 | * [蒙特卡洛](https://github.com/sijichun/MathStatsCode/blob/master/notebook_python/MonteCarlo.ipynb) 19 | ## Julia Notebook 20 | * [Julia介绍](https://github.com/sijichun/MathStatsCode/blob/master/notebook_julia/Julia.ipynb) 21 | -------------------------------------------------------------------------------- /code_in_notes/Benford.do: -------------------------------------------------------------------------------- 1 | // Benford.do 2 | clear all 3 | use datasets/citydata.dta 4 | keep if Year==2011 5 | 6 | // 将GDP转为字符串 7 | tostring v84, gen(gdp_string) 8 | // 取出GDP数字字符串的第一位 9 | gen first_digit=substr(gdp_string, 1,1) 10 | // 将第一位数字字符串转化为数字 11 | destring first_digit, replace 12 | // 统计频数 13 | tab first_digit 14 | // 创建一个数据框用于检验 15 | frame create chi2test Ng Ng_star 16 | quietly: su first_digit 17 | local N=r(N) 18 | forvalues i=1/9{ 19 | quietly: su first_digit if first_digit==`i' 20 | local Ng=r(N) 21 | local Ng_star=`N'*(log10(`i'+1)-log10(`i')) 22 | frame post chi2test (`Ng') (`Ng_star') 23 | } 24 | frame chi2test: gen test_stat=(Ng-Ng_star)^2/Ng_star 25 | frame chi2test: su test_stat 26 | frame chi2test: local test_stat=r(sum) 27 | di "test statistics= `test_stat'" 28 | local p=1-chi2(8 ,`test_stat') 29 | di "p-value= `p'" 30 | -------------------------------------------------------------------------------- /code_in_notes/CI_large_sample.do: -------------------------------------------------------------------------------- 1 | // CI_large_sample.do 2 | clear 3 | set seed 97 4 | set more off 5 | cap program drop CIz 6 | program define CIz, rclass 7 | syntax [, obs(integer 100) mu(real 3) confident_level(real 0.95)] 8 | quietly{ 9 | drop _all 10 | set obs `obs' 11 | tempvar d x 12 | tempname lb ub alpha 13 | local `alpha'=1-`confident_level' 14 | gen `d'=runiform()<0.5 15 | gen `x'=`d'*(rnormal()+`mu')+(1-`d')*(rnormal()-`mu') 16 | su `x' 17 | local 
`lb'=r(mean)-invnormal(1-``alpha''/2)*r(sd)/sqrt(`obs') 18 | local `ub'=r(mean)+invnormal(1-``alpha''/2)*r(sd)/sqrt(`obs') 19 | return scalar lb=``lb'' 20 | return scalar ub=``ub'' 21 | return scalar include_true=(``lb''<=0 & ``ub''>=0) 22 | } 23 | end 24 | 25 | simulate it=r(include_true), reps(10000): CIz, obs(10) 26 | su 27 | simulate it=r(include_true), reps(10000): CIz, obs(100) 28 | su 29 | -------------------------------------------------------------------------------- /code_in_notes/CI_small_sample.do: -------------------------------------------------------------------------------- 1 | // CI_small_sample.do 2 | clear 3 | set seed 97 4 | set more off 5 | cap program drop CIt 6 | program define CIt, rclass 7 | syntax [, obs(integer 10) mu(real 0) sigma(real 1) confident_level(real 0.95)] 8 | quietly{ 9 | drop _all 10 | set obs `obs' 11 | tempvar x 12 | tempname lb ub df alpha 13 | local `alpha'=1-`confident_level' 14 | local `df'=`obs'-1 15 | gen `x'=`sigma'*rnormal()+`mu' 16 | su `x' 17 | local `lb'=r(mean)-invttail(``df'',``alpha''/2)*r(sd)/sqrt(`obs') 18 | local `ub'=r(mean)+invttail(``df'',``alpha''/2)*r(sd)/sqrt(`obs') 19 | return scalar lb=``lb'' 20 | return scalar ub=``ub'' 21 | return scalar include_true=(``lb''<=`mu' & ``ub''>=`mu') 22 | } 23 | end 24 | 25 | simulate it=r(include_true), reps(10000): CIt, obs(10) mu(5) sigma(10) 26 | su -------------------------------------------------------------------------------- /code_in_notes/GMM_Poisson.do: -------------------------------------------------------------------------------- 1 | // GMM_Poisson.do 2 | clear all 3 | set more off 4 | set seed 20231130 5 | cap program drop est_lambda 6 | program est_lambda, eclass 7 | syntax varlist(max=1) [, lambda(real 1)] 8 | tempvar x2 9 | quietly{ 10 | gen `x2'=`varlist'^2 11 | gmm (`varlist'-{lambda=`lambda'}) (`x2'-({lambda}+{lambda}^2)), winit(identity) onestep 12 | } 13 | mat b=e(b) 14 | mat V=e(V) 15 | ereturn post b V 16 | ereturn display 17 | end 18 | 19 | 
frame create simulation lambda xmean 20 | forvalues i=1/1000{ 21 | clear 22 | quietly{ 23 | set obs 100 24 | gen x=rpoisson(10) 25 | su x 26 | local mean_x = r(mean) 27 | est_lambda x 28 | frame post simulation (_b[/lambda]) (`mean_x') 29 | } 30 | } 31 | frame simulation: su 32 | -------------------------------------------------------------------------------- /code_in_notes/GMM_ettobit.do: -------------------------------------------------------------------------------- 1 | // GMM_ettobit.do 2 | clear 3 | set more off 4 | set obs 500 5 | set seed 8997 6 | // 生成数据 7 | gen x=rnormal(3,2) 8 | local a=10 9 | local alpha=5 10 | local beta=3 11 | local sigma=2 12 | gen y = exp(`alpha'+`beta'*x+rnormal()*sqrt(`sigma'))-`a' 13 | // 生成工具 14 | gen x2=x^2 15 | gen x3=x^3 16 | gen x_e=exp(x) 17 | gen x_sin=sin(x) 18 | // 计算a的初始值 19 | su y 20 | local a_init=-1*r(min)+5 21 | // GMM估计 22 | gmm (log({a=`a_init'}+y)-{alpha=0}-{beta=0}*x) ((log({a}+y)-{alpha}-{beta}*x)*x) ((log({a}+y)-{alpha}-{beta}*x)*x2) ((log({a}+y)-{alpha}-{beta}*x)*x3) ((log({a}+y)-{alpha}-{beta}*x)*x_e) ((log({a}+y)-{alpha}-{beta}*x)*x_sin), winit(identity) 23 | 24 | -------------------------------------------------------------------------------- /code_in_notes/GMM_mini_chi2.do: -------------------------------------------------------------------------------- 1 | // GMM_mini_chi2.do 2 | clear all 3 | set more off 4 | set seed 20231213 5 | cap program drop est_lambda_chi2 6 | program est_lambda_chi2, rclass 7 | syntax varname [, lambda(real 1) lambda0(real 10) n(integer 100)] 8 | tempvar x2 m1_0 m2_0 9 | tempname mean_m1_0 mean_m2_0 vec_g chi2_est chi2_true wmatrix objf 10 | quietly{ 11 | gen `x2'=`varlist'^2 12 | gmm (`varlist'-{lambda=`lambda'}) (`x2'-({lambda}+{lambda}^2)), winit(identity) 13 | local `chi2_est'=e(J) 14 | mat `wmatrix'=e(W) 15 | gen `m1_0'=`varlist'-`lambda0' 16 | gen `m2_0'=`x2'-(`lambda0'+`lambda0'^2) 17 | su `m1_0' 18 | local `mean_m1_0'=r(sum) 19 | su `m2_0' 20 | local `mean_m2_0'=r(sum) 21 
| mat `vec_g'=(``mean_m1_0'' \ ``mean_m2_0'') 22 | mat `objf'=`vec_g''*`wmatrix'*`vec_g' 23 | local `chi2_true'=`objf'[1,1]/`n' 24 | } 25 | return scalar chi2_est = ``chi2_est'' 26 | return scalar chi2_true = ``chi2_true'' 27 | end 28 | 29 | frame create simulation chi2_est chi2_true 30 | forvalues i=1/2000{ 31 | quietly{ 32 | clear 33 | set obs 100 34 | gen x=rpoisson(10) 35 | est_lambda_chi2 x, lambda0(10) n(100) 36 | frame post simulation (r(chi2_est)) (r(chi2_true)) 37 | } 38 | } 39 | frame change simulation 40 | label variable chi2_est "估计值处的目标函数" 41 | label variable chi2_true "真值处的目标函数" 42 | gen density_chi2_1=chi2den(1,chi2_est) 43 | label variable density_chi2_1 "Chi(1)密度函数" 44 | gen density_chi2_2=chi2den(2,chi2_est) 45 | label variable density_chi2_2 "Chi(2)密度函数" 46 | sort chi2_est 47 | twoway (hist chi2_est, density) (line density_chi2_1 chi2_est if chi2_est>0.3) (line density_chi2_2 chi2_est if chi2_est>0.3), legend(pos(1) ring(0) label(1 "估计值处的目标函数密度")) xtitle("") 48 | graph export GMM_mini_chi2_1.pdf, replace 49 | twoway (hist chi2_true, density) (line density_chi2_1 chi2_est if chi2_est>0.3) (line density_chi2_2 chi2_est if chi2_est>0.3), legend(pos(1) ring(0) label(1 "真值处的目标函数密度")) xtitle("") 50 | graph export GMM_mini_chi2_2.pdf, replace -------------------------------------------------------------------------------- /code_in_notes/MCMC_independent_integ.py: -------------------------------------------------------------------------------- 1 | ## MCMC_independent_integ.py 2 | 3 | import numpy as np 4 | from numpy import random as nprd 5 | nprd.seed(19880505) 6 | 7 | ##设定参数 8 | M = 10000 9 | h1 = lambda x: 0.5 * np.exp(-90 * (x[0] - 0.5)**2 - 45 * (x[1] + 0.1)**2) 10 | 11 | ##抽样 12 | x = [(nprd.random() * 2 - 1, nprd.random() * 2 - 1) for i in range(M)] 13 | 14 | ##计算h(x) 15 | sample = list(map(h, x)) 16 | integral = 4 * np.mean(sample) 17 | se = 4 * np.std(sample) / np.sqrt(M) 18 | subsample_001 = list(filter(lambda x: x > 0.01, sample)) 19 | 
print("Intgral=", integral) 20 | print("s.e. of Integral=", se) 21 | print("95% C.I.:", integral - 1.96 * se, "~", integral + 1.96 * se) 22 | print("Ratio of >0.01 samples:", len(subsample_001) / M) 23 | -------------------------------------------------------------------------------- /code_in_notes/MCMC_independent_mh.py: -------------------------------------------------------------------------------- 1 | ## mcmc_independent_mh.py 2 | ##独立的MCMC算法,输入: 3 | ## N_samples : 抽样次数 4 | ## pai(x) : 目标密度函数 5 | ## q(y) : 工具密度函数 6 | ## q_sampler : 给定x,从q中抽样的函数 7 | ## x0 : 初始值 8 | from numpy import random as nprd 9 | def MH_independent(N_samples, pai, q, q_sampler, x0): 10 | X = [] 11 | x = x0 12 | for i in range(N_samples): 13 | y = q_sampler() 14 | rho = min(1, pai(y) * q(x) / (pai(x) * q(y))) 15 | if nprd.uniform() <= rho: 16 | X.append(y) 17 | x = y 18 | else: 19 | X.append(x) 20 | return X 21 | -------------------------------------------------------------------------------- /code_in_notes/MCMC_logistic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | ## file: MonteCarlo.py 3 | 4 | import numpy as np 5 | from numpy import random as nprd 6 | 7 | ##设定参数 8 | M =20000 9 | #真值 10 | beta_0=1 11 | beta_1=1 12 | beta_2=-1 13 | #样本量 14 | N=200 15 | #Logistic函数 16 | Logistic=lambda x: 1.0/(1+np.exp(-1*x)) 17 | 18 | ##产生数据 19 | def gen_logit(N): 20 | Data=[] 21 | for n in range(N): 22 | x1=nprd.normal()*1.414+1 23 | x2=nprd.chisquare(2) 24 | d_star=beta_0+beta_1*x1+beta_2*x2 25 | p_star=Logistic(d_star) 26 | d=(1 if nprd.uniform() 0.01, sample)) 18 | print("Intgral=", integral) 19 | print("s.e. 
of Integral=", se) 20 | print("95% C.I.:", integral - 1.96 * se, "~", integral + 1.96 * se) 21 | print("Ratio of >0.01 samples:", len(subsample_001) / M) 22 | -------------------------------------------------------------------------------- /code_in_notes/RUN_ALL.do: -------------------------------------------------------------------------------- 1 | clear 2 | set more off 3 | ** begin 4 | do uniform.do 5 | do check_random.do 6 | do exponential.do 7 | do rejection_trunc.do 8 | do rejection_trunc_optimal.do 9 | do gaussian_mixture.do 10 | do standard_normal_var.do 11 | do standard_normal_mean.do 12 | do test_power_function.do 13 | do qqplot_hs300.do 14 | do statistic_mean.do 15 | do empirical_distribution.do 16 | do simulate_LLN.do 17 | do simulate_CLT.do 18 | do histogram_kdensity.do 19 | do charts_bar.do 20 | do scatter_weight_height.do 21 | do zipf_law.do 22 | do bubble_chart.do 23 | do box_plot.do 24 | do bootstrap_lognormal.do 25 | do test_power_function.do 26 | do simulation_wald_test.do 27 | do simulation_lm_test.do 28 | do simulation_lm_ols_test.do 29 | do simulation_lr_test.do 30 | do GMM_mini_chi2.do 31 | do dgp_mle_censor_simulate.do 32 | -------------------------------------------------------------------------------- /code_in_notes/Titanic.do: -------------------------------------------------------------------------------- 1 | // Titanic.do 2 | clear all 3 | use datasets/titanic.dta 4 | // 按照性别加总 5 | collapse (sum)num_aboard (sum)num_death, by(sex) 6 | // 计算幸存人数 7 | gen num_survive=num_aboard-num_death 8 | // 计算死亡总人数、幸存总人数 9 | qui: sum num_death 10 | local death=r(sum) 11 | qui: sum num_survive 12 | local survive=r(sum) 13 | local N=`death'+`survive' 14 | // 计算理论死亡、幸存人数 15 | gen e_num_death=num_aboard*`death'/`N' 16 | gen e_num_survive=num_aboard*`survive'/`N' 17 | // 计算检验统计量 18 | gen chi2_death=(num_death-e_num_death)^2/e_num_death 19 | gen chi2_survive=(num_survive-e_num_survive)^2/e_num_survive 20 | gen chi2=chi2_death+chi2_survive 21 | qui: su chi2 22 
| local test_stat=r(sum) 23 | di "test statistics= `test_stat'" 24 | local p=1-chi2(2 ,`test_stat') 25 | di "p-value= `p'" 26 | -------------------------------------------------------------------------------- /code_in_notes/ab_test.do: -------------------------------------------------------------------------------- 1 | // ab_test.do 2 | // Import the data 3 | clear 4 | set more off 5 | import delimited "datasets/ba_sales_data.csv" 6 | 7 | // Aggregate sales per user 8 | collapse (sum) sale_price (max) test_option, by(user_id) 9 | 10 | // Run the test 11 | ttest sale_price, by(test_option) 12 | -------------------------------------------------------------------------------- /code_in_notes/bootstrap_lognormal.do: -------------------------------------------------------------------------------- 1 | // bootstrap_lognormal.do 2 | clear all 3 | set more off 4 | set seed 1000 5 | set obs 10 6 | // Generate the sample 7 | local mu=2 8 | gen z=rnormal(`mu',sqrt(2)) 9 | gen x=exp(z) 10 | // Compute the estimate 11 | su x 12 | local mu_hat=log(r(mean))-1 13 | di `mu_hat' 14 | // Resample and post each replicate into the frame bootstrap 15 | frame create bootstrap mu_star 16 | local B=10000 17 | forvalues i=1/`B'{ 18 | quietly{ 19 | gen z_star=rnormal(`mu_hat',sqrt(2)) 20 | gen x_star=exp(z_star) 21 | su x_star 22 | local mu_star=log(r(mean))-1 23 | frame post bootstrap (`mu_star') 24 | drop z_star 25 | drop x_star 26 | } 27 | } 28 | frame bootstrap: hist mu_star 29 | graph export bootstrap_lognormal.pdf, replace 30 | frame bootstrap: su mu_star, de 31 | local mu_star_bar=r(mean) 32 | di "mu_bc=" _skip 2*`mu_hat'-`mu_star_bar' -------------------------------------------------------------------------------- /code_in_notes/bootstrap_lognormal_np.do: -------------------------------------------------------------------------------- 1 | // bootstrap_lognormal_np.do 2 | clear all 3 | set more off 4 | set seed 1000 5 | set obs 10 6 | // Generate the sample 7 | local mu=2 8 | gen z=rnormal(`mu',sqrt(2)) 9 | gen x=exp(z) 10 | // Define the estimation program 11 | cap program drop lognormalest 12 | program define 
lognormalest, rclass 13 | syntax varname 14 | quietly { 15 | su `varlist' 16 | local mu=log(r(mean))-1 17 | return scalar mu=`mu' 18 | } 19 | end 20 | // bootstrap 21 | local B=1000 22 | bootstrap mu=r(mu), reps(`B'): lognormalest x 23 | estat bootstrap -------------------------------------------------------------------------------- /code_in_notes/bootstrap_student.do: -------------------------------------------------------------------------------- 1 | // bootstrap_student.do 2 | clear all 3 | use datasets/OHIE_QJE.dta 4 | // bootstrap 5 | bootstrap b=_b[treatment] se=_se[treatment], reps(200) seed(55) cluster(household_id) saving(bootstrap_student.dta, replace): reg birthyear_list treatment, vce(bootstrap) 6 | // 进行回归,保存系数和标准误 7 | reg birthyear_list treatment 8 | local b=_b[treatment] 9 | local se=_se[treatment] 10 | // 打开一个数据框,装入bootstrap_student.dta 11 | frame create bootstrap_sample 12 | frame change bootstrap_sample 13 | use bootstrap_student.dta 14 | gen t=(b-`b')/se 15 | sort t 16 | local alpha_25=(t[floor(_N*0.025)]+t[ceil(_N*0.025)])/2 17 | local alpha_975=(t[floor(_N*0.975)]+t[ceil(_N*0.975)])/2 18 | // 计算好分位数,返回 19 | frame change default 20 | rm bootstrap_student.dta 21 | // 计算置信区间 22 | di "置信区间为:" _skip "[" `b'-`alpha_975'*`se' "," `b'-`alpha_25'*`se' "]" 23 | -------------------------------------------------------------------------------- /code_in_notes/box_plot.do: -------------------------------------------------------------------------------- 1 | // box_plot.do 2 | clear 3 | use datasets/cfps_adult.dta 4 | drop if qp101<0 5 | graph box qp101, noout 6 | graph export box_plot.pdf, replace 7 | graph hbox qp101, over(cfps_gender) 8 | graph export box_plot_by_sex.pdf, replace 9 | -------------------------------------------------------------------------------- /code_in_notes/brownian.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as nprd 3 | 4 | 
t=np.linspace(0.0,5.0,5000) 5 | B1=np.zeros(len(t)) 6 | B2=np.zeros(len(t)) 7 | B3=np.zeros(len(t)) 8 | B4=np.zeros(len(t)) 9 | B5=np.zeros(len(t)) 10 | B6=np.zeros(len(t)) 11 | b1=b2=b3=b4=b5=b6=0 12 | epsilon=np.sqrt(t[1]-t[0]) 13 | for tt in range(len(t)): 14 | B1[tt]=b1 15 | b1=b1+nprd.normal(0,epsilon) 16 | B2[tt]=b2 17 | b2=b2+nprd.normal(0,epsilon) 18 | B3[tt]=b3 19 | b3=b3+nprd.normal(0,epsilon) 20 | B4[tt]=b4 21 | b4=b4+nprd.normal(0,epsilon) 22 | B5[tt]=b5 23 | b5=b5+nprd.normal(0,epsilon) 24 | B6[tt]=b6 25 | b6=b6+nprd.normal(0,epsilon) 26 | 27 | import matplotlib.pyplot as plt 28 | # 设定图像大小 29 | plt.rcParams['figure.figsize'] = (10.0, 10.0) 30 | plt.figure(0) 31 | plt.plot(t,B1,label=r'Brownian Motion1',color='blue') 32 | plt.plot(t,B2,label=r'Brownian Motion2',color='red') 33 | plt.plot(t,B3,label=r'Brownian Motion3',color='pink') 34 | plt.plot(t,B4,label=r'Brownian Motion4',color='green') 35 | plt.plot(t,B5,label=r'Brownian Motion5',color='orange') 36 | plt.plot(t,B6,label=r'Brownian Motion6',color='yellow') 37 | plt.legend(loc='upper left', frameon=True) 38 | plt.savefig("brownian_motion.eps") 39 | 40 | t=np.linspace(0.0,5.0,10000) 41 | B1=np.zeros(len(t)) 42 | B2=np.zeros(len(t)) 43 | B3=np.zeros(len(t)) 44 | B4=np.zeros(len(t)) 45 | B5=np.zeros(len(t)) 46 | B6=np.zeros(len(t)) 47 | b1=b2=b3=b4=b5=b6=0 48 | epsilon=np.sqrt(t[1]-t[0]) 49 | for tt in range(len(t)): 50 | B1[tt]=b1 51 | b1=b1+nprd.normal(0,epsilon) 52 | B2[tt]=b2 53 | b2=b2+nprd.normal(0,epsilon) 54 | B3[tt]=b3 55 | b3=b3+nprd.normal(0,epsilon) 56 | B4[tt]=b4 57 | b4=b4+nprd.normal(0,epsilon) 58 | B5[tt]=b5 59 | b5=b5+nprd.normal(0,epsilon) 60 | B6[tt]=b6 61 | b6=b6+nprd.normal(0,epsilon) 62 | 63 | plt.figure(1) 64 | plt.plot(B1,B2,label=r'Brownian Motion1',color='blue') 65 | plt.plot(B3,B4,label=r'Brownian Motion2',color='red') 66 | plt.plot(B5,B6,label=r'Brownian Motion3',color='green') 67 | plt.legend(loc='upper left', frameon=True) 68 | plt.savefig("brownian_motion_2d.eps") 69 | 
-------------------------------------------------------------------------------- /code_in_notes/bubble_chart.do: -------------------------------------------------------------------------------- 1 | // bubble_chart.do 2 | clear 3 | use datasets/citydata.dta 4 | keep if Year==2011 5 | // Province code; flag eastern / central / western provinces 6 | gen prov=floor(CityCode/10000) 7 | gen east=inlist(prov,11,12,13,21,31,32,33,35,37,44,46) 8 | gen middle=inlist(prov,14,22,23,34,36,41,43,42) 9 | gen west=inlist(prov,15,45,50,51,52,53,54,61,62,63,64,65) 10 | // Generate variables 11 | gen log_gdp_per_capita=log(v84)-log(v86) 12 | gen log_pop=log(v86) 13 | gen log_waste_water=log(v266) 14 | // Plot 15 | twoway (scatter log_waste_water log_gdp_per_capita [fw=v86] if east, msize(tiny) mcolor(blue%30)) (scatter log_waste_water log_gdp_per_capita [fw=v86] if middle, msize(tiny) mcolor(green%30)) (scatter log_waste_water log_gdp_per_capita [fw=v86] if west, msize(tiny) mcolor(red%30)), legend(off) xtitle("对数人均GDP") ytitle("对数工业废水排放量") 16 | graph export bubble_chart.pdf, replace 17 | -------------------------------------------------------------------------------- /code_in_notes/charts_bar.do: -------------------------------------------------------------------------------- 1 | // charts_bar.do 2 | use datasets/cfps_adult.dta, clear 3 | drop if te4<0 4 | graph bar, over(te4, label(angle(15))) 5 | graph export chart_bar1.pdf, replace 6 | graph bar (count), over(te4, label(angle(15))) 7 | graph export chart_bar2.pdf, replace 8 | graph hbar (mean) p_income, over(te4) 9 | graph export chart_bar3.pdf, replace 10 | collapse (count) p_income (mean) mean_income = p_income (median) median_income = p_income, by(te4) 11 | gen te4_left=te4-0.2 12 | gen te4_right=te4+0.2 13 | twoway (bar mean_income te4_left, barw(0.2)) (bar median_income te4, barw(0.2)) (bar p_income te4_right, yaxis(2) barw(0.2)), xlabel(1 "文盲" 2 "小学" 3 "初中" 4 "高中" 5 "大专" 6 "本科" 7 "研究生") ytitle("金额", axis(1)) ytitle("人数", axis(2)) legend(pos(11) ring(0) label(1 "收入均值") label(2 
"收入中位数") lab(3 "人数")) 14 | graph export chart_bar4.pdf, replace 15 | -------------------------------------------------------------------------------- /code_in_notes/check_random.do: -------------------------------------------------------------------------------- 1 | // check_random.do 2 | set more off 3 | clear 4 | set obs 1000 5 | // 行号是奇数还是偶数 6 | gen variable_id=mod(_n,2) 7 | gen group=ceil(_n/2) 8 | // 产生序贯的随机数 9 | gen x=runiform() 10 | // 将奇数观测作为x1,偶数观测作为x0 11 | reshape wide x, i(group) j(variable_id) 12 | // 画散点图 13 | scatter x1 x0 , graphr(fcolor(white) color(white)) 14 | // 保存图片 15 | graph export check_random_stata.eps, replace 16 | -------------------------------------------------------------------------------- /code_in_notes/check_random.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as nprd 3 | # 获取序贯的1000个均匀分布随机数 4 | x = nprd.random(1000) 5 | # 将1000个均匀分布随机数按照奇偶数分成两个序列x0 和x1 6 | x0 = np.array([x[i] for i in range(1000) if i % 2 == 0]) 7 | x1 = np.array([x[i] for i in range(1000) if i % 2 == 1]) 8 | # 画图 9 | import matplotlib.pyplot as plt 10 | # 设定图像大小 11 | plt.rcParams['figure.figsize'] = (8.0, 5.0) 12 | plt.scatter(x0, x1, color='blue') ## 画出散点图 13 | plt.savefig("check_random_py.pdf") -------------------------------------------------------------------------------- /code_in_notes/clustering_wholesale.do: -------------------------------------------------------------------------------- 1 | // clustering.do 2 | // 导入数据 3 | clear 4 | set more off 5 | import delimited "datasets/wholesale.csv" 6 | 7 | /* 进行聚类,使用层次聚类: 8 | average linkage 9 | angular(角度,余弦相似度)*/ 10 | cluster averagelinkage fresh milk grocery frozen detergents_paper delicassen, measure(angular) 11 | // 画图 12 | //cluster dendrogram, cutnumber(30) 13 | // 分5类 14 | cluster gen cluster1=groups(5) 15 | // kmeans 聚类,使用层次聚类的结果作为初试类别 16 | cluster kmeans fresh milk grocery frozen, /* 17 | */k(5) measure(angular) 
start(group(cluster1)) 18 | rename _clus_2 cluster_kmeans 19 | // 画图,比如fresh和grocery 20 | twoway (scatter fresh grocery if cluster_kmeans==1)/* 21 | */(scatter fresh grocery if cluster_kmeans==2)/* 22 | */(scatter fresh grocery if cluster_kmeans==3)/* 23 | */(scatter fresh grocery if cluster_kmeans==4)/* 24 | */(scatter fresh grocery if cluster_kmeans==5), legend(off) 25 | -------------------------------------------------------------------------------- /code_in_notes/datasets/OHIE_QJE.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/MathStatsCode/a06caa49c76c7f80040640b0d4b3a467c3c03b56/code_in_notes/datasets/OHIE_QJE.dta -------------------------------------------------------------------------------- /code_in_notes/datasets/Questionarie.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/MathStatsCode/a06caa49c76c7f80040640b0d4b3a467c3c03b56/code_in_notes/datasets/Questionarie.pdf -------------------------------------------------------------------------------- /code_in_notes/datasets/bond_interest.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/MathStatsCode/a06caa49c76c7f80040640b0d4b3a467c3c03b56/code_in_notes/datasets/bond_interest.xlsx -------------------------------------------------------------------------------- /code_in_notes/datasets/ccapm.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/MathStatsCode/a06caa49c76c7f80040640b0d4b3a467c3c03b56/code_in_notes/datasets/ccapm.dta -------------------------------------------------------------------------------- /code_in_notes/datasets/cfps_adult.dta: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/sijichun/MathStatsCode/a06caa49c76c7f80040640b0d4b3a467c3c03b56/code_in_notes/datasets/cfps_adult.dta -------------------------------------------------------------------------------- /code_in_notes/datasets/cfps_family_econ.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/MathStatsCode/a06caa49c76c7f80040640b0d4b3a467c3c03b56/code_in_notes/datasets/cfps_family_econ.dta -------------------------------------------------------------------------------- /code_in_notes/datasets/chfs_ind.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/MathStatsCode/a06caa49c76c7f80040640b0d4b3a467c3c03b56/code_in_notes/datasets/chfs_ind.dta -------------------------------------------------------------------------------- /code_in_notes/datasets/citydata.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/MathStatsCode/a06caa49c76c7f80040640b0d4b3a467c3c03b56/code_in_notes/datasets/citydata.dta -------------------------------------------------------------------------------- /code_in_notes/datasets/export.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/MathStatsCode/a06caa49c76c7f80040640b0d4b3a467c3c03b56/code_in_notes/datasets/export.dta -------------------------------------------------------------------------------- /code_in_notes/datasets/hs300index.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/MathStatsCode/a06caa49c76c7f80040640b0d4b3a467c3c03b56/code_in_notes/datasets/hs300index.dta -------------------------------------------------------------------------------- /code_in_notes/datasets/rr000005.dta: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/MathStatsCode/a06caa49c76c7f80040640b0d4b3a467c3c03b56/code_in_notes/datasets/rr000005.dta -------------------------------------------------------------------------------- /code_in_notes/datasets/soep_female_labor.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/MathStatsCode/a06caa49c76c7f80040640b0d4b3a467c3c03b56/code_in_notes/datasets/soep_female_labor.dta -------------------------------------------------------------------------------- /code_in_notes/datasets/titanic.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/MathStatsCode/a06caa49c76c7f80040640b0d4b3a467c3c03b56/code_in_notes/datasets/titanic.dta -------------------------------------------------------------------------------- /code_in_notes/datasets/wholesale.csv: -------------------------------------------------------------------------------- 1 | Channel,Region,Fresh,Milk,Grocery,Frozen,Detergents_Paper,Delicassen 2 | 2,3,12669,9656,7561,214,2674,1338 3 | 2,3,7057,9810,9568,1762,3293,1776 4 | 2,3,6353,8808,7684,2405,3516,7844 5 | 1,3,13265,1196,4221,6404,507,1788 6 | 2,3,22615,5410,7198,3915,1777,5185 7 | 2,3,9413,8259,5126,666,1795,1451 8 | 2,3,12126,3199,6975,480,3140,545 9 | 2,3,7579,4956,9426,1669,3321,2566 10 | 1,3,5963,3648,6192,425,1716,750 11 | 2,3,6006,11093,18881,1159,7425,2098 12 | 2,3,3366,5403,12974,4400,5977,1744 13 | 2,3,13146,1124,4523,1420,549,497 14 | 2,3,31714,12319,11757,287,3881,2931 15 | 2,3,21217,6208,14982,3095,6707,602 16 | 2,3,24653,9465,12091,294,5058,2168 17 | 1,3,10253,1114,3821,397,964,412 18 | 2,3,1020,8816,12121,134,4508,1080 19 | 1,3,5876,6157,2933,839,370,4478 20 | 2,3,18601,6327,10099,2205,2767,3181 21 | 1,3,7780,2495,9464,669,2518,501 22 | 2,3,17546,4519,4602,1066,2259,2124 23 | 
1,3,5567,871,2010,3383,375,569 24 | 1,3,31276,1917,4469,9408,2381,4334 25 | 2,3,26373,36423,22019,5154,4337,16523 26 | 2,3,22647,9776,13792,2915,4482,5778 27 | 2,3,16165,4230,7595,201,4003,57 28 | 1,3,9898,961,2861,3151,242,833 29 | 1,3,14276,803,3045,485,100,518 30 | 2,3,4113,20484,25957,1158,8604,5206 31 | 1,3,43088,2100,2609,1200,1107,823 32 | 1,3,18815,3610,11107,1148,2134,2963 33 | 1,3,2612,4339,3133,2088,820,985 34 | 1,3,21632,1318,2886,266,918,405 35 | 1,3,29729,4786,7326,6130,361,1083 36 | 1,3,1502,1979,2262,425,483,395 37 | 2,3,688,5491,11091,833,4239,436 38 | 1,3,29955,4362,5428,1729,862,4626 39 | 2,3,15168,10556,12477,1920,6506,714 40 | 2,3,4591,15729,16709,33,6956,433 41 | 1,3,56159,555,902,10002,212,2916 42 | 1,3,24025,4332,4757,9510,1145,5864 43 | 1,3,19176,3065,5956,2033,2575,2802 44 | 2,3,10850,7555,14961,188,6899,46 45 | 2,3,630,11095,23998,787,9529,72 46 | 2,3,9670,7027,10471,541,4618,65 47 | 2,3,5181,22044,21531,1740,7353,4985 48 | 2,3,3103,14069,21955,1668,6792,1452 49 | 2,3,44466,54259,55571,7782,24171,6465 50 | 2,3,11519,6152,10868,584,5121,1476 51 | 2,3,4967,21412,28921,1798,13583,1163 52 | 1,3,6269,1095,1980,3860,609,2162 53 | 1,3,3347,4051,6996,239,1538,301 54 | 2,3,40721,3916,5876,532,2587,1278 55 | 2,3,491,10473,11532,744,5611,224 56 | 1,3,27329,1449,1947,2436,204,1333 57 | 1,3,5264,3683,5005,1057,2024,1130 58 | 2,3,4098,29892,26866,2616,17740,1340 59 | 2,3,5417,9933,10487,38,7572,1282 60 | 1,3,13779,1970,1648,596,227,436 61 | 1,3,6137,5360,8040,129,3084,1603 62 | 2,3,8590,3045,7854,96,4095,225 63 | 2,3,35942,38369,59598,3254,26701,2017 64 | 2,3,7823,6245,6544,4154,4074,964 65 | 2,3,9396,11601,15775,2896,7677,1295 66 | 1,3,4760,1227,3250,3724,1247,1145 67 | 2,3,85,20959,45828,36,24231,1423 68 | 1,3,9,1534,7417,175,3468,27 69 | 2,3,19913,6759,13462,1256,5141,834 70 | 1,3,2446,7260,3993,5870,788,3095 71 | 1,3,8352,2820,1293,779,656,144 72 | 1,3,16705,2037,3202,10643,116,1365 73 | 1,3,18291,1266,21042,5373,4173,14472 74 | 
1,3,4420,5139,2661,8872,1321,181 75 | 2,3,19899,5332,8713,8132,764,648 76 | 2,3,8190,6343,9794,1285,1901,1780 77 | 1,3,20398,1137,3,4407,3,975 78 | 1,3,717,3587,6532,7530,529,894 79 | 2,3,12205,12697,28540,869,12034,1009 80 | 1,3,10766,1175,2067,2096,301,167 81 | 1,3,1640,3259,3655,868,1202,1653 82 | 1,3,7005,829,3009,430,610,529 83 | 2,3,219,9540,14403,283,7818,156 84 | 2,3,10362,9232,11009,737,3537,2342 85 | 1,3,20874,1563,1783,2320,550,772 86 | 2,3,11867,3327,4814,1178,3837,120 87 | 2,3,16117,46197,92780,1026,40827,2944 88 | 2,3,22925,73498,32114,987,20070,903 89 | 1,3,43265,5025,8117,6312,1579,14351 90 | 1,3,7864,542,4042,9735,165,46 91 | 1,3,24904,3836,5330,3443,454,3178 92 | 1,3,11405,596,1638,3347,69,360 93 | 1,3,12754,2762,2530,8693,627,1117 94 | 2,3,9198,27472,32034,3232,18906,5130 95 | 1,3,11314,3090,2062,35009,71,2698 96 | 2,3,5626,12220,11323,206,5038,244 97 | 1,3,3,2920,6252,440,223,709 98 | 2,3,23,2616,8118,145,3874,217 99 | 1,3,403,254,610,774,54,63 100 | 1,3,503,112,778,895,56,132 101 | 1,3,9658,2182,1909,5639,215,323 102 | 2,3,11594,7779,12144,3252,8035,3029 103 | 2,3,1420,10810,16267,1593,6766,1838 104 | 2,3,2932,6459,7677,2561,4573,1386 105 | 1,3,56082,3504,8906,18028,1480,2498 106 | 1,3,14100,2132,3445,1336,1491,548 107 | 1,3,15587,1014,3970,910,139,1378 108 | 2,3,1454,6337,10704,133,6830,1831 109 | 2,3,8797,10646,14886,2471,8969,1438 110 | 2,3,1531,8397,6981,247,2505,1236 111 | 2,3,1406,16729,28986,673,836,3 112 | 1,3,11818,1648,1694,2276,169,1647 113 | 2,3,12579,11114,17569,805,6457,1519 114 | 1,3,19046,2770,2469,8853,483,2708 115 | 1,3,14438,2295,1733,3220,585,1561 116 | 1,3,18044,1080,2000,2555,118,1266 117 | 1,3,11134,793,2988,2715,276,610 118 | 1,3,11173,2521,3355,1517,310,222 119 | 1,3,6990,3880,5380,1647,319,1160 120 | 1,3,20049,1891,2362,5343,411,933 121 | 1,3,8258,2344,2147,3896,266,635 122 | 1,3,17160,1200,3412,2417,174,1136 123 | 1,3,4020,3234,1498,2395,264,255 124 | 1,3,12212,201,245,1991,25,860 125 | 
2,3,11170,10769,8814,2194,1976,143 126 | 1,3,36050,1642,2961,4787,500,1621 127 | 1,3,76237,3473,7102,16538,778,918 128 | 1,3,19219,1840,1658,8195,349,483 129 | 2,3,21465,7243,10685,880,2386,2749 130 | 1,3,140,8847,3823,142,1062,3 131 | 1,3,42312,926,1510,1718,410,1819 132 | 1,3,7149,2428,699,6316,395,911 133 | 1,3,2101,589,314,346,70,310 134 | 1,3,14903,2032,2479,576,955,328 135 | 1,3,9434,1042,1235,436,256,396 136 | 1,3,7388,1882,2174,720,47,537 137 | 1,3,6300,1289,2591,1170,199,326 138 | 1,3,4625,8579,7030,4575,2447,1542 139 | 1,3,3087,8080,8282,661,721,36 140 | 1,3,13537,4257,5034,155,249,3271 141 | 1,3,5387,4979,3343,825,637,929 142 | 1,3,17623,4280,7305,2279,960,2616 143 | 1,3,30379,13252,5189,321,51,1450 144 | 1,3,37036,7152,8253,2995,20,3 145 | 1,3,10405,1596,1096,8425,399,318 146 | 1,3,18827,3677,1988,118,516,201 147 | 2,3,22039,8384,34792,42,12591,4430 148 | 1,3,7769,1936,2177,926,73,520 149 | 1,3,9203,3373,2707,1286,1082,526 150 | 1,3,5924,584,542,4052,283,434 151 | 1,3,31812,1433,1651,800,113,1440 152 | 1,3,16225,1825,1765,853,170,1067 153 | 1,3,1289,3328,2022,531,255,1774 154 | 1,3,18840,1371,3135,3001,352,184 155 | 1,3,3463,9250,2368,779,302,1627 156 | 1,3,622,55,137,75,7,8 157 | 2,3,1989,10690,19460,233,11577,2153 158 | 2,3,3830,5291,14855,317,6694,3182 159 | 1,3,17773,1366,2474,3378,811,418 160 | 2,3,2861,6570,9618,930,4004,1682 161 | 2,3,355,7704,14682,398,8077,303 162 | 2,3,1725,3651,12822,824,4424,2157 163 | 1,3,12434,540,283,1092,3,2233 164 | 1,3,15177,2024,3810,2665,232,610 165 | 2,3,5531,15726,26870,2367,13726,446 166 | 2,3,5224,7603,8584,2540,3674,238 167 | 2,3,15615,12653,19858,4425,7108,2379 168 | 2,3,4822,6721,9170,993,4973,3637 169 | 1,3,2926,3195,3268,405,1680,693 170 | 1,3,5809,735,803,1393,79,429 171 | 1,3,5414,717,2155,2399,69,750 172 | 2,3,260,8675,13430,1116,7015,323 173 | 2,3,200,25862,19816,651,8773,6250 174 | 1,3,955,5479,6536,333,2840,707 175 | 2,3,514,7677,19805,937,9836,716 176 | 1,3,286,1208,5241,2515,153,1442 177 | 
2,3,2343,7845,11874,52,4196,1697 178 | 1,3,45640,6958,6536,7368,1532,230 179 | 1,3,12759,7330,4533,1752,20,2631 180 | 1,3,11002,7075,4945,1152,120,395 181 | 1,3,3157,4888,2500,4477,273,2165 182 | 1,3,12356,6036,8887,402,1382,2794 183 | 1,3,112151,29627,18148,16745,4948,8550 184 | 1,3,694,8533,10518,443,6907,156 185 | 1,3,36847,43950,20170,36534,239,47943 186 | 1,3,327,918,4710,74,334,11 187 | 1,3,8170,6448,1139,2181,58,247 188 | 1,3,3009,521,854,3470,949,727 189 | 1,3,2438,8002,9819,6269,3459,3 190 | 2,3,8040,7639,11687,2758,6839,404 191 | 2,3,834,11577,11522,275,4027,1856 192 | 1,3,16936,6250,1981,7332,118,64 193 | 1,3,13624,295,1381,890,43,84 194 | 1,3,5509,1461,2251,547,187,409 195 | 2,3,180,3485,20292,959,5618,666 196 | 1,3,7107,1012,2974,806,355,1142 197 | 1,3,17023,5139,5230,7888,330,1755 198 | 1,1,30624,7209,4897,18711,763,2876 199 | 2,1,2427,7097,10391,1127,4314,1468 200 | 1,1,11686,2154,6824,3527,592,697 201 | 1,1,9670,2280,2112,520,402,347 202 | 2,1,3067,13240,23127,3941,9959,731 203 | 2,1,4484,14399,24708,3549,14235,1681 204 | 1,1,25203,11487,9490,5065,284,6854 205 | 1,1,583,685,2216,469,954,18 206 | 1,1,1956,891,5226,1383,5,1328 207 | 2,1,1107,11711,23596,955,9265,710 208 | 1,1,6373,780,950,878,288,285 209 | 2,1,2541,4737,6089,2946,5316,120 210 | 1,1,1537,3748,5838,1859,3381,806 211 | 2,1,5550,12729,16767,864,12420,797 212 | 1,1,18567,1895,1393,1801,244,2100 213 | 2,1,12119,28326,39694,4736,19410,2870 214 | 1,1,7291,1012,2062,1291,240,1775 215 | 1,1,3317,6602,6861,1329,3961,1215 216 | 2,1,2362,6551,11364,913,5957,791 217 | 1,1,2806,10765,15538,1374,5828,2388 218 | 2,1,2532,16599,36486,179,13308,674 219 | 1,1,18044,1475,2046,2532,130,1158 220 | 2,1,18,7504,15205,1285,4797,6372 221 | 1,1,4155,367,1390,2306,86,130 222 | 1,1,14755,899,1382,1765,56,749 223 | 1,1,5396,7503,10646,91,4167,239 224 | 1,1,5041,1115,2856,7496,256,375 225 | 2,1,2790,2527,5265,5612,788,1360 226 | 1,1,7274,659,1499,784,70,659 227 | 1,1,12680,3243,4157,660,761,786 228 | 
2,1,20782,5921,9212,1759,2568,1553 229 | 1,1,4042,2204,1563,2286,263,689 230 | 1,1,1869,577,572,950,4762,203 231 | 1,1,8656,2746,2501,6845,694,980 232 | 2,1,11072,5989,5615,8321,955,2137 233 | 1,1,2344,10678,3828,1439,1566,490 234 | 1,1,25962,1780,3838,638,284,834 235 | 1,1,964,4984,3316,937,409,7 236 | 1,1,15603,2703,3833,4260,325,2563 237 | 1,1,1838,6380,2824,1218,1216,295 238 | 1,1,8635,820,3047,2312,415,225 239 | 1,1,18692,3838,593,4634,28,1215 240 | 1,1,7363,475,585,1112,72,216 241 | 1,1,47493,2567,3779,5243,828,2253 242 | 1,1,22096,3575,7041,11422,343,2564 243 | 1,1,24929,1801,2475,2216,412,1047 244 | 1,1,18226,659,2914,3752,586,578 245 | 1,1,11210,3576,5119,561,1682,2398 246 | 1,1,6202,7775,10817,1183,3143,1970 247 | 2,1,3062,6154,13916,230,8933,2784 248 | 1,1,8885,2428,1777,1777,430,610 249 | 1,1,13569,346,489,2077,44,659 250 | 1,1,15671,5279,2406,559,562,572 251 | 1,1,8040,3795,2070,6340,918,291 252 | 1,1,3191,1993,1799,1730,234,710 253 | 2,1,6134,23133,33586,6746,18594,5121 254 | 1,1,6623,1860,4740,7683,205,1693 255 | 1,1,29526,7961,16966,432,363,1391 256 | 1,1,10379,17972,4748,4686,1547,3265 257 | 1,1,31614,489,1495,3242,111,615 258 | 1,1,11092,5008,5249,453,392,373 259 | 1,1,8475,1931,1883,5004,3593,987 260 | 1,1,56083,4563,2124,6422,730,3321 261 | 1,1,53205,4959,7336,3012,967,818 262 | 1,1,9193,4885,2157,327,780,548 263 | 1,1,7858,1110,1094,6818,49,287 264 | 1,1,23257,1372,1677,982,429,655 265 | 1,1,2153,1115,6684,4324,2894,411 266 | 2,1,1073,9679,15445,61,5980,1265 267 | 1,1,5909,23527,13699,10155,830,3636 268 | 2,1,572,9763,22182,2221,4882,2563 269 | 1,1,20893,1222,2576,3975,737,3628 270 | 2,1,11908,8053,19847,1069,6374,698 271 | 1,1,15218,258,1138,2516,333,204 272 | 1,1,4720,1032,975,5500,197,56 273 | 1,1,2083,5007,1563,1120,147,1550 274 | 1,1,514,8323,6869,529,93,1040 275 | 1,3,36817,3045,1493,4802,210,1824 276 | 1,3,894,1703,1841,744,759,1153 277 | 1,3,680,1610,223,862,96,379 278 | 1,3,27901,3749,6964,4479,603,2503 279 | 
1,3,9061,829,683,16919,621,139 280 | 1,3,11693,2317,2543,5845,274,1409 281 | 2,3,17360,6200,9694,1293,3620,1721 282 | 1,3,3366,2884,2431,977,167,1104 283 | 2,3,12238,7108,6235,1093,2328,2079 284 | 1,3,49063,3965,4252,5970,1041,1404 285 | 1,3,25767,3613,2013,10303,314,1384 286 | 1,3,68951,4411,12609,8692,751,2406 287 | 1,3,40254,640,3600,1042,436,18 288 | 1,3,7149,2247,1242,1619,1226,128 289 | 1,3,15354,2102,2828,8366,386,1027 290 | 1,3,16260,594,1296,848,445,258 291 | 1,3,42786,286,471,1388,32,22 292 | 1,3,2708,2160,2642,502,965,1522 293 | 1,3,6022,3354,3261,2507,212,686 294 | 1,3,2838,3086,4329,3838,825,1060 295 | 2,2,3996,11103,12469,902,5952,741 296 | 1,2,21273,2013,6550,909,811,1854 297 | 2,2,7588,1897,5234,417,2208,254 298 | 1,2,19087,1304,3643,3045,710,898 299 | 2,2,8090,3199,6986,1455,3712,531 300 | 2,2,6758,4560,9965,934,4538,1037 301 | 1,2,444,879,2060,264,290,259 302 | 2,2,16448,6243,6360,824,2662,2005 303 | 2,2,5283,13316,20399,1809,8752,172 304 | 2,2,2886,5302,9785,364,6236,555 305 | 2,2,2599,3688,13829,492,10069,59 306 | 2,2,161,7460,24773,617,11783,2410 307 | 2,2,243,12939,8852,799,3909,211 308 | 2,2,6468,12867,21570,1840,7558,1543 309 | 1,2,17327,2374,2842,1149,351,925 310 | 1,2,6987,1020,3007,416,257,656 311 | 2,2,918,20655,13567,1465,6846,806 312 | 1,2,7034,1492,2405,12569,299,1117 313 | 1,2,29635,2335,8280,3046,371,117 314 | 2,2,2137,3737,19172,1274,17120,142 315 | 1,2,9784,925,2405,4447,183,297 316 | 1,2,10617,1795,7647,1483,857,1233 317 | 2,2,1479,14982,11924,662,3891,3508 318 | 1,2,7127,1375,2201,2679,83,1059 319 | 1,2,1182,3088,6114,978,821,1637 320 | 1,2,11800,2713,3558,2121,706,51 321 | 2,2,9759,25071,17645,1128,12408,1625 322 | 1,2,1774,3696,2280,514,275,834 323 | 1,2,9155,1897,5167,2714,228,1113 324 | 1,2,15881,713,3315,3703,1470,229 325 | 1,2,13360,944,11593,915,1679,573 326 | 1,2,25977,3587,2464,2369,140,1092 327 | 1,2,32717,16784,13626,60869,1272,5609 328 | 1,2,4414,1610,1431,3498,387,834 329 | 1,2,542,899,1664,414,88,522 330 | 
1,2,16933,2209,3389,7849,210,1534 331 | 1,2,5113,1486,4583,5127,492,739 332 | 1,2,9790,1786,5109,3570,182,1043 333 | 2,2,11223,14881,26839,1234,9606,1102 334 | 1,2,22321,3216,1447,2208,178,2602 335 | 2,2,8565,4980,67298,131,38102,1215 336 | 2,2,16823,928,2743,11559,332,3486 337 | 2,2,27082,6817,10790,1365,4111,2139 338 | 1,2,13970,1511,1330,650,146,778 339 | 1,2,9351,1347,2611,8170,442,868 340 | 1,2,3,333,7021,15601,15,550 341 | 1,2,2617,1188,5332,9584,573,1942 342 | 2,3,381,4025,9670,388,7271,1371 343 | 2,3,2320,5763,11238,767,5162,2158 344 | 1,3,255,5758,5923,349,4595,1328 345 | 2,3,1689,6964,26316,1456,15469,37 346 | 1,3,3043,1172,1763,2234,217,379 347 | 1,3,1198,2602,8335,402,3843,303 348 | 2,3,2771,6939,15541,2693,6600,1115 349 | 2,3,27380,7184,12311,2809,4621,1022 350 | 1,3,3428,2380,2028,1341,1184,665 351 | 2,3,5981,14641,20521,2005,12218,445 352 | 1,3,3521,1099,1997,1796,173,995 353 | 2,3,1210,10044,22294,1741,12638,3137 354 | 1,3,608,1106,1533,830,90,195 355 | 2,3,117,6264,21203,228,8682,1111 356 | 1,3,14039,7393,2548,6386,1333,2341 357 | 1,3,190,727,2012,245,184,127 358 | 1,3,22686,134,218,3157,9,548 359 | 2,3,37,1275,22272,137,6747,110 360 | 1,3,759,18664,1660,6114,536,4100 361 | 1,3,796,5878,2109,340,232,776 362 | 1,3,19746,2872,2006,2601,468,503 363 | 1,3,4734,607,864,1206,159,405 364 | 1,3,2121,1601,2453,560,179,712 365 | 1,3,4627,997,4438,191,1335,314 366 | 1,3,2615,873,1524,1103,514,468 367 | 2,3,4692,6128,8025,1619,4515,3105 368 | 1,3,9561,2217,1664,1173,222,447 369 | 1,3,3477,894,534,1457,252,342 370 | 1,3,22335,1196,2406,2046,101,558 371 | 1,3,6211,337,683,1089,41,296 372 | 2,3,39679,3944,4955,1364,523,2235 373 | 1,3,20105,1887,1939,8164,716,790 374 | 1,3,3884,3801,1641,876,397,4829 375 | 2,3,15076,6257,7398,1504,1916,3113 376 | 1,3,6338,2256,1668,1492,311,686 377 | 1,3,5841,1450,1162,597,476,70 378 | 2,3,3136,8630,13586,5641,4666,1426 379 | 1,3,38793,3154,2648,1034,96,1242 380 | 1,3,3225,3294,1902,282,68,1114 381 | 
2,3,4048,5164,10391,130,813,179 382 | 1,3,28257,944,2146,3881,600,270 383 | 1,3,17770,4591,1617,9927,246,532 384 | 1,3,34454,7435,8469,2540,1711,2893 385 | 1,3,1821,1364,3450,4006,397,361 386 | 1,3,10683,21858,15400,3635,282,5120 387 | 1,3,11635,922,1614,2583,192,1068 388 | 1,3,1206,3620,2857,1945,353,967 389 | 1,3,20918,1916,1573,1960,231,961 390 | 1,3,9785,848,1172,1677,200,406 391 | 1,3,9385,1530,1422,3019,227,684 392 | 1,3,3352,1181,1328,5502,311,1000 393 | 1,3,2647,2761,2313,907,95,1827 394 | 1,3,518,4180,3600,659,122,654 395 | 1,3,23632,6730,3842,8620,385,819 396 | 1,3,12377,865,3204,1398,149,452 397 | 1,3,9602,1316,1263,2921,841,290 398 | 2,3,4515,11991,9345,2644,3378,2213 399 | 1,3,11535,1666,1428,6838,64,743 400 | 1,3,11442,1032,582,5390,74,247 401 | 1,3,9612,577,935,1601,469,375 402 | 1,3,4446,906,1238,3576,153,1014 403 | 1,3,27167,2801,2128,13223,92,1902 404 | 1,3,26539,4753,5091,220,10,340 405 | 1,3,25606,11006,4604,127,632,288 406 | 1,3,18073,4613,3444,4324,914,715 407 | 1,3,6884,1046,1167,2069,593,378 408 | 1,3,25066,5010,5026,9806,1092,960 409 | 2,3,7362,12844,18683,2854,7883,553 410 | 2,3,8257,3880,6407,1646,2730,344 411 | 1,3,8708,3634,6100,2349,2123,5137 412 | 1,3,6633,2096,4563,1389,1860,1892 413 | 1,3,2126,3289,3281,1535,235,4365 414 | 1,3,97,3605,12400,98,2970,62 415 | 1,3,4983,4859,6633,17866,912,2435 416 | 1,3,5969,1990,3417,5679,1135,290 417 | 2,3,7842,6046,8552,1691,3540,1874 418 | 2,3,4389,10940,10908,848,6728,993 419 | 1,3,5065,5499,11055,364,3485,1063 420 | 2,3,660,8494,18622,133,6740,776 421 | 1,3,8861,3783,2223,633,1580,1521 422 | 1,3,4456,5266,13227,25,6818,1393 423 | 2,3,17063,4847,9053,1031,3415,1784 424 | 1,3,26400,1377,4172,830,948,1218 425 | 2,3,17565,3686,4657,1059,1803,668 426 | 2,3,16980,2884,12232,874,3213,249 427 | 1,3,11243,2408,2593,15348,108,1886 428 | 1,3,13134,9347,14316,3141,5079,1894 429 | 1,3,31012,16687,5429,15082,439,1163 430 | 1,3,3047,5970,4910,2198,850,317 431 | 1,3,8607,1750,3580,47,84,2501 432 | 
1,3,3097,4230,16483,575,241,2080 433 | 1,3,8533,5506,5160,13486,1377,1498 434 | 1,3,21117,1162,4754,269,1328,395 435 | 1,3,1982,3218,1493,1541,356,1449 436 | 1,3,16731,3922,7994,688,2371,838 437 | 1,3,29703,12051,16027,13135,182,2204 438 | 1,3,39228,1431,764,4510,93,2346 439 | 2,3,14531,15488,30243,437,14841,1867 440 | 1,3,10290,1981,2232,1038,168,2125 441 | 1,3,2787,1698,2510,65,477,52 442 | -------------------------------------------------------------------------------- /code_in_notes/descriptive_mode.do: -------------------------------------------------------------------------------- 1 | // descriptive_mode.do 2 | use datasets/cfps_adult.dta, clear 3 | drop if qp101<0 4 | su qp101 5 | local min_qp101=r(min) 6 | local max_qp101=r(max) 7 | local n_group=15 8 | gen group=floor((qp101-`min_qp101')/(`max_qp101'-`min_qp101'+0.0001)*`n_group') 9 | tab group 10 | // the group with the most observations is group 9 (read off the tab output; the 9 is hard-coded below) 11 | di "众数=" `min_qp101'+(9+0.5)/`n_group'*(`max_qp101'-`min_qp101'+0.0001) 12 | -------------------------------------------------------------------------------- /code_in_notes/dgp_mle_censor.ado: -------------------------------------------------------------------------------- 1 | // dgp_mle_censor.ado 2 | // data generating process: draw a normal latent variable, censor it to [3,4], store 10^x in the new variable 3 | cap program drop dgp_mle_censor 4 | program dgp_mle_censor 5 | syntax newvarlist(max=1) [, obs(integer 500) mu(real 3.2) sigma2(real 0.5)] 6 | quietly{ 7 | clear 8 | set obs `obs' 9 | tempvar xstar x 10 | gen `xstar'=sqrt(`sigma2')*rnormal()+`mu' 11 | gen `x'= `xstar' if `xstar'<4 & `xstar'>3 12 | replace `x'=4 if `xstar'>=4 13 | replace `x'=3 if `xstar'<=3 14 | gen `varlist'=10^`x' 15 | } 16 | end 17 | -------------------------------------------------------------------------------- /code_in_notes/dgp_mle_censor_simulate.do: -------------------------------------------------------------------------------- 1 | // dgp_mle_censor_simulate.do 2 | clear 3 | set seed 8855 4 | dgp_mle_censor x , obs(500) mu(3.3) sigma2(1) 5 | hist x 6 | graph export dgp_mle_censor_hist.pdf, replace 7 | 
sort x 8 | gen p=_n/_N 9 | line p x, xline(1000 10000, lpattern(dot)) 10 | graph export dgp_mle_censor_cdf.pdf, replace 11 | -------------------------------------------------------------------------------- /code_in_notes/empirical_distribution.do: -------------------------------------------------------------------------------- 1 | // empirical_distribution.do 2 | clear 3 | use datasets/cfps_adult.dta 4 | drop if qp101<0 5 | // sort 6 | sort qp101 7 | // generate the rank as a share (empirical CDF value) 8 | // note: qp101 has missing values; they are kept in the data but excluded from the denominator 9 | gen notmissing_qp101=qp101~=. 10 | qui: su notmissing_qp101 11 | gen rank_qp101=_n/r(sum) 12 | // plot, with a vertical line at the median 13 | qui: su qp101, de 14 | local median_qp101=r(p50) 15 | line rank_qp101 qp101, xline(`median_qp101') 16 | graph export empirical_distribution.pdf, replace 17 | -------------------------------------------------------------------------------- /code_in_notes/exponential.do: -------------------------------------------------------------------------------- 1 | // file: exponential.do 2 | set more off 3 | clear 4 | set obs 100 5 | // generate exponential random numbers as -log(U) with U uniform 6 | gen x=-1*log(runiform()) 7 | -------------------------------------------------------------------------------- /code_in_notes/exponential.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | ## file: exponential.py 3 | 4 | # import math to use its log() function 5 | import math 6 | import random as rd 7 | # draw 5 exponential random numbers as -log(U) from uniform draws 8 | x=[-1*math.log(rd.random()) for i in range(5)] 9 | print(x) 10 | # import numpy to use its log() function 11 | import numpy as np 12 | import numpy.random as nprd 13 | # draw 5 exponential random numbers as -log(U) from uniform draws 14 | x=-1*np.log(nprd.random(5)) 15 | print(x) 16 | -------------------------------------------------------------------------------- /code_in_notes/gaussian_mixture.do: -------------------------------------------------------------------------------- 1 | // gaussian_mixture.do 2 | set more off 3 | clear 4 | set obs 1000 5 | // generate x1 and x0 6 | gen x0=rnormal(2.5,sqrt(0.8)) 7 | gen x1=rnormal(-2,1) 8 | // 
about 30% patient 9 | gen d=runiform()<0.7 10 | // generate x: x1 when d==1, x0 otherwise 11 | gen x=d*x1+(1-d)*x0 12 | // histogram 13 | hist x 14 | -------------------------------------------------------------------------------- /code_in_notes/gencatutility.ado: -------------------------------------------------------------------------------- 1 | // gencatutility.ado: creates the variable named in Gen() holding one computed numeric value per level of varname 2 | // Chapter 2.6 in Happiness Quantified: a Satisfaction Calculus Approach, van Praag and Ferrer-i-Carbonell 3 | cap program drop gencatutility 4 | program define gencatutility 5 | version 16 6 | syntax varname, Gen(name) [Verbose] 7 | tempname level n_levels nl cum cum_nl sum_nl p_nl temp1 temp2 f last_f last_p u uu 8 | local `n_levels' = 0 9 | local `cum'=0 10 | local `cum_nl' "" 11 | local `sum_nl'=0 12 | local `p_nl' "" 13 | local `f' "" 14 | local `u' "" 15 | quietly{ 16 | levelsof `varlist', local(`level') 17 | foreach l of local `level'{ // count observations per level and build the list of cumulative counts 18 | sum `varlist' if `varlist' == `l' 19 | local `nl' = r(N) 20 | local `cum'=``cum''+``nl'' 21 | local `cum_nl' "``cum_nl'' ``cum''" 22 | local `n_levels'=``n_levels''+1 23 | local `sum_nl'=``sum_nl''+r(N) 24 | } 25 | foreach l of local `cum_nl'{ // cumulative share, and the normal density at its normal quantile (0 when that quantile is missing) 26 | local `nl'=`l'/``sum_nl'' 27 | local `p_nl' "``p_nl'' ``nl''" 28 | local `temp1'=invnormal(``nl'') 29 | local `temp2'=normalden(``temp1'',0,1) 30 | if ``temp1''==.{ 31 | local `temp2'=0 32 | } 33 | local `f' "``f'' ``temp2''" 34 | } 35 | forvalues i=1/``n_levels''{ // value of level i: previous density minus current density, divided by the rise in cumulative share 36 | local `temp1': word `i' of ``f'' 37 | local `temp2': word `i' of ``p_nl'' 38 | if `i'==1{ 39 | local `uu'=(0-``temp1'')/(``temp2'') 40 | local `u' "``u'' ``uu''" 41 | local `last_f'=``temp1'' 42 | local `last_p'=``temp2'' 43 | } 44 | else{ 45 | local `uu'=(``last_f''-``temp1'')/(``temp2''-``last_p'') 46 | local `u' "``u'' ``uu''" 47 | local `last_f'=``temp1'' 48 | local `last_p'=``temp2'' 49 | } 50 | } 51 | gen `gen'=. 
52 | if "`verbose'" == "verbose"{ 53 | noisily: di "level --- utility" 54 | } 55 | forvalues i=1/``n_levels''{ 56 | local `temp1': word `i' of ``level'' 57 | local `temp2': word `i' of ``u'' 58 | replace `gen'=``temp2'' if `varlist'==``temp1'' 59 | if "`verbose'" == "verbose"{ 60 | noisily: di "``temp1'' --- ``temp2''" 61 | } 62 | } 63 | } 64 | end 65 | -------------------------------------------------------------------------------- /code_in_notes/geometric_mean.do: -------------------------------------------------------------------------------- 1 | // geometric_mean.do 2 | use datasets/hs300index.dta, clear 3 | // sort by date and use the row number as the new time index 4 | sort day 5 | gen t=_n 6 | tsset t 7 | // compute the gross growth rate (ratio to the previous observation) and its log 8 | gen r=clsindex/L.clsindex 9 | gen log_r=log(r) 10 | su log_r 11 | local geo_mean=exp(r(mean)) 12 | di "几何平均数=`geo_mean'" 13 | di "第一天沪深300指数=" clsindex[1] 14 | di "最后一天沪深300指数=" clsindex[_N] 15 | di "使用几何平均数计算:" clsindex[1]*(`geo_mean'^(_N-1)) 16 | -------------------------------------------------------------------------------- /code_in_notes/ginindex.do: -------------------------------------------------------------------------------- 1 | cap program drop ginindex 2 | program define ginindex, byable(onecall) 3 | version 15 4 | syntax varlist(max=1), gen(name) 5 | quietly{ 6 | if "`_byvars'"==""{ 7 | tempvar class 8 | gen `class'=1 9 | local _byvars "`class'" 10 | } 11 | // temporary variables 12 | tempvar rank totalincome cum rat num rect B delta 13 | // sort and generate the within-group rank 14 | sort `_byvars' `varlist' 15 | by `_byvars': gen `rank'=_n 16 | // per-group household count, total income, cumulative income and cumulative share 17 | egen `num'=count(`varlist'), by(`_byvars') 18 | egen `totalincome'=total(`varlist'), by(`_byvars') 19 | gen `cum'=`varlist' if `rank'==1 20 | by `_byvars': replace `cum'=`cum'[_n-1]+`varlist'[_n] if `rank'>1 21 | gen `rat'=`cum'/`totalincome' 22 | // area of each observation's bar 23 | by `_byvars': gen `rect'=`rat'/`num' 24 | // small-sample correction: subtract the small triangle on top of each bar; NOTE(review): for rank 1 the rise from 0 is the cumulative share itself, so confirm the extra /2 on the next line is intended 25 | gen `delta'=`rat'/2 if `rank'==1 26 | by `_byvars': replace `delta'=/* 27 | */ `rat'[_n]-`rat'[_n-1] if 
`rank'>1 28 | replace `rect'=`rect'-`delta'/`num'/2 29 | // final index: 1 minus twice the summed bar area 30 | egen `B'=total(`rect'), by(`_byvars') 31 | gen `gen'=1-2*`B' 32 | } 33 | end 34 | 35 | use "cfps_family_econ.dta", clear // NOTE(review): the other scripts load data from datasets/; confirm this path 36 | ginindex fincome1, gen(gini_all) 37 | bysort provcd14: ginindex fincome1, gen(gini) 38 | -------------------------------------------------------------------------------- /code_in_notes/histogram_kdensity.do: -------------------------------------------------------------------------------- 1 | // histogram_kdensity.do 2 | clear 3 | use datasets/cfps_adult.dta 4 | drop if qp101<0 5 | twoway (hist qp101,bin(40) density) (kdensity qp101) 6 | graph export hist_kdens.pdf, replace 7 | -------------------------------------------------------------------------------- /code_in_notes/importance_sampling.py: -------------------------------------------------------------------------------- 1 | ## importance_sampling.py 2 | import numpy as np 3 | from numpy import random as nprd 4 | ## set parameters 5 | M = 10000 6 | h1 = lambda x: 0.5 * np.exp(-90 * (x[0] - 0.5)**2 - 45 * (x[1] + 0.1)**2) 7 | domain = lambda x: (x[0] >= -1) * (x[1] >= -1) * (x[0] <= 1) * (x[1] <= 1) 8 | pai = lambda x: 1 / 4 * domain(x) 9 | h1_star = lambda x: 4 * h1(x) 10 | mu1 = 0.5 11 | mu2 = -0.1 12 | sigma1 = np.sqrt(1 / 180) 13 | sigma2 = np.sqrt(1 / 90) 14 | m = lambda x: 1/(2 * np.pi * sigma1 * sigma2) * \ 15 | np.exp(-1 * (x[0]-mu1)**2/(2 * sigma1**2)\ 16 | - (x[1]-mu2)**2 / (2 * sigma2**2)) 17 | 18 | # sample from m(x) 19 | x = [(nprd.normal(mu1, sigma1), nprd.normal(mu2, sigma2))\ 20 | for i in range(M)] 21 | 22 | ## compute the integral as the mean of the weighted draws, with its standard error 23 | H = list(map(lambda x: h1_star(x) * pai(x) / m(x), x)) 24 | integral = np.mean(H) 25 | se = np.std(H) / np.sqrt(M) 26 | print("Intgral=", integral) 27 | print("s.e. 
of Integral=", se) 28 | print("95% C.I.:", integral - 1.96 * se, "~", integral + 1.96 * se) 29 | 30 | -------------------------------------------------------------------------------- /code_in_notes/qqplot_hs300.do: -------------------------------------------------------------------------------- 1 | // qqplot_hs300.do 2 | clear 3 | use datasets/hs300index.dta 4 | // compute the quantile level; 0.5 is subtracted to avoid exactly 0 and 1 5 | sort retindex 6 | gen q=(_n-0.5)/_N 7 | // compute mean and standard deviation 8 | sum retindex 9 | local mu=r(mean) 10 | local sd=r(sd) 11 | // compute the matching quantiles of the normal distribution 12 | gen normal_q = `sd'*invnormal(q)+`mu' 13 | // plot 14 | twoway (scatter retindex normal_q) /* 15 | */(line normal_q normal_q), legend(off) 16 | graph export "qqplot_manual_hs300.pdf", replace 17 | // built-in command 18 | qnorm retindex 19 | graph export "qqplot_hs300.pdf", replace 20 | // histogram 21 | hist retindex, norm 22 | graph export "hist_hs300.pdf", replace 23 | -------------------------------------------------------------------------------- /code_in_notes/rejection.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | ## file: rejection.py 3 | import numpy as np 4 | import numpy.random as nprd 5 | import scipy.special as scisp 6 | ## set parameters 7 | c=3 #truncation point: normal draws are kept relative to c 8 | N=1000 #number of samples to draw 9 | ## rejection sampling directly from the normal distribution 10 | times=0 #number of normal draws generated 11 | accept=0 #number accepted 12 | sample1=[] #results; NOTE(review): listing lines 14-15 after the next line are missing from this dump (text lost in extraction) 13 | while accept=c: 16 | sample1.append(x) 17 | accept=accept+1 18 | times=times+1 19 | print("接受率=",N/times) 20 | 21 | ## rejection sampling with an exponential proposal 22 | times=0 #number of exponential draws generated 23 | accept=0 #number accepted 24 | sample2=[] #results 25 | # compute parameters 26 | lam=(c+np.sqrt(c**2+4))/2 27 | M=np.exp((lam**2-2*lam*c)/2)/(np.sqrt(2*np.pi)* \ 28 | lam*(1-scisp.ndtr(c))) 29 | normal_m=1/np.sqrt(2*np.pi) #normalizing constant of the normal density 30 | # truncated normal density 31 | f_trunc_normal = lambda x: \ 32 | normal_m*np.exp(-0.5*x**2)/(1-scisp.ndtr(c)) 33 | # exponential density shifted to start at c; NOTE(review): the listing is truncated after line 35 and resumes inside simulate_CLT.do 34 | f_exponential = lambda x: lam*np.exp(-1*lam*(x-c)) 35 | while accept=0.5 8 | gen x=d*rnormal(3,1)+(1-d)*rnormal(-3,1) 9 | hist x 10 | graph 
export simulate_CLT_mix.pdf, replace 11 | // simulation: for each sample size N, post 2000 sample means to a frame and plot their histogram 12 | local graphs "" 13 | foreach N in 2 3 4 10 100 1000{ 14 | frame create mean`N' m 15 | frame mean`N': label variable m "样本均值" 16 | forvalues j=1/2000{ 17 | clear 18 | set obs `N' 19 | gen d=runiform()>=0.5 20 | gen x=d*rnormal(3,1)+(1-d)*rnormal(-3,1) 21 | su x 22 | frame post mean`N' (r(mean)) 23 | } 24 | frame mean`N': hist m, normal saving(simulate_CLT`N', replace) title("N=`N'") 25 | local graphs "`graphs' simulate_CLT`N'.gph" 26 | } 27 | graph combine `graphs', col(3) 28 | graph export simulate_CLT.pdf, replace 29 | -------------------------------------------------------------------------------- /code_in_notes/simulate_CLT.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy import random as nprd 3 | 4 | 5 | def sampling(N): 6 | ## draw one sample of size N: z+3 with probability 0.5 and z-3 with probability 0.5, where z~N(0,1) 7 | d = nprd.rand(N) < 0.5 8 | z = nprd.randn(N) 9 | x = np.array([z[i] + 3 if d[i] else z[i] - 3 for i in range(N)]) 10 | return x 11 | 12 | 13 | N = [2, 3, 4, 10, 100, 1000] # sample size 14 | M = 2000 15 | MEANS = [] 16 | for n in N: 17 | mean_x = np.zeros(M) 18 | for i in range(M): 19 | x = sampling(n) 20 | mean_x[i] = np.mean(x) / np.sqrt(10 / n) ## standardize, since var(x)=10 21 | MEANS.append(mean_x) 22 | 23 | ## import matplotlib 24 | import matplotlib.pyplot as plt 25 | 26 | # set the figure size 27 | plt.rcParams['figure.figsize'] = (10.0, 8.0) 28 | 29 | x = sampling(1000) 30 | plt.xlabel('x') 31 | plt.ylabel('Density') 32 | plt.title('Histogram of Mixed Normal') 33 | plt.hist(x, bins=30, density=True) ## histogram 34 | plt.savefig("simulate_CLT_mix.pdf") 35 | plt.show() 36 | 37 | ## sample means: one subplot per sample size 38 | ax1 = plt.subplot(2, 3, 1) 39 | ax2 = plt.subplot(2, 3, 2) 40 | ax3 = plt.subplot(2, 3, 3) 41 | ax4 = plt.subplot(2, 3, 4) 42 | ax5 = plt.subplot(2, 3, 5) 43 | ax6 = plt.subplot(2, 3, 6) 44 | 45 | ## normal density 46 | x = np.linspace(-3, 3, 100) 47 | d = [1.0 / np.sqrt(2 * np.pi) * np.exp(-i**2 / 2) for 
i in x] 48 | 49 | 50 | def plot_density(ax, data, N): 51 | ax.hist(data, bins=30, density=True) ## histogram 52 | ax.plot(x, d) 53 | ax.set_title(r'Histogram of $\bar{x}$:N=%d' % N) 54 | 55 | 56 | plot_density(ax1, MEANS[0], N[0]) 57 | plot_density(ax2, MEANS[1], N[1]) 58 | plot_density(ax3, MEANS[2], N[2]) 59 | plot_density(ax4, MEANS[3], N[3]) 60 | plot_density(ax5, MEANS[4], N[4]) 61 | plot_density(ax6, MEANS[5], N[5]) 62 | plt.savefig("simulate_CLT.pdf") 63 | plt.show() -------------------------------------------------------------------------------- /code_in_notes/simulate_LLN.do: -------------------------------------------------------------------------------- 1 | // simulate_LLN.do 2 | clear 3 | set more off 4 | set seed 8855 5 | local M=5000 6 | set obs `M' 7 | gen u=runiform() 8 | gen x=u>=0.5 9 | gen x_bar=. 10 | forvalues i=1/`M'{ // x_bar in row i is the mean of the first i draws 11 | qui{ 12 | su x if _n<=`i' 13 | replace x_bar = r(mean) if _n==`i' 14 | } 15 | } 16 | gen n=_n 17 | line x_bar n, yline(0.5) 18 | graph export simulate_LLN.pdf, replace 19 | -------------------------------------------------------------------------------- /code_in_notes/simulate_LLN.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy import random as nprd 3 | 4 | True_P = 0.5 5 | 6 | 7 | def sampling(N): 8 | ## draw a Bernoulli sample of size N with success probability True_P 9 | x = nprd.rand(N) < True_P 10 | return x 11 | 12 | 13 | M = 10000 #number of simulated draws 14 | xbar = np.zeros(M) 15 | N = np.array([i + 1 for i in range(M)]) 16 | x = sampling(M) 17 | for i in range(M): 18 | if i == 0: 19 | xbar[i] = x[i] 20 | else: 21 | xbar[i] = (x[i] + xbar[i - 1] * i) / (i + 1) 22 | 23 | ## import matplotlib 24 | import matplotlib.pyplot as plt 25 | # set the figure size 26 | plt.rcParams['figure.figsize'] = (10.0, 8.0) 27 | 28 | plt.plot(N, xbar, label=r'$\bar{x}$', color='pink') ## running sample mean 29 | xtrue = np.ones(M) * True_P 30 | plt.plot(N, xtrue, label=r'$0.5$', color='black') ## true mean 31 | plt.xlabel('N') 32 | plt.ylabel(r'$\bar{x}$') 33 | 
plt.legend(loc='upper right', frameon=True) 34 | plt.savefig("simulate_LLN.pdf") 35 | plt.show() -------------------------------------------------------------------------------- /code_in_notes/simulation_lm_ols_test.do: -------------------------------------------------------------------------------- 1 | // simulation_lm_ols_test.do 2 | clear 3 | set more off 4 | set seed 42 5 | // LM test statistic for H0: beta=0, built from residuals of the intercept-only fit; returned in r(chi2_stat) 6 | cap program drop ols_lm_test_stat 7 | program define ols_lm_test_stat, rclass 8 | syntax [,obs(integer 100) beta(real 0) alpha(real 1) sigma(real 1)] 9 | clear 10 | tempvar x y e xe 11 | quietly{ 12 | set obs `obs' 13 | gen `x'=rnormal(0,2) 14 | gen `y'=`x'*`beta'+`alpha'+rnormal(0,`sigma') 15 | su `y' 16 | gen `e'=`y'-r(mean) 17 | gen `xe'=`x'*`e' 18 | sum `xe' 19 | return scalar chi2_stat=(r(sum)^2)/(`obs'*r(Var)) 20 | } 21 | end 22 | // 5% upper-tail critical value of chi2(1) 23 | local cv=invchi2tail(1,0.05) 24 | // simulation under H0 (default beta=0); p_005 is the rejection indicator at the 5% level 25 | simulate chi2=r(chi2_stat),reps(5000): ols_lm_test_stat 26 | gen density=chi2den(1,chi2) if chi2>0.1 27 | sort chi2 28 | twoway (hist chi2, density) (line density chi2), xline(`cv') 29 | graph export simulation_lm_ols_test0.pdf, replace 30 | gen p_005=chi2>=`cv' 31 | su p_005 32 | // simulation under H1 (beta=0.2) 33 | clear 34 | simulate chi2=r(chi2_stat),reps(5000): ols_lm_test_stat, beta(0.2) 35 | gen density=chi2den(1,chi2) if chi2>0.1 36 | sort chi2 37 | twoway (hist chi2, density) (line density chi2), xline(`cv') 38 | graph export simulation_lm_ols_test1.pdf, replace 39 | gen p_005=chi2>=`cv' 40 | su p_005 41 | -------------------------------------------------------------------------------- /code_in_notes/simulation_lm_test.do: -------------------------------------------------------------------------------- 1 | // simulation_lm_test.do 2 | clear 3 | set more off 4 | set seed 42 5 | // LM test statistic for Poisson draws under H0: lambda=10; returned in r(chi2_stat) 6 | cap program drop lm_test_stat 7 | program define lm_test_stat, rclass 8 | syntax [,obs(integer 100) lambda(real 10)] 9 | clear 10 | tempvar x x_10 11 | 
quietly{ 12 | set obs `obs' 13 | gen `x'=rpoisson(`lambda') 14 | gen `x_10'=`x'-10 15 | su `x_10' 16 | return scalar chi2_stat=(r(sum))^2/(`obs'*10) 17 | } 18 | end 19 | // 5% upper-tail critical value of chi2(1) 20 | local cv=invchi2tail(1,0.05) 21 | // simulation under H0 (default lambda=10); p_005 is the rejection indicator at the 5% level 22 | simulate chi2=r(chi2_stat),reps(5000): lm_test_stat 23 | gen density=chi2den(1,chi2) if chi2>0.1 24 | sort chi2 25 | twoway (hist chi2, density) (line density chi2), xline(`cv') 26 | graph export simulation_lm_test0.pdf, replace 27 | gen p_005=chi2>=`cv' 28 | su p_005 29 | // simulation under H1 (lambda=11.5) 30 | clear 31 | simulate chi2=r(chi2_stat),reps(5000): lm_test_stat, lambda(11.5) 32 | gen density=chi2den(1,chi2) if chi2>0.1 33 | sort chi2 34 | twoway (hist chi2, density) (line density chi2), xline(`cv') 35 | graph export simulation_lm_test1.pdf, replace 36 | gen p_005=chi2>=`cv' 37 | su p_005 38 | -------------------------------------------------------------------------------- /code_in_notes/simulation_lr_test.do: -------------------------------------------------------------------------------- 1 | // simulation_lr_test.do 2 | clear 3 | set more off 4 | set seed 42 5 | // LR test statistic for Poisson draws under H0: lambda=1; returned in r(chi2_stat) 6 | cap program drop lr_test_stat 7 | program define lr_test_stat, rclass 8 | syntax [,obs(integer 100) lambda(real 1)] 9 | clear 10 | tempvar x 11 | quietly{ 12 | set obs `obs' 13 | gen `x'=rpoisson(`lambda') 14 | su `x' 15 | return scalar chi2_stat=2*`obs'*(log(r(mean))*r(mean)-r(mean)+1) 16 | } 17 | end 18 | // 5% upper-tail critical value of chi2(1) 19 | local cv=invchi2tail(1,0.05) 20 | // simulation under H0 (default lambda=1); p_005 is the rejection indicator at the 5% level 21 | simulate chi2=r(chi2_stat),reps(5000): lr_test_stat 22 | gen density=chi2den(1,chi2) if chi2>0.1 23 | sort chi2 24 | twoway (hist chi2, density) (line density chi2), xline(`cv') 25 | graph export simulation_lr_test0.pdf, replace 26 | gen p_005=chi2>=`cv' 27 | su p_005 28 | // simulation under H1 (lambda=1.5) 29 | clear 30 | simulate chi2=r(chi2_stat),reps(5000): lr_test_stat, lambda(1.5) 31 | gen density=chi2den(1,chi2) if chi2>0.1 32 | sort chi2 
33 | twoway (hist chi2, density) (line density chi2), xline(`cv') 34 | graph export simulation_lr_test1.pdf, replace 35 | gen p_005=chi2>=`cv' 36 | su p_005 37 | -------------------------------------------------------------------------------- /code_in_notes/simulation_wald_test.do: -------------------------------------------------------------------------------- 1 | // simulation_wald_test.do 2 | clear 3 | set more off 4 | set seed 42 5 | // test statistic 6 | cap program drop wald_test_stat 7 | program define wald_test_stat, rclass 8 | syntax [,obs(integer 100) mu(real -1) sigma2(real 1)] 9 | clear 10 | tempvar x 11 | quietly{ 12 | set obs `obs' 13 | gen `x'=rnormal(`mu',sqrt(`sigma2')) 14 | su `x' 15 | return scalar chi2_stat=`obs'*(r(mean)^2-1)^2/(4*r(mean)^2*r(Var)) 16 | } 17 | end 18 | // upper tail of chi2(1) 19 | local cv=invchi2tail(1,0.05) 20 | // simulation under H0 21 | simulate chi2=r(chi2_stat),reps(5000): wald_test_stat 22 | gen density=chi2den(1,chi2) if chi2>0.1 23 | sort chi2 24 | twoway (hist chi2, density) (line density chi2), xline(`cv') 25 | graph export simulation_wald_test0.pdf, replace 26 | gen p_005=chi2>=`cv' 27 | su p_005 28 | // simulation under H1 29 | clear 30 | simulate chi2=r(chi2_stat),reps(5000): wald_test_stat, mu(1.5) 31 | gen density=chi2den(1,chi2) if chi2>0.1 32 | sort chi2 33 | twoway (hist chi2, density) (line density chi2), xline(`cv') 34 | graph export simulation_wald_test1.pdf, replace 35 | gen p_005=chi2>=`cv' 36 | su p_005 37 | -------------------------------------------------------------------------------- /code_in_notes/st_call_py_tree.do: -------------------------------------------------------------------------------- 1 | clear 2 | set more off 3 | use datasets/soep_female_labor.dta 4 | 5 | // 定义变量 6 | local y "employment" 7 | local controls "chld6 chld16 age income husworkhour husemployment region edu husedu" 8 | 9 | // 首先做logit回归做初步探索 10 | logit `y' `controls' 11 | predict ps_employment_logit 12 | gen 
predict_employment_logit = ps_employment_logit>=0.5 13 | gen accuracy_logit = employment == predict_employment_logit 14 | 15 | //放到Python中去做回归树 16 | python 17 | import numpy as np 18 | import pandas as pd 19 | from sfi import Data, Macro, Missing 20 | def stata2numpy(varname): 21 | data=Data.get(varname) 22 | x=[d if not Missing.isMissing(d) else np.nan for d in data] 23 | return np.array(x) 24 | 25 | y_var_name = Macro.getLocal("y") 26 | x_var_name = Macro.getLocal("controls").split(" ") 27 | y=stata2numpy(y_var_name) 28 | X=pd.DataFrame() 29 | for x in x_var_name: 30 | X[x]=stata2numpy(x) 31 | print(X) 32 | from sklearn import tree 33 | dtree=tree.DecisionTreeClassifier(max_depth=10) ##设定最大深度 34 | dtree.fit(X,y) ##训练 35 | prob_df=dtree.predict_proba(X)[:,1] 36 | pred_df=dtree.predict(X) 37 | 38 | def array2stata(npname): 39 | return [n if np.isfinite(n) else Missing.getValue() for n in npname] 40 | 41 | prob=array2stata(prob_df) 42 | pred=array2stata(pred_df) 43 | Data.addVarDouble("predict_employment_tree") 44 | Data.addVarDouble("ps_employment_tree") 45 | Data.store("predict_employment_tree",None,pred) 46 | Data.store("ps_employment_tree",None,prob) 47 | end 48 | 49 | gen accuracy_tree = employment == predict_employment_tree 50 | su accuracy_logit accuracy_tree -------------------------------------------------------------------------------- /code_in_notes/standard_normal_mean.do: -------------------------------------------------------------------------------- 1 | // standard_normal_mean.do 2 | clear 3 | set more off 4 | cap program drop std_normal_mean 5 | program define std_normal_mean, rclass 6 | version 12 7 | syntax [,obs(integer 10) mu(real 0) sigma(real 1)] 8 | drop _all 9 | set obs `obs' 10 | tempvar x 11 | gen `x'=`sigma'*rnormal()+`mu' 12 | su `x' 13 | return scalar mean1=sqrt(`obs')*(r(mean)-`mu')/`sigma' 14 | return scalar mean2=sqrt(`obs')*(r(mean)-`mu')/r(sd) 15 | end 16 | local N=8 17 | local df=`N'-1 18 | simulate std_mean=r(mean1) 
sample_mean=r(mean2), reps(20000): std_normal_mean, obs(`N') 19 | 20 | // standardised with sigma 21 | sort std_mean 22 | gen normal_den_std_mean=normalden(std_mean) 23 | label variable normal_den_std_mean "Std Normal Density" 24 | gen student_den_std_mean=tden(`df',std_mean) 25 | label variable student_den_std_mean "t(`df') Density" 26 | twoway (hist std_mean, density) (line normal_den_std_mean std_mean) (line student_den_std_mean std_mean) , graphr(fcolor(white) color(white)) xtitle("") saving(standard_normal_mean_1, replace) 27 | 28 | // standardised with std deviation 29 | sort sample_mean 30 | gen normal_den_std_mean2=normalden(sample_mean) 31 | label variable normal_den_std_mean2 "Std Normal Density" 32 | gen t_den_std_sample_mean=tden(`df',sample_mean) 33 | label variable t_den_std_sample_mean "t(`df') Density" 34 | twoway (hist sample_mean, density) (line normal_den_std_mean2 sample_mean) (line t_den_std_sample_mean sample_mean), graphr(fcolor(white) color(white)) xtitle("") saving(standard_normal_mean_2, replace) 35 | 36 | graph combine standard_normal_mean_1.gph standard_normal_mean_2.gph, graphr(fcolor(white) color(white)) col(2) 37 | graph export standard_normal_mean.pdf, replace 38 | -------------------------------------------------------------------------------- /code_in_notes/standard_normal_var.do: -------------------------------------------------------------------------------- 1 | // standard_normal_var.do 2 | cap program drop std_normal_var 3 | program define std_normal_var, rclass 4 | version 12 5 | syntax [,obs(integer 10) mu(real 0) sigma(real 1)] 6 | drop _all 7 | set obs `obs' 8 | tempvar x 9 | gen `x'=`sigma'*rnormal()+`mu' 10 | su `x' 11 | return scalar std_var=(`obs'-1)*r(Var)/(`sigma'^2) 12 | end 13 | 14 | simulate std_var=r(std_var), reps(20000): std_normal_var 15 | 16 | 17 | sort std_var 18 | gen chi2_den=chi2den(9,std_var) 19 | label variable chi2_den "Chi2 Density" 20 | twoway (hist std_var, density) (line chi2_den std_var), 
graphr(fcolor(white) color(white)) xtitle("") 21 | graph export standard_normal_var.pdf, replace 22 | -------------------------------------------------------------------------------- /code_in_notes/statistic_mean.do: -------------------------------------------------------------------------------- 1 | // statistic_mean.do 2 | clear all 3 | set seed 19880505 4 | frame create means m 5 | forvalues i=1/10000{ 6 | clear 7 | set obs 100 8 | gen x=rnormal(1,2) 9 | qui: su x 10 | frame post means (r(mean)) 11 | } 12 | frame change means 13 | su m 14 | hist m 15 | graph export statistc_mean.pdf, replace 16 | -------------------------------------------------------------------------------- /code_in_notes/test_power_function.do: -------------------------------------------------------------------------------- 1 | // test_power_function.do 2 | clear 3 | set more off 4 | 5 | ** test under H0: mu=mu0 at 5% significant level 6 | cap program drop test_normal_mean 7 | program define test_normal_mean, rclass 8 | version 12 9 | syntax [,obs(integer 10) mu(real 0) mu0(real 0) sigma(real 1)] 10 | drop _all 11 | set obs `obs' 12 | tempvar x 13 | gen `x'=`sigma'*rnormal()+`mu' 14 | quietly: su `x' 15 | tempname std_mean rejected 16 | scalar `std_mean'=sqrt(`obs')*(r(mean)-`mu0')/r(sd) 17 | di `std_mean' 18 | if abs(`std_mean')>invt(`obs'-1,1-0.05/2){ 19 | scalar `rejected'=1 20 | } 21 | else{ 22 | scalar `rejected'=0 23 | } 24 | return scalar rejected=`rejected' 25 | end 26 | 27 | ** replicate test for 1000 times by default 28 | cap program drop compute_power 29 | program define compute_power, rclass 30 | version 12 31 | syntax [,obs(integer 10) mu(real 0) mu0(real 0)/* 32 | */ sigma(real 1) reps(integer 1000)] 33 | quietly{ 34 | preserve // 缓存内存中现有的变量 35 | drop _all 36 | simulate rejected=r(rejected),reps(`reps'):/* 37 | */ test_normal_mean, obs(`obs') mu(`mu')/* 38 | */ mu0(`mu0') sigma(`sigma') 39 | quietly: su rejected 40 | return scalar power=r(mean) 41 | restore // 恢复preserve之前内存变量 
42 | } 43 | end 44 | 45 | ** draw power function for sample size=[5 30 100] 46 | set obs 100 47 | gen mu=-3+(_n-1)*6/99 48 | tempname mu 49 | gen power5=. 50 | gen power30=. 51 | gen power100=. 52 | foreach ss in 5 30 100{ 53 | forvalues i= 1/100{ 54 | ** mu in [-3,3], same grid as the variable mu above 55 | local `mu'=-3+(`i'-1)*6/99 56 | compute_power, mu(``mu'') obs(`ss') 57 | replace power`ss'=r(power) in `i' 58 | } 59 | } 60 | label variable mu "True value of mu" 61 | label variable power5 "Power 5 Samples" 62 | label variable power30 "Power 30 Samples" 63 | label variable power100 "Power 100 Samples" 64 | twoway (line power5 mu, lpattern(solid))/* 65 | */ (line power30 mu, lpattern(dash))/* 66 | */ (line power100 mu, lpattern(dot)) 67 | graph export power_function.pdf, replace 68 | -------------------------------------------------------------------------------- /code_in_notes/test_under_null.do: -------------------------------------------------------------------------------- 1 | // test_under_null.do 2 | clear 3 | set more off 4 | set seed 880505 5 | // test under H0: mu=mu0 at 5% significance level 6 | cap program drop test_normal_mean 7 | program define test_normal_mean, rclass 8 | version 12 9 | syntax [,obs(integer 10) mu(real 0) mu0(real 0) sigma(real 1)] 10 | drop _all 11 | set obs `obs' 12 | tempvar x 13 | gen `x'=`sigma'*rnormal()+`mu' 14 | quietly: su `x' 15 | tempname std_mean rejected 16 | scalar `std_mean'=sqrt(`obs')*(r(mean)-`mu0')/r(sd) 17 | di `std_mean' 18 | if abs(`std_mean')>invt(`obs'-1,1-0.05/2){ 19 | scalar `rejected'=1 20 | } 21 | else{ 22 | scalar `rejected'=0 23 | } 24 | return scalar rejected=`rejected' 25 | end 26 | 27 | simulate rejected=r(rejected),reps(10000): test_normal_mean 28 | su rejected 29 | -------------------------------------------------------------------------------- /code_in_notes/ttest_mean_comparison.do: -------------------------------------------------------------------------------- 1 | // t-test of two means comparison 2 | use 
datasets/cfps_adult.dta, clear 3 | gen log_income=log(1+p_income) 4 | // test assuming equal variances 5 | ttest log_income, by(cfps_gender) 6 | // test assuming unequal variances 7 | ttest log_income, by(cfps_gender) unequal 8 | // comparison under the unequal-variance assumption 9 | -------------------------------------------------------------------------------- /code_in_notes/twosls_by_hand.do: -------------------------------------------------------------------------------- 1 | // 2SLS by hand 2 | cap program drop twosls 3 | program twosls, eclass 4 | version 15.0 5 | syntax varlist(max=1), endo(varlist) instru(varlist) /* 6 | */control(varlist) 7 | tempname dep Xhat XhhX pred_endo pred_var b sigma2 V 8 | tempvar resid 9 | local `dep' "`varlist'" 10 | quietly{ 11 | // first stage: get the predicted value of each endo x 12 | local `pred_var' "" 13 | foreach x of varlist `endo'{ 14 | reg `x' `instru' `control' 15 | predict `pred_endo'_`x' 16 | local `pred_var' "``pred_var'' `pred_endo'_`x'" 17 | } 18 | local `Xhat' "``pred_var'' `control'" 19 | // ols of y on Xhat, get b 20 | reg ``dep'' ``Xhat'' 21 | matrix `b'=e(b) 22 | mat colnames `b'= `endo' `control' _cons 23 | ///// compute variance under homoskedasticity 24 | // first compute residuals from the original regressors (not the first-stage fits) 25 | gen `resid'=``dep'' 26 | foreach x of varlist `endo' `control'{ 27 | replace `resid'=`resid'-`b'[1,colnumb(`b',"`x'")]*`x' 28 | } 29 | replace `resid'=`resid'-`b'[1,colnumb(`b',"_cons")] 30 | su `resid' 31 | scalar `sigma2'=r(Var) 32 | // then compute XhhX 33 | matrix accum `XhhX'=``Xhat'' 34 | matrix `V'=`sigma2'*invsym(`XhhX') 35 | mat colnames `V'= `endo' `control' _cons 36 | mat rownames `V'= `endo' `control' _cons 37 | drop ``pred_var'' 38 | } 39 | ereturn clear 40 | ereturn post `b' `V' 41 | ereturn local depvar "``dep''" 42 | ereturn display 43 | end 44 | // test the program 45 | clear 46 | set more off 47 | set obs 1000 48 | // generate fake data 49 | gen z=rnormal() 50 | gen u=rnormal() 51 | gen x1=z+rnormal()+u 52 | gen x2=z+0.5*rnormal() 53 | gen y=2+x1+x2+u 54 | // compare it with ivregress command 
55 | ivregress 2sls y (x1=z) x2 56 | twosls y, endo(x1) instru(z) control(x2) 57 | -------------------------------------------------------------------------------- /code_in_notes/uniform.c: -------------------------------------------------------------------------------- 1 | // file: uniform.c 2 | #include <stdio.h> 3 | #include <stdlib.h> // standard library, provides rand() 4 | int main(int argc, char const *argv[]) { 5 | // print the largest integer rand() can return 6 | printf("%d\n",RAND_MAX); 7 | // set the seed 8 | srand(505); 9 | // draw 5 random numbers in [0,1] 10 | int i; 11 | for (i=0; i<5; ++i){ 12 | double x=(double)rand()/RAND_MAX; 13 | printf("%f ",x); 14 | } 15 | printf("\n"); 16 | // if the seed is reset before each of the 5 draws, 17 | // every draw returns the same number 18 | for (i=0; i<5; ++i){ 19 | srand(505); 20 | double x=(double)rand()/RAND_MAX; 21 | printf("%f ",x); 22 | } 23 | printf("\n"); 24 | return 0; 25 | } 26 | -------------------------------------------------------------------------------- /code_in_notes/uniform.do: -------------------------------------------------------------------------------- 1 | // file: uniform.do 2 | // turn off --more-- 3 | set more off 4 | // clear all data in memory 5 | clear 6 | // set the sample size 7 | set obs 100 8 | // set the seed 9 | set seed 505 10 | // generate random numbers 11 | gen x=runiform() 12 | -------------------------------------------------------------------------------- /code_in_notes/uniform.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | ## file: uniform.py 3 | 4 | # import the random package 5 | import random as rd 6 | # set the seed 7 | rd.seed(505) 8 | # draw 5 uniform random numbers 9 | x=[rd.random() for i in range(5)] 10 | print(x) 11 | # import the random package from numpy 12 | import numpy.random as nprd 13 | # set the seed 14 | nprd.seed(505) 15 | # draw 5 uniform random numbers 16 | x=nprd.random(5) 17 | print(x) 18 | -------------------------------------------------------------------------------- /code_in_notes/zipf_law.do: -------------------------------------------------------------------------------- 1 | // zipf_law.do 2 | clear 3 | use datasets/citydata.dta 4 | keep if 
Year==2011 5 | sort v87 6 | gen rank=_N-_n+1 7 | gen log_rank=log10(rank) 8 | gen log_pop=log10(v87*10000) // 数据的单位为万人 9 | twoway (scatter log_pop log_rank if _n<_N-5) (scatter log_pop log_rank if _n>=_N-5, mlabel(City)), legend(off) xtitle("城市大小排序的对数(从大到小)") ytitle("人口的对数") 10 | graph export zipf_law.pdf, replace 11 | -------------------------------------------------------------------------------- /notebook_julia/Julia.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Julia的安装\n", 8 | "Julia是一个开源的数值计算软件,由于具有JIT等特性,所以具有非常高的性能。我们可以从Julia官网:https://julialang.org 下载和安装。\n", 9 | "Julia的界面非常简单,不适合编写程序,因而我们一般通过配置Atom, sublime等编辑器写Julia程序。此外,通过安装Jupyter,再在Julia中安装IJulia也可以使用Jupyter调用Julia。安装IJulia的方法很简单,只需要在Julia的命令界面输入「Pkg.add(\"IJulia\")」即可,其中「Pkg.add()」即安装Julia扩展包的命令。\n", 10 | "为了展示方便,我们仍然推荐使用Jupyter作为学习工具。比如此文档就是使用Jupyter写作。\n", 11 | "# 使用Julia生成随机数\n", 12 | "在Julia中,可以使用rand()函数生成随机数。最基本的是生成一个在$(0,1)$区间内的均匀分布的随机数,使用此均匀分布随机数,给定任意的分布函数$F$,可以生成服从$F$的随机数。" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "name": "stdout", 22 | "output_type": "stream", 23 | "text": [ 24 | "一个(0,1)区间内的随机数:0.37548057506738663\n", 25 | "20个(0,1)区间内的随机数:\n", 26 | "[0.719472, 0.447485, 0.101696, 0.166868, 0.042199, 0.29615, 0.957232, 0.688239, 0.583882, 0.30275, 0.718666, 0.945527, 0.150395, 0.650269, 0.811649, 0.770101, 0.681383, 0.066861, 0.0560345, 0.760834]" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "## 生成一个x~Uniform(0,1)\n", 32 | "x=rand()\n", 33 | "print(\"一个(0,1)区间内的随机数:\",x,\"\\n\")\n", 34 | "\n", 35 | "## 生成20个z~Uniform(0,1)\n", 36 | "z=rand(20)\n", 37 | "print(\"20个(0,1)区间内的随机数:\",\"\\n\")\n", 38 | "print(z)" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "如果我们需要生成一个服从分布函数$F: R\\rightarrow 
[0,1]$的随机数,那么只要首先生成一个$(0,1)$的随机数$u$,并令$x=F^{-1}(u)$,那么新生成的$x$即服从$F$的分布。比如,指数分布的分布函数为$1-e^{-\\frac{1}{b}\\cdot x}$,其中b为一个参数,因而我们可以使用$x=-b\\cdot \\ln(u)$来生成服从指数分布的随机数。" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 5, 51 | "metadata": { 52 | "scrolled": true 53 | }, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "20个服从指数分布F(x)=1-exp{-(1/b)*x}的随机数:\n", 60 | "[2.58645, 11.1848, 0.918512, 0.543136, 3.53971, 0.259657, 1.99849, 0.350091, 3.09114, 0.165935, 0.0657694, 0.949447, 9.25605, 0.393429, 10.0648, 1.87895, 2.65323, 6.0845, 6.48842, 2.73542]" 61 | ] 62 | } 63 | ], 64 | "source": [ 65 | "## 设定参数\n", 66 | "b=3\n", 67 | "\n", 68 | "## 生成20个x~F(x)=1-exp{-(1/b)*x}\n", 69 | "x=-1*log.(rand(20))*b\n", 70 | "print(\"20个服从指数分布F(x)=1-exp{-(1/b)*x}的随机数:\\n\")\n", 71 | "print(x)" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "我们可以使用经验分布函数(Empirical distribution function)与理论的分布函数比较,来判断我们生成的随机数是否满足某一分布。经验分布函数的定义为:$\\hat{F}(x)=\\frac{1}{N}\\cdot \\sum_{i=1}^N 1\\{X_i \\leq x\\}$,也就是给定一个$x$,其经验分布函数的值为样本中小于等于$x$的比例,比如:" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 20, 84 | "metadata": {}, 85 | "outputs": [ 86 | { 87 | "name": "stdout", 88 | "output_type": "stream", 89 | "text": [ 90 | " x | 分布函数 | 经验分布函数| 差的绝对值\n", 91 | "0.1 |0.0327838995179941 | 0.035 | 0.002216100482005906 \n", 92 | "0.3 |0.09516258196404037 | 0.1 | 0.004837418035959634 \n", 93 | "0.5 |0.15351827510938598 | 0.185 | 0.031481724890614016 \n", 94 | "0.7 |0.20811043366321835 | 0.255 | 0.04688956633678165 \n", 95 | "0.9 |0.2591817793182821 | 0.295 | 0.03581822068171786 \n", 96 | "1.1 |0.3069593799135585 | 0.325 | 0.018040620086441528 \n", 97 | "1.3 |0.3516556589984903 | 0.4 | 0.04834434100150975 \n", 98 | "1.5 |0.3934693402873666 | 0.455 | 0.06153065971263344 \n", 99 | "1.7 |0.43258633120299617 | 0.495 | 0.06241366879700383 \n", 100 | "1.9 |0.46918054943798604 | 0.54 | 
0.070819450562014 \n", 101 | "2.1 |0.5034146962085905 | 0.575 | 0.07158530379140948 \n", 102 | "2.3 |0.5354409796390884 | 0.625 | 0.08955902036091157 \n", 103 | "2.5 |0.5654017914929217 | 0.655 | 0.08959820850707834 \n", 104 | "2.7 |0.5934303402594009 | 0.68 | 0.08656965974059916 \n", 105 | "2.9 |0.6196512434107414 | 0.715 | 0.09534875658925857 \n", 106 | "3.1 |0.6441810814626581 | 0.735 | 0.09081891853734192 \n", 107 | "3.3 |0.6671289163019204 | 0.745 | 0.0778710836980796 \n", 108 | "3.5 |0.6885967760854023 | 0.76 | 0.07140322391459775 \n", 109 | "3.7 |0.708680108866529 | 0.77 | 0.06131989113347103 \n", 110 | "3.9 |0.7274682069659874 | 0.785 | 0.05753179303401268 \n", 111 | "4.1 |0.7450446039734899 | 0.795 | 0.04995539602651011 \n", 112 | "4.3 |0.7614874461456975 | 0.8 | 0.03851255385430252 \n", 113 | "4.5 |0.7768698398515702 | 0.81 | 0.033130160148429844 \n", 114 | "4.7 |0.7912601766099203 | 0.825 | 0.03373982339007964 \n", 115 | "4.9 |0.8047224371643142 | 0.835 | 0.030277562835685723 \n", 116 | "5.1 |0.8173164759472653 | 0.845 | 0.027683524052734665 \n", 117 | "5.3 |0.8290982871984751 | 0.85 | 0.020901712801524863 \n", 118 | "5.5 |0.8401202539203061 | 0.86 | 0.019879746079693894 \n", 119 | "5.7 |0.8504313807773649 | 0.87 | 0.019568619222635086 \n", 120 | "5.9 |0.8600775119756906 | 0.87 | 0.009922488024309395 \n", 121 | "6.1 |0.8691015350902557 | 0.88 | 0.010898464909744332 \n", 122 | "6.3 |0.877543571747018 | 0.895 | 0.017456428252981993 \n", 123 | "6.5 |0.8854411560073123 | 0.895 | 0.009558843992687693 \n", 124 | "6.7 |0.8928294012476933 | 0.9 | 0.007170598752306745 \n", 125 | "6.9 |0.8997411562771962 | 0.9 | 0.0002588437228038254 \n", 126 | "7.1 |0.9062071513861237 | 0.91 | 0.0037928486138762985 \n", 127 | "7.3 |0.9122561349757057 | 0.91 | 0.002256134975705648 \n", 128 | "7.5 |0.9179150013761012 | 0.92 | 0.0020849986238988816 \n", 129 | "7.7 |0.9232089104210319 | 0.92 | 0.0032089104210318853 \n", 130 | "7.9 |0.9281613993106868 | 0.92 | 0.008161399310686712 
\n", 131 | "8.1 |0.9327944872602503 | 0.925 | 0.007794487260250227 \n", 132 | "8.3 |0.9371287733993328 | 0.925 | 0.012128773399332715 \n", 133 | "8.5 |0.9411835283575701 | 0.94 | 0.001183528357570185 \n", 134 | "8.7 |0.9449767799435927 | 0.94 | 0.004976779943592802 \n", 135 | "8.9 |0.9485253932982992 | 0.945 | 0.003525393298299262 \n", 136 | "9.1 |0.9518451458788036 | 0.955 | 0.0031548541211963155 \n", 137 | "9.3 |0.9549507976064422 | 0.955 | 4.92023935577679e-5 \n", 138 | "9.5 |0.9578561564907236 | 0.955 | 0.0028561564907236825 \n", 139 | "9.7 |0.9605741400209925 | 0.96 | 0.0005741400209925418 \n", 140 | "9.9 |0.96311683259876 | 0.96 | 0.0031168325987600554 \n", 141 | "Mean absolute bias:0.03123553671578694" 142 | ] 143 | } 144 | ], 145 | "source": [ 146 | "## 设定参数\n", 147 | "b=3\n", 148 | "\n", 149 | "## 生成200个x~F(x)=1-exp{-(1/b)*x}\n", 150 | "x=-1*log.(rand(200))*b\n", 151 | "\n", 152 | "## 给定一些点,在这些点上计算分布函数和经验分布函数\n", 153 | "x_eval=[i for i in 0.1:0.2:9.9] #0.1,0.3,...,9.9\n", 154 | "\n", 155 | "## 计算理论的分布函数\n", 156 | "F=1-exp.(-1/b.*x_eval)\n", 157 | "\n", 158 | "## 给定z计算经验分布函数\n", 159 | "function empirical_F(x,z::Float64)::Float64\n", 160 | " return mean(x.<=z)\n", 161 | "end\n", 162 | "\n", 163 | "## 计算经验分布函数\n", 164 | "Fhat=[empirical_F(x,z) for z in x_eval]\n", 165 | "\n", 166 | "## 计算经验分布函数与真实的分布函数之间的绝对差异\n", 167 | "bias=mean(abs.(Fhat-F))\n", 168 | "## 打印两个分布函数及其绝对差异,以及平均的绝对差异\n", 169 | "print(\" x | 分布函数 | 经验分布函数| 差的绝对值\\n\")\n", 170 | "for i in 1:1:length(x_eval)\n", 171 | " print(\"$(x_eval[i]) |$(F[i]) | $(Fhat[i]) | $(abs(F[i]-Fhat[i])) \\n\")\n", 172 | "end\n", 173 | "print(\"Mean absolute bias:\",bias)" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "metadata": { 180 | "collapsed": true 181 | }, 182 | "outputs": [], 183 | "source": [] 184 | } 185 | ], 186 | "metadata": { 187 | "kernelspec": { 188 | "display_name": "Julia 0.6.0", 189 | "language": "julia", 190 | "name": "julia-0.6" 191 | }, 192 | 
"language_info": { 193 | "file_extension": ".jl", 194 | "mimetype": "application/julia", 195 | "name": "julia", 196 | "version": "0.6.0" 197 | }, 198 | "latex_envs": { 199 | "LaTeX_envs_menu_present": true, 200 | "autocomplete": true, 201 | "bibliofile": "biblio.bib", 202 | "cite_by": "apalike", 203 | "current_citInitial": 1, 204 | "eqLabelWithNumbers": true, 205 | "eqNumInitial": 1, 206 | "hotkeys": { 207 | "equation": "Ctrl-E", 208 | "itemize": "Ctrl-I" 209 | }, 210 | "labels_anchors": false, 211 | "latex_user_defs": false, 212 | "report_style_numbering": false, 213 | "user_envs_cfg": false 214 | } 215 | }, 216 | "nbformat": 4, 217 | "nbformat_minor": 2 218 | } 219 | -------------------------------------------------------------------------------- /notebook_python/Poisson_Process.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 生成泊松过程\n", 8 | "算法:首先生成一系列的指数分布,假设:$x_i\\sim E(0,1/\\lambda),i=0,1,...$,那么给定时间$t$,泊松过程可以由以下方式生成:$$ N(t)=\\max \\left\\{ n\\geq 0: \\sum _{i=0} ^{n-1} x_i \\leq t \\right\\}$$" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "import numpy as np\n", 18 | "import numpy.random as nprd\n", 19 | "\n", 20 | "## 设定参数\n", 21 | "b=1.0/100 #指数分布参数,一个小时平均到达100次\n", 22 | "Nexp=2000 #生成足够多的指数分布\n", 23 | "\n", 24 | "## 生成N个x~F(x)=1-exp{-(1/b)*x}\n", 25 | "X=-b*np.log(nprd.random(Nexp))\n", 26 | "S=[]\n", 27 | "s=0\n", 28 | "# 产生累积和\n", 29 | "for x in X:\n", 30 | " s=s+x\n", 31 | " S.append(s)\n", 32 | "\n", 33 | "## 给定一些时间点,比如从第0分钟到第5分钟,在这些点上计算到达的个数\n", 34 | "t=np.linspace(0.0,5.0/60,500) #0到5分钟(以小时为单位)之间的500个时间点\n", 35 | "N=np.zeros(len(t))\n", 36 | "i=0\n", 37 | "for tt in range(len(t)):\n", 38 | " while t[tt]>=S[i]:\n", 39 | " i=i+1\n", 40 | " N[tt]=i\n" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 2, 46 | 
"metadata": {}, 47 | "outputs": [ 48 | { 49 | "data": { 50 | "image/png": "", 51 | "text/plain": [ 52 | "
" 53 | ] 54 | }, 55 | "metadata": { 56 | "needs_background": "light" 57 | }, 58 | "output_type": "display_data" 59 | } 60 | ], 61 | "source": [ 62 | "## 导入matplotlib\n", 63 | "import matplotlib.pyplot as plt \n", 64 | "## 使图形直接插入到jupyter中\n", 65 | "%matplotlib inline\n", 66 | "# 设定图像大小\n", 67 | "plt.rcParams['figure.figsize'] = (15.0, 8.0)\n", 68 | "\n", 69 | "t_plot=[tt*60 for tt in t]\n", 70 | "plt.plot(t_plot,N,label=r'$Poisson Process$',color='blue')\n", 71 | "plt.legend(loc='upper left', frameon=True)\n", 72 | "plt.show() ## 画图" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "# 复合泊松过程\n", 80 | "算法:给定$N(t)$为一个泊松过程,现在假设$Z_j\\sim N(0,1)$,那么复合泊松过程为:$$M(t)=\\sum _{j=1} ^{N(t)} Z_j$$" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 3, 86 | "metadata": {}, 87 | "outputs": [ 88 | { 89 | "data": { 90 | "image/png": "", 91 | "text/plain": [ 92 | "
" 93 | ] 94 | }, 95 | "metadata": { 96 | "needs_background": "light" 97 | }, 98 | "output_type": "display_data" 99 | } 100 | ], 101 | "source": [ 102 | "# 生成一系列正态分布\n", 103 | "Z=nprd.normal(0,1,int(N[-1]))\n", 104 | "SZ=[]\n", 105 | "s=0\n", 106 | "for z in Z:\n", 107 | " s=s+z\n", 108 | " SZ.append(s)\n", 109 | "\n", 110 | "# 生成复合泊松过程\n", 111 | "M=[]\n", 112 | "for n in N:\n", 113 | " if int(n)==0:\n", 114 | " M.append(0)\n", 115 | " else:\n", 116 | " M.append(SZ[int(n)-1])\n", 117 | "M=np.array(M)\n", 118 | "\n", 119 | "## 导入matplotlib\n", 120 | "import matplotlib.pyplot as plt \n", 121 | "## 使图形直接插入到jupyter中\n", 122 | "%matplotlib inline\n", 123 | "# 设定图像大小\n", 124 | "plt.rcParams['figure.figsize'] = (15.0, 8.0)\n", 125 | "\n", 126 | "t_plot=[tt*60 for tt in t]\n", 127 | "plt.plot(t_plot,M,label=r'$Compound Poisson Process$',color='blue')\n", 128 | "plt.legend(loc='upper left', frameon=True)\n", 129 | "plt.show() ## 画图" 130 | ] 131 | } 132 | ], 133 | "metadata": { 134 | "kernelspec": { 135 | "display_name": "Anaconda", 136 | "language": "python", 137 | "name": "anaconda" 138 | }, 139 | "language_info": { 140 | "codemirror_mode": { 141 | "name": "ipython", 142 | "version": 3 143 | }, 144 | "file_extension": ".py", 145 | "mimetype": "text/x-python", 146 | "name": "python", 147 | "nbconvert_exporter": "python", 148 | "pygments_lexer": "ipython3", 149 | "version": "3.9.12" 150 | } 151 | }, 152 | "nbformat": 4, 153 | "nbformat_minor": 2 154 | } 155 | -------------------------------------------------------------------------------- /notebook_python/Random_number.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "data": { 10 | "image/png": "", 11 | "text/plain": [ 12 | "
" 13 | ] 14 | }, 15 | "metadata": { 16 | "needs_background": "light" 17 | }, 18 | "output_type": "display_data" 19 | } 20 | ], 21 | "source": [ 22 | "import numpy as np\n", 23 | "import numpy.random as nprd\n", 24 | "# 获取序贯的1000个均匀分布随机数\n", 25 | "x = nprd.random(1000)\n", 26 | "# 将1000个均匀分布随机数按照奇偶数分成两个序列x0 和x1\n", 27 | "x0 = np.array([x[i] for i in range(1000) if i % 2 == 0])\n", 28 | "x1 = np.array([x[i] for i in range(1000) if i % 2 == 1])\n", 29 | "# 画图\n", 30 | "import matplotlib.pyplot as plt\n", 31 | "# 设定图像大小\n", 32 | "plt.rcParams['figure.figsize'] = (8.0, 5.0)\n", 33 | "plt.scatter(x0, x1, color='blue') ## 画出散点图\n", 34 | "plt.savefig(\"check_random_py.pdf\")\n" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 2, 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "name": "stdout", 44 | "output_type": "stream", 45 | "text": [ 46 | "2.109375\n" 47 | ] 48 | }, 49 | { 50 | "data": { 51 | "image/png": "", 52 | "text/plain": [ 53 | "
" 54 | ] 55 | }, 56 | "metadata": { 57 | "needs_background": "light" 58 | }, 59 | "output_type": "display_data" 60 | } 61 | ], 62 | "source": [ 63 | "#!/usr/bin/python3\n", 64 | "## file: rejection_beta.py\n", 65 | "import numpy as np\n", 66 | "import numpy.random as nprd\n", 67 | "import scipy.special as scisp\n", 68 | "# 设定参数\n", 69 | "alpha = 4\n", 70 | "beta = 2\n", 71 | "# beta分布密度函数\n", 72 | "f=lambda x: 1/scisp.beta(alpha,beta)* \\\n", 73 | " x**(alpha-1) * (1-x)**(beta-1)\n", 74 | "# 计算密度函数最大值\n", 75 | "M = f((1 - alpha) / (2 - alpha - beta))\n", 76 | "print(M)\n", 77 | "# 随机抽两个均匀分布,一个为(0,1),一个为(0,M),抽500个\n", 78 | "N = 500\n", 79 | "x = nprd.random(N)\n", 80 | "u = nprd.random(N) * M\n", 81 | "# 挑出使得u=lower and mu<=upper:\n", 187 | " included[i]=1\n", 188 | "print(\"The prob. of included=\",np.mean(included))" 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "metadata": {}, 194 | "source": [ 195 | "# 区间估计\n", 196 | "## 大样本总体未知\n", 197 | "如果$x_i\\sim \\left( \\mu, \\sigma^2 \\right)$,那么当样本量足够大时,$\\bar{x}\\sim N\\left(\\mu,\\frac{\\sigma^2}{N}\\right)$,或者,如果我们用样本方差替代总体方差,那么当样本量足够大时,有:$$\\frac{\\bar{x}-\\mu}{\\sqrt{\\frac{s^2}{N}}}\\sim N\\left(0,1\\right)$$\n", 198 | "下面我们将重复1000次区间估计,理论上,在95%置信水平下,应该有950次区间估计包含真值,我们将看到这一结论是否成立。" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 4, 204 | "metadata": {}, 205 | "outputs": [ 206 | { 207 | "name": "stdout", 208 | "output_type": "stream", 209 | "text": [ 210 | "Sample size=10, the prob. of included= 0.93\n", 211 | "Sample size=50, the prob. of included= 0.94\n", 212 | "Sample size=100, the prob. of included= 0.94\n", 213 | "Sample size=500, the prob. of included= 0.955\n", 214 | "Sample size=1000, the prob. 
of included= 0.948\n" 215 | ] 216 | } 217 | ], 218 | "source": [ 219 | "import numpy as np\n", 220 | "from numpy import random as nprd\n", 221 | "from scipy import special as func\n", 222 | "\n", 223 | "def sampling(N):\n", 224 | " ## 产生一组样本,以0.5的概率为z+3,0.5的概率为z-3,其中z~N(0,1),因而期望为0,方差为10\n", 225 | " d=nprd.rand(N)<0.5\n", 226 | " z=nprd.randn(N)\n", 227 | " x=np.array([z[i]+3 if d[i] else z[i]-3 for i in range(N)])\n", 228 | " return x\n", 229 | "\n", 230 | "## true value\n", 231 | "N=[10,50,100,500,1000] # sample size\n", 232 | "## iteration times\n", 233 | "M=1000\n", 234 | "## confidence level\n", 235 | "alpha=0.05\n", 236 | "for n in N:\n", 237 | " ## results\n", 238 | " included=np.zeros(M)\n", 239 | " for i in range(M):\n", 240 | " x=sampling(n)\n", 241 | " xmean=np.mean(x)\n", 242 | " xstd=np.std(x)\n", 243 | " lower=xmean-func.ndtri(1-alpha/2)*xstd/np.sqrt(n)\n", 244 | " upper=xmean+func.ndtri(1-alpha/2)*xstd/np.sqrt(n)\n", 245 | " # 如果包含真值\n", 246 | " #print(mu, lower, upper, mu>=lower, mu<=upper)\n", 247 | " if 0>=lower and 0<=upper:\n", 248 | " included[i]=1\n", 249 | " print(\"Sample size=%s, the prob. 
of included=\" % n, np.mean(included))" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": null, 255 | "metadata": { 256 | "collapsed": true 257 | }, 258 | "outputs": [], 259 | "source": [] 260 | } 261 | ], 262 | "metadata": { 263 | "kernelspec": { 264 | "display_name": "Anaconda", 265 | "language": "python", 266 | "name": "anaconda" 267 | }, 268 | "language_info": { 269 | "codemirror_mode": { 270 | "name": "ipython", 271 | "version": 3 272 | }, 273 | "file_extension": ".py", 274 | "mimetype": "text/x-python", 275 | "name": "python", 276 | "nbconvert_exporter": "python", 277 | "pygments_lexer": "ipython3", 278 | "version": "3.9.12" 279 | }, 280 | "latex_envs": { 281 | "LaTeX_envs_menu_present": true, 282 | "autocomplete": true, 283 | "bibliofile": "biblio.bib", 284 | "cite_by": "apalike", 285 | "current_citInitial": 1, 286 | "eqLabelWithNumbers": true, 287 | "eqNumInitial": 1, 288 | "hotkeys": { 289 | "equation": "Ctrl-E", 290 | "itemize": "Ctrl-I" 291 | }, 292 | "labels_anchors": false, 293 | "latex_user_defs": false, 294 | "report_style_numbering": false, 295 | "user_envs_cfg": false 296 | } 297 | }, 298 | "nbformat": 4, 299 | "nbformat_minor": 2 300 | } 301 | -------------------------------------------------------------------------------- /notebook_python/rejection_beta.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sijichun/MathStatsCode/a06caa49c76c7f80040640b0d4b3a467c3c03b56/notebook_python/rejection_beta.pdf --------------------------------------------------------------------------------