├── bestreg.pkg ├── bestreg ├── bestreg.ado ├── bestreg.hlp └── lbestreg.mlib ├── bioprobit.pkg ├── bioprobit ├── bioprobit.ado ├── bioprobit.hlp ├── bioprobit_d2.ado └── bioprobit_p.ado ├── egen_inequal.pkg ├── egen_inequal ├── _gcov.ado ├── _gentropy.ado ├── _gfgt.ado ├── _ggini.ado ├── _ghalf.ado ├── _ginequal.ado ├── _gkakwani.ado ├── _gmehran.ado ├── _gmld.ado ├── _gpiesch.ado ├── _grmd.ado ├── _gsdl.ado ├── _gtheil.ado └── egen_inequal.hlp ├── elmo.pkg ├── elmo ├── elmo.ado └── elmo.hlp ├── fastgini.pkg ├── fastgini ├── fastgini.ado └── fastgini.hlp ├── gconc.pkg ├── gconc ├── gconc.ado └── gconc.sthlp ├── gicurve.pkg ├── gicurve ├── gicurve.ado └── gicurve.hlp ├── gidecomposition.pkg ├── gidecomposition ├── gidecomposition.ado └── gidecomposition.hlp ├── googletrans.pkg ├── googletrans ├── googletrans.ado └── googletrans.sthlp ├── lookfor_all.pkg ├── lookfor_all ├── lookfor_all.ado └── lookfor_all.hlp ├── movestay.pkg ├── movestay ├── movestay.ado ├── movestay.hlp ├── movestay_d2.ado ├── movestay_example.do ├── movestay_example.dta └── movestay_p.ado ├── pov_robust.pkg ├── pov_robust ├── pov_robust.ado └── pov_robust.hlp ├── ppreg.pkg ├── ppreg ├── ppreg.ado └── ppreg.hlp ├── readme.md ├── sedecomposition.pkg ├── sedecomposition ├── sedecomposition.ado └── sedecomposition.hlp ├── stata.toc ├── switch_probit ├── switch_probit.ado ├── switch_probit.sthlp ├── switch_probit_d2.ado ├── switch_probit_example.do ├── switch_probit_example.dta └── switch_probit_p.ado └── xml_tab ├── xml_tab.ado ├── xml_tab.hlp ├── xml_tab.pkg └── xml_tab_example.do /bestreg.pkg: -------------------------------------------------------------------------------- 1 | v 3 2 | d 'bestreg': Best variable subset selection. 
3 | d 4 | d Distribution-Date: 04dec2006 5 | d 6 | d Author: Zurab Sajaia, World Bank 7 | d Support: email zsajaia@hotmail.com 8 | 9 | f bestreg/bestreg.ado 10 | f bestreg/bestreg.hlp 11 | f bestreg/lbestreg.mlib -------------------------------------------------------------------------------- /bestreg/bestreg.ado: -------------------------------------------------------------------------------- 1 | *! version 1.0 04dec2006 Z. Sajaia 2 | // bestreg: with arguments, builds the block table, calls the Mata routine bestreg_mataleaps (not defined in this file) and posts e(best), with no arguments it replays from the last bestreg results 3 | program define bestreg, eclass 4 | version 9.2 5 | 6 | syntax [anything] [if] [in] [aweight pweight iweight fweight/] [,FIXed(varlist) Graph Vars(numlist integer) *] 7 | 8 | tempname tmp tmp1 tmp2 tmp3 9 | if ~missing("`anything'") { 10 | _rmcoll `fixed' 11 | local fixed "`r(varlist)'" 12 | local k1 : word count `fixed' 13 | local ++k1 14 | 15 | _rmcollright `fixed' `anything' 16 | // block number (count of fixed variables + 1) from _rmcollright must hold exactly one variable, the dependent variable 17 | if `:word count `r(block`k1')''>1 { 18 | display as error "dependent variable missing" 19 | exit 197 20 | } 21 | // tmp gains one column per remaining block: block number, first position, last position 22 | local k =r(k) 23 | local ++k1 24 | local d = 0 25 | matrix `tmp' = (1\1\0) 26 | forvalues i=`k1'/`k' { 27 | local g :word count `r(block`i')' 28 | matrix `tmp'=`tmp', (`i'-`k1'+1 \ `d' + 1 \ `d'+`g') 29 | local d = `d'+ `g' 30 | } 31 | 32 | local vlist "`r(varblocklist)'" 33 | local vlist : list vlist - fixed 34 | local vlist : subinstr local vlist "(" "", all 35 | local vlist : subinstr local vlist ")" "", all 36 | 37 | local k=`k'-`k1'+2 38 | 39 | marksample touse 40 | markout `touse' `vlist' 41 | 42 | matrix `tmp1'=`tmp' 43 | local fn : word count `fixed' 44 | 45 | display _newline 46 | 47 | display as input `k'-1 _c 48 | if `fn'>0 display as text "(" as input "+`fn'" as text ")" _c 49 | display as text " distinct variables(blocks) in the regression" 50 | display as input 2^(`k'-1)-1 as text " possible combinations" 51 | 52 | local vlist "`vlist' `fixed'" 53 | 54 | mata : bestreg_mataleaps("`tmp1'") 55 | 56 | display as input cnt as text " regressions performed" 57 | window manage maintitle reset 58 | 59 | if
~missing("`weight'") local weight "[`weight'=`exp']" 60 | gettoken depvar vlist : vlist 61 | // copy row i-1 of tmp1 once for every variable in block i, so tmp3 ends up with one row per variable 62 | forvalues i=2/`k' { 63 | matrix `tmp2'=J(`tmp'[3,`i']-`tmp'[2,`i']+1, 1, 1) 64 | matrix `tmp2'=`tmp2' # `tmp1'[`i'-1,....] 65 | matrix `tmp3'=nullmat(`tmp3') \ `tmp2' 66 | } 67 | if `fn'>0 matrix `tmp3'=`tmp3'\J(`:word count `fixed'', colsof(`tmp3'), 1) 68 | matrix `tmp3'=`tmp3' \ `tmp1'[`k'...,....] 69 | 70 | ereturn post, esample(`touse') properties () 71 | 72 | ereturn scalar regs = cnt 73 | ereturn scalar bestsub= bestcol 74 | ereturn scalar blocks = `k'-1 75 | ereturn scalar fixed =`fn' 76 | 77 | ereturn local cmd "bestreg" 78 | ereturn local criterium "RSS" 79 | ereturn local depvar `depvar' 80 | ereturn local vlist `vlist' 81 | ereturn local weight "`weight'" 82 | 83 | local k=`k'+`fn'-1 84 | 85 | local cnames 86 | forvalues i=`fn'/`k' { 87 | local cnames "`cnames' b`i'" 88 | } 89 | matrix roweq `tmp3'=: 90 | matrix coleq `tmp3'=: 91 | matrix colnames `tmp3'=`cnames' 92 | matrix rownames `tmp3'=`vlist' RSS R2 AdjR2 93 | ereturn matrix best = `tmp3', copy 94 | } 95 | else { 96 | if "`e(cmd)'"!="bestreg" { 97 | display as error "last estimates not found" 98 | exit 301 99 | } 100 | local depvar "`e(depvar)'" 101 | matrix `tmp3'=e(best) 102 | local vlist "`e(vlist)'" 103 | local k = e(blocks)+e(fixed) 104 | local weight "`e(weight)'" 105 | 106 | scalar bestcol=e(bestsub) 107 | } 108 | // run regress for each requested subset size (default: the value of scalar bestcol), bestreg results are held and restored around it 109 | if missing("`vars'") local vars =bestcol 110 | tempname tname 111 | _estimates hold `tname', copy 112 | foreach i of numlist `vars' { 113 | local j=0 114 | local reglist 115 | foreach var in `vlist' { 116 | if `tmp3'[`++j',colnumb(`tmp3',"b`i'")]==1 local reglist "`reglist' `var'" 117 | } 118 | display _newline 119 | display as input "Best `i'-variable(block) regression" 120 | regress `depvar' `reglist' `weight' if e(sample) 121 | } 122 | _estimates unhold `tname' 123 | // optional graph of the R2 and AdjR2 rows of the result matrix against subset size 124 | if ~missing("`graph'") { 125 | tempvar N R2 126 | matrix `tmp3'=`tmp3'["R2".."AdjR2",....]' 127 | svmat
`tmp3', names(`R2') 128 | quietly generate byte `N'=_n-1 if `R2'2<. 129 | label variable `R2'1 "R-squared" 130 | label variable `R2'2 "Adjusted R-squared" 131 | label variable `N' "Subset size" 132 | twoway line `R2'1 `N' || line `R2'2 `N', `options' 133 | } 134 | end 135 | -------------------------------------------------------------------------------- /bestreg/bestreg.hlp: -------------------------------------------------------------------------------- 1 | {smcl} 2 | {* 23June2006}{...} 3 | {cmd:help bestreg} 4 | {hline} 5 | 6 | {title:Title} 7 | 8 | {p2colset 9 20 22 2}{...} 9 | {p2col :{hi:bestreg} {hline 2}}Best variable subset selection{p_end} 10 | {p2colreset}{...} 11 | 12 | 13 | {title:Syntax} 14 | 15 | {p 8 16 2} 16 | {cmd:bestreg} [{depvar} {it:varblocklist}] {ifin} {weight} [{cmd:,} {opt fix:ed(varlist)} {opt v:ars(numlist)} {opt g:raph} {it:graph_options}] 17 | 18 | {pstd} 19 | where {it:varblocklist} is a list of one or more {it:varblock}s, and a {it:varblock} is either {varname} 20 | or {cmd:(}{varlist}{cmd:)}. A {it:varlist} in parenthesis indicates that this list of variables is to be considered 21 | as a group. 22 | 23 | 24 | {pstd} 25 | {cmd:aweight}s, {cmd:fweight}s, {cmd:iweight}s, and {cmd:pweight}s are allowed; see {help weight}.{p_end} 26 | 27 | {title:Description} 28 | 29 | {pstd} 30 | {cmd:bestreg} finds subsets of {it:varblocklist} that best predict {it:depvar} by a linear regression. Here we define 31 | the 'best' subset of a size {it:p} as the one with minimum residual sum of squares (RSS). 32 | Many criteria, which are used in identifying the best {it:p}-size subsets are monotone functions of RSS, 33 | including the adjusted R-square, the minimum mean square residual, and the Cp statistic of Mallows. 34 | 35 | {pstd} 36 | {cmd:bestreg} implements Duarte Silva's adaptation [1] of Furnival and Wilson's Leaps and Bounds Algorithm [2] 37 | for variable selection in regression analysis.
The algorithm allows finding the best subset regressions without 38 | computing all possible regressions. 39 | 40 | {pstd} 41 | The result of the program is saved in {cmd: e(best)}. The variables are in rows, and for each subset size {it:p} 42 | there is a column of {bf:0}s and {bf:1}s, where variables with {bf:1}s were included in the regression. If {opt vars()} 43 | was not specified, regression results for the subset with overall highest adjusted R-squared will be displayed. 44 | 45 | 46 | 47 | {dlgtab:Main} 48 | 49 | {phang} 50 | {opt fixed(varlist)} specifies the list of variables that must always be included in the regressions. 51 | 52 | 53 | {phang} 54 | {opt vars(numlist)} for each {it:p}-member of the {it: numlist} the regression results for the best subset 55 | of size {it:p} will be displayed. By default, {cmd:bestreg} will display the results for the subset with the overall highest adjusted R-squared. 56 | 57 | 58 | {phang} 59 | {opt graph} displays a graph of R-squared and adjusted R-squared against subset size. This graph can be useful for choosing the subset size. 60 | 61 | {pmore} 62 | additional {it:graph_options} can be specified as well; see {help twoway_line}. 63 | 64 | 65 | {title:Examples} 66 | 67 | {phang}{cmd:.bestreg price mpg headroom trunk}{p_end} 68 | 69 | {phang}{cmd:.tabulate headroom, generate(d_h)}{p_end} 70 | {phang}{cmd:.bestreg price mpg (d_h*) trunk}{p_end} 71 | 72 | 73 | {title:Author} 74 | 75 | {phang}Zurab Sajaia, zsajaia@worldbank.org 76 | 77 | 78 | {title:References} 79 | 80 | {phang}[1] Duarte Silva, A.P.(2002) Discarding Variables in a Principal Component Analysis: 81 | Algorithms for All-Subsets Comparisons, Computational Statistics, Vol. 17, 251-271. 82 | 83 | {phang}[2] Furnival, G.M. and Wilson, R.W. (1974). Regressions by Leaps and Bounds, Technometrics, Vol. 16, 499-511.
84 | 85 | 86 | {title:Also see} 87 | 88 | {psee} 89 | Online: {helpb regress}{p_end} 90 | -------------------------------------------------------------------------------- /bestreg/lbestreg.mlib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vavalomi/stata_tools/67b6c92bfdbc5c81cf4b2d859d40bb901ec6c96f/bestreg/lbestreg.mlib -------------------------------------------------------------------------------- /bioprobit.pkg: -------------------------------------------------------------------------------- 1 | v 3 2 | d 'bioprobit': Maximum-likelihood estimation of two-equation ordered probit models with endogeneity. 3 | d 4 | d Distribution-Date: 17Aug2007 5 | d 6 | d Author: Zurab Sajaia, World Bank 7 | d Support: email zsajaia@hotmail.com 8 | 9 | f bioprobit/bioprobit.ado 10 | f bioprobit/bioprobit_d2.ado 11 | f bioprobit/bioprobit_p.ado 12 | f bioprobit/bioprobit.hlp 13 | -------------------------------------------------------------------------------- /bioprobit/bioprobit.ado: -------------------------------------------------------------------------------- 1 | *! version 1.11 17Aug2007 2 | #delimit ; 3 | // bioprobit: entry point, redisplays stored results on replay and otherwise passes the command line to bioprobit_mx 4 | program define bioprobit; 5 | version 9; 6 | if replay() {; 7 | if ("`e(cmd)'" ~= "bioprobit") error 301; 8 | bioprobit_replay `0'; 9 | }; // end if 10 | else bioprobit_mx `0'; 11 | end; // end program bioprobit 12 | // bioprobit_replay: shows the coefficient table with rho (tanh of athrho) and the independence test stored in e() 13 | program define bioprobit_replay; 14 | syntax [,Level(cilevel)]; 15 | local rho diparm(athrho , tanh label("rho")); 16 | _coef_table_header; 17 | display; 18 | _coef_table, level(`level') `rho' notest; 19 | display in green e(chi2_ct) " test of indep. eqns.
:" 20 | _col(38) "chi2(" in yellow "1" in green ") = " in yellow %8.2f e(chi2_c) 21 | _col(59) in green "Prob > chi2 = " in yellow %6.4f e(p_c); 22 | if strpos("`e(title)'", "Simultaneous") display in smcl in green "{hline 78}"; 23 | 24 | end; // end program bioprobit_replay 25 | // bioprobit_mx: parses either syntax, takes starting values from two oprobit fits, maximizes with ml (evaluator bioprobit_d2), then rebuilds the cutpoints and posts results 26 | program define bioprobit_mx, eclass; 27 | 28 | gettoken first : 0, match(paren); 29 | if missing("`paren'") {; // syntax 1, bivariate ordered probit models 30 | syntax varlist [if] [in] [pweight iweight fweight] 31 | [, offset1(varname) offset2(varname) COLlinear 32 | Robust CLuster(varname) Level(cilevel) end *]; 33 | gettoken y1 varlist : varlist; 34 | gettoken y2 varlist : varlist; 35 | local x1 `varlist'; 36 | local x2 `varlist'; 37 | local pref "B"; 38 | }; 39 | else {; // syntax 2, seemingly unrelated bivariate ordered probit model 40 | syntax anything(id="equation id" equalok) [if] [in] [pweight iweight fweight] 41 | [, offset1(varname) offset2(varname) COLlinear 42 | Robust CLuster(varname) Level(cilevel) end *]; 43 | gettoken eq1 eq2_ : anything, match(parns) bind; 44 | tokenize "`eq1'", parse("="); 45 | if ("`2'"!="=") {; 46 | tokenize "`eq1'"; 47 | local y1 `1'; 48 | macro shift; 49 | local x1 `*'; 50 | }; 51 | else {; 52 | local y1 `1'; 53 | local x1 `3'; 54 | }; 55 | gettoken eq2 : eq2_ , match(parns) bind; 56 | if ("`parns'"!="(") local eq2 "`eq2_'"; 57 | tokenize "`eq2'", parse("="); 58 | if ("`2'"!="=") {; 59 | tokenize "`eq2'"; 60 | local y2 `1'; 61 | macro shift; 62 | local x2 `*'; 63 | }; 64 | else {; 65 | local y2 `1'; 66 | local x2 `3'; 67 | }; 68 | local pref "Seemingly unrelated b"; 69 | }; // if 70 | global END =~missing("`end'"); 71 | 72 | quietly {; 73 | 74 | // define standard dep vars 75 | tempvar sy1 sy2; 76 | egen `sy1' = group(`y1'); 77 | egen `sy2' = group(`y2'); 78 | 79 | marksample touse; 80 | markout `touse' `sy1' `sy2' `x1' `x2' `cluster' `offset1' `offset2', strok; 81 | 82 | mlopts mlopts, `options'; 83 | 84 | if (~missing("`offset1'")) local offo1
"offset(`offset1')"; 85 | if (~missing("`offset2'")) local offo2 "offset(`offset2')"; 86 | if "`weight'" == "pweight" | (~missing("`cluster'")) local robust "robust"; // the weight type must be tested here, before the weight macro is rewritten as [type=exp] below 87 | if (~missing("`cluster'")) local clopt cluster(`cluster'); 88 | if (~missing("`weight'")) local weight "[`weight'`exp']"; 89 | 90 | // Remove collinear variables 91 | noisily _rmdcoll `sy1' `x1' `weight' if `touse', `collinear'; 92 | local x1 "`r(varlist)'"; 93 | noi tabulate `sy2' if `touse'; 94 | 95 | noisily _rmdcoll `sy2' `x2' `weight' if `touse', `collinear'; 96 | local x2 "`r(varlist)'"; 97 | 98 | summarize `sy1'; global NC1 = r(max); 99 | summarize `sy2'; global NC2 = r(max); // number of categories 100 | if ($NC1==1) {; 101 | noisily display as error "`y1' does not vary"; 102 | exit 2000; 103 | }; 104 | if ($NC2==1) {; 105 | noisily display as error "`y2' does not vary"; 106 | exit 2000; 107 | }; 108 | local NC1_1 = $NC1-1; 109 | local NC2_1 = $NC2-1; 110 | 111 | // define initial values 112 | tempname Ib_op1 Ib_op2 Ic_op Ic_op1 Ic_op2 I_rho TMP ll_0; 113 | scalar `ll_0' = 0; 114 | 115 | oprobit `sy1' `x1' `weight' if `touse', `offo1'; 116 | matrix `TMP' = e(b); 117 | matrix `Ib_op1' = `TMP'[1,"`sy1':"]; 118 | matrix coleq `Ib_op1' = `y1'; 119 | matrix `Ic_op1' = `TMP'[1,"cut1:_cons".."cut`NC1_1':_cons"]; 120 | scalar `ll_0'=`ll_0'+e(ll); 121 | if $END {; 122 | tempvar xb1; 123 | matrix score `xb1' = `Ib_op1'; 124 | }; 125 | oprobit `sy2' `xb1' `x2' `weight' if `touse', `offo2'; 126 | matrix `TMP' = e(b); 127 | matrix `Ib_op2' = `TMP'[1,"`sy2':"]; 128 | matrix coleq `Ib_op2' = `y2'; 129 | matrix `Ic_op2' = `TMP'[1,"cut1:_cons".."cut`NC2_1':_cons"]; 130 | scalar `ll_0'=`ll_0'+e(ll); 131 | 132 | 133 | matrix `Ic_op' = `Ic_op1'[1,1]; 134 | forvalues i = 2/`NC1_1' {; matrix `Ic_op' =`Ic_op',sqrt(`Ic_op1'[1,`i']-`Ic_op1'[1,`i'-1]); }; 135 | matrix `Ic_op' = `Ic_op', `Ic_op2'[1,1]; 136 | forvalues i = 2/`NC2_1' {; matrix `Ic_op' =`Ic_op',sqrt(`Ic_op2'[1,`i']-`Ic_op2'[1,`i'-1]); }; 137 | 138 |
correlate `sy1' `sy2' if `touse'; 139 | matrix `I_rho' = .5;//atanh(r(rho)); 140 | matrix colnames `I_rho' = athrho:_cons; 141 | 142 | local cuts; 143 | forvalues k = 1/`NC1_1' {; local cuts "`cuts' /cut1`k'"; local ceqs "`ceqs' cut1`k'"; }; 144 | forvalues k = 1/`NC2_1' {; local cuts "`cuts' /cut2`k'"; local ceqs "`ceqs' cut2`k'"; }; 145 | matrix coleq `Ic_op' = `ceqs'; 146 | matrix colnames `Ic_op' = _cons; 147 | }; // end quietly 148 | 149 | // maximization 150 | if $END {; 151 | matrix `I_rho' =`I_rho',`Ib_op2'[1,1]; 152 | matrix colnames `I_rho' = athrho:_cons gamma:_cons; 153 | 154 | matrix `Ib_op2' =`Ib_op2'[1,2...]; 155 | local gamma "/gamma"; 156 | local title "title(Simultaneous bivariate ordered probit regression)"; 157 | }; 158 | else local title "title(`pref'ivariate ordered probit regression)"; 159 | 160 | ml model d2 bioprobit_d2 161 | (`y1' : `sy1' = `x1', noconstant) 162 | (`y2' : `sy2' = `x2', noconstant) 163 | /athrho 164 | `gamma' 165 | `cuts' 166 | `weight' 167 | if `touse' 168 | , 169 | `title' 170 | difficult 171 | collinear 172 | missing 173 | search(on) 174 | init(`Ib_op1' `Ib_op2' `I_rho' `Ic_op') 175 | maximize 176 | `clopt' 177 | `robust' 178 | `mlopts' 179 | ; 180 | // cutpoints after the first are estimated as square roots of increments, so rebuild them in b and transform V with the matrix of derivatives D 181 | tempname b D D1 V; 182 | matrix `b' =e(b); 183 | matrix `V' =e(V); 184 | local f = colnumb(`b',"cut11:_cons") -1; 185 | matrix `D' = I(`f'), J(`f', `NC1_1'+`NC2_1',0); // matrix of derivatives 186 | matrix `D1' = (J(1,`f',0), 1, J(1,`NC1_1'+`NC2_1'-1, 0)); 187 | matrix `D' = `D' \ `D1'; 188 | forvalues k = 2/`NC1_1' {; 189 | matrix `D1'[1,`f'+`k'] = 2*`b'[1,`f'+`k']; 190 | matrix `D' = `D' \ `D1'; 191 | matrix `b'[1,`f'+`k'] =`b'[1,`f'+`k'-1]+`b'[1,`f'+`k']^2; 192 | }; 193 | local f = colnumb(`b',"cut21:_cons") -1; 194 | if (`NC2_1' > 1) matrix `D1' = (J(1,`f', 0), 1, J(1,`NC2_1'-1, 0)); 195 | else matrix `D1' = (J(1,`f', 0), 1); 196 | matrix `D' = `D' \ `D1'; 197 | forvalues k = 2/`NC2_1' {; 198 | matrix `D1'[1,`f'+`k'] = 2*`b'[1,`f'+`k']; 199 | matrix `D' = `D' \
`D1'; 200 | matrix `b'[1,`f'+`k'] =`b'[1,`f'+`k'-1]+`b'[1,`f'+`k']^2; 201 | }; 202 | matrix `V' = `D'*`V'*`D''; 203 | ereturn repost b=`b' V=`V'; 204 | 205 | ereturn scalar ll_0 = `ll_0'; // loglikelihood for non-correlated case 206 | ereturn scalar k_aux= `NC1_1'+`NC2_1'; // identify ancillary parameters 207 | 208 | ereturn local cmd "bioprobit"; 209 | ereturn local predict "bioprobit_p"; 210 | ereturn local depvar "`y1' `y2'"; 211 | ereturn local offset1 `offset1'; 212 | ereturn local offset2 `offset2'; 213 | 214 | if missing("`robust'") {; 215 | ereturn local chi2_ct "LR"; 216 | ereturn scalar chi2_c = 2 * (e(ll) - `ll_0'); 217 | }; 218 | else {; 219 | ereturn local chi2_ct "Wald"; 220 | quietly test [athrho]_b[_cons]; 221 | ereturn scalar chi2_c = r(chi2); 222 | if $END {; 223 | // a stray ereturn local used to sit on this line and absorbed the test command below, so the test never ran 224 | quietly test [athrho]_b[_cons]+[gamma]_b[_cons]==0; 225 | ereturn scalar chi2_c2 = r(chi2); 226 | ereturn scalar p_c2 = chiprob(2, e(chi2_c2)); // NOTE(review): the test above imposes one restriction but 2 df are used here, confirm 227 | }; 228 | }; 229 | ereturn scalar p_c = chiprob(1, e(chi2_c)); 230 | global END; 231 | bioprobit_replay, level(`level'); 232 | end; // end program bioprobit_mx 233 | 234 | -------------------------------------------------------------------------------- /bioprobit/bioprobit.hlp: -------------------------------------------------------------------------------- 1 | {smcl} 2 | {* 04Sep2006}{...} 3 | {cmd:help bioprobit} 4 | {hline} 5 | 6 | {title:Title} 7 | 8 | {p2colset 5 20 22 2}{...} 9 | {p2col :{hi: bioprobit} {hline 2}}Bivariate ordered probit regression{p_end} 10 | {p2colreset}{...} 11 | 12 | 13 | {title:Syntax} 14 | 15 | {phang}Bivariate ordered probit model 16 | 17 | {p 8 17 2} 18 | {cmd:bioprobit} 19 | {it:{help depvar:depvar1} depvar2} {varlist} 20 | {ifin} {weight} 21 | [{cmd:,} 22 | {it:{help bioprobit##options:options}}] 23 | 24 | {phang}Simultaneous bivariate ordered probit model 25 | 26 | {p 8 17 2} 27 | {cmd:bioprobit} 28 | ({it:{help depvar:depvar1}} [=] {varlist:1}) ({it:{help depvar:depvar2}} [=]
{varlist:2}) 29 | {ifin} 30 | {weight} 31 | [{cmd:,} {it:{help bioprobit##options:options}}] 32 | 33 | 34 | {title:Syntax for predict} 35 | 36 | {p 8 16 2} 37 | {cmd:predict} 38 | {dtype} 39 | {it:{help newvar}} 40 | {ifin} 41 | [{cmd:,} {it:statistic} 42 | {opt o:utcome(outcome pair)} {opt nooff:set}] 43 | 44 | {synoptset 11 tabbed}{...} 45 | {synopthdr :statistic} 46 | {synoptline} 47 | {syntab :Main} 48 | {synopt :{opt xb1}} fitted values for equation 1{p_end} 49 | {synopt :{opt xb2}} fitted values for equation 2{p_end} 50 | {synopt :{opt stdp1}} standard error of fitted values for equation 1{p_end} 51 | {synopt :{opt stdp2}} standard error of fitted values for equation 2{p_end} 52 | {synoptline} 53 | {p2colreset}{...} 54 | 55 | {synoptset 20 tabbed}{...} 56 | {marker options}{...} 57 | {synopthdr} 58 | {synoptline} 59 | {syntab :Model} 60 | {synopt :{opth off:set1(varname)}} offset variable for first equation{p_end} 61 | {synopt :{opth off:set2(varname)}} offset variable for second equation{p_end} 62 | {synopt :{opt col:linear}} keep collinear variables 63 | 64 | {syntab :SE/Robust} 65 | {synopt :{opt r:obust}}synonym for {cmd:vce(robust)}{p_end} 66 | {synopt :{opth cl:uster(varname)}}adjust standard errors for intragroup correlation{p_end} 67 | 68 | {syntab :Reporting} 69 | {synopt :{opt l:evel(#)}}set confidence level; default is {cmd:level(95)}{p_end} 70 | 71 | {syntab :Max option} 72 | {synopt :{it:{help bioprobit##maximize_options:maximize_options}}}control the maximization process;{p_end} 73 | {synoptline} 74 | {p2colreset}{...} 75 | {p 4 6 2} 76 | {opt fweight}s, {opt iweight}s, and {opt pweight}s are allowed;see {help weight}.{p_end} 77 | 78 | {title:Description} 79 | 80 | {pstd} 81 | {opt bioprobit} fits maximum-likelihood two-equation ordered probit models of ordinal variables 82 | {depvar:1} and {depvar:2} on the independent variables {indepvars:1} and {indepvars:2}. 
83 | The actual values taken on by dependent variables are irrelevant, except that larger values are assumed to 84 | correspond to "higher" outcomes. //Up to 50 outcomes are allowed in Stata/SE 85 | and Intercooled Stata, and up to 20 outcomes in Small Stata//. 86 | 87 | {pstd} 88 | See {help logistic estimation commands} for a list of related estimation 89 | commands. 90 | 91 | 92 | {title:Options} 93 | 94 | {dlgtab:Model} 95 | 96 | {phang} 97 | {opth offset1(varname)}, {opth offset2(varname)}, {opt collinear}; see {help estimation options}. 98 | 99 | {dlgtab:Options} 100 | 101 | 102 | {phang} 103 | {opt robust}, {opth cluster(varname)}; see {help estimation options}. 104 | {opt cluster()} can be used with {opt pweight}s to produce estimates 105 | for unstratified cluster-sampled data. 106 | 107 | {dlgtab:Reporting} 108 | 109 | {phang} 110 | {opt level(#)}; see {help estimation options}. 111 | 112 | {marker maximize_options}{...} 113 | {dlgtab:Max options} 114 | 115 | {phang} 116 | {it:maximize_options}: {opt tech:nique(algorithm_spec)}, 117 | {opt iter:ate(#)}, [{cmdab:no:}]{opt lo:g}, {opt tr:ace}, {opt hess:ian}, 118 | {opt grad:ient}, {opt showstep}, {opt tol:erance(#)}, {opt ltol:erance(#)}, 119 | {opt gtol:erance(#)}, {opt nrtol:erance(#)}, {opt nonrtol:erance}, 120 | {opt from(init_specs)}; see {help maximize}. 121 | 122 | {title:Options for predict} 123 | 124 | {phang} 125 | {opt xb1} calculates the linear prediction for equation 1. 126 | 127 | {phang} 128 | {opt xb2} calculates the linear prediction for equation 2. 129 | 130 | {phang} 131 | {opt stdp1} calculates the standard error of the linear prediction of 132 | equation 1. 133 | 134 | {phang} 135 | {opt stdp2} calculates the standard error of the linear prediction of 136 | equation 2. 137 | 138 | {phang} 139 | {opt outcome(outcome pair)} specifies for which outcome pair the predicted probabilities 140 | are to be calculated. 
{opt outcome()} should contain a pair of either values of 141 | the dependent variables or one of {opt #1}, {opt #2}, {it:...}, with {opt #1} 142 | meaning the first category of a dependent variable, {opt #2} the second 143 | category, etc. If one of the arguments is missing then the result will be the marginal probability, 144 | i.e. {opt outcome(., k)} will return Pr(y2=k). 145 | 146 | {phang} 147 | {opt nooffset} is relevant only if you specified {opth offset1(varname)} or 148 | {opt offset2(varname)} for 149 | {cmd:bioprobit}. It modifies the calculations made by {opt predict} so that 150 | they ignore the offset variables; the linear predictions are treated as 151 | xb1 rather than xb1 + offset1 and xb2 rather than as 152 | xb2 + offset2. 153 | 154 | 155 | {title:Examples} 156 | 157 | {phang}{cmd:. bioprobit headroom foreign price length mpg turn}{p_end} 158 | 159 | 160 | {title:Also see} 161 | 162 | {psee} 163 | Online: {helpb biprobit}, {helpb oprobit}, {helpb ml}{p_end} 164 | -------------------------------------------------------------------------------- /bioprobit/bioprobit_d2.ado: -------------------------------------------------------------------------------- 1 | *!
version 1.11 17Aug2007 2 | #delimit ; 3 | // bioprobit_d2: ml evaluator taking todo b lnf g negH plus one extra argument per equation, it fills lnf, then g when todo is above 0, then negH when todo is above 1 4 | program define bioprobit_d2, eclass; 5 | // the category counts and the END flag are read from globals that this program does not set 6 | local NC1 = $NC1; 7 | local NC1_1 = `NC1'-1; 8 | local NC2 = $NC2; 9 | local NC2_1 = `NC2'-1; 10 | local K1 = 4+$END; 11 | local K2 = `K1'-1+`NC1'; 12 | local N = `K1'-1+`NC1_1'+`NC2_1'; 13 | 14 | forvalues i = 1/`N' {; local grad "`grad' g`i'"; }; 15 | 16 | args todo b lnf g negH `grad'; 17 | quietly {; 18 | 19 | tempvar p1_b p2_b lnf1; 20 | tempname r rho gamma d dtanh_dr drho_dr drho_dgamma rr drr1_dr d2tanh_dr2; 21 | 22 | mleval `p1_b' = `b' , eq(1); 23 | mleval `p2_b' = `b' , eq(2); 24 | mleval `r' = `b' , eq(3) scalar; 25 | if $END mleval `gamma' = `b' , eq(4) scalar; 26 | else scalar `gamma' = 0; 27 | forvalues k = 1/`NC1_1' {; 28 | tempname c`k'1; 29 | mleval `c`k'1' = `b' , eq(`=`K1'-1+`k'') scalar; 30 | }; 31 | forvalues k = 1/`NC2_1' {; 32 | tempname c`k'2; 33 | mleval `c`k'2' = `b' , eq(`=`K1'-1+`NC1_1'+`k'') scalar; 34 | }; 35 | // rr and rho are derived from the equation 3 parameter r (through tanh) and from gamma, which is 0 when the END flag is off 36 | scalar `rr' = 1/sqrt(1+(`gamma')^2+2*`gamma'*tanh(`r')); 37 | scalar `rho' = (tanh(`r')+`gamma')*`rr'; 38 | 39 | tempvar cut1 cut1_1 cut2 cut2_1; 40 | tempname CUT1 CUT2; 41 | // CUT1 and CUT2: first row minfloat(), last row maxfloat(), first cutpoint taken as is, later ones are the previous cutpoint plus a squared parameter 42 | local neginf = minfloat(); 43 | local posinf = maxfloat(); 44 | if (`NC1_1' > 1) matrix `CUT1' = `neginf' \ `c11' \ J(`NC1_1'-1,1,.) \ `posinf'; 45 | else matrix `CUT1' = `neginf' \ `c11' \ `posinf'; 46 | if (`NC2_1' > 1) matrix `CUT2' = `neginf' \ `c12' \ J(`NC2_1'-1,1,.)
\ `posinf'; 47 | else matrix `CUT2' = `neginf' \ `c12' \ `posinf'; 48 | forvalues k = 2/`NC1_1' {; 49 | matrix `CUT1'[`k'+1,1] = `CUT1'[`k',1]+`c`k'1'^2; 50 | }; 51 | forvalues k = 2/`NC2_1' {; 52 | matrix `CUT2'[`k'+1,1] = `CUT2'[`k',1]+`c`k'2'^2; 53 | }; 54 | // per observation upper and lower cutpoints, looked up by the value of each dependent variable 55 | generate double `cut1' = `CUT1'[$ML_y1+1,1]; 56 | generate double `cut1_1' = `CUT1'[$ML_y1 ,1]; 57 | generate double `cut2' = `CUT2'[$ML_y2+1,1]; 58 | generate double `cut2_1' = `CUT2'[$ML_y2 ,1]; 59 | // the four argument pairs below combine upper or lower cutpoint of equation 1 with upper or lower cutpoint of equation 2 60 | local arg11 "(`cut1' -`p1_b')"; local arg21 "(`rr'*(`cut2' -`p2_b'-`gamma'*`p1_b'))"; local cond11 "$ML_y1"; local cond21 "$ML_y2"; 61 | local arg12 "(`cut1_1'-`p1_b')"; local arg22 "(`rr'*(`cut2' -`p2_b'-`gamma'*`p1_b'))"; local cond12 "$ML_y1-1"; local cond22 "$ML_y2"; 62 | local arg13 "(`cut1' -`p1_b')"; local arg23 "(`rr'*(`cut2_1'-`p2_b'-`gamma'*`p1_b'))"; local cond13 "$ML_y1"; local cond23 "$ML_y2-1"; 63 | local arg14 "(`cut1_1'-`p1_b')"; local arg24 "(`rr'*(`cut2_1'-`p2_b'-`gamma'*`p1_b'))"; local cond14 "$ML_y1-1"; local cond24 "$ML_y2-1"; 64 | // likelihood contribution is binorm at (upper,upper) - (lower,upper) - (upper,lower) + (lower,lower), floored at 0.1D-60 before the log 65 | forvalues i = 1/4 {; 66 | tempvar q`i'; 67 | generate double `q`i''= binorm(`arg1`i'',`arg2`i'',`rho'); 68 | }; 69 | 70 | generate double `lnf1' = `q1'-`q2'-`q3'+`q4'; 71 | replace `lnf1' = 0.1D-60 if(`lnf1'<=0); 72 | mlsum `lnf' = log(`lnf1'); 73 | 74 | if (`todo'==0 | `lnf'==.)
exit; // calculate first derivatives 75 | 76 | scalar `d' = 1/sqrt(1-(`rho')^2); 77 | scalar `dtanh_dr' = 4*exp(2*`r')/((1+exp(2*`r'))^2); 78 | scalar `drho_dr' = (`rr'-`gamma'*`rho'*(`rr')^2)*`dtanh_dr'; 79 | scalar `drr1_dr' = -`gamma'*(`rr')^2*`dtanh_dr'; 80 | scalar `drho_dgamma' = `rr'*(1-(`rho')^2); 81 | 82 | forvalues i = 1/4 {; 83 | tempvar dF1`i' dF2`i' dF3`i' d1q`i' d3q`i' d`K2'q`i'; 84 | 85 | generate double `dF1`i'' = normden(`arg1`i'')*norm(`d'*(`arg2`i'' - `rho'*`arg1`i'')); 86 | generate double `dF2`i'' = normden(`arg2`i'')*norm(`d'*(`arg1`i'' - `rho'*`arg2`i'')); 87 | generate double `dF3`i'' = exp(-0.5*((`arg1`i'')^2+(`arg2`i'')^2-2*`rho'*`arg1`i''*`arg2`i'')*`d'^2)/(2*_pi*sqrt(1-(`rho')^2)); 88 | 89 | generate double `d1q`i'' = - `dF1`i'' - `gamma'*`rr'*`dF2`i''; 90 | local d2q`i' = "(-`dF2`i''*`rr')"; 91 | generate double `d3q`i'' = `dF2`i''*`arg2`i''*`drr1_dr'+`dF3`i''*`drho_dr'; 92 | if $END {; 93 | tempvar d4q`i'; 94 | generate double `d4q`i'' = -(`rho'*`arg2`i''+`p1_b')*`rr'*`dF2`i''+`dF3`i''*`drho_dgamma'; 95 | }; 96 | local d`K1'q`i' = "`dF1`i''"; 97 | generate double `d`K2'q`i'' = `rr'*`dF2`i''; 98 | forvalues k = 2/`NC1_1' {; 99 | local eqn = `K1'-1+`k'; 100 | tempvar d`eqn'q`i'; 101 | generate double `d`eqn'q`i'' = cond(`cond1`i''>=`k', `dF1`i''*2*`c`k'1', 0); 102 | }; // k 103 | forvalues k = 2/`NC2_1' {; 104 | local eqn = `K1'-1+`NC1_1'+`k'; 105 | tempvar d`eqn'q`i'; 106 | generate double `d`eqn'q`i'' = cond(`cond2`i''>=`k', `rr'*`dF2`i''*2*`c`k'2', 0); 107 | }; // k 108 | }; // i 109 | 110 | capture matrix drop `g'; 111 | forvalues i = 1/`N'{; 112 | tempvar g`i'i; 113 | generate double `g`i'i' = `d`i'q1'-`d`i'q2'-`d`i'q3'+`d`i'q4'; 114 | replace `g`i'' = `g`i'i'/`lnf1' ; 115 | mlvecsum `lnf' `g`i'' = `g`i'', eq(`i'); 116 | matrix `g' = (nullmat(`g'), `g`i''); 117 | }; // i 118 | 119 | if (`todo'==1 | `lnf'==.
) exit; // calculate second derivative 120 | 121 | scalar `d2tanh_dr2' = 8*exp(2*`r')*(1-exp(2*`r'))/(1+exp(2*`r'))^3; 122 | 123 | forvalues i = 1/4 {; 124 | tempvar d1_1q`i' d1_2q`i' d1_3q`i' d1_4q`i' d1_5q`i' 125 | d2_2q`i' d2_3q`i' d2_4q`i' d2_5q`i' 126 | d3_3q`i' d3_4q`i' d3_5q`i' 127 | d4_4q`i' d4_5q`i' 128 | d5_5q`i'; 129 | 130 | tempvar dF1_1`i' dF1_3`i' dF2_2`i' dF2_3`i' dF3_3`i'; 131 | generate double `dF1_1`i''=-(`arg1`i''*`dF1`i''+`rho'*`dF3`i''); 132 | local dF1_2`i' ="`dF3`i''"; 133 | generate double `dF1_3`i''=`dF3`i''*(`rho'*`arg2`i''-`arg1`i'')*`d'^2; 134 | 135 | generate double `dF2_2`i''=-(`arg2`i''*`dF2`i''+`rho'*`dF3`i''); 136 | generate double `dF2_3`i''=`dF3`i''*(`rho'*`arg1`i''-`arg2`i'')*`d'^2; 137 | 138 | generate double `dF3_3`i''=`dF3`i''*(`arg1`i''*`arg2`i''+`rho'-`rho'*(`arg1`i''^2+`arg2`i''^2-2*`rho'*`arg1`i''*`arg2`i'')*`d'^2)*`d'^2; 139 | 140 | tempvar dA2_dgamma; 141 | generate double `dA2_dgamma' = -`rr'*(`rho'*`arg2`i''+`p1_b'); 142 | 143 | 144 | generate double `d1_1q`i''= -(`dF1_1`i''+2*`rr'*`gamma'*`dF1_2`i''+(`gamma'*`rr')^2*`dF2_2`i''); 145 | generate double `d1_2q`i''= -(`dF1_2`i''+ `rr'*`gamma'*`dF2_2`i'')*`rr'; 146 | generate double `d1_3q`i''= (`dF1_2`i''+ `rr'*`gamma'*`dF2_2`i'')*`arg2`i''*`drr1_dr' + 147 | (`dF1_3`i''+ `rr'*`gamma'*`dF2_3`i'')*`drho_dr'+ 148 | `rr'*`gamma'*`dF2`i''*`drr1_dr'; 149 | if $END generate double `d1_4q`i''= (`dF1_2`i''+ `rr'*`gamma'*`dF2_2`i'')*`dA2_dgamma'+ 150 | (`dF1_3`i''+ `rr'*`gamma'*`dF2_3`i'')*`drho_dgamma'+ 151 | (1-`rho'*`rr'*`gamma')*`dF2`i''*`rr'; 152 | 153 | generate double `d1_`K1'q`i''= `dF1_1`i''+ `rr'*`gamma'*`dF1_2`i''; 154 | 155 | generate double `d2_2q`i''= -(`rr')^2*`dF2_2`i''; 156 | 157 | generate double `d2_3q`i'' = (`dF2_2`i''*`arg2`i''*`drr1_dr'+`dF2_3`i''*`drho_dr'+ `dF2`i''*`drr1_dr')*`rr'; 158 | if $END generate double `d2_4q`i'' = (`dF2_2`i''*`dA2_dgamma'+ `dF2_3`i''*`drho_dgamma'-`dF2`i''*`rr'*`rho')*`rr'; 159 | generate double `d2_`K1'q`i''= `rr'*`dF1_2`i''; 160
| 161 | generate double `d3_3q`i'' =-((`dF2_2`i''*`arg2`i''*`drr1_dr'+`dF2_3`i''*`drho_dr')*`arg2`i''*`drr1_dr'+ 162 | (`dF2_3`i''*`arg2`i''*`drr1_dr'+`dF3_3`i''*`drho_dr')*`drho_dr'+ 163 | 3*`arg2`i''*(`drr1_dr')^2*`dF2`i''+ 164 | (`rho'*`drr1_dr'+2*`drho_dr')*`drr1_dr'*`dF3`i''); 165 | 166 | if $END generate double `d3_4q`i''=-((`dF2_2`i''*`arg2`i''*`drr1_dr'+`dF2_3`i''*`drho_dr')*`dA2_dgamma'+ 167 | (`dF2_3`i''*`arg2`i''*`drr1_dr'+`dF3_3`i''*`drho_dr')*`drho_dgamma'+ 168 | `dF2`i''*(`drr1_dr'*`dA2_dgamma'-`arg2`i''*`rr'*`rho'*`drr1_dr'-`arg2`i''*`rr'*`drho_dr')+ 169 | `dF3`i''*(`drr1_dr'*`drho_dgamma'-2*`rho'*`rr'*`drho_dr')); 170 | 171 | generate double `d3_`K1'q`i''= -(`dF1_2`i''*`arg2`i''*`drr1_dr'+`dF1_3`i''*`drho_dr'); 172 | 173 | if $END generate double `d4_4q`i''= -((`dF2_2`i''*`dA2_dgamma'+`dF2_3`i''*`drho_dgamma')*`dA2_dgamma'+ 174 | (`dF2_3`i''*`dA2_dgamma'+`dF3_3`i''*`drho_dgamma')*`drho_dgamma'- 175 | `dF2`i''*`rr'*(2*`rho'*`dA2_dgamma'+`arg2`i''*`drho_dgamma')- 176 | `dF3`i''*3*`rr'*`rho'*`drho_dgamma'); 177 | if $END generate double `d4_5q`i''=-(`dF1_2`i''*`dA2_dgamma'+`dF1_3`i''*`drho_dgamma'); 178 | 179 | generate double `d`K1'_`K1'q`i''=- `dF1_1`i''; 180 | 181 | forvalues k = 2/`NC1_1' {; 182 | local eqn = `K1'-1+`k'; 183 | forvalues l = 1/`K1' {; 184 | tempvar d`l'_`eqn'q`i'; 185 | generate double `d`l'_`eqn'q`i'' = cond(`cond1`i''>=`k', `d`l'_`K1'q`i''*2*`c`k'1', 0); 186 | }; 187 | 188 | forvalues kk = 2/`NC2_1' {; 189 | local eqn2 = `K1'-1+`NC1_1'+`kk'; 190 | tempvar d`eqn'_`eqn2'q`i'; 191 | generate double `d`eqn'_`eqn2'q`i'' = cond((`cond1`i''>=`k' & `cond2`i''>=`kk'), -`d2_`K1'q`i''*4*`c`k'1'*`c`kk'2', 0); 192 | }; // kk 193 | }; // k 194 | 195 | forvalues k = 2/`NC2_1' {; 196 | local eqn = `K1'-1+`NC1_1'+`k'; 197 | tempvar d1_`eqn'q`i' d2_`eqn'q`i' d3_`eqn'q`i' d`K1'_`eqn'q`i'; 198 | generate double `d1_`eqn'q`i'' = cond(`cond2`i''>=`k', -`d1_2q`i''*2*`c`k'2', 0); 199 | generate double `d2_`eqn'q`i'' = cond(`cond2`i''>=`k',
-`d2_2q`i''*2*`c`k'2', 0); 200 | generate double `d3_`eqn'q`i'' = cond(`cond2`i''>=`k', -`d2_3q`i''*2*`c`k'2', 0); 201 | if $END {; 202 | tempvar d4_`eqn'q`i'; 203 | generate double `d4_`eqn'q`i'' = cond(`cond2`i''>=`k', -`d2_4q`i''*2*`c`k'2', 0); 204 | }; 205 | generate double `d`K1'_`eqn'q`i'' = cond(`cond2`i''>=`k', -`d2_`K1'q`i''*2*`c`k'2', 0); 206 | }; // k 207 | }; // i 208 | 209 | forvalues k = 1/`K1' {; 210 | forvalues l = `k'/`K1' {; 211 | tempvar g`k'_`l'i; 212 | generate double `g`k'_`l'i'= `d`k'_`l'q1'-`d`k'_`l'q2'-`d`k'_`l'q3'+`d`k'_`l'q4'+`g`k'i'*`g`l'i'/`lnf1'; 213 | }; // l 214 | }; //k 215 | 216 | local g1_`K2'i = "-`g1_2i'"; 217 | local g2_`K2'i = "-`g2_2i'"; 218 | local g3_`K2'i = "-`g2_3i'"; 219 | if $END local g4_`K2'i = "-`g2_4i'"; 220 | local g`K1'_`K2'i = "-`g2_`K1'i'"; 221 | local g`K2'_`K2'i = " `g2_2i'"; 222 | 223 | forvalues k = 2/`NC1_1' {; 224 | local eqn = `K1'-1+`k'; 225 | forvalues l = 1/`K1' {; 226 | tempvar g`l'_`eqn'i; 227 | generate double `g`l'_`eqn'i' = `d`l'_`eqn'q1'-`d`l'_`eqn'q2'-`d`l'_`eqn'q3'+`d`l'_`eqn'q4'+`g`l'i'*`g`eqn'i'/`lnf1'; 228 | }; // l 229 | tempvar g`eqn'_`eqn'i; 230 | generate double `g`eqn'_`eqn'i' =(`d`K1'_`eqn'q1'-`d`K1'_`eqn'q2'-`d`K1'_`eqn'q3'+`d`K1'_`eqn'q4')*2*`c`k'1'-`g`eqn'i'/(`c`k'1')+`g`eqn'i'^2/`lnf1'; 231 | local g`eqn'_`K2'i = "-`g2_`eqn'i'"; 232 | local k_1 = `k'+1; 233 | forvalues kk = `k_1'/`NC1_1' {; 234 | local eqn1 = `K1'-1+`kk'; 235 | tempvar g`eqn'_`eqn1'i; 236 | generate double `g`eqn'_`eqn1'i' = (`d`K1'_`eqn1'q1'-`d`K1'_`eqn1'q2'-`d`K1'_`eqn1'q3'+`d`K1'_`eqn1'q4')*2*`c`k'1'+`g`eqn'i'*`g`eqn1'i'/`lnf1'; 237 | }; // kk 238 | forvalues kk = 2/`NC2_1' {; 239 | local eqn2 = `K1'-1+`NC1_1'+`kk'; 240 | tempvar g`eqn'_`eqn2'i; 241 | generate double `g`eqn'_`eqn2'i' = `d`eqn'_`eqn2'q1'-`d`eqn'_`eqn2'q2'-`d`eqn'_`eqn2'q3'+`d`eqn'_`eqn2'q4'+ `g`eqn'i'*`g`eqn2'i'/`lnf1'; 242 | }; // kk 243 | }; // k 244 | forvalues k = 2/`NC2_1' {; 245 | local eqn = `K1'-1+`NC1_1'+`k'; 246 | forvalues l = 1/`K1'
{; 247 | tempvar g`l'_`eqn'i; 248 | generate double `g`l'_`eqn'i' =`d`l'_`eqn'q1'-`d`l'_`eqn'q2'-`d`l'_`eqn'q3'+`d`l'_`eqn'q4'+`g`l'i'*`g`eqn'i'/`lnf1'; 249 | }; // l 250 | 251 | tempvar g`eqn'_`eqn'i; 252 | generate double `g`eqn'_`eqn'i' = -(`d2_`eqn'q1'-`d2_`eqn'q2'-`d2_`eqn'q3'+`d2_`eqn'q4')*2*`c`k'2'-`g`eqn'i'/(`c`k'2')+`g`eqn'i'^2/`lnf1'; 253 | local g`K2'_`eqn'i = "-`g2_`eqn'i'"; 254 | local k_1 = `k'+1; 255 | forvalues kk = `k_1'/`NC2_1' {; 256 | local eqn2 = `K1'-1+`NC1_1'+`kk'; 257 | tempvar g`eqn'_`eqn2'i; 258 | generate double `g`eqn'_`eqn2'i' =-(`d2_`eqn2'q1'-`d2_`eqn2'q2'-`d2_`eqn2'q3'+`d2_`eqn2'q4')*2*`c`k'2'+`g`eqn'i'*`g`eqn2'i'/`lnf1'; 259 | }; // kk 260 | }; // k 261 | 262 | replace `g3_3i' = `g3_3i'-(`g3i'/`dtanh_dr')*`d2tanh_dr2'; 263 | 264 | capture matrix drop `negH'; 265 | forvalues k = 1/`N' {; 266 | tempname H; 267 | tempname g`k'_`k'; 268 | mlmatsum `lnf' `g`k'_`k'' = `g`k'_`k'i'/`lnf1', eq(`k'); 269 | forvalues l = 1/`N' {; 270 | if (`l'>`k') {; 271 | tempname g`k'_`l'; 272 | mlmatsum `lnf' `g`k'_`l'' = `g`k'_`l'i'/`lnf1', eq(`k',`l'); 273 | matrix `H' =nullmat(`H'),`g`k'_`l''; 274 | }; 275 | else {; 276 | matrix `H' =nullmat(`H'),`g`l'_`k'''; 277 | }; 278 | }; // l 279 | matrix `negH' = nullmat(`negH') \ `H'; 280 | }; // k 281 | }; // end quietly 282 | end; // end program bioprobit_d2 283 | 284 | -------------------------------------------------------------------------------- /bioprobit/bioprobit_p.ado: -------------------------------------------------------------------------------- 1 | *! version 1.0 27Aug2006 Z.
Sajaia 2 | #delim ; 3 | /* Prediction program used after bioprobit. With option scores the whole command line is handed to ml_score. Otherwise exactly one new variable is created: a linear prediction (xb1, xb2), its standard error (stdp1, stdp2) or, by default, the joint probability of the pair of categories named in outcome(). */ 4 | program define bioprobit_p; 5 | version 9; 6 | 7 | quietly {; 8 | //stolen from bipr_p.ado 9 | syntax [anything] [if] [in] [, SCores * ]; 10 | if ~missing(`"`scores'"') {; 11 | ml_score `0'; 12 | exit; 13 | }; 14 | 15 | local myopts XB1 XB2 STDP1 STDP2 Outcome(string); 16 | 17 | _pred_se "`myopts'" `0'; 18 | if (`s(done)') exit; 19 | local vtyp `s(typ)'; 20 | local varn `s(varn)'; 21 | local 0 `"`s(rest)'"'; 22 | 23 | syntax [if] [in] [, `myopts' noOFFset]; 24 | 25 | local type `xb1'`xb2'`stdp1'`stdp2'; 26 | 27 | marksample touse; 28 | 29 | local y1 : word 1 of `e(depvar)'; 30 | local y2 : word 2 of `e(depvar)'; 31 | 32 | if (missing("`type'") & missing("`outcome'")) {; 33 | local outcome "#1, #1"; 34 | noisily display as text "(option outcome(#1, #1) assumed)"; 35 | }; 36 | 37 | if e(df_m) != 0 {; 38 | tempvar txb1 txb2; 39 | _predict double `txb1' if `touse', eq(#1) `offset'; 40 | _predict double `txb2' if `touse', eq(#2) `offset'; 41 | }; 42 | else {; /* e(df_m)==0: each equation contributes only its own stored offset, or 0 when there is none or nooffset was requested */ 43 | if (~missing("`e(offset1)'") & missing("`offset'")) local txb1 `e(offset1)'; 44 | else local txb1 0; 45 | if (~missing("`e(offset2)'") & missing("`offset'")) local txb2 `e(offset2)'; 46 | else local txb2 0; 47 | }; // if 48 | 49 | if ~missing("`outcome'") {; 50 | // get cuttoff points 51 | _parse comma out1 out2: outcome; 52 | local out2 : subinstr local out2 "," "", all; 53 | local out2 = trim("`out2'"); 54 | forvalues i = 1/2 {; 55 | tempvar sy; 56 | egen `sy' = group(`y`i''); 57 | capture confirm numeric variable `y`i''; scalar rc = _rc; 58 | if substr(`"`out`i''"',1,1)=="#" {; 59 | local out = substr(`"`out`i''"',2,.); 60 | Chk confirm integer number `out'; 61 | Chk assert `out' >= 1; 62 | quietly tabulate `y`i'' if `touse'; 63 | if (`out'>r(r)) {; 64 | noisily display as error "there is no outcome #`out'" _n 65 | "there are only `r(r)' categories in `y`i''"; 66 | exit 111; 67 | }; 68 | else local c =`out'; 69 | }; 70 | else {; 71 | if (rc) local out`i' "`"`out`i''"'"; 72 | if 
missing(`out`i'') local c=-99; 73 | else {; 74 | Chk confirm number `out`i''; 75 | summarize `sy' if `touse' & `y`i''==`out`i'', meanonly; local c = r(mean); 76 | if missing(`c') {; 77 | noisily display as error "`y`i'' never takes value `out`i''"; 78 | exit 111; 79 | }; 80 | }; 81 | }; 82 | if (`c' == -99) {; 83 | local cut`i' = maxfloat(); 84 | local cut`i'_1 = minfloat(); 85 | }; 86 | else {; 87 | quietly summarize `sy' if `touse'; 88 | if (`c'==r(max)) local cut`i' = maxfloat(); 89 | else local cut`i' = _b[cut`i'`c':_cons]; 90 | local --c; 91 | if (`c'==0) local cut`i'_1 = minfloat(); 92 | else local cut`i'_1 = _b[cut`i'`c':_cons]; 93 | }; 94 | }; // end forvalues 95 | 96 | tempname gamma rr rho; 97 | 98 | capture scalar `gamma' = _b[gamma:_cons]; 99 | if _rc scalar `gamma' = 0; // it's a SUR model. 100 | 101 | scalar `rr' = 1/sqrt(1+(`gamma')^2+2*`gamma'*tanh(_b[athrho:_cons])); 102 | scalar `rho' = (tanh(_b[athrho:_cons])+`gamma')*`rr'; 103 | /* probability of the outcome rectangle: four bivariate normal cdf values taken at the upper (cut1, cut2) and lower (cut1_1, cut2_1) cutoffs */ 104 | generate `vtyp' `varn' = binorm(`cut1' -`txb1',`rr'*(`cut2' -`txb2'-`gamma'*`txb1'),`rho') 105 | - binorm(`cut1_1'-`txb1',`rr'*(`cut2' -`txb2'-`gamma'*`txb1'),`rho') 106 | - binorm(`cut1' -`txb1',`rr'*(`cut2_1'-`txb2'-`gamma'*`txb1'),`rho') 107 | + binorm(`cut1_1'-`txb1',`rr'*(`cut2_1'-`txb2'-`gamma'*`txb1'),`rho'); 108 | exit; 109 | }; // outcome 110 | 111 | if ("`type'" == "xb1") {; 112 | generate `vtyp' `varn' = `txb1'; 113 | label variable `varn' "linear prediction of `y1'"; 114 | exit; 115 | }; 116 | if ("`type'" == "xb2") {; 117 | generate `vtyp' `varn' = `txb2'; 118 | label variable `varn' "linear prediction of `y2'"; 119 | exit; 120 | }; 121 | if ("`type'" == "stdp1") {; 122 | _predict `vtyp' `varn', stdp eq(#1) `offset', if `touse'; 123 | label variable `varn' "S.E. of prediction of `y1'"; 124 | exit; 125 | }; 126 | if ("`type'" == "stdp2") {; 127 | _predict `vtyp' `varn', stdp eq(#2) `offset', if `touse'; 128 | label variable `varn' "S.E. 
of prediction of `y2'"; 129 | exit; 130 | }; 131 | error 198; 132 | }; // quietly 133 | end; 134 | /* Chk runs the command given as its arguments under capture; if that command fails it prints the outcome() usage message and exits with code 111. */ 135 | //stolen from ologit_p.ado 136 | program define Chk; 137 | capture `0'; 138 | if _rc {; 139 | noisily display as error "outcome() must be pair of either values of `e(depvar)'," 140 | _n "or #1, #2, ..."; 141 | exit 111; 142 | }; 143 | end; 144 | 145 | -------------------------------------------------------------------------------- /egen_inequal.pkg: -------------------------------------------------------------------------------- 1 | v 3 2 | d egen_inequal - egen extention functions for inequality and poverty measures. 3 | d Distribution-Date: 08sep2006 4 | 5 | f egen_inequal/_gcov.ado 6 | f egen_inequal/_gentropy.ado 7 | f egen_inequal/_gfgt.ado 8 | f egen_inequal/_ggini.ado 9 | f egen_inequal/_ghalf.ado 10 | f egen_inequal/_ginequal.ado 11 | f egen_inequal/_gkakwani.ado 12 | f egen_inequal/_gmehran.ado 13 | f egen_inequal/_gmld.ado 14 | f egen_inequal/_gpiesch.ado 15 | f egen_inequal/_grmd.ado 16 | f egen_inequal/_gsdl.ado 17 | f egen_inequal/_gtheil.ado 18 | f egen_inequal/_gxtile.ado 19 | f egen_inequal/egen_inequal.hlp 20 | -------------------------------------------------------------------------------- /egen_inequal/_gcov.ado: -------------------------------------------------------------------------------- 1 | program define _gcov 2 | if strpos("`0'", ",")==0 { 3 | local t "," 4 | } 5 | _ginequal `0' `t' index(cov) 6 | end // program _gcov 7 | 8 | 9 | -------------------------------------------------------------------------------- /egen_inequal/_gentropy.ado: -------------------------------------------------------------------------------- 1 | program define _gentropy 2 | if strpos("`0'", ",")==0 { 3 | local t "," 4 | } 5 | _ginequal `0' `t' index(entropy) 6 | end // program _gentropy 7 | 8 | 9 | -------------------------------------------------------------------------------- /egen_inequal/_gfgt.ado: 
-------------------------------------------------------------------------------- 1 | # delimit ; 2 | program define _gfgt; 3 | syntax newvarname =/exp [if] [in] [, BY(varlist) 4 | Weights(varname) 5 | Alpha(numlist max=1 >=0)] 6 | PLine(string); 7 | /* egen function fgt(): fills the new variable with sum(fik*poor*((z-inc)/z)^alpha) per by-group, where z is the poverty line from pline() (a number or a numeric variable), fik is the weight share within the group and alpha defaults to 0. Working variables are declared with tempvar so they are temporary variables. */ 8 | tempvar touse poor z nk vk fik; 9 | 10 | capture confirm number `pline'; 11 | if _rc {; 12 | confirm numeric variable `pline'; 13 | local z `pline'; 14 | }; 15 | else generate `z'=`pline'; 16 | 17 | local inc : subinstr local exp "(" ""; 18 | local inc : subinstr local inc ")" ""; 19 | local inc = trim("`inc'"); 20 | confirm variable `inc'; 21 | 22 | marksample touse, novarlist; 23 | markout `touse' `inc' `by', strok; /* strok: allow string by() variables, as _ginequal does */ 24 | 25 | if ("`by'" == "") {; 26 | tempvar by; 27 | generate byte `by' = 1; 28 | }; 29 | else local bystr "by(`by')"; 30 | 31 | if ("`weights'" == "") {; 32 | tempvar wi; 33 | generate byte `wi' = 1; 34 | }; 35 | else {; 36 | tempname wi; 37 | local wi `weights'; 38 | }; 39 | if ("`alpha'"=="") {; 40 | local alpha = 0; 41 | }; 42 | quietly {; 43 | sum `inc' [w=`wi'] if `touse'; 44 | local sumwi = _result(2); 45 | 46 | generate `poor' = (`inc' < `z') if `touse'; 47 | egen `nk' = sum(`wi') if `touse', by(`by'); 48 | generate `vk' = `nk'/`sumwi' if `touse'; 49 | generate `fik' = `wi'/`nk' if `touse'; 50 | egen `varlist' = sum(`fik'*`poor'*((`z'-`inc')/`z')^(`alpha')) if `touse', by(`by'); 51 | local index = cond(`alpha'==0, "(headcount)", 52 | cond(`alpha'==1, "(poverty gap)", 53 | cond(`alpha'==2, "(poverty severity)","(fgt(`alpha'))"))); 54 | label variable `varlist' "`index' `inc' `bystr'"; 55 | }; // quietly 56 | end; // program _gfgt 57 | -------------------------------------------------------------------------------- /egen_inequal/_ggini.ado: -------------------------------------------------------------------------------- 1 | program define _ggini 2 | if strpos("`0'", ",")==0 { 3 | local t "," 4 | } 5 | _ginequal `0' `t' index(gini) 6 | end // program _ggini 7 | 8 | 9 | 
-------------------------------------------------------------------------------- /egen_inequal/_ghalf.ado: -------------------------------------------------------------------------------- 1 | program define _ghalf 2 | if strpos("`0'", ",")==0 { 3 | local t "," 4 | } 5 | _ginequal `0' `t' index(half) 6 | end // program _ghalf 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /egen_inequal/_ginequal.ado: -------------------------------------------------------------------------------- 1 | // Last modified: 2-May-2016 2 | # delimit ; 3 | program define _ginequal; 4 | syntax newvarname =/exp [if] [in] [, BY(varlist) Weights(varname) INDex(string)]; 5 | /* egen function inequal(): fills the new variable with the inequality index named in index() (gini when omitted), computed per by-group and optionally weighted. Observations whose value is <= 0 or missing are excluded. */ 6 | local inc : subinstr local exp "(" ""; 7 | local inc : subinstr local inc ")" ""; 8 | local inc = trim("`inc'"); 9 | confirm variable `inc'; 10 | 11 | tempvar badinc touse nk nk0 fik meanyk meanyk0 lnm pyk i tmptmp N; 12 | 13 | quietly {; // quietly 14 | marksample touse , novarlist; 15 | count if `inc' <= 0 & `touse'; 16 | local ct = r(N); 17 | if `ct' > 0 {; 18 | noisily display; 19 | noisily display in blue "Warning: `inc' has `ct' values <= 0." _c; 20 | noisily display in blue " Not used in calculations"; 21 | }; 22 | generate `badinc' = 0; 23 | replace `badinc' =. 
if (`inc' <= 0 | `inc'==.); 24 | markout `touse' `badinc'; 25 | 26 | markout `touse' `by', strok; 27 | 28 | if ("`by'" == "") {; 29 | tempvar by; 30 | generate byte `by' = 1; 31 | }; 32 | else {; 33 | unab by : `by'; 34 | local bystr "by(`by')"; 35 | }; 36 | 37 | generate `i' = .; 38 | generate `tmptmp' = .; 39 | /* i is the rank (or weighted mid-rank) of inc inside each by-group, used by the mehran and piesch indices. The sort keys and the by-prefix must agree: a later re-sort on the by-group alone would leave the order inside each group undefined and would count excluded observations in the ranks. */ 40 | sort `touse' `by' `inc'; 41 | 42 | if ("`weights'" == "") {; 43 | tempvar wi; 44 | generate byte `wi' = 1; 45 | by `touse' `by': replace `i' = _n; 46 | }; 47 | else {; 48 | tempname wi; 49 | local wi `weights'; 50 | by `touse' `by': replace `tmptmp' = sum(`wi'); 51 | replace `i' = ((2*`tmptmp')-`wi'+1)/2; 52 | }; 53 | if ("`index'" == "") {; 54 | local index "gini"; 55 | }; 56 | 57 | count if (`wi' < 0); 58 | if (_result(1) > 0) {; 59 | noisily display as error "`wi' contains negative values"; 60 | exit 402; 61 | }; 62 | 63 | egen `nk' = sum(`wi') if `touse', by(`by'); 64 | generate `fik' = `wi'/`nk' if `touse'; 65 | egen `meanyk' = sum(`fik'*`inc') if `touse', by(`by'); 66 | 67 | // relative mean deviation 68 | if ("`index'" == "rmd") {; 69 | egen `varlist' = sum(`fik'*abs(`inc'-`meanyk')/(2*`meanyk')) if `touse', by(`by'); 70 | }; 71 | // coefficient of variation 72 | else if ("`index'" == "cov") {; 73 | egen `varlist' = sum(`wi'*(`inc'-`meanyk')^2) if `touse', by(`by'); 74 | egen `N' = count(`inc') if `touse', by(`by'); 75 | replace `varlist' = sqrt(`varlist'/`nk'*`N'/(`N'-1))/`meanyk' if `touse'; 76 | }; 77 | // standard deviation of logs 78 | else if ("`index'" == "sdl") {; 79 | egen `lnm' = sum(`fik'*ln(`inc')) if `touse', by(`by'); 80 | egen `varlist' = sum(`wi'*(ln(`inc')-`lnm')^2) if `touse', by(`by'); 81 | egen `N' = count(`inc') if `touse', by(`by'); 82 | replace `varlist' = sqrt(`varlist'/`nk'*`N'/(`N'-1)) if `touse'; 83 | }; 84 | // gini 85 | else if ("`index'" == "gini") {; 86 | sort `by' `inc'; 87 | by `by': generate `pyk' = (2*sum(`wi') - `wi' + 1)/(2 * `nk' ) if `touse'; 88 | egen `varlist' = sum(`fik'*(2/`meanyk')*`pyk'*(`inc'-`meanyk')) if 
`touse', by(`by'); 89 | }; 90 | // mehran 91 | else if ("`index'" == "mehran") {; 92 | egen `varlist' = sum(3*`wi'*`i'*(2*`nk'+1 -`i')*(`inc' - `meanyk')/(`nk'^3*`meanyk')) if `touse', by(`by'); 93 | }; 94 | // piesch 95 | else if ("`index'" == "piesch") {; 96 | egen `varlist' = sum(`wi'*`i'*(`i'-1)*(`inc'-`meanyk')) if `touse', by(`by'); 97 | replace `varlist' = 3*`varlist'/(2*`nk'^3*`meanyk') if `touse'; 98 | }; 99 | // kakwani 100 | else if ("`index'" == "kakwani") {; 101 | egen `varlist' = sum(`wi'*((`inc'^2+`meanyk'^2)^0.5)) if `touse', by(`by'); 102 | replace `varlist' = (1/(2-2^0.5))*((`varlist'/(`nk'*`meanyk')-2^0.5)) if `touse'; 103 | }; 104 | else {; 105 | egen `nk0' = sum(`wi') if `touse' & `inc' > 0, by(`by'); 106 | egen `meanyk0' = sum(`fik'*`inc') if `touse' & `inc' > 0, by(`by'); 107 | // theil 108 | if ("`index'" == "theil") {; 109 | egen `varlist' = sum(`wi'*((`inc'/`meanyk0')*(log(`inc'/`meanyk0')))/`nk0') if `touse', by(`by'); 110 | }; 111 | // mean log deviation 112 | else if ("`index'" == "mld") {; 113 | egen `varlist' = sum(`wi'*(log(`meanyk0'/`inc'))/`nk0') if `touse', by(`by'); 114 | }; 115 | // GE -1 116 | else if ("`index'" == "entropy") {; 117 | egen `varlist' = sum(`wi'*(`meanyk0'/`inc')) if `touse', by(`by'); 118 | replace `varlist' = ((`varlist'/`nk0')-1)/2 if `touse'; 119 | }; 120 | // GE 2 121 | else if ("`index'" == "half") {; 122 | egen `varlist' = sum(`wi'*((`inc'/`meanyk0')^2)) if `touse', by(`by'); 123 | replace `varlist' = ((`varlist'/`nk0')-1)/2 if `touse'; 124 | }; 125 | }; 126 | label variable `varlist' "(`index') `inc' `bystr'"; 127 | }; // end quietly 128 | end; // program _ginequal 129 | -------------------------------------------------------------------------------- /egen_inequal/_gkakwani.ado: -------------------------------------------------------------------------------- 1 | program define _gkakwani 2 | if strpos("`0'", ",")==0 { 3 | local t "," 4 | } 5 | _ginequal `0' `t' index(kakwani) 6 | end // program _gkakwani 7 
| 8 | 9 | -------------------------------------------------------------------------------- /egen_inequal/_gmehran.ado: -------------------------------------------------------------------------------- 1 | program define _gmehran /* wrapper: passes its arguments to _ginequal with index(mehran), adding a comma first when the caller gave no options */ 2 | if strpos("`0'", ",")==0 { 3 | local t "," 4 | } 5 | _ginequal `0' `t' index(mehran) 6 | end // program _gmehran 7 | 8 | 9 | -------------------------------------------------------------------------------- /egen_inequal/_gmld.ado: -------------------------------------------------------------------------------- 1 | program define _gmld /* wrapper: passes its arguments to _ginequal with index(mld), adding a comma first when the caller gave no options */ 2 | if strpos("`0'", ",")==0 { 3 | local t "," 4 | } 5 | _ginequal `0' `t' index(mld) 6 | end // program _gmld 7 | 8 | 9 | -------------------------------------------------------------------------------- /egen_inequal/_gpiesch.ado: -------------------------------------------------------------------------------- 1 | program define _gpiesch /* wrapper: passes its arguments to _ginequal with index(piesch), adding a comma first when the caller gave no options */ 2 | if strpos("`0'", ",")==0 { 3 | local t "," 4 | } 5 | _ginequal `0' `t' index(piesch) 6 | end // program _gpiesch 7 | 8 | 9 | -------------------------------------------------------------------------------- /egen_inequal/_grmd.ado: -------------------------------------------------------------------------------- 1 | program define _grmd /* wrapper: passes its arguments to _ginequal with index(rmd), adding a comma first when the caller gave no options */ 2 | if strpos("`0'", ",")==0 { 3 | local t "," 4 | } 5 | _ginequal `0' `t' index(rmd) 6 | end // program _grmd 7 | 8 | 9 | -------------------------------------------------------------------------------- /egen_inequal/_gsdl.ado: -------------------------------------------------------------------------------- 1 | program define _gsdl /* wrapper: passes its arguments to _ginequal with index(sdl), adding a comma first when the caller gave no options */ 2 | if strpos("`0'", ",")==0 { 3 | local t "," 4 | } 5 | _ginequal `0' `t' index(sdl) 6 | end // program _gsdl 7 | 8 | 9 | -------------------------------------------------------------------------------- /egen_inequal/_gtheil.ado: -------------------------------------------------------------------------------- 1 | program define _gtheil /* wrapper: passes its arguments to _ginequal with index(theil), adding a comma first when the caller gave no options */ 2 | if strpos("`0'", ",")==0 { 3 | local t "," 4 | } 5 | _ginequal `0' `t' 
index(theil) 6 | end // program _gtheil 7 | 8 | 9 | -------------------------------------------------------------------------------- /egen_inequal/egen_inequal.hlp: -------------------------------------------------------------------------------- 1 | {smcl} 2 | {* 19Mar2006}{...} 3 | {cmd:help egen_inequal} 4 | {hline} 5 | 6 | {title:Title} 7 | 8 | {p2colset 5 17 19 2}{...} 9 | {p2col :{hi:[D] egen} {hline 2}}Extensions to generate inequality and poverty measures{p_end} 10 | {p2colreset}{...} 11 | 12 | {title:Syntax for FGT poverty measures} 13 | 14 | {p 8 14 2} 15 | {cmd:egen} [{it:{help type}}] {newvar} {cmd:=} fgt({it:{help varname}}) {ifin} 16 | {cmd:,} {opt pl:ine(varname or #)} [ {opth by:(varlist)} {opth w:eights(varname)} 17 | {opt a:lpha(#)} ] 18 | 19 | {title:Syntax for inequality measures} 20 | 21 | {p 8 14 2} 22 | {cmd:egen} [{it:{help type}}] {newvar} {cmd:=} inequal({it:{help varname}}) {ifin} 23 | [{cmd:,} {opth by:(varlist)} {opth w:eights(varname)} {opth ind:ex(egen_inequal##indlist:fcn)} ] 24 | 25 | {p 4 4 2} Or alternatively,{p_end} 26 | 27 | {p 8 14 2} 28 | {cmd:egen} [{it:{help type}}] {newvar} {cmd:=} fcn({it:{help varname}}) {ifin} 29 | [{cmd:,} {opth by:(varlist)} {opth w:eights(varname)}] 30 | 31 | {title:Description} 32 | 33 | {p 4 4 2} 34 | The set of programs in this package uses {cmd:egen} to create {newvar} of the optionally 35 | specified storage {it:{help type}} equal to one of the standard inequality and 36 | Foster-Greer-Thorbecke (FGT) poverty measures. 37 | 38 | {p 4 4 2} 39 | (Note that egen may change the sort order of your data). 40 | 41 | {title:Options} 42 | 43 | {dlgtab:Common options} 44 | 45 | {p 4 4 2} 46 | The option {opth by:(varlist)} means that computations are performed 47 | separately for each group defined by {varlist}. 48 | 49 | {p 4 4 2} 50 | The option {opth w:eights(varname)} means that computation of poverty and inequality 51 | measures are performed with weights taken from the values of variable {varname}. 
52 | 53 | {dlgtab:Poverty function options} 54 | 55 | {p 4 4 2} 56 | {opt pl:ine(varname or #)} is a required parameter that defines the poverty line. 57 | The poverty line can be specified either as a variable or as a numeric value. 58 | 59 | {p 4 4 2} 60 | {opt a:lpha(#)} is an optional non-negative argument that specifies the parameter in the FGT-type 61 | poverty measures. {opt a:lpha(0)} corresponds to the poverty rate; {opt a:lpha(1)} corresponds to 62 | the average poverty gap index; {opt a:lpha(2)} corresponds to the severity of poverty index. By default 63 | (alpha is not specified) the poverty rate is generated. 64 | 65 | {dlgtab:Inequality function options} 66 | 67 | {p 4 4 2} 68 | An optional parameter {opt ind:ex(fcn)} specifies the inequality function {it:fcn} that 69 | generates a new variable. The Gini inequality index is the default option (index is not specified). 70 | Alternative syntax uses the type of inequality index directly to generate the new variable. 71 | No {opt ind:ex(fcn)} parameter needs to be specified in that case. See examples for explanation. 
72 | 73 | {p 4 4 2}list of inequality functions {it:fcn}:{p_end} 74 | 75 | {synoptset 10 tabbed}{...} 76 | {marker indlist}{...} 77 | {synoptline} 78 | {synopt :{cmd: rmd}} the relative mean deviation {p_end} 79 | {synopt :{cmd: cov}} the coefficient of variation {p_end} 80 | {synopt :{cmd: sdl}} the standard deviation of logs {p_end} 81 | {synopt :{cmd: gini}} the Gini index {p_end} 82 | {synopt :{cmd: mehran}} the Mehran index {p_end} 83 | {synopt :{cmd: piesch}} the Piesch index {p_end} 84 | {synopt :{cmd: kakwani}} the Kakwani index {p_end} 85 | {synopt :{cmd: theil}} Theil entropy index {p_end} 86 | {synopt :{cmd: mld}} the mean log deviation {p_end} 87 | {synopt :{cmd: entropy}} generalized entropy measure (GE -1) {p_end} 88 | {synopt :{cmd: half}} generalized entropy measure (GE 2) {p_end} 89 | {synoptline} 90 | 91 | {title:Examples} 92 | 93 | {p 4 4 2}To create a new variable {it:gini_reg} containing the value of gini coefficient for the distribution of 94 | household expenditure {it:expend} by {it:region} using household weight {it:hhweight}: {p_end} 95 | 96 | {p 8 8 1}{cmd:. egen gini_reg = inequal(expend), by(region) weight(hhweight) index(gini)} {p_end} 97 | 98 | {p 6 6 1}Alternative syntax: {p_end} 99 | 100 | {p 8 8 1}{cmd:. egen gini_reg = gini(expend), by(region) weight(hhweight)} {p_end} 101 | 102 | {p 4 4 2}To create a new variable {it:mld_ur} containing the value of mean log deviation for the distribution of 103 | household expenditure {it:expend} by {it:urbrur} (urban and rural) using household weight {it:hhweight}: {p_end} 104 | 105 | {p 8 8 1}{cmd:. egen mld_ur = inequal(expend), by(urbrur) weight(hhweight) ind(mld)} {p_end} 106 | 107 | {p 6 6 1}Alternative syntax: {p_end} 108 | 109 | {p 8 8 1}{cmd:. 
egen mld_ur = mld(expend), by(urbrur) w(hhweight)} {p_end} 110 | 111 | {p 4 4 2}To create a new variable {it:p1_reg} containing value of the average poverty gap 112 | corresponding to the poverty line specified in variable {it:rpl} for the distribution of 113 | household expenditure {it:expend} by {it:region} using household weight {it:hhweight}: {p_end} 114 | 115 | {p 8 8 1}{cmd:. egen p1_reg = fgt(expend), by(region) weight(hhweight) pl(rpl) alpha(1)} {p_end} 116 | 117 | {p 4 4 2}To create a new variable {it:p0_reg} containing the poverty rate 118 | corresponding to the poverty line equal to {it:1234} for the distribution of 119 | household expenditure {it:expend} by {it:region} using household weight {it:hhweight}: {p_end} 120 | 121 | {p 8 8 1}{cmd:. egen p0_reg = fgt(expend), by(region) w(hhweight) pl(1234) a(0)} {p_end} 122 | 123 | {title:Authors} 124 | 125 | {p 4 4 2}M. Lokshin and Z. Sajaia (DECRG, The World Bank). 126 | 127 | {title:Also see} 128 | 129 | {psee} 130 | Online: help for {helpb egen}, if installed: {helpb rspread}, {helpb inequal}, 131 | {helpb ineqdeco}, {helpb poverty}, {helpb povdeco} 132 | {p_end} 133 | -------------------------------------------------------------------------------- /elmo.pkg: -------------------------------------------------------------------------------- 1 | v 3 2 | d ELMO - Growth Incidence Curve. 3 | d Distribution-Date: 6July2010 4 | 5 | f elmo/elmo.ado 6 | f elmo/elmo.hlp 7 | -------------------------------------------------------------------------------- /elmo/elmo.ado: -------------------------------------------------------------------------------- 1 | *! version 1.21 15November2023 Z. Sajaia 2 | // version 1.20 6July2010 Z. 
Sajaia 3 | 4 | program elmo, sortpreserve rclass 5 | 6 | version 9.2 7 | syntax varname(numeric) [aweight fweight] [if/] [in] [, BYgroup(varname numeric) nomax nofail keepzeros] 8 | 9 | local inc "`varlist'" 10 | 11 | tempvar wi 12 | if missing(`"`weight'"') generate byte `wi' = 1 13 | else generate `wi' `exp' 14 | 15 | marksample touse 16 | if ~missing(`"`bygroup'"') markout `touse' `bygroup' 17 | 18 | 19 | local cnames "ge0 ge1 ge2" 20 | 21 | quietly { 22 | if ~missing("`bygroup'") { 23 | levelsof `bygroup' if `touse', local(lvls) 24 | if missing("`max'") & (`: word count `lvls'' > 60) { 25 | noisily display as error "`bygroup' must have no more then 60 categories" 26 | if missing("`fail'") exit 134 27 | else exit 0 28 | } 29 | foreach lvl of local lvls { 30 | if int(`lvl') != `lvl' | (`lvl' < 0) { 31 | noisily display as error "`bygroup' contains non-integer or negative values" 32 | if missing("`fail'") exit 459 33 | else exit 0 34 | } 35 | } 36 | } 37 | 38 | count if `inc' < 0 & `touse' 39 | noisily if r(N) > 0 { 40 | display " " 41 | display as txt "Warning: `inc' has `r(N)' values < 0." _c 42 | display as txt " Not used in calculations" 43 | } 44 | replace `touse' = 0 if `inc' < 0 45 | 46 | if "`keepzeros'" == "" { 47 | count if `inc' == 0 & `touse' 48 | noisily if r(N) > 0 { 49 | display " " 50 | display as txt "Warning: `inc' has `r(N)' values = 0." 
_c 51 | display as txt " Not used in calculations" 52 | } 53 | replace `touse' = 0 if `inc' == 0 54 | } 55 | 56 | count if `touse' 57 | if r(N) == 0 error 2000 58 | else local N = r(N) 59 | 60 | tempname meany sumwi 61 | summarize `inc' [w = `wi'] if `touse', meanonly 62 | scalar `meany' = r(mean) 63 | scalar `sumwi' = r(sum_w) 64 | return scalar meany = `meany' 65 | 66 | tempvar fi loginc 67 | generate double `fi' = `wi' / `sumwi' if `touse' 68 | generate double `loginc' = log(`inc') if `touse' 69 | 70 | tempname ge0 ge1 ge2 71 | summarize `loginc' [w = `fi'] if `touse', meanonly 72 | scalar `ge0' = log(`meany') - r(sum) 73 | 74 | summarize `loginc' [w = `fi' * `inc'] if `touse', meanonly 75 | scalar `ge1' = r(sum)/`meany' - log(`meany') 76 | 77 | summarize `inc' [w = `fi' * `inc'] if `touse', meanonly 78 | scalar `ge2' = (r(sum)/`meany'^2 - 1)/2 79 | 80 | local rnames "Overall" 81 | 82 | tempname b R F 83 | matrix `b' = `ge0', `ge1', `ge2' 84 | matrix `F' = `N', `N', `N' 85 | 86 | matrix colnames `b' = `cnames' 87 | matrix rownames `b' = `rnames' 88 | matrix colnames `F' = `cnames' 89 | matrix rownames `F' = `rnames' 90 | 91 | matrix `R' = `b' 92 | 93 | return matrix overall = `b' 94 | return matrix overall_F = `F' 95 | 96 | if missing(`"`bygroup'"') { 97 | noisily matlist `R', format("%10.6f") 98 | exit 99 | } 100 | 101 | //========================== SUBGROUP DECOMPOSITIONS 102 | 103 | tempvar fik 104 | generate double `fik' = . 
105 | 106 | tempname mi logmi M F GE0 GE1 GE2 SUMW 107 | foreach lvl of local lvls { 108 | summarize `inc' [aw = `wi'] if `touse' & `bygroup' == `lvl', meanonly 109 | scalar `mi' = r(mean) 110 | scalar `logmi' = log(`mi') 111 | matrix `M' = nullmat(`M') \ `mi' 112 | matrix `SUMW' = nullmat(`SUMW') \ (r(sum_w)/`sumwi') 113 | 114 | replace `fik' = `wi' / r(sum_w) if `touse' & `bygroup' == `lvl' 115 | 116 | summarize `loginc' [aw = `fik'] if `touse' & `bygroup' == `lvl', meanonly 117 | matrix `GE0' = nullmat(`GE0') \ (`logmi' - r(sum)) 118 | 119 | summarize `loginc' [aw = `fik' * `inc'] if `touse' & `bygroup' == `lvl', meanonly 120 | matrix `GE1' = nullmat(`GE1') \ (r(sum)/`mi' - `logmi') 121 | 122 | summarize `inc' [aw = `fik' * `inc'] if `touse' & `bygroup' == `lvl', meanonly 123 | matrix `GE2' = nullmat(`GE2') \ ((r(sum) / (`mi'^2) - 1)/2) 124 | 125 | matrix `F' = nullmat(`F') \ r(N) 126 | } 127 | return matrix mean = `M', copy 128 | return matrix sumw = `SUMW', copy 129 | 130 | matrix `b' = `GE0', `GE1', `GE2' 131 | matrix colnames `b' = `cnames' 132 | matrix rownames `b' = `lvls' 133 | 134 | matrix `F' = `F', `F', `F' 135 | matrix colnames `F' = `cnames' 136 | matrix rownames `F' = `lvls' 137 | 138 | 139 | matrix `R' = `R' \ `b' 140 | local rnames "`rnames' `lvls'" 141 | 142 | return matrix bygroup = `b' 143 | return matrix bygroup_F = `F' 144 | return local levels `lvls' 145 | 146 | // ===================== GE index within-group inequalities 147 | 148 | tempname W0 W1 W2 149 | 150 | matrix `W0' = `SUMW'' * `GE0' 151 | mata : st_matrix("`W1'", st_matrix("`GE1'"):*st_matrix("`M'")) 152 | matrix `W1' = `SUMW'' * `W1' / `meany' 153 | mata : st_matrix("`W2'", st_matrix("`GE2'"):*st_matrix("`M'"):*st_matrix("`M'")) 154 | matrix `W2' = `SUMW'' * `W2' / `meany'^2 155 | 156 | matrix `b' = `W0', `W1', `W2' 157 | matrix colnames `b' = `cnames' 158 | matrix rownames `b' = "within" 159 | 160 | matrix `R' = `R' \ `b' 161 | local rnames "`rnames' within" 162 | 163 | return 
matrix within = `b' 164 | 165 | // ================== GE index between-group inequalities 166 | 167 | tempvar negtouse 168 | generate byte `negtouse' = - `touse' 169 | 170 | sort `negtouse' `inc', stable 171 | 172 | capture drop _cummInc _cummPop _sortID 173 | generate double _cummInc = sum(`inc'*`wi') if `touse' 174 | generate double _cummPop = sum(`wi') if `touse' 175 | 176 | generate _sortID = _n if `touse' 177 | 178 | tempname logmeany 179 | scalar `logmeany' = log(`meany') 180 | CalcBetweenPart if `touse', meany(`meany') logmeany(`logmeany') sumw(`SUMW') m(`M') 181 | 182 | matrix `b' = r(Result) 183 | matrix colnames `b' = `cnames' 184 | matrix rownames `b' = "between" 185 | 186 | matrix `R' = `R' \ `b' 187 | local rnames "`rnames' between" 188 | 189 | return matrix between = `b' 190 | 191 | // ================== ELMO index between-group inequalities with the original "packing order" 192 | 193 | tempname SORTED_SUMW 194 | mata : st_matrix("`SORTED_SUMW'", sort((st_matrix("`SUMW'"), st_matrix("`M'")), 2)[., 1]) 195 | CalcBetweenPart if `touse', meany(`meany') logmeany(`logmeany') sumw(`SORTED_SUMW') totw(`sumwi') 196 | 197 | matrix `b' = r(Result) 198 | matrix colnames `b' = `cnames' 199 | matrix rownames `b' = "elmo_current" 200 | 201 | matrix `R' = `R' \ `b' 202 | local rnames "`rnames' elmo_current" 203 | return matrix elmo_current = `b' 204 | 205 | // =============== ELMO maximum between-group inequality 206 | if missing("`max'") { 207 | tempname ELMO_max 208 | local cmdline "CalcBetweenPart if `touse', meany(`meany') logmeany(`logmeany') sumw(`SORTED_SUMW') totw(`sumwi')" 209 | mata : mata_ELMOPermutations("`ELMO_max'", "`SUMW'", "`cmdline'", "`SORTED_SUMW'") 210 | matrix colnames `ELMO_max' = `cnames' 211 | matrix rownames `ELMO_max' = "elmo_max" 212 | 213 | matrix `R' = `R' \ `ELMO_max' 214 | local rnames "`rnames' elmo_max" 215 | return matrix elmo_max = `ELMO_max' 216 | window manage maintitle reset 217 | } 218 | } // quietly 219 | 220 | matrix 
rownames `R' = `rnames'
221 |     matlist `R'
222 |     return matrix R = `R'
223 |     return scalar N = `N'
224 |     capture drop _cummInc _cummPop _sortID
225 | end
226 | // CalcBetweenPart: between-group GE(0), GE(1), GE(2) from group weight shares sumw() and group means m(); returns the 1x3 matrix r(Result)
227 | program define CalcBetweenPart, rclass
228 |     syntax [if], meany(string) logmeany(string) sumw(string) [m(string) totw(string)]
229 |     // when m() is omitted the group means are rebuilt from the variables _sortID, _cummInc and _cummPop (they must already exist) and totw() is required
230 |     tempname b B0 B1 B2 logM cummPop sumInc
231 |
232 |     if missing("`m'") {
233 |         tempname m
234 |         scalar `cummPop' = 0
235 |         scalar `sumInc' = 0
236 |         // walk the groups in sumw() order; cummPop is the cumulative weight share at the upper boundary of each slice
237 |         forvalues i=1/`=rowsof(`sumw')' {
238 |             scalar `cummPop' = `cummPop' + `sumw'[`i', 1]
239 |             // n = last sorted observation whose cumulative weight fits below the slice boundary
240 |             summarize _sortID `if' & _cummPop<=`cummPop'*`totw', meanonly
241 |             local n = r(max)
242 |             tempname inc delta
243 |             if `cummPop' < 1 & !missing(_cummPop[`=`n'+1']) {
244 |                 scalar `delta' = (_cummInc[`=`n'+1'] - _cummInc[`n'])* ///
245 |                     (`cummPop'*`totw' - _cummPop[`n']) / (_cummPop[`=`n'+1'] - _cummPop[`n'])
246 |             }
247 |             else scalar `delta' = 0
248 |             // the guard above used to read "< 0", which is never true for positive shares, so the observation straddling the boundary was never split pro rata
249 |             scalar `inc' = _cummInc[`n'] + `delta'
250 |             matrix `m' = nullmat(`m') \ (`inc' - `sumInc') / (`sumw'[`i', 1]*`totw')
251 |             scalar `sumInc' = `inc'
252 |         }
253 |     }
254 |     mata: st_matrix("`logM'", log(st_matrix("`m'")))
255 |     // B1 = sum_k s_k*m_k*ln(m_k)/meany - ln(meany)
256 |     mata : st_matrix("`B1'", st_matrix("`m'"):*st_matrix("`logM'"))
257 |     matrix `B1' = `sumw'' * `B1' / `meany' - `logmeany'
258 |     // B0 = ln(meany) - sum_k s_k*ln(m_k)
259 |     matrix `B0' = `logmeany' - `sumw''*`logM'
260 |     // B2 = (sum_k s_k*m_k^2/meany^2 - 1)/2
261 |     mata : st_matrix("`B2'", st_matrix("`m'"):*st_matrix("`m'"))
262 |     matrix `B2' = (`sumw''*`B2'/(`meany'*`meany') - 1)/2
263 |
264 |     matrix `b' = `B0', `B1', `B2'
265 |
266 |     return matrix Result = `b'
267 | end // program CalcBetweenPart
268 |
269 | mata
270 | // mata_ELMOPermutations: runs cmdline once per permutation of the rows of sumw and stores the column-wise maxima of r(Result) in the Stata matrix matname
271 | void function mata_ELMOPermutations(string scalar matname, string scalar sumw, string scalar cmdline, string scalar sorted_sumw)
272 | // sumw - matrix name with the original groupweights
273 | // cmdline will contain something like: CalcBetweenPart `inc' [w] [if], sumw(`sorted_sumw')
274 | // sorted_sumw - temporary name for the matrix to be created
275 | {
276 |     maxval0 = 0
277 |     maxval1 = 0
278 |     maxval2 = 0
279 |
280 |     V1 = st_matrix(sumw)
281 |
282 |     info = cvpermutesetup(V1)
283 |     i = 0
284 |     i_max = factorial(rows(st_matrix(sorted_sumw)))
285 |     // the loop ends when cvpermute() returns J(0,1,.); each permutation overwrites sorted_sumw before cmdline is run
286 |     while ((V1=cvpermute(info)) != J(0,1,.)) {
287 |         st_matrix(sorted_sumw, V1)
288 |         stata(cmdline)
289 |         GE = st_matrix("r(Result)")
290 |         // keep the largest value seen so far for each of the three columns
291 |         if (maxval0 <= GE[1, 1]) {
292 |             maxval0 = GE[1, 1]
293 |         }
294 |         if (maxval1 <= GE[1, 2]) {
295 |             maxval1 = GE[1, 2]
296 |         }
297 |         if (maxval2 <= GE[1, 3]) {
298 |             maxval2 = GE[1, 3]
299 |         }
300 |         i = i + 1
301 |         p = round(i / i_max * 100, .1)
302 |         stata(`"window manage maintitle ""' + strofreal(i) + " out of " + strofreal(i_max) + " : " + strofreal(p) + `"%""')
303 |     }
304 |     st_matrix(matname, (maxval0, maxval1, maxval2))
305 | }
306 |
307 | end
308 |
--------------------------------------------------------------------------------
/elmo/elmo.hlp:
--------------------------------------------------------------------------------
1 | {smcl}
2 | {* 16ONov2023}{...}
3 | {cmd:help elmo}
4 | {hline}
5 |
6 | {title:Title}
7 |
8 | {p2colset 5 20 22 2}{...}
9 | {p2col :{hi: elmo} {hline 2}}Sub-Group Inequality Decomposition{p_end}
10 | {p2colreset}{...}
11 |
12 |
13 | {title:Syntax}
14 |
15 | {p 8 18 2}
16 | {cmd:elmo}
17 | {varname}
18 | {ifin}
19 | {weight}
20 | [{cmd:,}
21 | {it:options}]
22 |
23 | {synoptset 16 tabbed}{...}
24 | {synopthdr}
25 | {synoptline}
26 | {syntab:Main}
27 | {synopt:{opt by:group(varname)}}numeric variable that defines groups{p_end}
28 | {synopt:{opt nomax}}do not compute the maximum between-group inequality{p_end}
29 | {synopt:{opt nofail}}do not return an error code when exiting{p_end}
30 | {synopt:{opt keepzeros}}include observations with 0s in the {it:varname}; default is to drop them{p_end}
31 |
32 |
33 | {p 4 6 2}
34 | {opt aweight}s and {opt fweight}s are allowed; see {help weight}.{p_end}
35 |
36 | {title:Description}
37 |
38 | {pstd}
39 | {opt elmo} implements sub-group inequality decomposition measure as proposed by Elbers, Lanjouw, Mistiaen, and Ozler.
40 |
41 |
42 | {title:References}
43 | {p 1 14} {hi: Chris Elbers, Peter Lanjouw, Johan A. Mistiaen and Berk Ozler (2005) "Re-Interpreting Sub-Group Inequality Decompositions", {it:World Bank Policy Research Working Paper} No. 3687}
44 |
45 |
--------------------------------------------------------------------------------
/fastgini.pkg:
--------------------------------------------------------------------------------
1 | v 3
2 | d 'fastgini': Fast algorithm for calculation of Gini coefficient and it's jackknife standard errors.
3 | d
4 | d Distribution-Date: 15Feb2007
5 | d
6 | d Author: Zurab Sajaia, World Bank
7 | d Support: email zsajaia@hotmail.com
8 |
9 | f fastgini/fastgini.ado
10 | f fastgini/fastgini.hlp
11 |
--------------------------------------------------------------------------------
/fastgini/fastgini.ado:
--------------------------------------------------------------------------------
1 | *! version 1.0 15feb2007 Z. Sajaia
2 | // fastgini: Gini coefficient of varname, returned in r(gini); with jk also r(se), r(gini_jk) and r(mse)
3 | program define fastgini, rclass
4 |     syntax varname [if] [in] [pweight fweight], ///
5 |         [bin(numlist integer min=1 max=1 >0) noCHeck jk Level(cilevel)]
6 |     version 9.2
7 |     // default: non-positive values are dropped via touse; with nocheck no touse marker is created and ct falls back to _N
8 |     if missing("`check'") {
9 |         tempvar goodx
10 |
11 |         marksample touse
12 |         quietly count if `varlist' <= 0 & `touse'
13 |         local ct = r(N)
14 |         if `ct' > 0 {
15 |             display
16 |             display as text "Warning: `varlist' has `ct' values <= 0. Not used in calculations"
17 |         }
18 |         quietly generate `goodx' = 1 if (`varlist' > 0)
19 |         markout `touse' `goodx' `varlist'
20 |         quietly count if `touse'
21 |         local ct = r(N)
22 |         if `ct'==0 error 2000
23 |     }
24 |     else local ct = _N
25 |     if ~missing("`exp'") {
26 |         tempvar w
27 |         generate double `w' `exp'
28 |     }
29 |     local vlist "`varlist' `w'"
30 |
31 |     tempname gini gini_jk se mse
32 |
33 |     // the Mata wrapper reads the locals vlist, touse, weight, jk and bin and fills the four tempname scalars
34 |     mata: mata_callfastgini()
35 |
36 |     if missing("`jk'") display as text _n "Gini coefficient = " as result %9.7f `gini'
37 |     else {
38 |         display _newline
39 |         display as text "Gini coefficient" _c
40 |         display as text _col(56) "Number of obs" _col(70) "= " as res %7.0f `ct'
41 |         display ""
42 |
43 |         tempname Tab t p ll ul z
44 |
45 |         .`Tab' = ._tab.new, col(7) lmargin(0) ignore(.b)
46 |         // column 1 2 3 4 5 6 7
47 |         .`Tab'.width 13 |11 11 9 9 12 12
48 |         .`Tab'.titlefmt . %11s %12s %7s . %24s .
49 |         .`Tab'.strfmt . %11s . . . . .
50 |         .`Tab'.pad . 2 2 1 3 3 3
51 |         .`Tab'.numfmt . %9.0g %9.0g %8.2f %5.3f %9.0g %9.0g
52 |         .`Tab'.strcolor . result . . . . .
53 |
54 |         .`Tab'.sep, top
55 |         .`Tab'.titles "`varlist'" /// 1
56 |             "Gini" /// 2
57 |             "Std. Err." /// 3
58 |             "t" /// 4
59 |             "P>|t|" /// 5
60 |             `"[`=strsubdp("`level'")'% Conf. Interval]"' "" // 6 7
61 |
62 |         // test statistic and confidence limits are based on the normal distribution
63 |         scalar `t' = `gini'/`se'
64 |         scalar `p' = 2*norm(-abs(`t'))
65 |         scalar `z' = invnorm((100+`level')/200)
66 |         scalar `ll' = `gini' - `se'*`z'
67 |         scalar `ul' = `gini' + `se'*`z'
68 |
69 |         .`Tab'.sep
70 |         .`Tab'.row "" `gini' `se' `t' `p' `ll' `ul'
71 |         .`Tab'.sep, bottom
72 |
73 |         return scalar mse = `mse'
74 |         return scalar gini_jk = `gini_jk'
75 |         return scalar se = `se'
76 |     }
77 |     return scalar gini = `gini'
78 | end
79 |
80 | mata:
81 | // mata_callfastgini: passes the data and options held in Stata locals to fastgini() and stores rows 1-4 of the result in the scalars named by gini, se, gini_jk and mse
82 | void function mata_callfastgini()
83 | {
84 |     RET = fastgini(st_data( .,tokens(st_local("vlist")),
85 |         st_local("touse")), st_local("weight"),
86 |         (st_local("jk")!=""),
87 |         strtoreal(st_local("bin")))
88 |
89 |     st_numscalar(st_local("gini"), RET[1,1])
90 |     st_numscalar(st_local("se"), RET[2,1])
91 |     st_numscalar(st_local("gini_jk"), RET[3,1])
92 |     st_numscalar(st_local("mse"), RET[4,1])
93 | }
94 | // fastgini(): X = (values, optional weights); returns g alone when jk is missing, otherwise g \ se \ jackknife mean \ mse
95 | real matrix function fastgini(real matrix X, | string scalar weight, real scalar jk, real scalar bin)
96 | {
97 |     colvector Xi, WX
98 |
99 |     N = rows(X)
100 |     // R = rows of X ordered by the value column, or (bin) M equal-width bins with accumulated counts/weights in column 2
101 |     if (missing(bin)) { // do 'regular' gini
102 |         R = X[order(X,1),]
103 |         M = N
104 |     }
105 |     else { // 'fast' gini, do aggregation
106 |         M = bin
107 |         MM = minmax(X[.,1])
108 |         bsize = (MM[1,2] - MM[1,1])/M
109 |         R = ((1::M):*bsize:+MM[1, 1], J(M, 1, 0))
110 |         Xi = trunc((X[.,1]:-MM[1,1]):/bsize:-0.01):+1
111 |
112 |         if (cols(X)==1) {
113 |             for (i=1; i<=N; ++i) { // no weights
114 |                 R[Xi[i], 2] = R[Xi[i], 2] + 1
115 |             }
116 |         }
117 |         else {
118 |             for (i=1; i<=N; ++i) {
119 |                 R[Xi[i], 2] = R[Xi[i], 2] + X[i,2]
120 |             }
121 |         }
122 |     }
123 |     Xi=.
124 |     C = cols(R)
125 |     R = R \ J(1, C, .)
126 |     // R now has M+1 rows (last row missing), the same number of rows as SUM below
127 |     if (C == 1) {
128 |         sumW=N
129 |     }
130 |     else {
131 |         WX =R[., 1]:*R[., 2]
132 |         sumW=quadcolsum(R)[1,2]
133 |     }
134 |     // NOTE: mata_callfastgini() always passes jk as 0 or 1, so missing(jk) holds only for callers that omit the argument
135 |     sumWX=0
136 |     if (missing(jk)) { //----------------- JUST GINI ---------------------------------
137 |         if (C == 1) {
138 |             sumWX=quadcross(R, ((M::1):*2:-1) \ .)*0.5
139 |             sumX=quadcolsum(R)
140 |         }
141 |         else {
142 |             sumX =0
143 |             for (i=1; i<=M; ++i) {
144 |                 sumWX = sumWX + R[i, 2]*(sumX+WX[i]*0.5)
145 |                 sumX = sumX + WX[i]
146 |             }
147 |         }
148 |         g = 1- 2*sumWX/sumW/sumX
149 |         RET = g
150 |     }
151 |     else { // ---------------------- GINI PLUS JACKKNIFE STANDARD ERRORS -------------------
152 |         // SUM[i+1,.] holds the running totals over the first i rows of R
153 |         SUM = J(M+1, 2, 0) // W x*W
154 |
155 |         if (C == 1) {
156 |             sumWX=quadcross(R, ((M::1):*2:-1) \ .)*0.5
157 |             SUM[., 1] = (0::M)
158 |             for (i=1; i<=M; ++i) {
159 |                 SUM[i+1, 2] = SUM[i, 2] + R[i,1]
160 |             }
161 |         }
162 |         else {
163 |             for (i=1; i<=M; ++i) {
164 |                 sumWX = sumWX + R[i, 2]*(SUM[i, 2]+WX[i]*0.5)
165 |                 SUM[i+1, 1] = SUM[i, 1] + R[i, 2]
166 |                 SUM[i+1, 2] = SUM[i, 2] + WX[i]
167 |             }
168 |         }
169 |         sumW=SUM[M+1, 1]
170 |         sumX=SUM[M+1, 2]
171 |         g = 1- 2*sumWX/sumW/sumX
172 |         RET = g
173 |         // G: one value per row of R, presumably the Gini with that row left out (formula not re-derived here)
174 |         if (weight == "pweight") {
175 |             G= 1:-2:*(sumWX:-(sumW:-R[.,2]:*0.5:-SUM[.,1]):*WX:-R[.,2]:*SUM[.,2]):/(sumW:-R[.,2]):/(sumX:-WX)
176 |
177 |             MV=quadmeanvariance(G)
178 |             V1=MV[2,1]+M/(M-1)*(MV[1,1]-g)^2
179 |
180 |             RET = RET \ sqrt(MV[2,1]*(M-1)^2/M)
181 |             RET = RET \ MV[1,1]
182 |             RET = RET \ sqrt(V1*(M-1)^2/M)
183 |         }
184 |         else {
185 |             G= (- SUM[.,2]-R[.,1]:*(-SUM[.,1]:+(sumW-0.5)):+sumWX):/(R[.,1]:-SUM[M+1, 2]):*(2/(sumW - 1)):+1
186 |             if (C == 1) MV=quadmeanvariance(G)
187 |             else MV=quadmeanvariance(G, R[.,2])
188 |             V1=MV[2,1]+sumW/(sumW-1)*(MV[1,1]-g)^2
189 |
190 |             RET = RET \ sqrt(MV[2,1]*(sumW-1)^2/sumW)
191 |             RET = RET \ MV[1,1]
192 |             RET = RET \ sqrt(V1*(sumW-1)^2/sumW)
193 |         }
194 |     }
195 |     return(RET)
196 | }
197 |
198 | end
199 |
200 |
--------------------------------------------------------------------------------
/fastgini/fastgini.hlp: -------------------------------------------------------------------------------- 1 | {smcl} 2 | {* 13Feb2007}{...} 3 | {cmd:help fastgini} 4 | {hline} 5 | 6 | {title:Title} 7 | 8 | {p2colset 8 20 22 2}{...} 9 | {p2col :{hi:fastgini} {hline 2}}Fast algorithm for calculation of Gini coefficient and its jackknife standard 10 | errors{p_end} {p2colreset}{...} 11 | 12 | 13 | {title:Syntax} 14 | 15 | {p 8 16 2} 16 | {cmd:fastgini} {varname} {ifin} {weight} [{cmd:,} {opt bin(#)} {opt jk} {opt L:evel(#)} {opt noch:eck}] 17 | 18 | 19 | {p 4 6 2} 20 | {opt pweight}s and {opt fweight}s are allowed; see {help weight}.{p_end} 21 | 22 | 23 | {title:Description} 24 | 25 | {pstd} 26 | {cmd:fastgini} calculates the Gini coefficient for either unit-level or aggregated level data. Optionally 27 | it returns the jackknife estimates of the standard error. {cmd:fastgini} uses a fast optimized algorithm 28 | that could be especially useful when calculating the Gini coefficient and its standard errors for large samples. 29 | The command implements algorithms for both exact and approximate calculation of the Gini coefficient. 30 | 31 | {dlgtab:Main} 32 | 33 | {phang} 34 | {opt bin(#)} set number of bins. Specifying this option can dramatically reduce the computation time when working with 35 | large datasets (1M+ obs). When {opt bin(#)} is specified {cmd:fastgini} uses an approximation algorithm for Gini 36 | calculation. Specifying a sufficient number of bins allows obtaining the approximation for the Gini at any desired level 37 | of precision. For example, on the dataset of 1,000,000 observations {opt bin(100000)} will in most 38 | cases estimate computer-exact value of Gini. This calculation requires significantly less computer time compared to the 39 | exact estimation of the Gini on the whole sample. 40 | 41 | {phang} 42 | {opt jk} estimate jackknife (leave-one-out) standard error of the Gini coefficient.
An efficient method of calculating 43 | jackknife estimates involves only two (one to get the Gini coefficient itself and another for standard errors) 44 | runs through the data. 45 | 46 | {phang} 47 | {opt level(#)} set confidence level for the reported jackknife confidence intervals; default is {opt level(95)}. 48 | 49 | {phang} 50 | {opt nocheck} by default, non-positive values of {it:varname} are excluded from Gini calculations. Specifying {opt 51 | nocheck} skips the value check as well as ignores {ifin} conditions. The option can be useful to speed-up the 52 | execution if {cmd:fastgini} is used within loops. 53 | 54 | 55 | {title:Saved Results} 56 | 57 | {phang}{cmd:fastgini} saves in {cmd:r()}: 58 | 59 | {p 8 4 2}{cmd:r(gini)} calculated Gini coefficient; 60 | 61 | {phang} if {opt jk} option specified: 62 | 63 | {p 8 4 2}{cmd:r(se)} jackknife estimate for the standard error of the Gini; 64 | 65 | {p 8 4 2}{cmd:r(mse)} jackknife estimate for the mean standard error of the Gini; 66 | 67 | {p 8 4 2}{cmd:r(gini_jk)} jackknife estimate for the Gini. 68 | 69 | 70 | {title:Remarks} 71 | 72 | {phang}{cmd:fastgini} uses formula: 73 | 74 | i=N j=i 75 | SUM W_i*(SUM W_j*X_j - W_i*X_i/2) 76 | i=1 j=1 77 | G = 1 - 2* ---------------------------------- 78 | i=N i=N 79 | SUM W_i*X_i * SUM W_i 80 | i=1 i=1 81 | 82 | {pmore}where observations are sorted in ascending order of X. 83 | 84 | 85 | {phang}if {opt bin(M)} is specified, the data are aggregated into {it:M} equal-size bins, i.e. 
86 | 87 | ~ 88 | X_i = (X_min + i * binsize) binsize = (X_max - X_min)/M 89 | 90 | ~ ~ ~ 91 | W_i = SUM W_j (if X_(i-1)<=X_j 1 { 13 | di as err "only one of vce, robust, cluster or svy can be specified" 14 | exit 198 15 | } 16 | 17 | quietly { 18 | 19 | if missing("`exp'") { 20 | tempvar wi 21 | gen byte `wi'=1 22 | } 23 | else { 24 | tempvar wi 25 | generate `wi' = `exp' 26 | } 27 | 28 | if !missing("`svy'") local svy svy : 29 | else local pwgt [pw=`wi'] 30 | 31 | unab varlist : `varlist' 32 | if `: word count `varlist'' == 1 local varlist `varlist' `varlist' 33 | tokenize `varlist' 34 | local y `1' 35 | local x `2' 36 | 37 | tempvar touse0 38 | marksample touse, novarlist 39 | generate byte `touse0' = `touse' 40 | markout `touse' `x' `y' `cluster' 41 | 42 | if missing("`keepnegatives'") replace `touse' = 0 if `y' < 0 // negative incomes are excluded by default 43 | if ~missing("`zeros'") replace `touse' = 0 if `y' == 0 // zeros are by default included ! 44 | 45 | count if `touse' 46 | if r(N) == 0 exit 2000 47 | 48 | tempvar cdfy negcdfynu xbyy 49 | 50 | * compute the cdf of y 51 | bysort `touse' `over' (`y' `x') : gen double `cdfy' = sum(`wi'*`touse') 52 | 53 | by `touse' `over' : replace `cdfy' = `cdfy' / `cdfy'[_N] 54 | 55 | gen double `negcdfynu' = -( 1 - `cdfy')^(`nu'-1) if `touse' 56 | gen double `xbyy' = `x'*`negcdfynu' if `touse' 57 | 58 | if !missing("`cluster'") local cluster cluster(`cluster') 59 | 60 | `svy' mean `x' `negcdfynu' `xbyy' if `touse' `pwgt', over( `over', nolabel ) /// 61 | `subpop' `vce' `robust' `cluster' 62 | * store some formatting stuff 63 | local evce `e(vce)' 64 | 65 | local evcetype `e(vcetype)' 66 | if !missing("`cluster'") { 67 | local eclustvar `e(clustvar)' 68 | local ecluster `e(cluster)' 69 | } 70 | if !missing("`score'") { 71 | gen double `score' = `xbyy'/_b[`x'] - _b[`xbyy']/(_b[`x']*_b[`x'])*`x' - `negcdfynu' 72 | } 73 | 74 | local e_over `e(over)' 75 | local e_N_over = e(N_over) 76 | local e_over_namelist 
`e(over_namelist)' 77 | local e_over_labels `"`e(over_labels)'"' 78 | if e(N_over) == 1 { 79 | local first : word 1 of `: colfullnames e(b)' 80 | local second : word 2 of `: colfullnames e(b)' 81 | local third : word 3 of `: colfullnames e(b)' 82 | qui nlcom (conc: `nu'*(_b[`third']/_b[`first']-_b[`second']) ), post 83 | } 84 | else { 85 | forvalues k=1/`e_N_over' { 86 | * generate an estimate for the `k'-th group 87 | local lab : word `k' of `e(over_namelist)' 88 | * produce a neat... or ugly?.. label for it 89 | if !missing("`speclabel'") { 90 | * parse over_labels 91 | local cats : word `k' of `e(over_labels)' 92 | local tolab = strtoname("`cats'") 93 | if length("`valuemask'")>0 { 94 | * tolab is _a_b ... 95 | tokenize `tolab', parse("_") 96 | local tolab 97 | while !missing("`1'") { 98 | if "`1'" == "_" local tolab `tolab'_ 99 | else { 100 | * replace the last length("`1'") symbols of `valuemask' with `1' 101 | local temp = substr("`valuemask'",1,length("`valuemask'")-length("`1'")) + "`1'" 102 | local tolab `tolab'`temp' 103 | } 104 | mac shift 105 | } 106 | } 107 | } 108 | else { 109 | local tolab _`k' 110 | } 111 | local tonlcom `tonlcom' (conc`tolab': `nu'*(_b[`xbyy':`lab']/_b[`x':`lab']-_b[`negcdfynu':`lab']) ) 112 | } 113 | nlcom `tonlcom', post 114 | } 115 | } 116 | 117 | ereturn local sortedby `y' 118 | ereturn local depvar `x' 119 | if !missing("`over'") ereturn local over `over' 120 | ereturn scalar nu = `nu' 121 | 122 | ereturn local predict gconc_p 123 | ereturn local cmd gconc 124 | ereturn local scorevar `score' 125 | 126 | ereturn local vce `evce' 127 | ereturn local vcetype `evcetype' 128 | if !missing("`cluster'") { 129 | ereturn local clustvar `eclustvar' 130 | ereturn local cluster `ecluster' 131 | } 132 | 133 | if !missing("`e_over'") { 134 | ereturn local over `"`e_over'"' 135 | ereturn local over_nolabel nolabel 136 | ereturn scalar N_over = `e_N_over' 137 | ereturn local over_namelist `e_over_namelist' 138 | ereturn local over_labels 
`e_over_labels' 139 | } 140 | 141 | } 142 | else { // replay 143 | if "`e(cmd)'"!="gconc" error 301 144 | syntax , [Level(cilevel)] 145 | } 146 | 147 | di _n "{txt}Generalized concentration coefficient" 148 | di "{txt}Sorted = {res}`e(sortedby)'{txt}; response variable = {res}`e(depvar)'" _c 149 | di "{txt}; shape parameter = {res}`e(nu)'" _n 150 | 151 | ereturn display , level(`level') 152 | 153 | end 154 | -------------------------------------------------------------------------------- /gconc/gconc.sthlp: -------------------------------------------------------------------------------- 1 | 2 | {smcl} 3 | {* *! version 1.0 11jan2010}{...} 4 | {cmd:help gconc} 5 | {hline} 6 | 7 | {title:Title} 8 | 9 | {p2colset 5 14 22 2}{...} 10 | {p2col :{cmd:gconc} {hline 2}}Generalized measures of concentraction{p_end} 11 | {p2colreset}{...} 12 | 13 | 14 | {title:Syntax} 15 | 16 | {p 8 16 2} 17 | {opt gconc} {it:incomevar} [{it:outcomevar}] {ifin} {weight} 18 | [{cmd:, nu(}{it:#}{cmd:) keepnegatives nozeros over(}{varlist}{cmd:)} 19 | {cmd:vce(}{help vce_option}{cmd:) robust svy} {cmdab:cl:uster(}{varlist}}{cmd:)} 20 | {cmd:subpop(}{it:subpopspec}{cmd:)} 21 | ] 22 | 23 | 24 | 25 | {title:Description} 26 | 27 | {pstd} 28 | {cmd:gconc} computes generalized measures of inequality and concentration including 29 | Gini, generalized Gini (S-Gini) and concentration indices. 30 | Probability weights (pweights) and importance weights (iweights) are allowed. 31 | If only {it:incomevar} is specified, the S-Gini coefficient 32 | is computed for this variable. If both {it:incomevar} and {it:outcomevar} 33 | are specified, then the coefficient of generalized concentration 34 | of {it:outcomevar} with respect to ranking on {it:incomevar} is computed. 35 | 36 | {title:Options} 37 | 38 | {synoptset} 39 | {synopthdr} 40 | {synoptline} 41 | {synopt:{cmd:nu(}{it:#}{cmd:)}}The shape parameter for generalized concentration coefficient. 
42 | The default value is 2 corresponding to the ranks of {it:incomevar} 43 | entering linearly. 44 | 45 | {synopt:{cmd:keepnegatives}}Specifies whether the negative values of {it:incomevar} are allowed. 46 | 47 | {synopt:{cmd:nozeros}}Specifies whether the zero values of {it:incomevar} are allowed. 48 | By default, they are used in estimation. 49 | 50 | {synopt:{cmd:over(}{varlist}{cmd:)}}Requests estimation for separate subgroups in the sample. 51 | Note that generalized measures of concentration are not decomposable 52 | into subgroups in the strict technical sense: the total concentration is 53 | not equal to the sum of subgroup concentrations plus the between group 54 | concentration. 55 | 56 | {synopt:{cmd:vce(}{help vce_option}{cmd:)}}Specifies the method to compute standard errors. See {help vce_option}. 57 | 58 | {synopt:{cmd:robust}}Requests computation of the standard errors robust to heteroskedasticity in {it:incomvar}. 59 | 60 | {synopt:{cmdab:cl:uster(}{varlist}{cmd:)}}Requests computation of the standard errors robust to 61 | intraclass correlation due to {it:varlist}. 62 | 63 | {synopt:{cmd:svy}}Requests estimation that respects complex sampling designs. The resampling 64 | variance estimators {cmd:svy, vce(jackknife): gconc ...} and 65 | {cmd:svy, vce(brr): gconc ...} will work without this option. 66 | The linearized standard errors, {cmd:svy, vce(linearized)}, 67 | are not directly supported, but can be obtained with this option. 68 | 69 | {synopt:{cmd:subpop(}{it:subpopspec}{cmd:)}}For complex survey samples, {cmd:subpop} option 70 | should be specified to subset the data, rather than {cmd:if} condition. 71 | See {manlink SVY subpopulation estimation}. 72 | 73 | 74 | {title:Notes and examples} 75 | 76 | {pstd}To compute Gini coefficient and its standard error:{p_end} 77 | 78 | {phang2}{cmd:. sysuse nlsw88 }{p_end} 79 | {phang2}{cmd:. 
gconc wage}{p_end} 80 | 81 | {pstd}To compute Gini coefficient with the bootstrap standard error:{p_end} 82 | 83 | {phang2}{cmd:. sysuse nlsw88 }{p_end} 84 | {phang2}{cmd:. bootstrap, reps(200): gconc wage}{p_end} 85 | 86 | {pstd}To compute concentration coefficient for complex survey data:{p_end} 87 | 88 | {phang2}{cmd:. webuse nhanes2}{p_end} 89 | {phang2}{cmd:. gconc height weight, svy}{p_end} 90 | 91 | {pstd}To compute concentration coefficient with the jackknife standard errors:{p_end} 92 | 93 | {phang2}{cmd:. webuse nhanes2}{p_end} 94 | {phang2}{cmd:. svy, vce(jackknife): gconc height weight}{p_end} 95 | 96 | {pstd} 97 | Extensions of the Gini index that allow for different sensitivities in different 98 | parts of distribution were given by Yitzhaki (1983). 99 | Computations by {cmd:gconc} utilize the representations of the Gini 100 | method family of concentration measures given by Yitzhaki (1991). 101 | Sandstrom, Wretman and Walden (1988) report that linearization variance estimator 102 | of the Gini coefficient was biased by a factor of 10 in their simulations, and 103 | recommend using jackknife estimator. Jackknife is also the preferred estimatior 104 | in Yitzhaki (1991). However results in Barrett and Donald (2009) showed quite accurate 105 | performance of the linearization estimator. 106 | 107 | 108 | {title:References} 109 | 110 | {phang} 111 | Barrett, G. F., and Donald, S. G. (2009). 112 | Statistical Inference with Generalized Gini Indices of Inequality, Poverty, and Welfare. 113 | {it:Journal of Business and Economic Statistics}, {cmd:27} (1), 1-17, 114 | {browse "http://dx.doi.org/10.1198/jbes.2009.0001":doi:10.1198/jbes.2009.0001}. 115 | 116 | {phang} 117 | Sandstrom, A., Wretman, J. H., and Walden, B. (1988). Variance Estimators 118 | of the Gini Coefficient: Probability Sampling. 119 | {it:Journal of Business and Economic Statistics}, 120 | {cmd:6} (1), 113--119, {browse "http://www.jstor.org/stable/1391424"}. 
121 | 122 | {phang}Yitzhaki, S. (1983). On an extension of the Gini inequality index. 123 | {it:International Economic Review}, {cmd:24} (3), 617--628, 124 | {browse "http://dx.doi.org/10.2307/2648789":doi:10.2307/2648789}. 125 | 126 | {phang}Yitzhaki, S. (1991). 127 | Calculating jackknife variance estimators for parameters of the gini method. 128 | {it:Journal of Business and Economic Statistics}, {cmd:9} (2), 235--239, 129 | {browse "http://www.jstor.org/stable/1391792"}. 130 | 131 | 132 | {title:Also see} 133 | 134 | {psee} 135 | Help: {help inequality}, {help glcurve} (if installed). 136 | 137 | 138 | {title:Authors} 139 | 140 | {phang} 141 | Stanislav Kolenikov (skolenik at gmail dot com) 142 | 143 | {phang} 144 | Zurab Sajaia (zsajaia at worldbank dot org) 145 | -------------------------------------------------------------------------------- /gicurve.pkg: -------------------------------------------------------------------------------- 1 | v 3 2 | d gicurve - Growth Incidence Curve. 3 | d Distribution-Date: 5Sep2008 4 | 5 | f gicurve/gicurve.ado 6 | f gicurve/gicurve.hlp 7 | 8 | -------------------------------------------------------------------------------- /gicurve/gicurve.hlp: -------------------------------------------------------------------------------- 1 | {smcl} 2 | {* version 2.11 02March2007 M. Lokshin*}{...} 3 | {hline} 4 | help for {hi:gicurve}{right:({hi:Michael M. 
Lokshin and Martin Ravallion}, 2007, The World Bank)} 5 | {hline} 6 | 7 | {title: Growth incidence curves} 8 | 9 | {p 8 12}{cmd:gicurve} {cmd:using} {it:file1} [{it:weight}] [{cmd:if} {it:expr}] {cmd:,} {cmd:var1(}{it:varname}{cmd:)} 10 | {cmd:var2(}{it:varname}{cmd:)} [{cmd:np(}{it:#}{cmd:)} {cmdab:out:putfile(}{it:file2}{cmd:)} {cmdab:hc:index(}{it:#}{cmd:)} 11 | {cmdab:pl:ine(}{it:#}{cmd:)} {cmdab:yp:eriod(}{it:#}{cmd:)} {cmd:nograph} {cmd:ginmean} {cmd:gatmedian} {cmd:mgrpp} 12 | {cmd:meangr} {cmd:bands(}{it:#}{cmd:)} {cmd:knots(}{it:#}{cmd:)} {cmd:ci(}{it:# [#]}{cmd:) minmax addplot(plot)} 13 | {it:graph_options}] 14 | 15 | {p}where {p_end} 16 | {p 8 8} {it:file1} is a file with the second-year data. {p_end} 17 | {p 8 8} {cmd:var1} is a welfare indicator variable in the first year data. {p_end} 18 | 19 | {p 8 8} {cmd:var2} is a welfare indicator variable in the second year data. {p_end} 20 | 21 | {p 8 8} {cmd:np(#) } is a number of percentiles. {p_end} 22 | 23 | {p 8 8} {cmdab:out:putfile} is a Stata file where results could be saved. {p_end} 24 | 25 | {p 8 8} {cmdab:hc:index} is a headcount index that could be specified. {p_end} 26 | 27 | {p 8 8} {cmdab:pl:ine} is a poverty line that could be specified. {p_end} 28 | 29 | {p 8 8} {cmd:nograph} is an option to suppress graph output. {p_end} 30 | 31 | {p 8 8} {cmdab:yp:eriod(#)} is an option to convert growth rate over 32 | the period to annual growth rate. {p_end} 33 | 34 | {p 8 8} {cmd:minmax} is an option to exclude the highest and the lowest value from the {cmd:gicurve} graph{p_end} 35 | 36 | {p 4 0} {it:Options for graph:} {p_end} 37 | 38 | {p 8 8} {cmd:ginmean} outputs growth in mean line on the graph. {p_end} 39 | 40 | {p 8 8} {cmd:gatmedian} outputs growth at median line on the graph. {p_end} 41 | 42 | {p 8 8} {cmd:mgrpp} outputs mean growth rate of the poorest p% curve on the graph. {p_end} 43 | 44 | {p 8 8} {cmd:meangr} outputs mean percentile growth rate line on the graph.
{p_end} 45 | 46 | {p 8 8} {cmd:bands(#)} gicurve smoothing criterion. {p_end} 47 | 48 | {p 8 8} {cmd:knots(#)} another gicurve smoothing criterion. {p_end} 49 | 50 | {p 8 8} {cmd:ci(# [#])} option to generate confidence intervals around GIC. {p_end} 51 | 52 | {p 8 8} {cmd:minmax} option for dropping minimum and maximum values of gicurve on the graph.{p_end} 53 | 54 | {p 8 8} {cmd:addplot(plot)} option for adding additional twoway plots to gicurve.{p_end} 55 | 56 | {p 8 8} {it:graph_options} all graph options for {help twoway_line} are allowed. {p_end} 57 | 58 | {title:Description} 59 | 60 | {p}{cmd:gicurve} produces the growth incidence curve and calculates a measure of the rate of pro-poor growth. 61 | The growth incidence curve gives growth rates by quantiles ranked by welfare measure. 62 | Integrating this curve up to the headcount index of poverty gives a measure of the rate of pro-poor growth. 63 | In addition, {cmd:gicurve} calculates and can graph the rate of pro-poor growth curve, growth at median, in mean, 64 | and the mean percentile growth rate line.{p_end} 65 | 66 | {p}{cmd:gicurve} uses observations from the dataset currently in memory (the master dataset) 67 | and observations from the Stata-format dataset stored as {it:file1} (using dataset).{p_end} 68 | 69 | {p}{cmd:Important note:} In order to specify weights and/or sample restrictions {cmd:if}, the weight variables 70 | and the variables used in the {cmd:if} condition should be present in both datasets and have the same names.{p_end} 71 | 72 | {title:Options} 73 | 74 | {p 0 4} {it:file1} is a file with the second-year data. This file should contain the welfare 75 | indicator variable (e.g., expenditure or income) and all the variables that are used 76 | in the {cmd:if} statement. That file must be specified. If filename is specified without an 77 | extension, .dta is assumed.
In order to run {cmd:gicurve} you should have open one of 78 | the files with data.{p_end} 79 | 80 | {p 0 4} {cmd:var1} is a welfare indicator variable in the first year data. This variable 81 | should be present in the opened file. This variable must be specified. {p_end} 82 | 83 | {p 0 4} {cmd:var2} is a welfare indicator variable in the second year data. This variable 84 | should be present in the {cmd:using} file. This variable must be specified. {p_end} 85 | 86 | {p 0 4} {cmd:np(#)} is a number of percentiles. This is an optional parameter. If it is not 87 | specified, the default value of 100 is used (i.e., growth incidence curve is calculated for 88 | each percentile).{p_end} 89 | 90 | {p 0 4} {cmdab:out:putfile} is a Stata file where results could be saved. This is an 91 | optional parameter that specifies the Stata file where the results of calculations could 92 | be saved. The following variables are saved: {it:welf1} and {it:welf2} are the welfare 93 | indicator from the first and the second files, {it:pctl} is the corresponding percentile, 94 | {it:pr_growth} is the rate of growth at the particular percentile, {it:intgrl1} is the 95 | mean growth rate for the poorest percentiles, {it:gr_in_mean} is the growth rate in mean, and 96 | {it:gr_in_median} is the growth rate in median.{p_end} 97 | 98 | {p 0 4} {cmdab:hc:index} is a headcount index that could be specified. This is an optional 99 | parameter. If {cmdab:hc:index} is specified {cmd:gicurve} calculates the rate of pro-poor 100 | growth for the {cmdab:hc:index}% of population. {p_end} 101 | 102 | {p 0 4} {cmdab:pl:ine} is a poverty line that could be specified. This is an optional 103 | parameter. If {cmdab:pl:ine} is specified {cmd:gicurve} calculates the rate of pro-poor 104 | growth for the households with the level of welfare less than or equal to {cmdab:pl:ine}. It also 105 | outputs the headcount index that corresponds to the specified {cmdab:pl:ine}.
If neither 106 | {cmdab:hc:index} nor {cmdab:pl:ine} is specified, {cmd:gicurve} calculates the mean 107 | growth rate of the poorest 10%, 15%, 20%, 25%, 30% and 100% of population.{p_end} 108 | 109 | {p 0 4} {cmdab:yp:eriod(#)} is an option to convert growth rate over the period to annual growth 110 | rate. The conversion is made by the following formula: 111 | annual growth rate = (growth rate+1)^((1/number_of_years))-1. {p_end} 112 | 113 | {p 0 4} {cmd:bands(#)} and {cmd:knots(#)} are the options to smooth the gicurve line. The lower is the 114 | number of bands and the higher is the number of knots the smoother the gicurve is. Use the number equal 115 | to the specified number of percentiles in bands to get the least smoothed line. See {help twoway_spline} 116 | for a discussion of these options.{p_end} 117 | 118 | {p 0 4} {cmd:ci(# [#])} is an option to draw confidence intervals for the {cmd:gicurve} line. The first argument in the 119 | option specifies the number of bootstrap repetitions to calculate the confidence intervals. The second (optional) 120 | argument specifies the significance level for the confidence interval. If no significance level is specified, 121 | {cmd:gicurve} produces 95% confidence intervals. The procedure could be excessively slow for large datasets and/or 122 | if a large number of repetitions is specified.{p_end} 123 | 124 | {p 0 4} {cmd:addplot(plot)} The {cmd:addplot()} option allows adding any twoway graph to the graph generated by 125 | gicurve. The option might be useful if, for example, one wants to add vertical lines to the gicurve graph. 126 | This could be done by specifying the following command:{p_end} 127 | 128 | {p 4 8} . gicurve ..., ... addplot(pci 10 20 40 20 10 40 60 40, lp(dash dot)){p_end} 129 | 130 | {p 0 4} {cmd:nograph} is an option to suppress graph output. {p_end} 131 | 132 | {title:Examples} 133 | 134 | {p 4 8}{inp:. use data1}{p_end} 135 | {p 4 8}{inp:.
gicurve using data2, var1(expend_1) var2(expend_2)}{p_end} 136 | {p 4 8}{inp:. gicurve using data2, var1(expend_1) var2(expend_2) s(.) c(s)}{p_end} 137 | {p 4 8}{inp:. gicurve using data2 if region==1, var1(expend_1) var2(expend_2)}{p_end} 138 | {p 4 8}{inp:. gicurve using data2 if region==1, var1(expend_1) var2(expend_2) np(200) out(file1)} {p_end} 139 | {p 4 8}{inp:. gicurve using data2 if region==1, var1(expend_1) var2(expend_2) np(200) pl(3299.02)}{p_end} 140 | {p 4 8}{inp:. gicurve using data2 if region==1, var1(expend_1) var2(expend_2) np(200) hc(28)}{p_end} 141 | {p 4 8}{inp:. gicurve using data2, var1(expend_1) var2(expend_2) nograph} {p_end} 142 | 143 | 144 | {title:Also see} 145 | 146 | {p 1 14} Reference: {hi: Martin Ravallion and Shaohua Chen (2002) "Measuring pro-poor growth", The World Bank}.{p_end} 147 | 148 | {title:Authors} 149 | 150 | {p 4 4 2} Michael Lokshin, DECRG, The World Bank. If you observe any problems, please contact 151 | {browse "mailto:mlokshin@worldbank.org"}. 152 | 153 | {p 4 4} Martin Ravallion, DECRG, The World Bank. 154 | -------------------------------------------------------------------------------- /gidecomposition.pkg: -------------------------------------------------------------------------------- 1 | v 3 2 | d gidecomposition - Growth-inequality decomposition. 3 | d Distribution-Date: 15May2007 4 | 5 | f gidecomposition/gidecomposition.ado 6 | f gidecomposition/gidecomposition.hlp 7 | -------------------------------------------------------------------------------- /gidecomposition/gidecomposition.ado: -------------------------------------------------------------------------------- 1 | *! version 2.3 15May2007 M. 
Lokshin, The World Bank
#delimit ;

/*
  gidecomposition -- growth / redistribution decomposition of a change in
  poverty between two datasets.

  The dataset in memory (master) holds the first-period welfare variable
  var1(); the -using- dataset holds the second-period variable var2().
  P0 (headcount), P1 (gap) and P2 (severity), all multiplied by 100, are
  computed for four distributions:
      M11  period-1 data as observed
      M22  period-2 data as observed
      M12  period-2 data rescaled to the period-1 mean
      M21  period-1 data rescaled to the period-2 mean
  and combined into growth, redistribution and interaction (residual)
  components, once with period 1 as the base (column 1) and once with
  period 2 as the base (column 2).

  Options
      var1(), var2()      welfare variables in the master / using data
      pline1(), pline2()  poverty lines; inserted verbatim into expressions
                          (the help file allows a number or a variable name)
      hc | pg | pgs       measure shown in the table; hc is used when none
                          or more than one of them is given

  Returns
      r(b)  3 x 6 matrix, rows growth/redistribution/interaction,
            columns P0_base1 P0_base2 P1_base1 P1_base2 P2_base1 P2_base2
      r(P)  3 x 3 matrix, rows P0 P1 P2, columns base1 base2 change

  The data in memory are preserved and restored on exit.
*/
program define gidecomposition, rclass;
    version 8.0;
    syntax using [fweight aweight] [if],
        var1(varname numeric) var2(string) pline1(string) pline2(string) [hc pg pgs];

    // exactly one measure is displayed; fall back to the headcount otherwise
    if missing("`hc'`pg'`pgs'") | (`: word count `hc' `pg' `pgs' '>1) {;
        local hc "hc";
        local pg;
        local pgs;
    };

    preserve;
    quietly {;
        tempvar p0 p1 p2;
        tempname M11 M12 M21 M22;

        if (!missing("`if'")) keep `if';

        summarize `var1' [`weight' `exp'], meanonly; local mean_1_1 = r(mean);

        generate `p0' = (`var1' < `pline1') * 100; // poverty headcount
        generate `p1' = (`var1' < `pline1') * ((`pline1' - `var1') / `pline1')* 100; // poverty gap
        generate `p2' = `p1'^2/100; // poverty severity

        tabstat `p0' `p1' `p2' [`weight' `exp'], save;
        matrix `M11' = r(StatTot)'; // in version 8 this was the matrix name


        /****************** FILE 2 *****************************************/
        use `using', clear; if ("`if'" ~= "") keep `if';

        summarize `var2' [`weight' `exp'], meanonly; local mean_2_2 = r(mean);

        generate `p0' = (`var2' < `pline2') * 100; // poverty headcount 2
        generate `p1' = (`var2' < `pline2') * ((`pline2' - `var2') / `pline2') * 100; // poverty gap 2
        generate `p2' = `p1'^ 2/100; // poverty severity

        tabstat `p0' `p1' `p2' [`weight' `exp'], save;
        matrix `M22' = r(StatTot)';

        replace `var2' = `var2' * `mean_1_1' / `mean_2_2'; // change means in file 2

        replace `p0' = (`var2' < `pline2') * 100;
        replace `p1' = (`var2' < `pline2') * ((`pline2' - `var2') / `pline2') * 100; // poverty gap 2
        replace `p2' = `p1'^ 2/100; // poverty severity

        tabstat `p0' `p1' `p2' [`weight' `exp'], save;
        matrix `M12' = r(StatTot)';

        restore, preserve;
        /***************** FILE 1 ******************************************/
        if ("`if'" ~= "") keep `if';

        replace `var1' = `var1' * `mean_2_2' / `mean_1_1'; // give file 1 the mean of file 2

        generate `p0' = (`var1' < `pline1') * 100; // poverty headcount 2
        generate `p1' = (`var1' < `pline1') * ((`pline1' - `var1') / `pline1') * 100; // poverty gap 2
        generate `p2' = `p1'^ 2 / 100; // poverty severity

        tabstat `p0' `p1' `p2' [`weight' `exp'], save;
        matrix `M21' = r(StatTot)';

        tempname b P CHN GRO RED RES;

        // column 1 = base year 1, column 2 = base year 2
        matrix `CHN' = (`M22' - `M11');
        matrix `GRO' = (`M21' - `M11'), (`M22' - `M12');
        matrix `RED' = (`M12' - `M11'), (`M22' - `M21');
        matrix `RES' = (`CHN', `CHN') - `GRO' - `RED';

        matrix `P' = `M11', `M22', `CHN';
        matrix `b' = (`GRO'[1,....], `GRO'[2,....], `GRO'[3,....]) \
                     (`RED'[1,....], `RED'[2,....], `RED'[3,....]) \
                     (`RES'[1,....], `RES'[2,....], `RES'[3,....]) ;

        matrix rownames `b'= growth redistribution interaction;
        matrix colnames `b'=P0_base1 P0_base2 P1_base1 P1_base2 P2_base1 P2_base2;
        matrix rownames `P'=P0 P1 P2;
        matrix colnames `P'=base1 base2 change;
    }; // end quietly

    // row of the result matrices that corresponds to the requested measure
    if (!missing("`hc'")) {; local index = 1; local title1 " Poverty rate (P0)"; local title2 "P0"; };
    if (!missing("`pg'")) {; local index = 2; local title1 " Poverty gap (P1)"; local title2 "P1"; };
    if (!missing("`pgs'")) {; local index = 3; local title1 " Poverty severity (P2)"; local title2 "P2"; };

    display as text _newline "{hline 80}";
    display as text " Growth and Inequality Poverty Decomposition";
    display as text "{hline 80}";
    display as text _col(30) "Base year 1" _col(48) "Base year 2" _col(64) "Average effect";
    display as text "{hline 80}";
    display as text "`title1'" _col(32) as result %6.3f (`=`P'[`index', 1]') _col(50) %6.3f (`=`P'[`index', 2]');
    display as text "{hline 80}";
    display as text " Change in `title2'"
        _col(32) as result %6.3f (`=`P'[`index', 3]')
        _col(50) %6.3f (`=`P'[`index', 3]')
        _col(68) %6.3f (`=`P'[`index', 3]');
    display as text "{hline 80}";
    display as text " Growth component"
        _col(32) as result %6.3f (`=`GRO'[`index', 1]')
        _col(50) %6.3f (`=`GRO'[`index', 2]')
        _col(68) %6.3f (`=`GRO'[`index', 1]+`GRO'[`index', 2]')/2;
    display as text " Redistribution component"
        _col(32) as result %6.3f (`=`RED'[`index', 1]')
        _col(50) %6.3f (`=`RED'[`index', 2]')
        _col(68) %6.3f (`=`RED'[`index', 1]+`RED'[`index', 2]')/2;
    // the base-year-2 column must show column 2 of RES, the value the
    // "Average effect" cell on this row already uses
    display as text " Interaction component"
        _col(32) as result %6.3f (`=`RES'[`index', 1]')
        _col(50) %6.3f (`=`RES'[`index', 2]')
        _col(68) %6.3f (`=`RES'[`index', 1]+`RES'[`index', 2]')/2;
    display as text "{hline 80}";

    return matrix b = `b';
    return matrix P = `P';
    restore;
end;
 117 | 118 | -------------------------------------------------------------------------------- /gidecomposition/gidecomposition.hlp: -------------------------------------------------------------------------------- 1 | {smcl} 2 | {* 24jan2006}{...} 3 | {hline} 4 | help for {hi:gidecomposition}{right:(Michael M. Lokshin and Martin Ravallion, 2006,The World Bank)} 5 | {hline} 6 | 7 | {title: Growth and inequality decomposition} 8 | 9 | {p 8 12}{cmd:gidecomposition} {cmd:using} {it:file1} [{it:weight}] [{cmd:if} {it:expr}] {cmd:,} {cmd:var1(}{it:varname}{cmd:)} 10 | {cmd:var2(}{it:varname}{cmd:)} {cmd:pline1(}{it:varname/#}{cmd:)} {cmd:pline2(}{it:varname/#}{cmd:) {cmd:hs} {cmd:pg} {cmd:pgs}} 11 | 12 | {p}where {p_end} 13 | {p 8 8} {cmd:file1} is a file with a second year data. {p_end} 14 | {p 8 8} {cmd:var1} is a welfare indicator variable in the first year data. {p_end} 15 | 16 | {p 8 8} {cmd:var2} is a welfare indicator variable in the second year data. 
{p_end} 17 | 18 | {p 8 8} {cmd:pline1} is a poverty line for the first year data that could be specified either 19 | as a numeric value or as a variable.{p_end} 20 | 21 | {p 8 8} {cmd:pline2} is a poverty line for the second year data that could be specified either 22 | as a numeric value or as a variable.{p_end} 23 | 24 | {p 8 8} {cmd:hc} is an optional argument to specify that the poverty rate is decomposed.{p_end} 25 | {p 8 8} {cmd:pg} is an optional argument to specify that the poverty gap is decomposed.{p_end} 26 | {p 8 8} {cmd:pgs} is an optional argument to specify that the poverty gap squared is decomposed.{p_end} 27 | 28 | 29 | {title:Description} 30 | 31 | {p}{cmd:gidecomposition} The change in poverty between two years could be decomposed 32 | into three components. The growth component is the difference between the two poverty indices keeping the 33 | distribution constant. The redistribution component is the change in poverty if the mean of the two 34 | distributions is kept constant. The interaction component (residual) shows the change in poverty due to interaction of 35 | growth and inequality. {p_end} 36 | 37 | {p}{cmd:gidecomposition} uses observations from the dataset currently in memory (the master dataset) 38 | and observations from the Stata-format dataset stored as {it:file1} (using dataset).{p_end} 39 | 40 | {title:Options} 41 | 42 | {p 0 4} {it:file1} is a file with a second year data. This file should contain the welfare 43 | indicator variable (e.g., expenditure or income) and all the variables that are used 44 | in {cmd:if} and/or {cmd:weight} statements. That file must be specified. If filename is specified without an 45 | extension, .dta is assumed. In order to run {cmd:gidecomposition} you should have open one of 46 | the files with data.{p_end} 47 | 48 | {p 0 4} {cmd:var1} is a welfare indicator variable in the first year data. This variable 49 | should be present in the opened file. This variable must be specified. 
{p_end} 50 | 51 | {p 0 4} {cmd:var2} is a welfare indicator variable in the second year data. This variable 52 | should be present in the {cmd:using} file. This variable must be specified. {p_end} 53 | 54 | {p 0 4} {cmd:pline1} is a poverty line for the first year of data. This poverty line can be 55 | specified either as a number or as a variable that contains the value corresponding to the poverty line. 56 | {p_end} 57 | 58 | {p 0 4} {cmd:pline2} is a poverty line for the second year of data. This poverty line can be 59 | specified either as a number or as a variable that contains the value corresponding to the poverty line. 60 | {p_end} 61 | 62 | {p 0 4} {cmd:hc} is an optional argument that specifies that the poverty is decomposed based on poverty head count (P0). 63 | If no poverty measure is specified, or if more than one is specified, the P0 decomposition is displayed. 64 | {p_end} 65 | 66 | {p 0 4} {cmd:pg} is an optional argument that specifies that the poverty is decomposed based on poverty gap (P1). 67 | {p_end} 68 | 69 | {p 0 4} {cmd:pgs} is an optional argument that specifies that the poverty is decomposed based on poverty severity (P2). 70 | {p_end} 71 | 72 | {title:Returned values} 73 | 74 | {p}{cmd:gidecomposition} stores the results in the matrices r(b) (growth, redistribution and interaction components) and r(P) (poverty measures and their change). See {help return}. 75 | {p_end} 76 | 77 | {title:Examples} 78 | 79 | {p 4 8}{inp:. use data1}{p_end} 80 | 81 | {p 4 8}{inp:. gidecomposition using data2, var1(expend_1) var2(expend_2) pline1(2000) pline2(2200)}{p_end} 82 | 83 | {p 4 8}{inp:. gidecomposition using data2 if region==1, var1(expend_1) var2(expend_2) pline1(pl1) pline2(pl2)}{p_end} 84 | 85 | {p 4 8}{inp:. gidecomposition using data2 if region==1, var1(expend_1) var2(expend_2) pline1(pl1) pline2(pl2) hc}{p_end} 86 | 87 | {p 4 8}{inp:. 
gidecomposition using data2 if region==1, var1(expend_1) var2(expend_2) pline1(pl1) pline2(pl2) pgs}{p_end} 88 | 89 | {title:Also see} 90 | 91 | Online: help for {help pov_robust}, {help gicurve}, {help sedecomposition} 92 | 93 | {title:References} 94 | 95 | {p 1 14} {hi: Gaurav Datt and Martin Ravallion (1992) "Growth and Redistribution Components of Changes in Poverty: A Decomposition with applications to Brazil and China in 1980s", {it:Journal of Development Economics} 38: 275-295} 96 | 97 | {title:Authors} 98 | 99 | {p 4 4 2} Michael Lokshin, DECRG, The World Bank. If you observe any problems, please contact 100 | {browse "mailto:mlokshin@worldbank.org"}.{p_end} 101 | {p 4 4} Martin Ravallion, DECRG, The World Bank. {p_end} 102 | {p 4 4} Amadou Bassirou Diallo, AFTPM, The World Bank and University of Auvergne, France, contributed to the revision of the program to version 2. 103 | -------------------------------------------------------------------------------- /googletrans.pkg: -------------------------------------------------------------------------------- 1 | v 3 2 | d googletrans - Translate strings using Google Translate web API. 
3 | d This command wraps `googletrans` python library to translate variable and value labels persent in the dataset 4 | d Author: Zurab Sajaia 5 | 6 | f googletrans/googletrans.ado 7 | f googletrans/googletrans.sthlp 8 | e 9 | -------------------------------------------------------------------------------- /googletrans/googletrans.ado: -------------------------------------------------------------------------------- 
* googletrans: Stata front end for the Python helpers defined below.
*   anything        text to translate; when omitted, all variable and value
*                   labels of the dataset in memory are translated in place
*   src()           source language code, "auto" when omitted
*   dest()          destination language code, "en" when omitted
*   listlanguages   print the table of language codes and exit
program define googletrans

    version 16.1

    syntax [anything], [src(string) dest(string) LISTLanguages]

    if "`listlanguages'" != "" {
        python: list_languages()
        exit
    }
    if `"`src'"' == "" {
        local src "auto"
    }
    if `"`dest'"' == "" {
        local dest "en"
    }
    python: trans(src_text=Macro.getLocal("anything"), src="`src'", dest="`dest'")
end

version 16.1

python:
try:
    from googletrans import Translator, LANGUAGES
except ImportError:
    raise ImportError("you have to install googletrans package first")

from sfi import Data, Macro, ValueLabel, Characteristic, SFIToolkit

def trans(src_text, src="auto", dest="en"):
    """Translate one string, or every label of the dataset in memory.

    src_text -- text to translate; when empty, variable labels and value
                labels are translated and overwritten instead
    src      -- source language code passed to Translator.translate
    dest     -- destination language code passed to Translator.translate

    The language a label was last translated into is remembered in the
    variable characteristic "lang" and in the dataset characteristic
    "<labelname>_lang", so labels already in dest are left alone.
    """
    translator = Translator()

    if src_text:
        res = translator.translate(text=src_text, src=src, dest=dest)
        SFIToolkit.display(res.text)
        return

    SFIToolkit.displayln("translating variable labels:")
    for vi in range(0, Data.getVarCount()):
        lang = Characteristic.getVariableChar(vi, "lang")
        SFIToolkit.display(" {0} ... ".format(Data.getVarName(vi)))
        if lang == dest:
            SFIToolkit.displayln("already in {0}".format(lang))
            continue
        label = Data.getVarLabel(vi)
        if not label:
            # nothing to translate: do not send empty text to the web API
            SFIToolkit.displayln("no label, skipped")
            continue
        res = translator.translate(text=label, src=src, dest=dest)
        Data.setVarLabel(vi, res.text)
        Characteristic.setVariableChar(vi, "lang", dest)
        SFIToolkit.displayln("done")

    SFIToolkit.displayln("translating value labels:")
    for li in ValueLabel.getNames():
        SFIToolkit.display(" {0} ... ".format(li))
        charname = "{0}_lang".format(li)
        lang = Characteristic.getDtaChar(charname)
        if lang != dest:
            lbls = ValueLabel.getLabels(li)
            vals = ValueLabel.getValues(li)
            # one request for the whole label set; results come back in order
            results = translator.translate(text=lbls, src=src, dest=dest)
            for i in range(0, len(lbls)):
                ValueLabel.setLabelValue(li, vals[i], results[i].text)
            Characteristic.setDtaChar(charname, dest)
            SFIToolkit.displayln("done")
        else:
            SFIToolkit.displayln("already in {0}".format(lang))

def list_languages():
    """Print a SMCL table with one row per entry of googletrans.LANGUAGES."""
    SFIToolkit.displayln("{hline 6}{c TT}{hline 22}")
    SFIToolkit.displayln("{lalign 5:code} {c |} language")
    SFIToolkit.displayln("{hline 6}{c +}{hline 22}")
    for lng in LANGUAGES.items():
        SFIToolkit.displayln("{{lalign 5:{0}}}".format(lng[0]) + " {c |} " + lng[1])
    SFIToolkit.displayln("{hline 6}{c BT}{hline 22}")
end
 74 | -------------------------------------------------------------------------------- /googletrans/googletrans.sthlp: -------------------------------------------------------------------------------- 1 | {smcl} 2 | {* *! 
version 1.0 27oct2020}{...} 3 | {viewerjumpto "Syntax" "googletrans##syntax"}{...} 4 | {viewerjumpto "Description" "googletrans##description"}{...} 5 | {viewerjumpto "Options" "googletrans##options"}{...} 6 | {viewerjumpto "Installation" "googletrans##installation"}{...} 7 | {viewerjumpto "Examples" "googletrans##examples"}{...} 8 | {vieweralsosee "" "--"}{...} 9 | {vieweralsosee "googletrans python module" "https://py-googletrans.readthedocs.io"}{...} 10 | {vieweralsosee "Google Translate" "https://translate.google.com"}{...} 11 | 12 | {title:Title} 13 | 14 | {phang} 15 | {bf:googletrans} {hline 2} Translate strings using Google Translate web API 16 | 17 | 18 | {marker syntax}{...} 19 | {title:Syntax} 20 | 21 | {p 8 17 2} 22 | {cmdab:googletrans} 23 | [{it:anything}] 24 | [{cmd:,} {it:options}] 25 | 26 | {synoptset 20 tabbed}{...} 27 | {synopthdr} 28 | {synoptline} 29 | {syntab:Main} 30 | {synopt:{opt src}}the language of the source text{p_end} 31 | {synopt:{opt dest}}the language to translate the source text into{p_end} 32 | {synopt:{opt listl:anguages}}list all available languages{p_end} 33 | 34 | {synoptline} 35 | 36 | 37 | {marker description}{...} 38 | {title:Description} 39 | 40 | {pstd} 41 | {cmd:googletrans} translates variable and value labels present in the current dataset using Google Translate web API. This is a Stata wrapper of the {browse "https://pypi.org/project/googletrans":googletrans} package in Python. If {it:anything} is specified, only that string will be translated. Otherwise, all variable and value labels will be translated and replaced with the translation. 42 | 43 | 44 | {marker options}{...} 45 | {title:Options} 46 | 47 | {dlgtab:Main} 48 | 49 | {phang} 50 | {opt src} The language of the source text. The value should be one of the language codes from the {cmd:listlanguages} option. If a language is not specified, the system will attempt to identify the source language automatically. 
51 | 52 | {phang} 53 | {opt dest} The language to translate the source text into. The value should be one of the language codes from the {cmd:listlanguages} option. If not specified, {it:en} will be assumed. 54 | 55 | {phang} 56 | {opt listlanguages} Display the list of all available language codes and exit. All other parameters are ignored. 57 | 58 | 59 | {marker installation}{...} 60 | {title:Installation} 61 | 62 | {pstd} 63 | The command relies on Python integration, thus Stata 16.1 or newer is required. 64 | 65 | Before you can use the command, the googletrans Python module must also be installed: 66 | {phang}{cmd:> pip install googletrans}{p_end} 67 | 68 | If the following command produces no error, you have everything set up correctly: 69 | {phang}{cmd:. python which googletrans}{p_end} 70 | 71 | {marker examples}{...} 72 | {title:Examples} 73 | 74 | {phang}{cmd:. googletrans "გამარჯობა"}{p_end} 75 | 76 | {phang}{cmd:. googletrans, dest("es")}{p_end} 77 | -------------------------------------------------------------------------------- /lookfor_all.pkg: -------------------------------------------------------------------------------- 1 | v 3 2 | d lookfor_all - An extension of a Stata program lookfor. 3 | d Distribution-Date: 17Mar2008 4 | 5 | f lookfor_all/lookfor_all.ado 6 | f lookfor_all/lookfor_all.hlp 7 | -------------------------------------------------------------------------------- /lookfor_all/lookfor_all.ado: -------------------------------------------------------------------------------- 1 | *! version 2.22 17Mar2008 M. Lokshin, Z. 
Sajaia
# delimit ;

/*
  lookfor_all -- run -lookfor- over every .dta file in a directory and,
  optionally, in all of its subdirectories.

      anything      search patterns handed to -lookfor-
      subdir        also search subdirectories
      directory()   directory where the search starts (default: current)
      describe      show matches with -describe- (extra options go to it)
      codebook      show matches with -codebook, compact-

  The data in memory are saved to a temporary file when they have unsaved
  changes, dropped for the duration of the search and reloaded at the end.
  The working directory is put back to where it was on entry.
*/
program define lookfor_all;
    version 9.0;

    syntax [anything] [, SUBdir DIRectory(string) Describe CODEbook *];


    if c(changed) {;
        tempfile tmpfile;
        quietly save `tmpfile';
    };
    else local tmpfile `"`c(filename)'"';

    drop _all; // important

    // D[1,d] is the index of the subdirectory to visit next at depth d
    // (the matrix has 100 columns); i is the current depth, 1 = start directory
    tempname D; matrix `D'=J(1,100,1);
    local i=1;
    local cur_dir `c(pwd)';

    if (!missing(`"`directory'"')) quietly cd `"`directory'"';

    _lookforall `anything', `describe' `options' `codebook'; // search in the root directory

    if (!missing(`"`subdir'"')) {; // case when subdirectory option specified
        while (1) {;
            local dirs : dir . dirs "*";
            local j=`D'[1,`i'];
            local dirname : word `j' of `dirs';
            if ((!missing("`dirname'")) & _rc!=601) {;
                capture cd "`dirname'"; // in case we found system folder
                if (_rc!=170) {;
                    _lookforall `anything', `describe' `options' `codebook';
                    local ++i;
                };
                else matrix `D'[1,`i']=`D'[1,`i']+1;
            };
            else {;
                // this level is exhausted: reset its counter and go one level up
                matrix `D'[1,`i']=1;
                local i=`i'-1;
                quietly cd ..;
                if (`i'==0) continue, break;
                matrix `D'[1,`i']=`D'[1,`i']+1;
            };
        };
    };

    // restore the working directory in every case: directory() without
    // subdir also changes it, and the reload below may use a relative name
    quietly cd "`cur_dir'";

    if ~missing(`"`tmpfile'"') quietly use `"`tmpfile'"';
end;

/*******************************************************************/
/*******************************************************************/

/*
  _lookforall -- search the .dta files of the current directory only.
  Each file is loaded (first observation only unless codebook is
  requested) and -lookfor- is run on it; matching files are printed as
  clickable links. Ends with a count of the files that could be loaded.
*/
program define _lookforall;
    syntax [anything] [, Describe CODEbook *];

    local all_files : dir . files "*.dta";
    local n_files : word count `all_files';
    local c_files = 0;
    foreach file of local all_files {;
        if missing("`codebook'") local instr "in 1";
        quietly capture use `"`file'"' `instr';
        local rc = _rc; // keep the return code of -use-
        if (`rc' == 459) display as error "File "
            as result "`file'"
            as error " cannot be open in current version of Stata";
        else if (`rc' == 0) {;
            // only search files that were actually loaded; after a failed
            // -use- the previous file's data would still be in memory
            local ++c_files;
            local cmd "use "`c(pwd)'`c(dirsep)'`file'"";
            quietly lookfor `anything';
            if (!missing(r(varlist))) {;
                local vlist `r(varlist)';
                if(missing("`codebook'") & missing("`describe'"))
                    display `"{ stata `"`cmd'"' : `c(pwd)'`c(dirsep)'`file' }"', as text "`vlist'";
                else {;
                    display _newline `"{ stata `"`cmd'"' : `c(pwd)'`c(dirsep)'`file' }"';
                    if(!missing("`describe'") & missing("`codebook'")) describe `vlist', `options';
                    if(!missing("`codebook'") & missing("`describe'")) codebook `vlist', compact;
                };
            };
        };
    };
    display _newline as text " Total " as result `c_files'
        as text " out of " as result `n_files' as text " files checked in " _c;
    // c(dirsep) instead of a literal backslash, as in the file links above
    display `"{stata `"dir "`c(pwd)'`c(dirsep)'""' : `c(pwd)'`c(dirsep)'}"';
    if (`c_files' < `n_files') {;
        display _newline as text "(In order to check all `n_files' files in the folder, " _c;
        display "increase memory allocated to the data area)";
    };
end;
 92 | 93 | -------------------------------------------------------------------------------- /lookfor_all/lookfor_all.hlp: -------------------------------------------------------------------------------- 1 | {smcl} 2 | {* 05Feb2007}{...} 3 | {cmd:help lookfor_all} {right:{hi: Michael Lokshin and Zurab Sajaia}} 4 | {hline} 5 | 6 | {title:Title} 7 | 8 | {p2colset 8 23 2 1} 9 | {p2col :{hi:lookfor_all} {hline 2}}Searches for variables/patterns in .dta files 10 | {p2colreset}{...} 11 | 12 | 13 | {title:Syntax} 14 | 15 | {pstd} 16 | Searching within current directory 17 | 18 | {p 8 16 2} 19 | {cmd:lookfor_all} 
{it:pattern} [{it:pattern} [...]] [{cmd:,} {it:{help lookfor_all##options:options}}] 20 | 21 | {pstd}where {it:pattern} is a word that could be a name (or part of the name) of 22 | the variable and/or a word (or part of a word) in the variable label.{p_end} 23 | 24 | {pstd} 25 | Searching within the current directory and subdirectories 26 | 27 | {p 8 16 2} 28 | {cmd:lookfor_all} {it:pattern} [{cmd:,} {it:{help lookfor_all##options:options}}] 29 | 30 | 31 | {marker options}{...} 32 | {synoptset 17 tabbed}{...} 33 | {synopthdr:options} 34 | {synoptline} 35 | {syntab:Main} 36 | 37 | {synopt:{opt sub:dir}}an option for searching subdirectories{p_end} 38 | {synopt:{opt dir:ectory(path)}}an option to specify the root directory where to search{p_end} 39 | {synopt:{opt d:escribe}}an option to format the output{p_end} 40 | {synopt:{opt code:book}}another option to format the output{p_end} 41 | 42 | {title:Description} 43 | 44 | {pstd} 45 | {cmd:lookfor_all} is an extension of a Stata program {cmd:lookfor}. {cmd:lookfor_all} searches through all Stata data files in a current directory and subdirectories 46 | for the variable(s) matching the specified patterns. Each {it:pattern} could be a part/whole variable name and/or a word (part 47 | of the word) in the variable label. {cmd:lookfor_all} outputs the list of data files where the {it:pattern} was found and the 48 | list of variables within every data file that match the {it:pattern}. The list of files is clickable. Clicking on a file 49 | loads that data file into memory and makes it the active dataset. {cmd:lookfor_all} can also search directories other than the 50 | current one by specifying the path for a search. 51 | 52 | {title:Options} 53 | 54 | {dlgtab 4 2 :Main} 55 | 56 | {phang} 57 | {opt sub:dir} 58 | if specified {cmd:lookfor_all} searches through all .dta files in a current directory and subdirectories. If this option 59 | is not specified {cmd:lookfor_all} searches only within the files in a current directory. 
60 | 61 | {phang} 62 | {opt dir:ectory(path)} specifies the {it:path} where {cmd:lookfor_all} should start searching. By default {cmd:lookfor_all} searches files 63 | in the current directory. Specifying {it:path} makes Stata search data files in the 64 | {it:path} directory and, if {opt sub:dir} is also specified, its subdirectories. 65 | 66 | {phang} 67 | {opt d:escribe} controls the way the variables that match the {it:pattern} are outputted. By default {cmd:lookfor_all} outputs 68 | the variables following the name of the file. Specifying {opt d:escribe} makes Stata produce a table of variables with 69 | their types and labels. You can control the output using the options of the standard Stata command {helpb describe}. These options 70 | are {opt simple short details fullnames numbers}. 71 | 72 | {phang} 73 | {opt code:book} controls the way the variables that match the {it:pattern} are outputted. Specifying {opt code:book} makes Stata produce a table of variables with 74 | number of unique observations, mean, minimum, maximum and the label of the matching variables. 
75 | 76 | 77 | {title:Examples of searching within the current directory} 78 | 79 | {phang}{cmd:.lookfor_all edu}{p_end} 80 | 81 | {phang}{cmd:.lookfor_all edu, d simple}{p_end} 82 | 83 | {phang}{cmd:.lookfor_all edu, d short}{p_end} 84 | 85 | 86 | {title:Examples of searching the current directory and sub-directories} 87 | 88 | {phang}{cmd:.lookfor_all edu, sub}{p_end} 89 | 90 | {phang}{cmd:.lookfor_all edu, sub d fullnames}{p_end} 91 | 92 | {phang}{cmd:.lookfor_all edu, sub d numbers}{p_end} 93 | 94 | 95 | {title:Examples of searching the specified directory} 96 | 97 | {phang}{cmd:.lookfor_all edu, sub d dir("c:\data\")}{p_end} 98 | 99 | 100 | {title:Also see} 101 | 102 | {psee} 103 | Online: {helpb lookfor}, {helpb describe}{p_end} 104 | -------------------------------------------------------------------------------- /movestay.pkg: -------------------------------------------------------------------------------- 1 | v 3 2 | d movestay - FIML algorithm to estimate the endogenous switching regression model. 3 | 4 | d Distribution-Date: 10mar2008 5 | 6 | f movestay/movestay.ado 7 | f movestay/movestay_d2.ado 8 | f movestay/movestay_p.ado 9 | f movestay/movestay.hlp 10 | 11 | -------------------------------------------------------------------------------- /movestay/movestay.ado: -------------------------------------------------------------------------------- 1 | *! version 3.0.2 21Apr2008 M. Lokshin, Z. 
Sajaia 2 | #delim ; 3 | 4 | program define movestay, sortpreserve; 5 | version 8; 6 | if replay() {; 7 | if ("`e(cmd)'" ~= "movestay") error 301; 8 | movestay_replay `0'; 9 | }; 10 | else movestay_mx `0'; 11 | end; // end program movestay 12 | 13 | program define movestay_replay; 14 | syntax, [Level(int $S_level)]; 15 | 16 | local sigma0 diparm(lns0, exp label("sigma0")); 17 | local rho0 diparm(r0 , tanh label("rho0")); 18 | local sigma1 diparm(lns1, exp label("sigma1")); 19 | local rho1 diparm(r1 , tanh label("rho1")); 20 | 21 | ml display, level(`level') `sigma0' `sigma1' `rho0' `rho1'; 22 | 23 | if ("`e(vcetype)'" != "Robust") local testtyp LR; 24 | else local testtyp Wald; 25 | display in green "`testtyp' test of indep. eqns. :" 26 | _col(38) "chi2(" in yellow "2" in green ") = " in yellow %8.2f e(chi2_c) 27 | _col(59) in green "Prob > chi2 = " in yellow %6.4f e(p_c); 28 | display in smcl in green "{hline 78}"; 29 | end; // end program movestay_reply 30 | 31 | program define movestay_mx, eclass; 32 | 33 | syntax anything(id="equations id" equalok) [pweight iweight fweight] 34 | [if] [in], SELect(string) [CLuster(varname) Robust *]; 35 | 36 | mlopts mlopts, `options'; 37 | local coll `s(collinear)'; 38 | 39 | quietly {; 40 | 41 | // read vector of parameters 42 | 43 | gettoken reg0 reg1_ : anything, match(parns) bind; 44 | if ("`parns'"!="(") {; 45 | local reg0 "`anything'"; 46 | local reg1_ ""; 47 | }; 48 | tokenize "`reg0'", parse("()"); 49 | if ("`1'"!="`reg0'") {; 50 | display as error "If more than one, equations must be enclosed in brackets"; 51 | exit 198; 52 | }; 53 | tokenize "`reg0'", parse("="); 54 | if ("`2'"!="=") {; 55 | tokenize "`reg0'"; 56 | local y_reg0 `1'; 57 | macro shift; 58 | local x_reg0 `*'; 59 | }; 60 | else {; // else_63 61 | if ("`4'"=="=") {; 62 | display as error "If more than one, equations must be enclosed in brackets"; 63 | exit 198; 64 | }; 65 | local y_reg0 `1'; local x_reg0 `3'; 66 | }; // else_63 67 | unab x_reg0 : `x_reg0'; 
68 | 69 | if ("`reg1_'"!="") {; // if_64 70 | gettoken reg1 : reg1_ , match(parns) bind; 71 | if ("`parns'"!="(") local reg1 "`reg1_'"; 72 | tokenize "`reg1'", parse("="); 73 | if ("`2'"!="=") {; // if_66 74 | tokenize "`reg1'"; 75 | local y_reg1 `1'; 76 | macro shift; 77 | local x_reg1 `*'; 78 | }; // if_66 79 | else {; 80 | if ("`4'"=="=") {; 81 | display as error "If more than one, equations must be enclosed in brackets"; 82 | exit 198; 83 | }; 84 | local y_reg1 `1'; local x_reg1 `3'; 85 | }; // else 86 | }; // if_64 87 | else {; 88 | local y_reg1 `y_reg0'; local x_reg1 `x_reg0'; 89 | }; // else 90 | unab x_reg1 : `x_reg1'; 91 | 92 | tokenize "`select'", parse("="); // define vars for regression equation 93 | if ("`2'"!="=") {; 94 | tokenize "`select'"; 95 | local y_prob `1'; 96 | macro shift; 97 | local x_prob `*'; 98 | }; 99 | else {; 100 | local y_prob `1'; 101 | local x_prob `3'; 102 | }; 103 | unab x_prob : `x_prob'; 104 | 105 | 106 | // Drop observations with missing values 107 | 108 | marksample touse; 109 | markout `touse' `y_prob' `x_prob' `cluster', strok; 110 | 111 | tabulate `y_prob' if `touse'; 112 | if (r(r) != 2) {; 113 | noisily display as error "`y_prob' must have exactly two distinct values"; 114 | exit 2000; 115 | }; 116 | else {; 117 | tempvar sel; 118 | egen byte `sel' = group(`y_prob'); 119 | replace `sel' = `sel'-1; // `sel' has values 0 and 1 now 120 | }; 121 | tempvar touse0 touse1; 122 | generate byte `touse0'=`touse'; 123 | generate byte `touse1'=`touse'; 124 | markout `touse0' `y_reg0' `x_reg0'; 125 | markout `touse1' `y_reg1' `x_reg1'; 126 | replace `touse' = cond(`sel'==0,`touse0', `touse1') if `touse'; 127 | 128 | if ("`weight'" == "pweight" | "`cluster'" != "") local robust robust; 129 | if ~missing("`cluster'") local clopt cluster(`cluster'); 130 | if ~missing("`weight'") local weight "[`weight'`exp']"; 131 | 132 | 133 | // conditional collinearity 134 | forvalues i =0/1 {; 135 | _rmdcoll `y_reg`i'' `x_reg`i'' if `touse' & 
`sel'==`i', `coll'; 136 | local result "`r(varlist)'"; 137 | local colls`i': list x_reg`i' - result; 138 | if ~missing("`colls`i''") {; 139 | noisily display as text "note: `colls`i'' dropped from the `i' equation due to collinearity"; 140 | local x_reg`i' `result'; 141 | }; 142 | }; // i 143 | 144 | _rmdcoll `y_prob' `x_prob' if `touse', `coll'; 145 | local result "`r(varlist)'"; 146 | local colls: list x_prob - result; 147 | noisily display _newline; 148 | if ~missing("`colls'") noisily display as text "note: `colls' dropped from the selection equation due to collinearity"; 149 | local x_prob `result'; 150 | 151 | if ("`y_reg0'"=="`y_reg1'") {; 152 | local eq0 "`y_reg0'0"; 153 | local eq1 "`y_reg0'1"; 154 | }; 155 | else {; 156 | local eq0 "`y_reg0'"; local eq1 "`y_reg1'"; 157 | }; 158 | 159 | // define initial values 160 | tempname I_reg0 I_reg1 I_prob TMP ll_0; 161 | tempvar y_reg0_t y_reg1_t; 162 | 163 | generate double `y_reg0_t'=`y_reg0' if `sel'==0; // define subsamples for initial value regressions 164 | generate double `y_reg1_t'=`y_reg1' if `sel'==1; 165 | 166 | scalar `ll_0' = 0; 167 | 168 | noisily display _newline as text "Fitting initial values " _continue; 169 | 170 | probit `y_prob' `x_prob' `weight' if `touse', nocoef; 171 | scalar `ll_0'=`ll_0'+e(ll); noisily display "." _continue; 172 | regress `y_reg0_t' `x_reg0' `weight' if `touse', nohead; 173 | scalar `ll_0'=`ll_0'+e(ll); noisily display "." _continue; 174 | regress `y_reg1_t' `x_reg1' `weight' if `touse', nohead; 175 | scalar `ll_0'=`ll_0'+e(ll); noisily display "." _continue; 176 | 177 | if missing("`coll'") local hopts twostep; 178 | else local hopts iterate(20); 179 | capture heckman `y_reg0_t' `x_reg0' `weight' if `touse', select(`x_prob') `hopts' `coll'; 180 | if _rc exit _rc; 181 | noisily display "." 
_continue; 182 | matrix `TMP' = e(b); 183 | matrix `I_reg0'= `TMP'[1,"`y_reg0_t':"]; // initial values for regression 1 184 | matrix colnames `I_reg0'= `x_reg0'; 185 | scalar erho = e(rho); 186 | if (abs(erho) == 1) scalar erho = sign(erho)*(1-0.1D-8); // check so rho !=1 187 | local I_rho0 = atanh(erho); // initial values for rho0 188 | local I_sig0 = ln(e(sigma)); // initial values for sigma0 189 |  capture heckman `y_reg1_t' `x_reg1' `weight' if `touse', select(`x_prob') `hopts' `coll'; 190 | if _rc exit _rc; 191 | noisily display "." _continue; 192 | matrix `TMP' = e(b); 193 | matrix `I_reg1'= `TMP'[1,"`y_reg1_t':"]; // initial values for regression 2 194 | matrix `I_prob'= `TMP'[1,"select:"]; // initial values for probit 195 | scalar erho = e(rho); 196 | if (abs(erho) == 1) scalar erho = sign(erho)*(1-0.1D-8); // check so rho !=1 197 | local I_rho1 = atanh(erho); // initial values for rho1 198 | local I_sig1 = ln(e(sigma)); // initial values for sigma1 199 | }; // end quietly 200 | 201 | // maximization 202 | ml model d2 movestay_d2 203 | (`eq0' :`y_reg0' = `x_reg0') 204 | (`eq1' :`y_reg1' = `x_reg1') 205 | (select :`sel' = `x_prob') 206 | /lns0 207 | /lns1 208 | /r0 209 | /r1 210 | `weight' 211 | if `touse' 212 | , 213 | title("Endogenous switching regression model") 214 | search(on) 215 | init(`I_reg0' `I_reg1' `I_prob' `I_sig0' `I_sig1' `I_rho0' `I_rho1', copy) 216 | difficult 217 | collinear 218 | maximize 219 | nopreserve 220 | `clopt' 221 | `robust' 222 | `mlopts' 223 | ; 224 | 225 | ereturn scalar ll_0 = `ll_0'; // loglikelihood for non-correlated case 226 | ereturn scalar k_aux= 4; // identify ancillary parameters 227 | ereturn local cmd "movestay"; // fill in e(cmd) ; 228 | ereturn local predict "movestay_p"; 229 | ereturn local depvar "`eq0' `eq1' `y_prob'"; 230 | 231 | if missing("`robust'") {; 232 | ereturn local chi2_ct "LR"; 233 | ereturn scalar chi2_c = 2 * (e(ll) - `ll_0'); 234 | }; // end if 235 | else {; 236 | ereturn local chi2_ct "Wald"; 
237 | quietly test [r0]_b[_cons] [r1]_b[_cons]; 238 | ereturn scalar chi2_c = r(chi2); 239 | }; // end else 240 | ereturn scalar p_c = chiprob(2, e(chi2_c)); 241 | 242 | movestay_replay; 243 | end; // end program movestay_mx 244 | 245 | 246 | -------------------------------------------------------------------------------- /movestay/movestay.hlp: -------------------------------------------------------------------------------- 1 | {smcl} 2 | {* 10Mar2008}{...} 3 | 4 | {cmd:help movestay} 5 | {hline} 6 | 7 | {title:Title} 8 | 9 | {p2colset 5 20 22 2}{...} 10 | {p2col :{hi: movestay} {hline 2}}Maximum-likelihood estimation of endogenous switching regression model{p_end} 11 | {p2colreset}{...} 12 | 13 | 14 | {title:Syntax} 15 | 16 | {p 8 17 2} 17 | {cmd:movestay} 18 | ({it:{help depvar:depvar0}} [=] {varlist:0}) ({it:depvar1 [=] varlist1}) 19 | {ifin} 20 | {weight} 21 | {cmd:,} 22 | {cmdab:sel:ect:(}{it:depvar_s} [{cmd:=}] {it:varlist_s}{cmd:)} 23 | [{it:{help movestay##options:options}}] 24 | 25 | 26 | {title:Syntax for predict} 27 | 28 | {p 8 16 2} 29 | {cmd:predict} 30 | {dtype} 31 | {it:{help newvar}} 32 | {ifin} 33 | [{cmd:,} {it:statistic}] 34 | 35 | {synoptset 11 tabbed}{...} 36 | {synopthdr :statistic} 37 | {synoptline} 38 | {syntab :Main} 39 | {synopt :{opt ps:el}} the probability of being in regime 1{p_end} 40 | {synopt :{opt xb0}} fitted values for regime 0{p_end} 41 | {synopt :{opt xb1}} fitted values for regime 1{p_end} 42 | {synopt :{opt yc0}} fitted values for regime 0{p_end} 43 | {synopt :{opt yc1}} fitted values for regime 1{p_end} 44 | {synopt :{opt mills0}} Mills' ratio for regime 0{p_end} 45 | {synopt :{opt mills1}} Mills' ratio for regime 1{p_end} 46 | {synoptline} 47 | {p2colreset}{...} 48 | 49 | 50 | {synoptset 20 tabbed}{...} 51 | {marker options}{...} 52 | {synopthdr} 53 | {synoptline} 54 | {syntab :Model} 55 | {synopt :{opt select()}} specify selection equation: dependent and independent variables{p_end} 56 | {synopt :{opt col:linear}} keep
collinear variables{p_end} 57 | 58 | {syntab :SE/Robust} 59 | {synopt :{opt r:obust}}robust estimator of variance{p_end} 60 | {synopt :{opth cl:uster(varname)}}adjust standard errors for intragroup correlation{p_end} 61 | 62 | {syntab :Reporting} 63 | {synopt :{opt l:evel(#)}}set confidence level; default is {cmd:level(95)}{p_end} 64 | 65 | {syntab :Max option} 66 | {synopt :{it:{help movestay##maximize_options:maximize_options}}}control the maximization process;{p_end} 67 | {synoptline} 68 | {p2colreset}{...} 69 | {p 4 6 2} 70 | {opt fweight}s, {opt iweight}s, and {opt pweight}s are allowed; see {help weight}.{p_end} 71 | 72 | 73 | {title:Description} 74 | 75 | {pstd} 76 | {opt movestay} uses the maximum likelihood method to estimate the endogenous switching regression model. 77 | It is implemented using the {cmd:d2} evaluator to calculate the overall log 78 | likelihood together with its first and second derivatives. 79 | 80 | {p 4 4 2} 81 | {cmd:movestay} estimates all of the parameters in the model: 82 | 83 | {p 8 8 2}(regression equation for regime 0: y0 is {it:depvar0}, x0 is {it:varlist0}){p_end} 84 | {p 16 16 2}y0 = x0 * b0 + e_0 85 | 86 | {p 8 8 2}(regression equation for regime 1: y1 is {it:depvar1}, x1 is {it:varlist1}){p_end} 87 | {p 16 16 2}y1 = x1 * b1 + e_1 88 | 89 | {p 8 8 2}(selection equation: Z is {it:varlist_s}){p_end} 90 | {p 16 16 2}y0 observed if Zg + u <= 0{p_end} 91 | {p 16 16 2}y1 observed if Zg + u > 0{p_end} 92 | 93 | 94 | {p 8 8 2}where:{p_end} 95 | {p 16 16 2}e_0 ~ N(0, sigma0){p_end} 96 | {p 16 16 2}e_1 ~ N(0, sigma1){p_end} 97 | {p 16 16 2}u ~ N(0, 1){p_end} 98 | {p 16 16 2}corr(e_0, u) = rho_0{p_end} 99 | {p 16 16 2}corr(e_1, u) = rho_1{p_end} 100 | 101 | {p 4 4 2}Here {it:depvar0}, {it:depvar1} and {it:varlist0}, {it:varlist1} 102 | are the 103 | dependent variables and regressors for the underlying regression models 104 | (y0, y1 = xb), and {it:varlist_s} specifies the variables Z thought to determine 105 | which regime is
observed. 106 | 107 | 108 | {title:Options} 109 | 110 | {dlgtab:Model} 111 | 112 | {phang} 113 | {opt select()} specifies variables in the selection equation. {it:varlist_s} 114 | includes the set of instruments that help identify the model. This option is 115 | an integral part of the {cmd:movestay} estimation and is required. The 116 | selection equation is estimated based on all exogenous variables specified in 117 | the continuous equations plus instruments. If there are no instrumental 118 | variables in the model, {it:depvar_s} must be specified. In that case the 119 | model will be identified by non-linearities and the selection equation will 120 | contain all the independent variables that enter in the continuous equations. 121 | 122 | {phang} 123 | {opt collinear} see {help estimation options}. 124 | 125 | {dlgtab:SE/Robust} 126 | 127 | {phang} 128 | {opt robust} specifies that the Huber/White/sandwich estimator of the 129 | variance is to be used in place of the conventional MLE variance estimator. 130 | {opt robust} combined with {opt cluster} further allows observations which 131 | are not independent within cluster (although they must be independent between 132 | clusters). 133 | 134 | {pmore} 135 | If you specify {cmd:pweight}s, {cmd:robust} is implied. 136 | See {hi:[U] 23.14 Obtaining robust variance estimates}. 137 | 138 | {phang} 139 | {opth cluster(varname)} specifies that the observations are 140 | independent across groups (clusters) but not necessarily within groups. 141 | {it:varname} specifies to which group each observation belongs; e.g., 142 | {opt cluster(personid)} in data with repeated observations on individuals. 143 | {opt cluster()} affects the estimated standard errors and variance-covariance 144 | matrix of the estimators (VCE), but not the estimated coefficients. 145 | {cmd:cluster()} can be used with {help pweight}s to produce estimates for 146 | unstratified cluster-sampled data. 
Specifying {cmd:cluster()} implies 147 | {cmd:robust}. 148 | 149 | {dlgtab:Reporting} 150 | 151 | {phang} 152 | {opt level(#)}; see {help estimation options}. 153 | 154 | {marker maximize_options}{...} 155 | 156 | {dlgtab:Max options} 157 | 158 | {phang} 159 | {it:maximize_options} control the maximization process; see 160 | {help maximize}. With the possible exception of {cmd:iterate(0)} and 161 | {cmd:trace}, you should only have to specify them if the model is unstable. 162 | The maximization uses option {cmd:difficult} by default. This option need not be specified. 163 | 164 | 165 | {dlgtab: predict options} 166 | 167 | {phang} 168 | {opt psel} calculates the probability of being in regime 1. 169 | 170 | {phang} 171 | {opt xb0} calculates the linear prediction for equation 0. 172 | 173 | {phang} 174 | {opt xb1} calculates the linear prediction for equation 1. 175 | 176 | {phang} 177 | {opt yc0} returns the predicted value of the dependent variable(s) in the regime 0. For example, 178 | if earning function is modeled for two sectors (regimes), then this option predicts the wage rate 179 | in sector one for all individuals in the sample. 180 | 181 | {phang} 182 | {opt yc1} returns the predicted value of the dependent variable(s) in the regime 1. 183 | 184 | {phang} 185 | {opt mills0} and {opt mills1} calculate corresponding Mills' ratios for two regimes 186 | 187 | 188 | 189 | 190 | {title:Examples} 191 | 192 | {p 4 4 2}To obtain full ML estimates: 193 | 194 | {p 6 6 2} Using instruments: 195 | 196 | {p 8 8 2}{cmd:. movestay y1 x1 x2 x3 x4, select(regime1=z1 z2)} 197 | 198 | {p 8 8 2}{cmd:. movestay (y1= x1 x2 x3 x4) (y1= x1 x2 x3 x5), select(regime1=z1 z2)} 199 | 200 | {p 6 6 2}Model is identified through non-linearities: 201 | 202 | {p 8 8 2}{cmd:. movestay (y1= x1 x2 x3 x4) (y1= x1 x2 x3 x5), select(regime1)} 203 | 204 | {p 4 4 2}To define and use each equation separately: 205 | 206 | {p 8 8 2}{cmd:. global wage_eqn y x1 x2 x3 x4} 207 | {break}{cmd:. 
global select_eqn regime z1 z2} 208 | 209 | {p 8 8 2}{cmd:. movestay ($wage_eqn), select($select_eqn)} 210 | 211 | {p 4 4 2}To use options: 212 | 213 | {p 8 8 2}{cmd:. movestay y= x1 x2 x3 x4 if region==1 [w= hhweight], select(regime= z1 z2)} 214 | 215 | {p 8 8 2}{cmd:. movestay (y= x1 x2 x3 x4) if region==1, select(regime= z1 z2) tech("dfp")} 216 | 217 | {p 4 4 2}Prediction: 218 | 219 | {p 8 8 2}{cmd:. movestay y x1 x2 x3 x4, select(regime= z1 z2)} 220 | 221 | {p 8 8 2}{cmd:. predict yexpected, xb1} 222 | 223 | {p 8 8 2}{cmd:. predict mymills1, mills1} 224 | 225 | {p 4 4 2}Example from the {it:Stata Journal}: 226 | 227 | {p 8 8 2}{stata `"do http://siteresources.worldbank.org/INTPOVRES/Resources/movestay_example.do"': . movestay lmo_wage age age2 edu13 edu4 edu5 reg2 reg3 reg4, select(private =m_s1 job_hold)} 228 | 229 | 230 | {title:Authors} 231 | 232 | {p 4 4 2}M. Lokshin (DECRG, The World Bank) and Z. Sajaia (Stanford University). 233 | 234 | 235 | {title:Also see} 236 | 237 | {p 4 13 2} 238 | Online: help for {help regress}, {help heckman}, {help ml} 239 | 240 | -------------------------------------------------------------------------------- /movestay/movestay_d2.ado: -------------------------------------------------------------------------------- 1 | *! version 3.0.1 10Mar2008 M. Lokshin, Z.
Sajaia 2 | #delim ; 3 | program define movestay_d2, eclass; 4 | args todo b lnf g negH g1 g2 g3 g4 g5 g6 g7; 5 | /* d2 likelihood evaluator for movestay: fills lnf and, depending on todo, the gradient row vector g and the negative Hessian negH. Equations: 1-2 regime 0/1 regressions, 3 selection, 4-5 ln(sigma0), ln(sigma1), 6-7 theta0, theta1 with rho = tanh(theta). */ 6 | tempname lns1 lns0 theta0 theta1; tempvar xb0 xb1 xb3; 7 | /* xb0 xb1 xb3 are filled by mleval without the scalar option, i.e. they are variables, hence tempvar */ 8 | mleval `xb0' =`b', eq(1); 9 | mleval `xb1' =`b', eq(2); 10 | mleval `xb3' =`b', eq(3); 11 | mleval `lns0' =`b', eq(4) scalar; 12 | mleval `lns1' =`b', eq(5) scalar; 13 | mleval `theta0' =`b', eq(6) scalar; 14 | mleval `theta1' =`b', eq(7) scalar; 15 | 16 | quietly {; 17 | 18 | tempname sig1 sig0 rho1 rho0 rr1 rr0; 19 | 20 | scalar `sig0' = exp(`lns0'); 21 | scalar `sig1' = exp(`lns1'); 22 | scalar `rho0' = tanh(`theta0'); 23 | scalar `rho1' = tanh(`theta1'); 24 | scalar `rr0' = 1/sqrt(1-`rho0'^2); 25 | scalar `rr1' = 1/sqrt(1-`rho1'^2); 26 | 27 | tempvar eta0 eta1 lf1 eps0 eps1; 28 | /* eta0 is capped at 37 and eta1 floored at -37 before they enter norm() below */ 29 | generate double `eta0' = (`xb3' + ($ML_y1 - `xb0') * `rho0'/`sig0')*`rr0'; replace `eta0'= 37 if (`eta0'> 37); 30 | generate double `eta1' = (`xb3' + ($ML_y2 - `xb1') * `rho1'/`sig1')*`rr1'; replace `eta1'=-37 if (`eta1'<-37); 31 | generate double `eps0' = $ML_y1 - `xb0'; 32 | generate double `eps1' = $ML_y2 - `xb1'; 33 | 34 | tempname const0 const1; 35 | 36 | scalar `const0'=0.5*ln(2*_pi*`sig0'^2); 37 | scalar `const1'=0.5*ln(2*_pi*`sig1'^2); 38 | /* log likelihood: regime-1 contribution where $ML_y3==1, regime-0 contribution otherwise */ 39 | mlsum `lnf' = cond($ML_y3==1, ln(norm( `eta1'))-`const1'-0.5*(`eps1'/`sig1')^2, 40 | ln(norm(- `eta0'))-`const0'-0.5*(`eps0'/`sig0')^2); 41 | 42 | if (`todo'==0 | `lnf'==.)
exit; // calculate first derivative 43 | 44 | tempvar deta_dx deta_dsig deta_drho M; 45 | /* these four names are created with generate, i.e. they are variables, hence tempvar */ 46 | generate double `deta_dx' = -cond($ML_y3==1,`rho1'/`sig1'*`rr1',`rho0'/`sig0'*`rr0'); 47 | generate double `deta_dsig' = -cond($ML_y3==1,`eps1'*`rho1'/`sig1'^2*`rr1',`eps0'*`rho0'/`sig0'^2*`rr0'); 48 | 49 | generate double `deta_drho' = cond($ML_y3==1,`eps1'*`rr1'/`sig1'+(`rho1'*(`xb3'+`rho1'*`eps1'/`sig1'))*`rr1'^3, 50 | `eps0'*`rr0'/`sig0'+(`rho0'*(`xb3'+`rho0'*`eps0'/`sig0'))*`rr0'^3); 51 | 52 | generate double `M' = cond($ML_y3==1,normden(`eta1')/norm(`eta1'),-normden(`eta0')/norm(-`eta0')); 53 | 54 | tempname drho0_dtheta0 drho1_dtheta1; 55 | 56 | scalar `drho0_dtheta0' = 4*exp(2*`theta0')/((1+exp(2*`theta0'))^2); // derivative of rho w.r.t theta 57 | scalar `drho1_dtheta1' = 4*exp(2*`theta1')/((1+exp(2*`theta1'))^2); // derivative of rho w.r.t theta 58 | 59 | replace `g1' = cond($ML_y3==0,`M'*`deta_dx'+`eps0'/(`sig0'^2),0); 60 | replace `g2' = cond($ML_y3==1,`M'*`deta_dx'+`eps1'/(`sig1'^2),0); 61 | replace `g3' = cond($ML_y3==1,`M'*`rr1',`M'*`rr0'); 62 | replace `g4' = cond($ML_y3==0,(`M'*`deta_dsig'-(1/`sig0')+(`eps0'^2)/(`sig0'^3))*`sig0',0); 63 | replace `g5' = cond($ML_y3==1,(`M'*`deta_dsig'-(1/`sig1')+(`eps1'^2)/(`sig1'^3))*`sig1',0); 64 | replace `g6' = cond($ML_y3==0,`M'*`deta_drho'*`drho0_dtheta0',0); 65 | replace `g7' = cond($ML_y3==1,`M'*`deta_drho'*`drho1_dtheta1',0); 66 | 67 | tempname d1 d2 d3 d4 d5 d6 d7; 68 | 69 | mlvecsum `lnf' `d1' = `g1' , eq(1); 70 | mlvecsum `lnf' `d2' = `g2' , eq(2); 71 | mlvecsum `lnf' `d3' = `g3' , eq(3); 72 | mlvecsum `lnf' `d4' = `g4' , eq(4); 73 | mlvecsum `lnf' `d5' = `g5' , eq(5); 74 | mlvecsum `lnf' `d6' = `g6' , eq(6); 75 | mlvecsum `lnf' `d7' = `g7' , eq(7); 76 | 77 | matrix `g' = (`d1' , `d2' , `d3' , `d4' , `d5' , `d6', `d7' ); 78 | 79 | if (`todo'==1 | `lnf'==.
) exit; // calculate second derivative 80 | 81 | tempvar DM d2eta_drho2; tempname d2rho0_dtheta02 d2rho1_dtheta12; 82 | /* DM and d2eta_drho2 are variables; the two d2rho names hold scalars, hence tempname */ 83 | generate double `DM' = -cond($ML_y3==1,`M'*(`eta1'+`M'),`M'*(`eta0'+`M')); 84 | 85 | generate double `d2eta_drho2' = cond($ML_y3==1,`rr1'^3*(2*`rho1'*`eps1'/`sig1'+(`xb3'+`rho1'*`eps1'/`sig1')*(1+3*(`rho1'*`rr1')^2)), 86 | `rr0'^3*(2*`rho0'*`eps0'/`sig0'+(`xb3'+`rho0'*`eps0'/`sig0')*(1+3*(`rho0'*`rr0')^2))); 87 | 88 | scalar `d2rho1_dtheta12' = 8*exp(2*`theta1')*(1-exp(2*`theta1'))/(1+exp(2*`theta1'))^3; // 2nd derivative of rho w.r.t theta 89 | scalar `d2rho0_dtheta02' = 8*exp(2*`theta0')*(1-exp(2*`theta0'))/(1+exp(2*`theta0'))^3; 90 | 91 | tempvar g11 g12 g13 g14 g15 g16 g17 92 | g22 g23 g24 g25 g26 g27 93 | g33 g34 g35 g36 g37 94 | g44 g45 g46 g47 95 | g55 g56 g57 96 | g66 g67 97 | g77 98 | ; 99 | /* d11..d77 receive the mlmatsum results, i.e. they are matrices, hence tempname */ 100 | tempname d11 d12 d13 d14 d15 d16 d17 101 | d22 d23 d24 d25 d26 d27 102 | d33 d34 d35 d36 d37 103 | d44 d45 d46 d47 104 | d55 d56 d57 105 | d66 d67 106 | d77 107 | ; 108 | 109 | generate double `g22' = - cond($ML_y3==1,`DM'*`deta_dx'^2-1/`sig1'^2,0); 110 | generate double `g23' = - cond($ML_y3==1,`DM'*`deta_dx'*`rr1',0); 111 | generate double `g25' = - cond($ML_y3==1,(`DM'*`deta_dsig'-`M'/`sig1')*`deta_dx'-2*`eps1'/`sig1'^3,0); 112 | generate double `g27' = - cond($ML_y3==1,(`DM'*`deta_drho'+`M'*(1/`rho1'+`rho1'*`rr1'^2))*`deta_dx',0); 113 | 114 | generate double `g33' = - cond($ML_y3==1,`DM'*`rr1'^2,`DM'*`rr0'^2); 115 | generate double `g35' = - cond($ML_y3==1,`DM'*`deta_dsig'*`rr1',0); 116 | generate double `g34' = - cond($ML_y3==0,`DM'*`deta_dsig'*`rr0',0); 117 | generate double `g36' = - cond($ML_y3==0,`DM'*`deta_drho'*`rr0'+`M'*`rho0'*`rr0'^3,0); 118 | generate double `g37' = - cond($ML_y3==1,`DM'*`deta_drho'*`rr1'+`M'*`rho1'*`rr1'^3,0); 119 | 120 | generate double `g55' = - cond($ML_y3==1,(`DM'*`deta_dsig'^2-2*`M'*`deta_dsig'/`sig1'+1/`sig1'^2-3*`eps1'^2/`sig1'^4)*`sig1',0)-`g5'; 121 | generate double `g57' = -
cond($ML_y3==1,`DM'*`deta_drho'*`deta_dsig'+`M'*`deta_dsig'*(1/`rho1'+`rho1'*`rr1'^2),0); 122 | /* NOTE(review): the trailing g4, g5, g6, g7 terms in g44, g55, g66, g77 already carry the sigma resp. drho/dtheta chain-rule factor and are scaled once more afterwards; this looks like one factor too many away from the optimum - TODO confirm against the analytic Hessian */ 123 | generate double `g44' = - cond($ML_y3==0,(`DM'*`deta_dsig'^2-2*`M'*`deta_dsig'/`sig0'+1/`sig0'^2-3*`eps0'^2/`sig0'^4)*`sig0',0)-`g4'; 124 | generate double `g46' = - cond($ML_y3==0,`DM'*`deta_drho'*`deta_dsig'+`M'*`deta_dsig'*(1/`rho0'+`rho0'*`rr0'^2),0); 125 | 126 | generate double `g66' = - cond($ML_y3==0,(`DM'*`deta_drho'^2+`M'*`d2eta_drho2')*`drho0_dtheta0'^2,0)-`g6'*`d2rho0_dtheta02'; 127 | 128 | generate double `g77' = - cond($ML_y3==1,(`DM'*`deta_drho'^2+`M'*`d2eta_drho2')*`drho1_dtheta1'^2,0)-`g7'*`d2rho1_dtheta12'; 129 | 130 | mlmatsum `lnf' `d11'=-(`DM'*`deta_dx'^2-1/`sig0'^2) if ($ML_y3==0), eq(1); 131 | mlmatsum `lnf' `d12'= 0, eq(1,2); 132 | mlmatsum `lnf' `d13'=-`DM'*`deta_dx'*`rr0' if ($ML_y3==0), eq(1,3); 133 | mlmatsum `lnf' `d14'=-((`DM'*`deta_dsig'-`M'/`sig0')*`deta_dx'-2*`eps0'/`sig0'^3)*`sig0' if ($ML_y3==0), eq(1,4); 134 | mlmatsum `lnf' `d15'= 0, eq(1,5); 135 | mlmatsum `lnf' `d16'=-(`DM'*`deta_drho'+`M'*(1/`rho0'+`rho0'*`rr0'^2))*`deta_dx'*`drho0_dtheta0' if ($ML_y3==0), eq(1,6); 136 | mlmatsum `lnf' `d17'= 0, eq(1,7); 137 | 138 | mlmatsum `lnf' `d22'=`g22', eq(2); 139 | mlmatsum `lnf' `d23'=`g23', eq(2,3); 140 | mlmatsum `lnf' `d24'= 0, eq(2,4); 141 | mlmatsum `lnf' `d25'=`g25'*`sig1', eq(2,5); 142 | mlmatsum `lnf' `d26'= 0, eq(2,6); 143 | mlmatsum `lnf' `d27'=`g27'*`drho1_dtheta1', eq(2,7); 144 | 145 | mlmatsum `lnf' `d33'=`g33', eq(3); 146 | mlmatsum `lnf' `d34'=`g34'*`sig0', eq(3,4); 147 | mlmatsum `lnf' `d35'=`g35'*`sig1', eq(3,5); 148 | mlmatsum `lnf' `d36'=`g36'*`drho0_dtheta0', eq(3,6); 149 | mlmatsum `lnf' `d37'=`g37'*`drho1_dtheta1', eq(3,7); 150 | 151 | mlmatsum `lnf' `d44'=`g44'*`sig0', eq(4); 152 | mlmatsum `lnf' `d45'= 0, eq(4,5); 153 | mlmatsum `lnf' `d46'=`g46'*`sig0'*`drho0_dtheta0', eq(4,6); 154 | mlmatsum `lnf' `d47'=0, eq(4,7); 155 | 156 | mlmatsum `lnf' `d55'=`g55'*`sig1', eq(5); 157 | mlmatsum `lnf' `d56'=0, eq(5,6);
158 | mlmatsum `lnf' `d57'=`g57'*`sig1'*`drho1_dtheta1', eq(5,7); 159 | 160 | mlmatsum `lnf' `d66'=`g66', eq(6); 161 | mlmatsum `lnf' `d67'= 0, eq(6,7); 162 | 163 | mlmatsum `lnf' `d77'=`g77', eq(7); 164 | /* assemble negH row by row from the upper-triangle blocks d_kl; blocks below the diagonal are the transposes of d_lk */ 165 | capture matrix drop `negH'; 166 | forvalues k = 1/7 {; 167 | tempname H; 168 | forvalues l = 1/7 {; 169 | if (`l'>`k') matrix `H' =nullmat(`H'),`d`k'`l''; 170 | else matrix `H' =nullmat(`H'),`d`l'`k'''; 171 | }; // l 172 | matrix `negH' = nullmat(`negH') \ `H'; 173 | }; // k 174 | 175 | }; // end quietly 176 | end; 177 | 178 | -------------------------------------------------------------------------------- /movestay/movestay_example.do: -------------------------------------------------------------------------------- 1 | #delim ; 2 | set more off; 3 | 4 | use movestay_example, clear; 5 | 6 | local str age age2 edu13 edu4 edu5 reg2 reg3 reg4 ; 7 | 8 | movestay lmo_wage `str', select(private =m_s1 job_hold); 9 | 10 | -------------------------------------------------------------------------------- /movestay/movestay_example.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vavalomi/stata_tools/67b6c92bfdbc5c81cf4b2d859d40bb901ec6c96f/movestay/movestay_example.dta -------------------------------------------------------------------------------- /movestay/movestay_p.ado: -------------------------------------------------------------------------------- 1 | *! version 3.0.2 13Apr2008 M. Lokshin, Z.
Sajaia 2 | /* movestay_p: predict companion for movestay; generates exactly one statistic per call from the stored estimates, psel when none is requested */ 3 | // vague naming problem of the statistics was corrected 4 | // yc0 gives predicted y (for the whole sample) if they were in regime 0, 5 | // yc1 gives predicted y (for the whole sample) if they were in regime 1, 6 | //--------------------------------------------------------------------------------------------------------- 7 | // statistics -yc1_1- -yc2_1- -yc1_2- -yc2_2- are still supported for backward compatibility 8 | // but using new, better named -yc0- -yc1- is recommended 9 | 10 | #delim ; 11 | program define movestay_p; 12 | version 8; 13 | syntax anything [if] [in] [, PSel xb0 xb1 yc0 yc1 mills0 mills1 xb2 yc1_1 yc2_1 yc1_2 yc2_2 mills2]; 14 | /* the sample marker is built after the permissive first parse; presumably so that marksample does not look at the not-yet-existing new variable - TODO confirm */ 15 | marksample touse; 16 | 17 | syntax newvarname [if] [in] [, PSel xb0 xb1 yc0 yc1 mills0 mills1 xb2 yc1_1 yc2_1 yc1_2 yc2_2 mills2]; 18 | 19 | if ("`e(cmd)'" ~= "movestay") error 301; 20 | local check "`psel' `xb0' `xb1' `yc0' `yc1' `mills0' `mills1' `xb2' `yc1_1' `yc2_1' `yc1_2' `yc2_2' `mills2'"; 21 | if (wordcount("`check'")>1) {; 22 | display as error "Only one statistic is allowed"; 23 | exit 198; 24 | }; 25 | if wordcount("`check'")==0 {; 26 | local y_prob: word 3 of `e(depvar)'; noisily display as text "(Option psel assumed; Pr(`y_prob'))"; /* y_prob must be set before the message, otherwise it prints an empty Pr() */ 27 | local psel "psel"; 28 | }; 29 | 30 | //treat statistics from the old version 31 | if ~missing("`xb2'") local xb0 "xb0"; 32 | if ~missing("`mills2'") local mills0 "mills0"; 33 | 34 | quietly {; 35 | /* e(depvar) holds three words: the two regression equation names and the selection variable */ 36 | local y_reg0: word 1 of `e(depvar)'; 37 | local y_reg1: word 2 of `e(depvar)'; 38 | local y_prob: word 3 of `e(depvar)'; 39 | 40 | tempname b sigma0 sigma1 rho0 rho1 reg0 reg1 prob; 41 | matrix `b' = e(b); 42 | 43 | scalar `sigma0' = exp(_b[lns0:_cons]); 44 | scalar `sigma1' = exp(_b[lns1:_cons]); 45 | scalar `rho0' = tanh(_b[r0:_cons]); 46 | scalar `rho1' = tanh(_b[r1:_cons]); 47 | matrix `reg0' = `b'[1,"`y_reg0':"]; 48 | matrix `reg1' = `b'[1,"`y_reg1':"]; 49 | matrix `prob' = `b'[1,"select:"]; 50 | 51 | tempvar xb_prob x_b0 x_b1 q sel; 52 | 53 | matrix score double `xb_prob' = `prob' if
`touse'; 54 | 55 | if ~missing("`psel'") {; 56 | generate `typlist' `varlist' = norm(`xb_prob') if `touse'; 57 | exit; 58 | }; 59 | 60 | if ~missing("`mills0'") {; 61 | generate `typlist' `varlist' = normden(`xb_prob')/(1-norm(`xb_prob')) if `touse'; 62 | exit; 63 | }; 64 | if ~missing("`mills1'") {; 65 | generate `typlist' `varlist' = normden(`xb_prob')/norm(`xb_prob') if `touse'; 66 | exit; 67 | }; 68 | 69 | matrix score double `x_b0' = `reg0' if `touse'; 70 | matrix score double `x_b1' = `reg1' if `touse'; 71 | /* sel recodes the selection variable to 0/1 via egen group(); q is -1 for sel==0 and +1 for sel==1 */ 72 | egen byte `sel' = group(`y_prob'); 73 | replace `sel' = `sel'-1; 74 | generate byte `q' = 2*`sel'-1; 75 | 76 | if ~missing("`yc0'") {; 77 | generate `typlist' `varlist' = `x_b0' + `q'*`sigma0'*`rho0'*normden(`xb_prob')/norm(`q'*`xb_prob') if `touse'; 78 | exit; 79 | }; 80 | if ~missing("`yc1'") {; 81 | generate `typlist' `varlist' = `x_b1' + `q'*`sigma1'*`rho1'*normden(`xb_prob')/norm(`q'*`xb_prob') if `touse'; 82 | exit; 83 | }; 84 | if ~missing("`xb0'") {; 85 | generate `typlist' `varlist' = `x_b0' if `touse'; 86 | exit; 87 | }; 88 | if ~missing("`xb1'") {; 89 | generate `typlist' `varlist' = `x_b1' if `touse'; 90 | exit; 91 | }; 92 | 93 | // old version again 94 | if ~missing("`yc1_1'") {; 95 | generate `typlist' `varlist' = `x_b1'+`sigma1'*`rho1'*normden(`xb_prob')/norm(`xb_prob') if (`sel'==1) & `touse'; 96 | exit; 97 | }; 98 | if ~missing("`yc2_1'") {; 99 | generate `typlist' `varlist' = `x_b0'+`sigma0'*`rho0'*normden(`xb_prob')/norm(`xb_prob') if (`sel'==1) & `touse'; 100 | exit; 101 | }; 102 | if ~missing("`yc1_2'") {; 103 | generate `typlist' `varlist' = `x_b1'-`sigma1'*`rho1'*normden(`xb_prob')/(1-norm(`xb_prob')) if (`sel'==0) & `touse'; 104 | exit; 105 | }; 106 | if ~missing("`yc2_2'") {; 107 | generate `typlist' `varlist' = `x_b0'-`sigma0'*`rho0'*normden(`xb_prob')/(1-norm(`xb_prob')) if (`sel'==0) & `touse'; 108 | exit; 109 | }; 110 | 111 | }; // end quietly 112 | 113 | end; // end mspredict 114 | 115 |
-------------------------------------------------------------------------------- /pov_robust.pkg: -------------------------------------------------------------------------------- 1 | v 3 2 | d pov_robust - Stochastic Dominance Analysis. 3 | d Distribution-Date: 11Feb2008 4 | 5 | f pov_robust/pov_robust.ado 6 | f pov_robust/pov_robust.hlp 7 | -------------------------------------------------------------------------------- /pov_robust/pov_robust.ado: -------------------------------------------------------------------------------- 1 | *! version 3.05 27Feb2008 M. Lokshin 2 | #delimit ; 3 | /* pov_robust: plots the poverty incidence curve (default), the deficit curve (pd) or the severity curve (ps) of var1, and of var2 from the using dataset when one is given; the data in memory are preserved and restored */ 4 | program define pov_robust, rclass; 5 | version 8.0; 6 | 7 | syntax [using] [if] [aw fw], var1(varname numeric) [var2(string) automax pi pd ps *]; 8 | 9 | if missing(`"`using'"')!=missing("`var2'") {; 10 | display as error "using and var2 options must be used together"; 11 | exit 198; 12 | }; 13 | 14 | preserve; 15 | 16 | tempfile file1; 17 | tempvar c_year c_welf; 18 | 19 | quietly {; 20 | if ~missing("`if'") keep `if'; // restrict observation for user-specified if conditions 21 | generate byte `c_year'=1; 22 | if ~missing("`exp'") {; 23 | tempvar c_wgt; 24 | generate `c_wgt' `exp'; 25 | }; 26 | rename `var1' `c_welf'; 27 | keep `c_welf' `c_year' `c_wgt'; 28 | save `file1'; 29 | 30 | if ~missing(`"`using'"') {; 31 | use `using', clear; 32 | if ~missing("`if'") keep `if'; // restrict observation for user-specified if conditions 33 | generate byte `c_year'=2; 34 | rename `var2' `c_welf'; 35 | if ~missing("`exp'") {; 36 | generate `c_wgt' `exp'; 37 | }; 38 | keep `c_welf' `c_year' `c_wgt'; 39 | append using `file1'; 40 | local years = 2; 41 | local lab " in year \`y'"; 42 | }; 43 | else local years = 1; 44 | 45 | if ~missing("`exp'") local exp "=`c_wgt'"; 46 | 47 | drop if `c_welf'==.; 48 | label variable `c_welf' "Welfare indicator"; 49 | 50 | forvalues y = 1/`years' {; 51 | tempvar step`y' pi`y'; 52 | cumul `c_welf' if `c_year'==`y' [`weight' `exp'], gen(`pi`y''); // poverty
incidence curve 53 | label variable `pi`y'' "Poverty incidence curve`lab'"; 54 | 55 | sort `c_year' `c_welf'; 56 | generate `step`y''=(`c_welf'-`c_welf'[_n-1]) if `c_year'==`y'; 57 | replace `step`y''=. if `step`y''<0; // for the first obs in year 2 58 | 59 | if ~missing("`pd'`ps'") {; 60 | tempvar pd`y'; 61 | generate `pd`y''=sum(`step`y''*`pi`y'') if `c_year'==`y'; // poverty deficit curve 62 | label variable `pd`y'' "Poverty depth curve`lab'"; 63 | }; 64 | if ~missing("`ps'") {; 65 | tempvar ps`y'; 66 | generate `ps`y''=sum(`step`y''*`pd`y'') if `c_year'==`y'; // poverty severity curve 67 | label variable `ps`y'' "Poverty severity curve`lab'"; /* label the curve of the current year, not always ps1 */ 68 | }; 69 | }; 70 | local p_line `pi1' `pi2'; 71 | if (!missing("`ps'")) local p_line `ps1' `ps2'; 72 | if (!missing("`pd'")) local p_line `pd1' `pd2'; 73 | }; // end quietly 74 | 75 | // Everything is prepared. Now - rescale 76 | if ~missing("`automax'") {; 77 | local FLAG=1; 78 | while `FLAG' {; 79 | // Determine if we can divide ALL the variables in p_line by 1000 80 | foreach pline of local p_line {; 81 | summarize `pline',meanonly; 82 | if `r(max)'<1000 local FLAG=0; // if the max is less than 1000 then switch off the flag 83 | }; 84 | // If FLAG is set, then the max of each variable in p_line is more than 1000 85 | if `FLAG' foreach pline of local p_line {; 86 | quietly replace `pline'=`pline'/1000; 87 | }; 88 | }; 89 | 90 | summarize `c_welf',meanonly; 91 | roundnice `r(max)'; // make a nice maximum 92 | round_det `r(roundnice)'; // round to 000s, mlns or blns 93 | local max_c_welf=`r(number)'; local max_c_welf_u="`r(units)'"; 94 | if "`max_c_welf_u'"=="'000" replace `c_welf'=`c_welf'/1000; 95 | if "`max_c_welf_u'"=="mln" replace `c_welf'=`c_welf'/1000000; 96 | if "`max_c_welf_u'"=="bln" replace `c_welf'=`c_welf'/1000000000; 97 | local stepnice=`max_c_welf'/5; 98 | 99 | local lbl : variable label `c_welf'; 100 | 101 | twoway line `p_line' `c_welf', 102 | xtitle("`lbl', `max_c_welf_u'", size(3) margin(0 0 0 2)) 103 |
xlabel(0(`stepnice')`max_c_welf', labsize(2.9)) 104 | `options'; 105 | }; 106 | else twoway line `p_line' `c_welf', `options'; 107 | 108 | restore; 109 | 110 | end; 111 | /* round_det: divides number by 1000 (rounded to 0.1) while it exceeds 1000, at most three times, and returns r(number) plus the matching r(units) label */ 112 | program define round_det, rclass; 113 | args number; 114 | local units "units"; 115 | if `number'>1000 {; local number=round(`number'/1000, 0.1); local units="'000"; }; 116 | if `number'>1000 {; local number=round(`number'/1000, 0.1); local units="mln"; }; 117 | if `number'>1000 {; local number=round(`number'/1000, 0.1); local units="bln"; }; 118 | return local number `number'; return local units "`units'"; 119 | end; 120 | 121 | #delimit cr 122 | /* roundnice: rounds its first argument to a multiple of the step in l2 whose threshold in l1 first exceeds it; second argument 0 selects round, -1 floor, anything else ceil; result in r(roundnice) */ 123 | program roundnice, rclass 124 | local roundwhat `1' 125 | local round_method="ceil" 126 | if "`2'"=="0" local round_method="round" 127 | if "`2'"=="-1" local round_method="floor" 128 | 129 | local l1 50 100 500 1000 2000 5000 10000 20000 100000 200000 500000 1000000 5000000 /* If X is less than this number */ 130 | local l2 1 10 50 100 200 500 1000 2000 5000 10000 20000 50000 100000 /* round it to multiples of this number */ 131 | 132 | capture confirm number `roundwhat' 133 | if _rc { 134 | window stopbox stop "Error, `roundwhat' is not a number or missing" 135 | exit 136 | } 137 | 138 | local roundnice = `roundwhat' 139 | 140 | local indx=1 141 | foreach t of local l1 { 142 | if `roundwhat'<`t' { 143 | local r=word("`l2'",`indx') 144 | local roundnice = `=`round_method'(`roundwhat'/`r')*`r'' 145 | continue, break 146 | } 147 | local indx=`indx'+1 148 | } 149 | return scalar roundnice=`roundnice' 150 | end 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | -------------------------------------------------------------------------------- /pov_robust/pov_robust.hlp: -------------------------------------------------------------------------------- 1 | {smcl} 2 | {* 9february2004}{...}
3 | {hline} 4 | help for {hi:pov_robust}{right:({hi:Michael M. Lokshin and Martin Ravallion}, 2002-2004,The World Bank)} 5 | {hline} 6 | 7 | {title: Poverty stochastic dominance analysis} 8 | 9 | {p 8 12}{cmd:pov_robust} {cmd:using} {it:file1} [{it:weight}] [{cmd:if} {it:expr}] {cmd:,} {cmd:var1(}{it:varname}{cmd:)} 10 | {cmd:var2(}{it:varname}{cmd:)} [ {cmd:pi} {cmd:pd} {cmd:ps} {it:graph_options}] 11 | 12 | {p}where {p_end} 13 | {p 8 8} {it:file1} is a file with a second year data. {p_end} 14 | {p 8 8} {cmd:var1} is a welfare indicator variable in the first year data. {p_end} 15 | 16 | {p 8 8} {cmd:var2} is a welfare indicator variable in the second year data. {p_end} 17 | 18 | {p 8 8} {cmd:pi } is an option to graph the poverty incidence curve. {p_end} 19 | 20 | {p 8 8} {cmd:pd } is an option to graph the poverty deficit curve. {p_end} 21 | 22 | {p 8 8} {cmd:ps } is an option to graph the poverty severity curve. {p_end} 23 | 24 | {p 8 8} {it:graph_options} all graph options for {help twoway_line} are allowed. {p_end} 25 | 26 | {title:Description} 27 | 28 | {p}{cmd:pov_robust} graphs poverty {it:incidence}, {it:deficit} and {it:severity} curves. Each point 29 | on the poverty incidence curve gives the proportion of the population consuming less than the amount 30 | given on the horizontal axis of the graph. The line that represent the area under the {it:poverty incidence 31 | curve} up to each point is called {it:poverty deficit curve}. Each point on the {it:poverty deficit curve} 32 | is the value of the poverty gap index times the corresponding poverty line. 
If one calculates the area under 33 | the {it:poverty deficit curve} at each point of income distribution then one obtains the {it:poverty severity curve}. 34 | Each point of the {it:poverty severity curve} is directly proportional to FGT measure P2.{p_end} 35 | 36 | {p}{cmd:pov_robust} uses observations from the dataset currently in memory (the master dataset) 37 | and observations from the Stata dataset stored as {it:file1} (using dataset). If {it:weight} or 38 | [{cmd:if} {it:expr}] is specified, the variables used for {it:weight} or {cmd:if} conditions should 39 | have the same name in both datasets.{p_end} 40 | 41 | {title:Options} 42 | 43 | {p 0 4} {it:file1} is a file with a second year data. This file should contain the welfare 44 | indicator variable (e.g., expenditure or income) and all the variables that are used in 45 | the {cmd:if} statement. That file must be specified. If filename is specified without an 46 | extension, .dta is assumed. In order to run {cmd:pov_robust} you should have open one of 47 | the files with data.{p_end} 48 | 49 | {p 0 4} {cmd:var1} is a welfare indicator variable in the first year data. This variable 50 | should be present in the opened file. This variable must be specified. {p_end} 51 | 52 | {p 0 4} {cmd:var2} is a welfare indicator variable in the second year data. This variable 53 | should be present in the {cmd:using} file. This variable must be specified. {p_end} 54 | 55 | {p 0 4} {cmd:pi, pd, ps} Are the optional parameters. By specifying any one of these options 56 | the {it:poverty incidence}, {it:deficit}, or {it:poverty severity curve} could be plotted. 57 | If none of {cmd:pi, pd, ps} is specified, the {it:poverty incidence curve} is plotted.{p_end} 58 | 59 | {p 0 4} {it:graph_options} all graph options for {help twoway_line} are allowed. {p_end} 60 | 61 | {title:Examples} 62 | 63 | {p 4 8}{inp:. use data1}{p_end} 64 | {p 4 8}{inp:. pov_robust using data2, var1(expend_1) var2(expend_2)}{p_end} 65 | {p 4 8}{inp:.
pov_robust using data2, var1(expnd_1) var2(expend_2) ps s(.) c(s)}{p_end} 66 | {p 4 8}{inp:. pov_robust using data2 if region==1, var1(expend_1) var1(expend_2) pd}{p_end} 67 | 68 | {title:Also see} 69 | 70 | {p 1 14}Reference: {hi: e.g., Atkinson (1987) 'On Measurment of Poverty', {it:Econometrica}, Vol. 55: 244-263}{p_end} 71 | 72 | {title:Authors} 73 | 74 | {p 4 4 2} Michael Lokshin, DECRG, The World Bank. If you observe any problems 75 | {browse "mailto:mlokshin@worldbank.org"}. 76 | 77 | {p 4 4} Martin Ravallion, DECRG, The World Bank. 78 | -------------------------------------------------------------------------------- /ppreg.pkg: -------------------------------------------------------------------------------- 1 | v 3 2 | d 'ppreg': Efficient estimation of regressions based on pseudo-panel data 3 | 4 | d 5 | d KW: pseudo-panel 6 | d 7 | d Requires: Stata version 8.0 8 | d 9 | d Distribution-Date: 8Dec2008 10 | d 11 | 12 | d Author: Zurab Sajaia, World Bank 13 | d Support: email zsajaia@hotmail.com 14 | d Author: Richard Chiburis, Princeton University 15 | 16 | f ppreg/ppreg.ado 17 | f ppreg/ppreg.hlp 18 | -------------------------------------------------------------------------------- /ppreg/ppreg.ado: -------------------------------------------------------------------------------- 1 | // Last saved 8Dec2008 2 | #delimit ; 3 | 4 | program define ppreg; // entry point: redisplays stored results when replay() is true, otherwise estimates 5 | version 9; 6 | if replay() {; 7 | if ("`e(cmd)'" ~= "ppreg") error 301; // the stored estimates were not produced by ppreg 8 | ppreg_replay `0'; 9 | }; 10 | else ppreg_est `0'; 11 | end; // end program ppreg 12 | 13 | program define ppreg_replay; // displays the header and the coefficient table of the stored results 14 | syntax [,Level(cilevel)]; 15 | _coef_table_header; 16 | display; 17 | ereturn display, level(`level'); 18 | end; // end program ppreg_replay 19 | 20 | program define ppreg_est, eclass; // computes b and V from cohort-time cells, posts them to e() and displays the table 21 | syntax [varlist(default=none)] [if] [in] [aweight pweight iweight fweight], Time(varname) Cohort(varname); 22 | quietly {; 23 | // T (time periods) and C (cohorts) are counted further down, once the data are restricted to the estimation sample 24 | 25 | 26 | marksample
touse; 27 | markout `touse' `varlist' `time' `cohort', strok; 28 | gettoken y x : varlist; 29 | preserve; 30 | keep if `touse'; 31 | tabulate `time'; scalar T=r(r); tabulate `cohort'; scalar C=r(r); // number of periods (T) and cohorts (C) present in the estimation sample 32 | tempvar w e tw; 33 | if (~missing("`weight'")) {; 34 | generate double `w'`exp'; 35 | local ww "[`weight'=`w']"; 36 | }; 37 | else generate `w'=1; 38 | 39 | egen double `tw' = total(`w'), by(`time' `cohort'); 40 | summarize `w'; local ttw = r(sum); 41 | foreach var of varlist `varlist' {; 42 | tempvar dvar tm`var'; 43 | egen double `tm`var'' = total(`var'*`w'/`ttw'); 44 | egen double `dvar' = total(`var'*`w'/`tw'), by(`time' `cohort'); 45 | replace `dvar' = (`var' - `dvar') / sqrt((`tw' - 1)*`tw'); 46 | local dvarlist "`dvarlist' `dvar'"; 47 | local tmvarlist "`tmvarlist' `tm`var''"; 48 | }; 49 | tempname S Syy Sxy Sxx M Mxy Mxx invOMEGA b V V1 Xb EX; 50 | matrix accum `S' = `dvarlist' `ww', noconstant; 51 | matrix `S' = `S' / (C*T); 52 | matrix `Sxy' = `S'[2..., 1]; 53 | matrix `Sxx' = `S'[2..., 2...]; 54 | collapse (mean) `varlist' (min) `tmvarlist' (count) `w'=`y' `ww', by(`time' `cohort'); 55 | // after collapse each observation is one non-empty cohort-time cell, so _N is the number of cells present 56 | if (T*C > _N) {; noisily display as error "not every cohort-time cell has observations"; exit 2000; }; 57 | 58 | egen double `tw' = total(`w'), by(`cohort'); 59 | foreach var of varlist `varlist' {; 60 | tempvar mean; 61 | egen double `mean' = total(`var'*`w'/`tw'), by(`cohort'); 62 | replace `var' = (`var' - `mean'); 63 | }; 64 | 65 | matrix accum `M' = `varlist' `ww', noconstant; 66 | matrix `M' = `M' / (C*(T-1)); 67 | matrix `Mxy' = `M'[2..., 1]; 68 | matrix `Mxx' = `M'[2..., 2...]; 69 | matrix `invOMEGA' = inv(`Mxx'-`Sxx'); 70 | matrix `b' = (`invOMEGA' * (`Mxy'-`Sxy'))'; 71 | matrix rownames `b' = `y'; 72 | 73 | matrix colnames `b' = `x'; 74 | matrix score double `Xb' = `b'; 75 | generate double `e' = `y' - `Xb'; 76 | 77 | matrix accum `EX' = `e' `x' `ww', noconstant; 78 | matrix `EX' = `EX' / (C*(T-1)); 79 | matrix `V' = `invOMEGA'*(`Mxx'*`EX'[1,1] +`EX'[2...,1]*`EX'[1,2...])*`invOMEGA'/(C*(T-1)); 80 | matrix `V1' =
`invOMEGA'*(`Mxx'*(`M'[1,1]-2*`Mxy''*`b''+`b'*`Mxx'*`b'')+`EX'[2...,1]*`EX'[1,2...])*`invOMEGA'/(C*(T-1)); 81 | restore; 82 | count if `touse'; 83 | 84 | matrix `V' =`V'+`V1'/`r(N)'*C*T; 85 | 86 | matrix colnames `V' = `x'; 87 | matrix rownames `V' = `x'; 88 | 89 | ereturn post `b' `V', esample(`touse') depname("`y'") obs(`r(N)'); 90 | 91 | ereturn local wtype "`weight'"; 92 | ereturn local wexp "`exp'"; 93 | ereturn local cmd "ppreg"; 94 | ereturn local depvar "`y'"; 95 | ereturn local title "Pseudo panel regression"; 96 | 97 | }; // quietly 98 | ppreg_replay; 99 | end; // program ppreg_est 100 | 101 | -------------------------------------------------------------------------------- /ppreg/ppreg.hlp: -------------------------------------------------------------------------------- 1 | {smcl} 2 | {* 13Oct2006}{...} 3 | {cmd:help ppreg} 4 | {hline} 5 | 6 | {title:Title} 7 | 8 | {p2colset 5 20 22 2}{...} 9 | {p2col :{hi: ppreg} {hline 2}}Pseudo-panel regression{p_end} 10 | {p2colreset}{...} 11 | 12 | 13 | {title:Syntax} 14 | 15 | {p 8 17 2} 16 | {cmd:ppreg} 17 | {depvar} {varlist} 18 | {ifin} 19 | {weight} 20 | {cmd:,} {opth t:ime(varname)} {opth c:ohort(varname)} 21 | 22 | {p 4 6 2} 23 | {opt aweight}s, {opt fweight}s, {opt iweight}s, and {opt pweight}s are allowed; see {help weight}.{p_end} 24 | 25 | {title:Description} 26 | 27 | {pstd} 28 | {opt ppreg} fits fixed-effects model with correction for the measurement error in observed cohort means. 29 | 30 | {pstd} 31 | Let us suppose that we have a set of repeated independent cross-sections for {it:t}-time periods and we are 32 | estimating the following model: 33 | 34 | y_ht=x_ht*beta+theta_h+e_ht (1) 35 | 36 | A 'cohort' is defined as a group with fixed membership, indifiduals of which can be identified as the show up in 37 | the survays.
38 | 39 | Then model (1) can be estimated with a "within estimator" computed from the cohort-period means, corrected for the measurement error in those means. 40 | 41 | {title:Options} 42 | 43 | {phang} 44 | {opt time(varname)} specifies the variable that identifies the time period in the dataset. If the dataset consists of a set 45 | of repeated annual cross-sections, variable {cmd:year} will be the time id. 46 | 47 | {phang} 48 | {opt cohort(varname)} specifies the variable that is used to define cohorts. For example, if cohorts are formed based 49 | on individuals' age (birth year) then {it:varname} will be the {cmd:age} ({cmd:birthdate}) variable. 50 | 51 | {title:Examples} 52 | 53 | {phang}{cmd:. ppreg y x1 x2, time(year) cohort(birthyear)}{p_end} 54 | 55 | 56 | {title:Also see} 57 | 58 | {psee} 59 | Online: {helpb xtreg}{p_end} 60 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | ## Set of Stata routines for poverty and inequality analysis 2 | 3 | - **egen_inequal**: module providing extension functions for -egen- command to generate inequality and poverty measures 4 | - **fastgini**: calculate Gini coefficient with jackknife standard errors 5 | - **gicurve**: generates the growth incidence curve and calculates a measure of the rate of pro-poor growth 6 | - **gidecomposition**: growth and inequality decomposition 7 | - **pov_robust**: poverty stochastic dominance analysis 8 | - **sedecomposition**: sectoral decomposition of poverty changes 9 | - **gconc**: generalized measures of concentration 10 | 11 | ## Endogenous variables and maximum likelihood estimation 12 | 13 | - **movestay**: endogenous switching regression. [Stata Journal article](https://www.stata-journal.com/article.html?article=st0071) 14 | - **switch_probit**: binary choice models with binary endogenous regressors.
[Stata Journal article](https://www.stata-journal.com/article.html?article=st0233) 15 | - **bioprobit**: bivariate ordered probit regression 16 | 17 | - **bestreg**: best variable subset selection 18 | - **xml_tab**: print-ready tables in Stata. [Stata Journal article](https://www.stata-journal.com/article.html?article=dm0037) 19 | - **lookfor_all**: search for variables/patterns in .dta files 20 | 21 | - **ppreg**: efficient estimation of regressions based on pseudo-panel data 22 | 23 | ## Installation instructions 24 | 25 | You can browse and install any of the commands by typing: 26 | 27 | ```. net from https://raw.githubusercontent.com/vavalomi/stata_tools/refs/heads/master ``` 28 | -------------------------------------------------------------------------------- /sedecomposition.pkg: -------------------------------------------------------------------------------- 1 | v 3 2 | d sedecomposition - Sectoral decomposition of poverty changes. 3 | d Distribution-Date: 26Mar2008 4 | 5 | f sedecomposition/sedecomposition.ado 6 | f sedecomposition/sedecomposition.hlp 7 | -------------------------------------------------------------------------------- /sedecomposition/sedecomposition.ado: -------------------------------------------------------------------------------- 1 | *! version 2.31 26Mar2008 M.
Lokshin 2 | 3 | #delimit ; 4 | /* sedecomposition: sectoral decomposition of a change in poverty between the data in memory (period 1) and the -using- file (period 2); returns r(b_sec) and r(b_tot) */ 5 | program define sedecomposition, rclass; 6 | version 8.0; 7 | 8 | syntax using [if] [aweight fweight], sector(varname numeric) 9 | [pline1(string) pline2(string) var1(varname numeric) var2(string) hc pg pgs] ; 10 | 11 | /* possible errors: each one stops with return code 198 so that callers can detect the failure */ 12 | if (missing("`poor1'") & (missing("`var1'") | missing("`pline1'")) ) {; 13 | display as error "You need to specify either pline()--var() pairs, or poor()"; 14 | exit 198; 15 | }; 16 | if ("`poor1'"=="" & "`var1'"=="") {; 17 | display as error "You need to specify welf() if you do not specify poor()"; 18 | exit 198; 19 | }; 20 | if ("`poor1'"~="" & "`poor2'"=="") {; 21 | display as error "You need to specify poor2 if you specify poor1"; 22 | exit 198; 23 | }; 24 | if ("`var1'"~="" & "`var2'"=="") {; 25 | display as error "You need to specify var2 if you specify var1"; 26 | exit 198; 27 | }; if ("`pline2'"=="") {; display as error "You need to specify pline2 if you specify pline1"; exit 198; }; /* pline2 is used below for the second period */ 28 | 29 | quietly {; 30 | 31 | preserve; 32 | 33 | tempname POPS1 POOR_S1; 34 | tempvar _fgt1 _fgt2 _poor; 35 | 36 | local fgt=1; local titl1 = "HeadCount"; local titl2="(HC)"; /* indicator for HC */ 37 | if ("`pg'" ~="") {; 38 | local fgt=2; local titl1 = "Poverty Gap"; local titl2="(PG)"; }; /* indicator for PG */ 39 | if ("`pgs'"~="") {; 40 | local fgt=3; local titl1 = "Poverty Gap Squared"; local titl2="(PG2)"; }; /* indicator for PG2 */ 41 | 42 | if ("`if'"~="") keep `if'; /* restrict observation for user-specified if conditions */ 43 | 44 | generate byte `_poor'=(`var1'<=`pline1'); /* HC */ 45 | generate `_fgt1'=`_poor'*((`pline1'-`var1')/`pline1'); /* PG */ 46 | generate `_fgt2'=`_fgt1'^2; /* SP */ 47 | 48 | tabulate `sector' [`weight' `exp'], matcell(`POPS1'); local n_sec1=r(r); local n_obs1=r(N); 49 | matrix `POPS1' = `POPS1'/`n_obs1'; 50 | 51 | tabstat `_poor' `_fgt1' `_fgt2' [`weight' `exp'], by(`sector') s(mean) save; 52 | local p1t_1 = el(r(StatTot),1,1); /* total poverty level */ 53 | local p2t_1 = el(r(StatTot),1,2); /* total poverty gap */ 54 | local p3t_1 = el(r(StatTot),1,3); /* total poverty gap
squared */ 55 | local rlbl = subinstr(r(name1)," ","_",10); 56 | local lbl1 = r(name1); 57 | matrix `POOR_S1' = r(Stat1); /* means and count */ 58 | forvalues i=2/`n_sec1' {; 59 | matrix `POOR_S1' = `POOR_S1' \ r(Stat`i'); 60 | local lbl`i' = r(name`i'); /* main labels */ 61 | local rlbl `"`rlbl' `"`lbl`i''"'"'; 62 | }; 63 | matrix colname `POOR_S1'=P0 P1 P2; 64 | matrix rowname `POOR_S1'=`rlbl'; 65 | 66 | /* second file */ 67 | 68 | use `using', clear; 69 | tempname POOR_S2 POPS2 INTER_S INTRA_S SHIFT_S AC_1 AC_2 AC_3 AC PC_1 PC_2 PC_3 PC; 70 | 71 | if ("`if'"~="") keep `if'; 72 | 73 | generate byte `_poor' = (`var2'<=`pline2'); /* HC */ 74 | generate `_fgt1' = `_poor'*((`pline2'-`var2')/`pline2'); /* PG */ 75 | generate `_fgt2' = `_fgt1'^2; /* SP */ 76 | 77 | tabulate `sector' [`weight' `exp'], matcell(`POPS2'); local n_sec2=r(r); local n_obs2=r(N); 78 | matrix `POPS2' = `POPS2'/`n_obs2'; 79 | if (`n_sec2'~=`n_sec1') {; 80 | noisily disp _newline in red "Number of sectors is different in year 1 and year 2"; 81 | exit 459; }; 82 | 83 | tabstat `_poor' `_fgt1' `_fgt2' [`weight' `exp'], by(`sector') s(mean) save; 84 | local p1t_2 = el(r(StatTot),1,1); /* total poverty level */ 85 | local p2t_2 = el(r(StatTot),1,2); /* total poverty gap */ 86 | local p3t_2 = el(r(StatTot),1,3); /* total poverty gap squared */ 87 | matrix `POOR_S2' = r(Stat1); 88 | forvalues i=2/`n_sec2' {; matrix `POOR_S2' = `POOR_S2' \ r(Stat`i'); }; 89 | matrix colname `POOR_S2' = P0 P1 P2; 90 | 91 | 92 | matrix `AC_1'=vecdiag((`POOR_S2'[1...,"P0"]-`POOR_S1'[1...,"P0"])*`POPS1'')'; /* absolute change P0 */ 93 | matrix `AC_2'=vecdiag((`POOR_S2'[1...,"P1"]-`POOR_S1'[1...,"P1"])*`POPS1'')'; /* absolute change P1 */ 94 | matrix `AC_3'=vecdiag((`POOR_S2'[1...,"P2"]-`POOR_S1'[1...,"P2"])*`POPS1'')'; /* absolute change P2 */ 95 | matrix `AC' =`AC_1',`AC_2',`AC_3'; 96 | 97 | local p1_change = `p1t_2'-`p1t_1'; 98 | local p2_change = `p2t_2'-`p2t_1'; 99 | local p3_change = `p3t_2'-`p3t_1'; 100 | 101 | matrix
`PC_1'=`AC_1'*100/`p1_change'; /* percent change relative to total poverty change */ 102 | matrix `PC_2'=`AC_2'*100/`p2_change'; /* % change in poverty gap */ 103 | matrix `PC_3'=`AC_3'*100/`p3_change'; /* % change in poverty gap squared */ 104 | matrix `PC' =`PC_1',`PC_2',`PC_3'; 105 | 106 | /* Total effect calculation */ 107 | matrix `INTRA_S'=(`POOR_S2'[1...,`fgt']-`POOR_S1'[1...,`fgt'])'*`POPS1'; local intra_e=`INTRA_S'[1,1]; 108 | matrix `INTER_S'=(`POOR_S2'[1...,`fgt']-`POOR_S1'[1...,`fgt'])'*(`POPS2'-`POPS1'); local inter_e=`INTER_S'[1,1]; 109 | matrix `SHIFT_S'=(`POPS2' -`POPS1' )'*`POOR_S1'[1...,`fgt']; local shift_e=`SHIFT_S'[1,1]; 110 | }; // end quietly 111 | 112 | display as text _newline "{hline 70}"; 113 | display as text " Sectoral Decomposition of a Change in Poverty: " "`titl1'"; 114 | display as text "{hline 70}"; 115 | display as text " Poverty in period 1 " "`titl1'" _col(51) in yellow %6.4f `p`fgt't_1'*100 ; 116 | display as text " Poverty in period 2 " "`titl1'" _col(51) in yellow %6.4f `p`fgt't_2'*100 ; 117 | display as text "{hline 70}"; 118 | display as text " Sector Population share Absolute Percentage"; 119 | display as text " in period 1 change change"; 120 | display as text "{hline 70}"; 121 | 122 | forvalues i=1(1)`n_sec1'{; 123 | disp in yellow _col(4) "`lbl`i''" 124 | _col(32) %8.2f `POPS1'[`i',1]*100 125 | _col(49) %8.4f `AC'[`i',`fgt']*100 126 | _col(65) %6.2f `PC'[`i',`fgt']; }; 127 | matrix b_sec = (`POPS1'[1...,1]*100, `AC'[1...,`fgt']*100, `PC'[1...,`fgt']); 128 | matrix rowname b_sec = `rlbl'; 129 | matrix colname b_sec = "Population share in period 1" "Absolute change" "Percentage change"; 130 | 131 | disp in green "{hline 70}"; 132 | disp in green " Total Intra-sectoral effect" _col(49) 133 | in yellow %8.4f `intra_e'*100 _col(65) %6.2f `intra_e'*100/`p`fgt'_change'; 134 | disp in green " Population-shift effect" _col(49) 135 | in yellow %8.4f `shift_e'*100 _col(65) %6.2f `shift_e'*100/`p`fgt'_change'; 136 | disp in green "
Interaction effect" _col(49) 137 | in yellow %8.4f `inter_e'*100 _col(65) %6.2f `inter_e'*100/`p`fgt'_change'; 138 | disp in green "{hline 70}"; 139 | disp in green " Change in poverty " "`titl2'" _col(34) 140 | in yellow _col(49) %8.4f `p`fgt'_change'*100 _col(65) %6.2f 100 in green _newline "{hline 70}"; 141 | matrix b_tot = (`p`fgt'_change'*100, 100 \ 142 | `intra_e'*100 ,`intra_e'*100/`p`fgt'_change' \ 143 | `shift_e'*100 ,`shift_e'*100/`p`fgt'_change' \ 144 | `inter_e'*100 , `inter_e'*100/`p`fgt'_change'); 145 | matrix rowname b_tot ="Change in poverty `titl2'" "Total Intra-sectoral effect" "Population-shift effect" "Interaction effect"; 146 | matrix colname b_tot ="Absolute change" "Percentage change"; 147 | 148 | return matrix b_sec = b_sec; 149 | return matrix b_tot = b_tot; 150 | restore; 151 | end; 152 | 153 | 154 | 155 | 156 | -------------------------------------------------------------------------------- /sedecomposition/sedecomposition.hlp: -------------------------------------------------------------------------------- 1 | {smcl} 2 | {* 22January2003}{...} 3 | {hline} 4 | help for {hi:sedecomposition}{right:(Michael Lokshin and Martin Ravallion, 2004,The World Bank)} 5 | {hline} 6 | 7 | {title: Sectoral Decomposition of a Change in Poverty} 8 | 9 | {p 8 12}{cmd:sedecomposition} {cmd:using} {it:file1} [{it:weight}] [{cmd:if} {it:expr}] {cmd:,} {cmd:sector(}{it:varname}{cmd:)} 10 | {cmd:pline1(}{it:varname/#}{cmd:)} {cmd:pline2(}{it:varname/#}{cmd:)} {cmd:var1(}{it:varname}{cmd:)} {cmd:var2(}{it:varname}{cmd:)} 11 | [{cmd:hc} {cmd:pg} {cmd:pgs}] 12 | 13 | {p}where {p_end} 14 | {p 8 8} {cmd:file1} is a file with a second year data. {p_end} 15 | 16 | {p 8 8} {cmd:var1} is a welfare indicator variable in the first year data. {p_end} 17 | {p 8 8} {cmd:var2} is a welfare indicator variable in the second year data.
{p_end} 18 | 19 | {p 8 8} {cmd:pline1} is a poverty line for the first year data that could be specified either 20 | as a numeric value or as a variable.{p_end} 21 | {p 8 8} {cmd:pline2} is a poverty line for the second year data that could be specified either 22 | as a numeric value or as a variable.{p_end} 23 | 24 | {p 8 8} {cmd:hc} is an option for poverty headcount decomposition {p_end} 25 | {p 8 8} {cmd:pg} is an option for poverty gap decomposition {p_end} 26 | {p 8 8} {cmd:pgs} is an option for poverty gap squared decomposition {p_end} 27 | 28 | 29 | {title:Description} 30 | 31 | {p}{cmd:sedecomposition} A change in poverty between two dates could be decomposed into three components: 32 | The {it:Intra-sectoral component} shows the contribution of poverty changes within the sectors, controlling for 33 | their base period population share. The {it:Population-shift} component tells us how much of the poverty in the 34 | first date was reduced by the various changes in population shares of sectors between then and the second date. 35 | The {it:Interaction} component arises from the possible correlation between sectoral gains and population 36 | shifts; the sign of the interaction effect tells us whether people tended to switch to the sectors where 37 | poverty was falling or not. 38 | {p_end} 39 | 40 | {p}{cmd:sedecomposition} uses observations from the dataset currently in memory (the master dataset) 41 | and observations from the Stata-format dataset stored as {it:file1} (using dataset).{p_end} 42 | 43 | 44 | {title:Options} 45 | 46 | {p 0 4} {it:file1} is the file with the second-year data. This file should contain the welfare 47 | indicator variable (e.g., expenditure or income) and all the variables that are used in 48 | the {cmd:if} and/or {cmd:weight} statements. That file must be specified. If filename is specified without an 49 | extension, .dta is assumed.
In order to run {cmd:sedecomposition} you should have one of 50 | the data files open.{p_end} 51 | 52 | {p 0 4} {cmd:var1} is a welfare indicator variable in the first year data. This variable 53 | should be present in the opened file. This variable must be specified together with {cmd:pline1} variable. 54 | This variable should not be specified if {cmd:poor1} variable is specified. {p_end} 55 | 56 | {p 0 4} {cmd:var2} is a welfare indicator variable in the second year data. This variable 57 | should be present in the {cmd:using} file. This variable must be specified together with {cmd:pline2} variable. 58 | This variable should not be specified if {cmd:poor2} variable is specified.{p_end} 59 | 60 | {p 0 4} {cmd:pline1} is a poverty line for the first year of data. This poverty line can be 61 | specified either as a number or as a variable that contains the value corresponding to the poverty line in the first date. 62 | This variable must be specified together with {cmd:var1} variable. This variable should not be specified if {cmd:poor1} variable is specified. 63 | {p_end} 64 | 65 | {p 0 4} {cmd:pline2} is a poverty line for the second year of data. This poverty line can be 66 | specified either as a number or as a variable that contains the value corresponding to the poverty line in the second date. 67 | This variable must be specified together with {cmd:var2} variable. This variable should not be specified if {cmd:poor2} variable is specified. 68 | {p_end} 69 | 70 | {p 0 4} {cmd:hc}, {cmd:pg}, {cmd:pgs} are optional indicators of the poverty measure to be decomposed. If {cmd:hc} 71 | is specified the poverty head count (FGT_0) decomposition will be performed. Specifying {cmd:pg} or {cmd:pgs} will 72 | produce sectoral decomposition of poverty gap or squared poverty gap, respectively. If none of these options 73 | is specified, head count decomposition will be performed. 74 | {p_end} 75 | 76 | 77 | 78 | {title:Examples} 79 | 80 | {p 4 8}{inp:.
use data1}{p_end} 81 | 82 | {p 4 8}{inp:. sedecomposition using data2, sector(sector) var1(expend_1) var2(expend_2) pline1(2000) pline2(2200) pg}{p_end} 83 | 84 | {p 4 8}{inp:. sedecomposition using data2 if region==1 [w=tot_weight], sector(sector) var1(expend_1) var2(expend_2) pline1(pl1) pline2(pl2)}{p_end} 85 | 86 | {title:Also see} 87 | 88 | {p 1 14} Reference: {hi: Monica Huppi and Martin Ravallion (1991) "The Sectoral Structure of Poverty During an Adjustment Period. Evidence for Indonesia in the Mid-1980s", {it:World Development} 19: 1653-1678} 89 | 90 | {title:Authors} 91 | 92 | {p 4 4 2} Michael Lokshin, DECRG, The World Bank. If you observe any problems 93 | {browse "mailto:mlokshin@worldbank.org"}. 94 | 95 | {p 4 4} Martin Ravallion, DECRG, The World Bank. 96 | 97 | -------------------------------------------------------------------------------- /stata.toc: -------------------------------------------------------------------------------- 1 | v 3 2 | 3 | p bestreg Best variable subset selection 4 | p bioprobit Bivariate ordered probit regression 5 | p egen_inequal Extension functions for -egen- command to generate inequality and poverty measures 6 | p elmo Sub-group inequality decomposition 7 | p fastgini Gini coefficient with jackknife standard errors 8 | p gicurve Growth incidence curve and a measure of the rate of pro-poor growth 9 | p gidecomposition Growth and inequality decomposition 10 | p gconc Generalized measures of concentration 11 | p googletrans Translate strings using Google Translate web API 12 | p lookfor_all Search for variables/patterns in .dta files 13 | p movestay Endogenous switching regression 14 | p pov_robust Poverty stochastic dominance analysis 15 | p ppreg Efficient estimation of regressions based on pseudo-panel data 16 | p sedecomposition Sectoral decomposition of poverty changes 17 | p switch_probit Binary choice models with binary endogenous regressors 18 | p xml_tab Print-ready tables in Stata
-------------------------------------------------------------------------------- /switch_probit/switch_probit.ado: -------------------------------------------------------------------------------- 1 | *! version 1.0 14Jan2009 M. Lokshin, Z. Sajaia 2 | 3 | # delimit ; 4 | 5 | program define switch_probit, properties(ml_score); // entry point: redisplays stored results when replay() is true, otherwise estimates 6 | version 8.1; 7 | if replay() {; 8 | if ("`e(cmd)'" != "switch_probit") error 301; 9 | switch_probit_replay `0'; 10 | }; 11 | else switch_probit_mx `0'; 12 | 13 | end; // program switch_probit 14 | 15 | program define switch_probit_replay; // displays the coefficient table with rho1/rho0 and the test of independent equations 16 | syntax [,Level(cilevel)]; 17 | 18 | local rho1 diparm(athrho1, tanh label("rho1")); 19 | local rho0 diparm(athrho0, tanh label("rho0")); 20 | 21 | _coef_table_header; 22 | display; 23 | _coef_table, level(`level') `rho1' `rho0' notest; 24 | 25 | if "`e(vcetype)'" != "Robust" local testtyp LR; 26 | else local testtyp Wald; 27 | display as text "`testtyp' test of indep. eqns. (rho1=rho0=0):" 28 | _col(38) "chi2(" as result "2" as text ") = " 29 | as result %8.2f e(chi2_c) 30 | _col(59) as text "Prob > chi2 = " as result %6.4f e(p_c); 31 | display in smcl in green "{hline 78}"; 32 | _prefix_footnote; 33 | exit e(rc); 34 | end; // program switch_probit_replay 35 | 36 | program define switch_probit_mx, eclass; // parses the equations, builds starting values from probit fits and maximizes with ml 37 | 38 | syntax anything(id="equations id" equalok) [pweight iweight fweight] 39 | [if] [in], SELect(string) [CLuster(varname) Robust noSKIP noCONstant noLOg CRITTYPE(passthru) 40 | offset_s(varname) offset1(varname) offset0(varname) *]; 41 | mlopts mlopts, `options'; 42 | local coll `s(collinear)'; 43 | 44 | // read vector of parameters 45 | 46 | gettoken prob1 prob0_ : anything, match(parns) bind; 47 | if ("`parns'"!="(") {; 48 | local prob1 "`anything'"; 49 | local prob0_; 50 | }; 51 | tokenize "`prob1'", parse("()"); 52 | if ("`1'"!="`prob1'") {; 53 | display as error "If more than one, equations must be enclosed in brackets"; 54 | exit 198; 55 | }; 56 | 57 | tokenize "`prob1'", parse("="); 58 |
if ("`2'"!="=") {; 59 | tokenize "`prob1'"; 60 | local y_prob1 `1'; 61 | macro shift; 62 | local x_prob1 `*'; 63 | }; 64 | else {; // else_63 65 | if ("`4'"=="=") {; 66 | display as error "If more than one, equations must be enclosed in brackets"; 67 | exit 198; 68 | }; 69 | local y_prob1 `1'; 70 | local x_prob1 `3'; 71 | }; // else_63 72 | unab x_prob1 : `x_prob1', min(0); 73 | local l_y1 "`y_prob1'"; 74 | 75 | if ("`prob0_'"!="") {; // if_64 76 | gettoken prob0 : prob0_ , match(parns) bind; 77 | if ("`parns'"!="(") local prob0 "`prob0_'"; 78 | tokenize "`prob0'", parse("="); 79 | if ("`2'"!="=") {; // if_66 80 | tokenize "`prob0'"; 81 | local y_prob0 `1'; 82 | macro shift; 83 | local x_prob0 `*'; 84 | }; // if_66 85 | else {; 86 | if ("`4'"=="=") {; 87 | display as error "If more than one, equations must be enclosed in brackets"; 88 | exit 198; 89 | }; 90 | local y_prob0 `1'; local x_prob0 `3'; 91 | }; // else 92 | local l_y0 "`y_prob0'"; 93 | }; // if_64 94 | else {; 95 | local y_prob0 `y_prob1'; 96 | local x_prob0 `x_prob1'; 97 | 98 | local l_y1 "`y_prob1'_1"; 99 | local l_y0 "`y_prob1'_0"; 100 | }; // else 101 | unab x_prob0 : `x_prob0', min(0); 102 | 103 | tokenize "`select'", parse("="); // define vars for regression equation 104 | if ("`2'"!="=") {; 105 | tokenize "`select'"; 106 | local y_sel `1'; 107 | macro shift; 108 | local x_sel `*'; 109 | }; 110 | else {; 111 | local y_sel `1'; 112 | local x_sel `3'; 113 | }; 114 | unab x_sel : `x_sel', name("selection equation"); 115 | 116 | // the CRITTYPE(passthru) option is stored by -syntax- in the local macro named crittype 117 | if ("`weight'" == "pweight" | "`cluster'" != "") {; 118 | local robust "robust"; 119 | if missing(`"`crittype'"') local crtype crittype("log pseudolikelihood"); else local crtype `crittype'; 120 | }; 121 | else local crtype `crittype'; 122 | if ~missing("`cluster'") local clopt "cluster(`cluster')"; 123 | if ~missing("`weight'") local wgt `"[`weight'`exp']"'; 124 | if ~missing("`log'") local quietly "quietly"; 125 | if ~missing("`offset_s'") local offo_s "offset(`offset_s')"; 126 | if
~missing("`offset1'") local offo1 "offset(`offset1')"; 127 | if ~missing("`offset0'") local offo0 "offset(`offset0')"; 128 | 129 | // Drop observations with missing values 130 | marksample touse; 131 | markout `touse' `y_sel' `x_sel' `cluster' `offset_s', strok; 132 | 133 | quietly {; 134 | tabulate `y_sel' if `touse'; 135 | if (r(r) != 2) {; 136 | noisily display as error "`y_sel' must have exactly two distinct values"; 137 | exit 2000; 138 | }; 139 | 140 | tempvar touse1 touse0; 141 | generate byte `touse1'=`touse'; 142 | generate byte `touse0'=`touse'; 143 | markout `touse1' `y_prob1' `x_prob1' `offset1'; 144 | markout `touse0' `y_prob0' `x_prob0' `offset0'; 145 | replace `touse' = cond(`y_sel'==1,`touse1', `touse0') if `touse'; 146 | 147 | // conditional collinearity 148 | forvalues i =0/1 {; 149 | tabulate `y_prob`i'' if `touse`i''; 150 | if (r(r) != 2) {; 151 | noisily display as error "`y_prob`i'' must have exactly two distinct values"; 152 | exit 2000; 153 | }; 154 | 155 | _rmdcoll `y_prob`i'' `x_prob`i'' if `touse' & `y_sel'==`i', `coll'; 156 | local result "`r(varlist)'"; 157 | local colls`i': list x_prob`i' - result; 158 | if ~missing("`colls`i''") {; 159 | noisily display as text "note: `colls`i'' dropped from the `i' equation due to collinearity"; 160 | local x_prob`i' `result'; 161 | }; 162 | }; // i 163 | _rmdcoll `y_sel' `x_sel' if `touse', `coll'; 164 | local result "`r(varlist)'"; 165 | local colls: list x_sel - result; 166 | noisily display _newline; 167 | if ~missing("`colls'") noisily display as text "note: `colls' dropped from the selection equation due to collinearity"; 168 | local x_sel `result'; 169 | 170 | }; // quietly 171 | tempname llc b0 TMP b0sel b00; 172 | tempvar nshaz1 nshaz0; 173 | 174 | `quietly' display in green _n "Fitting probit model for `y_sel'=1:"; 175 | `quietly' probit `y_prob1' `x_prob1' `wgt' if `touse' & `y_sel'==1, `constant' nocoef `crtype' `offo1'; 176 | scalar `llc' = e(ll); 177 | 178 | `quietly' display in green
_n "Fitting probit model for `y_sel'=0:"; 179 | `quietly' probit `y_prob0' `x_prob0' `wgt' if `touse' & `y_sel'==0, `constant' nocoef `crtype' `offo0'; 180 | scalar `llc' = `llc' + e(ll); 181 | 182 | `quietly' display in green _n "Fitting selection model:"; 183 | `quietly' probit `y_sel' `x_sel' `wgt' if `touse', asis nocoef `crtype' `offo_s'; 184 | matrix `b0sel' = e(b); 185 | if missing("`robust'") {; 186 | scalar `llc' = `llc' + e(ll); 187 | `quietly' display in green _n "Comparison: log likelihood = " in yellow %10.0g `llc'; 188 | }; 189 | 190 | quietly predict double `nshaz1', xb; 191 | quietly generate double `nshaz0' = normd(`nshaz1') / (1-normprob(`nshaz1')); // sel = 0; 192 | quietly replace `nshaz1' = normd(`nshaz1') / normprob(`nshaz1'); // sel = 1; 193 | 194 | if missing("`constant'") {; 195 | tempvar one; 196 | generate byte `one' = 1; 197 | }; 198 | 199 | if ~missing("`skip'") {; 200 | `quietly' display in green _n "Fitting constant-only starting values:"; 201 | `quietly' probit `y_prob1' `one' `nshaz1' `wgt' if `touse' & `y_sel'==1, noconstant nocoef asis `crtype' `offo1'; 202 | 203 | matrix `TMP' = e(b); 204 | local k = colsof(`TMP'); 205 | 206 | tempname rho athrho0 athrho1; 207 | 208 | scalar `rho' = _b[`nshaz1']; 209 | scalar `rho' = max(min(`rho',.85), -.85); 210 | scalar `athrho1' = atanh(`rho'); 211 | 212 | matrix `b00' = `b0sel' , `TMP'[1,1..`k'-1]; 213 | 214 | `quietly' probit `y_prob0' `one' `nshaz0' `wgt' if `touse' & `y_sel'==0, noconstant nocoef asis `crtype' `offo0'; 215 | 216 | matrix `TMP' = e(b); 217 | local k = colsof(`TMP'); 218 | 219 | scalar `rho' = _b[`nshaz0']; 220 | scalar `rho' = max(min(`rho',.85), -.85); 221 | scalar `athrho0' = -atanh(`rho'); 222 | 223 | matrix `b00' = `b00' , `TMP'[1,1..`k'-1], `athrho1', `athrho0'; 224 | local from0 "`b00', copy"; 225 | }; 226 | 227 | `quietly' display in green _n "Fitting starting values:"; 228 | `quietly' probit `y_prob1' `x_prob1' `one' `nshaz1' `wgt' if `touse' & `y_sel'==1,
noconstant nocoef asis `crtype' `offo1'; 229 | 230 | matrix `TMP' = e(b); 231 | local k = colsof(`TMP'); 232 | 233 | tempname rho athrho1 athrho0; 234 | 235 | scalar `rho' = _b[`nshaz1']; 236 | scalar `rho' = max(min(`rho',.85), -.85); 237 | scalar `athrho1' = atanh(`rho'); 238 | 239 | matrix `b0' = `b0sel' , `TMP'[1,1..`k'-1]; 240 | 241 | `quietly' probit `y_prob0' `x_prob0' `one' `nshaz0' `wgt' if `touse' & `y_sel'==0, noconstant nocoef asis `crtype' `offo0'; 242 | 243 | matrix `TMP' = e(b); 244 | local k = colsof(`TMP'); 245 | 246 | scalar `rho' = _b[`nshaz0']; 247 | scalar `rho' = max(min(`rho',.85), -.85); 248 | scalar `athrho0' = -atanh(`rho'); 249 | 250 | matrix `b0' = `b0' , `TMP'[1,1..`k'-1], `athrho1', `athrho0'; 251 | local from "`b0', copy"; 252 | 253 | if ~missing("`skip'") {; 254 | `quietly' display in green _n "Fitting constant-only model:"; 255 | 256 | capture noisily ml model d2 switch_probit_d2 257 | (`y_sel' : `y_sel' = `x_sel', `offo_s') 258 | (`l_y1' : `y_prob1' =, `offo1') 259 | (`l_y0' : `y_prob0' =, `offo0') 260 | /athrho1 261 | /athrho0 262 | `wgt' if `touse' , waldtest(0) 263 | collinear missing maximize nooutput nopreserve 264 | init(`from0') search(off) `log' `mlopts' `crittype' 265 | nocnsnotes; 266 | if _rc == 1400 & "`from'" == "`b0', copy" {; 267 | display as text "note: default initial values infeasible; starting from B=0"; 268 | 269 | ml model d2 switch_probit_d2 270 | (`y_sel' : `y_sel' = `x_sel', `offo_s') 271 | (`l_y1' : `y_prob1' =, `offo1') 272 | (`l_y0' : `y_prob0' =, `offo0') 273 | /athrho1 274 | /athrho0 275 | `wgt' if `touse' , waldtest(0) 276 | collinear missing maximize nooutput nopreserve difficult 277 | init(/athrho1=0 /athrho0=0) search(off) `log' `mlopts' `crittype' nocnsnotes; 278 | }; 279 | else if _rc {; 280 | error _rc; 281 | }; 282 | local continu "continue"; 283 | }; 284 | 285 | global counter=0; 286 | 287 | `quietly' display in green _n "Fitting full model:"; 288 | 289 | capture noisily ml model d2
switch_probit_d2 290 | (`y_sel' : `y_sel' = `x_sel', `offo_s') 291 | (`l_y1' : `y_prob1' = `x_prob1', `offo1') 292 | (`l_y0' : `y_prob0' = `x_prob0', `offo0') 293 | /athrho1 294 | /athrho0 295 | if `touse' `wgt', 296 | collinear missing maximize nooutput nopreserve difficult 297 | title("Switching probit model") 298 | init(`from') search(off) `continu' `log' `mlopts' `crittype' `robust' `clopt'; 299 | 300 | if _rc == 1400 & "`from'" == "`b0', copy" {; 301 | display as text "note: default initial values infeasible; starting from B=0"; 302 | 303 | ml model d2 switch_probit_d2 304 | (`y_sel' : `y_sel' = `x_sel', `offo_s') 305 | (`l_y1' : `y_prob1' = `x_prob1', `offo1') 306 | (`l_y0' : `y_prob0' = `x_prob0', `offo0') 307 | /athrho1 308 | /athrho0 309 | `wgt' 310 | if `touse' 311 | , 312 | title("Switching probit model") 313 | difficult collinear missing maximize nooutput nopreserve 314 | init(/athrho1=0 /athrho0=0) search(off) `clopt' `continu' `robust' `mlopts' `crittype' `log'; 315 | 316 | }; 317 | else if _rc error _rc; 318 | 319 | ereturn scalar k_aux = 2; 320 | 321 | if missing("`robust'") {; // test of independent equations 322 | ereturn scalar ll_c = `llc'; 323 | ereturn scalar chi2_c = abs(-2*(e(ll)-e(ll_c))); 324 | ereturn local chi2_ct "LR"; 325 | }; 326 | else {; 327 | quietly test [athrho1]_cons = [athrho0]_cons = 0; 328 | ereturn scalar chi2_c = r(chi2); 329 | ereturn local chi2_ct "Wald"; 330 | }; 331 | ereturn scalar p_c = chiprob(2, e(chi2_c)); 332 | 333 | ereturn scalar rho1 = tanh([athrho1]_b[_cons]); 334 | ereturn scalar rho0 = tanh([athrho0]_b[_cons]); 335 | 336 | ereturn local marginsok "zb xb1 xb0 p11 p01 p10 p00 psel pcond1 pcond0 te"; 337 | ereturn local marginsnotok "stdp1 stdp0 mte"; // tt tu 338 | ereturn local depvar "`y_sel' `y_prob1' `y_prob0'"; 339 | ereturn local predict "switch_probit_p"; 340 | ereturn local cmd "switch_probit"; 341 | 342 | switch_probit_replay; 343 | 344 | end; // program switch_probit_mx 345 | 346 |
-------------------------------------------------------------------------------- /switch_probit/switch_probit_d2.ado: -------------------------------------------------------------------------------- 1 | #delimit ; 2 | // ml method-d2 evaluator for switch_probit: returns the log likelihood and, on request, the gradient and negative Hessian 3 | program define switch_probit_d2; 4 | args todo b lnf g negH g1 g2 g3 g4 g5; 5 | // equations 1-3 supply linear predictors and equations 4-5 supply scalars that tanh() maps to the two correlations 6 | tempname theta1 theta0 zb xb1 xb0; 7 | tempvar lnf1; 8 | 9 | mleval `zb' =`b', eq(1); 10 | mleval `xb1' =`b', eq(2); 11 | mleval `xb0' =`b', eq(3); 12 | mleval `theta1' =`b', eq(4) scalar; 13 | mleval `theta0' =`b', eq(5) scalar; 14 | 15 | tempname rho1 rho0; 16 | 17 | scalar `rho1' = tanh(`theta1'); 18 | scalar `rho0' = tanh(`theta0'); 19 | 20 | quietly {; 21 | // recode the 0/1 responses to -1/+1 signs and pick the outcome, index and correlation of the observed regime 22 | tempvar qseli qi xb rhoi; 23 | generate byte `qseli' = (2*$ML_y1-1); 24 | generate byte `qi' = (2*$ML_y2-1) if $ML_y1==1; 25 | replace `qi' = (2*$ML_y3-1) if $ML_y1==0; 26 | 27 | generate double `xb' = `xb1' if $ML_y1==1; 28 | replace `xb' = `xb0' if $ML_y1==0; 29 | 30 | generate double `rhoi' = `qseli'*`qi'*`rho1' if $ML_y1==1; 31 | replace `rhoi' = `qseli'*`qi'*`rho0' if $ML_y1==0; 32 | // per-observation likelihood is the bivariate normal probability of the observed (selection, outcome) pair 33 | generate double `lnf1' = binorm(`qseli'*`zb', `qi'*`xb', `rhoi'); 34 | 35 | mlsum `lnf' = log(`lnf1'); 36 | // stop here when only the log likelihood was requested or it is missing 37 | if (`todo'==0 | `lnf'==.) 
exit; 38 | // building blocks for the derivatives where phi2 is the bivariate standard normal density at (w1i, w2i) with correlation rhoi 39 | tempvar di w1i w2i g1i g2i phi2; 40 | 41 | generate double `di' = 1/(sqrt(1-`rhoi'^2)); 42 | generate double `w1i' = `qseli'*`zb'; 43 | generate double `w2i' = `qi' *`xb'; 44 | generate double `g1i' = normden(`w1i')*norm(`di' * (`w2i' - `rhoi'*`w1i')); 45 | generate double `g2i' = normden(`w2i')*norm(`di' * (`w1i' - `rhoi'*`w2i')); 46 | generate double `phi2' = exp(-0.5*(`w1i'^2+`w2i'^2-2*`rhoi'*`w1i'*`w2i')/(1-`rhoi'^2))/(2*_pi*sqrt(1-`rhoi'^2)); 47 | // first derivative of rho = tanh(theta) with respect to theta, written with exponentials 48 | tempname drho1_dtheta1 drho0_dtheta0; 49 | scalar `drho1_dtheta1' = 4*exp(2*`theta1')/((1+exp(2*`theta1'))^2); 50 | scalar `drho0_dtheta0' = 4*exp(2*`theta0')/((1+exp(2*`theta0'))^2); 51 | 52 | // scores 53 | replace `g1' = `qseli'*`g1i'/`lnf1'; 54 | replace `g2' = `qi' *`g2i'/`lnf1' *($ML_y1==1); 55 | replace `g3' = `qi' *`g2i'/`lnf1' *($ML_y1==0); 56 | replace `g4' = `qi' *`phi2'/`lnf1'*`drho1_dtheta1'*($ML_y1==1); 57 | replace `g5' = -`qi' *`phi2'/`lnf1'*`drho0_dtheta0'*($ML_y1==0); 58 | // accumulate one gradient block per equation from its score variable, then join them into the full gradient row 59 | tempname d1 d2 d3 d4 d5; 60 | 61 | mlvecsum `lnf' `d1' = `g1', eq(1); 62 | mlvecsum `lnf' `d2' = `g2', eq(2); 63 | mlvecsum `lnf' `d3' = `g3', eq(3); 64 | mlvecsum `lnf' `d4' = `g4', eq(4); 65 | mlvecsum `lnf' `d5' = `g5', eq(5); 66 | 67 | matrix `g' = (`d1', `d2', `d3', `d4', `d5'); 68 | // stop here when the Hessian was not requested 69 | if (`todo'==1 | `lnf'==. 
) exit; 70 | // second derivative of tanh(theta) for each correlation (d2rho2_dtheta22 is the one evaluated at theta0) 71 | tempname d2rho1_dtheta12 d2rho2_dtheta22; 72 | 73 | scalar `d2rho1_dtheta12' = 8*exp(2*`theta1')*(1-exp(2*`theta1'))/(exp(2*`theta1')+1)^3; 74 | scalar `d2rho2_dtheta22' = 8*exp(2*`theta0')*(1-exp(2*`theta0'))/(exp(2*`theta0')+1)^3; 75 | 76 | tempname d1d1 d1d2 d1d3 d1d4 d1d5 77 | d2d2 d2d3 d2d4 d2d5 78 | d3d3 d3d4 d3d5 79 | d4d4 d4d5 80 | d5d5; 81 | 82 | // upper-triangle blocks of the negative Hessian where the cross-regime blocks (2,3) (2,5) (3,4) (4,5) are set to zero 83 | mlmatsum `lnf' `d1d1' = (`w1i'*`g1i'+`rhoi'*`phi2'+`g1i'^2/`lnf1')/`lnf1', eq(1); 84 | mlmatsum `lnf' `d1d2' = (`qseli'*`qi')/`lnf1'*(`g1i'*`g2i'/`lnf1'-`phi2') if $ML_y1==1, eq(1, 2); 85 | mlmatsum `lnf' `d1d3' = (`qseli'*`qi')/`lnf1'*(`g1i'*`g2i'/`lnf1'-`phi2') if $ML_y1==0, eq(1, 3); 86 | mlmatsum `lnf' `d1d4' = `qi'*`phi2'/`lnf1'*`drho1_dtheta1'* 87 | (-`rhoi'*`di'^2*(`w2i'-`rhoi'*`w1i')+`w1i'+`g1i'/`lnf1') if $ML_y1==1, eq(1, 4); 88 | 89 | mlmatsum `lnf' `d1d5' = `qi'*`phi2'/`lnf1'*`drho0_dtheta0'* 90 | (-`rhoi'*`di'^2*(`w2i'-`rhoi'*`w1i')+`w1i'+`g1i'/`lnf1') if $ML_y1==0, eq(1, 5); 91 | 92 | mlmatsum `lnf' `d2d2' = (`w2i'*`g2i'+`rhoi'*`phi2'+`g2i'^2/`lnf1')/`lnf1' if $ML_y1==1, eq(2); 93 | mlmatsum `lnf' `d2d3' = 0, eq(2, 3); 94 | mlmatsum `lnf' `d2d4' = `qseli'*`phi2'/`lnf1'*`drho1_dtheta1'* 95 | (-`rhoi'*`di'^2*(`w1i'-`rhoi'*`w2i')+`w2i'+`g2i'/`lnf1') if $ML_y1==1, eq(2, 4); 96 | mlmatsum `lnf' `d2d5' = 0, eq(2, 5); 97 | 98 | mlmatsum `lnf' `d3d3' = (`w2i'*`g2i'+`rhoi'*`phi2'+`g2i'^2/`lnf1')/`lnf1' if $ML_y1==0, eq(3); 99 | mlmatsum `lnf' `d3d4' = 0, eq(3, 4); 100 | mlmatsum `lnf' `d3d5' = `qseli'*`phi2'/`lnf1'*`drho0_dtheta0'* 101 | (-`rhoi'*`di'^2*(`w1i'-`rhoi'*`w2i')+`w2i'+`g2i'/`lnf1') if $ML_y1==0, eq(3, 5); 102 | 103 | mlmatsum `lnf' `d4d4' = (`phi2'/`lnf1')*(`di'^2*`rhoi'*(-1+`di'^2*(`w1i'^2+`w2i'^2-2*`rhoi'*`w1i'*`w2i'))- 104 | 
`di'^2*`w1i'*`w2i'+`phi2'/`lnf1')*((`drho1_dtheta1')^2)-`qseli'*`qi'*`phi2'/`lnf1'*`d2rho1_dtheta12' if $ML_y1==1, eq(4); 105 | mlmatsum `lnf' `d4d5' = 0, eq(4, 5); 106 | 107 | mlmatsum `lnf' `d5d5' = 
(`phi2'/`lnf1')*(`di'^2*`rhoi'*(-1+`di'^2*(`w1i'^2+`w2i'^2-2*`rhoi'*`w1i'*`w2i'))- 108 | `di'^2*`w1i'*`w2i'+`phi2'/`lnf1')*((`drho0_dtheta0')^2)-`qseli'*`qi'*`phi2'/`lnf1'*`d2rho2_dtheta22' if $ML_y1==0, eq(5); 109 | 110 | matrix `negH' = (`d1d1', `d1d2', `d1d3', `d1d4', `d1d5' \ 111 | `d1d2'', `d2d2', `d2d3', `d2d4', `d2d5' \ 112 | `d1d3'', `d2d3'', `d3d3', `d3d4', `d3d5' \ 113 | `d1d4'', `d2d4'', `d3d4'', `d4d4', `d4d5' \ 114 | `d1d5'', `d2d5'', `d3d5'', `d4d5'', `d5d5' ); 115 | }; // quietly 116 | end; 117 | 118 | -------------------------------------------------------------------------------- /switch_probit/switch_probit_example.do: -------------------------------------------------------------------------------- 1 | preserve 2 | use switch_probit_example, clear 3 | switch_probit works age age2 wedu_2-wedu_5 hhsize hhsize2 reg_*, select(migrant age age2 wedu_2-wedu_5 hhsize hhsize2 reg_* pmigrants) 4 | predict tt, tt 5 | summarize tt if (migrant == 1) 6 | restore 7 | -------------------------------------------------------------------------------- /switch_probit/switch_probit_example.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vavalomi/stata_tools/67b6c92bfdbc5c81cf4b2d859d40bb901ec6c96f/switch_probit/switch_probit_example.dta -------------------------------------------------------------------------------- /switch_probit/switch_probit_p.ado: -------------------------------------------------------------------------------- 1 | #delimit ; 2 | // predict backend for switch_probit: creates one new variable holding the statistic selected by option (p11 when none is given) 3 | program define switch_probit_p; 4 | version 8.1; 5 | 6 | quietly {; 7 | //stolen from bipr_p.ado 8 | syntax [anything] [if] [in] [, SCores * ]; 9 | if ~missing(`"`scores'"') {; 10 | ml_score `0'; 11 | exit; 12 | }; 13 | 14 | local myopts ZB XB1 XB0 P11 P01 P10 P00 PCOND1 PCOND0 PSEL STDP1 STDP0 TT TE TU MTE; 15 | 16 | _pred_se 
"`myopts'" `0'; 17 | if (`s(done)') exit; 18 | local vtyp `s(typ)'; 19 | local varn `s(varn)'; 20 | local 0 `"`s(rest)'"'; 
21 | 22 | syntax [if] [in] [, `myopts' noOFFset]; 23 | // collapse whichever statistic option was given into a single word used by the dispatch below 24 | local type `zb'`xb1'`xb0'`p11'`p10'`p01'`p00'`pcond1'`pcond0'`psel'`stdp1'`stdp0'`tt'`te'`tu' `mte'; 25 | 26 | marksample touse; 27 | 28 | local y_sel : word 1 of `e(depvar)'; 29 | local y_1 : word 2 of `e(depvar)'; 30 | local y_0 : word 3 of `e(depvar)'; 31 | 32 | if e(df_m) != 0 {; 33 | tempvar tzb txb1 txb0; 34 | _predict double `tzb' if `touse', eq(#1) `offset'; 35 | _predict double `txb1' if `touse', eq(#2) `offset'; 36 | _predict double `txb0' if `touse', eq(#3) `offset'; 37 | }; 38 | else {; // NOTE(review): tzb is only created in the branch above, so normal(tzb) below has no argument on this path - confirm the intended handling 39 | if (~missing("`e(offset1)'") & missing("`offset1'")) local txb1 `e(offset1)'; 40 | else local txb1 0; 41 | if (~missing("`e(offset0)'") & missing("`offset0'")) local txb0 `e(offset0)'; 42 | else local txb0 0; 43 | }; // if 44 | tempvar psel; 45 | generate double `psel' = normal(`tzb'); 46 | replace `psel' = epsdouble() if `psel' == 0; 47 | replace `psel' = 1 - epsdouble() if `psel' == 1; 48 | // correlations are stored on the atanh scale in the athrho1 and athrho0 equations 49 | tempname rho1 rho0; 50 | scalar `rho1' = tanh([athrho1]_b[_cons]); 51 | scalar `rho0' = tanh([athrho0]_b[_cons]); 52 | 53 | if (missing("`type'") | "`type'" == "p11") {; 54 | if missing("`type'") noisily display as text "(option p11 assumed; Pr(`y_sel'=1, `y_1'=1)"; 55 | 56 | generate `vtyp' `varn' = binorm(`tzb',`txb1', `rho1'); 57 | label variable `varn' "Pr(`y_sel'=1,`y_1'=1)"; 58 | exit; 59 | }; 60 | if "`type'"=="p10" {; 61 | generate `vtyp' `varn' = binorm(`tzb',-`txb1', -`rho1'); 62 | label variable `varn' "Pr(`y_sel'=1,`y_1'=0)"; 63 | exit; 64 | }; 65 | 66 | if "`type'"=="p01" {; 67 | generate `vtyp' `varn' = binorm(-`tzb', `txb0', -`rho0'); 68 | label variable `varn' "Pr(`y_sel'=0,`y_0'=1)"; 69 | exit; 70 | }; 71 | if "`type'"=="p00" {; 72 | generate `vtyp' `varn' = binorm(-`tzb',-`txb0', `rho0'); 73 | label variable `varn' "Pr(`y_sel'=0,`y_0'=0)"; 74 | exit; 75 | }; 76 | if "`type'" == "pcond1" {; 77 | generate `vtyp' `varn' = binorm(`tzb', `txb1', `rho1')/ `psel'; 78 | label variable `varn' "Pr(`y_1'=1 
| `y_sel'=1)"; 79 | exit; 80 | }; 81 | if "`type'" == "pcond0" {; 82 | generate `vtyp' `varn' = binorm(-`tzb', `txb0', -`rho0')/(1-`psel'); 83 | label variable `varn' "Pr(`y_0'=1 | `y_sel'=0)"; 84 | exit; 85 | }; 86 | if "`type'" == "psel" {; 87 | generate `vtyp' `varn' = `psel'; 88 | label variable `varn' "Pr(`y_sel'=1)"; 89 | exit; 90 | }; 91 | if ("`type'" == "zb") {; 92 | generate `vtyp' `varn' = `tzb'; 93 | label variable `varn' "linear prediction of `y_sel'"; 94 | exit; 95 | }; 96 | if ("`type'" == "xb1") {; 97 | generate `vtyp' `varn' = `txb1'; 98 | label variable `varn' "linear prediction of `y_1'"; 99 | exit; 100 | }; 101 | if ("`type'" == "xb0") {; 102 | generate `vtyp' `varn' = `txb0'; 103 | label variable `varn' "linear prediction of `y_0'"; 104 | exit; 105 | }; 106 | if ("`type'" == "stdpsel") {; // NOTE(review): STDPSEL is not declared in myopts, so this branch cannot be reached as written 107 | _predict `vtyp' `varn', stdp eq(#1) `offset', if `touse'; 108 | label variable `varn' "S.E. of prediction of `y_sel'"; 109 | exit; 110 | }; 111 | if ("`type'" == "stdp1") {; 112 | _predict `vtyp' `varn', stdp eq(#2) `offset', if `touse'; 113 | label variable `varn' "S.E. of prediction of `y_1'"; 114 | exit; 115 | }; 116 | if ("`type'" == "stdp0") {; 117 | _predict `vtyp' `varn', stdp eq(#3) `offset', if `touse'; 118 | label variable `varn' "S.E. 
of prediction of `y_0'"; 119 | exit; 120 | }; 121 | if ("`type'" == "tt") {; 122 | generate `vtyp' `varn' = (binorm(`tzb',`txb1', `rho1')-binorm(`tzb',`txb0', `rho0'))/`psel' if `touse' & `y_sel' == 1; 123 | label variable `varn' "tt"; 124 | exit; 125 | }; 126 | if ("`type'" == "tu") {; 127 | generate `vtyp' `varn' = (binorm(-`tzb',`txb1',-`rho1')-binorm(-`tzb',`txb0',-`rho0'))/(1-`psel') if `touse' & `y_sel' == 0; 128 | label variable `varn' "tu"; 129 | exit; 130 | }; 131 | if ("`type'" == "te") {; 132 | generate `vtyp' `varn' = normal(`txb1')-normal(`txb0') if `touse'; 133 | label variable `varn' "te"; 134 | exit; 135 | }; 136 | if ("`type'" == "mte") {; 137 | local select : word 1 of `=e(depvar)'; 138 | summarize `txb1' if (`y_sel' == 1), meanonly; local mean_x1 = r(mean); 139 | summarize `txb0' if (`y_sel' == 0), meanonly; local mean_x0 = r(mean); 140 | generate ___grid = _n/100 in 1/100; 141 | // NOTE(review): ___grid is a permanent variable, so it stays in the data and a second mte request fails on the generate above - confirm this is intended 142 | generate `vtyp' `varn' = normal((`mean_x1'+`rho1'*(-___grid))/sqrt(1-(`rho1')^2) ) - 143 | normal((`mean_x0'+(`rho0')*(-___grid))/sqrt(1-(`rho0')^2) ) 144 | if `touse' in 1/100; 145 | label variable `varn' "mte"; 146 | exit; 147 | }; 148 | display as error "unknown statistic"; 149 | exit 198; 150 | }; // quietly 151 | end; 152 | -------------------------------------------------------------------------------- /xml_tab/xml_tab.pkg: -------------------------------------------------------------------------------- 1 | v 3 2 | d 'XML_TAB': module to save results in Excel XML format 3 | 4 | d xml_tab saves Stata output directly into XML file that could be opened with 5 | Microsoft Excel or OpenOffice Calc. The program is relatively flexible and produces 6 | print-ready tables in Excel or Calc. 
xml_tab allows users to apply different formats 7 | to the elements of the output table and essentially do everything MS Excel or OO Calc 8 | can do in terms of formatting from within Stata. 
9 | d 10 | d KW: XML 11 | d KW: results 12 | d KW: output 13 | d KW: Excel 14 | d KW: OpenOffice 15 | d 16 | d Requires: Stata version 8.0 17 | d 18 | d Distribution-Date: 4Jun2008 19 | d 20 | d Author: Michael Lokshin, World Bank 21 | d Support: email Mlokshin@worldbank.org 22 | d 23 | d Author: Zurab Sajaia, World Bank 24 | d Support: email zsajaia@worldbank.org 25 | 26 | f xml_tab.ado 27 | f xml_tab.hlp 28 | f xml_tab_example.do 29 | f xml_tab_example.dta 30 | -------------------------------------------------------------------------------- /xml_tab/xml_tab_example.do: -------------------------------------------------------------------------------- 1 | #delim ; 2 | // Example for xml_tab: table1 writes a sheet of descriptive statistics and table2 appends a sheet of regression results 3 | capture program drop table1; 4 | program define table1; 5 | quietly {; // quietly 6 | use xml_tab_example, clear; 7 | 8 | local dstr ulog_wage mother_smoked father_smoked 9 | age married divorced widowed relig_* educat_* speaks_* 10 | lhhsize s_* urban; 11 | 12 | tabstat `dstr' if ufilter, by(smokes) s(mean sd) save notot; 13 | matrix SMOKERS = r(Stat2); 14 | matrix NSMOKERS = r(Stat1); 15 | matrix TAB = SMOKERS',NSMOKERS'; 16 | 17 | xml_tab TAB, format(S2103 N2203 N2123) sheet(Table1) noi 18 | rblank(age "Marital status" S2123, 19 | widowed "Religion" S2123, 20 | relig_5 "Education" S2123, 21 | hours_pweek "Subjective health assesment" S2123 22 | ) 23 | replace 24 | ; 25 | 26 | }; // end quietly 27 | end; // program table1 28 | 29 | /*******************************************************************/ 30 | /*******************************************************************/ 31 | capture program drop table2; 32 | program define table2; 33 | quietly {; 34 | use http://siteresources.worldbank.org/INTPOVRES/Resources/smoking, clear; 35 | keep if (ufilter); 36 | local dstr $ind $edu $hh $dst; 37 | reg ulog_wage smokes `dstr' if (ufilter); estimates store ols; 38 | 
reg smoke mother_sm father_sm `dstr' if (ufilter); estimates store _2sls_1; 39 | ivreg ulog_wage `dstr' (smokes= mother_sm father_sm) if 
(ufilter), first; estimates store _2sls_2; 40 | 41 | noi xml_tab ols _2sls_1 _2sls_2, 42 | rblank(age3 "Marital Status" S2123, 43 | widowed "Religion" S2123, 44 | relig_6 "Education" S2123, 45 | lhours_pweek "HH Characteristics" S2123) 46 | drop(dist_* mother_e* father_e*) 47 | style(s1) 48 | sheet(Table 2) 49 | title(2SLS estimation) append 50 | ; 51 | }; // end quietly 52 | end; // program table2 53 | // driver: the globals below are the covariate lists that table2 expands into its regressions 54 | preserve; 55 | clear; 56 | set more off; 57 | set matsize 400; 58 | set mem 10m; 59 | 60 | global ind age age2 age3 married divorced widowed relig_2-relig_5; 61 | global edu educat_2-educat_6 speaks_* lhours_pweek mother_edu_2-mother_edu_6 father_edu_2-father_edu_6; 62 | global hh lhhsize* urban s_*; 63 | global dst dist_2-dist_15 dist_17-dist_36; 64 | 65 | // Descriptive statistics 66 | table1; 67 | // OLS and 2SLS 68 | table2; 69 | 70 | restore; 71 | display _n; 72 | display as text "type {help xml_tab:help xml_tab} for more examples"; 73 | --------------------------------------------------------------------------------