├── .gitignore ├── lmhtexp.mlib ├── stata11 └── lmhtexp11.mlib ├── stata13 └── lmhtexp13.mlib ├── mhtexp_examples.do ├── README.md ├── mhtexp.ado ├── matlab ├── mhtexp_examples.m └── mhtexp.m └── mhtexp.sthlp /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *~ 3 | -------------------------------------------------------------------------------- /lmhtexp.mlib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seidelj/mht/HEAD/lmhtexp.mlib -------------------------------------------------------------------------------- /stata11/lmhtexp11.mlib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seidelj/mht/HEAD/stata11/lmhtexp11.mlib -------------------------------------------------------------------------------- /stata13/lmhtexp13.mlib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seidelj/mht/HEAD/stata13/lmhtexp13.mlib -------------------------------------------------------------------------------- /mhtexp_examples.do: -------------------------------------------------------------------------------- 1 | clear all 2 | insheet using data.csv, comma names 3 | //Creating outcome variable 4 | gen amountmat = amount * (1+ratio) 5 | gen groupid = (redcty==1 & red0 == 1) + (redcty==0 & red0 == 1)*2 + (redcty==0 & red0 == 0)*3 + (redcty==1 & red0 == 0)*4 6 | replace groupid = . if groupid == 0 7 | 8 | mata: mata mlib index 9 | 10 | // help mhtexp 11 | 12 | //Example 1: Hypothesis testing with multiple outcomes: 13 | // We consider four outcome variables: response rate, dollars given not including 14 | // match, dollars given including match, and amount change. 
15 | mhtexp gave amount amountmat amountchange, treatment(treatment) 16 | 17 | //Example 2: Hypothesis testing with multiple subgroups: 18 | // We consider four subgroups: red county in a red state, blue county in a red state, 19 | // red county in a blue state, and blue county in a blue state. We focus on 20 | // the outcome response rate. 21 | mhtexp gave, treatment(treatment) subgroup(groupid) 22 | 23 | //Example 3: Hypothesis testing with multiple treatments: 24 | // We consider the three treatments for match ratio: 1:1, 2:1, and 3:1. We focus on the 25 | // outcome dollars given not including match. 26 | // Here we compare each treatment group to the control 27 | mhtexp amount, treatment(ratio) 28 | 29 | //Example 4: Hypothesis testing with multiple treatments (continued) 30 | // All pairwise comparisons among the treatment and control groups 31 | mhtexp amount, treatment(ratio) combo("pairwise") 32 | 33 | //Example 5: Hypothesis testing with multiple outcomes, subgroups, treatments: 34 | // We consider four outcome variables: response rate, dollars given not including 35 | // match, dollars given including match, and amount change. We also consider 36 | // four subgroups: red county in a red state, blue county in a red state, 37 | // red county in a blue state, and blue county in a blue state. Lastly, 38 | // we compare the control to the three treatments for matching ratio: 1:1, 2:1, and 3:1. 
39 | mhtexp gave amount amountmat amountchange, subgroup(groupid) treatment(ratio) 40 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Multiple Hypothesis Testing 2 | Stata code for the procedure detailed in List, Shaikh, and Xu (2015) 3 | [link](https://ideas.repec.org/p/feb/artefa/00402.html) 4 | 5 | The software is made available as is, and no warranty - about the software, its performance or its conformity to any specification - is given or implied. 6 | 7 | ## Stata 14 Users 8 | You can install mhtexp using Stata's ssc command. 9 | ``` 10 | ssc install mhtexp 11 | ``` 12 | 13 | Once the command finishes, you should be able to use the command. A data set, data.csv, and an example do file, mhtexp_examples.do are available in this repository for download if you want to run an example. 14 | 15 | 16 | ## Manual Stata Install 17 | Download or clone this repository and make sure your current directory contains 18 | * mhtexp.ado -- this initializes the stata command "mhtexp" for usage from the command line or .do file 19 | * lmhtexp.mlib -- the required mata functions that perform the computation 20 | * mhtexp.sthlp -- OPTIONAL but recommended. Usage: from stata command line: `help mhtexp` 21 | 22 | If it is your first time running the code, ensure that Stata knows to look in lmhtexp.mlib. From the Stata command line enter 23 | ``` 24 | mata: mata mlib index 25 | ``` 26 | 27 | 28 | See mhtexp_examples.do for usage example OR from stata terminal type `help mhtexp` 29 | 30 | 31 | #### Summary of contents 32 | 33 | * mhtexp_examples.do do file with examples using the included data set. 
34 | * mhtexp.sthlp Stata14 help file 35 | * mhtexp.ado Contains the Stata command definition of mhtexp 36 | * data.csv contains the data set used in List, Shaikh, and Xu (2015) 37 | * lmhtexp.mlib contains the mata functions required for the command; compiled using Stata14 38 | * stata11/lmhtexp11.mlib contains the mata functions required for the command; compiled using Stata11. 39 | 40 | #### For older versions of Stata (Stata11) 41 | Stata versions that are at least Stata11 can still use this command. However, the bootstrap option is currently unavailable for older versions of Stata. To use this command with an older version of Stata, first replace lmhtexp.mlib with lmhtexp11.mlib and remove or comment out line 2 in mhtexp.ado. 42 | 43 | ``` 44 | ### Remove ### 45 | version 14 46 | ``` 47 | 48 | The key difference in these two files (outside of how they are compiled) is the way in which ids are selected for the bootstrap sample. 49 | 50 | In both cases the same random number generator is used to select random variates over [a, b]. While Stata14 has a built in method, Stata11 does not. 51 | ``` 52 | floor( (b-a+1) * runiform() + a) // in lmhtexp11.mlib (Stata11) 53 | runiformint(r, c, a, b) // in lmhtexp.mlib (Stata14) 54 | ``` 55 | The two methods both produce the desired result, but the matrix of IDs is slightly different across these two methods. Therefore, the bootstrapped statistics used to generate the outputted p-values will not be identical to the results presented in List, Shaikh, Xu 2015. 
56 | 57 | 58 | contact: seidelj@uchicago.edu 59 | 60 | -------------------------------------------------------------------------------- /mhtexp.ado: -------------------------------------------------------------------------------- 1 | program mhtexp 2 | version 14 3 | syntax varlist [if] [in], treatment(varlist) [ subgroup(varname) combo(string) exclude(name) only(name) bootstrap(integer 3000)] 4 | //args outcomes subgroupid treatment combo select 5 | marksample touse, novarlist // honor [if] [in]; novarlist keeps rows with missing outcome values, as before 6 | if ("`combo'" != "" & "`combo'" != "pairwise" & "`combo'" != "treatmentcontrol"){ 7 | display as error "INVALID combo choose either pairwise or treatmentcontrol" 8 | error 198 9 | } 10 | // exclude()/only() name existing Mata matrices; (.,.,.) stands for "option not given" 11 | 12 | if ("`exclude'" == "") mata: excludemat = (.,.,.) 13 | else mata: excludemat = `exclude' 14 | if ("`only'" == "") mata: onlymat = (.,.,.) 15 | else mata: onlymat = `only' 16 | // build the inputs in Mata on the marked sample, run the procedure, then list the results matrix 17 | mata: Y = buildY("`varlist'", "`touse'") 18 | mata: D = buildD("`treatment'", "`touse'") 19 | mata: sub = buildsub("`subgroup'", D, "`touse'") 20 | mata: sizes = buildsizes(Y, D, sub) 21 | mata: combo = buildcombo("`combo'", sizes[3]) 22 | mata: numpc = buildnumpc(combo) 23 | mata: select = buildselect(onlymat, excludemat, sizes[1], sizes[2], numpc) 24 | mata: results = seidelxu(Y, sub, D, combo, select, `bootstrap') 25 | mata: buildoutput("results", results) 26 | 27 | matlist results 28 | end 29 | // Mata helpers for mhtexp; seidelxu(), buildoutput(), nchoosek(), mdarray() and put() are not defined in this file (presumably they live in lmhtexp.mlib) 30 | mata: 31 | 32 | function buildY(string scalar outcomes, | string scalar touse){ 33 | Y = (args() < 2 ? st_data(., tokens(outcomes)) : st_data(., tokens(outcomes), touse)) // optional touse limits rows to the marked sample 34 | return(Y) 35 | } 36 | function buildD(string scalar treatment, | string scalar touse){ 37 | D = (args() < 2 ? st_data(., tokens(treatment)) : st_data(., tokens(treatment), touse)) // same row selection as buildY so Y and D stay aligned 38 | return(D) 39 | } 40 | function buildsub(string scalar subgroup, real matrix D, | string scalar touse){ 41 | if (subgroup == ""){ 42 | sub = J(rows(D), 1,1) 43 | }else{ 44 | sub = (args() < 3 ? st_data(., (subgroup)) : st_data(., (subgroup), touse)) 45 | } 46 | return(sub) 47 | } 48 | function buildsizes(real matrix Y, real matrix D, real matrix sub){ 49 | numoc = cols(Y) 50 | numsub = colnonmissing(uniqrows(sub)) 51 | numg = rows(uniqrows(D)) - 1 52 | 53 | return((numoc, numsub, numg)) 54 | } 55 | function buildcombo(string scalar strcombo, real scalar
numg){ 56 | if (strcombo == "pairwise"){ 57 | combo = nchoosek((0::numg), 2) // presumably every 2-element combination of groups 0..numg; nchoosek() is defined outside this file 58 | }else{ 59 | combo = (J(numg,1,0), (1::numg)) // one row (0, g) per g = 1..numg 60 | } 61 | return(combo) 62 | } 63 | function buildnumpc(real matrix combo){ 64 | return(rows(combo)) 65 | } 66 | function buildselect(real matrix only, real matrix exclude, real scalar numoc, real scalar numsub, real scalar numpc){ 67 | if (rownonmissing(only) != 0){ // only() supplied: presumably start from an array of 0s and put 1 at each listed (i,j,k) 68 | select = mdarray((numoc, numsub, numpc),0) 69 | for (r = 1; r <= rows(only); r++){ 70 | i = only[r, 1] 71 | j = only[r, 2] 72 | k = only[r, 3] 73 | put(1, select, (i,j,k)) 74 | } 75 | }else{ 76 | select = mdarray((numoc, numsub, numpc), 1) 77 | } 78 | if (rownonmissing(exclude) !=0){ // exclude() supplied: put 0 at each listed (i,j,k), applied after only() 79 | for (r=1; r <= rows(exclude); r++){ 80 | i = exclude[r, 1] 81 | j = exclude[r, 2] 82 | k = exclude[r, 3] 83 | put(0, select, (i,j,k)) 84 | } 85 | } 86 | return(select) 87 | } 88 | 89 | end 90 | -------------------------------------------------------------------------------- /matlab/mhtexp_examples.m: -------------------------------------------------------------------------------- 1 | data = importdata('data.csv'); 2 | data = data.data; % read the dataset 3 | B=3000; % the number of simulated samples 4 | 5 | 6 | %% Hypothesis testing with multiple outcomes: 7 | 8 | % We consider four outcome variables: response rate, dollars given not including 9 | % match, dollars given including match, and amount change. 10 | amountmat = data(:,1).*(data(:,10)+ones(size(data,1),1)); % dollars raised per letter including match 11 | Y = [data(:,[12,1]) amountmat data(:,35)]; % the matrix of outcomes 12 | D = data(:,8); % the vector of treatment statuses 13 | sub = ones(size(D,1),1); % the subgroup ID's 14 | numoc = size(Y,2); % the number of outcomes 15 | numsub = size(unique(sub),1); % the number of subgroups 16 | numg = size(unique(D),1)-1; % the number of treatment groups (not including the control group) 17 | combo = [zeros(numg,1) (1:numg)']; % We compare each treatment to the control. 
18 | numpc =size(combo,1); % the number of pairs of treatment (control) groups of interest 19 | select = ones(numoc,numsub,numpc); % We are interested in all the numoc*numsub*numpc hypotheses. 20 | [example1] = mhtexp(B,Y,sub,D,combo,select) 21 | %% Hypothesis testing with multiple subgroups: 22 | 23 | % We consider four subgroups: red county in a red state, blue county in a red state, 24 | % red county in a blue state, and blue county in a blue state. We focus on 25 | % the outcome response rate. 26 | Y = data(:,12); % the vector of outcomes 27 | D = data(:,8); % the vector of treatment status 28 | sub = (data(:,17)==1 & data(:,32)==1)+(data(:,17)==0 & data(:,32)==1)*2.... 29 | +(data(:,17)==0 & data(:,32)==0)*3+(data(:,17)==1 & data(:,32)==0)*4; % subgroup id's, where sub=0 indicates missing subgroup information 30 | numoc = size(Y,2); % the number of outcomes 31 | numsub = size(unique(sub),1)-(sum(sub==0)>0); % the number of subgroups 32 | numg = size(unique(D),1)-1; % the number of treatment groups (not including the control group) 33 | combo = [zeros(numg,1) (1:numg)']; % We compare each treatment to the control. 34 | numpc = size(combo,1); % the number of pairs of treatment (control) groups of interest 35 | select = ones(numoc,numsub,numpc); % We are interested in all the numoc*numsub*numpc hypotheses. 36 | [example2] = mhtexp(B,Y,sub,D,combo,select) 37 | %% Hypothesis testing with multiple treatments: 38 | 39 | % We consider the three treatments for match ratio: 1:1, 2:1, and 3:1. We focus on the 40 | % outcome dollars given not including match. 
41 | Y = data(:,1); % the vector of outcomes 42 | D = data(:,10); % Treatment (control) status 43 | sub = ones(size(D,1),1); % the subgroup ID's 44 | numoc = size(Y,2); % the number of outcomes 45 | numsub = size(unique(sub),1); % the number of subgroups 46 | numg = size(unique(D),1)-1; % the number of treatment groups (not including the control group) 47 | 48 | % compare each treatment group to the control 49 | combo = [zeros(numg,1) (1:numg)']; % We compare each treatment to the control. 50 | numpc = size(combo,1); % the number of pairs of treatment (control) groups of interest 51 | select = ones(numoc,numsub,numpc); % We are interested in all the numoc*numsub*numpc hypotheses. 52 | [example3] = mhtexp(B,Y,sub,D,combo,select) 53 | 54 | % all pairwise comparisons among the treatment and control groups 55 | combo = nchoosek(0:numg,2); % We consider all the pairwise comparisons across the treatment and control groups. 56 | numpc = size(combo,1); % the number of pairs of treatment (control) groups of interest 57 | select = ones(numoc,numsub,numpc); % We are interested in all the numoc*numsub*numpc hypotheses. 58 | [example4] = mhtexp(B,Y,sub,D,combo,select) 59 | %% Hypothesis testing with multiple outcomes, subgroups, treatments: 60 | 61 | % We consider four outcome variables: response rate, dollars given not including 62 | % match, dollars given including match, and amount change. We also consider 63 | % four subgroups: red county in a red state, blue county in a red state, 64 | % red county in a blue state, and blue county in a blue state. Lastly, 65 | % we compare the control to the three treatments for matching ratio: 1:1, 2:1, and 3:1. 66 | amountmat = data(:,1).*(data(:,10)+ones(size(data,1),1)); % dollars raised per letter including match 67 | Y = [data(:,[12,1]) amountmat data(:,35)]; % the matrix of outcomes 68 | D = data(:,10); % treatment (control) status 69 | sub = (data(:,17)==1 & data(:,32)==1)+(data(:,17)==0 & data(:,32)==1)*2.... 
70 | +(data(:,17)==0 & data(:,32)==0)*3+(data(:,17)==1 & data(:,32)==0)*4; % subgroup id's, where sub=0 indicates missing subgroup information 71 | numoc = size(Y,2); % the number of outcomes 72 | numsub = size(unique(sub),1)-(sum(sub==0)>0); % the number of subgroups 73 | numg = size(unique(D),1)-1; % the number of treatment groups (not including the control group) 74 | combo = [zeros(numg,1) (1:numg)']; % We compare each treatment to the control. 75 | numpc = size(combo,1); % the number of pairs of treatment (control) groups of interest 76 | select = ones(numoc,numsub,numpc); % We are interested in all the numoc*numsub*numpc hypotheses. 77 | [example5] = mhtexp(B,Y,sub,D,combo,select) 78 | -------------------------------------------------------------------------------- /mhtexp.sthlp: -------------------------------------------------------------------------------- 1 | {smcl} 2 | {findalias asfradohelp}{...} 3 | {vieweralsosee "" "--"}{...} 4 | {vieweralsosee "[R] help" "help help"}{...} 5 | {viewerjumpto "Syntax" "mhtexp##syntax"}{...} 6 | {viewerjumpto "Description" "mhtexp##description"}{...} 7 | {viewerjumpto "Options" "mhtexp##options"}{...} 8 | {viewerjumpto "Remarks" "mhtexp##remarks"}{...} 9 | {viewerjumpto "Examples" "mhtexp##examples"}{...} 10 | {title:Title} 11 | 12 | {phang} 13 | {bf:mhtexp} {hline 2} Stata command for the procedure detailed in List, Shaikh, and Xu (2015) 14 | 15 | 16 | {marker syntax}{...} 17 | {title:Syntax} 18 | 19 | {p 8 17 2} 20 | {cmdab:mhtexp} 21 | {varlist} 22 | {cmd:, } {it:treatment} [{it:options}] 23 | 24 | {synoptset 20 tabbed}{...} 25 | {synopthdr} 26 | {synoptline} 27 | {syntab:Main} 28 | {synopt:{opth treatment(varlist)}}treatment status variables {it:varlist}{p_end} 29 | {synopt:{opth subgroup(varname)}}group identifier variable {it:varname}{p_end} 30 | {synopt:{opth combo(string)}}compare "treatmentcontrol" or "pairwise"; default is 31 | {cmd:combo("treatmentcontrol")}{p_end} 32 | {synopt:{opth only(name)}} the 
numoc*numsub*numpc hypotheses to be tested{p_end} 33 | {synopt:{opth exclude(name)}} the numoc*numsub*numpc hypotheses not to be tested{p_end} 34 | {synopt:{opth bootstrap(integer)}} the number of simulated samples to use{p_end} 35 | {synoptline} 36 | {p2colreset}{...} 37 | 38 | {marker description}{...} 39 | {title:Description} 40 | 41 | {pstd} 42 | {cmd:mhtexp} implements a testing procedure for multiple hypothesis testing that asymptotically controls 43 | the familywise error rate and is asymptotically balanced for outcomes specified via {varlist}{p_end} 44 | 45 | {marker options}{...} 46 | {title:Options} 47 | 48 | {dlgtab:Main} 49 | 50 | {phang} 51 | {opt treatment(varlist)} user provided variable containing treatment status of the observations; required.{p_end} 52 | 53 | {phang} 54 | {opt subgroup(varname)} user provided variable containing subgroup ids; optional.{p_end} 55 | 56 | {phang} 57 | {opt combo(string)} user provided string to specify the comparison between treatment and control. 58 | {cmd:combo("pairwise")} performs all pairwise comparisons across treatment and control. 59 | The default is {cmd:combo("treatmentcontrol")}, which compares each treatment to the control; optional. 60 | {p_end} 61 | 62 | {phang} 63 | {opt only(name)} N by 3 matrix specifying which hypotheses are to be tested; optional.{p_end} 64 | 65 | {phang} 66 | {opt exclude(name)} N by 3 matrix specifying which hypotheses are not to be tested; optional.{p_end} 67 | {phang} 68 | The matrix in either case should be defined where in each row, column 1 is the outcome, column 2 is the 69 | subgroup and column 3 is the treatment-control comparison. 
Where...{p_end} 70 | {phang3} 1 <= column 1 <= number of outcomes{p_end} 71 | {phang3} 1 <= column 2 <= number of subgroups{p_end} 72 | {phang3} 1 <= column 3 <= number of treatment-control comparisons{p_end} 73 | 74 | {phang} 75 | By default {cmd:mhtexp} will calculate all hypotheses based on the number of outcomes, subgroups and treatments provided by the user 76 | in {it:varlist}, {it:subgroup(varname)} and {it:treatment(varlist)}, respectively. Section 4.4 of List, Shaikh and Xu (2015) simultaneously considers 77 | 4 outcome variables, 4 subgroups and 3 treatment conditions, producing a table of 48 hypothesis tests. However, there are cases in which you 78 | may only be interested in certain outcome by subgroup by treatment hypotheses. In that case, use {opt only} or {opt exclude}.{p_end} 79 | 80 | 81 | {phang} 82 | {opt bootstrap(integer)} the number of simulated samples. The default is 3000, but a larger number is recommended when there are a large number of hypotheses; optional.{p_end} 83 | 84 | {marker remarks}{...} 85 | {title:Remarks} 86 | 87 | {pstd} 88 | For detailed information on the procedure, see {browse "https://ideas.repec.org/p/feb/artefa/00402.html":Multiple Hypothesis Testing in Experimental Economics}.{p_end} 89 | 90 | {pstd} 91 | If you are running the command for the first time and receive an error message claiming certain functions are not found, 92 | e.g. nchoosek(), make sure that lmhtexp.mlib exists in your current dir and enter the command{p_end} 93 | {phang2} 94 | {cmd:. mata: mata mlib index}{p_end} 95 | {pstd} 96 | This tells Stata to look in lmhtexp.mlib for the mata functions that are required to run the command{p_end} 97 | 98 | {marker examples}{...} 99 | {title:Examples} 100 | {pstd} 101 | Suppose a data set containing the variables listed below. You can access this dataset at github.com/seidelj/mht "data/data.csv"{p_end} 102 | 103 | {phang} outcome variables {it:gave amount amountchange}{p_end} 104 | {phang} treatment variables {it: treatment ratio}{p_end} 105 | 106 | {pstd} 107 | Setup{p_end} 108 | {phang} {cmd:. 
gen amountmat = amount * (1+ratio) }{p_end} 109 | {phang} {cmd:. gen groupid = (redcty==1 & red0 == 1) + (redcty==0 & red0 == 1)*2 + (redcty==0 & red0 == 0)*3 + (redcty==1 & red0 == 0)*4}{p_end} 110 | {phang} {cmd:. replace groupid = . if groupid == 0 }{p_end} 111 | 112 | {pstd} 113 | Example 1: Hypothesis testing with multiple outcomes{p_end} 114 | {phang}{cmd:. mhtexp gave amount amountmat amountchange, treatment(treatment) }{p_end} 115 | 116 | {pstd} 117 | Example 2: Hypothesis testing with multiple subgroups{p_end} 118 | {phang}{cmd:. mhtexp gave, treatment(treatment) subgroup(groupid) }{p_end} 119 | 120 | {pstd} 121 | Example 3: Hypothesis testing with multiple treatments{p_end} 122 | {phang}{cmd:. mhtexp amount, treatment(ratio) }{p_end} 123 | 124 | {pstd} 125 | Example 4: Hypothesis testing for all pairwise comparisons among the treatment and control groups{p_end} 126 | {phang}{cmd:. mhtexp amount, treatment(ratio) combo("pairwise") }{p_end} 127 | 128 | {pstd} 129 | Example 5: Hypothesis testing with multiple outcomes, subgroups and treatments{p_end} 130 | {phang}{cmd:. mhtexp gave amount amountmat amountchange, subgroup(groupid) treatment(ratio) }{p_end} 131 | 132 | {pstd} 133 | Example 6: Now let's consider example 5, however we are only interested in the first outcome, subgroup and treatment-control comparison 134 | hypothesis{p_end} 135 | {pstd} 136 | First an N by 3 matrix must be defined. For more on basic mata and matrices see {help m1_first:[M-1] first} {p_end} 137 | {phang}{cmd:. mata: onlyHyp = (1,1,1) }{p_end} 138 | {pstd} 139 | Now we have a 1 by 3 matrix named onlyHyp to be passed to {opt only}{p_end} 140 | {phang}{cmd:. mhtexp gave amount amountmat amountchange, subgroup(groupid) treatment(ratio) only(onlyHyp)}{p_end} 141 | 142 | {pstd} 143 | Example 7: Let's consider example 5 once more, but this time we are interested in all 144 | but the last outcome, subgroup and treatment hypothesis.{p_end} 145 | {pstd} 146 | Create another N by 3 matrix. 
Recall, we have 4 outcomes, 4 subgroups and 3 treatment control comparisons.{p_end} 147 | {phang}{cmd:. mata: excludeHyp = (4,4,3)}{p_end} 148 | {phang}{cmd:. mhtexp gave amount amountmat amountchange, subgroup(groupid) treatment(ratio) exclude(excludeHyp)}{p_end} 149 | -------------------------------------------------------------------------------- /matlab/mhtexp.m: -------------------------------------------------------------------------------- 1 | 2 | function [output] = mhtexp(B,Y,sub,D,combo,select) 3 | 4 | % MHTEXP considers the multiple hypothesis testing problem in 5 | % experimental economics described in List, Shaikh, and Xu (2015). 6 | % 7 | % Denote by n the number of units, by numoc the number of outcomes, by 8 | % numsub the number of subgroups, and by numpc the number of pairs of 9 | % treatment (control) groups of interest. 10 | % 11 | % Among the input arguments of mhtexp: 12 | % B is the number of simulated samples (the suggested number is 3000, 13 | % but a larger number is recommended when there are a large number of hypotheses); 14 | % Y is an n by numoc matrix with the ijth element being the jth outcome 15 | % of the ith unit; 16 | % sub is an n by 1 matrix with the ith element being the subgroup ID 17 | % of the ith unit, where a subgroup ID is coded as an integer in [1,numsub]; 18 | % D is an n by 1 matrix in which the ith element is the treatment status 19 | % of the ith unit (the control group is coded as 0); 20 | % combo is a numpc by 2 matrix, each row of which indicates a pairwise 21 | % comparison of interest; 22 | % select is a numoc by numsub by numpc matrix, where the ijkth element 23 | % is equal to 1 if we are interested in the hypothesis for the ith outcome, 24 | % the jth subgroup, and the kth pairwise comparison, and the ijkth element is 25 | % equal to 0 otherwise. 
26 | % 27 | % The output argument "output" is a matrix with 10 columns: 28 | % columns 1-4 present the id's of the corresponding outcomes, subgroups, 29 | % and treatment (control) groups; 30 | % the 5th column presents the absolute values of difference in sample means; 31 | % the 6th column presents the p-values based on the single testing procedure 32 | % described in Remark 3.1 of List, Shaikh, and Xu (2015); 33 | % the 7th column presents the p-values based on the multiple testing procedure 34 | % described in Theorem 3.1 of List, Shaikh, and Xu (2015); 35 | % the 8th column presents the p-values based on the multiple testing procedure 36 | % described in Remark 3.7 of List, Shaikh, and Xu (2015); 37 | % the 9th column presents the p-values based on the Bonferroni method; 38 | % the 10th column presents the p-values based on the Holm's method. 39 | % 40 | % Please refer to List, Shaikh, and Xu (2015) for examples. 41 | 42 | n = size(Y,1); % the number of units 43 | numoc = size(Y,2); % the number of outcomes 44 | numsub = size(unique(sub),1)-(sum(sub==0)>0); % the number of subgroups 45 | numg = size(unique(D),1)-1; % the number of treatment groups (not including the control group) 46 | numpc = size(combo,1); % the number of pairs of treatment (control) groups of interest 47 | 48 | % compute the studentized differences in means for all the hypotheses based on the actual data 49 | 50 | meanact = zeros(numoc,numsub,numg+1); % a matrix of sample means of the actual data for all the hypotheses 51 | varact = zeros(numoc,numsub,numg+1); % a matrix of sample variances of the actual data for all the hypotheses 52 | Nact = zeros(numoc,numsub,numg+1); % a matrix of sample sizes of the actual data for all the hypotheses 53 | 54 | for i = 1:numoc 55 | for j = 1:numsub 56 | for k = 0:numg 57 | meanact(i,j,k+1) = mean(Y(sub==j & D==k,i)); 58 | varact(i,j,k+1) = var(Y(sub==j & D==k,i)); 59 | Nact(i,j,k+1) = size(Y(sub==j & D==k,i),1); 60 | end 61 | end 62 | end 63 | diffact = 
meanact(:,:,combo(:,1)+ones(numpc,1))-meanact(:,:,combo(:,2)+ones(numpc,1)); % a matrix of differences in sample means for all outcomes, subgroups, and pairwise comparisons based on actual data 64 | abdiffact = abs(diffact); % a matrix of absolute differences in sample means for all outcomes, subgroups, and pairwise comparisons based on actual data 65 | statsact = abdiffact./sqrt(varact(:,:,combo(:,1)+ones(numpc,1))./Nact(:,:,combo(:,1)+ones(numpc,1))... 66 | +varact(:,:,combo(:,2)+ones(numpc,1))./Nact(:,:,combo(:,2)+ones(numpc,1))); % a matrix of studentized absolute differences in sample means for all outcomes, subgroups, and pairwise comparisons based on actual data 67 | 68 | % Construct bootstrap samples and compute the test statistics and the corresponding 1-p values for each simulated sample 69 | 70 | rng default; 71 | idboot = randi(n,n,B); % an n by B matrix of simulated samples of all the units with replacement 72 | statsboot = zeros(B,numoc,numsub,numpc); % a matrix of the test statistics for all the simulated samples 73 | meanboot = zeros(numoc,numsub,numg+1); % a matrix of sample means of a simulated sample for all the hypotheses 74 | varboot = zeros(numoc,numsub,numg+1); % a matrix of sample variances of a simulated sample for all the hypotheses 75 | Nboot = zeros(numoc,numsub,numg+1); % a matrix of sample sizes of a simulated sample for all the hypotheses 76 | 77 | for i = 1:B 78 | Yboot = Y(idboot(:,i),:); % a matrix of all the outcomes for the ith simulated sample 79 | subboot = sub(idboot(:,i),:); % a matrix of all the subgroup id's for the ith simulated sample 80 | Dboot = D(idboot(:,i),:); % a matrix of all the treatment (control) status for the ith simulated sample 81 | for j = 1:numoc 82 | for k = 1:numsub 83 | for l = 0:numg 84 | meanboot(j,k,l+1) = mean(Yboot(subboot==k & Dboot==l,j)); 85 | varboot(j,k,l+1) = var(Yboot(subboot==k & Dboot==l,j)); 86 | Nboot(j,k,l+1) = size(Yboot(subboot==k & Dboot==l,j),1); 87 | end 88 | end 89 | end 90 | 
statsboot(i,:,:,:) = abs(meanboot(:,:,combo(:,1)+ones(numpc,1))-meanboot(:,:,combo(:,2)+ones(numpc,1))-diffact)./... 91 | sqrt(varboot(:,:,combo(:,1)+ones(numpc,1))./Nboot(:,:,combo(:,1)+ones(numpc,1))... 92 | +varboot(:,:,combo(:,2)+ones(numpc,1))./Nboot(:,:,combo(:,2)+ones(numpc,1))); 93 | end 94 | 95 | pact = zeros(numoc,numsub,numpc); % a matrix of 1-p values of the actual data 96 | pboot = zeros(B,numoc,numsub,numpc); % a matrix of 1-p values of all the simulated data 97 | 98 | for i = 1:numoc 99 | for j = 1:numsub 100 | for k = 1:numpc 101 | pact(i,j,k) = 1-(sum((statsboot(:,i,j,k)>=statsact(i,j,k)*ones(B,1))))/B; 102 | for l=1:B 103 | pboot(l,i,j,k) = 1-(sum((statsboot(:,i,j,k)>=statsboot(l,i,j,k)*ones(B,1))))/B; 104 | end 105 | end 106 | end 107 | end 108 | 109 | % calculate p-values based on single hypothesis testing 110 | 111 | alphasin = zeros(numoc,numsub,numpc); % the smallest alpha's that reject the hypotheses based on the single testing procedure described in Remark 3.1 112 | 113 | for i=1:numoc 114 | for j=1:numsub 115 | for k=1:numpc 116 | ptemp = pboot(:,i,j,k); 117 | sortp = sort(ptemp,'descend'); 118 | q = find(pact(i,j,k)*ones(B,1)>=sortp,1)/B; 119 | if isempty(q)==0 120 | alphasin(i,j,k) = q; 121 | else 122 | alphasin(i,j,k) = 1; 123 | end 124 | end 125 | end 126 | end 127 | 128 | psin = alphasin; % p-values based on the single testing procedure described in Remark 3.1 of List, Shaikh, and Xu (2015) 129 | 130 | % calculate p-values based on multiple hypothesis testing 131 | 132 | nh = sum(sum(sum(select))); % the number of hypotheses of interest 133 | statsall = zeros(nh,8+B); % columns 1-5 present the id's of the hypotheses, outcomes, subgroups, and treatment (control) groups; 134 | % the 6th column shows the studentized differences in means for all the hypotheses based on the actual data 135 | % the 7th column presents the p-values based on the single testing procedure described in Remark 3.1 of List, Shaikh, and Xu (2015); 136 | % the 8th 
column presents the 1-p values based on the actual data; 137 | % the subsequent columns present the corresponding 1-p values based on the simulated samples 138 | counter = 1; % the loop counter 139 | 140 | for i=1:numoc 141 | for j=1:numsub 142 | for k=1:numpc 143 | if select(i,j,k)==1; 144 | statsall(counter,:) = [counter i j combo(k,:) abdiffact(i,j,k) psin(i,j,k) pact(i,j,k) pboot(:,i,j,k)']; 145 | counter = counter+1; 146 | end 147 | end 148 | end 149 | end 150 | 151 | statsrank = sortrows(statsall,7); % rank the rows according to the p-values based on single hypothesis testing 152 | alphamul = zeros(nh,1); % the smallest alpha's that reject the hypotheses based on Theorem 3.1 153 | alphamulm = zeros(nh,1); % the smallest alpha's that reject the hypotheses based on Remark 3.7 154 | 155 | for i=1:nh 156 | maxstats = max(statsrank(i:end,9:end),[],1); % the maximums of the 1-p values among all the remaning hypotheses for all the simulated samples 157 | sortmaxstats = sort(maxstats,2,'descend'); % sort "maxstats" in a descending order 158 | q = find(statsrank(i,8)>=sortmaxstats,1)/B; 159 | if isempty(q)==0 160 | alphamul(i) = q; 161 | else 162 | alphamul(i) = 1; 163 | end 164 | if i==1 165 | alphamulm(i) = alphamul(i); 166 | else 167 | sortmaxstatsm = zeros(1,B); % compute at each quantile the maximum of the critical values of all the "true" subsets of hypotheses 168 | for j=nh-i+1:-1:1 169 | subset = nchoosek(statsrank(i:end,1),j); % all the subsets of hypotheses with j elements 170 | sumcont = 0; % the total number of subsets of hypotheses with j elements that contradict any of the previously rejected hypotheses 171 | for k=1:size(subset,1) 172 | cont = 0; % cont=1 if any of the previously rejected hypotheses contradicts the current subset of hypotheses 173 | for l=1:i-1 174 | sameocsub = subset(k,ismember(statsall(subset(k,:),2:3),statsrank(l,2:3),'rows')==1); % the hypotheses in "subset(k,:)" with the same outcome and subgroup as the lth hypothesis 175 | tran = 
mat2cell(statsall(sameocsub,4:5),ones(1,size(sameocsub,2)),2); % this cell array presents all the sets of "connected" treatment (control) groups implied by "transitivity" under the null hypotheses in "sameocsub" 176 | trantemp = tran; 177 | if size(sameocsub,2)<=1 178 | cont = 0; 179 | maxstatsm = max(statsall(subset(k,:),9:end),[],1); % the maximums of the 1-p values within the subset of hypotheses for all the simulated samples 180 | sortmaxstatsm = max(sortmaxstatsm,sort(maxstatsm,'descend')); 181 | break; 182 | else 183 | counter = 1; 184 | while size(tran,1)>size(trantemp,1) || counter==1 185 | tran = trantemp; 186 | trantemp = tran(1); 187 | counter = counter+1; 188 | for m=2:size(tran,1) 189 | belong = 0; % the total number of rows of "transtemp" that "tran{m}" can be connected to by "transitivity" 190 | for N=1:size(trantemp,1) 191 | if unique([trantemp{N} tran{m}])=sortmaxstatsm,1)/B; 223 | if isempty(qm)==0 224 | alphamulm(i) = qm; 225 | else 226 | alphamulm(i) = 1; 227 | end 228 | end 229 | end 230 | 231 | bon = min(statsrank(:,7)*nh,ones(nh,1)); % p-values based on the Bonferroni method 232 | holm = min(statsrank(:,7).*(nh:-1:1)',ones(nh,1)); % p-values based on the Holm's method 233 | 234 | output = sortrows([statsrank(:,1:7) alphamul alphamulm bon holm],1); % restore the order 235 | output = output(:,2:end); 236 | check = output(:,6)<=output(:,7) & output(:,7)>=output(:,8) & output(:,7)<=output(:,9) & output(:,7)<=output(:,10); % check if the results are what we should expect 237 | output = dataset({output,'outcome','subgroup','treatment1','treatment2','diff_in_means','Remark3_1','Thm3_1','Remark3_7','Bonf','Holm'}); 238 | 239 | 240 | end --------------------------------------------------------------------------------