├── .gitignore
├── lmhtexp.mlib
├── stata11
    └── lmhtexp11.mlib
├── stata13
    └── lmhtexp13.mlib
├── mhtexp_examples.do
├── README.md
├── mhtexp.ado
├── matlab
    ├── mhtexp_examples.m
    └── mhtexp.m
└── mhtexp.sthlp


/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | *~
3 | 


--------------------------------------------------------------------------------
/lmhtexp.mlib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seidelj/mht/HEAD/lmhtexp.mlib


--------------------------------------------------------------------------------
/stata11/lmhtexp11.mlib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seidelj/mht/HEAD/stata11/lmhtexp11.mlib


--------------------------------------------------------------------------------
/stata13/lmhtexp13.mlib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seidelj/mht/HEAD/stata13/lmhtexp13.mlib


--------------------------------------------------------------------------------
/mhtexp_examples.do:
--------------------------------------------------------------------------------
 1 | clear all
 2 | insheet using data.csv, comma names
 3 | // Create the outcome "amount including match" and a subgroup id in 1-4 (0 means none of the four cells matched)
 4 | gen amountmat = amount * (1+ratio)
 5 | gen groupid = (redcty==1 & red0 == 1) + (redcty==0 & red0 == 1)*2 + (redcty==0 & red0 == 0)*3 + (redcty==1 & red0 == 0)*4
 6 | replace groupid = . if groupid == 0
 7 | // Rebuild Mata's library index so the functions stored in lmhtexp.mlib can be found
 8 | mata: mata mlib index
 9 | 
10 | // For the option reference, run: help mhtexp
11 | 
12 | //Example 1: Hypothesis testing with multiple outcomes:
13 | //  We consider four outcome variables: response rate, dollars given not including
14 | //  match, dollars given including match, and amount change.
15 | mhtexp gave amount amountmat amountchange, treatment(treatment)
16 | 
17 | //Example 2: Hypothesis testing with multiple subgroups:
18 | //  We consider four subgroups: red county in a red state, blue county in a red state,
19 | //  red county in a blue state, and blue county in a blue state. We focus on
20 | //  the outcome response rate.
21 | mhtexp gave, treatment(treatment) subgroup(groupid)
22 | 
23 | //Example 3: Hypothesis testing with multiple treatments:
24 | //  We consider the three treatments for match ratio: 1:1, 2:1, and 3:1. We focus on the
25 | //  outcome dollars given not including match.
26 | //  Here we compare each treatment group to the control
27 | mhtexp amount, treatment(ratio)
28 | 
29 | //Example 4: Hypothesis testing with multiple treatments (continued)
30 | //  All pairwise comparisons among the treatment and control groups
31 | mhtexp amount, treatment(ratio) combo("pairwise")
32 | 
33 | //Example 5: Hypothesis testing with multiple outcomes, subgroups, treatments:
34 | //  We consider four outcome variables: response rate, dollars given not including
35 | //  match, dollars given including match, and amount change. We also consider
36 | //  four subgroups: red county in a red state, blue county in a red state,
37 | //  red county in a blue state, and blue county in a blue state. Lastly,
38 | //  we compare the control to the three treatments for matching ratio: 1:1, 2:1, and 3:1.
39 | mhtexp gave amount amountmat amountchange, subgroup(groupid) treatment(ratio)
40 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Multiple Hypothesis Testing
 2 | Stata code for the procedure detailed in List, Shaikh, and Xu (2015)
 3 | [link](https://ideas.repec.org/p/feb/artefa/00402.html)
 4 | 
 5 | The software is made available as is, and no warranty - about the software, its performance or its conformity to any specification - is given or implied. 
 6 | 
 7 | ## Stata 14 Users
 8 | You can install mhtexp using Stata's ssc command.
 9 | ```
10 | ssc install mhtexp
11 | ```
12 | 
13 | Once the command finishes, you should be able to use the command.  A data set, data.csv, and an example do file, mhtexp_examples.do are available in this repository for download if you want to run an example.
14 | 
15 | 
16 | ## Manual Stata Install 
17 | Download or clone this repository and make sure your current directory contains
18 | * mhtexp.ado -- this initializes the Stata command "mhtexp" for usage from the command line or a .do file
19 | * lmhtexp.mlib -- the required mata functions that perform the computation
20 | * mhtexp.sthlp -- OPTIONAL but recommended.  Usage: from stata command line: `help mhtexp`
21 | 
22 | If it is your first time running the code, ensure that Stata knows to look in lmhtexp.mlib. From the Stata command line enter
23 | ```
24 | mata: mata mlib index
25 | ```
26 | 
27 | 
28 | See mhtexp_examples.do for usage example OR from stata terminal type `help mhtexp`
29 | 
30 | 
31 | #### Summary of contents
32 | 
33 | * mhtexp_examples.do do file with examples using the included data set.
34 | * mhtexp.sthlp  Stata14 help file
35 | * mhtexp.ado Contains the Stata command definition of mhtexp
36 | * data.csv contains the data set used in the List, Shaikh, and Xu (2015)
37 | * lmhtexp.mlib contains mata function required for the command; compiled using Stata14
38 | * stata11/lmhtexp11.mlib contains the mata functions required for the command; compiled using Stata11.
39 | 
40 | #### For older versions of Stata (Stata 11 or newer, but older than Stata 14)
41 | Stata versions that are at least Stata11 can still use this command.  However, the bootstrap option is currently unavailable for older versions of Stata.  To use this command with an older version of Stata, first replace lmhtexp.mlib with lmhtexp11.mlib and remove or comment out line 2 in mhtexp.ado.
42 | 
43 | ```
44 | ### Remove ###
45 | version 14
46 | ```
47 | 
48 | The key difference between these two files (outside of how they are compiled) is the way in which ids are selected for the bootstrap sample.
49 | 
50 | In both cases the same random number generator is used to select random variates over [a, b].  While Stata14 has a built-in method, Stata11 does not.
51 | ```
52 | floor( (b-a+1) * runiform() + a)  // in lmhtexp11.mlib (Stata11)
53 | runiformint(r, c, a, b) // in lmhtexp.mlib (Stata14)
54 | ```
55 | The two methods both produce the desired result, but the matrix of IDs is slightly different across these two methods.  Therefore, the bootstrapped statistics used to generate the reported p-values will not be identical to the results presented in List, Shaikh, and Xu (2015).
56 | 
57 | 
58 | contact: seidelj@uchicago.edu
59 | 
60 | 


--------------------------------------------------------------------------------
/mhtexp.ado:
--------------------------------------------------------------------------------
 1 | program mhtexp
 2 |     version 14
 3 |     syntax varlist [if] [in], treatment(varlist) [ subgroup(varname) combo(string) exclude(name) only(name) bootstrap(integer 3000)]
 4 |     // Entry point: validate combo(), build the Mata inputs, run seidelxu(), and list the results matrix.
 5 |     // NOTE(review): [if] and [in] are accepted by -syntax- but never applied; the Mata builders read every observation.
 6 |     if ("`combo'" != "" & "`combo'" != "pairwise" & "`combo'" != "treatmentcontrol"){
 7 |         display as error "INVALID combo choose either pairwise or treatmentcontrol"
 8 |         exit 198
 9 |     }
10 | 
11 |     // The sentinel (.,.,.) means "option not supplied"; otherwise copy the Mata matrix the user named.
12 |     if ("`exclude'" == "") mata: excludemat = (.,.,.)
13 |     else mata: excludemat = `exclude'
14 |     if ("`only'" == "") mata: onlymat = (.,.,.)
15 |     else mata: onlymat = `only'
16 |     // Data matrices and problem dimensions; sizes = (numoc, numsub, numg).
17 |     mata: Y = buildY("`varlist'")
18 |     mata: D = buildD("`treatment'")
19 |     mata: sub = buildsub("`subgroup'", D)
20 |     mata: sizes = buildsizes(Y, D, sub)
21 |     mata: combo = buildcombo("`combo'", sizes[3])
22 |     mata: numpc = buildnumpc(combo)
23 |     mata: select = buildselect(onlymat, excludemat, sizes[1], sizes[2], numpc)
24 |     mata: results = seidelxu(Y, sub, D, combo, select, `bootstrap')
25 |     mata: buildoutput("results", results)
26 |     // NOTE(review): buildoutput() lives outside this file; it is assumed to store a Stata matrix named results.
27 |     matlist results
28 | end
29 | 
30 | mata:
31 | 
32 |     function buildY(string scalar outcomes){
33 |         // Outcome matrix: all observations, one column per variable named in outcomes.
34 |         return(st_data(., tokens(outcomes)))
35 |     }
36 |     function buildD(string scalar treatment){
37 |         // Treatment matrix: all observations, one column per variable named in treatment.
38 |         return(st_data(., tokens(treatment)))
39 |     }
40 |     function buildsub(string scalar subgroup, real matrix D){
41 |         // Subgroup id column, one row per observation.
42 |         if (subgroup != ""){
43 |             // a subgroup variable was named: read its values for all observations
44 |             return(st_data(., (subgroup)))
45 |         }
46 |         return(J(rows(D), 1, 1))  // no subgroup variable: every row of D gets subgroup id 1
47 |     }
48 |     function buildsizes(real matrix Y, real matrix D, real matrix sub){
49 |         // Problem dimensions as the row vector (numoc, numsub, numg):
50 |         //   numoc  - columns of Y
51 |         //   numsub - nonmissing entries among the distinct rows of sub
52 |         //   numg   - distinct rows of D minus one (missing values in D are not filtered here)
53 |         return((cols(Y), colnonmissing(uniqrows(sub)), rows(uniqrows(D)) - 1))
54 |     }
55 |     function buildcombo(string scalar strcombo, real scalar numg){
56 |         // Rows of the result are the (group, group) pairs to compare.
57 |         if (strcombo != "pairwise"){
58 |             // default: group 0 against each of groups 1..numg
59 |             return((J(numg,1,0), (1::numg)))
60 |         }
61 |         return(nchoosek((0::numg), 2))  // nchoosek() is not defined in this file; presumably all 2-element subsets of 0..numg
62 |     }
63 |     function buildnumpc(real matrix combo){
64 |         return(rows(combo))  // number of comparisons = number of rows in combo
65 |     }
66 |     function buildselect(real matrix only, real matrix exclude, real scalar numoc, real scalar numsub, real scalar numpc){
67 |         // Build the numoc x numsub x numpc array of 0/1 flags marking which hypotheses to test.
68 |         // mdarray() and put() are not defined in this file; put() is assumed to update its array argument in place.
69 |         dims = (numoc, numsub, numpc)
70 |         hasonly = (rownonmissing(only) != 0)
71 |         select = mdarray(dims, (hasonly ? 0 : 1))  // only-list given: start all 0; otherwise start all 1
72 |         if (hasonly){
73 |             // switch on each (outcome, subgroup, comparison) row listed in only
74 |             r = 1
75 |             while (r <= rows(only)){
76 |                 put(1, select, only[r, (1,2,3)])
77 |                 r++
78 |             }
79 |         }
80 |         if (rownonmissing(exclude) != 0){
81 |             // switch off each row listed in exclude; this runs after only, so exclude wins on overlap
82 |             for (r = 1; r <= rows(exclude); r++){
83 |                 put(0, select, exclude[r, (1,2,3)])
84 |             }
85 |         }
86 |         return(select)
87 |     }
88 | 
89 | end
90 | 


--------------------------------------------------------------------------------
/matlab/mhtexp_examples.m:
--------------------------------------------------------------------------------
 1 | data = importdata('data.csv');
 2 | data = data.data;               % keep only the numeric matrix (the .data field) of the imported dataset
 3 | B=3000;                         % the number of simulated samples
 4 | % Column indices used below, matched against the variable names in mhtexp_examples.do (TODO confirm against the data.csv header):
 5 | %   1 = amount, 8 = treatment, 10 = ratio, 12 = gave, 35 = amountchange; 17 and 32 appear to be redcty and red0.
 6 | %% Hypothesis testing with multiple outcomes: 
 7 | 
 8 | % We consider four outcome variables: response rate, dollars given not including
 9 | % match, dollars given including match, and amount change.
10 | amountmat = data(:,1).*(data(:,10)+ones(size(data,1),1));             % dollars raised per letter including match
11 | Y = [data(:,[12,1]) amountmat data(:,35)];     % the matrix of outcomes
12 | D = data(:,8);                                 % the vector of treatment statuses
13 | sub = ones(size(D,1),1);                       % the subgroup ID's
14 | numoc = size(Y,2);                             % the number of outcomes
15 | numsub = size(unique(sub),1);                  % the number of subgroups
16 | numg = size(unique(D),1)-1;                    % the number of treatment groups (not including the control group)
17 | combo = [zeros(numg,1) (1:numg)'];             % We compare each treatment to the control.
18 | numpc =size(combo,1);                          % the number of pairs of treatment (control) groups of interest
19 | select = ones(numoc,numsub,numpc);             % We are interested in all the numoc*numsub*numpc hypotheses.
20 | [example1] = mhtexp(B,Y,sub,D,combo,select)    % no trailing semicolon, so the result matrix is printed
21 | %% Hypothesis testing with multiple subgroups: 
22 | 
23 | % We consider four subgroups: red county in a red state, blue county in a red state,
24 | % red county in a blue state, and blue county in a blue state. We focus on
25 | % the outcome response rate.
26 | Y = data(:,12);    % the vector of outcomes
27 | D = data(:,8);    % the vector of treatment status
28 | sub = (data(:,17)==1 & data(:,32)==1)+(data(:,17)==0 & data(:,32)==1)*2....
29 |     +(data(:,17)==0 & data(:,32)==0)*3+(data(:,17)==1 & data(:,32)==0)*4; % subgroup id's, where sub=0 indicates missing subgroup information
30 | numoc = size(Y,2);                             % the number of outcomes
31 | numsub = size(unique(sub),1)-(sum(sub==0)>0);  % the number of subgroups (the sub==0 "missing" code is not counted)
32 | numg = size(unique(D),1)-1;                    % the number of treatment groups (not including the control group)
33 | combo = [zeros(numg,1) (1:numg)'];             % We compare each treatment to the control.
34 | numpc = size(combo,1);                         % the number of pairs of treatment (control) groups of interest
35 | select = ones(numoc,numsub,numpc);             % We are interested in all the numoc*numsub*numpc hypotheses.
36 | [example2] = mhtexp(B,Y,sub,D,combo,select)
37 | %% Hypothesis testing with multiple treatments: 
38 | 
39 | % We consider the three treatments for match ratio: 1:1, 2:1, and 3:1. We focus on the
40 | % outcome dollars given not including match.
41 | Y = data(:,1);    % the vector of outcomes
42 | D = data(:,10);   % Treatment (control) status
43 | sub = ones(size(D,1),1);   % the subgroup ID's
44 | numoc = size(Y,2);                             % the number of outcomes
45 | numsub = size(unique(sub),1);                  % the number of subgroups
46 | numg = size(unique(D),1)-1;                    % the number of treatment groups (not including the control group)
47 | 
48 | % compare each treatment group to the control
49 | combo = [zeros(numg,1) (1:numg)'];             % We compare each treatment to the control.
50 | numpc = size(combo,1);                         % the number of pairs of treatment (control) groups of interest
51 | select = ones(numoc,numsub,numpc);             % We are interested in all the numoc*numsub*numpc hypotheses.
52 | [example3] = mhtexp(B,Y,sub,D,combo,select)
53 | 
54 | % all pairwise comparisons among the treatment and control groups (Y, D and sub are reused from the block above)
55 | combo =  nchoosek(0:numg,2);                    % We consider all the pairwise comparisons across the treatment and control groups.
56 | numpc = size(combo,1);                         % the number of pairs of treatment (control) groups of interest
57 | select = ones(numoc,numsub,numpc);             % We are interested in all the numoc*numsub*numpc hypotheses.
58 | [example4] = mhtexp(B,Y,sub,D,combo,select)
59 | %% Hypothesis testing with multiple outcomes, subgroups, treatments: 
60 | 
61 | % We consider four outcome variables: response rate, dollars given not including
62 | % match, dollars given including match, and amount change. We also consider
63 | % four subgroups: red county in a red state, blue county in a red state,
64 | % red county in a blue state, and blue county in a blue state. Lastly, 
65 | % we compare the control to the three treatments for matching ratio: 1:1, 2:1, and 3:1. 
66 | amountmat = data(:,1).*(data(:,10)+ones(size(data,1),1));             % dollars raised per letter including match
67 | Y = [data(:,[12,1]) amountmat data(:,35)];     % the matrix of outcomes
68 | D = data(:,10);                                % treatment (control) status
69 | sub = (data(:,17)==1 & data(:,32)==1)+(data(:,17)==0 & data(:,32)==1)*2....
70 |     +(data(:,17)==0 & data(:,32)==0)*3+(data(:,17)==1 & data(:,32)==0)*4; % subgroup id's, where sub=0 indicates missing subgroup information
71 | numoc = size(Y,2);                             % the number of outcomes
72 | numsub = size(unique(sub),1)-(sum(sub==0)>0);  % the number of subgroups (the sub==0 "missing" code is not counted)
73 | numg = size(unique(D),1)-1;                    % the number of treatment groups (not including the control group)
74 | combo = [zeros(numg,1) (1:numg)'];             % We compare each treatment to the control.
75 | numpc = size(combo,1);                         % the number of pairs of treatment (control) groups of interest
76 | select = ones(numoc,numsub,numpc);             % We are interested in all the numoc*numsub*numpc hypotheses.
77 | [example5] = mhtexp(B,Y,sub,D,combo,select)
78 | 


--------------------------------------------------------------------------------
/mhtexp.sthlp:
--------------------------------------------------------------------------------
  1 | {smcl}
  2 | {findalias asfradohelp}{...}
  3 | {vieweralsosee "" "--"}{...}
  4 | {vieweralsosee "[R] help" "help help"}{...}
  5 | {viewerjumpto "Syntax" "mhtexp##syntax"}{...}
  6 | {viewerjumpto "Description" "mhtexp##description"}{...}
  7 | {viewerjumpto "Options" "mhtexp##options"}{...}
  8 | {viewerjumpto "Remarks" "mhtexp##remarks"}{...}
  9 | {viewerjumpto "Examples" "mhtexp##examples"}{...}
 10 | {title:Title}
 11 | 
 12 | {phang}
 13 | {bf:mhtexp} {hline 2} Stata command for the procedure detailed in List, Shaikh, and Xu (2015)
 14 | 
 15 | 
 16 | {marker syntax}{...}
 17 | {title:Syntax}
 18 | 
 19 | {p 8 17 2}
 20 | {cmdab:mhtexp}
 21 | {varlist}
 22 | {cmd:, } {it:treatment} [{it:options}]
 23 | 
 24 | {synoptset 20 tabbed}{...}
 25 | {synopthdr}
 26 | {synoptline}
 27 | {syntab:Main}
 28 | {synopt:{opth treatment(varlist)}}treatment status variables {it:varlist}{p_end}
 29 | {synopt:{opth subgroup(varname)}}group identifier variable {it:varname}{p_end}
 30 | {synopt:{opth combo(string)}}compare "treatmentcontrol" or "pairwise"; default is
 31 |     {cmd:combo("treatmentcontrol")}{p_end}
 32 | {synopt:{opth only(name)}} the numoc*numsub*numpc hypotheses to be tested{p_end}
 33 | {synopt:{opth exclude(name)}} the numoc*numsub*numpc hypotheses not to be tested{p_end}
 34 | {synopt:{opth bootstrap(integer)}} the number of simulated samples to use{p_end}
 35 | {synoptline}
 36 | {p2colreset}{...}
 37 | 
 38 | {marker description}{...}
 39 | {title:Description}
 40 | 
 41 | {pstd}
 42 | {cmd:mhtexp} implements a testing procedure for multiple hypotheses that asymptotically controls
 43 | the familywise error rate and is asymptotically balanced for the outcomes specified via {varlist}.{p_end}
 44 | 
 45 | {marker options}{...}
 46 | {title:Options}
 47 | 
 48 | {dlgtab:Main}
 49 | 
 50 | {phang}
 51 | {opt treatment(varlist)} user provided variable containing treatment status of the observations; required.{p_end}
 52 | 
 53 | {phang}
 54 | {opt subgroup(varname)} user provided variable containing subgroup ids; optional.{p_end}
 55 | 
 56 | {phang}
 57 | {opt combo(string)} user provided string to specify the comparison between treatment and control.
 58 | {cmd:combo("pairwise")} performs all pairwise comparisons across treatment and control.
 59 | The default, {cmd:combo("treatmentcontrol")}, compares each treatment to the control; optional.
 60 | {p_end}
 61 | 
 62 | {phang}
 63 | {opt only(name)} N by 3 matrix specifying which hypotheses are to be tested; optional.{p_end}
 64 | 
 65 | {phang}
 66 | {opt exclude(name)} N by 3 matrix specifying which hypotheses are not to be tested; optional.{p_end}
 67 | {phang}
 68 | The matrix in either case should be defined where in each row, column 1 is the outcome, column 2 is the
 69 | subgroup and column 3 is the treatment-control comparison. Where...{p_end}
 70 | {phang3} 1 <= column 1 <= number of outcomes{p_end}
 71 | {phang3} 1 <= column 2 <= number of subgroups{p_end}
 72 | {phang3} 1 <= column 3 <= number of treatment-control comparisons{p_end}
 73 | 
 74 | {phang}
 75 | By default {cmd:mhtexp} will test all hypotheses implied by the number of outcomes, subgroups and treatments provided by the user
 76 | in {it:varlist}, {it:subgroup(varname)} and {it:treatment(varlist)}, respectively. Section 4.4 of List, Shaikh and Xu (2015) simultaneously considers
 77 | 4 outcome variables, 4 subgroups and 3 treatment conditions, producing a table of 48 hypothesis tests. However, there are cases in which you
 78 | may only be interested in certain outcome-by-subgroup-by-treatment hypotheses. In those cases, use {opt only} or {opt exclude}.{p_end}
 79 | 
 80 | 
 81 | {phang}
 82 | {opt bootstrap(integer)} the number of simulated samples. the default is 3000,  but a larger number is recommended when there are a large number of hypotheses; optional.{p_end}
 83 | 
 84 | {marker remarks}{...}
 85 | {title:Remarks}
 86 | 
 87 | {pstd}
 88 | For detailed information on the procedure, see {browse "https://ideas.repec.org/p/feb/artefa/00402.html":Multiple Hypothesis Testing in Experimental Economics}.{p_end}
 89 | 
 90 | {pstd}
 91 | If you are running the command for the first time and receive an error message claiming certain functions are not found,
 92 | ie nchoosek(), make sure that lmhtexp.mlib exists in your current dir and enter the command{p_end}
 93 | {phang2}
 94 | {cmd:. mata: mata mlib index}{p_end}
 95 | {pstd}
 96 | Which tells Stata to look in lmhtexp.mlib for mata functions that are required to run the command{p_end}
 97 | 
 98 | {marker examples}{...}
 99 | {title:Examples}
100 | {pstd}
101 | Suppose a data set contains the variables listed below. You can access this dataset at github.com/seidelj/mht "data/data.csv"{p_end}
102 | 
103 | {phang} outcome variables {it:gave amount  amountchange}{p_end}
104 | {phang} treatment variables {it: treatment ratio}{p_end}
105 | 
106 | {pstd}
107 | Setup{p_end}
108 | {phang} {cmd:. gen amountmat = amount * (1+ratio) }{p_end}
109 | {phang} {cmd:. gen groupid = (redcty==1 & red0 == 1) + (redcty==0 & red0 == 1)*2 + (redcty==0 & red0 == 0)*3 + (redcty==1 & red0 == 0)*4}{p_end}
110 | {phang} {cmd:. replace groupid = . if groupid == 0 }{p_end}
111 | 
112 | {pstd}
113 | Example 1: Hypothesis testing with multiple outcomes{p_end}
114 | {phang}{cmd:. mhtexp gave amount amountmat amountchange, treatment(treatment) }{p_end}
115 | 
116 | {pstd}
117 | Example 2: Hypothesis testing with multiple subgroups{p_end}
118 | {phang}{cmd:. mhtexp gave, treatment(treatment) subgroup(groupid) }{p_end}
119 | 
120 | {pstd}
121 | Example 3: Hypothesis testing with multiple treatments{p_end}
122 | {phang}{cmd:. mhtexp amount, treatment(ratio) }{p_end}
123 | 
124 | {pstd}
125 | Example 4: Hypothesis testing for all pairwise comparisons among the treatment and control groups{p_end}
126 | {phang}{cmd:. mhtexp amount, treatment(ratio) combo("pairwise") }{p_end}
127 | 
128 | {pstd}
129 | Example 5: Hypothesis testing with multiple outcomes, subgroups and treatments{p_end}
130 | {phang}{cmd:. mhtexp gave amount amountmat amountchange, subgroup(groupid) treatment(ratio) }{p_end}
131 | 
132 | {pstd}
133 | Example 6: Now let's consider example 5, however we are only interested in the first outcome, subgroup and treatment-control comparison
134 | hypothesis{p_end}
135 | {pstd}
136 | First an N by 3 matrix must be defined. For more on basic Mata and matrices see {help m1_first:[M-1] first}.{p_end}
137 | {phang}{cmd:. mata: onlyHyp = (1,1,1) }{p_end}
138 | {pstd}
139 | Now we have a 1 by 3 matrix named onlyHyp to be passed to {opt only}{p_end}
140 | {phang}{cmd:. mhtexp gave amount amountmat amountchange, subgroup(groupid) treatment(ratio) only(onlyHyp)}{p_end}
141 | 
142 | {pstd}
143 | Example 7: Let's consider example 5 once more, but this time we are interested in all
144 | but the last outcome, subgroup and treatment hypothesis.{p_end}
145 | {pstd}
146 | Create another N by 3 matrix. Recall, we have 4 outcomes, 4 subgroups and 3 treatment control comparisons.{p_end}
147 | {phang}{cmd:. mata: excludeHyp = (4,4,3)}{p_end}
148 | {phang}{cmd:. mhtexp gave amount amountmat amountchange, subgroup(groupid) treatment(ratio) exclude(excludeHyp)}{p_end}
149 | 


--------------------------------------------------------------------------------
/matlab/mhtexp.m:
--------------------------------------------------------------------------------
  1 | 
  2 | function [output] = mhtexp(B,Y,sub,D,combo,select)
  3 | 
  4 | % MHTEXP considers the multiple hypothesis testing problem in
  5 | % experimental economics described in List, Shaikh, and Xu (2015).
  6 | %
  7 | %   Denote by n the number of units, by numoc the number of outcomes, by
  8 | %   numsub the number of subgroups, and by numpc the number of pairs of
  9 | %   treatment (control) groups of interest.
 10 | %
 11 | %   Among the input arguments of mhtexp: 
 12 | %   B is the number of simulated samples (the suggested number is 3000, 
 13 | %   but a larger number is recommended when there are a large number of hypotheses);
 14 | %   Y is an n by numoc matrix with the ijth element being the jth outcome 
 15 | %   of the ith unit;
 16 | %   sub is an n by 1 matrix with the ith element being the subgroup ID 
 17 | %   of the ith unit, where a subgroup ID is coded as an integer in [1,numsub];
 18 | %   D is an n by 1 matrix in which the ith element is the treatment status
 19 | %   of the ith unit (the control group is coded as 0);
 20 | %   combo is a numpc by 2 matrix, each row of which indicates a pairwise
 21 | %   comparison of interest;
 22 | %   select is a numoc by numsub by numpc matrix, where the ijkth element 
 23 | %   is equal to 1 if we are interested in the hypothesis for the ith outcome, 
 24 | %   the jth subgroup, and the kth pairwise comparison, and the ijkth element is 
 25 | %   equal to 0 otherwise.
 26 | %
 27 | %   The output argument "output" is a matrix with 10 columns:
 28 | %   columns 1-4 present the id's of the corresponding outcomes, subgroups, 
 29 | %   and treatment (control) groups;
 30 | %   the 5th column presents the absolute values of difference in sample means;
 31 | %   the 6th column presents the p-values based on the single testing procedure 
 32 | %   described in Remark 3.1 of List, Shaikh, and Xu (2015);
 33 | %   the 7th column presents the p-values based on the multiple testing procedure
 34 | %   described in Theorem 3.1 of List, Shaikh, and Xu (2015);
 35 | %   the 8th column presents the p-values based on the multiple testing procedure
 36 | %   described in Remark 3.7 of List, Shaikh, and Xu (2015);
 37 | %   the 9th column presents the p-values based on the Bonferroni method;
 38 | %   the 10th column presents the p-values based on the Holm's method.
 39 | %   
 40 | %   Please refer to List, Shaikh, and Xu (2015) for examples.
 41 | 
 42 | n = size(Y,1); % the number of units
 43 | numoc = size(Y,2); % the number of outcomes
 44 | numsub = size(unique(sub),1)-(sum(sub==0)>0); % the number of subgroups
 45 | numg = size(unique(D),1)-1;  % the number of treatment groups (not including the control group)
 46 | numpc = size(combo,1); % the number of pairs of treatment (control) groups of interest
 47 | 
 48 | % compute the studentized differences in means for all the hypotheses based on the actual data
 49 | 
 50 | meanact = zeros(numoc,numsub,numg+1); % a matrix of sample means of the actual data for all the hypotheses
 51 | varact = zeros(numoc,numsub,numg+1); % a matrix of sample variances of the actual data for all the hypotheses
 52 | Nact = zeros(numoc,numsub,numg+1); % a matrix of sample sizes of the actual data for all the hypotheses
 53 | 
 54 | for i = 1:numoc
 55 |     for j = 1:numsub
 56 |         for k = 0:numg
 57 |     meanact(i,j,k+1) = mean(Y(sub==j & D==k,i));
 58 |     varact(i,j,k+1) = var(Y(sub==j & D==k,i));
 59 |     Nact(i,j,k+1) = size(Y(sub==j & D==k,i),1);
 60 |         end
 61 |     end
 62 | end
 63 | diffact = meanact(:,:,combo(:,1)+ones(numpc,1))-meanact(:,:,combo(:,2)+ones(numpc,1)); % a matrix of differences in sample means for all outcomes, subgroups, and pairwise comparisons based on actual data
 64 | abdiffact = abs(diffact); % a matrix of absolute differences in sample means for all outcomes, subgroups, and pairwise comparisons based on actual data
 65 | statsact = abdiffact./sqrt(varact(:,:,combo(:,1)+ones(numpc,1))./Nact(:,:,combo(:,1)+ones(numpc,1))...
 66 |     +varact(:,:,combo(:,2)+ones(numpc,1))./Nact(:,:,combo(:,2)+ones(numpc,1))); % a matrix of studentized absolute differences in sample means for all outcomes, subgroups, and pairwise comparisons based on actual data
 67 | 
 68 | % Construct bootstrap samples and compute the test statistics and the corresponding 1-p values for each simulated sample
 69 | 
 70 | rng default;
 71 | idboot = randi(n,n,B); % an n by B matrix of simulated samples of all the units with replacement
 72 | statsboot = zeros(B,numoc,numsub,numpc); % a matrix of the test statistics for all the simulated samples
 73 | meanboot = zeros(numoc,numsub,numg+1); % a matrix of sample means of a simulated sample for all the hypotheses
 74 | varboot = zeros(numoc,numsub,numg+1); % a matrix of sample variances of a simulated sample for all the hypotheses
 75 | Nboot = zeros(numoc,numsub,numg+1); % a matrix of sample sizes of a simulated sample for all the hypotheses
 76 | 
 77 | for i = 1:B
 78 |     Yboot = Y(idboot(:,i),:); % a matrix of all the outcomes for the ith simulated sample
 79 |     subboot = sub(idboot(:,i),:); % a matrix of all the subgroup id's for the ith simulated sample
 80 |     Dboot = D(idboot(:,i),:); % a matrix of all the treatment (control) status for the ith simulated sample
 81 |     for j = 1:numoc
 82 |         for k = 1:numsub
 83 |             for l = 0:numg
 84 |     meanboot(j,k,l+1) = mean(Yboot(subboot==k & Dboot==l,j));
 85 |     varboot(j,k,l+1) = var(Yboot(subboot==k & Dboot==l,j));
 86 |     Nboot(j,k,l+1) = size(Yboot(subboot==k & Dboot==l,j),1);
 87 |             end
 88 |         end
 89 |     end
 90 |     statsboot(i,:,:,:) = abs(meanboot(:,:,combo(:,1)+ones(numpc,1))-meanboot(:,:,combo(:,2)+ones(numpc,1))-diffact)./...
 91 |         sqrt(varboot(:,:,combo(:,1)+ones(numpc,1))./Nboot(:,:,combo(:,1)+ones(numpc,1))...
 92 |     +varboot(:,:,combo(:,2)+ones(numpc,1))./Nboot(:,:,combo(:,2)+ones(numpc,1)));
 93 | end
 94 | 
 95 | pact = zeros(numoc,numsub,numpc); % a matrix of 1-p values of the actual data
 96 | pboot = zeros(B,numoc,numsub,numpc); % a matrix of 1-p values of all the simulated data
 97 | 
 98 | for i = 1:numoc
 99 |     for j = 1:numsub
100 |         for k = 1:numpc
101 |             pact(i,j,k) = 1-(sum((statsboot(:,i,j,k)>=statsact(i,j,k)*ones(B,1))))/B;
102 |             for l=1:B
103 |                 pboot(l,i,j,k) = 1-(sum((statsboot(:,i,j,k)>=statsboot(l,i,j,k)*ones(B,1))))/B;
104 |             end
105 |         end
106 |     end
107 | end
108 | 
109 | % calculate p-values based on single hypothesis testing
110 | 
111 | alphasin = zeros(numoc,numsub,numpc); % the smallest alpha's that reject the hypotheses based on the single testing procedure described in Remark 3.1
112 | 
113 | for i=1:numoc
114 |     for j=1:numsub
115 |         for k=1:numpc
116 |             ptemp = pboot(:,i,j,k);
117 |             sortp = sort(ptemp,'descend');
118 |             q = find(pact(i,j,k)*ones(B,1)>=sortp,1)/B;
119 |             if isempty(q)==0
120 |             alphasin(i,j,k) = q;
121 |             else
122 |             alphasin(i,j,k) = 1;
123 |             end
124 |         end
125 |     end
126 | end
127 | 
128 | psin = alphasin; % p-values based on the single testing procedure described in Remark 3.1 of List, Shaikh, and Xu (2015)
129 | 
130 | % calculate p-values based on multiple hypothesis testing
131 | 
132 | nh = sum(sum(sum(select)));   % the number of hypotheses of interest
133 | statsall = zeros(nh,8+B);     % columns 1-5 present the id's of the hypotheses, outcomes, subgroups, and treatment (control) groups;
134 |                               % the 6th column shows the studentized differences in means for all the hypotheses based on the actual data
135 |                               % the 7th column presents the p-values based on the single testing procedure described in Remark 3.1 of List, Shaikh, and Xu (2015);
136 |                               % the 8th column presents the 1-p values based on the actual data;
137 |                               % the subsequent columns present the corresponding 1-p values based on the simulated samples
138 | counter = 1;                  % the loop counter
139 | 
140 | for i=1:numoc
141 |     for j=1:numsub
142 |         for k=1:numpc
143 |             if select(i,j,k)==1;
144 |             statsall(counter,:) = [counter i j combo(k,:) abdiffact(i,j,k) psin(i,j,k) pact(i,j,k) pboot(:,i,j,k)'];
145 |             counter = counter+1;
146 |             end
147 |         end
148 |     end
149 | end
150 | 
151 | statsrank = sortrows(statsall,7); % rank the rows according to the p-values based on single hypothesis testing
152 | alphamul = zeros(nh,1); % the smallest alpha's that reject the hypotheses based on Theorem 3.1
153 | alphamulm = zeros(nh,1); % the smallest alpha's that reject the hypotheses based on Remark 3.7
154 | 
155 | for i=1:nh
156 |     maxstats = max(statsrank(i:end,9:end),[],1); % the maximums of the 1-p values among all the remaining hypotheses for all the simulated samples
157 |     sortmaxstats = sort(maxstats,2,'descend'); % sort "maxstats" in a descending order 
158 |     q = find(statsrank(i,8)>=sortmaxstats,1)/B;
159 |     if isempty(q)==0
160 |         alphamul(i) = q;
161 |     else    
162 |         alphamul(i) = 1;
163 |     end
164 |     if i==1
165 |         alphamulm(i) = alphamul(i);
166 |     else
167 |         sortmaxstatsm = zeros(1,B); % compute at each quantile the maximum of the critical values of all the "true" subsets of hypotheses
168 |     for j=nh-i+1:-1:1
169 |         subset = nchoosek(statsrank(i:end,1),j); % all the subsets of hypotheses with j elements
170 |         sumcont = 0; % the total number of subsets of hypotheses with j elements that contradict any of the previously rejected hypotheses
171 |         for k=1:size(subset,1)
172 |             cont = 0; % cont=1 if any of the previously rejected hypotheses contradicts the current subset of hypotheses
173 |         for l=1:i-1
174 |             sameocsub = subset(k,ismember(statsall(subset(k,:),2:3),statsrank(l,2:3),'rows')==1); % the hypotheses in "subset(k,:)" with the same outcome and subgroup as the lth hypothesis
175 |             tran = mat2cell(statsall(sameocsub,4:5),ones(1,size(sameocsub,2)),2); % this cell array presents all the sets of "connected" treatment (control) groups implied by "transitivity" under the null hypotheses in "sameocsub" 
176 |             trantemp = tran;
177 |             if size(sameocsub,2)<=1
178 |                 cont = 0;
179 |                 maxstatsm = max(statsall(subset(k,:),9:end),[],1); % the maximums of the 1-p values within the subset of hypotheses for all the simulated samples
180 |                 sortmaxstatsm = max(sortmaxstatsm,sort(maxstatsm,'descend'));
181 |                 break;
182 |             else
183 |                 counter = 1;
184 |                 while size(tran,1)>size(trantemp,1) || counter==1 % repeat merging passes until a pass no longer reduces the number of groups; counter==1 forces the first pass
185 |                     tran = trantemp; % the groups produced by the previous pass
186 |                     trantemp = tran(1); % seed the new partition with the first group
187 |                     counter = counter+1;
188 |                     for m=2:size(tran,1)
189 |                         belong = 0; % the total number of rows of "trantemp" that "tran{m}" can be connected to by "transitivity"
190 |                         for N=1:size(trantemp,1) % the loop bound is evaluated once, so a group appended below is not revisited for this m
191 |                             if numel(unique([trantemp{N} tran{m}]))<size(trantemp{N},2)+size(tran{m},2) % fewer distinct ids than entries: the two groups share a treatment (assumes no repeated ids within a group)
192 |                                 trantemp{N} = unique([trantemp{N} tran{m}]); % merge "tran{m}" into the Nth group
193 |                                 belong = belong+1;
194 |                             end
195 |                             if N==size(trantemp,1) && belong==0 % "tran{m}" is not connected to any existing group, so it starts a new one
196 |                                 trantemp = [trantemp;tran(m)];
197 |                             end
198 |                         end
199 |                     end
200 |                 end
201 |                 for p=1:size(tran,1)
202 |                     if sum(ismember(statsrank(l,4:5),tran{p,:}))==2 % the lth previously rejected hypothesis contradicts the current subset of hypotheses
203 |                         cont = 1;
204 |                         break;
205 |                     end
206 |                 end
207 |             end
208 |             if cont==1
209 |                 break;
210 |             end
211 |         end
212 |         sumcont = sumcont+cont;
213 |         if cont==0
214 |             maxstatsm = max(statsall(subset(k,:),9:end),[],1); 
215 |             sortmaxstatsm = max(sortmaxstatsm,sort(maxstatsm,'descend'));
216 |         end
217 |         end
218 |         if sumcont==0
219 |             break; % If all the subsets of hypotheses with j elements do not contradict any of the previously rejected hypotheses, smaller subsets do not either.
220 |         end
221 |     end
222 |     qm=find(statsrank(i,8)>=sortmaxstatsm,1)/B;
223 |     if isempty(qm)==0
224 |         alphamulm(i) = qm;
225 |     else    
226 |         alphamulm(i) = 1;
227 |     end
228 |     end
229 | end
230 |     
231 | bon = min(statsrank(:,7)*nh,ones(nh,1)); % p-values based on the Bonferroni method
232 | holm = min(statsrank(:,7).*(nh:-1:1)',ones(nh,1)); % p-values based on the Holm's method
233 | 
234 | output = sortrows([statsrank(:,1:7) alphamul alphamulm bon holm],1); % restore the order
235 | output = output(:,2:end);
236 | check = output(:,6)<=output(:,7) & output(:,7)>=output(:,8) & output(:,7)<=output(:,9) & output(:,7)<=output(:,10); % check if the results are what we should expect
237 | output = dataset({output,'outcome','subgroup','treatment1','treatment2','diff_in_means','Remark3_1','Thm3_1','Remark3_7','Bonf','Holm'});
238 | 
239 | 
240 | end


--------------------------------------------------------------------------------