├── +utils ├── zerodiag.m ├── putdiag.m ├── spdmat.m ├── pearsonIIIpval.m ├── distkern.m ├── sqdist.m ├── kernel.m ├── fwht.m ├── rotation_angle_axis.m ├── whiten.m ├── ucenter.m ├── spatialMedian.m ├── dcenter.m ├── jbld.m ├── DyadUpdate.c ├── nystrom.m ├── sigest.m ├── rbf.m ├── mexDyadUpdate.c ├── poldecomp.m ├── tri2sqind.m ├── permMoments.m ├── approxmtimes.m ├── mexHadamard.c └── rfm.m ├── +sphere ├── vpq.m ├── spatialSign.m ├── ajne.m ├── psivec.m ├── gine.m ├── rpcdf.m ├── gine3.m ├── gineajne.m ├── rp.m ├── rayleigh.m ├── bingham.m ├── rppdf.m ├── rptest.m ├── sumchi2cdf.m ├── signtest.m ├── vmfrnd.m └── jsn.m ├── .gitignore ├── +diff ├── mmd_.m ├── mmd.m ├── hotell2.m ├── mmdtest.m ├── covtest.m ├── kstest2d.m └── minentest.m ├── +dim ├── krztest.m ├── krzsim.m └── cpca.m ├── Testing ├── test_uniSphereTestPower_plot.m ├── test_sphericity.m ├── test_uniSphereTestPower2.m ├── test_dcorr.m ├── test_sphericity3.m ├── Test_dcov_dcorr.m ├── test_uniSphereTestPower.m ├── test_rank.m ├── test_PAIRS.m ├── test_uniSphereTestNull.m ├── test_covtest.m ├── test_sphericity4.m ├── test_sphericity2.m └── Test_rv.m ├── +dep ├── rdc.m ├── rank.m ├── dcorr.m ├── rvtest.m ├── rv.m ├── fdcov.m ├── ranktest.m ├── rpdcov.m ├── dcov.m ├── hsic.m ├── dcorrtest.m └── dcovtest.m ├── setup_highdim.m ├── README.md ├── DepTest1.m └── UniSphereTest.m /+utils/zerodiag.m: -------------------------------------------------------------------------------- 1 | function M = zerodiag(M) 2 | 3 | M = utils.putdiag(M,0); 4 | -------------------------------------------------------------------------------- /+sphere/vpq.m: -------------------------------------------------------------------------------- 1 | function v = vpq(p,q) 2 | 3 | v = nchoosek(p+q-2,p-1) + nchoosek(p+q-2,p-1); 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.mat 3 | temp/* 4 | *.mexmaci64 5 | *.mexa64 6 | *.mexw64 7 | *.mexw32 8 | 9 | -------------------------------------------------------------------------------- /+utils/putdiag.m: -------------------------------------------------------------------------------- 1 | function M = putdiag(M,x) 2 | 3 | [m,n] = size(M); 4 | 5 | assert((numel(x)==1)||(numel(x)==min(m,n)),'Wrong # of elements for diagonal'); 6 | 7 | M(1:(m+1):min(m*m,m*n)) = x; 8 | -------------------------------------------------------------------------------- /+utils/spdmat.m: -------------------------------------------------------------------------------- 1 | % Generate a dense n x n symmetric, positive definite matrix 2 | function A = spdmat(n) 3 | 4 | A = rand(n,n); 5 | A = A+A'; 6 | % since A(i,j) < 1 by construction and a symmetric diagonally dominant matrix 7 | % is symmetric positive definite, which can be ensured by adding nI 8 | A = A + n*eye(n); 9 | -------------------------------------------------------------------------------- /+diff/mmd_.m: -------------------------------------------------------------------------------- 1 | % TODO 2 | % o Looks like the definition of MMD in Gretton's publicly available code 3 | % differs slightly from their paper (re. 
the diagonal terms) 4 | 5 | function stat = mmd_(K,L,KL,m,n,biased) 6 | 7 | if biased 8 | stat = (sum(K(:))+m)/m^2 + (sum(L(:))+n)/n^2 - 2*sum(KL(:))/m/n; 9 | else 10 | stat = sum(K(:))/m/(m-1) + sum(L(:))/n/(n-1) - 2*sum(KL(:))/m/n; 11 | end -------------------------------------------------------------------------------- /+dim/krztest.m: -------------------------------------------------------------------------------- 1 | function [pval,stat,delta] = krztest(x,y,s) 2 | 3 | nboot = 500; 4 | 5 | [m,p] = size(x); 6 | [n,q] = size(x); 7 | 8 | [k,stat,delta,R] = dim.krzsim(x,y,s); 9 | yR = y*R'; 10 | 11 | for i = 1:nboot 12 | ind = unidrnd(m,m,1); 13 | xb = x(ind,:); 14 | ind = unidrnd(n,n,1); 15 | yb = yR(ind,:); 16 | [~,Tm(i)] = dim.krzsim(xb,yb,s); 17 | end 18 | %hist(Tm); 19 | pval = sum(Tm<=stat)/nboot; 20 | 21 | -------------------------------------------------------------------------------- /+utils/pearsonIIIpval.m: -------------------------------------------------------------------------------- 1 | % Calculate p-value for statistic of the form trace(A*B) using Pearson 2 | % Type III approximation using exact first three moments of the 3 | % permutation distribution 4 | function [pval,stat] = pearsonIIIpval(A,B,stat) 5 | 6 | if nargin < 3 7 | stat = sum(sum(A.*B)); 8 | end 9 | 10 | % Exact moments of permutation distribution 11 | [mu,sigma2,skew] = utils.permMoments(A,B); 12 | 13 | stat = (stat - mu)/sqrt(sigma2); 14 | 15 | if skew >= 0 16 | pval = gamcdf(stat - (-2/skew),4/skew^2,skew/2,'upper'); 17 | else 18 | as = abs(skew); 19 | pval = gamcdf(skew/as*stat + 2/as,4/skew^2,as/2); 20 | end 21 | -------------------------------------------------------------------------------- /+utils/distkern.m: -------------------------------------------------------------------------------- 1 | % Sejdinovic et al, pg. 
2272, example 15 2 | % Brownian distance kernel 3 | function k = distkern(X,Y,varargin) 4 | 5 | par = inputParser; 6 | par.KeepUnmatched = true; 7 | addRequired(par,'X',@isnumeric); 8 | addRequired(par,'Y',@isnumeric); 9 | addParamValue(par,'index',1,@(x) isscalar(x) && (x>0) && (x<=2)); 10 | parse(par,X,Y,varargin{:}); 11 | 12 | Yt = Y'; 13 | XX = sqrt(sum(X.*X,2)); 14 | YY = sqrt(sum(Yt.*Yt)); 15 | D = sqrt(utils.sqdist(X,Y)); 16 | 17 | if par.Results.index ~= 1 18 | XX = XX.^par.Results.index; 19 | YY = YY.^par.Results.index; 20 | D = D.^par.Results.index; 21 | end 22 | 23 | k = 0.5 * (bsxfun(@plus,XX,YY) - D); 24 | -------------------------------------------------------------------------------- /+dim/krzsim.m: -------------------------------------------------------------------------------- 1 | % Krzanowski similarity 2 | function [k,Tm,delta,R] = krzsim(x,y,m) 3 | 4 | S1 = cov(x); 5 | S2 = cov(y); 6 | 7 | [Q1,D1] = eig(S1); 8 | [Q2,D2] = eig(S2); 9 | 10 | Q11 = Q1(:,1:m); 11 | Q12 = Q1(:,(m+1):end); 12 | Q21 = Q2(:,1:m); 13 | Q22 = Q2(:,(m+1):end); 14 | 15 | [k,delta,u,v] = princvec(Q11,Q21); 16 | [~,~,u2,v2] = princvec(Q12,Q22); 17 | 18 | R = [u u2]*[v';v2']; 19 | Tm = m - k; 20 | %Tm = trace(Q12'*Q21*Q21'*Q12); 21 | 22 | function [k,delta,u,v] = princvec(L,M) 23 | N = L'*M*M'*L; 24 | [V,D] = eig(N); 25 | lambda = diag(D); 26 | % Krzanowski similarity 27 | %k = trace(N) 28 | k = sum(lambda); 29 | sl = lambda.^0.5; 30 | delta = real(rad2deg(acos(sl))); 31 | 32 | u = L*V; 33 | v = M*M'*u; 34 | -------------------------------------------------------------------------------- /+utils/sqdist.m: -------------------------------------------------------------------------------- 1 | % Squared euclidean distance matrix 2 | % Faster than pdist2(x,x) & squareform(pdist(x)) 3 | % 4 | % x = randn(5000,1000); 5 | % y = randn(200,1000); 6 | % tic; sqrt(utils.sqdist(x)); toc 7 | % tic; pdist2(x,x); toc 8 | % norm(utils.sqdist(x) - pdist2(x,x).^2,'fro') 9 | % norm(utils.sqdist(x,y) - pdist2(x,y).^2,'fro') 10 | function D = sqdist(X,Y) 11 | 12 | if (nargin == 1) || isempty(Y) 13 | XX = sum(X.*X,2); 14 | D = bsxfun(@plus,XX,XX') - 2*(X*X'); 15 | else 16 | [m,p] = size(X); 17 | [n,q] = size(Y); 18 | assert(p==q,'Input dimensions must match'); 19 | 20 | Yt = Y'; 21 | XX = sum(X.*X,2); 22 | YY = sum(Yt.*Yt,1); 23 | D = bsxfun(@plus,XX,YY) - 2*(X*Yt); 24 | end 25 | 26 | %D(D<0) = 0; 27 | -------------------------------------------------------------------------------- /+utils/kernel.m: -------------------------------------------------------------------------------- 1 | function [K,varargout] = kernel(X,Y,varargin) 2 | 3 | par = inputParser; 4 | par.KeepUnmatched = true; 5 | addRequired(par,'X',@isnumeric); 6 | addRequired(par,'Y',@isnumeric); 7 | addParamValue(par,'kernel','rbf',@ischar); 8 | parse(par,X,Y,varargin{:}); 9 | 10 | switch lower(par.Results.kernel) 11 | case {'linear'} 12 | if isempty(Y) 13 | K = X*X'; 14 | else 15 | K = X*Y'; 16 | end 17 | case {'poly'} 18 | % TODO 19 | case {'rbf' 'gaussian' 'gauss'} 20 | [K,sigma] = utils.rbf(X,Y,par.Unmatched); 21 | if nargout > 1 22 | varargout{1} = sigma; 23 | end 24 | case {'brownian' 'dist' 'distance'} 25 | if isempty(Y) 26 | K = utils.distkern(X,X); 27 | else 28 | K = utils.distkern(X,Y); 29 | end 30 | end -------------------------------------------------------------------------------- /Testing/test_uniSphereTestPower_plot.m: -------------------------------------------------------------------------------- 1 | 
load('/Users/brian/Dropbox/Temp/sphere/Testing/test_uniSphereTestPower_n80_1.mat'); 2 | 3 | prob_r1 = prob_r; 4 | prob_ga1 = prob_ga; 5 | prob_p1 = prob_p; 6 | 7 | load('/Users/brian/Dropbox/Temp/sphere/Testing/test_uniSphereTestPower_n80_2.mat'); 8 | 9 | prob_r2 = prob_r; 10 | prob_ga2 = prob_ga; 11 | prob_p2 = prob_p; 12 | 13 | prob_r = (prob_r1+prob_r2)/2; 14 | prob_ga = (prob_ga1+prob_ga2)/2; 15 | prob_p = (prob_p1+prob_p2)/2; 16 | 17 | 18 | figure; 19 | for i = 1:3 20 | subplot(3,1,i); hold on 21 | plot(kappa,prob_r(:,i),'-',kappa,prob_ga(:,i),'-',kappa,prob_p(:,i),'--'); 22 | % plot(kappa,prob_ga(:,i),'--'); 23 | % plot(kappa,prob_p(:,i),':'); 24 | title(sprintf('dimension = %g',p(i))); 25 | if i == 1 26 | legend({'Rayleigh','Gine-Ajne','PAIRS'}) 27 | end 28 | end 29 | 30 | ylabel('Empirical power') 31 | xlabel('Kappa'); -------------------------------------------------------------------------------- /+utils/fwht.m: -------------------------------------------------------------------------------- 1 | % FWHT Fast Discrete Walsh-Hadamard Transform 2 | % 3 | % Y = fwht(X) 4 | % 5 | % Wrapper for efficient mex version of FWHT (mexHadamard.c). 6 | % 7 | % INPUTS 8 | % X - input matrix or column vector 9 | % 10 | % OUTPUTS 11 | % Y - transformed data 12 | 13 | function Y = fwht(X) 14 | 15 | [n,m] = size(X); 16 | n2 = nextpow2(n); 17 | 18 | % Zero-pad to nextpow2 19 | if n ~= 2^n2 20 | X = [X ; zeros(2^n2-n,m)]; 21 | end 22 | 23 | try 24 | % Scaled to match Matlab fwht 25 | Y = utils.mexHadamard(X)/2^n2; 26 | catch err 27 | if strcmp(err.identifier,'MATLAB:UndefinedFunction') 28 | warning('fwht:mex',... 29 | sprintf(['Mex file ''mexHadamard.c'' has not be compiled\n'... 30 | 'Transform will be done with slow Matlab version.'])); 31 | Y = fwht(X,2^n2,'hadamard'); 32 | else 33 | rethrow(err); 34 | end 35 | end -------------------------------------------------------------------------------- /+sphere/spatialSign.m: -------------------------------------------------------------------------------- 1 | % SPATIALSIGN Project data onto unit hypersphere 2 | % 3 | % U = spatialSign(x) 4 | % 5 | % INPUTS 6 | % x - [n x p] matrix, p being data-dimensionality 7 | % 8 | % OUTPUTS 9 | % U - [n x p] matrix, each row normalized to unit length 10 | 11 | % $ Copyright (C) 2014 Brian Lau http://www.subcortex.net/ $ 12 | % The full license and most recent version of the code can be found at: 13 | % https://github.com/brian-lau/highdim 14 | % 15 | % This program is free software: you can redistribute it and/or modify 16 | % it under the terms of the GNU General Public License as published by 17 | % the Free Software Foundation, either version 3 of the License, or 18 | % (at your option) any later version. 19 | % 20 | % This program is distributed in the hope that it will be useful, 21 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 22 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 23 | % GNU General Public License for more details. 24 | 25 | function U = spatialSign(x) 26 | 27 | U = bsxfun(@rdivide,x,sqrt(sum(x.^2,2))); 28 | U(isnan(U)) = 0; -------------------------------------------------------------------------------- /+utils/rotation_angle_axis.m: -------------------------------------------------------------------------------- 1 | function R = rotation_angle_axis(theta,u) 2 | %ROTATION_ANGLE_AXIS The Rodrigues' formula for rotation matrices. 
3 | % R = ROTATION_ANGLE_AXIS(THETA,U) 4 | % 5 | % The formula recieves an angle of rotation given by theta and a unit vector, 6 | % u, that defines the axis of rotation. 7 | % 8 | % ARGUMENT DESCRIPTION: 9 | % THETA - angle of rotation (radians). 10 | % U - unit vector 11 | % 12 | % OUTPUT DESCRIPTION: 13 | % R - rotation matrix. 14 | % 15 | % Example 16 | % ------------- 17 | % R = rotation_angle_axis(deg2rad(pi/6),[sqrt(2)/2, 0.0, sqrt(2)/2]) 18 | % 19 | 20 | % Credits: 21 | % Daniel Simoes Lopes 22 | % IDMEC 23 | % Instituto Superior Tecnico - Universidade Tecnica de Lisboa 24 | % danlopes (at) dem ist utl pt 25 | % http://web.ist.utl.pt/daniel.s.lopes/ 26 | % 27 | % July 2011 original version. 28 | 29 | 30 | %__________________________________________________________________________ 31 | % Rodrigues' rotation formula. 32 | u = u./norm(u,2); 33 | S = [ 0 u(3) -u(2); 34 | -u(3) 0 u(1); 35 | u(2) -u(1) 0 ]; 36 | R = eye(3) + sin(theta)*S + (1-cos(theta))*S^2; -------------------------------------------------------------------------------- /Testing/test_sphericity.m: -------------------------------------------------------------------------------- 1 | % Zou et al (2014). Multivariate sign-based high-dimensional tests for 2 | % sphericity. Biometrika 101: 229-236 3 | 4 | % bias-corrected sign test 5 | % Check null distribution approximation 6 | n = 1000; 7 | p1 = zeros(n,1); 8 | s1 = zeros(n,1); 9 | p0 = zeros(n,1); 10 | s0 = zeros(n,1); 11 | for i = 1:n 12 | %x = randn(40,100); 13 | x = trnd(4,40,100); 14 | [p1(i),s1(i)] = sphere.signtest(x,'test','bcs','approx',false); 15 | [p0(i),s0(i)] = sphere.signtest(x,'test','bcs','approx',true); 16 | end 17 | 18 | figure; 19 | dx = 0.1; xx = -3:dx:3; 20 | n = histc(s0,xx); 21 | subplot(211);hold on 22 | bar(xx,n./sum(n),'histc'); 23 | plot(xx,normpdf(xx)*dx,'m'); 24 | title('normal approximation'); 25 | n = histc(s1,xx); 26 | subplot(212);hold on 27 | bar(xx,n./sum(n),'histc'); 28 | plot(xx,normpdf(xx)*dx,'m'); 29 | title('exact'); 30 | 31 | % Standard sign test 32 | % Check null distribution approximation 33 | n = 1000; 34 | p = zeros(n,1); 35 | s = zeros(n,1); 36 | for i = 1:n 37 | x = randn(10,3); 38 | [p(i),s(i)] = sphere.signtest(x,'test','sign'); 39 | end 40 | 41 | figure; 42 | dx = 1; xx = 0:1:25; 43 | n = histc(s,xx); 44 | hold on 45 | bar(xx,n./sum(n),'histc'); 46 | plot(xx,chi2pdf(xx,(3+2)*(3-1)/2)*dx,'m') 47 | -------------------------------------------------------------------------------- /Testing/test_uniSphereTestPower2.m: -------------------------------------------------------------------------------- 1 | % pairsClusterTest from here: https://sites.google.com/site/antimatt/software 2 | % randvonMisesFisherm from here: http://www.stat.pitt.edu/sungkyu/MiscPage.html 3 | clear all; 4 | n = 60; 5 | p = [4 8 16];%[4 10 20]; 6 | sigma = [1 10 20 40];%[0 1 2 4]; 7 | reps = 50;%2500; 8 | 9 | prob_ga = zeros(numel(sigma),numel(p)); 10 | prob_p = zeros(numel(sigma),numel(p)); 11 | 12 | test = UniSphereTest('autoRun',false); 13 | test.params.nboot = 500; 14 | for i = 1:numel(sigma) 15 | for j = 1:numel(p) 16 | for k = 1:reps 17 | 18 | x = zeros(n,p(j)); 19 | count = 0; 20 | for m = 1:p(j) 21 | S = eye(p(j)); 22 | if (rand < 0.25) && (count <=6) 23 | S(m,m) = sigma(i); 24 | count = count + 1; 25 | end 26 | x = x + mvnrnd(zeros(1,p(j)),S,n); 27 | end 28 | 29 | test.x = x; 30 | 31 | test.test = 'gine-ajne'; test.run(); 32 | h_ga(k) = test.h; 33 | 34 | [clusteriness, temp, dists, k2] = pairsClusterTest(x); 35 | pv(k) = temp; 36 | 37 | end 38 | 
prob_ga(i,j) = mean(h_ga); 39 | prob_p(i,j) = mean(pv<=0.05); 40 | end 41 | i 42 | end 43 | -------------------------------------------------------------------------------- /+sphere/ajne.m: -------------------------------------------------------------------------------- 1 | % AJNE Ajne statistic for spherical uniformity 2 | % 3 | % A = ajne(U) 4 | % 5 | % INPUTS 6 | % U - [n x p] matrix, n samples with dimensionality p 7 | % the data should already be projected to the unit hypersphere 8 | % 9 | % OUTPUTS 10 | % A - statistic 11 | % 12 | % REFERENCE 13 | % Mardia, KV, Jupp, PE (2000). Directional Statistics. John Wiley 14 | % 15 | % SEE ALSO 16 | % UniSphereTest, spatialSign 17 | 18 | % $ Copyright (C) 2014 Brian Lau http://www.subcortex.net/ $ 19 | % The full license and most recent version of the code can be found at: 20 | % https://github.com/brian-lau/highdim 21 | % 22 | % This program is free software: you can redistribute it and/or modify 23 | % it under the terms of the GNU General Public License as published by 24 | % the Free Software Foundation, either version 3 of the License, or 25 | % (at your option) any later version. 26 | % 27 | % This program is distributed in the hope that it will be useful, 28 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 29 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 30 | % GNU General Public License for more details. 31 | 32 | function A = ajne(U) 33 | 34 | [n,p] = size(U); 35 | 36 | psi = sphere.psivec(U,n); 37 | % eq. 10.4.10 38 | A = (n/4) - (1/(n*pi))*sum(psi); 39 | 40 | -------------------------------------------------------------------------------- /+sphere/psivec.m: -------------------------------------------------------------------------------- 1 | % PSIVEC Vector pairwise angles, i < j 2 | % 3 | % psi = psivec(U,n) 4 | % 5 | % INPUTS 6 | % U - [n x p] matrix, n samples with dimensionality p 7 | % the data should already be projected to the unit hypersphere 8 | % n - number of samples 9 | % 10 | % OUTPUTS 11 | % psi - vector from psi matrix (U*U'), i < j 12 | % 13 | % REFERENCE 14 | % Mardia, KV, Jupp, PE (2000). Directional Statistics. John Wiley 15 | % 16 | % SEE ALSO 17 | % gine, gine3, ajne, gineajne 18 | 19 | % $ Copyright (C) 2014 Brian Lau http://www.subcortex.net/ $ 20 | % The full license and most recent version of the code can be found at: 21 | % https://github.com/brian-lau/highdim 22 | % 23 | % This program is free software: you can redistribute it and/or modify 24 | % it under the terms of the GNU General Public License as published by 25 | % the Free Software Foundation, either version 3 of the License, or 26 | % (at your option) any later version. 27 | % 28 | % This program is distributed in the hope that it will be useful, 29 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 30 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 31 | % GNU General Public License for more details. 
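% EXAMPLE (a minimal usage sketch, assuming the +sphere package is on the path)
%   U = sphere.spatialSign(randn(100,5)); % 100 samples projected onto the unit sphere in R^5
%   psi = sphere.psivec(U,size(U,1));     % n*(n-1)/2 pairwise angles, i < j, each in [0,pi]
%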
32 | 33 | function psi = psivec(U,n) 34 | 35 | xx = triu(U*U',1); 36 | ind = triu(ones(n,n),1); 37 | psi = acos(xx(ind==1)); 38 | -------------------------------------------------------------------------------- /+sphere/gine.m: -------------------------------------------------------------------------------- 1 | % GINE Gine statistic for spherical uniformity 2 | % 3 | % G = gine(U) 4 | % 5 | % INPUTS 6 | % U - [n x p] matrix, n samples with dimensionality p 7 | % the data should already be projected to the unit hypersphere 8 | % 9 | % OUTPUTS 10 | % G - statistic 11 | % 12 | % REFERENCE 13 | % Mardia, KV, Jupp, PE (2000). Directional Statistics. John Wiley 14 | % 15 | % SEE ALSO 16 | % UniSphereTest, spatialSign 17 | 18 | % $ Copyright (C) 2017 Brian Lau, brian.lau@upmc.fr $ 19 | % The full license and most recent version of the code can be found at: 20 | % https://github.com/brian-lau/highdim 21 | % 22 | % This program is free software: you can redistribute it and/or modify 23 | % it under the terms of the GNU General Public License as published by 24 | % the Free Software Foundation, either version 3 of the License, or 25 | % (at your option) any later version. 26 | % 27 | % This program is distributed in the hope that it will be useful, 28 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 29 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 30 | % GNU General Public License for more details. 31 | 32 | function G = gine(U) 33 | 34 | [n,p] = size(U); 35 | 36 | psi = sphere.psivec(U,n); 37 | % eq. 10.7.5, avoiding overflow 38 | G = n/2 - (p-1)/(2*n) * ( exp(gammaln((p-1)/2) - gammaln(p/2)) )^2 * sum(sin(psi)); 39 | -------------------------------------------------------------------------------- /+utils/whiten.m: -------------------------------------------------------------------------------- 1 | %function [X,mu,invMat] = whiten(X,epsilon) 2 | % 3 | % ZCA whitening of a data matrix (make the covariance matrix identity) 4 | % 5 | % WARNING 6 | % This form of whitening performs poorly if the number of dimensions are 7 | % much greater than the number of instances 8 | % 9 | % INPUT 10 | % X: rows are the instances, columns are the features 11 | % epsilon: small number to compensate for nearly 0 eigenvalue [DEFAULT = 12 | % 0.0001] 13 | % 14 | % OUTPUT 15 | % Xwh: whitened data, rows are instances, columns are features 16 | % whMat: the whitening matrix 17 | 18 | % Copyright (c) 2012, Colorado Reed 19 | % All rights reserved. 20 | % 21 | % Redistribution and use in source and binary forms, with or without 22 | % modification, are permitted provided that the following conditions are 23 | % met: 24 | % 25 | % * Redistributions of source code must retain the above copyright 26 | % notice, this list of conditions and the following disclaimer. 
27 | % * Redistributions in binary form must reproduce the above copyright 28 | % notice, this list of conditions and the following disclaimer in 29 | % the documentation and/or other materials provided with the distribution 30 | 31 | function [X,whMat] = whiten(X,epsilon) 32 | 33 | if nargin < 2 34 | epsilon = 0.0001; 35 | end 36 | 37 | mu = mean(X); 38 | X = bsxfun(@minus, X, mu); 39 | A = X'*X; 40 | [V,D,~] = svd(A); 41 | whMat = sqrt(size(X,1)-1)*V*sqrtm(inv(D + eye(size(D))*epsilon))*V'; 42 | X = X*whMat; 43 | -------------------------------------------------------------------------------- /+sphere/rpcdf.m: -------------------------------------------------------------------------------- 1 | % RPCDF CDF of angles on a uniform hypersphere 2 | % 3 | % c = rpcdf(theta,p,dx) 4 | % 5 | % INPUTS 6 | % theta - angles (radians) to evaluate pdf 7 | % p - dimensionality (R^p) 8 | % 9 | % OPTIONAL 10 | % dx - resolution (default = 0.001); 11 | % 12 | % OUTPUTS 13 | % h - cdf 14 | % 15 | % REFERENCE 16 | % Cai, T et al (2013). Distribution of angles in random packing on 17 | % spheres. J of Machine Learning Research 14: 1837-1864. 18 | % 19 | % SEE ALSO 20 | % rppdf, rp 21 | 22 | % $ Copyright (C) 2014 Brian Lau http://www.subcortex.net/ $ 23 | % The full license and most recent version of the code can be found at: 24 | % https://github.com/brian-lau/highdim 25 | % 26 | % This program is free software: you can redistribute it and/or modify 27 | % it under the terms of the GNU General Public License as published by 28 | % the Free Software Foundation, either version 3 of the License, or 29 | % (at your option) any later version. 30 | % 31 | % This program is distributed in the hope that it will be useful, 32 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 33 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 34 | % GNU General Public License for more details. 35 | 36 | function c = rpcdf(theta,p,dx) 37 | 38 | if nargin < 3 39 | dx = 0.001; 40 | end 41 | 42 | x = 0:dx:pi; 43 | h = sphere.rppdf(x,p); 44 | 45 | c = cumtrapz(x,h); 46 | c = interp1(x,c,theta); -------------------------------------------------------------------------------- /+sphere/gine3.m: -------------------------------------------------------------------------------- 1 | % GINE3 Gine test for spherical uniformity (p=3) 2 | % 3 | % [pval,Fn] = gine3(U) 4 | % 5 | % INPUTS 6 | % U - [n x 3] matrix, n samples with dimensionality 3 7 | % the data should already be projected to the unit hypersphere 8 | % 9 | % OUTPUTS 10 | % pval - p-value 11 | % Fn - statistic 12 | % 13 | % REFERENCE 14 | % Mardia, KV, Jupp, PE (2000). Directional Statistics. John Wiley 15 | % 16 | % SEE ALSO 17 | % UniSphereTest, spatialSign 18 | 19 | % $ Copyright (C) 2014 Brian Lau http://www.subcortex.net/ $ 20 | % The full license and most recent version of the code can be found at: 21 | % https://github.com/brian-lau/highdim 22 | % 23 | % This program is free software: you can redistribute it and/or modify 24 | % it under the terms of the GNU General Public License as published by 25 | % the Free Software Foundation, either version 3 of the License, or 26 | % (at your option) any later version. 27 | % 28 | % This program is distributed in the hope that it will be useful, 29 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 30 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 31 | % GNU General Public License for more details. 
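% EXAMPLE (a minimal usage sketch, assuming the +sphere package is on the path)
%   U = sphere.spatialSign(randn(200,3)); % 200 points on the unit sphere in R^3
%   [pval,Fn] = sphere.gine3(U);          % pval should be roughly uniform under uniformity
%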
32 | 33 | function [pval,Fn] = gine3(U) 34 | 35 | [n,p] = size(U); 36 | 37 | if p ~= 3 38 | error('Only valid for p = 3'); 39 | end 40 | 41 | psi = sphere.psivec(U,n); 42 | % eq. 10.4.8 43 | Fn = (3*n)/2 - (4/(n*pi)) * sum(psi + sin(psi)); 44 | 45 | pval = 1 - sphere.sumchi2cdf(Fn,3); 46 | -------------------------------------------------------------------------------- /+utils/ucenter.m: -------------------------------------------------------------------------------- 1 | % UCENTER U-center distance matrix 2 | % 3 | % [X,X_j,X__] = ucenter(X) 4 | % 5 | % U-center distance matrix 6 | % 7 | % X_{ij} - X_{i.}/(n-2) - X_{.j}/(n-2) + X_{..}/((n-1)(n-2)), i \neq j 8 | % 9 | % and zero diagonal 10 | % 11 | % INPUTS 12 | % X - [n x n] symmetric distance matrix 13 | % 14 | % OUTPUTS 15 | % X - centered distance matrix 16 | % X_j - column means of X (input) 17 | % X__ - mean of X (input) 18 | % 19 | % SEE ALSO 20 | % pcenter 21 | 22 | % $ Copyright (C) 2017 Brian Lau, brian.lau@upmc.fr $ 23 | % The full license and most recent version of the code can be found at: 24 | % https://github.com/brian-lau/highdim 25 | % 26 | % This program is free software: you can redistribute it and/or modify 27 | % it under the terms of the GNU General Public License as published by 28 | % the Free Software Foundation, either version 3 of the License, or 29 | % (at your option) any later version. 30 | % 31 | % This program is distributed in the hope that it will be useful, 32 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 33 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 34 | % GNU General Public License for more details. 35 | 36 | function [X,X_j,X__] = ucenter(X) 37 | 38 | [n,m] = size(X); 39 | assert(m==n,'UCENTER operates on square, symmetric distance matrices'); 40 | 41 | X_j = sum(X); 42 | X__ = sum(X_j); % sum(X(:)) 43 | X = X - bsxfun(@plus,X_j,X_j')/(n-2) + X__/((n-1)*(n-2)); 44 | X(1:(n+1):n*n) = 0; 45 | -------------------------------------------------------------------------------- /+utils/spatialMedian.m: -------------------------------------------------------------------------------- 1 | function y = spatialMedian(X,tol,y,max_iter) 2 | % Calculate the geometric median for a set of observations (mean under a 3 | % Laplacian noise distribution) This is using Weiszfeld's algorithm. 4 | % 5 | % In: 6 | % X : the data, as in mean 7 | % tol : tolerance (default: 1.e-5) 8 | % y : initial value (default: median(X)) 9 | % max_iter : max number of iterations (default: 500) 10 | % 11 | % Out: 12 | % g : geometric median over X 13 | 14 | % https://github.com/sccn/BCILAB/code/misc/geometric_median.m 15 | % Copyright (C) Christian Kothe, SCCN, 2012, christian@sccn.ucsd.edu 16 | % 17 | % This program is free software; you can redistribute it and/or modify it 18 | % under the terms of the GNU General Public License as published by the 19 | % Free Software Foundation; either version 2 of the License, or (at your 20 | % option) any later version. 21 | % 22 | % This program is distributed in the hope that it will be useful, but 23 | % WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 24 | % or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 25 | % for more details. 
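% EXAMPLE (a minimal usage sketch; the contaminated data are arbitrary)
%   X = [randn(100,3); 10 + randn(5,3)];  % a few gross outliers
%   m = utils.spatialMedian(X);           % far less affected by the outliers than mean(X)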
26 | 27 | if ~exist('tol','var') || isempty(tol) 28 | tol = 1.e-5; end 29 | if ~exist('y','var') || isempty(y) 30 | y = median(X); end 31 | if ~exist('max_iter','var') || isempty(max_iter) 32 | max_iter = 500; end 33 | 34 | for i=1:max_iter 35 | invnorms = 1./sqrt(sum(bsxfun(@minus,X,y).^2,2)); 36 | [y,oldy] = deal(sum(bsxfun(@times,X,invnorms)) / sum(invnorms),y); 37 | if norm(y-oldy)/norm(y) < tol 38 | break; end 39 | end -------------------------------------------------------------------------------- /+utils/dcenter.m: -------------------------------------------------------------------------------- 1 | % DCENTER Double-center distance matrix 2 | % 3 | % [X,X_j,X__] = dcenter(X) 4 | % 5 | % Double-centers distance matrix X: 6 | % 7 | % X_{ij} - X_{i.}/n - X_{.j}/n + X_{..}/n^2, all i, j 8 | % 9 | % Faster & more memory-efficient than using a centering matrix 10 | % H = eye(n) - ones(n)/n; X = H*X*H; 11 | % 12 | % INPUTS 13 | % X - [n x n] symmetric distance matrix 14 | % 15 | % OUTPUTS 16 | % X - centered distance matrix 17 | % X_j - column means of X (input) 18 | % X__ - mean of X (input) 19 | % 20 | % SEE ALSO 21 | % ucenter 22 | 23 | % $ Copyright (C) 2017 Brian Lau, brian.lau@upmc.fr $ 24 | % The full license and most recent version of the code can be found at: 25 | % https://github.com/brian-lau/highdim 26 | % 27 | % This program is free software: you can redistribute it and/or modify 28 | % it under the terms of the GNU General Public License as published by 29 | % the Free Software Foundation, either version 3 of the License, or 30 | % (at your option) any later version. 31 | % 32 | % This program is distributed in the hope that it will be useful, 33 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 34 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 35 | % GNU General Public License for more details. 36 | 37 | function [X,X_j,X__] = dcenter(X) 38 | 39 | [n,m] = size(X); 40 | assert(m==n,'DCENTER operates on square, symmetric distance matrices'); 41 | 42 | X_j = mean(X); 43 | X__ = mean(X_j); % mean(X(:)) 44 | X = X - bsxfun(@plus,X_j,X_j') + X__; -------------------------------------------------------------------------------- /Testing/test_dcorr.m: -------------------------------------------------------------------------------- 1 | 2 | %% Table 1 from 3 | % Szekely & Rizzo (2013). The distance correlation t-test of independence 4 | % in high dimension. J Multiv Analysis 117: 193-213 5 | % Note that their table is a single sample 6 | clear; 7 | n = 30; 8 | p = [1 2 4 8 16 32 64 128 256 512 1024 2048 4096]; 9 | reps = 1; 10 | 11 | for i = 1:numel(p) 12 | for j = 1:reps 13 | x = rand(30,p(i)); 14 | y = rand(30,p(i)); 15 | r(j,i) = dep.dcorr(x,y); 16 | 17 | rstar(j,i) = dep.dcorr(x,y,true); 18 | T(j,i) = sqrt(n*(n-3)/2-1)*rstar(j,i)/sqrt(1-rstar(j,i)^2); 19 | end 20 | end 21 | 22 | table(p',mean(r,1)',mean(rstar,1)',mean(T,1)',... 
23 | 'VariableNames',{'pq','R','Rstar','T'}) 24 | 25 | % [pval,r,T] =dep.dcorrtest([1 2 3 4 5]',[1.4 1.4 3.5 4.2 4.8]') 26 | % DepTest2([1 2 3 4 5]',[1.4 1.4 3.5 4.2 4.8]','test','dcorr') 27 | % % Replicate using R 'energy' package 28 | % dcor.ttest(c(1,2,3,4,5),c(1.4,1.4,3.5,4.2,4.8)) 29 | % 30 | % dcor t-test of independence 31 | % 32 | % data: c(1, 2, 3, 4, 5) and c(1.4, 1.4, 3.5, 4.2, 4.8) 33 | % T = 5.6569, df = 4, p-value = 0.002406 34 | % sample estimates: 35 | % Bias corrected dcor 36 | % 0.942809 37 | 38 | % Section 3, example 1, page 200 39 | clear; 40 | n = 30; 41 | p = 30; 42 | q = 30; 43 | reps = 1000; 44 | 45 | for i = 1:reps 46 | x = rand(n,p); 47 | y = rand(n,q); 48 | [pval(i),~,T(i)] = dep.dcorrtest(x,y); 49 | end 50 | 51 | clear; 52 | n = 30; 53 | p = 30; 54 | q = 30; 55 | reps = 1000; 56 | 57 | for i = 1:reps 58 | x = rand(n,p); 59 | y = x + sqrt(.2)*randn(n,q); % I think there is a typo in the paper 60 | [pval(i),~,T(i)] = dep.dcorrtest(x,y); 61 | end -------------------------------------------------------------------------------- /+utils/jbld.m: -------------------------------------------------------------------------------- 1 | % JBLD Jensen-Bregman LogDet Divergence 2 | % 3 | % div = jbld(x,y) 4 | % 5 | % INPUTS 6 | % x - [n x n] positive semi-definite matrix 7 | % y - [n x n] positive semi-definite matrix 8 | % 9 | % OUTPUTS 10 | % div - Jensen-Bregman LogDet Divergence 11 | % 12 | % REFERENCE 13 | % Cherian et al (2012). Jensen-Bregman LogDet Divergence with Application 14 | % to Efficient Similarity Search for Covariance Matrices. 15 | % Trans Pattern Analysis & Machine Intelligence 16 | 17 | % $ Copyright (C) 2014 Brian Lau http://www.subcortex.net/ $ 18 | % The full license and most recent version of the code can be found at: 19 | % https://github.com/brian-lau/highdim 20 | % 21 | % This program is free software: you can redistribute it and/or modify 22 | % it under the terms of the GNU General Public License as published by 23 | % the Free Software Foundation, either version 3 of the License, or 24 | % (at your option) any later version. 25 | % 26 | % This program is distributed in the hope that it will be useful, 27 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 28 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 29 | % GNU General Public License for more details. 30 | 31 | function div = jbld(x,y) 32 | 33 | [m,p] = size(x); 34 | [n,q] = size(y); 35 | 36 | if (m~=n) || (p~=q) 37 | error('x and y must be the same size'); 38 | end 39 | 40 | cxy = chol((x+y)/2); 41 | cx = chol(x); 42 | cy = chol(y); 43 | div = log(prod(diag(cxy).^2)) - log(prod(diag(cx).^2)*prod(diag(cy).^2))/2; 44 | 45 | % div2 = log(det((x+y)/2)) - log(det(x*y))/2; -------------------------------------------------------------------------------- /+sphere/gineajne.m: -------------------------------------------------------------------------------- 1 | % GINEAJNE Weighted Gine/Ajne statistic for spherical uniformity 2 | % 3 | % F = gineajne(U) 4 | % 5 | % A weighted sum of Gine's and Anje's statistics is consistent against 6 | % all alternatives to uniformity on S^(p-1), the unit sphere in R^p. 7 | % 8 | % INPUTS 9 | % U - [n x p] matrix, n samples with dimensionality p 10 | % the data should already be projected to the unit hypersphere 11 | % 12 | % OUTPUTS 13 | % F - statistic 14 | % 15 | % REFERENCE 16 | % Prentice, MJ (1978). On invariant tests of uniformity for directions 17 | % and orientations. Annals of Statistics 6: 169-176. 
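% EXAMPLE (a minimal usage sketch, assuming the +sphere package is on the path)
%   U = sphere.spatialSign(randn(100,8)); % 100 points on the unit sphere in R^8
%   F = sphere.gineajne(U);               % statistic only; see UniSphereTest for p-values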
18 | % 19 | % SEE ALSO 20 | % UniSphereTest, gine, ajne, spatialSign 21 | 22 | % $ Copyright (C) 2017 Brian Lau, brian.lau@upmc.fr $ 23 | % The full license and most recent version of the code can be found at: 24 | % https://github.com/brian-lau/highdim 25 | % 26 | % This program is free software: you can redistribute it and/or modify 27 | % it under the terms of the GNU General Public License as published by 28 | % the Free Software Foundation, either version 3 of the License, or 29 | % (at your option) any later version. 30 | % 31 | % This program is distributed in the hope that it will be useful, 32 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 33 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 34 | % GNU General Public License for more details. 35 | 36 | function F = gineajne(U) 37 | 38 | [n,p] = size(U); 39 | 40 | psi = sphere.psivec(U,n); 41 | G = n/2 - (p-1)/(2*n) * ( exp(gammaln((p-1)/2) - gammaln(p/2)) )^2 * sum(sin(psi)); 42 | A = (n/4) - (1/(n*pi))*sum(psi); 43 | F = G + A; 44 | -------------------------------------------------------------------------------- /Testing/test_sphericity3.m: -------------------------------------------------------------------------------- 1 | %% Compare bias-corrected sign test size & power with table 1 from 2 | % Zou et al (2014). Multivariate sign-based high-dimensional tests for 3 | % sphericity. Biometrika 101: 229-236 4 | clear all; 5 | n = [40 80]; 6 | p = [55 181 642]; 7 | reps = 100; 8 | v = [0 0.125 0.250]; 9 | 10 | tic; 11 | for i = 1:numel(n) 12 | for j = 1:numel(p) 13 | for k = 1:numel(v) 14 | for m = 1:reps 15 | y = randn(n(i),p(j)); 16 | vp = round(v(k)*p(j)); 17 | A = [sqrt(2)*ones(vp,1) ; ones(p(j)-vp,1)]; 18 | x = (diag(A)*y')'; 19 | pval(m) = sphere.signtest(x,'test','bcs'); 20 | end 21 | prob(i,j,k) = mean(pval<=0.05); 22 | end 23 | toc 24 | end 25 | end 26 | 27 | 100*prob 28 | 29 | % reps = 2000 % 24.11.2014 30 | % approx = true 31 | % ans(:,:,1) = 32 | % 33 | % 4.7000 5.7500 5.8000 34 | % 6.2500 3.9500 4.7000 35 | % 36 | % ans(:,:,2) = 37 | % 38 | % 45.1500 47.8500 49.9000 39 | % 87.6500 93.3000 94.1500 40 | % 41 | % ans(:,:,3) = 42 | % 43 | % 64.7000 69.6500 72.4500 44 | % 98.8000 99.3500 99.6500 45 | 46 | % reps = 2000 % 25.11.2014 47 | % approx = false 48 | % ans(:,:,1) = 49 | % 50 | % 5.8000 5.8000 5.9000 51 | % 5.5500 5.1500 4.4000 52 | % 53 | % ans(:,:,2) = 54 | % 55 | % 43.9000 50.4500 50.4500 56 | % 86.6000 92.8500 94.6500 57 | % 58 | % ans(:,:,3) = 59 | % 60 | % 67.2000 68.7000 71.8500 61 | % 98.7500 99.6500 99.6000 62 | 63 | % values from Zou et al. Table 1 64 | pZ(:,:,1) = [... 65 | 4.9 4.9 5.1;... 66 | 4.7 5.2 5.1]; 67 | pZ(:,:,2) = [... 68 | 41 47 49;... 69 | 84 91 94]; 70 | pZ(:,:,3) = [... 71 | 64 68 72;... 72 | 99 100 100] 73 | -------------------------------------------------------------------------------- /+sphere/rp.m: -------------------------------------------------------------------------------- 1 | % RP Random projection stat for spherical uniformity 2 | % 3 | % stat = rp(U,k) 4 | % 5 | % INPUTS 6 | % U - [n x p] matrix, n samples with dimensionality p 7 | % the data should already be projected to the unit hypersphere 8 | % k - number of random vectors to project onto 9 | % 10 | % OUTPUTS 11 | % stat - [n x k] vector of of angles between data and k random vectors 12 | % 13 | % REFERENCE 14 | % Cuesta-Albertos, JA et al (2009). On projection-based tests for 15 | % directional and compositional data. Stat Comput 19: 367-380 16 | % Cuesta-Albertos, JA et al (2007). 
A sharp form of the Cramer-Wold 17 | % theorem. J Theor Probab 20: 201-209 18 | % 19 | % SEE ALSO 20 | % UniSphereTest, spatialSign 21 | 22 | % $ Copyright (C) 2014 Brian Lau http://www.subcortex.net/ $ 23 | % The full license and most recent version of the code can be found at: 24 | % https://github.com/brian-lau/highdim 25 | % 26 | % This program is free software: you can redistribute it and/or modify 27 | % it under the terms of the GNU General Public License as published by 28 | % the Free Software Foundation, either version 3 of the License, or 29 | % (at your option) any later version. 30 | % 31 | % This program is distributed in the hope that it will be useful, 32 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 33 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 34 | % GNU General Public License for more details. 35 | 36 | function stat = rp(U,k) 37 | 38 | [n,p] = size(U); 39 | 40 | % Uniform random directions 41 | u0 = sphere.spatialSign(randn(k,p)); 42 | stat = zeros(n,k); 43 | 44 | for i = 1:k 45 | stat(:,i) = acos(U*u0(i,:)'); 46 | end 47 | -------------------------------------------------------------------------------- /+sphere/rayleigh.m: -------------------------------------------------------------------------------- 1 | % RAYLEIGH Rayleigh statistic for spherical uniformity 2 | % 3 | % [pval,R] = rayleigh(U) 4 | % 5 | % Most powerful invariant test against von Mises alternative. 6 | % Not consistent against alternatives with zero resultant length 7 | % (Mardia & Jupp, pg 209). 8 | % 9 | % INPUTS 10 | % U - [n x p] matrix, n samples with dimensionality p 11 | % the data should already be projected to the unit hypersphere 12 | % 13 | % OUTPUTS 14 | % pval - p-value 15 | % R - statistic 16 | % 17 | % REFERENCE 18 | % Mardia, KV, Jupp, PE (2000). Directional Statistics. John Wiley 19 | % 20 | % SEE ALSO 21 | % UniSphereTest, spatialSign 22 | 23 | % $ Copyright (C) 2014 Brian Lau http://www.subcortex.net/ $ 24 | % The full license and most recent version of the code can be found at: 25 | % https://github.com/brian-lau/highdim 26 | % 27 | % This program is free software: you can redistribute it and/or modify 28 | % it under the terms of the GNU General Public License as published by 29 | % the Free Software Foundation, either version 3 of the License, or 30 | % (at your option) any later version. 31 | % 32 | % This program is distributed in the hope that it will be useful, 33 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 34 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 35 | % GNU General Public License for more details. 36 | 37 | function [pval,R] = rayleigh(U) 38 | 39 | [n,p] = size(U); 40 | 41 | if 0 42 | R = (p/n)*sum(sum(U*U')); 43 | else 44 | % Modified Rayleigh test statistic (Mardia & Jupp, eq. 10.4.6) 45 | Ubar = mean(U); 46 | T = n*p*sum(Ubar.^2); 47 | R = (1-1/(2*n))*T + (1/(2*n*(p+2)))*T^2; 48 | end 49 | 50 | pval = 1 - chi2cdf(R,p); 51 | -------------------------------------------------------------------------------- /+utils/DyadUpdate.c: -------------------------------------------------------------------------------- 1 | /* 2 | * The main routine for DyadUpdate.c 3 | * 4 | * Huo & Szekely (2017). Fast Computing for Distance Covariance, 5 | * Technometrics, 2016, 58, 435?447. 
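 *
 * (What the routine below computes, as a sketch: for ii = 2..n it forms the
 * partial sums GAMMA(ii) = sum of C(j) over j < ii with Y(j) < Y(ii), using a
 * dyadic/binary-tree table of sums so the whole pass costs O(n log n) rather
 * than O(n^2); Y is assumed to hold positive integer ranks in 1..n.)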
6 | * 7 | * Copyright (c) 2014 Xiaoming Huo 8 | */ 9 | 10 | #ifdef MATLAB_MEX_FILE 11 | #include "mex.h" 12 | #define calloc mxCalloc 13 | #define free mxFree 14 | #endif 15 | 16 | #define S(i) s_p[i-1] 17 | #define Y(i) Y_p[i-1] 18 | #define C(i) C_p[i-1] 19 | #define GAMMA(i) GAMMA_p[i-1] 20 | 21 | void DyadUpdate(double GAMMA_p[],double Y_p[],double C_p[],const int n) 22 | { 23 | int L,ii,ell,k,pos,scale,s_length; 24 | double *s_p; 25 | 26 | L = (int) ceil(log((double) n)/log((double) 2)); 27 | s_length = (int) pow(((double) 2), ((double) L+1)); 28 | s_p = (double *) calloc(s_length, sizeof(double)); 29 | 30 | for(ii=2;ii<=n;ii++){ 31 | for(ell=0;ell<=L-1;ell++){ 32 | k = (int) ceil(Y(ii-1)/((int) pow(((double) 2), ((double) ell)))); 33 | pos = k; 34 | if(ell>0){ 35 | for(scale=ell-1;scale>=0;scale--){ 36 | pos = pos + (int) pow(((double) 2), ((double) L-scale)); 37 | } 38 | } 39 | S(pos) = S(pos) + C(ii-1); 40 | } 41 | for(ell=0;ell<=L-1;ell++){ 42 | k = (int) floor((double) (Y(ii)-1)/((int) pow(((double) 2), ((double) ell)))); 43 | if((double) k/2 > (int) floor(((double) k)/2)){ 44 | pos = k; 45 | if(ell>0){ 46 | for(scale=ell-1;scale>=0;scale--){ 47 | pos = pos + (int) pow(((double) 2), ((double) L-scale)); 48 | } 49 | } 50 | GAMMA(ii) = GAMMA(ii) + S(pos); 51 | } 52 | } 53 | } 54 | 55 | free(s_p); 56 | } 57 | 58 | #undef S 59 | #undef Y 60 | #undef C 61 | #undef GAMMA 62 | -------------------------------------------------------------------------------- /Testing/Test_dcov_dcorr.m: -------------------------------------------------------------------------------- 1 | % xUnit framework required 2 | % https://psexton.github.io/matlab-xunit/ 3 | 4 | % energy package 1.6.2 5 | % > dcov(c(1,2,3,4),c(1,1,2,6)) 6 | % [1] 1.118034 7 | % > dcor(c(1,2,3,4),c(1,1,2,6)) 8 | % [1] 0.8947853 9 | % > dcov(c(1,2,3),c(.5,2,3.4)) 10 | % [1] 0.846197 11 | % > dcor(c(1,2,3),c(.5,2,3.4)) 12 | % [1] 0.9998217 13 | % > dcov(c(-11,2,3),c(.5,2,3.4)) 14 | % [1] 2.258591 15 | % > dcor(c(-11,2,3),c(.5,2,3.4)) 16 | % [1] 0.9206351 17 | 18 | classdef Test_dcov_dcorr < TestCase 19 | properties 20 | end 21 | 22 | methods 23 | function self = Test_dcov_dcorr(name) 24 | self = self@TestCase(name); 25 | end 26 | 27 | function setUp(self) 28 | end 29 | 30 | function test_dcov1(self) 31 | d = dep.dcov([1 2 3 4]',[1 1 2 6]'); 32 | assertElementsAlmostEqual(d,1.118034,'absolute',1e-5); 33 | end 34 | 35 | function test_dcov2(self) 36 | d = dep.dcov([1 2 3]',[.5 2 3.4]'); 37 | assertElementsAlmostEqual(d,0.846197,'absolute',1e-5); 38 | end 39 | 40 | function test_dcov3(self) 41 | d = dep.dcov([-11 2 3]',[.5 2 3.4]'); 42 | assertElementsAlmostEqual(d,2.258591,'absolute',1e-5); 43 | end 44 | 45 | function test_dcorr1(self) 46 | d = dep.dcorr([1 2 3 4]',[1 1 2 6]'); 47 | assertElementsAlmostEqual(d,0.8947853,'absolute',1e-5); 48 | end 49 | 50 | function test_dcorr2(self) 51 | d = dep.dcorr([1 2 3]',[.5 2 3.4]'); 52 | assertElementsAlmostEqual(d,0.9998217,'absolute',1e-5); 53 | end 54 | 55 | function test_dcorr3(self) 56 | d = dep.dcorr([-11 2 3]',[.5 2 3.4]'); 57 | assertElementsAlmostEqual(d,0.9206351,'absolute',1e-5); 58 | end 59 | 60 | function tearDown(self) 61 | end 62 | end 63 | end -------------------------------------------------------------------------------- /+utils/nystrom.m: -------------------------------------------------------------------------------- 1 | % NYSTROM Nystrom approximation of kernel matrix 2 | % 3 | % [phi,K] = nystrom(X,varargin) 4 | % 5 | % INPUTS 6 | % X - [n x p] n samples of dimensionality p 7 | 
% 8 | % OPTIONAL 9 | % c - scalar, number of columns to sample (without replacement) 10 | % rsvd - boolean indicating whether to use randomized SVD 11 | % tol - scalar tolerance for truncating small singular values 12 | % 13 | % Additional name/value pairs are passed through to RSVD if true. 14 | % 15 | % OUTPUTS 16 | % phi - approximate feature mapped data 17 | % K - approximate Gram matrix 18 | % 19 | % REFERENCES 20 | % Wang (2015). A Practical Guide to Randomized Matrix Computations with 21 | % MATLAB Implementations. https://arxiv.org/abs/1505.07570 22 | % 23 | % SEE ALSO 24 | % rsvd 25 | 26 | function [phi,K] = nystrom(X,varargin) 27 | 28 | par = inputParser; 29 | par.KeepUnmatched = true; 30 | addRequired(par,'X',@isnumeric); 31 | addParamValue(par,'c',[],@(x) isnumeric(x) && isscalar(x)); 32 | addParamValue(par,'rsvd',false,@islogical); 33 | addParamValue(par,'tol',[],@(x) isnumeric(x) && isscalar(x)); 34 | parse(par,X,varargin{:}); 35 | 36 | [n,p] = size(X); 37 | if isempty(par.Results.c) 38 | c = fix(0.25*n); % Default to 25% columns 39 | else 40 | c = min(par.Results.c,n); 41 | end 42 | 43 | ind = randperm(n); 44 | ind = ind(1:c); 45 | C = utils.kernel(X,X(ind,:),par.Unmatched); % C = K(:,ind) 46 | W = C(ind,:); 47 | 48 | if par.Results.rsvd 49 | %[U,S] = utils.rsvd(W,par.Unmatched); 50 | [U,S] = utils.rsvd(W,30,10,3); 51 | else 52 | [U,S] = svd(W); 53 | end 54 | s = diag(S); 55 | if isempty(par.Results.tol) 56 | tol = max(size(W)) * eps(norm(s,inf)); % from pinv 57 | else 58 | tol = par.Results.tol; 59 | end 60 | c = sum(s > tol); 61 | s = 1./sqrt(s(1:c)); 62 | UW = bsxfun(@times,U(:,1:c),s'); 63 | 64 | phi = C*UW; 65 | 66 | if nargout == 2 67 | K = phi*phi'; 68 | end -------------------------------------------------------------------------------- /+dep/rdc.m: -------------------------------------------------------------------------------- 1 | % RDC Randomized dependence coefficient 2 | % 3 | % r = rdc(x,y,varargin) 4 | % 5 | % RDC is the largest canonical correlation as computed by RCCA on random 6 | % features of the copula transformations of two random samples 7 | % 8 | % INPUTS 9 | % x - [n x p] n samples of dimensionality p 10 | % y - [n x q] n samples of dimensionality q 11 | % 12 | % OPTIONAL 13 | % k 14 | % s 15 | % f 16 | % demean 17 | % 18 | % OUTPUTS 19 | % 20 | % REFERENCE 21 | % 22 | 23 | % Based on R code: 24 | % https://github.com/lopezpaz/randomized_dependence_coefficient/blob/master/code/algorithms.r 25 | % rdc <- function(x,y,k=20,s=1/6,f=sin) { 26 | % x <- cbind(apply(as.matrix(x),2,function(u)rank(u)/length(u)),1) 27 | % y <- cbind(apply(as.matrix(y),2,function(u)rank(u)/length(u)),1) 28 | % x <- s/ncol(x)*x%*%matrix(rnorm(ncol(x)*k),ncol(x)) 29 | % y <- s/ncol(y)*y%*%matrix(rnorm(ncol(y)*k),ncol(y)) 30 | % cancor(cbind(f(x),1),cbind(f(y),1))$cor[1] 31 | % } 32 | 33 | function r = rdc(x,y,varargin) 34 | 35 | par = inputParser; 36 | par.KeepUnmatched = true; 37 | addRequired(par,'x',@isnumeric); 38 | addRequired(par,'y',@isnumeric); 39 | addParamValue(par,'k',20,@isscalar); 40 | addParamValue(par,'s',1/6,@isscalar); 41 | addParamValue(par,'f',@sin,@(x) isa(x,'function_handle')); 42 | addParamValue(par,'demean',false,@islogical); 43 | parse(par,x,y,varargin{:}); 44 | 45 | n = size(x,1); 46 | if par.Results.demean 47 | x = bsxfun(@minus,x,mean(x)); 48 | y = bsxfun(@minus,y,mean(y)); 49 | end 50 | 51 | x = [tiedrank(x)/n ones(n,1)]; 52 | y = [tiedrank(y)/n ones(n,1)]; 53 | 54 | f = par.Results.f; 55 | s = par.Results.s; 56 | k = par.Results.k; 57 | x = 
f(s/size(x,2)*x*randn(size(x,2),k)); 58 | y = f(s/size(y,2)*y*randn(size(y,2),k)); 59 | 60 | warning('off','stats:canoncorr:NotFullRank'); 61 | [~,~,r] = canoncorr([x ones(n,1)],[y ones(n,1)]); 62 | warning('on','stats:canoncorr:NotFullRank'); 63 | 64 | r = r(1); 65 | 66 | -------------------------------------------------------------------------------- /+sphere/bingham.m: -------------------------------------------------------------------------------- 1 | % BINGHAM Bingham test for spherical uniformity 2 | % 3 | % [pval,B] = bingham(U) 4 | % 5 | % Antipodially symmetric 6 | % Not consistent against alternatives with E[xx'] = (1/p)*Ip 7 | % 8 | % INPUTS 9 | % U - [n x p] matrix, n samples with dimensionality p 10 | % the data should already be projected to the unit hypersphere 11 | % 12 | % OUTPUTS 13 | % pval - p-value 14 | % B - statistic 15 | % 16 | % REFERENCE 17 | % Mardia, KV, Jupp, PE (2000). Directional Statistics. John Wiley 18 | % 19 | % SEE ALSO 20 | % UniSphereTest, spatialSign 21 | 22 | % $ Copyright (C) 2014 Brian Lau http://www.subcortex.net/ $ 23 | % The full license and most recent version of the code can be found at: 24 | % https://github.com/brian-lau/highdim 25 | % 26 | % This program is free software: you can redistribute it and/or modify 27 | % it under the terms of the GNU General Public License as published by 28 | % the Free Software Foundation, either version 3 of the License, or 29 | % (at your option) any later version. 30 | % 31 | % This program is distributed in the hope that it will be useful, 32 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 33 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 34 | % GNU General Public License for more details. 35 | 36 | function [pval,B] = bingham(U) 37 | 38 | [n,p] = size(U); 39 | 40 | if 1 41 | % eq. 10.7.1 42 | T = (1/n)*U'*U; 43 | B = ((n*p*(p+2))/2)*(trace(T^2) - 1/p); 44 | else 45 | % Modified Bingham test statistic (Mardia & Jupp, eq. 10.7.3) 46 | % seems to blow up for certain data? 47 | T = (1/n)*U'*U; 48 | B = ((n*p*(p+2))/2)*(trace(T^2) - 1/p); 49 | B0 = (2*p^2+3*p+4)/(6*(p+4)); 50 | B1 = -(4*p^2+3*p-4)/(3*(p+4)*(p^2+p+2)); 51 | B2 = 4*(p^2-4)/(3*(p+4)*(p^2+p+2)*(p^2+p+6)); 52 | B = B*(1 - (1/n)*(B0 + B1*B + B2*B^2)); 53 | end 54 | 55 | pval = 1 - chi2cdf(B,((p-1)*(p+2))/2); 56 | -------------------------------------------------------------------------------- /+dep/rank.m: -------------------------------------------------------------------------------- 1 | % RANK Rank-based statistics for testing independence 2 | % 3 | % r = rank(x,type) 4 | % 5 | % INPUTS 6 | % x - [n x p] matrix, n samples with dimensionality p 7 | % 8 | % OPTIONAL 9 | % type - 'spearman' - R1 from Han & Liu (DEFAULT) 10 | % 'kendall' - R2 from Han & Liu 11 | % 12 | % OUTPUTS 13 | % r - rank statistic 14 | % 15 | % REFERENCE 16 | % Han & Liu (2014). Distribution-free tests of independence with 17 | % applications to testing more structures. arXiv:1410.4179v1 18 | % 19 | % SEE ALSO 20 | % DepTest1, ranktest 21 | 22 | % $ Copyright (C) 2014 Brian Lau http://www.subcortex.net/ $ 23 | % The full license and most recent version of the code can be found at: 24 | % https://github.com/brian-lau/highdim 25 | % 26 | % This program is free software: you can redistribute it and/or modify 27 | % it under the terms of the GNU General Public License as published by 28 | % the Free Software Foundation, either version 3 of the License, or 29 | % (at your option) any later version. 
30 | % 31 | % This program is distributed in the hope that it will be useful, 32 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 33 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 34 | % GNU General Public License for more details. 35 | 36 | function r = rank(x,type) 37 | 38 | if nargin < 2 39 | type = 'spearman'; 40 | end 41 | 42 | [n,p] = size(x); 43 | 44 | switch lower(type) 45 | case {'spearman','s'} 46 | rho = corr(x,'type','spearman'); 47 | rho2 = rho.^2; 48 | rho2 = tril(rho2,-1); 49 | r = (n-1)*max(rho2(:)) - 4*log(p) + log(log(p)); 50 | case {'kendall','k'} 51 | %tau = corr(x,'type','kendall'); 52 | tau = dep.kendalltau(x); 53 | tau2 = tau.^2; 54 | tau2 = tril(tau2,-1); 55 | r = ((9*n*(n-1))/(2*(2*n+5)))*max(tau2(:)) - 4*log(p) + log(log(p)); 56 | otherwise 57 | error('Unknown type'); 58 | end -------------------------------------------------------------------------------- /+dep/dcorr.m: -------------------------------------------------------------------------------- 1 | % DCORR Distance correlation 2 | % 3 | % r = dcorr(x,y,varargin) 4 | % 5 | % INPUTS 6 | % x - [n x p] n samples of dimensionality p 7 | % y - [n x q] n samples of dimensionality q 8 | % 9 | % OPTIONAL (as name/value pairs, order irrelevant) 10 | % unbiased - true indicates bias-corrected estimate (default=false) 11 | % dist - true indicates x & y are distance matrices (default=false) 12 | % doublecenter - true indicates x & y are double-centered distance 13 | % matrices (default=false) 14 | % 15 | % OUTPUTS 16 | % r - distance correlation between x,y 17 | % 18 | % REFERENCE 19 | % Szekely et al (2007). Measuring and testing independence by correlation 20 | % of distances. Ann Statist 35: 2769-2794 21 | % Szekely & Rizzo (2013). The distance correlation t-test of independence 22 | % in high dimension. J Multiv Analysis 117: 193-213 23 | % 24 | % SEE ALSO 25 | % dcorrtest, dcov 26 | 27 | % $ Copyright (C) 2017 Brian Lau, brian.lau@upmc.fr $ 28 | % The full license and most recent version of the code can be found at: 29 | % https://github.com/brian-lau/highdim 30 | % 31 | % This program is free software: you can redistribute it and/or modify 32 | % it under the terms of the GNU General Public License as published by 33 | % the Free Software Foundation, either version 3 of the License, or 34 | % (at your option) any later version. 35 | % 36 | % This program is distributed in the hope that it will be useful, 37 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 38 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 39 | % GNU General Public License for more details. 40 | 41 | function r = dcorr(x,y,varargin) 42 | 43 | par = inputParser; 44 | par.KeepUnmatched = true; 45 | addRequired(par,'x',@isnumeric); 46 | addRequired(par,'y',@isnumeric); 47 | parse(par,x,y,varargin{:}); 48 | 49 | [d,dvx,dvy] = dep.dcov(x,y,par.Unmatched); 50 | if (dvx*dvy) > eps 51 | r = d/sqrt(dvx*dvy); 52 | else 53 | r = 0; 54 | end 55 | -------------------------------------------------------------------------------- /+sphere/rppdf.m: -------------------------------------------------------------------------------- 1 | % RPPDF Distribution of angles on a uniform hypersphere 2 | % 3 | % h = rppdf(theta,p) 4 | % 5 | % The distribution of pairwise angles between vectors X1,...,Xn that 6 | % are random points independently chosen with the uniform distribution 7 | % on S^(p-1), the unit sphere in R^p. 
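% Concretely, the density implemented below is
%   h(theta) = (1/sqrt(pi)) * (gamma(p/2)/gamma((p-1)/2)) * sin(theta)^(p-2), 0 <= theta <= pi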
8 | % 9 | % INPUTS 10 | % theta - angles (radians) to evaluate pdf 11 | % p - dimensionality (R^p) 12 | % 13 | % OUTPUTS 14 | % h - pdf 15 | % 16 | % EXAMPLE 17 | % p = 8; 18 | % x = randn(50000,p); 19 | % U = sphere.spatialSign(x); 20 | % u0 = sphere.spatialSign(randn(1,p)); 21 | % dx = 0.05; xx = 0:dx:pi; 22 | % n = histc(acos(U*u0'),xx); 23 | % hold on 24 | % bar(xx,n./sum(n),'histc'); 25 | % plot(xx,sphere.rppdf(xx,p)*dx,'m') 26 | % 27 | % integral(@(x) sphere.rppdf(x,p),0,pi) 28 | % 29 | % REFERENCE 30 | % Cai, T et al (2013). Distribution of angles in random packing on 31 | % spheres. J of Machine Learning Research 14: 1837-1864. 32 | % 33 | % SEE ALSO 34 | % rp, rpcdf 35 | 36 | % $ Copyright (C) 2017 Brian Lau, brian.lau@upmc.fr $ 37 | % The full license and most recent version of the code can be found at: 38 | % https://github.com/brian-lau/highdim 39 | % 40 | % This program is free software: you can redistribute it and/or modify 41 | % it under the terms of the GNU General Public License as published by 42 | % the Free Software Foundation, either version 3 of the License, or 43 | % (at your option) any later version. 44 | % 45 | % This program is distributed in the hope that it will be useful, 46 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 47 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 48 | % GNU General Public License for more details. 49 | 50 | function h = rppdf(theta,p) 51 | 52 | assert(all(theta>=0)&&all(theta<=pi),'theta must be 0<=theta<=pi.'); 53 | assert((mod(p,1)==0)&&(p>1),'p must be integer > 0.'); 54 | 55 | h = (1/sqrt(pi)) * exp( gammaln(p/2) - gammaln((p-1)/2) )*... 56 | (sin(theta).^(p-2)); 57 | -------------------------------------------------------------------------------- /+utils/sigest.m: -------------------------------------------------------------------------------- 1 | % SIGEST Estimate bandwidth of Gaussian kernel 2 | % 3 | % sigma = sigest(X,varargin) 4 | % 5 | % INPUTS 6 | % X - [n x p] m samples of dimensionality p 7 | % 8 | % OPTIONAL 9 | % sigest - string indicating method for estimating sigma, 10 | % 'median' - Median heuristic, Gretton et al. 2012 11 | % 'adapt' - 12 | % frac - scalar (0,1] indicating fraction of data to use for sigest 13 | % 14 | % OUTPUTS 15 | % sigma - standard deviation of Gaussian kernel 16 | % 17 | % SEE ALSO 18 | % rbf 19 | 20 | % $ Copyright (C) 2017 Brian Lau, brian.lau@upmc.fr $ 21 | % The full license and most recent version of the code can be found at: 22 | % https://github.com/brian-lau/highdim 23 | % 24 | % This program is free software: you can redistribute it and/or modify 25 | % it under the terms of the GNU General Public License as published by 26 | % the Free Software Foundation, either version 3 of the License, or 27 | % (at your option) any later version. 28 | % 29 | % This program is distributed in the hope that it will be useful, 30 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 31 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 32 | % GNU General Public License for more details. 
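% EXAMPLE (a minimal usage sketch, assuming the +utils package is on the path)
%   X = randn(500,10);
%   sigma = utils.sigest(X);            % median-heuristic bandwidth
%   K = utils.rbf(X,X,'sigma',sigma);   % Gaussian kernel built with that bandwidth
%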
33 | 34 | function sigma = sigest(X,varargin) 35 | 36 | par = inputParser; 37 | par.KeepUnmatched = true; 38 | addRequired(par,'X',@isnumeric); 39 | addParamValue(par,'frac',[],@(x) isscalar(x) && (x>0) && (x<=1)); 40 | addParamValue(par,'sigest','median',@ischar); 41 | parse(par,X,varargin{:}); 42 | 43 | [n,p] = size(X); 44 | if isempty(par.Results.frac) 45 | ind = ceil(n*0.1); 46 | X = X(1:min(n,ind),:); 47 | elseif par.Results.frac ~= 1 48 | ind = ceil(n*par.Results.frac); 49 | X = X(1:min(n,ind),:); 50 | end 51 | 52 | switch lower(par.Results.sigest) 53 | case {'median'} 54 | % Median heuristic, Gretton et al. 2012 55 | sigma = sqrt(0.5*median(pdist(X).^2)); 56 | case {'adapt'} 57 | % TODO 58 | otherwise 59 | error('Unknown sigma estimator'); 60 | end -------------------------------------------------------------------------------- /setup_highdim.m: -------------------------------------------------------------------------------- 1 | function setup_highdim 2 | 3 | %% Setup path 4 | baseDirectory = fileparts(mfilename('fullpath')); 5 | addpath(genpath_ignoreHiddenDir(baseDirectory)); 6 | 7 | %% Compile DyadUpdate 8 | if exist('+utils/mexDyadUpdate','file')~=3 9 | here = pwd; 10 | cd( fullfile(baseDirectory,'+utils') ); 11 | disp('Compiling DyadUpdate code'); 12 | mex -largeArrayDims -O mexDyadUpdate.c 13 | cd(here); 14 | end 15 | 16 | %% FJLT (Fast Hadamard) code 17 | if exist('+utils/mexHadamard','file')~=3 18 | here = pwd; 19 | cd( fullfile(baseDirectory,'+utils') ); 20 | disp('Compiling fast Hadamard code'); 21 | if isunix 22 | % Assuming we are using gcc, so I know some fancier flags 23 | % This might make a difference on new computers (> 2012) that have AVX 24 | mex -O CFLAGS="\$CFLAGS -march=native -O3" mexHadamard.c -DNO_UCHAR 25 | else 26 | mex -O mexHadamard.c 27 | end 28 | cd(here); 29 | end 30 | 31 | 32 | function p = genpath_ignoreHiddenDir(d) 33 | %% 34 | % initialise variables 35 | classsep = '@'; % qualifier for overloaded class directories 36 | packagesep = '+'; % qualifier for overloaded package directories 37 | p = ''; % path to be returned 38 | 39 | % Generate path based on given root directory 40 | files = dir(d); 41 | if isempty(files) 42 | return 43 | end 44 | 45 | % Add d to the path even if it is empty. 46 | p = [p d pathsep]; 47 | 48 | % set logical vector for subdirectory entries in d 49 | isdir = logical(cat(1,files.isdir)); 50 | % 51 | % Recursively descend through directories which are neither 52 | % private nor "class" directories. 53 | % 54 | dirs = files(isdir); % select only directory entries from the current listing 55 | 56 | for i=1:length(dirs) 57 | dirname = dirs(i).name; 58 | if ~strcmp( dirname,'.') && ... 59 | ~strcmp( dirname,'..') && ... 60 | ~strncmp( dirname,classsep,1) && ... 61 | ~strncmp( dirname,packagesep,1) && ... 62 | ~strcmp( dirname,'private') && ... 63 | ~strcmpi( dirname(1), '.' ) % added in order to exclude .git/ files 64 | p = [p genpath(fullfile(d,dirname))]; % recursive calling of this function. 
65 | end 66 | end 67 | -------------------------------------------------------------------------------- /+utils/rbf.m: -------------------------------------------------------------------------------- 1 | % RBF Kernel matrix using Gaussian radial basis 2 | % 3 | % [k,sigma] = rbf(x,y,varargin) 4 | % 5 | % INPUTS 6 | % x - [m x p] m samples of dimensionality p 7 | % y - [n x p] n samples of dimensionality p 8 | % OR [], empty 9 | % 10 | % OPTIONAL 11 | % sigma - scalar, standard deviation of Gaussian kernel, default = [] 12 | % Only valid when sigma = [] 13 | % sigest - string indicating method for estimating sigma, 14 | % 'median' - Median heuristic, Gretton et al. 2012 15 | % 'adapt' - 16 | % frac - scalar (0,1] indicating fraction of data to use for sigest 17 | % 18 | % Additional name/value pairs are passed through to function for 19 | % estimating the kernel when using an approximation method. 20 | % 21 | % OUTPUTS 22 | % k - kernel matrix 23 | % sigma - standard deviation of Gaussian kernel 24 | % 25 | % SEE ALSO 26 | % sigest 27 | 28 | % $ Copyright (C) 2017 Brian Lau, brian.lau@upmc.fr $ 29 | % The full license and most recent version of the code can be found at: 30 | % https://github.com/brian-lau/highdim 31 | % 32 | % This program is free software: you can redistribute it and/or modify 33 | % it under the terms of the GNU General Public License as published by 34 | % the Free Software Foundation, either version 3 of the License, or 35 | % (at your option) any later version. 36 | % 37 | % This program is distributed in the hope that it will be useful, 38 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 39 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 40 | % GNU General Public License for more details. 41 | 42 | function [k,sigma] = rbf(x,y,varargin) 43 | 44 | par = inputParser; 45 | par.KeepUnmatched = true; 46 | addRequired(par,'x',@isnumeric); 47 | addRequired(par,'y',@isnumeric); 48 | addParamValue(par,'sigma',[],@(x) isnumeric(x) && isscalar(x)); 49 | parse(par,x,y,varargin{:}); 50 | 51 | if isempty(par.Results.sigma) 52 | % Set sigma based on first input 53 | sigma = utils.sigest(x,par.Unmatched); 54 | else 55 | sigma = par.Results.sigma; 56 | end 57 | 58 | k = exp(-utils.sqdist(x,y)/(2*sigma^2)); -------------------------------------------------------------------------------- /+diff/mmd.m: -------------------------------------------------------------------------------- 1 | % MMD Maximal mean discrepancy 2 | % 3 | % [m,sigma] = mmd(x,y,varargin) 4 | % 5 | % INPUTS 6 | % x - [m x p] m samples of dimensionality p 7 | % y - [n x p] n samples of dimensionality p 8 | % 9 | % OPTIONAL (name/value pairs) 10 | % sigma - gaussian bandwidth, default = median heuristic 11 | % biased - boolean indicated biased estimator (default=false) 12 | % 13 | % OUTPUTS 14 | % stat - maximal mean discrepancy 15 | % 16 | % REFERENCE 17 | % Gretton et al (2012). A kernel two-sample test. 18 | % Journal of Machine Learning Research 13: 723-773 19 | % 20 | % SEE ALSO 21 | % mmdtest 22 | 23 | % $ Copyright (C) 2014 Brian Lau http://www.subcortex.net/ $ 24 | % The full license and most recent version of the code can be found at: 25 | % https://github.com/brian-lau/highdim 26 | % 27 | % This program is free software: you can redistribute it and/or modify 28 | % it under the terms of the GNU General Public License as published by 29 | % the Free Software Foundation, either version 3 of the License, or 30 | % (at your option) any later version. 
31 | % 32 | % This program is distributed in the hope that it will be useful, 33 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 34 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 35 | % GNU General Public License for more details. 36 | 37 | function [stat,K,L,KL,sigma,biased] = mmd(x,y,varargin) 38 | 39 | par = inputParser; 40 | par.KeepUnmatched = true; 41 | addRequired(par,'x',@isnumeric); 42 | addRequired(par,'y',@isnumeric); 43 | addParamValue(par,'sigma',[],@isnumeric); 44 | addParamValue(par,'biased',false,@(x) isnumeric(x) || islogical(x)); 45 | parse(par,x,y,varargin{:}); 46 | 47 | [m,p] = size(x); 48 | [n,q] = size(y); 49 | if p ~= q 50 | error('x and y must have same dimensionality (# of columns)'); 51 | end 52 | 53 | if isempty(par.Results.sigma) 54 | % Median heuristic, Gretton et al. 2012 55 | sigma = sqrt(0.5*median(pdist([x;y]).^2)); 56 | else 57 | sigma = par.Results.sigma; 58 | end 59 | 60 | K = utils.rbf(x,x,'sigma',sigma); % kernel within x 61 | L = utils.rbf(y,y,'sigma',sigma); % kernel within y 62 | KL = utils.rbf(x,y,'sigma',sigma); % cross kernel between x and y 63 | K = utils.zerodiag(K); 64 | L = utils.zerodiag(L); 65 | 66 | biased = par.Results.biased; 67 | stat = diff.mmd_(K,L,KL,m,n,biased); 68 | -------------------------------------------------------------------------------- /Testing/test_uniSphereTestPower.m: -------------------------------------------------------------------------------- 1 | % pairsClusterTest from here: https://sites.google.com/site/antimatt/software 2 | % randvonMisesFisherm from here: http://www.stat.pitt.edu/sungkyu/MiscPage.html 3 | clear all; 4 | n = 80;% 1000]; 5 | p = [4 8 16];%[4 10 20]; 6 | kappa = [0 0.25 0.5 1 2 4];%[0 1 2 4]; 7 | reps = 500;%2500; 8 | 9 | prob_r = zeros(numel(kappa),numel(p)); 10 | prob_rp = zeros(numel(kappa),numel(p)); 11 | prob_b = zeros(numel(kappa),numel(p)); 12 | prob_g = zeros(numel(kappa),numel(p)); 13 | prob_a = zeros(numel(kappa),numel(p)); 14 | prob_ga = zeros(numel(kappa),numel(p)); 15 | prob_p = zeros(numel(kappa),numel(p)); 16 | 17 | test = UniSphereTest('autoRun',false); 18 | test.params.nboot = 500; 19 | tic; 20 | for i = 1:numel(kappa) 21 | for j = 1:numel(p) 22 | for k = 1:reps 23 | x = sphere.vmfrnd(p(j),n,kappa(i))'; 24 | 25 | % with noise 26 | %x = [randn(n,p(j)) ; sphere.vmfrnd(p(j),n,kappa(i))']; 27 | 28 | % antipodally symmetric 29 | % mu = zeros(1,p(j)); 30 | % mu(end) = 1; 31 | % x = [sphere.vmfrnd(p(j),n/2,kappa(i),mu)' ;... 32 | % sphere.vmfrnd(p(j),n/2,kappa(i),-mu)']; 33 | % mixture of vmf 34 | % mu = zeros(1,p(j)); 35 | % mu(end) = 1; 36 | % x = [sphere.vmfrnd(p(j),n/3,kappa(i),mu)' ;... 37 | % sphere.vmfrnd(p(j),n/3,kappa(i),-mu)' ;...
38 | % sphere.vmfrnd(p(j),n/3,kappa(i),rand(size(mu)))']; 39 | 40 | test.x = x; 41 | 42 | test.test = 'rayleigh'; test.run(); 43 | h_r(k) = test.h; 44 | test.test = 'randproj'; test.run(); 45 | h_rp(k) = test.h; 46 | test.test = 'bingham'; test.run(); 47 | h_b(k) = test.h; 48 | test.test = 'gine'; test.run(); 49 | h_g(k) = test.h; 50 | test.test = 'ajne'; test.run(); 51 | h_a(k) = test.h; 52 | test.test = 'gine-ajne'; test.run(); 53 | h_ga(k) = test.h; 54 | 55 | [clusteriness, temp, dists, k2] = pairsClusterTest(x); 56 | pv(k) = temp; 57 | 58 | end 59 | prob_r(i,j) = mean(h_r); 60 | prob_rp(i,j) = mean(h_rp); 61 | prob_b(i,j) = mean(h_b); 62 | prob_g(i,j) = mean(h_g); 63 | prob_a(i,j) = mean(h_a); 64 | prob_ga(i,j) = mean(h_ga); 65 | prob_p(i,j) = mean(pv<=0.05); 66 | end 67 | toc 68 | i 69 | end 70 | -------------------------------------------------------------------------------- /+dep/rvtest.m: -------------------------------------------------------------------------------- 1 | % RVTEST Test RV coefficient of dependence 2 | % 3 | % [pval,rv,stat] = rvtest(x,y) 4 | % 5 | % INPUTS 6 | % x - [n x p] n samples of dimensionality p 7 | % y - [n x q] n samples of dimensionality q 8 | % 9 | % OUTPUTS 10 | % pval - p-value from Pearson type III approximation 11 | % rv - RV coefficient 12 | % stat - test statistic, normalized RV coefficient 13 | % 14 | % REFERENCE 15 | % Josse et al (2008). Testing the significance of the RV coefficient. 16 | % Computational Statistics and Data Analysis 53: 82-91 17 | % 18 | % SEE ALSO 19 | % rv, dcorr, dcorrtest, DepTest2 20 | 21 | % $ Copyright (C) 2017 Brian Lau, brian.lau@upmc.fr $ 22 | % The full license and most recent version of the code can be found at: 23 | % https://github.com/brian-lau/highdim 24 | % 25 | % This program is free software: you can redistribute it and/or modify 26 | % it under the terms of the GNU General Public License as published by 27 | % the Free Software Foundation, either version 3 of the License, or 28 | % (at your option) any later version. 29 | % 30 | % This program is distributed in the hope that it will be useful, 31 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 32 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 33 | % GNU General Public License for more details. 34 | 35 | function [pval,rv,stat] = rvtest(x,y) 36 | 37 | [n,~] = size(x); 38 | assert(n == size(y,1),'RVTEST requires x and y to have the same # of samples'); 39 | 40 | [rv,xx,yy] = dep.rv(x,y); 41 | 42 | % mean 43 | bx = trace(xx)^2/trace(xx^2); 44 | by = trace(yy)^2/trace(yy^2); 45 | mu_rv = sqrt(bx*by)/(n-1); 46 | 47 | % variance 48 | tx = (n-1)/((n-3)*(n-1-bx)) * ... 49 | (n*(n+1)*(sum(diag(xx).^2)/trace(xx^2)) - (n-1)*(bx+2)); 50 | ty = (n-1)/((n-3)*(n-1-by)) * ... 51 | (n*(n+1)*(sum(diag(yy).^2)/trace(yy^2)) - (n-1)*(by+2)); 52 | var_rv = (2*(n-1-bx)*(n-1-by))/((n+1)*(n-1)^2*(n-2)) *... 
53 | (1 + ((n-3)/(2*n*(n-1)))*tx*ty); 54 | 55 | % Standardized RV coefficient 56 | stat = (rv - mu_rv)/sqrt(var_rv); 57 | 58 | % Skewness estimate for Pearson III approximation 59 | [~,~,skew] = utils.permMoments(xx,yy); 60 | 61 | if skew >= 0 62 | pval = gamcdf(stat - (-2/skew),4/skew^2,skew/2,'upper'); 63 | else 64 | as = abs(skew); 65 | pval = gamcdf(skew/as*stat + 2/as,4/skew^2,as/2); 66 | end 67 | 68 | end -------------------------------------------------------------------------------- /+utils/mexDyadUpdate.c: -------------------------------------------------------------------------------- 1 | /* function gamma = DyadUpdate(y,c) 2 | * 3 | * Inputs 4 | * y 5 | * c 6 | * Output 7 | * gamma 8 | * 9 | * Huo & Szekely (2017). Fast Computing for Distance Covariance, 10 | * Technometrics, 2016, 58, 435?447. 11 | * 12 | * Copyright (c) 2014 Xiaoming Huo 13 | */ 14 | 15 | #include 16 | #include 17 | #include "mex.h" 18 | #include "matrix.h" 19 | 20 | /* Input Arguments */ 21 | #define Y prhs[0] 22 | #define C prhs[1] 23 | 24 | /* Output Arguments */ 25 | #define GAMMA plhs[0] 26 | 27 | /* subroutines declaration */ 28 | void DyadUpdate(double GAMMA_p[],double Y_p[],double C_p[],const int n); 29 | 30 | /* Gateway Routine */ 31 | void mexFunction(int nlhs,mxArray *plhs[],int nrhs,const mxArray *prhs[]) 32 | { 33 | /* Variables declarations */ 34 | int n,m_Y,n_Y,m_c,n_c; 35 | double *Y_p,*C_p,*GAMMA_p; 36 | 37 | /* Check for proper number of arguments. */ 38 | if (nrhs != 2) { 39 | mexErrMsgTxt("DyadUpdate requires 2 input arguments."); 40 | } else if (nlhs != 1) { 41 | mexErrMsgTxt("DyadUpdate requires 1 output arguments."); 42 | } 43 | 44 | /* i1. first input */ 45 | /* Get dimensions for 1st input. It should be a column vector. */ 46 | m_Y =(int) mxGetM(Y); n_Y =(int) mxGetN(Y); 47 | if (n_Y > 1) 48 | mexErrMsgTxt("'Y' must be a column vector."); 49 | if (mxIsComplex(Y)) 50 | mexErrMsgTxt("'Y' must be a Real vector."); 51 | 52 | /* Get pointers to the inputs */ 53 | Y_p = mxGetPr(Y); n = m_Y; 54 | 55 | /* i2. second input */ 56 | /* Get dimensions for 2nd input. It should be a vector. */ 57 | m_c =(int) mxGetM(C); n_c =(int) mxGetN(C); 58 | if (n_c > 1) 59 | mexErrMsgTxt("'C' must a column vector."); 60 | if (mxIsComplex(C)) 61 | mexErrMsgTxt("'C' must be a Real vector."); 62 | if (m_c != m_Y) 63 | mexErrMsgTxt("Inputs Y and C must have the same dimensions."); 64 | 65 | /* Get pointers to the inputs */ 66 | C_p = mxGetPr(C); 67 | 68 | /* o1. 
output */ 69 | GAMMA = mxCreateDoubleMatrix((int) n, (int) 1, mxREAL); 70 | if (GAMMA == NULL) 71 | mexErrMsgTxt("Could not allocate memory for GAMMA."); 72 | 73 | GAMMA_p = mxGetPr(GAMMA); 74 | 75 | /* Call subroutine to do the computation */ 76 | DyadUpdate(GAMMA_p,Y_p,C_p,n); 77 | return; 78 | } 79 | 80 | #undef Y 81 | #undef C 82 | #undef GAMMA 83 | 84 | #include "DyadUpdate.c" 85 | -------------------------------------------------------------------------------- /+dep/rv.m: -------------------------------------------------------------------------------- 1 | % RV RV coefficient of dependence 2 | % 3 | % [r,xx,yy] = rv(x,y,varargin) 4 | % 5 | % INPUTS 6 | % x - [n x p] n samples of dimensionality p 7 | % y - [n x q] n samples of dimensionality q 8 | % 9 | % OPTIONAL (name/value pairs) 10 | % type - 'mod' to calculate modified RV (Smiles et al), default='standard' 11 | % demean - boolean indicating to subtract mean for each var, default=TRUE 12 | % 13 | % OUTPUTS 14 | % r - RV coefficient 15 | % xx - inner product matrix of x 16 | % yy - inner product matrix of y 17 | % 18 | % REFERENCE 19 | % Smilde et al (2009). Matrix correlations for high-dimensional data: 20 | % the modified RV-coefficient. Bioinformatics 25: 401-405 21 | % 22 | % SEE ALSO 23 | % rvtest, dcorr, dcorrtest, DepTest2 24 | 25 | % $ Copyright (C) 2017 Brian Lau, brian.lau@upmc.fr $ 26 | % The full license and most recent version of the code can be found at: 27 | % https://github.com/brian-lau/highdim 28 | % 29 | % This program is free software: you can redistribute it and/or modify 30 | % it under the terms of the GNU General Public License as published by 31 | % the Free Software Foundation, either version 3 of the License, or 32 | % (at your option) any later version. 33 | % 34 | % This program is distributed in the hope that it will be useful, 35 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 36 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 37 | % GNU General Public License for more details. 38 | 39 | function [r,xx,yy] = rv(x,y,varargin) 40 | 41 | par = inputParser; 42 | par.KeepUnmatched = true; 43 | addRequired(par,'x',@isnumeric); 44 | addRequired(par,'y',@isnumeric); 45 | addParamValue(par,'type','standard',@ischar); 46 | addParamValue(par,'demean',true,@islogical); 47 | parse(par,x,y,varargin{:}); 48 | 49 | [n,~] = size(x); 50 | assert(n == size(y,1),'RV requires x and y to have the same # of samples'); 51 | 52 | if par.Results.demean 53 | x = bsxfun(@minus,x,mean(x)); 54 | y = bsxfun(@minus,y,mean(y)); 55 | end 56 | xx = x*x'; 57 | yy = y*y'; 58 | 59 | switch lower(par.Results.type) 60 | case {'mod'} 61 | dind = 1:(n+1):n*n; 62 | xx(dind) = xx(dind)' - diag(xx); 63 | yy(dind) = yy(dind)' - diag(yy); 64 | r = trace(xx*yy) / sqrt(trace(xx^2)*trace(yy^2)); 65 | otherwise 66 | r = trace(xx*yy) / sqrt(trace(xx^2)*trace(yy^2)); 67 | end 68 | -------------------------------------------------------------------------------- /+diff/hotell2.m: -------------------------------------------------------------------------------- 1 | % HOTELL2 Hotelling's T-Squared test for two multivariate samples 2 | % 3 | % [pval,T2] = hotell2(x,y) 4 | % 5 | % Hotelling's T-Squared test for comparing d-dimensional data from two 6 | % independent samples, assuming normality w/ common covariance matrix. 
7 | % 8 | % INPUTS 9 | % x - [n1 x d] matrix 10 | % y - [n2 x d] matrix 11 | % 12 | % OUTPUTS 13 | % pval - asymptotic p-value 14 | % T2 - Hotelling T^2 statistic 15 | % 16 | % REFERENCE 17 | % Mardia, K, Kent, J, Bibby J (1979) Multivariate Analysis. Section 3.6.1 18 | % 19 | % SEE ALSO 20 | % kstest2d, minentest 21 | 22 | % $ Copyright (C) 2014 Brian Lau http://www.subcortex.net/ $ 23 | % The full license and most recent version of the code can be found on GitHub: 24 | % https://github.com/brian-lau/highdim 25 | % 26 | % This program is free software: you can redistribute it and/or modify 27 | % it under the terms of the GNU General Public License as published by 28 | % the Free Software Foundation, either version 3 of the License, or 29 | % (at your option) any later version. 30 | % 31 | % This program is distributed in the hope that it will be useful, 32 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 33 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 34 | % GNU General Public License for more details. 35 | 36 | function [pval,T2] = hotell2(x,y) 37 | 38 | [nx,px] = size(x); 39 | [ny,py] = size(y); 40 | 41 | if px ~= py 42 | error('# of columns in X and Y must match'); 43 | else 44 | p = px; 45 | end 46 | 47 | n = nx + ny; 48 | mux = mean(x); 49 | muy = mean(y); 50 | 51 | Sx = cov(x); 52 | Sy = cov(y); 53 | 54 | % Hotelling T2 statistic, Section 3.6.1 Mardia et al. 55 | %Su = ((nx-1)*Sx + (ny-1)*Sy) / (n-2); 56 | Su = (nx*Sx + ny*Sy) / (n-2); % unbiased estimate 57 | d = mux - muy; 58 | D2 = d*inv(Su)*d'; 59 | T2 = ((nx*ny)/n)*D2; 60 | F = T2 * (n-p-1) / ((n-2)*p); 61 | 62 | pval = 1 - fcdf(F,p,n-p-1); 63 | 64 | if nargout == 0 65 | fprintf('-------------------------------\n'); 66 | fprintf(' nx = %g\n',nx); 67 | fprintf(' ny = %g\n',ny); 68 | fprintf(' mean(x) = '); 69 | fprintf('%1.3f, ',mux); 70 | fprintf('\n'); 71 | fprintf(' mean(y) = '); 72 | fprintf('%1.3f, ',muy); 73 | fprintf('\n'); 74 | fprintf(' T2 = %5.3f\n',T2); 75 | fprintf(' F(%g,%g) = %5.3f\n',p,n-p-1,F); 76 | fprintf(' p = %5.5f\n',pval); 77 | fprintf('-------------------------------\n'); 78 | end -------------------------------------------------------------------------------- /Testing/test_rank.m: -------------------------------------------------------------------------------- 1 | %% Reproduce size and power from: 2 | % Han & Liu (2014). Distribution-free tests of independence with 3 | % applications to testing more structures. 
arXiv:1410.4179v1 4 | % Table 1 5 | 6 | % Model 1 7 | n = [60 100]; 8 | p = [50 100 200 400 800]; 9 | reps = 10; 10 | 11 | d = DepTest1(); 12 | 13 | tic; 14 | for i = 1:numel(n) 15 | for j = 1:numel(p) 16 | for k = 1:reps 17 | x = randn(n(i),p(j)); 18 | 19 | d.x = x; 20 | h(k) = d.h; 21 | end 22 | prob(i,j) = mean(h); 23 | end 24 | toc 25 | end 26 | % prob = 27 | % 28 | % 0.0240 0.0110 0.0070 0.0060 0.0030 29 | % 0.0230 0.0200 0.0180 0.0150 0.0050 30 | 31 | % Model 5 32 | n = [60 100]; 33 | p = [50 100 200 400 800]; 34 | reps = 100; 35 | 36 | d = DepTest1(); 37 | 38 | tic; 39 | for i = 1:numel(n) 40 | for j = 1:numel(p) 41 | for k = 1:reps 42 | dim = p(j); 43 | ind = triu(ones(dim,dim),1); 44 | f_ind = find(ind); 45 | r = randperm(numel(f_ind)); 46 | nz = f_ind(r(1:4)); 47 | t = zeros(dim,dim); 48 | t(nz) = rand(4,1); 49 | t = t + t'; 50 | 51 | [~,D] = eig(eye(dim)+t); 52 | lambdamin = min(diag(D)); 53 | delta = (-lambdamin+0.05)*(lambdamin<=0); 54 | R = eye(dim) + t + delta*eye(dim); 55 | 56 | x = mvnrnd(zeros(p(j),1),R,n(i)); 57 | 58 | d.x = x; 59 | h(k) = d.h; 60 | end 61 | prob(i,j) = mean(h); 62 | end 63 | toc 64 | end 65 | 66 | % Model 7 67 | n = [60 100]; 68 | p = [50 100 200 400 800]; 69 | reps = 100; 70 | 71 | d = DepTest1(); 72 | 73 | tic; 74 | for i = 1:numel(n) 75 | for j = 1:numel(p) 76 | for k = 1:reps 77 | dim = p(j); 78 | ind = triu(ones(dim,dim),1); 79 | f_ind = find(ind); 80 | r = randperm(numel(f_ind)); 81 | nz = f_ind(r(1:4)); 82 | t = zeros(dim,dim); 83 | t(nz) = rand(4,1); 84 | t = t + t'; 85 | 86 | [~,D] = eig(eye(dim)+t); 87 | lambdamin = min(diag(D)); 88 | delta = (-lambdamin+0.05)*(lambdamin<=0); 89 | R = eye(dim) + t + delta*eye(dim); 90 | 91 | x = mvnrnd(zeros(p(j),1),R,n(i)); 92 | 93 | d.x = x.^3; 94 | h(k) = d.h; 95 | end 96 | prob(i,j) = mean(h); 97 | end 98 | toc 99 | end 100 | -------------------------------------------------------------------------------- /+dep/fdcov.m: -------------------------------------------------------------------------------- 1 | % FDCOV Fast distance covariance 2 | % 3 | % d = fdcov(x,y) 4 | % 5 | % Estimate (unbiased) distance covariance using Huo & Szekely algorithm, 6 | % which has O(n log n) complexity and O(n) storage compared to 7 | % O(n^2) complexity and O(n^2) storage of the naive estimator. 8 | % Valid for univariate and real inputs. 9 | % 10 | % INPUTS 11 | % x - [n x 1] samples 12 | % y - [n x 1] samples 13 | % 14 | % OUTPUTS 15 | % d - distance covariance between x,y 16 | % 17 | % REFERENCE 18 | % Huo & Szekely (2016). Fast Computing for Distance Covariance, 19 | % Technometrics, 58, 435?447. 
DOI:10.1080/00401706.2015.1054435 20 | % 21 | % SEE ALSO 22 | % fdcorr, rpdcov 23 | 24 | % Modified from supplementary materials of Huo & Szekely 25 | % $ Copyright (C) 2014 Xiaoming Huo $ 26 | 27 | function d = fdcov(x,y) 28 | 29 | n = length(x); 30 | assert(isvector(x) && isvector(y),'FDCOV requires x & y to be univariate'); 31 | assert(n == numel(y),'FDCOV requires x & y to be the same length'); 32 | 33 | if isrow(x) 34 | x = x'; 35 | end 36 | 37 | if isrow(y) 38 | y = y'; 39 | end 40 | 41 | temp = (1:n)'; 42 | [vx,Ix0] = sort(x); Ix(Ix0) = temp; Ix = Ix'; 43 | [vy,Iy0] = sort(y); Iy(Iy0) = temp; Iy = Iy'; 44 | sx = cumsum(vx); 45 | sy = cumsum(vy); 46 | alphax = Ix - 1; 47 | alphay = Iy - 1; 48 | betax = sx(Ix) - vx(Ix); 49 | betay = sy(Iy) - vy(Iy); 50 | xdot = sum(x); 51 | ydot = sum(y); 52 | 53 | aidot = xdot + (2*alphax-n).*x - 2*betax; 54 | bidot = ydot + (2*alphay-n).*y - 2*betay; 55 | Sab = sum(aidot.*bidot); 56 | 57 | adotdot = 2*sum(alphax.*x) - 2*sum(betax); 58 | bdotdot = 2*sum(alphay.*y) - 2*sum(betay); 59 | 60 | gamma_1 = partialSum2D(x,y,ones(n,1)); 61 | gamma_x = partialSum2D(x,y,x); 62 | gamma_y = partialSum2D(x,y,y); 63 | gamma_xy = partialSum2D(x,y,x.*y); 64 | 65 | aijbij = sum(x.*y.*gamma_1 + gamma_xy - x.*gamma_y - y.*gamma_x); 66 | d = aijbij/n/(n-3) - 2*Sab/n/(n-2)/(n-3) + adotdot*bdotdot/n/(n-1)/(n-2)/(n-3); 67 | 68 | function gamma = partialSum2D(x,y,c) 69 | 70 | n = length(x); 71 | temp = (1:n)'; 72 | 73 | [~,Ix0] = sort(x); 74 | Ix(Ix0) = temp; % Ix = order stat 75 | 76 | y = y(Ix0); 77 | c = c(Ix0); % so x is at increasing order 78 | [~,Iy0] = sort(y); 79 | Iy(Iy0) = temp; 80 | y = Iy'; % y is a perm of {1,...,n} 81 | 82 | sy = cumsum(c(Iy0)) - c(Iy0); 83 | sx = cumsum(c) - c; 84 | cdot = sum(c); 85 | 86 | gamma1 = utils.mexDyadUpdate(y,c); 87 | 88 | gamma = cdot - c - 2*sy(Iy) - 2*sx + 4*gamma1; 89 | gamma = gamma(Ix); -------------------------------------------------------------------------------- /Testing/test_PAIRS.m: -------------------------------------------------------------------------------- 1 | % Run some simulations to test the power of the test used by Raposo et al 2 | % to detect non-uniform distributions on a hypersphere. 3 | 4 | % You will need the highdim library here: 5 | 6 | % as well as the following functions 7 | % fdr_bh from here: 8 | % http://www.mathworks.com/matlabcentral/fileexchange/27418-benjamini---hochberg-yekutieli-procedure-for-controlling-false-discovery-rate 9 | % pairsClusterTest from here: 10 | % https://sites.google.com/site/antimatt/RaposoKaufmanChurchland2014.zip 11 | % randvonMisesFisherm from here: 12 | % http://www.stat.pitt.edu/sungkyu/software/randvonMisesFisherm.zip 13 | 14 | n = 100; % sample size 15 | p = [4 8 16 32]; % dimensionality 16 | kappa = [0 1 2]; % von-Mises concentration, 0 is uniform for checking size 17 | reps = 100; % repetitions of experiment 18 | 19 | prob_r = zeros(numel(kappa),numel(p)); 20 | prob_rp = zeros(numel(kappa),numel(p)); 21 | prob_ga = zeros(numel(kappa),numel(p)); 22 | prob_p = zeros(numel(kappa),numel(p)); 23 | 24 | tic; 25 | for i = 1:numel(kappa) 26 | for j = 1:numel(p) 27 | for k = 1:reps 28 | % Simple unimodal model 29 | x = randvonMisesFisherm(p(j),n,kappa(i))'; 30 | 31 | pval_r(k) = uniSphereTest(x,'rayleigh'); 32 | pval_rp(k) = uniSphereTest(x,'rp'); 33 | pval_ga(k) = uniSphereTest(x,'ga'); 34 | 35 | [clusteriness, temp, dists, k2] = pairsClusterTest(x); 36 | pval_p(k) = temp; 37 | 38 | % PCA reduce first? 
39 | % [~, ~, latent] = princomp(x); 40 | % vaf = cumsum(latent)./sum(latent); 41 | % ind = find(vaf>=.9); 42 | % [clusteriness, temp, dists, k2] = pairsClusterTest(x(:,1:ind(1))); 43 | % pval_p(i,j,k) = temp; 44 | end 45 | prob_r(i,j) = mean(pval_r<0.05); 46 | prob_rp(i,j) = mean(pval_rp<0.05); 47 | prob_ga(i,j) = mean(pval_ga<0.05); 48 | prob_p(i,j) = mean(pval_p<0.05); 49 | end 50 | toc 51 | end 52 | 53 | figure; 54 | subplot(221); hold on 55 | plot(kappa,prob_r,'--'); 56 | title('Rayleigh test'); 57 | legend('p=4','8','16','32') 58 | axis([kappa(1) kappa(end) 0 1]); 59 | subplot(222); hold on 60 | plot(kappa,prob_r,'--'); 61 | plot(kappa,prob_ga); 62 | title('Gine-Ajne test (solid)'); 63 | axis([kappa(1) kappa(end) 0 1]); 64 | subplot(223); hold on 65 | plot(kappa,prob_r,'--'); 66 | plot(kappa,prob_rp); 67 | title('Random projection test (solid)'); 68 | axis([kappa(1) kappa(end) 0 1]); 69 | subplot(224); hold on 70 | plot(kappa,prob_r,'--'); 71 | plot(kappa,prob_p); 72 | title('PAIRS test (solid)'); 73 | axis([kappa(1) kappa(end) 0 1]); 74 | 75 | -------------------------------------------------------------------------------- /+utils/poldecomp.m: -------------------------------------------------------------------------------- 1 | function [R U V] = poldecomp(F) 2 | %POLDECOMP Performs the polar decomposition of a regular square matrix. 3 | % [R U V] = POLDECOMP(F) factorizes a non-singular square matrix F such 4 | % that F=R*U and F=V*R, where 5 | % U and V are symmetric, positive definite matrices and 6 | % R is a rotational matrix 7 | % 8 | % See also EIG, DIAG, REPMAT 9 | 10 | % Copyright (c) 2014, Zoltan Csati 11 | % All rights reserved. 12 | % 13 | % Redistribution and use in source and binary forms, with or without 14 | % modification, are permitted provided that the following conditions are 15 | % met: 16 | % 17 | % * Redistributions of source code must retain the above copyright 18 | % notice, this list of conditions and the following disclaimer. 19 | % * Redistributions in binary form must reproduce the above copyright 20 | % notice, this list of conditions and the following disclaimer in 21 | % the documentation and/or other materials provided with the distribution 22 | % 23 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 24 | % AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 | % IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 | % ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 27 | % LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 28 | % CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 29 | % SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 30 | % INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 31 | % CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 32 | % ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 | % POSSIBILITY OF SUCH DAMAGE. 34 | 35 | % This kind of decomposition is often used in continuum mechanics so it is 36 | % convenient to comment the code that way. From now, we use the matrix 37 | % formalism of tensors. C is the right Cauchy-Green deformation tensor, 38 | % F is the deformation tensor, lambda is the stretch. 
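%
% A small usage sketch (added for illustration, not from the original author):
% for an arbitrary non-singular F, the factors should satisfy F = R*U and
% F = V*R, with R orthogonal and U, V symmetric positive definite.
%
% F = randn(3);                 % hypothetical input, almost surely non-singular
% [R,U,V] = utils.poldecomp(F);
% norm(F - R*U)                 % ~ 0
% norm(F - V*R)                 % ~ 0
% norm(R'*R - eye(3))           % ~ 0, R is orthogonal
%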
39 | 40 | % Check input 41 | [m n] = size(F); 42 | if m ~= n 43 | error('Matrix must be square.'); 44 | end 45 | 46 | C = F'*F; 47 | [Q0 lambdasquare] = eig(C); 48 | lambda = sqrt(diag((lambdasquare))); % extract the components 49 | % Uinv is the inverse of U and is constructed with the help of Q0. Uinv is 50 | % produced in the same base as F not in the base of its eigenvectors. 51 | Uinv = repmat(1./lambda',size(F,1),1).*Q0*Q0'; 52 | % Using the definition, R, U and V can now be calculated 53 | R = F*Uinv; 54 | U = R'*F; 55 | V = F*R'; -------------------------------------------------------------------------------- /+diff/mmdtest.m: -------------------------------------------------------------------------------- 1 | % MMDTEST Two-sample maximal mean discrepancy test 2 | % 3 | % [pval,stat,boot] = mmdtest(x,y,varargin) 4 | % 5 | % Given a sample X1,...,Xm from a p-dimensional multivariate distribution, 6 | % and a sample Y1,...,Xn from a q-dimensional multivariate distribution, 7 | % test the hypothesis: 8 | % 9 | % H0 : X and Y are drawn from the same distribution 10 | % 11 | % INPUTS 12 | % x - [m x p] m samples of dimensionality p 13 | % y - [n x p] n samples of dimensionality p 14 | % 15 | % OPTIONAL 16 | % nboot - # bootstrap samples (default = 1000) 17 | % sigma - gaussian bandwidth (default = median heuristic) 18 | % biased - boolean indicated biased estimator (default = false) 19 | % 20 | % OUTPUTS 21 | % pval - p-value 22 | % stat - maximal mean discrepancy 23 | % boot - bootstrap samples 24 | % 25 | % REFERENCE 26 | % Gretton et al (2012). A kernel two-sample test. 27 | % Journal of Machine Learning Research 13: 723-773 28 | % 29 | % SEE ALSO 30 | % mmd, DepTest2 31 | 32 | % $ Copyright (C) 2014 Brian Lau http://www.subcortex.net/ $ 33 | % The full license and most recent version of the code can be found at: 34 | % https://github.com/brian-lau/highdim 35 | % 36 | % This program is free software: you can redistribute it and/or modify 37 | % it under the terms of the GNU General Public License as published by 38 | % the Free Software Foundation, either version 3 of the License, or 39 | % (at your option) any later version. 40 | % 41 | % This program is distributed in the hope that it will be useful, 42 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 43 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 44 | % GNU General Public License for more details. 
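%
% A minimal usage sketch (added for illustration; sample sizes and the shift
% are arbitrary). Under H0 the permutation p-value is roughly uniform, while a
% mean shift should drive it toward zero:
%
% x = randn(100,3);
% y = randn(120,3);
% pval0 = diff.mmdtest(x,y,'nboot',500);   % same distribution
% pval1 = diff.mmdtest(x,y+1,'nboot',500); % shifted mean, typically small
%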
45 | 46 | function [pval,stat,boot] = mmdtest(x,y,varargin) 47 | 48 | par = inputParser; 49 | par.KeepUnmatched = true; 50 | addRequired(par,'x',@isnumeric); 51 | addRequired(par,'y',@isnumeric); 52 | addParamValue(par,'nboot',1000,@(x) isscalar(x)&&isnumeric(x)); 53 | parse(par,x,y,varargin{:}); 54 | 55 | [m,p] = size(x); 56 | [n,q] = size(y); 57 | if p ~= q 58 | error('x and y must have same dimensionality (# of columns)'); 59 | end 60 | 61 | [stat,K,L,KL,sigma,biased] = diff.mmd(x,y,par.Unmatched); 62 | 63 | nboot = par.Results.nboot; 64 | boot = zeros(nboot,1); 65 | % aggregated kernel matrix 66 | M = [K KL; KL' L]; 67 | for i = 1:nboot 68 | ind = randperm(n+m); 69 | K = M(ind(1:m),ind(1:m)); 70 | L = M(ind(m+1:end),ind(m+1:end)); 71 | KL = M(ind(1:m),ind(m+1:end)); % cross block between the two permuted samples 72 | boot(i) = diff.mmd_(K,L,KL,m,n,biased); 73 | end 74 | 75 | pval = sum(boot>=stat)./nboot; 76 | -------------------------------------------------------------------------------- /+sphere/rptest.m: -------------------------------------------------------------------------------- 1 | % RPTEST Random projection test for spherical uniformity 2 | % 3 | % [pval,stat] = rptest(U,varargin) 4 | % 5 | % INPUTS 6 | % U - [n x p] matrix, n samples with dimensionality p 7 | % the data should already be projected to the unit hypersphere 8 | % 9 | % OPTIONAL (name/value pairs) 10 | % k - # of random projections (default=20); correction - 'fdr' (default) or 'bonferroni'; nmc - # of null samples when dist='empirical' (default=2000); dist - 'empirical' (default) or 'asymp' 11 | % 12 | % OUTPUTS 13 | % pval - p-value 14 | % stat - statistic, projections onto k random p-vectors 15 | % 16 | % REFERENCE 17 | % Cuesta-Albertos, JA et al (2009). On projection-based tests for 18 | % directional and compositional data. Stat Comput 19: 367-380 19 | % Cuesta-Albertos, JA et al (2007). A sharp form of the Cramer-Wold 20 | % theorem. J Theor Probab 20: 201-209 21 | % 22 | % SEE ALSO 23 | % UniSphereTest, rp, rppdf, rpcdf 24 | 25 | % $ Copyright (C) 2014 Brian Lau http://www.subcortex.net/ $ 26 | % The full license and most recent version of the code can be found at: 27 | % https://github.com/brian-lau/highdim 28 | % 29 | % This program is free software: you can redistribute it and/or modify 30 | % it under the terms of the GNU General Public License as published by 31 | % the Free Software Foundation, either version 3 of the License, or 32 | % (at your option) any later version. 33 | % 34 | % This program is distributed in the hope that it will be useful, 35 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 36 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 37 | % GNU General Public License for more details.
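%
% A minimal usage sketch (added for illustration; it assumes only the
% interface documented above and uses the Bonferroni correction, which avoids
% the optional fdr_bh dependency):
%
% U = sphere.spatialSign(randn(200,5));               % uniform on S^4
% pval0 = sphere.rptest(U,'correction','bonferroni'); % typically > 0.05
% V = sphere.spatialSign(sphere.vmfrnd(5,200,4)');    % concentrated vMF sample
% pval1 = sphere.rptest(V,'correction','bonferroni'); % typically small
%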
38 | 39 | function [pval,stat] = rptest(U,varargin) 40 | 41 | import sphere.* 42 | 43 | par = inputParser; 44 | par.KeepUnmatched = true; 45 | addRequired(par,'U',@isnumeric); 46 | addParamValue(par,'correction','fdr',@ischar); 47 | addParamValue(par,'nmc',2000,@isnumeric); 48 | addParamValue(par,'k',20,@isnumeric); 49 | addParamValue(par,'dist','empirical',@ischar); 50 | parse(par,U,varargin{:}); 51 | k = par.Results.k; 52 | 53 | [n,p] = size(U); 54 | stat = rp(U,k); 55 | 56 | switch lower(par.Results.dist) 57 | case 'asymp' 58 | pval = zeros(k,1); 59 | for i = 1:k 60 | test_cdf = [ stat(:,i) , rpcdf(stat(:,i),p)]; 61 | [~,pval(i)] = kstest(stat(:,i),'CDF',test_cdf); 62 | end 63 | otherwise % empirical 64 | Umc = spatialSign(randn(par.Results.nmc,p)); 65 | u0 = spatialSign(randn(1,p)); 66 | Ymc = acos(Umc*u0'); 67 | pval = zeros(k,1); 68 | for i = 1:k 69 | [~,pval(i)] = kstest2(stat(:,i),Ymc); 70 | end 71 | end 72 | 73 | switch lower(par.Results.correction) 74 | case 'bonferroni' 75 | adj_p = pval*k; 76 | case 'fdr' 77 | [~,~,adj_p] = utils.fdr_bh(pval,.05,'pdep'); 78 | otherwise 79 | error('Invalid p-value correction'); 80 | end 81 | pval = min(adj_p); 82 | -------------------------------------------------------------------------------- /+utils/tri2sqind.m: -------------------------------------------------------------------------------- 1 | function [i,j,k] = tri2sqind( m, k ) 2 | %TRI2SQIND subscript and linear indices for upper tri portion of matrix 3 | % 4 | % get indices into a square matrix for a vector representing a the upper 5 | % triangular portion of a matrix such as those returned by pdist. 6 | % 7 | % [i,j,k] = tri2sqind( m, k ) 8 | % If V is a hypothetical vector representing the upper triangular portion 9 | % of a matrix (not including the diagonal) and 10 | % M is the size of a square matrix and 11 | % K is an optional vector of indices into V then tri2sqind returns 12 | % (i,j) the subscripted indices into the equivalent square matrix. 13 | % K is an integer index into the equivalent square matrix 14 | % 15 | % Example 16 | % X = randn(5, 20); 17 | % Y = pdist(X, 'euclidean'); 18 | % [i,j,k] = tri2sqind( 5 ); 19 | % S = squareform(Y); 20 | % isequal( Y(:), S(k) ); 21 | % Z = zeros(5); 22 | % Z(k) = Y; 23 | % 24 | % Copyright 2012 Mike Boedigheimer 25 | % Amgen Inc. 26 | % Department of Computational Biology 27 | % mboedigh@amgen.com 28 | 29 | % Copyright (c) 2013, Michael Boedigheimer; Chris Rorden 30 | % All rights reserved. 31 | % 32 | % Redistribution and use in source and binary forms, with or without 33 | % modification, are permitted provided that the following conditions are 34 | % met: 35 | % 36 | % * Redistributions of source code must retain the above copyright 37 | % notice, this list of conditions and the following disclaimer. 38 | % * Redistributions in binary form must reproduce the above copyright 39 | % notice, this list of conditions and the following disclaimer in 40 | % the documentation and/or other materials provided with the distribution 41 | % 42 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 43 | % AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 44 | % IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 45 | % ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 46 | % LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 47 | % CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 48 | % SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 49 | % INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 50 | % CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 51 | % ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 52 | % POSSIBILITY OF SUCH DAMAGE. 53 | 54 | max_k = m*(m-1)/2; 55 | 56 | if ( nargin < 2 ) 57 | k = (1:max_k)'; 58 | end; 59 | 60 | if any( k > max_k ) 61 | error('linstats:tri2sqind:InvalidArgument', 'ind2subl:Out of range subscript'); 62 | end; 63 | 64 | 65 | i = floor(m+1/2-sqrt(m^2-m+1/4-2.*(k-1))); 66 | j = k - (i-1).*(m-i/2)+i; 67 | k = sub2ind( [m m], i, j ); 68 | %end tri2sqind() -------------------------------------------------------------------------------- /Testing/test_uniSphereTestNull.m: -------------------------------------------------------------------------------- 1 | %% Check the 95th percentiles of the statistics under uniformity 2 | 3 | %% Gine & Bingham 4 | clear all; 5 | n = [10 30 50 100 150]; 6 | p = [10 20 30 40 50 100]; 7 | reps = 200; 8 | 9 | tic; 10 | for i = 1:numel(n) 11 | for j = 1:numel(p) 12 | for k = 1:reps 13 | x = randn(n(i),p(j)); 14 | U = sphere.spatialSign(x); 15 | G(i,j,k) = sphere.gine(U); 16 | [~,B(i,j,k)] = sphere.bingham(U); 17 | end 18 | pctlG(i,j) = prctile(G(i,j,:),95); 19 | pctlB(i,j) = prctile(B(i,j,:),95); 20 | end 21 | toc 22 | end 23 | 24 | % Note that n,p refer to dim,samples in 25 | % Figueiredo & Gomes (2003). Power of Tests of Uniformity Defined on the 26 | % Hypersphere. Communications in Statistics 32: 87-94 27 | pB = ... 28 | [71.249, 243.558, 514.535, NaN, NaN, NaN;... 29 | 71.631, 243.709, 515.788, 885.647, NaN, NaN;... 30 | 72.040, 244.300, 515.409, 886.401, 1356.267, 5214.739;... 31 | NaN, NaN, NaN, 885.969, 1355.913, 5219.373;... 32 | NaN, NaN, NaN, NaN, 1357.249, 5215.061]; 33 | 34 | pG = ... 35 | [0.588, 0.543, 0.528, NaN, NaN, NaN;... 36 | 0.590, 0.544, 0.529, 0.521, NaN, NaN;... 37 | 0.592, 0.544, 0.529, 0.521, 0.516, 0.508;... 38 | NaN, NaN, NaN, 0.521, 0.516, 0.508;... 39 | NaN, NaN, NaN, NaN, 0.517, 0.509]; 40 | 41 | pctlB-pB 42 | pctlG-pG 43 | 44 | %% Rayleigh & Anje 45 | clear all; 46 | n = [10 30 50 70 100 150]; 47 | p = [10 20 30 40 50 100]; 48 | reps = 500; 49 | 50 | tic; 51 | for i = 1:numel(n) 52 | for j = 1:numel(p) 53 | for k = 1:reps 54 | x = randn(n(i),p(j)); 55 | U = sphere.spatialSign(x); 56 | [~,R(i,j,k)] = sphere.rayleigh(U); 57 | A(i,j,k) = sphere.ajne(U); 58 | end 59 | pctlR(i,j) = prctile(R(i,j,:),95); 60 | pctlA(i,j) = prctile(A(i,j,:),95); 61 | end 62 | toc 63 | end 64 | 65 | % Note that n,p refer to samples,dim in 66 | % Figueiredo (2007) Comparison of tests of uniformity defined on the 67 | % hypersphere. Statistics & Probability Letters 77: 329-334 68 | % 69 | pR = ... 70 | [17.763, 30.694, 42.818, 54.723, 66.227, 122.647;... 71 | 18.168, 31.193, 43.373, 55.625, 66.896, 124.296;... 72 | 18.051, 31.305, 43.923, 55.631, 66.609, 124.318;... 73 | 18.045, 31.317, 43.806, 55.820, 67.162, 123.986;... 74 | 18.176, 31.195, 43.753, 55.557, 67.356, 123.091;... 75 | 18.335, 31.511, 43.699, 55.551, 67.681, 124.109]; 76 | 77 | pA = ... 78 | [0.379, 0.337, 0.319, 0.309, 0.302, 0.286;... 79 | 0.387, 0.341, 0.322, 0.313, 0.304, 0.289;... 80 | 0.385, 0.342, 0.325, 0.313, 0.303, 0.289;... 
81 | 0.384, 0.342, 0.324, 0.314, 0.305, 0.288;... 82 | 0.387, 0.341, 0.324, 0.313, 0.306, 0.288;... 83 | 0.390, 0.344, 0.324, 0.313, 0.307, 0.289]; 84 | 85 | pctlR-pR 86 | pctlA-pA -------------------------------------------------------------------------------- /+dep/ranktest.m: -------------------------------------------------------------------------------- 1 | % RANKTEST Rank-based tests high-dimensional independence 2 | % 3 | % [pval,r,rmc] = ranktest(x,varargin) 4 | % 5 | % Given a sample X1,...,Xn from a p-dimensional multivariate distribution, 6 | % test the hypothesis: 7 | % 8 | % H0 : X1,...,Xp are mutually independent 9 | % 10 | % INPUTS 11 | % x - [n x p] matrix, n samples with dimensionality p 12 | % 13 | % OPTIONAL (name/value pairs) 14 | % test - 'spearman' - R1 from Han & Liu (default) 15 | % 'kendall' - R2 from Han & Liu 16 | % empirical - boolean to monte-carlo sample null distribution 17 | % DEFAULT=FALSE, which uses asymptotic distribution 18 | % nmc - number of monte-carlo samples, if empirical=true 19 | % rmc - vector of monte-carlo samples. Since the null distribution is 20 | % distribution-free (does not depend on data other than size), if 21 | % you have already estimated the empirical, you can avoid doing 22 | % it again 23 | % 24 | % OUTPUTS 25 | % pval - p-value 26 | % r - rank statistic 27 | % rmc - monte-carlo samples of empirical null 28 | % 29 | % REFERENCE 30 | % Han & Liu (2014). Distribution-free tests of independence with 31 | % applications to testing more structures. arXiv:1410.4179v1 32 | % 33 | % SEE ALSO 34 | % rank, DepTest1 35 | 36 | % $ Copyright (C) 2017 Brian Lau, brian.lau@upmc.fr $ 37 | % The full license and most recent version of the code can be found at: 38 | % https://github.com/brian-lau/highdim 39 | % 40 | % This program is free software: you can redistribute it and/or modify 41 | % it under the terms of the GNU General Public License as published by 42 | % the Free Software Foundation, either version 3 of the License, or 43 | % (at your option) any later version. 44 | % 45 | % This program is distributed in the hope that it will be useful, 46 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 47 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 48 | % GNU General Public License for more details. 49 | 50 | function [pval,r,rmc] = ranktest(x,varargin) 51 | 52 | par = inputParser; 53 | par.KeepUnmatched = true; 54 | addRequired(par,'x',@isnumeric); 55 | addParamValue(par,'test','spearman',@ischar); 56 | addParamValue(par,'empirical',false,@(x) isnumeric(x) || islogical(x)); 57 | addParamValue(par,'nmc',1000,@(x) isnumeric(x) && isscalar(x)); 58 | addParamValue(par,'rmc',[],@isnumeric); 59 | parse(par,x,varargin{:}); 60 | 61 | [n,p] = size(x); 62 | r = dep.rank(x,par.Results.test); 63 | 64 | if par.Results.empirical 65 | nmc = par.Results.nmc; 66 | rmc = par.Results.rmc; 67 | if isempty(rmc) 68 | rmc = zeros(nmc,1); 69 | else 70 | pval = sum(rmc>=r)/nmc; 71 | return; 72 | end 73 | % Otherwise re-estimate, TODO: check whether this depends on n,p? 
74 | for i = 1:nmc 75 | xmc = randn(n,p); 76 | rmc(i) = dep.rank(xmc,par.Results.test); 77 | end 78 | pval = sum(rmc>=r)/nmc; 79 | else 80 | % Asymptotic, extreme value type 1 cdf 81 | cdf = @(y) exp(-exp(-y/2)/sqrt(8*pi)); 82 | pval = 1 - cdf(r); 83 | rmc = []; 84 | end -------------------------------------------------------------------------------- /+sphere/sumchi2cdf.m: -------------------------------------------------------------------------------- 1 | % SUMCHI2CDF CDF for infinite weighted sums of chi-square 2 | % 3 | % Fxval = sumchi2cdf(xval,p) 4 | % 5 | % INPUTS 6 | % xval 7 | % p 8 | % 9 | % OUTPUTS 10 | % Fxval - CDF value 11 | % 12 | % REFERENCE 13 | % Keilson J et al (1983). Significance points for some tests of uniformity 14 | % on the sphere. J Statist Comput Simul 17: 195-218. 15 | 16 | % $ Copyright (C) 2014 Brian Lau http://www.subcortex.net/ $ 17 | % The full license and most recent version of the code can be found at: 18 | % https://github.com/brian-lau/highdim 19 | % 20 | % This program is free software: you can redistribute it and/or modify 21 | % it under the terms of the GNU General Public License as published by 22 | % the Free Software Foundation, either version 3 of the License, or 23 | % (at your option) any later version. 24 | % 25 | % This program is distributed in the hope that it will be useful, 26 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 27 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 28 | % GNU General Public License for more details. 29 | 30 | function Fxval = sumchi2cdf(xval,p) 31 | 32 | switch p 33 | case 3 34 | % Table V from Keilson et al (1983) 35 | if xval > 5 36 | Fxval = 1; 37 | elseif xval < 0 38 | Fxval = 0; 39 | else 40 | x = 0:.05:5; 41 | Fx = [0 0 0 0 0 0 0 0 ,... 42 | 0.00006,0.00040,0.00170,0.00500,0.01152,0.02228,0.03792,0.05860,... 43 | 0.08408,0.11385,0.14718,0.18331,0.22144,0.26083,0.30083,0.34085,... 44 | 0.38043,0.41917,0.45670,0.49303,0.52776,0.56085,0.59224,0.62191,... 45 | 0.64985,0.67609,0.70067,0.72364,0.74506,0.76501,0.78355,0.80076,... 46 | 0.81671,0.83148,0.84514,0.85777,0.86942,0.88017,0.89008,0.89921,... 47 | 0.90762,0.91535,0.92246,0.92899,0.93500,0.94051,0.94557,0.95021,... 48 | 0.95446,0.95836,0.96194,0.96522,0.96822,0.97096,0.97348,0.97578,... 49 | 0.97788,0.97981,0.98157,0.98318,0.98465,0.98600,0.98723,0.98835,... 50 | 0.98937,0.99031,0.99116,0.99194,0.99266,0.99331,0.99390,0.99444,... 51 | 0.99493,0.99538,0.99580,0.99617,0.99651,0.99682,0.99711,0.99737,... 52 | 0.99760,0.99782,0.99801,0.99819,0.99835,0.99850,0.99863,0.99876,... 53 | 0.99887,0.99897,0.99906,0.99915,0.99923]; 54 | Fxval = interp1(x,Fx,xval,'linear'); 55 | end 56 | otherwise 57 | error('No approximation for p requested'); 58 | end 59 | 60 | % 61 | % alpha = (p-1)/2; 62 | % q = 1:10; 63 | % a2 = (p*(2*q-1))/(8*pi*(2*q+p)) *... 64 | % (gamma(alpha + 0.5)*gamma(q-0.5)) ./... 65 | % (gamma(q+alpha+0.5)).^2; 66 | % 67 | % temp = 0; 68 | % for i = 1:numel(q) 69 | % vp2q = vpq(p,2*q(i)); 70 | % temp = temp + a2(i) * chi2pdf(xval,vp2q); 71 | % end 72 | % 73 | 74 | % 75 | % p = 3 76 | % syms theta 77 | % hp = (1/sqrt(pi)) * (gamma(p/2)/(gamma((p-1)/2)*sqrt(2)))*... 
78 | % (sin(theta).^(p-2)); 79 | % qsym = simplify(int(hp,theta,p)); % Solve integral symbolically 80 | % pretty(qsym) 81 | % 82 | % double(subs(qsym,{theta},{0:.1:pi})) 83 | -------------------------------------------------------------------------------- /+dep/rpdcov.m: -------------------------------------------------------------------------------- 1 | % RPDCOV Randomly projected distance covariance 2 | % 3 | % [d,omega_k] = rpdcov(x,y,k) 4 | % 5 | % Estimate (unbiased) distance covariance using Huang & Huo algorithm, 6 | % which has O(nk (log n + p + q)) complexity and O(max(n,k)) storage 7 | % compared to O(n^2(p + q)) complexity and O(n^2) storage of the naive 8 | % estimator. 9 | % 10 | % The random projection estimator is an unbiased estimator of distance 11 | % covariance (bias-corrected variant). The difference converges to zero 12 | % at a rate no worse than O(1/sqrt(k)), where k is the number of random 13 | % projections. 14 | % 15 | % The direct estimator will perform better when high-dimensional data 16 | % have low-dimensional dependency structure. 17 | % 18 | % INPUTS 19 | % x - [n x p] n samples of dimensionality p 20 | % y - [n x q] n samples of dimensionality q 21 | % 22 | % OPTIONAL 23 | % k - scalar integer, number of random projections, default = 50 24 | % 25 | % OUTPUTS 26 | % d - distance covariance between x,y 27 | % omega_k - distance covariance of k univariate random projections 28 | % 29 | % EXAMPLE 30 | % rng(1234) 31 | % n = 10000; p = 500; q = p; 32 | % x = rand(n,p); 33 | % y = x.^2; 34 | % tic; dep.dcov(x,y,'unbiased',true) % naive (unbiased) estimator 35 | % toc 36 | % tic; dep.rpdcov(x,y) 37 | % toc 38 | % tic; dep.rpdcov(x,y,100) 39 | % toc 40 | % 41 | % REFERENCE 42 | % Huang & Huo (2017). A statistically and numerically efficient 43 | % independence test based on random projections and distance 44 | % covariance. arxiv.org/abs/1701.06054v1 45 | % 46 | % SEE ALSO 47 | % fdcov, dcov 48 | 49 | % $ Copyright (C) 2017 Brian Lau, brian.lau@upmc.fr $ 50 | % The full license and most recent version of the code can be found at: 51 | % https://github.com/brian-lau/highdim 52 | % 53 | % This program is free software: you can redistribute it and/or modify 54 | % it under the terms of the GNU General Public License as published by 55 | % the Free Software Foundation, either version 3 of the License, or 56 | % (at your option) any later version. 57 | % 58 | % This program is distributed in the hope that it will be useful, 59 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 60 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 61 | % GNU General Public License for more details. 
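%
% Note added for clarity (not from the original author): the constants Cp and
% Cq defined below come from the identity, for u uniform on the unit sphere
% in R^p,
%
%   E|u'x| = |x| / Cp,   Cp = sqrt(pi)*gamma((p+1)/2)/gamma(p/2),
%
% which is why the averaged univariate distance covariances are rescaled by
% Cp*Cq. For p = 1, Cp = sqrt(pi)*gamma(1)/gamma(1/2) = 1, and the estimator
% reduces to the univariate fast estimator dep.fdcov.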
62 | 63 | % o parfor 64 | % o data too large to fit in memory 65 | 66 | function [d,omega_k] = rpdcov(x,y,k) 67 | 68 | if nargin < 3 69 | k = 50; 70 | end 71 | 72 | [nx,p] = size(x); 73 | [ny,q] = size(y); 74 | assert(nx == ny,'RPDCOV requires x and y to have the same # of samples'); 75 | 76 | % Normalization constants, avoiding overflow 77 | Cp = sqrt(pi) * exp(gammaln((p+1)/2) - gammaln(p/2)); 78 | Cq = sqrt(pi) * exp(gammaln((q+1)/2) - gammaln(q/2)); 79 | 80 | omega_k = zeros(k,1); 81 | for kk = 1:k 82 | % Project onto random basis on unit hypersphere 83 | ux = x * sphere.spatialSign(randn(1,p))'; 84 | vy = y * sphere.spatialSign(randn(1,q))'; 85 | 86 | % Fast O(n log n) distance covariance 87 | omega_k(kk) = dep.fdcov(ux,vy); 88 | end 89 | 90 | d = mean(Cp*Cq*omega_k); -------------------------------------------------------------------------------- /+diff/covtest.m: -------------------------------------------------------------------------------- 1 | % COVTEST Two-sample covariance matrix test 2 | % 3 | % [pval,stat,Mthresh] = covtest(x,y,varargin) 4 | % 5 | % Given a sample X1,...,Xm from a p-dimensional multivariate distribution, 6 | % and a sample Y1,...,Xn from a q-dimensional multivariate distribution, 7 | % test one of the hypotheses: 8 | % 9 | % H0 : cov(X) = cov(Y) 10 | % 11 | % It is also possible to test the support of cov(x) ~= cov(y), which is 12 | % controlled at family-wise error rate = alpha. 13 | % 14 | % INPUTS 15 | % x - [m x p] m samples of dimensionality p 16 | % y - [n x p] n samples of dimensionality p 17 | % 18 | % OPTIONAL 19 | % alpha - level for test of support cov(x) ~= cov(y) (default = 0.05) 20 | % 21 | % OUTPUTS 22 | % pval - p-value 23 | % stat - statistic 24 | % Mthresh - support cov(x) ~= cov(y), indicating significantly different 25 | % entries at level alpha 26 | % 27 | % REFERENCE 28 | % Cai et al (2013). Two-sample covariance matrix testing and support 29 | % recovery in high-dimensional and sparse settings. Journal of the 30 | % American Statistical Association 108: 265-277 31 | 32 | % $ Copyright (C) 2014 Brian Lau http://www.subcortex.net/ $ 33 | % The full license and most recent version of the code can be found at: 34 | % https://github.com/brian-lau/highdim 35 | % 36 | % This program is free software: you can redistribute it and/or modify 37 | % it under the terms of the GNU General Public License as published by 38 | % the Free Software Foundation, either version 3 of the License, or 39 | % (at your option) any later version. 40 | % 41 | % This program is distributed in the hope that it will be useful, 42 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 43 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 44 | % GNU General Public License for more details. 
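%
% A minimal usage sketch (added for illustration; dimensions and the
% alternative are arbitrary). Equal covariances should give a large p-value,
% and inflating one variance should show up in the recovered support:
%
% x = randn(100,20);
% y = randn(150,20);
% pval0 = diff.covtest(x,y);                             % H0 true
% y(:,1) = 3*y(:,1);                                     % inflate one variance
% [pval1,stat,Mthresh] = diff.covtest(x,y,'alpha',0.05); % Mthresh marks entries
%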
45 | 46 | % TODO 47 | % o small sample (n<30) modification 48 | % x support test 49 | % o row test 50 | 51 | function [pval,stat,Mthresh] = covtest(x,y,varargin) 52 | 53 | par = inputParser; 54 | par.KeepUnmatched = true; 55 | addRequired(par,'x',@isnumeric); 56 | addRequired(par,'y',@isnumeric); 57 | addParamValue(par,'alpha',0.05,@(x) isscalar(x)&&isnumeric(x)); 58 | addParamValue(par,'row',[],@(x) isscalar(x)&&isnumeric(x)); 59 | parse(par,x,y,varargin{:}); 60 | 61 | [m,p] = size(x); 62 | [n,q] = size(y); 63 | 64 | if ne(p,q) 65 | error('Dimensions must match'); 66 | end 67 | 68 | Sx = cov(x,1); 69 | Sy = cov(y,1); 70 | x_theta = normvar(x,p,m,Sx); 71 | y_theta = normvar(y,p,n,Sy); 72 | M = (Sx - Sy).^2 ./ (x_theta/m + y_theta/n); % eq 2 73 | Mn = max(max(triu(M))); 74 | 75 | stat = Mn - 4*log(p) + log(log(p)); 76 | cdf = @(y) exp(-exp(-y/2)/sqrt(8*pi)); 77 | pval = 1 - cdf(stat); 78 | 79 | if nargout > 2 80 | % Support Sx-Sy 81 | Mthresh = M; 82 | q = -log(8*pi) - 2*log(log(1/(1 - par.Results.alpha))); % q_alpha threshold, Cai et al. (2013) 83 | Mthresh = Mthresh >= (4*log(p) - log(log(p)) + q); 84 | Mthresh = utils.putdiag(Mthresh,diag(M) >= 2*log(p)); 85 | end 86 | 87 | function theta = normvar(x,p,n,S) 88 | mu = mean(x); 89 | theta = zeros(p,p); 90 | for i = 1:p 91 | for j = 1:p 92 | for k = 1:n 93 | theta(i,j) = theta(i,j) +... 94 | ((x(k,i) - mu(i))*(x(k,j) - mu(j)) - S(i,j))^2; 95 | end 96 | end 97 | end 98 | theta = theta/n; 99 | -------------------------------------------------------------------------------- /+sphere/signtest.m: -------------------------------------------------------------------------------- 1 | % SIGNTEST Nonparametric test for high-dimensional sphericity 2 | % 3 | % [pval,stat] = signtest(x,varargin) 4 | % 5 | % Tests whether the covariance matrix of a sample X1, ..., Xn from a 6 | % p-dimensional multivariate distribution is proportional to the identity. 7 | % This test is non-parametric, relying only on the spatial sign of the 8 | % data. 9 | % 10 | % INPUTS 11 | % x - [n x p] matrix, n samples with dimensionality p 12 | % 13 | % OPTIONAL (name/value pairs) 14 | % test - 'sign' - standard multivariate sign, biased if p grows 15 | % 'bcs' - corrected sign, p can increase as n^2, (DEFAULT) 16 | % approx - multivariate normal approximation (DEFAULT=true) 17 | % 18 | % OUTPUTS 19 | % pval - p-value 20 | % stat - statistic 21 | % 22 | % REFERENCE 23 | % Zou et al (2014). Multivariate sign-based high-dimensional tests for 24 | % sphericity. Biometrika 101: 229-236 25 | % 26 | % SEE ALSO 27 | % UniSphereTest 28 | 29 | % $ Copyright (C) 2014 Brian Lau http://www.subcortex.net/ $ 30 | % The full license and most recent version of the code can be found at: 31 | % https://github.com/brian-lau/highdim 32 | % 33 | % This program is free software: you can redistribute it and/or modify 34 | % it under the terms of the GNU General Public License as published by 35 | % the Free Software Foundation, either version 3 of the License, or 36 | % (at your option) any later version. 37 | % 38 | % This program is distributed in the hope that it will be useful, 39 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 40 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 41 | % GNU General Public License for more details.
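%
% A minimal usage sketch (added for illustration, using only the interface
% documented above):
%
% x = randn(50,100);                       % spherical data, p > n is fine here
% pval0 = sphere.signtest(x);              % bias-corrected sign test (default)
% y = bsxfun(@times,randn(50,100),linspace(1,3,100)); % unequal column scales
% pval1 = sphere.signtest(y,'test','bcs'); % should tend to be small
%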
42 | 43 | function [pval,stat] = signtest(x,varargin) 44 | 45 | import sphere.* 46 | 47 | par = inputParser; 48 | par.KeepUnmatched = true; 49 | addRequired(par,'x',@isnumeric); 50 | addParamValue(par,'test','bcs',@ischar); 51 | addParamValue(par,'approx',true,@(x) isnumeric(x) || islogical(x)); 52 | parse(par,x,varargin{:}); 53 | 54 | [n,p] = size(x); 55 | theta = utils.spatialMedian(x); 56 | U = spatialSign(bsxfun(@minus,x,theta)); 57 | 58 | % TODO, block process for n large 59 | UtU = U*U'; 60 | UtU(sub2ind([n n],1:n,1:n)) = 0; 61 | UtU = sum(UtU(:).^2); 62 | 63 | switch lower(par.Results.test) 64 | case {'sign','s'} 65 | Q = p/n + (n*(n-1)/n^2) * (p/(n*(n-1))) * UtU - 1; 66 | stat = n*(p+2)*Q/2; 67 | pval = 1 - chi2cdf(stat,(p+2)*(p-1)/2); 68 | case {'bcs','b'} 69 | % Bias-corrected sign test, p = O(n^2) 70 | Q = (p/(n*(n-1))) * UtU - 1; 71 | sigma0 = sqrt( 4*(p-1)/(n*(n-1)*(p+2)) ); 72 | 73 | if par.Results.approx 74 | % Approximation when x is multivariate normal 75 | deltanp = n^(-2) + 2*n^(-3); 76 | else 77 | % General case (Theorem 1, Zou et al.) 78 | R = sqrt(sum(bsxfun(@minus,x,theta).^2,2)); 79 | Rstar = R + U*theta' - sum(theta.^2)./(2*R); 80 | erk2 = erk(Rstar,2,n); 81 | deltanp = (1/n^2) * (2 - 2*erk2 + erk2^2) ... 82 | + (1/n^3) * (8*erk2 - 6*erk2^2 ... 83 | + 2*erk2*erk(Rstar,3,n) - 2*erk(Rstar,3,n)); 84 | end 85 | 86 | stat = (Q - p*deltanp) / sigma0; 87 | pval = 1 - normcdf(stat); 88 | otherwise 89 | error('Unknown test.'); 90 | end 91 | 92 | function y = erk(Rstar,k,n) 93 | d = sum(1./Rstar)^k; 94 | y = n^(k-1) * sum( Rstar.^(-k) ./ d); 95 | -------------------------------------------------------------------------------- /+dim/cpca.m: -------------------------------------------------------------------------------- 1 | % CPCA Common principal component analysis 2 | % 3 | % [Q,D,iter] = cpca(S,n,varargin) 4 | % 5 | % INPUTS 6 | % S - covariance matrices, [n x n x groups] matrix or cell array 7 | % n - sample size for each S_i, vector or cell array 8 | % 9 | % OPTIONAL 10 | % k - number of common components to return (default = all) 11 | % maxit - maximum number of iterations (default = 100) 12 | % tol - stopping criteria (default = 1e-6) 13 | % 14 | % OUTPUTS 15 | % 16 | % REFERENCE 17 | % Trendafilov (2010). Stepwise estimation of common principal 18 | % components. Computational Statistics & Data Analysis 54: 3446-3457 19 | % 20 | % Based on Matlab code provided by Dr. Trendafilov, modified to include 21 | % stopping criterion. 22 | 23 | % $ Copyright (C) 2014 Brian Lau http://www.subcortex.net/ $ 24 | % The full license and most recent version of the code can be found at: 25 | % https://github.com/brian-lau/highdim 26 | % 27 | % This program is free software: you can redistribute it and/or modify 28 | % it under the terms of the GNU General Public License as published by 29 | % the Free Software Foundation, either version 3 of the License, or 30 | % (at your option) any later version. 31 | % 32 | % This program is distributed in the hope that it will be useful, 33 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 34 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 35 | % GNU General Public License for more details. 
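%
% A minimal usage sketch (added for illustration; the two covariance matrices
% share eigenvectors by construction, so the common components in Q should
% match Q0 up to sign and ordering):
%
% p = 5;
% Q0 = orth(randn(p));                  % hypothetical common eigenvectors
% S1 = Q0*diag([5 4 3 2 1])*Q0';
% S2 = Q0*diag([1 2 3 4 5])*Q0';
% [Q,D,iter] = dim.cpca(cat(3,S1,S2),[100 120],'k',3);
%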
36 | 37 | function [Q,D,iter] = cpca(S,n,varargin) 38 | 39 | par = inputParser; 40 | par.KeepUnmatched = true; 41 | addRequired(par,'S',@(x) isnumeric(x)||iscell(x)); 42 | addRequired(par,'n',@(x) isnumeric(x)||iscell(x)); 43 | addParamValue(par,'k',[],@isnumeric); 44 | addParamValue(par,'maxit',100,@(x) isnumeric(x) && isscalar(x)); 45 | addParamValue(par,'tol',1e-6,@isnumeric); 46 | parse(par,S,n,varargin{:}); 47 | 48 | if iscell(S) 49 | S = cat(3,S{:}); 50 | end 51 | 52 | if iscell(n) 53 | n = cat(2,n{:}); 54 | end 55 | 56 | p = size(S,1); 57 | nS = size(S,3); 58 | if nS ~= numel(n) 59 | error('n should indicate the # of samples for each group'); 60 | end 61 | 62 | if isempty(par.Results.k) 63 | k = p; 64 | elseif par.Results.k <= p 65 | k = par.Results.k; 66 | else 67 | error('k must be less than dimensionality'); 68 | end 69 | 70 | nf = n./sum(n); 71 | D = zeros(k,nS); 72 | Q = zeros(p,k); 73 | Qw = eye(p); 74 | s = zeros(p); 75 | for j = 1:nS 76 | s = s + nf(j)*S(:,:,j); 77 | end 78 | 79 | [q0,d0] = eig(s); 80 | if d0(1,1) < d0(p,p) 81 | q0 = q0(:,p:-1:1); 82 | end 83 | 84 | iter = zeros(1,k); 85 | for i = 1:k 86 | q = q0(:,i); 87 | d = zeros(1,nS); 88 | for j = 1:nS 89 | d(j) = q'*S(:,:,j)*q; 90 | end 91 | 92 | crit = 1; 93 | while (iter(i) < par.Results.maxit) && (crit > par.Results.tol) 94 | s = zeros(p); 95 | for j = 1:nS 96 | s = s + n(j)*S(:,:,j)/d(j); 97 | end 98 | 99 | w = s*q; 100 | if i ~= 1 101 | w = Qw*w; 102 | end 103 | q = w/((w'*w)^.5); 104 | 105 | for j = 1:nS 106 | d(j) = q'*S(:,:,j)*q; 107 | end 108 | 109 | if iter(i) > 1 110 | crit = old - norm(d); 111 | end 112 | old = norm(d); 113 | iter(i) = iter(i) + 1; 114 | end 115 | 116 | D(i,:) = d; 117 | Q(:,i) = q; 118 | Qw = Qw - q*q'; 119 | end 120 | -------------------------------------------------------------------------------- /+utils/permMoments.m: -------------------------------------------------------------------------------- 1 | % PERMMOMENTS Exact moments of permutation distribution 2 | % 3 | % [mu,sigma2,skew] = permMoments(A1,A2,approx) 4 | % 5 | % Returns the first three moments of the permutation distribution of 6 | % T = trace(A1*A2). Exact expressions have been obtained by Kazi-Aoual 7 | % et al (1995). The specific formulation used here comes from Bilodeau 8 | % and Guetsop Nangue (2017). 9 | % 10 | % INPUTS 11 | % A1 - [n x n] matrix 12 | % A2 - [n x n] matrix 13 | % 14 | % OPTIONAL 15 | % approx - scalar integer >= 0, positive integers determine rank of 16 | % approximate multiplication A1*A2, default = 0 (exact) 17 | % 18 | % REFERENCE 19 | % Bilodeau & Guetsop Nangue (2017). Approximations to permutation tests 20 | % of independence between two random vectors. 21 | % Computational Statistics & Data Analysis, submitted. 22 | % Kazi-Aoual et al (1995). Refined approximations to permutation tests 23 | % for multivariate inference. Computational Statistics & Data Analysis. 24 | % 20: 643-656 25 | 26 | % $ Copyright (C) 2017 Brian Lau, brian.lau@upmc.fr $ 27 | % The full license and most recent version of the code can be found at: 28 | % https://github.com/brian-lau/highdim 29 | % 30 | % This program is free software: you can redistribute it and/or modify 31 | % it under the terms of the GNU General Public License as published by 32 | % the Free Software Foundation, either version 3 of the License, or 33 | % (at your option) any later version. 
34 | % 35 | % This program is distributed in the hope that it will be useful, 36 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 37 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 38 | % GNU General Public License for more details. 39 | 40 | function [mu,sigma2,skew] = permMoments(A1,A2,approx) 41 | 42 | if nargin < 3 43 | approx = 0; 44 | end 45 | 46 | assert(all(size(A1)==size(A2)),'A1 and A2 must have the same size.'); 47 | [m,n] = size(A1); 48 | assert(m==n,'A1 and A2 must be square.'); 49 | 50 | [T(1),T2(1),S2(1),T3(1),S3(1),U(1),R(1),B(1)] = useful(A1,approx); 51 | [T(2),T2(2),S2(2),T3(2),S3(2),U(2),R(2),B(2)] = useful(A2,approx); 52 | 53 | % First moment 54 | m1 = prod(T)/n + prod(-T)/(n*(n-1)); 55 | 56 | % Second moment 57 | m2 = prod(S2)/n... 58 | + ( prod(T.^2-S2) + 2*prod(T2-S2) + 4*prod(-S2) ) / (n*(n-1))... 59 | + ( 4*prod(2*S2-T2) + 2*prod(2*S2-T.^2) ) / (n*(n-1)*(n-2))... 60 | + prod(2*T2-6*S2+T.^2) / (n*(n-1)*(n-2)*(n-3)); 61 | 62 | % Third moment 63 | SP1 = prod(S3)/n; 64 | SP2 = ( 4*prod(-S3+U) + 3*prod(T.*S2-S3) + 6*prod(-S3)... 65 | + 12*prod(-S3+R) + 6*prod(-S3+B) ) / (n*(n-1)); 66 | SP3 = ( 3*prod(-T.*S2+2*S3) + prod(T.^3-3*T.*S2+2*S3)... 67 | + 12*prod(-T.*S2+2*S3-B) + 12*prod(2*S3-R) + 24*prod(2*S3-R-B)... 68 | + 6*prod(T.*(T2-S2)+2*S3-2*R) + 24*prod(2*S3-U-R)... 69 | + 8*prod(T3+2*S3-3*R) ) / (n*(n-1)*(n-2)); 70 | SP4 = ( 12*prod(T.*S2-6*S3+2*R+2*B) + 6*prod(T.*(-T2+S2)-6*S3+2*U+4*R)... 71 | + 3*prod(-T.^3+5*T.*S2-6*S3+2*B) + 12*prod(T.*(-T2+2*S2)-6*S3+3*R+2*B)... 72 | + 8*prod(-6*S3+2*U+3*R) + 24*prod(-T3-6*S3+U+5*R+B) ) / (n*(n-1)*(n-2)*(n-3)); 73 | SP5 = ( 3*prod(T.^3+2*T.*(T2-5*S2) + 24*S3-8*R-8*B)... 74 | + 12*prod(T.*(T2-2*S2) + 2*T3+24*S3-4*U-16*R-4*B) ) / (n*(n-1)*(n-2)*(n-3)*(n-4)); 75 | SP6 = prod(-T.^3-6*T.*(T2-3*S2)-8*T3-120*S3+16*U+72*R+24*B)... 76 | / (n*(n-1)*(n-2)*(n-3)*(n-4)*(n-5)); 77 | m3 = SP1 + SP2 + SP3 + SP4 + SP5 + SP6; 78 | 79 | mu = m1; 80 | sigma2 = m2 - m1^2; 81 | skew = (m3 - 3*sigma2*m1 - m1^3) / (sigma2^(3/2)); 82 | 83 | function [T,T2,S2,T3,S3,U,R,B] = useful(A,approx) 84 | T = trace(A); 85 | if approx 86 | AA = utils.approxmtimes(A,A,approx); 87 | else 88 | AA = A*A; 89 | end 90 | T2 = sum(sum(A.^2)); 91 | S2 = sum(diag(A.^2)); 92 | T3 = sum(sum(AA.*A)); 93 | S3 = sum(diag(A).^3); 94 | U = sum(sum(A.^2.*A)); 95 | R = diag(A)'*diag(AA); 96 | B = diag(A)'*A*diag(A); 97 | -------------------------------------------------------------------------------- /Testing/test_covtest.m: -------------------------------------------------------------------------------- 1 | % Check empirical size and power against table 1 from 2 | % Cai et al (2013). Two-sample covariance matrix testing and support 3 | % recovery in high-dimensional and sparse settings. 
Journal of the 4 | % American Statistical Association 108: 265-277 5 | 6 | clear all; 7 | p = 200; 8 | n = 60; 9 | model = 3; 10 | 11 | for i = 1:500 12 | if model == 3 13 | sigma = zeros(p); 14 | for ii = 1:p 15 | for jj = 1:p 16 | if ii < jj 17 | if rand < 0.05 18 | sigma(ii,jj) = 0.5; 19 | end 20 | end 21 | end 22 | end 23 | sigma = sigma + sigma'; 24 | sigma = utils.putdiag(sigma,1); 25 | [~,ds] = eig(sigma); 26 | d = abs(min(diag(ds))) + 0.05; 27 | D = diag(unifrnd(0.5,2.5,p,1)); 28 | S = sqrt(D)*((sigma+d*eye(p))/(1+d))*sqrt(D); 29 | elseif model == 2 30 | % Model 2 31 | for ii = 1:p 32 | for jj = 1:p 33 | sigma(ii,jj) = 0.5^abs(ii-jj); 34 | end 35 | end 36 | D = diag(unifrnd(0.5,2.5,p,1)); 37 | S = D^.5*sigma*D^.5; 38 | elseif model == 4 39 | % Model 4 40 | for ii = 1:p 41 | for jj = 1:p 42 | delta(ii,jj) = (-1)^(ii+jj)*0.4^(abs(ii-jj)^(1/10)); 43 | end 44 | end 45 | O = diag(unifrnd(1,5,p,1)); 46 | S = O*delta*O; 47 | end 48 | U = zeros(p,p); 49 | [~,~,k] = utils.tri2sqind(p); 50 | r = randperm(numel(k)); 51 | U(k(r(1:4))) = unifrnd(0,4,4,1)*max(diag(S)); 52 | U = U + U'; 53 | [~,da] = eig(S); 54 | [~,db] = eig(S+U); 55 | d = abs(min([diag(da);diag(db)])) + 0.05; 56 | 57 | S1 = S + d*eye(p); 58 | S2 = S + U + d*(eye(p)); 59 | 60 | x = mvnrnd(zeros(1,p),S1,n); 61 | y = mvnrnd(zeros(1,p),S2,n); 62 | [pval(i),stat(i)] = diff.covtest(x,y); 63 | end 64 | 65 | %% Support recovery 66 | % Not quite matching yet. I think this is due to a problem generating exactly 67 | % the same covariance matrices as Cai et al. The off diagonal terms do not fall 68 | % into the same range (pg 272 of paper). 69 | clear all; 70 | p = 50; 71 | n = 100; 72 | model = 4; 73 | 74 | if model == 3 75 | sigma = zeros(p); 76 | for ii = 1:p 77 | for jj = 1:p 78 | if ii < jj 79 | if rand < 0.05 80 | sigma(ii,jj) = 0.5; 81 | end 82 | end 83 | end 84 | end 85 | sigma = sigma + sigma'; 86 | sigma = utils.putdiag(sigma,1); 87 | [~,ds] = eig(sigma); 88 | d = abs(min(diag(ds))) + 0.05; 89 | D = eye(p); 90 | S = D^.5*((sigma+d*eye(p))/(1+d))*D^.5; 91 | elseif model == 2 92 | % Model 2 93 | for ii = 1:p 94 | for jj = 1:p 95 | sigma(ii,jj) = 0.5^abs(ii-jj); 96 | end 97 | end 98 | D = eye(p); 99 | S = D^.5*sigma*D^.5; 100 | elseif model == 4 101 | % Model 4 102 | for ii = 1:p 103 | for jj = 1:p 104 | delta(ii,jj) = (-1)^(ii+jj)*0.4^(abs(ii-jj)^(1/10)); 105 | end 106 | end 107 | O = eye(p); 108 | S = O*delta*O; 109 | end 110 | U = zeros(p,p); 111 | [~,~,k] = utils.tri2sqind(p); 112 | r = randperm(numel(k)); 113 | U(k(r(1:25))) = 2; 114 | U = U + U'; 115 | [~,da] = eig(S); 116 | [~,db] = eig(S+U); 117 | d = abs(min([diag(da);diag(db)])) + 0.05; 118 | 119 | S1 = (S + d*eye(p))/(1+d); 120 | S2 = (S + U + d*(eye(p)))/(1+d); 121 | sdiff = S2-S1; 122 | min(sdiff(sdiff>0)) 123 | sd = (S2-S1)>0; 124 | for i = 1:100 125 | x = mvnrnd(zeros(1,p),S1,n); 126 | y = mvnrnd(zeros(1,p),S2,n); 127 | [pval(i),stat(i),Mthresh] = diff.covtest(x,y); 128 | temp = Mthresh & sd; 129 | s(i) = sum(temp(:))/sqrt(sum(Mthresh(:))*sum(sd(:))); 130 | end 131 | 132 | % Check aymptotic distribution 133 | % figure; 134 | % dx = 0.1; xx = -5:dx:25; 135 | % n = histc(stat,xx); 136 | % hold on 137 | % plot(xx,cumsum(n)./sum(n)); 138 | % plot(xx,exp((-1/sqrt(8*pi))*exp(-xx/2)),'r') 139 | -------------------------------------------------------------------------------- /+sphere/vmfrnd.m: -------------------------------------------------------------------------------- 1 | function [ X ] = vmfrnd(m, n, kappa, mu) 2 | % RANDVONMISESFISHERM Random number generation from 
von Mises Fisher 3 | % distribution. 4 | % X = randvonMisesFisherm(m, n, kappa) returns n samples of random unit 5 | % directions in m dimensional space, with concentration parameter kappa, 6 | % and the direction parameter mu = e_m 7 | % X = randvonMisesFisherm(m, n, kappa, mu) with direction parameter mu 8 | % (m-dimensional column unit vector) 9 | % 10 | % Sungkyu Jung, Feb 3, 2010. 11 | 12 | if nargin < 3, help randvonMisesFisher3, return, end 13 | if nargin == 3, muflag = false; 14 | else muflag = true; 15 | end 16 | 17 | if m < 2; 18 | disp('Message from randvonMisesFisherm.m: dimension m must be > 2'); 19 | disp('Message from randvonMisesFisherm.m: Set m to be 2'); 20 | m = 2; 21 | end 22 | 23 | if kappa < 0; 24 | disp('Message from randvonMisesFisherm.m: kappa must be >= 0'); 25 | disp('Message from randvonMisesFisherm.m: Set kappa to be 0'); 26 | kappa = 0; 27 | end 28 | 29 | % 30 | % the following algorithm is following the modified Ulrich's algorithm 31 | % discussed by Andrew T.A. Wood in "SIMULATION OF THE VON MISES FISHER 32 | % DISTRIBUTION", COMMUN. STATIST 23(1), 1994. 33 | 34 | % step 0 : initialize 35 | b = (-2*kappa + sqrt(4*kappa^2 + (m-1)^2))/(m-1); 36 | x0 = (1-b)/(1+b); 37 | c = kappa*x0 + (m-1)*log(1-x0^2); 38 | 39 | % step 1 & step 2 40 | nnow = n; w = []; 41 | %cnt = 0; 42 | while(true) 43 | ntrial = max(round(nnow*1.2),nnow+10) ; 44 | Z = betarnd((m-1)/2,(m-1)/2,ntrial,1); 45 | U = rand(ntrial,1); 46 | W = (1-(1+b)*Z)./(1-(1-b)*Z); 47 | 48 | indicator = kappa*W + (m-1)*log(1-x0*W) - c >= log(U); 49 | if sum(indicator) >= nnow 50 | w1 = W(indicator); 51 | w = [w ;w1(1:nnow)]; 52 | break; 53 | else 54 | w = [w ; W(indicator)]; 55 | nnow = nnow-sum(indicator); 56 | %cnt = cnt+1;disp(['retrial' num2str(cnt) '.' num2str(sum(indicator))]); 57 | end 58 | end 59 | 60 | % step 3 61 | V = UNIFORMdirections(m-1,n); 62 | X = [repmat(sqrt(1-w'.^2),m-1,1).*V ;w']; 63 | 64 | if muflag 65 | mu = mu / norm(mu); 66 | X = rotMat(mu)'*X; 67 | end 68 | end 69 | 70 | 71 | function V = UNIFORMdirections(m,n) 72 | % generate n uniformly distributed m dim'l random directions 73 | % Using the logic: "directions of Normal distribution are uniform on sphere" 74 | 75 | V = zeros(m,n); 76 | nr = randn(m,n); %Normal random 77 | for i=1:n 78 | while 1 79 | ni=nr(:,i)'*nr(:,i); % length of ith vector 80 | % exclude too small values to avoid numerical discretization 81 | if ni<1e-10 82 | % so repeat random generation 83 | nr(:,i)=randn(m,1); 84 | else 85 | V(:,i)=nr(:,i)/sqrt(ni); 86 | break; 87 | end 88 | end 89 | end 90 | 91 | end 92 | 93 | function rot = rotMat(b,a,alpha) 94 | % ROTMAT returns a rotation matrix that rotates unit vector b to a 95 | % 96 | % rot = rotMat(b) returns a d x d rotation matrix that rotate 97 | % unit vector b to the north pole (0,0,...,0,1) 98 | % 99 | % rot = rotMat(b,a ) returns a d x d rotation matrix that rotate 100 | % unit vector b to a 101 | % 102 | % rot = rotMat(b,a,alpha) returns a d x d rotation matrix that rotate 103 | % unit vector b towards a by alpha (in radian) 104 | % 105 | % See also . 
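%
% Example (illustrative only, not from the original file):
%   b = [1 1 1]'/sqrt(3);     % unit vector in R^3
%   rot = rotMat(b);          % rot*b is (numerically) the north pole [0;0;1]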
106 | 107 | % Last updated Nov 7, 2009 108 | % Sungkyu Jung 109 | 110 | 111 | [s1 s2]=size(b); 112 | d = max(s1,s2); 113 | b= b/norm(b); 114 | if min(s1,s2) ~= 1 || nargin==0 , help rotMat, return, end 115 | 116 | if s1<=s2; b = b'; end 117 | 118 | if nargin == 1; 119 | a = [zeros(d-1,1); 1]; 120 | alpha = acos(a'*b); 121 | end 122 | 123 | if nargin == 2; 124 | alpha = acos(a'*b); 125 | end 126 | if abs(a'*b - 1) < 1e-15; rot = eye(d); return, end 127 | if abs(a'*b + 1) < 1e-15; rot = -eye(d); return, end 128 | 129 | c = b - a * (a'*b); c = c / norm(c); 130 | A = a*c' - c*a' ; 131 | 132 | rot = eye(d) + sin(alpha)*A + (cos(alpha) - 1)*(a*a' +c*c'); 133 | end 134 | 135 | -------------------------------------------------------------------------------- /+sphere/jsn.m: -------------------------------------------------------------------------------- 1 | % JSN John, Sugiura, Nagao test of high-deminsional sphericity 2 | % 3 | % [pval,stat] = jsn(x,varargin) 4 | % 5 | % Given a sample X1,...,Xn from a p-dimensional multivariate distribution, 6 | % test the hypothesis: 7 | % 8 | % H0 : Covariance matrix of sample is proportional to the identity 9 | % 10 | % This test is the locally most powerful invariant test for sphericity, 11 | % is n-consistent, and remains valid even when n and p grow together 12 | % (method='john' or 'nagao'). Moreover, the n,p-consistent variant 13 | % (method = 'wang') only requires the existence of fourth moments. 14 | % 15 | % INPUTS 16 | % x - [n x p] matrix, n samples with dimensionality p 17 | % 18 | % OPTIONAL (name/value pairs) 19 | % test - 'john' - fixed p, n goes to infinity (DEFAULT) 20 | % 'nagao' - Box-Bartlett like refinements to asymptotic dist 21 | % 'wang' - p,n -> inf, p/n -> y>0, universal 22 | % 23 | % OUTPUTS 24 | % pval - p-value 25 | % stat - statistic 26 | % 27 | % REFERENCE 28 | % Ledoit & Wolf (2002). Some hypothesis tests for the covariance matrix 29 | % when the dimension is large compared to the sample size. Annals of 30 | % Statistics 30: 1081-1102 31 | % Wang, Q and Yao J (2013). On the sphericity test with large-dimensional 32 | % observations. Electronic Journal of Statistics 7: 2164-2192 33 | % 34 | % SEE ALSO 35 | % DepTest1 36 | 37 | % $ Copyright (C) 2014 Brian Lau http://www.subcortex.net/ $ 38 | % The full license and most recent version of the code can be found at: 39 | % https://github.com/brian-lau/highdim 40 | % 41 | % This program is free software: you can redistribute it and/or modify 42 | % it under the terms of the GNU General Public License as published by 43 | % the Free Software Foundation, either version 3 of the License, or 44 | % (at your option) any later version. 45 | % 46 | % This program is distributed in the hope that it will be useful, 47 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 48 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 49 | % GNU General Public License for more details. 
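%
% EXAMPLE (illustrative sketch, not part of the original help; sample size
% and dimension are arbitrary)
%   x = randn(64,128);                          % n = 64 samples, p = 128
%   [pval,stat] = sphere.jsn(x);                % John's test (default)
%   [pval,stat] = sphere.jsn(x,'test','wang');  % n,p-consistent variant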
50 | 51 | function [pval,stat] = jsn(x,varargin) 52 | 53 | par = inputParser; 54 | par.KeepUnmatched = true; 55 | addRequired(par,'x',@isnumeric); 56 | addParamValue(par,'test','john',@ischar); 57 | parse(par,x,varargin{:}); 58 | 59 | [n,p] = size(x); 60 | 61 | % Ledoit & Wolf (2002) 62 | S = cov(x,0); 63 | U = (1/p)*trace((S/((1/p)*trace(S)) - eye(p))^2); 64 | T2 = (n-1)*p/2*U; 65 | 66 | switch lower(par.Results.test) 67 | case {'john','j'} 68 | f = 0.5*p*(p+1) - 1; 69 | pval = 1 - chi2cdf(T2,f); 70 | stat = T2; 71 | case {'nagao','n'} 72 | f = 0.5*p*(p+1) - 1; 73 | % From Nagao (1973) theorem 5.1 74 | ap = (1/12)*(p^3+3*p^2-8*p-12-200/p); 75 | bp = (1/8)*(-2*p^3-5*p^2+7*p+12+420/p); 76 | cp = (1/4)*(p^3+2*p^2-p-2-216/p); 77 | dp = (1/24)*(-2*p^3-3*p^2+p+436/p); 78 | 79 | Pf = chi2cdf(T2,f); 80 | Pf2 = chi2cdf(T2,f+2); 81 | Pf4 = chi2cdf(T2,f+4); 82 | Pf6 = chi2cdf(T2,f+6); 83 | P = Pf + (1/n)*(ap*Pf6 + bp*Pf4 + cp*Pf2 + dp*Pf); 84 | % Truncate negative p-values 85 | pval = max(0,1 - P); 86 | stat = T2; 87 | case {'wang','w'} 88 | % Wang & Yao (2013), theorem 2.2 89 | N = n-1; 90 | if all(isreal(x)) 91 | k = 2; 92 | else 93 | k = 1; 94 | end 95 | b = (1/(N*p)) * sum(abs(x(:)).^4) - k - 1; 96 | stat = N*U-p; 97 | pval = 1 - normcdf(stat,k+b-1,sqrt(2*k)); 98 | otherwise 99 | error('Unknown method'); 100 | end 101 | 102 | %% Various equivalent definitions of T2 103 | % % John (1972) 104 | % U = (trace(S^2)) / (trace(S))^2; 105 | % T = (p*U-1)/(p-1); 106 | % T2 = (0.5*n*p)*(p-1)*T; 107 | % % Wang & Yao (2013) 108 | % [~,D] = eig(S); 109 | % l = diag(D); 110 | % lbar = mean(l); 111 | % T2 = ((n*p)/2) * (sum((l-lbar).^2)/p) / lbar^2; 112 | % % Nagao (1973) 3.6 113 | % T2 = ((p^2*n)/2) * trace((S./trace(S) - eye(p)./p)^2); 114 | -------------------------------------------------------------------------------- /+diff/kstest2d.m: -------------------------------------------------------------------------------- 1 | % kstest2d Two-dimensional, 2-sample Kolmorogov-Smirnov test 2 | % 3 | % [p,D] = kstest2d(s1,s2); 4 | % 5 | % Compare two, 2-dimensional distributions using Fasano & Franceschini's 6 | % generalization of the KS-test. 7 | % 8 | % The analytic distribution of the statistic is unknown, and p-values 9 | % are estimated using an approximation (Press et al., 1992) to FF's Monte 10 | % Carlo simulations. 11 | % 12 | % INPUTS 13 | % s1 - [n1 x 2] matrix 14 | % s2 - [n2 x 2] matrix 15 | % 16 | % OUTPUTS 17 | % p - approximate p-value 18 | % D - K-S statistic 19 | % 20 | % REFERENCE 21 | % Fasano, G, Franceschini, A (1987) A multidimensional version of the 22 | % Kolmorogov-Smirnov test. Mon Not R astr Soc 225: 155-170 23 | % Press et al (1992). Numerical Recipes in C, section 14.7 24 | % 25 | % SEE ALSO 26 | % minentest, hotell2, DepTest2 27 | 28 | % $ Copyright (C) 2014 Brian Lau http://www.subcortex.net/ $ 29 | % The full license and most recent version of the code can be found on GitHub: 30 | % https://github.com/brian-lau/highdim 31 | % 32 | % This program is free software: you can redistribute it and/or modify 33 | % it under the terms of the GNU General Public License as published by 34 | % the Free Software Foundation, either version 3 of the License, or 35 | % (at your option) any later version. 36 | % 37 | % This program is distributed in the hope that it will be useful, 38 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 39 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 40 | % GNU General Public License for more details. 
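%
% EXAMPLE (illustrative sketch, not part of the original help; the mean
% shift of 0.5 in the first coordinate is arbitrary)
%   s1 = randn(100,2);                          % sample 1, 100 points in 2-D
%   s2 = bsxfun(@plus,randn(150,2),[0.5 0]);    % sample 2, shifted mean
%   [p,D] = diff.kstest2d(s1,s2);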
41 | % 42 | % REVISION HISTORY: 43 | % brian 03.14.06 written 44 | % brian 08.23.11 added flag to assign point to quadrant that maximizes D 45 | % http://www.nr.com/forum/showthread.php?t=576 46 | 47 | function [p,D] = kstest2d(s1,s2) 48 | 49 | assign_point = false; % Set true to assign center point to maximizing quadrant 50 | % Leave this false if you want FF's original procedure 51 | 52 | [n1,m1] = size(s1); 53 | [n2,m2] = size(s2); 54 | 55 | if ~all([m1,m2]==2) 56 | error('# of columns in X and Y must equal 2'); 57 | end 58 | 59 | D = zeros(n1+n2,4); 60 | count = 0; 61 | for i = 1:n1 62 | count = count + 1; 63 | [a1,b1,c1,d1] = quadcnt(s1(i,1),s1(i,2),s1,n1-1); 64 | [a2,b2,c2,d2] = quadcnt(s1(i,1),s1(i,2),s2,n2); 65 | 66 | temp = abs([a1-a2 , b1-b2 , c1-c2 , d1-d2]); 67 | if assign_point 68 | % Assign point to quadrant where it maximizes difference 69 | ind = find(max(temp)); 70 | if length(ind) >= 1 71 | ind = ind(1); % take first maximum 72 | temp(ind) = temp(ind) + 1/length(s1); 73 | end 74 | end 75 | D(count,:) = temp; 76 | end 77 | for i = 1:n2 78 | count = count + 1; 79 | [a1,b1,c1,d1] = quadcnt(s2(i,1),s2(i,2),s1,n1); 80 | [a2,b2,c2,d2] = quadcnt(s2(i,1),s2(i,2),s2,n2-1); 81 | 82 | temp = abs([a1-a2 , b1-b2 , c1-c2 , d1-d2]); 83 | if assign_point 84 | % Assign point to quadrant where it maximizes difference 85 | ind = find(max(temp)); 86 | if length(ind) >= 1 87 | ind = ind(1); % take first maximum 88 | temp(ind) = temp(ind) + 1/length(s2); 89 | end 90 | end 91 | D(count,:) = temp; 92 | end 93 | 94 | D = max(max(D)); 95 | 96 | % Average correlation coefficients 97 | r1 = corrcoef(s1); r1 = r1(1,2); 98 | r2 = corrcoef(s2); r2 = r2(1,2); 99 | rr = 0.5*(r1*r1 + r2*r2); 100 | 101 | p = probks(n1,n2,D,rr); 102 | 103 | %----- Count fractions of points in s in quadrants defined around point (x,y). 
104 | % s is a nx2 matrix 105 | % 106 | % a|b 107 | %----- 108 | % c|d 109 | % 110 | % Currently, the point x,y is not counted in any fraction 111 | function [a,b,c,d] = quadcnt(x,y,s,d) 112 | 113 | slx = s(:,1)<x; 114 | sgx = s(:,1)>x; 115 | sly = s(:,2)<y; 116 | sgy = s(:,2)>y; 117 | 118 | inda = slx & sgy; 119 | indb = sgx & sgy; 120 | indc = slx & sly; 121 | indd = sgx & sly; 122 | 123 | a = sum(inda)/d; 124 | b = sum(indb)/d; 125 | c = sum(indc)/d; 126 | d = sum(indd)/d; 127 | 128 | %----- Asymptotic Q-function to approximate the 2-sided P-value 129 | function p = probks(n1,n2,D,rr) 130 | 131 | % Numerical Recipes in C, section 14.7 132 | N = (n1*n2)/(n1+n2); 133 | lambda = (sqrt(N)*D) / (1 + sqrt(1 - rr)*(.25 - .75/sqrt(N))); 134 | 135 | j = (1:101)'; 136 | p = 2 * sum((-1).^(j-1).*exp(-2*lambda*lambda*j.^2)); 137 | p = min(max(p,0),1); 138 | 139 | 140 | -------------------------------------------------------------------------------- /+dep/dcov.m: -------------------------------------------------------------------------------- 1 | % DCOV Distance covariance 2 | % 3 | % [d,dvx,dvy,A,B] = dcov(x,y,varargin) 4 | % 5 | % INPUTS 6 | % x - [n x p] n samples of dimensionality p 7 | % y - [n x q] n samples of dimensionality q 8 | % 9 | % OPTIONAL (as name/value pairs, order irrelevant) 10 | % unbiased - true indicates bias-corrected estimate (default=false) 11 | % index - scalar in (0,2], exponent on Euclidean distance, default = 1 12 | % dist - true indicates x & y are distance matrices (default=false) 13 | % doublecenter - true indicates x & y are double-centered distance 14 | % matrices (default=false) 15 | % 16 | % OUTPUTS 17 | % d - distance covariance between x & y 18 | % dvx - x sample distance variance 19 | % dvy - y sample distance variance 20 | % A - double-centered or U-centered distance matrix for x 21 | % B - double-centered or U-centered distance matrix for y 22 | % 23 | % EXAMPLE 24 | % rng(1234) 25 | % n = 1000; p = 50; q = p; 26 | % x = rand(n,p); 27 | % y = x.^2; 28 | % d = dep.dcov(x,y) 29 | % 30 | % % Equivalence between distance covariance (squared) & HSIC 31 | % h = dep.hsic(x,y,'kernel','brownian'); 32 | % [4*h d^2] 33 | % 34 | % REFERENCE 35 | % Szekely et al (2007). Measuring and testing independence by correlation 36 | % of distances. Ann Statist 35: 2769-2794 37 | % Szekely & Rizzo (2013). The distance correlation t-test of independence 38 | % in high dimension. J Multiv Analysis 117: 193-213 39 | % 40 | % SEE ALSO 41 | % dcovtest, dcorr, dcorrtest, rpdcov, fdcov 42 | 43 | % $ Copyright (C) 2017 Brian Lau, brian.lau@upmc.fr $ 44 | % The full license and most recent version of the code can be found at: 45 | % https://github.com/brian-lau/highdim 46 | % 47 | % This program is free software: you can redistribute it and/or modify 48 | % it under the terms of the GNU General Public License as published by 49 | % the Free Software Foundation, either version 3 of the License, or 50 | % (at your option) any later version. 51 | % 52 | % This program is distributed in the hope that it will be useful, 53 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 54 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 55 | % GNU General Public License for more details.
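%
% (Additional sketch, not part of the original help: the 2nd and 3rd outputs
%  are the distance variances, which give the distance correlation directly;
%  x and y as in the EXAMPLE above)
%   [d,dvx,dvy] = dep.dcov(x,y,'unbiased',true);
%   r = d/sqrt(dvx*dvy);                        % cf. dep.dcorr, dep.dcorrtest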
56 | 57 | function [d,dvx,dvy,A,B] = dcov(x,y,varargin) 58 | 59 | par = inputParser; 60 | par.KeepUnmatched = true; 61 | par.PartialMatching = false; 62 | addRequired(par,'x',@isnumeric); 63 | addRequired(par,'y',@isnumeric); 64 | addParamValue(par,'approx','none',@ischar); 65 | addParamValue(par,'unbiased',false,@isscalar); 66 | addParamValue(par,'index',1,@(x) isscalar(x) && (x>0) && (x<=2)); 67 | addParamValue(par,'dist',false,@isscalar); 68 | addParamValue(par,'doublecenter',false,@isscalar); 69 | parse(par,x,y,varargin{:}); 70 | 71 | [n,~] = size(x); 72 | assert(n == size(y,1),'DCOV requires x and y to have the same # of samples'); 73 | 74 | if par.Results.doublecenter 75 | % Inputs are already double-centered distance matrices 76 | A = x; 77 | B = y; 78 | else 79 | if par.Results.dist 80 | % Inputs are euclidean distance matrices 81 | a = x; 82 | b = y; 83 | elseif strcmp(par.Results.approx,'nystrom') 84 | % Looks like A&B are scaled versions of K&L 85 | % utils.dcenter(L)*2+B 86 | [h,K,L] = dep.hsic(x,y,'approx','nys','kernel','brownian',par.Unmatched); 87 | d = sqrt(4*h); 88 | if nargout > 1 89 | A = -2*utils.dcenter(K*K'); 90 | B = -2*utils.dcenter(L*L'); 91 | dvx = sqrt(sum(sum(A.*A))/n^2); 92 | dvy = sqrt(sum(sum(B.*B))/n^2); 93 | end 94 | return; 95 | % elseif any(strcmp(par.Results.approx,{'rp' 'randomproj'})) 96 | % 97 | else 98 | % Distance matrices 99 | a = sqrt(utils.sqdist(x,x)); 100 | b = sqrt(utils.sqdist(y,y)); 101 | end 102 | 103 | if par.Results.index ~= 1 104 | a = a.^par.Results.index; 105 | b = b.^par.Results.index; 106 | end 107 | end 108 | 109 | if par.Results.unbiased 110 | A = utils.ucenter(a); 111 | B = utils.ucenter(b); 112 | 113 | d = sum(sum(A.*B))/(n*(n-3)); 114 | if nargout > 1 115 | dvx = sum(sum(A.*A))/(n*(n-3)); 116 | dvy = sum(sum(B.*B))/(n*(n-3)); 117 | end 118 | else 119 | A = utils.dcenter(a); 120 | B = utils.dcenter(b); 121 | 122 | d = sqrt(sum(sum(A.*B))/n^2); 123 | if nargout > 1 124 | dvx = sqrt(sum(sum(A.*A))/n^2); 125 | dvy = sqrt(sum(sum(B.*B))/n^2); 126 | end 127 | end -------------------------------------------------------------------------------- /+utils/approxmtimes.m: -------------------------------------------------------------------------------- 1 | % APPROXMTIMES Approximate matrix multiplication 2 | % 3 | % AB = approxmtimes(A,B,c,method,uni) 4 | % 5 | % Given matrices A [m x n] and B [n x p], approximates the product A*B 6 | % with a sum of rank-one matrices by selecting c columns (rows) of A (B) 7 | % 8 | % A*B \approx \sum_{c in C} A(:,c)*B(c,:) 9 | % 10 | % Two algorithms are available, one using randomized selection (Drineas 11 | % et al) and the other using greedy deterministic selection (Belabbas & 12 | % Wolfe). 
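%
% (Illustrative note, not from the original help: e.g. with c = 2 and
%  selected indices j1,j2, the approximation is a sum of two rank-one terms,
%      AB = w1*A(:,j1)*B(j1,:) + w2*A(:,j2)*B(j2,:),
%  where the weight/rescaling of each term depends on the selection scheme
%  described below.)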
13 | % 14 | % Complexity: 15 | % sampling - O(c(m+n+p)) 16 | % greedy - O(m(n+c^2) + c^3) 17 | % 18 | % INPUTS 19 | % A - [m x n] matrix 20 | % B - [n x p] matrix 21 | % c - scalar < n, approximant rank 22 | % 23 | % OPTIONAL 24 | % method - string indicating approximation algorithm (default = 'sampling') 25 | % 'sampling' - monte-carlo column-row selections using either 26 | % uniform probabilities or probabilities that 27 | % minimize expected normwise absolute error 28 | % 'greedy' - deterministic approximation to optimal subset 29 | % uni - boolean indicating uniform sampling (default = false) 30 | % only applies for method = 'random' 31 | % 32 | % OUTPUTS 33 | % AB - approximation of A*B 34 | % 35 | % EXAMPLE 36 | % import utils.* 37 | % rng(1); 38 | % m = 3000; 39 | % n = m; 40 | % A = [randn(m/2,n) ; rand(m/2,n)*20]; 41 | % B = [rand(m/2,n)*20 ; randn(m/2,n)]; 42 | % 43 | % tic; AB = A*B; toc 44 | % tic; AB1 = approxmtimes(A,B,25); toc 45 | % tic; AB2 = approxmtimes(A,B,25,'greedy'); toc 46 | % 47 | % norm(AB1-AB,'fro')^2/norm(AB,'fro')^2 48 | % norm(AB2-AB,'fro')^2/norm(AB,'fro')^2 49 | % 50 | % REFERENCE 51 | % Drineas et al. (2006). Fast Monte Carlo algorithms for matrices I: 52 | % Approximating matrix multiplication. SIAM Journal on Computing, 53 | % 36, 132-157 54 | % Belabbas & Wolfe (2008). On sparse representations of linear operators 55 | % and the approximation of matrix products. In Information Sciences 56 | % and Systems. CISS 2008, 258-263 57 | 58 | % $ Copyright (C) 2017 Brian Lau, brian.lau@upmc.fr $ 59 | % The full license and most recent version of the code can be found at: 60 | % https://github.com/brian-lau/highdim 61 | % 62 | % This program is free software: you can redistribute it and/or modify 63 | % it under the terms of the GNU General Public License as published by 64 | % the Free Software Foundation, either version 3 of the License, or 65 | % (at your option) any later version. 66 | % 67 | % This program is distributed in the hope that it will be useful, 68 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 69 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 70 | % GNU General Public License for more details. 
71 | 72 | % TODO 73 | % o B = A 74 | % o B = A' 75 | % o streaming (one-pass for uni with n known, two-pass for other cases) 76 | % o nargout == 2 should return C & R for all algs, AB \approx C*R 77 | % o faster randsample 78 | 79 | function AB = approxmtimes(A,B,c,method,uni) 80 | 81 | if nargin < 5 82 | uni = false; 83 | end 84 | 85 | if nargin < 4 86 | method = 'sampling'; 87 | end 88 | 89 | [m,n] = size(A); 90 | [n2,p] = size(B); 91 | c = fix(c); 92 | 93 | assert(n==n2,'Inner matrix dimensions must agree.'); 94 | assert(c>=1,'c must be >= 1.'); 95 | 96 | switch lower(method) 97 | case {'greedy'} 98 | A2 = A.^2; 99 | An = sum(A2)'; 100 | B2 = B.^2; 101 | Bn = sum(B2,2); 102 | [~,J] = sort(An.*Bn,'descend'); 103 | J = J(1:c); 104 | 105 | Q = (A(:,J)'*A(:,J)) .* (B(J,:)*B(J,:)'); 106 | r = sum( (A'*A(:,J)) .* (B*B(J,:)') )'; 107 | w = Q\r; 108 | 109 | AB = A(:,J)*diag(w)*B(J,:); 110 | case {'sampling'} 111 | if uni 112 | p_k = repmat(1/n,n,1); 113 | else 114 | % Probabilities that minimize expected normwise absolute error 115 | A2 = A.^2; 116 | An = sqrt(sum(A2))'; 117 | B2 = B.^2; 118 | Bn = sqrt(sum(B2,2)); 119 | 120 | p_k = An.*Bn; 121 | p_k = p_k/sum(p_k); 122 | end 123 | 124 | J = randsample(1:n,c,true,p_k); 125 | 126 | cp = sqrt(c*p_k(J)); 127 | C = bsxfun(@rdivide,A(:,J),cp'); 128 | R = bsxfun(@rdivide,B(J,:),cp); 129 | 130 | AB = C*R; 131 | otherwise 132 | error('Unrecognized method for approximate matrix multiplication'); 133 | end -------------------------------------------------------------------------------- /+diff/minentest.m: -------------------------------------------------------------------------------- 1 | % MINENTEST N-dimensional, 2-sample comparison of 2 distributions 2 | % 3 | % [p,e_n,e_n_boot] = minentest(x,y,varargin) 4 | % 5 | % Compares d-dimensional data from two samples using a measure based on 6 | % statistical energy. The test is non-parametric, does not require binning 7 | % and easily scales to arbitrary dimensions. 8 | % 9 | % The analytic distribution of the statistic is unknown, and p-values 10 | % are estimated using a permutation procedure, which works well 11 | % according to simulations by Aslan & Zech. 12 | % 13 | % INPUTS 14 | % x - [n1 x d] matrix 15 | % y - [n2 x d] matrix 16 | % 17 | % OPTIONAL (name/value pairs) 18 | % flag - 'sr', Szekely & Rizzo energy statistic 19 | % 'az', Aslan & Zech energy statistic (default) 20 | % nboot - # of bootstrap resamples (default = 1000) 21 | % replace - boolean for sampling with replacement (default = false) 22 | % 23 | % OUTPUTS 24 | % p - p-value by permutation 25 | % e_n - minimum energy statistic 26 | % e_n_boot - bootstrap samples 27 | % 28 | % REFERENCE 29 | % Aslan, B, Zech, G (2005) Statistical energy as a tool for binning-free 30 | % multivariate goodness-of-fit tests, two-sample comparison and unfolding. 31 | % Nuc Instr and Meth in Phys Res A 537: 626-636 32 | % Szekely, G, Rizzo, M (2014) Energy statistics: A class of statistics 33 | % based on distances. 
J Stat Planning & Infer 143: 1249-1272 34 | % 35 | % SEE ALSO 36 | % kstest2d, hotell2, DepTest2 37 | 38 | % $ Copyright (C) 2014 Brian Lau http://www.subcortex.net/ $ 39 | % The full license and most recent version of the code can be found on GitHub: 40 | % https://github.com/brian-lau/highdim 41 | % 42 | % This program is free software: you can redistribute it and/or modify 43 | % it under the terms of the GNU General Public License as published by 44 | % the Free Software Foundation, either version 3 of the License, or 45 | % (at your option) any later version. 46 | % 47 | % This program is distributed in the hope that it will be useful, 48 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 49 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 50 | % GNU General Public License for more details. 51 | % 52 | % REVISION HISTORY: 53 | % brian 08.25.11 written 54 | 55 | % TODO 56 | % o calculate distance matrix once and cache, permute index 57 | % attempted once, https://github.com/brian-lau/multdist/commit/ae58496848464cea50fe134ab6f1e2f929632c88 58 | % o k-sample version 59 | % o incomplete V-statistic 60 | 61 | 62 | function [p,e_n,e_n_boot] = minentest(x,y,varargin) 63 | 64 | par = inputParser; 65 | par.KeepUnmatched = true; 66 | addRequired(par,'x',@isnumeric); 67 | addRequired(par,'y',@isnumeric); 68 | addParamValue(par,'flag','sr',@ischar); 69 | addParamValue(par,'nboot',1000,@(x) isscalar(x)&&isnumeric(x)); 70 | addParamValue(par,'replace',false,@(x) islogical(x)||isnumeric(x)); 71 | parse(par,x,y,varargin{:}); 72 | 73 | [n,ny] = size(x); 74 | [m,my] = size(y); 75 | 76 | if ny ~= my 77 | error('# of columns in X and Y must match'); 78 | end 79 | 80 | pooled = [x ; y]; 81 | 82 | flag = par.Results.flag; 83 | nboot = par.Results.nboot; 84 | replace = par.Results.replace; 85 | e_n = energy(x,y,flag); 86 | e_n_boot = zeros(nboot,1); 87 | e_n_boot(1) = e_n; 88 | for i = 2:nboot 89 | if replace 90 | ind = unidrnd(n+m,1,n+m); 91 | else 92 | ind = randperm(n+m); 93 | end 94 | e_n_boot(i) = energy(pooled(ind(1:n),:),pooled(ind(n+1:end),:),flag); 95 | end 96 | 97 | p = sum(e_n_boot>=e_n)./nboot; 98 | 99 | function [dx,dy,dxy] = dist(x,y) 100 | dx = pdist(x,'euclidean'); 101 | dy = pdist(y,'euclidean'); 102 | dxy = pdist2(x,y,'euclidean'); 103 | 104 | function z = energy(x,y,flag) 105 | % FIXME, equal samples will generate infinite values, will produce 106 | % unreliable results, more of a problem for discrete data. 107 | n = size(x,1); 108 | m = size(y,1); 109 | [dx,dy,dxy] = dist(x,y); 110 | switch flag 111 | case 'az' 112 | % Aslan & Zech definition of energy statistic 113 | z = (1/(n*(n-1)))*sum(-log(dx)) + (1/(m*(m-1)))*sum(-log(dy))... 114 | - (1/(n*m))*sum(-log(dxy(:))); 115 | case 'sr' 116 | % Szekely & Rizzo definition of energy statistic 117 | % Verified against their R package 'energy' 118 | % in R: 119 | % data(iris) 120 | % eqdist.etest(iris[,1:4], c(75,75), R = 199) 121 | % E-statistic = 126.0453, p-value = 0.005 122 | % in Matlab: 123 | % load fisheriris; 124 | % [p,en] = minentest(meas(1:75,:),meas(76:end,:),'sr',200) 125 | z = (2/(n*m))*sum(dxy(:)) - (1/(n^2))*sum(2*dx) - (1/(m^2))*sum(2*dy); 126 | z = ((n*m)/(n+m)) * z; 127 | otherwise 128 | error('Bad FLAG'); 129 | end -------------------------------------------------------------------------------- /Testing/test_sphericity4.m: -------------------------------------------------------------------------------- 1 | %% Compare JNS to simulations of size and power in 2 | % Wang, Q and Yao J (2013). 
On the sphericity test with large-dimensional 3 | % observations. Electronic Journal of Statistics 7: 2164-2192 4 | 5 | %% Table 2 6 | clear all; 7 | n = [64 128 256 512]; 8 | p{1} = [4 8 16 32 48 56 60]; 9 | p{2} = [8 16 32 64 96 112 120]; 10 | p{3} = [16 32 64 128 192 224 240]; 11 | p{4} = [32 64 128 256 384 448 480]; 12 | 13 | reps = 20; 14 | tic; 15 | for i = 1:numel(n) 16 | for j = 1:numel(p{i}) 17 | for k = 1:reps 18 | x = gamrnd(4,1/2,n(i),p{i}(j))-2; 19 | %x = randn(n(i),p{i}(j)); 20 | pval(k) = sphere.jsn(x,'test','wang'); 21 | end 22 | prob(i,j) = mean(pval<=0.05); 23 | end 24 | toc 25 | end 26 | 27 | pN = [... % Normal(0,1) 28 | 0.0498 0.0545 0.0539 0.0558 0.0551 0.0547 0.0523;... 29 | 0.0539 0.0523 0.051 0.0538 0.055 0.0543 0.0545;... 30 | 0.0544 0.0534 0.0519 0.0507 0.0507 0.0503 0.0494;... 31 | 0.0542 0.0512 0.0519 0.0491 0.0487 0.0496 0.0488] 32 | 33 | % Normal, reps = 2000, method = 'wang' 34 | % prob = 35 | % 36 | % 0.0585 0.0645 0.0570 0.0525 0.0485 0.0520 0.0565 37 | % 0.0625 0.0620 0.0605 0.0570 0.0510 0.0480 0.0525 38 | % 0.0570 0.0565 0.0505 0.0550 0.0465 0.0550 0.0545 39 | % 0.0555 0.0540 0.0490 0.0420 0.0510 0.0490 0.0435 40 | 41 | % Normal, reps = 2000, method = 'wang' 42 | % prob = 43 | % 44 | % 0.0565 0.0600 0.0495 0.0445 0.0590 0.0515 0.0605 45 | % 0.0550 0.0575 0.0545 0.0525 0.0470 0.0505 0.0510 46 | % 0.0605 0.0485 0.0540 0.0555 0.0515 0.0545 0.0395 47 | % 0.0530 0.0555 0.0480 0.0485 0.0445 0.0460 0.0525 48 | 49 | pG = [... % Gamma(4,2)-2, note parametrization in matlab = gamrnd(4,1/2) 50 | 0.0698 0.0804 0.078 0.0703 0.0685 0.0615 0.0615;... 51 | 0.075 0.0724 0.0695 0.0603 0.0577 0.0591 0.0598;... 52 | 0.0719 0.0634 0.0598 0.0555 0.052 0.0541 0.0533;... 53 | 0.0606 0.0579 0.0507 0.0495 0.0502 0.0482 0.053] 54 | 55 | % Gamma, reps = 2000, method = 'wang' 56 | % prob = 57 | % 58 | % 0.0650 0.0590 0.0650 0.0620 0.0580 0.0565 0.0615 59 | % 0.0675 0.0595 0.0680 0.0525 0.0495 0.0580 0.0525 60 | % 0.0645 0.0580 0.0720 0.0495 0.0505 0.0410 0.0540 61 | % 0.0550 0.0575 0.0490 0.0560 0.0535 0.0595 0.0420 62 | 63 | % Gamma, reps = 2000, method = 'wang' 64 | % prob = 65 | % 66 | % 0.0795 0.0660 0.0595 0.0590 0.0520 0.0530 0.0475 67 | % 0.0715 0.0540 0.0645 0.0525 0.0540 0.0585 0.0550 68 | % 0.0615 0.0620 0.0520 0.0610 0.0540 0.0535 0.0580 69 | % 0.0505 0.0610 0.0530 0.0475 0.0520 0.0455 0.0490 70 | 71 | %% Table 3 72 | clear all; 73 | n = [64 128]; 74 | p{1} = [4 8 16 32 48 56 60]; 75 | p{2} = [8 16 32 64 96 112 120]; 76 | 77 | reps = 250; 78 | tic; 79 | for i = 1:numel(n) 80 | for j = 1:numel(p{i}) 81 | for k = 1:reps 82 | %x = gamrnd(4,1/2,n(i),p{i}(j))-2; 83 | x = randn(n(i),p{i}(j)); 84 | v = round(p{i}(j)/2); 85 | sigma = [0.5*ones(v,1);ones(p{i}(j)-v,1)]'; 86 | x = (diag(sqrt(sigma))*x')'; 87 | pval(k) = sphere.jsn(x,'test','n'); 88 | end 89 | prob(i,j) = mean(pval<=0.05); 90 | end 91 | toc 92 | end 93 | 94 | pN = [... % Normal, Power 1, CJ 95 | 0.7754 0.8662 0.912 0.9384 0.9471 0.949 0.9501;... 
96 | 0.9984 0.9998 1 1 1 1 1] 97 | 98 | % Normal, Power 1, reps = 2500, method = 'wang' 99 | % prob = 100 | % 101 | % 0.8640 0.9256 0.9664 0.9744 0.9840 0.9812 0.9820 102 | % 0.9996 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 103 | 104 | % Normal, Power 1, reps = 2500, method = 'john' 105 | % prob = 106 | % 107 | % 0.7348 0.8336 0.8876 0.9304 0.9284 0.9364 0.9392 108 | % 0.9980 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 109 | 110 | % Normal, Power 1, reps = 2500, method = 'nagao' 111 | % prob = 112 | % 113 | % 0.7516 0.8468 0.8876 0.9404 0.9336 0.9308 0.9392 114 | % 0.9992 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 115 | 116 | pN = [... % Normal, Power 2, CJ 117 | 0.4694 0.5313 0.5732 0.5868 0.6035 0.6025 0.6048; 118 | 0.9424 0.9698 0.9781 0.9823 0.9824 0.9841 0.9844] 119 | 120 | % Normal, Power 2, reps = 2500, method = 'wang' 121 | % prob = 122 | % 123 | % 0.5400 0.5880 0.6460 0.6700 0.6660 0.6752 0.6900 124 | % 0.9720 0.9856 0.9844 0.9908 0.9904 0.9892 0.9920 125 | 126 | % Normal, Power 2, reps = 2500, method = 'john' 127 | % prob = 128 | % 129 | % 0.4412 0.4828 0.5360 0.5720 0.5732 0.5852 0.5696 130 | % 0.9444 0.9584 0.9724 0.9852 0.9836 0.9864 0.9836 131 | 132 | % Normal, Power 2, reps = 2500, method = 'nagao' 133 | % prob = 134 | % 135 | % 0.4596 0.5008 0.5260 0.5520 0.5788 0.5924 0.6016 136 | % 0.9384 0.9540 0.9756 0.9756 0.9836 0.9776 0.9820 137 | -------------------------------------------------------------------------------- /Testing/test_sphericity2.m: -------------------------------------------------------------------------------- 1 | %% Compare jns.m to size & power from 2 | % Ledoit & Wolf (2002). Some hypothesis tests for the covariance matrix 3 | % when the dimension is large compared to the sample size. 4 | % Annals of Statistics 30: 1081-1102 5 | 6 | %% Table 1 7 | clear all; 8 | n = [4 8 16 32 64 128 256]; 9 | p = [4 8 16 32 64 128 256]; 10 | reps = 200; 11 | tic; 12 | for i = 1:numel(p) 13 | for j = 1:numel(n) 14 | for k = 1:reps 15 | x = randn(n(j),p(i)); 16 | pval(k) = sphere.jsn(x,'test','wang'); 17 | end 18 | prob(i,j) = mean(pval<=0.05); 19 | end 20 | toc 21 | end 22 | 23 | % Ledoit & Wolf (method is John's test) 24 | pL = [... 25 | 0.01 0.03 0.04 0.05 0.05 0.05 0.05;... 26 | 0.03 0.04 0.04 0.05 0.05 0.05 0.05;... 27 | 0.04 0.05 0.05 0.05 0.05 0.05 0.05;... 28 | 0.05 0.05 0.05 0.05 0.05 0.05 0.05;... 29 | 0.05 0.05 0.05 0.05 0.05 0.05 0.05;... 30 | 0.05 0.05 0.05 0.05 0.05 0.05 0.05;... 
31 | 0.05 0.05 0.05 0.05 0.05 0.05 0.05]; 32 | 33 | % method = 'john', reps = 2000; 25.11.14 34 | % prob = 35 | % 36 | % 0.0010 0.0230 0.0400 0.0500 0.0415 0.0505 0.0535 37 | % 0.0220 0.0370 0.0435 0.0485 0.0520 0.0490 0.0635 38 | % 0.0335 0.0415 0.0460 0.0515 0.0605 0.0460 0.0470 39 | % 0.0445 0.0505 0.0490 0.0460 0.0505 0.0505 0.0460 40 | % 0.0470 0.0515 0.0560 0.0535 0.0530 0.0480 0.0500 41 | % 0.0565 0.0545 0.0505 0.0515 0.0520 0.0515 0.0605 42 | % 0.0580 0.0510 0.0530 0.0530 0.0575 0.0550 0.0490 43 | 44 | % method = 'nagao', reps = 2000; 25.11.14 45 | % prob = 46 | % 47 | % 0.0420 0.0400 0.0540 0.0485 0.0495 0.0455 0.0510 48 | % 0.0720 0.0585 0.0540 0.0490 0.0475 0.0455 0.0550 49 | % 0.0310 0.0510 0.0485 0.0410 0.0555 0.0530 0.0530 50 | % 0.0280 0.0430 0.0430 0.0510 0.0475 0.0455 0.0505 51 | % 0.0315 0.0475 0.0480 0.0485 0.0530 0.0450 0.0495 52 | % 0.0370 0.0430 0.0410 0.0475 0.0565 0.0520 0.0535 53 | % 0.0355 0.0415 0.0450 0.0495 0.0435 0.0525 0.0370 54 | 55 | % method = 'wang', reps = 2000; 25.11.14 56 | % prob = 57 | % 58 | % 0.0270 0.0285 0.0445 0.0495 0.0485 0.0515 0.0620 59 | % 0.0505 0.0595 0.0380 0.0495 0.0485 0.0575 0.0535 60 | % 0.0495 0.0750 0.0635 0.0490 0.0695 0.0555 0.0570 61 | % 0.0500 0.0780 0.0965 0.0780 0.0575 0.0585 0.0550 62 | % 0.0475 0.0900 0.1030 0.1090 0.0725 0.0485 0.0480 63 | % 0.0525 0.0860 0.1030 0.1125 0.1215 0.0845 0.0455 64 | % 0.0585 0.0795 0.1070 0.1135 0.1250 0.1140 0.0885 65 | 66 | %% Table 2 67 | clear all; 68 | n = [4 8 16 32 64 128 256]; 69 | p = [4 8 16 32 64 128 256]; 70 | reps = 200; 71 | tic; 72 | for i = 1:numel(p) 73 | for j = 1:numel(n) 74 | for k = 1:reps 75 | sigma = [0.5*ones(round(p(i)/2),1);ones(p(i)-round(p(i)/2),1)]'; 76 | x = (diag(sqrt(sigma))*randn(n(j),p(i))')'; 77 | pval(k) = sphere.jsn(x,'test','w'); 78 | end 79 | prob(i,j) = mean(pval<=0.05); 80 | end 81 | toc 82 | end 83 | 84 | % Ledoit & Wolf (method is John's test) 85 | pL = [... 86 | 0.02 0.06 0.15 0.37 0.76 0.98 1;... 87 | 0.05 0.09 0.18 0.42 0.85 1.00 1;... 88 | 0.06 0.11 0.20 0.48 0.90 1.00 1;... 89 | 0.08 0.13 0.22 0.50 0.93 1.00 1;... 90 | 0.09 0.13 0.24 0.52 0.95 1.00 1;... 91 | 0.09 0.14 0.23 0.53 0.95 1.00 1;... 
92 | 0.09 0.14 0.24 0.54 0.96 1.00 1]; 93 | 94 | % method = 'john', reps = 2000; 25.11.14 95 | % prob = 96 | % 97 | % 0.0010 0.0505 0.1310 0.3425 0.7420 0.9880 1.0000 98 | % 0.0360 0.0830 0.1785 0.4155 0.8385 0.9980 1.0000 99 | % 0.0535 0.0935 0.1960 0.4605 0.9045 1.0000 1.0000 100 | % 0.0585 0.1175 0.2025 0.4970 0.9225 1.0000 1.0000 101 | % 0.0755 0.1145 0.2115 0.5000 0.9390 1.0000 1.0000 102 | % 0.0750 0.1165 0.2285 0.5185 0.9445 1.0000 1.0000 103 | % 0.0890 0.1205 0.2375 0.5235 0.9625 1.0000 1.0000 104 | 105 | % method = 'nagao', reps = 2000; 25.11.14 106 | % prob = 107 | % 108 | % 0.0555 0.1050 0.1780 0.3995 0.7700 0.9795 1.0000 109 | % 0.0815 0.0905 0.1950 0.4260 0.8470 0.9990 1.0000 110 | % 0.0570 0.1005 0.1995 0.4605 0.9005 1.0000 1.0000 111 | % 0.0435 0.1065 0.2150 0.4700 0.9335 1.0000 1.0000 112 | % 0.0475 0.1035 0.2085 0.4815 0.9370 1.0000 1.0000 113 | % 0.0535 0.1110 0.2175 0.5090 0.9490 1.0000 1.0000 114 | % 0.0560 0.0995 0.1855 0.4890 0.9525 1.0000 1.0000 115 | 116 | % method = 'wang', reps = 2000; 25.11.14 117 | % prob = 118 | % 119 | % 0.0580 0.1025 0.2190 0.5230 0.8735 0.9945 1.0000 120 | % 0.1220 0.1930 0.2915 0.5815 0.9265 1.0000 1.0000 121 | % 0.1375 0.2855 0.4100 0.6500 0.9625 1.0000 1.0000 122 | % 0.1570 0.3140 0.4950 0.7425 0.9810 1.0000 1.0000 123 | % 0.1565 0.3085 0.5345 0.8165 0.9900 1.0000 1.0000 124 | % 0.1775 0.3290 0.5225 0.8010 0.9970 1.0000 1.0000 125 | % 0.1560 0.3235 0.5290 0.8190 0.9965 1.0000 1.0000 126 | % 127 | -------------------------------------------------------------------------------- /Testing/Test_rv.m: -------------------------------------------------------------------------------- 1 | % xUnit framework required 2 | % https://psexton.github.io/matlab-xunit/ 3 | 4 | % R package example, uses Pearson III approx for p-value 5 | % library(FactoMineR) 6 | % data(wine) 7 | % X <- wine[,3:7] 8 | % Y <- wine[,11:20] 9 | % coeffRV(X,Y) 10 | % $rv 11 | % [1] 0.6220991 12 | % $rvstd 13 | % [1] 8.100868 14 | % $mean 15 | % [1] 0.1307783 16 | % $variance 17 | % [1] 0.003678469 18 | % $skewness 19 | % [1] 1.390012 20 | % $p.value 21 | % [1] 1.885726e-05 22 | 23 | classdef Test_rv < TestCase 24 | properties 25 | x 26 | y 27 | end 28 | 29 | methods 30 | function self = Test_rv(name) 31 | self = self@TestCase(name); 32 | self.x = [... 33 | 3.074 3.000 2.714 2.280 1.960;... 34 | 2.964 2.821 2.375 2.280 1.680;... 35 | 2.857 2.929 2.560 1.960 2.077;... 36 | 2.808 2.593 2.417 1.913 2.160;... 37 | 3.607 3.429 3.154 2.154 2.040;... 38 | 2.857 3.111 2.577 2.040 2.077;... 39 | 3.214 3.222 2.962 2.115 2.040;... 40 | 3.120 2.852 2.500 2.200 2.185;... 41 | 2.857 2.815 2.808 1.923 2.074;... 42 | 2.893 3.000 2.571 1.846 1.680;... 43 | 3.250 3.286 2.714 1.926 1.962;... 44 | 3.393 3.179 2.769 2.038 1.920;... 45 | 3.179 3.286 2.778 2.231 1.760;... 46 | 3.071 3.107 2.731 2.120 1.800;... 47 | 3.107 3.143 2.846 2.185 1.962;... 48 | 2.929 3.179 2.852 2.000 2.037;... 49 | 3.036 3.179 3.037 2.231 1.667;... 50 | 3.071 2.926 2.741 2.000 1.880;... 51 | 2.643 2.786 2.536 1.889 1.808;... 52 | 3.696 3.192 2.833 1.826 2.385;... 53 | 3.708 2.926 2.520 2.040 2.667]; 54 | 55 | self.y = [... 56 | 3.407 3.308 2.885 2.320 1.840 2.000 1.650 3.259 2.963 3.200;... 57 | 3.370 3.000 2.560 2.440 1.739 2.000 1.381 2.962 2.808 2.926;... 58 | 3.250 2.929 2.769 2.192 2.250 1.750 1.250 3.077 2.800 3.077;... 59 | 3.160 2.880 2.391 2.083 2.167 2.304 1.476 2.542 2.583 2.478;... 60 | 3.536 3.360 3.160 2.231 2.148 1.762 1.600 3.615 3.296 3.462;... 
61 | 3.179 3.385 2.800 2.240 2.148 1.750 1.476 3.214 3.148 3.321;... 62 | 3.429 3.500 3.038 2.200 2.385 1.826 1.476 3.250 3.222 3.385;... 63 | 3.654 3.077 2.520 2.320 2.444 2.080 1.905 3.280 3.160 2.962;... 64 | 3.357 3.346 3.000 2.040 2.125 1.875 1.524 3.148 2.893 3.308;... 65 | 3.222 3.259 2.926 2.040 2.042 2.000 1.773 3.077 2.704 2.778;... 66 | 3.607 3.385 2.889 2.115 2.160 1.955 1.571 3.286 3.036 3.222;... 67 | 3.481 3.385 2.962 2.000 2.200 2.042 1.545 3.321 3.071 3.143;... 68 | 3.481 3.423 2.963 2.269 2.154 1.957 1.571 3.481 3.259 3.269;... 69 | 3.357 3.444 2.885 2.120 2.346 1.826 1.550 3.269 3.080 3.192;... 70 | 3.357 3.370 2.846 2.240 2.280 1.750 1.524 3.333 3.037 3.370;... 71 | 3.286 3.308 3.115 2.269 2.000 1.917 1.400 3.040 2.960 3.200;... 72 | 3.444 3.500 3.185 2.160 2.240 1.913 1.750 3.520 3.296 3.462;... 73 | 3.370 3.360 2.963 2.308 1.917 2.000 1.429 3.250 2.920 2.880;... 74 | 2.889 2.800 2.500 1.962 2.111 2.080 1.318 2.680 2.308 2.556;... 75 | 3.737 3.080 2.833 1.773 2.440 2.292 1.571 3.437 2.958 2.600;... 76 | 3.727 2.885 2.600 2.083 2.609 2.174 1.650 3.095 3.136 2.545]; 77 | end 78 | 79 | function setUp(self) 80 | end 81 | 82 | function test(self) 83 | [pval,rv,rvstd] = dep.rvtest(self.x,self.y); 84 | assertElementsAlmostEqual(pval,1.885726e-05,'absolute',1e-5); 85 | assertElementsAlmostEqual(rv,0.6220991,'absolute',1e-5); 86 | assertElementsAlmostEqual(rvstd,8.100868,'absolute',1e-5); 87 | end 88 | 89 | function tearDown(self) 90 | end 91 | end 92 | end -------------------------------------------------------------------------------- /+utils/mexHadamard.c: -------------------------------------------------------------------------------- 1 | /* Hadamard Transform 2 | mex function to take hadamard transform 3 | 4 | Usage: w = hadamard(x) 5 | x must be a REAL VALUED COLUMN VECTOR or MATRIX 6 | m = size(x,1) must be a POWER OF TWO 7 | 8 | Notes: 9 | 1) This implementation uses exactly m*log2(m) additions/subtractions. 10 | 2) This is symmetric and orthogonal. To invert, apply again and 11 | divide by vector length. 12 | 13 | Written by: Peter Stobbe, Caltech 14 | Email: stobbe@acm.caltech.edu 15 | Created: August 2008 16 | Edits by Stephen Becker, 2009--2014 17 | 18 | Note: in R2008b, Matlab added "fwht" and "ifwht" (the Fast Walsh- 19 | Hadamart Transform and the inverse) to its Signal Processing 20 | Toolbox. With the default ordering and scaling, it's not 21 | equivalent to this, but you can change this with the following: 22 | y = length(x) * fwht( x, [], 'hadamard' ); 23 | Then y should be the same as hadamard(x) up to roundoff. 24 | However, it appears that this code is faster than fwht. 25 | 26 | Update Stephen Becker, Feb 27 2014, fix compiling issue for Mac OS X 27 | Update Stephen Becker, Mar 3 2014, issue error if input data is sparse 28 | https://github.com/stephenbeckr/SparsifiedKMeans 29 | 30 | Copyright (c) 2011, Peter Stobbe 31 | All rights reserved. 32 | 33 | Redistribution and use in source and binary forms, with or without 34 | modification, are permitted provided that the following conditions are 35 | met: 36 | 37 | * Redistributions of source code must retain the above copyright 38 | notice, this list of conditions and the following disclaimer. 
39 | * Redistributions in binary form must reproduce the above copyright 40 | notice, this list of conditions and the following disclaimer in 41 | the documentation and/or other materials provided with the distribution 42 | 43 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 44 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 45 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 46 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 47 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 48 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 49 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 50 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 51 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 52 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 53 | POSSIBILITY OF SUCH DAMAGE. 54 | 55 | */ 56 | 57 | #include 58 | 59 | 60 | /* SRB: Feb 27 2014, gcc-4.8 has problems with char16_t not being defined. 61 | * This seems to fix it 62 | * (and do this BEFORE including mex.h) */ 63 | /* See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=56086#c4 64 | (but for, e.g., Mac w/ Xcode and Clang, this fails, so test 65 | for gcc. more possibilities here: 66 | https://gcc.gnu.org/onlinedocs/cpp/Common-Predefined-Macros.html 67 | but clang defines GNUC too! 68 | http://nadeausoftware.com/articles/2012/10/c_c_tip_how_detect_compiler_name_and_version_using_compiler_predefined_macros 69 | */ 70 | #ifndef NO_UCHAR 71 | #define UCHAR_OK 72 | #endif 73 | #if defined(__GNUC__) && !(defined(__clang__)) && defined(UCHAR_OK) 74 | #include 75 | #endif 76 | 77 | #include "mex.h" 78 | 79 | /* 80 | y - output 81 | x - input 82 | m - length of vector 83 | */ 84 | void hadamard_apply_vector(double *y, double *x, unsigned m) 85 | { 86 | unsigned bit, j, k; 87 | double temp; 88 | 89 | for (j = 0; j < m; j+=2) { 90 | k = j+1; 91 | y[j] = x[j] + x[k]; 92 | y[k] = x[j] - x[k]; 93 | } 94 | 95 | for (bit = 2; bit < m; bit <<= 1) { 96 | for (j = 0; j < m; j++) { 97 | if( (bit & j) == 0 ) { 98 | k = j | bit; 99 | temp = y[j]; 100 | y[j] = y[j] + y[k]; 101 | y[k] = temp - y[k]; 102 | } 103 | } 104 | } 105 | } 106 | 107 | /* 108 | y - output 109 | x - input 110 | m - length of vectors (number of rows) 111 | n - number of vectors (number of columns) 112 | */ 113 | void hadamard_apply_matrix(double *y, double *x, unsigned m, unsigned n) 114 | { 115 | unsigned j; 116 | for(j = 0; j < n; j++) { 117 | hadamard_apply_vector(y + j*m, x + j*m, m); 118 | } 119 | } 120 | 121 | 122 | /* check that the vector length is a power of 2, 123 | just using bitshifting instead of log */ 124 | void checkPowerTwo(unsigned m) 125 | { 126 | /* check that it's not a degenerate 0 by 1 vector or singleton */ 127 | if (m <= 1) { 128 | mexErrMsgTxt("Vector length must be greater than 1."); 129 | } 130 | /* keep dividing by two until result is odd */ 131 | while( (m & 1) == 0 ){ 132 | m >>= 1; 133 | } 134 | /* check that m is not a multiple of an odd number greater than 1 */ 135 | if (m > 1) { 136 | mexErrMsgTxt("Vector length must be power of 2."); 137 | } 138 | } 139 | 140 | 141 | /* The gateway routine. */ 142 | void mexFunction(int nlhs, mxArray *plhs[], 143 | int nrhs, const mxArray *prhs[]) 144 | { 145 | double *x, *y; 146 | unsigned m, n; 147 | 148 | /* Check for the proper number of arguments. 
*/ 149 | if (nrhs != 1) { 150 | mexErrMsgTxt("One and only one input required; must be a column vector or matrix, with # rows a power of 2."); 151 | } 152 | if (nlhs > 1) { 153 | mexErrMsgTxt("Too many output arguments."); 154 | } 155 | 156 | /* input size */ 157 | m = mxGetM(prhs[0]); 158 | checkPowerTwo(m); 159 | n = mxGetN(prhs[0]); 160 | 161 | if (mxIsComplex(prhs[0])) { 162 | mexErrMsgTxt("Input must be real."); 163 | } else if (mxIsSparse(prhs[0])) { 164 | mexErrMsgTxt("Input must be a full matrix, not sparse."); 165 | } else if (!mxIsDouble(prhs[0])) { 166 | mexErrMsgTxt("Input must be of type double."); 167 | } 168 | 169 | /* Create matrix for the return argument. */ 170 | plhs[0] = mxCreateDoubleMatrix(m, n, mxREAL); 171 | 172 | /* Assign pointers to each input and output. */ 173 | x = mxGetPr(prhs[0]); 174 | y = mxGetPr(plhs[0]); 175 | 176 | /* Call the C subroutine. */ 177 | hadamard_apply_matrix(y, x, m, n); 178 | return; 179 | } 180 | -------------------------------------------------------------------------------- /+dep/hsic.m: -------------------------------------------------------------------------------- 1 | % HSIC Hilbert-Schmidt Independence Criterion 2 | % 3 | % [stat,K,L,varargout] = hsic(x,y,varargin) 4 | % 5 | % Estimate the Hilbert-Schmidt Independence Criterion (HSIC). 6 | % 7 | % INPUTS 8 | % x - [n x p] n samples of dimensionality p 9 | % y - [n x q] n samples of dimensionality q 10 | % 11 | % OPTIONAL (name/value pairs) 12 | % kernel - string indicating kernel type 13 | % approx - string indicating approximation 14 | % unbiased - boolean indicating unbiased estimator (default=false) 15 | % gram - true indicates x & y are Gram matrices (default=false) 16 | % doublecenter - true indicates x & y are double-centered Gram 17 | % matrices (default=false) 18 | % 19 | % Additional name/value pairs are passed through to the kernel function. 20 | % 21 | % OUTPUTS 22 | % h - Hilbert-Schmidt Independence Criterion 23 | % K - [n x n] Gram matrix for x 24 | % L - [n x n] Gram matrix for y 25 | % params - 26 | % 27 | % EXAMPLE 28 | % rng(1234) 29 | % n = 1000; p = 50; q = p; 30 | % x = rand(n,p); 31 | % y = x.^2; 32 | % h = dep.hsic(x,y) % default Gaussian kernel with median heuristic 33 | % 34 | % % Equivalence between distance covariance (squared) & HSIC 35 | % h = dep.hsic(x,y,'kernel','brownian'); 36 | % d = dep.dcov(x,y); 37 | % [4*h d^2] 38 | % 39 | % % Approximate using random fourier features 40 | % h = dep.hsic(x,y,'approx','rfm','D',100,'sigma',2) 41 | % 42 | % % Approximate using Nystrom 43 | % h = dep.hsic(x,y,'approx','nystrom','k',100,'sigma',2) 44 | % 45 | % REFERENCE 46 | % Gretton et al (2008). A kernel statistical test of independence. In 47 | % Advances in neural information processing systems, 585-592 48 | % Sejdinovic et al (2013). Equivalence of distance-based and RKHS-based 49 | % statistics in hypothesis testing. Annals of Statistics 41: 2263-2291 50 | % Song et al (2012). Feature Selection via Dependence Maximization. 
51 | % Journal of Machine Learning Research 13: 1393-1434 52 | % 53 | % SEE ALSO 54 | % hsictest, rfm 55 | 56 | % $ Copyright (C) 2017 Brian Lau, brian.lau@upmc.fr $ 57 | % The full license and most recent version of the code can be found at: 58 | % https://github.com/brian-lau/highdim 59 | % 60 | % This program is free software: you can redistribute it and/or modify 61 | % it under the terms of the GNU General Public License as published by 62 | % the Free Software Foundation, either version 3 of the License, or 63 | % (at your option) any later version. 64 | % 65 | % This program is distributed in the hope that it will be useful, 66 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 67 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 68 | % GNU General Public License for more details. 69 | 70 | % TODO 71 | % o error for unbiased && approx, I don't know how to estimate the unbiased 72 | % version using feature maps, could potentially reconstruct full Gram 73 | % matrix? 74 | function [h,K,L,params] = hsic(x,y,varargin) 75 | 76 | par = inputParser; 77 | par.KeepUnmatched = true; 78 | par.PartialMatching = false; 79 | addRequired(par,'x',@isnumeric); 80 | addRequired(par,'y',@isnumeric); 81 | addParamValue(par,'kernel','rbf',@ischar); 82 | addParamValue(par,'approx','none',@ischar); 83 | addParamValue(par,'unbiased',false,@(x) isnumeric(x) || islogical(x)); 84 | addParamValue(par,'gram',false,@isscalar); 85 | addParamValue(par,'doublecenter',false,@isscalar); 86 | parse(par,x,y,varargin{:}); 87 | 88 | [m,p] = size(x); 89 | [n,q] = size(y); 90 | 91 | assert(m == n,'HSIC requires x and y to have the same # of samples'); 92 | assert(~(par.Results.doublecenter&&par.Results.unbiased),... 93 | 'Cannot compute unbiased HSIC estimate with double-centered Gram matrices.'); 94 | 95 | if par.Results.doublecenter 96 | Kc = x; 97 | Lc = y; 98 | elseif par.Results.gram 99 | K = x; 100 | L = y; 101 | else 102 | [K,L,params] = getKL(x,y,par.Results.kernel,par.Results.approx,par.Unmatched); 103 | end 104 | 105 | if par.Results.unbiased % U-statistic 106 | K = utils.zerodiag(K); 107 | L = utils.zerodiag(L); 108 | 109 | % l = ones(m,1); 110 | % h = trace(K*L) + (l'*K*l*l'*L*l)/(n-1)/(n-2) - 2*(l'*K*L*l)/(n-2); 111 | % h = h/(n*(n-3)); 112 | 113 | % Equivalent, but faster 114 | Kc = utils.ucenter(K); 115 | Lc = utils.ucenter(L); 116 | h = sum(sum(Kc.*Lc))/(n*(n-3)); 117 | else % V-statistic 118 | if any(strcmp(par.Results.approx,{'rfm' 'nys' 'nystrom'})) 119 | % K & L are feature maps 120 | phiXc = bsxfun(@minus,K,mean(K)); 121 | phiYc = bsxfun(@minus,L,mean(L)); 122 | h = (norm(phiXc'*phiYc,'fro')/n)^2; 123 | if nargin > 1 124 | K = K*K'; 125 | L = L*L'; 126 | end 127 | else 128 | % K & L are Gram matrices 129 | 130 | % H = eye(n) - ones(n)/n; 131 | % h = trace(K*H*L*H)/n^2 132 | 133 | % Equivalent, but faster 134 | if ~exist('Kc','var') 135 | Kc = utils.dcenter(K); 136 | Lc = utils.dcenter(L); 137 | end 138 | h = sum(sum(Kc.*Lc))/n^2; 139 | end 140 | end 141 | 142 | %% 143 | function [K,L,params] = getKL(x,y,kernel,approx,par) 144 | 145 | switch lower(kernel) 146 | case {'rbf' 'gauss' 'gaussian'} 147 | switch lower(approx) 148 | case {'rfm'} 149 | K = utils.rfm(x,par); 150 | L = utils.rfm(y,par); 151 | case {'nys' 'nystrom'} 152 | K = utils.nystrom(x,'kernel','rbf',par); 153 | L = utils.nystrom(y,'kernel','rbf',par); 154 | case {'none'} 155 | [K,sigmax] = utils.rbf(x,[],par); 156 | [L,sigmay] = utils.rbf(y,[],par); 157 | otherwise 158 | error('Unknown approximation for rbf 
kernel'); 159 | end 160 | case {'distance' 'brownian'} 161 | switch lower(approx) 162 | case {'nys' 'nystrom'} 163 | K = utils.nystrom(x,'kernel','brownian',par); 164 | L = utils.nystrom(y,'kernel','brownian',par); 165 | case {'none'} 166 | K = utils.distkern(x,x,par); 167 | L = utils.distkern(y,y,par); 168 | otherwise 169 | error('Unknown approximation for brownian kernel'); 170 | end 171 | otherwise 172 | error('Unsupported kernel'); 173 | end 174 | 175 | if exist('sigmax','var') 176 | params.sigmax = sigmax; 177 | end 178 | if exist('sigmay','var') 179 | params.sigmay = sigmay; 180 | end 181 | if ~exist('params','var') 182 | params = struct(); 183 | end 184 | -------------------------------------------------------------------------------- /+dep/dcorrtest.m: -------------------------------------------------------------------------------- 1 | % DCORRTEST Distance correlation test of independence 2 | % 3 | % [pval,r,stat,null] = dcorrtest(x,y,varargin) 4 | % 5 | % Given a sample X1,...,Xn from a p-dimensional multivariate distribution, 6 | % and a sample Y1,...,Xn from a q-dimensional multivariate distribution, 7 | % test the hypothesis: 8 | % 9 | % H0 : X and Y are mutually independent 10 | % 11 | % The default test is based on a modified distance correlation statistic 12 | % that when suitably transformed converges to a Student t distribution 13 | % under independence (Szekely & Rizzo 2013). The resulting t-test is 14 | % unbiased for sample sizes greater than three and all significance 15 | % levels. 16 | % 17 | % Several different permutation methods are also available. See DCOVTEST 18 | % for details. These are included mostly for testing since the t-test 19 | % is well-behaved even in small samples, and very computationally efficient. 20 | % 21 | % INPUTS 22 | % x - [n x p] n samples of dimensionality p 23 | % y - [n x q] n samples of dimensionality q 24 | % 25 | % OPTIONAL (as name/value pairs, order irrelevant) 26 | % method - 't' - t-test from Szekely & Rizzo (2013), DEFAULT 27 | % 'pearson' - Pearson type III approx by moment matching 28 | % 'perm-dist' - randomization using permutation of the rows & 29 | % columns of distance matrices 30 | % 'perm-brute' - brute force randomization, directly permuting 31 | % one of the inputs, which requires recalculating 32 | % and centering distance matrices 33 | % nboot - # permutations if not t-test 34 | % 35 | % OUTPUTS 36 | % pval - p-value 37 | % r - distance correlation 38 | % stat - test statistic 39 | % null - permutation statistics 40 | % 41 | % EXAMPLE 42 | % rng(1234); 43 | % p = 100; 44 | % n = 2000; 45 | % X = rand(n,p); Y = X.^2 + 15*randn(n,p); 46 | % 47 | % tic;[pval,r] = dep.dcorrtest(X,Y); toc % default t-test 48 | % [pval , r] 49 | % tic;[pval,r] = dep.dcorrtest(X,Y,'method','pearson'); toc 50 | % [pval , r] 51 | % tic;[pval,r] = dep.dcorrtest(X,Y,'method','perm-dist','nboot',200);toc 52 | % [pval , r] 53 | % tic;[pval,r] = dep.dcorrtest(X,Y,'method','perm-brute','nboot',200);toc 54 | % [pval , r] 55 | % 56 | % REFERENCE 57 | % Szekely et al (2007). Measuring and testing independence by correlation 58 | % of distances. Ann Statist 35: 2769-2794 59 | % Szekely & Rizzo (2013). The distance correlation t-test of independence 60 | % in high dimension. 
J Multiv Analysis 117: 193-213 61 | % 62 | % SEE ALSO 63 | % dcorr, DepTest2 64 | 65 | % $ Copyright (C) 2017 Brian Lau, brian.lau@upmc.fr $ 66 | % The full license and most recent version of the code can be found at: 67 | % https://github.com/brian-lau/highdim 68 | % 69 | % This program is free software: you can redistribute it and/or modify 70 | % it under the terms of the GNU General Public License as published by 71 | % the Free Software Foundation, either version 3 of the License, or 72 | % (at your option) any later version. 73 | % 74 | % This program is distributed in the hope that it will be useful, 75 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 76 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 77 | % GNU General Public License for more details. 78 | 79 | function [pval,r,stat,varargout] = dcorrtest(x,y,varargin) 80 | 81 | par = inputParser; 82 | par.KeepUnmatched = true; 83 | addRequired(par,'x',@isnumeric); 84 | addRequired(par,'y',@isnumeric); 85 | addParamValue(par,'method','t',@ischar); 86 | addParamValue(par,'nboot',999,@(x) isnumeric(x) && isscalar(x)); 87 | parse(par,x,y,varargin{:}); 88 | 89 | [n,~] = size(x); 90 | assert(n == size(y,1),'DCORRTEST requires x and y to have the same # of samples'); 91 | 92 | permMethods = {'perm-dist' 'perm-brute'}; 93 | nboot = par.Results.nboot; 94 | method = lower(par.Results.method); 95 | 96 | switch method 97 | case {'pearson'} 98 | if ~isfield(par.Unmatched,'unbiased') 99 | % Override dcov default, we generally want unbiased dcorr 100 | [d,dvx,dvy,A,B] = dep.dcov(x,y,'unbiased',true,par.Unmatched); 101 | else 102 | [d,dvx,dvy,A,B] = dep.dcov(x,y,par.Unmatched); 103 | end 104 | r = d/sqrt(dvx*dvy); 105 | 106 | if isfield(par.Unmatched,'unbiased') && par.Unmatched.unbiased 107 | stat = (n*(n-3))*d; % = sum(sum(A.*B)) for unbiased estimator 108 | elseif ~isfield(par.Unmatched,'unbiased') 109 | stat = (n*(n-3))*d; % = sum(sum(A.*B)) for unbiased estimator 110 | else 111 | stat = (n^2)*d^2; % = sum(sum(A.*B)) for biased estimator 112 | end 113 | 114 | [pval,stat] = utils.pearsonIIIpval(A,B,stat); 115 | return; 116 | case {'t','ttest','t-test'} 117 | if isfield(par.Unmatched,'unbiased') && ~par.Unmatched.unbiased 118 | error('This method is only valid for UNBIASED estimator'); 119 | elseif ~isfield(par.Unmatched,'unbiased') 120 | r = dep.dcorr(x,y,'unbiased',true,par.Unmatched); 121 | else 122 | r = dep.dcorr(x,y,par.Unmatched); 123 | end 124 | 125 | v = n*(n-3)/2; 126 | stat = sqrt(v-1) * r/sqrt(1-r^2); 127 | pval = tcdf(stat,v-1,'upper'); 128 | return; 129 | case {'perm-dist'} 130 | a = sqrt(utils.sqdist(x,x)); 131 | b = sqrt(utils.sqdist(y,y)); 132 | [d,dvx,dvy] = dep.dcov(a,b,'dist',true,'unbiased',true); 133 | r = d/sqrt(dvx*dvy); 134 | 135 | null = zeros(nboot,1); 136 | for i = 1:nboot 137 | ind = randperm(n); 138 | [d2,dvx2,dvy2] = dep.dcov(a,b(ind,ind),'dist',true,'unbiased',true); 139 | null(i) = d2/sqrt(dvx2*dvy2); 140 | end 141 | case {'perm-brute'} 142 | [d,dvx,dvy] = dep.dcov(x,y,'unbiased',true); 143 | r = d/sqrt(dvx*dvy); 144 | 145 | null = zeros(nboot,1); 146 | for i = 1:nboot 147 | ind = randperm(n); 148 | [d2,dvx2,dvy2] = dep.dcov(x,y(ind,:),'unbiased',true); 149 | null(i) = d2/sqrt(dvx2*dvy2); 150 | end 151 | otherwise 152 | error('Unrecognized test method'); 153 | end 154 | 155 | % One of the permutation methods 156 | if any(strcmp(method,permMethods)) 157 | if ~exist('stat','var') 158 | stat = r; 159 | end 160 | pval = (1 + sum(null>stat)) / (1 + nboot); 161 | end 162 | 163 | if 
nargout == 4
164 |    if exist('null','var')
165 |       varargout{1} = null;
166 |    else
167 |       varargout{1} = [];
168 |    end
169 | end
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | highdim
2 | ==========
3 | A Matlab library for statistical testing of high-dimensional data, including
4 | one and two-sample tests for homogeneity, uniformity, sphericity and
5 | independence. Of note are implementations of some modern tests
6 | appropriate for data where dimensionality grows with sample size, possibly
7 | exceeding the number of samples.
8 |
9 | # Installation
10 | Download [highdim](https://github.com/brian-lau/highdim/archive/master.zip) and
11 | add the resulting folder to your Matlab path.
12 | Folders prefixed by a `+` are packages that should not be explicitly added to your path,
13 | although their [parent folder should be](http://www.mathworks.com/help/matlab/matlab_oop/scoping-classes-with-packages.html#brfynt_-3).
14 |
15 | The Statistics toolbox is required.
16 |
17 | # Examples
18 | The various tests are most easily accessed through three interfaces: `DepTest1`,
19 | `DepTest2` and `UniSphereTest` for one-sample tests, two-sample tests and
20 | one-sample tests on the sphere, respectively.
21 |
22 | Detailed simulations of size, power and comparisons between tests are available
23 | in the [wiki](https://github.com/brian-lau/highdim/wiki). The examples below
24 | give an idea of what's available.
25 |
26 | ### Multivariate (In)dependence, Sphericity and Homogeneity
27 | ```
28 | % Independent, but non-spherical data
29 | sigma = diag([ones(1,25),0.5*ones(1,5)]);
30 | x = (sigma*randn(50,30)')';
31 |
32 | % Independence tests (Han & Liu, 2014)
33 | DepTest1(x,'test','spearman')
34 | DepTest1(x,'test','kendall')
35 |
36 | % Sphericity tests (Ledoit & Wolf, 2002; Wang & Yao, 2013; Zou et al., 2014)
37 | DepTest1(x,'test','john')
38 | DepTest1(x,'test','wang')
39 | DepTest1(x,'test','sign')
40 | DepTest1(x,'test','bcs')
41 | ```
42 | * Han, F & Liu, H (2014). Distribution-free tests of independence with
43 | applications to testing more structures. [arXiv:1410.4179](http://arxiv.org/abs/1410.4179)
44 | * Ledoit, O & Wolf, M (2002). Some hypothesis tests for the covariance matrix
45 | when the dimension is large compared to the sample size.
46 | [Annals of Statistics 30: 1081-1102](http://projecteuclid.org/euclid.aos/1031689018)
47 | * Wang, Q & Yao, J (2013). On the sphericity test with large-dimensional
48 | observations. [Electronic Journal of Statistics 7: 2164-2192](http://projecteuclid.org/euclid.ejs/1378817880)
49 | * Zou, C et al (2014). Multivariate sign-based high-dimensional tests for
50 | sphericity. [Biometrika 101: 229-236](http://biomet.oxfordjournals.org/content/101/1/229)
51 |
52 | ```
53 | % Non-independent data, with ~0 correlation, from the same distribution
54 | x = rand(200,1); y = rand(200,1);
55 | xx = 0.5*(x+y)-0.5; yy = 0.5*(x-y);
56 | corr(xx,yy)
57 |
58 | % Two-sample independence tests (Gretton et al, 2008; Szekely & Rizzo, 2013)
59 | DepTest2(xx,yy,'test','dcorr') % Distance correlation t-test
60 | DepTest2(xx,yy,'test','hsic') % Hilbert Schmidt Independence Criterion
61 |
62 | % Do the samples come from the same distribution? (Gretton et al, 2012; Szekely et al, 2007)
63 | DepTest2(xx,yy,'test','mmd') % Maximum mean discrepancy
64 | DepTest2(xx,yy,'test','energy') % statistical energy
65 | ```
66 | * Gretton, A et al (2008).
A kernel statistical test of independence. [Neural Information Processing Systems](http://papers.nips.cc/paper/3201-a-kernel-statistical-test-of-independence.pdf) 67 | * Gretton, A et al (2012). A kernel two-sample test. [Journal of Machine Learning Research 13: 723-773](http://www.jmlr.org/papers/volume13/gretton12a/gretton12a.pdf) 68 | * Szekely, G et al (2007). Measuring and testing independence by correlation of distances. [Annals of Statistics 35: 2769-2794](http://projecteuclid.org/euclid.aos/1201012979) 69 | * Szekely, G & Rizzo, M (2013). The distance correlation t-test of independence 70 | in high dimension. [Journal of Multivariate Analysis 117: 193-213](http://dx.doi.org/10.1016/j.jmva.2013.02.012) 71 | 72 | ``` 73 | % Independent data, different distributions 74 | x = randn(200,1); y = rand(200,1); 75 | 76 | % Two-sample Independence tests 77 | DepTest2(x,y,'test','dcorr') 78 | DepTest2(x,y,'test','hsic') 79 | 80 | % Do the samples come from the same distribution? 81 | DepTest2(x,y,'test','mmd') 82 | DepTest2(x,y,'test','energy') 83 | ``` 84 | ### Differences in multivariate means and covariances 85 | ``` 86 | % Two high-dimensional samples with sparse difference in covariance matrix (4 entries) 87 | p = 50; n = 100; 88 | for ii = 1:p 89 | for jj = 1:p 90 | sigma(ii,jj) = 0.5^abs(ii-jj); 91 | end 92 | end 93 | D = diag(unifrnd(0.5,2.5,p,1)); 94 | S = D^.5*sigma*D^.5; U = zeros(p,p); 95 | [~,~,k] = utils.tri2sqind(p); 96 | r = randperm(numel(k)); 97 | U(k(r(1:4))) = unifrnd(0,4,4,1)*max(diag(S)); 98 | U = U + U'; 99 | [~,da] = eig(S); [~,db] = eig(S+U); 100 | d = abs(min([diag(da);diag(db)])) + 0.05; 101 | 102 | x = mvnrnd(zeros(1,p),S+d*eye(p),n); 103 | y = mvnrnd(zeros(1,p),S+U+d*(eye(p)),n); 104 | 105 | DepTest2(x,y,'test','covdiff') 106 | 107 | % Directly calling the test returns M, a matrix indicating where covariance 108 | % elements are significantly different (FWER controlled at alpha) 109 | [pval,stat,M] = diff.covtest(x,y); 110 | ``` 111 | * Cai, T et al (2013). Two-sample covariance matrix testing and support 112 | recovery in high-dimensional and sparse settings. [Journal of the 113 | American Statistical Association 108: 265-277](http://www.tandfonline.com/doi/abs/10.1080/01621459.2012.758041) 114 | 115 | ### Uniformity on hypersphere 116 | ``` 117 | % Non-uniform samples, antipodally distributed on the sphere 118 | sigma = diag([1 5 1]); 119 | x = (sigma*randn(50,3)')'; 120 | 121 | % Is projection onto unit hypersphere uniformly distributed? 122 | UniSphereTest(x,'test','rayleigh') % Rayleigh test fails since resultant is zero 123 | UniSphereTest(x,'test','gine-ajne') % Weighted Gine-Ajne 124 | UniSphereTest(x,'test','randproj') % random projection 125 | UniSphereTest(x,'test','bingham') % Bingham 126 | ``` 127 | * Cai, T et al (2013). Distribution of angles in random packing on spheres. [Journal of Machine Learning Research 14: 1837-1864](http://www.ncbi.nlm.nih.gov/pmc/articles/PMC4196685/) 128 | * Cuesta-Albertos, J et al (2009). On projection-based tests for 129 | directional and compositional data. [Statistics & Computing 19: 367-380](http://link.springer.com/article/10.1007%2Fs11222-008-9098-3#page-1) 130 | * Gine, E (1975) Invariant tests for uniformity on compact Riemannian manifolds based on Sobolev norms. [Annals of Statistics 3: 1243-1266](http://www.jstor.org/discover/10.2307/2958247) 131 | * Mardia, K & Jupp, P (2000). 
[Directional Statistics](https://books.google.fr/books?id=PTNiCm4Q-M0C&printsec=frontcover&source=gbs_ge_summary_r&cad=0#v=onepage&q&f=false). John Wiley 132 | * Prentice, M (1978). On invariant tests of uniformity for directions 133 | and orientations. [Annals of Statistics 6: 169-176](http://projecteuclid.org/euclid.aos/1176344075) 134 | 135 | Contributions 136 | -------------------------------- 137 | Copyright (c) 2017 Brian Lau [brian.lau@upmc.fr](mailto:brian.lau@upmc.fr), see [LICENSE](https://github.com/brian-lau/highdim/blob/master/LICENSE) 138 | 139 | Please feel free to [fork](https://github.com/brian-lau/highdim/fork) and contribute! 140 | -------------------------------------------------------------------------------- /DepTest1.m: -------------------------------------------------------------------------------- 1 | % DEPTEST1 Interface for one-sample tests 2 | % 3 | % Given a sample X1,...,Xn from a p-dimensional multivariate distribution, 4 | % test one of the hypotheses: 5 | % 6 | % H0 : Covariance matrix of sample is proportional to the identity 7 | % 8 | % using the following tests, 9 | % 'john' - John, Sugiura, Nagao test (JSN) 10 | % 'nagao' - JSN with Box-Bartlett correction 11 | % 'wang' - JSN with correction for large p 12 | % 'sign' - multivariate sign, non-parametric 13 | % 'bcs' - multivariate sign, correction for large p 14 | % 15 | % H0 : X1,...,Xp are mutually independent 16 | % 17 | % using the following rank-based tests suitable for high-dimensional data 18 | % 'spearman' - R1 from Han & Liu (default) 19 | % 'kendall' - R2 from Han & Liu 20 | % 21 | % PROPERTIES 22 | % x - [n x p] matrix, n samples with dimensionality p 23 | % n - # of samples 24 | % p - # of dimensions 25 | % test - string (see above, default = 'bcs') 26 | % params - parameters passed through for specific tests 27 | % alpha - alpha level (default = 0.05) 28 | % stat - corresponding statistic 29 | % pval - p-value 30 | % h - boolean, 1 indicates rejection of null at alpha 31 | % runtime - elapsed time for running test, in seconds 32 | % 33 | % EXAMPLE 34 | % % Independent, but non-spherical data 35 | % sigma = diag([ones(1,25),0.5*ones(1,5)]); 36 | % x = (sigma*randn(50,30)')'; 37 | % % Sphericity tests 38 | % DepTest1(x,'test','john') 39 | % DepTest1(x,'test','wang') 40 | % DepTest1(x,'test','sign') 41 | % DepTest1(x,'test','bcs') 42 | % % Independence tests 43 | % DepTest1(x,'test','spearman') 44 | % DepTest1(x,'test','kendall') 45 | % 46 | % REFERENCE 47 | % Han & Liu (2014). Distribution-free tests of independence with 48 | % applications to testing more structures. arXiv:1410.4179v1 49 | % Ledoit & Wolf (2002). Some hypothesis tests for the covariance matrix 50 | % when the dimension is large compared to the sample size. Annals of 51 | % Statistics 30: 1081-1102 52 | % Wang, Q and Yao J (2013). On the sphericity test with large-dimensional 53 | % observations. Electronic Journal of Statistics 7: 2164-2192 54 | % Zou et al (2014). Multivariate sign-based high-dimensional tests for 55 | % sphericity. 
Biometrika 101: 229-236 56 | % 57 | % SEE ALSO 58 | % DepTest2, UniSphereTest 59 | 60 | % $ Copyright (C) 2017 Brian Lau, brian.lau@upmc.fr $ 61 | % The full license and most recent version of the code can be found 62 | % https://github.com/brian-lau/highdim 63 | % 64 | % This program is free software: you can redistribute it and/or modify 65 | % it under the terms of the GNU General Public License as published by 66 | % the Free Software Foundation, either version 3 of the License, or 67 | % (at your option) any later version. 68 | % 69 | % This program is distributed in the hope that it will be useful, 70 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 71 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 72 | % GNU General Public License for more details. 73 | 74 | classdef DepTest1 < handle 75 | properties 76 | x 77 | end 78 | properties (Dependent=true,SetAccess=private) 79 | n 80 | p 81 | end 82 | properties 83 | test 84 | params 85 | alpha = 0.05; 86 | end 87 | properties (SetAccess=private) 88 | stat 89 | pval 90 | h 91 | runtime 92 | end 93 | properties (Hidden=true,SetAccess=private) 94 | mc % monte carlo samples of empirical null distribution 95 | autoRun 96 | validTests = {'spearman' 'kendall' 'sign' 'bcs' ... 97 | 'john' 'nagao' 'wang'}; 98 | end 99 | properties(SetAccess = protected) 100 | version = '0.1.0' 101 | end 102 | 103 | methods 104 | function self = DepTest1(varargin) 105 | if (nargin == 1) || (rem(nargin,2) == 1) 106 | varargin = {'x' varargin{:}}; 107 | end 108 | 109 | par = inputParser; 110 | par.KeepUnmatched = true; 111 | addParamValue(par,'x',[],@isnumeric); 112 | addParamValue(par,'autoRun',true,@islogical); 113 | addParamValue(par,'test','spearman',@ischar); 114 | parse(par,varargin{:}); 115 | 116 | self.autoRun = par.Results.autoRun; 117 | self.params = par.Unmatched; 118 | self.test = par.Results.test; 119 | self.x = par.Results.x; 120 | end 121 | 122 | function set.x(self,x) 123 | [n,p] = size(x); 124 | % Clear cache of monte-carlo samples if dimensions change 125 | % Only applies for rank-based tests of independence 126 | if (self.n~=n) || (self.p~=p) 127 | self.mc = []; 128 | end 129 | self.x = x; 130 | if ~isempty(self.x) && self.autoRun 131 | self.run(); 132 | end 133 | end 134 | 135 | function set.test(self,test) 136 | test = lower(test); 137 | if any(strcmp(test,self.validTests)) 138 | self.test = test; 139 | if ~isempty(self.x) && self.autoRun 140 | self.run(); 141 | end 142 | else 143 | error('Invalid test'); 144 | end 145 | end 146 | 147 | function set.params(self,params) 148 | self.params = params; 149 | if ~isempty(self.x) && self.autoRun 150 | self.run(); 151 | end 152 | end 153 | 154 | function set.alpha(self,alpha) 155 | assert((alpha>0)&&(alpha<1),'00)&&(alpha<1),'0=stat)/nboot; 197 | end 198 | end 199 | end -------------------------------------------------------------------------------- /+dep/dcovtest.m: -------------------------------------------------------------------------------- 1 | % DCOVTEST Distance covariance test of independence 2 | % 3 | % [pval,r,stat,null] = dcovtest(x,y,varargin) 4 | % 5 | % Given a sample X1,...,Xn from a p-dimensional multivariate distribution, 6 | % and a sample Y1,...,Xn from a q-dimensional multivariate distribution, 7 | % test the hypothesis: 8 | % 9 | % H0 : X and Y are mutually independent 10 | % 11 | % This hypothesis is tested using several different permutation methods. 
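% In its simplest form ('perm-brute' below), such a test is just a few lines;
% the following sketch is illustrative only, the methods below implement
% faster equivalents of the same idea:
%
%    d = dep.dcov(x,y);                          % observed statistic
%    null = zeros(nboot,1);
%    for i = 1:nboot
%       null(i) = dep.dcov(x,y(randperm(n),:));  % recompute with permuted y
%    end
%    pval = (1 + sum(null > d)) / (1 + nboot);
%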
12 | % 13 | % The default permutation method avoids permuting the data altogether 14 | % by approximating the permutation distribution using a moment-matched 15 | % Pearson Type III distribution (Bilodeau & Guetsop Nangue 2017; Josse 16 | % et al 2008; Minas & Montana 2014). The first three moments of the 17 | % permutation distribution can be calculated exactly for distance 18 | % covariance and related statistics (Kazi-Aoual et al 1995), and the 19 | % Pearson type III fit using these moments is a robust and accurate 20 | % approximation to the null distribution (Josse et al 2008). Since this 21 | % method does not actually permute the data, it is very fast, achieving 22 | % the same statistical power that would otherwise require millions of 23 | % permutations (Minas & Montana, 2014). 24 | % 25 | % Testing using actual permutations of the data are also implemented. 26 | % Naive permutation of the rows of X or Y is expensive due to O(n^2) 27 | % distance calculations. This can be avoided since it is equivalent to 28 | % simultaneously permuting the rows and columns of the distance matrix, 29 | % and recomputing the statistic with the permuted distance matrix. 30 | % 31 | % INPUTS 32 | % x - [n x p] n samples of dimensionality p 33 | % y - [n x q] n samples of dimensionality q 34 | % 35 | % OPTIONAL (as name/value pairs, order irrelevant) 36 | % method - 'pearson' - Pearson type III approx by moment matching (DEFAULT) 37 | % 'perm' - randomization using permutation of the rows & 38 | % columns of the double-centered distance matrices 39 | % 'perm-dist' - randomization using permutation of the rows & 40 | % columns of distance matrices 41 | % 'perm-brute' - brute force randomization, directly permuting 42 | % one of the inputs, which requires recalculating 43 | % and centering distance matrices 44 | % nboot - # permutations if method != 'pearson' 45 | % 46 | % OUTPUTS 47 | % pval - p-value 48 | % d - distance covariance 49 | % stat - test statistic 50 | % null - permutation statistics 51 | % 52 | % EXAMPLE 53 | % rng(1234); 54 | % p = 100; 55 | % n = 2000; 56 | % X = rand(n,p); Y = X.^2 + 15*randn(n,p); 57 | % 58 | % tic;[pval,d] = dep.dcovtest(X,Y,'method','pearson'); toc 59 | % [pval, d] 60 | % tic;[pval,d] = dep.dcovtest(X,Y,'method','pearson','unbiased',true); toc 61 | % [pval, d] 62 | % tic;[pval,d] = dep.dcovtest(X,Y,'method','perm','nboot',200);toc 63 | % [pval, d] 64 | % tic;[pval,d] = dep.dcovtest(X,Y,'method','perm-brute','nboot',200);toc 65 | % [pval, d] 66 | % 67 | % REFERENCE 68 | % Bilodeau & Guetsop Nangue (2017). Approximations to permutation tests 69 | % of independence between two random vectors. 70 | % Computational Statistics & Data Analysis, submitted. 71 | % Josse, Pages & Husson (2008). Testing the significance of the RV 72 | % coefficient. Computational Statistics and Data Analysis. 53: 82-91 73 | % Kazi-Aoual et al (1995). Refined approximations to permutation tests 74 | % for multivariate inference. Computational Statistics & Data Analysis. 75 | % 20: 643-656 76 | % Minas & Montana (2014). Distance-based analysis of variance: 77 | % Approximate inference. Statistical Analysis & Data Mining. 
7: 450-470 78 | % 79 | % SEE ALSO 80 | % dcov, dcorr, dcorrtest, DepTest2 81 | 82 | % $ Copyright (C) 2017 Brian Lau, brian.lau@upmc.fr $ 83 | % The full license and most recent version of the code can be found at: 84 | % https://github.com/brian-lau/highdim 85 | % 86 | % This program is free software: you can redistribute it and/or modify 87 | % it under the terms of the GNU General Public License as published by 88 | % the Free Software Foundation, either version 3 of the License, or 89 | % (at your option) any later version. 90 | % 91 | % This program is distributed in the hope that it will be useful, 92 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 93 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 94 | % GNU General Public License for more details. 95 | 96 | function [pval,d,stat,varargout] = dcovtest(x,y,varargin) 97 | 98 | par = inputParser; 99 | par.KeepUnmatched = true; 100 | addRequired(par,'x',@isnumeric); 101 | addRequired(par,'y',@isnumeric); 102 | addParamValue(par,'method','pearson',@ischar); 103 | addParamValue(par,'nboot',999,@(x) isnumeric(x) && isscalar(x)); 104 | parse(par,x,y,varargin{:}); 105 | 106 | [n,~] = size(x); 107 | assert(n == size(y,1),'DCOVTEST requires x and y to have the same # of samples'); 108 | 109 | permMethods = {'perm' 'perm-dist' 'perm-brute'}; 110 | nboot = par.Results.nboot; 111 | method = lower(par.Results.method); 112 | 113 | switch method 114 | case {'pearson'} 115 | [d,~,~,A,B] = dep.dcov(x,y,par.Unmatched); 116 | 117 | if isfield(par.Unmatched,'unbiased') && par.Unmatched.unbiased 118 | stat = (n*(n-3))*d; % = sum(sum(A.*B)) for unbiased estimator 119 | else 120 | stat = (n^2)*d^2; % = sum(sum(A.*B)) for biased estimator 121 | end 122 | 123 | [pval,stat] = utils.pearsonIIIpval(A,B,stat); 124 | case {'perm'} 125 | if isfield(par.Unmatched,'unbiased') && par.Unmatched.unbiased 126 | % This only works for BIASED estimator, since distance matrices are 127 | % necessary for calculating the UNBIASED estimator 128 | error('Cannot use unbiased estimator for method = ''perm'''); 129 | end 130 | [d,~,~,A,B] = dep.dcov(x,y); 131 | 132 | null = zeros(nboot,1); 133 | for i = 1:nboot 134 | ind = randperm(n); 135 | null(i) = dep.dcov(A,B(ind,ind),'doublecenter',true); 136 | end 137 | case {'perm-dist'} 138 | a = sqrt(utils.sqdist(x,x)); 139 | b = sqrt(utils.sqdist(y,y)); 140 | d = dep.dcov(a,b,'dist',true); 141 | 142 | null = zeros(nboot,1); 143 | for i = 1:nboot 144 | ind = randperm(n); 145 | null(i) = dep.dcov(a,b(ind,ind),'dist',true); 146 | end 147 | case {'perm-brute'} 148 | d = dep.dcov(x,y,par.Unmatched); 149 | 150 | null = zeros(nboot,1); 151 | for i = 1:nboot 152 | ind = randperm(n); 153 | null(i) = dep.dcov(x,y(ind,:),par.Unmatched); 154 | end 155 | otherwise 156 | error('Unrecognized test method'); 157 | end 158 | 159 | % One of the permutation methods 160 | if any(strcmp(method,permMethods)) 161 | if ~exist('stat','var') 162 | stat = d; 163 | end 164 | pval = (1 + sum(null>stat)) / (1 + nboot); 165 | end 166 | 167 | if nargout == 4 168 | if exist('null','var') 169 | varargout{1} = null; 170 | else 171 | varargout{1} = []; 172 | end 173 | end -------------------------------------------------------------------------------- /+utils/rfm.m: -------------------------------------------------------------------------------- 1 | % RFM Random feature maps for Gaussian kernel 2 | % 3 | % [phi,W,rngState] = rfm(X,varargin) 4 | % 5 | % INPUTS 6 | % X - [n x d] n samples of dimensionality d 7 | % 8 | % OPTIONAL 9 | % sigma 
- scalar, standard deviation of Gaussian kernel, default = 1
10 | %     sampling - string indicating method for sampling random features
11 | %                'uniform' - Classic random Fourier features (DEFAULT)
12 | %                'qmc'     - Quasi-Monte Carlo using Halton sequence
13 | %                'orf'     - Orthogonal Random Features
14 | %                'sorf'    - Structured Orthogonal Random Features
15 | %                'mm'      - Moment-Matched
16 | %     D        - scalar, target dimensionality of feature map
17 | %     W        - [D x d] pre-computed feature map, convenience for
18 | %                applying the feature map to new data
19 | %     complex  - boolean, true returns map as complex
20 | %     sincos   - boolean, true returns sin/cos embedding, default = true
21 | %     The following parameters are specific to sampling = 'qmc'
22 | %     skip     - scalar, # initial points to omit, default = 1000
23 | %     leap     - scalar, # points in between sets, default = 700
24 | %     scramble - boolean, scramble sequence, default = true
25 | %     state    - scalar, state of qmc generator
26 | %
27 | % OUTPUTS
28 | %     phi      - feature mapped data
29 | %                [n x D] when 'complex' = true
30 | %                [n x 2D] when 'complex' = false, cos and sin components stacked
31 | %     W        - [D x d] feature map
32 | %     rngState - state of the RNG before sampling
33 | %
34 | % REFERENCES
35 | %   Felix et al (2016). Orthogonal random features. Advances in Neural
36 | %     Information Processing Systems, 1975-1983
37 | %   Rahimi & Recht (2007). Random features for large-scale kernel machines.
38 | %     Proc 20th Int Conf on Neural Information Processing Systems, 1177-1184
39 | %   Shen et al (2017). Random features for shift-invariant kernels with
40 | %     moment matching. Proc 31st AAAI Conf on AI, 2520-2526
41 | %   Sutherland & Schneider (2015). On the error of random fourier features.
42 | %     UAI'15 Proc 31st Conf on Uncertainty in AI, 862-871
43 | %   Yang et al (2014). Quasi-Monte Carlo feature maps for shift-invariant
44 | %     kernels. Proc 31st Int Conf on Machine Learning (ICML-14), 485-493
45 |
46 | % $ Copyright (C) 2017 Brian Lau, brian.lau@upmc.fr $
47 | % The full license and most recent version of the code can be found at:
48 | % https://github.com/brian-lau/highdim
49 | %
50 | % This program is free software: you can redistribute it and/or modify
51 | % it under the terms of the GNU General Public License as published by
52 | % the Free Software Foundation, either version 3 of the License, or
53 | % (at your option) any later version.
54 | %
55 | % This program is distributed in the hope that it will be useful,
56 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
57 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
58 | % GNU General Public License for more details.
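% NOTE (a brief sketch of why this map approximates the Gaussian kernel; the
% identity follows Rahimi & Recht 2007 and matches the defaults used here):
%   rows of W are drawn as w ~ N(0, I/sigma^2), and the sin/cos embedding
%     phi(x) = sqrt(1/D) * [cos(x*W') , sin(x*W')]
%   satisfies
%     E[ phi(x)*phi(y)' ] = E[ cos(w'*(x-y)) ] = exp(-||x-y||^2/(2*sigma^2)),
%   so phi(x)*phi(y)' is an unbiased estimate of the Gaussian kernel, with
%   error decreasing as D grows (see Sutherland & Schneider 2015 for rates).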
59 |
60 | % TODO
61 | % o ORF, SORF should probably be run in blocks
62 | %   currently generates W that is dxd and extracts Dxd segment
63 | % o fastfood
64 | % o better default D
65 |
66 | function [phi,W,rngState] = rfm(X,varargin)
67 | persistent pstream; % for qmc
68 |
69 | par = inputParser;
70 | par.KeepUnmatched = true;
71 | addRequired(par,'X',@isnumeric);
72 | addParamValue(par,'sigma',[],@(x) isnumeric(x) && isscalar(x));
73 | addParamValue(par,'sampling','uniform',@ischar);
74 | addParamValue(par,'W',[],@ismatrix);
75 | addParamValue(par,'D',2^4,@(x) isnumeric(x) && isscalar(x));
76 | addParamValue(par,'complex',false,@islogical);
77 | addParamValue(par,'sincos',true,@islogical);
78 | addParamValue(par,'skip',1000,@(x) isnumeric(x) && isscalar(x));
79 | addParamValue(par,'leap',700,@(x) isnumeric(x) && isscalar(x));
80 | addParamValue(par,'scramble',true,@(x) isnumeric(x) || islogical(x));
81 | addParamValue(par,'state',[],@(x) isnumeric(x) && isscalar(x));
82 | parse(par,X,varargin{:});
83 |
84 | [n,d] = size(X); % n samples of dimensionality d
85 | D = par.Results.D; % # of random bases
86 | if isempty(par.Results.sigma)
87 |    sigma = utils.sigest(X,par.Unmatched);
88 | else
89 |    sigma = par.Results.sigma;
90 | end
91 |
92 | if nargout == 3
93 |    rngState = rng;
94 | end
95 |
96 | if ~isempty(par.Results.W)
97 |    assert(size(par.Results.W,2)==d,'Feature map dimensionality must match input data');
98 |    W = par.Results.W;
99 | else
100 |    switch lower(par.Results.sampling)
101 |       case {'uniform' 'uni' 'mc' 'rff'}
102 |          % Random fourier features
103 |          W = randn(D,d)/sigma;
104 |       case {'mm'}
105 |          if D < d
106 |             warning('Risk of poor approximation for D << d');
107 |          end
108 |          G = randn(D,d);
109 |          W = utils.whiten(G)/sigma;
110 |       case {'qmc'}
111 |          if isempty(pstream) ...
112 |                || ~isa(pstream,'qrandstream') ...
113 |                || (pstream.PointSet.size(2) ~= d)
114 |             pset = haltonset(d,'Skip',par.Results.skip,...
115 |                'Leap',par.Results.leap);
116 |             if par.Results.scramble
117 |                pset = scramble(pset,'RR2');
118 |             end
119 |             % Persistent stream to properly increment draws on subsequent calls
120 |             pstream = qrandstream(pset);
121 |             %fprintf('Halton random stream opened\n')
122 |          end
123 |
124 |          if ~isempty(par.Results.state)
125 |             pstream.State = par.Results.state;
126 |          end
127 |          %fprintf('Stream state: %g\n',pstream.State);
128 |          omega = pstream.qrand(D);
129 |          W = norminv(omega,0,1)/sigma;
130 |       case {'orf'}
131 |          G = randn(max(d,D),max(d,D));
132 |          [Q,~] = qr(G);
133 |
134 |          % Chi-distributed with max(d,D) degrees of freedom
135 |          s = sqrt(chi2rnd(max(d,D),max(d,D),1));
136 |          % S ensures that the row norms of SQ & G are identically distributed
137 |          S = diag(s);
138 |
139 |          W = (S*Q)/sigma;
140 |          W = W(1:D,1:d);
141 |       case {'sorf'}
142 |          n2 = nextpow2(max(D,d));
143 |          % Brute-force matrix multiplication, O(d^2)
144 |          % H = (1/sqrt(2^n2))*hadamard(2^n2);
145 |          % D1 = diag(2*(rand(2^n2,1)<0.5) - 1);
146 |          % D2 = diag(2*(rand(2^n2,1)<0.5) - 1);
147 |          % D3 = diag(2*(rand(2^n2,1)<0.5) - 1);
148 |          % W = sqrt(2^n2)*H*D1*H*D2*H*D3;
149 |
150 |          % Using Fast Hadamard transform, O(d log d)
151 |          Ds = 2*(rand(2^n2,3)<0.5) - 1; % Rademacher distributed diagonals
152 |          HD1 = sqrt(2^n2)*utils.fwht( diag(Ds(:,1)) );
153 |          HD2 = sqrt(2^n2)*utils.fwht( diag(Ds(:,2)) );
154 |          HD3 = sqrt(2^n2)*utils.fwht( diag(Ds(:,3)) );
155 |
156 |          W = sqrt(2^n2)*HD1*HD2*HD3;
157 |          W = W(1:D,1:d)/sigma;
158 |       case {'sc'}
159 |          % Signed Circulant Matrix Projection
160 |          % http://felixyu.org/pdf/cbe_slides.pdf
161 |       otherwise
162 |          error('Unrecognized sampling method');
163 |    end
164 | end
165 |
166 | Z = X*W'; % [n x d] * [D x d]'
167 |
168 | if par.Results.sincos
169 |    % Use the version with sin & cos features, which is more accurate,
170 |    % see Sutherland & Schneider (2015)
171 |    if par.Results.complex
172 |       phi = (cos(Z) - 1i*sin(Z)) * sqrt(1/D);
173 |    else
174 |       phi = [cos(Z) , sin(Z)] * sqrt(1/D);
175 |    end
176 | else
177 |    b = rand(1,D)*2*pi;
178 |    phi = cos(bsxfun(@plus,Z,b)) * sqrt(1/D);
179 | end
180 |
--------------------------------------------------------------------------------
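A quick way to sanity-check the feature maps produced by `rfm` is to compare the approximate Gram matrix `phi*phi'` against the exact Gaussian kernel. The following is a minimal sketch, assuming the kernel convention exp(-||x-y||^2/(2*sigma^2)) noted above and using `utils.sqdist` only to build the exact Gram matrix:
```
% Relative error of the random Fourier feature approximation
n = 500; d = 10; sigma = 2;
X = randn(n,d);
phi = utils.rfm(X,'sigma',sigma,'D',2^10);    % [n x 2D] cos/sin features
Kapprox = phi*phi';                           % approximate Gram matrix
Kexact = exp(-utils.sqdist(X,X)/(2*sigma^2)); % exact Gaussian Gram matrix
norm(Kapprox-Kexact,'fro')/norm(Kexact,'fro') % relative error
```
Increasing `D`, or switching `sampling` to 'qmc' or 'orf', should reduce this error at the cost of a larger or more expensive map.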