├── KCI-test
│ ├── CI_PERM
│ │ ├── COPYING
│ │ ├── hsicTestBootIC.m
│ │ ├── hsiccondIC.m
│ │ ├── hsiccondTestIC.m
│ │ ├── inchol.m
│ │ ├── medbw.m
│ │ ├── pickK.m
│ │ └── rbf.m
│ ├── CONTENT
│ ├── COPYING
│ ├── README
│ ├── UInd_KCItest.m
│ ├── UInd_KCItest_RFF.m
│ ├── algorithms
│ │ ├── CInd_test_new_withGP.m
│ │ ├── CInd_test_new_withGP_t.m
│ │ ├── CInd_test_new_withGP_t_RFF.m
│ │ ├── check_markov_equiv.m
│ │ ├── condVect.m
│ │ ├── dist2.m
│ │ ├── eigdec.m
│ │ ├── gpr_multi.m
│ │ ├── gpr_multi2.m
│ │ ├── gpr_multi_alln.m
│ │ ├── gpr_multi_alln_K.m
│ │ ├── kernel.m
│ │ ├── logdet.m
│ │ ├── minimize.m
│ │ ├── pdinv.m
│ │ └── stack.m
│ ├── data
│ │ ├── README
│ │ ├── abalone.dat
│ │ ├── boston_names
│ │ └── boston_wout_discrete.dat
│ ├── exp
│ │ └── simulation1
│ │ │ ├── CInd_test_new_withGP_UsedInUAIPaper_ButNotFinal.m
│ │ │ ├── test_effect_D_caseI.m
│ │ │ └── test_effect_D_caseII.m
│ ├── gpml-matlab
│ │ ├── README
│ │ ├── doc
│ │ │ ├── alg21.gif
│ │ │ ├── alg31.gif
│ │ │ ├── alg32.gif
│ │ │ ├── alg35.gif
│ │ │ ├── alg36.gif
│ │ │ ├── alg51.gif
│ │ │ ├── alg52.gif
│ │ │ ├── classification.html
│ │ │ ├── fig2d.gif
│ │ │ ├── fig2de1.gif
│ │ │ ├── fig2de2.gif
│ │ │ ├── fig2de3.gif
│ │ │ ├── fig2dl1.gif
│ │ │ ├── fig2dl2.gif
│ │ │ ├── fig2dl3.gif
│ │ │ ├── figepp.gif
│ │ │ ├── figepp2.gif
│ │ │ ├── figl.gif
│ │ │ ├── figl1.gif
│ │ │ ├── figlapp.gif
│ │ │ ├── figlapp2.gif
│ │ │ ├── figlf.gif
│ │ │ ├── figlm.gif
│ │ │ ├── index.html
│ │ │ ├── regression.html
│ │ │ ├── sparse-approx.html
│ │ │ └── style.css
│ │ ├── gpml-demo
│ │ │ ├── Contents.m
│ │ │ ├── data_6darm.mat
│ │ │ ├── data_boston.mat
│ │ │ ├── demo_ep_2d.m
│ │ │ ├── demo_ep_usps.m
│ │ │ ├── demo_gparm.m
│ │ │ ├── demo_gpr.m
│ │ │ ├── demo_gprsparse.m
│ │ │ ├── demo_laplace_2d.m
│ │ │ └── demo_laplace_usps.m
│ │ └── gpml
│ │ │ ├── Contents.m
│ │ │ ├── Copyright
│ │ │ ├── Makefile
│ │ │ ├── approxEP.m
│ │ │ ├── approxLA.m
│ │ │ ├── approximations.m
│ │ │ ├── binaryEPGP.m
│ │ │ ├── binaryGP.m
│ │ │ ├── binaryLaplaceGP.m
│ │ │ ├── covConst.m
│ │ │ ├── covFunctions.m
│ │ │ ├── covLINard.m
│ │ │ ├── covLINone.m
│ │ │ ├── covMatern3iso.m
│ │ │ ├── covMatern5iso.m
│ │ │ ├── covNNone.m
│ │ │ ├── covNoise.m
│ │ │ ├── covPeriodic.m
│ │ │ ├── covProd.m
│ │ │ ├── covRQard.m
│ │ │ ├── covRQiso.m
│ │ │ ├── covSEard.m
│ │ │ ├── covSEiso.m
│ │ │ ├── covSum.m
│ │ │ ├── cumGauss.m
│ │ │ ├── gauher.m
│ │ │ ├── gpr.m
│ │ │ ├── gpr2.m
│ │ │ ├── gprSRPP.m
│ │ │ ├── likelihoods.m
│ │ │ ├── logistic.m
│ │ │ ├── minimize.m
│ │ │ ├── solve_chol.c
│ │ │ ├── solve_chol.m
│ │ │ ├── sq_dist.c
│ │ │ ├── sq_dist.log
│ │ │ └── sq_dist.m
│ ├── indtest_corr.m
│ ├── indtest_hsic.m
│ ├── indtest_new.m
│ ├── indtest_new_t.m
│ └── indtest_new_t_RFF.m
├── README
├── README.md
├── cd_non_con_fun.m
├── example1.m
├── example2.m
├── example3.m
├── example4.m
├── infer_nonsta_dir.m
├── kPCA_kernel_orig.m
├── meeks.m
├── nonsta_cd_new.m
├── nonsta_cd_new_multi.m
├── pdinv.m
├── setdiag.m
├── smooth_module.mat
└── transformFeatures.m
/KCI-test/CI_PERM/COPYING:
--------------------------------------------------------------------------------
1 | Copyright (c) 2010-2011 Robert Tillman [rtillman@cmu.edu]
2 | Copyright (c) 2007 Arthur Gretton [arthur.gretton@tuebingen.mpg.de]
3 | Copyright (c) 2005 Francis Bach [francis.bach@ens.fr]
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | - Redistributions of source code must retain the above copyright notice,
10 | this list of conditions and the following disclaimer.
11 | - Redistributions in binary form must reproduce the above copyright notice,
12 | this list of conditions and the following disclaimer in the documentation
13 | and/or other materials provided with the distribution.
14 |
15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
19 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 | POSSIBILITY OF SUCH DAMAGE.
26 |
--------------------------------------------------------------------------------
/KCI-test/CI_PERM/hsicTestBootIC.m:
--------------------------------------------------------------------------------
1 | %This function implements the HSIC independence test using a bootstrap approximation
2 | %to the test threshold
3 |
4 | %Inputs:
5 | % X contains dx columns, m rows. Each row is an i.i.d. sample
6 | % Y contains dy columns, m rows. Each row is an i.i.d. sample
7 | % alpha is the level of the test
8 | % shuffles is number of shuffles to approximate null distribution
9 |
10 | %Outputs:
11 | % sig: boolean indicator of whether the test was significant
12 | % p: p-value
13 |
14 | %Set kernel size to median distance between points, if no kernel specified
15 |
16 | % Copyright (c) 2010 Robert Tillman [rtillman@cmu.edu]
17 | % 2007 Arthur Gretton [arthur.gretton@tuebingen.mpg.de]
18 | % All rights reserved. See the file COPYING for license terms.
19 |
20 | function [sig,p] = hsicTestBootIC(X,Y,alpha,shuffles);
21 |
22 | % tolerance for incomplete Cholesky
23 | tol = 1e-12;
24 |
25 | m=size(X,1);
26 |
27 | % set kernel size to median distance between points
28 | maxpoints = 1000;
29 | sigx = medbw(X, maxpoints);
30 | sigy = medbw(Y, maxpoints);
31 |
32 | %Compute the approximations of Gram matrices
33 | [K, Pk] = inchol(X,sigx,tol);
34 | [L, Pl] = inchol(Y,sigy,tol);
35 |
36 | %bone = ones(m,1);
37 | %H = eye(m)-1/m*ones(m,m);
38 |
39 | % center Gram matrices and permute indices
40 | Kc = K(Pk,:) - repmat((sum(K)/m),m,1);
41 | Lc = L(Pl,:) - repmat((sum(L)/m),m,1);
42 |
43 | testStat = (1/m^2)*sum(sum(Kc.*((Kc'*Lc)*Lc')'));
44 |
45 | HSICarr = zeros(shuffles,1);
46 | for whichSh=1:shuffles
47 |
48 | [notUsed,indL] = sort(rand(m,1));
49 |
50 | newLc = Lc(indL,:);
51 | HSICarr(whichSh) = (1/m^2)*sum(sum(Kc.*((Kc'*newLc)*newLc')'));
52 |
53 | end
54 |
55 | % get p-value from empirical cdf
56 | p = length(find(HSICarr>=testStat))/shuffles;
57 |
58 | % determine significance
59 | sig=(p<=alpha);
60 |
--------------------------------------------------------------------------------
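A minimal usage sketch for hsicTestBootIC (the data, level, and shuffle count below are illustrative choices, not package defaults):

    X = randn(200,1);
    Y = X.^2 + 0.5*randn(200,1);                 % Y depends nonlinearly on X
    [sig,p] = hsicTestBootIC(X, Y, 0.05, 1000);  % sig should typically be 1 here
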
/KCI-test/CI_PERM/hsiccondIC.m:
--------------------------------------------------------------------------------
1 | % Conditional dependence operator empirical estimator with incomplete Cholesky
2 | % factorization for low rank approximation of Gram matrices
3 | %
4 | % Arguments:
5 | % Gx low rank approximation for centered Gram matrix for X
6 | % Gy low rank approximation for centered Gram matrix for Y
7 | % Gz low rank approximation for centered Gram matrix for Z
8 | % epsilon the smoothing constant
9 | %
10 | % Output:
11 | % emphsic the test statistic
12 | %
13 | % Copyright (c) 2010 Robert Tillman [rtillman@cmu.edu]
14 | % All rights reserved. See the file COPYING for license terms.
15 |
16 | function emphsic = hsiccondIC(Gx,Gy,Gz,epsilon)
17 |
18 | n = size(Gx,1);
19 | if (n~=size(Gy,1) || n~=size(Gz,1))
20 | error('Gx, Gy, and Gz must have the same number of rows');
21 | end
22 | if (epsilon<=0)
23 | error('epsilon must be > 0');
24 | end
25 |
26 | mx = size(Gx,2);
27 | my = size(Gy,2);
28 | mz = size(Gz,2);
29 |
30 | [Ux, Sx, Vx] = svd(Gx,'econ');
31 | [Uy, Sy, Vy] = svd(Gy,'econ');
32 | [Uz, Sz, Vz] = svd(Gz,'econ');
33 |
34 | Sxsq = diag(Sx).^2;
35 | Sysq = diag(Sy).^2;
36 | Szsq = diag(Sz).^2;
37 | Szsqe = Szsq + epsilon;
38 | Szsqt = Szsq./Szsqe;
39 |
40 | % first term: Tr[GxGx' GyGy']
41 | first = sum(sum((Ux*(diag(Sxsq)*(Ux'*Uy)*diag(Sysq))).*Uy));
42 |
43 | % second term: -2*Tr[GxGx' GzGz'(GzGz' + epsilon*I)^(-1) GyGy']
44 | second1 = Ux*(diag(Sxsq)*(Ux'*Uz)*diag(Szsqt)*(Uz'*Uy)*diag(Sysq));
45 | second = -2*sum(sum(second1.*Uy));
46 |
47 | % third term: Tr[GxGx' GzGz'(GzGz' + epsilon*I)^(-1) GyGy' GzGz'(GzGz' + epsilon*I)^(-1)]
48 | third = sum(sum((second1*(Uy'*Uz)*diag(Szsqt)).*Uz));
49 |
50 | % compute test statistic using first, second, and third terms above with
51 | % the U-statistic
52 | emphsic = (first+second+third)/((n-1)^2);
53 |
--------------------------------------------------------------------------------
/KCI-test/CI_PERM/hsiccondTestIC.m:
--------------------------------------------------------------------------------
1 | % Statistical test for kernel conditional independence of X and Y given Z with
2 | % incomplete Cholesky factorization for low rank approximation of Gram matrices
3 | %
4 | % Arguments:
5 | % X n x p matrix of data points
6 | % Y n x m matrix of data points
7 | % Z n x r matrix of data points
8 | % alpha significance level
9 | % shuffles number of shuffles for the permutation test
10 | %
11 | % Output:
12 | % sig boolean indicator of whether the test was significant for the given alpha
13 | % p resulting p-value
14 | %
15 | % Copyright (c) 2010 Robert Tillman [rtillman@cmu.edu]
16 | % All rights reserved. See the file COPYING for license terms.
17 |
18 | function [sig,p,testStat] = hsiccondTestIC(X,Y,Z,alpha,shuffles)
19 |
20 | n = size(X,1);
21 | if (n~=size(Y,1) || n~=size(Z,1))
22 | error('X, Y, and Z must have the same number of data points');
23 | end
24 | if (alpha<0 || alpha>1)
25 | error('alpha must be between 0 and 1');
26 | end
27 | if (shuffles<=0 || shuffles~=int32(shuffles))
28 | error('number of shuffles must be a positive integer');
29 | end
30 |
31 | % smoothing constant for conditional cross covariance operator
32 | epsilon=1e-4;
33 | % threshold for eigenvalues to consider in low rank Gram matrix approximations
34 | tol = 1e-4;
35 |
36 | % augment X and Y for conditional test
37 | X = [X,Z];
38 | Y = [Y,Z];
39 |
40 | % set kernel size to median distance between points
41 | maxpoints = 1000;
42 | sigx = medbw(X, maxpoints);
43 | sigy = medbw(Y, maxpoints);
44 | sigz = medbw(Z, maxpoints);
45 |
46 | % low rank approximation of Gram matrices using incomplete Cholesky factorization
47 | [K, Pk] = inchol(X,sigx,tol);
48 | [L, Pl] = inchol(Y,sigy,tol);
49 | [M, Pm] = inchol(Z,sigz,tol);
50 |
51 | % center Gram matrices factoring in permutations made during low rank approximation
52 | Kc = K(Pk,:) - repmat((sum(K)/n),n,1);
53 | Lc = L(Pl,:) - repmat((sum(L)/n),n,1);
54 | Mc = M(Pm,:) - repmat((sum(M)/n),n,1);
55 |
56 | % compute the U-statistic
57 | %pairs = nchoosek(1:n,2);
58 | %bz = n*(n-1)/sum(rbf(Z(pairs(:,1)),Z(pairs(:,2)),sigz).^2);
59 |
60 | % compute HSIC dependence value
61 | testStat = hsiccondIC(Kc,Lc,Mc,epsilon);
62 |
63 | % first cluster Z;
64 | nc = pickK(Z);
65 | clusters = kmeans(Z,nc,'EmptyAction','drop','MaxIter',1000,'Display','off');
66 | %[centers,clusters,datapoints] = MeanShiftCluster(Z,sigz,false);
67 | %nc = length(centers);
68 |
69 | % simulate null distribution and permutation test
70 | nullapprox = zeros(shuffles,1);
71 | for i=1:shuffles
72 | % permute within clusters
73 | Plnew = 1:n;
74 | for j=1:nc
75 | indj = find(clusters==j);
76 | pj = indj(randperm(length(indj)));
77 | Plnew(indj) = Plnew(pj);
78 | end
79 | % centered Gram matrix for new sample
80 | newLc = Lc(Plnew,:);
81 | % compute HSIC dependence value for new sample
82 | nullapprox(i)=hsiccondIC(Kc,newLc,Mc,epsilon);
83 | end
84 |
85 | % get p-value from empirical cdf
86 | p = length(find(nullapprox>=testStat))/shuffles;
87 |
88 | % determine significance
89 | sig=(p<=alpha);
90 |
--------------------------------------------------------------------------------
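A minimal usage sketch for hsiccondTestIC (illustrative data; here X and Y are conditionally independent given Z, so p should typically be large):

    Z = randn(300,1);
    X = Z + 0.3*randn(300,1);
    Y = Z + 0.3*randn(300,1);
    [sig,p] = hsiccondTestIC(X, Y, Z, 0.05, 500);
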
/KCI-test/CI_PERM/inchol.m:
--------------------------------------------------------------------------------
1 | % Incomplete Cholesky factorization with RBF kernel
2 | %
3 | % Description:
4 | % Finds low rank approximation of RBF kernel Gram matrix K = PGG'P for the
5 | % n x p data matrix X. Here, K is an n x n Gram matrix, G is n x m with m << n,
6 | % and P is a permutation matrix.
7 | %
8 | % Arguments:
9 | % X n x p data matrix
10 | % sigma bandwidth for RBF kernel
11 | % tol threshold for remaining eigenvalues to consider
12 | %
13 | % Output:
14 | % G n x m matrix (m << n)
15 | % P n vector of permutation indices
16 | %
17 | %
18 | % Adapted from Francis Bach's Cholesky with side information implementation
19 | %
20 | % Copyright (c) 2010 Robert Tillman [rtillman@cmu.edu]
21 | % 2005 Francis Bach [francis.bach@ens.fr]
22 | % All rights reserved. See the file COPYING for license terms.
23 | function [G,P] = inchol(X,sigma,tol)
24 |
25 | if (sigma<=0)
26 | error('sigma must be > 0');
27 | end
28 | if (tol<=0)
29 | error('tol must be > 0');
30 | end
31 |
32 | n = size(X,1);
33 | % begin with full matrix
34 | G = zeros(n,n);
35 | % using RBF kernel so diagonal entries are ones
36 | diagK = ones(n,1);
37 | % permutation indices;
38 | P = 1:n;
39 | % updated diagonal elements
40 | D = diagK;
41 |
42 | % construct columns of K until threshold is met
43 | for k=1:n
44 |
45 | % select next most informative pivot
46 | best = D(k);
47 | bestInd = k;
48 | for j=k:n
49 | if (D(j) > best/.99)
50 | best = D(j);
51 | bestInd = j;
52 | end
53 | end
54 |
55 | % threshold met so remove columns to the right and break
56 | if (best<tol)
57 | G = G(:,1:(k-1));
58 | break;
59 | end
60 | 
61 | % move the pivot to the current position
62 | P([k bestInd]) = P([bestInd k]);
63 | G([k bestInd],1:(k-1)) = G([bestInd k],1:(k-1));
64 | D([k bestInd]) = D([bestInd k]);
65 | 
66 | % compute the new column
67 | G(k,k) = sqrt(D(k));
68 | dists = sum((X(P((k+1):n),:) - repmat(X(P(k),:),n-k,1)).^2,2);
69 | G((k+1):n,k) = (exp(-.5*dists/sigma^2) - G((k+1):n,1:(k-1))*G(k,1:(k-1))')/G(k,k);
70 | 
71 | % update the remaining diagonal elements
72 | D((k+1):n) = D((k+1):n) - G((k+1):n,k).^2;
73 | 
74 | end
75 | 
--------------------------------------------------------------------------------
/KCI-test/CI_PERM/medbw.m:
--------------------------------------------------------------------------------
1 | % Median distance bandwidth heuristic
2 | %
3 | % Description:
4 | % Sets the RBF kernel bandwidth to the median distance between the points
5 | % in X, using at most maxpoints points.
6 | %
7 | % Arguments:
8 | % X n x p data matrix
9 | % maxpoints maximum number of points used to compute the median
10 | %
11 | % Output:
12 | % sigma kernel bandwidth
13 | %
14 | % Copyright (c) 2010 Robert Tillman [rtillman@cmu.edu]
15 | % 2007 Arthur Gretton [arthur.gretton@tuebingen.mpg.de]
16 | % All rights reserved. See the file COPYING for license terms.
17 | 
18 | function sigma = medbw(X, maxpoints)
19 | 
20 | if (maxpoints<=0)
21 | error('maxpoints must be > 0');
22 | end
23 | 
24 | n = size(X,1);
25 | 
26 | if (n>maxpoints)
27 | med = X(1:maxpoints,:);
28 | n = maxpoints;
29 | else
30 | med = X;
31 | end
32 |
33 | % finds median distance between points
34 | G = sum((med.*med),2);
35 | Q = repmat(G,1,n);
36 | R = repmat(G',n,1);
37 | dists = Q + R - 2*med*med';
38 | dists = dists-tril(dists);
39 | dists=reshape(dists,n^2,1);
40 | sigma = sqrt(0.5*median(dists(dists>0)));
41 |
--------------------------------------------------------------------------------
/KCI-test/CI_PERM/pickK.m:
--------------------------------------------------------------------------------
1 | % picks number of clusters for k-means clustering
2 | % Copyright (c) 2010 Robert Tillman [rtillman@cmu.edu]
3 | % All rights reserved. See the file COPYING for license terms.
4 | function k = pickK(X)
5 |
6 | a = 1;
7 | n = size(X,1);
8 | b = n;
9 | step = 2;
10 |
11 | v = sum(diag(diag(var(X))));
12 |
13 | while (step>1&&b<=n)
14 |
15 | step = max(round((b-a+1)/10),1);
16 |
17 | for k=a:step:b
18 |
19 | [idx, c, sumd] = kmeans(X,k,'EmptyAction','drop','MaxIter',1000,'Display','off');
20 | % [idx, c, sumd] = kmeans(X,k=k,maxloops=1000);
21 |
22 | c = sum(sumd)/n;
23 |
24 | if (k~=a)
25 | if ((lastc-c)/v<.05)
26 | k = k-step;
27 | break;
28 | end
29 | end
30 |
31 | lastc = c;
32 |
33 | end
34 |
35 | a = k;
36 | b = k+step;
37 |
38 | end
39 |
--------------------------------------------------------------------------------
/KCI-test/CI_PERM/rbf.m:
--------------------------------------------------------------------------------
1 | % RBF kernel evaluation
2 | %
3 | % Description:
4 | % Evaluates RBF kernel for n points
5 | %
6 | % Input:
7 | % x1 n x p matrix (n points with dimensionality p)
8 | % x2 n x p matrix (n points with dimensionality p)
9 | % sigma kernel bandwidth
10 | %
11 | % Output:
12 | % k n x 1 matrix of k(x1,x2) evaluations
13 | %
14 | % Copyright (c) 2010 Robert Tillman [rtillman@cmu.edu]
15 | % All rights reserved. See the file COPYING for license terms.
16 |
17 | function k = rbf(x1,x2,sigma)
18 |
19 | if (size(x1,1)~=size(x2,1))
20 | error('x1 and x2 must contain the same number of data points');
21 | end
22 | if (size(x1,2)~=size(x2,2))
23 | error('x1 and x2 must be of the same dimensionality');
24 | end
25 | if (sigma<=0)
26 | error('sigma must be > 0');
27 | end
28 |
29 | k = exp(-.5*sum((x1-x2).^2,2)/(sigma^2));
30 |
--------------------------------------------------------------------------------
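A short sketch of the row-wise convention documented above: rbf returns one kernel value per row pair, k(i) = exp(-||x1(i,:)-x2(i,:)||^2/(2*sigma^2)):

    x1 = randn(5,2); x2 = randn(5,2);
    k = rbf(x1, x2, 1.0);   % 5 x 1 vector of kernel evaluations
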
/KCI-test/CONTENT:
--------------------------------------------------------------------------------
1 |
2 | - indtest_new: WRAPPER performing KCI-test, the conditional independence testing method submitted to UAI 2011;
3 |
4 | - UInd_KCItest: function to perform the unconditional independence test given in our paper; it is theoretically equivalent to Gretton et al. (2008) but is computationally cheaper when generating the null distribution with simulations.
5 |
6 | - indtest_corr: partial correlation test;
7 |
8 | - indtest_hsic: WRAPPER performing either an HSIC test (Gretton et al., 2008) or CI_PERM
9 |
10 |
11 | DIRECTORIES:
12 | - algorithms: contains the functions which are called by the conditional independence testing method or the PC algorithm;
13 |
14 | - exp: contains the files used in simulations and experiments;
15 |
16 | - data: contains the real-world data used in the experiments.
17 |
18 | - CI_PERM: contains the functions which are used in CI_PERM, the conditional independence testing method which combines the conditional dependence measure (Fukumizu et al., 2008) with local bootstrapping.
19 |
20 | - gpml-matlab: the gpml toolbox.
21 |
--------------------------------------------------------------------------------
/KCI-test/COPYING:
--------------------------------------------------------------------------------
1 | Copyright (c)
2 | 2010-2011 Kun Zhang
3 | 2010-2011 Jonas Peters
4 | 1996-2001 Ian T. Nabney
5 | 2001-2006 Carl Edward Rasmussen
6 |
7 | All rights reserved.
8 |
9 | Redistribution and use in source and binary forms, with or without
10 | modification, are permitted provided that the following conditions are met:
11 |
12 | - Redistributions of source code must retain the above copyright notice,
13 | this list of conditions and the following disclaimer.
14 | - Redistributions in binary form must reproduce the above copyright notice,
15 | this list of conditions and the following disclaimer in the documentation
16 | and/or other materials provided with the distribution.
17 | - Neither the name of the Aston University, Birmingham, U.K.
18 | nor the names of its contributors may be used to endorse or promote products
19 | derived from this software without specific prior written permission.
20 |
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 | POSSIBILITY OF SUCH DAMAGE.
32 |
--------------------------------------------------------------------------------
/KCI-test/README:
--------------------------------------------------------------------------------
1 | Copyright (c)
2 | 2010-2011 Kun Zhang
3 | 2010-2011 Jonas Peters
4 | 2001-2006 Carl Edward Rasmussen
5 | 1996-2001 Ian T. Nabney
6 |
7 | This package contains code for the paper
8 | "A kernel-based conditional independence test and application in causal discovery", K. Zhang, J. Peters, D. Janzing, and B. Schoelkopf, In Proceedings of the 27th Conference on Uncertainty in Artificial Intelligence (UAI 2011), Barcelona, Spain, July 14-17, 2011.
9 |
10 | % Please see CONTENT for the content
11 |
12 | It is written in Matlab 7.7.0 and should work on any machine.
13 |
14 |
15 |
16 |
17 | %%%%%%%%%%%%
18 | CODE NOT PROVIDED
19 | %%%%%%%%%%%%
20 | conditional hsic
21 |
22 |
23 |
24 | %%%%%%%%%%%%%
25 | IMPORTANT FUNCTIONS
26 | %%%%%%%%%%%%%
27 | function [pval, stat] = indtest_new(X, Y, Z, pars)
28 | % function [pval, stat] = indtest_new(X, Y, Z, pars)
29 | %
30 | % This function is a WRAPPER
31 | % Performs new method (submitted to UAI 2011)
32 | %
33 | % INPUT:
34 | % X Nxd1 matrix of samples (N data points, d1 dimensions)
35 | % Y Nxd2 matrix of samples (N data points, d2 dimensions)
36 | % Z Nxd3 matrix of samples (N data points, d3 dimensions)
37 | % pars structure containing parameters for the independence test
38 | % .pairwise if true, the test is performed pairwise if d1>1 (default: false)
39 | % .bonferroni if true, a Bonferroni correction is performed (default: false)
40 | % .width kernel width (default: 0, which results in an automatic -heuristic- choice)
41 | %
42 | % OUTPUT:
43 | % pval p value of the test
44 | % stat test statistic
45 |
46 |
47 | %%%%%%%%%%%%%
48 | REQUIREMENTS
49 | %%%%%%%%%%%%%
50 | This code requires the Statistics Toolbox and also the GPML toolbox (the old version, unfortunately), which is provided in the subfolder. See its README for how to compile the mex files; this makes the toolbox faster. Make sure the path is added in Matlab (e.g.
51 | addpath('gpml-matlab/gpml')
52 | )
53 |
54 | %%%%%%%%%%%%%
55 | EXAMPLE
56 | %%%%%%%%%%%%%
57 | As a first example type
58 |
59 | X=randn(300,1);
60 | Y=X+0.5*randn(300,1);
61 | Z=Y+0.5*randn(300,1);
62 | [p_val stat]=indtest_new(X,Z,[],[]);
63 | p_val % X and Z should be dependent
64 | [p_val stat]=indtest_new(X,Z,Y,[]);
65 | p_val % X and Z should be conditionally independent given Y
66 |
67 | into Matlab.
68 |
69 |
70 | %%%%%%%%%%%%%
71 | REPRODUCING FIGURES
72 | %%%%%%%%%%%%%
73 | The exp_ files in the folder exp describe (hopefully in a self-explanatory way) how the experiments were performed in the paper. The folder mat-files contains the results.
74 |
75 |
76 | %%%%%%%%%%%%%
77 | CITATION
78 | %%%%%%%%%%%%%
79 | If you use this code, please cite the following paper:
80 | K. Zhang, J. Peters, D. Janzing, and B. Schoelkopf, "A kernel-based conditional independence test and application in causal discovery," In UAI 2011.
81 |
82 | %%%%%%%%%%%%%
83 | PROBLEMS
84 | %%%%%%%%%%%%%
85 | If you have problems or questions, do not hesitate to send an email to
86 | kzhang@tuebingen.mpg.de, or kun.kzhang@gmail.com
87 |
88 |
89 |
--------------------------------------------------------------------------------
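The unconditional test in the next file can be called in the same spirit as the README example above (a minimal sketch; pars.width = 0 requests the automatic kernel width used by UInd_KCItest):

    X = randn(300,1);
    Y = randn(300,1);
    pars.width = 0;
    [p_val, Sta] = UInd_KCItest(X, Y, pars);
    p_val   % X and Y are independent, so p_val should typically be large
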
/KCI-test/UInd_KCItest.m:
--------------------------------------------------------------------------------
1 | function [p_val, Sta] = UInd_KCItest(x, y, pars)
2 | % function [p_val, Sta] = UInd_KCItest(x, y, pars)
3 | % To test if x and y are unconditionally independent with bootstrap (which is
4 | % the same as in HSIC test) or with the finite-sample Gamma approximation.
5 | % INPUT:
6 | % X and Y: data matrices of size number_of_samples * dimensionality.
7 | % pars.width (optional): the kernel width for x and y.
8 | % Output:
9 | % p_val: the p value obtained by bootstrapping (if the sample size is
10 | % smaller than 1000) or by Gamma approximation (if the sample size is
11 | % large).
12 | % Copyright (c) 2010-2011 Kun Zhang, Jonas Peters.
13 | % All rights reserved. See the file COPYING for license terms.
14 | %
15 | % For details of the method, see K. Zhang, J. Peters, D. Janzing, and B. Schoelkopf,
16 | % "A kernel-based conditional independence test and application in causal discovery,"
17 | % In UAI 2011,
18 | % and
19 | % A. Gretton, K. Fukumizu, C.-H. Teo, L. Song, B. Schoelkopf and A. Smola, "A kernel
20 | % statistical test of independence." In NIPS 20, 2008.
21 |
22 | T = length(y); % the sample size
23 |
24 | % Controlling parameters
25 | width = pars.width;
26 | Bootstrap = 1;
27 | Method_kernel_width = 1; % 1: empirical value; 2: median
28 |
29 | % Num_eig = floor(T/4); % how many eigenvalues are to be calculated?
30 | if T>1000
31 | Num_eig = floor(T/2);
32 | else
33 | Num_eig = T;
34 | end
35 | T_BS = 2000;
36 | lambda = 1E-3; % the regularization parameter
37 | Thresh = 1E-6;
38 | % normalize the data
39 | x = x - repmat(mean(x), T, 1);
40 | x = x * diag(1./std(x));
41 | y = y - repmat(mean(y), T, 1);
42 | y = y * diag(1./std(y));
43 | Cri = []; Sta = []; p_val = []; Cri_appr = []; p_appr = [];
44 |
45 | % use empirical kernel width instead of the median
46 | if ~exist('width', 'var') || isempty(width) || width==0
47 | if T < 200
48 | width = 0.8;
49 | elseif T < 1200
50 | width =0.5;
51 | else
52 | width = 0.3;
53 | end
54 | end
55 | if Method_kernel_width == 1
56 | theta = 1/(width^2); % I use this parameter to construct kernel matrices. Watch out!! width = sqrt(2)*sigma AND theta = 1/(2*sigma^2)
57 | else
58 | theta = 0;
59 | end
60 | % width = sqrt(2)*medbw(x, 1000); %use median heuristic for the band width.
61 | %theta = 1/(width^2); % I use this parameter to construct kernel matices. Watch out!! width = sqrt(2) sigma AND theta= 1/(2*sigma^2)
62 |
63 | H = eye(T) - ones(T,T)/T; % for centering of the data in feature space
64 | % Kx = kernel([x], [x], [theta/size(x,2),1]); Kx = H * Kx * H; %%%%Problem
65 | % Ky = kernel([y], [y], [theta/size(y,2),1]); Ky = H * Ky * H; %%%%Problem
66 | Kx = kernel([x], [x], [theta * size(x,2),1]); Kx = H * Kx * H; %%%%Problem
67 | Ky = kernel([y], [y], [theta * size(y,2),1]); Ky = H * Ky * H; %%%%Problem
68 |
69 | Sta = trace(Kx * Ky);
70 |
71 |
72 | Cri = -1;
73 | p_val = -1;
74 | if Bootstrap
75 | % calculate the eigenvalues that will be used later
76 | % Due to numerical issues, Kx and Ky may not be symmetric:
77 | [eig_Kx, eivx] = eigdec((Kx+Kx')/2,Num_eig);
78 | [eig_Ky, eivy] = eigdec((Ky+Ky')/2,Num_eig);
79 | % calculate Cri...
80 | % first calculate the product of the eigenvalues
81 | eig_prod = stack( (eig_Kx * ones(1,Num_eig)) .* (ones(Num_eig,1) * eig_Ky'));
82 | II = find(eig_prod > max(eig_prod) * Thresh);
83 | eig_prod = eig_prod(II); %%% new method
84 |
85 | % generate the null distribution as a weighted sum of chi^2(1) variables
86 | if length(eig_prod) * T < 1E6
87 | f_rand1 = chi2rnd(1,length(eig_prod),T_BS);
88 | Null_dstr = eig_prod'/T * f_rand1; %%%%Problem
89 | else
90 | % iteratively calculate the null distribution to save memory
91 | Null_dstr = zeros(1,T_BS);
92 | Length = max(floor(1E6/T),100);
93 | Itmax = floor(length(eig_prod)/Length);
94 | for iter = 1:Itmax
95 | f_rand1 = chi2rnd(1,Length,T_BS);
96 | Null_dstr = Null_dstr + eig_prod((iter-1)*Length+1:iter*Length)'/T * f_rand1;
97 |
98 | end
99 | Null_dstr = Null_dstr + eig_prod(Itmax*Length+1:length(eig_prod))'/T *... %%%%Problem
100 | chi2rnd(1, length(eig_prod) - Itmax*Length,T_BS);
101 | end
102 | sort_Null_dstr = sort(Null_dstr);
103 | p_val = sum(Null_dstr>Sta)/T_BS;
104 | end
105 |
106 |
--------------------------------------------------------------------------------
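The width/theta bookkeeping in the comment above can be checked numerically (a sketch of "width = sqrt(2)*sigma and theta = 1/(2*sigma^2)"):

    width = 0.8;
    sigma = width/sqrt(2);
    abs(1/width^2 - 1/(2*sigma^2)) < 1e-12   % expected: 1, i.e. theta = 1/width^2
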
/KCI-test/UInd_KCItest_RFF.m:
--------------------------------------------------------------------------------
1 | function [p_val Sta] = UInd_KCItest_RFF(x, y, pars)
2 | % function [p_val, Sta] = UInd_KCItest_RFF(x, y, pars)
3 | % To test if x and y are unconditionally independent with bootstrap (which is
4 | % the same as in HSIC test) or with the finite-sample Gamma approximation.
5 | % INPUT:
6 | % X and Y: data matrices of size number_of_samples * dimensionality.
7 | % pars.width (optional): the kernel width for x and y.
8 | % Output:
9 | % p_val: the p value obtained by bootstrapping (if the sample size is
10 | % smaller than 1000) or by Gamma approximation (if the sample size is
11 | % large).
12 |
13 | T = length(y); % the sample size
14 |
15 | % Controlling parameters
16 | width = pars.width;
17 | Bootstrap = 1;
18 |
19 | Method_kernel_width = 1; % 1: empirical value; 2: median
20 |
21 | % Num_eig = floor(T/4); % how many eigenvalues are to be calculated?
22 | if T>1000
23 | Num_eig = floor(T/2);
24 | else
25 | Num_eig = T;
26 | end
27 | T_BS = 5000;
28 | lambda = 1E-3; % the regularization parameter
29 | Thresh = 1E-6;
30 | % normalize the data
31 | x = x - repmat(mean(x), T, 1);
32 | x = x * diag(1./std(x));
33 | y = y - repmat(mean(y), T, 1);
34 | y = y * diag(1./std(y));
35 | Cri = []; Sta = []; p_val = []; Cri_appr = []; p_appr = [];
36 |
37 | % use empirical kernel width instead of the median
38 | if ~exist('width', 'var') || isempty(width) || width==0
39 | if T < 200
40 | width = 0.8;
41 | elseif T < 1200
42 | width =0.5;
43 | else
44 | width = 0.3;
45 | end
46 | end
47 |
48 |
49 | Zx = transformFeatures(x/width); % calculate random Fourier features
50 | Zy = transformFeatures(y/width);
51 | C = (Zx - repmat(mean(Zx,2), 1, T)) * (Zy - repmat(mean(Zy,2), 1, T))';
52 | Sta = norm(C,'fro')^2;
53 |
54 |
55 | Cri = -1;
56 | p_val = -1;
57 | if Bootstrap
58 | % calculate the eigenvalues that will be used later
59 | [eig_Kx, eivx] = eigdec(Zx'*Zx,Num_eig);
60 | [eig_Ky, eivy] = eigdec(Zy'*Zy,Num_eig);
61 | % calculate Cri...
62 | % first calculate the product of the eigenvalues
63 | eig_prod = stack( (eig_Kx * ones(1,length(eig_Kx))) .* (ones(length(eig_Kx),1) * eig_Ky'));
64 | II = find(eig_prod > max(eig_prod) * Thresh);
65 | eig_prod = eig_prod(II); %%% new method
66 |
67 | % generate the null distribution as a weighted sum of chi^2(1) variables
68 | if length(eig_prod) * T < 1E6
69 | f_rand1 = chi2rnd(1,length(eig_prod),T_BS);
70 | Null_dstr = eig_prod'/T * f_rand1; %%%%Problem
71 | else
72 | % iteratively calculate the null distribution to save memory
73 | Null_dstr = zeros(1,T_BS);
74 | Length = max(floor(1E6/T),100);
75 | Itmax = floor(length(eig_prod)/Length);
76 | for iter = 1:Itmax
77 | f_rand1 = chi2rnd(1,Length,T_BS);
78 | Null_dstr = Null_dstr + eig_prod((iter-1)*Length+1:iter*Length)'/T * f_rand1;
79 |
80 | end
81 | Null_dstr = Null_dstr + eig_prod(Itmax*Length+1:length(eig_prod))'/T *... %%%%Problem
82 | chi2rnd(1, length(eig_prod) - Itmax*Length,T_BS);
83 | end
84 | sort_Null_dstr = sort(Null_dstr);
85 | p_val = sum(Null_dstr>Sta)/T_BS;
86 | end
87 |
88 |
--------------------------------------------------------------------------------
/KCI-test/algorithms/CInd_test_new_withGP_t_RFF.m:
--------------------------------------------------------------------------------
1 | function [p_val, Sta, Cri] = CInd_test_new_withGP_t_RFF(x, y, z, alpha, pars)
2 | % To test if x and y are conditionally independent given z.
3 | % INPUT:
4 | % The number of rows of x and y is the sample size.
5 | % alpha is the significance level (we suggest 1%).
6 | % pars contains the kernel width and whether to use GP to optimize the kernel width.
7 | % Output:
8 | % Cri: the critical point at the p-value equal to alpha obtained by bootstrapping.
9 | % Sta: the statistic Tr(K_{\ddot{X}|Z} * K_{Y|Z}).
10 | % p_val: the p value obtained by bootstrapping.
11 | % If Sta > Cri, the null hypothesis (x is independent of y given z) is rejected.
12 | % Copyright (c) 2010-2011 ...
13 | % All rights reserved. See the file COPYING for license terms.
14 |
15 | % Controlling parameters
16 | width = pars.width;
17 | if(pars.widthT==0) % kernel width on the time index when IF_GP=0; needs tuning!
18 | widthT = 0.1;
19 | else
20 | widthT = pars.widthT;
21 | end
22 | IF_unbiased = 0;
23 | Bootstrap = 1;
24 |
25 | T = length(y); % the sample size
26 | % Num_eig = floor(T/4); % how many eigenvalues are to be calculated?
27 | Num_eig = T;
28 | T_BS = 10000; % 5000
29 | lambda = 1E-3; % the regularization parameter %%%%Problem
30 | Thresh = 1E-5;
31 | % normalize the data
32 | x = x - repmat(mean(x), T, 1);
33 | x = x * diag(1./std(x));
34 | y = y - repmat(mean(y), T, 1);
35 | y = y * diag(1./std(y));
36 | z = z - repmat(mean(z), T, 1);
37 | z = z * diag(1./std(z));
38 |
39 | D = size(z, 2);
40 | logtheta_x = []; logtheta_y = []; df_x = []; df_y = [];
41 | Cri = []; Sta = []; p_val = []; Cri_appr = []; p_appr = [];
42 |
43 | if width ==0
44 | if T <= 200
45 | width = 1.2; % 0.8
46 | elseif T < 1200
47 | width = 0.6;
48 | else
49 | width = 0.4; % 0.3
50 | end
51 | end
52 |
53 | Zx = transformFeatures([x z/2]/(width*sqrt(D)/sqrt(2))); % calculate random Fourier features
54 | Zx = Zx - repmat(mean(Zx,2), 1, T);
55 | Zy = transformFeatures(y/(width*sqrt(D)/sqrt(2)));
56 | Zy = Zy - repmat(mean(Zy,2), 1, T);
57 | % check whether the last dimension of z is the time index
58 | tmp = [1:T]';
59 | tmp = tmp - repmat(mean(tmp), T, 1);
60 | tmp = tmp * diag(1./std(tmp));
61 | if(norm(z(:,end)-tmp)<1e-5)
62 | if(D>1)
63 | Zz = transformFeatures([z(:,1:end-1)/(width*sqrt(D-1)/sqrt(2)),z(:,end)/(widthT/sqrt(2))]);
64 | else
65 | Zz = transformFeatures(z/(widthT/sqrt(2)));
66 | end
67 | else
68 | Zz = transformFeatures(z/(width*sqrt(D)/sqrt(2)));
69 | end
70 | Zz = Zz - repmat(mean(Zz,2), 1, T);
71 |
72 |
73 | Cxz = Zx*Zz';
74 | Cyz = Zy*Zz';
75 | Czz = Zz*Zz';
76 |
77 | Ex = Zx - (Cxz/(Czz+1e-5*eye(size(Czz,1))))*Zz;
78 | Ex = Ex - repmat(mean(Ex,2), 1, T);
79 | Ey = Zy - (Cyz/(Czz+1e-5*eye(size(Czz,1))))*Zz;
80 | Ey = Ey - repmat(mean(Ey,2), 1, T);
81 | C = Ex * Ey';
82 | % calculate the statistic
83 | Sta = norm(C,'fro')^2;
84 |
85 | % calculate the eigenvalues that will be used later
86 | [eig_Kxz, eivx] = eigdec(Ex'*Ex,Num_eig);
87 | [eig_Kyz, eivy] = eigdec(Ey'*Ey,Num_eig);
88 |
89 | % calculate the product of the square root of the eigvector and the eigenvector
90 | IIx = find(eig_Kxz > max(eig_Kxz) * Thresh);
91 | IIy = find(eig_Kyz > max(eig_Kyz) * Thresh);
92 | eig_Kxz = eig_Kxz(IIx);
93 | eivx = eivx(:,IIx);
94 | eig_Kyz = eig_Kyz(IIy);
95 | eivy = eivy(:,IIy);
96 |
97 | eiv_prodx = eivx * diag(sqrt(eig_Kxz));
98 | eiv_prody = eivy * diag(sqrt(eig_Kyz));
99 | clear eivx eig_Kxz eivy eig_Kyz
100 | % calculate their product
101 | Num_eigx = size(eiv_prodx, 2);
102 | Num_eigy = size(eiv_prody, 2);
103 | Size_u = Num_eigx * Num_eigy;
104 | uu = zeros(T, Size_u);
105 | for i=1:Num_eigx
106 | for j=1:Num_eigy
107 | uu(:,(i-1)*Num_eigy + j) = eiv_prodx(:,i) .* eiv_prody(:,j);
108 | end
109 | end
110 | if Size_u > T
111 | uu_prod = uu * uu';
112 | else
113 | uu_prod = uu' * uu;
114 | end
115 | if Bootstrap
116 | eig_uu = eigdec(uu_prod,min(T,Size_u));
117 | II_f = find(eig_uu > max(eig_uu) * Thresh);
118 | eig_uu = eig_uu(II_f);
119 | end
120 |
121 | Cri=-1;
122 | p_val=-1;
123 |
124 |
125 | if Bootstrap
126 | % generate the null distribution as a weighted sum of chi^2(1) variables
127 | if length(eig_uu) * T < 1E6
128 | f_rand1 = chi2rnd(1,length(eig_uu),T_BS);
129 | if IF_unbiased
130 | Null_dstr = T^2/(T-1-df_x)/(T-1-df_y) * eig_uu' * f_rand1; %%%%Problem
131 | else
132 | Null_dstr = eig_uu' * f_rand1;
133 | end
134 | else
135 | % iteratively calculate the null distribution to save memory
136 | Null_dstr = zeros(1,T_BS);
137 | Length = max(floor(1E6/T),100);
138 | Itmax = floor(length(eig_uu)/Length);
139 | for iter = 1:Itmax
140 | f_rand1 = chi2rnd(1,Length,T_BS);
141 | if IF_unbiased
142 | Null_dstr = Null_dstr + T^2/(T-1-df_x)/(T-1-df_y) *... %%%%Problem
143 | eig_uu((iter-1)*Length+1:iter*Length)' * f_rand1;
144 | else
145 | Null_dstr = Null_dstr + ... %%%%Problem
146 | eig_uu((iter-1)*Length+1:iter*Length)' * f_rand1;
147 | end
148 |
149 | end
150 | end
151 | sort_Null_dstr = sort(Null_dstr);
152 | Cri = sort_Null_dstr(ceil((1-alpha)*T_BS));
153 | p_val = sum(Null_dstr>Sta)/T_BS;
154 | end
155 |
156 |
157 |
158 |
--------------------------------------------------------------------------------
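A call sketch for the RFF-based conditional test above (illustrative data; alpha = 0.01 as suggested in the header, pars.width = 0 and pars.widthT = 0 select the automatic widths; transformFeatures.m from the tree above must be on the path):

    Z = randn(400,1);
    X = Z + 0.3*randn(400,1);
    Y = Z + 0.3*randn(400,1);
    pars.width = 0; pars.widthT = 0;
    [p_val, Sta, Cri] = CInd_test_new_withGP_t_RFF(X, Y, Z, 0.01, pars);
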
/KCI-test/algorithms/check_markov_equiv.m:
--------------------------------------------------------------------------------
1 | function [res]=check_markov_equiv(g1,g2)
2 | % function res=check_markov_equiv(g1,g2)
3 | % INPUT: two graphs g1, g2.
4 | % g(i,j)=-1 if there is a directed arrow from i to j.
5 | % g(i,j)=g(j,i)=1 if there is an undirected edge between i and j
6 | %
7 | % OUTPUT: res==1: the two graphs are Markov equivalent
8 | % res==0: they are not
9 | %
10 | % Copyright (c) 2010-2011 ...
11 | % All rights reserved. See the file COPYING for license terms.
12 |
13 |
14 | res=1;
15 | num_nodes=size(g1,1);
16 |
17 |
18 | %check whether they have the same skeleton
19 | %
20 | skeleton1=g1+g1'; skeleton1(skeleton1~=0)=skeleton1(skeleton1~=0)./skeleton1(skeleton1~=0);
21 | skeleton2=g2+g2'; skeleton2(skeleton2~=0)=skeleton2(skeleton2~=0)./skeleton2(skeleton2~=0);
22 | if ~isequal(skeleton1,skeleton2)
23 | res=0;
24 | fprintf('not the same skeletons\n');
25 | end
26 |
27 | if res==1
28 | %check whether they have the same set of immoralities
29 | %
30 | for i=1:num_nodes
31 | i_parents=find(g1(:,i)==-1);
32 | for ii1=1:(length(i_parents)-1)
33 | for ii2=(ii1+1):length(i_parents)
34 | if g1(i_parents(ii2),i_parents(ii1))==0 & g1(i_parents(ii1),i_parents(ii2))==0
35 | if g2(i,i_parents(ii1))~=0 | g2(i,i_parents(ii2))~=0
36 | res=0;
37 | fprintf('there is an immorality in the 1st graph that is not in the 2nd graph\n');
38 | end
39 | end
40 | end
41 | end
42 | end
43 |
44 | for i=1:num_nodes
45 | i_parents=find(g2(:,i)==-1);
46 | for ii1=1:(length(i_parents)-1)
47 | for ii2=(ii1+1):length(i_parents)
48 | if g2(i_parents(ii2),i_parents(ii1))==0 & g2(i_parents(ii1),i_parents(ii2))==0
49 | if g1(i,i_parents(ii1))~=0 | g1(i,i_parents(ii2))~=0
50 | res=0;
51 | fprintf('there is an immorality in the 2nd graph that is not in the 1st graph\n');
52 | end
53 | end
54 | end
55 | end
56 | end
57 | end
58 |
--------------------------------------------------------------------------------
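A small sketch of the encoding above (g(i,j) = -1 for a directed edge i -> j): the chains 1 -> 2 -> 3 and 3 -> 2 -> 1 share the same skeleton and have no immoralities, so they are Markov equivalent:

    g1 = zeros(3); g1(1,2) = -1; g1(2,3) = -1;   % 1 -> 2 -> 3
    g2 = zeros(3); g2(3,2) = -1; g2(2,1) = -1;   % 3 -> 2 -> 1
    res = check_markov_equiv(g1, g2)             % expected: 1
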
/KCI-test/algorithms/condVect.m:
--------------------------------------------------------------------------------
1 | function[res] = condVect(S,T,U,M,ths,BG)
2 | %calculates independence measure on vectors
3 | %
4 | %S,T,U: disjoint lists of indices
5 | %M: covariance matrix of vectors
6 | %ths: threshold for independence if ths=0, then condVect returns I(S:T|U),
7 | %if ths > 0, then condVect returns 1, if I(S:T|U)<=ths and 0 otherwise
8 | %BG: list of indices of background variables (always conditioned on, but they do not count as variables for PC)
9 | %
10 | %example:
11 | %condVect([1,2],[3],[4,5],M,0,[6]) returns I(1,2 : 3 | 4,5,6)
12 | %condVect([1,2],[3],[],M,0.5,[]) returns 1, if I(1,2 : 3)<=0.5, and 0
13 | %otherwise
14 | % Copyright (c) 2010-2011 ...
15 | % All rights reserved. See the file COPYING for license terms.
16 |
17 | if (ths>=0)
18 | fprintf('Calculating I(%s : %s | %s) ',num2str(S,'%1.0d '),num2str(T,' %1.0d '),num2str(U,' %1.0d'));
19 | end
20 | U=union(U,BG);
21 | I = union(S,T);I=union(I,U); %set of indices
22 |
23 | res = entVect(M(union(S,U),union(S,U)))+entVect(M(union(T,U),union(T,U)))-entVect(M(U,U))-entVect(M(I,I));
24 |
25 | if (ths >= 0)
26 | fprintf(' Result: %1.2d\n',res);
27 | res = (res<=ths);
28 | end
29 | end
30 |
31 | %information from covariance matrix
32 | function[res] = entVect(M)
33 |
34 | res = 1/2*log(det(M));
35 | end
36 |
--------------------------------------------------------------------------------
/KCI-test/algorithms/dist2.m:
--------------------------------------------------------------------------------
1 | function n2 = dist2(x, c)
2 | %DIST2 Calculates squared distance between two sets of points.
3 | %
4 | % Description
5 | % D = DIST2(X, C) takes two matrices of vectors and calculates the
6 | % squared Euclidean distance between them. Both matrices must be of
7 | % the same column dimension. If X has M rows and N columns, and C has
8 | % L rows and N columns, then the result has M rows and L columns. The
9 | % I, Jth entry is the squared distance from the Ith row of X to the
10 | % Jth row of C.
11 | %
12 | % See also
13 | % GMMACTIV, KMEANS, RBFFWD
14 | %
15 |
16 | % Copyright (c) Ian T Nabney (1996-2001)
17 | % All rights reserved. See the file COPYING for license terms.
18 |
19 | [ndata, dimx] = size(x);
20 | [ncentres, dimc] = size(c);
21 | if dimx ~= dimc
22 | error('Data dimension does not match dimension of centres')
23 | end
24 |
25 | n2 = (ones(ncentres, 1) * sum((x.^2)', 1))' + ...
26 | ones(ndata, 1) * sum((c.^2)',1) - ...
27 | 2.*(x*(c'));
28 |
29 | % Rounding errors occasionally cause negative entries in n2
30 | if any(any(n2<0))
31 | n2(n2<0) = 0;
32 | end
33 |
--------------------------------------------------------------------------------
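A sketch of the M x L shape convention documented above:

    x = [0 0; 1 0; 0 1];   % 3 points in R^2
    c = [0 0; 1 1];        % 2 centres
    D = dist2(x, c)        % 3 x 2; D(i,j) = squared distance from x(i,:) to c(j,:)
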
/KCI-test/algorithms/eigdec.m:
--------------------------------------------------------------------------------
1 | function [evals, evec] = eigdec(x, N)
2 | %EIGDEC Sorted eigendecomposition
3 | %
4 | % Description
5 | % EVALS = EIGDEC(X, N) computes the largest N eigenvalues of the
6 | % matrix X in descending order. [EVALS, EVEC] = EIGDEC(X, N) also
7 | % computes the corresponding eigenvectors.
8 | %
9 | % See also
10 | % PCA, PPCA
11 | %
12 |
13 | % Copyright (c) Ian T Nabney (1996-2001)
14 | % All rights reserved. See the file COPYING for license terms.
15 |
16 | if nargout == 1
17 | evals_only = logical(1);
18 | else
19 | evals_only = logical(0);
20 | end
21 |
22 | if N ~= round(N) | N < 1 | N > size(x, 2)
23 | error('Number of PCs must be integer, >0, < dim');
24 | end
25 |
26 | % Find the eigenvalues of the data covariance matrix
27 | if evals_only
28 | % Use eig function as always more efficient than eigs here
29 | temp_evals = eig(x);
30 | else
31 | % Use eig function unless fraction of eigenvalues required is tiny
32 | if (N/size(x, 2)) > 0.04
33 | [temp_evec, temp_evals] = eig(x);
34 | else
35 | options.disp = 0;
36 | [temp_evec, temp_evals] = eigs(x, N, 'LM', options);
37 | end
38 | temp_evals = diag(temp_evals);
39 | end
40 |
41 | % Eigenvalues nearly always returned in descending order, but just
42 | % to make sure.....
43 | [evals perm] = sort(-temp_evals);
44 | evals = -evals(1:N);
45 | if ~evals_only
46 | if evals == temp_evals(1:N)
47 | % Originals were in order
48 | evec = temp_evec(:, 1:N);
49 | return
50 | else
51 | % Need to reorder the eigenvectors
52 | for i=1:N
53 | evec(:,i) = temp_evec(:,perm(i));
54 | end
55 | end
56 | end
57 |
--------------------------------------------------------------------------------
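A sketch of how eigdec is typically called elsewhere in this package (the matrix is symmetrized first, as in UInd_KCItest.m):

    K = randn(50); K = K*K';                 % symmetric positive semi-definite
    [evals, evec] = eigdec((K+K')/2, 10);    % 10 largest eigenvalues, descending
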
/KCI-test/algorithms/gpr_multi.m:
--------------------------------------------------------------------------------
1 | function [out1, out2] = gpr_multi(logtheta, covfunc, x, y, xstar);
2 | % Here we change the function gpr to gpr_multi, in which y contains a set
3 | % of vectors on which we do regression from x
4 |
5 | % gpr - Gaussian process regression, with a named covariance function. Two
6 | % modes are possible: training and prediction: if no test data are given, the
7 | % function returns minus the log likelihood and its partial derivatives with
8 | % respect to the hyperparameters; this mode is used to fit the hyperparameters.
9 | % If test data are given, then (marginal) Gaussian predictions are computed,
10 | % whose mean and variance are returned. Note that in cases where the covariance
11 | % function has noise contributions, the variance returned in S2 is for noisy
12 | % test targets; if you want the variance of the noise-free latent function, you
13 | % must subtract the noise variance.
14 | %
15 | % usage: [nlml dnlml] = gpr(logtheta, covfunc, x, y)
16 | % or: [mu S2] = gpr(logtheta, covfunc, x, y, xstar)
17 | %
18 | % where:
19 | %
20 | % logtheta is a (column) vector of log hyperparameters
21 | % covfunc is the covariance function
22 | % x is a n by D matrix of training inputs
23 | % y is a (column) vector (of size n) of targets
24 | % xstar is a nn by D matrix of test inputs
25 | % nlml is the returned value of the negative log marginal likelihood
26 | % dnlml is a (column) vector of partial derivatives of the negative
27 | % log marginal likelihood wrt each log hyperparameter
28 | % mu is a (column) vector (of size nn) of predicted means
29 | % S2 is a (column) vector (of size nn) of predicted variances
30 | %
31 | % For more help on covariance functions, see "help covFunctions".
32 | %
33 | % (C) copyright 2006 by Carl Edward Rasmussen (2006-03-20).
34 |
35 | if ischar(covfunc), covfunc = cellstr(covfunc); end % convert to cell if needed
36 | [n, D] = size(x);
37 | [n, m] = size(y);
38 | if eval(feval(covfunc{:})) ~= size(logtheta, 1)
39 | error('Error: Number of parameters does not agree with covariance function')
40 | end
41 |
42 | K = feval(covfunc{:}, logtheta, x); % compute training set covariance matrix
43 |
44 | L = chol(K)'; % cholesky factorization of the covariance
45 | % for i = 1:m
46 | % alpha(:,i) = solve_chol(L',y(:,i));
47 | % end
48 | alpha = solve_chol(L',y);
49 |
50 | if nargin == 4 % if no test cases, compute the negative log marginal likelihood
51 |
52 | out1 = 0.5* trace(y'*alpha) + m*sum(log(diag(L))) + 0.5*m*n*log(2*pi);
53 |
54 | if nargout == 2 % ... and if requested, its partial derivatives
55 | out2 = zeros(size(logtheta)); % set the size of the derivative vector
56 | W = m * (L'\(L\eye(n))) - alpha*alpha'; % precompute for convenience
57 | for i = 1:length(out2)
58 | out2(i) = sum(sum(W.*feval(covfunc{:}, logtheta, x, i)))/2;
59 | end
60 | end
61 |
62 | else % ... otherwise compute (marginal) test predictions ...
63 |
64 | [Kss, Kstar] = feval(covfunc{:}, logtheta, x, xstar); % test covariances
65 |
66 | out1 = Kstar' * alpha; % predicted means
67 |
68 | if nargout == 2
69 | v = L\Kstar;
70 | out2 = Kss - sum(v.*v)';
71 | end
72 |
73 | end
74 |
--------------------------------------------------------------------------------
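A training-mode call sketch following the usage lines above (the covariance and hyperparameters are illustrative; in the bundled gpml toolbox covSEiso takes [log(ell); log(sf)] and covNoise takes log(sn)):

    x = randn(50,2);
    y = [sin(x(:,1)), cos(x(:,2))];                % two target columns
    covfunc = {'covSum', {'covSEiso','covNoise'}};
    logtheta = [log(1); log(1); log(0.1)];
    [nlml, dnlml] = gpr_multi(logtheta, covfunc, x, y);
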
/KCI-test/algorithms/gpr_multi2.m:
--------------------------------------------------------------------------------
1 | function [out1, out2] = gpr_multi2(logtheta, covfunc, x, y, xstar);
2 | % Here we change the function gpr to gpr_multi, in which y contains a set
3 | % of vectors on which we do regression from x
4 |
5 | % gpr - Gaussian process regression, with a named covariance function. Two
6 | % modes are possible: training and prediction: if no test data are given, the
7 | % function returns minus the log likelihood and its partial derivatives with
8 | % respect to the hyperparameters; this mode is used to fit the hyperparameters.
9 | % If test data are given, then (marginal) Gaussian predictions are computed,
10 | % whose mean and variance are returned. Note that in cases where the covariance
11 | % function has noise contributions, the variance returned in S2 is for noisy
12 | % test targets; if you want the variance of the noise-free latent function, you
13 | % must subtract the noise variance.
14 | %
15 | % usage: [nlml dnlml] = gpr(logtheta, covfunc, x, y)
16 | % or: [mu S2] = gpr(logtheta, covfunc, x, y, xstar)
17 | %
18 | % where:
19 | %
20 | % logtheta is a (column) vector of log hyperparameters
21 | % covfunc is the covariance function
22 | % x is a n by D matrix of training inputs
23 | % y is a (column) vector (of size n) of targets
24 | % xstar is a nn by D matrix of test inputs
25 | % nlml is the returned value of the negative log marginal likelihood
26 | % dnlml is a (column) vector of partial derivatives of the negative
27 | % log marginal likelihood wrt each log hyperparameter
28 | % mu is a (column) vector (of size nn) of predicted means
29 | % S2 is a (column) vector (of size nn) of predicted variances
30 | %
31 | % For more help on covariance functions, see "help covFunctions".
32 | %
33 | % (C) copyright 2006 by Carl Edward Rasmussen (2006-03-20).
34 |
35 | if ischar(covfunc), covfunc = cellstr(covfunc); end % convert to cell if needed
36 | [n, D] = size(x);
37 | [n, m] = size(y);
38 | if eval(feval(covfunc{:})) ~= size(logtheta, 1)
39 | error('Error: Number of parameters does not agree with covariance function')
40 | end
41 |
42 | K = feval(covfunc{:}, logtheta, x); % compute training set covariance matrix
43 |
44 | L = chol(K)'; % cholesky factorization of the covariance
45 | % for i = 1:m
46 | % alpha(:,i) = solve_chol(L',y(:,i));
47 | % end
48 | alpha = solve_chol(L',y);
49 |
50 | if nargin == 4 % if no test cases, compute the negative log marginal likelihood
51 |
52 | out1 = 0.5* trace(y'*alpha) + m*sum(log(diag(L))) + 0.5*m*n*log(2*pi);
53 |
54 | if nargout == 2 % ... and if requested, its partial derivatives
55 | out2 = zeros(size(logtheta)); % set the size of the derivative vector
56 | W = m * (L'\(L\eye(n))) - alpha*alpha'; % precompute for convenience
57 | for i = 1:length(out2)
58 | out2(i) = sum(sum(W.*feval(covfunc{:}, logtheta, x, i)))/2;
59 | end
60 | out2(end-2)=0;
61 | end
62 |
63 | else % ... otherwise compute (marginal) test predictions ...
64 |
65 | [Kss, Kstar] = feval(covfunc{:}, logtheta, x, xstar); % test covariances
66 |
67 | out1 = Kstar' * alpha; % predicted means
68 |
69 | if nargout == 2
70 | v = L\Kstar;
71 | out2 = Kss - sum(v.*v)';
72 | end
73 |
74 | end
75 |
--------------------------------------------------------------------------------
/KCI-test/algorithms/gpr_multi_alln.m:
--------------------------------------------------------------------------------
1 | function [out1, out2] = gpr_multi_alln(logtheta, covfunc, x, y, xstar);
2 | % Here we change the function gpr to gpr_multi, in which y contains a set
3 | % of vectors on which we do regression from x
4 |
5 | % gpr - Gaussian process regression, with a named covariance function. Two
6 | % modes are possible: training and prediction: if no test data are given, the
7 | % function returns minus the log likelihood and its partial derivatives with
8 | % respect to the hyperparameters; this mode is used to fit the hyperparameters.
9 | % If test data are given, then (marginal) Gaussian predictions are computed,
10 | % whose mean and variance are returned. Note that in cases where the covariance
11 | % function has noise contributions, the variance returned in S2 is for noisy
12 | % test targets; if you want the variance of the noise-free latent function, you
13 | % must subtract the noise variance.
14 | %
15 | % usage: [nlml dnlml] = gpr(logtheta, covfunc, x, y)
16 | % or: [mu S2] = gpr(logtheta, covfunc, x, y, xstar)
17 | %
18 | % where:
19 | %
20 | % logtheta is a (column) vector of log hyperparameters
21 | % covfunc is the covariance function
22 | % x is a n by D matrix of training inputs
23 | % y is a (column) vector (of size n) of targets
24 | % xstar is a nn by D matrix of test inputs
25 | % nlml is the returned value of the negative log marginal likelihood
26 | % dnlml is a (column) vector of partial derivatives of the negative
27 | % log marginal likelihood wrt each log hyperparameter
28 | % mu is a (column) vector (of size nn) of predicted means
29 | % S2 is a (column) vector (of size nn) of predicted variances
30 | %
31 | % For more help on covariance functions, see "help covFunctions".
32 | %
33 | % (C) copyright 2006 by Carl Edward Rasmussen (2006-03-20).
34 |
35 | if ischar(covfunc), covfunc = cellstr(covfunc); end % convert to cell if needed
36 | [n, D] = size(x);
37 | [n, m] = size(y);
38 | if eval(feval(covfunc{:})) ~= size(logtheta, 1)
39 | error('Error: Number of parameters does not agree with covariance function')
40 | end
41 |
42 | K = feval(covfunc{:}, logtheta, x); % compute training set covariance matrix
43 |
44 | L = chol(K)'; % cholesky factorization of the covariance
45 | % for i = 1:m
46 | % alpha(:,i) = solve_chol(L',y(:,i));
47 | % end
48 | alpha = solve_chol(L',y);
49 |
50 | if nargin == 4 % if no test cases, compute the negative log marginal likelihood
51 |
52 | out1 = 0.5* trace(y'*alpha) + n*sum(log(diag(L))) + 0.5*n*n*log(2*pi);
53 |
54 | if nargout == 2 % ... and if requested, its partial derivatives
55 | out2 = zeros(size(logtheta)); % set the size of the derivative vector
56 | W = n * (L'\(L\eye(n))) - alpha*alpha'; % precompute for convenience
57 | for i = 1:length(out2)
58 | out2(i) = sum(sum(W.*feval(covfunc{:}, logtheta, x, i)))/2;
59 | end
60 | end
61 |
62 | else % ... otherwise compute (marginal) test predictions ...
63 |
64 | [Kss, Kstar] = feval(covfunc{:}, logtheta, x, xstar); % test covariances
65 |
66 | out1 = Kstar' * alpha; % predicted means
67 |
68 | if nargout == 2
69 | v = L\Kstar;
70 | out2 = Kss - sum(v.*v)';
71 | end
72 |
73 | end
74 |
--------------------------------------------------------------------------------
/KCI-test/algorithms/gpr_multi_alln_K.m:
--------------------------------------------------------------------------------
1 | function [out1, out2] = gpr_multi_alln_K(logtheta, covfunc, x, Ky, xstar);
2 | % Here we change the function gpr to gpr_multi, in which y contains a set
3 | % of vectors on which we do regression from x
4 |
5 | % gpr - Gaussian process regression, with a named covariance function. Two
6 | % modes are possible: training and prediction: if no test data are given, the
7 | % function returns minus the log likelihood and its partial derivatives with
8 | % respect to the hyperparameters; this mode is used to fit the hyperparameters.
9 | % If test data are given, then (marginal) Gaussian predictions are computed,
10 | % whose mean and variance are returned. Note that in cases where the covariance
11 | % function has noise contributions, the variance returned in S2 is for noisy
12 | % test targets; if you want the variance of the noise-free latent function, you
13 | % must subtract the noise variance.
14 | %
15 | % usage: [nlml dnlml] = gpr(logtheta, covfunc, x, y)
16 | % or: [mu S2] = gpr(logtheta, covfunc, x, y, xstar)
17 | %
18 | % where:
19 | %
20 | % logtheta is a (column) vector of log hyperparameters
21 | % covfunc is the covariance function
22 | % x is a n by D matrix of training inputs
23 | % y is a (column) vector (of size n) of targets
24 | % xstar is a nn by D matrix of test inputs
25 | % nlml is the returned value of the negative log marginal likelihood
26 | % dnlml is a (column) vector of partial derivatives of the negative
27 | % log marginal likelihood wrt each log hyperparameter
28 | % mu is a (column) vector (of size nn) of predicted means
29 | % S2 is a (column) vector (of size nn) of predicted variances
30 | %
31 | % For more help on covariance functions, see "help covFunctions".
32 | %
33 | % (C) copyright 2006 by Carl Edward Rasmussen (2006-03-20).
34 |
35 | if ischar(covfunc), covfunc = cellstr(covfunc); end % convert to cell if needed
36 | [n, D] = size(x);
37 | [n, m] = size(Ky);
38 | if eval(feval(covfunc{:})) ~= size(logtheta, 1)
39 | error('Error: Number of parameters does not agree with covariance function')
40 | end
41 |
42 | K = feval(covfunc{:}, logtheta, x); % compute training set covariance matrix
43 |
44 | L = chol(K)'; % cholesky factorization of the covariance
45 | % for i = 1:m
46 | % alpha(:,i) = solve_chol(L',y(:,i));
47 | % end
48 | % alpha = solve_chol(L',y);
49 | K_inv = solve_chol(L',eye(n));
50 |
51 | if nargin == 4 % if no test cases, compute the negative log marginal likelihood
52 |
53 | % out1 = 0.5* trace(y'*alpha) + n*sum(log(diag(L))) + 0.5*n*n*log(2*pi);
54 | out1 = 0.5* trace(K_inv * Ky) + n*sum(log(diag(L))) + 0.5*n*n*log(2*pi);
55 |
56 | if nargout == 2 % ... and if requested, its partial derivatives
57 | out2 = zeros(size(logtheta)); % set the size of the derivative vector
58 | W = K_inv * (n *eye(n) - Ky * K_inv); % precompute for convenience
59 | for i = 1:length(out2)
60 | out2(i) = sum(sum(W.*feval(covfunc{:}, logtheta, x, i)))/2;
61 | end
62 | end
63 |
64 | else % ... otherwise compute (marginal) test predictions ...
65 |
66 | [Kss, Kstar] = feval(covfunc{:}, logtheta, x, xstar); % test covariances
67 |
68 | out1 = Kstar' * alpha; % predicted means
69 |
70 | if nargout == 2
71 | v = L\Kstar;
72 | out2 = Kss - sum(v.*v)';
73 | end
74 |
75 | end
76 |
--------------------------------------------------------------------------------
/KCI-test/algorithms/kernel.m:
--------------------------------------------------------------------------------
1 | function [kx, bw_new] = kernel(x, xKern, theta)
2 |
3 | % KERNEL Compute the rbf kernel
4 | % Copyright (c) 2010-2011 ...
5 | % All rights reserved. See the file COPYING for license terms.
6 | n2 = dist2(x, xKern);
7 | if theta(1)==0
8 | theta(1)=2/median(n2(tril(n2)>0));
9 | theta_new=theta(1);
10 | end
11 | wi2 = theta(1)/2;
12 | kx = theta(2)*exp(-n2*wi2);
13 | bw_new=1/theta(1);
14 |
15 |
--------------------------------------------------------------------------------
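A direct call sketch for kernel (theta(1) is the inverse squared bandwidth and theta(2) the scale, so this matches an RBF kernel with width 0.8; note that UInd_KCItest.m additionally rescales theta(1) by the input dimension):

    x = randn(100,2);
    K = kernel(x, x, [1/0.8^2, 1]);   % 100 x 100 RBF Gram matrix
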
/KCI-test/algorithms/logdet.m:
--------------------------------------------------------------------------------
1 | function y = logdet(A)
2 | % log(det(A)) where A is positive-definite.
3 | % This is faster and more stable than using log(det(A)).
4 | % Copyright (c) 2010-2011 ...
5 | % All rights reserved. See the file COPYING for license terms.
6 | try
7 | U = chol(A);
8 | y = 2*sum(log(diag(U)));
9 | catch
10 | [void, errid] = lasterr;
11 | if strcmp(errid, 'MATLAB:posdef')
12 | warning(['Matrix is not positive definite in logdet, using log(det())'])
13 | y = log(det(A));
14 | return
15 | else
16 | error(lasterr)
17 | end
18 | end
19 |
--------------------------------------------------------------------------------
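A quick numerical check of logdet against the direct computation on a small positive definite matrix:

    A = [2 1; 1 2];
    abs(logdet(A) - log(det(A))) < 1e-12   % expected: 1; det(A) = 3
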
/KCI-test/algorithms/pdinv.m:
--------------------------------------------------------------------------------
1 | function Ainv = pdinv(A);
2 |
3 | % PDINV Computes the inverse of a positive definite matrix
4 | % Copyright (c) 2010-2011 ...
5 | % All rights reserved. See the file COPYING for license terms.
6 | numData = size(A, 1);
7 | try
8 | U = chol(A);
9 | invU = eye(numData)/U;
10 | Ainv = invU*invU';
11 | catch
12 | [void, errid] = lasterr;
13 | if strcmp(errid, 'MATLAB:posdef')
14 | warning(['Matrix is not positive definite in pdinv, inverting' ...
15 | ' using svd'])
16 | [U, S, V] = svd(A);
17 | Ainv = V*diag(1./diag(S))*U';
18 | return
19 | else
20 | error(lasterr)
21 | end
22 | end
23 |
24 |
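25 | % Example (a minimal sketch, not part of the original file): for a PD matrix,
26 | % pdinv(A) matches inv(A) but goes through the Cholesky factor, with an SVD
27 | % fallback if chol fails:
28 | %   A = cov(randn(200,5)) + 1e-6*eye(5);
29 | %   norm(pdinv(A)*A - eye(5))   % close to zero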
--------------------------------------------------------------------------------
/KCI-test/algorithms/stack.m:
--------------------------------------------------------------------------------
1 | function v = stack(M)
2 | % stack the matrix M into the vector v
3 | % Copyright (c) 2010-2011 ...
4 | % All rights reserved. See the file COPYING for license terms.
5 | [n,t] = size(M);
6 | v = zeros(n*t,1);
7 |
8 | for i=1:t
9 | v((i-1)*n+1:i*n) = M(:,i);
10 | end
11 |
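12 | % Note (added): for column-major stacking this loop is equivalent to the
13 | % builtin colon operator, i.e. isequal(stack(M), M(:)) holds for any matrix M.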
--------------------------------------------------------------------------------
/KCI-test/data/README:
--------------------------------------------------------------------------------
1 | These data are taken from the UCI Machine Learning Repository
2 | Frank, A. & Asuncion, A. (2010). UCI Machine Learning Repository [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, School of Information and Computer Science.
3 |
--------------------------------------------------------------------------------
/KCI-test/data/boston_names:
--------------------------------------------------------------------------------
1 | new1 1. CRIM: per capita crime rate by town
2 | new2 2. ZN: proportion of residential land zoned for lots over 25,000 sq.ft.
3 | new3 3. INDUS: proportion of non-retail business acres per town
4 | new4 5. NOX: nitric oxides concentration (parts per 10 million)
5 | new5 6. RM: average number of rooms per dwelling
6 | new6 7. AGE: proportion of owner-occupied units built prior to 1940
7 | new7 8. DIS: weighted distances to five Boston employment centres
8 | new8 10. TAX: full-value property-tax rate per $10,000
9 | new9 11. PTRATIO: pupil-teacher ratio by town
10 | new10 12. B: 1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town
11 | new11 13. LSTAT: % lower status of the population
12 | new12 14. MEDV: Median value of owner-occupied homes in $1000's
13 |
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/README:
--------------------------------------------------------------------------------
1 | ABOUT THESE PROGRAMS
2 | ====================
3 |
4 | This collection of matlab programs implements and demonstrates some of the
5 | algorithms described in the book Rasmussen and Williams: "Gaussian Processes
6 | for Machine Learning", the MIT Press 2006.
7 |
8 |
9 | There are 3 subdirectories: gpml, gpml-demo and doc.
10 |
11 | gpml: contains code which implements the algorithms. Please see the Copyright
12 | notice contained in the file named "Copyright".
13 |
14 | gpml-demo: contains matlab scripts with names "demo_*.m". These provide small
15 | demonstrations of the various programs provided.
16 |
17 | doc: contains four html files providing documentation. The best place to start
18 | is index.html, the other pages are linked from there. This information
19 | is also available from http://www.GaussianProcess.org/gpml/code
20 |
21 | When running the demos, it is assumed that your current directory is the
22 | gpml-demo directory. Otherwise, you should manually add both the gpml-demo and
23 | gpml directories to the matlab path.
24 |
25 |
26 | VERSION
27 | =======
28 |
29 | The current version of the programs is dated: 2007-07-25. Previous versions of
30 | the code may be available at http://www.gaussianprocess.org/gpml/code/old
31 |
32 |
33 | CHANGES FROM PREVIOUS VERSIONS
34 | ==============================
35 |
36 |
37 | Changes from the 2007-06-25 version:
38 | ------------------------------------
39 |
40 | covConst.m: fixed a bug which caused an error in the derivative of the log marginal
41 | likelihood for certain combinations of covariance functions and approximation
42 | methods. (Thanks to Antonio Eleuteri for reporting the problem)
43 |
44 | gauher.m: added the function "gauher.m" which was mistakenly missing from the
45 | previous release. This caused an error for certain combinations of
46 | approximation method and likelihood function.
47 |
48 | logistic.m: modified the approximation of moments calculation to use a mixture
49 | of cumulative Gaussian, rather than Gauss-Hermite quadrature, as the former
50 | turns out to be more accurate.
51 |
52 |
53 | Changes from the 2006-09-08 version:
54 | ------------------------------------
55 |
56 | Some code restructuring has taken place for the classification code to make it
57 | more modular, to facilitate the addition of new likelihood functions and
58 | approximation methods. Now, all classification is done using the binaryGP
59 | function, which (among other things) takes an approximation method and a
60 | likelihood function as arguments. Thus, binaryGP replaces both binaryEPGP
61 | and binaryLaplaceGP, although wrapper functions are still provided for backward
62 | compatibility. This gives added flexibility: now EP can also be used with the
63 | logistic likelihood function (implemented using Gauss-Hermite quadrature).
64 |
65 | approxEP.m: New file, containing the Expectation Propagation approximation
66 | method, which was previously contained in binaryEPGP.m
67 |
68 | approxLA.m: New file, containing Laplace's approximation method, which was
69 | previously contained in binaryLaplaceGP.m
70 |
71 | approximations.m: New file, help for the approximation methods.
72 |
73 | binaryEPGP.m: This file has been replaced by a wrapper (for backward
74 | compatibility) which calls the more general binaryGP function.
75 |
76 | binaryGP.m: New general function to do binary classification.
77 |
78 | binaryLaplaceGP.m: This file has been replaced by a wrapper (for backward
79 | compatibility) which calls the more general binaryGP function.
80 |
81 | covMatern3iso.m, covMatern5iso.m, covNNone.m, covRQard.m, covRQiso.m,
82 | covSEard.m, covSEiso.m: now check more carefully that persistent variables have
83 | the correct sizes, and some variable names have been modified.
84 |
85 | cumGauss.m: New file, containing code for the cumulative Gaussian
86 | likelihood function
87 |
88 | likelihoods.m: New file, help for likelihood functions
89 |
90 | logistic.m: New file, logistic likelihood
91 |
92 |
93 | Changes from the 2006-05-10 version:
94 | ------------------------------------
95 |
96 | covRQard.m: bugfix: replaced x with x' and z with z' in line 36
97 |
98 | covRQiso.m: bugfix: replaced x with x' and z with z' in line 28
99 |
100 | minimize.m: correction: replaced "error()" with "error('')", and
101 | made a few cosmetic changes
102 |
103 | binaryEPGP.m: added the line "lml = -n*log(2);" in line 77. This change
104 | should be largely inconsequential, but occasionally may save things
105 | when the covariance matrix is exceptionally badly conditioned.
106 |
107 |
108 | Changes from the 2006-04-12 version:
109 | ------------------------------------
110 |
111 | added the "erfint" function to "binaryLaplaceGP.m". The erfint function
112 | was missing by mistake, preventing the use of the "logistic" likelihood.
113 |
114 |
115 | Changes from the 2006-03-29 version:
116 | ------------------------------------
117 |
118 | added files: "covProd.m" and "covPeriodic.m"
119 |
120 | changes: "covSEiso.m" was changed slightly to avoid the use of persistent
121 | variables
122 |
123 |
124 | DATASETS
125 | ========
126 |
127 | The datasets needed for some of the demos can be downloaded from
128 | http://www.GaussianProcess.org/gpml/data
129 |
130 |
131 |
132 | ABOUT MEX FILES
133 | ===============
134 |
135 | Some of the programs make use of the mex facility in matlab for more efficient
136 | implementation. However, if you don't know about how to compile mex files, you
137 | do not need to worry about this - the code should run anyway. If you do
138 | compile the mex files, this should be automatically detected, and the program
139 | will run more efficiently. In particular, the demonstrations of classification
140 | on the usps digits require a lot of computation.
141 |
142 |
143 |
144 | COMPILING MEX FILES
145 | ===================
146 |
147 | As mentioned above, it is not necessary to compile the mex files, but it can
148 | speed up execution considerably. We cannot give a detailed account, but here
149 | are some hints:
150 |
151 | Generally, you just type "mex file.c" at the matlab prompt or in your shell to
152 | compile, where "file.c" is the program you want to compile. There is a Makefile
153 | which works for unix/linux on x86 machines. Just type "make".
154 |
155 | In some cases (solve_chol.c), routines from the lapack numerical library are
156 | used. This should pose no problem on linux. On windows, you have to 1) remove
157 | the trailing underscore from the name of the lapack function ("dpotrs", two
158 | occurrences) and 2) pass the location of the lapack library to mex, ie
159 | something like
160 |
161 | mex file.c <matlab>/extern/lib/win32/lcc/libmwlapack.lib
162 |
163 | where <matlab> is the root of your matlab installation. If your installation
164 | doesn't include the libmwlapack.lib you may be able to get it from
165 |
166 | http://www.cise.ufl.edu/research/sparse/umfpack/v4.4/UMFPACKv4.4/UMFPACK/
167 | MATLAB/lcc_lib/
168 |
169 |
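170 | (Hint, an addition to this README: on recent MATLAB versions the LAPACK
171 | library bundled with MATLAB can usually be linked on any platform with
172 | "mex solve_chol.c -lmwlapack", which avoids the manual steps above.)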
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/doc/alg21.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/alg21.gif
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/doc/alg31.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/alg31.gif
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/doc/alg32.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/alg32.gif
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/doc/alg35.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/alg35.gif
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/doc/alg36.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/alg36.gif
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/doc/alg51.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/alg51.gif
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/doc/alg52.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/alg52.gif
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/doc/fig2d.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/fig2d.gif
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/doc/fig2de1.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/fig2de1.gif
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/doc/fig2de2.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/fig2de2.gif
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/doc/fig2de3.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/fig2de3.gif
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/doc/fig2dl1.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/fig2dl1.gif
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/doc/fig2dl2.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/fig2dl2.gif
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/doc/fig2dl3.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/fig2dl3.gif
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/doc/figepp.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/figepp.gif
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/doc/figepp2.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/figepp2.gif
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/doc/figl.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/figl.gif
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/doc/figl1.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/figl1.gif
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/doc/figlapp.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/figlapp.gif
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/doc/figlapp2.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/figlapp2.gif
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/doc/figlf.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/figlf.gif
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/doc/figlm.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/figlm.gif
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/doc/index.html:
--------------------------------------------------------------------------------
1 | Documentation for GPML Matlab Code
2 |
3 | The code is written in Matlab®, and should work with version 6 and
4 | version 7. Bug reports should be sent to the authors. All the code
5 | including demonstrations and html documentation can be downloaded in a
6 | tar or zip archive file. Previous versions of the code may be available
7 | here. Please read the copyright notice.
8 |
9 | After unpacking the tar or zip file you will find 3 subdirectories:
10 | gpml, gpml-demo and doc.
11 |
12 | The directory gpml contains the basic functions for GP regression,
13 | GP binary classification, and sparse approximate methods for GP regression.
14 |
15 | The directory gpml-demo contains Matlab® scripts with names "demo_*.m".
16 | These provide small demonstrations of the various programs provided.
17 |
18 | The directory doc contains four html files providing documentation.
19 | This information can also be accessed via the www at
20 | http://www.GaussianProcess.org/gpml/code.
21 |
22 | The code should run directly as provided, but some demos require a lot of
23 | computation. A significant speedup may be attained by compiling the mex
24 | files, see the rudimentary instructions on how to do this in the README file.
25 |
26 | The documentation is divided into three sections:
27 |
28 | Regression: Basic Gaussian process regression (GPR) code allowing
29 | flexible specification of the covariance function.
30 |
31 | Binary Classification: Gaussian process classification (GPC) demonstrates
32 | implementations of Laplace and EP approximation methods for binary GP
33 | classification.
34 |
35 | Sparse Approximation methods for Gaussian Process Regression:
36 | Approximation methods for GPR demonstrates the methods of subset of
37 | datapoints (SD), subset of regressors (SR) and projected process (PP)
38 | approximations.
39 |
40 | Go back to the web page for Gaussian Processes for Machine Learning.
41 |
42 | Last modified: Tue Jun 26 10:43:51 CET 2007
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/doc/style.css:
--------------------------------------------------------------------------------
1 | body {font-family: sans-serif; font-size: 16px}
2 | table {font-size: inherit;}
3 |
4 |
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml-demo/Contents.m:
--------------------------------------------------------------------------------
1 | % demonstration scripts and data for the gpml software
2 | % date: 2006-03-27.
3 | %
4 | % data_6darm.mat - MacKay's robot arm data, with 6 input dimensions
5 | % data_boston.mat - the boston housing data
6 | %
7 | % demo_ep_2d.m - Demonstrate EP GP classifier on 2-d data
8 | % demo_ep_usps.m - Demonstrate EP GP classifier on USPS digits data
9 | % demo_gparm.m - Demonstrate GP regression on MacKay's robot arm problem
10 | % demo_gpr.m - Demonstrate gpr function for flexible covariances
11 | % demo_gprsparse.m - Demonstrate approx GP regression on Boston housing data
12 | % demo_laplace_2d.m - Demonstrate Laplace binary GPC on 2-d data
13 | % demo_laplace_usps.m - Demonstrate Laplace GP classifier on USPS digits data
14 | %
15 | % Copyright (c) 2005, 2006 by Carl Edward Rasmussen and Chris Williams
16 |
17 |
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml-demo/data_6darm.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/gpml-demo/data_6darm.mat
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml-demo/data_boston.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/gpml-demo/data_boston.mat
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml-demo/demo_ep_usps.m:
--------------------------------------------------------------------------------
1 | % Demo script to illustrate use of binaryEPGP on a binary digit classification
2 | % task. 2006-03-29.
3 |
4 | if isempty(regexp(path,['gpml' pathsep]))
5 | cd ..; w = pwd; addpath([w, '/gpml']); cd gpml-demo % add code dir to path
6 | end
7 |
8 | hold off
9 | clear
10 | clf
11 | clc
12 |
13 | disp('This demonstration illustrates the use of the Expectation Propagation')
14 | disp('(EP) approximation for binary Gaussian process classification applied')
15 | disp('to a digit task.')
16 | disp(' ')
17 |
18 | if exist('loadBinaryUSPS') ~= 2
19 | disp('Error: can''t find the loadBinaryUSPS.m file. For this example, you')
20 | disp('need to download the usps_resampled archive. It is available at')
21 | disp('http://www.GaussianProcess.org/gpml/data')
22 | return
23 | end
24 |
25 | disp('The data consists of 16 by 16 grey scale images of handwritten digits')
26 | disp('derived from the USPS data set. We will consider the binary')
27 | disp('classification task of separating 3''s from 5''s. The training set')
28 | disp('contains 767 cases and the test set 773 cases. Here is an example')
29 | disp('image of a digit 3.');
30 | disp(' ');
31 |
32 | disp(' [x y xx yy] = loadBinaryUSPS(3, 5);')
33 | [x y xx yy] = loadBinaryUSPS(3, 5);
34 | disp(' imagesc(reshape(x(3,:),16,16)''), colormap(''gray'')')
35 | imagesc(reshape(x(3,:),16,16)'), colormap('gray')
36 |
37 | disp(' ')
38 | disp('Press any key to continue.')
39 | disp(' ')
40 | pause
41 |
42 | disp('We must specify a covariance function. The demonstration uses the')
43 | disp('squared exponential (SE) covariance function but many other covariance')
44 | disp('functions are supported as well. The SE covariance function has two')
45 | disp('parameters; a log length-scale parameter and a log magnitude parameter.')
46 | disp('As an initial guess for the parameters, we set the log length-scale to')
47 | disp('the log of the average pairwise distance between training points,')
48 | disp('roughly log(22)=3, and the magnitude is set to unity, i.e. its log to 0.')
49 | disp('Other initial choices could be reasonable too.')
50 | disp(' ');
51 | disp('We then call the binaryEPGP function, which constructs the EP')
52 | disp('approximation of the posterior over functions based on the training set')
53 | disp('and produces probabilistic predictions for the test cases. This may')
54 | disp('take a few minutes or so... depending on whether you compiled the mex')
55 | disp('files... ')
56 | disp(' ')
57 |
58 | disp(' loghyper = [3.0; 0.0]; % set the log hyperparameters')
59 | loghyper = [3.0; 0.0]; % set the log hyperparameters
60 | disp(' p = binaryEPGP(loghyper, ''covSEiso'', x, y, xx);')
61 | p = binaryEPGP(loghyper, 'covSEiso', x, y, xx);
62 | disp(' ')
63 |
64 | disp(' plot(p,''.'')');
65 | plot(p,'.')
66 | disp(' hold on');
67 | hold on
68 | disp(' plot([1 length(p)],[0.5 0.5],''r'')');
69 | plot([1 length(p)],[0.5 0.5],'r')
70 | xlabel('test case number')
71 | ylabel('predictive probability')
72 | axis([0 length(p) 0 1])
73 |
74 | disp(' ')
75 | disp('Press any key to continue.')
76 | disp(' ')
77 | pause
78 |
79 | disp('Keep in mind that the test cases are ordered according to their')
80 | disp('target class. Notice that there are misclassifications, but there are')
81 | disp('no very confident misclassifications. The number of test set errors')
82 | disp('(out of 773 test cases) when thresholding the predictive probability at')
83 | disp('0.5 and the average amount of information about the test set labels in')
84 | disp('excess of a 50/50 model in bits are given by:')
85 | disp(' ')
86 |
87 | disp(' sum((p>0.5)~=(yy>0))')
88 | sum((p>0.5)~=(yy>0))
89 | disp(' mean((yy==1).*log2(p)+(yy==-1).*log2(1-p))+1')
90 | mean((yy==1).*log2(p)+(yy==-1).*log2(1-p))+1
91 |
92 | disp(' ')
93 | disp('Press any key to continue.')
94 | disp(' ')
95 | pause
96 |
97 | disp('These results were obtained by simply guessing some values for the')
98 | disp('hyperparameters. We can instead optimize the marginal likelihood on')
99 | disp('the training set w.r.t. the hyperparameters. The current values');
100 | disp('of the log hyperparameters (2 numbers), and the initial value')
101 | disp('of the negative log marginal likelihood are:')
102 | disp(' ')
103 |
104 | disp(' [loghyper'' binaryEPGP(loghyper, ''covSEiso'', x, y)]')
105 | [loghyper' binaryEPGP(loghyper, 'covSEiso', x, y)]
106 |
107 |
108 |
109 | disp(' ')
110 | disp('Press any key to continue.')
111 | disp(' ')
112 | pause
113 |
114 | disp('Now minimize the negative log marginal likelihood w.r.t. the')
115 | disp('hyperparameters, starting at the current values of loghyper. The third')
116 | disp('argument, -20, tells minimize to evaluate the function a maximum of 20')
117 | disp('times... WARNING: this may take 30 minutes or so... depending on your')
118 | disp('machine and whether you compiled the mex files... press ''ctrl-C'' to')
119 | disp('abort now, otherwise...')
120 | disp(' ')
121 | disp('Press any key to continue.')
122 | disp(' ')
123 | pause
124 |
125 | disp(' [newloghyper logmarglik] = minimize(loghyper, ''binaryEPGP'', -20, ''covSEiso'', x, y);')
126 | [newloghyper logmarglik] = minimize(loghyper, 'binaryEPGP', -20, 'covSEiso', x, y);
127 | disp(' [newloghyper'' logmarglik(end)]')
128 | [newloghyper' logmarglik(end)]
129 |
130 | disp(' ')
131 | disp('This shows that the log marginal likelihood was increased from -222 to')
132 | disp('-90 by optimizing the hyperparameters. This means that the marginal')
133 | disp('likelihood has increased by a factor of exp(222-90) = 2e+57.')
134 |
135 | disp(' ')
136 | disp('Press any key to continue.')
137 | disp(' ')
138 | pause
139 |
140 | disp('Finally, we can make test set predictions with the new hyperparameters:')
141 | disp(' ')
142 |
143 | disp(' pp = binaryEPGP(newloghyper, ''covSEiso'', x, y, xx);')
144 | pp = binaryEPGP(newloghyper, 'covSEiso', x, y, xx);
145 | disp(' plot(pp,''g.'')');
146 | plot(pp,'g.')
147 |
148 | disp(' ')
149 | disp('We note that the new predictions (in green) take much more extreme')
150 | disp('values than the old ones (in blue).')
151 |
152 | disp(' ')
153 | disp('Press any key to continue.')
154 | disp(' ')
155 | pause
156 |
157 | disp('The number of test set errors (out of 773 test cases) when')
158 | disp('thresholding the predictive probability at 0.5 and the average amount')
159 | disp('of information about the test set labels in excess of a 50/50 model')
160 | disp('in bits are given by:')
161 | disp(' ')
162 |
163 | disp(' sum((pp>0.5)~=(yy>0))')
164 | sum((pp>0.5)~=(yy>0))
165 | disp(' mean((yy==1).*log2(pp)+(yy==-1).*log2(1-pp))+1')
166 | mean((yy==1).*log2(pp)+(yy==-1).*log2(1-pp))+1
167 |
168 | disp(' ')
169 | disp('showing that the misclassification rate has dropped and the information')
170 | disp('about the test target labels has increased compared to using the old,')
171 | disp('initially guessed values for the hyperparameters.')
172 | disp(' ')
173 | disp('Press any key to exit.')
174 | disp(' ')
175 | pause
176 |
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml-demo/demo_gprsparse.m:
--------------------------------------------------------------------------------
1 | % demo script to show Subset of Data (SD), Subset of Regressors (SR) and
2 | % Projected Process (PP) approximations to GPR described in chapter 8 on the
3 | % Boston Housing data
4 |
5 | % The Boston housing data set was originally published by Harrison, D. and
6 | % Rubinfeld, D. L., Hedonic housing prices and the demand for clean air,
7 | % Journal of Environmental Economics and Management 5, 81-102 (1978) and is
8 | % publicly available at the UCI database "UCI Repository of machine learning
9 | % databases", http://www.ics.uci.edu/~mlearn/MLRepository.html and DELVE
10 | % http://www.cs.utoronto.ca/~delve
11 | %
12 | % (C) Copyright 2005, 2006 by Chris Williams (2006-03-29)
13 |
14 | if isempty(regexp(path,['gpml' pathsep]))
15 | cd ..; w = pwd; addpath([w, '/gpml']); cd gpml-demo % add code dir to path
16 | end
17 |
18 | hold off
19 | clear
20 | clc
21 |
22 | disp('This demonstration illustrates the use of three approximate methods for')
23 | disp('GPR, namely the subset of datapoints (SD), subset of regressors (SR)')
24 | disp('and projected process (PP) methods.')
25 | disp(' ');
26 | disp('We use the Boston housing data of Harrison, D. and Rubinfeld, D. L.,')
27 | disp('Journal of Environmental Economics and Management 5, 81-102 (1978).')
28 | disp('This dataset has 13 input variables and one output target. A split of')
29 | disp('455 training points and 51 test points is used. The data has been')
30 | disp('scaled so that each variable has approximately zero mean and unit')
31 | disp('variance.')
32 | disp(' ')
33 | disp('We use Gaussian process regression with a squared exponential')
34 | disp('covariance function, and allow a separate lengthscale for each input')
35 | disp('dimension, as in eqs. 5.1 and 5.2 of Rasmussen and Williams (2006).')
36 | disp(' ')
37 | disp('Press any key to continue')
38 | pause;
39 |
40 | disp(' ')
41 | disp('The training and test data is contained in the file data_boston.mat')
42 | disp('The raw training data is in the input matrix X (455 by 13) and the')
43 | disp('target vector y (455 by 1). First, load the data')
44 | disp(' ')
45 | disp(' load data_boston;')
46 | load data_boston;
47 |
48 | disp('the data has been scaled to zero mean and unit variance')
49 | disp('over the training and test data');
50 |
51 | [n,D]=size(X);
52 | nstar = size(Xstar,1);
53 |
54 | % compute error of mean(y) predictor
55 |
56 | diff = ystar - ones(nstar,1)*mean(y);
57 | mse_dumb=sum(diff.^2)/nstar;
58 | vdumb = var(y)*ones(nstar,1);
59 | pll_dumb = (-0.5*sum(log(2*pi*vdumb)) - 0.5*sum((diff.*diff)./vdumb))/nstar;
60 |
61 | disp(' ')
62 | disp(' m = 200; % choose size of the subset, m<=n')
63 | m = 200; % choose size of the subset, m<=n
64 | disp(' ')
65 | disp('A random subset of the training data points is selected using the')
66 | disp('randperm function. This set is of size m.')
67 | disp(' ')
68 |
69 | % now select random training set of size m
70 | rand('state',0);
71 | disp(' perm = randperm(n);')
72 | perm = randperm(n);
73 | disp(' INDEX = perm(1:m);')
74 | INDEX = perm(1:m);
75 | disp(' Xm = X(INDEX,:);')
76 | Xm = X(INDEX,:);
77 | disp(' ym = y(INDEX);')
78 | ym = y(INDEX);
79 |
80 | disp(' ')
81 | disp('We use a covariance function made up of the sum of a squared')
82 | disp('exponential (SE) covariance term with ARD, and independent noise.')
83 | disp('Thus, the covariance function is specified as follows:')
84 | disp(' ')
85 | disp(' covfunc = {''covSum'', {''covSEard'',''covNoise''}};')
86 | covfunc = {'covSum', {'covSEard','covNoise'}};
87 |
88 | disp(' ');
89 | disp('The hyperparameters are stored as')
90 | disp(' ')
91 | disp(' logtheta = [log(ell_1), log(ell_2), ... log(ell_13), log(sigma_f), log(sigma_n)]')
92 | disp(' ')
93 | disp('(as D = 13), and are initialized to')
94 | disp(' ')
95 | disp(' logtheta0 = [0 0 ... 0 0 -1.15]')
96 | disp(' ');
97 | disp('Note that the noise standard deviation is set to exp(-1.15)')
98 | disp('corresponding to a noise variance of 0.1.')
99 | disp(' ')
100 | disp('The hyperparameters are trained by maximizing the approximate marginal')
101 | disp('likelihood of the SD method as per eq. 8.31, which simply computes the')
102 | disp('marginal likelihood of the subset of size m.')
103 | disp(' ')
104 | disp('Press any key to optimize the approximate marginal likelihood.')
105 | pause;
106 |
107 | % train hyperparameters
108 | logtheta0 = zeros(D+2,1); % starting values of log hyperparameters
109 | logtheta0(D+2) = -1.15; % starting value for log(noise std dev)
110 |
111 | disp(' ')
112 | disp(' logtheta = minimize(logtheta0, ''gpr'', -100, covfunc, Xm, ym);')
113 | disp(' ')
114 | logtheta = minimize(logtheta0, 'gpr', -100, covfunc, Xm, ym);
115 |
116 | disp(' ')
117 | disp('Predictions can now be made:')
118 | disp(' ')
119 | disp('(1) using the SD method, which is implemented by calling gpr.m with the')
120 | disp(' appropriate subset of the training data')
121 | disp('(2) using the SR method,')
122 | disp('(3) using the PP method.')
123 | disp(' ')
124 | disp('The SR and PP methods are implemented in the function gprSRPP.m')
125 | disp(' ')
126 | disp('For comparison we also make predictions using gpr.m on the full')
127 | disp('training dataset, and a dumb predictor that just predicts the mean and')
128 | disp('variance of the training data.')
129 | disp(' ')
130 | disp('Press any key to make the predictions.')
131 | pause;
132 |
133 | % now make predictions: SD method
134 |
135 | disp(' ')
136 | disp(' [fstarSD S2SD] = gpr(logtheta, covfunc, Xm, ym, Xstar); % SD method')
137 | [fstarSD S2SD] = gpr(logtheta, covfunc, Xm, ym, Xstar);
138 |
139 | resSD = fstarSD-ystar; % residuals
140 | mseSD = mean(resSD.^2);
141 | pllSD = (-0.5*sum(log(2*pi*S2SD)) - 0.5*sum((resSD.*resSD)./S2SD))/nstar;
142 |
143 |
144 | % now make predictions: SR and PP methods
145 |
146 | disp(' [fstarSRPP S2SR S2PP] = gprSRPP(logtheta, covfunc, X, INDEX, y, Xstar); % SR,PP')
147 | [fstarSRPP S2SR S2PP] = gprSRPP(logtheta, covfunc, X, INDEX, y, Xstar);
148 |
149 | resSR = fstarSRPP-ystar;
150 | mseSR = sum(resSR.^2)/nstar;
151 | msePP = mseSR;
152 | pllSR = -0.5*mean(log(2*pi*S2SR)+resSR.^2./S2SR);
153 | pllPP = -0.5*mean(log(2*pi*S2PP)+resSR.^2./S2PP);
154 |
155 | % for comparison, make predictions with the full training dataset
156 |
157 | [fstar S2] = gpr(logtheta, covfunc, X, y, Xstar);
158 |
159 | res = fstar-ystar; % residuals
160 | mse = mean(res.^2);
161 | pll = -0.5*mean(log(2*pi*S2)+res.^2./S2);
162 |
163 |
164 | disp(' ')
165 | disp('The test results are:')
166 |
167 | fprintf(1,'mse_full %g\t pll_full %g\n', mse, pll);
168 | fprintf(1,'mse_SD %g\t pll_SD %g\n', mseSD, pllSD);
169 | fprintf(1,'mse_SR %g\t pll_SR %g\n', mseSR, pllSR);
170 | fprintf(1,'mse_PP %g\t pll_PP %g\n', msePP, pllPP);
171 | fprintf(1,'mse_dumb %g\t pll_dumb %g\n', mse_dumb, pll_dumb);
172 |
173 | disp(' ')
174 | disp('where mse denotes mean squared error and pll denotes predictive log')
175 | disp('likelihood. A higher (less negative) pll is more desirable. Note that')
176 | disp('the mse for the SR and PP methods is identical as expected. The SR and')
177 | disp('PP methods outperform SD on mse, and are close to the full mse. On pll,')
178 | disp('the PP method does slightly better than the full predictor, followed by')
179 | disp('the SD and SR methods.')
180 |
181 | disp(' ')
182 | disp('Press any key to end.')
183 | pause
184 |
185 |
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml-demo/demo_laplace_usps.m:
--------------------------------------------------------------------------------
1 | % Demo script to illustrate use of binaryLaplaceGP.m on a binary digit
2 | % classification task. 2006-03-29.
3 |
4 | if isempty(regexp(path,['gpml' pathsep]))
5 | cd ..; w = pwd; addpath([w, '/gpml']); cd gpml-demo % add code dir to path
6 | end
7 |
8 | hold off
9 | clear
10 | clf
11 | clc
12 |
13 | disp('This demonstration illustrates the use of Laplace''s approximation for')
14 | disp('binary Gaussian process classification applied to a digit task.')
15 | disp(' ')
16 |
17 | if exist('loadBinaryUSPS') ~= 2
18 | disp('Error: can''t find the loadBinaryUSPS.m file. For this example, you')
19 | disp('need to download the usps_resampled archive. It is available at')
20 | disp('http://www.GaussianProcess.org/gpml/data')
21 | return
22 | end
23 |
24 | disp('The data consists of 16 by 16 grey scale images of handwritten digits')
25 | disp('derived from the USPS data set. We will consider the binary')
26 | disp('classification task of separating 3''s from 5''s. The training set')
27 | disp('contains 767 cases and the test set 773 cases. Here is an example')
28 | disp('image of a digit 3.');
29 | disp(' ');
30 |
31 | disp(' [x y xx yy] = loadBinaryUSPS(3, 5);')
32 | [x y xx yy] = loadBinaryUSPS(3, 5);
33 | disp(' imagesc(reshape(x(3,:),16,16)''), colormap(''gray'')')
34 | imagesc(reshape(x(3,:),16,16)'), colormap('gray')
35 |
36 | disp(' ')
37 | disp('Press any key to continue.')
38 | disp(' ')
39 | pause
40 |
41 | disp('We must specify a covariance function and a likelihood function. For')
42 | disp('the demonstration we use the squared exponential (SE) covariance')
43 | disp('function but many other covariance functions are supported as well.')
44 | disp('The SE covariance function has two parameters; a log length-scale')
45 | disp('parameter and a log magnitude parameter. As an initial guess for the')
46 | disp('parameters, we set the log length-scale to the log of the average')
47 | disp('pairwise distance between training points, roughly log(22)=3 and the')
48 | disp('magnitude is set to unity, i.e. its log to 0. Other initial choices')
49 | disp('could be reasonable too. We chose the cumulative Gaussian as')
50 | disp('likelihood function.')
51 | disp(' ');
52 | disp('We then call the binaryLaplaceGP function, which constructs the Laplace')
53 | disp('approximation of the posterior over functions based on the training set')
54 | disp('and produces probabilistic predictions for the test cases. This may')
55 | disp('take a minute or so...')
56 | disp(' ')
57 |
58 |
59 | disp(' loghyper = [3.0; 0.0]; % set the log hyperparameters')
60 | loghyper = [3.0; 0.0]; % set the log hyperparameters
61 | disp(' p = binaryLaplaceGP(loghyper, ''covSEiso'', ''cumGauss'', x, y, xx);')
62 | p = binaryLaplaceGP(loghyper, 'covSEiso', 'cumGauss', x, y, xx);
63 | disp(' ')
64 |
65 | disp(' plot(p,''.'')');
66 | plot(p,'.')
67 | disp(' hold on');
68 | hold on
69 | disp(' plot([1 length(p)],[0.5 0.5],''r'')');
70 | plot([1 length(p)],[0.5 0.5],'r')
71 | xlabel('test case number')
72 | ylabel('predictive probability')
73 | axis([0 length(p) 0 1])
74 |
75 | disp(' ')
76 | disp('Press any key to continue.')
77 | disp(' ')
78 | pause
79 |
80 | disp('Keep in mind that the test cases are ordered according to their')
81 | disp('target class. Notice that there are misclassifications, but there are')
82 | disp('no very confident misclassifications. The number of test set errors')
83 | disp('(out of 773 test cases) when thresholding the predictive probability at')
84 | disp('0.5 and the average amount of information about the test set labels in')
85 | disp('excess of a 50/50 model in bits are given by:')
86 | disp(' ')
87 |
88 | disp(' sum((p>0.5)~=(yy>0))')
89 | sum((p>0.5)~=(yy>0))
90 | disp(' mean((yy==1).*log2(p)+(yy==-1).*log2(1-p))+1')
91 | mean((yy==1).*log2(p)+(yy==-1).*log2(1-p))+1
92 |
93 | disp(' ')
94 | disp('Press any key to continue.')
95 | disp(' ')
96 | pause
97 |
98 | disp('These results were obtained by simply guessing some values for the')
99 | disp('hyperparameters. We can instead optimize the marginal likelihood on')
100 | disp('the training set w.r.t. the hyperparameters. The current values');
101 | disp('of the log hyperparameters (2 numbers), and the initial value')
102 | disp('of the negative log marginal likelihood are:')
103 | disp(' ')
104 |
105 | disp(' [loghyper'' binaryLaplaceGP(loghyper, ''covSEiso'', ''cumGauss'', x, y)]')
106 | [loghyper' binaryLaplaceGP(loghyper, 'covSEiso', 'cumGauss', x, y)]
107 |
108 | disp(' ')
109 | disp('Press any key to continue.')
110 | disp(' ')
111 | pause
112 |
113 | disp('Now, we minimize the negative log marginal likelihood w.r.t. the')
114 | disp('hyperparameters. The third argument -20 tells minimize to evaluate the')
115 | disp('function a maximum of 20 times... this may take a few minutes or so...')
116 | disp('depending on whether you compiled the mex files...')
117 | disp(' ')
118 |
119 | disp(' [newloghyper logmarglik] = minimize(loghyper, ''binaryLaplaceGP'', -20, ''covSEiso'', ''cumGauss'', x, y);')
120 | [newloghyper logmarglik] = minimize(loghyper, 'binaryLaplaceGP', -20, 'covSEiso', 'cumGauss', x, y);
121 | disp(' [newloghyper'' logmarglik(end)]')
122 | [newloghyper' logmarglik(end)]
123 |
124 | disp(' ')
125 | disp('This shows that the log marginal likelihood was increased from -222 to')
126 | disp('-99 by optimizing the hyperparameters. This means that the marginal')
127 | disp('likelihood has increased by a factor of exp(222-99) = 3e+53.')
128 |
129 | disp(' ')
130 | disp('Press any key to continue.')
131 | disp(' ')
132 | pause
133 |
134 | disp('Finally, we can make test set predictions with the new hyperparameters:')
135 | disp(' ')
136 |
137 | disp(' pp = binaryLaplaceGP(newloghyper, ''covSEiso'', ''cumGauss'', x, y, xx);')
138 | pp = binaryLaplaceGP(newloghyper, 'covSEiso', 'cumGauss', x, y, xx);
139 | disp(' plot(pp,''g.'')');
140 | plot(pp,'g.')
141 |
142 | disp(' ')
143 | disp('We note that the new predictions (in green) take much more extreme')
144 | disp('values than the old ones (in blue).')
145 |
146 | disp(' ')
147 | disp('Press any key to continue.')
148 | disp(' ')
149 | pause
150 |
151 | disp('The number of test set errors (out of 773 test cases) when')
152 | disp('thresholding the predictive probability at 0.5 and the average amount')
153 | disp('of information about the test set labels in excess of a 50/50 model')
154 | disp('in bits are given by:')
155 | disp(' ')
156 |
157 | disp(' sum((pp>0.5)~=(yy>0))')
158 | sum((pp>0.5)~=(yy>0))
159 | disp(' mean((yy==1).*log2(pp)+(yy==-1).*log2(1-pp))+1')
160 | mean((yy==1).*log2(pp)+(yy==-1).*log2(1-pp))+1
161 |
162 | disp(' ')
163 | disp('showing that the misclassification rate has dropped and the information')
164 | disp('about the test target labels has increased compared to using the old,')
165 | disp('initially guessed values for the hyperparameters.')
166 | disp(' ')
167 | disp('Press any key to exit.')
168 | disp(' ')
169 | pause
170 |
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/Contents.m:
--------------------------------------------------------------------------------
1 | % gpml: code from Rasmussen & Williams: Gaussian Processes for Machine Learning
2 | % date: 2007-07-25.
3 | %
4 | % approxEP.m - the approximation method for Expectation Propagation
5 | % approxLA.m - the approximation method for Laplace's approximation
6 | % approximations.m - help for approximation methods
7 | % binaryEPGP.m - outdated, the EP approx for binary GP classification
8 | % binaryGP.m - binary Gaussian process classification
9 | % binaryLaplaceGP.m - outdated, Laplace's approx for binary GP classification
10 | %
11 | % covConst.m - covariance for constant functions
12 | % covFunctions.m - help file with overview of covariance functions
13 | % covLINard.m - linear covariance function with ard
14 | % covLINone.m - linear covariance function
15 | % covMatern3iso.m - Matern covariance function with nu=3/2
16 | % covMatern5iso.m - Matern covariance function with nu=5/2
17 | % covNNone.m - neural network covariance function
18 | % covNoise.m - independent covariance function (ie white noise)
19 | % covPeriodic.m - covariance for smooth periodic function, with unit period
20 | % covProd.m - function for multiplying other covariance functions
21 | % covRQard.m - rational quadratic covariance function with ard
22 | % covRQiso.m - isotropic rational quadratic covariance function
23 | % covSEard.m - squared exponential covariance function with ard
24 | % covSEiso.m - isotropic squared exponential covariance function
25 | % covSum.m - function for adding other covariance functions
26 | %
27 | % cumGauss.m - cumulative Gaussian likelihood function
28 | % gpr.m - Gaussian process regression with general covariance
29 | % function
30 | % gprSRPP.m - Implements SR and PP approximations to GPR
31 | % likelihoods.m - help function for classification likelihoods
32 | % logistic.m - logistic likelihood function
33 | % minimize.m - Minimize a differentiable multivariate function
34 | % solve_chol.c - Solve linear equations from the Cholesky factorization
35 | % should be compiled into a mex file
36 | % solve_chol.m - A matlab implementation of the above, used only in case
37 | % the mex file wasn't generated (not very efficient)
38 | % sq_dist.c - Compute a matrix of all pairwise squared distances
39 | % should be compiled into a mex file
40 | % sq_dist.m - A matlab implementation of the above, used only in case
41 | % the mex file wasn't generated (not very efficient)
42 | %
43 | % See also the help for the demonstration scripts in the gpml-demo directory
44 | %
45 | % Copyright (c) 2005, 2006 by Carl Edward Rasmussen and Chris Williams
46 |
47 |
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/Copyright:
--------------------------------------------------------------------------------
1 |
2 | Software that implements
3 |
4 | GAUSSIAN PROCESS REGRESSION AND CLASSIFICATION
5 |
6 | Copyright (c) 2005 - 2007 by Carl Edward Rasmussen and Chris Williams
7 |
8 | Permission is granted for anyone to copy, use, or modify these programs for
9 | purposes of research or education, provided this copyright notice is retained,
10 | and note is made of any changes that have been made.
11 |
12 | These programs are distributed without any warranty, express or
13 | implied. As these programs were written for research purposes only, they
14 | have not been tested to the degree that would be advisable in any
15 | important application. All use of these programs is entirely at the
16 | user's own risk.
17 |
18 | The code and associated documentation are available from
19 |
20 | http://www.GaussianProcess.org/gpml/code
21 |
22 |
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/Makefile:
--------------------------------------------------------------------------------
1 | all: sq_dist.mexglx solve_chol.mexglx
2 |
3 | sq_dist.mexglx: sq_dist.c
4 | mex sq_dist.c
5 |
6 | solve_chol.mexglx: solve_chol.c
7 | mex solve_chol.c
8 |
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/approxEP.m:
--------------------------------------------------------------------------------
1 | function [alpha, sW, L, nlZ, dnlZ] = approxEP(hyper, covfunc, lik, x, y)
2 |
3 | % Expectation Propagation approximation to the posterior Gaussian Process.
4 | % The function takes a specified covariance function (see covFunction.m) and
5 | % likelihood function (see likelihoods.m), and is designed to be used with
6 | % binaryGP.m. See also approximations.m. In the EP algorithm, the sites are
7 | % updated in random order, for better performance when cases are ordered
8 | % according to the targets.
9 | %
10 | % Copyright (c) 2006, 2007 Carl Edward Rasmussen and Hannes Nickisch 2007-07-24
11 |
12 | persistent best_ttau best_tnu best_nlZ % keep tilde parameters between calls
13 | tol = 1e-3; max_sweep = 10; % tolerance for when to stop EP iterations
14 |
15 | n = size(x,1);
16 | K = feval(covfunc{:}, hyper, x); % evaluate the covariance matrix
17 |
18 | % A note on naming: variables are given short but descriptive names in
19 | % accordance with Rasmussen & Williams "GPs for Machine Learning" (2006): mu
20 | % and s2 are mean and variance, nu and tau are natural parameters. A leading t
21 | % means tilde, a subscript _ni means "not i" (for cavity parameters), or _n
22 | % for a vector of cavity parameters.
23 |
24 | if any(size(best_ttau) ~= [n 1]) % find starting point for tilde parameters
25 | ttau = zeros(n,1); % initialize to zero if we have no better guess
26 | tnu = zeros(n,1);
27 | Sigma = K; % initialize Sigma and mu, the parameters of ..
28 | mu = zeros(n, 1); % .. the Gaussian posterior approximation
29 | nlZ = n*log(2);
30 | best_nlZ = Inf;
31 | else
32 | ttau = best_ttau; % try the tilde values from previous call
33 | tnu = best_tnu;
34 | [Sigma, mu, nlZ, L] = epComputeParams(K, y, ttau, tnu, lik);
35 | if nlZ > n*log(2) % if zero is better ..
36 | ttau = zeros(n,1); % .. then initialize with zero instead
37 | tnu = zeros(n,1);
38 | Sigma = K; % initialize Sigma and mu, the parameters of ..
39 | mu = zeros(n, 1); % .. the Gaussian posterior approximation
40 | nlZ = n*log(2);
41 | end
42 | end
43 | nlZ_old = Inf; sweep = 0; % make sure while loop starts
44 |
45 | while nlZ < nlZ_old - tol && sweep < max_sweep % converged or max. sweeps?
46 |
47 | nlZ_old = nlZ; sweep = sweep+1;
48 | for i = randperm(n) % iterate EP updates (in random order) over examples
49 |
50 | tau_ni = 1/Sigma(i,i)-ttau(i); % first find the cavity distribution ..
51 | nu_ni = mu(i)/Sigma(i,i)-tnu(i); % .. parameters tau_ni and nu_ni
52 |
53 | % compute the desired raw moments m0, m1=hmu and m2; m0 is not used
54 | [m0, m1, m2] = feval(lik, y(i), nu_ni/tau_ni, 1/tau_ni);
55 | hmu = m1./m0;
56 | hs2 = m2./m0 - hmu^2; % compute second central moment
57 |
58 | ttau_old = ttau(i); % then find the new tilde parameters
59 | ttau(i) = 1/hs2 - tau_ni;
60 | tnu(i) = hmu/hs2 - nu_ni;
61 |
62 | ds2 = ttau(i) - ttau_old; % finally rank-1 update Sigma ..
63 | si = Sigma(:,i);
64 | Sigma = Sigma - ds2/(1+ds2*si(i))*si*si'; % takes 70% of total time
65 | mu = Sigma*tnu; % .. and recompute mu
66 |
67 | end
68 |
69 | [Sigma, mu, nlZ, L] = epComputeParams(K, y, ttau, tnu, lik); % recompute
70 | % Sigma & mu since repeated rank-one updates can destroy numerical precision
71 | end
72 |
73 | if sweep == max_sweep
74 | disp('Warning: maximum number of sweeps reached in function approxEP')
75 | end
76 |
77 | if nlZ < best_nlZ % if best so far ..
78 | best_ttau = ttau; best_tnu = tnu; best_nlZ = nlZ; % .. keep for next call
79 | end
80 |
81 | sW = sqrt(ttau); % compute output arguments, L and nlZ are done
82 | alpha = tnu-sW.*solve_chol(L,sW.*(K*tnu));
83 |
84 | if nargout > 4 % do we want derivatives?
85 | dnlZ = zeros(size(hyper)); % allocate space for derivatives
86 | F = alpha*alpha'-repmat(sW,1,n).*solve_chol(L,diag(sW));
87 | for j=1:length(hyper)
88 | dK = feval(covfunc{:}, hyper, x, j);
89 | dnlZ(j) = -sum(sum(F.*dK))/2;
90 | end
91 | end
92 |
93 |
94 | % function to compute the parameters of the Gaussian approximation, Sigma and
95 | % mu, and the negative log marginal likelihood, nlZ, from the current site
96 | % parameters, ttau and tnu. Also returns L (useful for predictions).
97 | function [Sigma, mu, nlZ, L] = epComputeParams(K, y, ttau, tnu, lik)
98 |
99 | n = length(y); % number of training cases
100 | ssi = sqrt(ttau); % compute Sigma and mu
101 | L = chol(eye(n)+ssi*ssi'.*K); % L'*L=B=eye(n)+sW*K*sW
102 | V = L'\(repmat(ssi,1,n).*K);
103 | Sigma = K - V'*V;
104 | mu = Sigma*tnu;
105 |
106 | tau_n = 1./diag(Sigma)-ttau; % compute the log marginal likelihood
107 | nu_n = mu./diag(Sigma)-tnu; % vectors of cavity parameters
108 | nlZ = sum(log(diag(L))) - sum(log(feval(lik, y, nu_n./tau_n, 1./tau_n))) ...
109 | -tnu'*Sigma*tnu/2 - nu_n'*((ttau./tau_n.*nu_n-2*tnu)./(ttau+tau_n))/2 ...
110 | +sum(tnu.^2./(tau_n+ttau))/2-sum(log(1+ttau./tau_n))/2;
111 |
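112 | % Usage sketch (an assumption, added here; see approximations.m): approxEP is
113 | % normally not called directly but passed by name to binaryGP, e.g.
114 | %   p = binaryGP(hyper, 'approxEP', 'covSEiso', 'cumGauss', x, y, xstar);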
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/approxLA.m:
--------------------------------------------------------------------------------
1 | function [alpha, sW, L, nlZ, dnlZ] = approxLA(hyper, covfunc, lik, x, y)
2 |
3 | % Laplace approximation to the posterior Gaussian Process.
4 | % The function takes a specified covariance function (see covFunction.m) and
5 | % likelihood function (see likelihoods.m), and is designed to be used with
6 | % binaryGP.m. See also approximations.m.
7 | %
8 | % Copyright (c) 2006, 2007 Carl Edward Rasmussen and Hannes Nickisch 2007-03-29
9 |
10 | persistent best_alpha best_nlZ % copy of the best alpha and its obj value
11 | tol = 1e-6; % tolerance for when to stop the Newton iterations
12 |
13 | n = size(x,1);
14 | K = feval(covfunc{:}, hyper, x); % evaluate the covariance matrix
15 |
16 | if any(size(best_alpha) ~= [n,1]) % find a good starting point for alpha and f
17 | f = zeros(n,1); alpha = f; % start at zero
18 | [lp,dlp,d2lp] = feval(lik,y,f,'deriv'); W=-d2lp;
19 | Psi_new = lp; best_nlZ = Inf;
20 | else
21 | alpha = best_alpha; f = K*alpha; % try best so far
22 | [lp,dlp,d2lp] = feval(lik,y,f,'deriv'); W=-d2lp;
23 | Psi_new = -alpha'*f/2 + lp;
24 | if Psi_new < -n*log(2) % if zero is better ..
25 | f = zeros(n,1); alpha = f; % .. go back
26 | [lp,dlp,d2lp] = feval(lik,y,f,'deriv'); W=-d2lp;
27 | Psi_new = -alpha'*f/2 + lp;
28 | end
29 | end
30 | Psi_old = -Inf; % make sure while loop starts
31 |
32 | while Psi_new - Psi_old > tol % begin Newton's iterations
33 | Psi_old = Psi_new; alpha_old = alpha;
34 | sW = sqrt(W);
35 | L = chol(eye(n)+sW*sW'.*K); % L'*L=B=eye(n)+sW*K*sW
36 | b = W.*f+dlp;
37 | alpha = b - sW.*solve_chol(L,sW.*(K*b));
38 | f = K*alpha;
39 | [lp,dlp,d2lp,d3lp] = feval(lik,y,f,'deriv'); W=-d2lp;
40 |
41 | Psi_new = -alpha'*f/2 + lp;
42 | i = 0;
43 | while i < 10 && Psi_new < Psi_old % if objective didn't increase
44 | alpha = (alpha_old+alpha)/2; % reduce step size by half
45 | f = K*alpha;
46 | [lp,dlp,d2lp,d3lp] = feval(lik,y,f,'deriv'); W=-d2lp;
47 | Psi_new = -alpha'*f/2 + lp;
48 | i = i+1;
49 | end
50 | end % end Newton's iterations
51 |
52 | sW = sqrt(W); % recalculate L
53 | L = chol(eye(n)+sW*sW'.*K); % L'*L=B=eye(n)+sW*K*sW
54 | nlZ = alpha'*f/2 - lp + sum(log(diag(L))); % approx neg log marg likelihood
55 |
56 | if nlZ < best_nlZ % if best so far ..
57 | best_alpha = alpha; best_nlZ = nlZ; % .. then remember for next call
58 | end
59 |
60 | if nargout >= 4 % do we want derivatives?
61 | dnlZ = zeros(size(hyper)); % allocate space for derivatives
62 | Z = repmat(sW,1,n).*solve_chol(L, diag(sW));
63 | C = L'\(repmat(sW,1,n).*K);
64 | s2 = 0.5*(diag(K)-sum(C.^2,1)').*d3lp;
65 | for j=1:length(hyper)
66 | dK = feval(covfunc{:}, hyper, x, j);
67 | s1 = alpha'*dK*alpha/2-sum(sum(Z.*dK))/2;
68 | b = dK*dlp;
69 | s3 = b-K*(Z*b);
70 | dnlZ(j) = -s1-s2'*s3;
71 | end
72 | end
73 |
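74 | % Usage sketch (an assumption, added here; see approximations.m): as with
75 | % approxEP, training-mode use goes through binaryGP, e.g.
76 | %   [nlZ, dnlZ] = binaryGP(hyper, 'approxLA', 'covSEiso', 'cumGauss', x, y);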
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/approximations.m:
--------------------------------------------------------------------------------
1 | % approximations: Exact inference for Gaussian process classification is
2 | % intractable, and approximations are necessary. Different approximation
3 | % techniques have been implemented, which all rely on a Gaussian approximation
4 | % to the non-Gaussian posterior:
5 | %
6 | % approxEP the Expectation Propagation (EP) algorithm
7 | % approxLA Laplace's method
8 | %
9 | % which are used by the Gaussian process classification function binaryGP.m.
10 | % The interface to the approximation methods is the following:
11 | %
12 | % function [alpha, sW, L, nlZ, dnlZ] = approx..(hyper, covfunc, lik, x, y)
13 | %
14 | % where:
15 | %
16 | % hyper is a column vector of hyperparameters
17 | % covfunc is the name of the covariance function (see covFunctions.m)
18 | % lik is the name of the likelihood function (see likelihoods.m)
19 | % x is a n by D matrix of training inputs
20 | % y is a (column) vector (of size n) of binary +1/-1 targets
21 | % nlZ is the returned value of the negative log marginal likelihood
22 | % dnlZ is a (column) vector of partial derivatives of the negative
23 | % log marginal likelihood wrt each hyperparameter
24 | % alpha is a (sparse or full) column vector containing inv(K)*m, where K
25 | % is the prior covariance matrix and m the approx posterior mean
26 | % sW is a (sparse or full) column vector containing the diagonal of sqrt(W);
27 | % the approximate posterior covariance matrix is inv(inv(K)+W)
28 | % L is a (sparse or full) matrix, L = chol(sW*K*sW+eye(n))
29 | %
30 | % Usually, the approximate posterior to be returned admits the form
31 | % N(m=K*alpha, V=inv(inv(K)+W)), where alpha is a vector and W is diagonal;
32 | % if not, then L contains instead -inv(K+inv(W)), and sW is unused.
33 | %
34 | % For more information on the individual approximation methods and their
35 | % implementations, see the separate approx??.m files. See also binaryGP.m
36 | %
37 | % Copyright (c) by Carl Edward Rasmussen and Hannes Nickisch, 2007-06-25.
38 |
39 |
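A minimal usage sketch of this interface (an illustration, not part of the file; it assumes the gpml directory is on the path and builds toy 2-d data):

  x = [randn(10,2)-1; randn(10,2)+1];        % 20 toy training inputs
  y = [-ones(10,1); ones(10,1)];             % binary +1/-1 targets
  hyper = [log(1.0); log(1.0)];              % covSEiso: [log(ell); log(sqrt(sf2))]
  [alpha, sW, L, nlZ, dnlZ] = approxLA(hyper, {'covSEiso'}, 'cumGauss', x, y);
  m = feval('covSEiso', hyper, x)*alpha;     % approximate posterior mean m = K*alpha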
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/binaryEPGP.m:
--------------------------------------------------------------------------------
1 | function varargout = binaryEPGP(hyper, covfunc, varargin)
2 |
3 | % binaryEPGP - The Expectation Propagation approximation for binary Gaussian
4 | % process classification. Two modes are possible: training or testing: if no
5 | % test cases are supplied, then the approximate negative log marginal
6 | % likelihood and its partial derivatives wrt the hyperparameters is computed;
7 | % this mode is used to fit the hyperparameters. If test cases are given, then
8 | % the test set predictive probabilities are returned. The program is flexible
9 | % in allowing a multitude of covariance functions.
10 | %
11 | % usage: [nlZ, dnlZ ] = binaryEPGP(hyper, covfunc, x, y);
12 | % or: [p, mu, s2, nlZ] = binaryEPGP(hyper, covfunc, x, y, xstar);
13 | %
14 | % where:
15 | %
16 | % hyper is a (column) vector of hyperparameters
17 | % covfunc is the name of the covariance function (see below)
18 | % (the likelihood is fixed to the cumulative Gaussian; see cumGauss.m)
19 | % x is a n by D matrix of training inputs
20 | % y is a (column) vector (of size n) of binary +1/-1 targets
21 | % xstar is a nn by D matrix of test inputs
22 | % nlZ is the returned value of the negative log marginal likelihood
23 | % dnlZ is a (column) vector of partial derivatives of the negative
24 | % log marginal likelihood wrt each log hyperparameter
25 | % p is a (column) vector (of length nn) of predictive probabilities
26 | % mu is a (column) vector (of length nn) of predictive latent means
27 | % s2 is a (column) vector (of length nn) of predictive latent variances
28 | %
29 | % The length of the vector of hyperparameters depends on the covariance
30 | % function, as specified by the "covfunc" input to the function, specifying the
31 | % name of a covariance function. A number of different covariance functions are
32 | % implemented, and it is not difficult to add new ones. See "help covFunctions"
33 | % for the details.
34 | %
35 | % The function can conveniently be used with the "minimize" function to train
36 | % a Gaussian process, eg:
37 | %
38 | % [hyper, fX, i] = minimize(hyper, 'binaryEPGP', length, 'covSEiso', x, y);
39 | %
40 | %
41 | % Copyright (c) 2004, 2005, 2006, 2007 Carl Edward Rasmussen, 2007-02-19.
42 |
43 | if nargin<4 || nargin>5
44 | disp('Usage: [nlZ, dnlZ ] = binaryEPGP(hyper, covfunc, x, y);')
45 | disp(' or: [p, mu, s2, nlZ] = binaryEPGP(hyper, covfunc, x, y, xstar);')
46 | return
47 | end
48 |
49 | % Note, this function is just a wrapper provided for backward compatibility,
50 | % the functionality is now provided by the more general binaryGP function.
51 |
52 | varargout = cell(nargout, 1); % allocate the right number of output arguments
53 | [varargout{:}] = binaryGP(hyper, 'approxEP', covfunc, 'cumGauss', varargin{:});
54 |
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/binaryLaplaceGP.m:
--------------------------------------------------------------------------------
1 | function varargout = binaryLaplaceGP(hyper, covfunc, lik, varargin)
2 |
3 | % binaryLaplaceGP - Laplace's approximation for binary Gaussian process
4 | % classification. Two modes are possible: training or testing: if no test
5 | % cases are supplied, then the approximate negative log marginal likelihood
6 | % and its partial derivatives wrt the hyperparameters is computed; this mode is
7 | % used to fit the hyperparameters. If test cases are given, then the test set
8 | % predictive probabilities are returned. The program is flexible in allowing
9 | % several different likelihood functions and a multitude of covariance
10 | % functions.
11 | %
12 | % usage: [nlZ, dnlZ ] = binaryLaplaceGP(hyper, covfunc, lik, x, y);
13 | % or: [p, mu, s2, nlZ] = binaryLaplaceGP(hyper, covfunc, lik, x, y, xstar);
14 | %
15 | % where:
16 | %
17 | % hyper is a (column) vector of hyperparameters
18 | % covfunc is the name of the covariance function (see below)
19 | % lik is the name of the likelihood function (see below)
20 | % x is a n by D matrix of training inputs
21 | % y is a (column) vector (of size n) of binary +1/-1 targets
22 | % xstar is a nn by D matrix of test inputs
23 | % nlZ is the returned value of the negative log marginal likelihood
24 | % dnlZ is a (column) vector of partial derivatives of the negative
25 | % log marginal likelihood wrt each log hyperparameter
26 | % p is a (column) vector (of length nn) of predictive probabilities
27 | % mu is a (column) vector (of length nn) of predictive latent means
28 | % s2 is a (column) vector (of length nn) of predictive latent variances
29 | %
30 | % The length of the vector of log hyperparameters depends on the covariance
31 | % function, as specified by the "covfunc" input to the function, specifying the
32 | % name of a covariance function. A number of different covariance functions are
33 | % implemented, and it is not difficult to add new ones. See "help covFunctions"
34 | % for the details.
35 | %
36 | % The shape of the likelihood function is given by the "lik" input to the
37 | % function, specifying the name of the likelihood function. The two implemented
38 | % likelihood functions are:
39 | %
40 | % logistic the logistic function: 1/(1+exp(-x))
41 | % cumGauss the cumulative Gaussian (error function)
42 | %
43 | % The function can conveniently be used with the "minimize" function to train
44 | % a Gaussian process, eg:
45 | %
46 | % [hyper, fX, i] = minimize(hyper, 'binaryLaplaceGP', length, 'covSEiso',
47 | % 'logistic', x, y);
48 | %
49 | % Copyright (c) 2004, 2005, 2006, 2007 by Carl Edward Rasmussen, 2007-02-19.
50 |
51 | if nargin<5 || nargin>6
52 | disp('Usage: [nlZ, dnlZ ] = binaryLaplaceGP(hyper, covfunc, lik, x, y);')
53 | disp(' or: [p, mu, s2, nlZ] = binaryLaplaceGP(hyper, covfunc, lik, x, y, xstar);')
54 | return
55 | end
56 |
57 | % Note, this function is just a wrapper provided for backward compatibility,
58 | % the functionality is now provided by the more general binaryGP function.
59 |
60 | varargout = cell(nargout, 1); % allocate the right number of output arguments
61 | [varargout{:}] = binaryGP(hyper, 'approxLA', covfunc, lik, varargin{:});
62 |
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/covConst.m:
--------------------------------------------------------------------------------
1 | function [A, B] = covConst(logtheta, x, z);
2 |
3 | % covariance function for a constant function. The covariance function is
4 | % parameterized as:
5 | %
6 | % k(x^p,x^q) = 1/s2;
7 | %
8 | % The scalar hyperparameter is:
9 | %
10 | % logtheta = [ log(sqrt(s2)) ]
11 | %
12 | % For more help on design of covariance functions, try "help covFunctions".
13 | %
14 | % (C) Copyright 2006 by Carl Edward Rasmussen (2007-07-24)
15 |
16 | if nargin == 0, A = '1'; return; end % report number of parameters
17 |
18 | is2 = exp(-2*logtheta); % s2 inverse
19 |
20 | if nargin == 2 % compute covariance matrix
21 | A = is2*ones(size(x,1));
22 | elseif nargout == 2 % compute test set covariances
23 | A = is2*ones(size(z,1),1);
24 | B = is2*ones(size(x,1),size(z,1));
25 | else % compute derivative matrix
26 | A = -2*is2*ones(size(x,1));
27 | end
28 |
29 |
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/covFunctions.m:
--------------------------------------------------------------------------------
1 | % covariance functions to be used by Gaussian process functions. There are two
2 | % different kinds of covariance functions: simple and composite:
3 | %
4 | % simple covariance functions:
5 | %
6 | % covConst.m - covariance for constant functions
7 | % covLINard.m - linear covariance function with ard
8 | % covLINone.m - linear covariance function
9 | % covMatern3iso.m - Matern covariance function with nu=3/2
10 | % covMatern5iso.m - Matern covariance function with nu=5/2
11 | % covNNone.m - neural network covariance function
12 | % covNoise.m - independent covariance function (ie white noise)
13 | % covPeriodic.m - covariance for smooth periodic function with unit period
14 | % covRQard.m - rational quadratic covariance function with ard
15 | % covRQiso.m - isotropic rational quadratic covariance function
16 | % covSEard.m - squared exponential covariance function with ard
17 | % covSEiso.m - isotropic squared exponential covariance function
18 | %
19 | % composite covariance functions (see explanation at the bottom):
20 | %
21 | % covProd - products of covariance functions
22 | % covSum - sums of covariance functions
23 | %
24 | % Naming convention: all covariance functions start with "cov". A trailing
25 | % "iso" means isotropic, "ard" means Automatic Relevance Determination, and
26 | % "one" means that the distance measure is parameterized by a single parameter.
27 | %
28 | % The covariance functions are written according to a special convention where
29 | % the exact behaviour depends on the number of input and output arguments
30 | % passed to the function. If you want to add new covariance functions, you
31 | % should follow this convention if you want them to work with the functions
32 | % gpr, binaryEPGP and binaryLaplaceGP. There are four different ways of calling
33 | % the covariance functions:
34 | %
35 | % 1) With no input arguments:
36 | %
37 | % p = covNAME
38 | %
39 | % The covariance function returns a string telling how many hyperparameters it
40 | % expects, using the convention that "D" is the dimension of the input space.
41 | % For example, calling "covRQard" returns the string '(D+2)'.
42 | %
43 | % 2) With two input arguments:
44 | %
45 | % K = covNAME(logtheta, x)
46 | %
47 | % The function computes and returns the covariance matrix where logtheta are
48 | % the log of the hyperparameters and x is an n by D matrix of cases, where
49 | % D is the dimension of the input space. The returned covariance matrix is of
50 | % size n by n.
51 | %
52 | % 3) With three input arguments and two output arguments:
53 | %
54 | % [v, B] = covNAME(loghyper, x, z)
55 | %
56 | % The function computes test set covariances; v is a vector of self covariances
57 | % for the test cases in z (of length nn) and B is a (n by nn) matrix of cross
58 | % covariances between training cases x and test cases z.
59 | %
60 | % 4) With three input arguments and a single output:
61 | %
62 | % D = covNAME(logtheta, x, z)
63 | %
64 | % The function computes and returns the n by n matrix of partial derivatives
65 | % of the training set covariance matrix with respect to logtheta(z), ie with
66 | % respect to the log of hyperparameter number z.
67 | %
68 | % The functions may retain a local copy of the covariance matrix for computing
69 | % derivatives, which is cleared as the last derivative is returned.
70 | %
71 | % About the specification of simple and composite covariance functions to be
72 | % used by the Gaussian process functions gpr, binaryEPGP and binaryLaplaceGP:
73 | % Covariance functions can be specified in two ways: either as a string
74 | % containing the name of the covariance function or using a cell array. For
75 | % example:
76 | %
77 | % covfunc = 'covRQard';
78 | % covfunc = {'covRQard'};
79 | %
80 | % are both supported. Only the second form using the cell array can be used
81 | % for specifying composite covariance functions, made up of several
82 | % contributions. For example:
83 | %
84 | % covfunc = {'covSum',{'covRQiso','covSEard','covNoise'}};
85 | %
86 | % specifies a covariance function which is the sum of three contributions. To
87 | % find out how many hyperparameters this covariance function requires, we do:
88 | %
89 | % feval(covfunc{:})
90 | %
91 | % which returns the string '3+(D+1)+1' (ie the 'covRQiso' contribution uses
92 | % 3 parameters, the 'covSEard' uses D+1 and 'covNoise' a single parameter).
93 | %
94 | % (C) copyright 2006, Carl Edward Rasmussen, 2006-04-07.
95 |
96 |
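The four calling conventions can be tried with, say, covSEiso (a sketch; the inputs x and z are assumed to exist):

  p = covSEiso                            % 1) no arguments: returns the string '2'
  logtheta = [log(1.0); log(0.5)];        %    [log(ell); log(sqrt(sf2))]
  K = covSEiso(logtheta, x);              % 2) n by n training set covariance matrix
  [v, B] = covSEiso(logtheta, x, z);      % 3) test set self and cross covariances
  D1 = covSEiso(logtheta, x, 1);          % 4) derivative matrix wrt logtheta(1)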
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/covLINard.m:
--------------------------------------------------------------------------------
1 | function [A, B] = covLINard(logtheta, x, z);
2 |
3 | % Linear covariance function with Automatic Relevance Determination (ARD). The
4 | % covariance function is parameterized as:
5 | %
6 | % k(x^p,x^q) = x^p'*inv(P)*x^q
7 | %
8 | % where the P matrix is diagonal with ARD parameters ell_1^2,...,ell_D^2, where
9 | % D is the dimension of the input space. The hyperparameters are:
10 | %
11 | % logtheta = [ log(ell_1)
12 | % log(ell_2)
13 | % .
14 | % log(ell_D) ]
15 | %
16 | % Note that there is no bias term; use covConst to add a bias.
17 | %
18 | % For more help on design of covariance functions, try "help covFunctions".
19 | %
20 | % (C) Copyright 2006 by Carl Edward Rasmussen (2006-03-24)
21 |
22 | if nargin == 0, A = 'D'; return; end % report number of parameters
23 |
24 | ell = exp(logtheta);
25 | x = x*diag(1./ell);
26 |
27 | if nargin == 2
28 | A = x*x';
29 | elseif nargout == 2 % compute test set covariances
30 | z = z*diag(1./ell);
31 | A = sum(z.*z,2);
32 | B = x*z';
33 | else % compute derivative matrices
34 | A = -2*x(:,z)*x(:,z)';
35 | end
36 |
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/covLINone.m:
--------------------------------------------------------------------------------
1 | function [A, B] = covLINone(logtheta, x, z);
2 |
3 | % Linear covariance function with a single hyperparameter. The covariance
4 | % function is parameterized as:
5 | %
6 | % k(x^p,x^q) = x^p'*inv(P)*x^q + 1./t2;
7 | %
8 | % where the P matrix is t2 times the unit matrix. The second term plays the
9 | % role of the bias. The hyperparameter is:
10 | %
11 | % logtheta = [ log(sqrt(t2)) ]
12 | %
13 | % For more help on design of covariance functions, try "help covFunctions".
14 | %
15 | % (C) Copyright 2006 by Carl Edward Rasmussen (2006-03-27)
16 |
17 | if nargin == 0, A = '1'; return; end % report number of parameters
18 |
19 | it2 = exp(-2*logtheta); % t2 inverse
20 |
21 | if nargin == 2 % compute covariance
22 | A = it2*(1+x*x');
23 | elseif nargout == 2 % compute test set covariances
24 | A = it2*(1+sum(z.*z,2));
25 | B = it2*(1+x*z');
26 | else % compute derivative matrix
27 | A = -2*it2*(1+x*x');
28 | end
29 |
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/covMatern3iso.m:
--------------------------------------------------------------------------------
1 | function [A, B] = covMatern3iso(loghyper, x, z)
2 |
3 | % Matern covariance function with nu = 3/2 and isotropic distance measure. The
4 | % covariance function is:
5 | %
6 | % k(x^p,x^q) = sf2 * (1 + sqrt(3)*d(x^p,x^q)) * exp(-sqrt(3)*d(x^p,x^q))
7 | %
8 | % where d(x^p,x^q) is the distance sqrt((x^p-x^q)'*inv(P)*(x^p-x^q)), P is ell
9 | % times the unit matrix and sf2 is the signal variance. The hyperparameters
10 | % are:
11 | %
12 | % loghyper = [ log(ell)
13 | % log(sqrt(sf2)) ]
14 | %
15 | % For more help on design of covariance functions, try "help covFunctions".
16 | %
17 | % (C) Copyright 2006 by Carl Edward Rasmussen (2006-03-24)
18 |
19 | if nargin == 0, A = '2'; return; end
20 |
21 | persistent K;
22 | [n, D] = size(x);
23 | ell = exp(loghyper(1));
24 | sf2 = exp(2*loghyper(2));
25 |
26 | x = sqrt(3)*x/ell;
27 |
28 | if nargin == 2 % compute covariance matrix
29 | A = sqrt(sq_dist(x'));
30 | K = sf2*exp(-A).*(1+A);
31 | A = K;
32 | elseif nargout == 2 % compute test set covariances
33 | z = sqrt(3)*z/ell;
34 | A = sf2;
35 | B = sqrt(sq_dist(x',z'));
36 | B = sf2*exp(-B).*(1+B);
37 | else % compute derivative matrices
38 | if z == 1
39 | A = sf2*sq_dist(x').*exp(-sqrt(sq_dist(x')));
40 | else
41 | % check for correct dimension of the previously calculated kernel matrix
42 | if any(size(K)~=n)
43 | K = sqrt(sq_dist(x'));
44 | K = sf2*exp(-K).*(1+K);
45 | end
46 | A = 2*K;
47 | clear K;
48 | end
49 | end
50 |
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/covMatern5iso.m:
--------------------------------------------------------------------------------
1 | function [A, B] = covMatern5iso(loghyper, x, z)
2 |
3 | % Matern covariance function with nu = 5/2 and isotropic distance measure. The
4 | % covariance function is:
5 | %
6 | % k(x^p,x^q) = sf2 * (1 + sqrt(5)*d + 5*d^2/3) * exp(-sqrt(5)*d)
7 | %
8 | % where d is the distance sqrt((x^p-x^q)'*inv(P)*(x^p-x^q)), P is ell times
9 | % the unit matrix and sf2 is the signal variance. The hyperparameters are:
10 | %
11 | % loghyper = [ log(ell)
12 | % log(sqrt(sf2)) ]
13 | %
14 | % For more help on design of covariance functions, try "help covFunctions".
15 | %
16 | % (C) Copyright 2006 by Carl Edward Rasmussen (2006-03-24)
17 |
18 | if nargin == 0, A = '2'; return; end
19 |
20 | persistent K;
21 | [n, D] = size(x);
22 | ell = exp(loghyper(1));
23 | sf2 = exp(2*loghyper(2));
24 |
25 | x = sqrt(5)*x/ell;
26 |
27 | if nargin == 2 % compute covariance matrix
28 | A = sq_dist(x');
29 | K = sf2*exp(-sqrt(A)).*(1+sqrt(A)+A/3);
30 | A = K;
31 | elseif nargout == 2 % compute test set covariances
32 | z = sqrt(5)*z/ell;
33 | A = sf2;
34 | B = sq_dist(x',z');
35 | B = sf2*exp(-sqrt(B)).*(1+sqrt(B)+B/3);
36 | else % compute derivative matrices
37 | if z == 1
38 | A = sq_dist(x');
39 | A = sf2*(A+sqrt(A).^3).*exp(-sqrt(A))/3;
40 | else
41 | % check for correct dimension of the previously calculated kernel matrix
42 | if any(size(K)~=n)
43 | K = sq_dist(x');
44 | K = sf2*exp(-sqrt(K)).*(1+sqrt(K)+K/3);
45 | end
46 | A = 2*K;
47 | clear K;
48 | end
49 | end
50 |
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/covNNone.m:
--------------------------------------------------------------------------------
1 | function [A, B] = covNNone(loghyper, x, z)
2 |
3 | % Neural network covariance function with a single parameter for the distance
4 | % measure. The covariance function is parameterized as:
5 | %
6 | % k(x^p,x^q) = sf2 * asin(x^p'*P*x^q / sqrt[(1+x^p'*P*x^p)*(1+x^q'*P*x^q)])
7 | %
8 | % where the x^p and x^q vectors on the right hand side have an added extra bias
9 | % entry with unit value. P is ell^-2 times the unit matrix and sf2 controls the
10 | % signal variance. The hyperparameters are:
11 | %
12 | % loghyper = [ log(ell)
13 | % log(sqrt(sf2)) ]
14 | %
15 | % For more help on design of covariance functions, try "help covFunctions".
16 | %
17 | % (C) Copyright 2006 by Carl Edward Rasmussen (2006-03-24)
18 |
19 | if nargin == 0, A = '2'; return; end % report number of parameters
20 |
21 | persistent Q K;
22 | [n D] = size(x);
23 | ell = exp(loghyper(1)); em2 = ell^(-2);
24 | sf2 = exp(2*loghyper(2));
25 | x = x/ell;
26 |
27 | if nargin == 2 % compute covariance
28 | Q = x*x';
29 | K = (em2+Q)./(sqrt(1+em2+diag(Q))*sqrt(1+em2+diag(Q)'));
30 | A = sf2*asin(K);
31 | elseif nargout == 2 % compute test set covariances
32 | z = z/ell;
33 | A = sf2*asin((em2+sum(z.*z,2))./(1+em2+sum(z.*z,2)));
34 | B = sf2*asin((em2+x*z')./sqrt((1+em2+sum(x.*x,2))*(1+em2+sum(z.*z,2)')));
35 | else % compute derivative matrix
36 | % check for correct dimension of the previously calculated kernel matrix
37 | if any(size(Q)~=n)
38 | Q = x*x';
39 | end
40 | % check for correct dimension of the previously calculated kernel matrix
41 | if any(size(K)~=n)
42 | K = (em2+Q)./(sqrt(1+em2+diag(Q))*sqrt(1+em2+diag(Q)'));
43 | end
44 | if z == 1 % first parameter
45 | v = (em2+sum(x.*x,2))./(1+em2+diag(Q));
46 | A = -2*sf2*((em2+Q)./(sqrt(1+em2+diag(Q))*sqrt(1+em2+diag(Q)'))- ...
47 | K.*(repmat(v,1,n)+repmat(v',n,1))/2)./sqrt(1-K.^2);
48 | clear Q;
49 | else % second parameter
50 | A = 2*sf2*asin(K);
51 | clear K;
52 | end
53 | end
54 |
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/covNoise.m:
--------------------------------------------------------------------------------
1 | function [A, B] = covNoise(logtheta, x, z);
2 |
3 | % Independent covariance function, ie "white noise", with specified variance.
4 | % The covariance function is specified as:
5 | %
6 | % k(x^p,x^q) = s2 * \delta(p,q)
7 | %
8 | % where s2 is the noise variance and \delta(p,q) is a Kronecker delta function
9 | % which is 1 iff p=q and zero otherwise. The hyperparameter is
10 | %
11 | % logtheta = [ log(sqrt(s2)) ]
12 | %
13 | % For more help on design of covariance functions, try "help covFunctions".
14 | %
15 | % (C) Copyright 2006 by Carl Edward Rasmussen, 2006-03-24.
16 |
17 | if nargin == 0, A = '1'; return; end % report number of parameters
18 |
19 | s2 = exp(2*logtheta); % noise variance
20 |
21 | if nargin == 2 % compute covariance matrix
22 | A = s2*eye(size(x,1));
23 | elseif nargout == 2 % compute test set covariances
24 | A = s2;
25 | B = 0; % zero cross covariance, by independence
26 | else % compute derivative matrix
27 | A = 2*s2*eye(size(x,1));
28 | end
29 |
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/covPeriodic.m:
--------------------------------------------------------------------------------
1 | function [A, B] = covPeriodic(logtheta, x, z);
2 |
3 | % covariance function for a smooth periodic function, with unit period. The
4 | % covariance function is:
5 | %
6 | % k(x^p, x^q) = sf2 * exp(-2*sin^2(pi*(x_p-x_q))/ell^2)
7 | %
8 | % where the hyperparameters are:
9 | %
10 | % logtheta = [ log(ell)
11 | % log(sqrt(sf2)) ]
12 | %
13 | % For more help on design of covariance functions, try "help covFunctions".
14 | %
15 | % (C) Copyright 2006 by Carl Edward Rasmussen (2006-04-07)
16 |
17 | if nargin == 0, A = '2'; return; end
18 |
19 | [n D] = size(x);
20 | ell = exp(logtheta(1));
21 | sf2 = exp(2*logtheta(2));
22 |
23 | if nargin == 2
24 | A = sf2*exp(-2*(sin(pi*(repmat(x,1,n)-repmat(x',n,1)))/ell).^2);
25 | elseif nargout == 2 % compute test set covariances
26 | [nn D] = size(z);
27 | A = sf2*ones(nn,1);
28 | B = sf2*exp(-2*(sin(pi*(repmat(x,1,nn)-repmat(z',n,1)))/ell).^2);
29 | else % compute derivative matrices
30 | if z == 1
31 | r = (sin(pi*(repmat(x,1,n)-repmat(x',n,1)))/ell).^2;
32 | A = 4*sf2*exp(-2*r).*r;
33 | else
34 | A = 2*sf2*exp(-2*(sin(pi*(repmat(x,1,n)-repmat(x',n,1)))/ell).^2);
35 | end
36 | end
37 |
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/covProd.m:
--------------------------------------------------------------------------------
1 | function [A, B] = covProd(covfunc, logtheta, x, z);
2 |
3 | % covProd - compose a covariance function as the product of other covariance
4 | % functions. This function doesn't actually compute very much on its own, it
5 | % merely does some bookkeeping, and calls other covariance functions to do the
6 | % actual work.
7 | %
8 | % For more help on design of covariance functions, try "help covFunctions".
9 | %
10 | % (C) Copyright 2006 by Carl Edward Rasmussen, 2006-04-06.
11 |
12 | for i = 1:length(covfunc) % iterate over covariance functions
13 | f = covfunc(i);
14 | if iscell(f{:}), f = f{:}; end % dereference cell array if necessary
15 | j(i) = cellstr(feval(f{:}));
16 | end
17 |
18 | if nargin == 1, % report number of parameters
19 | A = char(j(1)); for i=2:length(covfunc), A = [A, '+', char(j(i))]; end
20 | return
21 | end
22 |
23 | [n, D] = size(x);
24 |
25 | v = []; % the vector v indicates to which covariance each parameter belongs
26 | for i = 1:length(covfunc), v = [v repmat(i, 1, eval(char(j(i))))]; end
27 |
28 | switch nargin
29 | case 3 % compute covariance matrix
30 | A = ones(n, n); % allocate space for covariance matrix
31 | for i = 1:length(covfunc) % iteration over factor functions
32 | f = covfunc(i);
33 | if iscell(f{:}), f = f{:}; end % dereference cell array if necessary
34 | A = A .* feval(f{:}, logtheta(v==i), x); % multiply covariances
35 | end
36 |
37 | case 4 % compute derivative matrix or test set covariances
38 | if nargout == 2 % compute test set covariances
39 | A = ones(size(z,1),1); B = ones(size(x,1),size(z,1)); % allocate space
40 | for i = 1:length(covfunc)
41 | f = covfunc(i);
42 | if iscell(f{:}), f = f{:}; end % dereference cell array if necessary
43 | [AA BB] = feval(f{:}, logtheta(v==i), x, z); % compute test covariances
44 | A = A .* AA; B = B .* BB; % and accumulate
45 | end
46 | else % compute derivative matrices
47 | A = ones(n, n);
48 | ii = v(z); % which covariance function
49 | j = sum(v(1:z)==ii); % which parameter in that covariance
50 | for i = 1:length(covfunc)
51 | f = covfunc(i);
52 | if iscell(f{:}), f = f{:}; end % dereference cell array if necessary
53 | if i == ii
54 | A = A .* feval(f{:}, logtheta(v==i), x, j); % multiply derivative
55 | else
56 | A = A .* feval(f{:}, logtheta(v==i), x); % multiply covariance
57 | end
58 | end
59 | end
60 |
61 | end
62 |
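As with covSum, a product is specified with a cell array, for example (a sketch; x is assumed to exist):

  covfunc = {'covProd',{'covSEiso','covLINone'}};
  feval(covfunc{:})                       % returns '2+1', ie three hyperparameters
  logtheta = [log(1.0); log(1.0); log(1.0)];
  K = feval(covfunc{:}, logtheta, x);     % elementwise product of the two kernels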
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/covRQard.m:
--------------------------------------------------------------------------------
1 | function [A, B] = covRQard(loghyper, x, z)
2 |
3 | % Rational Quadratic covariance function with Automatic Relevance Determination
4 | % (ARD) distance measure. The covariance function is parameterized as:
5 | %
6 | % k(x^p,x^q) = sf2 * [1 + (x^p - x^q)'*inv(P)*(x^p - x^q)/(2*alpha)]^(-alpha)
7 | %
8 | % where the P matrix is diagonal with ARD parameters ell_1^2,...,ell_D^2, where
9 | % D is the dimension of the input space, sf2 is the signal variance and alpha
10 | % is the shape parameter for the RQ covariance. The hyperparameters are:
11 | %
12 | % loghyper = [ log(ell_1)
13 | % log(ell_2)
14 | % .
15 | % log(ell_D)
16 | % log(sqrt(sf2))
17 | % log(alpha) ]
18 | %
19 | % For more help on design of covariance functions, try "help covFunctions".
20 | %
21 | % (C) Copyright 2006 by Carl Edward Rasmussen (2006-09-08)
22 |
23 | if nargin == 0, A = '(D+2)'; return; end
24 |
25 | persistent K;
26 | [n D] = size(x);
27 | ell = exp(loghyper(1:D));
28 | sf2 = exp(2*loghyper(D+1));
29 | alpha = exp(loghyper(D+2));
30 |
31 | if nargin == 2
32 | K = (1+0.5*sq_dist(diag(1./ell)*x')/alpha);
33 | A = sf2*(K.^(-alpha));
34 | elseif nargout == 2 % compute test set covariances
35 | A = sf2*ones(size(z,1),1);
36 | B = sf2*((1+0.5*sq_dist(diag(1./ell)*x',diag(1./ell)*z')/alpha).^(-alpha));
37 | else % compute derivative matrix
38 | % check for correct dimension of the previously calculated kernel matrix
39 | if any(size(K)~=n)
40 | K = (1+0.5*sq_dist(diag(1./ell)*x')/alpha);
41 | end
42 | if z <= D % length scale parameters
43 | A = sf2*K.^(-alpha-1).*sq_dist(x(:,z)'/ell(z));
44 | elseif z == D+1 % magnitude parameter
45 | A = 2*sf2*(K.^(-alpha));
46 | else
47 | A = sf2*K.^(-alpha).*(0.5*sq_dist(diag(1./ell)*x')./K - alpha*log(K));
48 | clear K;
49 | end
50 | end
51 |
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/covRQiso.m:
--------------------------------------------------------------------------------
1 | function [A, B] = covRQiso(loghyper, x, z)
2 |
3 | % Rational Quadratic covariance function with isotropic distance measure. The
4 | % covariance function is parameterized as:
5 | %
6 | % k(x^p,x^q) = sf2 * [1 + (x^p - x^q)'*inv(P)*(x^p - x^q)/(2*alpha)]^(-alpha)
7 | %
8 | % where the P matrix is ell^2 times the unit matrix, sf2 is the signal
9 | % variance and alpha is the shape parameter for the RQ covariance. The
10 | % hyperparameters are:
11 | %
12 | % loghyper = [ log(ell)
13 | % log(sqrt(sf2))
14 | % log(alpha) ]
15 | %
16 | % For more help on design of covariance functions, try "help covFunctions".
17 | %
18 | % (C) Copyright 2006 by Carl Edward Rasmussen (2006-09-08)
19 |
20 | if nargin == 0, A = '3'; return; end
21 |
22 | [n, D] = size(x);
23 |
24 | persistent K;
25 | ell = exp(loghyper(1));
26 | sf2 = exp(2*loghyper(2));
27 | alpha = exp(loghyper(3));
28 |
29 | if nargin == 2 % compute covariance matrix
30 | K = (1+0.5*sq_dist(x'/ell)/alpha);
31 | A = sf2*(K.^(-alpha));
32 | elseif nargout == 2 % compute test set covariances
33 | A = sf2*ones(size(z,1),1);
34 | B = sf2*((1+0.5*sq_dist(x'/ell,z'/ell)/alpha).^(-alpha));
35 | else % compute derivative matrices
36 | % check for correct dimension of the previously calculated kernel matrix
37 | if any(size(K)~=n)
38 | K = (1+0.5*sq_dist(x'/ell)/alpha);
39 | end
40 | if z == 1 % length scale parameters
41 | A = sf2*K.^(-alpha-1).*sq_dist(x'/ell);
42 | elseif z == 2 % magnitude parameter
43 | A = 2*sf2*(K.^(-alpha));
44 | else
45 | A = sf2*K.^(-alpha).*(0.5*sq_dist(x'/ell)./K - alpha*log(K));
46 | clear K;
47 | end
48 | end
49 |
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/covSEard.m:
--------------------------------------------------------------------------------
1 | function [A, B] = covSEard(loghyper, x, z)
2 |
3 | % Squared Exponential covariance function with Automatic Relevance Determination
4 | % (ARD) distance measure. The covariance function is parameterized as:
5 | %
6 | % k(x^p,x^q) = sf2 * exp(-(x^p - x^q)'*inv(P)*(x^p - x^q)/2)
7 | %
8 | % where the P matrix is diagonal with ARD parameters ell_1^2,...,ell_D^2, where
9 | % D is the dimension of the input space and sf2 is the signal variance. The
10 | % hyperparameters are:
11 | %
12 | % loghyper = [ log(ell_1)
13 | % log(ell_2)
14 | % .
15 | % log(ell_D)
16 | % log(sqrt(sf2)) ]
17 | %
18 | % For more help on design of covariance functions, try "help covFunctions".
19 | %
20 | % (C) Copyright 2006 by Carl Edward Rasmussen (2006-03-24)
21 |
22 | if nargin == 0, A = '(D+1)'; return; end % report number of parameters
23 |
24 | persistent K;
25 |
26 | [n D] = size(x);
27 | ell = exp(loghyper(1:D)); % characteristic length scale
28 | sf2 = exp(2*loghyper(D+1)); % signal variance
29 |
30 | if nargin == 2
31 | K = sf2*exp(-sq_dist(diag(1./ell)*x')/2);
32 | A = K;
33 | elseif nargout == 2 % compute test set covariances
34 | A = sf2*ones(size(z,1),1);
35 | B = sf2*exp(-sq_dist(diag(1./ell)*x',diag(1./ell)*z')/2);
36 | else % compute derivative matrix
37 |
38 | % check for correct dimension of the previously calculated kernel matrix
39 | if any(size(K)~=n)
40 | K = sf2*exp(-sq_dist(diag(1./ell)*x')/2);
41 | end
42 |
43 | if z <= D % length scale parameters
44 | A = K.*sq_dist(x(:,z)'/ell(z));
45 | else % magnitude parameter
46 | A = 2*K;
47 | clear K;
48 | end
49 | end
50 |
51 |
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/covSEiso.m:
--------------------------------------------------------------------------------
1 | function [A, B] = covSEiso(loghyper, x, z);
2 |
3 | % Squared Exponential covariance function with isotropic distance measure. The
4 | % covariance function is parameterized as:
5 | %
6 | % k(x^p,x^q) = sf2 * exp(-(x^p - x^q)'*inv(P)*(x^p - x^q)/2)
7 | %
8 | % where the P matrix is ell^2 times the unit matrix and sf2 is the signal
9 | % variance. The hyperparameters are:
10 | %
11 | % loghyper = [ log(ell)
12 | % log(sqrt(sf2)) ]
13 | %
14 | % For more help on design of covariance functions, try "help covFunctions".
15 | %
16 | % (C) Copyright 2006 by Carl Edward Rasmussen (2007-06-25)
17 |
18 | if nargin == 0, A = '2'; return; end % report number of parameters
19 |
20 | [n D] = size(x);
21 | ell = exp(loghyper(1)); % characteristic length scale
22 | sf2 = exp(2*loghyper(2)); % signal variance
23 |
24 | if nargin == 2
25 | A = sf2*exp(-sq_dist(x'/ell)/2);
26 | elseif nargout == 2 % compute test set covariances
27 | A = sf2*ones(size(z,1),1);
28 | B = sf2*exp(-sq_dist(x'/ell,z'/ell)/2);
29 | else % compute derivative matrix
30 | if z == 1 % first parameter
31 | A = sf2*exp(-sq_dist(x'/ell)/2).*sq_dist(x'/ell);
32 | else % second parameter
33 | A = 2*sf2*exp(-sq_dist(x'/ell)/2);
34 | end
35 | end
36 |
37 |
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/covSum.m:
--------------------------------------------------------------------------------
1 | function [A, B] = covSum(covfunc, logtheta, x, z);
2 |
3 | % covSum - compose a covariance function as the sum of other covariance
4 | % functions. This function doesn't actually compute very much on its own, it
5 | % merely does some bookkeeping, and calls other covariance functions to do the
6 | % actual work.
7 | %
8 | % For more help on design of covariance functions, try "help covFunctions".
9 | %
10 | % (C) Copyright 2006 by Carl Edward Rasmussen, 2006-03-20.
11 |
12 | for i = 1:length(covfunc) % iterate over covariance functions
13 | f = covfunc(i);
14 | if iscell(f{:}), f = f{:}; end % dereference cell array if necessary
15 | j(i) = cellstr(feval(f{:}));
16 | end
17 |
18 | if nargin == 1, % report number of parameters
19 | A = char(j(1)); for i=2:length(covfunc), A = [A, '+', char(j(i))]; end
20 | return
21 | end
22 |
23 | [n, D] = size(x);
24 |
25 | v = []; % the vector v indicates to which covariance each parameter belongs
26 | for i = 1:length(covfunc), v = [v repmat(i, 1, eval(char(j(i))))]; end
27 |
28 | switch nargin
29 | case 3 % compute covariance matrix
30 | A = zeros(n, n); % allocate space for covariance matrix
31 | for i = 1:length(covfunc) % iteration over summand functions
32 | f = covfunc(i);
33 | if iscell(f{:}), f = f{:}; end % dereference cell array if necessary
34 | A = A + feval(f{:}, logtheta(v==i), x); % accumulate covariances
35 | end
36 |
37 | case 4 % compute derivative matrix or test set covariances
38 | if nargout == 2 % compute test set covariances
39 | A = zeros(size(z,1),1); B = zeros(size(x,1),size(z,1)); % allocate space
40 | for i = 1:length(covfunc)
41 | f = covfunc(i);
42 | if iscell(f{:}), f = f{:}; end % dereference cell array if necessary
43 | [AA BB] = feval(f{:}, logtheta(v==i), x, z); % compute test covariances
44 | A = A + AA; B = B + BB; % and accumulate
45 | end
46 | else % compute derivative matrices
47 | i = v(z); % which covariance function
48 | j = sum(v(1:z)==i); % which parameter in that covariance
49 | f = covfunc(i);
50 | if iscell(f{:}), f = f{:}; end % dereference cell array if necessary
51 | A = feval(f{:}, logtheta(v==i), x, j); % compute derivative
52 | end
53 |
54 | end
55 |
56 |
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/cumGauss.m:
--------------------------------------------------------------------------------
1 | function [out1, out2, out3, out4] = cumGauss(y, f, var)
2 |
3 | % cumGauss - Cumulative Gaussian likelihood function. The expression for the
4 | % likelihood is cumGauss(t) = normcdf(t) = (1+erf(t/sqrt(2)))/2.
5 | %
6 | % Three modes are provided, for computing likelihoods, derivatives and moments
7 | % respectively, see likelihoods.m for the details. In general, care is taken
8 | % to avoid numerical issues when the arguments are extreme. The
9 | % moments \int f^k cumGauss(y,f) N(f|mu,var) df are calculated analytically.
10 | %
11 | % Copyright (c) 2007 Carl Edward Rasmussen and Hannes Nickisch, 2007-03-29.
12 |
13 | if nargin>1, y=sign(y); end % allow only +/- 1 as values
14 |
15 | if nargin == 2 % (log) likelihood evaluation
16 |
17 | if numel(y)>0, yf = y.*f; else yf = f; end % product of latents and labels
18 |
19 | out1 = (1+erf(yf/sqrt(2)))/2; % likelihood
20 | if nargout>1
21 | out2 = zeros(size(f));
22 | b = 0.158482605320942; % quadratic asymptotics approximated at -6
23 | c = -1.785873318175113;
24 | ok = yf>-6; % normal evaluation for larger values
25 | out2( ok) = log(out1(ok));
26 | out2(~ok) = -yf(~ok).^2/2 + b*yf(~ok) + c; % log of sigmoid
27 | end
28 |
29 | elseif nargin == 3
30 |
31 | if strcmp(var,'deriv') % derivatives of the log
32 |
33 | if numel(y)==0, y=1; end
34 | yf = y.*f; % product of latents and labels
35 | [p,lp] = cumGauss(y,f);
36 | out1 = sum(lp);
37 |
38 | if nargout>1 % dlp, derivative of log likelihood
39 |
40 | n_p = zeros(size(f)); % safely compute Gaussian over cumulative Gaussian
41 | ok = yf>-5; % normal evaluation for large values of yf
42 | n_p(ok) = (exp(-yf(ok).^2/2)/sqrt(2*pi))./p(ok);
43 |
44 | bd = yf<-6; % tight upper bound evaluation
45 | n_p(bd) = sqrt(yf(bd).^2/4+1)-yf(bd)/2;
46 |
47 | interp = ~ok & ~bd; % linearly interpolate between both of them
48 | tmp = yf(interp);
49 | lam = -5-yf(interp);
50 | n_p(interp) = (1-lam).*(exp(-tmp.^2/2)/sqrt(2*pi))./p(interp) + ...
51 | lam .*(sqrt(tmp.^2/4+1)-tmp/2);
52 |
53 | out2 = y.*n_p; % dlp, derivative of log likelihood
54 | if nargout>2 % d2lp, 2nd derivative of log likelihood
55 | out3 = -n_p.^2 - yf.*n_p;
56 | if nargout>3 % d3lp, 3rd derivative of log likelihood
57 | out4 = 2*y.*n_p.^3 +3*f.*n_p.^2 +y.*(f.^2-1).*n_p;
58 | end
59 | end
60 | end
61 |
62 | else % compute moments
63 |
64 | mu = f; % 2nd argument is the mean of a Gaussian
65 | z = mu./sqrt(1+var);
66 | if numel(y)>0, z=z.*y; end
67 | out1 = cumGauss([],z); % zeroth raw moment
68 |
69 | [dummy,n_p] = cumGauss([],z,'deriv'); % Gaussian over cumulative Gaussian
70 |
71 | if nargout>1
72 | if numel(y)==0, y=1; end
73 | out2 = mu + y.*var.*n_p./sqrt(1+var); % 1st raw moment
74 | if nargout>2
75 | out3 = 2*mu.*out2 -mu.^2 +var -z.*var.^2.*n_p./(1+var); % 2nd raw moment
76 | out3 = out3.*out1;
77 | end
78 | out2 = out2.*out1;
79 | end
80 |
81 | end
82 |
83 | else
84 | error('No valid input provided.')
85 | end
86 |
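The analytic moments can be sanity-checked against the closed form m0 = Phi(y*mu/sqrt(1+var)) (a sketch with scalar inputs):

  mu = 0.3; var = 2.0; y = 1;
  m0 = cumGauss(y, mu, var);                       % zeroth raw moment
  m0_check = (1 + erf(y*mu/sqrt(2*(1+var))))/2;    % same value in closed form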
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/gauher.m:
--------------------------------------------------------------------------------
1 | % compute abscissas and weight factors for Gauss-Hermite quadrature
2 | %
3 | % CALL: [x,w]=gauher(N)
4 | %
5 | % x = base points (abscissas)
6 | % w = weight factors
7 | % N = number of base points (abscissas) (integrates a (2N-1)th order
8 | % polynomial exactly)
9 | %
10 | % p(x)=exp(-x^2/2)/sqrt(2*pi), a =-Inf, b = Inf
11 | %
12 | % The Gaussian Quadrature integrates a (2n-1)th order
13 | % polynomial exactly and the integral is of the form
14 | %
15 | % Int_a^b ( p(x)*F(x) ) dx = Sum_{j=1}^N ( w_j * F(x_j) )
16 | %
17 | %
18 | % this procedure uses the coefficients a(j), b(j) of the
19 | % recurrence relation
20 | %
21 | % b_j p_j(x) = (x - a_j) p_{j-1}(x) - b_{j-1} p_{j-2}(x)
22 | %
23 | %
24 | % for the various classical (normalized) orthogonal polynomials,
25 | % and the zero-th moment
26 | %
27 | % 1 = integral w(x) dx
28 | %
29 | % of the given polynomial's weight function w(x). Since the
30 | % polynomials are orthonormalized, the tridiagonal matrix is
31 | % guaranteed to be symmetric.
32 |
33 | function [x,w]=gauher(N)
34 | if N==20 % return precalculated values
35 | x=[ -7.619048541679757;-6.510590157013656;-5.578738805893203;
36 | -4.734581334046057;-3.943967350657318;-3.18901481655339 ;
37 | -2.458663611172367;-1.745247320814127;-1.042945348802751;
38 | -0.346964157081356; 0.346964157081356; 1.042945348802751;
39 | 1.745247320814127; 2.458663611172367; 3.18901481655339 ;
40 | 3.943967350657316; 4.734581334046057; 5.578738805893202;
41 | 6.510590157013653; 7.619048541679757];
42 | w=[ 0.000000000000126; 0.000000000248206; 0.000000061274903;
43 | 0.00000440212109 ; 0.000128826279962; 0.00183010313108 ;
44 | 0.013997837447101; 0.061506372063977; 0.161739333984 ;
45 | 0.260793063449555; 0.260793063449555; 0.161739333984 ;
46 | 0.061506372063977; 0.013997837447101; 0.00183010313108 ;
47 | 0.000128826279962; 0.00000440212109 ; 0.000000061274903;
48 | 0.000000000248206; 0.000000000000126 ];
49 | else
50 | b = sqrt( (1:N-1)/2 )';
51 | [V,D] = eig( diag(b,1) + diag(b,-1) );
52 | w = V(1,:)'.^2;
53 | x = sqrt(2)*diag(D);
54 | end
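For example, the weights reproduce moments of the standard normal exactly up to polynomial order 2N-1 (a sketch):

  [x, w] = gauher(20);          % N = 20 uses the precalculated branch
  m0 = sum(w);                  % zeroth moment of N(0,1): 1
  m2 = sum(w.*x.^2);            % second moment of N(0,1): 1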
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/gpr.m:
--------------------------------------------------------------------------------
1 | function [out1, out2] = gpr(logtheta, covfunc, x, y, xstar);
2 |
3 | % gpr - Gaussian process regression, with a named covariance function. Two
4 | % modes are possible: training and prediction: if no test data are given, the
5 | % function returns minus the log likelihood and its partial derivatives with
6 | % respect to the hyperparameters; this mode is used to fit the hyperparameters.
7 | % If test data are given, then (marginal) Gaussian predictions are computed,
8 | % whose mean and variance are returned. Note that in cases where the covariance
9 | % function has noise contributions, the variance returned in S2 is for noisy
10 | % test targets; if you want the variance of the noise-free latent function, you
11 | % must subtract the noise variance.
12 | %
13 | % usage: [nlml dnlml] = gpr(logtheta, covfunc, x, y)
14 | % or: [mu S2] = gpr(logtheta, covfunc, x, y, xstar)
15 | %
16 | % where:
17 | %
18 | % logtheta is a (column) vector of log hyperparameters
19 | % covfunc is the covariance function
20 | % x is a n by D matrix of training inputs
21 | % y is a (column) vector (of size n) of targets
22 | % xstar is a nn by D matrix of test inputs
23 | % nlml is the returned value of the negative log marginal likelihood
24 | % dnlml is a (column) vector of partial derivatives of the negative
25 | % log marginal likelihood wrt each log hyperparameter
26 | % mu is a (column) vector (of size nn) of predicted means
27 | % S2 is a (column) vector (of size nn) of predicted variances
28 | %
29 | % For more help on covariance functions, see "help covFunctions".
30 | %
31 | % (C) copyright 2006 by Carl Edward Rasmussen (2006-03-20).
32 |
33 | if ischar(covfunc), covfunc = cellstr(covfunc); end % convert to cell if needed
34 | [n, D] = size(x);
35 | if eval(feval(covfunc{:})) ~= size(logtheta, 1)
36 | error('Error: Number of parameters do not agree with covariance function')
37 | end
38 |
39 | K = feval(covfunc{:}, logtheta, x); % compute training set covariance matrix
40 |
41 | L = chol(K)'; % cholesky factorization of the covariance
42 | alpha = solve_chol(L',y);
43 |
44 | if nargin == 4 % if no test cases, compute the negative log marginal likelihood
45 |
46 | out1 = 0.5*y'*alpha + sum(log(diag(L))) + 0.5*n*log(2*pi);
47 |
48 | if nargout == 2 % ... and if requested, its partial derivatives
49 | out2 = zeros(size(logtheta)); % set the size of the derivative vector
50 | W = L'\(L\eye(n))-alpha*alpha'; % precompute for convenience
51 | for i = 1:length(out2)
52 | out2(i) = sum(sum(W.*feval(covfunc{:}, logtheta, x, i)))/2;
53 | end
54 | end
55 |
56 | else % ... otherwise compute (marginal) test predictions ...
57 |
58 | [Kss, Kstar] = feval(covfunc{:}, logtheta, x, xstar); % test covariances
59 |
60 | out1 = Kstar' * alpha; % predicted means
61 |
62 | if nargout == 2
63 | v = L\Kstar;
64 | out2 = Kss - sum(v.*v)';
65 | end
66 |
67 | end
68 |
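A typical train-then-predict sequence, in the style of the gpml demos (a sketch; x, y and xstar are assumed, and -100 allows minimize up to 100 function evaluations):

  covfunc = {'covSum', {'covSEiso','covNoise'}};
  logtheta0 = [log(1.0); log(1.0); log(0.1)];           % initial hyperparameters
  logtheta = minimize(logtheta0, 'gpr', -100, covfunc, x, y);
  [mu, S2] = gpr(logtheta, covfunc, x, y, xstar);       % predictive mean, variance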
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/gpr2.m:
--------------------------------------------------------------------------------
1 | function alpha = gpr2(logtheta, covfunc, x, y);
2 |
3 | % gpr2 - Gaussian process regression, with a named covariance function: a
4 | % reduced variant of gpr.m which returns only the regression weight vector
5 | % alpha. The function computes the training set covariance matrix K, forms
6 | % its Cholesky factorization and solves K*alpha = y; it performs no
7 | % hyperparameter training and makes no test set predictions. Note that in
8 | % cases where the covariance function has noise contributions, K includes
9 | % the noise variance. Predicted means at test inputs can subsequently be
10 | % formed as Kstar'*alpha, where Kstar holds the cross covariances between
11 | % training and test inputs (cf. gpr.m).
12 | %
13 | % usage: alpha = gpr2(logtheta, covfunc, x, y)
14 | % (compare: [nlml dnlml] = gpr(logtheta, covfunc, x, y) in gpr.m)
15 | %
16 | % where:
17 | %
18 | % logtheta is a (column) vector of log hyperparameters
19 | % covfunc is the covariance function
20 | % x is a n by D matrix of training inputs
21 | % y is a (column) vector (of size n) of targets
22 | % alpha is the returned (column) vector of regression weights inv(K)*y,
23 | % where K is the training set covariance matrix evaluated at the given
24 | % log hyperparameters
25 | %
26 | % (the nlml/dnlml and mu/S2 outputs of gpr.m are not returned by this
27 | % variant; the corresponding branches below are vestigial)
28 | %
29 | % For more help on covariance functions, see "help covFunctions".
30 | %
31 | % (C) copyright 2006 by Carl Edward Rasmussen (2006-03-20).
32 |
33 | if ischar(covfunc), covfunc = cellstr(covfunc); end % convert to cell if needed
34 | [n, D] = size(x);
35 | if eval(feval(covfunc{:})) ~= size(logtheta, 1)
36 | error('Error: Number of parameters do not agree with covariance function')
37 | end
38 |
39 | K = feval(covfunc{:}, logtheta, x); % compute training set covariance matrix
40 |
41 | L = chol(K)'; % cholesky factorization of the covariance
42 | alpha = solve_chol(L',y);
43 |
44 | if nargin == 4 % if no test cases, compute the negative log marginal likelihood
45 |
46 | out1 = 0.5*y'*alpha + sum(log(diag(L))) + 0.5*n*log(2*pi);
47 |
48 | if nargout == 2 % ... and if requested, its partial derivatives
49 | out2 = zeros(size(logtheta)); % set the size of the derivative vector
50 | W = L'\(L\eye(n))-alpha*alpha'; % precompute for convenience
51 | for i = 1:length(out2)
52 | out2(i) = sum(sum(W.*feval(covfunc{:}, logtheta, x, i)))/2;
53 | end
54 | end
55 |
56 | else % ... otherwise compute (marginal) test predictions ...
57 |
58 |
59 | end
60 |
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/gprSRPP.m:
--------------------------------------------------------------------------------
1 | function [mu, S2SR, S2PP] = gprSRPP(logtheta, covfunc, x, INDEX, y, xstar);
2 |
3 | % gprSRPP - Carries out approximate Gaussian process regression prediction
4 | % using the subset of regressors (SR) or projected process approximation (PP)
5 | % and the active set specified by INDEX.
6 | %
7 | % Usage
8 | %
9 | % [mu, S2SR, S2PP] = gprSRPP(logtheta, covfunc, x, INDEX, y, xstar)
10 | %
11 | % where
12 | %
13 | % logtheta is a (column) vector of log hyperparameters
14 | % covfunc is the covariance function, which is assumed to
15 | % be a covSum, and the last entry of the sum is covNoise
16 | % x is a n by D matrix of training inputs
17 | % INDEX is a vector of length m <= n used to specify which
18 | % inputs are used in the active set
19 | % y is a (column) vector (of size n) of targets
20 | % xstar is a nstar by D matrix of test inputs
21 | % mu is a (column) vector (of size nstar) of predicted means
22 | % S2SR is a (column) vector (of size nstar) of predicted variances under SR
23 | % S2PP is a (column) vector (of size nstar) of predicted variances under PP
24 | %
25 | % where D is the dimension of the input.
26 | %
27 | % For more help on covariance functions, see "help covFunctions".
28 | %
29 | % (C) copyright 2005, 2006 by Chris Williams (2006-03-29).
30 |
31 | if ischar(covfunc), covfunc = cellstr(covfunc); end % convert to cell if needed
32 | [n, D] = size(x);
33 | if eval(feval(covfunc{:})) ~= size(logtheta, 1)
34 | error('Error: Number of parameters do not agree with covariance function')
35 | end
36 |
37 | % we check that the covfunc cell array is a covSum, with last entry 'covNoise'
38 | if length(covfunc) ~= 2 | ~strcmp(covfunc(1), 'covSum') | ...
39 | ~strcmp(covfunc{2}(end), 'covNoise')
40 | error('The covfunc must be "covSum" whose last summand must be "covNoise"')
41 | end
42 |
43 | sigma2n = exp(2*logtheta(end)); % noise variance
44 | [nstar, D] = size(xstar); % number of test cases and dimension of input space
45 | m = length(INDEX); % size of subset
46 |
47 | % note, that in the following Kmm is computed by extracting the relevant part
48 | % of Knm, thus it will be the "noise-free" covariance (although the covfunc
49 | % specification does include noise).
50 |
51 | [v, Knm] = feval(covfunc{:}, logtheta, x, x(INDEX,:));
52 | Kmm = Knm(INDEX,:); % Kmm is a noise-free covariance matrix
53 | jitter = 1e-9*trace(Kmm);
54 | Kmm = Kmm + jitter*eye(m); % as suggested in code of jqc
55 |
56 | % a is cov between active set and test points and vstar is variances at test
57 | % points, incl noise variance
58 |
59 | [vstar, a] = feval(covfunc{:}, logtheta, x(INDEX,:), xstar);
60 |
61 | mu = a'*((sigma2n*Kmm + Knm'*Knm)\(Knm'*y)); % pred mean eq. (8.14) and (8.26)
62 |
63 | e = (sigma2n*Kmm + Knm'*Knm) \ a;
64 |
65 | S2SR = sigma2n*sum(a.*e,1)'; % noise-free SR variance, eq. 8.15
66 | S2PP = vstar-sum(a.*(Kmm\a),1)'+S2SR; % PP variance eq. (8.27) including noise
67 | S2SR = S2SR + sigma2n; % SR variance including noise
68 |
69 |
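A random subset is the simplest choice of active set (a sketch; x, y, xstar and a fitted logtheta for a covSum covariance ending in covNoise are assumed):

  covfunc = {'covSum', {'covSEiso','covNoise'}};
  m = 50; perm = randperm(size(x,1)); INDEX = perm(1:m);    % random active set
  [mu, S2SR, S2PP] = gprSRPP(logtheta, covfunc, x, INDEX, y, xstar);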
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/likelihoods.m:
--------------------------------------------------------------------------------
1 | % likelihood: likelihood functions are provided to be used by the binaryGP
2 | % function, for binary Gaussian process classification. Two likelihood
3 | % functions are provided:
4 | %
5 | % logistic
6 | % cumGauss
7 | %
8 | % The likelihood functions have three possible modes, the mode being selected
9 | % as follows (where "lik" stands for any likelihood function):
10 | %
11 | % (log) likelihood evaluation: [p, lp] = lik(y, f)
12 | %
13 | % where y are the targets, f the latent function values, p the probabilities
14 | % and lp the log probabilities. All vectors are the same size.
15 | %
16 | % derivatives (of the log): [lp, dlp, d2lp, d3lp] = lik(y, f, 'deriv')
17 | %
18 | % where lp is a number (sum of the log probabilities for each case) and the
19 | % derivatives (up to order 3) of the logs wrt the latent values are vectors
20 | % (as the likelihood factorizes there are no mixed terms).
21 | %
22 | % moments wrt Gaussian measure: [m0, m1, m2] = lik(y, mu, var)
23 | %
24 | % where mk is the k'th moment: \int f^k lik(y,f) N(f|mu,var) df, and if y is
25 | % empty, it is assumed to be a vector of ones.
26 | %
27 | % See the help for the individual likelihood for the computations specific to
28 | % each likelihood function.
29 | %
30 | % Copyright (c) 2007 Carl Edward Rasmussen and Hannes Nickisch 2007-04-11.
31 |
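Taken together, the three modes look as follows for cumGauss (a sketch; y holds +/-1 targets and f, mu, var are vectors of matching size):

  [p, lp] = cumGauss(y, f);                          % likelihood evaluation
  [lp, dlp, d2lp, d3lp] = cumGauss(y, f, 'deriv');   % derivatives of the log
  [m0, m1, m2] = cumGauss(y, mu, var);               % moments wrt Gaussian measure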
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/logistic.m:
--------------------------------------------------------------------------------
1 | function [out1, out2, out3, out4] = logistic(y, f, var)
2 |
3 | % logistic - logistic likelihood function. The expression for the likelihood is
4 | % logistic(t) = 1./(1+exp(-t)).
5 | %
6 | % Three modes are provided, for computing likelihoods, derivatives and moments
7 | % respectively, see likelihoods.m for the details. In general, care is taken
8 | % to avoid numerical issues when the arguments are extreme. The moments
9 | % \int f^k logistic(y,f) N(f|mu,var) df are calculated using an approximation
10 | % to the cumulative Gaussian based on a mixture of 5 cumulative Gaussian
11 | % functions (or alternatively using Gauss-Hermite quadrature, which may be less
12 | % accurate).
13 | %
14 | % Copyright (c) 2007 Carl Edward Rasmussen and Hannes Nickisch, 2007-07-25.
15 |
16 | if nargin>1, y=sign(y); end % allow only +/- 1 as values
17 |
18 | if nargin == 2 % (log) likelihood evaluation
19 |
20 | if numel(y)>0, yf = y.*f; else yf = f; end % product of latents and labels
21 |
22 | out1 = 1./(1+exp(-yf)); % likelihood
23 | if nargout>1
24 | out2 = yf;
25 | ok = -35<yf; % normal evaluation for larger values
26 | out2(ok) = -log(1+exp(-yf(ok))); % log of the likelihood
27 | % (for yf <= -35, out2 stays at yf, a tight approximation of the log)
28 | end
29 |
30 | elseif nargin == 3
31 |
32 | if strcmp(var,'deriv') % derivatives of the log
33 |
34 | if numel(y)==0, y=1; end
35 | yf = y.*f; % product of latents and labels
36 | [p,lp] = logistic(y,f);
37 | out1 = sum(lp);
38 |
39 | if nargout>1 % dlp - first derivatives
40 | s = min(0,f);
41 | p = exp(s)./(exp(s)+exp(s-f)); % p = 1./(1+exp(-f))
42 | out2 = (y+1)/2-p; % dlp, derivative of log likelihood
43 | if nargout>2 % d2lp, 2nd derivative of log likelihood
44 | out3 = -exp(2*s-f)./(exp(s)+exp(s-f)).^2;
45 | if nargout>3 % d3lp, 3rd derivative of log likelihood
46 | out4 = 2*out3.*(0.5-p);
47 | end
48 | end
49 | end
50 |
51 | else % compute moments
52 |
53 | mu = f; % 2nd argument is the mean of a Gaussian
54 | if numel(y)==0, y=ones(size(mu)); end % if empty, assume y=1
55 |
56 | % Two methods of integration are possible; the latter is more accurate
57 | % [out1,out2,out3] = gauherint(y, mu, var);
58 | [out1,out2,out3] = erfint(y, mu, var);
59 |
60 | end
61 |
62 | else
63 | error('No valid input provided.')
64 | end
65 |
66 |
67 | % The gauherint function approximates "\int t^k logistic(y t) N(t|mu,var)dt" by
68 | % means of Gauss-Hermite quadrature. A call to gauher.m is made.
69 |
70 | function [m0,m1,m2] = gauherint(y, mu, var)
71 |
72 | N = 20; [f,w] = gauher(N); % 20 yields precalculated weights
73 | sz = size(mu);
74 |
75 | f0 = sqrt(var(:))*f'+repmat(mu(:),[1,N]); % center values of f
76 | sig = logistic( repmat(y(:),[1,N]), f0 ); % calculate the likelihood values
77 |
78 | m0 = reshape(sig*w, sz); % zeroth moment
79 | if nargout>1 % first moment
80 | m1 = reshape(f0.*sig*w, sz);
81 | if nargout>2, m2 = reshape(f0.*f0.*sig*w, sz); end % second moment
82 | end
83 |
84 |
85 | % The erfint function approximates "\int t^k logistic(y t) N(t|mu,s2) dt" by
86 | % setting:
87 | % logistic(t) \approx 1/2 + \sum_{i=1}^5 (c_i/2) erf(lambda_i t)
88 | % The integrals \int t^k erf(t) N(t|mu,s2) dt can be done analytically.
89 | %
90 | % The inputs y, mu and var have to be column vectors of equal lengths.
91 |
92 | function [m0,m1,m2] = erfint(y, mu, s2)
93 |
94 | l = [0.44 0.41 0.40 0.39 0.36]; % approximation coefficients lambda_i
95 |
96 | c = [1.146480988574439e+02; -1.508871030070582e+03; 2.676085036831241e+03;
97 | -1.356294962039222e+03; 7.543285642111850e+01 ];
98 |
99 | S2 = 2*s2.*(y.^2)*(l.^2) + 1; % zeroth moment
100 | S = sqrt( S2 );
101 | Z = mu.*y*l./S;
102 | M0 = erf(Z);
103 | m0 = ( 1 + M0*c )/2;
104 |
105 | if nargout>1 % first moment
106 | NormZ = exp(-Z.^2)/sqrt(2*pi);
107 | M0mu = M0.*repmat(mu,[1,5]);
108 | M1 = (2*sqrt(2)*y.*s2)*l.*NormZ./S + M0mu;
109 | m1 = ( mu + M1*c )/2;
110 |
111 | if nargout>2 % second moment
112 | M2 = repmat(2*mu,[1,5]).*(1+s2.*y.^2*(l.^2)).*(M1-M0mu)./S2 ...
113 | + repmat(s2+mu.^2,[1,5]).*M0;
114 | m2 = ( mu.^2 + s2 + M2*c )/2;
115 | end
116 | end
117 |
118 |
119 |
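The accuracy of the five-term erf mixture used by erfint can be checked directly (a sketch):

  t = linspace(-8, 8, 1601)';
  l = [0.44 0.41 0.40 0.39 0.36];
  c = [1.146480988574439e+02; -1.508871030070582e+03; 2.676085036831241e+03;
       -1.356294962039222e+03; 7.543285642111850e+01];
  approx = (1 + erf(t*l)*c)/2;               % 1/2 + sum_i (c_i/2) erf(lambda_i t)
  err = max(abs(approx - 1./(1+exp(-t))));   % maximum absolute error over the grid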
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/solve_chol.c:
--------------------------------------------------------------------------------
1 | /* solve_chol - solve a linear system A*X = B using the cholesky factorization
2 | of A (where A is square, symmetric and positive definite).
3 |
4 | Copyright (c) 2004 Carl Edward Rasmussen. 2004-10-19. */
5 |
6 | #include "mex.h"
7 | #include <string.h>
8 |
9 | extern int dpotrs_(char *, int *, int *, double *, int *, double *, int *, int *);
10 |
11 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
12 | {
13 | double *C;
14 | int n, m, q;
15 |
16 | if (nrhs != 2 || nlhs > 1) /* check the input */
17 | mexErrMsgTxt("Usage: X = solve_chol(R, B)");
18 | n = mxGetN(prhs[0]);
19 | if (n != mxGetM(prhs[0]))
20 | mexErrMsgTxt("Error: First argument matrix must be square");
21 | if (n != mxGetM(prhs[1]))
22 | mexErrMsgTxt("Error: First and second argument matrices must have same number of rows");
23 | m = mxGetN(prhs[1]);
24 |
25 | plhs[0] = mxCreateDoubleMatrix(n, m, mxREAL); /* allocate space for output */
26 | C = mxGetPr(plhs[0]);
27 |
28 | if (n==0) return; /* if argument was empty matrix, do no more */
29 | memcpy(C,mxGetPr(prhs[1]),n*m*sizeof(double)); /* copy argument matrix */
30 | dpotrs_("U", &n, &m, mxGetPr(prhs[0]), &n, C, &n, &q); /* solve system */
31 | if (q > 0)
32 | mexErrMsgTxt("Error: illegal input to solve_chol");
33 | }
34 |
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/solve_chol.m:
--------------------------------------------------------------------------------
1 | % solve_chol - solve linear equations from the Cholesky factorization.
2 | % Solve A*X = B for X, where A is square, symmetric, positive definite. The
3 | % input to the function is R the Cholesky decomposition of A and the matrix B.
4 | % Example: X = solve_chol(chol(A),B);
5 | %
6 | % NOTE: The program code is written in the C language for efficiency and is
7 | % contained in the file solve_chol.c, and should be compiled using Matlab's mex
8 | % facility. However, this file also contains a (less efficient) matlab
9 | % implementation, supplied only as a help to people unfamiliar with mex. If
10 | % the C code has been properly compiled and is available, it automatically
11 | % takes precedence over the matlab code in this file.
12 | %
13 | % Copyright (c) 2004, 2005, 2006 by Carl Edward Rasmussen. 2006-02-08.
14 |
15 | function x = solve_chol(A, B);
16 |
17 | if nargin ~= 2 | nargout > 1
18 | error('Wrong number of arguments.');
19 | end
20 |
21 | if size(A,1) ~= size(A,2) | size(A,1) ~= size(B,1)
22 | error('Wrong sizes of matrix arguments.');
23 | end
24 |
25 | x = A\(A'\B);
26 |
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/sq_dist.c:
--------------------------------------------------------------------------------
1 | /* sq_dist - a mex function to compute a matrix of all pairwise squared
2 | distances between two sets of vectors, stored in the columns of the two
3 | matrices that are arguments to the function. The length of the vectors must
4 | agree. If only a single argument is given, the missing argument is taken to
5 | be identical to the first. If an optional third matrix argument Q is given,
6 | it must be of the same size as the output, but in this case a vector of the
7 | traces of the product of Q and the coordinatewise squared distances is
8 | returned.
9 |
10 | Copyright (c) 2003, 2004 Carl Edward Rasmussen. 2003-04-22. */
11 |
12 | #include "mex.h"
13 | #include <math.h>
14 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
15 | {
16 | double *a, *b, *C, *Q, z, t;
17 | int D, n, m, i, j, k;
18 | if (nrhs < 1 || nrhs > 3 || nlhs > 1)
19 | mexErrMsgTxt("Usage: C = sq_dist(a,b)\n or: C = sq_dist(a)\n or: c = sq_dist(a,b,Q)\nwhere the b matrix may be empty.");
20 | a = mxGetPr(prhs[0]);
21 | m = mxGetN(prhs[0]);
22 | D = mxGetM(prhs[0]);
23 | if (nrhs == 1 || mxIsEmpty(prhs[1])) {
24 | b = a;
25 | n = m;
26 | } else {
27 | b = mxGetPr(prhs[1]);
28 | n = mxGetN(prhs[1]);
29 | if (D != mxGetM(prhs[1]))
30 | mexErrMsgTxt("Error: column lengths must agree");
31 | }
32 | if (nrhs < 3) {
33 | plhs[0] = mxCreateDoubleMatrix(m, n, mxREAL);
34 | C = mxGetPr(plhs[0]);
35 |     for (i=0; i<m; i++)
36 |       for (j=0; j<n; j++) {
37 |         z = 0.0;
38 |         for (k=0; k<D; k++) { t = a[D*i+k] - b[D*j+k]; z += t*t; }
39 |         C[i+j*m] = z;
40 |       }
41 |   } else {
42 |     if (m != mxGetM(prhs[2]) || n != mxGetN(prhs[2]))
43 |       mexErrMsgTxt("Error: Third argument has wrong size");
44 |     Q = mxGetPr(prhs[2]);
45 |     plhs[0] = mxCreateDoubleMatrix(D, 1, mxREAL);
46 |     C = mxGetPr(plhs[0]);
47 |     for (k=0; k<D; k++) {
48 |       z = 0.0;
49 |       for (i=0; i<m; i++)
50 |         for (j=0; j<n; j++) { t = a[D*i+k] - b[D*j+k]; z += t*t*Q[i+j*m]; }
51 |       C[k] = z;
52 |     }
53 |   }
54 | }
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/sq_dist.log:
--------------------------------------------------------------------------------
10 | $
11 |
12 | _
13 | l.1 /* sq_
14 | dist - a mex function to compute a matrix of all pairwise squared
15 | ?
16 | ! Missing $ inserted.
17 |
18 | $
19 |
20 | \par
21 | l.9
22 |
23 | ?
24 |
25 | Overfull \hbox (1652.47543pt too wide) in paragraph at lines 1--9
26 | []\tenrm /* sq$[]\teni ist \tensy ^^@ \teni amexfunctiontocomputeamatrixofallpa
27 | irwisesquareddistancesbetweentwosetsofvectors; storedinthecolumnsofthetwomatric
28 | esthatareargumentstothefunction:Thelengthofthevectorsmustagree:Ifonlyasinglearg
29 | umentisgiven; themissingargumentistakentobeidenticaltothefirst:Ifanoptionalthir
30 | dmatrixargumentQisgiven; itmustbeofthesamesizeastheoutput; butinthiscaseavector
31 | ofthetracesoftheproductofQandthecoordinatewisesquareddistancesisreturned:$ |
32 |
33 | \hbox(7.5+2.5)x469.75499, glue set - 1.0
34 | .\hbox(0.0+0.0)x20.0
35 | .\tenrm /
36 | .\tenrm *
37 | .\glue 3.33333 plus 1.66666 minus 1.11111
38 | .\tenrm s
39 | .etc.
40 |
41 | ! You can't use `macro parameter character #' in vertical mode.
42 | l.12 #
43 | include "mex.h"
44 | ? q
45 | OK, entering \batchmode...
46 | ! You can't use `macro parameter character #' in horizontal mode.
47 | l.13 #
48 | include
49 | Sorry, but I'm not programmed to handle this case;
50 | I'll just pretend that you didn't ask for it.
51 | If you're in the wrong mode, you might be able to
52 | return to the right one by typing `I}' or `I$' or `I\par'.
53 |
54 | ! Missing $ inserted.
55 |
56 | $
57 |
58 | _
59 | l.19 mexErrMsgTxt("Usage: C = sq_
60 | dist(a,b)\n or: C = sq_dist(a)\n ...
61 | I've inserted a begin-math/end-math symbol since I think
62 | you left one out. Proceed, with fingers crossed.
63 |
64 | ! Undefined control sequence.
65 | l.19 mexErrMsgTxt("Usage: C = sq_dist(a,b)\n
66 | or: C = sq_dist(a)\n ...
67 | The control sequence at the end of the top line
68 | of your error message was never \def'ed. If you have
69 | misspelled it (e.g., `\hobx'), type `I' and the correct
70 | spelling (e.g., `I\hbox'). Otherwise just continue,
71 | and I'll forget about whatever was undefined.
72 |
73 | ! Undefined control sequence.
74 | l.19 ... sq_dist(a,b)\n or: C = sq_dist(a)\n
75 | or: c = sq_dist(a,b...
76 | The control sequence at the end of the top line
77 | of your error message was never \def'ed. If you have
78 | misspelled it (e.g., `\hobx'), type `I' and the correct
79 | spelling (e.g., `I\hbox'). Otherwise just continue,
80 | and I'll forget about whatever was undefined.
81 |
82 | ! Undefined control sequence.
83 | l.19 ...t(a)\n or: c = sq_dist(a,b,Q)\nwhere
84 | the b matrix may be empty...
85 | The control sequence at the end of the top line
86 | of your error message was never \def'ed. If you have
87 | misspelled it (e.g., `\hobx'), type `I' and the correct
88 | spelling (e.g., `I\hbox'). Otherwise just continue,
89 | and I'll forget about whatever was undefined.
90 |
91 | ! Extra }, or forgotten $.
92 | l.54 }
93 |
94 | I've deleted a group-closing symbol because it seems to be
95 | spurious, as in `$x}$'. But perhaps the } is legitimate and
96 | you forgot something else, as in `\hbox{$x}'. In such cases
97 | the way to recover is to insert both the forgotten and the
98 | deleted material, e.g., by typing `I$}'.
99 |
100 | )
101 | ! Emergency stop.
102 | <*> sq_dist.c
103 |
104 | *** (job aborted, no legal \end found)
105 |
106 | No pages of output.
107 |
--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/gpml/sq_dist.m:
--------------------------------------------------------------------------------
1 | % sq_dist - a function to compute a matrix of all pairwise squared distances
2 | % between two sets of vectors, stored in the columns of the two matrices, a
3 | % (of size D by n) and b (of size D by m). If only a single argument is given
4 | % or the second matrix is empty, the missing matrix is taken to be identical
5 | % to the first.
6 | %
7 | % Special functionality: If an optional third matrix argument Q is given, it
8 | % must be of size n by m, and in this case a vector of the traces of the
9 | % product of Q' and the coordinatewise squared distances is returned.
10 | %
11 | % NOTE: The program code is written in the C language for efficiency and is
12 | % contained in the file sq_dist.c, and should be compiled using Matlab's mex
13 | % facility. However, this file also contains a (less efficient) matlab
14 | % implementation, supplied only as a help to people unfamiliar with mex. If
15 | % the C code has been properly compiled and is available, it automatically
16 | % takes precedence over the matlab code in this file.
17 | %
18 | % Usage: C = sq_dist(a, b)
19 | % or: C = sq_dist(a) or equiv.: C = sq_dist(a, [])
20 | % or: c = sq_dist(a, b, Q)
21 | % where the b matrix may be empty.
22 | %
23 | % where a is of size D by n, b is of size D by m (or empty), C and Q are of
24 | % size n by m and c is of size D by 1.
25 | %
26 | % Copyright (c) 2003, 2004, 2005 and 2006 Carl Edward Rasmussen. 2006-03-09.
27 |
28 | function C = sq_dist(a, b, Q);
29 |
30 | if nargin < 1 | nargin > 3 | nargout > 1
31 | error('Wrong number of arguments.');
32 | end
33 |
34 | if nargin == 1 | isempty(b) % input arguments are taken to be
35 | b = a; % identical if b is missing or empty
36 | end
37 |
38 | [D, n] = size(a);
39 | [d, m] = size(b);
40 | if d ~= D
41 | error('Error: column lengths must agree.');
42 | end
43 |
44 | if nargin < 3
45 | C = zeros(n,m);
46 | for d = 1:D
47 | C = C + (repmat(b(d,:), n, 1) - repmat(a(d,:)', 1, m)).^2;
48 | end
49 | % C = repmat(sum(a.*a)',1,m)+repmat(sum(b.*b),n,1)-2*a'*b could be used to
50 | % replace the 3 lines above; it would be faster, but numerically less stable.
51 | else
52 | if [n m] == size(Q)
53 | C = zeros(D,1);
54 | for d = 1:D
55 | C(d) = sum(sum((repmat(b(d,:), n, 1) - repmat(a(d,:)', 1, m)).^2.*Q));
56 | end
57 | else
58 | error('Third argument has wrong size.');
59 | end
60 | end
61 |
--------------------------------------------------------------------------------
/KCI-test/indtest_corr.m:
--------------------------------------------------------------------------------
1 | function [pval, stat] = indtest_corr(X, Y, Z, pars)
2 | % function [pval, stat] = indtest_corr(X, Y, Z, pars)
3 | %
4 | % Uses: statistics toolbox matlab
5 | %
6 | % Performs either a correlation test or a partial correlation test
7 | %
8 | % INPUT:
9 | % X Nxd1 matrix of samples (N data points, d1 dimensions)
10 | % Y Nxd2 matrix of samples (N data points, d2 dimensions)
11 | % Z Nxd3 matrix of samples (N data points, d3 dimensions)
12 | % pars structure containing parameters for the independence test
13 | % .bonferroni if true, bonferroni correction is performed (standard: false)
14 | %
15 | % OUTPUT:
16 | % pval p value of the test
17 | % stat test statistic
18 | %
19 | %
20 | % Copyright (c) 2011-2011 Kun Zhang
21 | % 2011-2011 Jonas Peters
22 | % All rights reserved. See the file COPYING for license terms.
23 |
24 |
25 | if ~isfield(pars,'bonferroni')
26 | pars.bonferroni = false;
27 | end;
28 |
29 | if isempty(Z)
30 | [sta,pp] = corr(X,Y);
31 | stat=max(max(sta)); % scalar statistic: largest correlation over all (i,j) pairs
32 | pval = min(min(pp));
33 | if pars.bonferroni
34 | pval=size(X,2)*size(Y,2)*pval;
35 | end
36 | else
37 | [sta, pp]=partialcorr(X,Y,Z);
38 | pval = min(min(pp));
39 | stat=max(max(sta)); % scalar statistic: largest partial correlation over all (i,j) pairs
40 | if pars.bonferroni
41 | pval=size(X,2)*size(Y,2)*pval;
42 | end
43 | end
44 |
45 | return
46 |
--------------------------------------------------------------------------------
/KCI-test/indtest_hsic.m:
--------------------------------------------------------------------------------
1 | function [pval, stat] = indtest_hsic(X, Y, Z, pars)
2 | % function [pval, stat] = indtest_hsic(X, Y, Z, pars)
3 | %
4 | % This function is a WRAPPER
5 | % Performs either an HSIC test (Gretton et al.) or a conditional HSIC test (Fukumizu et al.)
6 | %
7 | % INPUT:
8 | % X Nxd1 matrix of samples (N data points, d1 dimensions)
9 | % Y Nxd2 matrix of samples (N data points, d2 dimensions)
10 | % Z Nxd3 matrix of samples (N data points, d3 dimensions)
11 | % pars structure containing parameters for the independence test
12 | % .pairwise if true, the test is performed pairwise if d1>1 (standard: false)
13 | % .bonferroni if true, bonferroni correction is performed (standard: false)
14 | % .perm # of bootstrap samples for cond. hsic test (standard: 500)
15 | %
16 | % OUTPUT:
17 | % pval p value of the test
18 | % stat test statistic
19 | %
20 | %
21 | % Copyright (c) 2011-2011 Kun Zhang
22 | % 2011-2011 Jonas Peters
23 | % All rights reserved. See the file COPYING for license terms.
24 |
25 |
26 | if ~isfield(pars,'pairwise')
27 | pars.pairwise = false;
28 | end;
29 |
30 | if ~isfield(pars,'bonferroni')
31 | pars.bonferroni = false;
32 | end;
33 |
34 | if ~isfield(pars,'perm')
35 | pars.perm= 500;
36 | end;
37 |
38 | if isempty(Z) %unconditional HSIC
39 | if pars.pairwise
40 | p = zeros(size(X,2),size(Y,2));
41 | for i = 1:size(X,2);
42 | for j = 1:size(Y,2);
43 | [p(i,j) sta(i,j)] = fasthsic(X(:,i),Y(:,j));
44 | end
45 | end
46 | [pp iii] = min(p);
47 | [pval jj] = min(pp);
48 | stat = sta(iii(jj),jj);
49 | if pars.bonferroni
50 | pval=size(X,2)*size(Y,2)*pval;
51 | end
52 | else
53 | [pval stat]= fasthsic(X, Y);
54 | end
55 | else %conditional HSIC
56 | [aa, pval, stat]=hsiccondTestIC(X,Y,Z,0.8,pars.perm);
57 | end
58 |
59 | return
60 |
61 |
62 |
63 |
--------------------------------------------------------------------------------
/KCI-test/indtest_new.m:
--------------------------------------------------------------------------------
1 | function [pval stat] = indtest_new(X, Y, Z, pars)
2 | % function [pval, stat] = indtest_new(X, Y, Z, pars)
3 | %
4 | %
5 | % This function is a WRAPPER
6 | % Performs new method (to be submitted to UAI 2011)
7 | %
8 | % INPUT:
9 | % X Nxd1 matrix of samples (N data points, d1 dimensions)
10 | % Y Nxd2 matrix of samples (N data points, d2 dimensions)
11 | % Z Nxd3 matrix of samples (N data points, d3 dimensions)
12 | % pars structure containing parameters for the independence test
13 | % .pairwise if true, the test is performed pairwise if d1>1 (standard: false)
14 | % .bonferroni if true, bonferroni correction is performed (standard: false)
15 | % .width kernel width (standard: 0, which results in an automatic -heuristic- choice)
16 | %
17 | % OUTPUT:
18 | % pval p value of the test
19 | % stat test statistic
20 | %
21 | %
22 | % Copyright (c) 2011-2011 Kun Zhang
23 | % 2011-2011 Jonas Peters
24 | % All rights reserved. See the file COPYING for license terms.
25 |
26 |
27 | if ~isfield(pars,'pairwise')
28 | pars.pairwise = false;
29 | end;
30 | if ~isfield(pars,'bonferroni')
31 | pars.bonferroni = false;
32 | end;
33 | if ~isfield(pars,'width')
34 | pars.width = 0;
35 | end;
36 |
37 | if size(X,2)>1||size(Y,2)>1
38 | % error('This test only works for one-dimensional random variables X and Y. Maybe it can be extended??')
39 | fprintf('Note that X and Y are random vectors.\n');
40 | end
41 |
42 | if isempty(Z) %unconditional HSIC
43 | if pars.pairwise
44 | p = zeros(size(X,2),size(Y,2));
45 | for i = 1:size(X,2);
46 | for j = 1:size(Y,2);
47 | [sta(i,j), Cri, p_vala, Cri_appr, p(i,j)] = UInd_KCItest(X(:,i), Y(:,j), pars.width); % pair X(:,i) with Y(:,j)
48 | end
49 | end
50 | [pp iii] = min(p);
51 | [pval jj] = min(pp);
52 | stat = sta(iii(jj),jj);
53 | if pars.bonferroni
54 | pval=size(X,2)*size(Y,2)*pval;
55 | end
56 | else
57 | [pval stat] = UInd_KCItest(X, Y, pars.width);
58 | end
59 | else % conditional independence test
60 | [stat, Cri, pval, Cri_appr, p_val_appr] = CInd_test_new_withGP(X, Y, Z, 0.01, pars.width);
61 | end
62 |
63 | return
64 |
65 |
66 |
67 |
--------------------------------------------------------------------------------
/KCI-test/indtest_new_t.m:
--------------------------------------------------------------------------------
1 | function [pval stat] = indtest_new_t(X, Y, Z, pars)
2 | % function [pval, stat] = indtest_new_t(X, Y, Z, pars)
3 | %
4 | %
5 | % This function is a WRAPPER
6 | % Performs new method (to be submitted to UAI 2011)
7 | %
8 | % INPUT:
9 | % X Nxd1 matrix of samples (N data points, d1 dimensions)
10 | % Y Nxd2 matrix of samples (N data points, d2 dimensions)
11 | % Z Nxd3 matrix of samples (N data points, d3 dimensions)
12 | % pars structure containing parameters for the independence test
13 | % .pairwise if true, the test is performed pairwise if d1>1 (standard: false)
14 | % .bonferroni if true, bonferroni correction is performed (standard: false)
15 | % .width kernel width (standard: 0, which results in an automatic -heuristic- choice)
16 | %
17 | % OUTPUT:
18 | % pval p value of the test
19 | % stat test statistic
20 | %
21 | %
22 | % Copyright (c) 2011-2011 Kun Zhang
23 | % 2011-2011 Jonas Peters
24 | % All rights reserved. See the file COPYING for license terms.
25 |
26 |
27 |
28 | if size(X,2)>1||size(Y,2)>1
29 | % error('This test only works for one-dimensional random variables X and Y. Maybe it can be extended??')
30 | fprintf('Note that X and Y are random vectors.\n');
31 | end
32 |
33 | if isempty(Z) %unconditional HSIC
34 | if pars.pairwise
35 | p = zeros(size(X,2),size(Y,2));
36 | for i = 1:size(X,2)
37 | for j = 1:size(Y,2)
38 | [sta(i,j), Cri, p_vala, Cri_appr, p(i,j)] = UInd_KCItest(X(:,i), Y(:,j), pars); % pair X(:,i) with Y(:,j)
39 | end
40 | end
41 | [pp iii] = min(p);
42 | [pval jj] = min(pp);
43 | stat = sta(iii(jj),jj);
44 | if pars.bonferroni
45 | pval=size(X,2)*size(Y,2)*pval;
46 | end
47 | else
48 | [pval stat] = UInd_KCItest(X, Y, pars);
49 | end
50 | else % conditional independence test
51 | [pval, stat, Cri] = CInd_test_new_withGP_t(X, Y, Z, 0.01, pars);
52 | end
53 |
54 | return
55 |
56 |
57 |
58 |
--------------------------------------------------------------------------------
/KCI-test/indtest_new_t_RFF.m:
--------------------------------------------------------------------------------
1 | function [pval stat] = indtest_new_t_RFF(X, Y, Z, pars)
2 | % function [pval, stat] = indtest_new_t_RFF(X, Y, Z, pars)
3 | %
4 | %
5 | % This function is a WRAPPER
6 | % Performs new method (to be submitted to UAI 2011)
7 | %
8 | % INPUT:
9 | % X Nxd1 matrix of samples (N data points, d1 dimensions)
10 | % Y Nxd2 matrix of samples (N data points, d2 dimensions)
11 | % Z Nxd3 matrix of samples (N data points, d3 dimensions)
12 | % pars structure containing parameters for the independence test
13 | % .pairwise if true, the test is performed pairwise if d1>1 (standard: false)
14 | % .bonferroni if true, bonferroni correction is performed (standard: false)
15 | % .width kernel width (standard: 0, which results in an automatic -heuristic- choice)
16 | %
17 | % OUTPUT:
18 | % pval p value of the test
19 | % stat test statistic
20 | %
21 | %
22 | % Copyright (c) 2011-2011 Kun Zhang
23 | % 2011-2011 Jonas Peters
24 | % All rights reserved. See the file COPYING for license terms.
25 |
26 |
27 | if size(X,2)>1||size(Y,2)>1
28 | % error('This test only works for one-dimensional random variables X and Y. Maybe it can be extended??')
29 | fprintf('Note that X and Y are random vectors.\n');
30 | end
31 |
32 | if isempty(Z) %unconditional HSIC
33 | if pars.pairwise
34 | p = zeros(size(X,2),size(Y,2));
35 | for i = 1:size(X,2)
36 | for j = 1:size(Y,2)
37 | [sta(i,j), Cri, p_vala, Cri_appr, p(i,j)] = UInd_KCItest_RFF(X(:,i), Y(:,j), pars); % pair X(:,i) with Y(:,j)
38 | end
39 | end
40 | [pp iii] = min(p);
41 | [pval jj] = min(pp);
42 | stat = sta(iii(jj),jj);
43 | if pars.bonferroni
44 | pval=size(X,2)*size(Y,2)*pval;
45 | end
46 | else
47 | [pval stat] = UInd_KCItest_RFF(X, Y, pars);
48 | end
49 | else % conditional independence test
50 | [pval, stat, Cri] = CInd_test_new_withGP_t_RFF(X, Y, Z, 0.01, pars);
51 | end
52 |
53 | return
54 |
55 |
56 |
57 |
--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/README
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Causal-Discovery-from-Nonstationary-Heterogeneous-Data
2 |
3 | Causal Discovery from Nonstationary/Heterogeneous Data. Copyright (c) 2017-2019 Biwei Huang & Kun Zhang
4 |
5 | ### MAIN FUNCTIONS
6 | function [g_skeleton, g_inv, gns, SP] = nonsta_cd_new(X,cond_ind_test,c_indx,maxFanIn,alpha, Type, pars)
7 |
8 | INPUT:
9 | * X: - T*n matrix. T is number of data points and n is the number of observed variables
10 | * cond_ind_test: - function handle that computes p-values for X ind. Y given Z: (p_val = cond_ind_test(X, Y, Z, pars))
11 | * c_indx: surrogate variable to capture the distribution shift. If data is nonstationary, then it is the time index. If data is from multiple domains, then it is the domain index
12 | * maxFanIn: - maximum number of variables in the conditioning set
13 | * alpha: - significance level of the independence test
14 | * Type: - run corresponding phases of CD-NOD
15 | * If Type=0, run all phases of CD-NOD (including phase 1: learning causal skeleton, phase 2: identifying causal directions with generalization of invariance, phase 3: identifying directions with independent change principle, and phase 4: recovering the nonstationarity driving force).
16 | * If Type = 1, perform phase 1 + phase 2 + phase 3
17 | * If Type = 2, perform phase 1 + phase 2
18 | * If Type = 3, only perform phase 1
19 | * pars: - including pars.pairwise, pars.bonferroni, pars.if_GP1, pars.if_GP2, pars.width, and pars.widthT
20 | * If pars.if_GP1 = 1, optimize the kernel width with GP in conditional independence tests; otherwise, use a fixed kernel width
21 | * If pars.if_GP2 = 1, optimize the kernel width with GP in direction determination with independent change principle & nonstationary driving force visualization
22 | * pars.width: kernel width on observational variables (except the time index). If it is 0, then use the default kernel width when IF_GP1 = 0
23 | * pars.widthT: kernel width on the time index
24 |
25 |
26 | OUTPUT:
27 | * g_skeleton: (n+1)*(n+1) matrix representing the recovered causal skeleton over the augmented set of variables
28 | * i-j (undirected edge): g_skeleton(i,j)=-1 & g_skeleton(j,i)=-1; i and j nonadjacent: g_skeleton(i,j)=0 & g_skeleton(j,i)=0
29 | * the last row of g_skeleton indicates the connection of the nonstationarity indicator (C) with the other observed variables
30 | * g_inv: (n+1)*(n+1) matrix representing the recovered graph structure, up to the Markov equivalence class, learned on the augmented causal graph, with directions inferred by generalization of invariance
31 | * i->j: g_inv(i,j)=1 & g_inv(j,i)=0; i-j: g_inv(i,j)=-1 & g_inv(j,i)=-1; i and j nonadjacent: g_inv(i,j)=0 & g_inv(j,i)=0
32 | * the last row of g_inv indicates the connection of the nonstationarity indicator (C) with the other observed variables
33 | * gns: (n+1)*(n+1) matrix to represent recovered graph structure, with directions inferred by generalization of invariance & independent change principle
34 | * i->j: gns(i,j)=1 & gns(j,i)=0; i-j: gns(i,j)=-1 & gns(j,i)=-1; i and j nonadjacent: gns(i,j)=0 & gns(j,i)=0
35 | * the last row of gns indicates the connection of the nonstationarity indicator (C) with the other observed variables
36 | * ("gns" should have more oriented edges than "g_inv")
37 | * SP: details of each independence test
38 |
39 |
40 | ### EXAMPLE
41 | example1.m, example2.m, and example3.m give three examples of using this package.
42 | Specifically, example1.m and example2.m are for nonstationary data, and example3.m is
43 | for data from multiple domains.
44 |
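A minimal sketch of a call (mirroring example1.m; the data and parameter values here are illustrative only):

```matlab
T = 500;                                 % sample size
x1 = randn(T,1);                         % x1 -> x2, with a nonstationary module on x2
x2 = 0.8*x1 + 1.5*sin((1:T)'/50) + 0.5*randn(T,1);
X  = [x1, x2];

pars.pairwise = false;  pars.bonferroni = false;
pars.if_GP1 = 1;        pars.if_GP2 = 1; % learn kernel widths with GP
pars.width  = 0;        pars.widthT = 0.1;
c_indx = (1:T)';                         % time index as the surrogate variable

[g_skeleton, g_inv, gns, SP] = ...
    nonsta_cd_new(X, 'indtest_new_t', c_indx, 2, 0.05, 1, pars);
```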
45 | ### If there are multi-dimensional variables, use
46 | function [g_skeleton, g_inv, gns, SP] = nonsta_cd_new_multi(X,dlabel,cond_ind_test,c_indx,maxFanIn,alpha, Type, pars)
47 | * dlabel: - In the case with multi-dimensional variables, we use dlabel to indicate the column indices belonging to each variable
48 | Please see the example given in example4.m
49 |
50 | ### Notes
51 | For large-scale systems, there are several ways to speed up the process:
52 |
53 | - Fix the kernel width in conditional independence tests: set "pars.if_GP1 = 0".
54 | - Approximate the kernel with random Fourier features, by setting "cond_ind_test='indtest_new_t_RFF'".
55 | - Do a pre-processing step to remove some spurious edges, e.g., first using partial correlation to remove some edges.
56 |
57 | Note that if you fix the kernel width, you may need to tune it a bit to get better results,
58 | especially the kernel width on the time index ("pars.widthT"), and "width" and "Wt" in "infer_nonsta_dir.m" and "cd_non_con_fun.m".
59 |
60 | ### CITATION
61 | If you use this code, please cite the following paper:
62 |
63 | 1. Zhang, K., Huang, B., Zhang, J., Glymour, C., Scholkopf, B. Causal Discovery from Nonstationary/Heterogeneous Data: Skeleton Estimation and Orientation Determination. IJCAI 2017.
64 | 2. Huang, B., Zhang, K., Zhang, J., Glymour, C., Scholkopf, B. Behind Distribution Shift: Mining Driving Forces of Changes and Causal Arrows. ICDM 2017.
65 | 3. Huang, B., Zhang, K., Zhang, J., Ramsey, J., Sanchez-Romero, R., Glymour, C., Scholkopf, B. Causal Discovery from Heterogeneous/Nonstationary Data. JMLR, 21(89), 2020.
66 |
67 | If you have problems or questions, do not hesitate to send an email to biweih@andrew.cmu.edu
68 |
--------------------------------------------------------------------------------
/cd_non_con_fun.m:
--------------------------------------------------------------------------------
1 | function [Yg,Yl,Mg,Ml,D,eigValueg,eigValuel] = cd_non_con_fun(X,Y,c_indx,width,IF_GP)
2 | % learn the nonstationary driving force of the causal mechanism
3 | % X: parents; Y: effect
4 | % width: the kernel width for X and Y
5 | % c_indx: surrogate variable to capture the distribution shift;
6 | % If IF_GP = 1, learn the kernel width for P(Y|X); setting it to 0 can speed up the process
7 |
8 | if(width==0)
9 | width = 0.1;
10 | end
11 | Wt = 1; % the initial kernel width on C (or T). May need tuning for different data sets!
12 | [T,d] = size(X);
13 | X = X * diag(1./std(X));
14 | Y = Y/std(Y);
15 | theta = 1/width^2; % 0.2
16 | lambda = 1; % 0.05 0.3 10
17 | Ml = [];
18 |
19 | % size of Y should be T*1.
20 | Kyy = kernel(Y, Y, [theta,1]);
21 |
22 | if IF_GP
23 | Thresh = 1E-4;
24 | [eig_Ky, eiy] = eigdec((Kyy+Kyy')/2, min(400, floor(T/4))); % /2
25 | covfunc = {'covSum', {'covSEard','covNoise'}};
26 | % covfunc = {'covSum', {'covMatern3ard','covNoise'}};
27 |
28 | logtheta0 = [log(width)*ones(d,1); log(Wt); 0; log(sqrt(0.1))];
29 | fprintf('Optimization hyperparameters in GP regression:\n');
30 |
31 | IIy = find(eig_Ky > max(eig_Ky) * Thresh); eig_Ky = eig_Ky(IIy); eiy = eiy(:,IIy);
32 | [logtheta_y, fvals_y, iter_y] = minimize(logtheta0, 'gpr_multi', -350, covfunc, [X c_indx], 2*sqrt(T) *eiy * diag(sqrt(eig_Ky))/sqrt(eig_Ky(1)));
33 | exp(logtheta_y),
34 |
35 | covfunc_z = {'covSEard'};
36 | Kxt = feval(covfunc_z{:}, logtheta_y, [X c_indx]);
37 |
38 | % Note: in the conditional case, no need to do centering, as the regression
39 | % will automatically enforce that.
40 |
41 | % Kernel matrices of the errors
42 | invK = pdinv(Kxt + exp(2*logtheta_y(end))*eye(T));
43 |
44 | % Kxx = kernel(X, X, [1/exp(2*logtheta_y(1)),1]);
45 | % Ktt = kernel((1:T)', (1:T)', [1/exp(2*logtheta_y(d+1)),1]);
46 | Kxx = feval(covfunc_z{:}, logtheta_y([1:d,d+2]), X);
47 | Ktt = feval(covfunc_z{:}, logtheta_y([d+1,d+2]), c_indx);
48 | else
49 | Kxx = kernel(X, X, [theta,1]);
50 | Kyy = kernel(Y, Y, [theta,1]);
51 | Ktt = kernel(c_indx, c_indx, [1/Wt^2,1]);
52 | invK = pdinv( Kxx.* Ktt + lambda * eye(T));
53 | end
54 | Kxx3 = Kxx^3; %^3
55 |
56 | prod_invK = invK * Kyy * invK;
57 | % now finding Ml
58 |
59 | Ml = 1/T^2 * Ktt*( Kxx3 .* prod_invK) * Ktt;
60 |
61 | % Len = floor(T/50);
62 | % for c = 1:50
63 | % cc = Len*(c-1)+1;
64 | % for c1 = c:50
65 | % fprintf('.');
66 | % cc1 = Len*(c1-1)+1;
67 | % % Ml(c,c1) = trace(diag(Ktt(:,cc)) * Kxx3 * diag(Ktt(:,cc1)) * prod_invK);
68 | % Ml(c,c1) = trace( ((Ktt(:,cc) * Ktt(:,cc1)' ) .* Kxx3) * prod_invK );
69 | % if c1>c
70 | % Ml(c1,c) = Ml(c,c1);
71 | % end
72 | % end
73 | % end
74 | % Ml = 1/T^2 * Ml;
75 |
76 | % the square distance
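% D(i,j) = Ml(i,i) + Ml(j,j) - 2*Ml(i,j), i.e. the squared distance between
% embeddings i and j when Ml is read as their Gram (inner-product) matrix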
77 | D = diag(diag(Ml)) * ones(size(Ml)) + ones(size(Ml)) * diag(diag(Ml)) - 2*Ml;
78 |
79 | % Gaussian kernel
80 | sigma2_square = median( D(find(tril(ones(size(D)),-1))) );
81 | Mg = exp(-D/sigma2_square/2);
82 |
83 |
84 | [Yg, eigVectorg, eigValueg]=kPCA_kernel_orig(Mg,3);
85 | [Yl, eigVectorl, eigValuel]=kPCA_kernel_orig(Ml,3);
86 | % figure, plot(Yg(:,1),'b'); hold on; plot(Yg(:,2),'k--'); title('Visualization of change in PA^i \rightarrow V^i (with Gaussian kernel)')
87 | % legend(['First component of \lambda_i (eigenvalue: ' num2str(eigValueg(1)) ')'],['Second component of \lambda_i (eigenvalue: ' num2str(eigValueg(2)) ')']);
88 |
89 | % figure, plot(Yl(:,1),'b'); hold on; plot(Yl(:,2),'k--'); title('Visualization of change in PA^i \rightarrow V^i (with linear kernel)')
90 | % legend(['First component of \lambda_i (eigenvalue: ' num2str(eigValuel(1)) ')'],['Second component of \lambda_i (eigenvalue: ' num2str(eigValuel(2)) ')']);
91 |
--------------------------------------------------------------------------------
/example1.m:
--------------------------------------------------------------------------------
1 | % example 1: nonstationary data (each variable is continuous & one-dimensional)
2 | clear all,clc,close all
3 | addpath(genpath(pwd))
4 | rng(10)
5 |
6 | %% generate the data
7 | % x1->x2->x3->x4, and the causal modules of x2 and x4 are nonstationary, and
8 | % their changes are related
9 | T = 500;
10 | x1 = randn(T,1);
11 | x2 = 0.8*x1 + 1.5*sin([1:T]'/50) + 0.5*randn(T,1);
12 | x3 = 0.8*x2 + 0.5*randn(T,1);
13 | x4 = 0.8*x3 + (sin([1:T]'/50)+sin([1:T]'/20)) + 0.5*randn(T,1);
14 | Data = [x1,x2,x3,x4];
15 |
16 | %% set the parameters
17 | alpha = 0.05; % significance level of independence test
18 | maxFanIn = 2; % maximum number of conditional variables
19 | if (T<=1000) % for small sample size, use GP to learn the kernel width in conditional independence tests
20 | cond_ind_test='indtest_new_t';
21 | IF_GP = 1;
22 | else
23 | if (T>1000 & T<=2000) % for relatively large sample size, fix the kernel width
24 | cond_ind_test='indtest_new_t';
25 | IF_GP = 0;
26 | else % for very large sample size, fix the kernel width and use random fourier feature to approximate the kernel
27 | cond_ind_test='indtest_new_t_RFF';
28 | IF_GP = 0;
29 | end
30 | end
31 | pars.pairwise = false;
32 | pars.bonferroni = false;
33 | pars.if_GP1 = IF_GP; % for conditional independence test, whether use GP to learn the kernel width
34 | pars.if_GP2 = 1; % for direction determination with independent change principle & nonstationary driving force visualization
35 | pars.width = 0; % kernel width on observational variables (except the time index). If it is 0, then use the default kernel width when IF_GP = 0
36 | pars.widthT = 0.1; % the kernel width on the time index
37 | c_indx = [1:T]'; % surrogate variable to capture the distribution shift;
38 | % here it is the time index, because the data is nonstationary
39 | Type = 1;
40 | % If Type=0, run all phases of CD-NOD (including
41 | % phase 1: learning causal skeleton,
42 | % phase 2: identifying causal directions with generalization of invariance,
43 | % phase 3: identifying directions with independent change principle, and
44 | % phase 4: recovering the nonstationarity driving force )
45 | % If Type = 1, perform phase 1 + phase 2 + phase 3
46 | % If Type = 2, perform phase 1 + phase 2
47 | % If Type = 3, only perform phase 1
48 |
49 | %% run CD-NOD
50 | [g_skeleton, g_inv, gns, SP] = nonsta_cd_new(Data, cond_ind_test, c_indx, maxFanIn, alpha, Type, pars);
51 |
52 |
53 |
--------------------------------------------------------------------------------
/example2.m:
--------------------------------------------------------------------------------
1 | % example 2: nonstationary data
2 | clear all,clc,close all
3 | addpath(genpath(pwd))
4 | rng(10)
5 |
6 | % x1->x2->x3, and the causal modules of x1, x2, and x3 are nonstationary,
7 | % and the causal modules change independently
8 | load smooth_module
9 | % R0 stores the generated nonstationary driving forces, which are independent of each other
10 | T = 500;
11 | x1 = 0.5*randn(T,1) + 5*R0{1}(1:T);
12 | x2 = 0.8*x1 + 4*R0{2}(1:T) + 0.5*randn(T,1);
13 | x3 = 6*R0{6}(1:T)+ 0.8*x2 + 0.3*randn(T,1);
14 | Data = [x1,x2,x3];
15 |
16 | %% set the parameters
17 | alpha = 0.05; % significance level of independence test
18 | maxFanIn = 2; % maximum number of conditional variables
19 | if (T<=1000) % for small sample size, use GP to learn the kernel width in conditional independence tests
20 | cond_ind_test='indtest_new_t';
21 | IF_GP = 1;
22 | else
23 | if (T>1000 & T<2000) % for relatively large sample size, fix the kernel width
24 | cond_ind_test='indtest_new_t';
25 | IF_GP = 0;
26 | else % for very large sample size, fix the kernel width and use random fourier feature to approximate the kernel
27 | cond_ind_test='indtest_new_t_RFF';
28 | IF_GP = 0;
29 | end
30 | end
31 | pars.pairwise = false;
32 | pars.bonferroni = false;
33 | pars.if_GP1 = IF_GP; % for conditional independence test
34 | pars.if_GP2 = 1; % for direction determination with independent change principle & nonstationary driving force visualization
35 | pars.width = 0; % kernel width on observational variables (except the time index). If it is 0, then use the default kernel width when IF_GP = 0
36 | pars.widthT = 0.1; % the kernel width on the time index
37 | c_indx = [1:T]'; % surrogate variable to capture the distribution shift;
38 | % here it is the time index, because the data is nonstationary
39 | Type = 0;
40 | % If Type=0, run all phases of CD-NOD (including
41 | % phase 1: learning causal skeleton,
42 | % phase 2: identifying causal directions with generalization of invariance,
43 | % phase 3: identifying directions with independent change principle, and
44 | % phase 4: recovering the nonstationarity driving force )
45 | % If Type = 1, perform phase 1 + phase 2 + phase 3
46 | % If Type = 2, perform phase 1 + phase 2
47 | % If Type = 3, only perform phase 1
48 |
49 | %% run CD-NOD
50 | [g_skeleton, g_inv, gns, SP] = nonsta_cd_new(Data, cond_ind_test, c_indx, maxFanIn, alpha, Type, pars);
51 |
52 |
--------------------------------------------------------------------------------
/example3.m:
--------------------------------------------------------------------------------
1 | % example 3: heterogeneous data (data from multiple domains)
2 | clear all,clc,close all
3 | addpath(genpath(pwd))
4 | rng(10)
5 | %% data generation
6 | % generate data from the first domain
7 | T_1 = 300;
8 | x1_1 = randn(T_1,1);
9 | x2_1 = 0.9*x1_1 + 0.6*randn(T_1,1);
10 | x3_1 = 0.9*x2_1 + 0.6*randn(T_1,1);
11 | x4_1 = 0.9*x3_1 + 0.6*randn(T_1,1);
12 | Data_1 = [x1_1,x2_1,x3_1,x4_1];
13 |
14 | % generate data from the second domain
15 | T_2 = 300;
16 | x1_2 = randn(T_2,1);
17 | x2_2 = sin(x1_2) + 0.2*randn(T_2,1);
18 | x3_2 = sin(x2_2) + 0.2*randn(T_2,1);
19 | x4_2 = sin(x3_2) + 0.2*randn(T_2,1);
20 | Data_2 = [x1_2,x2_2,x3_2,x4_2];
21 |
22 | % concatenate data from the two domains
23 | Data = [Data_1;Data_2];
24 |
25 |
26 | %% set the parameters
27 | alpha = 0.05; % significance level of independence test
28 | maxFanIn = 2; % maximum number of conditional variables
29 | T = size(Data,1);
30 | if (T<=1000) % for small sample size, use GP to learn the kernel width in conditional independence tests
31 | cond_ind_test='indtest_new_t';
32 | IF_GP = 1;
33 | else
34 | if (T>1000 & T<2000) % for relatively large sample size, fix the kernel width
35 | cond_ind_test='indtest_new_t';
36 | IF_GP = 0;
37 | else % for very large sample size, fix the kernel width and use random fourier feature to approximate the kernel
38 | cond_ind_test='indtest_new_t_RFF';
39 | IF_GP = 0;
40 | end
41 | end
42 |
43 | pars.pairwise = false;
44 | pars.bonferroni = false;
45 | pars.if_GP1 = IF_GP; % for conditional independence test
46 | pars.if_GP2 = 1; % for direction determination with independent change principle & nonstationary driving force visualization
47 | pars.width = 0.4; % kernel width on observational variables (except the time index). If it is 0, then use the default kernel width when IF_GP = 0
48 | pars.widthT = 0; % the kernel width on the time index; set it to zero for domain-varying data
49 | c_indx = [ones(1,T_1),2*ones(1,T_2)]'; % surrogate variable to capture the distribution shift;
50 | % here it is the domain index, because the data is from multiple domains
51 | Type = 1;
52 | % If Type=0, run all phases of CD-NOD (including
53 | % phase 1: learning causal skeleton,
54 | % phase 2: identifying causal directions with generalization of invariance,
55 | % phase 3: identifying directions with independent change principle, and
56 | % phase 4: recovering the nonstationarity driving force )
57 | % If Type = 1, perform phase 1 + phase 2 + phase 3
58 | % If Type = 2, perform phase 1 + phase 2
59 | % If Type = 3, only perform phase 1
60 |
61 | %% run CD-NOD
62 | [g_skeleton, g_inv, gns, SP] = nonsta_cd_new(Data, cond_ind_test, c_indx, maxFanIn, alpha, Type, pars);
63 |
64 |
--------------------------------------------------------------------------------
/example4.m:
--------------------------------------------------------------------------------
1 | % example 4
2 | % multi-dimensional variables
3 | % non-stationary data
4 | clear all,clc,close all
5 | addpath(genpath(pwd))
6 |
7 | % x1->x2->x3->x4, and the causal modules of x2 and x4 are nonstationary, and
8 | % their changes are related
9 | T = 500;
10 | x1 = randn(T,2); % 2 dimension
11 | x2 = 0.6*x1 + 2*sin([1:T]'/50) + 0.5*randn(T,2); % 2 dimension
12 | x3 = x2*[0.3;0.3] + 0.5*randn(T,1); % 1 dimension
13 | x4 = 0.8*x3 + (sin([1:T]'/50)+sin([1:T]'/20)) + 0.5*randn(T,1); % 1 dimension
14 | Data = [x1,x2,x3,x4];
15 |
16 |
17 | %% set the parameters
18 | alpha = 0.05; % significance level of independence test
19 | maxFanIn = 2; % maximum number of conditional variables
20 | if (T<=1000) % for small sample size, use GP to learn the kernel width in conditional independence tests
21 | cond_ind_test='indtest_new_t';
22 | IF_GP = 1;
23 | else
24 | if (T>1000 & T<2000) % for relatively large sample size, fix the kernel width
25 | cond_ind_test='indtest_new_t';
26 | IF_GP = 0;
27 | else % for very large sample size, fix the kernel width and use random fourier feature to approximate the kernel
28 | cond_ind_test='indtest_new_t_RFF';
29 | IF_GP = 0;
30 | end
31 | end
32 | pars.pairwise = false;
33 | pars.bonferroni = false;
34 | pars.if_GP1 = IF_GP; % for conditional independence test
35 | pars.if_GP2 = 1; % for direction determination with independent change principle & nonstationary driving force visualization
36 | pars.width = 0; % kernel width on observational variables (except the time index). If it is 0, then use the default kernel width when IF_GP = 0
37 | pars.widthT = 0.1; % the kernel width on the time index
38 | dlabel{1} = [1,2]; dlabel{2} = [3,4]; dlabel{3} = [5]; dlabel{4} =[6];
39 | c_indx = [1:T]'; % surrogate variable to capture the distribution shift;
40 | % here it is the time index, because the data is nonstationary
41 | Type = 1;
42 | % If Type=0, run all phases of CD-NOD (including
43 | % phase 1: learning causal skeleton,
44 | % phase 2: identifying causal directions with generalization of invariance,
45 | % phase 3: identifying directions with independent change principle, and
46 | % phase 4: recovering the nonstationarity driving force )
47 | % If Type = 1, perform phase 1 + phase 2 + phase 3
48 | % If Type = 2, perform phase 1 + phase 2
49 | % If Type = 3, only perform phase 1
50 |
51 | %% run CD-NOD
52 | [g_skeleton, g_inv, gns, SP] = nonsta_cd_new_multi(Data,dlabel,cond_ind_test, c_indx, maxFanIn, alpha, Type, pars);
53 |
54 |
--------------------------------------------------------------------------------
/infer_nonsta_dir.m:
--------------------------------------------------------------------------------
1 | function [testStat] = infer_nonsta_dir(X,Y,c_indx,width,IF_GP)
2 | % infer the causal direction between X and Y when their causal modules are
3 | % both nonstationary but independent
4 | % X: parents; Y: effect
5 | % width: the kernel width for X and Y
6 | % c_indx: surrogate variable to capture the distribution shift;
7 | % If IF_GP = 1, learn the kernel width for P(Y|X); setting it to 0 can speed up the process
8 | % Don't forget to normalize the data
9 |
10 | if(width==0)
11 | width = 0.1; % May need tuning for different data sets!
12 | end
13 | Wt = 1; % the initial kernel width on C (or T). May need tuning for different data sets!
14 | [T,d] = size(X);
15 | X = X-repmat(mean(X),size(X,1),1); % normalization
16 | X = X * diag(1./std(X));
17 | Y = Y-repmat(mean(Y),size(Y,1),1);
18 | Y = Y * diag(1./std(Y));
19 | theta = 1/width^2;
20 | lambda = 2; % may need tuning!
21 | Ml = [];
22 |
23 | % size of Y should be T*1.
24 | Kyy = kernel(Y, Y, [theta,1]);
25 |
26 | %% P(Y|X)
27 | if IF_GP
28 | Thresh = 1E-4;
29 | [eig_Ky, eiy] = eigdec((Kyy+Kyy')/2, min(400, floor(T/4))); % /2
30 | covfunc = {'covSum', {'covSEard','covNoise'}};
31 | logtheta0 = [log(width)*ones(d,1); log(Wt); 0; log(sqrt(0.1))];
32 | fprintf('Optimization hyperparameters in GP regression:\n');
33 |
34 | IIy = find(eig_Ky > max(eig_Ky) * Thresh); eig_Ky = eig_Ky(IIy); eiy = eiy(:,IIy);
35 | [logtheta_y, fvals_y, iter_y] = minimize(logtheta0, 'gpr_multi', -350, covfunc, [X (1:T)'], 2*sqrt(T) *eiy * diag(sqrt(eig_Ky))/sqrt(eig_Ky(1)));
36 | % exp(logtheta_y),
37 | if(logtheta_y(d+1)>log(1e4)) % set a bound
38 | logtheta_y(d+1)=log(1e4);
39 | end
40 |
41 | covfunc_z = {'covSEard'};
42 | Kxt = feval(covfunc_z{:}, logtheta_y, [X c_indx]);
43 | % Note: in the conditional case, no need to do centering, as the regression
44 | % will automatically enforce that.
45 |
46 | % Kernel matrices of the errors
47 | invK = pdinv(Kxt + exp(2*logtheta_y(end))*eye(T));
48 |
49 | Kxx = feval(covfunc_z{:}, logtheta_y([1:d,d+2]), X);
50 | Ktt = feval(covfunc_z{:}, logtheta_y([d+1,d+2]), c_indx);
51 | else
52 | Kxx = kernel(X, X, [theta,1]);
53 | Kyy = kernel(Y, Y, [theta,1]);
54 | Ktt = kernel(c_indx, c_indx, [1/Wt^2,1]);
55 | invK = pdinv( Kxx.* Ktt + lambda * eye(T));
56 | end
57 | Kxx3 = Kxx^3;
58 | prod_invK = invK * Kyy * invK;
59 | % now finding Ml
60 | Ml = 1/T^2 * Ktt*( Kxx3 .* prod_invK) * Ktt;
61 | % the square distance
62 | D = diag(diag(Ml)) * ones(size(Ml)) + ones(size(Ml)) * diag(diag(Ml)) - 2*Ml;
63 | % Gaussian kernel
64 | sigma2_square = median( D(find(tril(ones(size(D)),-1))) );
65 | Mg = exp(-D/sigma2_square/2);
66 |
67 |
68 | %% P(X)
69 | Kxx = kernel(X,X, [theta,1]);
70 | if IF_GP
71 | [eig_Kx, eix] = eigdec((Kxx+Kxx')/2, min(400, floor(T/4))); % /2
72 | covfunc = {'covSum', {'covSEard','covNoise'}};
73 | logtheta0 = [log(Wt); 0; log(sqrt(0.1))];
74 | fprintf('Optimization hyperparameters in GP regression:\n');
75 | IIx = find(eig_Kx > max(eig_Kx) * Thresh); eig_Kx = eig_Kx(IIx); eix = eix(:,IIx);
76 | [logtheta_x, fvals_x, iter_x] = minimize(logtheta0, 'gpr_multi', -350, covfunc, c_indx, 2*sqrt(T) *eix * diag(sqrt(eig_Kx))/sqrt(eig_Kx(1)));
77 | % exp(logtheta_x),
78 | if(logtheta_x(1)>log(1e4))
79 | logtheta_x(1)=log(1e4);
80 | end
81 | Ktt = feval(covfunc_z{:}, logtheta_x(1:2), c_indx);
82 | invK2 = pdinv(Ktt + exp(2*logtheta_x(end))*eye(T));
83 | else
84 | Ktt = kernel(c_indx, c_indx, [1/Wt^2,1]);
85 | invK2 = pdinv(Ktt + lambda * eye(T));
86 | end
87 | Ml2 = Ktt*invK2*Kxx*invK2*Ktt;
88 | % the square distance
89 | D2 = diag(diag(Ml2)) * ones(size(Ml2)) + ones(size(Ml2)) * diag(diag(Ml2)) - 2*Ml2;
90 | % Gaussian kernel
91 | sigma2_square2 = median( D2(find(tril(ones(size(D2)),-1))) );
92 | Mg2 = exp(-D2/sigma2_square2/2);
93 |
94 | %%
95 | H = eye(T)-1/T*ones(T,T);
96 | Mg = H*Mg*H;
97 | Mg2 = H*Mg2*H;
98 | testStat = 1/T^2*sum(sum(Mg'.*Mg2));
99 | % eta = 1e-6;
100 | % Rg = Mg*pdinv(Mg+T*eta*eye(T));
101 | % Rg2 = Mg2*pdinv(Mg2+T*eta*eye(T));
102 | % % testStat = sum(sum(Rg'.*Rg2));
103 | % testStat = trace(Rg*Rg2);
104 |
105 |
106 |
107 |
108 |
--------------------------------------------------------------------------------
/kPCA_kernel_orig.m:
--------------------------------------------------------------------------------
1 | % K0: precomputed kernel (Gram) matrix of the data, each row/column
2 | %     corresponding to one observation
3 | % d: reduced dimension
4 | % (the kernel type and its parameter are chosen by the caller when
5 | %     building K0, so they are not arguments of this function)
6 | % Y: dimensionality-reduced data
7 | % eigVector: eigen-vector, will later be used for pre-image
8 | % reconstruction
9 |
10 | % Copyright by Quan Wang, 2011/05/10
11 | % Please cite: Quan Wang. Kernel Principal Component Analysis and its
12 | % Applications in Face Recognition and Active Shape Models.
13 | % arXiv:1207.3538 [cs.CV], 2012.
14 |
15 | function [Y, eigVector, eigValue]=kPCA_kernel_orig(K0,d)
16 |
17 |
18 | %% kernel PCA
19 | %%K0=kernel(X,type,para); % input K0
20 | N = length(K0);
21 | oneN=ones(N,N)/N;
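% double-center the Gram matrix: K = H*K0*H with H = I - ones(N,N)/N,
% expanded below to avoid forming H explicitly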
22 | K=K0-oneN*K0-K0*oneN+oneN*K0*oneN;
23 |
24 | %% eigenvalue analysis
25 | [V,D]=eig(K/N);
26 | eigValue=diag(D);
27 | [tmp,IX]=sort(eigValue,'descend');
28 | eigVector=V(:,IX);
29 | eigValue=eigValue(IX);
30 |
31 | %% normalization
32 | norm_eigVector=sqrt(sum(eigVector.^2));
33 | eigVector=eigVector./repmat(norm_eigVector,size(eigVector,1),1);
34 |
35 | %% dimensionality reduction
36 | eigVector=eigVector(:,1:d);
37 | Y=K0*eigVector;
38 |
39 |
--------------------------------------------------------------------------------
/meeks.m:
--------------------------------------------------------------------------------
1 | %Copyright (C)
2 | % 1997-2002 Kevin Murphy
3 | % 2010-2011 Robert Tillman
4 | %
5 | % This file is part of pc.
6 | %
7 | % discrete_anm is free software: you can redistribute it and/or modify
8 | % it under the terms of the GNU General Public License as published by
9 | % the Free Software Foundation, either version 3 of the License, or
10 | % (at your option) any later version.
11 | %
12 | % discrete_anm is distributed in the hope that it will be useful,
13 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | % GNU General Public License for more details.
16 | %
17 | % You should have received a copy of the GNU General Public License
18 | % along with discrete_anm. If not, see <http://www.gnu.org/licenses/>.
19 |
20 |
21 | % meeks rules - adapted from version in BNT
22 | function pdag = meeks(pdag,G)
23 |
24 | n = size(pdag,1);
25 | old_pdag=zeros(n,n);
26 | while ~isequal(pdag, old_pdag)
27 | old_pdag = pdag;
28 | % rule 1
29 | [A,B] = find(pdag==-1); % a -> b
30 | for i=1:length(A)
31 | a = A(i); b = B(i);
32 | C = find(pdag(b,:)==1 & G(a,:)==0); % all nodes adj to b but not a
33 | if ~isempty(C)
34 | pdag(b,C) = -1; pdag(C,b) = 0;
35 | %fprintf('rule 1: a=%d->b=%d and b=%d-c=%d implies %d->%d\n', a, b, b, C, b, C);
36 | end
37 | end
38 | % rule 2
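% if a-b and there is a chain a->c->b for some c, orient a->b
% (orienting b->a would create a directed cycle)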
39 | [A,B] = find(pdag==1); % unoriented a-b edge
40 | for i=1:length(A)
41 | a = A(i); b = B(i);
42 | if any( (pdag(a,:)==-1) & (pdag(:,b)==-1)' );
43 | pdag(a,b) = -1; pdag(b,a) = 0;
44 | %fprintf('rule 2: %d -> %d\n', a, b);
45 | end
46 | end
47 | % rule 3
48 | [A,B] = find(pdag==1); % a-b
49 | for i=1:length(A)
50 | a = A(i); b = B(i);
51 | C = find( (G(a,:)==1) & (pdag(:,b)==-1)' );
52 | % C contains nodes c s.t. a-c and c->b
53 | G2 = setdiag(G(C, C), 1);
54 | if any(G2(:)==0) % there are 2 different non adjacent elements of C
55 | pdag(a,b) = -1; pdag(b,a) = 0;
56 | %fprintf('rule 3: %d -> %d\n', a, b);
57 | end
58 | end
59 | % rule 4
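% if a-b, and there exist c,d with d-a, d->c, c->b, and c adjacent to a,
% orient a->b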
60 | [A, B] = find(pdag==1); % a-b
61 | for i=1:length(A)
62 | a = A(i); b = B(i);
63 | C = find((pdag(:,b)==-1) & (G(:,a)==1));
64 | for j=1:length(C)
65 | c = C(j); % c -> b and c - a
66 | D = find((pdag(:,c)==-1) & (pdag(:,a)==1)); % d -> c and d - a
67 | if (length(D)>0)
68 | pdag(a,b) = -1;
69 | %pdag(b,a) = -1; % It is a bug in the original version!!!!!!!!!!
70 | pdag(b,a) = 0;
71 | end
72 | end
73 | end
74 | end
75 |
--------------------------------------------------------------------------------
/pdinv.m:
--------------------------------------------------------------------------------
1 | function Ainv = pdinv(A);
2 |
3 | % PDINV Computes the inverse of a positive definite matrix
4 | % Copyright (c) 2010-2011 ...
5 | % All rights reserved. See the file COPYING for license terms.
6 | numData = size(A, 1);
7 | try
8 | U = chol(A);
9 | invU = eye(numData)/U;
10 | Ainv = invU*invU';
11 | catch
12 | [void, errid] = lasterr;
13 | if strcmp(errid, 'MATLAB:posdef')
14 | warning(['Matrix is not positive definite in pdinv, inverting' ...
15 | ' using svd'])
16 | [U, S, V] = svd(A);
17 | Ainv = V*diag(1./diag(S))*U';
18 | return
19 | else
20 | error(lasterr)
21 | end
22 | end
23 |
24 |
--------------------------------------------------------------------------------
/setdiag.m:
--------------------------------------------------------------------------------
1 | function M = setdiag(M, v)
2 | % SETDIAG Set the diagonal of a matrix to a specified scalar/vector.
3 | % M = set_diag(M, v)
4 | %Copyright (C)
5 | % 2010 Robert Tillman
6 | %
7 | % This file is part of pc.
8 | %
9 | % discrete_anm is free software: you can redistribute it and/or modify
10 | % it under the terms of the GNU General Public License as published by
11 | % the Free Software Foundation, either version 3 of the License, or
12 | % (at your option) any later version.
13 | %
14 | % discrete_anm is distributed in the hope that it will be useful,
15 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 | % GNU General Public License for more details.
18 | %
19 | % You should have received a copy of the GNU General Public License
20 | % along with discrete_anm. If not, see <http://www.gnu.org/licenses/>.
21 |
22 | n = length(M);
23 | if length(v)==1
24 | v = repmat(v, 1, n);
25 | end
26 |
27 | % e.g., for 3x3 matrix, elements are numbered
28 | % 1 4 7
29 | % 2 5 8
30 | % 3 6 9
31 | % so diagonal = [1 5 9]
32 |
33 |
34 | J = 1:n+1:n^2;
35 | M(J) = v;
36 |
37 | %M = triu(M,1) + tril(M,-1) + diag(v);
38 |
39 |
--------------------------------------------------------------------------------
/smooth_module.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/smooth_module.mat
--------------------------------------------------------------------------------
/transformFeatures.m:
--------------------------------------------------------------------------------
1 | % random Fourier features to approximate the kernel
2 |
3 | function [ Z ] = transformFeatures(X)
4 | %TRANSFORMFEATURES Transforms data to the random Fourier feature space
5 | %
6 | % Input:
7 | % X - n x p data matrix (each row is a sample; p is the dimension of the variable)
8 | % Omega - p x D matrix of random Fourier directions (one for each
9 | % dimension of a sample x); sampled internally below
10 | % beta - 1 x D vector of random angles; sampled internally below
11 | %
12 | % Output:
13 | % Z - D x n matrix of random Fourier features
14 |
15 | % sample random Fourier directions and angles
16 | [T, p] = size(X);
17 | if(T>=1000)
18 | D = 1000; % RFF dimension
19 | else
20 | D = 500;
21 | end
22 | Omega = randn(p,D); % RVs defining RFF transform
23 | beta = rand(1,D)*2*pi;
24 |
25 | Z = cos(bsxfun(@plus,X*Omega,beta))*sqrt(2/D);
26 | Z = Z';
27 |
--------------------------------------------------------------------------------