├── KCI-test
│   ├── CI_PERM
│   │   ├── COPYING
│   │   ├── hsicTestBootIC.m
│   │   ├── hsiccondIC.m
│   │   ├── hsiccondTestIC.m
│   │   ├── inchol.m
│   │   ├── medbw.m
│   │   ├── pickK.m
│   │   └── rbf.m
│   ├── CONTENT
│   ├── COPYING
│   ├── README
│   ├── UInd_KCItest.m
│   ├── UInd_KCItest_RFF.m
│   ├── algorithms
│   │   ├── CInd_test_new_withGP.m
│   │   ├── CInd_test_new_withGP_t.m
│   │   ├── CInd_test_new_withGP_t_RFF.m
│   │   ├── check_markov_equiv.m
│   │   ├── condVect.m
│   │   ├── dist2.m
│   │   ├── eigdec.m
│   │   ├── gpr_multi.m
│   │   ├── gpr_multi2.m
│   │   ├── gpr_multi_alln.m
│   │   ├── gpr_multi_alln_K.m
│   │   ├── kernel.m
│   │   ├── logdet.m
│   │   ├── minimize.m
│   │   ├── pdinv.m
│   │   └── stack.m
│   ├── data
│   │   ├── README
│   │   ├── abalone.dat
│   │   ├── boston_names
│   │   └── boston_wout_discrete.dat
│   ├── exp
│   │   └── simulation1
│   │       ├── CInd_test_new_withGP_UsedInUAIPaper_ButNotFinal.m
│   │       ├── test_effect_D_caseI.m
│   │       └── test_effect_D_caseII.m
│   ├── gpml-matlab
│   │   ├── README
│   │   ├── doc
│   │   │   ├── alg21.gif
│   │   │   ├── alg31.gif
│   │   │   ├── alg32.gif
│   │   │   ├── alg35.gif
│   │   │   ├── alg36.gif
│   │   │   ├── alg51.gif
│   │   │   ├── alg52.gif
│   │   │   ├── classification.html
│   │   │   ├── fig2d.gif
│   │   │   ├── fig2de1.gif
│   │   │   ├── fig2de2.gif
│   │   │   ├── fig2de3.gif
│   │   │   ├── fig2dl1.gif
│   │   │   ├── fig2dl2.gif
│   │   │   ├── fig2dl3.gif
│   │   │   ├── figepp.gif
│   │   │   ├── figepp2.gif
│   │   │   ├── figl.gif
│   │   │   ├── figl1.gif
│   │   │   ├── figlapp.gif
│   │   │   ├── figlapp2.gif
│   │   │   ├── figlf.gif
│   │   │   ├── figlm.gif
│   │   │   ├── index.html
│   │   │   ├── regression.html
│   │   │   ├── sparse-approx.html
│   │   │   └── style.css
│   │   ├── gpml-demo
│   │   │   ├── Contents.m
│   │   │   ├── data_6darm.mat
│   │   │   ├── data_boston.mat
│   │   │   ├── demo_ep_2d.m
│   │   │   ├── demo_ep_usps.m
│   │   │   ├── demo_gparm.m
│   │   │   ├── demo_gpr.m
│   │   │   ├── demo_gprsparse.m
│   │   │   ├── demo_laplace_2d.m
│   │   │   └── demo_laplace_usps.m
│   │   └── gpml
│   │       ├── Contents.m
│   │       ├── Copyright
│   │       ├── Makefile
│   │       ├── approxEP.m
│   │       ├── approxLA.m
│   │       ├── approximations.m
│   │       ├── binaryEPGP.m
│   │       ├── binaryGP.m
│   │       ├── binaryLaplaceGP.m
│   │       ├── covConst.m
│   │       ├── covFunctions.m
│   │       ├── covLINard.m
│   │       ├── covLINone.m
│   │       ├── covMatern3iso.m
│   │       ├── covMatern5iso.m
│   │       ├── covNNone.m
│   │       ├── covNoise.m
│   │       ├── covPeriodic.m
│   │       ├── covProd.m
│   │       ├── covRQard.m
│   │       ├── covRQiso.m
│   │       ├── covSEard.m
│   │       ├── covSEiso.m
│   │       ├── covSum.m
│   │       ├── cumGauss.m
│   │       ├── gauher.m
│   │       ├── gpr.m
│   │       ├── gpr2.m
│   │       ├── gprSRPP.m
│   │       ├── likelihoods.m
│   │       ├── logistic.m
│   │       ├── minimize.m
│   │       ├── solve_chol.c
│   │       ├── solve_chol.m
│   │       ├── sq_dist.c
│   │       ├── sq_dist.log
│   │       └── sq_dist.m
│   ├── indtest_corr.m
│   ├── indtest_hsic.m
│   ├── indtest_new.m
│   ├── indtest_new_t.m
│   └── indtest_new_t_RFF.m
├── README
├── README.md
├── cd_non_con_fun.m
├── example1.m
├── example2.m
├── example3.m
├── example4.m
├── infer_nonsta_dir.m
├── kPCA_kernel_orig.m
├── meeks.m
├── nonsta_cd_new.m
├── nonsta_cd_new_multi.m
├── pdinv.m
├── setdiag.m
├── smooth_module.mat
└── transformFeatures.m

--------------------------------------------------------------------------------
/KCI-test/CI_PERM/COPYING:
--------------------------------------------------------------------------------

Copyright (c) 2010-2011  Robert Tillman   [rtillman@cmu.edu]
Copyright (c) 2007       Arthur Gretton   [arthur.gretton@tuebingen.mpg.de]
Copyright (c) 2005       Francis Bach     [francis.bach@ens.fr]
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

  - Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.
  - Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

--------------------------------------------------------------------------------
/KCI-test/CI_PERM/hsicTestBootIC.m:
--------------------------------------------------------------------------------

%This function implements the HSIC independence test using a bootstrap approximation
%to the test threshold

%Inputs:
%        X contains dx columns, m rows. Each row is an i.i.d. sample
%        Y contains dy columns, m rows. Each row is an i.i.d. sample
%        alpha is the level of the test
%        shuffles is the number of shuffles used to approximate the null distribution

%Outputs:
%        sig: boolean indicator of whether the test was significant
%        p: p-value

%Kernel size is set to the median distance between points, if no kernel is specified

% Copyright (c) 2010  Robert Tillman   [rtillman@cmu.edu]
%               2007  Arthur Gretton   [arthur.gretton@tuebingen.mpg.de]
% All rights reserved.  See the file COPYING for license terms.
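%
% Example (hypothetical data, not part of the original package):
%   X = randn(200,1); Y = X.^2 + 0.1*randn(200,1);
%   [sig,p] = hsicTestBootIC(X,Y,0.05,1000);   % expect sig == 1 (X and Y dependent)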

function [sig,p] = hsicTestBootIC(X,Y,alpha,shuffles)

% tolerance for incomplete Cholesky
tol = 1e-12;

m=size(X,1);

% set kernel size to median distance between points
maxpoints = 1000;
sigx = medbw(X, maxpoints);
sigy = medbw(Y, maxpoints);

%Compute the approximations of Gram matrices
[K, Pk] = inchol(X,sigx,tol);
[L, Pl] = inchol(Y,sigy,tol);

%bone = ones(m,1);
%H = eye(m)-1/m*ones(m,m);

% center Gram matrices and permute indices
Kc = K(Pk,:) - repmat((sum(K)/m),m,1);
Lc = L(Pl,:) - repmat((sum(L)/m),m,1);

testStat = (1/m^2)*sum(sum(Kc.*((Kc'*Lc)*Lc')'));

HSICarr = zeros(shuffles,1);
for whichSh=1:shuffles

    [notUsed,indL] = sort(rand(m,1));

    newLc = Lc(indL,:);
    HSICarr(whichSh) = (1/m^2)*sum(sum(Kc.*((Kc'*newLc)*newLc')'));

end

% get p-value from empirical cdf
p = length(find(HSICarr>=testStat))/shuffles;

% determine significance
sig=(p<=alpha);

--------------------------------------------------------------------------------
/KCI-test/CI_PERM/hsiccondIC.m:
--------------------------------------------------------------------------------

% Conditional dependence operator empirical estimator with incomplete Cholesky
% factorization for low rank approximation of Gram matrices
%
% Arguments:
%   Gx       low rank approximation for centered Gram matrix for X
%   Gy       low rank approximation for centered Gram matrix for Y
%   Gz       low rank approximation for centered Gram matrix for Z
%   epsilon  the smoothing constant
%
% Output:
%   emphsic  the test statistic
%
% Copyright (c) 2010  Robert Tillman   [rtillman@cmu.edu]
% All rights reserved.  See the file COPYING for license terms.
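%
% Example (hypothetical; a sketch of how hsiccondTestIC.m builds the centered
% low rank factors before calling this function):
%   n = size(X,1);
%   [K,Pk] = inchol([X,Z],medbw([X,Z],1000),1e-4);
%   [L,Pl] = inchol([Y,Z],medbw([Y,Z],1000),1e-4);
%   [M,Pm] = inchol(Z,medbw(Z,1000),1e-4);
%   Gx = K(Pk,:) - repmat(sum(K)/n,n,1);
%   Gy = L(Pl,:) - repmat(sum(L)/n,n,1);
%   Gz = M(Pm,:) - repmat(sum(M)/n,n,1);
%   stat = hsiccondIC(Gx,Gy,Gz,1e-4);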

function emphsic = hsiccondIC(Gx,Gy,Gz,epsilon)

n = size(Gx,1);
if (n~=size(Gy,1) || n~=size(Gz,1))
    error('Gx, Gy, and Gz must have the same number of rows');
end
if (epsilon<=0)
    error('epsilon must be > 0');
end

mx = size(Gx,2);
my = size(Gy,2);
mz = size(Gz,2);

[Ux, Sx, Vx] = svd(Gx,'econ');
[Uy, Sy, Vy] = svd(Gy,'econ');
[Uz, Sz, Vz] = svd(Gz,'econ');

Sxsq = diag(Sx).^2;
Sysq = diag(Sy).^2;
Szsq = diag(Sz).^2;
Szsqe = Szsq + epsilon;
Szsqt = Szsq./Szsqe;

% Below, Pz denotes the regularized projection GzGz'(GzGz' + epsilon*I)^(-1).

% first term - tr(GxGx'GyGy')
first = sum(sum((Ux*(diag(Sxsq)*(Ux'*Uy)*diag(Sysq))).*Uy));

% second term - -2*tr(GxGx'*Pz*GyGy')
second1 = Ux*(diag(Sxsq)*(Ux'*Uz)*diag(Szsqt)*(Uz'*Uy)*diag(Sysq));
second = -2*sum(sum(second1.*Uy));

% third term - tr(GxGx'*Pz*GyGy'*Pz)
third = sum(sum((second1*(Uy'*Uz)*diag(Szsqt)).*Uz));

% compute test statistic using first, second, and third terms above with
% the U-statistic
emphsic = (first+second+third)/((n-1)^2);

--------------------------------------------------------------------------------
/KCI-test/CI_PERM/hsiccondTestIC.m:
--------------------------------------------------------------------------------

% Statistical test for kernel conditional independence of X and Y given Z with
% incomplete Cholesky factorization for low rank approximation of Gram matrices
%
% Arguments:
%   X         n x p vector of data points
%   Y         n x m vector of data points
%   Z         n x r vector of data points
%   alpha     significance level
%   shuffles  number of shuffles for the permutation test
%
% Output:
%   sig       boolean indicator of whether the test was significant for the given alpha
%   p         resulting p-value
%
% Copyright (c) 2010  Robert Tillman   [rtillman@cmu.edu]
% All rights reserved.  See the file COPYING for license terms.
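%
% Example (hypothetical chain X -> Z -> Y, so X and Y should be conditionally
% independent given Z):
%   X = randn(300,1); Z = X + 0.5*randn(300,1); Y = Z + 0.5*randn(300,1);
%   [sig,p] = hsiccondTestIC(X,Y,Z,0.05,500);   % expect sig == 0 in most runs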

function [sig,p,testStat] = hsiccondTestIC(X,Y,Z,alpha,shuffles)

n = size(X,1);
if (n~=size(Y,1) || n~=size(Z,1))
    error('X, Y, and Z must have the same number of data points');
end
if (alpha<0 || alpha>1)
    error('alpha must be between 0 and 1');
end
if (shuffles<=0 || shuffles~=int32(shuffles))
    error('number of shuffles must be a positive integer');
end

% smoothing constant for conditional cross covariance operator
epsilon=1e-4;
% threshold for eigenvalues to consider in low rank Gram matrix approximations
tol = 1e-4;

% augment X and Y for conditional test
X = [X,Z];
Y = [Y,Z];

% set kernel size to median distance between points
maxpoints = 1000;
sigx = medbw(X, maxpoints);
sigy = medbw(Y, maxpoints);
sigz = medbw(Z, maxpoints);

% low rank approximation of Gram matrices using incomplete Cholesky factorization
[K, Pk] = inchol(X,sigx,tol);
[L, Pl] = inchol(Y,sigy,tol);
[M, Pm] = inchol(Z,sigz,tol);

% center Gram matrices factoring in permutations made during low rank approximation
Kc = K(Pk,:) - repmat((sum(K)/n),n,1);
Lc = L(Pl,:) - repmat((sum(L)/n),n,1);
Mc = M(Pm,:) - repmat((sum(M)/n),n,1);

% compute the U-statistic
%pairs = nchoosek(1:n,2);
%bz = n*(n-1)/sum(rbf(Z(pairs(:,1)),Z(pairs(:,2)),sigz).^2);

% compute HSIC dependence value
testStat = hsiccondIC(Kc,Lc,Mc,epsilon);

% first cluster Z
nc = pickK(Z);
clusters = kmeans(Z,nc,'EmptyAction','drop','MaxIter',1000,'Display','off');
%[centers,clusters,datapoints] = MeanShiftCluster(Z,sigz,false);
%nc = length(centers);

% simulate null distribution and permutation test
nullapprox = zeros(shuffles,1);
for i=1:shuffles
    % permute within clusters
    Plnew = 1:n;
    for j=1:nc
        indj = find(clusters==j);
        pj = indj(randperm(length(indj)));
        Plnew(indj) = Plnew(pj);
    end
    % centered Gram matrix for new sample
    newLc = Lc(Plnew,:);
    % compute HSIC dependence value for new sample
    nullapprox(i)=hsiccondIC(Kc,newLc,Mc,epsilon);
end

% get p-value from empirical cdf
p = length(find(nullapprox>=testStat))/shuffles;

% determine significance
sig=(p<=alpha);

--------------------------------------------------------------------------------
/KCI-test/CI_PERM/inchol.m:
--------------------------------------------------------------------------------

% Incomplete Cholesky factorization with RBF kernel
%
% Description:
% Finds low rank approximation of RBF kernel Gram matrix K = PGG'P for the
% n x p data matrix X. Here, K is an n x n Gram matrix, G is n x m with m << n,
% and P is a permutation matrix.
%
% Arguments:
%   X      n x p data matrix
%   sigma  bandwidth for RBF kernel
%   tol    threshold for remaining eigenvalues to consider
%
% Output:
%   G      n x m matrix (m << n)
%   P      n vector of permutation indices
%
%
% Adapted from Francis Bach's Cholesky with side information implementation
%
% Copyright (c) 2010  Robert Tillman   [rtillman@cmu.edu]
%               2005  Francis Bach     [francis.bach@ens.fr]
% All rights reserved.  See the file COPYING for license terms.
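%
% Example (hypothetical):
%   X = randn(500,2);
%   [G,P] = inchol(X,medbw(X,1000),1e-6);   % G*G' approximates the Gram matrix
%                                           % of X up to the row permutation P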
function [G,P] = inchol(X,sigma,tol)

if (sigma<=0)
    error('sigma must be > 0');
end
if (tol<=0)
    error('tol must be > 0');
end

n = size(X,1);
% begin with full matrix
G = zeros(n,n);
% using RBF kernel so diagonal entries are ones
diagK = ones(n,1);
% permutation indices
P = 1:n;
% updated diagonal elements
D = diagK;

% construct columns of K until threshold is met
for k=1:n

    % select next most informative pivot
    best = D(k);
    bestInd = k;
    for j=k:n
        if (D(j) > best/.99)
            best = D(j);
            bestInd = j;
        end
    end

    % threshold met so remove columns to the right and break
    if (best < tol)
        G = G(:,1:(k-1));
        break;
    end

    % move the selected pivot into position k
    P([k bestInd]) = P([bestInd k]);
    G([k bestInd],1:(k-1)) = G([bestInd k],1:(k-1));
    D([k bestInd]) = D([bestInd k]);

    % compute the new column of the factorization
    G(k,k) = sqrt(D(k));
    G((k+1):n,k) = (rbf(X(P((k+1):n),:),repmat(X(P(k),:),n-k,1),sigma) ...
        - G((k+1):n,1:(k-1))*G(k,1:(k-1))')/G(k,k);

    % update the remaining diagonal elements
    D((k+1):n) = D((k+1):n) - G((k+1):n,k).^2;

end

--------------------------------------------------------------------------------
/KCI-test/CI_PERM/medbw.m:
--------------------------------------------------------------------------------

% Median distance kernel bandwidth heuristic
%
% Description:
% Sets the RBF kernel bandwidth to the median distance between points,
% using at most maxpoints points.
%
% Arguments:
%   X          n x p data matrix
%   maxpoints  maximum number of points used to compute the median
%
% Output:
%   sigma      kernel bandwidth

function sigma = medbw(X, maxpoints)

n = size(X,1);
% use at most maxpoints points to compute the median
if (n>maxpoints)
    med = X(1:maxpoints,:);
    n = maxpoints;
else
    med = X;
end

% finds median distance between points
G = sum((med.*med),2);
Q = repmat(G,1,n);
R = repmat(G',n,1);
dists = Q + R - 2*med*med';
dists = dists-tril(dists);
dists=reshape(dists,n^2,1);
sigma = sqrt(0.5*median(dists(dists>0)));

--------------------------------------------------------------------------------
/KCI-test/CI_PERM/pickK.m:
--------------------------------------------------------------------------------

% picks number of clusters for k-means clustering
% Copyright (c) 2010  Robert Tillman   [rtillman@cmu.edu]
% All rights reserved.  See the file COPYING for license terms.
function k = pickK(X)

a = 1;
n = size(X,1);
b = n;
step = 2;

v = sum(diag(diag(var(X))));

while (step>1&&b<=n)

    step = max(round((b-a+1)/10),1);

    for k=a:step:b

        [idx, c, sumd] = kmeans(X,k,'EmptyAction','drop','MaxIter',1000,'Display','off');
        % [idx, c, sumd] = kmeans(X,k=k,maxloops=1000);

        c = sum(sumd)/n;

        if (k~=a)
            if ((lastc-c)/v<.05)
                k = k-step;
                break;
            end
        end

        lastc = c;

    end

    a = k;
    b = k+step;

end

--------------------------------------------------------------------------------
/KCI-test/CI_PERM/rbf.m:
--------------------------------------------------------------------------------

% RBF kernel evaluation
%
% Description:
% Evaluates RBF kernel for n points
%
% Input:
%   x1     n x p matrix (n points with dimensionality p)
%   x2     n x p matrix (n points with dimensionality p)
%   sigma  kernel bandwidth
%
% Output:
%   k      n x 1 matrix of k(x1,x2) evaluations
%
% Copyright (c) 2010  Robert Tillman   [rtillman@cmu.edu]
% All rights reserved.  See the file COPYING for license terms.
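%
% Example (hypothetical):
%   x1 = randn(5,2); x2 = randn(5,2);
%   k = rbf(x1,x2,1.0);   % 5 x 1 vector of row-wise kernel evaluations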

function k = rbf(x1,x2,sigma)

if (size(x1,1)~=size(x2,1))
    error('x1 and x2 must contain the same number of data points');
end
if (size(x1,2)~=size(x2,2))
    error('x1 and x2 must be of the same dimensionality');
end
if (sigma<=0)
    error('sigma must be > 0');
end

k = exp(-.5*sum((x1-x2).^2,2)/(sigma^2));

--------------------------------------------------------------------------------
/KCI-test/CONTENT:
--------------------------------------------------------------------------------

- indtest_new: WRAPPER performing KCI-test, the conditional independence testing method submitted to UAI 2011;

- UInd_KCItest: function to perform unconditional independence testing given in our paper; it is theoretically equivalent to Gretton et al. (2008) but is computationally easier when generating the null distribution with simulations;

- indtest_corr: partial correlation test;

- indtest_hsic: WRAPPER performing either an HSIC test (Gretton et al., 2008) or CI_PERM.


DIRECTORIES:
- algorithms: contains the functions which are called by the conditional independence testing method or the PC algorithm;

- exp: contains the files used in simulations and experiments;

- data: contains the real-world data used in the experiments;

- CI_PERM: contains the functions which are used in CI_PERM, the conditional independence testing method which combines the conditional dependence measure (Fukumizu et al., 2008) and local bootstrapping;

- gpml-matlab: the gpml toolbox.

--------------------------------------------------------------------------------
/KCI-test/COPYING:
--------------------------------------------------------------------------------

Copyright (c)
2010-2011  Kun Zhang
2010-2011  Jonas Peters
1996-2001  Ian T. Nabney
2001-2006  Carl Edward Rasmussen

All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

  - Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.
  - Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions and the following disclaimer in the documentation
    and/or other materials provided with the distribution.
  - Neither the name of the Aston University, Birmingham, U.K.
    nor the names of its contributors may be used to endorse or promote products
    derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

--------------------------------------------------------------------------------
/KCI-test/README:
--------------------------------------------------------------------------------

Copyright (c)
2010-2011  Kun Zhang
2010-2011  Jonas Peters
2001-2006  Carl Edward Rasmussen
1996-2001  Ian T. Nabney

This package contains code for the paper
"A kernel-based conditional independence test and application in causal discovery", K. Zhang, J. Peters, D. Janzing, and B. Schoelkopf, In Proceedings of the 27th Conference on Uncertainty in Artificial Intelligence (UAI 2011), Barcelona, Spain, July 14-17, 2011.

% Please see CONTENT for the content

It is written in Matlab 7.7.0 and should work on any machine.



%%%%%%%%%%%%
CODE NOT PROVIDED
%%%%%%%%%%%%
conditional hsic



%%%%%%%%%%%%%
IMPORTANT FUNCTIONS
%%%%%%%%%%%%%
function [pval, stat] = indtest_new(X, Y, Z, pars)
% function [pval, stat] = indtest_new(X, Y, Z, pars)
%
% This function is a WRAPPER
% Performs the new method (submitted to UAI 2011)
%
% INPUT:
%   X     Nxd1 matrix of samples (N data points, d1 dimensions)
%   Y     Nxd2 matrix of samples (N data points, d2 dimensions)
%   Z     Nxd3 matrix of samples (N data points, d3 dimensions)
%   pars  structure containing parameters for the independence test
%     .pairwise    if true, the test is performed pairwise if d1>1 (default: false)
%     .bonferroni  if true, a Bonferroni correction is performed (default: false)
%     .width       kernel width (default: 0, which results in an automatic -heuristic- choice)
%
% OUTPUT:
%   pval  p value of the test
%   stat  test statistic


%%%%%%%%%%%%%
REQUIREMENTS
%%%%%%%%%%%%%
This code requires the statistics toolbox and, in addition, the GPML toolbox
(the old version, unfortunately), which is provided in the subfolder
gpml-matlab. Have a look at its README for how to compile the mex files;
this makes the toolbox faster. Make sure the path is added in Matlab, e.g.
addpath('gpml-matlab/gpml')

%%%%%%%%%%%%%
EXAMPLE
%%%%%%%%%%%%%
As a first example, type

X=randn(300,1);
Y=X+0.5*randn(300,1);
Z=Y+0.5*randn(300,1);
[p_val stat]=indtest_new(X,Z,[],[]);
p_val % X and Z should be dependent
[p_val stat]=indtest_new(X,Z,Y,[]);
p_val % X and Z should be conditionally independent given Y

into Matlab.


%%%%%%%%%%%%%
REPRODUCING FIGURES
%%%%%%%%%%%%%
The exp_ files in the folder exp describe (hopefully in a self-explanatory
way) how the experiments in the paper were performed. The folder mat-files
contains the results.


%%%%%%%%%%%%%
CITATION
%%%%%%%%%%%%%
If you use this code, please cite the following paper:
K. Zhang, J. Peters, D. Janzing, and B. Schoelkopf, "A kernel-based conditional independence test and application in causal discovery," In UAI 2011.

%%%%%%%%%%%%%
PROBLEMS
%%%%%%%%%%%%%
If you have problems or questions, do not hesitate to send an email to
kzhang@tuebingen.mpg.de or kun.kzhang@gmail.com


--------------------------------------------------------------------------------
/KCI-test/UInd_KCItest.m:
--------------------------------------------------------------------------------

function [p_val, Sta] = UInd_KCItest(x, y, pars)
% function [p_val, Sta] = UInd_KCItest(x, y, pars)
% To test if x and y are unconditionally independent with bootstrap (which is
% the same as in the HSIC test) or with the finite-sample Gamma approximation.
% INPUT:
%   x and y: data matrices of size number_of_samples * dimensionality.
%   pars.width (optional): the kernel width for x and y.
% Output:
%   p_val: the p value obtained by bootstrapping (if the sample size is
%   smaller than 1000) or by Gamma approximation (if the sample size is
%   large).
% Copyright (c) 2010-2011  Kun Zhang, Jonas Peters.
% All rights reserved.  See the file COPYING for license terms.
%
% For details of the method, see K. Zhang, J. Peters, D. Janzing, and B. Schoelkopf,
% "A kernel-based conditional independence test and application in causal discovery,"
% In UAI 2011,
% and
% A. Gretton, K. Fukumizu, C.-H. Teo, L. Song, B. Schoelkopf and A. Smola, "A kernel
% statistical test of independence." In NIPS 21, 2007.
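%
% Example (hypothetical; pars.width = 0 selects the kernel width automatically):
%   x = randn(500,1); y = sin(x) + 0.3*randn(500,1);
%   pars.width = 0;
%   [p_val, Sta] = UInd_KCItest(x, y, pars);   % expect a small p-value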

T = length(y); % the sample size

% Controlling parameters
if ~isfield(pars, 'width') || isempty(pars.width)
    width = 0;
else
    width = pars.width;
end
Bootstrap = 1;
Method_kernel_width = 1; % 1: empirical value; 2: median

% Num_eig = floor(T/4); % how many eigenvalues are to be calculated?
if T>1000
    Num_eig = floor(T/2);
else
    Num_eig = T;
end
T_BS = 2000;
lambda = 1E-3; % the regularization parameter
Thresh = 1E-6;
% normalize the data
x = x - repmat(mean(x), T, 1);
x = x * diag(1./std(x));
y = y - repmat(mean(y), T, 1);
y = y * diag(1./std(y));
Cri = []; Sta = []; p_val = []; Cri_appr = []; p_appr = [];

% use empirical kernel width instead of the median
if width==0
    if T < 200
        width = 0.8;
    elseif T < 1200
        width = 0.5;
    else
        width = 0.3;
    end
end
if Method_kernel_width == 1
    theta = 1/(width^2); % I use this parameter to construct kernel matrices. Watch out!! width = sqrt(2)*sigma AND theta = 1/(2*sigma^2)
else
    theta = 0;
end
% width = sqrt(2)*medbw(x, 1000); % use the median heuristic for the bandwidth.
% theta = 1/(width^2);

H = eye(T) - ones(T,T)/T; % for centering of the data in feature space
% Kx = kernel([x], [x], [theta/size(x,2),1]); Kx = H * Kx * H; %%%%Problem
% Ky = kernel([y], [y], [theta/size(y,2),1]); Ky = H * Ky * H; %%%%Problem
Kx = kernel([x], [x], [theta * size(x,2),1]); Kx = H * Kx * H; %%%%Problem
Ky = kernel([y], [y], [theta * size(y,2),1]); Ky = H * Ky * H; %%%%Problem

Sta = trace(Kx * Ky);


Cri = -1;
p_val = -1;
if Bootstrap
    % calculate the eigenvalues that will be used later
    % Due to numerical issues, Kx and Ky may not be symmetric:
    [eig_Kx, eivx] = eigdec((Kx+Kx')/2,Num_eig);
    [eig_Ky, eivy] = eigdec((Ky+Ky')/2,Num_eig);
    % calculate Cri...
    % first calculate the product of the eigenvalues
    eig_prod = stack( (eig_Kx * ones(1,Num_eig)) .* (ones(Num_eig,1) * eig_Ky'));
    II = find(eig_prod > max(eig_prod) * Thresh);
    eig_prod = eig_prod(II); %%% new method

    % use a weighted sum of chi^2 random variables to generate the null distribution
    if length(eig_prod) * T < 1E6
        f_rand1 = chi2rnd(1,length(eig_prod),T_BS);
        Null_dstr = eig_prod'/T * f_rand1; %%%%Problem
    else
        % iteratively calculate the null distribution to save memory
        Null_dstr = zeros(1,T_BS);
        Length = max(floor(1E6/T),100);
        Itmax = floor(length(eig_prod)/Length);
        for iter = 1:Itmax
            f_rand1 = chi2rnd(1,Length,T_BS);
            Null_dstr = Null_dstr + eig_prod((iter-1)*Length+1:iter*Length)'/T * f_rand1;
        end
        Null_dstr = Null_dstr + eig_prod(Itmax*Length+1:length(eig_prod))'/T *... %%%%Problem
            chi2rnd(1, length(eig_prod) - Itmax*Length,T_BS);
    end
    sort_Null_dstr = sort(Null_dstr);
    p_val = sum(Null_dstr>Sta)/T_BS;
end

--------------------------------------------------------------------------------
/KCI-test/UInd_KCItest_RFF.m:
--------------------------------------------------------------------------------

function [p_val, Sta] = UInd_KCItest_RFF(x, y, pars)
% function [p_val, Sta] = UInd_KCItest_RFF(x, y, pars)
% To test if x and y are unconditionally independent with bootstrap (which is
% the same as in the HSIC test) or with the finite-sample Gamma approximation.
% INPUT:
%   x and y: data matrices of size number_of_samples * dimensionality.
%   pars.width (optional): the kernel width for x and y.
% Output:
%   p_val: the p value obtained by bootstrapping (if the sample size is
%   smaller than 1000) or by Gamma approximation (if the sample size is
%   large).
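%
% Example (hypothetical; same interface as UInd_KCItest, but based on random
% Fourier features, so it scales to larger sample sizes):
%   x = randn(2000,1); y = sin(x) + 0.3*randn(2000,1);
%   pars.width = 0;
%   [p_val, Sta] = UInd_KCItest_RFF(x, y, pars);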

T = length(y); % the sample size

% Controlling parameters
if ~isfield(pars, 'width') || isempty(pars.width)
    width = 0;
else
    width = pars.width;
end
Bootstrap = 1;

Method_kernel_width = 1; % 1: empirical value; 2: median

% Num_eig = floor(T/4); % how many eigenvalues are to be calculated?
if T>1000
    Num_eig = floor(T/2);
else
    Num_eig = T;
end
T_BS = 5000;
lambda = 1E-3; % the regularization parameter
Thresh = 1E-6;
% normalize the data
x = x - repmat(mean(x), T, 1);
x = x * diag(1./std(x));
y = y - repmat(mean(y), T, 1);
y = y * diag(1./std(y));
Cri = []; Sta = []; p_val = []; Cri_appr = []; p_appr = [];

% use empirical kernel width instead of the median
if width==0
    if T < 200
        width = 0.8;
    elseif T < 1200
        width = 0.5;
    else
        width = 0.3;
    end
end


Zx = transformFeatures(x/width); % calculate random Fourier features
Zy = transformFeatures(y/width);
C = (Zx - repmat(mean(Zx,2), 1, T)) * (Zy - repmat(mean(Zy,2), 1, T))';
Sta = norm(C,'fro')^2;


Cri = -1;
p_val = -1;
if Bootstrap
    % calculate the eigenvalues that will be used later
    [eig_Kx, eivx] = eigdec(Zx'*Zx,Num_eig);
    [eig_Ky, eivy] = eigdec(Zy'*Zy,Num_eig);
    % calculate Cri...
    % first calculate the product of the eigenvalues
    eig_prod = stack( (eig_Kx * ones(1,length(eig_Kx))) .* (ones(length(eig_Kx),1) * eig_Ky'));
    II = find(eig_prod > max(eig_prod) * Thresh);
    eig_prod = eig_prod(II); %%% new method

    % use a weighted sum of chi^2 random variables to generate the null distribution
    if length(eig_prod) * T < 1E6
        f_rand1 = chi2rnd(1,length(eig_prod),T_BS);
        Null_dstr = eig_prod'/T * f_rand1; %%%%Problem
    else
        % iteratively calculate the null distribution to save memory
        Null_dstr = zeros(1,T_BS);
        Length = max(floor(1E6/T),100);
        Itmax = floor(length(eig_prod)/Length);
        for iter = 1:Itmax
            f_rand1 = chi2rnd(1,Length,T_BS);
            Null_dstr = Null_dstr + eig_prod((iter-1)*Length+1:iter*Length)'/T * f_rand1;
        end
        Null_dstr = Null_dstr + eig_prod(Itmax*Length+1:length(eig_prod))'/T *... %%%%Problem
            chi2rnd(1, length(eig_prod) - Itmax*Length,T_BS);
    end
    sort_Null_dstr = sort(Null_dstr);
    p_val = sum(Null_dstr>Sta)/T_BS;
end

--------------------------------------------------------------------------------
/KCI-test/algorithms/CInd_test_new_withGP_t_RFF.m:
--------------------------------------------------------------------------------

function [p_val, Sta, Cri] = CInd_test_new_withGP_t_RFF(x, y, z, alpha, pars)
% To test if x and y are conditionally independent given z.
% INPUT:
%   The number of rows of x and y is the sample size.
%   alpha is the significance level (we suggest 1%).
%   pars contains the kernel width and whether to use GP to optimize the kernel width.
% Output:
%   Cri: the critical point at the p-value equal to alpha obtained by bootstrapping.
%   Sta: the statistic Tr(K_{\ddot{X}|Z} * K_{Y|Z}).
%   p_val: the p value obtained by bootstrapping.
% If Sta > Cri, the null hypothesis (x is independent from y given z) is rejected.
% Copyright (c) 2010-2011 ...
% All rights reserved.  See the file COPYING for license terms.
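%
% Example (hypothetical; width 0 picks the built-in defaults, widthT 0 falls
% back to 0.1 for a time index):
%   x = randn(400,1); z = x + 0.5*randn(400,1); y = z + 0.5*randn(400,1);
%   pars.width = 0; pars.widthT = 0;
%   [p_val, Sta, Cri] = CInd_test_new_withGP_t_RFF(x, y, z, 0.01, pars);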

% Controlling parameters
width = pars.width;
if(pars.widthT==0) % kernel width on the time index when IF_GP=0; needs tuning!!!!!
    widthT = 0.1;
else
    widthT = pars.widthT;
end
IF_unbiased = 0;
Bootstrap = 1;

T = length(y); % the sample size
% Num_eig = floor(T/4); % how many eigenvalues are to be calculated?
Num_eig = T;
T_BS = 10000; % 5000
lambda = 1E-3; % the regularization parameter %%%%Problem
Thresh = 1E-5;
% normalize the data
x = x - repmat(mean(x), T, 1);
x = x * diag(1./std(x));
y = y - repmat(mean(y), T, 1);
y = y * diag(1./std(y));
z = z - repmat(mean(z), T, 1);
z = z * diag(1./std(z));

D = size(z, 2);
logtheta_x = []; logtheta_y = []; df_x = []; df_y = [];
Cri = []; Sta = []; p_val = []; Cri_appr = []; p_appr = [];

if width ==0
    if T <= 200
        width = 1.2; % 0.8
    elseif T < 1200
        width = 0.6;
    else
        width = 0.4; % 0.3
    end
end

Zx = transformFeatures([x z/2]/(width*sqrt(D)/sqrt(2))); % calculate random Fourier features
Zx = Zx - repmat(mean(Zx,2), 1, T);
Zy = transformFeatures(y/(width*sqrt(D)/sqrt(2)));
Zy = Zy - repmat(mean(Zy,2), 1, T);
% check whether the last dimension of z is the time index
tmp = [1:T]';
tmp = tmp - repmat(mean(tmp), T, 1);
tmp = tmp * diag(1./std(tmp));
if(norm(z(:,end)-tmp)<1e-5)
    if(D>1)
        Zz = transformFeatures([z(:,1:end-1)/(width*sqrt(D-1)/sqrt(2)),z(:,end)/(widthT/sqrt(2))]);
    else
        Zz = transformFeatures(z/(widthT/sqrt(2)));
    end
else
    Zz = transformFeatures(z/(width*sqrt(D)/sqrt(2)));
end
Zz = Zz - repmat(mean(Zz,2), 1, T);


Cxz = Zx*Zz';
Cyz = Zy*Zz';
Czz = Zz*Zz';

Ex = Zx - (Cxz/(Czz+1e-5*eye(size(Czz,1))))*Zz;
Ex = Ex - repmat(mean(Ex,2), 1, T);
Ey = Zy - (Cyz/(Czz+1e-5*eye(size(Czz,1))))*Zz;
Ey = Ey - repmat(mean(Ey,2), 1, T);
C = Ex * Ey';
% calculate the statistic
Sta = norm(C,'fro')^2;

% calculate the eigenvalues that will be used later
[eig_Kxz, eivx] = eigdec(Ex'*Ex,Num_eig);
[eig_Kyz, eivy] = eigdec(Ey'*Ey,Num_eig);

% calculate the product of the square root of the eigenvalues and the eigenvectors
IIx = find(eig_Kxz > max(eig_Kxz) * Thresh);
IIy = find(eig_Kyz > max(eig_Kyz) * Thresh);
eig_Kxz = eig_Kxz(IIx);
eivx = eivx(:,IIx);
eig_Kyz = eig_Kyz(IIy);
eivy = eivy(:,IIy);

eiv_prodx = eivx * diag(sqrt(eig_Kxz));
eiv_prody = eivy * diag(sqrt(eig_Kyz));
clear eivx eig_Kxz eivy eig_Kyz
% calculate their product
Num_eigx = size(eiv_prodx, 2);
Num_eigy = size(eiv_prody, 2);
Size_u = Num_eigx * Num_eigy;
uu = zeros(T, Size_u);
for i=1:Num_eigx
    for j=1:Num_eigy
        uu(:,(i-1)*Num_eigy + j) = eiv_prodx(:,i) .* eiv_prody(:,j);
    end
end
if Size_u > T
    uu_prod = uu * uu';
else
    uu_prod = uu' * uu;
end
if Bootstrap
    eig_uu = eigdec(uu_prod,min(T,Size_u));
    II_f = find(eig_uu > max(eig_uu) * Thresh);
    eig_uu = eig_uu(II_f);
end

Cri=-1;
p_val=-1;


if Bootstrap
    % use a weighted sum of chi^2 random variables to generate the null distribution
    if length(eig_uu) * T < 1E6
        f_rand1 = chi2rnd(1,length(eig_uu),T_BS);
        if IF_unbiased
            Null_dstr = T^2/(T-1-df_x)/(T-1-df_y) * eig_uu' * f_rand1; %%%%Problem
        else
            Null_dstr = eig_uu' * f_rand1;
        end
    else
        % iteratively calculate the null distribution to save memory
        Null_dstr = zeros(1,T_BS);
        Length = max(floor(1E6/T),100);
        Itmax = floor(length(eig_uu)/Length);
        for iter = 1:Itmax
            f_rand1 = chi2rnd(1,Length,T_BS);
            if IF_unbiased
                Null_dstr = Null_dstr + T^2/(T-1-df_x)/(T-1-df_y) *... %%%%Problem
                    eig_uu((iter-1)*Length+1:iter*Length)' * f_rand1;
            else
                Null_dstr = Null_dstr + ... %%%%Problem
                    eig_uu((iter-1)*Length+1:iter*Length)' * f_rand1;
            end
        end
    end
    sort_Null_dstr = sort(Null_dstr);
    Cri = sort_Null_dstr(ceil((1-alpha)*T_BS));
    p_val = sum(Null_dstr>Sta)/T_BS;
end

--------------------------------------------------------------------------------
/KCI-test/algorithms/check_markov_equiv.m:
--------------------------------------------------------------------------------

function [res]=check_markov_equiv(g1,g2)
% function res=check_markov_equiv(g1,g2)
% INPUT: two graphs g1, g2.
%   g(i,j)=-1 if there is a directed arrow from i to j.
%   g(i,j)=g(j,i)=1 if there is an undirected edge between i and j
%
% OUTPUT: res==1: the two graphs are Markov equivalent
%         res==0: they are not
%
% Copyright (c) 2010-2011 ...
% All rights reserved.  See the file COPYING for license terms.
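%
% Example (hypothetical; the chain 1 -> 2 -> 3 and the fork 1 <- 2 -> 3 share
% the same skeleton and immoralities, so they are Markov equivalent):
%   g1 = zeros(3); g1(1,2) = -1; g1(2,3) = -1;
%   g2 = zeros(3); g2(2,1) = -1; g2(2,3) = -1;
%   res = check_markov_equiv(g1,g2);   % should return 1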


res=1;
num_nodes=size(g1,1);


% check whether they have the same skeleton
%
skeleton1=g1+g1'; skeleton1(skeleton1~=0)=skeleton1(skeleton1~=0)./skeleton1(skeleton1~=0);
skeleton2=g2+g2'; skeleton2(skeleton2~=0)=skeleton2(skeleton2~=0)./skeleton2(skeleton2~=0);
if ~isequal(skeleton1,skeleton2)
    res=0;
    fprintf('not the same skeletons\n');
end

if res==1
    % check whether they have the same set of immoralities
    %
    for i=1:num_nodes
        i_parents=find(g1(:,i)==-1);
        for ii1=1:(length(i_parents)-1)
            for ii2=(ii1+1):length(i_parents)
                if g1(i_parents(ii2),i_parents(ii1))==0 & g1(i_parents(ii1),i_parents(ii2))==0
                    if g2(i,i_parents(ii1))~=0 | g2(i,i_parents(ii2))~=0
                        res=0;
                        fprintf('there is an immorality in the 1st graph that is not in the 2nd graph\n');
                    end
                end
            end
        end
    end

    for i=1:num_nodes
        i_parents=find(g2(:,i)==-1);
        for ii1=1:(length(i_parents)-1)
            for ii2=(ii1+1):length(i_parents)
                if g2(i_parents(ii2),i_parents(ii1))==0 & g2(i_parents(ii1),i_parents(ii2))==0
                    if g1(i,i_parents(ii1))~=0 | g1(i,i_parents(ii2))~=0
                        res=0;
                        fprintf('there is an immorality in the 2nd graph that is not in the 1st graph\n');
                    end
                end
            end
        end
    end
end

--------------------------------------------------------------------------------
/KCI-test/algorithms/condVect.m:
--------------------------------------------------------------------------------

function [res] = condVect(S,T,U,M,ths,BG)
% calculates an independence measure on vectors
%
% S,T,U: disjoint lists of indices
% M: covariance matrix of the vectors
% ths: threshold for independence. If ths < 0, condVect returns I(S:T|U);
%      if ths >= 0, it returns 1 if I(S:T|U) <= ths and 0 otherwise.
% BG: list of indices of background variables (they are always conditioned
%     on but do not count as variables for PC)
%
% example:
% condVect([1,2],[3],[4,5],M,-1,[6]) returns I(1,2 : 3 | 4,5,6)
% condVect([1,2],[3],[],M,0.5,[]) returns 1 if I(1,2 : 3) <= 0.5, and 0
% otherwise
% Copyright (c) 2010-2011 ...
% All rights reserved.  See the file COPYING for license terms.

if (ths>=0)
    fprintf('Calculating I(%s : %s | %s) ',num2str(S,'%1.0d '),num2str(T,' %1.0d '),num2str(U,' %1.0d'));
end
U=union(U,BG);
I = union(S,T);I=union(I,U); %set of indices

res = entVect(M(union(S,U),union(S,U)))+entVect(M(union(T,U),union(T,U)))-entVect(M(U,U))-entVect(M(I,I));

if (ths >= 0)
    fprintf(' Result: %1.2d\n',res);
    res = (res<=ths);
end
end

% (differential) entropy, up to an additive constant, from a covariance matrix
function [res] = entVect(M)

res = 1/2*log(det(M));
end

--------------------------------------------------------------------------------
/KCI-test/algorithms/dist2.m:
--------------------------------------------------------------------------------

function n2 = dist2(x, c)
%DIST2	Calculates squared distance between two sets of points.
%
%	Description
%	D = DIST2(X, C) takes two matrices of vectors and calculates the
%	squared Euclidean distance between them.  Both matrices must be of
%	the same column dimension.  If X has M rows and N columns, and C has
%	L rows and N columns, then the result has M rows and L columns.  The
%	I, Jth entry is the squared distance from the Ith row of X to the
%	Jth row of C.
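%
%	Example (hypothetical):
%	X = randn(4,3); C = randn(2,3);
%	D = dist2(X, C);   % 4 x 2 matrix of squared Euclidean distances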
%
%	See also
%	GMMACTIV, KMEANS, RBFFWD
%

%	Copyright (c) Ian T Nabney (1996-2001)
%	All rights reserved.  See the file COPYING for license terms.

[ndata, dimx] = size(x);
[ncentres, dimc] = size(c);
if dimx ~= dimc
    error('Data dimension does not match dimension of centres')
end

n2 = (ones(ncentres, 1) * sum((x.^2)', 1))' + ...
    ones(ndata, 1) * sum((c.^2)',1) - ...
    2.*(x*(c'));

% Rounding errors occasionally cause negative entries in n2
if any(any(n2<0))
    n2(n2<0) = 0;
end

--------------------------------------------------------------------------------
/KCI-test/algorithms/eigdec.m:
--------------------------------------------------------------------------------

function [evals, evec] = eigdec(x, N)
%EIGDEC	Sorted eigendecomposition
%
%	Description
%	EVALS = EIGDEC(X, N) computes the largest N eigenvalues of the
%	matrix X in descending order.  [EVALS, EVEC] = EIGDEC(X, N) also
%	computes the corresponding eigenvectors.
%
%	See also
%	PCA, PPCA
%
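%	Example (hypothetical):
%	A = cov(randn(100,5));
%	[evals, evec] = eigdec(A, 3);   % three largest eigenvalues/eigenvectors
%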

%	Copyright (c) Ian T Nabney (1996-2001)
%	All rights reserved.  See the file COPYING for license terms.

if nargout == 1
    evals_only = logical(1);
else
    evals_only = logical(0);
end

if N ~= round(N) | N < 1 | N > size(x, 2)
    error('Number of PCs must be an integer, >0, <= dim');
end

% Find the eigenvalues of the data covariance matrix
if evals_only
    % Use eig function as always more efficient than eigs here
    temp_evals = eig(x);
else
    % Use eig function unless fraction of eigenvalues required is tiny
    if (N/size(x, 2)) > 0.04
        [temp_evec, temp_evals] = eig(x);
    else
        options.disp = 0;
        [temp_evec, temp_evals] = eigs(x, N, 'LM', options);
    end
    temp_evals = diag(temp_evals);
end

% Eigenvalues nearly always returned in descending order, but just
% to make sure.....
[evals perm] = sort(-temp_evals);
evals = -evals(1:N);
if ~evals_only
    if evals == temp_evals(1:N)
        % Originals were in order
        evec = temp_evec(:, 1:N);
        return
    else
        % Need to reorder the eigenvectors
        for i=1:N
            evec(:,i) = temp_evec(:,perm(i));
        end
    end
end

--------------------------------------------------------------------------------
/KCI-test/algorithms/gpr_multi.m:
--------------------------------------------------------------------------------

function [out1, out2] = gpr_multi(logtheta, covfunc, x, y, xstar)
% Here we change the function gpr to gpr_multi, in which y contains a set
% of vectors which we regress on x.

% gpr - Gaussian process regression, with a named covariance function. Two
% modes are possible: training and prediction: if no test data are given, the
% function returns minus the log likelihood and its partial derivatives with
% respect to the hyperparameters; this mode is used to fit the hyperparameters.
% If test data are given, then (marginal) Gaussian predictions are computed,
% whose mean and variance are returned. Note that in cases where the covariance
% function has noise contributions, the variance returned in S2 is for noisy
% test targets; if you want the variance of the noise-free latent function, you
% must subtract the noise variance.
%
% usage: [nlml dnlml] = gpr_multi(logtheta, covfunc, x, y)
%    or: [mu S2]      = gpr_multi(logtheta, covfunc, x, y, xstar)
%
% where:
%
%   logtheta is a (column) vector of log hyperparameters
%   covfunc  is the covariance function
%   x        is a n by D matrix of training inputs
%   y        is a n by m matrix of target vectors
%   xstar    is a nn by D matrix of test inputs
%   nlml     is the returned value of the negative log marginal likelihood
%   dnlml    is a (column) vector of partial derivatives of the negative
%            log marginal likelihood wrt each log hyperparameter
%   mu       is a (column) vector (of size nn) of predicted means
%   S2       is a (column) vector (of size nn) of predicted variances
%
% For more help on covariance functions, see "help covFunctions".
%
% (C) copyright 2006 by Carl Edward Rasmussen (2006-03-20).
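%
% Example (hypothetical; assumes the gpml covariance functions are on the path):
%   x = randn(50,2);
%   y = [sin(x(:,1)), cos(x(:,2))] + 0.1*randn(50,2);
%   covfunc = {'covSum', {'covSEiso','covNoise'}};
%   logtheta = [0; 0; log(0.1)];   % [log(ell); log(sf); log(noise)]
%   [nlml, dnlml] = gpr_multi(logtheta, covfunc, x, y);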

if ischar(covfunc), covfunc = cellstr(covfunc); end % convert to cell if needed
[n, D] = size(x);
[n, m] = size(y);
if eval(feval(covfunc{:})) ~= size(logtheta, 1)
    error('Error: Number of parameters does not agree with covariance function')
end

K = feval(covfunc{:}, logtheta, x);    % compute training set covariance matrix

L = chol(K)';                          % cholesky factorization of the covariance
% for i = 1:m
%     alpha(:,i) = solve_chol(L',y(:,i));
% end
alpha = solve_chol(L',y);

if nargin == 4 % if no test cases, compute the negative log marginal likelihood

    out1 = 0.5* trace(y'*alpha) + m*sum(log(diag(L))) + 0.5*m*n*log(2*pi);

    if nargout == 2 % ... and if requested, its partial derivatives
        out2 = zeros(size(logtheta));  % set the size of the derivative vector
        W = m * (L'\(L\eye(n))) - alpha*alpha'; % precompute for convenience
        for i = 1:length(out2)
            out2(i) = sum(sum(W.*feval(covfunc{:}, logtheta, x, i)))/2;
        end
    end

else % ... otherwise compute (marginal) test predictions ...

    [Kss, Kstar] = feval(covfunc{:}, logtheta, x, xstar); % test covariances

    out1 = Kstar' * alpha; % predicted means

    if nargout == 2
        v = L\Kstar;
        out2 = Kss - sum(v.*v)';
    end

end

--------------------------------------------------------------------------------
/KCI-test/algorithms/gpr_multi2.m:
--------------------------------------------------------------------------------

function [out1, out2] = gpr_multi2(logtheta, covfunc, x, y, xstar)
% Here we change the function gpr to gpr_multi2, in which y contains a set
% of vectors which we regress on x. The only difference from gpr_multi is
% that the partial derivative with respect to the third-to-last log
% hyperparameter is set to zero, which keeps that hyperparameter fixed
% during gradient-based optimization.

% gpr - Gaussian process regression, with a named covariance function. Two
% modes are possible: training and prediction: if no test data are given, the
% function returns minus the log likelihood and its partial derivatives with
% respect to the hyperparameters; this mode is used to fit the hyperparameters.
% If test data are given, then (marginal) Gaussian predictions are computed,
% whose mean and variance are returned. Note that in cases where the covariance
% function has noise contributions, the variance returned in S2 is for noisy
% test targets; if you want the variance of the noise-free latent function, you
% must subtract the noise variance.
%
% usage: [nlml dnlml] = gpr_multi2(logtheta, covfunc, x, y)
%    or: [mu S2]      = gpr_multi2(logtheta, covfunc, x, y, xstar)
%
% where:
%
%   logtheta is a (column) vector of log hyperparameters
%   covfunc  is the covariance function
%   x        is a n by D matrix of training inputs
%   y        is a n by m matrix of target vectors
%   xstar    is a nn by D matrix of test inputs
%   nlml     is the returned value of the negative log marginal likelihood
%   dnlml    is a (column) vector of partial derivatives of the negative
%            log marginal likelihood wrt each log hyperparameter
%   mu       is a (column) vector (of size nn) of predicted means
%   S2       is a (column) vector (of size nn) of predicted variances
%
% For more help on covariance functions, see "help covFunctions".
%
% (C) copyright 2006 by Carl Edward Rasmussen (2006-03-20).

if ischar(covfunc), covfunc = cellstr(covfunc); end % convert to cell if needed
[n, D] = size(x);
[n, m] = size(y);
if eval(feval(covfunc{:})) ~= size(logtheta, 1)
    error('Error: Number of parameters does not agree with covariance function')
end

K = feval(covfunc{:}, logtheta, x);    % compute training set covariance matrix

L = chol(K)';                          % cholesky factorization of the covariance
% for i = 1:m
%     alpha(:,i) = solve_chol(L',y(:,i));
% end
alpha = solve_chol(L',y);

if nargin == 4 % if no test cases, compute the negative log marginal likelihood

    out1 = 0.5* trace(y'*alpha) + m*sum(log(diag(L))) + 0.5*m*n*log(2*pi);

    if nargout == 2 % ... and if requested, its partial derivatives
        out2 = zeros(size(logtheta));  % set the size of the derivative vector
        W = m * (L'\(L\eye(n))) - alpha*alpha'; % precompute for convenience
        for i = 1:length(out2)
            out2(i) = sum(sum(W.*feval(covfunc{:}, logtheta, x, i)))/2;
        end
        out2(end-2)=0; % keep this hyperparameter fixed
    end

else % ... otherwise compute (marginal) test predictions ...

    [Kss, Kstar] = feval(covfunc{:}, logtheta, x, xstar); % test covariances

    out1 = Kstar' * alpha; % predicted means

    if nargout == 2
        v = L\Kstar;
        out2 = Kss - sum(v.*v)';
    end

end

--------------------------------------------------------------------------------
/KCI-test/algorithms/gpr_multi_alln.m:
--------------------------------------------------------------------------------

function [out1, out2] = gpr_multi_alln(logtheta, covfunc, x, y, xstar)
% Here we change the function gpr to gpr_multi_alln, in which y contains a set
% of vectors which we regress on x. In this variant the log determinant and
% normalization terms of the marginal likelihood are scaled by n rather than
% by the number of target vectors m.

% gpr - Gaussian process regression, with a named covariance function. Two
% modes are possible: training and prediction: if no test data are given, the
% function returns minus the log likelihood and its partial derivatives with
% respect to the hyperparameters; this mode is used to fit the hyperparameters.
% If test data are given, then (marginal) Gaussian predictions are computed,
% whose mean and variance are returned. Note that in cases where the covariance
% function has noise contributions, the variance returned in S2 is for noisy
% test targets; if you want the variance of the noise-free latent function, you
% must subtract the noise variance.
%
% usage: [nlml dnlml] = gpr_multi_alln(logtheta, covfunc, x, y)
%    or: [mu S2]      = gpr_multi_alln(logtheta, covfunc, x, y, xstar)
%
% where:
%
%   logtheta is a (column) vector of log hyperparameters
%   covfunc  is the covariance function
%   x        is a n by D matrix of training inputs
%   y        is a n by m matrix of target vectors
%   xstar    is a nn by D matrix of test inputs
%   nlml     is the returned value of the negative log marginal likelihood
%   dnlml    is a (column) vector of partial derivatives of the negative
%            log marginal likelihood wrt each log hyperparameter
%   mu       is a (column) vector (of size nn) of predicted means
%   S2       is a (column) vector (of size nn) of predicted variances
%
% For more help on covariance functions, see "help covFunctions".
%
% (C) copyright 2006 by Carl Edward Rasmussen (2006-03-20).

if ischar(covfunc), covfunc = cellstr(covfunc); end % convert to cell if needed
[n, D] = size(x);
[n, m] = size(y);
if eval(feval(covfunc{:})) ~= size(logtheta, 1)
    error('Error: Number of parameters does not agree with covariance function')
end

K = feval(covfunc{:}, logtheta, x);    % compute training set covariance matrix

L = chol(K)';                          % cholesky factorization of the covariance
% for i = 1:m
%     alpha(:,i) = solve_chol(L',y(:,i));
% end
alpha = solve_chol(L',y);

if nargin == 4 % if no test cases, compute the negative log marginal likelihood

    out1 = 0.5* trace(y'*alpha) + n*sum(log(diag(L))) + 0.5*n*n*log(2*pi);

    if nargout == 2 % ... and if requested, its partial derivatives
        out2 = zeros(size(logtheta));  % set the size of the derivative vector
        W = n * (L'\(L\eye(n))) - alpha*alpha'; % precompute for convenience
        for i = 1:length(out2)
            out2(i) = sum(sum(W.*feval(covfunc{:}, logtheta, x, i)))/2;
        end
    end

else % ... otherwise compute (marginal) test predictions ...

    [Kss, Kstar] = feval(covfunc{:}, logtheta, x, xstar); % test covariances

    out1 = Kstar' * alpha; % predicted means

    if nargout == 2
        v = L\Kstar;
        out2 = Kss - sum(v.*v)';
    end

end

--------------------------------------------------------------------------------
/KCI-test/algorithms/gpr_multi_alln_K.m:
--------------------------------------------------------------------------------

function [out1, out2] = gpr_multi_alln_K(logtheta, covfunc, x, Ky, xstar)
% Here we change the function gpr to gpr_multi_alln_K. Instead of the targets
% themselves, it takes the Gram matrix Ky = y*y' of the targets, so only the
% training mode (negative log marginal likelihood and its derivatives) is
% functional; the prediction branch is kept from gpr but is unused here, since
% alpha is not computed.

% gpr - Gaussian process regression, with a named covariance function. Two
% modes are possible: training and prediction: if no test data are given, the
% function returns minus the log likelihood and its partial derivatives with
% respect to the hyperparameters; this mode is used to fit the hyperparameters.
% If test data are given, then (marginal) Gaussian predictions are computed,
% whose mean and variance are returned. Note that in cases where the covariance
% function has noise contributions, the variance returned in S2 is for noisy
% test targets; if you want the variance of the noise-free latent function, you
% must subtract the noise variance.
%
% usage: [nlml dnlml] = gpr_multi_alln_K(logtheta, covfunc, x, Ky)
%
% where:
%
%   logtheta is a (column) vector of log hyperparameters
%   covfunc  is the covariance function
%   x        is a n by D matrix of training inputs
%   Ky       is the n by n Gram matrix of the targets
%   nlml     is the returned value of the negative log marginal likelihood
%   dnlml    is a (column) vector of partial derivatives of the negative
%            log marginal likelihood wrt each log hyperparameter
%
% For more help on covariance functions, see "help covFunctions".
%
% (C) copyright 2006 by Carl Edward Rasmussen (2006-03-20).

if ischar(covfunc), covfunc = cellstr(covfunc); end % convert to cell if needed
[n, D] = size(x);
[n, m] = size(Ky);
if eval(feval(covfunc{:})) ~= size(logtheta, 1)
    error('Error: Number of parameters does not agree with covariance function')
end

K = feval(covfunc{:}, logtheta, x);    % compute training set covariance matrix

L = chol(K)';                          % cholesky factorization of the covariance
% for i = 1:m
%     alpha(:,i) = solve_chol(L',y(:,i));
% end
% alpha = solve_chol(L',y);
K_inv = solve_chol(L',eye(n));

if nargin == 4 % if no test cases, compute the negative log marginal likelihood

    % out1 = 0.5* trace(y'*alpha) + n*sum(log(diag(L))) + 0.5*n*n*log(2*pi);
    out1 = 0.5* trace(K_inv * Ky) + n*sum(log(diag(L))) + 0.5*n*n*log(2*pi);

    if nargout == 2 % ... and if requested, its partial derivatives
        out2 = zeros(size(logtheta));  % set the size of the derivative vector
        W = K_inv * (n *eye(n) - Ky * K_inv); % precompute for convenience
        for i = 1:length(out2)
            out2(i) = sum(sum(W.*feval(covfunc{:}, logtheta, x, i)))/2;
        end
    end

else % ... otherwise compute (marginal) test predictions ...
     % (note: alpha is not computed in this variant, so this branch is unused)

    [Kss, Kstar] = feval(covfunc{:}, logtheta, x, xstar); % test covariances

    out1 = Kstar' * alpha; % predicted means

    if nargout == 2
        v = L\Kstar;
        out2 = Kss - sum(v.*v)';
    end

end

--------------------------------------------------------------------------------
/KCI-test/algorithms/kernel.m:
--------------------------------------------------------------------------------

function [kx, bw_new] = kernel(x, xKern, theta)

% KERNEL Compute the RBF kernel
% If theta(1)==0, the kernel width is chosen by the median heuristic.
% Copyright (c) 2010-2011 ...
% All rights reserved.  See the file COPYING for license terms.
n2 = dist2(x, xKern);
if theta(1)==0
    theta(1)=2/median(n2(tril(n2)>0));
    theta_new=theta(1);
end
wi2 = theta(1)/2;
kx = theta(2)*exp(-n2*wi2);
bw_new=1/theta(1);

--------------------------------------------------------------------------------
/KCI-test/algorithms/logdet.m:
--------------------------------------------------------------------------------

function y = logdet(A)
% log(det(A)) where A is positive-definite.
% This is faster and more stable than using log(det(A)).
% Copyright (c) 2010-2011 ...
% All rights reserved.  See the file COPYING for license terms.
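%
% Example (hypothetical):
%   A = cov(randn(100,4)) + 1e-6*eye(4);
%   y = logdet(A);   % equals log(det(A)), but avoids overflow/underflow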
try
    U = chol(A);
    y = 2*sum(log(diag(U)));
catch
    [void, errid] = lasterr;
    if strcmp(errid, 'MATLAB:posdef')
        warning(['Matrix is not positive definite in logdet, using log(det())'])
        y = log(det(A));
        return
    else
        error(lasterr)
    end
end

--------------------------------------------------------------------------------
/KCI-test/algorithms/pdinv.m:
--------------------------------------------------------------------------------

function Ainv = pdinv(A)

% PDINV Computes the inverse of a positive definite matrix
% Copyright (c) 2010-2011 ...
% All rights reserved.  See the file COPYING for license terms.
numData = size(A, 1);
try
    U = chol(A);
    invU = eye(numData)/U;
    Ainv = invU*invU';
catch
    [void, errid] = lasterr;
    if strcmp(errid, 'MATLAB:posdef')
        warning(['Matrix is not positive definite in pdinv, inverting' ...
            ' using svd'])
        [U, S, V] = svd(A);
        Ainv = V*diag(1./diag(S))*U';
        return
    else
        error(lasterr)
    end
end

--------------------------------------------------------------------------------
/KCI-test/algorithms/stack.m:
--------------------------------------------------------------------------------

function v = stack(M)
% stacks the columns of the matrix M into the vector v
% Copyright (c) 2010-2011 ...
% All rights reserved.  See the file COPYING for license terms.
[n,t] = size(M);
v = zeros(n*t,1);

for i=1:t
    v((i-1)*n+1:i*n) = M(:,i);
end

--------------------------------------------------------------------------------
/KCI-test/data/README:
--------------------------------------------------------------------------------

These data are taken from the UCI Machine Learning Repository:
Frank, A. & Asuncion, A. (2010). UCI Machine Learning Repository [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, School of Information and Computer Science.

--------------------------------------------------------------------------------
/KCI-test/data/boston_names:
--------------------------------------------------------------------------------

new1     1. CRIM: per capita crime rate by town
new2     2. ZN: proportion of residential land zoned for lots over 25,000 sq.ft.
new3     3. INDUS: proportion of non-retail business acres per town
new4     5. NOX: nitric oxides concentration (parts per 10 million)
new5     6. RM: average number of rooms per dwelling
new6     7. AGE: proportion of owner-occupied units built prior to 1940
new7     8. DIS: weighted distances to five Boston employment centres
new8    10. TAX: full-value property-tax rate per $10,000
new9    11. PTRATIO: pupil-teacher ratio by town
new10   12. B: 1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town
new11   13. LSTAT: % lower status of the population
new12   14. MEDV: Median value of owner-occupied homes in $1000's

--------------------------------------------------------------------------------
/KCI-test/gpml-matlab/README:
--------------------------------------------------------------------------------

ABOUT THESE PROGRAMS
====================

This collection of matlab programs implements and demonstrates some of the
algorithms described in the book Rasmussen and Williams: "Gaussian Processes
for Machine Learning", the MIT Press 2006.


There are 3 subdirectories: gpml, gpml-demo and doc.

gpml:      contains code which implements the algorithms. Please see the
           Copyright notice contained in the file named "Copyright".
Please see the Copyright 12 | notice contained in the file named "Copyright". 13 | 14 | gpml-demo: contains matlab scripts with names "demo_*.m". These provide small 15 | demonstrations of the various programs provided. 16 | 17 | doc: contains four html files providing documentation. The best place to start 18 | is index.html, the other pages are linked from there. This information 19 | is also available from http://www.GaussianProcess.org/gpml/code 20 | 21 | When running the demos, it is assumed that your current directory is the 22 | gpml-demo directory. Otherwise, you should manually add both the gpml-demo and 23 | gpml directories to the matlab path. 24 | 25 | 26 | VERSION 27 | ======= 28 | 29 | The current version of the programs is dated: 2007-07-25. Previous versions of 30 | the code may be available at http://www.gaussianprocess.org/gpml/code/old 31 | 32 | 33 | CHANGES FROM PREVIOUS VERSIONS 34 | ============================== 35 | 36 | 37 | Changes from the 2007-06-25 version: 38 | ------------------------------------ 39 | 40 | covConst.m: fixed a bug which caused an error in the derivative of the log marginal 41 | likelihood for certain combinations of covariance functions and approximation 42 | methods. (Thanks to Antonio Eleuteri for reporting the problem) 43 | 44 | gauher.m: added the function "gauher.m" which was mistakenly missing from the 45 | previous release. This caused an error for certain combinations of 46 | approximation method and likelihood function. 47 | 48 | logistic.m: modified the approximation of moments calculation to use a mixture 49 | of cumulative Gaussians, rather than Gauss-Hermite quadrature, as the former 50 | turns out to be more accurate. 51 | 52 | 53 | Changes from the 2006-09-08 version: 54 | ------------------------------------ 55 | 56 | Some code restructuring has taken place for the classification code to make it 57 | more modular, to facilitate addition of new likelihood functions and 58 | approximation methods. Now, all classification is done using the binaryGP 59 | function, which (among other things) takes an approximation method and a 60 | likelihood function as arguments. Thus, binaryGP replaces both binaryEPGP 61 | and binaryLaplaceGP, although wrapper functions are still provided for backward 62 | compatibility. This gives added flexibility: now EP can also be used with the 63 | logistic likelihood function (implemented using Gauss-Hermite quadrature). 64 | 65 | approxEP.m: New file, containing the Expectation Propagation approximation 66 | method, which was previously contained in binaryEPGP.m 67 | 68 | approxLA.m: New file, containing Laplace's approximation method, which was 69 | previously contained in binaryLaplaceGP.m 70 | 71 | approximations.m: New file, help for the approximation methods. 72 | 73 | binaryEPGP.m: This file has been replaced by a wrapper (for backward 74 | compatibility) which calls the more general binaryGP function. 75 | 76 | binaryGP.m: New general function to do binary classification. 77 | 78 | binaryLaplaceGP.m: This file has been replaced by a wrapper (for backward 79 | compatibility) which calls the more general binaryGP function. 80 | 81 | covMatern3iso.m, covMatern5iso.m, covNNone.m, covRQard.m, covRQiso.m, 82 | covSEard, covSEiso: now check more carefully that persistent variables have 83 | the correct sizes, and some variable names have been modified.
84 | 85 | cumGauss.m: New file, containing code for the cumulative Gaussian 86 | likelihood function 87 | 88 | likelihoods.m: New file, help for likelihood functions 89 | 90 | logistic.m: New file, logistic likelihood 91 | 92 | 93 | Changes from the 2006-05-10 version: 94 | ------------------------------------ 95 | 96 | covRQard.m: bugfix: replaced x with x' and z with z' in line 36 97 | 98 | covRQiso.m: bugfix: replaced x with x' and z with z' in line 28 99 | 100 | minimize.m: correction: replaced "error()" with "error('')", and 101 | made a few cosmetic changes 102 | 103 | binaryEPGP.m: added the line "lml = -n*log(2);" in line 77. This change 104 | should be largely inconsequential, but occasionally may save things 105 | when the covariance matrix is exceptionally badly conditioned. 106 | 107 | 108 | Changes from the 2006-04-12 version: 109 | ------------------------------------ 110 | 111 | added the "erfint" function to "binaryLaplaceGP.m". The erfint function 112 | was missing by mistake, preventing the use of the "logistic" likelihood. 113 | 114 | 115 | Changes from the 2006-03-29 version: 116 | ------------------------------------ 117 | 118 | added files: "covProd.m" and "covPeriodic.m" 119 | 120 | changes: "covSEiso.m" was changed slightly to avoid the use of persistent 121 | variables 122 | 123 | 124 | DATASETS 125 | ======== 126 | 127 | The datasets needed for some of the demos can be downloaded from 128 | http://www.GaussianProcess.org/gpml/data 129 | 130 | 131 | 132 | ABOUT MEX FILES 133 | =============== 134 | 135 | Some of the programs make use of the mex facility in matlab for more efficient 136 | implementation. However, if you don't know how to compile mex files, you 137 | do not need to worry about this - the code should run anyway. If you do 138 | compile the mex files, this should be automatically detected, and the program 139 | will run more efficiently. Particularly the demonstrations of classification 140 | on the usps digits require a lot of computation. 141 | 142 | 143 | 144 | COMPILING MEX FILES 145 | =================== 146 | 147 | As mentioned above, it is not necessary to compile the mex files, but it can 148 | speed up execution considerably. We cannot give a detailed account, but here 149 | are some hints: 150 | 151 | Generally, you just type "mex file.c" at the matlab prompt or in your shell to 152 | compile, where "file.c" is the program you want to compile. There is a Makefile 153 | which works for unix/linux on x86 machines. Just type "make". 154 | 155 | In some cases (solve_chol.c), routines from the lapack numerical library are 156 | used. This should pose no problem on linux. On windows, you have to 1) remove 157 | the trailing underscore from the name of the lapack function ("dpotrs", two 158 | occurrences) and 2) pass the location of the lapack library to mex, i.e. 159 | something like 160 | 161 | mex file.c <matlab>/extern/lib/win32/lcc/libmwlapack.lib 162 | 163 | where <matlab> is the root of your matlab installation.
If your installation 164 | doesn't include the libmwlapack.lib you may be able to get it from 165 | 166 | http://www.cise.ufl.edu/research/sparse/umfpack/v4.4/UMFPACKv4.4/UMFPACK/ 167 | MATLAB/lcc_lib/ 168 | 169 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/doc/alg21.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/alg21.gif -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/doc/alg31.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/alg31.gif -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/doc/alg32.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/alg32.gif -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/doc/alg35.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/alg35.gif -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/doc/alg36.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/alg36.gif -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/doc/alg51.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/alg51.gif -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/doc/alg52.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/alg52.gif -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/doc/fig2d.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/fig2d.gif -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/doc/fig2de1.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/fig2de1.gif -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/doc/fig2de2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/fig2de2.gif -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/doc/fig2de3.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/fig2de3.gif -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/doc/fig2dl1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/fig2dl1.gif -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/doc/fig2dl2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/fig2dl2.gif -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/doc/fig2dl3.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/fig2dl3.gif -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/doc/figepp.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/figepp.gif -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/doc/figepp2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/figepp2.gif -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/doc/figl.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/figl.gif -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/doc/figl1.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/figl1.gif -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/doc/figlapp.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/figlapp.gif -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/doc/figlapp2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/figlapp2.gif -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/doc/figlf.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/figlf.gif -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/doc/figlm.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/doc/figlm.gif -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/doc/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Documentation for GPML Matlab Code 5 | 6 | 7 | 8 | 9 | 10 |

Documentation for GPML Matlab Code
==================================

The code provided here demonstrates the main algorithms from Rasmussen and
Williams: Gaussian Processes for Machine Learning.

The code is written in Matlab®, and should work with version 6 and version 7.
Bug reports should be sent to the authors. All the code including
demonstrations and html documentation can be downloaded in a tar or zip
archive file. Previous versions of the code may be available here. Please
read the copyright notice.

After unpacking the tar or zip file you will find 3 subdirectories: gpml,
gpml-demo and doc.

The directory gpml contains the basic functions for GP regression, GP binary
classification, and sparse approximate methods for GP regression.

The directory gpml-demo contains Matlab® scripts with names "demo_*.m". These
provide small demonstrations of the various programs provided.

The directory doc contains four html files providing documentation. This
information can also be accessed via the www at
http://www.GaussianProcess.org/gpml/code.

The code should run directly as provided, but some demos require a lot of
computation. A significant speedup may be attained by compiling the mex files;
see the rudimentary instructions on how to do this in the README file.

The documentation is divided into three sections:

Regression
----------

Basic Gaussian process regression (GPR) code allowing flexible specification
of the covariance function.

Binary Classification
---------------------

Gaussian process classification (GPC) demonstrates implementations of Laplace
and EP approximation methods for binary GP classification.

Sparse Approximation methods for Gaussian Process Regression
------------------------------------------------------------

Approximation methods for GPR demonstrates the methods of subset of
datapoints (SD), subset of regressors (SR) and projected process (PP)
approximations.

Other Gaussian Process Code
---------------------------

A table of other sources of useful Gaussian process software, unrelated to
the book, may be found here. This includes pointers to a number of packages
that can handle multi-class classification, e.g. fbm (Radford Neal), c++-ivm
(Neil Lawrence), gpclass (David Barber and Chris Williams), klr (kernel
multiple logistic regression, by Matthias Seeger), and VBGP (Mark Girolami
and Simon Rogers).

Go back to the web page for Gaussian Processes for Machine Learning.
90 | 91 | 92 | Last modified: Tue Jun 26 10:43:51 CET 2007 93 | 94 | 95 | 96 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/doc/style.css: -------------------------------------------------------------------------------- 1 | body {font-family: sans-serif; font-size: 16px} 2 | table {font-size: inherit;} 3 | 4 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml-demo/Contents.m: -------------------------------------------------------------------------------- 1 | % demonstration scripts and data for the gpml software 2 | % date: 2006-03-27. 3 | % 4 | % data_6darm.mat - MacKay's robot arm data, with 6 input dimensions 5 | % data_boston.mat - the boston housing data 6 | % 7 | % demo_ep_2d.m - Demonstrate EP GP classifier on 2-d data 8 | % demo_ep_usps.m - Demonstrate EP GP classifier on USPS digits data 9 | % demo_gparm.m - Demonstrate GP regression on MacKay's robot arm problem 10 | % demo_gpr.m - Demonstrate gpr function for flexible covariances 11 | % demo_gprsparse.m - Demonstrate approx GP regression on Boston housing data 12 | % demo_laplace_2d.m - Demonstrate Laplace binary GPC on 2-d data 13 | % demo_laplace_usps.m - Demonstrate Laplace GP classifier on USPS digits data 14 | % 15 | % Copyright (c) 2005, 2006 by Carl Edward Rasmussen and Chris Williams 16 | 17 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml-demo/data_6darm.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/gpml-demo/data_6darm.mat -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml-demo/data_boston.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/KCI-test/gpml-matlab/gpml-demo/data_boston.mat -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml-demo/demo_ep_usps.m: -------------------------------------------------------------------------------- 1 | % Demo script to illustrate use of binaryEP on a binary digit classification 2 | % task. 2006-03-29. 3 | 4 | if isempty(regexp(path,['gpml' pathsep])) 5 | cd ..; w = pwd; addpath([w, '/gpml']); cd gpml-demo % add code dir to path 6 | end 7 | 8 | hold off 9 | clear 10 | clf 11 | clc 12 | 13 | disp('This demonstration illustrates the use of the Expectation Propagation') 14 | disp('(EP) approximation for binary Gaussian process classification applied') 15 | disp('to a digit task.') 16 | disp(' ') 17 | 18 | if exist('loadBinaryUSPS') ~= 2 19 | disp('Error: can''t find the loadBinaryUSPS.m file. For this example, you') 20 | disp('need to download the usps_resampled archive. It is available at') 21 | disp('http://www.GaussianProcess.org/gpml/data') 22 | return 23 | end 24 | 25 | disp('The data consists of 16 by 16 grey scale images of handwritten digits') 26 | disp('derived from the USPS data set. We will consider the binary') 27 | disp('classification task of separating 3''s from 5''s. The training set') 28 | disp('contains 767 cases and the test set 773 cases. 
Here is an example') 29 | disp('image of a digit 3.'); 30 | disp(' '); 31 | 32 | disp(' [x y xx yy] = loadBinaryUSPS(3, 5);') 33 | [x y xx yy] = loadBinaryUSPS(3, 5); 34 | disp(' imagesc(reshape(x(3,:),16,16)''), colormap(''gray'')') 35 | imagesc(reshape(x(3,:),16,16)'), colormap('gray') 36 | 37 | disp(' ') 38 | disp('Press any key to continue.') 39 | disp(' ') 40 | pause 41 | 42 | disp('We must specify a covariance function. The demonstration uses the') 43 | disp('squared exponential (SE) covariance function but many other covariance') 44 | disp('functions are supported as well. The SE covariance function has two') 45 | disp('parameters; a log length-scale parameter and a log magnitude parameter.') 46 | disp('As an initial guess for the parameters, we set the log length-scale to') 47 | disp('the log of the average pairwise distance between training points,') 48 | disp('roughly log(22)=3, and the magnitude is set to unity, i.e. its log is set to 0.') 49 | disp('Other initial choices could be reasonable too.') 50 | disp(' '); 51 | disp('We then call the binaryEPGP function, which constructs the EP') 52 | disp('approximation of the posterior over functions based on the training set') 53 | disp('and produces probabilistic predictions for the test cases. This may') 54 | disp('take a few minutes or so... depending on whether you compiled the mex') 55 | disp('files... ') 56 | disp(' ') 57 | 58 | disp(' loghyper = [3.0; 0.0]; % set the log hyperparameters') 59 | loghyper = [3.0; 0.0]; % set the log hyperparameters 60 | disp(' p = binaryEPGP(loghyper, ''covSEiso'', x, y, xx);') 61 | p = binaryEPGP(loghyper, 'covSEiso', x, y, xx); 62 | disp(' ') 63 | 64 | disp(' plot(p,''.'')'); 65 | plot(p,'.') 66 | disp(' hold on'); 67 | hold on 68 | disp(' plot([1 length(p)],[0.5 0.5],''r'')'); 69 | plot([1 length(p)],[0.5 0.5],'r') 70 | xlabel('test case number') 71 | ylabel('predictive probability') 72 | axis([0 length(p) 0 1]) 73 | 74 | disp(' ') 75 | disp('Press any key to continue.') 76 | disp(' ') 77 | pause 78 | 79 | disp('Keep in mind that the test cases are ordered according to their') 80 | disp('target class. Notice that there are misclassifications, but there are') 81 | disp('no very confident misclassifications. The number of test set errors') 82 | disp('(out of 773 test cases) when thresholding the predictive probability at') 83 | disp('0.5 and the average amount of information about the test set labels in') 84 | disp('excess of a 50/50 model in bits are given by:') 85 | disp(' ') 86 | 87 | disp(' sum((p>0.5)~=(yy>0))') 88 | sum((p>0.5)~=(yy>0)) 89 | disp(' mean((yy==1).*log2(p)+(yy==-1).*log2(1-p))+1') 90 | mean((yy==1).*log2(p)+(yy==-1).*log2(1-p))+1 91 | 92 | disp(' ') 93 | disp('Press any key to continue.') 94 | disp(' ') 95 | pause 96 | 97 | disp('These results were obtained by simply guessing some values for the') 98 | disp('hyperparameters. We can instead optimize the marginal likelihood on') 99 | disp('the training set w.r.t. the hyperparameters. The current values'); 100 | disp('of the log hyperparameters (2 numbers), and the initial value') 101 | disp('of the negative log marginal likelihood are:') 102 | disp(' ') 103 | 104 | disp(' [loghyper'' binaryEPGP(loghyper, ''covSEiso'', x, y)]') 105 | [loghyper' binaryEPGP(loghyper, 'covSEiso', x, y)] 106 | 107 | 108 | 109 | disp(' ') 110 | disp('Press any key to continue.') 111 | disp(' ') 112 | pause 113 | 114 | disp('Now minimize the negative log marginal likelihood w.r.t. the') 115 | disp('hyperparameters, starting at the current values of loghyper.
The third') 116 | disp('argument, -20, tells minimize to evaluate the function a maximum of 20') 117 | disp('times... WARNING: this may take 30 minutes or so... depending on your') 118 | disp('machine and whether you compiled the mex files... press ''ctrl-C'' to') 119 | disp('abort now, otherwise...') 120 | disp(' ') 121 | disp('Press any key to continue.') 122 | disp(' ') 123 | pause 124 | 125 | disp(' [newloghyper logmarglik] = minimize(loghyper, ''binaryEPGP'', -20, ''covSEiso'', x, y);') 126 | [newloghyper logmarglik] = minimize(loghyper, 'binaryEPGP', -20, 'covSEiso', x, y); 127 | disp(' [newloghyper'' logmarglik(end)]') 128 | [newloghyper' logmarglik(end)] 129 | 130 | disp(' ') 131 | disp('This shows that the log marginal likelihood was increased from -222 to') 132 | disp('-90 by optimizing the hyperparameters. This means that the marginal') 133 | disp('likelihood has increased by a factor of exp(222-90) = 2e+57.') 134 | 135 | disp(' ') 136 | disp('Press any key to continue.') 137 | disp(' ') 138 | pause 139 | 140 | disp('Finally, we can make test set predictions with the new hyperparameters:') 141 | disp(' ') 142 | 143 | disp(' pp = binaryEPGP(newloghyper, ''covSEiso'', x, y, xx);') 144 | pp = binaryEPGP(newloghyper, 'covSEiso', x, y, xx); 145 | disp(' plot(pp,''g.'')'); 146 | plot(pp,'g.') 147 | 148 | disp(' ') 149 | disp('We note that the new predictions (in green) take much more extreme') 150 | disp('values than the old ones (in blue).') 151 | 152 | disp(' ') 153 | disp('Press any key to continue.') 154 | disp(' ') 155 | pause 156 | 157 | disp('The number of test set errors (out of 773 test cases) when') 158 | disp('thresholding the predictive probability at 0.5 and the average amount') 159 | disp('of information about the test set labels in excess of a 50/50 model') 160 | disp('in bits are given by:') 161 | disp(' ') 162 | 163 | disp(' sum((pp>0.5)~=(yy>0))') 164 | sum((pp>0.5)~=(yy>0)) 165 | disp(' mean((yy==1).*log2(pp)+(yy==-1).*log2(1-pp))+1') 166 | mean((yy==1).*log2(pp)+(yy==-1).*log2(1-pp))+1 167 | 168 | disp(' ') 169 | disp('showing that the misclassification rate has dropped and the information') 170 | disp('about the test target labels has increased compared to using the old') 171 | disp('initially guessed values for the hyperparameters.') 172 | disp(' ') 173 | disp('Press any key to exit.') 174 | disp(' ') 175 | pause 176 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml-demo/demo_gprsparse.m: -------------------------------------------------------------------------------- 1 | % demo script to show Subset of Data (SD), Subset of Regressors (SR) and 2 | % Projected Process (PP) approximations to GPR described in chapter 8 on the 3 | % Boston Housing data 4 | 5 | % The Boston housing data set was originally published by Harrison, D. and 6 | % Rubinfeld, D.
L., Hedonic housing prices and the demand for clean air, 7 | % Journal of Environmental Economics and Management 5, 81-102 (1978) and is 8 | % publicly available at the UCI database "UCI Repository of machine learning 9 | % databases", http://www.ics.uci.edu/~mlearn/MLRepository.html and DELVE 10 | % http://www.cs.utoronto.ca/~delve 11 | % 12 | % (C) Copyright 2005, 2006 by Chris Williams (2006-03-29) 13 | 14 | if isempty(regexp(path,['gpml' pathsep])) 15 | cd ..; w = pwd; addpath([w, '/gpml']); cd gpml-demo % add code dir to path 16 | end 17 | 18 | hold off 19 | clear 20 | clc 21 | 22 | disp('This demonstration illustrates the use of three approximate methods for') 23 | disp('GPR, namely the subset of datapoints (SD), subset of regressors (SR)') 24 | disp('and projected process (PP) methods.') 25 | disp(' '); 26 | disp('We use the Boston housing data of Harrison, D. and Rubinfeld, D. L.,') 27 | disp('Journal of Environmental Economics and Management 5, 81-102 (1978).') 28 | disp('This dataset has 13 input variables and one output target. A split of') 29 | disp('455 training points and 51 test points is used. The data has been') 30 | disp('scaled so that each variable has approximately zero mean and unit') 31 | disp('variance.') 32 | disp(' ') 33 | disp('We use Gaussian process regression with a squared exponential') 34 | disp('covariance function, and allow a separate lengthscale for each input') 35 | disp('dimension, as in eqs. 5.1 and 5.2 of Rasmussen and Williams (2006).') 36 | disp(' ') 37 | disp('Press any key to continue') 38 | pause; 39 | 40 | disp(' ') 41 | disp('The training and test data is contained in the file data_boston.mat') 42 | disp('The raw training data is in the input matrix X (455 by 13) and the') 43 | disp('target vector y (455 by 1). First, load the data') 44 | disp(' ') 45 | disp(' load data_boston;') 46 | load data_boston; 47 | 48 | disp('the data has been scaled to zero mean and unit variance') 49 | disp('over the training and test data'); 50 | 51 | [n,D]=size(X); 52 | nstar = size(Xstar,1); 53 | 54 | % compute error of mean(y) predictor 55 | 56 | diff = ystar - ones(nstar,1)*mean(y); 57 | mse_dumb=sum(diff.^2)/nstar; 58 | vdumb = var(y)*ones(nstar,1); 59 | pll_dumb = (-0.5*sum(log(2*pi*vdumb)) - 0.5*sum((diff.*diff)./vdumb))/nstar; 60 | 61 | disp(' ') 62 | disp(' m = 200; % choose size of the subset, m<=n') 63 | m = 200; % choose size of the subset, m<=n 64 | disp(' ') 65 | disp('A random subset of the training data points is selected using the') 66 | disp('randperm function. This set is of size m.') 67 | disp(' ') 68 | 69 | % now select random training set of size m 70 | rand('state',0); 71 | disp(' perm = randperm(n);') 72 | perm = randperm(n); 73 | disp(' INDEX = perm(1:m);') 74 | INDEX = perm(1:m); 75 | disp(' Xm = X(INDEX,:);') 76 | Xm = X(INDEX,:); 77 | disp(' ym = y(INDEX);') 78 | ym = y(INDEX); 79 | 80 | disp(' ') 81 | disp('We use a covariance function made up of the sum of a squared') 82 | disp('exponential (SE) covariance term with ARD, and independent noise.') 83 | disp('Thus, the covariance function is specified as follows:') 84 | disp(' ') 85 | disp(' covfunc = {''covSum'', {''covSEard'',''covNoise''}};') 86 | covfunc = {'covSum', {'covSEard','covNoise'}}; 87 | 88 | disp(' '); 89 | disp('The hyperparameters are stored as') 90 | disp(' ') 91 | disp(' logtheta = [log(ell_1), log(ell_2), ... log(ell_13), log(sigma_f), log(sigma_n)]') 92 | disp(' ') 93 | disp('(as D = 13), and are initialized to') 94 | disp(' ') 95 | disp(' logtheta0 = [0 0 ...
0 0 -1.15]') 96 | disp(' '); 97 | disp('Note that the noise standard deviation is set to exp(-1.15)') 98 | disp('corresponding to a noise variance of 0.1.') 99 | disp(' ') 100 | disp('The hyperparameters are trained by maximizing the approximate marginal') 101 | disp('likelihood of the SD method as per eq. 8.31, which simply computes the') 102 | disp('marginal likelihood of the subset of size m.') 103 | disp(' ') 104 | disp('Press any key to optimize the approximate marginal likelihood.') 105 | pause; 106 | 107 | % train hyperparameters 108 | logtheta0 = zeros(D+2,1); % starting values of log hyperparameters 109 | logtheta0(D+2) = -1.15; % starting value for log(noise std dev) 110 | 111 | disp(' ') 112 | disp(' logtheta = minimize(logtheta0, ''gpr'', -100, covfunc, Xm, ym);') 113 | disp(' ') 114 | logtheta = minimize(logtheta0, 'gpr', -100, covfunc, Xm, ym); 115 | 116 | disp(' ') 117 | disp('Predictions can now be made:') 118 | disp(' ') 119 | disp('(1) using the SD method, which is implemented by calling gpr.m with the') 120 | disp(' appropriate subset of the training data') 121 | disp('(2) using the SR method,') 122 | disp('(3) using the PP method.') 123 | disp(' ') 124 | disp('The SR and PP methods are implemented in the function gprSRPP.m') 125 | disp(' ') 126 | disp('For comparison we also make predictions using gpr.m on the full') 127 | disp('training dataset, and a dumb predictor that just predicts the mean and') 128 | disp('variance of the training data.') 129 | disp(' ') 130 | disp('Press any key to make the predictions.') 131 | pause; 132 | 133 | % now make predictions: SD method 134 | 135 | disp(' ') 136 | disp(' [fstarSD S2SD] = gpr(logtheta, covfunc, Xm, ym, Xstar); % SD method') 137 | [fstarSD S2SD] = gpr(logtheta, covfunc, Xm, ym, Xstar); 138 | 139 | resSD = fstarSD-ystar; % residuals 140 | mseSD = mean(resSD.^2); 141 | pllSD = (-0.5*sum(log(2*pi*S2SD)) - 0.5*sum((resSD.*resSD)./S2SD))/nstar; 142 | 143 | 144 | % now make predictions: SR and PP methods 145 | 146 | disp(' [fstarSRPP S2SR S2PP] = gprSRPP(logtheta, covfunc, X, INDEX, y, Xstar); % SR,PP') 147 | [fstarSRPP S2SR S2PP] = gprSRPP(logtheta, covfunc, X, INDEX, y, Xstar); 148 | 149 | resSR = fstarSRPP-ystar; 150 | mseSR = sum(resSR.^2)/nstar; 151 | msePP = mseSR; 152 | pllSR = -0.5*mean(log(2*pi*S2SR)+resSR.^2./S2SR); 153 | pllPP = -0.5*mean(log(2*pi*S2PP)+resSR.^2./S2PP); 154 | 155 | % for comparison, make predictions with the full training dataset 156 | 157 | [fstar S2] = gpr(logtheta, covfunc, X, y, Xstar); 158 | 159 | res = fstar-ystar; % residuals 160 | mse = mean(res.^2); 161 | pll = -0.5*mean(log(2*pi*S2)+res.^2./S2); 162 | 163 | 164 | disp(' ') 165 | disp('The test results are:') 166 | 167 | fprintf(1,'mse_full %g\t pll_full %g\n', mse, pll); 168 | fprintf(1,'mse_SD %g\t pll_SD %g\n', mseSD, pllSD); 169 | fprintf(1,'mse_SR %g\t pll_SR %g\n', mseSR, pllSR); 170 | fprintf(1,'mse_PP %g\t pll_PP %g\n', msePP, pllPP); 171 | fprintf(1,'mse_dumb %g\t pll_dumb %g\n', mse_dumb, pll_dumb); 172 | 173 | disp(' ') 174 | disp('where mse denotes mean squared error and pll denotes predictive log') 175 | disp('likelihood. A higher (less negative) pll is more desirable. Note that') 176 | disp('the mse for the SR and PP methods is identical as expected. The SR and') 177 | disp('PP methods outperform SD on mse, and are close to the full mse. 
On pll,') 178 | disp('the PP method does slightly better than the full predictor, followed by') 179 | disp('the SD and SR methods.') 180 | 181 | disp(' ') 182 | disp('Press any key to end.') 183 | pause 184 | 185 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml-demo/demo_laplace_usps.m: -------------------------------------------------------------------------------- 1 | % Demo script to illustrate use of binaryLaplaceGP.m on a binary digit 2 | % classification task. 2006-03-29. 3 | 4 | if isempty(regexp(path,['gpml' pathsep])) 5 | cd ..; w = pwd; addpath([w, '/gpml']); cd gpml-demo % add code dir to path 6 | end 7 | 8 | hold off 9 | clear 10 | clf 11 | clc 12 | 13 | disp('This demonstration illustrates the use of Laplace''s approximation for') 14 | disp('binary Gaussian process classification applied to a digit task.') 15 | disp(' ') 16 | 17 | if exist('loadBinaryUSPS') ~= 2 18 | disp('Error: can''t find the loadBinaryUSPS.m file. For this example, you') 19 | disp('need to download the usps_resampled archive. It is available at') 20 | disp('http://www.GaussianProcess.org/gpml/data') 21 | return 22 | end 23 | 24 | disp('The data consists of 16 by 16 grey scale images of handwritten digits') 25 | disp('derived from the USPS data set. We will consider the binary') 26 | disp('classification task of separating 3''s from 5''s. The training set') 27 | disp('contains 767 cases and the test set 773 cases. Here is an example') 28 | disp('image of a digit 3.'); 29 | disp(' '); 30 | 31 | disp(' [x y xx yy] = loadBinaryUSPS(3, 5);') 32 | [x y xx yy] = loadBinaryUSPS(3, 5); 33 | disp(' imagesc(reshape(x(3,:),16,16)''), colormap(''gray'')') 34 | imagesc(reshape(x(3,:),16,16)'), colormap('gray') 35 | 36 | disp(' ') 37 | disp('Press any key to continue.') 38 | disp(' ') 39 | pause 40 | 41 | disp('We must specify a covariance function and a likelihood function. For') 42 | disp('the demonstration we use the squared exponential (SE) covariance') 43 | disp('function but many other covariance functions are supported as well.') 44 | disp('The SE covariance function has two parameters; a log length-scale') 45 | disp('parameter and a log magnitude parameter. As an initial guess for the') 46 | disp('parameters, we set the log length-scale to the log of the average') 47 | disp('pairwise distance between training points, roughly log(22)=3 and the') 48 | disp('magnitude is set to unity, i.e. its log is set to 0. Other initial choices') 49 | disp('could be reasonable too. We choose the cumulative Gaussian as the') 50 | disp('likelihood function.') 51 | disp(' '); 52 | disp('We then call the binaryLaplaceGP function, which constructs the Laplace') 53 | disp('approximation of the posterior over functions based on the training set') 54 | disp('and produces probabilistic predictions for the test cases.
This may') 55 | disp('take a minute or so...') 56 | disp(' ') 57 | 58 | 59 | disp(' loghyper = [3.0; 0.0]; % set the log hyperparameters') 60 | loghyper = [3.0; 0.0]; % set the log hyperparameters 61 | disp(' p = binaryLaplaceGP(loghyper, ''covSEiso'', ''cumGauss'', x, y, xx);') 62 | p = binaryLaplaceGP(loghyper, 'covSEiso', 'cumGauss', x, y, xx); 63 | disp(' ') 64 | 65 | disp(' plot(p,''.'')'); 66 | plot(p,'.') 67 | disp(' hold on'); 68 | hold on 69 | disp(' plot([1 length(p)],[0.5 0.5],''r'')'); 70 | plot([1 length(p)],[0.5 0.5],'r') 71 | xlabel('test case number') 72 | ylabel('predictive probability') 73 | axis([0 length(p) 0 1]) 74 | 75 | disp(' ') 76 | disp('Press any key to continue.') 77 | disp(' ') 78 | pause 79 | 80 | disp('Keep in mind that the test cases are ordered according to their') 81 | disp('target class. Notice that there are misclassifications, but there are') 82 | disp('no very confident misclassifications. The number of test set errors') 83 | disp('(out of 773 test cases) when thresholding the predictive probability at') 84 | disp('0.5 and the average amount of information about the test set labels in') 85 | disp('excess of a 50/50 model in bits are given by:') 86 | disp(' ') 87 | 88 | disp(' sum((p>0.5)~=(yy>0))') 89 | sum((p>0.5)~=(yy>0)) 90 | disp(' mean((yy==1).*log2(p)+(yy==-1).*log2(1-p))+1') 91 | mean((yy==1).*log2(p)+(yy==-1).*log2(1-p))+1 92 | 93 | disp(' ') 94 | disp('Press any key to continue.') 95 | disp(' ') 96 | pause 97 | 98 | disp('These results were obtained by simply guessing some values for the') 99 | disp('hyperparameters. We can instead optimize the marginal likelihood on') 100 | disp('the training set w.r.t. the hyperparameters. The current values'); 101 | disp('of the log hyperparameters (2 numbers), and the initial value') 102 | disp('of the negative log marginal likelihood are:') 103 | disp(' ') 104 | 105 | disp(' [loghyper'' binaryLaplaceGP(loghyper, ''covSEiso'', ''cumGauss'', x, y)]') 106 | [loghyper' binaryLaplaceGP(loghyper, 'covSEiso', 'cumGauss', x, y)] 107 | 108 | disp(' ') 109 | disp('Press any key to continue.') 110 | disp(' ') 111 | pause 112 | 113 | disp('Now, we minimize the negative log marginal likelihood w.r.t. the') 114 | disp('hyperparameters. The third argument -20 tells minimize to evaluate the') 115 | disp('function a maximum of 20 times... this may take a few minutes or so...') 116 | disp('depending on whether you compiled the mex files...') 117 | disp(' ') 118 | 119 | disp(' [newloghyper logmarglik] = minimize(loghyper, ''binaryLaplaceGP'', -20, ''covSEiso'', ''cumGauss'', x, y);') 120 | [newloghyper logmarglik] = minimize(loghyper, 'binaryLaplaceGP', -20, 'covSEiso', 'cumGauss', x, y); 121 | disp(' [newloghyper'' logmarglik(end)]') 122 | [newloghyper' logmarglik(end)] 123 | 124 | disp(' ') 125 | disp('This shows that the log marginal likelihood was increased from -222 to') 126 | disp('-99 by optimizing the hyperparameters. 
This means that the marginal') 127 | disp('likelihood has increased by a factor of exp(222-99) = 3e+53.') 128 | 129 | disp(' ') 130 | disp('Press any key to continue.') 131 | disp(' ') 132 | pause 133 | 134 | disp('Finally, we can make test set predictions with the new hyperparameters:') 135 | disp(' ') 136 | 137 | disp(' pp = binaryLaplaceGP(newloghyper, ''covSEiso'', ''cumGauss'', x, y, xx);') 138 | pp = binaryLaplaceGP(newloghyper, 'covSEiso', 'cumGauss', x, y, xx); 139 | disp(' plot(pp,''g.'')'); 140 | plot(pp,'g.') 141 | 142 | disp(' ') 143 | disp('We note that the new predictions (in green) take much more extreme') 144 | disp('values values than the old ones (in blue).') 145 | 146 | disp(' ') 147 | disp('Press any key to continue.') 148 | disp(' ') 149 | pause 150 | 151 | disp('The number of test set errors (out of 773 test cases) when') 152 | disp('thresholding the predictive probability at 0.5 and the average amount') 153 | disp('of information about the test set labels in excess of a 50/50 model') 154 | disp('in bits are given by:') 155 | disp(' ') 156 | 157 | disp(' sum((pp>0.5)~=(yy>0))') 158 | sum((pp>0.5)~=(yy>0)) 159 | disp(' mean((yy==1).*log2(pp)+(yy==-1).*log2(1-pp))+1') 160 | mean((yy==1).*log2(pp)+(yy==-1).*log2(1-pp))+1 161 | 162 | disp(' ') 163 | disp('showing that misclassification rate has dropped and the information') 164 | disp('about the test target labels has increased compared to using the old') 165 | disp('initially guessed values for the hyperparaneters.') 166 | disp(' ') 167 | disp('Press any key to exit.') 168 | disp(' ') 169 | pause 170 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/Contents.m: -------------------------------------------------------------------------------- 1 | % gpml: code from Rasmussen & Williams: Gaussian Processes for Machine Learning 2 | % date: 2007-07-25. 
3 | % 4 | % approxEP.m - the approximation method for Expectation Propagation 5 | % approxLA.m - the approximation method for Laplace's approximation 6 | % approximations.m - help for approximation methods 7 | % binaryEPGP.m - outdated, the EP approx for binary GP classification 8 | % binaryGP.m - binary Gaussian process classification 9 | % binaryLaplaceGP.m - outdated, Laplace's approx for binary GP classification 10 | % 11 | % covConst.m - covariance for constant functions 12 | % covFunctions.m - help file with overview of covariance functions 13 | % covLINard.m - linear covariance function with ard 14 | % covLINone.m - linear covariance function 15 | % covMatern3iso.m - Matern covariance function with nu=3/2 16 | % covMatern5iso.m - Matern covariance function with nu=5/2 17 | % covNNone.m - neural network covariance function 18 | % covNoise.m - independent covariance function (i.e. white noise) 19 | % covPeriodic.m - covariance for smooth periodic function, with unit period 20 | % covProd.m - function for multiplying other covariance functions 21 | % covRQard.m - rational quadratic covariance function with ard 22 | % covRQiso.m - isotropic rational quadratic covariance function 23 | % covSEard.m - squared exponential covariance function with ard 24 | % covSEiso.m - isotropic squared exponential covariance function 25 | % covSum.m - function for adding other covariance functions 26 | % 27 | % cumGauss.m - cumulative Gaussian likelihood function 28 | % gpr.m - Gaussian process regression with general covariance 29 | % function 30 | % gprSRPP.m - Implements SR and PP approximations to GPR 31 | % likelihoods.m - help function for classification likelihoods 32 | % logistic.m - logistic likelihood function 33 | % minimize.m - Minimize a differentiable multivariate function 34 | % solve_chol.c - Solve linear equations from the Cholesky factorization 35 | % should be compiled into a mex file 36 | % solve_chol.m - A matlab implementation of the above, used only in case 37 | % the mex file wasn't generated (not very efficient) 38 | % sq_dist.c - Compute a matrix of all pairwise squared distances 39 | % should be compiled into a mex file 40 | % sq_dist.m - A matlab implementation of the above, used only in case 41 | % the mex file wasn't generated (not very efficient) 42 | % 43 | % See also the help for the demonstration scripts in the gpml-demo directory 44 | % 45 | % Copyright (c) 2005, 2006 by Carl Edward Rasmussen and Chris Williams 46 | 47 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/Copyright: -------------------------------------------------------------------------------- 1 | 2 | Software that implements 3 | 4 | GAUSSIAN PROCESS REGRESSION AND CLASSIFICATION 5 | 6 | Copyright (c) 2005 - 2007 by Carl Edward Rasmussen and Chris Williams 7 | 8 | Permission is granted for anyone to copy, use, or modify these programs for 9 | purposes of research or education, provided this copyright notice is retained, 10 | and note is made of any changes that have been made. 11 | 12 | These programs are distributed without any warranty, express or 13 | implied. As these programs were written for research purposes only, they 14 | have not been tested to the degree that would be advisable in any 15 | important application. All use of these programs is entirely at the 16 | user's own risk.
17 | 18 | The code and associated documentation are available from 19 | 20 | http://www.GaussianProcess.org/gpml/code 21 | 22 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/Makefile: -------------------------------------------------------------------------------- 1 | all: sq_dist.mexglx solve_chol.mexglx 2 | 3 | sq_dist.mexglx: sq_dist.c 4 | mex sq_dist.c 5 | 6 | solve_chol.mexglx: solve_chol.c 7 | mex solve_chol.c 8 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/approxEP.m: -------------------------------------------------------------------------------- 1 | function [alpha, sW, L, nlZ, dnlZ] = approxEP(hyper, covfunc, lik, x, y) 2 | 3 | % Expectation Propagation approximation to the posterior Gaussian Process. 4 | % The function takes a specified covariance function (see covFunctions.m) and 5 | % likelihood function (see likelihoods.m), and is designed to be used with 6 | % binaryGP.m. See also approximations.m. In the EP algorithm, the sites are 7 | % updated in random order, for better performance when cases are ordered 8 | % according to the targets. 9 | % 10 | % Copyright (c) 2006, 2007 Carl Edward Rasmussen and Hannes Nickisch 2007-07-24 11 | 12 | persistent best_ttau best_tnu best_nlZ % keep tilde parameters between calls 13 | tol = 1e-3; max_sweep = 10; % tolerance for when to stop EP iterations 14 | 15 | n = size(x,1); 16 | K = feval(covfunc{:}, hyper, x); % evaluate the covariance matrix 17 | 18 | % A note on naming: variables are given short but descriptive names in 19 | % accordance with Rasmussen & Williams "GPs for Machine Learning" (2006): mu 20 | % and s2 are mean and variance, nu and tau are natural parameters. A leading t 21 | % means tilde, a subscript _ni means "not i" (for cavity parameters), or _n 22 | % for a vector of cavity parameters. 23 | 24 | if any(size(best_ttau) ~= [n 1]) % find starting point for tilde parameters 25 | ttau = zeros(n,1); % initialize to zero if we have no better guess 26 | tnu = zeros(n,1); 27 | Sigma = K; % initialize Sigma and mu, the parameters of .. 28 | mu = zeros(n, 1); % .. the Gaussian posterior approximation 29 | nlZ = n*log(2); 30 | best_nlZ = Inf; 31 | else 32 | ttau = best_ttau; % try the tilde values from previous call 33 | tnu = best_tnu; 34 | [Sigma, mu, nlZ, L] = epComputeParams(K, y, ttau, tnu, lik); 35 | if nlZ > n*log(2) % if zero is better .. 36 | ttau = zeros(n,1); % .. then initialize with zero instead 37 | tnu = zeros(n,1); 38 | Sigma = K; % initialize Sigma and mu, the parameters of .. 39 | mu = zeros(n, 1); % .. the Gaussian posterior approximation 40 | nlZ = n*log(2); 41 | end 42 | end 43 | nlZ_old = Inf; sweep = 0; % make sure while loop starts 44 | 45 | while nlZ < nlZ_old - tol && sweep < max_sweep % converged or max. sweeps? 46 | 47 | nlZ_old = nlZ; sweep = sweep+1; 48 | for i = randperm(n) % iterate EP updates (in random order) over examples 49 | 50 | tau_ni = 1/Sigma(i,i)-ttau(i); % first find the cavity distribution .. 51 | nu_ni = mu(i)/Sigma(i,i)-tnu(i); % ..
parameters tau_ni and nu_ni 52 | 53 | % compute the desired raw moments m0, m1=hmu and m2; m0 is not used 54 | [m0, m1, m2] = feval(lik, y(i), nu_ni/tau_ni, 1/tau_ni); 55 | hmu = m1./m0; 56 | hs2 = m2./m0 - hmu^2; % compute second central moment 57 | 58 | ttau_old = ttau(i); % then find the new tilde parameters 59 | ttau(i) = 1/hs2 - tau_ni; 60 | tnu(i) = hmu/hs2 - nu_ni; 61 | 62 | ds2 = ttau(i) - ttau_old; % finally rank-1 update Sigma .. 63 | si = Sigma(:,i); 64 | Sigma = Sigma - ds2/(1+ds2*si(i))*si*si'; % takes 70% of total time 65 | mu = Sigma*tnu; % .. and recompute mu 66 | 67 | end 68 | 69 | [Sigma, mu, nlZ, L] = epComputeParams(K, y, ttau, tnu, lik); % recompute 70 | % Sigma & mu since repeated rank-one updates can destroy numerical precision 71 | end 72 | 73 | if sweep == max_sweep 74 | disp('Warning: maximum number of sweeps reached in function approxEP') 75 | end 76 | 77 | if nlZ < best_nlZ % if best so far .. 78 | best_ttau = ttau; best_tnu = tnu; best_nlZ = nlZ; % .. keep for next call 79 | end 80 | 81 | sW = sqrt(ttau); % compute output arguments, L and nlZ are done 82 | alpha = tnu-sW.*solve_chol(L,sW.*(K*tnu)); 83 | 84 | if nargout > 4 % do we want derivatives? 85 | dnlZ = zeros(size(hyper)); % allocate space for derivatives 86 | F = alpha*alpha'-repmat(sW,1,n).*solve_chol(L,diag(sW)); 87 | for j=1:length(hyper) 88 | dK = feval(covfunc{:}, hyper, x, j); 89 | dnlZ(j) = -sum(sum(F.*dK))/2; 90 | end 91 | end 92 | 93 | 94 | % function to compute the parameters of the Gaussian approximation, Sigma and 95 | % mu, and the negative log marginal likelihood, nlZ, from the current site 96 | % parameters, ttau and tnu. Also returns L (useful for predictions). 97 | function [Sigma, mu, nlZ, L] = epComputeParams(K, y, ttau, tnu, lik) 98 | 99 | n = length(y); % number of training cases 100 | ssi = sqrt(ttau); % compute Sigma and mu 101 | L = chol(eye(n)+ssi*ssi'.*K); % L'*L=B=eye(n)+sW*K*sW 102 | V = L'\(repmat(ssi,1,n).*K); 103 | Sigma = K - V'*V; 104 | mu = Sigma*tnu; 105 | 106 | tau_n = 1./diag(Sigma)-ttau; % compute the log marginal likelihood 107 | nu_n = mu./diag(Sigma)-tnu; % vectors of cavity parameters 108 | nlZ = sum(log(diag(L))) - sum(log(feval(lik, y, nu_n./tau_n, 1./tau_n))) ... 109 | -tnu'*Sigma*tnu/2 - nu_n'*((ttau./tau_n.*nu_n-2*tnu)./(ttau+tau_n))/2 ... 110 | +sum(tnu.^2./(tau_n+ttau))/2-sum(log(1+ttau./tau_n))/2; 111 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/approxLA.m: -------------------------------------------------------------------------------- 1 | function [alpha, sW, L, nlZ, dnlZ] = approxLA(hyper, covfunc, lik, x, y) 2 | 3 | % Laplace approximation to the posterior Gaussian Process. 4 | % The function takes a specified covariance function (see covFunctions.m) and 5 | % likelihood function (see likelihoods.m), and is designed to be used with 6 | % binaryGP.m. See also approximations.m.
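%
% In outline (a reading of the Newton loop below, in the notation of
% Rasmussen and Williams, Algorithm 3.1): each iteration maximizes
% Psi(f) = -f'*inv(K)*f/2 + log p(y|f) via the stabilized update
% f_new = inv(inv(K)+W) * (W*f + dlp), computed without forming inv(K):
% with B = I + sW*K*sW (Cholesky factor L) and b = W.*f + dlp, it sets
% alpha = b - sW.*solve_chol(L, sW.*(K*b)) and f = K*alpha.
%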
7 | % 8 | % Copyright (c) 2006, 2007 Carl Edward Rasmussen and Hannes Nickisch 2007-03-29 9 | 10 | persistent best_alpha best_nlZ % copy of the best alpha and its obj value 11 | tol = 1e-6; % tolerance for when to stop the Newton iterations 12 | 13 | n = size(x,1); 14 | K = feval(covfunc{:}, hyper, x); % evaluate the covariance matrix 15 | 16 | if any(size(best_alpha) ~= [n,1]) % find a good starting point for alpha and f 17 | f = zeros(n,1); alpha = f; % start at zero 18 | [lp,dlp,d2lp] = feval(lik,y,f,'deriv'); W=-d2lp; 19 | Psi_new = lp; best_nlZ = Inf; 20 | else 21 | alpha = best_alpha; f = K*alpha; % try best so far 22 | [lp,dlp,d2lp] = feval(lik,y,f,'deriv'); W=-d2lp; 23 | Psi_new = -alpha'*f/2 + lp; 24 | if Psi_new < -n*log(2) % if zero is better .. 25 | f = zeros(n,1); alpha = f; % .. go back 26 | [lp,dlp,d2lp] = feval(lik,y,f,'deriv'); W=-d2lp; 27 | Psi_new = -alpha'*f/2 + lp; 28 | end 29 | end 30 | Psi_old = -Inf; % make sure while loop starts 31 | 32 | while Psi_new - Psi_old > tol % begin Newton's iterations 33 | Psi_old = Psi_new; alpha_old = alpha; 34 | sW = sqrt(W); 35 | L = chol(eye(n)+sW*sW'.*K); % L'*L=B=eye(n)+sW*K*sW 36 | b = W.*f+dlp; 37 | alpha = b - sW.*solve_chol(L,sW.*(K*b)); 38 | f = K*alpha; 39 | [lp,dlp,d2lp,d3lp] = feval(lik,y,f,'deriv'); W=-d2lp; 40 | 41 | Psi_new = -alpha'*f/2 + lp; 42 | i = 0; 43 | while i < 10 && Psi_new < Psi_old % if objective didn't increase 44 | alpha = (alpha_old+alpha)/2; % reduce step size by half 45 | f = K*alpha; 46 | [lp,dlp,d2lp,d3lp] = feval(lik,y,f,'deriv'); W=-d2lp; 47 | Psi_new = -alpha'*f/2 + lp; 48 | i = i+1; 49 | end 50 | end % end Newton's iterations 51 | 52 | sW = sqrt(W); % recalculate L 53 | L = chol(eye(n)+sW*sW'.*K); % L'*L=B=eye(n)+sW*K*sW 54 | nlZ = alpha'*f/2 - lp + sum(log(diag(L))); % approx neg log marg likelihood 55 | 56 | if nlZ < best_nlZ % if best so far .. 57 | best_alpha = alpha; best_nlZ = nlZ; % .. then remember for next call 58 | end 59 | 60 | if nargout >= 4 % do we want derivatives? 61 | dnlZ = zeros(size(hyper)); % allocate space for derivatives 62 | Z = repmat(sW,1,n).*solve_chol(L, diag(sW)); 63 | C = L'\(repmat(sW,1,n).*K); 64 | s2 = 0.5*(diag(K)-sum(C.^2,1)').*d3lp; 65 | for j=1:length(hyper) 66 | dK = feval(covfunc{:}, hyper, x, j); 67 | s1 = alpha'*dK*alpha/2-sum(sum(Z.*dK))/2; 68 | b = dK*dlp; 69 | s3 = b-K*(Z*b); 70 | dnlZ(j) = -s1-s2'*s3; 71 | end 72 | end 73 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/approximations.m: -------------------------------------------------------------------------------- 1 | % approximations: Exact inference for Gaussian process classification is 2 | % intractable, and approximations are necessary. Different approximation 3 | % techniques have been implemented, which all rely on a Gaussian approximation 4 | % to the non-Gaussian posterior: 5 | % 6 | % approxEP the Expectation Propagation (EP) algorithm 7 | % approxLA Laplace's method 8 | % 9 | % which are used by the Gaussian process classification function binaryGP.m.
10 | % The interface to the approximation methods is the following: 11 | % 12 | % function [alpha, sW, L, nlZ, dnlZ] = approx..(hyper, covfunc, lik, x, y) 13 | % 14 | % where: 15 | % 16 | % hyper is a column vector of hyperparameters 17 | % covfunc is the name of the covariance function (see covFunctions.m) 18 | % lik is the name of the likelihood function (see likelihoods.m) 19 | % x is an n by D matrix of training inputs 20 | % y is a (column) vector (of size n) of binary +1/-1 targets 21 | % nlZ is the returned value of the negative log marginal likelihood 22 | % dnlZ is a (column) vector of partial derivatives of the negative 23 | % log marginal likelihood wrt each hyperparameter 24 | % alpha is a (sparse or full column vector) containing inv(K)*m, where K 25 | % is the prior covariance matrix and m the approx posterior mean 26 | % sW is a (sparse or full column) vector containing diagonal of sqrt(W) 27 | % the approximate posterior covariance matrix is inv(inv(K)+W) 28 | % L is a (sparse or full) matrix, L = chol(sW*K*sW+eye(n)) 29 | % 30 | % Usually, the approximate posterior to be returned admits the form 31 | % N(m=K*alpha, V=inv(inv(K)+W)), where alpha is a vector and W is diagonal; 32 | % if not, then L contains instead -inv(K+inv(W)), and sW is unused. 33 | % 34 | % For more information on the individual approximation methods and their 35 | % implementations, see the separate approx??.m files. See also binaryGP.m. 36 | % 37 | % Copyright (c) by Carl Edward Rasmussen and Hannes Nickisch, 2007-06-25. 38 | 39 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/binaryEPGP.m: -------------------------------------------------------------------------------- 1 | function varargout = binaryEPGP(hyper, covfunc, varargin) 2 | 3 | % binaryEPGP - The Expectation Propagation approximation for binary Gaussian 4 | % process classification. Two modes are possible: training or testing: if no 5 | % test cases are supplied, then the approximate negative log marginal 6 | % likelihood and its partial derivatives wrt the hyperparameters are computed; 7 | % this mode is used to fit the hyperparameters. If test cases are given, then 8 | % the test set predictive probabilities are returned. The program is flexible 9 | % in allowing a multitude of covariance functions. 10 | % 11 | % usage: [nlZ, dnlZ ] = binaryEPGP(hyper, covfunc, x, y); 12 | % or: [p, mu, s2, nlZ] = binaryEPGP(hyper, covfunc, x, y, xstar); 13 | % 14 | % where: 15 | % 16 | % hyper is a (column) vector of hyperparameters 17 | % covfunc is the name of the covariance function (see below) 18 | % (the likelihood is fixed to the cumulative Gaussian by this wrapper) 19 | % x is an n by D matrix of training inputs 20 | % y is a (column) vector (of size n) of binary +1/-1 targets 21 | % xstar is an nn by D matrix of test inputs 22 | % nlZ is the returned value of the negative log marginal likelihood 23 | % dnlZ is a (column) vector of partial derivatives of the negative 24 | % log marginal likelihood wrt each log hyperparameter 25 | % p is a (column) vector (of length nn) of predictive probabilities 26 | % mu is a (column) vector (of length nn) of predictive latent means 27 | % s2 is a (column) vector (of length nn) of predictive latent variances 28 | % 29 | % The length of the vector of hyperparameters depends on the covariance 30 | % function, as specified by the "covfunc" input to the function, specifying the 31 | % name of a covariance function.
A number of different covariance functions are 32 | % implemented, and it is not difficult to add new ones. See "help covFunctions" 33 | % for the details. 34 | % 35 | % The function can conveniently be used with the "minimize" function to train 36 | % a Gaussian process, eg: 37 | % 38 | % [hyper, fX, i] = minimize(hyper, 'binaryEPGP', length, 'covSEiso', 39 | % x, y); 40 | % 41 | % Copyright (c) 2004, 2005, 2006, 2007 Carl Edward Rasmussen, 2007-02-19. 42 | 43 | if nargin<4 || nargin>5 44 | disp('Usage: [nlZ, dnlZ ] = binaryEPGP(hyper, covfunc, x, y);') 45 | disp(' or: [p, mu, s2, nlZ] = binaryEPGP(hyper, covfunc, x, y, xstar);') 46 | return 47 | end 48 | 49 | % Note, this function is just a wrapper provided for backward compatibility, 50 | % the functionality is now provided by the more general binaryGP function. 51 | 52 | varargout = cell(nargout, 1); % allocate the right number of output arguments 53 | [varargout{:}] = binaryGP(hyper, 'approxEP', covfunc, 'cumGauss', varargin{:}); 54 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/binaryLaplaceGP.m: -------------------------------------------------------------------------------- 1 | function varargout = binaryLaplaceGP(hyper, covfunc, lik, varargin) 2 | 3 | % binaryLaplaceGP - Laplace's approximation for binary Gaussian process 4 | % classification. Two modes are possible, training or testing: if no test 5 | % cases are supplied, then the approximate negative log marginal likelihood 6 | % and its partial derivatives wrt the hyperparameters are computed; this mode is 7 | % used to fit the hyperparameters. If test cases are given, then the test set 8 | % predictive probabilities are returned. The program is flexible in allowing 9 | % several different likelihood functions and a multitude of covariance 10 | % functions. 11 | % 12 | % usage: [nlZ, dnlZ ] = binaryLaplaceGP(hyper, covfunc, lik, x, y); 13 | % or: [p, mu, s2, nlZ] = binaryLaplaceGP(hyper, covfunc, lik, x, y, xstar); 14 | % 15 | % where: 16 | % 17 | % hyper is a (column) vector of hyperparameters 18 | % covfunc is the name of the covariance function (see below) 19 | % lik is the name of the likelihood function (see below) 20 | % x is a n by D matrix of training inputs 21 | % y is a (column) vector (of size n) of binary +1/-1 targets 22 | % xstar is a nn by D matrix of test inputs 23 | % nlZ is the returned value of the negative log marginal likelihood 24 | % dnlZ is a (column) vector of partial derivatives of the negative 25 | % log marginal likelihood wrt each log hyperparameter 26 | % p is a (column) vector (of length nn) of predictive probabilities 27 | % mu is a (column) vector (of length nn) of predictive latent means 28 | % s2 is a (column) vector (of length nn) of predictive latent variances 29 | % 30 | % The length of the vector of log hyperparameters depends on the covariance 31 | % function, as specified by the "covfunc" input to the function, specifying the 32 | % name of a covariance function. A number of different covariance functions are 33 | % implemented, and it is not difficult to add new ones. See "help covFunctions" 34 | % for the details. 35 | % 36 | % The shape of the likelihood function is given by the "lik" input to the 37 | % function, specifying the name of the likelihood function.
The two implemented 38 | % likelihood functions are: 39 | % 40 | % logistic the logistic function: 1/(1+exp(-x)) 41 | % cumGauss the cumulative Gaussian (error function) 42 | % 43 | % The function can conveniently be used with the "minimize" function to train 44 | % a Gaussian process, eg: 45 | % 46 | % [hyper, fX, i] = minimize(hyper, 'binaryLaplaceGP', length, 'covSEiso', 47 | % 'logistic', x, y); 48 | % 49 | % Copyright (c) 2004, 2005, 2006, 2007 by Carl Edward Rasmussen, 2007-02-19. 50 | 51 | if nargin<5 || nargin>6 52 | disp('Usage: [nlZ, dnlZ ] = binaryLaplaceGP(hyper, covfunc, lik, x, y);') 53 | disp(' or: [p, mu, s2, nlZ] = binaryLaplaceGP(hyper, covfunc, lik, x, y, xstar);') 54 | return 55 | end 56 | 57 | % Note, this function is just a wrapper provided for backward compatibility, 58 | % the functionality is now provided by the more general binaryGP function. 59 | 60 | varargout = cell(nargout, 1); % allocate the right number of output arguments 61 | [varargout{:}] = binaryGP(hyper, 'approxLA', covfunc, lik, varargin{:}); 62 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/covConst.m: -------------------------------------------------------------------------------- 1 | function [A, B] = covConst(logtheta, x, z); 2 | 3 | % covariance function for a constant function. The covariance function is 4 | % parameterized as: 5 | % 6 | % k(x^p,x^q) = 1/s2; 7 | % 8 | % The scalar hyperparameter is: 9 | % 10 | % logtheta = [ log(sqrt(s2)) ] 11 | % 12 | % For more help on design of covariance functions, try "help covFunctions". 13 | % 14 | % (C) Copyright 2006 by Carl Edward Rasmussen (2007-07-24) 15 | 16 | if nargin == 0, A = '1'; return; end % report number of parameters 17 | 18 | is2 = exp(-2*logtheta); % s2 inverse 19 | 20 | if nargin == 2 21 | A = is2; 22 | elseif nargout == 2 % compute test set covariances 23 | A = is2; 24 | B = is2; 25 | else % compute derivative matrix 26 | A = -2*is2*ones(size(x,1)); 27 | end 28 | 29 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/covFunctions.m: -------------------------------------------------------------------------------- 1 | % covariance functions to be used by Gaussian process functions. There are two 2 | % different kinds of covariance functions: simple and composite: 3 | % 4 | % simple covariance functions: 5 | % 6 | % covConst.m - covariance for constant functions 7 | % covLINard.m - linear covariance function with ard 8 | % covLINone.m - linear covariance function 9 | % covMatern3iso.m - Matern covariance function with nu=3/2 10 | % covMatern5iso.m - Matern covariance function with nu=5/2 11 | % covNNone.m - neural network covariance function 12 | % covNoise.m - independent covariance function (ie white noise) 13 | % covPeriodic.m - covariance for smooth periodic function with unit period 14 | % covRQard.m - rational quadratic covariance function with ard 15 | % covRQiso.m - isotropic rational quadratic covariance function 16 | % covSEard.m - squared exponential covariance function with ard 17 | % covSEiso.m - isotropic squared exponential covariance function 18 | % 19 | % composite covariance functions (see explanation at the bottom): 20 | % 21 | % covProd - products of covariance functions 22 | % covSum - sums of covariance functions 23 | % 24 | % Naming convention: all covariance functions start with "cov".
A trailing 25 | % "iso" means isotropic, "ard" means Automatic Relevance Determination, and 26 | % "one" means that the distance measure is parameterized by a single parameter. 27 | % 28 | % The covariance functions are written according to a special convention where 29 | % the exact behaviour depends on the number of input and output arguments 30 | % passed to the function. If you want to add new covariance functions, you 31 | % should follow this convention if you want them to work with the functions 32 | % gpr, binaryEPGP and binaryLaplaceGP. There are four different ways of calling 33 | % the covariance functions: 34 | % 35 | % 1) With no input arguments: 36 | % 37 | % p = covNAME 38 | % 39 | % The covariance function returns a string telling how many hyperparameters it 40 | % expects, using the convention that "D" is the dimension of the input space. 41 | % For example, calling "covRQard" returns the string '(D+2)'. 42 | % 43 | % 2) With two input arguments: 44 | % 45 | % K = covNAME(logtheta, x) 46 | % 47 | % The function computes and returns the covariance matrix where logtheta are 48 | % the log of the hyperparameters and x is an n by D matrix of cases, where 49 | % D is the dimension of the input space. The returned covariance matrix is of 50 | % size n by n. 51 | % 52 | % 3) With three input arguments and two output arguments: 53 | % 54 | % [v, B] = covNAME(loghyper, x, z) 55 | % 56 | % The function computes test set covariances; v is a vector of self covariances 57 | % for the test cases in z (of length nn) and B is a (n by nn) matrix of cross 58 | % covariances between training cases x and test cases z. 59 | % 60 | % 4) With three input arguments and a single output: 61 | % 62 | % D = covNAME(logtheta, x, z) 63 | % 64 | % The function computes and returns the n by n matrix of partial derivatives 65 | % of the training set covariance matrix with respect to logtheta(z), ie with 66 | % respect to the log of hyperparameter number z. 67 | % 68 | % The functions may retain a local copy of the covariance matrix for computing 69 | % derivatives, which is cleared as the last derivative is returned. 70 | % 71 | % About the specification of simple and composite covariance functions to be 72 | % used by the Gaussian process functions gpr, binaryEPGP and binaryLaplaceGP: 73 | % Covariance functions can be specified in two ways: either as a string 74 | % containing the name of the covariance function or using a cell array. For 75 | % example: 76 | % 77 | % covfunc = 'covRQard'; 78 | % covfunc = {'covRQard'}; 79 | % 80 | % are both supported. Only the second form using the cell array can be used 81 | % for specifying composite covariance functions, made up of several 82 | % contributions. For example: 83 | % 84 | % covfunc = {'covSum',{'covRQiso','covSEard','covNoise'}}; 85 | % 86 | % specifies a covariance function which is the sum of three contributions. To 87 | % find out how many hyperparameters this covariance function requires, we do: 88 | % 89 | % feval(covfunc{:}) 90 | % 91 | % which returns the string '3+(D+1)+1' (ie the 'covRQiso' contribution uses 92 | % 3 parameters, the 'covSEard' uses D+1 and 'covNoise' a single parameter). 93 | % 94 | % (C) copyright 2006, Carl Edward Rasmussen, 2006-04-07.
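% As a quick illustration of these conventions (a minimal sketch; x is an
% n by D matrix of training cases and z an nn by D matrix of test cases):
%
%   covfunc  = {'covSum',{'covSEiso','covNoise'}};
%   feval(covfunc{:})                            % returns '2+1', ie 3 parameters
%   logtheta = log([1.0; 1.0; 0.1]);             % [ell; sqrt(sf2); sqrt(s2n)]
%   K      = feval(covfunc{:}, logtheta, x);     % n by n training covariance
%   [v, B] = feval(covfunc{:}, logtheta, x, z);  % test set covariances
%   dK     = feval(covfunc{:}, logtheta, x, 1);  % derivative wrt logtheta(1)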
95 | 96 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/covLINard.m: -------------------------------------------------------------------------------- 1 | function [A, B] = covLINard(logtheta, x, z); 2 | 3 | % Linear covariance function with Automatic Relevance Determination (ARD). The 4 | % covariance function is parameterized as: 5 | % 6 | % k(x^p,x^q) = x^p'*inv(P)*x^q 7 | % 8 | % where the P matrix is diagonal with ARD parameters ell_1^2,...,ell_D^2, where 9 | % D is the dimension of the input space. The hyperparameters are: 10 | % 11 | % logtheta = [ log(ell_1) 12 | % log(ell_2) 13 | % . 14 | % log(ell_D) ] 15 | % 16 | % Note that there is no bias term; use covConst to add a bias. 17 | % 18 | % For more help on design of covariance functions, try "help covFunctions". 19 | % 20 | % (C) Copyright 2006 by Carl Edward Rasmussen (2006-03-24) 21 | 22 | if nargin == 0, A = 'D'; return; end % report number of parameters 23 | 24 | ell = exp(logtheta); 25 | x = x*diag(1./ell); 26 | 27 | if nargin == 2 28 | A = x*x'; 29 | elseif nargout == 2 % compute test set covariances 30 | z = z*diag(1./ell); 31 | A = sum(z.*z,2); 32 | B = x*z'; 33 | else % compute derivative matrices 34 | A = -2*x(:,z)*x(:,z)'; 35 | end 36 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/covLINone.m: -------------------------------------------------------------------------------- 1 | function [A, B] = covLINone(logtheta, x, z); 2 | 3 | % Linear covariance function with a single hyperparameter. The covariance 4 | % function is parameterized as: 5 | % 6 | % k(x^p,x^q) = x^p'*inv(P)*x^q + 1./t2; 7 | % 8 | % where the P matrix is t2 times the unit matrix. The second term plays the 9 | % role of the bias. The hyperparameter is: 10 | % 11 | % logtheta = [ log(sqrt(t2)) ] 12 | % 13 | % For more help on design of covariance functions, try "help covFunctions". 14 | % 15 | % (C) Copyright 2006 by Carl Edward Rasmussen (2006-03-27) 16 | 17 | if nargin == 0, A = '1'; return; end % report number of parameters 18 | 19 | it2 = exp(-2*logtheta); % t2 inverse 20 | 21 | if nargin == 2 % compute covariance 22 | A = it2*(1+x*x'); 23 | elseif nargout == 2 % compute test set covariances 24 | A = it2*(1+sum(z.*z,2)); 25 | B = it2*(1+x*z'); 26 | else % compute derivative matrix 27 | A = -2*it2*(1+x*x'); 28 | end 29 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/covMatern3iso.m: -------------------------------------------------------------------------------- 1 | function [A, B] = covMatern3iso(loghyper, x, z) 2 | 3 | % Matern covariance function with nu = 3/2 and isotropic distance measure. The 4 | % covariance function is: 5 | % 6 | % k(x^p,x^q) = sf2 * (1 + sqrt(3)*d(x^p,x^q)) * exp(-sqrt(3)*d(x^p,x^q)) 7 | % 8 | % where d(x^p,x^q) is the distance sqrt((x^p-x^q)'*inv(P)*(x^p-x^q)), P is ell^2 9 | % times the unit matrix and sf2 is the signal variance. The hyperparameters 10 | % are: 11 | % 12 | % loghyper = [ log(ell) 13 | % log(sqrt(sf2)) ] 14 | % 15 | % For more help on design of covariance functions, try "help covFunctions".
16 | % 17 | % (C) Copyright 2006 by Carl Edward Rasmussen (2006-03-24) 18 | 19 | if nargin == 0, A = '2'; return; end 20 | 21 | persistent K; 22 | [n, D] = size(x); 23 | ell = exp(loghyper(1)); 24 | sf2 = exp(2*loghyper(2)); 25 | 26 | x = sqrt(3)*x/ell; 27 | 28 | if nargin == 2 % compute covariance matrix 29 | A = sqrt(sq_dist(x')); 30 | K = sf2*exp(-A).*(1+A); 31 | A = K; 32 | elseif nargout == 2 % compute test set covariances 33 | z = sqrt(3)*z/ell; 34 | A = sf2; 35 | B = sqrt(sq_dist(x',z')); 36 | B = sf2*exp(-B).*(1+B); 37 | else % compute derivative matrices 38 | if z == 1 39 | A = sf2*sq_dist(x').*exp(-sqrt(sq_dist(x'))); 40 | else 41 | % check for correct dimension of the previously calculated kernel matrix 42 | if any(size(K)~=n) 43 | K = sqrt(sq_dist(x')); 44 | K = sf2*exp(-K).*(1+K); 45 | end 46 | A = 2*K; 47 | clear K; 48 | end 49 | end 50 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/covMatern5iso.m: -------------------------------------------------------------------------------- 1 | function [A, B] = covMatern5iso(loghyper, x, z) 2 | 3 | % Matern covariance function with nu = 5/2 and isotropic distance measure. The 4 | % covariance function is: 5 | % 6 | % k(x^p,x^q) = sf2 * (1 + sqrt(5)*d + 5*d^2/3) * exp(-sqrt(5)*d) 7 | % 8 | % where d is the distance sqrt((x^p-x^q)'*inv(P)*(x^p-x^q)), P is ell^2 times 9 | % the unit matrix and sf2 is the signal variance. The hyperparameters are: 10 | % 11 | % loghyper = [ log(ell) 12 | % log(sqrt(sf2)) ] 13 | % 14 | % For more help on design of covariance functions, try "help covFunctions". 15 | % 16 | % (C) Copyright 2006 by Carl Edward Rasmussen (2006-03-24) 17 | 18 | if nargin == 0, A = '2'; return; end 19 | 20 | persistent K; 21 | [n, D] = size(x); 22 | ell = exp(loghyper(1)); 23 | sf2 = exp(2*loghyper(2)); 24 | 25 | x = sqrt(5)*x/ell; 26 | 27 | if nargin == 2 % compute covariance matrix 28 | A = sq_dist(x'); 29 | K = sf2*exp(-sqrt(A)).*(1+sqrt(A)+A/3); 30 | A = K; 31 | elseif nargout == 2 % compute test set covariances 32 | z = sqrt(5)*z/ell; 33 | A = sf2; 34 | B = sq_dist(x',z'); 35 | B = sf2*exp(-sqrt(B)).*(1+sqrt(B)+B/3); 36 | else % compute derivative matrices 37 | if z == 1 38 | A = sq_dist(x'); 39 | A = sf2*(A+sqrt(A).^3).*exp(-sqrt(A))/3; 40 | else 41 | % check for correct dimension of the previously calculated kernel matrix 42 | if any(size(K)~=n) 43 | K = sq_dist(x'); 44 | K = sf2*exp(-sqrt(K)).*(1+sqrt(K)+K/3); 45 | end 46 | A = 2*K; 47 | clear K; 48 | end 49 | end 50 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/covNNone.m: -------------------------------------------------------------------------------- 1 | function [A, B] = covNNone(loghyper, x, z) 2 | 3 | % Neural network covariance function with a single parameter for the distance 4 | % measure. The covariance function is parameterized as: 5 | % 6 | % k(x^p,x^q) = sf2 * asin(x^p'*P*x^q / sqrt[(1+x^p'*P*x^p)*(1+x^q'*P*x^q)]) 7 | % 8 | % where the x^p and x^q vectors on the right hand side have an added extra bias 9 | % entry with unit value. P is ell^-2 times the unit matrix and sf2 controls the 10 | % signal variance. The hyperparameters are: 11 | % 12 | % loghyper = [ log(ell) 13 | % log(sqrt(sf2)) ] 14 | % 15 | % For more help on design of covariance functions, try "help covFunctions".
16 | % 17 | % (C) Copyright 2006 by Carl Edward Rasmussen (2006-03-24) 18 | 19 | if nargin == 0, A = '2'; return; end % report number of parameters 20 | 21 | persistent Q K; 22 | [n D] = size(x); 23 | ell = exp(loghyper(1)); em2 = ell^(-2); 24 | sf2 = exp(2*loghyper(2)); 25 | x = x/ell; 26 | 27 | if nargin == 2 % compute covariance 28 | Q = x*x'; 29 | K = (em2+Q)./(sqrt(1+em2+diag(Q))*sqrt(1+em2+diag(Q)')); 30 | A = sf2*asin(K); 31 | elseif nargout == 2 % compute test set covariances 32 | z = z/ell; 33 | A = sf2*asin((em2+sum(z.*z,2))./(1+em2+sum(z.*z,2))); 34 | B = sf2*asin((em2+x*z')./sqrt((1+em2+sum(x.*x,2))*(1+em2+sum(z.*z,2)'))); 35 | else % compute derivative matrix 36 | % check for correct dimension of the previously calculated kernel matrix 37 | if any(size(Q)~=n) 38 | Q = x*x'; 39 | end 40 | % check for correct dimension of the previously calculated kernel matrix 41 | if any(size(K)~=n) 42 | K = (em2+Q)./(sqrt(1+em2+diag(Q))*sqrt(1+em2+diag(Q)')); 43 | end 44 | if z == 1 % first parameter 45 | v = (em2+sum(x.*x,2))./(1+em2+diag(Q)); 46 | A = -2*sf2*((em2+Q)./(sqrt(1+em2+diag(Q))*sqrt(1+em2+diag(Q)'))- ... 47 | K.*(repmat(v,1,n)+repmat(v',n,1))/2)./sqrt(1-K.^2); 48 | clear Q; 49 | else % second parameter 50 | A = 2*sf2*asin(K); 51 | clear K; 52 | end 53 | end 54 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/covNoise.m: -------------------------------------------------------------------------------- 1 | function [A, B] = covNoise(logtheta, x, z); 2 | 3 | % Independent covariance function, ie "white noise", with specified variance. 4 | % The covariance function is specified as: 5 | % 6 | % k(x^p,x^q) = s2 * \delta(p,q) 7 | % 8 | % where s2 is the noise variance and \delta(p,q) is a Kronecker delta function 9 | % which is 1 iff p=q and zero otherwise. The hyperparameter is 10 | % 11 | % logtheta = [ log(sqrt(s2)) ] 12 | % 13 | % For more help on design of covariance functions, try "help covFunctions". 14 | % 15 | % (C) Copyright 2006 by Carl Edward Rasmussen, 2006-03-24. 16 | 17 | if nargin == 0, A = '1'; return; end % report number of parameters 18 | 19 | s2 = exp(2*logtheta); % noise variance 20 | 21 | if nargin == 2 % compute covariance matrix 22 | A = s2*eye(size(x,1)); 23 | elseif nargout == 2 % compute test set covariances 24 | A = s2; 25 | B = 0; % zero cross covariance by independence 26 | else % compute derivative matrix 27 | A = 2*s2*eye(size(x,1)); 28 | end 29 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/covPeriodic.m: -------------------------------------------------------------------------------- 1 | function [A, B] = covPeriodic(logtheta, x, z); 2 | 3 | % covariance function for a smooth periodic function, with unit period. The 4 | % covariance function is: 5 | % 6 | % k(x^p, x^q) = sf2 * exp(-2*sin^2(pi*(x_p-x_q))/ell^2) 7 | % 8 | % where the hyperparameters are: 9 | % 10 | % logtheta = [ log(ell) 11 | % log(sqrt(sf2)) ] 12 | % 13 | % For more help on design of covariance functions, try "help covFunctions".
14 | % 15 | % (C) Copyright 2006 by Carl Edward Rasmussen (2006-04-07) 16 | 17 | if nargin == 0, A = '2'; return; end 18 | 19 | [n D] = size(x); 20 | ell = exp(logtheta(1)); 21 | sf2 = exp(2*logtheta(2)); 22 | 23 | if nargin == 2 24 | A = sf2*exp(-2*(sin(pi*(repmat(x,1,n)-repmat(x',n,1)))/ell).^2); 25 | elseif nargout == 2 % compute test set covariances 26 | [nn D] = size(z); 27 | A = sf2*ones(nn,1); 28 | B = sf2*exp(-2*(sin(pi*(repmat(x,1,nn)-repmat(z',n,1)))/ell).^2); 29 | else % compute derivative matrices 30 | if z == 1 31 | r = (sin(pi*(repmat(x,1,n)-repmat(x',n,1)))/ell).^2; 32 | A = 4*sf2*exp(-2*r).*r; 33 | else 34 | A = 2*sf2*exp(-2*(sin(pi*(repmat(x,1,n)-repmat(x',n,1)))/ell).^2); 35 | end 36 | end 37 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/covProd.m: -------------------------------------------------------------------------------- 1 | function [A, B] = covProd(covfunc, logtheta, x, z); 2 | 3 | % covProd - compose a covariance function as the product of other covariance 4 | % functions. This function doesn't actually compute very much on its own, it 5 | % merely does some bookkeeping, and calls other covariance functions to do the 6 | % actual work. 7 | % 8 | % For more help on design of covariance functions, try "help covFunctions". 9 | % 10 | % (C) Copyright 2006 by Carl Edward Rasmussen, 2006-04-06. 11 | 12 | for i = 1:length(covfunc) % iterate over covariance functions 13 | f = covfunc(i); 14 | if iscell(f{:}), f = f{:}; end % dereference cell array if necessary 15 | j(i) = cellstr(feval(f{:})); 16 | end 17 | 18 | if nargin == 1, % report number of parameters 19 | A = char(j(1)); for i=2:length(covfunc), A = [A, '+', char(j(i))]; end 20 | return 21 | end 22 | 23 | [n, D] = size(x); 24 | 25 | v = []; % the vector v indicates to which covariance function each parameter belongs 26 | for i = 1:length(covfunc), v = [v repmat(i, 1, eval(char(j(i))))]; end 27 | 28 | switch nargin 29 | case 3 % compute covariance matrix 30 | A = ones(n, n); % allocate space for covariance matrix 31 | for i = 1:length(covfunc) % iteration over factor functions 32 | f = covfunc(i); 33 | if iscell(f{:}), f = f{:}; end % dereference cell array if necessary 34 | A = A .* feval(f{:}, logtheta(v==i), x); % multiply covariances 35 | end 36 | 37 | case 4 % compute derivative matrix or test set covariances 38 | if nargout == 2 % compute test set covariances 39 | A = ones(size(z,1),1); B = ones(size(x,1),size(z,1)); % allocate space 40 | for i = 1:length(covfunc) 41 | f = covfunc(i); 42 | if iscell(f{:}), f = f{:}; end % dereference cell array if necessary 43 | [AA BB] = feval(f{:}, logtheta(v==i), x, z); % compute test covariances 44 | A = A .* AA; B = B .* BB; % and accumulate 45 | end 46 | else % compute derivative matrices 47 | A = ones(n, n); 48 | ii = v(z); % which covariance function 49 | j = sum(v(1:z)==ii); % which parameter in that covariance 50 | for i = 1:length(covfunc) 51 | f = covfunc(i); 52 | if iscell(f{:}), f = f{:}; end % dereference cell array if necessary 53 | if i == ii 54 | A = A .* feval(f{:}, logtheta(v==i), x, j); % multiply derivative 55 | else 56 | A = A .* feval(f{:}, logtheta(v==i), x); % multiply covariance 57 | end 58 | end 59 | end 60 | 61 | end 62 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/covRQard.m: -------------------------------------------------------------------------------- 1 | function [A, B] = covRQard(loghyper, x, z) 2 | 3 | % Rational Quadratic
covariance function with Automatic Relevance Determination 4 | % (ARD) distance measure. The covariance function is parameterized as: 5 | % 6 | % k(x^p,x^q) = sf2 * [1 + (x^p - x^q)'*inv(P)*(x^p - x^q)/(2*alpha)]^(-alpha) 7 | % 8 | % where the P matrix is diagonal with ARD parameters ell_1^2,...,ell_D^2, where 9 | % D is the dimension of the input space, sf2 is the signal variance and alpha 10 | % is the shape parameter for the RQ covariance. The hyperparameters are: 11 | % 12 | % loghyper = [ log(ell_1) 13 | % log(ell_2) 14 | % . 15 | % log(ell_D) 16 | % log(sqrt(sf2)) 17 | % log(alpha) ] 18 | % 19 | % For more help on design of covariance functions, try "help covFunctions". 20 | % 21 | % (C) Copyright 2006 by Carl Edward Rasmussen (2006-09-08) 22 | 23 | if nargin == 0, A = '(D+2)'; return; end 24 | 25 | persistent K; 26 | [n D] = size(x); 27 | ell = exp(loghyper(1:D)); 28 | sf2 = exp(2*loghyper(D+1)); 29 | alpha = exp(loghyper(D+2)); 30 | 31 | if nargin == 2 32 | K = (1+0.5*sq_dist(diag(1./ell)*x')/alpha); 33 | A = sf2*(K.^(-alpha)); 34 | elseif nargout == 2 % compute test set covariances 35 | A = sf2*ones(size(z,1),1); 36 | B = sf2*((1+0.5*sq_dist(diag(1./ell)*x',diag(1./ell)*z')/alpha).^(-alpha)); 37 | else % compute derivative matrix 38 | % check for correct dimension of the previously calculated kernel matrix 39 | if any(size(K)~=n) 40 | K = (1+0.5*sq_dist(diag(1./ell)*x')/alpha); 41 | end 42 | if z <= D % length scale parameters 43 | A = sf2*K.^(-alpha-1).*sq_dist(x(:,z)'/ell(z)); 44 | elseif z == D+1 % magnitude parameter 45 | A = 2*sf2*(K.^(-alpha)); 46 | else 47 | A = sf2*K.^(-alpha).*(0.5*sq_dist(diag(1./ell)*x')./K - alpha*log(K)); 48 | clear K; 49 | end 50 | end 51 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/covRQiso.m: -------------------------------------------------------------------------------- 1 | function [A, B] = covRQiso(loghyper, x, z) 2 | 3 | % Rational Quadratic covariance function with isotropic distance measure. The 4 | % covariance function is parameterized as: 5 | % 6 | % k(x^p,x^q) = sf2 * [1 + (x^p - x^q)'*inv(P)*(x^p - x^q)/(2*alpha)]^(-alpha) 7 | % 8 | % where the P matrix is ell^2 times the unit matrix, sf2 is the signal 9 | % variance and alpha is the shape parameter for the RQ covariance. The 10 | % hyperparameters are: 11 | % 12 | % loghyper = [ log(ell) 13 | % log(sqrt(sf2)) 14 | % log(alpha) ] 15 | % 16 | % For more help on design of covariance functions, try "help covFunctions". 
17 | % 18 | % (C) Copyright 2006 by Carl Edward Rasmussen (2006-09-08) 19 | 20 | if nargin == 0, A = '3'; return; end 21 | 22 | [n, D] = size(x); 23 | 24 | persistent K; 25 | ell = exp(loghyper(1)); 26 | sf2 = exp(2*loghyper(2)); 27 | alpha = exp(loghyper(3)); 28 | 29 | if nargin == 2 % compute covariance matrix 30 | K = (1+0.5*sq_dist(x'/ell)/alpha); 31 | A = sf2*(K.^(-alpha)); 32 | elseif nargout == 2 % compute test set covariances 33 | A = sf2*ones(size(z,1),1); 34 | B = sf2*((1+0.5*sq_dist(x'/ell,z'/ell)/alpha).^(-alpha)); 35 | else % compute derivative matrices 36 | % check for correct dimension of the previously calculated kernel matrix 37 | if any(size(K)~=n) 38 | K = (1+0.5*sq_dist(x'/ell)/alpha); 39 | end 40 | if z == 1 % length scale parameter 41 | A = sf2*K.^(-alpha-1).*sq_dist(x'/ell); 42 | elseif z == 2 % magnitude parameter 43 | A = 2*sf2*(K.^(-alpha)); 44 | else 45 | A = sf2*K.^(-alpha).*(0.5*sq_dist(x'/ell)./K - alpha*log(K)); 46 | clear K; 47 | end 48 | end 49 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/covSEard.m: -------------------------------------------------------------------------------- 1 | function [A, B] = covSEard(loghyper, x, z) 2 | 3 | % Squared Exponential covariance function with Automatic Relevance Determination 4 | % (ARD) distance measure. The covariance function is parameterized as: 5 | % 6 | % k(x^p,x^q) = sf2 * exp(-(x^p - x^q)'*inv(P)*(x^p - x^q)/2) 7 | % 8 | % where the P matrix is diagonal with ARD parameters ell_1^2,...,ell_D^2, where 9 | % D is the dimension of the input space and sf2 is the signal variance. The 10 | % hyperparameters are: 11 | % 12 | % loghyper = [ log(ell_1) 13 | % log(ell_2) 14 | % . 15 | % log(ell_D) 16 | % log(sqrt(sf2)) ] 17 | % 18 | % For more help on design of covariance functions, try "help covFunctions". 19 | % 20 | % (C) Copyright 2006 by Carl Edward Rasmussen (2006-03-24) 21 | 22 | if nargin == 0, A = '(D+1)'; return; end % report number of parameters 23 | 24 | persistent K; 25 | 26 | [n D] = size(x); 27 | ell = exp(loghyper(1:D)); % characteristic length scale 28 | sf2 = exp(2*loghyper(D+1)); % signal variance 29 | 30 | if nargin == 2 31 | K = sf2*exp(-sq_dist(diag(1./ell)*x')/2); 32 | A = K; 33 | elseif nargout == 2 % compute test set covariances 34 | A = sf2*ones(size(z,1),1); 35 | B = sf2*exp(-sq_dist(diag(1./ell)*x',diag(1./ell)*z')/2); 36 | else % compute derivative matrix 37 | 38 | % check for correct dimension of the previously calculated kernel matrix 39 | if any(size(K)~=n) 40 | K = sf2*exp(-sq_dist(diag(1./ell)*x')/2); 41 | end 42 | 43 | if z <= D % length scale parameters 44 | A = K.*sq_dist(x(:,z)'/ell(z)); 45 | else % magnitude parameter 46 | A = 2*K; 47 | clear K; 48 | end 49 | end 50 | 51 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/covSEiso.m: -------------------------------------------------------------------------------- 1 | function [A, B] = covSEiso(loghyper, x, z); 2 | 3 | % Squared Exponential covariance function with isotropic distance measure. The 4 | % covariance function is parameterized as: 5 | % 6 | % k(x^p,x^q) = sf2 * exp(-(x^p - x^q)'*inv(P)*(x^p - x^q)/2) 7 | % 8 | % where the P matrix is ell^2 times the unit matrix and sf2 is the signal 9 | % variance. The hyperparameters are: 10 | % 11 | % loghyper = [ log(ell) 12 | % log(sqrt(sf2)) ] 13 | % 14 | % For more help on design of covariance functions, try "help covFunctions".
15 | % 16 | % (C) Copyright 2006 by Carl Edward Rasmussen (2007-06-25) 17 | 18 | if nargin == 0, A = '2'; return; end % report number of parameters 19 | 20 | [n D] = size(x); 21 | ell = exp(loghyper(1)); % characteristic length scale 22 | sf2 = exp(2*loghyper(2)); % signal variance 23 | 24 | if nargin == 2 25 | A = sf2*exp(-sq_dist(x'/ell)/2); 26 | elseif nargout == 2 % compute test set covariances 27 | A = sf2*ones(size(z,1),1); 28 | B = sf2*exp(-sq_dist(x'/ell,z'/ell)/2); 29 | else % compute derivative matrix 30 | if z == 1 % first parameter 31 | A = sf2*exp(-sq_dist(x'/ell)/2).*sq_dist(x'/ell); 32 | else % second parameter 33 | A = 2*sf2*exp(-sq_dist(x'/ell)/2); 34 | end 35 | end 36 | 37 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/covSum.m: -------------------------------------------------------------------------------- 1 | function [A, B] = covSum(covfunc, logtheta, x, z); 2 | 3 | % covSum - compose a covariance function as the sum of other covariance 4 | % functions. This function doesn't actually compute very much on its own, it 5 | % merely does some bookkeeping, and calls other covariance functions to do the 6 | % actual work. 7 | % 8 | % For more help on design of covariance functions, try "help covFunctions". 9 | % 10 | % (C) Copyright 2006 by Carl Edward Rasmussen, 2006-03-20. 11 | 12 | for i = 1:length(covfunc) % iterate over covariance functions 13 | f = covfunc(i); 14 | if iscell(f{:}), f = f{:}; end % dereference cell array if necessary 15 | j(i) = cellstr(feval(f{:})); 16 | end 17 | 18 | if nargin == 1, % report number of parameters 19 | A = char(j(1)); for i=2:length(covfunc), A = [A, '+', char(j(i))]; end 20 | return 21 | end 22 | 23 | [n, D] = size(x); 24 | 25 | v = []; % the vector v indicates to which covariance function each parameter belongs 26 | for i = 1:length(covfunc), v = [v repmat(i, 1, eval(char(j(i))))]; end 27 | 28 | switch nargin 29 | case 3 % compute covariance matrix 30 | A = zeros(n, n); % allocate space for covariance matrix 31 | for i = 1:length(covfunc) % iteration over summand functions 32 | f = covfunc(i); 33 | if iscell(f{:}), f = f{:}; end % dereference cell array if necessary 34 | A = A + feval(f{:}, logtheta(v==i), x); % accumulate covariances 35 | end 36 | 37 | case 4 % compute derivative matrix or test set covariances 38 | if nargout == 2 % compute test set covariances 39 | A = zeros(size(z,1),1); B = zeros(size(x,1),size(z,1)); % allocate space 40 | for i = 1:length(covfunc) 41 | f = covfunc(i); 42 | if iscell(f{:}), f = f{:}; end % dereference cell array if necessary 43 | [AA BB] = feval(f{:}, logtheta(v==i), x, z); % compute test covariances 44 | A = A + AA; B = B + BB; % and accumulate 45 | end 46 | else % compute derivative matrices 47 | i = v(z); % which covariance function 48 | j = sum(v(1:z)==i); % which parameter in that covariance 49 | f = covfunc(i); 50 | if iscell(f{:}), f = f{:}; end % dereference cell array if necessary 51 | A = feval(f{:}, logtheta(v==i), x, j); % compute derivative 52 | end 53 | 54 | end 55 | 56 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/cumGauss.m: -------------------------------------------------------------------------------- 1 | function [out1, out2, out3, out4] = cumGauss(y, f, var) 2 | 3 | % cumGauss - Cumulative Gaussian likelihood function. The expression for the 4 | % likelihood is cumGauss(t) = normcdf(t) = (1+erf(t/sqrt(2)))/2.
5 | % 6 | % Three modes are provided, for computing likelihoods, derivatives and moments 7 | % respectively, see likelihoods.m for the details. In general, care is taken 8 | % to avoid numerical issues when the arguments are extreme. The 9 | % moments \int f^k cumGauss(y,f) N(f|mu,var) df are calculated analytically. 10 | % 11 | % Copyright (c) 2007 Carl Edward Rasmussen and Hannes Nickisch, 2007-03-29. 12 | 13 | if nargin>1, y=sign(y); end % allow only +/- 1 as values 14 | 15 | if nargin == 2 % (log) likelihood evaluation 16 | 17 | if numel(y)>0, yf = y.*f; else yf = f; end % product of latents and labels 18 | 19 | out1 = (1+erf(yf/sqrt(2)))/2; % likelihood 20 | if nargout>1 21 | out2 = zeros(size(f)); 22 | b = 0.158482605320942; % quadratic asymptotics approximated at -6 23 | c = -1.785873318175113; 24 | ok = yf>-6; % normal evaluation for larger values 25 | out2( ok) = log(out1(ok)); 26 | out2(~ok) = -yf(~ok).^2/2 + b*yf(~ok) + c; % log of sigmoid 27 | end 28 | 29 | elseif nargin == 3 30 | 31 | if strcmp(var,'deriv') % derivatives of the log 32 | 33 | if numel(y)==0, y=1; end 34 | yf = y.*f; % product of latents and labels 35 | [p,lp] = cumGauss(y,f); 36 | out1 = sum(lp); 37 | 38 | if nargout>1 % dlp, derivative of log likelihood 39 | 40 | n_p = zeros(size(f)); % safely compute Gaussian over cumulative Gaussian 41 | ok = yf>-5; % normal evaluation for large values of yf 42 | n_p(ok) = (exp(-yf(ok).^2/2)/sqrt(2*pi))./p(ok); 43 | 44 | bd = yf<-6; % tight upper bound evaluation 45 | n_p(bd) = sqrt(yf(bd).^2/4+1)-yf(bd)/2; 46 | 47 | interp = ~ok & ~bd; % linearly interpolate between both of them 48 | tmp = yf(interp); 49 | lam = -5-yf(interp); 50 | n_p(interp) = (1-lam).*(exp(-tmp.^2/2)/sqrt(2*pi))./p(interp) + ... 51 | lam .*(sqrt(tmp.^2/4+1)-tmp/2); 52 | 53 | out2 = y.*n_p; % dlp, derivative of log likelihood 54 | if nargout>2 % d2lp, 2nd derivative of log likelihood 55 | out3 = -n_p.^2 - yf.*n_p; 56 | if nargout>3 % d3lp, 3rd derivative of log likelihood 57 | out4 = 2*y.*n_p.^3 +3*f.*n_p.^2 +y.*(f.^2-1).*n_p; 58 | end 59 | end 60 | end 61 | 62 | else % compute moments 63 | 64 | mu = f; % 2nd argument is the mean of a Gaussian 65 | z = mu./sqrt(1+var); 66 | if numel(y)>0, z=z.*y; end 67 | out1 = cumGauss([],z); % zeroth raw moment 68 | 69 | [dummy,n_p] = cumGauss([],z,'deriv'); % Gaussian over cumulative Gaussian 70 | 71 | if nargout>1 72 | if numel(y)==0, y=1; end 73 | out2 = mu + y.*var.*n_p./sqrt(1+var); % 1st raw moment 74 | if nargout>2 75 | out3 = 2*mu.*out2 -mu.^2 +var -z.*var.^2.*n_p./(1+var); % 2nd raw moment 76 | out3 = out3.*out1; 77 | end 78 | out2 = out2.*out1; 79 | end 80 | 81 | end 82 | 83 | else 84 | error('No valid input provided.') 85 | end 86 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/gauher.m: -------------------------------------------------------------------------------- 1 | % compute abscissas and weight factors for Gauss-Hermite quadrature 2 | % 3 | % CALL: [x,w]=gauher(N) 4 | % 5 | % x = base points (abscissas) 6 | % w = weight factors 7 | % N = number of base points (abscissas) (integrates a (2N-1)th order 8 | % polynomial exactly) 9 | % 10 | % p(x)=exp(-x^2/2)/sqrt(2*pi), a =-Inf, b = Inf 11 | % 12 | % The Gaussian Quadrature integrates a (2n-1)th order 13 | % polynomial exactly and the integral is of the form 14 | % Int_a^b ( p(x)*F(x) ) dx = Sum_{j=1..N} ( w_j * F(x_j) ) 15 | % 16 | % 17 | % 18 | % this procedure uses the coefficients a(j), b(j) of the 19 | % recurrence relation 20 | % 21 | %
b_j p_j(x) = (x - a_j) p_{j-1}(x) - b_{j-1} p_{j-2}(x) 22 | % 23 | % 24 | % for the various classical (normalized) orthogonal polynomials, 25 | % and the zero-th moment 26 | % 27 | % 1 = integral w(x) dx 28 | % 29 | % of the given polynomial's weight function w(x). Since the 30 | % polynomials are orthonormalized, the tridiagonal matrix is 31 | % guaranteed to be symmetric. 32 | 33 | function [x,w]=gauher(N) 34 | if N==20 % return precalculated values 35 | x=[ -7.619048541679757;-6.510590157013656;-5.578738805893203; 36 | -4.734581334046057;-3.943967350657318;-3.18901481655339 ; 37 | -2.458663611172367;-1.745247320814127;-1.042945348802751; 38 | -0.346964157081356; 0.346964157081356; 1.042945348802751; 39 | 1.745247320814127; 2.458663611172367; 3.18901481655339 ; 40 | 3.943967350657316; 4.734581334046057; 5.578738805893202; 41 | 6.510590157013653; 7.619048541679757]; 42 | w=[ 0.000000000000126; 0.000000000248206; 0.000000061274903; 43 | 0.00000440212109 ; 0.000128826279962; 0.00183010313108 ; 44 | 0.013997837447101; 0.061506372063977; 0.161739333984 ; 45 | 0.260793063449555; 0.260793063449555; 0.161739333984 ; 46 | 0.061506372063977; 0.013997837447101; 0.00183010313108 ; 47 | 0.000128826279962; 0.00000440212109 ; 0.000000061274903; 48 | 0.000000000248206; 0.000000000000126 ]; 49 | else 50 | b = sqrt( (1:N-1)/2 )'; 51 | [V,D] = eig( diag(b,1) + diag(b,-1) ); 52 | w = V(1,:)'.^2; 53 | x = sqrt(2)*diag(D); 54 | end -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/gpr.m: -------------------------------------------------------------------------------- 1 | function [out1, out2] = gpr(logtheta, covfunc, x, y, xstar); 2 | 3 | % gpr - Gaussian process regression, with a named covariance function. Two 4 | % modes are possible, training and prediction: if no test data are given, the 5 | % function returns minus the log likelihood and its partial derivatives with 6 | % respect to the hyperparameters; this mode is used to fit the hyperparameters. 7 | % If test data are given, then (marginal) Gaussian predictions are computed, 8 | % whose mean and variance are returned. Note that in cases where the covariance 9 | % function has noise contributions, the variance returned in S2 is for noisy 10 | % test targets; if you want the variance of the noise-free latent function, you 11 | % must subtract the noise variance. 12 | % 13 | % usage: [nlml dnlml] = gpr(logtheta, covfunc, x, y) 14 | % or: [mu S2] = gpr(logtheta, covfunc, x, y, xstar) 15 | % 16 | % where: 17 | % 18 | % logtheta is a (column) vector of log hyperparameters 19 | % covfunc is the covariance function 20 | % x is a n by D matrix of training inputs 21 | % y is a (column) vector (of size n) of targets 22 | % xstar is a nn by D matrix of test inputs 23 | % nlml is the returned value of the negative log marginal likelihood 24 | % dnlml is a (column) vector of partial derivatives of the negative 25 | % log marginal likelihood wrt each log hyperparameter 26 | % mu is a (column) vector (of size nn) of predicted means 27 | % S2 is a (column) vector (of size nn) of predicted variances 28 | % 29 | % For more help on covariance functions, see "help covFunctions". 30 | % 31 | % (C) copyright 2006 by Carl Edward Rasmussen (2006-03-20).
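% A typical regression session built from the pieces above (a minimal sketch;
% x, y and xstar as described in the help text) first fits the hyperparameters
% with minimize.m and then predicts:
%
%   covfunc  = {'covSum',{'covSEiso','covNoise'}};
%   logtheta = zeros(3,1); % initial [log(ell); log(sqrt(sf2)); log(sqrt(s2n))]
%   logtheta = minimize(logtheta, 'gpr', -100, covfunc, x, y);
%   [mu, S2] = gpr(logtheta, covfunc, x, y, xstar);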
32 | 33 | if ischar(covfunc), covfunc = cellstr(covfunc); end % convert to cell if needed 34 | [n, D] = size(x); 35 | if eval(feval(covfunc{:})) ~= size(logtheta, 1) 36 | error('Error: Number of parameters does not agree with covariance function') 37 | end 38 | 39 | K = feval(covfunc{:}, logtheta, x); % compute training set covariance matrix 40 | 41 | L = chol(K)'; % cholesky factorization of the covariance 42 | alpha = solve_chol(L',y); 43 | 44 | if nargin == 4 % if no test cases, compute the negative log marginal likelihood 45 | 46 | out1 = 0.5*y'*alpha + sum(log(diag(L))) + 0.5*n*log(2*pi); 47 | 48 | if nargout == 2 % ... and if requested, its partial derivatives 49 | out2 = zeros(size(logtheta)); % set the size of the derivative vector 50 | W = L'\(L\eye(n))-alpha*alpha'; % precompute for convenience 51 | for i = 1:length(out2) 52 | out2(i) = sum(sum(W.*feval(covfunc{:}, logtheta, x, i)))/2; 53 | end 54 | end 55 | 56 | else % ... otherwise compute (marginal) test predictions ... 57 | 58 | [Kss, Kstar] = feval(covfunc{:}, logtheta, x, xstar); % test covariances 59 | 60 | out1 = Kstar' * alpha; % predicted means 61 | 62 | if nargout == 2 63 | v = L\Kstar; 64 | out2 = Kss - sum(v.*v)'; 65 | end 66 | 67 | end 68 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/gpr2.m: -------------------------------------------------------------------------------- 1 | function alpha = gpr2(logtheta, covfunc, x, y); 2 | 3 | % gpr2 - Gaussian process regression, with a named covariance function. Two 4 | % modes are possible, training and prediction: if no test data are given, the 5 | % function returns minus the log likelihood and its partial derivatives with 6 | % respect to the hyperparameters; this mode is used to fit the hyperparameters. 7 | % If test data are given, then (marginal) Gaussian predictions are computed, 8 | % whose mean and variance are returned. Note that in cases where the covariance 9 | % function has noise contributions, the variance returned in S2 is for noisy 10 | % test targets; if you want the variance of the noise-free latent function, you 11 | % must subtract the noise variance. 12 | % 13 | % usage: [nlml dnlml] = gpr(logtheta, covfunc, x, y) 14 | % or: [mu S2] = gpr(logtheta, covfunc, x, y, xstar) 15 | % 16 | % where: 17 | % 18 | % logtheta is a (column) vector of log hyperparameters 19 | % covfunc is the covariance function 20 | % x is a n by D matrix of training inputs 21 | % y is a (column) vector (of size n) of targets 22 | % xstar is a nn by D matrix of test inputs 23 | % nlml is the returned value of the negative log marginal likelihood 24 | % dnlml is a (column) vector of partial derivatives of the negative 25 | % log marginal likelihood wrt each log hyperparameter 26 | % mu is a (column) vector (of size nn) of predicted means 27 | % S2 is a (column) vector (of size nn) of predicted variances 28 | % 29 | % For more help on covariance functions, see "help covFunctions". 30 | % 31 | % (C) copyright 2006 by Carl Edward Rasmussen (2006-03-20).
32 | 33 | if ischar(covfunc), covfunc = cellstr(covfunc); end % convert to cell if needed 34 | [n, D] = size(x); 35 | if eval(feval(covfunc{:})) ~= size(logtheta, 1) 36 | error('Error: Number of parameters does not agree with covariance function') 37 | end 38 | 39 | K = feval(covfunc{:}, logtheta, x); % compute training set covariance matrix 40 | 41 | L = chol(K)'; % cholesky factorization of the covariance 42 | alpha = solve_chol(L',y); 43 | 44 | if nargin == 4 % if no test cases, compute the negative log marginal likelihood 45 | 46 | out1 = 0.5*y'*alpha + sum(log(diag(L))) + 0.5*n*log(2*pi); 47 | 48 | if nargout == 2 % ... and if requested, its partial derivatives 49 | out2 = zeros(size(logtheta)); % set the size of the derivative vector 50 | W = L'\(L\eye(n))-alpha*alpha'; % precompute for convenience 51 | for i = 1:length(out2) 52 | out2(i) = sum(sum(W.*feval(covfunc{:}, logtheta, x, i)))/2; 53 | end 54 | end 55 | 56 | else % ... otherwise compute (marginal) test predictions ... 57 | 58 | 59 | end 60 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/gprSRPP.m: -------------------------------------------------------------------------------- 1 | function [mu, S2SR, S2PP] = gprSRPP(logtheta, covfunc, x, INDEX, y, xstar); 2 | 3 | % gprSRPP - Carries out approximate Gaussian process regression prediction 4 | % using the subset of regressors (SR) or projected process approximation (PP) 5 | % and the active set specified by INDEX. 6 | % 7 | % Usage 8 | % 9 | % [mu, S2SR, S2PP] = gprSRPP(logtheta, covfunc, x, INDEX, y, xstar) 10 | % 11 | % where 12 | % 13 | % logtheta is a (column) vector of log hyperparameters 14 | % covfunc is the covariance function, which is assumed to 15 | % be a covSum, and the last entry of the sum is covNoise 16 | % x is a n by D matrix of training inputs 17 | % INDEX is a vector of length m <= n used to specify which 18 | % inputs are used in the active set 19 | % y is a (column) vector (of size n) of targets 20 | % xstar is a nstar by D matrix of test inputs 21 | % mu is a (column) vector (of size nstar) of predicted means 22 | % S2SR is a (column) vector (of size nstar) of predicted variances under SR 23 | % S2PP is a (column) vector (of size nstar) of predicted variances under PP 24 | % 25 | % where D is the dimension of the input. 26 | % 27 | % For more help on covariance functions, see "help covFunctions". 28 | % 29 | % (C) copyright 2005, 2006 by Chris Williams (2006-03-29). 30 | 31 | if ischar(covfunc), covfunc = cellstr(covfunc); end % convert to cell if needed 32 | [n, D] = size(x); 33 | if eval(feval(covfunc{:})) ~= size(logtheta, 1) 34 | error('Error: Number of parameters does not agree with covariance function') 35 | end 36 | 37 | % we check that the covfunc cell array is a covSum, with last entry 'covNoise' 38 | if length(covfunc) ~= 2 | ~strcmp(covfunc(1), 'covSum') | ... 39 | ~strcmp(covfunc{2}(end), 'covNoise') 40 | error('The covfunc must be "covSum" whose last summand must be "covNoise"') 41 | end 42 | 43 | sigma2n = exp(2*logtheta(end)); % noise variance 44 | [nstar, D] = size(xstar); % number of test cases and dimension of input space 45 | m = length(INDEX); % size of subset 46 | 47 | % note, that in the following Kmm is computed by extracting the relevant part 48 | % of Knm, thus it will be the "noise-free" covariance (although the covfunc 49 | % specification does include noise).
50 | 51 | [v, Knm] = feval(covfunc{:}, logtheta, x, x(INDEX,:)); 52 | Kmm = Knm(INDEX,:); % Kmm is a noise-free covariance matrix 53 | jitter = 1e-9*trace(Kmm); 54 | Kmm = Kmm + jitter*eye(m); % as suggested in code of jqc 55 | 56 | % a is cov between active set and test points and vstar is variances at test 57 | % points, incl noise variance 58 | 59 | [vstar, a] = feval(covfunc{:}, logtheta, x(INDEX,:), xstar); 60 | 61 | mu = a'*((sigma2n*Kmm + Knm'*Knm)\(Knm'*y)); % pred mean eq. (8.14) and (8.26) 62 | 63 | e = (sigma2n*Kmm + Knm'*Knm) \ a; 64 | 65 | S2SR = sigma2n*sum(a.*e,1)'; % noise-free SR variance, eq. 8.15 66 | S2PP = vstar-sum(a.*(Kmm\a),1)'+S2SR; % PP variance eq. (8.27) including noise 67 | S2SR = S2SR + sigma2n; % SR variance including noise 68 | 69 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/likelihoods.m: -------------------------------------------------------------------------------- 1 | % likelihood: likelihood functions are provided to be used by the binaryGP 2 | % function, for binary Gaussian process classification. Two likelihood 3 | % functions are provided: 4 | % 5 | % logistic 6 | % cumGauss 7 | % 8 | % The likelihood functions have three possible modes, the mode being selected 9 | % as follows (where "lik" stands for any likelihood function): 10 | % 11 | % (log) likelihood evaluation: [p, lp] = lik(y, f) 12 | % 13 | % where y are the targets, f the latent function values, p the probabilities 14 | % and lp the log probabilities. All vectors are the same size. 15 | % 16 | % derivatives (of the log): [lp, dlp, d2lp, d3lp] = lik(y, f, 'deriv') 17 | % 18 | % where lp is a number (sum of the log probabilities for each case) and the 19 | % derivatives (up to order 3) of the logs wrt the latent values are vectors 20 | % (as the likelihood factorizes there are no mixed terms). 21 | % 22 | % moments wrt Gaussian measure: [m0, m1, m2] = lik(y, mu, var) 23 | % 24 | % where mk is the k'th moment: \int f^k lik(y,f) N(f|mu,var) df, and if y is 25 | % empty, it is assumed to be a vector of ones. 26 | % 27 | % See the help for the individual likelihood functions for the computations 28 | % specific to each likelihood function. 29 | % 30 | % Copyright (c) 2007 Carl Edward Rasmussen and Hannes Nickisch 2007-04-11. 31 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/logistic.m: -------------------------------------------------------------------------------- 1 | function [out1, out2, out3, out4] = logistic(y, f, var) 2 | 3 | % logistic - logistic likelihood function. The expression for the likelihood is 4 | % logistic(t) = 1./(1+exp(-t)). 5 | % 6 | % Three modes are provided, for computing likelihoods, derivatives and moments 7 | % respectively, see likelihoods.m for the details. In general, care is taken 8 | % to avoid numerical issues when the arguments are extreme. The moments 9 | % \int f^k logistic(y,f) N(f|mu,var) df are calculated using an approximation 10 | % to the cumulative Gaussian based on a mixture of 5 cumulative Gaussian 11 | % functions (or alternatively using Gauss-Hermite quadrature, which may be less 12 | % accurate). 13 | % 14 | % Copyright (c) 2007 Carl Edward Rasmussen and Hannes Nickisch, 2007-07-25.
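% The three calling modes described in likelihoods.m look as follows for this
% likelihood (an illustrative sketch; f, mu and var are column vectors and y
% holds +1/-1 labels of the same size):
%
%   [p, lp]               = logistic(y, f);          % (log) likelihoods
%   [lp, dlp, d2lp, d3lp] = logistic(y, f, 'deriv'); % lp is the scalar sum
%   [m0, m1, m2]          = logistic(y, mu, var);    % moments wrt N(mu,var)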
15 | 16 | if nargin>1, y=sign(y); end % allow only +/- 1 as values 17 | 18 | if nargin == 2 % (log) likelihood evaluation 19 | 20 | if numel(y)>0, yf = y.*f; else yf = f; end % product of latents and labels 21 | 22 | out1 = 1./(1+exp(-yf)); % likelihood 23 | if nargout>1 24 | out2 = yf; 25 | ok = -35<yf; % evaluate log likelihood normally where safe 26 | out2(ok) = -log(1+exp(-yf(ok))); % out2 keeps the bound yf elsewhere 27 | end 28 | 29 | elseif nargin == 3 30 | 31 | if strcmp(var,'deriv') % derivatives of the log 32 | 33 | if numel(y)==0, y=1; end 34 | yf = y.*f; % product of latents and labels 35 | [p,lp] = logistic(y,f); 36 | out1 = sum(lp); 37 | 38 | 39 | if nargout>1 % dlp - first derivatives 40 | s = min(0,f); 41 | p = exp(s)./(exp(s)+exp(s-f)); % p = 1./(1+exp(-f)) 42 | out2 = (y+1)/2-p; % dlp, derivative of log likelihood 43 | if nargout>2 % d2lp, 2nd derivative of log likelihood 44 | out3 = -exp(2*s-f)./(exp(s)+exp(s-f)).^2; 45 | if nargout>3 % d3lp, 3rd derivative of log likelihood 46 | out4 = 2*out3.*(0.5-p); 47 | end 48 | end 49 | end 50 | 51 | else % compute moments 52 | 53 | mu = f; % 2nd argument is the mean of a Gaussian 54 | if numel(y)==0, y=ones(size(mu)); end % if empty, assume y=1 55 | 56 | % Two methods of integration are possible; the latter is more accurate 57 | % [out1,out2,out3] = gauherint(y, mu, var); 58 | [out1,out2,out3] = erfint(y, mu, var); 59 | 60 | end 61 | 62 | else 63 | error('No valid input provided.') 64 | end 65 | 66 | 67 | % The gauherint function approximates "\int t^k logistic(y t) N(t|mu,var)dt" by 68 | % means of Gauss-Hermite quadrature. A call to gauher.m is made. 69 | 70 | function [m0,m1,m2] = gauherint(y, mu, var) 71 | 72 | N = 20; [f,w] = gauher(N); % 20 yields precalculated weights 73 | sz = size(mu); 74 | 75 | f0 = sqrt(var(:))*f'+repmat(mu(:),[1,N]); % center values of f 76 | sig = logistic( repmat(y(:),[1,N]), f0 ); % calculate the likelihood values 77 | 78 | m0 = reshape(sig*w, sz); % zeroth moment 79 | if nargout>1 % first moment 80 | m1 = reshape(f0.*sig*w, sz); 81 | if nargout>2, m2 = reshape(f0.*f0.*sig*w, sz); end % second moment 82 | end 83 | 84 | 85 | % The erfint function approximates "\int t^k logistic(y t) N(t|mu,s2) dt" by 86 | % setting: 87 | % logistic(t) \approx 1/2 + \sum_{i=1}^5 (c_i/2) erf(lambda_i t) 88 | % The integrals \int t^k erf(t) N(t|mu,s2) dt can be done analytically. 89 | % 90 | % The inputs y, mu and var have to be column vectors of equal lengths. 91 | 92 | function [m0,m1,m2] = erfint(y, mu, s2) 93 | 94 | l = [0.44 0.41 0.40 0.39 0.36]; % approximation coefficients lambda_i 95 | 96 | c = [1.146480988574439e+02; -1.508871030070582e+03; 2.676085036831241e+03; 97 | -1.356294962039222e+03; 7.543285642111850e+01 ]; 98 | 99 | S2 = 2*s2.*(y.^2)*(l.^2) + 1; % zeroth moment 100 | S = sqrt( S2 ); 101 | Z = mu.*y*l./S; 102 | M0 = erf(Z); 103 | m0 = ( 1 + M0*c )/2; 104 | 105 | if nargout>1 % first moment 106 | NormZ = exp(-Z.^2)/sqrt(2*pi); 107 | M0mu = M0.*repmat(mu,[1,5]); 108 | M1 = (2*sqrt(2)*y.*s2)*l.*NormZ./S + M0mu; 109 | m1 = ( mu + M1*c )/2; 110 | 111 | if nargout>2 % second moment 112 | M2 = repmat(2*mu,[1,5]).*(1+s2.*y.^2*(l.^2)).*(M1-M0mu)./S2 ... 113 | + repmat(s2+mu.^2,[1,5]).*M0; 114 | m2 = ( mu.^2 + s2 + M2*c )/2; 115 | end 116 | end 117 | 118 | 119 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/solve_chol.c: -------------------------------------------------------------------------------- 1 | /* solve_chol - solve a linear system A*X = B using the cholesky factorization 2 | of A (where A is square, symmetric and positive definite). 3 | 4 | Copyright (c) 2004 Carl Edward Rasmussen. 2004-10-19.
*/ 5 | 6 | #include "mex.h" 7 | #include <string.h> 8 | 9 | extern int dpotrs_(char *, int *, int *, double *, int *, double *, int *, int *); 10 | 11 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) 12 | { 13 | double *C; 14 | int n, m, q; 15 | 16 | if (nrhs != 2 || nlhs > 1) /* check the input */ 17 | mexErrMsgTxt("Usage: X = solve_chol(R, B)"); 18 | n = mxGetN(prhs[0]); 19 | if (n != mxGetM(prhs[0])) 20 | mexErrMsgTxt("Error: First argument matrix must be square"); 21 | if (n != mxGetM(prhs[1])) 22 | mexErrMsgTxt("Error: First and second argument matrices must have same number of rows"); 23 | m = mxGetN(prhs[1]); 24 | 25 | plhs[0] = mxCreateDoubleMatrix(n, m, mxREAL); /* allocate space for output */ 26 | C = mxGetPr(plhs[0]); 27 | 28 | if (n==0) return; /* if argument was empty matrix, do no more */ 29 | memcpy(C,mxGetPr(prhs[1]),n*m*sizeof(double)); /* copy argument matrix */ 30 | dpotrs_("U", &n, &m, mxGetPr(prhs[0]), &n, C, &n, &q); /* solve system */ 31 | if (q > 0) 32 | mexErrMsgTxt("Error: illegal input to solve_chol"); 33 | } 34 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/solve_chol.m: -------------------------------------------------------------------------------- 1 | % solve_chol - solve linear equations from the Cholesky factorization. 2 | % Solve A*X = B for X, where A is square, symmetric, positive definite. The 3 | % input to the function is R the Cholesky decomposition of A and the matrix B. 4 | % Example: X = solve_chol(chol(A),B); 5 | % 6 | % NOTE: The program code is written in the C language for efficiency and is 7 | % contained in the file solve_chol.c, and should be compiled using matlab's mex 8 | % facility. However, this file also contains a (less efficient) matlab 9 | % implementation, supplied only as a help to people unfamiliar with mex. If 10 | % the C code has been properly compiled and is available, it automatically 11 | % takes precedence over the matlab code in this file. 12 | % 13 | % Copyright (c) 2004, 2005, 2006 by Carl Edward Rasmussen. 2006-02-08. 14 | 15 | function x = solve_chol(A, B); 16 | 17 | if nargin ~= 2 | nargout > 1 18 | error('Wrong number of arguments.'); 19 | end 20 | 21 | if size(A,1) ~= size(A,2) | size(A,1) ~= size(B,1) 22 | error('Wrong sizes of matrix arguments.'); 23 | end 24 | 25 | x = A\(A'\B); 26 | -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/sq_dist.c: -------------------------------------------------------------------------------- 1 | /* sq_dist - a mex function to compute a matrix of all pairwise squared 2 | distances between two sets of vectors, stored in the columns of the two 3 | matrices that are arguments to the function. The length of the vectors must 4 | agree. If only a single argument is given, the missing argument is taken to 5 | be identical to the first. If an optional third matrix argument Q is given, 6 | it must be of the same size as the output, but in this case a vector of the 7 | traces of the product of Q and the coordinatewise squared distances is 8 | returned. 9 | 10 | Copyright (c) 2003, 2004 Carl Edward Rasmussen. 2003-04-22.
*/ 11 | 12 | #include "mex.h" 13 | #include <math.h> 14 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) 15 | { 16 | double *a, *b, *C, *Q, z, t; 17 | int D, n, m, i, j, k; 18 | if (nrhs < 1 || nrhs > 3 || nlhs > 1) 19 | mexErrMsgTxt("Usage: C = sq_dist(a,b)\n or: C = sq_dist(a)\n or: c = sq_dist(a,b,Q)\nwhere the b matrix may be empty."); 20 | a = mxGetPr(prhs[0]); 21 | m = mxGetN(prhs[0]); 22 | D = mxGetM(prhs[0]); 23 | if (nrhs == 1 || mxIsEmpty(prhs[1])) { 24 | b = a; 25 | n = m; 26 | } else { 27 | b = mxGetPr(prhs[1]); 28 | n = mxGetN(prhs[1]); 29 | if (D != mxGetM(prhs[1])) 30 | mexErrMsgTxt("Error: column lengths must agree"); 31 | } 32 | if (nrhs < 3) { 33 | plhs[0] = mxCreateDoubleMatrix(m, n, mxREAL); 34 | C = mxGetPr(plhs[0]); 35 | for (i=0; i<m; i++) /* all pairwise squared distances */ 36 | for (j=0; j<n; j++) { 37 | z = 0.0; 38 | for (k=0; k<D; k++) { t = a[D*i+k] - b[D*j+k]; z += t*t; } 39 | C[i+j*m] = z; 40 | } 41 | } else { /* weighted version: return vector of traces */ 42 | if (mxGetM(prhs[2]) != m || mxGetN(prhs[2]) != n) 43 | mexErrMsgTxt("Error: Third argument has wrong size"); 44 | Q = mxGetPr(prhs[2]); 45 | plhs[0] = mxCreateDoubleMatrix(D, 1, mxREAL); 46 | C = mxGetPr(plhs[0]); 47 | for (k=0; k<D; k++) /* accumulate Q-weighted squared distances */ 48 | for (i=0; i<m; i++) 49 | for (j=0; j<n; j++) { 50 | t = a[D*i+k] - b[D*j+k]; 51 | C[k] += Q[i+j*m]*t*t; 52 | } 53 | } 54 | } -------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/sq_dist.log: -------------------------------------------------------------------------------- 10 | $ 11 | 12 | _ 13 | l.1 /* sq_ 14 | dist - a mex function to compute a matrix of all pairwise squared 15 | ? 16 | ! Missing $ inserted. 17 | 18 | $ 19 | 20 | \par 21 | l.9 22 | 23 | ? 24 | 25 | Overfull \hbox (1652.47543pt too wide) in paragraph at lines 1--9 26 | []\tenrm /* sq$[]\teni ist \tensy ^^@ \teni amexfunctiontocomputeamatrixofallpa 27 | irwisesquareddistancesbetweentwosetsofvectors; storedinthecolumnsofthetwomatric 28 | esthatareargumentstothefunction:Thelengthofthevectorsmustagree:Ifonlyasinglearg 29 | umentisgiven; themissingargumentistakentobeidenticaltothefirst:Ifanoptionalthir 30 | dmatrixargumentQisgiven; itmustbeofthesamesizeastheoutput; butinthiscaseavector 31 | ofthetracesoftheproductofQandthecoordinatewisesquareddistancesisreturned:$ | 32 | 33 | \hbox(7.5+2.5)x469.75499, glue set - 1.0 34 | .\hbox(0.0+0.0)x20.0 35 | .\tenrm / 36 | .\tenrm * 37 | .\glue 3.33333 plus 1.66666 minus 1.11111 38 | .\tenrm s 39 | .etc. 40 | 41 | ! You can't use `macro parameter character #' in vertical mode. 42 | l.12 # 43 | include "mex.h" 44 | ? q 45 | OK, entering \batchmode... 46 | ! You can't use `macro parameter character #' in horizontal mode. 47 | l.13 # 48 | include 49 | Sorry, but I'm not programmed to handle this case; 50 | I'll just pretend that you didn't ask for it. 51 | If you're in the wrong mode, you might be able to 52 | return to the right one by typing `I}' or `I$' or `I\par'. 53 | 54 | ! Missing $ inserted. 55 | 56 | $ 57 | 58 | _ 59 | l.19 mexErrMsgTxt("Usage: C = sq_ 60 | dist(a,b)\n or: C = sq_dist(a)\n ... 61 | I've inserted a begin-math/end-math symbol since I think 62 | you left one out. Proceed, with fingers crossed. 63 | 64 | ! Undefined control sequence. 65 | l.19 mexErrMsgTxt("Usage: C = sq_dist(a,b)\n 66 | or: C = sq_dist(a)\n ... 67 | The control sequence at the end of the top line 68 | of your error message was never \def'ed. If you have 69 | misspelled it (e.g., `\hobx'), type `I' and the correct 70 | spelling (e.g., `I\hbox'). Otherwise just continue, 71 | and I'll forget about whatever was undefined. 72 | 73 | ! Undefined control sequence. 74 | l.19 ... sq_dist(a,b)\n or: C = sq_dist(a)\n 75 | or: c = sq_dist(a,b... 76 | The control sequence at the end of the top line 77 | of your error message was never \def'ed.
-------------------------------------------------------------------------------- /KCI-test/gpml-matlab/gpml/sq_dist.m: -------------------------------------------------------------------------------- 1 | % sq_dist - a function to compute a matrix of all pairwise squared distances 2 | % between two sets of vectors, stored in the columns of the two matrices, a 3 | % (of size D by n) and b (of size D by m). If only a single argument is given 4 | % or the second matrix is empty, the missing matrix is taken to be identical 5 | % to the first. 6 | % 7 | % Special functionality: If an optional third matrix argument Q is given, it 8 | % must be of size n by m, and in this case a vector of the traces of the 9 | % product of Q' and the coordinatewise squared distances is returned. 10 | % 11 | % NOTE: The program code is written in the C language for efficiency and is 12 | % contained in the file sq_dist.c, and should be compiled using MATLAB's mex 13 | % facility. However, this file also contains a (less efficient) MATLAB 14 | % implementation, supplied only as a help to people unfamiliar with mex. If 15 | % the C code has been properly compiled and is available, it automatically 16 | % takes precedence over the MATLAB code in this file. 17 | % 18 | % Usage: C = sq_dist(a, b) 19 | % or: C = sq_dist(a) or equiv.: C = sq_dist(a, []) 20 | % or: c = sq_dist(a, b, Q) 21 | % where the b matrix may be empty. 22 | % 23 | % where a is of size D by n, b is of size D by m (or empty), C and Q are of 24 | % size n by m and c is of size D by 1. 25 | % 26 | % Copyright (c) 2003, 2004, 2005 and 2006 Carl Edward Rasmussen. 2006-03-09. 27 | 28 | function C = sq_dist(a, b, Q); 29 | 30 | if nargin < 1 | nargin > 3 | nargout > 1 31 | error('Wrong number of arguments.'); 32 | end 33 | 34 | if nargin == 1 | isempty(b) % input arguments are taken to be 35 | b = a; % identical if b is missing or empty 36 | end 37 | 38 | [D, n] = size(a); 39 | [d, m] = size(b); 40 | if d ~= D 41 | error('Error: column lengths must agree.'); 42 | end 43 | 44 | if nargin < 3 45 | C = zeros(n,m); 46 | for d = 1:D 47 | C = C + (repmat(b(d,:), n, 1) - repmat(a(d,:)', 1, m)).^2; 48 | end 49 | % C = repmat(sum(a.*a)',1,m)+repmat(sum(b.*b),n,1)-2*a'*b could be used to 50 | % replace the 3 lines above; it would be faster, but numerically less stable.
51 | else 52 | if [n m] == size(Q) 53 | C = zeros(D,1); 54 | for d = 1:D 55 | C(d) = sum(sum((repmat(b(d,:), n, 1) - repmat(a(d,:)', 1, m)).^2.*Q)); 56 | end 57 | else 58 | error('Third argument has wrong size.'); 59 | end 60 | end 61 | 
-------------------------------------------------------------------------------- /KCI-test/indtest_corr.m: -------------------------------------------------------------------------------- 1 | function [pval, stat] = indtest_corr(X, Y, Z, pars) 2 | % function [pval, stat] = indtest_corr(X, Y, Z, pars) 3 | % 4 | % Uses: MATLAB Statistics Toolbox 5 | % 6 | % Performs either a correlation test or a partial correlation test 7 | % 8 | % INPUT: 9 | % X Nxd1 matrix of samples (N data points, d1 dimensions) 10 | % Y Nxd2 matrix of samples (N data points, d2 dimensions) 11 | % Z Nxd3 matrix of samples (N data points, d3 dimensions) 12 | % pars structure containing parameters for the independence test 13 | % .bonferroni if true, bonferroni correction is performed (standard: false) 14 | % 15 | % OUTPUT: 16 | % pval p value of the test 17 | % stat test statistic 18 | % 19 | % 20 | % Copyright (c) 2011-2011 Kun Zhang 21 | % 2011-2011 Jonas Peters 22 | % All rights reserved. See the file COPYING for license terms. 23 | 24 | 25 | if ~isfield(pars,'bonferroni') 26 | pars.bonferroni = false; 27 | end; 28 | 29 | if isempty(Z) 30 | [sta,pp] = corr(X,Y); 31 | stat = max(max(sta)); % scalar summary statistic 32 | pval = min(min(pp)); 33 | if pars.bonferroni 34 | pval=size(X,2)*size(Y,2)*pval; 35 | end 36 | else 37 | [sta, pp]=partialcorr(X,Y,Z); 38 | pval = min(min(pp)); 39 | stat = max(max(sta)); % scalar summary statistic 40 | if pars.bonferroni 41 | pval=size(X,2)*size(Y,2)*pval; 42 | end 43 | end 44 | 45 | return 46 | 
-------------------------------------------------------------------------------- /KCI-test/indtest_hsic.m: -------------------------------------------------------------------------------- 1 | function [pval, stat] = indtest_hsic(X, Y, Z, pars) 2 | % function [pval, stat] = indtest_hsic(X, Y, Z, pars) 3 | % 4 | % This function is a WRAPPER 5 | % Performs either an HSIC test (Gretton et al.) or a conditional HSIC test (Fukumizu et al.) 6 | % 7 | % INPUT: 8 | % X Nxd1 matrix of samples (N data points, d1 dimensions) 9 | % Y Nxd2 matrix of samples (N data points, d2 dimensions) 10 | % Z Nxd3 matrix of samples (N data points, d3 dimensions) 11 | % pars structure containing parameters for the independence test 12 | % .pairwise if true, the test is performed pairwise if d1>1 (standard: false) 13 | % .bonferroni if true, bonferroni correction is performed (standard: false) 14 | % .perm # of bootstrap samples for cond. hsic test (standard: 500) 15 | % 16 | % OUTPUT: 17 | % pval p value of the test 18 | % stat test statistic 19 | % 20 | % 21 | % Copyright (c) 2011-2011 Kun Zhang 22 | % 2011-2011 Jonas Peters 23 | % All rights reserved. See the file COPYING for license terms.
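% Usage sketch (hypothetical data; relies on fasthsic and hsiccondTestIC being on the path):
%   pars = struct('pairwise', false, 'bonferroni', false, 'perm', 500);
%   [pval, stat] = indtest_hsic(randn(100,1), randn(100,1), [], pars);  % unconditional test
%   [pval, stat] = indtest_hsic(X, Y, Z, pars);                         % conditional test given Z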
24 | 25 | 26 | if ~isfield(pars,'pairwise') 27 | pars.pairwise = false; 28 | end; 29 | 30 | if ~isfield(pars,'bonferroni') 31 | pars.bonferroni = false; 32 | end; 33 | 34 | if ~isfield(pars,'perm') 35 | pars.perm = 500; 36 | end; 37 | 38 | if isempty(Z) % unconditional HSIC 39 | if pars.pairwise 40 | p = zeros(size(X,2),size(Y,2)); 41 | for i = 1:size(X,2); 42 | for j = 1:size(Y,2); 43 | [p(i,j) sta(i,j)] = fasthsic(X(:,i),Y(:,j)); 44 | end 45 | end 46 | [pp iii] = min(p); 47 | [pval jj] = min(pp); 48 | stat = sta(iii(jj),jj); 49 | if pars.bonferroni 50 | pval=size(X,2)*size(Y,2)*pval; 51 | end 52 | else 53 | [pval stat]= fasthsic(X, Y); 54 | end 55 | else % conditional HSIC 56 | [aa, pval, stat]=hsiccondTestIC(X,Y,Z,0.8,pars.perm); 57 | end 58 | 59 | return 60 | 61 | 62 | 63 | 
-------------------------------------------------------------------------------- /KCI-test/indtest_new.m: -------------------------------------------------------------------------------- 1 | function [pval stat] = indtest_new(X, Y, Z, pars) 2 | % function [pval, stat] = indtest_new(X, Y, Z, pars) 3 | % 4 | % 5 | % This function is a WRAPPER 6 | % Performs new method (to be submitted to UAI 2011) 7 | % 8 | % INPUT: 9 | % X Nxd1 matrix of samples (N data points, d1 dimensions) 10 | % Y Nxd2 matrix of samples (N data points, d2 dimensions) 11 | % Z Nxd3 matrix of samples (N data points, d3 dimensions) 12 | % pars structure containing parameters for the independence test 13 | % .pairwise if true, the test is performed pairwise if d1>1 (standard: false) 14 | % .bonferroni if true, bonferroni correction is performed (standard: false) 15 | % .width kernel width (standard: 0, which results in an automatic -heuristic- choice) 16 | % 17 | % OUTPUT: 18 | % pval p value of the test 19 | % stat test statistic 20 | % 21 | % 22 | % Copyright (c) 2011-2011 Kun Zhang 23 | % 2011-2011 Jonas Peters 24 | % All rights reserved. See the file COPYING for license terms. 25 | 26 | 27 | if ~isfield(pars,'pairwise') 28 | pars.pairwise = false; 29 | end; 30 | if ~isfield(pars,'bonferroni') 31 | pars.bonferroni = false; 32 | end; 33 | if ~isfield(pars,'width') 34 | pars.width = 0; 35 | end; 36 | 37 | if size(X,2)>1||size(Y,2)>1 38 | % error('This test only works for one-dimensional random variables X and Y.
Maybe it can be extended??') 39 | fprintf('Note that X and Y are random vectors.\n'); 40 | end 41 | 42 | if isempty(Z) % unconditional test 43 | if pars.pairwise 44 | p = zeros(size(X,2),size(Y,2)); 45 | for i = 1:size(X,2); 46 | for j = 1:size(Y,2); 47 | [sta(i,j), Cri, p_vala, Cri_appr, p(i,j)] = UInd_KCItest(X(:,i), Y(:,j), pars.width); % pairs X(:,i) with Y(:,j) 48 | end 49 | end 50 | [pp iii] = min(p); 51 | [pval jj] = min(pp); 52 | stat = sta(iii(jj),jj); 53 | if pars.bonferroni 54 | pval=size(X,2)*size(Y,2)*pval; 55 | end 56 | else 57 | [pval stat] = UInd_KCItest(X, Y, pars.width); 58 | end 59 | else % conditional independence test 60 | [stat, Cri, pval, Cri_appr, p_val_appr] = CInd_test_new_withGP(X, Y, Z, 0.01, pars.width); 61 | end 62 | 63 | return 64 | 65 | 66 | 67 | 
-------------------------------------------------------------------------------- /KCI-test/indtest_new_t.m: -------------------------------------------------------------------------------- 1 | function [pval stat] = indtest_new_t(X, Y, Z, pars) 2 | % function [pval, stat] = indtest_new_t(X, Y, Z, pars) 3 | % 4 | % 5 | % This function is a WRAPPER 6 | % Performs new method (to be submitted to UAI 2011) 7 | % 8 | % INPUT: 9 | % X Nxd1 matrix of samples (N data points, d1 dimensions) 10 | % Y Nxd2 matrix of samples (N data points, d2 dimensions) 11 | % Z Nxd3 matrix of samples (N data points, d3 dimensions) 12 | % pars structure containing parameters for the independence test 13 | % .pairwise if true, the test is performed pairwise if d1>1 (standard: false) 14 | % .bonferroni if true, bonferroni correction is performed (standard: false) 15 | % .width kernel width (standard: 0, which results in an automatic -heuristic- choice) 16 | % 17 | % OUTPUT: 18 | % pval p value of the test 19 | % stat test statistic 20 | % 21 | % 22 | % Copyright (c) 2011-2011 Kun Zhang 23 | % 2011-2011 Jonas Peters 24 | % All rights reserved. See the file COPYING for license terms. 25 | 26 | 27 | 28 | if size(X,2)>1||size(Y,2)>1 29 | % error('This test only works for one-dimensional random variables X and Y.
Maybe it can be extended??') 30 | fprintf('Note that X and Y are random vectors.\n'); 31 | end 32 | 33 | if isempty(Z) % unconditional test 34 | if pars.pairwise 35 | p = zeros(size(X,2),size(Y,2)); 36 | for i = 1:size(X,2) 37 | for j = 1:size(Y,2) 38 | [sta(i,j), Cri, p_vala, Cri_appr, p(i,j)] = UInd_KCItest(X(:,i), Y(:,j), pars); % pairs X(:,i) with Y(:,j) 39 | end 40 | end 41 | [pp iii] = min(p); 42 | [pval jj] = min(pp); 43 | stat = sta(iii(jj),jj); 44 | if pars.bonferroni 45 | pval=size(X,2)*size(Y,2)*pval; 46 | end 47 | else 48 | [pval stat] = UInd_KCItest(X, Y, pars); 49 | end 50 | else % conditional independence test 51 | [pval, stat, Cri] = CInd_test_new_withGP_t(X, Y, Z, 0.01, pars); 52 | end 53 | 54 | return 55 | 56 | 57 | 58 | 
-------------------------------------------------------------------------------- /KCI-test/indtest_new_t_RFF.m: -------------------------------------------------------------------------------- 1 | function [pval stat] = indtest_new_t_RFF(X, Y, Z, pars) 2 | % function [pval, stat] = indtest_new_t_RFF(X, Y, Z, pars) 3 | % 4 | % 5 | % This function is a WRAPPER 6 | % Performs new method (to be submitted to UAI 2011) 7 | % 8 | % INPUT: 9 | % X Nxd1 matrix of samples (N data points, d1 dimensions) 10 | % Y Nxd2 matrix of samples (N data points, d2 dimensions) 11 | % Z Nxd3 matrix of samples (N data points, d3 dimensions) 12 | % pars structure containing parameters for the independence test 13 | % .pairwise if true, the test is performed pairwise if d1>1 (standard: false) 14 | % .bonferroni if true, bonferroni correction is performed (standard: false) 15 | % .width kernel width (standard: 0, which results in an automatic -heuristic- choice) 16 | % 17 | % OUTPUT: 18 | % pval p value of the test 19 | % stat test statistic 20 | % 21 | % 22 | % Copyright (c) 2011-2011 Kun Zhang 23 | % 2011-2011 Jonas Peters 24 | % All rights reserved. See the file COPYING for license terms. 25 | 26 | 27 | if size(X,2)>1||size(Y,2)>1 28 | % error('This test only works for one-dimensional random variables X and Y. Maybe it can be extended??') 29 | fprintf('Note that X and Y are random vectors.\n'); 30 | end 31 | 32 | if isempty(Z) % unconditional test 33 | if pars.pairwise 34 | p = zeros(size(X,2),size(Y,2)); 35 | for i = 1:size(X,2) 36 | for j = 1:size(Y,2) 37 | [sta(i,j), Cri, p_vala, Cri_appr, p(i,j)] = UInd_KCItest_RFF(X(:,i), Y(:,j), pars); % pairs X(:,i) with Y(:,j) 38 | end 39 | end 40 | [pp iii] = min(p); 41 | [pval jj] = min(pp); 42 | stat = sta(iii(jj),jj); 43 | if pars.bonferroni 44 | pval=size(X,2)*size(Y,2)*pval; 45 | end 46 | else 47 | [pval stat] = UInd_KCItest_RFF(X, Y, pars); 48 | end 49 | else % conditional independence test 50 | [pval, stat, Cri] = CInd_test_new_withGP_t_RFF(X, Y, Z, 0.01, pars); 51 | end 52 | 53 | return 54 | 55 | 56 | 57 | 
-------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/README -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Causal-Discovery-from-Nonstationary-Heterogeneous-Data 2 | 3 | Causal Discovery from Nonstationary/Heterogeneous Data.
Copyright (c) 2017-2019 Biwei Huang & Kun Zhang 4 | 5 | ### MAIN FUNCTIONS 6 | function [g_skeleton, g_inv, gns, SP] = nonsta_cd_new(X,cond_ind_test,c_indx,maxFanIn,alpha, Type, pars) 7 | 8 | INPUT: 9 | * X: - T*n matrix. T is the number of data points and n is the number of observed variables 10 | * cond_ind_test: - function handle that computes p-values for X independent of Y given Z: (p_val = cond_ind_test(X, Y, Z, pars)) 11 | * c_indx: surrogate variable to capture the distribution shift. If the data is nonstationary, it is the time index. If the data is from multiple domains, it is the domain index 12 | * maxFanIn: - maximum number of variables in the conditioning set 13 | * alpha: - significance level of the independence test 14 | * Type: - which phases of CD-NOD to run 15 | * If Type = 0, run all phases of CD-NOD (including phase 1: learning the causal skeleton, phase 2: identifying causal directions with generalization of invariance, phase 3: identifying directions with the independent change principle, and phase 4: recovering the nonstationarity driving force). 16 | * If Type = 1, perform phase 1 + phase 2 + phase 3 17 | * If Type = 2, perform phase 1 + phase 2 18 | * If Type = 3, only perform phase 1 19 | * pars: - including pars.pairwise, pars.bonferroni, pars.if_GP1, pars.if_GP2, pars.width, and pars.widthT 20 | * If pars.if_GP1 = 1, optimize the kernel width with GP in conditional independence tests; otherwise, use a fixed kernel width 21 | * If pars.if_GP2 = 1, optimize the kernel width with GP in direction determination with the independent change principle & nonstationary driving force visualization 22 | * pars.width: kernel width on observational variables (except the time index). If it is 0, the default kernel width is used when IF_GP1 = 0 23 | * pars.widthT: kernel width on the time index 24 | 25 | 26 | OUTPUT: 27 | * g_skeleton: (n+1)*(n+1) matrix representing the recovered causal skeleton over the augmented set of variables 28 | * i-j (undirected edge): g_skeleton(i,j)=-1 & g_skeleton(j,i)=-1; i j (no edge): g_skeleton(i,j)=0 & g_skeleton(j,i)=0 29 | * the last row of g_skeleton indicates the connection of the nonstationarity indicator (C) with the other observed variables 30 | * g_inv: (n+1)*(n+1) matrix representing the recovered graph structure, up to the Markov equivalence class learned on the augmented causal graph, with directions inferred by generalization of invariance 31 | * i->j: g_inv(i,j)=1 & g_inv(j,i)=0; i-j: g_inv(i,j)=-1 & g_inv(j,i)=-1; i j (no edge): g_inv(i,j)=0 & g_inv(j,i)=0 32 | * the last row of g_inv indicates the connection of the nonstationarity indicator (C) with the other observed variables 33 | * gns: (n+1)*(n+1) matrix representing the recovered graph structure, with directions inferred by generalization of invariance & the independent change principle 34 | * i->j: gns(i,j)=1 & gns(j,i)=0; i-j: gns(i,j)=-1 & gns(j,i)=-1; i j (no edge): gns(i,j)=0 & gns(j,i)=0 35 | * the last row of gns indicates the connection of the nonstationarity indicator (C) with the other observed variables 36 | * ("gns" should have more oriented edges than "g_inv") 37 | * SP: details of each independence test 38 | 39 | 40 | ### EXAMPLE 41 | example1.m, example2.m, and example3.m give three examples of using this package. 42 | Specifically, example1.m and example2.m are for nonstationary data, and example3.m is for data from multiple domains.
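A condensed call, following example1.m (the data matrix X below is a stand-in; replace it with your own T*n matrix):

    X = randn(500, 4);                 % stand-in T*n data
    T = size(X, 1);
    pars = struct('pairwise', false, 'bonferroni', false, ...
                  'if_GP1', 1, 'if_GP2', 1, 'width', 0, 'widthT', 0.1);
    c_indx = (1:T)';                   % time index as the surrogate variable
    [g_skeleton, g_inv, gns, SP] = nonsta_cd_new(X, 'indtest_new_t', c_indx, 2, 0.05, 1, pars);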
44 | 45 | ### If there are multi-dimensional variables, use 46 | function [g_skeleton, g_inv, gns, SP] = nonsta_cd_new_multi(X,dlabel,cond_ind_test,c_indx,maxFanIn,alpha, Type, pars) 47 | * dlabel: - In the case with multi-dimensional variables, we use dlable to indicat the index of each variable 48 | Please see the example given in example4.m 49 | 50 | ### Notes 51 | For large-scale systems, there are several ways to speed up the process: 52 | 53 | - Fix the kernel width in conditional independence tests: set "pars.if_GP1 = 0". 54 | - Approximate the kernel learning with random fourier feature, by setting "cond_ind_test='indtest_new_t_RFF'". 55 | - Do a pre-processing step to remove some spurious edges, e.g., first using partial correlation to remove some edges. 56 | 57 | Note that if fixing the kernel width, you may need to tune the kernel width a bit to get better results, 58 | especially for the kernel width on the time index ("pars.widthT"), "width" and "Wt" in "infer_nonsta_dir.m" and "cd_non_con_fun.m". 59 | 60 | ### CITATION 61 | If you use this code, please cite the following paper: 62 | 63 | 1. Zhang, K., Huang, B., Zhang, J., Glymour, C., Scholkopf, B.. Causal Discovery from Nonstationary/Heterogeneous Data: Skeleton Estimation and Orientation Determination. IJCAI 2017. 64 | 2. Huang, B., Zhang, K., Zhang, J., Glymour, C., Scholkopf, B. Behind Distribution Shift: Mining Driving Forces of Changes and Causal Arrows. ICDM 2017. 65 | 3. Huang, B., Zhang, K., Zhang, J., Ramsey, J., Sanchez-Romero, R., Glymour, C., Scholkopf, B.. Causal Discovery from Heterogeneous/Nonstationary Data. JMLR, 21(89), 2020. 66 | 67 | If you have problems or questions, do not hesitate to send an email to biweih@andrew.cmu.edu 68 | -------------------------------------------------------------------------------- /cd_non_con_fun.m: -------------------------------------------------------------------------------- 1 | function [Yg,Yl,Mg,Ml,D,eigValueg,eigValuel] = cd_non_con_fun(X,Y,c_indx,width,IF_GP) 2 | % learn the nonstationary driving force of the causal mechanism 3 | % X: parents; Y; effect 4 | % width: the kernel width for X and Y 5 | % c_indx: surrogate variable to capture the distribution shift; 6 | % If If_GP = 1, learning the kernel width for P(Y|X). Set it to 0 can speed up the process!!! 7 | 8 | if(width==0) 9 | width = 0.1; 10 | end 11 | Wt = 1; % the initial kernel width on C (or T). May need tunning for different data sets!!! 12 | [T,d] = size(X); 13 | X = X * diag(1./std(X)); 14 | Y = Y/std(Y); 15 | theta = 1/width^2; % 0.2 16 | lambda = 1; % 0.05 0.3 10 17 | Ml = []; 18 | 19 | % size of Y should be T*1. 20 | Kyy = kernel(Y, Y, [theta,1]); 21 | 22 | if IF_GP 23 | Thresh = 1E-4; 24 | [eig_Ky, eiy] = eigdec((Kyy+Kyy')/2, min(400, floor(T/4))); % /2 25 | covfunc = {'covSum', {'covSEard','covNoise'}}; 26 | % covfunc = {'covSum', {'covMatern3ard','covNoise'}}; 27 | 28 | logtheta0 = [log(width)*ones(d,1); log(Wt); 0; log(sqrt(0.1))]; 29 | fprintf('Optimization hyperparameters in GP regression:\n'); 30 | 31 | IIy = find(eig_Ky > max(eig_Ky) * Thresh); eig_Ky = eig_Ky(IIy); eiy = eiy(:,IIy); 32 | [logtheta_y, fvals_y, iter_y] = minimize(logtheta0, 'gpr_multi', -350, covfunc, [X c_indx], 2*sqrt(T) *eiy * diag(sqrt(eig_Ky))/sqrt(eig_Ky(1))); 33 | exp(logtheta_y), 34 | 35 | covfunc_z = {'covSEard'}; 36 | Kxt = feval(covfunc_z{:}, logtheta_y, [X c_indx]); 37 | 38 | % Note: in the conditional case, no need to do centering, as the regression 39 | % will automatically enforce that. 
40 | 41 | % Kernel matrices of the errors 42 | invK = pdinv(Kxt + exp(2*logtheta_y(end))*eye(T)); 43 | 44 | % Kxx = kernel(X, X, [1/exp(2*logtheta_y(1)),1]); 45 | % Ktt = kernel((1:T)', (1:T)', [1/exp(2*logtheta_y(d+1)),1]); 46 | Kxx = feval(covfunc_z{:}, logtheta_y([1:d,d+2]), X); 47 | Ktt = feval(covfunc_z{:}, logtheta_y([d+1,d+2]), c_indx); 48 | else 49 | Kxx = kernel(X, X, [theta,1]); 50 | Kyy = kernel(Y, Y, [theta,1]); 51 | Ktt = kernel(c_indx, c_indx, [1/Wt^2,1]); 52 | invK = pdinv( Kxx.* Ktt + lambda * eye(T)); 53 | end 54 | Kxx3 = Kxx^3; %^3 55 | 56 | prod_invK = invK * Kyy * invK; 57 | % now finding Ml 58 | 59 | Ml = 1/T^2 * Ktt*( Kxx3 .* prod_invK) * Ktt; 60 | 61 | % Len = floor(T/50); 62 | % for c = 1:50 63 | % cc = Len*(c-1)+1; 64 | % for c1 = c:50 65 | % fprintf('.'); 66 | % cc1 = Len*(c1-1)+1; 67 | % % Ml(c,c1) = trace(diag(Ktt(:,cc)) * Kxx3 * diag(Ktt(:,cc1)) * prod_invK); 68 | % Ml(c,c1) = trace( ((Ktt(:,cc) * Ktt(:,cc1)' ) .* Kxx3) * prod_invK ); 69 | % if c1>c 70 | % Ml(c1,c) = Ml(c,c1); 71 | % end 72 | % end 73 | % end 74 | % Ml = 1/T^2 * Ml; 75 | 76 | % the squared distance 77 | D = diag(diag(Ml)) * ones(size(Ml)) + ones(size(Ml)) * diag(diag(Ml)) - 2*Ml; 78 | 79 | % Gaussian kernel 80 | sigma2_square = median( D(find(tril(ones(size(D)),-1))) ); 81 | Mg = exp(-D/sigma2_square/2); 82 | 83 | 84 | [Yg, eigVectorg, eigValueg]=kPCA_kernel_orig(Mg,3); 85 | [Yl, eigVectorl, eigValuel]=kPCA_kernel_orig(Ml,3); 86 | % figure, plot(Yg(:,1),'b'); hold on; plot(Yg(:,2),'k--'); title('Visualization of change in PA^i \rightarrow V^i (with Gaussian kernel)') 87 | % legend(['First component of \lambda_i (eigenvalue: ' num2str(eigValueg(1)) ')'],['Second component of \lambda_i (eigenvalue: ' num2str(eigValueg(2)) ')']); 88 | 89 | % figure, plot(Yl(:,1),'b'); hold on; plot(Yl(:,2),'k--'); title('Visualization of change in PA^i \rightarrow V^i (with linear kernel)') 90 | % legend(['First component of \lambda_i (eigenvalue: ' num2str(eigValuel(1)) ')'],['Second component of \lambda_i (eigenvalue: ' num2str(eigValuel(2)) ')']); 91 | 
-------------------------------------------------------------------------------- /example1.m: -------------------------------------------------------------------------------- 1 | % example 1: nonstationary data (each variable is continuous & one-dimensional) 2 | clear all,clc,close all 3 | addpath(genpath(pwd)) 4 | rng(10) 5 | 6 | %% generate the data 7 | % x1->x2->x3->x4; the causal modules of x2 and x4 are nonstationary, and 8 | % their changes are related 9 | T = 500; 10 | x1 = randn(T,1); 11 | x2 = 0.8*x1 + 1.5*sin([1:T]'/50) + 0.5*randn(T,1); 12 | x3 = 0.8*x2 + 0.5*randn(T,1); 13 | x4 = 0.8*x3 + (sin([1:T]'/50)+sin([1:T]'/20)) + 0.5*randn(T,1); 14 | Data = [x1,x2,x3,x4]; 15 | 16 | %% set the parameters 17 | alpha = 0.05; % significance level of the independence test 18 | maxFanIn = 2; % maximum number of conditioning variables 19 | if (T<=1000) % for small sample size, use GP to learn the kernel width in conditional independence tests 20 | cond_ind_test='indtest_new_t'; 21 | IF_GP = 1; 22 | else 23 | if (T>1000 & T<=2000) % for relatively large sample size, fix the kernel width 24 | cond_ind_test='indtest_new_t'; 25 | IF_GP = 0; 26 | else % for very large sample size, fix the kernel width and use random Fourier features to approximate the kernel 27 | cond_ind_test='indtest_new_t_RFF'; 28 | IF_GP = 0; 29 | end 30 | end 31 | pars.pairwise = false; 32 | pars.bonferroni = false; 33 | pars.if_GP1 = IF_GP; % for conditional independence tests, whether to use GP to learn
the kernel width 34 | pars.if_GP2 = 1; % for direction determination with the independent change principle & nonstationary driving force visualization 35 | pars.width = 0; % kernel width on observational variables (except the time index). If it is 0, then use the default kernel width when IF_GP = 0 36 | pars.widthT = 0.1; % the kernel width on the time index 37 | c_indx = [1:T]'; % surrogate variable to capture the distribution shift; 38 | % here it is the time index, because the data is nonstationary 39 | Type = 1; 40 | % If Type=0, run all phases of CD-NOD (including 41 | % phase 1: learning causal skeleton, 42 | % phase 2: identifying causal directions with generalization of invariance, 43 | % phase 3: identifying directions with independent change principle, and 44 | % phase 4: recovering the nonstationarity driving force ) 45 | % If Type = 1, perform phase 1 + phase 2 + phase 3 46 | % If Type = 2, perform phase 1 + phase 2 47 | % If Type = 3, only perform phase 1 48 | 49 | %% run CD-NOD 50 | [g_skeleton, g_inv, gns, SP] = nonsta_cd_new(Data, cond_ind_test, c_indx, maxFanIn, alpha, Type, pars); 51 | 52 | 53 | 
-------------------------------------------------------------------------------- /example2.m: -------------------------------------------------------------------------------- 1 | % example 2: nonstationary data 2 | clear all,clc,close all 3 | addpath(genpath(pwd)) 4 | rng(10) 5 | 6 | % x1->x2->x3; the causal modules of x1, x2, and x3 are nonstationary, 7 | % and the causal modules change independently 8 | load smooth_module 9 | % R0 saves the generated nonstationary driving forces, which are independent of each other 10 | T = 500; 11 | x1 = 0.5*randn(T,1) + 5*R0{1}(1:T); 12 | x2 = 0.8*x1 + 4*R0{2}(1:T) + 0.5*randn(T,1); 13 | x3 = 6*R0{6}(1:T)+ 0.8*x2 + 0.3*randn(T,1); 14 | Data = [x1,x2,x3]; 15 | 16 | %% set the parameters 17 | alpha = 0.05; % significance level of the independence test 18 | maxFanIn = 2; % maximum number of conditioning variables 19 | if (T<=1000) % for small sample size, use GP to learn the kernel width in conditional independence tests 20 | cond_ind_test='indtest_new_t'; 21 | IF_GP = 1; 22 | else 23 | if (T>1000 & T<2000) % for relatively large sample size, fix the kernel width 24 | cond_ind_test='indtest_new_t'; 25 | IF_GP = 0; 26 | else % for very large sample size, fix the kernel width and use random Fourier features to approximate the kernel 27 | cond_ind_test='indtest_new_t_RFF'; 28 | IF_GP = 0; 29 | end 30 | end 31 | pars.pairwise = false; 32 | pars.bonferroni = false; 33 | pars.if_GP1 = IF_GP; % for conditional independence tests 34 | pars.if_GP2 = 1; % for direction determination with the independent change principle & nonstationary driving force visualization 35 | pars.width = 0; % kernel width on observational variables (except the time index).
If it is 0, then use the default kernel width when IF_GP = 0 36 | pars.widthT = 0.1; % the kernel width on the time index 37 | c_indx = [1:T]'; % surrogate variable to capture the distribution shift; 38 | % here it is the time index, because the data is nonstationary 39 | Type = 0; 40 | % If Type=0, run all phases of CD-NOD (including 41 | % phase 1: learning causal skeleton, 42 | % phase 2: identifying causal directions with generalization of invariance, 43 | % phase 3: identifying directions with independent change principle, and 44 | % phase 4: recovering the nonstationarity driving force ) 45 | % If Type = 1, perform phase 1 + phase 2 + phase 3 46 | % If Type = 2, perform phase 1 + phase 2 47 | % If Type = 3, only perform phase 1 48 | 49 | %% run CD-NOD 50 | [g_skeleton, g_inv, gns, SP] = nonsta_cd_new(Data, cond_ind_test, c_indx, maxFanIn, alpha, Type, pars); 51 | 52 | 
-------------------------------------------------------------------------------- /example3.m: -------------------------------------------------------------------------------- 1 | % example 3: heterogeneous data (data from multiple domains) 2 | clear all,clc,close all 3 | addpath(genpath(pwd)) 4 | rng(10) 5 | %% data generation 6 | % generate data from the first domain 7 | T_1 = 300; 8 | x1_1 = randn(T_1,1); 9 | x2_1 = 0.9*x1_1 + 0.6*randn(T_1,1); 10 | x3_1 = 0.9*x2_1 + 0.6*randn(T_1,1); 11 | x4_1 = 0.9*x3_1 + 0.6*randn(T_1,1); 12 | Data_1 = [x1_1,x2_1,x3_1,x4_1]; 13 | 14 | % generate data from the second domain 15 | T_2 = 300; 16 | x1_2 = randn(T_2,1); 17 | x2_2 = sin(x1_2) + 0.2*randn(T_2,1); 18 | x3_2 = sin(x2_2) + 0.2*randn(T_2,1); 19 | x4_2 = sin(x3_2) + 0.2*randn(T_2,1); 20 | Data_2 = [x1_2,x2_2,x3_2,x4_2]; 21 | 22 | % concatenate data from the two domains 23 | Data = [Data_1;Data_2]; 24 | 25 | 26 | %% set the parameters 27 | alpha = 0.05; % significance level of the independence test 28 | maxFanIn = 2; % maximum number of conditioning variables 29 | T = size(Data,1); 30 | if (T<=1000) % for small sample size, use GP to learn the kernel width in conditional independence tests 31 | cond_ind_test='indtest_new_t'; 32 | IF_GP = 1; 33 | else 34 | if (T>1000 & T<2000) % for relatively large sample size, fix the kernel width 35 | cond_ind_test='indtest_new_t'; 36 | IF_GP = 0; 37 | else % for very large sample size, fix the kernel width and use random Fourier features to approximate the kernel 38 | cond_ind_test='indtest_new_t_RFF'; 39 | IF_GP = 0; 40 | end 41 | end 42 | 43 | pars.pairwise = false; 44 | pars.bonferroni = false; 45 | pars.if_GP1 = IF_GP; % for conditional independence tests 46 | pars.if_GP2 = 1; % for direction determination with the independent change principle & nonstationary driving force visualization 47 | pars.width = 0.4; % kernel width on observational variables (except the time index).
If it is 0, then use the default kernel width when IF_GP = 0 48 | pars.widthT = 0; % the kernel width on the time index; set it to zero for domain-varying data 49 | c_indx = [ones(1,T_1),2*ones(1,T_2)]'; % surrogate variable to capture the distribution shift; 50 | % here it is the domain index, because the data is from multiple domains 51 | Type = 1; 52 | % If Type=0, run all phases of CD-NOD (including 53 | % phase 1: learning causal skeleton, 54 | % phase 2: identifying causal directions with generalization of invariance, 55 | % phase 3: identifying directions with independent change principle, and 56 | % phase 4: recovering the nonstationarity driving force ) 57 | % If Type = 1, perform phase 1 + phase 2 + phase 3 58 | % If Type = 2, perform phase 1 + phase 2 59 | % If Type = 3, only perform phase 1 60 | 61 | %% run CD-NOD 62 | [g_skeleton, g_inv, gns, SP] = nonsta_cd_new(Data, cond_ind_test, c_indx, maxFanIn, alpha, Type, pars); 63 | 64 | 
-------------------------------------------------------------------------------- /example4.m: -------------------------------------------------------------------------------- 1 | % example 4 2 | % multi-dimensional variables 3 | % non-stationary data 4 | clear all,clc,close all 5 | addpath(genpath(pwd)) 6 | 7 | % x1->x2->x3->x4; the causal modules of x2 and x4 are nonstationary, and 8 | % their changes are related 9 | T = 500; 10 | x1 = randn(T,2); % 2-dimensional 11 | x2 = 0.6*x1 + 2*sin([1:T]'/50) + 0.5*randn(T,2); % 2-dimensional 12 | x3 = x2*[0.3;0.3] + 0.5*randn(T,1); % 1-dimensional 13 | x4 = 0.8*x3 + (sin([1:T]'/50)+sin([1:T]'/20)) + 0.5*randn(T,1); % 1-dimensional 14 | Data = [x1,x2,x3,x4]; 15 | 16 | 17 | %% set the parameters 18 | alpha = 0.05; % significance level of the independence test 19 | maxFanIn = 2; % maximum number of conditioning variables 20 | if (T<=1000) % for small sample size, use GP to learn the kernel width in conditional independence tests 21 | cond_ind_test='indtest_new_t'; 22 | IF_GP = 1; 23 | else 24 | if (T>1000 & T<2000) % for relatively large sample size, fix the kernel width 25 | cond_ind_test='indtest_new_t'; 26 | IF_GP = 0; 27 | else % for very large sample size, fix the kernel width and use random Fourier features to approximate the kernel 28 | cond_ind_test='indtest_new_t_RFF'; 29 | IF_GP = 0; 30 | end 31 | end 32 | pars.pairwise = false; 33 | pars.bonferroni = false; 34 | pars.if_GP1 = IF_GP; % for conditional independence tests 35 | pars.if_GP2 = 1; % for direction determination with the independent change principle & nonstationary driving force visualization 36 | pars.width = 0; % kernel width on observational variables (except the time index).
If it is 0, then use the default kernel width when IF_GP = 0 37 | pars.widthT = 0.1; % the kernel width on the time index 38 | dlabel{1} = [1,2]; dlabel{2} = [3,4]; dlabel{3} = [5]; dlabel{4} =[6]; 39 | c_indx = [1:T]'; % surrogate variable to capture the distribution shift; 40 | % here it is the time index, because the data is nonstationary 41 | Type = 1; 42 | % If Type=0, run all phases of CD-NOD (including 43 | % phase 1: learning causal skeleton, 44 | % phase 2: identifying causal directions with generalization of invariance, 45 | % phase 3: identifying directions with independent change principle, and 46 | % phase 4: recovering the nonstationarity driving force ) 47 | % If Type = 1, perform phase 1 + phase 2 + phase 3 48 | % If Type = 2, perform phase 1 + phase 2 49 | % If Type = 3, only perform phase 1 50 | 51 | %% run CD-NOD 52 | [g_skeleton, g_inv, gns, SP] = nonsta_cd_new_multi(Data,dlabel,cond_ind_test, c_indx, maxFanIn, alpha, Type, pars); 53 | 54 | 
-------------------------------------------------------------------------------- /infer_nonsta_dir.m: -------------------------------------------------------------------------------- 1 | function [testStat] = infer_nonsta_dir(X,Y,c_indx,width,IF_GP) 2 | % infer the causal direction between X and Y when their causal modules are 3 | % both nonstationary but independent 4 | % X: parents; Y: effect 5 | % width: the kernel width for X and Y 6 | % c_indx: surrogate variable to capture the distribution shift; 7 | % if IF_GP = 1, learn the kernel width for P(Y|X); setting it to 0 can speed up the process 8 | % Don't forget to normalize the data 9 | 10 | if(width==0) 11 | width = 0.1; % may need tuning for different data sets 12 | end 13 | Wt = 1; % the initial kernel width on C (or T); may need tuning for different data sets 14 | [T,d] = size(X); 15 | X = X-repmat(mean(X),size(X,1),1); % normalization 16 | X = X * diag(1./std(X)); 17 | Y = Y-repmat(mean(Y),size(Y,1),1); 18 | Y = Y * diag(1./std(Y)); 19 | theta = 1/width^2; 20 | lambda = 2; % may need tuning! 21 | Ml = []; 22 | 23 | % size of Y should be T*1. 24 | Kyy = kernel(Y, Y, [theta,1]); 25 | 26 | %% P(Y|X) 27 | if IF_GP 28 | Thresh = 1E-4; 29 | [eig_Ky, eiy] = eigdec((Kyy+Kyy')/2, min(400, floor(T/4))); % /2 30 | covfunc = {'covSum', {'covSEard','covNoise'}}; 31 | logtheta0 = [log(width)*ones(d,1); log(Wt); 0; log(sqrt(0.1))]; 32 | fprintf('Optimizing hyperparameters in GP regression:\n'); 33 | 34 | IIy = find(eig_Ky > max(eig_Ky) * Thresh); eig_Ky = eig_Ky(IIy); eiy = eiy(:,IIy); 35 | [logtheta_y, fvals_y, iter_y] = minimize(logtheta0, 'gpr_multi', -350, covfunc, [X c_indx], 2*sqrt(T) *eiy * diag(sqrt(eig_Ky))/sqrt(eig_Ky(1))); % regress on [X c_indx], consistent with Kxt below 36 | % exp(logtheta_y), 37 | if(logtheta_y(d+1)>log(1e4)) % set a bound 38 | logtheta_y(d+1)=log(1e4); 39 | end 40 | 41 | covfunc_z = {'covSEard'}; 42 | Kxt = feval(covfunc_z{:}, logtheta_y, [X c_indx]); 43 | % Note: in the conditional case, no need to do centering, as the regression 44 | % will automatically enforce that.
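% Overview of the steps below: Mg encodes, through a Gaussian kernel over time points,
% how the module P(Y|X) changes, and Mg2 encodes, in the same way, how P(X) changes;
% testStat = 1/T^2*sum(sum(Mg'.*Mg2)) is an HSIC-style measure of dependence between
% the two changes. Under the independent change principle this dependence is expected
% to be smaller for the true causal direction, so infer_nonsta_dir(X,Y,...) is
% compared against infer_nonsta_dir(Y,X,...).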
45 | 46 | % Kernel matrices of the errors 47 | invK = pdinv(Kxt + exp(2*logtheta_y(end))*eye(T)); 48 | 49 | Kxx = feval(covfunc_z{:}, logtheta_y([1:d,d+2]), X); 50 | Ktt = feval(covfunc_z{:}, logtheta_y([d+1,d+2]), c_indx); 51 | else 52 | Kxx = kernel(X, X, [theta,1]); 53 | Kyy = kernel(Y, Y, [theta,1]); 54 | Ktt = kernel(c_indx, c_indx, [1/Wt^2,1]); 55 | invK = pdinv( Kxx.* Ktt + lambda * eye(T)); 56 | end 57 | Kxx3 = Kxx^3; 58 | prod_invK = invK * Kyy * invK; 59 | % now finding Ml 60 | Ml = 1/T^2 * Ktt*( Kxx3 .* prod_invK) * Ktt; 61 | % the squared distance 62 | D = diag(diag(Ml)) * ones(size(Ml)) + ones(size(Ml)) * diag(diag(Ml)) - 2*Ml; 63 | % Gaussian kernel 64 | sigma2_square = median( D(find(tril(ones(size(D)),-1))) ); 65 | Mg = exp(-D/sigma2_square/2); 66 | 67 | 68 | %% P(X) 69 | Kxx = kernel(X,X, [theta,1]); 70 | if IF_GP 71 | [eig_Kx, eix] = eigdec((Kxx+Kxx')/2, min(400, floor(T/4))); % /2 72 | covfunc = {'covSum', {'covSEard','covNoise'}}; 73 | logtheta0 = [log(Wt); 0; log(sqrt(0.1))]; 74 | fprintf('Optimizing hyperparameters in GP regression:\n'); 75 | IIx = find(eig_Kx > max(eig_Kx) * Thresh); eig_Kx = eig_Kx(IIx); eix = eix(:,IIx); 76 | [logtheta_x, fvals_x, iter_x] = minimize(logtheta0, 'gpr_multi', -350, covfunc, c_indx, 2*sqrt(T) *eix * diag(sqrt(eig_Kx))/sqrt(eig_Kx(1))); 77 | % exp(logtheta_x), 78 | if(logtheta_x(1)>log(1e4)) 79 | logtheta_x(1)=log(1e4); 80 | end 81 | Ktt = feval(covfunc_z{:}, logtheta_x(1:2), c_indx); 82 | invK2 = pdinv(Ktt + exp(2*logtheta_x(end))*eye(T)); 83 | else 84 | Ktt = kernel(c_indx, c_indx, [1/Wt^2,1]); 85 | invK2 = pdinv(Ktt + lambda * eye(T)); 86 | end 87 | Ml2 = Ktt*invK2*Kxx*invK2*Ktt; 88 | % the squared distance 89 | D2 = diag(diag(Ml2)) * ones(size(Ml2)) + ones(size(Ml2)) * diag(diag(Ml2)) - 2*Ml2; 90 | % Gaussian kernel 91 | sigma2_square2 = median( D2(find(tril(ones(size(D2)),-1))) ); 92 | Mg2 = exp(-D2/sigma2_square2/2); 93 | 94 | %% 95 | H = eye(T)-1/T*ones(T,T); 96 | Mg = H*Mg*H; 97 | Mg2 = H*Mg2*H; 98 | testStat = 1/T^2*sum(sum(Mg'.*Mg2)); 99 | % eta = 1e-6; 100 | % Rg = Mg*pdinv(Mg+T*eta*eye(T)); 101 | % Rg2 = Mg2*pdinv(Mg2+T*eta*eye(T)); 102 | % % testStat = sum(sum(Rg'.*Rg2)); 103 | % testStat = trace(Rg*Rg2); 104 | 105 | 106 | 107 | 108 | 
-------------------------------------------------------------------------------- /kPCA_kernel_orig.m: -------------------------------------------------------------------------------- 1 | % K0: precomputed kernel (Gram) matrix of the data 2 | % (this variant takes K0 directly; the original version computed it from a 3 | % data matrix X with a kernel of type 'simple', 'poly', or 'gaussian' and 4 | % parameter para) 5 | % d: reduced dimension 6 | % Y: dimensionality-reduced data 7 | % eigVector: eigenvectors, will later be used for pre-image 8 | % reconstruction 9 | 10 | % Copyright by Quan Wang, 2011/05/10 11 | % Please cite: Quan Wang. Kernel Principal Component Analysis and its 12 | % Applications in Face Recognition and Active Shape Models. 13 | % arXiv:1207.3538 [cs.CV], 2012.
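% Usage sketch (K0 an N-by-N Gram matrix, d the number of components to keep):
%   [Y, eigVector, eigValue] = kPCA_kernel_orig(K0, 2);   % Y is the N x 2 embedding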
14 | 15 | function [Y, eigVector, eigValue]=kPCA_kernel_orig(K0,d) 16 | 17 | 18 | %% kernel PCA 19 | %%K0=kernel(X,type,para); % input K0 20 | N = length(K0); 21 | oneN=ones(N,N)/N; 22 | K=K0-oneN*K0-K0*oneN+oneN*K0*oneN; 23 | 24 | %% eigenvalue analysis 25 | [V,D]=eig(K/N); 26 | eigValue=diag(D); 27 | [tmp,IX]=sort(eigValue,'descend'); 28 | eigVector=V(:,IX); 29 | eigValue=eigValue(IX); 30 | 31 | %% normalization 32 | norm_eigVector=sqrt(sum(eigVector.^2)); 33 | eigVector=eigVector./repmat(norm_eigVector,size(eigVector,1),1); 34 | 35 | %% dimensionality reduction 36 | eigVector=eigVector(:,1:d); 37 | Y=K0*eigVector; 38 | 39 | 
-------------------------------------------------------------------------------- /meeks.m: -------------------------------------------------------------------------------- 1 | %Copyright (C) 2 | % 1997-2002 Kevin Murphy 3 | % 2010-2011 Robert Tillman 4 | % 5 | % This file is part of pc. 6 | % 7 | % discrete_anm is free software: you can redistribute it and/or modify 8 | % it under the terms of the GNU General Public License as published by 9 | % the Free Software Foundation, either version 3 of the License, or 10 | % (at your option) any later version. 11 | % 12 | % discrete_anm is distributed in the hope that it will be useful, 13 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | % GNU General Public License for more details. 16 | % 17 | % You should have received a copy of the GNU General Public License 18 | % along with discrete_anm. If not, see <http://www.gnu.org/licenses/>. 19 | 20 | 21 | % Meek's rules - adapted from the version in BNT 22 | function pdag = meeks(pdag,G) 23 | 24 | n = size(pdag,1); 25 | old_pdag=zeros(n,n); 26 | while ~isequal(pdag, old_pdag) 27 | old_pdag = pdag; 28 | % rule 1 29 | [A,B] = find(pdag==-1); % a -> b 30 | for i=1:length(A) 31 | a = A(i); b = B(i); 32 | C = find(pdag(b,:)==1 & G(a,:)==0); % all nodes adj to b but not a 33 | if ~isempty(C) 34 | pdag(b,C) = -1; pdag(C,b) = 0; 35 | %fprintf('rule 1: a=%d->b=%d and b=%d-c=%d implies %d->%d\n', a, b, b, C, b, C); 36 | end 37 | end 38 | % rule 2 39 | [A,B] = find(pdag==1); % unoriented a-b edge 40 | for i=1:length(A) 41 | a = A(i); b = B(i); 42 | if any( (pdag(a,:)==-1) & (pdag(:,b)==-1)' ); 43 | pdag(a,b) = -1; pdag(b,a) = 0; 44 | %fprintf('rule 2: %d -> %d\n', a, b); 45 | end 46 | end 47 | % rule 3 48 | [A,B] = find(pdag==1); % a-b 49 | for i=1:length(A) 50 | a = A(i); b = B(i); 51 | C = find( (G(a,:)==1) & (pdag(:,b)==-1)' ); 52 | % C contains nodes c s.t. a-c and c->b 53 | G2 = setdiag(G(C, C), 1); 54 | if any(G2(:)==0) % there are 2 different non adjacent elements of C 55 | pdag(a,b) = -1; pdag(b,a) = 0; 56 | %fprintf('rule 3: %d -> %d\n', a, b); 57 | end 58 | end 59 | % rule 4 60 | [A, B] = find(pdag==1); % a-b 61 | for i=1:length(A) 62 | a = A(i); b = B(i); 63 | C = find((pdag(:,b)==-1) & (G(:,a)==1)); 64 | for j=1:length(C) 65 | c = C(j); % c -> b and c - a 66 | D = find((pdag(:,c)==-1) & (pdag(:,a)==1)); % d -> c and d - a 67 | if (length(D)>0) 68 | pdag(a,b) = -1; 69 | %pdag(b,a) = -1; % It is a bug in the original version!!!!!!!!!! 70 | pdag(b,a) = 0; 71 | end 72 | end 73 | end 74 | end 75 | 
-------------------------------------------------------------------------------- /pdinv.m: -------------------------------------------------------------------------------- 1 | function Ainv = pdinv(A); 2 | 3 | % PDINV Computes the inverse of a positive definite matrix 4 | % Copyright (c) 2010-2011 ... 5 | % All rights reserved.
See the file COPYING for license terms. 6 | numData = size(A, 1); 7 | try 8 | U = chol(A); 9 | invU = eye(numData)/U; 10 | Ainv = invU*invU'; 11 | catch 12 | [void, errid] = lasterr; 13 | if strcmp(errid, 'MATLAB:posdef') 14 | warning(['Matrix is not positive definite in pdinv, inverting' ... 15 | ' using svd']) 16 | [U, S, V] = svd(A); 17 | Ainv = V*diag(1./diag(S))*U'; 18 | return 19 | else 20 | error(lasterr) 21 | end 22 | end 23 | 24 | 
-------------------------------------------------------------------------------- /setdiag.m: -------------------------------------------------------------------------------- 1 | function M = setdiag(M, v) 2 | % SETDIAG Set the diagonal of a matrix to a specified scalar/vector. 3 | % M = set_diag(M, v) 4 | %Copyright (C) 5 | % 2010 Robert Tillman 6 | % 7 | % This file is part of pc. 8 | % 9 | % discrete_anm is free software: you can redistribute it and/or modify 10 | % it under the terms of the GNU General Public License as published by 11 | % the Free Software Foundation, either version 3 of the License, or 12 | % (at your option) any later version. 13 | % 14 | % discrete_anm is distributed in the hope that it will be useful, 15 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | % GNU General Public License for more details. 18 | % 19 | % You should have received a copy of the GNU General Public License 20 | % along with discrete_anm. If not, see <http://www.gnu.org/licenses/>. 21 | 22 | n = length(M); 23 | if length(v)==1 24 | v = repmat(v, 1, n); 25 | end 26 | 27 | % e.g., for 3x3 matrix, elements are numbered 28 | % 1 4 7 29 | % 2 5 8 30 | % 3 6 9 31 | % so diagonal = [1 5 9] 32 | 33 | 34 | J = 1:n+1:n^2; 35 | M(J) = v; 36 | 37 | %M = triu(M,1) + tril(M,-1) + diag(v); 38 | 39 | 
-------------------------------------------------------------------------------- /smooth_module.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Biwei-Huang/Causal-Discovery-from-Nonstationary-Heterogeneous-Data/8f882ab6d39ee40b2c1d84724d73d6cdef2ed353/smooth_module.mat -------------------------------------------------------------------------------- /transformFeatures.m: -------------------------------------------------------------------------------- 1 | % random Fourier features to approximate the kernel 2 | 3 | function [ Z ] = transformFeatures(X) 4 | %TRANSFORMFEATURES Transforms data to the random Fourier feature space 5 | % 6 | % Input: 7 | % X - n x p data matrix (each row is a sample; p is the dimension of the variable) 8 | % 9 | % Sampled internally (not inputs): 10 | % Omega - p x D matrix of random Fourier directions (one for each 11 | % dimension of a sample x) 12 | % beta - 1 x D vector of random angles 13 | % 14 | % Output: 15 | % Z - D x n matrix of random Fourier features 16 | 17 | % sample random Fourier directions and angles 18 | [T, p] = size(X); 19 | if(T>=1000) 20 | D = 1000; % RFF dimension 21 | else 22 | D = 500; 23 | end 24 | Omega = randn(p,D); % RVs defining the RFF transform 25 | beta = rand(1,D)*2*pi; 26 | 27 | Z = cos(bsxfun(@plus,X*Omega,beta))*sqrt(2/D); 28 | Z = Z'; 29 | --------------------------------------------------------------------------------
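A sanity check of the random Fourier feature approximation above (a sketch: with Omega drawn from N(0, I), Z'*Z approximates the unit-bandwidth Gaussian kernel; sq_dist from the bundled gpml code supplies the exact version):

    Xs = randn(200, 3);
    Z = transformFeatures(Xs);            % D x 200 random Fourier features
    Kapprox = Z' * Z;                     % approximate kernel matrix
    Kexact = exp(-sq_dist(Xs', Xs')/2);   % exact k(x,y) = exp(-||x-y||^2/2)
    max(max(abs(Kapprox - Kexact)))       % error is O(1/sqrt(D)) and shrinks as D grows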