├── .gitmodules ├── LICENSE ├── README.md ├── matlab ├── ComputeVarianceScatterPlot.m ├── PlotResultsEachMovieMarlinCompareKernels.m ├── PlotResultsEachMovieMarlinWeak.m ├── PlotResultsMovielens.m ├── PlotResultsMovielensMarlinCompareKernels.m ├── PlotResultsMovielensMarlinWeak.m ├── PlotResultsMovielensWeak.m ├── RecomputeMovieLens10MWeakPartialScript1.m ├── RecomputeMovieLens10MWeakScript1.m ├── RecomputeResults.m ├── changeNameMovielens.m ├── collabComponentPosteriorMeanVar.m ├── collabComputeM.m ├── collabComputeS.m ├── collabCreate.m ├── collabCreateTensor.m ├── collabDisplay.m ├── collabEstep.m ├── collabExpandParam.m ├── collabExtractParam.m ├── collabInitS.m ├── collabLoadData.m ├── collabLogLikeGradients.m ├── collabLogLikelihood.m ├── collabOptimise.m ├── collabOptimiseEachMovie.m ├── collabOptimiseOptions.m ├── collabOptions.m ├── collabOptionsTensor.m ├── collabPosteriorMeanVar.m ├── collabPosteriorMeanVarCell.m ├── collabTest.m ├── collabToolboxes.m ├── collabUpdateKernels.m ├── computeMeanVarianceWeak.m ├── computePredictionsErrorWeak.m ├── computeTestErrorEnsemblesWeak.m ├── computeTestErrorStrong.m ├── computeTestErrorWeak.m ├── computeTestErrorWeakCell.m ├── demAistats1.m ├── demEachMovie1.m ├── demEachMovie7.m ├── demEachMovieMarlinStrongLinearScript1.m ├── demEachMovieMarlinStrongScript1.m ├── demEachMovieMarlinWeakEnsemScript1.m ├── demEachMovieMarlinWeakLinear.m ├── demEachMovieMarlinWeakScript1.m ├── demEachMovieMarlinWeakTest1.m ├── demEachMovieMarlinWeakTestCustom1.m ├── demMixtoydata1.m ├── demMovieLens10MLetterWeakScript1.m ├── demMovieLens10MWeakScript1.m ├── demMovieLensMarlinStrongAdditionalScript1.m ├── demMovieLensMarlinStrongLinearRBFScript1.m ├── demMovieLensMarlinStrongLinearScript1.m ├── demMovieLensMarlinStrongScript1.m ├── demMovieLensMarlinWeakAdditionalScript1.m ├── demMovieLensMarlinWeakEnsemScript1.m ├── demMovieLensMarlinWeakLinearRBFScript1.m ├── demMovieLensMarlinWeakLinearScript1.m ├── demMovieLensMarlinWeakMLPScript1.m 
├── demMovieLensMarlinWeakScript1.m ├── demMovielens1.m ├── demMovielens2.m ├── demMovielens3.m ├── demMovielens3Script.m ├── demMovielens4.m ├── demMovielens5.m ├── demMovielens6.m ├── demMovielens6Script.m ├── demMovielens7.m ├── demMovielens7Script.m ├── demMovielensOrdered1.m ├── demMovielensSmall1.m ├── demMovielensSmallHetero1.m ├── demMovielensSmallMix1.m ├── demMovielensSmallMixFromSingleScript1.m ├── demNetflix1.m ├── demNetflix2.m ├── demNetflix3.m ├── demNetflix4.m ├── demNetflix5.m ├── em-weak.m ├── generateCteNMAE.m ├── kernAdditionalKernCompute.m ├── loadAverageVariance.m ├── loadNetflix.m ├── loadNetflix2.m ├── lvmLoadData.m ├── ml-weak.m ├── netflixTest.m ├── rbfadditionalKernCompute.m ├── rbfadditionalKernDiagCompute.m ├── rbfadditionalKernDiagGradX.m ├── rbfadditionalKernExpandParam.m ├── rbfadditionalKernExtractParam.m ├── rbfadditionalKernGradX.m ├── rbfadditionalKernGradient.m ├── rbfadditionalKernParamInit.m ├── readEachMovieMarlinStrong.m ├── readEachMovieMarlinWeak.m ├── readEachMovieWeak.m ├── readMovieLens.m ├── readMovieLens10M.m ├── readMovieLens10MCell.m ├── readMovieLens10MCellLetter.m ├── readMovieLensMarlinStrong.m ├── readMovieLensMarlinWeak.m ├── readMovieLensStrong.m ├── readMovieLensWeak.m ├── restartNetflix1.m ├── restartNetflix5.m └── splitProbeData.m └── python ├── collab.py ├── demNetflix1.py ├── demNetflix10.py ├── demNetflix2.py ├── demNetflix3.py ├── demNetflix4.py ├── demNetflix5.py ├── demNetflix6.py ├── demNetflix7.py ├── demNetflix8.py ├── demNetflix9.py ├── restartNetflix10.py ├── restartNetflix2.py ├── restartNetflix3.py ├── restartNetflix4.py ├── restartNetflix5.py ├── restartNetflix6.py ├── restartNetflix7.py ├── restartNetflix8.py └── restartNetflix9.py /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "python/ndlml"] 2 | path = python/ndlml 3 | url = https://github.com/SheffieldML/ndlml 4 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, Sheffield Machine Learning Software (ML@SITraN) 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of collab nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # collab 2 | Collaborative filtering with the GP-LVM 3 | 4 | This repository contains the code for the paper: 5 | 6 | ["Non-linear Matrix Factorization with Gaussian Processes"](http://www.machinelearning.org/archive/icml2009/papers/384.pdf) by Neil D. Lawrence and Raquel Urtasun. It was published at ICML 2009. 7 | 8 | The main code used in the paper is in the matlab subdirectory. 9 | 10 | We also worked to do some experiments on netflix with a C++/Python variant of the code. These weren't done in time, but the code is included here although it is not well documented. The code makes use of the swig wrappers around [GPc](https://github.com/SheffieldML/GPc) for creating python objects. These are included as a submodule through [ndlml](https://github.com/SheffieldML/ndlml). 11 | 12 | -------------------------------------------------------------------------------- /matlab/PlotResultsEachMovieMarlinCompareKernels.m: -------------------------------------------------------------------------------- 1 | function [] = PlotResultsEachMovieMarlinCompareKernels(substract_mean, partNo_v, latentDim_v, iters, type, inverted, directories, if_plot, if_print) 2 | % 3 | %function [] = PlotResultsEachMovieMarlinCompareKernels(substract_mean, partNo_v, latentDim_v, iters, type, inverted, directories, if_plot, if_print) 4 | 5 | if strcmp(type,'weak') 6 | 7 | % directories are order to be 8 | directories{1} = 'marlin_eachmovie'; 9 | directories{2} = 'marlin_eachmovie_linear'; kern_type{2} = 'linear'; 10 | % directories{3} = 'marlin_eachmovie_metadata'; kern_type{3} = 'additional'; 11 | % directories{4} = 'marlin_eachmovie_linear_RBF'; kern_type{4} = 'linear_RBF'; 12 | 13 | end 14 | 15 | if strcmp(type,'strong') 16 | directories{1} = 'marlin_eachmovie_strong'; 17 | directories{2} = 
'marlin_eachmovie_strong_linear'; kern_type{2} = 'linear'; 18 | % directories{3} = 'marlin_eachmovie_strong_metadata'; kern_type{3} = 'additional'; 19 | % directories{4} = 'marlin_eachmovie_strong_linear_RBF'; kern_type{4} = 'linear_RBF'; 20 | 21 | end 22 | 23 | 24 | for i=1:length(directories) 25 | 26 | cd (['../',directories{i}]); 27 | 28 | latentDim_v_p = latentDim_v; 29 | if strcmp(kern_type{i},'additional') 30 | latentDim_v_p = latentDim_v_p+1; 31 | end 32 | 33 | 34 | if (length(kern_type{i})>0) 35 | 36 | [L2_error_T{i},NMAE_error_T{i},NMAE_round_error_T{i}] = PlotResultsEachMovieMarlinWeak(substract_mean,partNo_v,latentDim_v_p, iters, type, inverted, kern_type{i}); 37 | 38 | 39 | else 40 | 41 | [L2_error_T{i},NMAE_error_T{i},NMAE_round_error_T{i}] = PlotResultsEachMovieMarlinWeak(substract_mean,partNo_v,latentDim_v_p, iters, type, inverted); 42 | end 43 | 44 | % 45 | 46 | mean_NMAE_round_error(:,i) = mean(NMAE_round_error_T{i}')'; 47 | std_NMAE_round_error(:,i) = std(NMAE_round_error_T{i}')'; 48 | mean_L2_error(:,i) = mean(L2_error_T{i}')'; 49 | std_L2_error(:,i) = std(L2_error_T{i}')'; 50 | 51 | end 52 | 53 | if if_plot 54 | figure(1); 55 | clf; 56 | 57 | font_size = 16; 58 | set(gca,'FontSize',font_size); 59 | set(get(gca,'Title'),'FontSize',font_size); 60 | set(get(gca,'Xlabel'),'FontSize',font_size); 61 | set(get(gca,'Ylabel'),'FontSize',font_size); 62 | legend([{'RBF'},{'linear'},{'metadata'},{'RBF+data'}]); 63 | for i=1:length(directories) 64 | hold on; 65 | plot(latentDim_v,mean_NMAE_round_error(:,i),[getColor(i),'s'],'lineWidth',3,'markersize',12); 66 | %errorbar(latentDim_v,mean_NMAE_round_error(:,i),std_NMAE_round_error(:,i),[getColor(i),'o'],'lineWidth',2,'markersize',12); 67 | end 68 | legend([{'RBF'},{'linear'},{'metadata'},{'RBF+data'}]); 69 | 70 | xlabel('latent dimensionality'); 71 | ylabel('NMAE error') 72 | set(gca,'XTick',latentDim_v); 73 | 74 | figure(2); 75 | clf; 76 | 77 | font_size = 16; 78 | set(gca,'FontSize',font_size); 79 | 
set(get(gca,'Title'),'FontSize',font_size); 80 | set(get(gca,'Xlabel'),'FontSize',font_size); 81 | set(get(gca,'Ylabel'),'FontSize',font_size); 82 | legend([{'RBF'},{'linear'},{'metadata'},{'RBF+data'}]); 83 | for i=1:length(directories) 84 | hold on; 85 | plot(latentDim_v,mean_L2_error(:,i),[getColor(i),'s'],'lineWidth',3,'markersize',12); 86 | end 87 | legend([{'RBF'},{'linear'},{'metadata'},{'RBF+data'}]); 88 | 89 | xlabel('latent dimensionality'); 90 | ylabel('RMSE error') 91 | set(gca,'XTick',latentDim_v); 92 | 93 | end 94 | 95 | nameFile = ['compare_kernels_',type,'_em']; 96 | nameFileRMSE = ['compare_kernels_',type,'_em']; 97 | 98 | if if_print 99 | figure(1); 100 | saveas(gcf,[nameFile,'.fig']); 101 | nameFile = [nameFile,'.eps']; 102 | print('-depsc',nameFile); 103 | figure(2); 104 | saveas(gcf,[nameFileRMSE,'_RMSE.fig']); 105 | nameFileRMSE = [nameFileRMSE,'_RMSE.eps']; 106 | print('-depsc',nameFileRMSE); 107 | 108 | end 109 | 110 | keyboard; 111 | 112 | end 113 | 114 | 115 | function [value] = getColor(index) 116 | switch index 117 | case 1 118 | value = 'r-'; 119 | case 2 120 | value = 'b-'; 121 | case 3 122 | value = 'g--' 123 | case 4 124 | value = 'm--'; 125 | case 5 126 | value = 'k-' 127 | end 128 | end 129 | -------------------------------------------------------------------------------- /matlab/PlotResultsEachMovieMarlinWeak.m: -------------------------------------------------------------------------------- 1 | function [L2_error_T,NMAE_error_T,NMAE_round_error_T] = PlotResultsEachMovieMarlinWeak(substract_mean,partNo_v,latentDim_v, iters, type, inverted, kern_type) 2 | % 3 | % [L2_error,NMAE_error,NMAE_round_error] = PlotResultsEachMovieMarlinWeak(substract_mean,partNo_v,latentDim_v, iters, kern_type) 4 | % 5 | % substract_mean --> bool if substract the mean 6 | % partNo_v --> vector with the partitions to compute results 7 | % latentDim_v --> vector with the latent dimensionalities to compute results 8 | % iters --> number of iterations 9 | % 
type --> strong or weak 10 | % inverted --> if it is inverted and we learn latent for subjects, not movies 11 | % kern_type --> '' for RBF 'linear', 'MLP' 12 | 13 | 14 | numDim = length(latentDim_v); 15 | numPartNo = length(partNo_v); 16 | 17 | L2_error_T = -ones(numDim,numPartNo); 18 | NMAE_error_T = -ones(numDim,numPartNo); 19 | NMAE_round_error_T = -ones(numDim,numPartNo); 20 | 21 | for i_latent=1:numDim 22 | q = latentDim_v(i_latent); 23 | for i_part=1:numPartNo 24 | partNo = partNo_v(i_part); 25 | 26 | dataSetName = ['eachmovie_marlin_',type,'_',num2str(partNo)]; 27 | 28 | 29 | 30 | % Save the results. 31 | capName = dataSetName; 32 | capName(1) = upper(capName(1)); 33 | if (nargin>6) 34 | loadResults = [capName,'_',kern_type,'_inverted_',num2str(inverted),'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'.mat']; 35 | else 36 | loadResults = [capName,'inverted_',num2str(inverted),'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'.mat']; 37 | end 38 | disp(['Loading ... 
',loadResults]); 39 | try 40 | load(loadResults); 41 | catch 42 | continue; 43 | end 44 | L2_error_T(i_latent,i_part) = L2_error; 45 | NMAE_error_T(i_latent,i_part) = NMAE_error * 1.6/1.944; 46 | NMAE_round_error_T(i_latent,i_part) = NMAE_round_error*1.6/1.944; 47 | end 48 | end 49 | 50 | % plot the results 51 | 52 | mean_L2 = mean(L2_error_T,2); 53 | mean_NMAE = mean(NMAE_error_T,2); 54 | mean_NMAE_round = mean(NMAE_round_error_T,2); 55 | %keyboard; 56 | for j=1:size(mean_L2,1) % mean_L2 is numDim x 1: iterate over its rows, one std per latent dimensionality 57 | std_L2(j) = std(permute(L2_error_T(j,:),[2 1])); 58 | std_NMAE(j) = std(permute(NMAE_error_T(j,:),[2 1])); 59 | std_NMAE_round(j) = std(permute(NMAE_round_error_T(j,:),[2 1])); 60 | end 61 | 62 | %figure(1) 63 | % clf; 64 | %hold on; 65 | %for i=1:length(latentDim_v) 66 | % plot(perc_train_v/100,mean_NMAE_round(:,i),[getColor(i),'x']); 67 | %errorbar(perc_train_v/100,mean_NMAE_round(:,i),std_NMAE_round(:,i),[getColor(i),'x']); 68 | %toLeg{i} = ['Dimension ',num2str(latentDim_v(i))]; 69 | %end 70 | %xlabel('percentage database'); 71 | %ylabel('NMAE round error'); 72 | %legend(toLeg); 73 | 74 | end 75 | 76 | 77 | function [value] = getColor(index) 78 | switch index 79 | case 1 80 | value = 'r-'; 81 | case 2 82 | value = 'b-'; 83 | case 3 84 | value = 'g--'; 85 | case 4 86 | value = 'm--'; 87 | case 5 88 | value = 'k-'; 89 | end 90 | end 91 | -------------------------------------------------------------------------------- /matlab/PlotResultsMovielensMarlinCompareKernels.m: -------------------------------------------------------------------------------- 1 | function [] = PlotResultsMovielensMarlinCompareKernels(substract_mean, partNo_v, latentDim_v, iters, type, inverted, directories, if_plot, if_print, new_iters) 2 | % 3 | %function [] = PlotResultsMovielensMarlinCompareKernels(substract_mean, partNo_v, latentDim_v, iters, type, inverted, directories, if_plot, if_print, new_iters) 4 | 5 | if strcmp(type,'weak') 6 | 7 | % directories are order to be 8 | directories{1} = 'marlin_movielens'; 9 
| directories{2} = 'marlin_movielens_linear'; kern_type{2} = 'linear'; 10 | directories{3} = 'marlin_movielens_metadata'; kern_type{3} = 'additional'; 11 | %directories{4} = 'marlin_movielens_linear_RBF'; kern_type{4} = 'linear_RBF'; 12 | 13 | end 14 | 15 | if strcmp(type,'strong') 16 | directories{1} = 'marlin_movielens_strong'; 17 | directories{2} = 'marlin_movielens_linear_strong'; kern_type{2} = 'linear'; 18 | directories{3} = 'marlin_movielens_strong_metadata'; kern_type{3} = 'additional'; 19 | %directories{4} = 'marlin_movielens_strong_linear_RBF'; kern_type{4} = 'linear_RBF'; 20 | 21 | end 22 | 23 | 24 | for i=1:length(directories) 25 | 26 | cd (['../',directories{i}]); 27 | 28 | latentDim_v_p = latentDim_v; 29 | if strcmp(kern_type{i},'additional') 30 | latentDim_v_p = latentDim_v_p+1; 31 | end 32 | 33 | 34 | if (length(kern_type{i})>0) 35 | if (strcmp(kern_type{i},'linear_RBF')) 36 | [L2_error_T{i},NMAE_error_T{i},NMAE_round_error_T{i}] = PlotResultsMovielensMarlinWeak(substract_mean,partNo_v,latentDim_v_p, iters, type, inverted, kern_type{i},new_iters); 37 | 38 | 39 | else 40 | 41 | 42 | [L2_error_T{i},NMAE_error_T{i},NMAE_round_error_T{i}] = PlotResultsMovielensMarlinWeak(substract_mean,partNo_v,latentDim_v_p, iters, type, inverted, kern_type{i}); 43 | 44 | end 45 | else 46 | 47 | 48 | [L2_error_T{i},NMAE_error_T{i},NMAE_round_error_T{i}] = PlotResultsMovielensMarlinWeak(substract_mean,partNo_v,latentDim_v_p, iters, type, inverted); 49 | end 50 | 51 | 52 | % 53 | 54 | mean_NMAE_round_error(:,i) = mean(NMAE_round_error_T{i}')'; 55 | std_NMAE_round_error(:,i) = std(NMAE_round_error_T{i}')'; 56 | mean_L2_error(:,i) = mean(L2_error_T{i}')'; 57 | std_L2_error(:,i) = std(L2_error_T{i}')'; 58 | 59 | end 60 | 61 | if if_plot 62 | figure(1); 63 | clf; 64 | 65 | font_size = 16; 66 | set(gca,'FontSize',font_size); 67 | set(get(gca,'Title'),'FontSize',font_size); 68 | set(get(gca,'Xlabel'),'FontSize',font_size); 69 | set(get(gca,'Ylabel'),'FontSize',font_size); 70 | 
legend([{'RBF'},{'linear'},{'metadata'},{'RBF+data'}]); 71 | for i=1:length(directories) 72 | hold on; 73 | plot(latentDim_v,mean_NMAE_round_error(:,i),[getColor(i),'o'],'lineWidth',3,'markersize',12); 74 | end 75 | legend([{'RBF'},{'linear'},{'metadata'},{'RBF+data'}]); 76 | 77 | xlabel('latent dimensionality'); 78 | ylabel('NMAE error'); 79 | 80 | set(gca,'XTick',latentDim_v); 81 | 82 | figure(2); 83 | clf; 84 | 85 | font_size = 16; 86 | set(gca,'FontSize',font_size); 87 | set(get(gca,'Title'),'FontSize',font_size); 88 | set(get(gca,'Xlabel'),'FontSize',font_size); 89 | set(get(gca,'Ylabel'),'FontSize',font_size); 90 | legend([{'RBF'},{'linear'},{'metadata'},{'RBF+data'}]); 91 | for i=1:length(directories) 92 | hold on; 93 | plot(latentDim_v,mean_L2_error(:,i),[getColor(i),'o'],'lineWidth',3,'markersize',12); 94 | end 95 | legend([{'RBF'},{'linear'},{'metadata'},{'RBF+data'}]); 96 | 97 | xlabel('latent dimensionality'); 98 | ylabel('RMSE error'); 99 | 100 | set(gca,'XTick',latentDim_v); 101 | 102 | 103 | end 104 | 105 | nameFile = ['compare_kernels_',type]; 106 | nameFileRMSE = nameFile; 107 | 108 | if if_print 109 | figure(1) 110 | saveas(gcf,[nameFile,'.fig']); 111 | nameFile = [nameFile,'.eps']; 112 | print('-depsc',nameFile); 113 | figure(2) 114 | saveas(gcf,[nameFileRMSE,'_RMSE.fig']); 115 | nameFileRMSE = [nameFileRMSE,'_RMSE.eps']; 116 | print('-depsc',nameFileRMSE); 117 | 118 | end 119 | 120 | keyboard; 121 | 122 | end 123 | 124 | 125 | function [value] = getColor(index) % line-style prefix for the index-th plotted curve 126 | switch index 127 | case 1 128 | value = 'r-'; 129 | case 2 130 | value = 'b-.'; 131 | case 3 132 | value = 'g--'; 133 | case 4 134 | value = 'm--'; 135 | case 5 136 | value = 'k-'; 137 | end 138 | end 139 | -------------------------------------------------------------------------------- /matlab/PlotResultsMovielensMarlinWeak.m: -------------------------------------------------------------------------------- 1 | function [L2_error_T,NMAE_error_T,NMAE_round_error_T] = 
PlotResultsMovielensMarlinWeak(substract_mean,partNo_v,latentDim_v, iters, type, inverted, kernel_type, new_iters) 2 | % 3 | % [L2_error,NMAE_error,NMAE_round_error] = PlotResultsMovielensMarlinWeak(substract_mean,partNo_v,latentDim_v, iters, type, inverted, kernel_type) 4 | % 5 | % substract_mean --> bool if substract the mean 6 | % partNo_v --> vector with the partitions to compute results 7 | % latentDim_v --> vector with the latent dimensionalities to compute results 8 | % iters --> number of iterations 9 | % type --> strong or weak 10 | % inverted --> if it is inverted and we learn latent for subjects, not movies 11 | % kernel type --> '' for RBF 'linear', 'MLP' 12 | 13 | 14 | numDim = length(latentDim_v); 15 | numPartNo = length(partNo_v); 16 | 17 | L2_error_T = -ones(numDim,numPartNo); 18 | NMAE_error_T = -ones(numDim,numPartNo); 19 | NMAE_round_error_T = -ones(numDim,numPartNo); 20 | 21 | for i_latent=1:numDim 22 | q = latentDim_v(i_latent); 23 | for i_part=1:numPartNo 24 | partNo = partNo_v(i_part); 25 | 26 | dataSetName = ['movielens_marlin_',type,'_',num2str(partNo)]; 27 | 28 | 29 | 30 | % Save the results. 31 | capName = dataSetName; 32 | capName(1) = upper(capName(1)); 33 | 34 | 35 | if (nargin>6) 36 | 37 | if (nargin>7) 38 | loadResults = [capName,'_',kernel_type,'_inverted_',num2str(inverted),'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'_newiters_',num2str(new_iters),'.mat']; 39 | 40 | else 41 | 42 | loadResults = [capName,'_',kernel_type,'_inverted_',num2str(inverted),'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'.mat']; 43 | end 44 | else 45 | 46 | loadResults = [capName,'inverted_',num2str(inverted),'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'.mat']; 47 | end 48 | disp(['Loading ... 
',loadResults]); 49 | try 50 | load(loadResults); 51 | catch 52 | continue; 53 | end 54 | L2_error_T(i_latent,i_part) = L2_error; 55 | NMAE_error_T(i_latent,i_part) = NMAE_error; 56 | NMAE_round_error_T(i_latent,i_part) = NMAE_round_error; 57 | end 58 | end 59 | 60 | % plot the results 61 | 62 | mean_L2 = mean(L2_error_T,2); 63 | mean_NMAE = mean(NMAE_error_T,2); 64 | mean_NMAE_round = mean(NMAE_round_error_T,2); 65 | %keyboard; 66 | for j=1:size(mean_L2,1) 67 | std_L2(j) = std(permute(L2_error_T(j,:),[2 1])); 68 | std_NMAE(j) = std(permute(NMAE_error_T(j,:),[2 1])); 69 | std_NMAE_round(j) = std(permute(NMAE_round_error_T(j,:),[2 1])); 70 | end 71 | 72 | %keyboard; 73 | 74 | %figure(1) 75 | % clf; 76 | %hold on; 77 | %for i=1:length(latentDim_v) 78 | % plot(perc_train_v/100,mean_NMAE_round(:,i),[getColor(i),'x']); 79 | %errorbar(perc_train_v/100,mean_NMAE_round(:,i),std_NMAE_round(:,i),[getColor(i),'x']); 80 | %toLeg{i} = ['Dimension ',num2str(latentDim_v(i))]; 81 | %end 82 | %xlabel('percentage database'); 83 | %ylabel('NMAE error'); 84 | %legend(toLeg); 85 | 86 | end 87 | 88 | 89 | function [value] = getColor(index) 90 | switch index 91 | case 1 92 | value = 'r-'; 93 | case 2 94 | value = 'b-'; 95 | case 3 96 | value = 'g--' 97 | case 4 98 | value = 'm--'; 99 | case 5 100 | value = 'k-' 101 | end 102 | end 103 | -------------------------------------------------------------------------------- /matlab/PlotResultsMovielensWeak.m: -------------------------------------------------------------------------------- 1 | function [L2_error_T,NMAE_error_T,NMAE_round_error_T] = PlotResultsMovielensWeak(substract_mean,partNo_v,latentDim_v, iters, type) 2 | % 3 | % [L2_error,NMAE_error,NMAE_round_error] = PlotResultsMovielensWeak(substract_mean,partNo_v,latentDim_v, iters) 4 | % 5 | % substract_mean --> bool if substract the mean 6 | % partNo_v --> vector with the partitions to compute results 7 | % latentDim_v --> vector with the latent dimensionalities to compute results 8 | 
% iters --> number of iterations 9 | % type --> strong or weak 10 | 11 | numDim = length(latentDim_v); 12 | numPartNo = length(partNo_v); 13 | 14 | L2_error_T = -ones(numDim,numPartNo); 15 | NMAE_error_T = -ones(numDim,numPartNo); 16 | NMAE_round_error_T = -ones(numDim,numPartNo); 17 | 18 | for i_latent=1:numDim 19 | q = latentDim_v(i_latent); 20 | for i_part=1:numPartNo 21 | partNo = partNo_v(i_part); 22 | 23 | dataSetName = ['movielens_',type,'_',num2str(partNo)]; 24 | 25 | 26 | 27 | % Build the name of the results file to load. 28 | capName = dataSetName; 29 | capName(1) = upper(capName(1)); 30 | 31 | loadResults = [capName,'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'.mat']; 32 | disp(['Loading ... ',loadResults]); 33 | try 34 | load(loadResults); 35 | catch 36 | continue; 37 | end 38 | L2_error_T(i_latent,i_part) = L2_error; 39 | NMAE_error_T(i_latent,i_part) = NMAE_error; 40 | NMAE_round_error_T(i_latent,i_part) = NMAE_round_error; 41 | end 42 | end 43 | 44 | % plot the results 45 | 46 | mean_L2 = mean(L2_error_T,2); 47 | mean_NMAE = mean(NMAE_error_T,2); 48 | mean_NMAE_round = mean(NMAE_round_error_T,2); 49 | %keyboard; 50 | for j=1:size(mean_L2,1) % mean_L2 is numDim x 1: iterate over its rows, one std per latent dimensionality 51 | std_L2(j) = std(permute(L2_error_T(j,:),[2 1])); 52 | std_NMAE(j) = std(permute(NMAE_error_T(j,:),[2 1])); 53 | std_NMAE_round(j) = std(permute(NMAE_round_error_T(j,:),[2 1])); 54 | end 55 | 56 | %figure(1) 57 | % clf; 58 | %hold on; 59 | %for i=1:length(latentDim_v) 60 | % plot(perc_train_v/100,mean_NMAE_round(:,i),[getColor(i),'x']); 61 | %errorbar(perc_train_v/100,mean_NMAE_round(:,i),std_NMAE_round(:,i),[getColor(i),'x']); 62 | %toLeg{i} = ['Dimension ',num2str(latentDim_v(i))]; 63 | %end 64 | %xlabel('percentage database'); 65 | %ylabel('NMAE round error'); 66 | %legend(toLeg); 67 | 68 | end 69 | 70 | 71 | function [value] = getColor(index) 72 | switch index 73 | case 1 74 | value = 'r-'; 75 | case 2 76 | value = 'b-'; 77 | case 3 78 | value = 'g--'; 79 | case 4 80 | value = 'm--'; 
81 | case 5 82 | value = 'k-'; 83 | end 84 | end 85 | -------------------------------------------------------------------------------- /matlab/RecomputeMovieLens10MWeakPartialScript1.m: -------------------------------------------------------------------------------- 1 | function [] = RecomputeMovieLens10MWeakPartialScript1(substract_mean, partNo_v, latentDim_v,iters, inverted) 2 | % RECOMPUTEMOVIELENS10MWEAKPARTIALSCRIPT1 Recompute the test error for the 10M Movielens database 3 | % where the weak movielens experiment 4 | % 5 | % RecomputeMovieLens10MWeakPartialScript1(substract_mean, partNo_v, 6 | % latentDim_v,iters, inverted) 7 | % 8 | % substract_mean --> bool if substract the mean 9 | % partNo_v --> vector with the partitions to compute results 10 | % latentDim_v --> vector with the latent dimensionalities to compute results 11 | % iters --> number of iterations 12 | % if inverted = true, then learn users as examples and not movies 13 | 14 | randn('seed', 1e5); 15 | rand('seed', 1e5); 16 | 17 | experimentNo = 3; 18 | 19 | 20 | partLetter_v = 'ab'; 21 | 22 | 23 | for i_latent=1:length(latentDim_v) 24 | q = latentDim_v(i_latent); 25 | for i_part=1:length(partNo_v) 26 | partNo = partNo_v(i_part); 27 | 28 | partLetter = partLetter_v(partNo_v(i_part)); 29 | 30 | dataSetName = ['movielens_10M_',partLetter]; 31 | 32 | disp(['Reading ... ',dataSetName]); 33 | 34 | [Y, void, Ytest] = collabLoadData(dataSetName); 35 | 36 | if (inverted) 37 | Y = Y'; 38 | Ytest = Ytest'; % transpose the test set itself (Y' here would restore the untransposed training data) 39 | end 40 | 41 | numFilms = size(Y,1); 42 | numUsers = size(Y,2); 43 | meanFilms = zeros(numFilms,1); 44 | stdFilms = ones(numFilms,1); 45 | 46 | 47 | % Name of the stored results file to load. 48 | capName = dataSetName; 49 | capName(1) = upper(capName(1)); 50 | 51 | 52 | saveResults = ['dem',capName,num2str(experimentNo),'_Iters',num2str(iters),'.mat']; 53 | 54 | disp(['Loading ... 
',saveResults]); 55 | load(saveResults); 56 | 57 | % compute the test error 58 | disp('Computing test error'); 59 | 60 | 61 | [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorWeakCell(model,Y,Ytest) 62 | 63 | 64 | 65 | 66 | % Save the results. 67 | capName = dataSetName; 68 | capName(1) = upper(capName(1)); 69 | 70 | saveResults = ['dem',capName,num2str(experimentNo),'_Iters',num2str(iters),'_error.mat']; 71 | 72 | %saveResults = [capName,'inverted_',num2str(inverted),'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'.mat']; 73 | disp(['Saving ... ',saveResults]); 74 | save(saveResults, 'model', 'L2_error','NMAE_error','NMAE_round_error'); 75 | end 76 | end 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /matlab/RecomputeMovieLens10MWeakScript1.m: -------------------------------------------------------------------------------- 1 | function [] = RecomputeMovieLens10MWeakScript1(substract_mean, partNo_v, latentDim_v,iters, inverted) 2 | % RECOMPUTEMOVIELENS10MWEAKSCRIPT1 Recompute the test error for the 10M Movielens database 3 | % where the weak movielens experiment 4 | % 5 | % RecomputeMovieLens10MWeakScript1(substract_mean, partNo_v, 6 | % latentDim_v,iters, inverted) 7 | % 8 | % substract_mean --> bool if substract the mean 9 | % partNo_v --> vector with the partitions to compute results 10 | % latentDim_v --> vector with the latent dimensionalities to compute results 11 | % iters --> number of iterations 12 | % if inverted = true, then learn users as examples and not movies 13 | 14 | randn('seed', 1e5); 15 | rand('seed', 1e5); 16 | 17 | experimentNo = 3; 18 | 19 | 20 | for i_latent=1:length(latentDim_v) 21 | q = latentDim_v(i_latent); 22 | for i_part=1:length(partNo_v) 23 | partNo = partNo_v(i_part); 24 | 25 | dataSetName = ['movielens_10M_',num2str(partNo)]; 26 | 27 | disp(['Reading ... 
',dataSetName]); 28 | 29 | [Y, void, Ytest] = collabLoadData(dataSetName); 30 | 31 | if (inverted) 32 | Y = Y'; 33 | Ytest = Ytest'; % transpose the test set itself (Y' here would restore the untransposed training data) 34 | end 35 | 36 | numFilms = size(Y,1); 37 | numUsers = size(Y,2); 38 | meanFilms = zeros(numFilms,1); 39 | stdFilms = ones(numFilms,1); 40 | 41 | 42 | % Name of the stored results file to load. 43 | capName = dataSetName; 44 | capName(1) = upper(capName(1)); 45 | 46 | loadResults = [capName,'inverted_',num2str(inverted),'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'.mat']; 47 | disp(['Loading ... ',loadResults]); 48 | load(loadResults); %, 'model', 'L2_error','options','NMAE_error','NMAE_round_error'); 49 | 50 | 51 | % compute the test error 52 | disp('Computing test error'); 53 | 54 | 55 | [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorWeakCell(model,Y,Ytest) 56 | 57 | 58 | % Save the results. 59 | capName = dataSetName; 60 | capName(1) = upper(capName(1)); 61 | 62 | saveResults = [capName,'inverted_',num2str(inverted),'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'.mat']; 63 | disp(['Saving ... ',saveResults]); 64 | save(saveResults, 'model', 'L2_error','options','NMAE_error','NMAE_round_error'); 65 | end 66 | end 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /matlab/RecomputeResults.m: -------------------------------------------------------------------------------- 1 | function[] = RecomputeResults(perc_train, substract_mean, partNo_v, latentDim_v) 2 | % RECOMPUTERESULTS Try collaborative filtering on the large movielens data. 
3 | % 4 | % RecomputeResults(perc_train, substract_mean, partNo_v, latentDim_v) 5 | % 6 | % perc_train -> percentage of training 7 | % substract_mean --> bool if substract the mean 8 | % partNo_v --> vector with the partitions to compute results 9 | % latentDim_v --> vector with the latent dimensionalities to compute results 10 | 11 | randn('seed', 1e5); 12 | rand('seed', 1e5); 13 | 14 | experimentNo = 3; 15 | 16 | 17 | %partNo_v = [1:5]; 18 | %latentDim_v = [5, 2:4, 6]; 19 | 20 | 21 | for i_latent=1:length(latentDim_v) 22 | q = latentDim_v(i_latent); 23 | for i_part=1:length(partNo_v) 24 | partNo = partNo_v(i_part); 25 | 26 | dataSetName = ['movielens_',num2str(perc_train),'_',num2str(partNo)]; 27 | 28 | disp(['Reading ... ',dataSetName]); 29 | 30 | [Y, void, Ytest] = collabLoadData(dataSetName); 31 | 32 | if (substract_mean) 33 | % create the total vector 34 | s = nonzeros(Ytest); 35 | ratings = [nonzeros(Y); nonzeros(Ytest)]; 36 | meanY = mean(ratings); 37 | stdY = std(ratings); 38 | %keyboard; 39 | index = find(Y); 40 | Y(index) = Y(index) - meanY; 41 | Y(index) = Y(index) / stdY; 42 | %keyboard; 43 | end; 44 | 45 | % load the model 46 | % Save the results. 47 | capName = dataSetName; 48 | capName(1) = upper(capName(1)); 49 | 50 | loadResults = [capName,'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'.mat']; 51 | disp(['Loading ... 
',loadResults]); 52 | load(loadResults); 53 | %, 'model', 'L2_error','options','NMAE_error','NMAE_round_error'); 54 | L2_error_before = L2_error; 55 | NMAE_error_before = NMAE_error; 56 | NMAE_round_error_before = NMAE_round_error; 57 | 58 | 59 | val_L2 = 0; 60 | tot_L2 = 0; 61 | val_NMAE = 0; 62 | tot_NMAE = 0; 63 | val_round_NMAE = 0; 64 | tot_round_NMAE = 0; 65 | val_round_NMAE_2 = 0; 66 | tot_round_NMAE_2 = 0; 67 | 68 | disp('Computing results'); 69 | ErrorValues = []; 70 | ErrorValues_round = []; 71 | for i = 1:size(Y, 2) 72 | ind = find(Ytest(:, i)); 73 | elim = find(ind>size(model.X, 1)); % positions within ind whose row index exceeds the rows of model.X 74 | tind = ind; 75 | tind(elim) = []; 76 | [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :)); 77 | % rescale the prediction with the stored sd and mean 78 | 79 | 80 | mu = mu*model.sd(1); 81 | mu = mu+model.mu(1); 82 | a = Ytest(tind, i) - mu; 83 | a = [a; Ytest(ind(elim), i)]; % elim indexes into ind, so map back to row indices of Ytest 84 | a_round = Ytest(tind, i) - round(mu); 85 | a_round = [a_round; Ytest(ind(elim), i)]; 86 | val_L2 = val_L2 + a'*a; 87 | tot_L2 = tot_L2 + length(a); 88 | val_NMAE = val_NMAE + sum(abs(a)); 89 | tot_NMAE = tot_NMAE + length(a); 90 | val_round_NMAE = val_round_NMAE + sum(abs(round(a))); 91 | tot_round_NMAE = tot_round_NMAE + length(a); 92 | val_round_NMAE_2 = val_round_NMAE_2 + sum(abs(a_round)); 93 | tot_round_NMAE_2 = tot_round_NMAE_2 + length(a_round); 94 | 95 | % ??? this doesn't work yet 96 | %keyboard; 97 | ErrorValues = [ErrorValues; full(abs(a))]; 98 | ErrorValues_round = [ErrorValues_round; full(abs(a_round))]; 99 | 100 | end 101 | L2_error = sqrt(val_L2/tot_L2); 102 | NMAE_error = (val_NMAE/tot_NMAE)/1.6; 103 | NMAE_round_error = (val_round_NMAE/tot_round_NMAE)/1.6; 104 | NMAE_round_error_2 = (val_round_NMAE_2/tot_round_NMAE_2)/1.6; 105 | 106 | 107 | [L2_error L2_error_before] 108 | [NMAE_error NMAE_error_before] 109 | [NMAE_round_error NMAE_round_error_before] 110 | NMAE_round_error_2 111 | mean(ErrorValues) 112 | std(ErrorValues) 113 | %keyboard; 114 | 115 | % Save the results. 
function [] = changeNameMovielens(mean_substraction, perc_train_v, latentDim_v, partNo_v, iters)

% CHANGENAMEMOVIELENS Re-save MovieLens result files under names tagged with an iteration count.
% FORMAT
% DESC for every combination of training percentage, latent dimension and
% partition number, loads
%   Movielens_<perc>_<part>_norm_<norm>_<dim>_<part>.mat
% and saves the stored results under the same name with
% '_iters_<iters>' inserted before the extension. Combinations whose
% source file cannot be loaded are reported and skipped.
% ARG mean_substraction : value written after '_norm_' in the file names.
% ARG perc_train_v : vector of training percentages.
% ARG latentDim_v : vector of latent dimensionalities.
% ARG partNo_v : vector of partition numbers.
% ARG iters : iteration count appended to the saved file names.

% Variables copied from the source file to the renamed file.
savedVars = {'options', 'model', 'L2_error', 'NMAE_error', 'NMAE_round_error'};

for i_perc = 1:length(perc_train_v)
  perc_train = perc_train_v(i_perc);
  for i_latent = 1:length(latentDim_v)
    latentDim = latentDim_v(i_latent);
    for i_part = 1:length(partNo_v)
      partNo = partNo_v(i_part);

      baseName = ['Movielens_',num2str(perc_train),'_',num2str(partNo),'_norm_',num2str(mean_substraction),'_',num2str(latentDim),'_',num2str(partNo)];
      toLoad = [baseName,'.mat'];
      toSave = [baseName,'_iters_',num2str(iters),'.mat'];

      disp(['Loading ... ',toLoad]);
      try
        % Load into a struct: variables stored in the file can then neither
        % overwrite the loop state of this function nor leave stale values
        % behind that would be re-saved for the next file.
        results = load(toLoad);
      catch
        disp(['Skipping ... ',toLoad]);
        continue;
      end

      disp(['Saving ... ',toSave]);
      save(toSave, '-struct', 'results', savedVars{:});
    end
  end
end
function m = collabComputeM(model)

% COLLABCOMPUTEM Computes target values inside model.
% FORMAT
% DESC takes in a model and computes the normalised target values for
% the output (user) selected by model.currentOut.
% ARG model : the model for which the values of m are to be computed. The
% field currentOut should be set to which user is to be taken from the
% data. model.y is either a matrix with one column per user, or a cell
% array whose first column holds row indices and whose second column
% holds the corresponding values for each user.
% RETURN m : column of target values; non-zero entries have model.mu
% subtracted and are divided by model.sd, zero entries are left at zero.
%
% SEEALSO : collabCreate
%
% COPYRIGHT : Neil D. Lawrence, 2009

% COLLAB

if iscell(model.y)
  % Cell storage: scatter this user's values into a sparse column.
  itemInd = model.y{model.currentOut, 1};
  m = spalloc(model.N, 1, length(itemInd));
  % The values live in model.y (there is no local variable y here).
  m(itemInd, :) = double(model.y{model.currentOut, 2});
else
  m = model.y(:, model.currentOut);
end
% Only observed (non-zero) entries are normalised.
ind = find(m);
m(ind) = m(ind) - model.mu(ind);
m(ind) = m(ind)./model.sd(ind);

end
function model = collabCreate(q, d, y, options);

% COLLABCREATE Create a COLLAB model with inducing varibles/pseudo-inputs.
% FORMAT
% DESC creates a collaborative filter structure with a latent space of q.
% ARG q : input data dimension.
% ARG d : the number of processes (i.e. output data dimension).
% ARG Y : the data.
% ARG options : options structure as defined by collabOptions.m. The
% fields kern, heteroNoise and numComps are read here.
% RETURN model : model structure containing the GP collaborative filter.
%
% SEEALSO : collabOptions, modelCreate
%
% COPYRIGHT : Neil D. Lawrence, 2008

% COLLAB


model.type = 'collab';

model.q = q;
model.d = d;
model.N = size(y, 1);
model.y = y;
model.mu = zeros(model.N, 1);
model.sd = ones(model.N, 1);
model.currentOut = 1;
model.m = collabComputeM(model);
% Parameter count: latent positions first, then kernel parameters.
model.numParams = model.N*model.q;
model.kern = kernCreate(q, options.kern);
model.numParams = model.numParams + model.kern.nParams;
% Small random initialisation of the latent positions.
model.X = randn(model.N, q)*0.001;
model.change = zeros(size(model.X));
model.changeParam = zeros(1, model.kern.nParams);
model.heteroNoise = options.heteroNoise; % Whether or not to have diagonal
                                         % noise variance.
model.noiseTransform = optimiDefaultConstraint('positive');
model.M = options.numComps;
if model.M > 1
  % Mixture model: uniform mixing proportions and initial responsibilities.
  model.pi = repmat(1/model.M, 1, model.M);
  model.sigma2 = exp(-2);
  model.lnsigma2Change = 0;
  model = collabInitS(model);
  if ~options.heteroNoise
    % sigma2 is only part of the parameter vector when the noise is not
    % heteroscedastic (collabExtractParam/collabExpandParam use diagvar
    % instead in that case), so only count it here.
    model.numParams = model.numParams + 1;
  end
end
if model.heteroNoise
  model.diagvar = repmat(exp(-2), model.N, 1);
  model.lndiagChange = zeros(model.N, 1);
  model.numParams = model.numParams + model.N;
end
% Round-trip the parameters to force the kernel computations.
initParams = collabExtractParam(model);
model = collabExpandParam(model, initParams);
end
function collabDisplay(model, spaceNum)

% COLLABDISPLAY Displays the provided collaborative filter model.
% FORMAT
% DESC prints a summary of the collaborative filter model: its sizes, the
% noise and mixture settings when present, and the kernel.
% ARG model : the model to display.
% ARG spaceNum : number of spaces to indent display (default 0).
%
% SEEALSO : modelDisplay
%
% COPYRIGHT : Neil D. Lawrence, 2008

% COLLAB

if nargin < 2
  spaceNum = 0;
end
% Indentation prefix made of spaceNum blanks (ASCII 32).
indent = char(repmat(32, 1, spaceNum));

fprintf([indent 'Collaborative filter GPLVM:\n'])
fprintf([indent ' Number of data points: %d\n'], model.N);
fprintf([indent ' Input dimension: %d\n'], model.q);
fprintf([indent ' Number of processes: %d\n'], model.d);
if model.heteroNoise
  noiseSd = sqrt(var(model.diagvar));
  fprintf([indent ' Heteroschodastic noise model, mean %2.4f, sd %2.4f\n'], mean(model.diagvar), noiseSd)
end
if model.M > 1
  fprintf([indent ' Mixture model with %d components.\n'], model.M)
  fprintf([indent ' Output variance %2.4f.\n'], model.sigma2)
end
fprintf([indent ' Kernel:\n'])

% The kernel summary is indented two further spaces.
kernDisplay(model.kern, spaceNum+2)
end
function model = collabExpandParam(model, params)

% COLLABEXPANDPARAM Expand a parameter vector into a COLLAB model.
% FORMAT
% DESC distributes the entries of a parameter vector over the model
% structure and then refreshes the stored kernel representations.
% The vector is laid out as: latent positions X (N*q entries), kernel
% parameters (model.kern.nParams entries), then either the N diagonal
% noise variances (heteroscedastic noise) or the single sigma2 (mixture
% model without heteroscedastic noise).
% ARG model : the model structure for which parameters are to be
% updated.
% ARG params : a vector of parameters for placing in the model
% structure.
% RETURN model : a returned model structure containing the updated
% parameters.
%
% SEEALSO : collabCreate, collabExtractParam, modelExtractParam
%
% COPYRIGHT : Neil D. Lawrence, 2009

% COLLAB


% Latent positions.
numLatent = model.N*model.q;
model.X = reshape(params(1:numLatent), model.N, model.q);

% Kernel parameters.
kernStop = numLatent + model.kern.nParams;
model.kern = kernExpandParam(model.kern, params(numLatent+1:kernStop));

% Noise parameters are stored transformed; 'atox' maps them back.
transform = str2func([model.noiseTransform 'Transform']);
useHetero = isfield(model, 'heteroNoise') && model.heteroNoise;
if useHetero
  model.diagvar = transform(params(kernStop+1:kernStop+model.N), 'atox')';
elseif model.M>1
  model.sigma2 = transform(params(kernStop+1:kernStop+1), 'atox');
end

model = collabUpdateKernels(model);
end
function model = collabInitS(model)

% COLLABINITS Initialize the expectations of S for the collaborative filter.
% FORMAT
% DESC initializes the responsibilities of the current output
% (model.currentOut) for the collaborative filter model. Rows with a
% non-zero target in model.m receive the mixing proportions model.pi,
% slightly perturbed by random noise and renormalised; all other rows
% stay zero.
% ARG model : the model structure for which expectations are being
% initialized.
% RETURN model : the model structure with the expectations initialized.
%
% SEEALSO : collabExpandParam, collabCreate
%
% COPYRIGHT : Neil D. Lawrence, 2009

% COLLAB


rated = find(model.m);
numRated = length(rated);

% Perturbed log mixing proportions, one row per observed target.
logNum = repmat(log(model.pi), numRated, 1) + randn(numRated, model.M)*0.001;

% Subtract the row maximum before exponentiating, then normalise each row.
rowMax = max(logNum, [], 2);
unnorm = exp(logNum - repmat(rowMax, 1, model.M));
rowSum = sum(unnorm, 2);

resp = spalloc(model.N, model.M, numRated);
resp(rated, :) = unnorm./repmat(rowSum, 1, model.M);
model.expectation.s{model.currentOut} = resp;
end
45 | gK = zeros(n); 46 | for i = 1:model.M 47 | invKy = model.invK{i}*m; 48 | gKm{i} = 0.5*(invKy*invKy'- model.invK{i}); 49 | gK = gK + gKm{i}; 50 | end 51 | else 52 | invKy = model.invK*m; 53 | gK = -model.invK + invKy*invKy'; 54 | gK = gK * 0.5; 55 | end 56 | %%% Prepare to Compute Gradients with respect to X %%% 57 | gKX = kernGradX(model.kern, X, X); 58 | gKX = gKX*2; 59 | dgKX = kernDiagGradX(model.kern, X); 60 | for i = 1:length(ind) 61 | gKX(i, :, i) = dgKX(i, :); 62 | end 63 | gX = zeros(N, model.q); 64 | 65 | counter = 0; 66 | for i = 1:N 67 | counter = counter + 1; 68 | for j = 1:model.q 69 | gX(i, j) = gX(i, j) + gKX(:, j, i)'*gK(:, counter); 70 | end 71 | end 72 | g(ind, :) = gX; 73 | g_param = g_param + kernGradient(model.kern, X, gK); 74 | 75 | fhandle = str2func([model.noiseTransform 'Transform']); 76 | if model.heteroNoise 77 | if model.M>1 78 | % Mixture model. 79 | for i = 1:model.M 80 | fact = fhandle(model.diagvar(ind), 'gradfact'); 81 | g_noise(ind, :) = g_noise(ind, :) ... 82 | + diag(gKm{i})./model.expectation.s{model.currentOut}(ind, i).*fact; 83 | end 84 | else 85 | g_noise(ind, :) = diag(gK); 86 | fact = fhandle(model.diagvar(ind), 'gradfact'); 87 | g_noise(ind, :) = g_noise(ind, :).*fact; 88 | end 89 | elseif model.M > 1 90 | % Mixture model. 91 | for i = 1:model.M 92 | fact = fhandle(model.sigma2, 'gradfact'); 93 | g_noise = g_noise ... 94 | + sum(diag(gKm{i})./model.expectation.s{model.currentOut}(ind, i))*fact; 95 | end 96 | end 97 | 98 | else 99 | muse = muse-1; % make muse start from zero. 100 | % Create an IVM model and update site parameters. 
function ll = collabLogLikelihood(model)

% COLLABLOGLIKELIHOOD Compute the log likelihood of a COLLAB.
% FORMAT
% DESC computes the log likelihood of a data set given a COLLAB model by
% summing, over every output (user), the terms
%   -0.5*logDetK - 0.5*m'*invK*m
% built from that output's observed targets; for a mixture model the
% terms of all components are added.
% ARG model : the COLLAB model for which log likelihood is to be
% computed.
% RETURN ll : the log likelihood of the data in the COLLAB model.
%
% SEEALSO : collabCreate, collabLogLikeGradients, modelLogLikelihood
%
% COPYRIGHT : Neil D. Lawrence, 2009

% COLLAB

ll = 0;

% Outputs are rows of a cell array or columns of a matrix.
if iscell(model.y)
  total = size(model.y, 1);
else
  total = size(model.y, 2);
end

for i = 1:total
  model.currentOut = i;
  % collabComputeM takes the output index from model.currentOut.
  model.m = collabComputeM(model);
  if model.M > 1
    model = collabInitS(model);
  end
  model = collabUpdateKernels(model);
  if model.M > 1
    model = collabEstep(model);
  end
  %/~
  % This code was for splitting large data into blocks.
  %   maxBlock = ceil(length(fullInd)/ceil(length(fullInd)/1000));
  %   span = 0:maxBlock:length(fullInd);
  %   if rem(length(fullInd), maxBlock)
  %     span = [span length(fullInd)];
  %   end

  %   for block = 2:length(span)
  %     ind = fullInd(span(block-1)+1:span(block));
  %     if iscell(y)
  %       yuse = double(y{1, 2}(span(block-1)+1:span(block)));
  %     else
  %       yuse = y(ind, 1);
  %     end

  %     N = length(ind);
  %~/
  if ~isfield(model, 'noise') || isempty(model.noise)

    ind = find(model.m);
    muse = model.m(ind);
    if model.M > 1
      % Separate index for components so the output index i is not reused.
      for k = 1:model.M
        ll = ll - 0.5*model.logDetK(k) - 0.5*muse'*model.invK{k}*muse;
      end
    else
      ll = ll - 0.5*model.logDetK - 0.5*muse'*model.invK*muse;
    end
  end
end
end
function options = collabOptions(approx);

% COLLABOPTIONS Return default options for COLLAB model.
% FORMAT
% DESC builds the structure of default options for a COLLAB model:
% an rbf + bias + white kernel, no active set, empty beta, no
% heteroscedastic noise and a single mixture component. The input
% argument is accepted but not used.
% RETURN options : structure containing the default options.
%
% SEEALSO : collabCreate
%
% COPYRIGHT : Neil D. Lawrence, 2008

% COLLAB

% The kernel list is wrapped in an extra cell so that struct() stores the
% cell array itself instead of creating a struct array.
options = struct('kern', {{'rbf', 'bias', 'white'}}, ...
                 'numActive', 0, ...
                 'beta', [], ...
                 'heteroNoise', false, ...
                 'numComps', 1);

end
function [mu, varsig] = collabPosteriorMeanVar(model, y, X);

% COLLABPOSTERIORMEANVAR Mean and variances of the posterior at points given by X.
% FORMAT
% DESC returns the posterior mean and variance for a given set of
% points. Each column of y is treated separately: its non-zero entries
% are the observations, and the model kernels are recomputed for them.
% For a mixture model (model.M > 1) the component predictions are
% combined with the weights model.pi.
% ARG model : the model for which the posterior will be computed.
% ARG y : the observed targets, one column per output; zero entries are
% treated as unobserved.
% ARG x : the input positions for which the posterior will be
% computed.
% RETURN mu : the mean of the posterior distribution, one column per
% column of y.
% RETURN sigma : the variances of the posterior distributions, one column
% per column of y. Only computed when requested.
%
% SEEALSO : collabCreate
%
% COPYRIGHT : Neil D. Lawrence, 2008

% COLLAB

wantVar = nargout > 1;
numTest = size(X, 1);
numOut = size(y, 2);

mu = zeros(numTest, numOut);
if wantVar
  diagK = kernDiagCompute(model.kern, X);
  varsig = zeros(numTest, numOut);
end

for i = 1:numOut
  ind = find(y(:, i));
  model.m = y(:, i);
  yind = y(ind, i);
  if model.M > 1
    model = collabInitS(model);
  end
  model = collabUpdateKernels(model);
  % Compute kernel between the observed points and the new points.
  KX_star = kernCompute(model.kern, model.X(ind, :), X);
  if model.M > 1
    model = collabEstep(model);
    sndMoment = zeros(numTest, 1);
    for m = 1:model.M
      mum = KX_star'*model.invK{m}*yind;
      mu(:, i) = mu(:, i) + model.pi(m)*mum;
      % Second moments are only needed for the variance; skipping them
      % keeps the mean-only call from touching the undefined diagK.
      if wantVar
        Kinvk = model.invK{m}*KX_star;
        varsigm = diagK - sum(KX_star.*Kinvk, 1)';
        sndMoment = sndMoment + model.pi(m)*(mum.*mum + varsigm);
      end
    end
    if wantVar
      % Mixture variance = second moment minus squared mean.
      varsig(:, i) = sndMoment - mu(:, i).*mu(:, i);
    end
  else
    mu(:, i) = KX_star'*model.invK*yind;
    if wantVar
      % Computed per column: each column of y has its own observed set
      % and therefore its own invK and KX_star.
      Kinvk = model.invK*KX_star;
      varsig(:, i) = diagK - sum(KX_star.*Kinvk, 1)';
    end
  end
end
end
% COLLABTEST Test collaborative filtering model.
%
% Script: builds a small random sparse ratings matrix and, for several
% numbers of mixture components with and without heteroscedastic noise,
% creates a model on the first user's column, gives it random
% parameters, displays it and runs the gradient check.

% COLLAB

% Fix both random generators so the run is repeatable.
rand('seed', 1e5)
randn('seed', 1e5)

numItems = 50;
numUsers = 10;
y = randn(numItems, numUsers);
% Zero every entry whose uniform draw exceeds 0.2 (zero = unobserved).
y(find(rand(numItems, numUsers)>0.2)) = 0;
y = sparse(y);

options = collabOptions;

for numComps = [1 2 4 8]
  for heteroNoise = [false true]
    options.numComps = numComps;
    options.heteroNoise = heteroNoise;
    fprintf('Testing model with %d component(s).\n', options.numComps)
    if heteroNoise
      fprintf('Heteroschedastic noise used.\n')
    end
    % Only the first user's column is used to build the model.
    model = collabCreate(2, numUsers, y(:, 1), options);
    % Replace the initial parameters by random ones of the same size.
    params = collabExtractParam(model);
    params = randn(size(params));
    model = collabExpandParam(model, params);
    if model.M > 1
      % Mixture models need the E step before display and gradient check.
      model = collabEstep(model);
    end
    modelDisplay(model);
    modelGradientCheck(model);
  end
end
function model = collabUpdateKernels(model)

% COLLABUPDATEKERNELS Update the kernels that are needed.
% FORMAT
% DESC updates any representations of the kernel in the model
% structure, such as invK, logDetK or K, for the observed (non-zero)
% entries of model.m. For a mixture model one inverse and one log
% determinant are stored per component, with the noise of each point
% divided by that component's responsibility.
% ARG model : the model structure for which kernels are being
% updated.
% RETURN model : the model structure with the kernels updated.
%
% SEEALSO : collabExpandParam, collabCreate
%
% COPYRIGHT : Neil D. Lawrence, 2009

% COLLAB

ind = find(model.m);
n = length(ind);
model.K = kernCompute(model.kern, model.X(ind, :));
if model.M > 1
  % mixture model.
  % The responsibilities only exist for mixture models, so they are read
  % inside this branch; single component models have no expectation field.
  s = model.expectation.s{model.currentOut};
  for m = 1:model.M
    if model.heteroNoise
      Binv = diag(model.diagvar(ind)./s(ind, m));
    else
      Binv = diag(model.sigma2./s(ind, m));
    end
    Kadd = model.K + Binv;
    [model.invK{m}, U] = pdinv(Kadd);
    % U is the factor of Kadd, so hand logdet the matching matrix.
    model.logDetK(m) = logdet(Kadd, U);
  end
elseif model.heteroNoise
  Kadd = model.K + spdiags(model.diagvar(ind, :), 0, n, n);
  [model.invK, U] = pdinv(Kadd);
  model.logDetK = logdet(Kadd, U);
else
  [model.invK, U] = pdinv(model.K);
  model.logDetK = logdet(model.K, U);
end

end
pred_r_NMAE = [pred_r_NMAE; abs(a)]; 41 | perUser_var =[perUser_var; mean(varsig)]; 42 | perUser_L2 = [perUser_L2; mean(a'*a)]; 43 | perUser_r_NMAE = [perUser_r_NMAE; mean(abs(a))]; 44 | pred_var = [pred_var; varsig]; 45 | users = [users; i*ones(length(varsig),1)]; 46 | numUsers(i) = length(a); 47 | end 48 | L2_error = sqrt(val_L2/tot_L2); 49 | NMAE_error = (val_NMAE/tot_NMAE)/1.6; 50 | NMAE_round_error = (val_round_NMAE/tot_round_NMAE)/1.6; 51 | -------------------------------------------------------------------------------- /matlab/computePredictionsErrorWeak.m: -------------------------------------------------------------------------------- 1 | 2 | function [mu_T] = computePredictionsErrorWeak(model,Y,Ytest) 3 | % COMPUTEPREDICTIONSERRORWEAK Stack the de-normalised posterior mean predictions for every user's test items. 4 | % [mu_T] = computePredictionsErrorWeak(model,Y,Ytest) 5 | % NOTE: the val_*/tot_* accumulators and accum below are initialised but never used in this function. 6 | val_L2 = 0; 7 | tot_L2 = 0; 8 | val_NMAE = 0; 9 | tot_NMAE = 0; 10 | val_round_NMAE = 0; 11 | tot_round_NMAE = 0; 12 | accum = []; 13 | mu_T = []; 14 | 15 | for i = 1:size(Y, 2) 16 | ind = find(Ytest(:, i)); 17 | elim = find(ind>size(model.X, 1)); 18 | tind = ind; 19 | tind(elim) = []; 20 | [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :)); 21 | % undo the normalisation: scale by model.sd and add model.mu for the predicted items 22 | 23 | if (length(mu)>0) 24 | mu = mu.*model.sd(tind); 25 | mu = mu+model.mu(tind); 26 | end 27 | mu_T = [mu_T; mu]; 28 | end 29 | -------------------------------------------------------------------------------- /matlab/computeTestErrorEnsemblesWeak.m: -------------------------------------------------------------------------------- 1 | function [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorEnsemblesWeak(allModels,Y,Ytest) 2 | % 3 | % [error_L2,error_NMAE,error_NMAE_round] = computeTestErrorEnsemblesWeak(allModels,Y,Ytest) 4 | 5 | 6 | val_L2 = 0; 7 | tot_L2 = 0; 8 | val_NMAE = 0; 9 | tot_NMAE = 0; 10 | val_round_NMAE = 0; 11 | tot_round_NMAE = 0; 12 | accum = []; 13 | 14 | for i = 1:size(Y, 2) 15 | ind = find(Ytest(:, i)); 16 | elim = 
find(ind>size(allModels{1}.X, 1)); 17 | tind = ind; 18 | tind(elim) = []; 19 | mu_T = 0; 20 | for j=1:length(allModels) 21 | [mu, varsig] = collabPosteriorMeanVar(allModels{j}, Y(:, i), allModels{j}.X(tind, :)); 22 | % undo each model's normalisation before the predictions are averaged 23 | 24 | if (length(mu)>0) 25 | mu = mu.*allModels{j}.sd(tind); 26 | mu = mu+allModels{j}.mu(tind); 27 | end 28 | mu_T = mu_T + mu; 29 | end 30 | mu_T = mu_T/length(allModels); 31 | a = Ytest(tind, i) - mu_T; 32 | a = [a; Ytest(ind(elim), i)]; % elim holds positions within ind, so map back to Ytest row numbers 33 | val_L2 = val_L2 + a'*a; 34 | tot_L2 = tot_L2 + length(a); 35 | val_NMAE = val_NMAE + sum(abs(a)); 36 | tot_NMAE = tot_NMAE + length(a); 37 | val_round_NMAE = val_round_NMAE + sum(abs(round(a))); 38 | tot_round_NMAE = tot_round_NMAE + length(a); 39 | accum = [accum; abs(a)]; 40 | end 41 | L2_error = sqrt(val_L2/tot_L2); 42 | NMAE_error = (val_NMAE/tot_NMAE)/1.6; 43 | NMAE_round_error = (val_round_NMAE/tot_round_NMAE)/1.6; 44 | -------------------------------------------------------------------------------- /matlab/computeTestErrorStrong.m: -------------------------------------------------------------------------------- 1 | function [error_L2,error_NMAE,error_NMAE_round] = computeTestErrorStrong(model,Ytest) 2 | % COMPUTETESTERRORSTRONG Compute the strong test error. 3 | % FORMAT 4 | % DESC computes the test error for the strong generalization. 5 | % ARG model : the model. 6 | % ARG Ytest : the test data. 7 | % RETURN L2_error : the l2 error. 8 | % RETURN NMAE_error : the NMAE error. 9 | % RETURN NMAE_round_error : the NMAE error with rounding on the outputs. 10 | % 11 | % SEEALSO : computeTestErrorWeak 12 | % 13 | % COPYRIGHT : Raquel Urtasun, 2009 14 | 15 | % COLLAB 16 | ??? 
this doesn't work 17 | 18 | val_L2 = 0; 19 | tot_L2 = 0; 20 | val_NMAE = 0; 21 | tot_NMAE = 0; 22 | val_NMAE_round = 0; 23 | tot_NMAE_round = 0; 24 | 25 | for i = 1:size(Ytest, 2) 26 | ind = find(Ytest(:, i)); 27 | elim = find(ind>size(model.X, 1)); 28 | tind = ind; 29 | tind(elim) = []; 30 | 31 | if (length(tind)==0) 32 | continue; 33 | end 34 | % in the case of STRONG experiments, the user is new, so we have to 35 | % compute the prediction using the test data 36 | % compute random (LOO --> leave one out) 37 | indexRand = randperm(length(tind)); 38 | Y_train_user = Ytest(:,i); 39 | Y_test_user = Y_train_user(tind(indexRand(end))); 40 | Y_train_user(tind(indexRand(end)),:) = 0; 41 | [mu, varsig] = collabPosteriorMeanVar(model, Y_train_user, model.X(tind(indexRand(end)), :)); 42 | 43 | %mu = mu*model.sd(tind); 44 | %mu = mu+model.mu(tind); 45 | 46 | a = Y_test_user - mu; 47 | a = [a; Ytest(ind(elim), i)]; % elim holds positions within ind, so map back to Ytest row numbers 48 | val_L2 = val_L2 + a'*a; 49 | tot_L2 = tot_L2 + length(a); 50 | val_NMAE = val_NMAE + sum(abs(a)); 51 | tot_NMAE = tot_NMAE + length(a); 52 | val_NMAE_round = val_NMAE_round + sum(abs(round(a))); 53 | tot_NMAE_round = tot_NMAE_round + length(a); 54 | end 55 | error_L2 = sqrt(val_L2/tot_L2); 56 | error_NMAE = (val_NMAE/tot_NMAE)/1.6; 57 | error_NMAE_round = (val_NMAE_round/tot_NMAE_round)/1.6; 58 | -------------------------------------------------------------------------------- /matlab/computeTestErrorWeak.m: -------------------------------------------------------------------------------- 1 | function [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorWeak(model,Y,Ytest) 2 | % COMPUTETESTERRORWEAK Compute the weak test error. 3 | % FORMAT 4 | % DESC computes the test error for the weak generalization. 5 | % ARG model : the model. 6 | % ARG Y : the training data. 7 | % ARG Ytest : the test data. 8 | % RETURN L2_error : the l2 error. 9 | % RETURN NMAE_error : the NMAE error. 10 | % RETURN NMAE_round_error : the NMAE error with rounding on the outputs. 
11 | % 12 | % SEEALSO : computeTestErrorStrong 13 | % 14 | % COPYRIGHT : Raquel Urtasun, 2009 15 | 16 | % COLLAB 17 | 18 | 19 | val_L2 = 0; 20 | tot_L2 = 0; 21 | val_NMAE = 0; 22 | tot_NMAE = 0; 23 | val_round_NMAE = 0; 24 | tot_round_NMAE = 0; 25 | accum = []; 26 | 27 | for i = 1:size(Y, 2) 28 | ind = find(Ytest(:, i)); 29 | elim = find(ind>size(model.X, 1)); 30 | tind = ind; 31 | tind(elim) = []; 32 | [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :)); 33 | %/~ 34 | % normalize the values 35 | 36 | %if (length(mu)>0) 37 | % mu = mu.*model.sd(tind); 38 | % mu = mu+model.mu(tind); 39 | %end 40 | %~/ 41 | a = Ytest(tind, i) - mu; 42 | a = [a; Ytest(ind(elim), i)]; % elim holds positions within ind, so map back to Ytest row numbers 43 | val_L2 = val_L2 + a'*a; 44 | tot_L2 = tot_L2 + length(a); 45 | val_NMAE = val_NMAE + sum(abs(a)); 46 | tot_NMAE = tot_NMAE + length(a); 47 | val_round_NMAE = val_round_NMAE + sum(abs(round(a))); 48 | tot_round_NMAE = tot_round_NMAE + length(a); 49 | accum = [accum; abs(a)]; 50 | end 51 | L2_error = sqrt(val_L2/tot_L2); 52 | NMAE_error = (val_NMAE/tot_NMAE)/1.6; 53 | NMAE_round_error = (val_round_NMAE/tot_round_NMAE)/1.6; 54 | -------------------------------------------------------------------------------- /matlab/computeTestErrorWeakCell.m: -------------------------------------------------------------------------------- 1 | function [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorWeakCell(model,Y,Ytest) 2 | % 3 | % COMPUTETESTERRORWEAKCELL Compute the weak test error for data stored in a cell array. 4 | % FORMAT 5 | % DESC computes the test error for the weak generalization. 6 | % ARG model : the model. 7 | % ARG Y : the training data. 8 | % ARG Ytest : the test data. 9 | % RETURN L2_error : the l2 error. 10 | % RETURN NMAE_error : the NMAE error. 
11 | % 12 | % 13 | % COPYRIGHT : Raquel Urtasun, 2009 14 | 15 | % COLLAB 16 | 17 | val_L2 = 0; 18 | tot_L2 = 0; 19 | val_NMAE = 0; 20 | tot_NMAE = 0; 21 | val_round_NMAE = 0; 22 | tot_round_NMAE = 0; 23 | accum = []; 24 | 25 | 26 | 27 | for i = 1:size(Y, 1) 28 | ind = Ytest{i,1}; 29 | elim = find(ind>size(model.X, 1)); 30 | tind = ind; 31 | tind(elim) = []; 32 | 33 | if (length(ind)<1) 34 | disp(['No test data for ',num2str(i),]); 35 | continue; 36 | end 37 | [mu, varsig] = collabPosteriorMeanVarCell(model, Y{i,1}, double(Y{i,2}), model.X(tind, :)); 38 | % normalize the values 39 | 40 | a = double(Ytest{i,2}) - mu; 41 | %a = [a; Ytest(elim, i)]; 42 | val_L2 = val_L2 + a'*a; 43 | tot_L2 = tot_L2 + length(a); 44 | val_NMAE = val_NMAE + sum(abs(a)); 45 | tot_NMAE = tot_NMAE + length(a); 46 | val_round_NMAE = val_round_NMAE + sum(abs(round(a))); 47 | tot_round_NMAE = tot_round_NMAE + length(a); 48 | %accum = [accum; abs(a)]; 49 | end 50 | L2_error = sqrt(val_L2/tot_L2); 51 | NMAE_error = (val_NMAE/tot_NMAE)/1.6; 52 | NMAE_round_error = (val_round_NMAE/tot_round_NMAE)/1.6; 53 | 54 | 55 | -------------------------------------------------------------------------------- /matlab/demAistats1.m: -------------------------------------------------------------------------------- 1 | % DEMAISTATS1 Try collaborative filtering on the Aistats Reviews 2 | 3 | % COLLAB 4 | 5 | randn('seed', 1e5); 6 | rand('seed', 1e5); 7 | 8 | experimentNo = 1; 9 | 10 | dataSetName = 'aistats'; 11 | [Y, void, Ytest] = collabLoadData(dataSetName); 12 | 13 | numPapers = size(Y,1); 14 | numReviewers = size(Y,2); 15 | meanPapers = zeros(numPapers,1); 16 | stdPapers = ones(numPapers,1); 17 | 18 | q = 2; 19 | options = collabOptions; 20 | model = collabCreate(q, size(Y, 2), Y, options); 21 | model.kern.comp{2}.variance = 0.11; 22 | model.kern.comp{3}.variance = 5; 23 | options = collabOptimiseOptions; 24 | 25 | % set parameters 26 | options.momentum = 0.9; 27 | options.learnRate = 0.0001; 28 | 
options.paramMomentum = 0.9; 29 | options.paramLearnRate = 0.0001; 30 | options.numIters = 20; % ??? put 10 back 31 | options.showLikelihood = true; 32 | 33 | capName = dataSetName; 34 | capName(1) = upper(capName(1)); 35 | options.saveName = ['dem' capName num2str(experimentNo)]; 36 | 37 | model.mu = meanPapers; 38 | model.sd = stdPapers; 39 | 40 | model = collabOptimise(model, Y, options) 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /matlab/demEachMovie1.m: -------------------------------------------------------------------------------- 1 | % DEMEACHMOVIE1 Try collaborative filtering on the EachMovie data with Marlin's partitions 2 | % where the weak movielens experiment 3 | 4 | % COLLAB 5 | 6 | randn('seed', 1e5); 7 | rand('seed', 1e5); 8 | 9 | experimentNo = 3; 10 | substract_mean = 0; 11 | 12 | dataSetName = 'eachmovie_marlin_weak_1'; 13 | [Y, void, Ytest] = collabLoadData(dataSetName); 14 | 15 | numFilms = size(Y,1); 16 | numUsers = size(Y,2); 17 | meanFilms = zeros(numFilms,1); 18 | stdFilms = ones(numFilms,1); 19 | if (substract_mean) 20 | % do for each film independently 21 | for i=1:numFilms 22 | % compute the mean and standard deviation of each film 23 | ind = find(Y(i,:)); 24 | mean_v = sum(Y(i,ind)); 25 | mean_v = mean_v + sum(nonzeros(Ytest(i,:))); 26 | length_v = length(ind) + nnz(Ytest(i,:)); 27 | mean_v = mean_v/length_v; 28 | std_v = (length(ind)*std(Y(i,ind)) + nnz(Ytest(i,:))*std(Ytest(i,:)))/length_v; 29 | Y(i,ind) = Y(i,ind) - mean_v; 30 | if (std_v>0) 31 | Y(i,ind) = Y(i,ind)/std_v; 32 | end 33 | meanFilms(i) = mean_v; 34 | stdFilms(i) = std_v; 35 | end 36 | end 37 | 38 | q = 5; 39 | options = collabOptions; 40 | model = collabCreate(q, size(Y, 2), Y, options); 41 | model.kern.comp{2}.variance = 0.11; 42 | model.kern.comp{3}.variance = 5; 43 | options = collabOptimiseOptions; 44 | 45 | % set parameters 46 | options.momentum = 0.9; 47 | options.learnRate = 0.0001; 48 | 
options.paramMomentum = 0.9; 49 | options.paramLearnRate = 0.0001; 50 | options.numIters = 1; % ??? put 10 back 51 | options.showLikelihood = false; 52 | 53 | capName = dataSetName; 54 | capName(1) = upper(capName(1)); 55 | options.saveName = ['dem' capName num2str(experimentNo) '_']; 56 | 57 | %%% ?? add the model.mu and model.sd 58 | model.mu = meanFilms; 59 | model.sd = stdFilms; 60 | 61 | model = collabOptimise(model, Y, options) 62 | 63 | % we have to divide the test data into two sets, train and test for the 64 | % prediction. All but one are the train 65 | 66 | 67 | 68 | 69 | disp('Computing test error'); 70 | 71 | % ????? this test is to be done 72 | 73 | keyboard 74 | 75 | % ??? check if the mean is substracted... 76 | 77 | [error_L2,error_NMAE,error_NMAE_round] = computeTestErrorWeak(model,Y,Ytest); 78 | 79 | % val_L2 = 0; 80 | % tot_L2 = 0; 81 | % val_NMAE = 0; 82 | % tot_NMAE = 0; 83 | % val_round_NMAE = 0; 84 | % tot_round_NMAE = 0; 85 | % 86 | % for i = 1:size(Y, 2) 87 | % ind = find(Ytest(:, i)); 88 | % elim = find(ind>size(model.X, 1)); 89 | % tind = ind; 90 | % tind(elim) = []; 91 | % [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :)); 92 | % % normalize the values 93 | % 94 | % 95 | % mu = mu*model.sd(1); 96 | % mu = mu+model.mu(1); 97 | % a = Ytest(tind, i) - mu; 98 | % a = [a; Ytest(elim, i)]; 99 | % val_L2 = val_L2 + a'*a; 100 | % tot_L2 = tot_L2 + length(a); 101 | % val_NMAE = val_NMAE + sum(abs(a)); 102 | % tot_NMAE = tot_NMAE + length(a); 103 | % val_round_NMAE = val_round_NMAE + sum(abs(round(a))); 104 | % tot_round_NMAE = tot_round_NMAE + length(a); 105 | % end 106 | % L2_error = sqrt(val_L2/tot_L2); 107 | % NMAE_error = (val_NMAE/tot_NMAE)/1.6; 108 | % NMAE_round_error = (val_round_NMAE/tot_round_NMAE)/1.6; 109 | 110 | 111 | % Save the results. 
112 | capName = dataSetName; 113 | capName(1) = upper(capName(1)); 114 | save(['dem' capName num2str(experimentNo) '.mat'], 'model', 'error_L2', 'error_NMAE', 'error_NMAE_round'); 115 | 116 | 117 | 118 | -------------------------------------------------------------------------------- /matlab/demEachMovie7.m: -------------------------------------------------------------------------------- 1 | % DEMMOVIELENS5 Try collaborative filtering on the large movielens data. 2 | % where the strong movielens experiment 3 | 4 | randn('seed', 1e5); 5 | rand('seed', 1e5); 6 | 7 | experimentNo = 3; 8 | substract_mean = 0; 9 | 10 | dataSetName = 'eachmovie_weak_1'; 11 | [Y, void, Ytest] = collabLoadData(dataSetName); 12 | 13 | numFilms = size(Y,1); 14 | numUsers = size(Y,2); 15 | meanFilms = zeros(numFilms,1); 16 | stdFilms = ones(numFilms,1); 17 | if (substract_mean) 18 | % do for each film independently 19 | for i=1:numFilms 20 | % compute the mean and standard deviation of each film 21 | ind = find(Y(i,:)); 22 | mean_v = sum(Y(i,ind)); 23 | mean_v = mean_v + sum(nonzeros(Ytest(i,:))); 24 | length_v = length(ind) + nnz(Ytest(i,:)); 25 | mean_v = mean_v/length_v; 26 | std_v = (length(ind)*std(Y(i,ind)) + nnz(Ytest(i,:))*std(Ytest(i,:)))/length_v; 27 | Y(i,ind) = Y(i,ind) - mean_v; 28 | if (std_v>0) 29 | Y(i,ind) = Y(i,ind)/std_v; 30 | end 31 | meanFilms(i) = mean_v; 32 | stdFilms(i) = std_v; 33 | end 34 | end 35 | 36 | q = 5; 37 | options = collabOptions; 38 | model = collabCreate(q, size(Y, 2), Y, options); 39 | model.kern.comp{2}.variance = 0.11; 40 | model.kern.comp{3}.variance = 5; 41 | options = collabOptimiseOptions; 42 | 43 | % set parameters 44 | options.momentum = 0.9; 45 | options.learnRate = 0.0001; 46 | options.paramMomentum = 0.9; 47 | options.paramLearnRate = 0.0001; 48 | options.numIters = 1; % ??? 
put 10 back 49 | options.showLikelihood = false; 50 | 51 | capName = dataSetName; 52 | capName(1) = upper(capName(1)); 53 | options.saveName = ['dem' capName num2str(experimentNo) '_']; 54 | 55 | %%% ?? add the model.mu and model.sd 56 | model.mu = meanFilms; 57 | model.sd = stdFilms; 58 | 59 | model = collabOptimise(model, Y, options) 60 | 61 | % we have to divide the test data into two sets, train and test for the 62 | % prediction. All but one are the train 63 | 64 | 65 | 66 | 67 | disp('Computing test error'); 68 | 69 | % ????? this test is to be done 70 | 71 | keyboard 72 | 73 | % ??? check if the mean is substracted... 74 | 75 | [error_L2,error_NMAE,error_NMAE_round] = computeTestErrorWeak(model,Y,Ytest); 76 | 77 | % val_L2 = 0; 78 | % tot_L2 = 0; 79 | % val_NMAE = 0; 80 | % tot_NMAE = 0; 81 | % val_round_NMAE = 0; 82 | % tot_round_NMAE = 0; 83 | % 84 | % for i = 1:size(Y, 2) 85 | % ind = find(Ytest(:, i)); 86 | % elim = find(ind>size(model.X, 1)); 87 | % tind = ind; 88 | % tind(elim) = []; 89 | % [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :)); 90 | % % normalize the values 91 | % 92 | % 93 | % mu = mu*model.sd(1); 94 | % mu = mu+model.mu(1); 95 | % a = Ytest(tind, i) - mu; 96 | % a = [a; Ytest(elim, i)]; 97 | % val_L2 = val_L2 + a'*a; 98 | % tot_L2 = tot_L2 + length(a); 99 | % val_NMAE = val_NMAE + sum(abs(a)); 100 | % tot_NMAE = tot_NMAE + length(a); 101 | % val_round_NMAE = val_round_NMAE + sum(abs(round(a))); 102 | % tot_round_NMAE = tot_round_NMAE + length(a); 103 | % end 104 | % L2_error = sqrt(val_L2/tot_L2); 105 | % NMAE_error = (val_NMAE/tot_NMAE)/1.6; 106 | % NMAE_round_error = (val_round_NMAE/tot_round_NMAE)/1.6; 107 | 108 | 109 | % Save the results. 
110 | capName = dataSetName; 111 | capName(1) = upper(capName(1)); 112 | save(['dem' capName num2str(experimentNo) '.mat'], 'model', 'error_L2', 'error_NMAE', 'error_NMAE_round'); 113 | 114 | 115 | 116 | -------------------------------------------------------------------------------- /matlab/demEachMovieMarlinStrongScript1.m: -------------------------------------------------------------------------------- 1 | function [] = demEachMovieMarlinStrongScript1(substract_mean, partNo_v, latentDim_v,iters, inverted) 2 | % DEMEACHMOVIEMARLINSTRONGSCRIPT1 EachMovie strong generalization. 3 | % FORMAT 4 | % DESC Try collaborative filtering with the RBF covariance function 5 | % on the EachMovie data with Marlin's partitions for strong generalization. 6 | % ARG substract_mean : bool if substract the mean. 7 | % ARG partNo : vector with the partitions to compute results. 8 | % ARG latentDim_v : vector with the latent dimensionalities to compute results. 9 | % ARG iters : number of iterations. 10 | % ARG inverted : if true, then learn users as examples and not items. 11 | % 12 | % SEEALSO collabCreate, collabOptimise 13 | % 14 | % COPYRIGHT : Raquel Urtasun, 2009 15 | 16 | % COLLAB 17 | 18 | randn('seed', 1e5); 19 | rand('seed', 1e5); 20 | 21 | experimentNo = 3; 22 | 23 | 24 | %partNo_v = [1:5]; 25 | %latentDim_v = [5, 2:4, 6]; 26 | 27 | 28 | for i_latent=1:length(latentDim_v) 29 | q = latentDim_v(i_latent); 30 | for i_part=1:length(partNo_v) 31 | partNo = partNo_v(i_part); 32 | 33 | dataSetName = ['eachmovie_marlin_strong_',num2str(partNo)]; 34 | 35 | disp(['Reading ... 
',dataSetName]); 36 | 37 | [Y, lbls, Ytest] = collabLoadData(dataSetName); 38 | 39 | Ytraintest = lbls; 40 | 41 | if (inverted) 42 | Y = Y'; 43 | Ytest = Ytest'; 44 | end 45 | 46 | numFilms = size(Y,1); 47 | numUsers = size(Y,2); 48 | meanFilms = zeros(numFilms,1); 49 | stdFilms = ones(numFilms,1); 50 | 51 | if (substract_mean) 52 | if 0 53 | % this substract the global mean 54 | % create the total vector 55 | s = nonzeros(Ytest); 56 | ratings = [nonzeros(Y); nonzeros(Ytest)]; 57 | meanY = mean(ratings); 58 | stdY = std(ratings); 59 | %keyboard; 60 | index = find(Y); 61 | %Y(index) = Y(index) - meanY; 62 | %Y(index) = Y(index) / stdY; 63 | else 64 | for i=1:numFilms 65 | % compute the mean and standard deviation of each film 66 | ind = find(Y(i,:)); 67 | mean_v = sum(Y(i,ind)); 68 | mean_v = mean_v + sum(nonzeros(Ytest(i,:))); 69 | length_v = length(ind) + nnz(Ytest(i,:)); 70 | mean_v = mean_v/length_v; 71 | std_v = (length(ind)*std(Y(i,ind)) + nnz(Ytest(i,:))*std(Ytest(i,:)))/length_v; 72 | %Y(i,ind) = Y(i,ind) - mean_v; 73 | %if (std_v>0) 74 | % Y(i,ind) = Y(i,ind)/std_v; 75 | %end 76 | meanFilms(i) = mean_v; 77 | stdFilms(i) = std_v; 78 | end 79 | end 80 | %keyboard; 81 | end 82 | 83 | options = collabOptions; 84 | model = collabCreate(q, size(Y, 2), Y, options); 85 | % keyboard; 86 | if (substract_mean) 87 | if 0 88 | % this does the global mean 89 | model.mu = repmat(meanY,size(model.mu,1),1); 90 | model.sd = repmat(stdY,size(model.sd,1),1); 91 | else 92 | model.mu = meanFilms; 93 | model.sd = stdFilms; 94 | end 95 | 96 | end 97 | model.kern.comp{2}.variance = 0.11; 98 | model.kern.comp{3}.variance = 5; 99 | options = collabOptimiseOptions; 100 | 101 | 102 | % set parameters 103 | options.momentum = 0.9; 104 | options.learnRate = 0.0001; 105 | options.paramMomentum = 0.9; 106 | options.paramLearnRate = 0.0001; 107 | options.numIters = iters; 108 | options.showLikelihood = false; 109 | 110 | capName = dataSetName; 111 | capName(1) = upper(capName(1)); 112 | 
options.saveName = ['dem' capName num2str(experimentNo) '_']; 113 | 114 | model = collabOptimise(model, Y, options) 115 | 116 | % compute the test error 117 | disp('Computing test error'); 118 | 119 | 120 | [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorWeak(model,Ytraintest,Ytest) 121 | 122 | 123 | % Save the results. 124 | capName = dataSetName; 125 | capName(1) = upper(capName(1)); 126 | 127 | saveResults = [capName,'inverted_',num2str(inverted),'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'.mat']; 128 | disp(['Saving ... ',saveResults]); 129 | save(saveResults, 'model', 'L2_error','options','NMAE_error','NMAE_round_error'); 130 | end 131 | end 132 | 133 | 134 | 135 | -------------------------------------------------------------------------------- /matlab/demEachMovieMarlinWeakEnsemScript1.m: -------------------------------------------------------------------------------- 1 | function [] = demEachMovieMarlinWeakEnsemScript1(substract_mean, partNo_v, latentDim_v,iters, inverted, type) 2 | % DEMEACHMOVIEMARLINWEAKENSEMSCRIPT1 Ensemble of models on Marlin's weak Eachmovie partions. 3 | % FORMAT 4 | % DESC Try collaborative filtering on the Eachmovie data with ensembles for 5 | % Marlin's partitions for weak generalization. 6 | % ARG substract_mean : bool if substract the mean. 7 | % ARG partNo : vector with the partitions to compute results. 8 | % ARG latentDim_v : vector with the latent dimensionalities to compute results. 9 | % ARG iters : number of iterations. 10 | % ARG inverted : if true, then learn users as examples and not items. 
11 | % 12 | % SEEALSO collabCreate, collabOptimise 13 | % 14 | % COPYRIGHT : Raquel Urtasun, 2009 15 | 16 | % COLLAB 17 | 18 | randn('seed', 1e5); 19 | rand('seed', 1e5); 20 | 21 | experimentNo = 3; 22 | 23 | 24 | predictions = zeros(length(latentDim_v),length(partNo_v)); 25 | modelsActive = ones(length(latentDim_v),length(partNo_v)); 26 | 27 | %partNo_v = [1:5]; 28 | %latentDim_v = [5, 2:4, 6]; 29 | 30 | 31 | 32 | % for each partition load the data 33 | for i_part=1:length(partNo_v) 34 | partNo = partNo_v(i_part); 35 | numActive = 0; 36 | allModels = []; 37 | 38 | dataSetName = ['eachmovie_marlin_',type,'_',num2str(partNo)]; 39 | 40 | disp(['Reading ... ',dataSetName]); 41 | 42 | [Y, lbls, Ytest] = collabLoadData(dataSetName); 43 | 44 | if (inverted) 45 | Y = Y'; 46 | Ytest = Ytest'; 47 | end 48 | 49 | numFilms = size(Y,1); 50 | numUsers = size(Y,2); 51 | meanFilms = zeros(numFilms,1); 52 | stdFilms = ones(numFilms,1); 53 | 54 | if (substract_mean) 55 | if 0 56 | % this substract the global mean 57 | % create the total vector 58 | s = nonzeros(Ytest); 59 | ratings = [nonzeros(Y); nonzeros(Ytest)]; 60 | meanY = mean(ratings); 61 | stdY = std(ratings); 62 | %keyboard; 63 | index = find(Y); 64 | %Y(index) = Y(index) - meanY; 65 | %Y(index) = Y(index) / stdY; 66 | else 67 | for i=1:numFilms 68 | % compute the mean and standard deviation of each film 69 | ind = find(Y(i,:)); 70 | mean_v = sum(Y(i,ind)); 71 | mean_v = mean_v + sum(nonzeros(Ytest(i,:))); 72 | length_v = length(ind) + nnz(Ytest(i,:)); 73 | mean_v = mean_v/length_v; 74 | std_v = (length(ind)*std(Y(i,ind)) + nnz(Ytest(i,:))*std(Ytest(i,:)))/length_v; 75 | %Y(i,ind) = Y(i,ind) - mean_v; 76 | %if (std_v>0) 77 | % Y(i,ind) = Y(i,ind)/std_v; 78 | %end 79 | meanFilms(i) = mean_v; 80 | stdFilms(i) = std_v; 81 | end 82 | end 83 | %keyboard; 84 | end 85 | 86 | for i_latent=1:length(latentDim_v) 87 | q = latentDim_v(i_latent); 88 | 89 | % load the model 90 | % Save the results. 
91 | capName = dataSetName; 92 | capName(1) = upper(capName(1)); 93 | 94 | loadResults = [capName,'inverted_',num2str(inverted),'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'.mat']; 95 | disp(['Loading ... ',loadResults]); 96 | try 97 | load(loadResults); 98 | catch 99 | disp(['Model not found ',loadResults]); 100 | %keyboard; 101 | continue; 102 | end 103 | numActive = numActive + 1; 104 | allModels{numActive} = model; 105 | 106 | 107 | %modelsActive(q) = 1; 108 | end 109 | 110 | 111 | %%%%%%%% 112 | % compute the test error 113 | disp('Computing test error'); 114 | 115 | % compute the test error for ensembles of models 116 | 117 | if strcmp(type,'weak') 118 | 119 | [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorEnsemblesWeak(allModels,Y,Ytest) 120 | else if strcmp(type,'strong') 121 | 122 | [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorEnsemblesWeak(allModels,lbls,Ytest) 123 | end 124 | end 125 | 126 | %[mu] = computePredictionsErrorWeak(model,Y,Ytest) 127 | 128 | % Save the results. 129 | capName = dataSetName; 130 | capName(1) = upper(capName(1)); 131 | 132 | saveResults = [capName,'inverted_',num2str(inverted),'_norm_',num2str(substract_mean),'_',num2str(partNo),'_iters_',num2str(iters),'_ensembles.mat']; 133 | disp(['Saving ... ',saveResults]); 134 | save(saveResults, 'allModels', 'L2_error','NMAE_error','NMAE_round_error','modelsActive'); 135 | end 136 | 137 | 138 | 139 | -------------------------------------------------------------------------------- /matlab/demEachMovieMarlinWeakScript1.m: -------------------------------------------------------------------------------- 1 | function [] = demEachMovieMarlinWeakScript1(substract_mean, partNo_v, latentDim_v,iters,inverted) 2 | % DEMEACHMOVIEMARLINWEAKSCRIPT1 RBF covariance on Marlin's weak Eachmovie partitions. 
3 | % FORMAT 4 | % DESC Try collaborative filtering with the RBF covariance 5 | % on the Eachmovie data for Marlin's partitions for weak generalization. 6 | % ARG substract_mean : bool if substract the mean. 7 | % ARG partNo : vector with the partitions to compute results. 8 | % ARG latentDim_v : vector with the latent dimensionalities to compute results. 9 | % ARG iters : number of iterations. 10 | % ARG inverted : if true, then learn users as examples and not items. 11 | % 12 | % SEEALSO collabCreate, collabOptimise 13 | % 14 | % COPYRIGHT : Raquel Urtasun, 2009 15 | 16 | % COLLAB 17 | 18 | 19 | 20 | randn('seed', 1e5); 21 | rand('seed', 1e5); 22 | 23 | experimentNo = 3; 24 | 25 | 26 | %partNo_v = [1:5]; 27 | %latentDim_v = [5, 2:4, 6]; 28 | 29 | 30 | for i_latent=1:length(latentDim_v) 31 | q = latentDim_v(i_latent); 32 | for i_part=1:length(partNo_v) 33 | partNo = partNo_v(i_part); 34 | 35 | 36 | dataSetName = ['eachmovie_marlin_weak_',num2str(partNo)]; 37 | 38 | disp(['Reading ... ',dataSetName]); 39 | 40 | [Y, void, Ytest] = collabLoadData(dataSetName); 41 | 42 | if (inverted) 43 | Y = Y'; 44 | Ytest = Ytest'; % transpose the test matrix itself; Y' here would overwrite it with the original training data 45 | 46 | end 47 | 48 | numFilms = size(Y,1); 49 | numUsers = size(Y,2); 50 | meanFilms = zeros(numFilms,1); 51 | stdFilms = ones(numFilms,1); 52 | 53 | %if (substract_mean) 54 | if 0 55 | % this substract the global mean 56 | % create the total vector 57 | s = nonzeros(Ytest); 58 | ratings = [nonzeros(Y); nonzeros(Ytest)]; 59 | meanY = mean(ratings); 60 | stdY = std(ratings); 61 | %keyboard; 62 | index = find(Y); 63 | %Y(index) = Y(index) - meanY; 64 | %Y(index) = Y(index) / stdY; 65 | else 66 | for i=1:numFilms 67 | % compute the mean and standard deviation of each film 68 | ind = find(Y(i,:)); 69 | mean_v = sum(Y(i,ind)); 70 | mean_v = mean_v + sum(nonzeros(Ytest(i,:))); 71 | length_v = length(ind) + nnz(Ytest(i,:)); 72 | mean_v = mean_v/length_v; 73 | std_v = (length(ind)*std(Y(i,ind)) + nnz(Ytest(i,:))*std(Ytest(i,:)))/length_v; 74 | %Y(i,ind) = 
Y(i,ind) - mean_v; 75 | %if (std_v>0) 76 | % Y(i,ind) = Y(i,ind)/std_v; 77 | %end 78 | meanFilms(i) = mean_v; 79 | stdFilms(i) = std_v; 80 | end 81 | end 82 | %keyboard; 83 | %end 84 | 85 | 86 | options = collabOptions; 87 | model = collabCreate(q, size(Y, 2), Y, options); 88 | % keyboard; 89 | if (substract_mean) 90 | if 0 91 | % this does the global mean 92 | model.mu = repmat(meanY,size(model.mu,1),1); 93 | model.sd = repmat(stdY,size(model.sd,1),1); 94 | else 95 | model.mu = meanFilms; 96 | model.sd = stdFilms; 97 | end 98 | 99 | end 100 | model.kern.comp{2}.variance = 0.11; 101 | model.kern.comp{3}.variance = 5; 102 | options = collabOptimiseOptions; 103 | 104 | 105 | % set parameters 106 | options.momentum = 0.9; 107 | options.learnRate = 0.0001; 108 | options.paramMomentum = 0.9; 109 | options.paramLearnRate = 0.0001; 110 | options.numIters = iters; 111 | options.showLikelihood = false; 112 | 113 | capName = dataSetName; 114 | capName(1) = upper(capName(1)); 115 | options.saveName = ['dem' capName num2str(experimentNo) '_']; 116 | 117 | ind = find(model.sd==0); 118 | model.sd(ind) = 1; 119 | 120 | 121 | model = collabOptimise(model, Y, options) 122 | 123 | % compute the test error 124 | disp('Computing test error'); 125 | 126 | 127 | [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorWeak(model,Y,Ytest) 128 | 129 | 130 | % Save the results. 131 | capName = dataSetName; 132 | capName(1) = upper(capName(1)); 133 | 134 | saveResults = [capName,'inverted_',num2str(inverted),'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'.mat']; 135 | disp(['Saving ... 
',saveResults]); 136 | save(saveResults, 'model', 'L2_error','options','NMAE_error','NMAE_round_error'); 137 | end 138 | end 139 | 140 | 141 | 142 | -------------------------------------------------------------------------------- /matlab/demMixtoydata1.m: -------------------------------------------------------------------------------- 1 | % DEMMIXTOYDATA1 Demonstrate model on toy data. 2 | 3 | % COLLAB 4 | 5 | dataSetName = 'mixtoydata'; 6 | [Y, lbls, Ytest, X] = collabLoadData(dataSetName); 7 | 8 | q = 2; 9 | options = collabOptions; 10 | options.kern = {'rbf', 'bias'} 11 | options.numComps = 2; 12 | model = collabCreate(q, size(Y, 2), Y, options); 13 | options = collabOptimiseOptions(); 14 | options.momentum = 0.9; 15 | options.learnRate = 0.0001; 16 | options.paramMomentum = 0.9; 17 | options.paramLearnRate = 0.0001; 18 | options.numIters = 5; 19 | model = collabOptimise(model, Y, options); 20 | %model.X = X; 21 | %model.kern.comp{1}.variance = 1; 22 | %model.kern.comp{2}.variance = 0.4; 23 | %model.sigma2 = 0.4; 24 | %model = collabEstep(model, 100); 25 | -------------------------------------------------------------------------------- /matlab/demMovieLens10MLetterWeakScript1.m: -------------------------------------------------------------------------------- 1 | function [] = demMovieLens10MLetterWeakScript1(substract_mean, partNo_v, latentDim_v,iters, inverted) 2 | 3 | % DEMMOVIELENS10MLETTERWEAKSCRIPT1 Try collaborative filtering on the 10M movielens data set. 4 | % FORMAT 5 | % DESC run a script on the 10M movielens data. 6 | % ARG substract_mean : bool if substract the mean. 7 | % ARG partNo : vector with the partitions to compute results. 8 | % ARG latentDim_v : vector with the latent dimensionalities to compute results. 9 | % ARG iters : number of iterations. 10 | % ARG inverted : if true, then learn users as examples and not items. 
11 | % 12 | % SEEALSO collabCreate, collabOptimise 13 | % 14 | % COPYRIGHT : Raquel Urtasun, 2009 15 | 16 | % COLLAB 17 | 18 | randn('seed', 1e5); 19 | rand('seed', 1e5); 20 | 21 | experimentNo = 3; 22 | 23 | partLetter_v = 'ab'; 24 | 25 | %partNo_v = [1:5]; 26 | %latentDim_v = [5, 2:4, 6]; 27 | 28 | 29 | for i_latent=1:length(latentDim_v) 30 | q = latentDim_v(i_latent); 31 | for i_part=1:length(partNo_v) 32 | partLetter = partLetter_v(partNo_v(i_part)); 33 | 34 | dataSetName = ['movielens_10M_',partLetter]; 35 | 36 | disp(['Reading ... ',dataSetName]); 37 | 38 | [Y, void, Ytest] = collabLoadData(dataSetName); 39 | 40 | if (inverted) 41 | Y = Y'; 42 | Ytest = Ytest'; 43 | end 44 | 45 | numFilms = size(Y,1); 46 | numUsers = size(Y,2); 47 | meanFilms = zeros(numFilms,1); 48 | stdFilms = ones(numFilms,1); 49 | 50 | if (substract_mean) 51 | if 0 52 | % this substract the global mean 53 | % create the total vector 54 | s = nonzeros(Ytest); 55 | ratings = [nonzeros(Y); nonzeros(Ytest)]; 56 | meanY = mean(ratings); 57 | stdY = std(ratings); 58 | %keyboard; 59 | index = find(Y); 60 | %Y(index) = Y(index) - meanY; 61 | %Y(index) = Y(index) / stdY; 62 | else 63 | for i=1:numFilms 64 | % compute the mean and standard deviation of each film 65 | ind = find(Y(i,:)); 66 | mean_v = sum(Y(i,ind)); 67 | mean_v = mean_v + sum(nonzeros(Ytest(i,:))); 68 | length_v = length(ind) + nnz(Ytest(i,:)); 69 | mean_v = mean_v/length_v; 70 | std_v = (length(ind)*std(Y(i,ind)) + nnz(Ytest(i,:))*std(Ytest(i,:)))/length_v; 71 | %Y(i,ind) = Y(i,ind) - mean_v; 72 | %if (std_v>0) 73 | % Y(i,ind) = Y(i,ind)/std_v; 74 | %end 75 | meanFilms(i) = mean_v; 76 | stdFilms(i) = std_v; 77 | end 78 | end 79 | %keyboard; 80 | end 81 | 82 | options = collabOptions; 83 | model = collabCreate(q, size(Y, 2), Y, options); 84 | % keyboard; 85 | if (substract_mean) 86 | if 0 87 | % this does the global mean 88 | model.mu = repmat(meanY,size(model.mu,1),1); 89 | model.sd = repmat(stdY,size(model.sd,1),1); 90 | else 91 | 
model.mu = meanFilms; 92 | model.sd = stdFilms; 93 | end 94 | 95 | end 96 | model.kern.comp{2}.variance = 0.11; 97 | model.kern.comp{3}.variance = 5; 98 | options = collabOptimiseOptions; 99 | 100 | 101 | % set parameters 102 | options.momentum = 0.9; 103 | options.learnRate = 0.0001; 104 | options.paramMomentum = 0.9; 105 | options.paramLearnRate = 0.0001; 106 | options.numIters = iters; 107 | options.showLikelihood = false; 108 | 109 | capName = dataSetName; 110 | capName(1) = upper(capName(1)); 111 | options.saveName = ['dem' capName num2str(experimentNo) '_']; 112 | 113 | model = collabOptimise(model, Y, options) 114 | 115 | capName = dataSetName; 116 | capName(1) = upper(capName(1)); 117 | 118 | saveResults = [capName,'inverted_',num2str(inverted),'_norm_',num2str(substract_mean),'_',num2str(q),'_',partLetter,'_iters_',num2str(iters),'.mat']; 119 | disp(['Saving ... ',saveResults]); 120 | 121 | save(saveResults, 'model', 'options'); 122 | 123 | 124 | % compute the test error 125 | disp('Computing test error'); 126 | 127 | 128 | [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorWeakCell(model,Y,Ytest) 129 | 130 | 131 | % Save the results. 132 | disp(['Saving ... ',saveResults]); 133 | save(saveResults, 'model', 'L2_error','options','NMAE_error','NMAE_round_error'); 134 | end 135 | end 136 | 137 | 138 | 139 | -------------------------------------------------------------------------------- /matlab/demMovieLens10MWeakScript1.m: -------------------------------------------------------------------------------- 1 | function [] = demMovieLens10MWeakScript1(substract_mean, partNo_v, latentDim_v,iters, inverted) 2 | % DEMMOVIELENS10MWEAKSCRIPT1 Try collaborative filtering on the 10M movielens data set. 3 | % FORMAT 4 | % DESC run a script on the 10M movielens data. 5 | % ARG substract_mean : bool if substract the mean. 6 | % ARG partNo : vector with the partitions to compute results. 
7 | % ARG latentDim_v : vector with the latent dimensionalities to compute results. 8 | % ARG iters : number of iterations. 9 | % ARG inverted : if true, then learn users as examples and not items. 10 | % 11 | % SEEALSO collabCreate, collabOptimise 12 | % 13 | % COPYRIGHT : Raquel Urtasun, 2009 14 | 15 | % COLLAB 16 | 17 | randn('seed', 1e5); 18 | rand('seed', 1e5); 19 | 20 | experimentNo = 3; 21 | 22 | 23 | %partNo_v = [1:5]; 24 | %latentDim_v = [5, 2:4, 6]; 25 | 26 | 27 | for i_latent=1:length(latentDim_v) 28 | q = latentDim_v(i_latent); 29 | for i_part=1:length(partNo_v) 30 | partNo = partNo_v(i_part); 31 | 32 | dataSetName = ['movielens_10M_',num2str(partNo)]; 33 | 34 | disp(['Reading ... ',dataSetName]); 35 | 36 | [Y, void, Ytest] = collabLoadData(dataSetName); 37 | 38 | if (inverted) 39 | Y = Y'; 40 | Ytest = Ytest'; 41 | end 42 | 43 | numFilms = size(Y,1); 44 | numUsers = size(Y,2); 45 | meanFilms = zeros(numFilms,1); 46 | stdFilms = ones(numFilms,1); 47 | 48 | if (substract_mean) 49 | if 0 50 | % this substract the global mean 51 | % create the total vector 52 | s = nonzeros(Ytest); 53 | ratings = [nonzeros(Y); nonzeros(Ytest)]; 54 | meanY = mean(ratings); 55 | stdY = std(ratings); 56 | %keyboard; 57 | index = find(Y); 58 | %Y(index) = Y(index) - meanY; 59 | %Y(index) = Y(index) / stdY; 60 | else 61 | for i=1:numFilms 62 | % compute the mean and standard deviation of each film over the nonzero entries of its train and test rows 63 | ind = find(Y(i,:)); 64 | mean_v = sum(Y(i,ind)); 65 | mean_v = mean_v + sum(nonzeros(Ytest(i,:))); 66 | length_v = length(ind) + nnz(Ytest(i,:)); 67 | mean_v = mean_v/length_v; 68 | std_test = 0; if nnz(Ytest(i,:))>0, std_test = std(nonzeros(Ytest(i,:))); end; std_v = (length(ind)*std(Y(i,ind)) + nnz(Ytest(i,:))*std_test)/length_v; % test std over the nonzero entries only, as for the train row; a row without test entries contributes 0 69 | %Y(i,ind) = Y(i,ind) - mean_v; 70 | %if (std_v>0) 71 | % Y(i,ind) = Y(i,ind)/std_v; 72 | %end 73 | meanFilms(i) = mean_v; 74 | stdFilms(i) = std_v; 75 | end 76 | end 77 | %keyboard; 78 | end 79 | 80 | options = collabOptions; 81 | model = collabCreate(q, size(Y, 2), Y, options); 82 | % keyboard; 83 | if 
(substract_mean) 84 | if 0 85 | % this does the global mean 86 | model.mu = repmat(meanY,size(model.mu,1),1); 87 | model.sd = repmat(stdY,size(model.sd,1),1); 88 | else 89 | model.mu = meanFilms; 90 | model.sd = stdFilms; 91 | end 92 | 93 | end 94 | model.kern.comp{2}.variance = 0.11; 95 | model.kern.comp{3}.variance = 5; 96 | options = collabOptimiseOptions; 97 | 98 | 99 | % set parameters 100 | options.momentum = 0.9; 101 | options.learnRate = 0.0001; 102 | options.paramMomentum = 0.9; 103 | options.paramLearnRate = 0.0001; 104 | options.numIters = iters; 105 | options.showLikelihood = false; 106 | 107 | capName = dataSetName; 108 | capName(1) = upper(capName(1)); 109 | options.saveName = ['dem' capName num2str(experimentNo) '_']; 110 | 111 | model = collabOptimise(model, Y, options) 112 | 113 | % compute the test error 114 | disp('Computing test error'); 115 | 116 | 117 | [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorWeakCell(model,Y,Ytest) 118 | 119 | 120 | % Save the results. 121 | capName = dataSetName; 122 | capName(1) = upper(capName(1)); 123 | 124 | saveResults = [capName,'inverted_',num2str(inverted),'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'.mat']; 125 | disp(['Saving ... ',saveResults]); 126 | save(saveResults, 'model', 'L2_error','options','NMAE_error','NMAE_round_error'); 127 | end 128 | end 129 | 130 | 131 | 132 | -------------------------------------------------------------------------------- /matlab/demMovieLensMarlinStrongScript1.m: -------------------------------------------------------------------------------- 1 | function [] = demMovieLensMarlinStrongScript1(substract_mean, partNo_v, latentDim_v,iters, inverted) 2 | % DEMMOVIELENSMARLINSTRONGSCRIPT1 Movielens strong generalization. 3 | % FORMAT 4 | % DESC Try collaborative filtering with the RBF covariance function 5 | % on the Movielens data with Marlin's partitions for strong generalization. 
6 | % ARG substract_mean : bool if substract the mean. 7 | % ARG partNo_v : vector with the partitions to compute results. 8 | % ARG latentDim_v : vector with the latent dimensionalities to compute results. 9 | % ARG iters : number of iterations. 10 | % ARG inverted : if true, then learn users as examples and not items. 11 | % 12 | % SEEALSO collabCreate, collabOptimise 13 | % 14 | % COPYRIGHT : Raquel Urtasun, 2009 15 | 16 | % COLLAB 17 | 18 | randn('seed', 1e5); 19 | rand('seed', 1e5); 20 | 21 | experimentNo = 3; 22 | 23 | 24 | %partNo_v = [1:5]; 25 | %latentDim_v = [5, 2:4, 6]; 26 | 27 | 28 | for i_latent=1:length(latentDim_v) 29 | q = latentDim_v(i_latent); 30 | for i_part=1:length(partNo_v) 31 | partNo = partNo_v(i_part); 32 | 33 | dataSetName = ['movielens_marlin_strong_',num2str(partNo)]; 34 | 35 | disp(['Reading ... ',dataSetName]); 36 | 37 | [Y, lbls, Ytest] = collabLoadData(dataSetName); 38 | 39 | Ytraintest = lbls; 40 | 41 | if (inverted) 42 | Y = Y'; 43 | Ytest = Ytest'; 44 | end 45 | 46 | numFilms = size(Y,1); 47 | numUsers = size(Y,2); 48 | meanFilms = zeros(numFilms,1); 49 | stdFilms = ones(numFilms,1); 50 | 51 | if (substract_mean) 52 | if 0 53 | % this substract the global mean 54 | % create the total vector 55 | s = nonzeros(Ytest); 56 | ratings = [nonzeros(Y); nonzeros(Ytest)]; 57 | meanY = mean(ratings); 58 | stdY = std(ratings); 59 | %keyboard; 60 | index = find(Y); 61 | %Y(index) = Y(index) - meanY; 62 | %Y(index) = Y(index) / stdY; 63 | else 64 | for i=1:numFilms 65 | % compute the mean and standard deviation of each film over the nonzero entries of its train and test rows 66 | ind = find(Y(i,:)); 67 | mean_v = sum(Y(i,ind)); 68 | mean_v = mean_v + sum(nonzeros(Ytest(i,:))); 69 | length_v = length(ind) + nnz(Ytest(i,:)); 70 | mean_v = mean_v/length_v; 71 | std_test = 0; if nnz(Ytest(i,:))>0, std_test = std(nonzeros(Ytest(i,:))); end; std_v = (length(ind)*std(Y(i,ind)) + nnz(Ytest(i,:))*std_test)/length_v; % test std over the nonzero entries only, as for the train row; a row without test entries contributes 0 72 | %Y(i,ind) = Y(i,ind) - mean_v; 73 | %if (std_v>0) 74 | % Y(i,ind) = Y(i,ind)/std_v; 75 | %end 76 | meanFilms(i) = mean_v; 77 | stdFilms(i) = std_v; 78 | end 
79 | end 80 | %keyboard; 81 | end 82 | 83 | options = collabOptions; 84 | model = collabCreate(q, size(Y, 2), Y, options); 85 | % keyboard; 86 | if (substract_mean) 87 | if 0 88 | % this does the global mean 89 | model.mu = repmat(meanY,size(model.mu,1),1); 90 | model.sd = repmat(stdY,size(model.sd,1),1); 91 | else 92 | model.mu = meanFilms; 93 | model.sd = stdFilms; 94 | end 95 | 96 | end 97 | model.kern.comp{2}.variance = 0.11; 98 | model.kern.comp{3}.variance = 5; 99 | options = collabOptimiseOptions; 100 | 101 | 102 | % set parameters 103 | options.momentum = 0.9; 104 | options.learnRate = 0.0001; 105 | options.paramMomentum = 0.9; 106 | options.paramLearnRate = 0.0001; 107 | options.numIters = iters; 108 | options.showLikelihood = false; 109 | 110 | capName = dataSetName; 111 | capName(1) = upper(capName(1)); 112 | options.saveName = ['dem' capName num2str(experimentNo) '_']; 113 | 114 | model = collabOptimise(model, Y, options) 115 | 116 | % compute the test error 117 | disp('Computing test error'); 118 | 119 | %keyboard; % debug breakpoint disabled so the loop over partitions runs unattended 120 | % NOTE(review): Ytraintest (lbls) is not transposed when inverted is true, unlike Y and Ytest - confirm 121 | [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorWeak(model,Ytraintest,Ytest) 122 | 123 | 124 | % Save the results. 125 | capName = dataSetName; 126 | capName(1) = upper(capName(1)); 127 | 128 | saveResults = [capName,'inverted_',num2str(inverted),'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'.mat']; 129 | disp(['Saving ... 
',saveResults]); 130 | save(saveResults, 'model', 'L2_error','options','NMAE_error','NMAE_round_error'); 131 | end 132 | end 133 | 134 | 135 | 136 | -------------------------------------------------------------------------------- /matlab/demMovieLensMarlinWeakEnsemScript1.m: -------------------------------------------------------------------------------- 1 | function [] = demMovieLensMarlinWeakEnsemScript1(substract_mean, partNo_v, latentDim_v,iters, inverted, type) 2 | % DEMMOVIELENSMARLINWEAKENSEMSCRIPT1 Ensemble of models on Marlin's weak Movielens partions. 3 | % FORMAT 4 | % DESC Try collaborative filtering on the Movielens data with ensembles for 5 | % Marlin's partitions for weak generalization. 6 | % ARG substract_mean : bool if substract the mean. 7 | % ARG partNo : vector with the partitions to compute results. 8 | % ARG latentDim_v : vector with the latent dimensionalities to compute results. 9 | % ARG iters : number of iterations. 10 | % ARG inverted : if true, then learn users as examples and not items. 11 | % 12 | % SEEALSO collabCreate, collabOptimise 13 | % 14 | % COPYRIGHT : Raquel Urtasun, 2009 15 | 16 | % COLLAB 17 | 18 | randn('seed', 1e5); 19 | rand('seed', 1e5); 20 | 21 | experimentNo = 3; 22 | 23 | 24 | predictions = zeros(length(latentDim_v),length(partNo_v)); 25 | modelsActive = ones(length(latentDim_v),length(partNo_v)); 26 | 27 | %partNo_v = [1:5]; 28 | %latentDim_v = [5, 2:4, 6]; 29 | 30 | 31 | 32 | % for each partition load the data 33 | for i_part=1:length(partNo_v) 34 | partNo = partNo_v(i_part); 35 | numActive = 0; 36 | allModels = []; 37 | 38 | dataSetName = ['movielens_marlin_',type,'_',num2str(partNo)]; 39 | 40 | disp(['Reading ... 
',dataSetName]); 41 | 42 | [Y, lbls, Ytest] = collabLoadData(dataSetName); 43 | 44 | if (inverted) 45 | Y = Y'; 46 | Ytest = Ytest'; 47 | end 48 | 49 | numFilms = size(Y,1); 50 | numUsers = size(Y,2); 51 | meanFilms = zeros(numFilms,1); 52 | stdFilms = ones(numFilms,1); 53 | 54 | if (substract_mean) 55 | if 0 56 | % this substract the global mean 57 | % create the total vector 58 | s = nonzeros(Ytest); 59 | ratings = [nonzeros(Y); nonzeros(Ytest)]; 60 | meanY = mean(ratings); 61 | stdY = std(ratings); 62 | %keyboard; 63 | index = find(Y); 64 | Y(index) = Y(index) - meanY; 65 | Y(index) = Y(index) / stdY; 66 | else 67 | for i=1:numFilms 68 | % compute the mean and standard deviation of each film over the nonzero entries of its train and test rows 69 | ind = find(Y(i,:)); 70 | mean_v = sum(Y(i,ind)); 71 | mean_v = mean_v + sum(nonzeros(Ytest(i,:))); 72 | length_v = length(ind) + nnz(Ytest(i,:)); 73 | mean_v = mean_v/length_v; 74 | std_test = 0; if nnz(Ytest(i,:))>0, std_test = std(nonzeros(Ytest(i,:))); end; std_v = (length(ind)*std(Y(i,ind)) + nnz(Ytest(i,:))*std_test)/length_v; % test std over the nonzero entries only, as for the train row; a row without test entries contributes 0 75 | Y(i,ind) = Y(i,ind) - mean_v; 76 | if (std_v>0) 77 | Y(i,ind) = Y(i,ind)/std_v; 78 | end 79 | meanFilms(i) = mean_v; 80 | stdFilms(i) = std_v; 81 | end 82 | end 83 | %keyboard; 84 | end 85 | 86 | for i_latent=1:length(latentDim_v) 87 | q = latentDim_v(i_latent); 88 | 89 | % load the model 90 | % build the name of the results file for this latent dimension and partition 91 | capName = dataSetName; 92 | capName(1) = upper(capName(1)); 93 | 94 | loadResults = [capName,'inverted_',num2str(inverted),'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'.mat']; 95 | disp(['Loading ... 
',loadResults]); 96 | try 97 | load(loadResults); 98 | catch 99 | disp(['Model not found ',loadResults]); 100 | %keyboard; 101 | continue; 102 | end 103 | numActive = numActive + 1; 104 | allModels{numActive} = model; 105 | 106 | 107 | %modelsActive(q) = 1; 108 | end 109 | 110 | 111 | %%%%%%%% 112 | % compute the test error 113 | disp('Computing test error'); 114 | 115 | % compute the test error for ensembles of models 116 | 117 | if strcmp(type,'weak') 118 | 119 | [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorEnsemblesWeak(allModels,Y,Ytest) 120 | elseif strcmp(type,'strong') 121 | 122 | [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorEnsemblesWeak(allModels,lbls,Ytest) 123 | end 124 | end 125 | 126 | %[mu] = computePredictionsErrorWeak(model,Y,Ytest) 127 | 128 | % Save the results. 129 | capName = dataSetName; 130 | capName(1) = upper(capName(1)); 131 | 132 | saveResults = [capName,'inverted_',num2str(inverted),'_norm_',num2str(substract_mean),'_',num2str(partNo),'_iters_',num2str(iters),'_ensembles.mat']; 133 | disp(['Saving ... ',saveResults]); 134 | save(saveResults, 'allModels', 'L2_error','options','NMAE_error','NMAE_round_error','modelsActive'); 135 | end 136 | 137 | 138 | 139 | -------------------------------------------------------------------------------- /matlab/demMovieLensMarlinWeakScript1.m: -------------------------------------------------------------------------------- 1 | function [] = demMovieLensMarlinWeakScript1(substract_mean, partNo_v, latentDim_v,iters, inverted) 2 | % DEMMOVIELENSMARLINWEAKSCRIPT1 RBF covariance on Marlin's weak Movielens partitions. 3 | % FORMAT 4 | % DESC Try collaborative filtering with the RBF covariance 5 | % on the Movielens data for Marlin's partitions for weak generalization. 6 | % ARG substract_mean : bool if substract the mean. 7 | % ARG partNo : vector with the partitions to compute results. 8 | % ARG latentDim_v : vector with the latent dimensionalities to compute results. 
9 | % ARG iters : number of iterations. 10 | % ARG inverted : if true, then learn users as examples and not items. 11 | % 12 | % SEEALSO collabCreate, collabOptimise 13 | % 14 | % COPYRIGHT : Raquel Urtasun, 2009 15 | 16 | % COLLAB 17 | 18 | 19 | randn('seed', 1e5); 20 | rand('seed', 1e5); 21 | 22 | experimentNo = 3; 23 | 24 | 25 | %partNo_v = [1:5]; 26 | %latentDim_v = [5, 2:4, 6]; 27 | 28 | 29 | for i_latent=1:length(latentDim_v) 30 | q = latentDim_v(i_latent); 31 | for i_part=1:length(partNo_v) 32 | partNo = partNo_v(i_part); 33 | 34 | dataSetName = ['movielens_marlin_weak_',num2str(partNo)]; 35 | 36 | disp(['Reading ... ',dataSetName]); 37 | 38 | [Y, void, Ytest] = collabLoadData(dataSetName); 39 | 40 | if (inverted) 41 | Y = Y'; 42 | Ytest = Ytest'; 43 | end 44 | 45 | numFilms = size(Y,1); 46 | numUsers = size(Y,2); 47 | meanFilms = zeros(numFilms,1); 48 | stdFilms = ones(numFilms,1); 49 | 50 | if (substract_mean) 51 | if 0 52 | % this substract the global mean 53 | % create the total vector 54 | s = nonzeros(Ytest); 55 | ratings = [nonzeros(Y); nonzeros(Ytest)]; 56 | meanY = mean(ratings); 57 | stdY = std(ratings); 58 | %keyboard; 59 | index = find(Y); 60 | Y(index) = Y(index) - meanY; 61 | Y(index) = Y(index) / stdY; 62 | else 63 | for i=1:numFilms 64 | % compute the mean and standard deviation of each film over the nonzero entries of its train and test rows 65 | ind = find(Y(i,:)); 66 | mean_v = sum(Y(i,ind)); 67 | mean_v = mean_v + sum(nonzeros(Ytest(i,:))); 68 | length_v = length(ind) + nnz(Ytest(i,:)); 69 | mean_v = mean_v/length_v; 70 | std_test = 0; if nnz(Ytest(i,:))>0, std_test = std(nonzeros(Ytest(i,:))); end; std_v = (length(ind)*std(Y(i,ind)) + nnz(Ytest(i,:))*std_test)/length_v; % test std over the nonzero entries only, as for the train row; a row without test entries contributes 0 71 | Y(i,ind) = Y(i,ind) - mean_v; 72 | if (std_v>0) 73 | Y(i,ind) = Y(i,ind)/std_v; 74 | end 75 | meanFilms(i) = mean_v; 76 | stdFilms(i) = std_v; 77 | end 78 | end 79 | %keyboard; 80 | end 81 | 82 | options = collabOptions; 83 | model = collabCreate(q, size(Y, 2), Y, options); 84 | % keyboard; 85 | if (substract_mean) 86 | if 0 87 | % this does the global mean 88 | model.mu = 
repmat(meanY,size(model.mu,1),1); 89 | model.sd = repmat(stdY,size(model.sd,1),1); 90 | else 91 | model.mu = meanFilms; 92 | model.sd = stdFilms; 93 | end 94 | 95 | end 96 | model.kern.comp{2}.variance = 0.11; 97 | model.kern.comp{3}.variance = 5; 98 | options = collabOptimiseOptions; 99 | 100 | 101 | % set parameters 102 | options.momentum = 0.9; 103 | options.learnRate = 0.0001; 104 | options.paramMomentum = 0.9; 105 | options.paramLearnRate = 0.0001; 106 | options.numIters = iters; 107 | options.showLikelihood = false; 108 | 109 | capName = dataSetName; 110 | capName(1) = upper(capName(1)); 111 | options.saveName = ['dem' capName num2str(experimentNo) '_']; 112 | 113 | model = collabOptimise(model, Y, options) 114 | 115 | % compute the test error 116 | disp('Computing test error'); 117 | 118 | 119 | [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorWeak(model,Y,Ytest) 120 | 121 | 122 | % Save the results. 123 | capName = dataSetName; 124 | capName(1) = upper(capName(1)); 125 | 126 | saveResults = [capName,'inverted_',num2str(inverted),'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'.mat']; 127 | disp(['Saving ... ',saveResults]); 128 | save(saveResults, 'model', 'L2_error','options','NMAE_error','NMAE_round_error'); 129 | end 130 | end 131 | 132 | 133 | 134 | -------------------------------------------------------------------------------- /matlab/demMovielens1.m: -------------------------------------------------------------------------------- 1 | % DEMMOVIELENS1 Try collaborative filtering on the large movielens data. 
2 | 3 | % COLLAB 4 | 5 | randn('seed', 1e5); 6 | rand('seed', 1e5); 7 | 8 | experimentNo = 1; 9 | 10 | dataSetName = 'movielens'; 11 | [Y, void, Ytest] = collabLoadData(dataSetName); 12 | 13 | q = 3; 14 | options = collabOptions; 15 | model = collabCreate(q, size(Y, 2), Y, options); 16 | model.kern.comp{2}.variance = 0.11; 17 | model.kern.comp{3}.variance = 5; 18 | options = collabOptimiseOptions; 19 | options.numIters = 30; 20 | options.showLikelihood = false; 21 | model = collabOptimise(model, Y, options) 22 | % root mean squared error over the nonzero entries of Ytest, one column at a time 23 | val = 0; 24 | tot = 0; 25 | for i = 1:size(Y, 2) 26 | ind = find(Ytest(:, i)); 27 | elim = find(ind>size(model.X, 1)); 28 | tind = ind; 29 | tind(elim) = []; 30 | [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :)); 31 | a = Ytest(tind, i) - mu; 32 | a = [a; Ytest(ind(elim), i)]; % elim holds positions within ind, so map back to row indices; these rows have no row in model.X and count with their raw value 33 | val = val + a'*a; 34 | tot = tot + length(a); 35 | end 36 | error = sqrt(val/tot); 37 | 38 | % Save the results. 39 | capName = dataSetName; 40 | capName(1) = upper(capName(1)); 41 | save(['dem' capName num2str(experimentNo) '.mat'], 'model', 'error'); 42 | -------------------------------------------------------------------------------- /matlab/demMovielens2.m: -------------------------------------------------------------------------------- 1 | % DEMMOVIELENS2 Try collaborative filtering on the large movielens data. 
2 | 3 | % COLLAB 4 | 5 | randn('seed', 1e5); 6 | rand('seed', 1e5); 7 | 8 | experimentNo = 2; 9 | 10 | dataSetName = 'movielens'; 11 | [Y, void, Ytest] = collabLoadData(dataSetName); 12 | 13 | q = 4; 14 | options = collabOptions; 15 | model = collabCreate(q, size(Y, 2), Y, options); 16 | model.kern.comp{2}.variance = 0.11; 17 | model.kern.comp{3}.variance = 5; 18 | options = collabOptimiseOptions; 19 | options.numIters = 10; 20 | options.showLikelihood = false; 21 | model = collabOptimise(model, Y, options) 22 | % root mean squared error over the nonzero entries of Ytest, one column at a time 23 | val = 0; 24 | tot = 0; 25 | for i = 1:size(Y, 2) 26 | ind = find(Ytest(:, i)); 27 | elim = find(ind>size(model.X, 1)); 28 | tind = ind; 29 | tind(elim) = []; 30 | [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :)); 31 | a = Ytest(tind, i) - mu; 32 | a = [a; Ytest(ind(elim), i)]; % elim holds positions within ind, so map back to row indices; these rows have no row in model.X and count with their raw value 33 | val = val + a'*a; 34 | tot = tot + length(a); 35 | end 36 | error = sqrt(val/tot); 37 | 38 | % Save the results. 39 | capName = dataSetName; 40 | capName(1) = upper(capName(1)); 41 | save(['dem' capName num2str(experimentNo) '.mat'], 'model', 'error'); 42 | -------------------------------------------------------------------------------- /matlab/demMovielens3.m: -------------------------------------------------------------------------------- 1 | % DEMMOVIELENS3 Try collaborative filtering on the large movielens data. 
2 | 3 | % COLLAB 4 | 5 | randn('seed', 1e5); 6 | rand('seed', 1e5); 7 | 8 | experimentNo = 3; 9 | 10 | dataSetName = 'movielens'; 11 | [Y, void, Ytest] = collabLoadData(dataSetName); 12 | 13 | q = 5; 14 | options = collabOptions; 15 | model = collabCreate(q, size(Y, 2), Y, options); 16 | model.kern.comp{2}.variance = 0.11; 17 | model.kern.comp{3}.variance = 5; 18 | options = collabOptimiseOptions; 19 | 20 | % set parameters 21 | options.momentum = 0.9; 22 | options.learnRate = 0.0001; 23 | options.paramMomentum = 0.9; 24 | options.paramLearnRate = 0.0001; 25 | options.numIters = 10; 26 | options.showLikelihood = false; 27 | 28 | capName = dataSetName; 29 | capName(1) = upper(capName(1)); 30 | options.saveName = ['dem' capName num2str(experimentNo) '_']; 31 | 32 | model = collabOptimise(model, Y, options) 33 | % root mean squared error over the nonzero entries of Ytest, one column at a time 34 | val = 0; 35 | tot = 0; 36 | for i = 1:size(Y, 2) 37 | ind = find(Ytest(:, i)); 38 | elim = find(ind>size(model.X, 1)); 39 | tind = ind; 40 | tind(elim) = []; 41 | [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :)); 42 | a = Ytest(tind, i) - mu; 43 | a = [a; Ytest(ind(elim), i)]; % elim holds positions within ind, so map back to row indices 44 | val = val + a'*a; 45 | tot = tot + length(a); 46 | end 47 | error_L2 = sqrt(val/tot); 48 | 49 | % compute NMAE 50 | val = 0; 51 | tot = 0; 52 | for i = 1:size(Y, 2) 53 | ind = find(Ytest(:, i)); 54 | elim = find(ind>size(model.X, 1)); 55 | tind = ind; 56 | tind(elim) = []; 57 | [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :)); 58 | a = Ytest(tind, i) - mu; 59 | a = [a; Ytest(ind(elim), i)]; % elim holds positions within ind, so map back to row indices 60 | val = val + sum(abs(a)); 61 | tot = tot + length(a); 62 | end 63 | error_NMAE = (val/tot)/1.6; 64 | 65 | % round NMAE 66 | val = 0; 67 | tot = 0; 68 | for i = 1:size(Y, 2) 69 | ind = find(Ytest(:, i)); 70 | elim = find(ind>size(model.X, 1)); 71 | tind = ind; 72 | tind(elim) = []; 73 | [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :)); 74 | a = Ytest(tind, i) - mu; 75 | a = [a; Ytest(ind(elim), i)]; % elim holds positions within ind, so map back to row indices 76 | val = val + sum(abs(round(a))); 
77 | tot = tot + length(a); 78 | end 79 | error_NMAE_round = (val/tot)/1.6; 80 | 81 | 82 | % Save the results: the three error measures computed above (this script defines no variable named error). 83 | capName = dataSetName; 84 | capName(1) = upper(capName(1)); 85 | save(['dem' capName num2str(experimentNo) '.mat'], 'model', 'error_L2', 'error_NMAE', 'error_NMAE_round'); 86 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /matlab/demMovielens4.m: -------------------------------------------------------------------------------- 1 | % DEMMOVIELENS4 Try collaborative filtering on the large movielens data. 2 | % try different kernels 3 | 4 | % COLLAB 5 | 6 | randn('seed', 1e5); 7 | rand('seed', 1e5); 8 | 9 | experimentNo = 3; 10 | % NOTE(review): demMovielens3 also uses experimentNo 3 with the same dataSetName, so the saved file names coincide - confirm 11 | dataSetName = 'movielens'; 12 | [Y, lbls, Ytest] = collabLoadData(dataSetName); 13 | 14 | % get the extra data in the labels 15 | 16 | q = 5; 17 | q = q+1; 18 | options = collabOptionsTensor; 19 | 20 | 21 | %%%%% as in gpReversible dynamics 22 | type = {'cmpnd', {'tensor', 'rbf', 'rbfadditional'}, 'bias', 'white'}; 23 | options.kern = kernCreate(q, type); 24 | %keyboard; 25 | options.kern.comp{1} = kernSetIndex(options.kern.comp{1}, 1, [1:q-1]); 26 | options.kern.comp{1} = kernSetIndex(options.kern.comp{1}, 2, [q]); 27 | options.kern.comp{1}.comp{2}.additional = lbls; 28 | %options.kern.comp{1}.comp{1}.inverseWidth = 0.2; 29 | %options.kern.comp{1}.comp{1}.variance = 0.001; 30 | %options.kern.comp{1}.comp{2}.variance = 2/pi; 31 | %options.kern.comp{1}.comp{2}.weightVariance = 1000; 32 | %options.kern.comp{1}.comp{2}.biasVariance = eps; 33 | 34 | 35 | % as previously 36 | %options.kern = {'cmpnd', {'tensor', 'rbf', 'rbfadditional'}, 'bias', 'white'}; 37 | %options.kern.comp{1}.comp{1}.index = 1:q-1; 38 | %options.kern.comp{1}.comp{2}.index = q; 39 | %options.kern.comp{1}.comp{2}.additional = lbls; 40 | %keyboard; 41 | model = collabCreateTensor(q, size(Y, 2), size(Y, 1), options); 42 | % put the last component to be the index 43 | %keyboard 44 | model.kern.comp{2}.variance = 0.11; 45 | 
model.kern.comp{3}.variance = 5; 46 | 47 | %keyboard; 48 | options = collabOptimiseOptions; 49 | 50 | % set parameters 51 | options.momentum = 0.9; 52 | options.learnRate = 0.0001; 53 | options.paramMomentum = 0.9; 54 | options.paramLearnRate = 0.0001; 55 | options.numIters = 1; 56 | options.showLikelihood = false; 57 | 58 | capName = dataSetName; 59 | capName(1) = upper(capName(1)); 60 | options.saveName = ['dem' capName num2str(experimentNo) '_']; 61 | 62 | model = collabOptimise(model, Y, options) 63 | % root mean squared error over the nonzero entries of Ytest, one column at a time 64 | val = 0; 65 | tot = 0; 66 | for i = 1:size(Y, 2) 67 | ind = find(Ytest(:, i)); 68 | elim = find(ind>size(model.X, 1)); 69 | tind = ind; 70 | tind(elim) = []; 71 | [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :)); 72 | a = Ytest(tind, i) - mu; 73 | a = [a; Ytest(ind(elim), i)]; % elim holds positions within ind, so map back to row indices; these rows have no row in model.X and count with their raw value 74 | val = val + a'*a; 75 | tot = tot + length(a); 76 | end 77 | error = sqrt(val/tot); 78 | 79 | % Save the results. 80 | capName = dataSetName; 81 | capName(1) = upper(capName(1)); 82 | save(['dem' capName num2str(experimentNo) '.mat'], 'model', 'error'); 83 | 84 | -------------------------------------------------------------------------------- /matlab/demMovielens5.m: -------------------------------------------------------------------------------- 1 | % DEMMOVIELENS5 Try collaborative filtering on the large movielens data. 2 | % where now the latent space is in the users, not the films 3 | 4 | randn('seed', 1e5); 5 | rand('seed', 1e5); 6 | 7 | experimentNo = 3; 8 | % NOTE(review): demMovielens3 also uses experimentNo 3 with the same dataSetName, so the saved file names coincide - confirm 9 | % ??? 
to be done 10 | 11 | dataSetName = 'movielens'; 12 | [Y, void, Ytest] = collabLoadData(dataSetName); 13 | 14 | % learn latent space of each user 15 | Y = Y'; 16 | Ytest = Ytest'; 17 | 18 | q = 5; 19 | options = collabOptions; 20 | model = collabCreate(q, size(Y, 2), Y, options); 21 | model.kern.comp{2}.variance = 0.11; 22 | model.kern.comp{3}.variance = 5; 23 | options = collabOptimiseOptions; 24 | 25 | % set parameters 26 | options.momentum = 0.9; 27 | options.learnRate = 0.0001; 28 | options.paramMomentum = 0.9; 29 | options.paramLearnRate = 0.0001; 30 | options.numIters = 10; 31 | options.showLikelihood = false; 32 | 33 | capName = dataSetName; 34 | capName(1) = upper(capName(1)); 35 | options.saveName = ['dem' capName num2str(experimentNo) '_']; 36 | 37 | model = collabOptimise(model, Y, options) 38 | 39 | % we have to divide the test data into two sets, train and test for the 40 | % prediction. All but one are the train 41 | 42 | 43 | val_L2 = 0; 44 | tot_L2 = 0; 45 | val_NMAE = 0; 46 | tot_NMAE = 0; 47 | val_NMAE_round = 0; 48 | tot_NMAE_round = 0; 49 | 50 | disp('Computing test error'); 51 | 52 | % accumulate squared, absolute and rounded absolute residuals over the nonzero entries of each Ytest column 53 | for i = 1:size(Y, 2) 54 | ind = find(Ytest(:, i)); 55 | elim = find(ind>size(model.X, 1)); 56 | tind = ind; 57 | tind(elim) = []; 58 | [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :)); 59 | a = Ytest(tind, i) - mu; 60 | a = [a; Ytest(ind(elim), i)]; % elim holds positions within ind, so map back to row indices; these rows have no row in model.X and count with their raw value 61 | val_L2 = val_L2 + a'*a; 62 | tot_L2 = tot_L2 + length(a); 63 | val_NMAE = val_NMAE + sum(abs(a)); 64 | tot_NMAE = tot_NMAE + length(a); 65 | val_NMAE_round = val_NMAE_round + sum(abs(round(a))); 66 | tot_NMAE_round = tot_NMAE_round + length(a); 67 | end 68 | error_L2 = sqrt(val_L2/tot_L2); 69 | error_NMAE = (val_NMAE/tot_NMAE)/1.6; 70 | error_NMAE_round = (val_NMAE_round/tot_NMAE_round)/1.6; 71 | 72 | 73 | % Save the results. 
74 | capName = dataSetName; 75 | capName(1) = upper(capName(1)); 76 | save(['dem' capName num2str(experimentNo) '.mat'], 'model', 'error_L2', 'error_NMAE', 'error_NMAE_round'); 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /matlab/demMovielens6.m: -------------------------------------------------------------------------------- 1 | % DEMMOVIELENS5 Try collaborative filtering on the large movielens data. 2 | % where the strong movielens experiment 3 | 4 | randn('seed', 1e5); 5 | rand('seed', 1e5); 6 | 7 | experimentNo = 3; 8 | 9 | dataSetName = 'movielens_strong_1'; 10 | [Y, void, Ytest] = collabLoadData(dataSetName); 11 | 12 | q = 5; 13 | options = collabOptions; 14 | model = collabCreate(q, size(Y, 2), Y, options); 15 | model.kern.comp{2}.variance = 0.11; 16 | model.kern.comp{3}.variance = 5; 17 | options = collabOptimiseOptions; 18 | 19 | % set parameters 20 | options.momentum = 0.9; 21 | options.learnRate = 0.0001; 22 | options.paramMomentum = 0.9; 23 | options.paramLearnRate = 0.0001; 24 | options.numIters = 1; % ??? put 10 back 25 | options.showLikelihood = false; 26 | 27 | capName = dataSetName; 28 | capName(1) = upper(capName(1)); 29 | options.saveName = ['dem' capName num2str(experimentNo) '_']; 30 | 31 | model = collabOptimise(model, Y, options) 32 | 33 | % we have to divide the test data into two sets, train and test for the 34 | % prediction. All but one are the train 35 | 36 | 37 | 38 | 39 | disp('Computing test error'); 40 | 41 | % ????? 
this test is to be done 42 | 43 | keyboard 44 | 45 | 46 | [error_L2,error_NMAE,error_NMAE_round] = computeTestErrorStrong(model,Ytest); 47 | % 48 | % val_L2 = 0; 49 | % tot_L2 = 0; 50 | % val_NMAE = 0; 51 | % tot_NMAE = 0; 52 | % val_NMAE_round = 0; 53 | % tot_NMAE_round = 0; 54 | % 55 | % for i = 1:size(Ytest, 2) 56 | % ind = find(Ytest(:, i)); 57 | % elim = find(ind>size(model.X, 1)); 58 | % tind = ind; 59 | % tind(elim) = []; 60 | % 61 | % if (length(tind)==0) 62 | % continue; 63 | % end 64 | % % in the case of STRONG experiments, the user is new, so we have to 65 | % % compute the prediction using the test data 66 | % % compute random (LOO --> leave one out) 67 | % indexRand = randperm(length(tind)); 68 | % Y_train_user = Ytest(:,i); 69 | % Y_test_user = Y_train_user(tind(indexRand(end))); 70 | % Y_train_user(tind(indexRand(end)),:) = 0; 71 | % [mu, varsig] = collabPosteriorMeanVar(model, Y_train_user, model.X(tind(indexRand(end)), :)); 72 | % a = Y_test_user - mu; 73 | % a = [a; Ytest(elim, i)]; 74 | % val_L2 = val_L2 + a'*a; 75 | % tot_L2 = tot_L2 + length(a); 76 | % val_NMAE = val_NMAE + sum(abs(a)); 77 | % tot_NMAE = tot_NMAE + length(a); 78 | % val_NMAE_round = val_NMAE_round + sum(abs(round(a))); 79 | % tot_NMAE_round = tot_NMAE_round + length(a); 80 | % end 81 | % error_L2 = sqrt(val_L2/tot_L2); 82 | % error_NMAE = (val_NMAE/tot_NMAE)/1.6; 83 | % error_NMAE_round = (val_NMAE_round/tot_NMAE_round)/1.6; 84 | 85 | 86 | % Save the results. 
87 | capName = dataSetName; 88 | capName(1) = upper(capName(1)); 89 | save(['dem' capName num2str(experimentNo) '.mat'], 'model', 'error_L2', 'error_NMAE', 'error_NMAE_round'); 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /matlab/demMovielens6Script.m: -------------------------------------------------------------------------------- 1 | function[] = demMovielens6Script(substract_mean, partNo_v, latentDim_v,iters) 2 | % DEMMOVIELENS6Script Try collaborative filtering on the large movielens data. 3 | % 4 | % demMovielens6script(substract_mean, partNo_v, latentDim_v, iters) 5 | % 6 | % substract_mean --> bool if substract the mean 7 | % partNo_v --> vector with the partitions to compute results 8 | % latentDim_v --> vector with the latent dimensionalities to compute results 9 | % iters --> number of iterations 10 | 11 | randn('seed', 1e5); 12 | rand('seed', 1e5); 13 | 14 | experimentNo = 3; 15 | 16 | 17 | %partNo_v = [1:5]; 18 | %latentDim_v = [5, 2:4, 6]; 19 | 20 | 21 | for i_latent=1:length(latentDim_v) 22 | q = latentDim_v(i_latent); 23 | for i_part=1:length(partNo_v) 24 | partNo = partNo_v(i_part); 25 | 26 | dataSetName = ['movielens_strong_',num2str(partNo)]; 27 | 28 | disp(['Reading ... 
',dataSetName]); 29 | 30 | [Y, void, Ytest] = collabLoadData(dataSetName); 31 | 32 | numFilms = size(Y,1); 33 | numUsers = size(Y,2); 34 | meanFilms = zeros(numFilms,1); 35 | stdFilms = ones(numFilms,1); 36 | % per-film normalisation statistics; they default to mean 0 and standard deviation 1 37 | if (substract_mean) 38 | if 0 39 | % disabled branch: this subtracts the global mean 40 | % create the total vector of observed (nonzero) ratings 41 | s = nonzeros(Ytest); 42 | ratings = [nonzeros(Y); nonzeros(Ytest)]; 43 | meanY = mean(ratings); 44 | stdY = std(ratings); 45 | %keyboard; 46 | index = find(Y); 47 | Y(index) = Y(index) - meanY; 48 | Y(index) = Y(index) / stdY; 49 | else 50 | for i=1:numFilms 51 | % compute the mean and standard deviation of each film over its observed (nonzero) ratings in Y and Ytest 52 | ind = find(Y(i,:)); 53 | mean_v = sum(Y(i,ind)); 54 | mean_v = mean_v + sum(nonzeros(Ytest(i,:))); 55 | length_v = length(ind) + nnz(Ytest(i,:)); if (length_v==0), continue; end % film without any rating: keep the default mean 0 / std 1 instead of 0/0 = NaN 56 | mean_v = mean_v/length_v; 57 | std_train = 0; std_test = 0; test_v = nonzeros(Ytest(i,:)); if ~isempty(ind), std_train = std(Y(i,ind)); end; if ~isempty(test_v), std_test = std(test_v); end; std_v = (length(ind)*std_train + length(test_v)*std_test)/length_v; % count-weighted average of the train and test standard deviations, rated entries only (std of an empty set is skipped because it is NaN) 58 | Y(i,ind) = Y(i,ind) - mean_v; 59 | if (std_v>0) 60 | Y(i,ind) = Y(i,ind)/std_v; 61 | end 62 | meanFilms(i) = mean_v; 63 | stdFilms(i) = std_v; 64 | end 65 | end 66 | %keyboard; 67 | end 68 | 69 | options = collabOptions; 70 | model = collabCreate(q, size(Y, 2), Y, options); 71 | % keyboard; 72 | if (substract_mean) 73 | if 0 74 | % this does the global mean 75 | model.mu = repmat(meanY,size(model.mu,1),1); 76 | model.sd = repmat(stdY,size(model.sd,1),1); 77 | else 78 | model.mu = meanFilms; 79 | model.sd = stdFilms; 80 | end 81 | 82 | end 83 | model.kern.comp{2}.variance = 0.11; 84 | model.kern.comp{3}.variance = 5; 85 | options = collabOptimiseOptions; 86 | 87 | 88 | % set parameters 89 | options.momentum = 0.9; 90 | options.learnRate = 0.0001; 91 | options.paramMomentum = 0.9; 92 | options.paramLearnRate = 0.0001; 93 | options.numIters = iters; 94 | options.showLikelihood = false; 95 | 96 | capName = dataSetName; 97 | capName(1) = upper(capName(1)); 98 | options.saveName = ['dem' capName num2str(experimentNo) '_']; 99 | 100 | model = collabOptimise(model, Y, options) 101
| 102 | % compute the test error 103 | disp('Computing test error'); 104 | 105 | 106 | [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorStrong(model,Ytest) 107 | 108 | 109 | % Save the results. 110 | capName = dataSetName; 111 | capName(1) = upper(capName(1)); 112 | 113 | saveResults = [capName,'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'.mat']; 114 | disp(['Saving ... ',saveResults]); 115 | save(saveResults, 'model', 'L2_error','options','NMAE_error','NMAE_round_error'); 116 | end 117 | end 118 | 119 | -------------------------------------------------------------------------------- /matlab/demMovielens7.m: -------------------------------------------------------------------------------- 1 | % DEMMOVIELENS7 Try collaborative filtering on the large movielens data. 2 | % This is the weak movielens experiment (data set 'movielens_weak_1'). 3 | 4 | randn('seed', 1e5); 5 | rand('seed', 1e5); 6 | 7 | experimentNo = 3; 8 | substract_mean = 0; 9 | 10 | dataSetName = 'movielens_weak_1'; 11 | [Y, void, Ytest] = collabLoadData(dataSetName); 12 | 13 | numFilms = size(Y,1); 14 | numUsers = size(Y,2); 15 | meanFilms = zeros(numFilms,1); 16 | stdFilms = ones(numFilms,1); 17 | if (substract_mean) 18 | % do for each film independently 19 | for i=1:numFilms 20 | % compute the mean and standard deviation of each film over its observed (nonzero) ratings in Y and Ytest 21 | ind = find(Y(i,:)); 22 | mean_v = sum(Y(i,ind)); 23 | mean_v = mean_v + sum(nonzeros(Ytest(i,:))); 24 | length_v = length(ind) + nnz(Ytest(i,:)); if (length_v==0), continue; end % film without any rating: keep the default mean 0 / std 1 instead of 0/0 = NaN 25 | mean_v = mean_v/length_v; 26 | std_train = 0; std_test = 0; test_v = nonzeros(Ytest(i,:)); if ~isempty(ind), std_train = std(Y(i,ind)); end; if ~isempty(test_v), std_test = std(test_v); end; std_v = (length(ind)*std_train + length(test_v)*std_test)/length_v; % count-weighted average of the train and test standard deviations, rated entries only (std of an empty set is skipped because it is NaN) 27 | Y(i,ind) = Y(i,ind) - mean_v; 28 | if (std_v>0) 29 | Y(i,ind) = Y(i,ind)/std_v; 30 | end 31 | meanFilms(i) = mean_v; 32 | stdFilms(i) = std_v; 33 | end 34 | end 35 | 36 | q = 5; 37 | options = collabOptions; 38 | model = collabCreate(q, size(Y, 2), Y, options); 39 | model.kern.comp{2}.variance = 0.11; 40 | model.kern.comp{3}.variance = 5; 41 | options = collabOptimiseOptions; 42
| 43 | % set parameters 44 | options.momentum = 0.9; 45 | options.learnRate = 0.0001; 46 | options.paramMomentum = 0.9; 47 | options.paramLearnRate = 0.0001; 48 | options.numIters = 1; % ??? put 10 back 49 | options.showLikelihood = false; 50 | 51 | capName = dataSetName; 52 | capName(1) = upper(capName(1)); 53 | options.saveName = ['dem' capName num2str(experimentNo) '_']; 54 | 55 | %%% ?? add the model.mu and model.sd 56 | model.mu = meanFilms; 57 | model.sd = stdFilms; 58 | 59 | model = collabOptimise(model, Y, options) 60 | 61 | % we have to divide the test data into two sets, train and test for the 62 | % prediction. All but one are the train 63 | 64 | 65 | 66 | 67 | disp('Computing test error'); 68 | 69 | % ????? this test is to be done 70 | 71 | keyboard 72 | 73 | % ??? check if the mean is substracted... 74 | 75 | [error_L2,error_NMAE,error_NMAE_round] = computeTestErrorWeak(model,Y,Ytest); 76 | 77 | % val_L2 = 0; 78 | % tot_L2 = 0; 79 | % val_NMAE = 0; 80 | % tot_NMAE = 0; 81 | % val_round_NMAE = 0; 82 | % tot_round_NMAE = 0; 83 | % 84 | % for i = 1:size(Y, 2) 85 | % ind = find(Ytest(:, i)); 86 | % elim = find(ind>size(model.X, 1)); 87 | % tind = ind; 88 | % tind(elim) = []; 89 | % [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :)); 90 | % % normalize the values 91 | % 92 | % 93 | % mu = mu*model.sd(1); 94 | % mu = mu+model.mu(1); 95 | % a = Ytest(tind, i) - mu; 96 | % a = [a; Ytest(elim, i)]; 97 | % val_L2 = val_L2 + a'*a; 98 | % tot_L2 = tot_L2 + length(a); 99 | % val_NMAE = val_NMAE + sum(abs(a)); 100 | % tot_NMAE = tot_NMAE + length(a); 101 | % val_round_NMAE = val_round_NMAE + sum(abs(round(a))); 102 | % tot_round_NMAE = tot_round_NMAE + length(a); 103 | % end 104 | % L2_error = sqrt(val_L2/tot_L2); 105 | % NMAE_error = (val_NMAE/tot_NMAE)/1.6; 106 | % NMAE_round_error = (val_round_NMAE/tot_round_NMAE)/1.6; 107 | 108 | 109 | % Save the results. 
110 | capName = dataSetName; 111 | capName(1) = upper(capName(1)); 112 | save(['dem' capName num2str(experimentNo) '.mat'], 'model', 'error_L2', 'error_NMAE', 'error_NMAE_round'); 113 | 114 | 115 | 116 | -------------------------------------------------------------------------------- /matlab/demMovielens7Script.m: -------------------------------------------------------------------------------- 1 | function[] = demMovielens7Script(substract_mean, partNo_v, latentDim_v,iters) 2 | % DEMMOVIELENS7Script Try collaborative filtering on the large movielens data. 3 | % 4 | % demMovielens7script(substract_mean, partNo_v, latentDim_v,iters) 5 | % 6 | % substract_mean --> bool if substract the mean 7 | % partNo_v --> vector with the partitions to compute results 8 | % latentDim_v --> vector with the latent dimensionalities to compute results 9 | % iters --> number of iterations 10 | 11 | randn('seed', 1e5); 12 | rand('seed', 1e5); 13 | 14 | experimentNo = 3; 15 | 16 | 17 | %partNo_v = [1:5]; 18 | %latentDim_v = [5, 2:4, 6]; 19 | 20 | 21 | for i_latent=1:length(latentDim_v) 22 | q = latentDim_v(i_latent); 23 | for i_part=1:length(partNo_v) 24 | partNo = partNo_v(i_part); 25 | 26 | dataSetName = ['movielens_weak_',num2str(partNo)]; 27 | 28 | disp(['Reading ... 
',dataSetName]); 29 | 30 | [Y, void, Ytest] = collabLoadData(dataSetName); 31 | 32 | numFilms = size(Y,1); 33 | numUsers = size(Y,2); 34 | meanFilms = zeros(numFilms,1); 35 | stdFilms = ones(numFilms,1); 36 | % per-film normalisation statistics; they default to mean 0 and standard deviation 1 37 | if (substract_mean) 38 | if 0 39 | % disabled branch: this subtracts the global mean 40 | % create the total vector of observed (nonzero) ratings 41 | s = nonzeros(Ytest); 42 | ratings = [nonzeros(Y); nonzeros(Ytest)]; 43 | meanY = mean(ratings); 44 | stdY = std(ratings); 45 | %keyboard; 46 | index = find(Y); 47 | Y(index) = Y(index) - meanY; 48 | Y(index) = Y(index) / stdY; 49 | else 50 | for i=1:numFilms 51 | % compute the mean and standard deviation of each film over its observed (nonzero) ratings in Y and Ytest 52 | ind = find(Y(i,:)); 53 | mean_v = sum(Y(i,ind)); 54 | mean_v = mean_v + sum(nonzeros(Ytest(i,:))); 55 | length_v = length(ind) + nnz(Ytest(i,:)); if (length_v==0), continue; end % film without any rating: keep the default mean 0 / std 1 instead of 0/0 = NaN 56 | mean_v = mean_v/length_v; 57 | std_train = 0; std_test = 0; test_v = nonzeros(Ytest(i,:)); if ~isempty(ind), std_train = std(Y(i,ind)); end; if ~isempty(test_v), std_test = std(test_v); end; std_v = (length(ind)*std_train + length(test_v)*std_test)/length_v; % count-weighted average of the train and test standard deviations, rated entries only (std of an empty set is skipped because it is NaN) 58 | Y(i,ind) = Y(i,ind) - mean_v; 59 | if (std_v>0) 60 | Y(i,ind) = Y(i,ind)/std_v; 61 | end 62 | meanFilms(i) = mean_v; 63 | stdFilms(i) = std_v; 64 | end 65 | end 66 | %keyboard; 67 | end 68 | 69 | options = collabOptions; 70 | model = collabCreate(q, size(Y, 2), Y, options); 71 | % keyboard; 72 | if (substract_mean) 73 | if 0 74 | % this does the global mean 75 | model.mu = repmat(meanY,size(model.mu,1),1); 76 | model.sd = repmat(stdY,size(model.sd,1),1); 77 | else 78 | model.mu = meanFilms; 79 | model.sd = stdFilms; 80 | end 81 | 82 | end 83 | model.kern.comp{2}.variance = 0.11; 84 | model.kern.comp{3}.variance = 5; 85 | options = collabOptimiseOptions; 86 | 87 | 88 | % set parameters 89 | options.momentum = 0.9; 90 | options.learnRate = 0.0001; 91 | options.paramMomentum = 0.9; 92 | options.paramLearnRate = 0.0001; 93 | options.numIters = iters; 94 | options.showLikelihood = false; 95 | 96 | capName = dataSetName; 97 | capName(1) = upper(capName(1)); 98 | options.saveName = ['dem' capName num2str(experimentNo) '_']; 99 | 100 | model = collabOptimise(model, Y, options) 101
| 102 | % compute the test error 103 | disp('Computing test error'); 104 | 105 | 106 | [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorWeak(model,Y,Ytest) 107 | 108 | 109 | % Save the results. 110 | capName = dataSetName; 111 | capName(1) = upper(capName(1)); 112 | 113 | saveResults = [capName,'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'.mat']; 114 | disp(['Saving ... ',saveResults]); 115 | save(saveResults, 'model', 'L2_error','options','NMAE_error','NMAE_round_error'); 116 | end 117 | end 118 | 119 | -------------------------------------------------------------------------------- /matlab/demMovielensOrdered1.m: -------------------------------------------------------------------------------- 1 | % DEMMOVIELENSORDERED1 Try collaborative filtering on the large movielens data. 2 | 3 | % COLLAB 4 | 5 | randn('seed', 1e5); 6 | rand('seed', 1e5); 7 | 8 | experimentNo = 1; 9 | 10 | dataSetName = 'movielens'; 11 | [Y, void, Ytest] = collabLoadData(dataSetName); 12 | 13 | q = 3; 14 | options = collabOptions; 15 | model = collabCreate(q, size(Y, 2), Y, options); 16 | model.kern.comp{2}.variance = 0.11; 17 | model.kern.comp{3}.variance = 1; 18 | model.selectionCriterion = 'random'; 19 | model.numActive = 1000; 20 | model.noise = 'ordered'; 21 | options = collabOptimiseOptions; 22 | options.numIters = 1; 23 | options.showLikelihood = false; 24 | options.showEvery = 10; 25 | Y = Y(:, 1:100); 26 | model = collabOptimise(model, Y, options) 27 | 28 | % val = 0; 29 | % tot = 0; 30 | % for i = 1:size(Y, 2) 31 | % ind = find(Ytest(:, i)); 32 | % elim = find(ind>size(model.X, 1)); 33 | % tind = ind; 34 | % tind(elim) = []; 35 | % [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :)); 36 | % a = Ytest(tind, i) - mu; 37 | % a = [a; Ytest(elim, i)]; 38 | % val = val + a'*a; 39 | % tot = tot + length(a); 40 | % end 41 | % error = sqrt(val/tot); 42 | 43 | % % Save the results. 
44 | % capName = dataSetName; 45 | % capName(1) = upper(capName(1)); 46 | % save(['dem' capName num2str(experimentNo) '.mat'], 'model', 'error'); 47 | -------------------------------------------------------------------------------- /matlab/demMovielensSmall1.m: -------------------------------------------------------------------------------- 1 | % DEMMOVIELENSSMALL1 Try collaborative filtering on the small movielens data. 2 | 3 | % COLLAB 4 | 5 | randn('seed', 1e5); 6 | rand('seed', 1e5); 7 | 8 | experimentNo = 1; 9 | 10 | for partition = 1:5 11 | dataSetName = ['movielensSmall' num2str(partition)]; 12 | [Y, void, Ytest] = collabLoadData(dataSetName); 13 | q = 2; 14 | options = collabOptions; 15 | %/~ 16 | %options.heteroNoise = true; 17 | %options.kern = {'rbf', 'bias'}; 18 | %~/ 19 | model = collabCreate(q, size(Y, 2), Y, options); 20 | %/~ 21 | %model.diagvar = repmat(5.0, size(model.diagvar)); 22 | %~/ 23 | model.kern.comp{2}.variance = 0.11; 24 | model.kern.comp{3}.variance = 5; 25 | options = collabOptimiseOptions; 26 | 27 | % set parameters 28 | options.momentum = 0.9; 29 | options.learnRate = 0.0001; 30 | options.paramMomentum = 0.9; 31 | options.paramLearnRate = 0.0001; 32 | options.numIters = 10; 33 | 34 | capName = dataSetName; 35 | capName(1) = upper(capName(1)); 36 | options.saveName = ['dem' capName num2str(experimentNo) '_']; 37 | 38 | model = collabOptimise(model, Y, options); 39 | 40 | val = 0; 41 | tot = 0; 42 | for i = 1:size(Y, 2) 43 | ind = find(Ytest(:, i)); 44 | elim = find(ind>size(model.X, 1)); 45 | tind = ind; 46 | tind(elim) = []; 47 | [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :)); 48 | a = Ytest(tind, i) - mu; 49 | a = [a; Ytest(elim, i)]; 50 | val = val + a'*a; 51 | tot = tot + length(a); 52 | end 53 | error(partition) = sqrt(val/tot); 54 | % Save the results. 
55 | capName = dataSetName; 56 | capName(1) = upper(capName(1)); 57 | save(['dem' capName '_' num2str(experimentNo) '.mat'], 'model', 'error'); 58 | 59 | end 60 | -------------------------------------------------------------------------------- /matlab/demMovielensSmallHetero1.m: -------------------------------------------------------------------------------- 1 | % DEMMOVIELENSSMALLHETERO1 Try collaborative filtering on the small movielens data. 2 | 3 | % COLLAB 4 | 5 | randn('seed', 1e5); 6 | rand('seed', 1e5); 7 | 8 | experimentNo = 1; 9 | 10 | for partition = 1:5 11 | dataSetName = ['movielensSmall' num2str(partition)]; 12 | [Y, void, Ytest] = collabLoadData(dataSetName); 13 | q = 2; 14 | options = collabOptions; 15 | options.heteroNoise = true; 16 | options.kern = {'rbf', 'bias'}; 17 | model = collabCreate(q, size(Y, 2), Y, options); 18 | model.diagvar = repmat(1.0, size(model.diagvar)); 19 | model.kern.comp{2}.variance = 0.11; 20 | options = collabOptimiseOptions; 21 | 22 | % set parameters 23 | options.momentum = 0.9; 24 | options.learnRate = 0.0001; 25 | options.paramMomentum = 0.9; 26 | options.paramLearnRate = 0.0001; 27 | options.noiseMomentum = 0.9; 28 | options.noiseLearnRate = 0.0001; 29 | options.numIters = 10; 30 | 31 | capName = dataSetName; 32 | capName(1) = upper(capName(1)); 33 | options.saveName = ['dem' capName 'Hetero' num2str(experimentNo) '_']; 34 | 35 | model = collabOptimise(model, Y, options); 36 | 37 | val = 0; 38 | tot = 0; 39 | for i = 1:size(Y, 2) 40 | ind = find(Ytest(:, i)); 41 | elim = find(ind>size(model.X, 1)); 42 | tind = ind; 43 | tind(elim) = []; 44 | [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :)); 45 | a = Ytest(tind, i) - mu; 46 | a = [a; Ytest(elim, i)]; 47 | val = val + a'*a; 48 | tot = tot + length(a); 49 | end 50 | error(partition) = sqrt(val/tot); 51 | % Save the results. 
52 | capName = dataSetName; 53 | capName(1) = upper(capName(1)); 54 | save(['dem' capName 'Hetero' '_' num2str(experimentNo) '.mat'], 'model', 'error'); 55 | 56 | end 57 | -------------------------------------------------------------------------------- /matlab/demMovielensSmallMix1.m: -------------------------------------------------------------------------------- 1 | % DEMMOVIELENSSMALLMIX1 Try collaborative filtering on the small movielens data. 2 | 3 | % COLLAB 4 | 5 | randn('seed', 1e5); 6 | rand('seed', 1e5); 7 | 8 | experimentNo = 1; 9 | error = 0; 10 | for partition = 1:5 11 | dataSetName = ['movielensSmall' num2str(partition)]; 12 | [Y, void, Ytest] = collabLoadData(dataSetName); 13 | q = 2; 14 | options = collabOptions; 15 | options.numComps = 2; 16 | options.kern = {'rbf', 'bias'}; 17 | %/~ 18 | %options.heteroNoise = true; 19 | %~/ 20 | model = collabCreate(q, size(Y, 2), Y, options); 21 | %/~ 22 | %model.diagvar = repmat(5.0, size(model.diagvar)); 23 | %~/ 24 | model.kern.comp{2}.variance = 0.11; 25 | model.sigma2 = 5; 26 | %model.kern.comp{3}.variance = 5; 27 | options = collabOptimiseOptions; 28 | 29 | % set parameters 30 | options.momentum = 0.9; 31 | options.learnRate = 0.0001; 32 | options.paramMomentum = 0.9; 33 | options.paramLearnRate = 0.0001; 34 | options.noiseMomentum = 0.9; 35 | options.noiseLearnRate = 0.0001; 36 | options.numIters = 10; 37 | 38 | capName = dataSetName; 39 | capName(1) = upper(capName(1)); 40 | options.saveName = ['dem' capName 'Mix' num2str(experimentNo) '_']; 41 | 42 | model = collabOptimise(model, Y, options); 43 | 44 | val = 0; 45 | tot = 0; 46 | for i = 1:size(Y, 2) 47 | ind = find(Ytest(:, i)); 48 | elim = find(ind>size(model.X, 1)); 49 | tind = ind; 50 | tind(elim) = []; 51 | [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :)); 52 | a = Ytest(tind, i) - mu; 53 | a = [a; Ytest(elim, i)]; 54 | val = val + a'*a; 55 | tot = tot + length(a); 56 | end 57 | error(partition) = sqrt(val/tot); 58 | % Save 
the results. 59 | capName = dataSetName; 60 | capName(1) = upper(capName(1)); 61 | save(['dem' capName 'Mix_' num2str(experimentNo) '.mat'], 'model', 'error'); 62 | 63 | end 64 | -------------------------------------------------------------------------------- /matlab/demMovielensSmallMixFromSingleScript1.m: -------------------------------------------------------------------------------- 1 | % DEMMOVIELENSSMALLMIXFROMSINGLE1 Try collaborative filtering on the small movielens data. 2 | 3 | % COLLAB 4 | 5 | randn('seed', 1e5); 6 | rand('seed', 1e5); 7 | 8 | partNo_v = [1:5]; 9 | latentDim_v = [2 5 7 10]; 10 | numComps_v = [2]; 11 | experimentNo = 1; 12 | numIters = 0; 13 | numItersSingle = 10; 14 | itersFinalEstep = 100; 15 | 16 | error = 0; 17 | for i_latent=1:length(latentDim_v) 18 | q = latentDim_v(i_latent); 19 | for i_part=1:length(partNo_v) 20 | partNo = partNo_v(i_part); 21 | for i_comp = 1:length(numComps_v) 22 | 23 | dataSetName = ['movielensSmall' num2str(partNo)]; 24 | [Y, void, Ytest] = collabLoadData(dataSetName); 25 | 26 | options = collabOptions; 27 | options.numComps = numComps_v(i_comp); 28 | options.kern = {'rbf', 'bias'};%, 'white'}; 29 | %/~ 30 | %options.heteroNoise = true; 31 | %~/ 32 | model = collabCreate(q, size(Y, 2), Y, options); 33 | %/~ 34 | %model.diagvar = repmat(5.0, size(model.diagvar)); 35 | %~/ 36 | model.kern.comp{2}.variance = 0.11; 37 | model.sigma2 = 5; 38 | %model.kern.comp{3}.variance = 5; 39 | options = collabOptimiseOptions; 40 | 41 | capName = dataSetName; 42 | capName(1) = upper(capName(1)); 43 | options.saveName = ['dem' capName 'Mix' num2str(experimentNo) '_']; 44 | 45 | 46 | loadResults = [capName,'_',num2str(q),'_1_',num2str(partNo),'_iters_',num2str(numItersSingle),'.mat']; 47 | disp(['Loading ... 
',loadResults]); 48 | 49 | % loading the model learn without a mixture 50 | model_single = load(loadResults); 51 | 52 | model.X = model_single.model.X; 53 | params_single = kernExtractParam(model_single.model.kern); 54 | %model.kern = kernExpandParam(model.kern,params_single); 55 | model = collabInitS(model); 56 | model = collabUpdateKernels(model); 57 | 58 | 59 | % set parameters 60 | options.momentum = 0.9; 61 | options.learnRate = 0.0001; 62 | options.paramMomentum = 0.9; 63 | options.paramLearnRate = 0.0001; 64 | options.noiseMomentum = 0.9; 65 | options.noiseLearnRate = 0.0001; 66 | options.numIters = numIters; 67 | 68 | options.numIters 69 | 70 | disp('Starting optimization'); 71 | 72 | 73 | model = collabOptimise(model, Y, options); 74 | 75 | disp('Ending optimization'); 76 | 77 | keyboard; 78 | 79 | % do an E-step 80 | model = collabUpdateKernels(model); 81 | disp(['Doing E-step ',num2str(itersFinalEstep)]); 82 | model = collabEstep(model,itersFinalEstep); 83 | 84 | keyboard; 85 | 86 | disp('Computing error'); 87 | 88 | 89 | [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorWeak(model,Y,Ytest) 90 | [L2_error_all,NMAE_error_all,NMAE_round_error_all] = computeTestErrorWeakAllModes(model,Y,Ytest) 91 | [L2_error_best,NMAE_error_best,NMAE_round_error_best] = computeTestErrorWeakBestMode(model,Y,Ytest) 92 | 93 | 94 | capName = dataSetName; 95 | capName(1) = upper(capName(1)); 96 | %save(['dem' capName 'Mix_' num2str(experimentNo) '.mat'], 'model', 'error'); 97 | saveResults = [capName,'_',num2str(q),'_',num2str(numComps_v(i_comp)),'_',num2str(partNo),'_iters_',num2str(numItersSingle),'_mix_',num2str(numComps_v(i_comp)),'_Estepiters_',num2str(itersFinalEstep),'_iters_',num2str(numItersSingle),'.mat']; 98 | disp(['Saving ... 
',saveResults]); 99 | save(saveResults, 'model', 'L2_error','options','NMAE_error','NMAE_round_error', 'L2_error_best','NMAE_error_best','NMAE_round_error_best','L2_error_all','NMAE_error_all','NMAE_round_error_all'); 100 | end 101 | end 102 | end 103 | -------------------------------------------------------------------------------- /matlab/demNetflix1.m: -------------------------------------------------------------------------------- 1 | % DEMNETFLIX1 Try collaborative filtering on the netflix data. 2 | 3 | % COLLAB 4 | 5 | randn('seed', 1e5); 6 | rand('seed', 1e5); 7 | 8 | experimentNo = 1; 9 | dataSetName = 'netflix'; 10 | 11 | load /local/data/netFlixDataProbe.mat 12 | 13 | q = 5; 14 | options = collabOptions; 15 | model = collabCreate(q, size(Y, 1), Y, options); 16 | model.mu = ratingSum./ratingCount; 17 | model.sd = sqrt(ratingSquareSum./ratingCount - model.mu.*model.mu); 18 | 19 | model.kern.comp{2}.variance = 0.11; 20 | model.kern.comp{3}.variance = 5; 21 | options = collabOptimiseOptions; 22 | options.numIters = 5; 23 | options.showEvery = 400; 24 | options.saveEvery = 20000; 25 | capName = dataSetName; 26 | capName(1) = upper(capName(1)); 27 | options.saveName = ['dem' capName num2str(experimentNo) '_']; 28 | options.showLikelihood = false; 29 | model = collabOptimise(model, Y, options) 30 | 31 | % val = 0; 32 | % tot = 0; 33 | % for i = 1:size(Y, 2) 34 | % ind = find(Ytest(:, i)); 35 | % elim = find(ind>size(model.X, 1)); 36 | % tind = ind; 37 | % tind(elim) = []; 38 | % [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :)); 39 | % a = Ytest(tind, i) - mu; 40 | % a = [a; Ytest(elim, i)]; 41 | % val = val + a'*a; 42 | % tot = tot + length(a); 43 | % end 44 | % error = sqrt(val/tot); 45 | 46 | % Save the results. 
47 | capName = dataSetName; 48 | capName(1) = upper(capName(1)); 49 | save(['dem' capName num2str(experimentNo) '.mat'], 'model'); 50 | -------------------------------------------------------------------------------- /matlab/demNetflix2.m: -------------------------------------------------------------------------------- 1 | % DEMNETFLIX2 Try collaborative filtering on the netflix data. 2 | 3 | % COLLAB 4 | 5 | randn('seed', 1e5); 6 | rand('seed', 1e5); 7 | 8 | experimentNo = 2; 9 | dataSetName = 'netflix'; 10 | 11 | load /home/neill/netFlixDataProbe.mat 12 | 13 | q = 4; 14 | options = collabOptions; 15 | model = collabCreate(q, size(Y, 1), Y, options); 16 | model.mu = ratingSum./ratingCount; 17 | model.sd = sqrt(ratingSquareSum./ratingCount - model.mu.*model.mu); 18 | model.kern.comp{2}.variance = 0.11; 19 | model.kern.comp{3}.variance = 5; 20 | options = collabOptimiseOptions; 21 | options.numIters = 5; 22 | options.showEvery = 400; 23 | options.saveEvery = 20000; 24 | capName = dataSetName; 25 | capName(1) = upper(capName(1)); 26 | options.saveName = ['dem' capName num2str(experimentNo) '_']; 27 | options.showLikelihood = false; 28 | model = collabOptimise(model, Y, options) 29 | 30 | % val = 0; 31 | % tot = 0; 32 | % for i = 1:size(Y, 2) 33 | % ind = find(Ytest(:, i)); 34 | % elim = find(ind>size(model.X, 1)); 35 | % tind = ind; 36 | % tind(elim) = []; 37 | % [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :)); 38 | % a = Ytest(tind, i) - mu; 39 | % a = [a; Ytest(elim, i)]; 40 | % val = val + a'*a; 41 | % tot = tot + length(a); 42 | % end 43 | % error = sqrt(val/tot); 44 | 45 | % Save the results. 46 | save(['dem' capName num2str(experimentNo) '.mat'], 'model'); 47 | -------------------------------------------------------------------------------- /matlab/demNetflix3.m: -------------------------------------------------------------------------------- 1 | % DEMNETFLIX3 Try collaborative filtering on the netflix data. 
2 | % Builds a model with q = 6 (experiment number 3) from the variables loaded out of netFlixDataProbe.mat, optimises it and saves it. 3 | % COLLAB 4 | 5 | randn('seed', 1e5); 6 | rand('seed', 1e5); 7 | 8 | experimentNo = 3; 9 | dataSetName = 'netflix'; 10 | load /local/data/netFlixDataProbe.mat 11 | % NOTE(review): Y, ratingSum, ratingSquareSum and ratingCount are presumably provided by the loaded .mat file -- confirm 12 | q = 6; 13 | options = collabOptions; 14 | model = collabCreate(q, size(Y, 1), Y, options); 15 | model.mu = ratingSum./ratingCount; 16 | model.sd = sqrt(ratingSquareSum./ratingCount - model.mu.*model.mu); 17 | model.kern.comp{2}.variance = 0.11; 18 | model.kern.comp{3}.variance = 5; 19 | options = collabOptimiseOptions; 20 | options.numIters = 5; 21 | options.showEvery = 400; 22 | options.saveEvery = 20000; 23 | capName = dataSetName; 24 | capName(1) = upper(capName(1)); 25 | options.saveName = ['dem' capName num2str(experimentNo) '_']; 26 | options.showLikelihood = false; 27 | model = collabOptimise(model, Y, options) 28 | % The test-error computation below is disabled (commented out). 29 | % val = 0; 30 | % tot = 0; 31 | % for i = 1:size(Y, 2) 32 | % ind = find(Ytest(:, i)); 33 | % elim = find(ind>size(model.X, 1)); 34 | % tind = ind; 35 | % tind(elim) = []; 36 | % [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :)); 37 | % a = Ytest(tind, i) - mu; 38 | % a = [a; Ytest(elim, i)]; 39 | % val = val + a'*a; 40 | % tot = tot + length(a); 41 | % end 42 | % error = sqrt(val/tot); 43 | 44 | % Save the results. 45 | save(['dem' capName num2str(experimentNo) '.mat'], 'model'); 46 | -------------------------------------------------------------------------------- /matlab/demNetflix4.m: -------------------------------------------------------------------------------- 1 | % DEMNETFLIX4 Try collaborative filtering on the netflix data.
2 | 3 | % COLLAB 4 | 5 | randn('seed', 1e5); 6 | rand('seed', 1e5); 7 | 8 | experimentNo = 4; 9 | dataSetName = 'netflix'; 10 | 11 | load /local/data/netFlixDataProbe.mat 12 | 13 | q = 5; 14 | options = collabOptions; 15 | options.kern = {'rbf', 'lin', 'bias', 'white'}; 16 | model = collabCreate(q, size(Y, 1), Y, options); 17 | model.mu = ratingSum./ratingCount; 18 | model.sd = sqrt(ratingSquareSum./ratingCount - model.mu.*model.mu); 19 | model.kern.comp{3}.variance = 0.11; 20 | model.kern.comp{4}.variance = 5; 21 | options = collabOptimiseOptions; 22 | options.numIters = 5; 23 | options.showEvery = 400; 24 | options.saveEvery = 20000; 25 | capName = dataSetName; 26 | capName(1) = upper(capName(1)); 27 | options.saveName = ['dem' capName num2str(experimentNo) '_']; 28 | options.showLikelihood = false; 29 | model = collabOptimise(model, Y, options) 30 | 31 | % val = 0; 32 | % tot = 0; 33 | % for i = 1:size(Y, 2) 34 | % ind = find(Ytest(:, i)); 35 | % elim = find(ind>size(model.X, 1)); 36 | % tind = ind; 37 | % tind(elim) = []; 38 | % [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :)); 39 | % a = Ytest(tind, i) - mu; 40 | % a = [a; Ytest(elim, i)]; 41 | % val = val + a'*a; 42 | % tot = tot + length(a); 43 | % end 44 | % error = sqrt(val/tot); 45 | 46 | % Save the results. 47 | capName = dataSetName; 48 | capName(1) = upper(capName(1)); 49 | save(['dem' capName num2str(experimentNo) '.mat'], 'model'); 50 | -------------------------------------------------------------------------------- /matlab/demNetflix5.m: -------------------------------------------------------------------------------- 1 | % DEMNETFLIX5 Try collaborative filtering on the netflix data. 
2 | 3 | % COLLAB 4 | 5 | randn('seed', 1e5); 6 | rand('seed', 1e5); 7 | 8 | experimentNo = 5; 9 | dataSetName = 'netflix'; 10 | 11 | load /local/data/netFlixDataProbe.mat 12 | 13 | q = 5; 14 | options = collabOptions; 15 | model = collabCreate(q, size(Y, 1), Y, options); 16 | model.mu = ratingSum./ratingCount; 17 | model.sd = sqrt(ratingSquareSum./ratingCount - model.mu.*model.mu); 18 | 19 | model.kern.comp{2}.variance = 0.11; 20 | model.kern.comp{3}.variance = 5; 21 | options = collabOptimiseOptions; 22 | options.momentum = 0.5; 23 | options.learnRate = 0.00001; 24 | options.paramMomentum = 0.5; 25 | options.paramLearnRate = 0.00001; 26 | options.numIters = 5; 27 | options.showEvery = 400; 28 | options.saveEvery = 20000; 29 | capName = dataSetName; 30 | capName(1) = upper(capName(1)); 31 | options.saveName = ['dem' capName num2str(experimentNo) '_']; 32 | options.showLikelihood = false; 33 | model = collabOptimise(model, Y, options) 34 | 35 | % val = 0; 36 | % tot = 0; 37 | % for i = 1:size(Y, 2) 38 | % ind = find(Ytest(:, i)); 39 | % elim = find(ind>size(model.X, 1)); 40 | % tind = ind; 41 | % tind(elim) = []; 42 | % [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :)); 43 | % a = Ytest(tind, i) - mu; 44 | % a = [a; Ytest(elim, i)]; 45 | % val = val + a'*a; 46 | % tot = tot + length(a); 47 | % end 48 | % error = sqrt(val/tot); 49 | 50 | % Save the results. 
51 | capName = dataSetName; 52 | capName(1) = upper(capName(1)); 53 | save(['dem' capName num2str(experimentNo) '.mat'], 'model'); 54 | -------------------------------------------------------------------------------- /matlab/em-weak.m: -------------------------------------------------------------------------------- 1 | % variables: strong{train,test}, weak{train,test} 2 | load 'em-split.mat' 3 | clear strongtrain strongtest; 4 | regvals = sqrt(sqrt(10)).^[9 8.5 8 7.5 7 6.5 6 5.5 5 4.5 4]; 5 | objgrad = @m3fshc_norm; 6 | tol = 1e-3; 7 | [n,m] = size(weaktrain{1}); 8 | p = 500; 9 | l = 5; 10 | i = 3; 11 | maxiter = 100; 12 | fprintf('p=%d maxiter=%d i=%d\n',p,maxiter,i); 13 | fn = sprintf('../result/WEAK_r%d_c%d_p%d_x%d_i%d',n,m,p,maxiter,i); 14 | v = randn(n*p+m*p+n*(l-1),1); 15 | for i3=1:length(regvals) 16 | fprintf(1,'Begin conjgrad: regval=%.1e\n',regvals(i3)); 17 | [v] = conjgrad(v,@cgLineSearch,{'c2',1e-2},objgrad,{weaktrain{i},regvals(i3),l,'verbose',0},'tol',tol,'maxiter',maxiter,'verbose',2); 18 | U = reshape(v(1:n*p),n,p); 19 | V = reshape(v(n*p+1:n*p+m*p),m,p); 20 | theta = reshape(v(n*p+m*p+1:n*p+m*p+n*(l-1)),n,l-1); 21 | [U,V] = normCols(U,V); 22 | X = U*V'; 23 | [y] = m3fSoftmax(X,theta); 24 | Xrank = rank(X); 25 | clear U V theta X; 26 | fprintf(1,'%d %s xi=%d p=%d tol=%.0e rank=%d %.2e ZOE: %.2f %.4f MAE: %.2f %.4f\n',i,func2str(objgrad),maxiter,p,tol,Xrank,regvals(i3),zoe(y,weaktrain{i}),zoe(y,weaktest{i}),mae(y,weaktrain{i}),mae(y,weaktest{i})); 27 | fh = fopen(fn,'a'); 28 | fprintf(fh,'%d %s xi=%d p=%d tol=%.0e rank=%d %.2e ZOE: %.2f %.4f MAE: %.2f %.4f\n',i,func2str(objgrad),maxiter,p,tol,Xrank,regvals(i3),zoe(y,weaktrain{i}),zoe(y,weaktest{i}),mae(y,weaktrain{i}),mae(y,weaktest{i})); 29 | fclose(fh); 30 | end 31 | -------------------------------------------------------------------------------- /matlab/generateCteNMAE.m: -------------------------------------------------------------------------------- 1 | function [mae] = 
generateCteNMAE(num_ordinals) 2 | 3 | % [mae] = generateCteNMAE(num_ordinals) 4 | % 5 | % generate the cte for NMAE normalization 6 | % num_ordinals is 5 for movielens and 6 for eachmovie 7 | 8 | size_data = 100000; 9 | 10 | % first generate a uniformly distributed random data set 11 | Y = ceil(rand(size_data,1)*num_ordinals); 12 | 13 | % generate predictions for the data 14 | pred = ceil(rand(size_data,1)*num_ordinals); 15 | 16 | % predict the mean absolute error 17 | mae = mean(abs(Y - pred)); 18 | -------------------------------------------------------------------------------- /matlab/kernAdditionalKernCompute.m: -------------------------------------------------------------------------------- 1 | function [k, n2] = kernAdditionalKernCompute(kern, x, x2) 2 | 3 | % KERNADDITIONALKERNCOMPUTE Compute the RBF kernel given the parameters and X. 4 | % 5 | % Description: 6 | % 7 | % K = KERNADDITIONALKERNCOMPUTE(KERN, X, X2) computes the kernel parameters for 8 | % the radial basis function kernel given inputs associated with rows 9 | % and columns. 10 | % Returns: 11 | % K - the kernel matrix computed at the given points. 12 | % Arguments: 13 | % KERN - the kernel structure for which the matrix is computed. 14 | % X - the index of the input matrix associated with the rows of the kernel. 15 | % X2 - the index of the input matrix associated with the columns of the kernel. 16 | % 17 | % K = KERNADDITIONALKERNCOMPUTE(KERN, X) computes the kernel matrix for the 18 | % radial basis function kernel given a design matrix of inputs. 19 | % Returns: 20 | % K - the kernel matrix computed at the given points. 21 | % Arguments: 22 | % KERN - the kernel structure for which the matrix is computed. 23 | % X - the index of the input data matrix in the form of a design matrix. 
function [k, n2] = kernAdditionalKernCompute(kern, x, x2)

% KERNADDITIONALKERNCOMPUTE Compute the RBF kernel on the additional (side) information.
%
%	Description:
%
%	K = KERNADDITIONALKERNCOMPUTE(KERN, X, X2) computes the radial basis
%	function kernel between the rows of KERN.ADDITIONAL selected by X and
%	the rows selected by X2.
%	 Returns:
%	  K - the kernel matrix computed at the given points.
%	  N2 - the squared distances returned by DIST2.
%	 Arguments:
%	  KERN - the kernel structure (fields additional, inverseWidth, variance).
%	  X - indices into KERN.ADDITIONAL associated with the rows of the kernel.
%	  X2 - indices into KERN.ADDITIONAL associated with the columns of the kernel.
%
%	K = KERNADDITIONALKERNCOMPUTE(KERN, X) uses X for both rows and columns.
%
%	See also
%	RBFADDITIONALKERNPARAMINIT, KERNCOMPUTE, KERNCREATE, RBFADDITIONALKERNDIAGCOMPUTE

%	Copyright (c) 2009 Raquel Urtasun

if nargin < 3
  x2 = x;
end
% The side information lives in the kernel structure (as in
% rbfadditionalKernCompute); a bare `additional` is not defined here.
n2 = dist2(kern.additional(x,:), kern.additional(x2,:));
wi2 = (.5 .* kern.inverseWidth);
k = kern.variance*exp(-n2*wi2);

% ----- /matlab/loadAverageVariance.m -----

% LOADAVERAGEVARIANCE Per-film mean and variance of the ratings.
%
% Reads mv_0000001.txt ... mv_0017770.txt from the current directory. The
% first line of every file is skipped; on each following line the text
% between the first and second commas is taken as the rating. The results
% are left in meanVals and varVals (one entry per film).

numFilms = 17770;   % same film count as loadNetflix.m
meanVals = zeros(numFilms, 1);
varVals = zeros(numFilms, 1);
for i = 1:numFilms
  fileNameBase = num2str(i);
  fileName = ['mv_' repmat('0', 1, 7-length(fileNameBase)) fileNameBase ...
              '.txt'];
  fid = fopen(fileName);
  if fid < 0
    error('Unable to open rating file %s.', fileName);
  end
  void = fgetl(fid);    % skip the first line of the file
  vals = zeros(64, 1);  % buffer, doubled whenever it fills up
  count = 0;
  while 1
    nextLine = fgetl(fid);
    if ~ischar(nextLine), break, end
    commas = find(nextLine==44);
    count = count + 1;
    if count > length(vals)
      vals = [vals; zeros(length(vals), 1)];
    end
    vals(count) = str2num(nextLine(commas(1)+1:commas(2)-1));
  end
  fclose(fid);
  vals = vals(1:count);
  meanVals(i) = mean(vals);
  varVals(i) = var(vals);
end
% LOADNETFLIX Read the per-film rating files into per-user rating lists.
%
% Reads mv_0000001.txt ... mv_0017770.txt from the current directory. The
% first line of each file is skipped; on every following line the text
% before the first comma is the user id and the text between the first and
% second commas is the score.
%
% Saved to netFlixData.mat:
%   Y{uid, 1} - uint16 film numbers rated by user uid
%   Y{uid, 2} - uint8 scores aligned with Y{uid, 1}
%   Y{uid, 3} - number of ratings stored for user uid
%   ratingSum, ratingSquareSum, ratingCount - per-film sum, sum of squares, count
%   userSum, userSquareSum, userCount - the same per user (sparse)

numUserIds = 2649429;
numFilms = 17770;
Y = cell(numUserIds, 3);
ratingSum = zeros(numFilms, 1);
ratingSquareSum = zeros(numFilms, 1);
ratingCount = zeros(numFilms, 1);
oldTotalRating = 0;
totalRating = 0;
tic
for filmNumDouble = 1:numFilms
  filmNum = uint16(filmNumDouble);
  fileNameBase = num2str(filmNum);
  fileName = ['mv_' repmat('0', 1, 7-length(fileNameBase)) fileNameBase '.txt'];
  fid = fopen(fileName);
  if fid < 0
    error('Unable to open rating file %s.', fileName);
  end
  void = fgetl(fid);   % skip the first line of the file
  while 1
    nextLine = fgetl(fid);
    if ~ischar(nextLine), break, end
    % Count only lines that were actually read; counting before the
    % end-of-file test adds one phantom rating per film.
    totalRating = totalRating + 1;
    ratingCount(filmNumDouble) = ratingCount(filmNumDouble) + 1;
    commas = find(nextLine==44);
    uid = str2num(nextLine(1:commas(1)-1));
    score = uint8(str2num(nextLine(commas(1)+1:commas(2)-1)));
    ratingSum(filmNumDouble) = ratingSum(filmNumDouble)+double(score);
    ratingSquareSum(filmNumDouble) = ratingSquareSum(filmNumDouble)+double(score)*double(score);
    if isempty(Y{uid, 1})
      % first rating seen for this user: start with room for 40
      Y{uid, 1} = uint16(zeros(40, 1));
      Y{uid, 2} = uint8(zeros(40, 1));
      Y{uid, 3} = 0;
    end
    if Y{uid, 1}(end) ~= 0
      % last slot is occupied, so the buffer is full: add 20 slots
      %fprintf('Allocating memory for %d, user %d\n', filmNum, uid)
      Y{uid, 1} = [Y{uid, 1}; uint16(zeros(20, 1))];
      Y{uid, 2} = [Y{uid, 2}; uint8(zeros(20, 1))];
    end
    Y{uid, 3} = Y{uid, 3} + 1;
    Y{uid, 1}(Y{uid, 3}) = filmNum;
    Y{uid, 2}(Y{uid, 3}) = score;
  end
  fclose(fid);

  % progress report
  n = ratingCount(filmNumDouble);
  diffRating = totalRating - oldTotalRating;
  oldTotalRating = totalRating;
  rps = diffRating/toc;
  % NOTE(review): 100000000 looks like the expected total number of
  % ratings used for the time-remaining estimate -- confirm.
  remain = (100000000 - totalRating)/rps;
  remain = remain/(3600);
  tic
  if ~rem(filmNumDouble, 1)   % true for every film; raise the divisor to report less often
    fprintf('Film %d done,\t ratings %d,\t mean %2.4f,\t std %2.4f,\t rps %2.4f,\t remain %2.4f hrs,\t total %d.\n', filmNumDouble, n,...
            ratingSum(filmNumDouble)/n, ...
            sqrt(ratingSquareSum(filmNumDouble)/n- ...
                 ratingSum(filmNumDouble)*ratingSum(filmNumDouble)/(n*n)), ...
            rps, remain, totalRating);
  end
end

userCount = spalloc(numUserIds, 1, 480189);
userSquareSum = spalloc(numUserIds, 1, 480189);
userSum = spalloc(numUserIds, 1, 480189);

for i = 1:size(Y, 1)
  if ~isempty(Y{i, 1})
    % accumulate in double so the sums cannot be limited by uint8
    scores = double(Y{i, 2});
    userCount(i) = Y{i, 3};
    userSum(i) = sum(scores);
    userSquareSum(i) = sum(scores.*scores);
    % drop the unused tail of the buffers
    Y{i, 1} = Y{i, 1}(1:Y{i,3});
    Y{i, 2} = Y{i, 2}(1:Y{i,3});
  end
end

save netFlixData.mat Y ratingSum ratingSquareSum ratingCount userCount userSquareSum userSum
% LOADNETFLIX2 Accumulate per-film and per-user rating statistics.
%
% Reads mv_0000001.txt ... mv_0017770.txt from the current directory. The
% first line of each file is skipped; on every following line the text
% before the first comma is the user id and the text between the first and
% second commas is the score. Sum, sum of squares and count of the scores
% are accumulated per film (ratingSum, ratingSquareSum, ratingCount) and
% per user (userSum, userSquareSum, userCount, sparse).

numUserIds = 2649429;
numFilms = 17770;   % same film count as loadNetflix.m
Y = cell(numUserIds, 3);
userCount = spalloc(numUserIds, 1, 480189);
userSquareSum = spalloc(numUserIds, 1, 480189);
userSum = spalloc(numUserIds, 1, 480189);
ratingSum = zeros(numFilms, 1);
ratingSquareSum = zeros(numFilms, 1);
ratingCount = zeros(numFilms, 1);
oldTotalRating = 0;
totalRating = 0;
tic
for filmNumDouble = 1:numFilms
  filmNum = uint16(filmNumDouble);
  fileNameBase = num2str(filmNum);
  fileName = ['mv_' repmat('0', 1, 7-length(fileNameBase)) fileNameBase '.txt'];
  fid = fopen(fileName);
  if fid < 0
    error('Unable to open rating file %s.', fileName);
  end
  void = fgetl(fid);   % skip the first line of the file
  while 1
    nextLine = fgetl(fid);
    if ~ischar(nextLine), break, end
    % Count only lines that were actually read; counting before the
    % end-of-file test adds one phantom rating per film.
    totalRating = totalRating + 1;
    ratingCount(filmNumDouble) = ratingCount(filmNumDouble) + 1;
    commas = find(nextLine==44);
    uid = str2num(nextLine(1:commas(1)-1));
    % keep the score in double: it is added to sparse (double) accumulators
    score = double(uint8(str2num(nextLine(commas(1)+1:commas(2)-1))));
    ratingSum(filmNumDouble) = ratingSum(filmNumDouble)+score;
    ratingSquareSum(filmNumDouble) = ratingSquareSum(filmNumDouble)+score*score;
    userCount(uid) = userCount(uid)+1;
    userSquareSum(uid) = userSquareSum(uid)+score*score;
    userSum(uid) = userSum(uid) + score;
  end
  fclose(fid);

  % progress report
  n = ratingCount(filmNumDouble);
  diffRating = totalRating - oldTotalRating;
  oldTotalRating = totalRating;
  rps = diffRating/toc;
  % NOTE(review): 100000000 looks like the expected total number of
  % ratings used for the time-remaining estimate -- confirm.
  remain = (100000000 - totalRating)/rps;
  remain = remain/(3600);
  tic
  if ~rem(filmNumDouble, 1)   % true for every film; raise the divisor to report less often
    fprintf('Film %d done,\t ratings %d,\t mean %2.4f,\t std %2.4f,\t rps %2.4f,\t remain %2.4f hrs,\t total %d.\n', filmNumDouble, n,...
            ratingSum(filmNumDouble)/n, ...
            sqrt(ratingSquareSum(filmNumDouble)/n- ...
                 ratingSum(filmNumDouble)*ratingSum(filmNumDouble)/(n*n)), ...
            rps, remain, totalRating);
  end
end
% ML-WEAK Regularisation sweep on the weak-generalisation MovieLens split.
%
% Loads 'ml-split.mat' (the strong* variables it provides are discarded and
% the cell arrays weaktrain / weaktest are used), then for every value in
% regvals runs conjgrad starting from the previous solution, rebuilds the
% prediction y from the optimised parameter vector and reports the zero-one
% error (ZOE) and mean absolute error (MAE) on the train and test partitions.
% Every result line is printed and appended to a file under ../result/.

% variables: strong{train,test}, weak{train,test}
load 'ml-split.mat'
clear strongtrain strongtest;

regvals = sqrt(sqrt(10)).^[8 7.5 7 6.5 6 5.5 5 4.5 4 3.5 3];
objgrad = @m3fshc_norm;
tol = 1e-3;
[n,m] = size(weaktrain{1});
p = 500;      % U is n x p and V is m x p
l = 5;        % theta is n x (l-1)
i = 3;        % which entry of weaktrain / weaktest is used
maxiter = 100;
fprintf('p=%d maxiter=%d i=%d\n',p,maxiter,i);
fn = sprintf('../result/WEAK_r%d_c%d_p%d_x%d_i%d',n,m,p,maxiter,i);
resultFormat = '%d %s xi=%d p=%d tol=%.0e rank=%d %.2e ZOE: %.2f %.4f MAE: %.2f %.4f\n';

v = randn(n*p+m*p+n*(l-1),1);
for i3=1:length(regvals)
  fprintf(1,'Begin conjgrad: regval=%.1e\n',regvals(i3));
  [v] = conjgrad(v,@cgLineSearch,{'c2',1e-2},objgrad,{weaktrain{i},regvals(i3),l,'verbose',0},'tol',tol,'maxiter',maxiter,'verbose',2);

  % v is laid out as [U(:); V(:); theta(:)]
  U = reshape(v(1:n*p),n,p);
  V = reshape(v(n*p+1:n*p+m*p),m,p);
  theta = reshape(v(n*p+m*p+1:n*p+m*p+n*(l-1)),n,l-1);
  [U,V] = normCols(U,V);
  X = U*V';
  [y] = m3fSoftmax(X,theta);
  Xrank = rank(X);
  clear U V theta X;

  % Format the result once so that the error measures are evaluated a
  % single time and the console and the file receive the same text.
  resultLine = sprintf(resultFormat,i,func2str(objgrad),maxiter,p,tol,Xrank,regvals(i3), ...
      zoe(y,weaktrain{i}),zoe(y,weaktest{i}),mae(y,weaktrain{i}),mae(y,weaktest{i}));
  fprintf(1,'%s',resultLine);

  % Do not abort the sweep when the result file cannot be opened; the
  % line has already been printed above.
  fh = fopen(fn,'a');
  if fh < 0
    warning('Could not open result file %s for appending; result not saved.', fn);
  else
    fprintf(fh,'%s',resultLine);
    fclose(fh);
  end
end
% NETFLIXTEST Load in the probe set and test netflix performance.
%
% For the first 1000 users that have training ratings in Y, predicts the
% probe ratings in Yprobe from the user's own training ratings and
% accumulates squared errors for three predictors: the plain prediction
% (rmse), the corrected prediction stored in Ypred{i, 3} (rmseCorrected)
% and the per-film mean model.mu alone (mrmse).
%
% NOTE(review): the script reads Y, Yprobe and model (fields kern, X, mu,
% sd); which of them come from the mat file and which must already be in
% the workspace is not visible here -- confirm.

load /local/data/netFlixDataProbe.mat
counter = 0;
totalCount = 0;
totalse = 0;
totalse2 = 0;
totalMeanSe = 0;
% Preallocate the arrays that are actually written in the loop
% (the original preallocated rmseOne/rmseTwo but filled rmse1/rmse2).
rmse1 = spalloc(length(Y), 1, 480000);
rmse2 = spalloc(length(Y), 1, 480000);
rmseDum = spalloc(length(Y), 1, 480000);
Ypred = cell(size(Yprobe, 1), 3);
for i = 1:length(Y)
  if isempty(Y{i, 1}), continue, end
  counter = counter + 1;
  if counter > 1000, break, end
  if isempty(Yprobe{i, 1}), continue, end
  ind = Y{i, 1};
  % users with 3000 or more training ratings are skipped
  if length(ind) >= 3000, continue, end

  % standardise the training ratings with the per-film mean and sd
  yprime = (double(Y{i, 2}) - model.mu(ind))./model.sd(ind);
  K = kernCompute(model.kern, model.X(ind, :));
  invK = pdinv(K);

  testInd = Yprobe{i, 1};
  diagK = kernDiagCompute(model.kern, model.X(testInd, :));
  Kx = kernCompute(model.kern,model.X(ind, :), model.X(testInd, :));
  KinvK = invK*Kx;
  sd = model.sd(testInd);
  % predictive mean and variance, mapped back to the rating scale
  Ypred{i, 1} = (KinvK'*yprime).*sd + model.mu(testInd);
  Ypred{i, 2} = (diagK - sum(Kx.*KinvK, 1)').*sd.*sd;

  thisMu = Ypred{i, 1};
  thisSd = sqrt(Ypred{i, 2});
  % standardised distances of the bounds 1 and 5 from the prediction
  a = (1-thisMu)./thisSd;
  b = (5-thisMu)./thisSd;
  % NOTE(review): presumably the mean of the predictive Gaussian
  % truncated to [1, 5]; depends on gaussOverDiffCumGaussian -- confirm.
  Ypred{i, 3} = thisMu ...
      + (gaussOverDiffCumGaussian(b, a, 2) ...
         - gaussOverDiffCumGaussian(b, a, 1)).*thisSd;

  vals = double(Yprobe{i, 2}) - Ypred{i, 1};
  vals2 = double(Yprobe{i, 2}) - Ypred{i, 3};
  dum = double(Yprobe{i, 2}) - model.mu(testInd);

  dumValsSq = dum'*dum;
  vals2Sq = vals2'*vals2;
  valsSq = vals'*vals;

  rmse1(i) = sqrt(valsSq/length(vals));
  rmse2(i) = sqrt(vals2Sq/length(vals));
  rmseDum(i) = sqrt(dumValsSq/length(vals));
  totalMeanSe = totalMeanSe + dumValsSq;
  totalCount = totalCount + length(vals);
  totalse = totalse+valsSq;
  totalse2 = totalse2+vals2Sq;
end
% keep the originally preallocated names available, now holding the results
rmseOne = rmse1;
rmseTwo = rmse2;
rmseCorrected= sqrt(totalse2/totalCount);
rmse = sqrt(totalse/totalCount);
mrmse = sqrt(totalMeanSe/totalCount);
function [k, sk, n2] = rbfadditionalKernCompute(kern, x, x2)

% RBFADDITIONALKERNCOMPUTE Compute the RBF kernel on the additional (side) information.
% FORMAT
% DESC computes the radial basis function kernel between the rows of
% kern.additional selected by two sets of indices.
% RETURN k : the kernel matrix computed at the given points.
% RETURN sk : the kernel matrix before scaling by kern.variance.
% RETURN n2 : the squared distances returned by dist2.
% ARG kern : the kernel structure for which the matrix is computed.
% ARG i : the indices into kern.additional associated with the rows of the kernel.
% ARG i2 : the indices into kern.additional associated with the columns of the kernel.
%
% DESC computes the kernel matrix using the same indices for rows and
% columns.
% RETURN k : the kernel matrix computed at the given points.
% ARG kern : the kernel structure for which the matrix is computed.
% ARG i : the indices into kern.additional.
%
% SEEALSO : rbfadditionalKernParamInit, kernCompute, kernCreate,
% rbfadditionalKernDiagCompute
%
% COPYRIGHT : Raquel Urtasun, 2009
%
% COPYRIGHT : Neil D. Lawrence, 2004, 2005, 2006

% COLLAB

rowInfo = kern.additional(x,:);
if nargin >= 3
  colInfo = kern.additional(x2,:);
else
  % only one index set given: rows and columns coincide
  colInfo = rowInfo;
end

n2 = dist2(rowInfo, colInfo);
halfInverseWidth = (.5 .* kern.inverseWidth);
sk = exp(-n2*halfInverseWidth);
k = sk*kern.variance;
function k = rbfadditionalKernDiagCompute(kern, x)

% RBFADDITIONALKERNDIAGCOMPUTE Compute diagonal of RBF side information kernel.
% FORMAT
% DESC computes the diagonal of the kernel matrix for the radial basis
% function kernel with side information. Every diagonal entry equals
% kern.variance.
% RETURN k : a column vector with one entry per row of the input, each
% equal to kern.variance.
% ARG kern : the kernel structure for which the matrix is computed.
% ARG i : input data indices (only the number of rows is used).
%
% SEEALSO : rbfadditionalKernParamInit, kernDiagCompute, kernCreate,
% rbfadditionalKernCompute
%
% COPYRIGHT : Raquel Urtasun 2009
%
% COPYRIGHT : Neil D. Lawrence, 2004, 2005, 2006

% COLLAB

numPoints = size(x, 1);
k = repmat(kern.variance, numPoints, 1);

% ----- /matlab/rbfadditionalKernDiagGradX.m -----

function gX = rbfadditionalKernDiagGradX(kern, X)

% RBFADDITIONALKERNDIAGGRADX Gradient of RBF with side information kernel's
% diagonal with respect to X.
% FORMAT
% DESC returns the gradient of the diagonal of the radial basis function
% side information kernel matrix with respect to the elements of X, which
% is all zeros.
% ARG kern : the kernel structure for which gradients are being computed.
% ARG X : the input data in the form of a design matrix.
% RETURN gX : a matrix of zeros with the same dimensions as X.
%
% SEEALSO : rbfadditionalKernParamInit, kernDiagGradX, rbfadditionalKernGradX
%
% COPYRIGHT : Neil D. Lawrence, 2004, 2005, 2006
%
% COPYRIGHT : Raquel Urtasun, 2009

% COLLAB

inputSize = size(X);
gX = zeros(inputSize);
function kern = rbfadditionalKernExpandParam(kern, params)

% RBFADDITIONALKERNEXPANDPARAM Create kernel structure from RBF kernel's parameters.
% FORMAT
% DESC writes a parameter vector into a radial basis function kernel
% structure for use with side information. It is the counterpart of
% rbfadditionalKernExtractParam and lets optimisers that work on plain
% vectors (for example the NETLAB optimisation functions) update the kernel.
% ARG kern : the kernel structure in which the parameters are to be
% placed.
% ARG param : vector of parameters; element 1 is the inverse width and
% element 2 is the variance.
% RETURN kern : kernel structure with the given parameters in the
% relevant locations.
%
% SEEALSO : rbfKernParamInit, rbfKernExtractParam, kernExpandParam
%
% COPYRIGHT : Neil D. Lawrence, 2004, 2005, 2006
%
% COPYRIGHT : Raquel Urtasun

% COLLAB

[kern.inverseWidth, kern.variance] = deal(params(1), params(2));
function [params, names] = rbfadditionalKernExtractParam(kern)

% RBFADDITIONALKERNEXTRACTPARAM Extract parameters from the RBF with side
% information kernel structure.
% FORMAT
% DESC collects the parameters of the radial basis function with side
% information kernel into a row vector for optimisation.
% ARG kern : the kernel structure containing the parameters to be
% extracted.
% RETURN param : the vector [inverseWidth variance] read from the kernel.
%
% FORMAT
% DESC as above, and also returns a name for each parameter.
% ARG kern : the kernel structure containing the parameters to be
% extracted.
% RETURN param : the vector [inverseWidth variance] read from the kernel.
% RETURN names : cell array of strings giving names to the parameters.
%
% SEEALSO rbfadditionalKernParamInit, rbfadditionalKernExpandParam, kernExtractParam, scg, conjgrad
%
% COPYRIGHT : Neil D. Lawrence, 2004, 2005, 2006
%
% COPYRIGHT : Raquel Urtasun, 2009

% COLLAB

params = horzcat(kern.inverseWidth, kern.variance);

% names are only built when the caller asks for them
if nargout >= 2
  names = cell(1, 2);
  names{1} = 'inverse width';
  names{2} = 'variance';
end
function gX = rbfadditionalKernGradX(kern, X, X2)

% RBFADDITIONALKERNGRADX Gradient of RBF kernel with respect to input locations.
% FORMAT
% DESC returns the gradient of the radial basis function kernel with side
% information with respect to the input positions. The kernel is computed
% from kern.additional rather than from the input values (see
% rbfadditionalKernCompute), so the gradient is identically zero; only the
% sizes of the inputs are used.
% ARG kern : kernel structure for which gradients are being
% computed.
% ARG i1 : row locations against which gradients are being computed.
% ARG i2 : column locations against which gradients are being computed.
% When omitted, the row locations are used for the columns as well.
% RETURN g : the returned gradients. The gradients are returned in
% a matrix which is numData2 x numInputs x numData1. Where numData1 is
% the number of data points in I1, numData2 is the number of data
% points in I2 and numInputs is the number of input
% dimensions in I2.
%
% SEEALSO : rbfKernParamInit, kernGradX, rbfKernDiagGradX
%
% COPYRIGHT : Neil D. Lawrence, 2004, 2005, 2006
%
% COPYRIGHT : Raquel Urtasun, 2009

% COLLAB

% Allow the two-argument form used by the other kernel functions in this
% family; previously X2 was read even when it had not been supplied.
if nargin < 3
  X2 = X;
end
gX = zeros(size(X2, 1), size(X2, 2), size(X, 1));
function g = rbfadditionalKernGradient(kern, x, varargin)

% RBFADDITIONALKERNGRADIENT Gradient of RBF with side information kernel's parameters.
% FORMAT
% DESC computes the gradient of a function of interest with respect to
% the parameters of the radial basis function kernel with side
% information. The caller passes the kernel structure, the input indices
% and a matrix PARTIAL holding the partial derivatives of the function
% with respect to each element of the kernel matrix.
% ARG kern : the kernel structure for which the gradients are being
% computed.
% ARG i : the input indices for which the gradients are being
% computed.
% ARG partial : matrix of partial derivatives of the function of
% interest with respect to the kernel matrix, with one row and one
% column per index in I.
% RETURN g : gradients of the function of interest with respect to
% the kernel parameters, ordered as [inverseWidth variance] to match
% rbfadditionalKernExtractParam.
%
% FORMAT
% DESC computes the derivatives as above, but with separate indices for
% the rows and the columns of the kernel matrix.
% ARG kern : the kernel structure for which the gradients are being
% computed.
% ARG i1 : the input indices associated with the rows of the
% kernel matrix.
% ARG i2 : the input indices associated with the columns of the
% kernel matrix.
% ARG partial : matrix of partial derivatives of the function of
% interest with respect to the kernel matrix, with one row per index
% in I1 and one column per index in I2.
% RETURN g : gradients of the function of interest with respect to
% the kernel parameters.
%
% SEEALSO rbfadditionalKernParamInit, kernGradient, rbfadditionalKernDiagGradient, kernGradX
%
% COPYRIGHT : Neil D. Lawrence, 2004, 2005, 2006, 2009
%
% COPYRIGHT : Raquel Urtasun, 2009

% COLLAB

% The last argument is always the matrix of partial derivatives
covGrad = varargin{end};

if nargin >= 4
  % separate row and column indices were given
  [k, sk, sqDist] = rbfadditionalKernCompute(kern, x, varargin{1});
else
  [k, sk, sqDist] = rbfadditionalKernCompute(kern, x);
end

gradInverseWidth = - .5*sum(sum(covGrad.*k.*sqDist));
gradVariance = sum(sum(covGrad.*sk));
g = [gradInverseWidth, gradVariance];
function kern = rbfadditionalKernParamInit(kern)

% RBFADDITIONALKERNPARAMINIT RBF kernel with side information.
% FORMAT
% The radial basis function kernel (RBF) is sometimes also known as the
% squared exponential kernel. It is a very smooth non-linear kernel and is a
% popular choice for generic use.
%
% k(x_i, x_j) = sigma2 * exp(-gamma/2 *(additional(x_i) - additional(x_j))'*(additional(x_i) - additional(x_j)))
%
% The parameters are sigma2, the process variance (kern.variance) and gamma,
% the inverse width (kern.inverseWidth). The inverse width controls how wide
% the basis functions are, the larger gamma, the smaller the basis functions
% are.
%
% DESC sets the default parameters of the RBF kernel with side information
% for collaborative filtering.
% RETURN kern : the kernel structure with the default parameters placed in.
% ARG kern : the kernel structure which requires initialisation.
%
% SEEALSO : rbfkernParamInit, kernCreate, kernParamInit
%
% COPYRIGHT : Raquel Urtasun, 2009
%
% COPYRIGHT : Neil D. Lawrence, 2004, 2005, 2006

% COLLAB

defaultInverseWidth = 1;
defaultVariance = 1;

kern.inverseWidth = defaultInverseWidth;
kern.variance = defaultVariance;
kern.nParams = 2;

% Both parameters (positions 1 and 2) are constrained positive for optimisation.
positiveIndex = [1 2];
kern.transforms.index = positiveIndex;
kern.transforms.type = optimiDefaultConstraint('positive');
kern.isStationary = true;

% The side information itself is not set here: the kernel also needs a
% field holding it (read as kern.additional when the kernel is computed).
function [Y, lbls, Ytest] = readEachMovieMarlinStrong(partNo)

% READEACHMOVIEMARLINSTRONG Reads the EachMovie strong partitions from Marlin.
% FORMAT
% DESC loads marlin.mat from the collab/project/em-mmmf/data folder of the
% datasets directory and returns the transposed entries of one partition.
% ARG partNo : the index of the partition to read in.
% RETURN Y : the transpose of weaktrain{partNo}.
% RETURN lbls : the transpose of strongtrain{partNo}.
% RETURN Ytest : the transpose of strongtest{partNo}.
%
% SEEALSO : collabLoadData, readEachMovieMarlinWeak
%
% COPYRIGHT : Raquel Urtasun, 2009

% COLLAB

% location of the ratings file
dataDir = [datasetsDirectory filesep 'collab' filesep 'project' filesep 'em-mmmf' filesep 'data'];
fileName = [dataDir filesep 'marlin.mat'];

disp(['Reading ... ',fileName]);

% brings weaktrain, strongtrain and strongtest into the workspace
load(fileName);

Ytest = strongtest{partNo}';
lbls = strongtrain{partNo}';
Y = weaktrain{partNo}';

%/~
% find movies with too big rates
%max_film = max(Y');
%max_film_test = max(Ytest');
%max_film_train_test = max(lbls');
%ind = find(max_film>6);
%ind_test = find(max_film_test>6);
%ind_train_test = find(max_film_train_test>6);

%ind = [ind, ind_test, ind_train_test];
%ind = unique(ind);


% remove the corrupted data
%Y(ind,:) = [];
%Ytest(ind,:) = [];
%lbls(ind,:) = [];

%toRemove = [];

% find movies that are not rated
%for i=1:size(Y,1)
    % check empty rating movies
 %   ind = find(Y(i,:));
    %if (length(ind)<1)
    %    toRemove = [toRemove, i];
    %end
%end

%Y(toRemove,:) = [];
%Ytest(toRemove,:) = [];
%lbls(toRemove,:) = [];

%~/
function [Y, Ytest] = readEachMovieMarlinWeak(partNo)

% READEACHMOVIEMARLINWEAK Read in Marlin's weak partitions for EachMovie.
% FORMAT
% DESC loads marlin.mat from the collab/project/em-mmmf/data folder of the
% datasets directory and returns the transposed entries of one partition.
% ARG partNo : the index of the partition to read in.
% RETURN Y : the transpose of weaktrain{partNo}.
% RETURN Ytest : the transpose of weaktest{partNo}.
%
% SEEALSO : collabLoadData, readEachMovieMarlinStrong
%
% COPYRIGHT : Raquel Urtasun, 2009

% COLLAB

% location of the ratings file
dataDir = [datasetsDirectory filesep 'collab' filesep 'project' filesep 'em-mmmf' filesep 'data'];
fileName = [dataDir filesep 'marlin.mat'];

disp(['Reading ... ',fileName]);

% brings weaktrain and weaktest into the workspace
load(fileName);

Ytest = weaktest{partNo}';
Y = weaktrain{partNo}';
end

%/~
% find movies with too big rates
%max_film = max(Y');
%max_film_test = max(Ytest');
%ind = find(max_film>6);
%ind_test = find(max_film_test>6);

%ind = [ind, ind_test];
%ind = unique(ind);


% remove the corrupted data
%Y(ind,:) = [];
%Ytest(ind,:) = [];

%toRemove = [];

% find movies that are not rated
%for i=1:size(Y,1)
    % check empty rating movies
 %   ind = find(Y(i,:));
    %if (length(ind)<1)
    %    toRemove = [toRemove, i];
    %end
%end

%Y(toRemove,:) = [];
%Ytest(toRemove,:) = [];
%~/
function [Y, Ytest] = readEachMovieWeak(partNo)

% READEACHMOVIEWEAK Read in EachMovie users with at least 20 ratings.
% FORMAT
% DESC loads the EachMovie votes of the users with at least 20 ratings.
% On first use the subset is built from Vote.txt and cached in
% Vote_more_20.mat, with the kept users renumbered consecutively. Then
% 30000 users are picked at random; the last stored rating of each is held
% out as test data and the remaining ratings of those users form the
% training data.
% ARG partNo : the partition number. partNo random permutations of the
% users are drawn and the last one is used, so partNo must be at least 1.
% RETURN Y : the training data, sparse, films x users.
% RETURN Ytest : the test data, sparse, films x users.
%
% SEEALSO : readEachMovieMarlinWeak, readEachMovieMarlinStrong
%
% COPYRIGHT : Raquel Urtasun, 2009

% COLLAB

baseDir = datasetsDirectory;
dirSep = filesep;

% load the ratings

try
  fileName = [baseDir dirSep 'eachmovie' dirSep 'Vote_more_20.mat'];
  load(fileName);
catch
  % no usable cache: rebuild it from the raw votes
  fileName = [baseDir dirSep 'eachmovie' dirSep 'Vote.txt'];

  disp(['Reading ... ',fileName]);

  [users, films, ratings, weights, dates, hours, minutes, seconds] = textread(fileName, '%n\t%n\t%n\t%n\t%s %n:%n:%n');

  % Shuffle every column with the same permutation so the rows stay
  % aligned (previously only users, films and ratings were shuffled).
  ind = randperm(size(users, 1));
  users = users(ind, :);
  films = films(ind, :);
  ratings = ratings(ind, :);
  weights = weights(ind, :);
  dates = dates(ind, :);
  hours = hours(ind, :);
  minutes = minutes(ind, :);
  seconds = seconds(ind, :);
  numUsers = max(users);

  % erase the users with less than 20 films
  disp('Removing users with less than 20 ratings');
  ratingsPerUser = accumarray(users, 1, [numUsers 1]);
  isActive = ratingsPerUser >= 20;
  % kept users are renumbered 1..numActiveUsers in increasing id order;
  % removed users map to -1
  mapUsers = -ones(numUsers,1);
  mapUsers(isActive) = 1:sum(isActive);
  indTotal = find(~isActive(users));

  users(indTotal) = [];
  films(indTotal) = [];
  ratings(indTotal) = [];
  weights(indTotal) = [];
  dates(indTotal) = [];
  hours(indTotal) = [];
  minutes(indTotal) = [];
  seconds(indTotal) = [];   % was misspelt `second`, which stopped the cache being written
  users = mapUsers(users);
  fileName = [baseDir dirSep 'eachmovie' dirSep 'Vote_more_20.mat'];
  save(fileName,'users','films','ratings','weights','dates','hours','minutes','seconds');
end

numUsers = max(users);
numFilms = max(films);
numUsersTrain = 30000;
if numUsers < numUsersTrain
  error('Only %d users available but %d are required.', numUsers, numUsersTrain);
end

for i=1:partNo
  % partition the users at random; the partNo-th permutation is kept
  randIndexUsers = randperm(numUsers);
end

% Group the rating indices by user once (sort is stable, so the indices of
% each user stay in increasing order) instead of scanning all ratings for
% every selected user.
[sortedUsers, order] = sort(users);
ratingsPerUser = accumarray(users, 1, [numUsers 1]);
lastPos = cumsum(ratingsPerUser);
firstPos = lastPos - ratingsPerUser + 1;

% get the films for those users
trainParts = cell(numUsersTrain, 1);
indexTest = zeros(numUsersTrain, 1);
for i=1:numUsersTrain
  u = randIndexUsers(i);
  indexUsers = order(firstPos(u):lastPos(u));

  % the last rating of the user is used for testing, the rest for training
  indexTest(i) = indexUsers(end);
  trainParts{i} = indexUsers(1:end-1);
end
indexTrain = vertcat(trainParts{:});
numTrainRatings = length(indexTrain);
numTestRatings = numUsersTrain;

Y = spalloc(numFilms, numUsers, numTrainRatings);
Ytest = spalloc(numFilms, numUsers, numTestRatings);

indTrain = sub2ind(size(Y), films(indexTrain), users(indexTrain));
indTest = sub2ind(size(Ytest), films(indexTest), users(indexTest));

Y(indTrain) = ratings(indexTrain);
Ytest(indTest) = ratings(indexTest);
this takes too much time 93 | end 94 | numTestRatings = numUsersTrain; 95 | Y = spalloc(numFilms, numUsers, numTrainRatings); 96 | Ytest = spalloc(numFilms, numUsers, numTestRatings); 97 | numRatings = numTrainRatings + numTestRatings; 98 | 99 | 100 | indTrain = sub2ind(size(Y), films(indexTrain), users(indexTrain)); 101 | indTest = sub2ind(size(Ytest), films(indexTest), users(indexTest)); 102 | 103 | Y(indTrain) = ratings(indexTrain); 104 | Ytest(indTest) = ratings(indexTest); 105 | 106 | -------------------------------------------------------------------------------- /matlab/readMovieLens.m: -------------------------------------------------------------------------------- 1 | function [Y, lbls, Ytest] = readMovieLens(perc_train,partNo,if_random) 2 | 3 | % READMOVIELENS Read in a given percentage of the movielens data. 4 | % FORMAT 5 | % DESC reads the MovieLens 1M Marlin data. 6 | % ARG perc_train : the percentage to use as training. 7 | % ARG partNo : the partition number. 8 | % RETURN Y : the data. 9 | % RETURN lbls : the lables of the training data. 10 | % RETURN Ytest : the test data. 
11 | % 12 | % SEEALSO : collabLoadData, readMovieLensMarlinStrong 13 | % 14 | % COPYRIGHT : Raquel Urtasun, 2009 15 | 16 | % COLLAB 17 | 18 | lbls = []; 19 | 20 | 21 | baseDir = datasetsDirectory; 22 | dirSep = filesep; 23 | 24 | % load the ratings 25 | 26 | fileName = [baseDir dirSep 'movielens' dirSep 'large' dirSep 'ratings.dat']; 27 | [users, films, ratings, timeStamp] = textread(fileName, '%n::%n::%n::%n'); 28 | ind = randperm(size(users, 1)); 29 | users = users(ind, :); 30 | films = films(ind, :); 31 | ratings = ratings(ind, :); 32 | numUsers = max(users); 33 | numFilms = max(films); 34 | 35 | numRatings = size(users, 1); 36 | numTrainRatings = ceil(perc_train*numRatings); 37 | Y = spalloc(numFilms, numUsers, numTrainRatings); 38 | Ytest = spalloc(numFilms, numUsers, numRatings-numTrainRatings); 39 | 40 | 41 | 42 | % this depends on the partition number, and it is ordered 43 | numTestRatings = numRatings - numTrainRatings; 44 | if (if_random) 45 | 46 | randn('seed', 1e5); 47 | rand('seed', 1e5); 48 | for i=1:partNo 49 | index = randperm(numRatings); 50 | end 51 | indexTrain = index(1:numTrainRatings); 52 | indexTest = index(1+numTrainRatings:end); 53 | 54 | 55 | else 56 | index_rand = 1:numRatings; 57 | maxPartNo = 1./(1-perc_train); 58 | indexTrain = []; 59 | 60 | indexTrain = [1:(partNo-1)*numTestRatings]; 61 | indexTrain = [indexTrain, 1+(partNo)*numTestRatings:numRatings]; 62 | indexTest = [1+(partNo-1)*numTestRatings:partNo*numTestRatings]; 63 | end 64 | indTrain = sub2ind(size(Y), films(indexTrain), users(indexTrain)); 65 | indTest = sub2ind(size(Ytest), films(indexTest), users(indexTest)); 66 | 67 | Y(indTrain) = ratings(indexTrain); 68 | Ytest(indTest) = ratings(indexTest); 69 | 70 | 71 | % save the additional information 72 | 73 | fileName = [baseDir dirSep 'movielens' dirSep 'large' dirSep 'movies.dat']; 74 | %[id, films, Type] = textread(fileName, '%n::%s::%s'); 75 | 76 | % create the structure 77 | lbls = zeros(size(Y,1),18); 78 | 79 | fid = 
fopen(fileName); 80 | readLine = 0; 81 | counter = 0; 82 | data = []; 83 | all_genres = [{'Action'},{'Adventure'},{'Animation'},{'Children''s'}, ... 84 | {'Comedy'},{'Crime'},{'Documentary'},{'Drama'},{'Fantasy'},{'Film-Noir'}, ... 85 | {'Horror'},{'Musical'},{'Mystery'},{'Romance'},{'Sci-Fi'},{'Thriller'},{'War'},{'Western'}]; 86 | 87 | 88 | readLine = fgets(fid); 89 | while readLine ~= -1 90 | 91 | parts = stringSplit(readLine,':'); 92 | id = str2num(parts{1}); 93 | title = parts(3); 94 | genre = parts{5}; 95 | % createMovieLensExtra(genre); 96 | 97 | for i=1:length(all_genres) 98 | if (strfind(genre,all_genres{i})) 99 | lbls(id,i) = 1; 100 | end 101 | end 102 | 103 | readLine = fgets(fid); 104 | 105 | end 106 | fclose(fid); % release the movies.dat handle once every line has been read 107 | end 108 | 109 | -------------------------------------------------------------------------------- /matlab/readMovieLens10M.m: -------------------------------------------------------------------------------- 1 | function [Y, lbls, Ytest] = readMovieLens10M(partNo) 2 | 3 | % READMOVIELENS10M Read in a partition of the movielens 10M data. 4 | % FORMAT 5 | % DESC reads the MovieLens 10M Marlin data. 6 | % ARG partNo : the partition number. 7 | % RETURN Y : the data. 8 | % RETURN lbls : the labels of the training data. 9 | % RETURN Ytest : the test data. 
10 | % 11 | % SEEALSO : collabLoadData, readMovieLens 12 | % 13 | % COPYRIGHT : Raquel Urtasun, 2009 14 | 15 | % COLLAB 16 | 17 | lbls = []; 18 | 19 | baseDir = datasetsDirectory; 20 | dirSep = filesep; 21 | 22 | % load the ratings 23 | 24 | fileName = [baseDir dirSep 'movielens' dirSep '10M' dirSep 'r',num2str(partNo),'.train']; 25 | [users, films, ratings, timeStamp] = textread(fileName, '%n::%n::%n::%n'); 26 | 27 | numUsers = max(users); 28 | numFilms = max(films); 29 | 30 | %keyboard; 31 | 32 | numTrainRatings = size(users,1); 33 | Y = spalloc(numFilms, numUsers, numTrainRatings); 34 | 35 | 36 | indTrain = sub2ind(size(Y), films, users); 37 | 38 | %keyboard; 39 | Y(indTrain) = ratings; 40 | 41 | %keyboard; 42 | 43 | 44 | users = []; 45 | films = []; 46 | ratings = []; 47 | timeStamp = []; 48 | 49 | 50 | fileNameTest = [baseDir dirSep 'movielens' dirSep '10M' dirSep 'r',num2str(partNo),'.test']; 51 | [users_test, films_test, ratings_test, timeStamp_test] = textread(fileNameTest, '%n::%n::%n::%n'); 52 | 53 | numTestRatings = size(users_test,1); 54 | numRatings = numTrainRatings + numTestRatings; 55 | Ytest = spalloc(numFilms, numUsers, numTestRatings); 56 | 57 | 58 | 59 | % the test ratings come from the file for this partition number 60 | 61 | 62 | indTest = sub2ind(size(Ytest), films_test, users_test); 63 | Ytest(indTest) = ratings_test; 64 | 65 | 66 | % save the additional information 67 | 68 | %keyboard 69 | 70 | fileName = [baseDir dirSep 'movielens' dirSep 'large' dirSep 'movies.dat']; 71 | 72 | % create the structure 73 | lbls = zeros(size(Y,1),18); 74 | 75 | fid = fopen(fileName); 76 | readLine = 0; 77 | counter = 0; 78 | data = []; 79 | all_genres = [{'Action'},{'Adventure'},{'Animation'},{'Children''s'}, ... 80 | {'Comedy'},{'Crime'},{'Documentary'},{'Drama'},{'Fantasy'},{'Film-Noir'}, ... 
81 | {'Horror'},{'Musical'},{'Mystery'},{'Romance'},{'Sci-Fi'},{'Thriller'},{'War'},{'Western'}]; 82 | 83 | 84 | readLine = fgets(fid); 85 | while readLine ~= -1 86 | 87 | parts = stringSplit(readLine,':'); 88 | id = str2num(parts{1}); 89 | title = parts(3); 90 | genre = parts{5}; 91 | % createMovieLensExtra(genre); 92 | 93 | for i=1:length(all_genres) 94 | if (strfind(genre,all_genres{i})) 95 | lbls(id,i) = 1; 96 | end 97 | end 98 | 99 | readLine = fgets(fid); 100 | 101 | end 102 | fclose(fid); % release the movies.dat handle once every line has been read 103 | end 104 | 105 | -------------------------------------------------------------------------------- /matlab/readMovieLens10MCell.m: -------------------------------------------------------------------------------- 1 | function [Y, Ytest] = readMovieLens10MCell(partNo) 2 | 3 | % READMOVIELENS10MCELL Read the 10M Movielens into a cell array. 4 | % FORMAT 5 | % DESC reads the 10M MovieLens data into a cell array. 6 | % ARG partNo : the part of the 10M MovieLens data to read in. 7 | % RETURN Y : the data in a cell array. 8 | % RETURN Ytest : the test data in a cell array. 9 | % read the 10M movielens in a cell array. 
It is too big to do the regular way 10 | % 11 | % SEEALSO : collabLoadData 12 | % 13 | % COPYRIGHT : Raquel Urtasun, 2009 14 | 15 | % COLLAB 16 | 17 | 18 | baseDir = datasetsDirectory; 19 | dirSep = filesep; 20 | 21 | % load the ratings 22 | 23 | fileName = [baseDir dirSep 'movielens' dirSep '10M' dirSep 'r',num2str(partNo),'.train']; 24 | [users, films, ratings, timeStamp] = textread(fileName, '%n::%n::%n::%n'); 25 | 26 | 27 | [Y] = loadSparse10M(users,films,ratings); 28 | 29 | 30 | fileName = [baseDir dirSep 'movielens' dirSep '10M' dirSep 'r',num2str(partNo),'.test']; 31 | [users_test, films_test, ratings_test, timeStamp] = textread(fileName, '%n::%n::%n::%n'); 32 | 33 | 34 | [Ytest] = loadSparse10M(users_test,films_test,ratings_test); 35 | 36 | -------------------------------------------------------------------------------- /matlab/readMovieLens10MCellLetter.m: -------------------------------------------------------------------------------- 1 | function [Y, Ytest] = readMovieLens10MCellLetter(partLetter) 2 | 3 | % READMOVIELENS10MCELLLETTER Read the 10M Movielens into a cell array. 4 | % FORMAT 5 | % DESC reads the 10M MovieLens data into a cell array. 6 | % ARG partLetter : the part of the 10M MovieLens data to read in. 7 | % RETURN Y : the data in a cell array. 8 | % RETURN Ytest : the test data in a cell array. 9 | % read the 10M movielens in a cell array. 
It is too big to do the regular way 10 | % 11 | % SEEALSO : collabLoadData 12 | % 13 | % COPYRIGHT : Raquel Urtasun, 2009 14 | 15 | % COLLAB 16 | 17 | 18 | baseDir = datasetsDirectory; 19 | dirSep = filesep; 20 | 21 | % load the ratings 22 | 23 | fileName = [baseDir dirSep 'movielens' dirSep '10M' dirSep 'r',num2str(partLetter),'.train']; 24 | [users, films, ratings, timeStamp] = textread(fileName, '%n::%n::%n::%n'); 25 | 26 | 27 | [Y] = loadSparse10M(users,films,ratings); 28 | 29 | 30 | fileName = [baseDir dirSep 'movielens' dirSep '10M' dirSep 'r',num2str(partLetter),'.test']; 31 | [users_test, films_test, ratings_test, timeStamp] = textread(fileName, '%n::%n::%n::%n'); 32 | 33 | 34 | [Ytest] = loadSparse10M(users_test,films_test,ratings_test); 35 | end 36 | 37 | 38 | -------------------------------------------------------------------------------- /matlab/readMovieLensMarlinStrong.m: -------------------------------------------------------------------------------- 1 | function [Y, lbls, Ytest] = readMovieLensMarlinStrong(partNo) 2 | 3 | % READMOVIELENSMARLINSTRONG Read in Marlin's strong partitions for movielens 1M. 4 | % FORMAT 5 | % DESC reads the Movielens 1M Marlin strong partitions. 6 | % ARG partNo : the part of the Movielens data to read in. 7 | % RETURN Y : the data. 8 | % RETURN lbls : the labels associated with the movies. 9 | % RETURN Ytest : the test data. 10 | % 11 | % SEEALSO : collabLoadData, readMovieLensMarlinWeak 12 | % 13 | % COPYRIGHT : Raquel Urtasun, 2009 14 | 15 | % COLLAB 16 | 17 | 18 | lbls = []; 19 | 20 | baseDir = datasetsDirectory; 21 | dirSep = filesep; 22 | 23 | % load the ratings 24 | 25 | 26 | fileName = [baseDir dirSep 'collab' dirSep 'project' dirSep '1mml-mmmf' dirSep 'data' dirSep 'marlin.mat']; 27 | 28 | disp(['Reading ... 
',fileName]); 29 | 30 | load(fileName); 31 | 32 | Y = weaktrain{partNo}'; 33 | lbls = strongtrain{partNo}'; 34 | Ytest = strongtest{partNo}'; 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /matlab/readMovieLensMarlinWeak.m: -------------------------------------------------------------------------------- 1 | function [Y, Ytest] = readMovieLensMarlinWeak(partNo) 2 | 3 | % READMOVIELENSMARLINWEAK Read in Marlin's weak partitions for movielens 1M. 4 | % FORMAT 5 | % DESC reads the Movielens 1M Marlin weak partitions. 6 | % ARG partNo : the part of the Movielens data to read in. 7 | % RETURN Y : the data. 8 | % RETURN Ytest : the test data. 9 | % 10 | % SEEALSO : collabLoadData, readMovieLensMarlinStrong 11 | % 12 | % COPYRIGHT : Raquel Urtasun, 2009 13 | 14 | % COLLAB 15 | 16 | 17 | baseDir = datasetsDirectory; 18 | dirSep = filesep; 19 | 20 | % load the ratings 21 | 22 | 23 | fileName = [baseDir dirSep 'collab' dirSep 'project' dirSep '1mml-mmmf' dirSep 'data' dirSep 'marlin.mat']; 24 | 25 | disp(['Reading ... ',fileName]); 26 | 27 | load(fileName); 28 | 29 | Y = weaktrain{partNo}'; 30 | Ytest = weaktest{partNo}'; 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /matlab/readMovieLensStrong.m: -------------------------------------------------------------------------------- 1 | function [Y,lbls,Ytest] = readMovieLensStrong(partNo) 2 | 3 | % READMOVIELENSSTRONG Read in the strong partitions for the Movielens. 4 | % FORMAT 5 | % DESC reads the MovieLens 1M ratings and builds a strong partition: all ratings of a random subset of users form the training data, the remaining ratings the test data. 6 | % ARG partNo : the part of the 1M MovieLens data to read in. 7 | % RETURN Y : the data. 8 | % RETURN lbls : additional information (genre indicators for each film). 9 | % RETURN Ytest : the test data. 
10 | % 11 | % SEEALSO : collabLoadData, readMovieLensMarlinStrong 12 | % 13 | % COPYRIGHT : Raquel Urtasun, 2009 14 | 15 | % COLLAB 16 | 17 | 18 | 19 | 20 | baseDir = datasetsDirectory; 21 | dirSep = filesep; 22 | 23 | % load the ratings 24 | 25 | fileName = [baseDir dirSep 'movielens' dirSep 'large' dirSep 'ratings.dat']; 26 | [users, films, ratings, timeStamp] = textread(fileName, '%n::%n::%n::%n'); 27 | ind = randperm(size(users, 1)); 28 | users = users(ind, :); 29 | films = films(ind, :); 30 | ratings = ratings(ind, :); 31 | numUsers = max(users); 32 | numFilms = max(films); 33 | 34 | numRatings = size(users, 1); 35 | numUsersTrain = 5000; 36 | numUsers = max(users); 37 | for i=1:partNo 38 | % partition the users at random 39 | randIndexUsers = randperm(numUsers); 40 | 41 | end 42 | % get the films for those users 43 | numTrainRatings = 0; 44 | indexTrain = []; 45 | for i=1:numUsersTrain 46 | indexUsers = find(users==randIndexUsers(i)); 47 | numTrainRatings = numTrainRatings + length(indexUsers); 48 | indexTrain = [indexTrain; indexUsers]; % ?? 
this takes too much time 49 | end 50 | Y = spalloc(numFilms, numUsers, numTrainRatings); 51 | Ytest = spalloc(numFilms, numUsers, numRatings-numTrainRatings); 52 | 53 | indexTest = 1:length(users); 54 | indexTest(indexTrain) = []; 55 | 56 | % this depends on the partition number, and it is ordered 57 | numTestRatings = numRatings - numTrainRatings; 58 | 59 | indTrain = sub2ind(size(Y), films(indexTrain), users(indexTrain)); 60 | indTest = sub2ind(size(Ytest), films(indexTest), users(indexTest)); 61 | 62 | Y(indTrain) = ratings(indexTrain); 63 | Ytest(indTest) = ratings(indexTest); 64 | 65 | 66 | % save the additional information 67 | 68 | fileName = [baseDir dirSep 'movielens' dirSep 'large' dirSep 'movies.dat']; 69 | %[id, films, Type] = textread(fileName, '%n::%s::%s'); 70 | 71 | % create the structure 72 | lbls = zeros(size(Y,1),18); 73 | 74 | fid = fopen(fileName); 75 | readLine = 0; 76 | counter = 0; 77 | data = []; 78 | all_genres = [{'Action'},{'Adventure'},{'Animation'},{'Children''s'}, ... 79 | {'Comedy'},{'Crime'},{'Documentary'},{'Drama'},{'Fantasy'},{'Film-Noir'}, ... 80 | {'Horror'},{'Musical'},{'Mystery'},{'Romance'},{'Sci-Fi'},{'Thriller'},{'War'},{'Western'}]; 81 | 82 | 83 | readLine = fgets(fid); 84 | while readLine ~= -1 85 | 86 | parts = stringSplit(readLine,':'); 87 | id = str2num(parts{1}); 88 | title = parts(3); 89 | genre = parts{5}; 90 | % createMovieLensExtra(genre); 91 | 92 | for i=1:length(all_genres) 93 | if (strfind(genre,all_genres{i})) 94 | lbls(id,i) = 1; 95 | end 96 | end 97 | 98 | readLine = fgets(fid); 99 | 100 | end 101 | fclose(fid); % release the movies.dat handle once every line has been read 102 | end 103 | 104 | -------------------------------------------------------------------------------- /matlab/readMovieLensWeak.m: -------------------------------------------------------------------------------- 1 | function [Y, lbls, Ytest] = readMovieLensWeak(partNo) 2 | 3 | % READMOVIELENSWEAK Read in the weak partitions for the Movielens. 
4 | % FORMAT 5 | % DESC reads the MovieLens 1M ratings and builds a weak partition: for each selected user one rating is held out for testing and the rest are used for training. 6 | % ARG partNo : the part of the 1M MovieLens data to read in. 7 | % RETURN Y : the data. 8 | % RETURN lbls : additional information (genre indicators for each film). 9 | % RETURN Ytest : the test data. 10 | % 11 | % SEEALSO : collabLoadData, readMovieLensMarlinStrong 12 | % 13 | % COPYRIGHT : Raquel Urtasun, 2009 14 | 15 | % COLLAB 16 | 17 | 18 | 19 | baseDir = datasetsDirectory; 20 | dirSep = filesep; 21 | 22 | % load the ratings 23 | 24 | fileName = [baseDir dirSep 'movielens' dirSep 'large' dirSep 'ratings.dat']; 25 | [users, films, ratings, timeStamp] = textread(fileName, '%n::%n::%n::%n'); 26 | ind = randperm(size(users, 1)); 27 | users = users(ind, :); 28 | films = films(ind, :); 29 | ratings = ratings(ind, :); 30 | numUsers = max(users); 31 | numFilms = max(films); 32 | 33 | numRatings = size(users, 1); 34 | numUsersTrain = 5000; 35 | numUsers = max(users); 36 | for i=1:partNo 37 | % partition the users at random 38 | randIndexUsers = randperm(numUsers); 39 | 40 | end 41 | % get the films for those users 42 | numTrainRatings = 0; 43 | indexTrain = []; 44 | indexTest = []; 45 | for i=1:numUsersTrain 46 | indexUsers = find(users==randIndexUsers(i)); 47 | 48 | indexTest = [indexTest; indexUsers(end)]; 49 | 50 | % hold out one rating for testing; the rest are used for training 51 | indexUsers(end) = []; 52 | 53 | numTrainRatings = numTrainRatings + length(indexUsers); 54 | indexTrain = [indexTrain; indexUsers]; % ?? 
this takes too much time 55 | end 56 | numTestRatings = numUsersTrain; 57 | Y = spalloc(numFilms, numUsers, numTrainRatings); 58 | Ytest = spalloc(numFilms, numUsers, numTestRatings); 59 | numRatings = numTrainRatings + numTestRatings; 60 | 61 | %indexTest = 1:length(users); 62 | %indexTest(indexTrain) = []; 63 | 64 | indTrain = sub2ind(size(Y), films(indexTrain), users(indexTrain)); 65 | indTest = sub2ind(size(Ytest), films(indexTest), users(indexTest)); 66 | 67 | Y(indTrain) = ratings(indexTrain); 68 | Ytest(indTest) = ratings(indexTest); 69 | 70 | 71 | % save the additional information 72 | 73 | fileName = [baseDir dirSep 'movielens' dirSep 'large' dirSep 'movies.dat']; 74 | %[id, films, Type] = textread(fileName, '%n::%s::%s'); 75 | 76 | % create the structure 77 | lbls = zeros(size(Y,1),18); 78 | 79 | fid = fopen(fileName); 80 | readLine = 0; 81 | counter = 0; 82 | data = []; 83 | all_genres = [{'Action'},{'Adventure'},{'Animation'},{'Children''s'}, ... 84 | {'Comedy'},{'Crime'},{'Documentary'},{'Drama'},{'Fantasy'},{'Film-Noir'}, ... 85 | {'Horror'},{'Musical'},{'Mystery'},{'Romance'},{'Sci-Fi'},{'Thriller'},{'War'},{'Western'}]; 86 | 87 | 88 | readLine = fgets(fid); 89 | while readLine ~= -1 90 | 91 | parts = stringSplit(readLine,':'); 92 | id = str2num(parts{1}); 93 | title = parts(3); 94 | genre = parts{5}; 95 | % createMovieLensExtra(genre); 96 | 97 | for i=1:length(all_genres) 98 | if (strfind(genre,all_genres{i})) 99 | lbls(id,i) = 1; 100 | end 101 | end 102 | 103 | readLine = fgets(fid); 104 | 105 | end 106 | fclose(fid); % release the movies.dat handle once every line has been read 107 | 108 | 109 | -------------------------------------------------------------------------------- /matlab/restartNetflix1.m: -------------------------------------------------------------------------------- 1 | % RESTARTNETFLIX1 Resume collaborative filtering on the netflix data from a saved model. 
2 | 3 | % COLLAB 4 | 5 | randn('seed', 1e5); 6 | rand('seed', 1e5); 7 | 8 | experimentNo = 1; 9 | dataSetName = 'netflix'; 10 | 11 | load /local/data/netFlixDataProbe.mat 12 | load demNetflix1_1875881 13 | 14 | options = collabOptimiseOptions; 15 | options.numIters = 5; 16 | options.showEvery = 400; 17 | options.saveEvery = 20000; 18 | options.currIters = 17*400; 19 | options.randState = 1e5; 20 | options.startIter = 1; 21 | options.runIter = 1875882; 22 | options.startUser = 1875882; 23 | capName = dataSetName; 24 | capName(1) = upper(capName(1)); 25 | options.saveName = ['dem' capName num2str(experimentNo) '_']; 26 | options.showLikelihood = false; 27 | model = collabOptimise(model, Y, options) 28 | 29 | % val = 0; 30 | % tot = 0; 31 | % for i = 1:size(Y, 2) 32 | % ind = find(Ytest(:, i)); 33 | % elim = find(ind>size(model.X, 1)); 34 | % tind = ind; 35 | % tind(elim) = []; 36 | % [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :)); 37 | % a = Ytest(tind, i) - mu; 38 | % a = [a; Ytest(elim, i)]; 39 | % val = val + a'*a; 40 | % tot = tot + length(a); 41 | % end 42 | % error = sqrt(val/tot); 43 | 44 | % Save the results. 45 | capName = dataSetName; 46 | capName(1) = upper(capName(1)); 47 | save(['dem' capName num2str(experimentNo) '.mat'], 'model'); 48 | -------------------------------------------------------------------------------- /matlab/restartNetflix5.m: -------------------------------------------------------------------------------- 1 | % RESTARTNETFLIX5 Resume collaborative filtering on the netflix data from a saved model. 
2 | 3 | % COLLAB 4 | 5 | randn('seed', 1e5); 6 | rand('seed', 1e5); 7 | 8 | experimentNo = 5; 9 | dataSetName = 'netflix'; 10 | 11 | load /local/data/netFlixDataProbe.mat 12 | load demNetflix5_2426657 13 | 14 | disp(['Restarting ' dataSetName ' experiment ' num2str(experimentNo)]); 15 | options = collabOptimiseOptions; 16 | options.numIters = 5; 17 | options.showEvery = 400; 18 | options.saveEvery = 20000; 19 | options.randState = randState; 20 | options.randnState = randnState; 21 | options.startIter = iters; 22 | options.runIter = runIter; 23 | options.startUser = user+1; 24 | capName = dataSetName; 25 | capName(1) = upper(capName(1)); 26 | options.saveName = ['dem' capName num2str(experimentNo) '_']; 27 | options.showLikelihood = false; 28 | model = collabOptimise(model, Y, options) 29 | 30 | % val = 0; 31 | % tot = 0; 32 | % for i = 1:size(Y, 2) 33 | % ind = find(Ytest(:, i)); 34 | % elim = find(ind>size(model.X, 1)); 35 | % tind = ind; 36 | % tind(elim) = []; 37 | % [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :)); 38 | % a = Ytest(tind, i) - mu; 39 | % a = [a; Ytest(elim, i)]; 40 | % val = val + a'*a; 41 | % tot = tot + length(a); 42 | % end 43 | % error = sqrt(val/tot); 44 | 45 | % Save the results. 
46 | capName = dataSetName; 47 | capName(1) = upper(capName(1)); 48 | save(['dem' capName num2str(experimentNo) '.mat'], 'model'); 49 | -------------------------------------------------------------------------------- /matlab/splitProbeData.m: -------------------------------------------------------------------------------- 1 | fid = fopen('probe.txt'); 2 | count = 0; 3 | probeFilms = cell(17770, 1); 4 | while 1 5 | count = count + 1; 6 | nextLine = fgetl(fid); 7 | if ~ischar(nextLine), break, end 8 | if nextLine(end)==':' 9 | film = str2num(nextLine(1:end-1)); 10 | else 11 | probeFilms{film} = [probeFilms{film}; str2num(nextLine)]; 12 | end 13 | if(~rem(count, 10000)) 14 | fprintf('Loaded in %d rating locations.\n', count) 15 | end 16 | end 17 | fclose(fid); 18 | Yprobe = cell(2649429, 3); 19 | 20 | for i = 1:length(probeFilms) 21 | for j = 1:length(probeFilms{i}) 22 | userId = probeFilms{i}(j); 23 | ind = find(Y{userId, 1}==i); 24 | 25 | if isempty(Yprobe{userId, 1}) 26 | Yprobe{userId, 1} = zeros(20, 1); 27 | Yprobe{userId, 2} = zeros(20, 1); 28 | Yprobe{userId, 3} = 0; 29 | end 30 | if Yprobe{userId, 1}(end) ~= 0 31 | Yprobe{userId, 1} = [Yprobe{userId, 1}; zeros(20, 1)]; 32 | Yprobe{userId, 2} = [Yprobe{userId, 2}; zeros(20, 1)]; 33 | end 34 | Yprobe{userId, 3} = Yprobe{userId, 3} + 1; 35 | Yprobe{userId, 1}(Yprobe{userId, 3}) = i; 36 | Yprobe{userId, 2}(Yprobe{userId, 3}) = Y{userId,2}(ind); 37 | Y{userId, 1}(ind) = []; 38 | Y{userId, 2}(ind) = []; 39 | Y{userId, 3} = Y{userId, 3} - 1; 40 | end 41 | 42 | if(~rem(i, 10)) 43 | fprintf('Done %d films.\n', i) 44 | end 45 | end -------------------------------------------------------------------------------- /python/demNetflix10.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | 5 | #Try collaborative filtering on the netflix data. 
6 | import collab 7 | import ndlml as nl 8 | opt = collab.options() 9 | opt.resultsBaseDir = "/local/data/results/netflix/" 10 | 11 | try: 12 | collab.run(latentDim=10, \ 13 | dataSetName='netflix', \ 14 | experimentNo=10, \ 15 | options=opt) 16 | except: 17 | import pdb, sys 18 | e, m, tb = sys.exc_info() 19 | pdb.post_mortem(tb) 20 | -------------------------------------------------------------------------------- /python/demNetflix2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | 5 | # Try collaborative filtering on the netflix data. 6 | import collab 7 | import ndlml as nl 8 | opt = collab.options() 9 | opt.resultsBaseDir = "/local/data/results/netflix/" 10 | 11 | try: 12 | collab.run(latentDim=2, \ 13 | dataSetName='netflix', \ 14 | experimentNo=2, \ 15 | options=opt) 16 | except: 17 | import pdb, sys 18 | e, m, tb = sys.exc_info() 19 | pdb.post_mortem(tb) 20 | b 21 | -------------------------------------------------------------------------------- /python/demNetflix3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | 5 | # Try collaborative filtering on the netflix data. 6 | import collab 7 | import ndlml as nl 8 | opt = collab.options() 9 | opt.resultsBaseDir = "/local/data/results/netflix/" 10 | 11 | 12 | try: 13 | collab.run(latentDim=3, \ 14 | dataSetName='netflix', \ 15 | experimentNo=3, \ 16 | options=opt) 17 | except: 18 | import pdb, sys 19 | e, m, tb = sys.exc_info() 20 | pdb.post_mortem(tb) 21 | -------------------------------------------------------------------------------- /python/demNetflix4.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | 5 | # Try collaborative filtering on the netflix data. 
6 | import collab 7 | import ndlml as nl 8 | opt = collab.options() 9 | opt.resultsBaseDir = "/local/data/results/netflix/" 10 | 11 | try: 12 | collab.run(latentDim=4, \ 13 | dataSetName='netflix', \ 14 | experimentNo=4, \ 15 | options=opt) 16 | except: 17 | import pdb, sys 18 | e, m, tb = sys.exc_info() 19 | pdb.post_mortem(tb) 20 | -------------------------------------------------------------------------------- /python/demNetflix5.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | 5 | # Try collaborative filtering on the netflix data. 6 | import collab 7 | import ndlml as nl 8 | opt = collab.options() 9 | opt.resultsBaseDir = "/local/data/results/netflix/" 10 | 11 | try: 12 | collab.run(latentDim=5, \ 13 | dataSetName='netflix', \ 14 | experimentNo=5, \ 15 | options=opt) 16 | except: 17 | import pdb, sys 18 | e, m, tb = sys.exc_info() 19 | pdb.post_mortem(tb) 20 | -------------------------------------------------------------------------------- /python/demNetflix6.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | 5 | # Try collaborative filtering on the netflix data. 6 | import collab 7 | import ndlml as nl 8 | opt = collab.options() 9 | opt.resultsBaseDir = "/local/data/results/netflix/" 10 | 11 | try: 12 | collab.run(latentDim=6, \ 13 | dataSetName='netflix', \ 14 | experimentNo=6, \ 15 | options=opt) 16 | except: 17 | import pdb, sys 18 | e, m, tb = sys.exc_info() 19 | pdb.post_mortem(tb) 20 | -------------------------------------------------------------------------------- /python/demNetflix7.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | 5 | # Try collaborative filtering on the netflix data. 
6 | import collab 7 | import ndlml as nl 8 | opt = collab.options() 9 | opt.resultsBaseDir = "/local/data/results/netflix/" 10 | 11 | try: 12 | collab.run(latentDim=7, \ 13 | dataSetName='netflix', \ 14 | experimentNo=7, \ 15 | options=opt) 16 | except: 17 | import pdb, sys 18 | e, m, tb = sys.exc_info() 19 | pdb.post_mortem(tb) 20 | -------------------------------------------------------------------------------- /python/demNetflix8.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | 5 | # Try collaborative filtering on the netflix data. 6 | import collab 7 | import ndlml as nl 8 | opt = collab.options() 9 | opt.resultsBaseDir = "/local/data/results/netflix/" 10 | 11 | try: 12 | collab.run(latentDim=8, \ 13 | dataSetName='netflix', \ 14 | experimentNo=8, \ 15 | options=opt) 16 | except: 17 | import pdb, sys 18 | e, m, tb = sys.exc_info() 19 | pdb.post_mortem(tb) 20 | -------------------------------------------------------------------------------- /python/demNetflix9.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | 5 | #Try collaborative filtering on the netflix data. 6 | import collab 7 | import ndlml as nl 8 | opt = collab.options() 9 | opt.resultsBaseDir = "/local/data/results/netflix/" 10 | 11 | try: 12 | collab.run(latentDim=9, \ 13 | dataSetName='netflix', \ 14 | experimentNo=9, \ 15 | options=opt) 16 | except: 17 | import pdb, sys 18 | e, m, tb = sys.exc_info() 19 | pdb.post_mortem(tb) 20 | -------------------------------------------------------------------------------- /python/restartNetflix10.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | 5 | # Try collaborative filtering on the netflix data. 
6 | import collab 7 | import ndlml as nl 8 | opt = collab.options() 9 | opt.resultsBaseDir = "/local/data/results/netflix/" 10 | try: 11 | collab.restart(loadIter = 0, 12 | startCount = 200000, 13 | loadUser = 1786429, 14 | latentDim = 10, 15 | dataSetName = 'netflix', 16 | experimentNo = 10, 17 | options=opt) 18 | except: 19 | import pdb, sys 20 | e, m, tb = sys.exc_info() 21 | pdb.post_mortem(tb) 22 | 23 | -------------------------------------------------------------------------------- /python/restartNetflix2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | 5 | # Try collaborative filtering on the netflix data. 6 | import collab 7 | import ndlml as nl 8 | opt = collab.options() 9 | opt.resultsBaseDir = "/local/data/results/netflix/" 10 | try: 11 | collab.restart(loadIter = 9, 12 | startCount = 4600000, 13 | loadUser = 1288699, 14 | latentDim = 2, 15 | dataSetName = 'netflix', 16 | experimentNo = 2, 17 | options=opt) 18 | except: 19 | import pdb, sys 20 | e, m, tb = sys.exc_info() 21 | pdb.post_mortem(tb) 22 | 23 | -------------------------------------------------------------------------------- /python/restartNetflix3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | 5 | # Try collaborative filtering on the netflix data. 
6 | import collab 7 | import ndlml as nl 8 | opt = collab.options() 9 | opt.resultsBaseDir = "/local/data/results/netflix/" 10 | try: 11 | collab.restart(loadIter = 5, 12 | startCount = 2660000, 13 | loadUser = 1499180, 14 | latentDim = 3, 15 | dataSetName = 'netflix', 16 | experimentNo = 3, 17 | options=opt) 18 | except: 19 | import pdb, sys 20 | e, m, tb = sys.exc_info() 21 | pdb.post_mortem(tb) 22 | 23 | -------------------------------------------------------------------------------- /python/restartNetflix4.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | 5 | # Try collaborative filtering on the netflix data. 6 | import collab 7 | import ndlml as nl 8 | opt = collab.options() 9 | opt.resultsBaseDir = "/local/data/results/netflix/" 10 | try: 11 | collab.restart(loadIter = 5, 12 | startCount = 2520000, 13 | loadUser = 1776294, 14 | latentDim = 4, 15 | dataSetName = 'netflix', 16 | experimentNo = 4, 17 | options=opt) 18 | except: 19 | import pdb, sys 20 | e, m, tb = sys.exc_info() 21 | pdb.post_mortem(tb) 22 | 23 | -------------------------------------------------------------------------------- /python/restartNetflix5.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | 5 | # Try collaborative filtering on the netflix data. 
6 | import collab 7 | import ndlml as nl 8 | opt = collab.options() 9 | opt.resultsBaseDir = "/local/data/results/netflix/" 10 | try: 11 | collab.restart(loadIter = 9, 12 | startCount = 4640000, 13 | loadUser = 1361446, 14 | latentDim = 5, 15 | dataSetName = 'netflix', 16 | experimentNo = 5, 17 | options=opt) 18 | except: 19 | import pdb, sys 20 | e, m, tb = sys.exc_info() 21 | pdb.post_mortem(tb) 22 | 23 | -------------------------------------------------------------------------------- /python/restartNetflix6.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | 5 | # Try collaborative filtering on the netflix data. 6 | import collab 7 | import ndlml as nl 8 | opt = collab.options() 9 | opt.resultsBaseDir = "/local/data/results/netflix/" 10 | try: 11 | collab.restart(loadIter = 6, 12 | startCount = 3320000, 13 | loadUser = 2600176, 14 | latentDim = 6, 15 | dataSetName = 'netflix', 16 | experimentNo = 6, 17 | options=opt) 18 | except: 19 | import pdb, sys 20 | e, m, tb = sys.exc_info() 21 | pdb.post_mortem(tb) 22 | 23 | -------------------------------------------------------------------------------- /python/restartNetflix7.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | 5 | # Try collaborative filtering on the netflix data. 
6 | import collab 7 | import ndlml as nl 8 | opt = collab.options() 9 | opt.resultsBaseDir = "/local/data/results/netflix/" 10 | try: 11 | collab.restart(loadIter = 5, 12 | startCount = 2620000, 13 | loadUser = 2190625, 14 | latentDim = 7, 15 | dataSetName = 'netflix', 16 | experimentNo = 7, 17 | options=opt) 18 | except: 19 | import pdb, sys 20 | e, m, tb = sys.exc_info() 21 | pdb.post_mortem(tb) 22 | 23 | -------------------------------------------------------------------------------- /python/restartNetflix8.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | 5 | # Try collaborative filtering on the netflix data. 6 | import collab 7 | import ndlml as nl 8 | opt = collab.options() 9 | opt.resultsBaseDir = "/local/data/results/netflix/" 10 | try: 11 | collab.restart(loadIter = 3, 12 | startCount = 1520000, 13 | loadUser = 560608, 14 | latentDim = 8, 15 | dataSetName = 'netflix', 16 | experimentNo = 8, 17 | options=opt) 18 | except: 19 | import pdb, sys 20 | e, m, tb = sys.exc_info() 21 | pdb.post_mortem(tb) 22 | 23 | -------------------------------------------------------------------------------- /python/restartNetflix9.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | 5 | # Try collaborative filtering on the netflix data. 6 | import collab 7 | import ndlml as nl 8 | opt = collab.options() 9 | opt.resultsBaseDir = "/local/data/results/netflix/" 10 | try: 11 | collab.restart(loadIter = 2, 12 | startCount = 1440000, 13 | loadUser = 2331578, 14 | latentDim = 9, 15 | dataSetName = 'netflix', 16 | experimentNo = 9, 17 | options=opt) 18 | except: 19 | import pdb, sys 20 | e, m, tb = sys.exc_info() 21 | pdb.post_mortem(tb) 22 | 23 | --------------------------------------------------------------------------------